def filter(self):
    """Run the threshold filter over every name map and log sizes around it.

    Each map is passed to ``Result.filterMapOnThreshold`` together with the
    minimum count read from its corresponding flag. The number of entries in
    each map is logged once before and once after filtering. The return
    value of ``Result.filterMapOnThreshold`` is not used.
    """
    # Read every threshold up front; the list order is the order in which
    # the maps are filtered below.
    thresholds = [
        ('xingCharMap', flag.getFlag('xing_char_map_min_count')),
        ('xingMap', flag.getFlag('xing_map_min_count')),
        ('mingCharMap', flag.getFlag('ming_char_map_min_count')),
        ('mingMap', flag.getFlag('ming_map_min_count')),
        ('xingMingMap', flag.getFlag('xing_ming_map_min_count')),
    ]

    # Log templates paired with the attribute whose size they report, in
    # reporting order (which differs from the filtering order above).
    size_reports = (
        ('Number of different xing: %s', 'xingMap'),
        ('Number of different xing char: %s', 'xingCharMap'),
        ('Number of different ming: %s', 'mingMap'),
        ('Number of different ming char: %s', 'mingCharMap'),
        ('Number of different xing ming: %s', 'xingMingMap'),
    )

    def log_sizes():
        # Attributes are looked up at call time so the "after" report
        # reflects whatever the maps hold once filtering has run.
        for template, attr_name in size_reports:
            log.info(template % len(getattr(self, attr_name)))

    log.info('==== Start filter, before: ====')
    log_sizes()

    for attr_name, min_count in thresholds:
        Result.filterMapOnThreshold(getattr(self, attr_name), min_count)

    log.info('==== After filter: ====')
    log_sizes()
def analyse(self):
    """Load the profiles, process them, and run the result calculation.

    The number of loaded profiles is logged before processing. When the
    'use_result_filter' flag is truthy, ``self.result.filter()`` is called
    before ``self.result.caculate()``.
    """
    loaded_profiles = self.getProfiles()
    profile_count = len(loaded_profiles)
    log.info('Total Profile number: %s' % profile_count)

    self.processProfiles(loaded_profiles)

    # The flag is read only after processing, as in the original sequence.
    filtering_enabled = flag.getFlag('use_result_filter')
    if filtering_enabled:
        self.result.filter()

    # NOTE(review): assumes the calculation step is unconditional and only
    # the filter call is guarded by the flag; confirm against the original
    # indentation.
    self.result.caculate()
def main():
    """Entry point: configure, wait for the requested delay, start crawling.

    Processes command-line flags, configures logging under
    ``GC.LOG_FILE_DIR``, installs ``detectSignal`` as the SIGINT handler,
    sleeps for the number of minutes given by the 'waiting_time' flag, and
    then starts a ``CrawlManager``.
    """
    flag.processArguments()

    log_target = GC.LOG_FILE_DIR + 'CrawlManager'
    log.config(log_target, 'info', 'info')

    signal.signal(signal.SIGINT, detectSignal)

    delay_minutes = flag.getFlag('waiting_time')
    wait_message = 'Wait for: ' + str(delay_minutes) + ' minutes'
    log.info(wait_message)

    # time.sleep expects seconds; the flag value is expressed in minutes.
    delay_seconds = delay_minutes * 60
    time.sleep(delay_seconds)

    crawl_manager = CrawlManager()
    crawl_manager.start()