Example #1
0
    def filter(self):
        """Run every count map through ``Result.filterMapOnThreshold``.

        The five ``*_min_count`` flags are read first. The size of each map
        is logged, each map is handed to ``Result.filterMapOnThreshold``
        together with its flag value, and the sizes are logged once more.
        """
        def reportSizes():
            # The same five lines are emitted before and after filtering.
            log.info('Number of different xing:  %s' % len(self.xingMap))
            log.info('Number of different xing char:  %s' % len(self.xingCharMap))
            log.info('Number of different ming:  %s' % len(self.mingMap))
            log.info('Number of different ming char:  %s' % len(self.mingCharMap))
            log.info('Number of different xing ming:  %s' % len(self.xingMingMap))

        # Thresholds are looked up before anything is logged or filtered.
        minCounts = [
            flag.getFlag(flagName)
            for flagName in (
                'xing_char_map_min_count',
                'xing_map_min_count',
                'ming_char_map_min_count',
                'ming_map_min_count',
                'xing_ming_map_min_count',
            )
        ]

        log.info('==== Start filter, before: ====')
        reportSizes()

        # Listed in the same order as the flag names above.
        countMaps = (
            self.xingCharMap,
            self.xingMap,
            self.mingCharMap,
            self.mingMap,
            self.xingMingMap,
        )
        for countMap, minCount in zip(countMaps, minCounts):
            Result.filterMapOnThreshold(countMap, minCount)

        log.info('==== After filter: ====')
        reportSizes()
Example #2
0
 def analyse(self):
     """Process all profiles and compute the result.

     Fetches the profiles via ``getProfiles``, logs how many there are and
     hands them to ``processProfiles``. When the ``use_result_filter`` flag
     is truthy the result is filtered before ``caculate`` is invoked on it.
     """
     profileList = self.getProfiles()
     profileCount = len(profileList)
     log.info('Total Profile number:  %s' % profileCount)
     self.processProfiles(profileList)

     filterRequested = flag.getFlag('use_result_filter')
     if filterRequested:
         self.result.filter()

     # "caculate" (sic) is the method name exposed by the result object.
     self.result.caculate()
Example #3
0
def main():
    """Start the crawl manager after the configured waiting period.

    Processes the command-line flags, configures logging, installs
    ``detectSignal`` as the SIGINT handler, sleeps for ``waiting_time``
    minutes and then starts a ``CrawlManager``.
    """
    flag.processArguments()

    logPath = GC.LOG_FILE_DIR + 'CrawlManager'
    log.config(logPath, 'info', 'info')
    signal.signal(signal.SIGINT, detectSignal)

    waitingTime = flag.getFlag('waiting_time')
    log.info(''.join(('Wait for: ', str(waitingTime), ' minutes')))
    # The flag is expressed in minutes; time.sleep expects seconds.
    delaySeconds = waitingTime * 60
    time.sleep(delaySeconds)

    CrawlManager().start()