from resource.renrenaccount import RenrenAccountErrorCode
from resource.renrenaccountpool import createProdRenrenAccountPool
from resource.renrenaccountpool import RenrenAccountPool
from resource.proxypool import createProdProxyPool
from resource.proxy import Proxy
from crawl.crawler import Crawler
from crawl.crawler import CrawlerException
from crawl.crawler import CrawlerErrorCode
from crawl.renrenagent import RenrenAgent
from crawl.startnodecrawler import StartNodeCrawler

import time
import threading
import signal

# Command-line flags that tune the crawl (wait time, per-thread account
# limit, thread count and round count).
flag.defineFlag(name='waiting_time', type_=FlagType.INT, default=0,
    description='Wait before crawling to let account become avaliable.(In mins)')
flag.defineFlag(name='accounts_limit', type_=FlagType.INT, default=10,
    description='Account limit in a single thread.')
flag.defineFlag(name='thread_number', type_=FlagType.INT, default=8,
    description='Crawling thread number in a single round.')
flag.defineFlag(name='round_number', type_=FlagType.INT, default=30,
    description='Crawling round number.')

# Module-level state; both start out unset/false.
currentCrawler = None
stopSignal = False


def detectSignal(a, b):
    """Report an interrupt and ask the Crawler class to stop.

    Args:
        a: Unused.
        b: Unused.

    NOTE(review): the two-argument shape matches a handler registered with
    signal.signal(), in which case `a` would be the signal number and `b`
    the current stack frame -- the registration is not visible here, confirm.
    """
    # Parenthesized single-argument form prints the same text on Python 2
    # and is also valid syntax on Python 3.
    print("INT Signal detect")
    Crawler.setStopSignal()
# -*- coding: utf-8 -*-

from jx import log
from jx import flag
from utils.util import isHanChar
from utils import confidential as CFD
from utils import globalconfig as GC
from data.database import Profile
from data.database import Gender
from data.readonlydatastore import createProdReadOnlyDataStore
from data.readonlydatastore import ReadOnlyDataStore
from analyse.result import Result
from entities.name_helper import NameHelper

flag.defineFlag('use_result_filter', flag.FlagType.BOOLEAN, True,
    'Whether there is need to filter some unnecessary content in result.')


def valueCmp(x, y):
    """Tell whether the count of x's value is smaller than that of y's.

    Args:
        x: An indexable item whose element [1] has a `count` attribute.
        y: An indexable item whose element [1] has a `count` attribute.

    Returns:
        True if x[1].count < y[1].count, otherwise False.

    NOTE(review): the name suggests a cmp-style comparator, but this returns
    a bool rather than a negative/zero/positive integer -- verify against the
    call sites before passing it as a sort `cmp` function.
    """
    return x[1].count < y[1].count


class Analyser:
    """Analyse the data in the data store and build the index of the result."""

    dataStore = None  # The data source.
    # NOTE(review): created once at class-definition time, so this Result is
    # shared by every instance unless it is rebound per instance.
    result = Result()

    def __init__(self):
        self.dataStore = createProdReadOnlyDataStore()

    def analyse(self):
        """Analyse the data."""
#!/usr/bin/python
# -*- coding: utf-8 -*-

from jx import flag
from jx import log
from entities.name_pb2 import RawNameItemInfo, GlobalNameInfo
from entities.name_helper import NameHelper
from struct import *

# Minimum-count thresholds, one flag per map.
flag.defineFlag('xing_char_map_min_count', flag.FlagType.INT, 2,
    'item info with count smaller than this will be filtered out.')
flag.defineFlag('xing_map_min_count', flag.FlagType.INT, 2,
    'item info with count smaller than this will be filtered out.')
flag.defineFlag('ming_char_map_min_count', flag.FlagType.INT, 2,
    'item info with count smaller than this will be filtered out.')
flag.defineFlag('ming_map_min_count', flag.FlagType.INT, 5,
    'item info with count smaller than this will be filtered out.')
flag.defineFlag('xing_ming_map_min_count', flag.FlagType.INT, 5,
    'item info with count smaller than this will be filtered out.')


class Result:
    """Analysis result."""

    globalInfo = None
    xingCharMap = None  # Family name character map object.
    # An array containing the sorted xing chars. Set by @caculate.
    xingCharSortedArray = None
    xingMap = None  # Family name string map object.