def run(self):
    """Run the main crawl loop for up to ``ROUND_NUMBER`` rounds.

    A fresh production database handle and account pool are created first.
    Each round then:

    1. crawls start nodes when the database reports that more are needed,
    2. runs the multi-threaded crawl with ``THREAD_NUMBER`` threads,
    3. checks for a stop signal and leaves the loop if one is raised,
    4. drops and re-establishes the router's PPPoE connection.
    """
    self.dataBase = createProdDataBase()
    self.renrenAccountPool = createProdRenrenAccountPool()

    for roundIndex in range(self.ROUND_NUMBER):
        log.info('>>>>>>>> Main Crawl Thread Round(%s) <<<<<<<<' % (roundIndex + 1))

        if self.dataBase.needMoreStartNode():
            startNodeCrawler = StartNodeCrawler(
                self.dataBase, self.renrenAccountPool)
            startNodeCrawler.startCrawling()

        self.startMultiThreadCrawling(self.THREAD_NUMBER)
        # Alternative crawl modes, left disabled:
        #self.startMultiThreadCrawlingWithProxy(1)
        #manager.startSignleThreadCrawling()

        # detectStopSignal() reports a stop request by raising; any exception
        # ends the run before the router is touched again. The exception
        # object itself is not needed, so it is not bound (this form is valid
        # on both Python 2 and Python 3).
        try:
            Crawler.detectStopSignal()
        except Exception:
            break

        # Re-dial the PPPoE link between rounds.
        # NOTE(review): presumably done to obtain a new IP address -- confirm.
        log.info('>>>>>> Router disconnect PPPoE <<<<<<')
        router.disconnectPPPoE()
        time.sleep(2)
        log.info('>>>>>> Router connect PPPoE <<<<<<')
        router.connectPPPoE()
        # Wait for the connection being established.
        time.sleep(10)
def main():
    """Import start-node ids from ``tools/data/start_nodes`` into the database.

    The first whitespace-separated token of every non-blank line is handed to
    ``insertIntoStartList``. A truthy result counts as an import, a falsy one
    as a failure; both totals are logged once all lines have been processed.
    """
    log.config(GC.LOG_FILE_DIR + "import_start_nodes", "info", "info")
    sourcePath = "tools/data/start_nodes"
    dataBase = database.createProdDataBase()

    with open(sourcePath) as nodeFile:
        rawLines = nodeFile.readlines()

    imported = 0
    failed = 0
    for rawLine in rawLines:
        tokens = rawLine.split()
        if not tokens:
            # Nothing on this line, so there is no account id to import.
            continue
        nodeId = tokens[0]  # Start node id.
        log.info("Importing start node: " + nodeId)
        if dataBase.insertIntoStartList(nodeId):
            imported += 1
        else:
            failed += 1

    summary = (
        "Finish importing..........\n" +
        "Total imported start nodes number: " + str(imported) + "\n" +
        "Fail start nodes number: " + str(failed)
    )
    log.info(summary)
def __init__(self, dataBase=None, accountPool=None):
    """Bind the database and account pool, then reset in-memory state.

    Args:
        dataBase: Database handle to use. When falsy, a new one is obtained
            from ``createProdDataBase()``.
        accountPool: Account pool to use. When falsy, a new one is obtained
            from ``createProdRenrenAccountPool()``.
    """
    # Fall back to the production factories when no (truthy) handle is given.
    self.dataBase = dataBase or createProdDataBase()
    self.renrenAccountPool = accountPool or createProdRenrenAccountPool()

    self.dataBase.releaseAllStartNode()

    # Fresh, empty bookkeeping for this instance.
    self.crawledShareSet = set()
    self.requestCount = 0
    self.shareList = []
    self.userList = []