예제 #1
0
    def run(self):
        """Run the main crawl loop for up to ``self.ROUND_NUMBER`` rounds.

        Creates the production database and Renren account pool, then in each
        round:

        1. runs a ``StartNodeCrawler`` when the database reports that it
           needs more start nodes;
        2. starts the multi-threaded crawl with ``self.THREAD_NUMBER``;
        3. calls ``Crawler.detectStopSignal()`` and leaves the loop if it
           raises;
        4. disconnects and reconnects the router's PPPoE link.
        """
        self.dataBase = createProdDataBase()
        self.renrenAccountPool = createProdRenrenAccountPool()
        for roundIndex in range(self.ROUND_NUMBER):
            log.info('>>>>>>>>  Main Crawl Thread Round(%s)  <<<<<<<<' % (roundIndex + 1))

            if self.dataBase.needMoreStartNode():
                startNodeCrawler = StartNodeCrawler(
                    self.dataBase, self.renrenAccountPool)
                startNodeCrawler.startCrawling()

            self.startMultiThreadCrawling(self.THREAD_NUMBER)
            #self.startMultiThreadCrawlingWithProxy(1)
            #manager.startSignleThreadCrawling()

            try:
                Crawler.detectStopSignal()
            except Exception as e:
                # Any exception from detectStopSignal() ends the run.
                # NOTE(review): presumably it raises only when a stop was
                # requested -- confirm. The exception is logged (with %r so
                # non-ASCII messages cannot fail to format) so an unexpected
                # error is not mistaken for a normal stop.
                log.info('>>>>>> Main crawl loop stopped: %r  <<<<<<' % (e,))
                break

            # Cycle the PPPoE connection between rounds.
            # NOTE(review): presumably done to obtain a fresh IP -- confirm.
            log.info('>>>>>> Router disconnect PPPoE  <<<<<<')
            router.disconnectPPPoE()
            time.sleep(2)
            log.info('>>>>>> Router connect PPPoE  <<<<<<')
            router.connectPPPoE()
            # Wait for the connection being established.
            time.sleep(10)
예제 #2
0
def main():
    """Import start node ids from ``tools/data/start_nodes`` into the database.

    For every non-blank line of the file, the first whitespace-separated
    token is taken as a start node id and passed to
    ``dataBase.insertIntoStartList``. A truthy result counts as an import,
    anything else as a failure; both totals are logged once the file has
    been processed.
    """
    log.config(GC.LOG_FILE_DIR + "import_start_nodes", "info", "info")
    fileName = "tools/data/start_nodes"
    importCount = 0
    failCount = 0
    dataBase = database.createProdDataBase()
    with open(fileName) as importedFile:
        # Iterate the file object directly instead of loading every line
        # into memory with readlines().
        for line in importedFile:
            fields = line.split()
            if not fields:
                continue  # Blank line: there is no id to import.
            # Only the first token is used; the rest of the line is ignored.
            nodeId = fields[0]
            log.info("Importing start node: " + nodeId)
            if dataBase.insertIntoStartList(nodeId):
                importCount += 1
            else:
                failCount += 1

    log.info(
        "Finish importing..........\n"
        "Total imported start nodes number: %s\n"
        "Fail start nodes number: %s" % (importCount, failCount)
    )
예제 #3
0
 def __init__(self, dataBase=None, accountPool=None):
     """Set up the crawler's database, account pool and working state.

     Args:
         dataBase: database object to use; when ``None`` a production
             database is created with ``createProdDataBase()``.
         accountPool: Renren account pool to use; when ``None`` a
             production pool is created with
             ``createProdRenrenAccountPool()``.
     """
     # Compare against None rather than relying on truthiness, so that a
     # caller-supplied object that happens to be falsy (e.g. an empty pool
     # defining __len__) is not silently replaced by the production one.
     self.dataBase = createProdDataBase() if dataBase is None else dataBase
     self.renrenAccountPool = (
         createProdRenrenAccountPool() if accountPool is None
         else accountPool)

     # Release all start nodes in the database before initialising state.
     # NOTE(review): presumably clears claims left by an earlier run -- confirm.
     self.dataBase.releaseAllStartNode()
     self.userList = []
     self.shareList = []
     self.requestCount = 0
     self.crawledShareSet = set()