def test():
    """End-to-end crawler smoke test.

    Rebuilds the crawler tables from scratch, checks one account out of the
    production pool, logs in through a RenrenAgent, and crawls a fixed
    profile id.  A CrawlerException is the expected way for the crawl to
    terminate; its reason is logged.
    """
    log.config(GC.LOG_FILE_DIR + 'crawler_test', 'info', 'info')
    db = createConnection()
    # Recreate the schema so the test always starts from a clean database.
    createTables(db)
    dropTables(db)
    createTables(db)
    pool = renrenaccountpool.createProdRenrenAccountPool()
    accounts = pool.getAccounts(1)
    account = accounts[0]
    global crawler
    try:
        crawler = Crawler(db)
        agent = RenrenAgent(account)
        agent.login()
        crawler.setAgent(agent)
        # Renamed from 'id' to avoid shadowing the builtin.
        startId = "322601086"
        crawler.crawl(startId, 30)
    except CrawlerException as e:  # 'as' form: valid in py2.6+ and py3
        log.info("Crawler end, reason: " + str(e))
        if e.errorCode == CrawlerErrorCode.DETECT_STOP_SIGNAL:
            # Single-argument print keeps py2 statement-print behavior.
            print("detect int signal")
            return
def main():
    """Entry point for the proxy importer script.

    Forces the process-wide default encoding to UTF-8 (py2 idiom for this
    codebase), configures logging, and runs the importer.
    """
    reload(sys)
    sys.setdefaultencoding("utf-8")
    log.config(GC.LOG_FILE_DIR + 'import_proxies', 'info', 'info')
    ProxyImporter().start()
def main():
    """Import start-node ids into the production database.

    Reads "tools/data/start_nodes", takes the first whitespace-separated
    token of each non-blank line as a start-node id, inserts each into the
    start list, and logs how many inserts succeeded and failed.
    """
    log.config(GC.LOG_FILE_DIR + "import_start_nodes", "info", "info")
    fileName = "tools/data/start_nodes"
    importCount = 0
    failCount = 0
    dataBase = database.createProdDataBase()
    with open(fileName) as importedFile:
        # Iterate the file lazily instead of materializing readlines().
        for line in importedFile:
            strs = line.split()
            if len(strs) < 1:
                continue  # May be not a valid account
            nodeId = strs[0]  # Start node id ('id' would shadow the builtin).
            log.info("Importing start node: " + nodeId)
            if dataBase.insertIntoStartList(nodeId):
                importCount += 1
            else:
                failCount += 1
    log.info(
        "Finish importing..........\n" +
        "Total imported start nodes number: " + str(importCount) + "\n" +
        "Fail start nodes number: " + str(failCount))
def main():
    """Entry point for the crawl manager.

    Parses command-line flags, configures logging, installs the SIGINT
    handler, sleeps for the flag-configured number of minutes, and then
    starts crawl management.
    """
    flag.processArguments()
    log.config(GC.LOG_FILE_DIR + 'CrawlManager', 'info', 'info')
    signal.signal(signal.SIGINT, detectSignal)
    minutesToWait = flag.getFlag('waiting_time')
    log.info('Wait for: ' + str(minutesToWait) + ' minutes')
    time.sleep(minutesToWait * 60)
    CrawlManager().start()
def main():
    """Run the recursive-profile agent test against a fixed seed id list."""
    log.config(GC.LOG_FILE_DIR + 'agent_test', 'debug', 'debug')
    seedIds = [
        '255617816',
        '45516',
        '200656024',
        '601630763',
    ]
    recursiveProfileTest('*****@*****.**', '12345678', 1, 20, seedIds)
def main():
    """Database round-trip test driver.

    Rebuilds the schema (create/drop/create exercises both paths), runs the
    table and start-list test suites, tears the schema down, closes the
    connection, and reports success.
    """
    log.config(GC.LOG_FILE_DIR + 'database_test', 'debug', 'debug')
    connection = createConnection()
    createTables(connection)
    dropTables(connection)
    createTables(connection)
    test(connection)
    testStartList(connection)
    dropTables(connection)
    connection.close()
    log.info("Pass the test!")
def mainProfileTest():
    """Profile-fetching test.

    First replays every cached static page under GC.TEST_STATIC_PAGE_PATH
    through getProfileTest (offline parsing check), then fetches a few live
    profile ids with a logged-in test agent.
    """
    log.config(GC.LOG_FILE_DIR + 'agent_test', 'debug', 'debug')
    profileIds = ['67922197', '172442794', '344429329']
    agent = getTestAgent()
    agent.login()
    # Walk the stored static pages; '_dirs' is unused by design.
    for root, _dirs, files in os.walk(GC.TEST_STATIC_PAGE_PATH):
        for fileName in files:  # 'file' would shadow the builtin
            filePath = os.path.join(root, fileName)
            getProfileTest(agent, None, filePath)
    # Then hit the live profiles by id.
    for profileId in profileIds:
        getProfileTest(agent, profileId)
def main():
    """Account/proxy pool test driver.

    Rebuilds the pool tables (create/drop/create), runs the account-pool
    and proxy-pool suites, drops the tables, and reports success.
    """
    log.config(GC.LOG_FILE_DIR + 'account_pool_test', 'debug', 'debug')
    testPool = createTestRenrenAccountPool()
    createTables(testPool)
    dropTables(testPool)
    createTables(testPool)
    testRenrenAccountPool()
    testProxyPool()
    dropTables(testPool)
    log.info("Pass the test!")
def importFromFile(fname):
    """Import renren accounts from *fname* into the production pool.

    Each line of the file is expected to hold a "username password" pair;
    lines with fewer than two tokens are skipped.

    NOTE(review): parts of this function were redacted in the source (the
    '"******"' spans below replaced real code).  The logic that verifies
    accounts and updates verifySuccessCount / verifyFailCount /
    importSuccessCount / importFailCount is missing from this view — those
    counters are logged but never visibly assigned.  Recover the original
    body from version control before editing; the code below is kept
    verbatim and does not parse as-is.
    """
    log.config(GC.LOG_FILE_DIR + 'import_accounts', 'info', 'info')
    fileName = fname
    accounts = []
    pool = createProdRenrenAccountPool()
    with open(fileName) as importedFile:
        lines = importedFile.readlines()
        for line in lines:
            strs = line.split()
            if len(strs) < 2:
                continue  # May be not a valid account
            username = strs[0]  # User name first.
            password = strs[1]  # And then password.
            # NOTE(review): redacted span begins here — originally logged the
            # credentials and (presumably) built/verified the account.
            log.info("Find username: "******" " +\
                "password: "******"Finish importing..........\n" +\
                "Success on verify accounts number: " +\
                str(verifySuccessCount) + "\n" +\
                "Fail on verify accounts number: " +\
                str(verifyFailCount))
    log.info('Success imported number: %s' % importSuccessCount)
    log.info('Fail imported number: %s' % importFailCount)
def main():
    """Run both analysed-data-base import tests and report success."""
    log.config(GC.LOG_FILE_DIR + "analysed_data_base_test", "debug", "debug")
    # The 'Aanlysed' spelling matches the helpers' actual (typo'd) names.
    testAanlysedDataBaseImportFromResult()
    testAanlysedDataBaseImportFromFile()
    log.info("Pass the all the test!")
        # NOTE(review): the two lines below are the tail of a method whose
        # `def` line is outside this view (it upserts into result.xingMap).
        self.result.xingMap[xing] = info
        self.setInfo(info, profile)

    def accumulateMing(self, ming, profile):
        """Accumulate a single given name (ming) into the running result.

        Bumps the global ming counter, upserts the per-ming info record in
        result.mingMap, then folds this profile into that record.
        """
        self.result.globalInfo.ming_count += 1
        info = self.result.mingMap.get(ming)
        if not info:
            # First occurrence of this ming: start a fresh raw item record.
            info = NameHelper.getInitedRawNameItemInfo(ming)
            self.result.mingMap[ming] = info
        self.setInfo(info, profile)

    def accumulateXingMing(self, xingMing, profile):
        """Accumulate a single full name (xing+ming) into the running result.

        Mirrors accumulateMing but over result.xingMingMap and the global
        xing_ming counter.
        """
        self.result.globalInfo.xing_ming_count += 1
        info = self.result.xingMingMap.get(xingMing)
        if not info:
            info = NameHelper.getInitedRawNameItemInfo(xingMing)
            self.result.xingMingMap[xingMing] = info
        self.setInfo(info, profile)


def main():
    # Run the full analysis pass, then build the index over its output.
    analyser = Analyser()
    analyser.analyse()
    analyser.buildIndex()


if __name__ == "__main__":
    flag.processArguments()
    log.config(GC.LOG_FILE_DIR + 'Analyser', 'info', 'info')
    main()
def main():
    """Exercise result serialization and report success."""
    log.config(GC.LOG_FILE_DIR + 'result_test', 'debug', 'debug')
    testResultSerialization()
    log.info("Pass the test!")
def main():
    """Configure logging, then load crawl results into the database."""
    log.config(GC.LOG_FILE_DIR + 'import_result_to_database', 'info', 'info')
    importResultToDataBase()
def main():
    """Kick off crawling from the configured start nodes."""
    log.config(GC.LOG_FILE_DIR + 'crawl_start_node', 'debug', 'debug')
    StartNodeCrawler().startCrawling()
def main():
    """Persist both in-using and invalid accounts from the production pool."""
    log.config(GC.LOG_FILE_DIR + 'save_accounts', 'info', 'info')
    accountPool = createProdRenrenAccountPool()
    saveInUsingAccounts(accountPool)
    saveInvalidAccount(accountPool)
def main():
    """Smoke-test the production read-only data store.

    Opens the store, runs the test suite against it, and guarantees the
    store is closed even if the test raises.
    """
    log.config(GC.LOG_FILE_DIR + 'readonlydatastore_test', 'debug', 'debug')
    dataStore = createProdReadOnlyDataStore()
    try:
        test(dataStore)
    finally:
        # Close even on test failure so the store's resources are released.
        dataStore.close()