def get_words(self):
    """Process pending rows of ``search_words_wxh`` until the query returns none.

    Each pass selects one row with ``toutiao_status=0`` and ``category=2``
    (highest ``id`` first), sets its ``toutiao_status`` to 1, passes the
    word to ``self.get_page`` and then sets ``toutiao_status`` to 2.

    Once more than 30 minutes have elapsed since ``self.start_time``, the
    account id and headers are replaced with the result of
    ``account_hearder_id()`` and the loop sleeps for 5 minutes.
    """
    pending_query = """select words,id from search_words_wxh where toutiao_status=0 and category=2 order by id desc limit 1"""

    while True:
        now = time.time()
        elapsed = now - self.start_time
        if elapsed > 30 * 60:
            print('程序休眠中')
            self.account_id, self.headers = account_hearder_id()
            time.sleep(5*60)
            # The timer restarts from the moment sampled *before* the sleep.
            self.start_time = now

        # NOTE: an hourly cookie refresh (get_cookie on https://www.toutiao.com,
        # tracked through self.beginTime) used to live here and is disabled.

        rows = select_data(pending_query)
        if not rows:
            print("已经爬取完")
            break

        for word, word_id in rows:
            # Set status 1 before crawling and status 2 once get_page returns.
            update_data("""update search_words_wxh set toutiao_status=1 where id=%s""" % word_id)
            self.get_page(word)
            update_data("""update search_words_wxh set toutiao_status=2 where id=%s""" % word_id)
def account_select_id():
    """Select the least-used toutiao cookie row and increment its use count.

    Queries ``cookie_wxh`` for the row with ``status = 0`` and
    ``platform='toutiao'`` that has the smallest ``useCount``, writes back
    ``useCount + 1`` for that row, and returns its id and cookie.

    Returns:
        A ``(id, cookie)`` tuple, or ``None`` when the query yields no rows.
        NOTE(review): callers that unpack the result must handle ``None``.
    """
    lookup_sql = """SELECT id,cookie,useCount FROM cookie_wxh WHERE status = 0 and platform='toutiao' order by useCount asc limit 1"""
    rows = select_data(lookup_sql)
    if len(rows) == 0:
        return None

    chosen = rows[0]
    bumped_count = chosen[2] + 1
    bump_sql = 'UPDATE cookie_wxh set useCount=%d where id=%d' % (
        bumped_count, chosen[0])
    update_data(bump_sql)

    # Return the id and the cookie.
    return chosen[0], chosen[1]
def get_words():
    """Process pending rows of ``search_words_wxh`` until the query returns none.

    Each pass selects one row with ``jianshu_status=0`` and ``category=2``,
    sets its ``jianshu_status`` to 1, passes the word to ``spider`` and then
    sets ``jianshu_status`` to 2.

    Once more than 20 minutes have elapsed since the module-level
    ``start_time``, the loop sleeps for 5 minutes and resets that timer.
    """
    global start_time

    pending_query = """select words,id from search_words_wxh where jianshu_status=0 and category=2 limit 1"""

    while True:
        now = time.time()
        if now - start_time > 20 * 60:
            print('程序休眠中')
            time.sleep(5*60)
            # The timer restarts from the moment sampled *before* the sleep.
            start_time = now

        rows = select_data(pending_query)
        if not rows:
            print("已经爬取完")
            break

        for word, word_id in rows:
            # Set status 1 before crawling and status 2 once spider returns.
            update_data("""update search_words_wxh set jianshu_status=1 where id=%s""" % word_id)
            spider(word)
            update_data("""update search_words_wxh set jianshu_status=2 where id=%s""" % word_id)
def run(self):
    """Process pending rows of ``search_words_wxh`` in an endless loop.

    Each pass selects one row with ``sohu_status=0``, sets its
    ``sohu_status`` to 1, passes the word to ``self.get_page`` and then sets
    ``sohu_status`` to 2. When the query returns nothing, the loop sleeps
    for 20 minutes and polls again; it never returns on its own.

    Once more than 15 minutes have elapsed since ``self.start_time``, the
    loop sleeps for 60 seconds and resets that timer.
    """
    pending_query = """select words,id from search_words_wxh where sohu_status=0 limit 1"""

    while True:
        now = time.time()
        if now - self.start_time > 15 * 60:
            print('程序休眠中')
            time.sleep(60)
            # The timer restarts from the moment sampled *before* the sleep.
            self.start_time = now

        rows = select_data(pending_query)
        if rows:
            for word, word_id in rows:
                # Set status 1 before crawling and status 2 once get_page returns.
                update_data("""update search_words_wxh set sohu_status=1 where id=%s""" % word_id)
                self.get_page(word)
                update_data("""update search_words_wxh set sohu_status=2 where id=%s""" % word_id)
        else:
            # Nothing pending: wait 20 minutes, then poll again.
            print("已经爬取完")
            print("休眠20分钟")
            time.sleep(20*60)
def run(self):
    """Process pending rows of ``search_words_wxh`` until the query returns none.

    Each pass, while holding ``self.threadLock``, selects one row with
    ``dongfang_status=0`` and ``category=2`` and sets its
    ``dongfang_status`` to 1. After releasing the lock it passes the word to
    ``self.get_page`` and then sets ``dongfang_status`` to 2.

    The lock is released in a ``finally`` clause, so an exception raised by
    ``select_data`` or ``update_data`` can no longer leave it held. It is
    released exactly once per pass, however many rows the query returns.

    Once more than 20 minutes have elapsed since ``self.start_time``, the
    loop sleeps for 60 seconds and resets that timer.
    """
    pending_query = """select words,id from search_words_wxh where dongfang_status=0 and category=2 limit 1 """

    while True:
        end_time = time.time()
        if end_time - self.start_time > 20 * 60:
            print('程序休眠中')
            time.sleep(1*60)
            # The timer restarts from the moment sampled *before* the sleep.
            self.start_time = end_time

        # Select and mark rows under the lock. Explicit acquire/release is
        # kept (rather than ``with``) so only the methods the original code
        # relied on are required of ``self.threadLock``.
        self.threadLock.acquire()
        try:
            result = select_data(pending_query)
            for row in result or ():
                update_data("""update search_words_wxh set dongfang_status=1 where id=%s""" % row[1])
        finally:
            self.threadLock.release()

        if not result:
            print("已经爬取完")
            break

        # get_page runs outside the lock.
        for row in result:
            self.get_page(row[0])
            update_data("""update search_words_wxh set dongfang_status=2 where id=%s""" % row[1])