Beispiel #1
0
    def get_words(self):
        """Crawl pending Toutiao search words one at a time until none remain.

        Each pass selects the row of ``search_words_wxh`` with the highest id
        whose ``toutiao_status`` is 0 and ``category`` is 2, sets its status
        to 1, hands the word to ``self.get_page`` and then sets the status
        to 2. The method returns once the query yields no rows.

        When more than 30 minutes have elapsed since ``self.start_time``, the
        account id and headers are refreshed through ``account_hearder_id()``
        and the loop sleeps for 5 minutes before carrying on.
        """
        pending_sql = """select words,id from search_words_wxh where toutiao_status=0 and category=2  order by id  desc limit 1"""
        started_sql = """update search_words_wxh set toutiao_status=1 where id=%s"""
        finished_sql = """update search_words_wxh set toutiao_status=2 where id=%s"""

        while True:
            now = time.time()
            elapsed = now - self.start_time
            if elapsed > 30 * 60:
                print('程序休眠中')
                self.account_id, self.headers = account_hearder_id()
                time.sleep(5*60)
                # The window restarts from the timestamp taken before the sleep.
                self.start_time = now

            # NOTE: an hourly cookie refresh via get_cookie() used to run at
            # this point; it is disabled.

            rows = select_data(pending_sql)
            if not rows:
                print("已经爬取完")
                break

            for row in rows:
                # Status 1 is written before the crawl, status 2 after it.
                update_data(started_sql % row[1])
                self.get_page(row[0])
                update_data(finished_sql % row[1])
Beispiel #2
0
def account_select_id():
    """Pick the least-used active Toutiao cookie and bump its use counter.

    Selects the ``cookie_wxh`` row with ``status = 0`` and
    ``platform='toutiao'`` that has the lowest ``useCount``, increments that
    row's ``useCount`` by one and returns the row's id and cookie.

    Returns:
        A ``(id, cookie)`` tuple, or ``None`` when the query yields no row.
    """
    sql_select_cookie_account = """SELECT id,cookie,useCount FROM cookie_wxh WHERE   status = 0 and platform='toutiao' order by useCount asc limit 1"""
    resultTuple = select_data(sql_select_cookie_account)
    # Truthiness (rather than len()) also covers a None result, matching how
    # the crawler loops in this code base test select_data's return value.
    if not resultTuple:
        return None

    account_id, cookie = resultTuple[0][0], resultTuple[0][1]

    # Increment inside the UPDATE itself instead of writing back a value that
    # was read earlier: two workers that select the same row concurrently
    # would otherwise both store the same number and lose one increment.
    # COALESCE lets a NULL counter start from 0 instead of failing.
    sql_update_cookie_account_useCount = (
        'UPDATE  cookie_wxh set useCount=COALESCE(useCount, 0)+1 where id=%d'
        % account_id)
    update_data(sql_update_cookie_account_useCount)

    # Return the id and the cookie.
    return account_id, cookie
Beispiel #3
0
def get_words():
    """Crawl pending Jianshu search words one at a time until none remain.

    Each pass selects one row of ``search_words_wxh`` whose
    ``jianshu_status`` is 0 and ``category`` is 2, sets its status to 1,
    passes the word to ``spider`` and then sets the status to 2. The function
    returns once the query yields no rows.

    When more than 20 minutes have elapsed since the module-level
    ``start_time``, the loop sleeps for 5 minutes and resets ``start_time``.
    """
    global start_time

    pending_sql = """select words,id from search_words_wxh where jianshu_status=0 and category=2 limit 1"""
    started_sql = """update search_words_wxh set jianshu_status=1 where id=%s"""
    finished_sql = """update search_words_wxh set jianshu_status=2 where id=%s"""

    while True:
        now = time.time()
        elapsed = now - start_time
        if elapsed > 20 * 60:
            print('程序休眠中')
            time.sleep(5*60)
            # The window restarts from the timestamp taken before the sleep.
            start_time = now

        rows = select_data(pending_sql)
        if not rows:
            print("已经爬取完")
            break

        for row in rows:
            # Status 1 is written before the crawl, status 2 after it.
            update_data(started_sql % row[1])
            spider(row[0])
            update_data(finished_sql % row[1])
Beispiel #4
0
 def run(self):
     """Crawl pending Sohu search words forever, polling for new ones.

     Each pass selects one row of ``search_words_wxh`` whose ``sohu_status``
     is 0, sets its status to 1, hands the word to ``self.get_page`` and then
     sets the status to 2. When the query yields no rows the loop sleeps for
     20 minutes and polls again, so this method never returns on its own.

     When more than 15 minutes have elapsed since ``self.start_time``, the
     loop sleeps for 60 seconds and resets ``self.start_time``.
     """
     pending_sql = """select words,id from search_words_wxh where sohu_status=0 limit 1"""
     started_sql = """update search_words_wxh set sohu_status=1 where id=%s"""
     finished_sql = """update search_words_wxh set sohu_status=2 where id=%s"""

     while True:
         now = time.time()
         elapsed = now - self.start_time
         if elapsed > 15 * 60:
             print('程序休眠中')
             time.sleep(60)
             # The window restarts from the timestamp taken before the sleep.
             self.start_time = now

         rows = select_data(pending_sql)
         if not rows:
             # Nothing to do right now: wait, then look again.
             print("已经爬取完")
             print("休眠20分钟")
             time.sleep(20*60)
             continue

         for row in rows:
             # Status 1 is written before the crawl, status 2 after it.
             update_data(started_sql % row[1])
             self.get_page(row[0])
             update_data(finished_sql % row[1])
Beispiel #5
0
 def run(self):
     """Crawl pending Dongfang search words until none remain.

     Each pass selects one row of ``search_words_wxh`` whose
     ``dongfang_status`` is 0 and ``category`` is 2, sets its status to 1,
     hands the word to ``self.get_page`` and then sets the status to 2. The
     method returns once the query yields no rows.

     The select and the status-1 update run while ``self.threadLock`` is
     held, so two workers sharing that lock do not pick the same row; the
     crawl itself runs without the lock. ``self.threadLock`` is assumed to be
     a ``threading.Lock``-style object usable in a ``with`` statement.

     When more than 20 minutes have elapsed since ``self.start_time``, the
     loop sleeps for 60 seconds and resets ``self.start_time``.
     """
     pending_sql = """select words,id from search_words_wxh where dongfang_status=0 and category=2 limit 1  """
     started_sql = """update search_words_wxh set dongfang_status=1 where id=%s"""
     finished_sql = """update search_words_wxh set dongfang_status=2 where id=%s"""

     while True:
         now = time.time()
         if now - self.start_time > 20 * 60:
             print('程序休眠中')
             time.sleep(1*60)
             # The window restarts from the timestamp taken before the sleep.
             self.start_time = now

         # Hold the lock only while selecting and claiming rows. The ``with``
         # block releases it exactly once, even if a database call raises;
         # a manual acquire()/release() pair would leave the lock held on an
         # exception and block every other worker.
         with self.threadLock:
             rows = select_data(pending_sql)
             for row in rows or ():
                 update_data(started_sql % row[1])

         if not rows:
             print("已经爬取完")
             break

         # The slow part runs outside the lock so other workers can proceed.
         for row in rows:
             self.get_page(row[0])
             update_data(finished_sql % row[1])