예제 #1
0
    def run(self):
        """Poll the database for product URLs and fetch their consultations.

        Loops forever. Each pass takes one URL from ``db_query_process()``
        while holding ``gdb_lock``. A non-empty URL is passed to
        ``get_product_consults()``; an empty result makes the thread print a
        waiting message and sleep 20 seconds before polling again.
        """
        print("启动商品咨询线程 %d ...\n" % self.threadID)
        consult_worker = JdAnysisConsult(self.threadID)
        database = Jd_Db()
        while True:
            # The lock covers the query only; fetching and sleeping happen
            # after it has been released.
            with gdb_lock:
                product_url = database.db_query_process()

            if not product_url:
                print("咨询线程[%d]提取产品为空,等待..." % self.threadID)
                time.sleep(20)
                continue

            consult_worker.get_product_consults(product_url)

        # Not reached: the loop above has no exit path.
        print("退出商品咨询线程 %d ..." % self.threadID)
예제 #2
0
    def run(self):
        """Poll the SQLite database for product URLs and fetch consultations.

        Loops forever. Each pass takes one URL from ``db_query_process()``
        while holding ``gdb_lock``. A non-empty URL is passed to
        ``get_product_consults()``; an empty result makes the thread print a
        waiting message and sleep 10 seconds before polling again.
        """
        print("启动商品咨询线程 %d ...\n" % self.threadID)
        consult_worker = JdAnysisConsult(self.threadID)
        database = Jd_Db(jd_config.SQLITE_DB)
        while True:
            # The lock covers the query only; fetching and sleeping happen
            # after it has been released.
            with gdb_lock:
                product_url = database.db_query_process()

            if not product_url:
                print("咨询线程[%d]提取产品为空,等待..." % self.threadID)
                time.sleep(10)
                continue

            consult_worker.get_product_consults(product_url)

        # Not reached: the loop above has no exit path.
        print("退出商品咨询线程 %d ..." % self.threadID)
예제 #3
0
    def run(self):
        """Take queued page URLs one at a time and expand them, forever.

        Each pass queries ``db_query_extend()`` while holding ``gdb_lock``.
        A non-empty URL is printed and passed to ``get_product_ids()``
        together with this thread's database handle and id. When the query
        returns nothing, the thread waits 20 seconds and polls again.
        """
        print ("启动线程 %d ...\n" % self.threadID)
        jdb = Jd_Db()
        while True:
            # NOTE: a backlog throttle (sleep 60s while
            # jdb.db_unprocess_count() > 200000) used to sit here and is
            # currently disabled.

            # Hold gdb_lock for the query only. Sleeping while still holding
            # it would stall every other thread that needs the lock for the
            # full 20 seconds each time the queue is empty.
            with gdb_lock:
                full_url = jdb.db_query_extend()

            if not full_url:
                time.sleep(20)
                continue

            print("线程[%d]正在处理:%s" % (self.threadID, full_url) )
            get_product_ids(full_url, jdb, self.threadID)

        # Not reached: the loop above has no exit path.
        print ("退出线程 %d ..." % self.threadID)
예제 #4
0
    def run(self):
        """Take queued page URLs one at a time and expand them, forever.

        Each pass queries ``db_query_extend()`` while holding ``gdb_lock``.
        A non-empty URL is printed and passed to ``get_product_ids()``
        together with this thread's database handle and id. When the query
        returns nothing, the thread waits 20 seconds and polls again.
        """
        print("启动线程 %d ...\n" % self.threadID)
        jdb = Jd_Db()
        while True:
            # NOTE: a backlog throttle (sleep 60s while
            # jdb.db_unprocess_count() > 200000) used to sit here and is
            # currently disabled.

            # Hold gdb_lock for the query only. Sleeping while still holding
            # it would stall every other thread that needs the lock for the
            # full 20 seconds each time the queue is empty.
            with gdb_lock:
                full_url = jdb.db_query_extend()

            if not full_url:
                time.sleep(20)
                continue

            print("线程[%d]正在处理:%s" % (self.threadID, full_url))
            get_product_ids(full_url, jdb, self.threadID)

        # Not reached: the loop above has no exit path.
        print("退出线程 %d ..." % self.threadID)
예제 #5
0
 def run(self):
     """Take queued page URLs one at a time and expand them, forever.

     Each pass first checks ``db_unprocess_count()``; while it exceeds
     200000 the thread sleeps 60 seconds and re-checks instead of expanding
     more pages. Otherwise it polls ``db_query_extend()`` (holding
     ``gdb_lock`` for each query) until a URL comes back, prints it, and
     passes it to ``get_product_ids()`` with this thread's database handle
     and id.
     """
     print ("启动线程 %d ...\n" % self.threadID)
     jdb = Jd_Db(jd_config.SQLITE_DB)
     while True:
         if jdb.db_unprocess_count() > 200000:
             # Backlog too large: pause page expansion, then re-check.
             time.sleep(60)
             continue

         while True:
             # Take gdb_lock per query rather than around the whole retry
             # loop, so an empty queue does not keep the lock held
             # indefinitely.
             with gdb_lock:
                 full_url = jdb.db_query_extend()
             if full_url:
                 # NOTE: a disabled filter used to discard certain jd.com
                 # URLs (service subdomains, /compare/, club consultation
                 # pages) via jdb.db_drop_rubbish() before accepting one.
                 break
             # Brief pause between empty polls instead of spinning at full
             # speed against the database.
             time.sleep(1)

         print("线程[%d]正在处理:%s" % (self.threadID, full_url) )
         get_product_ids(full_url, jdb, self.threadID)

     # Not reached: the outer loop has no exit path.
     print ("退出线程 %d ..." % self.threadID)
예제 #6
0
    def run(self):
        """Take queued page URLs one at a time and expand them, forever.

        Each pass first checks ``db_unprocess_count()``; while it exceeds
        200000 the thread sleeps 60 seconds and re-checks instead of
        expanding more pages. Otherwise it polls ``db_query_extend()``
        (holding ``gdb_lock`` for each query) until a URL comes back, prints
        it, and passes it to ``get_product_ids()`` with this thread's
        database handle and id.
        """
        print("启动线程 %d ...\n" % self.threadID)
        jdb = Jd_Db(jd_config.SQLITE_DB)
        while True:
            if jdb.db_unprocess_count() > 200000:
                # Backlog too large: pause page expansion, then re-check.
                time.sleep(60)
                continue

            while True:
                # Take gdb_lock per query rather than around the whole retry
                # loop, so an empty queue does not keep the lock held
                # indefinitely.
                with gdb_lock:
                    full_url = jdb.db_query_extend()
                if full_url:
                    # NOTE: a disabled filter used to discard certain jd.com
                    # URLs (service subdomains, /compare/, club consultation
                    # pages) via jdb.db_drop_rubbish() before accepting one.
                    break
                # Brief pause between empty polls instead of spinning at
                # full speed against the database.
                time.sleep(1)

            print("线程[%d]正在处理:%s" % (self.threadID, full_url))
            get_product_ids(full_url, jdb, self.threadID)

        # Not reached: the outer loop has no exit path.
        print("退出线程 %d ..." % self.threadID)