Example 1
def main():
    thread_count = 3
    pre_threads = []

    initdb()                                            # initialize the Redis database
    initGlobalArgs()
    initContentUrl_dict()                               # initialize the URL de-duplication table

    log_thread = log.LogThread()                        # start the logging thread
    log_thread.start()

    QueryWebsiteUrl_thread = QueryWebsiteUrlThread()    # start the thread that loads website URLs
    QueryWebsiteUrl_thread.start()
    pre_threads.append(QueryWebsiteUrl_thread)

    filterContentUrl_thread = FilterContentUrlThread()  # start the thread that crawls content URLs
    filterContentUrl_thread.start()
    pre_threads.append(filterContentUrl_thread)

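    # start thread_count worker threads that process the content details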
    for i in range(thread_count):
        thread = FilterContentInfoThread()
        thread.start()
        pre_threads.append(thread)

    unrecognizedWebsiteUrl_thread = UnrecognizedWebsiteUrl_Thread()
    unrecognizedWebsiteUrl_thread.start()
    pre_threads.append(unrecognizedWebsiteUrl_thread)

    unrecognizedContentUrl_thread = UnrecognizedContentUrl_Thread()
    unrecognizedContentUrl_thread.start()
    pre_threads.append(unrecognizedContentUrl_thread)


    while not global_EXIT:              # block until another thread requests shutdown
        time.sleep(0.1)                 # sleep briefly instead of spinning on the flag

    time.sleep(5)                       # brief grace period before shutting down

    saveWebsiteDelaytime()              # save each website's delay time

    for t in pre_threads:               # wait for all worker threads to finish
        t.join()

    log.logMsg(LogType.success, "--------------------bye---------------------\n")
    while not Cache.qempty(cache.log_queue):                        # wait until every pending log message is written to file
        time.sleep(0.1)
    Cache.setDict(cache.globalArgs_dict, "LogThread_EXIT", True)    # signal the logging thread to exit
    log_thread.join()

    if db: db.dispose()                 # release the database connection, if one was created
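
main() blocks by polling the module-level global_EXIT flag until another thread flips it. Below is a minimal sketch of the same shutdown handshake built on threading.Event, which avoids polling entirely; exit_event and worker are illustrative names, not part of the original project.

import threading
import time

exit_event = threading.Event()              # stands in for the global_EXIT flag

def worker():
    # illustrative worker: runs until a shutdown is requested
    while not exit_event.is_set():
        # ... do one unit of crawling work ...
        time.sleep(0.5)

def main():
    threads = [threading.Thread(target=worker, daemon=True) for _ in range(3)]
    for t in threads:
        t.start()

    try:
        while not exit_event.is_set():
            exit_event.wait(timeout=1.0)    # blocks without burning CPU, unlike a bare spin loop
    except KeyboardInterrupt:
        exit_event.set()                    # Ctrl-C requests shutdown

    for t in threads:
        t.join(timeout=5)                   # give each worker a few seconds to wind down

if __name__ == "__main__":
    main()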
Example 2
    def run(self):
        while not global_EXIT:
            try:
                if Cache.qempty(cache.websiteUrl_queue):            # refill only once the queue has been drained
                    records = mysql.Mysql.queryWebsiteUrl()         # load website records from MySQL
                    for record in records:  # record: id, url, xpath, detail, delay_time
                        record = [str(item) for item in record]
                        self.initWebsite_delay_dict(record)
                        t = threading.Thread(target=self.putRecord, args=(record,))
                        t.daemon = True      # preferred over the deprecated setDaemon()
                        t.start()

            except Exception:
                log.logMsg(LogType.error, "[QueryWebsiteUrlThread] %s" % traceback.format_exc())
            for _ in range(60):          # pause roughly a minute between polls,
                if global_EXIT: break    # but stay responsive to a shutdown request
                time.sleep(1)
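
The per-second loop above exists only so the thread can notice global_EXIT within about a second of it being set. With an Event the same pacing collapses to a single call; a sketch, assuming the illustrative exit_event from the previous note rather than the original flag:

import threading

exit_event = threading.Event()      # assumed stand-in for global_EXIT

def poll_every_minute(poll):
    # call poll() roughly once a minute until shutdown is requested;
    # wait() returns early as soon as exit_event is set
    while not exit_event.is_set():
        poll()
        exit_event.wait(timeout=60)

# usage: poll_every_minute(lambda: print("querying website URLs"))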