def get_baiduzd_faq(question, max_retries=3):
    """Collect answers from Baidu Zhidao for questions similar to *question*.

    The first page of search results is fetched with ``get_baiduzd_page``,
    the similar questions on it are extracted with ``get_sim_questions``,
    and ``get_a_question_ans`` is then run for every question link in its
    own ``MyThread``.  (Per the original author's comment, each lookup
    yields the first 5 answers of that question.)

    Args:
        question: The question text to search for.
        max_retries: Maximum number of attempts to download the result
            page before giving up.  Values below 1 are treated as 1.

    Returns:
        A list with the answers of all threads concatenated in the order
        the threads were started, or ``None`` when the result page could
        not be downloaded.
    """
    # Retry the download a bounded number of times.  An unbounded
    # ``while ... is None`` loop would spin forever on a persistent failure
    # and make the failure branch below unreachable.
    question_list_html = None
    for _ in range(max(1, max_retries)):
        question_list_html = get_baiduzd_page(question)
        if question_list_html is not None:
            break
    if question_list_html is None:
        print("爬取百度知道答案列表页面失败!")
        return None

    # Each entry is indexed with 'link' below; per the original comment the
    # entries map a question sentence to its link.
    questions = get_sim_questions(question_list_html)

    # One thread per similar question to speed up the crawl.  A distinct
    # loop name is used so the ``question`` parameter is not shadowed.
    thread_pool = []
    for sim_question in questions:
        # NOTE(review): ``(x)`` is NOT a one-element tuple, so ``args`` is
        # the bare link value here.  Left unchanged because it depends on
        # how MyThread forwards ``args`` to the callable -- confirm, and
        # switch to ``(link,)`` if MyThread unpacks with ``*args``.
        thread = MyThread(get_a_question_ans, args=(sim_question['link']))
        thread.start()
        thread_pool.append(thread)

    # Wait for every lookup to finish before reading any result.
    for thread in thread_pool:
        thread.join()

    answers = []
    for thread in thread_pool:
        res = thread.get_result()
        if res is not None:  # threads without a result are skipped
            answers += res
    return answers
def get_apps_info(self, url, classification, type):
    """Queue every page URL of one app category and crawl them in threads.

    The category's main page is downloaded with ``self.get_html``; the
    total page count is read from its pagination block; one URL per page
    (``url + "/" + page_number``, numbered from 1) is put on the shared
    queue ``SHARE_Q``; finally ``_WORKER_THREAD_NUM`` threads are started,
    each running ``self.worker(classification, type)``.

    Args:
        url: Base URL of the category.
        classification: Passed through unchanged to ``self.worker``.
        type: Passed through unchanged to ``self.worker``.  (The name
            shadows the builtin but is kept for caller compatibility.)

    Returns:
        None.  When the main page cannot be downloaded a message is
        printed and the method returns early without queuing anything.
    """
    main_html = self.get_html(url)
    if main_html is None:
        print(url + "爬取失败!")
        return

    # Pagination links live in <div class="pagelist" id="pagelist">.
    page_links = main_html.find(
        "div", {"class": "pagelist", "id": "pagelist"}
    ).find_all("a")
    # The text of the second-to-last link is read as the total number of
    # pages in this category (presumably the last link is a "next"/"last"
    # control -- verify against the site's markup).
    all_pages = int(page_links[len(page_links) - 2].get_text())

    # Enqueue the tasks.  In real use the queue would be fed continuously.
    for page_no in range(1, all_pages + 1):
        SHARE_Q.put(url + "/" + str(page_no))

    # Start _WORKER_THREAD_NUM worker threads.
    threads = []
    for _ in range(_WORKER_THREAD_NUM):
        # Hand MyThread a *callable*.  Writing
        # ``MyThread(self.worker(classification, type))`` would run the
        # worker right here in the calling thread and pass only its return
        # value to the thread.
        # Assumes MyThread invokes the callable it receives with no
        # arguments -- TODO confirm against MyThread.
        thread = MyThread(lambda: self.worker(classification, type))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()

    # Block until every queued task has been marked as done.
    SHARE_Q.join()
def test_ip(self, ips):
    """Check the availability of the given proxy IPs using worker threads.

    Every entry of *ips* is put on the shared queue ``SHARE_Q``, then
    ``_WORKER_THREAD_NUM`` threads running ``self.worker`` are started and
    joined.  Once the queue reports all tasks done, ``self.valid_ips`` is
    printed (presumably filled in by the workers -- verify against
    ``worker``).

    Args:
        ips: Sequence of proxy IPs to check.
    """
    global SHARE_Q

    total = len(ips)
    print("检查代理ip可用性...共有" + str(total) + "个ip待检查")

    # Enqueue the tasks.  In real use the queue would be fed continuously.
    for candidate in ips:
        SHARE_Q.put(candidate)

    # Start _WORKER_THREAD_NUM workers; each begins processing right away.
    workers = []
    for _ in range(_WORKER_THREAD_NUM):
        worker_thread = MyThread(self.worker)
        worker_thread.start()
        workers.append(worker_thread)

    for worker_thread in workers:
        worker_thread.join()

    # Wait until every queued task has been marked as done.
    SHARE_Q.join()

    print("可用代理ip:" + str(self.valid_ips))
def main():
    """Install the hook, start a ``MyThread`` and wait for it to finish.

    ``set_hook()`` is called first, then a ``MyThread`` created with no
    arguments is started.  The calling thread sleeps for five seconds
    before joining the worker.
    """
    set_hook()

    worker = MyThread()
    worker.start()

    # Pause the calling thread for five seconds, then block until the
    # worker has finished.
    time.sleep(5)
    worker.join()
# ch3/example1.py

import sys

# Make the directory that contains my_thread.py importable.  sys.path
# entries must be directories: the previous value had a stray leading quote
# and pointed at the .py file itself, so it could never help resolve the
# import below.
sys.path.append(
    "C:\\Users\\tnguy\\PycharmProjects\\MasteringConcurrency\\Chapter03"
)

from my_thread import MyThread

# Two threads built with a label and 0.5 (presumably a name and a delay in
# seconds -- see my_thread.py).
thread1 = MyThread('A', 0.5)
thread2 = MyThread('B', 0.5)

# Start both before joining either, so they run concurrently.
thread1.start()
thread2.start()

thread1.join()
thread2.join()

print('Finished.')