def run(self) -> None:
    """Producer loop: crawl goods for every material id in ``self.q_material``.

    For each material id taken from ``self.q_material`` the worker pages
    through ``tools.get_goods_by_material_id`` (starting at page 1) until a
    page comes back without a non-empty ``data.list``.  Every product not
    yet recorded in ``self.global_goods_ids`` is resolved through
    ``tools.get_goods_by_id`` and its ``data`` payload is pushed onto
    ``self.queue``.  ``self.event`` is set when the input queue is
    exhausted and whenever an item is put onto a previously empty output
    queue.

    Responses are assumed to be dict-like (``.get``) or falsy -- confirm
    against ``tools``.
    """
    loop = asyncio.new_event_loop()
    try:
        while True:
            # NOTE(review): empty()/get() is not atomic; with several
            # workers sharing q_material the get() below may block.
            if self.q_material.empty():
                self.event.set()
                break
            material_id = self.q_material.get()
            print("开始抓取模块%s" % material_id)
            self.q_material.task_done()

            page = 1
            while True:
                random_num = tools.get_random_num(12)
                payload = loop.run_until_complete(
                    tools.get_goods_by_material_id(
                        self.activity_id, random_num, material_id, page))
                data = payload.get('data') if payload else None
                items = data.get('list') if data else None
                if not items:
                    print("抓取模块%s完毕,总页数:%s" % (material_id, page))
                    break

                for entry in items:
                    goods_id = entry.get('product_id')
                    if goods_id in self.global_goods_ids:
                        continue
                    self.global_goods_ids.append(goods_id)

                    detail = loop.run_until_complete(
                        tools.get_goods_by_id(goods_id))
                    goods = detail.get('data') if detail else None
                    if not goods:
                        # Nothing usable to hand to the consumers.
                        continue

                    if self.queue.full():
                        print("队列已满,总数%s" % self.queue.qsize())
                        # Queue full: wait to be signalled, then reset the
                        # flag.  The item is still enqueued below instead
                        # of being silently dropped.
                        self.event.wait()
                        self.event.clear()

                    # Signal waiting consumers only when the queue goes
                    # from empty to non-empty.
                    was_empty = self.queue.empty()
                    # Assuming a standard queue.Queue, put() blocks until
                    # a slot is free if the queue is still full.
                    self.queue.put(goods)
                    if was_empty:
                        self.event.set()
                page += 1
    finally:
        # Release the loop's resources even if a request raised.
        loop.close()
    print(self.name + "结束")
def run(self) -> None:
    """Producer loop: crawl recommended goods page by page.

    The next page number is claimed as ``len(self.global_page)`` and
    recorded by appending it to that list.  Each page is fetched with
    ``tools.get_recommend_goods`` until one comes back without a non-empty
    ``data.list``.  Products with a falsy id, fewer than one sale, or an id
    already present in ``self.global_goods_ids`` are skipped; the rest are
    resolved through ``tools.get_goods_by_id`` and their ``data`` payload
    is pushed onto ``self.queue``, setting ``self.event`` after every put.

    Responses are assumed to be dict-like (``.get``) or falsy -- confirm
    against ``tools``.
    """
    loop = asyncio.new_event_loop()
    try:
        page = len(self.global_page)
        self.global_page.append(page)
        while True:
            payload = loop.run_until_complete(
                tools.get_recommend_goods(page))
            data = payload.get('data') if payload else None
            items = data.get('list') if data else None
            if not items:
                print("抓取精选完毕,总页数:%s" % (page))
                break

            for entry in items:
                goods_id = entry.get('product_id')
                if not goods_id:
                    continue
                # A missing sell_num is treated as zero sales rather than
                # raising TypeError on the comparison.
                if (entry.get('sell_num') or 0) < 1:
                    continue
                if goods_id in self.global_goods_ids:
                    print("重复")
                    continue
                self.global_goods_ids.append(goods_id)

                detail = loop.run_until_complete(
                    tools.get_goods_by_id(goods_id))
                goods = detail.get('data') if detail else None
                if not goods:
                    # Nothing usable to hand to the consumers.
                    continue

                if self.queue.full():
                    print("队列已满,总数%s" % self.queue.qsize())
                    # Queue full: wait to be signalled, then reset the
                    # flag.  The item is still enqueued below instead of
                    # being silently dropped.
                    self.event.wait()
                    self.event.clear()

                # Assuming a standard queue.Queue, put() blocks until a
                # slot is free if the queue is still full.
                self.queue.put(goods)
                self.event.set()

            # Claim the next unprocessed page number.
            page = len(self.global_page)
            self.global_page.append(page)
    finally:
        # Release the loop's resources even if a request raised.
        loop.close()
    print(self.name + "结束")
def run(self) -> None:
    """Producer loop: fetch goods details for every id in ``self.q_goods``.

    Each id taken from ``self.q_goods`` is resolved through
    ``tools.get_goods_by_id``.  Responses without ``data`` are skipped,
    goods without a ``name`` are reported as off-shelf, and ids already in
    ``self.global_goods_ids`` are ignored.  The remaining ``data`` payloads
    are pushed onto ``self.queue`` and ``self.event`` is set after every
    put, as well as when the input queue is exhausted.

    Responses are assumed to be dict-like (``.get``) or falsy -- confirm
    against ``tools``.
    """
    loop = asyncio.new_event_loop()
    try:
        while True:
            # NOTE(review): empty()/get() is not atomic; with several
            # workers sharing q_goods the get() below may block.
            if self.q_goods.empty():
                self.event.set()
                break
            goods_id = self.q_goods.get()
            print("开始抓取商品%s" % goods_id)
            self.q_goods.task_done()

            detail = loop.run_until_complete(
                tools.get_goods_by_id(goods_id))
            goods = detail.get('data') if detail else None
            if not goods:
                continue
            if not goods.get('name'):
                # No name (missing or empty): reported as off-shelf.
                print("下架: %s" % goods_id)
                continue
            if goods_id in self.global_goods_ids:
                continue
            self.global_goods_ids.append(goods_id)

            if self.queue.full():
                print("队列已满,总数%s" % self.queue.qsize())
                # Queue full: wait to be signalled, then reset the flag.
                # The item is still enqueued below instead of being
                # silently dropped.
                self.event.wait()
                self.event.clear()

            # Assuming a standard queue.Queue, put() blocks until a slot
            # is free if the queue is still full.
            self.queue.put(goods)
            self.event.set()
    finally:
        # Release the loop's resources even if a request raised.
        loop.close()
    print(self.name + "结束")
def run(self) -> None:
    """Producer loop: crawl goods for every shop in ``self.q_shops``.

    For each entry taken from ``self.q_shops`` (its ``shop_id`` attribute
    is used) two listings are paged through from page 0 until a page comes
    back without a non-empty ``data.list``:

    1. ``tools.get_first_goods_by_shop`` -- products already present in
       ``self.global_goods_ids`` are skipped before the detail lookup and
       ``self.event`` is set after every put.
    2. ``tools.get_goods_by_shop`` -- the detail lookup happens first, the
       ``self.global_goods_ids`` check afterwards, and ``self.event`` is
       set only when the output queue was empty before the put.

    In both phases a goods payload is enqueued only for the first
    occurrence of its ``"<shop_id> <shop_tel>"`` pair within the current
    shop.

    Responses are assumed to be dict-like (``.get``) or falsy -- confirm
    against ``tools``.
    """
    loop = asyncio.new_event_loop()
    try:
        while True:
            # NOTE(review): empty()/get() is not atomic; with several
            # workers sharing q_shops the get() below may block.
            if self.q_shops.empty():
                self.event.set()
                break
            shop_id = self.q_shops.get().shop_id
            print("开始抓取店铺%s" % shop_id)
            self.q_shops.task_done()

            # "<shop_id> <shop_tel>" pairs already emitted for this shop.
            seen_contacts = set()

            # ---- Phase 1: first-goods listing -------------------------
            page = 0
            while True:
                payload = loop.run_until_complete(
                    tools.get_first_goods_by_shop(shop_id, page))
                data = payload.get('data') if payload else None
                items = data.get('list') if data else None
                if not items:
                    print("抓取店铺%s完毕,总页数:%s" % (shop_id, page))
                    break

                for entry in items:
                    goods_id = entry.get('product_id')
                    if goods_id in self.global_goods_ids:
                        continue
                    self.global_goods_ids.append(goods_id)

                    detail = loop.run_until_complete(
                        tools.get_goods_by_id(goods_id))
                    goods = detail.get('data') if detail else None
                    if not goods:
                        continue

                    # Kept in its own variable so the shop id used for
                    # paging and logging is never overwritten.
                    contact_key = '%s %s' % (
                        goods.get('shop_id'), goods.get('shop_tel'))
                    if contact_key in seen_contacts:
                        continue
                    seen_contacts.add(contact_key)

                    if self.queue.full():
                        print("队列已满,总数%s" % self.queue.qsize())
                        # Queue full: wait to be signalled, then reset
                        # the flag.  The item is still enqueued below
                        # instead of being silently dropped.
                        self.event.wait()
                        self.event.clear()

                    # Assuming a standard queue.Queue, put() blocks until
                    # a slot is free if the queue is still full.
                    self.queue.put(goods)
                    self.event.set()
                page += 1

            # ---- Phase 2: full goods listing --------------------------
            page = 0
            while True:
                payload = loop.run_until_complete(
                    tools.get_goods_by_shop(shop_id, page))
                data = payload.get('data') if payload else None
                # 'list' is checked for None before its length is used.
                items = data.get('list') if data else None
                if not items:
                    print("抓取店铺%s完毕,总页数:%s" % (shop_id, page))
                    break

                for entry in items:
                    goods_id = entry.get('product_id')
                    detail = loop.run_until_complete(
                        tools.get_goods_by_id(goods_id))
                    goods = detail.get('data') if detail else None
                    if not goods:
                        # Show the listing entry whose lookup failed.
                        print(entry)
                        continue

                    contact_key = '%s %s' % (
                        goods.get('shop_id'), goods.get('shop_tel'))
                    if contact_key in seen_contacts:
                        continue
                    seen_contacts.add(contact_key)

                    if goods_id in self.global_goods_ids:
                        continue
                    self.global_goods_ids.append(goods_id)

                    if self.queue.full():
                        print("队列已满,总数%s" % self.queue.qsize())
                        # Same back-pressure handling as in phase 1.
                        self.event.wait()
                        self.event.clear()

                    # Signal waiting consumers only when the queue goes
                    # from empty to non-empty.
                    was_empty = self.queue.empty()
                    self.queue.put(goods)
                    if was_empty:
                        self.event.set()
                page += 1
    finally:
        # Release the loop's resources even if a request raised.
        loop.close()
    print(self.name + "结束")
def run(self) -> None:
    """Producer loop: crawl goods for every category in ``self.q_category``.

    Each entry taken from ``self.q_category`` is indexed by ``"id"``,
    ``"cids"`` and ``"parent_id"``.  Its listing is paged through
    ``tools.get_goods_by_category`` (starting at page 0) until a page comes
    back without a non-empty ``data.list``.  Products with fewer than one
    sale or an id already present in ``self.global_goods_ids`` are skipped;
    the rest are resolved through ``tools.get_goods_by_id`` and their
    ``data`` payload is pushed onto ``self.queue``.  ``self.event`` is set
    when the input queue is exhausted and whenever an item is put onto a
    previously empty output queue.

    Responses are assumed to be dict-like (``.get``) or falsy -- confirm
    against ``tools``.
    """
    loop = asyncio.new_event_loop()
    try:
        while True:
            # NOTE(review): empty()/get() is not atomic; with several
            # workers sharing q_category the get() below may block.
            if self.q_category.empty():
                self.event.set()
                break
            category = self.q_category.get()
            print("开始抓取分类%s" % category["id"])
            self.q_category.task_done()

            page = 0
            while True:
                payload = loop.run_until_complete(
                    tools.get_goods_by_category(
                        category["cids"], category["id"],
                        category["parent_id"], page))
                data = payload.get('data') if payload else None
                items = data.get('list') if data else None
                if not items:
                    print("抓取分类%s完毕,总页数:%s" % (category["id"], page))
                    break

                for entry in items:
                    goods_id = entry.get('product_id')
                    # A missing sell_num is treated as zero sales rather
                    # than raising TypeError on the comparison.
                    if (entry.get('sell_num') or 0) < 1:
                        continue
                    if goods_id in self.global_goods_ids:
                        continue
                    self.global_goods_ids.append(goods_id)

                    detail = loop.run_until_complete(
                        tools.get_goods_by_id(goods_id))
                    goods = detail.get('data') if detail else None
                    if not goods:
                        # Nothing usable to hand to the consumers.
                        continue

                    if self.queue.full():
                        print("队列已满,总数%s" % self.queue.qsize())
                        # Queue full: wait to be signalled, then reset
                        # the flag.  The item is still enqueued below
                        # instead of being silently dropped.
                        self.event.wait()
                        self.event.clear()

                    # Signal waiting consumers only when the queue goes
                    # from empty to non-empty.
                    was_empty = self.queue.empty()
                    # Assuming a standard queue.Queue, put() blocks until
                    # a slot is free if the queue is still full.
                    self.queue.put(goods)
                    if was_empty:
                        self.event.set()
                page += 1
    finally:
        # Release the loop's resources even if a request raised.
        loop.close()
    print(self.name + "结束")
def run(self) -> None:
    """Producer loop: crawl goods for every detail id in ``self.q_ids``.

    For each id taken from ``self.q_ids`` the listing is paged through
    ``tools.get_goods_by_detail_id`` starting at page 0.  The first empty
    page (no response or no ``data``) is tolerated and skipped; any later
    empty page ends the crawl for that id.  Products with fewer than one
    sale or an id already present in ``self.global_goods_ids`` are skipped;
    the rest are resolved through ``tools.get_goods_by_id`` and their
    ``data`` payload is pushed onto ``self.queue``.  ``self.event`` is set
    when the input queue is exhausted and whenever an item is put onto a
    previously empty output queue.

    Responses are assumed to be dict-like (``.get``) or falsy, with
    ``data`` being an iterable of product dicts -- confirm against
    ``tools``.
    """
    loop = asyncio.new_event_loop()
    try:
        while True:
            # NOTE(review): empty()/get() is not atomic; with several
            # workers sharing q_ids the get() below may block.
            if self.q_ids.empty():
                self.event.set()
                break
            detail_id = self.q_ids.get()
            print("开始抓取ID%s" % detail_id)
            self.q_ids.task_done()

            page = 0
            # True once one empty page has been tolerated for this id.
            has_over = False
            while True:
                payload = loop.run_until_complete(
                    tools.get_goods_by_detail_id(
                        self.mertial_id, detail_id, page))
                # Emptiness is decided in one place so a missing response
                # is never dereferenced.
                items = payload.get('data') if payload else None
                if not items:
                    if has_over:
                        print("抓取ID%s完毕,总页数:%s" % (detail_id, page))
                        break
                    has_over = True
                    page += 1
                    continue

                for entry in items:
                    goods_id = entry.get('product_id')
                    # A missing sell_num is treated as zero sales rather
                    # than raising TypeError on the comparison.
                    if (entry.get('sell_num') or 0) < 1:
                        continue
                    if goods_id in self.global_goods_ids:
                        continue
                    self.global_goods_ids.append(goods_id)

                    detail = loop.run_until_complete(
                        tools.get_goods_by_id(goods_id))
                    goods = detail.get('data') if detail else None
                    if not goods:
                        # Nothing usable to hand to the consumers.
                        continue

                    if self.queue.full():
                        print("队列已满,总数%s" % self.queue.qsize())
                        # Queue full: wait to be signalled, then reset
                        # the flag.  The item is still enqueued below
                        # instead of being silently dropped.
                        self.event.wait()
                        self.event.clear()

                    # Signal waiting consumers only when the queue goes
                    # from empty to non-empty.
                    was_empty = self.queue.empty()
                    # Assuming a standard queue.Queue, put() blocks until
                    # a slot is free if the queue is still full.
                    self.queue.put(goods)
                    if was_empty:
                        self.event.set()
                page += 1
    finally:
        # Release the loop's resources even if a request raised.
        loop.close()
    print(self.name + "结束")