def _item_info_update(items):
    """Crawl the JD name and price of one item and store both through ``Sql``.

    Args:
        items: Indexable record; ``items[0]`` is the column id used for the
            database updates and ``items[1]`` is the item id (converted to
            ``str`` before crawling).

    Returns:
        A ``(name, price)`` tuple holding the crawled values.
    """
    column_id = items[0]
    item_id = str(items[1])
    database = Sql()
    proxy_source = Proxy()

    if not PROXY_CRAWL:
        # Using local ip: a single attempt for each value, stored as-is.
        name = Crawler.get_name_jd(item_id, proxy_source.get_ua())
        database.update_item_name(column_id, name)
        price = Crawler.get_price_jd(item_id, proxy_source.get_ua())
        database.update_item_price(column_id, price)
        return name, price

    # Using free proxy pool: request a fresh proxy for every attempt and keep
    # retrying until the crawler hands back a truthy name.
    while True:
        name_proxy = proxy_source.get_proxy(0)  # tuple: header, proxy
        name = Crawler.get_name_jd(item_id, name_proxy[0], name_proxy[1])
        if name:
            break
    database.update_item_name(column_id, name)

    # Same retry scheme for the price, started only once the name is stored.
    while True:
        price_proxy = proxy_source.get_proxy(1)  # tuple: header, proxy
        price = Crawler.get_price_jd(item_id, price_proxy[0], price_proxy[1])
        if price:
            break
    database.update_item_price(column_id, price)

    return name, price
def _item_info_update(self, item):
    """Crawl one item's JD details and Huihui price range, then store them.

    A ``Sql`` and a ``Proxy`` instance are created for the call. Depending on
    ``PROXY_CRAWL`` the crawl goes through the free proxy pool (``1``), the
    Zhima proxy (``2``) or the local IP (any other value).

    Args:
        item: Mapping that provides ``'column_id'`` and ``'item_id'``.

    Returns:
        The item info returned by ``Crawler.get_jd_item``; the fields read
        from it here are ``name``, ``price``, ``plus_price`` and ``subtitle``.
    """
    column_id = item['column_id']
    # Extract the item id; the crawler is always given it as a string.
    item_id = str(item['item_id'])
    sq = Sql()
    pr = Proxy()

    def store_item_info(info):
        # Persist the four JD fields of one crawled item.
        sq.update_item_name(column_id, info['name'])
        sq.update_item_price(column_id, info['price'])
        sq.update_item_plus_price(column_id, info['plus_price'])
        sq.update_item_subtitle(column_id, info['subtitle'])

    def store_huihui_info(crawler):
        # Ask Huihui for the item's max/min price and persist them when the
        # lookup yields something.
        # huihui_info = {max_price, min_price}
        huihui_info = crawler.get_huihui_item(item_id)
        if huihui_info:
            # Every crawl mode reads the result by key. The free-proxy mode
            # used to index it with 0/1, which does not match the dict shape
            # used by the other modes.
            sq.update_item_max_price(column_id, huihui_info['max_price'])
            sq.update_item_min_price(column_id, huihui_info['min_price'])

    if PROXY_CRAWL == 1:
        # Using free proxy pool
        while True:
            # tuple: header, proxy. Header for Js crawler
            proxy_info = pr.get_proxy()
            item_info = Crawler(proxy_info[1]).get_jd_item(item_id)
            if item_info:
                break
        store_item_info(item_info)
        # A separate Crawler instance is used for the Huihui request.
        store_huihui_info(Crawler(proxy_info[1]))
    # PROXY_CRAWL is brought in through a from-import.
    elif PROXY_CRAWL == 2:
        # Using zhima proxy
        while True:
            # The Zhima proxy is cached on the instance and only replaced
            # after a failed crawl.
            if not self.proxy_info_zhima:
                self.proxy_info_zhima = pr.get_proxy_zhima()
            logging.info('Zhima proxy: %s', self.proxy_info_zhima[1])
            # tuple: header, proxy. Header for Js crawler
            item_info = Crawler(self.proxy_info_zhima[1]).get_jd_item(item_id)
            if item_info:
                break
            # Drop the failing proxy so the next iteration requests a new one.
            self.proxy_info_zhima = ()
            logging.warning('Zhima proxy crawl failure, changing proxy...')
            time.sleep(5)
        store_item_info(item_info)
        store_huihui_info(Crawler(self.proxy_info_zhima[1]))
    else:
        # Using local ip
        # item_info: {name, price, plus_price, subtitle}
        # NOTE(review): unlike the proxy modes, a falsy crawl result is not
        # retried here and would fail on the subscripts below - confirm that
        # this is intended.
        item_info = Crawler().get_jd_item(item_id)
        store_item_info(item_info)
        store_huihui_info(Crawler())

    return item_info
def _item_info_update(self, items):
    """Crawl the JD name and price of one item and store both through ``Sql``.

    ``PROXY_CRAWL`` selects the crawl mode: ``1`` uses the free proxy pool,
    ``2`` uses Zhima proxies cached on the instance, and any other value
    crawls from the local IP.

    Args:
        items: Indexable record; ``items[0]`` is the column id used for the
            database updates and ``items[1]`` is the item id (converted to
            ``str`` before crawling).

    Returns:
        A ``(name, price)`` tuple holding the crawled values.
    """
    column_id = items[0]
    item_id = str(items[1])
    database = Sql()
    proxy_source = Proxy()

    if PROXY_CRAWL == 1:
        # Using free proxy pool: a fresh proxy per attempt, retried until the
        # crawler returns a truthy name.
        while True:
            name_proxy = proxy_source.get_proxy(0)  # tuple: header, proxy
            name = Crawler.get_name_jd(item_id, name_proxy[0], name_proxy[1])
            if name:
                break
        database.update_item_name(column_id, name)

        while True:
            price_proxy = proxy_source.get_proxy(1)  # tuple: header, proxy
            price = Crawler.get_price_jd(item_id, price_proxy[0], price_proxy[1])
            if price:
                break
        database.update_item_price(column_id, price)
        return name, price

    if PROXY_CRAWL == 2:
        # Using zhima proxy: the name proxy and the price proxy are cached
        # separately on the instance and reset after a failed crawl.
        while True:
            if not self.proxy_info_zhima_name:
                self.proxy_info_zhima_name = proxy_source.get_proxy_zhima()
            print('Name proxy:', self.proxy_info_zhima_name, items)
            name = Crawler.get_name_jd(
                item_id,
                self.proxy_info_zhima_name[0],
                self.proxy_info_zhima_name[1],
            )
            if name:
                break
            self.proxy_info_zhima_name = ()
            time.sleep(20)

        time.sleep(5)  # Avoid get proxy too fast
        database.update_item_name(column_id, name)

        while True:
            if not self.proxy_info_zhima_price:
                self.proxy_info_zhima_price = proxy_source.get_proxy_zhima()
            print('Price proxy:', self.proxy_info_zhima_price, items)
            price = Crawler.get_price_jd(
                item_id,
                self.proxy_info_zhima_price[0],
                self.proxy_info_zhima_price[1],
            )
            if price:
                break
            self.proxy_info_zhima_price = ()
            time.sleep(20)

        database.update_item_price(column_id, price)
        return name, price

    # Using local ip: a single attempt for each value, stored as-is.
    name = Crawler.get_name_jd(item_id, proxy_source.get_ua())
    database.update_item_name(column_id, name)
    price = Crawler.get_price_jd(item_id, proxy_source.get_ua())
    database.update_item_price(column_id, price)
    return name, price