def _item_info_update(items):
    # items: (column_id, item_id)
    column_id = items[0]
    item_id = str(items[1])
    sq = Sql()
    pr = Proxy()
    if PROXY_CRAWL:
        # Using free proxy pool: retry with a fresh proxy until each request succeeds
        while True:
            proxy_name = pr.get_proxy(0)  # tuple: (header, proxy)
            name = Crawler.get_name_jd(item_id, proxy_name[0], proxy_name[1])
            if name:
                sq.update_item_name(column_id, name)
                while True:
                    proxy_price = pr.get_proxy(1)  # tuple: (header, proxy)
                    price = Crawler.get_price_jd(item_id, proxy_price[0], proxy_price[1])
                    if price:
                        sq.update_item_price(column_id, price)
                        break
                break
    else:
        # Using local IP
        name = Crawler.get_name_jd(item_id, pr.get_ua())
        sq.update_item_name(column_id, name)
        price = Crawler.get_price_jd(item_id, pr.get_ua())
        sq.update_item_price(column_id, price)
    return name, price
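# Hypothetical call, assuming each database row arrives as a
# (column_id, item_id) pair as unpacked above (the ids are illustrative):
#
#     name, price = _item_info_update((42, 100012043978))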
def _check_item():
    """
    Check which items need to be crawled in this loop.
    :return: [{column_id, item_id}, ...]
    """
    sq = Sql()
    items = sq.read_all_not_updated_item()
    logging.warning('This loop ready to crawl: %s', items)
    return items
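# read_all_not_updated_item's body is not shown in this section; a minimal
# sketch of what it might do, assuming a PyMySQL connection and an `items`
# table with id, item_id, and updated_time columns (schema names here are
# assumptions, not taken from this project):
def _read_all_not_updated_item_sketch(conn):
    with conn.cursor() as cursor:
        cursor.execute(
            'SELECT id, item_id FROM items '
            'WHERE updated_time IS NULL '
            'OR updated_time < NOW() - INTERVAL 30 MINUTE')
        return [{'column_id': row[0], 'item_id': row[1]}
                for row in cursor.fetchall()]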
def _send_email():
    sq = Sql()
    # items_alert: [{column_id, item_id, user_price, item_price, name, email}, ...]
    items_alert = sq.check_item_need_to_remind()
    logging.warning('This loop sent email: %s', items_alert)
    for item_alert in items_alert:
        item_url = 'https://item.jd.com/' + str(item_alert['item_id']) + '.html'
        # Message: "Item you are monitoring: <name>, current price: <item_price>,
        # your target price: <user_price>, buy it now!" followed by the item URL
        email_text = ('您监控的物品:' + item_alert['name']
                      + ',现在价格为:' + str(item_alert['item_price'])
                      + ',您设定的价格为:' + str(item_alert['user_price'])
                      + ',赶紧购买吧!' + item_url)
        email_subject = '您监控的物品降价了!'  # "The item you are monitoring has dropped in price!"
        try:
            send_email = Mail(email_text, '价格监控系统', '亲爱的用户',
                              email_subject, item_alert['email'])
            send_email.send()
            time.sleep(Email_TIME)  # throttle so emails are not sent back-to-back
        except Exception:
            logging.critical('Send email failure, skip in this loop: %s', item_alert['email'])
            continue
        sq.update_status(item_alert['column_id'])
        logging.warning('Sent monitor email SUCCESS: %s', item_alert['email'])
def _send_email():
    # Send emails one at a time inside the loop to avoid sending simultaneously.
    sq = Sql()
    items = sq.check_item_need_to_remind()
    logging.warning('This loop sent email: %s', items)
    for item in items:
        # item: (email, item_name, item_price, user_price, item_id, column_id)
        item_url = 'https://item.jd.com/' + str(item[4]) + '.html'
        email_text = ('您监控的物品:' + item[1] + ',现在价格为:' + str(item[2])
                      + ',您设定的价格为:' + str(item[3]) + ',赶紧购买吧!' + item_url)
        email_subject = '您监控的物品降价了!'
        try:
            send_email = Mail(email_text, 'admin', 'user', email_subject, item[0])
            send_email.send()
            time.sleep(Email_TIME)
        except Exception:
            logging.critical('Send email failure, skip in this loop: %s', item[0])
            continue
        sq.update_status(item[5])
        logging.warning('Sent email SUCCESS: %s', item[0])
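# Mail's implementation is not shown in this section; below is a minimal
# smtplib-based sketch of a class compatible with the calls above. The
# SMTP_HOST, SMTP_USER, and SMTP_PASS constants are assumptions, not names
# taken from this project.
import smtplib
from email.header import Header
from email.mime.text import MIMEText

class Mail:
    def __init__(self, text, sender_name, receiver_name, subject, to_addr):
        # Build a UTF-8 plain-text message with display names for both ends
        self.msg = MIMEText(text, 'plain', 'utf-8')
        self.msg['From'] = Header(sender_name, 'utf-8')
        self.msg['To'] = Header(receiver_name, 'utf-8')
        self.msg['Subject'] = Header(subject, 'utf-8')
        self.to_addr = to_addr

    def send(self):
        # SMTP over SSL on the conventional port 465
        with smtplib.SMTP_SSL(SMTP_HOST, 465) as server:
            server.login(SMTP_USER, SMTP_PASS)
            server.sendmail(SMTP_USER, [self.to_addr], self.msg.as_string())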
def _item_info_update(self, item):
    # item: a dict carrying column_id and item_id for one monitored item.
    # Instantiates the Sql and Proxy classes, then updates the item's info.
    column_id = item['column_id']
    item_id = str(item['item_id'])
    sq = Sql()
    pr = Proxy()
    if PROXY_CRAWL == 1:
        # Using free proxy pool
        while True:
            # tuple: (header, proxy); the header is used by the JS crawler
            proxy_info = pr.get_proxy()
            cr = Crawler(proxy_info[1])
            item_info = cr.get_jd_item(item_id)
            if item_info:
                sq.update_item_name(column_id, item_info['name'])
                sq.update_item_price(column_id, item_info['price'])
                sq.update_item_plus_price(column_id, item_info['plus_price'])
                sq.update_item_subtitle(column_id, item_info['subtitle'])
                cr = Crawler(proxy_info[1])
                huihui_info = cr.get_huihui_item(item_id)
                if huihui_info:
                    sq.update_item_max_price(column_id, huihui_info['max_price'])
                    sq.update_item_min_price(column_id, huihui_info['min_price'])
                break
    elif PROXY_CRAWL == 2:
        # Using zhima proxy; PROXY_CRAWL is imported at module level via from-import
        while True:
            if not self.proxy_info_zhima:
                self.proxy_info_zhima = pr.get_proxy_zhima()
                logging.info('Zhima proxy: %s', self.proxy_info_zhima[1])
            # tuple: (header, proxy); the header is used by the JS crawler
            cr = Crawler(self.proxy_info_zhima[1])
            item_info = cr.get_jd_item(item_id)
            if not item_info:
                self.proxy_info_zhima = ()
                logging.warning('Zhima proxy crawl failure, changing proxy...')
                time.sleep(5)
                continue
            sq.update_item_name(column_id, item_info['name'])
            sq.update_item_price(column_id, item_info['price'])
            sq.update_item_plus_price(column_id, item_info['plus_price'])
            sq.update_item_subtitle(column_id, item_info['subtitle'])
            cr = Crawler(self.proxy_info_zhima[1])
            huihui_info = cr.get_huihui_item(item_id)
            if huihui_info:
                sq.update_item_max_price(column_id, huihui_info['max_price'])
                sq.update_item_min_price(column_id, huihui_info['min_price'])
            break
    else:
        # Using local IP
        cr = Crawler()
        # item_info: {name, price, plus_price, subtitle}
        item_info = cr.get_jd_item(item_id)
        sq.update_item_name(column_id, item_info['name'])
        sq.update_item_price(column_id, item_info['price'])
        sq.update_item_plus_price(column_id, item_info['plus_price'])
        sq.update_item_subtitle(column_id, item_info['subtitle'])
        cr = Crawler()
        # huihui_info: {max_price, min_price}
        # Query Huihui for the item's historical maximum and minimum prices.
        huihui_info = cr.get_huihui_item(item_id)
        if huihui_info:
            sq.update_item_max_price(column_id, huihui_info['max_price'])
            sq.update_item_min_price(column_id, huihui_info['min_price'])
    return item_info
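# Illustrative shapes for the dicts used above (the values are made up):
#
#     item_info   = {'name': '...', 'price': '5099.00',
#                    'plus_price': '4999.00', 'subtitle': '...'}
#     huihui_info = {'max_price': '5499.00', 'min_price': '4899.00'}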
def _check_item():
    sq = Sql()
    items = sq.read_all_not_updated_item(UPDATE_TIME)
    logging.warning('This loop ready to crawl: %s', items)
    return items
def _item_info_update(self, items):
    # items: (column_id, item_id)
    column_id = items[0]
    item_id = str(items[1])
    sq = Sql()
    pr = Proxy()
    if PROXY_CRAWL == 1:
        # Using free proxy pool
        while True:
            proxy_info = pr.get_proxy(0)  # tuple: (header, proxy)
            name = Crawler.get_name_jd(item_id, proxy_info[0], proxy_info[1])
            if name:
                sq.update_item_name(column_id, name)
                while True:
                    proxy_price = pr.get_proxy(1)  # tuple: (header, proxy)
                    price = Crawler.get_price_jd(item_id, proxy_price[0], proxy_price[1])
                    if price:
                        sq.update_item_price(column_id, price)
                        break
                break
    elif PROXY_CRAWL == 2:
        # Using zhima proxy: keep one proxy for names and one for prices,
        # discarding a proxy and fetching a new one whenever a request fails
        while True:
            if not self.proxy_info_zhima_name:
                self.proxy_info_zhima_name = pr.get_proxy_zhima()
                logging.info('Name proxy: %s %s', self.proxy_info_zhima_name, items)
            name = Crawler.get_name_jd(item_id, self.proxy_info_zhima_name[0],
                                       self.proxy_info_zhima_name[1])
            if not name:
                self.proxy_info_zhima_name = ()
                time.sleep(20)
                continue
            time.sleep(5)  # avoid requesting a new proxy too fast
            sq.update_item_name(column_id, name)
            while True:
                if not self.proxy_info_zhima_price:
                    self.proxy_info_zhima_price = pr.get_proxy_zhima()
                    logging.info('Price proxy: %s %s', self.proxy_info_zhima_price, items)
                price = Crawler.get_price_jd(item_id, self.proxy_info_zhima_price[0],
                                             self.proxy_info_zhima_price[1])
                if not price:
                    self.proxy_info_zhima_price = ()
                    time.sleep(20)
                    continue
                sq.update_item_price(column_id, price)
                break
            break
    else:
        # Using local IP
        name = Crawler.get_name_jd(item_id, pr.get_ua())
        sq.update_item_name(column_id, name)
        price = Crawler.get_price_jd(item_id, pr.get_ua())
        sq.update_item_price(column_id, price)
    return name, price
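# A minimal sketch of how these steps could be chained into the monitor's
# main loop. main_loop, the monitor instance, and the LOOP_TIME constant are
# assumptions, not part of this section:
def main_loop(monitor):
    while True:
        for row in _check_item():
            monitor._item_info_update(row)
        _send_email()
        time.sleep(LOOP_TIME)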