Example #1
def _item_info_update(items):
     column_id = items[0]
     item_id = items[1]
     item_id = str(item_id)
     sq = Sql()
     pr = Proxy()
     if PROXY_CRAWL:
         # Using free proxy pool
         while True:
             proxy_name = pr.get_proxy(0)  # tuple: header, proxy
             name = Crawler.get_name_jd(item_id, proxy_name[0],
                                        proxy_name[1])
             if name:
                 sq.update_item_name(column_id, name)
                 while True:
                     proxy_price = pr.get_proxy(1)  # tuple: header, proxy
                     price = Crawler.get_price_jd(item_id, proxy_price[0],
                                                  proxy_price[1])
                     if price:
                         sq.update_item_price(column_id, price)
                         break
                 break
     else:
         # Using local ip
         name = Crawler.get_name_jd(item_id, pr.get_ua())
         sq.update_item_name(column_id, name)
         price = Crawler.get_price_jd(item_id, pr.get_ua())
         sq.update_item_price(column_id, price)
         return name, price
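Example #1 (and Example #3 below) relies on project classes — Sql, Proxy, Crawler — and a PROXY_CRAWL switch that are imported elsewhere and not shown. The stand-ins below are a minimal, hypothetical sketch of the interfaces the snippet appears to assume, so the local-IP branch can be exercised in isolation; none of the bodies reflect the real project code.

PROXY_CRAWL = 0  # assumed module-level switch; the real value is imported from a settings module

class Proxy:
    # Hypothetical stand-in for the project's proxy helper.
    def get_ua(self):
        # Header dict with a User-Agent, used for direct (local IP) requests.
        return {'User-Agent': 'Mozilla/5.0'}

    def get_proxy(self, kind):
        # (header, proxy) tuple drawn from a free proxy pool; Example #1 passes
        # 0 for the name crawl and 1 for the price crawl.
        return self.get_ua(), {'http': 'http://127.0.0.1:8080'}

class Sql:
    # Hypothetical stand-in for the project's database wrapper.
    def update_item_name(self, column_id, name):
        print('update name ', column_id, name)

    def update_item_price(self, column_id, price):
        print('update price', column_id, price)

class Crawler:
    # Hypothetical stand-in; the real class scrapes jd.com.
    @staticmethod
    def get_name_jd(item_id, header, proxy=None):
        return 'item-' + item_id    # placeholder name

    @staticmethod
    def get_price_jd(item_id, header, proxy=None):
        return 99.0                 # placeholder price

With these stubs in place, calling _item_info_update((column_id, item_id)) exercises the local-IP branch end to end; setting PROXY_CRAWL to a truthy value exercises the proxy-pool loop instead.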
Example #2
    def _item_info_update(self, item):
        # 1. Arguments: takes an item dict; column_id and item_id are read from it
        # 2. Instantiate the Sql and Proxy classes
        # 3.

        column_id = item['column_id']
        # Read item_id and convert it to a string
        item_id = str(item['item_id'])
        sq = Sql()
        pr = Proxy()
        if PROXY_CRAWL == 1:
            # Using free proxy pool
            while True:
                # (header, proxy) tuple; the header is used by the JS crawler
                proxy_info = pr.get_proxy()
                cr = Crawler(proxy_info[1])
                item_info = cr.get_jd_item(item_id)
                if item_info:
                    sq.update_item_name(column_id, item_info['name'])
                    sq.update_item_price(column_id, item_info['price'])
                    sq.update_item_plus_price(column_id, item_info['plus_price'])
                    sq.update_item_subtitle(column_id, item_info['subtitle'])
                    cr = Crawler(proxy_info[1])
                    huihui_info = cr.get_huihui_item(item_id)
                    if huihui_info:
                        sq.update_item_max_price(column_id, huihui_info['max_price'])
                        sq.update_item_min_price(column_id, huihui_info['min_price'])
                    break
        # PROXY_CRAWL is imported with a from ... import at module level
        elif PROXY_CRAWL == 2:
            # Using zhima proxy
            while True:
                if not self.proxy_info_zhima:
                    self.proxy_info_zhima = pr.get_proxy_zhima()
                logging.info('Zhima proxy: %s', self.proxy_info_zhima[1])
                # (header, proxy) tuple; the header is used by the JS crawler
                cr = Crawler(self.proxy_info_zhima[1])
                item_info = cr.get_jd_item(item_id)
                if not item_info:
                    self.proxy_info_zhima = ()
                    logging.warning('Zhima proxy crawl failure, changing proxy...')
                    time.sleep(5)
                    continue
                else:
                    sq.update_item_name(column_id, item_info['name'])
                    sq.update_item_price(column_id, item_info['price'])
                    sq.update_item_plus_price(column_id, item_info['plus_price'])
                    sq.update_item_subtitle(column_id, item_info['subtitle'])
                    cr = Crawler(self.proxy_info_zhima[1])
                    huihui_info = cr.get_huihui_item(item_id)
                    if huihui_info:
                        sq.update_item_max_price(column_id, huihui_info['max_price'])
                        sq.update_item_min_price(column_id, huihui_info['min_price'])
                    break
        else:
            # Using local ip
            cr = Crawler()
            # item_info: {name, price, plus_price, subtitle}
            item_info = cr.get_jd_item(item_id)
            sq.update_item_name(column_id, item_info['name'])
            sq.update_item_price(column_id, item_info['price'])
            sq.update_item_plus_price(column_id, item_info['plus_price'])
            sq.update_item_subtitle(column_id, item_info['subtitle'])

            cr = Crawler()
            # huihui_info = {max_price, min_price}
            # Query Huihui for the item's highest and lowest recorded prices.
            huihui_info = cr.get_huihui_item(item_id)
            if huihui_info:
                sq.update_item_max_price(column_id, huihui_info['max_price'])
                sq.update_item_min_price(column_id, huihui_info['min_price'])

        return item_info
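A hedged usage sketch for Example #2: it assumes the method sits on a spider/updater class that caches the Zhima proxy in self.proxy_info_zhima, that PROXY_CRAWL picks the strategy (1 = free proxy pool, 2 = Zhima paid proxy, anything else = local IP), and that item rows are dicts carrying 'column_id' and 'item_id'. The driver name update_all and the commented-out ids are illustrative, not project code.

import logging

logging.basicConfig(level=logging.INFO)

def update_all(spider, item_rows):
    # spider: an instance of the class that defines _item_info_update (Example #2).
    # item_rows: dicts read from the items table, each with column_id and item_id.
    for item in item_rows:
        info = spider._item_info_update(item)
        logging.info('item %s refreshed: %s', item['item_id'], info)

# Example call (placeholder ids):
# update_all(spider, [{'column_id': 1, 'item_id': 1234567},
#                     {'column_id': 2, 'item_id': 7654321}])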
Example #3
def _item_info_update(self, items):
     column_id = items[0]
     item_id = items[1]
     item_id = str(item_id)
     sq = Sql()
     pr = Proxy()
     if PROXY_CRAWL == 1:
         # Using free proxy pool
         while True:
             proxy_info = pr.get_proxy(0)  # tuple: header, proxy
             name = Crawler.get_name_jd(item_id, proxy_info[0],
                                        proxy_info[1])
             if name:
                 sq.update_item_name(column_id, name)
                 while True:
                     proxy_price = pr.get_proxy(1)  # tuple: header, proxy
                     price = Crawler.get_price_jd(item_id, proxy_price[0],
                                                  proxy_price[1])
                     if price:
                         sq.update_item_price(column_id, price)
                         break
                 break
     elif PROXY_CRAWL == 2:
         # Using zhima proxy
         while True:
             if not self.proxy_info_zhima_name:
                 self.proxy_info_zhima_name = pr.get_proxy_zhima()
             print('Name proxy:', self.proxy_info_zhima_name, items)
             name = Crawler.get_name_jd(item_id,
                                        self.proxy_info_zhima_name[0],
                                        self.proxy_info_zhima_name[1])
             if not name:
                 self.proxy_info_zhima_name = ()
                 time.sleep(20)
                 continue
             else:
                 time.sleep(5)  # Avoid requesting the next Zhima proxy too quickly
                 sq.update_item_name(column_id, name)
                 while True:
                     if not self.proxy_info_zhima_price:
                         self.proxy_info_zhima_price = pr.get_proxy_zhima()
                     print('Price proxy:', self.proxy_info_zhima_price,
                           items)
                     price = Crawler.get_price_jd(
                         item_id, self.proxy_info_zhima_price[0],
                         self.proxy_info_zhima_price[1])
                     if not price:
                         self.proxy_info_zhima_price = ()
                         time.sleep(20)
                         continue
                     else:
                         sq.update_item_price(column_id, price)
                         break
                 break
     else:
         # Using local ip
         name = Crawler.get_name_jd(item_id, pr.get_ua())
         sq.update_item_name(column_id, name)
         price = Crawler.get_price_jd(item_id, pr.get_ua())
         sq.update_item_price(column_id, price)
         return name, price
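The PROXY_CRAWL == 2 branch in Example #3 repeats the same retry shape twice: reuse a cached (header, proxy) pair, fetch a new one when the cache is empty, and drop it plus sleep when a crawl through it fails. A minimal sketch of that pattern as a reusable helper, under the same interface assumptions as the stubs after Example #1 (the name fetch_with_zhima_proxy and the default delay are illustrative choices, not project code):

import time

def fetch_with_zhima_proxy(fetch, item_id, pr, cached_proxy, retry_delay=20):
    # fetch: a crawl function such as Crawler.get_name_jd or Crawler.get_price_jd,
    #        called as fetch(item_id, header, proxy).
    # pr: Proxy instance whose get_proxy_zhima() returns a (header, proxy) tuple.
    # cached_proxy: the last working (header, proxy) tuple, or () when there is none.
    # Returns (result, proxy) so the caller can keep the working proxy cached.
    proxy = cached_proxy
    while True:
        if not proxy:
            proxy = pr.get_proxy_zhima()
        result = fetch(item_id, proxy[0], proxy[1])
        if result:
            return result, proxy
        # The crawl through this proxy failed: discard it, wait, try a fresh one.
        proxy = ()
        time.sleep(retry_delay)

With this helper, the name and price loops in the Zhima branch collapse to two calls that share the proxy caching and back-off behaviour.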