Ejemplo n.º 1
0
    def grab_product(self, flag, url):
        """Visit one product page and store everything parsed from it.

        A fresh ``Product`` is pre-filled with the source flag, the source
        link, status "1", language id "2" and the brand "Ted Baker". The page
        returned by ``self.do_visit(url)`` is then analysed. On success the
        product, its description, its images and its SKUs are saved in that
        order; on failure only a log line is written.

        Args:
            flag: Source marker copied onto the product.
            url: Link of the product page to visit.
        """
        item = Product()
        item.flag = flag           # source
        item.url = url             # source link
        item.status = "1"          # normal status
        item.language_id = "2"     # English
        item.brand = "Ted Baker"   # brand

        page = self.do_visit(url)

        # Guard clause: when the product info cannot be analysed, nothing is
        # saved and the URL is logged instead.
        if not self.ana_product_info(item, page):
            self.log_info(url + " product not saved!")
            return

        # Product row first, then its description.
        self.save_product(item)
        self.save_product_desc(item)

        # Images are analysed from the same page and stored under the
        # product's spider id.
        gallery = ProductImages()
        gallery.spid = item.spider_product_id
        self.ana_product_images(gallery, page)
        self.save_product_images(gallery)

        # SKU information is analysed and saved in a single helper call.
        sku = ProductSku()
        self.ana_and_save_product_sku(sku, item)
Ejemplo n.º 2
0
    def grab_product(self, flag, url):
        """Visit one product page and store everything parsed from it.

        A fresh ``Product`` is pre-filled with the source flag, the source
        link, status "1" and language id "2". The page returned by
        ``self.do_visit(url)`` is then analysed. On success the product, its
        description, its images (when ``product.images`` is non-empty) and
        its SKUs are saved; on failure only a log line is written.

        Args:
            flag: Source marker copied onto the product.
            url: Link of the product page to visit.
        """
        item = Product()
        item.flag = flag           # source
        item.url = url             # source link
        item.status = "1"          # normal status
        item.language_id = "2"     # English

        page = self.do_visit(url)

        # Guard clause: when the product info cannot be analysed, nothing is
        # saved and the URL is logged instead.
        if not self.ana_product_info(item, page):
            self.log_info(url + " product not saved!")
            return

        # Product row first, then its description.
        self.save_product(item)
        self.save_product_desc(item)

        # Images come from the already-analysed product and are stored as a
        # single comma-separated string.
        gallery = ProductImages()
        gallery.spid = item.spider_product_id
        if len(item.images) == 0:
            # Record products that have no images.
            self.log_info(str(gallery.spid) + " no images!")
        else:
            gallery.images = ",".join(item.images)
            self.save_product_images(gallery)

        # SKU information is analysed and saved in a single helper call.
        sku = ProductSku()
        self.ana_and_save_product_sku(sku, item)
Ejemplo n.º 3
0
def save_product_images(product, pidao):
    """Store the image list of ``product`` unless one is already stored.

    Every entry of ``product.images`` is read through its ``'url'`` key, the
    fixed suffix ``"?wid=750"`` is appended, and the results are joined with
    commas into ``ProductImages.images``.

    Args:
        product: Object exposing ``spider_product_id`` and an iterable
            ``images`` whose items are indexable by ``'url'``.
        pidao: Data-access object providing ``is_exists_product_images(spid)``
            and ``save(pimg)``.
    """
    pimg = ProductImages()
    pimg.spid = product.spider_product_id
    pimg.images = ",".join(
        "".join([img['url'], "?wid=750"]) for img in product.images
    )
    if pidao.is_exists_product_images(pimg.spid):
        # Existing records are left untouched; the duplicate is only reported.
        # The parenthesised single-argument form prints the same text under
        # Python 2 and is also valid syntax under Python 3.
        print("".join([str(pimg.spid), " images is exists!"]))
    else:
        pidao.save(pimg)
Ejemplo n.º 4
0
    def grab_product(self, flag, url):
        """Grab a product from the Italian site, then its Chinese description.

        The ``/cn/`` segment of ``url`` is replaced by ``/it/`` and that page
        is analysed first, with the description recorded under language id
        "2". On success the product, its description and its images are
        saved. The ``/cn/`` page is then visited and its description is saved
        under language id "1", but only when that page was analysed
        successfully. Finally the SKU state is fetched as JSON from the
        ``GetDetailState`` endpoint and saved.

        When the Italian page cannot be analysed, nothing is saved and a log
        line is written.

        Args:
            flag: Source marker copied onto the product.
            url: Product link; any ``/cn/`` segment is switched to ``/it/``.
        """
        # Crawl the Italian site first.
        iturl = url.replace("/cn/", "/it/")

        it_page = self.do_visit(iturl)
        product = Product()
        product.flag = flag
        # Source link
        product.url = iturl
        # Status: on shelf
        product.status = "1"
        # The Italian-site pass records its description as English.
        product.language_id = "2"

        # Analyse the product info; a failed analysis is logged and nothing
        # is stored.
        if not self.ana_product_info(product, it_page):
            self.log_info("".join([url, " product not find!"]))
            return

        # Save the product, then its description.
        self.save_product(product)
        self.save_product_desc(product)

        # Analyse and save the images found on the Italian page.
        pimg = ProductImages()
        pimg.spid = product.spider_product_id
        self.ana_product_images(pimg, it_page)
        self.save_product_images(pimg)

        # Switch to the Chinese site to pick up the Chinese description.
        cnurl = iturl.replace("/it/", "/cn/")
        cn_page = self.do_visit(cnurl)
        product.language_id = "1"
        # Only save when the Chinese page was analysed successfully;
        # otherwise the previously parsed text would be stored again under
        # the Chinese language id.
        if self.ana_product_info(product, cn_page):
            self.save_product_desc(product)
        else:
            self.log_info("".join([cnurl, " cn description not saved!"]))

        # Query the SKU state endpoint for this product's resource code.
        product_url = "".join([
            "https://www.farfetch.cn/it/product/GetDetailState?productId=",
            product.resource_code, "&designerId=0"
        ])
        skus = self.get_json(product_url)
        # Analyse and save the SKU information.
        self.ana_and_save_skus(product.spider_product_id, skus)
Ejemplo n.º 5
0
    def grab_product(self, flag, url):
        """Visit one product page and store everything parsed from it.

        A fresh ``Product`` is pre-filled with the source flag, the source
        link, status "1" and language id "2". The page returned by
        ``self.do_visit(url)`` is then analysed. On success the product, its
        description, its filtered images and its SKUs are saved; on failure
        only a log line is written.

        Args:
            flag: Source marker copied onto the product.
            url: Link of the product page to visit.
        """
        item = Product()
        item.flag = flag           # source
        item.url = url             # source link
        item.status = "1"          # normal status
        item.language_id = "2"     # English

        page = self.do_visit(url)

        # Guard clause: when the product info cannot be analysed, nothing is
        # saved and the URL is logged instead.
        if not self.ana_product_info(item, page):
            self.log_info(url + " product not saved!")
            return

        # Product row first, then its description.
        self.save_product(item)
        self.save_product_desc(item)

        gallery = ProductImages()
        gallery.spid = item.spider_product_id
        if len(item.images) == 0:
            # Record products that have no images.
            self.log_info(str(gallery.spid) + " no images!")
        else:
            # Images whose name contains "-20." are not wanted and are
            # dropped before the list is joined and saved.
            wanted = [src for src in item.images if "-20." not in src]
            gallery.images = ",".join(wanted)
            self.save_product_images(gallery)

        # SKU information is analysed from the page and saved in one call.
        sku = ProductSku()
        self.ana_and_save_product_sku(sku, item, page)
Ejemplo n.º 6
0
 def grab_product(self, flag, url):
     """Grab a product from the Italian site in Chinese, then in English.

     The ``/cn/`` segment of ``url`` is replaced by ``/it/`` and that page is
     analysed with language id "1". On success the product, its description,
     its images and its SKUs are saved. The same link with ``/zh/`` replaced
     by ``/en/`` is then visited and, if it can be analysed, its description
     is saved under language id "2". On failure of the first analysis only a
     log line is written.

     Args:
         flag: Source marker copied onto the product.
         url: Product link; any ``/cn/`` segment is switched to ``/it/``.
     """
     # Switch the link to the Italian site (the Italian site prices in euro).
     it_url = url.replace("/cn/", "/it/")
     page = self.do_visit(it_url)

     item = Product()
     item.flag = flag           # source
     item.url = it_url          # source link
     item.gender = "1"          # this site only carries women's products
     item.status = "1"          # normal status
     item.language_id = "1"     # Chinese

     # Guard clause: when the product info cannot be analysed, nothing is
     # saved and the visited link is logged instead.
     if not self.ana_product_info(item, page):
         self.log_info(it_url + " product not saved!")
         return

     # Product row first, then its description.
     self.save_product(item)
     self.save_product_desc(item)

     # Images are analysed from the same page and stored under the
     # product's spider id.
     gallery = ProductImages()
     gallery.spid = item.spider_product_id
     self.ana_product_images(gallery, page)
     self.save_product_images(gallery)

     # SKUs are analysed from the page and saved in a single helper call.
     sku = ProductSku()
     sku.spid = item.spider_product_id
     self.ana_and_save_product_sku(sku, page)

     # Switch the language to English and store that description as well,
     # but only when the English page can be analysed.
     english_url = it_url.replace("/zh/", "/en/")
     english_page = self.do_visit(english_url)
     item.language_id = "2"
     if self.ana_product_info(item, english_page):
         self.save_product_desc(item)