def grab_product(self, flag, url):
    """Fetch one product page and persist the product, its images and SKUs.

    Args:
        flag: Source marker, stored on the product record as ``flag``.
        url: Product page address; fetched with ``self.do_visit`` and stored
            on the record as ``url``.

    If ``self.ana_product_info`` reports failure, nothing is saved and a
    "product not saved!" line is logged instead.
    """
    item = Product()
    item.flag = flag          # source
    item.url = url            # source link
    item.status = "1"         # status: normal
    item.language_id = "2"    # English
    item.brand = "Ted Baker"  # brand

    page = self.do_visit(url)

    # Guard clause: stop early when the product info could not be parsed.
    if not self.ana_product_info(item, page):
        self.log_info(url + " product not saved!")
        return

    # Persist the product itself, then its description.
    self.save_product(item)
    self.save_product_desc(item)

    # Parse the image info out of the page and persist it.
    gallery = ProductImages()
    gallery.spid = item.spider_product_id
    self.ana_product_images(gallery, page)
    self.save_product_images(gallery)

    # Parse and persist the SKU info.
    sku = ProductSku()
    self.ana_and_save_product_sku(sku, item)
def grab_product(self, flag, url):
    """Fetch one product page and persist the product, its images and SKUs.

    The image list is read from ``product.images`` rather than parsed from
    the page in a separate step.

    Args:
        flag: Source marker, stored on the product record as ``flag``.
        url: Product page address; fetched with ``self.do_visit`` and stored
            on the record as ``url``.
    """
    item = Product()
    item.flag = flag        # source
    item.url = url          # source link
    item.status = "1"       # status: normal
    item.language_id = "2"  # English

    page = self.do_visit(url)

    # Guard clause: stop early when the product info could not be parsed.
    if not self.ana_product_info(item, page):
        self.log_info(url + " product not saved!")
        return

    # Persist the product itself, then its description.
    self.save_product(item)
    self.save_product_desc(item)

    gallery = ProductImages()
    gallery.spid = item.spider_product_id
    if len(item.images) == 0:
        # Record products that have no images at all.
        self.log_info(str(gallery.spid) + " no images!")
    else:
        # Images are stored as one comma-separated string.
        gallery.images = ",".join(item.images)
        self.save_product_images(gallery)

    # Persist the SKU info.
    sku = ProductSku()
    self.ana_and_save_product_sku(sku, item)
def save_product_images(product, pidao):
    """Build the image record for ``product`` and store it unless one exists.

    Args:
        product: Object exposing ``spider_product_id`` and ``images``; every
            entry of ``images`` is indexed with the ``'url'`` key.
        pidao: Data-access object providing
            ``is_exists_product_images(spid)`` and ``save(record)``.

    When a record already exists for the product id, nothing is written and
    a notice is printed instead.
    """
    pimg = ProductImages()
    pimg.spid = product.spider_product_id

    # Every image URL gets the "?wid=750" query suffix (presumably a width
    # hint for the image server -- confirm), then all URLs are stored as one
    # comma-separated string.
    pimg.images = ",".join(
        ["".join([img['url'], "?wid=750"]) for img in product.images]
    )

    if pidao.is_exists_product_images(pimg.spid):
        # Existing records are deliberately left untouched (updating them was
        # disabled in the original code); only report the duplicate.
        # The parenthesised single-argument form prints the same text on
        # Python 2 and is also valid syntax on Python 3.
        print("".join([str(pimg.spid), " images is exists!"]))
    else:
        pidao.save(pimg)
def grab_product(self, flag, url):
    """Crawl a product from the Italian site, then add its Chinese description.

    Steps:
      1. Fetch the ``/it/`` version of ``url`` and save the product, its
         description (recorded with language_id "2") and its images.
      2. Fetch the ``/cn/`` version and save the description again with
         language_id "1" -- only when that page parses successfully.
      3. Query the ``GetDetailState`` endpoint for SKU data and save it.

    Args:
        flag: Source marker, stored on the product record as ``flag``.
        url: Product page address; ``/cn/`` is replaced by ``/it/`` before
            the first fetch.

    If the Italian page cannot be parsed, nothing is saved and a
    "product not find!" line is logged.
    """
    # Crawl the Italian site first.
    iturl = url.replace("/cn/", "/it/")
    it_page = self.do_visit(iturl)

    product = Product()
    product.flag = flag
    product.url = iturl      # source link
    product.status = "1"     # status: on sale
    # The Italian-site data is crawled first; its description is recorded
    # as English.
    product.language_id = "2"

    # Guard clause: a product that cannot be parsed is logged as failed.
    if not self.ana_product_info(product, it_page):
        self.log_info("".join([url, " product not find!"]))
        return

    # Persist the product itself, then its description.
    self.save_product(product)
    self.save_product_desc(product)

    # Parse and persist the image info.
    pimg = ProductImages()
    pimg.spid = product.spider_product_id
    self.ana_product_images(pimg, it_page)
    self.save_product_images(pimg)

    # Switch to the Chinese site to pick up the Chinese description.
    cnurl = iturl.replace("/it/", "/cn/")
    cn_page = self.do_visit(cnurl)
    product.language_id = "1"
    # Only save when the Chinese page parsed successfully. Otherwise the
    # description saved under the Chinese language id would not come from a
    # successful parse of the Chinese page.
    if self.ana_product_info(product, cn_page):
        self.save_product_desc(product)
    else:
        self.log_info("".join([cnurl, " product desc not saved!"]))

    # Call the product SKU endpoint to obtain the SKU info.
    product_url = "".join([
        "https://www.farfetch.cn/it/product/GetDetailState?productId=",
        product.resource_code,
        "&designerId=0"
    ])
    skus = self.get_json(product_url)

    # Parse and persist the SKU info.
    self.ana_and_save_skus(product.spider_product_id, skus)
def grab_product(self, flag, url):
    """Fetch one product page and persist the product, its images and SKUs.

    Images whose URL contains "-20." are skipped. If no image remains after
    that filter (or the product had none to begin with), no image record is
    saved and a "no images!" line is logged.

    Args:
        flag: Source marker, stored on the product record as ``flag``.
        url: Product page address; fetched with ``self.do_visit`` and stored
            on the record as ``url``.
    """
    product = Product()
    product.flag = flag        # source
    product.url = url          # source link
    product.status = "1"       # status: normal
    product.language_id = "2"  # English

    pg = self.do_visit(url)

    # Guard clause: stop early when the product info could not be parsed.
    if not self.ana_product_info(product, pg):
        self.log_info("".join([url, " product not saved!"]))
        return

    # Persist the product itself, then its description.
    self.save_product(product)
    self.save_product_desc(product)

    pimg = ProductImages()
    pimg.spid = product.spider_product_id

    # Images with the "-20." marker are not the ones we need.
    img_arr = [img for img in product.images if "-20." not in img]

    # Check the filtered list, not the raw one. Otherwise a product whose
    # images are all "-20." variants would be saved with an empty image
    # string and never show up in the log.
    if img_arr:
        pimg.images = ",".join(img_arr)
        self.save_product_images(pimg)
    else:
        # Record products that have no usable images.
        self.log_info("".join([str(pimg.spid), " no images!"]))

    # Parse and persist the SKU info.
    psku = ProductSku()
    self.ana_and_save_product_sku(psku, product, pg)
def grab_product(self, flag, url):
    """Crawl a product in Chinese first, then add its English description.

    Args:
        flag: Source marker, stored on the product record as ``flag``.
        url: Product page address; ``/cn/`` is replaced by ``/it/`` before
            fetching.

    If the first page cannot be parsed, nothing is saved and a
    "product not saved!" line is logged.
    """
    # Point the link at the Italian site (per the original note, that site
    # lists prices in euros).
    surl = url.replace("/cn/", "/it/")
    page = self.do_visit(surl)

    item = Product()
    item.flag = flag        # source
    item.url = surl         # source link
    item.gender = "1"       # this site carries women's products only
    item.status = "1"       # status: normal
    item.language_id = "1"  # Chinese

    # Guard clause: stop early when the product info could not be parsed.
    if not self.ana_product_info(item, page):
        self.log_info(surl + " product not saved!")
        return

    # Persist the product itself, then its description.
    self.save_product(item)
    self.save_product_desc(item)

    # Parse and persist the image info.
    gallery = ProductImages()
    gallery.spid = item.spider_product_id
    self.ana_product_images(gallery, page)
    self.save_product_images(gallery)

    # Parse and persist the SKU info.
    sku = ProductSku()
    sku.spid = item.spider_product_id
    self.ana_and_save_product_sku(sku, page)

    # Switch the language to English and save that description as well,
    # but only when the English page parses.
    en_page = self.do_visit(surl.replace("/zh/", "/en/"))
    item.language_id = "2"
    if self.ana_product_info(item, en_page):
        self.save_product_desc(item)