def parse(self, response):
    """Parse one page of the ggang.cn product list.

    The response body is decoded with ``json.loads`` twice (the payload is
    a JSON document wrapped in a JSON string).  When the payload reports
    page ``"1"``, one GET request per remaining page is scheduled with this
    method as callback; the number of pages is capped at 100.  Afterwards
    every entry of ``payload["msg"]`` is yielded as a ``GangItem``.

    ``posttime`` is not populated by this parser.
    """
    payload = json.loads(json.loads(response.text))
    status = payload["status"]

    if status["pageIndex"] == "1":
        # NOTE(review): "pagenum" is presumably the total record count, since
        # it is divided by the page size (30) used in the URL -- confirm.
        page_count = min(int(math.ceil(int(status["pagenum"])/30)), 100)
        for page in range(2, page_count + 1):
            next_url = "http://res.ggang.cn/SteelList/GetProductList/?productType=全部&productName=全部&material=%s&standard=全部&productId=0&mills=全部&home=&maxPrice=&maxThick=&maxlength=&maxwidth=&minPrice=&minThick=&minlength=&minwidth=&pageIndex=%d&productCity=&key=&IsPriceSort=0&pageSize=30&pageType=1" % (response.meta["material"], page)
            next_meta = {
                "pageIndex": page,
                "material": response.meta["material"],
            }
            yield scrapy.Request(
                method="get",
                url=next_url,
                meta=next_meta,
                headers=self.headers,
                callback=self.parse,
            )

    # (item field, payload key) pairs copied verbatim from each entry.
    copied_fields = (
        ('name', "name"),
        ('material', "material"),
        ('weight', "piece"),
        ('type', "norms"),
        ('factory', "plant"),
        ('price', "price"),
        ('storage', "wareHouse"),
        ('city', "place"),
        ('company', "source"),
    )
    for entry in payload["msg"]:
        item = GangItem()
        item['url'] = response.url
        item['status'] = entry["ID"]
        item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
        for field, key in copied_fields:
            item[field] = entry[key]
        yield item
def parse(self, response):
    """Parse one page of ouyeel.com search results.

    When the JSON payload has ``has_more == 1`` a POST for the next page is
    scheduled; the same dict is used both as form data and as request meta
    so the following response can read ``page`` and ``keywords`` back.  No
    explicit callback is passed to that request.  Every entry of
    ``payload["data"]`` is then yielded as a ``GangItem``.
    """
    payload = json.loads(response.text)

    if payload["has_more"] == 1:
        following_page = int(response.meta["page"]) + 1
        form = {
            "page": str(following_page),
            "keywords": response.meta["keywords"],
        }
        yield scrapy.FormRequest(
            method="post",
            url="https://www.ouyeel.com/jk-mobile/search/main-search/",
            meta=form,
            formdata=form,
            headers=self.headers,
            dont_filter=True,
        )

    # (item field, payload key) pairs copied verbatim from each entry.
    copied_fields = (
        ('name', "product_name"),
        ('material', "shop_sign"),
        ('weight', "weight"),
        ('type', "spec"),
        ('factory', "manufacturer"),
        ('price', "price"),
        ('storage', "warehouse_name"),
        ('city', "store_city_name"),
        ('company', "provider_name"),
        ('posttime', "active_date"),
    )
    for entry in payload["data"]:
        item = GangItem()
        item['url'] = response.url
        item['status'] = str(entry["id"])
        item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
        for field, key in copied_fields:
            item[field] = entry[key]
        yield item
def parse(self, response):
    """Parse one page of the opsteel.cn resource list.

    When the request meta says this is page ``"1"``, a POST is scheduled for
    each page from 2 up to ``data.totalPages`` with this method as callback;
    the same dict serves as form data and as request meta.  Every entry of
    ``data.items`` is then yielded as a ``GangItem``.

    ``city`` is not populated by this parser.
    """
    payload = json.loads(response.text)
    listing = payload["data"]

    if response.meta["page"] == "1":
        list_url = "http://www.opsteel.cn/search/getResourcesList/"
        total_pages = listing["totalPages"]
        for page in range(2, total_pages + 1):
            form = {
                "page": str(page),
                "shopSign": response.meta["shopSign"],
                "pageSize": "20",
            }
            yield scrapy.FormRequest(
                method="post",
                url=list_url,
                meta=form,
                formdata=form,
                headers=self.headers,
                callback=self.parse,
                dont_filter=True,
            )

    # (item field, payload key) pairs copied verbatim from each entry.
    copied_fields = (
        ('name', "product_name"),
        ('material', "shop_sign"),
        ('weight', "weight"),
        ('type', "spec"),
        ('factory', "producing_name"),
        ('price', "price"),
        ('storage', "storage_place_name"),
        ('company', "provider_name"),
        ('posttime', "upload_time"),
        ('quantity', "pieces"),
    )
    for entry in listing["items"]:
        item = GangItem()
        item['url'] = response.url
        item['status'] = entry["id"]
        item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
        for field, key in copied_fields:
            item[field] = entry[key]
        yield item
def parse(self, response):
    """Parse one page of the csesteel.com resource-hall listing.

    If the page contains the "next page" control, a POST for the following
    page is scheduled; the same dict is used as form data and request meta.
    No explicit callback is passed, so Scrapy falls back to the spider's
    default ``parse`` callback.  Each ``div.list`` row under the
    ``main-content`` element is then yielded as a ``GangItem``.

    ``company`` and ``posttime`` are not populated by this parser.

    A row whose link carries no numeric id, or whose price cell is missing,
    gets ``None`` for that field (like every other missing cell) instead of
    raising and discarding the remaining rows of the page.
    """
    # Named so it does not shadow the builtin ``next``.
    next_control = response.xpath("//*[@class='page-nex page-control']")
    if next_control:
        url = "http://www.csesteel.com/online/resource/hall/goodList?ajaxCmd=goodContent&securityToken="
        data = {
            "current": str(int(response.meta["current"]) + 1),
            "ph": response.meta["ph"],
        }
        yield scrapy.FormRequest(
            method="post",
            url=url,
            meta=data,
            formdata=data,
            headers=self.headers,
            dont_filter=True,
        )

    rows = response.xpath("//*[@class='main-content']/div[@class='list']")
    for row in rows:
        item = GangItem()
        item['url'] = response.url
        # Raw string: "\d" is an invalid escape sequence in a normal literal.
        # re_first yields None when nothing matches instead of IndexError.
        item['status'] = row.xpath("ul/li/div[1]/a/@href").re_first(r"\d+")
        item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
        item['name'] = row.xpath("ul/li/div[1]/a/text()").extract_first()
        item['material'] = row.xpath("ul/li/div[2]/text()").extract_first()
        item['weight'] = row.xpath("ul/li/div[8]/text()").extract_first()
        item['type'] = row.xpath("ul/li/div[3]/text()").extract_first()
        item['factory'] = row.xpath("ul/li/div[4]/text()").extract_first()

        # extract_first() returns None for a missing node; only clean up the
        # text (whitespace and currency sign) when there is any.
        raw_price = row.xpath("ul/li/div[9]/span/text()").extract_first()
        if raw_price is not None:
            raw_price = raw_price.strip().replace("¥", "")
        item['price'] = raw_price

        item['storage'] = row.xpath(
            "ul/li/div[5]/div/div[2]/text()").extract_first()
        item['city'] = row.xpath("ul/li/div[5]/span/text()").extract_first()
        yield item
def parse_product(self, response):
    """Yield one ``GangItem`` per entry of ``recordList`` in the JSON body.

    ``company`` and ``posttime`` are not populated by this parser.
    """
    # (item field, payload key) pairs copied verbatim from each record.
    copied_fields = (
        ('name', "categoryName"),
        ('material', "materialName"),
        ('weight', "qty"),
        ('quantity', "num"),
        ('type', "specName"),
        ('factory', "factoryName"),
        ('price', "price"),
        ('storage', "warehouseName"),
        ('city', "areaName"),
    )
    payload = json.loads(response.text)
    for record in payload["recordList"]:
        item = GangItem()
        item['url'] = response.url
        item['status'] = record["summaryCode"]
        item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
        for field, key in copied_fields:
            item[field] = record[key]
        yield item
def parse_product(self, response):
    """Yield one ``GangItem`` per entry of ``PaperList`` in the JSON body.

    ``company`` and ``posttime`` are not part of the payload entries; they
    are taken from the request meta of this response.
    """
    # (item field, payload key) pairs copied verbatim from each record.
    copied_fields = (
        ('name', "Pm"),
        ('material', "Cz"),
        ('weight', "Ton"),
        ('type', "Gg"),
        ('factory', "ProductArea"),
        ('price', "Price"),
        ('storage', "StockArea"),
        ('city', "DeliveryArea"),
    )
    payload = json.loads(response.text)
    for record in payload["PaperList"]:
        item = GangItem()
        item['url'] = response.url
        item['status'] = str(record["Id"])
        item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
        for field, key in copied_fields:
            item[field] = record[key]
        item['company'] = response.meta["company"]
        item['posttime'] = response.meta["posttime"]
        yield item
def parse_products(self, response):
    """Yield one ``GangItem`` per entry of ``data.list`` in the JSON body.

    ``posttime`` is not part of the payload entries; it is read from the
    meta of the request that produced this response.
    """
    # (item field, payload key) pairs copied verbatim from each record.
    copied_fields = (
        ('name', "categoryName"),
        ('material', "material"),
        ('weight', "pieceWeight"),
        ('type', "specification"),
        ('factory', "factory"),
        ('price', "mallPrice"),
        ('storage', "warehouse"),
        ('city', "city"),
        ('company', "orgTitleName"),
    )
    payload = json.loads(response.text)
    for record in payload["data"]["list"]:
        item = GangItem()
        item['url'] = response.url
        item['status'] = record["id"]
        item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
        for field, key in copied_fields:
            item[field] = record[key]
        item['posttime'] = response.request.meta["posttime"]
        yield item
def parse(self, response):
    """Parse the zhaogang.com goods list.

    For each entry of ``data.list`` in the JSON body:

    * an entry with an empty ``companyName`` is yielded directly as a
      ``GangItem`` whose ``company`` is set to "找钢网";
    * any other entry schedules a request to the ``findThirdGoodsItems``
      endpoint handled by ``parse_products``, carrying the entry's
      position, product ids and ``lastUpdateTime`` in the request meta.

    The entry's position comes from ``enumerate`` rather than
    ``list.index``: ``index`` rescans the list for every entry and returns
    the first *equal* entry, which is wrong when two entries compare equal.
    """
    payload = json.loads(response.text)
    for position, product in enumerate(payload["data"]["list"]):
        posttime = product["lastUpdateTime"]

        if not product["companyName"]:
            item = GangItem()
            item['url'] = response.url
            item['status'] = product["id"]
            item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
            item['name'] = product["categoryName"]
            item['material'] = product["materialName"]
            item['weight'] = product["pieceWeight"]
            item['type'] = product["specificationName"]
            item['factory'] = product["factoryName"]
            item['price'] = product["mallPrice"]
            item['storage'] = product["warehouseName"]
            item['city'] = product["city"]
            item['company'] = "找钢网"
            item['posttime'] = posttime
            yield item
        else:
            # NOTE(review): these values travel only in request meta; the
            # request itself has no query string or body.  Only "posttime"
            # is read back by parse_products -- confirm whether the other
            # keys are consumed elsewhere (e.g. a downloader middleware).
            request_meta = {
                "dataVersion": 9,
                "pageIndex": 1,
                "sonDataIndex": position,
                "spotIds": product["productIds"],
                "posttime": posttime,
            }
            yield scrapy.Request(
                url="http://mall.zhaogang.com/api/goods/findThirdGoodsItems",
                meta=request_meta,
                callback=self.parse_products,
                dont_filter=True,
            )