def parse(self, response):
    """Yield one ProductItem per product node under ``#mainsrp-itemlist``.

    Every field is built by joining all matching text nodes (or, for the
    image, all matching ``data-src`` attribute values) and stripping the
    result, so a field with no match becomes an empty string.

    Args:
        response: Response object exposing ``xpath()``.

    Yields:
        ProductItem with ``price``, ``title``, ``shop``, ``image``, ``deal``
        and ``location`` populated.
    """
    # Fixed: the predicate used to read ``contains(@class,"item"]`` -- the
    # closing parenthesis was missing, making the expression invalid XPath.
    products = response.xpath(
        '//div[@id="mainsrp-itemlist"]//div[@class="items"][1]//div[contains(@class,"item")]'
    )
    for product in products:
        item = ProductItem()
        item["price"] = ''.join(
            product.xpath('.//div[contains(@class,"price")]//text()').extract()
        ).strip()
        item["title"] = ''.join(
            product.xpath('.//div[contains(@class,"title")]//text()').extract()
        ).strip()
        item["shop"] = ''.join(
            product.xpath('.//div[contains(@class,"shop")]//text()').extract()
        ).strip()
        # Fixed: a stray ")" after ``@class="pic"`` made this expression
        # invalid XPath as well.
        item["image"] = ''.join(
            product.xpath(
                './/div[@class="pic"]//img[contains(@class,"img")]/@data-src'
            ).extract()
        ).strip()
        item["deal"] = ''.join(
            product.xpath('.//div[contains(@class,"deal-cnt")]//text()').extract()
        ).strip()
        item["location"] = ''.join(
            product.xpath('.//div[contains(@class,"location")]//text()').extract()
        ).strip()
        yield item
def parse(self, response):
    """Yield one ProductItem per product node under ``#mainsrp-itemlist``.

    ``price``, ``title``, ``shop`` and ``image`` are the stripped
    concatenation of every match; ``deal`` and ``location`` are the first
    match only (``None`` when nothing matches).  After each item is handed
    to the consumer, the item is printed as a dict followed by a separator
    line.
    """
    # Fields built by joining every match and stripping the result.
    joined_queries = (
        ('price', './/div[contains(@class, "price")]//text()'),
        ('title', './/div[contains(@class, "title")]//text()'),
        ('shop', './/div[contains(@class, "shop")]//text()'),
        ('image', './/div[@class="pic"]//img[contains(@class, "img")]/@data-src'),
    )
    # Fields that keep only the first match, unstripped.
    first_match_queries = (
        ('deal', './/div[contains(@class, "deal-cnt")]//text()'),
        ('location', './/div[contains(@class, "location")]//text()'),
    )

    nodes = response.xpath(
        '//div[@id="mainsrp-itemlist"]//div[@class="items"][1]//div[contains(@class, "item")]'
    )
    for node in nodes:
        item = ProductItem()
        for field, query in joined_queries:
            item[field] = ''.join(node.xpath(query).extract()).strip()
        for field, query in first_match_queries:
            item[field] = node.xpath(query).extract_first()
        yield item
        # These run when the generator is resumed after the yield.
        print(dict(item))
        print('ok' * 30)
def parse(self, response):
    """Yield one ProductItem per product node under ``#mainsrp-itemlist``.

    Args:
        response: Response object exposing ``xpath()`` and ``urljoin()``.

    Yields:
        ProductItem with ``image``, ``price``, ``deal``, ``title``, ``shop``
        and ``location`` populated.  Fields whose node is missing are set to
        an empty string instead of aborting the whole page.
    """
    products = response.xpath(
        '//*[@id="mainsrp-itemlist"]/div/div/div[1]/div')
    for product in products:
        item = ProductItem()

        # Fixed: every per-field query is now relative (``.//``).  The
        # original used ``//``, which searches the whole document, so each
        # item received the values of the first product on the page.
        # Fixed: ``extract_first(default='')`` avoids AttributeError from
        # calling ``.strip()`` on None when a node is absent.
        image_src = product.xpath(
            './/div[@class="pic"]//a//img/@data-src'
        ).extract_first(default='').strip()
        # Joining an empty path would return the page URL itself, so leave
        # the field empty when there is no image.
        item['image'] = response.urljoin(image_src) if image_src else ''

        # Fixed: the predicate was ``contains(@class, "price"]`` (missing
        # closing parenthesis), which is invalid XPath.
        item['price'] = product.xpath(
            './/div[contains(@class, "price")]/strong/text()'
        ).extract_first(default='').strip()

        # The last three characters are dropped -- presumably a fixed
        # suffix after the count; TODO confirm against the live markup.
        item['deal'] = product.xpath(
            './/div[@class="deal-cnt"]/text()'
        ).extract_first(default='').strip()[:-3]

        # Fixed: ``contails`` was a typo for the XPath function ``contains``.
        item['title'] = ''.join(
            product.xpath(
                './/*[contains(@class, "title")]/a/span/text()').extract()
        ).strip()
        item['shop'] = ''.join(
            product.xpath('.//div[@class="shop"]/a/span/text()').extract()
        ).strip()
        item['location'] = product.xpath(
            './/div[@class="location"]/text()'
        ).extract_first(default='').strip()

        print('item: ', item)
        yield item
def parse(self, response):
    """Yield one ProductItem per ``ul.gl-warp li`` entry on the page.

    Args:
        response: Response object exposing ``css()``.

    Yields:
        ProductItem with ``name``, ``price``, ``shop``, ``commit`` and
        ``image`` populated.  ``name`` is an empty string when its node is
        missing; the other fields are ``None`` when nothing matches.
    """
    # Collect every product entry on this page.
    products = response.css('ul.gl-warp li')
    for product in products:
        item = ProductItem()
        # default='' keeps a missing name node from raising AttributeError
        # on ``.strip()`` and aborting the remaining products.
        item['name'] = product.css(
            'div.p-name a em::text').extract_first(default='').strip()
        item['price'] = product.css(
            'div.p-price strong.J_price i::text').extract_first()
        item['shop'] = product.css(
            'div.p-shop span a::attr(title)').extract_first()
        item['commit'] = product.css(
            'strong a.comment::text').extract_first()

        # Prefer the ``src`` attribute and fall back to ``data-lazy-img``
        # only when ``src`` is absent.  The query is evaluated once rather
        # than twice as before.
        image = product.xpath(
            './/div[@class="p-img"]/a/img/@src').extract_first()
        if image is None:
            image = product.xpath(
                './/div[@class="p-img"]/a/img/@data-lazy-img'
            ).extract_first()
        item['image'] = image
        yield item
def parse(self, response):
    """Yield one ProductItem per product node under ``#mainsrp-itemlist``.

    All six fields are the stripped concatenation of every value matched
    by the field's XPath, so an unmatched field is an empty string.
    """
    def joined(node, query):
        # Concatenate every match of *query* below *node* and trim it.
        return ''.join(node.xpath(query).extract()).strip()

    nodes = response.xpath(
        "//div[@id='mainsrp-itemlist']//div[@class='items'][1]//div[contains(@class, 'item')]"
    )
    for node in nodes:
        item = ProductItem()
        item["price"] = joined(node, ".//div[contains(@class, 'price')]//text()")
        item["title"] = joined(node, ".//div[contains(@class, 'title')]//text()")
        item["shop"] = joined(node, ".//div[contains(@class, 'shop')]//text()")
        item["image"] = joined(
            node,
            ".//div[contains(@class, 'pic')]//img[contains(@class, 'img')]/@data-src",
        )
        item["deal"] = joined(node, ".//div[contains(@class, 'deal-cnt')]//text()")
        item["location"] = joined(node, ".//div[contains(@class, 'location')]//text()")
        yield item
def parse(self, response):
    """Yield one ProductItem per product node under ``#mainsrp-itemlist``.

    Args:
        response: Response object exposing ``xpath()``.

    Yields:
        ProductItem with ``title``, ``shop``, ``location``, ``price``,
        ``deal`` and ``href`` populated.  ``title`` and ``shop`` are joined,
        stripped text; the remaining fields are the first match, or ``None``
        when nothing matches.
    """
    products = response.xpath(
        '//div[@id="mainsrp-itemlist"]//div[@class="items"][1]//div[contains(@class,"item")]')
    for product in products:
        item = ProductItem()
        item['title'] = ''.join(
            product.xpath('.//div[contains(@class,"title")]//text()').extract()
        ).strip()
        item['shop'] = ''.join(
            product.xpath('.//div[contains(@class,"shop")]//text()').extract()
        ).strip()
        item['location'] = product.xpath(
            './/div[contains(@class,"location")]//text()').extract_first()
        item['price'] = product.xpath(
            './/div[contains(@class,"price")]//strong/text()').extract_first()
        item['deal'] = product.xpath(
            './/div[contains(@class,"deal-cnt")]//text()').extract_first()

        # Fixed: concatenating 'https:' with None raised TypeError whenever
        # a node had no title link, aborting all remaining products.
        href = product.xpath(
            './/div[contains(@class,"title")]/a/@href').extract_first()
        item['href'] = ('https:' + href) if href is not None else None
        yield item
def parse(self, response):
    """Yield one ProductItem per product node under ``#mainsrp-itemlist``.

    ``price``, ``item``, ``shop`` and ``image`` are the stripped
    concatenation of every matched text node; ``deal`` and ``location``
    are the first matched text node (``None`` when nothing matches).
    """
    # Fields built by joining all matched text nodes and stripping.
    # NOTE(review): 'item' is labelled "name" in the original comments but
    # queries class "item", and 'image' collects text nodes rather than an
    # attribute -- confirm both are intended.
    joined_queries = (
        ('price', './/div[contains(@class, "price")]//text()'),  # price
        ('item', './/div[contains(@class, "item")]//text()'),  # name
        ('shop', './/div[contains(@class, "shop")]//text()'),
        ('image', './/div[contains(@class, "pic")]//text()'),  # image
    )
    # Fields that keep only the first matched text node.
    first_match_queries = (
        ('deal', './/div[contains(@class, "deal-cnt")]//text()'),
        ('location', './/div[contains(@class, "location")]//text()'),
    )

    # Parse with XPath by calling the xpath method of the response; this
    # expression selects the product nodes.
    nodes = response.xpath(
        '//div[@id="mainsrp-itemlist"]//div[@class="items"][1]//div[contains(@class, "item")]')
    # Walk the result and handle each product node in turn.
    for node in nodes:
        item = ProductItem()
        for field, query in joined_queries:
            item[field] = ''.join(node.xpath(query).extract()).strip()
        for field, query in first_match_queries:
            item[field] = node.xpath(query).extract_first()
        # Hand the populated ProductItem to the consumer.
        yield item
def parse(self, response):
    """Yield one ProductItem per ``li`` under ``#J_goodsList``.

    Fields are located by position (``div[N]``) inside each entry.
    ``price``, ``title``, ``shop`` and ``image`` are the stripped
    concatenation of every match; ``deal`` is the first match only
    (``None`` when nothing matches).
    """
    for entry in response.xpath('//*[@id="J_goodsList"]/ul/li'):
        def joined(query, node=entry):
            # Concatenate every match of *query* below this entry and trim.
            return ''.join(node.xpath(query).extract()).strip()

        item = ProductItem()
        item['price'] = joined('.//div/div[3]/strong/i/text()')
        item['title'] = joined('.//div/div[4]/a/em//text()')
        item['shop'] = joined('.//div/div[7]/span/a//text()')
        item['image'] = joined('.//div/div[1]/a/img/@src')
        deal_matches = entry.xpath('.//div/div[5]/strong/a//text()')
        item['deal'] = deal_matches.extract_first()
        yield item
def parse(self, response):
    """Yield one ProductItem per ``li`` under ``#component_59``.

    Args:
        response: Response object exposing ``xpath()`` and ``urljoin()``.

    Yields:
        ProductItem with ``image``, ``price``, ``title``, ``deal``, ``shop``
        and ``location`` populated.  ``image``, ``price`` and ``deal`` fall
        back to an empty string when their node is missing; ``shop`` is
        ``None`` when nothing matches.
    """
    products = response.xpath('//*[@id="component_59"]/li')
    for product in products:
        item = ProductItem()

        # Fixed: ``extract_first(default='')`` keeps a missing node from
        # raising AttributeError on ``.strip()`` and aborting the page.
        image_src = product.xpath(
            './/a[@class="pic"]/img/@src').extract_first(default='').strip()
        # Joining an empty path would return the page URL itself, so leave
        # the field empty when there is no image.
        item['image'] = response.urljoin(image_src) if image_src else ''

        # The first character is dropped -- presumably a currency symbol;
        # TODO confirm against the live markup.
        item['price'] = product.xpath(
            './/p[@class="price"]/span/text()'
        ).extract_first(default='').strip()[1:]
        item['title'] = ''.join(
            product.xpath('.//p[@class="name"]/a/text()').extract()
        ).strip()
        # The last three characters are dropped -- presumably a fixed
        # suffix after the count; TODO confirm against the live markup.
        item['deal'] = product.xpath(
            './/p[@class="star"]/a/text()'
        ).extract_first(default='').strip()[:-3]
        item['shop'] = product.xpath(
            './/p[@class="link"]/a/text()').extract_first()
        # NOTE(review): the placeholder is misspelled ("localtion"); it is
        # left unchanged because stored data or consumers may depend on it.
        item['location'] = 'no localtion'
        yield item
def parse(self, response):
    """Yield one ProductItem per product node under ``#mainsrp-itemlist``.

    Prints a banner line when iteration starts and prints every item just
    before it is yielded.  ``price``, ``title``, ``shop`` and ``image`` are
    the stripped concatenation of every value matched by their XPath.
    """
    print('##############3.###################')

    # (field, XPath relative to the product node), in assignment order.
    field_queries = (
        ('price', './/div[contains(@class,"price")]//text()'),
        ('title', './/div[contains(@class,"title")]//text()'),
        ('shop', './/div[contains(@class,"shop")]//text()'),
        ('image', './/div[@class="pic"]/a/img/@data-src'),
    )

    nodes = response.xpath(
        '//div[@id="mainsrp-itemlist"]//div[@class="items"][1]//div[contains(@class,"item")]'
    )
    for node in nodes:
        item = ProductItem()
        for field, query in field_queries:
            matches = node.xpath(query).extract()
            item[field] = ''.join(matches).strip()
        print(item)
        yield item
def parse(self, response):
    """Yield ProductItems scraped by regex from data embedded in the page.

    The response body is decoded as UTF-8, the text following
    ``"itemlist":`` is isolated, then the ``"auctions"`` array inside it,
    and the individual fields are pulled out with one regex each.

    Args:
        response: Response object exposing ``body`` as bytes.

    Yields:
        ProductItem with ``image``, ``price``, ``title``, ``shop`` and
        ``location`` populated.

    Raises:
        IndexError: If the ``"itemlist"`` or ``"auctions"`` marker is not
            found in the page.
    """
    res = str(response.body, encoding='utf-8')

    # Fixed: patterns are raw strings now; ``\s``, ``\[`` and ``\]`` in a
    # normal string literal are invalid escape sequences.
    result = re.findall(
        r'"itemlist":(.*?);\n\s+g_srp_loadCss', res, re.S)[0]
    data = re.findall(
        r'"auctions":\[(.*?)\],"recommendAuctions"', result, re.S)[0]

    titles = re.findall(r'"pid":"","title":"(.*?)","raw_title"', data, re.S)
    pic_urls = re.findall(r'"pic_url":"(.*?)","detail_url"', data, re.S)
    prices = re.findall(r'"view_price":"(.*?)","view_fee"', data, re.S)
    locations = re.findall(r'"item_loc":"(.*?)","', data, re.S)
    shops = re.findall(r'"nick":"(.*?)","', data, re.S)

    # NOTE(review): the five lists are matched independently and paired by
    # position; if any record lacks one of the fields, every later item is
    # misaligned.  zip() also truncates to the shortest list.
    for title, pic_url, price, location, shop in zip(
            titles, pic_urls, prices, locations, shops):
        item = ProductItem()
        item['image'] = 'https:' + pic_url.strip()
        item['price'] = price.strip()
        # Fixed: ``<span.*>`` was greedy and removed everything from the
        # first ``<span`` to the last ``>``, including the text between the
        # tags.  ``[^>]*`` strips only the tags themselves.
        item['title'] = re.sub(r'<span[^>]*>|</span>', '', title).strip()
        item['shop'] = shop.strip()
        item['location'] = location.strip()
        yield item