def parse_sale(self, response):
    """Parse the sales table on ``response`` into a single item.

    Every table row after the first contributes one entry to each of four
    parallel lists stored on the item:

    * ``S_S_date``   -- text of the first ``td`` text node (time period)
    * ``S_S_M_sale`` -- text of the second ``td`` text node (monthly sales
      for that period)
    * ``S_S_M_rank`` -- text of the first ``td/a`` link (monthly sales rank
      for that period)
    * ``S_S_rank``   -- text of the third ``td`` text node (share of the
      manufacturer's sales for that period)

    Rows that do not provide all four values are skipped (with a warning)
    instead of raising ``IndexError``, so one malformed row no longer
    discards the whole page and the four lists always stay index-aligned.

    Yields:
        MyspiderItem: the item seeded from ``response.meta`` with the four
        lists filled in.
    """
    item = MyspiderItem()
    # Seed the item with the data carried over in the request meta.
    # NOTE(review): this assigns the item's private ``_values`` attribute
    # directly, so the item and ``response.meta`` share one dict -- confirm
    # that this aliasing is intended.
    item._values = response.meta

    tr_list = response.xpath(
        "/html/body/div[5]/div[3]/div[2]/div/div[2]/div[1]/table/tr")

    dates = []
    month_sales = []
    month_ranks = []
    shares = []

    # The first row is skipped (presumably the table header).
    for tr in tr_list[1:]:
        # Evaluate each XPath once per row instead of once per field.
        cells = tr.xpath("./td/text()").extract()
        rank_links = tr.xpath("./td/a/text()").extract()

        if len(cells) < 3 or not rank_links:
            # Incomplete row: skip it entirely so the lists stay aligned.
            self.logger.warning(
                "Skipping incomplete sales row on %s: %r",
                response.url, cells)
            continue

        dates.append(cells[0])
        month_sales.append(cells[1])
        month_ranks.append(rank_links[0])
        shares.append(cells[2])

    item["S_S_date"] = dates
    item["S_S_M_sale"] = month_sales
    item["S_S_M_rank"] = month_ranks
    item["S_S_rank"] = shares
    yield item
def parse_sale(self, response):
    """Parse the sales table on ``response`` into a single item.

    Every table row after the first contributes one entry to each of four
    parallel lists stored on the item:

    * ``B_S_date``   -- text of the first ``td`` text node (time period)
    * ``B_S_sale``   -- text of the second ``td`` text node (sales for that
      period)
    * ``B_S_share``  -- text of the third ``td`` text node (market share
      for that period)
    * ``B_S_detail`` -- ``https://xl.16888.com`` prefixed to the ``href`` of
      the first ``td/a`` link (link associated with that period's market
      share entry)

    Rows that do not provide all four values are skipped (with a warning)
    instead of raising ``IndexError``, so one malformed row no longer
    discards the whole page and the four lists always stay index-aligned.

    Yields:
        MyspiderItem: the item seeded from ``response.meta`` with the four
        lists filled in.
    """
    item = MyspiderItem()
    # Seed the item with the data carried over in the request meta.
    # NOTE(review): this assigns the item's private ``_values`` attribute
    # directly, so the item and ``response.meta`` share one dict -- confirm
    # that this aliasing is intended.
    item._values = response.meta

    tr_list = response.xpath(
        "/html/body/div[5]/div[3]/div[2]/div/div[2]/div[1]/table/tr")

    dates = []
    sales = []
    shares = []
    detail_urls = []

    # The first row is skipped (presumably the table header).
    for tr in tr_list[1:]:
        # Evaluate each XPath once per row instead of once per field.
        cells = tr.xpath("./td/text()").extract()
        hrefs = tr.xpath("./td/a/@href").extract()

        if len(cells) < 3 or not hrefs:
            # Incomplete row: skip it entirely so the lists stay aligned.
            self.logger.warning(
                "Skipping incomplete sales row on %s: %r",
                response.url, cells)
            continue

        dates.append(cells[0])
        sales.append(cells[1])
        shares.append(cells[2])
        # The href is concatenated onto the site root as-is.
        detail_urls.append('https://xl.16888.com' + hrefs[0])

    item["B_S_date"] = dates
    item["B_S_sale"] = sales
    item["B_S_share"] = shares
    item["B_S_detail"] = detail_urls
    yield item