Beispiel #1
0
    def parse_sale(self, response):
        """Collect the sales-history table into four parallel lists on one item.

        The first ``<tr>`` of the table is skipped (presumably a header row).
        For every remaining row the first three direct text nodes of its
        ``<td>`` cells and the text of its first ``<td>/<a>`` link are appended
        to ``S_S_date``, ``S_S_M_sale``, ``S_S_rank`` and ``S_S_M_rank``
        respectively.

        Rows that do not provide all four values are skipped with a warning
        instead of raising ``IndexError``, so a single irregular row no longer
        discards the whole item and the four lists always stay the same length.

        Args:
            response: The downloaded page; ``response.meta`` seeds the item.

        Yields:
            The populated ``MyspiderItem``.
        """
        item = MyspiderItem()
        # NOTE(review): this replaces the item's private storage with the
        # request meta dict itself (no copy). Presumably it carries over the
        # fields gathered by earlier callbacks; it also means the item and
        # ``response.meta`` share one dict -- confirm this is intended.
        item._values = response.meta

        tr_list = response.xpath("/html/body/div[5]/div[3]/div[2]/div/div[2]/div[1]/table/tr")

        dates = []        # period of each row
        month_sales = []  # monthly sales for that period
        month_ranks = []  # monthly sales rank for that period
        shares = []       # share of the manufacturer's sales for that period

        for tr in tr_list[1:]:
            # Evaluate each XPath once per row rather than once per field.
            cells = tr.xpath("./td/text()").extract()
            rank_texts = tr.xpath("./td/a/text()").extract()

            # Guard against spacer/footer rows that lack the expected values.
            if len(cells) < 3 or not rank_texts:
                self.logger.warning(
                    "Skipping malformed sales row on %s: cells=%r, links=%r",
                    response.url, cells, rank_texts)
                continue

            dates.append(cells[0])
            month_sales.append(cells[1])
            month_ranks.append(rank_texts[0])
            shares.append(cells[2])

        item["S_S_date"] = dates
        item["S_S_M_sale"] = month_sales
        item["S_S_M_rank"] = month_ranks
        item["S_S_rank"] = shares

        yield item
Beispiel #2
0
    def parse_sale(self, response):
        """Collect the sales-history table into four parallel lists on one item.

        The first ``<tr>`` of the table is skipped (presumably a header row).
        For every remaining row the first three direct text nodes of its
        ``<td>`` cells are appended to ``B_S_date``, ``B_S_sale`` and
        ``B_S_share``, and the ``href`` of its first ``<td>/<a>`` link,
        prefixed with ``https://xl.16888.com``, is appended to ``B_S_detail``.

        Rows that do not provide all four values are skipped with a warning
        instead of raising ``IndexError``, so a single irregular row no longer
        discards the whole item and the four lists always stay the same length.

        Args:
            response: The downloaded page; ``response.meta`` seeds the item.

        Yields:
            The populated ``MyspiderItem``.
        """
        item = MyspiderItem()
        # NOTE(review): this replaces the item's private storage with the
        # request meta dict itself (no copy). Presumably it carries over the
        # fields gathered by earlier callbacks; it also means the item and
        # ``response.meta`` share one dict -- confirm this is intended.
        item._values = response.meta

        tr_list = response.xpath(
            "/html/body/div[5]/div[3]/div[2]/div/div[2]/div[1]/table/tr")

        dates = []    # period of each row
        sales = []    # sales for that period
        shares = []   # market share for that period
        details = []  # absolute link to the market-share detail page

        for tr in tr_list[1:]:
            # Evaluate each XPath once per row rather than once per field.
            cells = tr.xpath("./td/text()").extract()
            hrefs = tr.xpath("./td/a/@href").extract()

            # Guard against spacer/footer rows that lack the expected values.
            if len(cells) < 3 or not hrefs:
                self.logger.warning(
                    "Skipping malformed sales row on %s: cells=%r, hrefs=%r",
                    response.url, cells, hrefs)
                continue

            dates.append(cells[0])
            sales.append(cells[1])
            shares.append(cells[2])
            # NOTE(review): assumes the href is site-relative -- confirm.
            details.append('https://xl.16888.com' + hrefs[0])

        item["B_S_date"] = dates
        item["B_S_sale"] = sales
        item["B_S_share"] = shares
        item["B_S_detail"] = details

        yield item