コード例 #1
0
ファイル: chehang168.py プロジェクト: Flat-Chen/ChenProject
    def parse_list(self, response):
        """Build and print one ``Chehang168Item`` per car listed on the page.

        Reads ``brandname``, ``brandcode``, ``familyname`` and ``familycode``
        from ``response.meta`` and copies them onto every item. When the page
        contains at least one listing, the family name is appended to a local
        log file.

        Items are only printed here; nothing is yielded or returned.

        NOTE: following the '下一页' (next page) link is not done here, so
        only the page in ``response`` is parsed.
        """
        meta = response.meta
        cars = response.xpath("//*[@class='ch_carlistv3']/li")
        if cars:
            # Log each family that actually produced listings. The ``with``
            # block closes the file, so no explicit close() is needed.
            # The family name is non-ASCII text, so pin the encoding instead
            # of relying on the platform default.
            with open("/Users/cagey/PycharmProjects/zt_scrapy/projects/koubei_project/koubei/familyname_log_full.txt", "a", encoding="utf-8") as f:
                f.write(meta["familyname"] + "\n")

        for car in cars:
            item = Chehang168Item()
            item['url'] = response.url
            item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
            item['brandname'] = meta["brandname"]
            item['brandcode'] = meta["brandcode"]
            item['familyname'] = meta["familyname"]
            item['familycode'] = meta["familycode"]
            item['title'] = car.xpath("div/h3/a/text()").extract_first()
            item['guideprice'] = car.xpath("div/h3/b/text()").extract_first()

            # extract_first() returns None when the node is missing; only
            # strip the "万" unit when a price was actually found.
            price = car.xpath("div/span/b/text()").extract_first()
            item['price'] = price.replace("万", "") if price is not None else None

            item['store'] = car.xpath("p[@class='c3']/a/text()").extract_first()

            item['desc1'] = car.xpath("p[@class='c1']/text()[1]").extract_first()
            item['desc2'] = car.xpath("p[@class='c2']/text()").extract_first()
            item['time'] = car.xpath("p[@class='c3']/cite[1]/text()").extract_first()
            item['desc3_2'] = car.xpath("p[@class='c3']/cite[2]/text()").extract_first()
            item['desc3_3'] = car.xpath("p[@class='c3']/cite[3]/text()").extract_first()

            # Any of the three parts may be None; treat a missing part as an
            # empty string so one incomplete listing does not abort the page.
            item['status'] = "-".join(
                part or "" for part in (item["title"], item["desc1"], item["store"])
            )

            print(item)
コード例 #2
0
 def parse(self, response):
     """Yield one request per link found in the ``cyxx_wrap_ull pt_1`` list.

     For every ``<a>`` inside the list's ``<li>`` elements, the link's href
     is stored as ``brandcode`` and its text as ``brandname`` on a new
     ``Chehang168Item``. The item is passed to ``self.detail_url`` through
     ``meta["item"]``. Requests are sent with ``self.cookies`` and
     ``self.headers`` and bypass the duplicate filter.
     """
     li_list = response.xpath("//*/div/ul[@class='cyxx_wrap_ull pt_1']/li")
     for li in li_list:
         for a in li.xpath("./a"):
             href = a.xpath("./@href").get()
             if href is None:
                 # An anchor without an href cannot be turned into a URL;
                 # skip it instead of failing on str + None.
                 continue

             # A fresh item per link keeps each request's data independent.
             item = Chehang168Item()
             item["brandcode"] = href
             item["brandname"] = a.xpath("./text()").get()

             yield scrapy.Request(
                 url="http://www.chehang168.com" + href,
                 callback=self.detail_url,
                 meta={"item": item},
                 cookies=self.cookies,
                 headers=self.headers,
                 dont_filter=True
             )