def parse_day(self, response):
    """Yield one ``AqiItem`` per table row found under ``div.row``.

    The city name is the text of the ``<h2 id="title">`` element and is
    copied onto every item. Each remaining field is the first text node
    found anywhere inside the corresponding ``<td>`` of the row, or
    ``None`` when that cell has no text.

    Args:
        response: Page response exposing ``xpath()``.

    Yields:
        AqiItem: One item per row after the first, with the keys
        ``city``, ``date``, ``aqi``, ``level``, ``pm2_5``, ``pm10``,
        ``so2``, ``co``, ``no2`` and ``o3``.
    """
    rows = response.xpath("//div[@class='row']//tbody/tr")
    city = response.xpath("//h2[@id='title']/text()").extract_first()

    # Nothing to emit when the page has no table rows at all.
    if len(rows) == 0:
        return

    # Field names in column order: the n-th name is read from the n-th <td>.
    columns = ("date", "aqi", "level", "pm2_5", "pm10", "so2", "co", "no2", "o3")

    # The first row is skipped -- presumably the table header; verify
    # against the target page.
    for row in rows[1:]:
        record = AqiItem()
        record["city"] = city
        for position, field in enumerate(columns, start=1):
            cell_text = row.xpath("./td[%d]//text()" % position)
            record[field] = cell_text.extract_first()

        yield record
# Example #2
# 0
    def parse_day(self, response):
        """Yield one ``AqiItem`` per ``<tr>`` of the page, skipping the first.

        The city name is sliced out of the text of the element whose id is
        ``title`` and copied onto every item. Each remaining field is the
        first text node of the corresponding ``<td>``, or ``None`` when the
        cell has no matching text.

        Args:
            response: Page response exposing ``xpath()``.

        Yields:
            AqiItem: A fresh item for every row after the header row, with
            the keys ``city_name``, ``date``, ``aqi``, ``level``, ``pm2_5``,
            ``pm10``, ``so_2``, ``co``, ``no_2`` and ``o3``.
        """
        # Parse the title, then extract the city name from it.
        title = response.xpath('//*[@id="title"]/text()').extract_first()
        # The slice drops the first 8 and last 11 characters -- assumes a
        # fixed-width prefix/suffix around the city name; TODO confirm.
        # A missing title would make the slice raise TypeError, so fall back
        # to None and still emit the rows.
        city_name = title[8:-11] if title is not None else None

        # 1. Take every <tr>; 2. drop the table header. Slicing (rather than
        # pop(0)) means a page without any <tr> yields nothing instead of
        # raising IndexError.
        data_rows = response.xpath('//tr')[1:]

        for tr in data_rows:
            # Build a new item per row: re-yielding one shared instance lets
            # later rows overwrite values that downstream consumers still
            # hold a reference to.
            item = AqiItem()
            item['city_name'] = city_name
            # Date
            item['date'] = tr.xpath('./td[1]/text()').extract_first()
            # AQI
            item['aqi'] = tr.xpath('./td[2]/text()').extract_first()
            # Quality level (text may sit in a nested element, hence //text())
            item['level'] = tr.xpath('./td[3]//text()').extract_first()
            # PM2.5
            item['pm2_5'] = tr.xpath('./td[4]/text()').extract_first()
            # PM10
            item['pm10'] = tr.xpath('./td[5]/text()').extract_first()
            # Sulfur dioxide
            item['so_2'] = tr.xpath('./td[6]/text()').extract_first()
            # Carbon monoxide
            item['co'] = tr.xpath('./td[7]/text()').extract_first()
            # Nitrogen dioxide
            item['no_2'] = tr.xpath('./td[8]/text()').extract_first()
            # Ozone
            item['o3'] = tr.xpath('./td[9]/text()').extract_first()

            # Hand the data on: engine -> pipeline.
            yield item