def parse_day(self, response):
    """Turn the rows of a daily air-quality table into ``AqiItem`` objects.

    Rows are the ``<tr>`` elements found under ``tbody`` inside a
    ``div`` with class ``row``; the first row is skipped as the header.
    The city is the text of the ``<h2 id="title">`` element.

    Args:
        response: the page response; only its ``xpath`` method is used.

    Yields:
        AqiItem: one new item per data row. Nothing is yielded when the
        table has no rows at all.
    """
    rows = response.xpath("//div[@class='row']//tbody/tr")
    city = response.xpath("//h2[@id='title']/text()").extract_first()
    if not rows:
        return

    # Item field -> XPath of the table cell that holds it, in column order.
    cell_paths = (
        ("date", "./td[1]//text()"),
        ("aqi", "./td[2]//text()"),
        ("level", "./td[3]//text()"),
        ("pm2_5", "./td[4]//text()"),
        ("pm10", "./td[5]//text()"),
        ("so2", "./td[6]//text()"),
        ("co", "./td[7]//text()"),
        ("no2", "./td[8]//text()"),
        ("o3", "./td[9]//text()"),
    )

    # rows[0] is the header row, so start from the second one.
    for row in rows[1:]:
        record = AqiItem()
        record["city"] = city
        for field, path in cell_paths:
            record[field] = row.xpath(path).extract_first()
        yield record
def parse_day(self, response):
    """Yield one ``AqiItem`` per data row of a daily air-quality page.

    The city name is sliced out of the text of the element whose id is
    ``title``; every ``<tr>`` after the first (header) row becomes one
    item.

    Args:
        response: the page response; only its ``xpath`` method is used.

    Yields:
        AqiItem: a new item per row with the keys ``city_name``,
        ``date``, ``aqi``, ``level``, ``pm2_5``, ``pm10``, ``so_2``,
        ``co``, ``no_2`` and ``o3``. A cell that cannot be found gives
        ``None`` for its key; ``city_name`` is ``None`` when the page
        has no title text.
    """
    # Parse the title and extract the city name from it.
    title = response.xpath('//*[@id="title"]/text()').extract_first()
    # NOTE(review): the 8 / -11 offsets presumably strip fixed text around
    # the city name in the title -- confirm against a live page.
    # extract_first() returns None when nothing matches; slicing None
    # would raise TypeError, so fall back to None instead.
    city_name = title[8:-11] if title is not None else None

    # Item field -> XPath of its table cell. Only the "level" column uses
    # '//text()' (descendant text); the others read direct text nodes.
    cell_paths = (
        ('date', './td[1]/text()'),      # date
        ('aqi', './td[2]/text()'),       # AQI
        ('level', './td[3]//text()'),    # quality level
        ('pm2_5', './td[4]/text()'),     # PM2.5
        ('pm10', './td[5]/text()'),      # PM10
        ('so_2', './td[6]/text()'),      # sulfur dioxide
        ('co', './td[7]/text()'),        # carbon monoxide
        ('no_2', './td[8]/text()'),      # nitrogen dioxide
        ('o3', './td[9]/text()'),        # ozone
    )

    # Take every <tr> and drop the header row. Slicing, unlike pop(0),
    # does not raise IndexError when the page contains no rows.
    for tr in response.xpath('//tr')[1:]:
        # Build a fresh item for each row: re-yielding one shared item
        # would let later rows overwrite the data of earlier ones for
        # any consumer that still holds a reference to it.
        item = AqiItem()
        item['city_name'] = city_name
        for field, path in cell_paths:
            item[field] = tr.xpath(path).extract_first()
        # Hand the item over: spider --> engine --> pipeline.
        yield item