def parse(self, response):
    """Yield one ``CheguansuoItem`` per ``<ul>`` under ``#listContent``.

    For every ``<ul>`` directly below the element whose id is
    ``listContent`` the item is filled as follows:

    * ``name``     - text of the anchor inside the first ``<li>``
    * ``addr``     - text of the anchor inside the second ``<li>``
    * ``tel``      - text of the third ``<li>``
    * ``grabtime`` - local time at which the item was built
    * ``url``      - URL of the response
    * ``status``   - ``"<response.url>-<zero-based position of the ul>"``

    Args:
        response: the downloaded page; must provide ``xpath()`` and ``url``.

    Yields:
        CheguansuoItem: one populated item per matched ``<ul>``.
    """
    rows = response.xpath("//*[@id='listContent']/ul")
    # enumerate() hands out the position directly instead of rescanning the
    # selector list with .index() for every row.
    for position, row in enumerate(rows):
        item = CheguansuoItem()
        item["name"] = row.xpath("li[1]/a/text()").extract_first()
        item["addr"] = row.xpath("li[2]/a/text()").extract_first()
        item["tel"] = row.xpath("li[3]/text()").extract_first()
        item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
        item['url'] = response.url
        item['status'] = response.url + "-" + str(position)
        yield item
def parse(self, response):
    """Yield one ``CheguansuoItem`` per entry of the JSON ``results`` list.

    The response body is decoded as JSON and must contain a ``"results"``
    list whose entries carry ``"name"``, ``"address"`` and a ``"location"``
    mapping with ``"lat"`` and ``"lng"``.  The city name is taken from
    ``response.meta["city_name"]``.

    ``status`` is ``"<response.url>-<zero-based position of the entry>"``.

    Args:
        response: the downloaded response; must provide ``body``, ``url``
            and ``meta``.

    Yields:
        CheguansuoItem: one populated item per result entry.

    Raises:
        KeyError: if an expected key is missing from the payload or meta.
        ValueError: if the body is not valid JSON.
    """
    payload = json.loads(response.body)
    # enumerate() gives every entry its own position.  list.index() compares
    # dicts by equality, so two identical records used to receive the same
    # position and therefore the same ``status`` value.
    for position, loc in enumerate(payload["results"]):
        item = CheguansuoItem()
        item["name"] = loc["name"]
        item["addr"] = loc["address"]
        item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
        item['url'] = response.url
        item['status'] = response.url + "-" + str(position)
        item['lat'] = loc["location"]["lat"]
        item['lng'] = loc["location"]["lng"]
        item['city_name'] = response.meta["city_name"]
        yield item
def parse_detail(self, response):
    """Yield one ``CheguansuoItem`` per entry of the JSON ``content`` list.

    The response body is decoded as JSON and must contain a ``"content"``
    list whose entries carry ``"name"``, ``"addr"``, ``"x"`` and ``"y"``.
    When the list holds more than 10 entries the last one is skipped.

    ``status`` is ``"<response.url>-<zero-based position of the entry>"``.

    Args:
        response: the downloaded response; must provide ``body`` and ``url``.

    Yields:
        CheguansuoItem: one populated item per processed entry.

    Raises:
        KeyError: if an expected key is missing from the payload.
        ValueError: if the body is not valid JSON.
    """
    content = json.loads(response.body)["content"]
    # NOTE(review): with more than 10 entries the final one is dropped -
    # presumably it is not a regular record; confirm against the API.
    locs = content[:-1] if len(content) > 10 else content
    # ``locs`` is a prefix of ``content``, so enumerate() yields the same
    # positions the full list has.  list.index() compares dicts by equality
    # and returned the first match, so duplicate records used to share one
    # ``status`` value.
    for position, loc in enumerate(locs):
        item = CheguansuoItem()
        item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
        item['url'] = response.url
        item['status'] = response.url + "-" + str(position)
        item['name'] = loc["name"]
        item['addr'] = loc["addr"]
        item['x'] = loc["x"]
        item['y'] = loc["y"]
        # NOTE(review): ``city_name`` is not bound inside this function; it
        # must come from an enclosing/module scope.  Confirm it is defined,
        # or whether ``response.meta["city_name"]`` was intended.
        item['city_name'] = city_name
        yield item