def parse_item(self, response):
    """Build a ``PlainItem`` from a detail page and yield it.

    Populates ``url`` from ``response.url``, then ``name``, ``area`` and
    ``price`` from fixed absolute XPaths, and finally one ``item<N>`` field
    per ``<li>`` found under the ``firstright`` / ``Rinfolist`` list.
    Extraction is best-effort: if adding a field raises, that field is given
    an empty string instead of aborting the whole item.

    Args:
        response: Response object exposing ``url`` and ``xpath``.

    Yields:
        The item returned by ``ItemLoader.load_item()``.
    """
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
    loader = ItemLoader(item=PlainItem(), response=response)
    loader.add_value('url', response.url)

    # Absolute XPaths are tied to the page layout. The order is kept so the
    # loader receives the fields in the same sequence as before.
    xpath_fields = (
        ('name', '/html/body/div[5]/div[2]/div[1]/h1/strong/text()'),
        ('area', '/html/body/div[3]/div/div[2]/a[4]/text()'),
        ('price', '/html/body/div[5]/div[2]/div[2]/span[1]/text()'),
    )
    for field, xpath in xpath_fields:
        try:
            loader.add_xpath(field, xpath)
        except Exception:
            # ``Exception`` (not a bare ``except``) so KeyboardInterrupt and
            # SystemExit still propagate.
            loader.add_value(field, '')

    try:
        details = response.xpath(
            '//div[@class="firstright"]/div[@class="Rinfolist"]/ul/li')
        for position, node in enumerate(details):
            # string(.) collapses the <li> and its descendants to text.
            loader.add_value('item{}'.format(position),
                             node.xpath('string(.)').extract_first())
    except Exception:
        # On failure, blank out the nine detail slots.
        for position in range(9):
            loader.add_value('item{}'.format(position), '')

    yield loader.load_item()
def parse_item(self, response):
    """Build a ``PlainItem`` from a detail page and yield it.

    Populates ``url`` from ``response.url``, a set of fields from fixed
    absolute XPaths (``name``, ``address``, ``build_year``, ``buildings``,
    ``familys``, ``area``, ``price``, ``estate``) and ``subway`` from
    ``response.meta['subway']``. Extraction is best-effort: if adding a
    field raises, that field is given an empty string instead of aborting
    the whole item.

    Args:
        response: Response object exposing ``url``, ``meta`` and XPath
            selection (used through ``ItemLoader``).

    Yields:
        The item returned by ``ItemLoader.load_item()``.
    """
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
    loader = ItemLoader(item=PlainItem(), response=response)
    loader.add_value('url', response.url)

    def add_xpath_or_blank(field, xpath):
        """Add *field* from *xpath*; fall back to '' if that raises."""
        try:
            loader.add_xpath(field, xpath)
        except Exception:
            # ``Exception`` (not a bare ``except``) so KeyboardInterrupt and
            # SystemExit still propagate.
            loader.add_value(field, '')

    # Absolute XPaths are tied to the page layout. Call order is unchanged
    # so the loader receives the fields in the same sequence as before.
    add_xpath_or_blank('name', '/html/body/div[4]/div/div[1]/h1/text()')
    add_xpath_or_blank('address', '/html/body/div[4]/div/div[1]/div/text()')
    add_xpath_or_blank(
        'build_year', '/html/body/div[6]/div[2]/div[2]/div[1]/span[2]/text()')
    add_xpath_or_blank(
        'buildings', '/html/body/div[6]/div[2]/div[2]/div[6]/span[2]/text()')
    add_xpath_or_blank(
        'familys', '/html/body/div[6]/div[2]/div[2]/div[7]/span[2]/text()')
    add_xpath_or_blank('area', '/html/body/div[5]/div[1]/a[3]/text()')

    # ``subway`` is read from the request meta rather than the page; a
    # missing key falls back to ''.
    try:
        loader.add_value('subway', response.meta['subway'])
    except Exception:
        loader.add_value('subway', '')

    add_xpath_or_blank(
        'price', '/html/body/div[6]/div[2]/div[1]/div/span[1]/text()')
    add_xpath_or_blank(
        'estate', '/html/body/div[6]/div[2]/div[2]/div[4]/span[2]/text()')

    # NOTE: extraction of the generic "p-parameter" detail items is
    # currently disabled; kept here for reference.
    # try:
    #     details = response.xpath('//div[@class="p-parameter"]/ul[2]/*/text()').extract()
    #     for i in range(len(details)):
    #         l.add_value('item{}'.format(i), details[i])
    # except:
    #     for i in range(9):
    #         l.add_value('item{}'.format(i), '')

    yield loader.load_item()
def parse(self, response):
    """Turn a JSON search response into ``PlainItem`` objects.

    The response body is parsed as JSON and every entry of its
    ``"results"`` list becomes one item. ``name``, ``location`` (``lat`` /
    ``lng``) and ``address`` are required keys of each entry; the remaining
    fields are optional and default to an empty string when absent.

    Args:
        response: Response object exposing ``text`` (JSON body) and
            ``meta['name']`` (stored on each item as ``search_name``).

    Yields:
        One ``PlainItem`` per entry in ``"results"``; ``num`` is the entry's
        1-based position in the list.
    """
    payload = json.loads(response.text)

    # Keys that an entry may omit; listed in the order they are assigned.
    optional_keys = (
        'province', 'city', 'area', 'street_id',
        'telephone', 'detail', 'uid',
    )

    for position, message in enumerate(payload["results"], start=1):
        item = PlainItem()
        item["search_name"] = response.meta['name']
        item['num'] = position
        item['name'] = message['name']
        item['location_lat'] = message['location']['lat']
        item['location_lng'] = message['location']['lng']
        item['address'] = message['address']
        # dict.get replaces the former bare try/except around each lookup,
        # so unrelated errors are no longer silently swallowed.
        for key in optional_keys:
            item[key] = message.get(key, "")
        yield item
def parse(self, response):
    """Parse one page of JSON results and schedule the remaining pages.

    When the response URL contains ``page_num=0`` (the first page), the
    ``"total"`` count is used to work out how many pages exist and one
    request is yielded for every further page, with ``page_num`` rewritten
    in the URL and passed along in ``meta``. Every entry of ``"results"``
    is then emitted as a ``PlainItem``.

    Args:
        response: Response object exposing ``text`` (JSON body), ``url``
            and ``meta`` with the keys ``index`` and ``page_num``.

    Yields:
        ``scrapy.Request`` objects for pages 1..N-1 (first page only),
        followed by one ``PlainItem`` per entry in ``"results"``.
    """
    page_size = 20  # entries per page assumed by the paging arithmetic

    payload = json.loads(response.text)
    index = response.meta['index']
    page_num = response.meta['page_num']

    if 'page_num=0' in response.url:
        number = payload["total"]
        # Integer ceiling division. The previous ``number / 20`` produced a
        # float for exact multiples of 20, which made ``range`` raise
        # TypeError and dropped every follow-up page.
        pages = (number + page_size - 1) // page_size
        for n in range(1, pages):
            url = response.url.replace('page_num=0', 'page_num={}'.format(n))
            yield scrapy.Request(
                url=url,
                callback=self.parse,
                meta={'index': index, 'page_num': n},
            )

    data_list = payload["results"]
    if not data_list:
        return

    total = payload["total"]
    # Keys that an entry may omit; listed in the order they are assigned.
    optional_keys = (
        'province', 'city', 'area', 'street_id',
        'telephone', 'detail', 'uid',
    )

    for num, message in enumerate(data_list):
        item = PlainItem()
        item['url'] = response.url
        item["area_index"] = index + 1
        item["total"] = total
        # 1-based running position across all pages.
        item['num'] = page_num * page_size + num + 1
        item['name'] = message['name']
        item['location_lat'] = message['location']['lat']
        item['location_lng'] = message['location']['lng']
        item['address'] = message['address']
        # dict.get replaces the former bare try/except around each lookup,
        # so unrelated errors are no longer silently swallowed.
        for key in optional_keys:
            item[key] = message.get(key, "")
        yield item