Example #1
0
    def parse_item(self, response):
        """Build one ``PlainItem`` from a detail page and yield it.

        The item records the page URL, the ``name``, ``area`` and ``price``
        fields (located with absolute XPaths), and one numbered ``item{n}``
        field per ``<li>`` found under the "Rinfolist" block.

        Args:
            response: The response for the detail page being scraped.

        Yields:
            The item produced by ``ItemLoader.load_item()``.
        """
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        loader = ItemLoader(item=PlainItem(), response=response)
        loader.add_value('url', response.url)

        # (field name, absolute XPath) pairs, loaded in this order.
        field_xpaths = (
            ('name', '/html/body/div[5]/div[2]/div[1]/h1/strong/text()'),
            ('area', '/html/body/div[3]/div/div[2]/a[4]/text()'),
            ('price', '/html/body/div[5]/div[2]/div[2]/span[1]/text()'),
        )
        for field, xpath in field_xpaths:
            # Best effort: a failing extraction falls back to an empty string.
            # ``except Exception`` (not a bare ``except``) so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            # NOTE(review): add_xpath does not appear to raise when the XPath
            # simply matches nothing, so this fallback presumably only fires
            # on genuine errors -- confirm against the ItemLoader docs.
            try:
                loader.add_xpath(field, xpath)
            except Exception:
                loader.add_value(field, '')

        try:
            details = response.xpath(
                '//div[@class="firstright"]/div[@class="Rinfolist"]/ul/li')
            for i, detail in enumerate(details):
                # string(.) flattens the whole <li> subtree into one string.
                loader.add_value('item{}'.format(i),
                                 detail.xpath('string(.)').extract_first())
        except Exception:
            # On failure, add an empty value for item0..item8.
            for i in range(9):
                loader.add_value('item{}'.format(i), '')
        yield loader.load_item()
Example #2
0
    def parse_item(self, response):
        """Build one ``PlainItem`` from a detail page and yield it.

        The item records the page URL, a set of fields located with absolute
        XPaths (name, address, build_year, buildings, familys, area, price,
        estate) and the ``subway`` value carried in ``response.meta``.

        Args:
            response: The response for the detail page; ``response.meta`` is
                read for the ``'subway'`` key.

        Yields:
            The item produced by ``ItemLoader.load_item()``.
        """
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        loader = ItemLoader(item=PlainItem(), response=response)
        loader.add_value('url', response.url)

        def add_xpath_fields(pairs):
            # Best effort: a failing extraction falls back to an empty string.
            # ``except Exception`` (not a bare ``except``) so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            # NOTE(review): add_xpath does not appear to raise when the XPath
            # simply matches nothing, so this fallback presumably only fires
            # on genuine errors -- confirm against the ItemLoader docs.
            for field, xpath in pairs:
                try:
                    loader.add_xpath(field, xpath)
                except Exception:
                    loader.add_value(field, '')

        # Fields are added in the same order as before: six XPath fields,
        # then 'subway' from the request meta, then the last two XPath fields.
        add_xpath_fields((
            ('name', '/html/body/div[4]/div/div[1]/h1/text()'),
            ('address', '/html/body/div[4]/div/div[1]/div/text()'),
            ('build_year',
             '/html/body/div[6]/div[2]/div[2]/div[1]/span[2]/text()'),
            ('buildings',
             '/html/body/div[6]/div[2]/div[2]/div[6]/span[2]/text()'),
            ('familys',
             '/html/body/div[6]/div[2]/div[2]/div[7]/span[2]/text()'),
            ('area', '/html/body/div[5]/div[1]/a[3]/text()'),
        ))

        try:
            loader.add_value('subway', response.meta['subway'])
        except Exception:
            # e.g. the request carried no 'subway' entry in its meta.
            loader.add_value('subway', '')

        add_xpath_fields((
            ('price', '/html/body/div[6]/div[2]/div[1]/div/span[1]/text()'),
            ('estate',
             '/html/body/div[6]/div[2]/div[2]/div[4]/span[2]/text()'),
        ))

        # NOTE: extraction of numbered 'item{n}' fields from the
        # "p-parameter" list was already disabled (commented out) here and
        # remains off.
        yield loader.load_item()
Example #3
0
 def parse(self, response):
     """Yield one ``PlainItem`` per entry of the JSON body's "results" list.

     Each item carries the search name taken from ``response.meta['name']``,
     a 1-based position ``num``, the entry's required fields (name, location
     lat/lng, address) and a set of optional fields that default to ``""``
     when the entry does not provide them.

     Args:
         response: Response whose text is a JSON object with a "results"
             list; ``response.meta`` must contain ``'name'``.

     Yields:
         A populated ``PlainItem`` for every result entry.

     Raises:
         KeyError: If "results", ``meta['name']`` or a required entry field
             is missing.
     """
     # Keys a result entry may omit; a missing key becomes "". dict.get
     # replaces the former try/bare-except stanzas, which also swallowed
     # unrelated errors such as KeyboardInterrupt.
     optional_keys = ('province', 'city', 'area', 'street_id',
                      'telephone', 'detail', 'uid')

     payload = json.loads(response.text)
     for num, message in enumerate(payload["results"], start=1):
         item = PlainItem()
         item["search_name"] = response.meta['name']
         item['num'] = num
         item['name'] = message['name']
         location = message['location']
         item['location_lat'] = location['lat']
         item['location_lng'] = location['lng']
         item['address'] = message['address']
         for key in optional_keys:
             item[key] = message.get(key, "")
         yield item
Example #4
0
 def parse(self, response):
     """Parse one page of JSON results and schedule the remaining pages.

     When the response URL contains ``page_num=0`` (the first page), one
     follow-up request is yielded for every further page implied by the
     "total" count in the JSON body. Then one ``PlainItem`` is yielded per
     entry of the body's "results" list.

     Args:
         response: Response whose text is a JSON object with "total" and
             "results"; ``response.meta`` must contain ``'index'`` and
             ``'page_num'``.

     Yields:
         ``scrapy.Request`` objects for pages 1..pages-1 (first page only),
         followed by a ``PlainItem`` for every result entry.

     Raises:
         KeyError: If a required meta key, JSON key or entry field is missing.
     """
     # Results per page: used for the page count and the running 'num'.
     page_size = 20
     # Keys a result entry may omit; a missing key becomes "". dict.get
     # replaces the former try/bare-except stanzas, which also swallowed
     # unrelated errors such as KeyboardInterrupt.
     optional_keys = ('province', 'city', 'area', 'street_id',
                      'telephone', 'detail', 'uid')

     obj_json = json.loads(response.text)
     index = response.meta['index']
     page_num = response.meta['page_num']

     if 'page_num=0' in response.url:
         number = obj_json["total"]
         # Integer ceiling division. The previous ``number / 20`` produced a
         # float on Python 3, so range() raised TypeError whenever the total
         # was an exact multiple of the page size.
         pages = (number + page_size - 1) // page_size
         # range(1, pages) is empty when there is at most one page.
         for n in range(1, pages):
             url = response.url.replace('page_num=0',
                                        'page_num={}'.format(n))
             yield scrapy.Request(url=url,
                                  callback=self.parse,
                                  meta={
                                      'index': index,
                                      'page_num': n
                                  })

     data_list = obj_json["results"]
     if not data_list:
         return

     total = obj_json["total"]
     first_num = page_num * page_size + 1
     for num, message in enumerate(data_list, start=first_num):
         item = PlainItem()
         item['url'] = response.url
         item["area_index"] = index + 1
         item["total"] = total
         item['num'] = num
         item['name'] = message['name']
         location = message['location']
         item['location_lat'] = location['lat']
         item['location_lng'] = location['lng']
         item['address'] = message['address']
         for key in optional_keys:
             item[key] = message.get(key, "")
         yield item