def parseLocations(self, response):
    """Yield one loaded item per listing node found on the page.

    The build type is obtained from ``self._getBuildType(response.url)``.
    When it equals ``'Display Locations'`` the listing nodes are the child
    ``div`` elements of ``div.view-content``; otherwise they are the
    promoted home panels. For every node the first match of each
    field-specific relative XPath is added to a fresh loader.

    Args:
        response: the response object for the listing page.

    Yields:
        The result of ``load_item()`` for each listing node.
    """
    # NOTE: the Referer header used to be decoded here but was never used,
    # and it raised AttributeError whenever the header was missing.
    build_type = self._getBuildType(response.url)
    hxs = HtmlXPathSelector(response)

    if build_type == 'Display Locations':
        node_xpath = '//div[@class="view-content"]/div'
        # (item field, XPath relative to one listing node)
        field_xpaths = (
            ('HomeDesignMainImage',
             './/div[@class="views-field views-field-field-hero"]/div/img/@src'),
            ('DisplayLocation',
             './/div[@class="views-field views-field-field-location-description"]/div/text()'),
            # NOTE(review): this is the only selector using "views-label"
            # instead of "views-field" - confirm against the page markup.
            ('BuilderEmailAddress',
             './/div[@class="views-label views-label-field-contact-email"]/div/a/text()'),
            ('OtherInclusions',
             './/div[@class="views-field views-field-field-contact-mobile"]/div/text()'),
            ('OtherInclusions1',
             './/div[@class="views-field views-field-field-opening-hours"]/div/text()'),
            ('OtherInclusions2',
             './/div[@class="views-field views-field-field-location-map"]/div/a/@href'),
            ('BuilderName',
             './/div[@class="views-field views-field-title"]/span/text()'),
        )
    else:
        node_xpath = '//div[@class="panel panel-home node node-dhfs node-promoted"]'
        field_xpaths = (
            ('HomeDesignMainImage', './/div[@class="panel-image"]/img/@src'),
            ('BasePrice', './/div[@class="panel-footer"]/ul/li/text()'),
            ('BuilderName', './/div[@class="panel-footer"]/text()'),
        )

    for node in hxs.xpath(node_xpath):
        loader = RealtyLoader(RealtyspidersItem(), hxs)
        loader.add_value('url', response.url)
        loader.add_value('BuildType', build_type)
        loader.add_value('BuilderLogo', self.logo)
        for field, xpath in field_xpaths:
            # Only the first match inside this node is kept.
            loader.add_value(field, node.xpath(xpath).extract_first())
        yield loader.load_item()
Exemple #2
0
    def parseItem(self, response):
        """Build a single item from a home-design detail page.

        The build type is derived from the request's Referer header via
        ``self._getBuildType``. ``'Display Homes'`` pages take their address
        from ``response.meta['address']``; every other build type gets a
        ``Storey`` of ``'1'`` or ``'2'`` depending on the truthiness of
        ``response.meta['storey']``.

        Args:
            response: the response object for the detail page. Its ``meta``
                must contain ``'address'`` (Display Homes) or ``'storey'``
                (all other build types).

        Returns:
            The result of ``load_item()`` on the populated loader.
        """
        # A request without a Referer header used to crash on
        # ``None.decode``; fall back to an empty string instead.
        raw_referer = response.request.headers.get('Referer')
        referer = raw_referer.decode("utf-8") if raw_referer else ''

        hxs = HtmlXPathSelector(response)
        BuildType = self._getBuildType(referer)
        imgXpath = '''//ul[@class="slides"]/li[{}]/img/@src'''

        # ``myRefer`` is forwarded to the loader as a keyword argument;
        # presumably its processors use it to absolutise relative URLs
        # (TODO confirm against RealtyLoader).
        site_url = self.start_urls[0]
        site_root = site_url[0:-1]  # start URL without its last character

        loader = RealtyLoader(RealtyspidersItem(), hxs)
        loader.add_value('url', response.url)
        loader.add_value('BuildType', BuildType)
        loader.add_value('BuilderLogo', self.logo)
        if BuildType == 'Display Homes':
            loader.add_value('Lot_BlockAddress', response.meta['address'])
        else:
            # A truthy meta['storey'] is recorded as '1', anything else as '2'.
            loader.add_value('Storey', '1' if response.meta['storey'] else '2')

        loader.add_xpath('DesignName',
                         '//div[@class="banner-content stick-bar"]//h2/text()')

        # Room counts sit in the text of the <li> wrapping each icon span.
        for field, icon_class in (('Bedrooms', 'ico-beds'),
                                  ('Bathrooms', 'ico-baths'),
                                  ('Garage', 'ico-garage')):
            loader.add_xpath(
                field,
                '//span[@class="{}"]/ancestor::li/text()'.format(icon_class))

        # Dimensions come from the <td> cells following a matching <th>.
        # Alfresco_Yes_No deliberately reads the same cell as
        # AlfrescoDimension, exactly as before.
        for field, heading in (('HouseWidth', 'House Width'),
                               ('HouseLength', 'House Length'),
                               ('GarageDimension', 'Garage'),
                               ('AlfrescoDimension', 'Alfresco'),
                               ('Alfresco_Yes_No', 'Alfresco')):
            loader.add_xpath(
                field,
                '//th[text()="{}"]/following-sibling::td/text()'.format(
                    heading))

        if BuildType == 'Portfolio':
            loader.add_xpath(
                'Squares',
                'string(//div[@class="col-3 floor-plan-legend"]/p)',
                re='(?<=AREA|Area).*')
        else:
            loader.add_xpath(
                'Squares',
                '//th[text()="Total Area"]/following-sibling::td/text()')

        loader.add_xpath('BrochureImage_pdf',
                         '//a[text()="Download the Floor Plan"]/@href',
                         myRefer=site_root)
        loader.add_xpath(
            'InclusionsImage_pdf',
            '//a[text()="Download the Inclusions Brochure "]/@href',
            myRefer=site_root)
        loader.add_xpath('BasePrice',
                         '''//a[text()="Download the Price List"]/@href''',
                         myRefer=site_root)
        loader.add_xpath('FloorPlanImage1',
                         '//div[@class="col-wrap floor-plan-box"]//img/@src',
                         myRefer=site_url)
        loader.add_xpath('HomeDesignMainImage',
                         '//section[@id="overview-anhor"]//img/@src',
                         myRefer=site_url)

        # Image1..Image15 map to slides 1..15 of the gallery.
        for slide in range(1, 16):
            loader.add_xpath('Image{}'.format(slide),
                             imgXpath.format(slide),
                             myRefer=site_root)

        return loader.load_item()
    def parseItem(self, response):
        """Yield one item per floorplan listed on a home-design page.

        The build type is derived from the request's Referer header via
        ``self._getBuildType``; when it is falsy nothing is yielded. Each
        entry of the "Floorplans" menu becomes one item whose measurements
        are read from the floorplan pane at the same (1-based) position.

        Args:
            response: the response object for the design page. Its ``meta``
                must contain ``'BasePrice'``.

        Yields:
            The result of ``load_item()`` for each floorplan.
        """
        # A request without a Referer header used to crash on
        # ``None.decode``; fall back to an empty string instead.
        raw_referer = response.request.headers.get('Referer')
        referer = raw_referer.decode("utf-8") if raw_referer else ''
        BuildType = self._getBuildType(referer)
        if not BuildType:
            return
        hxs = HtmlXPathSelector(response)

        inclusionsXpath = '''//div[@data-tab="inclusions"]/div/p/text()'''
        imgXpath = '//div[@data-tab="gallery/images"]/div/img[{}]/@src'
        descriptionXPath = '''//div[@class="tab-pane floorplans"][{}]/div[@class="clearfix"]/div
                /ul[@class="measurements-list"]/li/span[text()="{}"]/following-sibling::span/text()'''

        def measurement_xpaths(position, labels):
            """Return the measurement XPaths of *labels* in pane *position*."""
            return [descriptionXPath.format(position, label)
                    for label in labels]

        # (item field, measurement labels tried for that field)
        dimension_fields = (
            ('MasterBedroomDimension',
             ('Master Suite', 'Master Bedroom', 'Master Bed')),
            ('Bedroom2Dimension', ('Bedroom 2',)),
            ('Bedroom3Dimension', ('Bedroom 3',)),
            ('Bedroom4Dimension', ('Bedroom 4',)),
            ('StudyDimension', ('Study', 'Study/TV Area')),
            ('Meals_DiningDimension', ('Dining/Living', 'Family/Meals')),
            ('FamilyDimension', ('Family/Meals',)),
            ('TheatreDimension',
             ('Study/TV Area', 'TV Area', 'Home Theatre')),
            ('AlfrescoDimension', ('Alfresco',)),
            ('GarageDimension', ('Garage', 'Double Garage')),
            ('KitchenDimension', ('Kitchen/Meals', 'Kitchen')),
            ('LoungeDimension', ('Lounge',)),
            ('Squares', ('Total Size',)),
            ('LivingArea', ('Living',)),
            # Yes/No fields read the same measurement rows.
            ('TheatreRoom_Yes_No',
             ('Study/TV Area', 'TV Area', 'Home Theatre')),
            ('Alfresco_Yes_No', ('Alfresco', 'Second Alfresco')),
            ('Study_Yes_No', ('Study', 'Study/TV Area')),
        )

        # (item field, regex applied to the inclusions paragraphs)
        inclusion_patterns = (
            ('SturturalWarranty', r'.*guarantee.*|.*[Ww]arranty.*'),
            ('Windows', r'.*[Ww]indows?.*'),
            ('KitchenBenchtop',
             r'.*[Kk]itchen.*[Bb]enchtop.*|.*[Bb]enchtop.*[Kk]itchen.*'),
            # Was '...}...': the '}' was a typo for the alternation '|'.
            ('SecuritySystem',
             r'.*[Ss]ecurity.*[sS]ystem.*|.*[sS]ystem.*[Ss]ecurity.*'),
            ('EnergyRating',
             r'.*[Ee]nergy.*[rR]ating.*|.*[rR]ating.*[Ee]nergy.*'),
            ('KitchenAppliance',
             r'.*([Kk]itchen.*[Aa]ppliance).*|.*([Aa]ppliance.*[Kk]itchen).*'),
            # NOTE(review): pattern looks copy-pasted from a security-system
            # field; kept unchanged because the intended one is unknown.
            ('ApplianceBrand', r'.*[\w\s]+[Ss]ecurity System.*'),
            ('Splashback', r'.*[Ss]plashback.*'),
            ('FloorCovering',
             r'.*[Ff]loor.*[Cc]overings?.*|.*[Cc]overings?.*[Ff]loor.*'),
            ('Cooling', r'.*[Cc]ooling.*'),
            # Bath previously matched security-system text and CeilingHeight
            # matched bath text (patterns shifted by one field).
            ('Bath', r'.*[Bb]ath.*'),
            ('CeilingHeight', r'.*[Cc]eiling.*'),
            ('EnsuiteWallTiling', r'.*[Tt]ile.*'),
            ('EnsuiteBenchtop',
             r'.*[Ee]nsuite.*[Bb]enchtop.*|.*[Bb]enchtop.*[Ee]nsuite.*'),
            ('EnsuiteShowerbase', r'.*[Ss]howerbase.*'),
            ('WallPaint', r'.*[Ww]all.*[Pp]aint.*|.*[Pp]aint.*[Ww]all.*'),
            ('WIRFitouts', r'.*walk in robe.*|.*WIR.*'),
            ('Downlights', r'.*[Dd]ownlights.*'),
            ('Landscaping', r'.*[Ll]andscaping.*'),
            ('Driveway', r'.*[Dd]riveway.*'),
            ('Promotion', r'.*[Pp]romotion.*'),
        )

        designs = hxs.xpath(
            '//a[text()="Floorplans"]/following-sibling::ul/li/a/text()'
        ).extract()
        # XPath positions are 1-based, hence start=1.
        for position, design in enumerate(designs, start=1):
            pane = '//div[@class="tab-pane floorplans"][{}]'.format(position)

            # "name:size" pairs for every extra room in self.oth that has a
            # measurement in this floorplan.
            other = []
            for name in self.oth:
                size = hxs.xpath(
                    descriptionXPath.format(position, name)).extract_first()
                if size:
                    other.append('{}:{}'.format(name, size))

            loader = RealtyLoader(RealtyspidersItem(), hxs)
            loader.add_value('url', response.url)
            loader.add_value('BuildType', BuildType)
            loader.add_xpath('HomeDesignMainImage',
                             '''//div[@data-tab="overview"]/img/@src''')
            loader.add_value('BuilderLogo', self.logo)
            loader.add_value('DesignName', design)
            loader.add_xpath('Region',
                             descriptionXPath.format(position, 'Region'))

            loader.add_xpath('Bedrooms',
                             '//span[@class="bedroom"]/ancestor::li/text()')
            loader.add_xpath('Bathrooms',
                             '//span[@class="bathroom"]/ancestor::li/text()')
            loader.add_xpath('Garage',
                             '//span[@class="garage"]/ancestor::li/text()')
            loader.add_xpath(
                'BrochureImage_pdf',
                '//a[@class="gt-after download-price-list"]/@href')
            loader.add_xpath('InclusionsImage_pdf',
                             '//div[@data-tab="inclusions"]/div/a/@href')
            # NOTE(review): "@dref" may be a typo for "@href" - confirm
            # against the page markup before changing it.
            loader.add_xpath('OtherInclusions1', pane + '/div/div/a/@dref')
            loader.add_value('BasePrice', response.meta['BasePrice'])
            loader.add_xpath('FloorPlanImage1', pane + '/div/div/img/@src')

            # Image1..Image15 map to gallery images 1..15.
            for slot in range(1, 16):
                loader.add_xpath('Image{}'.format(slot),
                                 imgXpath.format(slot))

            for field, labels in dimension_fields:
                loader.add_xpath(field, measurement_xpaths(position, labels))

            loader.add_value('OtherInclusions', ', '.join(other))

            for field, pattern in inclusion_patterns:
                loader.add_xpath(field, inclusionsXpath, re=pattern)

            yield loader.load_item()
Exemple #4
0
    def parseItem(self, response):
        """Build a single item from a property detail page.

        The build type and state are derived from ``response.url`` via
        ``self._getBuildType`` and ``self._getState``. ``response.meta['data']``
        is stored as ``Storey`` when it is an ``int`` and as ``Region`` when
        it is any other truthy value.

        Args:
            response: the response object for the detail page. Its ``meta``
                must contain ``'data'``.

        Returns:
            The result of ``load_item()`` on the populated loader.
        """
        # NOTE: the Referer header used to be decoded here but was never
        # used, and it raised AttributeError whenever the header was missing.
        hxs = HtmlXPathSelector(response)
        inclusionsXpath = '''//div[@class="clearfix inclusions-block-inner"]/ul/li/text()'''
        imgXpath = '//input[@class="mfp-images"][{}]/@value'
        descriptionXPath = '//div[@class="admin-content"]/p/text()'
        # Free-text sources searched for most yes/no and inclusion fields.
        text_xpaths = [descriptionXPath, inclusionsXpath]

        loader = RealtyLoader(RealtyspidersItem(), hxs)
        loader.add_value('url', response.url)
        loader.add_value('BuildType', self._getBuildType(response.url))
        loader.add_xpath('HomeDesignMainImage',
                         '//div[@class="imagefill h550"]//img/@src')
        loader.add_value('BuilderLogo', self.logo)

        loader.add_xpath('DesignName',
                         '/html/body/div[2]/div/section[2]/h1/text()')
        data = response.meta['data']
        if isinstance(data, int):
            # Was add_xpath(): that evaluated the number as an XPath
            # expression instead of storing it as a literal value.
            loader.add_value('Storey', str(data))
        elif data:
            loader.add_value('Region', data)
        loader.add_value('State', self._getState(response.url))

        loader.add_xpath('Squares', '//span[@class="img-caption"]//i/text()',
                         re='(?<=Home [Ss]ize - ).+')
        # Bedrooms / Bathrooms / Garage are the 1st / 2nd / 3rd <em> of the
        # facility list.
        for position, field in enumerate(
                ('Bedrooms', 'Bathrooms', 'Garage'), start=1):
            loader.add_xpath(
                field,
                '//span[@class="facility-list clearfix"]/em[{}]//strong/text()'
                .format(position))
        loader.add_xpath('LandSize', '//span[@class="img-caption"]//i/text()',
                         re='(?<=Land size - ).+')
        loader.add_xpath('BasePrice', '//span[@class="product-price"]/text()')
        loader.add_xpath('Lot_BlockAddress',
                         '//div[@class="admin-content"]/h3/text()')
        loader.add_xpath('BrochureImage_pdf',
                         '//a[text()="Download Flyer"]/@href')
        loader.add_xpath('InclusionsImage_pdf',
                         '//a[text()="Download Inclusions"]/@href')
        loader.add_xpath('OtherInclusions',
                         '//a[text()="Download Floorplan & Options"]/@href')
        loader.add_xpath('FloorPlanImage1',
                         '//a[@class="image-lightbox"]/img/@src')

        # Image1..Image15 map to the 1st..15th "mfp-images" input.
        for slot in range(1, 16):
            loader.add_xpath('Image{}'.format(slot), imgXpath.format(slot))

        loader.add_xpath('HouseWidth', '//em[@class="width-length"]/i/text()',
                         re='(?<=Lot [Ww]idth - ).+')
        loader.add_xpath('HouseLength', '//em[@class="width-length"]/i/text()',
                         re='(?<=Lot [Ll]ength - ).+')

        # Yes/No block: the raw description and inclusions text is handed to
        # the loader unfiltered.
        for field in ('TheatreRoom_Yes_No', 'SeparateMeals_Yes_No',
                      'Alfresco_Yes_No', 'Study_Yes_No',
                      'WalkinPantry_Yes_No', 'BultersPantry_Yes_No',
                      'SteelStructure_Yes_No', 'Balcony_Yes_No'):
            loader.add_xpath(field, text_xpaths)

        # (item field, regex applied to the free text)
        inclusion_patterns = (
            # Warranty
            ('SturturalWarranty', r'.*guarantee.*|.*[Ww]arranty.*'),
            # Windows
            ('Windows', r'.*[Ww]indows?.*'),
            # Kitchen benchtop
            ('KitchenBenchtop',
             r'.*[Kk]itchen.*[Bb]enchtop.*|.*[Bb]enchtop.*[Kk]itchen.*'),
            # Alarm / security system.
            # Was '...}...': the '}' was a typo for the alternation '|'.
            ('SecuritySystem',
             r'.*[Ss]ecurity.*[sS]ystem.*|.*[sS]ystem.*[Ss]ecurity.*'),
            # Energy rating
            ('EnergyRating',
             r'.*[Ee]nergy.*[rR]ating.*|.*[rR]ating.*[Ee]nergy.*'),
            # Kitchen appliances
            ('KitchenAppliance',
             r'.*([Kk]itchen.*[Aa]ppliance).*|.*([Aa]ppliance.*[Kk]itchen).*'),
            # Appliance brand
            # NOTE(review): pattern looks copy-pasted from a security-system
            # field; kept unchanged because the intended one is unknown.
            ('ApplianceBrand', r'.*[\w\s]+[Ss]ecurity System.*'),
            # Splashback (tiling above the basin)
            ('Splashback', r'.*[Ss]plashback.*'),
            # Floor covering
            ('FloorCovering',
             r'.*[Ff]loor.*[Cc]overings?.*|.*[Cc]overings?.*[Ff]loor.*'),
            # Cooling
            ('Cooling', r'.*[Cc]ooling.*'),
            # Bath. Previously matched security-system text while
            # CeilingHeight matched bath text (patterns shifted by one).
            ('Bath', r'.*[Bb]ath.*'),
            # Ceiling height
            ('CeilingHeight', r'.*[Cc]eiling.*'),
            # Ensuite wall tiling
            ('EnsuiteWallTiling', r'.*[Tt]ile.*'),
            # Ensuite benchtop
            ('EnsuiteBenchtop',
             r'.*[Ee]nsuite.*[Bb]enchtop.*|.*[Bb]enchtop.*[Ee]nsuite.*'),
            # Shower base
            ('EnsuiteShowerbase', r'.*[Ss]howerbase.*'),
            # Wall paint
            ('WallPaint', r'.*[Ww]all.*[Pp]aint.*|.*[Pp]aint.*[Ww]all.*'),
            # Walk-in robe fitouts
            ('WIRFitouts', r'.*walk in robe.*|.*WIR.*'),
            # Downlights
            ('Downlights', r'.*[Dd]ownlights.*'),
            # Landscaping
            ('Landscaping', r'.*[Ll]andscaping.*'),
            # Driveway
            ('Driveway', r'.*[Dd]riveway.*'),
            # Promotion
            ('Promotion', r'.*[Pp]romotion.*'),
        )
        for field, pattern in inclusion_patterns:
            if field == 'EnsuiteWallTiling':
                # This field has always searched the description only.
                loader.add_xpath(field, descriptionXPath, re=pattern)
            else:
                loader.add_xpath(field, text_xpaths, re=pattern)

        return loader.load_item()
Exemple #5
0
    def parseItem(self, response):
        """Build a single item from a property detail page.

        The build type is derived from the request's Referer header via
        ``self.getBuildType``. ``State`` and ``Region`` are fixed to
        ``'VIC'`` / ``'MELBOURNE'``; ``DesignName`` is only collected for the
        ``'Home Designs'`` build type.

        Args:
            response: the response object for the detail page.

        Returns:
            The result of ``load_item()`` on the populated loader.
        """
        # A request without a Referer header used to crash on
        # ``None.decode``; fall back to an empty string instead.
        raw_referer = response.request.headers.get('Referer')
        referer = raw_referer.decode("utf-8") if raw_referer else ''
        hxs = HtmlXPathSelector(response)
        BuildType = self.getBuildType(referer)

        # Keyword arguments shared by every gallery image: only ".jpg"
        # matches are kept. ``myRefer`` is forwarded to the loader;
        # presumably it is used to absolutise relative URLs (TODO confirm
        # against RealtyLoader).
        image_kwargs = {'myRefer': 'http://www.frenkenhomes.com.au',
                        're': '.*jpg'}
        details_cell = ('//*[@id="details"]/tr/th[text()="{}"]'
                        '/following-sibling::td/text()')

        loader = RealtyLoader(RealtyspidersItem(), hxs)
        loader.add_value('BuildType', BuildType)
        loader.add_value('State', 'VIC')
        loader.add_value('Region', 'MELBOURNE')
        loader.add_value('url', response.url)
        if BuildType == 'Home Designs':
            loader.add_xpath('DesignName',
                             '//h2[@class="darkblue lowercase"]/text()')

        # Each of these is the <td> following the named <th> in #details.
        for field, heading in (('Squares', 'Floor Area'),
                               ('Bedrooms', 'Bedrooms'),
                               ('Bathrooms', 'Bathrooms'),
                               ('Garage', 'Garages'),
                               ('LandSize', 'Land Area')):
            loader.add_xpath(field, details_cell.format(heading))
        loader.add_xpath(
            'Lot_BlockWidth',
            '//*[@id="details"][2]/tbody/tr/th[text()="Frontage"]'
            '/following-sibling::td[2]/text()')

        # The main image is the same element as Image1.
        loader.add_xpath('HomeDesignMainImage', '//li[@id="img1"]/a/img/@src',
                         **image_kwargs)
        loader.add_xpath(
            'FloorPlanImage1',
            '//div[@class="property-details-buttons"]/a'
            '/span[text()="Floor Plan"]/ancestor::a/@href')

        # Image1..Image15 map to <li id="img1">..<li id="img15">.
        for slot in range(1, 16):
            loader.add_xpath('Image{}'.format(slot),
                             '//li[@id="img{}"]/a/img/@src'.format(slot),
                             **image_kwargs)

        loader.add_value('BuilderLogo', self.logo)
        return loader.load_item()
Exemple #6
0
    def parseItem(self, response):
        """Build one item from a property detail page.

        Field values come from constants (state/region), from the request
        meta (``BuildType`` is required; ``Squares``, ``HouseWidth`` and
        ``HouseLength`` are copied only when present) and from XPath queries
        against the page.

        :param response: detail-page response; a missing
            ``response.meta['BuildType']`` raises ``KeyError``.
        :return: whatever ``RealtyLoader.load_item()`` produces.
        """
        # A request without a Referer header used to crash on None.decode();
        # fall back to an empty string instead.
        raw_referer = response.request.headers.get('Referer') or b''
        referer = raw_referer.decode("utf-8")
        Storey = self.getStorey(referer)
        hxs = HtmlXPathSelector(response)

        titleXpath = '//h2[@class="page-title  "]/span/text()'
        iconXpath = '//div[@class="single-property"]/div/span[@class="{}"]/strong/text()'
        descriptionXPath = '//div[@id="description"]/p/text()'
        imgXpath = '//div[@id="property-images"]/ul/li[{}]/img/@src'

        l = RealtyLoader(RealtyspidersItem(), hxs)
        l.add_value('BuildType', response.meta['BuildType'])
        l.add_value('url', response.url)
        l.add_value('BuilderLogo', self.logo)
        l.add_value('State', 'VIC')
        l.add_value('Region', 'MELBOURNE')
        l.add_value('Storey', Storey)

        # Optional values handed over through the request meta.  Checking the
        # key explicitly keeps a KeyError raised inside add_value visible.
        for key in ('Squares', 'HouseWidth', 'HouseLength'):
            if key in response.meta:
                l.add_value(key, response.meta[key])

        # The page title is "Lot <n>, <address>, <design name>": the address
        # prefix is stripped for DesignName and kept for Lot_BlockAddress.
        l.add_xpath('DesignName', titleXpath,
                    reSub=r'^Lot\s*\d+,\s*(St)?(Mt\.)?\s*[\w\s]+,')
        l.add_xpath('Lot_BlockAddress', titleXpath,
                    re=r'^Lot\s*\d+,\s*(St)?(Mt\.)?\s*[\w\s]+')

        l.add_xpath('Bedrooms', iconXpath.format('bed'))
        l.add_xpath('Bathrooms', iconXpath.format('bath'))
        l.add_xpath('Garage', iconXpath.format('car'))
        # The trailing space in the class name is part of the page markup.
        l.add_xpath('Squares', iconXpath.format('area '))

        l.add_xpath('SturturalWarranty', descriptionXPath,
                    re=r'"?.*[\w\s]+guarantee.*"?')

        # Feature flags: each field receives whatever its regex finds in the
        # description paragraphs.
        feature_patterns = (
            ('TheatreRoom_Yes_No', r'[Tt]heatre [Rr]ooms?'),
            ('SeparateMeals_Yes_No', r'[Ss]eparate [Mm]eals'),
            ('Alfresco_Yes_No', r'[Aa]lfresco'),
            # Was "[Ss}chool": the stray brace swallowed the alternation, so
            # "school" could never match.
            ('Study_Yes_No', r'([Ss]tudy)|([Ss]chool)|([Uu]niversity)'),
            ('WalkinPantry_Yes_No', r'([Ww]alkin|[Pp]antry)'),
            # Accept the correct spelling "Butler" as well as the original
            # "Bulter"; the field used to be added twice with the same regex.
            ('BultersPantry_Yes_No', r"[Bb]u(?:tl|lt)er['`]?s?"),
            ('SteelStructure_Yes_No', r'[Ss]teel [Ss]tructure'),
            ('Balcony_Yes_No', r'[Bb]alcony'),
        )
        for field, pattern in feature_patterns:
            l.add_xpath(field, descriptionXPath, re=pattern)

        l.add_xpath('Windows', descriptionXPath,
                    re=r'"?.*[\w\s]+[Ww]indows?.*"?')
        l.add_xpath('KitchenBenchtop', descriptionXPath,
                    re=r'"?.*[\w\s]+[Bb]enchtop.*"?')
        l.add_xpath('SecuritySystem', descriptionXPath,
                    re=r'"?.*[\w\s]+[Ss]ecurity System.*"?')

        l.add_xpath('FloorPlanImage1', '//div[@id="floor-plan"]/a/img/@src')
        l.add_xpath('BrochureImage_pdf',
                    '//a[text()="View property brochure PDF"]/@href')

        # The first gallery image doubles as the main image.
        l.add_xpath('HomeDesignMainImage', imgXpath.format(1))
        for idx in range(1, 16):
            l.add_xpath('Image{}'.format(idx), imgXpath.format(idx))

        return l.load_item()
    def parseItem(self, response):
        """Build one item from a portfolio detail page.

        The build type is derived from the Referer URL via
        ``self._getBuildType``; images and feature flags are read from the
        ``portfolio-single__main-content`` block.

        :param response: detail-page response.
        :return: whatever ``RealtyLoader.load_item()`` produces.
        """
        # A request without a Referer header used to crash on None.decode();
        # fall back to an empty string instead.
        raw_referer = response.request.headers.get('Referer') or b''
        referer = raw_referer.decode("utf-8")
        hxs = HtmlXPathSelector(response)
        BuildType = self._getBuildType(referer)
        # NOTE(review): "p/img[N]" picks the N-th <img> inside each <p>, not
        # the N-th image overall - confirm against the page markup.
        imgXpath = '//div[@class="portfolio-single__main-content"]/p/img[{}]/@src'
        descriptionXPath = '//div[@class="portfolio-single__main-content"]/p[2]/text()'

        l = RealtyLoader(RealtyspidersItem(), hxs)
        l.add_value('url', response.url)
        l.add_value('BuildType', BuildType)
        l.add_value('BuilderLogo', self.logo)
        if BuildType == 'PRESTIGE HOMES':
            l.add_value('State', 'MELBOURNE')
        l.add_xpath('BuilderEmailAddress',
                    '//div[@class="entry-content span5"]/p/strong[text()="Email:"]/following-sibling::a/text()')

        l.add_xpath('DesignName', '//h1[@class="title-header"]/text()')
        l.add_xpath('FloorPlanImage1', '//div[@class="entry-content span5"]/p[1]/a/@href')
        l.add_xpath('HomeDesignMainImage', '//div[@class="portfolio-single__main-content"]/p[1]/img/@src')
        for idx in range(1, 16):
            l.add_xpath('Image{}'.format(idx), imgXpath.format(idx))
        # NOTE(review): this feeds the description paragraph text into the
        # main-image field; it looks like a copy-paste slip, but the intended
        # target field is not visible here, so the call is kept as it was.
        l.add_xpath('HomeDesignMainImage', descriptionXPath)

        # Feature flags: each field receives whatever its regex finds in the
        # description paragraph.
        feature_patterns = (
            ('TheatreRoom_Yes_No', '([Tt]heatre.*[Rr]ooms?)|([Rr]ooms?.*[Tt]heatre)'),
            ('SeparateMeals_Yes_No', '([Ss]eparate.*[Mm]eals)|([Mm]eals.*[Ss]eparate)'),
            ('Alfresco_Yes_No', '[Aa]lfresco'),
            # Was "[Ss}chool": the stray brace swallowed the alternation, so
            # "school" could never match.
            ('Study_Yes_No', '([Ss]tudy)|([Ss]chool)|([Uu]niversity)'),
            ('WalkinPantry_Yes_No', '([Ww]alkin|[Pp]antry)'),
            # Accept the correct spelling "Butler" as well as the original
            # "Bulter"; the field used to be added twice with the same regex.
            ('BultersPantry_Yes_No', r"[Bb]u(?:tl|lt)er['`]?s?"),
            ('SteelStructure_Yes_No', '([Ss]teel.*[Ss]tructure)|([Ss]tructure.*[Ss]teel)'),
            ('Balcony_Yes_No', '[Bb]alcony'),
        )
        for field, pattern in feature_patterns:
            l.add_xpath(field, descriptionXPath, re=pattern)

        return l.load_item()
Exemple #8
0
    def parseItem(self, response):
        """Build one item from a home-design detail page.

        Region comes from ``self.getParams(response.url)``, build type and
        storey from the Referer header, and the remaining fields from XPath
        queries against the page.

        :param response: detail-page response.
        :return: whatever ``RealtyLoader.load_item()`` produces.
        """
        raw_referer = response.request.headers.get('Referer', None)
        # A request without a Referer header used to crash on None.decode();
        # fall back to an empty string instead.
        referer = (raw_referer or b'').decode("utf-8")
        Region = self.getParams(response.url)
        # NOTE(review): str() of a bytes header yields "b'...'" on Python 3;
        # kept as-is because getBuildType is not visible from here.
        Referer = str(raw_referer)
        hxs = HtmlXPathSelector(response)

        dimensionXpath = ('//div[@class="dimensions-wrapper clearfix border-top"]/div/'
                          'p[text()="{}"]/following-sibling::p/text()')
        iconXpath = ('//div[@class="icon-wrapper clearfix"]/div/img[@alt="{}"]'
                     '/following-sibling::span/text()')
        featuresXpath = '//div[@class="one-halve"]/ul/li'
        slideXpath = '//ul[@class="slides normalize-ul"]/li[{}]/img/@src'

        l = RealtyLoader(RealtyspidersItem(), hxs)
        l.add_value('BuildType', self.getBuildType(Referer))
        l.add_value('url', response.url)
        l.add_value('BuilderLogo', self.logo)
        l.add_value('Region', Region)

        l.add_value('Storey', self._getSrorey(referer))

        # Note: the "State" field is read from the row labelled "Region".
        l.add_xpath('State', dimensionXpath.format('Region'))
        l.add_xpath(
            'DesignName',
            '''//ul[@class="normalize-ul design-list"]/li/a[@style="background-color: #e36420;"]/text()'''
        )
        l.add_xpath('BasePrice', dimensionXpath.format('Price'))
        l.add_xpath('Squares', dimensionXpath.format('House Size'))
        l.add_xpath('HouseWidth', dimensionXpath.format('House Width'))
        l.add_xpath('HouseLength', dimensionXpath.format('House Length'))

        l.add_xpath('Bedrooms', iconXpath.format('bed-gray'))
        l.add_xpath('Bathrooms', iconXpath.format('bathtub-gray'))
        l.add_xpath('Garage', iconXpath.format('car-gray'))

        # Every field below is filled from the feature list items with its
        # own regex.
        feature_patterns = (
            ('SturturalWarranty', r'[\w\s]+guarantee'),
            ('TheatreRoom_Yes_No', r'[Tt]heatre [Rr]ooms?'),
            ('SeparateMeals_Yes_No', r'[Ss]eparate [Mm]eals'),
            ('Alfresco_Yes_No', r'[Aa]lfresco'),
            # Was "[Ss}chool": the stray brace swallowed the alternation, so
            # "school" could never match.
            ('Study_Yes_No', r'([Ss]tudy)|([Ss]chool)|([Uu]niversity)'),
            ('WalkinPantry_Yes_No', r'([Ww]alkin|[Pp]antry)'),
            # Accept the correct spelling "Butler" as well as the original
            # "Bulter"; the field used to be added twice with the same regex.
            ('BultersPantry_Yes_No', r"[Bb]u(?:tl|lt)er['`]?s?"),
            ('SteelStructure_Yes_No', r'[Ss]teel [Ss]tructure'),
            ('Balcony_Yes_No', r'[Bb]alcony'),
            ('Windows', r'[\w\s]+[Ww]indows?'),
            ('KitchenBenchtop', r'[\w\s]+[Bb]enchtop'),
            ('SecuritySystem', r'[\w\s]+[Ss]ecurity System'),
        )
        for field, pattern in feature_patterns:
            l.add_xpath(field, featuresXpath, re=pattern)

        l.add_xpath(
            'BuilderEmailAddress',
            '//div[@class="tablet desktop editable"]/table/tbody/tr/th[text()="Email"]/following-sibling::td/text()'
        )
        l.add_xpath('FloorPlanImage1',
                    '//div[@class="floor-plans-wrapper"]/div/a/img/@src',
                    myRefer=referer)
        l.add_xpath('BrochureImage_pdf',
                    '//a[text()="Download Brochure"]/@href')
        l.add_xpath('InclusionsImage_pdf',
                    '//a[text()="View Our Standard Inclusions List"]/@href',
                    myRefer='http://nostrahomes.com.au/')

        # NOTE(review): Image1 is fed from both the top image and the first
        # slide; which one ends up in the item depends on RealtyLoader.
        l.add_xpath('Image1', '//*[@id="top-image"]/img/@src',
                    myRefer=referer)
        for idx in range(1, 16):
            l.add_xpath('Image{}'.format(idx), slideXpath.format(idx),
                        myRefer=referer)

        return l.load_item()
 def parseList(self, response):
     """Yield one item per package container found on a listing page.

     Each ``itemContainer itemContainerLast`` div under ``#itemListPrimary``
     gets its own loader, so the relative (``.//``) XPaths only see that
     container.

     :param response: listing-page response.
     :return: generator of the items produced by ``RealtyLoader.load_item()``.
     """
     # A request without a Referer header used to crash on None.decode();
     # fall back to an empty string instead.
     raw_referer = response.request.headers.get('Referer') or b''
     referer = raw_referer.decode("utf-8")
     hxs = HtmlXPathSelector(response)
     # .xpath() replaces the deprecated .select(), matching the other
     # callbacks in this file.
     hxsItemsList = hxs.xpath(
         '//div[@id="itemListPrimary"]/div[@class="itemContainer itemContainerLast"]'
     )
     titleXpath = './/div[@class="packages-cat-title"]/text()'
     middleXpath = './/div[@class="packages-cat-middle"]/div[@class="{}"]/text()'
     estateXpath = middleXpath.format('estate')

     for hxsItems in hxsItemsList:
         l = RealtyLoader(RealtyspidersItem(), hxsItems)
         l.add_value('BuildType', self._getBuildType(response.url))
         l.add_value('BuilderEmailAddress', '*****@*****.**')
         l.add_value('BuilderLogo', self.logo)
         l.add_value('url', response.url)

         # The title holds both the design name (up to the dash) and the
         # size ("<n> sq").
         l.add_xpath('DesignName', titleXpath, re='.*-')
         l.add_xpath('Squares', titleXpath, re=r'\d+\ssq')

         # The estate text is split at the comma: the part up to it goes to
         # DisplayLocation, the part from it on goes to Region.
         l.add_xpath('Region', estateXpath, re=',.*')
         l.add_xpath('DisplayLocation', estateXpath, re='.*,')

         l.add_xpath('LandSize', middleXpath.format('sq'))
         l.add_xpath('Bedrooms', middleXpath.format('bed'))
         l.add_xpath('Bathrooms', middleXpath.format('bath'))
         l.add_xpath('Garage', middleXpath.format('car'))

         l.add_xpath('BrochureImage_pdf',
                     './/div[@class="brochure"]/a/@href',
                     myRefer=referer)
         l.add_xpath('BasePrice',
                     '''.//div[@class="price"]/strong/text()''')
         l.add_xpath('HomeDesignMainImage',
                     './/div[@class="packages-cat-left"]/img/@src',
                     myRefer=self.start_urls[0])
         yield l.load_item()
    def parseItem(self, response):
        """Build one item from a house-design detail page.

        Room dimensions are read from the house-dimensions table, the
        inclusion fields are regex-filtered from the "Package Inclusions"
        list, and every label in ``self.oth`` that has a table value is
        collected into ``OtherInclusions`` as ``label:value``.

        :param response: detail-page response.
        :return: whatever ``RealtyLoader.load_item()`` produces.
        """
        # A request without a Referer header used to crash on None.decode();
        # fall back to an empty string instead.
        raw_referer = response.request.headers.get('Referer') or b''
        referer = raw_referer.decode("utf-8")
        hxs = HtmlXPathSelector(response)
        inclusionsXpath = '''//h2[text()="Package Inclusions"]/following-sibling::div//li/text()'''
        imgXpath = '//div[@class="cycle-slideshow"]/img[{}]/@src'
        # Template: "{}" is replaced by the label in the first table cell.
        descriptionXPath = (
            '//div[@class="col-sm-4 col-hd-house-dimensions hd-house-dimensions"]'
            '//tr/td[text()="{}"]/following-sibling::td/text()')
        idPage = hxs.xpath(
            '//a[text()="Download info pack"]/@data-home-id').extract_first()
        BuildType = self._getBuildType(referer)

        def dim(*labels):
            """Return the table XPath for one label, or a list for several."""
            paths = [descriptionXPath.format(label) for label in labels]
            return paths[0] if len(paths) == 1 else paths

        other = []
        for name in self.oth:
            size = hxs.xpath(descriptionXPath.format(name)).extract_first()
            if size:
                other.append('{}:{}'.format(name, size))

        l = RealtyLoader(RealtyspidersItem(), hxs)
        l.add_value('url', response.url)
        l.add_value('BuildType', BuildType)
        l.add_xpath('HomeDesignMainImage', imgXpath.format('1'))
        l.add_value('BuilderLogo', self.logo)

        l.add_xpath('DesignName', [
            '/html/body/div[3]/div/div[1]/div/div[1]/h1/text()',
            '/html/body/div[3]/div/div[1]/h1/text()'
        ])
        # str.find() returns -1 (truthy) when the text is absent and 0
        # (falsy) when it is a prefix, so the old "if BuildType.find(...)"
        # test gave the wrong answer; use a membership test instead.
        l.add_value('Storey', '2' if 'Double' in BuildType else '1')
        l.add_xpath('Region', dim('Region'))

        l.add_xpath('Bedrooms',
                    '//span[@class="hh-icon-beds"]/ancestor::li/text()')
        l.add_xpath('Bathrooms',
                    '//span[@class="hh-icon-baths"]/ancestor::li/text()')
        l.add_xpath('Garage',
                    '//span[@class="hh-icon-car"]/ancestor::li/text()')
        l.add_xpath('LivingArea',
                    '//span[@class="hh-icon-living"]/ancestor::li/text()')
        l.add_xpath('BasePrice', [
            '/html/body/div[3]/div/div[1]/div/div[1]/h2/text()',
            '/html/body/div[3]/div/div[1]/h2/text()'
        ])
        l.add_xpath(
            'FloorPlanImage1',
            '//div[@class="js-fp-panzoom js-fp-panzoom-reset"]/img/@src')
        if idPage:
            l.add_value('BrochureImage_pdf',
                        '{}{}'.format(self.pdfUrl, idPage))
        for idx in range(1, 16):
            l.add_xpath('Image{}'.format(idx), imgXpath.format(idx))

        # Room dimensions, looked up by table label(s).
        l.add_xpath('MasterBedroomDimension', dim('Master Bedroom'))
        l.add_xpath('Bedroom2Dimension', dim('Bedroom 2'))
        l.add_xpath('Bedroom3Dimension', dim('Bedroom 3'))
        l.add_xpath('Bedroom4Dimension', dim('Bedroom 4', 'Study/Bedroom 4'))
        l.add_xpath('StudyDimension', dim('Study/Bedroom 4', 'Study'))
        l.add_xpath('Meals_DiningDimension', dim('Meals', 'Family/Meals'))
        # NOTE(review): same labels as Meals_DiningDimension; 'Family' may
        # have been intended for the first one - confirm against the site.
        l.add_xpath('FamilyDimension', dim('Meals', 'Family/Meals'))
        l.add_xpath('TheatreDimension', dim('Media Room', 'Media'))
        l.add_xpath('AlfrescoDimension', dim('Alfresco'))
        l.add_xpath('HouseWidth', dim('Min block width'))
        l.add_xpath('GarageDimension', dim('Garage'))
        l.add_xpath('KitchenDimension', dim('Kitchen'))
        l.add_xpath('Squares', dim('Floor Area sqm'))
        l.add_xpath('LandSize', dim('Land Size sqm'))

        # Yes/No block: fed with the matching dimension rows.
        l.add_xpath('TheatreRoom_Yes_No', dim('Media Room', 'Media'))
        l.add_xpath('Alfresco_Yes_No', dim('Alfresco', 'Second Alfresco'))
        l.add_xpath('Study_Yes_No', dim('Study/Bedroom 4', 'Study'))
        l.add_value('OtherInclusions', ', '.join(other))

        # Each field below keeps the inclusion list entries matching its
        # regex.
        inclusion_patterns = (
            # Warranty
            ('SturturalWarranty', r'.*guarantee.*|.*[Ww]arranty.*'),
            # Windows
            ('Windows', r'.*[Ww]indows?.*'),
            # Kitchen benchtop
            ('KitchenBenchtop',
             r'.*[Kk]itchen.*[Bb]enchtop.*|.*[Bb]enchtop.*[Kk]itchen.*'),
            # Security system ("}" between the alternatives was a typo
            # for "|")
            ('SecuritySystem',
             r'.*[Ss]ecurity.*[sS]ystem.*|.*[sS]ystem.*[Ss]ecurity.*'),
            # Energy rating
            ('EnergyRating',
             r'.*[Ee]nergy.*[rR]ating.*|.*[rR]ating.*[Ee]nergy.*'),
            # Kitchen appliances
            ('KitchenAppliance',
             r'.*([Kk]itchen.*[Aa]ppliance).*|.*([Aa]ppliance.*[Kk]itchen).*'),
            # Appliance brand
            # NOTE(review): this is the security-system pattern, almost
            # certainly a copy-paste slip; the intended pattern is unknown,
            # so it is left unchanged.
            ('ApplianceBrand', r'.*[\w\s]+[Ss]ecurity System.*'),
            # Splashback
            ('Splashback', r'.*[Ss]plashback.*'),
            # Floor covering
            ('FloorCovering',
             r'.*[Ff]loor.*[Cc]overings?.*|.*[Cc]overings?.*[Ff]loor.*'),
            # Cooling
            ('Cooling', r'.*[Cc]ooling.*'),
            # Bath (previously carried the security-system pattern)
            ('Bath', r'.*[Bb]ath.*'),
            # Ceiling height (previously carried the bath pattern)
            ('CeilingHeight', r'.*[Cc]eiling.*'),
            # Ensuite wall tiling (previously queried the unformatted table
            # template, which could never match anything)
            ('EnsuiteWallTiling', r'.*[Tt]ile.*'),
            # Ensuite benchtop
            ('EnsuiteBenchtop',
             r'.*[Ee]nsuite.*[Bb]enchtop.*|.*[Bb]enchtop.*[Ee]nsuite.*'),
            # Ensuite shower base
            ('EnsuiteShowerbase', r'.*[Ss]howerbase.*'),
            # Wall paint
            ('WallPaint', r'.*[Ww]all.*[Pp]aint.*|.*[Pp]aint.*[Ww]all.*'),
            # Walk-in robe fitouts
            ('WIRFitouts', r'.*walk in robe.*|.*WIR.*'),
            # Downlights
            ('Downlights', r'.*[Dd]ownlights.*'),
            # Landscaping
            ('Landscaping', r'.*[Ll]andscaping.*'),
            # Driveway
            ('Driveway', r'.*[Dd]riveway.*'),
            # Promotion
            ('Promotion', r'.*[Pp]romotion.*'),
        )
        for field, pattern in inclusion_patterns:
            l.add_xpath(field, inclusionsXpath, re=pattern)

        return l.load_item()
Exemple #11
0
    def parseItem(self, response):
        """Build one item from a listing detail page.

        Text fields are regex-extracted from the ``#listing_options`` block,
        document and image links are loaded with ``self.start_urls[0]``
        passed as ``myRefer``, and the three feature flags come from
        ``self.getFeatures``.

        :param response: detail-page response.
        :return: whatever ``RealtyLoader.load_item()`` produces.
        """
        # A request without a Referer header used to crash on None.decode();
        # fall back to an empty string instead.
        raw_referer = response.request.headers.get('Referer') or b''
        referer = raw_referer.decode("utf-8")
        hxs = HtmlXPathSelector(response)
        BuildType = self._getBuildType(referer)
        # NOTE(review): "a/img[N]" picks the N-th <img> inside each thumb
        # link, not the N-th thumbnail overall - confirm against the markup.
        imgXpath = '//a[@class="proPhotoThumbLink"]/img[{}]/@src'
        optionsXpath = '//div[@id="listing_options"]/text()'
        descriptionXPath = '//div[@id="listing_options"]/ul/li/text()'
        baseUrl = self.start_urls[0]

        l = RealtyLoader(RealtyspidersItem(), hxs)
        l.add_value('url', response.url)
        l.add_value('BuildType', BuildType)
        l.add_value('BuilderLogo', self.logo)
        l.add_xpath('DesignName', '//div[@id="listing_options"]/h4/text()')

        # Free-text lines such as "4 Beds" / "Ideal block width = 12.5m".
        l.add_xpath('Bedrooms', optionsXpath, re=r'\d(?=\sBeds)')
        l.add_xpath('Bathrooms', optionsXpath, re=r'\d(?=\sBaths)')
        l.add_xpath('Lot_BlockWidth', optionsXpath,
                    re=r'(?<=Ideal block width = )[\w\.\s]+')

        # Area list entries of the form "<label> - <value>".
        l.add_xpath('LivingArea', descriptionXPath,
                    re=r'(?<=Living Area - )[\w\.\s]+')
        l.add_xpath('Squares', descriptionXPath,
                    re=r'(?<=Total Area - )[\w\.\s]+')
        l.add_xpath('GarageDimension', descriptionXPath,
                    re=r'(?<=Garage Area - )[\w\.\s]+')
        l.add_xpath('AlfrescoDimension', descriptionXPath,
                    re=r'(?<=Alfresco Area - )[\w\.\s]+')

        l.add_xpath(
            'FloorPlanImage1',
            '//div[@id="listing_text"]/h4/a[text()="Download floor plan"]/@href',
            myRefer=baseUrl)
        l.add_xpath(
            'BrochureImage_pdf',
            '//div[@id="listing_text"]/h4/a[text()="View the Specification"]/@href',
            myRefer=baseUrl)
        l.add_xpath('HomeDesignMainImage',
                    '//div[@class="mainImageTarget"]/img/@src',
                    myRefer=baseUrl)
        for idx in range(1, 16):
            l.add_xpath('Image{}'.format(idx), imgXpath.format(idx),
                        myRefer=baseUrl)

        # Yes/No block: only these three flags are populated here, each from
        # self.getFeatures with the page URL and a feature code.
        l.add_value('TheatreRoom_Yes_No', self.getFeatures(response.url, '35'))
        l.add_value('Alfresco_Yes_No', self.getFeatures(response.url, '25'))
        l.add_value('Study_Yes_No', self.getFeatures(response.url, '36'))

        return l.load_item()
Exemple #12
0
    def parseItem(self, response):
        """Build a ``HOME DESIGNS`` item from a single design page.

        Headline specs are read from the labelled ``<strong>`` entries of the
        "property-info-agent" block; inclusion details are regex-matched
        against the ``<li>`` texts found under ``div#0``.

        :param response: response for the design page being parsed.
        :return: the item produced by ``RealtyLoader.load_item()``.
        """
        hxs = HtmlXPathSelector(response)
        roomsXpath = ('//div[@class="property-info-agent clear"]/span'
                      '/strong[text()="{}"]/ancestor::span/text()')
        descriptionXPath = '//div[@id="0"]//li/text()'

        def spec_texts(label):
            # Text nodes of the <span> whose <strong> caption equals `label`.
            return hxs.xpath(roomsXpath.format(label)).extract()

        loader = RealtyLoader(RealtyspidersItem(), hxs)
        loader.add_value('url', response.url)
        loader.add_value('BuildType', 'HOME DESIGNS')
        loader.add_value('BuilderLogo', self.logo)
        loader.add_xpath('DesignName', [
            '//section[@class="page-title-block header-bg"]/div/h2/text()',
            '//section[@class="page-title-block-default header-bg"]/div/h2/text()',
        ])

        for field, label in (('Bedrooms', 'Bedrooms:'),
                             ('Bathrooms', 'Bathrooms:'),
                             ('Garage', 'Car Spaces:')):
            loader.add_value(field, self._stripJoin(spec_texts(label)))

        # The presence of a "First Floor Living:" entry is treated as the
        # marker of a two-storey design.
        loader.add_value(
            'Storey', '2' if spec_texts('First Floor Living:') else '1')

        for field, label in (('HouseWidth', 'Overall Width:'),
                             ('GarageDimension', 'Garage:'),
                             ('AlfrescoDimension', 'Alfresco:'),
                             ('Alfresco_Yes_No', 'Alfresco:'),
                             ('Squares', 'Total:')):
            loader.add_value(field, self._stripJoin(spec_texts(label)))

        loader.add_xpath('BrochureImage_pdf', '//div[@id="0"]//a/@href')
        loader.add_xpath('FloorPlanImage1', '//div[@id="1"]/img/@src')
        loader.add_xpath('HomeDesignMainImage',
                         '//ul[@class="slides"]//a/@href')

        # Yes/No features detected by keyword in the description list.
        # NOTE(review): "[Bb]ulter" looks like a misspelling of "Butler" and
        # will not match "Butler's pantry" -- confirm against real pages.
        for field, pattern in (
                ('WalkinPantry_Yes_No', r'([Ww]alkin|[Pp]antry)'),
                ('BultersPantry_Yes_No', r'[Bb]ulter[`]?s?'),
                ('SteelStructure_Yes_No',
                 r'([Ss]teel.*[Ss]tructure)|([Ss]tructure.*[Ss]teel)'),
        ):
            loader.add_xpath(field, descriptionXPath, re=pattern)
        loader.add_xpath('Balcony_Yes_No', roomsXpath.format('Balcony'))

        # Inclusions: keep the whole description line mentioning the topic.
        # NOTE(review): ApplianceBrand still matches "Security System" lines,
        # which looks copy-pasted; the intended pattern is not known here.
        inclusion_patterns = (
            ('SturturalWarranty', r'.*guarantee.*|.*[Ww]arranty.*'),
            ('Windows', r'.*[Ww]indows?.*'),
            ('KitchenBenchtop',
             r'.*[Kk]itchen.*[Bb]enchtop.*|.*[Bb]enchtop.*[Kk]itchen.*'),
            ('SecuritySystem',
             r'.*[Ss]ecurity.*[sS]ystem.*|.*[sS]ystem.*[Ss]ecurity.*'),
            ('EnergyRating',
             r'.*[Ee]nergy.*[rR]ating.*|.*[rR]ating.*[Ee]nergy.*'),
            ('KitchenAppliance',
             r'.*([Kk]itchen.*[Aa]ppliance).*|.*([Aa]ppliance.*[Kk]itchen).*'),
            ('ApplianceBrand', r'.*[\w\s]+[Ss]ecurity System.*'),
            ('Splashback', r'.*[Ss]plashback.*'),
            ('FloorCovering',
             r'.*[Ff]loor.*[Cc]overings?.*|.*[Cc]overings?.*[Ff]loor.*'),
            ('Cooling', r'.*[Cc]ooling.*'),
            ('Bath', r'.*[Bb]ath.*'),
            ('CeilingHeight', r'.*[Cc]eiling.*'),
            ('EnsuiteWallTiling', r'.*[Tt]ile.*'),
            ('EnsuiteBenchtop',
             r'.*[Ee]nsuite.*[Bb]enchtop.*|.*[Bb]enchtop.*[Ee]nsuite.*'),
            ('EnsuiteShowerbase', r'.*[Ss]howerbase.*'),
            ('WallPaint', r'.*[Ww]all.*[Pp]aint.*|.*[Pp]aint.*[Ww]all.*'),
            ('WIRFitouts', r'.*walk in robe.*|.*WIR.*'),
            ('Downlights', r'.*[Dd]ownlights.*'),
            ('Landscaping', r'.*[Ll]andscaping.*'),
            ('Driveway', r'.*[Dd]riveway.*'),
            ('Promotion', r'.*[Pp]romotion.*'),
        )
        for field, pattern in inclusion_patterns:
            loader.add_xpath(field, descriptionXPath, re=pattern)

        return loader.load_item()
    def parseOurhomes(self, response):
        """Build an item from an "our homes" design page.

        Room sizes come from the ``#floorplan-1`` specs table, images from the
        gallery links, and feature/inclusion fields from regex matches on the
        description paragraphs. Sizes found for the room names listed in
        ``self.oth`` are joined as ``name:size`` pairs into
        ``OtherInclusions``.

        :param response: response for the design page being parsed.
        :return: the item produced by ``RealtyLoader.load_item()``.
        """
        areaXpath = '''//*[@id="floorplan-1"]/div[@class="specs-table"]/div/div[text()="{}"]/following-sibling::
                    div[@class="size"]/text()'''
        imgXpath = '//div[@class="home--single__gallery-images hidden-sm hidden-xs"]/a[{}]/@href'
        descrXpath = '//*[@id="sb-site"]/div[2]/div[3]/div/div/div[1]/p/text()'
        hxs = HtmlXPathSelector(response)

        other = []
        for name in self.oth:
            size = hxs.xpath(areaXpath.format(name)).extract_first()
            if size:
                other.append('{}:{}'.format(name, size))

        loader = RealtyLoader(RealtyspidersItem(), hxs)
        loader.add_value('url', response.url)
        loader.add_value('BuildType', self._getBuildType(response.url))
        loader.add_value('BuilderLogo', self.logo)

        for field, xpath in (
                ('DesignName',
                 '//*[@id="sb-site"]/div[2]/div[1]/div/div/h1/text()'),
                ('BrochureImage_pdf',
                 '//*[@id="sb-site"]/div[2]/div[3]/div/div/div[2]/div[2]/a/@href'),
                ('InclusionsImage_pdf',
                 '//*[@id="sb-site"]/div[2]/div[3]/div/div/div[2]/div[3]/a/@href'),
                ('Squares',
                 '//*[@id="floorplan-1"]/div[@class="squares"]/text()'),
                ('Bedrooms',
                 '//*[@id="floorplan-1"]/div[@class="bedrooms"]/text()'),
                ('Bathrooms',
                 '//*[@id="floorplan-1"]/div[@class="bathrooms"]/text()'),
                ('Garage',
                 '//*[@id="floorplan-1"]/div[@class="cars"]/text()'),
        ):
            loader.add_xpath(field, xpath)

        # Each field accepts any of the listed row captions of the specs
        # table; the page uses several spellings for the same room.
        area_fields = (
            ('FamilyDimension', ('Family', 'Family/Lounge', 'family')),
            ('Meals_DiningDimension', ('Meals', 'Family/Meals')),
            ('LoungeDimension', ('Lounge',)),
            ('AlfrescoDimension', ('Alfresco',)),
            ('Alfresco_Yes_No', ('Alfresco',)),
            ('TheatreRoom_Yes_No', ('Theatre',)),
            ('TheatreDimension', ('Theatre',)),
            ('GarageDimension', ('Garage',)),
            ('MasterBedroomDimension',
             ('Master Bedroom', 'Bedroom 1', 'Bed 1', 'Master')),
            ('Bedroom2Dimension', ('Bedroom 2', 'Bed 2', 'Bedroom 2/Lounge')),
            ('Bedroom3Dimension', ('Bedroom 3', 'Bed 3')),
            ('Bedroom4Dimension', ('Bedroom 4', 'Bed 4')),
            ('KitchenDimension', ('Kitchen',)),
            ('Study_Yes_No', ('Study',)),
            ('StudyDimension', ('Study',)),
        )
        for field, captions in area_fields:
            loader.add_xpath(
                field, [areaXpath.format(caption) for caption in captions])

        loader.add_xpath('FloorPlanImage1', '//*[@id="floorplan-1"]/@src')
        loader.add_xpath('HomeDesignMainImage',
                         '//*[@class="home--single__full-image"]/a/@href')
        # Image1..Image15 map to the 1st..15th gallery link.
        for index in range(1, 16):
            loader.add_xpath('Image{}'.format(index), imgXpath.format(index))
        loader.add_xpath('BuilderEmailAddress', descrXpath,
                         re=r'[a-zA-Z]+@[a-z]+\.com\.au')

        # Yes/No features detected by keyword in the description.
        # NOTE(review): "[Bb]ulter" looks like a misspelling of "Butler" and
        # will not match "Butler's pantry" -- confirm against real pages.
        for field, pattern in (
                ('TheatreRoom_Yes_No', r'[tT]heatre'),
                ('SeparateMeals_Yes_No', r'[Ss]eparate|[Mm]eals'),
                ('WalkinPantry_Yes_No', r'([Ww]alkin|[Pp]antry)'),
                ('BultersPantry_Yes_No', r'[Bb]ulter[`]?s?'),
                ('SteelStructure_Yes_No',
                 r'([Ss]teel.*[Ss]tructure)|([Ss]tructure.*[Ss]teel)'),
                ('Balcony_Yes_No', r'[Bb]alcony'),
        ):
            loader.add_xpath(field, descrXpath, re=pattern)

        # Inclusions: keep the whole description line mentioning the topic.
        # NOTE(review): ApplianceBrand still matches "Security System" lines,
        # which looks copy-pasted; the intended pattern is not known here.
        inclusion_patterns = (
            ('SturturalWarranty', r'.*guarantee.*|.*[Ww]arranty.*'),
            ('Windows', r'.*[Ww]indows?.*'),
            ('KitchenBenchtop',
             r'.*[Kk]itchen.*[Bb]enchtop.*|.*[Bb]enchtop.*[Kk]itchen.*'),
            ('SecuritySystem',
             r'.*[Ss]ecurity.*[sS]ystem.*|.*[sS]ystem.*[Ss]ecurity.*'),
            ('EnergyRating',
             r'.*[Ee]nergy.*[rR]ating.*|.*[rR]ating.*[Ee]nergy.*'),
            ('KitchenAppliance',
             r'.*([Kk]itchen.*[Aa]ppliance).*|.*([Aa]ppliance.*[Kk]itchen).*'),
            ('ApplianceBrand', r'.*[\w\s]+[Ss]ecurity System.*'),
            ('Splashback', r'.*[Ss]plashback.*'),
            ('FloorCovering',
             r'.*[Ff]loor.*[Cc]overings?.*|.*[Cc]overings?.*[Ff]loor.*'),
            ('Cooling', r'.*[Cc]ooling.*'),
            ('Bath', r'.*[Bb]ath.*'),
            ('CeilingHeight', r'.*[Cc]eiling.*'),
            ('EnsuiteWallTiling', r'.*[Tt]ile.*'),
            ('EnsuiteBenchtop',
             r'.*[Ee]nsuite.*[Bb]enchtop.*|.*[Bb]enchtop.*[Ee]nsuite.*'),
            ('EnsuiteShowerbase', r'.*[Ss]howerbase.*'),
            ('WallPaint', r'.*[Ww]all.*[Pp]aint.*|.*[Pp]aint.*[Ww]all.*'),
            ('WIRFitouts', r'.*walk in robe.*|.*WIR.*'),
            ('Downlights', r'.*[Dd]ownlights.*'),
            ('Landscaping', r'.*[Ll]andscaping.*'),
            ('Driveway', r'.*[Dd]riveway.*'),
            ('Promotion', r'.*[Pp]romotion.*'),
        )
        for field, pattern in inclusion_patterns:
            loader.add_xpath(field, descrXpath, re=pattern)

        loader.add_value('OtherInclusions', ', '.join(other))

        return loader.load_item()
    def parseHL(self, response):
        """Build an item from a house-and-land package page.

        Price, room counts and images are read from the ``houseland__*``
        blocks; Yes/No features are regex-matched against the preformatted
        package description.

        :param response: response for the package page being parsed.
        :return: the item produced by ``RealtyLoader.load_item()``.
        """
        hxs = HtmlXPathSelector(response)
        descrXpath = '//div[@class="houseland__description"]/pre/text()'
        # The gallery index is a placeholder so that each ImageN field reads
        # its own "houseland_gallery-image-N" container rather than all of
        # them reading image 1.
        imgXpath = '//div[@id="houseland_gallery-image-{}"]/a/@href'

        loader = RealtyLoader(RealtyspidersItem(), hxs)
        loader.add_value('url', response.url)
        loader.add_value('BuildType', self._getBuildType(response.url))
        loader.add_value('BuilderLogo', self.logo)

        for field, xpath in (
                ('DesignName',
                 '//*[@id="sb-site"]/div[2]/div/div/div[2]/div/h1/span/text()'),
                ('DisplayLocation',
                 '//*[@id="sb-site"]/div[2]/div/div/div[2]/div/h1/text()'),
                ('BasePrice', '//div[@class="houseland__price"]/text()'),
                ('Bedrooms', '//div[@class="houseland__bedrooms"]/text()'),
                ('Bathrooms', '//div[@class="houseland__bathrooms"]/text()'),
                ('Garage', '//div[@class="houseland__cars"]/text()'),
                ('HomeDesignMainImage',
                 '//div[@class="houseland__main-image"]/a/@href'),
        ):
            loader.add_xpath(field, xpath)

        for index in range(1, 16):
            loader.add_xpath('Image{}'.format(index), imgXpath.format(index))

        # Yes/No features detected by keyword in the description.
        # NOTE(review): "[Bb]ulter" looks like a misspelling of "Butler" and
        # will not match "Butler's pantry" -- confirm against real pages.
        for field, pattern in (
                ('TheatreRoom_Yes_No', r'[tT]heatre'),
                ('SeparateMeals_Yes_No', r'[Ss]eparate|[Mm]eals'),
                ('WalkinPantry_Yes_No', r'([Ww]alkin|[Pp]antry)'),
                ('BultersPantry_Yes_No', r'[Bb]ulter[`]?s?'),
                ('SteelStructure_Yes_No',
                 r'([Ss]teel.*[Ss]tructure)|([Ss]tructure.*[Ss]teel)'),
                ('Balcony_Yes_No', r'[Bb]alcony'),
        ):
            loader.add_xpath(field, descrXpath, re=pattern)

        return loader.load_item()
Exemple #15
0
 def parseList(self, response):
     """Yield one H&L package item per ``div.property-item`` on a listing page.

     Every field is read with an XPath relative to the item's own node so
     that values from neighbouring listings on the same page are not mixed
     in.

     :param response: response for the listing page being parsed.
     :return: generator of items produced by ``RealtyLoader.load_item()``.
     """
     hxs = HtmlXPathSelector(response)
     for hxsItem in hxs.xpath('//div[@class="property-item"]'):
         loader = RealtyLoader(RealtyspidersItem(), hxsItem)
         loader.add_value('url', response.url)
         loader.add_value('BuildType', 'Browse our H&L packages')
         loader.add_value('BuilderLogo', self.logo)
         loader.add_xpath('Lot_BlockAddress',
                          './/span[@class="street"]/text()')
         loader.add_xpath('Squares', './/span[@class="area"]/text()')
         # A leading "//" would search the whole document even though the
         # loader is bound to hxsItem; ".//" keeps the lookup inside it.
         loader.add_xpath('Bedrooms', './/li[@class="beds"]/text()')
         loader.add_xpath('Bathrooms', './/li[@class="baths"]/text()')
         loader.add_xpath('Garage', './/li[@class="garages"]/text()')
         loader.add_xpath('LivingArea', './/li[@class="storeys"]/text()')
         loader.add_xpath(
             'BasePrice',
             './/div[@class="field-prefix" and text()="$"]/following-sibling::div[@class="field-value"]/text()')
         loader.add_xpath('HomeDesignMainImage', './/img/@src')
         yield loader.load_item()
    def parseItem(self, response):
        """Parse either a listing page or a home-design detail page.

        When ``self._chakURL(response.url)`` is false the page is handled as
        a listing: the item URL -> thumbnail URL mapping found on it is
        stored in ``self.itemsList`` and nothing is returned. Otherwise an
        item is built from the detail page, unless the URL looks like a
        ``<digits>-special-offers`` page, which is skipped.

        :param response: response for the page being parsed.
        :return: the loaded item for a detail page, otherwise ``None``.
        """
        base_url = self.start_urls[0]

        if not self._chakURL(response.url):
            hxs = HtmlXPathSelector(response)
            item_links = hxs.xpath(
                '//div[@class="homes-cat-left"]/a/@href').extract()
            thumbnails = hxs.xpath(
                '//div[@class="homes-cat-left"]/a/img/@src').extract()
            # NOTE(review): this replaces the mapping on every listing page
            # rather than merging into it -- confirm that is intended.
            self.itemsList = dict(
                zip((base_url + href for href in item_links), thumbnails))
            return None

        if re.search(r'\d+-special-offers', response.url):
            return None

        hxs = HtmlXPathSelector(response)
        loader = RealtyLoader(RealtyspidersItem(), hxs)
        loader.add_value('BuildType', self._getBuildType(response.url))
        loader.add_value('BuilderEmailAddress', '*****@*****.**')

        # The main image is only known if the listing page that links here
        # was parsed earlier; otherwise the field is left unset.
        try:
            thumbnail = self.itemsList[response.url]
        except KeyError:
            pass
        else:
            loader.add_value('HomeDesignMainImage', base_url + thumbnail)

        loader.add_value('BuilderLogo', self.logo)
        for field, xpath in (
                ('DesignName', '//div[@class="content-columns"]/h2[1]/text()'),
                ('Squares',
                 '//div[@id="house-details"]/div[@class="sq"]/text()'),
                ('Bedrooms',
                 '//div[@id="house-details"]/div[@class="bed"]/text()'),
                ('Bathrooms',
                 '//div[@id="house-details"]/div[@class="bath"]/text()'),
                ('Garage',
                 '//div[@id="house-details"]/div[@class="car"]/text()'),
        ):
            loader.add_xpath(field, xpath)

        loader.add_xpath(
            'BrochureImage_pdf',
            '//div[@class="house-attachment"]/a[text()="Download Brochure"]/@href',
            myRefer=base_url)

        # Thumbnail 1 is the floor plan; thumbnails 2..16 are Image1..Image15.
        thumbXpath = '//li[@class="sigProThumb"][{}]/span/span/a/@href'
        loader.add_xpath('FloorPlanImage1', thumbXpath.format(1),
                         myRefer=base_url)
        for index in range(1, 16):
            loader.add_xpath('Image{}'.format(index),
                             thumbXpath.format(index + 1),
                             myRefer=base_url)
        loader.add_value('url', response.url)

        descriptionXPath = '//div[@id="content-body"]/div/ul/li/span/text()'

        # Yes/No features detected by keyword in the description list.
        # NOTE(review): "[Bb]ulter" looks like a misspelling of "Butler" and
        # will not match "Butler's pantry" -- confirm against real pages.
        for field, pattern in (
                ('TheatreRoom_Yes_No',
                 r'([Tt]heatre.*[Rr]ooms?)|([Rr]ooms?.*[Tt]heatre)'),
                ('SeparateMeals_Yes_No',
                 r'([Ss]eparate.*[Mm]eals)|([Mm]eals.*[Ss]eparate)'),
                ('Alfresco_Yes_No', r'[Aa]lfresco'),
                ('Study_Yes_No', r'([Ss]tudy)|([Ss]chool)|([Uu]niversity)'),
                ('WalkinPantry_Yes_No', r'([Ww]alkin|[Pp]antry)'),
                ('BultersPantry_Yes_No', r'[Bb]ulter[`]?s?'),
                ('SteelStructure_Yes_No',
                 r'([Ss]teel.*[Ss]tructure)|([Ss]tructure.*[Ss]teel)'),
                ('Balcony_Yes_No', r'[Bb]alcony'),
        ):
            loader.add_xpath(field, descriptionXPath, re=pattern)

        # Inclusions: keep the whole description line mentioning the topic.
        inclusion_patterns = (
            ('SturturalWarranty', r'.*guarantee.*|.*[Ww]arranty.*'),
            ('Windows', r'.*[Ww]indows?.*'),
            ('KitchenBenchtop',
             r'.*[Kk]itchen.*[Bb]enchtop.*|.*[Bb]enchtop.*[Kk]itchen.*'),
            ('SecuritySystem',
             r'.*[Ss]ecurity.*[sS]ystem.*|.*[sS]ystem.*[Ss]ecurity.*'),
            ('EnergyRating',
             r'.*[Ee]nergy.*[rR]ating.*|.*[rR]ating.*[Ee]nergy.*'),
            ('Splashback', r'.*[Ss]plashback.*'),
            ('FloorCovering',
             r'.*[Ff]loor.*[Cc]overings?.*|.*[Cc]overings?.*[Ff]loor.*'),
            ('Cooling', r'.*[Cc]ooling.*'),
            ('Bath', r'.*[Bb]ath.*'),
            ('CeilingHeight', r'.*[Cc]eiling.*'),
            ('EnsuiteWallTiling', r'.*[Tt]ile.*'),
            ('EnsuiteBenchtop',
             r'.*[Ee]nsuite.*[Bb]enchtop.*|.*[Bb]enchtop.*[Ee]nsuite.*'),
            ('EnsuiteShowerbase', r'.*[Ss]howerbase.*'),
            ('WallPaint', r'.*[Ww]all.*[Pp]aint.*|.*[Pp]aint.*[Ww]all.*'),
            ('WIRFitouts', r'.*walk in robe.*|.*WIR.*'),
            ('Downlights', r'.*[Dd]ownlights.*'),
            ('Landscaping', r'.*[Ll]andscaping.*'),
            ('Driveway', r'.*[Dd]riveway.*'),
            ('Promotion', r'.*[Pp]romotion.*'),
        )
        for field, pattern in inclusion_patterns:
            loader.add_xpath(field, descriptionXPath, re=pattern)

        return loader.load_item()
Exemple #17
0
    def parseItem(self, response):
        """Populate a ``RealtyspidersItem`` from one home-design detail page.

        The build type is derived from the request's ``Referer`` header and
        the storey from ``response.meta['Storey']``; every other field is
        scraped from the page with XPath. Room dimensions come from the
        "room_dimensions overview_table" block, free-text features from the
        description paragraphs filtered by a regular expression.

        Args:
            response: response of the detail page; ``response.meta`` must
                contain the ``'Storey'`` key.

        Returns:
            The item produced by ``RealtyLoader.load_item()``.
        """
        # Header values are bytes; when the request carries no Referer the
        # lookup yields None, so fall back to an empty value instead of
        # crashing on ``None.decode``.
        raw_referer = response.request.headers.get('Referer') or b''
        referer = raw_referer.decode('utf-8')
        hxs = HtmlXPathSelector(response)

        # ``{}`` is the room label looked up in the dimensions table. The
        # label used to be hard-coded to "Master Bedroom", which made every
        # ``roomsXpath.format(...)`` call below return the same query.
        roomsXpath = '''//div[@class="room_dimensions overview_table"]
                        //tr/td[text()="{}"]/following-sibling::td/text()'''
        # ``{}`` is the 1-based position of the slide in the gallery list.
        imgXpath = '//div[@class=" flexslider_gallery image hf-property-gallery"]/div/ul/li[{}]/img/@src'
        # NOTE(review): "col-md-8" looks like a CSS class name rather than an
        # element id; confirm that matching on @id is intended.
        descriptionXPath = '//div[@id="col-md-8"]/p/text()'

        # Rooms listed in ``self.oth`` that are present on the page, rendered
        # as "name:size" for the OtherInclusions field.
        other = []
        for name in self.oth:
            size = hxs.xpath(roomsXpath.format(name)).extract_first()
            if size:
                other.append('{}:{}'.format(name, size))

        loader = RealtyLoader(RealtyspidersItem(), hxs)
        loader.add_value('url', response.url)
        loader.add_value('BuildType', self._getBuildType(referer))
        loader.add_value('BuilderLogo', self.logo)
        loader.add_xpath('DesignName', '//h3[@class="title-post"]/text()')
        loader.add_value('State', 'MELBOURNE')
        loader.add_xpath('Squares', '//div[@class="info-box1 "]/p[1]/text()')
        loader.add_xpath('Bedrooms', '//li[@class="beds"]/text()')
        loader.add_xpath('Bathrooms', '//li[@class="baths"]/text()')
        loader.add_xpath('Garage', '//li[@class="garages"]/text()')
        loader.add_xpath(
            'BasePrice',
            '//div[@class="field-prefix" and text()="$"]/following-sibling::div[@class="field-value"]/text()')

        loader.add_value('Storey', self._getStorey(response.meta['Storey']))

        loader.add_xpath('HouseWidth', '//div[text()="MIN. BLOCK WIDTH"]/text()[2]')
        loader.add_xpath('HouseLength', '//div[text()="\n                        MIN. BLOCK LENGTH"]/text()[2]')
        loader.add_xpath('BrochureImage_pdf', '//a[text()="Brochure"]/@href')
        loader.add_xpath('InclusionsImage_pdf', '//a[text()="Inclusions"]/@href')
        loader.add_xpath('FloorPlanImage1', '//a[@class="floor-plan fancybox"]/img/@src')

        # Slide 1 is the main image; slides 2..16 fill Image1..Image15.
        loader.add_xpath('HomeDesignMainImage', imgXpath.format(1))
        for number in range(1, 16):
            loader.add_xpath('Image{}'.format(number), imgXpath.format(number + 1))

        # (item field, room labels tried in order). XPath text comparison is
        # case-sensitive, so both spellings of the study nook label are tried
        # for both study fields.
        roomFields = (
            ('MasterBedroomDimension', ['Master Bedroom']),
            ('Bedroom2Dimension', ['Bedroom 2']),
            ('Bedroom3Dimension', ['Bedroom 3']),
            ('Bedroom4Dimension', ['Bedroom 4']),
            ('StudyDimension', ['Study', 'Study nook', 'Study Nook']),
            ('Meals_DiningDimension', ['Meals']),
            ('FamilyDimension', ['Family']),
            ('AlfrescoDimension', ['Alfresco']),
            ('LoungeDimension', ['Lounge']),
            ('TheatreDimension', ['Theatre']),
            # Yes/No block: fed with the same dimension text as above.
            # Presumably the loader's processors reduce a non-empty match to
            # a yes/no value -- verify against RealtyLoader.
            ('TheatreRoom_Yes_No', ['Theatre']),
            ('SeparateMeals_Yes_No', ['Meals']),
            ('Alfresco_Yes_No', ['Alfresco']),
            ('Study_Yes_No', ['Study Nook', 'Study nook', 'Study']),
            ('Balcony_Yes_No', ['Balcony']),
        )
        for field, rooms in roomFields:
            loader.add_xpath(field, [roomsXpath.format(room) for room in rooms])

        loader.add_value('OtherInclusions', ', '.join(other))

        # (item field, regex applied to the description paragraphs).
        descriptionFields = (
            ('WalkinPantry_Yes_No', r'([Ww]alkin|[Pp]antry)'),
            # Accept the correct spelling "Butler" as well as the "Bulter"
            # typo the pattern was originally limited to.
            ('BultersPantry_Yes_No', r"[Bb]u(?:tl|lt)er['`]?s?"),
            ('SteelStructure_Yes_No',
             r'([Ss]teel.*[Ss]tructure)|([Ss]tructure.*[Ss]teel)'),
            # Warranty
            ('SturturalWarranty', r'.*guarantee.*|.*[Ww]arranty.*'),
            # Windows
            ('Windows', r'.*[Ww]indows?.*'),
            # Kitchen benchtop
            ('KitchenBenchtop',
             r'.*[Kk]itchen.*[Bb]enchtop.*|.*[Bb]enchtop.*[Kk]itchen.*'),
            # Security / alarm system (a stray "}" used to stand where the
            # "|" alternation belongs, so the pattern could never match).
            ('SecuritySystem',
             r'.*[Ss]ecurity.*[sS]ystem.*|.*[sS]ystem.*[Ss]ecurity.*'),
            # Energy efficiency rating
            ('EnergyRating',
             r'.*[Ee]nergy.*[rR]ating.*|.*[rR]ating.*[Ee]nergy.*'),
            # Kitchen appliances
            ('KitchenAppliance',
             r'.*([Kk]itchen.*[Aa]ppliance).*|.*([Aa]ppliance.*[Kk]itchen).*'),
            # Appliance brand
            # NOTE(review): this pattern looks for "Security System", which
            # looks like a copy-paste placeholder; left unchanged because the
            # intended pattern is not evident from this code.
            ('ApplianceBrand', r'.*[\w\s]+[Ss]ecurity System.*'),
            # Tiling above the basin
            ('Splashback', r'.*[Ss]plashback.*'),
            # Floor covering
            ('FloorCovering',
             r'.*[Ff]loor.*[Cc]overings?.*|.*[Cc]overings?.*[Ff]loor.*'),
            # Cooling
            ('Cooling', r'.*[Cc]ooling.*'),
            # Bath (previously matched "Security System" by mistake)
            ('Bath', r'.*[Bb]ath.*'),
            # Ceiling height (previously carried the Bath pattern by mistake)
            ('CeilingHeight', r'.*[Cc]eiling.*'),
            # Ensuite wall tiling
            ('EnsuiteWallTiling', r'.*[Tt]ile.*'),
            # Ensuite benchtop
            ('EnsuiteBenchtop',
             r'.*[Ee]nsuite.*[Bb]enchtop.*|.*[Bb]enchtop.*[Ee]nsuite.*'),
            # Shower base
            ('EnsuiteShowerbase', r'.*[Ss]howerbase.*'),
            # Wall paint
            ('WallPaint', r'.*[Ww]all.*[Pp]aint.*|.*[Pp]aint.*[Ww]all.*'),
            # Walk-in robe fit-outs
            ('WIRFitouts', r'.*walk in robe.*|.*WIR.*'),
            # Light fittings
            ('Downlights', r'.*[Dd]ownlights.*'),
            # Landscaping
            ('Landscaping', r'.*[Ll]andscaping.*'),
            # Driveway
            ('Driveway', r'.*[Dd]riveway.*'),
            # Promotion
            ('Promotion', r'.*[Pp]romotion.*'),
        )
        for field, pattern in descriptionFields:
            loader.add_xpath(field, descriptionXPath, re=pattern)

        return loader.load_item()
    def parseItem(self, response):
        """Populate a ``RealtyspidersItem`` from one home-design detail page.

        The build type is derived from the request's ``Referer`` header. For
        the ``'Displays Homes'`` build type the lot address is taken from
        ``response.meta['address']``; for every other build type the storey
        is taken from ``response.meta['storey']``. All remaining fields are
        scraped from the page with XPath.

        Args:
            response: response of the detail page; ``response.meta`` must
                contain ``'address'`` or ``'storey'`` as described above.

        Returns:
            The item produced by ``RealtyLoader.load_item()``.
        """
        # Header values are bytes; when the request carries no Referer the
        # lookup yields None, so fall back to an empty value instead of
        # crashing on ``None.decode``.
        raw_referer = response.request.headers.get('Referer') or b''
        referer = raw_referer.decode('utf-8')
        hxs = HtmlXPathSelector(response)
        BuildType = self._getBuildType(referer)

        # ``{}`` is the 1-based position of the slide inside the carousel.
        imgXpath = '''//main[@id="content"]/div[@class="carousel -arrows flickity "]
        /div[{}]/figure/div/img/@data-flickity-lazyload'''
        # ``{}`` is the row label in the areas table.
        areaXpath = '//div[@class="table-light"]/table/tbody/tr/td[text()="{}"]/following-sibling::td[1]/text()'
        # ``{}`` is the <dt> label in the "Room dimensions" definition list.
        roomsXpath = '''//h1[text()="Room dimensions"]/following-sibling::
                        dl/dt[text()="{}"]/following-sibling::dd[1]/text()'''
        # Both dimension regexes are applied to the same text nodes.
        sizeXpath = '//div[@class="h5 +color-dark"]/text()'

        # Rooms listed in ``self.oth`` that are present on the page, rendered
        # as "name:size" for the OtherInclusions field.
        other = []
        for name in self.oth:
            size = hxs.xpath(roomsXpath.format(name)).extract_first()
            if size:
                other.append('{}:{}'.format(name, size))

        loader = RealtyLoader(RealtyspidersItem(), hxs)
        loader.add_value('url', response.url)
        loader.add_value('BuildType', BuildType)
        loader.add_value('BuilderLogo', self.logo)
        if BuildType == 'Displays Homes':
            loader.add_value('Lot_BlockAddress', response.meta['address'])
        else:
            loader.add_value('Storey', response.meta['storey'])
        loader.add_xpath(
            'DesignName',
            '//h1[@class="h1 +margin-none +color-dark"]/strong/text()')

        loader.add_xpath('Bedrooms', '//dl[@class="rooms-count"]/dd[1]/text()')
        loader.add_xpath('Bathrooms', '//dl[@class="rooms-count"]/dd[2]/text()')
        loader.add_xpath('Garage', '//dl[@class="rooms-count"]/dd[3]/text()')
        loader.add_xpath(
            'BasePrice', '''//div/small[text()="Priced From"]/ancestor::
                    div/following-sibling::div[@class="h1 +color-dark"]/text()'''
        )

        # Raw strings keep ``\d`` / ``\.`` as regex escapes instead of relying
        # on Python passing unknown string escapes through unchanged.
        loader.add_xpath('HouseWidth', sizeXpath,
                         re=r'((?<=Exterior Width )\d+\.\d+m)')
        loader.add_xpath('HouseLength', sizeXpath,
                         re=r'((?<=Exterior Length )\d+\.\d+m)')

        loader.add_xpath('GarageDimension', areaXpath.format('Garage'))
        # The "Porch" row of the areas table feeds both alfresco fields.
        loader.add_xpath('AlfrescoDimension', areaXpath.format('Porch'))
        loader.add_xpath('Alfresco_Yes_No', areaXpath.format('Porch'))
        loader.add_xpath(
            'Squares',
            '//div[@class="table-light"]/table/tfoot/tr/td[text()="Total Area"]/following-sibling::td[1]/text()'
        )

        # (item field, room labels tried in order).
        studyLabels = [
            'Study',
            'Study (ground floor)',
            'Study (first floor)',
            'Study (First floor)',
        ]
        roomFields = (
            ('MasterBedroomDimension', ['Master Bed', 'Bedroom 1']),
            ('Bedroom2Dimension', ['Bed 2', 'Bedroom 2']),
            ('Bedroom3Dimension', ['Bed 3', 'Bedroom 3']),
            ('Bedroom4Dimension', ['Bed 4', 'Bedroom 4']),
            ('Study_Yes_No', studyLabels),
            ('StudyDimension', studyLabels),
            ('FamilyDimension', ['Family']),
            ('Meals_DiningDimension', [
                'Family / Meals',
                'Meals/Family',
                'Living / Meals',
                'Meals',
            ]),
            ('TheatreRoom_Yes_No', ['Theatre']),
            ('TheatreDimension', ['Theatre']),
            ('LivingArea', ['Living']),
        )
        for field, rooms in roomFields:
            loader.add_xpath(field, [roomsXpath.format(room) for room in rooms])

        loader.add_xpath(
            'BrochureImage_pdf',
            '//div[@class="+v-spacer-xs +t-margin-sm"]/div/a[text()="\t\tDownload Floorplan\n\t"]/@href'
        )
        loader.add_xpath('FloorPlanImage1',
                         '//div[@class="section +t-padding-md"]//img/@src')

        # Slides 1..15 fill Image1..Image15.
        # NOTE(review): Image1 repeats slide 1, the same slide used for
        # HomeDesignMainImage -- confirm the duplication is intended.
        loader.add_xpath('HomeDesignMainImage', imgXpath.format(1))
        for number in range(1, 16):
            loader.add_xpath('Image{}'.format(number), imgXpath.format(number))

        loader.add_value('OtherInclusions', ', '.join(other))

        return loader.load_item()