Esempio n. 1
0
    def parse_object(self, response):
        """Scrape one 123Wonen.nl detail page and yield a single listing dict.

        The yielded dict has the keys ``street``, ``city``, ``region``,
        ``volume``, ``rooms``, ``availability``, ``type``, ``pricePerMonth``,
        ``reference``, ``estateAgent`` and ``images``.

        :param response: the response for the detail page.
        """
        spec_selector = '.pand-specs.panddetail-desc li > span'

        # The active breadcrumb is split on ' - ': the first part is used as
        # the city, everything after it is re-joined into the street.
        breadcrumb_parts = Extractor.string(response,
                                            'a.active span').split(' - ')
        city = Extractor.string(breadcrumb_parts[0])
        street = ' - '.join(breadcrumb_parts[1:])

        volume = Structure.find_in_definition(response, spec_selector,
                                              'Woonoppervlakte')
        if isinstance(volume, str):
            volume = Extractor.volume(volume)

        # Keep the cleaned value; calling Extractor.string without assigning
        # its result would leave the raw text in the item.
        rooms = Structure.find_in_definition(response, spec_selector,
                                             'Kamers')
        if isinstance(rooms, str):
            rooms = Extractor.string(rooms)

        object_type = Structure.find_in_definition(response, spec_selector,
                                                   'Type')
        if isinstance(object_type, str):
            object_type = Extractor.string(object_type)

        # Only the text before the first '-' is handed to the euro parser.
        price = Extractor.string(response, '.panddetail-price')
        price = Extractor.euro(price.split('-')[0])

        # The guard has to look at the availability value itself, not at the
        # object type found above.
        availability = Structure.find_in_definition(response, spec_selector,
                                                    'Beschikbaarheid')
        if isinstance(availability, str):
            availability = Extractor.string(availability)

        yield {
            'street': street,
            'city': city,
            'region': self.region,
            'volume': volume,
            'rooms': rooms,
            'availability': availability,
            'type': object_type,
            'pricePerMonth': price,
            'reference': Extractor.urlWithoutQueryString(response),
            'estateAgent': '123Wonen.nl',
            'images': Extractor.images(
                response, 'a[data-fancybox="group1"]::attr(href)', True),
        }
Esempio n. 2
0
    def parse_object(self, response):
        """Scrape one Domica detail page and yield a single listing dict.

        The yielded dict has the keys ``street``, ``city``, ``region``,
        ``volume``, ``rooms``, ``availability``, ``type``, ``pricePerMonth``,
        ``reference``, ``estateAgent`` and ``images``. The availability is
        taken from ``response.meta['availability']``.

        :param response: the response for the detail page.
        """
        feature_selector = 'table.table-striped.feautures tr td'

        # The address heading is split on ',': the first part is the street,
        # the last space-separated token of the second part is the city.
        address_heading = Extractor.string(response, '.address').split(',')
        street = address_heading[0]
        city = address_heading[1].split(' ')[-1]

        volume = Structure.find_in_definition(response, feature_selector,
                                              'Gebruiksoppervlakte wonen')
        if isinstance(volume, str):
            volume = Extractor.volume(volume)

        # Drop everything from ' (w' onwards in the room count text.
        rooms = Structure.find_in_definition(response, feature_selector,
                                             'Aantal kamers')
        if isinstance(rooms, str):
            rooms = rooms.split(' (w')[0]

        # 'Type object' is a ', '-separated list; only the last entry is kept.
        object_type = Structure.find_in_definition(response, feature_selector,
                                                   'Type object')
        if isinstance(object_type, str):
            object_type = object_type.split(', ')[-1]

        # The guard has to look at the price value itself: checking the
        # object type here would leave the price unparsed whenever the type
        # is missing, and would call .split on a non-string price.
        price = Structure.find_in_definition(response, feature_selector,
                                             'Prijs')
        if isinstance(price, str):
            price = Extractor.euro(price.split('-')[0])

        yield {
            'street': street,
            'city': city,
            'region': self.region,
            'volume': volume,
            'rooms': rooms,
            'availability': response.meta['availability'],
            'type': object_type,
            'pricePerMonth': price,
            'reference': Extractor.urlWithoutQueryString(response),
            'estateAgent': 'Domica',
            'images': Extractor.images(
                response,
                '#cycle-slideshow2 > a.gallery-link img::attr(estate_crawler)',
                True),
        }
Esempio n. 3
0
    def parse_object(self, response):
        """Scrape one NederWoon detail page and yield a single listing dict.

        The yielded dict has the keys ``street``, ``city``, ``region``,
        ``volume``, ``rooms``, ``availability``, ``type``, ``pricePerMonth``,
        ``reference``, ``estateAgent`` and ``images``.

        :param response: the response for the detail page.
        """
        spec_selector = '.table-striped.table-specs td'

        street = Extractor.string(response, 'h1.text-regular')
        city = Extractor.string(response, '.col-md-8 .fixed-lh p.color-medium')
        availability = Extractor.string(
            response, '.col-md-8 .horizontal-items ul li:last-child')

        # Sometimes Nederwoon mistakenly adds the zip code in the city field,
        # filter it out. The pattern is a raw string so that \d and \s reach
        # the regex engine without triggering invalid-escape warnings.
        # NOTE(review): the trailing replace() removes every space, including
        # those inside multi-word city names - confirm this is intended.
        city = re.sub(r'\d{4}?\s*[a-zA-Z]{2}', '', city).replace(' ', '')

        rooms = Extractor.string(
            Structure.find_in_definition(response, spec_selector,
                                         'Aantal kamers'))
        price = Extractor.euro(
            Structure.find_in_definition(response, spec_selector,
                                         'Totale huur per maand', 2))
        volume = Extractor.volume(
            Structure.find_in_definition(response, spec_selector,
                                         'Woonoppervlakte'))
        object_type = Extractor.string(
            Structure.find_in_definition(response, spec_selector,
                                         'Soort woonruimte'))

        yield {
            'street': street,
            'city': city,
            'region': self.region,
            'volume': volume,
            'rooms': rooms,
            'availability': availability,
            'type': object_type,
            'pricePerMonth': price,
            'reference': Extractor.urlWithoutQueryString(response),
            'estateAgent': 'NederWoon',
            'images': Extractor.images(
                response,
                '.slider.slider-media > div img::attr(estate_crawler)'),
        }
Esempio n. 4
0
    def parse_object(self, response):
        """Scrape one Rotsvast detail page and yield a single listing dict.

        The yielded dict has the keys ``street``, ``city``, ``region``,
        ``volume``, ``rooms``, ``availability``, ``type``, ``pricePerMonth``,
        ``reference``, ``estateAgent`` and ``images``. Street and city are
        taken from ``response.meta``.

        :param response: the response for the detail page.
        """
        property_selector = '#properties .row > .col-xs-6'

        availability = Extractor.string(
            Structure.find_in_definition(response, property_selector,
                                         'Ingangsdatum'))
        rooms = Extractor.string(
            Structure.find_in_definition(response, property_selector,
                                         'Aantal kamers'))

        # Only the text before the first '-' is handed to the euro parser.
        # Splitting is limited to real strings so that a page without a
        # 'Totale huur' entry does not abort the item with an AttributeError.
        # NOTE(review): assumes Extractor.euro accepts a missing value the
        # same way the other Extractor helpers are used here - confirm.
        raw_price = Structure.find_in_definition(response, property_selector,
                                                 'Totale huur')
        if isinstance(raw_price, str):
            raw_price = raw_price.split('-')[0]
        price = Extractor.euro(raw_price)

        volume = Extractor.volume(
            Structure.find_in_definition(response, property_selector,
                                         'Oppervlakte (ca.)'))
        object_type = Extractor.string(
            Structure.find_in_definition(response, property_selector,
                                         'Soort'))

        yield {
            'street': response.meta['street'],
            'city': response.meta['city'],
            'region': self.region,
            'volume': volume,
            'rooms': rooms,
            'availability': availability,
            'type': object_type,
            'pricePerMonth': price,
            'reference': Extractor.urlWithoutQueryString(response),
            'estateAgent': 'Rotsvast',
            'images': Extractor.images(
                response, '.slider img::attr(estate_crawler)', True),
        }
Esempio n. 5
0
    def parse_object(self, response):
        """Scrape one Eervast detail page and yield a single listing dict.

        The yielded dict has the keys ``street``, ``city``, ``region``,
        ``volume``, ``rooms``, ``availability``, ``type``, ``pricePerMonth``,
        ``reference``, ``estateAgent`` and ``images``. Street and city are
        taken from ``response.meta``.

        :param response: the response for the detail page.
        """

        def house_info(label):
            # Look up one labelled entry in the '.house-info' block.
            return Structure.find_in_definition(response, '.house-info div',
                                                label)

        house_type = house_info('Type woning')
        living_area = Extractor.volume(house_info('Woonoppervlak'))
        room_count = house_info('Aantal kamers')
        monthly_price = Extractor.euro(response,
                                       '.house-info div:nth-child(2)')
        available_from = Structure.find_in_definition(
            response, '#tab-1 tr td', 'Aanvaarding')

        listing = {
            'street': response.meta['street'],
            'city': response.meta['city'],
            'region': self.region,
            'volume': living_area,
            'rooms': room_count,
            'availability': available_from,
            'type': house_type,
            'pricePerMonth': monthly_price,
            'reference': Extractor.urlWithoutQueryString(response),
            'estateAgent': 'Eervast',
            'images': Extractor.images(
                response, '.tab-content > a.gallery::attr(href)', True),
        }
        yield listing
Esempio n. 6
0
    def parse(self, response):
        """Walk an overview page and schedule the detail pages worth scraping.

        Yields one ``scrapy.Request`` (callback ``self.parse_object``) for
        every ``.pandlist-container`` entry that is neither already rented
        out / under option nor of an excluded object type, followed by a
        request for the next overview page (callback ``self.parse``) when a
        "volgende" link is present.

        :param response: the response for the overview page.
        """
        unavailable_statuses = ('verhuurd', 'in optie')
        skipped_types = (
            'garagebox', 'berging/opslag', 'kantoorruimte', 'loods',
            'parkeerplaats', 'winkelpand',
        )

        pageSelector = Selector(response)

        for listing in pageSelector.css('.pandlist-container'):
            # Determine if the object is still available for rent
            status = str(Extractor.string(listing, '.pand-status')).lower()
            if status in unavailable_statuses:
                continue

            # Skip crawling storage spaces and garages. A listing without a
            # usable 'Type' entry is still crawled instead of aborting the
            # whole page (and its pagination) on a missing value.
            listing_type = Structure.find_in_definition(
                listing, '.pand-specs li > span', 'Type')
            if (isinstance(listing_type, str)
                    and listing_type.lower() in skipped_types):
                continue

            yield scrapy.Request(
                Extractor.string(
                    listing,
                    'a.textlink-design:contains("Details")::attr(href)'),
                self.parse_object)

        # Crawl the next pages
        nextPageSelector = '.productBrowser a:contains("volgende")'
        nextPageLink = pageSelector.css(nextPageSelector).extract_first()
        if isinstance(nextPageLink, str):
            yield scrapy.Request(
                Extractor.url(response, pageSelector,
                              nextPageSelector + '::attr(href)'),
                self.parse,
            )
Esempio n. 7
0
    def parse_object(self, response):
        """Scrape one Van der Hulst detail page and yield a single listing dict.

        The yielded dict has the keys ``street``, ``city``, ``region``,
        ``volume``, ``rooms``, ``availability``, ``type``, ``pricePerMonth``,
        ``reference``, ``estateAgent`` and ``images``.

        :param response: the response for the detail page.
        """

        def overview(label):
            # Look up one labelled entry in the property overview list.
            return Structure.find_in_definition(
                response, '.property-overview dl > *', label)

        listing = {
            'street': Extractor.string(response, 'h1.entry-title'),
            # The city text is title-cased before it is stored.
            'city': Extractor.string(overview('Plaats')).title(),
            'region': self.region,
            'volume': Extractor.volume(overview('Woonoppervlakte')),
            'rooms': Extractor.string(overview('Kamers')),
            'availability': Extractor.string(overview('Status')),
            'type': Extractor.string(overview('Type')),
            'pricePerMonth': Extractor.euro(overview('Prijs')),
            'reference': Extractor.urlWithoutQueryString(response),
            'estateAgent': 'Van der Hulst',
            'images': Extractor.images(
                response, '.property-detail-gallery a::attr(href)', True),
        }
        yield listing