Exemple #1
0
        class item(ItemElement):
            """Map one search-result node to a ``Housing`` object.

            Every XPath below is relative (``./``) to the node being parsed.
            """
            klass = Housing

            obj_id = CleanText('./@data-classified-id')
            obj_title = CleanText('./div/h2[@itemprop="name"]/a')
            # The predicate has to test the ``class`` *attribute* (``@class``);
            # a bare ``class`` selects a child element named <class> instead,
            # which never matches and leaves the location empty.
            obj_location = CleanText(
                './div/h2[@itemprop="name"]/span[@class="item-localisation"]')
            obj_cost = CleanDecimal('./div/div/span[@class="price-label"]')
            # Currency symbol is taken from the price label, euro by default.
            obj_currency = Regexp(
                CleanText('./div/div/span[@class="price-label"]'),
                '.*([%s%s%s])' % (u'€', u'$', u'£'),
                default=u'€')
            obj_text = CleanText('./div/div/div[@itemprop="description"]')
            # Area is the number right before " m2" in the title (group 2).
            obj_area = CleanDecimal(Regexp(
                CleanText('./div/h2[@itemprop="name"]/a'),
                r'(.*?)(\d*) m2(.*?)',
                '\\2',
                default=None),
                                    default=NotAvailable)
            obj_price_per_meter = PricePerMeterFilter()

            def obj_phone(self):
                """Return the phone number, or ``NotLoaded`` when it is elided.

                A value containing ``...`` is treated as not loaded yet.
                """
                phone = CleanText(
                    './div/div/ul/li/span[@class="js-clickphone"]',
                    replace=[(u'Téléphoner : ', u'')],
                    default=NotAvailable)(self)

                # ``phone`` may be the NotAvailable default, which is not a
                # string: only run the substring test on a non-empty value.
                if phone and '...' in phone:
                    return NotLoaded
                return phone

            def obj_photos(self):
                """Return a one-element list holding the thumbnail photo."""
                url = CleanText('./div/div/a/img[@itemprop="image"]/@src')(
                    self)
                return [HousingPhoto(url)]
Exemple #2
0
class SeLogerItem(ItemElement):
    """Declarative mapping from one listing node to a ``Housing`` object.

    Each selector is the name of a child element of the listing node.
    """
    klass = Housing

    obj_id = CleanText('idAnnonce')
    # Title is rendered as "<titre> <surface><unit> - <ville>".
    obj_title = Format("%s %s%s - %s",
                       CleanText('titre'),
                       CleanText('surface'),
                       CleanText('surfaceUnite'),
                       CleanText('ville'))
    obj_date = DateTime(CleanText('dtFraicheur'))
    obj_cost = CleanDecimal('prix')

    # Currency symbol found in the price unit, euro when none is recognised.
    obj_currency = Regexp(
        CleanText('prixUnite'),
        '.*([%s%s%s])' % (u'€', u'$', u'£'),
        default=u'€',
    )

    obj_area = CleanDecimal('surface', default=NotAvailable)
    obj_price_per_meter = PricePerMeterFilter()
    obj_text = CleanText('descriptif')
    obj_rooms = CleanDecimal('nbPiece|nbPieces', default=NotAvailable)
    obj_bedrooms = CleanDecimal('nbChambre|nbChambres', default=NotAvailable)

    def obj_location(self):
        """Return ``"<address> <city> (<postcode>)"``.

        The district (``quartier``) replaces the address when the latter is
        empty and a district is present.
        """
        address = CleanText('adresse', default="")(self)
        district = CleanText('quartier', default=None)(self)
        if district is not None and not address:
            address = district
        city = CleanText('ville')(self)
        postcode = CleanText('cp')(self)
        return u'%s %s (%s)' % (address, city, postcode)

    obj_station = CleanText('proximite', default=NotAvailable)
    obj_url = CleanText('permaLien')
Exemple #3
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from a detail page (absolute XPaths)."""
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('h1')

        obj_rooms = CleanDecimal(
            '//div[@class="stats"]/section/div[@id="divpieces"]/span[@class="stat"]'
        )

        # The price is the second <h2> found under the "stats" block.
        obj_cost = CleanDecimal('(//div[@class="stats"]/div/h2)[2]')
        obj_currency = u'€'
        obj_utilities = UTILITIES.UNKNOWN
        obj_text = CleanHTML('//div[@class="textes"]')
        obj_location = CleanText('//input[@id="adressegeo"]/@value')
        obj_url = CleanText('//input[@id="hfurldetail"]/@value')

        # Digits preceding "m 2" in the surface stat; NotAvailable otherwise.
        obj_area = CleanDecimal(
            Regexp(
                CleanText(
                    '//div[@class="stats"]/section/div[@id="divsurface"]/span[@class="stat"]'
                ),
                u'\s?(\d+)\sm\s2',
                default=NotAvailable,
            ),
            default=NotAvailable,
        )

        obj_price_per_meter = PricePerMeterFilter()
        obj_phone = CleanText('//input[@id="hftelA"]/@value')
        # No date is read from the page: the callable yields the current time.
        obj_date = datetime.now

        def obj_photos(self):
            """Return one ``HousingPhoto`` per ``@urlbig`` of the image list."""
            paths = self.xpath('//div[@id="plistimage"]/a/@urlbig')
            return [
                HousingPhoto(
                    unicode("http://www.entreparticuliers.com/" + path))
                for path in paths
            ]
Exemple #4
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from a detail page (absolute XPaths)."""
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@itemprop="name"]')
        obj_location = CleanText('//span[@class="informations-localisation"]')
        obj_cost = CleanDecimal('//span[@itemprop="price"]')
        # Currency symbol found in the price, euro when none is recognised.
        obj_currency = Regexp(CleanText('//span[@itemprop="price"]'),
                              '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
        obj_text = CleanHTML('//div[@itemprop="description"]')
        obj_url = BrowserURL('housing', _id=Env('_id'))
        # Area is the number right before " m2" in the title (group 2).
        # The inner Regexp gets its own default so that a title without
        # "m2" yields NotAvailable rather than a regexp error.
        obj_area = CleanDecimal(Regexp(CleanText('//h1[@itemprop="name"]'),
                                       r'(.*?)(\d*) m2(.*?)', '\\2',
                                       default=NotAvailable),
                                default=NotAvailable)
        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """Return the photos of the thumbnail strip.

            Thumbnail URLs served through ``thbr.figarocms.net`` embed the
            original image URL, which is extracted; any other URL is kept
            unchanged instead of failing the whole listing.
            """
            photos = []
            for img in XPath('//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self):
                src = CleanText('./@src')(img)
                url = Regexp(CleanText('./@src'),
                             r'http://thbr\.figarocms\.net.*(http://.*)',
                             default=src)(img)
                if url:
                    photos.append(HousingPhoto(url))
            return photos

        def obj_details(self):
            """Return the feature list plus the energy consumption entry."""
            details = dict()
            for item in XPath('//div[@class="features clearfix"]/ul/li')(self):
                key = CleanText('./span[@class="name"]')(item)
                value = CleanText('./span[@class="value"]')(item)
                if value and key:
                    details[key] = value

            key = CleanText('//div[@class="title-dpe clearfix"]')(self)
            value = CleanText('//div[@class="energy-consumption"]')(self)
            if value and key:
                details[key] = value
            return details
Exemple #5
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from a detail page (absolute XPaths)."""
        klass = Housing
        # The identifier is whatever follows the colon of the reference line.
        obj_id = Regexp(CleanText('//p[has-class("property-reference")]'), r'\:(.*)$')

        def obj_url(self):
            """Return the URL of the page being parsed."""
            return self.page.url

        # Digits (and spaces) preceding "m" in the "Surface" row.
        obj_area = CleanDecimal(
            Regexp(
                CleanText('//table[@id="table"]//span[contains(text(), "Surface")]//following-sibling::span[has-class("r")]'),
                r'([\d\ ]+)m'
            ),
            default=NotAvailable
        )
        obj_title = CleanText('//span[has-class("mainh1")]')
        obj_cost = CleanDecimal('//span[has-class("price-info")]')
        obj_currency = Currency.get_currency(u'€')
        obj_rooms = CleanDecimal('//table[@id="table"]//span[contains(text(), "Pièce")]//following-sibling::span[has-class("r")]')
        obj_bedrooms = CleanDecimal('//table[@id="table"]//span[contains(text(), "Chambre")]//following-sibling::span[has-class("r")]')
        # Location is the part of the main heading after its last comma group.
        obj_location = CleanText(Regexp(CleanText('//span[has-class("mainh1")]'), r',(.+)$'))
        obj_text = CleanText('//div[has-class("property-description-main")]')
        # The character class must accept "/" (date separator); the previous
        # "[\d\\]" accepted backslashes and so stopped at the first slash.
        # The date is presumably written dd/mm/yyyy, hence dayfirst=True.
        # NOTE(review): the default is computed once, when the class body is
        # executed, not at parse time — confirm this is acceptable.
        obj_date = Date(
            Regexp(
                CleanText('//div[has-class("property-description-main")]'),
                r'Mise à jour le ([\d/]+)', default=datetime.today()
            ),
            dayfirst=True
        )
        obj_phone = Attr('//button[@id="display-phonenumber-1"]', 'data-phone-number')

        def obj_photos(self):
            """Return the slider photos, skipping empty or "/"-rooted URLs."""
            photos = []
            for photo in self.xpath('//div[@id="bxSliderContainer"]//ul//li//img'):
                url = Attr('.', 'src')(photo)
                # startswith() is safe on an empty string, unlike url[0].
                if url and not url.startswith('/'):
                    photos.append(HousingPhoto(url))
            return photos

        def obj_details(self):
            """Return the GES and DPE symbols read from the page."""
            return {
                'GES': CleanText('//span[@id="gassymbol"]', '')(self),
                'DPE': CleanText('//span[@id="energysymbol"]', '')(self),
            }

        def obj_utilities(self):
            """Derive the utilities status from the "CC"/"HC" price marker."""
            price = CleanText('//span[has-class("price-info")]')(self)
            if 'CC' in price:
                return UTILITIES.INCLUDED
            elif 'HC' in price:
                return UTILITIES.EXCLUDED
            else:
                return UTILITIES.UNKNOWN

        obj_station = NotAvailable
        obj_price_per_meter = PricePerMeterFilter()
Exemple #6
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from a detail document (absolute XPaths)."""
        klass = Housing

        obj_title = CleanText('//Titre')

        def obj_cost(self):
            """Return the amount written before the euro sign in ``//Prix``.

            A falsy first result (no match or zero) triggers a second,
            default-less extraction which raises when the price is missing.
            """
            cost = CleanDecimal(Regexp(CleanText('//Prix'),
                                       u'(.*)€.*',
                                       default=None),
                                default=None)(self)
            return cost if cost else CleanDecimal(
                Regexp(CleanText('//Prix'), u'(.*)€'))(self)

        obj_currency = u'€'

        obj_text = CleanText('//Description')
        obj_location = CleanHTML(CleanText('//Localisation'))

        obj_area = CleanDecimal('//SurfaceBien',
                                replace_dots=True,
                                default=NotAvailable)
        obj_price_per_meter = PricePerMeterFilter()
        obj_phone = CleanText('//Telephone')
        # No date is read from the document: the callable yields "now".
        obj_date = datetime.now

        def obj_details(self):
            """Return the fixed detail entries plus the optional ones."""
            details = {}
            details[u'Type de bien'] = CleanText('//Tbien')(self)
            details[u'Reference'] = CleanText('(//Reference)[1]')(self)
            details[u'Nb pièces'] = CleanText('//Nbpieces')(self)

            # Optional entries are only stored when they are not empty.
            _ener = CleanText('//Energie')(self)
            if _ener:
                details[u'Energie'] = _ener

            _lat = CleanText('//Latitude')(self)
            if _lat:
                details[u'Latitude'] = _lat

            _long = CleanText('//Longitude')(self)
            if _long:
                details[u'Longitude'] = _long

            return details

        def obj_photos(self):
            """Return one photo per ``//LienImage<i>``, i in 1..``//NbPhotos``.

            Thumbnail dimensions in the URL are swapped for the large ones and
            relative links are prefixed with the site root.
            """
            # CleanDecimal yields a Decimal, which range() does not accept:
            # convert explicitly. A missing counter means "no photo".
            count = int(CleanDecimal('//NbPhotos', default=0)(self))
            photos = []
            for i in range(1, count + 1):
                img = CleanText('//LienImage%s' % i,
                                replace=[(u'w=69&h=52', u'w=786&h=481')])(self)
                if not img:
                    # Announced but absent image: nothing to link to.
                    continue
                url = img if img.startswith(
                    'http') else u'http://www.entreparticuliers.com%s' % img
                photos.append(HousingPhoto(url))
            return photos
Exemple #7
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from a detail page (absolute XPaths)."""
        klass = Housing

        def parse(self, el):
            """Pre-compute ``env['area']`` and ``env['details']``.

            Every ``<h2>`` under a ``div.line`` is a property/value pair: the
            "Surface" one feeds the area, all others go to the details dict.
            """
            details = dict()
            self.env['area'] = NotAvailable
            for item in el.xpath('//div[@class="line"]/h2'):
                label = CleanText('./span[@class="property"]')(item)
                if 'Surface' in label:
                    self.env['area'] = CleanDecimal(Regexp(CleanText('./span[@class="value"]'), '(.*)m.*'),
                                                    replace_dots=(',', '.'))(item)
                    continue

                key = u'%s' % label
                if 'GES' in key or 'Classe' in key:
                    # Energy ratings keep their value in a <noscript> link.
                    details[key] = CleanText('./span[@class="value"]/noscript/a')(item)
                else:
                    details[key] = CleanText('./span[@class="value"]')(item)

            self.env['details'] = details

        obj_id = Env('_id')
        obj_title = CleanText('//title')
        obj_cost = CleanDecimal('//h2[@itemprop="price"]/@content', default=Decimal(0))

        # Currency symbol found in the price, euro when none is recognised.
        obj_currency = Regexp(CleanText('//h2[@itemprop="price"]/span[@class="value"]'),
                              '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
        obj_text = CleanText('//p[@itemprop="description"]')
        obj_location = CleanText('//span[@itemprop="address"]')
        obj_details = Env('details')
        obj_area = Env('area')
        obj_price_per_meter = PricePerMeterFilter()
        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_date(self):
            """Return the publication date parsed from the French text."""
            _date = Regexp(CleanText('//p[has-class("line")]', replace=[(u'à', '')]),
                           '.*Mise en ligne le (.*)')(self)

            # Replace French date words by their English counterpart.
            for fr, en in DATE_TRANSLATE_FR:
                _date = fr.sub(en, _date)

            # DateTime needs a selector: hand the string over through env.
            self.env['tmp'] = _date
            return DateTime(Env('tmp'), LinearDateGuesser())(self)

        def obj_photos(self):
            """Return the gallery photos, or the meta image as a fallback.

            Gallery URLs are read from the ``images[<n>] = "...jpg";``
            assignments of the page scripts.
            """
            # ``\d+`` so that entries with an index of 10 or more are kept.
            items = re.findall(r'images\[\d+\]\s*=\s*"([\w/\.]*\.jpg)";',
                               CleanText('//script')(self))
            photos = [HousingPhoto(u'http:%s' % item) for item in items]
            if not photos:
                img = CleanText('//meta[@itemprop="image"]/@content',
                                default=None)(self)
                if img:
                    photos.append(HousingPhoto(img))

            return photos
Exemple #8
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from a detail document (absolute XPaths)."""
        klass = Housing

        obj_id = Env('_id')
        obj_type = EPAdvertType(CleanText('//rubrique'))
        obj_advert_type = ADVERT_TYPES.PERSONAL
        obj_house_type = EPHouseType(CleanText('//tbien'))
        obj_title = CleanText('//titre')
        obj_rooms = CleanDecimal('//pieces')
        obj_cost = CleanDecimal('//prix')
        obj_currency = Currency.get_currency(u'€')
        obj_utilities = UTILITIES.UNKNOWN
        # The text is read from the same element as the title.
        obj_text = CleanText('//titre')
        obj_location = CleanText('//ville')
        obj_url = CleanText('//urlDetailAnnonce')
        obj_area = CleanDecimal('//surface')
        obj_price_per_meter = PricePerMeterFilter()
        obj_phone = CleanText('//telephone1')
        obj_date = DateTime(CleanText('//DateCheck'))

        def obj_GES(self):
            """Return the ``ENERGY_CLASS`` member named by ``//GSE``."""
            letter = CleanText('//GSE')(self).upper()
            return getattr(ENERGY_CLASS, letter, NotAvailable)

        def obj_photos(self):
            """Return a photo for each non-empty ``//UrlImage1..3``."""
            urls = (
                CleanText(xpath)(self)
                for xpath in ['//UrlImage1', '//UrlImage2', '//UrlImage3']
            )
            return [HousingPhoto(url) for url in urls if url]

        def obj_DPE(self):
            """Return the ``ENERGY_CLASS`` member named by ``//DPE``."""
            letter = CleanText('//DPE')(self).upper()
            return getattr(ENERGY_CLASS, letter, NotAvailable)

        def obj_details(self):
            """Return the optional characteristics that are not empty."""
            # (xpath of the element, label used as key in the result)
            sources = [
                ('//Nb_Etage', 'Nombre d\'etages'),
                ('//Neuf', 'Neuf'),
                ('//Ancien_avec_du_Charme', 'Ancien avec charme'),
                ('//Avec_terasse', 'Avec Terrasse'),
                ('//latitude', 'Latitude'),
                ('//longitude', 'Longitude'),
                ('//loyer', 'Loyer'),
                ('//piscine', 'Piscine'),
                ('//surface_balcon', 'Surface du balcon'),
                ('//surface_exp', 'Surface exploitable'),
                ('//surface_terrain', 'Surface du Terrain'),
                ('//Meuble', 'furnished'),
            ]

            details = {}
            for xpath, label in sources:
                text = CleanText(xpath)(self)
                if text:
                    details[label] = text
            return details
Exemple #9
0
        class item(ItemElement):
            """Map one search-result box to a ``Housing`` object.

            Every XPath below is relative (``./``) to the box being parsed.
            """
            klass = Housing

            def condition(self):
                """Keep only boxes whose title links to ``/annonces/...``."""
                return Regexp(Link('./div[has-class("box-header")]/a[@class="title-item"]'), '/annonces/(.*)', default=None)(self)

            obj_id = Regexp(Link('./div[has-class("box-header")]/a[@class="title-item"]'), '/annonces/(.*)')
            obj_title = CleanText('./div[has-class("box-header")]/a[@class="title-item"]')
            # Area is the number right before " m²" (group 2). The inner
            # Regexp gets its own default so that a heading without a surface
            # yields NotAvailable rather than a regexp error.
            obj_area = CleanDecimal(Regexp(CleanText('./div[has-class("box-header")]/a/span[@class="h1"]'),
                                           r'(.*?)(\d*) m\xb2(.*?)', '\\2',
                                           default=NotAvailable),
                                    default=NotAvailable)
            obj_cost = CleanDecimal(CleanText('./div[has-class("box-header")]/a/span[@class="price"]'),
                                    replace_dots=True, default=Decimal(0))
            # Currency symbol found in the price, euro when none is recognised.
            obj_currency = Regexp(CleanText('./div[has-class("box-header")]/a/span[@class="price"]'),
                                  '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
            obj_utilities = UTILITIES.UNKNOWN

            def obj_date(self):
                """Return the date found after " / " in the header line."""
                _date = Regexp(CleanText('./div[has-class("box-header")]/p[@class="date"]'),
                               '.* / (.*)')(self)
                return parse_french_date(_date)

            obj_station = CleanText('./div[@class="box-body"]/div/div/p[@class="item-transports"]', default=NotAvailable)
            obj_location = CleanText('./div[@class="box-body"]/div/div/p[@class="item-description"]/strong')
            obj_text = CleanText('./div[@class="box-body"]/div/div/p[@class="item-description"]')
            obj_rooms = CleanDecimal(
                './div[@class="box-body"]/div/div/div[@class="clearfix"]/ul[has-class("item-summary")]/li[1]/strong',
                default=NotAvailable
            )
            obj_price_per_meter = PricePerMeterFilter()

            def obj_bedrooms(self):
                """Return the second summary figure when more than two exist."""
                rooms_bedrooms_area = XPath(
                    './div[@class="box-body"]/div/div/div[@class="clearfix"]/ul[has-class("item-summary")]/li'
                )(self)
                # With two entries or fewer, no bedroom count is read.
                if len(rooms_bedrooms_area) > 2:
                    return CleanDecimal(
                        './div[@class="box-body"]/div/div/div[@class="clearfix"]/ul[has-class("item-summary")]/li[2]/strong',
                        default=NotAvailable
                    )(self)
                else:
                    return NotAvailable

            obj_url = Format(
                u'http://www.pap.fr%s',
                Link(
                    './div[@class="box-body"]/div/div/div[@class="clearfix"]/div[@class="float-right"]/a'
                )
            )

            def obj_photos(self):
                """Return one ``HousingPhoto`` per image of the box body."""
                return [
                    HousingPhoto(u'%s' % img)
                    for img in XPath('./div[@class="box-body"]/div/div/a/img/@src')(self)
                ]
Exemple #10
0
class SeLogerItem(ItemElement):
    """Declarative mapping from one listing node to a ``Housing`` object.

    Each selector is the name of a child element of the listing node.
    """
    klass = Housing

    obj_id = CleanText('idAnnonce')

    def obj_type(self):
        """Return the post type whose ``TYPES`` value is the transaction id.

        ``NotAvailable`` is returned for an unknown id, mirroring
        ``obj_house_type``, instead of letting ``StopIteration`` escape.
        """
        transaction_id = int(CleanText('idTypeTransaction')(self))
        post_type = next(
            (k for k, v in TYPES.items() if v == transaction_id),
            NotAvailable)
        if post_type == POSTS_TYPES.FURNISHED_RENT:
            # SeLoger does not let us discriminate between furnished and not
            # furnished.
            return POSTS_TYPES.RENT
        return post_type

    def obj_house_type(self):
        """Return the house type whose ``RET`` value is ``idTypeBien``."""
        house_id = CleanText('idTypeBien')(self)
        return next((k for k, v in RET.items() if v == house_id),
                    NotAvailable)

    # Title is rendered as "<titre> <surface><unit> - <ville>".
    obj_title = Format(
        "%s %s%s - %s",
        CleanText('titre'),
        CleanText('surface'),
        CleanText('surfaceUnite'),
        CleanText('ville'),
    )
    obj_date = DateTime(CleanText('dtFraicheur'))
    obj_cost = CleanDecimal('prix')

    obj_currency = Currency('prixUnite')

    obj_area = CleanDecimal('surface', default=NotAvailable)
    obj_price_per_meter = PricePerMeterFilter()
    obj_text = CleanText('descriptif')
    obj_rooms = CleanDecimal('nbPiece|nbPieces', default=NotAvailable)
    obj_bedrooms = CleanDecimal('nbChambre|nbChambres', default=NotAvailable)

    def obj_location(self):
        """Return ``"<address> <city> (<postcode>)"``.

        The district (``quartier``) replaces the address when the latter is
        empty and a district is present.
        """
        location = CleanText('adresse', default="")(self)
        quartier = CleanText('quartier', default=None)(self)
        if not location and quartier is not None:
            location = quartier
        ville = CleanText('ville')(self)
        cp = CleanText('cp')(self)
        return u'%s %s (%s)' % (location, ville, cp)

    obj_station = CleanText('proximite', default=NotAvailable)
    obj_url = CleanText('permaLien')
Exemple #11
0
class SeLogerItem(ItemElement):
    """Declarative mapping from one listing node to a ``Housing`` object.

    Each selector is the name of a child element of the listing node.
    """
    klass = Housing

    obj_id = CleanText('idAnnonce')
    obj_title = CleanText('titre')
    obj_date = DateTime(CleanText('dtFraicheur'))
    obj_cost = CleanDecimal('prix')
    # The currency is the raw text of the price unit element.
    obj_currency = CleanText('prixUnite')
    # Area and station fall back to NotAvailable.
    obj_area = CleanDecimal('surface', default=NotAvailable)
    # NOTE(review): presumably derived from cost and area — confirm against
    # the PricePerMeterFilter definition.
    obj_price_per_meter = PricePerMeterFilter()
    obj_text = CleanText('descriptif')
    obj_location = CleanText('ville')
    obj_station = CleanText('proximite', default=NotAvailable)
    obj_url = CleanText('permaLien')
Exemple #12
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from a detail page (absolute XPaths)."""
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@class="clearfix"]/span[@class="title"]')
        obj_cost = CleanDecimal('//h1[@class="clearfix"]/span[@class="price"]',
                                replace_dots=True)
        # Currency symbol found in the price, euro when none is recognised.
        obj_currency = Regexp(
            CleanText('//h1[@class="clearfix"]/span[@class="price"]'),
            '.*([%s%s%s])' % (u'€', u'$', u'£'),
            default=u'€')
        # Area is the number right before " m²" in the title (group 2). The
        # inner Regexp gets its own default so that a title without a surface
        # yields NotAvailable rather than a regexp error.
        obj_area = CleanDecimal(Regexp(
            CleanText('//h1[@class="clearfix"]/span[@class="title"]'),
            r'(.*?)(\d*) m\xb2(.*?)', '\\2',
            default=NotAvailable),
                                default=NotAvailable)
        obj_price_per_meter = PricePerMeterFilter()
        obj_location = CleanText('//div[@class="item-geoloc"]/h2')
        obj_text = CleanText(CleanHTML('//p[@class="item-description"]'))
        obj_station = CleanText('//div[@class="metro"]')
        obj_phone = CleanHTML('(//div[has-class("tel-wrapper")])[1]')
        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_details(self):
            """Return the summary items plus the energy box entry."""
            details = dict()
            for item in XPath('//ul[@class="item-summary"]/li')(self):
                # Key is the item's own text, value its <strong> child.
                key = CleanText('.', children=False)(item)
                value = CleanText('./strong')(item)
                if value and key:
                    details[key] = value

            key = CleanText(
                '//div[@class="box energy-box"]/div/div/p[@class="h3"]')(self)
            # Rendered as "<second paragraph>(<class attribute, stripped of
            # 'rank' and '-'>)".
            value = Format(
                '%s(%s)',
                CleanText('(//div[@class="box energy-box"]/div/div/p)[2]'),
                CleanText('//div[@class="box energy-box"]/div/div/@class',
                          replace=[('-', ''), ('rank', '')]))(self)
            if value and key:
                details[key] = value
            return details

        def obj_photos(self):
            """Return one ``HousingPhoto`` per showcase thumbnail."""
            return [
                HousingPhoto(u'%s' % img)
                for img in XPath(
                    '//div[has-class("showcase-thumbnail")]/img/@src')(self)
            ]
Exemple #13
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from a JSON document (``Dict`` paths)."""
        klass = Housing

        obj_id = Env('_id')
        obj_title = Dict('characteristics/titleWithTransaction')
        obj_location = Format('%s %s %s', Dict('location/address'),
                              Dict('location/postalCode'),
                              Dict('location/cityLabel'))
        obj_cost = TypeDecimal(Dict('characteristics/price'))
        obj_currency = u'€'
        obj_text = CleanHTML(Dict('characteristics/description'))
        obj_url = BrowserURL('housing_html', _id=Env('_id'))
        obj_area = TypeDecimal(Dict('characteristics/area'))
        obj_date = FromTimestamp(Dict('characteristics/date'))
        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """Return one photo per image that carries an ``xl`` URL.

            URLs served through ``thbr.figarocms.net`` embed the original
            image URL, which is extracted; any other URL is used unchanged.
            """
            photos = []
            for img in Dict('characteristics/images')(self):
                url = img.get('xl')
                if not url:
                    # No large rendition: re.search() would fail on None.
                    continue
                m = re.search(r'http://thbr\.figarocms\.net.*(http://.*)', url)
                photos.append(HousingPhoto(m.group(1) if m else url))
            return photos

        def obj_details(self):
            """Return fees, bedrooms, energy, rooms and availability."""
            details = {}
            details['fees'] = Dict('characteristics/fees')(self)
            details['bedrooms'] = Dict('characteristics/bedroomCount')(self)
            details['energy'] = Dict(
                'characteristics/energyConsumptionCategory')(self)
            # roomCount is indexed, so it is treated as a sequence here.
            rooms = Dict('characteristics/roomCount')(self)
            if len(rooms):
                details['rooms'] = rooms[0]
            details['available'] = Dict('characteristics/available',
                                        default=NotAvailable)(self)
            return details
Exemple #14
0
        class item(ItemElement):
            """Map one JSON search result to a ``Housing`` object."""
            klass = Housing

            # Identifier is rendered as "<rubrique>#<idannonce>".
            obj_id = Format("%s#%s", Dict('rubrique'), Dict('idannonce'))
            obj_type = EPAdvertType(Dict('rubrique'))
            obj_advert_type = ADVERT_TYPES.PERSONAL
            obj_house_type = EPHouseType(Dict('tbien'))
            obj_title = Dict('titre')
            obj_cost = CleanDecimal(Dict('prix'))
            obj_currency = Currency.get_currency(u'€')
            # The text is read from the same key as the title.
            obj_text = Dict('titre')
            obj_location = Dict('ville')
            obj_area = CleanDecimal(Dict('surface'))
            obj_rooms = CleanDecimal(Dict('pieces'))
            obj_date = DateTime(Dict('creationdate'))
            obj_utilities = UTILITIES.UNKNOWN
            obj_price_per_meter = PricePerMeterFilter()

            def obj_photos(self):
                """Return the ``UrlImage`` photo in a list, or an empty list."""
                url = Dict('UrlImage', default=NotAvailable)(self)
                if empty(url):
                    return []
                return [HousingPhoto(url)]
Exemple #15
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from a detail page (absolute XPaths)."""
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText(
            '//div[has-class("box-header")]/h1[@class="clearfix"]'
        )
        obj_cost = CleanDecimal('//h1[@class="clearfix"]/span[@class="price"]',
                                replace_dots=True)
        # Currency symbol found in the price, euro when none is recognised.
        obj_currency = Regexp(CleanText('//h1[@class="clearfix"]/span[@class="price"]'),
                              '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
        obj_utilities = UTILITIES.UNKNOWN
        # Area is the number right before " m²" in the title (group 2). The
        # inner Regexp gets its own default so that a title without a surface
        # yields NotAvailable rather than a regexp error.
        obj_area = CleanDecimal(Regexp(CleanText('//h1[@class="clearfix"]/span[@class="title"]'),
                                       r'(.*?)(\d*) m\xb2(.*?)', '\\2',
                                       default=NotAvailable),
                                default=NotAvailable)

        def obj_date(self):
            """Return the date found after the last "/" of the header line."""
            date = CleanText(
                '//div[has-class("box-header")]//p[has-class("date")]'
            )(self).split("/")[-1].strip()
            return parse_french_date(date)

        def obj_bedrooms(self):
            """Return the second summary figure when more than two exist."""
            rooms_bedrooms_area = XPath(
                '//div[has-class("box-body")]//ul[has-class("item-summary")]/li'
            )(self)
            # With two entries or fewer, no bedroom count is read.
            if len(rooms_bedrooms_area) > 2:
                return CleanDecimal(
                    '//div[has-class("box-body")]//ul[has-class("item-summary")]/li[2]/strong',
                    default=NotAvailable
                )(self)
            else:
                return NotAvailable

        obj_rooms = CleanText('//ul[has-class("item-summary")]/li[1]/strong',
                              default=NotAvailable)
        obj_price_per_meter = PricePerMeterFilter()
        obj_location = CleanText('//div[@class="item-geoloc"]/h2')
        obj_text = CleanText(CleanHTML('//p[@class="item-description"]'))

        def obj_station(self):
            """Return the metro labels joined with ", "."""
            labels = XPath(
                '//ul[has-class("item-metro")]//span[has-class("label")]'
            )(self)
            # An element without direct text has ``.text`` set to None, which
            # str.join() rejects: leave such labels out.
            return ", ".join([
                station.text for station in labels if station.text
            ])

        def obj_phone(self):
            """Return the first phone block, spaces turned into ", "."""
            phone = CleanText('(//div[has-class("tel-wrapper")])[1]')(self)
            phone = phone.replace(' ', ', ')
            return phone.strip()

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_details(self):
            """Return ``{"GES": <letter>}`` read from the ``rank-*`` class.

            ``NotAvailable`` is used when the rank element is missing or
            carries no ``rank-<letter>`` class token.
            """
            classes = Attr(
                '//div[has-class("energy-box")]//div[has-class("rank")]',
                'class',
                default=None
            )(self)
            GES = NotAvailable
            if classes:
                # next() with a default: no IndexError without a rank-* token.
                GES = next(
                    (x.replace("rank-", "").upper()
                     for x in classes.split() if x.startswith("rank-")),
                    NotAvailable)
            return {
                "GES": GES
            }

        def obj_photos(self):
            """Return one ``HousingPhoto`` per carousel thumbnail."""
            return [
                HousingPhoto(u'%s' % img)
                for img in XPath('//div[has-class("owl-carousel-thumbs")]//img/@src')(self)
            ]
Exemple #16
0
        class item(ItemElement):
            """Map one search-result offer to a ``Housing`` object.

            Every XPath below is relative (``./``) to the offer being parsed.
            """
            # Common prefix of most selectors below.
            offer_details_wrapper = (
                './div/div/div[has-class("offer-details-wrapper")]'
            )
            klass = Housing

            # "<type prefix>-<offer id>": the prefix is the part of
            # env['type'] before its last dash, the id comes from ./@id.
            obj_id = Format(
                '%s-%s',
                Regexp(Env('type'), '(.*)-.*'),
                CleanText('./@id', replace=[('header-offer-', '')])
            )
            obj_title = Attr(
                offer_details_wrapper + '/div/div/p[@class="offer-type"]/a',
                'title'
            )
            obj_url = Format(
                "http://www.logic-immo.com/%s.htm",
                CleanText(
                    './@id',
                    replace=[('header-offer-', 'detail-location-')]
                )
            )
            obj_area = CleanDecimal(
                (
                    offer_details_wrapper +
                    '/div/div/div[has-class("offer-details-second")]' +
                    '/div/h3[has-class("offer-attributes")]/span' +
                    '/span[has-class("offer-area-number")]'
                ),
                default=NotAvailable
            )
            obj_rooms = CleanDecimal(
                (
                    offer_details_wrapper +
                    '/div/div/div[has-class("offer-details-second")]' +
                    '/div/h3[has-class("offer-attributes")]' +
                    '/span[has-class("offer-rooms")]' +
                    '/span[has-class("offer-rooms-number")]'
                ),
                default=NotAvailable
            )
            obj_price_per_meter = PricePerMeterFilter()
            # Amount written before the currency symbol.
            obj_cost = CleanDecimal(
                Regexp(
                    CleanText(
                        (
                            offer_details_wrapper +
                            '/div/div/p[@class="offer-price"]/span'
                        ),
                        default=NotAvailable
                    ),
                    '(.*) [%s%s%s]' % (u'€', u'$', u'£'),
                    default=NotAvailable
                ),
                default=NotAvailable
            )
            # Currency symbol found in the price, euro when none is recognised.
            obj_currency = Regexp(
                CleanText(
                    offer_details_wrapper + '/div/div/p[has-class("offer-price")]/span',
                    default=NotAvailable
                ),
                '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€'
            )
            obj_utilities = UTILITIES.UNKNOWN
            # The update date is presumably written dd/mm/yyyy: without
            # dayfirst an ambiguous date such as 05/03 is read month first.
            obj_date = Date(
                Regexp(
                    CleanText(
                        './div/div/div[has-class("offer-picture-more")]/div/p[has-class("offer-update")]'
                    ),
                    r".*(\d{2}/\d{2}/\d{4}).*"),
                dayfirst=True
            )
            obj_text = CleanText(
                offer_details_wrapper + '/div/div/div/p[has-class("offer-description")]/span'
            )
            obj_location = CleanText(
                offer_details_wrapper +
                '//div[has-class("offer-places-block")]'
            )

            def obj_photos(self):
                """Return the offer picture in a list, or an empty list."""
                photos = []
                url = Attr(
                    './div/div/div/div[has-class("picture-wrapper")]/div/img',
                    'src'
                )(self)
                if url:
                    photos.append(HousingPhoto(url))
                return photos

            def obj_details(self):
                """Return the agency fees under the "Honoraires" key, if any.

                The amount is the text between the first and second colon of
                the fees line; a line without colon is ignored.
                """
                details = {}
                honoraires = CleanText(
                    (
                        self.offer_details_wrapper +
                        '/div/div/p[@class="offer-agency-fees"]'
                    ),
                    default=None
                )(self)
                if honoraires:
                    parts = honoraires.split(":")
                    # Guard against a fees line that holds no colon at all.
                    if len(parts) > 1:
                        details["Honoraires"] = (
                            "{} (TTC, en sus)".format(parts[1].strip())
                        )
                return details
Exemple #17
0
    class get_housing(ItemElement):
        klass = Housing

        def parse(self, el):
            """Store the ad attributes as ``env['details']``.

            Each entry of ``self.el['adview']['attributes']`` contributes its
            ``key`` mapped to its ``value_label``. ``el`` itself is not used.
            """
            details = {}
            for attribute in self.el['adview']['attributes']:
                details[attribute['key']] = attribute['value_label']
            self.env['details'] = details

        # Listing identifier, taken from the parsing environment.
        obj_id = Env('_id')

        # Area and room count are read through PopDetail ('square' / 'rooms',
        # 0 when absent); CleanDecimal falls back to NotAvailable.
        # NOTE(review): PopDetail is defined elsewhere — presumably it pops
        # the key from env['details']; confirm.
        obj_area = CleanDecimal(PopDetail('square', default=0),
                                default=NotAvailable)
        obj_rooms = CleanDecimal(PopDetail('rooms', default=0),
                                 default=NotAvailable)

        def obj_GES(self):
            ges = CleanText(PopDetail('ges', default='|'))(self)
            return getattr(ENERGY_CLASS, ges[0], NotAvailable)

        def obj_DPE(self):
            dpe = CleanText(PopDetail('energy_rate', default='|'))(self)
            return getattr(ENERGY_CLASS, dpe[0], NotAvailable)

        def obj_house_type(self):
            value = CleanText(PopDetail('real_estate_type'),
                              default=' ')(self).lower()
            if value == 'parking':
                return HOUSE_TYPES.PARKING
            elif value == 'appartement':
                return HOUSE_TYPES.APART
            elif value == 'maison':
                return HOUSE_TYPES.HOUSE
            elif value == 'terrain':
                return HOUSE_TYPES.LAND
            else:
                return HOUSE_TYPES.OTHER

        def obj_utilities(self):
            value = CleanText(PopDetail('charges_included', default='Non'),
                              default=NotAvailable)(self)
            if value == "Oui":
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.EXCLUDED

        obj_title = Dict('adview/subject')
        obj_cost = CleanDecimal(Dict('adview/price/0', default=NotAvailable),
                                default=Decimal(0))
        obj_currency = BaseCurrency.get_currency(u'€')
        obj_text = Dict('adview/body')
        obj_location = Dict('adview/location/city_label')

        def obj_advert_type(self):
            line_pro = Dict('adview/owner/type')(self)
            if line_pro == u'pro':
                return ADVERT_TYPES.PROFESSIONAL
            else:
                return ADVERT_TYPES.PERSONAL

        obj_date = DateTime(Dict('adview/first_publication_date'))

        def obj_photos(self):
            photos = []
            for img in Dict('adview/images/urls_large', default=[])(self):
                photos.append(HousingPhoto(img))
            return photos

        def obj_type(self):
            try:
                breadcrumb = int(Dict('adview/category_id')(self))
            except ValueError:
                breadcrumb = None

            if breadcrumb == 11:
                return POSTS_TYPES.SHARING
            elif breadcrumb == 10:

                isFurnished = CleanText(PopDetail('furnished',
                                                  default=' '))(self)

                if isFurnished.lower() == u'meublé':
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            else:
                return POSTS_TYPES.SALE

        obj_price_per_meter = PricePerMeterFilter()
        obj_url = Dict('adview/url')
        obj_details = Env('details')
Exemple #18
0
        class item(ItemElement):
            """Build a ``Housing`` from one offer element of a results page.

            Most fields are read below the element carrying the
            "offer-details-wrapper" class, whose XPath is kept in
            ``offer_details_wrapper``.
            """
            offer_details_wrapper = (
                './/div[has-class("offer-details-wrapper")]')
            klass = Housing

            # The id is "<first part of env type>-<element id>", with the
            # "header-offer-" prefix removed from the element id.
            obj_id = Format(
                '%s-%s', Regexp(Env('type'), '(.*)-.*'),
                CleanText('./@id', replace=[('header-offer-', '')]))
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            def obj_house_type(self):
                """Map the first word of the offer type link to HOUSE_TYPES."""
                house_type = CleanText(
                    './/div[has-class("offer-details-type")]/a')(self).split(
                        ' ')[0].lower()
                known_types = {
                    "appartement": HOUSE_TYPES.APART,
                    "maison": HOUSE_TYPES.HOUSE,
                    "terrain": HOUSE_TYPES.LAND,
                    "parking": HOUSE_TYPES.PARKING,
                }
                # Any other first word is reported as OTHER.
                return known_types.get(house_type, HOUSE_TYPES.OTHER)

            obj_title = CleanText(
                './/div[has-class("offer-details-type")]/a/@title')

            # The URL is the link href followed by its optional "data-orpi"
            # attribute.
            obj_url = Format(
                u'%s%s', CleanText('.//div/a[@class="offer-link"]/@href'),
                CleanText('.//div/a[@class="offer-link"]/@data-orpi',
                          default=""))

            obj_area = CleanDecimal(
                (offer_details_wrapper +
                 '/div/div/div[has-class("offer-details-second")]' +
                 '/div/h3[has-class("offer-attributes")]/span' +
                 '/span[has-class("offer-area-number")]'),
                default=NotLoaded)
            obj_rooms = CleanDecimal(
                (offer_details_wrapper +
                 '/div/div/div[has-class("offer-details-second")]' +
                 '/div/h3[has-class("offer-attributes")]' +
                 '/span[has-class("offer-rooms")]' +
                 '/span[has-class("offer-rooms-number")]'),
                default=NotAvailable)
            # The cost is whatever precedes " <currency symbol>" in the price.
            obj_cost = CleanDecimal(Regexp(CleanText(
                (offer_details_wrapper + '/div/p[@class="offer-price"]/span'),
                default=NotLoaded),
                                           '(.*) [%s%s%s]' %
                                           (u'€', u'$', u'£'),
                                           default=NotLoaded),
                                    default=NotLoaded)
            obj_currency = Currency(offer_details_wrapper +
                                    '/div/p[has-class("offer-price")]/span')
            obj_price_per_meter = PricePerMeterFilter()
            obj_utilities = UTILITIES.UNKNOWN
            obj_text = CleanText(
                offer_details_wrapper +
                '/div/div/div/p[has-class("offer-description")]/span')
            obj_location = CleanText(offer_details_wrapper +
                                     '/div[@class="offer-details-location"]',
                                     replace=[('Voir sur la carte', '')])

            def obj_photos(self):
                """Return the offer picture as a one-element list, or ``[]``.

                The thumbnail size in the URL is swapped for a larger one and
                the URL is made absolute against the page URL.
                """
                photos = []
                url = Attr('.//div[has-class("offer-picture")]//img',
                           'src')(self)
                if url:
                    url = url.replace('400x267', '800x600')
                    url = urljoin(self.page.url, url)  # Ensure URL is absolute
                    photos.append(HousingPhoto(url))
                return photos

            def obj_details(self):
                """Return extra details; only agency fees are extracted.

                The fees are stored under "Honoraires" when the
                "offer-agency-fees" paragraph is present and non-empty.
                """
                details = {}
                honoraires = CleanText(
                    (self.offer_details_wrapper +
                     '/div/div/p[@class="offer-agency-fees"]'),
                    default=None)(self)
                if honoraires:
                    # Expected shape is "<label>: <amount>". Keep the whole
                    # text when there is no colon rather than raising
                    # IndexError.
                    parts = honoraires.split(":")
                    fees = parts[1] if len(parts) > 1 else parts[0]
                    details["Honoraires"] = ("{} (TTC, en sus)".format(
                        fees.strip()))
                return details
Exemple #19
0
    class get_housing(ItemElement):
        """Build a ``Housing`` from a single ad page.

        Most fields come from hidden inputs of the form named "central".
        Room counts come from the ``ava_data`` JavaScript object embedded in
        the page, decoded once in ``parse``.
        """
        klass = Housing

        def parse(self, el):
            """Decode the embedded ``ava_data`` object and keep its first product.

            The object is not valid JSON as served: three bare keys are
            quoted and one inline comment is removed before decoding.
            """
            json_content = Regexp(CleanText('//script'),
                                  "var ava_data = ({.+?});")(self)
            json_content = json_content.replace("logged", "\"logged\"")
            json_content = json_content.replace("lengthcarrousel",
                                                "\"lengthcarrousel\"")
            json_content = json_content.replace("products", "\"products\"")
            json_content = json_content.replace(
                "// // ANNONCES_SIMILAIRE / RECO", "")
            self.house_json_datas = json.loads(json_content)['products'][0]

        obj_id = CleanText(
            '//form[@name="central"]/input[@name="idannonce"]/@value')

        def obj_house_type(self):
            """Return the ``RET`` key whose value equals the 'naturebien' input.

            Returns ``NotLoaded`` when no entry matches.
            """
            naturebien = CleanText(
                '//form[@name="central"]/input[@name="naturebien"]/@value')(
                    self)
            try:
                return next(k for k, v in RET.items() if v == naturebien)
            except StopIteration:
                return NotLoaded

        def obj_type(self):
            """Return the ``TYPES`` key whose value equals the 'idtt' input.

            Returns ``NotLoaded`` when no entry matches, mirroring
            ``obj_house_type``.
            """
            id_type = int(
                CleanText('//form[@name="central"]/input[@name="idtt"]/@value')
                (self))
            try:
                post_type = next(k for k, v in TYPES.items() if v == id_type)
            except StopIteration:
                # An unknown id must not leak StopIteration to the caller.
                return NotLoaded
            if post_type == POSTS_TYPES.FURNISHED_RENT:
                # SeLoger does not let us discriminate between furnished and not furnished.
                return POSTS_TYPES.RENT
            return post_type

        def obj_advert_type(self):
            """Return PROFESSIONAL when any agency-related input is filled."""
            # NOTE(review): "nomagance" looks like a typo for "nomagence";
            # kept as is because it has to match the page markup -- confirm.
            is_agency = (CleanText(
                '//form[@name="central"]/input[@name="nomagance"]/@value'
            )(self) or CleanText(
                '//form[@name="central"]/input[@name="urlagence"]/@value'
            )(self) or CleanText(
                '//form[@name="central"]/input[@name="adresseagence"]/@value')
                         (self))
            if is_agency:
                return ADVERT_TYPES.PROFESSIONAL
            return ADVERT_TYPES.PERSONAL

        def obj_photos(self):
            """Return the carousel pictures, eager ones first, then lazy ones.

            Every URL found is prefixed with "https:".
            """
            photos = []

            for photo in XPath('//div[@class="carrousel_slide"]/img/@src')(
                    self):
                photos.append(HousingPhoto("https:{}".format(photo)))

            # Lazy slides carry a JSON document whose "url" key is the picture.
            for photo in XPath('//div[@class="carrousel_slide"]/@data-lazy')(
                    self):
                lazy_data = json.loads(photo)
                photos.append(HousingPhoto("https:{}".format(lazy_data['url'])))

            return photos

        obj_title = CleanText('//title[1]')

        def obj_location(self):
            """Return "<district> <city> (<postal code>)"."""
            quartier = Regexp(CleanText('//script'),
                              r"'nomQuartier', { value: \"([\w -]+)\", ")(self)
            ville = CleanText(
                '//form[@name="central"]/input[@name="ville"]/@value')(self)
            ville = ville or ''
            cp = CleanText(
                '//form[@name="central"]/input[@name="codepostal"]/@value')(
                    self)
            cp = cp or ''
            return u'%s %s (%s)' % (quartier, ville, cp)

        def obj_address(self):
            """Return a ``PostalAddress`` filled from the page.

            The district name is stored in ``street``; ``full_address``
            reuses the value of the ``location`` field.
            """
            address = PostalAddress()

            address.street = Regexp(
                CleanText('//script'),
                r"'nomQuartier', { value: \"([\w -]+)\", ")(self)
            address.postal_code = CleanText(
                '//form[@name="central"]/input[@name="codepostal"]/@value')(
                    self)
            address.city = CleanText(
                '//form[@name="central"]/input[@name="ville"]/@value')(self)
            address.full_address = Field('location')(self)
            return address

        obj_text = CleanText(
            '//form[@name="central"]/input[@name="description"]/@value')

        obj_cost = CleanDecimal(CleanText('//a[@id="price"]'),
                                default=NotLoaded)
        obj_currency = Currency(CleanText('//a[@id="price"]'),
                                default=NotLoaded)
        obj_price_per_meter = PricePerMeterFilter()

        obj_area = CleanDecimal(
            '//form[@name="central"]/input[@name="surface"]/@value',
            replace_dots=True)
        obj_url = CleanText(
            '//form[@name="central"]/input[@name="urlannonce"]/@value')
        obj_phone = CleanText(
            '//div[@class="data-action"]/a[@data-phone]/@data-phone')

        def obj_utilities(self):
            """Return INCLUDED when the price mention says charges are included."""
            mention = CleanText('//span[@class="detail_indice_prix"]',
                                default="")(self)
            if "(CC) Loyer mensuel charges comprises" in mention:
                return UTILITIES.INCLUDED
            return UTILITIES.UNKNOWN

        def obj_bedrooms(self):
            """Return 'nb_chambres' from the data decoded in ``parse``."""
            return CleanDecimal(Dict('nb_chambres',
                                     default=NotLoaded))(self.house_json_datas)

        def obj_rooms(self):
            """Return 'nb_pieces' from the data decoded in ``parse``."""
            return CleanDecimal(Dict('nb_pieces',
                                     default=NotLoaded))(self.house_json_datas)
Exemple #20
0
        class item(ItemElement):
            """Build a ``Housing`` from one entry of a search results list.

            Rooms, bedrooms and area are read from the "item-tags" list in
            ``parse`` and exposed through the environment.
            """
            klass = Housing

            def condition(self):
                """Tell whether this entry should be turned into a ``Housing``.

                The entry needs a link matching '/annonces/(.*)'. For
                ``POSTS_TYPES.RENT`` queries, its title must also not
                mention 'meublé'.
                """
                title = self.obj_title(self)
                furnished_ok = (
                    self.env['query_type'] != POSTS_TYPES.RENT
                    or 'meublé' not in title.lower()
                )
                ad_id = Regexp(Link('./div/a[has-class("item-title")]'),
                               '/annonces/(.*)',
                               default=None)(self)
                return ad_id and furnished_ok

            def parse(self, el):
                """Fill env 'rooms', 'bedrooms' and 'area' from the tag list.

                The three entries default to ``NotLoaded``. A tag mentioning
                neither 'chambre' nor 'pièce' is treated as the area.
                """
                tags = el.xpath(
                    './div/a[has-class("item-title")]/ul[has-class("item-tags")]/li'
                )
                for key in ('rooms', 'bedrooms', 'area'):
                    self.env[key] = NotLoaded

                for tag in tags:
                    label = CleanText('.')(tag).lower()
                    if 'chambre' in label:
                        key, number = 'bedrooms', CleanDecimal('.')(tag)
                    elif 'pièce' in label:
                        key, number = 'rooms', CleanDecimal('.')(tag)
                    else:
                        key = 'area'
                        number = CleanDecimal(
                            Regexp(CleanText('.'), r'(\d*\.*\d*) .*'))(tag)
                    self.env[key] = number

            obj_id = Regexp(Link('./div/a[has-class("item-title")]'),
                            '/annonces/(.*)')
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                """Guess the house type from the leading slug of the link."""
                link = Link('./div/a[@class="item-title"]')(self)
                slug = link.split('/')[-1].split('-')[0]
                # Checked in order; the first keyword found wins.
                keyword_to_type = (
                    ('parking', HOUSE_TYPES.PARKING),
                    ('appartement', HOUSE_TYPES.APART),
                    ('terrain', HOUSE_TYPES.LAND),
                    ('maison', HOUSE_TYPES.HOUSE),
                )
                for keyword, house_type in keyword_to_type:
                    if keyword in slug:
                        return house_type
                return HOUSE_TYPES.OTHER

            obj_title = CleanText('./div/a[has-class("item-title")]')
            obj_area = Env('area')
            obj_cost = CleanDecimal(
                CleanText(
                    './div/a[has-class("item-title")]/span[@class="item-price"]'
                ),
                replace_dots=True,
                default=Decimal(0),
            )
            obj_currency = Currency(
                './div/a[@class="item-title"]/span[@class="item-price"]')
            obj_utilities = UTILITIES.UNKNOWN

            obj_station = CleanText('./div/p[@class="item-transports"]',
                                    default=NotLoaded)

            def obj_location(self):
                """Return the description text up to its first period."""
                description = CleanText(
                    './div/p[@class="item-description"]')(self)
                return description.split(".")[0]

            obj_text = CleanText('./div/p[@class="item-description"]',
                                 replace=[(' Lire la suite', '')])
            obj_rooms = Env('rooms')
            obj_bedrooms = Env('bedrooms')
            obj_price_per_meter = PricePerMeterFilter()

            obj_url = Format(u'http://www.pap.fr%s',
                             Link('./div/a[@class="item-title"]'))

            def obj_photos(self):
                """Return the entry pictures, leaving out placeholder images."""
                placeholders = ("visuel-nophoto.png", 'miniature-video.png')
                return [
                    HousingPhoto(u'%s' % src)
                    for src in XPath('./a/img/@src')(self)
                    if not src.endswith(placeholders)
                ]
Exemple #21
0
    class get_housing(ItemElement):
        """Build a ``Housing`` from a single ad page.

        Rooms, bedrooms and area are read from the "item-tags" list in
        ``parse`` and exposed through the environment.
        """
        klass = Housing

        def parse(self, el):
            """Fill env 'rooms', 'bedrooms' and 'area' from the tag list.

            The three entries default to ``NotAvailable``. Tags that match
            none of the known labels are ignored.
            """
            rooms_bedrooms_area = el.xpath('.//ul[has-class("item-tags")]/li')
            self.env['rooms'] = NotAvailable
            self.env['bedrooms'] = NotAvailable
            self.env['area'] = NotAvailable

            for item in rooms_bedrooms_area:
                label = CleanText('.')(item)
                if 'chambre' in label.lower():
                    key = 'bedrooms'
                    value = CleanDecimal('./strong')(item)
                elif 'pièce' in label.lower():
                    key = 'rooms'
                    value = CleanDecimal('./strong')(item)
                elif ' m²' in label and 'le m²' not in label:
                    key = 'area'
                    value = CleanDecimal(
                        Regexp(CleanText('.'), r'(\d*\.*\d*) .*'))(item)
                else:
                    # Unknown tag: skip it. Falling through would store an
                    # unbound or stale value under the raw label text.
                    continue
                self.env[key] = value

        obj_id = Env('_id')

        def obj_type(self):
            """Derive the post type from the first breadcrumb link.

            A 'location' link gives FURNISHED_RENT when the title mentions
            'meublé', otherwise RENT. 'vente' gives SALE and 'viager' gives
            VIAGER. Anything else gives ``NotAvailable``.
            """
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            if 'location' in prev_link:
                title = self.obj_title(self)
                if 'meublé' in title.lower():
                    return POSTS_TYPES.FURNISHED_RENT
                return POSTS_TYPES.RENT
            if 'vente' in prev_link:
                return POSTS_TYPES.SALE
            if 'viager' in prev_link:
                return POSTS_TYPES.VIAGER
            return NotAvailable

        obj_advert_type = ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            """Guess the house type from the last '-' part of the breadcrumb link."""
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            house_type = prev_link.split('-')[-1]
            if 'parking' in house_type:
                return HOUSE_TYPES.PARKING
            if 'appartement' in house_type:
                return HOUSE_TYPES.APART
            if 'terrain' in house_type:
                return HOUSE_TYPES.LAND
            if 'maison' in house_type:
                return HOUSE_TYPES.HOUSE
            return HOUSE_TYPES.OTHER

        obj_title = CleanText('//h1[@class="item-title"]')
        obj_cost = CleanDecimal(
            '//h1[@class="item-title"]/span[@class="item-price"]',
            replace_dots=True)
        obj_currency = Currency(
            '//h1[@class="item-title"]/span[@class="item-price"]')
        obj_utilities = UTILITIES.UNKNOWN
        obj_area = Env('area')

        def obj_date(self):
            """Parse the text after the last '/' of the "item-date" paragraph."""
            date = CleanText('//p[@class="item-date"]')(self).split(
                "/")[-1].strip()
            return parse_french_date(date)

        obj_rooms = Env('rooms')
        obj_bedrooms = Env('bedrooms')
        obj_price_per_meter = PricePerMeterFilter()
        obj_location = CleanText('//div[has-class("item-description")]/h2')
        obj_text = CleanText(
            CleanHTML('//div[has-class("item-description")]/div/p'))

        def obj_station(self):
            """Return the transport labels joined with ", "."""
            return ", ".join([
                station.text for station in XPath(
                    '//ul[has-class("item-transports")]//span[has-class("label")]'
                )(self)
            ])

        def obj_phone(self):
            """Return the first owner phone text, spaces replaced by ', '."""
            phone = CleanText(
                '(//div[has-class("contact-proprietaire-box")]//strong[@class="tel-wrapper"])[1]'
            )(self)
            phone = phone.replace(' ', ', ')
            return phone

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_DPE(self):
            """Return the energy class encoded in an "energy-rank-<x>" CSS class.

            Returns ``NotAvailable`` when no such class is present.
            """
            css_classes = Attr(
                '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
                'class',
                default="")(self)
            # First class of the form "energy-rank-<x>", upper-cased. An empty
            # name (no such class) makes getattr fall back to NotAvailable
            # instead of indexing an empty list.
            rank = next(
                (x.replace("energy-rank-", "").upper()
                 for x in css_classes.split()
                 if x.startswith("energy-rank-")),
                "")
            return getattr(ENERGY_CLASS, rank, NotAvailable)

        def obj_photos(self):
            """Return the thumbnail pictures, leaving out the video miniature."""
            photos = []
            for img in XPath('//div[@class="owl-thumbs"]/a/img/@src')(self):
                if not img.endswith('miniature-video.png'):
                    photos.append(HousingPhoto(u'%s' % img))
            return photos
Exemple #22
0
        class item(ItemElement):
            """Build a ``Housing`` from one "TeaserOffer" entry of a results page."""
            klass = Housing

            # The id is "<env type>:<data-reference attribute>".
            obj_id = Format(
                '%s:%s', Env('type'),
                Attr('.//span[boolean(@data-reference)]', 'data-reference'))
            obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            def obj_house_type(self):
                """Return the house type whose URL slug appears in the offer URL.

                Returns ``NotLoaded`` when no slug of ``QUERY_HOUSE_TYPES``
                matches.
                """
                url = self.obj_url(self)
                for house_type, slugs in QUERY_HOUSE_TYPES.items():
                    for slug in slugs:
                        if ('/%s/' % slug) in url:
                            return house_type
                return NotLoaded

            obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
            obj_area = CleanDecimal(Regexp(CleanText(
                './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
            ),
                                           r'(\d*\.*\d*) .*',
                                           default=NotAvailable),
                                    default=NotAvailable)
            obj_cost = CleanDecimal(
                './/strong[has-class("TeaserOffer-price-num")]')
            obj_price_per_meter = PricePerMeterFilter()
            obj_currency = Currency(
                './/strong[has-class("TeaserOffer-price-num")]')
            obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
            obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')

            def obj_photos(self):
                """Return the teaser picture as a one-element list, or ``[]``."""
                url = CleanText(
                    Attr('.//a[has-class("TeaserOffer-ill")]/img',
                         'src'))(self)
                # If the used photo is a default no photo, the src is on the same domain.
                # An empty src is treated the same way instead of raising
                # IndexError.
                if not url or url.startswith('/'):
                    return []
                return [HousingPhoto(url)]

            def obj_date(self):
                """Return today's date, computed when the item is built.

                A plain class attribute would be evaluated once, when the
                class is defined, and go stale in a long-running process.
                """
                return datetime.date.today()

            def obj_utilities(self):
                """Return INCLUDED when the price says "charges comprises"."""
                price = CleanText(
                    './/strong[has-class("TeaserOffer-price-num")]')(self)
                if "charges comprises" in price.lower():
                    return UTILITIES.INCLUDED
                return UTILITIES.EXCLUDED

            obj_rooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]',
                default=NotLoaded)
            obj_bedrooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]',
                default=NotLoaded)

            def obj_details(self):
                """Return the availability date and the price mentions.

                "dispo" falls back to today's date when the entry has no
                ``data-dispo`` attribute.
                """
                return {
                    "dispo":
                    Date(
                        Attr('.//span[boolean(@data-dispo)]',
                             'data-dispo',
                             default=datetime.date.today().isoformat()))(self),
                    "priceMentions":
                    CleanText(
                        './/span[has-class("TeaserOffer-price-mentions")]')(
                            self)
                }
Exemple #23
0
    class get_housing(ItemElement):
        """Build a ``Housing`` from a single offer page."""
        klass = Housing

        # The id is "<env type>:<data-property-reference attribute>".
        obj_id = Format(
            '%s:%s', Env('type'),
            Attr('//div[boolean(@data-property-reference)]',
                 'data-property-reference'))
        obj_advert_type = ADVERT_TYPES.PROFESSIONAL

        def _electric_consumption(self):
            """Return the consumption encoded in the DPE image URL, or ``None``.

            ``None`` is returned when the image is missing or its URL does
            not match the expected pattern.
            """
            try:
                return CleanDecimal(
                    Regexp(
                        Attr('//div[has-class("OfferDetails-content")]//img',
                             'src'),
                        r'https://dpe.foncia.net\/(\d+)\/.*'))(self)
            except (RegexpError, XPathNotFound):
                return None

        def obj_type(self):
            """Map the env 'type' to a ``POSTS_TYPES`` member.

            'location' gives FURNISHED_RENT when the page URL contains
            'appartement-meuble', otherwise RENT. 'achat' gives SALE.
            Anything else gives ``NotAvailable``.
            """
            query_type = Env('type')(self)
            if query_type == 'location':
                if 'appartement-meuble' in self.page.url:
                    return POSTS_TYPES.FURNISHED_RENT
                return POSTS_TYPES.RENT
            if query_type == 'achat':
                return POSTS_TYPES.SALE
            return NotAvailable

        def obj_url(self):
            """Return the URL of the page being parsed."""
            return self.page.url

        def obj_house_type(self):
            """Return the house type whose URL slug appears in the page URL.

            Returns ``NotAvailable`` when no slug of ``QUERY_HOUSE_TYPES``
            matches.
            """
            url = self.obj_url()
            for house_type, slugs in QUERY_HOUSE_TYPES.items():
                for slug in slugs:
                    if ('/%s/' % slug) in url:
                        return house_type
            return NotAvailable

        obj_title = CleanText('//h1[has-class("OfferTop-title")]')
        obj_area = CleanDecimal(Regexp(CleanText(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'),
                                       r'(\d*\.*\d*) .*',
                                       default=NotAvailable),
                                default=NotAvailable)
        obj_cost = CleanDecimal('//span[has-class("OfferTop-price")]',
                                default=NotAvailable)
        obj_price_per_meter = PricePerMeterFilter()
        obj_currency = Currency('//span[has-class("OfferTop-price")]')
        obj_location = Format('%s - %s',
                              CleanText('//p[@data-behat="adresseBien"]'),
                              CleanText('//p[has-class("OfferTop-loc")]'))
        obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
        obj_phone = Regexp(Link('//a[has-class("OfferContact-btn--tel")]'),
                           r'tel:(.*)')

        def obj_photos(self):
            """Return the slider pictures, requesting a larger size."""
            photos = []
            for photo in self.xpath('//div[has-class("OfferSlider")]//img'):
                photo_url = Attr('.', 'src')(photo)
                photo_url = photo_url.replace('640/480', '800/600')
                photos.append(HousingPhoto(photo_url))
            return photos

        def obj_date(self):
            """Return today's date, computed when the item is built.

            A plain class attribute would be evaluated once, when the class
            is defined, and go stale in a long-running process.
            """
            return datetime.date.today()

        def obj_utilities(self):
            """Return INCLUDED when the price says "charges comprises"."""
            price = CleanText('//p[has-class("OfferTop-price")]')(self)
            if "charges comprises" in price.lower():
                return UTILITIES.INCLUDED
            return UTILITIES.EXCLUDED

        obj_rooms = CleanDecimal(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]',
            default=NotAvailable)
        obj_bedrooms = CleanDecimal(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
            default=NotAvailable)

        def obj_DPE(self):
            """Convert the electric consumption into an ``ENERGY_CLASS`` member.

            Returns ``NotAvailable`` when the consumption cannot be read.
            """
            consumption = self._electric_consumption()
            if consumption is None:
                return NotAvailable

            # Inclusive upper bound of each class; above the last one is "G".
            thresholds = (
                (50, "A"),
                (90, "B"),
                (150, "C"),
                (230, "D"),
                (330, "E"),
                (450, "F"),
            )
            label = "G"
            for upper_bound, candidate in thresholds:
                if consumption <= upper_bound:
                    label = candidate
                    break
            return getattr(ENERGY_CLASS, label, NotAvailable)

        def obj_details(self):
            """Collect the availability date, mentions, agency and detail lists.

            Each titled column of the "OfferDetails-columnize" block becomes
            a nested dict: "List--data" items map title to value and
            "List--bullet" items map text to ``True``. The electric
            consumption is added when it can be read.
            """
            details = {}

            # Falls back to today's date when no dd/mm/yyyy date is found.
            dispo = Date(
                Regexp(CleanText('//p[has-class("OfferTop-dispo")]'),
                       r'.* (\d\d\/\d\d\/\d\d\d\d)',
                       default=datetime.date.today().isoformat()))(self)
            if dispo is not None:
                details["dispo"] = dispo

            priceMentions = CleanText('//p[has-class("OfferTop-mentions")]',
                                      default=None)(self)
            if priceMentions is not None:
                details["priceMentions"] = priceMentions

            agency = CleanText('//p[has-class("OfferContact-address")]',
                               default=None)(self)
            if agency is not None:
                details["agency"] = agency

            for item in self.xpath(
                    '//div[has-class("OfferDetails-columnize")]/div'):
                category = CleanText(
                    './h3[has-class("OfferDetails-title--2")]',
                    default=None)(item)
                if not category:
                    # Columns without a title cannot be keyed; skip them.
                    continue

                details[category] = {}

                for detail_item in item.xpath(
                        './/ul[has-class("List--data")]/li'):
                    detail_title = CleanText(
                        './/span[has-class("List-data")]')(detail_item)
                    detail_value = CleanText('.//*[has-class("List-value")]')(
                        detail_item)
                    details[category][detail_title] = detail_value

                for detail_item in item.xpath(
                        './/ul[has-class("List--bullet")]/li'):
                    detail_title = CleanText('.')(detail_item)
                    details[category][detail_title] = True

            consumption = self._electric_consumption()
            if consumption is not None:
                details["electric_consumption"] = (
                    '{} kWhEP/m².an'.format(consumption))

            return details
Exemple #24
0
        class item(ItemElement):
            klass = Housing

            obj_id = CleanText('./@data-classified-id')
            obj_title = CleanText('./div/h2[@itemprop="name"]/a')
            obj_location = CleanText('./div/h2[@itemprop="name"]/span[@class="item-localisation"]/span[@class="localisation-label"]/strong')
            obj_cost = CleanDecimal('./div/div/span[@class="price-label"]|./div/div[@class="item-price-pdf"]',
                                    default=NotAvailable)
            obj_currency = Regexp(CleanText('./div/div/span[@class="price-label"]|./div/div[@class="item-price-pdf"]'),
                                  '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')

            def obj_utilities(self):
                utilities = Regexp(CleanText('./div/div/span[@class="price-label"]|./div/div[@class="item-price-pdf"]'),
                                  '.*[%s%s%s](.*)' % (u'€', u'$', u'£'), default=u'')(self)
                if "CC" in utilities:
                    return UTILITIES.INCLUDED
                else:
                    return UTILITIES.UNKNOWN

            obj_text = CleanText('./div/div/div[@itemprop="description"]')
            obj_area = CleanDecimal(Regexp(CleanText('./div/h2[@itemprop="name"]/a'),
                                           '(.*?)([\d,\.]*) m2(.*?)', '\\2', default=None),
                                    replace_dots=True,
                                    default=NotAvailable)
            obj_url = Format(
                "http://www.explorimmo.com%s",
                Link('./div/div/ul/li/a[has-class("js-goto-classified")]')
            )
            obj_price_per_meter = PricePerMeterFilter()

            def obj_phone(self):
                phone = CleanText('./div/div/ul/li[has-class("js-clickphone")]',
                                  replace=[(u'Téléphoner : ', u'')],
                                  default=NotAvailable)(self)

                if '...' in phone:
                    return NotLoaded

                return phone

            def obj_details(self):
                charges = CleanText('./div/div/span[@class="price-fees"]',
                                    default=None)(self)
                if charges:
                    return {
                        "fees": charges.split(":")[1].strip()
                    }
                else:
                    return NotLoaded

            def obj_photos(self):
                """Return the listing thumbnail as a one-element photo list.

                The image ``src`` is URL-unquoted. When it embeds a second
                "http://" URL after its first three characters, only that
                embedded URL is kept, without the trailing query string.
                Returns NotAvailable when the image has no source.
                """
                url = Attr(
                    './div/div/a/div/img[@itemprop="image"]',
                    'src',
                    default=None
                )(self)
                if not url:
                    return NotAvailable

                url = unquote(url)
                if "http://" in url[3:]:
                    start = url.find("http://", 3)
                    end = url.rfind("?")
                    if end == -1:
                        # No query string: slice to the end of the string.
                        # Slicing with -1 would silently drop the last
                        # character of the URL.
                        end = None
                    url = url[start:end]
                return [HousingPhoto(url)]
Exemple #25
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` object from an advert detail page.

        Simple fields are declarative filters; the ``obj_*`` methods hold the
        fields that need branching. XPath expressions starting with ``//``
        are evaluated against the whole document.
        """
        klass = Housing

        obj_id = Env('_id')

        def obj_type(self):
            """Return the post type deduced from the advert URL.

            The URL is searched for 'colocation', 'location' then 'vente'.
            A rental is reported as furnished when the description list has a
            "meublé" criterion whose value is "oui"; a sale is reported as
            viager when the contact button carries transaction type '4'.
            ``NotAvailable`` is returned when no keyword matches.
            """
            url = BrowserURL('housing', _id=Env('_id'))(self)
            # 'colocation' contains 'location', so it must be tested first.
            if 'colocation' in url:
                return POSTS_TYPES.SHARING
            elif 'location' in url:
                isFurnished = False
                for li in XPath('//ul[@itemprop="description"]/li')(self):
                    label = CleanText('./span[has-class("criteria-label")]')(
                        li)
                    if label.lower() == "meublé":
                        isFurnished = (
                            CleanText('./span[has-class("criteria-value")]')(
                                li).lower() == 'oui')
                if isFurnished:
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif 'vente' in url:
                offertype = Attr(
                    '//button[has-class("offer-contact-vertical-phone")][1]',
                    'data-offertransactiontype')(self)
                if offertype == '4':
                    return POSTS_TYPES.VIAGER
                else:
                    return POSTS_TYPES.SALE
            return NotAvailable

        # Every advert handled here is reported as professional.
        obj_advert_type = ADVERT_TYPES.PROFESSIONAL

        def obj_house_type(self):
            """Map the lower-cased "offer-type" text to a ``HOUSE_TYPES`` value.

            Unrecognised texts map to ``HOUSE_TYPES.OTHER``.
            """
            house_type = CleanText('.//div[has-class("offer-type")]')(
                self).lower()
            if house_type == "appartement":
                return HOUSE_TYPES.APART
            elif house_type == "maison":
                return HOUSE_TYPES.HOUSE
            elif house_type == "terrain":
                return HOUSE_TYPES.LAND
            elif house_type == "parking":
                return HOUSE_TYPES.PARKING
            else:
                return HOUSE_TYPES.OTHER

        obj_title = CleanText(CleanHTML('//meta[@itemprop="name"]/@content'))
        # Area: the digits directly preceding " m²" in the title (group 2).
        obj_area = CleanDecimal(Regexp(CleanText(
            CleanHTML('//meta[@itemprop="name"]/@content')),
                                       '(.*?)(\d*) m\xb2(.*?)',
                                       '\\2',
                                       default=NotAvailable),
                                default=NotAvailable)
        obj_rooms = CleanDecimal(
            '//div[has-class("offer-info")]//span[has-class("offer-rooms-number")]',
            default=NotAvailable)
        obj_cost = CleanDecimal('//*[@itemprop="price"]', default=0)
        obj_currency = Currency('//*[@itemprop="price"]')

        def obj_utilities(self):
            """Report utilities as included when the notes say so.

            Returns ``UTILITIES.INCLUDED`` if the description notes contain
            "Loyer mensuel charges comprises", ``UTILITIES.UNKNOWN`` otherwise.
            """
            notes = CleanText('//p[@class="offer-description-notes"]')(self)
            if "Loyer mensuel charges comprises" in notes:
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.UNKNOWN

        obj_price_per_meter = PricePerMeterFilter()
        # Last update date, written as dd/mm/yyyy after "Mis à jour :".
        # NOTE(review): the Regexp has no default, so a page lacking this text
        # presumably makes the filter raise; confirm this is intended.
        obj_date = Date(Regexp(
            CleanText(
                '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
            ), u'.* Mis à jour : (\d{2}/\d{2}/\d{4}).*'),
                        dayfirst=True)
        obj_text = CleanHTML(
            '//div[has-class("offer-description-text")]/meta[@itemprop="description"]/@content'
        )
        obj_location = CleanText('//*[@itemprop="address"]')
        obj_station = CleanText('//div[has-class("offer-description-metro")]',
                                default=NotAvailable)

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_photos(self):
            """Return the carousel pictures as a list of ``HousingPhoto``.

            '75x75' in each source is replaced by '800x600', and the result is
            resolved against the page URL.
            """
            photos = []
            for img in XPath('//div[has-class("carousel-content")]//img/@src')(
                    self):
                url = u'%s' % img.replace('75x75', '800x600')
                url = urljoin(self.page.url, url)  # Ensure URL is absolute
                photos.append(HousingPhoto(url))
            return photos

        def obj_DPE(self):
            """Return the energy class (DPE), or ``NotAvailable``.

            The "DPE" label is removed from the summary text and the first
            remaining character is looked up on ``ENERGY_CLASS``.
            """
            energy_value = CleanText(
                '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("energy-summary")]',
                default="")(self)
            # Slice rather than index: a summary holding only the label (or
            # nothing at all) yields '' instead of raising IndexError, and ''
            # falls through to NotAvailable below.
            letter = energy_value.replace("DPE", "").strip()[:1]
            return getattr(ENERGY_CLASS, letter, NotAvailable)

        def obj_GES(self):
            """Return the greenhouse gas class (GES), or ``NotAvailable``.

            The "GES" label is removed from the summary text and the first
            remaining character is looked up on ``ENERGY_CLASS``.
            """
            greenhouse_value = CleanText(
                '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("greenhouse-summary")]',
                default="")(self)
            # Same slicing guard as obj_DPE: never index an empty string.
            letter = greenhouse_value.replace("GES", "").strip()[:1]
            return getattr(ENERGY_CLASS, letter, NotAvailable)

        def obj_details(self):
            """Return a dict of secondary information about the advert.

            Contains the creation date ("creationDate"), the agency fees
            ("Honoraires", only when present on the page) and one entry per
            criterion of the description list, keyed by the criterion label.
            """
            details = {}

            # NOTE(review): no default on the Regexp, so a page without
            # "Mis en ligne :" presumably raises here; confirm.
            details["creationDate"] = Date(Regexp(
                CleanText(
                    '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
                ), u'.*Mis en ligne : (\d{2}/\d{2}/\d{4}).*'),
                                           dayfirst=True)(self)

            honoraires = CleanText((
                '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]'
            ),
                                   default=None)(self)
            if honoraires:
                # The amount normally follows the first ':'. Keep the whole
                # text when there is no colon instead of raising IndexError.
                parts = honoraires.split(":")
                amount = parts[1] if len(parts) > 1 else parts[0]
                details["Honoraires"] = "{} (TTC, en sus)".format(
                    amount.strip())

            for li in XPath('//ul[@itemprop="description"]/li')(self):
                label = CleanText('./span[has-class("criteria-label")]')(li)
                value = CleanText('./span[has-class("criteria-value")]')(li)
                details[label] = value

            return details
Exemple #26
0
class AvendreAlouerItem(ItemElement):
    """Build a ``Housing`` object from one search-result entry.

    Relative XPaths (``.//``) read the result entry itself; fields combined
    with ``Async('details')`` are read from the advert page reached through
    the entry's link, which is fetched by ``load_details``.
    """
    klass = Housing
    _url = AbsoluteLink('.//a[has-class("linkCtnr")]')

    load_details = _url & AsyncLoad

    obj_url = _url
    # Reference: what follows the last ':' of the "property-reference"
    # paragraph on the detail page; '' when it is absent (see validate()).
    obj_id = Async('details') & CleanText(Regexp(CleanText('//p[has-class("property-reference")]'), r'\:(.*)$', default=''))

    obj_title = CleanText('.//a//ul')
    obj_area = CleanDecimal(
        CleanText('.//a//ul//li[has-class("first")]//following-sibling::li[2]'),
        default=NotAvailable
    )

    obj_cost = CleanDecimal(
        CleanText('.//span[has-class("price")]')
    )
    obj_price_per_meter = PricePerMeterFilter()
    # Currency: whatever follows the leading digits and spaces of the price.
    obj_currency = CleanText(
        Regexp(
            CleanText('.//span[has-class("price")]'),
            r'[\d\ ]+(.*)'
        )
    )

    obj_location = CleanText('.//span[has-class("loca")]')
    obj_text = CleanText('.//p[has-class("propShortDesc")]')

    # NOTE(review): the character class [\d\\] accepts digits and backslashes
    # only, so a date written with '/' separators would be captured up to the
    # first '/'. It looks like [\d/] was intended; confirm against the live
    # page before changing it.
    # NOTE(review): datetime.today() is evaluated once, when the class body is
    # executed, not on each parse.
    obj_date = Async('details') & Date(
        Regexp(
            CleanText('//div[has-class("property-description-main")]'),
            r'Mise à jour le ([\d\\]+)', default=datetime.today()
        )
    )

    def obj_details(self):
        """Return the GES and DPE symbols read from the detail page."""
        page_doc = Async('details').loaded_page(self).doc

        return {
            'GES': CleanText('//span[@id="gassymbol"]', '')(page_doc),
            'DPE': CleanText('//span[@id="energysymbol"]', '')(page_doc),
        }

    def obj_utilities(self):
        """Deduce the utilities status from the "price-info" text.

        'CC' means included and 'HC' means excluded; anything else is
        reported as unknown.
        """
        price = CleanText('//span[has-class("price-info")]')(self)
        if 'CC' in price:
            return UTILITIES.INCLUDED
        elif 'HC' in price:
            return UTILITIES.EXCLUDED
        else:
            return UTILITIES.UNKNOWN

    # NOTE(review): hard-coded value that looks like a leftover placeholder;
    # confirm whether NotAvailable (or a real selector) was intended.
    obj_station = 'Test'
    obj_bedrooms = Async('details') & CleanDecimal(
        CleanText('.//td//span[contains(text(), "Chambre")]//following-sibling::span[has-class("r")]'),
        default=NotAvailable
    )

    obj_rooms = Async('details') & CleanDecimal(
        CleanText('.//td//span[contains(text(), "Pièce")]//following-sibling::span[has-class("r")]'),
        default=NotAvailable
    )

    def obj_photos(self):
        """Return the slider pictures of the detail page.

        Images whose ``src`` is empty or starts with '/' are skipped.
        """
        page_doc = Async('details').loaded_page(self).doc
        photos = []
        for photo in page_doc.xpath('//div[@id="bxSliderContainer"]//ul//li//img'):
            url = Attr('.', 'src')(photo)
            # Test for emptiness first: indexing an empty src would raise
            # IndexError and abort the whole photo list.
            if url and url[0] != '/':
                photos.append(HousingPhoto(url))
        return photos

    def validate(self, obj):
        """Accept the object only when an id was extracted."""
        return obj.id != ''
Exemple #27
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` object from an advert detail page.

        ``parse`` walks the property/value lines of the page once and stores
        the results in ``self.env``; several ``obj_*`` fields read them back
        through ``Env``.
        """
        klass = Housing

        def parse(self, el):
            """Collect the property/value lines of the page into ``self.env``.

            Sets 'area', 'GES', 'DPE', 'typeBien', 'isFurnished' and
            'details'. Lines that are not recognised are stored in the
            'details' dict, keyed by their property label.
            """
            details = dict()
            self.env['area'] = NotAvailable
            self.env['GES'] = NotAvailable
            self.env['DPE'] = NotAvailable
            self.env['typeBien'] = NotAvailable
            # obj_type() reads this key unconditionally for rentals, so it
            # needs a value even when the page has no "Meublé" line.
            self.env['isFurnished'] = False

            house_types = {
                'parking': HOUSE_TYPES.PARKING,
                'appartement': HOUSE_TYPES.APART,
                'maison': HOUSE_TYPES.HOUSE,
                'terrain': HOUSE_TYPES.LAND,
            }

            for item in el.xpath('//div[@class="line"]/h2'):
                label = CleanText('./span[@class="property"]')(item)
                if 'Surface' in label:
                    self.env['area'] = CleanDecimal(
                        Regexp(CleanText('./span[@class="value"]'), '(.*)m.*'),
                        replace_dots=(',', '.'))(item)

                elif 'Type de bien' in label:
                    value = CleanText('./span[@class="value"]')(item).lower()
                    self.env['typeBien'] = house_types.get(
                        value, HOUSE_TYPES.OTHER)
                elif 'Meublé' in label:
                    value = CleanText('./span[@class="value"]')(item).lower()
                    self.env['isFurnished'] = (value == 'meublé')
                else:
                    key = u'%s' % label
                    if 'GES' in key or 'Classe' in key:
                        # A label containing 'Classe' is stored under 'DPE'.
                        if 'Classe' in key:
                            key = 'DPE'

                        value = (
                            CleanText('./span[@class="value"]')(item).strip())
                        if len(value):
                            self.env[key] = getattr(ENERGY_CLASS, value[0],
                                                    NotAvailable)
                    else:
                        details[key] = CleanText('./span[@class="value"]')(
                            item)

            self.env['details'] = details

        obj_id = Env('_id')

        def obj_type(self):
            """Return the post type deduced from the last breadcrumb link.

            'colocations' is tested before 'locations' because the former
            contains the latter; anything else is reported as a sale.
            """
            breadcrumb = Link(
                '(//nav[has-class("breadcrumbsNav")]//a)[last()]')(self)
            if 'colocations' in breadcrumb:
                return POSTS_TYPES.SHARING
            elif 'locations' in breadcrumb:
                if self.env['isFurnished']:
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            else:
                return POSTS_TYPES.SALE

        def obj_advert_type(self):
            """Return PROFESSIONAL when an "ispro" span exists, else PERSONAL."""
            line_pro = XPath('.//span[has-class("ispro")]', default=None)(self)
            if line_pro:
                return ADVERT_TYPES.PROFESSIONAL
            else:
                return ADVERT_TYPES.PERSONAL

        obj_house_type = Env('typeBien')

        obj_title = CleanText('//h1[@itemprop="name"]')
        obj_cost = CleanDecimal('//h2[@itemprop="price"]/@content',
                                default=Decimal(0))

        obj_currency = Currency('//h2[@itemprop="price"]/span[@class="value"]')

        def obj_utilities(self):
            """Deduce the utilities status from the text after the currency.

            "C.C." means included and "H.C." means excluded; anything else is
            reported as unknown.
            """
            utilities = Regexp(
                CleanText('//h2[@itemprop="price"]/span[@class="value"]'),
                '.*[%s%s%s](.*)' % (u'€', u'$', u'£'),
                default=u'')(self)
            if "C.C." in utilities:
                return UTILITIES.INCLUDED
            elif "H.C." in utilities:
                return UTILITIES.EXCLUDED
            else:
                return UTILITIES.UNKNOWN

        obj_DPE = Env('DPE')
        obj_GES = Env('GES')

        obj_text = CleanText('//p[@itemprop="description"]')
        obj_location = CleanText('//span[@itemprop="address"]')
        obj_details = Env('details')

        def obj_rooms(self):
            """Return the "Pièces" detail as a Decimal, or ``NotAvailable``."""
            rooms = self.env["details"].get(u"Pièces", None)
            return Decimal(rooms) if rooms else NotAvailable

        obj_area = Env('area')
        obj_price_per_meter = PricePerMeterFilter()
        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_date(self):
            """Return the publication date parsed from "Mise en ligne le ...".

            The 'à' character is removed and the ``DATE_TRANSLATE_FR``
            substitutions are applied before the text is handed to
            ``DateTime``.
            """
            _date = Regexp(
                CleanText('//p[has-class("line")]', replace=[(u'à', '')]),
                '.*Mise en ligne le (.*)')(self)

            for fr, en in DATE_TRANSLATE_FR:
                _date = fr.sub(en, _date)

            # DateTime reads its input through Env, hence the temporary key.
            self.env['tmp'] = _date
            return DateTime(Env('tmp'), LinearDateGuesser())(self)

        def obj_photos(self):
            """Return the advert pictures as a list of ``HousingPhoto``.

            The ``images[N] = "....jpg";`` assignments found in the page
            scripts are used first. When there are none, the image ``meta``
            tag is used as a fallback.
            """
            # \d+ rather than \d, so that indexes of 10 and above are matched
            # too.
            items = re.findall(r'images\[\d+\]\s*=\s*"([\w:\/\.-]*\.jpg)";',
                               CleanText('//script')(self))
            photos = [HousingPhoto(unicode(item)) for item in items]
            if not photos:
                img = CleanText('//meta[@itemprop="image"]/@content',
                                default=None)(self)
                if img:
                    photos.append(HousingPhoto(img))

            return photos
Exemple #28
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` object from the JSON description of an advert.

        Values are read with ``Dict`` paths, mostly under the
        'characteristics', 'location' and 'agency' keys.
        """
        klass = Housing

        def is_agency(self):
            """Return True when 'agency/isParticulier' equals 'false'."""
            return Dict('agency/isParticulier')(self) == 'false'

        obj_id = Env('_id')

        def obj_type(self):
            """Return the post type from the transaction kind.

            'location' gives RENT or FURNISHED_RENT depending on
            'isFurnished'; 'vente' gives VIAGER when the estate type mentions
            'viager', SALE otherwise; anything else gives ``NotAvailable``.
            """
            transaction = Dict('characteristics/transaction')(self)
            if transaction == 'location':
                if Dict('characteristics/isFurnished')(self):
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif transaction == 'vente':
                estate_type = Dict('characteristics/estateType')(self).lower()
                if 'viager' in estate_type:
                    return POSTS_TYPES.VIAGER
                else:
                    return POSTS_TYPES.SALE
            else:
                return NotAvailable

        def obj_advert_type(self):
            """Return PROFESSIONAL for agency adverts, PERSONAL otherwise."""
            # is_agency is a method and has to be called: the bound method
            # object is always truthy, which made every advert PROFESSIONAL.
            if self.is_agency():
                return ADVERT_TYPES.PROFESSIONAL
            else:
                return ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            """Map the lower-cased estate type to a ``HOUSE_TYPES`` value.

            Matching is by substring; unrecognised types give OTHER.
            """
            estate_type = Dict('characteristics/estateType')(self).lower()
            if 'appartement' in estate_type:
                return HOUSE_TYPES.APART
            elif 'maison' in estate_type:
                return HOUSE_TYPES.HOUSE
            elif 'parking' in estate_type:
                return HOUSE_TYPES.PARKING
            elif 'terrain' in estate_type:
                return HOUSE_TYPES.LAND
            else:
                return HOUSE_TYPES.OTHER

        obj_title = Dict('characteristics/titleWithTransaction')
        obj_location = Format('%s %s %s', Dict('location/address'),
                              Dict('location/cityLabel'),
                              Dict('location/postalCode'))

        def obj_cost(self):
            """Return the price, falling back to 'priceMin' when it is 0."""
            cost = TypeDecimal(Dict('characteristics/price'))(self)
            if cost == 0:
                cost = TypeDecimal(Dict('characteristics/priceMin'))(self)
            return cost

        obj_currency = BaseCurrency.get_currency('€')

        def obj_utilities(self):
            """Return INCLUDED when 'areFeesIncluded' is truthy, else EXCLUDED.

            A missing key is treated like a false value.
            """
            are_fees_included = Dict('characteristics/areFeesIncluded',
                                     default=None)(self)
            if are_fees_included:
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.EXCLUDED

        obj_text = CleanHTML(Dict('characteristics/description'))
        obj_url = BrowserURL('housing_html', _id=Env('_id'))

        def obj_area(self):
            """Return the area, falling back to 'areaMin' when it is 0."""
            area = TypeDecimal(Dict('characteristics/area'))(self)
            if area == 0:
                area = TypeDecimal(Dict('characteristics/areaMin'))(self)
            return area

        obj_date = FromTimestamp(Dict('characteristics/date'))
        obj_bedrooms = TypeDecimal(Dict('characteristics/bedroomCount'))

        def obj_rooms(self):
            """Return the first 'roomCount' entry, or ``NotAvailable``."""
            # TODO: Why is roomCount a list?
            rooms = Dict('characteristics/roomCount', default=[])(self)
            if rooms:
                return TypeDecimal(rooms[0])(self)
            return NotAvailable

        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """Return the 'xl' version of each image as ``HousingPhoto``.

            When the URL goes through thbr.figarocms.net and embeds another
            http:// URL, the embedded URL is used instead. Images without an
            'xl' entry are skipped.
            """
            photos = []
            for img in Dict('characteristics/images')(self):
                xl_url = img.get('xl')
                if not xl_url:
                    # re.search() would raise TypeError on None.
                    continue
                m = re.search(r'http://thbr\.figarocms\.net.*(http://.*)',
                              xl_url)
                if m:
                    photos.append(HousingPhoto(m.group(1)))
                else:
                    photos.append(HousingPhoto(xl_url))
            return photos

        def obj_DPE(self):
            """Return the energy consumption class, or ``NotAvailable``."""
            DPE = Dict('characteristics/energyConsumptionCategory',
                       default="")(self)
            return getattr(ENERGY_CLASS, DPE, NotAvailable)

        def obj_GES(self):
            """Return the greenhouse gas emission class, or ``NotAvailable``."""
            GES = Dict('characteristics/greenhouseGasEmissionCategory',
                       default="")(self)
            return getattr(ENERGY_CLASS, GES, NotAvailable)

        def obj_details(self):
            """Return a dict of secondary information about the advert.

            Each entry mirrors one 'characteristics' key and defaults to
            ``NotAvailable``. 'rooms' is only set when 'roomCount' is not
            empty. 'agency' joins the non-empty corporate name, address,
            postal code and city with ', '.
            """
            details = {}
            details['fees'] = Dict('characteristics/fees',
                                   default=NotAvailable)(self)
            details['agencyFees'] = Dict('characteristics/agencyFees',
                                         default=NotAvailable)(self)
            details['guarantee'] = Dict('characteristics/guarantee',
                                        default=NotAvailable)(self)
            details['bathrooms'] = Dict('characteristics/bathroomCount',
                                        default=NotAvailable)(self)
            details['creationDate'] = FromTimestamp(Dict(
                'characteristics/creationDate', default=NotAvailable),
                                                    default=NotAvailable)(self)
            details['availabilityDate'] = Dict(
                'characteristics/estateAvailabilityDate',
                default=NotAvailable)(self)
            details['exposure'] = Dict('characteristics/exposure',
                                       default=NotAvailable)(self)
            details['heatingType'] = Dict('characteristics/heatingType',
                                          default=NotAvailable)(self)
            details['floor'] = Dict('characteristics/floor',
                                    default=NotAvailable)(self)
            details['bedrooms'] = Dict('characteristics/bedroomCount',
                                       default=NotAvailable)(self)
            details['isFurnished'] = Dict('characteristics/isFurnished',
                                          default=NotAvailable)(self)
            rooms = Dict('characteristics/roomCount', default=[])(self)
            if len(rooms):
                details['rooms'] = rooms[0]
            details['available'] = Dict('characteristics/isAvailable',
                                        default=NotAvailable)(self)
            # Default to an empty dict: the previous NotAvailable default has
            # no .get(), so an advert without agency data raised
            # AttributeError. `or {}` also covers a null 'agency' value.
            agency = Dict('agency', default={})(self) or {}
            details['agency'] = ', '.join([
                x for x in [
                    agency.get('corporateName', ''),
                    agency.get('corporateAddress', ''),
                    agency.get('corporatePostalCode', ''),
                    agency.get('corporateCity', '')
                ] if x
            ])
            return details
Exemple #29
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` object from an advert detail page.

        Simple fields are declarative filters evaluated against the whole
        document; energy classes, dates, fees and criteria are gathered in
        ``obj_details``.
        """
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText(CleanHTML('//meta[@itemprop="name"]/@content'))
        # Area: the digits directly preceding " m²" in the title (group 2).
        obj_area = CleanDecimal(Regexp(CleanText(CleanHTML('//meta[@itemprop="name"]/@content')),
                                       '(.*?)(\d*) m\xb2(.*?)', '\\2', default=NotAvailable),
                                default=NotAvailable)
        obj_rooms = CleanDecimal('//div[has-class("offer-info")]//span[has-class("offer-rooms-number")]',
                                default=NotAvailable)
        obj_cost = CleanDecimal('//*[@itemprop="price"]', default=0)
        # Currency: the last €, $ or £ of the price text; € by default.
        obj_currency = Regexp(
            CleanText('//*[@itemprop="price"]'),
            '.*([%s%s%s])' % (u'€', u'$', u'£'),
            default=u'€'
        )

        def obj_utilities(self):
            """Report utilities as included when the notes say so.

            Returns ``UTILITIES.INCLUDED`` if the description notes contain
            "Loyer mensuel charges comprises", ``UTILITIES.UNKNOWN`` otherwise.
            """
            notes = CleanText('//p[@class="offer-description-notes"]')(self)
            if "Loyer mensuel charges comprises" in notes:
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.UNKNOWN

        obj_price_per_meter = PricePerMeterFilter()
        # Last update date, written as dd/mm/yyyy after "Mis à jour :".
        obj_date = Date(Regexp(CleanText('//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'),
                               u'.* Mis à jour : (\d{2}/\d{2}/\d{4}).*'),
                        dayfirst=True)
        obj_text = CleanHTML('//div[has-class("offer-description-text")]/meta[@itemprop="description"]/@content')
        obj_location = CleanText('//*[@itemprop="address"]')
        obj_station = CleanText(
            '//div[has-class("offer-description-metro")]',
            default=NotAvailable
        )

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_photos(self):
            """Return the carousel pictures as a list of ``HousingPhoto``.

            '75x75' in each image source is replaced by '800x600'.
            """
            photos = []
            for img in XPath('//div[@class="carousel-content"]/ul/li/a/img/@src|//div[@class="carousel"]/ul/li/a/img/@src')(self):
                photos.append(HousingPhoto(u'%s' % img.replace('75x75', '800x600')))
            return photos

        def obj_details(self):
            """Return a dict of secondary information about the advert.

            Contains the energy ("DPE") and greenhouse ("GES") classes, the
            creation date ("creationDate"), the agency fees ("Honoraires",
            only when present) and one entry per criterion of the description
            list, keyed by the criterion label.
            """
            details = {}
            energy_value = CleanText(
                '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("energy-summary")]',
                default=None
            )(self)
            if energy_value and len(energy_value) > 1:
                # Slice rather than index: a summary holding only the "DPE"
                # label yields '' instead of raising IndexError, and '' is
                # rejected by the membership test just below.
                energy_value = energy_value.replace("DPE", "").strip()[:1]
                if energy_value not in ["A", "B", "C", "D", "E", "F", "G"]:
                    energy_value = None
            if energy_value is None:
                energy_value = NotAvailable
            details["DPE"] = energy_value

            greenhouse_value = CleanText(
                '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("greenhouse-summary")]',
                default=None
            )(self)
            if greenhouse_value and len(greenhouse_value) > 1:
                # Same slicing guard as for the DPE value above.
                greenhouse_value = greenhouse_value.replace("GES", "").strip()[:1]
                if greenhouse_value not in ["A", "B", "C", "D", "E", "F", "G"]:
                    greenhouse_value = None
            if greenhouse_value is None:
                greenhouse_value = NotAvailable
            details["GES"] = greenhouse_value

            details["creationDate"] = Date(
                Regexp(
                    CleanText(
                        '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
                    ),
                    u'.*Mis en ligne : (\d{2}/\d{2}/\d{4}).*'
                ),
                dayfirst=True
            )(self)

            honoraires = CleanText(
                (
                    '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]'
                ),
                default=None
            )(self)
            if honoraires:
                # The amount normally follows the first ':'. Keep the whole
                # text when there is no colon instead of raising IndexError.
                parts = honoraires.split(":")
                amount = parts[1] if len(parts) > 1 else parts[0]
                details["Honoraires"] = (
                    "{} (TTC, en sus)".format(amount.strip())
                )

            for li in XPath('//ul[@itemprop="description"]/li')(self):
                label = CleanText('./div[has-class("criteria-label")]')(li)
                value = CleanText('./div[has-class("criteria-value")]')(li)
                details[label] = value

            return details
Exemple #30
0
        class item(ItemElement):
            """Build a ``Housing`` object from one search-result entry.

            XPaths are relative to the entry element. ``condition`` filters
            entries by advert type when exactly one type was requested.
            """
            klass = Housing
            price_selector = './/span[@class="price-label"]|./div/div[@class="item-price-pdf"]'

            def is_agency(self):
                """Return False only for entries marked 'annonce de particulier'."""
                agency = CleanText('.//span[has-class("item-agency-name")]')(
                    self.el)
                return 'annonce de particulier' not in agency.lower()

            def condition(self):
                """Tell whether this entry should be turned into an object.

                When exactly one advert type is requested, the entry is kept
                only if it matches that type. Otherwise it is kept when it
                carries a ``data-classified-id`` attribute.
                """
                if len(self.env['advert_types']) == 1:
                    is_agency = self.is_agency()
                    if self.env['advert_types'][0] == ADVERT_TYPES.PERSONAL:
                        return not is_agency
                    elif self.env['advert_types'][
                            0] == ADVERT_TYPES.PROFESSIONAL:
                        return is_agency
                return Attr('.', 'data-classified-id', default=False)(self)

            obj_id = Attr('.', 'data-classified-id')
            obj_type = Env('query_type')
            obj_title = CleanText('./div/h2[@class="item-type"]')

            def obj_advert_type(self):
                """Return PROFESSIONAL for agency entries, PERSONAL otherwise."""
                if self.is_agency():
                    return ADVERT_TYPES.PROFESSIONAL
                else:
                    return ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                """Map the first word of the title to a ``HOUSE_TYPES`` value.

                An empty title or an unrecognised word gives OTHER.
                """
                # Guard against an empty title: indexing the result of
                # split() would raise IndexError.
                words = self.obj_title(self).split()
                kind = words[0].lower() if words else ''
                if kind in ("appartement", "studio", "chambre"):
                    return HOUSE_TYPES.APART
                elif kind in ("maison", "villa"):
                    return HOUSE_TYPES.HOUSE
                elif kind == "parking":
                    return HOUSE_TYPES.PARKING
                elif kind == "terrain":
                    return HOUSE_TYPES.LAND
                else:
                    return HOUSE_TYPES.OTHER

            def obj_location(self):
                """Return "<locality> (<postal code>)" from the entry's script.

                The script content is parsed as JSON, with a second attempt
                after removing trailing '}' characters. ``NotLoaded`` is
                returned when parsing fails or the address is missing.
                """
                script = CleanText('./script')(self)
                try:
                    # Should be standard JSON+LD data
                    script = json.loads(script)
                except ValueError:
                    try:
                        # But explorimmo can't write JSON correctly and there
                        # is a trailing "}"
                        script = json.loads(script.strip().rstrip('}'))
                    except ValueError:
                        script = None
                if not script:
                    return NotLoaded

                try:
                    return '%s (%s)' % (script['address']['addressLocality'],
                                        script['address']['postalCode'])
                except (KeyError, TypeError):
                    # TypeError covers JSON that parsed to something other
                    # than nested dicts, such as a list or a plain string.
                    return NotLoaded

            def obj_cost(self):
                """Return the price of the entry.

                For a "de X à Y" range the lower bound X is used; otherwise
                the whole price text is parsed, defaulting to
                ``NotAvailable``.
                """
                cost = CleanDecimal(
                    Regexp(CleanText(self.price_selector, default=''),
                           r'de (.*) à .*',
                           default=0))(self)
                if cost == 0:
                    return CleanDecimal(self.price_selector,
                                        default=NotAvailable)(self)
                else:
                    return cost

            obj_currency = Currency(price_selector)

            def obj_utilities(self):
                """Return INCLUDED when the price text contains "CC", else UNKNOWN."""
                utilities = CleanText(
                    './div/div/span[@class="price-label"]|'
                    './div/div[@class="item-price-pdf"]|'
                    './div/div/span[@class="item-price"]')(self)
                if "CC" in utilities:
                    return UTILITIES.INCLUDED
                else:
                    return UTILITIES.UNKNOWN

            obj_text = CleanText('./div/p[@itemprop="description"]')
            # Area: the digits, commas and dots directly preceding " m2" in
            # the title (group 2).
            obj_area = CleanDecimal(Regexp(obj_title,
                                           r'(.*?)([\d,\.]*) m2(.*?)',
                                           '\\2',
                                           default=None),
                                    replace_dots=True,
                                    default=NotLoaded)

            obj_url = Format(
                "https://immobilier.lefigaro.fr/annonces/annonce-%s.html",
                CleanText('./@data-classified-id'))

            obj_price_per_meter = PricePerMeterFilter()

            def obj_phone(self):
                """Return the phone number, or ``NotLoaded`` when it contains '...'.

                The "Téléphoner : " prefix is stripped from the text.
                """
                phone = CleanText(
                    './div/div/ul/li[has-class("js-clickphone")]',
                    replace=[('Téléphoner : ', '')],
                    default=NotLoaded)(self)

                if '...' in phone:
                    return NotLoaded

                return phone

            def obj_details(self):
                """Return ``{"fees": ...}`` from the "price-fees" span.

                ``NotLoaded`` is returned when the span is missing or empty.
                """
                charges = CleanText('.//span[@class="price-fees"]',
                                    default=None)(self)
                if not charges:
                    return NotLoaded

                # The value normally follows the first ':'. Keep the whole
                # text when there is no colon instead of raising IndexError.
                parts = charges.split(":")
                fees = parts[1] if len(parts) > 1 else parts[0]
                return {"fees": fees.strip()}

            def obj_photos(self):
                """Return the entry thumbnail as a one-element photo list.

                The ``data-src`` value is URL-unquoted. When it embeds a
                second "http://" URL, only that URL is kept, without its
                query string. ``NotLoaded`` is returned when there is no
                image.
                """
                url = CleanText(
                    './div[has-class("default-img")]/img/@data-src')(self)
                if url:
                    url = unquote(url)
                    if "http://" in url[3:]:
                        rindex = url.rfind("?")
                        if rindex == -1:
                            # No query string: slice up to the end.
                            rindex = None
                        url = url[url.find("http://", 3):rindex]
                    return [HousingPhoto(url)]
                else:
                    return NotLoaded