def parse(self, el):
    """Fill ``env['rooms']``, ``env['bedrooms']`` and ``env['area']``.

    Each ``<li>`` of the item-tags list is classified from its text: a tag
    mentioning "chambre" gives the bedroom count, one mentioning "pièce"
    gives the room count, and every other tag is treated as the area.
    Keys for which no tag is found keep the ``NotLoaded`` placeholder.

    :param el: element whose descendants hold the item-tags list
    """
    tags = el.xpath(
        './/div[@class="clearfix"]/ul[has-class("item-tags")]/li'
    )

    for key in ('rooms', 'bedrooms', 'area'):
        self.env[key] = NotLoaded

    for tag in tags:
        label = CleanText('.')(tag).lower()
        if 'chambre' in label:
            key = 'bedrooms'
            extractor = CleanDecimal('./strong')
        elif 'pièce' in label:
            key = 'rooms'
            extractor = CleanDecimal('./strong')
        else:
            # Fallback: the decimal is taken from the part of the tag text
            # captured by the regexp (digits before the first space).
            key = 'area'
            extractor = CleanDecimal(
                Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
            )
        self.env[key] = extractor(tag)
def parse(self, el):
    """Fill ``env['rooms']``, ``env['bedrooms']`` and ``env['area']``.

    Each ``<li>`` of the item-tags list is classified from its text: a tag
    mentioning "chambre" gives the bedroom count, one mentioning "pièce"
    gives the room count, and every other tag is treated as the area.
    Keys for which no tag is found stay ``NotAvailable``.

    :param el: element whose descendants hold the item-tags list
    """
    tags = el.xpath(
        './/div[@class="clearfix"]/ul[has-class("item-tags")]/li'
    )

    for key in ('rooms', 'bedrooms', 'area'):
        self.env[key] = NotAvailable

    for tag in tags:
        label = CleanText('.')(tag).lower()
        if 'chambre' in label:
            key = 'bedrooms'
            extractor = CleanDecimal('./strong')
        elif 'pièce' in label:
            key = 'rooms'
            extractor = CleanDecimal('./strong')
        else:
            # Fallback: the decimal is taken from the part of the tag text
            # captured by the regexp (digits before the first space).
            key = 'area'
            extractor = CleanDecimal(
                Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
            )
        self.env[key] = extractor(tag)
def parse(self, el):
    """Fill ``env['rooms']``, ``env['bedrooms']`` and ``env['area']``.

    Each ``<li>`` of the item-tags list is classified from its text:

    * contains "chambre" -> ``bedrooms`` (decimal read from ``./strong``);
    * contains "pièce"   -> ``rooms`` (decimal read from ``./strong``);
    * contains " m²" but not "le m²" -> ``area`` (decimal read from the
      part of the text captured by the regexp).

    Tags matching none of these rules are ignored. Keys for which no tag
    is found stay ``NotAvailable``.

    :param el: element whose descendants hold the item-tags list
    """
    rooms_bedrooms_area = el.xpath(
        './/ul[has-class("item-tags")]/li'
    )
    self.env['rooms'] = NotAvailable
    self.env['bedrooms'] = NotAvailable
    self.env['area'] = NotAvailable

    for item in rooms_bedrooms_area:
        text = CleanText('.')(item)
        lowered = text.lower()
        if 'chambre' in lowered:
            name = 'bedrooms'
            value = CleanDecimal('./strong')(item)
        elif 'pièce' in lowered:
            name = 'rooms'
            value = CleanDecimal('./strong')(item)
        elif ' m²' in text and 'le m²' not in text:
            name = 'area'
            value = CleanDecimal(
                Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
            )(item)
        else:
            # Unrecognised tag: without this guard the assignment below
            # would either fail on an unbound ``value`` or store a stale
            # value under the raw tag text.
            continue
        self.env[name] = value
def obj_type(self):
    """Return the post type deduced from the housing URL and page content.

    * URL contains "colocation" -> ``POSTS_TYPES.SHARING``
    * URL contains "location"   -> ``FURNISHED_RENT`` when the "meublé"
      criterion of the description list is "oui", else ``RENT``
    * URL contains "vente"      -> ``VIAGER`` when the contact button's
      ``data-offertransactiontype`` is ``'4'``, else ``SALE``
    * otherwise                 -> ``NotAvailable``
    """
    housing_url = BrowserURL('housing', _id=Env('_id'))(self)

    # "colocation" contains "location", so it has to be tested first.
    if 'colocation' in housing_url:
        return POSTS_TYPES.SHARING

    if 'location' in housing_url:
        furnished = False
        for criterion in XPath('//ul[@itemprop="description"]/li')(self):
            label = CleanText('./div[has-class("criteria-label")]')(criterion)
            if label.lower() != "meublé":
                continue
            answer = CleanText('./div[has-class("criteria-value")]')(criterion)
            # No break: if several such criteria exist the last one wins.
            furnished = answer.lower() == 'oui'
        return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

    if 'vente' in housing_url:
        transaction = Attr(
            '//button[has-class("offer-contact-vertical-phone")][1]',
            'data-offertransactiontype'
        )(self)
        return POSTS_TYPES.VIAGER if transaction == '4' else POSTS_TYPES.SALE

    return NotAvailable
def obj_type(self):
    """Return the account type, trying several sources in turn.

    Lookups are done in ``self.page.ACCOUNT_TYPES`` with, in order: the
    URL path segments (last to first), the words of the label, then the
    text of the preceding "master" row heading. If none matches, the
    account is a loan when its details page is a ``LoanPage``; otherwise
    ``Account.TYPE_UNKNOWN`` is returned.
    """
    # card url is /compte/cav/xxx/carte/yyy so reverse to match "carte" before "cav"
    for segment in reversed(Field('url')(self).lower().split('/')):
        found = self.page.ACCOUNT_TYPES.get(segment)
        if found:
            return found

    label_words = Field('label')(self).replace('_', ' ').lower().split()
    for token in label_words:
        found = self.page.ACCOUNT_TYPES.get(token)
        if found:
            return found

    heading = CleanText(
        './preceding-sibling::tr[has-class("list--accounts--master")]//h4'
    )(self)
    found = self.page.ACCOUNT_TYPES.get(heading.lower())
    if found:
        return found

    details = Async('details').loaded_page(self)
    return Account.TYPE_LOAN if isinstance(details, LoanPage) else Account.TYPE_UNKNOWN
def obj_utilities(self):
    """Tell whether utilities are included, based on the price text.

    Returns ``UTILITIES.INCLUDED`` when the price element's text contains
    "charges comprises" (case-insensitive), ``UTILITIES.EXCLUDED``
    otherwise.
    """
    price_text = CleanText(
        './/strong[has-class("TeaserOffer-price-num")]'
    )(self)
    included = "charges comprises" in price_text.lower()
    return UTILITIES.INCLUDED if included else UTILITIES.EXCLUDED
def obj_type(self):
    """Return the post type deduced from the housing URL and page content.

    * URL contains "colocation" -> ``POSTS_TYPES.SHARING``
    * URL contains "location"   -> ``FURNISHED_RENT`` when the "meublé"
      criterion of the description list is "oui", else ``RENT``
    * URL contains "vente"      -> ``VIAGER`` when the contact button's
      ``data-offertransactiontype`` is ``'4'``, else ``SALE``
    * otherwise                 -> ``NotAvailable``
    """
    housing_url = BrowserURL('housing', _id=Env('_id'))(self)

    # "colocation" contains "location", so it has to be tested first.
    if 'colocation' in housing_url:
        return POSTS_TYPES.SHARING

    if 'location' in housing_url:
        furnished = False
        for criterion in XPath('//ul[@itemprop="description"]/li')(self):
            label = CleanText('./span[has-class("criteria-label")]')(criterion)
            if label.lower() != "meublé":
                continue
            answer = CleanText('./span[has-class("criteria-value")]')(criterion)
            # No break: if several such criteria exist the last one wins.
            furnished = answer.lower() == 'oui'
        return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

    if 'vente' in housing_url:
        transaction = Attr(
            '//button[has-class("offer-contact-vertical-phone")][1]',
            'data-offertransactiontype'
        )(self)
        return POSTS_TYPES.VIAGER if transaction == '4' else POSTS_TYPES.SALE

    return NotAvailable
def find_account(self, acclabel, accowner):
    """Locate the identifiers of an account in the document.

    Links whose ``onclick`` mentions "indiceCompte" are searched first and
    must match both the label and the owner; links whose ``href`` mentions
    it are searched next and are matched on the label only.

    :param acclabel: link text to look for
    :param accowner: owner name; compared as a sorted list of lowercase words
    :return: ``False`` when the page shows the "indisponible pour le moment"
        paragraph; otherwise a list holding the two numbers captured after
        "indiceCompte" and "idRacine" followed by the text of the preceding
        table cell(s); ``None`` when no link matches
    """
    # first name and last name may not be ordered the same way on market site...
    wanted_owner = sorted(accowner.lower().split())

    # Check if history is present
    unavailable = self.doc.xpath(
        '//body/p[contains(text(), "indisponible pour le moment")]'
    )
    if CleanText(default=None).filter(unavailable):
        return False

    for link in self.doc.xpath('//a[contains(@onclick, "indiceCompte")]'):
        self.logger.debug("get investment from onclick")
        link_label = CleanText('.')(link)
        owner_text = CleanText(
            './ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]'
        )(link)
        owner_words = sorted(owner_text.lower().split())
        if link_label != acclabel or owner_words != wanted_owner:
            continue
        match = re.search(
            r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)',
            Attr('.', 'onclick')(link)
        )
        ids = list(match.groups())
        ids.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
        self.logger.debug("assign value to ids: {}".format(ids))
        return ids

    for link in self.doc.xpath('//a[contains(@href, "indiceCompte")]'):
        self.logger.debug("get investment from href")
        if CleanText('.')(link) != acclabel:
            continue
        match = re.search(
            r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)',
            Attr('.', 'href')(link)
        )
        ids = list(match.groups())
        ids.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
        self.logger.debug("assign value to ids: {}".format(ids))
        return ids

    return None
def obj_valuation(self):
    """Return the valuation cell, negated under a "debit" heading.

    The value is negated when it is truthy and the closest preceding
    ``<h2>`` of the enclosing "Histo" div contains one of
    ``self.page.DEBIT_WORDS``; otherwise it is returned unchanged
    (including when it is ``None`` or zero).
    """
    amount = MyDecimal(TableCell('valuation', default=None))(self)
    heading = CleanText(
        './ancestor::div[contains(@id, "Histo")][1]/preceding-sibling::h2[1]'
    )(self)

    if not amount:
        return amount

    heading = heading.lower()
    for word in self.page.DEBIT_WORDS:
        if word in heading:
            return -amount
    return amount
def on_load(self):
    """Raise when the page carries a global error message.

    :raises BrowserIncorrectPassword: the message mentions a wrong
        password or an unusable account
    :raises BrowserUnavailable: any other non-empty error message
    """
    message = CleanText('//li[@class="globalErreurMessage"]')(self.doc)
    if not message:
        return

    # Catch wrongpass accordingly
    lowered = message.lower()
    for marker in ("mot de passe incorrect", "votre compte n'est plus utilisable"):
        if marker in lowered:
            raise BrowserIncorrectPassword(message)

    raise BrowserUnavailable(message)
def obj_utilities(self):
    """Tell whether utilities are included, based on the price text.

    Returns ``UTILITIES.INCLUDED`` when the price paragraph's text
    contains "charges comprises" (case-insensitive),
    ``UTILITIES.EXCLUDED`` otherwise.
    """
    price_text = CleanText('//p[has-class("OfferTop-price")]')(self)
    included = "charges comprises" in price_text.lower()
    return UTILITIES.INCLUDED if included else UTILITIES.EXCLUDED
def parse(self, el):
    """Derive the availability date and condition from the table cell.

    Sets three env entries:

    * ``availability_date``: the text of the cell's ``<span>`` parsed as a
      day-first date, or ``NotAvailable`` when it cannot be parsed;
    * ``condition``: ``Pocket.CONDITION_DATE`` when a date was found,
      otherwise the ``self.page.CONDITIONS`` entry for the first lowercase
      word of the text, falling back to ``Pocket.CONDITION_UNKNOWN``;
    * ``matching_txt``: the raw cleaned text.

    :param el: current element (unused here)
    """
    txt = CleanText(
        TableCell('availability')(self)[0].xpath('./span'))(self)

    availability_date = Date(
        dayfirst=True, default=NotAvailable).filter(txt)
    self.env['availability_date'] = availability_date

    if availability_date:
        condition = Pocket.CONDITION_DATE
    else:
        words = txt.lower().split()
        # An empty cell yields no words; indexing [0] would raise
        # IndexError, so treat it as an unknown condition instead.
        if words:
            condition = self.page.CONDITIONS.get(
                words[0], Pocket.CONDITION_UNKNOWN)
        else:
            condition = Pocket.CONDITION_UNKNOWN

    self.env['condition'] = condition
    self.env['matching_txt'] = txt
def obj_utilities(self):
    """Tell whether utilities are included, based on the price text.

    Returns ``UTILITIES.INCLUDED`` when the price element's text contains
    "charges comprises" (case-insensitive), ``UTILITIES.EXCLUDED``
    otherwise.
    """
    price_text = CleanText(
        './/strong[has-class("TeaserOffer-price-num")]'
    )(self)
    included = "charges comprises" in price_text.lower()
    return UTILITIES.INCLUDED if included else UTILITIES.EXCLUDED
def condition(self):
    """Decide whether the current item should be kept.

    The item needs a link matching ``/annonces/(.*)``. In addition, when
    the query type is ``POSTS_TYPES.RENT``, its title must not contain
    "meublé".

    :return: the falsy regexp result when the link does not match,
        otherwise the boolean result of the furnished check
    """
    # NOTE(review): this selector uses class "title-item" whereas the link
    # selector below uses "item-title" -- confirm which one the page has.
    heading = CleanText(
        './div[has-class("box-header")]/a[@class="title-item"]'
    )(self)

    furnished_ok = True
    if self.env['query_type'] == POSTS_TYPES.RENT:
        furnished_ok = 'meublé' not in heading.lower()

    annonce_path = Regexp(
        Link('./div/a[@class="item-title"]'),
        '/annonces/(.*)',
        default=None
    )(self)
    if not annonce_path:
        return annonce_path
    return furnished_ok
def parse(self, el):
    """Set ``env['type']`` and ``env['page']`` from the details page.

    The text of the cell following the "Cadre fiscal" header of the
    asynchronously loaded details page is looked up (lowercased) in
    ``self.page.TYPES``; unknown values map to ``Account.TYPE_UNKNOWN``.

    :param el: current element (unused here)
    :raises SkipItem: when the "Cadre fiscal" cell is missing or empty
    """
    page = Async('details').loaded_page(self)

    # The XPath is kept in a single literal: a backslash continuation
    # inside the string would leak a backslash or indentation into it.
    fiscal_type = CleanText().filter(
        page.doc.xpath(
            '//th[contains(text(), "Cadre fiscal")]/following-sibling::td[1]'
        )
    )
    if not fiscal_type:
        raise SkipItem()

    self.env['type'] = self.page.TYPES.get(
        fiscal_type.lower(), Account.TYPE_UNKNOWN)
    self.env['page'] = page
def parse(self, el):
    """Fill ``env['rooms']``, ``env['bedrooms']`` and ``env['area']``.

    Each ``<li>`` of the item-tags list found under the item-title link
    is classified from its text: a tag mentioning "chambre" gives the
    bedroom count, one mentioning "pièce" gives the room count, and every
    other tag is treated as the area. Keys for which no tag is found keep
    the ``NotLoaded`` placeholder.

    :param el: element whose children hold the item-title link
    """
    tags = el.xpath(
        './div/a[has-class("item-title")]/ul[has-class("item-tags")]/li'
    )

    for key in ('rooms', 'bedrooms', 'area'):
        self.env[key] = NotLoaded

    for tag in tags:
        label = CleanText('.')(tag).lower()
        if 'chambre' in label:
            key = 'bedrooms'
            extractor = CleanDecimal('.')
        elif 'pièce' in label:
            key = 'rooms'
            extractor = CleanDecimal('.')
        else:
            # Fallback: the decimal is taken from the part of the tag text
            # captured by the regexp (digits before the first space).
            key = 'area'
            extractor = CleanDecimal(
                Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
            )
        self.env[key] = extractor(tag)
def get_ids(ref, acclabel, accowner):
    """Search links whose ``ref`` attribute mentions "indiceCompte".

    Relies on ``self`` being available from the enclosing scope.

    :param ref: name of the link attribute to inspect
    :param acclabel: link text to look for
    :param accowner: expected owner, as a sorted list of lowercase words
    :return: for the first link matching label and owner, a list holding
        the two numbers captured after "indiceCompte" and "idRacine"
        followed by the text of the preceding table cell(s); ``None``
        when no link matches
    """
    found = None
    for link in self.doc.xpath('//a[contains(@%s, "indiceCompte")]' % ref):
        self.logger.debug("get investment from %s" % ref)
        link_label = CleanText('.')(link)
        owner_text = CleanText(
            './ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]'
        )(link)
        # Drop everything from the first " (" before comparing names.
        owner_text = re.sub(r' \(.+', '', owner_text)
        owner_words = sorted(owner_text.lower().split())
        if link_label != acclabel or owner_words != accowner:
            continue
        match = re.search(
            r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)',
            Attr('.', ref)(link)
        )
        found = list(match.groups())
        found.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
        self.logger.debug("assign value to ids: {}".format(found))
        return found
    return found
def obj_type(self):
    """Return the post type deduced from the first breadcrumb link.

    * link contains "location" -> ``FURNISHED_RENT`` when the page title
      contains "meublé" (case-insensitive), else ``RENT``
    * link contains "vente"    -> ``SALE``
    * link contains "viager"   -> ``VIAGER``
    * otherwise                -> ``NotAvailable``
    """
    breadcrumb_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)

    if 'location' in breadcrumb_link:
        heading = CleanText(
            '//div[has-class("box-header")]/h1[@class="clearfix"]'
        )(self)
        furnished = 'meublé' in heading.lower()
        return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

    if 'vente' in breadcrumb_link:
        return POSTS_TYPES.SALE

    if 'viager' in breadcrumb_link:
        return POSTS_TYPES.VIAGER

    return NotAvailable
def parse(self, el):
    """Set ``env['label']``, ``env['amount']`` and ``env['investments']``.

    The amount is the first truthy decimal found in the cells following a
    "Total" cell of the details page, or the row's own ``amount`` cell
    when there is none. It is negated when the label contains one of
    ``self.page.DEBIT_WORDS``.

    :param el: current element (unused here)
    """
    details = Async('details').loaded_page(self)
    label = CleanText(TableCell('label')(self)[0].xpath('./a[1]'))(self)

    # Try to get gross amount
    total_cells = details.doc.xpath(
        '//td[em[1][contains(text(), "Total")]]/following-sibling::td'
    )
    gross = None
    for cell in total_cells:
        gross = CleanDecimal('.', default=None)(cell)
        if gross:
            break

    if gross:
        amount = gross
    else:
        amount = MyDecimal(TableCell('amount'))(self)

    lowered_label = label.lower()
    if any(word in lowered_label for word in self.page.DEBIT_WORDS):
        amount = -amount

    self.env['label'] = label
    self.env['amount'] = amount
    self.env['investments'] = list(details.get_investments())
def obj_type(self):
    """Return the post type deduced from ``adview/category_id``.

    * 11 -> ``POSTS_TYPES.SHARING``
    * 10 -> ``FURNISHED_RENT`` when the "furnished" detail equals
      "meublé" (case-insensitive), else ``RENT``
    * anything else, including a non-integer id -> ``POSTS_TYPES.SALE``
    """
    try:
        category = int(Dict('adview/category_id')(self))
    except ValueError:
        category = None

    if category == 11:
        return POSTS_TYPES.SHARING

    if category != 10:
        return POSTS_TYPES.SALE

    furnished = CleanText(PopDetail('furnished', default=' '))(self)
    if furnished.lower() == u'meublé':
        return POSTS_TYPES.FURNISHED_RENT
    return POSTS_TYPES.RENT
def get_ids(ref, acclabel, accowner):
    """Search links whose ``ref`` attribute mentions "indiceCompte".

    Relies on ``self`` being available from the enclosing scope.

    :param ref: name of the link attribute to inspect
    :param acclabel: link text to look for
    :param accowner: expected owner, as a sorted list of lowercase words
    :return: for the first link matching label and owner, a list holding
        the two numbers captured after "indiceCompte" and "idRacine"
        followed by the text of the preceding table cell(s); ``None``
        when no link matches
    """
    found = None
    for link in self.doc.xpath('//a[contains(@%s, "indiceCompte")]' % ref):
        self.logger.debug("get investment from %s" % ref)
        link_label = CleanText('.')(link)
        owner_text = CleanText(
            './ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]'
        )(link)
        # Drop everything from the first " (" before comparing names.
        owner_text = re.sub(r' \(.+', '', owner_text)
        owner_words = sorted(owner_text.lower().split())
        if link_label != acclabel or owner_words != accowner:
            continue
        match = re.search(
            r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)',
            Attr('.', ref)(link)
        )
        found = list(match.groups())
        found.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
        self.logger.debug("assign value to ids: {}".format(found))
        return found
    return found
def find_account(self, acclabel, accowner):
    """Locate the identifiers of an account in the document.

    Links whose ``onclick`` mentions "indiceCompte" are searched first and
    must match both the label and the owner; links whose ``href`` mentions
    it are searched next and are matched on the label only.

    :param acclabel: link text to look for
    :param accowner: owner name; compared as a sorted list of lowercase words
    :return: ``False`` when the page shows the "indisponible pour le moment"
        paragraph; otherwise a list holding the two numbers captured after
        "indiceCompte" and "idRacine" followed by the text of the preceding
        table cell(s); ``None`` when no link matches
    """
    # first name and last name may not be ordered the same way on market site...
    wanted_owner = sorted(accowner.lower().split())

    # Check if history is present
    unavailable = self.doc.xpath(
        '//body/p[contains(text(), "indisponible pour le moment")]'
    )
    if CleanText(default=None).filter(unavailable):
        return False

    for link in self.doc.xpath('//a[contains(@onclick, "indiceCompte")]'):
        self.logger.debug("get investment from onclick")
        link_label = CleanText('.')(link)
        owner_text = CleanText(
            './ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]'
        )(link)
        owner_words = sorted(owner_text.lower().split())
        if link_label != acclabel or owner_words != wanted_owner:
            continue
        match = re.search(
            r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)',
            Attr('.', 'onclick')(link)
        )
        ids = list(match.groups())
        ids.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
        self.logger.debug("assign value to ids: {}".format(ids))
        return ids

    for link in self.doc.xpath('//a[contains(@href, "indiceCompte")]'):
        self.logger.debug("get investment from href")
        if CleanText('.')(link) != acclabel:
            continue
        match = re.search(
            r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)',
            Attr('.', 'href')(link)
        )
        ids = list(match.groups())
        ids.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
        self.logger.debug("assign value to ids: {}".format(ids))
        return ids

    return None
def parse(self, el):
    """Set ``env['label']``, ``env['amount']`` and ``env['investments']``.

    The amount is the first truthy decimal found in the cells following a
    "Total" cell of the details page, or the row's own ``amount`` cell
    when there is none. It is negated when the label contains one of
    ``self.page.DEBIT_WORDS``.

    :param el: current element (unused here)
    """
    details = Async('details').loaded_page(self)
    label = CleanText(TableCell('label')(self)[0].xpath('./a[1]'))(self)

    # Try to get gross amount
    total_cells = details.doc.xpath(
        '//td[em[1][contains(text(), "Total")]]/following-sibling::td'
    )
    gross = None
    for cell in total_cells:
        gross = CleanDecimal('.', default=None)(cell)
        if gross:
            break

    if gross:
        amount = gross
    else:
        amount = MyDecimal(TableCell('amount'))(self)

    lowered_label = label.lower()
    if any(word in lowered_label for word in self.page.DEBIT_WORDS):
        amount = -amount

    self.env['label'] = label
    self.env['amount'] = amount
    self.env['investments'] = list(details.get_investments())
def obj_type(self):
    """Return the account type, trying several sources in turn.

    Lookups are done in ``self.page.ACCOUNT_TYPES`` with, in order: the
    URL path segments (last to first), the words of the label, then the
    text of the preceding "master" row heading. If none matches, the
    account is a loan when its details page is a ``LoanPage``; otherwise
    ``Account.TYPE_UNKNOWN`` is returned.
    """
    # card url is /compte/cav/xxx/carte/yyy so reverse to match "carte" before "cav"
    for segment in reversed(Field('url')(self).lower().split('/')):
        found = self.page.ACCOUNT_TYPES.get(segment)
        if found:
            return found

    label_words = Field('label')(self).replace('_', ' ').lower().split()
    for token in label_words:
        found = self.page.ACCOUNT_TYPES.get(token)
        if found:
            return found

    heading = CleanText(
        './preceding-sibling::tr[has-class("list--accounts--master")]//h4'
    )(self)
    found = self.page.ACCOUNT_TYPES.get(heading.lower())
    if found:
        return found

    details = Async('details').loaded_page(self)
    return Account.TYPE_LOAN if isinstance(details, LoanPage) else Account.TYPE_UNKNOWN
def obj_type(self):
    """Return the post type deduced from the housing URL and page content.

    * URL contains "colocation" -> ``POSTS_TYPES.SHARING``
    * URL contains "location"   -> ``FURNISHED_RENT`` when the "meublé"
      criterion of the description list is "oui", else ``RENT``
    * URL contains "vente"      -> ``VIAGER`` when the post text contains
      both "viager" and "rente" (case-insensitive), else ``SALE``
    * otherwise                 -> ``NotAvailable``
    """
    url = BrowserURL('housing', _id=Env('_id'))(self)

    # "colocation" contains "location", so it has to be tested first.
    if 'colocation' in url:
        return POSTS_TYPES.SHARING

    if 'location' in url:
        is_furnished = False
        for li in XPath('//ul[@itemprop="description"]/li')(self):
            label = CleanText('./div[has-class("criteria-label")]')(li)
            if label.lower() == "meublé":
                # No break: if several such criteria exist the last wins.
                is_furnished = (
                    CleanText('./div[has-class("criteria-value")]')(
                        li).lower() == 'oui')
        if is_furnished:
            return POSTS_TYPES.FURNISHED_RENT
        return POSTS_TYPES.RENT

    if 'vente' in url:
        # Evaluate the text once instead of once per keyword.
        text = self.obj_text(self).lower()
        if 'viager' in text and 'rente' in text:
            return POSTS_TYPES.VIAGER
        return POSTS_TYPES.SALE

    return NotAvailable
def obj_valuation(self):
    """Return the valuation cell, negated under a "debit" heading.

    The value is negated when it is truthy and the closest preceding
    ``<h2>`` of the enclosing "Histo" div contains one of
    ``self.page.DEBIT_WORDS``; otherwise it is returned unchanged
    (including when it is ``None`` or zero).
    """
    amount = MyDecimal(TableCell('valuation', default=None))(self)
    heading = CleanText(
        './ancestor::div[contains(@id, "Histo")][1]/preceding-sibling::h2[1]'
    )(self)

    if not amount:
        return amount

    heading = heading.lower()
    for word in self.page.DEBIT_WORDS:
        if word in heading:
            return -amount
    return amount
def is_agency(self):
    """Return ``True`` unless the agency-name span reads as a private ad.

    The item counts as an agency whenever the span text does not contain
    "annonce de particulier" (case-insensitive).
    """
    agency_name = CleanText('.//span[has-class("item-agency-name")]')(self.el)
    is_private = 'annonce de particulier' in agency_name.lower()
    return not is_private
def is_here(self):
    """Return ``True`` when the alert box text mentions "compte inconnu".

    Only the direct text nodes of divs whose id contains "alert-random"
    are inspected; the comparison is case-insensitive.
    """
    alert_text = CleanText(
        '//div[contains(@id, "alert-random")]/text()',
        children=False
    )(self.doc)
    lowered = alert_text.lower()
    return "compte inconnu" in lowered
def parse(self, el):
    """Derive the availability date and condition from the table cell.

    Sets three env entries:

    * ``availability_date``: the text of the cell's ``<span>`` parsed as a
      day-first date, or ``NotAvailable`` when it cannot be parsed;
    * ``condition``: ``Pocket.CONDITION_DATE`` when a date was found,
      otherwise the ``self.page.CONDITIONS`` entry for the first lowercase
      word of the text, falling back to ``Pocket.CONDITION_UNKNOWN``;
    * ``matching_txt``: the raw cleaned text.

    :param el: current element (unused here)
    """
    txt = CleanText(TableCell('availability')(self)[0].xpath('./span'))(self)

    availability_date = Date(dayfirst=True, default=NotAvailable).filter(txt)
    self.env['availability_date'] = availability_date

    if availability_date:
        condition = Pocket.CONDITION_DATE
    else:
        words = txt.lower().split()
        # An empty cell yields no words; indexing [0] would raise
        # IndexError, so treat it as an unknown condition instead.
        if words:
            condition = self.page.CONDITIONS.get(words[0], Pocket.CONDITION_UNKNOWN)
        else:
            condition = Pocket.CONDITION_UNKNOWN

    self.env['condition'] = condition
    self.env['matching_txt'] = txt
def is_agency(self):
    """Return ``True`` unless the agency-name span reads as a private ad.

    The item counts as an agency whenever the span text does not contain
    "annonce de particulier" (case-insensitive).
    """
    agency_name = CleanText('.//span[has-class("item-agency-name")]')(self.el)
    is_private = 'annonce de particulier' in agency_name.lower()
    return not is_private
def obj_utilities(self):
    """Tell whether utilities are included, based on the price text.

    Returns ``UTILITIES.INCLUDED`` when the price paragraph's text
    contains "charges comprises" (case-insensitive),
    ``UTILITIES.EXCLUDED`` otherwise.
    """
    price_text = CleanText('//p[has-class("OfferTop-price")]')(self)
    included = "charges comprises" in price_text.lower()
    return UTILITIES.INCLUDED if included else UTILITIES.EXCLUDED