def obj_details(self):
    """Build the ``details`` dict of the housing offer.

    The dict holds, in insertion order:

    * ``"DPE"`` / ``"GES"``: a letter from ``A`` to ``G`` read from the
      energy / greenhouse summary blocks, or ``NotAvailable`` when the block
      is missing, empty or does not yield such a letter;
    * ``"creationDate"``: the date following ``"Mis en ligne : "`` in the
      description notes, parsed day-first;
    * ``"Honoraires"``: the text after the first colon of the agency-fees
      label, only when that label exists and contains a colon;
    * one entry per ``<li>`` of the description list, mapping the criteria
      label to the criteria value.
    """
    valid_grades = ("A", "B", "C", "D", "E", "F", "G")

    def read_grade(xpath, label):
        # Return the A-G letter found at `xpath`, or NotAvailable.
        text = CleanText(xpath, default=None)(self)
        if not text:
            return NotAvailable
        if len(text) > 1:
            # Drop the label and keep the first remaining character. The
            # slice (instead of [0]) avoids an IndexError when the block
            # contains nothing but the label itself.
            text = text.replace(label, "").strip()[:1]
        return text if text in valid_grades else NotAvailable

    details = {}

    details["DPE"] = read_grade(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("energy-summary")]',
        "DPE"
    )
    details["GES"] = read_grade(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("greenhouse-summary")]',
        "GES"
    )

    details["creationDate"] = Date(
        Regexp(
            CleanText(
                '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
            ),
            # Backslashes are doubled so the literal has no invalid escape
            # sequence; the resulting pattern is unchanged.
            u'.*Mis en ligne : (\\d{2}/\\d{2}/\\d{4}).*'
        ),
        dayfirst=True
    )(self)

    honoraires = CleanText(
        '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]',
        default=None
    )(self)
    if honoraires:
        fee_parts = honoraires.split(":")
        # Without a colon there is nothing to extract; skip the entry
        # rather than fail on fee_parts[1].
        if len(fee_parts) > 1:
            details["Honoraires"] = "{} (TTC, en sus)".format(
                fee_parts[1].strip()
            )

    for li in XPath('//ul[@itemprop="description"]/li')(self):
        label = CleanText('./div[has-class("criteria-label")]')(li)
        value = CleanText('./div[has-class("criteria-value")]')(li)
        details[label] = value

    return details
def obj_size(self):
    """Convert the text of the ``poid`` div with ``get_bytes_size``.

    The text is read as ``"<number> ... <unit>"``: the first
    whitespace-separated field is the number (a decimal comma is accepted)
    and the last field is the unit.
    """
    text = CleanText('./div[has-class("poid")]')(self)
    fields = text.replace(',', '.').strip().split()
    quantity = float(fields[0])
    # Upper-case the unit and turn every "O" into "B"
    # (presumably French octet units such as "Ko"/"Mo" -- confirm).
    unit = fields[-1].upper().replace('O', 'B')
    return get_bytes_size(quantity, unit)
def obj_size(self):
    """Convert the first ``infosficher`` span text with ``get_bytes_size``.

    The first whitespace-separated field is parsed as a float (a decimal
    comma is accepted); the last field is used as the unit.
    """
    label = CleanText('(//div[@id="infosficher"]/span)[1]')(self)
    tokens = label.replace(',', '.').strip().split()
    number = float(tokens[0])
    # The unit is upper-cased and each "O" becomes "B"
    # (presumably French octet units -- confirm).
    unit_name = tokens[-1].upper().replace('O', 'B')
    return get_bytes_size(number, unit_name)
def obj_size(self):
    """Return ``get_bytes_size(number, unit)`` parsed from the ``poid`` div.

    ``number`` is the first whitespace-separated field of the text, with a
    decimal comma accepted; ``unit`` is the last field.
    """
    normalized = CleanText('./div[has-class("poid")]')(self).replace(',', '.').strip()
    pieces = normalized.split()
    value = float(pieces[0])
    # "O" is mapped to "B" once the unit is upper-cased
    # (presumably to translate French octet units -- confirm).
    suffix = pieces[-1].upper().replace('O', 'B')
    return get_bytes_size(value, suffix)
def obj_size(self):
    """Convert the text of the second ``<td>`` with ``get_bytes_size``.

    The first whitespace-separated field is parsed as a float (a decimal
    comma is accepted); the last field, upper-cased, is the unit.
    """
    cell_text = CleanText('./td[2]')(self)
    fields = cell_text.replace(',', '.').split()
    amount = float(fields[0])
    unit = fields[-1].upper()
    return get_bytes_size(amount, unit)
def obj_size(self):
    """Return the size read from the first span of the ``infosficher`` div.

    The span text is split on whitespace: the first field (decimal comma
    accepted) is the number and the last field is the unit; both are handed
    to ``get_bytes_size``.
    """
    span_text = CleanText('(//div[@id="infosficher"]/span)[1]')(self)
    span_text = span_text.replace(',', '.').strip()
    parts = span_text.split()
    magnitude = float(parts[0])
    # Upper-case, then replace "O" with "B"
    # (presumably French octet units such as "Mo" -- confirm).
    unit_label = parts[-1].upper().replace('O', 'B')
    return get_bytes_size(magnitude, unit_label)
def obj_id(self):
    """Build the id ``"<a>|<b>|<c>"`` from the link in the second ``<td>``.

    ``.html`` is removed from the href, which is then split on ``/``; the
    id joins the segments at index 2, 3 and 4.
    """
    # NOTE(review): the NotAvailable default is used as a string just below;
    # confirm what happens when the link is missing.
    link = CleanText('./td[2]/a/@href', default=NotAvailable)(self)
    segments = link.replace('.html', '').split('/')
    return '%s|%s|%s' % (segments[2], segments[3], segments[4])
def obj_id(self):
    """Return a three-part id taken from the href of the second cell's link.

    After ``.html`` is dropped, the href is split on ``/`` and the segments
    at positions 2, 3 and 4 are joined with ``|``.
    """
    target = CleanText('./td[2]/a/@href', default=NotAvailable)(self)
    path_parts = target.replace('.html', '').split('/')
    first = path_parts[2]
    second = path_parts[3]
    third = path_parts[4]
    return '%s|%s|%s' % (first, second, third)
def obj_DPE(self):
    """Return the ``ENERGY_CLASS`` member named by the energy summary.

    The ``"DPE"`` label is removed from the summary text and the first
    remaining character is looked up as an attribute of ``ENERGY_CLASS``.
    ``NotAvailable`` is returned when the block is missing, empty, holds
    only the label, or the character is not an ``ENERGY_CLASS`` attribute.
    """
    summary = CleanText(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("energy-summary")]',
        default="")(self)
    # Slicing instead of indexing: a summary holding only "DPE" yields ""
    # instead of raising IndexError, and "" falls back to NotAvailable.
    letter = summary.replace("DPE", "").strip()[:1]
    return getattr(ENERGY_CLASS, letter, NotAvailable)
def obj_size(self):
    """Return ``get_bytes_size(number, unit)`` parsed from the second cell.

    ``number`` is the first whitespace-separated field of the cell text
    (decimal comma accepted) and ``unit`` is the last field, upper-cased.
    """
    tokens = CleanText('./td[2]')(self).replace(',', '.').split()
    number = float(tokens[0])
    unit_name = tokens[-1].upper()
    return get_bytes_size(number, unit_name)
def obj_GES(self):
    """Return the ``ENERGY_CLASS`` member named by the greenhouse summary.

    The ``"GES"`` label is removed from the summary text and the first
    remaining character is looked up as an attribute of ``ENERGY_CLASS``.
    ``NotAvailable`` is returned when the block is missing, empty, holds
    only the label, or the character is not an ``ENERGY_CLASS`` attribute.
    """
    summary = CleanText(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("greenhouse-summary")]',
        default=""
    )(self)
    # Slicing instead of indexing: a summary holding only "GES" yields ""
    # instead of raising IndexError, and "" falls back to NotAvailable.
    letter = summary.replace("GES", "").strip()[:1]
    return getattr(ENERGY_CLASS, letter, NotAvailable)
def obj_DPE(self):
    """Map the energy summary block to an ``ENERGY_CLASS`` member.

    Once ``"DPE"`` is stripped from the block text, its first character is
    resolved with ``getattr`` on ``ENERGY_CLASS``. Missing or empty blocks,
    blocks holding only the label, and characters that are not attributes
    of ``ENERGY_CLASS`` all give ``NotAvailable``.
    """
    raw_text = CleanText(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("energy-summary")]',
        default=""
    )(self)
    # [:1] tolerates an empty remainder (text equal to the bare label),
    # where [0] would raise IndexError.
    grade = raw_text.replace("DPE", "").strip()[:1]
    return getattr(ENERGY_CLASS, grade, NotAvailable)
def obj_size(self):
    """Convert the size embedded in the folder span with ``get_bytes_size``.

    The size is the text after the last ``": "`` and before the following
    ``")"``. Its first whitespace-separated field is the number (decimal
    comma accepted) and its last field, upper-cased, is the unit.
    """
    caption = CleanText('//span[has-class("folder") or has-class("folderopen")]')(self)
    size_text = caption.split(': ')[-1].split(')')[0].strip()
    fields = size_text.replace(',', '.').split()
    quantity = float(fields[0])
    unit = fields[-1].upper()
    return get_bytes_size(quantity, unit)
def obj_id(self):
    """Build the id ``"<a>|<b>"`` from the small grey link's href.

    ``.html`` and ``paroles-`` are removed from the href; the id joins its
    last two ``/``-separated segments with ``|``.
    """
    link = CleanText(
        './/a[has-class("link") and has-class("grey") and has-class("font-small")]/@href'
    )(self)
    cleaned = link.replace('.html', '').replace('paroles-', '')
    tail = cleaned.split('/')[-2:]
    return '%s|%s' % (tail[0], tail[1])
def obj_GES(self):
    """Map the greenhouse summary block to an ``ENERGY_CLASS`` member.

    Once ``"GES"`` is stripped from the block text, its first character is
    resolved with ``getattr`` on ``ENERGY_CLASS``. Missing or empty blocks,
    blocks holding only the label, and characters that are not attributes
    of ``ENERGY_CLASS`` all give ``NotAvailable``.
    """
    raw_text = CleanText(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("greenhouse-summary")]',
        default="")(self)
    # [:1] tolerates an empty remainder (text equal to the bare label),
    # where [0] would raise IndexError.
    grade = raw_text.replace("GES", "").strip()[:1]
    return getattr(ENERGY_CLASS, grade, NotAvailable)
def obj_size(self):
    """Return the size written in the ``folder``/``folderopen`` span.

    Only the text between the last ``": "`` and the next ``")"`` is kept.
    Its first field (decimal comma accepted) and its upper-cased last field
    are passed to ``get_bytes_size`` as number and unit.
    """
    span_text = CleanText(
        '//span[has-class("folder") or has-class("folderopen")]')(self)
    after_colon = span_text.split(': ')[-1]
    inside = after_colon.split(')')[0].strip().replace(',', '.')
    pieces = inside.split()
    value = float(pieces[0])
    suffix = pieces[-1].upper()
    return get_bytes_size(value, suffix)
def obj_phone(self):
    """Return the first owner-contact phone text, each space becoming ``", "``."""
    xpath = '(//div[has-class("contact-proprietaire-box")]//strong[@class="tel-wrapper"])[1]'
    return CleanText(xpath)(self).replace(' ', ', ')
def obj_title(self):
    """Return the ``profile_name`` heading text without its ``"by <artist>"`` part.

    The artist is the text of the link(s) inside the heading.
    """
    heading_xpath = '//h1[@id="profile_name"]'
    # NOTE(review): both lookups default to NotAvailable, which is then used
    # as a string; confirm what happens when the heading is missing.
    artist_name = CleanText(heading_xpath + '//a', default=NotAvailable)(self)
    heading_text = CleanText(heading_xpath, default=NotAvailable)(self)
    byline = 'by %s' % artist_name
    return heading_text.replace(byline, '')
def obj_id(self):
    """Build the id ``"<a>|<b>"`` from the element's own href.

    ``.html`` and ``paroles_`` are removed from the href; the id joins its
    last two ``/``-separated segments with ``|``.
    """
    link = CleanText('./@href')(self)
    stripped = link.replace('.html', '').replace('paroles_', '')
    last_two = stripped.split('/')[-2:]
    return '%s|%s' % (last_two[0], last_two[1])
def condition(self):
    """Tell whether the lyric body contains anything besides the title.

    Returns True when the ``lyric-body`` text is non-empty once every
    occurrence of the title is removed and the result is stripped.
    """
    heading = CleanText('./div/p[@class="lyric-meta-title"]/a', default="")(self)
    body = CleanText('./pre[@class="lyric-body"]')(self)
    remainder = body.replace(heading, "").strip()
    return remainder != ""
def obj_phone(self):
    """Return the first ``tel-wrapper`` div text, spaces replaced by ``", "``.

    The result is stripped of surrounding whitespace.
    """
    number = CleanText('(//div[has-class("tel-wrapper")])[1]')(self)
    return number.replace(' ', ', ').strip()
def obj_subscriber(self):
    """Return the logged-in navigation link text without the ``"Bonjour"`` greeting."""
    greeting = CleanText('//a[has-class("MainNav-item-logged")]')(self)
    return greeting.replace('Bonjour', '').strip()
def iter_accounts(self, next_pages):
    """Yield one card ``Account`` per card listed in the ``TabCtes`` table.

    A row whose id column is non-empty starts a new account; each following
    row with an empty id column belongs to the account started last. Every
    row's date and amount columns then update that account's ``coming``,
    ``_next_debit``, ``_prev_balance`` and ``_prev_debit`` attributes.

    Once all rows are consumed, the "Retour" button action (if the button
    is present) is posted to ``/cyber/internet/ContinueTask.do``.

    :param next_pages: not used by this method.
    :raises BrokenPageError: when a row with an empty id column appears
        before any account has been started.
    """
    params = self.get_params()

    account = None
    currency = None
    for th in self.doc.xpath('//table[@id="TabCtes"]//thead//th'):
        # th.text may be None (e.g. a header cell with no leading text);
        # match against '' in that case instead of raising TypeError.
        m = re.match(r'.*\((\w+)\)$', th.text or '')
        if m and currency is None:
            currency = Account.get_currency(m.group(1))

    for tr in self.doc.xpath('//table[@id="TabCtes"]/tbody/tr'):
        cols = tr.xpath('./td')

        card_id = CleanText(None).filter(cols[self.COL_ID])
        if len(card_id) > 0:
            # A new card begins: emit the previous one first.
            if account is not None:
                yield account
            account = Account()
            account.id = card_id.replace(' ', '')
            account.type = Account.TYPE_CARD
            account.balance = account.coming = Decimal('0')
            account._next_debit = datetime.date.today()
            account._prev_debit = datetime.date(2000, 1, 1)
            account.label = u' '.join([
                CleanText(None).filter(cols[self.COL_TYPE]),
                CleanText(None).filter(cols[self.COL_LABEL])
            ])
            account.currency = currency
            account._params = None
            account._invest_params = None
            account._coming_params = params.copy()
            account._coming_params['dialogActionPerformed'] = 'SELECTION_ENCOURS_CARTE'
            # The row id is "<prefix>_<rest>": the prefix names the
            # attribute and the rest is its value.
            account._coming_params['attribute($SEL_$%s)' % tr.attrib['id'].split('_')[0]] = tr.attrib['id'].split('_', 1)[1]
        elif account is None:
            raise BrokenPageError('Unable to find accounts on cards page')
        else:
            account._params = params.copy()
            account._params['dialogActionPerformed'] = 'SELECTION_ENCOURS_CARTE'
            account._params['attribute($SEL_$%s)' % tr.attrib['id'].split('_')[0]] = tr.attrib['id'].split('_', 1)[1]

        date_col = CleanText(None).filter(cols[self.COL_DATE])
        m = re.search(r'(\d+)/(\d+)/(\d+)', date_col)
        if not m:
            self.logger.warning('Unable to parse date %r' % date_col)
            continue

        # Groups are in page order (reversed to year, month, day). A list
        # is built first because reversed() rejects the iterator returned
        # by map() on Python 3.
        date = datetime.date(*[int(c) for c in m.groups()][::-1])
        if date.year < 100:
            date = date.replace(year=date.year + 2000)

        amount = Decimal(
            FrenchTransaction.clean_amount(
                CleanText(None).filter(cols[self.COL_AMOUNT])))

        if not date_col.endswith('(1)'):
            # debited
            account.coming += -abs(amount)
            account._next_debit = date
        elif date > account._prev_debit:
            account._prev_balance = -abs(amount)
            account._prev_debit = date

    if account is not None:
        yield account

    # Needed to preserve navigation.
    btn = self.doc.xpath('.//button/span[text()="Retour"]')
    if len(btn) > 0:
        btn = btn[0].getparent()
        actions = self.get_button_actions()
        _params = params.copy()
        _params.update(actions[btn.attrib['id']])
        self.browser.open('/cyber/internet/ContinueTask.do', data=_params)
def obj_phone(self):
    """Return the owner's phone text with every space replaced by ``", "``.

    The text comes from the first ``tel-wrapper`` element found inside a
    ``contact-proprietaire-box`` div.
    """
    raw_number = CleanText(
        '(//div[has-class("contact-proprietaire-box")]//strong[@class="tel-wrapper"])[1]'
    )(self)
    formatted = raw_number.replace(' ', ', ')
    return formatted
def condition(self):
    """Return True when the lyric body is more than just the title.

    The title (empty when its link is absent) is removed from the
    ``lyric-body`` text; the item qualifies if the stripped remainder is
    not empty.
    """
    title_text = CleanText('./div/p[@class="lyric-meta-title"]/a', default="")(self)
    lyrics = CleanText('./pre[@class="lyric-body"]')(self)
    without_title = lyrics.replace(title_text, "")
    return without_title.strip() != ""
def obj_subscriber(self):
    """Return the subscriber name shown in the logged-in navigation link.

    The ``"Bonjour"`` greeting is removed from the link text and the result
    is stripped.
    """
    link_text = CleanText(
        '//a[has-class("MainNav-item-logged")]')(self)
    name = link_text.replace('Bonjour', '')
    return name.strip()
def obj_id(self):
    """Build the id ``"<a>|<b>"`` from the href of the ``link`` anchor.

    ``.html`` and ``paroles-`` are removed from the href; the id joins its
    last two ``/``-separated segments with ``|``.
    """
    target = CleanText('.//a[has-class("link")]/@href')(self)
    path = target.replace('.html', '').replace('paroles-', '')
    segments = path.split('/')[-2:]
    first, second = segments[0], segments[1]
    return '%s|%s' % (first, second)
def iter_accounts(self, next_pages):
    """Yield one card ``Account`` per card listed in the ``tbl1`` table.

    The currency is taken from the first table header ending in
    ``"(<code>)"``, or else from the ``tbl1_0_5_Cell`` cell. A row whose id
    column is non-empty starts a new account; each following row with an
    empty id column belongs to the account started last. Every row's date
    and amount columns then update that account's ``coming``,
    ``_next_debit``, ``_prev_balance`` and ``_prev_debit`` attributes.

    Once all rows are consumed, the "Retour" button action (if the button
    is present) is posted to ``/cyber/internet/ContinueTask.do``.

    :param next_pages: not used by this method.
    :raises BrokenPageError: when a row with an empty id column appears
        before any account has been started.
    """
    params = self.get_params()

    account = None
    currency = None
    for th in self.doc.xpath('//table[@id="tbl1"]//thead//th'):
        # th.text may be None (e.g. a header cell with no leading text);
        # match against '' in that case instead of raising TypeError.
        m = re.match(r'.*\((\w+)\)$', th.text or '')
        if m and currency is None:
            currency = Account.get_currency(m.group(1))

    if currency is None:
        currency = Account.get_currency(CleanText('//td[@id="tbl1_0_5_Cell"]//span')(self.doc))

    for tr in self.doc.xpath('//table[@id="tbl1"]/tbody/tr'):
        cols = tr.xpath('./td')

        # A single-cell row reading 'pas de carte' carries no card data.
        if len(cols) == 1 and CleanText('.')(cols[0]) == 'pas de carte':
            self.logger.debug('there are no cards on this page')
            continue

        card_id = CleanText(None).filter(cols[self.COL_ID])
        if len(card_id) > 0:
            # A new card begins: emit the previous one first.
            if account is not None:
                yield account
            account = Account()
            account.id = card_id.replace(' ', '')
            account.type = Account.TYPE_CARD
            account.balance = account.coming = Decimal('0')
            account._next_debit = datetime.date.today()
            account._prev_debit = datetime.date(2000, 1, 1)
            account.label = u' '.join([CleanText(None).filter(cols[self.COL_TYPE]),
                                       CleanText(None).filter(cols[self.COL_LABEL])])
            account.currency = currency
            account._params = None
            account._invest_params = None
            account._coming_params = params.copy()
            account._coming_params['dialogActionPerformed'] = 'SELECTION_ENCOURS_CARTE'
            # The row id is "<prefix>_<rest>": the prefix names the
            # attribute and the rest is its value.
            account._coming_params['attribute($SEL_$%s)' % tr.attrib['id'].split('_')[0]] = tr.attrib['id'].split('_', 1)[1]
            # Number of fifth-column spans in the whole table whose text
            # does not contain "(1)".
            account._coming_count = len(self.doc.xpath('//table[@id="tbl1"]/tbody/tr/td[5]/span[not(contains(text(), "(1)"))]'))
        elif account is None:
            raise BrokenPageError('Unable to find accounts on cards page')
        else:
            account._params = params.copy()
            account._params['dialogActionPerformed'] = 'SELECTION_ENCOURS_CARTE'
            account._params['attribute($SEL_$%s)' % tr.attrib['id'].split('_')[0]] = tr.attrib['id'].split('_', 1)[1]

        date_col = CleanText(None).filter(cols[self.COL_DATE])
        m = re.search(r'(\d+)/(\d+)/(\d+)', date_col)
        if not m:
            self.logger.warning('Unable to parse date %r' % date_col)
            continue

        # Groups are in page order; reversed they give (year, month, day).
        date = datetime.date(*[int(c) for c in m.groups()][::-1])
        if date.year < 100:
            date = date.replace(year=date.year + 2000)

        amount = Decimal(FrenchTransaction.clean_amount(CleanText(None).filter(cols[self.COL_AMOUNT])))

        if not date_col.endswith('(1)'):
            # debited
            account.coming += - abs(amount)
            account._next_debit = date
        elif date > account._prev_debit:
            account._prev_balance = - abs(amount)
            account._prev_debit = date

    if account is not None:
        yield account

    # Needed to preserve navigation.
    btn = self.doc.xpath('.//button[span[text()="Retour"]]')
    if len(btn) > 0:
        actions = self.get_button_actions()
        _params = params.copy()
        _params.update(actions[btn[0].attrib['id']])
        self.browser.open('/cyber/internet/ContinueTask.do', data=_params)
def obj_id(self):
    """Return ``"<a>|<b>"`` built from the element's ``href`` attribute.

    With ``.html`` and ``paroles_`` removed, the href is split on ``/`` and
    its last two segments are joined with ``|``.
    """
    href_text = CleanText('./@href')(self)
    trimmed = href_text.replace('.html', '').replace('paroles_', '')
    pair = trimmed.split('/')[-2:]
    head = pair[0]
    tail = pair[1]
    return '%s|%s' % (head, tail)