def get_history(self, date_guesser): seen = set() lines = self.document.xpath('(//table[@class="ca-table"])[2]/tr') for line in lines[1:]: # first line is balance is_balance = line.xpath('./td/@class="cel-texte cel-neg"') [date, label, _, amount] = [self.parser.tocleanstring(td) for td in line.xpath('./td')] t = Transaction(0) t.set_amount(amount) t.label = t.raw = label if is_balance: m = re.search('(\d+ [^ ]+ \d+)', label) if not m: raise BrokenPageError('Unable to read card balance in history: %r' % label) t.date = parse_french_date(m.group(1)) t.amount = -t.amount else: day, month = map(int, date.split('/', 1)) t.date = date_guesser.guess_date(day, month) t.type = t.TYPE_CARD t.rdate = t.date try: t.id = t.unique_id(seen) except UnicodeEncodeError: print t print t.label raise yield t
def get_loan_list(self):
    """Return the loan accounts found on the page.

    The "old website" table layout is tried first.  When it produces no
    account, the "new website" panels are parsed instead: each panel is
    preceded by a title element used to look up the account type, and each
    matching row describes one loan.

    Rows of the new layout that expose no ``a/strong`` label are skipped:
    without a label there is no account id, and the previous code either
    raised ``NameError`` or silently reused the label of the previous row
    (overwriting that account in the result).

    :returns: the values of an ``OrderedDict`` keyed by account id
    """
    accounts = OrderedDict()

    # Old website
    for tr in self.doc.xpath('//table[@cellpadding="1"]/tr[not(@class) and td[a]]'):
        tds = tr.findall('td')
        name_parts = CleanText('./a')(tds[2]).split('-')

        account = Account()
        account.id = name_parts[0].strip()
        account.label = name_parts[-1].strip()
        account.type = Account.TYPE_LOAN
        account.balance = -CleanDecimal('./a', replace_dots=True)(tds[4])
        account.currency = account.get_currency(CleanText('./a')(tds[4]))
        accounts[account.id] = account

    if accounts:
        return accounts.values()

    def amount_of(cell):
        # Parse the French-formatted amount held by a table cell.
        return Decimal(FrenchTransaction.clean_amount(CleanText('.')(cell)))

    # New website
    for table in self.doc.xpath('//div[@class="panel"]'):
        title = table.getprevious()
        if title is None:
            continue
        title_text = CleanText('.')(title)
        account_type = self.ACCOUNT_TYPES.get(title_text, Account.TYPE_UNKNOWN)

        for tr in table.xpath('./table/tbody/tr[contains(@id,"MM_SYNTHESE_CREDITS") and contains(@id,"IdTrGlobal")]'):
            tds = tr.findall('td')
            if not tds:
                continue

            strongs = tds[0].xpath('.//a/strong')
            if not strongs:
                # No label means no account id: skip instead of reusing a
                # stale label from a previous row.
                continue
            label = strongs[0].text.strip()

            last_amount = amount_of(tds[-1])
            balance = last_amount
            if len(tds) == 3:
                middle_amount = amount_of(tds[-2])
                row_id = Attr('.', 'id')(tr)
                if middle_amount and not any(cls in row_id for cls in ('dgImmo', 'dgConso')):
                    # In case of consumer credit or revolving credit, subtract
                    # the available amount from the maximum amount to get
                    # what was spent.
                    balance = middle_amount - last_amount

            account = Loan()
            account.id = label.split(' ')[-1]
            account.label = unicode(label)
            account.type = account_type
            account.balance = -abs(balance)
            account.currency = account.get_currency(CleanText('.')(tds[-1]))
            account._card_links = []

            if "immobiliers" in title_text:
                xp = './/div[contains(@id, "IdDivDetail")]/table/tbody/tr[contains(@id, "%s")]/td'
                account.maturity_date = Date(CleanText(xp % 'IdDerniereEcheance'), dayfirst=True, default=NotAvailable)(tr)
                account.total_amount = CleanDecimal(CleanText(xp % 'IdCapitalEmprunte'), replace_dots=True, default=NotAvailable)(tr)
                account.subscription_date = Date(CleanText(xp % 'IdDateOuverture'), dayfirst=True, default=NotAvailable)(tr)
                account.next_payment_date = Date(CleanText(xp % 'IdDateProchaineEcheance'), dayfirst=True, default=NotAvailable)(tr)
                account.rate = CleanDecimal(CleanText(xp % 'IdTaux'), replace_dots=True, default=NotAvailable)(tr)
                account.next_payment_amount = CleanDecimal(CleanText(xp % 'IdMontantEcheance'), replace_dots=True, default=NotAvailable)(tr)
            elif "renouvelables" in title_text:
                # Details of revolving credits come from a separate request.
                self.go_loans_conso(tr)
                d = self.browser.loans_conso()
                if d:
                    account.total_amount = d['contrat']['creditMaxAutorise']
                    account.available_amount = d['situationCredit']['disponible']
                    account.next_payment_amount = d['situationCredit']['mensualiteEnCours']

            accounts[account.id] = account

    return accounts.values()
def parse_decimal(self, td, percentage=False):
    """Parse the text of ``td`` as a decimal amount.

    :param td: element whose cleaned text holds the amount
    :param percentage: when true, the parsed value is divided by 100
    :returns: a ``Decimal``, or ``NotAvailable`` when the text is empty
              or is a lone ``-``
    """
    text = CleanText('.')(td)
    if not text or text == '-':
        return NotAvailable

    amount = Decimal(FrenchTransaction.clean_amount(text))
    if percentage:
        return amount / 100
    return amount
def get_list(self):
    """Build the account list from the ``<a>`` links of the document.

    Links starting with ``/outil/UWLM/ListeMouvements`` create an account
    (id assembled from the ``agence`` and ``compte`` query parameters);
    links starting with ``/outil/UWCB/UWCBEncours`` add a coming amount to
    the most recently created account.

    :returns: list of ``Account`` objects
    """
    accounts = []
    seen_ids = set()
    for anchor in self.document.getiterator('a'):
        href = anchor.attrib.get('href')
        if href is None:
            continue

        if href.startswith("/outil/UWLM/ListeMouvements"):
            account = Account()
            # By default the website proposes the last 7 or last 45 days,
            # but the last 55 days can be forced with this parameter.
            account._link_id = href + "&mode=55"
            account._coming_links = []

            query = href.split("?").pop()
            for chunk in query.split("&"):
                pieces = chunk.split("=")
                value = pieces.pop()
                name = pieces.pop()
                if name == "agence":
                    account.id = value
                elif name == "compte":
                    account.id += value
                elif name == "nature":
                    # TODO parse this string to get the right Account.TYPE_*
                    # to store in account.type.
                    account._type = value

            if account.id in seen_ids:
                continue
            seen_ids.add(account.id)

            label_div = anchor.getparent().getprevious()
            if not label_div.text.strip():
                label_div = label_div.find('div')
            account.label = u'' + label_div.text.strip()

            balance = FrenchTransaction.clean_amount(anchor.text)
            if '-' in balance:
                # Normalise the sign so that it leads the number.
                balance = '-' + balance.replace('-', '')
            account.balance = Decimal(balance)
            account.currency = account.get_currency(anchor.text)
            self.logger.debug('%s Type: %s' % (account.label, account._type))
            accounts.append(account)

        if href.startswith('/outil/UWCB/UWCBEncours'):
            if len(accounts) == 0:
                self.logger.warning('There is a card account but not any check account')
                continue

            account = accounts[-1]

            coming = FrenchTransaction.clean_amount(anchor.text)
            if '-' in coming:
                coming = '-' + coming.replace('-', '')
            if not account.coming:
                account.coming = Decimal('0')
            account.coming += Decimal(coming)
            account._coming_links.append(href)

    return accounts
def iter_investments(self):
    """Yield the investments described by the raw text in ``self.doc``.

    The document is split on ``popup=2``; each following chunk holds
    ``#``-separated columns, the first one being ``{``-separated.  A chunk
    flagged ``|Total`` with id ``fichevaleur`` is merged into the
    previously collected investment instead of being yielded on its own.
    """
    # We did not get some html, but something like that
    # (XX is a quantity, YY a price):
    # message='[...]
    # popup=2{6{E:ALO{PAR{{reel{695{380{ALSTOM REGROUPT#XX#YY,YY €#YY,YY €#1 YYY,YY €#-YYY,YY €#-42,42%#-0,98 %#42,42 %#|1|AXA#cotationValeur.php?val=E:CS&pl=6&nc=1&
    # popup=2{6{E:CS{PAR{{reel{695{380{AXA#XX#YY,YY €#YY,YYY €#YYY,YY €#YY,YY €#3,70%#42,42 %#42,42 %#|1|blablablab #cotationValeur.php?val=P:CODE&pl=6&nc=1&
    # [...]
    chunks = self.doc.split("popup=2")
    chunks.pop(0)

    def parse(raw, default):
        # Clean a raw column and convert it, falling back on ``default``.
        cleaned = FrenchTransaction.clean_amount(raw)
        return CleanDecimal(default=default).filter(cleaned)

    collected = []
    for chunk in chunks:
        columns = chunk.split('#')
        header = columns[0].split('{')
        _pl = header[1]
        _id = header[2]

        invest = Investment(_id)
        invest.label = unicode(header[-1])
        invest.code = unicode(_id)
        if ':' in invest.code:
            invest.code = self.browser.titrevalue.open(val=invest.code, pl=_pl).get_isin()

        # The code we got is not a real ISIN code.
        if not re.match('^[A-Z]{2}[\d]{10}$|^[A-Z]{2}[\d]{5}[A-Z]{1}[\d]{4}$', invest.code):
            m = re.search('\{([A-Z]{2}[\d]{10})\{|\{([A-Z]{2}[\d]{5}[A-Z]{1}[\d]{4})\{', chunk)
            if m:
                invest.code = unicode(m.group(1) or m.group(2))

        invest.quantity = parse(columns[1], NotAvailable)
        invest.unitprice = parse(columns[2], NotAvailable)
        invest.unitvalue = parse(columns[3], NotAvailable)
        # valuation is not nullable, use 0 as default value
        invest.valuation = parse(columns[4], Decimal('0'))
        invest.diff = parse(columns[5], NotAvailable)

        # In some cases an investment spans several lines and ends with a
        # total line; the total line is folded into the previous entry.
        # (Original note, truncated in the source: only seen on 2 lines so
        # far, to be adapted if needed.)
        if columns[9] == u'|Total' and _id == 'fichevaleur':
            total_line = invest
            invest = collected.pop(-1)
            if total_line.quantity:
                invest.quantity = invest.quantity + total_line.quantity
            if total_line.valuation:
                invest.valuation = invest.valuation + total_line.valuation
            if total_line.diff:
                invest.diff = invest.diff + total_line.diff

        collected.append(invest)

    for invest in collected:
        yield invest
def get_funding_src(self, t):
    """Build the transaction describing the funding source of ``t``.

    Funding sources of type ``BALANCE`` are ignored.

    :param t: transaction whose ``id`` and ``date`` are reused
    :returns: a ``FrenchTransaction``, or ``None`` when the document has no
              ``fundingSource`` entry or no non-balance source
    """
    if 'fundingSource' not in self.doc['data']['details']:
        return None

    candidates = self.doc['data']['details']['fundingSource']['fundingSourceList']
    sources = []
    for candidate in candidates:
        if candidate['type'] != 'BALANCE':
            sources.append(candidate)

    # At most one non-balance funding source is expected.
    assert len(sources) <= 1
    if not sources:
        return None

    src = sources[0]
    tr = FrenchTransaction(t.id + '_fundingSrc')
    tr.amount = CleanDecimal(replace_dots=True).filter(src['amount'])
    when = t.date
    tr.date = when
    tr.rdate = when
    text = u'Crédit depuis %s' % src['institution']
    tr.label = text
    tr.raw = text
    return tr
def get_monthly_transactions(self, trs):
    """Return one card-summary transaction per distinct date in ``trs``.

    Transactions are grouped by their ``date``; groups dated after today
    are ignored.  Each summary carries the negated sum of the amounts of
    its group.

    :param trs: iterable of transactions exposing ``date`` and ``amount``
    :returns: list of ``FrenchTransaction`` of type ``TYPE_CARD_SUMMARY``
    """
    def by_date(item):
        return item.date

    # groupby() only merges adjacent items, hence the sort on the same key.
    ordered = sorted(trs, key=by_date)
    batches = [list(members) for _, members in groupby(ordered, by_date)]

    summaries = []
    for batch in batches:
        first = batch[0]
        if first.date > date.today():
            continue

        summary = FrenchTransaction()
        text = u"RELEVE CARTE %s" % first.date
        summary.raw = text
        summary.label = text
        summary.amount = -sum(member.amount for member in batch)
        when = first.date
        summary.date = when
        summary.rdate = when
        summary.vdate = when
        summary.type = FrenchTransaction.TYPE_CARD_SUMMARY
        summaries.append(summary)

    return summaries
def iter_investments(self):
    """Yield one investment per ``popup=2`` chunk of ``self.doc``.

    Each chunk holds ``#``-separated columns, the first one being
    ``{``-separated.  Empty numeric columns become ``NotAvailable``,
    except the valuation which defaults to ``Decimal('0')``.
    """
    # We did not get some html, but something like that
    # (XX is a quantity, YY a price):
    # message='[...]
    # popup=2{6{E:ALO{PAR{{reel{695{380{ALSTOM REGROUPT#XX#YY,YY €#YY,YY €#1 YYY,YY €#-YYY,YY €#-42,42%#-0,98 %#42,42 %#|1|AXA#cotationValeur.php?val=E:CS&pl=6&nc=1&
    # popup=2{6{E:CS{PAR{{reel{695{380{AXA#XX#YY,YY €#YY,YYY €#YYY,YY €#YY,YY €#3,70%#42,42 %#42,42 %#|1|blablablab #cotationValeur.php?val=P:CODE&pl=6&nc=1&
    # [...]
    chunks = self.doc.split("popup=2")
    chunks.pop(0)

    def parse(raw, fallback):
        # Convert a raw column to Decimal, or ``fallback`` when it is empty.
        cleaned = FrenchTransaction.clean_amount(raw)
        if cleaned != '':
            return Decimal(cleaned)
        return fallback

    for chunk in chunks:
        columns = chunk.split('#')
        header = columns[0].split('{')
        _pl = header[1]
        _id = header[2]

        invest = Investment(_id)
        invest.label = unicode(header[-1])
        invest.code = unicode(_id)
        if ':' in invest.code:
            invest.code = self.browser.titrevalue.open(val=invest.code, pl=_pl).get_isin()

        # The code we got is not a real ISIN code.
        if not re.match('^[A-Z]{2}[\d]{10}$|^[A-Z]{2}[\d]{5}[A-Z]{1}[\d]{4}$', invest.code):
            m = re.search('\{([A-Z]{2}[\d]{10})\{|\{([A-Z]{2}[\d]{5}[A-Z]{1}[\d]{4})\{', chunk)
            if m:
                invest.code = unicode(m.group(1) or m.group(2))

        invest.quantity = parse(columns[1], NotAvailable)
        invest.unitprice = parse(columns[2], NotAvailable)
        invest.unitvalue = parse(columns[3], NotAvailable)
        # valuation is not nullable.
        invest.valuation = parse(columns[4], Decimal('0'))
        invest.diff = parse(columns[5], NotAvailable)

        yield invest
def get_monthly_transactions(self, trs):
    """Return one manual card-summary transaction per distinct date.

    Transactions are grouped by ``date``; groups dated after today are
    skipped.  Each summary carries the negated sum of its group and is
    flagged ``_is_coming = False`` and ``_is_manualsum = True``.

    :param trs: iterable of transactions exposing ``date`` and ``amount``
    :returns: list of ``FrenchTransaction`` of type ``TYPE_CARD_SUMMARY``
    """
    key = attrgetter('date')
    # groupby() only merges adjacent items, hence the sort on the same key.
    batches = []
    for _, members in groupby(sorted(trs, key=key), key):
        batches.append(list(members))

    summaries = []
    for batch in batches:
        head = batch[0]
        if head.date > datetime.today().date():
            continue

        summary = FrenchTransaction()
        text = "RELEVE CARTE %s" % head.date
        summary.raw = text
        summary.label = text
        summary.amount = -sum(member.amount for member in batch)
        when = head.date
        summary.date = when
        summary.rdate = when
        summary.vdate = when
        summary.type = FrenchTransaction.TYPE_CARD_SUMMARY
        summary._is_coming = False
        summary._is_manualsum = True
        summaries.append(summary)

    return summaries
def iter_bills(self, sub):
    """Yield the bills of subscription ``sub``, year by year.

    The page currently loaded is parsed first (the leading ``None`` entry),
    then each link of the ``years`` list is visited through the browser.
    Rows without cells, without a ``dateFactureQE`` date in their link, or
    without an amount are skipped.

    :param sub: subscription whose ``_id`` prefixes the bill ids
    """
    year_links = [None] + self.document.xpath('//ul[@class="years"]/li/a')

    for year_link in year_links:
        if year_link is not None and year_link.attrib['href']:
            self.browser.location(year_link.attrib['href'])

        # Read from the browser: the current page may have just changed.
        tables = self.browser.page.document.xpath('//table[contains(@summary, "factures")]')
        for table in tables:
            for row in table.xpath('.//tr'):
                cells = row.xpath('.//td')
                if len(cells) == 0:
                    continue

                href = cells[0].xpath('.//a')[0].attrib['href']
                url = re.sub('[\r\n\t]', '', href)
                found = re.search('dateFactureQE=(\d+/\d+/\d+)', url)
                if not found:
                    continue

                bill_date = datetime.strptime(found.group(1), "%d/%m/%Y").date()
                amount = self.parser.tocleanstring(cells[2])
                if amount is None:
                    continue

                bill = Bill()
                bill.id = sub._id + "." + bill_date.strftime("%Y%m%d")
                bill.price = Decimal(FrenchTransaction.clean_amount(amount))
                bill.currency = bill.get_currency(amount)
                bill.date = bill_date
                bill.label = self.parser.tocleanstring(cells[0])
                bill.format = u'pdf'
                bill._url = url
                yield bill
def get_list(self):
    """Return the accounts described by the ``_data`` JavaScript array.

    Card accounts get the previously listed account as ``parent``; their
    balance is moved to ``coming`` and reset to zero.  Duplicate card ids
    and the generic life-insurance placeholder line are skipped.

    :returns: list of ``Account`` objects (empty when the page states that
              there is no account)
    :raises BrowserUnavailable: when the accounts array cannot be found
    """
    accounts = []
    previous_account = None

    noaccounts = self.get_from_js('_js_noMvts =', ';')
    if noaccounts is not None:
        assert 'avez aucun compte' in noaccounts
        return []

    txt = self.get_from_js('_data = new Array(', ');', is_list=True)
    if txt is None:
        raise BrowserUnavailable('Unable to find accounts list in scripts')

    # The JS array uses single quotes; turn it into a JSON list.
    data = json.loads('[%s]' % txt.replace("'", '"'))

    for line in data:
        a = Account()
        a.id = line[self.COL_ID].replace(' ', '')

        if re.match(r'Classement=(.*?):::Banque=(.*?):::Agence=(.*?):::SScompte=(.*?):::Serie=(.*)', a.id):
            a.id = str(CleanDecimal().filter(a.id))

        a._acc_nb = a.id.split('_')[0] if len(a.id.split('_')) > 1 else None
        a.label = MyStrip(line[self.COL_LABEL], xpath='.//div[@class="libelleCompteTDB"]')

        # This account can be multiple life insurance accounts
        if a.label == 'ASSURANCE VIE-BON CAPI-SCPI-DIVERS *':
            continue

        a.balance = Decimal(FrenchTransaction.clean_amount(line[self.COL_BALANCE]))
        a.currency = a.get_currency(line[self.COL_BALANCE])
        a.type = self.get_account_type(a.label)

        is_card = a.type == Account.TYPE_CARD

        # The parent account must be created right before
        if is_card:
            # duplicate
            if find_object(accounts, id=a.id):
                self.logger.warning('Ignoring duplicate card %r', a.id)
                continue
            a.parent = previous_account

        if line[self.COL_HISTORY] == 'true':
            a._inv = False
            a._link = self.get_history_link()
            a._args = self.make__args_dict(line)
        else:
            a._inv = True
            a._args = {
                '_ipc_eventValue': line[self.COL_ID],
                '_ipc_fireEvent': line[self.COL_FIRE_EVENT],
            }
            a._link = self.doc.xpath('//form[@name="changePageForm"]')[0].attrib['action']

        # Compare by value (``==``), not identity: the previous ``is`` test
        # only worked if both sides happened to be the very same object.
        if is_card:
            a.coming = a.balance
            a.balance = Decimal('0.0')

        accounts.append(a)
        previous_account = a

    return accounts
def get_list(self):
    """Yield the accounts listed under each ``div.btit`` section.

    The text of each section title selects the account type; each table
    row carrying an ``id`` attribute (a query-string holding
    ``identifiant``) and at least four cells describes one account.  Every
    account receives a copy of the page's ``<input>`` values as
    ``_params``, completed for the ``SOLDE`` action.
    """
    form_values = {}
    for field in self.document.xpath('//input'):
        form_values[field.attrib['name']] = field.attrib.get('value', '')

    def text_of(cell):
        # Concatenate the stripped text fragments of a cell.
        return u''.join([txt.strip() for txt in cell.itertext()])

    account_type = Account.TYPE_UNKNOWN
    for section in self.document.xpath('//div[@class="btit"]'):
        account_type = self.ACCOUNT_TYPES.get(section.text.strip(), Account.TYPE_UNKNOWN)

        for row in section.getnext().xpath('.//tbody/tr'):
            if 'id' not in row.attrib:
                continue

            row_id = row.attrib['id']
            args = dict(parse_qsl(row_id))
            cells = row.findall('td')

            if len(cells) < 4 or 'identifiant' not in args:
                self.logger.warning('Unable to parse an account')
                continue

            account = Account()
            account.id = args['identifiant']
            account.label = u' '.join([text_of(cells[1]), text_of(cells[2])]).strip()
            account.type = account_type

            balance = text_of(cells[3])
            account.balance = Decimal(FrenchTransaction.clean_amount(balance))
            account.currency = account.get_currency(balance)

            account._params = form_values.copy()
            account._params['dialogActionPerformed'] = 'SOLDE'
            selector = 'attribute($SEL_$%s)' % row_id.split('_')[0]
            account._params[selector] = row_id.split('_', 1)[1]
            yield account
def get_loan_list(self):
    """Return an iterator over the loan accounts of the "new website" page.

    Each ``div.panel`` is preceded by a title element used to look up the
    account type; each matching row of the panel describes one loan whose
    balance is stored as a negative amount.

    Rows exposing no ``a/strong`` label are skipped: without a label there
    is no account id, and the previous code either raised ``NameError`` or
    silently reused the label of the previous row (overwriting that
    account in the result).

    :returns: ``itervalues()`` of an ``OrderedDict`` keyed by account id
    """
    accounts = OrderedDict()

    # New website
    for table in self.document.xpath('//div[@class="panel"]'):
        title = table.getprevious()
        if title is None:
            continue
        account_type = self.ACCOUNT_TYPES.get(self.parser.tocleanstring(title), Account.TYPE_UNKNOWN)

        for tr in table.xpath('./table/tbody/tr[contains(@id,"MM_SYNTHESE_CREDITS") and contains(@id,"IdTrGlobal")]'):
            tds = tr.findall('td')
            if not tds:
                continue

            strongs = tds[0].xpath('.//a/strong')
            if not strongs:
                # No label means no account id: skip instead of reusing a
                # stale label from a previous row.
                continue
            label = strongs[0].text.strip()

            amount_text = self.parser.tocleanstring(tds[-1])
            balance = Decimal(FrenchTransaction.clean_amount(amount_text))

            account = Account()
            account.id = label.split(' ')[-1]
            account.label = unicode(label)
            account.type = account_type
            account.balance = -abs(balance)
            account.currency = account.get_currency(amount_text)
            account._card_links = []
            accounts[account.id] = account

    return accounts.itervalues()
def _add_account(self, accounts, link, label, account_type, balance):
    """Create an account from ``link`` and register it in ``accounts``.

    When the link describes a ``HISTORIQUE_CB`` entry for an account id
    that is already registered, the balance is added to the existing
    account's ``coming`` and the link info is appended to its
    ``_card_links`` instead of creating a new entry.

    :param accounts: mapping of account id to account, updated in place
    :param link: link passed to ``self._get_account_info``
    :param label: account label
    :param account_type: type used when the link info has no ``acc_type``
    :param balance: raw balance text; when empty, ``self.get_balance`` is used
    """
    info = self._get_account_info(link)
    if info is None:
        self.logger.warning('Unable to parse account %r: %r' % (label, link))
        return

    account = Account()
    account.id = info['id']
    account.iban = u'FR76' + info['id']
    account._info = info
    account.label = label

    if 'acc_type' in info:
        account.type = info['acc_type']
    else:
        account.type = account_type

    if balance:
        account.balance = Decimal(FrenchTransaction.clean_amount(balance))
    else:
        account.balance = self.get_balance(account)
    account.currency = account.get_currency(balance)
    account._card_links = []

    is_card_history = account._info['type'] == 'HISTORIQUE_CB'
    if not (is_card_history and account.id in accounts):
        accounts[account.id] = account
        return

    # Card history of a known account: fold it into that account.
    existing = accounts[account.id]
    if not existing.coming:
        existing.coming = Decimal('0.0')
    existing.coming += account.balance
    existing._card_links.append(account._info)
def parse_table(self, what):
    """Parse the accounts of the table whose id is ``what``.

    New accounts are stored in ``self.accounts``.  A row whose id is
    already known is treated as a card entry: its link is appended to the
    existing account's ``_card_links`` and its balance added to ``coming``.
    Rows linking to "BourseEnLigne" are ignored.

    :param what: value of the ``id`` attribute of the table to parse
    """
    tables = self.document.xpath("//table[@id='%s']" % what, smart_strings=False)
    if len(tables) < 1:
        return

    for row in tables[0].xpath(".//tbody/tr"):
        account = Account()
        anchor = row.xpath("./td//a")[0]
        account.label = to_unicode(anchor.text)
        account._link_id = anchor.get("href")
        if "BourseEnLigne" in account._link_id:
            continue

        strongs = row.xpath("./td/span/strong")
        if len(strongs) < 2:
            # Only the balance is in <strong>; the id is in the second span.
            raw_id = row.xpath("./td//span")[1].text
            raw_balance = strongs[0].text
        else:
            raw_id = strongs[0].text
            raw_balance = strongs[1].text

        account.id = raw_id
        account.currency = account.get_currency(raw_balance)
        account.balance = Decimal(FrenchTransaction.clean_amount(raw_balance))

        if account.id not in self.accounts:
            account._card_links = []
            self.accounts[account.id] = account
            continue

        known = self.accounts[account.id]
        known._card_links.append(account._link_id)
        if not known.coming:
            known.coming = Decimal("0.0")
        known.coming += account.balance
def get_list(self):
    """Return the accounts listed in the ``ecli`` table.

    Rows of class ``entete`` only set the type applied to the following
    rows.  Rows whose last cell is empty are skipped.  The account id is
    the first run of digits found in the label.

    :returns: list of ``Account`` objects
    """
    current_type = Account.TYPE_UNKNOWN
    result = []
    for row in self.document.xpath('//table[@class="ecli"]/tr'):
        if row.attrib.get('class', '') == 'entete':
            heading = row.find('th').text.strip()
            current_type = self.ACCOUNT_TYPES.get(heading, Account.TYPE_UNKNOWN)
            continue

        cells = row.findall('td')
        balance = cells[-1].text.strip()
        if balance == '':
            continue

        account = Account()
        raw_label = u' '.join([txt.strip() for txt in cells[0].itertext()])
        # Collapse runs of spaces, non-breaking spaces, bullets and newlines.
        account.label = re.sub(u'[ \xa0\u2022\r\n\t]+', u' ', raw_label).strip()
        account.id = re.findall('(\d+)', account.label)[0]
        account.balance = Decimal(FrenchTransaction.clean_amount(balance))
        account.currency = account.get_currency(balance)
        account.type = current_type

        onclick = cells[0].find('a').attrib['onclick']
        m = re.search(r"javascript:submitForm\(([\w_]+),'([^']+)'\);", onclick)
        if m:
            account._link = m.group(2)
        else:
            self.logger.warning('Unable to find link for %r' % account.label)
            account._link = None

        result.append(account)

    return result
def _add_account(self, accounts, link, label, account_type, balance):
    """Create an account from ``link`` and register it in ``accounts``.

    The IBAN is only set when the id is a valid RIB.  When no usable
    balance is available, balance and currency are ``NotAvailable``.  A
    ``HISTORIQUE_CB`` entry for an already registered id is folded into
    the existing account (``coming`` and ``_card_links``).

    :param accounts: mapping of account id to account, updated in place
    :param link: link passed to ``self._get_account_info``
    :param label: account label, also looked up in ``self.ACCOUNT_TYPES``
    :param account_type: fallback type
    :param balance: raw balance text; when empty, ``self.get_balance`` is used
    """
    info = self._get_account_info(link, accounts)
    if info is None:
        self.logger.warning('Unable to parse account %r: %r' % (label, link))
        return

    account = Account()
    account.id = info['id']
    if is_rib_valid(info['id']):
        account.iban = rib2iban(info['id'])
    account._info = info
    account.label = label

    if 'acc_type' in info:
        fallback_type = info['acc_type']
    else:
        fallback_type = account_type
    account.type = self.ACCOUNT_TYPES.get(label, fallback_type)

    balance = balance or self.get_balance(account)
    if balance and balance is not NotAvailable:
        account.balance = Decimal(FrenchTransaction.clean_amount(balance))
        account.currency = account.get_currency(balance)
    else:
        account.balance = NotAvailable
        account.currency = NotAvailable
    account._card_links = []

    is_card_history = account._info['type'] == 'HISTORIQUE_CB'
    if not (is_card_history and account.id in accounts):
        accounts[account.id] = account
        return

    # Card history of a known account: fold it into that account.
    existing = accounts[account.id]
    if not existing.coming:
        existing.coming = Decimal('0.0')
    if account.balance and account.balance is not NotAvailable:
        existing.coming += account.balance
    existing._card_links.append(account._info)
def get_list(self):
    """Yield the single 'Carte PASS' account described by the page.

    The account id is the first run of digits of the ``credithauttexte``
    block.  Table rows provide the coming amount (stored negated) and,
    when a date is found in the statement-date row, ``_outstanding_date``;
    parsing of the rows stops once that date has been read.
    """
    panel = self.document.xpath('//div[@id="descriptifdroite"]')[0]

    account = Account()
    header_text = panel.xpath('.//div[@class="credithauttexte"]')[0].text
    account.id = re.search(u'(\d+)', header_text).group(1)
    account.label = u'Carte PASS'
    account.balance = Decimal('0')

    for row in panel.xpath('.//table/tr'):
        cells = row.findall('td')
        if len(cells) < 3:
            continue

        label = u''.join([txt.strip() for txt in cells[1].itertext()])
        value = u''.join([txt.strip() for txt in cells[2].itertext()])
        lowered = label.lower()

        if 'encours depuis le dernier' in lowered:
            coming = u'-' + value
            account.coming = Decimal(FrenchTransaction.clean_amount(coming))
            account.currency = account.get_currency(coming)
        elif u'arrêté de compte' in lowered:
            m = re.search(u'(\d+)/(\d+)/(\d+)', label)
            if m:
                # The label holds day/month/year; date() wants the reverse.
                day, month, year = map(int, m.groups())
                account._outstanding_date = datetime.date(year, month, day)
                break

    yield account
def get_list(self):
    """Yield the accounts listed under each ``div.btit`` section.

    The text of each section title selects the account type; each table
    row's ``id`` attribute is a query-string holding ``identifiant``.  The
    balance is read from the ``<span>`` inside the link of the fourth
    cell.  Every account receives a copy of the page's ``<input>`` values
    as ``_params``, completed for the ``SOLDE`` action.
    """
    form_values = {}
    for field in self.document.xpath('//input'):
        form_values[field.attrib['name']] = field.attrib.get('value', '')

    def text_of(cell):
        # Concatenate the stripped text fragments of a cell.
        return u''.join([txt.strip() for txt in cell.itertext()])

    account_type = Account.TYPE_UNKNOWN
    for section in self.document.xpath('//div[@class="btit"]'):
        account_type = self.ACCOUNT_TYPES.get(section.text.strip(), Account.TYPE_UNKNOWN)

        for row in section.getnext().xpath('.//tbody/tr'):
            row_id = row.attrib['id']
            args = dict(parse_qsl(row_id))
            cells = row.findall('td')

            account = Account()
            account.id = args['identifiant']
            account.label = u' '.join([text_of(cells[1]), text_of(cells[2])]).strip()
            account.type = account_type

            balance_link = cells[3].find('a')
            balance_text = balance_link.find('span').text
            account.balance = Decimal(FrenchTransaction.clean_amount(balance_text))

            account._params = form_values.copy()
            account._params['dialogActionPerformed'] = 'SOLDE'
            selector = 'attribute($SEL_$%s)' % row_id.split('_')[0]
            account._params[selector] = row_id.split('_', 1)[1]
            yield account
def get_list(self):
    """Return the single subscription described by the account summary.

    Owner, phone number, credit, expire date and plan are read from the
    ``accountSummary`` block.  The expire date text is dot-separated and
    is passed to ``date()`` in reverse order.

    :returns: list holding one ``Subscription``
    """
    summary = self.document.xpath('//div[@id="accountSummary"]')[0]
    owner = summary.xpath('a/h3')[0].text

    fields = summary.xpath('dl/dd')
    phone = fields[0].text
    credit = fields[1].text
    expiredate = fields[2].text
    phoneplan = fields[3].text

    log = self.browser.logger
    log.debug('Found ' + owner + ' as subscriber')
    log.debug('Found ' + phone + ' as phone number')
    log.debug('Found ' + credit + ' as available credit')
    log.debug('Found ' + expiredate + ' as expire date ')
    log.debug('Found %s as subscription type', phoneplan)

    subscription = Subscription(phone)
    subscription.label = unicode(u'%s - %s - %s - %s' % (phone, credit, phoneplan, expiredate))
    subscription.subscriber = unicode(owner)

    date_parts = [int(part) for part in expiredate.split(".")]
    subscription.validity = date(*reversed(date_parts))
    subscription._balance = Decimal(FrenchTransaction.clean_amount(credit))

    return [subscription]
def get_measure_accounts_list(self):
    """Collect the accounts of every measure and return ``self.accounts``.

    After going to the home page, nothing is collected unless the list of
    measures page is displayed.  Each measure's account list is visited
    in turn; each account remembers its ``measure_id`` in ``_info``.  Life
    insurance accounts then get their balance and currency from their
    history page.
    """
    self.home.go()

    # Make sure we are on list of measures page
    if not self.measure_page.is_here():
        return self.accounts

    self.page.check_no_accounts()
    measure_ids = self.page.get_measure_ids()
    self.accounts = []

    for measure_id in measure_ids:
        self.page.go_measure_accounts_list(measure_id)
        if self.page.check_measure_accounts():
            for account in list(self.page.get_list()):
                account._info['measure_id'] = measure_id
                self.accounts.append(account)
        # Back to the list of measures before visiting the next one.
        self.page.go_measure_list()

    for account in self.accounts:
        info = account._info
        if 'acc_type' not in info or info['acc_type'] != Account.TYPE_LIFE_INSURANCE:
            continue

        self.page.go_measure_list()
        self.page.go_measure_accounts_list(info['measure_id'])
        self.page.go_history(account._info)

        if self.message.is_here():
            # A message page is shown instead: submit it and retry.
            self.page.submit()
            self.page.go_history(account._info)

        balance = self.page.get_measure_balance(account)
        account.balance = Decimal(FrenchTransaction.clean_amount(balance))
        account.currency = account.get_currency(balance)

    return self.accounts
def find_amount(self, page, title):
    """Return the amount shown next to the header containing ``title``.

    :param page: document exposing ``xpath()``
    :param title: text searched inside ``<th>`` elements
    :returns: a ``Decimal``, or ``None`` when no matching cell is found
    """
    cells = page.xpath(u'//th[contains(text(), "%s")]/../td' % title)
    if not cells:
        return None
    return Decimal(FrenchTransaction.clean_amount(cells[0].text))
def get_list(self):
    """Yield the accounts listed in the ``ca-table`` tables.

    A row holding an ``<h3>`` title updates the type applied to the rows
    that follow.  Only rows whose class starts with ``colcelligne`` and
    which have at least ``self.NB_COLS`` cells describe an account.
    Accounts with an empty or 'indisponible' balance are skipped.
    """
    current_type = Account.TYPE_UNKNOWN

    for row in self.document.xpath('//table[@class="ca-table"]/tr'):
        headings = row.xpath('.//h3/text()')
        if headings:
            heading = headings[0].lower().strip()
            current_type = self.TYPES.get(heading, Account.TYPE_UNKNOWN)

        if not row.attrib.get('class', '').startswith('colcelligne'):
            continue

        cols = row.findall('td')
        if not cols or len(cols) < self.NB_COLS:
            continue

        account = Account()
        account.id = self.parser.tocleanstring(cols[self.COL_ID])
        account.label = self.parser.tocleanstring(cols[self.COL_LABEL])
        account.type = current_type or self.TYPES.get(account.label, Account.TYPE_UNKNOWN)

        balance = self.parser.tocleanstring(cols[self.COL_VALUE])
        # we have to ignore those accounts, because using NotAvailable
        # makes boobank and probably many others crash
        if balance in ('indisponible', ''):
            continue

        account.balance = Decimal(Transaction.clean_amount(balance))
        currency_text = self.parser.tocleanstring(cols[self.COL_CURRENCY])
        account.currency = account.get_currency(currency_text)
        account._link = None
        self.set_link(account, cols)
        account._perimeter = self.browser.current_perimeter
        yield account
def get_list(self):
    """Yield one account per ``a.mainclic`` link of the document.

    The first keyword found in the label (in the order of the rule table
    below) decides both the id prefix and the account type; labels
    matching no keyword keep their raw id and get no type.  Each account
    also stores the page's ``javax.faces.ViewState`` value in ``_jid``.
    """
    # TODO: no idea about how proxy accounts are displayed

    # (keyword searched in the label, id prefix, account type) - order matters.
    rules = (
        ("Courant", "CC-", Account.TYPE_CHECKING),
        ("Livret A", "LA-", Account.TYPE_SAVINGS),
        ("Orange", "LEO-", Account.TYPE_SAVINGS),
        ("Durable", "LDD-", Account.TYPE_SAVINGS),
        ("Titres", "TITRE-", Account.TYPE_MARKET),
        ("PEA", "PEA-", Account.TYPE_MARKET),
    )

    for anchor in self.document.xpath('//a[@class="mainclic"]'):
        account = Account()
        account.currency = Currency.CUR_EUR
        account.id = unicode(anchor.find('span[@class="account-number"]').text)
        account._id = account.id
        account.label = unicode(anchor.find('span[@class="title"]').text)

        balance = anchor.find('span[@class="solde"]/label').text
        account.balance = Decimal(FrenchTransaction.clean_amount(balance))
        account.coming = NotAvailable

        for keyword, prefix, kind in rules:
            if keyword in account.label:
                account.id = prefix + account.id
                account.type = kind
                break

        view_state = self.document.find('//input[@name="javax.faces.ViewState"]')
        account._jid = view_state.attrib['value']

        yield account
def get_list(self):
    """Yield the accounts listed in the ``ca-table`` tables.

    Only rows whose class starts with ``colcelligne`` and which hold cells
    describe an account.  The type is looked up from the label.  Accounts
    with an empty or 'indisponible' balance are skipped.  ``_link`` is the
    href of the first cell's link (spaces escaped), or ``None``.
    """
    for row in self.document.xpath('//table[@class="ca-table"]/tr'):
        row_class = row.attrib.get('class', '')
        if not row_class.startswith('colcelligne'):
            continue

        cols = row.findall('td')
        if not cols:
            continue

        account = Account()
        account.id = self.parser.tocleanstring(cols[self.COL_ID])
        account.label = self.parser.tocleanstring(cols[self.COL_LABEL])
        account.type = self.TYPES.get(account.label, Account.TYPE_UNKNOWN)

        balance = self.parser.tocleanstring(cols[self.COL_VALUE])
        # we have to ignore those accounts, because using NotAvailable
        # makes boobank and probably many others crash
        if balance in ('indisponible', ''):
            continue

        account.balance = Decimal(Transaction.clean_amount(balance))
        currency_text = self.parser.tocleanstring(cols[self.COL_CURRENCY])
        account.currency = account.get_currency(currency_text)

        anchor = cols[0].find('a')
        if anchor is None:
            account._link = None
        else:
            account._link = anchor.attrib['href'].replace(' ', '%20')

        yield account
def iter_accounts(self, next_pages):
    """Yield the accounts listed under each ``div.btit`` section.

    Sections without text, or whose type maps to ``None`` in
    ``self.ACCOUNT_TYPES``, are ignored.  The currency is read from the
    first table header ending with a parenthesised word.  Loan balances
    are stored as negative amounts.

    :param next_pages: list receiving the parameters of the "coming" pages
                       still to be visited (appended in place)
    """
    form_values = {}
    for field in self.document.xpath('//input'):
        form_values[field.attrib['name']] = field.attrib.get('value', '')

    def text_of(cell):
        # Concatenate the stripped text fragments of a cell.
        return u''.join([txt.strip() for txt in cell.itertext()])

    account_type = Account.TYPE_UNKNOWN
    for section in self.document.getroot().cssselect('div.btit'):
        if section.text is None:
            continue
        account_type = self.ACCOUNT_TYPES.get(section.text.strip(), Account.TYPE_UNKNOWN)

        if account_type is None:
            # ignore services accounts
            continue

        currency = None
        for th in section.getnext().xpath('.//thead//th'):
            m = re.match('.*\((\w+)\)$', th.text)
            if m and currency is None:
                currency = Account.get_currency(m.group(1))

        for row in section.getnext().xpath('.//tbody/tr'):
            if 'id' not in row.attrib:
                continue

            row_id = row.attrib['id']
            args = dict(parse_qsl(row_id))
            cells = row.findall('td')

            if len(cells) < 4 or 'identifiant' not in args:
                self.logger.warning('Unable to parse an account')
                continue

            account = Account()
            account.id = args['identifiant'].replace(' ', '')
            account.label = u' '.join([text_of(cells[1]), text_of(cells[2])]).strip()
            account.type = account_type

            balance = FrenchTransaction.clean_amount(text_of(cells[3]))
            account.balance = Decimal(balance or '0.0')
            account.currency = currency
            if account.type == account.TYPE_LOAN:
                account.balance = - abs(account.balance)

            account._prev_debit = None
            account._next_debit = None
            account._params = None
            account._coming_params = None

            has_history_link = len(cells[3].xpath('.//a')) > 0
            if balance != u'' and has_history_link:
                account._params = form_values.copy()
                account._params['dialogActionPerformed'] = 'SOLDE'
                selector = 'attribute($SEL_$%s)' % row_id.split('_')[0]
                account._params[selector] = row_id.split('_', 1)[1]

            if len(cells) >= 5 and len(cells[self.COL_COMING].xpath('.//a')) > 0:
                coming_params = account._params.copy()
                coming_params['dialogActionPerformed'] = 'ENCOURS_COMPTE'
                next_pages.append(coming_params)

            yield account
def get_list(self):
    """Return an iterator over the accounts found in the document rows.

    A row is an account row when its first cell has class ``i g`` or
    ``p g`` and holds a link whose query string has a ``rib`` parameter.
    A row whose rib is already known adds its amount to that account's
    ``coming`` and its link to ``_card_links``.  For new accounts the
    linked page is fetched to read the accounting balance and the coming
    operations.

    :returns: ``itervalues()`` of an ``OrderedDict`` keyed by rib
    """
    accounts = OrderedDict()

    for row in self.document.getiterator('tr'):
        first_td = row.getchildren()[0]
        if first_td.attrib.get('class', '') not in ('i g', 'p g'):
            continue
        anchor = first_td.find('a')
        if anchor is None:
            continue

        link = anchor.get('href', '')
        if link.startswith('POR_SyntheseLst'):
            continue

        query = parse_qs(urlparse(link).query)
        if 'rib' not in query:
            continue

        # Take the amount from the third cell, else from the second one.
        for index in (2, 1):
            cell_text = row.getchildren()[index].text
            balance = FrenchTransaction.clean_amount(cell_text)
            currency = Account.get_currency(cell_text)
            if len(balance) > 0:
                break
        balance = Decimal(balance)

        rib = query['rib'][0]

        if rib in accounts:
            known = accounts[rib]
            if not known.coming:
                known.coming = Decimal('0.0')
            known.coming += balance
            known._card_links.append(link)
            continue

        account = Account()
        account.id = rib
        account.label = unicode(anchor.text).strip().lstrip(' 0123456789').title()
        account._link_id = link
        account._card_links = []

        # Find accounting amount
        page = self.browser.get_document(self.browser.openurl(link))
        coming = self.find_amount(page, u"Opérations à venir")
        accounting = self.find_amount(page, u"Solde comptable")

        if accounting is not None:
            if accounting + (coming or Decimal('0')) != balance:
                self.logger.warning('%s + %s != %s' % (accounting, coming, balance))
            balance = accounting

        if coming is not None:
            account.coming = coming
        account.balance = balance
        account.currency = currency

        accounts[account.id] = account

    return accounts.itervalues()
def get_list(self):
    """Yield one card ``Account`` per card table found in the document.

    Two layouts are handled: a page with captioned card tables ("several
    cards"), and a page where only the first ``ca-table`` describes the
    single card. Each layout has its own set of XPath expressions.
    """
    multi_xpath = '//table[caption[@class="caption tdb-cartes-caption" or @class="ca-table caption"]]'
    tables = self.document.xpath(multi_xpath)
    currency = self.document.xpath('//table/caption//span/text()[starts-with(.,"Montants en ")]')[0].replace("Montants en ", "") or None

    several_cards = bool(tables)
    if several_cards:
        self.logger.debug('There are several cards')
        xpaths = {
            '_id': './caption/span[@class="tdb-cartes-num"]',
            'label1': './caption/span[contains(@class, "tdb-cartes-carte")]',
            'label2': './caption/span[@class="tdb-cartes-prop"]',
            'balance': './/tr/td[@class="cel-num"]',
            'currency': '//table/caption//span/text()[starts-with(.,"Montants en ")]',
            'link': './/tr//a/@href[contains(., "fwkaction=Detail")]',
        }
    else:
        self.logger.debug('There is only one card')
        xpaths = {
            '_id': './/tr/td[@class="cel-texte"]',
            'label1': './/tr[@class="ligne-impaire ligne-bleu"]/th',
            'label2': './caption/span[@class="tdb-cartes-prop"]/b',
            'balance': './/tr[last()-1]/td[@class="cel-num"] | .//tr[last()-2]/td[@class="cel-num"]',
            'currency': '//table/caption//span/text()[starts-with(.,"Montants en ")]',
        }
        tables = self.document.xpath('(//table[@class="ca-table"])[1]')

    for table in tables:
        def clean_first(name, table=table):
            # Cleaned text of the first node matching the named XPath.
            return self.parser.tocleanstring(table.xpath(xpaths[name])[0])

        account = Account()
        account.type = account.TYPE_CARD

        # The first word of the id cell is dropped, the rest is the number.
        number_words = clean_first('_id').split()[1:]
        account.id = ''.join(number_words)
        account._id = ' '.join(number_words)
        account.label = '%s - %s' % (clean_first('label1'),
                                     re.sub('\s*-\s*$', '', clean_first('label2')))

        try:
            account.balance = Decimal(Transaction.clean_amount(table.xpath(xpaths['balance'])[-1].text))
            account.currency = account.get_currency(
                self.document.xpath(xpaths['currency'])[0].replace("Montants en ", ""))
            if not account.currency and currency:
                account.currency = Account.get_currency(currency)
        except IndexError:
            account.balance = Decimal('0.0')

        if 'link' not in xpaths:
            account._link = self.url
        else:
            hrefs = table.xpath(xpaths['link'])
            try:
                account._link = hrefs[-1]
            except IndexError:
                account._link = None
            else:
                account._link = re.sub('[\n\r\t]+', '', account._link)

        account._perimeter = self.browser.current_perimeter
        yield account
def get_converted_amount(self, account):
    """Return the amount expressed in ``account.currency`` found in the page.

    The first ``<td>`` or ``<dd>`` whose text contains the currency code is
    split on '=' / 'soit'; the part mentioning the currency is converted to
    a Decimal. ``False`` is returned when nothing matches.
    """
    code = account.currency
    quoted = '"' + code + '"'
    cells = self.doc.xpath('//td[contains(text(),' + quoted + ')] | //dd[contains(text(),' + quoted + ')]')
    if len(cells) == 0:
        return False

    # In case text is "12,34 EUR = 56.78 USD" or "-£115,62 GBP soit -€163,64 EUR"
    for part in re.split('=|soit', CleanText().filter(cells[0])):
        if code not in part:
            continue
        before_code = part.split(code)[0]
        return Decimal(FrenchTransaction.clean_amount(before_code))
    return False
def recap(self):
    """Build a ``Transfer`` from the confirmation block of the page.

    Amount, origin, recipient and reason are read from the ``confirm*``
    spans located inside the ``div.content.recap`` element.
    """
    div = self.document.find('//div[@class="content recap"]')

    def span_text(span_id):
        # Text of the first span carrying the given id inside the recap block.
        return div.xpath('.//span[@id="%s"]' % span_id)[0].text

    transfer = Transfer(0)
    raw_amount = span_text('confirmtransferAmount')
    transfer.amount = Decimal(FrenchTransaction.clean_amount(raw_amount))
    transfer.origin = span_text('confirmfromAccount')
    transfer.recipient = span_text('confirmtoAccount')
    transfer.reason = span_text('confirmtransferMotive')
    return transfer
def _add_account(self, accounts, link, label, account_type, balance): info = self._get_account_info(link, accounts) if info is None: self.logger.warning('Unable to parse account %r: %r' % (label, link)) return account = Account() account.id = info['id'] if is_rib_valid(info['id']): account.iban = rib2iban(info['id']) account._info = info account.label = label account.type = self.ACCOUNT_TYPES.get( label, info['acc_type'] if 'acc_type' in info else account_type) balance = balance or self.get_balance(account) account.balance = Decimal( FrenchTransaction.clean_amount(balance) ) if balance and balance is not NotAvailable else NotAvailable account.currency = account.get_currency( balance ) if balance and balance is not NotAvailable else NotAvailable account._card_links = [] if account._info['type'] == 'HISTORIQUE_CB' and account.id in accounts: a = accounts[account.id] if not a.coming: a.coming = Decimal('0.0') if account.balance and account.balance is not NotAvailable: a.coming += account.balance a._card_links.append(account._info) return accounts[account.id] = account
class item(ItemElement):
    """Declarative description of one ``Account`` built from the element."""
    klass = Account

    # Maps the first word of the label (see obj_type) to an account type.
    TYPE = {
        'Livret': Account.TYPE_SAVINGS,
        'Compte': Account.TYPE_CHECKING,
        'PEA': Account.TYPE_PEA,
        'PEA-PME': Account.TYPE_PEA,
        'Compte-titres': Account.TYPE_MARKET,
        'Assurance-vie': Account.TYPE_LIFE_INSURANCE,
        'Crédit': Account.TYPE_LOAN,
    }

    # The id is the first run of digits found in the title/text blocks.
    obj_id = CleanText(
        './td//div[contains(@class, "-synthese-title") or contains(@class, "-synthese-text")]'
    ) & Regexp(pattern=r'(\d+)')
    obj_label = CleanText('./td//div[contains(@class, "-synthese-title")]')
    # Balance and currency are both read from the "-synthese-num" block.
    obj_balance = MyDecimal('./td//div[contains(@class, "-synthese-num")]', replace_dots=True)
    obj_currency = FrenchTransaction.Currency('./td//div[contains(@class, "-synthese-num")]')
    # Everything before the first space of the label is looked up in TYPE.
    obj_type = Map(Regexp(Field('label'), r'^([^ ]*)'), TYPE, default=Account.TYPE_UNKNOWN)

    def obj_url(self):
        """Return the element's ``data-href`` joined with the page URL."""
        return urljoin(self.page.url, CleanText('./@data-href')(self))

    # Read from the second div of the last "synthese-encours" block; None when absent.
    obj__card_balance = CleanDecimal('./td//div[@class="synthese-encours"][last()]/div[2]', default=None)

    def condition(self):
        """Accept the element only when it has no ``td.chart`` cell."""
        return not len(self.el.xpath('./td[@class="chart"]'))
def populate(self, accounts):
    """Complete ``accounts`` from the navigation menu and append card accounts.

    For every given account the menu entries are scanned: entries whose name
    contains 'CARTE' and that have a balance become card accounts; an entry
    with the same label and balance as the account supplies its type (when
    missing) and its ``_webid``. Found cards are appended to ``accounts``.
    """
    found_cards = []
    name_xpath = './/span[@class="nav-category__name"]'

    for account in accounts:
        for category in self.doc.xpath('//li[@class="nav-category"]'):
            title = CleanText().filter(category.xpath('./h3'))
            for anchor in category.xpath('./ul/li//a'):
                label = CleanText().filter(anchor.xpath(name_xpath))
                balance_el = anchor.xpath('.//span[@class="nav-category__value"]')
                balance = CleanDecimal(replace_dots=True, default=NotAvailable).filter(balance_el)

                if 'CARTE' in label and balance:
                    card = Account()
                    card.balance = balance
                    card.label = label
                    card.currency = FrenchTransaction.Currency().filter(balance_el)
                    card._link = Link().filter(anchor.xpath('.'))
                    card._history_page = card._link
                    # The card id is whatever follows "carte/" in its link.
                    web_id = Regexp(pattern='carte/(.*)$').filter(Link().filter(anchor.xpath('.')))
                    card.id = web_id
                    card._webid = web_id
                    card.type = Account.TYPE_CARD
                    if card in found_cards:
                        continue
                    found_cards.append(card)
                    continue

                if account.label != label or account.balance != balance:
                    continue

                if not account.type:
                    account.type = AccountsPage.ACCOUNT_TYPES.get(title, Account.TYPE_UNKNOWN)
                account._webid = Attr(None, 'data-account-label').filter(anchor.xpath(name_xpath))

    accounts.extend(found_cards)
def parse_table(self, what, actype=Account.TYPE_UNKNOWN):
    """Parse the rows of table ``what`` into ``self.accounts``.

    :param what: value of the ``id`` attribute of the table to parse.
    :param actype: type given to the accounts, except for links containing
        'BourseEnLigne' which are always market accounts.

    A row whose id is already registered is treated as a card: its link is
    appended to ``_card_links`` and its balance added to ``coming``.
    """
    matches = self.document.xpath("//table[@id='%s']" % what, smart_strings=False)
    if len(matches) < 1:
        return

    for row in matches[0].xpath(".//tbody/tr"):
        account = Account()

        anchor = row.xpath("./td//a")[0]
        account.label = to_unicode(anchor.text)
        account.type = actype
        account._link_id = anchor.get("href")
        if 'BourseEnLigne' in account._link_id:
            account.type = Account.TYPE_MARKET

        # Either both id and balance are in <strong>, or only the balance is
        # and the id is in the second <span> of the row.
        strongs = row.xpath("./td/span/strong")
        if len(strongs) < 2:
            raw_id = row.xpath("./td//span")[1].text
            raw_balance = strongs[0].text
        else:
            raw_id = strongs[0].text
            raw_balance = strongs[1].text

        account.id = raw_id
        account.currency = account.get_currency(raw_balance)
        account.balance = Decimal(FrenchTransaction.clean_amount(raw_balance))

        if account.id not in self.accounts:
            account._card_links = []
            self.accounts[account.id] = account
            continue

        known = self.accounts[account.id]
        known._card_links.append(account._link_id)
        if not known.coming:
            known.coming = Decimal('0.0')
        known.coming += account.balance
def handle_response(self, account, recipient, amount, reason):
    """Check the transfer summary form and return the matching ``Transfer``.

    The debited and credited accounts displayed in the form must match the
    given ``account`` and ``recipient`` (by id, or by label once whitespace
    is removed), otherwise an AssertionError is raised. Amount, currency and
    execution date are read back from the form; ``reason`` becomes the label.
    """
    def dd_after(term):
        # XPath of the <dd> following the <dt> whose span text contains term.
        return '//form//dl/dt[span[contains(text(), "%s")]]/following::dd[1]' % term

    account_txt = CleanText(dd_after('biter'), replace=[(' ', '')])(self.doc)
    recipient_txt = CleanText(dd_after('diter'), replace=[(' ', '')])(self.doc)

    assert account.id in account_txt or ''.join(account.label.split()) == account_txt, 'Something went wrong'
    assert recipient.id in recipient_txt or ''.join(recipient.label.split()) == recipient_txt, 'Something went wrong'

    r_amount = CleanDecimal(dd_after('Montant'), replace_dots=True)(self.doc)
    exec_date = Date(CleanText(dd_after('Date')), dayfirst=True)(self.doc)
    currency = FrenchTransaction.Currency(dd_after('Montant'))(self.doc)

    transfer = Transfer()
    # Values read back from the form.
    transfer.currency = currency
    transfer.amount = r_amount
    # Values copied from the objects given by the caller.
    transfer.account_iban = account.iban
    transfer.recipient_iban = recipient.iban
    transfer.account_id = account.id
    transfer.recipient_id = recipient.id
    transfer.exec_date = exec_date
    transfer.label = reason
    transfer.account_label = account.label
    transfer.recipient_label = recipient.label
    transfer.account_balance = account.balance
    return transfer
def get_list(self):
    """Yield one ``Account`` per card box of the summary page.

    The balance is ``NotAvailable`` when the page displays an "Indisponible"
    text (or nothing); otherwise it is stored as a negative amount and the
    currency is read from the same text.
    """
    unavailable = (u'Indisponible', u'Indisponible Facturation en cours', '')
    clean = self.parser.tocleanstring

    for box in self.document.getroot().cssselect('div.roundedBox div.contentBox'):
        card = Account()

        title_el = box.xpath('.//tr[@id="summaryImageHeaderRow"]//div[@class="summaryTitles"]')[0]
        card.id = clean(title_el)
        card.label = clean(box.xpath('.//span[@class="cardTitle"]')[0])

        balance = clean(self.parser.select(box, 'td#colOSBalance div.summaryValues', 1))
        if balance not in unavailable:
            card.balance = -abs(Decimal(Transaction.clean_amount(balance)))
            card.currency = card.get_currency(balance)
        else:
            card.balance = NotAvailable

        summary_link = self.parser.select(box, 'div.summaryTitles a.summaryLink', 1)
        card._link = summary_link.attrib['href']

        yield card
def iter_documents(self, sub):
    """Yield a ``Bill`` for each invoice row, for the current page and each year.

    The page currently loaded is parsed first (the ``None`` entry), then the
    browser is moved to every link of the ``ul.years`` list and the page
    reached is parsed the same way.

    :param sub: subscription; its ``_id`` prefixes every bill id.

    Rows that cannot hold an invoice (fewer than three cells, no link in the
    first cell, no ``dateFactureQE`` date in the link) are skipped.
    """
    years = [None] + self.document.xpath('//ul[@class="years"]/li/a')
    for year in years:
        # A link without href is ignored instead of raising KeyError.
        if year is not None and year.attrib.get('href'):
            self.browser.location(year.attrib['href'])

        tables = self.browser.page.document.xpath('//table[contains(@summary, "factures")]')
        for table in tables:
            for tr in table.xpath('.//tr'):
                list_tds = tr.xpath('.//td')
                # The amount is read from the third cell: shorter rows
                # (including header rows without <td>) are not invoices.
                if len(list_tds) < 3:
                    continue

                links = list_tds[0].xpath('.//a')
                if not links:
                    continue

                url = re.sub(r'[\r\n\t]', '', links[0].attrib.get('href', ''))
                date_search = re.search(r'dateFactureQE=(\d+/\d+/\d+)', url)
                if not date_search:
                    continue

                date = datetime.strptime(date_search.group(1), "%d/%m/%Y").date()
                amount = self.parser.tocleanstring(list_tds[2])
                if amount is None:
                    continue

                bill = Bill()
                bill.id = sub._id + "." + date.strftime("%Y%m%d")
                bill.price = Decimal(FrenchTransaction.clean_amount(amount))
                bill.currency = bill.get_currency(amount)
                bill.date = date
                bill.label = self.parser.tocleanstring(list_tds[0])
                bill.format = u'pdf'
                bill.type = u'bill'
                bill._url = url
                yield bill
def populate(self, accounts):
    """Complete each account with the data of its navigation menu entry.

    An entry whose name contains the account's ``_holder`` supplies balance,
    currency, link and ``_webid``. An entry whose name equals the account
    label supplies the type (when missing), the history page to use for
    that type, and ``_webid``.
    """
    name_xpath = './span[@class="nav-category__name"]'

    for account in accounts:
        for category in self.doc.xpath('//li[@class="nav-category"]'):
            title = CleanText().filter(category.xpath('./h3'))
            for anchor in category.xpath('./ul/li/a'):
                label = CleanText().filter(anchor.xpath(name_xpath))

                if account._holder and account._holder in label:
                    value_el = anchor.xpath('./span[@class="nav-category__value"]')
                    account.balance = CleanDecimal(replace_dots=True).filter(value_el)
                    account.currency = FrenchTransaction.Currency().filter(value_el)
                    account._link = Link().filter(anchor.xpath('.'))
                    account._history_page = account._link
                    # The web id is what follows the last '=' of the link.
                    account._webid = Regexp(pattern='([^=]+)$').filter(Link().filter(anchor.xpath('.')))
                    continue

                if account.label != label:
                    continue

                if not account.type:
                    account.type = AccountsPage.ACCOUNT_TYPES.get(title, Account.TYPE_UNKNOWN)

                if account.type == Account.TYPE_LOAN:
                    history_page = None
                elif account.type in (Account.TYPE_LIFE_INSURANCE, Account.TYPE_MARKET):
                    history_page = re.sub('/$', '', Link().filter(anchor.xpath('.')))
                elif 'titulaire' in self.url:
                    history_page = self.browser.budget_transactions
                else:
                    history_page = self.browser.other_transactions
                account._history_page = history_page

                account._webid = Attr(None, 'data-account-label').filter(anchor.xpath(name_xpath))
class item(ItemElement):
    """Declarative description of one ``Account`` built from a table row."""
    klass = Account

    def condition(self):
        """Delegate the decision to keep this element to the page."""
        return self.page.accounts_list_condition(self.el)

    class Type(Filter):
        """Map a label to the type of the first ``AccountsPage.TYPES`` pattern it contains."""

        def filter(self, label):
            for pattern, actype in AccountsPage.TYPES.items():
                if pattern in label:
                    return actype
            return Account.TYPE_UNKNOWN

    # The label is "<title> <nature>", taken from the 2nd and 3rd
    # "ColonneLibelle" cells.
    obj__title = CleanText('td[@class="ColonneLibelle"][2]')
    obj__nature = CleanText('td[@class="ColonneLibelle"][3]')
    obj_label = Format('%s %s', Field('_title'), Field('_nature'))
    obj_currency = FrenchTransaction.Currency('./td[@class="ColonneCode"]')
    # Id and link both come from the anchor of the first "ColonneLibelle" cell.
    obj_id = CleanText('td[@class="ColonneLibelle"][1]/a')
    obj__link = Link('td[@class="ColonneLibelle"][1]/a')
    obj__rib_link = Link('.//a[contains(@href, "rib.jsp")]')
    obj_type = Type(Field('label'))
    obj_balance = CleanDecimal('td[@class="ColonneNumerique"]/nobr', replace_dots=True)
def _add_account(self, accounts, link, label, account_type, balance): info = self._get_account_info(link) if info is None: self.logger.warning('Unable to parse account %r: %r' % (label, link)) return account = Account() account.id = info['id'] account._info = info account.label = label account.type = info['acc_type'] if 'acc_type' in info else account_type account.balance = Decimal(FrenchTransaction.clean_amount(balance)) if balance else self.get_balance(account) account.currency = account.get_currency(balance) account._card_links = [] if account._info['type'] == 'HISTORIQUE_CB' and account.id in accounts: a = accounts[account.id] if not a.coming: a.coming = Decimal('0.0') a.coming += account.balance a._card_links.append(account._info) return accounts[account.id] = account
def get_monthly_transactions(self, trs):
    """Return one card-summary transaction per distinct date found in ``trs``.

    Transactions are grouped by ``date``; each group whose date is not in
    the future produces a "RELEVE CARTE" transaction whose amount is the
    opposite of the group's total. Summaries are flagged ``_is_manualsum``.
    """
    by_date = attrgetter('date')
    summaries = []

    for _, same_date in groupby(sorted(trs, key=by_date), by_date):
        batch = list(same_date)
        statement_date = batch[0].date

        # Groups dated after today are not summarised.
        if statement_date > datetime.today().date():
            continue

        summary = FrenchTransaction()
        summary.raw = summary.label = "RELEVE CARTE %s" % statement_date
        total = sum(t.amount for t in batch)
        summary.amount = -total
        summary.date = summary.rdate = summary.vdate = statement_date
        summary.type = FrenchTransaction.TYPE_CARD_SUMMARY
        summary._is_coming = False
        summary._is_manualsum = True
        summaries.append(summary)

    return summaries
def clean_amount(amount):
    """Convert a displayed amount to a Decimal.

    ``NotAvailable`` is returned for an empty value and for the ' NS' marker.
    """
    if not amount or amount == ' NS':
        return NotAvailable
    cleaned = FrenchTransaction.clean_amount(amount)
    return Decimal(cleaned)
def get_history(self, account):
    """Yield the transactions of ``account``, sorted by ``sorted_transactions``.

    Two paths exist:

    * cards whose 16-character id prefix is in ``self.cards_histo_available``:
      the monthly pages returned by ``get_links()`` are visited one by one and
      each month's transactions are yielded, followed by a "RELEVE CARTE"
      summary when the month's date is in the past;
    * any other account: the operations of the account link, of the coming
      link and of the attached cards are gathered and yielded together.

    :raises NotImplementedError: when the account has no ``_link_id``.
    """
    transactions = []
    if not account._link_id:
        raise NotImplementedError()

    if len(account.id) >= 16 and account.id[:16] in self.cards_histo_available:
        if self.two_cards_page:
            # In this case, you need to return to the page where the iter account get the cards information
            # Indeed, for the same position of card in the two pages the url, headers and parameters are exactly the same
            account._referer.go(subbank=self.currentSubBank)
            if account._secondpage:
                self.location(self.page.get_second_page_link())
        # Check if '000000xxxxxx0000' card have an annual history
        self.location(account._link_id)
        # The history of the card is available for 1 year with 1 month per page
        # Here we catch all the url needed to be the more compatible with the catch of merged subtransactions
        urlstogo = self.page.get_links()
        self.location(account._link_id)
        half_history = 'firstHalf'
        for url in urlstogo:
            # Transactions are collected and yielded month by month.
            transactions = []
            self.location(url)
            if 'GoMonthPrecedent' in url:
                # To reach the 6 last month of history you need to change this url parameter
                # Moreover we are on a transition page where we see the 6 next month (no scrapping here)
                half_history = 'secondHalf'
            else:
                history = self.page.get_history()
                self.tr_date = self.page.get_date()
                amount_summary = self.page.get_amount_summary()
                if self.page.has_more_operations():
                    for i in range(1, 100):
                        # Arbitrary range; it's the number of click needed to access to the full history of the month (stop with the next break)
                        data = {
                            '_FID_DoAddElem': '',
                            '_wxf2_cc': 'fr-FR',
                            '_wxf2_pmode': 'Normal',
                            '_wxf2_pseq': i,
                            '_wxf2_ptarget': 'C:P:updPan',
                            'Data_ServiceListDatas_CurrentOtherCardThirdPartyNumber': '',
                            'Data_ServiceListDatas_CurrentType': 'MyCards',
                        }
                        if 'fid=GoMonth&mois=' in self.url:
                            m = re.search(r'fid=GoMonth&mois=(\d+)', self.url)
                            if m:
                                m = m.group(1)
                            self.location('CRP8_SCIM_DEPCAR.aspx?_tabi=C&a__itaret=as=SCIM_ListeActivityStep\%3a\%3a\%2fSCIM_ListeRouter%3a%3a&a__mncret=SCIM_LST&a__ecpid=EID2011&_stack=_remote::moiSelectionner={},moiAfficher={},typeDepense=T&_pid=SCIM_DEPCAR_Details'.format(m, half_history), data=data)
                        else:
                            self.location(self.url, data=data)

                        if not self.page.has_more_operations_xml():
                            history = self.page.iter_history_xml(date=self.tr_date)
                            # We are now with an XML page with all the transactions of the month
                            break
                else:
                    history = self.page.get_history(date=self.tr_date)

                for tr in history:
                    # For regrouped transaction, we have to go through each one to get details
                    if tr._regroup:
                        self.location(tr._regroup)
                        for tr2 in self.page.get_tr_merged():
                            tr2._is_coming = tr._is_coming
                            tr2.date = self.tr_date
                            transactions.append(tr2)
                    else:
                        transactions.append(tr)

                # Add a summary transaction for months whose date is past.
                if transactions and self.tr_date < datetime.today().date():
                    tr = FrenchTransaction()
                    tr.raw = tr.label = "RELEVE CARTE %s" % self.tr_date
                    tr.amount = amount_summary
                    tr.date = tr.rdate = tr.vdate = self.tr_date
                    tr.type = FrenchTransaction.TYPE_CARD_SUMMARY
                    tr._is_coming = False
                    tr._is_manualsum = True
                    transactions.append(tr)

                for tr in sorted_transactions(transactions):
                    yield tr

    else:
        # need to refresh the months select
        if account._link_id.startswith('ENC_liste_oper'):
            self.location(account._pre_link)

        if not hasattr(account, '_card_pages'):
            for tr in self.list_operations(account._link_id, account):
                transactions.append(tr)

        coming_link = self.page.get_coming_link() if self.operations.is_here() else None
        if coming_link is not None:
            for tr in self.list_operations(coming_link, account):
                transactions.append(tr)

        # Earliest _deferred_date met on the card transactions, if any.
        deferred_date = None
        # Cards come from _card_pages when present, else from _card_links.
        cards = ([page.select_card(account._card_number) for page in account._card_pages]
                 if hasattr(account, '_card_pages')
                 else account._card_links if hasattr(account, '_card_links') else [])
        for card in cards:
            card_trs = []
            for tr in self.list_operations(card, account):
                if tr._to_delete:
                    # Delete main transaction when subtransactions exist
                    continue
                if hasattr(tr, '_deferred_date') and (not deferred_date or tr._deferred_date < deferred_date):
                    deferred_date = tr._deferred_date
                if tr.date >= datetime.now():
                    tr._is_coming = True
                elif hasattr(account, '_card_pages'):
                    card_trs.append(tr)
                transactions.append(tr)
            if card_trs:
                transactions.extend(self.get_monthly_transactions(card_trs))

        if deferred_date is not None:
            # set deleted for card_summary
            # (summaries built by hand carry _is_manualsum and are kept)
            for tr in transactions:
                tr.deleted = (tr.type == FrenchTransaction.TYPE_CARD_SUMMARY and
                              deferred_date.month <= tr.date.month and
                              not hasattr(tr, '_is_manualsum'))

        for tr in sorted_transactions(transactions):
            yield tr
class item(ItemElement):
    """Declarative description of one ``Account`` built from an accounts row.

    Several attributes are read from the account's own page, loaded
    asynchronously from ``url`` under the name 'details'.
    """
    klass = Account

    load_details = Field('url') & AsyncLoad

    def condition(self):
        """Keep only accounts that are not external (see is_external)."""
        return not self.is_external()

    obj_label = CleanText('.//a[has-class("account--name")] | .//div[has-class("account--name")]')
    obj_balance = CleanDecimal('.//a[has-class("account--balance")]', replace_dots=True)
    obj_currency = FrenchTransaction.Currency('.//a[has-class("account--balance")]')
    # Read from the details page; NotAvailable when neither layout matches.
    obj_valuation_diff = Async('details') & CleanDecimal(
        '//li[h4[text()="Total des +/- values"]]/h3 |\
        //li[span[text()="Total des +/- values latentes"]]/span[has-class("overview__value")]',
        replace_dots=True, default=NotAvailable)
    # href of the credit card modal trigger of the details page, if any.
    obj__card = Async('details') & Attr('//a[@data-modal-behavior="credit_card-modal-trigger"]', 'href', default=NotAvailable)
    obj__holder = None

    def obj_coming(self):
        """Return the "Mouvements à venir" amount, or NotAvailable for accounts with a card."""
        # Don't duplicate coming (card balance with account coming)
        # TODO: fetch coming which is not card coming for account with cards.
        if self.obj__card(self):
            return NotAvailable
        return Async('details', CleanDecimal(u'//li[h4[text()="Mouvements à venir"]]/h3', replace_dots=True, default=NotAvailable))(self)

    def obj_id(self):
        """Return the account id, raising SkipItem when none can be found.

        Cards use the second 32-character part of their URL; other accounts
        use the digits of the account number shown on the details page.
        """
        type = Field('type')(self)
        if type == Account.TYPE_CARD:
            # When card is opposed it still appears on accounts page with a dead link and so, no id. Skip it.
            if Attr('.//a[has-class("account--name")]', 'href')(self) == '#':
                raise SkipItem()
            return self.obj__idparts()[1]

        id = Async('details', Regexp(CleanText('//h3[has-class("account-number")]'), r'(\d+)', default=NotAvailable))(self)
        if not id:
            raise SkipItem()
        return id

    def obj_type(self):
        """Guess the account type from, in order: URL, label, category title, details page."""
        # card url is /compte/cav/xxx/carte/yyy so reverse to match "carte" before "cav"
        for word in Field('url')(self).lower().split('/')[::-1]:
            v = self.page.ACCOUNT_TYPES.get(word)
            if v:
                return v

        for word in Field('label')(self).replace('_', ' ').lower().split():
            v = self.page.ACCOUNT_TYPES.get(word)
            if v:
                return v

        category = CleanText('./preceding-sibling::tr[has-class("list--accounts--master")]//h4')(self)
        v = self.page.ACCOUNT_TYPES.get(category.lower())
        if v:
            return v

        # Last resort: the class of the loaded details page.
        page = Async('details').loaded_page(self)
        if isinstance(page, LoanPage):
            return Account.TYPE_LOAN

        return Account.TYPE_UNKNOWN

    def obj_url(self):
        """Return the absolute URL of the account link (second anchor as fallback)."""
        link = Attr('.//a[has-class("account--name")] | .//a[2]', 'href', default=NotAvailable)(self)
        return urljoin(self.page.url, link)

    def is_external(self):
        """Tell whether the account URL contains '/budget/'."""
        return '/budget/' in Field('url')(self)

    def obj__idparts(self):
        """Return every 32-character ``[a-z\\d]`` sequence found in the URL."""
        return re.findall('[a-z\d]{32}', Field('url')(self))

    def obj__webid(self):
        """Return the first id part of the URL, or None when there is none."""
        parts = self.obj__idparts()
        if parts:
            return parts[0]

    # We do not yield other banks accounts for the moment.
    def validate(self, obj):
        """Reject accounts whose details page mentions "Établissement bancaire"."""
        return not Async('details', CleanText(u'//h4[contains(text(), "Établissement bancaire")]'))(self) and not \
            Async('details', CleanText(u'//h4/div[contains(text(), "Établissement bancaire")]'))(self)
class item(ItemElement):
    """Declarative description of one ``Account`` built from a table row.

    Only rows with more than two cells are accounts. The type is guessed
    from the label; the id comes from the second cell, with an ".INVEST"
    suffix for market accounts.
    """
    klass = Account

    def condition(self):
        """Accept only rows holding more than two ``<td>`` cells."""
        return len(self.el.xpath('./td')) > 2

    class Label(Filter):
        """Strip leading digits/spaces from the label and title-case it."""

        def filter(self, text):
            return text.lstrip(' 0123456789').title()

    class Type(Filter):
        """Map a label to the type of the first pattern it contains."""

        PATTERNS = [
            ('Pea', Account.TYPE_PEA),
            ('invest', Account.TYPE_MARKET),
            ('ptf', Account.TYPE_MARKET),
            ('ldd', Account.TYPE_SAVINGS),
            ('livret', Account.TYPE_SAVINGS),
            ('compte', Account.TYPE_CHECKING),
            ('account', Account.TYPE_CHECKING),
            ('pret', Account.TYPE_LOAN),
            ('vie', Account.TYPE_LIFE_INSURANCE),
            ('strategie patr.', Account.TYPE_LIFE_INSURANCE),
            ('essentiel', Account.TYPE_LIFE_INSURANCE),
            ('elysee', Account.TYPE_LIFE_INSURANCE),
            ('abondance', Account.TYPE_LIFE_INSURANCE),
            ('ely. retraite', Account.TYPE_LIFE_INSURANCE),
            ('lae option assurance', Account.TYPE_LIFE_INSURANCE),
            ('carte ', Account.TYPE_CARD),
            ('plan assur. innovat.', Account.TYPE_LIFE_INSURANCE),
        ]

        def filter(self, label):
            label = label.lower()
            for pattern, actype in self.PATTERNS:
                # The label is lowercased, so the pattern must be too:
                # 'Pea' could otherwise never match.
                if pattern.lower() in label:
                    return actype
            return Account.TYPE_UNKNOWN

    obj_label = Label(CleanText('./td[1]/a'))
    obj_currency = FrenchTransaction.Currency('./td[2]')
    obj_url = AbsoluteLink('./td[1]/a')
    obj_type = Type(Field('label'))
    # A former "obj_coming = Env('coming')" was overridden by this line and
    # never took effect; only the effective value is kept.
    obj_coming = NotAvailable

    @property
    def obj_balance(self):
        """Balance read from the third cell, forced negative under a credits header."""
        headers = self.el.xpath('./parent::*/tr/th')
        if headers and headers[0].text in [u'Credits', u'Crédits']:
            return CleanDecimal(replace_dots=True, sign=lambda x: -1).filter(self.el.xpath('./td[3]'))
        return CleanDecimal(replace_dots=True).filter(self.el.xpath('./td[3]'))

    @property
    def obj_id(self):
        """Id read from the second cell, dots and spaces removed."""
        number = CleanText(replace=[('.', ''), (' ', '')]).filter(self.el.xpath('./td[2]'))
        # Investment account and main account can have the same id
        # so we add the account type in case of Investment to prevent conflict
        if Field('type')(self) == Account.TYPE_MARKET:
            return number + ".INVEST"
        return number
def get_history(self, currency): self.MONTHS = self.FR_MONTHS if currency == 'EUR' else self.US_MONTHS #checking if the card is still valid if self.document.xpath('//div[@id="errorbox"]'): return #adding a time delta because amex have hard time to put the date in a good interval beginning_date = self.get_beginning_debit_date() - datetime.timedelta( days=300) end_date = self.get_end_debit_date() guesser = ChaoticDateGuesser(beginning_date, end_date) for tr in reversed( self.document.xpath( '//div[@id="txnsSection"]//tr[@class="tableStandardText"]') ): cols = tr.findall('td') t = Transaction() day, month = self.parser.tocleanstring(cols[self.COL_DATE]).split( ' ', 1) day = int(day) month = self.MONTHS.index(month.rstrip('.')) + 1 date = guesser.guess_date(day, month) rdate = None try: detail = self.parser.select(cols[self.COL_TEXT], 'div.hiddenROC', 1) except BrokenPageError: pass else: m = re.search(r' (\d{2} \D{3,4})', (' '.join( [txt.strip() for txt in detail.itertext()])).strip()) if m: rday, rmonth = m.group(1).split(' ') rday = int(rday) rmonth = self.MONTHS.index(rmonth.rstrip('.')) + 1 rdate = guesser.guess_date(rday, rmonth) detail.drop_tree() raw = (' '.join([ txt.strip() for txt in cols[self.COL_TEXT].itertext() ])).strip() credit = self.parser.tocleanstring(cols[self.COL_CREDIT]) debit = self.parser.tocleanstring(cols[self.COL_DEBIT]) t.date = date t.rdate = rdate or date t.raw = re.sub(r'[ ]+', ' ', raw) t.label = re.sub('(.*?)( \d+)? .*', r'\1', raw).strip() t.amount = CleanDecimal(replace_dots=currency == 'EUR').filter( credit or debit) * (1 if credit else -1) if t.amount > 0: t.type = t.TYPE_ORDER else: t.type = t.TYPE_CARD yield t
def iter_accounts(self, next_pages):
    """Yield an ``Account`` for each parsable row under every ``div.btit`` title.

    :param next_pages: list mutated in place; parameters of the pages still
        to visit (full lists behind a "Suite" button, 'ENCOURS_COMPTE'
        pages) are appended to it.

    After the iteration, the "Retour" button action, when present, is posted
    to ``/cyber/internet/ContinueTask.do``.
    """
    account_type = Account.TYPE_UNKNOWN

    params = self.get_params()
    actions = self.get_button_actions()

    for div in self.document.getroot().cssselect('div.btit'):
        if div.text in (None, u'Synthèse'):
            continue
        account_type = self.ACCOUNT_TYPES.get(div.text.strip(), Account.TYPE_UNKNOWN)

        if account_type is None:
            # ignore services accounts
            self.logger.debug('Ignore account type %s', div.text.strip())
            continue

        # Go to the full list of this kind of account, if any.
        btn = div.getparent().xpath('.//button/span[text()="Suite"]')
        if len(btn) > 0:
            btn = btn[0].getparent()
            _params = params.copy()
            _params.update(actions[btn.attrib['id']])
            next_pages.append(_params)
            # The accounts of this section are read from the full list.
            continue

        # The currency is read from the first header cell ending with "(XXX)".
        currency = None
        for th in div.getnext().xpath('.//thead//th'):
            m = re.match('.*\((\w+)\)$', th.text)
            if m and currency is None:
                currency = Account.get_currency(m.group(1))

        for tr in div.getnext().xpath('.//tbody/tr'):
            if 'id' not in tr.attrib:
                continue

            args = dict(parse_qsl(tr.attrib['id']))
            tds = tr.findall('td')

            if len(tds) < 4 or 'identifiant' not in args:
                self.logger.warning('Unable to parse an account')
                continue

            account = Account()
            account.id = args['identifiant'].replace(' ', '')
            account.label = u' '.join([u''.join([txt.strip() for txt in tds[1].itertext()]),
                                       u''.join([txt.strip() for txt in tds[2].itertext()])]).strip()
            account.type = account_type

            balance = FrenchTransaction.clean_amount(u''.join([txt.strip() for txt in tds[3].itertext()]))
            account.balance = Decimal(balance or '0.0')
            account.currency = currency
            if account.type == account.TYPE_LOAN:
                # Loans are always reported as a negative balance.
                account.balance = -abs(account.balance)

            account._prev_debit = None
            account._next_debit = None
            account._params = None
            account._coming_params = None
            account._invest_params = None
            if balance != u'' and len(tds[3].xpath('.//a')) > 0:
                account._params = params.copy()
                account._params['dialogActionPerformed'] = 'SOLDE'
                account._params['attribute($SEL_$%s)' % tr.attrib['id'].split('_')[0]] = tr.attrib['id'].split('_', 1)[1]

            # NOTE(review): assumes account._params was set above whenever a
            # "coming" link exists -- confirm.
            if len(tds) >= 5 and len(tds[self.COL_COMING].xpath('.//a')) > 0:
                _params = account._params.copy()
                _params['dialogActionPerformed'] = 'ENCOURS_COMPTE'

                # If there is an action needed before going to the cards page, save it.
                m = re.search('dialogActionPerformed=([\w_]+)', self.url)
                if m and m.group(1) != 'EQUIPEMENT_COMPLET':
                    _params['prevAction'] = m.group(1)
                next_pages.append(_params)

            # Without balance parameters, the 'CONTRAT' action is stored instead.
            if not account._params:
                account._invest_params = params.copy()
                account._invest_params['dialogActionPerformed'] = 'CONTRAT'
                account._invest_params['attribute($SEL_$%s)' % tr.attrib['id'].split('_')[0]] = tr.attrib['id'].split('_', 1)[1]

            yield account

    # Needed to preserve navigation.
    btn = self.document.xpath('.//button/span[text()="Retour"]')
    if len(btn) > 0:
        btn = btn[0].getparent()
        _params = params.copy()
        _params.update(actions[btn.attrib['id']])
        self.browser.openurl('/cyber/internet/ContinueTask.do', urllib.urlencode(_params))
def get_history(self, date_guesser):
    """Yield a ``Transaction`` for each 'ligne-*' row of the ``ca-table`` tables.

    Column positions (COL_CREDIT, COL_DEBIT, COL_TEXT) are updated on the
    instance each time a 'tr-thead' header row is met.

    :param date_guesser: object whose ``guess_date(day, month)`` resolves
        the year of the "dd/mm" dates displayed.
    """
    # NOTE(review): `i` is both the Transaction id counter and the loop
    # variable of the header enumeration below, which overwrites it.
    i = 0
    for tr in self.document.xpath('//table[@class="ca-table"]//tr'):
        # Find the closest enclosing table, to ignore rows of nested tables
        # that are not themselves "ca-table".
        parent = tr.getparent()
        while parent is not None and parent.tag != 'table':
            parent = parent.getparent()

        if parent.attrib.get('class', '') != 'ca-table':
            continue

        if tr.attrib.get('class', '') == 'tr-thead':
            heads = tr.findall('th')
            for i, head in enumerate(heads):
                key = self.parser.tocleanstring(head)
                # Credit/debit columns are stored as negative indexes
                # (position counted from the end of the row).
                if key == u'Crédit':
                    self.COL_CREDIT = i - len(heads)
                elif key == u'Débit':
                    self.COL_DEBIT = i - len(heads)
                elif key == u'Libellé':
                    self.COL_TEXT = i

        if not tr.attrib.get('class', '').startswith('ligne-'):
            continue

        cols = tr.findall('td')

        # On loan accounts, there is a ca-table with a summary. Skip it.
        if tr.find('th') is not None or len(cols) < 3:
            continue

        t = Transaction(i)

        # The text cell is expected to contain a <br>; otherwise the next
        # cell is used.
        col_text = cols[self.COL_TEXT]
        if len(col_text.xpath('.//br')) == 0:
            col_text = cols[self.COL_TEXT + 1]

        raw = self.parser.tocleanstring(col_text)
        date = self.parser.tocleanstring(cols[self.COL_DATE])
        credit = self.parser.tocleanstring(cols[self.COL_CREDIT])
        if self.COL_DEBIT is not None:
            debit = self.parser.tocleanstring(cols[self.COL_DEBIT])
        else:
            debit = ''

        day, month = map(int, date.split('/', 1))
        t.date = date_guesser.guess_date(day, month)
        t.rdate = t.date
        t.raw = raw

        # On some accounts' history page, there is a <font> tag in columns.
        if col_text.find('font') is not None:
            col_text = col_text.find('font')

        # NOTE(review): the regex literals below contain single spaces as
        # shown here; confirm they match the separators used by the site.
        t.category = unicode(col_text.text.strip())
        t.label = re.sub('(.*) (.*)', r'\2', t.category).strip()

        # The text after the <br> replaces the label when the label looks
        # unusable (too short, equal to the category, or proportionally
        # more digits than the sub-label has punctuation).
        sub_label = col_text.find('br').tail
        if sub_label is not None and (len(t.label) < 3 or t.label == t.category or len(re.findall('[^\w\s]', sub_label)) / float(len(sub_label)) < len(re.findall('\d', t.label)) / float(len(t.label))):
            t.label = unicode(sub_label.strip())
        # Sometimes, the category contains the label, even if there is another line with it again.
        t.category = re.sub('(.*) .*', r'\1', t.category).strip()

        t.type = self.TYPES.get(t.category, t.TYPE_UNKNOWN)

        # Parse operation date in label (for card transactions for example)
        m = re.match('(?P<text>.*) (?P<dd>[0-3]\d)/(?P<mm>[0-1]\d)$', t.label)
        if not m:
            m = re.match('^(?P<dd>[0-3]\d)/(?P<mm>[0-1]\d) (?P<text>.*)$', t.label)
        if m:
            if t.type in (t.TYPE_CARD, t.TYPE_WITHDRAWAL):
                t.rdate = date_guesser.guess_date(int(m.groupdict()['dd']), int(m.groupdict()['mm']), change_current_date=False)
            t.label = m.groupdict()['text'].strip()

        # Strip city or other useless information from label.
        t.label = re.sub('(.*) .*', r'\1', t.label).strip()
        t.set_amount(credit, debit)
        yield t

        i += 1
def get_list(self):
    """Return the accounts described by the ``_data`` JavaScript array.

    An empty list is returned when the page carries the "no account"
    message. Accounts with a history get the arguments of the
    'clicDetailCompte' event; the others are flagged ``_inv`` and get the
    arguments of the page-change form. Card balances are moved to ``coming``.

    :raises BrowserUnavailable: when the array is not found in the scripts.
    """
    no_account_msg = self.get_from_js('_js_noMvts =', ';')
    if no_account_msg is not None:
        assert 'avez aucun compte' in no_account_msg
        return []

    txt = self.get_from_js('_data = new Array(', ');', is_list=True)
    if txt is None:
        raise BrowserUnavailable('Unable to find accounts list in scripts')

    # The JavaScript literal becomes JSON once single quotes are replaced.
    rows = json.loads('[%s]' % txt.replace("'", '"'))

    result = []
    for row in rows:
        a = Account()
        a.id = row[self.COL_ID].replace(' ', '')

        if re.match(r'Classement=(.*?):::Banque=(.*?):::Agence=(.*?):::SScompte=(.*?):::Serie=(.*)', a.id):
            a.id = str(CleanDecimal().filter(a.id))

        id_parts = a.id.split('_')
        if len(id_parts) > 1:
            a._acc_nb = id_parts[0]
        else:
            a._acc_nb = None

        a.label = MyStrip(row[self.COL_LABEL], xpath='.//div[@class="libelleCompteTDB"]')

        # This account can be multiple life insurance accounts
        if a.label == 'ASSURANCE VIE-BON CAPI-SCPI-DIVERS *':
            continue

        raw_balance = row[self.COL_BALANCE]
        a.balance = Decimal(FrenchTransaction.clean_amount(raw_balance))
        a.currency = a.get_currency(raw_balance)
        a.type = self.get_account_type(a.label)

        has_history = row[self.COL_HISTORY] == 'true'
        if not has_history:
            a._inv = True
            a._args = {
                '_ipc_eventValue': row[self.COL_ID],
                '_ipc_fireEvent': row[self.COL_FIRE_EVENT],
            }
            a._link = self.doc.xpath('//form[@name="changePageForm"]')[0].attrib['action']
        else:
            a._inv = False
            a._link = self.get_history_link()
            a._args = {
                '_eventId': 'clicDetailCompte',
                '_ipc_eventValue': '',
                '_ipc_fireEvent': '',
                'deviseAffichee': 'DEVISE',
                'execution': self.get_execution(),
                'idCompteClique': row[self.COL_ID],
            }

        if a.type is Account.TYPE_CARD:
            # For cards the displayed amount is the coming, not the balance.
            a.coming = a.balance
            a.balance = Decimal('0.0')

        result.append(a)

    return result
def parse_decimal(self, value):
    """Convert a displayed amount to a Decimal.

    ``NotAvailable`` is returned when the value, once stripped, is empty or
    is a lone '-'.
    """
    stripped = value.strip()
    if stripped in ('-', ''):
        return NotAvailable
    cleaned = Transaction.clean_amount(value)
    return Decimal(cleaned)
def get_list(self):
    """Yield one ``Account`` per usable ``<tr>`` of the synthesis blocks.

    Each ``<div class="block">`` under ``#synthese-list`` gives a title that
    selects the account type through ``self.ACCOUNT_TYPES``. Every ``<td>``
    of a row then fills in a part of the account, depending on its CSS class.
    A row is dropped as soon as an expected element is missing (``break``);
    the ``for ... else`` only yields rows that were scanned to the end.
    """
    blocks = self.document.xpath('//div[@id="synthese-list"]//div[@class="block"]')
    for div in blocks:
        block_title = ''.join(div.xpath('.//span[@class="title"]/a/text()')).lower().strip()
        for tr in div.getiterator('tr'):
            account = Account()
            account.id = None
            account._link_id = None

            account.type = self.ACCOUNT_TYPES.get(block_title, Account.TYPE_UNKNOWN)
            if 'assurance vie' in block_title:
                # Life insurance accounts are investments
                account.type = Account.TYPE_LIFE_INSURANCE

            for td in tr.getiterator('td'):
                if td.get('class', '') == 'account-cb':
                    # Card cell: the lookup is only used to detect a missing link.
                    try:
                        a = td.xpath('./*/a[@class="gras"]')[0]
                    except IndexError:
                        # ignore account
                        break
                    account.type = Account.TYPE_CARD
                    # The cell text reads "<label> - <card number>".
                    account.label, account.id = [s.strip() for s in self.parser.tocleanstring(td).rsplit('-', 1)]

                    # Sometimes there is text after the card number:
                    #   <a class="gras" href="/comptes/banque/cartes/index.phtml?CompteCourant=ulietuliedtlueditluedt&currentCB=ruisecruicertuci">
                    #   CARTE PREMIER </a>
                    #   <br>MACHIN BIDULE TRUC - 1111********1111
                    #
                    #   <br>
                    #   <strong><a href="/aide/faq/index.phtml?document_id=472">Son échéance est le <span style="color:#ff8400; font-weight:bold;">31/03/2015</span>.<br>En savoir plus</a></strong>
                    # So everything after the card number has to be dropped.
                    account.id = account.id.split(' ')[0]

                    # NOTE(review): an empty xpath result would raise IndexError,
                    # which this handler does not catch -- confirm the intent.
                    try:
                        account._link_id = td.xpath('.//a')[0].get('href')

                        # Try to use account._link_id for account.id to prevent duplicate accounts
                        currentCB = re.search('currentCB=(.*)', account._link_id)
                        if currentCB:
                            account.id = currentCB.group(1)
                    except KeyError:
                        pass

                elif td.get('class', '') == 'account-name':
                    try:
                        span = td.xpath('./span[@class="label"]')[0]
                    except IndexError:
                        # ignore account
                        break
                    account.label = self.parser.tocleanstring(span)
                    # The id is whatever follows the last dash of the cell text.
                    account.id = self.parser.tocleanstring(td).rsplit('-', 1)[-1].strip()
                    try:
                        account._link_id = td.xpath('.//a')[0].get('href')
                        account._detail_url = account._link_id
                    except KeyError:
                        pass

                elif td.get('class', '') == 'account-more-actions':
                    for a in td.getiterator('a'):
                        # For normal account, two "account-more-actions"
                        # One for the account, one for the credit card. Take the good one
                        if 'href' in a.attrib and "mouvements.phtml" in a.get('href') and "/cartes/" not in a.get('href'):
                            account._link_id = a.get('href')

                elif td.get('class', '') == 'account-number':
                    id = td.text
                    id = id.strip(u' \n\t')
                    account.id = id

                elif td.get('class', '') == 'account-total':
                    # The amount sits either in a nested <span> or directly in the cell.
                    span = td.find('span')
                    if span is None:
                        balance = td.text
                    else:
                        balance = span.text
                    account.currency = account.get_currency(balance)
                    balance = FrenchTransaction.clean_amount(balance)
                    if balance != "":
                        account.balance = Decimal(balance)
                    else:
                        account.balance = Decimal(0)

            else:
                # because of some weird useless <tr>
                # Only reached when no cell triggered a ``break``; rows without
                # an id and rows linking to "moneycenter" are not yielded.
                if account.id is not None and (not account._link_id or 'moneycenter' not in account._link_id):
                    yield account
def get_list(self):
    """Yield the accounts found in the ``LGNTableRow`` rows of the page.

    Raises ``NoAccountsException`` when the page shows the "no account"
    error message. Rows without a label or without a balance are skipped.
    """
    error_text = CleanText('//span[@class="error_msg"]', default='')(self.doc)
    if error_text == 'Vous ne disposez pas de compte consultable.':
        raise NoAccountsException()

    # Website crashes when going on these URLs
    crashing_prefixes = (
        '/restitution/cns_detailAVPAT.html',
        '/restitution/cns_detailAlterna.html',
    )

    for tr in self.doc.getiterator('tr'):
        row_classes = tr.attrib.get('class', '').split()
        if 'LGNTableRow' not in row_classes:
            continue

        account = Account()
        for td in tr.getiterator('td'):
            header = td.attrib.get('headers', '')

            if header == 'TypeCompte':
                anchor = td.find('a')
                if anchor is None:
                    # No link: stop scanning; the row is dropped below for
                    # lack of a label.
                    break

                account.label = CleanText('.')(anchor)
                account._link_id = anchor.get('href', '')

                for pattern, actype in self.TYPES.items():
                    if account.label.startswith(pattern):
                        account.type = actype
                        break
                else:
                    # No label pattern matched: fall back on the link target.
                    if account._link_id.startswith('/asv/asvcns10.html'):
                        account.type = Account.TYPE_LIFE_INSURANCE

                if account._link_id.startswith(crashing_prefixes):
                    account._link_id = None

            elif header == 'NumeroCompte':
                account.id = CleanText(u'.', replace=[(' ', '')])(td)

            elif header == 'Libelle':
                libelle = CleanText('.')(td)
                if libelle != '':
                    account.label = libelle

            elif header == 'Solde':
                solde_divs = td.xpath('./div[@class="Solde"]')
                if not solde_divs:
                    continue

                balance_text = CleanText('.')(solde_divs[0])
                if not balance_text or balance_text in ('ANNULEE', 'OPPOSITION'):
                    account.balance = NotAvailable
                    continue

                try:
                    account.balance = Decimal(
                        FrenchTransaction.clean_amount(balance_text))
                except InvalidOperation:
                    self.logger.error(
                        'Unable to parse balance %r' % balance_text)
                    continue
                account.currency = account.get_currency(balance_text)

        if not account.label or empty(account.balance):
            continue

        if account._link_id and 'CARTE_' in account._link_id:
            account.type = account.TYPE_CARD

        if account.type == Account.TYPE_UNKNOWN:
            self.logger.debug('Unknown account type: %s', account.label)

        yield account
def get_list(self):
    """Yield one card ``Account`` per card table found in the document.

    Two page layouts are handled: a page listing several cards (tables with
    a cards caption) and a page showing a single card (first ``ca-table``).
    Each layout has its own set of xpaths for the id, label, balance,
    currency and, for the multi-card layout only, the detail link.
    """
    TABLE_XPATH = '//table[caption[@class="caption tdb-cartes-caption" or @class="ca-table caption"]]'
    cards_tables = self.document.xpath(TABLE_XPATH)

    if cards_tables:
        self.logger.debug('There are several cards')
        xpaths = {
            '_id': './caption/span[@class="tdb-cartes-num"]',
            'label1': './caption/span[contains(@class, "tdb-cartes-carte")]',
            'label2': './caption/span[@class="tdb-cartes-prop"]',
            'balance': './/tr/td[@class="cel-num"]',
            'currency': '//table/caption//span/text()[starts-with(.,"Montants en ")]',
            'link': './/tr//a/@href[contains(., "fwkaction=Detail")]',
        }
    else:
        self.logger.debug('There is only one card')
        xpaths = {
            '_id': './/tr/td[@class="cel-texte"]',
            'label1': './/tr[@class="ligne-impaire ligne-bleu"]/th',
            'label2': './caption/span[@class="tdb-cartes-prop"]/b',
            'balance': './/tr[last()-1]/td[@class="cel-num"]',
            'currency': '//table/caption//span/text()[starts-with(.,"Montants en ")]',
        }
        TABLE_XPATH = '(//table[@class="ca-table"])[1]'
        cards_tables = self.document.xpath(TABLE_XPATH)

    for table in cards_tables:
        def get(name):
            # Clean text of the first node matching the named xpath.
            return self.parser.tocleanstring(table.xpath(xpaths[name])[0])

        account = Account()
        account.type = account.TYPE_CARD
        # The first whitespace-separated token of the id cell is dropped.
        account.id = ''.join(get('_id').split()[1:])
        # Raw string: the pattern contains regex escapes, not string escapes.
        account.label = '%s - %s' % (get('label1'),
                                     re.sub(r'\s*-\s*$', '', get('label2')))

        try:
            account.balance = Decimal(
                Transaction.clean_amount(
                    table.xpath(xpaths['balance'])[-1].text))
            account.currency = account.get_currency(
                self.document.xpath(xpaths['currency'])[0].replace(
                    "Montants en ", ""))
        except IndexError:
            # Either lookup came back empty: the balance falls back to zero
            # (this also applies when only the currency lookup failed).
            account.balance = Decimal('0.0')

        if 'link' in xpaths:
            try:
                account._link = table.xpath(xpaths['link'])[-1]
            except IndexError:
                account._link = None
            else:
                # Strip line breaks and tabs embedded in the href.
                account._link = re.sub('[\n\r\t]+', '', account._link)
        else:
            # Single-card layout: the current page is the detail page.
            account._link = self.url

        yield account
def get_list(self):
    """Return the accounts listed in the ``compteTable`` table.

    Rows whose first cell has no direct link are treated as card rows: their
    links and amount are attached to the first account already collected.
    """
    accounts = []
    for tr in self.document.xpath('//table[@class="compteTable"]/tr'):
        row_class = tr.attrib.get('class', '')
        if not row_class.startswith('ligne_'):
            continue

        cols = tr.findall('td')
        if len(cols) < 2:
            continue

        # Non-blank text fragments of the last cell hold the amount(s).
        amount_texts = [txt for txt in cols[-1].itertext()
                        if len(txt.strip()) > 0]
        try:
            amount = sum([Decimal(FrenchTransaction.clean_amount(txt))
                          for txt in amount_texts])
        except InvalidOperation:
            continue

        a = cols[0].find('a')
        if a is None:
            # this line is a cards line. attach it on the first account.
            if not accounts:
                self.logger.warning('There is a card link but no accounts!')
                continue

            main_account = accounts[0]
            for a in cols[0].xpath('.//li/a'):
                args = self.js2args(a.attrib['href'])
                if 'numero_compte' not in args or 'numero_poste' not in args:
                    self.logger.warning(
                        'Card link with strange args: %s' % args)
                    continue

                main_account._card_links.append(
                    '%s.%s' % (args['numero_compte'], args['numero_poste']))
                if not main_account.coming:
                    main_account.coming = Decimal('0.0')
                main_account.coming += amount
            continue

        args = self.js2args(a.attrib['href'])
        if 'numero_compte' not in args or 'numero_poste' not in args:
            self.logger.warning(
                'Account link for %r with strange args: %s'
                % (a.attrib.get('alt', a.text), args))
            continue

        account = Account()
        account.id = u'%s.%s' % (args['numero_compte'], args['numero_poste'])
        account.label = to_unicode(a.attrib.get('alt', a.text.strip()))
        account.balance = amount
        # The currency is taken from the first non-blank fragment.
        account.currency = [account.get_currency(txt)
                            for txt in amount_texts][0]
        account._card_links = []
        accounts.append(account)

    return accounts
class item(ItemElement):
    """Build an ``Account`` from one table row.

    ``parse`` runs the row-level logic (skipping rows, opening the linked
    page, merging card rows into an existing account) and stores ``id``,
    ``balance``, ``coming`` and ``_is_webid`` in ``self.env``; the ``obj_*``
    declarations below read them back through ``Env``.
    """
    klass = Account

    def condition(self):
        """Accept rows with at least two cells whose first cell has a link
        and a class attribute containing the letter "i" or "p"."""
        if len(self.el.xpath('./td')) < 2:
            return False

        first_td = self.el.xpath('./td')[0]
        return (("i" in first_td.attrib.get('class', '') or
                 "p" in first_td.attrib.get('class', '')) and
                first_td.find('a') is not None)

    # Strips leading spaces and digits from the label, then title-cases it.
    class Label(Filter):
        def filter(self, text):
            return text.lstrip(' 0123456789').title()

    # Maps a label to an account type using the prefixes in AccountsPage.TYPES.
    class Type(Filter):
        def filter(self, label):
            for pattern, actype in AccountsPage.TYPES.iteritems():
                if label.startswith(pattern):
                    return actype
            return Account.TYPE_UNKNOWN

    obj_id = Env('id')
    obj_label = Label(CleanText('./td[1]/a/text() | ./td[1]/a/span[@class and not(contains(@class, "doux"))]'))
    obj_coming = Env('coming')
    obj_balance = Env('balance')
    obj_currency = FrenchTransaction.Currency('./td[2] | ./td[3]')
    obj__link_id = Link('./td[1]/a')
    obj__card_links = []
    obj_type = Type(Field('label'))
    obj__is_inv = False
    obj__is_webid = Env('_is_webid')

    def parse(self, el):
        """Prepare ``self.env`` for the row, or raise ``SkipItem``.

        Raises ``ParseError`` when neither the second nor the third cell
        holds a parsable amount.
        """
        link = el.xpath('./td[1]/a')[0].get('href', '')
        if 'POR_SyntheseLst' in link:
            raise SkipItem()

        url = urlparse(link)
        p = parse_qs(url.query)
        # Only links carrying a "rib" or "webid" query parameter are accounts.
        if 'rib' not in p and 'webid' not in p:
            raise SkipItem()

        # Take the first of td[2]/td[3] that parses as a decimal.
        for td in el.xpath('./td[2] | ./td[3]'):
            try:
                balance = CleanDecimal('.', replace_dots=True)(td)
            except InvalidOperation:
                continue
            else:
                break
        else:
            raise ParseError('Unable to find balance for account %s' % CleanText('./td[1]/a')(el))

        self.env['_is_webid'] = False
        if self.page.browser.is_new_website:
            id = CleanText('./td[1]/a/node()[contains(@class, "doux")]', replace=[(' ', '')])(el)
        else:
            if 'rib' in p:
                id = p['rib'][0]
            else:
                id = p['webid'][0]
                self.env['_is_webid'] = True

        page = self.page.browser.open(link).page

        # Handle cards
        # A row whose id was already seen is not a new account: it is either
        # recorded as a fleet page or merged into the existing account.
        if id in self.parent.objects:
            if page.is_fleet() or id in self.page.browser.fleet_pages:
                if not id in self.page.browser.fleet_pages:
                    self.page.browser.fleet_pages[id] = []
                self.page.browser.fleet_pages[id].append(page)
            else:
                account = self.parent.objects[id]
                if not account.coming:
                    account.coming = Decimal('0.0')
                account.coming += balance
                account._card_links.append(link)
            raise SkipItem()

        self.env['id'] = id

        # Handle real balances
        coming = page.find_amount(u"Opérations à venir") if page else None
        accounting = page.find_amount(u"Solde comptable") if page else None

        # Warn when the detail page amounts do not add up to the listed balance.
        if accounting is not None and accounting + (coming or Decimal('0')) != balance:
            self.page.logger.warning('%s + %s != %s' % (accounting, coming, balance))

        # The accounting balance, when found, replaces the listed one.
        if accounting is not None:
            balance = accounting

        self.env['balance'] = balance
        self.env['coming'] = coming or NotAvailable
def clean_amount(cls, text):
    """Clean an amount string, accepting "American" number formatting.

    When the stripped text ends with a dot followed by exactly two digits
    (UUU.CC), commas become spaces and dots become commas so the text matches
    the "French" UUU,CC format. The result is then handed to
    ``FrenchTransaction.clean_amount``.
    """
    amount = text.strip()
    looks_american = re.search(r'\d\.\d\d$', amount)
    if looks_american:
        amount = amount.replace(',', ' ').replace('.', ',')
    return FrenchTransaction.clean_amount(amount)
def iter_accounts(self, next_pages):
    """Yield the card accounts listed in the ``TabCtes`` table.

    A row with a non-empty id cell starts a new card account; following rows
    with an empty id cell belong to that same card. Each row adds either to
    the card's coming amount or to its previous debit information. After the
    last account, the "Retour" button is submitted (if present) to preserve
    navigation.

    Raises ``BrokenPageError`` when a continuation row appears before any
    card row.
    """
    params = self.get_params()
    account = None
    currency = None

    # The currency is read from the first header ending with "(XXX)".
    for th in self.document.xpath('//table[@id="TabCtes"]//thead//th'):
        header_match = re.match('.*\((\w+)\)$', th.text)
        if header_match and currency is None:
            currency = Account.get_currency(header_match.group(1))

    for tr in self.document.xpath('//table[@id="TabCtes"]/tbody/tr'):
        cols = tr.xpath('./td')
        card_id = self.parser.tocleanstring(cols[self.COL_ID])

        if len(card_id) > 0:
            # New card: emit the one being built before starting over.
            if account is not None:
                yield account

            account = Account()
            account.id = card_id.replace(' ', '')
            account.type = Account.TYPE_CARD
            account.balance = account.coming = Decimal('0')
            account._next_debit = datetime.date.today()
            account._prev_debit = datetime.date(2000, 1, 1)
            account.label = u' '.join([
                self.parser.tocleanstring(cols[self.COL_TYPE]),
                self.parser.tocleanstring(cols[self.COL_LABEL]),
            ])
            account.currency = currency
            account._params = None
            account._invest_params = None

            coming_params = params.copy()
            account._coming_params = coming_params
            coming_params['dialogActionPerformed'] = 'SELECTION_ENCOURS_CARTE'
            coming_params['attribute($SEL_$%s)' % tr.attrib['id'].split('_')[0]] = \
                tr.attrib['id'].split('_', 1)[1]
        elif account is None:
            raise BrokenPageError('Unable to find accounts on cards page')
        else:
            row_params = params.copy()
            account._params = row_params
            row_params['dialogActionPerformed'] = 'SELECTION_ENCOURS_CARTE'
            row_params['attribute($SEL_$%s)' % tr.attrib['id'].split('_')[0]] = \
                tr.attrib['id'].split('_', 1)[1]

        date_col = self.parser.tocleanstring(cols[self.COL_DATE])
        date_match = re.search('(\d+)/(\d+)/(\d+)', date_col)
        if not date_match:
            self.logger.warning('Unable to parse date %r' % date_col)
            continue

        # The page gives day/month/year; two-digit years are in the 2000s.
        day, month, year = [int(part) for part in date_match.groups()]
        debit_date = datetime.date(year, month, day)
        if debit_date.year < 100:
            debit_date = debit_date.replace(year=debit_date.year + 2000)

        amount = Decimal(
            FrenchTransaction.clean_amount(
                self.parser.tocleanstring(cols[self.COL_AMOUNT])))

        if not date_col.endswith('(1)'):
            # debited
            account.coming += -abs(amount)
            account._next_debit = debit_date
        elif debit_date > account._prev_debit:
            account._prev_balance = -abs(amount)
            account._prev_debit = debit_date

    if account is not None:
        yield account

    # Needed to preserve navigation.
    back_spans = self.document.xpath('.//button/span[text()="Retour"]')
    if back_spans:
        back_button = back_spans[0].getparent()
        actions = self.get_button_actions()
        _params = params.copy()
        _params.update(actions[back_button.attrib['id']])
        self.browser.openurl('/cyber/internet/ContinueTask.do',
                             urllib.urlencode(_params))
def iter_investments(self):
    """Yield the investments described in the raw (non-HTML) response.

    The document is a string split on ``popup=2``; each chunk is a list of
    ``#``-separated columns whose first column is itself ``{``-separated.
    A "Total" line is merged into the investment parsed just before it.
    """
    # We did not get some html, but something like that (XX is a quantity, YY a price):
    # message='[...]
    # popup=2{6{E:ALO{PAR{{reel{695{380{ALSTOM REGROUPT#XX#YY,YY €#YY,YY €#1 YYY,YY €#-YYY,YY €#-42,42%#-0,98 %#42,42 %#|1|AXA#cotationValeur.php?val=E:CS&pl=6&nc=1&
    # popup=2{6{E:CS{PAR{{reel{695{380{AXA#XX#YY,YY €#YY,YYY €#YYY,YY €#YY,YY €#3,70%#42,42 %#42,42 %#|1|blablablab #cotationValeur.php?val=P:CODE&pl=6&nc=1&
    # [...]
    chunks = self.doc.split("popup=2")[1:]

    # (attribute, column index, default when the column is not a number)
    decimal_columns = (
        ('quantity', 1, NotAvailable),
        ('unitprice', 2, NotAvailable),
        ('unitvalue', 3, NotAvailable),
        # valuation is not nullable, use 0 as default value
        ('valuation', 4, Decimal('0')),
        ('diff', 5, NotAvailable),
    )

    invests = []
    for line in chunks:
        columns = line.split('#')
        header = columns[0].split('{')
        _pl = header[1]
        _id = header[2]

        invest = Investment(_id)
        invest.label = unicode(header[-1])
        invest.code = unicode(_id)
        if ':' in invest.code:
            invest.code = self.browser.titrevalue.open(
                val=invest.code, pl=_pl).get_isin()

        # The code we got is not a real ISIN code.
        is_isin = re.match(
            '^[A-Z]{2}[\d]{10}$|^[A-Z]{2}[\d]{5}[A-Z]{1}[\d]{4}$',
            invest.code)
        if not is_isin:
            found = re.search(
                '\{([A-Z]{2}[\d]{10})\{|\{([A-Z]{2}[\d]{5}[A-Z]{1}[\d]{4})\{',
                line)
            if found:
                invest.code = unicode(found.group(1) or found.group(2))

        for attr, index, fallback in decimal_columns:
            cleaned = FrenchTransaction.clean_amount(columns[index])
            setattr(invest, attr,
                    CleanDecimal(default=fallback).filter(cleaned))

        # In some cases an investment spans several lines and ends with a
        # "Total" line. So far this has only been seen over 2 lines; this
        # will presumably need adapting if other layouts show up.
        if columns[9] == u'|Total' and _id == 'fichevaleur':
            total_line = invest
            invest = invests.pop(-1)
            for attr in ('quantity', 'valuation', 'diff'):
                extra = getattr(total_line, attr)
                if extra:
                    setattr(invest, attr, getattr(invest, attr) + extra)

        invests.append(invest)

    for invest in invests:
        yield invest
def get_list(self):
    """Yield the accounts found in the tables returned by ``has_accounts``.

    Tables whose first body row has fewer than three cells are only handled
    when they describe a personal loan. For the other tables, each body row
    (except "Total" rows) becomes an account, using the ``onclick`` arguments
    of the row link, or of the footer link when the row has none.
    """
    for table in self.has_accounts():
        tds = table.xpath('./tbody/tr')[0].findall('td')
        if len(tds) < 3:
            if tds[0].text_content() == u'Pr\xeat Personnel':
                account = Account()
                args = self.js2args(table.xpath('.//a')[0].attrib['onclick'])
                account._args = args
                account.label = CleanText().filter(tds[0].xpath('./ancestor::table[has-class("tableaux-pret-personnel")]/caption'))
                # Id = last word of the caption followed by the contract number.
                account.id = account.label.split()[-1] + args['paramNumContrat']
                loan_details = self.browser.open('/webapp/axabanque/jsp/panorama.faces', data=args).page
                # Need to go back on home page after open
                self.browser.bank_accounts.open()
                account.balance = loan_details.get_loan_balance()
                account.currency = loan_details.get_loan_currency()
                # Skip loans without any balance (already fully reimbursed)
                if empty(account.balance):
                    continue
                account.type = Account.TYPE_LOAN
                account._acctype = "bank"
                account._hasinv = False
                account._is_debit_card = False
                yield account

            # Short tables never go through the generic row handling below.
            continue

        boxes = table.xpath('./tbody//tr[not(.//strong[contains(text(), "Total")])]')
        foot = table.xpath('./tfoot//tr')

        for box in boxes:
            account = Account()
            account._url = None

            # Prefer the link of the row itself, then the one in the footer.
            if len(box.xpath('.//a')) != 0 and 'onclick' in box.xpath('.//a')[0].attrib:
                args = self.js2args(box.xpath('.//a')[0].attrib['onclick'])
                account.label = u'{0} {1}'.format(unicode(table.xpath('./caption')[0].text.strip()), unicode(box.xpath('.//a')[0].text.strip()))
            elif len(foot[0].xpath('.//a')) != 0 and 'onclick' in foot[0].xpath('.//a')[0].attrib:
                args = self.js2args(foot[0].xpath('.//a')[0].attrib['onclick'])
                account.label = unicode(table.xpath('./caption')[0].text.strip())
            else:
                continue

            self.logger.debug('Args: %r' % args)
            if 'paramNumCompte' not in args:
                # Life insurances are displayed very differently from the other accounts
                if args.get('idPanorama:_idcl').split(":")[1] == 'tableaux-direct-solution-vie':
                    account_details = self.browser.open("#", data=args)
                    scripts = account_details.page.doc.xpath('//script[@type="text/javascript"]/text()')
                    # First script mentioning "src" carries the iframe URL.
                    script = filter(lambda x: "src" in x, scripts)[0]
                    # The slice drops the first 6 and last 2 characters of the match.
                    iframe_url = re.search("src:(.*),", script).group()[6:-2]
                    account_details_iframe = self.browser.open(iframe_url, data=args)
                    account.id = CleanText('//span[contains(@id,"NumeroContrat")]/text()')(account_details_iframe.page.doc)
                    account._url = iframe_url
                    account.type = account.TYPE_LIFE_INSURANCE
                    account.balance = MyDecimal('//span[contains(@id,"MontantEpargne")]/text()')(account_details_iframe.page.doc)
                    account._acctype = "bank"
                    account._is_debit_card = False
                else:
                    try:
                        label = unicode(table.xpath('./caption')[0].text.strip())
                    except Exception:
                        label = 'Unable to determine'
                    self.logger.warning('Unable to get account ID for %r' % label)
                    continue

            if account.type != account.TYPE_LIFE_INSURANCE:
                # get accounts type
                # The table class "tableaux-comptes-<kind>" gives the kind.
                account_type_str = ''
                for l in table.attrib['class'].split(' '):
                    if 'tableaux-comptes-' in l:
                        account_type_str = l[len('tableaux-comptes-'):].lower()
                        break

                account.type = Account.TYPE_UNKNOWN
                for pattern, type in self.ACCOUNT_TYPES.items():
                    if pattern in account_type_str or pattern in account.label.lower():
                        account.type = type
                        break

                # get accounts id
                # Falls back to the account number alone when a key is missing.
                try:
                    account.id = args['paramNumCompte'] + args['paramNumContrat']
                    if 'Visa' in account.label:
                        card_id = re.search('(\d+)', box.xpath('./td[2]')[0].text.strip())
                        if card_id:
                            account.id += card_id.group(1)
                    if u'Valorisation' in account.label or u'Liquidités' in account.label:
                        account.id += args[next(k for k in args.keys() if "_idcl" in k)].split('Jsp')[-1]
                except KeyError:
                    account.id = args['paramNumCompte']

                # get accounts balance
                try:
                    balance_value = CleanText('.//td[has-class("montant")]')(box)

                    # skip debit card
                    # some cards don't have information in balance tab, skip them
                    if balance_value == u'Débit immédiat' or balance_value == '':
                        account._is_debit_card = True
                    else:
                        account._is_debit_card = False

                    account.balance = Decimal(FrenchTransaction.clean_amount(self.parse_number(balance_value)))
                    if account.type == Account.TYPE_CARD:
                        # For cards the amount is moved to ``coming``.
                        account.coming = account.balance
                        account.balance = Decimal(0)

                except InvalidOperation:
                    # The account doesn't have an amount
                    pass

                account._url = self.doc.xpath('//form[contains(@action, "panorama")]/@action')[0]
                account._acctype = "bank"

                # get accounts currency
                currency_title = table.xpath('./thead//th[@class="montant"]')[0].text.strip()
                m = re.match('Montant \((\w+)\)', currency_title)
                if not m:
                    self.logger.warning('Unable to parse currency %r' % currency_title)
                else:
                    account.currency = account.get_currency(m.group(1))

            account._args = args
            account._hasinv = True if "Valorisation" in account.label else False

            yield account
def parse_decimal(self, td):
    """Read the text of *td* and convert it to a ``Decimal``.

    Returns ``NotAvailable`` when the cleaned text is empty or a lone dash.
    """
    text = CleanText('.')(td)
    if not text or text == '-':
        return NotAvailable
    return Decimal(FrenchTransaction.clean_amount(text))