def get_list(self):
    """Yield one ``Account`` for every usable row of the accounts summary.

    Every ``<tr>`` inside the ``div.block`` sections found under
    ``#synthese-list`` in ``self.document`` is turned into an ``Account`` by
    dispatching on the ``class`` attribute of each ``<td>`` cell.

    The account type is looked up in ``self.ACCOUNT_TYPES`` with the
    lower-cased, stripped block title (``Account.TYPE_UNKNOWN`` when the title
    is not a key), forced to ``Account.TYPE_LIFE_INSURANCE`` when the title
    contains "assurance vie", and to ``Account.TYPE_CARD`` for card rows.

    A row is dropped when its card or name cell lacks the expected child
    element, when no id was extracted, or when its link contains
    "moneycenter".
    """
    blocks = self.document.xpath('//div[@id="synthese-list"]//div[@class="block"]')
    for div in blocks:
        block_title = "".join(div.xpath('.//span[@class="title"]/a/text()')).lower().strip()
        for tr in div.getiterator("tr"):
            account = Account()
            account.id = None
            account._link_id = None
            account.type = self.ACCOUNT_TYPES.get(block_title, Account.TYPE_UNKNOWN)
            if "assurance vie" in block_title:
                # Life insurance accounts are investments
                account.type = Account.TYPE_LIFE_INSURANCE

            for td in tr.getiterator("td"):
                cell_class = td.get("class", "")

                if cell_class == "account-cb":
                    if not td.xpath('./*/a[@class="gras"]'):
                        # No card link in this cell: ignore the whole row.
                        break

                    account.type = Account.TYPE_CARD
                    # NOTE(review): this unpacking raises ValueError when the
                    # cell text has no "-" separator.
                    account.label, account.id = [
                        s.strip() for s in self.parser.tocleanstring(td).rsplit("-", 1)
                    ]

                    # Sometimes there is text after the card number:
                    #   <a class="gras" href="/comptes/banque/cartes/index.phtml?CompteCourant=...&currentCB=...">
                    #   CARTE PREMIER </a>
                    #   <br>MACHIN BIDULE TRUC - 1111********1111
                    #   <br>
                    #   <strong><a href="/aide/faq/index.phtml?document_id=472">Son échéance est le
                    #   <span ...>31/03/2015</span>.<br>En savoir plus</a></strong>
                    # so everything after the first space is dropped.
                    account.id = account.id.split(" ")[0]

                    # xpath() returns a list: a missing link shows up as an
                    # empty list, and .get() returns None for a missing href.
                    links = td.xpath(".//a")
                    if links:
                        account._link_id = links[0].get("href")
                        if account._link_id:
                            # Try to use account._link_id for account.id to
                            # prevent duplicate accounts
                            current_cb = re.search("currentCB=(.*)", account._link_id)
                            if current_cb:
                                account.id = current_cb.group(1)

                elif cell_class == "account-name":
                    labels = td.xpath('./span[@class="label"]')
                    if not labels:
                        # No label span in this cell: ignore the whole row.
                        break

                    account.label = self.parser.tocleanstring(labels[0])
                    account.id = self.parser.tocleanstring(td).rsplit("-", 1)[-1].strip()

                    # A name cell without any link keeps the current
                    # _link_id instead of aborting the whole listing.
                    links = td.xpath(".//a")
                    if links:
                        account._link_id = links[0].get("href")
                        account._detail_url = account._link_id

                elif cell_class == "account-more-actions":
                    for a in td.getiterator("a"):
                        # For normal account, two "account-more-actions"
                        # One for the account, one for the credit card.
                        # Take the good one
                        href = a.get("href")
                        if (
                            href is not None
                            and "mouvements.phtml" in href
                            and "/cartes/" not in href
                        ):
                            account._link_id = href

                elif cell_class == "account-number":
                    # td.text is None for an empty cell; keep the id already
                    # extracted in that case.
                    if td.text is not None:
                        account.id = td.text.strip(u" \n\t")

                elif cell_class == "account-total":
                    span = td.find("span")
                    balance = td.text if span is None else span.text

                    account.currency = account.get_currency(balance)

                    balance = FrenchTransaction.clean_amount(balance)
                    account.balance = Decimal(balance) if balance != "" else Decimal(0)

            else:
                # Reached only when no cell triggered a ``break``
                # (because of some weird useless <tr>).
                if account.id is not None and (
                    not account._link_id or "moneycenter" not in account._link_id
                ):
                    yield account
def get_list(self):
    """Iterate over the accounts listed on the summary page.

    Each ``<tr>`` of every ``div.block`` under ``#synthese-list`` in
    ``self.document`` is parsed into an ``Account``; the ``class`` attribute
    of each ``<td>`` decides which account field the cell feeds.

    Type resolution: ``self.ACCOUNT_TYPES`` is queried with the lower-cased,
    stripped block title (default ``Account.TYPE_UNKNOWN``); a title
    containing "assurance vie" forces ``Account.TYPE_LIFE_INSURANCE`` and a
    card cell forces ``Account.TYPE_CARD``.

    Rows are not yielded when the card/name cell misses its expected child
    element, when the id is still ``None`` after all cells, or when the link
    contains "moneycenter".
    """
    blocks = self.document.xpath('//div[@id="synthese-list"]//div[@class="block"]')
    for div in blocks:
        block_title = ''.join(div.xpath('.//span[@class="title"]/a/text()')).lower().strip()
        for tr in div.getiterator('tr'):
            account = Account()
            account.id = None
            account._link_id = None
            account.type = self.ACCOUNT_TYPES.get(block_title, Account.TYPE_UNKNOWN)
            if 'assurance vie' in block_title:
                # Life insurance accounts are investments
                account.type = Account.TYPE_LIFE_INSURANCE

            for td in tr.getiterator('td'):
                cell_class = td.get('class', '')

                if cell_class == 'account-cb':
                    if not td.xpath('./*/a[@class="gras"]'):
                        # The card anchor is absent: ignore this row.
                        break

                    account.type = Account.TYPE_CARD
                    # NOTE(review): a cell text without '-' makes this
                    # two-target unpacking raise ValueError.
                    account.label, account.id = [
                        s.strip() for s in self.parser.tocleanstring(td).rsplit('-', 1)
                    ]

                    # Sometimes there is text after the card number:
                    #   <a class="gras" href="/comptes/banque/cartes/index.phtml?CompteCourant=...&currentCB=...">
                    #   CARTE PREMIER </a>
                    #   <br>MACHIN BIDULE TRUC - 1111********1111
                    #   <br>
                    #   <strong><a href="/aide/faq/index.phtml?document_id=472">Son échéance est le
                    #   <span ...>31/03/2015</span>.<br>En savoir plus</a></strong>
                    # so only the part before the first space is kept.
                    account.id = account.id.split(' ')[0]

                    # An absent <a> yields an empty list (not KeyError) and
                    # an absent href yields None, so both are tested directly.
                    anchors = td.xpath('.//a')
                    if anchors:
                        account._link_id = anchors[0].get('href')
                        if account._link_id:
                            # Try to use account._link_id for account.id to
                            # prevent duplicate accounts
                            current_cb = re.search('currentCB=(.*)', account._link_id)
                            if current_cb:
                                account.id = current_cb.group(1)

                elif cell_class == 'account-name':
                    label_spans = td.xpath('./span[@class="label"]')
                    if not label_spans:
                        # The label span is absent: ignore this row.
                        break

                    account.label = self.parser.tocleanstring(label_spans[0])
                    account.id = self.parser.tocleanstring(td).rsplit('-', 1)[-1].strip()

                    # Without a link the row is still usable; _link_id keeps
                    # its current value instead of raising IndexError.
                    anchors = td.xpath('.//a')
                    if anchors:
                        account._link_id = anchors[0].get('href')
                        account._detail_url = account._link_id

                elif cell_class == 'account-more-actions':
                    for a in td.getiterator('a'):
                        # For normal account, two "account-more-actions"
                        # One for the account, one for the credit card.
                        # Take the good one
                        href = a.get('href')
                        if href is not None and 'mouvements.phtml' in href and '/cartes/' not in href:
                            account._link_id = href

                elif cell_class == 'account-number':
                    # An empty cell has td.text == None; the previously
                    # extracted id is kept then.
                    if td.text is not None:
                        account.id = td.text.strip(u' \n\t')

                elif cell_class == 'account-total':
                    span = td.find('span')
                    balance = td.text if span is None else span.text

                    account.currency = account.get_currency(balance)

                    balance = FrenchTransaction.clean_amount(balance)
                    account.balance = Decimal(balance) if balance != "" else Decimal(0)

            else:
                # Only runs when the cell loop finished without ``break``
                # (because of some weird useless <tr>).
                if account.id is not None and (
                    not account._link_id or 'moneycenter' not in account._link_id
                ):
                    yield account
def get_list(self):
    """Yield the accounts found on the summary page.

    For each ``<tr>`` of each ``div.block`` under ``#synthese-list`` in
    ``self.document`` an ``Account`` is built from the row's ``<td>`` cells,
    keyed on the cell's ``class`` attribute.

    The type is set to ``Account.TYPE_MARKET`` when the lower-cased block
    title contains "assurance vie" and to ``Account.TYPE_CARD`` for card
    rows; it is otherwise left as ``Account()`` created it.

    In this variant the id comes only from the ``account-number`` cell. A row
    is yielded when no card/name cell was missing its expected child element
    and the id is not ``None``.
    """
    blocks = self.document.xpath('//div[@id="synthese-list"]//div[@class="block"]')
    for div in blocks:
        block_title = ''.join(div.xpath('.//span[@class="title"]//text()')).lower()
        for tr in div.getiterator('tr'):
            account = Account()
            account.id = None
            account._link_id = None
            if 'assurance vie' in block_title:
                # Life insurance accounts are investments
                account.type = Account.TYPE_MARKET

            for td in tr.getiterator('td'):
                cell_class = td.get('class', '')

                if cell_class == 'account-cb':
                    card_links = td.xpath('./*/a[@class="gras"]')
                    if not card_links:
                        # No card anchor: ignore this row.
                        break

                    account.type = Account.TYPE_CARD
                    account.label = self.parser.tocleanstring(card_links[0])

                    # A missing <a> is an empty list (IndexError on [0], not
                    # KeyError), so emptiness is tested explicitly.
                    links = td.xpath('.//a')
                    if links:
                        account._link_id = links[0].get('href')

                elif cell_class == 'account-name':
                    labels = td.xpath('./span[@class="label"]')
                    if not labels:
                        # No label span: ignore this row.
                        break

                    account.label = self.parser.tocleanstring(labels[0])

                    # A name cell without a link no longer aborts the whole
                    # listing; _link_id simply keeps its current value.
                    links = td.xpath('.//a')
                    if links:
                        account._link_id = links[0].get('href')
                        account._detail_url = account._link_id

                elif cell_class == 'account-more-actions':
                    for a in td.getiterator('a'):
                        # For normal account, two "account-more-actions"
                        # One for the account, one for the credit card.
                        # Take the good one
                        href = a.get('href')
                        # href is None for an anchor without the attribute;
                        # testing membership on None would raise TypeError.
                        if href is not None and "mouvements.phtml" in href and "/cartes/" not in href:
                            account._link_id = href

                elif cell_class == 'account-number':
                    # td.text is None for an empty cell; leave the id as is.
                    if td.text is not None:
                        account.id = td.text.strip(u' \n\t')

                elif cell_class == 'account-total':
                    span = td.find('span')
                    balance = td.text if span is None else span.text

                    account.currency = account.get_currency(balance)

                    balance = FrenchTransaction.clean_amount(balance)
                    account.balance = Decimal(balance) if balance != "" else Decimal(0)

            else:
                # Runs only when the cell loop was not interrupted by
                # ``break`` (because of some weird useless <tr>).
                if account.id is not None:
                    yield account
def get_list(self):
    """Generate ``Account`` objects from the accounts summary page.

    The rows (``<tr>``) of every ``div.block`` under ``#synthese-list`` in
    ``self.document`` are scanned; each ``<td>`` contributes to the account
    according to its ``class`` attribute.

    ``Account.TYPE_MARKET`` is assigned when the lower-cased block title
    contains "assurance vie", ``Account.TYPE_CARD`` for card rows; the type
    is otherwise untouched.

    A row is skipped when its card/name cell lacks the expected child
    element, when no id was found, or when its link contains "moneycenter".
    """
    blocks = self.document.xpath(
        '//div[@id="synthese-list"]//div[@class="block"]')
    for div in blocks:
        block_title = ''.join(
            div.xpath('.//span[@class="title"]//text()')).lower()
        for tr in div.getiterator('tr'):
            account = Account()
            account.id = None
            account._link_id = None
            if 'assurance vie' in block_title:
                # Life insurance accounts are investments
                account.type = Account.TYPE_MARKET

            for td in tr.getiterator('td'):
                cell_class = td.get('class', '')

                if cell_class == 'account-cb':
                    if not td.xpath('./*/a[@class="gras"]'):
                        # Card anchor not found: ignore this row.
                        break

                    account.type = Account.TYPE_CARD
                    # NOTE(review): unpacking into two targets raises
                    # ValueError if the cell text contains no '-'.
                    account.label, account.id = [
                        s.strip()
                        for s in self.parser.tocleanstring(td).rsplit('-', 1)
                    ]

                    # xpath() gives a list, so a missing link is an empty
                    # list rather than a KeyError.
                    links = td.xpath('.//a')
                    if links:
                        account._link_id = links[0].get('href')

                elif cell_class == 'account-name':
                    labels = td.xpath('./span[@class="label"]')
                    if not labels:
                        # Label span not found: ignore this row.
                        break

                    account.label = self.parser.tocleanstring(labels[0])
                    account.id = self.parser.tocleanstring(td).rsplit(
                        '-', 1)[-1].strip()

                    # Tolerate a name cell without link instead of letting
                    # an IndexError abort the whole listing.
                    links = td.xpath('.//a')
                    if links:
                        account._link_id = links[0].get('href')
                        account._detail_url = account._link_id

                elif cell_class == 'account-more-actions':
                    for a in td.getiterator('a'):
                        # For normal account, two "account-more-actions"
                        # One for the account, one for the credit card.
                        # Take the good one
                        href = a.get('href')
                        if (href is not None and "mouvements.phtml" in href
                                and "/cartes/" not in href):
                            account._link_id = href

                elif cell_class == 'account-number':
                    # Empty cells have td.text == None; keep the current id.
                    if td.text is not None:
                        account.id = td.text.strip(u' \n\t')

                elif cell_class == 'account-total':
                    span = td.find('span')
                    balance = td.text if span is None else span.text

                    account.currency = account.get_currency(balance)

                    balance = FrenchTransaction.clean_amount(balance)
                    account.balance = Decimal(
                        balance) if balance != "" else Decimal(0)

            else:
                # Executed only if the cell loop ended without ``break``
                # (because of some weird useless <tr>).
                if account.id is not None and (
                        not account._link_id
                        or 'moneycenter' not in account._link_id):
                    yield account
def get_list(self):
    """Yield an ``Account`` for each valid row of the accounts summary.

    Scans the ``<tr>`` rows of all ``div.block`` elements under
    ``#synthese-list`` in ``self.document`` and fills an ``Account`` from the
    ``<td>`` cells, selecting the handling by the cell's ``class`` attribute.

    ``Account.TYPE_LIFE_INSURANCE`` is assigned when the lower-cased block
    title contains "assurance vie" and ``Account.TYPE_CARD`` for card rows;
    no other type is set here.

    Rows whose card/name cell lacks the expected child element, rows without
    an id, and rows whose link contains "moneycenter" are not yielded.
    """
    blocks = self.document.xpath('//div[@id="synthese-list"]//div[@class="block"]')
    for div in blocks:
        block_title = ''.join(div.xpath('.//span[@class="title"]//text()')).lower()
        for tr in div.getiterator('tr'):
            account = Account()
            account.id = None
            account._link_id = None
            if 'assurance vie' in block_title:
                # Life insurance accounts are investments
                account.type = Account.TYPE_LIFE_INSURANCE

            for td in tr.getiterator('td'):
                cell_class = td.get('class', '')

                if cell_class == 'account-cb':
                    if not td.xpath('./*/a[@class="gras"]'):
                        # Missing card anchor: ignore this row.
                        break

                    account.type = Account.TYPE_CARD
                    # NOTE(review): raises ValueError when the cell text has
                    # no '-' to split on.
                    account.label, account.id = [
                        s.strip() for s in self.parser.tocleanstring(td).rsplit('-', 1)
                    ]

                    # Sometimes there is text after the card number:
                    #   <a class="gras" href="/comptes/banque/cartes/index.phtml?CompteCourant=...&currentCB=...">
                    #   CARTE PREMIER </a>
                    #   <br>MACHIN BIDULE TRUC - 1111********1111
                    #   <br>
                    #   <strong><a href="/aide/faq/index.phtml?document_id=472">Son échéance est le
                    #   <span ...>31/03/2015</span>.<br>En savoir plus</a></strong>
                    # so whatever follows the first space is discarded.
                    account.id = account.id.split(' ')[0]

                    # Indexing an empty xpath() result raises IndexError, not
                    # KeyError, so the list is checked before use.
                    links = td.xpath('.//a')
                    if links:
                        account._link_id = links[0].get('href')

                elif cell_class == 'account-name':
                    labels = td.xpath('./span[@class="label"]')
                    if not labels:
                        # Missing label span: ignore this row.
                        break

                    account.label = self.parser.tocleanstring(labels[0])
                    account.id = self.parser.tocleanstring(td).rsplit('-', 1)[-1].strip()

                    # A link-less name cell leaves _link_id untouched rather
                    # than crashing the iteration.
                    links = td.xpath('.//a')
                    if links:
                        account._link_id = links[0].get('href')
                        account._detail_url = account._link_id

                elif cell_class == 'account-more-actions':
                    for a in td.getiterator('a'):
                        # For normal account, two "account-more-actions"
                        # One for the account, one for the credit card.
                        # Take the good one
                        href = a.get('href')
                        if href is not None and "mouvements.phtml" in href and "/cartes/" not in href:
                            account._link_id = href

                elif cell_class == 'account-number':
                    # td.text is None when the cell is empty; keep the id
                    # found so far in that case.
                    if td.text is not None:
                        account.id = td.text.strip(u' \n\t')

                elif cell_class == 'account-total':
                    span = td.find('span')
                    balance = td.text if span is None else span.text

                    account.currency = account.get_currency(balance)

                    balance = FrenchTransaction.clean_amount(balance)
                    account.balance = Decimal(balance) if balance != "" else Decimal(0)

            else:
                # for/else: skipped when a cell issued ``break``
                # (because of some weird useless <tr>).
                if account.id is not None and (
                    not account._link_id or 'moneycenter' not in account._link_id
                ):
                    yield account
def get_list(self):
    """Yield the accounts parsed from the summary page.

    Every ``<tr>`` in the ``div.block`` sections under ``#synthese-list`` of
    ``self.document`` becomes a candidate ``Account``; each ``<td>`` is
    handled according to its ``class`` attribute.

    The type is set to ``Account.TYPE_MARKET`` when the lower-cased block
    title contains "assurance vie" and to ``Account.TYPE_CARD`` for card
    rows; otherwise it is left as created by ``Account()``.

    A candidate is yielded when none of its card/name cells lacked the
    expected child element and its id is not ``None``.
    """
    blocks = self.document.xpath('//div[@id="synthese-list"]//div[@class="block"]')
    for div in blocks:
        block_title = "".join(div.xpath('.//span[@class="title"]//text()')).lower()
        for tr in div.getiterator("tr"):
            account = Account()
            account.id = None
            account._link_id = None
            if "assurance vie" in block_title:
                # Life insurance accounts are investments
                account.type = Account.TYPE_MARKET

            for td in tr.getiterator("td"):
                cell_class = td.get("class", "")

                if cell_class == "account-cb":
                    if not td.xpath('./*/a[@class="gras"]'):
                        # Card anchor missing: ignore this row.
                        break

                    account.type = Account.TYPE_CARD
                    # NOTE(review): ValueError is raised here when the cell
                    # text holds no "-" separator.
                    account.label, account.id = [
                        s.strip() for s in self.parser.tocleanstring(td).rsplit("-", 1)
                    ]

                    # An absent link is an empty xpath() list; .get() itself
                    # never raises KeyError.
                    links = td.xpath(".//a")
                    if links:
                        account._link_id = links[0].get("href")

                elif cell_class == "account-name":
                    labels = td.xpath('./span[@class="label"]')
                    if not labels:
                        # Label span missing: ignore this row.
                        break

                    account.label = self.parser.tocleanstring(labels[0])
                    account.id = self.parser.tocleanstring(td).rsplit("-", 1)[-1].strip()

                    # Name cells without a link keep the current _link_id
                    # instead of raising IndexError.
                    links = td.xpath(".//a")
                    if links:
                        account._link_id = links[0].get("href")
                        account._detail_url = account._link_id

                elif cell_class == "account-more-actions":
                    for a in td.getiterator("a"):
                        # For normal account, two "account-more-actions"
                        # One for the account, one for the credit card.
                        # Take the good one
                        href = a.get("href")
                        # Anchors without href give None, on which the
                        # substring tests would raise TypeError.
                        if href is not None and "mouvements.phtml" in href and "/cartes/" not in href:
                            account._link_id = href

                elif cell_class == "account-number":
                    # td.text is None for an empty cell; leave the id alone.
                    if td.text is not None:
                        account.id = td.text.strip(u" \n\t")

                elif cell_class == "account-total":
                    span = td.find("span")
                    balance = td.text if span is None else span.text

                    account.currency = account.get_currency(balance)

                    balance = FrenchTransaction.clean_amount(balance)
                    account.balance = Decimal(balance) if balance != "" else Decimal(0)

            else:
                # Not executed when a cell hit ``break``
                # (because of some weird useless <tr>).
                if account.id is not None:
                    yield account