def get_history(self): i = 0 ignore = False for tr in self.doc.xpath( '//table[@cellpadding="1"]/tr') + self.doc.xpath( '//tr[@class="rowClick" or @class="rowHover"]'): tds = tr.findall('td') if len(tds) < 4: continue # if there are more than 4 columns, ignore the first one. i = min(len(tds) - 4, 1) if tr.attrib.get('class', '') == 'DataGridHeader': if tds[2].text == u'Titulaire': ignore = True else: ignore = False continue if ignore: continue # Remove useless details detail = tr.cssselect('div.detail') if len(detail) > 0: detail[0].drop_tree() t = Transaction() date = u''.join([txt.strip() for txt in tds[i + 0].itertext()]) raw = u' '.join([txt.strip() for txt in tds[i + 1].itertext()]) debit = u''.join([txt.strip() for txt in tds[-2].itertext()]) credit = u''.join([txt.strip() for txt in tds[-1].itertext()]) t.parse(date, re.sub(r'[ ]+', ' ', raw)) card_debit_date = self.doc.xpath( u'//span[@id="MM_HISTORIQUE_CB_m_TableTitle3_lblTitle"] | //label[contains(text(), "débiter le")]' ) if card_debit_date: t.rdate = Date(dayfirst=True).filter(date) m = re.search('(\d{2}\/\d{2}\/\d{4})', card_debit_date[0].text) assert m t.date = Date(dayfirst=True).filter(m.group(1)) if t.date is NotAvailable: continue if 'tot dif' in t.raw.lower(): t.deleted = True t.set_amount(credit, debit) yield t i += 1