def on_loaded(self):
    """Parse the loaded document into ``self.operations``.

    Every ``<tr>`` whose ``class`` attribute is ``hdoc1`` or ``hdotc1`` and
    that holds exactly four ``<td>`` cells becomes one ``Operation``:

    * cell 0: its text is used as the date;
    * cell 1: its text, plus the text and tail of each of its children, is
      used as the label;
    * cell 2: if it holds a number it is stored *negated* as the amount
      (presumably the debit column -- confirm against the page layout);
    * cell 3: otherwise this cell is parsed and stored as is.

    Rows that do not match are skipped.
    """
    self.operations = []
    for tr in self.document.getiterator('tr'):
        if tr.attrib.get('class', '') not in ('hdoc1', 'hdotc1'):
            continue
        tds = tr.findall('td')
        if len(tds) != 4:
            continue

        date = tds[0].text

        # An element's text is None when the cell starts with a child element,
        # so fall back to an empty string instead of raising TypeError.
        label = u''
        label += tds[1].text or u''
        # Only the cell's own leading text is stripped of non-breaking spaces.
        label = label.replace(u'\xa0', u'')
        for child in tds[1].getchildren():
            if child.text:
                label += child.text
            if child.tail:
                label += child.tail
        if tds[1].tail:
            label += tds[1].tail
        label = label.strip()

        # Amounts use '.' as thousands separator and ',' as decimal mark.
        # An empty cell (text is None) is treated as "no amount here".
        amount = (tds[2].text or u'').replace('.', '').replace(',', '.')

        operation = Operation(len(self.operations))
        # If we don't have exactly one '.', this is not a float: try the next
        # cell instead.
        if amount.count('.') != 1:
            amount = tds[3].text.replace('.', '').replace(',', '.')
            operation.amount = float(amount)
        else:
            operation.amount = - float(amount)
        operation.date = date
        operation.label = label
        self.operations.append(operation)
def get_history(self):
    """Return the operations listed in the ``mouvements`` table.

    Each ``<tr>`` under the ``<tbody>`` of the table whose id is
    ``mouvements`` becomes one ``Operation``:

    * the date is the text of the first ``<td>``;
    * the label is the second ``<td>`` serialized by the browser's parser,
      with HTML tags and extra spaces removed;
    * the amount is taken from the last ``<td>/<span>`` whose text contains
      a digit, with '.' dropped, ',' turned into '.' and all whitespace
      removed. A row without such a span gets an amount of 0.0.

    :returns: list of ``Operation`` objects, in document order.
    :raises IndexError: if the document has no ``mouvements`` table.
    """
    # Compiled once rather than on every row.
    has_digit = re.compile(r"\d+")

    mvt_table = self.document.xpath("//table[@id='mouvements']", smart_strings=False)[0]
    operations = []
    for mvt in mvt_table.xpath("./tbody/tr"):
        cells = mvt.xpath("./td")
        operation = Operation(len(operations))
        operation.date = cells[0].text
        operation.label = remove_extra_spaces(remove_html_tags(self.browser.parser.tostring(cells[1])))

        amount = None
        for span in mvt.xpath("./td/span"):
            # A span holding only child elements has text None; skip it
            # instead of handing None to the regex.
            if span.text and has_digit.search(span.text):
                amount = span.text

        if amount is None:
            # No numeric span in this row: keep the row, with a null amount.
            operation.amount = 0.0
        else:
            amount = "".join(amount.replace(".", "").replace(",", ".").split())
            # float() already understands a leading '-'.
            operation.amount = float(amount)
        operations.append(operation)
    return operations
def get_history(self):
    """Yield one ``Operation`` per history row of the loaded document.

    A ``<tr>`` is a history row when the ``class`` attribute of its first
    child is ``'i g'`` or ``'p g'``. For such a row:

    * the date is the text of the first child;
    * the label is the text of the third child, newlines replaced by spaces;
    * the amount is read from the fourth child if its text is longer than
      two characters, else from the fifth child under the same condition,
      else it is 0. Only digits and '-' are kept, and ',' becomes '.'.

    Operations are numbered from 0 in document order.
    """
    index = 0
    for tr in self.document.getiterator('tr'):
        cells = tr.getchildren()
        if not cells:
            # A <tr> without any child cannot be a history row.
            continue
        first_td = cells[0]
        if first_td.attrib.get('class', '') not in ('i g', 'p g'):
            continue

        operation = Operation(index)
        index += 1
        operation.date = first_td.text
        # Element text is None for an empty cell; treat it as an empty string.
        operation.label = (cells[2].text or u'').replace('\n', ' ')

        if len(cells[3].text or u'') > 2:
            s = cells[3].text
        elif len(cells[4].text or u'') > 2:
            s = cells[4].text
        else:
            s = "0"

        # Drop everything but digits and the sign; ',' is the decimal mark.
        balance = u''.join(
            '.' if c == ',' else c
            for c in s
            if c.isdigit() or c == '-' or c == ','
        )
        operation.amount = float(balance)
        yield operation
def on_loaded(self):
    """Rebuild ``self.operations`` from the rows of the loaded document.

    Only ``<tr>`` elements whose ``class`` attribute is ``hdoc1`` or
    ``hdotc1`` and that contain exactly three ``<td>`` cells are used. The
    date is the ``name`` attribute of the first child of the first cell, the
    label is assembled from the second cell, and the amount is parsed from
    the text of the third cell.
    """
    self.operations = []
    wanted_classes = ('hdoc1', 'hdotc1')

    for row in self.document.getiterator('tr'):
        if row.attrib.get('class', '') not in wanted_classes:
            continue
        cells = row.findall('td')
        if len(cells) != 3:
            continue
        date_cell, label_cell, amount_cell = cells

        when = date_cell.getchildren()[0].attrib.get('name', '')

        # Non-breaking spaces are removed from the cell's own leading text
        # only; text coming from children and tails is appended untouched.
        fragments = [(u'' + (label_cell.text or u'')).replace(u'\xa0', u'')]
        for node in label_cell.getchildren():
            if node.text:
                fragments.append(node.text)
            if node.tail:
                fragments.append(node.tail)
        if label_cell.tail:
            fragments.append(label_cell.tail)
        description = u''.join(fragments).strip()

        # '.' is the thousands separator, ',' the decimal mark.
        without_thousands = amount_cell.text.replace('.', '')
        raw_amount = without_thousands.replace(',', '.')

        entry = Operation(len(self.operations))
        entry.date = when
        entry.label = description
        entry.amount = float(raw_amount)
        self.operations.append(entry)
def get_history(self, start_index = 0):
    """
        Yield the operations of the history of a specific account.

        This function expects the current page to be the one dedicated to
        that history; if ``self.is_account_page()`` is false, nothing is
        yielded.

        Two layouts are handled, chosen from the number of left- and
        right-aligned ``<div class="dv" style="...">`` elements found after
        the separators:

        * two divs per operation: a left-aligned div holding
          "DD/MM<sep>label", followed by a right-aligned div holding the
          amount;
        * three divs per operation (exactly twice as many left-aligned divs
          as right-aligned ones): date, amount, label, in that order.

        Unparseable fields fall back to ``01/01`` for the date, ``Unknown``
        for the label (two-div layout only) and ``0.0`` for the amount.

        :param start_index: number given to the first yielded operation;
                            following operations are numbered consecutively.
    """
    # tested on CA Lorraine, Paris, Toulouse
    # avoid parsing the page as an account-dedicated page if it is not the case
    if not self.is_account_page():
        return

    # Raw strings: same patterns as before, without invalid '\.' escapes.
    amount_pattern = r'^(-?[0-9]+\.[0-9]{2}).*$'
    date_label_pattern = r'^([012][0-9]|3[01])/(0[1-9]|1[012]).(.+)$'
    date_pattern = r'^([012][0-9]|3[01])/(0[1-9]|1[012])'

    index = start_index
    operation = None

    body_elmt_list = self.document.xpath('/html/body/*')

    # type of separator used in the page
    separators = 'hr'
    # How many <hr> elements do we have under the <body>?
    sep_expected = len(self.document.xpath('/html/body/hr'))
    if not sep_expected:
        # no <hr>? Then how many class-less <div> used as separators instead?
        sep_expected = len(self.document.xpath('/html/body/div[not(@class) and not(@style)]'))
        separators = 'div'

    # the interesting divs are after the separators
    interesting_divs = []
    right_div_count = 0
    left_div_count = 0
    sep_found = 0
    for body_elmt in body_elmt_list:
        if separators == 'hr' and body_elmt.tag == 'hr':
            sep_found += 1
        elif separators == 'div' and body_elmt.tag == 'div' and body_elmt.get('class', 'nope') == 'nope':
            sep_found += 1
        elif sep_found >= sep_expected and body_elmt.tag == 'div':
            # we just want <div> with dv class and a style attribute
            if body_elmt.get('class', '') != 'dv':
                continue
            if body_elmt.get('style', 'nope') == 'nope':
                continue
            interesting_divs.append(body_elmt)
            if self.is_right_aligned_div(body_elmt):
                right_div_count += 1
            else:
                left_div_count += 1

    # So, how are data laid out?
    toulouse_way_of_life = (left_div_count == 2 * right_div_count)

    if not toulouse_way_of_life:
        for body_elmt in interesting_divs:
            if self.is_right_aligned_div(body_elmt):
                # this is the second line of an operation entry, displaying the amount
                if operation is None:
                    # An amount with no preceding date/label line has no
                    # operation to attach to; skip it rather than crash.
                    continue
                data = self.extract_text(body_elmt).replace(',', '.').replace(' ', '')
                matches = re.findall(amount_pattern, data)
                operation.amount = float(matches[0]) if matches else 0.0
                yield operation
            else:
                # this is the first line of an operation entry, displaying the date and label
                data = self.extract_text(body_elmt)
                matches = re.findall(date_label_pattern, data)
                operation = Operation(index)
                index += 1
                if matches:
                    operation.date = u'%s/%s' % (matches[0][0], matches[0][1])
                    operation.label = u'%s' % matches[0][2]
                else:
                    operation.date = u'01/01'
                    operation.label = u'Unknown'
    else:
        # we'll have: one left-aligned div for the date, one right-aligned
        # div for the amount, and one left-aligned div for the label. Each time.
        # Floor division keeps range() working on Python 3 as well.
        for i in range(0, len(interesting_divs) // 3):
            first = i * 3
            operation = Operation(index)
            index += 1
            # amount
            data = self.extract_text(interesting_divs[first + 1]).replace(',', '.').replace(' ', '')
            matches = re.findall(amount_pattern, data)
            operation.amount = float(matches[0]) if matches else 0.0
            # date
            data = self.extract_text(interesting_divs[first])
            matches = re.findall(date_pattern, data)
            operation.date = u'%s/%s' % (matches[0][0], matches[0][1]) if matches else u'01/01'
            # label
            data = self.extract_text(interesting_divs[first + 2])
            data = re.sub(' +', ' ', data)
            operation.label = u'%s' % data
            yield operation