def filter(self, el):
    """Extract the start time from the schedule text of an event.

    The text of ``el[1]`` (or of ``el[0]`` when ``el`` holds a single item)
    is cleaned, and the part located before the first ``' - '`` separator
    is searched for an ``<hours>h<minutes>`` pattern such as ``20h30``.
    Minutes are optional, so ``20h`` yields 20:00.

    :param el: sequence of nodes selected by the caller
        (NOTE(review): presumably lxml elements -- confirm with the caller)
    :return: the start time that was found
    :rtype: time
    :raises ValueError: if no hour can be found in the text, or if the
        digits found do not form a valid time of day
    """
    index = 1 if len(el) > 1 else 0
    content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
    a_time = content.split(' - ')[0]

    # The minutes group has to be optional: while it was mandatory, "20h"
    # only matched after backtracking into hh="2", mm="0" and was returned
    # as 02:00.  A plain r'' literal is enough for this ASCII-only pattern
    # and, unlike ur'', is valid syntax on both Python 2 and Python 3.
    m = re.search(r'(?P<hh>\d+)h?(?P<mm>\d+)?', a_time)
    if m is None:
        # Fail with an explicit message instead of an AttributeError on None.
        raise ValueError('no start time found in %r' % a_time)
    return time(int(m.group('hh')), int(m.group('mm') or 0))
def filter(self, el):
    """Extract the start time from the schedule text of an event.

    The text of ``el[1]`` (or of ``el[0]`` when ``el`` holds a single item)
    is cleaned, and the part located before the first ``" - "`` separator
    is searched for an ``<hours>h<minutes>`` pattern such as ``20h30``.
    Minutes are optional, so ``20h`` yields 20:00.

    :param el: sequence of nodes selected by the caller
        (NOTE(review): presumably lxml elements -- confirm with the caller)
    :return: the start time that was found
    :rtype: time
    :raises ValueError: if no hour can be found in the text, or if the
        digits found do not form a valid time of day
    """
    index = 1 if len(el) > 1 else 0
    content = CleanText.clean(CleanText(".", ["HORAIRES"])(el[index]))
    a_time = content.split(" - ")[0]

    # The minutes group has to be optional: while it was mandatory, "20h"
    # only matched after backtracking into hh="2", mm="0" and was returned
    # as 02:00.  A plain r"" literal is enough for this ASCII-only pattern
    # and, unlike ur"", is valid syntax on both Python 2 and Python 3.
    m = re.search(r"(?P<hh>\d+)h?(?P<mm>\d+)?", a_time)
    if m is None:
        # Fail with an explicit message instead of an AttributeError on None.
        raise ValueError("no start time found in %r" % a_time)
    return time(int(m.group("hh")), int(m.group("mm") or 0))
def filter(self, el):
    """Extract the price from the schedule text of an event.

    The text of ``el[1]`` (or of ``el[0]`` when ``el`` holds a single item)
    is cleaned, and the part located after the last ``' - '`` separator is
    searched for the first number, written either as an integer (``12``)
    or with a decimal comma (``12,50``).  When the text contains no
    separator, the whole text is searched.

    :param el: sequence of nodes selected by the caller
        (NOTE(review): presumably lxml elements -- confirm with the caller)
    :return: the first number found, or ``0.0`` when there is none
    :rtype: float
    """
    index = 1 if len(el) > 1 else 0
    content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
    a_price = content.split(' - ')[-1]

    # Search the string itself.  It used to go through " ".join(a_price),
    # which inserts a space between every character of a string, so that
    # "12,50" became "1 2 , 5 0" and only its first digit was returned.
    m = re.search(r"\d*,\d+|\d+", a_price)
    if m is None:
        return float(0)
    return float(m.group(0).replace(',', '.'))
def filter(self, el):
    """Build the event date from the text found in ``el[0]``.

    Only the part of the cleaned text located before the first ``' - '``
    is used.  Each ``(pattern, replacement)`` pair of
    ``date_util.DATE_TRANSLATE_FR`` is applied to it (presumably to turn
    French day and month names into English ones) before parsing.

    When the text parses as ``"%A %d %B"`` (no year), a year is appended:
    next year if the month is earlier than the current month, the current
    year otherwise.  Text that does not match that format is left
    untouched and handed as-is to the final parse.

    :param el: sequence of nodes selected by the caller; only the first
        one is read
    :return: the date parsed with the ``"%A %d %B %Y"`` format
    :rtype: datetime
    :raises ValueError: if the text holds no ``' - '`` separator, or if
        the final parse fails
    """
    text = CleanText.clean(CleanText(CleanHTML('.'), ['*'])(el[0]))
    day_label = text[:text.index(' - ')]
    for pattern, replacement in date_util.DATE_TRANSLATE_FR:
        day_label = pattern.sub(replacement, day_label)

    try:
        event_month = datetime.strptime(day_label, "%A %d %B").month
    except ValueError:
        # Not a bare "weekday day month" string: keep it unchanged and let
        # the final parse below decide whether it is usable.
        pass
    else:
        if datetime.now().month > event_month:
            # The month is already behind us: the event is taken to be next year.
            day_label += u' %i' % (datetime.now().year + 1)
        else:
            day_label += u' %i' % (datetime.now().year)

    return datetime.strptime(day_label, "%A %d %B %Y")
def next_page(self):
    """Return the request leading to the next page of results, if any.

    The pagination form (``//form[@id="paginationForm"]``) is expected to
    contain a ``"<current> / <last>"`` counter in its text.

    :return: ``form.request`` with the ``page`` field set to the next page
        number, or ``None`` when the form is missing, when no counter is
        found in it, or when the current page is the last one
    """
    try:
        pagination = self.page.get_form('//form[@id="paginationForm"]')
    except FormNotFound:
        return None

    label = CleanText.clean(pagination.el)
    counter = re.search(u'(\d+) / (\d+)', label or '', flags=re.MULTILINE)
    if counter is None:
        return None

    current = int(counter.group(1))
    total = int(counter.group(2))
    if current != total:
        pagination['page'] = str(current + 1)
        return pagination.request
    return None
def get_list(self):
    """Yield the accounts listed on the page.

    Every ``<a>`` whose class is ``synthese_id_compte`` or
    ``synthese_carte_differe`` is visited in document order:

    * a link holding a ``synthese_numero_compte`` span describes an
      account; it is yielded with its id, balance, currency, label,
      ``_link_id`` and, when a key of ``self.ACCOUNT_TYPES`` occurs in the
      link, its type;
    * a link without that span is treated as a card link and is appended
      to ``_card_links`` of the account found just before it.  Since the
      account has already been yielded at that point, its ``_card_links``
      list keeps growing while the iteration goes on.

    An account link without a balance span is skipped, and so are the card
    links that follow it (or that come before the first account): there is
    no account to attach them to.  They used to raise ``AttributeError``.

    :return: generator of ``Account`` objects
    """
    # Keeps what follows the last '?' of the URL; compiled once for all rows.
    compte_id_re = re.compile(r'.*\?(.*)$')

    # Account the next card links belong to; None while there is none.
    account = None
    for cpt in self.document.xpath(
            '//a[@class="synthese_id_compte" or @class="synthese_carte_differe"]'
    ):
        url_to_parse = cpt.xpath('@href')[0].replace("\n", "")  # link

        # link_id = link to the history of an account (current or savings)
        if '/mes-comptes/livret/' in url_to_parse:
            link_id = '/fr/prive/mes-comptes/livret/consulter-situation/consulter-solde.jsp?%s' % \
                (compte_id_re.search(url_to_parse).groups()[0])
        else:
            link_id = url_to_parse

        number = cpt.xpath('./span[@class="synthese_numero_compte"]')
        if not number:
            # Card link: attach it to the current account, if there is one.
            if account is not None:
                account._card_links.append(link_id)
            continue

        try:
            balance = self.parser.tocleanstring(
                cpt.xpath('./span[contains(@class, "synthese_solde")]')[0])
        except IndexError:
            # No balance: the row is skipped.  Forget the previous account
            # too, so that the card links which follow this row are not
            # attached to an unrelated account.
            account = None
            continue

        account = Account()
        account.id = self.parser.tocleanstring(number[0]).replace(
            u'N°', '')
        account.balance = Decimal(Transaction.clean_amount(balance))
        account.currency = account.get_currency(balance)
        account._link_id = link_id
        account._card_links = []
        account.label = (' '.join([
            CleanText.clean(part) for part in cpt.xpath('./text()')
        ])).strip(' - ').strip()

        # No break on purpose: as before, the last matching pattern wins.
        # items() behaves the same as iteritems() here and also exists on
        # Python 3.
        for pattern, account_type in self.ACCOUNT_TYPES.items():
            if pattern in account._link_id:
                account.type = account_type

        yield account
def get_list(self):
    """Yield the accounts listed on the page.

    Every ``<a>`` whose class is ``synthese_id_compte`` or
    ``synthese_carte_differe`` is visited in document order:

    * a link holding a ``synthese_numero_compte`` span describes an
      account; it is yielded with its id, balance, currency, label,
      ``_link_id`` and, when a key of ``self.ACCOUNT_TYPES`` occurs in the
      link, its type;
    * a link without that span is treated as a card link and is appended
      to ``_card_links`` of the account found just before it.  Since the
      account has already been yielded at that point, its ``_card_links``
      list keeps growing while the iteration goes on.

    An account link without a balance span is skipped, and so are the card
    links that follow it (or that come before the first account): there is
    no account to attach them to.  They used to raise ``AttributeError``.

    :return: generator of ``Account`` objects
    """
    # Keeps what follows the last '?' of the URL; compiled once for all rows.
    compte_id_re = re.compile(r'.*\?(.*)$')

    # Account the next card links belong to; None while there is none.
    account = None
    for cpt in self.document.xpath('//a[@class="synthese_id_compte" or @class="synthese_carte_differe"]'):
        url_to_parse = cpt.xpath('@href')[0].replace("\n", "")  # link

        # link_id = link to the history of an account (current or savings)
        if '/mes-comptes/livret/' in url_to_parse:
            link_id = '/fr/prive/mes-comptes/livret/consulter-situation/consulter-solde.jsp?%s' % \
                (compte_id_re.search(url_to_parse).groups()[0])
        else:
            link_id = url_to_parse

        number = cpt.xpath('./span[@class="synthese_numero_compte"]')
        if not number:
            # Card link: attach it to the current account, if there is one.
            if account is not None:
                account._card_links.append(link_id)
            continue

        try:
            balance = self.parser.tocleanstring(cpt.xpath('./span[contains(@class, "synthese_solde")]')[0])
        except IndexError:
            # No balance: the row is skipped.  Forget the previous account
            # too, so that the card links which follow this row are not
            # attached to an unrelated account.
            account = None
            continue

        account = Account()
        account.id = self.parser.tocleanstring(number[0]).replace(u'N°', '')
        account.balance = Decimal(Transaction.clean_amount(balance))
        account.currency = account.get_currency(balance)
        account._link_id = link_id
        account._card_links = []
        account.label = (' '.join([CleanText.clean(part) for part in cpt.xpath('./text()')])).strip(' - ').strip()

        # No break on purpose: as before, the last matching pattern wins.
        # items() behaves the same as iteritems() here and also exists on
        # Python 3.
        for pattern, account_type in self.ACCOUNT_TYPES.items():
            if pattern in account._link_id:
                account.type = account_type

        yield account