def parse(self, el):
    """Classify a recipient entry and fill ``self.env`` with its fields.

    An entry is 'Interne' when its text contains a balance marker
    ('Avoir disponible' / 'Solde') or when ``self.page.is_inner`` accepts
    the text; otherwise it is 'Externe'.

    Sets ``category``, ``id``, ``label``, ``iban`` and ``bank_name`` in
    ``self.env``.

    Raises:
        SkipItem: if an internal entry is the account in
            ``self.env['account_id']``, or if the resulting id is already
            in ``self.parent.objects``.
    """
    balance_markers = ['Avoir disponible', 'Solde']
    text = CleanText('.')(el)

    is_internal = any(marker in text for marker in balance_markers) or self.page.is_inner(text)
    self.env['category'] = 'Interne' if is_internal else 'Externe'

    if is_internal:
        _id = CleanText(Attr('.', 'value'))(el)
        if _id == self.env['account_id']:
            raise SkipItem()
        try:
            account = find_object(self.page.browser.get_accounts_list(), id=_id, error=AccountNotFound)
            self.env['id'] = _id
            self.env['label'] = account.label
            self.env['iban'] = account.iban
        except AccountNotFound:
            # Unknown account: rebuild id and label from the entry text.
            self.env['id'] = Regexp(CleanText('.'), '- (.*?) -')(el).replace(' ', '')
            self.env['iban'] = NotAvailable
            parts = text.split('-')
            # The last chunk is the holder unless it is the balance chunk.
            if any(marker in parts[-1] for marker in balance_markers):
                holder = parts[-2]
            else:
                holder = parts[-1]
            self.env['label'] = '%s %s' % (parts[0].strip(), holder.strip())
        self.env['bank_name'] = 'La Banque Postale'
    else:
        external_id = Regexp(CleanText('.'), '- (.*?) -')(el).replace(' ', '')
        self.env['id'] = self.env['iban'] = external_id
        self.env['label'] = Regexp(CleanText('.'), '- (.*?) - (.*)', template='\\2')(el).strip()
        first_part = text.split('-')[0].strip()
        if first_part in ['CCP', 'PEL']:
            self.env['bank_name'] = 'La Banque Postale'
        else:
            self.env['bank_name'] = NotAvailable

    # The user may have added two recipients with the same IBAN.
    if self.env['id'] in self.parent.objects:
        raise SkipItem()
def parse(self, el):
    """Extract id, balance and coming amount for one account row.

    Rows whose link starts with 'POR_SyntheseLst', or whose query string
    has neither 'rib' nor 'webid', are skipped. When the id is already in
    ``self.parent.objects`` the row is handled as a card: its balance is
    added to that account's ``coming`` and its link is recorded.

    Raises:
        SkipItem: for ignored rows and for card rows.
        ParseError: if no decimal balance is found in cells 2 and 3.
    """
    link = el.xpath('./td[1]/a')[0].get('href', '')
    if link.startswith('POR_SyntheseLst'):
        raise SkipItem()

    params = parse_qs(urlparse(link).query)
    if 'rib' not in params and 'webid' not in params:
        raise SkipItem()

    # The balance is in the first of the two cells that parses as a decimal.
    for cell in el.xpath('./td[2] | ./td[3]'):
        try:
            balance = CleanDecimal('.', replace_dots=True)(cell)
        except InvalidOperation:
            continue
        break
    else:
        raise ParseError('Unable to find balance for account %s' % CleanText('./td[1]/a')(el))

    id_key = 'rib' if 'rib' in params else 'webid'
    account_id = params[id_key][0]

    # Handle cards
    if account_id in self.parent.objects:
        parent_account = self.parent.objects[account_id]
        if not parent_account.coming:
            parent_account.coming = Decimal('0.0')
        parent_account.coming += balance
        parent_account._card_links.append(link)
        raise SkipItem()

    self.env['id'] = account_id

    # Handle real balances
    page = self.page.browser.open(link).page
    if page:
        coming = page.find_amount(u"Opérations à venir")
        accounting = page.find_amount(u"Solde comptable")
    else:
        coming = accounting = None

    if accounting is not None:
        if accounting + (coming or Decimal('0')) != balance:
            self.page.logger.warning('%s + %s != %s' % (accounting, coming, balance))
        balance = accounting

    self.env['balance'] = balance
    self.env['coming'] = coming or NotAvailable
def obj_id(self):
    """Return the IBAN found in the sixth cell, used as the recipient id.

    The cell text is stripped of spaces, then the word characters between
    'IBAN:' and 'BIC' are returned.

    Raises:
        SkipItem: if the cell does not match that pattern.
    """
    compact_text = CleanText('./td[6]', replace=[(' ', '')])(self)
    found = re.search(r'(?<=IBAN:)(\w+)BIC', compact_text)
    if not found:
        raise SkipItem('There is no IBAN for the recipient %s' % Field('label')(self))
    return found.group(1)
def parse(self, el):
    """Queue the detail page of a row that links to one, then skip the row.

    When the second cell holds a link whose 'data-modal-alert-behavior'
    attribute contains a number, the matching 'mouvement/<number>' page is
    opened and stored with the row's raw label in the account's
    ``_history_pages``. Rows without such a link are left untouched.

    Raises:
        SkipItem: once a detail page has been queued for the row.
    """
    anchors = el.xpath('./td[2]/a')
    if not anchors:
        return

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    m = re.search(r'(\d+)', anchors[0].get('data-modal-alert-behavior', ''))
    if not m:
        return

    history_pages = self.env['account']._history_pages
    raw = Field('raw')(self)
    # Detail URL is built from the part of the current URL before 'mouvements'.
    detail_url = '%s%s%s' % (self.page.url.split('mouvements')[0], 'mouvement/', m.group(1))
    history_pages.append((raw, self.page.browser.open(detail_url).page))
    raise SkipItem()
def obj_date(self):
    """Return the day-first date parsed from the second cell.

    Raises:
        SkipItem: if the cell text is exactly '-'.
    """
    cell_text = CleanText('./td[position()=2]')(self)
    if cell_text != '-':
        return Date(CleanText('./td[position()=2]'), dayfirst=True)(self)
    raise SkipItem()
def obj_url(self):
    """Return the item's URL if the current page has an "Afficher" form.

    The URL is the element's 'data-url' attribute joined to the browser's
    BASEURL. The browser is sent to that URL before the form check.

    Raises:
        SkipItem: if no form containing "Afficher" is found.
    """
    target = urljoin(self.page.browser.BASEURL, CleanText('./@data-url')(self))
    self.page.browser.location(target)
    has_display_form = self.page.doc.xpath('//form[contains(., "Afficher")]')
    if not has_display_form:
        raise SkipItem()
    return target
def obj_id(self):
    """Return the account id, taken from a different source for cards.

    Cards use the second element of ``self.obj__idparts()``. Other accounts
    use the first run of digits in the 'account-number' heading of the
    'details' page.

    Raises:
        SkipItem: for a card whose name link is '#', or when no account
            number is found.
    """
    account_type = Field('type')(self)

    if account_type != Account.TYPE_CARD:
        number_filter = Regexp(
            CleanText('//h3[has-class("account-number")]'),
            r'(\d+)',
            default=NotAvailable,
        )
        account_number = Async('details', number_filter)(self)
        if account_number:
            return account_number
        raise SkipItem()

    # An opposed (blocked) card still appears on the accounts page, but
    # with a dead link and therefore no id: skip it.
    if Attr('.//a[has-class("account--name")]', 'href')(self) == '#':
        raise SkipItem()
    return self.obj__idparts()[1]
def parse(self, el):
    """Attach this row's link to a matching known account, then skip it.

    The accounts in ``self.env['accounts']`` are searched for one whose id
    equals this item's 'id' field. If one is found, the item's '_link_id'
    is appended to the first match's ``_card_links``.

    Raises:
        SkipItem: when a matching account was found.
    """
    matches = [
        known for known in self.env['accounts']
        if known.id == Field('id')(self)
    ]
    if not matches:
        return
    matches[0]._card_links.append(Field('_link_id')(self))
    raise SkipItem()
def obj_id(self):
    """Return the first run of digits in the 'details' page heading.

    The heading is the ``h3`` with class 'account-number'.

    Raises:
        SkipItem: if no number is found.
    """
    number_filter = Regexp(
        CleanText('//h3[has-class("account-number")]'),
        r'(\d+)',
        default=NotAvailable,
    )
    account_number = Async('details', number_filter)(self)
    if account_number:
        return account_number
    raise SkipItem()
def get_lifenumber(self):
    """Return the account id read from the insurance page of this item.

    The item's 'index' is passed as ``accid`` to the browser's
    ``redirect_insurance`` URL. The JSON response supplies a 'url', which
    is opened and asked for its account id.

    Raises:
        SkipItem: if the JSON response is empty.
    """
    browser = self.page.browser
    response = browser.redirect_insurance.open(accid=Dict('index')(self))
    payload = json.loads(response.text)
    if not payload:
        raise SkipItem('account seems unavailable')
    return browser.open(payload['url']).page.get_account_id()
def parse(self, el):
    """Derive the account type from the "Cadre fiscal" row of the details page.

    Sets ``self.env['type']`` from ``self.page.TYPES`` (lower-cased lookup,
    falling back to ``Account.TYPE_UNKNOWN``) and ``self.env['page']`` to
    the loaded details page.

    Raises:
        SkipItem: if the "Cadre fiscal" cell is missing or empty.
    """
    page = Async('details').loaded_page(self)
    # Single literal: the former backslash continuation inside the string
    # embedded source indentation in the XPath (harmless between
    # arguments, but fragile).
    fiscal_cells = page.doc.xpath(
        '//th[contains(text(), "Cadre fiscal")]/following-sibling::td[1]')
    fiscal_type = CleanText().filter(fiscal_cells)
    if not fiscal_type:
        raise SkipItem()
    self.env['type'] = self.page.TYPES.get(fiscal_type.lower(), Account.TYPE_UNKNOWN)
    self.env['page'] = page
def parse(self, el):
    """Add this row's amount to its parent account's ``coming``, then skip it.

    The account id is the concatenation of the AGENCE, COMPTE and CLE
    values in the row's 'onclick' attribute. The quoted link from the same
    attribute is appended to the account's ``_coming_links``.

    Raises:
        SkipItem: always, after the parent account has been updated.
    """
    onclick = CleanText('./@onclick')
    link = Regexp(onclick, "'(.*)'")(el)
    account_key = Regexp(onclick, r'.*AGENCE=(\w+).*COMPTE=(\w+).*CLE=(\w+)', r'\1\2\3')(el)

    target = self.parent.objects[account_key]
    if not target.coming:
        target.coming = Decimal('0')
    amount = CleanDecimal('.//td[has-class("right")]', replace_dots=True)(el)
    target.coming += amount
    target._coming_links.append(link)
    raise SkipItem()
def parse(self, el):
    """Fill an existing event from the popup, or prepare env for a new one.

    When ``self.obj`` already has an id, only its ``url`` and
    ``description`` are updated and the item is skipped. Otherwise
    ``is_concert`` (popup class is not 'theatre-popup') and ``url`` are
    stored in ``self.env``.

    Raises:
        SkipItem: when the existing object was updated in place.
    """
    popup_xpath = "//div/div/div[@id='popup']"
    popup = el.xpath(popup_xpath)[0]

    if not self.obj.id:
        self.env['is_concert'] = (popup.attrib['class'] != 'theatre-popup')
        self.env['url'] = self.page.url
        return

    existing = self.obj
    existing.url = self.page.url
    existing.description = CleanHTML(popup_xpath + "/div/div[@class='presentation-popup']")(self)
    raise SkipItem()
def obj_date(self):
    """Return the bold ``dt`` text followed by the parsed date.

    Raises:
        SkipItem: if ``./a/dl/dd[1]/strong`` is empty; the original
            comment says such rows are separator boards.
    """
    # Filter out the separator boards.
    if not CleanText('./a/dl/dd[1]/strong')(self):
        raise SkipItem()

    prefix = CleanText('./a/dl/dt/b')
    parsed_date = Date(CleanText('./a/dl/dt/text()[2]'), dayfirst=False)
    return Format('%s %s', prefix, parsed_date)(self)
def obj_split_path(self):
    """Return ``[course id, chapter]`` from the chapter's first section link.

    The chapter comes from the 'chapter' group of the browser's
    ``section`` URL pattern. The course id is ``self.env['course']`` with
    '/' replaced by '-'.

    Raises:
        SkipItem: if the following 'chapter-content-container' div has no link.
    """
    # Parse the first section link.
    anchors = self.xpath(
        './following-sibling::div[has-class("chapter-content-container")]//a'
    )
    if not anchors:
        raise SkipItem()

    browser = self.page.browser
    section_url = browser.absurl(anchors[0].get('href'))
    section_match = browser.section.match(section_url)
    course_id = self.env['course'].replace('/', '-')
    return [course_id, section_match.group('chapter')]
def parse(self, el):
    """Record the deferred-debit date and drop duplicate transactions.

    For a "DeferredDebit" key, the first truthy 'dateDiffere' in the
    current-month card list is converted with ``FromTimestamp`` and stored
    on the object as ``_deferred_date``.

    A transaction is identified by short label + operation date + amount.
    It is skipped when it has no amount, or when that identifier is already
    in ``browser.trs['list']`` and ``browser.trs['lastdate']`` is not later
    than its date. Otherwise both ``trs`` entries are updated.

    Raises:
        SkipItem: for transactions without an amount and for duplicates.
    """
    key = Env('key', default=None)(self)
    if key and "DeferredDebit" in key:
        # Start at None so an empty card list behaves like a list without
        # any date instead of raising UnboundLocalError.
        deferred_date = None
        for card in Dict('%s/currentMonthCardList' % key)(self.page.doc):
            deferred_date = Dict('dateDiffere', default=None)(card)
            if deferred_date:
                break
        self.obj._deferred_date = self.FromTimestamp().filter(deferred_date)

    # Skip duplicate transactions
    amount = Dict('montantEnEuro', default=None)(self)
    tr = Dict('libelleCourt')(self) + Dict('dateOperation')(self) + str(amount)
    trs = self.page.browser.trs
    if amount is None or (tr in trs['list'] and trs['lastdate'] <= Field('date')(self)):
        raise SkipItem()

    trs['lastdate'] = Field('date')(self)
    trs['list'].append(tr)
def parse(self, el):
    """Match an internal recipient to a known account and fill ``self.env``.

    If the entry text contains a number at least as long as the shortest
    known account id, the account is looked up by that number: exact id
    first, then substring match in either direction. Otherwise the entry
    label is matched against the page's 'Emetteurs' parameters to rebuild
    an id.

    Sets ``id``, ``label`` and ``iban`` in ``self.env``.

    Raises:
        SkipItem: if no account matches via the label path.
        AssertionError: if the substring fallback does not find exactly
            one account.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    _id = Regexp(CleanText('.', replace=[(' ', '')]), r'(\d+)', default=NotAvailable)(self)

    if _id and len(_id) >= min(len(acc.id) for acc in self.page.browser.get_accounts_list()):
        account = find_object(self.page.browser.get_accounts_list(), id=_id)
        if not account:
            # No exact match: accept a single account whose id contains,
            # or is contained in, the number found.
            accounts = [
                acc for acc in self.page.browser.get_accounts_list()
                if acc.id in _id or _id in acc.id
            ]
            assert len(accounts) == 1
            account = accounts[0]
        self.env['id'] = _id
    else:
        rcpt_label = CleanText('.')(self)
        account = None

        # the recipients selector contains "<type> - <label>"
        # the js contains "<part_of_id>", ... "<label>", ... "<type>"
        # the accounts list contains "<label>" and the id
        # put all this data together
        for params in self.page.iter_params_by_type('Emetteurs'):
            param_label = '%s - %s' % (params[8], params[5])
            if param_label != rcpt_label:
                continue
            param_id = params[1] + params[2] + params[3]
            for ac in self.page.browser.get_accounts_list():
                if ac.id in param_id:
                    account = ac
                    break

        if account is None:
            self.page.logger.warning(
                'the internal %r recipient could not be found in js or accounts list',
                rcpt_label)
            raise SkipItem()
        self.env['id'] = account.id

    self.env['label'] = account.label
    self.env['iban'] = account.iban
def parse(self, el):
    """Extract the account id and balance for one row.

    The id is the first of the 'CPT_IdPrestation', 'Ass_IdPrestation' and
    'CB_IdPrestation' query parameters present in the first cell's link.
    The balance is read from the third cell. ``coming`` is always set to
    ``NotAvailable``.

    Raises:
        SkipItem: if the link has none of those parameters.
    """
    link = el.xpath('./td[1]/a')[0].get('href', '')
    params = parse_qs(urlparse(link).query)

    # Priority order matters: the first key present wins.
    for id_key in ('CPT_IdPrestation', 'Ass_IdPrestation', 'CB_IdPrestation'):
        if id_key in params:
            account_id = params[id_key][0]
            break
    else:
        raise SkipItem()

    balance = CleanDecimal('./td[3]', replace_dots=True)(self)
    self.env['id'] = account_id
    self.env['balance'] = balance
    self.env['coming'] = NotAvailable
def parse(self, el):
    """Load the details page of a transaction and keep it only if it matches.

    Details pages are cached per transaction id in
    ``browser.cache['details']``. On a cache miss the details form is
    submitted, the page is cached, and the "Retour" form is submitted to
    return to the history list.

    Sets ``investments`` and ``amount`` (sum of valuations, with missing
    ones counted as 0) in ``self.env``.

    Raises:
        SkipItem: if no ``td`` of the details page contains the account id
            from ``Env('accid')``.
    """
    # There is a single history for all accounts, and only the details
    # page tells whether a transaction belongs to the current account.
    trid = CleanText(TableCell('id'))(self)
    browser = self.page.browser
    details_cache = browser.cache['details']

    if trid in details_cache:
        page = details_cache[trid]
    else:
        # The website is stateful: first go to the details page...
        anchor = TableCell('id')(self)[0].xpath('./a')
        idt = Attr(anchor, 'id', default=None)(self)
        typeop = Regexp(Attr(anchor, 'onclick'), 'Operation.+?([A-Z_]+)')(self)
        form = self.page.get_history_form(idt, {
            'referenceOp': trid,
            'typeOperation': typeop
        })
        page = browser.open(form.url, data=dict(form)).page
        details_cache[trid] = page

        # ...then go back to the history list.
        back_idt = Attr('//input[@title="Retour"]', 'id', default=None)(page.doc)
        back_form = self.page.get_history_form(back_idt)
        browser.open(back_form.url, data=dict(back_form)).page

    # Check whether the page is related to the account.
    accid = Env('accid')(self)
    matching_cells = page.doc.xpath('//td[contains(text(), $id)]', id=accid)
    if not len(matching_cells):
        raise SkipItem()

    investments = list(page.get_investments(accid=accid))
    self.env['investments'] = investments
    self.env['amount'] = sum(inv.valuation or Decimal('0') for inv in investments)
def parse(self, el):
    """Compute subscriber name and subscription id for one contract.

    The subscriber is read from the "prénom / nom" span of the 'details'
    page, falling back to "civilité prénom nom" built from three separate
    fields. The subscription id is the 'idContrat' number of the element's
    'ecareurl' attribute, or the browser username when there is none.

    Sets ``subscriber``, ``subid`` and ``multi`` in ``self.env``.

    Raises:
        SkipItem: if a contract id exists but the JSON returned by its
            'ecareurl' has an empty 'html' entry.
    """
    subscriber = Async(
        'details',
        CleanText(u'//span[contains(text(), "prénom / nom")]/following-sibling::span[1]'),
    )(self)
    if not subscriber:
        subscriber = Async(
            'details',
            Format(
                '%s %s %s',
                CleanText(u'//*[contains(text(), "civilité")]/following-sibling::*[1]'),
                CleanText(u'//*[contains(text(), "prénom")]/following-sibling::*[1]'),
                CleanText(u'//*[text() = "nom :"]/following-sibling::*[1]'),
            ),
        )(self)
    self.env['subscriber'] = subscriber

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    subid = Regexp(Attr('.', 'ecareurl', default="None"), r'idContrat=(\d+)', default=None)(self)
    self.env['subid'] = subid if subid else self.page.browser.username
    self.env['multi'] = bool(subid)

    # Skip accounts that are available but not added to the customer area.
    if subid and not json.loads(
            self.page.browser.open(Attr('.', 'ecareurl')(self)).content)['html']:
        raise SkipItem()
def parse(self, el):
    """Extract id, balance and coming amount for one account row.

    Rows whose link contains 'POR_SyntheseLst', or whose query string has
    neither 'rib' nor 'webid', are skipped. On the new website the id is
    read from the "doux" node of the first cell; otherwise it comes from
    the 'rib' or 'webid' parameter, and ``_is_webid`` records which.

    When the id is already in ``self.parent.objects`` the row is not a new
    account. If it is a fleet page, or the id already has fleet pages, the
    page is stored in ``browser.fleet_pages``. Otherwise the row is handled
    as a card: its balance is added to the account's ``coming`` and its
    link is recorded.

    Raises:
        SkipItem: for ignored rows and for rows attached to a known account.
        ParseError: if no decimal balance is found in cells 2 and 3.
    """
    link = el.xpath('./td[1]/a')[0].get('href', '')
    if 'POR_SyntheseLst' in link:
        raise SkipItem()

    url = urlparse(link)
    p = parse_qs(url.query)
    if 'rib' not in p and 'webid' not in p:
        raise SkipItem()

    # The balance is in the first of the two cells that parses as a decimal.
    for td in el.xpath('./td[2] | ./td[3]'):
        try:
            balance = CleanDecimal('.', replace_dots=True)(td)
        except InvalidOperation:
            continue
        else:
            break
    else:
        raise ParseError('Unable to find balance for account %s' % CleanText('./td[1]/a')(el))

    self.env['_is_webid'] = False
    if self.page.browser.is_new_website:
        account_id = CleanText(
            './td[1]/a/node()[contains(@class, "doux")]',
            replace=[(' ', '')])(el)
    elif 'rib' in p:
        account_id = p['rib'][0]
    else:
        account_id = p['webid'][0]
        self.env['_is_webid'] = True

    page = self.page.browser.open(link).page

    # Handle cards
    if account_id in self.parent.objects:
        fleet_pages = self.page.browser.fleet_pages
        # `page` may be falsy (the balance code below already allows for
        # it), so guard before calling is_fleet().
        if (page and page.is_fleet()) or account_id in fleet_pages:
            fleet_pages.setdefault(account_id, []).append(page)
        else:
            account = self.parent.objects[account_id]
            if not account.coming:
                account.coming = Decimal('0.0')
            account.coming += balance
            account._card_links.append(link)
        raise SkipItem()

    self.env['id'] = account_id

    # Handle real balances
    coming = page.find_amount(u"Opérations à venir") if page else None
    accounting = page.find_amount(u"Solde comptable") if page else None
    if accounting is not None:
        if accounting + (coming or Decimal('0')) != balance:
            self.page.logger.warning('%s + %s != %s' % (accounting, coming, balance))
        balance = accounting

    self.env['balance'] = balance
    self.env['coming'] = coming or NotAvailable
def obj_thumbnail(self):
    """Return a ``Thumbnail`` for the image inside the 'thumbnail' link.

    The image 'src' is resolved against the current page URL.

    Raises:
        SkipItem: if the element has no such image.
    """
    src = Attr('./a[has-class("thumbnail")]/img', 'src', default=None)(self)
    if src is not None:
        return Thumbnail(urljoin(self.page.url, src))
    raise SkipItem('not an image thread')
def get_image_url(self):
    """Return the absolute URL of the link wrapping the preview image.

    Raises:
        SkipItem: if the document has a ``video`` element with class 'preview'.
    """
    has_video_preview = self.doc.xpath('//video[@class="preview"]')
    if not has_video_preview:
        return urljoin(self.url, Link('//a[img[@class="preview"]]')(self.doc))
    raise SkipItem('Videos are not implemented')