def get_renew_date(self, subscription):
    """Return the date one month after the active date shown for *subscription*.

    The date is read (day first) from the first ``span.actif`` of the
    ``resumeConso`` block. When the day does not exist in the following
    month, the last day of that month is used instead.
    """
    # Fixed: the XPath template had no ``%s`` placeholder, so applying ``%``
    # to a string login raised TypeError instead of selecting the div.
    div = self.doc.xpath('//div[@login="%s"]' % subscription._login)[0]
    mydate = Date(CleanText('//div[@class="resumeConso"]/span[@class="actif"][1]'), dayfirst=True)(div)

    if mydate.month == 12:
        # December rolls over to January of the next year; January has
        # 31 days so the day always fits.
        return mydate.replace(year=mydate.year + 1, month=1)

    next_month = mydate.month + 1
    try:
        return mydate.replace(month=next_month)
    except ValueError:
        # The day is out of range for the next month: clamp to its last day.
        lastday = calendar.monthrange(mydate.year, next_month)[1]
        return mydate.replace(month=next_month, day=lastday)
class item(ItemElement):
    """Fill a ``BaseCalendarEvent`` from one listing element."""

    klass = BaseCalendarEvent

    # The first anchor provides the URL, the digits following '?' in its
    # link (used as id) and the summary text.
    obj_url = Link('./a[1]')
    obj_id = Regexp(Link('./a[1]'), r'\?(\d+)')
    obj_summary = CleanText('./a[1]')

    # The first span carries the start date.
    obj_start_date = Date(CleanText('./span[1]'))

    # Constants applied to every event.
    obj_category = CATEGORIES.CONCERT
    obj_status = STATUS.CONFIRMED
class item(ItemElement):
    """Fill a ``Transaction`` from one table row, column by column."""

    klass = Transaction

    # NOTE(review): Env('date_guesser') is given as the second positional
    # argument of Date -- confirm this is the parameter that was intended.
    obj_date = Date(CleanText(TableCell('date')), Env('date_guesser'))
    obj_type = Transaction.TYPE_UNKNOWN
    obj_id = CleanText(TableCell('reference'))
    obj_label = CleanText(TableCell('type'))
    # HTML is stripped from the 'montant' cell before decimal parsing;
    # NotAvailable is used when no amount can be read.
    obj_amount = CleanDecimal(
        CleanHTML(TableCell('montant')),
        replace_dots=True,
        default=NotAvailable,
    )
class item(ItemElement):
    """Fill a ``Transaction`` from a row containing ``td.impaire`` cells."""

    klass = Transaction

    def condition(self):
        """Accept the row only if it has at least one ``td`` of class ``impaire``."""
        matching_cells = self.el.xpath('td[@class="impaire"]')
        return len(matching_cells) > 0

    # Raw label comes from the 4th or 3rd cell, date from the 2nd (day
    # first) and amount from the 7th.
    obj_raw = Transaction.Raw('td[4] | td[3]')
    obj_date = Date(CleanText('td[2]'), dayfirst=True)
    obj_amount = CleanDecimal('td[7]', replace_dots=True)
class get_arte_cinema_video(ArteItemElement):
    """Build an ``ArteSiteVideo`` from the ``videoJsonPlayer`` entry of the element."""

    def __init__(self, *args, **kwargs):
        # NOTE(review): super() is anchored on ArteItemElement, so
        # ArteItemElement.__init__ itself is skipped -- confirm this is
        # deliberate.
        super(ArteItemElement, self).__init__(*args, **kwargs)
        # Narrow the working element to its 'videoJsonPlayer' entry.
        player = self.el.get('videoJsonPlayer')
        self.el = player

    klass = ArteSiteVideo

    obj__site = SITE.CINEMA.get('id')
    # An absent 'VRA' key yields '' and the date becomes NotAvailable.
    obj_date = Date(Dict('VRA', default=''), default=NotAvailable)
class item(ItemElement):
    """Fill a ``Transaction`` from one JSON entry."""

    klass = Transaction

    # 'dVl' is the value date ('Date de valeur').
    obj_date = Date(Dict('dVl'), dayfirst=True)
    obj__date = Date(Dict('date', default=None), dayfirst=True, default=NotAvailable)
    obj_coming = False

    # Only l1..l3 are used: l4 and l5 exist most of the time too, but they
    # seem unimportant and would make the label too long.
    obj_raw = Transaction.Raw(Format('%s %s %s', Dict('l1'), Dict('l2'), Dict('l3')))

    def obj_amount(self):
        """Return the decimal read from key 'c' when truthy, else the one from key 'd'."""
        from_c = CleanDecimal(Dict('c', default=None), replace_dots=True, default=None)(self)
        if from_c:
            return from_c
        return CleanDecimal(Dict('d'), replace_dots=True)(self)
class item(BnpHistoryItem):
    """Fill a deferred-card ``Transaction`` from one JSON entry."""

    klass = Transaction

    obj_type = Transaction.TYPE_DEFERRED_CARD
    # The same filter instance feeds both the date and the value date.
    obj_date = obj_vdate = Date(Dict('valeur'), dayfirst=True)

    def obj_amount(self):
        """Return ``montant / 10 ** nb_dec`` from the 'debit' entry, else the 'credit' one."""
        entry = Dict('debit', default=None)(self)
        if not entry:
            entry = Dict('credit')(self)
        unscaled = Decimal(entry['montant'])
        decimals = Decimal(entry['nb_dec'])
        return Eval(lambda value, exponent: value / 10**exponent, unscaled, decimals)(self)
class item(ItemElement):
    """Fill a ``Bill`` from one JSON order entry."""

    klass = Bill

    # The id is '<subid>.<orderId>'.
    obj_id = Format('%s.%s', Env('subid'), Dict('orderId'))
    obj_date = Date(Dict('billingDate'))

    # Constant values for every bill.
    obj_format = u"pdf"
    obj_type = u"bill"

    obj_price = CleanDecimal(Dict('priceWithTax/value'))
    obj__url = Dict('pdfUrl')
class item(ItemElement):
    """Fill a ``Bill`` from one row of the bills table.

    Regex literals are written as raw strings (or with explicit double
    backslashes) so they no longer rely on invalid escape sequences; the
    pattern values themselves are unchanged.
    """

    klass = Bill

    obj_type = DocumentTypes.BILL
    obj_format = "pdf"

    # TableCell('date') can have other info like: 'duplicata'
    obj_date = Date(
        CleanText('./td[@headers="ec-dateCol"]/text()[not(preceding-sibling::br)]'),
        parse_func=parse_french_date,
        dayfirst=True,
    )

    def obj__cell(self):
        """Return the name of the column holding the bill link ('url' or 'infos')."""
        # sometimes the link to the bill is not in the right column (Thanks Orange!!)
        if CleanText(TableCell('url')(self))(self):
            return 'url'
        return 'infos'

    def obj_price(self):
        """Return the price from the 'amount' cell, or the ``_ht`` field when that cell is empty."""
        if CleanText(TableCell('amount')(self))(self):
            return CleanDecimal(
                Regexp(CleanText(TableCell('amount')), r'.*?([\d,]+).*', default=NotAvailable),
                replace_dots=True,
                default=NotAvailable,
            )(self)
        return Field('_ht')(self)

    def obj_currency(self):
        """Return the currency read from the 'amount' cell, or from the 'ht' cell when it is empty."""
        if CleanText(TableCell('amount')(self))(self):
            return Currency(TableCell('amount')(self))(self)
        return Currency(TableCell('ht')(self))(self)

    # Only when a list of documents is present
    obj__url_base = Regexp(
        CleanText('.//ul[@class="liste"]/script', default=None),
        '.*?contentList[\\d]+ \\+= \'<li><a href=".*"(.*?idDocument=2)"',
        default=None,
    )

    def obj_url(self):
        """Return the unescaped script URL when present, else the link found in the ``_cell`` column."""
        if Field('_url_base')(self):
            # URL won't work if HTML is not unescaped.
            # NOTE(review): HTMLParser.unescape was removed in Python 3.9;
            # confirm the interpreter version this module targets.
            return HTMLParser().unescape(str(Field('_url_base')(self)))
        return Link(TableCell(Field('_cell')(self))(self)[0].xpath('./a'), default=NotAvailable)(self)

    obj__label_base = Regexp(
        CleanText('.//ul[@class="liste"]/script', default=None),
        '.*</span>(.*?)</a.*',
        default=None,
    )

    def obj_label(self):
        """Return the unescaped script label when present, else the hidden span text of the ``_cell`` column."""
        if Field('_label_base')(self):
            return HTMLParser().unescape(str(Field('_label_base')(self)))
        return CleanText(
            TableCell(Field('_cell')(self))(self)[0].xpath('.//span[@class="ec_visually_hidden"]')
        )(self)

    obj__ht = CleanDecimal(TableCell('ht', default=NotAvailable), replace_dots=True, default=NotAvailable)

    def obj_vat(self):
        """Return ``price - _ht``, or None when either value is NotAvailable."""
        if Field('_ht')(self) is NotAvailable or Field('price')(self) is NotAvailable:
            return
        return Field('price')(self) - Field('_ht')(self)

    def obj_id(self):
        """Return '<subid>_<ddmmYYYY><amount>', the amount being ``price`` or, failing that, ``_ht``."""
        price = Field('price')(self)
        amount = Field('_ht')(self) if price is NotAvailable else price
        return '%s_%s%s' % (Env('subid')(self), Field('date')(self).strftime('%d%m%Y'), amount)
def get_history(self, account):
    """Yield the transactions parsed from the page for *account*.

    For card accounts the 'dateval' field is used as both date and value
    date and 'date' is stored as ``bdate``; otherwise 'date' is the date
    and 'dateval' the value date (falling back to the date). Transactions
    for which ``self.condition`` returns a truthy value are skipped.
    """
    # The first member of each pair is not used here.
    for _, tr in self.parse_transactions():
        t = self.TRANSACTION()

        # Fixed: compare the account type by value (``==``) rather than by
        # identity (``is``), which only works by accident for constants.
        if account.type == Account.TYPE_CARD:
            date = vdate = Date(dayfirst=True, default=None).filter(tr['dateval'])
            t.bdate = Date(dayfirst=True, default=NotAvailable).filter(tr['date'])
        else:
            date = Date(dayfirst=True, default=None).filter(tr['date'])
            vdate = Date(dayfirst=True, default=None).filter(tr['dateval']) or date

        raw = MyStrip(' '.join([tr['typeope'], tr['LibComp']]))
        t.parse(date, raw, vdate)
        t.set_amount(tr['mont'])
        self.detect_currency(t, raw, account.currency)

        if self.condition(t, account.type):
            continue
        yield t
def obj_rdate(self):
    """Return the rdate of the transaction.

    An rdate already present on the object is kept. Otherwise a
    ``dd/mm/yy`` or six-digit date is searched in the raw label; when none
    is found the ``date`` field is returned.
    """
    if self.obj.rdate:
        # Transaction.Raw may have already set it
        return self.obj.rdate

    # Raw string: the pattern value is unchanged but no longer relies on
    # invalid escape sequences.
    s = Regexp(Field('raw'), r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ', default=NotAvailable)(self)
    if not s:
        return Field('date')(self)

    # Normalise both forms to 'dd-mm-yy' before parsing day first.
    digits = s.replace('/', '')
    return Date(dayfirst=True).filter('%s-%s-%s' % (digits[:2], digits[2:4], digits[4:]))
class item(ItemElement):
    """Fill an ``Investment`` from one table row."""

    klass = Investment

    obj_label = CleanText(TableCell('label'))

    # Quantity and unit value fall back to NotAvailable; valuation has no default.
    obj_quantity = CleanDecimal(
        TableCell('quantity'), replace_dots=True, default=NotAvailable
    )
    obj_unitvalue = CleanDecimal(
        TableCell('unitvalue'), replace_dots=True, default=NotAvailable
    )
    obj_valuation = CleanDecimal(TableCell('valuation'), replace_dots=True)

    obj_vdate = Date(CleanText(TableCell('vdate')), dayfirst=True)

    # Argument captured from the PageExterne('...') call in the anchor's
    # onclick attribute; None when there is no match.
    obj__code_url = Regexp(
        Attr('./td/a', 'onclick', default=""),
        r'PageExterne\(\'([^\']+)',
        default=None,
    )
class item(ItemElement):
    """Fill an ``Investment`` from one JSON fund entry."""

    klass = Investment

    obj_label = Dict('libelleFonds')

    # Numeric values are read from the dict and piped through CleanDecimal.
    obj_unitvalue = Dict('vl') & CleanDecimal
    obj_quantity = Dict('nbParts') & CleanDecimal
    obj_valuation = Dict('mtBrut') & CleanDecimal

    # The code is NotAvailable when 'codeIsin' is absent.
    obj_code = Dict('codeIsin', default=NotAvailable)
    obj_vdate = Date(Dict('dtVl'))
    obj_diff = Dict('mtPMV') & CleanDecimal
class item(ItemElement):
    """Fill an ``Investment`` from one table row."""

    klass = Investment

    obj_label = CleanText(TableCell('label'))
    # The code is whatever follows 'Isin%253D' in the row link, up to the next '%'.
    obj_code = Regexp(Link('./td/a'), 'Isin%253D([^%]+)')

    # Numeric columns, all parsed with MyDecimal.
    obj_quantity = MyDecimal(TableCell('quantity'))
    obj_unitprice = MyDecimal(TableCell('unitprice'))
    obj_unitvalue = MyDecimal(TableCell('unitvalue'))
    obj_valuation = MyDecimal(TableCell('valuation'))

    obj_vdate = Date(
        CleanText(TableCell('vdate')),
        dayfirst=True,
        default=NotAvailable,
    )
class item(ItemElement):
    """Fill a ``Transaction`` from one table row of pending operations."""

    klass = Transaction

    def condition(self):
        """Reject the row whose date cell carries the 'no pending operation' message."""
        date_cell = CleanText(TableCell('date'))(self)
        return u'Aucune opération en attente' not in date_cell

    # NOTE(review): Env('date_guesser') is given as the second positional
    # argument of Date -- confirm this is the parameter that was intended.
    obj_date = Date(CleanText(TableCell('date')), Env('date_guesser'))
    obj_type = Transaction.TYPE_UNKNOWN
    obj_label = CleanText(TableCell('operation'))
    obj_amount = CleanDecimal(TableCell('montant'), replace_dots=True)
def obj_publication_date(self):
    """Return the publication date read from ``p.infos``.

    When the text cannot be parsed as a date, yesterday is returned if it
    contains 'hier', and today otherwise.
    """
    infos = CleanText('p[@class="infos"]')
    try:
        return Date(infos)(self)
    except ParseError:
        # Not a parseable date: fall back to the relative wording below.
        pass

    if 'hier' in infos(self):
        return date.today() - timedelta(days=1)
    return date.today()
class item(ItemElement):
    """Fill a statement ``Document`` from one JSON entry."""

    klass = Document

    # The id is '<subid>_<identifiantDocument/identifiant>'.
    obj_id = Format(
        '%s_%s',
        Env('subid'),
        Dict('identifiantDocument/identifiant'),
    )
    obj_date = Date(Dict('dateCreation'))
    obj_label = Dict('libelle')

    # Constant values for every document.
    obj_format = 'pdf'
    obj_type = DocumentTypes.STATEMENT

    obj_url = Dict('_links/document/href')
class item(ItemElement):
    """Fill a ``Bill`` from one JSON order entry."""

    klass = Bill

    # The id is '<subid>.<orderId>'.
    obj_id = Format('%s.%s', Env('subid'), Dict('orderId'))
    obj_date = Date(Dict('billingDate'))

    # Constant values for every bill.
    obj_format = u"pdf"
    obj_type = DocumentTypes.BILL

    obj_price = CleanDecimal(Dict('priceWithTax/value'))
    obj_url = Dict('pdfUrl')
    # The label is 'Facture <orderId>'.
    obj_label = Format('Facture %s', Dict('orderId'))
class item(ItemElement):
    """Fill a ``Transaction`` from one JSON entry."""

    klass = Transaction

    # Both dates are optional and parsed day first.
    obj_rdate = Date(Dict('date', default=None), dayfirst=True, default=NotAvailable)
    obj_date = Date(Dict('dVl', default=None), dayfirst=True, default=NotAvailable)
    obj__coming = False

    # The label is split into l1, l2, l3, l4, l5.
    # l5 is needed for the transfer label, for example:
    #   'l1': "000001 VIR EUROPEEN EMIS NET"
    #   'l2': "POUR: XXXXXXXXXXXXX"
    #   'l3': "REF: XXXXXXXXXXXXXX"
    #   'l4': "REMISE: XXXXXX TRANSFER LABEL"
    #   'l5': "MOTIF: TRANSFER LABEL"
    obj_raw = Transaction.Raw(
        Format('%s %s %s %s %s', Dict('l1'), Dict('l2'), Dict('l3'), Dict('l4'), Dict('l5'))
    )

    # Keep only the first three rows for the transaction label.
    obj_label = Transaction.Raw(
        Format('%s %s %s', Dict('l1'), Dict('l2'), Dict('l3'))
    )

    def obj_amount(self):
        """Return the decimal read from key 'c' when truthy, else the one from key 'd'."""
        from_c = CleanDecimal(Dict('c', default=None), replace_dots=True, default=None)(self)
        if from_c:
            return from_c
        return CleanDecimal(Dict('d'), replace_dots=True)(self)

    def obj_deleted(self):
        """Return True when the transaction type is a card summary."""
        is_card_summary = self.obj.type == FrenchTransaction.TYPE_CARD_SUMMARY
        return is_card_summary
def parse(self, el):
    """Prepare the ``date``, ``_is_coming``, ``amount`` and ``commission`` env values.

    The date is taken from the "Total prélevé" cell when a ``dd/mm/yyyy``
    date is found there; otherwise it is the last day of the month
    selected in the ``moi`` select box.
    """
    # Fixed: the captured text is dd/mm/yyyy, so it is parsed day first;
    # without it, days <= 12 are read as the month. The regex is a raw
    # string with an unchanged value.
    debit_date = Date(
        Regexp(
            CleanText(u'//td[contains(text(), "Total prélevé")]'),
            r' (\d{2}/\d{2}/\d{4})',
            default=NotAvailable,
        ),
        dayfirst=True,
        default=NotAvailable,
    )(self)

    if not debit_date:
        selected_month = CleanText(u'//select[@id="moi"]/option[@selected]')(self)
        # relativedelta(day=31) moves the 1st of the month to its last day.
        first_of_month = parse_french_date('%s %s' % ('1', selected_month))
        debit_date = (first_of_month + relativedelta(day=31)).date()

    self.env['date'] = debit_date
    self.env['_is_coming'] = date.today() < self.env['date']

    # The amount cell may read '<amount> dont frais <commission>'.
    amount = CleanText(TableCell('amount'))(self).split('dont frais')
    self.env['amount'] = amount[0]
    self.env['commission'] = amount[1] if len(amount) > 1 else NotAvailable
def obj_rdate(self):
    """Return the rdate, preferring a value that differs from the date.

    * an rdate different from the date is returned as is;
    * otherwise 'dateOperation' is returned when it is earlier than the date;
    * otherwise NotAvailable.
    """
    date = Field('date')(self)
    # rdate is already set by `obj_raw` and the patterns.
    rdate = self.obj.rdate
    date_operation = Date(Dict('dateOperation'))(self)

    if rdate != date:
        return rdate
    if date_operation < date:
        return date_operation
    return NotAvailable
def obj_date(self):
    """Return the date of the first column, guessing the year when it is absent.

    Returns None when the text is neither 10 nor 5 characters long.
    """
    # Dates in the first column may appear as '12/01/2019' or '12/01'.
    text = CleanText('./td[1]/font//text()')(self)
    size = len(text)

    if size == 10:
        return Date(CleanText('./td[1]/font//text()'), dayfirst=True)(self)
    if size == 5:
        # No year indicated: let the date guesser pick it.
        return DateGuesser(CleanText('./td[1]//text()'), Env('date_guesser'))(self)
class item(ItemElement):
    """Fill a ``Document`` from one JSON entry."""

    klass = Document

    obj_date = Date(Dict('dateDocument'))
    obj_format = "pdf"
    # The label is '<libelle1> : <libelle3>'.
    obj_label = Format("%s : %s", Dict('libelle1'), Dict('libelle3'))
    # The type is 'libelleIcone' without its 'Icône ' prefix.
    obj_type = CleanText(Dict('libelleIcone'), replace=[('Icône ', '')])
    # The id is the non-blank run preceding a dot in 'libelle2'.
    obj_id = Regexp(Dict('libelle2'), r"(\S+)\.", nth=0)
    obj_url = Format(
        "/prive/telechargerdocumentremuneration/v1?documentUuid=%s",
        Dict('documentUuid'),
    )
def obj_date(self):
    """Return the date of the ``time`` element, or the card debit date when one is found.

    For card items (env ``is_card``), the browser is asked for the debit
    date matching the parsed date; the calendar page is opened first when
    the browser has no deferred card calendar yet.
    """
    parsed = Date(Attr('.//time', 'datetime'))(self)
    if not Env('is_card', default=False)(self):
        return parsed

    browser = self.page.browser
    if browser.deferred_card_calendar is None:
        browser.location(Link('//a[contains(text(), "calendrier")]')(self))

    closest = browser.get_debit_date(parsed)
    return closest if closest else parsed
def obj_details(self):
    """Return a dict of details scraped from the offer page.

    Keys: "DPE" and "GES" (a letter A-G, NotAvailable, or the raw text
    when it is at most one character long), "creationDate",
    "Honoraires" (only when agency fees are displayed) and one entry per
    criterion listed in the description.
    """
    grades = ("A", "B", "C", "D", "E", "F", "G")

    def rating(xpath, prefix):
        # Read a summary block and reduce it to its grade letter.
        value = CleanText(xpath, default=None)(self)
        if value and len(value) > 1:
            # Fixed: slicing instead of indexing, so a block holding only
            # the prefix no longer raises IndexError.
            letter = value.replace(prefix, "").strip()[:1]
            value = letter if letter in grades else None
        return NotAvailable if value is None else value

    details = {}
    details["DPE"] = rating(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("energy-summary")]',
        "DPE"
    )
    details["GES"] = rating(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("greenhouse-summary")]',
        "GES"
    )

    # Escapes are doubled so the pattern value is unchanged without
    # relying on invalid escape sequences.
    details["creationDate"] = Date(
        Regexp(
            CleanText(
                '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
            ),
            u'.*Mis en ligne : (\\d{2}/\\d{2}/\\d{4}).*'
        ),
        dayfirst=True
    )(self)

    honoraires = CleanText(
        '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]',
        default=None
    )(self)
    if honoraires:
        # Keep the text found after the first ':'.
        details["Honoraires"] = "{} (TTC, en sus)".format(
            honoraires.split(":")[1].strip()
        )

    for li in XPath('//ul[@itemprop="description"]/li')(self):
        label = CleanText('./div[has-class("criteria-label")]')(li)
        value = CleanText('./div[has-class("criteria-value")]')(li)
        details[label] = value

    return details
class get_torrent(ItemElement):
    """Fill a ``Torrent`` from a torrent detail page."""

    klass = Torrent

    # The id is the hexadecimal hash displayed in the trackers heading.
    obj_id = Regexp(CleanText('//div[@class="trackers"]/h2'), r'hash ([0-9a-f]+)', '\\1')
    obj_name = CleanText('//div[@class="downlinks"]/h2/span')
    obj_date = CleanText('//div[@class="downlinks"]/div/span/@title') & Date(default=None)
    # The size is the title attribute stripped of ',' and 'b', as a float.
    obj_size = CleanText('//div[@class="files"]/div/@title', replace=[(',', ''), ('b', '')]) & \
        Type(type=float)

    def obj_seeders(self):
        """Return the highest ``span.u`` count among trackers, or NotAvailable."""
        try:
            return max(int(_.text.replace(',', ''))
                       for _ in self.xpath('//div[@class="trackers"]/dl/dd/span[@class="u"]'))
        except ValueError:
            # No tracker listed, or a count that is not an integer.
            return NotAvailable

    def obj_leechers(self):
        """Return the highest ``span.d`` count among trackers, or NotAvailable."""
        try:
            return max(int(_.text.replace(',', ''))
                       for _ in self.xpath('//div[@class="trackers"]/dl/dd/span[@class="d"]'))
        except ValueError:
            # No tracker listed, or a count that is not an integer.
            return NotAvailable

    def obj_url(self):
        """Return the browser base URL followed by the torrent hash."""
        return self.page.browser.BASEURL + \
            Regexp(CleanText('//div[@class="trackers"]/h2'), r'hash ([0-9a-f]+)', '\\1')(self)

    def obj_files(self):
        """Return the file tree as a flat list of lines, one '| ' prefix per depth level."""
        def traverse_nested_lists(ul, result, depth=0):
            for li in ul.xpath('./li'):
                sub_uls = li.xpath('./ul')
                if sub_uls:
                    # Directory entry: print its name, then recurse.
                    result.append(("| " * depth) + ("%s" % li.text))
                    for sub_ul in sub_uls:
                        traverse_nested_lists(sub_ul, result, depth+1)
                else:
                    # Fixed: only a missing <span> is tolerated, instead
                    # of a bare ``except`` hiding every error.
                    try:
                        size = li.xpath('span')[0].text
                    except IndexError:
                        size = ""
                    result.append(("| " * depth) + ("%s [%s]" % (li.text, size)))

        result = []
        traverse_nested_lists(self.xpath('//div[@class="files"]/ul')[0], result)
        return result

    def obj_magnet(self):
        """Return a magnet link built from the hash, the quoted name and the trackers."""
        hsh = Regexp(CleanText('//div[@class="trackers"]/h2'), r'hash ([0-9a-f]+)', '\\1')(self)
        name = "dn=%s" % quote_plus(CleanText('//div[@class="downlinks"]/h2/span')(self))
        trackers = ["tr=%s" % _.text for _ in self.xpath('//div[@class="trackers"]/dl/dt')]
        return "&".join(["magnet:?xt=urn:btih:%s" % hsh, name] + trackers)

    def obj_description(self):
        """Return the download links, separated by blank lines, under a heading."""
        return u"Torrent files available at:\n" + \
            u"\n\n".join(self.xpath('//div[@class="downlinks"]/dl/dt/a/@href'))
def obj_details(self):
    """Return a dict of details scraped from the offer page.

    Contains "dispo", optional "priceMentions" and "agency" texts, one
    nested dict per titled detail category and, when the DPE image URL
    matches, "electric_consumption".
    """
    details = {}

    # Date found in the availability line; today's ISO date is parsed
    # instead when the pattern does not match.
    dispo = Date(
        Regexp(
            CleanText('//p[has-class("OfferTop-dispo")]'),
            r'.* (\d\d\/\d\d\/\d\d\d\d)',
            default=datetime.date.today().isoformat()
        )
    )(self)
    if dispo is not None:
        details["dispo"] = dispo

    # Optional free-text blocks, stored only when present.
    optional_texts = (
        ("priceMentions", '//p[has-class("OfferTop-mentions")]'),
        ("agency", '//p[has-class("OfferContact-address")]'),
    )
    for key, xpath in optional_texts:
        text = CleanText(xpath, default=None)(self)
        if text is not None:
            details[key] = text

    for item in self.xpath('//div[has-class("OfferDetails-columnize")]/div'):
        category = CleanText(
            './h3[has-class("OfferDetails-title--2")]', default=None
        )(item)
        if not category:
            continue

        section = details[category] = {}
        # Title/value pairs.
        for detail_item in item.xpath('.//ul[has-class("List--data")]/li'):
            detail_title = CleanText('.//span[has-class("List-data")]')(detail_item)
            section[detail_title] = CleanText('.//*[has-class("List-value")]')(detail_item)
        # Bullet items are plain flags.
        for detail_item in item.xpath('.//ul[has-class("List--bullet")]/li'):
            section[CleanText('.')(detail_item)] = True

    try:
        electric_consumption = CleanDecimal(
            Regexp(
                Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
                r'https://dpe.foncia.net\/(\d+)\/.*'
            )
        )(self)
        details["electric_consumption"] = '{} kWhEP/m².an'.format(electric_consumption)
    except (RegexpError, XPathNotFound):
        # No matching image: the key is simply left out.
        pass

    return details
class item(ItemElement):
    """Fill a ``FrenchTransaction`` from one table row."""

    klass = FrenchTransaction

    # Date and rdate are both read from the first cell.
    obj_date = Date(CleanText('./td[1]'))
    obj_rdate = Date(CleanText('./td[1]'))
    obj_raw = CleanText('./td[2]')

    # The 4th cell gives the amount (0 when unreadable) and the original currency.
    obj_amount = CleanDecimal('./td[4]', replace_dots=True, default=Decimal('0'))
    obj_original_currency = FrenchTransaction.Currency('./td[4]')

    obj_type = Transaction.TYPE_BANK
    obj__is_coming = False

    def obj_commission(self):
        """Return the 3rd-cell amount minus ``amount``, or NotAvailable when that cell is falsy."""
        gross_amount = CleanDecimal('./td[3]', replace_dots=True, default=NotAvailable)(self)
        if not gross_amount:
            return NotAvailable
        return gross_amount - Field('amount')(self)
def iter_history(self):
    """Yield one ``Transaction`` per row of the ``boursedetail`` table.

    Only rows containing ``td`` cells are used: the 1st cell is the date
    (day first), the 2nd the label and the 3rd the amount.
    """
    rows = self.doc.xpath(u'//table[@class="boursedetail"]/tbody/tr[td]')
    for tr in rows:
        def cell(position):
            # First node matching the position-th td of the current row.
            return tr.xpath('./td[%d]' % position)[0]

        t = Transaction()
        t.label = CleanText('.')(cell(2))
        t.date = Date(dayfirst=True).filter(CleanText('.')(cell(1)))
        t.amount = self.parse_decimal(cell(3))
        yield t
def get_loan_attributes(self, account):
    """Return a ``Loan`` built from the page details and from *account*.

    Amounts, dates, rate and owner name are scraped from the document;
    the identification and balance attributes are copied from *account*.
    """
    doc = self.doc
    date_pattern = r'(\d{2}\/\d{2}\/\d{4})'
    payment_span = '//div[@class="txt-detail " and not (@style)]//span[contains(text(), "Echéance du")]'

    loan = Loan()
    loan.total_amount = CleanDecimal(
        '//div/span[contains(text(), "Capital initial")]/following-sibling::*[1]',
        replace_dots=True,
    )(doc)

    # The name is the header text without its first word.
    owner_name = CleanText('//a[@class="lien-entete login"]/span')(doc)
    loan.name = ' '.join(owner_name.split()[1:])

    loan.subscription_date = Date(
        Regexp(CleanText('//h4[span[contains(text(), "Date de départ du prêt")]]'), date_pattern),
        dayfirst=True,
    )(doc)
    loan.maturity_date = Date(
        Regexp(CleanText('//h4[span[contains(text(), "Date de fin du prêt")]]'), date_pattern),
        dayfirst=True,
    )(doc)

    # The displayed rate is divided by 100.
    loan.rate = Eval(
        lambda x: x / 100,
        CleanDecimal(
            '//div/span[contains(text(), "Taux fixe")]/following-sibling::*[1]',
            replace_dots=True,
        ),
    )(doc)

    loan.last_payment_amount = CleanDecimal(payment_span + '/following-sibling::span[1]')(doc)
    loan.last_payment_date = Date(
        Regexp(CleanText(payment_span), date_pattern),
        dayfirst=True,
    )(doc)

    # Attributes taken verbatim from the account.
    copied = ('id', 'currency', 'label', 'balance', 'coming', 'type',
              '_uncleaned_id', '_multiple_type')
    for attribute in copied:
        setattr(loan, attribute, getattr(account, attribute))

    return loan
class item(ItemElement):
    """Fill a ``Transaction`` from one table row."""

    klass = Transaction

    obj_raw = Transaction.Raw(TableCell('label'))
    obj_vdate = Date(CleanText(TableCell('vdate')), dayfirst=True)

    def obj_amount(self):
        """Return the decimal found in ``div/span`` inside the first 'amount' cell."""
        amount_cell = TableCell('amount')(self)[0]
        return MyDecimal('.//div/span')(amount_cell)

    def obj_date(self):
        """Return the day-first date read from the span whose id contains the fixed token."""
        date_filter = Date(
            CleanText('.//span[contains(@id, "C4__QUE_50FADFF19F566198286748")]'),
            dayfirst=True,
        )
        return date_filter(self)