def obj__date_hours(self):
    """Extract hours from the text of the ``<p class="sb">`` node.

    The lower-cased text is matched against two layouts:

    * ``du <d>/<m>/<y> au <d>/<m>/<y> <rest>``: ``<rest>`` is handed to
      ``TimeParser`` together with the weekday of ``Env('date')`` and the
      parser's ``res`` attribute is returned.
    * ``le <word> <digits> <word> <digits> à <H>h<MM>``: a one-element list
      holding the ``(hour, minute)`` tuple is returned; minutes default
      to 0 when absent.

    Raises:
        AttributeError: when the text matches neither layout (the second
            ``re.match`` returns None).
    """
    date = Env('date')(self)
    weekday = date.weekday()
    txt = CleanText('.//p[@class="sb"]')(self).lower()

    m = re.match(r'du \d+/\d+/\d+ au \d+/\d+/\d+ (.*)', txt)
    if m:
        parser = TimeParser(m.group(1), weekday)
        parser.do_parse()
        return parser.res

    # Raw string avoids invalid escape sequences; re.UNICODE makes \w match
    # non-ASCII word characters on interpreters where it is not the default.
    m = re.match(r'le \w+ \d+ \w+ \d+ à (\d+)h(\d*)$', txt, re.UNICODE)
    return [(int(m.group(1)), int(m.group(2) or 0))]
def parse(self, el):
    """Store the deferred date on the object and skip duplicate transactions.

    When ``Env('key')`` contains ``"DeferredDebit"``, the first truthy
    ``dateDiffere`` found under ``<key>/currentMonthCardList`` is converted
    with ``self.FromTimestamp()`` and stored as ``self.obj._deferred_date``.

    Raises:
        SkipItem: when ``montantEnEuro`` is missing, or when the transaction
            signature is already in ``browser.trs['list']`` and
            ``browser.trs['lastdate']`` is not after this item's date.
    """
    key = Env('key', default=None)(self)
    if key and "DeferredDebit" in key:
        for card in Dict('%s/currentMonthCardList' % key)(self.page.doc):
            deferred_date = Dict('dateDiffere', default=None)(card)
            if deferred_date:
                break
        self.obj._deferred_date = self.FromTimestamp().filter(deferred_date)

    # Skip duplicate transactions
    amount = Dict('montantEnEuro', default=None)(self)
    # Signature used to recognise a transaction that was already seen.
    tr = Dict('libelleCourt')(self) + Dict('dateOperation')(self) + str(amount)
    if amount is None:
        raise SkipItem()

    trs = self.page.browser.trs
    if tr in trs['list'] and trs['lastdate'] <= Field('date')(self):
        raise SkipItem()

    trs['lastdate'] = Field('date')(self)
    trs['list'].append(tr)
class item(ItemElement):
    """Build a ``Bill`` from an element whose ``data-fact_ligne`` matches ``env['subid']``.

    The label and local id are both extracted from query parameters of the
    PDF link (``&date=`` and ``&l=`` respectively).
    """
    klass = Bill

    def condition(self):
        # True only when the element's data-fact_ligne attribute (empty string
        # if absent) equals the subscription id stored in the environment.
        num = Attr('.', 'data-fact_ligne', default='')(self)
        return self.env['subid'] == num

    obj__url = Attr('.//div[@class="pdf"]/a', 'href')
    # Regex literals are raw strings so that \d is not an invalid string escape.
    obj__localid = Regexp(Field('_url'), r'&l=(\d*)&id', u'\\1')
    obj_label = Regexp(Field('_url'), r'&date=(\d*)', u'\\1')
    obj_id = Format('%s.%s', Env('subid'), Field('label'))
    obj_date = FormatDate(Field('label'))
    obj_format = u"pdf"
    obj_type = u"bill"
    obj_price = CleanDecimal('div[@class="montant"]', default=Decimal(0), replace_dots=False)
class item(ItemElement):
    """Map one table row to a ``Bill`` identified by ``<username>_<id>``."""
    klass = Bill

    def parse(self, el):
        # Make the browser's username available to Env('username') below.
        self.env['username'] = self.page.browser.username

    def condition(self):
        # True unless the text of the 'id' cell is exactly "No bills".
        id_text = CleanText(TableCell('id'))(self)
        return id_text != "No bills"

    obj_id = Format('%s_%s', Env('username'), CleanDecimal(TableCell('id')))
    obj__url = Attr('.//a[contains(text(), "PDF")]', 'href', default=NotAvailable)
    obj_date = Date(CleanText(TableCell('date')))
    obj_format = u"pdf"
    obj_label = Format('Facture %s', CleanDecimal(TableCell('id')))
    obj_type = DocumentTypes.BILL
    obj_price = CleanDecimal(TableCell('price'))
    obj_currency = u'EUR'
class item(ItemElement):
    """Map one table row to a ``Transaction`` carrying a single ``Investment``."""
    klass = Transaction

    def parse(self, el):
        # Build the investment from the same row and expose it through the
        # environment so that obj_investments can pick it up.
        inv = Investment()
        inv.label = Field('label')(self)
        inv.code = CleanText(TableCell('code'))(self)
        inv.quantity = MyDecimal(TableCell('quantity'))(self)
        inv.valuation = Field('amount')(self)
        inv.vdate = Field('date')(self)
        self.env['investments'] = [inv]

    obj_label = CleanText(TableCell('label'))
    obj_type = Transaction.TYPE_BANK
    obj_date = Date(CleanText(TableCell('date')), dayfirst=True)
    obj_amount = CleanDecimal(TableCell('amount'))
    obj_investments = Env('investments')
class item(ItemElement):
    """Map one row to a coming ``Transaction`` of type ``TYPE_DEFERRED_CARD``."""
    klass = Transaction

    def condition(self):
        # Truthy only when the second cell has some text.
        return CleanText('./td[2]')(self)

    obj_rdate = Date(CleanText('./td[1]'), dayfirst=True)
    obj_date = Date(Env('date'), dayfirst=True, default=NotAvailable)
    obj_raw = Transaction.Raw(CleanText('./td[2]'))
    obj__coming = True

    def obj_type(self):
        return Transaction.TYPE_DEFERRED_CARD

    def obj_amount(self):
        # Prefer the third cell; fall back to the second one when the third
        # yields a falsy value (not available, or zero).
        amount = CleanDecimal('./td[3]', replace_dots=True, default=NotAvailable)(self)
        if amount:
            return amount
        return CleanDecimal('./td[2]', replace_dots=True)(self)
class item(ItemElement):
    """Map one table row to a ``Transaction``; the label is picked from the label cell."""
    klass = Transaction

    obj_raw = Transaction.Raw(Field('label'))
    obj_date = Date(CleanText(TableCell('date')), dayfirst=True)
    obj_rdate = Date(CleanText(TableCell('date')), dayfirst=True)
    obj_amount = CleanDecimal(TableCell('amount'), replace_dots=True)
    obj__coming = Env('coming', False)

    def obj_label(self):
        """Return the ``<noscript>`` text of the label cell if any, else a label.

        The fallback label is the text following a ``<br>`` in the cell when
        its first word equals the first word of the whole cell text;
        otherwise the whole cell text is used.
        """
        raw_label = CleanText(TableCell('label'))(self)
        cell = TableCell('label')(self)[0]
        label = CleanText(cell.xpath('./br/following-sibling::text()'))(self)
        if not label or label.split()[0] != raw_label.split()[0]:
            label = raw_label
        noscript = CleanText(cell.xpath('./noscript'))(self)
        return noscript or label
class item(ItemElement):
    """Map one element to a ``BaseAudio`` whose url is the ``href`` of its nested link."""
    klass = BaseAudio

    # The id combines Env('radio_id') with the part of the media URL captured
    # between the podcast09/ prefix and the mp3 suffix.
    obj_id = BaseAudioIdFilter(
        Format(u'%s.%s',
               Env('radio_id'),
               Regexp(CleanText('./div/div/a/@href'),
                      'http://media.radiofrance-podcast.net/podcast09/(.*).mp3')))
    obj_ext = u'mp3'
    obj_format = u'mp3'
    obj_url = CleanText('./div/div/a/@href')
    obj_title = Format(u'%s : %s',
                       CleanText('./a/div[@class="subtitle"]'),
                       CleanText('./a/div[@class="title"]'))
    obj_description = CleanText('./div/div/a/@data-asset-xtname')

    def obj_duration(self):
        # data-duration is converted to an integer number of seconds.
        seconds = int(CleanText('./div/div/a/@data-duration')(self))
        return timedelta(seconds=seconds)
class item(ItemElement):
    """Map one table row (with more than two ``<td>`` cells) to an ``Account``."""
    klass = Account

    def condition(self):
        return len(self.el.xpath('./td')) > 2

    class Label(Filter):
        """Strip leading spaces and digits from the text, then title-case it."""

        def filter(self, text):
            return text.lstrip(' 0123456789').title()

    class Type(Filter):
        """Return ``TYPE_MARKET`` when the label contains 'invest', else ``TYPE_UNKNOWN``."""

        def filter(self, label):
            if 'invest' in label.lower():
                return Account.TYPE_MARKET
            return Account.TYPE_UNKNOWN

    obj_label = Label(CleanText('./td[1]/a'))
    # The original assigned Env('coming') here and overwrote it with
    # NotAvailable further down; only the final value was ever effective.
    obj_coming = NotAvailable
    obj_currency = FrenchTransaction.Currency('./td[3]')
    obj__link_id = Link('./td[1]/a')
    obj_type = Type(Field('label'))

    @property
    def obj_balance(self):
        """Return the decimal of the third cell, forced non-positive under a 'Credits' header."""
        headers = self.el.xpath('./parent::*/tr/th')
        balance = CleanDecimal(replace_dots=True).filter(self.el.xpath('./td[3]'))
        if headers and headers[0].text == 'Credits':
            return balance if balance < 0 else -balance
        return balance

    @property
    def obj_id(self):
        """Return the second cell without dots and spaces, suffixed for market accounts."""
        # Investment account and main account can have the same id
        # so we add the account type in case of Investment to prevent conflict
        is_market = Field('type')(self) == Account.TYPE_MARKET
        number = CleanText(replace=[('.', ''), (' ', '')]).filter(self.el.xpath('./td[2]'))
        if is_market:
            return number + ".INVEST"
        return number
class item(ItemElement):
    """Map one table row to a ``Transaction`` and asynchronously open its detail link."""
    klass = Transaction

    obj_date = Date(CleanText(TableCell('date')), dayfirst=True)
    obj_raw = Transaction.Raw(TableCell('raw'))
    obj_amount = CleanDecimal(TableCell('amount'), replace_dots=True)
    obj__detail = Env('detail')

    def obj_id(self):
        """Return the ``numMvt`` value of the row link, or ``NotAvailable`` on TypeError."""
        try:
            # Raw string: \d must reach the regex engine untouched.
            return Regexp(Link('./td/a', default=None), r'numMvt=(\d+)', default=None)(self)
        except TypeError:
            return NotAvailable

    def parse(self, el):
        # Start loading the detail page only when the row has a link; the
        # result (or None) is exposed to obj__detail through the environment.
        link = Link('./td/a', default=None)(self)
        page = self.page.browser.async_open(link) if link else None
        self.env['detail'] = page
class item(ItemElement):
    """Map one JSON entry to a ``Subscription``."""
    klass = Subscription

    def parse(self, el):
        # add spaces: characters of the id part before the first '-' are
        # grouped two by two (zip on a shared iterator drops a trailing odd
        # character) and the pairs are joined with spaces.
        digits = iter(Field('id')(self).split('-')[0])
        pairs = (first + second for first, second in zip(digits, digits))
        self.env['label'] = ' '.join(pairs)

    obj__type = CleanText(Dict('type'))
    obj_label = Env('label')
    obj_subscriber = Format("%s %s %s",
                            CleanText(Dict('civilite')),
                            CleanText(Dict('prenom')),
                            CleanText(Dict('nom')))

    def obj_id(self):
        """Return ``num_ligne``, suffixed with ``-<date-activation>`` when it is not None."""
        if Dict('date-activation')(self) is None:
            return Format('%s', Dict('num_ligne'))(self)
        return Format('%s-%s', Dict('num_ligne'), Dict('date-activation'))(self)
class item(ItemElement):
    """Map one offer element to a ``Housing``.

    Cost and currency are both read from the ``offer-price`` span: the cost
    is the part before a trailing currency symbol (€, $ or £) and falls back
    to ``Decimal(0)``; the currency falls back to ``€``.
    """
    klass = Housing

    obj_id = Format('%s-%s',
                    Env('type'),
                    CleanText('./@id', replace=[('header-offer-', '')]))
    obj_title = CleanText('./div/div/div[@class="offer-details-wrapper"]/div/div/p[@class="offer-type"]/span/@title')
    obj_area = CleanDecimal(
        CleanText('./div/div/div[@class="offer-details-wrapper"]/div/div/div/div/h3/a/span[@class="offer-area-number"]',
                  default=NotAvailable))
    obj_cost = CleanDecimal(
        Regexp(CleanText('./div/div/div[@class="offer-details-wrapper"]/div/div/p[@class="offer-price"]/span',
                         default=NotAvailable),
               '(.*) [%s%s%s]' % (u'€', u'$', u'£'),
               default=NotAvailable),
        default=Decimal(0))
    obj_currency = Regexp(
        CleanText('./div/div/div[@class="offer-details-wrapper"]/div/div/p[@class="offer-price"]/span',
                  default=NotAvailable),
        '.* ([%s%s%s])' % (u'€', u'$', u'£'),
        default=u'€')
    # Raw string so that \d is a regex class, not an invalid string escape.
    obj_date = Date(
        Regexp(CleanText('./div/div/div[has-class("offer-picture-more")]/div/p[@class="offer-update"]'),
               r".*(\d{2}/\d{2}/\d{4}).*"))
    obj_text = CleanText('./div/div/div[@class="offer-details-wrapper"]/div/div/div/p[has-class("offer-description")]/span')
    obj_location = CleanText('./div/div/div[@class="offer-details-wrapper"]/div/div/div/div/h2')
class item(ItemElement):
    """Build a ``Bill`` from an element whose ``data-fact_ligne`` matches ``env['subid']``.

    The local id (``&id=...&date``) and the label (``&date=``) are both
    extracted from the absolute URL of the PDF link.
    """
    klass = Bill

    def condition(self):
        # True only when the element's data-fact_ligne attribute (empty string
        # if absent) equals the subscription id stored in the environment.
        num = Attr('.', 'data-fact_ligne', default='')(self)
        return self.env['subid'] == num

    obj_url = AbsoluteLink('.//div[@class="pdf"]/a')
    obj__localid = Regexp(Field('url'), r'&id=(.*)&date', u'\\1')
    # Raw string so that \d is a regex class, not an invalid string escape.
    obj_label = Regexp(Field('url'), r'&date=(\d*)', u'\\1')
    obj_id = Format('%s.%s', Env('subid'), Field('_localid'))
    obj_date = FormatDate(Field('label'))
    obj_format = u"pdf"
    obj_type = DocumentTypes.BILL
    obj_price = CleanDecimal('div[@class="montant"]', default=Decimal(0), replace_dots=False)
    obj_currency = Currency('div[@class="montant"]')
class get_video(ItemElement):
    """Fill a ``BaseVideo`` from a dict-like document (name, description, uploadDate, ...)."""
    klass = BaseVideo

    obj_id = Env('_id')
    obj_title = CleanText(CleanHTML(Dict('name')))
    obj_description = CleanHTML(Dict('description'))
    obj_date = DateTime(Dict('uploadDate'))
    obj_duration = VimeoDuration(Dict('duration'))
    obj_author = CleanText(Dict('author/name'))

    def obj_nsfw(self):
        # Anything other than the string "True" (the default when the key is
        # missing) marks the video as not safe for work.
        return Dict('isFamilyFriendly', default="True")(self) != "True"

    def obj_thumbnail(self):
        # The thumbnail is constructed from its URL, which is then also set as its url.
        thumb = Thumbnail(Dict('thumbnailUrl')(self.el))
        thumb.url = thumb.id
        return thumb
def obj_id(self):
    """Return the account identifier.

    Life-insurance accounts use ``self.get_lifenumber()`` and PEA/market
    accounts use ``self.get_market_number()`` when those return a truthy
    value. Otherwise the id is looked up in ``Env('numbers')`` by the
    ``index`` entry, falling back to a slug of ``libelleContrat`` and
    ``nomTitulaire`` on KeyError.
    """
    account_type = Field('type')(self)

    number = None
    if account_type == Account.TYPE_LIFE_INSURANCE:
        number = self.get_lifenumber()
    elif account_type in (Account.TYPE_PEA, Account.TYPE_MARKET):
        number = self.get_market_number()
    if number:
        return number

    try:
        return Env('numbers')(self)[Dict('index')(self)]
    except KeyError:
        # index often changes, so we can't use it... and have to do something ugly
        slug = Slugify(Format('%s-%s', Dict('libelleContrat'), Dict('nomTitulaire')))
        return slug(self)
class item(ItemElement):
    """Map one table row to a ``Subscription``."""
    klass = Subscription

    def condition(self):
        # has the same id as the main account it depends on
        label = Field('label')(self)
        return 'Points de fidélité' not in label

    obj_id = CleanText(TableCell('id'), replace=[(' ', '')])
    obj_subscriber = Env('subscriber')

    def obj_label(self):
        """Return the label cell text, or ``"<type> <id>"`` when that text is empty."""
        label = CleanText(TableCell('label'))(self)
        if label:
            return label
        return Format('%s %s', CleanText(TableCell('type')), Field('id'))(self)

    def obj__rad_button_id(self):
        # 'name' attribute of the input nested in the first 'rad_button' cell.
        button_cell = TableCell('rad_button')(self)[0]
        return Attr('.//div/input','name')(button_cell)
class item(ItemElement):
    """Map one table row to an ``Account`` of type ``TYPE_CARD`` in EUR.

    The id concatenates the second cell (spaces removed), the third cell and
    the first cell (spaces removed).
    """
    klass = Account

    obj_id = Format(
        '%s%s%s',
        CleanText('./td[2]', replace=[(' ', '')]),
        CleanText('./td[3]'),
        CleanText('./td[1]', replace=[(' ', '')]),
    )
    obj__owner = CleanText('./td[1]')
    obj_number = CleanText('./td[2]', replace=[(' ', '')])
    obj_label = CleanText('./td[1]')
    obj_type = Account.TYPE_CARD
    obj__status = CleanText('./td[5]')
    obj_currency = 'EUR'
    obj_url = Link('./td[2]/a')
    # this field is something used to make the module work, not something
    # meant to be displayed to end users
    obj__company = Env('company', default=None)
class get_video(ItemElement):
    """Fill an mp4 ``BaseVideo`` flagged NSFW from the page's ``XPlayerTPL2`` script.

    The JSON argument captured from that script is decoded into ``_props``,
    from which the duration and the first mp4 source URL are read.
    """
    klass = BaseVideo

    obj_nsfw = True
    obj_ext = 'mp4'
    obj_title = Attr('//meta[@property="og:title"]', 'content')
    obj_id = Env('id')
    obj__props = Eval(
        json.loads,
        Regexp(RawText('//script[contains(text(),"XPlayerTPL2")]'),
               r'XPlayerTPL2\(\n[^\n]+\n(.*),\n'),
    )
    obj_duration = Base(Field('_props'), Dict('duration'))
    obj_url = Base(Field('_props'), Dict('sources/mp4/0/url'))

    def obj__page(self):
        # URL of the page the video was scraped from.
        return self.page.url
def next_page(self):
    """Build the POST request for ``history_next``, or return None.

    The paging data lives under ``donnees`` when the current URL contains
    ``'Prochain'`` and under ``donnees/compte`` otherwise. None is returned
    implicitly when the remaining-operations value is falsy.
    """
    donnees = self.page.doc['donnees']
    d = donnees if 'Prochain' in self.page.url else donnees['compte']

    # Two key families exist for the same information.
    if 'ecrituresRestantes' in d:
        remaining, seal = d['ecrituresRestantes'], d['sceauEcriture']
    else:
        remaining, seal = d['operationsRestantes'], d['sceauOperation']

    if not remaining:
        return None

    data = {'b64e4000_sceauEcriture': seal}
    if 'intraday' not in self.page.url:
        data['cl200_typeReleve'] = Env('value')(self)
    return requests.Request("POST", BrowserURL('history_next')(self), data=data)
class item(ItemElement):
    """Map one element to a statement ``Bill`` labelled after its first link."""
    klass = Bill

    def condition(self):
        # False when the first link's text mentions 'tous les relev' or 'annuel'.
        link_text = CleanText('a[1]')(self.el)
        return 'tous les relev' not in link_text and 'annuel' not in link_text

    obj_label = CleanText('a[1]', replace=[(' ', '-')])
    obj_id = Format('%s-%s', Env('subid'), Field('label'))
    # Force first day of month as label is in form "janvier 2016"
    obj_date = Format('1 %s', Field('label')) & Date(parse_func=parse_french_date)
    obj_format = 'pdf'
    obj_type = DocumentTypes.STATEMENT
    obj__localid = Attr('a[2]', 'onclick')

    def obj__year(self):
        # Second space-separated token of the first link's text, as an int.
        tokens = CleanText('a[1]')(self).split(' ')
        return int(tokens[1])
class item(ItemElement):
    """Map one listing element to a ``BREvent`` in the ``Europe/Paris`` timezone."""
    klass = BREvent

    obj_summary = CleanText('.//h4')
    obj_url = AbsoluteLink('.//h4/a')
    obj_description = CleanText('.//div[@class="libellepreliste"]')
    obj_city = CleanText('(.//span[@class="lieu"]/a)[2]')
    obj_location = CleanText('(.//span[@class="lieu"]/a)[1]')
    obj_timezone = 'Europe/Paris'

    def obj_price(self):
        # The euro sign is replaced by a dot before converting to float.
        return float(
            CleanText('.//span[@class="prixli"]')(self).replace('€', '.'))

    def obj__date_hours(self):
        """Extract hours from the text of the ``<p class="sb">`` node.

        For ``du <date> au <date> <rest>`` the rest is parsed by
        ``TimeParser`` with the weekday of ``Env('date')`` and its ``res`` is
        returned. Otherwise the text must look like
        ``le <word> <digits> <word> <digits> à <H>h<MM>`` and a one-element
        list with the ``(hour, minute)`` tuple is returned (minutes default
        to 0). AttributeError is raised when neither layout matches.
        """
        date = Env('date')(self)
        weekday = date.weekday()
        txt = CleanText('.//p[@class="sb"]')(self).lower()

        m = re.match(r'du \d+/\d+/\d+ au \d+/\d+/\d+ (.*)', txt)
        if m:
            parser = TimeParser(m.group(1), weekday)
            parser.do_parse()
            return parser.res

        # Raw string so that \w and \d are not invalid string escapes.
        m = re.match(r'le \w+ \d+ \w+ \d+ à (\d+)h(\d*)$', txt, re.UNICODE)
        return [(int(m.group(1)), int(m.group(2) or 0))]

    obj_start_date = Env('date')

    def obj_category(self):
        # First LABEL_TO_CAT key found in the category link text wins;
        # CATEGORIES.AUTRE is the fallback.
        text = CleanText(
            './/h4/following-sibling::span[@class="small"]/a')(self)
        for k in LABEL_TO_CAT:
            if k in text:
                return LABEL_TO_CAT[k]
        return CATEGORIES.AUTRE

    def obj_siteid(self):
        # 'id' group of the browser's event URL pattern matched on this event's url.
        return self.page.browser.event.match(
            Field('url')(self)).group('id')
class get_recipe(ItemElement):
    """Fill a ``Recipe`` from the recipe HTML page."""
    klass = Recipe

    obj_id = Env('_id')
    obj_title = CleanText('//h1')
    obj_picture_url = Format(
        'http:%s',
        CleanText('//img[@id="shareimg" and @src!=""]/@src', default=None))
    obj_thumbnail_url = Format(
        'http:%s',
        CleanText('//img[@id="shareimg" and @src!=""]/@src', default=None))

    def obj_preparation_time(self):
        # Duration of the preparation span, expressed in whole minutes.
        duration = CuisineazDuration(
            CleanText('//span[@id="ctl00_ContentPlaceHolder_LblRecetteTempsPrepa"]'))
        return int(duration(self).total_seconds() / 60)

    def obj_cooking_time(self):
        # Duration of the cooking span, expressed in whole minutes.
        duration = CuisineazDuration(
            CleanText('//span[@id="ctl00_ContentPlaceHolder_LblRecetteTempsCuisson"]'))
        return int(duration(self).total_seconds() / 60)

    def obj_nb_person(self):
        # One-element list with the span text, or NotAvailable when it is empty.
        nb_pers = CleanText('//span[@id="ctl00_ContentPlaceHolder_LblRecetteNombre"]')(self)
        if not nb_pers:
            return NotAvailable
        return [nb_pers]

    def obj_ingredients(self):
        # Cleaned text of every <li> of the ingredients section.
        nodes = XPath('//section[has-class("recipe_ingredients")]/ul/li')(self)
        return [CleanText('.')(node) for node in nodes]

    obj_instructions = Join('\n\n - ',
                            '//div[@id="preparation"]/span/p/text()',
                            addBefore=' - ')
class item(ItemElement):
    """Map one JSON security entry to an ``Investment``.

    ``parse`` fills the environment: when the security currency differs from
    ``env['currency']`` the ``o_*`` (original currency) values are set,
    otherwise ``unitvalue`` and ``unitprice`` are set. ``vdate`` is always
    taken from ``PortfolioSummary/UpdatedAt`` of the page document.
    """
    klass = Investment

    def parse(self, el):
        account_currency = self.env['currency']
        security_currency = CleanText(Dict('CurrencyCode'))(self)
        if account_currency != security_currency:
            self.env['o_currency'] = security_currency
            self.env['o_unitvalue'] = MyDecimal(Dict('Quote'))(self)
            self.env['o_unitprice'] = MyDecimal(Dict('HistoricQuote'))(self)
            self.env['o_valuation'] = MyDecimal(Dict('ValueInSecurityCurrency'))(self)
            self.env['o_diff'] = MyDecimal(Dict('ResultValueInSecurityCurrency'))(self)
        else:
            self.env['unitvalue'] = MyDecimal(Dict('Quote'))(self)
            self.env['unitprice'] = MyDecimal(Dict('HistoricQuote'))(self)
        updated_at = Dict('PortfolioSummary/UpdatedAt')(self.page.doc)
        self.env['vdate'] = Date(dayfirst=True).filter(updated_at)

    obj_label = Dict('SecurityName')
    obj_quantity = MyDecimal(Dict('Quantity'))
    obj_vdate = Env('vdate')
    obj_unitvalue = Env('unitvalue', default=NotAvailable)
    obj_unitprice = Env('unitprice', default=NotAvailable)
    obj_valuation = MyDecimal(Dict('ValueInEuro'))
    obj_diff = MyDecimal(Dict('ResultValueInEuro'))
    obj_diff_percent = Eval(lambda x: x / 100, MyDecimal(Dict('ResultPercentageInEuro')))
    obj_original_currency = Env('o_currency', default=NotAvailable)
    obj_original_unitvalue = Env('o_unitvalue', default=NotAvailable)
    obj_original_unitprice = Env('o_unitprice', default=NotAvailable)
    obj_original_valuation = Env('o_valuation', default=NotAvailable)
    obj_original_diff = Env('o_diff', default=NotAvailable)
    obj__security_id = Dict('SecurityId')

    def obj_code(self):
        """Return the ISIN when valid, ``"XX-liquidity"`` for 'espèces' labels, else NotAvailable."""
        isin = Dict('IsinCode')(self)
        if is_isin_valid(isin):
            return isin
        if "espèces" in Field('label')(self).lower():
            return "XX-liquidity"
        return NotAvailable

    def obj_code_type(self):
        # Only a valid ISIN gets a code type.
        if not is_isin_valid(Field('code')(self)):
            return NotAvailable
        return Investment.CODE_TYPE_ISIN
class item(ItemElement):
    """Map one JSON bill entry to a pdf ``Bill`` with a ``doc_api_pro`` URL."""
    klass = Bill

    obj_date = Date(Dict('dueDate'), parse_func=parse_french_date, default=NotAvailable)
    obj_price = CleanDecimal(Dict('amountIncludingTax'))
    obj_format = 'pdf'

    def obj_label(self):
        bill_date = Field('date')(self)
        return 'Facture du %s' % bill_date

    def obj_id(self):
        # "<subid>_<ddmmyyyy>" built from the subscription id and the bill date.
        subid = Env('subid')(self)
        day_stamp = Field('date')(self).strftime('%d%m%Y')
        return '%s_%s' % (subid, day_stamp)

    def get_params(self):
        # URL-encoded query string passed as ``billparams`` to the URL below.
        return urlencode({
            'billid': Dict('id')(self),
            'billDate': Dict('dueDate')(self),
        })

    obj_url = BrowserURL(
        'doc_api_pro',
        subid=Env('subid'),
        dir=Dict('documents/0/mainDir'),
        fact_type=Dict('documents/0/subDir'),
        billparams=get_params,
    )
    obj__is_v2 = False
def parse(self, el):
    """Check the document for an error and select ``self.item_xpath``.

    Raises:
        AssertionError: when an ``exception`` entry is present and its message
            does not contain ``SERVICE_INDISPONIBLE``.
        BrowserUnavailable: when an ``exception`` entry is present and its
            message contains ``SERVICE_INDISPONIBLE``.
    """
    exception = Dict('exception', default=None)(self)
    if exception:
        message = exception.get('message', '')
        assert 'SERVICE_INDISPONIBLE' in message, 'Unknown error in history page: "%s"' % message
        # The error message is a stack trace so we do not
        # send it.
        raise BrowserUnavailable()

    # Key only if coming
    key = Env('key', default=None)(self)
    if not key:
        self.item_xpath = "listOperationProxy"
        return

    if "CardList" in key:
        suffix = "currentMonthCardList/*/listeOperations"
    elif "futureOperationList" in key:
        suffix = "futurePrelevementList"
    else:
        suffix = "operationList"
    self.item_xpath = "%s/%s" % (key, suffix)
class get_recipe(ItemElement):
    """Fill a ``Recipe`` from the page's ``application/ld+json`` script."""
    klass = Recipe

    def parse(self, el):
        # Replace the element with the decoded JSON-LD content so that the
        # Dict filters below read from it.
        raw_json = CleanText('//script[@type="application/ld+json"]')(el)
        self.el = json.loads(raw_json)

    obj_id = Env('id')
    obj_title = Dict('name')
    obj_ingredients = Dict('recipeIngredient')
    obj_cooking_time = Time('cookTime')
    obj_preparation_time = Time('prepTime')

    def obj_nb_person(self):
        # One-element list holding the decimal of recipeYield (0 by default).
        servings = CleanDecimal(Dict('recipeYield'), default=0)(self)
        return [servings]

    obj_instructions = Dict('recipeInstructions')
    obj_picture_url = Dict('image', default='')
    obj_author = Dict('author/name', default=NotAvailable)
class item(ItemElement):
    """Map one element to a pdf ``Bill``.

    The id is ``<subid>_<decimal of the date span>``; the currency is the
    first character of the amount span that is neither whitespace, a digit
    nor a comma.
    """
    klass = Bill

    obj_id = Format(
        '%s_%s',
        Env('subid'),
        CleanDecimal(CleanText('.//span[@class="date magic_gras magic_font13"]')))
    obj__url = Attr('.//span[@class="telecharger pdf"]/a', 'href', default=NotAvailable)
    obj_date = Date(CleanText('.//span[@class="date magic_gras magic_font13"]'))
    obj_format = u"pdf"
    obj_type = u"bill"
    obj_price = CleanDecimal('span[@class="montant"]', replace_dots=True)
    # Raw string so that \s and \d are regex classes, not invalid string escapes.
    obj_currency = Regexp(CleanText('span[@class="montant"]'), r'([^\s\d,])')
class fill_paste(ItemElement):
    """Fill a ``SprungePaste`` from the raw response text of the page."""
    klass = SprungePaste

    obj_id = Env('id')
    obj_title = NotAvailable

    def obj_contents(self):
        body = self.page.response.text
        # Sprunge seems to add a newline to our original text
        return body[:-1] if body.endswith(u'\n') else body

    obj_url = BrowserURL('paste', id=Field('id'))

    def validate(self, obj):
        """Return True, or raise ``PasteNotFound`` when the contents are ``"<id> not found."``."""
        not_found_text = u'%s not found.' % obj.id
        if obj.contents == not_found_text:
            raise PasteNotFound()
        return True
class item(ItemElement):
    """Map one table row to a ``Bill``; url and label come from the async 'details' page."""
    klass = Bill

    def parse(self, el):
        # Make the browser's username available to Env('email') below.
        self.env['email'] = self.page.browser.username

    # Asynchronously load the page behind the row link under the name 'details'.
    load_details = Attr('./td/a', 'href') & AsyncLoad

    obj_id = Format('%s_%s', Env('email'), CleanDecimal(TableCell('id')))
    obj_url = Async('details') & Link('//a[contains(@href, "facture")]',
                                      default=NotAvailable)
    obj_date = Date(CleanText(TableCell('date')))
    obj_format = u"pdf"
    obj_label = Async('details') & CleanText('//table/tr/td[@class="Prod"]')
    obj_type = u"bill"
    obj_price = CleanDecimal(TableCell('price'), replace_dots=True)
    obj_currency = u'EUR'
class item(ItemElement):
    """Map one element to a pdf ``Bill`` identified by ``<subid>_<decimal of the date span>``."""
    klass = Bill

    obj_id = Format('%s_%s',
                    Env('subid'),
                    CleanDecimal(CleanText('.//span[has-class("date")]')))
    obj__url = Link('.//span[has-class("pdf")]/a', default=NotAvailable)
    obj_date = Date(CleanText('.//span[has-class("date")]'), dayfirst=True)
    obj_label = CleanText('.//span[has-class("date")]')
    obj_format = u"pdf"
    obj_type = u"bill"
    obj_price = CleanDecimal('span[@class="montant"]', replace_dots=True)

    def obj_currency(self):
        # Currency is derived from the text of the amount span.
        amount_text = CleanText('span[@class="montant"]')(self)
        return Bill.get_currency(amount_text)
class get_video(ItemElement):
    """Fill a ``BaseVideo`` from the ``itemprop`` meta tags of the page."""
    klass = BaseVideo

    def _balise(prop):
        # XPath of the content attribute of the video meta tag for ``prop``;
        # only called while the class body is being evaluated.
        return '//div[@itemprop="video"]/meta[@itemprop="%s"]/@content' % prop

    obj_id = Env('_id')
    obj_title = CleanText(_balise('name'))
    obj_date = DateTime(CleanText(_balise('dateCreated')))
    obj_duration = VimeoDuration(CleanText(_balise('duration')))
    obj_description = CleanText(_balise('description'))
    obj_author = CleanText('//div[@itemprop="author"]/meta[@itemprop="name"]/@content')

    def obj_thumbnail(self):
        # The thumbnail is constructed from its link, which is then also set as its url.
        href = CleanText(
            '//div[@itemprop="video"]/span[@itemprop="thumbnail"]/link/@href')(self.el)
        thumb = BaseImage(href)
        thumb.url = thumb.id
        return thumb
def obj_split_path(self):
    """Return the ``Env('cat')`` path extended with this object's id.

    A new list is built instead of appending in place to the value held in
    the environment, so evaluating this field more than once for the same
    item does not append the id repeatedly.
    """
    cat = Env('cat')(self)
    return list(cat) + [Field('id')(self)]