class item(ItemElement):
    """Build one ``Investment`` from a table row.

    The 'reference' cell is converted from HTML to text and then mined with
    regular expressions: the label is the text preceding a blank line, the
    unit value is the text found between two blank-line separators, and the
    valuation date is the first dd/mm/yyyy token.
    """

    klass = Investment

    # These two patterns rely on real newline characters, so they stay
    # regular (non-raw) strings.
    obj_label = CleanText(
        Regexp(CleanHTML(TableCell('reference')), '(.*)\n\n'))
    # Raw string: ``\d`` is a regex escape, not a string escape.
    # dayfirst=True: the captured token is dd/mm/yyyy, so without it a date
    # such as 05/03/2019 would be read month-first.
    obj_vdate = Date(
        Regexp(CleanHTML(TableCell('reference')), r'(\d{2}/\d{2}/\d{4})'),
        dayfirst=True)
    obj_unitvalue = CleanDecimal(
        Regexp(CleanHTML(TableCell('reference')), '.*\n\n(.*)\n\n'),
        replace_dots=True)
    obj_description = CleanText(CleanHTML(TableCell('reference')))
    obj_portfolio_share = CleanDecimal(
        CleanHTML(TableCell('repartition')), replace_dots=True)
    obj_valuation = CleanDecimal(
        CleanHTML(TableCell('montant')), replace_dots=True)

    def obj_quantity(self):
        """Return the quantity, computed as valuation / unit value."""
        return Decimal(
            Field('valuation')(self) / Field('unitvalue')(self))
def iter_documents(self, subscription):
    """Collect the bills attached to *subscription*.

    The first batch comes from the plain ``pnrs`` endpoint; later batches
    are requested with a ``date`` query parameter. After a batch that
    produced new bills, the next request uses the date of the last bill
    created plus 8 weeks; after a batch without new bills, the date is
    moved back by 4 weeks. The loop ends after ``month_back`` consecutive
    batches that brought nothing new.

    :param subscription: object whose ``id`` prefixes every bill id
    :return: an iterator over the collected ``Bill`` objects
    """
    docs, docs_len, check, month_back, date = list(), -1, 0, 6, None
    # PNR ids already turned into a bill. Tracking them directly avoids
    # re-splitting every stored bill id on '_' for each PNR (quadratic, and
    # wrong when the subscription id itself contains '_' or when the PNR id
    # is not a string).
    seen_pnr_ids = set()

    # First request is known
    bills = self.request('pnrs')
    while check < month_back:
        # If not first
        if docs_len > -1 and date:
            if check > 0:
                # If nothing, we try 4 weeks back
                date = (datetime.strptime(date, '%Y-%m-%d') - relativedelta(weeks=4)).strftime('%Y-%m-%d')
            else:
                # Add 8 weeks to last date to be sure to get all
                date = (datetime.combine(date, datetime.min.time()) + relativedelta(weeks=8)).strftime('%Y-%m-%d')
            bills = self.request('pnrs?date=%s' % date)

        docs_len = len(docs)
        for proof, pnr, trip in zip(bills['proofs'], bills['pnrs'], bills['trips']):
            # Skip PNRs that already produced a bill
            if pnr['id'] in seen_pnr_ids:
                continue
            seen_pnr_ids.add(pnr['id'])

            b = Bill()
            b.id = '%s_%s' % (subscription.id, pnr['id'])
            b._url = proof['url']
            b.date = Date().filter(proof['created_at'])
            b.format = u"pdf"
            b.label = u'Trajet du %s' % Date().filter(
                trip['departure_date'])
            b.type = DocumentTypes.BILL
            b.vat = CleanDecimal().filter('0')
            if pnr['cents']:
                # Amount is given in cents; convert to a 2-decimal string
                b.price = CleanDecimal().filter(
                    format(pnr['cents'] / float(100), '.2f'))
            b.currency = pnr['currency']
            docs.append(b)

        check += 1
        # If a new bill is found, we reset check
        if docs_len < len(docs):
            # ``b`` is necessarily bound here: at least one bill was
            # appended during this pass.
            date = b.date
            check = 0

    return iter(docs)
def validate(self, obj):
    """Complete *obj* with data read from its asynchronous 'details' page.

    Card statements ('RELEVE CB') are typed as card summaries and flagged
    as deleted. The label found on the details page replaces or extends
    the current raw label. When the transaction has no date, the date,
    value date and amount are read from the details page as well.

    :param obj: the transaction being built
    :return: always ``True``
    """
    def from_details(selector):
        # Evaluate *selector* against the asynchronously loaded page
        return Async('details', selector)(self)

    def details_amount():
        return from_details(CleanDecimal(
            u'//td[contains(text(), "Montant")]/following-sibling::*[1]',
            replace_dots=True, default=NotAvailable))

    if obj.category == 'RELEVE CB':
        obj.type = Transaction.TYPE_CARD_SUMMARY
        obj.deleted = True

    detailed_label = from_details(CleanText(
        u'//td[contains(text(), "Libellé")]/following-sibling::*[1]|//td[contains(text(), "Nom du donneur")]/following-sibling::*[1]',
        default=obj.raw))
    if detailed_label:
        # One label contains the other, or the current one is a single
        # word: the detailed label wins. Otherwise both are concatenated.
        overlapping = (obj.raw in detailed_label
                       or detailed_label in obj.raw
                       or ' ' not in obj.raw)
        if overlapping:
            merged = detailed_label
        else:
            merged = '%s %s' % (obj.raw, detailed_label)
        obj.label = merged
        obj.raw = merged

    if not obj.date:
        obj.date = from_details(Date(
            CleanText(
                u'//td[contains(text(), "Date de l\'opération")]/following-sibling::*[1]',
                default=u''),
            default=NotAvailable))
        obj.rdate = obj.date
        obj.vdate = from_details(Date(
            CleanText(
                u'//td[contains(text(), "Date de valeur")]/following-sibling::*[1]',
                default=u''),
            default=NotAvailable))
        obj.amount = details_amount()

    # ugly hack to fix broken html
    if not obj.amount:
        obj.amount = details_amount()

    return True
class fill_loan_details(ItemElement):
    """Fill a loan object from the ``data`` section of a JSON document."""

    def condition(self):
        # A loan without any information (i.e. one that is already
        # refunded) has a null data/message, whereas the message is "OK"
        # when everything is fine.
        message = Dict('data/message')(self)
        return message == 'OK'

    obj_total_amount = Dict('data/montantPret')
    obj_maturity_date = Date(
        Dict('data/dateEcheanceRemboursement'), dayfirst=True)
    obj_duration = Dict('data/dureeRemboursement')
    obj_rate = Dict('data/tauxRemboursement')
    obj_nb_payments_left = Dict('data/nbRemboursementRestant')
    obj_next_payment_date = Date(
        Dict('data/dateProchainAmortissement'), dayfirst=True)
    # "<last name> <first name>" of the loan holder
    obj__subscriber = Format(
        '%s %s',
        Dict('data/titulaire/nom'),
        Dict('data/titulaire/prenom'))
    obj__iduser = None
class item(ItemElement):
    """Build one card ``Transaction`` from a table row."""

    klass = Transaction

    obj_label = CleanText(TableCell('label'))
    obj_amount = CleanDecimal.SI(TableCell('amount'))
    # The transaction date is supplied through the environment, while the
    # date printed in the row is kept as the real date.
    obj_date = Env('date')
    obj_rdate = Date(
        CleanText(TableCell('date')),
        dayfirst=True)
    obj_type = Transaction.TYPE_CARD
def obj_date(self):
    """Return the date found in the first column of the row.

    The column holds either a full date ('12/01/2019') or a day/month pair
    ('12/01'); the latter is resolved through the ``date_guesser`` taken
    from the environment. Any other length yields ``None``.
    """
    shown = CleanText('./td[1]/font//text()')(self)
    size = len(shown)

    if size == 5:
        # Date has no indicated year.
        return DateGuesser(
            CleanText('./td[1]//text()'),
            Env('date_guesser'))(self)

    if size == 10:
        return Date(
            CleanText('./td[1]/font//text()'),
            dayfirst=True)(self)

    return None
class item(ItemElement):
    """Build one ``LITransaction`` from a table row."""

    klass = LITransaction

    obj_raw = LITransaction.Raw(
        CleanText(TableCell('label')))
    # NOTE(review): no ``dayfirst`` is given here -- confirm that the
    # site's date format is unambiguous.
    obj_date = Date(
        CleanText(TableCell('date')))
    # The amount is built from the 'amount' and 'gross_amount' columns,
    # with dot replacement disabled.
    obj_amount = Transaction.Amount(
        TableCell('amount'),
        TableCell('gross_amount'),
        replace_dots=False)
class item(ItemElement):
    """Build one ``Transaction`` from a table row."""

    klass = Transaction

    obj_raw = Transaction.Raw(Field('label'))
    obj_date = Date(CleanText(TableCell('date')), dayfirst=True)
    obj_rdate = Date(CleanText(TableCell('date')), dayfirst=True)
    obj_amount = CleanDecimal(TableCell('amount'), replace_dots=True)
    obj__coming = Env('coming', False)

    def obj_label(self):
        """Pick the most specific label available in the 'label' cell.

        Preference order: the text of a ``<noscript>`` child, then the text
        following a ``<br>`` (only when it starts with the same word as the
        whole cell), then the whole cell text.
        """
        whole_text = CleanText(TableCell('label'))(self)
        cell = TableCell('label')(self)[0]

        after_break = CleanText(
            cell.xpath('./br/following-sibling::text()'))(self)
        if not after_break or after_break.split()[0] != whole_text.split()[0]:
            after_break = whole_text

        noscript_text = CleanText(cell.xpath('./noscript'))(self)
        return noscript_text or after_break
def get_history(self, acc_type):
    """Yield the transactions embedded in the page's JavaScript.

    The movements are read from the ``ListeMvts_data`` JavaScript array.
    For card accounts with a debit date, that date is used both as date
    and value date; otherwise the operation date is used and rows without
    a parsable date are skipped. Transactions for which
    ``self.condition(t, acc_type)`` is true are skipped as well.

    :param acc_type: account type, compared with ``Account.TYPE_CARD``
    """
    txt = self.get_from_js('ListeMvts_data = new Array(', ');\n')
    if txt is None:
        # Either the page shows the "no movements" panel (which is normal)
        # or the account has no history: in both cases there is nothing
        # to yield.
        return

    data = ast.literal_eval('[%s]' % txt.replace('"', '\\"'))
    for line in data:
        t = Transaction()

        # Compare by value: ``is`` only tests object identity.
        if acc_type == Account.TYPE_CARD and MyStrip(
                line[self.COL_DEBIT_DATE]):
            date = vdate = Date(dayfirst=True).filter(
                MyStrip(line[self.COL_DEBIT_DATE]))
        else:
            date = Date(dayfirst=True, default=NotAvailable).filter(
                MyStrip(line[self.COL_DATE]))
            if not date:
                continue
            vdate = MyStrip(line[self.COL_DEBIT_DATE])
            if vdate != '':
                vdate = Date(dayfirst=True).filter(vdate)

        raw = MyStrip(line[self.COL_LABEL])
        t.parse(date, raw, vdate=vdate)
        t.set_amount(line[self.COL_VALUE])

        # Fee lines may carry a zero amount while the real value is
        # written in the label (e.g. "12,34E"); recover it as a debit.
        if t.amount == 0 and t.label.startswith('FRAIS DE '):
            m = re.search(r'(\b\d+,\d+)E\b', t.label)
            if m:
                t.amount = -CleanDecimal(replace_dots=True).filter(
                    m.group(1))
                self.logger.info('parsing amount in transaction label: %r', t)

        if self.condition(t, acc_type):
            continue
        yield t
def obj_date(self):
    """Return the transaction date, or the debit date for card rows.

    Outside card mode (``is_card`` env flag) the date is read from the
    row's ``<time datetime=...>`` attribute. In card mode the debit date
    is taken from the "Solde débité au" summary title matching the
    selected period; when no such title exists, the browser is asked for
    the debit date closest to the row date, falling back to the row date.
    """
    if not Env('is_card', default=False)(self):
        return Date(Attr('.//time', 'datetime'))(self)

    period = '1' if Env('is_previous', default=False)(self) else '0'
    month = CleanText(
        '//label[@for="movementSearch_period_%s"]' % period,
        replace=[(u'Débit ', '')])(self)
    summary_xpath = u'//li[h3]/h4[@class="summary__title" and contains(text(), "Solde débité au")and contains(text(), "%s")]' % month
    date_text = CleanText(
        summary_xpath,
        replace=[(u'Solde débité au ', '')])(self)

    if date_text:
        return parse_french_date(date_text).date()

    row_date = Date(Attr('.//time', 'datetime'))(self)
    browser = self.page.browser
    if browser.deferred_card_calendar is None:
        # Visit the calendar link before asking the browser for a debit date
        browser.location(Link('//a[contains(text(), "calendrier")]')(self))
    return browser.get_debit_date(row_date) or row_date
def obj_date(self):
    """Return the transaction date read from the 'date' column.

    A 4-character value is treated as a bare year and mapped to December
    31st of that year; anything else is parsed as a day-first date.
    """
    shown = CleanText(TableCell('date'))(self)
    if len(shown) != 4:
        return Date(dayfirst=True).filter(shown)

    # Transactions labelled 'Intérêts crédités au cours de l'année' only
    # carry the year: use the last day of that year as their date.
    year = int(shown)
    return datetime.date(year, 12, 31)
class item(ItemElement):
    """Build one ``Transaction`` from a row holding 'impaire' cells."""

    klass = Transaction

    def condition(self):
        # Only rows with at least one <td class="impaire"> are transactions
        return bool(self.el.xpath('td[@class="impaire"]'))

    obj_raw = Transaction.Raw('td[4] | td[3]/a')
    obj_date = Date(
        CleanText('td[2]'),
        dayfirst=True)
    obj_amount = CleanDecimal(
        'td[7]',
        replace_dots=True)
def parse(self, el):
    """Pre-compute availability information for the current row.

    Reads the text of the ``<span>`` inside the 'availability' cell and
    stores three values in ``self.env``:

    * ``availability_date``: the text parsed as a day-first date, or
      ``NotAvailable``;
    * ``condition``: ``Pocket.CONDITION_DATE`` when a date was parsed,
      otherwise the entry of ``self.page.CONDITIONS`` matching the first
      word of the text (``Pocket.CONDITION_UNKNOWN`` if none);
    * ``matching_txt``: the raw text.

    :param el: current element (not used directly here)
    """
    txt = CleanText(
        TableCell('availability')(self)[0].xpath('./span'))(self)

    availability_date = Date(
        dayfirst=True, default=NotAvailable).filter(txt)
    self.env['availability_date'] = availability_date

    if availability_date:
        condition = Pocket.CONDITION_DATE
    else:
        # An empty cell has no first word: treat it as an unknown
        # condition instead of failing with IndexError.
        words = txt.lower().split()
        if words:
            condition = self.page.CONDITIONS.get(
                words[0], Pocket.CONDITION_UNKNOWN)
        else:
            condition = Pocket.CONDITION_UNKNOWN
    self.env['condition'] = condition

    self.env['matching_txt'] = txt
class item(ItemElement):
    """Build one ``Transaction`` from cells identified by ``headers``."""

    klass = Transaction

    obj_date = Date(
        CleanText('.//td[@headers="date"]'),
        dayfirst=True)
    obj_raw = Transaction.Raw('.//td[@headers="libelle"]')
    # The amount is read from whichever of the debit/credit cells matches
    obj_amount = CleanDecimal(
        './/td[@headers="debit" or @headers="credit"]',
        replace_dots=True)
class item(ItemElement):
    """Build one ``Bill`` from a JSON order entry."""

    klass = Bill

    # "<subid>.<orderId>", with subid taken from the environment
    obj_id = Format(
        '%s.%s',
        Env('subid'),
        Dict('orderId'))
    obj_date = Date(Dict('billingDate'))
    obj_format = u"pdf"
    obj_price = CleanDecimal(Dict('priceWithTax/value'))
    obj__url = Dict('pdfUrl')
class item(ItemElement):
    """Build one coming deferred-card ``Transaction`` from a table row.

    Rows whose second cell is empty are skipped.
    """

    klass = Transaction

    obj_rdate = Date(CleanText('./td[1]'), dayfirst=True)
    # The debit date comes from the environment
    obj_date = Date(Env('date'), dayfirst=True, default=NotAvailable)
    obj_raw = Transaction.Raw(CleanText('./td[2]'))
    obj__coming = True

    def obj_type(self):
        return Transaction.TYPE_DEFERRED_CARD

    def obj_amount(self):
        """Return the amount from the third cell, else from the second.

        The fallback is only used when the third cell yields no amount at
        all. Testing the value's truthiness would also discard a genuine
        zero amount and then try to parse the second cell instead.
        """
        amount = CleanDecimal(
            './td[3]', replace_dots=True, default=NotAvailable)(self)
        if amount is not NotAvailable:
            return amount
        return CleanDecimal('./td[2]', replace_dots=True)(self)

    def condition(self):
        return CleanText('./td[2]')(self)
class get_housing(ItemElement):
    """Build a ``Housing`` object from an offer page.

    Title, area, price, currency and address come from ``itemprop``
    annotated nodes; the date is the "Mis à jour" (updated) date found in
    the offer notes; photos come from the carousel; details gather the
    energy/greenhouse summaries and the criteria list.
    """

    klass = Housing

    obj_id = Env('_id')
    obj_title = CleanText(CleanHTML('//meta[@itemprop="name"]/@content'))
    # Area is the number preceding " m²" in the title. Backslashes meant
    # for the regex engine are doubled so the literal has no invalid
    # string escape.
    obj_area = CleanDecimal(Regexp(
        CleanText(CleanHTML('//meta[@itemprop="name"]/@content')),
        '(.*?)(\\d*) m\xb2(.*?)', '\\2'), default=NotAvailable)
    obj_cost = CleanDecimal('//*[@itemprop="price"]')
    obj_currency = Regexp(CleanText('//*[@itemprop="price"]'),
                          '.*([%s%s%s])' % (u'€', u'$', u'£'),
                          default=u'€')
    # dayfirst=True: the captured token is dd/mm/yyyy, so without it a
    # date such as 05/03/2019 would be read month-first.
    obj_date = Date(
        Regexp(
            CleanText(
                '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
            ), u'.* Mis à jour : (\\d{2}/\\d{2}/\\d{4}).*'),
        dayfirst=True)
    obj_text = CleanHTML(
        '//div[@class="offer-description-text"]|//div[has-class("offer-description")]'
    )
    obj_location = CleanText('//*[@itemprop="address"]')
    obj_url = BrowserURL('housing', _id=Env('_id'))

    def obj_photos(self):
        """Return one ``HousingPhoto`` per image URL of the carousel."""
        return [
            HousingPhoto(u'%s' % img)
            for img in XPath(
                '//div[@class="carousel-content"]/ul/li/a/img/@src|//div[@class="carousel"]/ul/li/a/img/@src'
            )(self)
        ]

    def obj_details(self):
        """Return a dict of detail labels mapped to their values.

        Energy and greenhouse entries are only added when both their label
        and their value are present; every item of the criteria list is
        added as label -> value.
        """
        details = {}

        energy = CleanText(
            '//div[has-class("energy-summary")]/span[@class="section-label"]|//div[has-class("energy-summary")]/div/span[@class="section-label"]',
            default='')(self)
        energy_value = CleanText(
            '//div[has-class("energy-summary")]/span[@class="energy-msg"]',
            default='')(self)
        if energy and energy_value:
            details[energy] = energy_value

        greenhouse = CleanText(
            '//div[has-class("greenhouse-summary")]/span[@class="section-label"]|//div[has-class("greenhouse-summary")]/div/span[@class="section-label"]',
            default='')(self)
        greenhouse_value = CleanText(
            '//div[has-class("greenhouse-summary")]/span[@class="energy-msg"]',
            default='')(self)
        if greenhouse and greenhouse_value:
            details[greenhouse] = greenhouse_value

        for li in XPath('//ul[@itemprop="description"]/li')(self):
            label = CleanText('./div[has-class("criteria-label")]')(li)
            value = CleanText('./div[has-class("criteria-value")]')(li)
            details[label] = value

        return details
class item(ItemElement):
    """Build one ``Investment`` from a table row."""

    klass = Investment

    obj_label = CleanText(TableCell('label'))
    obj_code = CleanText(TableCell('code'))
    obj_unitvalue = CleanDecimal(
        TableCell('unitvalue'),
        replace_dots=True)
    obj_quantity = CleanDecimal(
        TableCell('quantity'),
        replace_dots=True)
    # The valuation is not read from the table: it is quantity * unit value
    obj_valuation = Eval(
        lambda quantity, unitvalue: quantity * unitvalue,
        Field('quantity'),
        Field('unitvalue'))
    obj_vdate = Date(
        CleanText(TableCell('vdate')),
        dayfirst=True)
class item(ItemElement):
    """Build one ``Transaction`` from a JSON entry whose status is 'DONE'."""

    klass = Transaction

    obj_raw = Transaction.Raw(Dict('label'))
    obj_date = Date(Dict('date'))
    obj_amount = Eval(
        float_to_decimal,
        Dict('gross_amount/value'))

    def validate(self, obj):
        # Entries with any other status are rejected
        status = CleanText(Dict('status'))(self)
        return status == 'DONE'
def obj_rdate(self):
    """Return the real date embedded in the raw label, else the date.

    The raw label is searched for either a space-delimited ``dd/mm/yy``
    token or a space-delimited 6-digit token; the digits are then
    re-assembled as ``dd-mm-yy`` and parsed day-first. When nothing
    matches, the value of the ``date`` field is returned.
    """
    # Raw string: ``\d`` is a regex escape, not a string escape.
    s = Regexp(
        Field('raw'),
        r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
        default=NotAvailable)(self)
    if not s:
        return Field('date')(self)

    digits = s.replace('/', '')
    return Date(dayfirst=True).filter(
        '%s-%s-%s' % (digits[:2], digits[2:4], digits[4:]))
class item(ItemElement):
    """Build one ``FrenchTransaction`` from a table row."""

    klass = FrenchTransaction

    obj_date = Date(
        CleanText(TableCell('date')),
        dayfirst=True)
    obj_label = CleanText(TableCell('label'))
    # A missing or unparsable amount yields NotAvailable instead of an error
    obj_amount = CleanDecimal(
        TableCell('amount'),
        replace_dots=True,
        default=NotAvailable)
    obj__is_coming = False
class item(ItemElement):
    """Build one ``Investment`` from a JSON fund entry."""

    klass = Investment

    obj_label = Dict('libelleFonds')
    # Numeric values are chained into CleanDecimal with ``&``
    obj_unitvalue = Dict('vl') & CleanDecimal
    obj_quantity = Dict('nbParts') & CleanDecimal
    obj_valuation = Dict('mtBrut') & CleanDecimal
    # The ISIN code is optional
    obj_code = Dict(
        'codeIsin',
        default=NotAvailable)
    obj_vdate = Date(Dict('dtVl'))
class item(ItemElement):
    """Build one ``Transaction`` from a table row."""

    klass = Transaction

    # NOTE(review): Env('date_guesser') is passed as the second positional
    # argument of Date. Presumably that slot is Date's ``default`` rather
    # than a guesser (a DateGuesser filter may have been intended) --
    # TODO confirm against the Date signature and the site's date format.
    obj_date = Date(
        CleanText(TableCell('date')),
        Env('date_guesser'))
    obj_type = Transaction.TYPE_UNKNOWN
    obj_id = CleanText(TableCell('reference'))
    obj_label = CleanText(TableCell('type'))
    obj_amount = CleanDecimal(
        CleanHTML(TableCell('montant')),
        replace_dots=True)
class item(ItemElement):
    """Build one ``Transaction`` from a table row."""

    klass = Transaction

    # "<operation> - <nature>"
    obj_label = Format(
        '%s - %s',
        CleanText(TableCell('operation')),
        CleanText(TableCell('nature')))
    obj_rdate = Date(
        CleanText(TableCell('date')),
        dayfirst=True)
    # The sign callback always answers -1, whatever text it receives
    obj_amount = CleanDecimal(
        TableCell('amount'),
        sign=lambda x: -1,
        replace_dots=True)
class get_arte_cinema_video(ArteItemElement):
    """Build an ``ArteSiteVideo`` for the cinema site.

    After the regular initialisation, the element is narrowed to the
    'videoJsonPlayer' entry of the original element.
    """

    klass = ArteSiteVideo

    obj__site = SITE.CINEMA.get('id')
    obj_date = Date(Dict('VRA'))

    def __init__(self, *args, **kwargs):
        # NOTE(review): super() is anchored on ArteItemElement, so the
        # lookup starts *after* ArteItemElement in the MRO -- confirm this
        # is intended.
        super(ArteItemElement, self).__init__(*args, **kwargs)
        self.el = self.el.get('videoJsonPlayer')
def obj_date(self):
    """Return the date shown in the first cell of the row.

    A 10-character value is parsed as a full day-first date; a
    5-character value has no year and is resolved with the
    ``date_guesser`` from the environment. Other lengths give ``None``.
    """
    text_length = len(CleanText('./td[1]/font//text()')(self))

    if text_length == 10:
        full_date = Date(CleanText('./td[1]/font//text()'), dayfirst=True)
        return full_date(self)

    if text_length == 5:
        # Date has no indicated year.
        guessed_date = DateGuesser(
            CleanText('./td[1]//text()'), Env('date_guesser'))
        return guessed_date(self)

    return None
def get_history(self, acc_type):
    """Yield the transactions produced by ``self.parse_transactions()``.

    For card accounts the value date ('dateval') is used both as date and
    value date; otherwise 'date' is the date and 'dateval' the value date,
    falling back to the date when it cannot be parsed. Transactions for
    which ``self.condition(t, acc_type)`` is true are skipped.

    :param acc_type: account type, compared with ``Account.TYPE_CARD``
    """
    for _, tr in self.parse_transactions():
        t = Transaction()

        # Compare by value: ``is`` only tests object identity.
        if acc_type == Account.TYPE_CARD:
            date = vdate = Date(dayfirst=True, default=None).filter(tr['dateval'])
        else:
            date = Date(dayfirst=True, default=None).filter(tr['date'])
            vdate = Date(dayfirst=True, default=None).filter(tr['dateval']) or date

        # The raw label joins the operation type and its complement
        raw = MyStrip(' '.join([tr['typeope'], tr['LibComp']]))
        t.parse(date, raw, vdate)
        t.set_amount(tr['mont'])

        if self.condition(t, acc_type):
            continue
        yield t
class item(ItemElement):
    """Build one ``BaseJobAdvert`` from a job list entry."""

    klass = BaseJobAdvert

    # The id is the numeric segment of the entry's link, after "fr/jobs/".
    # Raw string: ``\d`` is a regex escape, not a string escape.
    obj_id = Regexp(Link('.'), r'.*fr/jobs/(\d+)/.*')
    obj_title = CleanText('h4/span[@class="job-title"]')
    obj_society_name = CleanText('h4/span[@class="job-company"]')
    # The badge text is parsed with the French date parser
    obj_publication_date = Date(
        CleanText('h4/span[@class="badge pull-right"]'),
        parse_func=parse_french_date)
class item(ItemElement):
    """Build one ``Transaction`` from a table row."""

    klass = Transaction

    obj_date = Date(CleanText(TableCell("date")), dayfirst=True, default=NotAvailable)
    obj_raw = CleanText(TableCell("label"))
    obj_label = CleanText(TableCell("label"))
    obj_amount = CleanDecimal(TableCell("amount"), replace_dots=True, default=NotAvailable)

    def obj__transaction_detail(self):
        """Return the absolute URL of the link found in the label cell.

        The filter is evaluated here so that the attribute holds the URL
        itself rather than an unevaluated filter object. Rows without any
        link yield ``NotAvailable``.
        """
        label_cell = TableCell("label")(self)[0]
        return AbsoluteLink(
            label_cell.xpath('.//a'), default=NotAvailable)(self)
class item(ItemElement):
    """Build one ``Investment`` from a table row.

    Label, valuation date and unit value are all extracted from the 'misc'
    cell with regular expressions.
    """

    klass = Investment

    # Leading text up to and including " - <digits>"
    obj_label = Regexp(CleanText(CleanHTML(TableCell('misc'))), r'^(.*? - \d+)')
    # dayfirst=True: the captured token is dd/mm/yyyy, so without it a
    # date such as 05/03/2019 would be read month-first.
    obj_vdate = Date(
        Regexp(CleanHTML(TableCell('misc')), r'(\d{2}/\d{2}/\d{4})'),
        dayfirst=True)
    # Number (digits and commas) followed by " €"
    obj_unitvalue = CleanDecimal(
        Regexp(CleanText(TableCell('misc')), r'([\d,]+) €'),
        replace_dots=True)
    # The cell value is divided by 100
    obj_portfolio_share = Eval(
        lambda x: x / 100,
        CleanDecimal(CleanHTML(TableCell('portfolio_share')), replace_dots=True))
    obj_valuation = CleanDecimal(CleanHTML(TableCell('valuation')), replace_dots=True)
    obj_diff = CleanDecimal(CleanHTML(TableCell('diff')), replace_dots=True)