Example #1
0
 def get_renew_date(self, subscription):
     """Return the date one calendar month after the subscription's current date.

     The current date is parsed (day first) from the first ``span`` with class
     ``actif`` of the ``resumeConso`` block, with the filter applied to the
     ``div`` whose ``login`` attribute equals ``subscription._login``.

     :param subscription: object exposing a ``_login`` attribute.
     :return: the parsed date moved to the following month; when the day does
              not exist in that month, the last day of the month is used.
     :raises IndexError: when no ``div`` matches the login.
     """
     # The login has to be interpolated into the XPath: without a placeholder
     # the ``%`` operator raises TypeError for a plain string operand.
     div = self.doc.xpath('//div[@login="%s"]' % subscription._login)[0]
     mydate = Date(CleanText('//div[@class="resumeConso"]/span[@class="actif"][1]'), dayfirst=True)(div)
     if mydate.month == 12:
         # December rolls over to January of the next year (both have 31 days).
         return mydate.replace(year=mydate.year + 1, month=1)

     next_month = mydate.month + 1
     try:
         return mydate.replace(month=next_month)
     except ValueError:
         # The day does not exist in the next month (e.g. the 31st): clamp it
         # to that month's last day.
         lastday = calendar.monthrange(mydate.year, next_month)[1]
         return mydate.replace(month=next_month, day=lastday)
Example #2
0
        class item(ItemElement):
            # Item description whose target class is BaseCalendarEvent.
            klass = BaseCalendarEvent

            # URL, id and summary are all read from the first <a> child.
            obj_url = Link('./a[1]')
            # The id is the run of digits that directly follows '?' in that link.
            obj_id = Regexp(Link('./a[1]'), r'\?(\d+)')
            obj_summary = CleanText('./a[1]')
            # The start date is parsed from the text of the first <span> child.
            obj_start_date = Date(CleanText('./span[1]'))
            # Category and status are constants for every item.
            obj_category = CATEGORIES.CONCERT
            obj_status = STATUS.CONFIRMED
Example #3
0
        class item(ItemElement):
            # Item description whose target class is Transaction; values are read
            # from named table cells.
            klass = Transaction

            # NOTE(review): Env('date_guesser') is passed as the second positional
            # argument of Date -- confirm that this is the parameter meant to
            # receive the date guesser.
            obj_date = Date(CleanText(TableCell('date')), Env('date_guesser'))
            obj_type = Transaction.TYPE_UNKNOWN
            obj_id = CleanText(TableCell('reference'))
            # The label is taken from the 'type' column.
            obj_label = CleanText(TableCell('type'))
            # The 'montant' cell goes through CleanHTML before decimal parsing;
            # NotAvailable is the default given to CleanDecimal.
            obj_amount = CleanDecimal(CleanHTML(TableCell('montant')),
                                      replace_dots=True, default=NotAvailable)
Example #4
0
        class item(ItemElement):
            # Item description whose target class is Transaction.
            klass = Transaction

            # True when the element has at least one <td class="impaire"> child.
            condition = lambda self: len(self.el.xpath('td[@class="impaire"]')
                                         ) > 0

            # The raw label selector is the union of the 4th and 3rd cells.
            obj_raw = Transaction.Raw('td[4] | td[3]')
            # Day-first date parsed from the 2nd cell.
            obj_date = Date(CleanText('td[2]'), dayfirst=True)
            # Amount parsed from the 7th cell.
            obj_amount = CleanDecimal('td[7]', replace_dots=True)
Example #5
0
    class get_arte_cinema_video(ArteItemElement):
        def __init__(self, *args, **kwargs):
            # NOTE(review): super() is anchored on ArteItemElement, so the lookup
            # starts *after* ArteItemElement in the MRO and any __init__ defined
            # on ArteItemElement itself is skipped -- confirm this is intended.
            super(ArteItemElement, self).__init__(*args, **kwargs)
            # Replace the element by its 'videoJsonPlayer' entry.
            self.el = self.el.get('videoJsonPlayer')

        klass = ArteSiteVideo

        # Constant: the 'id' entry of SITE.CINEMA.
        obj__site = SITE.CINEMA.get('id')
        # Date parsed from the 'VRA' key ('' when the key is missing);
        # NotAvailable is the default given to Date.
        obj_date = Date(Dict('VRA', default=''), default=NotAvailable)
Example #6
0
        class item(ItemElement):
            # Item description whose target class is Transaction; values are read
            # from dictionary keys.
            klass = Transaction

            # This is 'Date de valeur'
            obj_date = Date(Dict('dVl'), dayfirst=True)
            # Secondary date read from the optional 'date' key; NotAvailable is
            # the default given to Date.
            obj__date = Date(Dict('date', default=None),
                             dayfirst=True,
                             default=NotAvailable)
            obj_coming = False
            # Raw label: 'l1', 'l2' and 'l3' formatted with single spaces between them.
            obj_raw = Transaction.Raw(
                Format('%s %s %s', Dict('l1'), Dict('l2'), Dict('l3')))

            # We have l4 and l5 too most of the time, but it seems to be unimportant and would make label too long.
            #tr.label = ' '.join([' '.join(transaction[l].strip().split()) for l in ['l1', 'l2', 'l3']])

            def obj_amount(self):
                # Use the 'c' value when it parses to a truthy decimal, otherwise
                # fall back to the 'd' value (which has no default).
                # NOTE(review): 'c' / 'd' presumably mean credit / debit -- confirm.
                return CleanDecimal(Dict('c', default=None), replace_dots=True, default=None)(self) or \
                    CleanDecimal(Dict('d'), replace_dots=True)(self)
Example #7
0
        class item(BnpHistoryItem):
            # Item description whose target class is Transaction.
            klass = Transaction

            obj_type = Transaction.TYPE_DEFERRED_CARD
            # date and vdate share the same filter on the 'valeur' key (day first).
            obj_date = obj_vdate = Date(Dict('valeur'), dayfirst=True)

            def obj_amount(self):
                # Take the 'debit' entry when present and truthy, else 'credit'.
                amount = Dict('debit', default=None)(self) or Dict('credit')(self)
                # 'montant' is divided by 10 ** 'nb_dec'.
                return Eval(lambda x, y: x / 10**y, Decimal(amount['montant']), Decimal(amount['nb_dec']))(self)
Example #8
0
        class item(ItemElement):
            # Item description whose target class is Bill.
            klass = Bill

            # Id is "<subid>.<orderId>", subid coming from the environment.
            obj_id = Format('%s.%s', Env('subid'), Dict('orderId'))
            obj_date = Date(Dict('billingDate'))
            # Format and type are constants.
            obj_format = u"pdf"
            obj_type = u"bill"
            # Price is read at the 'priceWithTax/value' path.
            obj_price = CleanDecimal(Dict('priceWithTax/value'))
            obj__url = Dict('pdfUrl')
Example #9
0
        class item(ItemElement):
            # Item description whose target class is Bill; values are read from
            # one table row.
            klass = Bill

            obj_type = DocumentTypes.BILL
            obj_format = "pdf"

            # TableCell('date') can have other info like: 'duplicata'
            obj_date = Date(CleanText('./td[@headers="ec-dateCol"]/text()[not(preceding-sibling::br)]'), parse_func=parse_french_date, dayfirst=True)

            def obj__cell(self):
                """Return 'url' when the 'url' cell has text, else 'infos'."""
                # sometimes the link to the bill is not in the right column (Thanks Orange!!)
                if CleanText(TableCell('url')(self))(self):
                    return 'url'
                return 'infos'

            def obj_price(self):
                """Return the decimal found in the 'amount' cell, or the `_ht` field when that cell is empty."""
                if CleanText(TableCell('amount')(self))(self):
                    # Raw string: '\d' is a regex class, not a string escape.
                    return CleanDecimal(Regexp(CleanText(TableCell('amount')), r'.*?([\d,]+).*', default=NotAvailable), replace_dots=True, default=NotAvailable)(self)
                else:
                    return Field('_ht')(self)

            def obj_currency(self):
                """Return the currency of the 'amount' cell, or of the 'ht' cell when 'amount' is empty."""
                if CleanText(TableCell('amount')(self))(self):
                    return Currency(TableCell('amount')(self))(self)
                else:
                    return Currency(TableCell('ht')(self))(self)

            # Only when a list of documents is present
            # (raw triple-quoted literal: same runtime pattern as before, without
            # relying on invalid string escapes such as '\d' and '\+')
            obj__url_base = Regexp(CleanText('.//ul[@class="liste"]/script', default=None), r'''.*?contentList[\d]+ \+= '<li><a href=".*"(.*?idDocument=2)"''', default=None)

            def obj_url(self):
                """Return the unescaped `_url_base` when set, else the link found in the selected cell."""
                if Field('_url_base')(self):
                    # URL won't work if HTML is not unescape
                    return HTMLParser().unescape(str(Field('_url_base')(self)))
                return Link(TableCell(Field('_cell')(self))(self)[0].xpath('./a'), default=NotAvailable)(self)

            obj__label_base = Regexp(CleanText('.//ul[@class="liste"]/script', default=None), '.*</span>(.*?)</a.*', default=None)

            def obj_label(self):
                """Return the unescaped `_label_base` when set, else the hidden span text of the selected cell."""
                if Field('_label_base')(self):
                    return HTMLParser().unescape(str(Field('_label_base')(self)))
                else:
                    return CleanText(TableCell(Field('_cell')(self))(self)[0].xpath('.//span[@class="ec_visually_hidden"]'))(self)

            obj__ht = CleanDecimal(TableCell('ht', default=NotAvailable), replace_dots=True, default=NotAvailable)

            def obj_vat(self):
                """Return price minus `_ht`; returns None when either one is NotAvailable."""
                if Field('_ht')(self) is NotAvailable or Field('price')(self) is NotAvailable:
                    return
                return Field('price')(self) - Field('_ht')(self)

            def obj_id(self):
                """Return "<subid>_<ddmmYYYY><amount>", amount being the price or, failing that, `_ht`."""
                price = Field('price')(self)
                amount = Field('_ht')(self) if price is NotAvailable else price
                return '%s_%s%s' % (Env('subid')(self), Field('date')(self).strftime('%d%m%Y'), amount)
Example #10
0
    def get_history(self, account):
        """Yield a transaction for every parsed row that ``self.condition`` does not reject.

        For card accounts the value date ('dateval') is used both as date and
        value date, and the 'date' entry is stored as ``bdate``.  For any other
        account 'date' is the date and 'dateval' the value date, which falls
        back to the date when it cannot be parsed.
        """
        def to_date(text, fallback):
            # Day-first parsing; `fallback` is the default given to Date.
            return Date(dayfirst=True, default=fallback).filter(text)

        for _, row in self.parse_transactions():
            entry = self.TRANSACTION()

            if account.type is not Account.TYPE_CARD:
                op_date = to_date(row['date'], None)
                value_date = to_date(row['dateval'], None) or op_date
            else:
                op_date = value_date = to_date(row['dateval'], None)
                entry.bdate = to_date(row['date'], NotAvailable)

            raw_label = MyStrip(' '.join((row['typeope'], row['LibComp'])))
            entry.parse(op_date, raw_label, value_date)
            entry.set_amount(row['mont'])
            self.detect_currency(entry, raw_label, account.currency)

            # A truthy condition means the transaction must be skipped.
            if not self.condition(entry, account.type):
                yield entry
Example #11
0
            def obj_rdate(self):
                """Return the real date of the transaction.

                Uses the rdate already present on the object when set; otherwise
                looks in the raw label for a 'dd/mm/yy' date or a 6-digit run
                surrounded by spaces, and falls back to the `date` field when
                nothing matches.
                """
                if self.obj.rdate:
                    # Transaction.Raw may have already set it
                    return self.obj.rdate

                # Raw string so that '\d' is handed to the regex engine untouched.
                s = Regexp(Field('raw'), r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ', default=NotAvailable)(self)
                if not s:
                    return Field('date')(self)
                # Normalise both forms to six digits, then rebuild 'dd-mm-yy'.
                s = s.replace('/', '')
                return Date(dayfirst=True).filter('%s-%s-%s' % (s[:2], s[2:4], s[4:]))
Example #12
0
        class item(ItemElement):
            # Item description whose target class is Investment; values are read
            # from named table cells.
            klass = Investment

            obj_label = CleanText(TableCell('label'))
            # Quantity and unit value default to NotAvailable; valuation has no default.
            obj_quantity = CleanDecimal(TableCell('quantity'), replace_dots=True, default=NotAvailable)
            obj_unitvalue = CleanDecimal(TableCell('unitvalue'), replace_dots=True, default=NotAvailable)
            obj_valuation = CleanDecimal(TableCell('valuation'), replace_dots=True)
            obj_vdate = Date(CleanText(TableCell('vdate')), dayfirst=True)

            # First argument of the PageExterne('...') call found in the link's
            # onclick attribute; None when there is no match.
            obj__code_url = Regexp(Attr('./td/a', 'onclick', default=""), r'PageExterne\(\'([^\']+)', default=None)
Example #13
0
        class item(ItemElement):
            # Item description whose target class is Investment; values are read
            # from dictionary keys.
            klass = Investment

            obj_label = Dict('libelleFonds')
            # Numeric keys are combined with CleanDecimal through the `&` operator.
            obj_unitvalue = Dict('vl') & CleanDecimal
            obj_quantity = Dict('nbParts') & CleanDecimal
            obj_valuation = Dict('mtBrut') & CleanDecimal
            # NotAvailable when the 'codeIsin' key is missing.
            obj_code = Dict('codeIsin', default=NotAvailable)
            obj_vdate = Date(Dict('dtVl'))
            obj_diff = Dict('mtPMV') & CleanDecimal
Example #14
0
        class item(ItemElement):
            # Item description whose target class is Investment; values are read
            # from named table cells.
            klass = Investment

            obj_label = CleanText(TableCell('label'))
            # The code is whatever follows 'Isin%253D' in the link, up to the next '%'.
            obj_code = Regexp(Link('./td/a'), 'Isin%253D([^%]+)')
            obj_quantity = MyDecimal(TableCell('quantity'))
            obj_unitprice = MyDecimal(TableCell('unitprice'))
            obj_unitvalue = MyDecimal(TableCell('unitvalue'))
            obj_valuation = MyDecimal(TableCell('valuation'))
            # Day-first date; NotAvailable is the default given to Date.
            obj_vdate = Date(CleanText(TableCell('vdate')), dayfirst=True, default=NotAvailable)
Example #15
0
        class item(ItemElement):
            # Item description whose target class is Transaction; values are read
            # from named table cells.
            klass = Transaction

            def condition(self):
                # False for the placeholder row whose 'date' cell contains the
                # "no pending operation" message.
                return u'Aucune opération en attente' not in CleanText(TableCell('date'))(self)

            # NOTE(review): Env('date_guesser') is passed as the second positional
            # argument of Date -- confirm that this is the parameter meant to
            # receive the date guesser.
            obj_date = Date(CleanText(TableCell('date')), Env('date_guesser'))
            obj_type = Transaction.TYPE_UNKNOWN
            obj_label = CleanText(TableCell('operation'))
            obj_amount = CleanDecimal(TableCell('montant'), replace_dots=True)
Example #16
0
 def obj_publication_date(self):
     """Return the publication date read from the "infos" paragraph.

     When the text cannot be parsed as a date, return yesterday if it
     contains 'hier' and today otherwise.
     """
     infos_text = CleanText('p[@class="infos"]')
     try:
         return Date(infos_text)(self)
     except ParseError:
         # Unparseable text: only the word 'hier' shifts the result by one day.
         days_ago = 1 if 'hier' in infos_text(self) else 0
         return date.today() - timedelta(days=days_ago)
Example #17
0
        class item(ItemElement):
            # Item description whose target class is Document; values are read
            # from dictionary paths.
            klass = Document

            # Id is "<subid>_<identifiant>", subid coming from the environment.
            obj_id = Format('%s_%s', Env('subid'),
                            Dict('identifiantDocument/identifiant'))
            obj_date = Date(Dict('dateCreation'))
            obj_label = Dict('libelle')
            # Format and type are constants.
            obj_format = 'pdf'
            obj_type = DocumentTypes.STATEMENT
            obj_url = Dict('_links/document/href')
Example #18
0
        class item(ItemElement):
            # Item description whose target class is Bill; values are read from
            # dictionary keys.
            klass = Bill

            # Id is "<subid>.<orderId>", subid coming from the environment.
            obj_id = Format('%s.%s', Env('subid'), Dict('orderId'))
            obj_date = Date(Dict('billingDate'))
            # Format and type are constants.
            obj_format = u"pdf"
            obj_type = DocumentTypes.BILL
            obj_price = CleanDecimal(Dict('priceWithTax/value'))
            obj_url = Dict('pdfUrl')
            # Label is "Facture <orderId>".
            obj_label = Format('Facture %s', Dict('orderId'))
Example #19
0
        class item(ItemElement):
            # Item description whose target class is Transaction; values are read
            # from dictionary keys.
            klass = Transaction

            # Both dates are optional: a missing key yields None from Dict and
            # NotAvailable is the default given to Date.
            obj_rdate = Date(Dict('date', default=None),
                             dayfirst=True,
                             default=NotAvailable)
            obj_date = Date(Dict('dVl', default=None),
                            dayfirst=True,
                            default=NotAvailable)
            obj__coming = False

            # Label is split into l1, l2, l3, l4, l5.
            # l5 is needed for transfer label, for example:
            # 'l1': "000001 VIR EUROPEEN EMIS   NET"
            # 'l2': "POUR: XXXXXXXXXXXXX"
            # 'l3': "REF: XXXXXXXXXXXXXX"
            # 'l4': "REMISE: XXXXXX TRANSFER LABEL"
            # 'l5': "MOTIF: TRANSFER LABEL"
            obj_raw = Transaction.Raw(
                Format(
                    '%s %s %s %s %s',
                    Dict('l1'),
                    Dict('l2'),
                    Dict('l3'),
                    Dict('l4'),
                    Dict('l5'),
                ))

            # keep the 3 first rows for transaction label
            obj_label = Transaction.Raw(
                Format(
                    '%s %s %s',
                    Dict('l1'),
                    Dict('l2'),
                    Dict('l3'),
                ))

            def obj_amount(self):
                # Use the 'c' value when it parses to a truthy decimal, otherwise
                # fall back to the 'd' value (which has no default).
                # NOTE(review): 'c' / 'd' presumably mean credit / debit -- confirm.
                return CleanDecimal(Dict('c', default=None), replace_dots=True, default=None)(self) or \
                    CleanDecimal(Dict('d'), replace_dots=True)(self)

            def obj_deleted(self):
                # True exactly when the transaction type is TYPE_CARD_SUMMARY.
                return self.obj.type == FrenchTransaction.TYPE_CARD_SUMMARY
Example #20
0
 def parse(self, el):
     """Fill ``self.env`` with 'date', '_is_coming', 'amount' and 'commission'.

     The date comes from the "Total prélevé" cell when it holds a
     dd/mm/yyyy date; otherwise it is derived from the selected option of
     the "moi" select.  The 'amount' cell is split on 'dont frais': the
     first part is the amount, the second (when present) the commission.
     """
     # Raw string so that '\d' is handed to the regex engine untouched.
     # NOTE(review): this Date is built without dayfirst=True although the
     # pattern looks like dd/mm/yyyy -- confirm the intended parsing.
     debit_date = Date(
         Regexp(
             CleanText(u'//td[contains(text(), "Total prélevé")]'),
             r' (\d{2}/\d{2}/\d{4})',
             default=NotAvailable),
         default=NotAvailable)(self)
     if not debit_date:
         selected_month = CleanText(u'//select[@id="moi"]/option[@selected]')(self)
         # "1 <selected text>" is parsed, then relativedelta(day=31) is added;
         # presumably this yields the last day of that month -- see dateutil docs.
         debit_date = (parse_french_date('%s %s' % ('1', selected_month)) + relativedelta(day=31)).date()
     self.env['date'] = debit_date
     self.env['_is_coming'] = date.today() < self.env['date']

     parts = CleanText(TableCell('amount'))(self).split('dont frais')
     self.env['amount'] = parts[0]
     self.env['commission'] = parts[1] if len(parts) > 1 else NotAvailable
Example #21
0
 def obj_rdate(self):
     """Return the real date of the transaction, or NotAvailable.

     When the rdate already on the object differs from the `date` field it
     is returned as is.  When both are equal, the 'dateOperation' value is
     returned if it is earlier than the date; otherwise NotAvailable.
     """
     booked = Field('date')(self)
     # rdate is already set by `obj_raw` and the patterns.
     current_rdate = self.obj.rdate
     operation_date = Date(Dict('dateOperation'))(self)

     if current_rdate != booked:
         return current_rdate
     if operation_date < booked:
         return operation_date
     return NotAvailable
Example #22
0
 def obj_date(self):
     """Return the date of the first column.

     A 10-character text is parsed as a full day-first date, a 5-character
     text goes through DateGuesser; any other length returns None.
     """
     # Dates in the first column may appear as '12/01/2019' or '12/01'
     text_length = len(CleanText('./td[1]/font//text()')(self))

     if text_length == 10:
         full_date = Date(CleanText('./td[1]/font//text()'), dayfirst=True)
         return full_date(self)

     if text_length == 5:
         # Date has no indicated year.
         guessed_date = DateGuesser(CleanText('./td[1]//text()'), Env('date_guesser'))
         return guessed_date(self)

     return None
Example #23
0
 class item(ItemElement):
     # Item description whose target class is Document; values are read from
     # dictionary keys.
     klass = Document
     obj_date = Date(Dict('dateDocument'))
     obj_format = "pdf"
     # Label is "<libelle1> : <libelle3>".
     obj_label = Format("%s : %s", Dict('libelle1'), Dict('libelle3'))
     # The type is the 'libelleIcone' text with the 'Icône ' prefix removed.
     obj_type = CleanText(Dict('libelleIcone'),
                          replace=[('Icône ', '')])
     # The id is a run of non-space characters followed by a dot in 'libelle2'.
     obj_id = Regexp(Dict('libelle2'), r"(\S+)\.", nth=0)
     # Download URL built from the document UUID.
     obj_url = Format("/prive/telechargerdocumentremuneration/v1?documentUuid=%s",
                      Dict('documentUuid'))
Example #24
0
 def obj_date(self):
     """Return the transaction date, replaced by the debit date for cards.

     The base date comes from the ``datetime`` attribute of the ``time``
     element.  When the 'is_card' environment value is truthy, the browser
     is asked for the debit date matching it, loading the calendar page
     first if the browser has no deferred card calendar yet.
     """
     parsed = Date(Attr('.//time', 'datetime'))(self)
     if not Env('is_card', default=False)(self):
         return parsed

     browser = self.page.browser
     if browser.deferred_card_calendar is None:
         calendar_link = Link('//a[contains(text(), "calendrier")]')(self)
         browser.location(calendar_link)

     # Fall back to the parsed date when no debit date is returned.
     return browser.get_debit_date(parsed) or parsed
Example #25
0
        def obj_details(self):
            """Build the details dict of the offer.

            Keys, in insertion order: "DPE" and "GES" (a letter from A to G, or
            NotAvailable), "creationDate" (day-first date following
            "Mis en ligne :"), "Honoraires" (only when the fees label holds a
            colon-separated value), then one entry per description criterion.
            """
            valid_grades = ("A", "B", "C", "D", "E", "F", "G")

            def read_grade(summary_class, prefix):
                # Text of the requested summary block, None when it is absent.
                grade = CleanText(
                    '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("%s")]' % summary_class,
                    default=None
                )(self)
                if grade and len(grade) > 1:
                    remainder = grade.replace(prefix, "").strip()
                    # Nothing may be left once the prefix is removed; indexing an
                    # empty string would raise IndexError.
                    grade = remainder[0] if remainder else None
                    if grade not in valid_grades:
                        grade = None
                return NotAvailable if grade is None else grade

            details = {}
            details["DPE"] = read_grade("energy-summary", "DPE")
            details["GES"] = read_grade("greenhouse-summary", "GES")

            # Raw string so that '\d' is handed to the regex engine untouched.
            details["creationDate"] = Date(
                Regexp(
                    CleanText(
                        '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
                    ),
                    r'.*Mis en ligne : (\d{2}/\d{2}/\d{4}).*'
                ),
                dayfirst=True
            )(self)

            honoraires = CleanText(
                (
                    '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]'
                ),
                default=None
            )(self)
            if honoraires:
                fee_parts = honoraires.split(":")
                # Without a colon there is no value part to report; skip the key
                # instead of failing with IndexError.
                if len(fee_parts) > 1:
                    details["Honoraires"] = (
                        "{} (TTC, en sus)".format(fee_parts[1].strip())
                    )

            for li in XPath('//ul[@itemprop="description"]/li')(self):
                label = CleanText('./div[has-class("criteria-label")]')(li)
                value = CleanText('./div[has-class("criteria-value")]')(li)
                details[label] = value

            return details
Example #26
0
    class get_torrent(ItemElement):
        # Item description whose target class is Torrent.
        klass = Torrent

        # The id is the hexadecimal hash found in the trackers heading.
        obj_id = Regexp(CleanText('//div[@class="trackers"]/h2'),  r'hash ([0-9a-f]+)', '\\1')
        obj_name = CleanText('//div[@class="downlinks"]/h2/span')
        # None is the default given to Date.
        obj_date = CleanText('//div[@class="downlinks"]/div/span/@title') & Date(default=None)
        # Size: the title attribute with ',' and 'b' removed, converted to float.
        obj_size = CleanText('//div[@class="files"]/div/@title', replace=[(',', ''), ('b', '')]) & \
            Type(type=float)

        def obj_seeders(self):
            """Return the highest value among the "u" spans, or NotAvailable."""
            try:
                return max([int(span.text.replace(',', ''))
                            for span in self.xpath('//div[@class="trackers"]/dl/dd/span[@class="u"]')])
            except ValueError:
                # No span at all (max of an empty list) or a non-numeric value.
                return NotAvailable

        def obj_leechers(self):
            """Return the highest value among the "d" spans, or NotAvailable."""
            try:
                return max([int(span.text.replace(',', ''))
                            for span in self.xpath('//div[@class="trackers"]/dl/dd/span[@class="d"]')])
            except ValueError:
                # No span at all (max of an empty list) or a non-numeric value.
                return NotAvailable

        def obj_url(self):
            """Return the browser BASEURL followed by the torrent hash."""
            return self.page.browser.BASEURL + \
                Regexp(CleanText('//div[@class="trackers"]/h2'), r'hash ([0-9a-f]+)', '\\1')(self)

        def obj_files(self):
            """Return the file tree as a flat list of lines, indented with "| " per nesting level."""
            def traverse_nested_lists(ul, result, depth=0):
                for li in ul.xpath('./li'):
                    sub_uls = li.xpath('./ul')
                    if sub_uls:
                        # An entry with nested lists: print its text, then recurse.
                        result.append(("| " * depth) + ("%s" % li.text))
                        for sub_ul in sub_uls:
                            traverse_nested_lists(sub_ul, result, depth+1)
                    else:
                        try:
                            size = li.xpath('span')[0].text
                        except IndexError:
                            # No <span> child: the size is left empty.
                            size = ""
                        result.append(("| " * depth) + ("%s [%s]" % (li.text, size)))

            result = []
            traverse_nested_lists(self.xpath('//div[@class="files"]/ul')[0], result)
            return result

        def obj_magnet(self):
            """Return a magnet URI built from the hash, the URL-quoted name and every tracker."""
            hsh = Regexp(CleanText('//div[@class="trackers"]/h2'),  r'hash ([0-9a-f]+)', '\\1')(self)
            name = "dn=%s" % quote_plus(CleanText('//div[@class="downlinks"]/h2/span')(self))
            trackers = ["tr=%s" % dt.text for dt in self.xpath('//div[@class="trackers"]/dl/dt')]
            return "&".join(["magnet:?xt=urn:btih:%s" % hsh, name] + trackers)

        def obj_description(self):
            """Return a text listing the download links, separated by blank lines."""
            return u"Torrent files available at:\n" + \
                   u"\n\n".join(self.xpath('//div[@class="downlinks"]/dl/dt/a/@href'))
0
        def obj_details(self):
            """Build the details dict of the offer.

            Holds "dispo", optionally "priceMentions" and "agency", one nested
            dict per titled block of the "OfferDetails-columnize" section and,
            when the image URL matches, "electric_consumption".
            """
            details = {}

            # Availability date taken after the last space of the "OfferTop-dispo"
            # paragraph; today's ISO date is the Regexp default when nothing matches.
            # NOTE(review): Date is built without dayfirst=True although the
            # pattern looks like dd/mm/yyyy -- confirm the intended parsing.
            dispo = Date(
                Regexp(CleanText('//p[has-class("OfferTop-dispo")]'),
                       r'.* (\d\d\/\d\d\/\d\d\d\d)',
                       default=datetime.date.today().isoformat()))(self)
            # NOTE(review): presumably always true, since the Regexp above has a
            # non-None default -- confirm whether Date can return None here.
            if dispo is not None:
                details["dispo"] = dispo

            priceMentions = CleanText('//p[has-class("OfferTop-mentions")]',
                                      default=None)(self)
            if priceMentions is not None:
                details["priceMentions"] = priceMentions

            agency = CleanText('//p[has-class("OfferContact-address")]',
                               default=None)(self)
            if agency is not None:
                details["agency"] = agency

            for item in self.xpath(
                    '//div[has-class("OfferDetails-columnize")]/div'):
                # Blocks without a title are ignored.
                category = CleanText(
                    './h3[has-class("OfferDetails-title--2")]',
                    default=None)(item)
                if not category:
                    continue

                details[category] = {}

                # "List--data" entries are stored as title -> value.
                for detail_item in item.xpath(
                        './/ul[has-class("List--data")]/li'):
                    detail_title = CleanText(
                        './/span[has-class("List-data")]')(detail_item)
                    detail_value = CleanText('.//*[has-class("List-value")]')(
                        detail_item)
                    details[category][detail_title] = detail_value

                # "List--bullet" entries are stored as title -> True.
                for detail_item in item.xpath(
                        './/ul[has-class("List--bullet")]/li'):
                    detail_title = CleanText('.')(detail_item)
                    details[category][detail_title] = True

            try:
                # The number is the first path segment of the dpe.foncia.net
                # image URL.
                electric_consumption = CleanDecimal(
                    Regexp(
                        Attr('//div[has-class("OfferDetails-content")]//img',
                             'src'),
                        r'https://dpe.foncia.net\/(\d+)\/.*'))(self)
                details["electric_consumption"] = (
                    '{} kWhEP/m².an'.format(electric_consumption))
            except (RegexpError, XPathNotFound):
                # No matching image: the key is simply left out.
                pass

            return details
Example #28
0
        class item(ItemElement):
            # Item description whose target class is FrenchTransaction.
            klass = FrenchTransaction

            # date and rdate are both parsed from the first cell.
            obj_date = Date(CleanText('./td[1]'))
            obj_rdate = Date(CleanText('./td[1]'))
            obj_raw = CleanText('./td[2]')
            # Amount comes from the 4th cell and defaults to Decimal('0').
            obj_amount = CleanDecimal('./td[4]',
                                      replace_dots=True,
                                      default=Decimal('0'))
            obj_original_currency = FrenchTransaction.Currency('./td[4]')
            obj_type = Transaction.TYPE_BANK
            obj__is_coming = False

            def obj_commission(self):
                # Commission is the 3rd-cell amount minus the `amount` field;
                # NotAvailable when the 3rd cell is missing or parses to a falsy
                # value (zero included).
                gross_amount = CleanDecimal('./td[3]',
                                            replace_dots=True,
                                            default=NotAvailable)(self)
                if gross_amount:
                    return gross_amount - Field('amount')(self)
                return NotAvailable
Example #29
0
    def iter_history(self):
        """Yield a Transaction for each "boursedetail" table row that has cells.

        The label is the text of the 2nd cell, the date the day-first date of
        the 1st cell and the amount the 3rd cell passed to ``parse_decimal``.
        """
        row_xpath = u'//table[@class="boursedetail"]/tbody/tr[td]'
        for row in self.doc.xpath(row_xpath):
            entry = Transaction()

            label_cell = row.xpath('./td[2]')[0]
            entry.label = CleanText('.')(label_cell)

            date_cell = row.xpath('./td[1]')[0]
            date_text = CleanText('.')(date_cell)
            entry.date = Date(dayfirst=True).filter(date_text)

            amount_cell = row.xpath('./td[3]')[0]
            entry.amount = self.parse_decimal(amount_cell)

            yield entry
Example #30
0
    def get_loan_attributes(self, account):
        """Return a Loan filled from the page and from the given account.

        Amounts, rate and dates are read from the document; id, currency,
        label, balance, coming, type, ``_uncleaned_id`` and ``_multiple_type``
        are copied from ``account``.
        """
        doc = self.doc
        date_pattern = r'(\d{2}\/\d{2}\/\d{4})'
        instalment_span = '//div[@class="txt-detail  " and not (@style)]//span[contains(text(), "Echéance du")]'

        def date_in(xpath):
            # Day-first date matching dd/mm/yyyy inside the text selected by `xpath`.
            return Date(Regexp(CleanText(xpath), date_pattern), dayfirst=True)(doc)

        loan = Loan()
        loan.total_amount = CleanDecimal(
            '//div/span[contains(text(), "Capital initial")]/following-sibling::*[1]',
            replace_dots=True)(doc)

        # The owner name is the header text without its first word.
        header_words = CleanText('//a[@class="lien-entete login"]/span')(doc).split()
        loan.name = ' '.join(header_words[1:])

        loan.subscription_date = date_in('//h4[span[contains(text(), "Date de départ du prêt")]]')
        loan.maturity_date = date_in('//h4[span[contains(text(), "Date de fin du prêt")]]')

        # The displayed rate is divided by 100.
        fixed_rate = CleanDecimal(
            '//div/span[contains(text(), "Taux fixe")]/following-sibling::*[1]',
            replace_dots=True)
        loan.rate = Eval(lambda x: x / 100, fixed_rate)(doc)

        # NOTE(review): unlike the other amounts, this one is parsed without
        # replace_dots=True -- confirm this is intended.
        loan.last_payment_amount = CleanDecimal(instalment_span + '/following-sibling::span[1]')(doc)
        loan.last_payment_date = date_in(instalment_span)

        # Everything else is copied over from the account, in this order.
        copied = ('id', 'currency', 'label', 'balance', 'coming', 'type',
                  '_uncleaned_id', '_multiple_type')
        for attribute in copied:
            setattr(loan, attribute, getattr(account, attribute))
        return loan
Example #31
0
        class item(ItemElement):
            # Item description whose target class is Transaction.
            klass = Transaction

            obj_raw = Transaction.Raw(TableCell('label'))
            obj_vdate = Date(CleanText(TableCell('vdate')), dayfirst=True)

            def obj_amount(self):
                # The amount is parsed from the div/span found inside the first
                # element of the 'amount' cell.
                return MyDecimal('.//div/span')(TableCell('amount')(self)[0])

            def obj_date(self):
                # The date is read from the span whose id contains this fixed
                # identifier fragment.
                return Date(CleanText('.//span[contains(@id, "C4__QUE_50FADFF19F566198286748")]'), dayfirst=True)(self)