Exemple #1
0
        class item(ItemElement):
            """Describe how one ``Bill`` is filled from a single entry."""

            klass = Bill

            # Identifier: '<subid>_<documentNumber>'.
            obj_id = Format(
                '%s_%s',
                Env('subid'),
                Dict('documentNumber'),
            )
            # 'creationDate' is divided by 1000 before being handed to
            # datetime.fromtimestamp (presumably a millisecond epoch -- TODO
            # confirm), then rendered as YYYY-MM-DD for the Date filter.
            obj_date = Date(
                Eval(
                    lambda t: datetime.fromtimestamp(
                        int(t) / 1000
                    ).strftime('%Y-%m-%d'),
                    Dict('creationDate'),
                )
            )
            obj_format = u"pdf"
            obj_label = Format('Facture %s', Dict('documentNumber'))
            obj_type = u"bill"
            # 'price' and 'numAcc' are placed in the environment by parse().
            obj_price = Env('price')
            obj_currency = u"€"
            obj_vat = NotAvailable
            obj__doc_number = Dict('documentNumber')
            obj__par_number = Dict('parNumber')
            obj__num_acc = Env('numAcc')
            obj__bp = Env('bpNumber')

            def parse(self, el):
                """Store the derived 'price' and 'numAcc' values in ``self.env``."""
                bill_amount = Dict('billAmount')(self)
                self.env['price'] = Decimal(bill_amount)
                sub_id = Env('subid')(self)
                # Round-trip through int: str(int(...)) of the 'subid' value.
                self.env['numAcc'] = str(int(sub_id))
Exemple #2
0
        class Item(ItemElement):
            """Describe how one ``BaseJobAdvert`` is filled from an element."""

            klass = BaseJobAdvert

            # Identifier: '<id attribute minus its first two characters>#
            # <title>#<company>', with spaces and slashes turned into dashes.
            obj_id = CleanText(
                Format(
                    '%s#%s#%s',
                    Regexp(Attr('.', 'id'), '^..(.*)'),
                    Attr('h2/a', 'title'),
                    CleanText('span[@class="company"]'),
                ),
                replace=[(" ", "-"), ("/", "-")],
            )
            obj_title = Attr('h2/a', 'title')
            obj_society_name = CleanText('span[@class="company"]')
            obj_place = CleanText('span/span[@class="location"]')
            obj_publication_date = IndeedDate(
                CleanText('table/tr/td/span[@class="date"]'),
            )
Exemple #3
0
        class item(ItemElement):
            """Describe how one ``Album`` is filled from a listing element."""

            klass = Album

            obj_url = AbsoluteLink('./a')
            obj__thumbnail_url = Attr(
                './a/div[@class="art"]/img',
                'src',
            )
            obj_title = CleanText(
                './a/p[@class="title"]',
                children=False,
            )
            # Identifier: 'album.<band>.<slug captured from the url field>'.
            obj_id = Format(
                'album.%s.%s',
                Env('band'),
                Regexp(Field('url'), r'/album/([-\w]+)'),
            )

            def obj_author(self):
                """Return the artist override if non-empty, else the page artist."""
                override = CleanText(
                    './a/p[@class="title"]/span[@class="artist-override"]'
                )(self)
                if override:
                    return override
                return self.page.get_artist()
Exemple #4
0
    class get_album(ItemElement):
        """Describe how an ``Album`` is filled from the album page."""

        klass = Album

        # Identifier: 'album.<band>.<album>', both read from the environment.
        obj_id = Format(
            'album.%s.%s',
            Env('band'),
            Env('album'),
        )
        obj_title = CleanText('//h2[@class="trackTitle"]')
        obj_author = CleanText('//span[@itemprop="byArtist"]')
        # Not an obj_ field itself; obj_year() applies it to get the year.
        _date = Date(
            Attr('//meta[@itemprop="datePublished"]', 'content'),
        )

        def obj_year(self):
            """Return the year of the 'datePublished' meta date."""
            published = self._date(self)
            return published.year

        def obj_url(self):
            """Return the URL of the current page."""
            page = self.page
            return page.url
Exemple #5
0
        class account(ItemElement):
            """Describe how one ``Account`` is filled from a clickable element."""

            klass = Account

            def condition(self):
                """Keep only elements whose onclick targets the movement list."""
                onclick = self.el.attrib['onclick']
                return '/outil/UWLM/ListeMouvement' in onclick

            # Maps the 'nature' code found in the link to an account type.
            NATURE2TYPE = {
                '001': Account.TYPE_SAVINGS,
                '005': Account.TYPE_CHECKING,
                '006': Account.TYPE_CHECKING,
                '007': Account.TYPE_SAVINGS,
                '012': Account.TYPE_SAVINGS,
                '023': Account.TYPE_CHECKING,
                '046': Account.TYPE_SAVINGS,
                '047': Account.TYPE_SAVINGS,
                '049': Account.TYPE_SAVINGS,
                '068': Account.TYPE_PEA,
                '069': Account.TYPE_SAVINGS,
            }

            # The quoted part of the onclick attribute, suffixed with
            # '&mode=190'; the fields below are all extracted from it.
            obj__link_id = Format(
                '%s&mode=190',
                Regexp(CleanText('./@onclick'), "'(.*)'"),
            )
            obj__agence = Regexp(Field('_link_id'), r'.*agence=(\w+)')
            obj__compte = Regexp(Field('_link_id'), r'compte=(\w+)')
            obj_id = Format(
                '%s%s',
                Field('_agence'),
                Field('_compte'),
            )
            obj__transfer_id = Format(
                '%s0000%s',
                Field('_agence'),
                Field('_compte'),
            )
            obj__coming_links = []
            obj_label = CleanText('.//div[@class="libelleCompte"]')
            obj_balance = MyDecimal(
                './/td[has-class("right")]',
                replace_dots=True,
            )
            obj_currency = FrenchTransaction.Currency(
                './/td[has-class("right")]',
            )
            # Codes missing from NATURE2TYPE fall back to TYPE_UNKNOWN.
            obj_type = Map(
                Regexp(Field('_link_id'), r'.*nature=(\w+)'),
                NATURE2TYPE,
                default=Account.TYPE_UNKNOWN,
            )
            obj__market_link = None
        class item(ItemElement):
            """Describe how one ``Document`` is filled from a statement element."""

            klass = Document

            # Identifier: '<sub_id>_<two digits from the link title><date span
            # text with "/" removed>'.
            obj_id = Format(
                '%s_%s%s',
                Env('sub_id'),
                Regexp(CleanText('.//a/@title'), r' (\d{2}) '),
                CleanText('.//span[contains(@class, "date")]', symbols='/'),
            )
            obj_label = Format(
                '%s - %s',
                CleanText('.//span[contains(@class, "lib")]'),
                CleanText('.//span[contains(@class, "date")]'),
            )
            obj_url = Format(
                '/voscomptes/canalXHTML/relevePdf/relevePdf_historique/%s',
                Link('./a'),
            )
            obj_format = 'pdf'
            obj_type = DocumentTypes.OTHER

            def obj_date(self):
                """Parse the document date, day first.

                When the date span already holds a dd/mm/yyyy value it is
                parsed as is; otherwise the two digits captured from the link
                title are prepended to it, separated by '/'.
                """
                span_text = CleanText('.//span[contains(@class, "date")]')(self)
                has_full_date = re.search(r'(\d{2}/\d{2}/\d{4})', span_text)
                if has_full_date:
                    return Date(
                        CleanText('.//span[contains(@class, "date")]'),
                        dayfirst=True,
                    )(self)

                prefixed = Format(
                    '%s/%s',
                    Regexp(CleanText('.//a/@title'), r' (\d{2}) '),
                    CleanText('.//span[contains(@class, "date")]'),
                )
                return Date(prefixed, dayfirst=True)(self)
Exemple #7
0
            class item(ItemElement):
                """Describe how one ``Transaction`` is filled from a row element."""

                klass = Transaction

                # Label reads '<_labeltype> du <date>', both taken from the
                # fields declared in this class.
                obj_label = Format('%s du %s', Field('_labeltype'),
                                   Field('date'))
                obj_type = Transaction.TYPE_BANK
                obj_date = Date(CleanText(
                    u'./div[@data-label="Date d\'effet"]', children=False),
                                dayfirst=True)
                obj_amount = CleanDecimal(u'./div[@data-label="Montant en €"]',
                                          replace_dots=True)
                # Raw string: '\s' and '\w' are regex classes, not Python
                # string escapes.  A non-raw literal yields the same pattern
                # today but triggers an invalid-escape-sequence warning.
                obj__labeltype = Regexp(
                    Capitalize('./preceding::h2[@class="feature"][1]'),
                    r'Historique Des\s+(\w+)')
Exemple #8
0
    def get_profile(self):
        """Build a ``Person`` from the identity, address and e-mail blocks.

        The name is the third then the second ``dd`` of the identity block,
        separated by a space; the job is its fourth ``dd``.
        """
        profile = Person()

        name_filter = Format(
            '%s %s',
            CleanText('//div[@id="persoIdentiteDetail"]//dd[3]'),
            CleanText('//div[@id="persoIdentiteDetail"]//dd[2]'),
        )
        profile.name = name_filter(self.doc)

        address_filter = CleanText('//div[@id="persoAdresseDetail"]//dd')
        profile.address = address_filter(self.doc)

        email_filter = CleanText('//div[@id="persoEmailDetail"]//td[2]')
        profile.email = email_filter(self.doc)

        job_filter = CleanText('//div[@id="persoIdentiteDetail"]//dd[4]')
        profile.job = job_filter(self.doc)

        return profile
Exemple #9
0
        class item(ItemElement):
            """Describe how one card ``Account`` is filled from a table row."""

            klass = Account

            # TableCell('service_number') alone is not enough because a person
            # with the same service_number might have multiple cards.
            # And a card number can be associated to multiple persons.
            # Hence '<service_number>_<card_number>', shared by id and number.
            obj_id = obj_number = Format(
                '%s_%s',
                CleanText(TableCell('service_number')),
                CleanText(TableCell('card_number')),
            )
            obj_label = CleanText(
                TableCell('label'),
            )
            obj_currency = 'EUR'
            obj_type = Account.TYPE_CARD
Exemple #10
0
    class get_profile(ItemElement):
        """Describe how a ``Person`` is filled from the profile form fields."""

        klass = Person

        # Name: '<genderTitle> <firstName> <lastName>'.
        obj_name = Format(
            '%s %s %s',
            MySelect('genderTitle'),
            MyInput('firstName'),
            MyInput('lastName'),
        )
        obj_nationality = CleanText(
            u'//span[contains(text(), "Nationalité")]/span',
        )
        obj_spouse_name = MyInput('spouseFirstName')
        # Falls back to NotAvailable via the filter's default.
        obj_children = CleanDecimal(
            MyInput('dependentChildren'),
            default=NotAvailable,
        )
        obj_family_situation = MySelect('maritalStatus')
        obj_matrimonial = MySelect('matrimonial')
        obj_housing_status = MySelect('housingSituation')
        obj_job = MyInput('occupation')
        obj_job_start_date = Date(
            MyInput('employeeSince'),
            default=NotAvailable,
        )
        obj_company_name = MyInput('employer')
        obj_socioprofessional_category = MySelect('socioProfessionalCategory')
Exemple #11
0
        class item(ItemElement):
            """Describe how one ``Document`` is filled from a clickable element."""

            klass = Document

            # Both private fields are captured from the element's onclick
            # attribute; a missing attribute reads as "" and a non-matching
            # pattern yields None.
            obj__refdoc = Regexp(
                Attr(".", "onclick", default=""),
                r"\('refdoc'\)\.value='([^\']+)'",
                default=None,
            )
            obj__norng = Regexp(
                Attr(".", "onclick", default=""),
                r"\('norng'\)\.value='([^\']+)'",
                default=None,
            )
            # Identifier: '<subscription_id>_<_refdoc>'.
            obj_id = Format(
                "%s_%s",
                Env("subscription_id"),
                Field("_refdoc"),
            )
Exemple #12
0
class SeLogerItem(ItemElement):
    """Field declarations for building a ``Housing`` from a SeLoger entry."""

    klass = Housing

    obj_id = CleanText('idAnnonce')

    def obj_type(self):
        """Return the ``TYPES`` key whose value equals 'idTypeTransaction'.

        A furnished-rent match is reported as a plain rent.
        """
        transaction_id = int(CleanText('idTypeTransaction')(self))
        # NOTE(review): unlike obj_house_type, an unknown id is not handled
        # here and lets StopIteration escape -- confirm this is intended.
        post_type = next(
            key for key, value in TYPES.items() if value == transaction_id
        )
        if post_type == POSTS_TYPES.FURNISHED_RENT:
            # SeLoger does not let us discriminate between furnished and not
            # furnished.
            return POSTS_TYPES.RENT
        return post_type

    def obj_house_type(self):
        """Return the ``RET`` key matching 'idTypeBien', else NotAvailable."""
        property_id = CleanText('idTypeBien')(self)
        candidates = (
            key for key, value in RET.items() if value == property_id
        )
        return next(candidates, NotAvailable)

    # Title: '<titre> <surface><surfaceUnite> - <ville>'.
    obj_title = Format(
        "%s %s%s - %s",
        CleanText('titre'),
        CleanText('surface'),
        CleanText('surfaceUnite'),
        CleanText('ville'),
    )
    obj_date = DateTime(
        CleanText('dtFraicheur'),
    )
    obj_cost = CleanDecimal('prix')

    obj_currency = Currency('prixUnite')

    obj_area = CleanDecimal(
        'surface',
        default=NotAvailable,
    )
    obj_price_per_meter = PricePerMeterFilter()
    obj_text = CleanText('descriptif')
    obj_rooms = CleanDecimal(
        'nbPiece|nbPieces',
        default=NotAvailable,
    )
    obj_bedrooms = CleanDecimal(
        'nbChambre|nbChambres',
        default=NotAvailable,
    )

    def obj_location(self):
        """Return '<address or district> <city> (<postal code>)'.

        The district ('quartier') is used only when the address is empty and
        a district is present.
        """
        address = CleanText('adresse', default="")(self)
        district = CleanText('quartier', default=None)(self)
        if district is not None and not address:
            address = district
        city = CleanText('ville')(self)
        postal_code = CleanText('cp')(self)
        return u'%s %s (%s)' % (address, city, postal_code)

    obj_station = CleanText(
        'proximite',
        default=NotAvailable,
    )
    obj_url = CleanText('permaLien')
Exemple #13
0
 def obj_date(self):
     """Parse the document date, day first.

     When the date span already holds a dd/mm/yyyy value it is parsed as is;
     otherwise the two digits captured from the link title are prepended to
     it, separated by '/'.
     """
     span_text = CleanText('.//span[contains(@class, "date")]')(self)
     has_full_date = re.search(r'(\d{2}/\d{2}/\d{4})', span_text)
     if has_full_date:
         return Date(
             CleanText('.//span[contains(@class, "date")]'),
             dayfirst=True,
         )(self)

     prefixed = Format(
         '%s/%s',
         Regexp(CleanText('.//a/@title'), r' (\d{2}) '),
         CleanText('.//span[contains(@class, "date")]'),
     )
     return Date(prefixed, dayfirst=True)(self)
Exemple #14
0
        class item(ItemElement):
            """Describe how one card ``Account`` is filled from labelled inputs."""

            klass = Account

            # Identifier and number share one filter:
            # '<value of the "Identifiant prestation" input>_<value of the
            # "Numéro de la carte" input>'.
            obj_id = obj_number = Format(
                '%s_%s',
                Attr(
                    '//div[span[contains(text(), "Identifiant prestation")]]/following-sibling::input',
                    'value',
                ),
                Attr(
                    '//div[span[contains(text(), "Numéro de la carte")]]/following-sibling::input',
                    'value',
                ),
            )
            obj_label = CleanText(
                '//div[@class="v-slot"]/div[contains(@class, "v-label-undef-w")]',
            )

            # IBAN input value with its spaces removed.
            obj_iban = CleanText(
                Attr(
                    '//div[span[contains(text(), "IBAN")]]/following-sibling::input',
                    'value',
                ),
                replace=[(' ', '')],
            )

            obj_balance = 0
            obj_type = Account.TYPE_CARD
Exemple #15
0
        class item(ItemElement):
            """Describe how one calendar event is filled from a listing element."""

            klass = SensCritiquenCalendarEvent

            def condition(self):
                """Accept every element, or only the one matching env['_id'].

                When the environment holds a truthy '_id', the element is kept
                only if the identifier computed here equals it.
                """
                if not ('_id' in self.env and self.env['_id']):
                    return True
                computed_id = Format(
                    u'%s#%s#%s',
                    Regexp(Link('.'), '/film/(.*)'),
                    FormatDate(
                        "%Y%m%d%H%M",
                        Date('div[@class="elgr-guide-details"]/div[@class="elgr-data-diffusion"]'),
                    ),
                    CleanText(
                        './div/span[@class="d-offset"]',
                        replace=[(' ', '-')],
                    ),
                )(self)
                return computed_id == self.env['_id']

            def validate(self, obj):
                """Tell whether ``obj`` falls inside the requested date window.

                With a truthy 'date_from' that the event starts after, the
                event is accepted when 'date_to' is falsy, when it has no end
                date and starts before 'date_to', or when it ends no later
                than 'date_to'.  Otherwise it is accepted only if '_id' is
                present in the environment.
                """
                lower_bound = self.env['date_from'] if 'date_from' in self.env else None
                if lower_bound and obj.start_date > lower_bound:
                    upper_bound = self.env['date_to']
                    if not upper_bound:
                        return True
                    if empty(obj.end_date):
                        if obj.start_date < upper_bound:
                            return True
                    elif obj.end_date <= upper_bound:
                        return True

                return '_id' in self.env

            # Identifier: '<film slug>#<start date as YYYYmmddHHMM>#<d-offset
            # text with spaces replaced by dashes>'.
            obj_id = Format(
                u'%s#%s#%s',
                Regexp(Link('.'), '/film/(.*)'),
                FormatDate(
                    "%Y%m%d%H%M",
                    Date('div/div[@class="elgr-data-diffusion"]'),
                ),
                CleanText(
                    './div/span[@class="d-offset"]',
                    replace=[(' ', '-')],
                ),
            )
            obj_start_date = Date('div/div[@class="elgr-data-diffusion"]')
            obj_summary = Format(
                '%s - %s',
                Regexp(CleanText('./div/img/@alt'), '^Affiche(.*)'),
                CleanText('./div/span[@class="d-offset"]'),
            )
Exemple #16
0
    class get_video(ItemElement):
        """Describe how an ``RmllVideo`` is filled from the video page."""

        klass = RmllVideo

        # Identifier: the path segment following '/permalink/' in og:url.
        obj_id = (
            CleanHTML('/html/head/meta[@property="og:url"]/@content')
            & CleanText()
            & Regexp(pattern=r'.*/permalink/(.+)/$')
        )
        obj_title = Format(
            u'%s',
            CleanHTML('/html/head/meta[@name="DC.title"]/@content') & CleanText(),
        )
        obj_description = Format(
            u'%s',
            CleanHTML('/html/head/meta[@property="og:description"]/@content') & CleanText(),
        )

        def obj_thumbnail(self):
            """Return a ``Thumbnail`` for og:image, or None when there is none."""
            image_url = NormalizeThumbnail(
                CleanText('/html/head/meta[@property="og:image"]/@content')
            )(self)
            if not image_url:
                return None
            thumb = Thumbnail(image_url)
            thumb.url = thumb.id
            return thumb

        # 'media_duration: <value>' is read from the inline script; a miss
        # becomes '' and then NotAvailable through the Duration default.
        obj_duration = (
            CleanText('/html/head/script[not(@src)]')
            & Regexp(pattern=r'media_duration: ([^,.]+),?.*,', default='')
            & Duration(default=NotAvailable)
        )

        def obj_url(self):
            """Return the first share link ending in .mp4 or .webm, if any."""
            share_links = XPath(
                '//div[@id="tab_sharing_content"]/div/div/div[@class="paragraph"]/div[@class="share"]/a[@target="_blank"]/@href'
            )(self)
            for href in share_links:
                extension = str(href).rsplit('.', 1)[-1]
                self.logger.debug("Link:%s Ext:%s", href, extension)
                if extension in ('mp4', 'webm'):
                    return unicode(href)
Exemple #17
0
        class item(ItemElement):
            """Describe how one ``Bill`` is filled from an invoice element."""

            klass = Bill

            def condition(self):
                """Keep only elements whose 'data-fact_ligne' equals env['subid']."""
                num = Attr('.', 'data-fact_ligne', default='')(self)
                return self.env['subid'] == num

            obj_url = Attr('.//div[@class="pdf"]/a', 'href')
            # Both values are captured from the PDF link's query string.
            obj__localid = Regexp(Field('url'), '&id=(.*)&date', u'\\1')
            # Raw string: '\d' is a regex class, not a Python string escape.
            # A non-raw literal yields the same pattern today but triggers an
            # invalid-escape-sequence warning.
            obj_label = Regexp(Field('url'), r'&date=(\d*)', u'\\1')
            # Identifier: '<subid>.<_localid>'.
            obj_id = Format('%s.%s', Env('subid'), Field('_localid'))
            obj_date = FormatDate(Field('label'))
            obj_format = u"pdf"
            obj_type = u"bill"
            obj_price = CleanDecimal('div[@class="montant"]', default=Decimal(0), replace_dots=False)
Exemple #18
0
    class get_unique_card(ItemElement):
        """Describe how the single card ``Account`` is filled from the table."""

        item_xpath = '//table[@class="ca-table"][@summary]'

        klass = Account

        # Transform 'n° 4999 78xx xxxx xx72' into '499978xxxxxxxx72'
        obj_number = CleanText(
            '//table[@class="ca-table"][@summary]//tr[@class="ligne-impaire"]/td[@class="cel-texte"][1]',
            replace=[(' ', ''), ('n°', '')],
        )

        # Card ID is formatted as '499978xxxxxxxx72MrFirstnameLastname-'
        obj_id = Format(
            '%s%s',
            Field('number'),
            CleanText(
                '//table[@class="ca-table"][@summary]//caption[@class="caption"]//b',
                replace=[(' ', '')],
            ),
        )

        # Card label is formatted as 'Carte VISA Premier - Mr M Lastname'
        obj_label = Format(
            '%s - %s',
            CleanText(
                '//table[@class="ca-table"][@summary]//tr[@class="ligne-impaire ligne-bleu"]/th[@id="compte-1"]',
            ),
            CleanText(
                '//table[@class="ca-table"][@summary]//caption[@class="caption"]//b',
            ),
        )

        obj_balance = CleanDecimal(0)
        obj_coming = CleanDecimal.French(
            '//table[@class="ca-table"][@summary]//tr[@class="ligne-paire"]//td[@class="cel-num"]',
            default=0,
        )
        # Currency is whatever follows 'Montant en ' in the column header.
        obj_currency = Currency(
            Regexp(
                CleanText('//th[contains(text(), "Montant en")]'),
                r'^Montant en (.*)',
            ),
        )
        obj_type = Account.TYPE_CARD
        obj__form = None
Exemple #19
0
        class item(ItemElement):
            """Describe how one ``Detail`` is filled from a table row."""

            klass = Detail

            def condition(self):
                """Skip rows whose first cell has no text or reads "Date"."""
                first_cell_text = self.el.xpath('td[1]')[0].text
                if first_cell_text is None:
                    return False
                return first_cell_text != "Date"

            obj_id = None
            obj_datetime = DateTime(
                CleanText('td[1]', symbols=u'à'),
                dayfirst=True,
            )
            # Label: the texts of cells 2, 3 and 4 joined by spaces.
            obj_label = Format(
                u'%s %s %s',
                CleanText('td[2]'),
                CleanText('td[3]'),
                CleanText('td[4]'),
            )
            obj_price = CleanDecimal(
                'td[5]',
                default=Decimal(0),
                replace_dots=True,
            )
Exemple #20
0
        class item(ItemElement):
            """Describe how one ``Message`` is filled from a feed entry."""

            klass = Message

            # Identifier: '<origin/streamId>#<id>'.
            obj_id = Format(
                u'%s#%s',
                CleanText(Dict('origin/streamId')),
                CleanText(Dict('id')),
            )
            obj_sender = CleanText(Dict('author', default=u''))
            obj_title = Format(
                u'%s - %s',
                CleanText(Dict('origin/title', default=u'')),
                CleanText(Dict('title')),
            )

            def obj_date(self):
                """Return 'published' divided by 1e3 as a local ``datetime``."""
                published = Dict('published')(self.el)
                return datetime.fromtimestamp(published / 1e3)

            def obj_content(self):
                """Return the cleaned body followed by the origin URL.

                'content' is preferred over 'summary'; an entry with neither
                yields an empty string.
                """
                body_sources = (
                    ('content', 'content/content'),
                    ('summary', 'summary/content'),
                )
                for entry_key, body_path in body_sources:
                    if entry_key in self.el.keys():
                        return Format(
                            u'%s%s\r\n',
                            CleanHTML(Dict(body_path)),
                            CleanText(Dict('origin/htmlUrl')),
                        )(self.el)
                return ''
Exemple #21
0
        class item(ItemElement):
            """Describe how one ``Bill`` is filled from a table row."""

            klass = Bill

            # Identifier: '<subid>_<text of the third cell>'.
            obj_id = Format(
                '%s_%s',
                Env('subid'),
                CleanText('./td[3]'),
            )
            obj_url = Attr(
                './td[@class="center" or @class="center pdf"]/a',
                'href',
            )
            # Placed in the environment by parse().
            obj_date = Env('date')
            obj_format = u"pdf"
            obj_type = u"bill"
            obj_price = CleanDecimal(
                './td[@class="center montant"]/span',
                replace_dots=True,
            )

            def parse(self, el):
                """Store the French date found in the second cell as env['date']."""
                date_text = el.xpath('./td[2]')[0].text
                self.env['date'] = parse_french_date(date_text).date()

            def condition(self):
                """Keep rows with a non-empty last cell and exactly one PDF link."""
                last_cell = self.el.xpath('.//td')[-1]
                if CleanText().filter(last_cell) == "":
                    return False
                pdf_links = self.el.xpath(
                    './td[@class="center" or @class="center pdf"]/a/@href'
                )
                return len(pdf_links) == 1
Exemple #22
0
        class item(ItemElement):
            """Describe how one ``Subscription`` is filled from a line entry."""

            klass = Subscription

            obj_id = CleanText(Dict('num_ligne'))
            obj__type = CleanText(Dict('type'))
            # 'label' and 'contract' are placed in the environment by parse().
            obj_label = Env('label')
            obj_subscriber = Format(
                "%s %s %s",
                CleanText(Dict('civilite')),
                CleanText(Dict('prenom')),
                CleanText(Dict('nom')),
            )
            obj__contract = Env('contract')

            def parse(self, el):
                """Compute the spaced label and the contract id into ``self.env``."""
                # add spaces: zipping one iterator with itself walks the id two
                # characters at a time, and the pairs are joined with spaces.
                chars = iter(self.obj_id(el))
                pairs = (left + right for left, right in zip(chars, chars))
                self.env['label'] = ' '.join(pairs)
                user_id_match = re.search(
                    '\\"user_id\\":\\"([0-9]+)\\"',
                    self.page.get('data.tag'),
                )
                self.env['contract'] = user_id_match.group(1)
Exemple #23
0
            class item(ItemElement):
                """Describe how one market ``Account`` is filled from an entry."""

                klass = Account

                obj__prestation_number = Dict('numeroPrestation')

                # Identifier: the prestation number without spaces, suffixed
                # with '_TITRE'.
                obj_id = Format(
                    '%s_TITRE',
                    CleanText(
                        Field('_prestation_number'),
                        replace=[(' ', '')],
                    ),
                )
                obj_number = CleanText(
                    Field('_prestation_number'),
                    replace=[(' ', '')],
                )
                obj_label = Dict('intitule')
                # Balance and currency are both read from 'evaluation'.
                obj_balance = CleanDecimal.French(
                    Dict('evaluation'),
                )
                obj_currency = CurrencyFilter(
                    Dict('evaluation'),
                )
                obj_type = Account.TYPE_MARKET
Exemple #24
0
    class get_current(ItemElement):
        """Describe how a ``Current`` observation is filled from the payload."""

        klass = Current

        obj_date = DateTime(
            Dict('vt1currentdatetime/dateTime'),
        )
        obj_id = Env('city_id')
        # Text summary assembled from the 'vt1observation' values.
        obj_text = Format(
            '%shPa (%s) - humidity %s%% - feels like %s°C - %s',
            Dict('vt1observation/altimeter'),
            Dict('vt1observation/barometerTrend'),
            Dict('vt1observation/humidity'),
            Dict('vt1observation/feelsLike'),
            Dict('vt1observation/phrase'),
        )

        def obj_temp(self):
            """Return the observed temperature as a ``Temperature`` in 'C'."""
            observed = Dict('vt1observation/temperature')(self)
            degrees = float(observed)
            return Temperature(degrees, 'C')
Exemple #25
0
        class item(ItemElement):
            """Describe how one internal ``Recipient`` is filled from an entry."""

            def condition(self):
                """Keep only entries that carry an 'accountNumber'."""
                return Dict('accountNumber', default=None)(self)

            klass = Recipient

            obj_id = Dict('accountNumber')
            # Label: holder designation followed by the short nature label
            # (empty when absent), passed through CleanText.
            obj_label = CleanText(
                Format('%s %s', Dict('accountHolderLongDesignation'),
                       Dict('accountNatureShortLabel', default='')))
            obj_iban = Dict('ibanCode')
            obj_category = 'Interne'

            def obj_enabled_at(self):
                """Return today's date, computed when the item is built.

                A plain ``date.today()`` class attribute is evaluated only
                once, when the class body runs, so a long-lived process would
                keep handing out the date of its start-up.
                """
                return date.today()

            obj__is_recipient = Dict('recipientOfTransfert', default=False)
            obj__owner_name = CleanText(Dict('accountHolderLongDesignation'))
Exemple #26
0
        class item(ItemElement):
            """Describe how one ``Bill`` is filled from an invoice element."""

            klass = Bill

            obj__ref = CleanText('//input[@id="noref"]/@value')
            # Identifier: '<subid>_<facture-id attribute>'.
            obj_id = Format(
                '%s_%s',
                Env('subid'),
                CleanText('./@facture-id'),
            )
            obj_url = Format(
                'http://www.bouyguestelecom.fr/parcours/facture/download/index?id=%s&no_reference=%s',
                CleanText('./@facture-id'),
                CleanText('./@facture-ligne'),
            )
            # Placed in the environment by parse().
            obj_date = Env('date')
            obj_format = u"pdf"
            obj_label = CleanText('./text()')
            obj_type = u"bill"
            # ' € ' inside the span text is replaced by '.' before decimal
            # parsing.
            obj_price = CleanDecimal(
                CleanText(
                    './span',
                    replace=[(u' € ', '.')],
                ),
            )
            obj_currency = u"€"

            def parse(self, el):
                """Store env['date'] parsed from '01 ' plus the element text."""
                own_text = CleanText('./text()')(self)
                parsed = parse_french_date('01 %s' % own_text)
                self.env['date'] = parsed.date()

            def condition(self):
                """Reject an element sharing its facture-id with the next div."""
                # XXX ugly fix to avoid duplicate bills
                own_id = CleanText('./@facture-id')(self.el)
                next_id = CleanText(
                    './following-sibling::div[1]/@facture-id'
                )(self.el)
                return own_id != next_id
Exemple #27
0
        class item(ItemElement):
            """Describe how one ``Bill`` is filled from a document entry."""

            klass = Bill

            obj_date = Date(
                Dict('date'),
                default=NotAvailable,
            )
            # 'amount' is divided by 100 after decimal parsing.
            obj_price = Eval(
                lambda x: x / 100,
                CleanDecimal(Dict('amount')),
            )
            obj_format = 'pdf'

            def obj_label(self):
                """Return 'Facture du <date field>'."""
                bill_date = Field('date')(self)
                return 'Facture du %s' % bill_date

            def obj_id(self):
                """Return '<subid>_<date field formatted as ddmmYYYY>'."""
                sub_id = Env('subid')(self)
                stamp = Field('date')(self).strftime('%d%m%Y')
                return '%s_%s' % (sub_id, stamp)

            obj_url = Format(
                '%s%s',
                BrowserURL('doc_api_par'),
                Dict('hrefPdf'),
            )
            obj__is_v2 = True
Exemple #28
0
        def obj_details(self):
            """Collect the footer entries and the energy-class entry in a dict.

            Each footer ``li`` contributes its label span as key and the rest
            of its text as value; pairs with an empty side are skipped.
            """
            details = {}
            for entry in XPath('//div[@class="footer-descriptif"]/ul/li')(self):
                label = CleanText('./span[@class="label"]')(entry)
                # The entry text with the label itself removed.
                text = CleanText('.', replace=[(label, '')])(entry)
                if not (text and label):
                    continue
                details[label] = text

            energy_label = CleanText(
                '//div[@class="classe-energie-content"]/div/div/span'
            )(self)
            energy_text = Format(
                '%s(%s)',
                CleanText('//div[@class="classe-energie-content"]/div/div/p'),
                CleanText(
                    '//div[@class="classe-energie-content"]/div/@class',
                    replace=[('-', ' ')],
                ),
            )(self)
            if energy_text and energy_label:
                details[energy_label] = energy_text
            return details
Exemple #29
0
    class get_job_advert(ItemElement):
        """Describe how a ``BaseJobAdvert`` is filled from the advert page."""

        klass = BaseJobAdvert

        # Identifier: 'd#<_id>'.
        obj_id = Format(
            'd#%s',
            Env('_id'),
        )
        obj_url = BrowserURL(
            'advert2',
            _id=Env('_id'),
        )
        obj_title = CleanText('//h3')
        obj_description = CleanHTML(
            '//div[@id="jobBodyContent"]|//div[@itemprop="description"]',
        )
        obj_contract_type = CleanHTML('//div[@class="jobview-section"]')
        # The company heading is matched as '<...> : <society> - <place>'.
        obj_society_name = Regexp(
            CleanText('//h4[@class="company"]'),
            '.* : (.*) - .*',
        )
        obj_place = Regexp(
            CleanText('//h4[@class="company"]'),
            '.* - (.*)',
        )
        obj_publication_date = Date(
            Regexp(
                CleanText('//span[@class="postedDate"]'),
                '.* : (.*)',
            ),
            dayfirst=True,
        )
Exemple #30
0
        class item(ItemElement):
            """Describe how one ``Subscription`` is filled from a line entry."""

            klass = Subscription

            obj_id = CleanText(Dict('num_ligne'))
            obj__type = CleanText(Dict('type'))
            # Placed in the environment by parse().
            obj_label = Env('label')
            obj_subscriber = Format(
                "%s %s %s",
                CleanText(Dict('civilite')),
                CleanText(Dict('prenom')),
                CleanText(Dict('nom')),
            )

            def parse(self, el):
                """Store the id, split into space-separated pairs, as the label."""
                # add spaces: zipping one iterator with itself walks the id two
                # characters at a time, and the pairs are joined with spaces.
                chars = iter(self.obj_id(el))
                pairs = (left + right for left, right in zip(chars, chars))
                self.env['label'] = ' '.join(pairs)
Exemple #31
0
 def obj_label(self):
     """Return the second cell's text with every " o " collapsed to a space."""
     raw_label = Format('%s', CleanText('./td[2]'))(self)
     return raw_label.replace(" o ", " ")