Beispiel #1
0
        class item(ItemElement):
            """Card entry scraped into an ``Account`` of type ``TYPE_CARD``."""

            klass = Account

            def condition(self):
                """Return False, after logging a warning, for cancelled cards."""
                status = CleanText('.//span[@id="cardSORStatus"]')(self)
                if 'Votre carte est annulée' not in status:
                    return True
                self.logger.warning('skipping cancelled card %r',
                                    self.obj_id(self))
                return False

            obj_id = CleanText(
                './/td[@class="cardArtColWidth"]/div[@class="summaryTitles"]')
            obj_label = CleanText('.//span[@class="cardTitle"]')
            obj_type = Account.TYPE_CARD

            obj_currency = CleanCurrency(
                './/td[@id="colOSBalance"]/div[@class="summaryValues makeBold"]'
            )

            def obj_balance(self):
                """Return the outstanding balance, always as a non-positive amount."""
                raw_balance = CleanText(
                    './/td[@id="colOSBalance"]/div[@class="summaryValues makeBold"]'
                )(self)
                amount = parse_decimal(raw_balance)
                return -abs(amount)

            # Falls back to the statement/details link when the
            # "View Latest Transactions" anchor is not found.
            obj_url = AbsoluteLink(
                './/a[text()="View Latest Transactions"]',
                default=AbsoluteLink(
                    './/a[span[text()="Online Statement"] or text()="Détail de vos opérations"]'
                ))
Beispiel #2
0
    class iter_torrents(ListElement):
        """Iterate over search results, one ``Torrent`` per ``list_tor`` div."""

        next_page = AbsoluteLink('//div[has-class("pagination")]/a[last()]')
        item_xpath = '//div[has-class("list_tor")]'

        class item(ItemElement):
            """One result row mapped to a ``Torrent``."""

            klass = Torrent
            # Raw strings: '\.' in a plain literal is an invalid escape sequence.
            obj_id = Regexp(
                CleanText('.//a[has-class("list_tor_title")]/@href'),
                r'/(.*)\.torrent\.html$', r'\1')
            obj_name = CleanText('.//a[has-class("list_tor_title")]')
            obj_seeders = CleanDecimal('.//b[has-class("green")]/text()',
                                       default=0)
            obj_leechers = CleanDecimal('.//b[has-class("red")]/text()',
                                        default=0)
            obj_filename = Format('%s.torrent', obj_name)
            obj_url = AbsoluteLink('.//a[@title="Download torrent"]')

            def obj_size(self):
                """Parse the text after "Size: " into a number and a unit.

                Letters are stripped to get the numeric part; digits, dots
                and spaces are stripped to get the upper-cased unit. Both are
                handed to ``get_bytes_size``.
                """
                rawsize = Regexp(
                    CleanText(
                        './/div[has-class("list_tor_right")]/p[1]/span[1]'),
                    r'Size: (.*)$', r'\1')(self)
                nsize = float(re.sub(r'[A-Za-z]', '', rawsize))
                usize = re.sub(r'[.0-9 ]', '', rawsize).upper()
                return get_bytes_size(nsize, usize)
Beispiel #3
0
 def obj_url(self):
     """Return the row's absolute link with newlines removed, or None.

     None is returned when the first cell contains no ``<a>`` element.
     """
     # Accounts without an <a> in the <td> have no link
     if not self.el.xpath('./td[1]/a'):
         return None
     link_filter = CleanText(AbsoluteLink('./td[1]/a'),
                             default=None,
                             replace=[('\n', '')])
     return link_filter(self)
Beispiel #4
0
        class item(ItemElement):
            """Table row mapped to a ``Bill``."""

            klass = Bill

            obj_id = Format(
                'facture-%s-%s-%s#%s',
                Slugify(CleanText(TableCell('date'))),
                Slugify(CleanText(TableCell('amount'))),
                Slugify(CleanText(TableCell('type'))),
                Env('sub_id'),
            )
            obj_url = AbsoluteLink('./td[5]//a', default=NotAvailable)
            obj_date = Date(CleanText(TableCell('date')), dayfirst=True)
            obj_label = Format(
                '%s %s %s',
                CleanText(TableCell('type')),
                CleanText(TableCell('amount')),
                CleanText(TableCell('date')),
            )
            obj_type = DocumentTypes.BILL
            obj_price = CleanDecimal(TableCell('amount'), replace_dots=True)
            obj_currency = Currency(TableCell('amount'))
            # The due date is read from a "le dd/mm/yyyy" fragment of the
            # status cell.
            obj_duedate = Date(
                Regexp(CleanText(TableCell('status')),
                       r'le (\d+)/(\d+)/(\d+)', r'\1/\2/\3'),
                dayfirst=True,
            )

            def obj_format(self):
                """Return 'pdf' when the row has a link, NotAvailable otherwise."""
                return 'pdf' if self.obj_url(self) else NotAvailable

            def obj_income(self):
                """Return True when the price is negative."""
                return self.obj_price(self) < 0
Beispiel #5
0
    class get_torrent(ItemElement):
        """Detail page mapped to a single ``Torrent``."""

        klass = Torrent
        obj_name = CleanText('.//div[@id="middle_content"]/h1')
        obj_description = CleanText('//div[@id="descriptionContent"]',
                                    default=NotAvailable)
        # Raw strings: '\.' in a plain literal is an invalid escape sequence.
        obj_id = Regexp(
            CleanText(
                '//div[@id="middle_content"]/a[@title="Download torrent"]/@href'
            ), r'/(.*)\.torrent', r'\1')
        obj_url = AbsoluteLink(
            '//div[@id="middle_content"]/a[@title="Download torrent"]')
        obj_filename = Format('%s.torrent', obj_name)

        def obj_size(self):
            """Parse the size from the last comma-separated part of the heading.

            The closing parenthesis is dropped, then letters are stripped to
            get the number and digits/dots/spaces are stripped to get the
            upper-cased unit. Both are handed to ``get_bytes_size``.
            """
            rawsize = CleanText('//div[has-class("files")]/../h5')(self)
            s = rawsize.split(',')[-1].replace(')', '')
            nsize = float(re.sub(r'[A-Za-z]', '', s))
            usize = re.sub(r'[.0-9 ]', '', s).upper()
            return get_bytes_size(nsize, usize)

        def obj_files(self):
            """Return the cleaned text of every non-wrapper div under "files"."""
            return [
                CleanText(f)(self)
                for f in self.xpath(
                    '//div[has-class("files")]//div[not(has-class("wrapper"))]'
                )
            ]

        obj_seeders = CleanDecimal('//div[has-class("sl_block")]/b[1]',
                                   default=0)
        obj_leechers = CleanDecimal('//div[has-class("sl_block")]/b[2]',
                                    default=0)
        obj_magnet = CleanText('.//a[has-class("magnet")]/@href')
Beispiel #6
0
    class iter_torrents(ListElement):
        """Iterate over result table rows, one ``Torrent`` per ``<tr>``."""

        next_page = AbsoluteLink('//a[@rel="next"]')
        item_xpath = '//table[has-class("table")]/tbody/tr'

        class item(ItemElement):
            """One table row mapped to a ``Torrent``."""

            klass = Torrent
            obj_id = CleanText('.//a[@id="get_nfo"]/@target')
            obj_name = CleanText('.//td[2]//text()')
            obj_seeders = CleanDecimal('./td[last()-1]/text()', default=0)
            obj_leechers = CleanDecimal('./td[last()]/text()', default=0)
            obj_description = NotLoaded
            obj_files = NotLoaded
            obj_filename = Format('%s.torrent', obj_name)
            obj_magnet = NotAvailable

            def obj_url(self):
                """Build the download URL from the browser base URL and the id."""
                # ``self.obj_id`` is the filter object itself; it must be
                # applied to this item to get the id string. Interpolating the
                # filter directly would put its repr() in the URL.
                return '%sengine/download_torrent?id=%s' % (
                    self.page.browser.BASEURL, self.obj_id(self))

            def obj_size(self):
                """Parse the size cell into a number and a unit.

                Letters are stripped to get the number. Digits and dots are
                stripped, 'o' is replaced by 'B' and the result is upper-cased
                to get the unit. Both are handed to ``get_bytes_size``.
                """
                rawsize = CleanText('./td[last()-3]')(self)
                nsize = float(re.sub(r'[A-Za-z]', '', rawsize))
                usize = re.sub(r'[.0-9]', '',
                               rawsize).strip().replace('o', 'B').upper()
                return get_bytes_size(nsize, usize)
Beispiel #7
0
    class iter_housings(ListElement):
        """List of housings taken from ``<li>`` nodes with ``data-tranid="1"``."""

        item_xpath = './/li[@data-tranid="1"]'

        # Link with class "next" inside the pagination list.
        next_page = AbsoluteLink('./ul[has-class("pagination")]/li/a[has-class("next")]')

        class item(AvendreAlouerItem):
            # Adds the phone, read from the node's own ``data-infos`` attribute,
            # to whatever AvendreAlouerItem (defined elsewhere) already provides.
            obj_phone = CleanText(Attr('.', 'data-infos'))
Beispiel #8
0
 class item(ItemElement):
     """Table row mapped to a ``Subtitle``."""

     klass = Subtitle
     # Raw strings: '\d' in a plain literal is an invalid escape sequence.
     obj_id = Regexp(Attr('.//td[1]', 'id'), r'main(\d*)')
     obj_name = Regexp(CleanText('.//td[1]'), r'(.*)Download at 25')
     obj_nb_cd = CleanDecimal('.//td[3]')
     obj_url = AbsoluteLink('.//td[5]//a')
     # The language is whatever follows "flag " in the div's class attribute.
     obj_language = Regexp(Attr('.//td[2]//a//div', 'class'),
                           r'flag (.*)')
Beispiel #9
0
        class item(ItemElement):
            """Table row mapped to an ``Account`` of type ``TYPE_LIFE_INSURANCE``."""

            klass = Account

            # Label, id and balance come from the cells whose class contains
            # "col-1", "col-2" and "col-3" respectively.
            obj_label = CleanText('./td[contains(@class, "col-1")]/a')
            # Spaces are removed from the id.
            obj_id = CleanText('./td[contains(@class, "col-2")]/a', replace=[(' ', '')])
            obj_balance = CleanDecimal('./td[contains(@class, "col-3")]', replace_dots=True)
            obj__detail_link = AbsoluteLink('./td[contains(@class, "col-2")]/a')
            obj_type = Account.TYPE_LIFE_INSURANCE
Beispiel #10
0
        class item(ItemElement):
            """Table row mapped to an ``Account``; the type is guessed from the label."""

            klass = Account

            def condition(self):
                """Keep only rows that have more than two cells."""
                return len(self.el.xpath('./td')) > 2

            class Label(Filter):
                """Strip leading digits and spaces, then title-case the text."""

                def filter(self, text):
                    return text.lstrip(' 0123456789').title()

            class Type(Filter):
                """Map a label to an account type by substring lookup."""

                # Checked in order; the first pattern found in the lower-cased
                # label wins.
                PATTERNS = [
                    ('invest', Account.TYPE_MARKET),
                    ('ldd', Account.TYPE_SAVINGS),
                    ('livret', Account.TYPE_SAVINGS),
                    ('compte', Account.TYPE_CHECKING),
                    ('account', Account.TYPE_CHECKING),
                    ('pret', Account.TYPE_LOAN),
                    ('vie', Account.TYPE_LIFE_INSURANCE),
                    ('strategie patr.', Account.TYPE_LIFE_INSURANCE),
                    ('essentiel', Account.TYPE_LIFE_INSURANCE),
                    ('elysee', Account.TYPE_LIFE_INSURANCE),
                    ('abondance', Account.TYPE_LIFE_INSURANCE),
                    ('ely. retraite', Account.TYPE_LIFE_INSURANCE),
                    ('lae option assurance', Account.TYPE_LIFE_INSURANCE),
                    ('carte ', Account.TYPE_CARD),
                    ('plan assur. innovat.', Account.TYPE_LIFE_INSURANCE),
                ]

                def filter(self, label):
                    label = label.lower()
                    for pattern, account_type in self.PATTERNS:
                        if pattern in label:
                            return account_type
                    return Account.TYPE_UNKNOWN

            obj_label = Label(CleanText('./td[1]/a'))
            obj_currency = FrenchTransaction.Currency('./td[2]')

            obj_url = AbsoluteLink('./td[1]/a')

            obj_type = Type(Field('label'))
            # An earlier ``obj_coming = Env('coming')`` was immediately
            # overwritten by this assignment, so it was dead code and has been
            # dropped; the effective value is unchanged.
            obj_coming = NotAvailable

            @property
            def obj_balance(self):
                """Return the amount in the third cell, negated under a credits header."""
                headers = self.el.xpath('./parent::*/tr/th')
                amount_cells = self.el.xpath('./td[3]')
                if headers and headers[0].text in (u'Credits', u'Crédits'):
                    return CleanDecimal(replace_dots=True, sign=lambda x: -1).filter(amount_cells)
                return CleanDecimal(replace_dots=True).filter(amount_cells)

            @property
            def obj_id(self):
                """Return the second cell's text without dots or spaces.

                ".INVEST" is appended for market accounts.
                """
                # Investment account and main account can have the same id,
                # so we add the account type in case of Investment to prevent
                # conflicts.
                is_market = Field('type')(self) == Account.TYPE_MARKET
                account_id = CleanText(replace=[('.', ''), (' ', '')]).filter(self.el.xpath('./td[2]'))
                if is_market:
                    return account_id + ".INVEST"
                return account_id
Beispiel #11
0
        class item(ItemElement):
            """Link node mapped to a PDF ``Document`` of type ``REPORT``."""

            klass = Document

            obj_label = 'Imprimé fiscal unique'
            obj_type = DocumentTypes.REPORT
            obj_format = 'pdf'

            # The element itself is the link; the id is the numeric ``fileId``
            # query value found in that URL.
            obj_url = AbsoluteLink('.')
            obj_id = Regexp(Field('url'), r'fileId=(\d+)')
Beispiel #12
0
        class item(ItemElement):
            """Listing entry mapped to a ``Housing``."""

            def condition(self):
                """Keep only entries containing a ``spanInfosEpc`` div."""
                has_children = XPath('.//div[@id="spanInfosEpc"]',
                                     default=False)(self)
                return bool(has_children)

            klass = Housing

            obj_id = Regexp(
                CleanText('./a/@href',
                          replace=[('/annonces-immobilieres/', ''),
                                   ('/location/', '')]), '(.*).html')
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                """Map the type icon's ``alt`` text to a ``HOUSE_TYPES`` value.

                Unknown labels map to ``HOUSE_TYPES.OTHER``.
                """
                house_types_by_label = {
                    'Appartement': HOUSE_TYPES.APART,
                    'Maison /villa': HOUSE_TYPES.HOUSE,
                    'Terrain / autreinfosaccesepc': HOUSE_TYPES.LAND,
                }
                label = Attr('./a/div/p/span[@class="item type"]/img',
                             'alt')(self)
                return house_types_by_label.get(label, HOUSE_TYPES.OTHER)

            def obj_title(self):
                """Return the title span text, or the location span text if empty."""
                title = CleanText('./a/div/p/span[@class="item title"]')(self)
                if title == "":
                    title = CleanText('./a/div/p/span[@class="item loc"]')(
                        self)
                return title

            obj_cost = CleanDecimal(
                CleanText('./a/div/p/span[@class="item prix"]',
                          children=False))
            # This attribute used to be assigned twice with the same value;
            # the redundant second assignment has been removed.
            obj_currency = Currency.get_currency(u'€')
            obj_text = Format(
                '%s / %s / %s / %s',
                CleanText('./a/div/p/span[@class="item type"]/img/@alt'),
                CleanText('./a/div/p/span[@id="divnbpieces"]', children=False),
                CleanText('./a/div/p/span[@id="divsurface"]', children=False),
                CleanText('./a/div/p/span[@class="item prix"]/span'))
            obj_location = CleanText(
                './a/div/p/span[@class="item loc"]/text()[position() > 1]')
            obj_area = CleanDecimal(
                './a/div/p/span[@class="item surf"]/text()[last()]')
            obj_rooms = CleanDecimal(
                './a/div/p/span[@class="item nb"]/text()[last()]',
                default=NotAvailable)
            obj_utilities = UTILITIES.UNKNOWN
            obj_url = AbsoluteLink('./a')
Beispiel #13
0
    class iter_housings(ListElement):
        """Iterate over ``itemListe`` articles, one ``Housing`` each."""

        item_xpath = '//article[has-class("itemListe")]'

        next_page = AbsoluteLink(
            './div[@class="pagination-foot-bloc"]/a[@class="pageActive"][2]')

        class item(ItemElement):
            """One listing mapped to a land-for-sale ``Housing``."""

            klass = Housing

            obj_id = QueryValue(
                Attr('.//div[has-class("presentationItem")]/h2/a', 'href'),
                'idter')

            obj_url = AbsoluteLink('.//h2/a')

            obj_type = POSTS_TYPES.SALE

            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            obj_house_type = HOUSE_TYPES.LAND

            obj_title = CleanText('.//div[@class="presentationItem"]/h2/a')

            # Raw string: '\d' in a plain literal is an invalid escape sequence.
            obj_area = CleanDecimal(
                Regexp(CleanText('.//div[@class="presentationItem"]/h3'),
                       r'surface de (\d+) m²'))

            obj_cost = CleanDecimal(
                CleanText('.//div[@class="presentationItem"]/h3/span[1]',
                          replace=[(".", ""), (" €", "")]))

            obj_currency = Currency.get_currency(u'€')

            obj_date = Date(
                CleanText(
                    './/div[@class="presentationItem"]//span[@class="majItem"]',
                    replace=[("Mise à jour : ", "")]))

            obj_text = CleanText('.//div[@class="presentationItem"]/p')

            obj_phone = CleanText(
                './/div[@class="divBoutonContact"]/div[@class="phone-numbers-bloc"]/p[1]/strong'
            )

            def obj_photos(self):
                """Return a one-element list holding the first non-empty photo.

                An empty list is returned when no photo has a non-empty
                ``data-src``.
                """
                for photo in self.xpath(
                        './/div[has-class("photoItemListe")]/img/@data-src'):
                    if photo:
                        return [HousingPhoto(BASE_URL + '/' + photo)]
                return []

            obj_utilities = UTILITIES.UNKNOWN
Beispiel #14
0
            class item(ItemElement):
                """Search result mapped to an ``Album``."""

                klass = Album

                obj_title = CleanText('.//div[@class="heading"]/a')
                # Keeps only the part of the heading link before the first '?'.
                obj_url = Regexp(AbsoluteLink('.//div[@class="heading"]/a'),
                                 r'^([^?]+)\?')
                # Built as "album.<subdomain>.<album slug>" from the URL; None
                # when the URL does not match the pattern.
                obj_id = Regexp(Field('url'),
                                r'://([-\w]+)\.bandcamp.com/album/([-\w]+)',
                                r'album.\1.\2',
                                default=None)
Beispiel #15
0
        class item(ItemElement):
            """Track row mapped to a ``BaseAudio`` with fixed mp3 metadata."""

            klass = BaseAudio

            obj_title = CleanText('./td[@class="title-col"]//a')
            obj_ext = 'mp3'
            obj_format = 'mp3'
            obj_bitrate = 128
            obj__page_url = AbsoluteLink('./td[@class="title-col"]//a')
            # Built as "audio.<band>.<track slug>"; the band comes from the
            # environment and the slug from the track page URL.
            obj_id = Format('audio.%s.%s', Env('band'),
                            Regexp(Field('_page_url'), r'/track/([-\w]+)'))
Beispiel #16
0
        class item(ItemElement):
            """Table row mapped to a ``Subtitle``."""

            klass = Subtitle

            obj_name = CleanText('.//td/a[@alt="Subtitles\' page"]')
            # The 'cd' column text is converted to an int.
            obj_nb_cd = Type(CleanText(TableCell('cd')), type=int)
            obj_language = CleanText(TableCell('language'))
            obj_url = AbsoluteLink(
                './/td/div[has-class("pull-left")]/a[@alt="Download subtitles."]'
            )
            # The id is the path segment just before the trailing "/download".
            obj_id = Regexp(Field('url'), r'/(-*\w*)/download$', r'\1')
Beispiel #17
0
            class item(ItemElement):
                """Search result mapped to a ``BaseAudio`` track."""

                klass = BaseAudio

                obj_title = CleanText('.//div[@class="heading"]/a')
                # Keeps only the part of the heading link before the first '?'.
                obj__page_url = Regexp(
                    AbsoluteLink('.//div[@class="heading"]/a'), r'^([^?]+)\?')
                # Built as "audio.<subdomain>.<track slug>" from the page URL;
                # None when the URL does not match the pattern.
                obj_id = Regexp(Field('_page_url'),
                                r'://([-\w]+)\.bandcamp.com/track/([-\w]+)',
                                r'audio.\1.\2',
                                default=None)
Beispiel #18
0
        class item(ItemElement):
            """Table row mapped to a PDF ``Document``."""

            klass = Document

            # Dates are parsed day-first.
            obj_date = Date(CleanText(TableCell('date')), dayfirst=True)
            obj_format = 'pdf'
            obj_label = CleanText(TableCell('type'))
            obj_url = AbsoluteLink('./td[3]//a', default=NotAvailable)
            # Built as "doc-<date slug>-<type slug>#<sub_id>".
            obj_id = Format('doc-%s-%s#%s',
                            Slugify(CleanText(TableCell('date'))),
                            Slugify(CleanText(TableCell('type'))),
                            Env('sub_id'))
Beispiel #19
0
        class item(ItemElement):
            """Search result mapped to a ``Place``."""

            klass = Place

            obj_name = CleanText('.//a[has-class("denomination-links")]')
            obj_address = CleanText('.//a[has-class("adresse")]')
            # Spaces are removed, then a number made of a leading 0 and nine
            # digits is rewritten with a "+33" prefix in place of the 0.
            obj_phone = Regexp(
                CleanText(
                    './/div[has-class("tel-zone")][span[contains(text(),"Tél")]]//strong[@class="num"]',
                    replace=[(' ', '')]), r'^0(\d{9})$', r'+33\1')
            obj_url = AbsoluteLink('.//a[has-class("denomination-links")]')
            # NOTE(review): presumably NotLoaded when an "Horaires" span exists
            # and NotAvailable otherwise; confirm against HasElement's signature.
            obj_opening = HasElement('.//span[text()="Horaires"]', NotLoaded,
                                     NotAvailable)
Beispiel #20
0
        class item(ItemElement):
            """Table row mapped to an ``Account`` of type ``TYPE_CARD``."""

            klass = Account

            obj_id = CleanText('./td[2]')
            obj_label = CleanText('./td[1]')
            obj_type = Account.TYPE_CARD
            obj__rib = Env('rib')
            obj_currency = u'EUR'
            # Same cell as the id, with spaces removed.
            obj_number = CleanText('./td[2]', replace=[(' ', '')])
            obj_url = AbsoluteLink('./td[2]/a')

            # "<id>:<label>", reusing the two filters declared above.
            obj__completeid = Format('%s:%s', obj_id, obj_label)
Beispiel #21
0
        class item(ItemElement):
            """Grid entry mapped to an ``Album``."""

            klass = Album

            obj_url = AbsoluteLink('./a')
            obj__thumbnail_url = Attr('./a/div[@class="art"]/img', 'src')
            obj_title = CleanText('./a/p[@class="title"]', children=False)
            # Built as "album.<band>.<album slug>".
            obj_id = Format(
                'album.%s.%s',
                Env('band'),
                Regexp(Field('url'), r'/album/([-\w]+)'),
            )

            def obj_author(self):
                """Return the artist override text, or the page's artist if empty."""
                override = CleanText(
                    './a/p[@class="title"]/span[@class="artist-override"]'
                )(self)
                if override:
                    return override
                return self.page.get_artist()
Beispiel #22
0
        class item(ItemElement):
            """Listing entry mapped to a ``BaseVideo`` flagged NSFW with ext 'mp4'."""

            klass = BaseVideo

            obj_nsfw = True
            obj_ext = 'mp4'

            obj_title = CleanText('./a/u')
            obj_duration = Duration(CleanText('./a/b'))
            obj__page = AbsoluteLink('./a')
            # The id is everything after "/videos/" in the page link.
            obj_id = Regexp(obj__page, r'/videos/(.+)')

            def obj_thumbnail(self):
                """Wrap the ``src`` of the "thumb" image in a ``Thumbnail``."""
                return Thumbnail(Attr('.//img[@class="thumb"]', 'src')(self))
Beispiel #23
0
        class item(ItemElement):
            """Listing entry mapped to a ``BaseVideo`` flagged NSFW with ext 'mp4'."""

            klass = BaseVideo

            obj_nsfw = True
            obj_ext = 'mp4'

            obj_title = CleanText('.//a[@class="video-thumb-info__name"]')
            obj_duration = Duration(CleanText('.//div[@class="thumb-image-container__duration"]'))
            obj__page = AbsoluteLink('./a')
            # The id is everything after "/videos/" in the page link.
            obj_id = Regexp(obj__page, r'/videos/(.+)')

            def obj_thumbnail(self):
                """Wrap the ``src`` of the container image in a ``Thumbnail``."""
                return Thumbnail(Attr('.//img[@class="thumb-image-container__image"]', 'src')(self))
Beispiel #24
0
            class item(ItemElement):
                """Search result mapped to an artist ``Collection``."""

                klass = Collection

                obj_title = CleanText('.//div[@class="heading"]/a')
                # Keeps only the part of the heading link before the first '?'.
                obj_url = Regexp(AbsoluteLink('.//div[@class="heading"]/a'),
                                 r'^([^?]+)\?')
                # Built as "artist.<subdomain>"; None when the URL does not match.
                obj_id = Regexp(Field('url'),
                                r'://([-\w]+)\.bandcamp.com',
                                r'artist.\1',
                                default=None)

                def obj_split_path(self):
                    """Return a one-element list holding the URL's first host label."""
                    url = self.obj_url(self)
                    # Only "https://" URLs match; re.search returns None for
                    # anything else, so .group(1) raises AttributeError.
                    return [re.search(r'https://([^.]+)\.', url).group(1)]
Beispiel #25
0
    class iter_events(ListElement):
        """Iterate over rows of the "preliste" table, one ``BREvent`` each."""

        item_xpath = '//table[@id="preliste"]/tr'
        next_page = AbsoluteLink(
            '(//a[text()=">"][contains(@href,"LISTEPEpg")])[1]')

        class item(ItemElement):
            """One table row mapped to a ``BREvent``."""

            klass = BREvent

            obj_summary = CleanText('.//h4')
            obj_url = AbsoluteLink('.//h4/a')
            obj_description = CleanText('.//div[@class="libellepreliste"]')
            obj_city = CleanText('(.//span[@class="lieu"]/a)[2]')
            obj_location = CleanText('(.//span[@class="lieu"]/a)[1]')
            obj_timezone = 'Europe/Paris'

            def obj_price(self):
                """Return the price as a float, reading '€' as the decimal point."""
                return float(
                    CleanText('.//span[@class="prixli"]')(self).replace(
                        '€', '.'))

            def obj__date_hours(self):
                """Return the event's start times.

                For a "du ... au ..." range, the remaining text is handed to
                ``TimeParser`` with the weekday of the ``date`` environment
                value, and its ``res`` is returned. Otherwise the text must
                look like "le <day> <n> <month> <year> à <h>h<mm>" and a
                single ``(hour, minute)`` pair is returned in a list.

                Raises AttributeError when the text matches neither form,
                because the second match is None.
                """
                date = Env('date')(self)
                weekday = date.weekday()

                txt = CleanText('.//p[@class="sb"]')(self).lower()
                m = re.match(r'du \d+/\d+/\d+ au \d+/\d+/\d+ (.*)', txt)
                if m:
                    txt = m.group(1)
                    p = TimeParser(txt, weekday)
                    p.do_parse()
                    return p.res

                # Raw string: '\w' and '\d' in a plain literal are invalid
                # escape sequences.
                m = re.match(r'le \w+ \d+ \w+ \d+ à (\d+)h(\d*)$', txt,
                             re.UNICODE)
                # Minutes are optional and default to 0.
                return [(int(m.group(1)), int(m.group(2) or 0))]

            obj_start_date = Env('date')

            def obj_category(self):
                """Return the category of the first known label found in the text.

                Falls back to ``CATEGORIES.AUTRE`` when no label matches.
                """
                text = CleanText(
                    './/h4/following-sibling::span[@class="small"]/a')(self)
                for k in LABEL_TO_CAT:
                    if k in text:
                        return LABEL_TO_CAT[k]
                return CATEGORIES.AUTRE

            def obj_siteid(self):
                """Return the 'id' group of the browser's event URL matched on this URL."""
                return self.page.browser.event.match(
                    Field('url')(self)).group('id')
Beispiel #26
0
        class item(ItemElement):
            """Directory-listing row mapped to a ``BaseAudio``."""

            def condition(self):
                """Keep only rows whose first icon ``alt`` text is '[SND]'."""
                alts = self.el.xpath('./td/img/@alt')
                if not alts:
                    return False
                return alts[0] == '[SND]'

            klass = BaseAudio

            obj_url = AbsoluteLink('./td/a')

            # Helper filter, not an object field: the decoded last path
            # segment of the URL.
            filename = Decode(Regexp(Field('url'), '/([^/]+)$'))
            obj_title = Regexp(filename, r'(.*)\.[^.]+$')
            obj_ext = Regexp(filename, r'\.([^.]+)$')
            obj_format = obj_ext

            def obj_id(self):
                """Return 'audio.' plus the page's split path and filename joined by '/'."""
                segments = self.page.get_split_path() + [self.filename(self)]
                return 'audio.%s' % '/'.join(segments)
Beispiel #27
0
    class iter_content(ListElement):
        """Search results split into albums, tracks and artists.

        Each nested list selects the ``<li>`` results whose "itemtype" div
        reads ALBUM, TRACK or ARTIST respectively.
        """

        next_page = AbsoluteLink('//a[has-class("next")]')

        class iter_albums(ListElement):
            item_xpath = '//ul[@class="result-items"]/li[.//div[@class="itemtype"][normalize-space(text())="ALBUM"]]'

            class item(ItemElement):
                klass = Album

                obj_title = CleanText('.//div[@class="heading"]/a')
                # Keeps only the part of the heading link before the first '?'.
                obj_url = Regexp(AbsoluteLink('.//div[@class="heading"]/a'),
                                 r'^([^?]+)\?')
                # "album.<subdomain>.<album slug>", or None when the URL does
                # not match.
                obj_id = Regexp(Field('url'),
                                r'://([-\w]+)\.bandcamp.com/album/([-\w]+)',
                                r'album.\1.\2',
                                default=None)

        class iter_tracks(ListElement):
            item_xpath = '//ul[@class="result-items"]/li[.//div[@class="itemtype"][normalize-space(text())="TRACK"]]'

            class item(ItemElement):
                klass = BaseAudio

                obj_title = CleanText('.//div[@class="heading"]/a')
                obj__page_url = Regexp(
                    AbsoluteLink('.//div[@class="heading"]/a'), r'^([^?]+)\?')
                # "audio.<subdomain>.<track slug>", or None when the URL does
                # not match.
                obj_id = Regexp(Field('_page_url'),
                                r'://([-\w]+)\.bandcamp.com/track/([-\w]+)',
                                r'audio.\1.\2',
                                default=None)

        class iter_artists(ListElement):
            item_xpath = '//ul[@class="result-items"]/li[.//div[@class="itemtype"][normalize-space(text())="ARTIST"]]'

            class item(ItemElement):
                klass = Collection

                obj_title = CleanText('.//div[@class="heading"]/a')
                obj_url = Regexp(AbsoluteLink('.//div[@class="heading"]/a'),
                                 r'^([^?]+)\?')
                # "artist.<subdomain>", or None when the URL does not match.
                obj_id = Regexp(Field('url'),
                                r'://([-\w]+)\.bandcamp.com',
                                r'artist.\1',
                                default=None)

                def obj_split_path(self):
                    """Return a one-element list holding the URL's first host label."""
                    url = self.obj_url(self)
                    # Only "https://" URLs match; re.search returns None for
                    # anything else, so .group(1) raises AttributeError.
                    return [re.search(r'https://([^.]+)\.', url).group(1)]
Beispiel #28
0
        class item(ItemElement):
            """Table row mapped to an ``Account`` of type ``TYPE_CARD``."""

            klass = Account

            obj_id = CleanText('./td[2]')

            # Some account names have spaces in the middle which cause
            # the history search to fail if we remove them.
            # eg: `NAME  SURNAME` = `NAME++SURNAME` in the history search.
            # Hence RawText plus strip() rather than CleanText: only the
            # surrounding whitespace is removed.
            obj_label = Eval(lambda x: x.strip(), RawText('./td[1]'))
            obj_type = Account.TYPE_CARD
            obj__rib = Env('rib')
            obj__company = Env('company', default=None)  # this field is something used to make the module work, not something meant to be displayed to end users
            obj_currency = 'EUR'
            # Same cell as the id, with spaces removed.
            obj_number = CleanText('./td[2]', replace=[(' ', '')])
            obj_url = AbsoluteLink('./td[2]/a')

            # "<id>:<label>", reusing the two filters declared above.
            obj__completeid = Format('%s:%s', obj_id, obj_label)
Beispiel #29
0
    class iter_videos(ListElement):
        """Iterate over ``<div class="video">`` nodes, one ``BaseVideo`` each."""

        # Pagination follows the link whose text is "Suivant".
        next_page = AbsoluteLink('//a[text()="Suivant"]')
        item_xpath = '//div[@class="video"]'

        class item(ItemElement):
            klass = BaseVideo

            obj_nsfw = True
            obj_ext = 'mp4'

            obj_title = CleanText('./a/u')
            obj_duration = Duration(CleanText('./a/b'))
            obj__page = AbsoluteLink('./a')
            # The id is everything after "/videos/" in the page link.
            obj_id = Regexp(obj__page, r'/videos/(.+)')

            def obj_thumbnail(self):
                """Wrap the ``src`` of the "thumb" image in a ``Thumbnail``."""
                return Thumbnail(Attr('.//img[@class="thumb"]', 'src')(self))
Beispiel #30
0
        class item(ItemElement):
            """Invoice node mapped to a PDF ``Bill``."""

            klass = Bill

            def condition(self):
                """Keep only nodes whose ``data-fact_ligne`` equals the 'subid' env value."""
                num = Attr('.', 'data-fact_ligne', default='')(self)
                return self.env['subid'] == num

            obj_url = AbsoluteLink('.//div[@class="pdf"]/a')
            # Raw strings: '\d' in a plain literal is an invalid escape
            # sequence. The values are unchanged.
            obj__localid = Regexp(Field('url'), r'&id=(.*)&date', r'\1')
            obj_label = Regexp(Field('url'), r'&date=(\d*)', r'\1')
            obj_id = Format('%s.%s', Env('subid'), Field('_localid'))
            # The label holds the digits of the URL's "date" parameter.
            obj_date = FormatDate(Field('label'))
            obj_format = u"pdf"
            obj_type = DocumentTypes.BILL
            obj_price = CleanDecimal('div[@class="montant"]',
                                     default=Decimal(0),
                                     replace_dots=False)
            obj_currency = Currency('div[@class="montant"]')