class item(ItemElement):
    """Describe one card entry as an ``Account`` of type ``TYPE_CARD``."""

    klass = Account

    def condition(self):
        """Return False (and log a warning) for cards flagged as cancelled."""
        status = CleanText('.//span[@id="cardSORStatus"]')(self)
        if 'Votre carte est annulée' not in status:
            return True

        self.logger.warning('skipping cancelled card %r', self.obj_id(self))
        return False

    obj_id = CleanText(
        './/td[@class="cardArtColWidth"]/div[@class="summaryTitles"]')
    obj_label = CleanText('.//span[@class="cardTitle"]')
    obj_type = Account.TYPE_CARD
    obj_currency = CleanCurrency(
        './/td[@id="colOSBalance"]/div[@class="summaryValues makeBold"]')

    def obj_balance(self):
        """Return the outstanding balance, always as a non-positive amount."""
        raw_balance = CleanText(
            './/td[@id="colOSBalance"]/div[@class="summaryValues makeBold"]'
        )(self)
        return -abs(parse_decimal(raw_balance))

    # The second link expression is only the fallback (``default``) of the
    # first one.
    obj_url = AbsoluteLink(
        './/a[text()="View Latest Transactions"]',
        default=AbsoluteLink(
            './/a[span[text()="Online Statement"] or text()="Détail de vos opérations"]'
        ),
    )
class iter_torrents(ListElement):
    """List the ``list_tor`` blocks of a result page as ``Torrent`` objects."""

    next_page = AbsoluteLink('//div[has-class("pagination")]/a[last()]')
    item_xpath = '//div[has-class("list_tor")]'

    class item(ItemElement):
        """Describe a single ``list_tor`` block."""

        klass = Torrent

        # Raw string: "\." is an invalid escape sequence in a plain literal.
        obj_id = Regexp(
            CleanText('.//a[has-class("list_tor_title")]/@href'),
            r'/(.*)\.torrent\.html$',
            r'\1')
        obj_name = CleanText('.//a[has-class("list_tor_title")]')
        obj_seeders = CleanDecimal('.//b[has-class("green")]/text()', default=0)
        obj_leechers = CleanDecimal('.//b[has-class("red")]/text()', default=0)
        obj_filename = Format('%s.torrent', obj_name)
        obj_url = AbsoluteLink('.//a[@title="Download torrent"]')

        def obj_size(self):
            """Parse the text following "Size: " into a byte count.

            The numeric part and the unit letters are separated and handed to
            ``get_bytes_size``.
            """
            rawsize = Regexp(
                CleanText('.//div[has-class("list_tor_right")]/p[1]/span[1]'),
                'Size: (.*)$',
                r'\1')(self)
            # Strip the letters to get the number, strip the number to get
            # the unit.
            nsize = float(re.sub(r'[A-Za-z]', '', rawsize))
            usize = re.sub(r'[.0-9 ]', '', rawsize).upper()
            return get_bytes_size(nsize, usize)
def obj_url(self):
    """Return the absolute link found in the first cell, or None if absent."""
    # Accounts without an <a> in the <td> have no link
    if not self.el.xpath('./td[1]/a'):
        return None

    link = CleanText(
        AbsoluteLink('./td[1]/a'),
        default=None,
        replace=[('\n', '')],
    )
    return link(self)
class item(ItemElement):
    """Describe one table row as a ``Bill``."""

    klass = Bill

    # The id combines the slugified date, amount and type with the
    # subscription id taken from the environment.
    obj_id = Format(
        'facture-%s-%s-%s#%s',
        Slugify(CleanText(TableCell('date'))),
        Slugify(CleanText(TableCell('amount'))),
        Slugify(CleanText(TableCell('type'))),
        Env('sub_id'),
    )
    obj_url = AbsoluteLink('./td[5]//a', default=NotAvailable)
    obj_date = Date(CleanText(TableCell('date')), dayfirst=True)
    obj_label = Format(
        '%s %s %s',
        CleanText(TableCell('type')),
        CleanText(TableCell('amount')),
        CleanText(TableCell('date')),
    )
    obj_type = DocumentTypes.BILL
    obj_price = CleanDecimal(TableCell('amount'), replace_dots=True)
    obj_currency = Currency(TableCell('amount'))
    obj_duedate = Date(
        Regexp(
            CleanText(TableCell('status')),
            r'le (\d+)/(\d+)/(\d+)',
            r'\1/\2/\3',
        ),
        dayfirst=True,
    )

    def obj_format(self):
        """Return 'pdf' when the row has a link, ``NotAvailable`` otherwise."""
        return 'pdf' if self.obj_url(self) else NotAvailable

    def obj_income(self):
        """Return True when the price is negative."""
        return self.obj_price(self) < 0
class get_torrent(ItemElement):
    """Describe the torrent shown on a detail page as a ``Torrent``."""

    klass = Torrent

    obj_name = CleanText('.//div[@id="middle_content"]/h1')
    obj_description = CleanText('//div[@id="descriptionContent"]',
                                default=NotAvailable)
    # Raw string: "\." is an invalid escape sequence in a plain literal.
    obj_id = Regexp(
        CleanText(
            '//div[@id="middle_content"]/a[@title="Download torrent"]/@href'
        ),
        r'/(.*)\.torrent',
        r'\1')
    obj_url = AbsoluteLink(
        '//div[@id="middle_content"]/a[@title="Download torrent"]')
    obj_filename = Format('%s.torrent', obj_name)

    def obj_size(self):
        """Parse the size from the heading next to the file list.

        Only the text after the last comma is used, with any closing
        parenthesis removed; number and unit are then split apart and passed
        to ``get_bytes_size``.
        """
        rawsize = CleanText('//div[has-class("files")]/../h5')(self)
        s = rawsize.split(',')[-1].replace(')', '')
        nsize = float(re.sub(r'[A-Za-z]', '', s))
        usize = re.sub(r'[.0-9 ]', '', s).upper()
        return get_bytes_size(nsize, usize)

    def obj_files(self):
        """Return the cleaned text of every non-wrapper div in the file list."""
        return [
            CleanText(f)(self)
            for f in self.xpath(
                '//div[has-class("files")]//div[not(has-class("wrapper"))]')
        ]

    obj_seeders = CleanDecimal('//div[has-class("sl_block")]/b[1]', default=0)
    obj_leechers = CleanDecimal('//div[has-class("sl_block")]/b[2]', default=0)
    obj_magnet = CleanText('.//a[has-class("magnet")]/@href')
class iter_torrents(ListElement):
    """List the rows of the results table as ``Torrent`` objects."""

    next_page = AbsoluteLink('//a[@rel="next"]')
    item_xpath = '//table[has-class("table")]/tbody/tr'

    class item(ItemElement):
        """Describe a single result row."""

        klass = Torrent

        obj_id = CleanText('.//a[@id="get_nfo"]/@target')
        obj_name = CleanText('.//td[2]//text()')
        obj_seeders = CleanDecimal('./td[last()-1]/text()', default=0)
        obj_leechers = CleanDecimal('./td[last()]/text()', default=0)
        obj_description = NotLoaded
        obj_files = NotLoaded
        obj_filename = Format('%s.torrent', obj_name)
        obj_magnet = NotAvailable

        def obj_url(self):
            """Return the download URL built from the base URL and the id."""
            # ``self.obj_id`` is the filter itself; it has to be applied to
            # this item to obtain the id string, otherwise the filter
            # object's text representation ends up in the URL.
            return '%sengine/download_torrent?id=%s' % (
                self.page.browser.BASEURL, self.obj_id(self))

        def obj_size(self):
            """Parse the size cell into a byte count via ``get_bytes_size``."""
            rawsize = CleanText('./td[last()-3]')(self)
            nsize = float(re.sub(r'[A-Za-z]', '', rawsize))
            # Lowercase 'o' in the unit is rewritten to 'B' before
            # upper-casing.
            usize = re.sub(r'[.0-9]', '', rawsize).strip().replace('o', 'B').upper()
            return get_bytes_size(nsize, usize)
class iter_housings(ListElement):
    """List the ``<li data-tranid="1">`` entries of the page."""

    item_xpath = './/li[@data-tranid="1"]'
    next_page = AbsoluteLink(
        './ul[has-class("pagination")]/li/a[has-class("next")]'
    )

    class item(AvendreAlouerItem):
        """``AvendreAlouerItem`` with the phone read from ``data-infos``."""

        obj_phone = CleanText(
            Attr('.', 'data-infos')
        )
class item(ItemElement):
    """Describe one table row as a ``Subtitle``."""

    klass = Subtitle

    # Raw string: "\d" is an invalid escape sequence in a plain literal.
    obj_id = Regexp(Attr('.//td[1]', 'id'), r'main(\d*)')
    # Keep only the text that precedes the "Download at 25" marker.
    obj_name = Regexp(CleanText('.//td[1]'), '(.*)Download at 25')
    obj_nb_cd = CleanDecimal('.//td[3]')
    obj_url = AbsoluteLink('.//td[5]//a')
    # The language is whatever follows "flag " in the div's class attribute.
    obj_language = Regexp(Attr('.//td[2]//a//div', 'class'), 'flag (.*)')
class item(ItemElement):
    """Describe one table row as a life-insurance ``Account``."""

    klass = Account

    # Label: link text of the "col-1" cell.
    obj_label = CleanText('./td[contains(@class, "col-1")]/a')
    # Id: link text of the "col-2" cell with spaces removed.
    obj_id = CleanText(
        './td[contains(@class, "col-2")]/a',
        replace=[(' ', '')],
    )
    # Balance: content of the "col-3" cell.
    obj_balance = CleanDecimal(
        './td[contains(@class, "col-3")]',
        replace_dots=True,
    )
    obj__detail_link = AbsoluteLink('./td[contains(@class, "col-2")]/a')
    obj_type = Account.TYPE_LIFE_INSURANCE
class item(ItemElement):
    """Describe one table row as an ``Account``.

    Rows with two cells or fewer are skipped by ``condition``.
    """

    klass = Account

    def condition(self):
        """Accept only rows that have more than two ``<td>`` cells."""
        return len(self.el.xpath('./td')) > 2

    class Label(Filter):
        """Strip leading spaces and digits from the text, then title-case it."""

        def filter(self, text):
            return text.lstrip(' 0123456789').title()

    class Type(Filter):
        """Map a label to an account type through substring matching."""

        # Checked in order; the first pattern contained in the lower-cased
        # label wins.
        PATTERNS = [
            ('invest', Account.TYPE_MARKET),
            ('ldd', Account.TYPE_SAVINGS),
            ('livret', Account.TYPE_SAVINGS),
            ('compte', Account.TYPE_CHECKING),
            ('account', Account.TYPE_CHECKING),
            ('pret', Account.TYPE_LOAN),
            ('vie', Account.TYPE_LIFE_INSURANCE),
            ('strategie patr.', Account.TYPE_LIFE_INSURANCE),
            ('essentiel', Account.TYPE_LIFE_INSURANCE),
            ('elysee', Account.TYPE_LIFE_INSURANCE),
            ('abondance', Account.TYPE_LIFE_INSURANCE),
            ('ely. retraite', Account.TYPE_LIFE_INSURANCE),
            ('lae option assurance', Account.TYPE_LIFE_INSURANCE),
            ('carte ', Account.TYPE_CARD),
            ('plan assur. innovat.', Account.TYPE_LIFE_INSURANCE),
        ]

        def filter(self, label):
            label = label.lower()
            for pattern, account_type in self.PATTERNS:
                if pattern in label:
                    return account_type
            return Account.TYPE_UNKNOWN

    obj_label = Label(CleanText('./td[1]/a'))
    obj_coming = NotAvailable
    obj_currency = FrenchTransaction.Currency('./td[2]')
    obj_url = AbsoluteLink('./td[1]/a')
    obj_type = Type(Field('label'))

    @property
    def obj_balance(self):
        """Return the amount of the third cell.

        The sign is forced to negative when the first header cell of the
        enclosing table reads 'Credits' or 'Crédits'.
        """
        headers = self.el.xpath('./parent::*/tr/th')
        amount_cell = self.el.xpath('./td[3]')
        if headers and headers[0].text in [u'Credits', u'Crédits']:
            return CleanDecimal(replace_dots=True, sign=lambda x: -1).filter(amount_cell)
        return CleanDecimal(replace_dots=True).filter(amount_cell)

    @property
    def obj_id(self):
        """Return the second cell's text without dots and spaces.

        Market accounts get an additional ".INVEST" suffix.
        """
        # Investment account and main account can have the same id
        # so we add the account type in case of Investment to prevent conflict
        is_market = Field('type')(self) == Account.TYPE_MARKET
        account_id = CleanText(replace=[('.', ''), (' ', '')]).filter(self.el.xpath('./td[2]'))
        if is_market:
            return account_id + ".INVEST"
        return account_id
class item(ItemElement):
    """Describe one link as a PDF ``Document`` of type ``REPORT``."""

    klass = Document

    # Constant values shared by every document built here.
    obj_label = 'Imprimé fiscal unique'
    obj_type = DocumentTypes.REPORT
    obj_format = 'pdf'

    # The current element itself carries the link.
    obj_url = AbsoluteLink('.')
    # The id is the numeric ``fileId`` query parameter of that URL.
    obj_id = Regexp(
        Field('url'),
        r'fileId=(\d+)',
    )
class item(ItemElement):
    """Describe one listing entry as a ``Housing`` with a PERSONAL advert type."""

    def condition(self):
        """Accept only entries that contain a ``spanInfosEpc`` div."""
        return bool(XPath('.//div[@id="spanInfosEpc"]', default=False)(self))

    klass = Housing

    obj_id = Regexp(
        CleanText('./a/@href',
                  replace=[('/annonces-immobilieres/', ''),
                           ('/location/', '')]),
        '(.*).html')
    obj_type = Env('query_type')
    obj_advert_type = ADVERT_TYPES.PERSONAL

    def obj_house_type(self):
        """Map the type image's ``alt`` text to a ``HOUSE_TYPES`` value.

        Unrecognised texts map to ``HOUSE_TYPES.OTHER``.
        """
        house_type = Attr('./a/div/p/span[@class="item type"]/img', 'alt')(self)
        known_types = {
            'Appartement': HOUSE_TYPES.APART,
            'Maison /villa': HOUSE_TYPES.HOUSE,
            'Terrain / autreinfosaccesepc': HOUSE_TYPES.LAND,
        }
        return known_types.get(house_type, HOUSE_TYPES.OTHER)

    def obj_title(self):
        """Return the title text, falling back to the location when empty."""
        title = CleanText('./a/div/p/span[@class="item title"]')(self)
        if title == "":
            title = CleanText('./a/div/p/span[@class="item loc"]')(self)
        return title

    obj_cost = CleanDecimal(
        CleanText('./a/div/p/span[@class="item prix"]', children=False))
    obj_currency = Currency.get_currency(u'€')
    obj_text = Format(
        '%s / %s / %s / %s',
        CleanText('./a/div/p/span[@class="item type"]/img/@alt'),
        CleanText('./a/div/p/span[@id="divnbpieces"]', children=False),
        CleanText('./a/div/p/span[@id="divsurface"]', children=False),
        CleanText('./a/div/p/span[@class="item prix"]/span'))
    obj_location = CleanText(
        './a/div/p/span[@class="item loc"]/text()[position() > 1]')
    obj_area = CleanDecimal(
        './a/div/p/span[@class="item surf"]/text()[last()]')
    obj_rooms = CleanDecimal(
        './a/div/p/span[@class="item nb"]/text()[last()]',
        default=NotAvailable)
    obj_utilities = UTILITIES.UNKNOWN
    obj_url = AbsoluteLink('./a')
class iter_housings(ListElement):
    """List the ``itemListe`` articles of the page as ``Housing`` objects."""

    item_xpath = '//article[has-class("itemListe")]'
    next_page = AbsoluteLink(
        './div[@class="pagination-foot-bloc"]/a[@class="pageActive"][2]')

    class item(ItemElement):
        """Describe one article as a professional land-for-sale ``Housing``."""

        klass = Housing

        obj_id = QueryValue(
            Attr('.//div[has-class("presentationItem")]/h2/a', 'href'),
            'idter')
        obj_url = AbsoluteLink('.//h2/a')
        obj_type = POSTS_TYPES.SALE
        obj_advert_type = ADVERT_TYPES.PROFESSIONAL
        obj_house_type = HOUSE_TYPES.LAND
        obj_title = CleanText('.//div[@class="presentationItem"]/h2/a')
        # Raw string: "\d" is an invalid escape sequence in a plain literal.
        obj_area = CleanDecimal(
            Regexp(CleanText('.//div[@class="presentationItem"]/h3'),
                   r'surface de (\d+) m²'))
        obj_cost = CleanDecimal(
            CleanText('.//div[@class="presentationItem"]/h3/span[1]',
                      replace=[(".", ""), (" €", "")]))
        obj_currency = Currency.get_currency(u'€')
        obj_date = Date(
            CleanText(
                './/div[@class="presentationItem"]//span[@class="majItem"]',
                replace=[("Mise à jour : ", "")]))
        obj_text = CleanText('.//div[@class="presentationItem"]/p')
        obj_phone = CleanText(
            './/div[@class="divBoutonContact"]/div[@class="phone-numbers-bloc"]/p[1]/strong'
        )

        def obj_photos(self):
            """Return a ``HousingPhoto`` for every non-empty ``data-src`` found.

            Always returns a list: it is empty when the article has no image,
            instead of falling through and returning ``None``.
            """
            return [
                HousingPhoto(BASE_URL + '/' + photo)
                for photo in self.xpath(
                    './/div[has-class("photoItemListe")]/img/@data-src')
                if photo
            ]

        obj_utilities = UTILITIES.UNKNOWN
class item(ItemElement):
    """Describe one search result as an ``Album``."""

    klass = Album

    obj_title = CleanText('.//div[@class="heading"]/a')
    # Keep only the part of the link that precedes the query string.
    obj_url = Regexp(
        AbsoluteLink('.//div[@class="heading"]/a'),
        r'^([^?]+)\?',
    )
    # Id layout: "album.<subdomain>.<album slug>"; None when the URL does
    # not match.
    obj_id = Regexp(
        Field('url'),
        r'://([-\w]+)\.bandcamp.com/album/([-\w]+)',
        r'album.\1.\2',
        default=None,
    )
class item(ItemElement):
    """Describe one track row as a ``BaseAudio``."""

    klass = BaseAudio

    obj_title = CleanText('./td[@class="title-col"]//a')

    # Fixed audio properties applied to every track.
    obj_ext = 'mp3'
    obj_format = 'mp3'
    obj_bitrate = 128

    obj__page_url = AbsoluteLink('./td[@class="title-col"]//a')
    # Id layout: "audio.<band from env>.<track slug from the page URL>".
    obj_id = Format(
        'audio.%s.%s',
        Env('band'),
        Regexp(Field('_page_url'), r'/track/([-\w]+)'),
    )
class item(ItemElement):
    """Describe one table row as a ``Subtitle``."""

    klass = Subtitle

    obj_name = CleanText('.//td/a[@alt="Subtitles\' page"]')
    # The "cd" column is converted to an int.
    obj_nb_cd = Type(
        CleanText(TableCell('cd')),
        type=int,
    )
    obj_language = CleanText(TableCell('language'))
    obj_url = AbsoluteLink(
        './/td/div[has-class("pull-left")]/a[@alt="Download subtitles."]'
    )
    # The id is the path segment right before the trailing "/download".
    obj_id = Regexp(
        Field('url'),
        r'/(-*\w*)/download$',
        r'\1',
    )
class item(ItemElement):
    """Describe one search result as a ``BaseAudio`` track."""

    klass = BaseAudio

    obj_title = CleanText('.//div[@class="heading"]/a')
    # Keep only the part of the link that precedes the query string.
    obj__page_url = Regexp(
        AbsoluteLink('.//div[@class="heading"]/a'),
        r'^([^?]+)\?',
    )
    # Id layout: "audio.<subdomain>.<track slug>"; None when the URL does
    # not match.
    obj_id = Regexp(
        Field('_page_url'),
        r'://([-\w]+)\.bandcamp.com/track/([-\w]+)',
        r'audio.\1.\2',
        default=None,
    )
class item(ItemElement):
    """Describe one table row as a PDF ``Document``."""

    klass = Document

    obj_date = Date(
        CleanText(TableCell('date')),
        dayfirst=True,
    )
    obj_format = 'pdf'
    obj_label = CleanText(TableCell('type'))
    obj_url = AbsoluteLink('./td[3]//a', default=NotAvailable)
    # The id combines the slugified date and type with the subscription id
    # taken from the environment.
    obj_id = Format(
        'doc-%s-%s#%s',
        Slugify(CleanText(TableCell('date'))),
        Slugify(CleanText(TableCell('type'))),
        Env('sub_id'),
    )
class item(ItemElement):
    """Describe one result entry as a ``Place``."""

    klass = Place

    obj_name = CleanText('.//a[has-class("denomination-links")]')
    obj_address = CleanText('.//a[has-class("adresse")]')
    # The number is read with spaces removed; a leading "0" followed by nine
    # digits is rewritten with a "+33" prefix.
    obj_phone = Regexp(
        CleanText(
            './/div[has-class("tel-zone")][span[contains(text(),"Tél")]]//strong[@class="num"]',
            replace=[(' ', '')],
        ),
        r'^0(\d{9})$',
        r'+33\1',
    )
    obj_url = AbsoluteLink('.//a[has-class("denomination-links")]')
    # NotLoaded when an "Horaires" span is present, NotAvailable otherwise.
    obj_opening = HasElement(
        './/span[text()="Horaires"]',
        NotLoaded,
        NotAvailable,
    )
class item(ItemElement):
    """Describe one table row as a card ``Account`` in EUR."""

    klass = Account

    obj_id = CleanText('./td[2]')
    obj_label = CleanText('./td[1]')
    obj_type = Account.TYPE_CARD
    obj__rib = Env('rib')
    obj_currency = u'EUR'
    # Same cell as the id, but with spaces removed.
    obj_number = CleanText(
        './td[2]',
        replace=[(' ', '')],
    )
    obj_url = AbsoluteLink('./td[2]/a')
    # "<id>:<label>", built from the two filters declared above.
    obj__completeid = Format(
        '%s:%s',
        obj_id,
        obj_label,
    )
class item(ItemElement):
    """Describe one entry as an ``Album``."""

    klass = Album

    obj_url = AbsoluteLink('./a')
    obj__thumbnail_url = Attr('./a/div[@class="art"]/img', 'src')
    obj_title = CleanText('./a/p[@class="title"]', children=False)
    # Id layout: "album.<band from env>.<album slug from the URL>".
    obj_id = Format(
        'album.%s.%s',
        Env('band'),
        Regexp(Field('url'), r'/album/([-\w]+)'),
    )

    def obj_author(self):
        """Return the artist override if present, else the page's artist."""
        override = CleanText(
            './a/p[@class="title"]/span[@class="artist-override"]')(self)
        if override:
            return override
        return self.page.get_artist()
class item(ItemElement):
    """Describe one entry as a ``BaseVideo`` flagged NSFW."""

    klass = BaseVideo

    # Constant values applied to every video.
    obj_nsfw = True
    obj_ext = 'mp4'

    obj_title = CleanText('./a/u')
    obj_duration = Duration(CleanText('./a/b'))
    obj__page = AbsoluteLink('./a')
    # The id is everything after "/videos/" in the page link.
    obj_id = Regexp(obj__page, r'/videos/(.+)')

    def obj_thumbnail(self):
        """Wrap the ``src`` of the ``thumb`` image in a ``Thumbnail``."""
        image_src = Attr('.//img[@class="thumb"]', 'src')(self)
        return Thumbnail(image_src)
class item(ItemElement):
    """Describe one entry as a ``BaseVideo`` flagged NSFW."""

    klass = BaseVideo

    # Constant values applied to every video.
    obj_nsfw = True
    obj_ext = 'mp4'

    obj_title = CleanText('.//a[@class="video-thumb-info__name"]')
    obj_duration = Duration(
        CleanText('.//div[@class="thumb-image-container__duration"]')
    )
    obj__page = AbsoluteLink('./a')
    # The id is everything after "/videos/" in the page link.
    obj_id = Regexp(obj__page, r'/videos/(.+)')

    def obj_thumbnail(self):
        """Wrap the ``src`` of the container image in a ``Thumbnail``."""
        image_src = Attr(
            './/img[@class="thumb-image-container__image"]', 'src')(self)
        return Thumbnail(image_src)
class item(ItemElement):
    """Describe one search result as an artist ``Collection``."""

    klass = Collection

    obj_title = CleanText('.//div[@class="heading"]/a')
    # Keep only the part of the link that precedes the query string.
    obj_url = Regexp(
        AbsoluteLink('.//div[@class="heading"]/a'),
        r'^([^?]+)\?',
    )
    # Id layout: "artist.<subdomain>"; None when the URL does not match.
    obj_id = Regexp(
        Field('url'),
        r'://([-\w]+)\.bandcamp.com',
        r'artist.\1',
        default=None,
    )

    def obj_split_path(self):
        """Return a one-element list holding the first host label of the URL."""
        found = re.search(r'https://([^.]+)\.', self.obj_url(self))
        return [found.group(1)]
class iter_events(ListElement):
    """List the rows of the ``preliste`` table as ``BREvent`` objects."""

    item_xpath = '//table[@id="preliste"]/tr'
    next_page = AbsoluteLink(
        '(//a[text()=">"][contains(@href,"LISTEPEpg")])[1]')

    class item(ItemElement):
        """Describe a single event row."""

        klass = BREvent

        obj_summary = CleanText('.//h4')
        obj_url = AbsoluteLink('.//h4/a')
        obj_description = CleanText('.//div[@class="libellepreliste"]')
        obj_city = CleanText('(.//span[@class="lieu"]/a)[2]')
        obj_location = CleanText('(.//span[@class="lieu"]/a)[1]')
        obj_timezone = 'Europe/Paris'

        def obj_price(self):
            """Return the price as a float, reading '€' as the decimal point."""
            return float(
                CleanText('.//span[@class="prixli"]')(self).replace('€', '.'))

        def obj__date_hours(self):
            """Return the start times described by the ``sb`` paragraph.

            A date-range text ("du ... au ... <rest>") is delegated to
            ``TimeParser``; otherwise a single "le ... à HHhMM" text yields a
            one-element list of ``(hour, minute)``.
            """
            date = Env('date')(self)
            weekday = date.weekday()
            txt = CleanText('.//p[@class="sb"]')(self).lower()

            m = re.match(r'du \d+/\d+/\d+ au \d+/\d+/\d+ (.*)', txt)
            if m:
                txt = m.group(1)
                p = TimeParser(txt, weekday)
                p.do_parse()
                return p.res

            # Raw string: "\w" and "\d" are invalid escapes in a plain literal.
            # NOTE(review): a text matching neither layout leaves ``m`` as
            # None and raises AttributeError below; confirm that is intended.
            m = re.match(r'le \w+ \d+ \w+ \d+ à (\d+)h(\d*)$', txt, re.UNICODE)
            return [(int(m.group(1)), int(m.group(2) or 0))]

        obj_start_date = Env('date')

        def obj_category(self):
            """Return the category of the first ``LABEL_TO_CAT`` key found.

            Falls back to ``CATEGORIES.AUTRE`` when no key occurs in the text.
            """
            text = CleanText(
                './/h4/following-sibling::span[@class="small"]/a')(self)
            for k in LABEL_TO_CAT:
                if k in text:
                    return LABEL_TO_CAT[k]
            return CATEGORIES.AUTRE

        def obj_siteid(self):
            """Return the ``id`` group of the event URL pattern match."""
            return self.page.browser.event.match(
                Field('url')(self)).group('id')
class item(ItemElement):
    """Describe one listing row as a ``BaseAudio``."""

    def condition(self):
        """Accept only rows whose first image ``alt`` is '[SND]'."""
        # 'x' stands in when no alt attribute was found, so indexing is safe.
        alt_values = self.el.xpath('./td/img/@alt') or 'x'
        return alt_values[0] == '[SND]'

    klass = BaseAudio

    obj_url = AbsoluteLink('./td/a')
    # Last path segment of the URL, passed through ``Decode``; reused by the
    # filters and the method below.
    filename = Decode(Regexp(Field('url'), '/([^/]+)$'))
    obj_title = Regexp(filename, r'(.*)\.[^.]+$')
    obj_ext = Regexp(filename, r'\.([^.]+)$')
    obj_format = obj_ext

    def obj_id(self):
        """Return "audio." followed by the page path and file name joined by '/'."""
        segments = self.page.get_split_path() + [self.filename(self)]
        return 'audio.%s' % '/'.join(segments)
class iter_content(ListElement):
    """Container for the album, track and artist result lists of a page."""

    next_page = AbsoluteLink('//a[has-class("next")]')

    class iter_albums(ListElement):
        """Result items whose ``itemtype`` text is "ALBUM"."""

        item_xpath = '//ul[@class="result-items"]/li[.//div[@class="itemtype"][normalize-space(text())="ALBUM"]]'

        class item(ItemElement):
            """Describe one result as an ``Album``."""

            klass = Album

            obj_title = CleanText('.//div[@class="heading"]/a')
            # Keep only the part of the link before the query string.
            obj_url = Regexp(
                AbsoluteLink('.//div[@class="heading"]/a'),
                r'^([^?]+)\?',
            )
            obj_id = Regexp(
                Field('url'),
                r'://([-\w]+)\.bandcamp.com/album/([-\w]+)',
                r'album.\1.\2',
                default=None,
            )

    class iter_tracks(ListElement):
        """Result items whose ``itemtype`` text is "TRACK"."""

        item_xpath = '//ul[@class="result-items"]/li[.//div[@class="itemtype"][normalize-space(text())="TRACK"]]'

        class item(ItemElement):
            """Describe one result as a ``BaseAudio``."""

            klass = BaseAudio

            obj_title = CleanText('.//div[@class="heading"]/a')
            # Keep only the part of the link before the query string.
            obj__page_url = Regexp(
                AbsoluteLink('.//div[@class="heading"]/a'),
                r'^([^?]+)\?',
            )
            obj_id = Regexp(
                Field('_page_url'),
                r'://([-\w]+)\.bandcamp.com/track/([-\w]+)',
                r'audio.\1.\2',
                default=None,
            )

    class iter_artists(ListElement):
        """Result items whose ``itemtype`` text is "ARTIST"."""

        item_xpath = '//ul[@class="result-items"]/li[.//div[@class="itemtype"][normalize-space(text())="ARTIST"]]'

        class item(ItemElement):
            """Describe one result as an artist ``Collection``."""

            klass = Collection

            obj_title = CleanText('.//div[@class="heading"]/a')
            # Keep only the part of the link before the query string.
            obj_url = Regexp(
                AbsoluteLink('.//div[@class="heading"]/a'),
                r'^([^?]+)\?',
            )
            obj_id = Regexp(
                Field('url'),
                r'://([-\w]+)\.bandcamp.com',
                r'artist.\1',
                default=None,
            )

            def obj_split_path(self):
                """Return a one-element list with the URL's first host label."""
                found = re.search(r'https://([^.]+)\.', self.obj_url(self))
                return [found.group(1)]
class item(ItemElement):
    """Describe one table row as a card ``Account`` in EUR."""

    klass = Account

    obj_id = CleanText('./td[2]')

    # Some account names have spaces in the middle which cause
    # the history search to fail if we remove them.
    # eg: `NAME SURNAME` = `NAME++SURNAME` in the history search.
    # Hence the raw text is only stripped at both ends.
    obj_label = Eval(
        lambda x: x.strip(),
        RawText('./td[1]'),
    )
    obj_type = Account.TYPE_CARD
    obj__rib = Env('rib')
    # this field is something used to make the module work, not something
    # meant to be displayed to end users
    obj__company = Env('company', default=None)
    obj_currency = 'EUR'
    # Same cell as the id, but with spaces removed.
    obj_number = CleanText(
        './td[2]',
        replace=[(' ', '')],
    )
    obj_url = AbsoluteLink('./td[2]/a')
    # "<id>:<label>", built from the two filters declared above.
    obj__completeid = Format(
        '%s:%s',
        obj_id,
        obj_label,
    )
class iter_videos(ListElement):
    """List the ``<div class="video">`` blocks as ``BaseVideo`` objects."""

    next_page = AbsoluteLink('//a[text()="Suivant"]')
    item_xpath = '//div[@class="video"]'

    class item(ItemElement):
        """Describe a single video block, flagged NSFW."""

        klass = BaseVideo

        # Constant values applied to every video.
        obj_nsfw = True
        obj_ext = 'mp4'

        obj_title = CleanText('./a/u')
        obj_duration = Duration(CleanText('./a/b'))
        obj__page = AbsoluteLink('./a')
        # The id is everything after "/videos/" in the page link.
        obj_id = Regexp(obj__page, r'/videos/(.+)')

        def obj_thumbnail(self):
            """Wrap the ``src`` of the ``thumb`` image in a ``Thumbnail``."""
            image_src = Attr('.//img[@class="thumb"]', 'src')(self)
            return Thumbnail(image_src)
class item(ItemElement):
    """Describe one entry as a PDF ``Bill`` for the current subscription."""

    klass = Bill

    def condition(self):
        """Accept only entries whose ``data-fact_ligne`` equals ``env['subid']``."""
        num = Attr('.', 'data-fact_ligne', default='')(self)
        return self.env['subid'] == num

    obj_url = AbsoluteLink('.//div[@class="pdf"]/a')
    # Both values are cut out of the PDF link's query string.
    obj__localid = Regexp(Field('url'), '&id=(.*)&date', u'\\1')
    # Raw string: "\d" is an invalid escape sequence in a plain literal.
    obj_label = Regexp(Field('url'), r'&date=(\d*)', u'\\1')
    obj_id = Format('%s.%s', Env('subid'), Field('_localid'))
    obj_date = FormatDate(Field('label'))
    obj_format = u"pdf"
    obj_type = DocumentTypes.BILL
    obj_price = CleanDecimal('div[@class="montant"]',
                             default=Decimal(0),
                             replace_dots=False)
    obj_currency = Currency('div[@class="montant"]')