Exemple #1
0
    class get_torrent(ItemElement):
        """Fill a ``Torrent`` from the XPath selections of a torrent detail page."""
        klass = Torrent

        obj_name = CleanText('//h2[has-class("h2fiche")]',
                             default=NotAvailable)
        obj_description = CleanHTML('//div[@id="textefiche"]',
                                    default=NotAvailable)
        obj_seeders = CleanText(
            '//div[@id="infosficher"]//span[has-class("seed_ok")]') & Type(
                type=int)
        # Leechers are read from the third <span> of the info block.
        obj_leechers = CleanText('(//div[@id="infosficher"]/span)[3]') & Type(
            type=int)
        obj_magnet = NotAvailable

        # The id is the href segment between "dl-torrent/" and the ".html"
        # suffix; the dot is escaped so only a literal ".html" is accepted.
        obj_id = Regexp(CleanText('//h2[has-class("h2fiche")]/a/@href'),
                        r'.*dl-torrent/(.*)\.html')
        obj_url = Format('http://www.cpasbien.cm%s',
                         CleanText('//a[@id="telecharger"]/@href'))

        def obj_size(self):
            """Return the result of ``get_bytes_size`` for the first info <span>.

            The text is read as "<number> ... <unit>": a decimal comma becomes
            a dot, the first token is the number, and the last token is the
            unit, upper-cased with "O" replaced by "B" (presumably to map
            octet suffixes such as "Mo" to "MB" -- confirm).
            """
            rawsize = CleanText('(//div[@id="infosficher"]/span)[1]')(self)
            # split() without arguments already ignores surrounding whitespace.
            tokens = rawsize.replace(',', '.').split()
            nsize = float(tokens[0])
            usize = tokens[-1].upper().replace('O', 'B')
            return get_bytes_size(nsize, usize)

        obj_files = NotAvailable

        # Everything after "telechargement/" in the download link.
        obj_filename = CleanText(Regexp(
            CleanText('//a[@id="telecharger"]/@href'),
            '.*telechargement/(.*)'),
                                 default=NotAvailable)
Exemple #2
0
    class get_torrent(ItemElement):
        """Populate a ``Torrent`` from a torrent details page."""
        klass = Torrent

        def obj_id(self):
            """Return the part of the page URL that follows its last ``/``."""
            return self.page.url.rsplit('/', 1)[-1]

        def obj_url(self):
            """No download URL is extracted here; always ``NotAvailable``."""
            return NotAvailable

        obj_name = CleanText('//div[@id="title"]')
        obj_magnet = CleanText(
            '//div[@class="download"]/a[starts-with(@href, "magnet:")]/@href')
        # Each detail below is the first <dd> following the <dt> whose text is
        # the given label.
        obj_date = Date(CleanText(
            '//div[@id="details"]//dt[.="Uploaded:"]/following-sibling::dd[1]'))
        # The size is the integer found inside "(<n> Bytes)", as a float.
        obj_size = Regexp(
            CleanText(
                '//div[@id="details"]//dt[.="Size:"]/following-sibling::dd[1]'),
            r'\((\d+) Bytes\)',
            '\\1') & Type(type=float)
        obj_seeders = CleanText(
            '//div[@id="details"]//dt[.="Seeders:"]/following-sibling::dd[1]'
        ) & Type(type=int)
        obj_leechers = CleanText(
            '//div[@id="details"]//dt[.="Leechers:"]/following-sibling::dd[1]'
        ) & Type(type=int)
        obj_description = RawText('//div[@class="nfo"]/pre', children=True)
Exemple #3
0
        class item(ItemElement):
            """Fill a ``Torrent`` from one entry of a results listing."""
            klass = Torrent
            # The dot before "html" is escaped, like in the obj_filename
            # pattern below, so only a literal ".html" suffix is accepted.
            obj_id = Regexp(CleanText('.//a[has-class("titre")]/@href'),
                            r'.*dl-torrent/(.*)\.html')
            obj_name = CleanText('.//a[has-class("titre")]',
                                 default=NotAvailable)
            obj_magnet = NotAvailable
            obj_seeders = CleanText('.//div[has-class("up")]',
                                    default=NotAvailable) & Type(type=int)
            obj_leechers = CleanText('.//div[has-class("down")]',
                                     default=NotAvailable) & Type(type=int)

            # Description and file list are not filled from the listing.
            obj_description = NotLoaded
            obj_files = NotLoaded

            def obj_url(self):
                """Build the download URL from the last segment of the title link.

                The segment's ".html" is replaced by ".torrent" and appended to
                the fixed "telechargement/" base URL.
                """
                href = CleanText('.//a[has-class("titre")]/@href')(self)
                subid = href.split('/')[-1].replace('.html', '.torrent')
                return 'http://www.cpasbien.cm/telechargement/%s' % subid

            def obj_size(self):
                """Return the result of ``get_bytes_size`` for the "poid" cell.

                The text is read as "<number> ... <unit>": a decimal comma
                becomes a dot, and the unit is upper-cased with "O" replaced
                by "B" (presumably mapping octet suffixes -- confirm).
                """
                rawsize = CleanText('./div[has-class("poid")]')(self)
                # split() without arguments ignores surrounding whitespace.
                tokens = rawsize.replace(',', '.').split()
                nsize = float(tokens[0])
                usize = tokens[-1].upper().replace('O', 'B')
                return get_bytes_size(nsize, usize)

            # Raw string: the backslash belongs to the regex, not to a Python
            # string escape.
            obj_filename = Format(
                '%s.torrent',
                Regexp(CleanText('.//a[has-class("titre")]/@href'),
                       r'/([^/]*)\.html'))
Exemple #4
0
    class get_location(ItemElement):
        """Populate an ``IpLocation`` from an IP information page."""
        klass = IpLocation

        # The address is the "[.\d]+" run after "- " in the matching heading.
        obj_id = Regexp(
            CleanText('//h1/strong[starts-with(.,"IP Address Information")]'),
            r'- ([.\d]+)',
        )

        # Each field comes from the <td> containing a <strong> with the given
        # label; children=False is passed to CleanText for every cell.
        obj_city = CleanText('//td[.//strong[text()="City"]]', children=False)
        obj_country = CleanText(
            '//td[.//strong[text()="Country"]]', children=False)
        obj_region = CleanText(
            '//td[.//strong[text()="Region"]]', children=False)
        obj_zipcode = CleanText(
            '//td[.//strong[text()="Postcode"]]', children=False)
        obj_host = CleanText(
            '//td[.//strong[text()="Domain Name"]]',
            children=False,
            default=NotAvailable,
        )
        obj_isp = CleanText('//td[.//strong[text()="ISP"]]', children=False)

        # Coordinates are written "(<first>, <second>)": the first number is
        # stored in lt and the second in lg, both as floats.
        obj_lt = Type(
            Regexp(
                CleanText('//td[.//strong[text()="Coordinates of City"]]',
                          children=False),
                r'\(([\d.-]+), [\d.-]+\)'),
            type=float)
        obj_lg = Type(
            Regexp(
                CleanText('//td[.//strong[text()="Coordinates of City"]]',
                          children=False),
                r'\([\d.-]+, ([\d.-]+)\)'),
            type=float)
Exemple #5
0
    class get_recipe(ItemElement):
        """Populate a ``Recipe`` from a recipe page."""
        klass = Recipe

        obj_id = Env('_id')
        obj_title = CleanText('//h1')
        # Regexes are raw strings so "\d" reaches the regex engine untouched
        # instead of being an invalid Python string escape.
        obj_preparation_time = Type(Regexp(CleanText('//li[@class="time"]/span'), r".* (\d*) min"), type=int)

        obj_cooking_time = Type(Regexp(CleanText('//li[@class="time-cooking"]/span'), r".* (\d*) min"), type=int)

        def obj_nb_person(self):
            """Return ``[n]`` parsed from "pour <n> personnes", else ``NotAvailable``.

            The Regexp default of 0 is falsy, so a missing match ends up as
            ``NotAvailable``.
            """
            nb_pers = Regexp(CleanText('//div[@class="row ingredients"]/div/p'),
                             r'.*pour (\d+) personnes', default=0)(self)
            return [nb_pers] if nb_pers else NotAvailable

        def obj_ingredients(self):
            """Return the cleaned text of every ingredient <li>, in page order."""
            ingredients = XPath('//ul[@class="ingredientsList"]/li',
                                default=[])(self)
            return [CleanText('.')(ingredient) for ingredient in ingredients]

        obj_instructions = Join(u'\n- ', '//div[@class="recipe-prepa"]/ol/li', newline=True, addBefore='- ')

        # Thumbnail and picture are both read from the same <img> source.
        obj_thumbnail_url = CleanText('//div[has-class("toprecipeImage")]/img/@src', default=NotAvailable)
        obj_picture_url = CleanText('//div[has-class("toprecipeImage")]/img/@src', default=NotAvailable)
Exemple #6
0
    class get_recipe(ItemElement):
        """Populate a ``Recipe`` from a recipe page."""
        klass = Recipe

        obj_id = Env('id')
        obj_title = CleanText('//h1[@class="m_title"]')
        obj_preparation_time = Type(CleanText('//span[@class="preptime"]'),
                                    type=int)
        obj_cooking_time = Type(CleanText('//span[@class="cooktime"]'),
                                type=int)

        def obj_nb_person(self):
            """Return ``[n]`` parsed from "(pour <n> personnes)", else ``NotAvailable``.

            The Regexp default of 0 is falsy, so a missing match ends up as
            ``NotAvailable``.
            """
            # Raw string: "\(" and "\d" are regex escapes, not Python ones.
            nb_pers = Regexp(CleanText(
                '//p[@class="m_content_recette_ingredients"]/span[1]'),
                             r'.*\(pour (\d+) personnes\)',
                             default=0)(self)
            return [nb_pers] if nb_pers else NotAvailable

        def obj_ingredients(self):
            """Return the ingredient paragraph split on "-", minus its first chunk.

            Returns ``None`` when the text contains no "-" separator.
            """
            ingredients = CleanText(
                '//p[@class="m_content_recette_ingredients"]',
                default='')(self).split('-')
            if len(ingredients) > 1:
                # The chunk before the first "-" is dropped.
                return ingredients[1:]
            return None

        obj_instructions = CleanHTML('//div[@class="m_content_recette_todo"]')
        # Thumbnail and picture are both read from the same <img> source.
        obj_thumbnail_url = CleanText(
            '//a[@class="m_content_recette_illu"]/img/@src',
            default=NotAvailable)
        obj_picture_url = CleanText(
            '//a[@class="m_content_recette_illu"]/img/@src',
            default=NotAvailable)
Exemple #7
0
        class item(ItemElement):
            """Fill a ``Torrent`` from one row of a results table."""
            klass = Torrent

            obj_id = Regexp(CleanText('./td[2]/div/a[@class="detLink"]/@href'),
                            r'^/torrent/(\d+)/', '\\1')
            obj_name = Regexp(CleanText('./td[2]/div/a[@class="detLink"]/@title'),
                              r'Details for (.*)$', '\\1')
            # "@title" selects on the attribute; a bare "title" would test for
            # a child element named <title> and never match the link.
            obj_magnet = CleanText('./td[2]/a[@title="Download this torrent using magnet"]/@href')
            obj_date = Date(Regexp(CleanText('./td[2]/font'), r'Uploaded ([^,]+),', '\\1'), fuzzy=True)
            obj_seeders = Type(CleanText('./td[3]'), type=int)
            obj_leechers = Type(CleanText('./td[4]'), type=int)

            def obj_size(self):
                """Return ``get_bytes_size`` of the "Size <value> <unit>," text.

                The captured text is split on a single space and must yield
                exactly a value and a unit.
                """
                value, unit = Regexp(CleanText('./td[2]/font'), r'Size ([\d\.]+ [^,]+),', '\\1')(self).split(' ')
                return get_bytes_size(float(value), unit)
Exemple #8
0
    class get_torrent(ItemElement):
        """Populate a ``Torrent`` from a torrent detail page."""
        klass = Torrent

        def obj_description(self):
            """Return the description text with reference lines removed.

            Lines containing a bracketed number such as "[12]" are dropped,
            the remaining lines are stripped, and every run of two or more
            whitespace characters is collapsed into one blank line.
            """
            desctxt = CleanHTML('//div[has-class("description")]/article')(
                self)
            strippedlines = '\n'.join([
                s.strip() for s in desctxt.split('\n')
                if re.search(r'\[[0-9]+\]', s) is None
            ])
            return re.sub(r'\s\s+', '\n\n', strippedlines)

        obj_name = CleanText(
            '//div[has-class("torrentDetails")]/h2/span/text()')

        obj_id = CleanText('//input[@id="torrent-id"][1]/@value')

        def obj_url(self):
            """Return the download URL built from the "torrent-id" input value."""
            fullid = CleanText('//input[@id="torrent-id"][1]/@value')(self)
            return 'https://www.t411.in/torrents/download/?id=%s' % fullid

        obj_filename = CleanText(
            '//div[@class="accordion"]//tr[th="Torrent"]/td')

        def obj_size(self):
            """Return ``get_bytes_size`` of the "Taille totale" cell.

            The first whitespace-separated token is the number and the last
            one, upper-cased, is the unit.
            """
            rawsize = CleanText(
                '//div[@class="accordion"]//tr[th="Taille totale"]/td')(self)
            tokens = rawsize.split()
            nsize = float(tokens[0])
            usize = tokens[-1].upper()
            return get_bytes_size(nsize, usize)

        def obj_files(self):
            """Return the cleaned text of each file-list row except the first."""
            # Adjacent literals are joined at compile time, so the XPath does
            # not carry the source indentation that a backslash continuation
            # inside the string literal would embed.
            rows = Type(
                '//div[@class="accordion"]/h3[text()="Liste des Fichiers"]'
                '/following-sibling::div[1]//tr',
                type=list)(self)
            # The first row is skipped (presumably a header row -- confirm).
            return [CleanText(row)(self) for row in rows[1:]]

        obj_seeders = CleanText(
            '//div[@class="details"]//td[@class="up"]') & Type(type=int)
        obj_leechers = CleanText(
            '//div[@class="details"]//td[@class="down"]') & Type(type=int)
        obj_magnet = NotAvailable
Exemple #9
0
        def obj_price(self):
            """Return the "offers/price" entry of the ``_json`` env value as a float.

            Returns ``None`` when the ``_json`` env value is falsy. The lookup
            defaults to "0" and the float conversion defaults to 0.
            """
            payload = self.env['_json']
            if not payload:
                return None

            price_filter = Type(
                CleanText(Dict('offers/price', default="0")),
                type=float,
                default=0,
            )
            return price_filter(payload)
Exemple #10
0
        class item(ItemElement):
            """Fill a ``Torrent`` from one <dl> entry of a results listing."""
            klass = Torrent

            # The id is the first run of hex digits after a "/" in the link.
            obj_id = Regexp(CleanText('./dt/a/@href'), r'/([0-9a-f]+)', '\\1')
            obj_name = CleanText('./dt/a')
            obj_date = Date(CleanText('./dd/span[2]/@title'),
                            default=None,
                            parse_func=parse_timestamp)
            # Commas are removed from the counts before the int conversion.
            obj_seeders = Type(CleanText('./dd/span[4]', replace=[(',', '')]),
                               type=int)
            obj_leechers = Type(CleanText('./dd/span[5]', replace=[(',', '')]),
                                type=int)

            def obj_size(self):
                """Return ``get_bytes_size`` of the third <span>, or NaN if empty.

                The non-empty text must split into exactly a value and a unit.
                """
                text = CleanText('./dd/span[3]')(self)
                if not text:
                    return float("NaN")

                amount, unit = text.split()
                return get_bytes_size(float(amount), unit)
Exemple #11
0
 def obj_files(self):
     """Return the cleaned text of each file-list row except the first."""
     # Adjacent literals are joined at compile time, so the XPath does not
     # carry the source indentation that a backslash continuation inside the
     # string literal would embed.
     rows = Type(
         '//div[@class="accordion"]/h3[text()="Liste des Fichiers"]'
         '/following-sibling::div[1]//tr',
         type=list)(self)
     # The first row is skipped (presumably a header row -- confirm).
     return [CleanText(row)(self) for row in rows[1:]]
Exemple #12
0
    class get_torrent(ItemElement):
        """Populate a ``Torrent`` from a torrent detail page."""
        klass = Torrent

        obj_description = CleanText('//div[@id="desc"]', default=NotAvailable)
        obj_seeders = CleanText(
            '(//div[has-class("seedBlock")]/strong)[1]') & Type(type=int)
        obj_leechers = CleanText(
            '(//div[has-class("leechBlock")]/strong)[1]') & Type(type=int)
        obj_name = CleanText('//h1[has-class("novertmarg")]//span',
                             default=NotAvailable)
        obj_magnet = CleanText(
            '//div[has-class("downloadButtonGroup")]//a[starts-with(@href,"magnet")]/@href',
            default=NotAvailable)

        # Raw string: "\." is a regex escape, not a Python string escape.
        obj_id = Regexp(CleanText('//h1[has-class("novertmarg")]/a/@href'),
                        r'.*-t([0-9]*)\.html')

        def obj_url(self):
            """Return the protocol-relative download link prefixed with "https:"."""
            href = CleanText(
                '//div[has-class("downloadButtonGroup")]//a[starts-with(@href,"//")]/@href'
            )(self)
            return u'https:%s' % href

        def obj_size(self):
            """Return ``get_bytes_size`` of the size shown in the folder label.

            The size text is what follows the last ": " and precedes the next
            ")". A decimal comma becomes a dot; the first token is the number
            and the last one, upper-cased, is the unit.
            """
            rawsize = CleanText(
                '//span[has-class("folder") or has-class("folderopen")]')(self)
            rawsize = rawsize.split(': ')[-1].split(')')[0].strip()
            tokens = rawsize.replace(',', '.').split()
            nsize = float(tokens[0])
            usize = tokens[-1].upper()
            return get_bytes_size(nsize, usize)

        def obj_files(self):
            """Return the cleaned text of every "torFileName" cell."""
            cells = Type('//td[has-class("torFileName")]', type=list)(self)
            return [CleanText(cell)(self) for cell in cells]

        # Everything after "title=" in the download link.
        obj_filename = CleanText(Regexp(
            CleanText(
                '//div[has-class("downloadButtonGroup")]//a[starts-with(@href,"//")]/@href'
            ), '.*title=(.*)'),
                                 default=NotAvailable)
Exemple #13
0
        class item(ItemElement):
            """Fill a ``Subtitle`` from one row of a results table."""
            klass = Subtitle

            obj_name = CleanText('.//td/a[@alt="Subtitles\' page"]')
            # The CD count and language come from the columns named "cd" and
            # "language".
            obj_nb_cd = CleanText(TableCell('cd')) & Type(type=int)
            obj_language = CleanText(TableCell('language'))
            obj_url = AbsoluteLink(
                './/td/div[has-class("pull-left")]/a[@alt="Download subtitles."]')
            # The id is the path segment just before the trailing "/download"
            # of the url field.
            obj_id = Regexp(
                Field('url'),
                r'/(-*\w*)/download$',
                r'\1',
            )
Exemple #14
0
        class item(ItemElement):
            """Fill an ``Account`` from one row of an accounts table."""
            klass = Account

            # The id is the first run of digits in the row's first link href,
            # converted to int.
            obj_id = Type(Regexp(Attr('.//a', 'href'), r'(\d+)'), type=int)
            obj_label = CleanText('./td[1]')
            obj_balance = CleanDecimal('./td[2]', replace_dots=True)

            def obj_url(self):
                """Return the browser's ``BASEURL`` followed by the row's first link."""
                base = self.page.browser.BASEURL
                target = Link(u'.//a[1]')(self)
                return u'%s%s' % (base, target)
Exemple #15
0
    class get_torrent(ItemElement):
        """Populate a ``Torrent`` from a torrent detail page."""
        klass = Torrent

        obj_id = Regexp(CleanText('//div[@class="trackers"]/h2'),  r'hash ([0-9a-f]+)', '\\1')
        obj_name = CleanText('//div[@class="downlinks"]/h2/span')
        obj_date = CleanText('//div[@class="downlinks"]/div/span/@title') & Date(default=None)
        # Commas and "b" characters are removed before the float conversion.
        obj_size = CleanText('//div[@class="files"]/div/@title', replace=[(',', ''), ('b', '')]) & \
            Type(type=float)

        def obj_seeders(self):
            """Return the highest "u" count among the trackers.

            Returns ``NotAvailable`` on ``ValueError``, which ``max`` raises
            for an empty list and ``int`` raises for non-numeric text.
            """
            try:
                return max([int(_.text.replace(',', ''))
                            for _ in self.xpath('//div[@class="trackers"]/dl/dd/span[@class="u"]')])
            except ValueError:
                return NotAvailable

        def obj_leechers(self):
            """Return the highest "d" count among the trackers.

            Returns ``NotAvailable`` on ``ValueError``, which ``max`` raises
            for an empty list and ``int`` raises for non-numeric text.
            """
            try:
                return max([int(_.text.replace(',', ''))
                            for _ in self.xpath('//div[@class="trackers"]/dl/dd/span[@class="d"]')])
            except ValueError:
                return NotAvailable

        def obj_url(self):
            """Return the browser's ``BASEURL`` followed by the torrent hash."""
            return self.page.browser.BASEURL + \
                Regexp(CleanText('//div[@class="trackers"]/h2'), r'hash ([0-9a-f]+)', '\\1')(self)

        def obj_files(self):
            """Return the nested file list flattened into indented text lines.

            Each nesting level adds a "| " prefix. An entry with nested <ul>
            children is listed by its text alone; a leaf entry is listed as
            "<text> [<size>]", the size being the text of its first <span>.
            """
            def traverse_nested_lists(ul, result, depth=0):
                for li in ul.xpath('./li'):
                    sub_uls = li.xpath('./ul')
                    if sub_uls:
                        result.append(("| " * depth) + ("%s" % li.text))
                        for sub_ul in sub_uls:
                            traverse_nested_lists(sub_ul, result, depth+1)
                    else:
                        try:
                            size = li.xpath('span')[0].text
                        except IndexError:
                            # No <span> under this entry: leave the size empty.
                            # Only the missing-index case is handled, so other
                            # errors are not silently swallowed.
                            size = ""
                        result.append(("| " * depth) + ("%s [%s]" % (li.text, size)))

            result = []
            traverse_nested_lists(self.xpath('//div[@class="files"]/ul')[0], result)
            return result

        def obj_magnet(self):
            """Build a magnet URI from the hash, the name, and the tracker list.

            The name is passed through ``quote_plus``; each tracker <dt> text
            is added as a "tr=" parameter as-is.
            """
            hsh = Regexp(CleanText('//div[@class="trackers"]/h2'),  r'hash ([0-9a-f]+)', '\\1')(self)
            name = "dn=%s" % quote_plus(CleanText('//div[@class="downlinks"]/h2/span')(self))
            trackers = ["tr=%s" % _.text for _ in self.xpath('//div[@class="trackers"]/dl/dt')]
            return "&".join(["magnet:?xt=urn:btih:%s" % hsh, name] + trackers)

        def obj_description(self):
            """Return a text listing the download link hrefs, one per paragraph."""
            return u"Torrent files available at:\n" + \
                   u"\n\n".join(self.xpath('//div[@class="downlinks"]/dl/dt/a/@href'))
Exemple #16
0
        class item(ItemElement):
            """Fill a ``Torrent`` from one row of a results table."""
            klass = Torrent
            # Raw strings keep "\w" as a regex escape rather than an invalid
            # Python string escape. Group 1 is the numeric id and group 2 the
            # name slug of the "/torrents/<id>/<slug>" link.
            obj_id = Regexp(CleanText('./td[2]/a/@href'),
                            r'/torrents/([0-9]+)/(\w+)', '\\1')
            obj_name = Regexp(CleanText('./td[2]/a/@href'),
                              r'/torrents/([0-9]+)/([-\w]+)', '\\2')
            obj_seeders = CleanText('./td[6]/span[text()]') & Type(type=int)
            obj_leechers = CleanText('./td[7]/span[text()]') & Type(type=int)
            # Description and file list are not filled from the listing.
            obj_description = NotLoaded
            obj_files = NotLoaded
            # Reuses the obj_name filter defined above.
            obj_filename = Format('%s.torrent', obj_name)
            obj_magnet = NotAvailable
            obj_url = CleanText('./td[2]/a/@href')

            def obj_size(self):
                """Return ``get_bytes_size`` of the fifth cell.

                The first whitespace-separated token is the number and the
                last one, upper-cased, is the unit.
                """
                rawsize = CleanText('./td[5]')(self)
                tokens = rawsize.split()
                nsize = float(tokens[0])
                usize = tokens[-1].upper()
                return get_bytes_size(nsize, usize)
Exemple #17
0
        class item(ItemElement):
            """Fill a ``Torrent`` from one row of a results table."""
            klass = Torrent
            # The dot before "html" is escaped so only a literal ".html"
            # suffix is accepted after the "-t<digits>" id.
            obj_id = Regexp(
                CleanText(
                    './/div[@class="torrentname"]//a[@class="cellMainLink"]/@href'
                ), r'.*-t([0-9]*)\.html')
            obj_name = CleanText('.//a[@class="cellMainLink"]',
                                 default=NotAvailable)
            obj_magnet = CleanText(
                './/div[has-class("iaconbox")]//a[starts-with(@href,"magnet")]/@href',
                default=NotAvailable)
            obj_seeders = CleanText(
                './/td[has-class("green") and has-class("center")]',
                default=NotAvailable) & Type(type=int)
            obj_leechers = CleanText(
                './/td[has-class("red") and has-class("center")]',
                default=NotAvailable) & Type(type=int)

            # Description and file list are not filled from the listing.
            obj_description = NotLoaded
            obj_files = NotLoaded

            def obj_url(self):
                """Return the protocol-relative download link prefixed with "https:"."""
                href = CleanText(
                    './/div[has-class("iaconbox")]//a[starts-with(@href,"//")]/@href'
                )(self)
                return 'https:%s' % href

            def obj_size(self):
                """Return ``get_bytes_size`` of the second cell.

                A decimal comma becomes a dot; the first token is the number
                and the last one, upper-cased, is the unit.
                """
                rawsize = CleanText('./td[2]')(self)
                tokens = rawsize.replace(',', '.').split()
                nsize = float(tokens[0])
                usize = tokens[-1].upper()
                return get_bytes_size(nsize, usize)

            # Everything after "title=" in the download link.
            obj_filename = CleanText(Regexp(
                CleanText(
                    './/div[has-class("iaconbox")]//a[starts-with(@href,"//")]/@href'
                ), '.*title=(.*)'),
                                     default=NotAvailable)
Exemple #18
0
    class get_location(ItemElement):
        """Populate an ``IpLocation`` from a list of "<label> : <value>" items."""
        klass = IpLocation

        obj_id = CleanText('//ul/li[starts-with(.,"IP address :")]/strong')

        # Every field below selects the <li> starting with its label and
        # keeps what follows "<label> : "; NotAvailable is the CleanText
        # default.
        obj_city = CleanText(
            Regexp(CleanText('//ul/li[starts-with(.,"City :")]/text()'),
                   'City : (.*)'),
            default=NotAvailable)

        obj_country = CleanText(
            Regexp(CleanText('//ul/li[starts-with(.,"Country :")]/text()'),
                   'Country : (.*)'),
            default=NotAvailable)

        obj_region = CleanText(
            Regexp(
                CleanText('//ul/li[starts-with(.,"State/Province :")]/text()'),
                'State/Province : (.*)'),
            default=NotAvailable)

        # Latitude and longitude are additionally converted to float.
        obj_lt = Type(
            CleanText(
                Regexp(
                    CleanText('//ul/li[starts-with(.,"Latitude :")]/text()'),
                    'Latitude : (.*)'),
                default=NotAvailable),
            type=float)

        obj_lg = Type(
            CleanText(
                Regexp(
                    CleanText('//ul/li[starts-with(.,"Longitude :")]/text()'),
                    'Longitude : (.*)'),
                default=NotAvailable),
            type=float)

        obj_zipcode = CleanText(
            Regexp(
                CleanText(
                    '//ul/li[starts-with(.,"Zip or postal code :")]/text()'),
                'Zip or postal code : (.*)'),
            default=NotAvailable)

        obj_host = CleanText(
            Regexp(CleanText('//ul/li[starts-with(.,"Hostname :")]/text()'),
                   'Hostname : (.*)'),
            default=NotAvailable)
Exemple #19
0
        class item(ItemElement):
            """Fill a ``Torrent`` from one row of a results table."""
            klass = Torrent
            # Raw string: "\?" is a regex escape, not a Python string escape.
            obj_id = Regexp(CleanText('./td[3]/a/@href'),
                            r'/torrents/nfo/\?id=(.*)')
            obj_name = CleanText('./td[2]/a/@title')
            obj_seeders = CleanText('./td[8]') & Type(type=int)
            obj_leechers = CleanText('./td[9]') & Type(type=int)
            # Description and file list are not filled from the listing.
            obj_description = NotLoaded
            obj_files = NotLoaded
            obj_filename = Format('%s.torrent', CleanText('./td[2]/a/@title'))
            obj_magnet = NotAvailable

            def obj_url(self):
                """Return the browser's ``download`` URL built for this row's id."""
                fullid = Regexp(CleanText('./td[3]/a/@href'), r'/torrents/nfo/\?id=(.*)')(self)
                return self.page.browser.download.build(id=fullid)

            def obj_size(self):
                """Return ``get_bytes_size`` of the sixth cell.

                The first whitespace-separated token is the number and the
                last one, upper-cased, is the unit.
                """
                rawsize = CleanText('./td[6]')(self)
                tokens = rawsize.split()
                nsize = float(tokens[0])
                usize = tokens[-1].upper()
                return get_bytes_size(nsize, usize)
Exemple #20
0
    class get_video(ItemElement):
        """Populate a ``YoupornVideo`` from a video page."""
        klass = YoupornVideo

        obj_author = CleanText('//div[has-class("submitByLink")]')
        #obj_date = Date('//div[@id="stats-date"]')
        obj_duration = NotAvailable
        obj_ext = 'mp4'
        obj_id = Env('id')
        # "(\d+)" captures the whole number before "%". A fixed two-character
        # capture would read "100%" as "00" and could not match a one-digit
        # percentage.
        obj_rating = CleanText('//div[@class="videoRatingPercentage"]'
                               ) & Regexp(pattern=r'(\d+)%') & Type(type=int)
        obj_rating_max = 100
        obj_thumbnail = NotAvailable
        obj_title = CleanText('//h1')
        # The URL is the first link inside the "downloadModal" block.
        obj_url = Link('//div[@id="downloadModal"]//a[1]')
Exemple #21
0
    class get_video(ItemElement):
        """Populate a ``YoupornVideo`` from a video page."""
        klass = YoupornVideo

        # The author is what follows "By: " in the first author line.
        obj_author = CleanText('//div[@class="author-block--line"][1]'
                               ) & Regexp(pattern=r'By: (.*)')
        #obj_date = Date('//div[@id="stats-date"]')
        obj_duration = NotAvailable
        obj_ext = 'mp4'
        obj_id = Env('id')
        # "(\d+)" captures the whole number before "%". A fixed two-character
        # capture would read "100%" as "00" and could not match a one-digit
        # percentage.
        obj_rating = CleanText('//div[@class="rating-percentage"]') & Regexp(
            pattern=r'(\d+)%') & Type(type=int)
        obj_rating_max = 100
        obj_thumbnail = NotAvailable
        obj_title = CleanText('//h1')
        # The URL is the link in the second entry of the download list.
        obj_url = Link('//ul[@class="downloadList"]/li[2]/a')
Exemple #22
0
    class get_event(ItemElement):
        """Populate a ``BaseCalendarEvent`` from an event detail page."""
        klass = BaseCalendarEvent

        obj_summary = CleanText('//div[@id="sectionHead"]/h1')
        obj_description = CleanHTML('//div[@id="event-item"]/div[3]/p[2]')
        # The price is the number found after "Cost /" in the third detail
        # entry; a missing match yields '' and then the CleanDecimal default.
        obj_price = CleanDecimal(Regexp(
            CleanText('//aside[@id="detail"]/ul/li[3]'),
            r'Cost /[^\d]*([\d ,.]+).',
            default=''),
                                 default=None)
        obj_location = Regexp(CleanText('//aside[@id="detail"]/ul/li[2]'),
                              r'Venue / (.+)')
        obj_booked_entries = Type(
            CleanText('//h1[@id="MembersFavouriteCount"]'), type=int)
        obj_status = STATUS.CONFIRMED
        obj_category = CATEGORIES.CONCERT

        # Shared date filter, combined with the start and end times below.
        _date = Date(CleanText('//aside[@id="detail"]/ul/li[1]/a[1]'))

        def obj_start_date(self):
            """Return the event date combined with the "HH:MM -" start time."""
            start_time = Time(
                Regexp(CleanText('//aside[@id="detail"]/ul/li[1]'),
                       r'(\d{2}:\d{2}) -'))(self)
            return CombineDate(self._date, start_time)(self)

        def obj_end_date(self):
            """Return the event date combined with the "- HH:MM" end time.

            When that moment falls before the start date, one day is added:
            the event runs past midnight and ends on the following day.
            """
            end_time = Time(
                Regexp(CleanText('//aside[@id="detail"]/ul/li[1]'),
                       r'- (\d{2}:\d{2})'))(self)

            end_date = CombineDate(self._date, end_time)(self)
            # Only roll over when the end would precede the start; an end
            # already after the start is on the same day.
            if end_date < self.obj_start_date():
                end_date += timedelta(days=1)

            return end_date

        def obj_ticket(self):
            """Map the class of the first ticket entry to a ``TICKET`` value.

            Returns ``TICKET.CLOSED`` for the class "closed",
            ``TICKET.AVAILABLE`` for any other non-empty class, and
            ``TICKET.NOTAVAILABLE`` when no class is found.
            """
            li_class = Attr('//li[@id="tickets"]//li[1]',
                            'class',
                            default=None)(self)

            if not li_class:
                return TICKET.NOTAVAILABLE
            if li_class == 'closed':
                return TICKET.CLOSED
            return TICKET.AVAILABLE
Exemple #23
0
        class item(ItemElement):
            """Fill a ``YoupornVideo`` from one entry of a video listing."""
            klass = YoupornVideo

            def obj_thumbnail(self):
                """Return a ``Thumbnail`` built from the entry's <img> source.

                The thumbnail's ``url`` is set to its ``id``.
                """
                thumbnail_url = Attr('./img', 'src')(self)
                thumbnail = Thumbnail(thumbnail_url)
                thumbnail.url = thumbnail.id
                return thumbnail

            obj_author = NotAvailable
            obj_duration = CSS('span.duration') & CleanText() & Duration()
            # The video id is an attribute of the element two levels up.
            obj_id = Attr('../..', 'data-video-id')
            # "(\d+)" captures the whole number before "%". A fixed
            # two-character capture would read "100%" as "00" and could not
            # match a one-digit percentage.
            obj_rating = CleanText('./span/i') & Regexp(
                pattern=r'(\d+)%') & Type(type=int)
            obj_rating_max = 100
            obj_title = CleanText('./p')
            obj_url = NotAvailable
Exemple #24
0
        class item(ItemElement):
            """Fill a ``YoupornVideo`` from one entry of a video listing."""
            klass = YoupornVideo

            def obj_thumbnail(self):
                """Return a ``Thumbnail`` built from the <img> "data-original" value.

                The thumbnail's ``url`` is set to its ``id``.
                """
                source = Attr('.//img', 'data-original')(self)
                thumb = Thumbnail(source)
                thumb.url = thumb.id
                return thumb

            obj_author = NotAvailable
            obj_duration = Duration(
                CleanText('.//div[has-class("video-duration")]'))
            obj_id = Attr('.', 'data-video-id')
            # The rating is the number in front of "%", converted to int.
            obj_rating = Type(
                Regexp(
                    CleanText('.//span[has-class("video-box-percentage")]'),
                    pattern=r'(\d+)%'),
                type=int)
            obj_rating_max = 100
            obj_title = CleanText('.//div[has-class("video-box-title")]')
Exemple #25
0
 def obj_nb_person(self):
     """Return a one-element list holding the "yield" span text as an int."""
     servings = Type(CleanText('//span[@class="yield"]'), type=int)(self)
     return [servings]
Exemple #26
0
    class get_video(ItemElement):
        """Populate a ``YoupornVideo`` from a video page."""
        klass = YoupornVideo

        obj_author = CleanText('//div[has-class("submitByLink")]')
        #obj_date = Date('//div[@id="stats-date"]')
        obj_duration = NotAvailable
        obj_ext = 'mp4'
        obj_id = Env('id')
        # The rating is the number in front of "%", converted to int.
        obj_rating = Type(
            Regexp(CleanText('//div[@class="videoRatingPercentage"]'),
                   pattern=r'(\d+)%'),
            type=int)
        obj_rating_max = 100
        obj_thumbnail = NotAvailable
        obj_title = CleanText('//h1')

        def obj_url(self):
            """Return the decoded quoted value following ``videoUrl":`` in the page text."""
            found = re.search('videoUrl":(".*?")', self.page.text)
            # The capture keeps its surrounding quotes, which loads() decodes.
            quoted_url = found.group(1)
            return loads(quoted_url)
Exemple #27
0
 def get_date(self, _time):
     """Combine the day of the enclosing calendar cell with ``_time``.

     The year and month come from the ``start_date=YYYY-MM-DD`` query of the
     page URL; the day comes from the "day_number" div of the ancestor <td>.

     Returns the combined ``datetime``, or ``None`` when the page URL does
     not carry a ``/events?start_date=`` query.
     """
     # Raw string: "\?" and "\d" are regex escapes, not Python string escapes.
     m = re.match(r'.*/events\?start_date=(\d{4})-(\d{2})-\d{2}', self.page.url)
     if not m:
         return None

     day = Type(CleanText('./ancestor::td/div[@class="day_number"]'), type=int)(self)
     start_date = date(year=int(m.group(1)), month=int(m.group(2)), day=day)
     return datetime.combine(start_date, _time)
Exemple #28
0
 def obj_files(self):
     """Return the cleaned text of every "torFileName" cell, in page order."""
     cells = Type('//td[has-class("torFileName")]', type=list)(self)
     return [CleanText(cell)(self) for cell in cells]