Example #1
0
    def obj_thumbnail(self):
        """Return a Thumbnail for the ``VTU/IUR`` entry, or the empty value itself.

        The ``Dict`` filter is given ``NotAvailable`` as its default. When
        ``empty()`` reports the looked-up value as empty, that value is
        returned unchanged instead of being wrapped in a Thumbnail.
        """
        image_url = Dict('VTU/IUR', default=NotAvailable)(self)
        if not empty(image_url):
            thumb = Thumbnail(image_url)
            # Reuse the thumbnail's id as its URL.
            thumb.url = thumb.id
            return thumb
        return image_url
Example #2
0
    def iter_videos(self):
        """Yield a CappedVideo for each ``div.vidBackdrop`` element of the page.

        Nothing is yielded when the search box holds 'No Results Found'.

        :raises BrokenPageError: if a duration has more than three
            colon-separated parts
        """
        # When no results are found, the website returns random results
        sb = self.parser.select(self.document.getroot(), 'div.search form input.searchbox', 1)
        if sb.value == 'No Results Found':
            return

        # Extracting meta data from results page
        vidbackdrop_list = self.parser.select(self.document.getroot(), 'div.vidBackdrop    ')
        for vidbackdrop in vidbackdrop_list:
            url = self.parser.select(vidbackdrop, 'a', 1).attrib['href']
            # The id is the href without its first two characters
            _id = url[2:]

            video = CappedVideo(_id)
            video.set_empty_fields(NotAvailable, ('url',))

            video.title = to_unicode(self.parser.select(vidbackdrop, 'div.vidTitle a', 1).text)
            video.author = to_unicode(self.parser.select(vidbackdrop, 'div.vidAuthor a', 1).text)

            thumbnail_url = 'http://cdn.capped.tv/pre/%s.png' % _id
            video.thumbnail = Thumbnail(thumbnail_url)
            video.thumbnail.url = to_unicode(video.thumbnail.id)

            info = self.parser.select(vidbackdrop, 'div.vidInfo', 1)
            # Skip the 7 leading characters, then keep only the first
            # space-delimited token, which is parsed as the duration
            duration_text = info.text[7:].split(' ')[0]
            parts = duration_text.split(':')
            if len(parts) > 3:
                # Report the text that failed to parse, not the element object
                raise BrokenPageError('Unable to parse duration %r' % duration_text)
            # Left-pad with zeros so that S, M:S and H:M:S all unpack alike
            hours, minutes, seconds = [0] * (3 - len(parts)) + parts

            video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))

            yield video
Example #3
0
    def parse_video(self, el, video=None):
        """Fill a video object from one video XML element.

        :param el: element providing ``ID``, ``INFOS`` and ``MEDIA`` children
        :param video: existing object to fill; when falsy, a new
            CanalplusVideo is created from the ``ID`` text
        :returns: the filled video, or None when ``ID`` is ``'-1'``
        """
        _id = el.find('ID').text
        if _id == '-1':
            # means the video is not found
            return None

        if not video:
            video = CanalplusVideo(_id)

        infos = el.find('INFOS')
        # A TITRAGE child without text has ``text`` set to None; treat it like
        # a blank part instead of failing on ``None.strip()``.
        title_parts = [(part.text or '').strip() for part in infos.find('TITRAGE')]
        video.title = u' — '.join(part for part in title_parts if part)
        video.description = infos.find('DESCRIPTION').text

        media = el.find('MEDIA')
        url = media.find('IMAGES').find('PETIT').text
        if url:
            video.thumbnail = Thumbnail(url)
            video.thumbnail.url = video.thumbnail.id
        else:
            video.thumbnail = NotAvailable

        # Named ``fmt`` so the ``format`` builtin is not shadowed.
        for fmt in media.find('VIDEOS'):
            if fmt.text is None:
                continue

            # Only the first HLS entry that carries text is used.
            if fmt.tag == 'HLS':
                video.ext = u'm3u8'
                video.url = fmt.text
                break

        publication = infos.find('PUBLICATION')
        # DATE is split on '/' into day, month, year; HEURE on ':' into h, m, s.
        day, month, year = map(int, publication.find('DATE').text.split('/'))
        hour, minute, second = map(int, publication.find('HEURE').text.split(':'))
        video.date = datetime(year, month, day, hour, minute, second)

        return video
Example #4
0
    def iter_videos(self):
        """Yield a JacquieEtMichelVideo for each ``//ul[@id="list"]/li`` item.

        The id is derived from the item's link href through the
        ``/showvideo/<digits>/`` substitution. Thumbnail, title and
        description are read from the item, then ``set_empty_fields`` is
        called with NotAvailable and ``('url',)`` as exceptions.
        """
        for span in self.document.xpath('//ul[@id="list"]/li'):
            a = self.parser.select(span, 'a', 1)
            url = a.attrib['href']
            _id = re.sub(r'/showvideo/(\d+)/.*', r'\1', url)

            video = JacquieEtMichelVideo(_id)

            img_src = span.find('.//img').attrib['src']
            video.thumbnail = Thumbnail(img_src)
            video.thumbnail.url = video.thumbnail.id

            title_el = self.parser.select(span, 'h2', 1)
            video.title = to_unicode(title_el.text.strip())
            video.description = self.parser.tocleanstring(
                span.xpath('.//div[@class="desc"]')[0])
            # One-element tuple: the comma belongs outside the quotes,
            # otherwise the argument is just the string 'url,'.
            video.set_empty_fields(NotAvailable, ('url',))

            yield video
Example #5
0
    def iter_videos(self):
        """Yield a GDCVaultVideo for each featured session link of the page.

        Links whose ``href`` does not begin with ``/play/<digits>/`` are
        skipped. Title and description are set to NotAvailable when their
        lookup raises IndexError.
        """
        for a in self.parser.select(
                self.document.getroot(),
                'section.conference ul.media_items li.featured a.session_item'
        ):
            href = a.attrib.get('href', '')
            # Raw string so that \d reaches the regex engine untouched
            m = re.match(r'/play/(\d+)/.*', href)
            if not m:
                continue
            video = GDCVaultVideo(m.group(1))

            # get title
            try:
                video.title = unicode(
                    self.parser.select(a, 'div.conference_info p strong',
                                       1).text)
            except IndexError:
                video.title = NotAvailable

            # get description
            try:
                video.description = unicode(
                    self.parser.select(a, 'div.conference_info p', 1).text)
            except IndexError:
                video.description = NotAvailable

            # get thumbnail
            img = self.parser.select(a, 'div.featured_image img', 1)
            if img is not None:
                video.thumbnail = Thumbnail(img.attrib['src'])
                video.thumbnail.url = video.thumbnail.id
            else:
                video.thumbnail = NotAvailable

            #m = re.match('id-(\d+)', a.attrib.get('class', ''))
            #if not m:
            #    continue
            # FIXME
            yield video
Example #6
0
    def fill_gallery(self, gallery):
        """Populate ``gallery`` with values scraped from ``self.document``.

        Sets ``title``, ``cardinality``, ``date``, ``rating``, ``rating_max``
        and ``thumbnail`` on the given object, in that order.
        """
        gallery.title = self.document.xpath("//h1[@id='gn']/text()")[0]

        # Leading digits of the "Length:" cell give the cardinality.
        length_text = self.document.xpath("//div[@id='gdd']//tr[td[@class='gdt1']/text()='Length:']/td[@class='gdt2']/text()")[0]
        leading_digits = re.match(r"\d+", length_text)
        gallery.cardinality = int(leading_digits.group(0))

        posted_text = self.document.xpath("//div[@id='gdd']//tr[td[@class='gdt1']/text()='Posted:']/td[@class='gdt2']/text()")[0]
        gallery.date = datetime.strptime(posted_text, "%Y-%m-%d %H:%M")

        # The rating stays None when the label holds no decimal number.
        label_text = self.document.xpath("//td[@id='rating_label']/text()")[0]
        decimal = re.search(r"\d+\.\d+", label_text)
        gallery.rating = float(decimal.group(0)) if decimal is not None else None

        gallery.rating_max = 5

        try:
            image_url = self.document.xpath("//div[@class='gdtm']/a/img/attribute::src")[0]
        except IndexError:
            # No <img> source found: take the URL from the div's inline style.
            inline_style = self.document.xpath("//div[@class='gdtm']/div/attribute::style")[0]
            background = re.search(r"background:[^;]+url\((.+?)\)", inline_style)
            image_url = background.group(1)

        gallery.thumbnail = Thumbnail(image_url)
Example #7
0
    def iter_videos(self):
        """Yield a TricTracTVVideo for each ``li#contentsearch`` entry.

        Entries whose link href does not match ``/video-<id>`` are logged at
        debug level and skipped. The rating is the number of ``.etoile_on``
        elements found in the entry, out of a maximum of 5.
        """
        root = self.document.getroot()
        for entry in self.parser.select(root, 'li#contentsearch'):
            title_el = self.parser.select(entry, '#titlesearch span', 1)

            link = self.parser.select(entry, 'a', 1)
            href = link.attrib['href']
            matched = re.match('/video-(.*)', href)
            if matched is None:
                self.logger.debug('url %s does not match' % href)
                continue

            video = TricTracTVVideo(matched.group(1))
            video.title = unicode(title_el.text)

            img_src = self.parser.select(entry, 'img', 1).attrib['src']
            lit_stars = self.parser.select(entry, '.etoile_on')
            video.rating = len(lit_stars)
            video.rating_max = 5

            # The image source is appended to the site's base address.
            video.thumbnail = Thumbnail('http://www.trictrac.tv/%s' % img_src)
            video.thumbnail.url = video.thumbnail.id

            yield video
Example #8
0
def video_info(url):
    """Fetch info about a video using youtube-dl

    :param url: URL of the web page containing the video
    :rtype: :class:`weboob.capabilities.video.Video`
    :returns: the filled video, or None when youtube-dl exits with an error
    :raises Exception: when youtube-dl is not found in ``PATH``
    """

    if not MediaPlayer._find_in_path(os.environ['PATH'], 'youtube-dl'):
        raise Exception('Please install youtube-dl')

    try:
        j = json.loads(
            subprocess.check_output(['youtube-dl', '-f', 'best', '-J', url]))
    except subprocess.CalledProcessError:
        return None

    def text_or_na(key):
        # Missing, null and empty values all map to NotAvailable.
        value = j.get(key)
        return unicode(value) if value else NotAvailable

    v = BaseVideo(id=url)
    v.title = text_or_na('title')
    v.ext = text_or_na('ext')
    v.description = text_or_na('description')
    # 'url' is the only key read without a fallback.
    v.url = unicode(j['url'])
    duration = j.get('duration')
    v.duration = timedelta(seconds=duration) if duration else NotAvailable
    v.author = text_or_na('uploader')
    v.rating = j.get('average_rating') or NotAvailable

    if j.get('thumbnail'):
        v.thumbnail = Thumbnail(unicode(j['thumbnail']))

    # ``or`` rather than a dict.get default: an 'upload_date' key that is
    # present but null must still fall back to 'release_date'.
    d = j.get('upload_date') or j.get('release_date')
    if d:
        v.date = parse_date(d)

    return v
Example #9
0
 def obj_thumbnail(self):
     """Wrap the value of ``Attr('.//img', 'data-original')`` in a Thumbnail."""
     thumb = Thumbnail(Attr('.//img', 'data-original')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #10
0
 def obj_thumbnail(self):
     """Wrap the ``data-src`` value of the ``resultat-vignette`` image in a Thumbnail."""
     thumb = Thumbnail(Attr('a/img[@class="resultat-vignette"]', 'data-src')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #11
0
 def obj_thumbnail(self):
     """Wrap the text selected by ``./a/img/@src`` in a Thumbnail."""
     thumb = Thumbnail(CleanText('./a/img/@src')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #12
0
 def obj_thumbnail(self):
     """Return a Thumbnail for ``./thumbnails/thumbnail[1]``, or None if it is empty.

     The ``CleanText`` filter defaults to ``''``; a falsy result yields None.
     """
     source = CleanText('./thumbnails/thumbnail[1]', default='')(self)
     if not source:
         return None
     thumb = Thumbnail(source)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #13
0
 def obj_thumbnail(self):
     """Wrap the ``src`` attribute of the first ``./a/div/img`` match in a Thumbnail."""
     first_img = self.xpath('./a/div/img')[0]
     thumb = Thumbnail(first_img.attrib['src'])
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #14
0
 def obj_thumbnail(self):
     """Wrap the text selected by ``./div/a/img/@data-src`` in a Thumbnail.

     The same text is also stored as the thumbnail's ``url``.
     """
     data_src = CleanText('./div/a/img/@data-src')(self)
     thumb = Thumbnail(data_src)
     thumb.url = data_src
     return thumb
Example #15
0
 def obj_thumbnail(self):
     """Wrap the ``data-original`` attribute of the first ``.//img`` match in a Thumbnail."""
     first_img = self.xpath('.//img')[0]
     thumb = Thumbnail(first_img.attrib['data-original'])
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #16
0
 def obj_thumbnail(self):
     """Wrap the ``data-original`` attribute of the first ``.//img`` match in a Thumbnail.

     The ``url`` is the thumbnail's id with ``http://`` replaced by ``https://``.
     """
     first_img = self.xpath('.//img')[0]
     thumb = Thumbnail(first_img.attrib['data-original'])
     secure_url = thumb.id.replace('http://', 'https://')
     thumb.url = secure_url
     return thumb
Example #17
0
 def obj_thumbnail(self):
     """Return a Thumbnail for ``./thumbnails/thumbnail[1]``, or None if it is empty.

     The ``CleanText`` filter defaults to ``''``; a falsy result yields None.
     """
     source = CleanText('./thumbnails/thumbnail[1]', default='')(self)
     if not source:
         return None
     thumb = Thumbnail(source)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #18
0
 def obj_thumbnail(self):
     """Wrap the ``src`` of the ``field-thumbnail`` noscript image in a Thumbnail."""
     src_filter = CleanText('.//div[@class="article-primary "]/div[has-class("field-thumbnail")]/span/noscript/img/@src')
     thumb = Thumbnail(src_filter(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #19
0
 def obj_thumbnail(self):
     """Wrap the text selected by ``//image[1]/url`` in a Thumbnail."""
     image_url = CleanText('//image[1]/url')(self)
     thumb = Thumbnail(image_url)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #20
0
 def obj_thumbnail(self):
     """Return a Thumbnail for ``path_img_emission``, or None when ``self.el`` lacks that key."""
     if 'path_img_emission' not in self.el:
         return None
     image_path = Dict('path_img_emission')(self)
     thumb = Thumbnail(image_path)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #21
0
 def obj_thumbnail(self):
     """Wrap the ``clip/pictures/sizes/0/link`` value in a Thumbnail."""
     link = Dict('clip/pictures/sizes/0/link')(self)
     thumb = Thumbnail(link)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #22
0
 def obj_thumbnail(self):
     """Return a Thumbnail for the first image whose format is ``landscape``.

     NotAvailable is returned when ``self.el['images']`` holds no such image.
     """
     for image in self.el['images']:
         if image['format'] == 'landscape':
             return Thumbnail(image['url'])
     return NotAvailable
Example #23
0
 def obj_thumbnail(self):
     """Wrap the ``image`` value, formatted into a francetv.fr address, in a Thumbnail."""
     address_filter = Format('http://www.francetv.fr%s', Dict['image'])
     thumb = Thumbnail(address_filter(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #24
0
 def obj_thumbnail(self):
     """Wrap the ``src`` of the ``field-thumbnail`` noscript image in a Thumbnail."""
     src_filter = CleanText('.//div[@class="article-primary "]/div[has-class("field-thumbnail")]/span/noscript/img/@src')
     thumb = Thumbnail(src_filter(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #25
0
 def obj_thumbnail(self):
     """Return a Thumbnail for the page's ``og:image`` meta content.

     The content goes through ``NormalizeThumbnail``; a falsy result yields None.
     """
     og_image = CleanText('/html/head/meta[@property="og:image"]/@content')
     normalized = NormalizeThumbnail(og_image)(self)
     if not normalized:
         return None
     thumb = Thumbnail(normalized)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #26
0
 def obj_thumbnail(self):
     """Wrap the ``thumbnailUrl`` value looked up on ``self.el`` in a Thumbnail."""
     thumbnail_url = Dict('thumbnailUrl')(self.el)
     thumb = Thumbnail(thumbnail_url)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #27
0
 def obj_thumbnail(self):
     """Wrap the ``clip/pictures/sizes/0/link`` value in a Thumbnail."""
     link = Dict('clip/pictures/sizes/0/link')(self)
     thumb = Thumbnail(link)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #28
0
 def obj_thumbnail(self):
     """Wrap the ``og:image`` meta content in a Thumbnail.

     The same text is also stored as the thumbnail's ``url``.
     """
     og_image = CleanText('//meta[@property="og:image"]/@content')(self)
     thumb = Thumbnail(og_image)
     thumb.url = og_image
     return thumb
Example #29
0
 def obj_thumbnail(self):
     """Wrap the text selected by ``//image[1]/url`` in a Thumbnail."""
     image_url = CleanText('//image[1]/url')(self)
     thumb = Thumbnail(image_url)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #30
0
 def obj_thumbnail(self):
     """Wrap the text selected by ``div/div/a/figure/span/span/@data-src`` in a Thumbnail."""
     thumb = Thumbnail(CleanText('div/div/a/figure/span/span/@data-src')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #31
0
 def obj_thumbnail(self):
     """Wrap the value of ``Attr('./img', 'src')`` in a Thumbnail."""
     thumb = Thumbnail(Attr('./img', 'src')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #32
0
 def obj_thumbnail(self):
     """Return a Thumbnail for ``path_img_emission``, or None when ``self.el`` lacks that key."""
     if 'path_img_emission' not in self.el:
         return None
     image_path = Dict('path_img_emission')(self)
     thumb = Thumbnail(image_path)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #33
0
 def obj_thumbnail(self):
     """Wrap the ``data-original`` attribute of the first ``.//img`` match in a Thumbnail."""
     first_img = self.xpath('.//img')[0]
     thumb = Thumbnail(first_img.attrib['data-original'])
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #34
0
 def obj_thumbnail(self):
     """Wrap the ``og:image`` meta content in a Thumbnail."""
     thumb = Thumbnail(CleanText('//meta[@property="og:image"]/@content')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #35
0
 def obj_thumbnail(self):
     """Wrap the ``data-src`` value of the ``resultat-vignette`` image in a Thumbnail."""
     data_src_filter = Attr('a/img[@class="resultat-vignette"]', 'data-src')
     thumb = Thumbnail(data_src_filter(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #36
0
 def obj_thumbnail(self):
     """Wrap the ``VTU/IUR`` value in a Thumbnail."""
     thumb = Thumbnail(Dict('VTU/IUR')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #37
0
 def obj_thumbnail(self):
     """Return a Thumbnail for the ``thumbnail`` link's image, joined to the page URL.

     :raises SkipItem: when the ``Attr`` filter falls back to its None default
     """
     img_src = Attr('./a[has-class("thumbnail")]/img', 'src', default=None)(self)
     if img_src is not None:
         return Thumbnail(urljoin(self.page.url, img_src))
     raise SkipItem('not an image thread')
Example #38
0
 def obj_thumbnail(self):
     """Wrap the ``data-original`` attribute of the first ``.//img`` match in a Thumbnail.

     The ``url`` is the thumbnail's id with ``http://`` replaced by ``https://``.
     """
     first_img = self.xpath('.//img')[0]
     thumb = Thumbnail(first_img.attrib['data-original'])
     secure_url = thumb.id.replace('http://', 'https://')
     thumb.url = secure_url
     return thumb
Example #39
0
 def obj_thumbnail(self):
     """Wrap the value of ``Attr('./img', 'src')`` in a Thumbnail."""
     thumb = Thumbnail(Attr('./img', 'src')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #40
0
 def obj_thumbnail(self):
     """Wrap the value of ``Attr('.//img', 'data-original')`` in a Thumbnail."""
     thumb = Thumbnail(Attr('.//img', 'data-original')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #41
0
 def obj_thumbnail(self):
     """Wrap the ``thumbnailUrl`` value looked up on ``self.el`` in a Thumbnail."""
     thumbnail_url = Dict('thumbnailUrl')(self.el)
     thumb = Thumbnail(thumbnail_url)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #42
0
 def obj_thumbnail(self):
     """Wrap the ``image`` value, formatted into a francetv.fr address, in a Thumbnail."""
     address_filter = Format('http://www.francetv.fr%s', Dict['image'])
     thumb = Thumbnail(address_filter(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #43
0
 def obj_thumbnail(self):
     """Wrap the ``src`` attribute of the first ``./a/div/img`` match in a Thumbnail."""
     first_img = self.xpath('./a/div/img')[0]
     thumb = Thumbnail(first_img.attrib['src'])
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #44
0
 def obj_thumbnail(self):
     """Wrap the ``src`` of the ``thumb-image-container__image`` image in a Thumbnail."""
     img_src = Attr('.//img[@class="thumb-image-container__image"]', 'src')(self)
     thumb = Thumbnail(img_src)
     return thumb
Example #45
0
 def obj_thumbnail(self):
     """Wrap the text selected by ``./div/a/img/@data-src`` in a Thumbnail.

     The same text is also stored as the thumbnail's ``url``.
     """
     data_src = CleanText('./div/a/img/@data-src')(self)
     thumb = Thumbnail(data_src)
     thumb.url = data_src
     return thumb
Example #46
0
 def obj_thumbnail(self):
     """Wrap the ``src`` of the image with class ``thumb`` in a Thumbnail."""
     img_src = Attr('.//img[@class="thumb"]', 'src')(self)
     thumb = Thumbnail(img_src)
     return thumb
Example #47
0
 def obj_thumbnail(self):
     """Wrap the text selected by ``./a/img/@src`` in a Thumbnail."""
     thumb = Thumbnail(CleanText('./a/img/@src')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #48
0
 def obj_thumbnail(self):
     """Wrap the ``data-path`` of the ``js-videoThumb`` image in a Thumbnail.

     The ``{index}`` placeholder in that value is replaced by ``1``.
     """
     path_template = Attr('.//img[has-class("js-videoThumb")]', 'data-path')(self)
     first_frame = path_template.replace('{index}', '1')
     thumb = Thumbnail(first_frame)
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb
Example #49
0
 def obj_thumbnail(self):
     """Wrap the text selected by ``div/div/a/figure/span/span/@data-src`` in a Thumbnail."""
     thumb = Thumbnail(CleanText('div/div/a/figure/span/span/@data-src')(self))
     # Reuse the thumbnail's id as its URL.
     thumb.url = thumb.id
     return thumb