def obj_thumbnail(self):
    """Return a Thumbnail built from the ``VTU/IUR`` entry.

    The ``Dict`` filter is given ``NotAvailable`` as its default. Any value
    that ``empty()`` reports as empty is returned unchanged instead of being
    wrapped in a ``Thumbnail``.
    """
    image_url = Dict('VTU/IUR', default=NotAvailable)(self)
    if not empty(image_url):
        thumb = Thumbnail(image_url)
        # The thumbnail id doubles as its URL.
        thumb.url = thumb.id
        return thumb
    return image_url
def iter_videos(self):
    """Yield a ``CappedVideo`` for every ``div.vidBackdrop`` entry of the page.

    Nothing is yielded when the search box value is ``'No Results Found'``.

    :raises BrokenPageError: if the duration text has more than three
        colon-separated parts
    """
    root = self.document.getroot()

    # When no results are found, the website returns random results
    search_box = self.parser.select(root, 'div.search form input.searchbox', 1)
    if search_box.value == 'No Results Found':
        return

    # Extract the metadata of each entry of the results page
    for entry in self.parser.select(root, 'div.vidBackdrop '):
        href = self.parser.select(entry, 'a', 1).attrib['href']
        _id = href[2:]

        video = CappedVideo(_id)
        video.set_empty_fields(NotAvailable, ('url',))

        title_link = self.parser.select(entry, 'div.vidTitle a', 1)
        video.title = to_unicode(title_link.text)
        author_link = self.parser.select(entry, 'div.vidAuthor a', 1)
        video.author = to_unicode(author_link.text)

        video.thumbnail = Thumbnail('http://cdn.capped.tv/pre/%s.png' % _id)
        video.thumbnail.url = to_unicode(video.thumbnail.id)

        # The description field holds the duration: skip its first seven
        # characters, then keep only the first space-separated token.
        info = self.parser.select(entry, 'div.vidInfo', 1)
        clock = info.text[7:].split(' ')[0]

        # Accept "S", "M:S" or "H:M:S"; missing leading units count as zero.
        fields = clock.split(':')
        if len(fields) > 3:
            raise BrokenPageError('Unable to parse duration %r' % info)
        hours, minutes, seconds = [0] * (3 - len(fields)) + fields

        video.duration = datetime.timedelta(hours=int(hours),
                                            minutes=int(minutes),
                                            seconds=int(seconds))
        yield video
def parse_video(self, el, video=None):
    """Fill a video object from its XML description.

    :param el: XML element with ``ID``, ``INFOS`` and ``MEDIA`` children
    :param video: existing video to fill; when omitted (or falsy) a new
        ``CanalplusVideo`` is created from the ID
    :returns: the filled video, or ``None`` when the ID is ``'-1'``
    """
    _id = el.find('ID').text
    if _id == '-1':
        # means the video is not found
        return None

    if not video:
        video = CanalplusVideo(_id)

    infos = el.find('INFOS')

    # An element without text has ``text`` set to None, so treat it as an
    # empty part instead of crashing on ``None.strip()``.
    title_parts = [(part.text or u'').strip()
                   for part in infos.find('TITRAGE')]
    video.title = u' — '.join(part for part in title_parts if part)
    video.description = infos.find('DESCRIPTION').text

    media = el.find('MEDIA')
    url = media.find('IMAGES').find('PETIT').text
    if url:
        video.thumbnail = Thumbnail(url)
        video.thumbnail.url = video.thumbnail.id
    else:
        video.thumbnail = NotAvailable

    # Use the first HLS entry that actually carries a URL.
    for fmt in media.find('VIDEOS'):
        if fmt.text is None:
            continue
        if fmt.tag == 'HLS':
            video.ext = u'm3u8'
            video.url = fmt.text
            break

    publication = infos.find('PUBLICATION')
    # DATE is split on '/' as day/month/year, HEURE on ':' as
    # hour:minute:second.
    day, month, year = map(int, publication.find('DATE').text.split('/'))
    hour, minute, second = map(int, publication.find('HEURE').text.split(':'))
    video.date = datetime(year, month, day, hour, minute, second)

    return video
def iter_videos(self):
    """Yield a ``JacquieEtMichelVideo`` for each ``<li>`` under ``ul#list``.

    Each video gets its id (the digits of the ``/showvideo/<id>/`` link),
    thumbnail, title and description. The remaining fields except ``url``
    are set to ``NotAvailable``.
    """
    for item in self.document.xpath('//ul[@id="list"]/li'):
        link = self.parser.select(item, 'a', 1)
        _id = re.sub(r'/showvideo/(\d+)/.*', r'\1', link.attrib['href'])

        video = JacquieEtMichelVideo(_id)

        video.thumbnail = Thumbnail(item.find('.//img').attrib['src'])
        video.thumbnail.url = video.thumbnail.id

        title_el = self.parser.select(item, 'h2', 1)
        video.title = to_unicode(title_el.text.strip())
        video.description = self.parser.tocleanstring(
            item.xpath('.//div[@class="desc"]')[0])

        # The excluded field names must be a real one-element tuple;
        # ('url,') without the inner comma placement is just the string 'url,'.
        video.set_empty_fields(NotAvailable, ('url',))

        yield video
def iter_videos(self):
    """Yield a ``GDCVaultVideo`` for each featured session link.

    Links whose ``href`` does not match ``/play/<digits>/`` are skipped.
    Title and description fall back to ``NotAvailable`` on ``IndexError``.
    """
    session_links = self.parser.select(
        self.document.getroot(),
        'section.conference ul.media_items li.featured a.session_item')

    for link in session_links:
        match = re.match('/play/(\d+)/.*', link.attrib.get('href', ''))
        if match is None:
            continue

        video = GDCVaultVideo(match.group(1))

        # Title
        try:
            title_el = self.parser.select(
                link, 'div.conference_info p strong', 1)
            video.title = unicode(title_el.text)
        except IndexError:
            video.title = NotAvailable

        # Description
        try:
            description_el = self.parser.select(
                link, 'div.conference_info p', 1)
            video.description = unicode(description_el.text)
        except IndexError:
            video.description = NotAvailable

        # Thumbnail
        thumb_img = self.parser.select(link, 'div.featured_image img', 1)
        if thumb_img is None:
            video.thumbnail = NotAvailable
        else:
            video.thumbnail = Thumbnail(thumb_img.attrib['src'])
            video.thumbnail.url = video.thumbnail.id

        # FIXME: matching 'id-(\d+)' against the link's class attribute
        # was left disabled here.
        yield video
def fill_gallery(self, gallery):
    """Fill ``gallery`` with the details read from the page.

    Sets ``title``, ``cardinality``, ``date``, ``rating``, ``rating_max``
    and ``thumbnail``. ``rating`` is ``None`` when the rating label holds
    no decimal number.

    :param gallery: gallery object to populate in place
    """
    doc = self.document

    gallery.title = doc.xpath("//h1[@id='gn']/text()")[0]

    length_text = doc.xpath("//div[@id='gdd']//tr[td[@class='gdt1']/text()='Length:']/td[@class='gdt2']/text()")[0]
    # Only the leading digits of the "Length:" cell are used.
    gallery.cardinality = int(re.match(r"\d+", length_text).group(0))

    posted_text = doc.xpath("//div[@id='gdd']//tr[td[@class='gdt1']/text()='Posted:']/td[@class='gdt2']/text()")[0]
    gallery.date = datetime.strptime(posted_text, "%Y-%m-%d %H:%M")

    rating_text = doc.xpath("//td[@id='rating_label']/text()")[0]
    found = re.search(r"\d+\.\d+", rating_text)
    gallery.rating = float(found.group(0)) if found is not None else None
    gallery.rating_max = 5

    # Prefer a plain <img> source; otherwise extract the URL from the
    # CSS background of the first thumbnail <div>.
    img_sources = doc.xpath("//div[@class='gdtm']/a/img/attribute::src")
    if img_sources:
        thumbnail_url = img_sources[0]
    else:
        style = doc.xpath("//div[@class='gdtm']/div/attribute::style")[0]
        thumbnail_url = re.search(r"background:[^;]+url\((.+?)\)", style).group(1)

    gallery.thumbnail = Thumbnail(thumbnail_url)
def iter_videos(self):
    """Yield a ``TricTracTVVideo`` for each ``li#contentsearch`` element.

    Entries whose link does not match ``/video-...`` are logged at debug
    level and skipped. The rating is the number of ``.etoile_on`` elements,
    out of 5.
    """
    for item in self.parser.select(self.document.getroot(), 'li#contentsearch'):
        title_el = self.parser.select(item, '#titlesearch span', 1)
        href = self.parser.select(item, 'a', 1).attrib['href']

        match = re.match('/video-(.*)', href)
        if match is None:
            self.logger.debug('url %s does not match' % href)
            continue

        video = TricTracTVVideo(match.group(1))
        video.title = unicode(title_el.text)

        img_src = self.parser.select(item, 'img', 1).attrib['src']
        video.rating = len(self.parser.select(item, '.etoile_on'))
        video.rating_max = 5

        video.thumbnail = Thumbnail('http://www.trictrac.tv/%s' % img_src)
        video.thumbnail.url = video.thumbnail.id

        yield video
def video_info(url):
    """Fetch info about a video using youtube-dl

    Runs ``youtube-dl -f best -J <url>`` and maps the JSON it prints onto
    a ``BaseVideo`` whose id is ``url``.

    :param url: URL of the web page containing the video
    :rtype: :class:`weboob.capabilities.video.Video`
    :returns: the filled video, or ``None`` when youtube-dl exits with an
        error
    :raises Exception: if ``youtube-dl`` is not found in ``PATH``
    """
    if not MediaPlayer._find_in_path(os.environ['PATH'], 'youtube-dl'):
        raise Exception('Please install youtube-dl')

    try:
        j = json.loads(
            subprocess.check_output(['youtube-dl', '-f', 'best', '-J', url]))
    except subprocess.CalledProcessError:
        return

    def _text(key):
        # Missing or falsy values become NotAvailable rather than u'None'.
        value = j.get(key)
        return unicode(value) if value else NotAvailable

    v = BaseVideo(id=url)
    v.title = _text('title')
    v.ext = _text('ext')
    v.description = _text('description')
    v.url = unicode(j['url'])

    duration = j.get('duration')
    v.duration = timedelta(seconds=duration) if duration else NotAvailable

    v.author = _text('uploader')
    v.rating = j.get('average_rating') or NotAvailable

    if j.get('thumbnail'):
        v.thumbnail = Thumbnail(unicode(j['thumbnail']))

    # ``upload_date`` may be present but null; fall back to ``release_date``
    # in that case too, not only when the key is missing.
    d = j.get('upload_date') or j.get('release_date')
    if d:
        v.date = parse_date(d)

    return v
def obj_thumbnail(self):
    """Build a Thumbnail from the ``data-original`` attribute of ``.//img``.

    The thumbnail's id is reused as its URL.
    """
    thumb = Thumbnail(Attr('.//img', 'data-original')(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``data-src`` of the ``resultat-vignette`` image.

    The thumbnail's id is reused as its URL.
    """
    image_filter = Attr('a/img[@class="resultat-vignette"]', 'data-src')
    thumb = Thumbnail(image_filter(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the cleaned text of ``./a/img/@src``.

    The thumbnail's id is reused as its URL.
    """
    thumb = Thumbnail(CleanText('./a/img/@src')(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from ``./thumbnails/thumbnail[1]``.

    Returns ``None`` when the cleaned text is empty (the filter defaults to
    ``''``); otherwise the thumbnail's id is reused as its URL.
    """
    first_thumbnail = CleanText('./thumbnails/thumbnail[1]', default='')(self)
    if not first_thumbnail:
        return None

    thumb = Thumbnail(first_thumbnail)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``src`` of the first ``./a/div/img`` node.

    The thumbnail's id is reused as its URL.
    """
    images = self.xpath('./a/div/img')
    src = images[0].attrib['src']

    thumb = Thumbnail(src)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the cleaned text of ``./div/a/img/@data-src``.

    The extracted value is used both to construct the thumbnail and as its
    URL.
    """
    extract_src = CleanText('./div/a/img/@data-src')
    image_url = extract_src(self)

    thumb = Thumbnail(image_url)
    thumb.url = image_url
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from ``data-original`` of the first ``.//img`` node.

    The thumbnail's id is reused as its URL.
    """
    first_img = self.xpath('.//img')[0]
    original_src = first_img.attrib['data-original']

    thumb = Thumbnail(original_src)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from ``data-original`` of the first ``.//img`` node.

    The URL is the thumbnail's id with ``http://`` replaced by ``https://``.
    """
    first_img = self.xpath('.//img')[0]
    original_src = first_img.attrib['data-original']

    thumb = Thumbnail(original_src)
    secure_url = thumb.id.replace('http://', 'https://')
    thumb.url = secure_url
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from ``./thumbnails/thumbnail[1]``.

    Returns ``None`` when the cleaned text is empty (the filter defaults to
    ``''``); otherwise the thumbnail's id is reused as its URL.
    """
    first_thumbnail = CleanText('./thumbnails/thumbnail[1]', default='')(self)
    if not first_thumbnail:
        return None

    thumb = Thumbnail(first_thumbnail)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``<noscript>`` image of the primary article.

    The thumbnail's id is reused as its URL.
    """
    extract_src = CleanText('.//div[@class="article-primary "]/div[has-class("field-thumbnail")]/span/noscript/img/@src')

    thumb = Thumbnail(extract_src(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the cleaned text of ``//image[1]/url``.

    The thumbnail's id is reused as its URL.
    """
    image_url = CleanText('//image[1]/url')(self)

    thumb = Thumbnail(image_url)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``path_img_emission`` entry, if present.

    Returns ``None`` when ``self.el`` has no ``path_img_emission`` key;
    otherwise the thumbnail's id is reused as its URL.
    """
    if 'path_img_emission' not in self.el:
        return None

    thumb = Thumbnail(Dict('path_img_emission')(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``clip/pictures/sizes/0/link`` entry.

    The thumbnail's id is reused as its URL.
    """
    picture_link = Dict('clip/pictures/sizes/0/link')(self)

    thumb = Thumbnail(picture_link)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Return a Thumbnail for the first image whose format is ``landscape``.

    Returns ``NotAvailable`` when ``self.el['images']`` has no such image.
    """
    landscape_urls = (
        image['url']
        for image in self.el['images']
        if image['format'] == 'landscape'
    )
    try:
        first_url = next(landscape_urls)
    except StopIteration:
        return NotAvailable
    return Thumbnail(first_url)
def obj_thumbnail(self):
    """Build a Thumbnail from the ``image`` entry prefixed with the site root.

    The ``image`` value is formatted into ``http://www.francetv.fr%s``; the
    thumbnail's id is reused as its URL.
    """
    absolute_url = Format('http://www.francetv.fr%s', Dict['image'])
    thumb = Thumbnail(absolute_url(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``<noscript>`` image of the primary article.

    The thumbnail's id is reused as its URL.
    """
    extract_src = CleanText('.//div[@class="article-primary "]/div[has-class("field-thumbnail")]/span/noscript/img/@src')

    thumb = Thumbnail(extract_src(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the page's ``og:image`` meta content.

    The value is passed through ``NormalizeThumbnail``. Returns ``None``
    when the result is falsy; otherwise the thumbnail's id is reused as its
    URL.
    """
    og_image = CleanText('/html/head/meta[@property="og:image"]/@content')
    image_url = NormalizeThumbnail(og_image)(self)
    if not image_url:
        return None

    thumb = Thumbnail(image_url)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``thumbnailUrl`` entry of ``self.el``.

    The thumbnail's id is reused as its URL.
    """
    thumbnail_url = Dict('thumbnailUrl')(self.el)

    thumb = Thumbnail(thumbnail_url)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``clip/pictures/sizes/0/link`` entry.

    The thumbnail's id is reused as its URL.
    """
    picture_link = Dict('clip/pictures/sizes/0/link')(self)

    thumb = Thumbnail(picture_link)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``og:image`` meta content.

    The extracted value is used both to construct the thumbnail and as its
    URL.
    """
    extract_og_image = CleanText('//meta[@property="og:image"]/@content')
    image_url = extract_og_image(self)

    thumb = Thumbnail(image_url)
    thumb.url = image_url
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the cleaned text of ``//image[1]/url``.

    The thumbnail's id is reused as its URL.
    """
    image_url = CleanText('//image[1]/url')(self)

    thumb = Thumbnail(image_url)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from ``div/div/a/figure/span/span/@data-src``.

    The thumbnail's id is reused as its URL.
    """
    thumb = Thumbnail(CleanText('div/div/a/figure/span/span/@data-src')(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``src`` attribute of ``./img``.

    The thumbnail's id is reused as its URL.
    """
    thumb = Thumbnail(Attr('./img', 'src')(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``path_img_emission`` entry, if present.

    Returns ``None`` when ``self.el`` has no ``path_img_emission`` key;
    otherwise the thumbnail's id is reused as its URL.
    """
    if 'path_img_emission' not in self.el:
        return None

    thumb = Thumbnail(Dict('path_img_emission')(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from ``data-original`` of the first ``.//img`` node.

    The thumbnail's id is reused as its URL.
    """
    first_img = self.xpath('.//img')[0]
    original_src = first_img.attrib['data-original']

    thumb = Thumbnail(original_src)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``og:image`` meta content.

    The thumbnail's id is reused as its URL.
    """
    extract_og_image = CleanText('//meta[@property="og:image"]/@content')

    thumb = Thumbnail(extract_og_image(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``data-src`` of the ``resultat-vignette`` image.

    The thumbnail's id is reused as its URL.
    """
    image_filter = Attr('a/img[@class="resultat-vignette"]', 'data-src')
    thumb = Thumbnail(image_filter(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``VTU/IUR`` entry.

    The thumbnail's id is reused as its URL.
    """
    thumb = Thumbnail(Dict('VTU/IUR')(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Return a Thumbnail for the item's thumbnail image.

    The image ``src`` is joined onto ``self.page.url``.

    :raises SkipItem: when the ``Attr`` filter yields its ``None`` default
    """
    image_src = Attr('./a[has-class("thumbnail")]/img', 'src', default=None)(self)
    if image_src is not None:
        return Thumbnail(urljoin(self.page.url, image_src))
    raise SkipItem('not an image thread')
def obj_thumbnail(self):
    """Build a Thumbnail from ``data-original`` of the first ``.//img`` node.

    The URL is the thumbnail's id with ``http://`` replaced by ``https://``.
    """
    first_img = self.xpath('.//img')[0]
    original_src = first_img.attrib['data-original']

    thumb = Thumbnail(original_src)
    secure_url = thumb.id.replace('http://', 'https://')
    thumb.url = secure_url
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``src`` attribute of ``./img``.

    The thumbnail's id is reused as its URL.
    """
    thumb = Thumbnail(Attr('./img', 'src')(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``data-original`` attribute of ``.//img``.

    The thumbnail's id is reused as its URL.
    """
    thumb = Thumbnail(Attr('.//img', 'data-original')(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``thumbnailUrl`` entry of ``self.el``.

    The thumbnail's id is reused as its URL.
    """
    thumbnail_url = Dict('thumbnailUrl')(self.el)

    thumb = Thumbnail(thumbnail_url)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``image`` entry prefixed with the site root.

    The ``image`` value is formatted into ``http://www.francetv.fr%s``; the
    thumbnail's id is reused as its URL.
    """
    absolute_url = Format('http://www.francetv.fr%s', Dict['image'])
    thumb = Thumbnail(absolute_url(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``src`` of the first ``./a/div/img`` node.

    The thumbnail's id is reused as its URL.
    """
    images = self.xpath('./a/div/img')
    src = images[0].attrib['src']

    thumb = Thumbnail(src)
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Return a Thumbnail for the ``src`` of the ``thumb-image-container__image`` image."""
    image_src = Attr('.//img[@class="thumb-image-container__image"]', 'src')(self)
    return Thumbnail(image_src)
def obj_thumbnail(self):
    """Build a Thumbnail from the cleaned text of ``./div/a/img/@data-src``.

    The extracted value is used both to construct the thumbnail and as its
    URL.
    """
    extract_src = CleanText('./div/a/img/@data-src')
    image_url = extract_src(self)

    thumb = Thumbnail(image_url)
    thumb.url = image_url
    return thumb
def obj_thumbnail(self):
    """Return a Thumbnail for the ``src`` of the image with class ``thumb``."""
    image_src = Attr('.//img[@class="thumb"]', 'src')(self)
    return Thumbnail(image_src)
def obj_thumbnail(self):
    """Build a Thumbnail from the cleaned text of ``./a/img/@src``.

    The thumbnail's id is reused as its URL.
    """
    thumb = Thumbnail(CleanText('./a/img/@src')(self))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from the ``data-path`` template of the video image.

    The ``{index}`` placeholder of the template is replaced by ``1``; the
    thumbnail's id is reused as its URL.
    """
    path_template = Attr('.//img[has-class("js-videoThumb")]', 'data-path')(self)

    thumb = Thumbnail(path_template.replace('{index}', '1'))
    thumb.url = thumb.id
    return thumb
def obj_thumbnail(self):
    """Build a Thumbnail from ``div/div/a/figure/span/span/@data-src``.

    The thumbnail's id is reused as its URL.
    """
    thumb = Thumbnail(CleanText('div/div/a/figure/span/span/@data-src')(self))
    thumb.url = thumb.id
    return thumb