Beispiel #1
0
    def iter_videos(self):
        """
        Yield a YoujizzVideo for every 'span#miniatura' element of the page.

        The id is taken from the link target, the thumbnail from the first
        image, the title from 'span#title1' and the duration from
        'span.thumbtime span'.

        Raises BrokenPageError when the duration text can not be parsed.
        """
        span_list = self.parser.select(self.document.getroot(),
                                       'span#miniatura')
        for span in span_list:
            a = self.parser.select(span, 'a', 1)
            url = a.attrib['href']
            _id = re.sub(r'/videos/(.+)\.html', r'\1', url)

            video = YoujizzVideo(_id)

            video.thumbnail = Thumbnail(
                unicode(span.find('.//img').attrib['src']))

            title_el = self.parser.select(span, 'span#title1', 1)
            video.title = to_unicode(title_el.text.strip())

            time_span = self.parser.select(span, 'span.thumbtime span', 1)
            # ';' is accepted as a separator too.  An element without text
            # is reported as unparsable below instead of crashing on None.
            time_txt = (time_span.text or '').strip().replace(';', ':')
            if time_txt == 'N/A':
                hours, minutes, seconds = 0, 0, 0
            else:
                parts = time_txt.split(':')
                try:
                    if len(parts) not in (2, 3):
                        raise ValueError(time_txt)
                    values = [int(v) for v in parts]
                except ValueError:
                    raise BrokenPageError(
                        'Unable to parse the video duration: %s' % time_txt)
                # "mm:ss" gets zero hours, "hh:mm:ss" is used as is.
                hours, minutes, seconds = [0] * (3 - len(values)) + values

            video.duration = datetime.timedelta(hours=hours,
                                                minutes=minutes,
                                                seconds=seconds)

            yield video
Beispiel #2
0
    def get_video(self, _id):
        """
        Build a QuviVideo for ``_id`` from the information libquvi returns.

        Raises UserError when libquvi can not be loaded or fails on the
        page, and NotImplementedError when no media URL is returned.
        """
        video = QuviVideo(_id)

        parser = LibQuvi()
        if not parser.load():
            raise UserError('Make sure libquvi 0.4 is installed')

        try:
            info = parser.get_info(video.page_url)
        except QuviError as qerror:
            raise UserError(qerror.message)

        # Test the raw value first: converting a missing URL to text could
        # produce a non-empty string and defeat the emptiness check.
        raw_url = info.get('url')
        if not raw_url:
            raise NotImplementedError()
        video.url = to_unicode(raw_url)
        if not video.url:
            raise NotImplementedError()

        video.ext = to_unicode(info.get('suffix'))
        video.title = to_unicode(info.get('title'))
        video.page = to_unicode(info.get('page'))
        # 'or 0' also covers a key that is present but set to None.
        duration = int(info.get('duration') or 0)
        if duration:
            # The value is fed to timedelta as milliseconds.
            video.duration = datetime.timedelta(milliseconds=duration)
        if info.get('thumbnail'):
            video.thumbnail = Thumbnail(info.get('thumbnail'))
        return video
Beispiel #3
0
    def iter_videos(self):
        """
        Yield a PluzzVideo for every 'li.vignette' element of the page.

        Entries whose link does not look like a pluzz.fr video page are
        skipped.  A trailing " - HHhMM" in the link text is removed from
        the title and used as time of day for the date read from 'p.date'.
        The date is left unset when it is missing or not a valid date.
        """
        for div in self.parser.select(self.document.getroot(), 'li.vignette'):
            title = self.parser.select(div, 'h4 a', 1)
            url = title.attrib['href']
            # Raw string with escaped dots: only the literal host matches.
            m = re.match(r'^http://www\.pluzz\.fr/([^/]+)\.html$', url)
            if not m:
                self.logger.debug('url %s does not match' % url)
                continue
            _id = m.group(1)
            video = PluzzVideo(_id)
            m = re.match(r'^(.+) - ([0-2][0-9])h([0-5][0-9])$', title.text)
            if m:
                video.title = m.group(1)
                hour = int(m.group(2))
                minute = int(m.group(3))
            else:
                video.title = title.text
                hour = 0
                minute = 0

            # An empty element has no text; treat it as "no date".
            date_text = self.parser.select(div, 'p.date', 1).text or ''
            m = re.match(r'(\d+)/(\d+)/(\d+)', date_text)
            if m:
                day, month, year = [int(g) for g in m.groups()]
                try:
                    video.date = datetime.datetime(year, month, day,
                                                   hour, minute)
                except ValueError:
                    # The title pattern lets hours 24-29 through, and the
                    # date digits are not range-checked either.
                    self.logger.debug('invalid date %s for %s'
                                      % (date_text, url))

            url = self.parser.select(div, 'img.illustration', 1).attrib['src']
            video.thumbnail = Thumbnail(u'http://www.pluzz.fr/%s' % url)

            yield video
Beispiel #4
0
    def iter_videos(self):
        """
        Yield a PluzzVideo for every 'article.rs-cell' element of the page.

        Entries whose link does not look like a pluzz.francetv.fr video page
        are skipped.  The duration is the text after the first '|' of the
        'type-duree' span: either "<n>'" (taken as minutes) or "<a>:<b>"
        (passed to timedelta as hours and minutes).
        """
        for div in self.parser.select(self.document.getroot(),
                                      'article.rs-cell'):
            title = self.parser.select(div, 'h3 a', 1)
            url = title.attrib['href']
            # Raw string with escaped dots: only the literal host and the
            # literal '.html' suffix match.
            m = re.match(r'^http://pluzz\.francetv\.fr/videos/(.+)\.html$',
                         url)
            if not m:
                self.logger.debug('url %s does not match' % url)
                continue
            _id = m.group(1)
            video = PluzzVideo(_id)
            video.title = unicode(title.text.strip())
            for p in div.xpath('.//p[@class="bientot"]'):
                video.title += ' - %s' % p.text.split('|')[0].strip()
            video.date = parse_dt(div.find('span').attrib['data-date'])
            duration = div.xpath('.//span[@class="type-duree"]')[0].text.split(
                '|')[1].strip()
            if duration.endswith("'"):
                t = [0, int(duration[:-1])]
            else:
                # A list, not map(): the result is indexed below.
                t = [int(v) for v in duration.split(':')]
            video.duration = datetime.timedelta(hours=t[0], minutes=t[1])

            url = self.parser.select(div, 'a.vignette img', 1).attrib['src']
            video.thumbnail = Thumbnail(url)

            yield video
Beispiel #5
0
    def iter_videos(self):
        """
        Yield a NolifeTVVideo for every list item whose class starts with
        'id-<digits>'.

        Title and thumbnail come from the screenshot image.  Description
        and date fall back to NotAvailable when they can not be read.
        """
        for div in self.parser.select(self.document.getroot(),
                                      'div.data_emissions ul li'):
            m = re.match(r'id-(\d+)', div.attrib.get('class', ''))
            if not m:
                continue

            img = self.parser.select(div, 'div.screenshot a img', 1)

            video = NolifeTVVideo(m.group(1))
            video.title = unicode(img.attrib['alt'])
            try:
                # The last matching paragraph is used.
                video.description = unicode(self.parser.select(
                    div,
                    'div.tooltip div.border-bottom p, div.infos div.border-bottom p')[-1].text)
            except IndexError:
                video.description = NotAvailable

            video.thumbnail = Thumbnail(unicode(img.attrib['src']))
            try:
                dparts = self.parser.select(
                    div, 'span.date_emission', 1).text.strip().split('/')
                hparts = self.parser.select(
                    div, 'span.hour_emission', 1).text.strip().split('h')
                # The date parts are read from the end: day/month/year.
                video.date = datetime(int(dparts[-1]), int(dparts[-2]),
                                      int(dparts[-3]),
                                      int(hparts[0]), int(hparts[1]))
            except (BrokenPageError, ValueError, IndexError, AttributeError):
                # IndexError: too few '/' or 'h' separated parts.
                # AttributeError: an element without text.
                video.date = NotAvailable

            video.set_empty_fields(NotAvailable, ('url',))

            yield video
Beispiel #6
0
    def iter_videos(self):
        """
        Yield an InaVideo for each 'li' of the results list.

        Nothing is yielded when the page has no results list.  Raises
        BrokenPageError when a duration does not match the expected
        "<h>h<m>min<s>s" form (hours and minutes being optional).
        """
        try:
            results = self.parser.select(self.document.getroot(),
                                         'div.container-videos ul', 1)
        except BrokenPageError:
            # No results list on the page: there are no results.
            return

        for item in results.findall('li'):
            link = item.find('a')
            video_id = re.sub(self.URL_REGEXP, r'\1', link.attrib['href'])
            video = InaVideo('boutique.%s' % video_id)

            thumb_src = link.find('img').attrib['src']
            video.thumbnail = Thumbnail(u'http://boutique.ina.fr%s' %
                                        thumb_src)

            title_el = self.parser.select(item, 'p.titre', 1)
            video.title = unicode(title_el.text)

            # The date is written day/month/year.
            date_text = self.parser.select(item, 'p.date', 1).text
            day, month, year = tuple(int(s) for s in date_text.split('/'))
            video.date = datetime.datetime(year, month, day)

            duration_text = self.parser.select(item, 'p.duree', 1).text
            match = re.match(r'((\d+)h)?((\d+)min)?(\d+)s', duration_text)
            if not match:
                raise BrokenPageError('Unable to match duration (%r)' %
                                      duration_text)
            hours, minutes, seconds = match.group(2, 4, 5)
            video.duration = datetime.timedelta(hours=int(hours or 0),
                                                minutes=int(minutes or 0),
                                                seconds=int(seconds))

            yield video
Beispiel #7
0
    def fill_gallery(self, gallery):
        """
        Fill ``gallery`` with title, cardinality, date, rating and thumbnail
        read from the current document.

        The rating is None when the rating label holds no decimal number;
        rating_max is always 5.
        """
        xpath = self.document.xpath

        gallery.title = xpath("//h1[@id='gn']/text()")[0]

        # The leading digits of the 'Length:' cell give the cardinality.
        length_cells = xpath(
            "//div[@id='gdd']//tr[td[@class='gdt1']/text()='Length:']/td[@class='gdt2']/text()"
        )
        length_text = length_cells[0]
        gallery.cardinality = int(re.match(r"\d+", length_text).group(0))

        posted_cells = xpath(
            "//div[@id='gdd']//tr[td[@class='gdt1']/text()='Posted:']/td[@class='gdt2']/text()"
        )
        posted_text = posted_cells[0]
        gallery.date = datetime.strptime(posted_text, "%Y-%m-%d %H:%M")

        rating_text = xpath("//td[@id='rating_label']/text()")[0]
        found = re.search(r"\d+\.\d+", rating_text)
        gallery.rating = float(found.group(0)) if found is not None else None
        gallery.rating_max = 5

        # Prefer the first thumbnail image; otherwise take the URL out of
        # the CSS background of the first thumbnail div.
        image_sources = xpath("//div[@class='gdtm']/a/img/attribute::src")
        if image_sources:
            thumbnail_url = image_sources[0]
        else:
            style = xpath("//div[@class='gdtm']/div/attribute::style")[0]
            thumbnail_url = re.search(r"background:[^;]+url\((.+?)\)",
                                      style).group(1)

        gallery.thumbnail = Thumbnail(thumbnail_url)
Beispiel #8
0
    def iter_videos(self):
        """
        Yield a GDCVaultVideo for every featured session link whose href
        starts with '/play/<digits>/'.

        Title, description and thumbnail are read from the link content.
        """
        for a in self.parser.select(
                self.document.getroot(),
                'section.conference ul.media_items li.featured a.session_item'):
            href = a.attrib.get('href', '')
            m = re.match(r'/play/(\d+)/.*', href)
            if not m:
                continue
            video = GDCVaultVideo(m.group(1))

            # NOTE(review): confirm that select(..., 1) reports a missing
            # element with IndexError; if it raises something else, the two
            # fallbacks below never apply.

            # get title
            try:
                video.title = unicode(self.parser.select(
                    a, 'div.conference_info p strong', 1).text)
            except IndexError:
                video.title = NotAvailable

            # get description
            try:
                video.description = unicode(self.parser.select(
                    a, 'div.conference_info p', 1).text)
            except IndexError:
                video.description = NotAvailable

            # get thumbnail
            img = self.parser.select(a, 'div.featured_image img', 1)
            if img is not None:
                video.thumbnail = Thumbnail(unicode(img.attrib['src']))
            else:
                video.thumbnail = NotAvailable


            #m = re.match('id-(\d+)', a.attrib.get('class', ''))
            #if not m:
            #    continue
            # FIXME
            yield video
Beispiel #9
0
    def create_video_from_songs_result(self, songs):
        """
        Yield a GroovesharkVideo for each entry of ``songs``.

        Every created video is also appended to
        ``self.VIDEOS_FROM_SONG_RESULTS``, which is reset to an empty list
        when the iteration starts.  The date is January 1st of the song's
        year, or NotAvailable when the year is not a valid one.
        """
        self.VIDEOS_FROM_SONG_RESULTS = []

        for song in songs:
            video = GroovesharkVideo(song['SongID'])

            # Text fields are reduced to ASCII, '?' replacing anything else.
            song_name = song['SongName'].encode('ascii', 'replace')
            video.title = u'Song - %s' % song_name

            artist_name = song['ArtistName'].encode('ascii', 'replace')
            video.author = u'%s' % artist_name

            description_fields = (
                video.author,
                song['AlbumName'].encode('ascii', 'replace'),
                song['Year'].encode('ascii', 'replace'),
            )
            video.description = u'%s - %s - %s' % description_fields

            cover_url = (u'http://images.gs-cdn.net/static/albums/40_' +
                         song['CoverArtFilename'])
            video.thumbnail = Thumbnail(cover_url)

            length = int(float(song['EstimateDuration']))
            video.duration = datetime.timedelta(seconds=length)
            video.rating = float(song['AvgRating'])

            try:
                video.date = datetime.date(year=int(song['Year']),
                                           month=1,
                                           day=1)
            except ValueError:
                video.date = NotAvailable

            self.VIDEOS_FROM_SONG_RESULTS.append(video)
            yield video
Beispiel #10
0
    def iter_videos(self):
        """
        Yield a DailymotionVideo for every 'div.dmpi_video_item' element.

        Items without a 'data-id' attribute are skipped with a warning.
        The duration is NotAvailable when the item has no duration element.

        Raises BrokenPageError when the duration has more than three
        ':' separated parts.
        """
        for div in self.parser.select(self.document.getroot(),
                                      'div.dmpi_video_item'):
            _id = div.attrib.get('data-id', None)

            if _id is None:
                self.browser.logger.warning('Unable to find the ID of a video')
                continue

            video = DailymotionVideo(_id)
            video.title = unicode(self.parser.select(div, 'h3 a',
                                                     1).text).strip()
            video.author = unicode(
                self.parser.select(div, 'div.dmpi_user_login',
                                   1).find('a').find('span').text).strip()
            video.description = html2text(
                self.parser.tostring(
                    self.parser.select(div, 'div.dmpi_video_description',
                                       1))).strip() or unicode()
            try:
                duration_text = self.parser.select(div, 'div.duration',
                                                   1).text
                parts = duration_text.split(':')
            except BrokenPageError:
                # it's probably a live, np.
                video.duration = NotAvailable
            else:
                if len(parts) == 1:
                    seconds = parts[0]
                    hours = minutes = 0
                elif len(parts) == 2:
                    minutes, seconds = parts
                    hours = 0
                elif len(parts) == 3:
                    hours, minutes, seconds = parts
                else:
                    # Report the text already read instead of querying the
                    # document a second time.
                    raise BrokenPageError(
                        'Unable to parse duration %r' % duration_text)
                video.duration = datetime.timedelta(hours=int(hours),
                                                    minutes=int(minutes),
                                                    seconds=int(seconds))
            url = unicode(
                self.parser.select(div, 'img.dmco_image',
                                   1).attrib['data-src'])
            # remove the useless anti-caching
            url = re.sub(r'\?\d+', '', url)
            # use the bigger thumbnail
            url = url.replace('jpeg_preview_medium.jpg',
                              'jpeg_preview_large.jpg')
            video.thumbnail = Thumbnail(url)

            # get_rate() is applied to the stars container for the maximum
            # and to its first child div for the actual rating.
            rating_div = self.parser.select(div, 'div.small_stars', 1)
            video.rating_max = self.get_rate(rating_div)
            video.rating = self.get_rate(rating_div.find('div'))

            video.set_empty_fields(NotAvailable, ('url', ))
            yield video
Beispiel #11
0
    def _entry2video(self, entry):
        """
        Convert an entry returned by gdata into a YoutubeVideo.

        The id is the last '/' separated part of the entry id.  The author
        is the first author's name, overridden by the media name when the
        entry has one.
        """
        video_id = entry.id.text.split('/')[-1].strip()
        video = YoutubeVideo(to_unicode(video_id))

        media = entry.media
        video.title = to_unicode(media.title.text.strip())

        length = int(media.duration.seconds.strip())
        video.duration = datetime.timedelta(seconds=length)

        thumbnail_url = media.thumbnail[0].url.strip()
        video.thumbnail = Thumbnail(to_unicode(thumbnail_url))

        author_name = entry.author[0].name.text
        if author_name:
            video.author = to_unicode(author_name.strip())
        # A media name, when present, wins over the entry author.
        if entry.media.name:
            video.author = to_unicode(entry.media.name.text.strip())
        return video
Beispiel #12
0
    def iter_videos(self):
        """
        Yield a CappedVideo for every video backdrop of the results page.

        Nothing is yielded when the search box reads 'No Results Found'.
        Raises BrokenPageError when a duration has more than three ':'
        separated parts.
        """
        root = self.document.getroot()

        # When no results are found, the website returns random results,
        # so the search box content is what tells the two cases apart.
        search_box = self.parser.select(root,
                                        'div.search form input.searchbox', 1)
        if search_box.value == 'No Results Found':
            return

        # Extract the meta data of every result.
        for backdrop in self.parser.select(root, 'div.vidBackdrop    '):
            href = self.parser.select(backdrop, 'a', 1).attrib['href']
            # The id is the link target without its first two characters.
            _id = href[2:]

            video = CappedVideo(_id)
            video.set_empty_fields(NotAvailable, ('url', ))

            title_link = self.parser.select(backdrop, 'div.vidTitle a', 1)
            video.title = to_unicode(title_link.text)
            author_link = self.parser.select(backdrop, 'div.vidAuthor a', 1)
            video.author = to_unicode(author_link.text)

            video.thumbnail = Thumbnail(
                'http://cdn.capped.tv/pre/%s.png' % _id)

            # The duration is in the info block: drop its first seven
            # characters, then keep what precedes the first space.
            info_el = self.parser.select(backdrop, 'div.vidInfo', 1)
            time_text = info_el.text[7:].split(' ')[0]

            # Missing leading fields (hours, minutes) count as zero.
            fields = time_text.split(':')
            if len(fields) > 3:
                raise BrokenPageError('Unable to parse duration %r' %
                                      info_el)
            hours, minutes, seconds = [0] * (3 - len(fields)) + fields

            video.duration = datetime.timedelta(hours=int(hours),
                                                minutes=int(minutes),
                                                seconds=int(seconds))

            yield video
Beispiel #13
0
    def iter_videos(self):
        """
        Yield an ArteVideo for every 'div[class=video]' element.

        The rating is the number of 'star-rating-on' divs of the rate
        container and rating_max the total number of its divs.  The
        duration is only set when a "mm:ss" or "hh:mm:ss" text is found.
        """
        for div in self.document.getroot().cssselect("div[class=video]"):
            link = div.find('h2').find('a')
            title = link.text
            matched = re.match(r'/(fr|de|en)/videos/(.*)\.html',
                               link.attrib['href'])
            # The id stays empty when the link has an unexpected form.
            _id = matched.group(2) if matched else ''

            container = self.parser.select(div, 'div[class=rateContainer]', 1)
            stars = container.findall('div')
            lit = [star for star in stars
                   if 'star-rating-on' in star.attrib['class']]

            video = ArteVideo(_id)
            video.title = unicode(title)
            video.rating = len(lit)
            video.rating_max = len(stars)

            thumb = self.parser.select(div, 'img[class=thumbnail]', 1)
            video.thumbnail = Thumbnail(u'http://videos.arte.tv' +
                                        thumb.attrib['src'])

            try:
                duration_el = self.parser.select(
                    div, 'div.duration_thumbnail', 1)
                fields = duration_el.text.split(':')
                if len(fields) not in (2, 3):
                    raise BrokenPageError('Unable to parse duration %r' %
                                          fields)
                # Prepend zero hours to the "mm:ss" form.
                hours, minutes, seconds = [0] * (3 - len(fields)) + fields
            except BrokenPageError:
                # Missing or unexpected duration: leave the field alone.
                pass
            else:
                video.duration = datetime.timedelta(hours=int(hours),
                                                    minutes=int(minutes),
                                                    seconds=int(seconds))

            video.set_empty_fields(NotAvailable, ('url', ))

            yield video
Beispiel #14
0
 def create_video(self, song):
     """
     Build a GroovesharkVideo from a ``song`` mapping.

     Returns None when the song has no 'EstimateDuration'.  The thumbnail
     is only set when the song has a cover art file name; the date is
     always NotAvailable.
     """
     if not song['EstimateDuration']:
         return None

     video = GroovesharkVideo(song['SongID'])

     # Text fields are reduced to ASCII, '?' replacing anything else.
     name = song['Name'].encode('ascii', 'replace')
     video.title = u'Song - %s' % name
     artist = song['ArtistName'].encode('ascii', 'replace')
     video.author = u'%s' % artist
     album = song['AlbumName'].encode('ascii', 'replace')
     video.description = u'%s - %s' % (video.author, album)

     if song['CoverArtFilename']:
         cover_url = (u'http://images.gs-cdn.net/static/albums/40_' +
                      song['CoverArtFilename'])
         video.thumbnail = Thumbnail(cover_url)

     length = int(float(song['EstimateDuration']))
     video.duration = datetime.timedelta(seconds=length)
     video.date = NotAvailable
     return video
Beispiel #15
0
    def parse_video(self, el, video=None, quality=None):
        """
        Fill a CanalplusVideo from the XML element ``el``.

        ``video`` is created from the element id when not given.  The URL
        is the one of the format whose tag equals ``quality``; otherwise,
        when the video has no URL yet, the last format having text is used.

        Returns None when the element id is '-1' (video not found).
        """
        _id = el.find('ID').text
        if _id == '-1':
            # The video does not exist.
            return None

        if not video:
            video = CanalplusVideo(_id)

        infos = el.find('INFOS')

        # The title is made of the non-blank title parts.
        pieces = [part.text.strip() for part in infos.find('TITRAGE')]
        video.title = u' — '.join(piece for piece in pieces if len(piece) > 0)
        video.description = unicode(infos.find('DESCRIPTION').text)

        media = el.find('MEDIA')
        small_image = media.find('IMAGES').find('PETIT').text
        video.thumbnail = (Thumbnail(unicode(small_image)) if small_image
                           else NotAvailable)

        fallback = None
        for fmt in media.find('VIDEOS'):
            if fmt.text is None:
                continue
            if fmt.tag == quality:
                video.url = unicode(fmt.text)
                break
            fallback = fmt
        if not video.url and fallback is not None:
            video.url = unicode(fallback.text)

        publication = infos.find('PUBLICATION')
        day, month, year = tuple(
            int(v) for v in publication.find('DATE').text.split('/'))
        hour, minute, second = tuple(
            int(v) for v in publication.find('HEURE').text.split(':'))
        video.date = datetime(year, month, day, hour, minute, second)

        return video
Beispiel #16
0
 def iter_videos(self, lang='fr'):
     """
     Return the list of ArteLiveVideo described by the feed the page links to.

     The feed URL is the href of the first 'link' element of the document.
     Every '<item>...</item>' chunk of the feed is handed to
     ``self.get_element`` together with ``lang``; chunks for which it
     returns nothing are ignored.
     """
     feed_link = self.document.xpath('//link')[0]
     feed = self.browser.readurl(feed_link.attrib['href'])
     item_re = re.compile("(<item>.*?</item>)", re.DOTALL)

     videos = []
     for raw_item in item_re.findall(feed):
         parsed = self.get_element(raw_item, lang)
         if not parsed:
             continue

         video = ArteLiveVideo(parsed['ID'])
         video.title = parsed['title']
         video.description = parsed['pitch']
         video.author = parsed['author']
         if parsed['pict']:
             video.thumbnail = Thumbnail(parsed['pict'])
         video.set_empty_fields(NotAvailable, ('url', ))
         videos.append(video)
     return videos
Beispiel #17
0
 def create_video(self, item):
     """
     Build an ArteVideo from an ``item`` mapping.

     Reads the keys 'VID', 'VTI', 'VRT', 'programImage',
     'videoDurationSeconds', 'VDE' and 'VDA', plus 'VSU' when present
     (appended to the title).  The date is only set when 'VDA' starts
     with two digits, two digits and four digits separated by single
     whitespace characters, read as day, month and year.
     """
     video = ArteVideo(item['VID'])
     if 'VSU' in item:
         video.title = u'%s : %s' % (item['VTI'], item['VSU'])
     else:
         video.title = u'%s' % (item['VTI'])
     video.rating = int(item['VRT'])
     video.thumbnail = Thumbnail(u'%s' % item['programImage'])
     video.duration = datetime.timedelta(
         seconds=int(item['videoDurationSeconds']))
     video.set_empty_fields(NotAvailable, ('url', ))
     video.description = u'%s' % item['VDE']
     # Raw string: '\d' and '\s' are regex escapes, not string escapes.
     m = re.match(r'(\d{2})\s(\d{2})\s(\d{4})(.*?)', item['VDA'])
     if m:
         dd = int(m.group(1))
         mm = int(m.group(2))
         yyyy = int(m.group(3))
         video.date = datetime.date(yyyy, mm, dd)
     return video
Beispiel #18
0
    def iter_videos(self):
        """
        Yield an ArretSurImagesVideo for every 'div[class=bloc-contenu-8]'.

        The id is what follows '/contenu.php?id=' in the first link of the
        block (empty when the link has another form).  Rating fields are
        set to None.
        """
        blocks = self.document.getroot().cssselect("div[class=bloc-contenu-8]")
        for block in blocks:
            title_link = self.parser.select(block, 'a.typo-titre', 1)
            # Double spaces of the title are collapsed once.
            title = title_link.text_content().replace('  ', ' ')

            href = block.find('a').attrib['href']
            matched = re.match(r'/contenu.php\?id=(.*)', href)
            _id = matched.group(1) if matched else ''

            video = ArretSurImagesVideo(_id)
            video.title = unicode(title)
            video.rating = None
            video.rating_max = None

            image = self.parser.select(block, 'img', 1)
            video.thumbnail = Thumbnail(u'http://www.arretsurimages.net' +
                                        image.attrib['src'])

            yield video
Beispiel #19
0
    def iter_videos(self):
        """
        Yield a YoupornVideo for every 'li' of class 'videoBox'.

        Boxes whose first div holds no link with an image are skipped.
        The id is the path component following '/watch/' in the title
        link.  The duration defaults to zero when it is missing or is not
        in "mm:ss" / "hh:mm:ss" form; the rating is a percentage.
        """
        for li in self.document.getroot().xpath('//ul/li[@class="videoBox"]'):
            first_div = li.find('div')
            # A box without any div has no thumbnail link either.
            a = first_div.find('a') if first_div is not None else None
            if a is None or a.find('img') is None:
                continue

            thumbnail_url = a.find('img').attrib['src']

            a = self.parser.select(li, './/a[@class="videoTitle"]', 1, 'xpath')

            url = a.attrib['href']
            # Keep what lies between '/watch/' and the next '/'.  Splitting
            # keeps the whole id when there is no further '/', where slicing
            # at str.find()'s -1 would drop its last character.
            _id = url[len('/watch/'):].split('/', 1)[0]

            video = YoupornVideo(int(_id))
            video.title = unicode(a.text.strip())
            video.thumbnail = Thumbnail(unicode(thumbnail_url))

            hours = minutes = seconds = 0
            div = li.cssselect('div.duration')
            if len(div) > 0:
                pack = [int(s) for s in div[0].text.strip().split(':')]
                if len(pack) == 3:
                    hours, minutes, seconds = pack
                elif len(pack) == 2:
                    minutes, seconds = pack

            video.duration = datetime.timedelta(hours=hours, minutes=minutes,
                                                seconds=seconds)

            div = li.cssselect('div.rating')
            if div:
                video.rating = int(div[0].text.strip('% '))
                video.rating_max = 100

            video.set_empty_fields(NotAvailable, ('url', 'author'))

            yield video
Beispiel #20
0
    def iter_videos(self):
        """
        Yield a TricTracTVVideo for every 'li#contentsearch' element.

        Entries whose first link does not start with '/video-' are skipped.
        The rating is the number of '.etoile_on' elements, out of 5.
        """
        entries = self.parser.select(self.document.getroot(),
                                     'li#contentsearch')
        for entry in entries:
            title_el = self.parser.select(entry, '#titlesearch span', 1)

            href = self.parser.select(entry, 'a', 1).attrib['href']
            matched = re.match('/video-(.*)', href)
            if matched is None:
                self.logger.debug('url %s does not match' % href)
                continue

            video = TricTracTVVideo(matched.group(1))
            video.title = unicode(title_el.text)

            image_src = self.parser.select(entry, 'img', 1).attrib['src']
            lit_stars = self.parser.select(entry, '.etoile_on')
            video.rating = len(lit_stars)
            video.rating_max = 5

            # The image source is appended to the site address.
            video.thumbnail = Thumbnail(
                unicode('http://www.trictrac.tv/%s' % image_src))

            yield video
Beispiel #21
0
    def get_video(self, video=None):
        """
        Fill (or create) the NolifeTVVideo shown by the current page.

        Title, description and thumbnail are read from the Open Graph meta
        tags and the date from 'div#infos_complementaires'; each of them is
        NotAvailable when it can not be read.  When the video has no URL
        yet, it is requested from the player API.

        Raises ForbiddenVideo when the page says the video is hosted
        elsewhere, or when the player API returns no URL.
        """
        _id = to_unicode(self.group_dict['id'])
        if video is None:
            video = NolifeTVVideo(_id)

        root = self.document.getroot()

        # Check if video is external.
        try:
            div = self.parser.select(root, 'div#message_lien_ext', 1)
        except BrokenPageError:
            pass
        else:
            link = div.find('a').attrib['href']
            raise ForbiddenVideo('Video is only available here: %s' % link)

        # select() is what raises BrokenPageError for a missing element, so
        # it has to sit inside the try for the fallbacks to be reachable.
        try:
            meta = self.parser.select(root, 'meta[property="og:title"]', 1)
            video.title = unicode(meta.attrib['content'])
        except (BrokenPageError, KeyError):
            video.title = NotAvailable

        try:
            meta = self.parser.select(root,
                                      'meta[property="og:description"]', 1)
            video.description = unicode(meta.attrib['content'])
        except (BrokenPageError, KeyError):
            video.description = NotAvailable

        try:
            meta = self.parser.select(root, 'meta[property="og:image"]', 1)
            video.thumbnail = Thumbnail(unicode(meta.attrib['content']))
        except (BrokenPageError, KeyError):
            video.thumbnail = NotAvailable

        # Search from the document root: no other element is bound here,
        # since the external-link branch above always raises.
        try:
            infos = self.parser.select(root, 'div#infos_complementaires', 1)
            video.date = parse_dt(infos.find('p').text.strip())
        except Exception:
            # Best effort: a missing or unparsable date is not fatal.
            video.date = NotAvailable
        video.author = NotAvailable
        video.duration = NotAvailable
        video.rating = NotAvailable
        video.rating_max = NotAvailable

        if not video.url:
            api_url = 'http://online.nolife-tv.com/_nlfplayer/api/api_player.php'

            # Three requests are sent in order, each one with a freshly
            # generated key and timestamp; only the last answer is used.
            skey, timestamp = self.genkey()
            self.browser.readurl(api_url,
                                 'skey=%s&a=MD5&timestamp=%s' % (skey, timestamp))

            skey, timestamp = self.genkey()
            self.browser.readurl(api_url,
                                 'a=EML&skey=%s&id%%5Fnlshow=%s&timestamp=%s' % (skey, _id, timestamp))

            skey, timestamp = self.genkey()
            data = self.browser.readurl(api_url,
                                        'quality=0&a=UEM%%7CSEM%%7CMEM%%7CCH%%7CSWQ&skey=%s&id%%5Fnlshow=%s&timestamp=%s' % (skey, _id, timestamp))

            # The answer is a '&' separated list of name=value pairs.
            values = dict(urllib.splitvalue(s) for s in data.split('&'))

            if 'url' not in values:
                raise ForbiddenVideo(values.get('message', 'Not available').decode('iso-8859-15'))
            video.url = unicode(values['url'])

        video.set_empty_fields(NotAvailable)

        return video
Beispiel #22
0
    def set_details(self, v):
        # try to get as much from the page itself
        obj = self.parser.select(self.document.getroot(), 'h1[itemprop=name]')
        if len(obj) > 0:
            v.title = unicode(obj[0].text)

        obj = self.parser.select(self.document.getroot(),
                                 'meta[itemprop=dateCreated]')
        if len(obj) > 0:
            v.date = parse_dt(obj[0].attrib['content'])

        #obj = self.parser.select(self.document.getroot(), 'meta[itemprop=duration]')

        obj = self.parser.select(self.document.getroot(),
                                 'meta[itemprop=thumbnailUrl]')
        if len(obj) > 0:
            v.thumbnail = Thumbnail(unicode(obj[0].attrib['content']))

        # for the rest, use the JSON config descriptor
        json_data = self.browser.openurl(
            'http://%s/config/%s?type=%s&referrer=%s' %
            ("player.vimeo.com", int(v.id), "html5_desktop_local", ""))
        data = json.load(json_data)
        if data is None:
            raise BrokenPageError('Unable to get JSON config for id: %r' %
                                  int(v.id))
        #print data

        if v.title is None:
            v.title = unicode(data['video']['title'])
        if v.thumbnail is None:
            v.thumbnail = Thumbnail(unicode(data['video']['thumbnail']))
        v.duration = datetime.timedelta(seconds=int(data['video']['duration']))

        # determine available codec and quality
        # use highest quality possible
        quality = 'sd'
        codec = None
        if 'vp6' in data['video']['files']:
            codec = 'vp6'
        if 'vp8' in data['video']['files']:
            codec = 'vp8'
        if 'h264' in data['video']['files']:
            codec = 'h264'
        if not codec:
            raise BrokenPageError(
                'Unable to detect available codec for id: %r' % int(v.id))

        if 'hd' in data['video']['files'][codec]:
            quality = 'hd'

        v.url = unicode(
            "http://player.vimeo.com/play_redirect?quality=%s&codecs=%s&clip_id=%d&time=%s&sig=%s&type=html5_desktop_local"
            % (quality, codec, int(v.id), data['request']['timestamp'],
               data['request']['signature']))

        # attempt to determine the redirected URL to pass it instead
        # since the target server doesn't check for User-Agent, unlike
        # for the source one.
        # HACK: we use mechanize directly here for now... FIXME
        #print "asking for redirect on '%s'" % (v.url)
        self.browser.set_handle_redirect(False)
        try:
            self.browser.open_novisit(v.url)
        except HTTPError, e:
            if e.getcode() == 302 and hasattr(e, 'hdrs'):
                #print e.hdrs['Location']
                v.url = unicode(e.hdrs['Location'])