Beispiel #1
0
 def test_get_video_data(self):
     """
     Runs the ``youtube/video_info.txt`` data file through the loader and
     checks both the set of returned fields and their exact values.

     """
     fixture = self.get_data_file('youtube/video_info.txt')
     parsed = self.loader.get_video_data(
         self.get_response(fixture.read()))
     self.assertEqual(set(parsed), self.loader.fields)

     # All four expected files share the same expiry timestamp.
     expiry = datetime.datetime(2012, 12, 25, 6, 46, 1)
     mp4_file = VideoFile(
         url='http://r10---sn-nx57yn7k.c.youtube.com/videoplayback?upn=p0pWpkfxwq4&sparams=cp%2Cgcr%2Cid%2Cip%2Cipbits%2Citag%2Cratebypass%2Csource%2Cupn%2Cexpire&fexp=919330%2C916611%2C920704%2C912806%2C928001%2C922403%2C922405%2C929901%2C913605%2C929104%2C913546%2C913556%2C908496%2C920201%2C913302%2C919009%2C911116%2C901451%2C902556&key=yt1&expire=1356417961&source=youtube&ipbits=8&itag=18&gcr=us&sver=3&signature=7D1D4A9CF0626C3B2A10F6567390165729FA00B8.89C64C08C8B1CA689E4CE21169531E34A56761C1&ratebypass=yes&mt=1356395169&mv=m&ms=au&ip=74.61.34.250&cp=U0hUS1RMVV9LS0NONF9MRllKOmZQN1JFU3lOX2Js&id=27f0d5f5bd31eefe',
         width=640,
         height=360,
         expires=expiry,
         mime_type=u'video/mp4')
     large_flv_file = VideoFile(
         url='http://r15---sn-nx57yn7r.c.youtube.com/videoplayback?upn=p0pWpkfxwq4&sparams=algorithm%2Cburst%2Ccp%2Cfactor%2Cgcr%2Cid%2Cip%2Cipbits%2Citag%2Csource%2Cupn%2Cexpire&fexp=919330%2C916611%2C920704%2C912806%2C928001%2C922403%2C922405%2C929901%2C913605%2C929104%2C913546%2C913556%2C908496%2C920201%2C913302%2C919009%2C911116%2C901451%2C902556&key=yt1&algorithm=throttle-factor&itag=34&ipbits=8&burst=40&gcr=us&sver=3&signature=9DAA96CCF4CA172A92C78907B0ECE241BAFB07D6.09AF4B0F7C6558D1B9B184A5F60232EDC296FF95&mv=m&mt=1356395169&ip=74.61.34.250&expire=1356417961&source=youtube&ms=au&factor=1.25&cp=U0hUS1RMVV9LS0NONF9MRllKOmZQN1JFU3lOX2Js&id=27f0d5f5bd31eefe',
         width=640,
         height=360,
         expires=expiry,
         mime_type=u'video/x-flv')
     small_flv_file = VideoFile(
         url='http://r2---sn-nx57yn7d.c.youtube.com/videoplayback?upn=p0pWpkfxwq4&sparams=algorithm%2Cburst%2Ccp%2Cfactor%2Cgcr%2Cid%2Cip%2Cipbits%2Citag%2Csource%2Cupn%2Cexpire&fexp=919330%2C916611%2C920704%2C912806%2C928001%2C922403%2C922405%2C929901%2C913605%2C929104%2C913546%2C913556%2C908496%2C920201%2C913302%2C919009%2C911116%2C901451%2C902556&key=yt1&algorithm=throttle-factor&itag=5&ipbits=8&burst=40&gcr=us&sver=3&signature=A909DD7537DD821A0F5AE14FF1E83C87DD7C4EDF.4FCE8516F456B96DD45DB35A7AB8624A1A45498F&mv=m&mt=1356395169&ip=74.61.34.250&expire=1356417961&source=youtube&ms=au&factor=1.25&cp=U0hUS1RMVV9LS0NONF9MRllKOmZQN1JFU3lOX2Js&id=27f0d5f5bd31eefe',
         width=400,
         height=240,
         expires=expiry,
         mime_type=u'video/x-flv')
     webm_file = VideoFile(
         url='http://r11---sn-nx57yn76.c.youtube.com/videoplayback?upn=p0pWpkfxwq4&sparams=cp%2Cgcr%2Cid%2Cip%2Cipbits%2Citag%2Cratebypass%2Csource%2Cupn%2Cexpire&fexp=919330%2C916611%2C920704%2C912806%2C928001%2C922403%2C922405%2C929901%2C913605%2C929104%2C913546%2C913556%2C908496%2C920201%2C913302%2C919009%2C911116%2C901451%2C902556&key=yt1&expire=1356417961&source=youtube&ipbits=8&itag=43&gcr=us&sver=3&signature=7D0403EBD72E0A28695B35F7BD542A88BC9FE4FA.C4D2BFDD22BBFDC002F8C67CF66363BCC93501EC&ratebypass=yes&mt=1356395169&mv=m&ms=au&ip=74.61.34.250&cp=U0hUS1RMVV9LS0NONF9MRllKOmZQN1JFU3lOX2Js&id=27f0d5f5bd31eefe',
         width=640,
         height=360,
         expires=expiry,
         mime_type=u'video/webm')

     self.assertDictEqual(parsed, {
         'title': u'CaramellDansen (Full Version + Lyrics)',
         'thumbnail_url': 'http://i3.ytimg.com/vi/J_DV9b0x7v4/hqdefault.jpg',
         'tags': [
             u'caramell', u'dance', u'dansen', u'hip', u'hop',
             u's\xfcchtig', u'geil', u'cool', u'lustig', u'manga',
             u'schweden', u'anime', u'musik', u'music', u'funny',
             u'caramelldansen', u'U-U-U-Aua', u'Dance',
         ],
         'files': [mp4_file, large_flv_file, small_flv_file, webm_file],
     })
    def test_serialize__files(self):
        """
        A video that carries files must survive a serialize/deserialize
        round trip.

        """
        original = Video("http://www.youtube.com/watch?v=J_DV9b0x7v4")
        timestamp = datetime.datetime.now()
        flv_file = VideoFile(
            url='http://google.com',
            expires=timestamp,
            length=100,
            width=50,
            height=50,
            mime_type="video/x-flv")
        swf_file = VideoFile(
            url='http://xkcd.com',
            expires=timestamp,
            length=75,
            width=80,
            height=80,
            mime_type="application/x-shockwave-flash")
        original.files = [flv_file, swf_file]

        serialized = original.serialize()

        # Spot-check a few values in the serialized form.
        self.assertEqual(serialized['files'][0]['url'], "http://google.com")
        self.assertEqual(serialized['files'][1]['mime_type'],
                         "application/x-shockwave-flash")
        self.assertEqual(serialized['files'][0]['expires'],
                         timestamp.isoformat())

        # The serialized form must load back into an equivalent video.
        restored = Video.deserialize(serialized)
        self.assertEqual(dict(original.items()), dict(restored.items()))
    def test_get_file__no_mimetypes(self):
        """
        When no file declares a mime type, ``get_file()`` should hand back
        whichever file is listed first.

        """
        video = Video("http://www.youtube.com/watch?v=J_DV9b0x7v4")
        google = VideoFile(url='http://google.com')
        xkcd = VideoFile(url='http://xkcd.com')
        example = VideoFile(url='http://example.com')

        # Each case pairs a file ordering with the file expected back.
        cases = (
            ([google, xkcd, example], google),
            ([example, xkcd, google], example),
        )
        for ordering, expected in cases:
            video.files = ordering
            self.assertEqual(video.get_file(), expected)
Beispiel #4
0
    def test_get_file__open(self):
        """
        ``get_file()`` should pick the open-format file over proprietary
        ones, wherever it sits in the list.

        """
        video = Video("http://www.youtube.com/watch?v=J_DV9b0x7v4")
        ogg_file = VideoFile(url='http://google.com', mime_type="video/ogg")
        flash_file = VideoFile(url='http://xkcd.com',
                               mime_type="application/x-shockwave-flash")
        mp4_file = VideoFile(url='http://example.com', mime_type="video/mp4")

        # The ogg file is expected first whether it is listed first or last.
        orderings = (
            [ogg_file, flash_file, mp4_file],
            [mp4_file, flash_file, ogg_file],
        )
        for ordering in orderings:
            video.files = ordering
            self.assertEqual(video.get_file(), ogg_file)
Beispiel #5
0
 def test_parse_feed_entry_atom(self):
     """
     Parses the first entry of the ``generic/feed.atom`` data file and
     compares every extracted field with its expected value.

     """
     parsed_feed = feedparser.parse(
         self._data_file_path('generic/feed.atom'))
     first_entry = parsed_feed.entries[0]
     data = self.feed.get_video_data(first_entry)

     # The whitespace inside this literal is part of the expected value.
     show_notes = u"""<h1>Show Notes</h1>
     <ul>
       <li>00:01:00 -- Introduction</li>
       <li>00:15:00 -- Talking about Atom 1.0</li>
       <li>00:30:00 -- Wrapping up</li>
     </ul>"""
     ogg_file = VideoFile(url=u'http://www.example.org/myvideo.ogg',
                          length=u'1234',
                          mime_type=u'application/ogg')
     expected = {
         'title': u'Atom 1.0',
         'description': show_notes,
         'tags': None,
         'link': u'http://www.example.org/entries/1',
         'guid': u'http://www.example.org/entries/1',
         'embed_code': None,
         'files': [ogg_file],
         'thumbnail_url': None,
         'publish_datetime': datetime.datetime(2005, 7, 15, 12, 0),
         'license': 'http://creativecommons.org/licenses/by/2.5/',
     }
     self.assertEqual(data, expected)
Beispiel #6
0
    def parse_feed_entry(entry):
        """
        Turns a feedparser entry from a blip rss feed into a dictionary
        keyed by :class:`.Video` field names. Both blip feeds and blip API
        requests (which can also be done with feeds) are handled here.

        """
        files = []
        for enclosure in get_accepted_enclosures(entry):
            files.append(
                VideoFile(url=enclosure.get('url'),
                          mime_type=enclosure.get('type'),
                          # 'filesize' wins; 'length' is the fallback.
                          length=(enclosure.get('filesize') or
                                  enclosure.get('length'))))

        data = {}
        data['guid'] = entry['id']
        data['link'] = entry['link']
        data['title'] = entry['title']
        data['description'] = entry['blip_puredescription']
        data['files'] = files
        data['embed_code'] = entry['media_player']['content']
        data['publish_datetime'] = datetime.strptime(
            entry['blip_datestamp'], "%Y-%m-%dT%H:%M:%SZ")
        data['thumbnail_url'] = get_entry_thumbnail_url(entry)

        # Only tags without a scheme are kept, and the first of those is
        # dropped.
        schemeless_terms = []
        for tag in entry['tags']:
            if tag['scheme'] is None:
                schemeless_terms.append(tag['term'])
        data['tags'] = schemeless_terms[1:]

        data['user'] = entry['blip_safeusername']
        data['user_url'] = entry['blip_showpage']

        # The license is optional in the entry.
        if 'license' in entry:
            data['license'] = entry['license']
        return data
Beispiel #7
0
    def get_video_data(self, response):
        """
        Extracts video field values from a query-string encoded response.

        Returns an empty dict for a 402 response or for a failed status,
        ``{'is_embeddable': False}`` when the failure carries error code
        150, and otherwise a dict with the title, thumbnail url, tags (when
        keywords are present) and the list of available files.

        """
        if response.status_code == 402:
            # 402: Payment required.
            # A note in the previous code said this could happen when too many
            # requests were made (per second?) Unclear why, though, or why
            # this is only caught here.
            return {}

        params = urlparse.parse_qs(response.text.encode('utf-8'))
        if params['status'][0] == 'fail':
            if params['errorcode'][0] == '150':  # unembedable
                return {'is_embeddable': False}
            return {}

        data = {}
        data['title'] = params['title'][0].decode('utf8')
        data['thumbnail_url'] = params['thumbnail_url'][0]
        if 'keywords' in params:
            data['tags'] = params['keywords'][0].decode('utf8').split(',')

        thumbnail_url = data['thumbnail_url']
        if thumbnail_url.endswith('/default.jpg'):
            # got a crummy version; increase the resolution
            data['thumbnail_url'] = thumbnail_url.replace(
                '/default.jpg', '/hqdefault.jpg')

        # Index each stream description by its itag; entries that carry no
        # itag are left out.
        streams_by_itag = {}
        for stream_qs in params["url_encoded_fmt_stream_map"][0].split(","):
            stream = urlparse.parse_qs(stream_qs)
            if 'itag' in stream:
                streams_by_itag[stream['itag'][0]] = stream

        data['files'] = []
        for code, mime_type, width, height in self.formats:
            if code not in streams_by_itag:
                continue
            stream = streams_by_itag[code]
            split_url = urlparse.urlsplit(stream['url'][0])
            query = dict(urlparse.parse_qsl(split_url.query))
            expires = struct_time_to_datetime(
                time.gmtime(int(query['expire'])))
            # The signature arrives next to the url as 'sig' and is put
            # back into the url's own query string.
            query['signature'] = stream['sig'][0]
            url = urlparse.urlunsplit((
                split_url.scheme,
                split_url.netloc,
                split_url.path,
                urllib.urlencode(query),
                split_url.fragment,
            ))
            data['files'].append(
                VideoFile(url=url,
                          expires=expires,
                          mime_type=mime_type,
                          width=width,
                          height=height))
        return data
Beispiel #8
0
    def get_video_data(self, item):
        """
        Builds a dict of video field values from a feed item: title,
        description, first media thumbnail url, publish date, user screen
        name, and the accepted enclosures as files (``None`` when there
        are none).

        """
        files = []
        for enclosure in get_accepted_enclosures(item):
            files.append(
                VideoFile(url=enclosure.get('url'),
                          mime_type=enclosure.get('type'),
                          # 'filesize' wins; 'length' is the fallback.
                          length=(enclosure.get('filesize') or
                                  enclosure.get('length'))))

        title = item.title
        description = item.description
        thumbnail_url = item.media_thumbnail[0]['url']
        published = struct_time_to_datetime(item.published_parsed)
        user = item['kaltura_userscreenname']

        return {
            'title': title,
            'description': description,
            'thumbnail_url': thumbnail_url,
            'publish_datetime': published,
            'user': user,
            'files': files if files else None,
        }
from vidscraper.videos import VideoFile

DISQUS_DATA = {
    'guid':
    u'4809E60A-C2AB-11DF-BBAC-A6337D0214E0',
    'link':
    "http://blip.tv/file/4135225",
    'title':
    "Scaling the World's Largest Django Application",
    'description':
    "Disqus, one of the largest Django applications in "
    "the world, will explain how they deal with scaling "
    "complexities in a small startup.",
    'files': [
        VideoFile(url=u'http://blip.tv/file/get/Robertlofthouse-'
                  u'ScalingTheWorldsLargestDjangoApplication558.ogv',
                  length=u'73533796',
                  mime_type=u'video/ogg'),
        VideoFile(url=u'http://blip.tv/file/get/Robertlofthouse-'
                  u'ScalingTheWorldsLargestDjangoApplication883.flv',
                  length=u'418241604',
                  mime_type=u'video/x-flv')
    ],
    'embed_code':
    '<embed src="http://blip.tv/play/AYH9xikC" '
    'type="application/x-shockwave-flash" width="480" '
    'height="390" wmode="transparent" '
    'allowscriptaccess="always" allowfullscreen="true" >'
    '</embed>',
    'publish_datetime':
    datetime.datetime(2010, 9, 17, 22, 31, 14),
    'thumbnail_url':
Beispiel #10
0
    def get_video_data(self, item):
        """
        Builds a dict of video field values from a single feed item.

        The result holds the link, title, description, thumbnail url,
        files, publish date, guid, embed code, tags and license. Values the
        item does not provide come back as ``None`` (or an empty string for
        the title and description).

        """
        # Use the published date when present, else the updated date.
        best_date = None
        for date_key in ('published_parsed', 'updated_parsed'):
            if item.get(date_key):
                best_date = struct_time_to_datetime(item[date_key])
                break

        link = item.get('link')
        if 'links' in item:
            for candidate in item.links:
                if candidate.get('rel') != 'via':
                    continue
                # original URL
                link = candidate['href']
                break

        entry_content = item['content'] if 'content' in item else None
        if entry_content and entry_content[0]['value']:  # Atom
            description = entry_content[0]['value']
        else:
            description = item.get('summary', '')

        files = []
        for enclosure in get_accepted_enclosures(item):
            files.append(
                VideoFile(url=enclosure.get('url'),
                          mime_type=enclosure.get('type'),
                          # 'filesize' wins; 'length' is the fallback.
                          length=(enclosure.get('filesize')
                                  or enclosure.get('length'))))

        # A media player either supplies embeddable content or, failing
        # that, a url that is treated as one more file.
        embed_code = None
        if 'media_player' in item:
            player = item['media_player']
            if player.get('content'):
                embed_code = convert_entities(player['content'])
            elif 'url' in player:
                files.append(
                    VideoFile(url=player['url'], mime_type=player.get('type')))
        files = files or None

        license_url = (item['media_license']['href']
                       if 'media_license' in item else item.get('license'))

        title = convert_entities(item.get('title', ''))
        thumbnail_url = get_entry_thumbnail_url(item)
        guid = item.get('id')
        if 'tags' in item:
            # Only tags without a scheme are kept.
            tags = [tag['term'] for tag in item['tags']
                    if tag['scheme'] is None]
        else:
            tags = None

        return {
            'link': link,
            'title': title,
            'description': description,
            'thumbnail_url': thumbnail_url,
            'files': files,
            'publish_datetime': best_date,
            'guid': guid,
            'embed_code': embed_code,
            'tags': tags,
            'license': license_url,
        }