Example #1
0
 def get_iterator(self):
     """Return the vidscraper feed for this object's ``original_url``.

     The feed is requested without a result limit (``max_results=None``)
     and is given the stored etag / last-modified values; a falsy
     ``external_etag`` is passed along as ``None``.
     """
     source_url = self.original_url
     keys = get_api_keys()
     # Normalise any falsy stored etag (e.g. an empty string) to None.
     known_etag = self.external_etag or None
     known_modified = self.external_last_modified
     return vidscraper.auto_feed(
         source_url,
         max_results=None,
         api_keys=keys,
         etag=known_etag,
         last_modified=known_modified,
     )
Example #2
0
def fetch_cmd(cfg, parser, parsed, args):
    """Scrape the project's video feed and write one JSON file per video.

    Reads ``projectpath`` and ``url`` (plus ``youtube_embed`` when the url
    contains "youtube") from the ``[project]`` section of ``cfg``, scrapes
    the url with vidscraper and writes every video found to
    ``<projectpath>/json/NNNN_<title>.json``.

    :arg cfg: config object providing ``get(section, option)``
    :arg parser: command line parser; its byline is printed unless
        ``parsed.quiet`` is set
    :arg parsed: parsed command line options (only ``quiet`` is read)
    :arg args: remaining command line arguments (not used here)

    :returns: 0 on success, 1 if the project configuration is unusable

    """
    if not parsed.quiet:
        parser.print_byline()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        err('url not specified in steve.ini project config file.')
        err('Add "url = ..." to [project] section of steve.ini file.')
        return 1

    if 'youtube' in url:
        try:
            youtube_embed = YOUTUBE_EMBED[cfg.get('project', 'youtube_embed')]
        except (KeyError, ConfigParser.NoOptionError):
            # A missing option is just as unusable as an unknown value, so
            # report it the same way instead of dying with a traceback.
            err('youtube_embed must be either "iframe" or "object".')
            return 1
    else:
        youtube_embed = None

    out('Scraping {0}...'.format(url))
    video_feed = vidscraper.auto_feed(url)
    video_feed.load()

    # Single-argument print(...) behaves the same as the print statement.
    print('Found {0} videos...'.format(video_feed.video_count))
    for i, video in enumerate(video_feed):
        # Videos without a title must not break the progress message below.
        title = video.title or u''
        if title:
            filename = title.replace(' ', '_')
            filename = ''.join(c for c in filename if c in ALLOWED_LETTERS)
            filename = '_' + filename
        else:
            filename = ''

        filename = '{0:04d}{1}.json'.format(i, filename[:40])

        print('Working on {0}... ({1})'.format(
            unicodedata.normalize('NFKD', title).encode('ascii', 'ignore'),
            filename))
        item = vidscraper_to_dict(video, youtube_embed=youtube_embed)

        # Write into the directory created above rather than a "json"
        # directory relative to wherever the command happens to be run.
        with open(os.path.join(jsonpath, filename), 'w') as f:
            f.write(convert_to_json(item))

        # TODO: what if there's a file there already? on the first one,
        # prompt the user whether to stomp on existing files or skip.
    return 0
Example #3
0
def fetch_cmd(cfg, parser, parsed, args):
    """Scrape the project's video feed and write one JSON file per video.

    Reads ``projectpath`` and ``url`` (plus ``youtube_embed`` when the url
    contains "youtube") from the ``[project]`` section of ``cfg``, scrapes
    the url with vidscraper and writes every video found to
    ``<projectpath>/json/NNNN_<title>.json``.

    :arg cfg: config object providing ``get(section, option)``
    :arg parser: command line parser; its byline is printed unless
        ``parsed.quiet`` is set
    :arg parsed: parsed command line options (only ``quiet`` is read)
    :arg args: remaining command line arguments (not used here)

    :returns: 0 on success, 1 if the project configuration is unusable

    """
    if not parsed.quiet:
        parser.print_byline()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        err('url not specified in steve.ini project config file.')
        err('Add "url = ..." to [project] section of steve.ini file.')
        return 1

    if 'youtube' in url:
        try:
            youtube_embed = YOUTUBE_EMBED[cfg.get('project', 'youtube_embed')]
        except (KeyError, ConfigParser.NoOptionError):
            # A missing option is just as unusable as an unknown value, so
            # report it the same way instead of dying with a traceback.
            err('youtube_embed must be either "iframe" or "object".')
            return 1
    else:
        youtube_embed = None

    out('Scraping {0}...'.format(url))
    video_feed = vidscraper.auto_feed(url)
    video_feed.load()

    # Single-argument print(...) behaves the same as the print statement.
    print('Found {0} videos...'.format(video_feed.video_count))
    for i, video in enumerate(video_feed):
        # Videos without a title must not break the progress message below.
        title = video.title or u''
        if title:
            filename = title.replace(' ', '_')
            filename = ''.join(c for c in filename if c in ALLOWED_LETTERS)
            filename = '_' + filename
        else:
            filename = ''

        filename = '{0:04d}{1}.json'.format(i, filename[:40])

        print('Working on {0}... ({1})'.format(
            unicodedata.normalize('NFKD', title).encode('ascii', 'ignore'),
            filename))
        item = vidscraper_to_dict(video, youtube_embed=youtube_embed)

        # Write into the directory created above rather than a "json"
        # directory relative to wherever the command happens to be run.
        with open(os.path.join(jsonpath, filename), 'w') as f:
            f.write(convert_to_json(item))

        # TODO: what if there's a file there already? on the first one,
        # prompt the user whether to stomp on existing files or skip.
    return 0
 def clean_feed_url(self):
     """Validate ``feed_url`` and return its canonical form.

     The submitted URL is passed through vidscraper's ``auto_feed`` and
     the ``url`` of the resulting feed is what gets checked and returned.

     Raises ``ValidationError`` if a ``Feed`` with that URL already
     exists for ``settings.SITE_ID``.
     """
     submitted = self.cleaned_data['feed_url']
     # vidscraper gives us the canonical URL for the feed
     canonical = auto_feed(submitted, api_keys=API_KEYS).url
     try:
         models.Feed.objects.get(feed_url=canonical, site=settings.SITE_ID)
     except models.Feed.DoesNotExist:
         # Nothing stored under this URL yet, so it is fine to use.
         return canonical
     raise ValidationError("Feed with this URL already exists.")
Example #5
0
 def clean_feed_url(self):
     """Validate ``feed_url`` and return its canonical form.

     The submitted URL is passed through vidscraper's ``auto_feed`` and
     the ``url`` of the resulting feed is what gets checked and returned.

     Raises ``ValidationError`` if a ``Feed`` with that URL already
     exists for ``settings.SITE_ID``.
     """
     submitted = self.cleaned_data['feed_url']
     # vidscraper gives us the canonical URL for the feed
     canonical = auto_feed(submitted, api_keys=API_KEYS).url
     try:
         models.Feed.objects.get(feed_url=canonical, site=settings.SITE_ID)
     except models.Feed.DoesNotExist:
         # Nothing stored under this URL yet, so it is fine to use.
         return canonical
     raise ValidationError("Feed with this URL already exists.")
Example #6
0
 def test_auto_feed(self):
     """Check ``auto_feed`` against the AssociatedPress YouTube channel.

     The feed url is asserted before loading; title, thumbnail, webpage
     and entry count are asserted after ``load()``.
     """
     expected_url = ('http://gdata.youtube.com/feeds/base/users/'
                     'AssociatedPress/uploads?alt=rss&v=2')
     expected_thumbnail = (
         'http://www.youtube.com/img/pic_youtubelogo_123x63.gif')

     feed = auto_feed("http://youtube.com/AssociatedPress")
     self.assertEqual(feed.url, expected_url)

     feed.load()
     self.assertEqual(feed.title, 'Uploads by AssociatedPress')
     self.assertEqual(feed.thumbnail_url, expected_thumbnail)
     self.assertTrue('AssociatedPress' in feed.webpage)
     self.assertTrue(feed.entry_count > 50000)
Example #7
0
def fetch_videos_from_url(url, youtube_embed=None):
    """Scrapes the video feed at the given url into a list of dicts

    :arg url: The url to scrape video data from
    :arg youtube_embed: Handed to ``vidscraper_to_dict`` for every video

    :returns: list of richard-ish dicts, one per video in the feed

    Example:

    >>> fetch_videos_from_url('http://www.youtube.com/user/PyConDE/videos')
    [...]

    """
    feed = vidscraper.auto_feed(url)
    feed.load()

    videos = []
    for video in feed:
        videos.append(vidscraper_to_dict(video, youtube_embed))
    return videos
Example #8
0
def fetch_videos_from_url(url, youtube_embed=None):
    """Scrapes the video feed at the given url into a list of dicts

    :arg url: The url to scrape video data from
    :arg youtube_embed: Handed to ``vidscraper_to_dict`` for every video

    :returns: list of richard-ish dicts, one per video in the feed

    Example:

    >>> fetch_videos_from_url('http://www.youtube.com/user/PyConDE/videos')
    [...]

    """
    feed = vidscraper.auto_feed(url)
    feed.load()

    videos = []
    for video in feed:
        videos.append(vidscraper_to_dict(video, youtube_embed))
    return videos
Example #9
0
    def clean_feed_url(self):
        """Validate ``feed_url`` and return the URL of the scraped feed.

        The submitted URL is passed to ``auto_feed``; the ``url`` of the
        feed it returns replaces the submitted value. The feed object
        itself is stored in ``cleaned_data['scraped_feed']``.

        Raises ``forms.ValidationError`` if ``auto_feed`` raises
        ``CantIdentifyUrl`` for the URL, or if a ``Feed`` with the
        resulting URL already exists for the current site.
        """
        url = self.cleaned_data['feed_url']
        try:
            scraped_feed = auto_feed(url)
            url = scraped_feed.url
        except CantIdentifyUrl:
            raise forms.ValidationError('It does not appear that %s is an '
                                        'RSS/Atom feed URL.' % url)

        site = Site.objects.get_current()
        # exists() asks the database a yes/no question instead of
        # evaluating the whole queryset just to test its truthiness.
        if models.Feed.objects.filter(feed_url=url, site=site).exists():
            raise forms.ValidationError(
                'That feed already exists on this site.')

        self.cleaned_data['scraped_feed'] = scraped_feed

        return url
Example #10
0
    def clean_feed_url(self):
        """Validate ``feed_url`` and return the URL of the scraped feed.

        The submitted URL is passed to ``auto_feed``; the ``url`` of the
        feed it returns replaces the submitted value. The feed object
        itself is stored in ``cleaned_data['scraped_feed']``.

        Raises ``forms.ValidationError`` if ``auto_feed`` raises
        ``CantIdentifyUrl`` for the URL, or if a ``Feed`` with the
        resulting URL already exists for the current site.
        """
        url = self.cleaned_data['feed_url']
        try:
            scraped_feed = auto_feed(url)
            url = scraped_feed.url
        except CantIdentifyUrl:
            raise forms.ValidationError('It does not appear that %s is an '
                                        'RSS/Atom feed URL.' % url)

        site = Site.objects.get_current()
        # exists() asks the database a yes/no question instead of
        # evaluating the whole queryset just to test its truthiness.
        if models.Feed.objects.filter(feed_url=url, site=site).exists():
            raise forms.ValidationError(
                'That feed already exists on this site.')

        self.cleaned_data['scraped_feed'] = scraped_feed

        return url
Example #11
0
 def test_auto_feed(self):
     """Check ``auto_feed`` against the AssociatedPress YouTube channel.

     The url and url_data are asserted before loading; feed metadata
     and the number of videos yielded are asserted after ``load()``.
     """
     source_url = "http://youtube.com/AssociatedPress"
     max_results = 20
     expected_thumbnail = (
         'http://www.youtube.com/img/pic_youtubelogo_123x63.gif')
     expected_guid = u'tag:youtube.com,2008:user:AssociatedPress:uploads'

     feed = auto_feed(source_url, max_results=max_results)
     self.assertEqual(feed.url, source_url)
     self.assertEqual(feed.url_data, {'username': '******'})

     feed.load()
     self.assertEqual(feed.title, 'Uploads by AssociatedPress')
     self.assertEqual(feed.thumbnail_url, expected_thumbnail)
     # YouTube changes this sometimes, so just make sure it's there
     self.assertTrue(feed.webpage)
     self.assertTrue(feed.etag is not None)
     self.assertTrue(feed.video_count > 55000)
     self.assertEqual(feed.guid, expected_guid)

     # Iterating the feed should yield exactly max_results videos.
     fetched = list(feed)
     self.assertEqual(len(fetched), max_results)
Example #12
0
 def test_auto_feed(self):
     """Check ``auto_feed`` against the AssociatedPress YouTube channel.

     The url and url_data are asserted before loading; feed metadata
     and the number of videos yielded are asserted after ``load()``.
     """
     source_url = "http://youtube.com/AssociatedPress"
     max_results = 20
     expected_thumbnail = (
         'http://www.youtube.com/img/pic_youtubelogo_123x63.gif')
     expected_guid = u'tag:youtube.com,2008:user:AssociatedPress:uploads'

     feed = auto_feed(source_url, max_results=max_results)
     self.assertEqual(feed.url, source_url)
     self.assertEqual(feed.url_data, {'username': '******'})

     feed.load()
     self.assertEqual(feed.title, 'Uploads by AssociatedPress')
     self.assertEqual(feed.thumbnail_url, expected_thumbnail)
     # YouTube changes this sometimes, so just make sure it's there
     self.assertTrue(feed.webpage)
     self.assertTrue(feed.etag is not None)
     self.assertTrue(feed.video_count > 55000)
     self.assertEqual(feed.guid, expected_guid)

     # Iterating the feed should yield exactly max_results videos.
     fetched = list(feed)
     self.assertEqual(len(fetched), max_results)