Example #1
def scrapevideo(video_url):
    """Scrapes the url and fixes the data

    This is sort of a wrapper around `vidscraper.auto_scrape`. It
    calls that, but then transforms the results into a Python dict and
    adds some additional computed metadata.

    :arg video_url: Url of video to scrape.

    :returns: Python dict of metadata

    Example:

    >>> scrapevideo('http://www.youtube.com/watch?v=ywToByBkOTc')
    {'url': 'http://www.youtube.com/watch?v=ywToByBkOTc', ...}

    """
    video_data = vidscraper.auto_scrape(video_url)

    data = dict([(field, getattr(video_data, field))
                 for field in video_data.fields])

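    # Convert any datetime fields to ISO 8601 strings.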
    for field in ('publish_datetime', 'file_url_expires'):
        dt = data.get(field, None)
        if isinstance(dt, datetime.datetime):
            data[field] = dt.isoformat()

    data['url'] = video_url
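    # For YouTube, the video id is the last path segment of the guid.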
    if 'youtube.com' in video_url and 'guid' in data and data['guid']:
        guid = data['guid'].split('/')[-1]
        data['object_embed_code'] = YOUTUBE_EMBED['object'].format(guid=guid)
        data['iframe_embed_code'] = YOUTUBE_EMBED['iframe'].format(guid=guid)

    return data
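
The function above assumes a module-level YOUTUBE_EMBED dict holding 'object' and 'iframe' format strings with a {guid} placeholder. A minimal sketch of what those templates might look like, using standard YouTube embed markup as an assumption (the real templates live in the original module):

# Sketch only: embed-code templates assumed from how scrapevideo() uses them.
YOUTUBE_EMBED = {
    'object': (
        '<object width="425" height="344">'
        '<param name="movie" value="http://www.youtube.com/v/{guid}">'
        '</param>'
        '<embed src="http://www.youtube.com/v/{guid}"'
        ' type="application/x-shockwave-flash"'
        ' width="425" height="344"></embed>'
        '</object>'),
    'iframe': (
        '<iframe width="425" height="344"'
        ' src="http://www.youtube.com/embed/{guid}"'
        ' frameborder="0" allowfullscreen></iframe>'),
}
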
Example #2
def test_auto_scrape(self):
    video = auto_scrape("http://www.youtube.com/watch?v=J_DV9b0x7v4")
    self.assertEqual(video.title,
                     u'CaramellDansen (Full Version + Lyrics)')
    self.assertNotEqual(video.file_url, None)
    self.assertEqual(video.file_url_mimetype, u'video/x-flv')
    self.assertTrue(
        video.file_url_expires - datetime.datetime.now() >
        datetime.timedelta(hours=1))
Example #3
def test_auto_scrape(self):
    video = auto_scrape("http://www.youtube.com/watch?v=J_DV9b0x7v4")
    self.assertEqual(video.title,
                     u'CaramellDansen (Full Version + Lyrics)')
    self.assertGreater(len(video.files), 0)
    self.assertTrue(video.files[0].url)
    self.assertEqual(video.files[0].mime_type, u'video/mp4')
    self.assertTrue(video.files[0].expires -
                    datetime.datetime.now() > datetime.timedelta(hours=1))
Example #4
def test_auto_scrape(self):
    video = auto_scrape("http://www.youtube.com/watch?v=J_DV9b0x7v4")
    self.assertEqual(video.title,
                     u'CaramellDansen (Full Version + Lyrics)')
    self.assertGreater(len(video.files), 0)
    self.assertTrue(video.files[0].url)
    self.assertEqual(video.files[0].mime_type, u'video/mp4')
    self.assertTrue(
        video.files[0].expires - datetime.datetime.now() >
        datetime.timedelta(hours=1))
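
The three tests above span a vidscraper API change: the first reads flat video.file_url / video.file_url_mimetype attributes, while the later two use the newer video.files list of file objects. A small compatibility sketch, built only from the attribute names shown in these tests:

def best_file_url(video):
    # Newer vidscraper: file data lives in a list of file objects.
    files = getattr(video, 'files', None)
    if files:
        return files[0].url
    # Older vidscraper: a flat file_url attribute on the video itself.
    return getattr(video, 'file_url', None)
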
Example #5
def handle_noargs(self, **options):
    if site_too_old():
        return
    for v in models.Video.objects.filter(when_published__isnull=True):
        try:
            d = vidscraper.auto_scrape(v.website_url, fields=["publish_date"])
        except Exception:
            pass
        else:
            if d:
                v.when_published = d["publish_date"]
                v.save()
Example #6
def handle_noargs(self, **options):
    if site_too_old():
        return
    for v in models.Video.objects.filter(when_published__isnull=True):
        try:
            d = vidscraper.auto_scrape(v.website_url,
                                       fields=['publish_date'])
        except Exception:
            pass
        else:
            if d:
                v.when_published = d['publish_date']
                v.save()
Example #7
def handle_noargs(self, **options):
    if site_too_old():
        return
    for v in models.Video.objects.filter(when_published__isnull=True):
        try:
            video = vidscraper.auto_scrape(v.website_url, fields=[
                    'publish_datetime'], api_keys=API_KEYS)
        except Exception:
            pass
        else:
            if video:
                v.when_published = video.publish_datetime
                v.save()
Example #8
def handle_noargs(self, **options):
    if site_too_old():
        return
    for v in models.Video.objects.filter(when_published__isnull=True):
        try:
            video = vidscraper.auto_scrape(v.website_url,
                                           fields=['publish_datetime'],
                                           api_keys=API_KEYS)
        except Exception:
            pass
        else:
            if video:
                v.when_published = video.publish_datetime
                v.save()
Example #9
def clean_url(self):
    url = urlparse.urldefrag(self.cleaned_data['url'])[0]
    self._validate_unique(url=url)
    self.video_cache = None
    try:
        self.video_cache = vidscraper.auto_scrape(url, api_keys=API_KEYS)
    except (UnhandledVideo, urllib2.URLError):
        pass
    else:
        if self.video_cache.link is not None and url != self.video_cache.link:
            url = self.video_cache.link
            self._validate_unique(url=url, guid=self.video_cache.guid)
        elif self.video_cache.guid is not None:
            self._validate_unique(guid=self.video_cache.guid)
    return url
Example #10
def clean_url(self):
    url = urlparse.urldefrag(self.cleaned_data['url'])[0]
    self._validate_unique(url=url)
    self.video_cache = None
    try:
        self.video_cache = vidscraper.auto_scrape(url, api_keys=API_KEYS)
    except (CantIdentifyUrl, urllib2.URLError):
        pass
    else:
        if self.video_cache.link is not None and url != self.video_cache.link:
            url = self.video_cache.link
            self._validate_unique(url=url, guid=self.video_cache.guid)
        elif self.video_cache.guid is not None:
            self._validate_unique(guid=self.video_cache.guid)
    return url
Example #11
def handle_noargs(self, **options):
    if site_too_old():
        return
    for v in models.Video.objects.filter(when_published__isnull=True):
        try:
            video = vidscraper.auto_scrape(v.website_url, fields=[
                    'publish_datetime'], api_keys=API_KEYS)
        except Exception:
            pass
        else:
            if video:
                v.when_published = video.publish_datetime
                v.save()

    # Finally, at the end, if stamps are enabled, update them.
    if ENABLE_CHANGE_STAMPS:
        models.create_or_delete_video_needs_published_date_stamp()
Example #12
def get_scraped_data(url):
    cache_key = 'vidscraper_data-' + url
    if len(cache_key) >= 250:
        # key too long for memcached's 250-character limit; fall back to a hash
        cache_key = 'vidscraper_data-hash-' + hashlib.sha1(url).hexdigest()
    scraped_data = cache.get(cache_key)

    if not scraped_data:
        # try to scrape the URL
        try:
            scraped_data = vidscraper.auto_scrape(url)
        except vidscraper.errors.Error:
            scraped_data = None

        cache.add(cache_key, scraped_data)

    return scraped_data
Example #13
def handle_noargs(self, **options):
    if site_too_old():
        return
    for v in models.Video.objects.filter(when_published__isnull=True):
        try:
            d = vidscraper.auto_scrape(v.website_url,
                                       fields=['publish_date'])
        except Exception:
            pass
        else:
            if d:
                v.when_published = d['publish_date']
                v.save()

    # Finally, at the end, if stamps are enabled, update them.
    if models.ENABLE_CHANGE_STAMPS:
        models.create_or_delete_video_needs_published_date_stamp()
Example #14
def get_vidscraper_video(url):
    cache_key = 'vidscraper_data-' + url
    if len(cache_key) >= 250:
        # key too long for memcached's 250-character limit; fall back to a hash
        cache_key = 'vidscraper_data-hash-' + hashlib.sha1(url).hexdigest()
    vidscraper_video = cache.get(cache_key)

    if not vidscraper_video:
        # try to scrape the URL
        try:
            vidscraper_video = vidscraper.auto_scrape(url)
        except vidscraper.errors.Error:
            vidscraper_video = None

        cache.add(cache_key, vidscraper_video)

    return vidscraper_video
Example #15
def get_vidscraper_video(url):
    cache_key = 'vidscraper_data-' + url
    if len(cache_key) >= 250:
        # key too long for memcached's 250-character limit; fall back to a hash
        cache_key = 'vidscraper_data-hash-' + hashlib.sha1(url).hexdigest()
    vidscraper_video = cache.get(cache_key)

    if not vidscraper_video:
        # try to scrape the URL
        try:
            vidscraper_video = vidscraper.auto_scrape(url, api_keys=API_KEYS)
        except (vidscraper.errors.Error, urllib2.URLError):
            vidscraper_video = None

        cache.add(cache_key, vidscraper_video)

    return vidscraper_video
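
A hypothetical call site for the cached helper above; the URL is illustrative, and the title attribute matches the earlier test examples:

video = get_vidscraper_video('http://www.youtube.com/watch?v=J_DV9b0x7v4')
if video is not None:
    print(video.title)  # None means the scrape failed and nothing was cached
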
Example #16
def save(self, commit=True, request=None):
    kwargs = {
        'video': vidscraper.auto_scrape(self.cleaned_data['original_url']),
        'commit': False,
    }

    if request and request.user.is_authenticated():
        kwargs['owner'] = request.user

    instance = Video.from_vidscraper_video(**kwargs)

    def save_m2m():
        instance.save_m2m()

    if commit:
        instance.save()
        save_m2m()
    else:
        self.save_m2m = save_m2m
    return instance
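
This save() follows Django's ModelForm contract: with commit=False the caller gets the unsaved instance plus a save_m2m hook. A hypothetical call site, assuming a form class named VideoForm (the name is not taken from the source):

url = 'http://www.youtube.com/watch?v=J_DV9b0x7v4'
form = VideoForm(data={'original_url': url})  # VideoForm: hypothetical name
if form.is_valid():
    video = form.save(request=request)  # request: the current HttpRequest
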
Example #17
def get_data_from_youtube(url):
    video = vidscraper.auto_scrape(url)
    return {'thumbnail_url': video['thumbnail_url'], 'embed': video['embed']}
Example #18
def get_data_from_youtube(url):
    video = vidscraper.auto_scrape(url)
    return {
        'thumbnail_url': video['thumbnail_url'],
        'embed': video['embed'],
    }