コード例 #1
0
    def parse_feed_entry(entry):
        """
        Turn one feedparser entry of a blip rss feed into a dictionary that
        maps :class:`.Video` fields to values. The same parsing serves blip
        feeds and blip API requests (which can also be done with feeds.)

        :param entry: a feedparser entry; it is indexed for ``id``, ``link``,
                      ``title``, ``blip_puredescription``, ``media_player``,
                      ``blip_datestamp``, ``tags``, ``blip_safeusername`` and
                      ``blip_showpage``.
        :returns: a dict with the keys ``guid``, ``link``, ``title``,
                  ``description``, ``files``, ``embed_code``,
                  ``publish_datetime``, ``thumbnail_url``, ``tags``, ``user``
                  and ``user_url``, plus ``license`` when the entry has one.

        """
        # One VideoFile per accepted enclosure; the size comes from
        # 'filesize' when it is set and from 'length' otherwise.
        video_files = []
        for enc in get_accepted_enclosures(entry):
            file_url = enc.get('url')
            file_type = enc.get('type')
            file_size = enc.get('filesize') or enc.get('length')
            video_files.append(VideoFile(url=file_url,
                                         mime_type=file_type,
                                         length=file_size))

        data = {
            'guid': entry['id'],
            'link': entry['link'],
            'title': entry['title'],
            'description': entry['blip_puredescription'],
            'files': video_files,
            'embed_code': entry['media_player']['content'],
        }
        data['publish_datetime'] = datetime.strptime(
            entry['blip_datestamp'], "%Y-%m-%dT%H:%M:%SZ")
        data['thumbnail_url'] = get_entry_thumbnail_url(entry)

        # Only tags without a scheme are kept, and the first of those is
        # dropped (presumably a category rather than a real tag -- confirm).
        unschemed_terms = []
        for tag in entry['tags']:
            if tag['scheme'] is None:
                unschemed_terms.append(tag['term'])
        data['tags'] = unschemed_terms[1:]

        data['user'] = entry['blip_safeusername']
        data['user_url'] = entry['blip_showpage']

        # The license is optional and only reported when present.
        if 'license' in entry:
            data['license'] = entry['license']
        return data
コード例 #2
0
ファイル: blip.py プロジェクト: msabramo/vidscraper
    def parse_feed_entry(self, entry):
        """
        Reusable method to parse a feedparser entry from a blip rss feed into
        a dictionary mapping :class:`.Video` fields to values.

        :param entry: a feedparser entry; it is indexed for ``id``, ``link``,
                      ``title``, ``blip_puredescription``, ``media_player``,
                      ``blip_datestamp``, ``tags``, ``blip_safeusername`` and
                      ``blip_showpage``.
        :returns: a dict with the keys ``guid``, ``link``, ``title``,
                  ``description``, ``file_url``, ``embed_code``,
                  ``publish_datetime``, ``thumbnail_url``, ``tags``, ``user``
                  and ``user_url``, plus ``license`` when the entry has one.
                  ``file_url`` is ``None`` when no accepted enclosure was
                  found.
        :raises KeyError: if one of the required entry keys is missing.

        """
        enclosure = get_first_accepted_enclosure(entry)

        data = {
            'guid': entry['id'],
            'link': entry['link'],
            'title': entry['title'],
            'description': clean_description_html(
                entry['blip_puredescription']),
            # An entry without an accepted enclosure yields no file URL
            # instead of failing while subscripting a missing enclosure.
            'file_url': enclosure['url'] if enclosure else None,
            'embed_code': entry['media_player']['content'],
            'publish_datetime': datetime.strptime(entry['blip_datestamp'],
                                                  "%Y-%m-%dT%H:%M:%SZ"),
            'thumbnail_url': get_entry_thumbnail_url(entry),
            # Only tags without a scheme are kept, and the first of those is
            # dropped (presumably a category rather than a real tag --
            # confirm).
            'tags': [tag['term'] for tag in entry['tags']
                     if tag['scheme'] is None][1:],
            'user': entry['blip_safeusername'],
            'user_url': entry['blip_showpage']
        }
        # The license is optional and only reported when present.
        if 'license' in entry:
            data['license'] = entry['license']
        return data
コード例 #3
0
ファイル: generic.py プロジェクト: afrigeo/vidscraper
    def get_video_data(self, item):
        """
        Build a dictionary of video field values from one feed item.

        :param item: a feed entry supporting ``in``, ``.get()``, indexing
                     and attribute access to ``links``.
        :returns: a dict with the keys ``link``, ``title``, ``description``,
                  ``thumbnail_url``, ``files``, ``publish_datetime``,
                  ``guid``, ``embed_code``, ``tags`` and ``license``.

        """
        # The publish date wins over the update date; neither gives None.
        parsed_time = (item.get('published_parsed') or
                       item.get('updated_parsed'))
        best_date = struct_time_to_datetime(parsed_time) if parsed_time else None

        # A rel="via" link is the original URL and overrides the plain link.
        link = item.get('link')
        if 'links' in item:
            link = next((candidate['href'] for candidate in item.links
                         if candidate.get('rel') == 'via'), link)

        # Atom content is used when it has a non-empty value, otherwise the
        # summary.
        description = None
        if 'content' in item:
            contents = item['content']
            if contents:
                description = contents[0]['value']
        if not description:
            description = item.get('summary', '')

        files = []
        for enc in get_accepted_enclosures(item):
            enc_url = enc.get('url')
            enc_type = enc.get('type')
            enc_size = enc.get('filesize') or enc.get('length')
            files.append(VideoFile(url=enc_url,
                                   mime_type=enc_type,
                                   length=enc_size))

        # A media player either carries embeddable content or, failing that,
        # a URL that is recorded as one more file.
        embed_code = None
        if 'media_player' in item:
            player = item['media_player']
            player_content = player.get('content')
            if player_content:
                embed_code = convert_entities(player_content)
            elif 'url' in player:
                files.append(VideoFile(url=player['url'],
                                       mime_type=player.get('type')))
        # An empty file list is reported as None.
        files = files or None

        video_license = (item['media_license']['href']
                         if 'media_license' in item
                         else item.get('license'))

        title = convert_entities(item.get('title', ''))
        thumbnail_url = get_entry_thumbnail_url(item)
        guid = item.get('id')
        # Only tags without a scheme are kept; None when there are no tags.
        tags = None
        if 'tags' in item:
            tags = [tag['term'] for tag in item['tags']
                    if tag['scheme'] is None]

        return {
            'link': link,
            'title': title,
            'description': description,
            'thumbnail_url': thumbnail_url,
            'files': files,
            'publish_datetime': best_date,
            'guid': guid,
            'embed_code': embed_code,
            'tags': tags,
            'license': video_license
        }
コード例 #4
0
ファイル: feed.py プロジェクト: stephenjudge/vidscraper
    def parse_feed_entry(self, entry):
        """
        Parse a feedparser entry from a generic feed into a dictionary of
        video field values.

        :param entry: a feed entry supporting ``in``, ``.get()``, indexing
                      and attribute access to ``links``.
        :returns: a dict with the keys ``link``, ``title``, ``description``,
                  ``thumbnail_url``, ``file_url``, ``file_url_mimetype``,
                  ``file_url_length``, ``publish_datetime``, ``guid``,
                  ``embed_code``, ``tags`` and ``license``. The three
                  ``file_url*`` values are ``None`` when no accepted
                  enclosure was found.

        """
        enclosure = get_first_accepted_enclosure(entry)

        # Test the value, not just the key: a ``*_parsed`` key that is
        # present but empty must fall through to the next candidate instead
        # of being handed to struct_time_to_datetime.
        if entry.get("published_parsed"):
            best_date = struct_time_to_datetime(entry["published_parsed"])
        elif entry.get("updated_parsed"):
            best_date = struct_time_to_datetime(entry["updated_parsed"])
        else:
            best_date = None

        link = entry.get("link")
        if "links" in entry:
            for possible_link in entry.links:
                if possible_link.get("rel") == "via":
                    # original URL
                    link = possible_link["href"]
                    break

        # Atom content is used when it has a non-empty value, otherwise the
        # summary.
        if (
            "content" in entry
            and entry["content"]
            and entry["content"][0]["value"]
        ):
            description = entry["content"][0]["value"]
        else:
            description = entry.get("summary", "")

        # A media player either carries embeddable content or, failing that,
        # a URL from which an embed code is built.
        embed_code = None
        if "media_player" in entry:
            player = entry["media_player"]
            if player.get("content"):
                embed_code = convert_entities(player["content"])
            elif "url" in player:
                embed_code = make_embed_code(player["url"], "")

        if "media_license" in entry:
            license = entry["media_license"]["href"]
        else:
            license = entry.get("license")

        if enclosure:
            file_url = enclosure.get("url")
            file_url_mimetype = enclosure.get("type")
            file_url_length = (enclosure.get("filesize") or
                               enclosure.get("length"))
        else:
            file_url = file_url_mimetype = file_url_length = None

        # Only tags without a scheme are kept; None when there are no tags.
        if "tags" in entry:
            tags = [tag["term"] for tag in entry["tags"]
                    if tag["scheme"] is None]
        else:
            tags = None

        return {
            "link": link,
            # A title-less entry yields an empty title instead of a KeyError.
            "title": convert_entities(entry.get("title", "")),
            "description": description,
            "thumbnail_url": get_entry_thumbnail_url(entry),
            "file_url": file_url,
            "file_url_mimetype": file_url_mimetype,
            "file_url_length": file_url_length,
            "publish_datetime": best_date,
            "guid": entry.get("id"),
            "embed_code": embed_code,
            "tags": tags,
            "license": license,
        }
コード例 #5
0
ファイル: feed.py プロジェクト: paulswartz/vidscraper
    def parse_feed_entry(self, entry):
        """
        Parse a feedparser entry from a generic feed into a dictionary of
        video field values.

        :param entry: a feed entry supporting ``in``, ``.get()``, indexing
                      and attribute access to ``links``.
        :returns: a dict with the keys ``link``, ``title``, ``description``,
                  ``thumbnail_url``, ``file_url``, ``file_url_mimetype``,
                  ``file_url_length``, ``publish_datetime``, ``guid``,
                  ``embed_code`` and ``tags``. The three ``file_url*``
                  values are ``None`` when no accepted enclosure was found.

        """
        enclosure = get_first_accepted_enclosure(entry)

        # Test the value, not just the key: a ``*_parsed`` key that is
        # present but empty must fall through to the next candidate instead
        # of being handed to struct_time_to_datetime.
        if entry.get('published_parsed'):
            best_date = struct_time_to_datetime(entry['published_parsed'])
        elif entry.get('updated_parsed'):
            best_date = struct_time_to_datetime(entry['updated_parsed'])
        else:
            best_date = None

        link = entry.get('link')
        if 'links' in entry:
            for possible_link in entry.links:
                if possible_link.get('rel') == 'via':
                    # original URL
                    link = possible_link['href']
                    break

        if ('content' in entry and entry['content'] and
                entry['content'][0]['value']):  # Atom
            description = entry['content'][0]['value']
        else:
            # A missing summary is treated like an empty one rather than
            # raising KeyError.
            description = entry.get('summary') or ''

        # A media player either carries embeddable content or, failing that,
        # a URL from which an embed code is built.
        embed_code = None
        if 'media_player' in entry:
            player = entry['media_player']
            if player.get('content'):
                embed_code = convert_entities(player['content'])
            elif 'url' in player:
                embed_code = make_embed_code(player['url'], '')

        return {
            'link': link,
            # A title-less entry yields an empty title instead of a KeyError.
            'title': convert_entities(entry.get('title', '')),
            'description': description,
            'thumbnail_url': get_entry_thumbnail_url(entry),
            'file_url': enclosure.get('url') if enclosure else None,
            'file_url_mimetype': enclosure.get('type') if enclosure else None,
            'file_url_length': ((enclosure.get('filesize') or
                                 enclosure.get('length'))
                                if enclosure else None),
            'publish_datetime': best_date,
            'guid': entry.get('id'),
            'embed_code': embed_code,
            # Only tags without a scheme are kept; None when there are no
            # tags.
            'tags': [tag['term'] for tag in entry['tags']
                     if tag['scheme'] is None] if 'tags' in entry else None
        }
コード例 #6
0
ファイル: youtube.py プロジェクト: msabramo/vidscraper
    def parse_feed_entry(self, entry):
        """
        Parse one feedparser entry of a youtube rss feed and return a
        dictionary mapping :class:`.Video` fields to values.

        :param entry: a feedparser entry; it is indexed for ``author``,
                      ``summary``, ``links``, ``title``, ``tags`` and ``id``,
                      and for ``published_parsed`` or ``updated_parsed``.
        :returns: a dict with the keys ``link``, ``title``, ``description``,
                  ``thumbnail_url``, ``publish_datetime``, ``tags``,
                  ``user``, ``user_url`` and ``guid``, plus
                  ``flash_enclosure_url`` when the entry has a media player.

        """
        author = entry['author']

        # The publish date is preferred; the update date is the fallback.
        if 'published_parsed' in entry:
            date_key = 'published_parsed'
        else:
            date_key = 'updated_parsed'
        best_date = struct_time_to_datetime(entry[date_key])

        summary_is_html = ('summary_detail' in entry and
                           entry['summary_detail']['type'] == 'text/html')
        description = entry['summary']
        if summary_is_html:
            # HTML-ified description in RSS feeds: the text is taken from
            # the first <span>.
            first_span = BeautifulSoup(description).findAll('span')[0]
            description = unicode(first_span.string)

        data = {
            # Everything from the first '&' onwards is cut off the link.
            'link': entry['links'][0]['href'].split('&', 1)[0],
            'title': entry['title'],
            'description': description,
            'thumbnail_url': get_entry_thumbnail_url(entry),
            'publish_datetime': best_date,
        }
        # Terms that start with 'http' are not reported as tags.
        tag_terms = []
        for tag in entry['tags']:
            if not tag['term'].startswith('http'):
                tag_terms.append(tag['term'])
        data['tags'] = tag_terms
        data['user'] = author
        data['user_url'] = u'http://www.youtube.com/user/%s' % author

        # ids of the form 'tag:youtube.com...' are rewritten to the gdata
        # URL of the video named by their last ':'-separated part.
        guid = entry['id']
        if entry.id.startswith('tag:youtube.com'):
            guid = 'http://gdata.youtube.com/feeds/api/videos/%s' % (
                entry.id.rsplit(':', 1)[-1],)
        data['guid'] = guid

        if 'media_player' in entry:  # only in search feeds/API?
            data['flash_enclosure_url'] = entry['media_player']['url']

        thumbnail = data['thumbnail_url']
        if thumbnail.endswith('/default.jpg'):
            # got a crummy version; increase the resolution
            data['thumbnail_url'] = thumbnail.replace(
                '/default.jpg', '/hqdefault.jpg')
        return data
コード例 #7
0
    def get_video_data(self, item):
        """
        Collect the video field values of one feed item in a dictionary.

        :param item: a feed entry supporting ``in``, ``.get()``, indexing
                     and attribute access to ``links``.
        :returns: a dict with the keys ``link``, ``title``, ``description``,
                  ``thumbnail_url``, ``files``, ``publish_datetime``,
                  ``guid``, ``embed_code``, ``tags`` and ``license``.

        """
        # First non-empty of publish date and update date, else None.
        best_date = None
        for date_key in ('published_parsed', 'updated_parsed'):
            parsed = item.get(date_key)
            if parsed:
                best_date = struct_time_to_datetime(parsed)
                break

        link = item.get('link')
        if 'links' in item:
            for alternative in item.links:
                if alternative.get('rel') != 'via':
                    continue
                # original URL
                link = alternative['href']
                break

        # Atom content is used when it has a non-empty value, otherwise the
        # summary.
        atom_value = None
        if 'content' in item and item['content']:
            atom_value = item['content'][0]['value']
        if atom_value:
            description = atom_value
        else:
            description = item.get('summary', '')

        files = []
        for enclosure in get_accepted_enclosures(item):
            files.append(VideoFile(
                url=enclosure.get('url'),
                mime_type=enclosure.get('type'),
                length=enclosure.get('filesize') or enclosure.get('length'),
            ))

        # A media player either carries embeddable content or, failing that,
        # a URL that is recorded as one more file.
        embed_code = None
        if 'media_player' in item:
            player = item['media_player']
            if player.get('content'):
                embed_code = convert_entities(player['content'])
            elif 'url' in player:
                extra_file = VideoFile(url=player['url'],
                                       mime_type=player.get('type'))
                files.append(extra_file)
        # An empty file list is reported as None.
        if len(files) == 0:
            files = None

        if 'media_license' not in item:
            video_license = item.get('license')
        else:
            video_license = item['media_license']['href']

        data = {'link': link}
        data['title'] = convert_entities(item.get('title', ''))
        data['description'] = description
        data['thumbnail_url'] = get_entry_thumbnail_url(item)
        data['files'] = files
        data['publish_datetime'] = best_date
        data['guid'] = item.get('id')
        data['embed_code'] = embed_code
        # Only tags without a scheme are kept; None when there are no tags.
        if 'tags' in item:
            data['tags'] = [tag['term'] for tag in item['tags']
                            if tag['scheme'] is None]
        else:
            data['tags'] = None
        data['license'] = video_license
        return data