예제 #1
0
def _populate_bliptv_data(rss, metadata):
    """Fill ``metadata`` in place from the XML text of a blip.tv RSS feed.

    ``rss`` is parsed with ``minidom``; the values found under
    ``rss/channel/item`` are assigned to the ``thumbnail_url``, ``title``,
    ``author``, ``description`` and ``tags`` attributes of ``metadata``.
    Nothing is returned.

    Keywords are split on commas, trimmed, and stored as a set with blank
    entries dropped, so an empty or missing keyword lookup gives an empty
    set.  Malformed XML is not handled here: whatever
    ``minidom.parseString`` raises propagates to the caller.

      >>> rss = '''<?xml version="1.0" ?>
      ... <rss version="2.0"
      ...      xmlns:media="http://search.yahoo.com/mrss/"
      ...      xmlns:blip="http://blip.tv/dtd/blip/1.0">
      ... <channel>
      ...     <item>
      ...       <title>
      ...         Random Video
      ...       </title>
      ...       <blip:user>someuser</blip:user>
      ...       <blip:puredescription>
      ...         This is a random description.
      ...       </blip:puredescription>
      ...       <media:keywords>abc, def</media:keywords>
      ...       <media:thumbnail url="http://someurl.com/somefile.jpg" />
      ...     </item>
      ...   </channel>
      ... </rss>
      ... '''

      >>> metadata = VideoMetadata()
      >>> _populate_bliptv_data(rss, metadata)

      >>> metadata.title
      u'Random Video'
      >>> metadata.description
      u'This is a random description.'
      >>> metadata.tags
      set([u'abc', u'def'])
      >>> metadata.thumbnail_url
      u'http://someurl.com/somefile.jpg'
      >>> metadata.author
      u'someuser'

    """
    dom = minidom.parseString(rss)

    # Simple one-to-one fields, looked up in the same order as before.
    metadata.thumbnail_url = xpath_attr(
        dom, u'rss/channel/item/media:thumbnail', 'url')
    metadata.title = xpath_text(dom, u'rss/channel/item/title')
    metadata.author = xpath_text(dom, u'rss/channel/item/blip:user')
    metadata.description = xpath_text(
        dom, u'rss/channel/item/blip:puredescription')

    # A falsy lookup result is treated as "no keywords at all".
    raw_keywords = xpath_text(dom, u'rss/channel/item/media:keywords')
    if not raw_keywords:
        raw_keywords = ''

    collected = set()
    for piece in raw_keywords.split(','):
        cleaned = piece.strip()
        if cleaned:
            collected.add(cleaned)
    metadata.tags = collected
예제 #2
0
def _youtube_metadata_lookup(xml):
    """Build a ``VideoMetadata`` from a YouTube ``ut_response`` document.

    ``xml`` is the XML text of the response.  The title, author,
    description and thumbnail URL are copied from the elements under
    ``ut_response/video_details``.  ``tags`` becomes a list of the
    whitespace-separated words of the ``tags`` element (an empty list when
    that lookup yields nothing), and ``duration`` is set to
    ``length_seconds`` as a float when that value is numeric.

    Returns the populated ``VideoMetadata``, or ``None`` when the text
    cannot be parsed as XML (the parse error is logged with its traceback).

      >>> xml = '''<?xml version="1.0" ?>
      ... <ut_response status="ok">
      ...   <video_details>
      ...     <author>youtubeuser</author>
      ...     <title>Random Title</title>
      ...     <rating_avg>3.25</rating_avg>
      ...     <rating_count>10</rating_count>
      ...     <tags>california trip redwoods</tags>
      ...     <description>Random description.</description>
      ...     <update_time>1129803584</update_time>
      ...     <view_count>7</view_count>
      ...     <upload_time>1127760809</upload_time>
      ...     <length_seconds>8</length_seconds>
      ...     <recording_date>None</recording_date>
      ...     <recording_location/>
      ...     <recording_country/>
      ...     <thumbnail_url>http://blah.com/default.jpg</thumbnail_url>
      ...   </video_details>
      ... </ut_response>'''

      >>> data = _youtube_metadata_lookup(xml)
      >>> data.author
      u'youtubeuser'
      >>> data.title
      u'Random Title'
      >>> data.description
      u'Random description.'
      >>> data.tags
      [u'california', u'trip', u'redwoods']
      >>> data.thumbnail_url
      u'http://blah.com/default.jpg'
      >>> data.duration
      8.0

    """

    try:
        doc = minidom.parseString(xml)
    except expat.ExpatError:
        # Pass the payload as a logging argument so it is only interpolated
        # if the record is actually emitted.
        logger.exception('Error while trying to parse RSS XML - "%s"', xml)
        return None

    metadata = VideoMetadata()
    metadata.title = xpath_text(doc, u'ut_response/video_details/title')
    metadata.author = xpath_text(doc, u'ut_response/video_details/author')
    metadata.description = xpath_text(
        doc, u'ut_response/video_details/description')
    metadata.thumbnail_url = xpath_text(
        doc, u'ut_response/video_details/thumbnail_url')

    # The tags lookup may come back empty (the duration check below already
    # allows for None), so fall back to '' instead of calling .split() on
    # None.  split() with no argument also discards the empty strings that
    # split(' ') produced for repeated, leading or trailing spaces.
    tag_text = xpath_text(doc, u'ut_response/video_details/tags') or ''
    metadata.tags = tag_text.split()

    duration = xpath_text(doc, u'ut_response/video_details/length_seconds')
    if duration is not None and duration.strip() != '':
        try:
            metadata.duration = float(duration)
        except (TypeError, ValueError):
            # length_seconds was not numeric; leave metadata.duration
            # untouched rather than failing the whole lookup.
            pass

    return metadata
예제 #3
0
def _populate_google_data(rss, metadata):
    """Fill ``metadata`` in place from the XML text of a Google Video feed.

    ``rss`` is parsed with ``minidom``; values found under
    ``rss/channel/item`` are assigned to the ``thumbnail_url``, ``title``,
    ``author``, ``duration``, ``description`` and ``tags`` attributes of
    ``metadata``.  Nothing is returned.

    ``duration`` is only assigned when the ``media:content`` ``duration``
    attribute is a numeric string.  The ``media:description`` text is
    split at the first ``Keywords:`` marker: the part before it (stripped)
    becomes the description, and the whitespace-separated words after it
    become the ``tags`` set.  When there is no description text both
    attributes are set to ``None``; when there is no marker, or nothing
    follows it, ``tags`` is ``None``.

    Malformed XML is not handled here: whatever ``minidom.parseString``
    raises propagates to the caller.

      >>> rss = '''<?xml version="1.0" ?>
      ... <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:openSearch="http://a9.com/-/spec/opensearchrss/1.0/">
      ... <channel>
      ...     <title>
      ...       Google Video - The Big Experiment &amp; Rocky
      ...     </title>
      ...     <link>
      ...       http://video.google.com/videoplay?docid=-274981837129821058
      ...     </link>
      ...     <item>
      ...       <author>
      ...         Jon Doe
      ...       </author>
      ...       <media:group>
      ...         <media:title>
      ...           The Big Experiment &amp; Rocky
      ...         </media:title>
      ...         <media:description>
      ...           hello world
      ...
      ...           Keywords:  eepybird eepy bird
      ...         </media:description>
      ...         <media:thumbnail url="http://video.google.com/somepath.jpg" width="320"/>
      ...         <media:content duration="23" />
      ...       </media:group>
      ...     </item>
      ...   </channel>
      ... </rss>
      ... '''

      >>> metadata = VideoMetadata()
      >>> _populate_google_data(rss, metadata)

      >>> metadata.title
      u'The Big Experiment & Rocky'
      >>> metadata.description
      u'hello world'
      >>> metadata.tags
      set([u'eepybird', u'bird', u'eepy'])
      >>> metadata.thumbnail_url
      u'http://video.google.com/somepath.jpg'
      >>> metadata.author
      u'Jon Doe'
      >>> metadata.duration
      23.0

    """
    doc = minidom.parseString(rss)
    metadata.thumbnail_url = xpath_attr(
        doc, u'rss/channel/item/media:group/media:thumbnail', 'url')
    metadata.title = xpath_text(
        doc, u'rss/channel/item/media:group/media:title')
    metadata.author = xpath_text(doc, u'rss/channel/item/author')

    duration = xpath_attr(
        doc, u'rss/channel/item/media:group/media:content', 'duration')
    if duration is not None and duration.strip() != '':
        try:
            metadata.duration = float(duration)
        except (TypeError, ValueError):
            # The attribute was not numeric; leave metadata.duration
            # untouched rather than failing the whole parse.
            pass

    text = xpath_text(
        doc, u'rss/channel/item/media:group/media:description')
    description = None
    tags = None
    if text:
        marker = 'Keywords:'
        description = text
        pos = text.find(marker)
        if pos > -1:
            keyword_blurb = text[pos + len(marker):]
            # Tags stay None when nothing at all follows the marker; a
            # whitespace-only remainder still produces an (empty) set.
            if keyword_blurb:
                # split() separates on any run of whitespace, so keywords
                # divided by tabs or newlines become individual tags.
                tags = set(keyword_blurb.split())
            description = text[:pos]
        description = description.strip()

    metadata.description = description
    metadata.tags = tags