예제 #1
0
파일: model.py 프로젝트: obris/gpodder
    def from_feedparser_entry(cls, entry, channel):
        episode = cls(channel)
        episode.guid = entry.get('id', '')

        # Replace multi-space and newlines with single space (Maemo bug 11173)
        episode.title = re.sub('\s+', ' ', entry.get('title', ''))
        episode.link = entry.get('link', '')
        if 'content' in entry and len(entry['content']) and \
                entry['content'][0].get('type', '') == 'text/html':
            episode.description = entry['content'][0].value
        else:
            episode.description = entry.get('summary', '')

        # Fallback to subtitle if summary is not available
        if not episode.description:
            episode.description = entry.get('subtitle', '')

        try:
            total_time = 0

            # Parse iTunes-specific podcast duration metadata
            itunes_duration = entry.get('itunes_duration', '')
            if itunes_duration:
                total_time = util.parse_time(itunes_duration)

            # Parse time from YouTube descriptions if it's a YouTube feed
            if youtube.is_youtube_guid(episode.guid):
                result = re.search(r'Time:<[^>]*>\n<[^>]*>([:0-9]*)<',
                        episode.description)
                if result:
                    youtube_duration = result.group(1)
                    total_time = util.parse_time(youtube_duration)

            episode.total_time = total_time
        except:
            pass

        episode.published = feedcore.get_pubdate(entry)

        enclosures = entry.get('enclosures', [])
        media_rss_content = entry.get('media_content', [])
        audio_available = any(e.get('type', '').startswith('audio/') \
                for e in enclosures + media_rss_content)
        video_available = any(e.get('type', '').startswith('video/') \
                for e in enclosures + media_rss_content)

        # XXX: Make it possible for hooks/extensions to override this by
        # giving them a list of enclosures and the "self" object (podcast)
        # and letting them sort and/or filter the list of enclosures to
        # get the desired enclosure picked by the algorithm below.
        filter_and_sort_enclosures = lambda x: x

        # read the flattr auto-url, if exists
        payment_info = [link['href'] for link in entry.get('links', [])
            if link['rel'] == 'payment']
        if payment_info:
            episode.payment_url = sorted(payment_info, key=get_payment_priority)[0]

        # Enclosures
        for e in filter_and_sort_enclosures(enclosures):
            episode.mime_type = e.get('type', 'application/octet-stream')
            if episode.mime_type == '':
                # See Maemo bug 10036
                logger.warn('Fixing empty mimetype in ugly feed')
                episode.mime_type = 'application/octet-stream'

            if '/' not in episode.mime_type:
                continue

            # Skip images in feeds if audio or video is available (bug 979)
            # This must (and does) also look in Media RSS enclosures (bug 1430)
            if episode.mime_type.startswith('image/') and \
                    (audio_available or video_available):
                continue

            # If we have audio or video available later on, skip
            # 'application/octet-stream' data types (fixes Linux Outlaws)
            if episode.mime_type == 'application/octet-stream' and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.file_size = int(e.length) or -1
            except:
                episode.file_size = -1

            return episode

        # Media RSS content
        for m in filter_and_sort_enclosures(media_rss_content):
            episode.mime_type = m.get('type', 'application/octet-stream')
            if '/' not in episode.mime_type:
                continue

            # Skip images in Media RSS if we have audio/video (bug 1444)
            if episode.mime_type.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.file_size = int(m.get('filesize', 0)) or -1
            except:
                episode.file_size = -1

            try:
                episode.total_time = int(m.get('duration', 0)) or 0
            except:
                episode.total_time = 0

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if (youtube.is_video_link(episode.url) or \
                    vimeo.is_video_link(episode.url)):
                return episode

            # Check if we can resolve this link to a audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to a audio or video file - use it!
            if file_type is not None:
                return episode

        return None
예제 #2
0
    def from_feedparser_entry(entry, channel, mimetype_prefs=''):
        episode = PodcastEpisode(channel)
        #log("from_feedparser_entry(%s)" % entry.get('title',''))
        # Replace multi-space and newlines with single space (Maemo bug 11173)
        episode.title = re.sub('\s+', ' ', entry.get('title', ''))
        episode.link = entry.get('link', '')
        #print("summary=%s" % entry.summary)
        if 'content' in entry and len(entry['content']) and \
                entry['content'][0].get('type', '') == 'text/html':
            episode.description = entry['content'][0].value
        else:
            episode.description = entry.get('summary', '')

        try:
            # Parse iTunes-specific podcast duration metadata
            total_time = util.parse_time(entry.get('itunes_duration', ''))
            episode.total_time = total_time
        except:
            pass

        # Fallback to subtitle if summary is not available0
        if not episode.description:
            episode.description = entry.get('subtitle', '')
        #print("episode %s description=%s" % (episode.title,episode.description))
        episode.guid = entry.get('id', '')
        if entry.get('updated_parsed', None):
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        enclosures = entry.get('enclosures', [])
        media_rss_content = entry.get('media_content', [])
        audio_available = any(e.get('type', '').startswith('audio/') \
                for e in enclosures + media_rss_content)
        video_available = any(e.get('type', '').startswith('video/') \
                for e in enclosures + media_rss_content)

        # Create the list of preferred mime types
        mimetype_prefs = mimetype_prefs.split(',')

        def calculate_preference_value(enclosure):
            """Calculate preference value of an enclosure

            This is based on mime types and allows users to prefer
            certain mime types over others (e.g. MP3 over AAC, ...)
            """
            mimetype = enclosure.get('type', None)
            try:
                # If the mime type is found, return its (zero-based) index
                return mimetype_prefs.index(mimetype)
            except ValueError:
                # If it is not found, assume it comes after all listed items
                return len(mimetype_prefs)

        # Enclosures
        for e in sorted(enclosures, key=calculate_preference_value):
            episode.mimetype = e.get('type', 'application/octet-stream')
            if episode.mimetype == '':
                # See Maemo bug 10036
                log('Fixing empty mimetype in ugly feed', sender=episode)
                episode.mimetype = 'application/octet-stream'

            if '/' not in episode.mimetype:
                continue

            # Skip images in feeds if audio or video is available (bug 979)
            # This must (and does) also look in Media RSS enclosures (bug 1430)
            if episode.mimetype.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(e.length) or -1
            except:
                episode.length = -1

            return episode

        # Media RSS content
        for m in sorted(media_rss_content, key=calculate_preference_value):
            episode.mimetype = m.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            # Skip images in Media RSS if we have audio/video (bug 1444)
            if episode.mimetype.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(m.fileSize) or -1
            except:
                episode.length = -1

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if youtube.is_video_link(episode.url):
                return episode

            # Check if we can resolve this link to a audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to a audio or video file - use it!
            if file_type is not None:
                return episode

        # Scan MP3 links in description text
        mp3s = re.compile(r'http://[^"]*\.mp3')
        for content in entry.get('content', ()):
            html = content.value
            for match in mp3s.finditer(html):
                episode.url = match.group(0)
                return episode
               
        #don't return None : for non-podcast channels
        episode.state = gpodder.STATE_NORMAL
        episode.url = ''
        return episode