예제 #1
0
    def get_logo_inline(self):
        """ Downloads the feed's logo and returns it as a data URI

        Returns None when inlining is switched off (self.inline_logo is
        falsy), when the feed has no logo URL, or when fetching the logo
        fails. In the failure case a 'fetch-logo' warning is recorded and
        the message is logged at info level.

        If self.scale_to or self.logo_format is set, the image is passed
        through transform_image before being encoded.
        """

        logo_url = self.get_logo_url() if self.inline_logo else None
        if not logo_url:
            return None

        try:
            (url, content, last_mod_up, last_mod_utc, etag, content_type,
             length) = fetch_url(logo_url)

        except Exception as exc:
            warning = 'could not fetch feed logo %(logo_url)s: %(msg)s' % {
                'logo_url': logo_url,
                'msg': str(exc),
            }
            self.add_warning('fetch-logo', warning)
            logging.info(warning)
            return None

        # TODO: uncomment
        #if last_mod_up and mod_since_up and last_mod_up <= mod_since_up:
        #    return None

        # The content_type reported by fetch_url is not passed on; the
        # mimetype lookup only receives the URL that fetch_url returned.
        mtype = mimetype.get_mimetype(None, url)

        transform_args = {
            'size': self.scale_to,
            'img_format': self.logo_format,
        }

        # Only touch the image if at least one transformation is requested
        if any(transform_args.values()):
            content, mtype = transform_image(content, mtype, **transform_args)

        return get_data_uri(content, mtype)
예제 #2
0
    def get_logo_inline(self):
        """ Retrieves the feed's logo and encodes it as a data URI

        None is returned if self.inline_logo is falsy, if get_logo_url()
        yields nothing, or if the download raises. A failed download is
        recorded as a 'fetch-logo' warning and logged at info level.

        The image is run through transform_image first whenever
        self.scale_to or self.logo_format is set.
        """

        logo_url = self.get_logo_url() if self.inline_logo else None
        if not logo_url:
            return None

        try:
            (url, content, last_mod_up, last_mod_utc, etag, content_type,
             length) = fetch_url(logo_url)

        except Exception as fetch_error:
            details = {'logo_url': logo_url, 'msg': str(fetch_error)}
            text = 'could not fetch feed logo %(logo_url)s: %(msg)s' % details
            self.add_warning('fetch-logo', text)
            logging.info(text)
            return None

        # TODO: uncomment
        #if last_mod_up and mod_since_up and last_mod_up <= mod_since_up:
        #    return None

        # get_mimetype is given None plus the URL returned by fetch_url;
        # the response's content_type is not used here.
        mtype = mimetype.get_mimetype(None, url)

        transform_args = {
            'size': self.scale_to,
            'img_format': self.logo_format,
        }

        # Skip the transformation step when nothing was requested
        if any(transform_args.values()):
            content, mtype = transform_image(content, mtype, **transform_args)

        return get_data_uri(content, mtype)
예제 #3
0
    def get_coverart(self):
        """Return the avatar URL of the SoundCloud user

        Requests the user's JSON record from the SoundCloud API (using
        self.username and settings.SOUNDCLOUD_CONSUMER_KEY), decodes the
        response content as JSON and returns its 'avatar_url' entry.

        Returns None if the decoded record has no 'avatar_url' entry.
        """
        json_url = 'http://api.soundcloud.com/users/%s.json?consumer_key=%s' \
            % (self.username, settings.SOUNDCLOUD_CONSUMER_KEY)

        resp = fetch_url(json_url)
        user_info = json.loads(resp.content)
        return user_info.get('avatar_url')
예제 #4
0
    def get_coverart(self):
        """Return the avatar URL of the SoundCloud user

        Requests the user's JSON record from the SoundCloud API (using
        self.username and settings.SOUNDCLOUD_CONSUMER_KEY), decodes the
        response content as JSON and returns its 'avatar_url' entry.

        Returns None if the decoded record has no 'avatar_url' entry.
        """
        json_url = 'http://api.soundcloud.com/users/%s.json?consumer_key=%s' \
            % (self.username, settings.SOUNDCLOUD_CONSUMER_KEY)

        resp = fetch_url(json_url)
        user_info = json.loads(resp.content)
        return user_info.get('avatar_url')
예제 #5
0
    def get_metadata(self, url):
        """Look up metadata for a file download

        Performs a headers-only request for the given download URL via
        fetch_url and returns a (size, type, name) tuple taken from the
        response: its length, its content type, and a name derived from
        the response URL.
        """

        response = fetch_url(url, headers_only=True)

        size = response.length
        content_type = response.content_type

        # The name is the path component *before* the last one, i.e. the
        # basename of the URL's directory part.
        name = os.path.basename(os.path.dirname(response.url))

        return (size, content_type, name)
예제 #6
0
    def get_metadata(self, url):
        """Determine size, type and name of a download

        The download URL is requested through fetch_url with
        headers_only=True; the result is returned as a (size, type, name)
        tuple built from the response's length, content type and URL.
        """

        head = fetch_url(url, headers_only=True)

        length = head.length
        mime = head.content_type

        # Drop the last path component, then take the last component of
        # what remains.
        parent = os.path.dirname(head.url)
        filename = os.path.basename(parent)

        return (length, mime, filename)
예제 #7
0
        def get_urls(data_config_url):
            """Yield (fileformat, url) pairs found in the data config

            The config is fetched from data_config_url, decoded as UTF-8
            and parsed as JSON. Entries under request/files that are not
            dicts are skipped, as are per-format values that are not dicts.
            """
            raw_config = fetch_url(data_config_url).read()
            config = json.loads(raw_config.decode("utf-8"))
            files = config["request"]["files"]

            for entry in files.values():
                if isinstance(entry, dict):
                    for fileformat, details in entry.items():
                        if isinstance(details, dict):
                            yield (fileformat, details["url"])
예제 #8
0
        def get_urls(data_config_url):
            """Yield (fileformat, url) pairs found in the data config

            Downloads the document at data_config_url, decodes it as UTF-8
            JSON and walks request/files. Anything that is not a dict, at
            either nesting level, is ignored.
            """
            payload = fetch_url(data_config_url).read()
            parsed = json.loads(payload.decode('utf-8'))
            file_groups = parsed['request']['files']

            for group in file_groups.values():
                if isinstance(group, dict):
                    for fileformat, info in group.items():
                        if isinstance(info, dict):
                            yield (fileformat, info['url'])
예제 #9
0
    def get_real_download_url(self, url, preferred_fileformat=None):
        """Resolve a Vimeo URL to the URL of one of its media files

        url: the URL to resolve; returned unchanged if get_vimeo_id()
             finds no video id in it
        preferred_fileformat: if given and available, the URL for this
             file format is returned

        Without a usable preferred format, the available formats are
        ranked by their position in FILEFORMAT_RANKING (a later position
        wins; formats not listed rank like position 0) and the URL of the
        best one is returned. Returns None if no file format was found.

        Raises VimeoError if the data config URL cannot be located in the
        video page.
        """
        video_id = get_vimeo_id(url)

        if video_id is None:
            return url

        web_url = 'http://vimeo.com/%s' % video_id

        # read() hands back bytes (get_urls below decodes the same kind of
        # result), so decode before searching and before the str-based
        # replace() further down. Undecodable bytes are replaced because
        # the page is only scanned with a regular expression.
        # NOTE(review): assumes DATA_CONFIG_RE is a str pattern - confirm.
        web_data = fetch_url(web_url).read().decode('utf-8', 'replace')
        data_config_frag = DATA_CONFIG_RE.search(web_data)

        if data_config_frag is None:
            raise VimeoError('Cannot get data config from Vimeo')

        # The URL is embedded in HTML, so ampersands arrive escaped
        data_config_url = data_config_frag.group(1).replace('&amp;', '&')

        def get_urls(data_config_url):
            """Yield (fileformat, url) pairs from the JSON data config"""
            data_config_data = fetch_url(data_config_url).read().decode(
                'utf-8')
            data_config = json.loads(data_config_data)
            for fileinfo in data_config['request']['files'].values():
                if not isinstance(fileinfo, dict):
                    continue

                for fileformat, keys in fileinfo.items():
                    if not isinstance(keys, dict):
                        continue

                    yield (fileformat, keys['url'])

        fileformat_to_url = dict(get_urls(data_config_url))

        if preferred_fileformat is not None and preferred_fileformat in fileformat_to_url:
            logger.debug('Picking preferred format: %s', preferred_fileformat)
            return fileformat_to_url[preferred_fileformat]

        def fileformat_sort_key_func(fileformat):
            """Rank a format by its index in FILEFORMAT_RANKING (else 0)"""
            if fileformat in FILEFORMAT_RANKING:
                return FILEFORMAT_RANKING.index(fileformat)

            return 0

        # Only the first (best-ranked) entry is ever used
        for fileformat in sorted(fileformat_to_url,
                                 key=fileformat_sort_key_func,
                                 reverse=True):
            logger.debug('Picking best format: %s', fileformat)
            return fileformat_to_url[fileformat]

        # No file formats were found in the data config
        return None
예제 #10
0
    def get_real_download_url(self, url, preferred_fileformat=None):
        """Resolve a Vimeo URL to the URL of one of its media files

        url: the URL to resolve; returned unchanged if get_vimeo_id()
             finds no video id in it
        preferred_fileformat: if given and available, the URL for this
             file format is returned

        Without a usable preferred format, the available formats are
        ranked by their position in FILEFORMAT_RANKING (a later position
        wins; formats not listed rank like position 0) and the URL of the
        best one is returned. Returns None if no file format was found.

        Raises VimeoError if the data config URL cannot be located in the
        video page.
        """
        video_id = get_vimeo_id(url)

        if video_id is None:
            return url

        web_url = "http://vimeo.com/%s" % video_id

        # read() hands back bytes (get_urls below decodes the same kind of
        # result), so decode before searching and before the str-based
        # replace() further down. Undecodable bytes are replaced because
        # the page is only scanned with a regular expression.
        # NOTE(review): assumes DATA_CONFIG_RE is a str pattern - confirm.
        web_data = fetch_url(web_url).read().decode("utf-8", "replace")
        data_config_frag = DATA_CONFIG_RE.search(web_data)

        if data_config_frag is None:
            raise VimeoError("Cannot get data config from Vimeo")

        # The URL is embedded in HTML, so ampersands arrive escaped
        data_config_url = data_config_frag.group(1).replace("&amp;", "&")

        def get_urls(data_config_url):
            """Yield (fileformat, url) pairs from the JSON data config"""
            data_config_data = fetch_url(data_config_url).read().decode("utf-8")
            data_config = json.loads(data_config_data)
            for fileinfo in data_config["request"]["files"].values():
                if not isinstance(fileinfo, dict):
                    continue

                for fileformat, keys in fileinfo.items():
                    if not isinstance(keys, dict):
                        continue

                    yield (fileformat, keys["url"])

        fileformat_to_url = dict(get_urls(data_config_url))

        if preferred_fileformat is not None and preferred_fileformat in fileformat_to_url:
            logger.debug("Picking preferred format: %s", preferred_fileformat)
            return fileformat_to_url[preferred_fileformat]

        def fileformat_sort_key_func(fileformat):
            """Rank a format by its index in FILEFORMAT_RANKING (else 0)"""
            if fileformat in FILEFORMAT_RANKING:
                return FILEFORMAT_RANKING.index(fileformat)

            return 0

        # Only the first (best-ranked) entry is ever used
        for fileformat in sorted(fileformat_to_url, key=fileformat_sort_key_func, reverse=True):
            logger.debug("Picking best format: %s", fileformat)
            return fileformat_to_url[fileformat]

        # No file formats were found in the data config
        return None
예제 #11
0
def parse_feed(feed_url, text_processor, mod_since_utc=None):
    """ Fetches a feed and returns the object produced by its parser

    feed_url: URL of the feed; also used to select the parser class
    text_processor: handed to the parser to pre-process text contents
    mod_since_utc: handed to fetch_url; when NotModified is raised the
                   feed is ignored and None is returned

    HTTP, URL, socket, value and parser errors are re-raised as
    FetchFeedException, chained to the original exception.
    """

    parser_cls = get_parser_cls(feed_url)

    try:
        response = fetch_url(feed_url, mod_since_utc)
        return parser_cls(
            feed_url, response, text_processor=text_processor).get_feed()

    except NotModified:
        return None

    except (
        http.client.HTTPException,
        urllib.error.URLError,
        urllib.error.HTTPError,
        ValueError,
        socket.error,
        ParserException,
    ) as err:
        raise FetchFeedException(err) from err
예제 #12
0
    def get_tracks(self, feed):
        """Get a generator of tracks from a SC user

        Requests the given feed of self.username from the SoundCloud API,
        passes the decoded JSON response to self._check_error and then
        yields one dictionary per track whose 'downloadable' entry is
        truthy. Each dictionary has the keys 'title', 'link',
        'description', 'url', 'guid' and 'pubDate'.

        feed: name of the per-user feed, inserted into the API URL
        """

        json_url = 'http://api.soundcloud.com/users/%(user)s/%(feed)s.json?' \
                   'filter=downloadable&consumer_key=%(consumer_key)s' % \
                   {
                       "user": self.username,
                       "feed": feed,
                       "consumer_key": settings.SOUNDCLOUD_CONSUMER_KEY
                   }

        res = fetch_url(json_url)
        response = json.loads(res.content)

        self._check_error(response)

        # The same query string is appended to every track URL
        key_suffix = '?consumer_key=%(consumer_key)s' \
            % {'consumer_key': settings.SOUNDCLOUD_CONSUMER_KEY}

        for track in response:
            if not track['downloadable']:
                continue

            # Prefer stream URL (MP3), fallback to download URL. The
            # fallback is only looked up when it is needed, so a track
            # that has a stream URL but no download URL no longer fails.
            url = (track.get('stream_url') or track['download_url']) \
                + key_suffix

            yield {
                'title':
                track.get('title', track.get('permalink', 'Unknown track')),
                'link':
                track.get('permalink_url',
                          'http://soundcloud.com/' + self.username),
                'description':
                track.get('description', 'No description available'),
                'url':
                url,
                'guid':
                track.get('permalink', track.get('id')),
                'pubDate':
                self.parsedate(track.get('created_at', None)),
            }
예제 #13
0
def parse_feed(feed_url, text_processor, mod_since_utc=None):
    """ Retrieves a feed and hands back its parsed representation

    feed_url: URL of the feed; the parser class is chosen from it
    text_processor: class to pre-process text contents, given to the parser
    mod_since_utc: forwarded to fetch_url; if NotModified is raised, None
                   is returned instead of a feed

    Errors from the HTTP/URL/socket layers, ValueError and ParserException
    are wrapped in FetchFeedException with the original as its cause.
    """

    parser_cls = get_parser_cls(feed_url)

    try:
        fetched = fetch_url(feed_url, mod_since_utc)
        feed_parser = parser_cls(
            feed_url, fetched, text_processor=text_processor)
        return feed_parser.get_feed()

    except NotModified:
        return None

    except (
        http.client.HTTPException,
        urllib.error.URLError,
        urllib.error.HTTPError,
        ValueError,
        socket.error,
        ParserException,
    ) as error:
        raise FetchFeedException(error) from error
예제 #14
0
    def get_tracks(self, feed):
        """Get a generator of tracks from a SC user

        Requests the given feed of self.username from the SoundCloud API,
        passes the decoded JSON response to self._check_error and then
        yields one dictionary per track whose 'downloadable' entry is
        truthy. Each dictionary has the keys 'title', 'link',
        'description', 'url', 'guid' and 'pubDate'.

        feed: name of the per-user feed, inserted into the API URL
        """

        json_url = 'http://api.soundcloud.com/users/%(user)s/%(feed)s.json?' \
                   'filter=downloadable&consumer_key=%(consumer_key)s' % \
                   {
                       "user": self.username,
                       "feed": feed,
                       "consumer_key": settings.SOUNDCLOUD_CONSUMER_KEY
                   }

        res = fetch_url(json_url)
        response = json.loads(res.content)

        self._check_error(response)

        # The same query string is appended to every track URL
        key_suffix = '?consumer_key=%(consumer_key)s' \
            % {'consumer_key': settings.SOUNDCLOUD_CONSUMER_KEY}

        for track in response:
            if not track['downloadable']:
                continue

            # Prefer stream URL (MP3), fallback to download URL. The
            # fallback is only looked up when it is needed, so a track
            # that has a stream URL but no download URL no longer fails.
            url = (track.get('stream_url') or track['download_url']) \
                + key_suffix

            yield {
                'title': track.get('title',
                                   track.get('permalink', 'Unknown track')),
                'link': track.get('permalink_url',
                                  'http://soundcloud.com/'+self.username),
                'description': track.get('description',
                                         'No description available'),
                'url': url,
                'guid': track.get('permalink', track.get('id')),
                'pubDate': self.parsedate(track.get('created_at', None)),
            }