def get_logo_inline(self):
    """Return the feed's logo encoded as a data URI, or None.

    None is returned when ``self.inline_logo`` is falsy, when
    ``self.get_logo_url()`` yields nothing, or when fetching the logo
    raises (in which case a 'fetch-logo' warning is recorded and logged).
    The image is passed through ``transform_image`` only when
    ``self.scale_to`` or ``self.logo_format`` is set.
    """
    logo_url = self.get_logo_url() if self.inline_logo else None
    if not logo_url:
        return None

    try:
        # fetch_url is expected to hand back exactly seven values; only the
        # final URL and the body are used below.
        (url, content, _last_mod_up, _last_mod_utc, _etag, _content_type,
         _length) = fetch_url(logo_url)
    except Exception as err:
        warning = 'could not fetch feed logo %(logo_url)s: %(msg)s' % {
            'logo_url': logo_url,
            'msg': str(err),
        }
        self.add_warning('fetch-logo', warning)
        logging.info(warning)
        return None

    # TODO: re-enable the "not modified" short-circuit, i.e. return None
    # when last_mod_up <= mod_since_up (mod_since_up is not available here).

    mime = mimetype.get_mimetype(None, url)

    size = self.scale_to
    img_format = self.logo_format
    if size or img_format:
        content, mime = transform_image(
            content, mime, size=size, img_format=img_format)

    return get_data_uri(content, mime)
def get_coverart(self):
    """Return the avatar URL of the SoundCloud user, or None.

    Fetches the JSON record for ``self.username`` from the SoundCloud API
    and returns its 'avatar_url' entry. None is returned when the record
    does not contain that key.
    """
    json_url = 'http://api.soundcloud.com/users/%s.json?consumer_key=%s' \
        % (self.username, settings.SOUNDCLOUD_CONSUMER_KEY)
    resp = fetch_url(json_url)
    user_info = json.loads(resp.content)
    return user_info.get('avatar_url')
def get_metadata(self, url):
    """Look up download metadata for *url*.

    Performs a headers-only fetch and returns a ``(size, type, name)``
    tuple built from the response: its length, its content type, and the
    last component of the parent path of the response URL.
    """
    response = fetch_url(url, headers_only=True)

    size = response.length
    content_type = response.content_type
    # The name is the path segment *before* the final one, not the final
    # segment itself.
    name = os.path.basename(os.path.dirname(response.url))

    return (size, content_type, name)
def get_urls(data_config_url):
    """Yield ``(fileformat, url)`` pairs from a data-config JSON document.

    The document at *data_config_url* is fetched, decoded as UTF-8 and
    parsed as JSON. Every dict found under ``["request"]["files"]`` is
    scanned, and each of its dict-valued entries contributes one pair made
    of the entry's key and its ``"url"`` value. Non-dict values at either
    level are skipped.
    """
    raw = fetch_url(data_config_url).read()
    config = json.loads(raw.decode("utf-8"))

    for entry in config["request"]["files"].values():
        if isinstance(entry, dict):
            for fmt, details in entry.items():
                if isinstance(details, dict):
                    yield (fmt, details["url"])
def get_urls(data_config_url):
    """Yield ``(fileformat, url)`` pairs from a data-config JSON document.

    The document at *data_config_url* is fetched, decoded as UTF-8 and
    parsed as JSON. Every dict found under ``['request']['files']`` is
    scanned, and each of its dict-valued entries contributes one pair made
    of the entry's key and its ``'url'`` value. Non-dict values at either
    level are skipped.
    """
    raw = fetch_url(data_config_url).read()
    config = json.loads(raw.decode('utf-8'))

    for entry in config['request']['files'].values():
        if isinstance(entry, dict):
            for fmt, details in entry.items():
                if isinstance(details, dict):
                    yield (fmt, details['url'])
def get_real_download_url(self, url, preferred_fileformat=None):
    """Resolve a Vimeo URL to a direct media URL.

    url: the URL to resolve; returned unchanged when ``get_vimeo_id``
        cannot extract a video ID from it
    preferred_fileformat: format key to pick if the video offers it

    Returns the URL of the preferred format when available, otherwise the
    URL of the format ranked highest by FILEFORMAT_RANKING, or None when
    the data config lists no formats at all.

    Raises VimeoError when the video page contains no data config URL.
    """
    video_id = get_vimeo_id(url)
    if video_id is None:
        return url

    web_url = 'http://vimeo.com/%s' % video_id
    web_data = fetch_url(web_url).read()
    data_config_frag = DATA_CONFIG_RE.search(web_data)
    if data_config_frag is None:
        raise VimeoError('Cannot get data config from Vimeo')

    # The URL is taken out of the page markup, where '&' is entity-encoded
    # as '&amp;'; unescape it before requesting the config.
    data_config_url = data_config_frag.group(1).replace('&amp;', '&')

    def get_urls(data_config_url):
        """Yield (fileformat, url) pairs found in the data config JSON."""
        data_config_data = fetch_url(data_config_url).read().decode(
            'utf-8')
        data_config = json.loads(data_config_data)
        for fileinfo in data_config['request']['files'].values():
            if not isinstance(fileinfo, dict):
                continue
            for fileformat, keys in fileinfo.items():
                if not isinstance(keys, dict):
                    continue
                yield (fileformat, keys['url'])

    fileformat_to_url = dict(get_urls(data_config_url))

    if (preferred_fileformat is not None
            and preferred_fileformat in fileformat_to_url):
        logger.debug('Picking preferred format: %s', preferred_fileformat)
        return fileformat_to_url[preferred_fileformat]

    def fileformat_sort_key_func(fileformat):
        """Rank a format by its position in FILEFORMAT_RANKING.

        Formats not listed there get 0, i.e. they tie with the first
        (lowest-ranked) entry.
        """
        if fileformat in FILEFORMAT_RANKING:
            return FILEFORMAT_RANKING.index(fileformat)
        return 0

    # max() returns the first of several equally ranked formats, which is
    # the same element a stable reverse sort would put first.
    best_fileformat = max(
        fileformat_to_url, key=fileformat_sort_key_func, default=None)
    if best_fileformat is None:
        # No formats were listed in the data config.
        return None

    logger.debug('Picking best format: %s', best_fileformat)
    return fileformat_to_url[best_fileformat]
def get_real_download_url(self, url, preferred_fileformat=None):
    """Resolve a Vimeo URL to a direct media URL.

    url: the URL to resolve; returned unchanged when ``get_vimeo_id``
        cannot extract a video ID from it
    preferred_fileformat: format key to pick if the video offers it

    Returns the URL of the preferred format when available, otherwise the
    URL of the format ranked highest by FILEFORMAT_RANKING, or None when
    the data config lists no formats at all.

    Raises VimeoError when the video page contains no data config URL.
    """
    video_id = get_vimeo_id(url)
    if video_id is None:
        return url

    web_url = "http://vimeo.com/%s" % video_id
    web_data = fetch_url(web_url).read()
    data_config_frag = DATA_CONFIG_RE.search(web_data)
    if data_config_frag is None:
        raise VimeoError("Cannot get data config from Vimeo")

    # The URL is taken out of the page markup, where '&' is entity-encoded
    # as '&amp;'; unescape it before requesting the config.
    data_config_url = data_config_frag.group(1).replace("&amp;", "&")

    def get_urls(data_config_url):
        """Yield (fileformat, url) pairs found in the data config JSON."""
        data_config_data = fetch_url(data_config_url).read().decode("utf-8")
        data_config = json.loads(data_config_data)
        for fileinfo in data_config["request"]["files"].values():
            if not isinstance(fileinfo, dict):
                continue
            for fileformat, keys in fileinfo.items():
                if not isinstance(keys, dict):
                    continue
                yield (fileformat, keys["url"])

    fileformat_to_url = dict(get_urls(data_config_url))

    if (preferred_fileformat is not None
            and preferred_fileformat in fileformat_to_url):
        logger.debug("Picking preferred format: %s", preferred_fileformat)
        return fileformat_to_url[preferred_fileformat]

    def fileformat_sort_key_func(fileformat):
        """Rank a format by its position in FILEFORMAT_RANKING.

        Formats not listed there get 0, i.e. they tie with the first
        (lowest-ranked) entry.
        """
        if fileformat in FILEFORMAT_RANKING:
            return FILEFORMAT_RANKING.index(fileformat)
        return 0

    # max() returns the first of several equally ranked formats, which is
    # the same element a stable reverse sort would put first.
    best_fileformat = max(
        fileformat_to_url, key=fileformat_sort_key_func, default=None)
    if best_fileformat is None:
        # No formats were listed in the data config.
        return None

    logger.debug("Picking best format: %s", best_fileformat)
    return fileformat_to_url[best_fileformat]
def parse_feed(feed_url, text_processor, mod_since_utc=None):
    """Fetch and parse a feed, returning its JSON object.

    feed_url: address of the feed; also used to select the parser class
    text_processor: class to pre-process text contents
    mod_since_utc: feeds that have not changed since this timestamp are
        ignored, in which case None is returned

    Raises FetchFeedException (chained to the underlying error) when the
    fetch or the parse fails with one of the handled network, value or
    parser errors.
    """
    parser_cls = get_parser_cls(feed_url)

    try:
        response = fetch_url(feed_url, mod_since_utc)
        return parser_cls(
            feed_url, response, text_processor=text_processor
        ).get_feed()

    except NotModified:
        return None

    except (
        http.client.HTTPException,
        urllib.error.URLError,
        urllib.error.HTTPError,
        ValueError,
        socket.error,
        ParserException,
    ) as err:
        raise FetchFeedException(err) from err
def get_tracks(self, feed):
    """Yield a dictionary for every downloadable track of a SC user.

    feed: name of the user's track listing to query; it is inserted into
        the API URL as ``/users/<username>/<feed>.json``

    Each yielded dictionary has the keys 'title', 'link', 'description',
    'url', 'guid' and 'pubDate'. The decoded API response is handed to
    ``self._check_error`` before any track is yielded.

    Raises KeyError when a track has neither 'stream_url' nor
    'download_url', or lacks the 'downloadable' flag.
    """
    consumer_key = settings.SOUNDCLOUD_CONSUMER_KEY

    json_url = 'http://api.soundcloud.com/users/%(user)s/%(feed)s.json?' \
        'filter=downloadable&consumer_key=%(consumer_key)s' % {
            "user": self.username,
            "feed": feed,
            "consumer_key": consumer_key,
        }

    res = fetch_url(json_url)
    response = json.loads(res.content)
    self._check_error(response)

    for track in response:
        if not track['downloadable']:
            continue

        # Prefer stream URL (MP3), fallback to download URL. The fallback
        # is looked up only when needed, so a track that has a stream URL
        # but no 'download_url' key no longer raises KeyError.
        if 'stream_url' in track:
            base_url = track['stream_url']
        else:
            base_url = track['download_url']

        url = base_url + '?consumer_key=%(consumer_key)s' \
            % {'consumer_key': consumer_key}

        yield {
            'title': track.get('title',
                               track.get('permalink', 'Unknown track')),
            'link': track.get('permalink_url',
                              'http://soundcloud.com/' + self.username),
            'description': track.get('description',
                                     'No description available'),
            'url': url,
            'guid': track.get('permalink', track.get('id')),
            'pubDate': self.parsedate(track.get('created_at', None)),
        }
def get_tracks(self, feed):
    """Yield a dictionary for every downloadable track of a SC user.

    feed: name of the user's track listing to query; it is inserted into
        the API URL as ``/users/<username>/<feed>.json``

    Each yielded dictionary has the keys 'title', 'link', 'description',
    'url', 'guid' and 'pubDate'. The decoded API response is handed to
    ``self._check_error`` before any track is yielded.

    Raises KeyError when a track has neither 'stream_url' nor
    'download_url', or lacks the 'downloadable' flag.
    """
    consumer_key = settings.SOUNDCLOUD_CONSUMER_KEY

    json_url = 'http://api.soundcloud.com/users/%(user)s/%(feed)s.json?' \
        'filter=downloadable&consumer_key=%(consumer_key)s' % {
            "user": self.username,
            "feed": feed,
            "consumer_key": consumer_key,
        }

    res = fetch_url(json_url)
    response = json.loads(res.content)
    self._check_error(response)

    for track in response:
        if not track['downloadable']:
            continue

        # Prefer stream URL (MP3), fallback to download URL. The fallback
        # is looked up only when needed, so a track that has a stream URL
        # but no 'download_url' key no longer raises KeyError.
        if 'stream_url' in track:
            base_url = track['stream_url']
        else:
            base_url = track['download_url']

        url = base_url + '?consumer_key=%(consumer_key)s' \
            % {'consumer_key': consumer_key}

        yield {
            'title': track.get('title',
                               track.get('permalink', 'Unknown track')),
            'link': track.get('permalink_url',
                              'http://soundcloud.com/'+self.username),
            'description': track.get('description',
                                     'No description available'),
            'url': url,
            'guid': track.get('permalink', track.get('id')),
            'pubDate': self.parsedate(track.get('created_at', None)),
        }