def check_for_updates(up_to_date_callback=None):
    """Checks the AUTOUPDATE_URL for the recent version.

    The ``up_to_date_callback`` is a function that should take no
    arguments and return nothing.
    """
    import miro.plat
    # idiom fix: truth-test instead of ``== False`` (flag is a platform
    # boolean -- see miro.plat)
    if not miro.plat.AUTOUPDATE:
        logging.info("this platform has autoupdate disabled. skipping.")
        return
    global check_in_progress
    if not check_in_progress:
        check_in_progress = True
        logging.info("Checking for updates...")
        if app.config.get(prefs.APP_FINAL_RELEASE) == u"0":
            # if this is not a final release, look at the beta channel
            url = app.config.get(prefs.AUTOUPDATE_BETA_URL)
            logging.info("Using the beta channel")
        else:
            # if this is a final release, look at the final channel
            url = app.config.get(prefs.AUTOUPDATE_URL)
            logging.info("Using the final channel")
        logging.info("check_for_updates: checking %s", url)

        # PEP8 E731: use a def, not a lambda bound to a name
        def update_handler(data):
            return _handle_app_cast(data, up_to_date_callback)
        error_handler = _handle_error
        grab_url(url, update_handler, error_handler)
def add_subscription_url(prefix, expected_content_type, url):
    """Strip ``prefix`` from ``url``, download the remainder, and
    subscribe to the subscription list it contains, complaining to the
    user when the download or parse fails."""
    real_url = url[len(prefix):]

    def on_download(info):
        # Reject responses whose content type doesn't match what the
        # prefix promised.
        if info.get('content-type') != expected_content_type:
            text = _(
                "This %(appname)s podcast file has the wrong content type: "
                "%(url)s. Please notify the publisher of this file.",
                {"appname": app.config.get(prefs.SHORT_APP_NAME),
                 "url": real_url})
            _complain_about_subscription_url(text)
            return
        parsed = autodiscover.parse_content(info['body'])
        if parsed is None:
            text = _(
                "This %(appname)s podcast file has an invalid format: "
                "%(url)s. Please notify the publisher of this file.",
                {"appname": app.config.get(prefs.SHORT_APP_NAME),
                 "url": real_url})
            _complain_about_subscription_url(text)
        else:
            subscription.Subscriber().add_subscriptions(parsed)

    def on_error(error):
        text = _(
            "Could not download the %(appname)s podcast file: %(url)s",
            {"appname": app.config.get(prefs.SHORT_APP_NAME),
             "url": real_url})
        _complain_about_subscription_url(text)

    httpclient.grab_url(real_url, on_download, on_error)
def _scrape_youtube_url(url, callback):
    """Extract a YouTube video id from ``url`` and request its
    get_video_info page; calls ``callback(None)`` when scraping fails."""
    check_u(url)
    parts = urlparse.urlsplit(url)
    query = cgi.parse_qs(parts[3])
    path = parts[2]
    video_id = None
    if path == u'/watch' and 'v' in query:
        try:
            video_id = query['v'][0]
        except IndexError:
            pass
    elif path.startswith('/v/'):
        match = re.compile(r'/v/([\w-]+)').match(path)
        if match is not None:
            video_id = match.group(1)
    if video_id is None:
        logging.warning('_scrape_youtube_url: unable to scrape YouTube Video URL')
        callback(None)
        return
    try:
        url = (u"http://www.youtube.com/get_video_info?video_id=%s"
               u"&el=embedded&ps=default&eurl=" % video_id)
        httpclient.grab_url(
            url,
            lambda x: _youtube_callback_step2(x, video_id, callback),
            lambda x: _youtube_errback(x, callback))
    except StandardError:
        logging.exception("youtube_callback: unable to scrape YouTube Video URL")
        callback(None)
def request_icon(self):
    """Start fetching this item's icon if it needs one.

    Must run on the db thread (confirm_db_thread below).  Every early
    exit notifies icon_cache_updater that this update is finished;
    failure paths delegate to error_callback instead.
    """
    if self.removed:
        icon_cache_updater.update_finished()
        return
    self.dbItem.confirm_db_thread()
    if self.updating:
        # a fetch is already in flight -- flag a re-check for when it
        # completes rather than starting a second one
        self.needsUpdate = True
        icon_cache_updater.update_finished()
        return
    if hasattr(self.dbItem, "get_thumbnail_url"):
        url = self.dbItem.get_thumbnail_url()
    else:
        url = self.url
    # Only verify each icon once per run unless the url changes
    if (url == self.url and self.filename
            and fileutil.access(self.filename, os.R_OK)):
        icon_cache_updater.update_finished()
        return
    self.updating = True
    # No need to extract the icon again if we already have it.
    # NOTE(review): local/file URLs are routed to error_callback here --
    # presumably it handles cleanup and update_finished; verify.
    if url is None or url.startswith(u"/") or url.startswith(u"file://"):
        self.error_callback(url)
        return
    # Last try, get the icon from HTTP.
    httpclient.grab_url(url,
                        lambda info: self.update_icon_cache(url, info),
                        lambda error: self.error_callback(url, error))
def check_for_updates(up_to_date_callback=None):
    """Checks the AUTOUPDATE_URL for the recent version.

    The ``up_to_date_callback`` is a function that should take no
    arguments and return nothing.
    """
    import miro.plat
    # truth-test is equivalent and idiomatic here (was ``== False``)
    if not miro.plat.AUTOUPDATE:
        logging.info("this platform has autoupdate disabled. skipping.")
        return
    global check_in_progress
    if not check_in_progress:
        check_in_progress = True
        logging.info("Checking for updates...")
        if app.config.get(prefs.APP_FINAL_RELEASE) == u"0":
            # if this is not a final release, look at the beta channel
            url = app.config.get(prefs.AUTOUPDATE_BETA_URL)
            logging.info("Using the beta channel")
        else:
            # if this is a final release, look at the final channel
            url = app.config.get(prefs.AUTOUPDATE_URL)
            logging.info("Using the final channel")
        logging.info("check_for_updates: checking %s", url)

        # named def instead of a lambda assignment (PEP8 E731)
        def update_handler(data):
            return _handle_app_cast(data, up_to_date_callback)
        error_handler = _handle_error
        grab_url(url, update_handler, error_handler)
def callback(headers):
    """We need to figure out if the URL is a external video link, or a
    link to a feed.
    """
    if check_url_exists(url):
        return
    content_type = headers.get("content-type")
    if content_type:
        # Known feed type -> subscribe directly.
        if filetypes.is_feed_content_type(content_type):
            add_feeds([url])
            return
        # Flash video we know how to scrape -> download as FLV.
        if flashscraper.is_maybe_flashscrapable(url):
            download_video(_build_entry(url, "video/x-flv",
                                        additional=metadata))
            return
        # Ambiguous type -> peek at the body before deciding.
        if filetypes.is_maybe_feed_content_type(content_type):
            logging.info("%s content type is %s. "
                         "going to peek to see if it's a feed....",
                         url, content_type)
            httpclient.grab_url(url, callback_peek, errback)
            return
    # Fall through: treat as a single enclosure of the reported type.
    entry = _build_entry(url, content_type)
    if filetypes.is_video_enclosure(entry["enclosures"][0]):
        download_video(entry)
    else:
        handle_unknown_callback(url)
def add_subscription_url(prefix, expected_content_type, url):
    """Download the subscription file at ``url`` (minus ``prefix``) and
    add its subscriptions, reporting problems to the user."""
    real_url = url[len(prefix):]

    def _substitutions():
        # values interpolated into every user-facing complaint
        return {"appname": app.config.get(prefs.SHORT_APP_NAME),
                "url": real_url}

    def handle_body(info):
        if info.get('content-type') == expected_content_type:
            parsed = autodiscover.parse_content(info['body'])
            if parsed is not None:
                subscription.Subscriber().add_subscriptions(parsed)
            else:
                _complain_about_subscription_url(_(
                    "This %(appname)s podcast file has an invalid format: "
                    "%(url)s. Please notify the publisher of this file.",
                    _substitutions()))
        else:
            _complain_about_subscription_url(_(
                "This %(appname)s podcast file has the wrong content type: "
                "%(url)s. Please notify the publisher of this file.",
                _substitutions()))

    def handle_failure(error):
        _complain_about_subscription_url(_(
            "Could not download the %(appname)s podcast file: %(url)s",
            _substitutions()))

    httpclient.grab_url(real_url, handle_body, handle_failure)
def request_icon(self):
    """Kick off an icon fetch for this item when one is warranted.

    Runs on the db thread.  Paths that do no work report completion via
    app.icon_cache_updater.update_finished(); local/bad URLs are handed
    to error_callback, and everything else goes through httpclient.
    """
    if self.removed:
        app.icon_cache_updater.update_finished()
        return
    self.dbItem.confirm_db_thread()
    if self.updating:
        # another fetch is running; schedule a follow-up check instead
        self.needsUpdate = True
        app.icon_cache_updater.update_finished()
        return
    if hasattr(self.dbItem, "get_thumbnail_url"):
        url = self.dbItem.get_thumbnail_url()
    else:
        url = self.url
    # Only verify each icon once per run unless the url changes
    if (url == self.url and self.filename
            and fileutil.access(self.filename, os.R_OK)):
        app.icon_cache_updater.update_finished()
        return
    self.updating = True
    # No need to extract the icon again if we already have it.
    # NOTE(review): None / "/" / "file://" urls go straight to
    # error_callback -- assumed to handle completion bookkeeping; verify.
    if url is None or url.startswith(u"/") or url.startswith(u"file://"):
        self.error_callback(url)
        return
    # Last try, get the icon from HTTP.
    httpclient.grab_url(url,
                        lambda info: self.update_icon_cache(url, info),
                        lambda error: self.error_callback(url, error))
def query_7digital(self, release_id):
    """Look up 7digital data for ``release_id``, serving from the cache
    when we already have it."""
    if release_id in self.seven_digital_cache:
        self.handle_7digital_cache_hit(release_id)
        return
    self.release_id = release_id
    httpclient.grab_url(self._make_7digital_url(release_id),
                        self.seven_digital_callback,
                        self.seven_digital_errback)
def _scrape_vimeo_moogaloop_url(url, callback):
    """Rewrite a vimeo moogaloop embed URL into a clip-load request and
    fetch it; calls ``callback(None)`` if anything goes wrong."""
    try:
        clip_id = MEGALOOP_RE.match(url).group(2)
        url = u"http://www.vimeo.com/moogaloop/load/clip:%s" % clip_id
        on_data = lambda x: _scrape_vimeo_callback(x, callback)
        on_fail = lambda x: _scrape_vimeo_errback(x, callback)
        httpclient.grab_url(url, on_data, on_fail)
    except StandardError:
        logging.warning("Unable to scrape vimeo.com moogaloop URL: %s", url)
        callback(None)
def query_echonest_with_code(self, code, version, metadata):
    """POST an echoprint code to the song/identify endpoint, asking for
    7digital track buckets in the response."""
    identify_url = 'http://echonest.pculture.org/api/v4/song/identify?'
    form = {
        'api_key': ECHO_NEST_API_KEY,
        'bucket': ['tracks', 'id:7digital'],
        'query': self._make_echonest_query(code, version, metadata),
    }
    httpclient.grab_url(identify_url,
                        self.echonest_callback,
                        self.echonest_errback,
                        post_vars=form)
def _scrape_vimeo_moogaloop_url(url, callback):
    """Convert a moogaloop embed URL into a clip-load scrape request;
    hands None to ``callback`` on any failure."""
    try:
        match = MEGALOOP_RE.match(url)
        url = u"http://www.vimeo.com/moogaloop/load/clip:%s" % match.group(2)
        httpclient.grab_url(url,
                            lambda x: _scrape_vimeo_callback(x, callback),
                            lambda x: _scrape_vimeo_errback(x, callback))
    except StandardError:
        logging.warning("Unable to scrape vimeo.com moogaloop URL: %s", url)
        callback(None)
def query_echonest_with_echonest_id(self, echonest_id):
    """Fetch the echonest song profile for an already-known id."""
    query = urllib.urlencode([
        ('api_key', ECHO_NEST_API_KEY),
        ('bucket', 'tracks'),
        ('bucket', 'id:7digital'),
        ('id', echonest_id),
    ])
    profile_url = 'http://echonest.pculture.org/api/v4/song/profile?' + query
    httpclient.grab_url(profile_url,
                        self.echonest_callback,
                        self.echonest_errback)
def _scrape_veohtv_video_url(url, callback):
    """Resolve a Veoh page URL into its movieList.html listing request;
    calls ``callback(None)`` when the URL can't be parsed."""
    try:
        split = urlparse.urlsplit(url)
        query = cgi.parse_qs(split[3])
        kind = query['type'][0]
        permalink_id = query['permalinkId'][0]
        url = (u'http://www.veoh.com/movieList.html'
               u'?type=%s&permalinkId=%s&numResults=45'
               % (kind, permalink_id))
        httpclient.grab_url(url,
                            lambda x: _scrape_veohtv_callback(x, callback),
                            lambda x: _scrape_veohtv_errback(x, callback))
    except StandardError:
        logging.warning("unable to scrape Veoh URL: %s", url)
        callback(None)
def _scrape_vimeo_video_url(url, callback):
    """Scrape a vimeo.com video page URL via the moogaloop clip-load
    endpoint; calls ``callback(None)`` when scraping fails."""
    try:
        id_ = VIMEO_RE.match(url).group(2)
        url = u"http://www.vimeo.com/moogaloop/load/clip:%s" % id_
        httpclient.grab_url(
            url,
            lambda x: _scrape_vimeo_callback(x, callback),
            lambda x: _scrape_vimeo_errback(x, callback))
    # Consistency fix: the old ``except (SystemExit, KeyboardInterrupt):
    # raise`` + bare ``except:`` pair is the pre-StandardError idiom; the
    # sibling scrapers in this file all use ``except StandardError:``,
    # which likewise lets SystemExit/KeyboardInterrupt propagate.
    except StandardError:
        logging.warning("Unable to scrape vimeo.com video URL: %s", url)
        callback(None)
def _scrape_vimeo_video_url_try_2(url, callback, vimeo_id):
    """Try scraping vimeo URLs by scraping the javascript code.

    This method seems less reliable than the regular method, but it
    works for private videos.  See #19305
    """
    page_url = u'http://vimeo.com/%s' % vimeo_id
    on_success = lambda x: _scrape_vimeo_download_try_2_callback(
        x, callback, vimeo_id)
    on_failure = lambda x: _scrape_vimeo_download_errback(x, callback, url)
    httpclient.grab_url(page_url, on_success, on_failure)
def _scrape_vmix_video_url(url, callback):
    """Resolve a VMix page URL to its videos.php listing request; calls
    ``callback(None)`` if the URL lacks the expected parameters."""
    try:
        parts = urlparse.urlsplit(url)
        query = cgi.parse_qs(parts[3])
        url = (u"http://sdstage01.vmix.com/videos.php?type=%s&id=%s&l=%s"
               % (query['type'][0], query['id'][0], query['l'][0]))
        httpclient.grab_url(url,
                            lambda x: _scrape_vmix_callback(x, callback),
                            lambda x: _scrape_vmix_errback(x, callback))
    except StandardError:
        logging.warning("unable to scrape VMix Video URL: %s", url)
        callback(None)
def query_echonest_with_tags(self, metadata):
    """Search echonest by title/artist tags, keeping only the most
    popular match."""
    params = [
        ('api_key', ECHO_NEST_API_KEY),
        ('bucket', 'tracks'),
        ('bucket', 'id:7digital'),
        # In case there are multiple songs for the same artist/title, only
        # use the "hottest" song, AKA the most popular.
        ('results', '1'),
        ('sort', 'song_hotttnesss-desc'),
    ]
    params.extend((key, metadata[key].encode('utf-8'))
                  for key in ('title', 'artist') if key in metadata)
    search_url = ('http://echonest.pculture.org/api/v4/song/search?'
                  + urllib.urlencode(params))
    httpclient.grab_url(search_url,
                        self.echonest_callback,
                        self.echonest_errback)
def __init__(self, report, description, send_database):
    """Build and send a crash report.

    report -- crash log text (str or unicode)
    description -- user-entered description (str or unicode)
    send_database -- when true, attach a zip backup of the support dir

    Emits 'finished' when the upload completes (see callback/errback).
    """
    signals.SignalEmitter.__init__(self)
    self.create_signal('finished')
    self.is_done = False
    backupfile = None
    if send_database:
        try:
            logging.info("Sending entire database")
            backupfile = self._backup_support_dir()
        except StandardError:
            # best-effort: a failed backup shouldn't block the report
            logging.exception("Failed to backup database")
    # Normalize both text fields to UTF-8 bytes: Python 2 ``str`` input
    # is first decoded using the locale's preferred encoding.
    if isinstance(report, str):
        report = report.decode(locale.getpreferredencoding())
    report = report.encode("utf-8", "ignore")
    if isinstance(description, str):
        description = description.decode(locale.getpreferredencoding())
    description = description.encode("utf-8", "ignore")
    post_vars = {"description": description,
                 "app_name": app.config.get(prefs.LONG_APP_NAME),
                 "log": report}
    if backupfile:
        post_files = {"databasebackup":
                      {"filename": "databasebackup.zip",
                       "mimetype": "application/octet-stream",
                       "handle": backupfile,
                       }}
    else:
        post_files = None
    logging.info("Sending crash report....")
    self.client = httpclient.grab_url(BOGON_URL,
                                      self.callback, self.errback,
                                      post_vars=post_vars,
                                      post_files=post_files)
def get_metainfo(self):
    """Load the torrent metainfo if we don't have it yet.

    file:// URLs are read from disk (missing file -> handle_error);
    anything else is fetched with httpclient.  When metainfo is already
    present, just signals got_metainfo().
    """
    if self.metainfo is None:
        if self.url.startswith('file://'):
            path = get_file_url_path(self.url)
            try:
                metainfoFile = open(path, 'rb')
            except IOError:
                # the .torrent file vanished from disk
                self.handle_error(
                    _("Torrent file deleted"),
                    _("The torrent file for this item was deleted "
                      "outside of %(appname)s.",
                      {"appname": app.config.get(prefs.SHORT_APP_NAME)}
                      ))
                return
            try:
                metainfo = metainfoFile.read()
            finally:
                metainfoFile.close()
            self.handle_metainfo(metainfo)
        else:
            # remote torrent: download it, validating the body with
            # check_description as it arrives
            self.description_client = httpclient.grab_url(self.url,
                    self.on_metainfo_download,
                    self.on_metainfo_download_error,
                    content_check_callback=self.check_description)
    else:
        self.got_metainfo()
def add_subscriptions(self, subscriptions_list, parent_folder=None):
    """We loop through the list of subscriptions, creating things as
    we go (if needed).  We also keep track of what we've added.

    Each type (folder, feed, site, download) gets dispatched to one of
    our methods.  Each dispatcher returns True if it's added the
    subscription, anything else if it's been ignored for some reason
    (generally because it's already present in the DB).  The only
    exception to this is the 'folder' type, which has the same return
    signature as this method.

    Returns a tuple of dictionaries (added, ignored).  Each dictionary
    maps a subscription type (feed, site, download) to the number of
    added/ignored items in this subscription.
    """
    added = {}
    ignored = {}
    # loop variable renamed from ``subscription``: that name shadows the
    # ``subscription`` module used elsewhere in this file
    for sub in subscriptions_list:
        subscription_type = sub['type']
        handler = getattr(self, 'handle_%s' % subscription_type, None)
        if handler is None:
            raise ValueError('unknown subscription type: %s' %
                             subscription_type)
        trackback = sub.get('trackback')
        if trackback:
            # fire-and-forget tracking ping; result and errors ignored
            httpclient.grab_url(trackback, lambda x: None, lambda x: None)
        ret = handler(sub, parent_folder)
        if ret:
            if subscription_type == 'folder':
                # folders return an (added, ignored) pair -- merge it
                for key, value in ret[0].items():
                    added.setdefault(key, []).extend(value)
                for key, value in ret[1].items():
                    ignored.setdefault(key, []).extend(value)
            else:
                added.setdefault(subscription_type, []).append(sub)
        else:
            ignored.setdefault(subscription_type, []).append(sub)
    return added, ignored
def _scrape_vimeo_video_url(url, callback, countdown=10):
    """Request vimeo's download page for the clip id embedded in ``url``,
    spoofing an AJAX browser request; ``callback(None)`` on failure."""
    try:
        clip_id = VIMEO_RE.match(url).group(2)
        url = 'http://vimeo.com/%s?action=download' % clip_id
        # vimeo only serves the download page to what looks like an
        # in-browser XHR, so fake the relevant headers
        headers = {
            'Referer': 'http://vimeo.com/%s' % clip_id,
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64) '
                           'AppleWebKit/536.11 (KHTML, like Gecko) '
                           'Chrome/20.0.1132.8 Safari/536.11'),
        }
        httpclient.grab_url(
            url,
            lambda x: _scrape_vimeo_download_callback(x, callback),
            lambda x: _scrape_vimeo_download_errback(x, callback, url),
            extra_headers=headers)
    except StandardError:
        logging.exception("Unable to scrape vimeo.com video URL: %s", url)
        callback(None)
def _scrape_vimeo_video_url(url, callback, countdown=10):
    """Request vimeo's download page for the clip in ``url``; on error
    falls back to _scrape_vimeo_video_url_try_2 with the original URL."""
    try:
        clip_id = VIMEO_RE.match(url).group(2)
        download_url = 'http://vimeo.com/%s?action=download' % clip_id
        # spoof an in-browser XHR so vimeo serves the download page
        extra = {
            'Referer': 'http://vimeo.com/%s' % clip_id,
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64) '
                           'AppleWebKit/536.11 (KHTML, like Gecko) '
                           'Chrome/20.0.1132.8 Safari/536.11'),
        }
        httpclient.grab_url(
            download_url,
            lambda x: _scrape_vimeo_download_callback(x, callback),
            lambda x: _scrape_vimeo_video_url_try_2(url, callback, clip_id),
            extra_headers=extra)
    except StandardError:
        logging.exception("Unable to scrape vimeo.com video URL: %s", url)
        callback(None)
def check_for_updates(up_to_date_callback=None):
    """Checks the AUTOUPDATE_URL for the recent version.

    The ``up_to_date_callback`` is a function that should take no
    arguments and return nothing.
    """
    import miro.plat
    # idiom fix: ``not flag`` instead of ``flag == False``
    if not miro.plat.AUTOUPDATE:
        logging.info("this platform has autoupdate disabled. skipping.")
        return
    global check_in_progress
    if not check_in_progress:
        check_in_progress = True
        logging.info("Checking for updates...")
        url = app.config.get(prefs.AUTOUPDATE_URL)

        # def instead of lambda assignment (PEP8 E731)
        def update_handler(data):
            return _handle_app_cast(data, up_to_date_callback)
        error_handler = _handle_error
        grab_url(url, update_handler, error_handler)
def callback(headers, content_type=None):
    """We need to figure out if the URL is a external video link, or a
    link to a feed.

    ``content_type`` overrides the response's content-type header when
    the caller already knows it.
    """
    # (stray debug ``print`` statement removed -- leftover from
    # development, it wrote to stdout on every callback)
    if check_url_exists(url):
        return
    if content_type is None:
        content_type = headers.get("content-type")
    if content_type:
        if filetypes.is_feed_content_type(content_type):
            add_feeds([url])
            return
        if flashscraper.is_maybe_flashscrapable(url):
            entry = _build_entry(url, 'video/x-flv', additional=metadata)
            download_video(entry)
            return
        if amazon.is_amazon_content_type(content_type):
            amazon.download_file(url, handle_unknown_callback)
            return
        if filetypes.is_maybe_feed_content_type(content_type):
            logging.info(
                "%s content type is %s. "
                "going to peek to see if it's a feed....",
                url, content_type)
            httpclient.grab_url(url, callback_peek, errback)
            return
    entry = _build_entry(url, content_type)
    if filetypes.is_video_enclosure(entry['enclosures'][0]):
        download_video(entry)
    else:
        handle_unknown_callback(url)
def start_download(self, resume=True):
    """Begin (or restart) the HTTP download for this item.

    resume -- attempt to resume a partial file; downgraded to False
    when _resume_sanity_check() rejects the existing data.
    """
    if self.retryDC:
        # cancel any pending retry timer before starting fresh
        self.retryDC.cancel()
        self.retryDC = None
    if resume:
        resume = self._resume_sanity_check()
    logging.info("start_download: %s", self.url)
    self.client = httpclient.grab_url(
        self.url, self.on_download_finished, self.on_download_error,
        header_callback=self.on_headers, write_file=self.filename,
        resume=resume)
    self.update_client()
    # periodically refresh transfer statistics while downloading
    eventloop.add_timeout(self.CHECK_STATS_TIMEOUT, self.update_stats,
                          'update http downloader stats')
def test_quick_cancel(self):
    """Cancel a grab_url while it is still resolving HTTP auth and
    verify no transfer starts and no callback fires."""
    # Try canceling before find_http_auth returns and make sure things
    # work.
    self.setup_answer("user", "password")
    url = self.httpserver.build_url('protected/index.txt')
    self.grab_url_error = self.grab_url_info = None
    self.client = httpclient.grab_url(url, self.grab_url_callback,
                                      self.grab_url_errback)
    # at this point, our client should be finding the HTTP auth.  It
    # shouldn't have started transfering data.
    self.assertEquals(len(httpclient.curl_manager.transfer_map), 0)
    self.client.cancel()
    self.expecting_errback = True
    # run the event loop briefly so any (incorrect) pending work fires
    eventloop.add_timeout(0.2, self.stopEventLoop, 'stopping event loop',
                          args=(False,))
    self.runEventLoop(timeout=self.event_loop_timeout)
    # make sure that the callback/errback weren't called and nothing is
    # transfering
    self.check_nothing_called()
    self.assertEquals(len(httpclient.curl_manager.transfer_map), 0)
def download_guide(self):
    """Start an asynchronous fetch of this guide's page."""
    self.client = httpclient.grab_url(
        self.get_url(), self.guide_downloaded, self.guide_error)
def grab_url(self, url, *args, **kwargs):
    """Test helper: run httpclient.grab_url inside the event loop and
    block until it finishes (or the loop times out)."""
    self.grab_url_error = None
    self.grab_url_info = None
    self.client = httpclient.grab_url(url,
                                      self.grab_url_callback,
                                      self.grab_url_errback,
                                      *args, **kwargs)
    self.runEventLoop(timeout=self.event_loop_timeout)
def fetch_cover_art(self):
    """Download the cover art image straight into grab_url_dest."""
    httpclient.grab_url(
        self.cover_art_url,
        self.cover_art_callback,
        self.cover_art_errback,
        write_file=self.grab_url_dest)