def fetch(
    url,
    to_folder,
    origin_url=None,
    source_type=None,
    source_location=None,
    source_name=None,
    extra_metadata=None,
    progress_reporter=lambda a, b: None,
    verbose=True,
):
    """Download the image at ``url`` into ``to_folder`` and attach metadata to it.

    The data is first streamed into ``<filename>.partial`` and only renamed to
    its final name once it has been validated and its metadata written, so a
    failed fetch does not leave a file under the final name.

    Args:
        url: Address of the image. A value without ``://`` is treated as a
            local path and prefixed with ``file://``. ``javascript:`` URLs
            are rejected.
        to_folder: Directory the file is saved into.
        origin_url: Stored as ``sourceURL`` in the metadata; defaults to ``url``.
        source_type: Stored as ``sourceType``; defaults to ``"fetched"``.
        source_location: Stored as ``sourceLocation`` when truthy.
        source_name: Stored as ``sourceName``; defaults to ``"Fetched"``.
        extra_metadata: Optional mapping merged over the generated metadata.
        progress_reporter: Callable invoked with two arguments (a short
            message and a detail such as the URL) to report progress.
        verbose: When true, every step is reported. When false, nothing is
            reported until the download actually starts.

    Returns:
        The path of the saved file, or of an existing file previously
        fetched from the same ``url``. ``None`` when the URL is not an
        image, the image is smaller than 400 pixels in either dimension,
        or the fetch failed (failures are logged, not raised).
    """
    reported = verbose
    # Set once the ".partial" file may exist, so the error handler can remove it.
    partial_path = None
    try:
        logger.info(lambda: "Trying to fetch URL %s to %s " % (url, to_folder))
        if verbose:
            progress_reporter(_("Fetching"), url)

        if url.startswith("javascript:"):
            if verbose:
                progress_reporter(_("Not an image"), url)
            return None

        if url.find("://") < 0:
            url = "file://" + url

        r = Util.request(url, stream=True)
        if "content-type" not in r.headers:
            logger.info(lambda: "Unknown content-type for url " + url)
            if verbose:
                progress_reporter(_("Not an image"), url)
            return None

        ct = r.headers["content-type"]
        if not ct.startswith("image/"):
            logger.info(lambda: "Unsupported content-type for url " + url + ": " + ct)
            if verbose:
                progress_reporter(_("Not an image"), url)
            return None

        # A name from the Content-Disposition header wins over the URL-derived one.
        local_name = Util.get_local_name(r.url)
        if "content-disposition" in r.headers:
            cd = r.headers["content-disposition"]
            cd_name = ImageFetcher.extract_filename_from_content_disposition(cd)
            if cd_name:
                local_name = cd_name

        filename = os.path.join(to_folder, local_name)
        if os.path.exists(filename):
            m = Util.read_metadata(filename)
            if m and m.get("imageURL") == url:
                logger.info(lambda: "Local file already exists (%s)" % filename)
                return filename
            logger.info(
                lambda: "File with same name already exists, but from different imageURL; renaming new download"
            )
            filename = Util.find_unique_name(filename)

        logger.info(lambda: "Fetching to " + filename)
        if not reported:
            reported = True
            progress_reporter(_("Fetching"), url)

        partial_path = filename + ".partial"
        with open(partial_path, "wb") as f:
            Util.request_write_to(r, f)

        try:
            img = Image.open(partial_path)
        except Exception:
            progress_reporter(_("Not an image"), url)
            Util.safe_unlink(partial_path)
            return None

        # Close the image's file handle before the file is unlinked or renamed.
        with img:
            width, height = img.size

        if width < 400 or height < 400:
            # too small - delete and do not use
            progress_reporter(_("Image too small, ignoring it"), url)
            Util.safe_unlink(partial_path)
            return None

        metadata = {
            "sourceType": source_type or "fetched",
            "sourceName": source_name or "Fetched",
            "sourceURL": origin_url or url,
            "imageURL": url,
        }
        if source_location:
            metadata["sourceLocation"] = source_location
        metadata.update(extra_metadata or {})
        Util.write_metadata(partial_path, metadata)

        os.rename(partial_path, filename)
        # The partial file no longer exists under that name; nothing to clean up.
        partial_path = None
        logger.info(lambda: "Fetched %s to %s." % (url, filename))
        return filename

    except Exception as e:
        # pylint: disable=no-member
        logger.exception(lambda: "Fetch failed for URL " + url)
        if partial_path is not None:
            # Do not leave a half-written download behind.
            Util.safe_unlink(partial_path)
        if reported:
            # The exception may carry no response; guard so the handler cannot raise.
            response = getattr(e, "response", None)
            status_code = getattr(response, "status_code", None)
            if isinstance(e, HTTPError) and status_code in (403, 404):
                progress_reporter(
                    _("Sorry, got %s error...") % str(status_code),
                    _("This means the link is no longer valid"),
                )
            else:
                progress_reporter(
                    _("Fetch failed for some reason"),
                    _(
                        "To get more information, please run Variety from terminal with -v option and retry the action"
                    ),
                )
        return None
def save_locally(
    self,
    origin_url,
    image_url,
    source_type=None,
    source_location=None,
    source_name=None,
    force_download=False,
    extra_metadata=None,
    local_filename=None,
    request_headers=None,
    request_kwargs=None,
):
    """Download ``image_url`` into ``self.target_folder`` and write its metadata.

    The data is streamed into ``<local path>.partial`` and renamed to the
    final name only after it has been verified as an image and its metadata
    written.

    Args:
        origin_url: Page the image comes from; stored as ``sourceURL``. A
            protocol-relative value (``//host/...``) is resolved to ``https:``.
        image_url: Address of the image data; stored as ``imageURL``. A
            protocol-relative value takes the scheme of ``origin_url``.
        source_type: Defaults to ``self.get_source_type()``.
        source_location: Defaults to ``self.get_source_location()``, then
            ``self.get_description()``.
        source_name: Defaults to ``self.get_source_name()``.
        force_download: When true, skip the banned-URL and already-exists
            checks.
        extra_metadata: Optional mapping merged over the generated metadata;
            also passed to ``self.is_unsafe``.
        local_filename: File name to use; defaults to
            ``self.get_local_filename(url=image_url)``.
        request_headers: Passed to ``Util.request`` as ``headers``.
        request_kwargs: Extra keyword arguments forwarded to ``Util.request``.

    Returns:
        The final local file path, or ``None`` when the download was skipped
        (banned, already present, flagged unsafe) or the data was not an image.

    Raises:
        Exception: Whatever the request, the file write, the metadata write
            or the rename raised. The partial file is removed first.
    """
    source_type = source_type or self.get_source_type()
    source_name = source_name or self.get_source_name()
    source_location = (
        source_location or self.get_source_location() or self.get_description()
    )

    if not force_download and self.is_in_banned(origin_url):
        logger.info(lambda: "URL " + origin_url + " is banned, skip downloading")
        return None

    # Best-effort: an existing folder is fine. Other failures are logged here
    # and will surface again when the partial file is opened below.
    try:
        os.makedirs(self.target_folder, exist_ok=True)
    except Exception as exc:
        reason = str(exc)
        logger.info(lambda: "Could not create target folder: " + reason)

    if origin_url.startswith("//"):
        origin_url = "https:" + origin_url

    if image_url.startswith("//"):
        # Reuse the origin's scheme ("https:" from "https://host/..."). If the
        # origin has no recognizable scheme, fall back to https rather than
        # gluing the whole origin URL in front of the image URL.
        scheme, separator, _unused = origin_url.partition("//")
        if not separator or not scheme.endswith(":"):
            scheme = "https:"
        image_url = scheme + image_url

    # we will download the contents to a ".partial" file, then rename it to the proper name
    if not local_filename:
        local_filename = self.get_local_filename(url=image_url)
    local_filepath = self._local_filepath(local_filename=local_filename)
    local_filepath_partial = local_filepath + ".partial"
    logger.info(lambda: "Origin URL: " + origin_url)
    logger.info(lambda: "Image URL: " + image_url)
    logger.info(lambda: "Local path: " + local_filepath)

    if not force_download and os.path.exists(local_filepath):
        logger.info(lambda: "File already exists, skip downloading")
        return None

    is_unsafe, blacklisted = self.is_unsafe(extra_metadata or {})
    if is_unsafe:
        logger.info(
            lambda: "Skipping non-safe download %s due to blacklisted keywords (%s). "
            "Is the source %s:%s suitable for Safe mode?"
            % (origin_url, str(blacklisted), source_type, source_location)
        )
        return None

    try:
        r = Util.request(
            image_url, stream=True, headers=request_headers, **(request_kwargs or {})
        )
        with open(local_filepath_partial, "wb") as f:
            Util.request_write_to(r, f)
    except Exception:
        logger.info(
            lambda: "Download failed from image URL: %s (source location: %s) "
            % (image_url, source_location)
        )
        Util.safe_unlink(local_filepath_partial)
        # Bare raise keeps the original traceback intact.
        raise

    if not Util.is_image(local_filepath_partial, check_contents=True):
        logger.info(
            lambda: "Downloaded data was not an image, image URL might be outdated"
        )
        Util.safe_unlink(local_filepath_partial)
        return None

    metadata = {
        "sourceType": source_type,
        "sourceName": source_name,
        "sourceLocation": source_location,
        "sourceURL": origin_url,
        "imageURL": image_url,
    }
    metadata.update(extra_metadata or {})

    try:
        Util.write_metadata(local_filepath_partial, metadata)
        # file rename is an atomic operation, so we should never end up with partial downloads
        os.rename(local_filepath_partial, local_filepath)
    except Exception:
        # Do not leave an orphaned ".partial" file if finalizing fails.
        Util.safe_unlink(local_filepath_partial)
        raise

    logger.info(lambda: "Download complete")
    return local_filepath