Ejemplo n.º 1
0
    def fetch(
        url,
        to_folder,
        origin_url=None,
        source_type=None,
        source_location=None,
        source_name=None,
        extra_metadata=None,
        progress_reporter=lambda a, b: None,
        verbose=True,
    ):
        """Download the image at ``url`` into ``to_folder`` and tag it with metadata.

        The data is first written to ``<filename>.partial``, validated, tagged
        and only then renamed to its final name.

        NOTE(review): there is no ``self``/``cls`` parameter, so this is
        presumably a static method whose decorator is outside this excerpt.

        Args:
            url: Address of the image. ``javascript:`` URLs are rejected; a
                value without ``://`` is treated as a local path and gets a
                ``file://`` prefix.
            to_folder: Folder in which the downloaded file is stored.
            origin_url: Stored as ``sourceURL`` in the metadata; defaults to
                ``url``.
            source_type: Stored as ``sourceType``; defaults to ``"fetched"``.
            source_location: Stored as ``sourceLocation`` when truthy.
            source_name: Stored as ``sourceName``; defaults to ``"Fetched"``.
            extra_metadata: Optional dict merged over the metadata above.
            progress_reporter: Callable taking two arguments (a short message
                and a detail string) used to report progress and failures.
            verbose: When false, the initial "Fetching" message and the
                "Not an image" messages for rejections that happen before the
                download starts are suppressed, and errors raised before the
                download starts are not reported either.

        Returns:
            The path of the stored file (or of an already existing file that
            was fetched from the same ``url``), or ``None`` when the URL is
            rejected, the data is not a usable image, or any error occurs.
        """
        reported = verbose
        # Becomes the path of the temporary download once it is known, so the
        # error handler can remove a half-written file.
        local_filepath_partial = None
        try:
            logger.info(lambda: "Trying to fetch URL %s to %s " %
                        (url, to_folder))
            if verbose:
                progress_reporter(_("Fetching"), url)

            if url.startswith("javascript:"):
                if verbose:
                    progress_reporter(_("Not an image"), url)
                return None

            # No scheme at all: treat the value as a local file path.
            if "://" not in url:
                url = "file://" + url

            # NOTE(review): the streamed response is never explicitly closed on
            # the early-return paths below - confirm whether Util.request
            # results need an explicit close().
            r = Util.request(url, stream=True)
            if "content-type" not in r.headers:
                logger.info(lambda: "Unknown content-type for url " + url)
                if verbose:
                    progress_reporter(_("Not an image"), url)
                return None

            ct = r.headers["content-type"]
            if not ct.startswith("image/"):
                logger.info(lambda: "Unsupported content-type for url " + url +
                            ": " + ct)
                if verbose:
                    progress_reporter(_("Not an image"), url)
                return None

            # A file name supplied via Content-Disposition wins over the one
            # derived from the (possibly redirected) response URL.
            local_name = Util.get_local_name(r.url)
            if "content-disposition" in r.headers:
                cd = r.headers["content-disposition"]
                cd_name = ImageFetcher.extract_filename_from_content_disposition(
                    cd)
                if cd_name:
                    local_name = cd_name

            filename = os.path.join(to_folder, local_name)
            if os.path.exists(filename):
                m = Util.read_metadata(filename)
                if m and m.get("imageURL") == url:
                    logger.info(
                        lambda: "Local file already exists (%s)" % filename)
                    return filename
                else:
                    logger.info(
                        lambda:
                        "File with same name already exists, but from different imageURL; renaming new download"
                    )
                    filename = Util.find_unique_name(filename)

            logger.info(lambda: "Fetching to " + filename)
            # In non-verbose mode the first report happens only now, when an
            # actual download is about to start.
            if not reported:
                reported = True
                progress_reporter(_("Fetching"), url)

            local_filepath_partial = filename + ".partial"
            with open(local_filepath_partial, "wb") as f:
                Util.request_write_to(r, f)

            try:
                img = Image.open(local_filepath_partial)
            except Exception:
                progress_reporter(_("Not an image"), url)
                Util.safe_unlink(local_filepath_partial)
                return None

            # Only the dimensions are needed; release the file handle right
            # away instead of keeping it open until garbage collection.
            with img:
                width, height = img.size

            if width < 400 or height < 400:
                # too small - delete and do not use
                progress_reporter(_("Image too small, ignoring it"), url)
                Util.safe_unlink(local_filepath_partial)
                return None

            metadata = {
                "sourceType": source_type or "fetched",
                "sourceName": source_name or "Fetched",
                "sourceURL": origin_url or url,
                "imageURL": url,
            }
            if source_location:
                metadata["sourceLocation"] = source_location
            metadata.update(extra_metadata or {})
            Util.write_metadata(local_filepath_partial, metadata)

            os.rename(local_filepath_partial, filename)
            # The temporary file no longer exists under its old name.
            local_filepath_partial = None
            logger.info(lambda: "Fetched %s to %s." % (url, filename))
            return filename

        except Exception as e:
            # pylint: disable=no-member
            logger.exception(lambda: "Fetch failed for URL " + url)
            # Do not leave a half-written or untagged download behind.
            if local_filepath_partial:
                Util.safe_unlink(local_filepath_partial)
            if reported:
                # An HTTPError is not guaranteed to carry a response object.
                response = getattr(e, "response", None)
                if (isinstance(e, HTTPError) and response is not None
                        and response.status_code in (403, 404)):
                    progress_reporter(
                        _("Sorry, got %s error...") %
                        str(response.status_code),
                        _("This means the link is no longer valid"),
                    )
                else:
                    progress_reporter(
                        _("Fetch failed for some reason"),
                        _("To get more information, please run Variety from terminal with -v option and retry the action"
                          ),
                    )
            return None
Ejemplo n.º 2
0
    def save_locally(
        self,
        origin_url,
        image_url,
        source_type=None,
        source_location=None,
        source_name=None,
        force_download=False,
        extra_metadata=None,
        local_filename=None,
        request_headers=None,
        request_kwargs=None,
    ):
        """Download ``image_url`` to a local file and tag it with metadata.

        The data is written to ``<local path>.partial``, checked to be an
        image, tagged, and only then renamed to the final path.

        Args:
            origin_url: Page the image comes from; checked against the banned
                list and stored as ``sourceURL``. A leading ``//`` is
                completed with ``https:``.
            image_url: Direct URL of the image data. A leading ``//`` inherits
                the scheme of ``origin_url`` (``https:`` if it has none).
            source_type: Stored as ``sourceType``; defaults to
                ``self.get_source_type()``.
            source_location: Stored as ``sourceLocation``; defaults to
                ``self.get_source_location()`` or ``self.get_description()``.
            source_name: Stored as ``sourceName``; defaults to
                ``self.get_source_name()``.
            force_download: When true, the banned check and the
                "file already exists" check are skipped.
            extra_metadata: Optional dict used for the safe-mode check and
                merged over the metadata above.
            local_filename: File name to use; defaults to
                ``self.get_local_filename(url=image_url)``.
            request_headers: Passed to ``Util.request`` as ``headers``.
            request_kwargs: Optional dict of extra keyword arguments for
                ``Util.request``.

        Returns:
            The final local path, or ``None`` when the download is skipped
            (banned URL, file already present, unsafe content) or the
            downloaded data is not an image.

        Raises:
            Exception: Whatever the download, validation, metadata write or
                rename raised; the ``.partial`` file is removed first.
        """
        source_type = source_type or self.get_source_type()
        source_name = source_name or self.get_source_name()
        source_location = (source_location or self.get_source_location()
                           or self.get_description())
        extra = extra_metadata or {}

        if not force_download and self.is_in_banned(origin_url):
            logger.info(
                lambda: "URL " + origin_url + " is banned, skip downloading")
            return None

        # Best effort: carry on if the folder cannot be created (the download
        # below will then fail), but leave a trace of the real cause.
        try:
            if not os.path.isdir(self.target_folder):
                os.makedirs(self.target_folder)
        except Exception:
            logger.exception(lambda: "Could not create target folder")

        if origin_url.startswith("//"):
            origin_url = "https:" + origin_url

        if image_url.startswith("//"):
            # Protocol-relative URL: reuse the origin's scheme. Fall back to
            # https when the origin has none, rather than gluing the image URL
            # onto the whole origin string.
            scheme = origin_url.split("//")[0]
            if not scheme.endswith(":"):
                scheme = "https:"
            image_url = scheme + image_url

        # we will download the contents to a ".partial" file, then rename it to the proper name
        if not local_filename:
            local_filename = self.get_local_filename(url=image_url)
        local_filepath = self._local_filepath(local_filename=local_filename)
        local_filepath_partial = local_filepath + ".partial"
        logger.info(lambda: "Origin URL: " + origin_url)
        logger.info(lambda: "Image URL: " + image_url)
        logger.info(lambda: "Local path: " + local_filepath)

        if not force_download and os.path.exists(local_filepath):
            logger.info(lambda: "File already exists, skip downloading")
            return None

        is_unsafe, blacklisted = self.is_unsafe(extra)
        if is_unsafe:
            logger.info(
                lambda:
                "Skipping non-safe download %s due to blacklisted keywords (%s). "
                "Is the source %s:%s suitable for Safe mode?" %
                (origin_url, str(blacklisted), source_type, source_location))
            return None

        try:
            r = Util.request(image_url,
                             stream=True,
                             headers=request_headers,
                             **(request_kwargs or {}))
            with open(local_filepath_partial, "wb") as f:
                Util.request_write_to(r, f)
        except Exception:
            logger.info(
                lambda:
                "Download failed from image URL: %s (source location: %s) " %
                (image_url, source_location))
            Util.safe_unlink(local_filepath_partial)
            # Bare raise keeps the original traceback intact.
            raise

        try:
            if not Util.is_image(local_filepath_partial, check_contents=True):
                logger.info(
                    lambda:
                    "Downloaded data was not an image, image URL might be outdated"
                )
                Util.safe_unlink(local_filepath_partial)
                return None

            metadata = {
                "sourceType": source_type,
                "sourceName": source_name,
                "sourceLocation": source_location,
                "sourceURL": origin_url,
                "imageURL": image_url,
            }
            metadata.update(extra)
            Util.write_metadata(local_filepath_partial, metadata)

            # file rename is an atomic operation, so we should never end up with partial downloads
            os.rename(local_filepath_partial, local_filepath)
        except Exception:
            # Validation, tagging or the rename failed: do not leave the
            # temporary file behind.
            Util.safe_unlink(local_filepath_partial)
            raise

        logger.info(lambda: "Download complete")
        return local_filepath