Example #1
0
    def fill_queue(self):
        page = random.randint(1, 250)
        url = 'https://api.unsplash.com/photos/?page=%d&per_page=30&client_id=%s' % (page, UnsplashDownloader.CLIENT_ID)
        logger.info(lambda: "Filling Unsplash queue from " + url)

        r = Util.request(url)
        if int(r.headers.get('X-Ratelimit-Remaining', 1000000)) < 100:
            UnsplashDownloader.rate_limiting_started_time = time.time()

        for item in r.json():
            try:
                width = item['width']
                height = item['height']
                if self.parent and not self.parent.size_ok(width, height):
                    continue

                image_url = item['links']['download']
                origin_url = item['links']['html']

                filename = os.path.join(self.target_folder, Util.sanitize_filename(image_url.split('/')[-2] + '.jpg'))
                extra_metadata = {
                    'sourceType': 'unsplash',
                    'sfwRating': 100,
                    'author': item['user']['name'],
                    'authorURL': item['user']['links']['html'],
                    'keywords': [cat['title'].lower().strip() for cat in item['categories']]
                }

                self.queue.append((origin_url, image_url, extra_metadata, filename))
            except:
                logger.exception(lambda: "Could not process an item from Unsplash")
                raise

        random.shuffle(self.queue)
        logger.info(lambda: "Unsplash populated with %d URLs" % len(self.queue))
Example #2
0
    def fill_queue(self):
        if time.time() - UnsplashDownloader.rate_limiting_started_time < 3600:
            logger.info(
                lambda:
                "Unsplash queue empty, but rate limit reached, will try again later"
            )
            return []

        page = random.randint(1, 250)
        url = 'https://api.unsplash.com/photos/?page=%d&per_page=30&client_id=%s' % (
            page, UnsplashDownloader.CLIENT_ID)
        logger.info(lambda: "Filling Unsplash queue from " + url)

        r = Util.request(url)
        if int(r.headers.get('X-Ratelimit-Remaining', 1000000)) < 100:
            UnsplashDownloader.rate_limiting_started_time = time.time()

        queue = []
        for item in r.json():
            try:
                width = item['width']
                height = item['height']
                if self.is_size_inadequate(width, height):
                    continue

                image_url = item['urls']['full']
                origin_url = item['links'][
                    'html'] + UnsplashDownloader.UTM_PARAMS

                extra_metadata = {
                    'sourceType':
                    'unsplash',
                    'sfwRating':
                    100,
                    'author':
                    item['user']['name'],
                    'authorURL':
                    item['user']['links']['html'] +
                    UnsplashDownloader.UTM_PARAMS,
                    'keywords': [
                        cat['title'].lower().strip()
                        for cat in item['categories']
                    ],
                    'extraData': {
                        'unsplashDownloadLocation':
                        item['links']['download_location'],
                        'unsplashDownloadReported':
                        False,
                    }
                }

                queue.append((origin_url, image_url, extra_metadata))
            except:
                logger.exception(
                    lambda: "Could not process an item from Unsplash")
                raise

        random.shuffle(queue)
        return queue
    def fill_queue(self):
        if time.time() - UnsplashDownloader.rate_limiting_started_time < 3600:
            logger.info(
                lambda:
                "Unsplash queue empty, but rate limit reached, will try again later"
            )
            return []

        url = self.get_unsplash_api_url()
        logger.info(lambda: "Filling Unsplash queue from " + url)

        r = Util.request(url)
        if int(r.headers.get("X-Ratelimit-Remaining", 1000000)) < 1000:
            UnsplashDownloader.rate_limiting_started_time = time.time()

        queue = []
        for item in r.json():
            try:
                width = item["width"]
                height = item["height"]
                if self.is_size_inadequate(width, height):
                    continue

                image_url = item["urls"]["full"] + "&w={}".format(
                    max(1980, int(Util.get_primary_display_size()[0] * 1.2)))
                origin_url = item["links"][
                    "html"] + UnsplashDownloader.UTM_PARAMS

                extra_metadata = {
                    "sourceType":
                    "unsplash",
                    "sfwRating":
                    100,
                    "author":
                    item["user"]["name"],
                    "authorURL":
                    item["user"]["links"]["html"] +
                    UnsplashDownloader.UTM_PARAMS,
                    "keywords": [
                        cat["title"].lower().strip()
                        for cat in item["categories"]
                    ],
                    "extraData": {
                        "unsplashDownloadLocation":
                        item["links"]["download_location"],
                        "unsplashDownloadReported":
                        False,
                    },
                }

                queue.append((origin_url, image_url, extra_metadata))
            except:
                logger.exception(
                    lambda: "Could not process an item from Unsplash")
                raise

        random.shuffle(queue)
        return queue
Example #4
0
    def save_locally(self, origin_url, image_url,
                     source_type=None, source_location=None, source_name=None,
                     force_download=False, extra_metadata={}, local_filename=None):
        if not source_type:
            source_type = self.source_type
        if not source_name:
            source_name = self.name
        if not source_location:
            source_location = self.location

        if not force_download and self.parent and origin_url in self.parent.banned:
            logger.info(lambda: "URL " + origin_url + " is banned, skip downloading")
            return None

        try:
            os.makedirs(self.target_folder)
        except Exception:
            pass

        if origin_url.startswith('//'):
            origin_url = 'https:' + origin_url

        if image_url.startswith('//'):
            image_url = origin_url.split('//')[0] + image_url

        if not local_filename:
            local_filename = self.get_local_filename(image_url)
        logger.info(lambda: "Origin URL: " + origin_url)
        logger.info(lambda: "Image URL: " + image_url)
        logger.info(lambda: "Local name: " + local_filename)

        if not force_download and os.path.exists(local_filename):
            logger.info(lambda: "File already exists, skip downloading")
            return None

        if self.parent and self.parent.options.safe_mode:
            sfw_rating = Smart.get_sfw_rating(origin_url)
            if sfw_rating is not None and sfw_rating < 100:
                logger.info(lambda: "Skipping non-safe download %s. Is the source %s:%s "
                                    "suitable for Safe mode?" % (origin_url, source_type, self.location))
                return None

        if self.parent and self.parent.options.safe_mode and 'keywords' in extra_metadata:
            blacklisted = set(k.lower() for k in extra_metadata['keywords']) & Smart.get_safe_mode_keyword_blacklist()
            if len(blacklisted) > 0:
                logger.info(lambda: "Skipping non-safe download %s due to blacklisted keywords (%s). "
                                    "Is the source %s:%s suitable for Safe mode?" %
                                    (origin_url, str(blacklisted), source_type, self.location))
                return None

        try:
            r = Util.request(image_url, stream=True)
            with open(local_filename, 'wb') as f:
                Util.request_write_to(r, f)
        except Exception, e:
            logger.info(lambda: "Download failed from image URL: %s (source location: %s) " % (image_url, self.location))
            raise e
Example #5
0
    def fill_queue(self):
        if time.time() - UnsplashDownloader.rate_limiting_started_time < 3600:
            logger.info(
                lambda: "Unsplash queue empty, but rate limit reached, will try again later"
            )
            return []

        page = random.randint(1, 250)
        url = "https://api.unsplash.com/photos/?page=%d&per_page=30&client_id=%s" % (
            page,
            UnsplashDownloader.CLIENT_ID,
        )
        logger.info(lambda: "Filling Unsplash queue from " + url)

        r = Util.request(url)
        if int(r.headers.get("X-Ratelimit-Remaining", 1000000)) < 100:
            UnsplashDownloader.rate_limiting_started_time = time.time()

        queue = []
        for item in r.json():
            try:
                width = item["width"]
                height = item["height"]
                if self.is_size_inadequate(width, height):
                    continue

                image_url = item["urls"]["full"]
                origin_url = item["links"]["html"] + UnsplashDownloader.UTM_PARAMS

                extra_metadata = {
                    "sourceType": "unsplash",
                    "sfwRating": 100,
                    "author": item["user"]["name"],
                    "authorURL": item["user"]["links"]["html"] + UnsplashDownloader.UTM_PARAMS,
                    "keywords": [cat["title"].lower().strip() for cat in item["categories"]],
                    "extraData": {
                        "unsplashDownloadLocation": item["links"]["download_location"],
                        "unsplashDownloadReported": False,
                    },
                }

                queue.append((origin_url, image_url, extra_metadata))
            except:
                logger.exception(lambda: "Could not process an item from Unsplash")
                raise

        random.shuffle(queue)
        return queue
Example #6
0
    def fill_queue(self):
        page = random.randint(1, 250)
        url = 'https://api.unsplash.com/photos/?page=%d&per_page=30&client_id=%s' % (
            page, UnsplashDownloader.CLIENT_ID)
        logger.info(lambda: "Filling Unsplash queue from " + url)

        r = Util.request(url)
        if int(r.headers.get('X-Ratelimit-Remaining', 1000000)) < 100:
            UnsplashDownloader.rate_limiting_started_time = time.time()

        for item in r.json():
            try:
                width = item['width']
                height = item['height']
                if self.parent and not self.parent.size_ok(width, height):
                    continue

                image_url = item['links']['download']
                origin_url = item['links']['html']

                filename = os.path.join(
                    self.target_folder,
                    Util.sanitize_filename(image_url.split('/')[-2] + '.jpg'))
                extra_metadata = {
                    'sourceType':
                    'unsplash',
                    'sfwRating':
                    100,
                    'author':
                    item['user']['name'],
                    'authorURL':
                    item['user']['links']['html'],
                    'keywords': [
                        cat['title'].lower().strip()
                        for cat in item['categories']
                    ]
                }

                self.queue.append(
                    (origin_url, image_url, extra_metadata, filename))
            except:
                logger.exception(
                    lambda: "Could not process an item from Unsplash")
                raise

        random.shuffle(self.queue)
        logger.info(
            lambda: "Unsplash populated with %d URLs" % len(self.queue))
Example #7
0
    def fetch(
        url,
        to_folder,
        origin_url=None,
        source_type=None,
        source_location=None,
        source_name=None,
        extra_metadata=None,
        progress_reporter=lambda a, b: None,
        verbose=True,
    ):
        reported = verbose
        try:
            logger.info(lambda: "Trying to fetch URL %s to %s " %
                        (url, to_folder))
            if verbose:
                progress_reporter(_("Fetching"), url)

            if url.startswith("javascript:"):
                if verbose:
                    progress_reporter(_("Not an image"), url)
                return None

            if url.find("://") < 0:
                url = "file://" + url

            r = Util.request(url, stream=True)
            if not "content-type" in r.headers:
                logger.info(lambda: "Unknown content-type for url " + url)
                if verbose:
                    progress_reporter(_("Not an image"), url)
                return None

            ct = r.headers["content-type"]
            if not ct.startswith("image/"):
                logger.info(lambda: "Unsupported content-type for url " + url +
                            ": " + ct)
                if verbose:
                    progress_reporter(_("Not an image"), url)
                return None

            local_name = Util.get_local_name(r.url)
            if "content-disposition" in r.headers:
                cd = r.headers["content-disposition"]
                cd_name = ImageFetcher.extract_filename_from_content_disposition(
                    cd)
                if cd_name:
                    local_name = cd_name

            filename = os.path.join(to_folder, local_name)
            if os.path.exists(filename):
                m = Util.read_metadata(filename)
                if m and m.get("imageURL") == url:
                    logger.info(
                        lambda: "Local file already exists (%s)" % filename)
                    return filename
                else:
                    logger.info(
                        lambda:
                        "File with same name already exists, but from different imageURL; renaming new download"
                    )
                    filename = Util.find_unique_name(filename)

            logger.info(lambda: "Fetching to " + filename)
            if not reported:
                reported = True
                progress_reporter(_("Fetching"), url)

            local_filepath_partial = filename + ".partial"
            with open(local_filepath_partial, "wb") as f:
                Util.request_write_to(r, f)

            try:
                img = Image.open(local_filepath_partial)
            except Exception:
                progress_reporter(_("Not an image"), url)
                Util.safe_unlink(local_filepath_partial)
                return None

            if img.size[0] < 400 or img.size[1] < 400:
                # too small - delete and do not use
                progress_reporter(_("Image too small, ignoring it"), url)
                Util.safe_unlink(local_filepath_partial)
                return None

            metadata = {
                "sourceType": source_type or "fetched",
                "sourceName": source_name or "Fetched",
                "sourceURL": origin_url or url,
                "imageURL": url,
            }
            if source_location:
                metadata["sourceLocation"] = source_location
            metadata.update(extra_metadata or {})
            Util.write_metadata(local_filepath_partial, metadata)

            os.rename(local_filepath_partial, filename)
            logger.info(lambda: "Fetched %s to %s." % (url, filename))
            return filename

        except Exception as e:
            # pylint: disable=no-member
            logger.exception(lambda: "Fetch failed for URL " + url)
            if reported:
                if isinstance(
                        e, HTTPError) and e.response.status_code in (403, 404):
                    progress_reporter(
                        _("Sorry, got %s error...") %
                        str(e.response.status_code),
                        _("This means the link is no longer valid"),
                    )
                else:
                    progress_reporter(
                        _("Fetch failed for some reason"),
                        _("To get more information, please run Variety from terminal with -v option and retry the action"
                          ),
                    )
            return None
Example #8
0
    def save_locally(
        self,
        origin_url,
        image_url,
        source_type=None,
        source_location=None,
        source_name=None,
        force_download=False,
        extra_metadata=None,
        local_filename=None,
        request_headers=None,
        request_kwargs=None,
    ):
        source_type = source_type or self.get_source_type()
        source_name = source_name or self.get_source_name()
        source_location = source_location or self.get_source_location(
        ) or self.get_description()

        if not force_download and self.is_in_banned(origin_url):
            logger.info(
                lambda: "URL " + origin_url + " is banned, skip downloading")
            return None

        try:
            os.makedirs(self.target_folder)
        except Exception:
            pass

        if origin_url.startswith("//"):
            origin_url = "https:" + origin_url

        if image_url.startswith("//"):
            image_url = origin_url.split("//")[0] + image_url

        # we will download the contents to a ".partial" file, then rename it to the proper name
        if not local_filename:
            local_filename = self.get_local_filename(url=image_url)
        local_filepath = self._local_filepath(local_filename=local_filename)
        local_filepath_partial = local_filepath + ".partial"
        logger.info(lambda: "Origin URL: " + origin_url)
        logger.info(lambda: "Image URL: " + image_url)
        logger.info(lambda: "Local path: " + local_filepath)

        if not force_download and os.path.exists(local_filepath):
            logger.info(lambda: "File already exists, skip downloading")
            return None

        is_unsafe, blacklisted = self.is_unsafe(extra_metadata or {})
        if is_unsafe:
            logger.info(
                lambda:
                "Skipping non-safe download %s due to blacklisted keywords (%s). "
                "Is the source %s:%s suitable for Safe mode?" %
                (origin_url, str(blacklisted), source_type, source_location))
            return None

        try:
            r = Util.request(image_url,
                             stream=True,
                             headers=request_headers,
                             **(request_kwargs or {}))
            with open(local_filepath_partial, "wb") as f:
                Util.request_write_to(r, f)
        except Exception as e:
            logger.info(
                lambda:
                "Download failed from image URL: %s (source location: %s) " %
                (image_url, source_location))
            Util.safe_unlink(local_filepath_partial)
            raise e

        if not Util.is_image(local_filepath_partial, check_contents=True):
            logger.info(
                lambda:
                "Downloaded data was not an image, image URL might be outdated"
            )
            Util.safe_unlink(local_filepath_partial)
            return None

        metadata = {
            "sourceType": source_type,
            "sourceName": source_name,
            "sourceLocation": source_location,
            "sourceURL": origin_url,
            "imageURL": image_url,
        }
        metadata.update(extra_metadata or {})
        Util.write_metadata(local_filepath_partial, metadata)

        # file rename is an atomic operation, so we should never end up with partial downloads
        os.rename(local_filepath_partial, local_filepath)
        logger.info(lambda: "Download complete")
        return local_filepath
Example #9
0
    def save_locally(self,
                     origin_url,
                     image_url,
                     source_type=None,
                     source_location=None,
                     source_name=None,
                     force_download=False,
                     extra_metadata={},
                     local_filename=None):
        if not source_type:
            source_type = self.source_type
        if not source_name:
            source_name = self.name
        if not source_location:
            source_location = self.location

        if not force_download and self.parent and origin_url in self.parent.banned:
            logger.info(
                lambda: "URL " + origin_url + " is banned, skip downloading")
            return None

        try:
            os.makedirs(self.target_folder)
        except Exception:
            pass

        if origin_url.startswith('//'):
            origin_url = 'https:' + origin_url

        if image_url.startswith('//'):
            image_url = origin_url.split('//')[0] + image_url

        if not local_filename:
            local_filename = self.get_local_filename(image_url)
        logger.info(lambda: "Origin URL: " + origin_url)
        logger.info(lambda: "Image URL: " + image_url)
        logger.info(lambda: "Local name: " + local_filename)

        if not force_download and os.path.exists(local_filename):
            logger.info(lambda: "File already exists, skip downloading")
            return None

        if self.parent and self.parent.options.safe_mode:
            sfw_rating = Smart.get_sfw_rating(origin_url)
            if sfw_rating is not None and sfw_rating < 100:
                logger.info(
                    lambda:
                    "Skipping non-safe download %s. Is the source %s:%s "
                    "suitable for Safe mode?" %
                    (origin_url, source_type, self.location))
                return None

        if self.parent and self.parent.options.safe_mode and 'keywords' in extra_metadata:
            blacklisted = set(k.lower() for k in extra_metadata['keywords']
                              ) & Smart.get_safe_mode_keyword_blacklist()
            if len(blacklisted) > 0:
                logger.info(
                    lambda:
                    "Skipping non-safe download %s due to blacklisted keywords (%s). "
                    "Is the source %s:%s suitable for Safe mode?" %
                    (origin_url, str(blacklisted), source_type, self.location))
                return None

        try:
            r = Util.request(image_url, stream=True)
            with open(local_filename, 'wb') as f:
                Util.request_write_to(r, f)
        except Exception, e:
            logger.info(
                lambda:
                "Download failed from image URL: %s (source location: %s) " %
                (image_url, self.location))
            raise e
Example #10
0
    def save_locally(self,
                     origin_url,
                     image_url,
                     source_type=None,
                     source_location=None,
                     source_name=None,
                     force_download=False,
                     extra_metadata={},
                     local_filename=None):
        if not source_type:
            source_type = self.source_type
        if not source_name:
            source_name = self.name
        if not source_location:
            source_location = self.location

        if not force_download and self.parent and origin_url in self.parent.banned:
            logger.info(
                lambda: "URL " + origin_url + " is banned, skip downloading")
            return None

        try:
            os.makedirs(self.target_folder)
        except Exception:
            pass

        if origin_url.startswith('//'):
            origin_url = 'https:' + origin_url

        if image_url.startswith('//'):
            image_url = origin_url.split('//')[0] + image_url

        if not local_filename:
            local_filename = self.get_local_filename(image_url)
        logger.info(lambda: "Origin URL: " + origin_url)
        logger.info(lambda: "Image URL: " + image_url)
        logger.info(lambda: "Local name: " + local_filename)

        if not force_download and os.path.exists(local_filename):
            logger.info(lambda: "File already exists, skip downloading")
            return None

        if self.parent and self.parent.options.safe_mode and 'keywords' in extra_metadata:
            blacklisted = set(
                k.lower()
                for k in extra_metadata['keywords']) & SAFE_MODE_BLACKLIST
            if len(blacklisted) > 0:
                logger.info(
                    lambda:
                    "Skipping non-safe download %s due to blacklisted keywords (%s). "
                    "Is the source %s:%s suitable for Safe mode?" %
                    (origin_url, str(blacklisted), source_type, self.location))
                return None

        try:
            r = Util.request(image_url, stream=True)
            with open(local_filename, 'wb') as f:
                Util.request_write_to(r, f)
        except Exception as e:
            logger.info(
                lambda:
                "Download failed from image URL: %s (source location: %s) " %
                (image_url, self.location))
            raise e

        if not Util.is_image(local_filename, check_contents=True):
            logger.info(
                lambda:
                "Downloaded data was not an image, image URL might be outdated"
            )
            os.unlink(local_filename)
            return None

        metadata = {
            "sourceType": source_type,
            "sourceName": source_name,
            "sourceLocation": source_location,
            "sourceURL": origin_url,
            "imageURL": image_url
        }
        metadata.update(extra_metadata)
        Util.write_metadata(local_filename, metadata)

        logger.info(lambda: "Download complete")
        return local_filename
Example #11
0
    def fetch(url, to_folder, origin_url=None,
              source_type=None, source_location=None, source_name=None,
              extra_metadata={},
              progress_reporter=lambda a, b: None, verbose=True):
        reported = verbose
        try:
            logger.info(lambda: "Trying to fetch URL %s to %s " % (url, to_folder))
            if verbose:
                progress_reporter(_("Fetching"), url)

            if url.startswith('javascript:'):
                if verbose:
                    progress_reporter(_("Not an image"), url)
                return None

            if url.find('://') < 0:
                url = "file://" + url

            r = Util.request(url, stream=True)
            if not "content-type" in r.headers:
                logger.info(lambda: "Unknown content-type for url " + url)
                if verbose:
                    progress_reporter(_("Not an image"), url)
                return None

            ct = r.headers["content-type"]
            if not ct.startswith("image/"):
                logger.info(lambda: "Unsupported content-type for url " + url + ": " + ct)
                if verbose:
                    progress_reporter(_("Not an image"), url)
                return None

            local_name = Util.get_local_name(r.url)
            if "content-disposition" in r.headers:
                cd = r.headers["content-disposition"]
                cd_name = ImageFetcher.extract_filename_from_content_disposition(cd)
                if cd_name:
                    local_name = cd_name

            filename = os.path.join(to_folder, local_name)
            if os.path.exists(filename):
                m = Util.read_metadata(filename)
                if m and m.get("imageURL") == url:
                    logger.info(lambda: "Local file already exists (%s)" % filename)
                    return filename
                else:
                    logger.info(lambda:
                        "File with same name already exists, but from different imageURL; renaming new download")
                    filename = Util.find_unique_name(filename)
                    local_name = os.path.basename(filename)

            logger.info(lambda: "Fetching to " + filename)
            if not reported:
                reported = True
                progress_reporter(_("Fetching"), url)

            with open(filename, 'wb') as f:
                Util.request_write_to(r, f)

            try:
                img = Image.open(filename)
            except Exception:
                progress_reporter(_("Not an image"), url)
                os.unlink(filename)
                return None

            if img.size[0] < 400 or img.size[1] < 400:
                # too small - delete and do not use
                progress_reporter(_("Image too small, ignoring it"), url)
                os.unlink(filename)
                return None

            metadata = {"sourceType": source_type or 'fetched',
                        "sourceName": source_name or "Fetched",
                        "sourceURL": origin_url or url,
                        "imageURL": url}
            if source_location:
                metadata["sourceLocation"] = source_location
            metadata.update(extra_metadata)
            Util.write_metadata(filename, metadata)

            logger.info(lambda: "Fetched %s to %s." % (url, filename))

            return filename

        except Exception, e:
            logger.exception(lambda: "Fetch failed for URL " + url)
            if reported:
                if isinstance(e, HTTPError) and e.response.status_code in (403, 404):
                    progress_reporter(
                        _("Sorry, got %s error...") % str(e.response.status_code),
                        _("This means the link is no longer valid"))
                else:
                    progress_reporter(
                        _("Fetch failed for some reason"),
                        _("To get more information, please run Variety from terminal with -v option and retry the action"))
            return None