def test_metadata(self):
    """Round-trip image metadata through the JPEG and SVG test fixtures."""
    self.assertTrue(os.path.exists("test.jpg"))

    expected = {
        "sourceName": "a",
        "sourceURL": "b",
        "sourceLocation": "c",
        "imageURL": "d",
        "sourceType": "flickr",
        "author": "автор",
        "authorURL": "url",
        "keywords": ["дума1", "дума2"],
        "headline": "проба1 проба1",
        "description": "проба2 проба2",
        "sfwRating": 50,
        "extraData": {"key1": "val1", "key2": "val2"},
    }

    # The JPEG fixture already carries exactly this metadata; verify the read,
    # then write the same data back and verify the full round trip.
    self.assertEqual(expected, Util.read_metadata("test.jpg"))
    self.assertTrue(Util.write_metadata("test.jpg", expected))
    self.assertEqual(expected, Util.read_metadata("test.jpg"))

    # Same exercise for the SVG fixture.
    self.assertTrue(os.path.exists("test.svg"))
    self.assertEqual(expected, Util.read_metadata("test.svg"))
    Util.write_metadata("test.svg", expected)
    self.assertEqual(expected, Util.read_metadata("test.svg"))
def test_metadata(self):
    """Check that metadata read from the test fixtures matches the expected
    dict, and that writing it back leaves it readable and unchanged."""
    self.assertTrue(os.path.exists('test.jpg'))

    info = {
        'sourceName': u'a',
        'sourceURL': u'b',
        'sourceLocation': u'c',
        'imageURL': u'd',
        'sourceType': u'flickr',
        'author': u'автор',
        'authorURL': u'url',
        'keywords': [u'дума1', u'дума2'],
        'headline': u'проба1 проба1',
        'description': u'проба2 проба2',
        'sfwRating': 50,
        'extraData': {'key1': 'val1', 'key2': 'val2'},
    }

    # test.jpg is expected to already contain this metadata.
    self.assertEqual(info, Util.read_metadata('test.jpg'))
    # Writing it back must succeed and must read back identical.
    self.assertTrue(Util.write_metadata('test.jpg', info))
    self.assertEqual(info, Util.read_metadata('test.jpg'))

    self.assertTrue(os.path.exists('test.svg'))
    self.assertEqual(info, Util.read_metadata('test.svg'))
    Util.write_metadata('test.svg', info)
    self.assertEqual(info, Util.read_metadata('test.svg'))
def on_image_set_as_wallpaper(self, img, meta):
    """When an Unsplash image becomes the wallpaper for the first time, ping
    its download-location URL and persist a flag so it is reported only once.

    :param img: path of the image file whose metadata will be updated
    :param meta: the image's metadata dict (must contain "extraData" to act)
    """
    extra = meta.get("extraData", None)
    if not extra:
        return

    location = extra.get("unsplashDownloadLocation")
    # Skip images with no download location, or ones already reported.
    if not location or extra.get("unsplashDownloadReported"):
        return

    Util.fetch("{}?client_id={}".format(location, UnsplashDownloader.CLIENT_ID))
    # Record the report in the file's metadata so we never report twice.
    meta["extraData"]["unsplashDownloadReported"] = True
    Util.write_metadata(img, meta)
def setWallpaperHook(img, meta):
    """Hook invoked when an image is set as wallpaper: notify Unsplash's
    download-location endpoint once per image and mark it as reported.

    :param img: path of the image file whose metadata will be updated
    :param meta: the image's metadata dict (must contain 'extraData' to act)
    """
    extra = meta.get('extraData', None)
    if not extra:
        return

    location = extra.get('unsplashDownloadLocation')
    # Nothing to do without a download location, or if already reported.
    if not location or extra.get('unsplashDownloadReported'):
        return

    Util.fetch('{}?client_id={}'.format(location, UnsplashDownloader.CLIENT_ID))
    # Persist the flag into the file so the report happens at most once.
    meta['extraData']['unsplashDownloadReported'] = True
    Util.write_metadata(img, meta)
def test_metadata(self):
    """Write a metadata dict to the JPEG and SVG fixtures and verify that
    reading it back yields an equal dict."""
    self.assertTrue(os.path.exists('test.jpg'))

    info = {
        'sourceName': u'a',
        'sourceURL': u'b',
        'sourceLocation': u'c',
        'imageURL': u'd',
        'sourceType': u'flickr',
        'author': u'автор',
        'authorURL': u'url',
        'keywords': [u'дума1', u'дума2'],
        'headline': u'проба1',
        'description': u'проба2',
        'sfwRating': 50,
    }

    # JPEG: write must report success, and the round trip must be lossless.
    self.assertTrue(Util.write_metadata('test.jpg', info))
    self.assertEqual(info, Util.read_metadata('test.jpg'))

    # SVG: same round trip.
    self.assertTrue(os.path.exists('test.svg'))
    Util.write_metadata('test.svg', info)
    self.assertEqual(info, Util.read_metadata('test.svg'))
def fetch(
    url,
    to_folder,
    origin_url=None,
    source_type=None,
    source_location=None,
    source_name=None,
    extra_metadata=None,
    progress_reporter=lambda a, b: None,
    verbose=True,
):
    """Download the image at ``url`` into ``to_folder``, embed Variety
    metadata in it, and return the local path.

    The download goes to a ``.partial`` file first and is renamed atomically
    once complete, so other code never observes a half-written image.

    :param url: image URL; a bare path is treated as a ``file://`` URL
    :param to_folder: destination folder
    :param origin_url: page URL recorded as sourceURL (defaults to ``url``)
    :param source_type: recorded sourceType (defaults to "fetched")
    :param source_location: recorded sourceLocation, when given
    :param source_name: recorded sourceName (defaults to "Fetched")
    :param extra_metadata: extra key/values merged into the metadata
    :param progress_reporter: callable(title, message) for user feedback
    :param verbose: report progress before any problem is detected
    :return: local file path on success; None if the URL is not a usable
        image (wrong content type, undecodable, smaller than 400x400) or
        the fetch fails
    """
    reported = verbose
    try:
        logger.info(lambda: "Trying to fetch URL %s to %s " % (url, to_folder))
        if verbose:
            progress_reporter(_("Fetching"), url)

        if url.startswith("javascript:"):
            if verbose:
                progress_reporter(_("Not an image"), url)
            return None

        # Treat scheme-less URLs as local files
        if url.find("://") < 0:
            url = "file://" + url

        r = Util.request(url, stream=True)
        if "content-type" not in r.headers:  # idiomatic membership test
            logger.info(lambda: "Unknown content-type for url " + url)
            if verbose:
                progress_reporter(_("Not an image"), url)
            return None

        ct = r.headers["content-type"]
        if not ct.startswith("image/"):
            logger.info(lambda: "Unsupported content-type for url " + url + ": " + ct)
            if verbose:
                progress_reporter(_("Not an image"), url)
            return None

        # Prefer the server-suggested filename when one is provided
        local_name = Util.get_local_name(r.url)
        if "content-disposition" in r.headers:
            cd = r.headers["content-disposition"]
            cd_name = ImageFetcher.extract_filename_from_content_disposition(cd)
            if cd_name:
                local_name = cd_name
        filename = os.path.join(to_folder, local_name)

        if os.path.exists(filename):
            m = Util.read_metadata(filename)
            if m and m.get("imageURL") == url:
                logger.info(lambda: "Local file already exists (%s)" % filename)
                return filename
            else:
                logger.info(
                    lambda: "File with same name already exists, but from different imageURL; renaming new download"
                )
                filename = Util.find_unique_name(filename)

        logger.info(lambda: "Fetching to " + filename)
        if not reported:
            reported = True
            progress_reporter(_("Fetching"), url)

        local_filepath_partial = filename + ".partial"
        with open(local_filepath_partial, "wb") as f:
            Util.request_write_to(r, f)

        try:
            img = Image.open(local_filepath_partial)
        except Exception:
            progress_reporter(_("Not an image"), url)
            Util.safe_unlink(local_filepath_partial)
            return None
        try:
            # size is available without loading pixel data
            too_small = img.size[0] < 400 or img.size[1] < 400
        finally:
            # close explicitly: avoids leaking the file handle, which could
            # also block the rename below on some platforms
            img.close()

        if too_small:
            # too small - delete and do not use
            progress_reporter(_("Image too small, ignoring it"), url)
            Util.safe_unlink(local_filepath_partial)
            return None

        metadata = {
            "sourceType": source_type or "fetched",
            "sourceName": source_name or "Fetched",
            "sourceURL": origin_url or url,
            "imageURL": url,
        }
        if source_location:
            metadata["sourceLocation"] = source_location
        metadata.update(extra_metadata or {})
        Util.write_metadata(local_filepath_partial, metadata)

        # atomic rename: the final name appears only once fully written
        os.rename(local_filepath_partial, filename)

        logger.info(lambda: "Fetched %s to %s." % (url, filename))
        return filename

    except Exception as e:  # pylint: disable=no-member
        logger.exception(lambda: "Fetch failed for URL " + url)
        if reported:
            if isinstance(e, HTTPError) and e.response.status_code in (403, 404):
                progress_reporter(
                    _("Sorry, got %s error...") % str(e.response.status_code),
                    _("This means the link is no longer valid"),
                )
            else:
                progress_reporter(
                    _("Fetch failed for some reason"),
                    _(
                        "To get more information, please run Variety from terminal with -v option and retry the action"
                    ),
                )
        return None
def save_locally(
    self,
    origin_url,
    image_url,
    source_type=None,
    source_location=None,
    source_name=None,
    force_download=False,
    extra_metadata=None,
    local_filename=None,
    request_headers=None,
    request_kwargs=None,
):
    """Download ``image_url`` into this downloader's target folder, embed
    Variety metadata into it, and return the local path.

    The download goes to a ``.partial`` file first and is renamed atomically
    once complete.

    :param origin_url: page URL recorded as sourceURL (and used for bans)
    :param image_url: direct URL of the image to download
    :param source_type: recorded sourceType (defaults to self.get_source_type())
    :param source_location: recorded sourceLocation (defaults to location/description)
    :param source_name: recorded sourceName (defaults to self.get_source_name())
    :param force_download: re-download even if banned or already present
    :param extra_metadata: extra key/values merged into the metadata
    :param local_filename: override the generated local file name
    :param request_headers: extra HTTP headers for the request
    :param request_kwargs: extra keyword args passed to Util.request
    :return: local file path, or None when skipped or not an image
    :raises Exception: re-raises any download error after cleanup
    """
    source_type = source_type or self.get_source_type()
    source_name = source_name or self.get_source_name()
    source_location = source_location or self.get_source_location() or self.get_description()

    if not force_download and self.is_in_banned(origin_url):
        logger.info(lambda: "URL " + origin_url + " is banned, skip downloading")
        return None

    # Best-effort: the folder may already exist or be created concurrently.
    try:
        os.makedirs(self.target_folder)
    except Exception:
        pass

    # Normalize protocol-relative URLs.
    if origin_url.startswith("//"):
        origin_url = "https:" + origin_url
    if image_url.startswith("//"):
        image_url = origin_url.split("//")[0] + image_url

    # we will download the contents to a ".partial" file, then rename it to the proper name
    if not local_filename:
        local_filename = self.get_local_filename(url=image_url)
    local_filepath = self._local_filepath(local_filename=local_filename)
    local_filepath_partial = local_filepath + ".partial"

    logger.info(lambda: "Origin URL: " + origin_url)
    logger.info(lambda: "Image URL: " + image_url)
    logger.info(lambda: "Local path: " + local_filepath)

    if not force_download and os.path.exists(local_filepath):
        logger.info(lambda: "File already exists, skip downloading")
        return None

    is_unsafe, blacklisted = self.is_unsafe(extra_metadata or {})
    if is_unsafe:
        logger.info(
            lambda: "Skipping non-safe download %s due to blacklisted keywords (%s). "
            "Is the source %s:%s suitable for Safe mode?"
            % (origin_url, str(blacklisted), source_type, source_location)
        )
        return None

    try:
        r = Util.request(image_url, stream=True, headers=request_headers, **(request_kwargs or {}))
        with open(local_filepath_partial, "wb") as f:
            Util.request_write_to(r, f)
    except Exception:
        logger.info(
            lambda: "Download failed from image URL: %s (source location: %s) "
            % (image_url, source_location)
        )
        Util.safe_unlink(local_filepath_partial)
        # bare raise preserves the original traceback (was `raise e`)
        raise

    if not Util.is_image(local_filepath_partial, check_contents=True):
        logger.info(lambda: "Downloaded data was not an image, image URL might be outdated")
        Util.safe_unlink(local_filepath_partial)
        return None

    metadata = {
        "sourceType": source_type,
        "sourceName": source_name,
        "sourceLocation": source_location,
        "sourceURL": origin_url,
        "imageURL": image_url,
    }
    metadata.update(extra_metadata or {})
    Util.write_metadata(local_filepath_partial, metadata)

    # file rename is an atomic operation, so we should never end up with partial downloads
    os.rename(local_filepath_partial, local_filepath)

    logger.info(lambda: "Download complete")
    return local_filepath
class Downloader(object):
    """Base class for image downloaders.

    Manages the per-source download folder and saves images locally,
    embedding Variety metadata into each downloaded file.
    """

    def __init__(self, parent, source_type, name, location, is_refresher=False):
        """
        :param parent: owning application object (provides options, folders, bans)
        :param source_type: identifier of the image source kind
        :param name: human-readable source name
        :param location: source location (typically a URL)
        :param is_refresher: whether this downloader refreshes existing content
        """
        self.parent = parent
        self.source_type = source_type
        self.name = name
        self.location = location
        self.is_refresher = is_refresher

    def update_download_folder(self):
        """Compute self.target_folder under the parent's real download folder,
        truncating over-long names and disambiguating with an md5 suffix."""
        filename = self.convert_to_filename(self.location)
        l = len(self.parent.real_download_folder)
        if len(filename) + l > 160:
            # Keep total path length bounded; md5 suffix keeps names unique.
            filename = filename[:(150 - l)] + Util.md5(filename)[:10]
        self.target_folder = os.path.join(self.parent.real_download_folder, filename)

    def convert_to_filename(self, url):
        """Turn a URL into a safe folder name: strip the scheme and replace
        every character that is not a letter, digit or underscore with '_'."""
        url = re.sub(r"http://", "", url)
        url = re.sub(r"https://", "", url)
        valid_chars = "_%s%s" % (string.ascii_letters, string.digits)
        return ''.join(c if c in valid_chars else '_' for c in url)

    def get_local_filename(self, url):
        """Return the full local path where the image at ``url`` would be saved."""
        return os.path.join(self.target_folder, Util.get_local_name(url))

    def is_in_downloaded(self, url):
        """True when the image at ``url`` has already been downloaded."""
        return os.path.exists(self.get_local_filename(url))

    def is_in_favorites(self, url):
        """True when the image at ``url`` is already in the favorites folder."""
        return self.parent and os.path.exists(
            os.path.join(self.parent.options.favorites_folder, Util.get_local_name(url)))

    def save_locally(self, origin_url, image_url,
                     source_type=None, source_location=None, source_name=None,
                     force_download=False, extra_metadata=None, local_filename=None):
        """Download ``image_url``, embed metadata, and return the local path.

        :param origin_url: page URL recorded as sourceURL (and used for bans)
        :param image_url: direct URL of the image to download
        :param source_type: recorded sourceType (defaults to self.source_type)
        :param source_location: recorded sourceLocation (defaults to self.location)
        :param source_name: recorded sourceName (defaults to self.name)
        :param force_download: download even if banned or already present
        :param extra_metadata: extra key/values merged into the metadata
        :param local_filename: override the generated local file name
        :return: local file path, or None when skipped or not an image
        :raises Exception: re-raises any download error
        """
        # Avoid the shared-mutable-default pitfall (was extra_metadata={}).
        extra_metadata = extra_metadata or {}

        if not source_type:
            source_type = self.source_type
        if not source_name:
            source_name = self.name
        if not source_location:
            source_location = self.location

        if not force_download and self.parent and origin_url in self.parent.banned:
            logger.info(
                lambda: "URL " + origin_url + " is banned, skip downloading")
            return None

        # Best-effort: the folder may already exist.
        try:
            os.makedirs(self.target_folder)
        except Exception:
            pass

        # Normalize protocol-relative URLs.
        if origin_url.startswith('//'):
            origin_url = 'https:' + origin_url
        if image_url.startswith('//'):
            image_url = origin_url.split('//')[0] + image_url

        if not local_filename:
            local_filename = self.get_local_filename(image_url)
        logger.info(lambda: "Origin URL: " + origin_url)
        logger.info(lambda: "Image URL: " + image_url)
        logger.info(lambda: "Local name: " + local_filename)

        if not force_download and os.path.exists(local_filename):
            logger.info(lambda: "File already exists, skip downloading")
            return None

        # Safe-mode gate 1: the origin URL's own SFW rating.
        if self.parent and self.parent.options.safe_mode:
            sfw_rating = Smart.get_sfw_rating(origin_url)
            if sfw_rating is not None and sfw_rating < 100:
                logger.info(
                    lambda: "Skipping non-safe download %s. Is the source %s:%s "
                    "suitable for Safe mode?" % (origin_url, source_type, self.location))
                return None

        # Safe-mode gate 2: blacklisted keywords in the extra metadata.
        if self.parent and self.parent.options.safe_mode and 'keywords' in extra_metadata:
            blacklisted = set(k.lower() for k in extra_metadata['keywords']
                              ) & Smart.get_safe_mode_keyword_blacklist()
            if len(blacklisted) > 0:
                logger.info(
                    lambda: "Skipping non-safe download %s due to blacklisted keywords (%s). "
                    "Is the source %s:%s suitable for Safe mode?"
                    % (origin_url, str(blacklisted), source_type, self.location))
                return None

        try:
            r = Util.request(image_url, stream=True)
            with open(local_filename, 'wb') as f:
                Util.request_write_to(r, f)
        except Exception:  # was py2-only `except Exception, e:` syntax
            logger.info(
                lambda: "Download failed from image URL: %s (source location: %s) "
                % (image_url, self.location))
            # bare raise preserves the original traceback (was `raise e`)
            raise

        if not Util.is_image(local_filename, check_contents=True):
            logger.info(
                lambda: "Downloaded data was not an image, image URL might be outdated"
            )
            os.unlink(local_filename)
            return None

        metadata = {
            "sourceType": source_type,
            "sourceName": source_name,
            "sourceLocation": source_location,
            "sourceURL": origin_url,
            "imageURL": image_url
        }
        metadata.update(extra_metadata)
        Util.write_metadata(local_filename, metadata)

        logger.info(lambda: "Download complete")
        return local_filename
def save_locally(self, origin_url, image_url,
                 source_type=None, source_location=None, source_name=None,
                 force_download=False, extra_metadata=None, local_filename=None):
    """Download ``image_url`` into the target folder, embed Variety metadata,
    and return the local path.

    :param origin_url: page URL recorded as sourceURL (and used for bans)
    :param image_url: direct URL of the image to download
    :param source_type: recorded sourceType (defaults to self.source_type)
    :param source_location: recorded sourceLocation (defaults to self.location)
    :param source_name: recorded sourceName (defaults to self.name)
    :param force_download: download even if banned or already present
    :param extra_metadata: extra key/values merged into the metadata
    :param local_filename: override the generated local file name
    :return: local file path, or None when skipped or not an image
    :raises Exception: re-raises any download error
    """
    # Avoid the shared-mutable-default pitfall (was extra_metadata={}).
    extra_metadata = extra_metadata or {}

    if not source_type:
        source_type = self.source_type
    if not source_name:
        source_name = self.name
    if not source_location:
        source_location = self.location

    if not force_download and self.parent and origin_url in self.parent.banned:
        logger.info(
            lambda: "URL " + origin_url + " is banned, skip downloading")
        return None

    # Best-effort: the folder may already exist.
    try:
        os.makedirs(self.target_folder)
    except Exception:
        pass

    # Normalize protocol-relative URLs.
    if origin_url.startswith('//'):
        origin_url = 'https:' + origin_url
    if image_url.startswith('//'):
        image_url = origin_url.split('//')[0] + image_url

    if not local_filename:
        local_filename = self.get_local_filename(image_url)
    logger.info(lambda: "Origin URL: " + origin_url)
    logger.info(lambda: "Image URL: " + image_url)
    logger.info(lambda: "Local name: " + local_filename)

    if not force_download and os.path.exists(local_filename):
        logger.info(lambda: "File already exists, skip downloading")
        return None

    # Safe mode: skip downloads whose keywords hit the blacklist.
    if self.parent and self.parent.options.safe_mode and 'keywords' in extra_metadata:
        blacklisted = set(
            k.lower() for k in extra_metadata['keywords']) & SAFE_MODE_BLACKLIST
        if len(blacklisted) > 0:
            logger.info(
                lambda: "Skipping non-safe download %s due to blacklisted keywords (%s). "
                "Is the source %s:%s suitable for Safe mode?"
                % (origin_url, str(blacklisted), source_type, self.location))
            return None

    try:
        r = Util.request(image_url, stream=True)
        with open(local_filename, 'wb') as f:
            Util.request_write_to(r, f)
    except Exception:
        logger.info(
            lambda: "Download failed from image URL: %s (source location: %s) "
            % (image_url, self.location))
        # bare raise preserves the original traceback (was `raise e`)
        raise

    if not Util.is_image(local_filename, check_contents=True):
        logger.info(
            lambda: "Downloaded data was not an image, image URL might be outdated"
        )
        os.unlink(local_filename)
        return None

    metadata = {
        "sourceType": source_type,
        "sourceName": source_name,
        "sourceLocation": source_location,
        "sourceURL": origin_url,
        "imageURL": image_url
    }
    metadata.update(extra_metadata)
    Util.write_metadata(local_filename, metadata)

    logger.info(lambda: "Download complete")
    return local_filename
def fetch(url, to_folder, origin_url=None, source_type=None, source_location=None,
          source_name=None, extra_metadata=None, progress_reporter=lambda a, b: None,
          verbose=True):
    """Download the image at ``url`` into ``to_folder``, embed Variety
    metadata in it, and return the local path.

    :param url: image URL; a bare path is treated as a ``file://`` URL
    :param to_folder: destination folder
    :param origin_url: page URL recorded as sourceURL (defaults to ``url``)
    :param source_type: recorded sourceType (defaults to 'fetched')
    :param source_location: recorded sourceLocation, when given
    :param source_name: recorded sourceName (defaults to "Fetched")
    :param extra_metadata: extra key/values merged into the metadata
        (default changed from a shared mutable {} to None; behavior identical)
    :param progress_reporter: callable(title, message) for user feedback
    :param verbose: report progress before any problem is detected
    :return: local file path on success; None if the URL is not a usable
        image (wrong content type, undecodable, smaller than 400x400) or
        the fetch fails
    """
    reported = verbose
    try:
        logger.info(lambda: "Trying to fetch URL %s to %s " % (url, to_folder))
        if verbose:
            progress_reporter(_("Fetching"), url)

        if url.startswith('javascript:'):
            if verbose:
                progress_reporter(_("Not an image"), url)
            return None

        # Treat scheme-less URLs as local files
        if url.find('://') < 0:
            url = "file://" + url

        r = Util.request(url, stream=True)
        if "content-type" not in r.headers:  # idiomatic membership test
            logger.info(lambda: "Unknown content-type for url " + url)
            if verbose:
                progress_reporter(_("Not an image"), url)
            return None

        ct = r.headers["content-type"]
        if not ct.startswith("image/"):
            logger.info(lambda: "Unsupported content-type for url " + url + ": " + ct)
            if verbose:
                progress_reporter(_("Not an image"), url)
            return None

        # Prefer the server-suggested filename when one is provided
        local_name = Util.get_local_name(r.url)
        if "content-disposition" in r.headers:
            cd = r.headers["content-disposition"]
            cd_name = ImageFetcher.extract_filename_from_content_disposition(cd)
            if cd_name:
                local_name = cd_name
        filename = os.path.join(to_folder, local_name)

        if os.path.exists(filename):
            m = Util.read_metadata(filename)
            if m and m.get("imageURL") == url:
                logger.info(lambda: "Local file already exists (%s)" % filename)
                return filename
            else:
                logger.info(lambda: "File with same name already exists, but from different imageURL; renaming new download")
                filename = Util.find_unique_name(filename)

        logger.info(lambda: "Fetching to " + filename)
        if not reported:
            reported = True
            progress_reporter(_("Fetching"), url)

        with open(filename, 'wb') as f:
            Util.request_write_to(r, f)

        try:
            img = Image.open(filename)
        except Exception:
            progress_reporter(_("Not an image"), url)
            os.unlink(filename)
            return None

        if img.size[0] < 400 or img.size[1] < 400:
            # too small - delete and do not use
            progress_reporter(_("Image too small, ignoring it"), url)
            os.unlink(filename)
            return None

        metadata = {"sourceType": source_type or 'fetched',
                    "sourceName": source_name or "Fetched",
                    "sourceURL": origin_url or url,
                    "imageURL": url}
        if source_location:
            metadata["sourceLocation"] = source_location
        metadata.update(extra_metadata or {})
        Util.write_metadata(filename, metadata)

        logger.info(lambda: "Fetched %s to %s." % (url, filename))
        return filename

    except Exception as e:  # was py2-only `except Exception, e:` syntax
        logger.exception(lambda: "Fetch failed for URL " + url)
        if reported:
            if isinstance(e, HTTPError) and e.response.status_code in (403, 404):
                progress_reporter(
                    _("Sorry, got %s error...") % str(e.response.status_code),
                    _("This means the link is no longer valid"))
            else:
                progress_reporter(
                    _("Fetch failed for some reason"),
                    _("To get more information, please run Variety from terminal with -v option and retry the action"))
        return None