def is_valid_sequence_url(url, verbose=False):
    """
    Tell whether the URL contains a bracketed sequence, e.g. ``[001-100]``.

    A sequence is an opening square bracket, at least one character, a
    hyphen, at least one more character and a closing square bracket.

    :param url: the URL (a string) to inspect
    :param verbose: if True, log a warning when no sequence is found
    :return: True if a sequence was found, False otherwise
    """
    # Raw string: in a normal string literal "\[" is an invalid escape
    # sequence, which newer Python versions warn about.
    if re.search(r"\[(.+?)-(.+?)\]", url):
        return True
    # else
    if verbose:
        log.warning(f"no sequence was found in {url}")
    return False
def delete_physically(self, death_list):
    """
    Delete the images in the list from the file system.

    While the files are being removed, the progress is logged and shown
    on the message label and on the progress bar. A file that cannot be
    removed is logged and skipped, so one bad file doesn't abort the
    whole batch. A file that was already missing counts as removed.

    The message label is cleared and the progress bar is hidden at the
    end, even if something unexpected goes wrong in the middle.

    Return value: number of images that were removed successfully.
    """
    total = len(death_list)
    cnt = 0
    msg = "deleting"
    try:
        for idx, img in enumerate(death_list, start=1):
            percent = round(idx * 100 / total)
            log.info(f"{msg} {percent}%")
            self.message_label.setText(msg)
            self.progressbar.show()
            self.progressbar.setValue(percent)
            # presumably needed so that the GUI gets refreshed inside the loop
            QApplication.processEvents()
            p = Path(img.get_absolute_path_or_url())
            reason = ""
            try:
                p.unlink()
            except FileNotFoundError:
                # already gone; the exists() check below counts it as removed
                pass
            except OSError as e:
                # e.g. no permission; keep going with the remaining files
                reason = f" ({e})"
            # whatever unlink() did, the file system has the final word
            if not p.exists():
                cnt += 1
            else:
                log.warning(f"couldn't remove {str(p)}{reason}")
    finally:
        # always leave the GUI in a clean state
        self.message_label.setText("")
        self.progressbar.hide()
    #
    return cnt
def remove_elem(self) -> None:
    """
    Take the leftmost element out of the queue and delete its file.

    If the file is gone afterwards (including the case when it had
    already been missing), the element's size is subtracted from the
    stored total size. If the file is still there, a warning is logged
    and the total size is left untouched. The element leaves the queue
    in both cases.
    """
    first = self.q.popleft()
    reason = ""
    try:
        first.p.unlink()
    except FileNotFoundError:
        # already gone; the exists() check below treats it as removed
        pass
    except OSError as e:
        # e.g. no permission; report it below instead of crashing
        reason = f" ({e})"
    # whatever unlink() did, the file system has the final word
    if not first.p.exists():
        self.size -= first.size
    else:
        log.warning(f"cache news: couldn't remove {first.name}{reason}")
def connection():
    """
    (Re)initialize the module-level imgur client.

    An ImgurClient is created from cfg.IMGUR_CLIENT_ID and
    cfg.IMGUR_CLIENT_SECRET and stored in the global `client`.
    If the creation fails for any reason, two warnings are logged
    and `client` is set to None.
    """
    global client

    try:
        fresh = ImgurClient(cfg.IMGUR_CLIENT_ID, cfg.IMGUR_CLIENT_SECRET)
    except Exception as e:
        log.warning("missing or wrong imgur API keys")
        log.warning(f"imgur exception: {str(e)}")
        fresh = None
    #
    client = fresh
def read(self):
    """
    Load the categories file and return its parsed content.

    The path of the file comes from cfg.categories_file() and the
    content is parsed with yaml.safe_load().

    Return value: the parsed content. An empty dict is returned if the
    file is empty, or if it cannot be opened or parsed (in that case
    the problem is logged as a warning).
    """
    categories = cfg.categories_file()
    try:
        with open(categories) as f:
            data = yaml.safe_load(f)
    except Exception as e:
        log.warning(f"couldn't read {categories}")
        log.warning(e)
        return {}
    # report success only after the file was really read and parsed
    log.info(f"{categories} was read")
    # safe_load() gives None for an empty file; keep the return value
    # consistent with the failure path, which gives an empty dict
    return {} if data is None else data
def shrink(self) -> None:
    """
    Call remove_elem() repeatedly until the stored size is not over
    the limit (max_size_bytes) anymore.

    If the size is within the limit, nothing happens. The last element
    of the queue is never removed: if only one element is left and the
    size is still over the limit, two warnings are logged and the
    method gives up.
    """
    # as long as the cache folder's size is over the limit
    while self.size > self.max_size_bytes:
        only_one_left = (len(self.q) == 1)
        if only_one_left:
            log.warning("the cache folder grew too big but it has just one element")
            log.warning("Tip: increase the cache size, the current value is too small.")
            return
        # else
        self.remove_elem()
def extract_images_from_a_specific_post(url):
    """
    Collect the image URLs of a tumblr post via the tumblr API (v2).

    The blog name and the post ID are extracted from the URL with
    extract_parts_from(), then the posts/photo endpoint is queried.
    From every photo the URL of the original size is taken, and it is
    kept only if its file extension is among cfg.SUPPORTED_FORMATS.

    :param url: URL of the tumblr post
    :return: list of image URLs. The list is empty if there is no API
             key, if the URL cannot be split into parts, if the request
             fails, or if the API response contains an "errors" key.
    """
    if cfg.TUMBLR_API_KEY is None:
        log.warning(f"no tumblr API key found, cannot process {url}")
        return []
    #
    res = extract_parts_from(url)
    if not res:
        return []
    #
    blog_name, post_id = res
    api_call = f"https://api.tumblr.com/v2/blog/{blog_name}.tumblr.com/posts/photo?id={post_id}&api_key={cfg.TUMBLR_API_KEY}"
    try:
        d = requests.get(api_call, timeout=cfg.REQUESTS_TIMEOUT).json()
    except Exception:
        # not a bare "except:", so that Ctrl+C and sys.exit() still get through
        log.error(f"problem with the tumblr post {url}")
        return []
    # else
    if "errors" in d:
        log.warning("Unauthorized tumblr access. Is your API key valid?")
        return []
    #
    urls = []
    for post in d["response"]["posts"]:
        for photo in post["photos"]:
            img_url = photo["original_size"]["url"]
            if Path(img_url).suffix.lower() in cfg.SUPPORTED_FORMATS:
                urls.append(img_url)
            #
        #
    #
    return urls
def extract_images_from_an_album(url):
    """
    Collect the links of the images that are in an imgur album.

    If the module-level imgur client is not yet initialized,
    connection() is called first.

    :param url: URL of the imgur album
    :return: list of the images' links. The list is empty if there is
             no usable client, if no album ID could be extracted from
             the URL, or if the imgur API call fails.
    """
    if client is None:
        connection()
    # if it's still None
    if client is None:
        log.warning(f"problem with your imgur API keys, cannot process {url}")
        return []
    #
    album_id = get_album_id(url)
    if not album_id:
        return []
    #
    try:
        images = client.get_album_images(album_id)
    except imgurpython.helpers.error.ImgurClientRateLimitError:
        # no "file=sys.stderr" here: that's a keyword of print(), the
        # logging methods don't accept it
        log.warning("Imgur API: rate-limit exceeded")
        return []
    except (imgurpython.helpers.error.ImgurClientError, TypeError):
        log.warning(f"problem with album {url}")
        return []
    #
    return [img.link for img in images]
def read_subreddit(subreddit, after_id=None, statusbar=None, mainWindow=None):
    """
    Read a page of a subreddit's JSON listing and collect the images
    found in its entries.

    How an entry is processed:
      * if its link has a supported file extension, the link is taken as is
      * if its domain ends with ".tumblr.com", the images are extracted
        from the tumblr post
      * if its domain ends with "imgur.com" and the link is an album,
        the images are extracted from the album
      * if it's on imgur.com but it's not an album, then link + ".jpg"
        is tried with a HEAD request and it's taken if the response is OK

    Every image is stored together with some extra info: the subreddit,
    the name of the entry (after_id) and the URL of the page that comes
    after the entry (next_page_url).

    :param subreddit: name of the subreddit
    :param after_id: if given, the page with this "after" ID is read
    :param statusbar: if given, its progress bar shows the progress
    :param mainWindow: if given, its loading line is shown while working
    :return: list of ImageWithExtraInfo objects. An empty list is
             returned if any problem occurs while reading the page.
    """
    # The URL of the page has its own name, thus the error messages at the
    # bottom always report the page, not the image that was processed last.
    if not after_id:
        page_url = url_template.format(subreddit=subreddit)
    else:
        page_url = url_template_with_after_id.format(subreddit=subreddit,
                                                     after_id=after_id)

    try:
        if mainWindow:
            mainWindow.loading_line.show()

        r = requests.get(page_url, headers=cfg.headers, timeout=cfg.REQUESTS_TIMEOUT)
        children = r.json()["data"]["children"]
        res = []
        total = len(children)
        for idx, child in enumerate(children, start=1):
            percent = round(idx * 100 / total)
            log.info(f"{percent}%")
            if statusbar:
                statusbar.progressbar.show()
                statusbar.progressbar.setValue(percent)
                # without this nothing appeared until 100%:
                QApplication.processEvents()    # reason: https://stackoverflow.com/a/29917237/232485

            entry = child["data"]
            domain = entry["domain"]
            link = entry["url"]
            entry_id = entry["name"]    # use this for a new page that comes after this entry
            extra = {
                'subreddit': subreddit,
                'after_id': entry_id,
                'next_page_url': f'https://www.reddit.com/r/{subreddit}/.json?after={entry_id}'
            }

            # direct link to an image
            if Path(link).suffix.lower() in cfg.SUPPORTED_FORMATS:
                res.append(ImageWithExtraInfo(link, extra))
                continue

            if domain.endswith(".tumblr.com"):
                try:
                    images = tumblr.extract_images_from_a_specific_post(link)
                except Exception:
                    log.warning(f"cannot extract images from {link}")
                    images = []
                for img_url in images:
                    if Path(img_url).suffix.lower() in cfg.SUPPORTED_FORMATS:
                        res.append(ImageWithExtraInfo(img_url, extra))
                    #
                #
                continue
            # end tumblr section

            if domain.endswith("imgur.com"):
                if imgur.is_album(link):
                    try:
                        images = imgur.extract_images_from_an_album(link)
                    except Exception:
                        log.warning(f"cannot extract images from {link}")
                        images = []
                    for img_url in images:
                        if Path(img_url).suffix.lower() in cfg.SUPPORTED_FORMATS:
                            res.append(ImageWithExtraInfo(img_url, extra))
                        #
                    #
                else:
                    # it's on imgur.com but it's not an album
                    # it may be a single image embedded in an HTML page
                    img_url = link + ".jpg"    # it works sometimes
                    try:
                        head = requests.head(img_url, headers=cfg.headers,
                                             timeout=cfg.REQUESTS_TIMEOUT)
                        if head.ok:
                            res.append(ImageWithExtraInfo(img_url, extra))
                    except Exception:
                        # a failed guess is not a reason to drop the whole page
                        log.warning(f"problem with {link} -> {img_url}")
            # end imgur section
        #
        return res
    except KeyError:
        log.warning(f"cannot extract data from {page_url}")
        return []
    except Exception as e:
        log.warning(f"exception: {str(e)}")
        log.warning(f"problem with {page_url}")
        return []
    finally:
        if statusbar:
            statusbar.progressbar.hide()
        if mainWindow:
            mainWindow.loading_line.hide()