def get_images(self, post) -> list:
    """Return the image entities referenced by an Imgur link.

    ``post`` may be a Reddit post object (anything with a ``.url``
    attribute) or a bare URL string when the direct Imgur download
    flow is used.  Reddit posts yield ``RedditPostImage`` entities,
    bare URLs yield ``ImgurImage`` entities.
    """
    is_reddit_post = True
    # If we are using the direct Imgur download, and a URL is passed,
    # let's try to use this as the URL
    if isinstance(post, str):
        url = post
        is_reddit_post = False
    else:
        url = post.url
    # First check if it is a direct URL so that we avoid querying the API
    if ImgurAPIParser.is_imgur_direct_url(url):
        if is_reddit_post:
            return [ImgurAPIParser.get_image_from_direct_url(post)]
        # BUG FIX: get_image_from_direct_url() dereferences post.url, which
        # raises AttributeError when a bare string URL was passed in.
        # Build the ImgurImage directly instead, applying the same
        # tidy-up / gifv-to-mp4 rewrite as the direct-URL helper.
        image_url = tidy_up_url(url)
        if image_url.endswith("gifv"):
            image_url = image_url[:-len("gifv")] + "mp4"
        return [ImgurImage(image_url,
                           ImgurAPIParser.get_file_name_from_url(image_url))]
    urls = ImgurAPI.get_image_urls(url)
    image_entities = []
    for u in urls:
        if is_reddit_post:
            image_entities.append(
                RedditPostImage(u, post,
                                ImgurAPIParser.get_file_name_from_url(u)))
        else:
            image_entities.append(
                ImgurImage(u, ImgurAPIParser.get_file_name_from_url(u)))
    return image_entities
def get_images(self, post):
    """Wrap the post's direct image URL in a single-element list."""
    # The local file name is everything after the last '/' of the URL
    # (or the whole URL when it contains no slash at all).
    file_name = post.url.rsplit('/', 1)[-1]
    return [RedditPostImage(post.url, post, file_name)]
def get_images(self, post):
    """Return one image entity, forcing a .jpg extension on the file name."""
    image_url = post.url
    # Clean the URL, tack on ".jpg", then keep only the basename.
    jpg_name = tidy_up_url(image_url) + ".jpg"
    image_file = jpg_name.rsplit('/', 1)[-1]
    return [RedditPostImage(image_url, post, image_file)]
def get_image_from_direct_url(post) -> RedditPostImage:
    """Build a RedditPostImage from a post that links straight to an Imgur file.

    Imgur's .gifv URLs are HTML wrapper pages; the actual video lives at
    the same path with an .mp4 extension, so the URL suffix is rewritten.
    """
    image_url = tidy_up_url(post.url)
    if image_url.endswith("gifv"):
        # BUG FIX: str.replace() substitutes EVERY occurrence of "gifv",
        # which would also corrupt file names containing that substring
        # (e.g. ".../gifv_clip.gifv"); rewrite only the suffix.
        image_url = image_url[:-len("gifv")] + "mp4"
    image_file = ImgurBaseParser.get_file_name_from_url(image_url)
    return RedditPostImage(image_url, post, image_file)
def get_images(self, post):
    """Scrape the og:image meta tag from the page the post links to.

    Raises:
        NotAbleToDownloadException: if the page exposes no og:image tag.
    """
    images = []
    html_source = requests.get(post.url).text
    soup = BeautifulSoup(html_source, "lxml")
    # BUG FIX: the original selector "meta['property=og:image']" is not
    # valid CSS attribute-selector syntax; the correct form is
    # tag[attr="value"], so the old query could never match.
    match = soup.select('meta[property="og:image"]')
    if not match:
        raise NotAbleToDownloadException("Wasn't able to download %s" % post.url)
    image_url = match[0]["content"]
    image_url = tidy_up_url(image_url)
    # File name is the last path component of the image URL.
    image_file = image_url[image_url.rfind('/') + 1:]
    images.append(RedditPostImage(image_url, post, image_file))
    return images
def get_images(self, post):
    """Return a single downloadable movie entity for a Gfycat link."""
    # Gfycat links may or may not have a gif, gifv or webm extension;
    # if they don't we can append .webm (WebM for the win!).
    image_url = tidy_up_url(post.url)
    # A file extension is 3-4 characters long, so a '.' must appear
    # somewhere in the last 5 characters for one to be present.
    if '.' not in image_url[-5:]:
        image_url += ".webm"
    # Movies are only served from the giant.gfycat subdomain, so splice
    # "giant." into the URL right before "gfycat".
    host_at = image_url.find("gfycat")
    image_url = image_url[:host_at] + "giant." + image_url[host_at:]
    image_file = image_url.rsplit('/', 1)[-1]
    return [RedditPostImage(image_url, post, image_file)]
def get_images(self, post) -> list:
    """Collect every image referenced by an Imgur submission.

    Handles album/gallery pages, direct file links, and single-image
    pages, in that order of precedence.

    Raises:
        NotAbleToDownloadException: when no image can be located.
    """
    url = post.url

    if "/a/" in url or "/gallery/" in url:
        # Album submission: scrape every image placeholder on the page.
        soup = BeautifulSoup(requests.get(url).text, "lxml")
        matches = (soup.select('img.post-image-placeholder')
                   or soup.select("img[itemprop]"))
        if not matches:
            raise NotAbleToDownloadException("Couldn't process %s" % url)
        images = []
        for tag in matches:
            image_url = tidy_up_url(tag['src'])
            image_file = ImgurBaseParser.get_file_name_from_url(image_url)
            images.append(RedditPostImage(image_url, post, image_file))
        return images

    if ImgurHTMLParser.is_imgur_direct_url(url):
        # Direct link to an image/video file — no scraping needed.
        return [ImgurHTMLParser.get_image_from_direct_url(post)]

    if "imgur.com/" in url:
        # Imgur page carrying a single image.
        soup = BeautifulSoup(requests.get(url).text, "lxml")
        # Try first the tags that hold the image inside an href property.
        match = soup.select("a.zoom") or soup.select("link[rel=image_src]")
        if match:
            image_url = match[0]["href"]
        else:
            # No href match; fall back to tags that carry the image
            # inside a src property.
            match = (soup.select("img.post-image-placeholder")
                     or soup.select("img[itemprop]")
                     or soup.select("img"))
            if not match:
                raise NotAbleToDownloadException("Couldn't process %s" % url)
            image_url = match[0]['src']
        image_url = tidy_up_url(image_url)
        image_file = ImgurHTMLParser.get_file_name_from_url(image_url)
        return [RedditPostImage(image_url, post, image_file)]

    raise NotAbleToDownloadException("Couldn't process %s" % url)