Example no. 1
0
    def get_images(self, post) -> list:
        """Return image entities for an Imgur post or a raw Imgur URL.

        Args:
            post: either a Reddit post object exposing a ``url`` attribute,
                or a plain URL string (direct Imgur download mode).

        Returns:
            list: ``RedditPostImage`` items when ``post`` is a Reddit post,
            ``ImgurImage`` items when ``post`` is a bare URL string.
        """
        is_reddit_post = True

        # If we are using the direct Imgur download, and a URL is passed,
        # let's try to use this as the URL
        if isinstance(post, str):
            url = post
            is_reddit_post = False
        else:
            url = post.url

        # First check if it is a direct URL so that we avoid querying the API.
        # BUG FIX: get_image_from_direct_url() reads post.url, so it cannot be
        # handed a plain string; build the non-Reddit entity directly instead.
        if ImgurAPIParser.is_imgur_direct_url(url):
            if is_reddit_post:
                return [ImgurAPIParser.get_image_from_direct_url(post)]
            return [ImgurImage(url, ImgurAPIParser.get_file_name_from_url(url))]

        urls = ImgurAPI.get_image_urls(url)
        image_entities = []

        for u in urls:
            if is_reddit_post:
                image_entities.append(
                    RedditPostImage(u, post,
                                    ImgurAPIParser.get_file_name_from_url(u)))
            else:
                image_entities.append(
                    ImgurImage(u, ImgurAPIParser.get_file_name_from_url(u)))

        return image_entities
Example no. 2
0
    def get_images(self, post):
        """Wrap a directly-linked image in a single-element list.

        The file name is derived from the last path component of the
        post URL.
        """
        file_name = post.url.rsplit('/', 1)[-1]
        return [RedditPostImage(post.url, post, file_name)]
    def get_images(self, post):
        """Build a single image entity, forcing a ``.jpg`` file name.

        The URL is tidied only for deriving the file name; the original
        post URL is kept for the download itself.
        """
        source_url = post.url
        jpg_name = (tidy_up_url(source_url) + ".jpg").rsplit('/', 1)[-1]
        return [RedditPostImage(source_url, post, jpg_name)]
Example no. 4
0
    def get_image_from_direct_url(post) -> RedditPostImage:
        """Create a RedditPostImage from a post whose URL links straight
        to a media file.

        Imgur serves ``.gifv`` pages that wrap an MP4 video, so a trailing
        ``gifv`` extension is rewritten to ``mp4`` to download the actual
        video file.
        """
        image_url = tidy_up_url(post.url)

        # BUG FIX: str.replace swaps the FIRST occurrence of "gifv", which
        # would corrupt a URL containing "gifv" elsewhere in its path;
        # rewrite only the trailing extension instead.
        if image_url.endswith("gifv"):
            image_url = image_url[:-len("gifv")] + "mp4"

        image_file = ImgurBaseParser.get_file_name_from_url(image_url)

        return RedditPostImage(image_url, post, image_file)
Example no. 5
0
    def get_images(self, post):
        """Scrape the ``og:image`` meta tag from the post's HTML page.

        Returns:
            list: a single ``RedditPostImage`` built from the og:image URL.

        Raises:
            NotAbleToDownloadException: if no og:image tag is present.
        """
        images = []

        html_source = requests.get(post.url).text
        soup = BeautifulSoup(html_source, "lxml")
        # BUG FIX: the original selector "meta['property=og:image']" is not
        # valid CSS and never matches anything; attribute selectors use the
        # tag[attr="value"] syntax.
        match = soup.select('meta[property="og:image"]')

        if not match:
            raise NotAbleToDownloadException("Wasn't able to download %s" % post.url)

        image_url = match[0]["content"]
        image_url = tidy_up_url(image_url)
        image_file = image_url[image_url.rfind('/') + 1:]

        images.append(RedditPostImage(image_url, post, image_file))

        return images
    def get_images(self, post):
        """Build the downloadable movie URL for a Gfycat post.

        Gfycat links may or may not carry a gif/gifv/webm extension; when
        one is missing, ``.webm`` is appended (WebM for the win!). The host
        is rewritten to ``giant.gfycat.com`` because only that subdomain
        serves the actual movie files.
        """
        image_url = tidy_up_url(post.url)

        # A file extension can be 3-4 characters long: look for a '.'
        # within the last five characters to decide whether one is present.
        if image_url[-5:].rfind('.') < 0:
            image_url = image_url + ".webm"

        # BUG FIX: str.find returns -1 when "gfycat" is absent, which would
        # have spliced "giant." into a nonsensical position near the end of
        # the URL; only rewrite the host when the marker is actually found.
        gfycat_index = image_url.find("gfycat")
        if gfycat_index >= 0:
            image_url = (image_url[:gfycat_index] + "giant." +
                         image_url[gfycat_index:])

        image_file = image_url[image_url.rfind('/') + 1:]

        return [RedditPostImage(image_url, post, image_file)]
Example no. 7
0
    def get_images(self, post) -> list:
        """Scrape image entities from an Imgur submission.

        Handles three URL shapes, in order: album/gallery pages (URL
        contains "/a/" or "/gallery/"), direct image URLs, and
        single-image Imgur pages, each with its own selector fallbacks.

        Args:
            post: Reddit post object exposing a ``url`` attribute.

        Returns:
            list: ``RedditPostImage`` entities, one per image found.

        Raises:
            NotAbleToDownloadException: when the URL matches none of the
                known shapes, or the page markup matches no known layout.
        """
        images = []

        if "/a/" in post.url or "/gallery/" in post.url:
            # This is an album submission

            html_source = requests.get(post.url).text

            soup = BeautifulSoup(html_source, "lxml")
            matches = soup.select('img.post-image-placeholder')

            # Fallback for an alternate album page layout.
            if not matches:
                matches = soup.select("img[itemprop]")

            if not matches:
                raise NotAbleToDownloadException("Couldn't process %s" %
                                                 post.url)

            for m in matches:
                image_url = tidy_up_url(m['src'])
                image_file = ImgurBaseParser.get_file_name_from_url(image_url)
                images.append(RedditPostImage(image_url, post, image_file))

        elif ImgurHTMLParser.is_imgur_direct_url(post.url):
            # This is a direct url: no page scraping needed.
            images.append(ImgurHTMLParser.get_image_from_direct_url(post))

        elif "imgur.com/" in post.url:
            # This is an Imgur page with a single image

            html_source = requests.get(post.url).text
            soup = BeautifulSoup(html_source, "lxml")

            # Let's try first to find those tags that hold the image URL
            # inside of an href property
            match = soup.select("a.zoom")

            if not match:
                match = soup.select("link[rel=image_src]")

            if match:
                image_url = match[0]["href"]
            else:
                # If no match was found inside of an href property, let's try
                # now with the tags that contain it inside of a src property,
                # from the most specific selector to the most generic one.
                match = soup.select("img.post-image-placeholder")

                if not match:
                    match = soup.select("img[itemprop]")
                if not match:
                    match = soup.select("img")
                if not match:
                    raise NotAbleToDownloadException("Couldn't process %s" %
                                                     post.url)

                image_url = match[0]['src']

            image_url = tidy_up_url(image_url)
            image_file = ImgurHTMLParser.get_file_name_from_url(image_url)

            images.append(RedditPostImage(image_url, post, image_file))
        else:
            raise NotAbleToDownloadException("Couldn't process %s" % post.url)

        return images