async def search_imgur(self, ctx: Context, *, query: str):
    """Imgur search."""
    search_id = 0
    await self.bot.send_typing(ctx.message.channel)
    try:
        client_id = self.settings["imgur"]["id"]
        client_secret = self.settings["imgur"]["secret"]
    except KeyError:
        await self.bot.say("Please set imgur id and secret.")
        return
    try:
        search_id = self.settings["imgur"]["search_id"]
    except KeyError:
        self.settings["imgur"]["search_id"] = 0
    # count = 0
    client = ImgurClient(client_id, client_secret)
    results = client.gallery_search(query)
    try:
        result = next(islice(results, search_id, None))
        if result.is_album:
            img = client.get_image(result.cover)
        else:
            img = result
        await self.bot.say(str(img.link))
        search_id += 1
    except StopIteration:
        search_id = 0
    self.settings["imgur"]["search_id"] = search_id
    dataIO.save_json(JSON, self.settings)

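# A minimal, self-contained sketch of the islice-based "resume where you left
# off" pattern used above, with a plain list standing in for the
# gallery_search() results (hypothetical data, no Imgur call is made):
from itertools import islice

results = ['r0', 'r1', 'r2']
search_id = 0
for _ in range(5):
    try:
        item = next(islice(results, search_id, None))
        print(item)
        search_id += 1
    except StopIteration:
        search_id = 0  # wrap around once the results are exhausted
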
def get_url(submission):
    def what_is_inside(url):
        header = requests.head(url).headers
        if 'Content-Type' in header:
            return header['Content-Type']
        else:
            return ''

    url = submission.url
    url_content = what_is_inside(url)
    if ('image/jpeg' == url_content or 'image/png' == url_content):
        return 'img', url, url_content.split('/')[1]
    if 'image/gif' in url_content:
        return 'gif', url, 'gif'
    if url.endswith('.gifv'):
        if 'image/gif' in what_is_inside(url[0:-1]):
            return 'gif', url[0:-1], 'gif'
    if submission.is_self is True:
        # Self submission with text
        return 'text', None, None
    if urlparse(url).netloc == 'imgur.com':
        # Imgur
        imgur_config = yaml.load(open('imgur.yml').read())
        imgur_client = ImgurClient(imgur_config['client_id'],
                                   imgur_config['client_secret'])
        path_parts = urlparse(url).path.split('/')
        if path_parts[1] == 'gallery':
            # TODO: gallery handling
            return 'other', url, None
        elif path_parts[1] == 'topic':
            # TODO: topic handling
            return 'other', url, None
        elif path_parts[1] == 'a':
            # An imgur album
            album = imgur_client.get_album(path_parts[2])
            story = {}
            for num, img in enumerate(album.images):
                number = num + 1
                story[number] = {
                    'link': img['link'],
                    'gif': img['animated'],
                    'type': img['type'].split('/')[1]
                }
            return 'album', story, None
        else:
            # Just an imgur image
            img = imgur_client.get_image(path_parts[1].split('.')[0])
            if not img.animated:
                return 'img', img.link, img.type.split('/')[1]
            else:
                return 'gif', img.link, 'gif'
    else:
        return 'other', url, None

def get_imgur_urls(self, url):
    c_id = '45b94b4d0013b7a'
    split = url.split('/')
    if self.has_extension(split[-1]):
        yield url
        return
    client = ImgurClient(client_id=c_id, client_secret=None)
    if 'a' == split[-2]:
        album_id = split[-1].split('#')[0]
        for img in client.get_album_images(album_id):
            yield img.link
    else:
        yield client.get_image(split[-1]).link

def get_imgur_info(self, url):
    # Returns imgur link info. (The original used a Python 2 `ur''` prefix,
    # which is a syntax error on Python 3; a plain raw string works on both.)
    p = re.compile(
        r'(?:.*)(?:http(?:s|)://(?:www\.|i\.|)imgur\.com/)'
        r'([A-Za-z0-9]*)(?:(?:\.[jpgt]|\ |$).*)',
        re.IGNORECASE)
    m = re.search(p, url)
    if m is None:
        return None
    client = ImgurClient('', '')
    img = client.get_image(m.group(1))
    lines = []
    # img.title can be None, which would make the concatenation raise
    lines.append("Title: " + (img.title or ""))
    return lines

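# A quick, self-contained check of the pattern above against a couple of
# sample URLs (illustrative inputs only; no API calls are made):
import re

p = re.compile(
    r'(?:.*)(?:http(?:s|)://(?:www\.|i\.|)imgur\.com/)'
    r'([A-Za-z0-9]*)(?:(?:\.[jpgt]|\ |$).*)',
    re.IGNORECASE)
for u in ('https://i.imgur.com/nhTyj4d.jpg', 'http://imgur.com/nhTyj4d'):
    m = re.search(p, u)
    print(u, '->', m.group(1) if m else None)
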
def find(criteria):
    client = ImgurClient(creds.client_id, creds.client_secret)
    q = criteria.replace("'", "")
    search = client.gallery_search(q, advanced=None, sort='top', window='all')
    if search:
        # Pick a uniformly random result. (The original randrange(1, n) could
        # never pick index 0, and the single-result branch was redundant.)
        item = random.choice(search)
        result = client.get_image(item.id)
        return result.link
    else:
        # Use bing if no imgur results
        return risky(q)

class Imgur_Uploader(object):
    def __init__(self):
        client_id = 'YOUR-CLIENT-ID'
        client_secret = 'YOUR-CLIENT-SECRET'
        self.client = ImgurClient(client_id, client_secret)

    def upload_image(self, filepath):
        return self.client.upload_from_path(filepath)

    def download_image(self, image_id, name):
        image = self.client.get_image(image_id)
        image_link = image.link
        uid = uuid.uuid1().urn
        # urllib.urlretrieve is Python 2; on Python 3 use
        # urllib.request.urlretrieve instead.
        urllib.urlretrieve(image_link,
                           "IMGURDownloads/" + name[:-4] + uid[9:] + name[-4:])

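# A minimal sketch of how the uploader might be used. The credentials above
# are placeholders, the file name here is an assumption, and the
# IMGURDownloads/ directory must already exist for download_image() to work:
uploader = Imgur_Uploader()
uploaded = uploader.upload_image('cat.jpg')  # upload_from_path returns a dict
print(uploaded['link'])
uploader.download_image(uploaded['id'], 'cat.jpg')
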
async def imgur(*search_terms):
    """Fetches images from Imgur based on the given arguments.
    Supports single and multiple arguments.
    """
    client = ImgurClient(imgur_client_id, imgur_client_secret)
    search_terms = " ".join(search_terms)
    images = client.gallery_search(search_terms)
    if images:
        image = random.choice(images)
        if image.is_album:
            await bot.say(client.get_image(image.cover).link)
        else:
            await bot.say(image.link)
    else:
        await bot.say("No images found for " + search_terms)

async def imgur(ctx, *search_terms):
    """Fetches images from Imgur based on the given arguments.
    Supports single and multiple arguments.
    """
    client = ImgurClient(imgur_client_id, imgur_client_secret)
    search_terms = " ".join(search_terms)
    images = client.gallery_search(search_terms)
    if images:
        image = random.choice(images)
        if image.is_album:
            await ctx.send(client.get_image(image.cover).link)
        else:
            # was `client.send(...)`, which does not exist; replies go via ctx
            await ctx.send(image.link)
    else:
        await ctx.send("Couldn't find the picture! " + search_terms)

class ImgurExtractor(Extractor):

    def __init__(self, url, user, post_title, subreddit, creation_date, save_path,
                 subreddit_save_method, imgur_client, name_downloads_by):
        """
        A subclass of the Extractor class. This class interacts exclusively with
        the imgur website through the imgur api via ImgurPython

        :param imgur_client: A tuple of the client id and client secret provided
        by imgur to access their api. This tuple is supplied to imgurpython to
        establish an imgur client
        """
        super().__init__(url, user, post_title, subreddit, creation_date,
                         save_path, subreddit_save_method, name_downloads_by)
        try:
            self.client = ImgurClient(imgur_client[0], imgur_client[1])
        except ImgurClientError as e:
            if e.status_code == 500:
                self.over_capacity_error()

    def extract_content(self):
        """Determines what type of page container a link is and then dictates
        which extraction method should be used"""
        try:
            if 'i.imgur' in self.url:
                self.extract_direct_link()
            elif "/a/" in self.url:
                self.extract_album()
            elif '/gallery/' in self.url:
                try:
                    self.extract_album()
                except:
                    pass
            elif self.url.lower().endswith(('.jpg', '.jpeg', '.png', '.gif',
                                            '.gifv', '.mp4', '.webm')):
                self.extract_direct_mislinked()
            else:
                self.extract_single()
        except ImgurClientError as e:
            if e.status_code == 403:
                if self.client.credits['ClientRemaining'] is None:
                    self.failed_to_locate_error()
                elif self.client.credits['ClientRemaining'] <= 0:
                    self.no_credit_error()
                else:
                    self.failed_to_locate_error()
            if e.status_code == 429:
                self.rate_limit_exceeded_error()
            if e.status_code == 500:
                self.over_capacity_error()
            if e.status_code == 404:
                self.does_not_exist_error()
        except ImgurClientRateLimitError:
            self.rate_limit_exceeded_error()
        except:
            self.failed_to_locate_error()

    def rate_limit_exceeded_error(self):
        x = Post(self.url, self.user, self.post_title, self.subreddit,
                 self.creation_date)
        self.failed_extracts_to_save.append(x)
        self.failed_extract_messages.append(
            '\nFailed: Imgur rate limit exceeded. This post has been saved and '
            'will be downloaded the next time the application is run. Please '
            'make sure you have adequate user credits upon the next run. User '
            'credits can be checked in the help menu\n'
            'Title: %s, User: %s, Subreddit: %s' %
            (self.post_title, self.user, self.subreddit))

    def no_credit_error(self):
        x = Post(self.url, self.user, self.post_title, self.subreddit,
                 self.creation_date)
        self.failed_extracts_to_save.append(x)
        self.failed_extract_messages.append(
            '\nFailed: You do not have enough imgur credits left to extract '
            'this content. This post will be saved and extraction attempted '
            'the next time the program is run. Please make sure that you have '
            'adequate credits upon next run.\nTitle: %s, User: %s, '
            'Subreddit: %s' % (self.post_title, self.user, self.subreddit))

    def over_capacity_error(self):
        x = Post(self.url, self.user, self.post_title, self.subreddit,
                 self.creation_date)
        self.failed_extracts_to_save.append(x)
        self.failed_extract_messages.append(
            '\nFailed: Imgur is currently over capacity. This post has been '
            'saved and extraction will be attempted the next time the program '
            'is run.\nTitle: %s, User: %s, Subreddit: %s' %
            (self.post_title, self.user, self.subreddit))

    def does_not_exist_error(self):
        self.failed_extract_messages.append(
            '\nFailed: The content does not exist. This most likely means that '
            'the image has been deleted on Imgur, but the post still remains '
            'on reddit\nUrl: %s, User: %s, Subreddit: %s, Title: %s' %
            (self.url, self.user, self.subreddit, self.post_title))

    def failed_to_locate_error(self):
        self.failed_extract_messages.append(
            '\nFailed to locate the content at %s\n'
            'User: %s Subreddit: %s Title: %s\n' %
            (self.url, self.user, self.subreddit, self.post_title))

    def extract_direct_link(self):
        for ext in ['.jpg', '.jpeg', '.png', '.gif', '.gifv', '.mp4', '.webm']:
            if ext in self.url:
                index = self.url.find(ext)
                url = '%s%s' % (self.url[:index], ext)
        domain, id_with_ext = url.rsplit('/', 1)
        image_id, extension = id_with_ext.rsplit('.', 1)
        file_name = (self.post_title if self.name_downloads_by == 'Post Title'
                     else image_id)
        if url.endswith('gifv') or url.endswith('gif'):
            picture = self.client.get_image(image_id)
            if picture.type == 'image/gif' and picture.animated:
                url = picture.mp4
                extension = 'mp4'
        x = Content(url, self.user, self.post_title, self.subreddit, file_name,
                    "", '.' + extension, self.save_path,
                    self.subreddit_save_method)
        self.extracted_content.append(x)

    def extract_album(self):
        count = 1
        domain, album_id = self.url.rsplit('/', 1)
        for pic in self.client.get_album_images(album_id):
            url = pic.link
            address, extension = url.rsplit('.', 1)
            file_name = (self.post_title
                         if self.name_downloads_by == 'Post Title' else album_id)
            if pic.type == 'image/gif' and pic.animated:
                extension = 'mp4'
                url = pic.mp4
            x = Content(url, self.user, self.post_title, self.subreddit,
                        file_name + " ", count, '.' + extension, self.save_path,
                        self.subreddit_save_method)
            count += 1
            self.extracted_content.append(x)

    def extract_single(self):
        domain, image_id = self.url.rsplit('/', 1)
        pic = self.client.get_image(image_id)
        url = pic.link
        address, extension = url.rsplit('.', 1)
        file_name = (self.post_title if self.name_downloads_by == 'Post Title'
                     else image_id)
        if pic.type == 'image/gif' and pic.animated:
            extension = 'mp4'
            url = pic.mp4
        x = Content(url, self.user, self.post_title, self.subreddit, file_name,
                    "", '.' + extension, self.save_path,
                    self.subreddit_save_method)
        self.extracted_content.append(x)

    def extract_direct_mislinked(self):
        """
        All direct links to imgur.com must start with 'https://i.imgur.'.
        Sometimes links get mislabeled somehow when they are posted. This
        method adds the correct address beginning to mislinked imgur urls to
        get a proper extraction
        """
        for ext in ['.jpg', '.jpeg', '.png', '.gif', '.gifv', '.mp4', '.webm']:
            if ext in self.url:
                index = self.url.find(ext)
                url = '%s%s' % (self.url[:index], ext)
        domain, id_with_ext = url.rsplit('/', 1)
        domain = 'https://i.imgur.com/'
        url = '%s%s' % (domain, id_with_ext)
        image_id, extension = id_with_ext.rsplit('.', 1)
        file_name = (self.post_title if self.name_downloads_by == 'Post Title'
                     else image_id)
        if url.endswith('gifv') or url.endswith('gif'):
            picture = self.client.get_image(image_id)
            if picture.type == 'image/gif' and picture.animated:
                url = picture.mp4
                extension = 'mp4'
        x = Content(url, self.user, self.post_title, self.subreddit, file_name,
                    "", '.' + extension, self.save_path,
                    self.subreddit_save_method)
        self.extracted_content.append(x)

            # this is a lie, but eh so what
            path += '.mp4'
            if os.path.exists(path):
                continue
            print(" \_{}".format(url_to_get))
        elif parts.netloc in ['imgur.com', 'i.imgur.com']:
            noext = os.path.splitext(parts.path)[0]
            pieces = noext.strip('/').split('/')
            try:
                if pieces[0] == 'a':
                    # Note: this keeps only the last image link in the album.
                    for x in imgur.get_album_images(pieces[1]):
                        url_to_get = x.link
                else:
                    obj = imgur.get_image(pieces[0])
                    url_to_get = obj.link
            except:
                print(" \_ Unable to get {}".format(entry.url))
                ignore.add(path)
                continue
            hasext = os.path.splitext(path)
            if not hasext[1]:
                ext = os.path.splitext(url_to_get)[1]
                path += ext
            print(" \_{}".format(url_to_get))
        elif parts.netloc == 'gfycat.com':

def get_media(img_url, post_id):
    if any(s in img_url for s in ('i.redd.it', 'i.reddituploads.com')):
        file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
        file_extension = os.path.splitext(img_url)[-1].lower()
        # Fix for issue with i.reddituploads.com links not having a file
        # extension in the URL
        if not file_extension:
            file_extension += '.jpg'
            file_name += '.jpg'
            img_url += '.jpg'
        # Grab the GIF versions of .GIFV links
        # When Tweepy adds support for video uploads, we can grab the MP4 versions
        if file_extension == '.gifv':
            file_extension = file_extension.replace('.gifv', '.gif')
            file_name = file_name.replace('.gifv', '.gif')
            img_url = img_url.replace('.gifv', '.gif')
        # Download the file
        file_path = IMAGE_DIR + '/' + file_name
        print('[ OK ] Downloading file at URL ' + img_url + ' to ' + file_path +
              ', file type identified as ' + file_extension)
        img = save_file(img_url, file_path)
        return img
    elif 'imgur.com' in img_url:
        # Imgur
        try:
            client = ImgurClient(IMGUR_CLIENT, IMGUR_CLIENT_SECRET)
        except BaseException as e:
            print('[EROR] Error while authenticating with Imgur:', str(e))
            return
        # Working demo of regex: https://regex101.com/r/G29uGl/2
        regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Imgur image/gallery ID
            id = m.group(1)
            if any(s in img_url for s in ('/a/', '/gallery/')):
                # Gallery links
                images = client.get_album_images(id)
                # Only the first image in a gallery is used
                imgur_url = images[0].link
            else:
                # Single image
                imgur_url = client.get_image(id).link
            # If the URL is a GIFV link, change it to a GIF
            file_extension = os.path.splitext(imgur_url)[-1].lower()
            if file_extension == '.gifv':
                file_extension = file_extension.replace('.gifv', '.gif')
                imgur_url = imgur_url.replace('.gifv', '.gif')
            # Download the image
            file_path = IMAGE_DIR + '/' + id + file_extension
            print('[ OK ] Downloading Imgur image at URL ' + imgur_url +
                  ' to ' + file_path)
            imgur_file = save_file(imgur_url, file_path)
            # Imgur will sometimes return a single-frame thumbnail instead of a
            # GIF, so we need to check for this
            if file_extension == '.gif':
                # Open the file using the Pillow library
                img = Image.open(imgur_file)
                # Get the MIME type
                mime = Image.MIME[img.format]
                if mime == 'image/gif':
                    # Image is indeed a GIF, so it can be posted
                    img.close()
                    return imgur_file
                else:
                    # Image is not actually a GIF, so don't post it
                    print('[EROR] Imgur has not processed a GIF version of '
                          'this link, so it can not be posted')
                    img.close()
                    # Delete the image
                    try:
                        os.remove(imgur_file)
                    except BaseException as e:
                        print('[EROR] Error while deleting media file:', str(e))
                    return
            else:
                return imgur_file
        else:
            print('[EROR] Could not identify Imgur image/gallery ID in this URL:',
                  img_url)
            return
    elif 'gfycat.com' in img_url:
        # Gfycat
        gfycat_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
        client = GfycatClient()
        gfycat_info = client.query_gfy(gfycat_name)
        # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
        gfycat_url = gfycat_info['gfyItem']['max2mbGif']
        file_path = IMAGE_DIR + '/' + gfycat_name + '.gif'
        print('[ OK ] Downloading Gfycat at URL ' + gfycat_url + ' to ' + file_path)
        gfycat_file = save_file(gfycat_url, file_path)
        return gfycat_file
    elif 'giphy.com' in img_url:
        # Giphy
        # Working demo of regex: https://regex101.com/r/o8m1kA/2
        regex = r"https?://((?:.*)giphy\.com/media/|giphy.com/gifs/|i.giphy.com/)(.*-)?(\w+)(/|\n)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Giphy ID
            id = m.group(3)
            # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
            giphy_url = ('https://media.giphy.com/media/' + id +
                         '/giphy-downsized.gif')
            file_path = IMAGE_DIR + '/' + id + '-downsized.gif'
            print('[ OK ] Downloading Giphy at URL ' + giphy_url + ' to ' + file_path)
            giphy_file = save_file(giphy_url, file_path)
            return giphy_file
        else:
            print('[EROR] Could not identify Giphy ID in this URL:', img_url)
            return
    else:
        print('[WARN] Post', post_id, "doesn't point to an image/GIF:", img_url)
        return

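# A self-contained demo of the Imgur ID regex used above, run against a few
# illustrative URLs (no API calls are made here):
import re

regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
for u in ('https://imgur.com/a/abc123',
          'https://i.imgur.com/xyz789.gifv',
          'https://imgur.com/gallery/def456'):
    m = re.search(regex, u)
    print(u, '->', m.group(1) if m else None)
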
class ImgurWrapper:
    def __init__(self, client_id, client_secret):
        self.__client = ImgurClient(client_id, client_secret)

    @staticmethod
    def is_imgur(url):
        """
        Simple check to see if url is an imgur url
        :param url: parsed url object
        :return: boolean, whether it is an imgur url or not
        """
        return "imgur.com" in url.hostname

    @staticmethod
    def is_album(url):
        return "/a/" in url.path

    def get_image_list(self, url):
        """
        Takes the url and returns a list of all images associated with it:
        either the single image or every image in the album.
        :param url: parsed url object
        :return: list of images
        """
        image_list = []
        if self.is_album(url):
            image_list = self.get_album_images(url)
        else:
            image = self.get_image(url)
            if image is not None:
                image_list.append(image)
        return image_list

    def get_image(self, url):
        """
        Get a single image from a url
        :param url: parsed url
        :return: an image or None if an exception was raised
        """
        image_id = url.path[url.path.rfind("/") + 1:]
        try:
            image = self.__client.get_image(image_id)
        except ImgurClientError as e:
            # status_code is an int, so convert before concatenating
            logging.error("Status Code: " + str(e.status_code) +
                          " Error: " + e.error_message)
            image = None
        return image

    def get_album_images(self, url):
        """
        Gets all the images in an album as a list of image objects
        :param url: parsed url
        :return: either a list of images or an empty list
        """
        album_id = url.path[url.path.rfind("/") + 1:]
        image_list = []
        try:
            images = self.__client.get_album_images(album_id)
        except ImgurClientError as e:
            logging.error("Status Code: " + str(e.status_code) +
                          " Error: " + e.error_message)
        else:
            image_list = images
        return image_list

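# A hedged usage sketch: ImgurWrapper expects already-parsed URLs, so feed it
# the result of urllib.parse.urlparse. The credentials and album URL below
# are placeholders:
from urllib.parse import urlparse

wrapper = ImgurWrapper('my-client-id', 'my-client-secret')
url = urlparse('https://imgur.com/a/XqBdP')
if ImgurWrapper.is_imgur(url):
    for image in wrapper.get_image_list(url):
        print(image.link)
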
def get_media(submission, IMGUR_CLIENT, IMGUR_CLIENT_SECRET):
    img_url = submission.url
    # Make sure config file exists
    try:
        config = configparser.ConfigParser()
        config.read('config.ini')
    except BaseException as e:
        print('[EROR] Error while reading config file:', str(e))
        sys.exit()
    # Make sure media folder exists
    IMAGE_DIR = config['MediaSettings']['MediaFolder']
    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)
        print('[ OK ] Media folder not found, created a new one')
    # Download and save the linked image
    if any(s in img_url for s in ('i.redd.it', 'i.reddituploads.com')):
        # Reddit-hosted images
        file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
        file_extension = os.path.splitext(img_url)[-1].lower()
        # Fix for issue with i.reddituploads.com links not having a file
        # extension in the URL
        if not file_extension:
            file_extension += '.jpg'
            file_name += '.jpg'
            img_url += '.jpg'
        # Download the file
        file_path = IMAGE_DIR + '/' + file_name
        print('[ OK ] Downloading file at URL ' + img_url + ' to ' + file_path +
              ', file type identified as ' + file_extension)
        img = save_file(img_url, file_path)
        return [img]
    elif 'v.redd.it' in img_url:
        # Reddit video
        fileouts = []

        class ytdlLogger(object):
            def debug(self, msg):
                if msg.startswith(IMAGE_DIR):
                    fileouts.append(msg)

            def warning(self, msg):
                print("[WARN] " + msg)

            def error(self, msg):
                print("[EROR] " + msg)

        ytdl_opts = {
            'outtmpl': IMAGE_DIR + '/%(id)s.%(ext)s',
            'noplaylist': True,
            'forcefilename': True,
            'logger': ytdlLogger(),
        }
        print("[ OK ] Downloading video at url " + img_url + " via youtube-dl...")
        with youtube_dl.YoutubeDL(ytdl_opts) as ytdl:
            ytdl.download([img_url])
        print("[ OK ] File downloaded to " + fileouts[0])
        return [fileouts[0]]
    elif 'reddit.com/gallery/' in img_url:
        # Reddit galleries (multiple images)
        try:
            galleryitems = submission.gallery_data['items']
            mediadata = submission.media_metadata
        except BaseException as e:
            print('[EROR] Post seems to be a gallery but there was an error '
                  'trying to get the gallery data:', str(e))
            return
        if len(galleryitems) > 4:
            print('[WARN] Post is a gallery with more than 4 images. Skipping '
                  'as it is too many for Twitter.')
            return
        img_url_list = []
        for item in galleryitems:
            if mediadata[item['media_id']]['m'] == 'image/jpg':
                img_url_list.append(f"https://i.redd.it/{item['media_id']}.jpg")
            elif mediadata[item['media_id']]['m'] == 'image/png':
                img_url_list.append(f"https://i.redd.it/{item['media_id']}.png")
            elif mediadata[item['media_id']]['m'] == 'image/webp':
                img_url_list.append(f"https://i.redd.it/{item['media_id']}.webp")
            else:
                print('[WARN] An item in the gallery is not a JPG, PNG, or '
                      'WEBP. Skipping this post as it is likely unable to be '
                      'posted to Twitter.')
                return
        downloaded_imgs = []
        for url in img_url_list:
            file_name = os.path.basename(urllib.parse.urlsplit(url).path)
            saved = save_file(url, file_name)
            downloaded_imgs.append(saved)
        return downloaded_imgs
    elif 'imgur.com' in img_url:
        # Imgur
        try:
            client = ImgurClient(IMGUR_CLIENT, IMGUR_CLIENT_SECRET)
        except BaseException as e:
            print('[EROR] Error while authenticating with Imgur:', str(e))
            return
        # Working demo of regex: https://regex101.com/r/G29uGl/2
        regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Imgur image/gallery ID
            id = m.group(1)
            if any(s in img_url for s in ('/a/', '/gallery/')):
                # Gallery links
                images = client.get_album_images(id)
                # Only the first image in a gallery is used
                imgur_url = images[0].link
            else:
                # Single image
                imgur_url = client.get_image(id).link
            # If the URL is a GIFV or MP4 link, change it to the GIF version
            file_extension = os.path.splitext(imgur_url)[-1].lower()
            if file_extension == '.gifv':
                file_extension = file_extension.replace('.gifv', '.gif')
                imgur_url = imgur_url.replace('.gifv', '.gif')
            elif file_extension == '.mp4':
                file_extension = file_extension.replace('.mp4', '.gif')
                imgur_url = imgur_url.replace('.mp4', '.gif')
            # Download the image
            file_path = IMAGE_DIR + '/' + id + file_extension
            print('[ OK ] Downloading Imgur image at URL ' + imgur_url +
                  ' to ' + file_path)
            imgur_file = save_file(imgur_url, file_path)
            # Imgur will sometimes return a single-frame thumbnail instead of a
            # GIF, so we need to check for this
            if file_extension == '.gif':
                # Open the file using the Pillow library
                img = Image.open(imgur_file)
                # Get the MIME type
                mime = Image.MIME[img.format]
                if mime == 'image/gif':
                    # Image is indeed a GIF, so it can be posted
                    img.close()
                    return [imgur_file]
                else:
                    # Image is not actually a GIF, so don't post it
                    print('[WARN] Imgur has not processed a GIF version of '
                          'this link, so it can not be posted to Twitter')
                    img.close()
                    # Delete the image
                    try:
                        os.remove(imgur_file)
                    except BaseException as e:
                        print('[EROR] Error while deleting media file:', str(e))
                    return
            else:
                return [imgur_file]
        else:
            print('[EROR] Could not identify Imgur image/gallery ID in this URL:',
                  img_url)
            return
    elif 'gfycat.com' in img_url:
        # Gfycat
        try:
            gfycat_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
            client = GfycatClient()
            gfycat_info = client.query_gfy(gfycat_name)
        except BaseException as e:
            print('[EROR] Error downloading Gfycat link:', str(e))
            return
        # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
        gfycat_url = gfycat_info['gfyItem']['max2mbGif']
        file_path = IMAGE_DIR + '/' + gfycat_name + '.gif'
        print('[ OK ] Downloading Gfycat at URL ' + gfycat_url + ' to ' + file_path)
        gfycat_file = save_file(gfycat_url, file_path)
        return [gfycat_file]
    elif 'giphy.com' in img_url:
        # Giphy
        # Working demo of regex: https://regex101.com/r/o8m1kA/2
        regex = r"https?://((?:.*)giphy\.com/media/|giphy.com/gifs/|i.giphy.com/)(.*-)?(\w+)(/|\n)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Giphy ID
            id = m.group(3)
            # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
            giphy_url = ('https://media.giphy.com/media/' + id +
                         '/giphy-downsized.gif')
            file_path = IMAGE_DIR + '/' + id + '-downsized.gif'
            print('[ OK ] Downloading Giphy at URL ' + giphy_url + ' to ' + file_path)
            giphy_file = save_file(giphy_url, file_path)
            # Check the hash to make sure it's not a GIF saying "This content
            # is not available"
            # More info: https://github.com/corbindavenport/tootbot/issues/8
            hash = hashlib.md5(file_as_bytes(open(giphy_file, 'rb'))).hexdigest()
            if hash == '59a41d58693283c72d9da8ae0561e4e5':
                print('[WARN] Giphy has not processed a 2MB GIF version of '
                      'this link, so it can not be posted to Twitter')
                return
            else:
                return [giphy_file]
        else:
            print('[EROR] Could not identify Giphy ID in this URL:', img_url)
            return
    else:
        # Check if URL is an image, based on the MIME type
        image_formats = ('image/png', 'image/jpeg', 'image/gif', 'image/webp')
        img_site = urlopen(img_url)
        meta = img_site.info()
        if meta["content-type"] in image_formats:
            # URL appears to be an image, so download it
            file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
            file_path = IMAGE_DIR + '/' + file_name
            print('[ OK ] Downloading file at URL ' + img_url + ' to ' + file_path)
            try:
                img = save_file(img_url, file_path)
                return [img]
            except BaseException as e:
                print('[EROR] Error while downloading image:', str(e))
                return
        else:
            print('[EROR] URL does not point to a valid image file')
            return

class imgur(commands.Cog):
    def __init__(self, bot):
        self.bot = bot
        self.clientID = bot.config.get('imgur_client_id')
        self.secretID = bot.config.get('imgur_client_secret')
        self.imgur_client = ImgurClient(self.clientID, self.secretID)

    @is_admin()
    @commands.command(aliases=['addalbum', 'aa'])
    async def album(self, ctx, link: str = None, *, album_name: str = None):
        """addalbum [album link] [album name] - Adds an album link and name.
        ex; .addalbum https://imgur.com/gallery/MnIjj3n a phone
        and 'pickone a phone' would call this album.
        """
        if not link or not album_name:
            await ctx.send('Please include a link to the album and a name '
                           'for the album.')
            return
        possible_links = ['https://imgur.com/gallery/',
                          'https://imgur.com/a/']  # leaving this for additions later
        if not any(x in link for x in possible_links):
            await ctx.send("That doesn't look like a valid link.")
        else:
            album_name = album_name.lower()
            fetch_albums = await self.bot.fetch.all(
                "SELECT * FROM Albums WHERE GuildID=?", (ctx.guild.id, ))
            fetch_album_names = ([album[2] for album in fetch_albums]
                                 if fetch_albums else [])
            if album_name not in fetch_album_names:
                await self.bot.db.execute(
                    "INSERT INTO Albums(GuildID, AlbumName, AlbumLink) "
                    "VALUES (?, ?, ?)", (ctx.guild.id, album_name, link))
                await self.bot.db.commit()
                await ctx.send(f'"{album_name}" has been added!')
            else:
                await ctx.send(f'"{album_name}" already exists')

    @is_admin()
    @commands.command(aliases=['delalbum', 'remalbum', 'da', 'ra'])
    async def deletealbum(self, ctx, *, album_name: str = None):
        """deletealbum [album name] - Deletes an album by name.
        ex; .deletealbum a phone
        """
        if not album_name:
            await ctx.send('Please provide an album name.')
        if album_name:
            album_name = album_name.lower()
            fetch_album = await self.bot.fetch.one(
                "SELECT * FROM Albums WHERE GuildID=? AND AlbumName=?",
                (ctx.guild.id, album_name))
            if fetch_album:
                await self.bot.db.execute(
                    "DELETE FROM Albums WHERE GuildID=? AND AlbumName=?",
                    (ctx.guild.id, album_name))
                await self.bot.db.commit()
                await ctx.send(f'Removed album "{album_name}"')
            else:
                await ctx.send(
                    f'Couldn\'t find an album with the name "{album_name}"')

    @commands.command(aliases=['p1', 'po', 'pick'])
    async def pickone(self, ctx, *, album_name: str = None):
        """pickone (optional album name) - picks a random image from the album.
        ex; .pickone a phone
        If only one album exists you do not need to provide an album name.
        """
        grab_content_title_config = await self.bot.fetch.one(
            "SELECT Content, Title FROM GuildConfig WHERE ID=?",
            (ctx.guild.id, ))
        content = grab_content_title_config[0]
        title = grab_content_title_config[1]
        if content is None and title is None:
            content = 'You asked me to pick a picture...'
            title = 'I Chose...'
        if album_name:
            album_name = album_name.lower()
            fetch_album = await self.bot.fetch.one(
                "SELECT * FROM Albums WHERE GuildID=? AND AlbumName=?",
                (ctx.guild.id, album_name))
            if not fetch_album:
                return await ctx.send("Couldn't find an album by that name")
            if len(fetch_album) == 0:
                return await ctx.send('You should probably add an album first..')
            imgur_link = fetch_album[3]
        if not album_name:
            fetch_albums = await self.bot.fetch.all(
                "SELECT AlbumName, AlbumLink FROM Albums WHERE GuildID=?",
                (ctx.guild.id, ))
            if not fetch_albums:
                return await ctx.send("Might want to add an album first!")
            if len(fetch_albums) >= 2:
                return await ctx.send('Seems you forgot to provide an album name!')
            imgur_link = fetch_albums[0][1]
        try:
            await ctx.message.add_reaction(
                discord.utils.get(self.bot.emojis, name='check'))
        except:
            pass
        try:
            tail = imgur_link.split('/')[4]
            the_list = [item.link
                        for item in self.imgur_client.get_album_images(tail)]
            item = random.choice(the_list)
            item_id = item.split('/')[3][0:-4]
            if title in ['album title', 'Album Title']:
                title = self.imgur_client.get_album(tail).title
            if content in ['description', 'Description']:
                content = self.imgur_client.get_image(item_id).description
            if (self.imgur_client.get_image(item_id).size * 1e-6) > 8.0:
                return await ctx.send(
                    f"{self.imgur_client.get_image(item_id).link} was too big "
                    f"to send.")
            get_stream_status = await self.bot.fetch.one(
                "SELECT Stream FROM GuildConfig WHERE ID=?", (ctx.guild.id, ))
            stream = get_stream_status[0]
            async with self.bot.aiohttp.get(item) as resp:
                link = await resp.read()
            if item.endswith('.gif'):
                f = discord.File(io.BytesIO(link), filename="image.gif")
                e = discord.Embed(title=title, colour=discord.Colour(0x278d89))
                if stream:
                    e.set_image(url='attachment://image.gif')
                else:
                    e.set_image(url=f'{self.imgur_client.get_image(item_id).link}')
            else:
                f = discord.File(io.BytesIO(link), filename="image.png")
                e = discord.Embed(title=title, colour=discord.Colour(0x278d89))
                if stream:
                    e.set_image(url='attachment://image.png')
                else:
                    e.set_image(url=f'{self.imgur_client.get_image(item_id).link}')
            e.set_footer(
                text=f'storage is currently: {"link" if not stream else "stream"}\n'
                     f'if images aren\'t showing up, try toggling this with .stream')
            if stream:
                await ctx.send(file=f, embed=e, content=content)
            if not stream:
                await ctx.send(embed=e, content=content)
        except Exception as e:
            print(f'{e}, tail: {tail if tail else None} link: {imgur_link}, '
                  f'item: {item if item else None}')
            if isinstance(e, ImgurClientError):
                print(f'{e.error_message}')
                return await ctx.send(f'{e.error_message}')
            elif not isinstance(e, ImgurClientError):
                return await ctx.send(
                    f'There was an issue processing this command.\nDebug: `{e}`')

    @commands.command(aliases=['al', 'list'])
    async def albumlist(self, ctx):
        """albumlist - displays all currently added albums by name."""
        fetch_albums = await self.bot.fetch.all(
            "SELECT * FROM Albums WHERE GuildID=?", (ctx.guild.id, ))
        if fetch_albums:
            list_album_names = ", ".join([album[2] for album in fetch_albums])
            await ctx.send(f"{list_album_names}")
        else:
            await ctx.send("It doesn't seem that you have added an album.")

    @is_admin()
    @commands.command(aliases=['adda', 'admin'])
    async def addadmin(self, ctx, member: discord.Member = None):
        """addadmin [user name] - Adds an admin
        ex; .addadmin @ProbsJustin#0001
        You can attempt to use just a string name, e.g. ProbsJustin,
        but a mention is recommended.
        """
        if not member:
            await ctx.send('You should probably include a member.')
            return
        else:
            # Check/store the target member's id (the original used
            # ctx.author.id, which made the command act on the invoker).
            check_if_pwr_user = await self.bot.fetch.one(
                "SELECT * FROM Permissions WHERE MemberID=? AND GuildID=?",
                (member.id, ctx.guild.id))
            if not check_if_pwr_user:
                await self.bot.db.execute(
                    "INSERT INTO Permissions(MemberID, GuildID) VALUES (?, ?)",
                    (member.id, ctx.guild.id))
                await self.bot.db.commit()
                await ctx.send(f'{member.mention} has been added as an admin.')
            else:
                await ctx.send('That user is already an admin!')

    @is_admin()
    @commands.command(aliases=['remadmin', 'deladmin', 'deleteadmin'])
    async def removeadmin(self, ctx, member: discord.Member = None):
        """removeadmin [user name] - Removes an admin
        ex; .removeadmin @ProbsJustin#0001
        You can attempt to use just a string name, e.g. ProbsJustin,
        but a mention is recommended.
        """
        if not member:
            await ctx.send('You should probably include a member.')
            return
        else:
            chck_if_usr_is_admin = await self.bot.fetch.one(
                "SELECT * FROM Permissions WHERE MemberID=? AND GuildID=?",
                (member.id, ctx.guild.id))  # target member, not the invoker
            if chck_if_usr_is_admin:
                await self.bot.db.execute(
                    "DELETE FROM Permissions WHERE MemberID=? AND GuildID=?",
                    (member.id, ctx.guild.id))
                await self.bot.db.commit()
                await ctx.send(f'{member.mention} has been removed as an admin.')
            else:
                await ctx.send("I couldn't find that user in the admin list.")

    @addadmin.error
    @removeadmin.error
    async def member_not_found_error(self, ctx, exception):
        # so this is a thing.
        if not isinstance(exception, NotAuthorized):
            await ctx.send('Member not found! Try mentioning them instead.')

    @is_admin()
    @commands.command()
    async def set(self, ctx, content_title: str = None, *, message: str = ''):
        """set [content/title] [name] - Changes the title/content from
        "I Chose..." / "You asked..."
        """
        editable_args = ['content', 'title']
        if not content_title:
            await ctx.send(f"Please provide either {' or '.join(editable_args)}.")
            return
        content_title = content_title.lower()
        if content_title in editable_args:
            if content_title == 'title':
                await self.bot.db.execute(
                    "UPDATE GuildConfig SET Title=? WHERE ID=?",
                    (message, ctx.guild.id))
            if content_title == 'content':
                await self.bot.db.execute(
                    "UPDATE GuildConfig SET Content=? WHERE ID=?",
                    (message, ctx.guild.id))
            await self.bot.db.commit()
            await ctx.send(f'{content_title.lower()} updated.')
        else:
            await ctx.send("Invalid parameters.")

    @is_admin()
    @commands.command()
    async def stream(self, ctx):
        """Toggles how the images are sent to discord; if images aren't
        showing up, try toggling this.
        """
        get_stream_status = await self.bot.fetch.one(
            "SELECT Stream FROM GuildConfig WHERE ID=?", (ctx.guild.id, ))
        await self.bot.db.execute(
            "UPDATE GuildConfig SET Stream=? WHERE ID=?",
            (not get_stream_status[0], ctx.guild.id))
        await self.bot.db.commit()
        await ctx.send(
            f"Streaming turned {'on' if not get_stream_status[0] else 'off'}")

class imgur(commands.Cog):
    def __init__(self, bot):
        self.bot = bot
        self.clientID = bot.config.data.get('config').get('imgur_client_id')
        self.secretID = bot.config.data.get('config').get('imgur_client_secret')
        self.imgur_client = ImgurClient(self.clientID, self.secretID)

    async def fetch_one(self, arg):
        get = await self.bot.db.execute(arg)
        results = await get.fetchone()
        return results

    async def fetch_all(self, arg):
        get = await self.bot.db.execute(arg)
        results = await get.fetchall()
        return results

    @is_admin()
    @commands.command(aliases=['addalbum', 'aa'])
    async def album(self, ctx, link: str = None, *, album_name: str = None):
        """addalbum [album link] [album name] - Adds an album link and name.
        ex; .addalbum https://imgur.com/gallery/MnIjj3n a phone
        and 'pickone a phone' would call this album.
        """
        if not link or not album_name:
            await ctx.send('Please include a link to the album and a name '
                           'for the album.')
            return
        possible_links = ['https://imgur.com/gallery/',
                          'https://imgur.com/a/']  # leaving this for additions later
        if not any(x in link for x in possible_links):
            await ctx.send("That doesn't look like a valid link.")
        else:
            album_name = album_name.lower()
            # Note: interpolating values into SQL like this is injection-prone;
            # parameterized queries (as in the INSERT below) are safer.
            get_albums = await imgur.fetch_all(
                self,
                f'SELECT AlbumLink FROM GuildAlbums WHERE GuildID={ctx.guild.id}')
            if link not in list(albumlink[0] for albumlink in get_albums):
                await self.bot.db.execute(
                    "INSERT INTO GuildAlbums(GuildID, AlbumLink, AlbumName) "
                    "VALUES (?, ?, ?)", (ctx.guild.id, link, album_name))
                await self.bot.db.commit()
                await ctx.send(f'"{album_name}" has been added!')
            else:
                albums_name = await imgur.fetch_one(
                    self,
                    f'SELECT AlbumName FROM GuildAlbums WHERE AlbumLink="{link}"')
                await ctx.send(f'{link} already exists as {albums_name[0]}.')

    @is_admin()
    @commands.command(aliases=['delalbum', 'remalbum', 'da', 'ra'])
    async def deletealbum(self, ctx, *, album_name: str = None):
        """deletealbum [album name] - Deletes an album by name.
        ex; .deletealbum a phone
        """
        if not album_name:
            await ctx.send('Please provide an album name.')
            return  # without this, album_name.lower() below would raise
        get_albums = await imgur.fetch_all(
            self,
            f'SELECT AlbumName FROM GuildAlbums WHERE GuildID={ctx.guild.id}')
        if album_name.lower() in list(albumnames[0] for albumnames in get_albums):
            await self.bot.db.execute(
                'DELETE FROM GuildAlbums WHERE GuildID=? and AlbumName=?',
                (ctx.guild.id, album_name.lower()))
            await self.bot.db.commit()
            await ctx.send(f'Removed album "{album_name}"')
        else:
            await ctx.send(f'Couldn\'t find an album with the name "{album_name}"')

    @commands.command(aliases=['p1', 'po', 'pick'])
    async def pickone(self, ctx, *, album_name: str = None):
        """pickone (optional album name) - picks a random image from the album.
        ex; .pickone a phone
        If only one album exists you do not need to provide an album name.
        """
        album_names = await imgur.fetch_all(
            self,
            f'SELECT AlbumName FROM GuildAlbums WHERE GuildID={ctx.guild.id}')
        if not album_names:
            await ctx.send('You should probably add an album first..')
            return
        content = await imgur.fetch_one(
            self, f'SELECT Content FROM GuildConfig WHERE ID={ctx.guild.id}')
        title = await imgur.fetch_one(
            self, f'SELECT Title FROM GuildConfig WHERE ID={ctx.guild.id}')
        await ctx.message.add_reaction(
            discord.utils.get(self.bot.emojis, name='check'))
        content = ('You asked me to pick a picture...' if not content[0]
                   else content[0])
        title = 'I Chose...' if not title[0] else title[0]
        if album_name:
            if album_name.lower() in list(albumnames[0]
                                          for albumnames in album_names):
                album_link = await imgur.fetch_one(
                    self,
                    f'SELECT AlbumLink FROM GuildAlbums WHERE '
                    f'AlbumName="{album_name.lower()}" and GuildID={ctx.guild.id}')
                tail = album_link[0].split('/')[4]
                the_list = [item.link
                            for item in self.imgur_client.get_album_images(tail)]
            else:
                await ctx.send(
                    f'I couldn\'t find an album by the name of "{album_name}"')
                return  # without this, the_list below would be unbound
        if not album_name:
            if len(album_names) >= 2:
                await ctx.send('Seems you forgot to provide an album name!')
                return
            if len(album_names) == 1:
                album_link = await imgur.fetch_one(
                    self,
                    f'SELECT AlbumLink FROM GuildAlbums WHERE '
                    f'AlbumName="{album_names[0][0]}" and GuildID={ctx.guild.id}')
                tail = album_link[0].split('/')[4]
                the_list = [item.link
                            for item in self.imgur_client.get_album_images(tail)]
        try:
            item = random.choice(the_list)
            item_id = item.split('/')[3][0:-4]
            if title in ['album title', 'Album Title']:
                title = self.imgur_client.get_album(tail).title
            if content in ['description', 'Description']:
                content = self.imgur_client.get_image(item_id).description
            async with self.bot.aiohttp.get(item) as resp:
                link = await resp.read()
            if item.endswith('.gif'):
                f = discord.File(io.BytesIO(link), filename="image.gif")
                e = discord.Embed(title=title, colour=discord.Colour(0x278d89))
                e.set_image(url='attachment://image.gif')
            else:
                f = discord.File(io.BytesIO(link), filename="image.png")
                e = discord.Embed(title=title, colour=discord.Colour(0x278d89))
                e.set_image(url='attachment://image.png')
            await ctx.send(file=f, embed=e, content=content)
        except Exception as e:
            if isinstance(e, ImgurClientError):
                print(f'{e.error_message}')
                await ctx.send(f'{e.error_message}')
            elif not isinstance(e, ImgurClientError):
                await ctx.send(f'There was an issue processing this command. {e}')

    @commands.command(aliases=['al', 'list'])
    async def albumlist(self, ctx):
        """albumlist - displays all currently added albums by name."""
        album_names = await imgur.fetch_all(
            self,
            f'SELECT AlbumName FROM GuildAlbums WHERE GuildID={ctx.guild.id}')
        if len(album_names) != 0:  # `is not 0` compared identity, not value
            await ctx.send(
                f"The list of albums I see are: "
                f"{', '.join(an[0] for an in album_names)}.")
        else:
            await ctx.send("It doesn't seem that you have added an album.")

    @is_admin()
    @commands.command(aliases=['adda', 'admin'])
    async def addadmin(self, ctx, member: discord.Member = None):
        """addadmin [user name] - Adds an admin
        ex; .addadmin @ProbsJustin#0001
        You can attempt to use just a string name, e.g. ProbsJustin,
        but a mention is recommended.
        """
        if not member:
            await ctx.send('You should probably include a member.')
            return
        else:
            get_admins = await imgur.fetch_all(
                self,
                f'SELECT AdminID FROM GuildAdmins WHERE GuildID={ctx.guild.id}')
            if member.id not in list(admin[0] for admin in get_admins):
                await self.bot.db.execute(
                    "INSERT INTO GuildAdmins(GuildID, AdminID) VALUES (?, ?)",
                    (ctx.guild.id, member.id))
                await self.bot.db.commit()
                await ctx.send(f'{member.mention} has been added as an admin.')
            else:
                await ctx.send('That user is already an admin!')

    @is_admin()
    @commands.command(aliases=['remadmin', 'deladmin', 'deleteadmin'])
    async def removeadmin(self, ctx, member: discord.Member = None):
        """removeadmin [user name] - Removes an admin
        ex; .removeadmin @ProbsJustin#0001
        You can attempt to use just a string name, e.g. ProbsJustin,
        but a mention is recommended.
        """
        if not member:
            await ctx.send('You should probably include a member.')
            return
        else:
            get_admins = await imgur.fetch_all(
                self,
                f'SELECT AdminID FROM GuildAdmins WHERE GuildID={ctx.guild.id}')
            if member.id in list(admin[0] for admin in get_admins):
                await self.bot.db.execute(
                    'DELETE FROM GuildAdmins WHERE GuildID=? and AdminID=?',
                    (ctx.guild.id, member.id))
                await self.bot.db.commit()
                await ctx.send(f'{member.mention} has been removed as an admin.')
            else:
                await ctx.send("I couldn't find that user in the admin list.")

    @addadmin.error
    @removeadmin.error
    async def member_not_found_error(self, ctx, exception):
        # so this is a thing.
        if not isinstance(exception, NotAuthorized):
            await ctx.send('Member not found! Try mentioning them instead.')

    @is_admin()
    @commands.command()
    async def set(self, ctx, content_title: str = None, *, message: str = ''):
        """set [content/title] [name] - Changes the title/content from
        "I Chose..." / "You asked..."
        """
        editable_args = ['content', 'title']
        if not content_title:
            await ctx.send(f"Please provide either {' or '.join(editable_args)}.")
            return
        if content_title.lower() in editable_args:
            # As above, a parameterized query would be safer for `message`.
            await self.bot.db.execute(
                f'UPDATE GuildConfig SET {content_title.title()}="{message}" '
                f'WHERE ID={ctx.guild.id}')
            await self.bot.db.commit()
            await ctx.send(f'{content_title.lower()} updated.')
        else:
            await ctx.send("Invalid parameters.")

        if not os.path.isfile(toDownload[i].id + '.mp4'):
            try:
                ydl_opts['outtmpl'] = toDownload[i].id + '.mp4'
                with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                    ydl.download([toDownload[i].url])
            except Exception as e:
                Unable.write(str(e) + ' ' + toDownload[i].url +
                             ' http://www.reddit.com/' + toDownload[i].id + '\n')
            os.chdir(base)
            print('imgur gifv: ' + toDownload[i].id)
    else:
        imgurCode = urlparse(toDownload[i].url).path.split('/')[-1].split('.')[0]
        image = client.get_image(imgurCode)
        if not imgCheck:
            ext = (".mp4" if hasattr(image, "mp4")
                   else "." + urlparse(image.link).path.split('.')[-1])
            toDownload[i].url = "https://i.imgur.com/" + imgurCode + ext
            imgur_path = (mypath + '/' + toDownload[i].id + '.' +
                          urlparse(toDownload[i].url).path.split('.')[-1])
            download(toDownload[i].url, imgur_path)
            print('Basic imgur file, missing extension: ' + toDownload[i].id)
        if imgCheck:
            imgur_path = (mypath + '/' + ''.join(filename(toDownload[i].id)) +
                          '.' + urlparse(image.link).path.split('.')[-1])
            download(toDownload[i].url, imgur_path)

def get_url(submission, mp4_instead_gif=True):
    '''
    return TYPE, URL, EXTENSION
    E.g.: return 'img', 'http://example.com/pic.png', 'png'
    '''
    def what_is_inside(url):
        header = requests.head(url).headers
        if 'Content-Type' in header:
            return header['Content-Type']
        else:
            return ''

    url = submission.url
    url_content = what_is_inside(url)
    if (CONTENT_JPEG == url_content or CONTENT_PNG == url_content):
        return TYPE_IMG, url, url_content.split('/')[1]
    if CONTENT_GIF in url_content:
        if url.endswith('.gif') and mp4_instead_gif:
            # Let's try to find an .mp4 file.
            url_mp4 = url[:-4] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4, 'mp4'
        return TYPE_GIF, url, 'gif'
    if url.endswith('.gifv'):
        if mp4_instead_gif:
            url_mp4 = url[:-5] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4, 'mp4'
        if CONTENT_GIF in what_is_inside(url[0:-1]):
            return TYPE_GIF, url[0:-1], 'gif'
    if submission.is_self is True:
        # Self submission with text
        return 'text', None, None
    if urlparse(url).netloc == 'imgur.com':
        # Imgur
        imgur_config = yaml.load(open('imgur.yml').read())
        imgur_client = ImgurClient(imgur_config['client_id'],
                                   imgur_config['client_secret'])
        path_parts = urlparse(url).path.split('/')
        if path_parts[1] == 'gallery':
            # TODO: gallery handling
            return 'other', url, None
        elif path_parts[1] == 'topic':
            # TODO: topic handling
            return 'other', url, None
        elif path_parts[1] == 'a':
            # An imgur album
            album = imgur_client.get_album(path_parts[2])
            story = {}
            for num, img in enumerate(album.images):
                number = num + 1
                what = TYPE_IMG
                link = img['link']
                ext = img['type'].split('/')[1]
                if img['animated']:
                    what = TYPE_GIF
                    link = img['mp4'] if mp4_instead_gif else img['gifv'][:-1]
                    ext = 'mp4' if mp4_instead_gif else 'gif'
                story[number] = {'url': link, 'what': what, 'ext': ext}
            return 'album', story, None
        else:
            # Just an imgur image
            img = imgur_client.get_image(path_parts[1].split('.')[0])
            if not img.animated:
                return TYPE_IMG, img.link, img.type.split('/')[1]
            else:
                if mp4_instead_gif:
                    return TYPE_GIF, img.mp4, 'mp4'
                else:
                    # return 'gif', img.link, 'gif'
                    return TYPE_GIF, img.gifv[:-1], 'gif'
    else:
        return 'other', url, None

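# A minimal standalone version of the ".gifv -> .mp4" probe used above: strip
# the "v", ask the server what lives at the .mp4 variant via a HEAD request,
# and fall back to treating the URL as a GIF otherwise (requests only; the
# behavior sketch is an assumption distilled from the function above):
import requests

def probe_gifv(url):
    if url.endswith('.gifv'):
        url_mp4 = url[:-5] + '.mp4'
        if requests.head(url_mp4).headers.get('Content-Type') == 'video/mp4':
            return url_mp4, 'mp4'
    return url, 'gif'
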
from imgurpython import ImgurClient
import requests
from PIL import Image
import io
import urllib.request  # needed for urlopen below
import webbrowser      # needed for open_new below

client_id = 'c2058ecfc76d75f'
client_secret = '5fe636c3e7a032b56b2120fe82eb3071c790c5ff'
client = ImgurClient(client_id, client_secret)

# Example request from album galleries
# items = client.get_album_images("f0H0u")  # This is doggos
items2 = client.get_album_images("XqBdP")  # This is for not doggos
item = client.get_image("nhTyj4d")
webbrowser.open_new(item.link)

with urllib.request.urlopen(item.link) as url:
    f = io.BytesIO(url.read())
img = Image.open(f)
img.show()

def image_grayscale(picture):
    new_list = []
    for p in picture.getdata():
        new_red = int(p[0] * 0.299)
        new_green = int(p[1] * 0.587)
        # The snippet ends here in the original; the standard luma weight for
        # blue (0.114) completes the conversion.
        new_blue = int(p[2] * 0.114)
        gray = new_red + new_green + new_blue
        new_list.append((gray, gray, gray))
    return new_list

class Imgur(Plugin):
    CONFIG_DEFAULTS = {
        'client_id': None,
        'client_secret': None,
    }

    CONFIG_ENVVARS = {
        'client_id': ['IMGUR_CLIENT_ID'],
        'client_secret': ['IMGUR_CLIENT_SECRET'],
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.client = ImgurClient(self.config_get('client_id'),
                                  self.config_get('client_secret'))

    @Plugin.integrate_with('linkinfo')
    def integrate_with_linkinfo(self, linkinfo):
        linkinfo.register_handler(
            lambda url: url.netloc in ('imgur.com', 'i.imgur.com'),
            self._linkinfo_handler, exclusive=True)

    def _linkinfo_handler(self, url, match):
        # Split up endpoint and ID: /<image>, /a/<album> or /gallery/<id>
        kind, _, id = url.path.lstrip('/').rpartition('/')
        # Strip file extension from direct image links
        id = id.partition('.')[0]
        try:
            if kind == '':
                nsfw, title = self._format_image(self.client.get_image(id))
            elif kind == 'a':
                nsfw, title = self._format_album(self.client.get_album(id),
                                                 url.fragment)
            elif kind == 'gallery':
                data = self.client.gallery_item(id)
                if data.is_album:
                    nsfw, title = self._format_album(data, None)
                else:
                    nsfw, title = self._format_image(data)
            else:
                nsfw, title = False, None
        except ImgurClientError as e:
            return LinkInfoResult(url, str(e), is_error=True)
        if title:
            return LinkInfoResult(url, title, nsfw=nsfw)
        else:
            return None

    @staticmethod
    def _format_image(data):
        title = data.title or ''
        return data.nsfw or 'nsfw' in title.lower(), title

    @staticmethod
    def _format_album(data, image_id):
        title = '{0} ({1})'.format(
            data.title or 'Untitled album',
            pluralize(data.images_count, 'image', 'images'))
        images = {i['id']: i for i in data.images}
        image = images.get(image_id)
        if image and image['title']:
            title += ': ' + image['title']
        return data.nsfw or 'nsfw' in title.lower(), title

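# A quick illustration of the endpoint/ID split performed by
# _linkinfo_handler, run on representative imgur paths (stdlib only; the
# sample paths are made up):
for path in ('nhTyj4d.jpg', 'a/XqBdP', 'gallery/def456'):
    kind, _, id = path.rpartition('/')
    id = id.partition('.')[0]
    print(path, '->', kind or '<image>', id)
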
class SpiffyTitles(callbacks.Plugin): """Displays link titles when posted in a channel""" threaded = True callBefore = ["Web"] link_cache = [] handlers = {} wall_clock_timeout = 8 max_request_retries = 3 imgur_client = None def __init__(self, irc): self.__parent = super(SpiffyTitles, self) self.__parent.__init__(irc) self.wall_clock_timeout = self.registryValue("wallClockTimeoutInSeconds") self.default_handler_enabled = self.registryValue("defaultHandlerEnabled") self.add_handlers() def add_handlers(self): """ Adds all handlers """ self.add_youtube_handlers() self.add_imdb_handlers() self.add_imgur_handlers() self.add_coub_handlers() self.add_vimeo_handlers() self.add_dailymotion_handlers() self.add_wikipedia_handlers() def add_dailymotion_handlers(self): self.handlers["www.dailymotion.com"] = self.handler_dailymotion def add_vimeo_handlers(self): self.handlers["vimeo.com"] = self.handler_vimeo def add_coub_handlers(self): self.handlers["coub.com"] = self.handler_coub def add_wikipedia_handlers(self): self.handlers["en.wikipedia.org"] = self.handler_wikipedia def handler_dailymotion(self, url, info, channel): """ Handles dailymotion links """ dailymotion_handler_enabled = self.registryValue("dailymotionHandlerEnabled", channel=channel) log.debug("SpiffyTitles: calling dailymotion handler for %s" % url) title = None video_id = None """ Get video ID """ if dailymotion_handler_enabled and "/video/" in info.path: video_id = info.path.lstrip("/video/").split("_")[0] if video_id is not None: api_url = "https://api.dailymotion.com/video/%s?fields=id,title,owner.screenname,duration,views_total" % video_id log.debug("SpiffyTitles: looking up dailymotion info: %s", api_url) agent = self.get_user_agent() headers = { "User-Agent": agent } request = requests.get(api_url, headers=headers) ok = request.status_code == requests.codes.ok if ok: response = json.loads(request.text) if response is not None and "title" in response: video = response dailymotion_template = Template(self.registryValue("dailymotionVideoTitleTemplate", channel=channel)) video["views_total"] = "{:,}".format(int(video["views_total"])) video["duration"] = self.get_duration_from_seconds(video["duration"]) video["ownerscreenname"] = video["owner.screenname"] title = dailymotion_template.render(video) else: log.debug("SpiffyTitles: received unexpected payload from video: %s" % api_url) else: log.error("SpiffyTitles: dailymotion handler returned %s: %s" % (request.status_code, request.text[:200])) if title is None: log.debug("SpiffyTitles: could not get dailymotion info for %s" % url) return self.handler_default(url, channel) else: return title def handler_vimeo(self, url, domain, channel): """ Handles Vimeo links """ vimeo_handler_enabled = self.registryValue("vimeoHandlerEnabled", channel=channel) log.debug("SpiffyTitles: calling vimeo handler for %s" % url) title = None video_id = None """ Get video ID """ if vimeo_handler_enabled: result = re.search(r'^(http(s)://)?(www\.)?(vimeo\.com/)?(\d+)', url) if result is not None: video_id = result.group(5) if video_id is not None: api_url = "https://vimeo.com/api/v2/video/%s.json" % video_id log.debug("SpiffyTitles: looking up vimeo info: %s", api_url) agent = self.get_user_agent() headers = { "User-Agent": agent } request = requests.get(api_url, headers=headers) ok = request.status_code == requests.codes.ok if ok: response = json.loads(request.text) if response is not None and "title" in response[0]: video = response[0] vimeo_template = Template(self.registryValue("vimeoTitleTemplate", 
channel=channel)) """ Some videos do not have this information available """ if "stats_number_of_plays" in video: video["stats_number_of_plays"] = "{:,}".format(int(video["stats_number_of_plays"])) else: video["stats_number_of_plays"] = 0 if "stats_number_of_comments" in video: video["stats_number_of_comments"] = "{:,}".format(int(video["stats_number_of_comments"])) else: video["stats_number_of_comments"] = 0 video["duration"] = self.get_duration_from_seconds(video["duration"]) title = vimeo_template.render(video) else: log.debug("SpiffyTitles: received unexpected payload from video: %s" % api_url) else: log.error("SpiffyTitles: vimeo handler returned %s: %s" % (request.status_code, request.text[:200])) if title is None: log.debug("SpiffyTitles: could not get vimeo info for %s" % url) return self.handler_default(url, channel) else: return title def handler_coub(self, url, domain, channel): """ Handles coub.com links """ coub_handler_enabled = self.registryValue("coubHandlerEnabled", channel=channel) log.debug("SpiffyTitles: calling coub handler for %s" % url) title = None """ Get video ID """ if coub_handler_enabled and "/view/" in url: video_id = url.split("/view/")[1] """ Remove any query strings """ if "?" in video_id: video_id = video_id.split("?")[0] api_url = "http://coub.com/api/v2/coubs/%s" % video_id agent = self.get_user_agent() headers = { "User-Agent": agent } request = requests.get(api_url, headers=headers) ok = request.status_code == requests.codes.ok if ok: response = json.loads(request.text) if response: video = response coub_template = Template(self.registryValue("coubTemplate")) video["likes_count"] = "{:,}".format(int(video["likes_count"])) video["recoubs_count"] = "{:,}".format(int(video["recoubs_count"])) video["views_count"] = "{:,}".format(int(video["views_count"])) title = coub_template.render(video) else: log.error("SpiffyTitles: coub handler returned %s: %s" % (request.status_code, request.text[:200])) if title is None: if coub_handler_enabled: log.debug("SpiffyTitles: %s does not appear to be a video link!" 
% url) return self.handler_default(url, channel) else: return title def add_imgur_handlers(self): # Images mostly self.handlers["i.imgur.com"] = self.handler_imgur_image # Albums, galleries, etc self.handlers["imgur.com"] = self.handler_imgur def initialize_imgur_client(self, channel): """ Check if imgur client id or secret are set, and if so initialize imgur API client """ if self.imgur_client is None: imgur_client_id = self.registryValue("imgurClientID") imgur_client_secret = self.registryValue("imgurClientSecret") imgur_handler_enabled = self.registryValue("imgurHandlerEnabled", channel=channel) if imgur_handler_enabled and imgur_client_id and imgur_client_secret: log.debug("SpiffyTitles: enabling imgur handler") # Initialize API client try: from imgurpython import ImgurClient from imgurpython.helpers.error import ImgurClientError try: self.imgur_client = ImgurClient(imgur_client_id, imgur_client_secret) except ImgurClientError as e: log.error("SpiffyTitles: imgur client error: %s" % (e.error_message)) except ImportError as e: log.error("SpiffyTitles ImportError: %s" % str(e)) else: log.debug("SpiffyTitles: imgur handler disabled or empty client id/secret") def doPrivmsg(self, irc, msg): """ Observe each channel message and look for links """ channel = msg.args[0] ignore_actions = self.registryValue("ignoreActionLinks", channel=msg.args[0]) is_channel = irc.isChannel(channel) is_ctcp = ircmsgs.isCtcp(msg) message = msg.args[1] title = None bot_nick = irc.nick origin_nick = msg.nick is_message_from_self = origin_nick.lower() == bot_nick.lower() requires_capability = len(str(self.registryValue("requireCapability", channel=msg.args[0]))) > 0 if is_message_from_self: return """ Check if we require a capability to acknowledge this link """ if requires_capability: user_has_capability = self.user_has_capability(msg) if not user_has_capability: return """ Configuration option determines whether we should ignore links that appear within an action """ if is_ctcp and ignore_actions: return if is_channel: channel_is_allowed = self.is_channel_allowed(channel) url = self.get_url_from_message(message) ignore_match = self.message_matches_ignore_pattern(message) if ignore_match: log.debug("SpiffyTitles: ignoring message due to linkMessagePattern match") return if url: # Check if channel is allowed based on white/black list restrictions if not channel_is_allowed: log.debug("SpiffyTitles: not responding to link in %s due to black/white list restrictions" % (channel)) return info = urlparse(url) domain = info.netloc is_ignored = self.is_ignored_domain(domain) if is_ignored: log.debug("SpiffyTitles: URL ignored due to domain blacklist match: %s" % url) return is_whitelisted_domain = self.is_whitelisted_domain(domain) if self.registryValue("whitelistDomainPattern") and not is_whitelisted_domain: log.debug("SpiffyTitles: URL ignored due to domain whitelist mismatch: %s" % url) return title = self.get_title_by_url(url, channel) if title is not None and title: ignore_match = self.title_matches_ignore_pattern(title, channel) if ignore_match: return else: irc.sendMsg(ircmsgs.privmsg(channel, title)) else: if self.default_handler_enabled: log.debug("SpiffyTitles: could not get a title for %s" % (url)) else: log.debug("SpiffyTitles: could not get a title for %s but default handler is disabled" % (url)) def get_title_by_url(self, url, channel): """ Retrieves the title of a website based on the URL provided """ info = urlparse(url) domain = info.netloc title = None """ Check if we have this link cached according to 
the cache lifetime. If so, serve link from the cache instead of calling handlers. """ cached_link = self.get_link_from_cache(url) if cached_link is not None: title = cached_link["title"] else: if domain in self.handlers: handler = self.handlers[domain] title = handler(url, info, channel) else: if self.default_handler_enabled: title = self.handler_default(url, channel) if title is not None: title = self.get_formatted_title(title, channel) # Update link cache log.debug("SpiffyTitles: caching %s" % (url)) now = datetime.datetime.now() self.link_cache.append({ "url": url, "timestamp": now, "title": title }) return title def t(self, irc, msg, args, query): """ Retrieves title for a URL on demand """ channel = msg.args[0] url = self.get_url_from_message(query) title = None error_message = self.registryValue("onDemandTitleError", channel=channel) try: if url: title = self.get_title_by_url(url, channel) except Exception: pass if title is not None and title: irc.sendMsg(ircmsgs.privmsg(channel, title)) else: irc.sendMsg(ircmsgs.privmsg(channel, error_message)) t = wrap(t, ['text']) def get_link_from_cache(self, url): """ Looks for a URL in the link cache and returns its entry if it is not stale according to the configured cache lifetime, otherwise None. If linkCacheLifetimeInSeconds is 0, the cache is disabled and we can return immediately """ cache_lifetime_in_seconds = int(self.registryValue("linkCacheLifetimeInSeconds")) if cache_lifetime_in_seconds == 0: return # No cache yet if len(self.link_cache) == 0: return cached_link = None now = datetime.datetime.now() stale = False seconds = 0 for link in self.link_cache: if link["url"] == url: cached_link = link break # Found link, check timestamp if cached_link is not None: seconds = (now - cached_link["timestamp"]).total_seconds() stale = seconds >= cache_lifetime_in_seconds if stale: log.debug("SpiffyTitles: cached entry for %s is stale (%s seconds old)" % (url, seconds)) return None else: log.debug("SpiffyTitles: serving link from cache: %s" % (url)) return cached_link def add_imdb_handlers(self): """ Enables meta info about IMDB links through the OMDB API """ self.handlers["www.imdb.com"] = self.handler_imdb self.handlers["imdb.com"] = self.handler_imdb def add_youtube_handlers(self): """ Adds handlers for Youtube videos. The handler is matched based on the domain used in the URL. """ self.handlers["youtube.com"] = self.handler_youtube self.handlers["www.youtube.com"] = self.handler_youtube self.handlers["youtu.be"] = self.handler_youtube self.handlers["m.youtube.com"] = self.handler_youtube def is_channel_allowed(self, channel): """ Checks channel whitelist and blacklist to determine if the current channel is allowed to display titles. """ channel = channel.lower() is_allowed = False white_list = self.filter_empty(self.registryValue("channelWhitelist")) black_list = self.filter_empty(self.registryValue("channelBlacklist")) white_list_empty = len(white_list) == 0 black_list_empty = len(black_list) == 0 # Most basic case, which is that both white and blacklist are empty. Any channel is allowed. if white_list_empty and black_list_empty: is_allowed = True # If there is a whitelist, the blacklist is ignored.
if white_list: is_allowed = channel in white_list # Finally, check blacklist if not white_list and black_list: is_allowed = channel not in black_list return is_allowed def filter_empty(self, input): """ Remove all empty strings from a list, returning the remainder as a set """ return set([channel for channel in input if len(channel.strip())]) def is_ignored_domain(self, domain): """ Checks domain against a regular expression """ pattern = self.registryValue("ignoredDomainPattern") if pattern: log.debug("SpiffyTitles: matching %s against %s" % (domain, str(pattern))) try: pattern_search_result = re.search(pattern, domain) if pattern_search_result is not None: match = pattern_search_result.group() return match except re.error: log.error("SpiffyTitles: invalid regular expression: %s" % (pattern)) def is_whitelisted_domain(self, domain): """ Checks domain against a regular expression """ pattern = self.registryValue("whitelistDomainPattern") if pattern: log.debug("SpiffyTitles: matching %s against %s" % (domain, str(pattern))) try: pattern_search_result = re.search(pattern, domain) if pattern_search_result is not None: match = pattern_search_result.group() return match except re.error: log.error("SpiffyTitles: invalid regular expression: %s" % (pattern)) def get_video_id_from_url(self, url, info): """ Get YouTube video ID from URL """ try: path = info.path domain = info.netloc video_id = "" if domain == "youtu.be": video_id = path.split("/")[1] else: parsed = cgi.parse_qsl(info.query) params = dict(parsed) if "v" in params: video_id = params["v"] if video_id: return video_id else: log.error("SpiffyTitles: error getting video id from %s" % (url)) except IndexError as e: log.error("SpiffyTitles: error getting video id from %s (%s)" % (url, str(e))) def handler_youtube(self, url, domain, channel): """ Uses the Youtube API to provide additional meta data about Youtube Video links posted. """ youtube_handler_enabled = self.registryValue("youtubeHandlerEnabled", channel=channel) developer_key = self.registryValue("youtubeDeveloperKey") if not youtube_handler_enabled: return None if not developer_key: log.info("SpiffyTitles: no Youtube developer key set!
Check the documentation for instructions.") return None log.debug("SpiffyTitles: calling Youtube handler for %s" % (url)) video_id = self.get_video_id_from_url(url, domain) yt_template = Template(self.registryValue("youtubeTitleTemplate", channel=channel)) title = "" if video_id: options = { "part": "snippet,statistics,contentDetails", "maxResults": 1, "key": developer_key, "id": video_id } encoded_options = urlencode(options) api_url = "https://www.googleapis.com/youtube/v3/videos?%s" % (encoded_options) agent = self.get_user_agent() headers = { "User-Agent": agent } log.debug("SpiffyTitles: requesting %s" % (api_url)) request = requests.get(api_url, headers=headers) ok = request.status_code == requests.codes.ok if ok: response = json.loads(request.text) if response: try: if response["pageInfo"]["totalResults"] > 0: items = response["items"] video = items[0] snippet = video["snippet"] title = snippet["title"] statistics = video["statistics"] view_count = 0 like_count = 0 dislike_count = 0 comment_count = 0 favorite_count = 0 if "viewCount" in statistics: view_count = "{:,}".format(int(statistics["viewCount"])) if "likeCount" in statistics: like_count = "{:,}".format(int(statistics["likeCount"])) if "dislikeCount" in statistics: dislike_count = "{:,}".format(int(statistics["dislikeCount"])) if "favoriteCount" in statistics: favorite_count = "{:,}".format(int(statistics["favoriteCount"])) if "commentCount" in statistics: comment_count = "{:,}".format(int(statistics["commentCount"])) channel_title = snippet["channelTitle"] duration_seconds = self.get_total_seconds_from_duration(video["contentDetails"]["duration"]) """ #23 - If duration is zero, then it"s a LIVE video """ if duration_seconds > 0: duration = self.get_duration_from_seconds(duration_seconds) else: duration = "LIVE" timestamp = self.get_timestamp_from_youtube_url(url) yt_logo = self.get_youtube_logo() compiled_template = yt_template.render({ "title": title, "duration": duration, "timestamp": timestamp, "view_count": view_count, "like_count": like_count, "dislike_count": dislike_count, "comment_count": comment_count, "favorite_count": favorite_count, "channel_title": channel_title, "yt_logo": yt_logo }) title = compiled_template else: log.debug("SpiffyTitles: video appears to be private; no results!") except IndexError as e: log.error("SpiffyTitles: IndexError parsing Youtube API JSON response: %s" % (str(e))) else: log.error("SpiffyTitles: Error parsing Youtube API JSON response") else: log.error("SpiffyTitles: Youtube API HTTP %s: %s" % (request.status_code, request.text)) # If we found a title, return that. otherwise, use default handler if title: return title else: log.debug("SpiffyTitles: falling back to default handler") return self.handler_default(url, channel) def get_duration_from_seconds(self, duration_seconds): m, s = divmod(duration_seconds, 60) h, m = divmod(m, 60) duration = "%02d:%02d" % (m, s) """ Only include hour if the video is at least 1 hour long """ if h > 0: duration = "%02d:%s" % (h, duration) return duration def get_youtube_logo(self): colored_letters = [ "%s" % ircutils.mircColor("You", fg="red", bg="white"), "%s" % ircutils.mircColor("Tube", fg="white", bg="red") ] yt_logo = "".join(colored_letters) return yt_logo def get_total_seconds_from_duration(self, input): """ Duration comes in a format like this: PT4M41S which translates to 4 minutes and 41 seconds. This method returns the total seconds so that the duration can be parsed as usual. """ pattern = regex = re.compile(""" (?P<sign> -?) 
P (?:(?P<years> \d+) Y)? (?:(?P<months> \d+) M)? (?:(?P<days> \d+) D)? (?: T (?:(?P<hours> \d+) H)? (?:(?P<minutes>\d+) M)? (?:(?P<seconds>\d+) S)? )? """, re.VERBOSE) duration = regex.match(input).groupdict(0) delta = timedelta(hours=int(duration['hours']), minutes=int(duration['minutes']), seconds=int(duration['seconds'])) return delta.total_seconds() def get_timestamp_from_youtube_url(self, url): """ Get YouTube timestamp """ pattern = r"[?&]t=([^&]+)" match = re.search(pattern, url) if match: timestamp = match.group(1).upper() try: seconds = float(timestamp) except ValueError: seconds = self.get_total_seconds_from_duration("PT" + timestamp) if seconds > 0: return self.get_duration_from_seconds(seconds) else: return "" def handler_default(self, url, channel): """ Default handler for websites """ default_handler_enabled = self.registryValue("defaultHandlerEnabled", channel=channel) if default_handler_enabled: log.debug("SpiffyTitles: calling default handler for %s" % (url)) default_template = Template(self.registryValue("defaultTitleTemplate", channel=channel)) html = self.get_source_by_url(url) if html is not None and html: title = self.get_title_from_html(html) if title is not None: title_template = default_template.render(title=title) return title_template else: log.debug("SpiffyTitles: default handler fired but doing nothing because disabled") def handler_imdb(self, url, info, channel): """ Handles imdb.com links, querying the OMDB API for additional info Typical IMDB URL: http://www.imdb.com/title/tt2467372/ """ headers = self.get_headers() result = None if not self.registryValue("imdbHandlerEnabled", channel=channel): log.debug("SpiffyTitles: IMDB handler disabled. Falling back to default handler.") return self.handler_default(url, channel) # Don't care about query strings if "?" in url: url = url.split("?")[0] # We can only accommodate a specific format of URL here if "/title/" in url: imdb_id = url.split("/title/")[1].rstrip("/") omdb_url = "http://www.omdbapi.com/?i=%s&plot=short&r=json&tomatoes=true" % (imdb_id) try: request = requests.get(omdb_url, timeout=10, headers=headers) if request.status_code == requests.codes.ok: response = json.loads(request.text) result = None imdb_template = Template(self.registryValue("imdbTemplate")) not_found = "Error" in response unknown_error = response["Response"] != "True" if not_found or unknown_error: log.debug("SpiffyTitles: OMDB error for %s" % (omdb_url)) else: result = imdb_template.render(response) else: log.error("SpiffyTitles OMDB API %s - %s" % (request.status_code, request.text)) except requests.exceptions.Timeout as e: log.error("SpiffyTitles imdb Timeout: %s" % (str(e))) except requests.exceptions.ConnectionError as e: log.error("SpiffyTitles imdb ConnectionError: %s" % (str(e))) except requests.exceptions.HTTPError as e: log.error("SpiffyTitles imdb HTTPError: %s" % (str(e))) if result is not None: return result else: log.debug("SpiffyTitles: IMDB handler failed. calling default handler") return self.handler_default(url, channel) def handler_wikipedia(self, url, domain, channel): """ Queries wikipedia API for article extracts. 
""" wikipedia_handler_enabled = self.registryValue("wikipedia.enabled", channel=channel) if not wikipedia_handler_enabled: return self.handler_default(url, channel) self.log.debug("SpiffyTitles: calling Wikipedia handler for %s" % (url)) pattern = r"/(?:w(?:iki))/(?P<page>[^/]+)$" info = urlparse(url) match = re.search(pattern, info.path) if not match: self.log.debug("SpiffyTitles: no title found.") return self.handler_default(url, channel) elif info.fragment and self.registryValue("wikipedia.ignoreSectionLinks", channel=channel): self.log.debug("SpiffyTitles: ignoring section link.") return self.handler_default(url, channel) else: page_title = match.groupdict()['page'] default_api_params = { "format": "json", "action": "query", "prop": "extracts", "exsentences": "2", "exlimit": "1", "exintro": "", "explaintext": "" } extra_params = dict(parse_qsl('&'.join(self.registryValue("wikipedia.apiParams", channel=channel)))) title_param = { self.registryValue("wikipedia.titleParam", channel=channel): page_title } # merge dicts api_params = default_api_params.copy() api_params.update(extra_params) api_params.update(title_param) api_url = "https://en.wikipedia.org/w/api.php?%s" % ('&'.join("%s=%s" % (key, val) for (key,val) in api_params.iteritems())) agent = self.get_user_agent() headers = { "User-Agent": agent } extract = "" self.log.debug("SpiffyTitles: requesting %s" % (api_url)) request = requests.get(api_url, headers=headers) ok = request.status_code == requests.codes.ok if ok: response = json.loads(request.text) if response: try: extract = response['query']['pages'].values()[0]['extract'] except KeyError as e: self.log.error("SpiffyTitles: KeyError parsing Wikipedia API JSON response: %s" % (str(e))) else: self.log.error("SpiffyTitles: Error parsing Wikipedia API JSON response") else: self.log.error("SpiffyTitles: Wikipedia API HTTP %s: %s" % (request.status_code, request.text)) if extract: if (self.registryValue("wikipedia.removeParentheses")): extract = re.sub(r' ?\([^)]*\)', '', extract) max_chars = self.registryValue("wikipedia.maxChars", channel=channel) if len(extract) > max_chars: extract = extract[:max_chars - 3].rsplit(' ', 1)[0].rstrip(',.') + '...' wikipedia_template = Template(self.registryValue("wikipedia.extractTemplate", channel=channel)) return wikipedia_template.render({"extract": extract}) else: self.log.debug("SpiffyTitles: falling back to default handler") return self.handler_default(url, channel) def is_valid_imgur_id(self, input): """ Tests if input matches the typical imgur id, which seems to be alphanumeric. Images, galleries, and albums all share their format in their identifier. """ match = re.match(r"[a-z0-9]+", input, re.IGNORECASE) return match is not None def handler_imgur(self, url, info, channel): """ Queries imgur API for additional information about imgur links. This handler is for any imgur.com domain. """ self.initialize_imgur_client(channel) is_album = info.path.startswith("/a/") is_gallery = info.path.startswith("/gallery/") is_image_page = not is_album and not is_gallery and re.match(r"^\/[a-zA-Z0-9]+", info.path) result = None if is_album: result = self.handler_imgur_album(url, info, channel) #elif is_image_page: # result = self.handler_imgur_image(url, info) else: result = self.handler_default(url, channel) return result def handler_imgur_album(self, url, info, channel): """ Handles retrieving information about albums from the imgur API. 
imgur provides the following information about albums: https://api.imgur.com/models/album """ from imgurpython.helpers.error import ImgurClientRateLimitError from imgurpython.helpers.error import ImgurClientError self.initialize_imgur_client(channel) if self.imgur_client: album_id = info.path.split("/a/")[1] """ If there is a query string appended, remove it """ if "?" in album_id: album_id = album_id.split("?")[0] if self.is_valid_imgur_id(album_id): log.debug("SpiffyTitles: found imgur album id %s" % (album_id)) try: album = self.imgur_client.get_album(album_id) if album: imgur_album_template = Template(self.registryValue("imgurAlbumTemplate", channel=channel)) compiled_template = imgur_album_template.render({ "title": album.title, "section": album.section, "view_count": "{:,}".format(album.views), "image_count": "{:,}".format(album.images_count), "nsfw": album.nsfw }) return compiled_template else: log.error("SpiffyTitles: imgur album API returned unexpected results!") except ImgurClientRateLimitError as e: log.error("SpiffyTitles: imgur rate limit error: %s" % (e.error_message)) except ImgurClientError as e: log.error("SpiffyTitles: imgur client error: %s" % (e.error_message)) else: log.debug("SpiffyTitles: unable to determine album id for %s" % (url)) else: return self.handler_default(url, channel) def handler_imgur_image(self, url, info, channel): """ Handles retrieving information about images from the imgur API. Used for both direct images and imgur.com/some_image_id_here type links, as they're both single images. """ self.initialize_imgur_client(channel) from imgurpython.helpers.error import ImgurClientRateLimitError from imgurpython.helpers.error import ImgurClientError title = None if self.imgur_client: """ If there is a period in the path, it's a direct link to an image. If not, then it's a imgur.com/some_image_id_here type link """ if "." 
in info.path: path = info.path.lstrip("/") image_id = path.split(".")[0] else: image_id = info.path.lstrip("/") if self.is_valid_imgur_id(image_id): log.debug("SpiffyTitles: found image id %s" % (image_id)) try: image = self.imgur_client.get_image(image_id) if image: imgur_template = Template(self.registryValue("imgurTemplate", channel=channel)) readable_file_size = self.get_readable_file_size(image.size) compiled_template = imgur_template.render({ "title": image.title, "type": image.type, "nsfw": image.nsfw, "width": image.width, "height": image.height, "view_count": "{:,}".format(image.views), "file_size": readable_file_size, "section": image.section }) title = compiled_template else: log.error("SpiffyTitles: imgur API returned unexpected results!") except ImgurClientRateLimitError as e: log.error("SpiffyTitles: imgur rate limit error: %s" % (e.error_message)) except ImgurClientError as e: log.error("SpiffyTitles: imgur client error: %s" % (e.error_message)) else: log.error("SpiffyTitles: error retrieving image id for %s" % (url)) if title is not None: return title else: return self.handler_default(url, channel) def get_readable_file_size(self, num, suffix="B"): """ Returns human readable file size """ for unit in ["","Ki","Mi","Gi","Ti","Pi","Ei","Zi"]: if abs(num) < 1024.0: return "%3.1f%s%s" % (num, unit, suffix) num /= 1024.0 return "%.1f%s%s" % (num, "Yi", suffix) def get_formatted_title(self, title, channel): """ Remove cruft from title and apply bold if applicable """ useBold = self.registryValue("useBold", channel=channel) # Replace anywhere in string title = title.replace("\n", " ") title = title.replace("\t", " ") title = re.sub(" +", " ", title) if useBold: title = ircutils.bold(title) title = title.strip() return title def get_title_from_html(self, html): """ Retrieves value of <title> tag from HTML """ soup = BeautifulSoup(html, "lxml") if soup is not None: """ Some websites have more than one title tag, so get all of them and take the last value. 
""" head = soup.find("head") titles = head.find_all("title") if titles is not None and len(titles): title_text = titles[-1].get_text() if len(title_text): stripped_title = title_text.strip() return stripped_title @timeout_decorator.timeout(wall_clock_timeout) def get_source_by_url(self, url, retries=1): """ Get the HTML of a website based on a URL """ max_retries = self.registryValue("maxRetries") if retries is None: retries = 1 if retries >= max_retries: log.debug("SpiffyTitles: hit maximum retries for %s" % url) return None log.debug("SpiffyTitles: attempt #%s for %s" % (retries, url)) try: headers = self.get_headers() log.debug("SpiffyTitles: requesting %s" % (url)) request = requests.get(url, headers=headers, timeout=10, allow_redirects=True) if request.status_code == requests.codes.ok: # Check the content type which comes in the format: "text/html; charset=UTF-8" content_type = request.headers.get("content-type").split(";")[0].strip() acceptable_types = self.registryValue("mimeTypes") log.debug("SpiffyTitles: content type %s" % (content_type)) if content_type in acceptable_types: text = request.content if text: return text else: log.debug("SpiffyTitles: empty content from %s" % (url)) else: log.debug("SpiffyTitles: unacceptable mime type %s for url %s" % (content_type, url)) else: log.error("SpiffyTitles HTTP response code %s - %s" % (request.status_code, request.content)) except timeout_decorator.TimeoutError: log.error("SpiffyTitles: wall timeout!") self.get_source_by_url(url, retries+1) except requests.exceptions.MissingSchema as e: urlWithSchema = "http://%s" % (url) log.error("SpiffyTitles missing schema. Retrying with %s" % (urlWithSchema)) return self.get_source_by_url(urlWithSchema) except requests.exceptions.Timeout as e: log.error("SpiffyTitles Timeout: %s" % (str(e))) self.get_source_by_url(url, retries+1) except requests.exceptions.ConnectionError as e: log.error("SpiffyTitles ConnectionError: %s" % (str(e))) self.get_source_by_url(url, retries+1) except requests.exceptions.HTTPError as e: log.error("SpiffyTitles HTTPError: %s" % (str(e))) except requests.exceptions.InvalidURL as e: log.error("SpiffyTitles InvalidURL: %s" % (str(e))) def get_headers(self): agent = self.get_user_agent() self.accept_language = self.registryValue("language") headers = { "User-Agent": agent, "Accept-Language": ";".join((self.accept_language, "q=1.0")) } return headers def get_user_agent(self): """ Returns a random user agent from the ones available """ agents = self.registryValue("userAgents") return random.choice(agents) def message_matches_ignore_pattern(self, input): """ Checks message against linkMessageIgnorePattern to determine whether the message should be ignored. """ match = False pattern = self.registryValue("linkMessageIgnorePattern") if pattern: match = re.search(pattern, input) return match def title_matches_ignore_pattern(self, input, channel): """ Checks message against ignoredTitlePattern to determine whether the title should be ignored. 
""" match = False pattern = self.registryValue("ignoredTitlePattern", channel=channel) if pattern: match = re.search(pattern, input) if match: log.debug("SpiffyTitles: title %s matches ignoredTitlePattern for %s" % (input, channel)) return match def get_url_from_message(self, input): """ Find the first string that looks like a URL from the message """ url_re = self.registryValue("urlRegularExpression") match = re.search(url_re, input) if match: raw_url = match.group(0).strip() url = self.remove_control_characters(unicode(raw_url)) return url def remove_control_characters(self, s): return "".join(ch for ch in s if unicodedata.category(ch)[0]!="C") def user_has_capability(self, msg): channel = msg.args[0] mask = msg.prefix required_capability = self.registryValue("requireCapability") cap = ircdb.makeChannelCapability(channel, required_capability) has_cap = ircdb.checkCapability(mask, cap, ignoreDefaultAllow=True) if has_cap: log.debug("SpiffyTitles: %s has required capability '%s'" % (mask, required_capability)) else: log.debug("SpiffyTitles: %s does NOT have required capability '%s'" % (mask, required_capability)) return has_cap
def downloader(self): """ Main download method. Gets index of saved posts from reddit using PRAW, then checks them against the posts already saved in the database. Posts will be downloaded and saved according to type of post (self post, image, image album, webm, article). :return: None """ self.set_output_thread_condition(1) self.stop_request.clear() warnings.warn( "Suppressed Resource warning", ResourceWarning) # suppresses all unclosed socket warnings. logger = self.logger logger.info("\n###########\nStarting SR\n###########") logger.debug("Getting settings from db") get_comments = self.settings_dict['save_comments'].value number_of_comments = self.settings_dict['number_of_comments'].value path = "static/SRDownloads" if not os.path.exists(path): os.makedirs(path) # Authenticate with Reddit logger.info('Authenticating with Reddit') client_id = '_Nxh9h0Tys5KCQ' redirect_uri = 'http://127.0.0.1:5000/authorize_callback' refresh_token = self.settings_dict['reddit_refresh_token'].value user_agent = "SavedRetriever 0.9 by /u/fuzzycut" try: r = praw.Reddit(user_agent) r.set_oauth_app_info(client_id, '', redirect_uri) access_information = r.refresh_access_information(refresh_token) r.set_access_credentials(**access_information) logger.info("Authenticated") except Exception as e: logger.error(e) self.set_output_thread_condition(2) raise SystemExit time_since_accesstoken = time.time() index = set() try: # Create index of unique post codes for post in models.Post.query.all(): index.add(post.code) except OSError: logger.error("Unable to create index") raise SystemExit logger.info("Beginning to save files to db...") items = r.get_me().get_saved(limit=None) self.post_downloaded_count = 0 # Convert saved post generator to a list in order to iterate backwards, so that the most recent saved post # is the most recently downloaded for i in list(items)[::-1]: if self.stop_request.is_set(): logger.info('Cancelling download...') break if (time.time() - time_since_accesstoken) / 60 > 55: # Refresh the access token before it runs out.
logger.debug('Refreshing Reddit token') r.refresh_access_information( access_information['refresh_token']) time_since_accesstoken = time.time() name = i.name if name not in index: # file has not been downloaded permalink = i.permalink title = i.link_title if hasattr(i, 'link_title') else i.title date = datetime.datetime.fromtimestamp(i.created) post = None author = str(i.author) user = models.Author.query.filter_by(username=author) logger.info('Getting post ' + name + ' - ' + title[:255]) if user.count() == 0: # user is not in db user = models.Author(username=author) self.db.session.add(user) self.db.session.commit() else: user = user.first() comments = self._get_comments( i, number_of_comments, r) if get_comments == 'True' else "{}" # ========== # # IS COMMENT # # ========== # if hasattr(i, 'body_html'): logger.debug("{} is comment".format(name)) body = i.body_html # html output body = self.subreddit_linker(body) summary = body[:600] summary = bleach.clean(summary, tags=self.allowed_tags, attributes=self.allowed_attrs, strip=True) post = models.Post(permalink=permalink, title=title, body_content=body, date_posted=date, author_id=user.id, code=name, type='text', summary=summary, comments=comments) # ============ # # IS SELF-POST # # ============ # elif hasattr(i, 'is_self') and i.is_self is True: logger.debug('{} is self-post'.format(name)) text = i.selftext_html if i.selftext_html is not None else "" # html output text = self.subreddit_linker(text) summary = text[:600] summary = bleach.clean(summary, tags=self.allowed_tags, attributes=self.allowed_attrs, strip=True) post = models.Post(permalink=permalink, title=title, body_content=text, date_posted=date, author_id=user.id, code=name, type='text', summary=summary, comments=comments) # ====================== # # IS DIRECT LINKED IMAGE # # ====================== # elif (hasattr(i, 'url') and (self._get_image_url_type( i.url) in ['jpg', 'png', 'gif', 'gifv', 'pdf']) or "reddituploads" in i.url): logger.debug('{} is direct linked image'.format(name)) url = i.url base_filename = "{}_image.{}".format( name, self._get_image_url_type(url)) filename = path + "/" + base_filename filetype = 'image' if url[-4:] == "gifv": url = url.replace('gifv', 'mp4') filename = filename.replace('gifv', 'mp4') base_filename = base_filename.replace('gifv', 'mp4') base_filename = base_filename.replace( '_image', '_video') filetype = 'video' # image downloader section if os.path.exists(filename) and ( os.path.getsize(filename) > 0): # If image exists and is valid image_downloaded = True logger.info( "Image already exists - {}".format(base_filename)) else: image_downloaded = self.image_saver(url, filename) if image_downloaded: logger.info( 'Downloaded image - {}'.format(base_filename)) self._add_image_to_db(base_filename, filename) if filename.split('.')[-1] == 'pdf': img = '<a href="static/SRDownloads/{}">Click here for link to downloaded pdf</a>'.format( base_filename) elif filename.split('.')[-1] == 'mp4': img = '<video class="sr-image img-responsive" id="share-video" autoplay="" muted=""' \ ' loop=""><source id="mp4Source" src="/img/{}" type=' \ '"video/mp4">Sorry,' \ ' your browser doesn\'t support HTML5 video. 
</video>'.format(base_filename) else: img = '<a href="/img/{0}"><img class="sr-image img-responsive" src="/img/{0}">' \ '</a>'.format(base_filename) else: img = "Image failed to download - It may be temporarily or permanently unavailable" img_json = [{ "name": "", "filename": base_filename, "description": "" }] img_json = json.dumps(img_json) post = models.Post(permalink=permalink, title=title, body_content=img_json, date_posted=date, author_id=user.id, code=name, type=filetype, summary=img, comments=comments) # =============== # # IS GFYCAT IMAGE # # =============== # elif hasattr(i, 'url') and 'gfycat.com' in i.url: json_url = 'https://gfycat.com/cajax/get/' gfy_id = i.url.split('/')[-1] url = json_url + gfy_id data = None try: with urllib.request.urlopen(url) as response: data = response.read().decode('utf-8') except urllib.error.HTTPError: logger.warning("Unable to open gfycat url " + url) continue json_data = json.loads(data) base_filename = "{}_video.{}".format(name, 'mp4') # filename for video. filename = path + "/" + base_filename if os.path.exists(filename) and ( os.path.getsize(filename) > 0): # If image exists and is valid image_downloaded = True logger.info( "Image already exists - {}".format(base_filename)) else: image_downloaded = self.image_saver( json_data['gfyItem']['mp4Url'], filename) if image_downloaded: logger.info( 'Downloaded video - {}'.format(base_filename)) self._add_image_to_db(base_filename, filename) img = '<video class="sr-image img-responsive" id="share-video" autoplay="" muted="" loop="">' \ '<source id="mp4Source" src="/img/{}" type="video/mp4">Sorry, your browser doesn\'t support ' \ 'HTML5 video. </video>'.format(base_filename) else: img = "Image failed to download - It may be temporarily or permanently unavailable" img_json = [{ "name": "", "filename": base_filename, "description": "" }] img_json = json.dumps(img_json) post = models.Post(permalink=permalink, title=title, body_content=img_json, date_posted=date, author_id=user.id, code=name, type='video', summary=img, comments=comments) # ============== # # IS IMGUR ALBUM # # ============== # elif hasattr(i, 'url') and 'imgur' in i.url: # Add option to download images to folder. logger.debug('{} is Imgur album'.format(name)) url = i.url # body = "<h2>{}</h2>".format(title) body = [] summary = '' # imgur api section client = ImgurClient('755357eb4cd70bd', None) pattern = r'/([A-Za-z0-9]{5,7})' # matches any 5-7 character alphanumeric id that comes after a forward slash (/). match = re.findall(pattern, url) gallery_id = match[-1].replace('/', '') # removes any forward slashes for processing gallery = [] filename = None try: gallery = client.get_album_images(gallery_id) except imgurpython.helpers.error.ImgurClientError: # if 'gallery' is actually just a lone image try: gallery = [client.get_image(gallery_id)] except imgurpython.helpers.error.ImgurClientError as error: # if gallery does not exist.
if error.status_code != 404: logger.error("**{} - {}**".format( error.status_code, error.error_message)) else: logger.error(error) img_path = path first_image = True for image in gallery: # add if gallery > 10, then just add a link (would be too large for the note) image_name = image.title if image.title is not None else "" # image_description = image.description if image.description is not None else "" if image.description != title and image.description is not None: image_description = image.description else: image_description = "" image_filetype = image.type.split('/')[1] image_id = image.id image_link = image.link # sets up downloaded filename and html for embedding image base_filename = "{}_image.{}".format( image_id, image_filetype) img_json = [{ "name": image_name, "filename": base_filename, "description": image_description }] filename = img_path + "/" + base_filename # only download if file doesn't already exist if os.path.exists(filename) and ( os.path.getsize(filename) > 0): image_downloaded = True logger.info('Image already exists - {}'.format( base_filename)) else: image_downloaded = self.image_saver( image_link, filename) if image_downloaded: logger.info( 'Image downloaded - {}'.format(base_filename)) self._add_image_to_db(base_filename, filename) if first_image: summary = '<a href="/img/{0}"><img src="/img/{0}"' \ ' class="sr-image img-responsive"></a>'.format(base_filename) first_image = False body += img_json post = models.Post(permalink=permalink, title=title + " - Album", body_content=json.dumps(body), date_posted=date, author_id=user.id, code=name, type='album', summary=summary, comments=comments) # ========== # # IS ARTICLE # # ========== # elif hasattr(i, 'title') and i.is_self is False: logger.debug('{} is article/webpage'.format(name)) url = i.url html = None try: # Set header to trick some sites into letting the script pull the article header = { 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) ' 'Gecko/2009021910 Firefox/3.0.7' } request = urllib.request.Request(url, headers=header) with urllib.request.urlopen(request) as response: html = response.read() except urllib.error.HTTPError as e: self.logger.error( "Unable to access article url\n %s\n %s\n %s", e, url, i.name) continue except urllib.error.URLError as e: self.logger.error( "Unable to access article url\n %s\n %s\n %s", e, url, i.name) continue article = Document(html) article_text = article.summary() if article_text is None: # if unable to parse document, manually set an error message article_text = 'Unable to parse page - See <a href="{}">here</a> for the original link'.format(url) article_text = bleach.clean(article_text, tags=self.allowed_tags, attributes=self.allowed_attrs, strip=True) summary = article_text[:600] summary = bleach.clean(summary, tags=self.allowed_tags, attributes=self.allowed_attrs, strip=True) article_text = self._make_article_img_responsive(article_text) article_text = '<a href="{}">Original article</a>'.format(url) + article_text # article = "<a href='{}'>{}</a><br/>{}<br/>".format(url, title, article) # source of article post = models.Post(permalink=permalink, title=title, body_content=article_text, date_posted=date, author_id=user.id, code=name, type='article', summary=summary, comments=comments) # end of checking for saved items try: self.db.session.add(post) self.db.session.commit() except InterfaceError: self.db.session.rollback() self.logger.error("Error adding post to db - {}".format( post.title)) continue self.post_downloaded_count += 1
logger.info('Saved ' + name + ' - ' + title[:255]) # end of for loop logger.info("All items downloaded") self.set_output_thread_condition(2)
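# --- Hedged sketch (separate from the downloader above): its dedupe strategy
# in isolation. A set of already-saved post codes is built once, then the saved
# listing is walked oldest-first so the newest save is downloaded last. The
# literal post codes are made up; `saved` stands in for r.get_me().get_saved().
index = {"t3_abc"}                       # codes already present in the database
saved = ["t3_ghi", "t3_def", "t3_abc"]   # newest first, as reddit returns them
for name in list(saved)[::-1]:           # reverse: oldest save downloads first
    if name in index:
        continue                         # already downloaded, skip
    index.add(name)
    print("would download", name)        # the real code builds a models.Post here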
from imgurpython import ImgurClient from credentials import creds from random import randint import urllib.request import os client_id = creds['client_id'] client_secret = creds['client_secret'] client = ImgurClient(client_id, client_secret) items = client.subreddit_gallery('earthporn', sort='time', window='week', page=0) rand = randint(0, len(items) - 1) image_id = items[rand].id url = client.get_image(image_id).link local_name = 'image.' + url[-3:] urllib.request.urlretrieve(url, local_name) os.system( 'gsettings set org.cinnamon.desktop.background picture-uri "file:///home/john/PythonStuff/backchange/' + local_name + '"')
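# --- Note on the script above: deriving the extension with url[-3:] silently
# mangles four-letter extensions such as .jpeg or .webp. A safer sketch, using
# only the standard library (the '.jpg' fallback is an assumption):
import os
from urllib.parse import urlsplit

def extension_from_url(url):
    ext = os.path.splitext(urlsplit(url).path)[1]  # includes the leading dot
    return ext if ext else '.jpg'                  # assumed default when absent

print(extension_from_url('https://i.imgur.com/abc123.jpeg'))  # -> .jpeg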
class LinkedMediaHelper: """ LinkedMediaHelper provides methods to collect data / content from Imgur and Gfycat """ def __init__( self, config: Configuration, imgur_secrets: str = 'imgur.secret', gfycat_secrets: str = 'gfycat.secret', ): self.logger = config.bot.logger self.save_dir = config.media.folder try: imgur_config = self._get_imgur_secrets(imgur_secrets) self.imgur_client = ImgurClient( imgur_config['Imgur']['ClientID'], imgur_config['Imgur']['ClientSecret'], ) gfycat_config = self._get_gfycat_secrets(gfycat_secrets) self.gfycat_client = GfycatClient( gfycat_config['Gfycat']['ClientID'], gfycat_config['Gfycat']['ClientSecret'], ) except ImgurClientError as imgur_error: self.logger.error('Error on creating ImgurClient: %s', imgur_error) self.logger.error(FATAL_TOOTBOT_ERROR) sys.exit(1) except GfycatClientError as gfycat_error: self.logger.error('Error on creating GfycatClient: %s', gfycat_error) self.logger.error(FATAL_TOOTBOT_ERROR) sys.exit(1) def _get_gfycat_secrets(self, gfycat_secrets: str) -> configparser.ConfigParser: """ _get_gfycat_secrets checks if the Gfycat api secrets file exists. - If the file exists, this method reads the file and returns the secrets as a ConfigParser. - If the file doesn't exist it asks the user over stdin to supply these values and then saves them into the gfycat_secrets file Arguments: gfycat_secrets (string): file name of secrets file for API credentials Returns: gfycat_config (configparser.ConfigParser): parsed config containing the client id and client secret needed to log in to Gfycat """ if not os.path.exists(gfycat_secrets): self.logger.warning( 'Gfycat API keys not found. (See wiki if you need help).') # Whitespaces are stripped from input: https://stackoverflow.com/a/3739939 gfycat_client_id = ''.join( input("[ .. ] Enter Gfycat client ID: ").split()) gfycat_client_secret = ''.join( input("[ .. ] Enter Gfycat client secret: ").split()) # Make sure authentication is working try: gfycat_client = GfycatClient(gfycat_client_id, gfycat_client_secret) # If this call doesn't work, it'll throw a GfycatClientError gfycat_client.query_gfy('oddyearlyhorsefly') # It worked, so save the keys to a file gfycat_config = configparser.ConfigParser() gfycat_config['Gfycat'] = { 'ClientID': gfycat_client_id, 'ClientSecret': gfycat_client_secret, } with open(gfycat_secrets, 'w') as file: gfycat_config.write(file) except GfycatClientError as gfycat_error: self.logger.error('Error while logging into Gfycat: %s', gfycat_error) self.logger.error(FATAL_TOOTBOT_ERROR) sys.exit(1) else: # Read API keys from secret file gfycat_config = configparser.ConfigParser() gfycat_config.read(gfycat_secrets) return gfycat_config def _get_imgur_secrets(self, imgur_secrets: str) -> configparser.ConfigParser: """ _get_imgur_secrets checks if the Imgur api secrets file exists. - If the file exists, this method reads the file and returns the secrets as a ConfigParser. - If the file doesn't exist it asks the user over stdin to supply these values and then saves them into the imgur_secrets file Arguments: imgur_secrets (string): file name of secrets file for API credentials Returns: imgur_config (configparser.ConfigParser): parsed config containing the client id and client secret needed to log in to Imgur """ if not os.path.exists(imgur_secrets): self.logger.warning( 'Imgur API keys not found. (See wiki if you need help).') # Whitespaces are stripped from input: https://stackoverflow.com/a/3739939 imgur_client_id = ''.join( input("[ .. ] Enter Imgur client ID: ").split()) imgur_client_secret = ''.join( input("[ ..
] Enter Imgur client secret: ").split()) # Make sure authentication is working try: imgur_client = ImgurClient(imgur_client_id, imgur_client_secret) # If this call doesn't work, it'll throw an ImgurClientError imgur_client.get_album('dqOyj') # It worked, so save the keys to a file imgur_config = configparser.ConfigParser() imgur_config['Imgur'] = { 'ClientID': imgur_client_id, 'ClientSecret': imgur_client_secret, } with open(imgur_secrets, 'w') as file: imgur_config.write(file) except ImgurClientError as imgur_error: self.logger.error('Error while logging into Imgur: %s', imgur_error) self.logger.error(FATAL_TOOTBOT_ERROR) sys.exit(1) else: # Read API keys from secret file imgur_config = configparser.ConfigParser() imgur_config.read(imgur_secrets) return imgur_config def get_imgur_image(self, img_url: str, max_images: int = 4) -> List[str]: """ get_imgur_image downloads images from imgur. Arguments: img_url: url of imgur image to download max_images: maximum number of images to download and process, defaults to 4 Returns: file_paths (List[str]): paths to the downloaded images; an empty list if nothing was downloaded """ # Working demo of regex: https://regex101.com/r/G29uGl/2 regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)" regex_match = re.search(regex, img_url, flags=0) if not regex_match: self.logger.error( 'Could not identify Imgur image/gallery ID at: %s', img_url) return [] # Get the Imgur image/gallery ID imgur_id = regex_match.group(1) image_urls = self._get_image_urls(img_url, imgur_id) # Download and process individual images (up to max_images) imgur_paths = [] for image_url in image_urls: # If the URL is a GIFV or MP4 link, change it to the GIF version file_extension = os.path.splitext(image_url)[-1].lower() if file_extension == '.gifv': file_extension = '.gif' image_url = image_url.replace('.gifv', '.gif') elif file_extension == '.mp4': file_extension = '.gif' image_url = image_url.replace('.mp4', '.gif') # Download the image file_path = self.save_dir + '/' + imgur_id + '_' + str( len(imgur_paths)) + file_extension self.logger.info('Downloading Imgur image at URL %s to %s', image_url, file_path) current_image = save_file(image_url, file_path, self.logger) # Imgur will sometimes return a single-frame thumbnail # instead of a GIF, so we need to check for this if file_extension != '.gif' or self._check_imgur_gif(file_path): imgur_paths.append(current_image) if len(imgur_paths) == max_images: break return imgur_paths def _get_image_urls(self, img_url: str, imgur_id: str) -> List[str]: """ _get_image_urls builds a list of urls of all Imgur images identified by imgur_id Arguments: img_url: URL to IMGUR post imgur_id: ID for IMGUR post Returns: imgur_urls: List of urls to images of Imgur post identified by imgur_id """ image_urls = [] try: if any(s in img_url for s in ('/a/', '/gallery/')): # Gallery links self.logger.info('Imgur link points to gallery: %s', img_url) images = self.imgur_client.get_album_images(imgur_id) for image in images: image_urls.append(image.link) else: # Single image image_urls = [self.imgur_client.get_image(imgur_id).link] except ImgurClientError as imgur_error: self.logger.error('Could not get information from imgur: %s', imgur_error) return image_urls def _check_imgur_gif(self, file_path: str) -> bool: """ _check_imgur_gif checks if a file downloaded from imgur is indeed a gif. If file is not a gif, remove the file.
Arguments: file_path: file name and path to downloaded image Returns: True if downloaded image is indeed a GIF, otherwise returns False """ img = PILImage.open(file_path) mime = PILImage.MIME[img.format] img.close() if mime != 'image/gif': self.logger.warning('Imgur: not a GIF, not posting') try: os.remove(file_path) except OSError as remove_error: self.logger.error('Error while deleting media file: %s', remove_error) return False return True def get_gfycat_image(self, img_url: str) -> Optional[str]: """ get_gfycat_image downloads full resolution images from gfycat. Arguments: img_url (string): url of gfycat image to download Returns: file_path (string): path to downloaded image or None if no image was downloaded """ gfycat_url = "" file_path = self.save_dir + '/' try: gfycat_name = os.path.basename(urlsplit(img_url).path) response = requests.get(img_url) response.raise_for_status() soup = BeautifulSoup(response.text, 'lxml') for tag in soup.find_all("source", src=True): src = tag['src'] if "giant" in src and "mp4" in src: gfycat_url = src file_path += gfycat_name + '.mp4' except (requests.ConnectionError, requests.Timeout, requests.HTTPError, GfycatClientError) as gfycat_error: self.logger.error('Error downloading Gfycat link: %s', gfycat_error) return None if gfycat_url == '': self.logger.debug('Empty Gfycat URL; no attachment to download') return None self.logger.info('Downloading Gfycat at URL %s to %s', gfycat_url, file_path) return save_file(gfycat_url, file_path, self.logger) def get_reddit_image(self, img_url: str) -> str: """ get_reddit_image downloads full resolution images from i.reddit or reddituploads. Arguments: img_url (string): url of reddit image to download Returns: file_path (string): path to downloaded image or None if no image was downloaded """ file_name = os.path.basename(urlsplit(img_url).path) file_extension = os.path.splitext(img_url)[1].lower() # Fix for issue with i.reddituploads.com links not having a # file extension in the URL if not file_extension: file_extension += '.jpg' file_name += '.jpg' img_url += '.jpg' # Download the file file_path = self.save_dir + '/' + file_name self.logger.info( 'Downloading file at URL %s to %s, file type identified as %s', img_url, file_path, file_extension, ) return save_file(img_url, file_path, self.logger) def get_reddit_gallery(self, reddit_post: Submission, max_images: int = 4) -> List[str]: """ get_reddit_gallery downloads up to max_images images from a reddit gallery post and returns a List of file_paths to the downloaded images Arguments: reddit_post (reddit_post): reddit post / submission object max_images (int): [optional] maximum number of images to download. Default is 4 Returns: file_paths (List[str]): a list of the paths to downloaded files. If no images have been downloaded, an empty list will be returned. """ file_paths = [] for item in sorted(reddit_post.gallery_data['items'], key=lambda x: x['id']): media_id = item['media_id'] meta = reddit_post.media_metadata[media_id] self.logger.debug('Media Metadata: %s', meta) if 'e' in meta and meta['e'] == 'Image': source = meta['s'] save_path = self.save_dir + '/' + media_id + '.'
+ meta[ 'm'].split('/')[1] self.logger.info('Gallery file_path, source: %s - %s', save_path, source['u']) self.logger.debug('A[%4dx%04d] %s' % (source['x'], source['y'], source['u'])) file_paths.append( save_file(source['u'], save_path, self.logger)) if len(file_paths) == max_images: break return file_paths def get_reddit_video(self, reddit_post: Submission) -> str: """ get_reddit_video downloads full resolution video from i.reddit or reddituploads. Arguments: reddit_post (reddit_post): reddit post / submission object Returns: file_path (string): path to downloaded video or None if no image was downloaded """ # Get URL for MP4 version of reddit video video_url = reddit_post.media['reddit_video']['fallback_url'] file_path = self.save_dir + '/' + reddit_post.id + '.mp4' self.logger.info('Downloading Reddit video at URL %s to %s', video_url, file_path) return save_file(video_url, file_path, self.logger) def get_giphy_image(self, img_url: str) -> Optional[str]: """ get_giphy_image downloads full or low resolution image from giphy Arguments: img_url (string): url of giphy image to download Returns: file_path (string): path to downloaded image or None if no image was downloaded """ # Working demo of regex: https://regex101.com/r/o8m1kA/2 regex = r"https?://((?:.*)giphy\.com/media/|giphy.com/gifs/|i.giphy.com/)(.*-)?(\w+)(/|\n)" match = re.search(regex, img_url, flags=0) if not match: self.logger.error('Could not identify Giphy ID in this URL: %s', img_url) return None # Get the Giphy ID giphy_id = match.group(3) # Download the MP4 version of the GIF giphy_url = 'https://media.giphy.com/media/' + giphy_id + '/giphy.mp4' file_path = self.save_dir + '/' + giphy_id + 'giphy.mp4' giphy_file = save_file(giphy_url, file_path, self.logger) self.logger.info('Downloading Giphy at URL %s to %s', giphy_url, file_path) return giphy_file def get_generic_image(self, img_url: str) -> Optional[str]: """ get_generic_image downloads image or video from a generic url to a media file. Arguments: img_url (string): url to image or video file Returns: file_path (string): path to downloaded video or None if no image was downloaded """ # First check if URL starts with http:// or https:// regex = r"^https?://" match = re.search(regex, img_url, flags=0) if not match: self.logger.info('Post link is not a full link: %s', img_url) return None # Check if URL is an image or MP4 file, based on the MIME type image_formats = ('image/png', 'image/jpeg', 'image/gif', 'image/webp', 'video/mp4') try: img_site = urlopen(img_url) except (URLError, UnicodeEncodeError) as url_error: self.logger.error('Error while opening URL %s', url_error) return None meta = img_site.info() if meta["content-type"] not in image_formats: self.logger.error('URL does not point to a valid image file: %s', img_url) return None # URL appears to be an image, so download it file_name = os.path.basename(urlsplit(img_url).path) file_path = self.save_dir + '/' + file_name self.logger.info('Downloading file at URL %s to %s', img_url, file_path) return save_file(img_url, file_path, self.logger)
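# --- Hedged check (not part of LinkedMediaHelper): exercising the Imgur ID
# regex that get_imgur_image uses against the three URL shapes it is meant to
# cover. The example URLs are made up.
import re

IMGUR_ID_REGEX = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
for url in ("https://imgur.com/a/abc123",
            "https://imgur.com/gallery/def456",
            "https://i.imgur.com/ghi789.jpg"):
    match = re.search(IMGUR_ID_REGEX, url)
    print(url, "->", match.group(1) if match else None)
# expected IDs: abc123, def456 and ghi789 respectively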
class ImgurStorage(Storage): """ A storage class providing access to resources in an Imgur account's albums. """ def __init__(self, location='/'): self.client = ImgurClient( CONSUMER_ID, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_REFRESH) logger.info("Logged in Imgur storage") self.account_info = self.client.get_account(USERNAME) self.albums = self.client.get_account_albums(USERNAME) self.location = location self.base_url = 'https://api.imgur.com/3/account/{url}/'.format(url=self.account_info.url) def _get_abs_path(self, name): return os.path.join(self.location, name) def _open(self, name, mode='rb'): remote_file = self.client.get_image(name) return remote_file def _save(self, name, content): name = self._get_abs_path(name) directory = os.path.dirname(name) logger.info([a.title for a in self.albums]) logger.info(name) logger.info(directory) if not self.exists(directory) and directory: album = self.client.create_album({"title": directory}) self.albums = self.client.get_account_albums(USERNAME) album = [a for a in self.albums if a.title == directory][0] #if not response['is_dir']: # raise IOError("%s exists and is not a directory." % directory) response = self._client_upload_from_fd(content, {"album": album.id, "name": name, "title": name}, False) return response["name"] def _client_upload_from_fd(self, fd, config=None, anon=True): """ use a file descriptor to perform a make_request """ if not config: config = dict() contents = fd.read() b64 = base64.b64encode(contents) data = { 'image': b64, 'type': 'base64', } data.update({meta: config[meta] for meta in set(self.client.allowed_image_fields).intersection(config.keys())}) return self.client.make_request('POST', 'upload', data, anon) def delete(self, name): name = self._get_abs_path(name) self.client.delete_image(name) def exists(self, name): name = self._get_abs_path(name) if len([a for a in self.albums if a.title == name]) > 0: return True try: album = [a for a in self.albums if a.title == os.path.dirname(name)][0] images = self.client.get_album_images(album.id) metadata = self.client.get_image(name) if len([im for im in images if im.name == name]) > 0: logger.info(dir(metadata)) return True except ImgurClientError as e: if e.status_code == 404: # not found return False raise e except IndexError as e: return False else: return True return False def listdir(self, path): path = self._get_abs_path(path) response = self.client.get_image(path) directories = [] files = [] for entry in response.get('contents', []): if entry['is_dir']: directories.append(os.path.basename(entry['path'])) else: files.append(os.path.basename(entry['path'])) return directories, files def size(self, path): cache_key = 'django-imgur-size:%s' % filepath_to_uri(path) size = cache.get(cache_key) if not size: directory = os.path.dirname(path) name = os.path.basename(path) album = [a for a in self.albums if a.title == directory][0] images = self.client.get_album_images(album.id) image = [im for im in images if im.name == path][0] size = self.client.get_image(image.id).size cache.set(cache_key, size) return size def url(self, path): cache_key = 'django-imgur-url:%s' % filepath_to_uri(path) url = cache.get(cache_key) if not url: directory = os.path.dirname(path) name = os.path.basename(path) album = [a for a in self.albums if a.title == directory][0] images = self.client.get_album_images(album.id) image = [im for im in images if im.name == path][0] url = self.client.get_image(image.id).link cache.set(cache_key, url) return url def get_available_name(self, name,
max_length=None): """ Returns a filename that's free on the target storage system, and available for new content to be written to. """ #name = self._get_abs_path(name) #dir_name, file_name = os.path.split(name) #file_root, file_ext = os.path.splitext(file_name) ## If the filename already exists, add an underscore and a number (before ## the file extension, if one exists) to the filename until the generated ## filename doesn't exist. #count = itertools.count(1) #while self.exists(name): # # file_ext includes the dot. # name = os.path.join(dir_name, "%s_%s%s" % (file_root, count.next(), file_ext)) return name
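# --- Hedged sketch of the payload _client_upload_from_fd assembles: file bytes
# are base64-encoded and combined with whitelisted metadata before being POSTed
# to the imgur 'upload' endpoint. io.BytesIO stands in for an uploaded Django
# file; the field whitelist mirrors the client's allowed_image_fields but is
# hard-coded here as an assumption.
import base64
import io

allowed_image_fields = {'album', 'name', 'title', 'description'}
config = {'album': 'abc12', 'name': 'logo.png', 'ignored_key': 1}  # made-up values
fd = io.BytesIO(b'\x89PNG fake image bytes')

data = {'image': base64.b64encode(fd.read()), 'type': 'base64'}
data.update({meta: config[meta] for meta in allowed_image_fields & set(config)})
print(sorted(data))  # ['album', 'image', 'name', 'type'] - 'ignored_key' is dropped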
def get_media(img_url, IMGUR_CLIENT, IMGUR_CLIENT_SECRET):
    # Make sure the config file exists
    try:
        config = configparser.ConfigParser()
        config.read('config.ini')
    except BaseException as e:
        print('[EROR] Error while reading config file:', str(e))
        sys.exit()
    # Make sure the media folder exists
    IMAGE_DIR = config['MediaSettings']['MediaFolder']
    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)
        print('[ OK ] Media folder not found, created a new one')
    # Download and save the linked image
    if any(s in img_url for s in ('i.redd.it', 'i.reddituploads.com')):  # Reddit-hosted images
        file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
        file_extension = os.path.splitext(img_url)[-1].lower()
        # Fix for issue with i.reddituploads.com links not having a file extension in the URL
        if not file_extension:
            file_extension += '.jpg'
            file_name += '.jpg'
            img_url += '.jpg'
        # Download the file
        file_path = IMAGE_DIR + '/' + file_name
        print('[ OK ] Downloading file at URL ' + img_url + ' to ' + file_path +
              ', file type identified as ' + file_extension)
        img = save_file(img_url, file_path)
        return img
    elif 'v.redd.it' in img_url:  # Reddit video
        print('[WARN] Reddit videos can not be uploaded to Twitter, due to API limitations')
        return
    elif 'imgur.com' in img_url:  # Imgur
        try:
            client = ImgurClient(IMGUR_CLIENT, IMGUR_CLIENT_SECRET)
        except BaseException as e:
            print('[EROR] Error while authenticating with Imgur:', str(e))
            return
        # Working demo of regex: https://regex101.com/r/G29uGl/2
        regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Imgur image/gallery ID
            id = m.group(1)
            if any(s in img_url for s in ('/a/', '/gallery/')):  # Gallery links
                images = client.get_album_images(id)
                # Only the first image in a gallery is used
                imgur_url = images[0].link
            else:  # Single image
                imgur_url = client.get_image(id).link
            # If the URL is a GIFV or MP4 link, change it to the GIF version
            file_extension = os.path.splitext(imgur_url)[-1].lower()
            if file_extension == '.gifv':
                file_extension = file_extension.replace('.gifv', '.gif')
                imgur_url = imgur_url.replace('.gifv', '.gif')
            elif file_extension == '.mp4':
                file_extension = file_extension.replace('.mp4', '.gif')
                imgur_url = imgur_url.replace('.mp4', '.gif')
            # Download the image
            file_path = IMAGE_DIR + '/' + id + file_extension
            print('[ OK ] Downloading Imgur image at URL ' + imgur_url + ' to ' + file_path)
            imgur_file = save_file(imgur_url, file_path)
            # Imgur will sometimes return a single-frame thumbnail instead of
            # a GIF, so we need to check for this
            if file_extension == '.gif':
                # Open the file using the Pillow library
                img = Image.open(imgur_file)
                # Get the MIME type
                mime = Image.MIME[img.format]
                if mime == 'image/gif':
                    # Image is indeed a GIF, so it can be posted
                    img.close()
                    return imgur_file
                else:
                    # Image is not actually a GIF, so don't post it
                    print('[WARN] Imgur has not processed a GIF version of this link, '
                          'so it can not be posted to Twitter')
                    img.close()
                    # Delete the image
                    try:
                        os.remove(imgur_file)
                    except BaseException as e:
                        print('[EROR] Error while deleting media file:', str(e))
                    return
            else:
                return imgur_file
        else:
            print('[EROR] Could not identify Imgur image/gallery ID in this URL:', img_url)
            return
    elif 'gfycat.com' in img_url:  # Gfycat
        try:
            gfycat_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
            client = GfycatClient()
            gfycat_info = client.query_gfy(gfycat_name)
        except BaseException as e:
            print('[EROR] Error downloading Gfycat link:', str(e))
            return
        # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
        gfycat_url = gfycat_info['gfyItem']['max2mbGif']
        file_path = IMAGE_DIR + '/' + gfycat_name + '.gif'
        print('[ OK ] Downloading Gfycat at URL ' + gfycat_url + ' to ' + file_path)
        gfycat_file = save_file(gfycat_url, file_path)
        return gfycat_file
    elif 'giphy.com' in img_url:  # Giphy
        # Working demo of regex: https://regex101.com/r/o8m1kA/2
        regex = r"https?://((?:.*)giphy\.com/media/|giphy.com/gifs/|i.giphy.com/)(.*-)?(\w+)(/|\n)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Giphy ID
            id = m.group(3)
            # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
            giphy_url = 'https://media.giphy.com/media/' + id + '/giphy-downsized.gif'
            file_path = IMAGE_DIR + '/' + id + '-downsized.gif'
            print('[ OK ] Downloading Giphy at URL ' + giphy_url + ' to ' + file_path)
            giphy_file = save_file(giphy_url, file_path)
            # Check the hash to make sure it's not a GIF saying "This content is not available"
            # More info: https://github.com/corbindavenport/tootbot/issues/8
            hash = hashlib.md5(file_as_bytes(open(giphy_file, 'rb'))).hexdigest()
            if hash == '59a41d58693283c72d9da8ae0561e4e5':
                print('[WARN] Giphy has not processed a 2MB GIF version of this link, '
                      'so it can not be posted to Twitter')
                return
            else:
                return giphy_file
        else:
            print('[EROR] Could not identify Giphy ID in this URL:', img_url)
            return
    else:
        # Check if the URL is an image, based on the MIME type
        image_formats = ('image/png', 'image/jpeg', 'image/gif', 'image/webp')
        img_site = urlopen(img_url)
        meta = img_site.info()
        if meta["content-type"] in image_formats:
            # URL appears to be an image, so download it
            file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
            file_path = IMAGE_DIR + '/' + file_name
            print('[ OK ] Downloading file at URL ' + img_url + ' to ' + file_path)
            try:
                img = save_file(img_url, file_path)
                return img
            except BaseException as e:
                print('[EROR] Error while downloading image:', str(e))
                return
        else:
            print('[EROR] URL does not point to a valid image file')
            return
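# get_media() delegates every download to a save_file(url, path) helper that
# is defined elsewhere in the script. A minimal sketch of what such a helper
# could look like, using requests in streaming mode; only the name and
# signature come from the calls above, the body is an assumption.
import requests

def save_file(img_url, file_path):
    resp = requests.get(img_url, stream=True)
    resp.raise_for_status()
    with open(file_path, 'wb') as media_file:
        # Write in chunks so large GIFs do not have to fit in memory
        for chunk in resp.iter_content(chunk_size=8192):
            media_file.write(chunk)
    # Callers treat the return value as the path of the downloaded file
    return file_path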
class ImgurExtractor(BaseExtractor):

    url_key = 'imgur'

    def __init__(self, post, reddit_object, content_display_only=False):
        """
        A subclass of the BaseExtractor class. This class interacts
        exclusively with the imgur website through the imgur api via
        ImgurPython.
        """
        super().__init__(post, reddit_object, content_display_only)
        self.connected = False
        self.imgur_client_id = self.settings_manager.imgur_client_id
        self.imgur_client_secret = self.settings_manager.imgur_client_secret
        if self.imgur_client_id is None or self.imgur_client_secret is None:
            if LogUtils.imgur_client_error_log_count < 1:
                LogUtils.imgur_client_error_log_count += 1
                message = 'No valid Imgur client detected. In order to download content from imgur.com, ' \
                          'you must have a valid imgur client id and client secret. Please see the ' \
                          'imgur client information dialog in the settings menu.'
                self.handle_failed_extract(message=message,
                                           imgur_client_id_valid=self.imgur_client_id is not None,
                                           imgur_client_secret_valid=self.imgur_client_secret is not None)
        else:
            try:
                self.client = ImgurClient(self.imgur_client_id, self.imgur_client_secret)
                self.connected = True
            except ImgurClientError as e:
                if e.status_code == 500:
                    self.over_capacity_error()
                else:
                    self.unknown_connection_error(e.status_code)
            except:
                message = 'Failed to connect to imgur.com'
                self.handle_failed_extract(message=message, save=True,
                                           extractor_error_message=message)

    def extract_content(self):
        """Determines what type of page container a link points to and
        dispatches the appropriate extraction method."""
        if self.connected:
            try:
                if 'i.imgur' in self.url:
                    self.extract_direct_link()
                elif "/a/" in self.url:
                    self.extract_album()
                elif '/gallery/' in self.url:
                    try:
                        self.extract_album()
                    except:
                        pass
                elif self.url.lower().endswith(Const.ALL_EXT):
                    self.extract_direct_mislinked()
                else:
                    self.extract_single()
            except ImgurClientError as e:
                self.handle_client_error(e.status_code)
            except ImgurClientRateLimitError:
                self.rate_limit_exceeded_error()
            except:
                self.failed_to_locate_error()

    def handle_client_error(self, status_code):
        if status_code == 403:
            if self.client.credits['ClientRemaining'] is None:
                self.failed_to_locate_error()
            elif self.client.credits['ClientRemaining'] <= 0:
                self.no_credit_error()
            else:
                self.failed_to_locate_error()
        if status_code == 429:
            self.rate_limit_exceeded_error()
        if status_code == 500:
            self.over_capacity_error()
        if status_code == 404:
            self.does_not_exist_error()

    def rate_limit_exceeded_error(self):
        message = 'Imgur rate limit exceeded'
        self.handle_failed_extract(message=message, save=True,
                                   imgur_error_message='rate limit exceeded')

    def no_credit_error(self):
        message = 'Not enough imgur credits to extract post'
        self.handle_failed_extract(message=message, save=True,
                                   imgur_error_message='not enough credits')

    def over_capacity_error(self):
        message = 'Imgur is currently over capacity'
        self.handle_failed_extract(message=message, save=True,
                                   imgur_error_message='over capacity')

    def does_not_exist_error(self):
        message = 'Content does not exist. This most likely means that the content has been deleted on ' \
                  'Imgur but the post still remains on reddit'
        self.handle_failed_extract(message=message,
                                   imgur_error_message='Content does not exist')

    def failed_to_locate_error(self):
        message = 'Failed to locate content'
        self.handle_failed_extract(message=message, extractor_error_message=message)

    def unknown_connection_error(self, status_code):
        message = 'Unknown imgur connection error'
        self.handle_failed_extract(message=message, save=True, status_code=status_code)

    def extract_album(self):
        count = 1
        domain, album_id = self.url.rsplit('/', 1)
        for pic in self.client.get_album_images(album_id):
            url = pic.link
            address, extension = url.rsplit('.', 1)
            file_name = self.get_filename(album_id)
            if pic.type == 'image/gif' and pic.animated:
                extension = 'mp4'
                url = pic.mp4
            self.make_content(url, file_name, extension, count)
            count += 1

    def extract_single(self):
        domain, image_id = self.url.rsplit('/', 1)
        pic = self.client.get_image(image_id)
        url = pic.link
        address, extension = url.rsplit('.', 1)
        file_name = self.get_filename(image_id)
        if pic.type == 'image/gif' and pic.animated:
            extension = 'mp4'
            url = pic.mp4
        self.make_content(url, file_name, extension)

    def extract_direct_link(self):
        for ext in Const.ALL_EXT:
            if ext in self.url:
                index = self.url.find(ext)
                url = '%s%s' % (self.url[:index], ext)
        try:
            domain, id_with_ext = url.rsplit('/', 1)
            image_id, extension = id_with_ext.rsplit('.', 1)
            file_name = self.get_filename(image_id)
            if url.endswith('gifv') or url.endswith('gif'):
                picture = self.client.get_image(image_id)
                if picture.type == 'image/gif' and picture.animated:
                    url = picture.mp4
                    extension = 'mp4'
            self.make_content(url, file_name, extension)
        except NameError:
            # url was never assigned: the link carries no recognized extension
            message = 'Unrecognized extension'
            self.handle_failed_extract(message=message, extractor_error_message=message)

    def extract_direct_mislinked(self):
        """
        All direct links to imgur content must start with 'https://i.imgur.com/'.
        Sometimes links get mislabeled when they are posted. This method adds
        the correct address beginning to mislinked imgur urls so a proper
        extraction can be performed.
        """
        for ext in Const.ALL_EXT:
            if ext in self.url:
                index = self.url.find(ext)
                url = '%s%s' % (self.url[:index], ext)
        try:
            domain, id_with_ext = url.rsplit('/', 1)
            domain = 'https://i.imgur.com/'
            url = '%s%s' % (domain, id_with_ext)
            image_id, extension = id_with_ext.rsplit('.', 1)
            file_name = self.get_filename(image_id)
            if url.endswith('gifv') or url.endswith('gif'):
                picture = self.client.get_image(image_id)
                if picture.type == 'image/gif' and picture.animated:
                    url = picture.mp4
                    extension = 'mp4'
            self.make_content(url, file_name, extension)
        except NameError:
            message = 'Unrecognized extension'
            self.handle_failed_extract(message=message, extractor_error_message=message)
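# handle_client_error() above inspects ImgurClient.credits to distinguish a
# plain 403 from an exhausted quota. A small sketch of checking the remaining
# client credits up front, before starting a batch of extractions; the
# credential names and the threshold are placeholders.
from imgurpython import ImgurClient

def has_remaining_credits(client_id, client_secret, minimum=10):
    client = ImgurClient(client_id, client_secret)
    # ImgurClient keeps a 'credits' dict, refreshed from the X-RateLimit-*
    # headers of the most recent API response.
    remaining = client.credits.get('ClientRemaining')
    # Before any request has been made the value is None, so treat that as
    # "unknown, assume fine".
    return remaining is None or remaining > minimum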
def main():
    if not os.path.isfile('credentials.config'):
        # If the credentials file does not exist, start the first-run function
        first_run()  # Authenticate and generate the credentials file.

    # Command line switches
    args = read_command_args()
    use_evernote = args.e
    debug_mode = args.debug
    delete_files = args.t if use_evernote is True else False
    path = args.p
    info_mode = args.i

    if debug_mode:
        logger = create_logger(log_to_console=True)
        logger.setLevel(logging.DEBUG)
        logger.info('Warning - Debug mode active. Files will be downloaded, but not added to index')
    elif info_mode:
        warnings.warn("Suppressed Resource warning", ResourceWarning)  # suppresses all unclosed socket warnings.
        logger = create_logger(log_to_console=True)
    else:
        warnings.warn("Suppressed Resource warning", ResourceWarning)  # suppresses all unclosed socket warnings.
        logger = create_logger()

    logger.info("\n###########\nStarting SR\n###########")
    try:
        with open('credentials.config', 'r') as json_file:
            credentials = json.load(json_file)  # get various OAuth tokens
    except OSError:
        logger.error('Unable to open credentials file')
        raise SystemExit

    # Create the downloads folder on the specified path, or in the dir where the script is run.
    if path != "":
        path = path[0]
    else:
        path = os.getcwd()
    path += "/SRDownloads"
    if not os.path.exists(path):
        os.makedirs(path)

    # Authenticate with Reddit
    logger.info('Authenticating with Reddit')
    client_id = credentials['reddit']['client_id']
    client_secret = credentials['reddit']['client_secret']
    redirect_uri = credentials['reddit']['redirect_uri']
    refresh_token = credentials['reddit']['refresh_token']
    user_agent = "SavedRetriever 0.9 by /u/fuzzycut"
    try:
        r = praw.Reddit(user_agent=user_agent,
                        oauth_client_id=client_id,
                        oauth_client_secret=client_secret,
                        oauth_redirect_uri=redirect_uri)
        access_information = r.refresh_access_information(refresh_token)
        r.set_access_credentials(**access_information)
    except Exception as e:
        logger.error(e)
        raise SystemExit
    time_since_accesstoken = time.time()

    index = set()
    if os.path.isfile('index.txt'):  # the index file contains the index of already-downloaded files.
        try:
            with open('index.txt', 'r') as ind:
                for line in ind:
                    index.add(line[:-1])  # [:-1] strips the newline from each entry.
        except OSError:
            logger.error("Unable to open index file for reading")
            raise SystemExit

    if use_evernote is True:
        enclient = evernoteWrapper.Client(credentials['evernote']['dev_token'], 'Saved from Reddit')

    html_index_file = None
    if delete_files is False:  # only create the index if we're going to use it.
        html_index_file = html_index.index(r.get_me().name, path)

    try:
        ind = open('index.txt', 'a')  # open the index file for appending
    except OSError:
        logger.error("Unable to open index file for writing")
        raise SystemExit

    logger.info("Beginning to save files...")
    for i in r.get_me().get_saved(limit=None):
        if (time.time() - time_since_accesstoken) / 60 > 55:  # Refresh the access token before it runs out.
            logger.debug('Refreshing Reddit token')
            r.refresh_access_information(access_information['refresh_token'])
            time_since_accesstoken = time.time()

        name = i.name
        file_name = name  # to stop the IDE complaining.
        note = None
        failed_upload = False
        evernote_tags = ('Reddit', 'SavedRetriever', '/r/' + i.subreddit.display_name)  # add config for this later

        if name not in index:  # file has not been downloaded
            permalink = i.permalink
            author = i.author
            title = i.link_title if hasattr(i, 'link_title') else i.title

            # ========== #
            # IS COMMENT #
            # ========== #
            if hasattr(i, 'body_html'):
                logger.debug("{} is comment".format(name))
                body = i.body_html
                # html output
                body = subreddit_linker(body)
                output = html_output_string(permalink, author, body, title)
                if delete_files is False:
                    file_name = html_writer(path, name, output)
                # Evernote api section
                if use_evernote is True:
                    enclient.new_note(title)
                    enclient.add_html(output)
                    enclient.add_tag(*evernote_tags)  # the * is very important: it unpacks the tags tuple properly
                    note = enclient.create_note()

            # ============ #
            # IS SELF-POST #
            # ============ #
            elif hasattr(i, 'is_self') and i.is_self is True:
                logger.debug('{} is self-post'.format(name))
                text = i.selftext_html if i.selftext_html is not None else ""
                # html output
                text = subreddit_linker(text)
                output = html_output_string(permalink, author, text, title)
                if delete_files is False:
                    file_name = html_writer(path, name, output)
                # Evernote api section
                if use_evernote is True:
                    enclient.new_note(title)
                    enclient.add_tag(*evernote_tags)
                    enclient.add_html(output)
                    note = enclient.create_note()

            # ====================== #
            # IS DIRECT LINKED IMAGE #
            # ====================== #
            elif hasattr(i, 'url') and re.sub("([^A-z0-9])\w+", "", i.url.split('.')[-1]) in \
                    ['jpg', 'png', 'gif', 'gifv', 'pdf']:
                # Need to check file types and test pdf. How does this handle gfycat
                # and webm? Can EN display those inline? The regex in the elif strips
                # out non-valid filetype chars.
                logger.debug('{} is direct linked image'.format(name))
                url = i.url
                base_filename = "{}_image.{}".format(
                    name, re.sub("([^A-z0-9])\w+", "", url.split('.')[-1]))  # filename for the image; regex same as above.
                filename = path + "/" + base_filename

                # image downloader section
                if os.path.exists(filename) and (os.path.getsize(filename) > 0):  # If the image exists and is valid
                    image_downloaded = True
                    logger.info("Image already exists - {}".format(base_filename))
                else:
                    image_downloaded = image_saver(url, filename)
                    logger.info('Downloaded image - {}'.format(base_filename))
                if image_downloaded:
                    # write the image as <img>, or a link to the downloaded pdf, in the html file
                    if filename.split('.')[-1] == 'pdf':
                        img = '<a href="{}">Click here for link to downloaded pdf</a>'.format(base_filename)
                    else:
                        img = '<br><a href="{0}"><img src="{0}"></a>'.format(base_filename)  # html for embedding
                else:
                    img = "Image failed to download - It may be temporarily or permanently unavailable"

                # Evernote api section
                if use_evernote is True:
                    enclient.new_note(title)
                    enclient.add_tag(*evernote_tags)
                    enclient.add_html(html_output_string_image(permalink, author, "", title))  # should add body="" in the function
                    if image_downloaded:
                        enclient.add_resource(filename)
                    note = enclient.create_note()
                if delete_files is False:
                    file_name = html_writer(path, name, html_output_string_image(permalink, author, img, title))
                else:
                    os.remove(filename)

            # ============== #
            # IS IMGUR ALBUM #
            # ============== #
            elif hasattr(i, 'url') and 'imgur' in i.url:  # Add option to download images to folder.
                logger.debug('{} is Imgur album'.format(name))
                url = i.url
                body = "<h2>{}</h2>".format(title)

                # imgur api section
                client = ImgurClient(credentials['imgur']['client_id'], credentials['imgur']['client_secret'])
                pattern = '\/([A-z0-9]{5,7})'  # matches any 5-7 char word that comes after a forward slash (/).
                match = re.findall(pattern, url)
                gallery_id = match[-1].replace('/', '')  # removes any forward slashes for processing
                gallery = []
                filename = None
                try:
                    gallery = client.get_album_images(gallery_id)
                except imgurpython.helpers.error.ImgurClientError:  # 'gallery' is actually just a lone image
                    try:
                        gallery = [client.get_image(gallery_id)]
                    except imgurpython.helpers.error.ImgurClientError as error:  # the gallery does not exist. Is this the best way to do this?
                        if debug_mode is True or error.status_code != 404:
                            print("**{} - {}**".format(error.status_code, error.error_message))

                img_path = path + "/" + gallery_id
                if not os.path.exists(img_path):
                    os.makedirs(img_path)
                for image in gallery:  # TODO: if gallery > 10, just add a link (it would be too large for the note)
                    image_name = image.title if image.title is not None else ""
                    image_description = image.description if image.description is not None else ""
                    image_filetype = image.type.split('/')[1]
                    image_id = image.id
                    image_link = image.link
                    # set up the downloaded filename and the html for embedding the image
                    base_filename = "{}_image.{}".format(image_id, image_filetype)
                    img = '<p><h3>{0}</h3><a href="{1}/{2}"><img src="{1}/{2}"></a><br/>{3}</p>'.format(
                        image_name, gallery_id, base_filename, image_description)
                    filename = img_path + "/" + base_filename
                    if os.path.exists(filename) and (os.path.getsize(filename) > 0):  # only download if the file doesn't already exist
                        logger.info('Image already exists - {}'.format(base_filename))
                    else:
                        image_saver(image_link, filename)
                        logger.info('Image downloaded - {}'.format(base_filename))
                    body += img

                # Evernote api section
                if use_evernote is True:
                    enclient.new_note(title)
                    enclient.add_tag(*evernote_tags)
                    if len(gallery) == 1 and filename is not None:
                        enclient.add_html(html_output_string_image(permalink, author, "", title))
                        enclient.add_resource(filename)
                    else:
                        enclient.add_html(html_output_string_image(
                            permalink, author,
                            'This album is too large to embed; please see '
                            '<a href="{}">here</a> for the original link.'.format(url), title))
                    note = enclient.create_note()
                if delete_files is False:
                    file_name = html_writer(path, name, html_output_string_image(permalink, author, body, title))
                else:
                    shutil.rmtree(img_path)

            # ========== #
            # IS ARTICLE #
            # ========== #
            elif hasattr(i, 'title') and i.is_self is False:
                # This section needs work. It is semi-complete. Ultimately, adding in the full article is the goal.
                logger.debug('{} is article/webpage'.format(name))
                url = i.url

                # readability api section
                os.environ["READABILITY_PARSER_TOKEN"] = credentials['readability']['parser_key']  # the parser key is passed via an environment variable
                logger.info('Initializing Readability Client')
                parse = ParserClient()  # the readability api doesn't take the token directly
                parse_response = parse.get_article(url)
                article = parse_response.json()
                if 'content' not in article:  # if unable to parse the document, manually set an error message
                    article['content'] = 'Unable to parse page - See <a href="{}">here</a> for the original link'.format(url)
                article = article['content']
                article = "<a href='{}'>{}</a><br/>{}<br/>".format(url, title, article)  # source of the article

                # html output section
                output = html_output_string(permalink, author, article, title)
                if delete_files is False:
                    file_name = html_writer(path, name, output)

                # Evernote section
                if use_evernote is True:
                    enclient.new_note(title)
                    enclient.add_tag(*evernote_tags)
                    output = html_output_string(permalink, author, article, title)
                    enclient.add_html(output)  # Add the html to the note
                    note = enclient.create_note()

            # end of checking for saved items
            if use_evernote is True:
                if note is not None:
                    logger.info('Saved {:9} - GUID: {}'.format(name, note.guid))
                else:  # Upload failed
                    logger.info('Saved {:9} - Note failed to upload'.format(name))
                    failed_upload = True
            elif use_evernote is False:
                logger.info('Saved ' + name)
            if not debug_mode and not failed_upload:
                ind.write(name + "\n")
                ind.flush()  # this fixes python not writing the file if it terminates before .close() can be called
                if delete_files is False:
                    html_index_file.add_link(title, file_name, permalink)

    # end of for loop
    ind.close()
    logger.info("All items downloaded")
    if delete_files is False:
        html_index_file.save_and_close()
    else:
        # try to remove the downloads folder if -t is set, but don't force it
        # if the directory still has things in it.
        try:
            os.rmdir(path)
        except OSError:
            logger.error("Unable to remove files")
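# main() persists progress in a plain-text index: one reddit fullname per
# line, appended after each successful save and re-read into a set on the
# next start-up. The same round-trip in isolation, as a sketch (the file
# name comes from the code above, the helper names are illustrative):

def load_index(index_path='index.txt'):
    try:
        with open(index_path) as f:
            return {line.rstrip('\n') for line in f}
    except FileNotFoundError:
        return set()

def mark_saved(index_fh, name):
    # index_fh is the index file opened in append mode; flushing immediately
    # means a crash between items cannot lose the record.
    index_fh.write(name + '\n')
    index_fh.flush()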
def get_image(self, image_id):
    client = ImgurClient(**self.__class__.imgur_creds)
    image = client.get_image(image_id)
    return image
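# The method above constructs a fresh ImgurClient on every call. Caching one
# client per class avoids the repeated setup and keeps the rate-limit credit
# bookkeeping (client.credits) in a single place. A sketch, assuming as above
# that imgur_creds is a dict of ImgurClient keyword arguments; the values
# here are placeholders.
from imgurpython import ImgurClient

class ImgurMixin:
    imgur_creds = {'client_id': '...', 'client_secret': '...'}
    _client = None

    @classmethod
    def client(cls):
        # Lazily create, then reuse, a single ImgurClient.
        if cls._client is None:
            cls._client = ImgurClient(**cls.imgur_creds)
        return cls._client

    def get_image(self, image_id):
        return self.client().get_image(image_id)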
def get_url(submission, mp4_instead_gif=True):
    '''
    Return (TYPE, URL),
    e.g. ('img', 'http://example.com/pic.png')
    '''
    def what_is_inside(url):
        header = requests.head(url).headers
        if 'Content-Type' in header:
            return header['Content-Type']
        else:
            return ''

    # A native reddit gallery
    if hasattr(submission, 'gallery_data'):
        dict_of_dicts_of_pics = dict()
        list_of_media = dict()
        for item in submission.gallery_data['items']:
            list_of_media[item['id']] = item['media_id']
        counter = 0
        for item in sorted(list_of_media.items(), key=lambda item: item[0]):
            if counter % 10 == 0:
                # start a new chunk; each chunk holds at most 10 items
                dict_of_dicts_of_pics[counter // 10] = dict()
            item_with_media = submission.media_metadata[item[1]]['s']
            if 'u' in item_with_media:
                # It's a pic
                dict_of_dicts_of_pics[counter // 10][counter] = {
                    'url': item_with_media['u'],
                    'type': 'pic'
                }
            else:
                # It's a gif
                dict_of_dicts_of_pics[counter // 10][counter] = {
                    'url': item_with_media['mp4'],
                    'type': 'video'
                }
            counter += 1
        return TYPE_GALLERY, dict_of_dicts_of_pics

    url = submission.url
    url_content = what_is_inside(url)

    if submission.is_video:
        if 'reddit_video' in submission.media:
            if submission.media['reddit_video'].get('is_gif', False):
                return TYPE_GIF, submission.media['reddit_video']['fallback_url']
            return TYPE_VIDEO, submission.media['reddit_video']['fallback_url']

    try:
        if len(submission.crosspost_parent_list) > 0:
            parent_submission_json = submission.crosspost_parent_list[0]
            if parent_submission_json['is_video'] is True:
                if 'reddit_video' in parent_submission_json['media']:
                    if parent_submission_json['media']['reddit_video'].get('is_gif', False):
                        return TYPE_GIF, parent_submission_json['media']['reddit_video']['fallback_url']
                    return TYPE_VIDEO, parent_submission_json['media']['reddit_video']['fallback_url']
    except AttributeError:
        # Not a crosspost
        pass

    if CONTENT_JPEG == url_content or CONTENT_PNG == url_content:
        return TYPE_IMG, url

    if CONTENT_GIF in url_content:
        if url.endswith('.gif') and mp4_instead_gif:
            # Let's try to find an .mp4 file.
            url_mp4 = url[:-4] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4
        return TYPE_GIF, url

    if url.endswith('.gifv'):
        if mp4_instead_gif:
            url_mp4 = url[:-5] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4
        if CONTENT_GIF in what_is_inside(url[0:-1]):
            return TYPE_GIF, url[0:-1]

    if submission.is_self is True:
        # Self submission with text
        return TYPE_TEXT, None

    if urlparse(url).netloc == 'imgur.com':
        # Imgur
        imgur_config = yaml.safe_load(
            open(os.path.join('configs', 'imgur.yml')).read())
        imgur_client = ImgurClient(imgur_config['client_id'],
                                   imgur_config['client_secret'])
        path_parts = urlparse(url).path.split('/')
        if path_parts[1] == 'gallery':
            # TODO: gallery handling
            return TYPE_OTHER, url
        elif path_parts[1] == 'topic':
            # TODO: topic handling
            return TYPE_OTHER, url
        elif path_parts[1] == 'a':
            # An imgur album
            album = imgur_client.get_album(path_parts[2])
            story = dict()
            for num, img in enumerate(album.images):
                number = num + 1
                what = TYPE_IMG
                link = img['link']
                ext = img['type'].split('/')[1]
                if img['animated']:
                    what = TYPE_GIF
                    link = img['mp4'] if mp4_instead_gif else img['gifv'][:-1]
                    ext = 'mp4' if mp4_instead_gif else 'gif'
                story[number] = {'url': link, 'what': what, 'ext': ext}
            if len(story) == 1:
                return story[1]['what'], story[1]['url']
            return TYPE_ALBUM, story
        else:
            # Just an imgur img
            img = imgur_client.get_image(path_parts[1].split('.')[0])
            if not img.animated:
                return TYPE_IMG, img.link
            else:
                if mp4_instead_gif:
                    return TYPE_GIF, img.mp4
                else:
                    return TYPE_GIF, img.gifv[:-1]
    elif 'gfycat.com' in urlparse(url).netloc:
        rname = re.findall(r'gfycat.com\/(?:detail\/)?(\w*)', url)[0]
        try:
            r = requests.get(GFYCAT_GET + rname)
            if r.status_code != 200:
                logging.info('Gfy fail prevented!')
                return TYPE_OTHER, url
            urls = r.json()['gfyItem']
            if mp4_instead_gif:
                return TYPE_GIF, urls['mp4Url']
            else:
                return TYPE_GIF, urls['max5mbGif']
        except KeyError:
            logging.info('Gfy fail prevented!')
            return TYPE_OTHER, url
    else:
        return TYPE_OTHER, url
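# The gallery branch above splits a flat, ordered list of media into
# numbered chunks of ten. The same bookkeeping in isolation, as a small
# helper with illustrative names:

def chunk_by_ten(items):
    """['a', 'b', ...] -> {0: {0: 'a', ..., 9: ...}, 1: {10: ..., ...}, ...}"""
    chunks = {}
    for counter, item in enumerate(items):
        chunks.setdefault(counter // 10, {})[counter] = item
    return chunks

# chunk_by_ten(media_urls) mirrors the shape of dict_of_dicts_of_pics above.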
class Bot:
    def __init__(self, videobot, slave_bot):
        """
        Initializes the Imgur Bot with credentials stored in environment
        variables; the authenticated ImgurClient is kept on self.client.
        """
        IMGUR_CLIENT_ID = os.environ.get("IMGUR_CLIENT_ID")
        IMGUR_CLIENT_SECRET = os.environ.get("IMGUR_CLIENT_SECRET")
        self.client = ImgurClient(IMGUR_CLIENT_ID, IMGUR_CLIENT_SECRET)
        self.supported_video_formats = ['gif', 'gifv', 'webm', 'mp4']
        self.slave_bot = slave_bot
        self.video_bot = videobot

    def handle_album(self, album_link):
        """
        Handles imgur links of the format imgur.com/a/<id> and
        imgur.com/<id>#<img id>.
        :type album_link: str
        :rtype message: str - analysis message from the bot.
        :rtype status: dict - {'nsfw': <float>, 'sfw': <float>}
        """
        temp = album_link.split('/')[-1]
        album_id = temp.split('#')[0]
        message = None
        status = {}
        try:
            album = self.client.get_album(album_id=album_id)
            imgur_flag = album.nsfw
            if imgur_flag:
                message = 'Album marked NSFW on Imgur.'
                message = '**[Hover to reveal](#s "' + message + '")**'  # reddit spoiler tag added.
            elif not imgur_flag:
                images_list = self.client.get_album_images(album_id)
                # Only the first 10 images/gifs are processed in case the album is very large.
                links = [item.link for item in images_list[0:10]
                         if item.type.split('/')[-1] not in self.supported_video_formats]
                links_videos = [item.link for item in images_list[0:10]
                                if item.type.split('/')[-1] in self.supported_video_formats]
                temp1, _ = self.handle_videos(links_videos)
                temp2, _ = self.handle_images(links)
                status.update(temp1)
                status.update(temp2)
                # If every image is SFW, mark the album SFW; if any image is
                # not SFW, find out which one.
                max_nsfw = (None, 0)
                min_sfw = (None, 100)
                for k, v in status.items():
                    labels = sorted(status[k].items(), key=operator.itemgetter(1), reverse=True)
                    tag, confidence = labels[0]
                    if tag == 'SFW' and confidence <= min_sfw[1]:
                        min_sfw = labels[0]
                    elif tag != 'SFW' and confidence > max_nsfw[1]:
                        max_nsfw = labels[0]
                if max_nsfw != (None, 0):
                    message = "Album has " + str(max_nsfw[0]) + \
                              " image(s). I'm {0:.2f}% confident.".format(max_nsfw[1])
                elif max_nsfw == (None, 0):
                    message = "Album has " + str(min_sfw[0]) + \
                              " image(s). I'm {0:.2f}% confident.".format(min_sfw[1])
                message = '**[Hover to reveal](#s "' + message + ' ")**'  # reddit spoiler tag added.
        except error.ImgurClientError as e:
            status = None
            message = None
            print('Imgur Error:', e.error_message)
        return status, message

    def handle_images(self, links):
        status = {}
        message = None
        valid_links = [self.ensure_extension(aLink) for aLink in links
                       if aLink.split('.')[-1].lower() not in ['gif', 'gifv', 'mp4', 'webm']]
        status = self.slave_bot.analyze(valid_links)
        if len(valid_links) == 1:
            link = valid_links[0]
            labels = sorted(status[link].items(), key=operator.itemgetter(1), reverse=True)
            tag, confidence = labels[0]
            message = tag + ". I'm {0:.2f}% confident.".format(confidence)
            if tag == 'SFW':
                manning_distance = self.slave_bot.clarifai_bot.match_template(link, 'manning')
                if manning_distance is not None and manning_distance <= 0.01:
                    message += ' Might be Manning Face.'
            message = '**[Hover to reveal](#s "' + message + ' ")**'  # reddit spoiler tag added.
        return status, message

    def handle_gallery(self, gallery_link):
        item_id = gallery_link.split('/')[-1]
        # The user linked to either an album or an image from the imgur
        # gallery. Assume it is an album; if that 404s, assume it is an image.
        message = ''
        status = {}
        try:
            album = self.client.get_album(album_id=item_id)
            imgur_flag = album.nsfw
            if imgur_flag:
                status = {}
                message = 'Album marked NSFW on Imgur.'
                message = '**[Hover to reveal](#s "' + message + '")**'  # reddit spoiler tag added.
            elif not imgur_flag:
                status, message = self.handle_album(album.link)
        except error.ImgurClientError:
            try:
                image = self.client.get_image(item_id)
                imgur_flag = image.nsfw
                if imgur_flag:
                    message = 'Item marked NSFW on Imgur.'
                    message = '**[Hover to reveal](#s "' + message + '")**'  # reddit spoiler tag added.
                elif not imgur_flag:
                    if image.type.split('/')[-1] in self.supported_video_formats:
                        status, message = self.handle_videos([image.link])
                    else:
                        status, message = self.handle_images([image.link])
            except error.ImgurClientError as e:
                status = None
                message = None
                print('Imgur Error', e.error_message)
        return status, message

    def handle_videos(self, links):
        status = {}
        message = None
        for each_url in links:
            link = self.ensure_extension(each_url)
            # link is now 'imgur.com/<id>.<extension>'
            video_id = link.split('/')[-1].split('.')[0]
            filename = video_id + '.mp4'
            mp4_link = 'http://i.imgur.com/' + filename
            urllib.request.urlretrieve(mp4_link, filename)
            status.update({each_url: self.video_bot.make_prediction(filename)})
            if os.path.exists(filename):
                os.remove(filename)
        if len(links) == 1:
            link = links[0]
            labels = sorted(status[link].items(), key=operator.itemgetter(1), reverse=True)
            tag, confidence = labels[0]
            message = tag + ". I'm {0:.2f}% confident.".format(confidence)
            message = '**[Hover to reveal](#s "' + message + ' ")**'  # reddit spoiler tag added.
        return status, message

    def ensure_extension(self, url):
        temp = url.split('/')[-1]  # will be <image_id>.<extension> or <image_id>
        if '.' not in temp:
            image_id = temp
            url = self.client.get_image(image_id).link
        return url
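# The class above repeatedly extracts the highest-confidence label from a
# {label: confidence} dict by sorting the whole dict. max() with a key does
# the same in one step; a sketch of a shared helper (name illustrative):
import operator

def top_label(scores):
    """Return the (label, confidence) pair with the highest confidence."""
    return max(scores.items(), key=operator.itemgetter(1))

# e.g. top_label({'SFW': 93.4, 'NSFW': 6.6}) -> ('SFW', 93.4)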
img_client_secret = ''
mashape_key = ''

if mashape_key:
    img_client = ImgurClient(img_client_id, img_client_secret, mashape_key=mashape_key)
else:
    img_client = ImgurClient(img_client_id, img_client_secret)

for i in range(1, 25):
    data = {}
    for gallery_img in img_client.subreddit_gallery(subr, page=i):
        url = gallery_img.link
        # check that the image is in jpg format and has not already been processed
        if len(re.findall(r'imgur.com/.+\.jpg', url)) > 0 and found_url.get(url, 0) == 0:
            print("writing " + url)
            img_id = re.findall(r'.com.+\.jpg', url)[0][5:-4]
            try:
                img = img_client.get_image(img_id)
            except:
                continue
            # fetch the medium ('m') variant to make the image easier to process for the network
            response = requests.get(url[:-4] + 'm.jpg')
            label = img.nsfw
            if label:
                path = 'nsfw/'
            else:
                path = 'normal/'
            try:
                if not os.path.exists(path + img_id + '.jpg'):
                    with open(path + img_id + '.jpg', "wb") as f:
                        f.write(response.content)
                    found_url[url] = 1
            except OSError:  # handler assumed; the original excerpt ends mid-try
                continue
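# The scraper above fetches url[:-4] + 'm.jpg': Imgur serves resized
# variants of a direct image link when a size-suffix letter is appended to
# the image id (s, b, t, m, l, h for small square, big square, small thumb,
# medium, large, huge). The same idea as a tiny helper (name illustrative):

def thumbnail_url(direct_url, size='m'):
    """'https://i.imgur.com/abc123.jpg' -> 'https://i.imgur.com/abc123m.jpg'"""
    base, dot, ext = direct_url.rpartition('.')
    return base + size + dot + ext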
def get_hd_media(submission, IMGUR_CLIENT, IMGUR_CLIENT_SECRET):
    media_url = submission.url
    # Make sure the config file exists
    try:
        config = configparser.ConfigParser()
        config.read('config.ini')
    except BaseException as e:
        print('[EROR] Error while reading config file:', str(e))
        sys.exit()
    # Make sure the media folder exists
    IMAGE_DIR = config['MediaSettings']['MediaFolder']
    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)
        print('[ OK ] Media folder not found, created a new one')
    # Download and save the linked image
    if any(s in media_url for s in ('i.redd.it', 'i.reddituploads.com')):  # Reddit-hosted images
        file_name = os.path.basename(urllib.parse.urlsplit(media_url).path)
        file_extension = os.path.splitext(media_url)[-1].lower()
        # Fix for issue with i.reddituploads.com links not having a file extension in the URL
        if not file_extension:
            file_extension += '.jpg'
            file_name += '.jpg'
            media_url += '.jpg'
        # Download the file
        file_path = IMAGE_DIR + '/' + file_name
        print('[ OK ] Downloading file at URL ' + media_url + ' to ' + file_path +
              ', file type identified as ' + file_extension)
        img = save_file(media_url, file_path)
        return img
    elif 'v.redd.it' in media_url:  # Reddit video
        if submission.media:
            # Get the URL for the MP4 version of the reddit video
            video_url = submission.media['reddit_video']['fallback_url']
            # Download the file
            file_path = IMAGE_DIR + '/' + submission.id + '.mp4'
            print('[ OK ] Downloading Reddit video at URL ' + video_url + ' to ' + file_path)
            video = save_file(video_url, file_path)
            return video
        else:
            print('[EROR] Reddit API returned no media for this URL:', media_url)
            return
    elif 'imgur.com' in media_url:  # Imgur
        try:
            client = ImgurClient(IMGUR_CLIENT, IMGUR_CLIENT_SECRET)
        except BaseException as e:
            print('[EROR] Error while authenticating with Imgur:', str(e))
            return
        # Working demo of regex: https://regex101.com/r/G29uGl/2
        regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
        m = re.search(regex, media_url, flags=0)
        if m:
            # Get the Imgur image/gallery ID
            id = m.group(1)
            if any(s in media_url for s in ('/a/', '/gallery/')):  # Gallery links
                images = client.get_album_images(id)
                # Only the first image in a gallery is used
                imgur_url = images[0].link
                print(images[0])
            else:  # Single image/GIF
                img = client.get_image(id)
                if img.type == 'image/gif':
                    # If the image is a GIF, use the MP4 version
                    imgur_url = img.mp4
                else:
                    imgur_url = img.link
            file_extension = os.path.splitext(imgur_url)[-1].lower()
            # Download the image
            file_path = IMAGE_DIR + '/' + id + file_extension
            print('[ OK ] Downloading Imgur image at URL ' + imgur_url + ' to ' + file_path)
            imgur_file = save_file(imgur_url, file_path)
            return imgur_file
        else:
            print('[EROR] Could not identify Imgur image/gallery ID in this URL:', media_url)
            return
    elif 'gfycat.com' in media_url:  # Gfycat
        try:
            gfycat_name = os.path.basename(urllib.parse.urlsplit(media_url).path)
            client = GfycatClient()
            gfycat_info = client.query_gfy(gfycat_name)
        except BaseException as e:
            print('[EROR] Error downloading Gfycat link:', str(e))
            return
        # Download the MP4 version
        gfycat_url = gfycat_info['gfyItem']['mp4Url']
        file_path = IMAGE_DIR + '/' + gfycat_name + '.mp4'
        print('[ OK ] Downloading Gfycat at URL ' + gfycat_url + ' to ' + file_path)
        gfycat_file = save_file(gfycat_url, file_path)
        return gfycat_file
    elif 'giphy.com' in media_url:  # Giphy
        # Working demo of regex: https://regex101.com/r/o8m1kA/2
        regex = r"https?://((?:.*)giphy\.com/media/|giphy.com/gifs/|i.giphy.com/)(.*-)?(\w+)(/|\n)"
        m = re.search(regex, media_url, flags=0)
        if m:
            # Get the Giphy ID
            id = m.group(3)
            # Download the MP4 version of the GIF
            giphy_url = 'https://media.giphy.com/media/' + id + '/giphy.mp4'
            file_path = IMAGE_DIR + '/' + id + 'giphy.mp4'
            print('[ OK ] Downloading Giphy at URL ' + giphy_url + ' to ' + file_path)
            giphy_file = save_file(giphy_url, file_path)
            return giphy_file
        else:
            print('[EROR] Could not identify Giphy ID in this URL:', media_url)
            return
    else:
        # Check if the URL is an image or MP4 file, based on the MIME type
        image_formats = ('image/png', 'image/jpeg', 'image/gif', 'image/webp', 'video/mp4')
        img_site = urlopen(media_url)
        meta = img_site.info()
        if meta["content-type"] in image_formats:
            # URL appears to be an image, so download it
            file_name = os.path.basename(urllib.parse.urlsplit(media_url).path)
            file_path = IMAGE_DIR + '/' + file_name
            print('[ OK ] Downloading file at URL ' + media_url + ' to ' + file_path)
            try:
                img = save_file(media_url, file_path)
                return img
            except BaseException as e:
                print('[EROR] Error while downloading image:', str(e))
                return
        else:
            print('[EROR] URL does not point to a valid image file.')
            return
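# Both get_media() and get_hd_media() fall back to sniffing the MIME type
# with a request when the host is unrecognized. The same probe as a tiny
# helper, using a HEAD request instead of a full GET (illustrative sketch):
import requests

def looks_like_media(url, accepted=('image/png', 'image/jpeg', 'image/gif',
                                    'image/webp', 'video/mp4')):
    content_type = requests.head(url, allow_redirects=True) \
                           .headers.get('Content-Type', '')
    return content_type in accepted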
class Getter(threading.Thread):

    def __init__(self, dwQ, errQ, stopped, filePath, initFile, cfreader):
        threading.Thread.__init__(self, daemon=False)
        self.errorQueue = errQ
        self.downloadQueue = dwQ
        self.stopped = stopped
        self.imgurClient = ImgurClient("4ff2bb9d9c640f2",
                                       "8b036ffa680a1304814f48eff9e93206c096727f")
        self.paramReader = cfreader
        self.downloadPath = PathHolder()
        self.downloadPath = self.paramReader.readParam(filePath + "\\" + initFile, self.downloadPath)

        logger = logging.getLogger('Image_Downloader')
        logger.setLevel(logging.DEBUG)
        fh = logging.FileHandler('logs/downloads.log')
        fh.setLevel(logging.DEBUG)
        # create a console handler with a higher log level
        ch = logging.StreamHandler()
        ch.setLevel(logging.ERROR)
        # create a formatter and add it to the handlers
        formatter = logging.Formatter(
            '[%(filename)s:%(lineno)s - %(funcName)20s() ] %(asctime)s %(levelname)s:%(message)s')
        fh.setFormatter(formatter)
        ch.setFormatter(formatter)
        # add the handlers to the logger
        logger.addHandler(fh)
        logger.addHandler(ch)
        logger.debug("-----------------------------")
        logger.debug("Init complete")

    def run(self):
        logger = logging.getLogger('Image_Downloader')
        logger.debug("Thread started")
        while not self.stopped.is_set():
            self.getURL()

    def getURL(self):
        logger = logging.getLogger('Image_Downloader')
        while not self.downloadQueue.empty():
            downloadList = None
            try:
                downloadList = self.downloadQueue.get()
            except:
                continue
            self.downloadQueue.task_done()
            logger.debug("Dequeued and marked done")
            for downloadObject in downloadList:
                if "imgur" in downloadObject.domain:
                    logger.debug("Imgur URL found")
                    fileName, fileExtension = os.path.splitext(downloadObject.url)
                    if "/a/" in fileName:
                        albumPart = downloadObject.url.split("/a/")[1]
                        albumName = albumPart.split('/')[0].split('#')[0]
                        self.handleAlbum(albumName, downloadObject.subreddit,
                                         downloadObject.id, downloadObject.title)
                    elif fileExtension is not None and len(fileExtension) > 0:
                        self.handleImage(downloadObject.url, downloadObject.subreddit,
                                         downloadObject.id, downloadObject.title, fileExtension)
                    else:
                        fileId = downloadObject.url.split("/")[-1]
                        for f in fileId.split(","):
                            self.handleURL(f, downloadObject.subreddit,
                                           downloadObject.id, downloadObject.title)
                else:
                    logger.debug("Non imgur URL")
                    self.errorQueue.put(ErrorLog(subredditName=downloadObject.subreddit,
                                                 failedItemName=downloadObject.id,
                                                 failReason="Domain not supported"))
        logger.debug("Returning from function")

    def handleImage(self, url, redditName, id, title, fileExtension):
        logger = logging.getLogger('Image_Downloader')
        directory = self.downloadPath.pathToHold + redditName
        valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
        name = ''.join(c for c in title if c in valid_chars)
        fname = (name[:30]) if len(name) > 30 else name
        fileExtension = fileExtension.split("?")[0]
        file = directory + "\\" + fname + "_" + id + fileExtension
        logger.debug("From " + url + " to " + file)
        try:
            if not os.path.exists(directory):
                os.mkdir(directory)
        except OSError as e:
            logger.debug(e.strerror)
            self.errorQueue.put(ErrorLog(subredditName=redditName, failedItemName=id,
                                         failReason=e.strerror, retries=0))
        if not os.path.exists(file) and not os.path.exists(directory + "\\" + name + "_" + id + fileExtension):
            try:
                r = requests.get(url, stream=True)
                if r.status_code == 200:
                    with open(file, 'wb') as f:
                        for chunk in r.iter_content(1024):
                            f.write(chunk)
            except (requests.exceptions.RequestException,
                    requests.exceptions.ConnectionError,
                    requests.exceptions.HTTPError,
                    requests.exceptions.URLRequired,
                    requests.exceptions.TooManyRedirects,
                    requests.exceptions.ConnectTimeout,
                    requests.exceptions.ReadTimeout,
                    requests.exceptions.Timeout) as e:
                logger.debug(e.__class__.__name__)
                self.errorQueue.put(ErrorLog(subredditName=redditName, failedItemName=id,
                                             failReason=e.__class__.__name__, retries=0))
            except OSError as e:
                logger.debug(e.strerror + " " + file)
                self.errorQueue.put(ErrorLog(subredditName=redditName, failedItemName=id,
                                             failReason=e.strerror, retries=0))

    def handleAlbum(self, albumName, redditName, id, title):
        logger = logging.getLogger('Image_Downloader')
        logger.debug("Found an album to download: " + albumName)
        for imageObject in self.imgurClient.get_album_images(albumName):
            logger.debug("Next item in the album")
            self.handleImageObject(imageObject, redditName, title)

    def handleURL(self, fileId, subreddit, id, title):
        logger = logging.getLogger('Image_Downloader')
        logger.debug("Found a wrapped image: " + fileId)
        try:
            image = self.imgurClient.get_image(fileId)
            logger.debug("Got the image back")
            self.handleImageObject(image, subreddit, title)
            logger.debug("Done with this wrap")
        except ImgurClientError as e:
            logger.debug(e.error_message)
            logger.debug(e.status_code)
        except Exception as e:
            logger.debug(type(e))

    def handleImageObject(self, imageObject, reddit, title):
        logger = logging.getLogger('Image_Downloader')
        logger.debug("Got an image to download")
        fileName, fileExtension = os.path.splitext(imageObject.link)
        if fileExtension is not None and len(fileExtension) > 0:
            self.handleImage(imageObject.link, reddit, imageObject.id, title, fileExtension)
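# The Getter thread above spins on stopped.is_set() and drains a queue whose
# items are marked done with task_done(). A minimal sketch of driving such a
# worker and shutting it down cleanly; the stub reader and the file-path
# arguments are placeholders standing in for the real config machinery.
import queue
import threading

class StubReader:
    """Stands in for the cfreader argument; just fills in a download path."""
    def readParam(self, path, holder):
        holder.pathToHold = "downloads\\"
        return holder

def run_downloader(download_lists):
    download_queue = queue.Queue()
    error_queue = queue.Queue()
    stopped = threading.Event()
    for download_list in download_lists:
        download_queue.put(download_list)
    worker = Getter(download_queue, error_queue, stopped, ".", "init.cfg", StubReader())
    worker.start()
    download_queue.join()  # block until every queued list is marked done
    stopped.set()          # then let run() fall out of its loop
    worker.join()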
def get_url(submission, mp4_instead_gif=True):
    '''
    Return (TYPE, URL, EXTENSION),
    e.g. ('img', 'http://example.com/pic.png', 'png')
    '''
    def what_is_inside(url):
        header = requests.head(url).headers
        if 'Content-Type' in header:
            return header['Content-Type']
        else:
            return ''

    url = submission.url
    url_content = what_is_inside(url)

    if CONTENT_JPEG == url_content or CONTENT_PNG == url_content:
        return TYPE_IMG, url, url_content.split('/')[1]

    if CONTENT_GIF in url_content:
        if url.endswith('.gif') and mp4_instead_gif:
            # Let's try to find an .mp4 file.
            url_mp4 = url[:-4] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4, 'mp4'
        return TYPE_GIF, url, 'gif'

    if url.endswith('.gifv'):
        if mp4_instead_gif:
            url_mp4 = url[:-5] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4, 'mp4'
        if CONTENT_GIF in what_is_inside(url[0:-1]):
            return TYPE_GIF, url[0:-1], 'gif'

    if submission.is_self is True:
        # Self submission with text
        return TYPE_TEXT, None, None

    if urlparse(url).netloc == 'imgur.com':
        # Imgur
        imgur_config = yaml.safe_load(
            open(os.path.join('configs', 'imgur.yml')).read())
        imgur_client = ImgurClient(imgur_config['client_id'],
                                   imgur_config['client_secret'])
        path_parts = urlparse(url).path.split('/')
        if path_parts[1] == 'gallery':
            # TODO: gallery handling
            return TYPE_OTHER, url, None
        elif path_parts[1] == 'topic':
            # TODO: topic handling
            return TYPE_OTHER, url, None
        elif path_parts[1] == 'a':
            # An imgur album
            album = imgur_client.get_album(path_parts[2])
            story = dict()
            for num, img in enumerate(album.images):
                number = num + 1
                what = TYPE_IMG
                link = img['link']
                ext = img['type'].split('/')[1]
                if img['animated']:
                    what = TYPE_GIF
                    link = img['mp4'] if mp4_instead_gif else img['gifv'][:-1]
                    ext = 'mp4' if mp4_instead_gif else 'gif'
                story[number] = {'url': link, 'what': what, 'ext': ext}
            if len(story) == 1:
                return story[1]['what'], story[1]['url'], story[1]['ext']
            return TYPE_ALBUM, story, None
        else:
            # Just an imgur img
            img = imgur_client.get_image(path_parts[1].split('.')[0])
            if not img.animated:
                return TYPE_IMG, img.link, img.type.split('/')[1]
            else:
                if mp4_instead_gif:
                    return TYPE_GIF, img.mp4, 'mp4'
                else:
                    return TYPE_GIF, img.gifv[:-1], 'gif'
    elif 'gfycat.com' in urlparse(url).netloc:
        client = GfycatClient()
        rname = re.findall(r'gfycat.com\/(?:detail\/)?(\w*)', url)[0]
        try:
            urls = client.query_gfy(rname)['gfyItem']
            if mp4_instead_gif:
                return TYPE_GIF, urls['mp4Url'], 'mp4'
            else:
                return TYPE_GIF, urls['max5mbGif'], 'gif'
        except KeyError:
            logging.info('Gfy fail prevented!')
            return TYPE_OTHER, url, None
    else:
        return TYPE_OTHER, url, None
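# A hedged usage sketch for the get_url() variant above: fetch one submission
# with PRAW and branch on the returned type. The TYPE_* constants are assumed
# to be module-level strings defined alongside the function; the reddit
# credentials are placeholders.
import praw

def classify_one(post_url):
    reddit = praw.Reddit(client_id='...', client_secret='...',
                         user_agent='get_url demo')
    submission = reddit.submission(url=post_url)
    what, link, ext = get_url(submission, mp4_instead_gif=True)
    if what == TYPE_ALBUM:
        print('album with', len(link), 'items')  # link is the story dict here
    elif what == TYPE_TEXT:
        print('self post:', submission.title)
    else:
        print(what, '->', link, '({})'.format(ext))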
import tkinter as tk
from urllib.request import urlopen

from imgurpython import ImgurClient

root = tk.Tk()
root.title("display a website image")
# a little more than the width and height of the image
w = 520
h = 320
x = 80
y = 100
# use width x height + x_offset + y_offset (no spaces!)
root.geometry("%dx%d+%d+%d" % (w, h, x, y))

# this GIF picture was previously uploaded to tinypic.com
#image_url = "http://i46.tinypic.com/r9oh0j.gif"

client_id = 'c2058ecfc76d75f'
client_secret = '5fe636c3e7a032b56b2120fe82eb3071c790c5ff'
client = ImgurClient(client_id, client_secret)

# get_image() takes the bare image id, without the file extension
item = client.get_image("nhTyj4d")
print(item.link)

#image_byt = urlopen(item.link).read()
#image_b64 = base64.encodebytes(image_byt)
#photo = tk.PhotoImage(data=image_b64)
# create a white canvas
#cv = tk.Canvas(bg='white')
#cv.pack(side='top', fill='both', expand='yes')
# put the image on the canvas with
# create_image(xpos, ypos, image, anchor)
#cv.create_image(10, 10, image=photo, anchor='nw')
#root.mainloop()
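# The commented-out tk.PhotoImage route above only handles formats tkinter
# decodes natively (GIF/PNG). For the JPEG link printed by this script,
# Pillow's ImageTk is the usual bridge; a minimal sketch assuming Pillow is
# installed (pip install Pillow), reusing root and item from above:
import io
from PIL import Image, ImageTk

image_bytes = urlopen(item.link).read()
pil_image = Image.open(io.BytesIO(image_bytes))
photo = ImageTk.PhotoImage(pil_image)  # must be created after tk.Tk()

cv = tk.Canvas(root, bg='white')
cv.pack(side='top', fill='both', expand='yes')
cv.create_image(10, 10, image=photo, anchor='nw')
root.mainloop()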