def test_gifv_gif_direct_link():
    """A .gifv direct link should be downloaded and saved as .mp4.

    The extension is taken directly from the URL, so .gifv is the initial
    extension; because the media is natively a video, ImgurDownloader is
    expected to store it as an .mp4 file.  NOTE: downloads ~1.4 MB.
    """
    downloader = ImgurDownloader('http://i.imgur.com/MOvVbhc.gifv')
    downloader.save_images()
    # The file lands in the current working directory under the imgur hash.
    downloaded_path = os.path.join(os.getcwd(), 'MOvVbhc.mp4')
    assert os.path.isfile(downloaded_path)
    # Clean up so repeated runs start from a clean slate.
    os.remove(downloaded_path)
def download(self):
    """Download media from submissions.

    Repeatedly fetches batches of submission metadata via
    ``self.get_submissions_info()`` and dispatches each URL to the matching
    downloader (ImgurDownloader, DeviantArt helper, or a gallery-dl
    ``DownloadJob``), until roughly ``self.limit`` submissions have been
    processed or a KeyboardInterrupt stops the loop.  Progress (the id of
    the last submission handled) is persisted to a per-directory history
    file so later runs can resume.  Optionally records each download in an
    image-matching store (``self.im``) and a database (``self.db``).
    """
    continue_downloading = True
    # var limit is constant, self.limit is not constant (self.set_limit is
    # called below to shrink the per-batch request size)
    limit = self.limit
    # counters to keep track of how many submissions downloaded & more
    download_count, error_count, skip_count = 0, 0, 0
    # load last-id of submission downloaded from or create new file for id
    log_filename = '._history.txt'
    log_data, prev_id = process_subreddit_last_id(
        subreddit=self.subreddit, sort_type=self.sort_type,
        dir=self.path, log_file=log_filename, verbose=True)
    if not self.previous_id:
        self.set_previous_id(prev_id)

    # ensures the amount of submissions downloaded from is equal to limit
    while continue_downloading:
        errors, skips = 0, 0
        # get submissions (dict containing info) & use data to download
        submissions = self.get_submissions_info()
        for submission in submissions:
            url = submission['url']
            title = submission['title']
            # makes an assumption that len(file_extension) <= 5
            _, filename = shorten_file_path_if_needed(
                slugify(title),
                max_length=self.OS_MAX_PATH_LENGTH - len(self.path) - 5)
            dl_directory = submission['dl_directory']
            submission_id = submission['id']
            # filename is '' or None -> fall back to a timestamp-based name
            if not filename:
                _, filename = shorten_file_path_if_needed(
                    get_datetime_now(),
                    max_length=self.OS_MAX_PATH_LENGTH - len(self.path) - 5)
            # if an entire imgur album was downloaded,
            # filenames will be stored here
            final_filenames = []
            self.log.info('Attempting to save {} as {}'.format(
                url, dl_directory))
            # check domain and call corresponding downloader
            # download functions or methods
            try:
                if 'imgur.com' in url:
                    imgur = ImgurDownloader(imgur_url=url,
                                            dir_download=self.path,
                                            file_name=filename,
                                            delete_dne=True,
                                            debug=False)
                    # NOTE(review): `skipped` is never folded into `skips`,
                    # so skip_count below stays 0 — confirm intended.
                    final_filenames, skipped = imgur.save_images()
                    if len(final_filenames) == 1:
                        # single image: fold its actual filename back into
                        # the destination path
                        filename = final_filenames[0]
                        dl_directory = os.path.join(
                            os.path.dirname(dl_directory), filename)
                elif 'deviantart.com' in url:
                    download_deviantart_url(url, dl_directory)
                else:
                    # everything else goes through gallery-dl
                    job = DownloadJob(url)
                    job.run()
                    # pathfmt is None for e.g. a text submission on a
                    # subreddit — nothing was downloaded
                    if job.pathfmt is None:
                        raise TurboPalmTreeException(
                            'No path for gallery-dl DownloadJob\n'
                            '\turl = {}'.format(url))
                    dl_directory = os.path.abspath(job.pathfmt.path)
                    # move gallery-dl's output into our target directory,
                    # keeping the original file extension
                    dl_directory = move_file(
                        dl_directory,
                        join(self.path,
                             filename + get_file_extension(dl_directory)))

                print('downloaded: {title}; {url}'.format(title=filename,
                                                          url=url))
                # get time if file is created, else just use the time now
                if dl_directory and os.path.exists(dl_directory):
                    creation_time = os.path.getctime(dl_directory)
                else:
                    creation_time = time.time()

                if not self.disable_im:
                    metadata = {
                        'source_url': url,
                        'creation_time': creation_time
                    }
                    # add img, locate & delete older duplicates
                    self.im.delete_duplicates(dl_directory,
                                              metadata=metadata)
                if not self.disable_db:
                    # add some data to dict, insert data into database
                    submission['download_date'] = convert_to_readable_time(
                        creation_time)
                    self.db.insert(submission)
            # self.Exceptions is presumably a tuple of expected,
            # recoverable error types declared on the class — TODO confirm
            except self.Exceptions as e:
                msg = '{}: {}'.format(type(e).__name__, e.args)
                self.log.warning(msg)
                print(Fore.RED + msg + Style.RESET_ALL)
                errors += 1
            except KeyboardInterrupt:
                msg = 'KeyboardInterrupt caught, exiting program'
                self.log.info(msg)
                print(msg)
                continue_downloading = False
                break

        # update previous id downloaded; the locals() probe guards against
        # the batch having been empty (submission_id never bound)
        if 'submission_id' in locals().keys():
            self.set_previous_id(submission_id)
        # update count of media successfully downloaded.
        # NOTE(review): this assumes exactly self.limit submissions were
        # processed this batch; a short batch inflates download_count.
        download_count += self.limit - errors - skips
        error_count += errors
        skip_count += skips
        # update attribute limit which is used when getting submissions
        if download_count < limit:
            self.set_limit(limit - download_count)
        elif download_count >= limit or not continue_downloading:
            if 'submission_id' in locals().keys():
                # persist resume point for the next run
                log_data[self.subreddit][self.sort_type]['last-id'] = \
                    submission_id
                history_log(self.path, log_filename, 'write', log_data)
            continue_downloading = False

    # continue_downloading is false — tear down and report
    if not self.disable_db:
        self.db.close()
    self._cleanup_files()
    print("{}{} errors occured".format(Fore.YELLOW, error_count))
    print("{}Downloaded from {} submissions from {}/{}{reset}".format(
        Fore.GREEN, download_count, self.subreddit, self.sort_type,
        reset=Style.RESET_ALL))
def doDownload():
    """Pop the next queued URL and download it, then recurse to the next.

    Imgur URLs (matched by regex) go through ImgurDownloader; everything
    else is handed to youtube-dl.  Global state tracks the queue, the URL
    currently downloading, a recursion guard (``loopBreaker``) and a
    termination flag.  On error the queue entry is marked ``"error"`` and
    processing continues with the next item.
    """
    global downloadQueue
    global currentDownloadUrl
    global loopBreaker
    global terminateFlag
    global imgurAlbumSize
    print(downloadFormatString)
    ydl_opts = {
        "logger": MyLogger(),
        "progress_hooks": [my_hook],
        "prefer_ffmpeg": True,
        "restrictfilenames": True,
        "format": downloadFormatString,
    }
    nextUrl = getNextQueuedItem()
    if nextUrl != "NONE":
        currentDownloadUrl = nextUrl["url"]
        print("proceeding to " + currentDownloadUrl)
        try:
            # there's a bug where this will error if your download folder
            # is inside your application folder
            os.chdir(youtubelocation)
            # FIX: raw string — the pattern contains \. and \w, which are
            # invalid escape sequences in a normal string literal
            # (DeprecationWarning now, SyntaxError in future CPython).
            # The compiled pattern is byte-identical to the original.
            # NOTE(review): the trailing group (.\w*) matches ANY char
            # before \w* — (\.\w*) was probably intended for an extension;
            # left unchanged to preserve behavior.
            match = re.match(
                r"(https?)://(www\.)?(i\.|m\.)?imgur\.com/(a/|gallery/|r/)?/?(\w*)/?(\w*)(#[0-9]+)?(.\w*)?",  # NOQA
                currentDownloadUrl,
            )
            if match:
                downloadQueue[currentDownloadUrl]["status"] = "downloading"
                downloadQueue[currentDownloadUrl]["mode"] = "imgur"
                print("Matched Regex")
                downloader = ImgurDownloader(currentDownloadUrl,
                                             youtubelocation)
                print("Downloader created...")
                print("This albums has {} images".format(
                    downloader.num_images()))
                imgurAlbumSize = downloader.num_images()
                # progress callback fires once per saved image
                downloader.on_image_download(imgurOnDownloadHook)
                resultsTuple = downloader.save_images()
                print("Saved!")
                print(resultsTuple)
                downloadQueue[currentDownloadUrl]["status"] = "completed"
            if not match:
                # non-imgur URL: delegate to youtube-dl
                nextUrl["mode"] = "youtube"
                with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                    ydl.download([nextUrl["url"]])
                downloadQueue[nextUrl["url"]]["status"] = "completed"
                downloadQueue[nextUrl["url"]]["playable"] = queryVideo(
                    downloadQueue[nextUrl["url"]]["filename"])
            # restore cwd and reset the recursion guard after a success
            os.chdir(os.path.dirname(os.path.realpath(__file__)))
            loopBreaker = 10
        except Exception as e:
            nextUrl["status"] = "error"
            # NOTE(review): stores the exception OBJECT, not str(e) —
            # downstream consumers must not assume this is a string.
            nextUrl["error"] = e
            os.chdir(os.path.dirname(os.path.realpath(__file__)))
    nextUrl = getNextQueuedItem()
    if nextUrl != "NONE" and loopBreaker > 0:
        # loopBreaker caps consecutive failing iterations (reset to 10 on
        # each success above) so a poison item can't recurse forever
        loopBreaker = loopBreaker - 1
        print("loopBreaker:" + str(loopBreaker))
        if terminateFlag == 0:
            doDownload()
    else:
        print("Nothing to do - Finishing Process")