예제 #1
0
def test_gifv_gif_direct_link():
    """Check that a direct ``.gifv`` imgur link is saved as an ``.mp4`` file.

    The extension is first taken from the URL (``.gifv``), but the media is
    natively a video, so the file expected in the current working directory
    ends in ``.mp4``.  The file is deleted again once its existence has been
    verified.

    NOTE: the media downloaded is about 1.4 MB.
    """
    downloader = ImgurDownloader('http://i.imgur.com/MOvVbhc.gifv')
    downloader.save_images()

    # the downloader was given no target directory, so look in the cwd
    expected_path = os.path.join(os.getcwd(), 'MOvVbhc.mp4')
    assert os.path.isfile(expected_path)
    os.remove(expected_path)
    def download(self):
        """Download media from submissions.

        Fetches batches of submissions with ``self.get_submissions_info()``
        and saves each one with the downloader matching its URL: imgur links
        go through ``ImgurDownloader``, deviantart links through
        ``download_deviantart_url`` and everything else through a gallery-dl
        ``DownloadJob``.  Batches keep being requested until the running
        download count reaches the limit in effect when the method was
        called, or until the user presses Ctrl-C.

        For every saved submission the file is optionally handed to
        ``self.im.delete_duplicates`` (unless ``self.disable_im``) and the
        submission dict is optionally inserted into ``self.db`` (unless
        ``self.disable_db``).  When the loop ends, the id of the last
        submission processed is written to the history log, the database is
        closed, ``self._cleanup_files()`` runs and a summary is printed.

        Exceptions listed in ``self.Exceptions`` are logged, printed and
        counted as errors; any other exception propagates to the caller.
        """
        continue_downloading = True

        # `limit` is the fixed target for this call; self.limit is shrunk
        # between batches to the number of downloads still missing
        limit = self.limit

        # counters to keep track of how many submissions downloaded & more
        download_count, error_count, skip_count = 0, 0, 0

        # id of the most recent submission processed; stays None until a
        # batch actually yields a submission
        submission_id = None

        # load last-id of submission downloaded from or create new file for id
        log_filename = '._history.txt'
        log_data, prev_id = process_subreddit_last_id(subreddit=self.subreddit,
                                                      sort_type=self.sort_type,
                                                      dir=self.path,
                                                      log_file=log_filename,
                                                      verbose=True)
        if not self.previous_id:
            self.set_previous_id(prev_id)

        # ensures the amount of submissions downloaded from is equal to limit
        while continue_downloading:
            # per-batch counters
            # NOTE(review): `skips` is never incremented in this method and
            # the `skipped` value returned by imgur.save_images() is unused
            errors, skips = 0, 0
            # get submissions (dict containing info) & use data to download
            submissions = self.get_submissions_info()
            for submission in submissions:
                url = submission['url']
                title = submission['title']
                # makes an assumption that len(file_extension) <= 5
                max_name_length = (
                    self.OS_MAX_PATH_LENGTH - len(self.path) - 5)
                _, filename = shorten_file_path_if_needed(
                    slugify(title), max_length=max_name_length)
                dl_directory = submission['dl_directory']
                submission_id = submission['id']

                # title slugified to '' or None: fall back to a timestamp
                if not filename:
                    _, filename = shorten_file_path_if_needed(
                        get_datetime_now(), max_length=max_name_length)

                # if an entire imgur album was downloaded,
                # filenames will be stored here
                final_filenames = []

                self.log.info('Attempting to save {} as {}'.format(
                    url, dl_directory))

                # check domain and call corresponding downloader
                # download functions or methods
                try:
                    if 'imgur.com' in url:
                        imgur = ImgurDownloader(imgur_url=url,
                                                dir_download=self.path,
                                                file_name=filename,
                                                delete_dne=True,
                                                debug=False)
                        final_filenames, skipped = imgur.save_images()
                        # a single saved file: point dl_directory at the name
                        # the downloader actually used
                        if len(final_filenames) == 1:
                            filename = final_filenames[0]
                            dl_directory = os.path.join(
                                os.path.dirname(dl_directory), filename)

                    elif 'deviantart.com' in url:
                        download_deviantart_url(url, dl_directory)

                    else:
                        job = DownloadJob(url)
                        job.run()
                        # text submission on a subreddit
                        if job.pathfmt is None:
                            raise TurboPalmTreeException(
                                'No path for gallery-dl DownloadJob\n'
                                '\turl = {}'.format(url))
                        # move the file from where the job saved it into
                        # self.path under the slugified name, keeping its
                        # original extension
                        dl_directory = os.path.abspath(job.pathfmt.path)
                        dl_directory = move_file(
                            dl_directory,
                            join(self.path,
                                 filename + get_file_extension(dl_directory)))

                    print('downloaded: {title}; {url}'.format(title=filename,
                                                              url=url))

                    # get time if file is created, else just use the time now
                    if dl_directory and os.path.exists(dl_directory):
                        creation_time = os.path.getctime(dl_directory)
                    else:
                        creation_time = time.time()

                    if not self.disable_im:
                        metadata = {
                            'source_url': url,
                            'creation_time': creation_time
                        }
                        # add img, locate & delete older duplicates
                        self.im.delete_duplicates(dl_directory,
                                                  metadata=metadata)
                    if not self.disable_db:
                        # add some data to dict insert data into database
                        submission['download_date'] = convert_to_readable_time(
                            creation_time)
                        self.db.insert(submission)

                except self.Exceptions as e:
                    msg = '{}: {}'.format(type(e).__name__, e.args)
                    self.log.warning(msg)
                    print(Fore.RED + msg + Style.RESET_ALL)
                    errors += 1
                except KeyboardInterrupt:
                    msg = 'KeyboardInterrupt caught, exiting program'
                    self.log.info(msg)
                    print(msg)
                    # abandon the rest of this batch; the bookkeeping below
                    # still runs once before the while loop ends
                    continue_downloading = False
                    break

            # update previous id downloaded
            if submission_id is not None:
                self.set_previous_id(submission_id)

            # update count of media successfully downloaded
            # NOTE(review): this assumes the batch held exactly self.limit
            # submissions - confirm against get_submissions_info()
            download_count += self.limit - errors - skips
            error_count += errors
            skip_count += skips

            # Finish when the target is reached or the user interrupted.
            # The interrupt case is tested first so the history log is
            # written even if fewer than `limit` downloads succeeded.
            if download_count >= limit or not continue_downloading:
                if submission_id is not None:
                    log_data[self.subreddit][self.sort_type]['last-id'] = \
                        submission_id

                history_log(self.path, log_filename, 'write', log_data)
                continue_downloading = False
            else:
                # request only as many submissions as are still missing
                self.set_limit(limit - download_count)

        # continue_downloading is false
        if not self.disable_db:
            self.db.close()

        self._cleanup_files()
        print("{}{} errors occured".format(Fore.YELLOW, error_count))
        print("{}Downloaded from {} submissions from {}/{}{reset}".format(
            Fore.GREEN,
            download_count,
            self.subreddit,
            self.sort_type,
            reset=Style.RESET_ALL))
예제 #3
0
def doDownload():
    """Download the next queued URL, then recurse while the queue has items.

    The next item comes from ``getNextQueuedItem()``, which returns the
    string ``"NONE"`` when nothing is queued.  URLs matching the imgur
    pattern are saved with ``ImgurDownloader``; every other URL is passed to
    ``youtube_dl``.  Downloads run with ``youtubelocation`` as the working
    directory, and the directory containing this file is restored as the
    working directory afterwards, whether or not the download succeeded.

    Any ``Exception`` raised while downloading is recorded on the queue item
    (``status`` set to ``"error"``, ``error`` set to the exception) rather
    than propagated.  After each item the function calls itself again if
    another item is queued, ``loopBreaker`` is still positive and
    ``terminateFlag`` is 0.
    """
    global downloadQueue
    global currentDownloadUrl
    global loopBreaker
    global terminateFlag
    global imgurAlbumSize
    print(downloadFormatString)
    ydl_opts = {
        "logger": MyLogger(),
        "progress_hooks": [my_hook],
        "prefer_ffmpeg": True,
        "restrictfilenames": True,
        "format": downloadFormatString,
    }
    nextUrl = getNextQueuedItem()
    if nextUrl == "NONE":
        print("Nothing to do - Finishing Process")
        return

    currentDownloadUrl = nextUrl["url"]
    print("proceeding to " + currentDownloadUrl)
    try:
        # there's a bug where this will error if your download folder is inside your application folder
        os.chdir(youtubelocation)
        # raw string: "\." and "\w" are invalid escapes in a normal literal
        match = re.match(
            r"(https?)://(www\.)?(i\.|m\.)?imgur\.com/(a/|gallery/|r/)?/?(\w*)/?(\w*)(#[0-9]+)?(.\w*)?",  # NOQA
            currentDownloadUrl,
        )
        if match:
            downloadQueue[currentDownloadUrl]["status"] = "downloading"
            downloadQueue[currentDownloadUrl]["mode"] = "imgur"
            print("Matched Regex")
            downloader = ImgurDownloader(currentDownloadUrl,
                                         youtubelocation)
            print("Downloader created...")
            # query the album size once and reuse it for the message
            imgurAlbumSize = downloader.num_images()
            print("This albums has {} images".format(imgurAlbumSize))
            downloader.on_image_download(imgurOnDownloadHook)

            resultsTuple = downloader.save_images()

            print("Saved!")
            print(resultsTuple)
            downloadQueue[currentDownloadUrl]["status"] = "completed"
        else:
            nextUrl["mode"] = "youtube"
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([nextUrl["url"]])
            downloadQueue[nextUrl["url"]]["status"] = "completed"
            downloadQueue[nextUrl["url"]]["playable"] = queryVideo(
                downloadQueue[nextUrl["url"]]["filename"])
        # a successful download resets the recursion budget
        loopBreaker = 10
    except Exception as e:
        nextUrl["status"] = "error"
        nextUrl["error"] = e
    finally:
        # always return to the application directory, on success or failure
        os.chdir(os.path.dirname(os.path.realpath(__file__)))

    nextUrl = getNextQueuedItem()
    if nextUrl != "NONE" and loopBreaker > 0:
        loopBreaker = loopBreaker - 1
        print("loopBreaker:" + str(loopBreaker))
        if terminateFlag == 0:
            doDownload()