class Playlist(object):
    """""" """""" """ Initialization """ """""" """"""

    def __init__(self,
                 download_url,
                 working_dir,
                 folder_name,
                 audio_codec='flac',
                 audio_bitrate=1141,
                 playlist_start=1,
                 playlist_end=0,
                 clear_cache=False,
                 verbose=False,
                 test_id=None):
        """
        Store the download configuration and reset all playlist state.

        :param str download_url: URL to download
        :param str working_dir: Path to root directory
        :param str folder_name: Download folder name used when the URL is a single song
        :param str audio_codec: Preferred audio codec
        :param int audio_bitrate: Preferred audio bitrate
        :param int playlist_start: 1-based index specifying playlist item to start at
        :param int playlist_end: 1-based index specifying playlist item to end at (0 or less: no upper bound)
        :param bool clear_cache: Flag that indicates to delete json file before downloading
        :param bool verbose: Print verbose message
        :param str test_id: Test case identifier; when set it is used as the download folder name
        """

        # Youtube Downloader and its option helper
        self.ydl = YoutubeDL()
        self.ydl_helper = YDLHelper()

        # Where to download from and where to put it
        self.download_url, self.working_dir, self.folder_name = (
            download_url, working_dir, folder_name)

        # Audio preferences
        self.audio_codec, self.audio_bitrate = audio_codec, audio_bitrate

        # Requested playlist range
        self.playlist_start, self.playlist_end = playlist_start, playlist_end

        # Behaviour switches
        self.clear_cache = clear_cache
        self.verbose = verbose
        self.test_id = test_id

        # Paths (filled in by preprocess)
        self.download_dir = None
        self.playlist_file = None  # Path to the file holding the queued playlist
        self.downloaded_playlist_file = None  # Path to the file holding the downloaded songs

        # Playlist data
        self.playlist_data = None  # Playlist data retrieved from the URL
        self.downloaded_playlist_data = None  # Data returned by the download itself
        self.playlist_entry_total = 0  # Total count of entries on playlist
        self.playlist_data_map = {}  # Maps entry id to its index in the playlist entries
        self.scheduled_queue_indices = []  # Queue indices to be downloaded
        self.is_playlist = False  # Flag specifies data is playlist

    """""" """""" """ Property """ """""" """"""

    @property
    def downloaded_songs_total(self):
        """
        :rtype int
        :return Number of songs downloaded
        """
        if self.downloaded_playlist_data is None:
            return 0
        else:
            return len(self.download_dir.get('entries', []))

    """""" """""" """ Validation """ """""" """"""

    def validate(self):
        # Validate playlist start and end
        if self.playlist_start <= 0:
            raise PlaylistParameterException(
                'Invalid start index. Value must be greater than or equal to 1.'
            )

        if 0 < self.playlist_end < self.playlist_start:
            raise PlaylistParameterException(
                'Invalid start and end index. End index must be greater than or equal to start index.'
            )

        # Validate audio codec
        if not (self.audio_codec in ['m4a', 'mp3', 'flac']):
            raise PlaylistParameterException(
                'Supported audio format is m4a, mp3, and flac for now.')

        # Validate audio bit rate
        if self.audio_bitrate < 0:
            raise PlaylistParameterException(
                'Audio bitrate must be positive integer.')

    """""" """""" """ Preprocess """ """""" """"""

    def preprocess(self, download_url, working_dir):
        """
        Retrieve the playlist, prepare the download directory and schedule
        the songs to download.

        :param str download_url: URL to download; replaces the URL given to the initializer
        :param str working_dir: Path to root directory in which the download directory is created
        :rtype: str
        :return: Path to the download directory
        :raises PlaylistPreprocessException: If the playlist cannot be
            retrieved or its extractor is not supported
        """

        self.download_url = download_url

        # --- Retrieve playlist ---

        logger.info('Retrieving playlist...')
        logger.info('Download URL: {}'.format(self.download_url))

        try:
            ydl_opts = self.ydl_helper.get_preprocess_option(
                download_url=self.download_url,
                audio_codec=self.audio_codec,
                audio_bitrate=self.audio_bitrate,
                playlist_start=self.playlist_start,
                playlist_end=self.playlist_end,
                verbose=self.verbose,
            )
            logger.debug(pformat(ydl_opts))
            # Re-initialize the existing downloader with the new options
            self.ydl.__init__(params=ydl_opts)
            # TODO: What is extra_info? Need investigation.
            self.playlist_data = self.ydl.extract_info(self.download_url,
                                                       download=False,
                                                       process=False)

        except Exception as e:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit still propagate; the original
            # error is kept as the cause for debugging.
            raise PlaylistPreprocessException('Could not retrieve playlist.',
                                              None) from e

        if self.playlist_data is None or self.ydl is None:
            raise PlaylistPreprocessException('Could not retrieve playlist.',
                                              None)

        logger.info('Done.')

        # --- Validate playlist ---

        logger.info('Validating playlist...')

        # Determines playlist type
        playlist_extractor = self.playlist_data['extractor'].lower()
        if playlist_extractor in ('youtube:playlist', 'soundcloud:set'):
            self.is_playlist = True
        elif playlist_extractor in ('youtube', 'soundcloud'):
            self.is_playlist = False
        else:
            raise PlaylistPreprocessException(
                'This playlist is not supported.', self.playlist_data)

        # Define download folder name: the test id wins, then "[id] title"
        # for playlists and the configured folder name for single songs.
        if self.test_id is not None:
            download_folder = self.test_id
        elif self.is_playlist:
            playlist_title = sanitize_filename(self.playlist_data['title'])
            download_folder = '[{}] {}'.format(self.playlist_data['id'],
                                               playlist_title)
        else:
            download_folder = self.folder_name
        self.download_dir = os.path.join(working_dir, download_folder)

        self.playlist_file = os.path.join(self.download_dir, '.queued.json')
        self.downloaded_playlist_file = os.path.join(self.download_dir,
                                                     '.downloaded.json')

        logger.debug(pformat(self.playlist_data))
        logger.info('Done.')

        # --- Create directories ---

        logger.info('Creating download directory...')

        # Download directory
        os.makedirs(self.download_dir, exist_ok=True)

        # The queued playlist is only discarded on request; the downloaded
        # playlist file is removed on every run.
        if self.clear_cache and os.path.exists(self.playlist_file):
            os.remove(self.playlist_file)
        if os.path.exists(self.downloaded_playlist_file):
            os.remove(self.downloaded_playlist_file)

        logger.info('Done.')

        # --- Process playlist ---

        logger.info('Processing playlist...')

        if self.is_playlist:
            # Convert generator object to list
            self.playlist_data['entries'] = list(self.playlist_data['entries'])
            self.playlist_entry_total = len(self.playlist_data['entries'])

            # Merge with the previously saved playlist and persist the result
            merged_playlist_data, queue_indices = self.__merge_playlist(
                self.playlist_data)
            with open(self.playlist_file, 'w') as f:
                json.dump(merged_playlist_data,
                          f,
                          indent=4,
                          ensure_ascii=False)
            self.playlist_data = merged_playlist_data
            self.scheduled_queue_indices = queue_indices

        logger.debug(pformat(self.playlist_data))
        logger.info('Done.')

        return self.download_dir

    """""" """""" """ Download """ """""" """"""

    def download(self):
        """
        Download the scheduled songs and save the data returned by the
        downloader to the downloaded playlist file.

        :rtype: bool
        :return: True when songs were downloaded, False when there was nothing to download
        :raises PlaylistDownloadException: If the download or saving its result fails
        """

        # --- Generate youtube-dl option ---

        logger.info('Generating youtube-dl option...')

        # Arguments shared by the playlist and the single song case
        option_kwargs = {
            'download_dir': self.download_dir,
            'hook': self.__download_hook,
            'audio_codec': self.audio_codec,
            'audio_bitrate': self.audio_bitrate,
            'verbose': self.verbose,
        }

        ydl_opts = None
        if not self.is_playlist:
            ydl_opts = self.ydl_helper.get_download_option(**option_kwargs)
            logger.debug(pformat(ydl_opts))
        elif len(self.scheduled_queue_indices) > 0:
            # Playlists additionally pass the queue indices left to download
            option_kwargs['queue_indices'] = self.scheduled_queue_indices
            ydl_opts = self.ydl_helper.get_download_option(**option_kwargs)
            logger.debug(pformat(ydl_opts))

        logger.info('Done.')

        # --- Download songs ---

        if not ydl_opts:
            logger.warning(
                'All songs on the playlist are already downloaded. There is nothing to process.'
            )
            return False

        logger.info('Downloading songs...')

        try:
            # Download playlist
            self.ydl.__init__(params=ydl_opts)
            self.downloaded_playlist_data = self.ydl.extract_info(
                self.download_url, download=True)
            # Save playlist data
            with open(self.downloaded_playlist_file, 'w') as result_file:
                json.dump(self.downloaded_playlist_data,
                          result_file,
                          indent=4,
                          ensure_ascii=False)

        except Exception:
            raise PlaylistDownloadException('Failed to download playlist.',
                                            None)

        if self.downloaded_playlist_data is None:
            raise PlaylistDownloadException('Failed to download playlist.',
                                            None)

        logger.info('Done.')
        return True

    def __download_hook(self, data, queue_index=0, queue_total=0):
        """
        The function that get called on download progress, with a dictionary with the entries.
        More info is available at https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py

        When ``queue_index`` and ``queue_total`` are both positive the song is
        looked up on the playlist, marked as finished and the playlist file
        is rewritten; otherwise only a log line is written.

        :param dict data: Progress information for each queue; ``filename``
            is required, ``elapsed`` and ``_total_bytes_str`` are optional
        :param int queue_index: Current queue index displayed in log message
        :param int queue_total: Total number of queue displayed in log message
        :return: None. Errors are logged, never raised
        """

        try:
            # Filename
            song_filename = os.path.basename(data['filename'])
            # A missing size must not abort the hook after the status was saved
            song_size = data.get('_total_bytes_str', 'N/A')
            elapsed = "{0:.2f}".format(data.get('elapsed', -1))
            elapsed = '[Elapsed:{}]'.format(
                elapsed) if float(elapsed) >= 0 else ''

            if queue_index > 0 and queue_total > 0:

                try:
                    # NOTE(review): the file name without extension is used as
                    # the entry id - confirm against the output template.
                    song_id, _ = os.path.splitext(song_filename)
                    entries = self.playlist_data['entries']

                    # Get index from id-index map
                    song_index = self.playlist_data_map.get(song_id, -1)
                    if song_index >= 0:
                        # YouTube user playlist or SoundCloud playlist
                        entry_found = entries[song_index]
                    else:
                        # YouTube auto-generated playlist: search by id
                        entry_found = next(
                            (entry for entry in entries
                             if entry and entry.get('id') == song_id), None)

                    if entry_found:
                        # Finish queue
                        entry_found['status'] = YDLQueueStatus.finished.value
                        with open(self.playlist_file, 'w') as f:
                            json.dump(self.playlist_data,
                                      f,
                                      indent=4,
                                      ensure_ascii=False)

                        # Print song info. The title comes from the entry that
                        # was actually found; indexing the entries with -1
                        # would report the title of the last entry instead.
                        song_title = entry_found.get('title', None)
                        song_title = '[title:{}]'.format(
                            song_title) if song_title else ''
                        logger.info(
                            '[Process:{}/{}][ID:{}]{}[Size:{}]{} {}'.format(
                                queue_index, queue_total, song_id, song_title,
                                song_size, elapsed, 'Finished.'))

                    else:
                        # Print warning. No entry matches the song, so there
                        # is no title to report.
                        logger.warning(
                            '[Process:{}/{}][ID:{}]{}[Size:{}]{} {}'.format(
                                queue_index, queue_total, song_id, '',
                                song_size, elapsed,
                                'The downloaded song is different from the song on the playlist initially requested. This is caused by YouTube auto-generated playlist.'
                            ))

                except Exception as e:
                    logger.error('[Process:{}/{}] {}:{}'.format(
                        queue_index,
                        queue_total,
                        type(e),
                        str(e),
                    ))

            else:
                # No queue information: the file name stands in for the title
                song_title, _ = os.path.splitext(song_filename)
                logger.info('[Title:{}][Size:{}]{} {}'.format(
                    song_title, song_size, elapsed, 'Finished.'))

        except Exception as e:
            logger.error('[Process:{}/{}] {}:{}'.format(
                queue_index, queue_total, type(e), str(e)))

    def __merge_playlist(self, pl_data):
        """
        The function that merges remote (head_playlist) and local playlist (base_playlist) and generate scheduled queue indices

        Entries that are ``None`` or titled "[private video]" /
        "[deleted video]" are removed from the playlist. For the remaining
        entries the status saved in the local playlist file (if any) is
        carried over, the track number and the id-to-index map are updated,
        and entries that are in the requested range and not finished are
        scheduled. The merged playlist is written to the playlist file.

        :param dict pl_data: Playlist data retrieved from the download URL; its ``entries`` list is modified in place

        :rtype: (dict, list)
        :return: (playlist, indices):  (Merged playlist, Queue indices to download)
        """

        unavailable_titles = ('[private video]', '[deleted video]')
        status_ready = YDLQueueStatus.ready.value
        status_finished = YDLQueueStatus.finished.value

        head_playlist_data = pl_data  # Playlist data downloaded from url
        base_playlist_data = None  # Playlist data previously saved on download directory

        def is_unavailable(entry):
            # A missing entry or a placeholder title marks a private/deleted video
            if entry is None:
                return True
            return (entry.get('title') or 'N/A').lower() in unavailable_titles

        def log_entry(log, position, entry, message):
            # Shared log line; tolerates ``entry`` being None
            entry_id, song_title = 'N/A', None
            if entry is not None:
                entry_id = entry.get('id', 'N/A')
                song_title = entry.get('title', None)
            song_title = '[title:{}]'.format(song_title) if song_title else ''
            log('[Playlist:{}/{}][ID:{}]{} {}'.format(
                position, len(head_playlist_data['entries']), entry_id,
                song_title, message))

        # --- Load playlist previously saved ---

        if os.path.exists(self.playlist_file):
            with open(self.playlist_file) as f:
                base_playlist_data = json.load(f)
        has_base = bool(base_playlist_data)

        # --- Merge Playlist ---

        candidate_queue_indices = []
        # 1-based position on the remote playlist; it also advances for
        # removed entries, unlike head_index which follows the cleaned list.
        candidate_queue_index = 1
        head_index = 0

        # Iterate over a copy to avoid index shifting when elements are removed while iterating.
        # https://stackoverflow.com/questions/1207406/how-to-remove-items-from-a-list-while-iterating
        for head_entry in head_playlist_data['entries'][:]:
            # Delete entry if invalid.
            if is_unavailable(head_entry):
                log_entry(
                    logger.error, head_index + 1, head_entry,
                    'The video is private or deleted. Removed from the playlist.'
                )
                del head_playlist_data['entries'][head_index]
                candidate_queue_index += 1
                continue

            in_range = self.__is_queue_in_range(head_index)

            if has_base:
                # A playlist was saved before: look for the same entry in it
                base_entries = base_playlist_data['entries']
                base_index = 0
                for base_entry in base_entries[:]:
                    # Delete entry if invalid.
                    if is_unavailable(base_entry):
                        log_entry(
                            logger.error, head_index + 1, base_entry,
                            'The video is private or deleted. Removed from the playlist.'
                        )
                        del base_entries[base_index]
                        continue

                    if head_entry['id'] != base_entry['id']:
                        base_index += 1
                        continue

                    # Same entry is found: merge base status into head status
                    base_entry_status = base_entry.get('status', status_ready)
                    head_entry['status'] = base_entry_status

                    if not in_range:
                        log_entry(
                            logger.debug, head_index + 1, base_entry,
                            'This queue is out of range requested. Skipped.')
                    elif base_entry_status == status_finished:
                        log_entry(logger.warning, head_index + 1, base_entry,
                                  'This queue is already finished. Skipped.')
                    else:
                        log_entry(
                            logger.info, head_index + 1, base_entry,
                            'This queue is not finished yet. Added to scheduled queues.'
                        )

                    # Delete entry to make iteration faster
                    del base_entries[base_index]
                    break

                # Update track number
                head_entry['track_number'] = head_index + 1

                # Add queue
                is_not_finished = head_entry.get(
                    'status', status_ready) != status_finished
                if in_range and is_not_finished:
                    candidate_queue_indices.append(candidate_queue_index)

            else:
                # No playlist was saved before: every entry starts as ready
                if in_range:
                    candidate_queue_indices.append(candidate_queue_index)
                    log_entry(
                        logger.info, head_index + 1, head_entry,
                        'This queue is not finished yet. Added to scheduled queues.'
                    )
                else:
                    log_entry(
                        logger.debug, head_index + 1, head_entry,
                        'This queue is out of range requested. Skipped.')

                # Update value
                head_entry['status'] = status_ready

                # Update track number
                head_entry['track_number'] = head_index + 1

            # Add element to dictionary that maps index and entry_id
            self.playlist_data_map[head_entry['id']] = head_index

            candidate_queue_index += 1
            head_index += 1

        # Save playlist
        with open(self.playlist_file, 'w') as file:
            json.dump(head_playlist_data, file, indent=4, ensure_ascii=False)

        self.playlist_data = head_playlist_data

        return head_playlist_data, candidate_queue_indices

    def __is_queue_in_range(self, index):
        is_greater_than_start = self.playlist_end <= 0 and self.playlist_start <= index + 1
        is_between_start_and_end = self.playlist_start <= index + 1 <= self.playlist_end
        return is_greater_than_start or is_between_start_and_end

    """""" """""" """ Cleanup """ """""" """"""

    def cleanup(self):
        """
        The function that deletes all zero byte files from download directory.

        A file that cannot be inspected or removed is logged and skipped so
        the remaining files are still processed.

        :raises ValueError: If the download directory is not an existing directory
        """

        # --- Remove zero bytes files created by youtube-dl ---

        logger.info('Deleting zero byte files...')
        if not os.path.isdir(self.download_dir):
            raise ValueError('Parameter path is not a directory.')

        # The directory name may contain glob special characters (playlist
        # folders are named "[id] title"), so it has to be escaped; otherwise
        # "[id]" is read as a character class and nothing matches.
        pattern = os.path.join(glob.escape(self.download_dir), '*')

        for path in glob.glob(pattern):
            try:
                if os.path.isfile(path) and os.path.getsize(path) == 0:
                    os.remove(path)
            except OSError as e:
                logger.error('{}: {}'.format(type(e), e))

        logger.info('Done.')
# Example #2
# 0
 def __init__(self, video, logger=None):
     """
     Initialize the downloader with the options built for ``video``.

     :param video: Value passed to ``get_ydl_options`` and stored as ``self.video``
     :param logger: Optional logger passed to ``get_ydl_options``; when given,
         the output template is written to it at debug level
     """
     params = get_ydl_options(video=video, logger=logger)
     # ``logger`` defaults to None, so only log when one was supplied
     if logger is not None:
         logger.debug("outtmpl='{}'".format(params['outtmpl']))
     YoutubeDL.__init__(self, params=params)
     self.video = video