def cleanup(self):
    """
    Delete all zero byte files located directly inside the download directory.

    Such files are left behind by youtube-dl. Only regular, non-hidden files
    at the top level of ``self.download_dir`` are considered; sub-directories
    are never touched.

    A file that cannot be inspected or removed is logged and skipped, so one
    failure does not prevent the remaining files from being cleaned up.

    :raises ValueError: If ``self.download_dir`` is not an existing directory
    """
    logger.info('Deleting zero byte files...')
    if not os.path.isdir(self.download_dir):
        raise ValueError('Parameter path is not a directory.')

    # The directory part must be escaped: download folders may be named
    # '[<id>] <title>', and unescaped square brackets would be interpreted by
    # glob as a character class, so the pattern would match nothing.
    pattern = os.path.join(glob.escape(self.download_dir), '*')
    for path in glob.glob(pattern):
        try:
            if os.path.isfile(path) and os.path.getsize(path) == 0:
                os.remove(path)
        except OSError as e:
            # Best effort: report the problem and continue with the next file.
            logger.error('{}: {}'.format(type(e), e))
    logger.info('Done.')
def __download_hook(self, data, queue_index=0, queue_total=0):
    """
    Progress callback passed to youtube-dl through the download options.

    When queue information is supplied (``queue_index`` and ``queue_total``
    both positive) the downloaded file name, without extension, is treated as
    the song id. The matching playlist entry is marked as finished, the
    playlist file is rewritten and a summary line is logged. Without queue
    information only the summary line is logged.

    More info about the hook payload is available at
    https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py

    Any error is logged instead of raised, so the hook never propagates an
    exception to its caller.

    :param dict data: Progress information for each queue. The keys
        ``filename`` and ``_total_bytes_str`` are required, ``elapsed`` is
        optional
    :param int queue_index: Current queue index displayed in log message
    :param int queue_total: Total number of queue displayed in log message
    :return: None
    """
    try:
        song_filename = os.path.basename(data['filename'])
        # A negative value (including the -1 default) means "unknown" and
        # suppresses the elapsed tag.
        elapsed_seconds = data.get('elapsed', -1)
        if elapsed_seconds >= 0:
            elapsed_tag = '[Elapsed:{0:.2f}]'.format(elapsed_seconds)
        else:
            elapsed_tag = ''

        if queue_index > 0 and queue_total > 0:
            try:
                song_id, _ = os.path.splitext(song_filename)
                entries = self.playlist_data['entries']
                # Get index from id-index map
                song_index = self.playlist_data_map.get(song_id, -1)
                if song_index >= 0:
                    # YouTube user playlist or SoundCloud playlist
                    entry_found = entries[song_index]
                else:
                    # YouTube auto-generated playlist: fall back to a scan
                    entry_found = next(
                        (entry for entry in entries
                         if entry['id'] == song_id),
                        None)

                if entry_found:
                    # Finish queue and persist the new status
                    entry_found['status'] = YDLQueueStatus.finished.value
                    with open(self.playlist_file, 'w') as f:
                        json.dump(self.playlist_data, f, indent=4,
                                  ensure_ascii=False)
                    emit = logger.info
                    message = 'Finished.'
                    # Use the matched entry itself: song_index is -1 when the
                    # entry was found by scanning, and indexing with -1 would
                    # report the title of the last playlist entry.
                    song_title = entry_found.get('title', None)
                else:
                    emit = logger.warning
                    message = 'The downloaded song is different from the song on the playlist initially requested. This is caused by YouTube auto-generated playlist.'
                    # No playlist entry matches, so no title is known.
                    song_title = None

                title_tag = '[title:{}]'.format(
                    song_title) if song_title else ''
                emit('[Process:{}/{}][ID:{}]{}[Size:{}]{} {}'.format(
                    queue_index,
                    queue_total,
                    song_id,
                    title_tag,
                    data['_total_bytes_str'],
                    elapsed_tag,
                    message))
            except Exception as e:
                logger.error('[Process:{}/{}] {}:{}'.format(
                    queue_index,
                    queue_total,
                    type(e),
                    str(e),
                ))
        else:
            # Single song: the file name without extension is the title
            song_title, _ = os.path.splitext(song_filename)
            logger.info('[Title:{}][Size:{}]{} {}'.format(
                song_title, data['_total_bytes_str'], elapsed_tag,
                'Finished.'))
    except Exception as e:
        logger.error('[Process:{}/{}] {}:{}'.format(
            queue_index, queue_total, type(e), str(e)))
def __merge_playlist(self, pl_data):
    """
    Merge the remote playlist (head) with the locally saved playlist (base)
    and generate the scheduled queue indices.

    For every head entry:

    * invalid entries (``None`` or titled "[private video]" /
      "[deleted video]") are removed from the head playlist;
    * when a saved playlist exists, the status of the saved entry with the
      same id is copied onto the head entry; without a saved playlist the
      status is reset to "ready";
    * ``track_number`` is set to the 1-based position in the cleaned playlist
      and ``self.playlist_data_map`` maps the entry id to its 0-based
      position;
    * the entry is scheduled when it is inside the requested range and not
      already finished.

    Queue indices are 1-based positions in the playlist as received, i.e.
    removed entries still consume an index.

    The merged playlist is written to ``self.playlist_file`` and stored in
    ``self.playlist_data``. ``pl_data`` is modified in place.

    :param dict pl_data: Playlist data contains downloaded songs
    :rtype: (dict, list)
    :return: (playlist, indices): (Merged playlist, Queue indices to download)
    """
    invalid_titles = ('[private video]', '[deleted video]')
    status_ready = YDLQueueStatus.ready.value
    status_finished = YDLQueueStatus.finished.value

    head_playlist_data = pl_data  # Playlist data downloaded from url
    base_playlist_data = None  # Playlist data previously saved on download directory

    def is_invalid(entry):
        # Missing entries and private/deleted placeholders are unusable.
        if entry is None:
            return True
        return (entry.get('title') or 'N/A').lower() in invalid_titles

    def report(emit, position, entry, message):
        # `entry` may be None for removed videos, so fall back to an empty
        # dict instead of failing on `.get`.
        info = entry or {}
        song_title = info.get('title', None)
        title_tag = '[title:{}]'.format(song_title) if song_title else ''
        emit('[Playlist:{}/{}][ID:{}]{} {}'.format(
            position + 1,
            len(head_playlist_data['entries']),
            info.get('id', 'N/A'),
            title_tag,
            message))

    # Load playlist previously saved
    if os.path.exists(self.playlist_file):
        with open(self.playlist_file) as f:
            base_playlist_data = json.load(f)
    has_base_playlist = bool(base_playlist_data)

    # Merge playlist
    candidate_queue_indices = []
    candidate_queue_index = 1
    head_index = 0
    # Iterate over a copy to avoid index shifting when elements are removed
    # while iterating. `head_index` tracks the position inside the live list.
    # https://stackoverflow.com/questions/1207406/how-to-remove-items-from-a-list-while-iterating
    for head_entry in head_playlist_data['entries'][:]:
        if is_invalid(head_entry):
            report(logger.error, head_index, head_entry,
                   'The video is private or deleted. Removed from the playlist.')
            del head_playlist_data['entries'][head_index]
            candidate_queue_index += 1
            continue

        in_range = self.__is_queue_in_range(head_index)

        if has_base_playlist:
            base_index = 0
            # Same copy-while-deleting technique for the saved playlist.
            for base_entry in base_playlist_data['entries'][:]:
                if is_invalid(base_entry):
                    report(logger.error, head_index, base_entry,
                           'The video is private or deleted. Removed from the playlist.')
                    del base_playlist_data['entries'][base_index]
                    continue
                if head_entry['id'] != base_entry['id']:
                    # Entry is kept, so the live index moves forward.
                    base_index += 1
                    continue

                # Same entry is found: merge base status into head status
                base_entry_status = base_entry.get('status', status_ready)
                head_entry['status'] = base_entry_status
                if not in_range:
                    report(logger.debug, head_index, base_entry,
                           'This queue is out of range requested. Skipped.')
                elif base_entry_status == status_finished:
                    report(logger.warning, head_index, base_entry,
                           'This queue is already finished. Skipped.')
                else:
                    report(logger.info, head_index, base_entry,
                           'This queue is not finished yet. Added to scheduled queues.')
                # Delete entry to make iteration faster
                del base_playlist_data['entries'][base_index]
                break

            is_not_finished = head_entry.get(
                'status', status_ready) != status_finished
            if in_range and is_not_finished:
                candidate_queue_indices.append(candidate_queue_index)
        else:
            # Nothing saved yet: every entry in range is scheduled
            if in_range:
                candidate_queue_indices.append(candidate_queue_index)
                report(logger.info, head_index, head_entry,
                       'This queue is not finished yet. Added to scheduled queues.')
            else:
                report(logger.debug, head_index, head_entry,
                       'This queue is out of range requested. Skipped.')
            head_entry['status'] = status_ready

        # Update track number
        head_entry['track_number'] = head_index + 1
        # Add element to dictionary that maps index and entry_id
        self.playlist_data_map[head_entry['id']] = head_index
        candidate_queue_index += 1
        head_index += 1

    # Save playlist
    with open(self.playlist_file, 'w') as file:
        json.dump(head_playlist_data, file, indent=4, ensure_ascii=False)
    self.playlist_data = head_playlist_data

    return head_playlist_data, candidate_queue_indices
def download(self):
    """
    Download the songs on the playlist.

    Download options are generated when the target is a single song, or a
    playlist with at least one scheduled queue index. The songs are then
    downloaded through ``self.ydl`` and the returned playlist data is saved
    to ``self.downloaded_playlist_file``.

    :rtype: bool
    :return: True if songs were downloaded, False if there was nothing to
        process
    :raises PlaylistDownloadException: If downloading or saving the playlist
        data fails, or if no playlist data is returned. The original error
        is attached as the exception cause
    """
    # Generate youtube-dl option
    logger.info('Generating youtube-dl option...')
    extra_options = None
    if not self.is_playlist:
        extra_options = {}
    elif len(self.scheduled_queue_indices) > 0:
        # Only the scheduled queues of a playlist are downloaded
        extra_options = {'queue_indices': self.scheduled_queue_indices}

    ydl_opts = None
    if extra_options is not None:
        ydl_opts = self.ydl_helper.get_download_option(
            download_dir=self.download_dir,
            hook=self.__download_hook,
            audio_codec=self.audio_codec,
            audio_bitrate=self.audio_bitrate,
            verbose=self.verbose,
            **extra_options)
        logger.debug(pformat(ydl_opts))
    logger.info('Done.')

    if not ydl_opts:
        logger.warning(
            'All songs on the playlist are already downloaded. There is nothing to process.'
        )
        return False

    # Download songs
    logger.info('Downloading songs...')
    try:
        # Download playlist; the existing downloader object is re-initialised
        # with the download options
        self.ydl.__init__(params=ydl_opts)
        self.downloaded_playlist_data = self.ydl.extract_info(
            self.download_url, download=True)
        # Save playlist data
        with open(self.downloaded_playlist_file, 'w') as f:
            json.dump(self.downloaded_playlist_data, f, indent=4,
                      ensure_ascii=False)
    except Exception as e:
        # Chain the cause so the real failure stays visible in tracebacks
        raise PlaylistDownloadException('Failed to download playlist.',
                                        None) from e
    if self.downloaded_playlist_data is None:
        raise PlaylistDownloadException('Failed to download playlist.', None)
    logger.info('Done.')
    return True
def preprocess(self, download_url, working_dir):
    """
    Retrieve the playlist for a URL and prepare the download directory.

    Steps performed:

    1. Retrieve the playlist data without downloading any song.
    2. Determine whether it is a playlist or a single song from the
       extractor name, and derive the download directory from it.
    3. Create the download directory and remove stale cache files.
    4. For playlists, merge the retrieved playlist with the one saved
       previously and compute the scheduled queue indices.

    :param str download_url: URL to download
    :param str working_dir: Path to root directory
    :rtype: str
    :return: Path to the download directory
    :raises PlaylistPreprocessException: If the playlist cannot be retrieved
        or its extractor is not supported
    """
    self.download_url = download_url

    # Retrieve playlist
    logger.info('Retrieving playlist...')
    logger.info('Download URL: {}'.format(self.download_url))
    try:
        ydl_opts = self.ydl_helper.get_preprocess_option(
            download_url=self.download_url,
            audio_codec=self.audio_codec,
            audio_bitrate=self.audio_bitrate,
            playlist_start=self.playlist_start,
            playlist_end=self.playlist_end,
            verbose=self.verbose,
        )
        logger.debug(pformat(ydl_opts))
        self.ydl.__init__(params=ydl_opts)
        # TODO: What is extra_info? Need investigation.
        self.playlist_data = self.ydl.extract_info(self.download_url,
                                                   download=False,
                                                   process=False)
    except Exception as e:
        # `Exception` rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not converted into a preprocess failure.
        raise PlaylistPreprocessException('Could not retrieve playlist.',
                                          None) from e
    if self.playlist_data is None or self.ydl is None:
        raise PlaylistPreprocessException('Could not retrieve playlist.',
                                          None)
    logger.info('Done.')

    # Validate playlist
    logger.info('Validating playlist...')
    # Determines playlist type
    playlist_extractor = self.playlist_data['extractor'].lower()
    if playlist_extractor in ('youtube:playlist', 'soundcloud:set'):
        self.is_playlist = True
        # Define download folder name
        if self.test_id is not None:
            download_folder = self.test_id
        else:
            playlist_title = sanitize_filename(self.playlist_data['title'])
            download_folder = '[{}] {}'.format(self.playlist_data['id'],
                                               playlist_title)
    elif playlist_extractor in ('youtube', 'soundcloud'):
        self.is_playlist = False
        # Define download folder name
        if self.test_id is not None:
            download_folder = self.test_id
        else:
            download_folder = self.folder_name
    else:
        raise PlaylistPreprocessException('This playlist is not supported.',
                                          self.playlist_data)
    self.download_dir = os.path.join(working_dir, download_folder)
    self.playlist_file = os.path.join(self.download_dir, '.queued.json')
    self.downloaded_playlist_file = os.path.join(self.download_dir,
                                                 '.downloaded.json')
    logger.debug(pformat(self.playlist_data))
    logger.info('Done.')

    # Create directories
    logger.info('Creating download directory...')
    os.makedirs(self.download_dir, exist_ok=True)
    # The queued playlist is only discarded when cache clearing is requested;
    # the previously downloaded playlist data is always discarded.
    if self.clear_cache and os.path.exists(self.playlist_file):
        os.remove(self.playlist_file)
    if os.path.exists(self.downloaded_playlist_file):
        os.remove(self.downloaded_playlist_file)
    logger.info('Done.')

    # Process playlist
    logger.info('Processing playlist...')
    if self.is_playlist:
        # Convert generator object to list
        self.playlist_data['entries'] = list(self.playlist_data['entries'])
        self.playlist_entry_total = len(self.playlist_data['entries'])
        # Merge playlist
        merged_playlist_data, queue_indices = self.__merge_playlist(
            self.playlist_data)
        with open(self.playlist_file, 'w') as f:
            json.dump(merged_playlist_data, f, indent=4, ensure_ascii=False)
        self.playlist_data = merged_playlist_data
        self.scheduled_queue_indices = queue_indices
    logger.debug(pformat(self.playlist_data))
    logger.info('Done.')

    return self.download_dir
def update(self, download_dir, pl_data, is_playlist):
    """
    Update audio metadata of the downloaded songs.

    For a playlist, every entry is tagged with the playlist title as album,
    the uploader as album artist, the extractor key as composer and its
    1-based position as track number. The audio file of an entry is expected
    at ``<download_dir>/<entry id>.<audio codec>``.

    For a single song, the audio file is expected at
    ``<download_dir>/<sanitized title>.<audio codec>`` and is tagged only if
    it exists.

    A failure while handling one playlist entry is logged and does not stop
    the remaining entries from being processed.

    :param str download_dir: Download directory
    :param dict pl_data: Playlist data which contains downloaded song information
    :param bool is_playlist: Flag that indicates playlist contains multiple songs
    """

    def first_thumbnail(info):
        # Artwork is optional: any missing or malformed piece yields None.
        try:
            return info['thumbnails'][0]['filename']
        except (KeyError, IndexError, TypeError):
            return None

    logger.info('Updating metadata...')
    if is_playlist:
        entries = pl_data.get('entries', []) or []
        album_title = pl_data.get('title', 'Unknown Album')
        album_artist = pl_data.get('uploader', None)
        album_composer = pl_data.get('extractor_key')
        process_total = len(entries)
        for process_index, entry in enumerate(entries, start=1):
            try:
                # Determine filename from entry id
                song_id = entry['id']
                song_title = sanitize_filename(entry.get('title', song_id))
                source_audio_file = os.path.join(
                    download_dir, '{}.{}'.format(song_id, self.audio_codec))
                # Update tag
                self.__update_tag(
                    download_dir=download_dir,
                    song_title=song_title,
                    audio_file=source_audio_file,
                    image_file=first_thumbnail(entry),
                    album_title=album_title,
                    album_artist=album_artist,
                    album_composer=album_composer,
                    track_number=process_index,
                    process_index=process_index,
                    process_total=process_total,
                )
            except Exception:
                # Typically reached when the entry carries no usable data
                # (e.g. it is None). KeyboardInterrupt and SystemExit are
                # deliberately not caught.
                message = 'Could not update metadata because there is no data found on the playlist. The video may be private or deleted. Audio data is not saved.'
                logger.error('[Process:{}/{}][Track:{}] {}'.format(
                    process_index, process_total, 'N/A', message))
    else:
        base_filename = sanitize_filename(pl_data.get('title', 'Unknown'))
        audio_file = os.path.join(
            download_dir, '{}.{}'.format(base_filename, self.audio_codec))
        if os.path.exists(audio_file):
            self.__update_tag(
                download_dir=download_dir,
                audio_file=audio_file,
                image_file=first_thumbnail(pl_data)
            )
    logger.info('Done.')
def __update_tag(self, download_dir, audio_file, image_file, song_title=None, album_title=None, album_artist=None, album_composer=None, track_number=-1, process_index=-1, process_total=-1):
    """
    Update audio metadata for a single song.

    The container format is detected from the file name via ``mimetypes``.
    MP4/M4A, MP3 and FLAC files are tagged; AAC files are accepted but not
    tagged yet. After tagging, the artwork file is removed and, when a song
    title is given, the audio file is renamed to ``<song_title>.<codec>``
    inside ``download_dir``.

    Each tag is written only if the corresponding ``self.no_*`` switch is
    off. All errors are logged and swallowed, so one bad file never
    interrupts the processing of the others.

    :param str download_dir: Download directory
    :param str audio_file: Path to audio file
    :param str image_file: Path to image file, or None if there is no artwork
    :param str song_title: Song title used as the new file name. The file
        keeps its current name when this is None
    :param str album_title: Album title to be saved in metadata
    :param str album_artist: Album artist to be saved in metadata
    :param str album_composer: Album composer to be saved in metadata
    :param int track_number: track number to be saved in metadata
    :param int process_index: Current process index displayed in log message
    :param int process_total: Total number of process displayed in log message
    """
    if audio_file is None:
        logger.warning('[Process:{}/{}][Track:{}] Could not update metadata because there is no data found on the playlist. The video may be private or deleted.'.format(process_index, process_total, track_number))
        return

    # Build the prefix shared by every log message of this song
    if process_index > 0 and process_total > 0:
        if track_number > 0:
            log_prefix = '[Process:{}/{}][Track:{}]'.format(process_index, process_total, track_number)
        else:
            log_prefix = '[Process:{}/{}]'.format(process_index, process_total)
    else:
        log_prefix = ''

    audio_filename = os.path.basename(audio_file)

    try:
        # Validate audio data
        if not os.path.isfile(audio_file):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), audio_file)

        # guess_type returns a (type, encoding) tuple
        audio_mime_type = mimetypes.guess_type(audio_file)

        if contains_at_least(audio_mime_type, ['audio/x-mp4', 'audio/x-m4a', 'audio/mp4a-latm']):
            # For more info about mp4 tag is available at
            # https://github.com/quodlibet/mutagen/blob/cf399dc58940fb1356f672809d763be9e2af0033/mutagen/mp4/__init__.py
            # http://atomicparsley.sourceforge.net/mpeg-4files.html
            mp4_data = mp4.MP4(audio_file)
            # Track Number
            if not self.no_track_number and track_number > 0:
                mp4_data['trkn'] = [(track_number, 0)]
            # Cover image
            if not self.no_artwork:
                image_data = self.__get_tag_image(image_file, audio_mime_type)
                if image_data:
                    mp4_data['covr'] = [image_data]
            # Album title
            if not self.no_album_title and album_title is not None:
                mp4_data['\xa9alb'] = album_title
            # Album artist
            if not self.no_album_artist and album_artist is not None:
                mp4_data['aART'] = album_artist
            # Composer
            if not self.no_composer and album_composer is not None:
                mp4_data['\xa9wrt'] = album_composer
            # Part of compilation
            if not self.no_compilation:
                mp4_data['cpil'] = True
            # Save
            mp4_data.save()

        elif contains_at_least(audio_mime_type, ['audio/x-mp3', 'audio/mpeg']):
            # For more info about ID3v2 tag is available at
            # https://github.com/quodlibet/mutagen/blob/4a5d7d17f1a611280cc52d229aa70b77ca3c55dd/mutagen/id3/_frames.py
            # https://help.mp3tag.de/main_tags.html
            mp3_data = id3.ID3(audio_file)
            # Cover image
            if not self.no_artwork:
                image_data = self.__get_tag_image(image_file, audio_mime_type)
                if image_data:
                    mp3_data['APIC'] = image_data
            # Track number
            if not self.no_track_number and track_number > 0:
                mp3_data.add(id3.TRCK(encoding=3, text=['{}/{}'.format(track_number, 0)]))
            # Album title
            if not self.no_album_title and album_title is not None:
                mp3_data["TALB"] = id3.TALB(encoding=0, text=album_title)
            # Album artist
            if not self.no_album_artist and album_artist is not None:
                mp3_data["TPE2"] = id3.TPE2(encoding=0, text=album_artist)
            # Composer
            if not self.no_composer and album_composer is not None:
                mp3_data["TCOM"] = id3.TCOM(encoding=0, text=album_composer)
            # Part of compilation
            if not self.no_compilation:
                mp3_data['TCMP'] = id3.TCMP(encoding=0, text=['1'])
            # Save
            mp3_data.save()

        elif contains_at_least(audio_mime_type, ['audio/x-aac']):
            # TODO: Add AAC support
            pass

        elif contains_at_least(audio_mime_type, ['audio/x-flac']):
            # https://github.com/quodlibet/mutagen/blob/a1db79ece62c4e86259f15825e360d1ce0986a22/mutagen/flac.py
            # https://github.com/quodlibet/mutagen/blob/4a5d7d17f1a611280cc52d229aa70b77ca3c55dd/tests/test_flac.py
            flac_data = flac.FLAC(audio_file)
            # Artwork
            if not self.no_artwork:
                image_data = self.__get_tag_image(image_file, audio_mime_type)
                if image_data:
                    flac_data.add_picture(image_data)
            # Save the picture first, then reopen the file for the text tags
            flac_data.save()
            flac_data = File(audio_file)
            # Track number
            if not self.no_track_number and track_number > 0:
                flac_data.tags['tracknumber'] = str(track_number)
            # Album title
            if not self.no_album_title and album_title is not None:
                flac_data.tags['album'] = album_title
            # Album artist
            if not self.no_album_artist and album_artist is not None:
                flac_data.tags['albumartist'] = album_artist
            # Composer
            if not self.no_composer and album_composer is not None:
                flac_data.tags['composer'] = album_composer
            # Part of compilation: no tag is written for FLAC
            # Save
            flac_data.save()

        else:
            raise InvalidMimeTypeException("Invalid audio format.", audio_mime_type)

        # Remove artwork if succeeded. image_file is None when the song has
        # no artwork; os.path.exists(None) would raise TypeError and abort
        # the rename below.
        if image_file is not None and os.path.exists(image_file):
            os.remove(image_file)

        # Rename filename from id to title. Without a title (single song)
        # the file keeps its name instead of becoming 'None.<codec>'.
        if song_title is not None:
            dest_audio_file = os.path.join(download_dir, '{}.{}'.format(song_title, self.audio_codec))
            os.rename(audio_file, dest_audio_file)
        else:
            dest_audio_file = audio_file
        dest_audio_filename = os.path.basename(dest_audio_file)
        logger.info('{}[File:{}] Updated.'.format(log_prefix, dest_audio_filename))

    except FileNotFoundError:
        message = 'File not found. Skipped.'
        logger.warning('{}[File:{}] {}'.format(log_prefix, audio_filename, message))
    except InvalidDataException as e:
        message = e.message + ' Skipped.'
        logger.warning('{}[File:{}] {}'.format(log_prefix, audio_filename, message))
    except InvalidMimeTypeException as e:
        message = e.message + ' Skipped.'
        logger.warning('{}[File:{}] {}'.format(log_prefix, audio_filename, message))
    except Exception as e:
        message = 'Error {}: {} Skipped.'.format(type(e), str(e))
        logger.error('{}[File:{}] {}'.format(log_prefix, audio_filename, message))
def download(self):
    """
    Download songs from YouTube and SoundCloud.

    Validates the URL, the working directory and the playlist configuration,
    retrieves the playlist, downloads it and finally removes zero byte files
    from the download directory. Validation, preprocess and download failures
    are logged and terminate the process through ``exit()``.

    :rtype: bool
    :return: Result of process, used by unit test
    """
    print()
    atexit.register(print)

    # Set log level
    if self.verbose:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    # Print version
    logger.info(pkg_resources.require("music_dl")[0])

    # Validate parameters
    logger.info('Validating parameters...')
    try:
        # Validate download url
        url_parsed = urlparse(self.download_url)
        if not url_parsed.scheme.startswith('http'):
            raise DirectoryException(
                'Invalid URL. URL must start with http*. Input value is {}'
                .format(self.download_url))
        tld_parsed = tldextract.extract(self.download_url)
        if tld_parsed.domain not in ['youtube', 'soundcloud']:
            raise DirectoryException(
                'Invalid URL. Music Downloader supports only YouTube and SoundCloud. Input value is {}'
                .format(self.download_url))
        # Validate download directory
        if not is_path_exists_or_creatable(self.working_dir):
            raise DirectoryException(
                'Invalid directory. Please specify valid download directory. Input value is {}'
                .format(self.working_dir))
    except DirectoryException as e:
        logger.error(e.message)
        logger.fatal('Aborted.')
        exit()

    # Validate playlist configuration
    try:
        self.playlist.validate()
    except PlaylistParameterException as e:
        logger.error(e.message)
        logger.fatal('Aborted.')
        exit()
    logger.info('Done.')

    # Retrieve playlist
    download_dir = None
    try:
        download_dir = self.playlist.preprocess(self.download_url,
                                                self.working_dir)
    except PlaylistPreprocessException as e:
        logger.error(e.message)
        logger.error(e.data)
        logger.fatal('Aborted.')
        exit()

    # Download playlist
    # NOTE(review): is_downloaded is currently unused because the
    # "Update metadata" step below contains no code - confirm whether a call
    # to the playlist's metadata update is missing here.
    is_downloaded = False
    try:
        is_downloaded = self.playlist.download()
    except (PlaylistDownloadException, PlaylistPreprocessException) as e:
        # The playlist download raises PlaylistDownloadException; catching
        # only PlaylistPreprocessException let download failures escape the
        # abort handling as an unhandled traceback.
        logger.error(e.message)
        logger.error(e.data)
        logger.fatal('Aborted.')
        exit()

    # Update metadata

    # Cleanup download directory
    self.playlist.cleanup()

    # Print completion message
    logger.info('All process has done.')
    logger.info('Now you can find downloaded songs at {}'.format(
        colorama.Fore.LIGHTCYAN_EX + download_dir))

    return True