def standardize_file_names(location):
    """Rename audio files to ``<artist> - <title><suffix>`` based on their tags.

    :param location: path of a single file or of a directory that is searched
        recursively for files whose suffix is in ``tracks_suffixes``.
    :return: ``(False, message)`` when ``check_object`` rejects the path,
        otherwise a list of ``(renamed, "<old name> --> <new name>")`` tuples,
        one per inspected file.
    """
    file = check_object(location)
    if not file:
        return (False, 'given path not exists')

    if file.is_dir():
        files = [
            candidate for candidate in file.rglob('**/*.*')
            if candidate.suffix in tracks_suffixes
        ]
    else:
        files = [file]

    result = []
    for file in files:
        mutagen_file = File(file, easy=True)
        if mutagen_file is None:
            # mutagen could not recognise the file type: report it unchanged
            # instead of failing on ``None.get``.
            result.append((False, f"{file.name} --> {file.name}"))
            continue

        artist = mutagen_file.get('artist', [''])[0]
        title = mutagen_file.get('title', [''])[0]
        new_name = file.name
        # Only rename when both tags exist and the name does not already
        # carry the "<artist> - " prefix.
        if artist and title and f'{artist} - ' not in file.name:
            file = Path(mutagen_file.filename)
            new_name = f'{artist} - {title}{file.suffix}'
            file.rename(file.parent / new_name)
        # ``rename`` does not mutate ``file``, so ``file.name`` is still the
        # old name here.
        result.append((file.name != new_name, f"{file.name} --> {new_name}"))
    return result
def __init__(self, path):
    """Collect basic information about the file at ``path``.

    The MIME type is obtained from ``/usr/bin/file``; tag and stream
    information is only read for files whose MIME type starts with "audio".
    All attributes are given defaults first so they exist even when
    detection or parsing fails.
    """
    self.path = path
    self.corrupt = False
    self.bitrate = self.length = 0
    self.title = self.artist = self.album = ''
    self.tracknumber = 0
    self.mimetype = ''
    try:
        output = subprocess.Popen(
            ["/usr/bin/file", "--mime-type", path],
            stdout=subprocess.PIPE).communicate()[0]
        self.mimetype = output.split(": ")[-1].rstrip()
    except ValueError:
        print(path)

    # enqueue any audio file
    if self.mimetype[0:5] != "audio":
        return

    audio = MutagenFile(path, easy=True)
    if audio is None:
        # mutagen does not recognise the container; keep the defaults.
        return

    try:
        self.bitrate = int(audio.info.bitrate)
    except Exception:
        pass
    try:
        self.length = int(audio.info.length)
    except Exception:
        pass
    try:
        self.artist = unicode(audio.get('artist', [''])[0])
        self.album = unicode(audio.get('album', [''])[0])
        self.title = unicode(audio.get('title', [''])[0])
        # split because encoders sometimes store track numbers as "i/n";
        # the default must be a string so that .split() works.
        self.tracknumber = int(
            audio.get('tracknumber', ['0'])[0].split("/")[0])
    except Exception as e:
        # Report with the already-parsed bitrate so the handler cannot raise.
        print("%s %s %s" % (e, audio, self.bitrate))
def get_different_artists(section) -> list:
    """Find tracks whose file tags disagree with the library's album artist.

    Albums whose artist title is 'Various Artists' are skipped. For every
    other track the file's ``albumartist`` and ``artist`` tags are read; the
    track is reported when neither equals the album artist.

    :param section: object providing ``albums()``.
    :return: list of ``(track title, album title, album artist,
        tag albumartist, tag artist)`` tuples.
    """
    result = []
    count = 0
    for a in section.albums():
        album_artist = a.artist().title
        if album_artist == 'Various Artists':
            continue
        for t in a.tracks():
            count += 1
            # Progress indicator: a dot every 100 tracks, a dump every 5000.
            if count % 100 == 0:
                print('.', end='', flush=True)
            if count % 5000 == 0:
                pprint(result, width=160)
            file_name = next(t.iterParts()).file
            try:
                tags = File(file_name, easy=True)
            except MutagenError as err:
                pprint(err)
                print("Exception caught trying to read %s" % file_name)
                continue
            if tags is None:
                # mutagen returns None for file types it does not recognise.
                print("Unrecognised file type, skipping %s" % file_name)
                continue
            tag_albumartist = tags.get('albumartist', [''])[0]
            tag_artist = tags.get('artist', [''])[0]
            if (tag_albumartist != album_artist
                    and tag_artist != album_artist):
                result.append((t.title, t.album().title, album_artist,
                               tag_albumartist, tag_artist))
    return result
def fetch_metadata(top):
    """Walk ``top`` and mirror title/artist tags into ``AudioFile`` records.

    Files ending in ".mp3" are opened with ``EasyID3``, everything else with
    ``MutagenFile``; files mutagen cannot identify are skipped. An existing
    record is refreshed when the file's mtime is newer than the stored
    ``modtime``; a missing record is created.
    """
    for dirpath, _dirnames, filenames in os.walk(top):
        for name in filenames:
            full_path = os.path.join(dirpath, name)
            is_mp3 = name.lower().endswith(".mp3")
            tags = EasyID3(full_path) if is_mp3 else MutagenFile(full_path)
            if tags is None:
                continue

            title = "".join(tags.get("title", "")).encode("utf-8")
            artist = "".join(tags.get("artist", "")).encode("utf-8")
            try:
                lookup_path = unicode(full_path.decode("utf-8"))
                record = AudioFile.select_cond(
                    "path = ?", (lookup_path,)).next()
                if os.stat(full_path).st_mtime > record.modtime:
                    record.title = title
                    record.artist = artist
                    record.path = full_path
                    record.modtime = time.time()
                    print("Updated %s" % full_path)
            except StopIteration:
                # No row for this path yet.
                record = AudioFile.new(title=title, artist=artist,
                                       path=full_path, modtime=time.time())
                print("Added %s to database" % full_path)
def process_files(path, stats):
    """Recurse down directory ``path`` and register every audio file.

    For each file a target name ``artist/album/[NN - ]title.ext`` is derived
    from its tags and reported through ``stats``. Unreadable files are
    reported via ``add_type_failure`` / ``add_parse_failure``.
    """
    required_tags = ('artist', 'album', 'title')
    counter = 0
    for root, _dirs, files in os.walk(path):
        for f in files:
            # The counter keeps generated fallback titles unique.
            counter += 1
            stats.files_read += 1
            filename = os.path.join(root, f)

            try:
                audio = File(filename, easy=True)
            except Exception:
                stats.add_type_failure(filename)
                continue
            if not audio:
                stats.add_parse_failure(filename)
                continue

            missing_tags = [
                tag for tag in required_tags if not audio.has_key(tag)
            ]

            tracknumber = audio.get('tracknumber', [None])[0]
            artist = sanitize_path(audio.get('artist', ['No Artist'])[0])
            album = sanitize_path(audio.get('album', ['No Album'])[0])
            fallback_title = "%d_%s" % (counter, f)
            title = sanitize_path(audio.get('title', [fallback_title])[0])

            if tracknumber:
                # Keep only the first run of digits (e.g. "3/12" -> 3).
                try:
                    tracknumber = int(
                        re.sub(r'.*?(\d+).*', r'\1', tracknumber))
                except ValueError:
                    tracknumber = None

            extension = os.path.splitext(filename)[1]
            if tracknumber:
                leaf = "%02d - %s%s" % (tracknumber, title, extension)
            else:
                leaf = title + extension
            newname = os.path.join(artist, album, leaf)

            if missing_tags:
                stats.add_missing_tag_failure(
                    filename + ": " + ", ".join(missing_tags), newname)

            stats.add_track(filename, newname, artist, album, tracknumber,
                            title)
def _tag_property(key):
    """Build a get/set/delete property bound to ``self.tags[key]``."""

    def getter(self):
        return self.tags.get(key)

    def setter(self, value):
        self.tags[key] = value

    def deleter(self):
        del self.tags[key]

    return property(getter, setter, deleter)


class Tag(object):
    """Thin attribute-style wrapper around a mutagen "easy" tag mapping.

    ``album``, ``artist``, ``title`` and ``track`` read, assign and delete
    the ``album``, ``artist``, ``title`` and ``tracknumber`` keys. Reading a
    missing key yields ``None``; deleting a missing key raises ``KeyError``.
    """

    def __init__(self, filename):
        """Open ``filename``; raise ``TagTypeError`` if it is not recognised."""
        self.tags = File(filename, easy=True)
        # Identity test: ``== None`` would go through the mapping's __eq__.
        if self.tags is None:
            raise TagTypeError()

    album = _tag_property('album')
    artist = _tag_property('artist')
    title = _tag_property('title')
    track = _tag_property('tracknumber')

    def save(self):
        """Write the current tags back to the file."""
        self.tags.save()
def get_artist_sort_title(self):
    """Return the album-artist sort name, else the artist sort name.

    Returns ``None`` when neither tag can be read for any reason.
    """
    try:
        tags = MFile(self.filename, easy=True)
        tag = tags.get('albumartistsort')  # 'soaa'
        if tag:
            return tag[0]
        return tags.get('artistsort')[0]  # 'soar'
    except Exception:
        # Best effort: a missing tag or unreadable file yields None, but
        # SystemExit/KeyboardInterrupt are no longer swallowed.
        return None
def process_files(path, stats):
    """Recurse down directory ``path`` and process each audio file.

    Every file is counted in ``stats.files_read``. Files that cannot be
    opened or parsed are reported as failures; for the rest a destination
    name built from the artist, album, track number and title tags is
    passed to ``stats.add_track``.
    """
    counter = 0
    for root, _dirs, files in os.walk(path):
        for f in files:
            # The counter keeps generated fallback titles unique.
            counter += 1
            stats.files_read += 1
            filename = os.path.join(root, f)

            try:
                audio = File(filename, easy=True)
            except Exception:
                stats.add_type_failure(filename)
                continue
            if not audio:
                stats.add_parse_failure(filename)
                continue

            missing_tags = [
                tag for tag in ('artist', 'album', 'title')
                if not audio.has_key(tag)
            ]

            tracknumber = audio.get('tracknumber', [None])[0]
            artist = sanitize_path(audio.get('artist', ['No Artist'])[0])
            album = sanitize_path(audio.get('album', ['No Album'])[0])
            default_title = "%d_%s" % (counter, f)
            title = sanitize_path(audio.get('title', [default_title])[0])

            if tracknumber:
                # Reduce values such as "3/12" to the first number.
                try:
                    tracknumber = int(
                        re.sub(r'.*?(\d+).*', r'\1', tracknumber))
                except ValueError:
                    tracknumber = None

            extension = os.path.splitext(filename)[1]
            if not tracknumber:
                file_part = title + extension
            else:
                file_part = "%02d - %s%s" % (tracknumber, title, extension)
            newname = os.path.join(artist, album, file_part)

            if len(missing_tags) > 0:
                stats.add_missing_tag_failure(
                    filename + ": " + ", ".join(missing_tags), newname)

            stats.add_track(filename, newname, artist, album, tracknumber,
                            title)
def get_album_sort_title(self):
    """Return the album sort title, or ``None`` when it cannot be read.

    The ``albumsort`` key is tried first, then ``albumsortorder``. The file
    is parsed once instead of once per key.
    """
    try:
        tags = MFile(self.filename)
    except Exception:
        return None
    for key in ('albumsort', 'albumsortorder'):
        try:
            return tags.get(key)[0]
        except Exception:
            # Missing key, empty value or unusable tag object: try the next.
            continue
    return None
def get_artist_sort_title(self):
    """Return the artist sort title, or ``None`` when it cannot be read.

    The ``performersortorder`` key is tried first, then ``albumartistsort``.
    The file is parsed once instead of once per key.
    """
    try:
        tags = MFile(self.filename)
    except Exception:
        return None
    for key in ('performersortorder', 'albumartistsort'):
        try:
            return tags.get(key)[0]
        except Exception:
            # Missing key, empty value or unusable tag object: try the next.
            continue
    return None
def testFileTagProcessor(self):
    """file_tag_processor ignores irrelevant changes and writes tag changes."""
    from mutagen import File
    from klangbecken.playlist import (
        FileAddition,
        FileDeletion,
        MetadataChange,
        file_tag_processor,
    )

    # No-ops: none of these changes may fail for a non-existent file
    noop_changes = [
        FileAddition(""),
        MetadataChange("id", "abc"),
        MetadataChange("playlist", "abc"),
        MetadataChange("ext", "abc"),
        MetadataChange("nonexistant", "abc"),
        FileDeletion(),
    ]
    file_tag_processor(
        self.tempdir, "nonexistant", "fileIdZZ", "mp3", noop_changes
    )

    changes = {
        "artist": "New Artist (๛)",
        "title": "New Title (᛭)",
        "cue_in": "0.123",
        "cue_out": "123",
        "track_gain": "-12 dB",
    }
    metadata_changes = [
        MetadataChange(key, val) for key, val in changes.items()
    ]

    # Valid files
    for filename in ["silence.mp3", "silence-stripped.mp3"]:
        prefix, ext = filename.split(".")
        path = os.path.join(self.tempdir, "music", filename)

        # Make sure tags are not already the same before updating
        before = File(path, easy=True)
        for key, val in changes.items():
            self.assertNotEqual(val, before.get(key, [""])[0])

        # Update and verify tags
        file_tag_processor(
            self.tempdir, "music", prefix, ext, metadata_changes
        )
        after = File(path, easy=True)
        for key, val in changes.items():
            stored = after.get(key, [""])
            self.assertEqual(len(stored), 1)
            self.assertEqual(val, after.get(key, [""])[0])
def get_track_artist(self):
    """Return the track artist when it differs from the album artist.

    The album artist comes from ``self.get_artist_title()``. The ``artist``
    tag is returned when its ``str()`` differs from ``str(album_artist)`` or
    when there is no album artist. If anything in that first attempt raises,
    the ``artist_credit`` tag is tried instead. Returns ``None`` otherwise.
    """
    album_artist = self.get_artist_title()
    try:
        tags = MFile(self.filename)
        track_artist = tags.get('artist')[0]
        if str(track_artist) != str(album_artist) or album_artist is None:
            return track_artist
    except:
        # Reached on any failure above (unreadable file, missing 'artist'
        # tag, or a failing str() conversion).
        try:
            tags = MFile(self.filename)
            track_artist = tags.get('artist_credit')[0]
            # NOTE(review): unlike the first branch, this one requires an
            # album artist to be present - confirm the asymmetry is intended.
            if track_artist != album_artist and album_artist is not None:
                return track_artist
        except:
            pass
    return None
def get_artist_title(self):
    """Return the first ``albumartist`` value, or ``None`` if unavailable."""
    try:
        tags = MFile(self.filename, easy=True)
        return tags.get('albumartist')[0]
    except Exception:
        # Best effort: unreadable file or missing/empty tag yields None,
        # without swallowing SystemExit/KeyboardInterrupt.
        return None
def run(self):
    """Poll the current download and file each finished track.

    Once the active downloader reports ``ended``, blank artist/album/track
    fields of the queue head are filled from the file's tags (or from
    "Unknown" / the file name), the tags are written back, the file is moved
    to ``<download_path>/<artist>/<album>/`` and added to the library, and
    the next queued download is started. The loop sleeps one second per
    iteration until the thread is stopped.
    """

    def has_text(value):
        # True when the value contains at least one ASCII letter or digit.
        return re.search('[a-zA-Z0-9]', value) is not None

    def first_value(tags, key):
        # Easy tags store lists; re.search needs the first string instead.
        value = tags.get(key, "")
        if isinstance(value, (list, tuple)):
            return value[0] if value else ""
        return value

    while not self.__was_stopped:
        if self.downloader is not None and self.downloader.ended:
            source_path = self.downloader.downloaded_path()
            f = File(source_path, easy=True)
            entry = self.queue[0]

            # File name without its extension, used as the title fallback.
            name_stem = join(
                source_path.split("/")[-1].split(".")[0:-1], '.')
            fields = (
                ("artist", "artist", "Unknown"),
                ("album", "album", "Unknown"),
                ("track", "title", name_stem),
            )
            for attr, key, fallback in fields:
                if not has_text(getattr(entry, attr)):
                    tagged = first_value(f, key)
                    setattr(entry, attr,
                            tagged if has_text(tagged) else fallback)
                f[key] = getattr(entry, attr)

            print(f.tags)
            f.save()

            target_dir = join([config.download_path,
                               safe_filename(entry.artist),
                               safe_filename(entry.album)], "/")
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            # A random suffix keeps equally named tracks from colliding.
            file_path = target_dir + "/" + safe_filename(entry.track) + \
                "_" + random_string() + "." + source_path.split(".")[-1]
            shutil.move(source_path, file_path)
            library.add_track(TrackInfo(file_path))

            del self.queue[0]
            self.downloader = None
            if len(self.queue) > 0:
                self.start_download()
        time.sleep(1)
def get_artist_title(self):
    """Return the album artist, falling back to the track artist.

    ``albumartist`` and ``album artist`` are used when their first value is
    non-empty; otherwise the first ``artist`` value is returned. If that
    lookup fails, the raw first ``album artist`` value is returned, and
    ``None`` when even that is unavailable. The file is parsed only once.
    """
    try:
        tags = MFile(self.filename)
    except Exception:
        return None
    try:
        for key in ('albumartist', 'album artist'):
            tag = tags.get(key)
            if tag and len(tag[0]) > 0:
                return tag[0]
        return tags.get('artist')[0]
    except Exception:
        try:
            return tags.get('album artist')[0]
        except Exception:
            return None
def getOneSongInfo(self, songPath: str):
    """ Collect the information of one song.

    Parameters
    ----------
    songPath: str
        path of the audio file

    Returns
    -------
    dict with the keys ``songPath``, ``songer``, ``songName``, ``album``,
    ``modifiedAlbum``, ``tcon``, ``year``, ``tracknumber``, ``duration``,
    ``suffix``, ``createTime`` and ``modifiedTime``.
    """
    tag = TinyTag.get(songPath)
    fileInfo = QFileInfo(songPath)

    # read the tag information, substituting placeholders for blank values
    suffix = "." + fileInfo.suffix()
    songName = tag.title if tag.title and tag.title.strip(
    ) else fileInfo.baseName()
    songer = tag.artist if tag.artist and tag.artist.strip() else "未知艺术家"
    album = tag.album if tag.album and tag.album.strip() else "未知专辑"
    tracknumber = str(tag.track) if tag.track else "0"
    tcon = tag.genre if tag.genre else "未知流派"
    # the duration may be unavailable; treat that as zero seconds
    seconds = tag.duration or 0
    duration = f"{int(seconds//60)}:{int(seconds%60):02}"
    album_list = adjustAlbumName(album)

    # adjust the track number
    tracknumber = self.__adjustTrackNumber(tracknumber)

    # get the year
    if tag.year and tag.year[0] != "0":
        year = tag.year[:4] + "年"
    else:
        # fall back to the format specific year tag read through mutagen
        key_dict = {".m4a": "©day", ".mp3": "TDRC", ".flac": "year"}
        year_key = key_dict.get(suffix.lower())
        audio = File(songPath) if year_key else None
        raw_year = audio.get(year_key) if audio is not None else None
        year = str(raw_year[0])[:4] + "年" if raw_year else "未知年份"

    # get the time stamps
    createTime = fileInfo.birthTime().toString(Qt.ISODate)
    modifiedTime = fileInfo.lastModified().toString(Qt.ISODate)

    songInfo = {
        "songPath": songPath,
        "songer": songer,
        "songName": songName,
        "album": album_list[0],  # original album name
        "modifiedAlbum": album_list[-1],  # adjusted album name
        "tcon": tcon,
        "year": year,
        "tracknumber": tracknumber,
        "duration": duration,
        "suffix": suffix,
        "createTime": createTime,
        "modifiedTime": modifiedTime,
    }
    return songInfo
def get_track_genres(self):
    """Return the stripped genres stored under the ``\\xa9gen`` key.

    Each stored value is split with ``parse_genres``. Errors are logged and
    whatever was collected so far (possibly an empty list) is returned.
    """
    genre_list = []
    try:
        tags = MFile(self.filename)
        genres = tags.get('\xa9gen')
        if genres is not None and len(genres) > 0:
            for genre in genres:
                for sub_genre in parse_genres(genre):
                    genre_list.append(sub_genre.strip())
    except Exception as e:
        Log('Exception reading (genre): ' + str(e))
    # The collected list was previously built but never handed back.
    return genre_list
async def make_np_embed(self, path: str, timestamp: timedelta) -> discord.Embed:
    """Build the "now playing" embed for the audio file at ``path``.

    The embed title is the file's ``title`` tag (first element when it is a
    list) or the path itself. Artist, album and track fields are added via
    ``set_if_exists``, followed by a progress bar for ``timestamp`` and a
    thumbnail from ``self.get_cache_url``.
    """
    audio = File(path)
    # Whole seconds only.
    duration = timedelta(seconds=audio.info.length // 1)

    heading = audio.get('title', path)
    if isinstance(heading, list):
        heading = heading[0]

    embed = discord.Embed(title=heading, colour=discord.Colour.random())
    for field_name, tag_key in (('Artist', 'artist'),
                                ('Album', 'album'),
                                ('Track', 'tracknumber')):
        set_if_exists(embed, name=field_name, value=audio.get(tag_key))

    progress_bar = create_bar(timestamp, duration)
    embed.add_field(name='** **', value=progress_bar)
    thumbnail_url = await self.get_cache_url(path)
    embed.set_thumbnail(url=thumbnail_url)
    return embed
def get_tag(tags: File, tag_names: List[str], get_all=False):
    """Return the value of the first non-empty tag among ``tag_names``.

    A value exposing a ``text`` attribute is replaced by that attribute.
    With ``get_all`` the value is returned as is; otherwise nested lists and
    tuples are unwrapped to their first element. Returns ``None`` when no
    listed tag has a truthy value.
    """
    for name in tag_names:
        value = tags.get(name)
        if not value:
            continue
        value = getattr(value, 'text', value)
        if get_all:
            return value
        # Exact type test on purpose: subclasses are returned untouched.
        while type(value) in (list, tuple):
            value = value[0]
        return value
    return None
def __getM4aAlbumCover(self, songInfo):
    """ Extract the cover embedded in an m4a file.

    Nothing happens when the picture already exists, when the file cannot
    be read as mp4, or when it carries no ``covr`` tag.
    """
    isPicExist, sub_album_cover_folder = self.__isPicExist(songInfo)
    if isPicExist:
        return
    id_card = File(songInfo['songPath'])
    # return directly if the file is unreadable or the suffix does not
    # match the real file type
    if id_card is None or id_card.mime[0].split('/')[-1] != 'mp4':
        return
    covers = id_card.get('covr')
    if covers:
        # create the album's cover folder if needed; tolerate an existing
        # folder and missing parent folders
        os.makedirs(sub_album_cover_folder, exist_ok=True)
        # extract the cover data
        pic_data = bytes(covers[0])
        self.__savePic(sub_album_cover_folder, songInfo, pic_data)
def _compute_image_folder(self):
    """Set ``image_folder`` of each folder to a base64-encoded cover image.

    An image file in the folder whose name contains "folder", "cover" or
    "front" is preferred. Otherwise the cover art embedded in the folder's
    first track (MP3 ``APIC``, FLAC pictures, MP4/M4A ``covr``) is used.
    ``image_folder`` stays ``False`` when nothing is found.
    """
    accepted_names = ["folder", "cover", "front"]
    for folder in self:
        _logger.debug("Computing image folder %s...", folder.path)

        # Keep image files only
        files = [
            f for f in os.listdir(folder.path)
            if os.path.isfile(os.path.join(folder.path, f))
            and imghdr.what(os.path.join(folder.path, f))
        ]

        # Try to find an image with a name matching the accepted names
        folder.image_folder = False
        for f in files:
            if any(n in f.lower() for n in accepted_names):
                with open(os.path.join(folder.path, f), "rb") as img:
                    folder.image_folder = base64.b64encode(img.read())
            if folder.image_folder:
                break
        if folder.image_folder:
            continue

        # Try to find an embedded cover art
        track = None
        try:
            track = folder.track_ids[:1]
            track_ext = os.path.splitext(
                track.path)[1].lower() if track else ""
            song = File(track.path) if track else False
            if song:
                data = False
                if track_ext == ".mp3" and song.tags.getall("APIC"):
                    data = song.tags.getall("APIC")[0].data
                elif track_ext == ".flac" and song.pictures:
                    data = song.pictures[0].data
                elif track_ext in (".mp4", ".m4a") and song.get("covr"):
                    data = song["covr"][0]
                if data:
                    folder.image_folder = base64.b64encode(data)
        except Exception:
            # Best effort only; the handler must not fail when the track
            # lookup itself raised.
            _logger.debug(
                "Error while getting embedded cover art of %s",
                track.path if track else folder.path, exc_info=1)
def process_metadata(self, metadata):
    """Replace ``metadata.genres`` with the genres stored in the OGG file.

    Nothing is changed when the file cannot be parsed or has no ``genre``
    tag. Blank genre fragments are skipped.
    """
    Log('Reading OGG tags from: ' + self.filename)
    try:
        tags = MFile(self.filename)
        Log('Found tags: ' + str(tags.keys()))
    except Exception:
        Log('An error occured while attempting to parse the OGG file: ' + self.filename)
        return

    # Genres
    try:
        genres = tags.get('genre')
        if genres is not None and len(genres) > 0:
            metadata.genres.clear()
            for genre in genres:
                for sub_genre in parse_genres(genre):
                    cleaned = sub_genre.strip()
                    # Do not register empty fragments as genres.
                    if cleaned:
                        metadata.genres.add(cleaned)
    except Exception as e:
        Log('Exception reading genre: ' + str(e))
def get_new_path(source, target):
    """Return the destination path for ``source`` below ``target``.

    The layout is ``target/<artist>/<album>/<file name>``: the album artist
    is preferred over the artist, files flagged as compilation go under
    "Compilations", and missing tags fall back to "Unknown Artist" /
    "Unknown Album". The directory is created as a side effect.

    Raises ``UnsupportedFileType`` when mutagen cannot identify the file.
    """
    filename = os.path.basename(source).decode("utf-8")
    f = File(source)
    if f is None:
        raise UnsupportedFileType(source)

    KeyMap = Mp4KeyMap if isinstance(f, mutagen.mp4.MP4) else Id3KeyMap

    def first_value(key_names, default):
        # First element of the first key that is present, else the default.
        for key_name in key_names:
            try:
                return f[getattr(KeyMap, key_name)][0]
            except KeyError:
                pass
        return default

    artist = first_value(("albumartist", "artist"), u"Unknown Artist")
    if f.get(KeyMap.compilation, None):
        artist = "Compilations"
    album = first_value(("album",), u"Unknown Album")

    base = os.path.join(target, artist, album)
    try:
        os.makedirs(base)
    except OSError:
        # Typically: the directory already exists.
        pass

    new_path = os.path.join(base, _get_file_name(base, filename))
    vprint("new file path: %s" % new_path)
    return os.path.normpath(new_path)
def process_metadata(self, metadata):
    """Replace ``metadata.genres`` with the genres stored in the FLAC file.

    Nothing is changed when the file cannot be parsed or has no ``genre``
    tag. Blank genre fragments are skipped.
    """
    Log("Reading FLAC tags from: " + self.filename)
    try:
        tags = MFile(self.filename)
        Log("Found tags: " + str(tags.keys()))
    except Exception:
        # Best effort, without swallowing SystemExit/KeyboardInterrupt.
        Log("An error occurred while attempting to parse the FLAC file: " + self.filename)
        return

    # Genres
    try:
        genres = tags.get("genre")
        if genres is not None and len(genres) > 0:
            metadata.genres.clear()
            for genre in genres:
                for sub_genre in parse_genres(genre):
                    cleaned = sub_genre.strip()
                    if cleaned:
                        metadata.genres.add(cleaned)
    except Exception as e:
        Log("Exception reading genre: " + str(e))
def process_metadata(self, metadata):
    """Add the genres stored in the MP4 file to ``metadata.genres``.

    Existing genres are kept; blank fragments and genres that are already
    present are skipped. Nothing is changed when the file cannot be parsed.
    """
    Log("Reading MP4 tags from: " + self.filename)
    try:
        tags = MFile(self.filename)
        Log("Found tags: " + str(tags.keys()))
    except Exception:
        # Best effort, without swallowing SystemExit/KeyboardInterrupt.
        Log("An error occurred while attempting to parse the MP4 file: " + self.filename)
        return

    # Genres
    try:
        genres = tags.get("\xa9gen")
        if genres is not None and len(genres) > 0:
            for genre in genres:
                for sub_genre in parse_genres(genre):
                    cleaned = sub_genre.strip()
                    if cleaned and cleaned not in metadata.genres:
                        metadata.genres.add(cleaned)
    except Exception as e:
        Log("Exception reading \xa9gen (genre): " + str(e))
def process_metadata(self, metadata):
    """Add the genres stored in the MP4 file to ``metadata.genres``.

    Existing genres are kept; blank fragments and genres that are already
    present are skipped. Nothing is changed when the file cannot be parsed.
    """
    Log('Reading MP4 tags from: ' + self.filename)
    try:
        tags = MFile(self.filename)
        Log('Found tags: ' + str(tags.keys()))
    except Exception:
        # Best effort, without swallowing SystemExit/KeyboardInterrupt.
        Log('An error occurred while attempting to parse the MP4 file: ' + self.filename)
        return

    # Genres
    try:
        genres = tags.get('\xa9gen')
        if genres is not None and len(genres) > 0:
            for genre in genres:
                for sub_genre in parse_genres(genre):
                    cleaned = sub_genre.strip()
                    if cleaned and cleaned not in metadata.genres:
                        metadata.genres.add(cleaned)
    except Exception as e:
        Log('Exception reading \xa9gen (genre): ' + str(e))
class OGGAudioHelper(AudioHelper):
    """Audio helper that reads genre and release date from OGG files."""

    def __init__(self, filename):
        """Parse the tags of ``filename``; ``self.tags`` is None on failure."""
        super(OGGAudioHelper, self).__init__(filename)
        try:
            Log('Reading OGG tags from: ' + self.filename)
            self.tags = MFile(self.filename)
            Log('Found OGG tags: ' + str(self.tags.keys()))
        except Exception:
            Log('An error occured while attempting to parse the OGG file: ' + self.filename)
            # Make sure the attribute exists so later calls do not fail
            # with a confusing AttributeError.
            self.tags = getattr(self, 'tags', None)

    @classmethod
    def is_helper_for(cls, tagType):
        """Return True for the 'OggVorbis' and 'OggOpus' tag types."""
        return tagType in ['OggVorbis', 'OggOpus']

    def process_metadata(self, metadata, prefs):
        """Copy genres and the release date from the tags into ``metadata``.

        Genres replace the existing ones when a ``genre`` tag is present;
        the first ``date`` value is parsed into
        ``metadata.originally_available_at``. ``prefs`` is not used here.
        """
        if self.tags is None:
            # Parsing already failed (and was logged) in __init__.
            return

        # Genres
        try:
            genres = self.tags.get('genre')
            if genres is not None and len(genres) > 0:
                metadata.genres.clear()
                for genre in genres:
                    for sub_genre in parse_genres(genre):
                        cleaned = sub_genre.strip()
                        if cleaned:
                            metadata.genres.add(cleaned)
        except Exception as e:
            Log('Exception reading genre: ' + str(e))

        # Release Date
        try:
            release_date = self.tags.get('date')
            if release_date is not None and len(release_date) > 0:
                metadata.originally_available_at = Datetime.ParseDate(
                    release_date[0])
        except Exception as e:
            Log('Exception reading release date: ' + str(e))
def get_track_parent_index(self):
    """Return the disc number as an int, or ``None`` if unavailable.

    The first ``discnumber`` value is passed through ``cleanTrackAndDisk``
    before conversion.
    """
    try:
        tags = MFile(self.filename)
        return int(cleanTrackAndDisk(tags.get('discnumber')[0]))
    except Exception:
        # Best effort: unreadable file, missing tag or non-numeric value.
        return None
def get_album_sort_title(self):
    """Return the first ``albumsort`` value, or ``None`` if unavailable."""
    try:
        tags = MFile(self.filename, easy=True)
        return tags.get('albumsort')[0]  # 'soal'
    except Exception:
        # Best effort: unreadable file or missing/empty tag.
        return None
def get_track_sort_title(self):
    """Return the first ``titlesort`` value, or ``None`` if unavailable."""
    try:
        tags = MFile(self.filename, easy=True)
        return tags.get('titlesort')[0]  # 'sonm'
    except Exception:
        # Best effort: unreadable file or missing/empty tag.
        return None
def get_track(file, check=True):
    """Describe an audio file as a dictionary.

    Artist and title are first derived from the file name
    (``<artist> - <title>``, underscores read as spaces). When the tags can
    be read they take precedence and album, year, track number, genre and
    bitrate are filled in as well.

    :param file: path-like object; passed through ``check_object`` first
        when ``check`` is true.
    :param check: whether to validate ``file`` with ``check_object``.
    :return: ``{}`` for a rejected path, otherwise a dict with the keys
        ``location``, ``common_name``, ``album_name``, ``album_year``,
        ``artist``, ``title``, ``track_number``, ``genre``, ``size`` (MiB),
        ``format``, ``bitrate`` (kbit/s) and ``folder``.
    """
    if check:
        file = check_object(file)
    if not file:
        return {}

    def split_name(name):
        # "<artist> - <title>"; dashes after the first stay in the title.
        parts = name.split('-')
        return parts[0].strip(), '-'.join(parts[1:]).strip()

    # ``stem`` removes exactly the suffix; str.rstrip(suffix) would strip
    # any trailing characters that happen to occur in the suffix.
    common_name = file.stem.replace('_', ' ')
    artist, title = split_name(common_name)
    track_dict = {
        'location': str(file.parent),
        'common_name': common_name,
        'album_name': '',
        'album_year': '',
        'artist': artist,
        'title': title,
        'track_number': '',
        'genre': '',
        'size': round(file.stat().st_size / (1024 * 1024), 2),
        'format': file.suffix[1::],
        'bitrate': 0,
        'folder': file.parent.name
    }

    try:
        tags = File(file, easy=True)
    except Exception:
        return track_dict
    if not tags:
        return track_dict

    artist = tags.get('artist', [''])[0]
    title = tags.get('title', [''])[0]
    if artist and title:
        common_name = f"{artist} - {title}"
    elif '-' in common_name:
        # Incomplete tags: prefer the file name when it has both parts.
        artist, title = split_name(common_name)

    track_dict.update({
        'common_name': common_name,
        'album_name': tags.get('album', [''])[0],
        'album_year': tags.get('date', [''])[0],
        'artist': artist,
        'title': title,
        'track_number': tags.get('tracknumber', [''])[0],
        'genre': tags.get('genre', [''])[0],
        # Not every stream info object exposes a bitrate.
        'bitrate': getattr(tags.info, 'bitrate', 0) // 1000,
    })
    return track_dict
class GetMetaDataThread(QThread):
    """ Thread that crawls metadata for local audio files """

    # signal carrying status / progress text
    crawlSignal = pyqtSignal(str)

    def __init__(self, targetPath_list=None, parent=None):
        super().__init__(parent=parent)
        # loop flag, cleared by stop()
        self.keepRunning = True
        # make sure the paths are a list
        if not targetPath_list:
            targetPath_list = []
        self.targetPath_list = targetPath_list
        # folder that stores the crawled album covers
        self.albumCoverFolder = r'resource\crawl_albums'
        # split the audio file paths into singer / song name
        self.splitText()

    def run(self):
        """ Crawl tag text and album information, then the genres """
        self.kuGouCrawler = KuGouCrawler(self.albumCover_set,
                                         self.albumCoverFolder)
        total = len(self.songPath_list)
        for index, (songer, songname, songPath) in enumerate(
                zip(self.songer_list, self.songname_list,
                    self.songPath_list)):
            if not self.keepRunning:
                break
            self.crawlSignal.emit(f'当前进度:{index}/{total}')
            self.id_card = File(songPath)
            isTextModified = self.modifyTextPart(songname, songer)
            isAlbumModified = self.fetchAlbum(songer, songname)
            if isTextModified or isAlbumModified:
                try:
                    # saving fails while the song is being played
                    self.id_card.save()
                except Exception:
                    print(f'{songPath} 写入发生异常')
        else:
            # finished without interruption; does not rely on the loop
            # variable, which is unbound when the song list is empty
            self.crawlSignal.emit(f'当前进度:{total}/{total}')
        self.kuGouCrawler.browser.quit()

        # crawl the genres
        albumTcon_dict = {}
        if not self.keepRunning:
            return
        self.qqMusicCrawler = QQMusicCrawler(albumTcon_dict)
        self.crawlSignal.emit('酷狗爬取完成')
        for index, (songname, songPath) in enumerate(
                zip(self.songname_list, self.songPath_list)):
            if not self.keepRunning:
                break
            self.crawlSignal.emit(f'当前进度:{index}/{total}')
            song = os.path.basename(songPath)
            self.qqMusicCrawler.get_tcon(song, songname, songPath)
        else:
            self.crawlSignal.emit(f'当前进度:{total}/{total}')
        self.crawlSignal.emit('全部完成')
        self.qqMusicCrawler.browser.quit()

    def stop(self):
        """ Ask the thread to stop crawling """
        self.keepRunning = False
        self.crawlSignal.emit('强制退出')

    def splitText(self):
        """ Scan the target folders and collect the matching audio files """
        filePath_list = []
        for target_path in self.targetPath_list:
            # only the top level of each folder is scanned; a missing
            # folder contributes no files
            _, _, sub_filename_list = next(os.walk(target_path),
                                           (None, [], []))
            # update the file path list
            filePath_list += [
                os.path.join(target_path, file_name)
                for file_name in sub_filename_list
            ]
        # keep the audio files that match the naming scheme
        self.filterAudioFile(filePath_list)
        # create the folder that stores the crawled covers
        os.makedirs(self.albumCoverFolder, exist_ok=True)
        _, _, albumCover_list = next(os.walk(self.albumCoverFolder),
                                     (None, [], []))
        self.albumCover_set = set(albumCover_list)

    def filterAudioFile(self, filePath_list):
        """ Keep files named "<singer> - <song name>.<mp3|flac|m4a>".

        Fills ``songPath_list``, ``songer_list`` and ``songname_list`` with
        the matching files, in the order of ``filePath_list``.
        """
        self.songPath_list = []
        self.songer_list, self.songname_list = [], []
        rex = r'(.+) - (.+)(\.mp3)|(.+) - (.+)(\.flac)|(.+) - (.+)(\.m4a)'
        for file_path in filePath_list:
            Match = re.match(rex, os.path.basename(file_path))
            if not Match:
                continue
            # each alternative of the pattern has its own group numbers
            if Match.group(1):
                songer, songname = Match.group(1), Match.group(2)
            elif Match.group(4):
                songer, songname = Match.group(4), Match.group(5)
            else:
                songer, songname = Match.group(7), Match.group(8)
            self.songPath_list.append(file_path)
            self.songer_list.append(songer)
            self.songname_list.append(songname)

    def modifyTextPart(self, songname, songer) -> bool:
        """ Set title, artist and album artist; return whether any changed """
        isModified = False
        suffix = self.id_card.mime[0].split('/')[-1]
        if suffix == 'mp3':
            # add or correct the title
            if not self.id_card.get('TIT2') or str(
                    self.id_card.get('TIT2')) != songname:
                self.id_card['TIT2'] = TIT2(encoding=3, text=songname)
                isModified = True
            # add or correct the singer
            if not self.id_card.get('TPE1') or str(
                    self.id_card.get('TPE1')) != songer:
                self.id_card['TPE1'] = TPE1(encoding=3, text=songer)
                isModified = True
            if not self.id_card.get('TPE2') or str(
                    self.id_card.get('TPE2')) != songer:
                self.id_card['TPE2'] = TPE2(encoding=3, text=songer)
                isModified = True
        elif suffix == 'flac':
            if not self.id_card.get(
                    'title') or self.id_card.get('title')[0] != songname:
                self.id_card['title'] = songname
                isModified = True
            # add or correct the singer
            if not self.id_card.get(
                    'artist') or self.id_card.get('artist')[0] != songer:
                self.id_card['artist'] = songer
                isModified = True
        elif suffix == 'mp4':
            # add or correct the title
            if not self.id_card.get(
                    '©nam') or self.id_card['©nam'][0] != songname:
                self.id_card['©nam'] = [songname]
                isModified = True
            if not self.id_card.get(
                    '©ART') or self.id_card['©ART'][0] != songer:
                self.id_card['©ART'] = [songer]
                isModified = True
            if not self.id_card.get(
                    'aART') or self.id_card['aART'][0] != songer:
                self.id_card['aART'] = [songer]
                isModified = True
        return isModified

    def fetchAlbum(self, songer, songname) -> bool:
        """ Crawl album information when year, album or cover is missing.

        NOTE(review): the returned flag is always False, exactly as before;
        confirm whether ``get_album`` persists its own changes.
        """
        isModified = False
        suffix = self.id_card.mime[0].split('/')[-1]
        if suffix == 'mp3':
            # a cover exists when any frame key starts with "APIC"
            rex = r'APIC.*'
            has_cover = any(
                re.match(rex, key) for key in self.id_card.tags.keys())
            # condition for fetching the album from KuGou
            album_get_cond = not self.id_card.get(
                'TDRC') or not self.id_card.get('TALB') or str(
                    self.id_card.get('TDRC'))[0] == '0' or not has_cover
            if album_get_cond:
                self.kuGouCrawler.get_album(songer, songname, self.id_card)
        elif suffix == 'flac':
            flac_write_cond = not self.id_card.pictures or (
                not self.id_card.get('year') or not self.id_card.get('album')
                or self.id_card.get('year')[0][0] == '0')
            # fetch from KuGou when any album information is missing
            if flac_write_cond:
                self.kuGouCrawler.get_album(songer, songname, self.id_card)
        elif suffix == 'mp4':
            album_get_cond = not self.id_card.get(
                'covr') or not self.id_card.get(
                    '©alb') or not self.id_card.get(
                        '©day') or self.id_card.get('©day')[0][0] == '0'
            if album_get_cond:
                self.kuGouCrawler.get_album(songer, songname, self.id_card)
        return isModified
def get_discnumber(mf: MusicFile) -> str:
    """Return the first ``discnumber`` value of ``mf``, or '' when absent."""
    values = mf.get("discnumber", [''])
    return values[0]
class GetMetaData():
    """ Crawl tag information for local audio files """

    def __init__(self, targetPath_list: list):
        # list of folders to scan
        self.targetPath_list = targetPath_list
        # folder that stores the crawled album covers
        self.albumCoverFolder = r'resource\crawl_albums'
        os.makedirs(self.albumCoverFolder, exist_ok=True)
        # split the audio file paths into singer / song name
        self.splitText()

    def runKuGouCrawler(self):
        """ Crawl tag text and album information from KuGou """
        self.kuGouCrawler = KuGouCrawler(self.albumCover_set,
                                         self.albumCoverFolder)
        for songer, songname, songPath in zip(self.songer_list,
                                              self.songname_list,
                                              self.songPath_list):
            self.id_card = File(songPath)
            isTextModified = self.modifyTextPart(songname, songer)
            isAlbumModified = self.fetchAlbum(songer, songname)
            if isTextModified or isAlbumModified:
                try:
                    # saving fails while the song is being played
                    self.id_card.save()
                    print(songPath + ' 保存成功')
                except MutagenError:
                    pass
        self.kuGouCrawler.browser.quit()

    def runQQMusicCrawler(self):
        """ Crawl the genres from QQ Music """
        albumTcon_dict = {}
        self.qqMusicCrawler = QQMusicCrawler(albumTcon_dict)
        for songname, songPath in zip(self.songname_list,
                                      self.songPath_list):
            song = os.path.basename(songPath)
            self.qqMusicCrawler.get_tcon(song, songname, songPath)
        self.qqMusicCrawler.browser.quit()

    def splitText(self):
        """ Scan the target folders and collect the matching audio files """
        filePath_list = []
        for target_path in self.targetPath_list:
            # only the top level of each folder is scanned; a missing
            # folder contributes no files
            _, _, sub_filename_list = next(os.walk(target_path),
                                           (None, [], []))
            # update the file path list
            filePath_list += [
                os.path.join(target_path, file_name)
                for file_name in sub_filename_list
            ]
        # keep the audio files that match the naming scheme
        self.filterAudioFile(filePath_list)
        # create the folder that stores the crawled covers
        os.makedirs(self.albumCoverFolder, exist_ok=True)
        _, _, albumCover_list = next(os.walk(self.albumCoverFolder),
                                     (None, [], []))
        self.albumCover_set = set(albumCover_list)

    def filterAudioFile(self, filePath_list):
        """ Keep files named "<singer> - <song name>.<mp3|flac|m4a>".

        Fills ``songPath_list``, ``songer_list`` and ``songname_list`` with
        the matching files, in the order of ``filePath_list``.
        """
        self.songPath_list = []
        self.songer_list, self.songname_list = [], []
        rex = r'(.+) - (.+)(\.mp3)|(.+) - (.+)(\.flac)|(.+) - (.+)(\.m4a)'
        for file_path in filePath_list:
            # os.path.basename handles the platform's separator instead of
            # assuming a backslash
            Match = re.match(rex, os.path.basename(file_path))
            if not Match:
                continue
            # each alternative of the pattern has its own group numbers
            if Match.group(1):
                songer, songname = Match.group(1), Match.group(2)
            elif Match.group(4):
                songer, songname = Match.group(4), Match.group(5)
            else:
                songer, songname = Match.group(7), Match.group(8)
            self.songPath_list.append(file_path)
            self.songer_list.append(songer)
            self.songname_list.append(songname)

    def modifyTextPart(self, songname, songer) -> bool:
        """ Set title, artist and album artist; return whether any changed """
        isModified = False
        suffix = self.id_card.mime[0].split('/')[-1]
        if suffix == 'mp3':
            # add or correct the title
            if not self.id_card.get('TIT2') or str(
                    self.id_card.get('TIT2')) != songname:
                self.id_card['TIT2'] = TIT2(encoding=3, text=songname)
                isModified = True
            # add or correct the singer
            if not self.id_card.get('TPE1') or str(
                    self.id_card.get('TPE1')) != songer:
                self.id_card['TPE1'] = TPE1(encoding=3, text=songer)
                isModified = True
            if not self.id_card.get('TPE2') or str(
                    self.id_card.get('TPE2')) != songer:
                self.id_card['TPE2'] = TPE2(encoding=3, text=songer)
                isModified = True
        elif suffix == 'flac':
            if not self.id_card.get(
                    'title') or self.id_card.get('title')[0] != songname:
                self.id_card['title'] = songname
                isModified = True
            # add or correct the singer
            if not self.id_card.get(
                    'artist') or self.id_card.get('artist')[0] != songer:
                self.id_card['artist'] = songer
                isModified = True
        elif suffix == 'mp4':
            # add or correct the title
            if not self.id_card.get(
                    '©nam') or self.id_card['©nam'][0] != songname:
                self.id_card['©nam'] = [songname]
                isModified = True
            if not self.id_card.get(
                    '©ART') or self.id_card['©ART'][0] != songer:
                self.id_card['©ART'] = [songer]
                isModified = True
            if not self.id_card.get(
                    'aART') or self.id_card['aART'][0] != songer:
                self.id_card['aART'] = [songer]
                isModified = True
        return isModified

    def fetchAlbum(self, songer, songname) -> bool:
        """ Crawl album information when year, album or cover is missing.

        NOTE(review): the returned flag is always False, exactly as before;
        confirm whether ``get_album`` persists its own changes.
        """
        isModified = False
        suffix = self.id_card.mime[0].split('/')[-1]
        if suffix == 'mp3':
            # a cover exists when any frame key starts with "APIC"
            rex = r'APIC.*'
            has_cover = any(
                re.match(rex, key) for key in self.id_card.tags.keys())
            # condition for fetching the album from KuGou
            album_get_cond = not self.id_card.get(
                'TDRC') or not self.id_card.get('TALB') or str(
                    self.id_card.get('TDRC'))[0] == '0' or not has_cover
            if album_get_cond:
                self.kuGouCrawler.get_album(songer, songname, self.id_card)
        elif suffix == 'flac':
            flac_write_cond = not self.id_card.pictures or (
                not self.id_card.get('year') or not self.id_card.get('album')
                or self.id_card.get('year')[0][0] == '0')
            # fetch from KuGou when any album information is missing
            if flac_write_cond:
                self.kuGouCrawler.get_album(songer, songname, self.id_card)
        elif suffix == 'mp4':
            album_get_cond = not self.id_card.get(
                'covr') or not self.id_card.get(
                    '©alb') or not self.id_card.get(
                        '©day') or self.id_card.get('©day')[0][0] == '0'
            if album_get_cond:
                self.kuGouCrawler.get_album(songer, songname, self.id_card)
        return isModified
class Songinfo(dict):
    """Dictionary holding length, artist, title (and album) of one song.

    Values come from the file's tags; when a tag is missing, the ID3 frame
    names listed in ``TAG_TRANSLATE`` and finally the file name are tried.
    """

    TAG_TRANSLATE = {
        'title': ('TIT2',),
        'artist': ('TPE1', 'TPE2',),
        'album': ('TALB',),
    }

    def __init__(self, filename):
        self._filename = filename
        # Cached result of parsing the file name; filled on first use.
        self._match = None
        dict.__init__(self)

    def __str__(self):
        artist = self.get('artist', _("Unknown Artist"))
        title = self.get('title', _("Unknown Title"))
        return "%s - %s" % (artist, title)

    def fetch_info(self, optional=('album',)):
        """Read length, artist, title and the ``optional`` tags into self.

        ``TitleNotFound`` propagates for artist/title and is ignored for
        tags listed in ``optional``.
        """
        self._audio = MutagenFile(self._filename)
        length = self._audio.info.length
        info = {'length': int(length) if length else 0}
        for tag in ('artist', 'title') + optional:
            try:
                info[tag] = self.get_taginfo(tag)
            except TitleNotFound:
                if tag not in optional:
                    raise
        self.update(info)

    def get_alternative_tag(self, tags):
        """Return the first truthy value among ``tags`` or None.

        List values are reduced to their first element.
        """
        for tag in tags:
            item = self._audio.get(tag, None)
            if not item:
                continue
            return item[0] if isinstance(item, list) else item
        return None

    def get_from_fname(self, what):
        """Return field ``what`` as parsed from the file name.

        Raises ``TitleNotFound`` when the name cannot be parsed or does not
        contain the field.
        """
        match = self._match
        if match is None:
            try:
                match = FilenameParser(self._filename).parse()
            except LookupError:
                raise TitleNotFound(self._filename)
        if match:
            self._match = match
            if what in match:
                try:
                    return match[what]
                except KeyError:
                    pass
            else:
                try:
                    return match[what]
                except KeyError:
                    pass
        raise TitleNotFound(self._filename)

    def get_taginfo(self, what):
        """Return tag ``what`` from the tags, alternatives or file name.

        Raises ``TitleNotFound`` when no source yields a truthy value.
        """
        item = self._audio.get(what, None)
        if item:
            return item[0] if isinstance(item, list) else item

        if what in self.TAG_TRANSLATE:
            item = self.get_alternative_tag(self.TAG_TRANSLATE[what])
            if item:
                return item

        item = self.get_from_fname(what)
        if item:
            return item
        raise TitleNotFound(self._filename)
def get_track_title(self):
    """Return the first 'title' tag value of ``self.filename``, or None.

    None is returned when the file cannot be read, has no tags, or has no
    'title' entry.  The lookup is best-effort: ordinary errors are
    swallowed, but KeyboardInterrupt/SystemExit still propagate.
    """
    try:
        tags = MFile(self.filename)
        titles = tags.get('title') if tags is not None else None
        return titles[0] if titles else None
    except Exception:
        return None
def load_metadata_from_file(self):
    """Copy title, album and artist from the song file's ID3 frames.

    Reads the TIT2, TALB and TPE1 frames of the file attached to
    ``self.song_instance`` and stores their text in ``self.name``,
    ``self.album`` and ``self.artist``.  A missing frame (or a file Mutagen
    cannot identify) yields an empty string rather than the text "None".
    """
    file_path = self.song_instance.song_file.file.name
    audio_file = File(file_path)

    def frame_text(frame_id):
        # File() returns None when the file type is not recognised.
        frame = audio_file.get(frame_id) if audio_file is not None else None
        return str(frame) if frame is not None else ''

    self.name = frame_text('TIT2')
    self.album = frame_text('TALB')
    self.artist = frame_text('TPE1')
def _split_artists(artist):
    """Split a comma-separated artist string into a list of trimmed names.

    A string without commas (or one consisting only of separators) is
    returned unchanged.
    """
    if ',' not in artist:
        return artist
    names = [name.strip() for name in artist.split(',')]
    names = [name for name in names if name]
    return names or artist


def add_simple_metadata(file_path, artist='', title='', album='', albumartist='', override=False):
    """
    Automatically sets the metadata for a music file
    :param file_path: the path to the music file
    :param artist: given artist name; several artists may be separated by commas
    :param title: given title name
    :param album: given album name
    :param albumartist: given album artist
    :param override: if True, all of the metadata is overridden
    :return: True or False depending on whether audio file was changed or not
    """
    try:
        audio = EasyID3(file_path)
    except mutagen.id3.ID3NoHeaderError:
        # No ID3 header yet: create an empty tag, save it, then reopen.
        audio = File(file_path)
        audio.add_tags()
        audio.save()
        audio = EasyID3(file_path)
    filename = pathlib.Path(file_path).name
    advanced_audio = File(file_path)
    # File() returns None for unrecognised files; treat that as "no year".
    has_year = advanced_audio is not None and 'TDRC' in advanced_audio
    try:
        # Nothing to do when every field, the cover and the year are present.
        if (not override and audio.get('title', '') and audio.get('artist', '')
                and audio.get('albumartist', '') and has_album_cover(file_path)
                and has_year):
            return False

        if not artist:
            artist = get_artist(filename)
        else:
            artist = _split_artists(artist)
        if not title:
            # Text after the last ' - ' with the extension removed, whatever
            # the extension's length.
            title = pathlib.Path(filename).stem.split(' - ')[-1]

        if override:
            audio['title'] = title
            audio['artist'] = artist
            if album:
                audio['album'] = album
            if albumartist:
                audio['albumartist'] = albumartist
        else:
            # Only fill in the fields that are missing.
            if 'album' not in audio:
                audio['album'] = album if album != '' else title
            if 'title' not in audio:
                audio['title'] = title
            if 'artist' not in audio:
                audio['artist'] = artist
            if 'albumartist' not in audio:
                audio['albumartist'] = albumartist if albumartist else artist
        audio.save()

        if not has_album_cover(file_path):
            if not set_album_cover(file_path):
                print(f'Album art not found for {file_path}')
    except MutagenError:
        print(f'{filename} in use')
        return False
    except ValueError:
        print('Error adding metadata to', filename)
        return False
    return True
class QQMusicCrawler():
    """Crawler that fills in genre, album name and year tags from QQ Music."""

    # Tag keys per format, keyed by the suffix taken from the file's MIME type.
    _TAG_KEYS = {
        'mp3': {'album': 'TALB', 'year': 'TDRC', 'genre': 'TCON'},
        'flac': {'album': 'album', 'year': 'year', 'genre': 'genre'},
        'mp4': {'album': '©alb', 'year': '©day', 'genre': '©gen'},
    }
    # Genre values that are replaced when a crawled genre is available.
    _REPLACEABLE_GENRES = ('流行', '动漫', '无流派', '未知流派')

    def __init__(self, albumTcon_dict):
        """Start a headless Firefox and keep the shared album -> genre cache."""
        option = FirefoxOptions()
        option.add_argument('-headless')
        self.browser = Firefox(options=option)
        self.albumTcon_dict = albumTcon_dict

    def get_tcon(self, song, songname, songPath):
        """Fetch the album genre from QQ Music and save the file if tags changed.

        A crawl is triggered when the genre needs replacing or the album
        name or year tag is missing.  Files Mutagen cannot identify and
        formats other than mp3/flac/mp4 are left untouched.
        """
        self.song = song
        self.songname = songname
        self.id_card = File(songPath)
        if self.id_card is None:
            # mutagen.File returns None when the file type is unknown.
            return
        self.suffix = self.id_card.mime[0].split('/')[-1]
        keys = self._TAG_KEYS.get(self.suffix)
        if keys is None:
            return

        needs_crawl = (self.isTconNeedModify()
                       or not self.id_card.get(keys['album'])
                       or not self.id_card.get(keys['year']))
        # Save only when something was actually modified.
        if needs_crawl and self.crawl():
            try:
                self.id_card.save()
            except MutagenError:
                # Best-effort: a file that cannot be written is skipped.
                pass

    def crawl(self) -> bool:
        """Complete missing album info from the cache or QQ Music.

        Returns True when at least one tag of ``self.id_card`` was changed.
        """
        # Read the album name and year currently stored in the file.
        self.fetchLocalAlbumInfo()

        # Known album: reuse the cached genre instead of hitting the site.
        if self.album and self.album in self.albumTcon_dict:
            if not self.isTconNeedModify():
                return False
            self._write_tag('genre', self.albumTcon_dict[self.album])
            return True

        url = 'https://y.qq.com/portal/search.html#page=1&searchid=1&remoteplace=txt.yqq.top&t=song&w=' + \
            self.song[:-len(self.suffix)-1]
        self.browser.get(url)
        try:
            # Open the album page of the first search hit ...
            album_link = WebDriverWait(self.browser, 5).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, 'a.album_name')))
            album_link.click()
            # ... and read the info item that carries the genre.
            genre_element = WebDriverWait(self.browser, 5).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, 'li.data_info__item')))
            schools = genre_element.text
        except Exception:
            # Timeout or stale page: nothing could be crawled.
            return False

        # Fill in the album name / year if the file lacks them.
        isPartModified = self.writeAlbumNameYear()
        isTconModified = False
        Match = re.match(r'流派:(.+)', schools)
        if Match:
            tcon = Match.group(1)
            if tcon != '无流派' and self.suffix in self._TAG_KEYS:
                # Remember the genre for other songs of the same album.
                self.albumTcon_dict[self.album] = tcon
                if self.isTconNeedModify():
                    isTconModified = self._write_tag('genre', tcon)
        return isPartModified or isTconModified

    def fetchLocalAlbumInfo(self):
        """Load the file's album name and year into self.album / self.albumyear.

        Missing tags leave the attribute falsy.
        """
        if self.suffix not in self._TAG_KEYS:
            return
        self.album = self._read_tag('album')
        self.albumyear = self._read_tag('year')

    def writeAlbumNameYear(self) -> bool:
        """Write the album name and year from the open album page if missing.

        Returns True when at least one tag was written.
        """
        isModified = False
        if not self.album:
            self.album = self.browser.find_element(
                By.CLASS_NAME, 'data__name_txt').text
            isModified = self._write_tag('album', self.album)
        if not self.albumyear:
            self.albumyear = self.browser.find_element(
                By.CSS_SELECTOR, 'ul.data__info >li:nth-of-type(3)').text
            Match = re.match(r'发行时间:(\d{4})', self.albumyear)
            # Only report a modification when a year was parsed and written.
            if Match and self._write_tag('year', Match.group(1)):
                isModified = True
        return isModified

    def isTconNeedModify(self) -> bool:
        """Return True when the genre tag is missing, empty or replaceable."""
        if self.suffix not in self._TAG_KEYS:
            return False
        genre = self._read_tag('genre')
        return not genre or genre in self._REPLACEABLE_GENRES

    def _read_tag(self, field):
        """Return the text of 'album', 'year' or 'genre'; falsy when missing."""
        keys = self._TAG_KEYS.get(self.suffix)
        if keys is None:
            return None
        value = self.id_card.get(keys[field])
        if not value:
            return value
        # mp3 values are converted with str(); flac/mp4 values are lists.
        return str(value) if self.suffix == 'mp3' else value[0]

    def _write_tag(self, field, text) -> bool:
        """Store ``text`` in the tag for ``field``; return True if written."""
        keys = self._TAG_KEYS.get(self.suffix)
        if keys is None:
            return False
        key = keys[field]
        if self.suffix == 'mp3':
            frame_types = {'TALB': TALB, 'TDRC': TDRC, 'TCON': TCON}
            self.id_card[key] = frame_types[key](encoding=3, text=text)
        elif self.suffix == 'flac':
            self.id_card[key] = text
        else:
            self.id_card[key] = [text]
        return True
def get_artist_sort_title(self):
    """Return the first 'artistsort' tag value of ``self.filename``, or None.

    The file is opened with Mutagen's easy interface.  None is returned
    when the file cannot be read, has no tags, or has no 'artistsort'
    entry.  The lookup is best-effort: ordinary errors are swallowed, but
    KeyboardInterrupt/SystemExit still propagate.
    """
    try:
        tags = MFile(self.filename, easy=True)
        values = tags.get('artistsort') if tags is not None else None  # 'soar'
        return values[0] if values else None
    except Exception:
        return None