def get_metadata(filename):
    """
    Deprecated: look up the metadata of an audio file in the dataset's
    ``metadata.csv``.

    The file is identified by its path relative to the dataset base directory,
    written with forward slashes, and matched against the ``Path`` column.

    :param filename: path of the audio file, normally below the dataset base
        directory
    :return: tuple ``(artist, genre, lyrics)``. ``lyrics`` is always ``''``.
        When the file has no entry in the metadata the fallback
        ``('unknown', 'Celtic', '')`` is returned.
    """
    import pandas as pd
    import os

    base_path = platform_details.get_platform_path(
        r"Users\theko\Documents\Dataset")
    metadata_path = os.path.join(base_path, 'metadata.csv')

    # Remove the base directory as a *prefix*. ``str.strip(base_path)`` would
    # treat its argument as a set of characters and could also eat leading or
    # trailing characters of the relative path itself.
    # NOTE(review): rows written earlier with the character-stripping logic
    # may hold shortened paths and would no longer match - confirm against
    # the existing metadata.csv.
    filename_new = filename
    if filename_new.startswith(base_path):
        filename_new = filename_new[len(base_path):].lstrip("\\/")
    filename_new = filename_new.replace("\\", "/")

    # Read every column as plain text (no NaN conversion). Recent pandas
    # versions refuse ``sep='\n'``, so the header row is used to split the
    # columns instead of splitting each line by hand.
    df = pd.read_csv(metadata_path, dtype=str, keep_default_na=False)
    df = df.set_index('Path').drop_duplicates()
    # print(df)

    try:
        artist = df.loc[filename_new, 'Artist']
        genre = df.loc[filename_new, 'Raga']
    except KeyError:
        # No row for this file: report what was looked up and fall back.
        print(filename, filename_new, base_path)
        return 'unknown', 'Celtic', ''

    lyrics = ''
    return artist, genre, lyrics
def _creat_artist_ID_dictionary():
    """
    Load the artist <-> ID mappings from ``v4_artist_ids.txt``.

    Every non-blank line of the file is expected to look like
    ``<artist>;<integer id>``.

    :return: tuple ``(artist_ids, artists)``: ``artist_ids`` maps an artist
        name to its integer ID, ``artists`` maps the integer ID back to the
        artist name
    :raises ValueError: if a non-blank line contains no ``;`` or its ID part
        is not an integer
    """
    artist_ids = {}
    artists = {}
    artist_ids_file = platform_details.get_platform_path(
        r"Users\theko\Documents\Dataset\v4_artist_ids.txt")
    with open(artist_ids_file, 'r', encoding="utf-8") as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # Blank lines (e.g. a trailing newline) carry no mapping.
                continue
            # Split on the last ';' so a name containing ';' stays intact.
            artist, raw_id = entry.rsplit(';', 1)
            artist_id = int(raw_id)
            artist_ids[artist] = artist_id
            artists[artist_id] = artist
    return artist_ids, artists
def _create_Raga_ID_ditionary():
    """
    Load the genre (raga) <-> ID mappings from ``v4_genre_ids.txt``.

    Every non-blank line of the file is expected to look like
    ``<genre>;<integer id>``.

    :return: tuple ``(genre_ids, genres)``: ``genre_ids`` maps a genre name to
        its integer ID, ``genres`` maps the integer ID back to the genre name
    :raises ValueError: if a non-blank line contains no ``;`` or its ID part
        is not an integer
    """
    genre_ids = {}
    genres = {}
    genre_ids_file = platform_details.get_platform_path(
        r"Users\theko\Documents\Dataset\v4_genre_ids.txt")
    with open(genre_ids_file, 'r', encoding="utf-8") as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # Blank lines (e.g. a trailing newline) carry no mapping.
                continue
            # Split on the last ';' so a name containing ';' stays intact.
            genre, raw_id = entry.rsplit(';', 1)
            genre_id = int(raw_id)
            genre_ids[genre] = genre_id
            genres[genre_id] = genre
    return genre_ids, genres
def add_genre_to_dataset(genreID, genreFilesPath, copy=True):
    """
    Add the audio files found under ``genreFilesPath`` to the dataset as
    members of genre ``genreID``.

    Each file gets a nine-digit file ID made of the zero-padded genre ID, a
    placeholder artist ID (``000``) and a running number starting at 1. With
    ``copy`` enabled every file is copied into ``<copy_base>/<fileID>/`` and a
    row ``path,artist,genre,genre-lowercased,fileID`` is appended to
    ``<copy_base>/metadata.csv``, where ``path`` is relative to ``copy_base``.

    :param genreID: integer ID of the genre; its name is taken from the genre
        ID file, or from the ``genre`` tag of the first audio file when the ID
        is not listed there
    :param genreFilesPath: directory that is searched for audio files (passed
        through ``platform_details.get_platform_path``)
    :param copy: when false, nothing is copied and no metadata is written
    :return: None
    :raises Exception: if the genre name can be determined neither from the
        genre ID file nor from the first file's tags
    """
    _, genre_names = _create_Raga_ID_ditionary()
    files_list = librosa.util.find_files(
        platform_details.get_platform_path(genreFilesPath))

    artists = []
    fileIDs = []
    for number, audio_file in enumerate(files_list, start=1):
        tags = audio_metadata.load(audio_file)['tags']
        artists.append(tags.artist if 'artist' in tags else '')
        ### assigning artist ID := 0 for now, will edit the code later
        ### TODO: add get_artist() and get_artist_id()
        fileIDs.append(
            str(genreID).zfill(3) + str(0).zfill(3) + str(number).zfill(3))

    if genreID in genre_names:
        gen = genre_names[genreID]
    else:
        try:
            gen = audio_metadata.load(files_list[0])['tags'].genre
        except Exception as err:
            # Covers an empty file list as well as a missing genre tag; the
            # original cause stays attached for debugging.
            raise Exception(
                "Add genre name to the 'v4_genre_ids' first") from err

    metadata_path = os.path.join(copy_base, 'metadata.csv')
    if copy:
        with open(metadata_path, 'a', encoding='utf8') as f:
            entries = zip(files_list, artists, fileIDs)
            for i, (audio_file, artist, file_id) in enumerate(entries):
                copy_dest = os.path.join(copy_base, file_id)
                # An already existing directory is fine; any other failure
                # (permissions, invalid path, ...) is reported, not swallowed.
                os.makedirs(copy_dest, exist_ok=True)
                new_path = shutil.copy2(audio_file, copy_dest)
                # Store the path relative to the dataset root.
                # ``str.strip(copy_base)`` would strip a *set of characters*
                # from both ends and could corrupt the file name.
                new_path = os.path.relpath(new_path, copy_base)
                f.write(
                    f"{new_path},{artist},{gen},{gen.lower()},{file_id}\n"
                )
                print(f"file {i + 1} of {len(files_list)} copied!!")
import numpy as np import pandas as pd import matplotlib.pyplot as plt import os import shutil import librosa import platform_details from numba import vectorize import audio_metadata # copy_base = platform_details.get_platform_path('Users/theko/Documents/Dataset') ALL_COMPOSITIONS = [ name for name in os.listdir(copy_base) if os.path.isdir(os.path.join(copy_base, name)) ] # The following functions (`collate_raga_data`, `collate_and_add_celtic`, and `create_file_artist_genre_ids` ) are # specific to the current dataset and must not be used def collate_raga_data(base_dir, labels_json): IDX = os.listdir(base_dir) print(labels_json) LABELS = pd.read_json(labels_json, orient='index') AUDIO_DATA = pd.DataFrame() local_artists = {'': []} AUDIOS = [] Audios_artist = [] Audios_raga = [] Audios_ragaId = [] Audios_fileId = [] genre_id = 0
name = new_name[0] else: new_name = fname.replace('.opus', '') name = new_name name = name + '.mp3' out = ffmpeg.input(folder + '/' + fname).output(filename=folder + "mp3/" + name, format='mp3') out.run() i = i + 1 if __name__ == '__main__': # files = librosa.util.find_files(copy_base) # fname = np.random.choice(files) path = platform_details.get_platform_path("/vismaya/") extract_audio_from_videos(path) # print(fname) # cut_file_1min_segments(fname) # signal, sr = librosa.load(fname) # three_mins = 3*get_one_min(sr) # one_min = get_one_min(sr) # signal = signal[-three_mins:-one_min] # raga = get_metadata_windows(fname) # print(raga) # pcd_X, pcd_Y = calculate_pcd(signal, sr) # plt.plot(pcd_X, pcd_Y, label=raga) # plt.show() # sample= (1, 2, 3) # X = np.zeros(5)