def shapeDataConv2DGTZAN(my_json, percent_train):
    """
    overview :
        Shape the GTZAN image datas to return the train and test sets.
    input :
        - my_json : dict loaded from the .json config file with all the
          datas we need.
        - percent_train : float -> [0.0 : 1.0] which represent the
          percentage of images which will be include in the train set.
    output :
        - X_train, Y_train, X_test, Y_test, width, height
    """
    print("Shaping the data to get the Train and Test sets.")
    navigate.set_path_to_dataset_image_GTZAN(my_json)
    X, Y = [], []
    convert_str = "L"  # L=B&W (1 channel), RGB (3 channels)
    width = height = None
    for dir_x in utils.get_list_subs():
        navigate.set_path(dir_x)
        for fig in utils.get_list_subs():
            # os.path.join keeps this portable (original hard-coded "\\"),
            # and the context manager releases the file handle even if
            # reading fails (original leaked it on error).
            fig_path = os.path.join(navigate.get_actual_path(), fig)
            with Image.open(fig_path) as raw:
                im = raw.convert(convert_str)  # 640 * 480 pixels
                width, height = im.size
                pix = np.array(im.getdata())
            X.append(pix)
            Y.append(dir_x)  # The folder name is the label.
        navigate.set_path_to_dataset_image_GTZAN(my_json)
    # Channel count follows the pixel mode chosen above.
    channels = 1 if convert_str == "L" else 3
    X = reshapeEntriesConv2D(X, width, height, channels)
    Y = reshapeOutputsDense(Y)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=(1 - percent_train))
    return X_train, Y_train, X_test, Y_test, width, height
def initialiseWAV(my_json):  # -> 1716 secs, no probs
    """Create the WAV files: one WavThread per sorted-MP3 folder with >= 30 songs."""
    print("Creating the WAV files.")
    navigate.set_path_to_dataset_MP3_sorted(my_json)
    all_datasets = utils.get_list_subs()  # Sub folders of the dataset folder.
    threads = []
    for thread_id, dataset in enumerate(all_datasets):
        dataset_dir = my_json['dataset_mp3_sorted_path'] + '\\' + dataset
        # Folders with fewer than 30 tracks are skipped entirely.
        if len(os.listdir(dataset_dir)) < 30:
            print("There is less than 30 songs in the ", dataset,
                  " dataset. I will not export it.")
            continue
        # Enter the genre sub folder, which contains the tracks.
        navigate.set_path(my_json["dataset_mp3_sorted_path"] + "\\" + dataset)
        actual_dataset_path = navigate.get_actual_path()
        folder_tracks = navigate.get_list_subs()  # Tracks of this folder.
        threads.append(
            WavThread("Wav Thread " + str(thread_id), my_json,
                      actual_dataset_path, dataset, folder_tracks))
        navigate.set_path_to_dataset_MP3_sorted(my_json)
    navigate.set_path_to_project_path(my_json)
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    navigate.set_path_to_project_path(my_json)
def __init__(self, name, my_json, path, category, trackswav, dest, dpi,
             height, width, legend=False):
    """Store the frame parameters for the process and log a summary of them."""
    print("Creating the Multiprocess class ", name)
    # Remember the path we started from so it can be restored later.
    self.old_path = navigate.get_actual_path()
    self.name = name
    self.my_json = my_json
    self.path = path
    self.category = category
    self.trackswav = trackswav
    self.dest = dest
    self.dpi = dpi
    self.height = height
    self.width = width
    self.legend = legend
    separator = (
        "------------------------------------------------------------------")
    print(separator)
    print("The ", name, " frame's datas are : ")
    print("The path of the wav directory containing the songs : ", path)
    print("The category selected is : ", category)
    print("This directory contains ", len(trackswav), " songs.")
    print("State of the legends : ", legend)
    print(separator)
def set_security_autorizations_images(my_json):
    """Give full permissions (0o777) to every image sub folder of the dataset."""
    print("Setting the autorization of the image folders.")
    navigate.set_path_to_dataset_image(my_json)
    for sub_dir in get_list_subs():
        navigate.set_path(sub_dir)
        # chmod the folder we just entered, then jump back to the image root.
        os.chmod(navigate.get_actual_path(), 0o777)
        navigate.set_path_to_dataset_image(my_json)
def get_list_subs():
    """
    overview :
        Get a list of subs from the actual path.
    output :
        - list which contains all the items of the actual path.
    """
    # os.listdir already returns a list; the original wrapped it in a
    # pointless identity comprehension.
    return os.listdir(navigate.get_actual_path())
def getNumberOfTracks(my_json):
    """Print the track count of each WAV genre directory and the grand total."""
    navigate.set_path_to_dataset_WAV(my_json)
    total = 0
    for x_dir in navigate.get_list_subs():
        count = len(os.listdir(navigate.get_actual_path() + "\\" + x_dir))
        print("We have ", count, " songs in the ", x_dir, " directory.")
        total += count
    print("In total, there is ", total, " tracks.")
def __init__(self, name, my_json, path, category, tracksmp3):
    """Initialise the worker thread and remember the path it was created from."""
    Thread.__init__(self)
    print("Creating the Thread : ", name)
    # Keep the starting path so the thread can restore it after running.
    self.old_path = navigate.get_actual_path()
    self.name = name
    self.my_json = my_json
    self.path = path
    self.category = category
    self.tracksmp3 = tracksmp3
def shapeDataConv2D(my_json, percent_train, encoder=False):
    """
    overview :
        Shape the image datas to return the train and test sets.
    input :
        - my_json : dict loaded from the .json config file with all the
          datas we need.
        - percent_train : float -> [0.0 : 1.0] which represent the
          percentage of images which will be include in the train set.
        - encoder : False, or a label-encoder object (fit_transform /
          classes_) used to encode the raw "category" metadata; when
          False the pre-shaped "category_shaped" metadata is used.
    output :
        - X_train, Y_train, X_test, Y_test, width, height
    """
    print("Shaping the data to get the Train and Test sets.")
    navigate.set_path_to_dataset_image(my_json)
    X, Y = [], []
    width = height = None
    for dir_x in utils.get_list_subs():
        navigate.set_path(dir_x)
        for fig in utils.get_list_subs():
            # os.path.join keeps this portable (original hard-coded "\\"),
            # and the context manager releases the file handle even if
            # reading the pixels or metadata raises (original leaked it).
            fig_path = os.path.join(navigate.get_actual_path(), fig)
            with Image.open(fig_path) as raw:
                im = raw.convert('L')  # 640 * 480 pixels
                width, height = im.size
                # NOTE(review): getdata() is row-major (height rows of
                # width pixels), so reshape(width, height) only lines up
                # for square images. Kept as-is for compatibility with
                # reshapeEntriesConv2D — confirm for non-square input.
                pix = np.array(im.getdata()).reshape(width, height)
                if encoder:
                    category = str(im.info["category"])
                else:
                    category = ast.literal_eval(im.info["category_shaped"])
            X.append(pix)
            Y.append(category)
        navigate.set_path_to_dataset_image(my_json)
    if encoder:
        # Caller-supplied encoder maps category strings to integer labels.
        Y = encoder.fit_transform(Y)
        Y = Y.reshape(len(Y), 1)
    else:
        Y = reshapeOutputs(Y)
    X = reshapeEntriesConv2D(X, width, height)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=(1 - percent_train))
    return X_train, Y_train, X_test, Y_test, width, height
def initialiseFigs(my_json, dpi, height, width, legend, colormesh):  # 1444 secs
    """
    overview :
        Create one image-generation process per WAV genre folder holding
        at least 50 tracks, then run them all. Processes are used because
        matplotlib isn't thread protected.
    input :
        - my_json : dict loaded from the .json config file.
        - dpi, height, width : figure geometry forwarded to ImageProcess.
        - legend : whether the figures keep their legends/axes.
        - colormesh : plotting mode forwarded to ImageProcess.
    """
    print("Creating the Images.")
    navigate.set_path_to_dataset_WAV(my_json)
    all_datasets = utils.get_list_subs()  # Sub folders of the WAV dataset.
    # Make sure every destination image folder exists.
    navigate.set_path_to_dataset_image(my_json)
    for dataset in all_datasets:
        if not os.path.exists(dataset):
            os.makedirs(dataset)
    navigate.set_path_to_dataset_WAV(my_json)
    processes = []
    outputs_shaped = model.shapeOutputs(my_json)
    for index_d, dataset in enumerate(all_datasets):
        # BUG FIX: reset the lookup for every dataset. The original set
        # `flag`/`right_array` once and never cleared them, so a genre
        # with no matching shaped output silently reused the previous
        # genre's category array.
        right_array = 0
        flag = False
        for item in outputs_shaped:
            if item['category_name'] == dataset:
                right_array = item["category_array"]
                flag = True
        # Enter the genre sub folder, which contains the tracks.
        navigate.set_path(dataset)
        actual_dataset_path = navigate.get_actual_path()
        items_dataset = utils.get_list_subs()  # Tracks of this folder.
        if len(items_dataset) >= 50 and flag:
            my_process = ImageProcess(
                "Image process " + str(index_d), my_json, actual_dataset_path,
                dataset, items_dataset, right_array,
                my_json["dataset_image_path"] + "\\" + dataset, dpi, height,
                width, legend=legend, colormesh=colormesh)
            processes.append(my_process)
        navigate.set_path_to_dataset_WAV(my_json)
    processes = [mp.Process(target=p.run, args=()) for p in processes]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    navigate.set_path_to_project_path(my_json)
def shapeDataDense(my_json, percent_train, encoder=False):
    """
    overview :
        Shape the datas to return the train and test sets.
    input :
        - my_json : .json file with all the datas we need.
        - percent_train : float -> [0.0 : 1.0] which represent the
          percentage of images which will be include in the train set.
    """
    # NOTE(review): the `encoder` parameter is immediately shadowed by the
    # local LabelEncoder below, and that local encoder is never used —
    # both look like leftovers; confirm with callers before removing.
    print("Shaping the data to get the Train and Test sets.")
    navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    X, Y = [], []
    path = navigate.get_actual_path()
    maxi = len(utils.get_list_subs())
    encoder = LabelEncoder()
    for index_dir, dir_x in enumerate(utils.get_list_subs()):
        navigate.set_path(dir_x)
        for index_fig, audio_path in enumerate(utils.get_list_subs()):
            print("Index folder : ", index_dir + 1, "/10 -- File ",
                  index_fig + 1, "/100.")
            # Load the track and extract the usual spectral features.
            audio, freq = lr.load(audio_path, mono=True)
            chroma_stft = lr.feature.chroma_stft(y=audio, sr=freq)
            rmse = lr.feature.rmse(y=audio)
            spec_cent = lr.feature.spectral_centroid(y=audio, sr=freq)
            spec_bw = lr.feature.spectral_bandwidth(y=audio, sr=freq)
            rolloff = lr.feature.spectral_rolloff(y=audio, sr=freq)
            zcr = lr.feature.zero_crossing_rate(audio)
            mfcc = lr.feature.mfcc(y=audio, sr=freq)
            # One feature row = the mean of each feature over the track.
            data = [
                np.mean(chroma_stft),
                np.mean(rmse),
                np.mean(spec_cent),
                np.mean(spec_bw),
                np.mean(rolloff),
                np.mean(zcr)
            ]
            for mfcc_x in mfcc:
                data.append(np.mean(mfcc_x))
            X.append(data)
            Y.append(dir_x)  # The folder name is the label.
            # NOTE(review): this break keeps only the FIRST track of each
            # folder, although the progress print announces "/100" files.
            # Looks like a debugging leftover — confirm whether it should
            # stay before relying on this training set.
            break
        navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    X = reshapeEntriesDense(X)
    Y = reshapeOutputsDense(Y)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=(1 - percent_train))
    return X_train, Y_train, X_test, Y_test
def initialiseMP3Sorted(my_json):  # -> 1748 secs, no probs
    """Sort the raw MP3 tracks into per-genre folders, one SortMP3Thread per folder."""
    print("Sorting the MP3 files.")
    # Load only the 4 metadata columns we need from the tracks CSV.
    datas = pd.read_csv(tracks,
                        encoding="utf-8",
                        usecols=["album", 'track.19', "track.7", "track.16"],
                        dtype={
                            "album": object,
                            'track.19': object,
                            "track.7": object,
                            "track.16": object
                        })
    datas = datas.rename(
        columns={
            "album": "track_id",
            'track.19': "title",
            "track.7": "genre_top",
            "track.16": "number"
        })
    threads = []
    # Create the destination folder for the sorted tracks if needed.
    navigate.set_path_to_datasets(my_json)
    if not os.path.exists("Mp3Sorted"):
        os.makedirs("Mp3Sorted")
    navigate.set_path_to_dataset_MP3(my_json)
    non_track_dirs = ('checksums', "fma_metadata", "README.txt")
    for thread_id, x_dir in enumerate(utils.get_list_subs()):
        if x_dir in non_track_dirs:
            print("This is not a directory of mp3 tracks.")
            navigate.set_path_to_dataset_MP3(my_json)
            continue
        navigate.set_path(x_dir)
        actual_dataset_path = navigate.get_actual_path()
        items_dataset = navigate.get_list_subs()  # Tracks of this folder.
        threads.append(
            SortMP3Thread("MP3 Sorted Thread " + str(thread_id), my_json,
                          actual_dataset_path, x_dir, items_dataset, datas))
        navigate.set_path_to_dataset_MP3(my_json)
    navigate.set_path_to_project_path(my_json)
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    navigate.set_path_to_project_path(my_json)
def initialiseFigsGTZAN(my_json, dpi, height, width, legend):  # 1444 secs
    """Create one image-generation process per GTZAN WAV genre folder.

    Processes are used because matplotlib isn't thread protected.
    """
    print("Creating the Images.")
    navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    all_datasets = utils.get_list_subs()  # Sub folders of the WAV dataset.
    # Make sure every destination image folder exists.
    navigate.set_path_to_dataset_image_GTZAN(my_json)
    for dataset in all_datasets:
        if not os.path.exists(dataset):
            os.makedirs(dataset)
    navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    workers = []
    for index_d, dataset in enumerate(all_datasets):
        # Enter the genre sub folder, which contains the tracks.
        navigate.set_path(dataset)
        actual_dataset_path = navigate.get_actual_path()
        items_dataset = utils.get_list_subs()  # Tracks of this folder.
        # Only folders holding at least 50 tracks are rendered.
        if len(items_dataset) >= 50:
            workers.append(
                ImageProcessGTZAN(
                    "Image GTZAN process " + str(index_d), my_json,
                    actual_dataset_path, dataset, items_dataset,
                    my_json["dataset_images_gtzan_path"] + "\\" + dataset,
                    dpi, height, width, legend=legend))
        navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    procs = [mp.Process(target=w.run, args=()) for w in workers]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    navigate.set_path_to_project_path(my_json)
import matplotlib as mlp mlp.use("Agg") # Set the environnement to avoid displaying figures. import warnings warnings.simplefilter( action='ignore', category=FutureWarning ) # Avoid FutureWarning -> h5py from 'float' to 'np.float64' # My scripts import songs # A python file i created. import navigate # A python file i created. import utils # Utilities funcions. import model from threads import WavThread, ImageProcess, SortMP3Thread, ImageProcessGTZAN # Threads to accelerate the process. # Variables if (os.path.exists("src")): project_path = str(navigate.get_actual_path() + "\\src") # This is the path to the project folder. else: project_path = navigate.get_actual_path( ) # This is the path to the project folder. json_file = "config.json" tracks = "tracks.csv" # Functions def initialiseMP3Sorted(my_json): # -> 1748 secs, no probs print("Sorting the MP3 files.") datas = pd.read_csv(tracks, encoding="utf-8", usecols=["album", 'track.19', "track.7", "track.16"],