import ast
import multiprocessing as mp
import os

import librosa as lr
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split

# navigate, utils and model (path helpers and output shaping), the WavThread,
# SortMP3Thread, ImageProcess and ImageProcessGTZAN classes, the reshape*
# helpers and the `tracks` csv path are project-local; their imports depend on
# the package layout and are assumed to be in place.


def initialiseWAV(my_json):  # -> 1716 secs, no probs
    print("Creating the WAV files.")
    navigate.set_path_to_dataset_MP3_sorted(my_json)
    all_datasets = utils.get_list_subs(
    )  # List all the sub-folders of the dataset folder.
    threads = []
    for index_d, dataset in enumerate(all_datasets):
        if (len(os.listdir(my_json['dataset_mp3_sorted_path'] + '\\' +
                           dataset)) >= 30):
            # Set the path to the sub-dataset directory, which contains the tracks.
            navigate.set_path(my_json["dataset_mp3_sorted_path"] + "\\" +
                              dataset)
            actual_dataset_path = navigate.get_actual_path()
            items_dataset = navigate.get_list_subs(
            )  # Return all the tracks of the current folder.
            my_thread = WavThread("Wav Thread " + str(index_d), my_json,
                                  actual_dataset_path, dataset, items_dataset)
            threads.append(my_thread)
            navigate.set_path_to_dataset_MP3_sorted(my_json)
        else:
            print("There are fewer than 30 songs in the", dataset,
                  "dataset. I will not export it.")
    navigate.set_path_to_project_path(my_json)
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    navigate.set_path_to_project_path(my_json)

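# A minimal usage sketch, assuming the project configuration lives in a JSON
# file (the "config.json" name is an assumption, not taken from this project):
#
#     import json
#
#     with open("config.json", "r", encoding="utf-8") as f:
#         my_json = json.load(f)
#     initialiseWAV(my_json)
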
def shapeDataConv2DGTZAN(my_json, percent_train):
    """
    overview : Shape the data and return the train and test sets.
    input :
        - my_json : dict loaded from the .json config file with all the data
          we need.
        - percent_train : float in [0.0, 1.0], the fraction of images to
          include in the train set.
    """
    print("Shaping the data to get the Train and Test sets.")
    navigate.set_path_to_dataset_image_GTZAN(my_json)
    X, Y = [], []
    path = navigate.get_actual_path()
    maxi = len(utils.get_list_subs())
    convert_str = "L"  # "L" = grayscale (black & white), "RGB" = colour.
    for index_dir, dir_x in enumerate(utils.get_list_subs()):
        navigate.set_path(dir_x)
        for index_fig, fig in enumerate(utils.get_list_subs()):
            im = Image.open(navigate.get_actual_path() + "\\" + fig).convert(
                convert_str)  # 640 * 480 pixels.
            width, height = im.size
            pix = np.array(im.getdata())
            im.close()
            X.append(pix)
            Y.append(dir_x)  # The folder name is the genre label.
        navigate.set_path_to_dataset_image_GTZAN(my_json)
    if (convert_str == "L"):
        X = reshapeEntriesConv2D(X, width, height, 1)  # 1 channel.
    if (convert_str == "RGB"):
        X = reshapeEntriesConv2D(X, width, height, 3)  # 3 channels.
    Y = reshapeOutputsDense(Y)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=(1 - percent_train))
    return X_train, Y_train, X_test, Y_test, width, height

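# A hedged usage sketch for shapeDataConv2DGTZAN, with an 80/20 train/test
# split. In grayscale ("L") mode, reshapeEntriesConv2D is presumably expected
# to produce single-channel image tensors, so the matching Conv2D input shape
# would be (width, height, 1):
#
#     X_train, Y_train, X_test, Y_test, width, height = \
#         shapeDataConv2DGTZAN(my_json, 0.8)
#     print(X_train.shape)  # presumably (n_train, width, height, 1)
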
def set_security_autorizations_images(my_json):
    print("Setting the authorizations of the image folders.")
    navigate.set_path_to_dataset_image(my_json)
    for dir_x in utils.get_list_subs():
        navigate.set_path(dir_x)
        # print("Path : ", navigate.get_actual_path(), ".\n")
        # Read/write/execute for everyone.
        os.chmod(navigate.get_actual_path(), 0o777)
        navigate.set_path_to_dataset_image(my_json)

def initialiseFigs(my_json, dpi, height, width, legend, colormesh):  # 1444 secs
    # We can't use threads, because matplotlib isn't thread-safe.
    print("Creating the Images.")
    navigate.set_path_to_dataset_WAV(my_json)
    all_datasets = utils.get_list_subs(
    )  # List all the sub-folders of the dataset folder.
    navigate.set_path_to_dataset_image(my_json)
    for dataset in all_datasets:
        if not (os.path.exists(dataset)):
            os.makedirs(dataset)
    navigate.set_path_to_dataset_WAV(my_json)
    processes = []
    outputs_shaped = model.shapeOutputs(my_json)
    for index_d, dataset in enumerate(all_datasets):
        # Reset the lookup result for each dataset so a stale match from the
        # previous iteration can't leak through.
        right_array = 0
        flag = False
        for item in outputs_shaped:
            if (item['category_name'] == dataset):
                right_array = item["category_array"]
                flag = True
        # Set the path to the sub-dataset directory, which contains the tracks.
        navigate.set_path(dataset)
        actual_dataset_path = navigate.get_actual_path()
        items_dataset = utils.get_list_subs(
        )  # Return all the tracks of the current folder.
        if (len(items_dataset) >= 50 and flag):
            my_process = ImageProcess("Image process " + str(index_d), my_json,
                                      actual_dataset_path, dataset,
                                      items_dataset, right_array,
                                      my_json["dataset_image_path"] + "\\" +
                                      dataset, dpi, height, width,
                                      legend=legend, colormesh=colormesh)
            processes.append(my_process)
        navigate.set_path_to_dataset_WAV(my_json)
    # Run each ImageProcess in its own process; targeting its run() method
    # keeps matplotlib drawing inside a single process at a time.
    processes = [mp.Process(target=p.run, args=()) for p in processes]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    navigate.set_path_to_project_path(my_json)

def shapeDataDense(my_json, percent_train, encoder=False):
    """
    overview : Shape the data and return the train and test sets.
    input :
        - my_json : dict loaded from the .json config file with all the data
          we need.
        - percent_train : float in [0.0, 1.0], the fraction of samples to
          include in the train set.
        - encoder : accepted for symmetry with shapeDataConv2D but not used
          here; the labels go through reshapeOutputsDense.
    """
    print("Shaping the data to get the Train and Test sets.")
    navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    X, Y = [], []
    path = navigate.get_actual_path()
    maxi = len(utils.get_list_subs())
    for index_dir, dir_x in enumerate(utils.get_list_subs()):
        navigate.set_path(dir_x)
        for index_fig, audio_path in enumerate(utils.get_list_subs()):
            print("Index folder : ", index_dir + 1, "/10 -- File ",
                  index_fig + 1, "/100.")
            audio, freq = lr.load(audio_path, mono=True)
            # Summarise each track with the mean of a few spectral features.
            chroma_stft = lr.feature.chroma_stft(y=audio, sr=freq)
            rmse = lr.feature.rmse(y=audio)  # Renamed lr.feature.rms in librosa >= 0.7.
            spec_cent = lr.feature.spectral_centroid(y=audio, sr=freq)
            spec_bw = lr.feature.spectral_bandwidth(y=audio, sr=freq)
            rolloff = lr.feature.spectral_rolloff(y=audio, sr=freq)
            zcr = lr.feature.zero_crossing_rate(audio)
            mfcc = lr.feature.mfcc(y=audio, sr=freq)
            data = [
                np.mean(chroma_stft),
                np.mean(rmse),
                np.mean(spec_cent),
                np.mean(spec_bw),
                np.mean(rolloff),
                np.mean(zcr)
            ]
            for mfcc_x in mfcc:
                data.append(np.mean(mfcc_x))  # One mean per MFCC band.
            X.append(data)
            Y.append(dir_x)  # The folder name is the genre label.
        navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    X = reshapeEntriesDense(X)
    Y = reshapeOutputsDense(Y)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=(1 - percent_train))
    return X_train, Y_train, X_test, Y_test

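# The feature vector built above holds 6 summary statistics (chroma, RMS
# energy, spectral centroid, bandwidth, rolloff, zero-crossing rate) plus the
# mean of each MFCC band; librosa computes 20 MFCCs by default, so each entry
# of X should contain 26 values. A sanity-check sketch (assumed usage):
#
#     X_train, Y_train, X_test, Y_test = shapeDataDense(my_json, 0.8)
#     assert X_train.shape[1] == 26  # 6 summary stats + 20 MFCC means
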
def initialiseMP3Sorted(my_json):  # -> 1748 secs, no probs
    print("Sorting the MP3 files.")
    # `tracks` is assumed to point to the FMA metadata file tracks.csv. That
    # file has a multi-row header, so reading it with a single header row
    # mangles the duplicated column names into "track.7", "track.16",
    # "track.19", etc.; the rename below maps them back to meaningful names.
    datas = pd.read_csv(tracks,
                        encoding="utf-8",
                        usecols=["album", 'track.19', "track.7", "track.16"],
                        dtype={
                            "album": object,
                            'track.19': object,
                            "track.7": object,
                            "track.16": object
                        })
    datas = datas.rename(
        columns={
            "album": "track_id",
            'track.19': "title",
            "track.7": "genre_top",
            "track.16": "number"
        })
    threads = []
    navigate.set_path_to_datasets(my_json)
    if not (os.path.exists("Mp3Sorted")):
        os.makedirs("Mp3Sorted")
    navigate.set_path_to_dataset_MP3(my_json)
    for index_dir, x_dir in enumerate(utils.get_list_subs()):
        if (x_dir != 'checksums' and x_dir != "fma_metadata"
                and x_dir != "README.txt"):
            navigate.set_path(x_dir)
            actual_dataset_path = navigate.get_actual_path()
            items_dataset = navigate.get_list_subs(
            )  # Return all the tracks of the current folder.
            my_thread = SortMP3Thread("MP3 Sorted Thread " + str(index_dir),
                                      my_json, actual_dataset_path, x_dir,
                                      items_dataset, datas)
            threads.append(my_thread)
        else:
            print("This is not a directory of mp3 tracks.")
        navigate.set_path_to_dataset_MP3(my_json)
    navigate.set_path_to_project_path(my_json)
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    navigate.set_path_to_project_path(my_json)

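# An alternative sketch for reading the FMA metadata, assuming the standard
# tracks.csv layout with a two-row header: pandas can parse the header levels
# directly instead of relying on the mangled "track.N" names (the column pick
# below is illustrative):
#
#     datas = pd.read_csv(tracks, index_col=0, header=[0, 1])
#     genre_top = datas[("track", "genre_top")]
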
def initialiseFigsGTZAN(my_json, dpi, height, width, legend):  # 1444 secs
    # We can't use threads, because matplotlib isn't thread-safe.
    print("Creating the Images.")
    navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    all_datasets = utils.get_list_subs(
    )  # List all the sub-folders of the dataset folder.
    navigate.set_path_to_dataset_image_GTZAN(my_json)
    for dataset in all_datasets:
        if not (os.path.exists(dataset)):
            os.makedirs(dataset)
    navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    processes = []
    for index_d, dataset in enumerate(all_datasets):
        # Set the path to the sub-dataset directory, which contains the tracks.
        navigate.set_path(dataset)
        actual_dataset_path = navigate.get_actual_path()
        items_dataset = utils.get_list_subs(
        )  # Return all the tracks of the current folder.
        if (len(items_dataset) >= 50):
            my_process = ImageProcessGTZAN(
                "Image GTZAN process " + str(index_d), my_json,
                actual_dataset_path, dataset, items_dataset,
                my_json["dataset_images_gtzan_path"] + "\\" + dataset, dpi,
                height, width, legend=legend)
            processes.append(my_process)
        navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    # Run each ImageProcessGTZAN in its own process; matplotlib is only safe
    # when a single process draws at a time.
    processes = [mp.Process(target=p.run, args=()) for p in processes]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    navigate.set_path_to_project_path(my_json)

def shapeDataConv2D(my_json, percent_train, encoder=False):
    """
    overview : Shape the data and return the train and test sets.
    input :
        - my_json : dict loaded from the .json config file with all the data
          we need.
        - percent_train : float in [0.0, 1.0], the fraction of images to
          include in the train set.
        - encoder : a label encoder (e.g. sklearn's LabelEncoder) used to
          encode the categories, or False to use the pre-shaped category
          arrays stored in the image metadata.
    """
    print("Shaping the data to get the Train and Test sets.")
    navigate.set_path_to_dataset_image(my_json)
    X, Y = [], []
    path = navigate.get_actual_path()
    maxi = len(utils.get_list_subs())
    for index_dir, dir_x in enumerate(utils.get_list_subs()):
        navigate.set_path(dir_x)
        for index_fig, fig in enumerate(utils.get_list_subs()):
            im = Image.open(navigate.get_actual_path() + "\\" + fig).convert(
                'L')  # 640 * 480 pixels.
            width, height = im.size
            # getdata() is row-major: height rows of width pixels each.
            pix = np.array(im.getdata()).reshape(height, width)
            if (encoder):
                category = str(im.info["category"])
            else:
                # The shaped category is stored as a stringified list in the
                # image metadata; parse it back into a Python list.
                category = ast.literal_eval(im.info["category_shaped"])
            im.close()
            X.append(pix)
            Y.append(category)
        navigate.set_path_to_dataset_image(my_json)
    if (encoder):
        Y = encoder.fit_transform(Y)
        n_classes = encoder.classes_.size
        Y = Y.reshape(len(Y), 1)
    else:
        Y = reshapeOutputs(Y)
    X = reshapeEntriesConv2D(X, width, height)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=(1 - percent_train))
    return X_train, Y_train, X_test, Y_test, width, height

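# A minimal end-to-end driver, sketching the intended pipeline order (sort the
# MP3s, export WAVs, render the images, then shape the data). The dpi/size
# values and the 0.8 split are illustrative assumptions, and "config.json" is
# an assumed file name. The __main__ guard is required because initialiseFigs
# spawns multiprocessing workers:
if __name__ == "__main__":
    import json

    with open("config.json", "r", encoding="utf-8") as f:
        my_json = json.load(f)
    initialiseMP3Sorted(my_json)
    initialiseWAV(my_json)
    initialiseFigs(my_json, dpi=100, height=480, width=640,
                   legend=False, colormesh=True)
    X_train, Y_train, X_test, Y_test, width, height = shapeDataConv2D(
        my_json, 0.8)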