def testing(args, model=None):
    """Run melody extraction on ``args.input_file`` and save the result.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``input_file``, ``output_file``, ``model_path``,
        ``batch_size_test`` and the ``jetson`` flag.
    model : optional
        A pre-loaded model; when ``None`` it is loaded lazily from
        ``args.model_path``.

    Writes ``args.output_file + ".txt"`` via ``np.savetxt``.
    """
    # load wav
    song = args.input_file
    x, fs = sf.read(song)

    results = None
    if args.jetson:
        # Low-memory path: process the audio in MAX_LEN-sample chunks.
        sample_ptr = 0
        while sample_ptr < x.shape[0]:
            # BUG FIX: the bound used to be ``x.shape[0] - 1``; since the
            # slice end is exclusive, that silently dropped the very last
            # sample of the file.
            chunk_end = min(sample_ptr + MAX_LEN, x.shape[0])
            chunk = x[sample_ptr:chunk_end, :]
            sample_ptr += MAX_LEN

            # Feature extraction
            feature = feature_extraction(chunk, fs)
            feature = np.transpose(feature[0:4], axes=(2, 1, 0))

            # load model (only once; reused for the remaining chunks)
            if model is None:
                model = load_model(args.model_path)

            # Inference
            print(feature[:, :, 0].shape)
            extract_result = inference(feature=feature[:, :, 0],
                                       model=model,
                                       batch_size=args.batch_size_test)

            # Output: append this chunk's rows to the running result
            r = matrix_parser(extract_result)
            results = r if results is None else np.concatenate((results, r))
    else:
        # Feature extraction over the whole file at once
        feature = feature_extraction(x, fs)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model
        if model is None:
            model = load_model(args.model_path)

        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Output
        results = matrix_parser(extract_result)

    np.savetxt(args.output_file + ".txt", results)
    print("FINISHED")
def make_dataset_audio(song_list, label_list, data, dataset_name):
    """Build and pickle an audio dataset: features to ``dataset_name`` and
    parsed labels to ``dataset_name + "_label"``.

    Parameters
    ----------
    song_list : iterable of audio file paths fed to ``feature_extraction``.
    label_list : iterable of label files fed to ``label_parser``.
    data : str
        Dataset identifier; when it contains "medleydb" the extra
        MedleyDB preprocessing step is applied.
    dataset_name : str
        Output path prefix for the two pickle files.
    """
    X = []
    Y = []

    # Extract a (time, freq, channel) feature tensor for every song.
    for song in tqdm.tqdm(song_list):
        out = feature_extraction(song)
        score = np.transpose(out[0:4], axes=(2, 1, 0))
        X.append(score)

    # MedleyDB labels need extra preprocessing; ``v`` is passed through to
    # the label parser (None for other datasets).
    if ("medleydb" in data):
        f, v, r = medleydb_preprocessing(song_list)
    else:
        v = None

    for label in tqdm.tqdm(label_list):
        score = label_parser(label, data, v)
        Y.append(score)

    # BUG FIX: the output files used to be opened inline and never closed,
    # risking unflushed/locked pickles; context managers guarantee close.
    with open(dataset_name, 'wb') as fout:
        pickle.dump(X, fout, pickle.HIGHEST_PROTOCOL)
    with open(dataset_name + "_label", 'wb') as fout:
        pickle.dump(Y, fout, pickle.HIGHEST_PROTOCOL)
    print(str(len(X)) + ' files written in ' + dataset_name)
def extract_melody(y, sr, model="Seg"):
    """Extract the vocal melody from an audio signal.

    ``y`` is the waveform, ``sr`` its sampling rate, and ``model`` the path
    of the segmentation model to load.  Returns the melody matrix produced
    by ``matrix_parser``.
    """
    # Feature extraction: take the first four CFP channels and reorder the
    # tensor to (time, frequency, channel).
    raw = feature_extraction(y, sr)
    feature = np.transpose(raw[0:4], axes=(2, 1, 0))

    # Swap the path argument for the actual loaded network.
    model = load_model(model)

    # Inference runs on the first feature channel only.
    first_channel = feature[:, :, 0]
    print(first_channel.shape)
    extract_result = inference(feature=first_channel,
                               model=model,
                               batch_size=10)

    # Output
    return matrix_parser(extract_result)
def main(args):
    """Transcribe ``args.input_audio`` with the model at ``args.model_path``.

    Saves the raw prediction to ``pred.hdf`` and, when ``args.to_midi`` is
    given, writes the post-processed MIDI to that path.
    """
    # Pre-process features.  Raise instead of assert: asserts are stripped
    # under ``python -O``, and this is user-input validation.
    if not os.path.isfile(args.input_audio):
        raise FileNotFoundError(
            "The given path is not a file!. Please check your input again.")
    print("Processing features")
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)

    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        # HCFP stacks spectral and cepstral harmonics as channels.
        assert (len(channels) == (args.num_harmonics * 2 + 2))
        spec = []
        ceps = []
        for i in range(args.num_harmonics):
            spec.append(fetch_harmonic(tfrL0, cenf, i))
            ceps.append(fetch_harmonic(tfrLQ, cenf, i))
        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))
        feature = np.dstack((spec, ceps))
    else:
        assert (len(channels) <= 4)
        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))

    feature = create_batches(feature[:, :, channels], b_size=16,
                             timesteps=timesteps)

    model = load_model(args.model_path)

    print("Predicting...")
    pred = predict(feature, model)

    # Context manager guarantees the HDF5 handle is closed even if
    # ``create_dataset`` raises (the old code leaked it on error).
    with h5py.File("pred.hdf", "w") as p_out:
        p_out.create_dataset("0", data=pred)

    notes, midi = PostProcess(pred)

    if args.to_midi is not None:
        midi.write(args.to_midi)
def predictOne(self, path: str):
    """
    method copied from the main file in the project
    """
    # pkg_resources.()
    # project = importlib.import_module("vendors.Vocal-Melody-Extraction.project")
    from project.MelodyExt import feature_extraction
    from project.utils import load_model, save_model, matrix_parser
    from project.test import inference
    from project.model import seg, seg_pnn, sparse_loss
    from project.train import train_audio

    # Feature extraction: keep the first four channels and reorder to
    # (time, frequency, channel).
    raw_features = feature_extraction(path)
    feature = np.transpose(raw_features[0:4], axes=(2, 1, 0))

    # Resolve the bundled pretrained model relative to this package and
    # load it.
    model_file = resource_filename(
        __name__,
        "../../../vendors/Vocal-Melody-Extraction/Pretrained_models/"
        + self.parameters["model"].value)
    model = load_model(model_file)

    # Inference on the first feature channel.
    melody_channel = feature[:, :, 0]
    print(melody_channel.shape)
    extract_result = inference(feature=melody_channel,
                               model=model,
                               batch_size=10)

    # Parse the network output; column 0 and column 1 are wrapped as
    # 50 Hz signals.
    r = matrix_parser(extract_result)
    return (Signal(r[:, 0], sampleRate=50), Signal(r[:, 1], sampleRate=50))
def main():
    """Command-line entry point: train a melody-extraction model or run
    inference on one audio file, selected by ``--phase``."""
    # Arguments
    # FIX: every help string previously read "(default: %(default)s" with
    # an unbalanced parenthesis; the closing ")" has been added throughout.
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--phase',
                        help='phase: training or testing (default: %(default)s)',
                        type=str, default='testing')

    # arguments for training
    parser.add_argument('-t', '--model_type',
                        help='model type: seg or pnn (default: %(default)s)',
                        type=str, default='seg')
    parser.add_argument('-d', '--data_type',
                        help='data type: audio or symbolic (default: %(default)s)',
                        type=str, default='audio')
    parser.add_argument('-da', '--dataset_path', nargs='+',
                        help='path to data set (default: %(default)s)',
                        type=str, default='dataset')
    parser.add_argument('-la', '--label_path', nargs='+',
                        help='path to data set label (default: %(default)s)',
                        type=str, default='dataset_label')
    parser.add_argument('-ms', '--model_path_symbolic',
                        help='path to symbolic model (default: %(default)s)',
                        type=str, default='model_symbolic')
    parser.add_argument('-w', '--window_width',
                        help='width of the input feature (default: %(default)s)',
                        type=int, default=128)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size during training (default: %(default)s)',
                        type=int, default=12)
    parser.add_argument('-e', '--epoch',
                        help='number of epoch (default: %(default)s)',
                        type=int, default=5)
    parser.add_argument('-n', '--steps',
                        help='number of step per epoch (default: %(default)s)',
                        type=int, default=6000)
    parser.add_argument('-o', '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str, default="out")

    # arguments for testing
    parser.add_argument('-m', '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str, default='transfer_audio_directly')
    parser.add_argument('-i', '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str, default='train01.wav')
    parser.add_argument('-bb', '--batch_size_test',
                        help='batch size during testing (default: %(default)s)',
                        type=int, default=10)

    args = parser.parse_args()
    print(args)

    if (args.phase == "training"):
        # arguments setting
        TIMESTEPS = args.window_width
        dataset_path = args.dataset_path
        label_path = args.label_path

        # load or create model
        if ("seg" in args.model_type):
            model = seg(multi_grid_layer_n=1, feature_num=384,
                        input_channel=1, timesteps=TIMESTEPS)
        elif ("pnn" in args.model_type):
            model = seg_pnn(multi_grid_layer_n=1, feature_num=384,
                            timesteps=TIMESTEPS,
                            prev_model=args.model_path_symbolic)
        model.compile(optimizer="adam",
                      loss={'prediction': sparse_loss},
                      metrics=['accuracy'])

        # train
        train_audio(model, args.epoch, args.steps, args.batch_size_train,
                    args.window_width, dataset_path, label_path)

        # save model
        save_model(model, args.output_model_name)
    else:
        # load wav
        song = args.input_file

        # Feature extraction
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model
        model = load_model(args.model_path)

        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Output
        r = matrix_parser(extract_result)
        np.savetxt("out_seg.txt", r)
def main(args):
    """Polyphonic transcription driver: extract features, run the model,
    peak-pick the 88 piano keys, plot a segment, and optionally export
    MIDI at two thresholds."""
    # Pre-process features.  Raise instead of assert so the user-input
    # check survives ``python -O``.
    if not os.path.isfile(args.input_audio):
        raise FileNotFoundError(
            "The given path is not a file!. Please check your input again.")
    print("Processing features")
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)

    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        # HCFP stacks spectral and cepstral harmonics as channels.
        assert (len(channels) == (args.num_harmonics * 2 + 2))
        spec = []
        ceps = []
        for i in range(args.num_harmonics):
            spec.append(fetch_harmonic(tfrL0, cenf, i))
            ceps.append(fetch_harmonic(tfrLQ, cenf, i))
        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))
        feature = np.dstack((spec, ceps))
    else:
        assert (len(channels) <= 4)
        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))

    model = load_model(args.model_path)

    print("Predicting...")
    pred = predict(feature, model,
                   timesteps=timesteps,
                   channels=channels,
                   instruments=out_class - 1)

    # Context manager keeps the HDF5 handle from leaking if the write fails.
    with h5py.File("pred.hdf", "w") as p_out:
        p_out.create_dataset("0", data=pred)

    # Per-instrument peak picking, then keep only the 88 piano keys.
    for i in range(pred.shape[2]):
        pred[:, :88, i] = peak_picking(pred[:, :, i])
    pred = pred[:, :88]

    # Print figure
    # BUG FIX: ``args.input_audio[:args.input_audio.rfind("/")]`` truncated
    # the last character of slash-less paths (rfind returns -1);
    # os.path.dirname returns "" in that case and is portable.
    base_path = os.path.dirname(args.input_audio)
    save_name = os.path.join(base_path, args.output_fig_name)
    plot_range = range(500, 1500)
    # BUG FIX: the comparison was ``>``, which let max(plot_range) equal
    # len(pred) - 1 + 1 slip through when len(pred) == 1499 and raised an
    # IndexError below; ``>=`` guarantees every index is in bounds.
    if max(plot_range) >= len(pred):
        plot_range = range(0, len(pred))
    pp = pred[plot_range]

    if out_class >= 11:
        assert (out_class == 12), \
            "There is something wrong with the configuration. " \
            "Expected value: 12, Current value: {}".format(out_class)
        titles = MusicNet_Instruments
    else:
        assert (out_class == 2), \
            "There is something wrong with the configuration. " \
            "Expected value: 2, Current value: {}".format(out_class)
        titles = ["Piano"]

    print("Ploting figure...")
    #PLOT(pp, save_name, plot_range, titles=titles)
    print("Output figure to {}".format(base_path))

    if args.to_midi is not None:
        midi_path = args.to_midi
        threshold = [0.45, 0.5]
        for th in threshold:
            midi = to_midi(pred, midi_path + "_" + str(th), threshold=th)
            roll = midi.get_piano_roll()
            print("Shape of output midi roll: ", roll.shape)