def predict(feature, model, threshold=None, full_predict=True, MAX_FRAME=9000,
            channels=None, timesteps=128, instruments=1):
    """Run frame-level inference on a feature matrix, chunking long inputs.

    Inputs longer than ``MAX_FRAME`` frames are zero-padded with ``overlap``
    frames on each side, split into ``MAX_FRAME``-sized windows that share
    ``2 * overlap`` frames with their neighbours, predicted one window at a
    time, and stitched back together with the overlap regions trimmed.

    Parameters
    ----------
    feature : np.ndarray
        Input features; indexed as ``feature[frame, :, channel]``
        (assumes a 3-D (frames, bins, channels) array — TODO confirm).
    model : keras model passed through to ``inference``.
    threshold : float or None
        Binarization threshold. ``None`` keeps the raw prediction values
        (``original_v=True`` is forwarded to ``inference``).
    full_predict : bool
        When False, stop after the first chunk (quick preview of long input).
    MAX_FRAME : int
        Maximum number of frames per inference chunk.
    channels : list[int] or None
        Feature channels to feed to the model; defaults to ``[0]``.
        (Previously a mutable default argument ``[0]``.)
    timesteps, instruments : forwarded to ``inference``.

    Returns
    -------
    np.ndarray -- stitched prediction from ``inference``.
    """
    if channels is None:
        channels = [0]
    # No explicit threshold -> keep original (non-binarized) prediction values.
    original_v = threshold is None

    if len(feature) > MAX_FRAME:
        overlap = 4
        len_f = len(feature)
        turns = int(np.ceil(len_f / MAX_FRAME))
        pred = []
        # Pad both ends so every chunk (including the first/last) can have
        # its overlap regions trimmed after inference.
        padding = np.zeros((overlap,) + feature.shape[1:])
        feature = np.concatenate((padding, feature, padding), axis=0)
        for j in trange(turns, desc='A piece', leave=False):
            if j != (turns - 1):
                # Interior chunk: MAX_FRAME frames plus overlap on each side.
                sub_feature = feature[j * MAX_FRAME:(j + 1) * MAX_FRAME + 2 * overlap]
            else:
                # Final chunk: whatever remains (still carries both overlaps).
                sub_feature = feature[j * MAX_FRAME:]
            tmp_pred = inference(feature=sub_feature[:, :, channels],
                                 model=model,
                                 batch_size=5,
                                 timestep=timesteps,
                                 threshold=threshold,
                                 isMPE=True,
                                 original_v=original_v,
                                 channel=len(channels),
                                 instruments=instruments,
                                 keep_progress=False)
            # Trim the shared overlap so concatenated chunks line up.
            if j == 0:
                pred = tmp_pred[overlap:-overlap]
            else:
                pred = np.concatenate((pred, tmp_pred[overlap:-overlap]), axis=0)
            if not full_predict:
                break
    else:
        # Short input: single inference pass, no chunking needed.
        pred = inference(feature=feature[:, :, channels],
                         model=model,
                         threshold=threshold,
                         isMPE=True,
                         original_v=original_v,
                         channel=len(channels),
                         instruments=instruments)
    return pred
def testing(args, model=None):
    """Extract a melody from ``args.input_file`` and save it as a text file.

    Reads the wav, runs feature extraction + model inference, parses the
    result matrix, and writes it to ``args.output_file + ".txt"``.
    With ``args.jetson`` set, the audio is processed in ``MAX_LEN``-sample
    chunks (memory-constrained device path) and the per-chunk results are
    concatenated.

    Parameters
    ----------
    args : argparse.Namespace with ``input_file``, ``output_file``,
        ``model_path``, ``batch_size_test`` and ``jetson`` attributes.
    model : optional pre-loaded model; loaded from ``args.model_path``
        when None.
    """
    # load wav
    song = args.input_file
    x, fs = sf.read(song)

    # load model once, up front (was previously done lazily inside the loop)
    if model is None:
        model = load_model(args.model_path)

    def _infer(audio):
        # Shared feature-extraction + inference + parsing pipeline
        # (previously duplicated in both branches).
        feature = feature_extraction(audio, fs)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)
        return matrix_parser(extract_result)

    results = None
    if args.jetson:
        sample_ptr = 0
        while sample_ptr < x.shape[0]:
            # BUG FIX: clamp to x.shape[0], not x.shape[0] - 1 — the old
            # bound silently dropped the last audio sample.
            chunk_end = min(sample_ptr + MAX_LEN, x.shape[0])
            chunk = x[sample_ptr:chunk_end, :]
            sample_ptr += MAX_LEN
            r = _infer(chunk)
            results = r if results is None else np.concatenate((results, r))
    else:
        results = _infer(x)

    np.savetxt(args.output_file + ".txt", results)
    print("FINISHED")
def extract_melody(y, sr, model="Seg"):
    """Extract a melody matrix from raw audio samples.

    Parameters
    ----------
    y : audio samples.
    sr : sample rate.
    model : model name/path accepted by ``load_model`` (default "Seg").

    Returns
    -------
    The parsed result of ``matrix_parser`` on the inference output.
    """
    # Feature extraction: keep the first four feature planes, reordered
    # to put the frame axis first.
    feat = np.transpose(feature_extraction(y, sr)[0:4], axes=(2, 1, 0))

    # load model
    net = load_model(model)

    # Inference on channel 0 of the feature tensor.
    print(feat[:, :, 0].shape)
    prediction = inference(feature=feat[:, :, 0], model=net, batch_size=10)

    # Output
    return matrix_parser(prediction)
def predictOne(self, path: str):
    """Run vocal-melody extraction on a single audio file.

    Method copied from the main file in the vendored
    Vocal-Melody-Extraction project. Returns a pair of 50 Hz ``Signal``
    objects built from the two columns of the parsed result matrix.
    """
    # Deferred imports of the vendored project. NOTE(review): several of
    # these names are unused here, but importing the modules may register
    # custom layers/losses that load_model relies on — all are kept.
    from project.MelodyExt import feature_extraction
    from project.utils import load_model, save_model, matrix_parser
    from project.test import inference
    from project.model import seg, seg_pnn, sparse_loss
    from project.train import train_audio

    # load wav
    song = path

    # Feature extraction: first four planes, frame axis first.
    feat = feature_extraction(song)
    feat = np.transpose(feat[0:4], axes=(2, 1, 0))

    # Resolve the pretrained-model file inside the vendored package and
    # load it.
    model_file = resource_filename(
        __name__,
        "../../../vendors/Vocal-Melody-Extraction/Pretrained_models/"
        + self.parameters["model"].value)
    net = load_model(model_file)

    # Inference on channel 0 of the feature tensor.
    print(feat[:, :, 0].shape)
    raw = inference(feature=feat[:, :, 0], model=net, batch_size=10)

    # Output: wrap the two result columns as 50 Hz signals.
    parsed = matrix_parser(raw)
    return (Signal(parsed[:, 0], sampleRate=50),
            Signal(parsed[:, 1], sampleRate=50))
def main():
    """Command-line entry point: train a melody model or run extraction.

    ``--phase training`` builds/compiles a seg or pnn model and trains it;
    any other phase runs feature extraction + inference on ``--input_file``
    and writes the parsed result to ``out_seg.txt``.
    """
    # Arguments
    # (All help strings previously lacked the closing ')' after
    # '%(default)s'; fixed throughout — user-facing text only.)
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--phase',
                        help='phase: training or testing (default: %(default)s)',
                        type=str, default='testing')

    # arguments for training
    parser.add_argument('-t', '--model_type',
                        help='model type: seg or pnn (default: %(default)s)',
                        type=str, default='seg')
    parser.add_argument('-d', '--data_type',
                        help='data type: audio or symbolic (default: %(default)s)',
                        type=str, default='audio')
    parser.add_argument('-da', '--dataset_path', nargs='+',
                        help='path to data set (default: %(default)s)',
                        type=str, default='dataset')
    parser.add_argument('-la', '--label_path', nargs='+',
                        help='path to data set label (default: %(default)s)',
                        type=str, default='dataset_label')
    parser.add_argument('-ms', '--model_path_symbolic',
                        help='path to symbolic model (default: %(default)s)',
                        type=str, default='model_symbolic')
    parser.add_argument('-w', '--window_width',
                        help='width of the input feature (default: %(default)s)',
                        type=int, default=128)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size during training (default: %(default)s)',
                        type=int, default=12)
    parser.add_argument('-e', '--epoch',
                        help='number of epoch (default: %(default)s)',
                        type=int, default=5)
    parser.add_argument('-n', '--steps',
                        help='number of step per epoch (default: %(default)s)',
                        type=int, default=6000)
    parser.add_argument('-o', '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str, default="out")

    # arguments for testing
    parser.add_argument('-m', '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str, default='transfer_audio_directly')
    parser.add_argument('-i', '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str, default='train01.wav')
    parser.add_argument('-bb', '--batch_size_test',
                        help='batch size during testing (default: %(default)s)',
                        type=int, default=10)
    args = parser.parse_args()
    print(args)

    if args.phase == "training":
        # arguments setting
        TIMESTEPS = args.window_width
        dataset_path = args.dataset_path
        label_path = args.label_path

        # load or create model
        if "seg" in args.model_type:
            model = seg(multi_grid_layer_n=1, feature_num=384,
                        input_channel=1, timesteps=TIMESTEPS)
        elif "pnn" in args.model_type:
            model = seg_pnn(multi_grid_layer_n=1, feature_num=384,
                            timesteps=TIMESTEPS,
                            prev_model=args.model_path_symbolic)
        else:
            # BUG FIX: previously fell through with `model` undefined and
            # crashed with NameError at model.compile.
            raise ValueError("unknown model type: " + args.model_type)
        model.compile(optimizer="adam",
                      loss={'prediction': sparse_loss},
                      metrics=['accuracy'])

        # train
        train_audio(model, args.epoch, args.steps, args.batch_size_train,
                    args.window_width, dataset_path, label_path)
        # save model
        save_model(model, args.output_model_name)
    else:
        # load wav
        song = args.input_file

        # Feature extraction
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model
        model = load_model(args.model_path)

        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Output
        r = matrix_parser(extract_result)
        np.savetxt("out_seg.txt", r)