def _extract_segments(audio, fs, model, batch_size):
    """Run feature extraction and inference on one audio buffer and parse the output."""
    feature = feature_extraction(audio, fs)
    feature = np.transpose(feature[0:4], axes=(2, 1, 0))

    # Only the first slice along the last axis is fed to the model.
    print(feature[:, :, 0].shape)
    extract_result = inference(feature=feature[:, :, 0],
                               model=model,
                               batch_size=batch_size)
    return matrix_parser(extract_result)


def testing(args, model=None):
    """Run inference on ``args.input_file`` and save the parsed result as text.

    The audio is read with ``sf.read``. When ``args.jetson`` is truthy the audio
    is processed in consecutive chunks of at most ``MAX_LEN`` samples and the
    per-chunk results are concatenated; otherwise the whole file is processed
    in one pass. The result is written to ``args.output_file + ".txt"``.

    Args:
        args: Namespace providing ``input_file``, ``output_file``, ``jetson``,
            ``model_path`` and ``batch_size_test``.
        model: Optional pre-loaded model. When ``None`` the model is loaded
            from ``args.model_path``.

    Raises:
        ValueError: If the input audio contains no samples.
    """
    # load wav
    x, fs = sf.read(args.input_file)
    total_samples = x.shape[0]
    if total_samples == 0:
        raise ValueError("Input audio contains no samples: {}".format(args.input_file))

    # load model (once, before any chunk is processed)
    if model is None:
        model = load_model(args.model_path)

    if args.jetson:
        parts = []
        for start in range(0, total_samples, MAX_LEN):
            # Slice ends are exclusive and clamp at the array length, so the
            # final sample is kept and no empty trailing chunk is produced.
            # Slicing only the first axis also works for mono (1-D) audio.
            chunk = x[start:start + MAX_LEN]
            parts.append(_extract_segments(chunk, fs, model, args.batch_size_test))
        results = parts[0] if len(parts) == 1 else np.concatenate(parts)
    else:
        results = _extract_segments(x, fs, model, args.batch_size_test)

    np.savetxt(args.output_file + ".txt", results)
    print("FINISHED")
def predict_hdf(cls, hdf_paths, model_path, pred_batch_size=4):
    """Yield model predictions for every dataset stored in the given HDF file(s).

    This is a generator function. For each HDF path a label file with the same
    name but a ``.pickle`` extension must exist in the same directory.

    Args:
        hdf_paths: A single HDF path or a list of HDF paths.
        model_path: Path passed to ``load_model`` and ``model_info``.
        pred_batch_size: Batch size forwarded to ``predict_v1``.

    Yields:
        Tuples ``(pred, label, key)`` where ``key`` is the dataset name inside
        the HDF file, ``pred`` the output of ``predict_v1`` for that dataset and
        ``label`` the entry stored under ``key`` in the pickle file.
    """
    if not isinstance(hdf_paths, list):
        hdf_paths = [hdf_paths]

    model = load_model(model_path)
    _, channels, _, timesteps = model_info(model_path)

    for hdf_path in hdf_paths:
        with h5py.File(hdf_path, "r") as feat:
            label_path = hdf_path.replace(".hdf", ".pickle")
            # NOTE: pickle can execute arbitrary code on load; only use label
            # files from a trusted source.
            with open(label_path, "rb") as label_file:
                label = pickle.load(label_file)

            for key, ff in feat.items():
                ll = label[key]
                pred = predict_v1(ff[:, :, channels],
                                  model,
                                  timesteps,
                                  batch_size=pred_batch_size)
                yield pred, ll, key
def main(args):
    """Transcribe one audio file with a trained model and optionally write MIDI.

    Args:
        args: Namespace providing ``input_audio``, ``model_path``,
            ``num_harmonics``, ``onset_th`` and ``to_midi``.

    Raises:
        AssertionError: If ``args.input_audio`` is not an existing file, or if
            the channel configuration reported by ``model_info`` does not fit
            the feature type.
    """
    # Pre-process features
    # The message previously formatted ``audio.input_audio`` (an undefined
    # name), which turned the intended AssertionError into a NameError.
    assert os.path.isfile(args.input_audio), (
        "The given path is not a file!. Please check your input again. Given input: {}".format(
            args.input_audio))
    print("Processing features of input audio: {}".format(args.input_audio))
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)

    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        assert (len(channels) == (args.num_harmonics * 2 + 2))

        spec = []
        ceps = []
        for i in range(args.num_harmonics):
            spec.append(fetch_harmonic(tfrL0, cenf, i))
            ceps.append(fetch_harmonic(tfrLQ, cenf, i))

        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))
        feature = np.dstack((spec, ceps))
    else:
        assert (len(channels) <= 4)

        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))

    model = load_model(args.model_path)

    print("Predicting...")
    pred = predict_v1(feature[:, :, channels], model, timesteps, batch_size=4)

    midi = MultiPostProcess(pred,
                            mode="note",
                            onset_th=args.onset_th,
                            dura_th=0.5,
                            frm_th=3,
                            inst_th=1.1,
                            t_unit=0.02)

    if args.to_midi is not None:
        midi.write(args.to_midi)
        print("Midi written as {}".format(args.to_midi))
def extract_melody(y, sr, model="Seg"):
    """Extract features from an audio buffer, run inference and parse the result.

    Args:
        y: Audio data handed to ``feature_extraction``.
        sr: Sample rate handed to ``feature_extraction``.
        model: Identifier passed to ``load_model``.

    Returns:
        The value produced by ``matrix_parser`` for the inference output.
    """
    # Keep the first four extracted features and reorder the axes.
    stacked = feature_extraction(y, sr)[0:4]
    feature = np.transpose(stacked, axes=(2, 1, 0))

    network = load_model(model)

    # Only the first slice along the last axis is fed to the model.
    model_input = feature[:, :, 0]
    print(model_input.shape)
    raw_output = inference(feature=model_input, model=network, batch_size=10)

    return matrix_parser(raw_output)
def main(args):
    """Predict on one audio file, dump the prediction to HDF and optionally MIDI.

    The raw prediction is stored as dataset ``"0"`` in ``pred.hdf`` in the
    current working directory.

    Args:
        args: Namespace providing ``input_audio``, ``model_path``,
            ``num_harmonics`` and ``to_midi``.

    Raises:
        AssertionError: If ``args.input_audio`` is not an existing file, or if
            the channel configuration reported by ``model_info`` does not fit
            the feature type.
    """
    # Pre-process features
    assert os.path.isfile(args.input_audio), \
        "The given path is not a file!. Please check your input again."
    print("Processing features")
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)

    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        assert (len(channels) == (args.num_harmonics * 2 + 2))

        spec = []
        ceps = []
        for i in range(args.num_harmonics):
            spec.append(fetch_harmonic(tfrL0, cenf, i))
            ceps.append(fetch_harmonic(tfrLQ, cenf, i))

        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))
        feature = np.dstack((spec, ceps))
    else:
        assert (len(channels) <= 4)

        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))

    feature = create_batches(feature[:, :, channels], b_size=16, timesteps=timesteps)
    model = load_model(args.model_path)

    print("Predicting...")
    pred = predict(feature, model)

    # The context manager closes the file even if writing the dataset fails.
    with h5py.File("pred.hdf", "w") as p_out:
        p_out.create_dataset("0", data=pred)

    _notes, midi = PostProcess(pred)

    if args.to_midi is not None:
        midi.write(args.to_midi)
def predictOne(self, path: str):
    """Run the vendored extraction pipeline on one file and wrap the result.

    Follows the inference path of the vendored project's main script.

    Args:
        path: Path of the input file handed to ``feature_extraction``.

    Returns:
        A tuple of two ``Signal`` objects built from column 0 and column 1 of
        the parsed result, each created with ``sampleRate=50``.
    """
    # Project imports are resolved lazily, at call time.
    from project.MelodyExt import feature_extraction
    from project.utils import load_model, save_model, matrix_parser
    from project.test import inference
    from project.model import seg, seg_pnn, sparse_loss
    from project.train import train_audio

    # Feature extraction: keep the first four features, reorder the axes.
    stacked = feature_extraction(path)
    feature = np.transpose(stacked[0:4], axes=(2, 1, 0))

    # The pretrained model file is selected by the "model" parameter.
    model_file = resource_filename(
        __name__,
        "../../../vendors/Vocal-Melody-Extraction/Pretrained_models/" +
        self.parameters["model"].value)
    network = load_model(model_file)

    # Inference on the first slice along the last axis.
    model_input = feature[:, :, 0]
    print(model_input.shape)
    raw_output = inference(feature=model_input, model=network, batch_size=10)

    parsed = matrix_parser(raw_output)
    first = Signal(parsed[:, 0], sampleRate=50)
    second = Signal(parsed[:, 1], sampleRate=50)
    return (first, second)
def main():
    """Command-line entry point: train a model or run inference on one file.

    With ``--phase training`` a ``seg`` or ``seg_pnn`` model is built, compiled,
    trained with ``train_audio`` and saved under ``--output_model_name``.
    With any other phase the model at ``--model_path`` is loaded, inference is
    run on ``--input_file`` and the parsed result is written to ``out_seg.txt``.

    Raises:
        ValueError: In the training phase, if ``--model_type`` contains neither
            ``"seg"`` nor ``"pnn"``.
    """
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--phase',
                        help='phase: training or testing (default: %(default)s',
                        type=str, default='testing')

    # arguments for training
    parser.add_argument('-t', '--model_type',
                        help='model type: seg or pnn (default: %(default)s',
                        type=str, default='seg')
    parser.add_argument('-d', '--data_type',
                        help='data type: audio or symbolic (default: %(default)s',
                        type=str, default='audio')
    # nargs='+' yields a list when the option is given, so the defaults are
    # lists as well; a bare string default would be a different type.
    parser.add_argument('-da', '--dataset_path', nargs='+',
                        help='path to data set (default: %(default)s',
                        type=str, default=['dataset'])
    parser.add_argument('-la', '--label_path', nargs='+',
                        help='path to data set label (default: %(default)s',
                        type=str, default=['dataset_label'])
    parser.add_argument('-ms', '--model_path_symbolic',
                        help='path to symbolic model (default: %(default)s',
                        type=str, default='model_symbolic')
    parser.add_argument('-w', '--window_width',
                        help='width of the input feature (default: %(default)s',
                        type=int, default=128)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size during training (default: %(default)s',
                        type=int, default=12)
    parser.add_argument('-e', '--epoch',
                        help='number of epoch (default: %(default)s',
                        type=int, default=5)
    parser.add_argument('-n', '--steps',
                        help='number of step per epoch (default: %(default)s',
                        type=int, default=6000)
    parser.add_argument('-o', '--output_model_name',
                        help='name of the output model (default: %(default)s',
                        type=str, default="out")

    # arguments for testing
    parser.add_argument('-m', '--model_path',
                        help='path to existing model (default: %(default)s',
                        type=str, default='transfer_audio_directly')
    parser.add_argument('-i', '--input_file',
                        help='path to input file (default: %(default)s',
                        type=str, default='train01.wav')
    parser.add_argument('-bb', '--batch_size_test',
                        help='batch size during testing (default: %(default)s',
                        type=int, default=10)

    args = parser.parse_args()
    print(args)

    if args.phase == "training":
        # arguments setting
        TIMESTEPS = args.window_width
        dataset_path = args.dataset_path
        label_path = args.label_path

        # load or create model
        if "seg" in args.model_type:
            model = seg(multi_grid_layer_n=1, feature_num=384, input_channel=1,
                        timesteps=TIMESTEPS)
        elif "pnn" in args.model_type:
            model = seg_pnn(multi_grid_layer_n=1, feature_num=384,
                            timesteps=TIMESTEPS,
                            prev_model=args.model_path_symbolic)
        else:
            # Without this guard an unknown type fails later with a NameError.
            raise ValueError(
                "Unknown model type: {!r} (expected 'seg' or 'pnn')".format(args.model_type))

        model.compile(optimizer="adam",
                      loss={'prediction': sparse_loss},
                      metrics=['accuracy'])

        # train
        train_audio(model, args.epoch, args.steps, args.batch_size_train,
                    args.window_width, dataset_path, label_path)

        # save model
        save_model(model, args.output_model_name)
    else:
        # load wav
        song = args.input_file

        # Feature extraction
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model
        model = load_model(args.model_path)

        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Output
        r = matrix_parser(extract_result)
        np.savetxt("out_seg.txt", r)
def seg_pnn(feature_num=128, timesteps=256, multi_grid_layer_n=5, multi_grid_n=3, prev_model="melody_transfer_transpose"):
    """Build a two-input model that adds adapted outputs of a frozen, previously trained network.

    A ``seg`` network (built with ``prog=True``) receives the weights of the
    model loaded from ``prev_model`` via ``model_copy`` and has all its layers
    frozen. Its outputs ``o_p[0]`` .. ``o_p[9]`` are passed through ``adapter``
    and added to the intermediate tensors of a new convolutional network.

    Args:
        feature_num: Size of the second input dimension of ``input_score_48``;
            ``input_score_12`` uses ``feature_num // 3``.
        timesteps: Size of the first input dimension of both inputs.
        multi_grid_layer_n: Number of iterations of the multi-grid loop.
        multi_grid_n: Number of dilated convolutions per multi-grid iteration
            (dilation rates ``2**0`` .. ``2**(multi_grid_n - 1)``).
        prev_model: Identifier passed to ``load_model`` for the source weights.

    Returns:
        A ``Model`` with inputs ``[input_score_48, input_score_12]`` and a
        2-channel output layer named ``'prediction'``.
    """
    # Tensors kept for the skip connections used further down.
    layer_out = []

    input_score_48 = Input(shape=(timesteps, feature_num, 1), name="input_score_48")
    input_score_12 = Input(shape=(timesteps, feature_num // 3, 1), name="input_score_12")

    # Build the transferred network, copy the trained weights in, then freeze it.
    me_transfer_seg = seg(multi_grid_layer_n=1, timesteps=timesteps, prog=True)
    me_seg = load_model(prev_model)
    model_copy(me_seg, me_transfer_seg)

    #TODO: move inside model_copy
    for index, layer in enumerate(me_transfer_seg.layers):
        me_transfer_seg.layers[index].trainable = False

    # Outputs of the frozen network; indexed 0..9 below.
    o_p = me_transfer_seg([input_score_12])

    en_l = Conv2D(2**5, (7, 7), strides=(1, 1), padding="same")(input_score_48)

    o = adapter(o_p[0], 2**(5), dropout_rate=0.2)
    en_l = add([en_l, o])

    # Stage 1: one stride-2 block followed by one stride-1 block, 2**5 filters.
    en_l1 = conv_block(en_l, 2**5, (3, 3), strides=(2, 2))
    en_l1 = conv_block(en_l1, 2**5, (3, 3), strides=(1, 1))

    # The tensor is stored before the adapter output is added.
    layer_out.append(en_l1)

    o = adapter(o_p[1], 2**(5), dropout_rate=0.2)
    en_l1 = add([en_l1, o])

    # Stage 2: 2**6 filters.
    en_l2 = conv_block(en_l1, 2**6, (3, 3), strides=(2, 2))
    en_l2 = conv_block(en_l2, 2**6, (3, 3), strides=(1, 1))
    en_l2 = conv_block(en_l2, 2**6, (3, 3), strides=(1, 1))

    layer_out.append(en_l2)

    o = adapter(o_p[2], 2**(6), dropout_rate=0.2)
    en_l2 = add([en_l2, o])

    # Stage 3: 2**7 filters.
    en_l3 = conv_block(en_l2, 2**7, (3, 3), strides=(2, 2))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))

    layer_out.append(en_l3)

    o = adapter(o_p[3], 2**(7), dropout_rate=0.2)
    en_l3 = add([en_l3, o])

    # Stage 4: 2**8 filters.
    en_l4 = conv_block(en_l3, 2**8, (3, 3), strides=(2, 2))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))

    layer_out.append(en_l4)

    o = adapter(o_p[4], 2**(8), dropout_rate=0.2)
    en_l4 = add([en_l4, o])

    feature = en_l4

    # Multi-grid section: a 1x1 branch plus `multi_grid_n` dilated 3x3 branches
    # are concatenated and reduced back to 2**9 channels.
    # NOTE(review): the nesting below was reconstructed from a collapsed
    # source line; in particular the adapter addition is presumed to sit
    # inside the outer loop -- confirm against the original file.
    for i in range(multi_grid_layer_n):
        feature = BatchNormalization()(Activation("relu")(feature))
        feature = Dropout(0.3)(feature)
        m = BatchNormalization()(Conv2D(2**9, (1, 1), strides=(1, 1), padding="same", activation="relu")(feature))
        multi_grid = m
        for ii in range(multi_grid_n):
            m = BatchNormalization()(Conv2D(2**9, (3, 3), strides=(1, 1), dilation_rate=2**ii, padding="same", activation="relu")(feature))
            multi_grid = concatenate([multi_grid, m])
        multi_grid = Dropout(0.3)(multi_grid)
        feature = Conv2D(2**9, (1, 1), strides=(1, 1), padding="same")(multi_grid)

        o = adapter(o_p[5], 2**(9), dropout_rate=0.3)
        feature = add([feature, o])

    feature = BatchNormalization()(Activation("relu")(feature))
    feature = Dropout(0.4)(feature)
    feature = Conv2D(2**8, (1, 1), strides=(1, 1), padding="same")(feature)
    # Skip connection from the stage-4 tensor.
    feature = add([feature, layer_out[3]])

    # Transposed-conv stage 1 (2**7 filters), merged with the stage-3 tensor.
    de_l1 = transpose_conv_block(feature, 2**7, (3, 3), strides=(2, 2))

    o = adapter(o_p[6], 2**(7), kernel_size=(1, 5), dropout_rate=0.4)
    de_l1 = add([de_l1, o])

    skip = de_l1
    de_l1 = BatchNormalization()(Activation("relu")(de_l1))
    de_l1 = concatenate(
        [de_l1, BatchNormalization()(Activation("relu")(layer_out[2]))])
    de_l1 = Dropout(0.4)(de_l1)
    de_l1 = Conv2D(2**7, (1, 1), strides=(1, 1), padding="same")(de_l1)
    de_l1 = add([de_l1, skip])

    # Transposed-conv stage 2 (2**6 filters), merged with the stage-2 tensor.
    de_l2 = transpose_conv_block(de_l1, 2**6, (3, 3), strides=(2, 2))

    o = adapter(o_p[7], 2**(6), kernel_size=(1, 5), dropout_rate=0.4)
    de_l2 = add([de_l2, o])

    skip = de_l2
    de_l2 = BatchNormalization()(Activation("relu")(de_l2))
    de_l2 = concatenate(
        [de_l2, BatchNormalization()(Activation("relu")(layer_out[1]))])
    de_l2 = Dropout(0.4)(de_l2)
    de_l2 = Conv2D(2**6, (1, 1), strides=(1, 1), padding="same")(de_l2)
    de_l2 = add([de_l2, skip])

    # Transposed-conv stage 3 (2**5 filters), merged with the stage-1 tensor.
    de_l3 = transpose_conv_block(de_l2, 2**5, (3, 3), strides=(2, 2))

    o = adapter(o_p[8], 2**(5), kernel_size=(1, 5), dropout_rate=0.4)
    de_l3 = add([de_l3, o])

    skip = de_l3
    de_l3 = BatchNormalization()(Activation("relu")(de_l3))
    de_l3 = concatenate(
        [de_l3, BatchNormalization()(Activation("relu")(layer_out[0]))])
    de_l3 = Dropout(0.4)(de_l3)
    de_l3 = Conv2D(2**5, (1, 1), strides=(1, 1), padding="same")(de_l3)
    de_l3 = add([de_l3, skip])

    # Final transposed-conv stage; no stored tensor is merged here.
    de_l4 = transpose_conv_block(de_l3, 2**5, (3, 3), strides=(2, 2))

    o = adapter(o_p[9], 2**(5), kernel_size=(1, 5), dropout_rate=0.4)
    de_l4 = add([de_l4, o])

    de_l4 = BatchNormalization()(Activation("relu")(de_l4))
    de_l4 = Dropout(0.4)(de_l4)
    # 2-channel output; the layer name is used as the loss key by callers.
    out = Conv2D(2, (1, 1), strides=(1, 1), padding="same", name='prediction')(de_l4)

    model = Model(inputs=[input_score_48, input_score_12], outputs=out)

    return model
def main(args):
    """Set up data flows, create or load a model, and train it.

    Args:
        args: Namespace providing ``dataset``, ``dataset_path``, ``channels``,
            ``output_model_name``, ``timesteps``, ``input_model``,
            ``use_harmonic``, ``multi_instruments``, ``train_batch_size``,
            ``val_batch_size``, ``use_ram``, ``early_stop``, ``epoch``,
            ``steps`` and ``val_steps``.

    Raises:
        TypeError: If ``args.dataset`` is not a key of ``dataflow_cls``.
        AssertionError: If ``args.dataset_path`` is given but is not a directory.
    """
    if args.dataset not in dataflow_cls:
        raise TypeError("Unsupported dataset: {}".format(args.dataset))

    # Hyper parameters that will be stored for future reuse
    hparams = {}

    # Parameters that will be passed to dataflow
    df_params = {}

    # Handling root path to the dataset
    d_path = dataset_paths[args.dataset]
    if args.dataset_path is not None:
        assert (os.path.isdir(args.dataset_path))
        d_path = args.dataset_path
    df_params["dataset_path"] = d_path

    # Number of channels that model need to know about
    ch_num = len(args.channels)
    channels = args.channels

    # Type of feature to use
    feature_type = "CFP"

    # Number of output classes
    out_classes = 3

    # Output model name
    out_model_name = args.output_model_name

    # Feature length on time dimension
    timesteps = args.timesteps

    # Continue to train on a pre-trained model
    if args.input_model is not None:
        # output model name is the same as input model
        out_model_name = args.input_model

        # load configuration of previous training
        feature_type, channels, out_classes, timesteps = model_info(args.input_model)
        ch_num = len(channels)
    else:
        if args.dataset == "MusicNet":
            # Special settings for MusicNet that has multiple instruments presented
            if args.use_harmonic:
                ch_num = Harmonic_num * 2
                channels = list(range(ch_num))
                feature_type = "HCFP"
            # Was misspelled ``multi_instruemnts`` (AttributeError); the same
            # flag is read as ``multi_instruments`` below.
            if args.multi_instruments:
                out_classes = 12  # There are total 11 types of instruments in MusicNet

    df_params["b_sz"] = args.train_batch_size
    df_params["phase"] = "train"
    df_params["use_ram"] = args.use_ram
    df_params["channels"] = channels
    df_params["mpe_only"] = not args.multi_instruments
    df_params["timesteps"] = timesteps

    print("Loading training data")
    df_cls = dataflow_cls[args.dataset]
    train_df = df_cls(**df_params)

    df_params["b_sz"] = args.val_batch_size
    df_params["phase"] = "val"

    print("Loading validation data")
    val_df = df_cls(**df_params)

    hparams["channels"] = channels
    hparams["timesteps"] = timesteps
    hparams["feature_type"] = feature_type
    hparams["output_classes"] = out_classes

    print("Creating/loading model")
    # Create model
    if args.input_model is not None:
        model = load_model(args.input_model)
    else:
        # Create new model
        model = model_attn.seg(feature_num=384,
                               input_channel=ch_num,
                               timesteps=timesteps,
                               out_class=out_classes)

        # NOTE(review): source indentation was lost; the path join and the
        # initial save are presumed to apply to newly created models only,
        # because a resumed model already uses its own path -- confirm.
        out_model_name = os.path.join(default_model_path, out_model_name)
        # Save model and configurations
        if not os.path.exists(out_model_name):
            os.makedirs(out_model_name)
        save_model(model, out_model_name, **hparams)

    model.compile(optimizer="adam",
                  loss={'prediction': sparse_loss},
                  metrics=['accuracy'])

    # create callbacks
    earlystop = callbacks.EarlyStopping(monitor="val_acc", patience=args.early_stop)
    checkpoint = callbacks.ModelCheckpoint(os.path.join(out_model_name, "weights.h5"),
                                           monitor="val_acc",
                                           save_best_only=True,
                                           save_weights_only=True)
    tensorboard = callbacks.TensorBoard(log_dir=os.path.join("tensorboard", args.output_model_name),
                                        write_images=True)
    callback_list = [checkpoint, earlystop, tensorboard]

    print("Start training")
    # Start training
    train(model, train_df, val_df,
          epoch=args.epoch,
          callbacks=callback_list,
          steps=args.steps,
          v_steps=args.val_steps)
def main(args):
    """Prepare data flows and a model for the "frame_onset" label mode, then train.

    Args:
        args: Namespace providing ``dataset``, ``dataset_path``, ``channels``,
            ``output_model_name``, ``timesteps``, ``input_model``,
            ``use_harmonic``, ``train_batch_size``, ``val_batch_size``,
            ``use_ram``, ``early_stop``, ``epoch``, ``steps`` and ``val_steps``.

    Raises:
        TypeError: If ``args.dataset`` is not a key of ``dataflow_cls``.
        AssertionError: If ``args.dataset_path`` is given but is not a directory.
    """
    if args.dataset not in dataflow_cls:
        raise TypeError

    # Root path of the dataset; an explicit directory overrides the default.
    d_path = dataset_paths[args.dataset]
    if args.dataset_path is not None:
        assert (os.path.isdir(args.dataset_path))
        d_path = args.dataset_path

    # Defaults taken from the command line.
    channels = args.channels
    ch_num = len(channels)
    feature_type = "CFP"
    out_model_name = args.output_model_name
    timesteps = args.timesteps

    # Label type decides the number of output classes.
    mode = "frame_onset"
    l_type = MusicNetLabelType(mode, timesteps=timesteps)
    out_classes = l_type.get_out_classes()

    resume = args.input_model is not None
    if resume:
        # Continue training: reuse the stored configuration.
        feature_type, channels, out_classes, timesteps = model_info(
            args.input_model)
        ch_num = len(channels)
    elif args.dataset == "MusicNet" and args.use_harmonic:
        # Harmonic features for MusicNet.
        ch_num = HarmonicNum * 2
        channels = list(range(ch_num))
        feature_type = "HCFP"

    # Parameters handed to the dataflow class.
    df_params = {
        "b_sz": args.train_batch_size,
        "phase": "train",
        "use_ram": args.use_ram,
        "channels": channels,
        "timesteps": timesteps,
        "out_classes": out_classes,
        "dataset_path": d_path,
        "label_conversion_func": l_type.get_conversion_func(),
    }

    print("Loading training data")
    df_cls = dataflow_cls[args.dataset]
    train_df = df_cls(**df_params)

    print("Loading validation data")
    df_params.update(b_sz=args.val_batch_size, phase="val")
    val_df = df_cls(**df_params)

    # Hyper parameters stored alongside the model for later reuse.
    hparams = {
        "channels": channels,
        "timesteps": timesteps,
        "feature_type": feature_type,
        "output_classes": out_classes,
    }

    print("Creating/loading model")
    if resume:
        model = load_model(args.input_model)
    else:
        model = seg(feature_num=384,
                    input_channel=ch_num,
                    timesteps=timesteps,
                    out_class=out_classes,
                    multi_grid_layer_n=1,
                    multi_grid_n=3)

    # Save model and configurations
    # NOTE(review): nesting reconstructed from a collapsed source line; the
    # save is presumed to run for both new and resumed models -- confirm.
    out_model_name = os.path.join(default_model_path, out_model_name)
    if not os.path.exists(out_model_name):
        os.makedirs(out_model_name)
    save_model(model, out_model_name, **hparams)

    # Weighted loss (no class weights are configured here).
    weight = None  # Frame mode
    if weight is not None:
        assert (len(weight) == out_classes
                ), "Weight length: {}, out classes: {}".format(
                    len(weight), out_classes)
    loss_func = lambda label, pred: mctl_loss(
        label, pred, out_classes=out_classes, weight=weight)

    # Multi-GPU training is switched off.
    use_multi_gpu = False
    if use_multi_gpu:
        para_model = multi_gpu_model(model, gpus=2, cpu_merge=False)
        para_model.compile(optimizer="adam",
                           loss={'prediction': loss_func},
                           metrics=['accuracy'])
        model = para_model
    else:
        model.compile(optimizer="adam",
                      loss={'prediction': loss_func},
                      metrics=['accuracy'])

    # create callbacks
    weights_file = os.path.join(out_model_name, "weights.h5")
    log_dir = os.path.join("tensorboard", args.output_model_name)
    earlystop = callbacks.EarlyStopping(monitor="val_loss",
                                        patience=args.early_stop)
    checkpoint = callbacks.ModelCheckpoint(weights_file,
                                           monitor="val_loss",
                                           save_best_only=False,
                                           save_weights_only=True)
    tensorboard = callbacks.TensorBoard(log_dir=log_dir, write_images=True)

    print("Start training")
    train(model,
          train_df,
          val_df,
          epoch=args.epoch,
          callbacks=[checkpoint, earlystop, tensorboard],
          steps=args.steps,
          v_steps=args.val_steps)
def FullTest(model_path,
             test_path,
             label_path=None,
             pred_save_path="./predictions",
             use_ram=True,
             MAX_FRAME=1800):
    """Predict on every HDF feature file under ``test_path`` and store the results.

    Predictions are written to ``pred.hdf`` and, when ``label_path`` is given,
    labels to ``label.hdf``; both files are created inside ``pred_save_path``.

    Args:
        model_path: Path passed to ``load_model`` and ``model_info``.
        test_path: Location handed to ``parse_path`` to list feature files.
        label_path: Optional directory containing the label files. A label
            file name is derived from each feature file name by replacing the
            extension with ``_label.pickle`` (unless the name already contains
            ``_label``).
        pred_save_path: Output directory; created if it does not exist.
        use_ram: Forwarded to ``load_files``.
        MAX_FRAME: Forwarded to ``predict`` to bound the frames per prediction.

    Raises:
        AssertionError: If the loaded features do not match the feature type
            the model was trained with.
    """
    # Load files
    print("Loading files")
    # Build a filtered list instead of deleting while iterating, which
    # silently skipped the element following each removed entry.
    features = [ff for ff in parse_path(test_path) if ff.endswith(".hdf")]

    if label_path is not None:
        # Assume there are exactly label files corresponding to the test audios
        labels = []
        for ff in features:
            ll = ff[(ff.rfind("/") + 1):]
            if "_label" not in ll:
                ll = ll[:ll.rfind(".")] + "_label.pickle"
            labels.append(os.path.join(label_path, ll))
        labels = load_files(labels, use_ram=use_ram)

    features = load_files(features, use_ram=use_ram)
    model = load_model(model_path)

    # Validate on model/feature configurations
    f_type, channels, out_classes, timesteps = model_info(model_path)
    key = list(features.keys())
    if f_type == "HCFP":
        assert features[key[0]].shape[2] >= 12, \
            "The model uses HCFP as input feature, but loaded features are not."
    if f_type == "CFP" and features[key[0]].shape[2] == 12:
        assert (len(channels) == 2 and 1 in channels and 3 in channels), (
            "The The given feature are HCFP, but the model uses more feature types. "
            "Model input feature types: " + str(channels) +
            " ({0: Z, 1: Spec, 2: GCoS, 3: Ceps})")
        channels = [0, 6]

    mpe = out_classes == 2

    # To avoid running out of memory.
    # 9000 is suitable for 32G RAM with one instrument only and all 4 channels
    # used. (Max ram usage almost 100%)
    print("Max frame per prediction: ", MAX_FRAME)

    # Start to predict
    os.makedirs(pred_save_path, exist_ok=True)
    pred_file = os.path.join(pred_save_path, "pred.hdf")
    label_file = os.path.join(pred_save_path, "label.hdf")
    # Context managers guarantee both files are closed even if prediction fails.
    with h5py.File(pred_file, "w") as pred_out, h5py.File(label_file, "w") as label_out:
        len_data = len(features)
        for idx in trange(len_data, desc='Dataset'):
            i = key[idx]
            feature = features[i][:]

            pred = predict(feature,
                           model,
                           MAX_FRAME=MAX_FRAME,
                           channels=list(channels),
                           instruments=out_classes - 1,
                           timesteps=timesteps)

            # Save to output
            pred_out.create_dataset(str(i), data=pred, compression="gzip", compression_opts=5)
            # Release the processed feature to keep memory usage down.
            del feature, features[i]

            # Process corresponding label (labels are consumed front to back)
            if label_path is not None:
                ll = labels[0]
                if not isinstance(ll, np.ndarray):
                    ll = label_conversion(ll, 352, 128, mpe=mpe)[:, :, 1:]
                label_out.create_dataset(str(i), data=ll, compression="gzip", compression_opts=5)
                del labels[0]
def main():
    """Command-line entry point: train the LSTM-WaveNet model or run style transfer.

    With ``--phase training`` a model is built with ``lstm_wavenet``, trained
    with ``train`` and stored with ``save_model``. Otherwise the input MIDI
    file(s) are converted to scores, passed through ``style_transfer`` with
    the loaded model, and the result is written to ``test.mid``.
    """
    # (flags, help text, value type, default) for every command-line option.
    option_table = (
        (('-p', '--phase'),
         'phase: training or testing (default: %(default)s', str, 'testing'),
        # options used for training
        (('-d', '--dataset_path'),
         'path to data set (default: %(default)s', str, 'bach_dataset.pickle'),
        (('-e', '--epoch'),
         'number of epoch(default: %(default)s', int, 80),
        (('-n', '--steps'),
         'number of step per epoch(default: %(default)s', int, 6000),
        (('-b', '--batch_size_train'),
         'batch size(default: %(default)s', int, 88 * 3),
        (('-o', '--output_model_name'),
         'name of the output model(default: %(default)s', str, "out"),
        # options used for testing
        (('-m', '--model_path'),
         'path to existing model (default: %(default)s', str, 'bach'),
        (('-i', '--input_file'),
         'path to input file (default: %(default)s', str, "LiveAndLetDie_all.mid"),
        (('-ii', '--input_file_melody'),
         'path to input melody file (default: %(default)s', str, "LiveAndLetDie_main.mid"),
        (('-s', '--subdivision'),
         'subdivision within one beat (default: %(default)s', int, 4),
    )

    parser = argparse.ArgumentParser()
    for flags, help_text, value_type, default_value in option_table:
        parser.add_argument(*flags, help=help_text, type=value_type, default=default_value)

    args = parser.parse_args()
    print(args)

    subdivision = args.subdivision

    if args.phase == "training":
        # Fixed window settings for training.
        timesteps = 32
        step = 4

        model = lstm_wavenet(num_features_lr=91,
                             timesteps=timesteps,
                             step=step,
                             num_units_lstm=[150, 150, 150, 150],
                             num_dense=150,
                             conv_layers=5,
                             skip_layers=2)
        model.compile(optimizer="adam",
                      loss={'prediction': 'binary_crossentropy'},
                      metrics=['accuracy'])

        model = train(model,
                      args.dataset_path,
                      subdivision,
                      epoch=args.epoch,
                      steps=args.steps,
                      timesteps=timesteps,
                      step=step,
                      batch_size=args.batch_size_train)

        save_model(model, args.output_model_name)
        return

    # Testing phase: load the input score and the melody score.
    score = midi2score(args.input_file, subdivision)
    if args.input_file_melody == "none":
        # No melody file: use an all-zero score of the same shape.
        score_melody = np.zeros(score.shape)
    else:
        score_melody = midi2score(args.input_file_melody, subdivision)

    score = add_beat(score, subdivision)
    score_melody = add_beat(score_melody, subdivision)

    # Only the first 640 entries of each score are used.
    score = np.array(score[0:640])
    score_melody = np.array(score_melody[0:640])

    extended_score = padding(score, 32, 4)

    model = load_model(model_name=args.model_path)

    result = style_transfer(extended_score, score_melody, model, iter_num=25)

    score2midi("test.mid", result, subdivision, 120,
               melody_constraint=True, melody=score_melody)
    print("saved")
def main(args):
    """Predict on the test split of a dataset and optionally save the outputs.

    When ``args.save_pred`` is set, predictions and labels are stored as
    gzip-compressed datasets in ``pred.hdf`` and ``label.hdf`` inside that
    directory (created if missing), keyed by the piece index.

    Args:
        args: Namespace providing ``model_path``, ``dataset`` and ``save_pred``.
    """
    model = load_model(args.model_path)
    feature_type, channels, out_classes, timesteps = model_info(args.model_path)

    d_path = dataset_paths[args.dataset]
    df_cls = dataflow_cls[args.dataset]
    df = df_cls(d_path, "test", timesteps=timesteps, channels=channels, b_sz=16)
    eval_flow = EvalFlow(df)

    save_outputs = args.save_pred is not None
    out_f = None
    out_l = None
    try:
        if save_outputs:
            if not os.path.exists(args.save_pred):
                os.makedirs(args.save_pred)
            out_f = h5py.File(os.path.join(args.save_pred, "pred.hdf"), "w")
            out_l = h5py.File(os.path.join(args.save_pred, "label.hdf"), "w")

        # This loop goes through pieces.
        # NOTE(review): the count is hard-coded to 10 although progress is
        # printed against len(eval_flow) -- confirm whether this is intended.
        for i in range(10):
            print("{}/{}".format(i + 1, len(eval_flow)))

            features = []
            labels = []
            # Collect batches from a single piece
            for x, y in eval_flow:
                features.append(x)
                labels.append(y)

            pred, ll = predict(features, labels, model)

            if save_outputs:
                out_f.create_dataset(str(i), data=pred, compression="gzip", compression_opts=5)
                out_l.create_dataset(str(i), data=ll, compression="gzip", compression_opts=5)
    finally:
        # Close whatever was opened, even if prediction or writing failed.
        for handle in (out_f, out_l):
            if handle is not None:
                handle.close()
def arguments_post_process(args):
    """Resolve dataset paths and create or load the model described by ``args``.

    Mutates ``args`` in place (``output_model_name``, ``channels`` and, for
    MAPS, ``no_harmonic``).

    Args:
        args: Namespace providing ``MusicNet_feature_path``,
            ``MAPS_feature_path``, ``input_model``, ``output_model_name``,
            ``channels``, ``no_harmonic``, ``mpe_only``, ``window_width`` and
            ``num_datasets``.

    Returns:
        Tuple ``(model, dataset_path, label_path, dataset_type)`` where the two
        path entries are lists of ``.hdf`` / ``_label.pickle`` file paths built
        from the names listed in ``SongList.csv`` under the dataset directory.

    Raises:
        AssertionError: If neither feature path is set on ``args``.
    """
    # path to dataset
    if args.MusicNet_feature_path is not None:
        base_path = args.MusicNet_feature_path
        dataset_type = "MusicNet"
    elif args.MAPS_feature_path is not None:
        base_path = args.MAPS_feature_path
        dataset_type = "MAPS"
    else:
        assert (
            False
        ), "Please at least assign one of the flags: --MAPS-feature-path or --MusicNet-feature-path"

    # Continue to train on a pre-trained model
    if args.input_model is not None:
        # output model name is the same as input model
        args.output_model_name = args.input_model

        # load configuration of previous training
        feature_type, channels, out_classes = model_info(
            os.path.join("model", args.input_model))
        ch_num = len(channels)
        args.channels = channels

        # load model
        model = load_model(os.path.join("model", args.input_model))

    # Train a new model
    else:
        # setup output model name
        # NOTE(review): this reads ``args.channel`` while the rest of the
        # function uses ``args.channels`` -- confirm the attribute exists.
        if " + " in args.output_model_name:
            args.output_model_name = args.output_model_name[0:13] + str(
                args.channel)

        # Number of channels to use
        ch_num = len(args.channels)

        # Train on MusicNet
        if dataset_type == "MusicNet":
            # Input parameters
            if args.no_harmonic == True:
                ch_num = 2
                args.channels = [0, 6]  # Spec. and Ceps. channel
                # The assignment above is immediately overridden here.
                args.channels = [1, 3]  # For train on maestro
                feature_type = "CFP"
            else:
                ch_num = Harmonic_Num * 2
                args.channels = [i for i in range(ch_num)
                                 ]  # Including harmonic channels
                feature_type = "HCFP"

            # Output parameters
            if args.mpe_only:
                out_classes = 2
            else:
                out_classes = 12

        # Train on MAPS
        elif dataset_type == "MAPS":
            base_path = args.MAPS_feature_path
            out_classes = 2
            dataset_type = "MAPS"
            feature_type = "CFP"
            args.no_harmonic = True

        # Create new model
        model = seg(multi_grid_layer_n=1,
                    feature_num=384,
                    input_channel=ch_num,
                    timesteps=args.window_width,
                    out_class=out_classes)

        # NOTE(review): nesting reconstructed from a collapsed source line;
        # the save step is presumed to apply to newly created models only --
        # confirm against the original file.
        path = os.path.join("./model", args.output_model_name)
        # Save model and configurations
        if not os.path.exists(path):
            os.makedirs(path)
        save_model(model,
                   path,
                   feature_type=feature_type,
                   input_channels=args.channels,
                   output_classes=out_classes)

    model.compile(optimizer="adam",
                  loss={'prediction': sparse_loss},
                  metrics=['accuracy'])

    # Load file according to recordings in SongList.csv file
    distinct_file = set()
    with open(os.path.join(base_path, "SongList.csv"), newline='') as config:
        reader = csv.DictReader(config)
        for row in reader:
            distinct_file.add(row["File name"])
    # Set iteration order is arbitrary, so which files end up in the first
    # ``num_datasets`` entries is not fixed.
    dataset_path = [ff for ff in distinct_file][0:args.num_datasets]
    label_path = [i + "_label.pickle" for i in dataset_path]
    dataset_path = [i + ".hdf" for i in dataset_path]
    print("Datasets chosen: ", dataset_path)

    dataset_path = [os.path.join(base_path, dp) for dp in dataset_path]
    label_path = [os.path.join(base_path, lp) for lp in label_path]

    return model, dataset_path, label_path, dataset_type
def main(args):
    """Predict on one audio file, store the prediction, and optionally write MIDI.

    The raw prediction is stored as dataset ``"0"`` in ``pred.hdf`` in the
    current working directory. When ``args.to_midi`` is set, one MIDI object
    per threshold (0.45 and 0.5) is produced via ``to_midi`` using the path
    ``args.to_midi + "_" + str(threshold)``.

    Args:
        args: Namespace providing ``input_audio``, ``model_path``,
            ``num_harmonics``, ``output_fig_name`` and ``to_midi``.

    Raises:
        AssertionError: If ``args.input_audio`` is not an existing file, or if
            the model configuration is inconsistent (channel count, or an
            output class count other than 2 or 12).
    """
    # Pre-process features
    assert os.path.isfile(args.input_audio), \
        "The given path is not a file!. Please check your input again."
    print("Processing features")
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)

    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        assert (len(channels) == (args.num_harmonics * 2 + 2))

        spec = []
        ceps = []
        for i in range(args.num_harmonics):
            spec.append(fetch_harmonic(tfrL0, cenf, i))
            ceps.append(fetch_harmonic(tfrLQ, cenf, i))

        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))
        feature = np.dstack((spec, ceps))
    else:
        assert (len(channels) <= 4)

        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))

    model = load_model(args.model_path)

    print("Predicting...")
    pred = predict(feature,
                   model,
                   timesteps=timesteps,
                   channels=channels,
                   instruments=out_class - 1)

    # The context manager closes the file even if writing the dataset fails.
    with h5py.File("pred.hdf", "w") as p_out:
        p_out.create_dataset("0", data=pred)

    # Peak-pick every slice along the last axis, then keep the first 88 columns.
    for i in range(pred.shape[2]):
        pred[:, :88, i] = peak_picking(pred[:, :, i])
    pred = pred[:, :88]

    # Print figure
    # os.path.dirname handles paths without a separator; slicing up to
    # rfind("/") chopped the last character of such paths.
    base_path = os.path.dirname(args.input_audio)
    save_name = os.path.join(base_path, args.output_fig_name)

    plot_range = range(500, 1500)
    # ``>=``: the largest index must be strictly smaller than len(pred).
    if max(plot_range) >= len(pred):
        plot_range = range(0, len(pred))
    # Only needed by the (currently disabled) PLOT call below.
    pp = pred[plot_range]

    if out_class >= 11:
        assert out_class == 12, (
            "There is something wrong with the configuration. "
            "Expected value: 12, Current value: {}".format(out_class))
        titles = MusicNet_Instruments
    else:
        assert out_class == 2, (
            "There is something wrong with the configuration. "
            "Expected value: 2, Current value: {}".format(out_class))
        titles = ["Piano"]

    print("Ploting figure...")
    #PLOT(pp, save_name, plot_range, titles=titles)
    print("Output figure to {}".format(base_path))

    if args.to_midi is not None:
        midi_path = args.to_midi

        threshold = [0.45, 0.5]
        for th in threshold:
            midi = to_midi(pred, midi_path + "_" + str(th), threshold=th)

            roll = midi.get_piano_roll()
            print("Shape of output midi roll: ", roll.shape)