def training(args):
    """Build (or transfer) a segmentation model and train it on audio features.

    Args:
        args: parsed argparse namespace; reads window_width, model_type,
            dataset_path, label_path, model_path_symbolic, epoch, steps,
            batch_size_train and output_model_name.

    Raises:
        ValueError: if ``args.model_type`` contains neither "seg" nor "pnn".
    """
    # Arguments setting.
    timesteps = args.window_width
    dataset_path = args.dataset_path
    label_path = args.label_path

    # Load or create model.
    if "seg" in args.model_type:
        model = seg(multi_grid_layer_n=1,
                    feature_num=384,
                    input_channel=1,
                    timesteps=timesteps)
    elif "pnn" in args.model_type:
        # Progressive neural network: transfer from a pre-trained symbolic model.
        model = seg_pnn(multi_grid_layer_n=1,
                        feature_num=384,
                        timesteps=timesteps,
                        prev_model=args.model_path_symbolic)
    else:
        # Previously `model` stayed unbound here and crashed with a confusing
        # UnboundLocalError at compile time; fail fast with a clear message.
        raise ValueError("Unknown model_type: {}".format(args.model_type))

    model.compile(optimizer="adam",
                  loss={'prediction': sparse_loss},
                  metrics=['accuracy'])

    # Train.
    train_audio(model, args.epoch, args.steps, args.batch_size_train,
                args.window_width, dataset_path, label_path)

    # Save model.
    save_model(model, args.output_model_name)
def main():
    """Command-line entry point: train a seg/pnn model or run inference on a wav file.

    In the "training" phase a model is created (or transferred from a symbolic
    model) and trained; otherwise an existing model is loaded, features are
    extracted from the input file and the parsed result is written to
    ``out_seg.txt``.
    """
    # Arguments.  NOTE: the help strings previously read "(default: %(default)s"
    # with an unbalanced parenthesis; the closing ")" has been added.
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--phase',
                        help='phase: training or testing (default: %(default)s)',
                        type=str, default='testing')
    # arguments for training
    parser.add_argument('-t', '--model_type',
                        help='model type: seg or pnn (default: %(default)s)',
                        type=str, default='seg')
    parser.add_argument('-d', '--data_type',
                        help='data type: audio or symbolic (default: %(default)s)',
                        type=str, default='audio')
    parser.add_argument('-da', '--dataset_path', nargs='+',
                        help='path to data set (default: %(default)s)',
                        type=str, default='dataset')
    parser.add_argument('-la', '--label_path', nargs='+',
                        help='path to data set label (default: %(default)s)',
                        type=str, default='dataset_label')
    parser.add_argument('-ms', '--model_path_symbolic',
                        help='path to symbolic model (default: %(default)s)',
                        type=str, default='model_symbolic')
    parser.add_argument('-w', '--window_width',
                        help='width of the input feature (default: %(default)s)',
                        type=int, default=128)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size during training (default: %(default)s)',
                        type=int, default=12)
    parser.add_argument('-e', '--epoch',
                        help='number of epoch (default: %(default)s)',
                        type=int, default=5)
    parser.add_argument('-n', '--steps',
                        help='number of step per epoch (default: %(default)s)',
                        type=int, default=6000)
    parser.add_argument('-o', '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str, default="out")
    # arguments for testing
    parser.add_argument('-m', '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str, default='transfer_audio_directly')
    parser.add_argument('-i', '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str, default='train01.wav')
    parser.add_argument('-bb', '--batch_size_test',
                        help='batch size during testing (default: %(default)s)',
                        type=int, default=10)

    args = parser.parse_args()
    print(args)

    if args.phase == "training":
        # Arguments setting.
        timesteps = args.window_width
        dataset_path = args.dataset_path
        label_path = args.label_path

        # Load or create model.
        if "seg" in args.model_type:
            model = seg(multi_grid_layer_n=1,
                        feature_num=384,
                        input_channel=1,
                        timesteps=timesteps)
        elif "pnn" in args.model_type:
            model = seg_pnn(multi_grid_layer_n=1,
                            feature_num=384,
                            timesteps=timesteps,
                            prev_model=args.model_path_symbolic)
        else:
            # Was an UnboundLocalError at compile time; fail with a clear message.
            raise ValueError("Unknown model_type: {}".format(args.model_type))

        model.compile(optimizer="adam",
                      loss={'prediction': sparse_loss},
                      metrics=['accuracy'])

        # Train.
        train_audio(model, args.epoch, args.steps, args.batch_size_train,
                    args.window_width, dataset_path, label_path)

        # Save model.
        save_model(model, args.output_model_name)
    else:
        # Load wav.
        song = args.input_file

        # Feature extraction; keep the first 4 feature planes and move time first.
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # Load model.
        model = load_model(args.model_path)

        # Inference on the first feature channel only.
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Output.
        r = matrix_parser(extract_result)
        np.savetxt("out_seg.txt", r)
def main(args):
    """Train (or continue training) a multi-pitch model on the chosen dataset.

    Args:
        args: parsed argparse namespace; reads dataset, dataset_path, channels,
            output_model_name, timesteps, input_model, use_harmonic,
            multi_instruments, train/val batch sizes, use_ram, epoch, steps,
            val_steps, early_stop.

    Raises:
        TypeError: if ``args.dataset`` is not a known dataflow class.
    """
    if args.dataset not in dataflow_cls:
        raise TypeError("Unknown dataset: {}".format(args.dataset))

    # Hyper parameters that will be stored for future reuse.
    hparams = {}
    # Parameters that will be passed to dataflow.
    df_params = {}

    # Handling root path to the dataset.
    d_path = dataset_paths[args.dataset]
    if args.dataset_path is not None:
        assert os.path.isdir(args.dataset_path)
        d_path = args.dataset_path
    df_params["dataset_path"] = d_path

    # Number of channels that the model needs to know about.
    ch_num = len(args.channels)
    channels = args.channels
    # Type of feature to use.
    feature_type = "CFP"
    # Number of output classes.
    out_classes = 3
    # Output model name.
    out_model_name = args.output_model_name
    # Feature length on time dimension.
    timesteps = args.timesteps

    # Continue to train on a pre-trained model.
    if args.input_model is not None:
        # Output model name is the same as the input model.
        out_model_name = args.input_model
        # Load configuration of previous training.
        feature_type, channels, out_classes, timesteps = model_info(args.input_model)
        ch_num = len(channels)
    else:
        if args.dataset == "MusicNet":
            # Special settings for MusicNet, which has multiple instruments present.
            if args.use_harmonic:
                ch_num = Harmonic_num * 2
                channels = [i for i in range(ch_num)]
                feature_type = "HCFP"
            # FIX: was `args.multi_instruemnts` (typo); the flag is spelled
            # `multi_instruments` where it is read again below.
            if args.multi_instruments:
                out_classes = 12  # 11 instrument types in MusicNet + background

    df_params["b_sz"] = args.train_batch_size
    df_params["phase"] = "train"
    df_params["use_ram"] = args.use_ram
    df_params["channels"] = channels
    df_params["mpe_only"] = not args.multi_instruments
    df_params["timesteps"] = timesteps

    print("Loading training data")
    df_cls = dataflow_cls[args.dataset]
    train_df = df_cls(**df_params)

    df_params["b_sz"] = args.val_batch_size
    df_params["phase"] = "val"
    print("Loading validation data")
    val_df = df_cls(**df_params)

    hparams["channels"] = channels
    hparams["timesteps"] = timesteps
    hparams["feature_type"] = feature_type
    hparams["output_classes"] = out_classes

    print("Creating/loading model")
    # Create model.
    if args.input_model is not None:
        model = load_model(args.input_model)
    else:
        # Create new model.
        model = model_attn.seg(feature_num=384,
                               input_channel=ch_num,
                               timesteps=timesteps,
                               out_class=out_classes)

    out_model_name = os.path.join(default_model_path, out_model_name)
    # Save model and configurations.
    if not os.path.exists(out_model_name):
        os.makedirs(out_model_name)
    save_model(model, out_model_name, **hparams)

    model.compile(optimizer="adam",
                  loss={'prediction': sparse_loss},
                  metrics=['accuracy'])

    # Create callbacks.
    earlystop = callbacks.EarlyStopping(monitor="val_acc",
                                        patience=args.early_stop)
    checkpoint = callbacks.ModelCheckpoint(os.path.join(out_model_name, "weights.h5"),
                                           monitor="val_acc",
                                           save_best_only=True,
                                           save_weights_only=True)
    tensorboard = callbacks.TensorBoard(log_dir=os.path.join("tensorboard", args.output_model_name),
                                        write_images=True)
    callback_list = [checkpoint, earlystop, tensorboard]

    print("Start training")
    # Start training.
    train(model, train_df, val_df,
          epoch=args.epoch,
          callbacks=callback_list,
          steps=args.steps,
          v_steps=args.val_steps)
def main(args):
    """Train a frame+onset multi-pitch model on the chosen dataset.

    Args:
        args: parsed argparse namespace; reads dataset, dataset_path, channels,
            output_model_name, timesteps, input_model, use_harmonic, use_ram,
            train/val batch sizes, epoch, steps, val_steps, early_stop.

    Raises:
        TypeError: if ``args.dataset`` is not a known dataflow class.
    """
    if args.dataset not in dataflow_cls:
        raise TypeError("Unknown dataset: {}".format(args.dataset))

    # Hyper parameters that will be stored for future reuse.
    hparams = {}
    # Parameters that will be passed to dataflow.
    df_params = {}

    # Handling root path to the dataset.
    d_path = dataset_paths[args.dataset]
    if args.dataset_path is not None:
        assert os.path.isdir(args.dataset_path)
        d_path = args.dataset_path

    # Number of channels that the model needs to know about.
    ch_num = len(args.channels)
    channels = args.channels
    # Type of feature to use.
    feature_type = "CFP"
    # Output model name.
    out_model_name = args.output_model_name
    # Feature length on time dimension.
    timesteps = args.timesteps

    # Label type: frame activation plus onset detection.
    mode = "frame_onset"
    l_type = MusicNetLabelType(mode, timesteps=timesteps)
    # Number of output classes is determined by the label type.
    out_classes = l_type.get_out_classes()

    # Continue to train on a pre-trained model.
    if args.input_model is not None:
        # Load configuration of previous training.
        feature_type, channels, out_classes, timesteps = model_info(args.input_model)
        ch_num = len(channels)
    else:
        if args.dataset == "MusicNet":
            # Special settings for MusicNet, which has multiple instruments present.
            if args.use_harmonic:
                ch_num = HarmonicNum * 2
                channels = [i for i in range(ch_num)]
                feature_type = "HCFP"

    df_params["b_sz"] = args.train_batch_size
    df_params["phase"] = "train"
    df_params["use_ram"] = args.use_ram
    df_params["channels"] = channels
    df_params["timesteps"] = timesteps
    df_params["out_classes"] = out_classes
    df_params["dataset_path"] = d_path
    df_params["label_conversion_func"] = l_type.get_conversion_func()

    print("Loading training data")
    df_cls = dataflow_cls[args.dataset]
    train_df = df_cls(**df_params)

    print("Loading validation data")
    df_params["b_sz"] = args.val_batch_size
    df_params["phase"] = "val"
    val_df = df_cls(**df_params)

    hparams["channels"] = channels
    hparams["timesteps"] = timesteps
    hparams["feature_type"] = feature_type
    hparams["output_classes"] = out_classes

    print("Creating/loading model")
    # Create model.
    if args.input_model is not None:
        model = load_model(args.input_model)
    else:
        # Create new model.
        model = seg(feature_num=384,
                    input_channel=ch_num,
                    timesteps=timesteps,
                    out_class=out_classes,
                    multi_grid_layer_n=1,
                    multi_grid_n=3)

    # Save model and configurations.
    out_model_name = os.path.join(default_model_path, out_model_name)
    if not os.path.exists(out_model_name):
        os.makedirs(out_model_name)
    save_model(model, out_model_name, **hparams)

    # Optional class weighting for the loss (frame mode); None disables it.
    weight = None
    if weight is not None:
        assert len(weight) == out_classes, \
            "Weight length: {}, out classes: {}".format(len(weight), out_classes)

    # FIX: `loss_func` was only assigned inside the `if weight is not None:`
    # branch, so with the hard-coded `weight = None` the compile call below
    # always raised NameError.  Define it unconditionally; `weight=None`
    # simply means "no class weighting".
    loss_func = lambda label, pred: mctl_loss(
        label, pred, out_classes=out_classes, weight=weight)

    # (A dead `if False:` multi-GPU branch using multi_gpu_model was removed.)
    model.compile(optimizer="adam",
                  loss={'prediction': loss_func},
                  metrics=['accuracy'])

    # Create callbacks.
    earlystop = callbacks.EarlyStopping(monitor="val_loss",
                                        patience=args.early_stop)
    checkpoint = callbacks.ModelCheckpoint(os.path.join(out_model_name, "weights.h5"),
                                           monitor="val_loss",
                                           save_best_only=False,
                                           save_weights_only=True)
    tensorboard = callbacks.TensorBoard(log_dir=os.path.join("tensorboard", args.output_model_name),
                                        write_images=True)
    callback_list = [checkpoint, earlystop, tensorboard]

    print("Start training")
    # Start training.
    train(model, train_df, val_df,
          epoch=args.epoch,
          callbacks=callback_list,
          steps=args.steps,
          v_steps=args.val_steps)
def main():
    """Command-line entry point: train an LSTM-WaveNet style model or run style transfer.

    In the "training" phase a new lstm_wavenet model is built and trained on a
    pickled dataset; otherwise an existing model is loaded and style transfer is
    applied to the input MIDI, writing the result to ``test.mid``.
    """
    # Arguments.  NOTE: help strings previously had an unbalanced "(default:"
    # parenthesis; the closing ")" has been added.
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--phase',
                        help='phase: training or testing (default: %(default)s)',
                        type=str, default='testing')
    # arguments for training (was mislabeled "testing" in a copy-paste)
    parser.add_argument('-d', '--dataset_path',
                        help='path to data set (default: %(default)s)',
                        type=str, default='bach_dataset.pickle')
    parser.add_argument('-e', '--epoch',
                        help='number of epoch (default: %(default)s)',
                        type=int, default=80)
    parser.add_argument('-n', '--steps',
                        help='number of step per epoch (default: %(default)s)',
                        type=int, default=6000)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size (default: %(default)s)',
                        type=int, default=88 * 3)
    parser.add_argument('-o', '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str, default="out")
    # arguments for testing
    parser.add_argument('-m', '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str, default='bach')
    parser.add_argument('-i', '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str, default="LiveAndLetDie_all.mid")
    parser.add_argument('-ii', '--input_file_melody',
                        help='path to input melody file (default: %(default)s)',
                        type=str, default="LiveAndLetDie_main.mid")
    parser.add_argument('-s', '--subdivision',
                        help='subdivision within one beat (default: %(default)s)',
                        type=int, default=4)

    args = parser.parse_args()
    print(args)

    if args.phase == "training":
        # Set arguments.
        timesteps = 32
        step = 4
        subdivision = args.subdivision
        batch_size = args.batch_size_train
        dataset_path = args.dataset_path

        # Create model.
        model = lstm_wavenet(num_features_lr=91,
                             timesteps=timesteps,
                             step=step,
                             num_units_lstm=[150, 150, 150, 150],
                             num_dense=150,
                             conv_layers=5,
                             skip_layers=2)
        model.compile(optimizer="adam",
                      loss={'prediction': 'binary_crossentropy'},
                      metrics=['accuracy'])

        # Train.
        model = train(model, dataset_path, subdivision,
                      epoch=args.epoch,
                      steps=args.steps,
                      timesteps=timesteps,
                      step=step,
                      batch_size=batch_size)

        # Save model.
        save_model(model, args.output_model_name)
    else:
        # Load input file(s).
        subdivision = args.subdivision
        path = args.input_file
        path_melody = args.input_file_melody
        score = midi2score(path, subdivision)
        if path_melody == "none":
            # No melody constraint: use an all-zero melody score.
            score_melody = np.zeros(score.shape)
        else:
            score_melody = midi2score(path_melody, subdivision)

        # Append beat information and truncate to a fixed length of 640 frames.
        score = add_beat(score, subdivision)
        score_melody = add_beat(score_melody, subdivision)
        score = np.array(score[0:640])
        score_melody = np.array(score_melody[0:640])
        extended_score = padding(score, 32, 4)

        # Load model.
        model = load_model(model_name=args.model_path)

        # Generation via iterative style transfer.
        result = style_transfer(extended_score, score_melody, model, iter_num=25)

        # Save result.
        score2midi("test.mid", result, subdivision, 120,
                   melody_constraint=True, melody=score_melody)
        print("saved")
def arguments_post_process(args):
    """Resolve dataset paths and create/load the model from parsed arguments.

    Returns:
        (model, dataset_path, label_path, dataset_type) where dataset_path and
        label_path are lists of per-song feature (.hdf) and label (.pickle)
        file paths under the chosen feature directory.
    """
    # Path to dataset: exactly one of the two feature-path flags must be set.
    if args.MusicNet_feature_path is not None:
        base_path = args.MusicNet_feature_path
        dataset_type = "MusicNet"
    elif args.MAPS_feature_path is not None:
        base_path = args.MAPS_feature_path
        dataset_type = "MAPS"
    else:
        assert (
            False
        ), "Please at least assign one of the flags: --MAPS-feature-path or --MusicNet-feature-path"

    # Continue to train on a pre-trained model.
    if args.input_model is not None:
        # Output model name is the same as the input model.
        args.output_model_name = args.input_model
        # Load configuration of previous training.
        feature_type, channels, out_classes = model_info(
            os.path.join("model", args.input_model))
        ch_num = len(channels)
        args.channels = channels
        # Load model.
        model = load_model(os.path.join("model", args.input_model))
    # Train a new model.
    else:
        # Setup output model name.
        # NOTE(review): `args.channel` (singular) is probably a typo for
        # `args.channels` — confirm which attribute the parser defines.
        if " + " in args.output_model_name:
            args.output_model_name = args.output_model_name[0:13] + str(
                args.channel)

        # Number of channels to use.
        ch_num = len(args.channels)

        # Train on MusicNet.
        if dataset_type == "MusicNet":
            # Input parameters.
            if args.no_harmonic == True:
                ch_num = 2
                # NOTE(review): the first assignment is immediately overwritten
                # by the second — dead code left over from a maestro experiment;
                # confirm which channel pair is intended.
                args.channels = [0, 6]  # Spec. and Ceps. channel
                args.channels = [1, 3]  # For train on maestro
                feature_type = "CFP"
            else:
                ch_num = Harmonic_Num * 2
                args.channels = [i for i in range(ch_num)
                                 ]  # Including harmonic channels
                feature_type = "HCFP"
            # Output parameters.
            if args.mpe_only:
                out_classes = 2
            else:
                out_classes = 12
        # Train on MAPS.
        elif dataset_type == "MAPS":
            base_path = args.MAPS_feature_path
            out_classes = 2
            dataset_type = "MAPS"
            feature_type = "CFP"
            args.no_harmonic = True

        # Create new model.
        model = seg(multi_grid_layer_n=1,
                    feature_num=384,
                    input_channel=ch_num,
                    timesteps=args.window_width,
                    out_class=out_classes)

        path = os.path.join("./model", args.output_model_name)
        # Save model and configurations.
        if not os.path.exists(path):
            os.makedirs(path)
        save_model(model,
                   path,
                   feature_type=feature_type,
                   input_channels=args.channels,
                   output_classes=out_classes)

    # Compile for both the loaded and the freshly created model.
    # NOTE(review): indentation reconstructed — confirm compile was intended at
    # function level rather than only in the new-model branch.
    model.compile(optimizer="adam",
                  loss={'prediction': sparse_loss},
                  metrics=['accuracy'])

    # Load files according to recordings listed in SongList.csv.
    distinct_file = set()
    with open(os.path.join(base_path, "SongList.csv"), newline='') as config:
        reader = csv.DictReader(config)
        for row in reader:
            distinct_file.add(row["File name"])

    # Keep at most `num_datasets` songs; derive label/feature file names.
    dataset_path = [ff for ff in distinct_file][0:args.num_datasets]
    label_path = [i + "_label.pickle" for i in dataset_path]
    dataset_path = [i + ".hdf" for i in dataset_path]
    print("Datasets chosen: ", dataset_path)
    dataset_path = [os.path.join(base_path, dp) for dp in dataset_path]
    label_path = [os.path.join(base_path, lp) for lp in label_path]

    return model, dataset_path, label_path, dataset_type