コード例 #1
0
def training(args):
    """Build, compile, and train an audio transcription model.

    Args:
        args: Parsed command-line arguments providing ``window_width``,
            ``model_type``, ``dataset_path``, ``label_path``,
            ``model_path_symbolic``, ``epoch``, ``steps``,
            ``batch_size_train`` and ``output_model_name``.

    Raises:
        ValueError: If ``args.model_type`` contains neither "seg" nor "pnn".
    """
    # Width of the input feature window on the time axis.
    timesteps = args.window_width

    dataset_path = args.dataset_path
    label_path = args.label_path

    # Load or create the model.
    if "seg" in args.model_type:
        model = seg(multi_grid_layer_n=1,
                    feature_num=384,
                    input_channel=1,
                    timesteps=timesteps)
    elif "pnn" in args.model_type:
        # Progressive neural network: transfers from a pre-trained
        # symbolic-domain model.
        model = seg_pnn(multi_grid_layer_n=1,
                        feature_num=384,
                        timesteps=timesteps,
                        prev_model=args.model_path_symbolic)
    else:
        # Fail fast instead of hitting an UnboundLocalError at compile time.
        raise ValueError("Unknown model type: {}".format(args.model_type))

    model.compile(optimizer="adam",
                  loss={'prediction': sparse_loss},
                  metrics=['accuracy'])

    # Train.
    train_audio(model, args.epoch, args.steps, args.batch_size_train,
                args.window_width, dataset_path, label_path)

    # Persist the trained model.
    save_model(model, args.output_model_name)
コード例 #2
0
def main():
    """Parse command-line arguments, then run either training or testing.

    In the ``training`` phase a seg/pnn model is built, compiled, trained on
    the configured dataset, and saved.  In the ``testing`` phase an existing
    model is loaded, features are extracted from the input audio file, and
    the parsed transcription is written to ``out_seg.txt``.
    """
    parser = argparse.ArgumentParser()
    # Fixed: all help strings previously lacked the closing parenthesis
    # after "%(default)s".
    parser.add_argument(
        '-p',
        '--phase',
        help='phase: training or testing (default: %(default)s)',
        type=str,
        default='testing')

    # Arguments for training.
    parser.add_argument('-t',
                        '--model_type',
                        help='model type: seg or pnn (default: %(default)s)',
                        type=str,
                        default='seg')
    parser.add_argument(
        '-d',
        '--data_type',
        help='data type: audio or symbolic (default: %(default)s)',
        type=str,
        default='audio')
    # NOTE(review): nargs='+' yields a list when given on the command line,
    # but the default here is a plain string — downstream code must accept
    # both; confirm against train_audio.
    parser.add_argument('-da',
                        '--dataset_path',
                        nargs='+',
                        help='path to data set (default: %(default)s)',
                        type=str,
                        default='dataset')
    parser.add_argument('-la',
                        '--label_path',
                        nargs='+',
                        help='path to data set label (default: %(default)s)',
                        type=str,
                        default='dataset_label')
    parser.add_argument('-ms',
                        '--model_path_symbolic',
                        help='path to symbolic model (default: %(default)s)',
                        type=str,
                        default='model_symbolic')

    parser.add_argument(
        '-w',
        '--window_width',
        help='width of the input feature (default: %(default)s)',
        type=int,
        default=128)
    parser.add_argument(
        '-b',
        '--batch_size_train',
        help='batch size during training (default: %(default)s)',
        type=int,
        default=12)
    parser.add_argument('-e',
                        '--epoch',
                        help='number of epoch (default: %(default)s)',
                        type=int,
                        default=5)
    parser.add_argument('-n',
                        '--steps',
                        help='number of step per epoch (default: %(default)s)',
                        type=int,
                        default=6000)

    parser.add_argument('-o',
                        '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str,
                        default="out")

    # Arguments for testing.
    parser.add_argument('-m',
                        '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str,
                        default='transfer_audio_directly')
    parser.add_argument('-i',
                        '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str,
                        default='train01.wav')
    parser.add_argument('-bb',
                        '--batch_size_test',
                        help='batch size during testing (default: %(default)s)',
                        type=int,
                        default=10)

    args = parser.parse_args()
    print(args)

    if args.phase == "training":
        # Width of the input feature window on the time axis.
        timesteps = args.window_width

        dataset_path = args.dataset_path
        label_path = args.label_path

        # Load or create the model.
        if "seg" in args.model_type:
            model = seg(multi_grid_layer_n=1,
                        feature_num=384,
                        input_channel=1,
                        timesteps=timesteps)
        elif "pnn" in args.model_type:
            model = seg_pnn(multi_grid_layer_n=1,
                            feature_num=384,
                            timesteps=timesteps,
                            prev_model=args.model_path_symbolic)
        else:
            # Fail fast instead of hitting an UnboundLocalError below.
            raise ValueError("Unknown model type: {}".format(args.model_type))

        model.compile(optimizer="adam",
                      loss={'prediction': sparse_loss},
                      metrics=['accuracy'])

        # Train.
        train_audio(model, args.epoch, args.steps, args.batch_size_train,
                    args.window_width, dataset_path, label_path)

        # Save the trained model.
        save_model(model, args.output_model_name)
    else:
        # Load wav.
        song = args.input_file

        # Feature extraction; keep the first 4 feature channels and reorder
        # axes to (time, freq, channel).
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # Load model.
        model = load_model(args.model_path)

        # Inference on the first feature channel only.
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Parse the raw prediction matrix and write the result.
        r = matrix_parser(extract_result)

        np.savetxt("out_seg.txt", r)
コード例 #3
0
def main(args):
    """Set up dataflows and a model, then train it.

    Handles three configuration sources, in priority order: a pre-trained
    model's stored configuration (``args.input_model``), MusicNet-specific
    overrides, and the plain command-line arguments.

    Args:
        args: Parsed command-line arguments.

    Raises:
        TypeError: If ``args.dataset`` is not a registered dataflow.
    """
    if args.dataset not in dataflow_cls:
        raise TypeError("Unknown dataset: {}".format(args.dataset))

    # Hyper parameters that will be stored for future reuse.
    hparams = {}

    # Parameters that will be passed to the dataflow constructor.
    df_params = {}

    # Handling root path to the dataset.
    d_path = dataset_paths[args.dataset]
    if args.dataset_path is not None:
        assert os.path.isdir(args.dataset_path)
        d_path = args.dataset_path
    df_params["dataset_path"] = d_path

    # Number of channels that the model needs to know about.
    ch_num = len(args.channels)
    channels = args.channels

    # Type of feature to use.
    feature_type = "CFP"

    # Number of output classes.
    out_classes = 3

    # Output model name.
    out_model_name = args.output_model_name

    # Feature length on the time dimension.
    timesteps = args.timesteps

    # Continue to train on a pre-trained model.
    if args.input_model is not None:
        # Output model name is the same as the input model.
        out_model_name = args.input_model

        # Load the configuration of the previous training run.
        feature_type, channels, out_classes, timesteps = model_info(args.input_model)
        ch_num = len(channels)
    else:
        if args.dataset == "MusicNet":
            # Special settings for MusicNet, which has multiple instruments.
            if args.use_harmonic:
                ch_num = Harmonic_num * 2
                channels = list(range(ch_num))
                feature_type = "HCFP"
            # Fixed: was "args.multi_instruemnts" (typo) — the attribute is
            # spelled "multi_instruments" below, so the old spelling raised
            # AttributeError.
            if args.multi_instruments:
                # NOTE(review): comment said "11 types of instruments" but
                # the value is 12 — presumably 11 instruments plus a
                # background/none class; confirm against the label encoding.
                out_classes = 12

    df_params["b_sz"]      = args.train_batch_size
    df_params["phase"]     = "train"
    df_params["use_ram"]   = args.use_ram
    df_params["channels"]  = channels
    df_params["mpe_only"]  = not args.multi_instruments
    df_params["timesteps"] = timesteps

    print("Loading training data")
    df_cls = dataflow_cls[args.dataset]
    train_df = df_cls(**df_params)

    df_params["b_sz"]  = args.val_batch_size
    df_params["phase"] = "val"

    print("Loading validation data")
    val_df = df_cls(**df_params)

    hparams["channels"]       = channels
    hparams["timesteps"]      = timesteps
    hparams["feature_type"]   = feature_type
    hparams["output_classes"] = out_classes

    print("Creating/loading model")
    # Create or load the model.
    if args.input_model is not None:
        model = load_model(args.input_model)
    else:
        # Create a new attention-based segmentation model.
        model = model_attn.seg(feature_num=384, input_channel=ch_num, timesteps=timesteps,
                               out_class=out_classes)

        out_model_name = os.path.join(default_model_path, out_model_name)
        # Save model architecture and configuration up front.
        if not os.path.exists(out_model_name):
            os.makedirs(out_model_name)
        save_model(model, out_model_name, **hparams)

    model.compile(optimizer="adam", loss={'prediction': sparse_loss}, metrics=['accuracy'])

    # Create callbacks: keep only the best weights, stop early on plateau,
    # and log to TensorBoard.
    earlystop   = callbacks.EarlyStopping(monitor="val_acc", patience=args.early_stop)
    checkpoint  = callbacks.ModelCheckpoint(os.path.join(out_model_name, "weights.h5"),
                                            monitor="val_acc", save_best_only=True, save_weights_only=True)
    tensorboard = callbacks.TensorBoard(log_dir=os.path.join("tensorboard", args.output_model_name),
                                        write_images=True)
    callback_list = [checkpoint, earlystop, tensorboard]

    print("Start training")
    # Start training.
    train(model, train_df, val_df,
          epoch     = args.epoch,
          callbacks = callback_list,
          steps     = args.steps,
          v_steps   = args.val_steps)
コード例 #4
0
def main(args):
    """Set up dataflows and a frame+onset model, then train it.

    Similar to the plain-frame trainer but the label type (and therefore
    the number of output classes and the loss function) is driven by
    ``MusicNetLabelType("frame_onset", ...)``.

    Args:
        args: Parsed command-line arguments.

    Raises:
        TypeError: If ``args.dataset`` is not a registered dataflow.
    """
    if args.dataset not in dataflow_cls:
        raise TypeError("Unknown dataset: {}".format(args.dataset))

    # Hyper parameters that will be stored for future reuse.
    hparams = {}

    # Parameters that will be passed to the dataflow constructor.
    df_params = {}

    # Handling root path to the dataset.
    d_path = dataset_paths[args.dataset]
    if args.dataset_path is not None:
        assert os.path.isdir(args.dataset_path)
        d_path = args.dataset_path

    # Number of channels that the model needs to know about.
    ch_num = len(args.channels)
    channels = args.channels

    # Type of feature to use.
    feature_type = "CFP"

    # Output model name.
    out_model_name = args.output_model_name

    # Feature length on the time dimension.
    timesteps = args.timesteps

    # Label type: joint frame and onset prediction.
    mode = "frame_onset"
    l_type = MusicNetLabelType(mode, timesteps=timesteps)

    # Number of output classes is determined by the label type.
    out_classes = l_type.get_out_classes()

    # Continue to train on a pre-trained model.
    if args.input_model is not None:
        # Load the configuration of the previous training run.
        feature_type, channels, out_classes, timesteps = model_info(
            args.input_model)
        ch_num = len(channels)
    else:
        if args.dataset == "MusicNet":
            # Special settings for MusicNet, which has multiple instruments.
            if args.use_harmonic:
                ch_num = HarmonicNum * 2
                channels = list(range(ch_num))
                feature_type = "HCFP"

    df_params["b_sz"] = args.train_batch_size
    df_params["phase"] = "train"
    df_params["use_ram"] = args.use_ram
    df_params["channels"] = channels
    df_params["timesteps"] = timesteps
    df_params["out_classes"] = out_classes
    df_params["dataset_path"] = d_path
    df_params["label_conversion_func"] = l_type.get_conversion_func()

    print("Loading training data")
    df_cls = dataflow_cls[args.dataset]
    train_df = df_cls(**df_params)

    print("Loading validation data")
    df_params["b_sz"] = args.val_batch_size
    df_params["phase"] = "val"
    val_df = df_cls(**df_params)

    hparams["channels"] = channels
    hparams["timesteps"] = timesteps
    hparams["feature_type"] = feature_type
    hparams["output_classes"] = out_classes

    print("Creating/loading model")
    # Create or load the model.
    if args.input_model is not None:
        model = load_model(args.input_model)
    else:
        # Create a new segmentation model.
        model = seg(feature_num=384,
                    input_channel=ch_num,
                    timesteps=timesteps,
                    out_class=out_classes,
                    multi_grid_layer_n=1,
                    multi_grid_n=3)

    # Save model architecture and configuration.
    out_model_name = os.path.join(default_model_path, out_model_name)
    if not os.path.exists(out_model_name):
        os.makedirs(out_model_name)
    save_model(model, out_model_name, **hparams)

    # Per-class loss weighting; None means unweighted frame mode.
    weight = None
    if weight is not None:
        assert (len(weight) == out_classes
                ), "Weight length: {}, out classes: {}".format(
                    len(weight), out_classes)

    # PEP 8: use a def instead of assigning a lambda to a name.
    def loss_func(label, pred):
        """Multi-class temporal loss with optional class weighting."""
        return mctl_loss(label, pred, out_classes=out_classes, weight=weight)

    # Removed an unreachable `if False:` multi-GPU branch that shadowed
    # this single-GPU compile path.
    model.compile(optimizer="adam",
                  loss={'prediction': loss_func},
                  metrics=['accuracy'])

    # Create callbacks: checkpoint every epoch, stop early on loss plateau,
    # and log to TensorBoard.
    earlystop = callbacks.EarlyStopping(monitor="val_loss",
                                        patience=args.early_stop)
    checkpoint = callbacks.ModelCheckpoint(os.path.join(
        out_model_name, "weights.h5"),
                                           monitor="val_loss",
                                           save_best_only=False,
                                           save_weights_only=True)
    tensorboard = callbacks.TensorBoard(log_dir=os.path.join(
        "tensorboard", args.output_model_name),
                                        write_images=True)
    callback_list = [checkpoint, earlystop, tensorboard]

    print("Start training")
    # Start training.
    train(model,
          train_df,
          val_df,
          epoch=args.epoch,
          callbacks=callback_list,
          steps=args.steps,
          v_steps=args.val_steps)
コード例 #5
0
def main():
    """Parse command-line arguments, then run training or style transfer.

    In the ``training`` phase an LSTM/WaveNet model is built, trained on the
    pickled dataset, and saved.  Otherwise an existing model is loaded and
    style transfer is run on the input MIDI file, with the melody kept as a
    constraint; the result is written to ``test.mid``.
    """
    parser = argparse.ArgumentParser()

    # Fixed: all help strings previously lacked the closing parenthesis
    # after "%(default)s".
    parser.add_argument('-p', '--phase',
                        help='phase: training or testing (default: %(default)s)',
                        type=str, default='testing')
    # Arguments for training (fixed: comment previously said "testing").
    parser.add_argument('-d', '--dataset_path',
                        help='path to data set (default: %(default)s)',
                        type=str, default='bach_dataset.pickle')

    parser.add_argument('-e', '--epoch',
                        help='number of epoch (default: %(default)s)',
                        type=int, default=80)
    parser.add_argument('-n', '--steps',
                        help='number of step per epoch (default: %(default)s)',
                        type=int, default=6000)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size (default: %(default)s)',
                        type=int, default=88 * 3)
    parser.add_argument('-o', '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str, default="out")
    # Arguments for testing.
    parser.add_argument('-m', '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str, default='bach')
    parser.add_argument('-i', '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str, default="LiveAndLetDie_all.mid")
    parser.add_argument('-ii', '--input_file_melody',
                        help='path to input melody file (default: %(default)s)',
                        type=str, default="LiveAndLetDie_main.mid")
    parser.add_argument('-s', '--subdivision',
                        help='subdivision within one beat (default: %(default)s)',
                        type=int, default=4)

    args = parser.parse_args()
    print(args)

    if args.phase == "training":
        # Model hyper-parameters.
        timesteps = 32
        step = 4
        subdivision = args.subdivision
        batch_size = args.batch_size_train
        dataset_path = args.dataset_path

        # Create the model.
        model = lstm_wavenet(num_features_lr=91, timesteps=timesteps,
                             step=step, num_units_lstm=[150, 150, 150, 150],
                             num_dense=150,
                             conv_layers=5,
                             skip_layers=2)

        model.compile(optimizer="adam",
                      loss={'prediction': 'binary_crossentropy'},
                      metrics=['accuracy'])

        # Train.
        model = train(model,
                      dataset_path,
                      subdivision,
                      epoch=args.epoch,
                      steps=args.steps,
                      timesteps=timesteps,
                      step=step,
                      batch_size=batch_size)

        # Save the trained model.
        save_model(model, args.output_model_name)

    else:
        # Load the input file(s).
        subdivision = args.subdivision
        path = args.input_file
        path_melody = args.input_file_melody
        score = midi2score(path, subdivision)

        # "none" means no melody constraint — use an all-zero melody score.
        if path_melody == "none":
            score_melody = np.zeros(score.shape)
        else:
            score_melody = midi2score(path_melody, subdivision)

        score = add_beat(score, subdivision)
        score_melody = add_beat(score_melody, subdivision)

        # NOTE(review): inputs are truncated to the first 640 time steps —
        # presumably a model-capacity limit; confirm.
        score = np.array(score[0:640])
        score_melody = np.array(score_melody[0:640])

        extended_score = padding(score, 32, 4)

        # Load the model.
        model = load_model(model_name=args.model_path)

        # Generation via iterative style transfer.
        result = style_transfer(extended_score, score_melody, model, iter_num=25)

        # Save the result, keeping the melody fixed.
        score2midi("test.mid", result, subdivision, 120,
                   melody_constraint=True, melody=score_melody)
        print("saved")
def arguments_post_process(args):
    """Resolve dataset paths and create or load the model from *args*.

    Despite the name, this also builds/loads and compiles the model, saves a
    new model's configuration, and enumerates the dataset files listed in
    ``SongList.csv``.

    Args:
        args: Parsed command-line arguments (mutated in place: may rewrite
            ``output_model_name``, ``channels`` and ``no_harmonic``).

    Returns:
        tuple: ``(model, dataset_path, label_path, dataset_type)`` where the
        path entries are parallel lists of feature (.hdf) and label
        (_label.pickle) file paths.

    Raises:
        ValueError: If neither feature-path flag was supplied.
    """
    # Path to the dataset: exactly one of the two flags must be given.
    if args.MusicNet_feature_path is not None:
        base_path = args.MusicNet_feature_path
        dataset_type = "MusicNet"
    elif args.MAPS_feature_path is not None:
        base_path = args.MAPS_feature_path
        dataset_type = "MAPS"
    else:
        # Raise instead of `assert False` — asserts are stripped under -O.
        raise ValueError(
            "Please at least assign one of the flags: "
            "--MAPS-feature-path or --MusicNet-feature-path")

    # Continue to train on a pre-trained model.
    if args.input_model is not None:
        # Output model name is the same as the input model.
        args.output_model_name = args.input_model

        # Load the configuration of the previous training run.
        feature_type, channels, out_classes = model_info(
            os.path.join("model", args.input_model))
        ch_num = len(channels)
        args.channels = channels

        # Load the model.
        model = load_model(os.path.join("model", args.input_model))

    # Train a new model.
    else:
        # Set up the output model name.
        if " + " in args.output_model_name:
            # Fixed: was `args.channel` (AttributeError) — the attribute is
            # spelled `channels` everywhere else in this function.
            args.output_model_name = args.output_model_name[0:13] + str(
                args.channels)

        # Number of channels to use.
        ch_num = len(args.channels)

        # Train on MusicNet.
        if dataset_type == "MusicNet":
            # Input parameters.
            if args.no_harmonic:
                ch_num = 2
                # NOTE(review): the first assignment ([0, 6], Spec. and
                # Ceps. channels) is immediately overwritten by the maestro
                # setting below — looks like a leftover experiment toggle;
                # confirm which channel pair is intended.
                args.channels = [0, 6]  # Spec. and Ceps. channel
                args.channels = [1, 3]  # For train on maestro
                feature_type = "CFP"
            else:
                ch_num = Harmonic_Num * 2
                # Including harmonic channels.
                args.channels = list(range(ch_num))
                feature_type = "HCFP"
            # Output parameters.
            if args.mpe_only:
                out_classes = 2
            else:
                out_classes = 12
        # Train on MAPS (removed redundant re-assignments of base_path and
        # dataset_type, which were already set above).
        elif dataset_type == "MAPS":
            out_classes = 2
            feature_type = "CFP"
            args.no_harmonic = True

        # Create a new model.
        model = seg(multi_grid_layer_n=1,
                    feature_num=384,
                    input_channel=ch_num,
                    timesteps=args.window_width,
                    out_class=out_classes)

        path = os.path.join("./model", args.output_model_name)
        # Save the model architecture and configuration.
        if not os.path.exists(path):
            os.makedirs(path)
        save_model(model,
                   path,
                   feature_type=feature_type,
                   input_channels=args.channels,
                   output_classes=out_classes)

    model.compile(optimizer="adam",
                  loss={'prediction': sparse_loss},
                  metrics=['accuracy'])

    # Load files according to the recordings listed in SongList.csv.
    distinct_file = set()
    with open(os.path.join(base_path, "SongList.csv"), newline='') as config:
        reader = csv.DictReader(config)
        for row in reader:
            distinct_file.add(row["File name"])
    dataset_path = list(distinct_file)[0:args.num_datasets]
    label_path = [i + "_label.pickle" for i in dataset_path]
    dataset_path = [i + ".hdf" for i in dataset_path]
    print("Datasets chosen: ", dataset_path)

    dataset_path = [os.path.join(base_path, dp) for dp in dataset_path]
    label_path = [os.path.join(base_path, lp) for lp in label_path]

    return model, dataset_path, label_path, dataset_type