Code example #1
def testing(args, model=None):
    # load wav
    song = args.input_file
    x, fs = sf.read(song)
    results = None
    if args.jetson:
        sample_ptr = 0
        while sample_ptr < x.shape[0]:
            chunk_end = min(sample_ptr + MAX_LEN, x.shape[0])  # do not drop the final sample
            chunk = x[sample_ptr:chunk_end, :]
            sample_ptr += MAX_LEN

            # Feature extraction
            feature = feature_extraction(chunk, fs)
            feature = np.transpose(feature[0:4], axes=(2, 1, 0))

            # load model
            if model is None:
                model = load_model(args.model_path)

            # Inference
            print(feature[:, :, 0].shape)
            extract_result = inference(feature=feature[:, :, 0],
                                       model=model,
                                       batch_size=args.batch_size_test)

            # Output
            r = matrix_parser(extract_result)

            if results is None:
                results = r
            else:
                results = np.concatenate((results, r))
    else:
        # Feature extraction
        feature = feature_extraction(x, fs)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model
        if model is None:
            model = load_model(args.model_path)

        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Output
        results = matrix_parser(extract_result)

    np.savetxt(args.output_file + ".txt", results)
    print("FINISHED")
Code example #2
def make_dataset_audio(song_list, label_list, data, dataset_name):

    X = []
    Y = []
    for song in tqdm.tqdm(song_list):

        out = feature_extraction(song)
        score = np.transpose(out[0:4], axes=(2, 1, 0))

        X.append(score)

    if ("medleydb" in data):
        f, v, r = medleydb_preprocessing(song_list)
    else:
        v = None

    for label in tqdm.tqdm(label_list):
        score = label_parser(label, data, v)

        Y.append(score)
    # write features and labels as two separate pickles, closing the files explicitly
    with open(dataset_name, 'wb') as feat_file:
        pickle.dump(X, feat_file, pickle.HIGHEST_PROTOCOL)
    with open(dataset_name + "_label", 'wb') as label_file:
        pickle.dump(Y, label_file, pickle.HIGHEST_PROTOCOL)

    print(str(len(X)) + ' files written in ' + dataset_name)
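make_dataset_audio() leaves two pickles on disk, one with the per-song feature tensors and one with the parsed labels. A sketch of loading them back, assuming the default names 'dataset' and 'dataset_label' from the dataset_path/label_path defaults in code example #6:
import pickle

with open("dataset", "rb") as feat_file:
    X = pickle.load(feat_file)        # list of transposed feature arrays, one per song
with open("dataset_label", "rb") as label_file:
    Y = pickle.load(label_file)       # list of label matrices from label_parser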
Code example #3
def extract_melody(y, sr, model="Seg"):

    # Feature extraction
    feature = feature_extraction(y, sr)
    feature = np.transpose(feature[0:4], axes=(2, 1, 0))

    # load model
    model = load_model(model)

    # Inference
    print(feature[:, :, 0].shape)
    extract_result = inference(feature=feature[:, :, 0],
                               model=model,
                               batch_size=10)

    # Output
    r = matrix_parser(extract_result)

    return r
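extract_melody() takes the raw samples and the sampling rate directly. A minimal call sketch, loading the audio with soundfile the same way code example #1 does; the file name is illustrative, and the pretrained model named by the "Seg" default must be available to load_model.
import soundfile as sf

y, sr = sf.read("train01.wav")
melody = extract_melody(y, sr, model="Seg")   # parsed melody matrix from matrix_parser
Code example #4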
def main(args):
    # Pre-process features
    assert os.path.isfile(args.input_audio), "The given path is not a file! Please check your input again."
    print("Processing features")
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)
    
    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        assert(len(channels) == (args.num_harmonics*2+2))
        
        spec = []
        ceps = []
        for i in range(args.num_harmonics):
            spec.append(fetch_harmonic(tfrL0, cenf, i))
            ceps.append(fetch_harmonic(tfrLQ, cenf, i))
        
        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))
        
        feature = np.dstack((spec, ceps))
    else:
        assert(len(channels) <= 4)
        
        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))
    
    feature = create_batches(feature[:, :, channels], b_size=16, timesteps=timesteps)
    model = load_model(args.model_path)
    

    print("Predicting...")
    pred = predict(feature, model)
    
    p_out = h5py.File("pred.hdf", "w")
    p_out.create_dataset("0", data=pred)
    p_out.close()

    notes, midi = PostProcess(pred)
    
    if args.to_midi is not None:
        midi.write(args.to_midi)
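The raw prediction is written to pred.hdf before post-processing; a short sketch of reading it back with h5py for inspection:
import h5py

with h5py.File("pred.hdf", "r") as f:
    pred = f["0"][:]                  # the array that was passed to PostProcess()
print(pred.shape)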
Code example #5
    def predictOne(self, path: str):
        """
        method copied from the main file in the project
        """
        # pkg_resources.()
        # project = importlib.import_module("vendors.Vocal-Melody-Extraction.project")
        from project.MelodyExt import feature_extraction
        from project.utils import load_model, save_model, matrix_parser
        from project.test import inference
        from project.model import seg, seg_pnn, sparse_loss
        from project.train import train_audio

        # load wav
        song = path

        # Feature extraction
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model

        model = load_model(
            resource_filename(
                __name__,
                "../../../vendors/Vocal-Melody-Extraction/Pretrained_models/" +
                self.parameters["model"].value))
        batch_size_test = 10
        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=batch_size_test)

        # Output
        r = matrix_parser(extract_result)
        return (Signal(r[:, 0], sampleRate=50), Signal(r[:, 1], sampleRate=50))
Code example #6
def main():
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p',
        '--phase',
        help='phase: training or testing (default: %(default)s)',
        type=str,
        default='testing')

    #arguments for training
    parser.add_argument('-t',
                        '--model_type',
                        help='model type: seg or pnn (default: %(default)s)',
                        type=str,
                        default='seg')
    parser.add_argument(
        '-d',
        '--data_type',
        help='data type: audio or symbolic (default: %(default)s)',
        type=str,
        default='audio')
    parser.add_argument('-da',
                        '--dataset_path',
                        nargs='+',
                        help='path to data set (default: %(default)s)',
                        type=str,
                        default='dataset')
    parser.add_argument('-la',
                        '--label_path',
                        nargs='+',
                        help='path to data set label (default: %(default)s)',
                        type=str,
                        default='dataset_label')
    parser.add_argument('-ms',
                        '--model_path_symbolic',
                        help='path to symbolic model (default: %(default)s)',
                        type=str,
                        default='model_symbolic')

    parser.add_argument(
        '-w',
        '--window_width',
        help='width of the input feature (default: %(default)s)',
        type=int,
        default=128)
    parser.add_argument(
        '-b',
        '--batch_size_train',
        help='batch size during training (default: %(default)s)',
        type=int,
        default=12)
    parser.add_argument('-e',
                        '--epoch',
                        help='number of epochs (default: %(default)s)',
                        type=int,
                        default=5)
    parser.add_argument('-n',
                        '--steps',
                        help='number of steps per epoch (default: %(default)s)',
                        type=int,
                        default=6000)

    parser.add_argument('-o',
                        '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str,
                        default="out")

    #arguments for testing
    parser.add_argument('-m',
                        '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str,
                        default='transfer_audio_directly')
    parser.add_argument('-i',
                        '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str,
                        default='train01.wav')
    parser.add_argument('-bb',
                        '--batch_size_test',
                        help='batch size during testing (default: %(default)s)',
                        type=int,
                        default=10)

    args = parser.parse_args()
    print(args)

    if (args.phase == "training"):
        #arguments setting
        TIMESTEPS = args.window_width

        #dataset_path = ["medleydb_48bin_all_4features", "mir1k_48bin_all_4features"]
        #label_path = ["medleydb_48bin_all_4features_label", "mir1k_48bin_all_4features_label"]
        dataset_path = args.dataset_path
        label_path = args.label_path

        # load or create model
        if ("seg" in args.model_type):
            model = seg(multi_grid_layer_n=1,
                        feature_num=384,
                        input_channel=1,
                        timesteps=TIMESTEPS)
        elif ("pnn" in args.model_type):
            model = seg_pnn(multi_grid_layer_n=1,
                            feature_num=384,
                            timesteps=TIMESTEPS,
                            prev_model=args.model_path_symbolic)

        model.compile(optimizer="adam",
                      loss={'prediction': sparse_loss},
                      metrics=['accuracy'])

        #train
        train_audio(model, args.epoch, args.steps, args.batch_size_train,
                    args.window_width, dataset_path, label_path)

        #save model
        save_model(model, args.output_model_name)
    else:
        # load wav
        song = args.input_file

        # Feature extraction
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model
        model = load_model(args.model_path)

        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Output
        r = matrix_parser(extract_result)

        np.savetxt("out_seg.txt", r)
def main(args):
    # Pre-process features
    assert (os.path.isfile(args.input_audio)
            ), "The given path is not a file!. Please check your input again."
    print("Processing features")
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)

    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        assert (len(channels) == (args.num_harmonics * 2 + 2))

        spec = []
        ceps = []
        for i in range(args.num_harmonics):
            spec.append(fetch_harmonic(tfrL0, cenf, i))
            ceps.append(fetch_harmonic(tfrLQ, cenf, i))

        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))

        feature = np.dstack((spec, ceps))
    else:
        assert (len(channels) <= 4)

        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))

    model = load_model(args.model_path)

    print("Predicting...")
    pred = predict(feature,
                   model,
                   timesteps=timesteps,
                   channels=channels,
                   instruments=out_class - 1)
    p_out = h5py.File("pred.hdf", "w")
    p_out.create_dataset("0", data=pred)
    p_out.close()

    # keep only the 88 piano pitches and run peak picking on each instrument channel
    for i in range(pred.shape[2]):
        pred[:, :88, i] = peak_picking(pred[:, :, i])
    pred = pred[:, :88]

    # Print figure
    base_path = args.input_audio[:args.input_audio.rfind("/")]
    save_name = os.path.join(base_path, args.output_fig_name)

    plot_range = range(500, 1500)
    if max(plot_range) >= len(pred):  # fall back to the full range for short predictions
        plot_range = range(0, len(pred))
    pp = pred[plot_range]

    if out_class >= 11:
        assert out_class == 12, (
            "There is something wrong with the configuration. "
            "Expected value: 12, Current value: {}".format(out_class))
        titles = MusicNet_Instruments
    else:
        assert out_class == 2, (
            "There is something wrong with the configuration. "
            "Expected value: 2, Current value: {}".format(out_class))
        titles = ["Piano"]

    print("Ploting figure...")
    #PLOT(pp, save_name, plot_range, titles=titles)
    print("Output figure to {}".format(base_path))

    if args.to_midi is not None:
        midi_path = args.to_midi

        threshold = [0.45, 0.5]
        for th in threshold:
            midi = to_midi(pred, midi_path + "_" + str(th), threshold=th)

            roll = midi.get_piano_roll()
            print("Shape of output midi roll: ", roll.shape)