def predict(feature,
            model,
            threshold=None,
            full_predict=True,
            MAX_FRAME=9000,
            channels=None,
            timesteps=128,
            instruments=1):
    """Run frame-wise inference over a feature matrix, chunking long inputs.

    Parameters
    ----------
    feature : np.ndarray
        Feature tensor indexed as ``feature[frame, bin, channel]`` — the
        channel axis is selected via ``feature[:, :, channels]``.
        (Exact bin/channel semantics depend on the upstream extractor.)
    model : object
        Model handed straight to ``inference``.
    threshold : float or None
        Binarization threshold; when None the raw prediction values are kept.
    full_predict : bool
        When False, stop after the first chunk (quick preview).
    MAX_FRAME : int
        Maximum number of frames processed per inference call.
    channels : list[int] or None
        Channel indices to feed the model; defaults to ``[0]``.
    timesteps : int
        Timestep length passed to ``inference`` for chunked input.
    instruments : int
        Number of instrument outputs expected from the model.

    Returns
    -------
    np.ndarray
        Concatenated prediction frames.
    """
    # Avoid the mutable-default-argument pitfall: a shared list default
    # would leak mutations across calls.
    if channels is None:
        channels = [0]

    # No threshold supplied -> keep the model's raw prediction values.
    original_v = threshold is None

    if len(feature) > MAX_FRAME:
        # Long input: process in MAX_FRAME-sized chunks with a small
        # overlap on each side so chunk boundaries don't lose context.
        overlap = 4

        len_f = len(feature)
        turns = int(np.ceil(len_f / MAX_FRAME))
        pred = []

        # Zero-pad both ends so the first/last chunks also have `overlap`
        # frames of context to trim afterwards.
        padding = np.zeros(((overlap, ) + feature.shape[1:]))
        feature = np.concatenate((padding, feature, padding), axis=0)

        for j in trange(turns, desc='A piece', leave=False):
            if j != (turns - 1):
                sub_feature = feature[j * MAX_FRAME:(j + 1) * MAX_FRAME +
                                      2 * overlap]
            else:
                # Final chunk: take everything that remains.
                sub_feature = feature[j * MAX_FRAME:]

            tmp_pred = inference(feature=sub_feature[:, :, channels],
                                 model=model,
                                 batch_size=5,
                                 timestep=timesteps,
                                 threshold=threshold,
                                 isMPE=True,
                                 original_v=original_v,
                                 channel=len(channels),
                                 instruments=instruments,
                                 keep_progress=False)
            # Trim the overlap context before stitching chunks together.
            if j == 0:
                pred = tmp_pred[overlap:-overlap]
            else:
                pred = np.concatenate((pred, tmp_pred[overlap:-overlap]),
                                      axis=0)

            if not full_predict:
                break

    else:
        # Short input: one inference call over the whole feature.
        pred = inference(feature=feature[:, :, channels],
                         model=model,
                         threshold=threshold,
                         isMPE=True,
                         original_v=original_v,
                         channel=len(channels),
                         instruments=instruments)
    return pred
def testing(args, model=None):
    """Transcribe ``args.input_file`` and save the result to
    ``args.output_file + ".txt"``.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``input_file``, ``output_file``, ``model_path``,
        ``batch_size_test`` and the boolean ``jetson`` flag.
    model : optional
        Pre-loaded model; loaded from ``args.model_path`` when None.
    """
    # load wav
    x, fs = sf.read(args.input_file)

    # Load the model once up front (the original re-checked inside the
    # chunk loop; the result is identical but this is clearer).
    if model is None:
        model = load_model(args.model_path)

    def _transcribe(samples):
        # Shared pipeline: feature extraction -> inference -> parse.
        # Keep the first 4 feature planes, reordered to (frames, bins, planes).
        feature = feature_extraction(samples, fs)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)
        return matrix_parser(extract_result)

    if args.jetson:
        # Memory-constrained device: process the audio in MAX_LEN-sample
        # chunks and concatenate the per-chunk results.
        parts = []
        sample_ptr = 0
        while sample_ptr < x.shape[0]:
            # BUG FIX: the slice end index is exclusive, so clamp to
            # x.shape[0]; the original used x.shape[0] - 1 and silently
            # dropped the final sample of the file.
            chunk_end = min(sample_ptr + MAX_LEN, x.shape[0])
            parts.append(_transcribe(x[sample_ptr:chunk_end, :]))
            sample_ptr += MAX_LEN
        results = np.concatenate(parts)
    else:
        results = _transcribe(x)

    np.savetxt(args.output_file + ".txt", results)
    print("FINISHED")
def extract_melody(y, sr, model="Seg"):
    """Extract a melody matrix from audio samples ``y`` at rate ``sr``.

    ``model`` names the checkpoint to load; the returned array is whatever
    ``matrix_parser`` produces from the model's output.
    """
    # Feature extraction: keep the first four feature planes and arrange
    # them as (frames, bins, planes).
    raw = feature_extraction(y, sr)
    feat = np.transpose(raw[0:4], axes=(2, 1, 0))

    # Replace the checkpoint name with the loaded model object.
    model = load_model(model)

    # Inference on the first plane only.
    print(feat[:, :, 0].shape)
    prediction = inference(feature=feat[:, :, 0], model=model, batch_size=10)

    # Parse the raw network output into the melody matrix.
    return matrix_parser(prediction)
Example #4
    def predictOne(self, path: str):
        """Run vocal-melody extraction on the audio file at *path*.

        Returns a 2-tuple of ``Signal`` objects sampled at 50 Hz, built
        from the two columns of ``matrix_parser``'s output (presumably a
        voicing track and a pitch track — confirm against the vendored
        project's documentation).
        """
        # Deferred imports keep the heavy vendored package out of module
        # import time.  Unused imports from the original copy-paste
        # (save_model, seg, seg_pnn, sparse_loss, train_audio) were removed.
        from project.MelodyExt import feature_extraction
        from project.utils import load_model, matrix_parser
        from project.test import inference

        # Feature extraction: keep the first 4 feature planes, reordered
        # to (frames, bins, planes).
        feature = feature_extraction(path)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # Load the pretrained model bundled with the vendored project;
        # which checkpoint is used is selected via self.parameters["model"].
        model = load_model(
            resource_filename(
                __name__,
                "../../../vendors/Vocal-Melody-Extraction/Pretrained_models/" +
                self.parameters["model"].value))

        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=10)

        # Output: split the parsed matrix's two columns into signals.
        r = matrix_parser(extract_result)
        return (Signal(r[:, 0], sampleRate=50), Signal(r[:, 1], sampleRate=50))
def main():
    """Command-line entry point: train a model or transcribe an audio file.

    ``--phase training`` builds and trains a model, saving it under
    ``--output_model_name``; any other phase runs inference on
    ``--input_file`` and writes the parsed result to ``out_seg.txt``.
    """
    # Arguments.  NOTE: every help string previously had an unclosed
    # parenthesis ("(default: %(default)s"); fixed throughout.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p',
        '--phase',
        help='phase: training or testing (default: %(default)s)',
        type=str,
        default='testing')

    # arguments for training
    parser.add_argument('-t',
                        '--model_type',
                        help='model type: seg or pnn (default: %(default)s)',
                        type=str,
                        default='seg')
    parser.add_argument(
        '-d',
        '--data_type',
        help='data type: audio or symbolic (default: %(default)s)',
        type=str,
        default='audio')
    parser.add_argument('-da',
                        '--dataset_path',
                        nargs='+',
                        help='path to data set (default: %(default)s)',
                        type=str,
                        default='dataset')
    parser.add_argument('-la',
                        '--label_path',
                        nargs='+',
                        help='path to data set label (default: %(default)s)',
                        type=str,
                        default='dataset_label')
    parser.add_argument('-ms',
                        '--model_path_symbolic',
                        help='path to symbolic model (default: %(default)s)',
                        type=str,
                        default='model_symbolic')

    parser.add_argument(
        '-w',
        '--window_width',
        help='width of the input feature (default: %(default)s)',
        type=int,
        default=128)
    parser.add_argument(
        '-b',
        '--batch_size_train',
        help='batch size during training (default: %(default)s)',
        type=int,
        default=12)
    parser.add_argument('-e',
                        '--epoch',
                        help='number of epoch (default: %(default)s)',
                        type=int,
                        default=5)
    parser.add_argument('-n',
                        '--steps',
                        help='number of step per epoch (default: %(default)s)',
                        type=int,
                        default=6000)

    parser.add_argument('-o',
                        '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str,
                        default="out")

    # arguments for testing
    parser.add_argument('-m',
                        '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str,
                        default='transfer_audio_directly')
    parser.add_argument('-i',
                        '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str,
                        default='train01.wav')
    parser.add_argument('-bb',
                        '--batch_size_test',
                        help='batch size during testing (default: %(default)s)',
                        type=int,
                        default=10)

    args = parser.parse_args()
    print(args)

    if (args.phase == "training"):
        # arguments setting
        TIMESTEPS = args.window_width

        dataset_path = args.dataset_path
        label_path = args.label_path

        # load or create model ("seg" is matched first, so "seg_pnn"-like
        # values resolve to the plain seg model, as before)
        if ("seg" in args.model_type):
            model = seg(multi_grid_layer_n=1,
                        feature_num=384,
                        input_channel=1,
                        timesteps=TIMESTEPS)
        elif ("pnn" in args.model_type):
            model = seg_pnn(multi_grid_layer_n=1,
                            feature_num=384,
                            timesteps=TIMESTEPS,
                            prev_model=args.model_path_symbolic)
        else:
            # Fail fast with a clear message instead of hitting a
            # NameError on `model` below.
            raise ValueError("unknown model type: " + args.model_type)

        model.compile(optimizer="adam",
                      loss={'prediction': sparse_loss},
                      metrics=['accuracy'])

        # train
        train_audio(model, args.epoch, args.steps, args.batch_size_train,
                    args.window_width, dataset_path, label_path)

        # save model
        save_model(model, args.output_model_name)
    else:
        # load wav
        song = args.input_file

        # Feature extraction: keep the first 4 feature planes, reordered
        # to (frames, bins, planes).
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model
        model = load_model(args.model_path)

        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Output
        r = matrix_parser(extract_result)

        np.savetxt("out_seg.txt", r)