Esempio n. 1
0
def process_feature_song_list(dataset_name,
                              song_list,
                              harmonic=False,
                              num_harmonic=0):
    """Extract features for each song and store them in ``<dataset_name>.hdf``.

    One gzip-compressed dataset is written per song. The dataset key is the
    song's base file name with every ``".wav"`` occurrence removed.

    Args:
        dataset_name: Output path without extension; ``".hdf"`` is appended.
            The file is opened in ``"w"`` mode.
        song_list: Sequence of audio file paths passed to
            ``feature_extraction``.
        harmonic: When true, store harmonic stacks built with
            ``fetch_harmonic`` instead of the first four extracted features.
        num_harmonic: Highest harmonic index to fetch; indices
            ``0..num_harmonic`` (inclusive) are used. Only read when
            ``harmonic`` is true.
    """
    fs = 44100
    if harmonic:
        freq_range = [1.0, fs / 2]
    else:
        freq_range = [27.5, 4487.0]

    total = len(song_list)

    # The context manager guarantees the HDF5 file is closed even if feature
    # extraction or dataset creation raises part-way through the list.
    with h5py.File(dataset_name + ".hdf", "w") as hdf_out:
        for idx, song in enumerate(song_list):
            print("Extracting({}/{}): {}".format(idx + 1, total, song))

            out = feature_extraction(song,
                                     fc=freq_range[0],
                                     tc=(1 / freq_range[1]),
                                     Down_fs=fs)

            if harmonic:
                # Index 5 of the extraction result is passed to
                # fetch_harmonic as the frequency reference (`cenf`).
                cenf = out[5]
                orders = range(num_harmonic + 1)

                # Harmonic spectrum
                har_s = np.transpose(
                    np.array([fetch_harmonic(out[1], cenf, i)
                              for i in orders]),
                    axes=(2, 1, 0))

                # Harmonic GCoS
                har_g = np.transpose(
                    np.array([fetch_harmonic(out[2], cenf, i)
                              for i in orders]),
                    axes=(2, 1, 0))

                # Harmonic cepstrum
                har_c = np.transpose(
                    np.array([
                        fetch_harmonic(out[3], cenf, i, is_reverse=True)
                        for i in orders
                    ]),
                    axes=(2, 1, 0))

                piece = np.dstack((har_s, har_g, har_c))
            else:
                # Only build the plain 4-feature stack when it is actually
                # stored; in harmonic mode it would be thrown away.
                piece = np.transpose(np.array(out[0:4]), axes=(2, 1, 0))

            key = os.path.basename(song)
            key = key.replace(".wav", "")
            hdf_out.create_dataset(key,
                                   data=piece,
                                   compression="gzip",
                                   compression_opts=5)
Esempio n. 2
0
def main(args):
    """Run a pre-trained model on one audio file and optionally write MIDI.

    Args:
        args: Namespace-like object. The attributes read here are
            ``input_audio``, ``model_path``, ``num_harmonics``, ``onset_th``
            and ``to_midi``.

    Raises:
        AssertionError: If ``args.input_audio`` is not an existing file, or
            if the number of model input channels does not match the feature
            type.
    """
    # Pre-process features
    # The message must reference `args`; the previous `audio.input_audio`
    # raised NameError instead of reporting the bad path.
    assert (
        os.path.isfile(args.input_audio)
    ), "The given path is not a file!. Please check your input again. Given input: {}".format(
        args.input_audio)
    print("Processing features of input audio: {}".format(args.input_audio))
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)

    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        assert (len(channels) == (args.num_harmonics * 2 + 2))

        # Harmonic indices 0..num_harmonics inclusive give
        # (num_harmonics + 1) spectrum and (num_harmonics + 1) cepstrum
        # channels, i.e. the 2 * num_harmonics + 2 channels asserted above.
        orders = range(args.num_harmonics + 1)
        spec = [fetch_harmonic(tfrL0, cenf, i) for i in orders]
        ceps = [fetch_harmonic(tfrLQ, cenf, i) for i in orders]

        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))

        feature = np.dstack((spec, ceps))
    else:
        assert (len(channels) <= 4)

        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))

    model = load_model(args.model_path)
    print("Predicting...")
    # Only the channels the model was configured for are fed to it.
    pred = predict_v1(feature[:, :, channels], model, timesteps, batch_size=4)

    midi = MultiPostProcess(pred,
                            mode="note",
                            onset_th=args.onset_th,
                            dura_th=0.5,
                            frm_th=3,
                            inst_th=1.1,
                            t_unit=0.02)

    if args.to_midi is not None:
        midi.write(args.to_midi)
        print("Midi written as {}".format(args.to_midi))
Esempio n. 3
0
def main():
    """Parse CLI arguments, run a pre-trained model and optionally write MIDI.

    Arguments come from ``create_parser()``. The attributes read here are
    ``input_audio``, ``model_path``, ``onset_th`` and ``to_midi``.

    Raises:
        AssertionError: If ``args.input_audio`` is not an existing file, or
            if the number of model input channels does not match the feature
            type.
        KeyError: If the model's ``label_type`` has no entry in the
            post-processing mode mapping.
    """
    parser = create_parser()
    args = parser.parse_args()

    # Pre-process features
    assert (
        os.path.isfile(args.input_audio)
    ), f"The given path is not a file!. Please check your input again. Given input: {args.input_audio}"
    print("Processing features of input audio: {}".format(args.input_audio))
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)

    # Load pre-trained model
    # (A leftover ipdb breakpoint used to sit here; it stopped every run and
    # required the optional `ipdb` package, so it has been removed.)
    minfo = ModelInfo()
    model = minfo.load_model(args.model_path)
    # A command-line onset threshold overrides the one loaded with the model.
    if args.onset_th is not None:
        minfo.onset_th = args.onset_th
    print(minfo)

    # Post-process feature according to the configuration of model
    if minfo.feature_type == "HCFP":
        assert (len(minfo.input_channels) == (HarmonicNum * 2 + 2))

        # Harmonic indices 0..HarmonicNum inclusive, for both the spectrum
        # and the cepstrum, match the channel count asserted above.
        orders = range(HarmonicNum + 1)
        spec = [fetch_harmonic(tfrL0, cenf, i) for i in orders]
        ceps = [fetch_harmonic(tfrLQ, cenf, i) for i in orders]

        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))

        feature = np.dstack((spec, ceps))
    else:
        assert (len(minfo.input_channels) <= 4)

        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))

    print("Predicting...")
    pred = predict_v1(feature[:, :, minfo.input_channels],
                      model,
                      minfo.timesteps,
                      batch_size=4)

    # Translate the model's label type into the post-processing mode.
    mode_mapping = {
        "frame": "true_frame",
        "frame_onset": "note",
        "multi_instrument_frame": "true_frame",
        "multi_instrument_note": "note"
    }

    midi = MultiPostProcess(pred,
                            mode=mode_mapping[minfo.label_type],
                            onset_th=minfo.onset_th,
                            dura_th=minfo.dura_th,
                            frm_th=minfo.frm_th,
                            inst_th=minfo.inst_th,
                            t_unit=0.02)

    if args.to_midi is not None:
        midi.write(args.to_midi)
        print("Midi written as {}".format(args.to_midi))