def create_midi_file(gt_midi, model_output_dir, midi_output_dir, filename, step='time', max_len=30):
    """
    Turn the csv piano roll produced for one MAPS MIDI file into a MIDI file.

    Parameters
    ==========
    gt_midi : string
        The directory holding the ground truth MIDI file named by `filename`.

    model_output_dir : string
        The directory holding the `*_pr.csv` piano roll to convert.

    midi_output_dir : string
        The directory the resulting MIDI file is written to.

    filename : string
        The MAPS filename of the MIDI file to convert.

    step : string
        The frame step type handed to `DataMaps.make_from_file`.
        Either "time" (default), "quant", or "event".

    max_len : int
        The number of seconds of the file to convert. Defaults to 30.
    """
    # The ground truth file supplies the frame correspondence and input rate.
    ground_truth_path = os.path.join(gt_midi, filename)
    maps_data = DataMaps()
    maps_data.make_from_file(ground_truth_path, step, [0, max_len], acoustic_model="kelz")

    # The csv roll sits next to the model outputs as "<name>_pr.csv".
    csv_name = filename.replace('.mid', '_pr.csv')
    piano_roll = np.loadtxt(os.path.join(model_output_dir, csv_name))
    time_roll = convert_note_to_time(
        piano_roll,
        maps_data.corresp,
        maps_data.input_fs,
        max_len=max_len,
    )

    # NOTE(review): 25 is hard-coded; presumably the frame rate of the
    # time-based roll -- confirm against make_midi_from_roll.
    midi_data = make_midi_from_roll(time_roll, 25)

    # Output name: MAPS-derived name + "_" + the two characters that precede
    # the 4-character extension of `filename`.
    two_char_tag = filename[-6:-4]
    output_base = os.path.join(midi_output_dir, get_name_from_maps(filename) + '_' + two_char_tag)
    save_midi(midi_data, output_base + '.mid')
Exemple #2
0
def test(params):
    """
    Score one candidate set of HMM transition probabilities on the validation files.

    The same transition table is used for each of the 88 pitches. Every
    "*.mid" file in "data/outputs-20/valid" is decoded with
    `hmm_eval.decode_all_pitches`, converted to the time step when needed,
    and compared with the "time"-step target. The priors and transitions are
    then pickled under "hmm/models/", with the mean note F-measure in the
    file name.

    Reads the module-level names `priors`, `step`, `order` and `with_offset`.

    Parameters
    ==========
    params : array-like
        Values written to `transitions[pitch, :, 0]`; `1 - params` is written
        to `transitions[pitch, :, 1]`.

    Returns
    =======
    float
        The negated mean note-level F-measure over the validation files
        (presumably negated so that a minimizer can drive the search).
    """
    global priors, step, order, with_offset
    print(params)

    # Each row view is one pitch; all pitches share the same parameters.
    transitions = np.zeros((88, 2 ** order, 2))
    for pitch_table in transitions:
        pitch_table[:, 0] = params
        pitch_table[:, 1] = 1 - pitch_table[:, 0]

    frame_rows = []
    note_rows = []

    for midi_path in glob.glob(os.path.join("data/outputs-20/valid", "*.mid")):
        print(midi_path)
        sys.stdout.flush()

        # Decode on the configured step.
        decoding_data = DataMaps()
        decoding_data.make_from_file(midi_path, step, [0, 30], acoustic_model="kelz")
        pr = hmm_eval.decode_all_pitches(decoding_data.input, priors, transitions)

        # Evaluation is done against a time-step target, so convert if needed.
        if step != "time":
            pr = convert_note_to_time(pr,
                                      decoding_data.corresp,
                                      decoding_data.input_fs,
                                      max_len=30)

        reference = DataMaps()
        reference.make_from_file(midi_path, "time", section=[0, 30], acoustic_model="kelz")
        target = reference.target

        # Frame-wise and note-wise precision / recall / F-measure
        P_f, R_f, F_f = compute_eval_metrics_frame(pr, target)
        P_n, R_n, F_n = compute_eval_metrics_note(pr, target, min_dur=0.05, with_offset=with_offset)

        print(f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}")
        sys.stdout.flush()

        frame_rows.append([P_f, R_f, F_f])
        note_rows.append([P_n, R_n, F_n])

    # Average over files; the reshape keeps the (0, 3) shape when no file matched.
    P_f, R_f, F_f = np.mean(np.asarray(frame_rows, dtype=float).reshape(-1, 3), axis=0)
    P_n, R_n, F_n = np.mean(np.asarray(note_rows, dtype=float).reshape(-1, 3), axis=0)

    print(f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}")
    print(str(F_n) + ": " + str(params))
    sys.stdout.flush()

    # Persist the evaluated model; the file name carries step, order and score.
    model_path = "hmm/models/" + step + "." + str(order) + "." + str(F_n) + ".pkl"
    with open(model_path, "wb") as handle:
        pickle.dump({"priors" : priors,
                     "transitions" : transitions}, handle)

    return -F_n
Exemple #3
0
def weight_search(params, num=0, verbose=False):
    """
    Train a weight model for one hyper-parameter setting and score it on the validation set.

    The function selects training columns/rows from the pickled data in
    `data_dict`, trains a model with `train_model`, stores it in the global
    `most_recent_model`, pickles it into `global_params['model_out']`, and
    then decodes every "*.mid" file in `data_dict['valid']` with it.

    Parameters
    ==========
    params : sequence
        Positional hyper-parameters:
        [0] gt              -- truthy selects `data_dict['gt']`, else `data_dict['beam']`
        [1] min_diff        -- only rows with `D > min_diff` are kept for training
        [2] history         -- number of history columns to keep (cast to int)
        [3] num_layers      -- number of hidden layers of size 5 (cast to int)
        [4] is_weight       -- forwarded to `train_model` as `weight`
        [5] features        -- truthy keeps the feature columns
        [6], [7] (optional) -- history_context, prior_context (default 0)
        [8] (optional)      -- use_lstm (default True)

    num : int
        Index embedded in the name of the pickled model file.

    verbose : bool
        Forwarded to `decode`.

    Returns
    =======
    float
        0.0 when no training data is selected or when a file's note F-measure
        falls below `global_params['early_exit']`; otherwise the negated mean
        note-level F-measure over the validation files.
    """
    global most_recent_model

    print(params)
    sys.stdout.flush()

    # Unpack the positional hyper-parameters.
    gt, min_diff = params[0], params[1]
    history = int(params[2])
    num_layers = int(params[3])
    is_weight, features = params[4], params[5]

    if len(params) > 6:
        history_context, prior_context = params[6], params[7]
    else:
        history_context = prior_context = 0

    use_lstm = params[8] if len(params) > 8 else True

    warnings.filterwarnings("ignore", message="tick should be an int.")

    max_len = 30
    section = [0, max_len]

    # Language model and its session
    model = model_dict['model']
    sess = model_dict['sess']

    # Training data for the weight model
    pkl = data_dict['gt' if gt else 'beam']
    X, Y, D = pkl['X'], pkl['Y'], pkl['D']
    max_history = pkl['history']
    no_mlm = pkl['no_mlm'] if 'no_mlm' in pkl else False

    if np.max(D) < min_diff:
        print("No training data generated")
        sys.stdout.flush()
        return 0.0

    data_points = np.where(D > min_diff)

    # Column selection: trailing `history` history columns, optional feature
    # columns, then the second-to-last column and (with use_lstm) the last one.
    columns = []
    if history > 0:
        columns += range(max_history - history, max_history)
    if features:
        columns += range(max_history, len(X[0]) - 2)
    columns += [-2, -1] if use_lstm else [-2]

    X = X[:, columns]

    # Optionally widen each 88-row frame with context columns via pad_x.
    if prior_context + history_context > 0:
        padded_width = X.shape[1] + prior_context * 4 + 2 * history_context * history
        X_new = np.zeros((X.shape[0], padded_width))

        for frame_idx in range(X.shape[0] // 88):
            rows = slice(88 * frame_idx, 88 * (frame_idx + 1))
            x_frame = X[rows, :]
            X_new[rows, :] = pad_x(x_frame,
                                   x_frame[:, -2],
                                   x_frame[:, -1],
                                   x_frame[:, :history],
                                   history,
                                   history_context,
                                   prior_context)

        X = X_new

    # Row selection by min_diff
    X = X[data_points]
    Y = Y[data_points]

    if len(X) == 0:
        print("No training data generated")
        sys.stdout.flush()
        return 0.0

    # Train weight model
    print("Training weight model")
    sys.stdout.flush()
    layers = [5] * num_layers

    weight_model = train_model(X, Y, layers=layers, weight=is_weight)

    most_recent_model = {
        'model': weight_model,
        'history': history,
        'features': features,
        'weight': is_weight,
        'history_context': history_context,
        'prior_context': prior_context,
        'use_lstm': use_lstm,
        'no_mlm': no_mlm
    }

    # Assemble the output file name from the hyper-parameters.
    step = global_params['step']
    name_parts = [
        "weight_model.",
        "gt" if gt else "b10",
        "_md" + str(min_diff),
        "_h" + str(history),
        "_l" + str(num_layers),
    ]
    if features:
        name_parts.append("_f")
    name_parts.append("_hc" + str(history_context))
    name_parts.append("_pc" + str(prior_context))
    if not use_lstm:
        name_parts.append("_noLSTM")
    if no_mlm:
        name_parts.append("_noMLM")
    name_parts.append("_weight" if is_weight else "_prior")
    name_parts.append("." + step + "." + str(num) + ".pkl")
    weight_model_name = "".join(name_parts)

    # Write out weight model
    model_path = os.path.join(global_params['model_out'], weight_model_name)
    with open(model_path, "wb") as handle:
        pickle.dump(most_recent_model, handle)

    frame_rows = []
    note_rows = []

    for midi_path in glob.glob(os.path.join(data_dict['valid'], "*.mid")):
        print(midi_path)
        sys.stdout.flush()

        decoding_data = DataMaps()
        decoding_data.make_from_file(midi_path,
                                     step,
                                     section,
                                     acoustic_model=global_params['acoustic'])

        # Decode with the freshly trained weight model
        pr, _priors, _weights, _combined_priors = decode(
            decoding_data.input,
            model,
            sess,
            branch_factor=5,
            beam_size=50,
            weight=[[0.8], [0.2]],
            out=None,
            hash_length=12,
            weight_model=weight_model,
            verbose=verbose,
            weight_model_dict=most_recent_model)

        # Evaluation uses a time-step target, so convert other steps first.
        if step != "time":
            pr = convert_note_to_time(pr,
                                      decoding_data.corresp,
                                      decoding_data.input_fs,
                                      max_len=max_len)

        reference = DataMaps()
        reference.make_from_file(midi_path,
                                 "time",
                                 section=section,
                                 acoustic_model=global_params['acoustic'])
        target = reference.target

        # Frame-wise and note-wise precision / recall / F-measure
        P_f, R_f, F_f = compute_eval_metrics_frame(pr, target)
        P_n, R_n, F_n = compute_eval_metrics_note(pr, target, min_dur=0.05)

        print(
            f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}"
        )
        sys.stdout.flush()

        frame_rows.append([P_f, R_f, F_f])
        note_rows.append([P_n, R_n, F_n])

        # Abandon this setting as soon as a single file scores too low.
        if F_n < global_params['early_exit']:
            print("Early stopping, F-measure too low.")
            sys.stdout.flush()
            return 0.0

    # Average over files; the reshape keeps the (0, 3) shape when no file matched.
    P_f, R_f, F_f = np.mean(np.asarray(frame_rows, dtype=float).reshape(-1, 3), axis=0)
    P_n, R_n, F_n = np.mean(np.asarray(note_rows, dtype=float).reshape(-1, 3), axis=0)

    print(
        f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}"
    )
    print(str(F_n) + ": " + str(params))
    sys.stdout.flush()
    return -F_n
Exemple #4
0
        input_roll = np.loadtxt(filename_input)
        target_roll = data.target
        mask = data.get_key_profile()
        mask_octave = data.get_key_profile_octave()

        # import matplotlib.pyplot as plt
        # plt.imshow(mask, aspect='auto')
        # plt.show(block=[bool])

        if args.step in ['quant', 'quant_short', 'event']:
            data_quant = DataMaps()
            data_quant.make_from_file(filename_target,
                                      args.step, [0, 30],
                                      acoustic_model='kelz')
            input_roll = convert_note_to_time(input_roll,
                                              data_quant.corresp,
                                              data_quant.input_fs,
                                              max_len=30)
        if args.step == 'beat':
            data_quant = DataMapsBeats()
            data_quant.make_from_file(filename_target,
                                      args.beat_gt,
                                      args.beat_subdiv, [0, 30],
                                      acoustic_model='kelz')
            input_roll = convert_note_to_time(input_roll,
                                              data_quant.corresp,
                                              data_quant.input_fs,
                                              max_len=30)
        if args.step == 'time' and args.with_quant:
            data_quant = DataMaps()
            data_quant.make_from_file(filename_target,
                                      'quant', [0, 30],




# Script section: load, for one MIDI file, the ground-truth roll and two csv
# piano rolls (from `folder_est` and `folder_gt`) and convert both csv rolls
# with convert_note_to_time.

# Full path of the MIDI file inside `folder`.
filename = os.path.join(folder,filename)

# Target roll on the 'time' step, restricted to the section [0, 30].
data = DataMaps()
data.make_from_file(filename,'time',section=[0,30], acoustic_model='kelz')
roll_gt = data.target

# Beat-based data built with False as second argument
# (presumably beat_gt=False, i.e. estimated beats -- TODO confirm against
# DataMapsBeats.make_from_file).
data_est = DataMapsBeats()
data_est.make_from_file(filename,False,section=[0,30], acoustic_model='kelz')
# Matching "<name>_pr.csv" roll read from `folder_est`.
est_roll = np.loadtxt(os.path.join(folder_est,os.path.basename(filename).replace('.mid','_pr.csv')))
# est_roll = (data_est.input>0.5).astype(float)
# NOTE(review): 25 and 30 are passed positionally; elsewhere in this file the
# same positions receive data.input_fs and max_len -- confirm they match.
roll_time_est = convert_note_to_time(est_roll,data_est.corresp,25,30)

# Same steps with True as second argument (presumably ground-truth beats --
# TODO confirm) and the csv roll read from `folder_gt`.
data_gt = DataMapsBeats()
data_gt.make_from_file(filename,True,section=[0,30], acoustic_model='kelz')
gt_roll = np.loadtxt(os.path.join(folder_gt,os.path.basename(filename).replace('.mid','_pr.csv')))
# gt_roll = (data_gt.input>0.5).astype(float)
roll_time_gt = convert_note_to_time(gt_roll,data_gt.corresp,25,30)

# Shapes of the two csv rolls as loaded (before conversion).
print(est_roll.shape)
print(gt_roll.shape)

# plt.subplot(221)
# plt.imshow(data_est.input,aspect='auto',origin='lower')
# plt.subplot(222)
# plt.imshow(est_roll,aspect='auto',origin='lower')
#
Exemple #6
0
            data = DataMapsBeats()
            data.make_from_file(file,args.beat_gt,args.beat_subdiv,section, acoustic_model="kelz")
        else:
            data = DataMaps()
            data.make_from_file(file, args.step, section,acoustic_model="kelz")


        pr = decode_all_pitches(data.input, priors, transitions)

        # Save output
        if not args.save is None:
            np.save(os.path.join(args.save, base.replace('.mid','_pr')), pr)
            np.savetxt(os.path.join(args.save, base.replace('.mid','_pr.csv')), pr)

        if args.step in ['quant','event','beat']:
            pr = convert_note_to_time(pr, data.corresp, data.input_fs, max_len=max_len)

        data = DataMaps()
        data.make_from_file(file, "time", section=section, acoustic_model="kelz")
        target = data.target

        #Evaluate
        P_f,R_f,F_f = compute_eval_metrics_frame(pr, target)
        P_n,R_n,F_n = compute_eval_metrics_note(pr, target, min_dur=0.05)

        frames = np.vstack((frames, [P_f, R_f, F_f]))
        notes = np.vstack((notes, [P_n, R_n, F_n]))

        print(f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}")
        sys.stdout.flush()
def weight_search(params, num=0, verbose=False):
    """
    Train a blending (weight) model for one hyper-parameter setting and score it on the validation set.

    Training data comes from `data_dict['blending_data']`. The trained model
    is stored in the global `most_recent_model`, pickled into
    `global_params['model_out']`, and then used to decode every "*.mid" file
    in `data_dict['valid']` (in sorted order).

    Parameters
    ==========
    params : sequence
        Positional hyper-parameters:
        [0] min_diff   -- threshold handed to `filter_data_by_min_diff`
        [1] history    -- history length (cast to int)
        [2] num_layers -- number of hidden layers (cast to int); each layer
                          has 10 units with onsets, 5 otherwise
        [3] is_weight  -- forwarded to `train_model` as `weight`
        [4] features   -- whether to use the feature columns

    num : int
        Not referenced in this function body.

    verbose : bool
        Forwarded to `decode`.

    Returns
    =======
    float
        0.0 when no training data survives the min_diff filter or when a
        file's note F-measure falls below `global_params['early_exit']`;
        otherwise the negated mean note-level F-measure over the validation
        files.
    """
    global global_params
    global most_recent_model

    print(params)
    sys.stdout.flush()

    # Parse params
    min_diff = params[0]
    history, num_layers = int(params[1]), int(params[2])
    is_weight, features = params[3], params[4]

    warnings.filterwarnings("ignore", message="tick should be an int.")

    max_len = 30
    section = [0, max_len]

    # Language model and its session
    model = model_dict['model']
    sess = model_dict['sess']

    # Training data for the weight model
    pkl = data_dict['blending_data']
    X, Y, D = pkl['X'], pkl['Y'], pkl['D']
    max_history = pkl['history']
    features_available = pkl['features']
    with_onsets = pkl['with_onsets']

    # Row filter on min_diff; with onsets, D has two columns and the
    # element-wise maximum of both is used.
    differences = np.maximum(D[:, 0], D[:, 1]) if with_onsets else D
    X, Y = filter_data_by_min_diff(X, Y, differences, min_diff)
    if len(X) == 0:
        print("No training data generated.")
        sys.stdout.flush()
        return 0.0

    # Column filter for the requested input fields, then ablation
    X = filter_X_features(X, history, max_history, features,
                          features_available, with_onsets)
    X = ablate(X, global_params['ablate'], with_onsets=with_onsets)

    # Record what was actually usable given the stored data.
    history = min(history, max_history)
    if not features_available and features:
        features = False

    # Train weight model
    print("Training weight model")
    sys.stdout.flush()
    layers = [10 if with_onsets else 5] * num_layers

    weight_model = train_model(X,
                               Y,
                               layers=layers,
                               weight=is_weight,
                               with_onsets=with_onsets)

    # Keep the model reachable through the module-level global
    most_recent_model = {
        'model': weight_model,
        'history': history,
        'features': features,
        'weight': is_weight,
        'with_onsets': with_onsets,
        'ablate': global_params['ablate']
    }

    step = global_params['step']
    weight_model_name = get_filename(min_diff, history, num_layers, features,
                                     with_onsets, is_weight, step)

    # Write out weight model
    model_path = os.path.join(global_params['model_out'], weight_model_name)
    with open(model_path, "wb") as handle:
        pickle.dump(most_recent_model, handle)

    # Evaluation
    frame_rows = []
    note_rows = []

    valid_pattern = os.path.join(data_dict['valid'], "*.mid")
    for midi_path in sorted(glob.glob(valid_pattern)):
        print(midi_path)
        sys.stdout.flush()

        if step == 'beat':
            data = DataMapsBeats()
            data.make_from_file(midi_path,
                                global_params['beat_gt'],
                                global_params['beat_subdiv'],
                                section,
                                acoustic_model=global_params['acoustic'],
                                with_onsets=with_onsets)
        else:
            data = DataMaps()
            data.make_from_file(midi_path,
                                step,
                                section,
                                acoustic_model=global_params['acoustic'],
                                with_onsets=with_onsets)

        # With onsets, data.input is indexed with a third axis of size >= 2;
        # stack slice 0 on top of slice 1 to form one 2-D roll.
        input_data = data.input
        if with_onsets:
            n_rows, n_cols = data.input.shape[0], data.input.shape[1]
            input_data = np.zeros((n_rows * 2, n_cols))
            input_data[:n_rows, :] = data.input[:, :, 0]
            input_data[n_rows:, :] = data.input[:, :, 1]

        # Add noise
        input_data = add_noise_to_input_data(input_data, data_dict['noise'],
                                             data_dict['noise_gauss'])

        pr, _priors, _weights, _combined_priors = decode(
            input_data,
            model,
            sess,
            branch_factor=5,
            beam_size=50,
            weight=[[0.8], [0.2]],
            out=None,
            hash_length=12,
            weight_model=weight_model,
            verbose=verbose,
            weight_model_dict=most_recent_model)

        # Evaluate
        if with_onsets:
            target_data = pm.PrettyMIDI(midi_path)
            corresp = data.corresp
            frame_scores, note_scores, _, _ = compute_eval_metrics_with_onset(
                pr,
                corresp,
                target_data,
                double_roll=True,
                min_dur=0.05,
                section=section)
            P_f, R_f, F_f = frame_scores
            P_n, R_n, F_n = note_scores
        else:
            if step in ['quant', 'event', 'quant_short', 'beat']:
                pr = convert_note_to_time(pr,
                                          data.corresp,
                                          data.input_fs,
                                          max_len=max_len)

            # The target is built on "20ms" when decoding ran on "20ms",
            # otherwise on "time".
            target_step = "20ms" if (step == "20ms" or with_onsets) else "time"
            reference = DataMaps()
            reference.make_from_file(midi_path,
                                     target_step,
                                     section=section,
                                     with_onsets=False,
                                     acoustic_model="kelz")
            target = reference.target

            P_f, R_f, F_f = compute_eval_metrics_frame(pr, target)
            P_n, R_n, F_n = compute_eval_metrics_note(pr, target, min_dur=0.05)

        print(
            f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}"
        )
        sys.stdout.flush()

        frame_rows.append([P_f, R_f, F_f])
        note_rows.append([P_n, R_n, F_n])

        # Abandon this setting as soon as a single file scores too low.
        if F_n < global_params['early_exit']:
            print("Early stopping, F-measure too low.")
            sys.stdout.flush()
            return 0.0

    # Average over files; the reshape keeps the (0, 3) shape when no file matched.
    P_f, R_f, F_f = np.mean(np.asarray(frame_rows, dtype=float).reshape(-1, 3), axis=0)
    P_n, R_n, F_n = np.mean(np.asarray(note_rows, dtype=float).reshape(-1, 3), axis=0)

    print(
        f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}"
    )
    print(str(F_n) + ": " + str(params))
    sys.stdout.flush()
    return -F_n