def test(params):
    """Score one candidate set of transition probabilities on the validation files.

    A ``(88, 2 ** order, 2)`` transition table is filled so that, for each of
    its 88 rows, ``[..., 0]`` holds ``params`` and ``[..., 1]`` holds
    ``1 - params``.  Every ``*.mid`` file under ``data/outputs-20/valid`` is
    decoded with ``hmm_eval.decode_all_pitches`` and compared with the
    ``"time"``-step target built from the same file.  Per-file and mean
    frame/note P, R, F are printed, and the priors together with the
    transition table are pickled under ``hmm/models/`` with the mean note
    F-measure embedded in the file name.

    Reads the module-level ``priors``, ``step``, ``order`` and ``with_offset``.

    Args:
        params: values assigned to ``transitions[i, :, 0]`` for every row
            (must be broadcastable to length ``2 ** order``).

    Returns:
        The negated mean note F-measure (presumably so that a minimiser can
        use this function as its objective -- confirm against the caller).
    """
    global priors, step, order, with_offset

    def prf_line(P_f, R_f, F_f, P_n, R_n, F_n):
        # Single place for the score line printed per file and for the mean.
        return f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}"

    print(params)

    # All 88 rows share the same table: column 0 is params, column 1 its
    # complement.
    transitions = np.zeros((88, 2 ** order, 2))
    for pitch in range(88):
        transitions[pitch, :, 0] = params
        transitions[pitch, :, 1] = 1 - transitions[pitch, :, 0]

    # One row of (P, R, F) is stacked per validation file.
    frame_scores = np.zeros((0, 3))
    note_scores = np.zeros((0, 3))

    for midi_path in glob.glob(os.path.join("data/outputs-20/valid", "*.mid")):
        print(midi_path)
        sys.stdout.flush()

        # Decode the first 30 s of the file at the configured step.
        decoded = DataMaps()
        decoded.make_from_file(midi_path, step, [0, 30], acoustic_model="kelz")
        pr = hmm_eval.decode_all_pitches(decoded.input, priors, transitions)

        # Outputs decoded at any other step are converted before comparison
        # with the "time"-step target.
        if step != "time":
            pr = convert_note_to_time(pr, decoded.corresp, decoded.input_fs, max_len=30)

        reference = DataMaps()
        reference.make_from_file(midi_path, "time", section=[0, 30], acoustic_model="kelz")
        target = reference.target

        P_f, R_f, F_f = compute_eval_metrics_frame(pr, target)
        P_n, R_n, F_n = compute_eval_metrics_note(
            pr, target, min_dur=0.05, with_offset=with_offset)

        print(prf_line(P_f, R_f, F_f, P_n, R_n, F_n))
        sys.stdout.flush()

        frame_scores = np.vstack((frame_scores, [P_f, R_f, F_f]))
        note_scores = np.vstack((note_scores, [P_n, R_n, F_n]))

    # Column-wise means over all validation files.
    P_f, R_f, F_f = np.mean(frame_scores, axis=0)
    P_n, R_n, F_n = np.mean(note_scores, axis=0)

    print(prf_line(P_f, R_f, F_f, P_n, R_n, F_n))
    print(str(F_n) + ": " + str(params))
    sys.stdout.flush()

    # The file name carries step, order and the mean note F-measure.
    out = ".".join(("hmm/models/" + step, str(order), str(F_n))) + ".pkl"
    with open(out, "wb") as handle:
        pickle.dump({"priors" : priors, "transitions" : transitions}, handle)

    return -F_n
results = {} for midi_name in os.listdir(input_folder): if not midi_name.startswith('.') and midi_name.endswith('.mid'): print(midi_name) input_midi = pm.PrettyMIDI(os.path.join(input_folder, midi_name)) target_midi = pm.PrettyMIDI(os.path.join(target_folder, midi_name)) output = (input_midi.get_piano_roll(fs) > 0).astype(int) output = output[:, int(section[0] * fs):int(section[1] * fs)] target = (target_midi.get_piano_roll(fs) > 0).astype(int) target = target[:, int(section[0] * fs):int(section[1] * fs)] P_f, R_f, F_f = compute_eval_metrics_frame(output, target) notes_est, intervals_est = [], [] for note in sum([instr.notes for instr in input_midi.instruments], []): if section is None or (note.start < section[1] and note.end > section[0]): ### +21-1 because in get_notes_intervals_with_onsets, we add +1 so that pitches are not equal to 0 notes_est += [note.pitch] intervals_est += [[ max(note.start, section[0]), min(note.end, section[1]) ]] notes_est = np.array(notes_est) intervals_est = np.array(intervals_est)
def weight_search(params, num=0, verbose=False):
    """Train a weight model for one hyper-parameter setting and score it.

    ``params`` is read positionally:

    * ``params[0]`` -- ``gt``: truthy selects ``data_dict['gt']``, otherwise
      ``data_dict['beam']`` (the file name then carries ``b10``).
    * ``params[1]`` -- ``min_diff``: only rows with ``D > min_diff`` are used
      for training.
    * ``params[2]`` -- ``history`` (cast to ``int``): number of history
      columns kept, taken from just below column ``max_history``.
    * ``params[3]`` -- ``num_layers`` (cast to ``int``): length of the
      ``layers`` list (all fives) handed to ``train_model``.
    * ``params[4]`` -- ``is_weight``: forwarded to ``train_model`` as
      ``weight``.
    * ``params[5]`` -- ``features``: truthy keeps the columns between
      ``max_history`` and the last two columns of ``X``.
    * ``params[6]`` -- ``history_context`` (optional, default ``0``).
    * ``params[7]`` -- ``prior_context`` (optional, default ``0``).
    * ``params[8]`` -- ``use_lstm`` (optional, default ``True``): truthy also
      keeps the last column of ``X``.

    The trained model and its settings are stored in the module-level
    ``most_recent_model``, pickled into ``global_params['model_out']``, and
    then used to decode every ``*.mid`` file in ``data_dict['valid']``.

    Args:
        params: sequence described above.
        num: index appended to the pickled model's file name.
        verbose: forwarded to ``decode``.

    Returns:
        The negated mean note F-measure over the validation files, or ``0.0``
        when no training data survives the ``min_diff`` filter, when a single
        file's note F-measure is below ``global_params['early_exit']``, or
        when no validation file is found.
    """
    global most_recent_model

    print(params)
    sys.stdout.flush()

    gt = params[0]
    min_diff = params[1]
    history = int(params[2])
    num_layers = int(params[3])
    is_weight = params[4]
    features = params[5]
    # Each optional entry is guarded by its own length check, so a 7-element
    # params no longer raises IndexError on params[7].
    history_context = params[6] if len(params) > 6 else 0
    prior_context = params[7] if len(params) > 7 else 0
    use_lstm = params[8] if len(params) > 8 else True

    warnings.filterwarnings("ignore", message="tick should be an int.")

    max_len = 30
    section = [0, max_len]

    # Load model
    model = model_dict['model']
    sess = model_dict['sess']

    # Get weight_model data
    pkl = data_dict['gt' if gt else 'beam']
    X = pkl['X']
    Y = pkl['Y']
    D = pkl['D']
    max_history = pkl['history']
    no_mlm = pkl['no_mlm'] if 'no_mlm' in pkl else False

    if np.max(D) < min_diff:
        print("No training data generated")
        sys.stdout.flush()
        return 0.0

    data_points = np.where(D > min_diff)

    # Select the input columns: the most recent `history` history columns,
    # optionally the feature columns, then the second-to-last column and
    # (with use_lstm) the last column.
    data_features = []
    if history > 0:
        data_features.extend(range(max_history - history, max_history))
    if features:
        data_features.extend(range(max_history, len(X[0]) - 2))
    data_features.append(-2)
    if use_lstm:
        data_features.append(-1)
    X = X[:, data_features]

    if prior_context + history_context > 0:
        # Rows are processed in consecutive groups of 88; pad_x widens each
        # group by prior_context * 4 + 2 * history_context * history columns.
        # NOTE(review): when use_lstm is falsy, column -1 was not selected
        # above, so x_frame[:, -1] is the column picked via index -2 --
        # confirm this is what pad_x expects.
        X_new = np.zeros(
            (X.shape[0],
             X.shape[1] + prior_context * 4 + 2 * history_context * history))
        for i in range(X.shape[0] // 88):
            rows = slice(88 * i, 88 * (i + 1))
            x_frame = X[rows, :]
            X_new[rows, :] = pad_x(x_frame, x_frame[:, -2], x_frame[:, -1],
                                   x_frame[:, :history], history,
                                   history_context, prior_context)
        X = X_new

    X = X[data_points]
    Y = Y[data_points]

    if len(X) == 0:
        print("No training data generated")
        sys.stdout.flush()
        return 0.0

    # Train weight model
    print("Training weight model")
    sys.stdout.flush()
    layers = [5] * num_layers
    weight_model = train_model(X, Y, layers=layers, weight=is_weight)

    most_recent_model = {
        'model': weight_model,
        'history': history,
        'features': features,
        'weight': is_weight,
        'history_context': history_context,
        'prior_context': prior_context,
        'use_lstm': use_lstm,
        'no_mlm': no_mlm
    }

    # The file name encodes every setting of this run.
    weight_model_name = "weight_model."
    weight_model_name += "gt" if gt else "b10"
    weight_model_name += "_md" + str(min_diff)
    weight_model_name += "_h" + str(history)
    weight_model_name += "_l" + str(num_layers)
    if features:
        weight_model_name += "_f"
    weight_model_name += "_hc" + str(history_context)
    weight_model_name += "_pc" + str(prior_context)
    if not use_lstm:
        weight_model_name += "_noLSTM"
    if no_mlm:
        weight_model_name += "_noMLM"
    weight_model_name += "_weight" if is_weight else "_prior"
    weight_model_name += "." + global_params['step'] + "." + str(num) + ".pkl"

    # Write out weight model
    with open(os.path.join(global_params['model_out'], weight_model_name),
              "wb") as file:
        pickle.dump(most_recent_model, file)

    # One row of (P, R, F) is stacked per validation file.
    frames = np.zeros((0, 3))
    notes = np.zeros((0, 3))

    # Sorted so that the log order does not depend on the file system.
    for filename in sorted(glob.glob(os.path.join(data_dict['valid'], "*.mid"))):
        print(filename)
        sys.stdout.flush()

        data = DataMaps()
        data.make_from_file(filename, global_params['step'], section,
                            acoustic_model=global_params['acoustic'])

        # Decode (only the decoded output is used here)
        pr, _priors, _weights, _combined_priors = decode(
            data.input, model, sess, branch_factor=5, beam_size=50,
            weight=[[0.8], [0.2]], out=None, hash_length=12,
            weight_model=weight_model, verbose=verbose,
            weight_model_dict=most_recent_model)

        # Outputs decoded at any other step are converted before comparison
        # with the "time"-step target.
        if global_params['step'] != "time":
            pr = convert_note_to_time(pr, data.corresp, data.input_fs,
                                      max_len=max_len)

        data = DataMaps()
        data.make_from_file(filename, "time", section=section,
                            acoustic_model=global_params['acoustic'])
        target = data.target

        # Evaluate
        P_f, R_f, F_f = compute_eval_metrics_frame(pr, target)
        P_n, R_n, F_n = compute_eval_metrics_note(pr, target, min_dur=0.05)

        print(
            f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}"
        )
        sys.stdout.flush()

        frames = np.vstack((frames, [P_f, R_f, F_f]))
        notes = np.vstack((notes, [P_n, R_n, F_n]))

        # A single poorly decoded file is enough to abandon this setting.
        if F_n < global_params['early_exit']:
            print("Early stopping, F-measure too low.")
            sys.stdout.flush()
            return 0.0

    if len(notes) == 0:
        # np.mean over zero rows would yield NaN and hand a NaN objective
        # back to the caller; report the failure like the other dead ends.
        print("No validation files found")
        sys.stdout.flush()
        return 0.0

    P_f, R_f, F_f = np.mean(frames, axis=0)
    P_n, R_n, F_n = np.mean(notes, axis=0)

    print(
        f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}"
    )
    print(str(F_n) + ": " + str(params))
    sys.stdout.flush()
    return -F_n
max_len=30) if args.step == 'time' and args.with_quant: data_quant = DataMaps() data_quant.make_from_file(filename_target, 'quant', [0, 30], acoustic_model='kelz') input_roll = align_matrix(input_roll, data_quant.corresp, data_quant.input_fs, method='quant') input_roll = convert_note_to_time(input_roll, data_quant.corresp, data_quant.input_fs, max_len=30) P_f, R_f, F_f = compute_eval_metrics_frame(input_roll, target_roll) P_n, R_n, F_n = compute_eval_metrics_note( input_roll, target_roll, min_dur=0.05, with_offset=args.with_offset, min_gap=0.05 if args.gap else None) err_FP, err_tot, err_FP_o, err_tot_o = out_key_errors_binary_mask( input_roll, target_roll, mask, mask_octave) print( f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}" ) print( f"Out-key-errors FP: {err_FP:.3f}, Total: {err_tot:.3f}, OctaveFP: {err_FP_o:.3f}, OctaveTotal: {err_tot_o:.3f}" )
pr = decode_all_pitches(data.input, priors, transitions) # Save output if not args.save is None: np.save(os.path.join(args.save, base.replace('.mid','_pr')), pr) np.savetxt(os.path.join(args.save, base.replace('.mid','_pr.csv')), pr) if args.step in ['quant','event','beat']: pr = convert_note_to_time(pr, data.corresp, data.input_fs, max_len=max_len) data = DataMaps() data.make_from_file(file, "time", section=section, acoustic_model="kelz") target = data.target #Evaluate P_f,R_f,F_f = compute_eval_metrics_frame(pr, target) P_n,R_n,F_n = compute_eval_metrics_note(pr, target, min_dur=0.05) frames = np.vstack((frames, [P_f, R_f, F_f])) notes = np.vstack((notes, [P_n, R_n, F_n])) print(f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}") sys.stdout.flush() results[base] = [[P_f,R_f,F_f],[P_n,R_n,F_n]] P_f, R_f, F_f = np.mean(frames, axis=0) P_n, R_n, F_n = np.mean(notes, axis=0) print(f"Averages: Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}") sys.stdout.flush()
def weight_search(params, num=0, verbose=False):
    """Train a blending weight model for one hyper-parameter setting and score it.

    ``params`` is read positionally:

    * ``params[0]`` -- ``min_diff``: passed to ``filter_data_by_min_diff``.
    * ``params[1]`` -- ``history`` (cast to ``int``).
    * ``params[2]`` -- ``num_layers`` (cast to ``int``): length of the
      ``layers`` list handed to ``train_model`` (entries are 10 with onsets,
      5 otherwise).
    * ``params[3]`` -- ``is_weight``: forwarded to ``train_model`` as
      ``weight``.
    * ``params[4]`` -- ``features``: whether feature columns are requested.

    Training data comes from ``data_dict['blending_data']``.  The trained
    model and its settings are stored in the module-level
    ``most_recent_model``, pickled into ``global_params['model_out']``, and
    then used to decode every ``*.mid`` file in ``data_dict['valid']``.

    Args:
        params: sequence described above.
        num: not used by this function (the file name comes from
            ``get_filename``, which is not given ``num``).
        verbose: forwarded to ``decode``.

    Returns:
        The negated mean note F-measure over the validation files, or ``0.0``
        when no training data survives the ``min_diff`` filter, when a single
        file's note F-measure is below ``global_params['early_exit']``, or
        when no validation file is found.
    """
    global most_recent_model

    print(params)
    sys.stdout.flush()

    # Parse params
    min_diff = params[0]
    history = int(params[1])
    num_layers = int(params[2])
    is_weight = params[3]
    features = params[4]

    warnings.filterwarnings("ignore", message="tick should be an int.")

    max_len = 30
    section = [0, max_len]

    # Load model
    model = model_dict['model']
    sess = model_dict['sess']

    # Get weight_model data
    pkl = data_dict['blending_data']
    X = pkl['X']
    Y = pkl['Y']
    D = pkl['D']
    max_history = pkl['history']
    features_available = pkl['features']
    with_onsets = pkl['with_onsets']

    # Filter data for min_diff.  With onsets D has (at least) two columns and
    # the element-wise larger of the first two is used.
    X, Y = filter_data_by_min_diff(
        X, Y,
        np.maximum(D[:, 0], D[:, 1]) if with_onsets else D, min_diff)
    if len(X) == 0:
        print("No training data generated.")
        sys.stdout.flush()
        return 0.0

    # Filter X for desired input fields
    X = filter_X_features(X, history, max_history, features,
                          features_available, with_onsets)

    # Ablate X
    X = ablate(X, global_params['ablate'], with_onsets=with_onsets)

    # Clamp the settings to what the data offers.  This happens after the
    # column filtering above, which still receives the requested values.
    history = min(history, max_history)
    if features and not features_available:
        features = False

    # Train weight model
    print("Training weight model")
    sys.stdout.flush()
    layers = [10 if with_onsets else 5] * num_layers
    weight_model = train_model(X, Y, layers=layers, weight=is_weight,
                               with_onsets=with_onsets)

    # Save model
    most_recent_model = {
        'model': weight_model,
        'history': history,
        'features': features,
        'weight': is_weight,
        'with_onsets': with_onsets,
        'ablate': global_params['ablate']
    }

    weight_model_name = get_filename(min_diff, history, num_layers, features,
                                     with_onsets, is_weight,
                                     global_params['step'])

    # Write out weight model
    with open(os.path.join(global_params['model_out'], weight_model_name),
              "wb") as file:
        pickle.dump(most_recent_model, file)

    # Evaluation: one row of (P, R, F) is stacked per validation file.
    frames = np.zeros((0, 3))
    notes = np.zeros((0, 3))

    for filename in sorted(glob.glob(os.path.join(data_dict['valid'], "*.mid"))):
        print(filename)
        sys.stdout.flush()

        if global_params['step'] == 'beat':
            data = DataMapsBeats()
            data.make_from_file(filename, global_params['beat_gt'],
                                global_params['beat_subdiv'], section,
                                acoustic_model=global_params['acoustic'],
                                with_onsets=with_onsets)
        else:
            data = DataMaps()
            data.make_from_file(filename, global_params['step'], section,
                                acoustic_model=global_params['acoustic'],
                                with_onsets=with_onsets)

        # Decode
        input_data = data.input
        if with_onsets:
            # Stack the two slices of the last axis on top of each other,
            # giving a 2-D array with twice as many rows.
            n_rows = data.input.shape[0]
            input_data = np.zeros((n_rows * 2, data.input.shape[1]))
            input_data[:n_rows, :] = data.input[:, :, 0]
            input_data[n_rows:, :] = data.input[:, :, 1]

        # Add noise
        input_data = add_noise_to_input_data(input_data, data_dict['noise'],
                                             data_dict['noise_gauss'])

        # Only the decoded output is used here.
        pr, _priors, _weights, _combined_priors = decode(
            input_data, model, sess, branch_factor=5, beam_size=50,
            weight=[[0.8], [0.2]], out=None, hash_length=12,
            weight_model=weight_model, verbose=verbose,
            weight_model_dict=most_recent_model)

        # Evaluate
        if with_onsets:
            target_data = pm.PrettyMIDI(filename)
            corresp = data.corresp
            [P_f, R_f, F_f], [P_n, R_n, F_n], _, _ = \
                compute_eval_metrics_with_onset(pr, corresp, target_data,
                                                double_roll=True,
                                                min_dur=0.05,
                                                section=section)
        else:
            if global_params['step'] in ['quant', 'event', 'quant_short',
                                         'beat']:
                pr = convert_note_to_time(pr, data.corresp, data.input_fs,
                                          max_len=max_len)

            # with_onsets is falsy in this branch, so only the configured
            # step decides which target resolution is loaded.
            target_step = "20ms" if global_params['step'] == "20ms" else "time"
            data = DataMaps()
            data.make_from_file(filename, target_step, section=section,
                                with_onsets=False, acoustic_model="kelz")
            target = data.target

            P_f, R_f, F_f = compute_eval_metrics_frame(pr, target)
            P_n, R_n, F_n = compute_eval_metrics_note(pr, target,
                                                      min_dur=0.05)

        print(
            f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}"
        )
        sys.stdout.flush()

        frames = np.vstack((frames, [P_f, R_f, F_f]))
        notes = np.vstack((notes, [P_n, R_n, F_n]))

        # A single poorly decoded file is enough to abandon this setting.
        if F_n < global_params['early_exit']:
            print("Early stopping, F-measure too low.")
            sys.stdout.flush()
            return 0.0

    if len(notes) == 0:
        # np.mean over zero rows would yield NaN and hand a NaN objective
        # back to the caller; report the failure like the other dead ends.
        print("No validation files found.")
        sys.stdout.flush()
        return 0.0

    P_f, R_f, F_f = np.mean(frames, axis=0)
    P_n, R_n, F_n = np.mean(notes, axis=0)

    print(
        f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}"
    )
    print(str(F_n) + ": " + str(params))
    sys.stdout.flush()
    return -F_n
hash_length=args.hash, weight_model_dict=weight_model_dict, verbose=args.verbose, gt=data.target if args.gt else None, weight_model=weight_model) # Evaluate if args.output is not None: np.save(os.path.join(args.output, "pr"), pr) np.save(os.path.join(args.output, "priors"), priors) np.save(os.path.join(args.output, "weights"), weights) np.save(os.path.join(args.output, "combined_priors"), combined_priors) if args.step in ['quant', 'event']: pr = dataMaps.convert_note_to_time(pr, data.corresp, max_len=max_len) data = dataMaps.DataMaps() data.make_from_file(args.MIDI, "time", section=section) target = data.target P_f, R_f, F_f = eval_utils.compute_eval_metrics_frame(pr, target) P_n, R_n, F_n = eval_utils.compute_eval_metrics_note(pr, target, min_dur=0.05) print( f"Frame P,R,F: {P_f:.3f},{R_f:.3f},{F_f:.3f}, Note P,R,F: {P_n:.3f},{R_n:.3f},{F_n:.3f}" )