def __init__(self, mode, timesteps=128):
    self.mode = mode
    self.timesteps = timesteps
    # Partially apply label_conversion with a fixed number of timesteps.
    self.l_conv = lambda label, tid, **kwargs: label_conversion(
        label, tid, timesteps=timesteps, **kwargs)

    # Each mode maps to the function that builds the training target and to
    # the number of output classes of that target.
    self.mode_mapping = {
        "frame": {
            "conversion_func": self.get_frame,
            "out_classes": 2
        },
        "frame_onset": {
            "conversion_func": self.get_frame_onset,
            "out_classes": 3
        },
        "frame_onset_offset": {
            "conversion_func": self.get_frame_onset_offset,
            "out_classes": 4
        }
    }

    # Hook for registering additional modes before the validation below.
    self._update_mode()
    if mode not in self.mode_mapping:
        raise ValueError(
            f"Available modes: {list(self.mode_mapping.keys())}. Provided: {mode}"
        )
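# Illustrative sketch (not part of the original class): how the mode mapping
# registered in the constructor above is typically consumed. The helper name
# `_resolve_mode_example` and its `label_type` argument are hypothetical; only
# `mode_mapping`, the mode names, and the "conversion_func"/"out_classes" keys
# come from the code above.
def _resolve_mode_example(label_type):
    entry = label_type.mode_mapping[label_type.mode]
    conversion_func = entry["conversion_func"]  # e.g. get_frame_onset for "frame_onset"
    out_classes = entry["out_classes"]          # 2, 3, or 4 depending on the mode
    return conversion_func, out_classes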
def gen_onsets_info_from_label(label, inst_num=1, t_unit=0.02, mpe=False):
    # Onset roll of the requested instrument.
    roll = label_conversion(
        label, 0, timesteps=len(label), onsets=True, mpe=mpe,
        ori_feature_size=88, feature_num=88)
    roll = np.where(roll > 0.5, 1, 0)

    midi_ch_mapping = sorted([v for v in MusicNetMIDIMapping.values()])
    ch = midi_ch_mapping.index(inst_num) + 1
    interval, pitches = gen_onsets_info(roll[:, :, ch], t_unit=t_unit)
    if len(interval) == 0:
        return interval, pitches

    # Frame-level roll used to derive the note offsets. The frame right before
    # each onset is cleared so that consecutive notes are not merged into one.
    off_roll = label_conversion(
        label, 0, timesteps=len(label), mpe=mpe,
        ori_feature_size=88, feature_num=88)
    off_roll = np.where(off_roll > 0.5, 1, 0)
    for i in range(off_roll[:, :, ch].shape[1]):
        notes = find_occur(roll[:, i, ch], t_unit=t_unit)
        for nn in notes:
            on_idx = nn["onset"]
            off_roll[on_idx - 1:on_idx, i, ch] = 0

    off_interval, _ = gen_onsets_info(off_roll[:, :, ch], t_unit=t_unit)
    if len(interval) != len(off_interval):
        l_on = len(interval)
        l_off = len(off_interval)
        print("WARNING!! Inconsistent interval lengths. Diff: {}".format(abs(l_on - l_off)))
        print("On len: {}, Off len: {}".format(l_on, l_off))
        min_l = min(l_on, l_off)
        interval = interval[:min_l]
        off_interval = off_interval[:min_l]

    # Merge onsets and offsets into (onset, offset) pairs and make sure every
    # note has a strictly positive duration.
    interval[:, 1] = off_interval[:, 1]
    inconsist = np.where(interval[:, 1] - interval[:, 0] <= 0)
    interval[inconsist, 1] += t_unit * 2
    return interval, pitches
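# Illustrative sketch (assumption, not original code): note-level evaluation
# with mir_eval using the (interval, pitch) pairs produced above. The helper
# name, its arguments, and the assumption that pitches are given in Hz are all
# hypothetical; only gen_onsets_info_from_label comes from this module.
def _note_eval_example(est_interval, est_pitch, label, inst_num=1, t_unit=0.02):
    import mir_eval

    ref_interval, ref_pitch = gen_onsets_info_from_label(
        label, inst_num=inst_num, t_unit=t_unit)
    # Onset-only matching; the derived offsets are ignored via offset_ratio=None.
    precision, recall, fmeasure, _ = mir_eval.transcription.precision_recall_f1_overlap(
        ref_interval, ref_pitch, est_interval, est_pitch, offset_ratio=None)
    return precision, recall, fmeasure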
def gen_frame_info_from_label(label, inst_num=1, t_unit=0.02, mpe=False):
    roll = label_conversion(
        label, 0, timesteps=len(label), mpe=mpe,
        ori_feature_size=88, feature_num=88)
    midi_ch_mapping = sorted([v for v in MusicNetMIDIMapping.values()])
    ch = midi_ch_mapping.index(inst_num) + 1
    return gen_frame_info(roll[:, :, ch], t_unit=t_unit)
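# Illustrative sketch (assumption, not original code): frame-level evaluation
# with mir_eval.multipitch, assuming gen_frame_info_from_label returns a
# (times, per-frame frequency list) pair in the format mir_eval expects. The
# helper name and its arguments are hypothetical.
def _frame_eval_example(est_time, est_freqs, label, inst_num=1, t_unit=0.02):
    import mir_eval

    ref_time, ref_freqs = gen_frame_info_from_label(
        label, inst_num=inst_num, t_unit=t_unit)
    return mir_eval.multipitch.evaluate(ref_time, ref_freqs, est_time, est_freqs)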
def __init__(self, mode: str, timesteps=128):
    self.mode = mode
    self.timesteps = timesteps
    self.l_conv = lambda label, tid, **kwargs: label_conversion(
        label, tid, timesteps=timesteps, **kwargs)

    if mode == "frame":
        self.conversion_func = self.get_frame
        self.out_classes = 2
    elif mode == "frame_onset":
        self.conversion_func = self.get_frame_onset
        self.out_classes = 3
    elif mode == "frame_onset_offset":
        self.conversion_func = self.get_frame_onset_offset
        self.out_classes = 4
    elif self.customized_mode(mode):
        pass
    else:
        raise ValueError(
            "Available modes: 'frame', 'frame_onset', 'frame_onset_offset'. Provided: {}".format(mode))
def FullTest(model_path,
             test_path,
             label_path=None,
             pred_save_path="./predictions",
             use_ram=True,
             MAX_FRAME=1800):
    # Load files
    print("Loading files")
    features = parse_path(test_path)
    # Keep only the extracted .hdf feature files.
    features = [ff for ff in features if ff.endswith(".hdf")]

    if label_path is not None:
        # Assume there is exactly one label file corresponding to each test audio.
        #labels = parse_path(label_path, label=True)
        labels = []
        for ff in features:
            ext = ff[ff.rfind("."):]
            if ext != ".hdf" and ext != ".pickle":
                continue
            ll = ff[(ff.rfind("/") + 1):]
            if "_label" not in ll:
                ll = ll[:ll.rfind(".")] + "_label.pickle"
            labels.append(os.path.join(label_path, ll))
        labels = load_files(labels, use_ram=use_ram)
    features = load_files(features, use_ram=use_ram)

    model = load_model(model_path)

    # Validate the loaded features against the model configuration.
    f_type, channels, out_classes, timesteps = model_info(model_path)
    key = list(features.keys())
    if f_type == "HCFP" and features[key[0]].shape[2] < 12:
        assert False, "The model uses HCFP as input feature, but the loaded features are not HCFP."
    if f_type == "CFP" and features[key[0]].shape[2] == 12:
        assert (len(channels) == 2 and 1 in channels and 3 in channels), (
            "The given features are HCFP, but the model uses more feature types. "
            "Model input feature types: " + str(channels) +
            " ({0: Z, 1: Spec, 2: GCoS, 3: Ceps})")
        channels = [0, 6]

    mpe = False
    if out_classes == 2:
        mpe = True

    # To avoid running out of memory.
    # 9000 is suitable for 32G RAM with one instrument only and all 4 channels
    # used (max RAM usage close to 100%).
    #MAX_FRAME = 1800
    print("Max frame per prediction: ", MAX_FRAME)

    # Start to predict
    pred_out = h5py.File(os.path.join(pred_save_path, "pred.hdf"), "w")
    label_out = h5py.File(os.path.join(pred_save_path, "label.hdf"), "w")
    len_data = len(features)
    for idx in trange(len_data, desc='Dataset'):
        i = key[idx]
        feature = features[i][:]
        pred = predict(feature,
                       model,
                       MAX_FRAME=MAX_FRAME,
                       channels=list(channels),
                       instruments=out_classes - 1,
                       timesteps=timesteps)

        # Save to output
        pred_out.create_dataset(str(i), data=pred, compression="gzip", compression_opts=5)
        del feature, features[i]

        # Process the corresponding label
        if label_path is not None:
            ll = labels[0]
            if type(ll) != np.ndarray:
                ll = label_conversion(ll, 352, 128, mpe=mpe)[:, :, 1:]
            label_out.create_dataset(str(i), data=ll, compression="gzip", compression_opts=5)
            del labels[0]

    pred_out.close()
    label_out.close()
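# Illustrative invocation (assumption, not original code): the paths below are
# placeholders, and the keyword values simply mirror the defaults of the
# signature above. Running this writes pred.hdf and label.hdf under
# pred_save_path for later scoring.
if __name__ == "__main__":
    FullTest(
        model_path="./model/my_model",
        test_path="./features/test",
        label_path="./features/test_labels",
        pred_save_path="./predictions",
        use_ram=True,
        MAX_FRAME=1800,
    )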
def gen_onsets_info_from_label(label, inst_num=1, t_unit=0.02):
    roll = label_conversion(
        label, 0, timesteps=len(label), onsets=True,
        ori_feature_size=88, feature_num=88)
    roll = np.where(roll > 0.5, 1, 0)
    midi_ch_mapping = sorted([v for v in MusicNetMIDIMapping.values()])
    ch = midi_ch_mapping.index(inst_num) + 1
    return gen_onsets_info(roll[:, :, ch], t_unit=t_unit)