def TrainDNNExtractor(trainidx, epoch=20, saveas="dnn.model"):
    # Train the DNN feature extractor on synthesized-MIDI HCQT data.
    cqtfilelist = np.array(find_files(const.PATH_MIDIHCQT, ext="npz"))[trainidx]
    filecnt = cqtfilelist.size
    chainer.config.train = True
    chainer.config.enable_backprop = True

    dnn = networks.FeatureDNN()
    model = networks.DNNModel(dnn)
    model.to_gpu(0)
    opt = optimizers.MomentumSGD()
    opt.setup(model)

    # Chunk boundaries over the file list (currently unused: all files are
    # loaded into memory at once below).
    spl = np.arange(0, filecnt, 2000)
    if spl[-1] < filecnt:
        spl = np.append(spl, filecnt)
    print("split count:%d" % (spl.size - 1))
    print(spl)

    print("start epochs...")
    # Load every spectrogram/target pair and concatenate along the frame axis.
    S = []
    T = []
    for cqtfile in cqtfilelist:
        dat = np.load(cqtfile)
        spec = utils.PreprocessSpec(dat["spec"][:, :])
        targ = GetConvnetTargetFromPianoroll(dat["target"]).astype(np.int32)
        assert spec.shape[0] == targ.shape[0]
        S.append(spec)
        T.append(targ)
    S = np.concatenate(S, axis=0)
    T = np.concatenate(T, axis=0)

    for ep in range(epoch):
        sum_loss = 0
        assert S.shape[0] == T.shape[0]
        # Shuffle frames and iterate over mini-batches.
        randidx = np.random.permutation(S.shape[0])
        for i in range(0, randidx.size, const.CONV_TRAIN_BATCH):
            x_in = cp.asarray(S[randidx[i:i + const.CONV_TRAIN_BATCH], :])
            t_in = cp.asarray(T[randidx[i:i + const.CONV_TRAIN_BATCH], :])
            model.cleargrads()
            loss = model(x_in, t_in)
            loss.backward()
            opt.update()
            sum_loss += loss.data * const.CONV_TRAIN_BATCH
        # Checkpoint after every epoch and report the mean per-sample loss
        # (dividing by the sample count, not the batch size).
        dnn.save(saveas)
        print("epoch: %d/%d loss:%.04f" % (ep + 1, epoch, sum_loss / randidx.size))
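
def _train_dnn_extractor_example():
    # Usage sketch, not part of the original pipeline: train on a random 90%
    # subset of the MIDI-HCQT files. The 0.9 split ratio and this helper's
    # name are illustrative assumptions.
    files = find_files(const.PATH_MIDIHCQT, ext="npz")
    perm = np.random.permutation(len(files))
    train_idx = perm[:int(len(perm) * 0.9)]
    TrainDNNExtractor(train_idx, epoch=20, saveas="dnn.model")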
def EvaluateConvnet(modelfile, cqtfilelist):
    predicted_chroma = []
    target_chroma = []
    predicted_bass = []
    target_bass = []
    predicted_top = []
    target_top = []
    # Pick the extractor class that matches the trained model file:
    #dnn = networks.ConvnetFeatExtractor()
    #dnn = networks.FullCNNFeatExtractor()
    dnn = networks.FeatureDNN()
    dnn.load(modelfile)
    dnn.to_gpu(0)
    chainer.config.train = False
    chainer.config.enable_backprop = False

    for cqtfile in cqtfilelist:
        dat = np.load(cqtfile)
        cqt = utils.PreprocessSpec(dat["spec"][:, :])
        t = chromatemplate.GetConvnetTargetFromPianoroll(dat["target"])
        out = cp.asnumpy(dnn.GetFeature(cp.asarray(cqt)).data)
        out_len = out.shape[0]
        # The 36-dim output packs [bass | chroma | top], 12 pitch classes each.
        pred_chroma = np.zeros((out_len, 12), dtype="int32")
        pred_bass = np.zeros((out_len, 12), dtype="int32")
        pred_top = np.zeros((out_len, 12), dtype="int32")
        # Middle chroma: threshold each pitch class independently.
        pred_chroma[out[:, 12:24] > 0.5] = 1
        # Bass and top notes: one-hot at the argmax pitch class per frame.
        pred_bass[np.arange(out_len), np.argmax(out[:, :12], axis=1)] = 1
        pred_top[np.arange(out_len), np.argmax(out[:, 24:36], axis=1)] = 1
        predicted_chroma.append(pred_chroma.astype(bool))
        target_chroma.append(t[:out_len, 12:24].astype(bool))
        predicted_bass.append(pred_bass.astype(bool))
        target_bass.append(t[:out_len, :12].astype(bool))
        predicted_top.append(pred_top.astype(bool))
        target_top.append(t[:out_len, 24:36].astype(bool))

    eval_chroma = evaluate(predicted_chroma, target_chroma)
    eval_bass = evaluate(predicted_bass, target_bass)
    eval_top = evaluate(predicted_top, target_top)
    return eval_chroma, eval_bass, eval_top
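
def _evaluate_convnet_example():
    # Usage sketch: evaluate a trained model on a held-out 10% of the
    # MIDI-HCQT files. The fixed seed and 0.9/0.1 split are illustrative
    # assumptions; in practice this should mirror the split used in
    # TrainDNNExtractor so that train and test files do not overlap.
    files = np.array(find_files(const.PATH_MIDIHCQT, ext="npz"))
    perm = np.random.RandomState(0).permutation(files.size)
    test_files = files[perm[int(files.size * 0.9):]]
    eval_chroma, eval_bass, eval_top = EvaluateConvnet("dnn.model", test_files)
    print("chroma:", eval_chroma)
    print("bass:", eval_bass)
    print("top:", eval_top)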
def TrainTranscribeDNNChord(idx, epoch=20, saveas="dnn_deepchroma.model"):
    cqtfilelist = np.array(find_files(const.PATH_HCQT, ext="npy"))[idx]
    chordlablist = np.array(
        find_files(const.PATH_CHORDLAB, ext=["lab", "chords"]))[idx]
    featurelist = []
    targetlist = []
    chainer.config.train = True
    chainer.config.enable_backprop = True
    for cqtfile, labfile in zip(cqtfilelist, chordlablist):
        cqt = np.load(cqtfile)[0, :, :]  # first harmonic of the HCQT
        chroma = voc.LoadChromaTarget(labfile)
        # Truncate both streams to the shorter length before pairing them.
        min_sz = min(cqt.shape[0], chroma.shape[0])
        cqt = utils.Embed(utils.PreprocessSpec(cqt[:min_sz]), size=7)
        chroma = chroma[:min_sz]
        featurelist.append(cqt)
        targetlist.append(chroma.astype(np.int32))
    featurelist = np.concatenate(featurelist)
    targetlist = np.concatenate(targetlist)
    itemcnt = targetlist.shape[0]

    print("DNN Training begin...")
    dnn = networks.FeatureDNN()
    dnn.train = True
    model = networks.DNNModel(predictor=dnn)
    model.to_gpu()
    opt = optimizers.AdaDelta()
    opt.setup(model)
    for ep in range(epoch):
        randidx = np.random.permutation(itemcnt)
        sumloss = 0.0
        for i in range(0, itemcnt, const.DNN_TRAIN_BATCH):
            X = cp.asarray(featurelist[randidx[i:i + const.DNN_TRAIN_BATCH]])
            T = cp.asarray(targetlist[randidx[i:i + const.DNN_TRAIN_BATCH]])
            # opt.update computes the loss, backprops, and applies the update.
            opt.update(model, X, T)
            sumloss += model.loss.data * const.DNN_TRAIN_BATCH
        print("epoch %d/%d loss=%.3f" % (ep + 1, epoch, sumloss / itemcnt))
    dnn.save(saveas)
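
def _train_transcribe_dnn_chord_example():
    # Usage sketch: train the deep-chroma DNN on the first 150 annotated
    # tracks. The count of 150 is an illustrative assumption; any index
    # array into the HCQT/chord-label file lists works.
    TrainTranscribeDNNChord(np.arange(150), epoch=20,
                            saveas="dnn_deepchroma.model")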
        n_bins=144, bins_per_octave=24, filter_scale=2,
        tuning=None)).T.astype(np.float32)), size=1)

# Crop both spectrograms to the first 250 frames for the comparison plot.
spec = spec[:, :250, :]
spec_dnn = spec_dnn[:250, :]
cnn = networks.FullCNNFeatExtractor()
cnn.load("fullcnn_crossentropy_6000.model")
deepchroma = networks.FeatureDNN()
deepchroma.load(
    "/home/wuyiming/Projects/TranscriptionChordRecognition/dnn3500.model")
# Take the middle-chroma slice (dims 12:24) of each extractor's 36-dim
# output, transposed to (pitch class, time) for plotting.
chroma_cnn = cnn.GetFeature(spec).data[:, 12:24].T
chroma_dnn = deepchroma.GetFeature(spec_dnn).data[:, 12:24].T
# Log-compressed librosa CQT chroma as a baseline.
chroma = np.log(
    1 + chroma_cqt(wav, sr=C.SR, hop_length=C.H, bins_per_octave=24)[:, :250])
target = chromatemplate.GetConvnetTargetFromPianoroll(
    U.GetPianoroll(
        "/media/wuyiming/TOSHIBA EXT/AIST.RWC-MDB-P-2001.SMF_SYNC/RM-P051.SMF_SYNC.MID"
    ))
target = target[10:260, 12:24].T
plt.subplot(4, 1, 1)
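# Sketch of the remaining three panels (an assumption about how the figure
# continues from plt.subplot(4, 1, 1); panel titles are illustrative):
plt.imshow(chroma_cnn, aspect="auto", origin="lower")
plt.title("CNN chroma")
plt.subplot(4, 1, 2)
plt.imshow(chroma_dnn, aspect="auto", origin="lower")
plt.title("DNN chroma")
plt.subplot(4, 1, 3)
plt.imshow(chroma, aspect="auto", origin="lower")
plt.title("CQT chroma")
plt.subplot(4, 1, 4)
plt.imshow(target, aspect="auto", origin="lower")
plt.title("target")
plt.tight_layout()
plt.show()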