def EstimateChord(idx, dnnmodel, todir=False):
    """Estimate framewise chord labels for the HCQT files selected by *idx*.

    Runs the trained CNN feature extractor followed by the NBLSTM-CRF
    decoder on GPU 0 and writes one ``.lab`` file per input CQT file.

    Args:
        idx: index array selecting which HCQT ``.npy`` files to process.
        dnnmodel: path to the trained feature-extractor weights.
        todir: if True, write results under ``PATH_ESTIMATE_CROSS`` grouped
            by album directory (the CQT file's parent directory name);
            otherwise write numbered files to ``PATH_ESTIMATE``.
    """
    dnn = networks.FullCNNFeatExtractor()
    dnn.load(dnnmodel)
    dnn.to_gpu(0)
    decoder = networks.NBLSTMCRF()
    decoder.load()
    decoder.to_gpu(0)
    cqtfilelist = np.array(find_files(const.PATH_HCQT, ext="npy"))[idx]
    # Inference only: disable train-mode behavior and autograd bookkeeping.
    chainer.config.train = False
    chainer.config.enable_backprop = False
    # enumerate replaces the original hand-rolled i = 0 / i += 1 counter.
    for i, cqtfile in enumerate(cqtfilelist):
        cqt = utils.Embed(utils.PreprocessSpec(np.load(cqtfile)[:, :, :]), 1)
        chroma = dnn.GetFeature(cp.asarray(cqt)).data
        path = decoder.argmax(chroma)
        feat = cp.asnumpy(chroma)
        if todir:
            # Mirror the source layout: <album>/<cqt filename>.lab
            fname = cqtfile.split("/")[-1] + ".lab"
            alb = cqtfile.split("/")[-2]
            utils.SaveEstimatedLabelsFramewise(
                path, const.PATH_ESTIMATE_CROSS + alb + "/" + fname, feat)
        else:
            utils.SaveEstimatedLabelsFramewise(
                path, const.PATH_ESTIMATE + "%03d.lab" % i, feat)
type=str, default="nblstm_crf.model", action="store") args = parser.parse_args() audio_list = find_files("Datas/audios_estimation") for audiofile in audio_list: fname = audiofile.split("/")[-1] print("Processing: %s" % fname) #load audio y,sr = load(audiofile,sr=C.SR) #extract Harmonic-CQT from audio fmin = note_to_hz("C1") hcqt = np.stack([np.abs(cqt(y,sr=C.SR,hop_length=C.H,n_bins=C.BIN_CNT,bins_per_octave=C.OCT_BIN,fmin=fmin*(h+1),filter_scale=2,tuning=None)).T.astype(np.float32) for h in range(C.CQT_H)]) #extract feature using trained CNN extractor cnn_feat_extractor = N.FullCNNFeatExtractor() cnn_feat_extractor.load(args.f) feat = cnn_feat_extractor.GetFeature(U.PreprocessSpec(hcqt)).data #decode label sequence decoder = N.NBLSTMCRF() decoder.load(args.d) labels = decoder.argmax(feat) #convert into .lab file labfile = os.path.join("Datas/labs_estimated",fname+".lab") U.SaveEstimatedLabelsFramewise(labels,labfile,feat)
def TrainNStepCRF(idx, epoch=20, augment=0, featmodel=const.DEFAULT_CONVNETFILE,
                  path_blstm="blstm.model", savefile="nblstm_crf.model"):
    """Train the NBLSTM-CRF decoder on features from a frozen CNN extractor.

    Precomputes CNN features and framewise chord labels for each song, then
    trains the decoder on randomly sampled fixed-length snippets with
    MomentumSGD, optionally pitch-shift augmenting each snippet.

    Args:
        idx: optional index array selecting a subset of the dataset
            (cross-validation fold); None uses every file.
        epoch: number of training epochs.
        augment: if > 0, apply a random shift in [0, augment) to each
            snippet via ``shift_data``.
        featmodel: path to the trained CNN feature-extractor weights.
        path_blstm: path to pretrained BLSTM weights used to initialize
            the decoder's BLSTM.
        savefile: path the trained decoder is saved to.
    """
    cqtfilelist = np.array(find_files(const.PATH_HCQT, ext="npy"))
    chordlablist = np.array(
        find_files(const.PATH_CHORDLAB, ext=["lab", "chords"]))
    if idx is not None:
        cqtfilelist = cqtfilelist[idx]
        chordlablist = chordlablist[idx]

    # Feature extraction is inference only: no dropout, no autograd.
    chainer.config.train = False
    chainer.config.enable_backprop = False
    dnn = networks.FullCNNFeatExtractor()
    dnn.load(featmodel)
    dnn.to_gpu(0)
    rnn = networks.NBLSTMCRF()
    rnn.blstm.load(path_blstm)
    rnn.to_gpu(0)
    opt = optimizers.MomentumSGD()
    opt.setup(rnn)

    # Precompute per-song feature matrices X and label vectors T,
    # truncated to a common length since frame counts can disagree.
    X = []
    T = []
    for cqtfile, labfile in zip(cqtfilelist, chordlablist):
        cqt = utils.Embed(utils.PreprocessSpec(np.load(cqtfile)[:, :, :]), 1)
        feature = cp.asnumpy(dnn.GetFeature(cp.asarray(cqt)).data)
        lab = utils.LoadLabelArr(labfile)
        min_sz = min(feature.shape[0], lab.shape[0])
        X.append(feature[:min_sz, :])
        T.append(lab[:min_sz])
    sizes = np.array([x.shape[0] for x in X], dtype="int32")

    print("start epoch:")
    # Decoder training: keep train=False (matches the original; the frozen
    # extractor already ran) but re-enable backprop for the RNN updates.
    chainer.config.train = False
    chainer.config.enable_backprop = True
    for ep in range(epoch):
        sum_loss = 0.0
        # Sample ~8 snippets per SEQLEN-worth of total frames each epoch.
        rand_songid = np.random.randint(
            len(X), size=np.sum(sizes) // const.DECODER_TRAIN_SEQLEN * 8)
        for i in range(0, rand_songid.size, const.DECODER_TRAIN_BATCH):
            xbatch = []
            tbatch = []
            for songid in rand_songid[i:i + const.DECODER_TRAIN_BATCH]:
                seq_len = sizes[songid]
                # Skip songs too short to yield a full snippet — the
                # original raised ValueError in np.random.randint(<=0) here.
                if seq_len <= const.DECODER_TRAIN_SEQLEN + 1:
                    continue
                # 'start' replaces the original's 'idx', which shadowed
                # the function parameter of the same name.
                start = np.random.randint(
                    seq_len - const.DECODER_TRAIN_SEQLEN - 1)
                x_snip = X[songid][start:start + const.DECODER_TRAIN_SEQLEN, :]
                t_snip = T[songid][start:start + const.DECODER_TRAIN_SEQLEN]
                if augment > 0:
                    shift = np.random.randint(augment)
                    x_snip, t_snip = shift_data(x_snip, t_snip, shift)
                xbatch.append(Variable(cp.asarray(x_snip)))
                tbatch.append(Variable(cp.asarray(t_snip)))
            rnn.cleargrads()
            opt.update(rnn, xbatch, tbatch)
            sum_loss += rnn.loss.data
        # NOTE(review): 12800.0 is a magic normalizer kept from the original
        # — presumably snippets per epoch; confirm before changing.
        print("epoch %d/%d loss=%.3f" % (ep + 1, epoch, sum_loss / 12800.0))
    rnn.save(savefile)