Code example #1
0
def EstimateChord(idx, dnnmodel, todir=False):
    """Estimate chord labels for the HCQT files selected by *idx*.

    Features are extracted with the CNN stored at *dnnmodel* and decoded
    with the default NBLSTMCRF model.  With todir=True the .lab files are
    written per-album under PATH_ESTIMATE_CROSS; otherwise they are
    written as sequentially numbered files under PATH_ESTIMATE.
    """
    extractor = networks.FullCNNFeatExtractor()
    extractor.load(dnnmodel)
    extractor.to_gpu(0)
    crf = networks.NBLSTMCRF()
    crf.load()
    crf.to_gpu(0)
    # Inference only: disable training mode and gradient bookkeeping.
    chainer.config.train = False
    chainer.config.enable_backprop = False
    cqtfiles = np.array(find_files(const.PATH_HCQT, ext="npy"))[idx]
    for i, cqtfile in enumerate(cqtfiles):
        spec = utils.Embed(utils.PreprocessSpec(np.load(cqtfile)[:, :, :]), 1)
        feature = extractor.GetFeature(cp.asarray(spec)).data
        labels = crf.argmax(feature)
        feature_np = cp.asnumpy(feature)
        if todir:
            fname = cqtfile.split("/")[-1] + ".lab"
            alb = cqtfile.split("/")[-2]
            utils.SaveEstimatedLabelsFramewise(
                labels, const.PATH_ESTIMATE_CROSS + alb + "/" + fname,
                feature_np)
        else:
            utils.SaveEstimatedLabelsFramewise(
                labels, const.PATH_ESTIMATE + "%03d.lab" % i, feature_np)
Code example #2
0
                    type=str, default="nblstm_crf.model", action="store")
args = parser.parse_args()

audio_list = find_files("Datas/audios_estimation")

# Load the trained models once, before the loop: the original re-built and
# re-loaded the CNN extractor and CRF decoder from disk for every audio
# file, which is pure loop-invariant work.
cnn_feat_extractor = N.FullCNNFeatExtractor()
cnn_feat_extractor.load(args.f)

decoder = N.NBLSTMCRF()
decoder.load(args.d)

# Lowest CQT frequency; harmonic h of the HCQT starts at fmin*(h+1).
fmin = note_to_hz("C1")

for audiofile in audio_list:
    fname = audiofile.split("/")[-1]
    print("Processing: %s" % fname)
    #load audio
    y,sr = load(audiofile,sr=C.SR)

    #extract Harmonic-CQT from audio
    hcqt = np.stack([np.abs(cqt(y,sr=C.SR,hop_length=C.H,n_bins=C.BIN_CNT,bins_per_octave=C.OCT_BIN,fmin=fmin*(h+1),filter_scale=2,tuning=None)).T.astype(np.float32) for h in range(C.CQT_H)])

    #extract feature using trained CNN extractor
    feat = cnn_feat_extractor.GetFeature(U.PreprocessSpec(hcqt)).data

    #decode label sequence
    labels = decoder.argmax(feat)

    #convert into .lab file
    labfile = os.path.join("Datas/labs_estimated",fname+".lab")
    U.SaveEstimatedLabelsFramewise(labels,labfile,feat)
Code example #3
0
def TrainNStepCRF(idx,
                  epoch=20,
                  augment=0,
                  featmodel=const.DEFAULT_CONVNETFILE,
                  path_blstm="blstm.model",
                  savefile="nblstm_crf.model"):
    """Train the NBLSTM-CRF chord decoder on precomputed CNN features.

    Parameters
    ----------
    idx : array-like or None
        Indices selecting the training subset of the HCQT / chord-label
        file lists; ``None`` trains on every file.
    epoch : int
        Number of training epochs; the model is saved after each one.
    augment : int
        When > 0, each snippet is shifted by a random amount in
        ``[0, augment)`` via ``shift_data`` (data augmentation).
    featmodel : str
        Path of the trained CNN feature extractor.
    path_blstm : str
        Path of pretrained BLSTM weights loaded into the CRF model.
    savefile : str
        Destination path for the trained decoder.
    """
    cqtfilelist = np.array(find_files(const.PATH_HCQT, ext="npy"))
    chordlablist = np.array(
        find_files(const.PATH_CHORDLAB, ext=["lab", "chords"]))
    if idx is not None:
        cqtfilelist = cqtfilelist[idx]
        chordlablist = chordlablist[idx]
    # Feature extraction is inference-only: no backprop through the CNN.
    chainer.config.train = False
    chainer.config.enable_backprop = False
    dnn = networks.FullCNNFeatExtractor()
    dnn.load(featmodel)
    dnn.to_gpu(0)

    rnn = networks.NBLSTMCRF()
    rnn.blstm.load(path_blstm)
    rnn.to_gpu(0)
    opt = optimizers.MomentumSGD()
    opt.setup(rnn)

    # Precompute CNN features and frame-level labels per song, truncating
    # both to their common length (feature and label streams can differ
    # by a few frames).
    X = []
    T = []
    for cqtfile, labfile in zip(cqtfilelist, chordlablist):
        cqt = utils.Embed(utils.PreprocessSpec(np.load(cqtfile)[:, :, :]), 1)
        feature = cp.asnumpy(dnn.GetFeature(cp.asarray(cqt)).data)
        lab = utils.LoadLabelArr(labfile)
        min_sz = min([feature.shape[0], lab.shape[0]])
        X.append(feature[:min_sz, :])
        T.append(lab[:min_sz])
    sizes = np.array([x.shape[0] for x in X], dtype="int32")
    print("start epoch:")
    # NOTE(review): train stays False while only backprop is enabled —
    # presumably intentional so dropout/BN layers run in test mode during
    # decoder training; confirm against the networks module.
    chainer.config.train = False
    chainer.config.enable_backprop = True
    for ep in range(epoch):
        sum_loss = 0.0
        rand_songid = np.random.randint(len(X),
                                        size=np.sum(sizes) //
                                        const.DECODER_TRAIN_SEQLEN * 8)
        for i in range(0, rand_songid.size, const.DECODER_TRAIN_BATCH):
            xbatch = []
            tbatch = []
            for songid in rand_songid[i:i + const.DECODER_TRAIN_BATCH]:
                seq_len = sizes[songid]
                # Random snippet start (renamed from "idx", which shadowed
                # the function parameter).
                start = np.random.randint(
                    seq_len - const.DECODER_TRAIN_SEQLEN - 1)
                x_snip = X[songid][start:start + const.DECODER_TRAIN_SEQLEN, :]
                t_snip = T[songid][start:start + const.DECODER_TRAIN_SEQLEN]
                if augment > 0:
                    shift = np.random.randint(augment)
                    x_snip, t_snip = shift_data(x_snip, t_snip, shift)
                xbatch.append(Variable(cp.asarray(x_snip)))
                tbatch.append(Variable(cp.asarray(t_snip)))
            rnn.cleargrads()
            opt.update(rnn, xbatch, tbatch)
            sum_loss += rnn.loss.data

        # NOTE(review): 12800.0 is a hard-coded normalizer, not the actual
        # batch count — the reported loss is only comparable between runs
        # with identical dataset sizes.
        print("epoch %d/%d loss=%.3f" % (ep + 1, epoch, sum_loss / 12800.0))
        rnn.save(savefile)