Example #1
    def main(self):
        datadir = self._basedir + '/../data'

        Log.i("*** [START] ***")
        parse(model_file=self._basedir + "/../output/cws.model",
              embed_file=datadir + "/zhwiki-embeddings-100.txt")
        Log.i("*** [DONE] ***")
Example #2
    def main(self):
        corpus = "pku"
        datadir = self._basedir + '/../data'

        Log.i("*** [START] ***")
        train(
            train_file=datadir + "/icwb2-data/training/%s_training.utf8" % corpus,
            test_file=datadir + "/icwb2-data/gold/%s_test_gold.utf8" % corpus,
            embed_file=datadir + "/zhwiki-embeddings-100.txt",
            n_epoch=self._args.epoch,
            batch_size=self._args.batchsize,
            gpu=self._args.gpu,
            save=self._basedir + "/../output/cws.model" if self._args.save else None
        )
        Log.i("*** [DONE] ***")
Example #3
    def main(self):
        corpus = "pku"
        datadir = self._basedir + '/../data'

        Log.i("*** [START] ***")
        train(train_file=datadir +
              "/icwb2-data/training/%s_training.utf8" % corpus,
              test_file=datadir +
              "/icwb2-data/gold/%s_test_gold.utf8" % corpus,
              embed_file=datadir + "/zhwiki-embeddings-100.txt",
              n_epoch=self._args.epoch,
              batch_size=self._args.batchsize,
              gpu=self._args.gpu,
              save=self._basedir +
              "/../output/cws.model" if self._args.save else None)
        Log.i("*** [DONE] ***")
Example #4
def parse(model_file, embed_file):

    # Load files
    Log.i('initialize preprocessor with %s' % embed_file)
    processor = Preprocessor(embed_file)

    Log.v('')
    Log.v("initialize ...")
    Log.v('')

    # Allocate an embedding matrix with the shape recorded in the saved model;
    # the real weights are restored by serializers.load_npz below.
    with np.load(model_file) as f:
        embeddings = np.zeros(f['embed/W'].shape, dtype=np.float32)

    # Set up a neural network
    cls = BLSTMCRF if _use_crf else BLSTM
    model = cls(
        embeddings=embeddings,
        n_labels=4,
        dropout=0.2,
        train=False,
    )
    Log.i("loading a model from %s ..." % model_file)
    serializers.load_npz(model_file, model)

    LABELS = ['B', 'M', 'E', 'S']

    def _process(raw_text):
        if not raw_text:
            return
        xs = [processor.transform_one(list(raw_text))]
        ys = model.parse(xs)
        labels = [LABELS[y] for y in ys[0]]
        print(' '.join(labels))
        seq = []
        for c, label in zip(raw_text, labels):
            seq.append(c)
            if label in ('E', 'S'):  # E ends a word; S is a single-char word
                seq.append(' ')
        print(''.join(seq))
        print('-')

    print("Input a Chinese sentence! (use 'q' to exit)")
    while True:
        x = input()
        if x == 'q':
            break
        _process(x)
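The _process helper above is the decoding step: each character receives one of the B/M/E/S labels, and a word boundary is emitted after every E (end of word) or S (single-character word). A self-contained sketch of just that decoding, with made-up characters and labels for illustration:

def bmes_decode(chars, labels):
    # Append a space after each word-final label to segment the sequence.
    out = []
    for c, label in zip(chars, labels):
        out.append(c)
        if label in ('E', 'S'):
            out.append(' ')
    return ''.join(out).strip()

print(bmes_decode('ABCDE', ['B', 'E', 'S', 'B', 'E']))  # -> 'AB C DE'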
Example #5
    def _process(dataset, model):
        size = len(dataset)
        batch_count = 0
        loss = 0.0
        accuracy = 0.0

        p = ProgressBar(min_value=0, max_value=size, fd=sys.stderr).start()
        for i, (xs, ys) in enumerate(dataset.batch(batch_size, colwise=True, shuffle=model.train)):
            p.update((batch_size * i) + 1)
            batch_count += 1
            batch_loss, batch_accuracy = model(xs, ys)
            loss += batch_loss.data
            accuracy += batch_accuracy
            if model.train:
                _update(optimizer, batch_loss)

        p.finish()
        Log.i("[%s] epoch %d - #samples: %d, loss: %f, accuracy: %f"
              % ('training' if model.train else 'evaluation', epoch + 1, size,
                 loss / batch_count, accuracy / batch_count))
Example #6
    def _process(dataset, model):
        size = len(dataset)
        batch_count = 0
        loss = 0.0
        accuracy = 0.0

        p = ProgressBar(min_value=0, max_value=size, fd=sys.stderr).start()
        for i, (xs, ys) in enumerate(
                dataset.batch(batch_size, colwise=True, shuffle=model.train)):
            p.update((batch_size * i) + 1)
            batch_count += 1
            batch_loss, batch_accuracy = model(xs, ys)
            loss += batch_loss.data
            accuracy += batch_accuracy
            if model.train:
                _update(optimizer, batch_loss)

        p.finish()
        Log.i("[%s] epoch %d - #samples: %d, loss: %f, accuracy: %f" %
              ('training' if model.train else 'evaluation', epoch + 1, size,
               loss / batch_count, accuracy / batch_count))
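Note that _process in Examples #5 and #6 is a closure: batch_size, epoch, optimizer, and _update are not parameters but names resolved from the enclosing train function (shown in full in Example #10). A minimal sketch of that scoping pattern, with toy names standing in for the real training code:

def train(n_epoch=2, batch_size=4):
    def _process(dataset):
        # batch_size and epoch come from the enclosing scope
        for i in range(0, len(dataset), batch_size):
            print('epoch %d, batch %s' % (epoch + 1, dataset[i:i + batch_size]))

    for epoch in range(n_epoch):
        _process(list(range(6)))

train()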
Example #7
def test(model_file, moderu):
    # Load files

    Log.v('')
    Log.v("initialize ...")
    Log.v('')

    # Set up a neural network

    Log.i("loading a model from %s ..." % model_file)
    serializers.load_npz(model_file, moderu)

    # Set closed-test data
    preds = 0.0
    cishu = 0    # number of test files processed
    loss_1 = 0.0

    pathDir = os.listdir(cfg.closetest)

    yucezhi_1 = []  # predictions
    zhenzhi_1 = []  # ground-truth labels

    for allDir in pathDir:  # feed the test files into the model one at a time
        print(allDir)
        allDir = os.path.join(cfg.closetest, allDir)
        with open(allDir, 'r') as f:
            a = np.loadtxt(f, delimiter=',', skiprows=0).astype(np.float32)
        closetestX = a[:, 1:]
        closetestlabel = a[:, 0:1]

        if cfg.mse:
            closetestlabel = label.label_1(closetestlabel)

        if cfg.cross:
            closetestlabel = label.label_2(closetestlabel)

        # The input to the BLSTM has to be a 2-D array such as [[1 2]]:
        # the first dimension indexes the samples and the second holds
        # the feature values.
        zhuanghuan = []
        zhuanghuan1 = []
        zhuanghuan.append(np.array(closetestX, np.float32))
        zhuanghuan1.append(np.array(closetestlabel, np.int32))
        closetestX = np.array(zhuanghuan)
        closetestlabel = np.array(zhuanghuan1)

        testloss, preds_1, yucezhi = moderu(closetestX, closetestlabel)

        yucezhi_1.extend(yucezhi)
        zhenzhi_1.extend(closetestlabel)

        loss_1 = loss_1 + testloss
        preds += preds_1
        cishu += 1
        testloss.unchain_backward()

    yucezhi_2 = []
    zhenzhi_2 = []

    for a in zhenzhi_1:
        zhenzhi_2.extend(a.tolist())

    for b in yucezhi_1:
        # converting with tolist() is what makes this work
        yucezhi_2.extend(b.data.tolist())
    confuse = confusion_matrix(zhenzhi_2, yucezhi_2)

    Log.i("#closetest:----datasize: %d, accuracy: %f, testloss: %f" %
          (cishu, preds / cishu, loss_1.data / cishu))

    Log.i("the confuse matrix is")
    Log.i(confuse)
    '''
       The confusion matrix is laid out as
                    predicted
                     0   1
       actual  0     a   b
               1     c   d
       so the true label is 0 and the prediction is 0 in a cases,
       the true label is 0 but the prediction is 1 in b cases, and so on.
    '''
    print('the name of the model is %s' % model_file)
    Log.i('the name of the model is %s' % model_file)
    print('the confusion matrix for this test is')
    print(confuse)  # confusion_matrix was fed two flat lists
    fause = confuse[1][1] / (confuse[1][0] + confuse[1][1])      # recall of label 1
    fause_1 = confuse[1][1] / (confuse[0][1] + confuse[1][1])    # precision of label 1
    correct = confuse[0][0] / (confuse[0][0] + confuse[0][1])    # recall of label 0
    correct_1 = confuse[0][0] / (confuse[0][0] + confuse[1][0])  # precision of label 0
    c = confuse[0][0] + confuse[0][1]  # frames whose true label is 0
    f = confuse[1][0] + confuse[1][1]  # frames whose true label is 1
    total = confuse[0][0] + confuse[0][1] + confuse[1][0] + confuse[1][1]
    fause_rate = f / total
    correct_rate = c / total
    correct_f = (2 * correct * correct_1) / (correct + correct_1)
    fause_f = (2 * fause * fause_1) / (fause + fause_1)

    print("total number of frames: %d" % total)
    Log.i("all of the frames: %d" % total)
    print("recall of the error label: %f" % fause)
    Log.i("the recall of label1: %f" % float(fause))

    print("frames that are actually errors: %f (%f of all frames)" % (f, fause_rate))
    Log.i("the number of label1: %f, label1's rate: %f" % (f, fause_rate))
    print("recall of the correct label: %f" % correct)
    Log.i("the recall of label0: %f" % float(correct))

    print("frames that are actually correct: %f (%f of all frames)" % (c, correct_rate))
    Log.i("the number of label0: %f, label0's rate: %f" % (c, correct_rate))
    print("precision of the error label: %f" % fause_1)
    Log.i("the precision of label1: %f" % fause_1)
    print("precision of the correct label: %f" % correct_1)
    Log.i("the precision of label0: %f" % correct_1)
    print("F score of the correct label: %f" % correct_f)
    Log.i("the F score of label0: %f" % correct_f)
    print("F score of the error label: %f" % fause_f)
    Log.i("the F score of label1: %f" % fause_f)

    print('##################### ClosePredict Done ########################')
    return loss_1.data / cishu, fause, fause_f, correct_rate, correct_f, preds / cishu
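Example #7 derives recall, precision, and F scores by hand from a 2x2 confusion matrix in which rows are true labels and columns are predicted labels. The same arithmetic in isolation, on made-up labels:

import numpy as np
from sklearn.metrics import confusion_matrix

y_true = [0, 0, 0, 1, 1, 1]  # illustrative ground truth
y_pred = [0, 0, 1, 0, 1, 1]  # illustrative predictions
cm = confusion_matrix(y_true, y_pred)  # cm[i][j]: true label i predicted as j

recall_1 = cm[1][1] / (cm[1][0] + cm[1][1])     # 'fause' above
precision_1 = cm[1][1] / (cm[0][1] + cm[1][1])  # 'fause_1' above
f1_label1 = 2 * recall_1 * precision_1 / (recall_1 + precision_1)
print(cm)
print(recall_1, precision_1, f1_label1)  # 0.667, 0.667, 0.667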
Example #8
def train(
        n_epoch=10,  # 20
        batch_size=10,  # 20
        # setting n_epoch and batch_size here has no effect,
        # but remember to keep them in sync with the config each time
        gpu=-1,
        save=True):
    hparams = {
        'batchsize': cfg.batchsize,
        'dropout_ratio': cfg.dropout,
        'adagrad_lr': cfg.adagrad_lr,  # 0.0005 < lr < 0.01
        'weight_decay': 0.0001,  # 0.0001
        'inputN': cfg.inputN,
        'output': cfg.outputN,
        'lr': cfg.lr,
        'lstmcenshu': cfg.lstmcenshu,
        'cudnn': cfg.cudnn,
        'embed_size': cfg.embed_size,
        # strings have to be re-encoded before they can be saved
        'xunliandata': (str(cfg.gakusyu)).encode(encoding='utf-8'),
        'mse': cfg.mse,
        'cross': cfg.cross,
    }

    Log.v('')
    Log.v("initialize ...")
    Log.v('--------------------------------')
    Log.i('# Minibatch-size: %d' % cfg.batchsize)
    Log.i('# epoch: %d' % cfg.epoch)
    Log.i('# gpu: %d' % cfg.gpu)
    Log.i('# hyper-parameters: %s' % str(hparams))
    Log.v('--------------------------------')
    Log.v('')

    train_data = []
    train_label = []

    # Set training data
    pathDir = os.listdir(cfg.gakusyu)
    for allDir in pathDir:
        allDir = os.path.join(cfg.gakusyu, allDir)

        print("read file %s" % allDir)
        with open(allDir, 'r') as f:
            a = np.loadtxt(f, delimiter=',', skiprows=0).astype(np.float32)
        Xtrain = a[:, 1:]
        Labeltrain = a[:, 0:1]
        # skl.preprocessing.normalize(Xtrain, norm='l2')
        # not sure yet what this normalization is for
        if cfg.cross:

            Labeltrain = label.label_2(Labeltrain)

            train_label.append(np.array(Labeltrain, np.int32))
            train_data.append(np.array(Xtrain, np.float32))

        if cfg.mse:

            Labeltrain = label.label_1(Labeltrain)

            train_label.append(np.array(Labeltrain, np.float32))
            train_data.append(np.array(Xtrain, np.float32))

    # turn labels like [[0], [1]] into one-hot labels like [[1, 0], [0, 1]]
    train_data = np.array(train_data)
    train_label = np.array(train_label)

    sample_size = len(train_data)
    print("we have %d training samples in total" % sample_size)
    # Set up a neural network
    cls = BLSTM
    model = cls(
        f_dim=cfg.inputN,
        n_labels=cfg.outputN,  # (11*2) +1,
        dropout=hparams['dropout_ratio'],
        train=True,
    )

    if gpu >= 0:
        cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    optimizer = optimizers.Adam(alpha=cfg.lr)
    optimizer.setup(model)
    # optimizer.add_hook(WeightDecay(hparams['weight_decay']))
    losslist = []
    fause_ratelist = []
    correct_ratelist = []
    correct_flist = []
    fause_flist = []

    # from here on, all of the training and parameter-update code lives in this loop
    for epoch in range(n_epoch):
        print("training for epoch %d begins" % (epoch + 1))

        batch_count = 0
        loss = 0.0
        accuracy = 0.0

        perm = np.random.permutation(sample_size)

        y_batch_1 = []   # ground-truth labels collected over the epoch
        yucezhi_1 = []   # predictions collected over the epoch

        # sample_size is the total number of speech files; each pass through this
        # loop is one training step over batch_size of them
        for i in range(0, sample_size, batch_size):
            x_batch = train_data[perm[i:i + batch_size]]
            y_batch = train_label[perm[i:i + batch_size]]

            batch_count += 1

            # passing arguments to the model instance invokes its __call__ method
            batch_loss, preds_1, yucezhi = model(x_batch, y_batch)

            for a in y_batch:
                y_batch_1.extend(a.tolist())

            for b in yucezhi:
                # converting with tolist() is what makes this work
                yucezhi_1.extend(b.data.tolist())

            print("loss for this forward pass:")
            print(batch_loss.data)

            loss += batch_loss.data
            print("batch_count:")
            print(batch_count)

            if model.train:
                optimizer.target.zerograds()
                batch_loss.backward()
                # unchain_backward() drops the computation history; for very long
                # training data you could call it periodically, e.g. forgetting
                # the graph every 30 samples
                batch_loss.unchain_backward()
                optimizer.update()

        # note: if the predictions agree with the true values exactly and every
        # value is 0 (or every value is 1), confusion_matrix returns a
        # one-dimensional result
        confuse = confusion_matrix(y_batch_1, yucezhi_1)

        fause = confuse[1][1] / (confuse[1][0] + confuse[1][1])    # recall of label 1
        correct = confuse[0][0] / (confuse[0][0] + confuse[0][1])  # recall of label 0

        print("total loss and batch_count after this epoch:")
        print(loss)
        print(batch_count)

        Log.i(
            "[%s] epoch %d - - #samples: %d, loss: %f, fause_rate: %f, correct_rate: %f"
            % ('training' if model.train else 'evaluation', epoch + 1,
               sample_size, loss / batch_count, fause, correct))
        Log.v('-')
        loss_1 = loss / batch_count
        losslist.append(loss_1 / batch_size)  # the per-file loss for this epoch
        fause_ratelist.append(fause)
        correct_ratelist.append(correct)
        print("saving the model for epoch %d" % (epoch + 1))
        name = str(epoch + 1) + 'model'
        Log.i("saving the model to %s ..." % name)
        serializers.save_npz(os.path.join(os.getcwd() + '/model/', name),
                             model)
    np.savetxt(os.getcwd() + '/libs/train/loss.txt', losslist)  # save the losses to a file
    np.savetxt(os.getcwd() + '/libs/train/fause_rate.txt', fause_ratelist)
    np.savetxt(os.getcwd() + '/libs/train/correct_rate.txt', correct_ratelist)
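The epoch loop in Example #8 shuffles by drawing one random permutation of the sample indices and then slicing batch_size-sized windows out of it. That pattern on its own, with toy arrays:

import numpy as np

data = np.arange(10, dtype=np.float32).reshape(10, 1)  # toy samples
labels = np.arange(10, dtype=np.int32)                 # toy labels
batch_size = 4

perm = np.random.permutation(len(data))  # one shuffled epoch
for i in range(0, len(data), batch_size):
    x_batch = data[perm[i:i + batch_size]]
    y_batch = labels[perm[i:i + batch_size]]
    # the forward pass, loss, and optimizer update would go here
    print(x_batch.ravel(), y_batch)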
Example #9
    def main(self):

        if cfg.xunlian:

            Log.i("*** [START] ***")
            Log.i(" ")
            Log.i(" ")

            tr.train(
                n_epoch=self._args.epoch,
                batch_size=self._args.batchsize,
                gpu=self._args.gpu,
                save=self._args.save
                # whether to save the trained model
                # save="kakilstm.model" if self._args.save else None,
            )

        if cfg.bice:

            Log.i("*** [closetesting] ***")
            Log.i(" ")
            Log.i(" ")

            losslist = []
            fause_ratelist = []
            correct_ratelist = []
            correct_flist = []
            fause_flist = []
            accuracy = []

            cls = BLSTM
            model = cls(
                f_dim=cfg.inputN,
                n_labels=cfg.outputN,  # (11*2)+1,
                dropout=0,
                train=False,
            )

            if cfg.gpu >= 0:
                cuda.get_device_from_id(cfg.gpu).use()
                model.to_gpu()

            if cfg.bice_lianxu:

                for i in range(cfg.test_size):
                    testloss, fause_rate, fause_f, correct_rate, correct_f, matomeaccu = ct.test(
                        model_file=os.path.join(os.getcwd() + '/model/' +
                                                str(i + 1) + 'model'),
                        moderu=model)  # run the closed test on each saved model in turn

                    losslist.append(testloss)
                    fause_ratelist.append(fause_rate)
                    correct_ratelist.append(correct_rate)
                    correct_flist.append(correct_f)
                    fause_flist.append(fause_f)
                    accuracy.append(matomeaccu)
            else:
                testloss, fause_rate, fause_f, correct_rate, correct_f, matomeaccu = ct.test(
                    model_file=cfg.close_model,
                    moderu=model)  # run the closed test on a single model only

                losslist.append(testloss)
                fause_ratelist.append(fause_rate)
                correct_ratelist.append(correct_rate)
                correct_flist.append(correct_f)
                fause_flist.append(fause_f)
                accuracy.append(matomeaccu)

            np.savetxt(os.getcwd() + '/libs/closetest/loss.txt',
                       losslist)  # save the losses to a file
            np.savetxt(os.getcwd() + '/libs/closetest/fause_rate.txt',
                       fause_ratelist)
            np.savetxt(os.getcwd() + '/libs/closetest/correct_rate.txt',
                       correct_ratelist)
            np.savetxt(os.getcwd() + '/libs/closetest/correct_f.txt',
                       correct_flist)
            np.savetxt(os.getcwd() + '/libs/closetest/fause_f.txt',
                       fause_flist)
            np.savetxt(os.getcwd() + '/libs/closetest/accuracy.txt', accuracy)

            Log.i("*** [CLOSETESTDONE] ***")
            Log.i(" ")
            Log.i(" ")

        if cfg.kaice:

            Log.i("*** [opentesting] ***")
            Log.i(" ")
            Log.i(" ")

            losslist = []
            fause_ratelist = []
            correct_ratelist = []
            correct_flist = []
            fause_flist = []
            accuracy = []

            cls = BLSTM
            model_1 = cls(
                f_dim=cfg.inputN,
                n_labels=cfg.outputN,  # (11*2)+1,
                dropout=0,
                train=False,
            )

            if cfg.gpu >= 0:
                cuda.get_device_from_id(cfg.gpu).use()
                model_1.to_gpu()

            if cfg.kaice_lianxu:

                for i in range(cfg.test_size):
                    testloss, fause_rate, fause_f, correct_rate, correct_f, matomeaccu = ot.test(
                        model_file=os.path.join(os.getcwd() + '/model/' +
                                                str(i + 1) + 'model'),
                        moderu=model_1)  # 执行closetest,连续测试各个模型

                    losslist.append(testloss)
                    fause_ratelist.append(fause_rate)
                    correct_ratelist.append(correct_rate)
                    correct_flist.append(correct_f)
                    fause_flist.append(fause_f)
                    accuracy.append(matomeaccu)
            else:
                testloss, fause_rate, fause_f, correct_rate, correct_f, matomeaccu = ot.test(
                    model_file=cfg.open_model,
                    moderu=model_1)  # run the open test on a single model only

                losslist.append(testloss)
                fause_ratelist.append(fause_rate)
                correct_ratelist.append(correct_rate)
                correct_flist.append(correct_f)
                fause_flist.append(fause_f)
                accuracy.append(matomeaccu)

            np.savetxt(os.getcwd() + '/libs/opentest/loss.txt',
                       losslist)  # save the losses to a file
            np.savetxt(os.getcwd() + '/libs/opentest/fause_rate.txt',
                       fause_ratelist)
            np.savetxt(os.getcwd() + '/libs/opentest/correct_rate.txt',
                       correct_ratelist)
            np.savetxt(os.getcwd() + '/libs/opentest/correct_f.txt',
                       correct_flist)
            np.savetxt(os.getcwd() + '/libs/opentest/fause_f.txt', fause_flist)
            np.savetxt(os.getcwd() + '/libs/opentest/accuracy.txt', accuracy)

            Log.i("*** [OPENTESTDONE] ***")
            Log.i(" ")
            Log.i(" ")
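Examples #8 and #9 save each epoch's weights as '<epoch>model' with serializers.save_npz and later rebuild the network before restoring it with serializers.load_npz. A minimal save/load round trip under the same API, using a toy chainer link in place of the BLSTM (the file name is illustrative):

import numpy as np
import chainer.links as L
from chainer import serializers

model = L.Linear(3, 2)  # stand-in for the BLSTM
serializers.save_npz('1model', model)

fresh = L.Linear(3, 2)  # must be constructed with the same architecture
serializers.load_npz('1model', fresh)
assert np.array_equal(model.W.data, fresh.W.data)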
Example #10
def train(train_file,
          test_file,
          embed_file,
          n_epoch=20,
          batch_size=20,
          gpu=-1,
          save=None):

    # Load files
    Log.i('initialize preprocessor with %s' % embed_file)
    processor = Preprocessor(embed_file)
    reader = CorpusReader(processor)
    Log.i('load train dataset from %s' % str(train_file))
    train_dataset = reader.load(train_file, train=True)
    Log.i('load test dataset from %s' % str(test_file))
    test_dataset = reader.load(test_file, train=False)

    hparams = {
        'dropout_ratio': 0.2,
        'adagrad_lr': 0.2,
        'weight_decay': 0.0001,
    }

    Log.v('')
    Log.v("initialize ...")
    Log.v('--------------------------------')
    Log.i('# Minibatch-size: %d' % batch_size)
    Log.i('# epoch: %d' % n_epoch)
    Log.i('# gpu: %d' % gpu)
    Log.i('# hyper-parameters: %s' % str(hparams))
    Log.v('--------------------------------')
    Log.v('')

    # Set up a neural network
    cls = BLSTMCRF if _use_crf else BLSTM
    model = cls(
        embeddings=processor.embeddings,
        n_labels=4,
        dropout=hparams['dropout_ratio'],
        train=True,
    )
    if gpu >= 0:
        cuda.get_device(gpu).use()
        model.to_gpu()
    eval_model = model.copy()
    eval_model.train = False

    # Setup an optimizer
    optimizer = optimizers.AdaGrad(lr=hparams['adagrad_lr'])
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(hparams['weight_decay']))

    def _update(optimizer, loss):
        optimizer.target.zerograds()
        loss.backward()
        optimizer.update()

    def _process(dataset, model):
        size = len(dataset)
        batch_count = 0
        loss = 0.0
        accuracy = 0.0

        p = ProgressBar(min_value=0, max_value=size, fd=sys.stderr).start()
        for i, (xs, ys) in enumerate(
                dataset.batch(batch_size, colwise=True, shuffle=model.train)):
            p.update((batch_size * i) + 1)
            batch_count += 1
            batch_loss, batch_accuracy = model(xs, ys)
            loss += batch_loss.data
            accuracy += batch_accuracy
            if model.train:
                _update(optimizer, batch_loss)

        p.finish()
        Log.i("[%s] epoch %d - #samples: %d, loss: %f, accuracy: %f" %
              ('training' if model.train else 'evaluation', epoch + 1, size,
               loss / batch_count, accuracy / batch_count))

    for epoch in range(n_epoch):
        _process(train_dataset, model)
        _process(test_dataset, eval_model)
        Log.v('-')

    if save is not None:
        Log.i("saving the model to %s ..." % save)
        serializers.save_npz(save, model)
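The _update helper in Example #10 is the standard manual Chainer training step: clear the accumulated gradients, backpropagate the batch loss, then let the optimizer apply the update. A minimal sketch of that step with a toy model standing in for the BLSTM/BLSTMCRF (assumes chainer and numpy are installed):

import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import optimizers

model = L.Linear(3, 2)  # toy stand-in for the real network
optimizer = optimizers.AdaGrad(lr=0.2)
optimizer.setup(model)

x = np.random.rand(4, 3).astype(np.float32)
t = np.array([0, 1, 0, 1], dtype=np.int32)

loss = F.softmax_cross_entropy(model(x), t)
model.cleargrads()  # the examples use the older zerograds() spelling
loss.backward()
optimizer.update()
print(float(loss.data))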
Example #11
__fig_name = '{}_{}'.format('mon' if is_mono else 'bin',
                            time.strftime("%Y_%m_%d_%H_%M_%S"))
nb_ch = cfg.nb_ch
batch_size = cfg.batch_size  # Decrease this if you want to run on smaller GPUs
fft_point = cfg.fft_point
seq_len = cfg.seq_len  # Frame sequence length. Input to the CRNN.
nb_epoch = cfg.nb_epoch  # Training epochs
patience = cfg.patience  # Patience for early stopping
sr = cfg.sr
nfft = cfg.nfft
frames_1_sec = cfg.frames_1_sec
print(
    'TRAINING PARAMETERS: nb_ch: {}, seq_len: {}, batch_size: {}, nb_epoch: {}, frames_1_sec: {}'
    .format(nb_ch, seq_len, batch_size, nb_epoch, frames_1_sec))
Log.i(
    'TRAINING PARAMETERS: nb_ch: %d, seq_len: %s, batch_size: %d, nb_epoch: %d, frames_1_sec: %s'
    % (nb_ch, seq_len, batch_size, nb_epoch, frames_1_sec))
__models_dir = 'models/'
utils.create_folder(__models_dir)
cnn_nb_filt = cfg.cnn_nb_filt  # CNN filter size
cnn_pool_size = cfg.cnn_pool_size  # Maxpooling across frequency. Length of cnn_pool_size =  number of CNN layers
rnn_nb = cfg.rnn_nb  # Number of RNN nodes. Length of rnn_nb = number of RNN layers
fc_nb = cfg.fc_nb  # Number of FC nodes.  Length of fc_nb =  number of FC layers
dropout_rate = cfg.dropout_rate  # Dropout after each layer
############################################################################################
############################################################################################

print(
    'MODEL PARAMETERS:\n cnn_nb_filt: {}, cnn_pool_size: {}, rnn_nb: {}, fc_nb: {}, dropout_rate: {}'
    .format(cnn_nb_filt, cnn_pool_size, rnn_nb, fc_nb, dropout_rate))
Log.i(
    'MODEL PARAMETERS:\n cnn_nb_filt: %s, cnn_pool_size: %s, rnn_nb: %s, fc_nb: %s, dropout_rate: %s'
    % (cnn_nb_filt, cnn_pool_size, rnn_nb, fc_nb, dropout_rate))
Example #12
def train(
        train_file,
        test_file,
        embed_file,
        n_epoch=20,
        batch_size=20,
        gpu=-1,
        save=None):

    # Load files
    Log.i('initialize preprocessor with %s' % embed_file)
    processor = Preprocessor(embed_file)
    reader = CorpusReader(processor)
    Log.i('load train dataset from %s' % str(train_file))
    train_dataset = reader.load(train_file, train=True)
    Log.i('load test dataset from %s' % str(test_file))
    test_dataset = reader.load(test_file, train=False)

    hparams = {
        'dropout_ratio': 0.2,
        'adagrad_lr': 0.2,
        'weight_decay': 0.0001,
    }

    Log.v('')
    Log.v("initialize ...")
    Log.v('--------------------------------')
    Log.i('# Minibatch-size: %d' % batch_size)
    Log.i('# epoch: %d' % n_epoch)
    Log.i('# gpu: %d' % gpu)
    Log.i('# hyper-parameters: %s' % str(hparams))
    Log.v('--------------------------------')
    Log.v('')

    # Set up a neural network
    cls = BLSTMCRF if _use_crf else BLSTM
    model = cls(
        embeddings=processor.embeddings,
        n_labels=4,
        dropout=hparams['dropout_ratio'],
        train=True,
    )
    if gpu >= 0:
        cuda.get_device(gpu).use()
        model.to_gpu()
    eval_model = model.copy()
    eval_model.train = False

    # Setup an optimizer
    optimizer = optimizers.AdaGrad(lr=hparams['adagrad_lr'])
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(hparams['weight_decay']))

    def _update(optimizer, loss):
        optimizer.target.zerograds()
        loss.backward()
        optimizer.update()

    def _process(dataset, model):
        size = len(dataset)
        batch_count = 0
        loss = 0.0
        accuracy = 0.0

        p = ProgressBar(min_value=0, max_value=size, fd=sys.stderr).start()
        for i, (xs, ys) in enumerate(dataset.batch(batch_size, colwise=True, shuffle=model.train)):
            p.update((batch_size * i) + 1)
            batch_count += 1
            batch_loss, batch_accuracy = model(xs, ys)
            loss += batch_loss.data
            accuracy += batch_accuracy
            if model.train:
                _update(optimizer, batch_loss)

        p.finish()
        Log.i("[%s] epoch %d - #samples: %d, loss: %f, accuracy: %f"
              % ('training' if model.train else 'evaluation', epoch + 1, size,
                 loss / batch_count, accuracy / batch_count))

    for epoch in range(n_epoch):
        _process(train_dataset, model)
        _process(test_dataset, eval_model)
        Log.v('-')

    if save is not None:
        Log.i("saving the model to %s ..." % save)
        serializers.save_npz(save, model)