Example #1
File: sed_1_2D.py Project: shuyuqing/-
import numpy as np

# `label` is a project-local module providing the label-conversion helpers.


def load_data(feat_folder_train, feat_folder_test):
    train = np.load(feat_folder_train)  # load both the training and the test data
    test = np.load(feat_folder_test)
    train = train['arr_0']
    test = test['arr_0']
    Xtrain = train[:, 1:]   # features: everything after column 0
    Ytrain = train[:, 0:1]  # labels: column 0, kept 2-D
    Ytrain = label.label_1(Ytrain)
    Xtest = test[:, 1:]
    Ytest = test[:, 0:1]
    Ytest = label.label_1(Ytest)
    return Xtrain, Ytrain, Xtest, Ytest
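
The slicing above encodes the data layout this project uses: column 0 of each saved array is the label and the remaining columns are the feature values. A minimal sketch of that convention with toy data (`label.label_1` is the project's own helper and is not reproduced here):

import numpy as np

# Toy dataset: 3 samples, label in column 0, 4 feature values after it.
labels = np.array([[0.0], [1.0], [0.0]], dtype=np.float32)
feats = np.random.rand(3, 4).astype(np.float32)
np.savez('train.npz', np.hstack([labels, feats]))  # saved under the default key 'arr_0'

loaded = np.load('train.npz')['arr_0']
X = loaded[:, 1:]   # features: everything after column 0
y = loaded[:, 0:1]  # labels: column 0, kept as a 2-D column vector
print(X.shape, y.shape)  # (3, 4) (3, 1)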
Example #2
File: close_test.py Project: shuyuqing/-
import os

import numpy as np
from chainer import serializers
from sklearn.metrics import confusion_matrix

# `label`, `cfg`, and `Log` are project-local helpers whose import paths
# are not shown in the original snippet.


def test(model_file, moderu):
    # Load files

    Log.v('')
    Log.v("initialize ...")
    Log.v('')

    # Set up a neural network

    Log.i("loading a model from %s ..." % model_file)
    serializers.load_npz(model_file, moderu)

    # Set closetest data
    preds = 0.0
    cishu = 0
    loss_1 = 0.0

    pathDir = os.listdir(cfg.closetest)

    yucezhi_1 = []
    zhenzhi_1 = []

    # losslist = []
    # fause_ratelist= []
    # correct_ratelist = []
    # correct_flist = []
    # fause_flist = []

    for allDir in pathDir:  # test files are fed into the model one at a time
        print(allDir)
        filename = allDir
        allDir = os.path.join(cfg.closetest, allDir)
        with open(allDir, 'r') as f:
            a = np.loadtxt(f, delimiter=',', skiprows=0).astype(np.float32)
        # print(a.shape)
        closetestX = a[:, 1:]
        closetestlabel = a[:, 0:1]
        # print('Xclosetest shape: {}'.format(closetestX.shape))

        if cfg.mse:
            closetestlabel = label.label_1(closetestlabel)

        if cfg.cross:
            closetestlabel = label.label_2(closetestlabel)

        # print('Yclosetest shape: {}'.format(closetestlabel.shape))
        # skl.preprocessing.normalize(closetestX, norm='l2')

        zhuanghuan = []
        zhuanghuan1 = []
        # The data fed into the BLSTM must be a 2-D array like [[1 2]]: the
        # first dimension is the number of samples, the second holds the
        # actual feature values.

        zhuanghuan.append(np.array(closetestX, np.float32))
        zhuanghuan1.append(np.array(closetestlabel, np.int32))
        closetestX = np.array(zhuanghuan)
        closetestlabel = np.array(zhuanghuan1)

        testloss, preds_1, yucezhi = moderu(closetestX, closetestlabel)

        # print("现在输出预测值")
        # print("现在输出preds_1")
        # print(preds_1)
        # print("现在输出closetestlabel")
        # print(closetestlabel)
        # os.system("pause")
        yucezhi_1.extend(yucezhi)
        zhenzhi_1.extend(closetestlabel)

        loss_1 += testloss
        preds += preds_1
        cishu += 1
        # print(preds)
        # print(cishu)
        testloss.unchain_backward()
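        # unchain_backward() severs the computation graph attached to testloss,
        # letting Chainer free the intermediate results (no backprop at test time)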

    yucezhi_2 = []
    zhenzhi_2 = []

    for a in zhenzhi_1:
        # print(type(a))
        # os.system("pause")
        zhenzhi_2.extend(a.tolist())

    for b in yucezhi_1:
        # print(type(b.data))
        # os.system("pause")
        yucezhi_2.extend((b.data).tolist())  # adding tolist() made this work
    confuse = confusion_matrix(zhenzhi_2, yucezhi_2)

    Log.i("#closetest:----datasize: %d, accuracy: %f, testloss: %f" %
          (cishu, preds / cishu, loss_1.data / cishu))

    # losslist.append(testloss/cishu)

    Log.i("the confuse matrix is")
    Log.i(confuse)
    '''
    Shape of the confusion matrix:
                 predicted
                  0   1
      true   0    a   b
             1    c   d
    a = number of frames whose true label is 0 and whose prediction is 0;
    b = number of frames whose true label is 0 but whose prediction is 1; etc.
    '''
    print('the name of the model is %s' % model_file)
    Log.i('the name of the model is %s' % model_file)
    print('the confusion matrix for this test run is')
    print(confuse)  # confusion_matrix takes two lists as input
    fause = confuse[1][1] / (confuse[1][0] + confuse[1][1])      # recall of label 1
    fause_1 = confuse[1][1] / (confuse[0][1] + confuse[1][1])    # precision of label 1
    correct = confuse[0][0] / (confuse[0][0] + confuse[0][1])    # recall of label 0
    correct_1 = confuse[0][0] / (confuse[0][0] + confuse[1][0])  # precision of label 0
    c = confuse[0][0] + confuse[0][1]  # frames whose true label is 0
    f = confuse[1][0] + confuse[1][1]  # frames whose true label is 1
    total = confuse[0][0] + confuse[0][1] + confuse[1][0] + confuse[1][1]
    fause_rate = f / total
    correct_rate = c / total
    correct_f = (2 * correct * correct_1) / (correct + correct_1)  # F1 of label 0
    fause_f = (2 * fause * fause_1) / (fause + fause_1)            # F1 of label 1

    # print("测试loss是")
    # print(testloss)
    # os.system("pause")

    print("总共的帧数是:%d" % all)
    Log.i("all of the frames:%d" % all)
    print("错误识别的再现率是:%f" % fause)
    Log.i("the precision of label1:%f" % float(fause))
    # fause_ratelist.append(fause)

    print("原本就是错误的帧数为:%f,占总帧数的%f" % (f, fause_rate))
    Log.i("the number of label1:%f,label1's rate%f" % (f, fause_rate))
    print("正确认识的再现率是:%f" % correct)
    Log.i("the precision of label0:%f" % float(correct))
    # correct_ratelist.append(correct)

    print("原本就是正确的帧数为:%f,占总帧数的%f" % (c, correct_rate))
    Log.i("the number of label0:%f,label0's rate%f" % (c, correct_rate))
    print("错误识别的适合率是:%f" % fause_1)
    Log.i("the recall of label1:%f" % fause_1)
    print("正确认识的适合率是:%f" % correct_1)
    Log.i("the recall of label0:%f" % correct_1)
    print("正确认识的F值为:%f" % correct_f)
    # correct_flist.append(correct_f)

    Log.i("the F score of label0:%f" % correct_f)
    print("错误识别的F值为:%f" % fause_f)
    # fause_flist.append(fause_f)

    Log.i("the F score of label1:%f" % fause_f)

    return loss_1.data / cishu, fause, fause_f, correct_rate, correct_f, preds / cishu
    print('##################### ClosePredict Done ########################')
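
As a sanity check on the metric formulas above: in scikit-learn's confusion_matrix, rows index the true label and columns the predicted label, so dividing cm[1][1] by the row-1 sum gives the recall of label 1 and dividing by the column-1 sum gives its precision. A minimal worked example on toy labels:

from sklearn.metrics import confusion_matrix

y_true = [0, 0, 0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 1, 0, 0, 1]
cm = confusion_matrix(y_true, y_pred)
# cm[i][j] counts samples with true label i and predicted label j:
# [[3 1]
#  [1 2]]
recall_1 = cm[1][1] / (cm[1][0] + cm[1][1])     # 'fause' above: 2/3
precision_1 = cm[1][1] / (cm[0][1] + cm[1][1])  # 'fause_1' above: 2/3
f1_label1 = 2 * recall_1 * precision_1 / (recall_1 + precision_1)  # 'fause_f'
print(recall_1, precision_1, f1_label1)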
Example #3
File: trainer.py Project: shuyuqing/-
import os

import numpy as np
from chainer import cuda, optimizers, serializers
from sklearn.metrics import confusion_matrix

# `label`, `cfg`, `Log`, and the `BLSTM` model class are project-local
# and their import paths are not shown in the original snippet.


def train(
        n_epoch=10,  # 20
        batch_size=10,  # 20
        # setting n_epoch and batch_size here has no effect, but remember
        # to keep them consistent with the config every time
        gpu=-1,
        save=True):
    hparams = {
        'batchsize': cfg.batchsize,
        'dropout_ratio': cfg.dropout,
        'adagrad_lr': cfg.adagrad_lr,  # 0.0005 < lr < 0.01
        'weight_decay': 0.0001,  # 0.0001
        'inputN': cfg.inputN,
        'output': cfg.outputN,
        'lr': cfg.lr,
        'lstmcenshu': cfg.lstmcenshu,
        'cudnn': cfg.cudnn,
        'embed_size': cfg.embed_size,
        'xunliandata':
        (str(cfg.gakusyu)).encode(encoding='utf-8'),  # strings have to be re-encoded before they can be saved
        'mse': cfg.mse,
        'cross': cfg.cross,
    }

    Log.v('')
    Log.v("initialize ...")
    Log.v('--------------------------------')
    Log.i('# Minibatch-size: %d' % cfg.batchsize)
    Log.i('# epoch: %d' % cfg.epoch)
    Log.i('# gpu: %d' % cfg.gpu)
    Log.i('# hyper-parameters: %s' % str(hparams))
    Log.v('--------------------------------')
    Log.v('')

    train_data = []
    train_label = []

    # Set Training data
    pathDir = os.listdir(cfg.gakusyu)
    # print(pathDir)
    cot = 0
    for allDir in pathDir:
        # print(allDir)
        allDir = os.path.join(cfg.gakusyu, allDir)

        print("reading file %s" % allDir)
        with open(allDir, 'r') as f:
            a = np.loadtxt(f, delimiter=',', skiprows=0).astype(np.float32)
        Xtrain = a[:, 1:]
        Labeltrain = a[:, 0:1]
        # skl.preprocessing.normalize(Xtrain, norm='l2')
        # not sure yet what this line would be for
        # ds = X.shape[0]
        # print('Xtrain shape: {}'.format(Xtrain.shape))
        if cfg.cross:
            Labeltrain = label.label_2(Labeltrain)

            train_label.append(np.array(Labeltrain, np.int32))
            train_data.append(np.array(Xtrain, np.float32))

        if cfg.mse:
            Labeltrain = label.label_1(Labeltrain)

            train_label.append(np.array(Labeltrain, np.float32))
            train_data.append(np.array(Xtrain, np.float32))

    # turn labels of the form [[0], [1]] into labels of the form [[1, 0], [0, 1]]
    train_data = np.array(train_data)
    train_label = np.array(train_label)

    # print("现在打印所有的标签")
    # print(train_label)
    # print(len(train_label))
    # print("现在打印所有的学习数据")
    # print(type(train_label))
    # print(train_data)

    sample_size = len(train_data)
    print("我们的学习数据一共有%d个" % sample_size)
    # Set up a neural network
    cls = BLSTM
    model = cls(
        f_dim=cfg.inputN,
        n_labels=cfg.outputN,  # (11*2) +1,
        dropout=hparams['dropout_ratio'],
        train=True,
    )

    if gpu >= 0:
        cuda.get_device_from_id(gpu).use()
        model.to_gpu()
        # cuda.get_device_from_id(gpu)
        # model.to_gpu()

    optimizer = optimizers.Adam(alpha=cfg.lr)
    optimizer.setup(model)
    # optimizer.add_hook(WeightDecay(hparams['weight_decay']))
    losslist = []
    fause_ratelist = []
    correct_ratelist = []
    correct_flist = []
    fause_flist = []

    # from here on, all the training and parameter-update code lives in this loop
    for epoch in range(n_epoch):
        print("starting training for epoch %d" % (epoch + 1))

        batch_count = 0
        loss = 0.0
        accuracy = 0.0

        perm = np.random.permutation(sample_size)

        y_batch_1 = []
        yucezhi_1 = []

        for i in range(
                0, sample_size, batch_size
        ):  # sample_size is the total number of audio files; each iteration is one training step over batch_size of them
            # print("第%d次正向传播,这次正向传播的batch_size是%d"%((i/batch_size)+1,batch_size))

            x_batch = train_data[perm[i:i + batch_size]]
            y_batch = train_label[perm[i:i + batch_size]]

            batch_count += 1
            # print("这次正向传播的学习数据一共有%d个"%len(x_batch))
            # print("下面是x_batch")
            # print(x_batch)
            # print("这次正向传播的学习数据的标签的个数有%d个"%len(y_batch))
            # print("下面是y_batch")
            # print(y_batch)
            # model.cleargrads()

            batch_loss, preds_1, yucezhi = model(
                x_batch, y_batch)  # passing arguments to the instance invokes __call__
            # passing batch_size in makes the loss easy to compute
            # print(y_batch)
            # print(yucezhi)
            # os.system("pause")

            for a in y_batch:
                # print(type(a))
                # os.system("pause")
                y_batch_1.extend(a.tolist())

            for b in yucezhi:
                # print(type(b.data))
                # os.system("pause")
                yucezhi_1.extend((b.data).tolist())  # adding tolist() made this work

            print("下面打印的是这次正向传播的loss")
            print(batch_loss.data)

            loss += batch_loss.data
            # print("下面打印的是这次正向传播的准确率")
            # print(batch_accuracy)
            # accuracy += batch_accuracy
            # print("下面打印叠加之后的loss")
            # print(loss)
            print("下面打印batch_count")
            print(batch_count)

            # this part of the code is Hu Huan's
            # optimizer.target.cleargrads()
            # loss.backward()
            # optimizer.update()

            # this part of the code is Goto's
            # optimizer.target.zerograds()
            # loss.backward()
            # # loss.unchain_backward()  # if the training sequences are very long, you can drop the graph periodically (e.g. every 30 samples) by calling this
            # optimizer.update()
            if model.train:
                optimizer.target.zerograds()
                batch_loss.backward()
                batch_loss.unchain_backward()
                optimizer.update()

        confuse = confusion_matrix(
            y_batch_1, yucezhi_1)  # note: if the predictions match the true values exactly and all are 0 (or all 1), this returns a 1x1 matrix

        fause = confuse[1][1] / (confuse[1][0] + confuse[1][1])    # recall of label 1
        # fause_1 = confuse[1][1] / (confuse[0][1] + confuse[1][1])
        correct = confuse[0][0] / (confuse[0][0] + confuse[0][1])  # recall of label 0
        # correct_1 = confuse[0][0] / (confuse[0][0] + confuse[1][0])
        # c = confuse[0][0] + confuse[0][1]
        # f = confuse[1][0] + confuse[1][1]
        # all = confuse[0][0] + confuse[0][1] + confuse[1][0] + confuse[1][1]
        # correct_f = (2 * correct * correct_1) / (correct + correct_1)
        # fause_f = (2 * fause * fause_1) / (fause + fause_1)
        # correct_f = correct_f/batch_count
        # fause_f = fause_f/batch_count

        print("本次epoch训练完之后的总的loss跟batch_count是")
        print(loss)
        print(batch_count)

        Log.i(
            "[%s] epoch %d - - #samples: %d, loss: %f, fause_rate: %f, correct_rate: %f"
            % ('training' if model.train else 'evaluation', epoch + 1,
               sample_size, loss / batch_count, fause, correct))
        Log.v('-')
        loss_1 = loss / batch_count
        losslist.append(loss_1 / batch_size)  # the loss computed here is the per-audio-file loss
        fause_ratelist.append(fause)
        correct_ratelist.append(correct)
        print("现在把第%d个epoach的模型保存下来")
        name = str(epoch + 1) + 'model'
        Log.i("saving the model to %s ..." % (epoch + 1) + 'model')
        serializers.save_npz(os.path.join(os.getcwd() + '/model/', name),
                             model)
    np.savetxt(os.getcwd() + '/libs/train/loss.txt', losslist)  # save the losses to a file
    np.savetxt(os.getcwd() + '/libs/train/fause_rate.txt', fause_ratelist)
    np.savetxt(os.getcwd() + '/libs/train/correct_rate.txt', correct_ratelist)
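
The comment before the array conversion in train() mentions turning labels like [[0], [1]] into one-hot labels like [[1, 0], [0, 1]]. That conversion is done by the project's own label module, which is not shown here; a plausible sketch of such a helper (an assumption, not the project's actual label.label_2):

import numpy as np

def to_one_hot(labels, n_classes=2):
    # Hypothetical helper: flatten column-vector labels such as [[0], [1]]
    # and scatter 1s into an (n_samples, n_classes) zero matrix.
    flat = np.asarray(labels, dtype=np.int32).ravel()
    one_hot = np.zeros((flat.size, n_classes), dtype=np.int32)
    one_hot[np.arange(flat.size), flat] = 1
    return one_hot

print(to_one_hot([[0], [1]]))  # [[1 0]
                               #  [0 1]]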
Example #4
File: sed_1_2D.py Project: shuyuqing/-
    Y_test = bn
    print('training data features')
    print(X.shape)
    print('training data labels')
    print(Y.shape)
    # os.system('pause')
    """
    train = np.load(path_train)
    test = np.load(path_test)

    X = train['arr_0']
    Y = train['arr_1']
    X_test = test['arr_0']
    Y_test = test['arr_1']

    Y = label.label_1(Y)  # convert the labels into a form the network can take
    Y_test = label.label_1(Y_test)  # convert the labels into a form the network can take

    print(X.shape)
    print(Y.shape)
    print('test data')
    print(X_test.shape)
    print(Y_test.shape)

    # os.system('pause')
    # X, Y, X_test, Y_test = preprocess_data(X, Y, X_test, Y_test, seq_len, nb_ch)  # slice both labels and data, for the case where every frame gets its own label

    X, X_test = preprocess_data_1(X, X_test, seq_len,
                                  nb_ch)  # slice only the training data, for the case where one block maps to one label

    print('training data features')
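
preprocess_data_1 is the project's own helper and its body is not part of this snippet; per the comment above, it slices each feature sequence into fixed-length blocks so that one block maps to one label. A hypothetical sketch of that idea (the function name, signature, and tail-dropping policy are assumptions):

import numpy as np

def split_into_blocks(x, seq_len):
    # Chop a (frames, feats) array into (n_blocks, seq_len, feats) blocks,
    # dropping the ragged tail that does not fill a whole block.
    n_blocks = x.shape[0] // seq_len
    return x[:n_blocks * seq_len].reshape(n_blocks, seq_len, x.shape[1])

x = np.random.rand(103, 40).astype(np.float32)
print(split_into_blocks(x, 10).shape)  # (10, 10, 40)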