Example 1
import numpy as np

# (here the parameters are just the weights), so the data argument also bundles X_train and Y_train
def CIFAR10_loss_fun(data, weights):
    """
    data = [X_train, Y_train]
    """
    from Lecture3.Loss import L_SVM
    return L_SVM(data[0], data[1], weights)
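
# Lecture3.Loss.L_SVM is not shown in this snippet; a minimal sketch of a
# vectorized multiclass SVM (hinge) loss with the same signature might look
# like this (the margin delta=1.0 and the row-per-example layout of this
# example are assumptions):
def L_SVM_sketch(X, y, W, delta=1.0):
    """X: (N, D) examples as rows, y: (N,) integer labels, W: (C, D)."""
    scores = X.dot(W.T)  # (N, C) class scores
    correct = scores[np.arange(len(y)), y]  # (N,) score of the true class
    margins = np.maximum(0, scores - correct[:, None] + delta)
    margins[np.arange(len(y)), y] = 0  # the true class contributes no margin
    return margins.sum() / len(y)  # average hinge loss over the batch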


if __name__ == '__main__':
    from load_data import load_CIFAR10, sample_training_data

    X_train, Y_train = load_CIFAR10('../data/cifar10/')[0:2]
    X_train = np.append(X_train, np.ones((X_train.shape[0], 1)), axis=1)  # append a constant bias dimension
    data_train = [X_train, Y_train]
    data_batch = sample_training_data(data_train, 256)  # mini-batch of 256 examples
    W = np.random.rand(10, 3073) * 0.001  # random initial weights
    op = Optimization()  # Optimization is defined elsewhere in this project; see the sketch below
    df, loss_original = op.eval_numerical_gradient(CIFAR10_loss_fun,
                                                   data_batch, W)  # gradient and initial loss

    print('original loss: %f' % (loss_original, ))
    min_loss = loss_original
    # try different step sizes and compare their effect
    for step_size_log in [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1]:
        step_size = 10**step_size_log

        W_new = W - step_size * df  # new position in weight space
        loss_new = CIFAR10_loss_fun(data_batch, W_new)
        print('for step size %f new loss: %.7f' % (step_size, loss_new))
        if loss_new < min_loss:
            min_loss = loss_new
            best_step_size = step_size  # keep the best step size found so far
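
`Optimization` is defined elsewhere in this project. Judging from the call site above, which returns both the gradient and the loss at `W`, a centered-difference sketch could look like this (the step `h`, the class body, and the return order are assumptions):

class OptimizationSketch:
    def eval_numerical_gradient(self, f, data, W, h=1e-5):
        """Numerically estimate df/dW of f(data, W); returns (grad, loss)."""
        loss = f(data, W)  # loss at the current weights
        grad = np.zeros_like(W)
        it = np.nditer(W, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            ix = it.multi_index
            old = W[ix]
            W[ix] = old + h
            loss_plus = f(data, W)
            W[ix] = old - h
            loss_minus = f(data, W)
            W[ix] = old  # restore the original entry
            grad[ix] = (loss_plus - loss_minus) / (2 * h)
            it.iternext()
        return grad, loss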
Example 2
import copy

import numpy as np

# NN (the two-layer network used below) is defined elsewhere in this project

if __name__ == '__main__':
    from load_data import load_CIFAR10, sample_training_data

    X_train, Y_train, X_test, Y_test = load_CIFAR10('../data/cifar10/')  # 3072 x 50000

    mean_train = np.mean(X_train, axis = 1).reshape((-1, 1))
    std_train = np.std(X_train, axis = 1).reshape((-1, 1))

    X_train -= mean_train  # zero-center: subtract the per-dimension mean
    X_train /= std_train  # normalize: divide each dimension by its standard deviation
    X_test -= mean_train  # the test split must reuse the training statistics
    X_test /= std_train

    data_batch = sample_training_data([X_train, Y_train], 256)  # mini-batch of 256 examples, 3072 x 256
    in_num = 3072
    hidden_num = 100
    out_num = 10
    nn = NN(data_batch[0], in_num, hidden_num, out_num, data_batch[1], 10 ** -3, 0.01)
    loss_original = nn.loss
    print("original loss: %f" % (loss_original,))
    min_loss = loss_original
    for step_size_log in [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0]:
        step_size = 10 ** step_size_log
        n = copy.deepcopy(nn)
        n.hidden_layer.step_size = step_size
        n.output_layer.step_size = step_size

        dh = n.output_layer.backward(n.gradient)
        n.hidden_layer.backward(dh)

        loss_new = n.forward()[0]  # loss after one update at this step size
        print("step_size: %s, loss: %f" % (format(step_size, '.2e'), loss_new))
        if loss_new < min_loss:
            min_loss = loss_new
            best_step_size = step_size
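
`sample_training_data` comes from the local `load_data` module and is not shown. Under the column-per-example layout used in this example (`X` is 3072 x N), a minimal sketch might be:

def sample_training_data_sketch(data, batch_size):
    """Randomly sample a mini-batch of columns from [X, Y]."""
    X, Y = data
    idx = np.random.choice(X.shape[1], batch_size, replace=False)
    return [X[:, idx], Y[idx]]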
Example 3
    def train(self, X_train, Y_train, X_validation, Y_validation, node_num,
              num_epochs, reg, update = 'sgd',
              learning_rate_decay = 1, learning_rate = None,
              sample_batch = False, batch_size = None, model = None):

        # training data: optionally train on sampled mini-batches
        if sample_batch and batch_size is not None:
            train_data = sample_training_data([X_train, Y_train], batch_size)  # e.g. 256 examples, 3072 x 256
            val_data = sample_training_data([X_validation, Y_validation], batch_size // 2)
        elif not sample_batch:
            train_data = [X_train, Y_train]
            val_data = [X_validation, Y_validation]
        else:
            raise Exception("missing 'batch_size'")
        train_data_size = train_data[0].shape[1]
        val_data_size = val_data[0].shape[1]

        in_num = node_num[0]  # 3072
        hidden_num = node_num[1]  # 100
        out_num = node_num[2]  # 10

        if model is not None:
            learning_rate = model['lr']

        nn = NN(train_data[0], in_num, hidden_num, out_num, train_data[1], step_size = learning_rate, lam = reg)

        if model is not None:
            nn.set(model['W1'], model['b1'], model['W2'], model['b2'])

        if learning_rate is None:
            # pick a suitable step size
            loss_original = nn.loss
            print("original loss: %f" % (loss_original,))
            min_loss = loss_original
            step_size_log_list = [-10 + learning_rate_decay * x for x in range(10 // learning_rate_decay + 1)]

            for step_size_log in step_size_log_list:
                step_size = 10 ** step_size_log
                n = copy.deepcopy(nn)
                n.hidden_layer.step_size = step_size
                n.output_layer.step_size = step_size

                dh = n.output_layer.backward(n.gradient)
                n.hidden_layer.backward(dh)

                loss_new = n.forward()[0]
                print("step_size: %s, loss: %f" % (format(step_size, '.2e'), loss_new,))
                if loss_new < min_loss:
                    min_loss = loss_new
                    best_step_size = step_size
            print("best step size %s" % format(best_step_size, '.2e'))
            # set the chosen step size
            nn.hidden_layer.step_size = best_step_size
            nn.output_layer.step_size = best_step_size

        best_val_accuracy = 0
        for i in range(num_epochs):
            if sample_batch:
                train_data = sample_training_data([X_train, Y_train], batch_size)  # e.g. 256 examples, 3072 x 256
                val_data = sample_training_data([X_validation, Y_validation], batch_size // 2)
            else:
                train_data = None

            val_loss, val_probability = nn.validation(val_data)
            loss, tr_probability = nn.forward(train_data)

            # accuracy measured by a confidence threshold
            # tr_accuracy = np.sum(tr_probability > 0.9) / train_data_size
            # val_accuracy = np.sum(val_probability > 0.9) / val_data_size

            # accuracy as the mean of the softmax outputs
            tr_accuracy = np.sum(tr_probability) / train_data_size
            val_accuracy = np.sum(val_probability) / val_data_size
            lr = learning_rate if learning_rate is not None else best_step_size
            print("epoch  %d / %d, loss: %f, train: %f, validation: %f, lr: %s"
                  % (i + 1, num_epochs, loss, tr_accuracy, val_accuracy, format(lr, '.2e')))
            if val_accuracy > best_val_accuracy:
                best_val_accuracy = val_accuracy
            dh = nn.output_layer.backward(nn.gradient)
            nn.hidden_layer.backward(dh)
            # time.sleep(0.1)
        print("finished optimization, best validation accuracy: %f" % best_val_accuracy)
        best_model = {
            'W1': nn.hidden_layer.W, 'b1': nn.hidden_layer.b,
            'W2': nn.output_layer.W, 'b2': nn.output_layer.b,
            'lr': lr,
        }
        return best_model
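
A hypothetical call to this train() method, consistent with the shapes used in the other examples (the instance name `trainer` and every argument value here are illustrative assumptions, not from the source):

best_model = trainer.train(
    X_train, Y_train, X_validation, Y_validation,
    node_num=[3072, 100, 10],  # input, hidden and output layer sizes
    num_epochs=1000, reg=0.01,  # reg is the L2 regularization strength
    learning_rate=None,  # None triggers the step-size search above
    sample_batch=True, batch_size=256)
# the returned dict can be passed back via `model` to resume training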
Example 4
import copy
import time

import numpy as np

# NN and save_weights are defined elsewhere in this project
def CIFAR10_test():
    from load_data import load_CIFAR10, sample_training_data

    X_train, Y_train, X_test, Y_test = load_CIFAR10(
        '../data/cifar10/')  # 3072 x 50000
    '''Data preprocessing'''
    mean_train = np.mean(X_train, axis=1).reshape((-1, 1))
    std_train = np.std(X_train, axis=1).reshape((-1, 1))

    X_train -= mean_train  # zero-center: subtract the per-dimension mean
    X_train /= std_train  # normalize: divide each dimension by its standard deviation
    X_test -= mean_train  # the test split reuses the training statistics
    X_test /= std_train
    '''Initialize the neural network'''
    data_batch = sample_training_data([X_train, Y_train],
                                      256)  # mini-batch of 256 examples, 3072 x 256
    in_num = 3072
    hidden_num = 100
    out_num = 10
    nn = NN(data_batch[0], in_num, hidden_num, out_num, data_batch[1], 10**-3,
            0.01)
    loss_original = nn.loss
    print("original loss: %f" % (loss_original, ))
    '''Pick a suitable step size'''
    min_loss = loss_original
    for step_size_log in [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0]:
        step_size = 10**step_size_log
        n = copy.deepcopy(nn)
        n.hidden_layer.step_size = step_size
        n.output_layer.step_size = step_size

        dh = n.output_layer.backward(n.gradient)
        n.hidden_layer.backward(dh)

        n.forward()
        loss_new = n.loss  # forward() updates the stored loss
        print("step_size: %.10f, loss: %f" % (step_size, loss_new))
        if loss_new < min_loss:
            min_loss = loss_new
            best_step_size = step_size

    print("best step size %.10f" % (best_step_size, ))
    nn.hidden_layer.step_size = best_step_size
    nn.output_layer.step_size = best_step_size
    time.sleep(1)
    '''Sanity check before training: take a small subset of the data and see whether the network can overfit it, to verify that backpropagation works correctly'''
    batch_size = 16
    data_batch = sample_training_data([X_train, Y_train], batch_size)
    for i in range(300):
        nn.forward(data_batch)
        loss = nn.loss
        correct = np.sum(nn.probability > 0.9) / batch_size
        print("i: %d , loss: %f, correct ratio: %f" % (
            i,
            loss,
            correct,
        ))
        if (loss < 0.00001 or correct >= 0.99):
            break
        dh = nn.output_layer.backward(nn.gradient)
        nn.hidden_layer.backward(dh)
        time.sleep(0.1)
    print("loss: %f, correct ratio: %f" % (
        loss,
        correct,
    ))
    if (correct < 0.99):
        raise Exception("BP does not work correctly")
    '''Start training'''
    for i in range(1000):
        batch_size = 256
        data_batch = sample_training_data([X_train, Y_train], batch_size)
        nn.forward(data_batch)
        # h_weights_grad = nn.eval_numerical_gradient(nn.hidden_layer)
        # o_weights_grad = nn.eval_numerical_gradient(nn.output_layer)
        loss = nn.loss
        correct = np.sum(nn.probability > 0.5) / batch_size
        print("i: %d , loss: %f, correct ratio: %f" % (
            i,
            loss,
            correct,
        ))
        if (loss < 0.00001):
            break

        dh = nn.output_layer.backward(nn.gradient)
        nn.hidden_layer.backward(dh)
        # time.sleep(0.1)
    '''Training results (iteration, loss, correct ratio):
    i: 0 , loss: 5.286373, correct ratio: 0.027344
    i: 1 , loss: 5.164099, correct ratio: 0.015625
    i: 2 , loss: 5.090839, correct ratio: 0.015625
    i: 3 , loss: 4.836606, correct ratio: 0.035156
    i: 4 , loss: 4.861015, correct ratio: 0.031250
    i: 5 , loss: 4.918304, correct ratio: 0.019531
    i: 6 , loss: 4.632134, correct ratio: 0.015625
    ...
    i: 991 , loss: 3.045871, correct ratio: 0.187500
    i: 992 , loss: 3.044970, correct ratio: 0.207031
    i: 993 , loss: 2.993322, correct ratio: 0.257812
    i: 994 , loss: 3.028033, correct ratio: 0.187500
    i: 995 , loss: 2.953715, correct ratio: 0.234375
    i: 996 , loss: 2.930693, correct ratio: 0.234375
    i: 997 , loss: 3.072322, correct ratio: 0.183594
    i: 998 , loss: 3.051221, correct ratio: 0.191406
    i: 999 , loss: 2.898737, correct ratio: 0.207031
    '''
    print("loss: %f" % (loss, ))
    '''Test the network'''
    data_test = [X_test, Y_test]
    nn.forward(data_test)
    loss = nn.loss
    print("test loss: %f, correct ratio: %f" % (
        loss,
        np.sum(nn.probability > 0.5) / batch_size,
    ))
    '''Test result: test loss: 2.840334, correct ratio: 0.222656'''
    '''Save the network weights'''
    select = input("save weights and bias ? (y or n)")
    if select == "y":  # `is` would compare identity, not string equality
        save_weights(nn)
        print("save successfully")