Code Example #1
File: grad_check.py  Project: resistzzz/ToyNN_new
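# Assumed module-level imports for this snippet (not shown in the excerpt):
# numpy as np, torch, torch.nn as nn, sklearn.metrics.log_loss, and the
# toy framework's own `activation` and `loss` modules.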
def test_cross_entropy_loss():
    print("gradient check: Cross Entropy")

    x = np.random.rand(5*8).reshape((5, 8)).astype('float32')
    softmax = activation.Softmax()
    x_soft = softmax(x)
    y = np.array([1, 4, 6, 3, 2], dtype='int32')
    y_onehot = np.zeros((5, 8)).astype('float32')
    y_onehot[range(0, 5), y] = 1.
    print(x)
    print('log loss: ', log_loss(y, x_soft, labels=[0, 1, 2, 3, 4, 5, 6, 7]))
    cross_entropy_f = loss.CrossEntropyLoss()
    cross_entropy_torch = nn.CrossEntropyLoss()

    torch_x = torch.Tensor(x)
    torch_x.requires_grad = True
    ce_loss_torch = cross_entropy_torch(torch_x, torch.LongTensor(y))
    ce_loss = cross_entropy_f(x_soft, y_onehot)
    print("Value:\ntorch:{},mine:{}, delta:{}"
          .format(ce_loss_torch.item(), ce_loss, (ce_loss-ce_loss_torch.item())))
    ce_loss_torch.backward()
    torch_x_grad = torch_x.grad.data.numpy()
    x_grad = softmax.backward(cross_entropy_f.backward())  # chain the gradient back through softmax
    # print(np.sum(x_grad - torch_x_grad, 0))
    print(x_grad - torch_x_grad)
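What this check verifies: for a softmax layer followed by cross-entropy, the gradient with respect to the logits collapses to (p - y) / N, where p is the softmax output, y the one-hot target, and N the batch size (PyTorch's CrossEntropyLoss averages over the batch by default). A minimal numpy sketch of that closed form, independent of either framework:

import numpy as np

def softmax_ce_grad(logits, y_onehot):
    # numerically stable softmax over the class axis
    z = logits - logits.max(axis=1, keepdims=True)
    p = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)
    # combined softmax + cross-entropy gradient w.r.t. the logits
    return (p - y_onehot) / logits.shape[0]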
Code Example #2
    def __init__(self, X_train, Y_train, Net='LeNet5', opti='SGDMomentum'):
        # Prepare Data: Load, Shuffle, Normalization, Batching, Preprocessing
        self.X_train = X_train
        self.Y_train = Y_train

        self.batch_size = 64
        # D_in: input dimension of the network; 784 = 28*28 pixels of a grayscale image
        self.D_in = 784
        # D_out: output dimension of the network; 10, one per digit class
        self.D_out = 10

        print('  Net: ' + str(Net))
        print('  batch_size: ' + str(self.batch_size))
        print('  D_in: ' + str(self.D_in))
        print('  D_out: ' + str(self.D_out))
        print('  Optimizer: ' + opti)

        # =======================
        if Net == 'TwoLayerNet':
            # H is the size of the single hidden layer.
            H = 400
            self.model = ANN.TwoLayerNet(self.D_in, H, self.D_out)
        elif Net == 'ThreeLayerNet':
            #######################################
            ############  TODO   ##################
            #######################################
            # H1, H2 are the size of the two hidden layers.
            #self.model = ANN.ThreeLayerNet (self.D_in, H1, H2, self.D_out)
            print('Not Implemented.')
            exit(0)
        elif Net == 'LeNet5':
            self.model = CNN.LeNet5()

        # store training loss over iterations, for later visualization
        self.losses = []

        if opti == 'SGD':
            self.opti = optimizer.SGD(self.model.get_params(),
                                      lr=0.0001,
                                      reg=0)
        else:
            self.opti = optimizer.SGDMomentum(self.model.get_params(),
                                              lr=0.0001,
                                              momentum=0.80,
                                              reg=0.00003)

        self.criterion = loss.CrossEntropyLoss()
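For reference, a common formulation of the update that an optimizer such as optimizer.SGDMomentum might implement, with reg acting as L2 weight decay (a sketch under that assumption; the project's actual code may differ):

def sgd_momentum_step(w, grad, v, lr=0.0001, momentum=0.80, reg=0.00003):
    # v <- momentum * v - lr * (grad + reg * w);  w <- w + v
    v = momentum * v - lr * (grad + reg * w)
    return w + v, v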
Code Example #3
File: grad_check.py  Project: wondervictor/Toynn
def test_fully_connected():
    print("gradient check: FullyConnected")

    x = np.random.rand(5 * 8).reshape((5, 8)).astype('float32')
    y = np.array([1, 4, 6, 3, 2], dtype='int32')
    y_onehot = np.zeros((5, 12)).astype('float32')
    y_onehot[range(0, 5), y] = 1.

    # --- mine ---
    fc1 = layer.FullyConnected(8, 10)
    fc2 = layer.FullyConnected(10, 12)
    relu1 = activation.ReLU()
    softmax = activation.Softmax()
    ce_func = loss.CrossEntropyLoss()
    fc_out1 = fc1(x)
    fc_out1 = relu1(fc_out1)
    fc_out2 = fc2(fc_out1)
    fc_out2 = softmax(fc_out2)
    ce_loss = ce_func(fc_out2, y_onehot)

    # --- torch ---
    weights1 = fc1.weights.get_data()
    bias1 = fc1.bias.get_data()
    weights2 = fc2.weights.get_data()
    bias2 = fc2.bias.get_data()

    torch_fc = nn.Linear(8, 10)
    torch_fc2 = nn.Linear(10, 12)
    torch_fc.weight.data.copy_(torch.Tensor(weights1.T))
    torch_fc.bias.data.copy_(torch.Tensor(bias1))
    torch_fc2.weight.data.copy_(torch.Tensor(weights2.T))
    torch_fc2.bias.data.copy_(torch.Tensor(bias2))
    torch_relu = nn.ReLU()

    torch_ce_func = nn.CrossEntropyLoss()
    torch_x = torch.Tensor(x)
    torch_x.requires_grad = True
    torch_fc_out = torch_fc(torch_x)
    torch_fc_out1 = torch_relu(torch_fc_out)
    torch_fc_out2 = torch_fc2(torch_fc_out1)
    torch_ce_loss = torch_ce_func(torch_fc_out2, torch.LongTensor(y))

    print("Value:\ntorch:{}, mini:{}, delta:{}".format(
        torch_sqaure_loss.item(), sqaure_loss,
        (torch_sqaure_loss.item() - sqaure_loss)))

    # --- my grad ---
    grad_x = ce_func.backward()
    grad_x = softmax.backward(grad_x)
    grad_fc2 = fc2.backward(grad_x)
    grad_w2 = fc2.weights.get_grad()
    grad_b2 = fc2.bias.get_grad()

    grad_x = relu1.backward(grad_fc2)
    grad_x = fc1.backward(grad_x)
    grad_w1 = fc1.weights.get_grad()
    grad_b1 = fc1.bias.get_grad()

    # --- torch grad ---
    torch_ce_loss.backward()
    torch_grad_x = torch_x.grad.data.numpy()
    torch_grad_w1 = torch_fc.weight.grad.data.numpy()
    torch_grad_b1 = torch_fc.bias.grad.data.numpy()
    torch_grad_w2 = torch_fc2.weight.grad.data.numpy()
    torch_grad_b2 = torch_fc2.bias.grad.data.numpy()
    print("--grad x ---")
    print(grad_x - torch_grad_x)

    print("--grad w1 ---")
    print(grad_w1 - torch_grad_w1.T)

    print("--grad b1 ---")
    print(grad_b1 - torch_grad_b1)

    print("--grad w2 ---")
    print(grad_w2 - torch_grad_w2.T)

    print("--grad b2 ---")
    print(grad_b2 - torch_grad_b2)
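Note the .T when copying the weights in and again when comparing the weight gradients: nn.Linear stores its weight as (out_features, in_features), while this toy framework evidently keeps (in_features, out_features), so both directions need a transpose. A quick check of the PyTorch side:

import torch.nn as nn

fc = nn.Linear(8, 10)
print(fc.weight.shape)  # torch.Size([10, 8]), i.e. (out_features, in_features)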
Code Example #4
print('D_out: ' + str(D_out))

### TWO LAYER NET FORWARD TEST ###
#H=400
#model = nn.TwoLayerNet(batch_size, D_in, H, D_out)
H1 = 300
H2 = 100
model = nn.ThreeLayerNet(batch_size, D_in, H1, H2, D_out)

losses = []
#optim = optimizer.SGD(model.get_params(), lr=0.0001, reg=0)
optim = optimizer.SGDMomentum(model.get_params(),
                              lr=0.0001,
                              momentum=0.80,
                              reg=0.00003)
criterion = loss.CrossEntropyLoss()

# TRAIN
ITER = 25000
for i in range(ITER):
    # get batch, make onehot
    X_batch, Y_batch = util.get_batch(X_train, Y_train, batch_size)
    Y_batch = util.MakeOneHot(Y_batch, D_out)

    # forward, loss, backward, step
    Y_pred = model.forward(X_batch)
    # use loss_value rather than `loss` to avoid shadowing the loss module
    loss_value, dout = criterion.get(Y_pred, Y_batch)
    model.backward(dout)
    optim.step()

    if i % 100 == 0:
        # (assumed continuation; the source snippet is truncated at this point)
        print("iter: %d, loss: %f" % (i, loss_value))
        losses.append(loss_value)
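util.MakeOneHot is not shown in the excerpt; a minimal numpy sketch of what such a helper typically does (hypothetical implementation, assuming a 1-D array of integer labels in [0, D_out)):

import numpy as np

def make_one_hot(labels, num_classes):
    # one row per label, with a single 1 in the labelled column
    onehot = np.zeros((labels.size, num_classes), dtype='float32')
    onehot[np.arange(labels.size), labels] = 1.
    return onehot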