def __init__(self):
    super(SimplerCNN, self).__init__()
    # Assuming 3x32x32 (CIFAR-10-like) inputs: the spatial size goes 32 -> 12 -> 6 -> 3,
    # so the flattened feature size is 40 * 3 * 3 = 360, matching the first Linear layer.
    self.dropout2d_input = nn.Dropout2d(rate=0.3)
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=15, kernel_size=3, stride=3, padding=2)
    self.relu1 = nn.LeakyRelu()
    self.conv2 = nn.Conv2d(in_channels=15, out_channels=30, kernel_size=3, stride=3, padding=3)
    self.relu2 = nn.LeakyRelu()
    self.dropout2d_conv1 = nn.Dropout2d(rate=0.5)
    self.conv3 = nn.Conv2d(in_channels=30, out_channels=40, kernel_size=4)
    self.relu3 = nn.LeakyRelu()
    self.flatten = nn.Flatten()
    self.dropout2d_conv2 = nn.Dropout2d(rate=0.2)
    self.linear = nn.Linear(in_dimension=360, out_dimension=180)
    self.relu4 = nn.LeakyRelu()
    self.bn1 = nn.BatchNorm()
    self.dropout3 = nn.Dropout(rate=0.3)
    self.linear2 = nn.Linear(in_dimension=180, out_dimension=10)
    self.bn2 = nn.BatchNorm()
    self.softmax = nn.Softmax()
    self.set_forward()
def conv(b):
    """
    The gradient checker differentiates the loss with respect to this
    function's argument, so the argument here is the bias vector b: the
    backward pass accumulates dloss/db in conv.b_grad.
    Note: x is assumed to be captured from the enclosing gradient-check scope.
    """
    # simulate the end of a classification network
    conv = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=2)
    relu = nn.Relu()
    flatten = nn.Flatten()
    linear = nn.Linear(in_dimension=12, out_dimension=4)
    softmax = nn.Softmax()
    conv.set_biases(b.reshape(3, 1))

    # forward
    a = flatten(relu(conv(x)))
    dist = softmax(linear(a))

    # backward
    labels = np.zeros(dist.shape)
    labels[:, 1] = 1
    loss = -np.log(np.sum(dist * labels, axis=1))
    softmax_grad = softmax.backward(labels)
    linear_grad = linear.backward(softmax_grad)
    flatten_grad = flatten.backward(linear_grad)
    relu_grad = relu.backward(flatten_grad)
    conv_grad = conv.backward(relu_grad)
    b_grad = conv.b_grad
    return loss, b_grad
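# A minimal sketch of the central-difference check that closures like conv(b)
# are written for. It is not part of the original suite: the name
# numerical_gradient_check and the eps/atol values are illustrative. It assumes
# f(p) -> (per-sample loss array, analytic gradient with p's number of elements),
# that the analytic gradient is summed (not averaged) over the batch, and that
# f is deterministic across calls (layer weights seeded or set explicitly);
# otherwise build the layers once outside the closure.
def numerical_gradient_check(f, p, eps=1e-5, atol=1e-4):
    _, analytic_grad = f(p)
    numeric_grad = np.zeros(p.shape, dtype=float)
    it = np.nditer(p, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        p_plus = p.astype(float)
        p_minus = p.astype(float)
        p_plus[idx] += eps
        p_minus[idx] -= eps
        loss_plus, _ = f(p_plus)
        loss_minus, _ = f(p_minus)
        # central difference of the summed batch loss w.r.t. this entry
        numeric_grad[idx] = np.sum(loss_plus - loss_minus) / (2 * eps)
        it.iternext()
    return np.allclose(np.asarray(analytic_grad).reshape(p.shape), numeric_grad, atol=atol)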
def conv_layer(x):
    """
    The gradient checker differentiates the loss with respect to this
    function's argument, so the argument here is the input x: the backward
    pass propagates dloss/dx all the way back through conv1.
    """
    conv1 = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=2)
    relu1 = nn.Relu()
    conv2 = nn.Conv2d(in_channels=2, out_channels=4, kernel_size=2)
    relu2 = nn.Relu()
    flatten = nn.Flatten()
    linear = nn.Linear(4, 2)
    softmax = nn.Softmax()

    # forward pass
    a = relu1(conv1(x))
    a = relu2(conv2(a))
    a_flatten = flatten(a)
    dist = softmax(linear(a_flatten))

    # backward
    labels = np.zeros(dist.shape)
    labels[:, 1] = 1
    loss = -np.log(np.sum(dist * labels, axis=1))
    softmax_grad = softmax.backward(labels)
    linear_grad = linear.backward(softmax_grad)
    flatten_grad = flatten.backward(linear_grad)
    relu2_grad = relu2.backward(flatten_grad)
    conv2_grad = conv2.backward(relu2_grad)
    relu1_grad = relu1.backward(conv2_grad)
    conv1_grad = conv1.backward(relu1_grad)
    return loss, conv1_grad
def __init__(self):
    super(NN, self).__init__()
    # 3072 = 3 * 32 * 32, i.e. a flattened 3x32x32 image; 10 output classes
    self.linear1 = nn.Linear(in_dimension=3072, out_dimension=256)
    self.relu1 = nn.LeakyRelu()
    self.dropout1 = nn.Dropout(rate=0.3)
    self.linear2 = nn.Linear(in_dimension=256, out_dimension=10)
    self.softmax = nn.Softmax()
    self.set_forward()
def softmax_layer(x):
    softmax = nn.Softmax()

    # one-hot labels: class 0 for every sample in the batch
    labels = np.zeros(x.shape)
    labels[:, 0] = 1

    dist = softmax(x)
    loss = -np.log(np.sum(dist * labels, axis=1))
    grad = softmax.backward(labels)
    return loss, grad
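# Why the chain always starts with softmax.backward(labels): for cross-entropy
# over a softmax, L = -log(sum(softmax(z) * y)) with one-hot y, the gradient of
# the loss with respect to the softmax *input* is softmax(z) - y. A standalone
# numpy check of that identity (a sketch for reference only; it does not touch
# the nn library, and the shapes are arbitrary):
import numpy as np

z = np.random.randn(2, 4)
y = np.zeros_like(z)
y[:, 0] = 1
dist = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)

eps = 1e-6
numeric = np.zeros_like(z)
for i in range(z.shape[0]):
    for j in range(z.shape[1]):
        zp, zm = z.copy(), z.copy()
        zp[i, j] += eps
        zm[i, j] -= eps
        lp = -np.log((np.exp(zp) / np.exp(zp).sum(axis=1, keepdims=True) * y).sum(axis=1))
        lm = -np.log((np.exp(zm) / np.exp(zm).sum(axis=1, keepdims=True) * y).sum(axis=1))
        numeric[i, j] = (lp - lm).sum() / (2 * eps)

assert np.allclose(numeric, dist - y, atol=1e-5)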
def relu_layer(x):
    relu = nn.Relu()
    softmax = nn.Softmax()

    a = softmax(relu(x))

    # one-hot labels: class 0 for every sample in the batch
    labels = np.zeros(x.shape)
    labels[:, 0] = 1
    loss = -np.log(np.sum(a * labels, axis=1))

    softmax_grad = softmax.backward(labels)
    relu_grad = relu.backward(softmax_grad)
    return loss, relu_grad
def sigmoid_layer(x):
    sigmoid = nn.Sigmoid()
    softmax = nn.Softmax()

    a = softmax(sigmoid(x))

    # one-hot labels: class 0 for every sample in the batch
    labels = np.zeros(x.shape)
    labels[:, 0] = 1
    loss = -np.log(np.sum(a * labels, axis=1))

    softmax_grad = softmax.backward(labels)
    sigmoid_grad = sigmoid.backward(softmax_grad)
    return loss, sigmoid_grad
def test_linear_module_softmax_1(self):
    x = np.array([[1, 2, 0, -1],
                  [1, 2, -1, -2]])
    w = np.array([[0., 1., 0., 0.],
                  [0., 2., 2., 0.]])
    b = np.zeros(2)
    expected_res = np.array([[.119202, .88079],
                             [.5, .5]])

    linear = nn.Linear(4, 2)
    softmax = nn.Softmax()
    linear.set_weights(w)
    linear.set_biases(b)

    z = linear(x)
    a = softmax(z)
    np.testing.assert_allclose(a, expected_res, atol=0.0001)
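# Sanity check for the expected values above, as an illustrative sketch that
# uses only numpy. It assumes the Linear module computes z = x @ w.T + b, which
# is the contraction that makes the (2, 4) input and (2, 4) weight matrix line up:
#   z = [[2, 4], [2, 2]]
#   softmax([2, 4]) = [1 / (1 + e^2), e^2 / (1 + e^2)] ~= [0.119203, 0.880797]
#   softmax([2, 2]) = [0.5, 0.5]
import numpy as np

x = np.array([[1, 2, 0, -1], [1, 2, -1, -2]])
w = np.array([[0., 1., 0., 0.], [0., 2., 2., 0.]])
b = np.zeros(2)

z = x @ w.T + b                                        # [[2., 4.], [2., 2.]]
a = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)
print(a)                                               # [[0.119203 0.880797], [0.5 0.5]]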
def __init__(self):
    super(SimpleCNN, self).__init__()
    # Assuming 3x32x32 (CIFAR-10-like) inputs: the spatial size goes 32 -> 12 -> 6,
    # so the flattened feature size is 10 * 6 * 6 = 360, matching the Linear layer.
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=3, padding=2)
    self.tanh1 = nn.Tanh()
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=10, kernel_size=3, stride=3, padding=3)
    self.tanh2 = nn.Tanh()
    self.dropout2d = nn.Dropout2d(rate=0.5)
    self.flatten = nn.Flatten()
    self.linear = nn.Linear(in_dimension=360, out_dimension=10)
    self.softmax = nn.Softmax()
    self.set_forward()
def flatten(x):
    flatten_ = nn.Flatten()
    linear = nn.Linear(in_dimension=48, out_dimension=4)
    softmax = nn.Softmax()

    # forward
    flatten_x = flatten_(x)
    dist = softmax(linear(flatten_x))

    # backward
    labels = np.zeros(dist.shape)
    labels[:, 1] = 1
    loss = -np.log(np.sum(dist * labels, axis=1))
    softmax_grad = softmax.backward(labels)
    linear_grad = linear.backward(softmax_grad)
    flatten_grad = flatten_.backward(linear_grad)
    return loss, flatten_grad
def linear_layer(b):
    """
    The gradient checker differentiates the loss with respect to this
    function's argument, so the argument here is the bias vector b: the
    backward pass accumulates dloss/db in linear.b_grad.
    Note: z is assumed to be captured from the enclosing gradient-check scope.
    """
    # simulate the end of a classification network
    linear = nn.Linear(in_dimension=3, out_dimension=2)
    linear.set_biases(b)
    softmax = nn.Softmax()

    # forward
    dist = softmax(linear(z))

    # backward
    labels = np.zeros(dist.shape)
    labels[:, 1] = 1
    loss = -np.log(np.sum(dist * labels, axis=1))
    softmax_grad = softmax.backward(labels)
    linear_grad = linear.backward(softmax_grad)
    b_grad = linear.b_grad
    return loss, b_grad
def linear_layer(z):
    """
    The gradient checker differentiates the loss with respect to this
    function's argument, so the argument here is z: the backward pass
    computes dloss/dz through the final ReLU.
    """
    # simulate the end of a classification network
    relu_layer = nn.Relu()
    linear = nn.Linear(in_dimension=2, out_dimension=5)
    softmax = nn.Softmax()

    # forward
    a_L_minus_1 = relu_layer(z)
    z_L = linear(a_L_minus_1)
    a_L = softmax(z_L)

    # backward
    labels = np.zeros(a_L.shape)
    labels[:, 1] = 1
    loss = -np.log(np.sum(a_L * labels, axis=1))
    softmax_grad = softmax.backward(labels)
    layer_L_grad = linear.backward(softmax_grad)
    relu_grad = relu_layer.backward(layer_L_grad)
    return loss, relu_grad
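# Illustrative usage of the numerical_gradient_check sketch defined after
# conv(b) above, with two of the input-gradient helpers. The input shapes are
# assumptions inferred from the layer dimensions (and the channel-first layout
# is an assumption about the library); the determinism caveat noted there
# applies here as well, since each helper rebuilds its layers on every call.
#
#   z0 = np.random.randn(2, 2)        # linear_layer: batch of 2, in_dimension=2
#   assert numerical_gradient_check(linear_layer, z0)
#
#   x0 = np.random.randn(2, 1, 3, 3)  # conv_layer: 3x3 single-channel inputs,
#                                     # so two kernel_size=2 convs reduce to 1x1
#   assert numerical_gradient_check(conv_layer, x0)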