class ShallowConvNet(Net):
    def __init__(self, device=torch.device('cuda:0')):
        super().__init__(device)
        self.conv1 = Conv(1, 6, kernel_size=5, noise_std=1e-0, act='TanH', device=self.device)
        self.act1 = Activation('TanH')
        self.pool1 = Pool(2, device=self.device)
        self.fc1 = Linear(6*12*12, 100, noise_std=1e-0, act='TanH', device=self.device)
        self.act2 = Activation('TanH')
        self.fc2 = Linear(100, 10, noise_std=1e-0, act='TanH', device=self.device)
        self.softmax = Activation('Softmax')
        self.layers = [self.conv1, self.fc1, self.fc2]

    def forward(self, input):
        conv_out_1 = self.conv1.forward(input)
        act_out_1 = self.act1.forward(conv_out_1)
        pool_out_1 = self.pool1.forward(act_out_1)
        pool_out_1 = pool_out_1.reshape(len(pool_out_1), -1)
        fc_out_1 = self.fc1.forward(pool_out_1)
        act_out_2 = self.act2.forward(fc_out_1)
        fc_out_2 = self.fc2.forward(act_out_2)
        output = self.softmax.forward(fc_out_2)
        return output
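# A minimal usage sketch for ShallowConvNet (hedged: the MNIST-style input shape
# (batch, 1, 28, 28), the dummy torch.zeros batch, and running on the CPU device are
# illustrative assumptions, not part of the original code; the layer sizes above
# imply 28x28 single-channel inputs).
import torch

net = ShallowConvNet(device=torch.device('cpu'))
batch = torch.zeros(4, 1, 28, 28)      # dummy batch of four 28x28 grayscale images
probabilities = net.forward(batch)     # expected shape: (4, 10), one softmax row per image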
def test_OneNeuronGradient(self):
    layer = Linear(2, 1)
    x = np.random.rand(2)
    y = layer.forward(x)
    deriv_grad = layer.backward(np.ones(1))
    numgrad = numerical_gradient.calc(layer.forward, x)
    numerical_gradient.assert_are_similar(deriv_grad, numgrad[0])
def test_compare_with_Linear(self):
    in_size = 2
    out_size = 3
    x = np.random.rand(in_size)
    # x = np.array([1., 1])
    optimizer = SGD(0.1)
    linear = Linear(in_size, out_size, initialize='zeros')
    wx = Wx(in_size, out_size, initialize='zeros')
    plusbias = PlusBias(out_size, initialize='zeros')
    wxbias = Seq(wx, plusbias)

    linear_y = linear.forward(x)
    wxbias_y = wxbias.forward(x)
    assert_array_equal(linear_y, wxbias_y)

    dJdy = np.random.rand(out_size)
    linear_grad = linear.backward(dJdy)
    wxbias_grad = wxbias.backward(dJdy)
    assert_array_equal(linear_grad, wxbias_grad)

    linear.update_weights(optimizer)
    wxbias.update_weights(optimizer)
    stack = np.vstack([plusbias.b.get(), wx.W.get().T]).T
    assert_array_equal(linear.W, stack)
def test_TwoNeuronsGradient(self):
    layer = Linear(3, 2)
    x = np.random.rand(3)
    y = layer.forward(x)
    deriv_grad = layer.backward(np.ones(2))
    numgrad = numerical_gradient.calc(layer.forward, x)
    numgrad = np.sum(numgrad, axis=0)
    numerical_gradient.assert_are_similar(deriv_grad, numgrad)
def test_dtypes(self):
    x16 = np.array([1.99999999, 1.999, 1.9], dtype=np.float16)
    x64 = np.array([1.99999999, 1.999, 1.9], dtype=np.float64)

    l16 = Linear(3, 2, initialize='random', dtype=np.float16)
    self.assertEqual(l16.W.dtype, np.float16)
    y16 = l16.forward(x16)

    lNone = Linear(3, 2, initialize='ones')
    self.assertEqual(lNone.W.dtype, np.float64)
    yNone = lNone.forward(x64)

    l64 = Linear(3, 2, initialize='ones', dtype=np.float64)
    self.assertEqual(l64.W.dtype, np.float64)
    y64 = l64.forward(x64)

    assert_array_equal(yNone, y64)
    self.assertFalse(np.array_equal(y16, y64))
def test_OneNeuronBackward(self):
    layer = Linear(2, 1, initialize='ones')
    x = np.array([2., 2.])
    y = layer.forward(x)
    self.assertEqual(y, [5.])
    dJdy = np.array([3])
    dxdy = layer.backward(dJdy)
    assert_array_equal(dxdy, [3., 3.])
class RNNCell(Layer):
    """
    Vanilla RNN implementation:
        Hidden(t) = Activation(Linear(Hidden(t-1)) + Linear(Input(t)))
        Output(t) = Linear(Hidden(t))
    """

    def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):
        """
        :param n_inputs: dimension of inputs
        :param n_hidden: dimension of hidden layer
        :param n_output: dimension of output (token)
        :param activation: either sigmoid or tanh
        """
        super().__init__()

        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_output = n_output

        if activation == 'sigmoid':
            self.activation = Sigmoid()
        elif activation == 'tanh':
            self.activation = Tanh()
        else:
            raise Exception("Non-linearity not found")

        self.w_ih = Linear(n_inputs, n_hidden)
        self.w_hh = Linear(n_hidden, n_hidden)
        self.w_ho = Linear(n_hidden, n_output)

        self.parameters += self.w_ih.get_parameters()
        self.parameters += self.w_hh.get_parameters()
        self.parameters += self.w_ho.get_parameters()

    def forward(self, input_tensor, hidden):
        """ Forward prop - returns both the output and the new hidden state """
        from_prev_hidden = self.w_hh.forward(hidden)
        combined = self.w_ih.forward(input_tensor) + from_prev_hidden
        new_hidden = self.activation.forward(combined)
        output = self.w_ho.forward(new_hidden)
        return output, new_hidden

    def init_hidden(self, batch_size=1):
        """ First hidden state is all zeros """
        return Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
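# A usage sketch for RNNCell (hedged: the dimensions, batch size, and random input
# sequence below are made up for illustration; Tensor and numpy are assumed to be
# available exactly as they are in the cell's own code).
import numpy as np

rnn = RNNCell(n_inputs=4, n_hidden=8, n_output=3, activation='tanh')
hidden = rnn.init_hidden(batch_size=2)
sequence = [Tensor(np.random.rand(2, 4), autograd=True) for _ in range(5)]
for step in sequence:
    output, hidden = rnn.forward(step, hidden)   # unroll one time step, carrying the hidden state forward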
class DenseNet(Net):
    def __init__(self, device=torch.device('cuda:0')):
        super().__init__(device)
        self.fc1 = Linear(28*28, 50, noise_std=1e-0, device=self.device)
        self.act1 = Activation('TanH')
        self.fc2 = Linear(50, 10, noise_std=1e-0, device=self.device)
        self.softmax = Activation('Softmax')
        self.layers = [self.fc1, self.fc2]

    def forward(self, input):
        input = input.reshape(len(input), -1)
        fc_out_1 = self.fc1.forward(input)
        act_out_1 = self.act1.forward(fc_out_1)
        fc_out_2 = self.fc2.forward(act_out_1)
        output = self.softmax.forward(fc_out_2)
        return output
class LeNet5(Net):
    def __init__(self, device=torch.device('cuda:0')):
        super().__init__(device)
        self.conv1 = Conv(1, 6, kernel_size=5, noise_std=1e-0, act='ReLU', device=self.device)
        self.act1 = Activation('ReLU')
        self.pool1 = Pool(2, device=self.device)
        self.conv2 = Conv(6, 16, kernel_size=5, noise_std=1e-0, act='ReLU', device=self.device)
        self.act2 = Activation('ReLU')
        self.pool2 = Pool(2, device=self.device)
        self.fc1 = Linear(256, 120, noise_std=1e-0, act='ReLU', device=self.device)
        self.act3 = Activation('ReLU')
        self.fc2 = Linear(120, 84, noise_std=1e-0, act='ReLU', device=self.device)
        self.act4 = Activation('ReLU')
        self.fc3 = Linear(84, 10, noise_std=1e-0, act='ReLU', device=self.device)
        self.softmax = Activation('Softmax')
        self.layers = [self.conv1, self.conv2, self.fc1, self.fc2, self.fc3]

    def forward(self, input):
        conv_out_1 = self.conv1.forward(input)
        act_out_1 = self.act1.forward(conv_out_1)
        pool_out_1 = self.pool1.forward(act_out_1)
        conv_out_2 = self.conv2.forward(pool_out_1)
        act_out_2 = self.act2.forward(conv_out_2)
        pool_out_2 = self.pool2.forward(act_out_2)
        pool_out_2 = pool_out_2.reshape(len(pool_out_2), -1)
        fc_out_1 = self.fc1.forward(pool_out_2)
        act_out_3 = self.act3.forward(fc_out_1)
        fc_out_2 = self.fc2.forward(act_out_3)
        act_out_4 = self.act4.forward(fc_out_2)
        fc_out_3 = self.fc3.forward(act_out_4)
        output = self.softmax.forward(fc_out_3)
        return output
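# Shape check for LeNet5 above (hedged sketch: assumes 28x28 single-channel input,
# 'valid' 5x5 convolutions, and 2x2 pooling, which is what the layer sizes imply).
side = 28
side = (side - 5 + 1) // 2        # conv1 then pool1: 28 -> 24 -> 12
side = (side - 5 + 1) // 2        # conv2 then pool2: 12 -> 8 -> 4
assert 16 * side * side == 256    # matches Linear(256, 120) after flattening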
def test_OneNeuronUpdateGradient(self):
    layer = Linear(2, 1, initialize='ones')
    x = np.array([2., 2.])
    y = layer.forward(x)
    self.assertEqual(y, [5.])
    dJdy = np.array([3])
    dxdy = layer.backward(dJdy)
    assert_array_equal(dxdy, [3., 3.])
    update_grad = layer.calc_update_gradient(dJdy)
    assert_array_equal(layer.W + update_grad, np.array([[4, 7, 7]]))
class DenseNet_CNN(Net):
    def __init__(self, device=torch.device('cuda:0')):
        super().__init__(device)
        # self.fc1 = Linear(28*28, 25, noise_std=1e-0, device=self.device)
        self.fc1 = Conv(1, 25, kernel_size=25, noise_std=1e-0, act='TanH', device=self.device)
        self.act1 = Activation('TanH')
        self.fc2 = Linear(16*25, 10, noise_std=1e-0, device=self.device)
        self.softmax = Activation('Softmax')
        self.layers = [self.fc1, self.fc2]

    def forward(self, input):
        # input = input.reshape(len(input), -1)
        fc_out_1 = self.fc1.forward(input)
        act_out_1 = self.act1.forward(fc_out_1)
        act_out_1 = act_out_1.reshape(len(act_out_1), -1)
        fc_out_2 = self.fc2.forward(act_out_1)
        output = self.softmax.forward(fc_out_2)
        return output
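# Shape check for DenseNet_CNN above (hedged: assumes 28x28 single-channel input and
# a 'valid' convolution, which the fc2 input size implies): a 25x25 kernel over a
# 28x28 image leaves a 4x4 grid of positions, so the flattened Conv output has
# 25 channels * 4 * 4 = 400 features, matching Linear(16*25, 10).
assert 25 * (28 - 25 + 1) ** 2 == 16 * 25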
def test_linear_layer(self):
    linear = Linear((3, 2), 5, initializer)
    inputs = np.ones((3, 2))
    linear.forward(inputs)
    assert linear.output.shape == (3, 5)
def test_initialize_with_value(self):
    W = np.matrix('1 2 3; 4 5 6')
    x = np.random.rand(2)
    layer = Linear(2000, 2000, initialize=W)
    y = layer.forward(x)
    assert_array_equal(y, W.dot(np.hstack([1, x])))
def test_TwoNeuronsForward(self):
    layer = Linear(2, 2, initialize='ones')
    data = np.array([.3, .3])
    y = layer.forward(data)
    assert_array_equal(y, [1.6, 1.6])
def test_OneNeuronForward(self):
    layer = Linear(2, 1, initialize='ones')
    data = np.array([2., 2.])
    y = layer.forward(data)
    self.assertEqual(y, [5.0])
class LSTMCell(Layer):
    """
    Base LSTM implementation:
        Cell(t)   = forget_gate * Cell(t-1) + input_gate * update
        Hidden(t) = output_gate * Tanh(Cell(t))
        Output(t) = Linear(Hidden(t))
    where forget_gate, input_gate, and output_gate are sigmoid(Linear(prev_hidden) + Linear(input))
    and update is tanh(Linear(prev_hidden) + Linear(input))
    """

    def __init__(self, n_inputs, n_hidden, n_output):
        super().__init__()

        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_output = n_output

        self.xf = Linear(n_inputs, n_hidden)
        self.xi = Linear(n_inputs, n_hidden)
        self.xo = Linear(n_inputs, n_hidden)
        self.xc = Linear(n_inputs, n_hidden)

        self.hf = Linear(n_hidden, n_hidden, bias=False)
        self.hi = Linear(n_hidden, n_hidden, bias=False)
        self.ho = Linear(n_hidden, n_hidden, bias=False)
        self.hc = Linear(n_hidden, n_hidden, bias=False)

        self.w_ho = Linear(n_hidden, n_output, bias=False)

        self.parameters += self.xf.get_parameters()
        self.parameters += self.xi.get_parameters()
        self.parameters += self.xo.get_parameters()
        self.parameters += self.xc.get_parameters()
        self.parameters += self.hf.get_parameters()
        self.parameters += self.hi.get_parameters()
        self.parameters += self.ho.get_parameters()
        self.parameters += self.hc.get_parameters()
        self.parameters += self.w_ho.get_parameters()

    def forward(self, current_input, hidden):
        """
        Updates the cell and returns the current time step's output
        together with the new (hidden, cell) state
        """
        prev_hidden = hidden[0]
        prev_cell = hidden[1]

        f = (self.xf.forward(current_input) + self.hf.forward(prev_hidden)).sigmoid()
        i = (self.xi.forward(current_input) + self.hi.forward(prev_hidden)).sigmoid()
        o = (self.xo.forward(current_input) + self.ho.forward(prev_hidden)).sigmoid()
        g = (self.xc.forward(current_input) + self.hc.forward(prev_hidden)).tanh()

        cell = (f * prev_cell) + (i * g)
        h = o * cell.tanh()

        output = self.w_ho.forward(h)
        return output, (h, cell)

    def init_hidden(self, batch_size=1):
        """ Inits hidden and cell to zeros, with the first unit of each set to 1 """
        h = Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
        c = Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
        h.data[:, 0] += 1
        c.data[:, 0] += 1
        return h, c
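# A usage sketch for LSTMCell (hedged: the dimensions, batch size, and random input
# sequence are illustrative; Tensor and numpy are assumed to be available as in the
# cell's own code).
import numpy as np

lstm = LSTMCell(n_inputs=4, n_hidden=8, n_output=3)
hidden = lstm.init_hidden(batch_size=2)          # (h, c) tuple
sequence = [Tensor(np.random.rand(2, 4), autograd=True) for _ in range(5)]
for step in sequence:
    output, hidden = lstm.forward(step, hidden)  # hidden stays a (h, cell) tuple across steps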