def plot_activations():
    '''This function plots all the activation functions implemented.'''
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    x_range = np.arange(-3, 3, 0.01)
    x = torch.Tensor(x_range)

    tanh = Tanh()
    plt.plot(x_range, tanh.forward(x).numpy(), color='b', label='Tanh', alpha=0.5)
    plt.plot(x_range, tanh.backward(1).numpy(), color='b', label='Tanh derivative', alpha=0.5, linestyle=':')

    relu = ReLU()
    plt.plot(x_range, relu.forward(x).numpy(), color='g', label='ReLU (0)', alpha=0.5)
    plt.plot(x_range, relu.backward(1).numpy(), color='g', label='ReLU derivative', alpha=0.5, linestyle=':')

    leakyrelu = LeakyReLU()
    plt.plot(x_range, leakyrelu.forward(x).numpy(), color='m', label='LeakyReLU (0.01)', alpha=0.5)
    plt.plot(x_range, leakyrelu.backward(1).numpy(), color='m', label='LeakyReLU derivative', alpha=0.5, linestyle=':')

    prelu = PReLU(init=0.1)
    plt.plot(x_range, prelu.forward(x).numpy(), color='y', label='PReLU', alpha=0.5)
    plt.plot(x_range, prelu.backward(1).numpy(), color='y', label='PReLU derivative (0.1 - trainable)', alpha=0.5, linestyle=':')

    plt.legend(framealpha=1)
    plt.tight_layout()
    plt.savefig('figures/activations.png', dpi=300)
    plt.show()
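# The plotting helper above assumes activation modules with a `forward(x)` that caches its input
# and a `backward(grad)` that returns grad times the elementwise derivative at the cached input
# (so `backward(1)` yields the derivative curve). A minimal sketch of such a Tanh module,
# assuming a torch-tensor based implementation; the body below is an assumption, only the
# forward/backward interface is taken from the code above.
import torch

class TanhSketch:
    def forward(self, x):
        self.x = x                                  # cache the input for the backward pass
        return torch.tanh(x)

    def backward(self, grad):
        # d/dx tanh(x) = 1 - tanh(x)^2, scaled by the incoming gradient
        return grad * (1 - torch.tanh(self.x) ** 2)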
def trainXORWithTanh(self, training, stopCondition):
    logicStatements = num.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    expectedOutput = num.array([[0], [1], [1], [0]])
    S = [logicStatements.shape[1], 3, expectedOutput.shape[1]]
    model = MultiPerceptron(activation=Tanh(), weightsInitializer=ZeroInitializer())
    model.configureLayers(S)
    training.executeOn(model=model, input=logicStatements, target=expectedOutput,
                       learningRate=0.1, stopCondition=stopCondition)
    return model
class Dense:
    def __init__(self, input_size, layer_size, act_func=None):
        self.layer_size = layer_size
        self.input_size = input_size
        self.W = np.random.normal(loc=0, scale=1.0, size=(self.input_size, self.layer_size))
        self.b = np.zeros(shape=(1, self.layer_size)) + 0.1
        self.h = None  # output
        self.z = None
        self.grad_act_z = None
        self.delta = None
        self.grad_w = np.zeros(shape=self.W.shape)
        self.grad_b = np.zeros(shape=self.b.shape)
        self.grad_w_count = 0
        self.grad_b_count = 0
        if act_func is not None:
            self.act_func = act_func
        else:
            self.act_func = Tanh()

    def eval_z(self, input_h):
        # Test input shape
        if input_h.shape[1] != self.input_size:
            raise ValueError("input shape: %s, expected (*, %d)" % (str(input_h.shape), self.input_size))
        self.z = np.matmul(input_h, self.W) + self.b
        # Test z shape
        if self.z.shape[1] != self.layer_size:
            raise ValueError("z shape: %s, expected (*, %d)" % (str(self.z.shape), self.layer_size))
        self.grad_act_z = self.act_func.eval_grad(self.z)

    def eval_h(self, input_h):
        self.eval_z(input_h)
        self.h = self.act_func.eval(self.z)

    def set_delta(self, delta):
        self.delta = delta

    """ delta^(l-1) = [ (W^l)^T delta^l ] grad(act(z^(l-1))) """
    def eval_delta_back(self, grad_act_z_back):
        if self.delta is None:
            raise ValueError("self.delta is None")
        if self.grad_act_z is None:
            raise ValueError("self.grad_act_z is None")
        return np.matmul(self.delta, np.transpose(self.W)) * grad_act_z_back

    def accum_grad(self, input_h):
        self.grad_w += np.matmul(np.transpose(input_h), self.delta)
        self.grad_b += self.delta
        self.grad_w_count += 1
        self.grad_b_count += 1

    def update_w(self, lr):
        self.W = self.W - self.grad_w * lr / self.grad_w_count
        self.b = self.b - self.grad_b * lr / self.grad_b_count
        self.grad_w = np.zeros(shape=self.W.shape)
        self.grad_b = np.zeros(shape=self.b.shape)
        self.grad_w_count = 0
        self.grad_b_count = 0
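# Dense only touches its activation through `eval(z)` and `eval_grad(z)`. A minimal sketch of a
# compatible NumPy Tanh, assuming that interface; the class body below is an assumption, only the
# method names come from the Dense code above.
import numpy as np

class TanhSketchNumpy:
    def eval(self, z):
        return np.tanh(z)

    def eval_grad(self, z):
        # derivative of tanh evaluated at the pre-activation z
        return 1.0 - np.tanh(z) ** 2

# Hypothetical usage of the layer: forward with eval_h, then set_delta / accum_grad / update_w
# during backpropagation.
# layer = Dense(input_size=2, layer_size=3, act_func=TanhSketchNumpy())
# layer.eval_h(np.array([[0.5, -0.2]]))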
def __init__(self, arg):
    super(LSTM, self).__init__()
    self.input_dim = arg["input_dim"]
    self.hidden_dim = arg["hidden_dim"]
    self.output_dim = arg["output_dim"]
    self.w_forget = 2 * np.random.random((self.hidden_dim, self.input_dim + self.hidden_dim)) - 1
    self.w_memory_weight = 2 * np.random.random((self.hidden_dim, self.input_dim + self.hidden_dim)) - 1
    self.w_memory_content = 2 * np.random.random((self.hidden_dim, self.input_dim + self.hidden_dim)) - 1
    self.w_output = 2 * np.random.random((self.hidden_dim, self.input_dim + self.hidden_dim)) - 1
    self.w_predict = 2 * np.random.random((self.output_dim, self.hidden_dim)) - 1
    self.activation_forget = arg["activation_forget"]() if "activation_forget" in arg else Sigmoid()
    self.activation_memory_weight = arg["activation_memory_weight"]() if "activation_memory_weight" in arg else Sigmoid()
    self.activation_memory_content = arg["activation_memory_content"]() if "activation_memory_content" in arg else Tanh()
    self.activation_output_weight = arg["activation_output_weight"]() if "activation_output_weight" in arg else Sigmoid()
    self.activation_output_content = arg["activation_output_content"]() if "activation_output_content" in arg else Tanh()
def test_evaluateDerivativeOnVectors(self):
    tanh = Tanh()
    vector = num.array([1, 2, 3])
    num.testing.assert_array_equal(tanh.derivative(vector), num.array([0, -3, -8]))
from activation import Tanh
from gate import AddGate, MultiplyGate
import numpy as np

mulGate = MultiplyGate()
addGate = AddGate()
activation = Tanh()

class RNNLayer:
    def forward(self, x, prev_s, U, W, V):
        self.mulu = mulGate.forward(U, x)
        self.mulw = mulGate.forward(W, prev_s)
        self.add = addGate.forward(self.mulw, self.mulu)
        self.s = activation.forward(self.add)
        self.mulv = mulGate.forward(V, self.s)

    def backward(self, x, prev_s, U, W, V, diff_s, dmulv, forward=True):
        if forward:
            self.forward(x, prev_s, U, W, V)
        dV, dsv = mulGate.backward(V, self.s, dmulv)
        ds = dsv + diff_s
        dadd = activation.backward(self.add, ds)
        dmulw, dmulu = addGate.backward(self.mulw, self.mulu, dadd)
        dW, dprev_s = mulGate.backward(W, prev_s, dmulw)
        dU, dx = mulGate.backward(U, x, dmulu)
        return (dprev_s, dU, dW, dV)

    def backward1(self, x, prev_s, U, W, V, delta1, dmulv, forward=True):
        if forward:
            self.forward(x, prev_s, U, W, V)
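# RNNLayer drives its computation through the gate and activation objects above. A minimal sketch
# of the interface it relies on, assuming a standard matrix-product gate, an addition gate, and a
# tanh gate over 2-D column-vector inputs; the bodies below are assumptions, only the call
# signatures are taken from RNNLayer.
import numpy as np

class MultiplyGateSketch:
    def forward(self, W, x):
        return np.dot(W, x)

    def backward(self, W, x, dz):
        # returns (dW, dx) for z = W.dot(x)
        return np.dot(dz, x.T), np.dot(W.T, dz)

class AddGateSketch:
    def forward(self, a, b):
        return a + b

    def backward(self, a, b, dz):
        # the gradient of a sum passes through unchanged to both inputs
        return dz, dz

class TanhGateSketch:
    def forward(self, z):
        return np.tanh(z)

    def backward(self, z, ds):
        # chain rule through tanh applied to the pre-activation z
        return (1.0 - np.tanh(z) ** 2) * ds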
def test_evaluateOnVectors(self):
    tanh = Tanh()
    vector = num.array([1, 2, 3])
    num.testing.assert_array_equal(tanh(vector), num.tanh(vector))
def test_evaluateDerivativeOnNumbers(self):
    tanh = Tanh()
    self.assertEqual(tanh.derivative(1), 0)
    self.assertEqual(tanh.derivative(2), -3)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import scipy as sp

from activation import Sigmoid, Softmax, Tanh

ACTIVATION_MAP = {'tanh': Tanh(), 'sigmoid': Sigmoid(), 'softmax': Softmax()}

class LSTMLayer(object):
    def __init__(self, activation='tanh'):
        self.activation = activation
        self.a = None
        self.h = None
        self.y = None

    def activate(self, x):
        return ACTIVATION_MAP[self.activation].eval(x)

    def backward(self):
        return ACTIVATION_MAP[self.activation].gradient(self.a)

    def loss(self, t):
        return ACTIVATION_MAP[self.activation].loss(t, self.y)

class HiddenLayer(LSTMLayer):
    def __init__(self, hidden_size=10, gate_activation='sigmoid',
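# LSTMLayer expects each activation object in ACTIVATION_MAP to expose `eval(x)`, `gradient(a)`
# and `loss(t, y)`. A minimal sketch of one such object, assuming the gradient is taken at the
# stored pre-activation and the loss is a squared error; both are assumptions, only the method
# names come from the code above.
import numpy as np

class TanhActivationSketch:
    def eval(self, x):
        return np.tanh(x)

    def gradient(self, a):
        # derivative of tanh at the pre-activation a
        return 1.0 - np.tanh(a) ** 2

    def loss(self, t, y):
        # squared-error loss between target t and output y (an assumption for this sketch)
        return 0.5 * np.sum((t - y) ** 2)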
def test_evaluateOnNumbers(self):
    tanh = Tanh()
    self.assertEqual(tanh(1), num.tanh(1))
    self.assertEqual(tanh(0.75), num.tanh(0.75))
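# The derivative tests above are consistent with a Tanh whose `derivative` is expressed in terms
# of the unit's output y rather than its input, i.e. derivative(y) = 1 - y**2, so that
# derivative(2) == -3 and derivative([1, 2, 3]) == [0, -3, -8]. A minimal sketch matching that
# convention; the class body is an assumption inferred from the expected values, only the method
# names come from the tests.
import numpy as num

class TanhTestDouble:
    def __call__(self, x):
        return num.tanh(x)

    def derivative(self, y):
        # y is the activation output, not the pre-activation input
        return 1 - num.square(y)

    def description(self):
        return 'Tanh'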
from activation import Tanh
from gate import AddGate, MultiplyGate

mulgate = MultiplyGate()
addgate = AddGate()
tanh = Tanh()

class RNNLayer:
    def forward(self, x, prev_a, waa, wax, wya):
        self.mulax = mulgate.forward(wax, x)
        self.mulaa = mulgate.forward(waa, prev_a)
        self.add = addgate.forward(self.mulax, self.mulaa)
        self.a = tanh.forward(self.add)
        self.mulya = mulgate.forward(wya, self.a)

    ## dmulya = y^t - yt
    ## dV = (y^t - yt) * at
    def backward(self, x, prev_a, waa, wax, wya, diff_a, dmulya):
        self.forward(x, prev_a, waa, wax, wya)
        dV, dav = mulgate.backward(wya, self.a, dmulya)
        da = dav + diff_a
def main():
    # generate data and translate labels
    train_features, train_targets = generate_all_datapoints_and_labels()
    test_features, test_targets = generate_all_datapoints_and_labels()
    train_labels, test_labels = convert_labels(train_targets), convert_labels(test_targets)

    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + ReLU + Linear + ReLU + Linear + Tanh')
    print('Loss: MSE')
    print('Optimizer: SGD')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')

    # build network, loss and optimizer for Model 1
    my_model_design_1 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                         Linear(25, 25), ReLU(), Linear(25, 2), Tanh()]
    my_model_1 = Sequential(my_model_design_1)
    optimizer_1 = SGD(my_model_1, lr=1e-3)
    criterion_1 = LossMSE()

    # train Model 1
    batch_size = 1
    for epoch in range(50):
        temp_train_loss_sum = 0.
        temp_test_loss_sum = 0.
        num_train_correct = 0
        num_test_correct = 0

        # trained in batch fashion: here batch size = 1
        for temp_batch in range(0, len(train_features), batch_size):
            temp_train_features = train_features.narrow(0, temp_batch, batch_size)
            temp_train_labels = train_labels.narrow(0, temp_batch, batch_size)
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_1.zero_grad()
                temp_train_feature = temp_train_features[i]
                temp_train_label = temp_train_labels[i]

                # forward pass to compute loss
                temp_train_pred = my_model_1.forward(temp_train_feature)
                temp_train_loss = criterion_1.forward(temp_train_pred, temp_train_label)
                temp_train_loss_sum += temp_train_loss

                _, temp_train_pred_cat = torch.max(temp_train_pred, 0)
                _, temp_train_label_cat = torch.max(temp_train_label, 0)
                if temp_train_pred_cat == temp_train_label_cat:
                    num_train_correct += 1

                # calculate gradient according to loss gradient
                temp_train_loss_grad = criterion_1.backward(temp_train_pred, temp_train_label)
                # accumulate parameter gradient in each batch
                my_model_1.backward(temp_train_loss_grad)
                # update parameters by optimizer
                optimizer_1.step()

        # evaluate the current model on testing set
        # only forward pass is implemented
        for i_test in range(len(test_features)):
            temp_test_feature = test_features[i_test]
            temp_test_label = test_labels[i_test]

            temp_test_pred = my_model_1.forward(temp_test_feature)
            temp_test_loss = criterion_1.forward(temp_test_pred, temp_test_label)
            temp_test_loss_sum += temp_test_loss

            _, temp_test_pred_cat = torch.max(temp_test_pred, 0)
            _, temp_test_label_cat = torch.max(temp_test_label, 0)
            if temp_test_pred_cat == temp_test_label_cat:
                num_test_correct += 1

        temp_train_loss_mean = temp_train_loss_sum / len(train_features)
        temp_test_loss_mean = temp_test_loss_sum / len(test_features)
        temp_train_accuracy = num_train_correct / len(train_features)
        temp_test_accuracy = num_test_correct / len(test_features)

        print("Epoch: {}/{}..".format(epoch + 1, 50),
              "Training Loss: {:.4f}..".format(temp_train_loss_mean),
              "Training Accuracy: {:.4f}..".format(temp_train_accuracy),
              "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
              "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),
              )

    # visualize the classification performance of Model 1 on testing set
    test_pred_labels_1 = []
    for i in range(1000):
        temp_test_feature = test_features[i]
        temp_test_label = test_labels[i]
        temp_test_pred = my_model_1.forward(temp_test_feature)
        _, temp_train_pred_cat = torch.max(temp_test_pred, 0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_1.append(int(test_targets[i]))
        else:
            test_pred_labels_1.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=test_pred_labels_1)
    axes.set_title('Classification Performance of Model 1')
    plt.show()

    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + Dropout + SeLU + Linear + Dropout + ReLU + Linear + Sigmoid')
    print('Loss: Cross Entropy')
    print('Optimizer: Adam')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')

    # build network, loss function and optimizer for Model 2
    my_model_design_2 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), SeLU(),
                         Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
    my_model_2 = Sequential(my_model_design_2)
    optimizer_2 = Adam(my_model_2, lr=1e-3)
    criterion_2 = CrossEntropy()

    # train Model 2
    batch_size = 1
    epoch = 0
    while epoch < 25:
        temp_train_loss_sum = 0.
        temp_test_loss_sum = 0.
        num_train_correct = 0
        num_test_correct = 0

        # trained in batch fashion: here batch size = 1
        for temp_batch in range(0, len(train_features), batch_size):
            temp_train_features = train_features.narrow(0, temp_batch, batch_size)
            temp_train_labels = train_labels.narrow(0, temp_batch, batch_size)
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_2.zero_grad()
                temp_train_feature = temp_train_features[i]
                temp_train_label = temp_train_labels[i]

                # forward pass to compute loss
                temp_train_pred = my_model_2.forward(temp_train_feature)
                temp_train_loss = criterion_2.forward(temp_train_pred, temp_train_label)
                temp_train_loss_sum += temp_train_loss

                _, temp_train_pred_cat = torch.max(temp_train_pred, 0)
                _, temp_train_label_cat = torch.max(temp_train_label, 0)
                if temp_train_pred_cat == temp_train_label_cat:
                    num_train_correct += 1

                # calculate gradient according to loss gradient
                temp_train_loss_grad = criterion_2.backward(temp_train_pred, temp_train_label)
                '''
                if (not temp_train_loss_grad[0] >= 0) and (not temp_train_loss_grad[0] < 0):
                    continue
                '''
                # accumulate parameter gradient in each batch
                my_model_2.backward(temp_train_loss_grad)
                # update parameters by optimizer
                optimizer_2.step()

        # evaluate the current model on testing set
        # only forward pass is implemented
        for i_test in range(len(test_features)):
            temp_test_feature = test_features[i_test]
            temp_test_label = test_labels[i_test]

            temp_test_pred = my_model_2.forward(temp_test_feature)
            temp_test_loss = criterion_2.forward(temp_test_pred, temp_test_label)
            temp_test_loss_sum += temp_test_loss

            _, temp_test_pred_cat = torch.max(temp_test_pred, 0)
            _, temp_test_label_cat = torch.max(temp_test_label, 0)
            if temp_test_pred_cat == temp_test_label_cat:
                num_test_correct += 1

        temp_train_loss_mean = temp_train_loss_sum / len(train_features)
        temp_test_loss_mean = temp_test_loss_sum / len(test_features)
        temp_train_accuracy = num_train_correct / len(train_features)
        temp_test_accuracy = num_test_correct / len(test_features)

        # in case there is a gradient explosion problem, initialize the model again and restart
        # training, but this situation seldom happens
        # (the comparison below is a NaN check: a NaN is neither >= 0 nor < 0)
        if (not temp_train_loss_grad[0] >= 0) and (not temp_train_loss_grad[0] < 0):
            epoch = 0
            my_model_design_2 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                                 Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
            my_model_2 = Sequential(my_model_design_2)
            optimizer_2 = Adam(my_model_2, lr=1e-3)
            criterion_2 = CrossEntropy()
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('Restart training because of gradient explosion')
            continue

        print("Epoch: {}/{}..".format(epoch + 1, 25),
              "Training Loss: {:.4f}..".format(temp_train_loss_mean),
              "Training Accuracy: {:.4f}..".format(temp_train_accuracy),
              "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
              "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),
              )
        epoch += 1

    # visualize the classification performance of Model 2 on testing set
    test_pred_labels_2 = []
    for i in range(1000):
        temp_test_feature = test_features[i]
        temp_test_label = test_labels[i]
        temp_test_pred = my_model_2.forward(temp_test_feature)
        _, temp_train_pred_cat = torch.max(temp_test_pred, 0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_2.append(int(test_targets[i]))
        else:
            test_pred_labels_2.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=test_pred_labels_2)
    axes.set_title('Classification Performance of Model 2')
    plt.show()
from activation import Tanh, ReLU, LeakyReLU, PReLU
from optimizer import SGD
from loss import MSELoss, CrossEntropyLoss
from utils import gen_disc_set, plot_dataset, build_CV_sets, standardise_input, train, test

if __name__ == '__main__':
    lr = 0.01
    k_fold = 10
    CV_sets = build_CV_sets(k_fold, 1000)
    print('CV sets built.')
    test_input, test_target = gen_disc_set(1000)

    for criterion in [MSELoss(), CrossEntropyLoss()]:
        for mini_batch_size in [20]:
            for activation in [Tanh()]:
                print('***')
                print('Criterion: {}, mini_batch_size: {}, activation: {}.'.
                      format(criterion.name(), mini_batch_size, activation.name()))
                print('***')
                training_time_acc = []
                test_error_acc = []
                for i in tqdm(range(k_fold), leave=False):
                    torch.manual_seed(2019)
                    model = Sequential([
                        Linear(2, 25, activation.name()), activation,
                        Linear(25, 25, activation.name()), activation,
class LSTM(Layer):
    def __init__(self, units=0, input_shape=0, dtype=np.float32, gate_act=Sigmoid):
        super(LSTM, self).__init__(input_shape, units)
        self.gate_acts = {
            "I": gate_act(),
            "F": gate_act(),
            "O": gate_act(),
            "U": Tanh()
        }
        self.act_tanh = Tanh()
        self.Wx = {"I": None, "F": None, "U": None, "O": None}
        self.Wh = {"I": None, "F": None, "U": None, "O": None}
        self.B = {"I": None, "F": None, "U": None, "O": None}
        for k in ["I", "F", "U", "O"]:
            self.Wx[k] = np.random.uniform(-1, 1, (input_shape, units)).astype(dtype)
            self.Wh[k] = np.random.uniform(-1, 1, (units, units)).astype(dtype)
            self.B[k] = np.random.uniform(-1, 1, 1).astype(dtype)

    def configure(self, data_shape, phase, prevLayer=None):
        self.batch = data_shape[0]
        for k in self.gate_acts:
            self.gate_acts[k].configure(data_shape, phase, prevLayer)
        self.act_tanh.configure(data_shape, phase, prevLayer)
        self.optimizers = []
        for i in range(8):
            self.optimizers.append(copy.deepcopy(self.optimizer))
        self.buff = {
            "C": None, "C_1": None,
            "H": None, "H_1": None,
            "I": None, "F": None, "U": None, "O": None,
            "X": None
        }
        for k in self.buff:
            self.buff[k] = np.zeros((self.batch, self.units))
        self.X = np.zeros((self.batch, self.input_shape), dtype=self.dtype)

    def forward(self, x):
        self.X[:] = x
        for k in ["I", "F", "O", "U"]:
            self.buff[k] = self.gate_acts[k].forward(
                self.X.dot(self.Wx[k]) + self.buff["H_1"].dot(self.Wh[k]) + self.B[k])
        # cell state: the forget gate scales the previous cell state, the input gate scales the
        # candidate update (the original used the input gate in both terms, which contradicts the
        # backward pass below)
        self.buff["C"] = self.buff["F"] * self.buff["C_1"] + self.buff["U"] * self.buff["I"]
        self.Ctanh = self.act_tanh.forward(self.buff["C"])
        self.buff["H"] = self.Ctanh * self.buff["O"]
        self.buff["C_1"] = self.buff["C"]
        self.buff["H_1"] = self.buff["H"]
        return self.buff["H"]

    def backward(self, e):
        delta = {}
        delta["C"] = self.act_tanh.backward(e) * self.buff["O"]
        delta["C_1"] = delta["C"] * self.buff["F"]
        delta["O"] = self.gate_acts["O"].backward(e) * self.Ctanh
        delta["I"] = self.gate_acts["I"].backward(delta["C"]) * self.buff["U"]
        delta["U"] = self.gate_acts["U"].backward(delta["C"]) * self.buff["I"]
        delta["F"] = self.gate_acts["F"].backward(delta["C"]) * self.buff["C_1"]
        delta["H"] = (delta["I"].dot(self.Wh["I"].T) + delta["O"].dot(self.Wh["O"].T) +
                      delta["U"].dot(self.Wh["U"].T) + delta["F"].dot(self.Wh["F"].T))

        # update
        for i, k in enumerate(["I", "F", "U", "O"]):
            np.subtract(
                self.Wx[k],
                self.optimizers[i](np.sum(np.einsum("bi,bj->bij", self.X,
                                                    self.learning_rate * delta[k]),
                                          axis=0)) / self.batch,
                self.Wx[k])
            np.subtract(
                self.Wh[k],
                self.optimizers[4 + i](np.sum(np.einsum("bi,bj->bij", self.buff["H_1"],
                                                        self.learning_rate * delta[k]),
                                              axis=0)) / self.batch,
                self.Wh[k])
            self.B[k] -= np.sum(self.learning_rate * delta[k])
        return delta["H"]
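# For reference, a single step of the standard LSTM cell that the forward pass above follows,
# written out in plain NumPy. This is a minimal sketch with caller-supplied weights, not the
# class's own parameters; the function name and shapes here are illustrative assumptions.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_cell_step(x, h_prev, c_prev, Wx, Wh, b):
    """Wx, Wh and b are dicts keyed by 'I', 'F', 'O', 'U', as in the class above."""
    gates = {k: x.dot(Wx[k]) + h_prev.dot(Wh[k]) + b[k] for k in "IFOU"}
    i = sigmoid(gates["I"])        # input gate
    f = sigmoid(gates["F"])        # forget gate
    o = sigmoid(gates["O"])        # output gate
    u = np.tanh(gates["U"])        # candidate cell update
    c = f * c_prev + i * u         # new cell state
    h = o * np.tanh(c)             # new hidden state
    return h, c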
class TestMultiPerceptron(unittest.TestCase):
    tanh = Tanh()

    def assertArrayEqual(self, array1, array2):
        num.testing.assert_array_equal(array1, array2)

    def test_initialization(self):
        model = MultiPerceptron(activation=self.tanh, weightsInitializer=zeroInitializer)
        self.assertEqual(model._activation, self.tanh)
        self.assertEqual(model._weightsInitializer, zeroInitializer)
        self.assertTrue(model._useBias)
        self.assertEqual(len(model._weights), 0)
        self.assertEqual(len(model._layerOutputs), 0)

    def test_addLayers(self):
        model = MultiPerceptron(activation=self.tanh, weightsInitializer=zeroInitializer)
        model.addLayer(3, 2)
        self.assertEqual(len(model._weights), 1)
        self.assertEqual(len(model._layerOutputs), 1)
        self.assertEqual(model._weights[0].shape, (4, 2))
        self.assertArrayEqual(model._weights[0], num.array([[0, 0], [0, 0], [0, 0], [0, 0]]))
        self.assertArrayEqual(model._layerOutputs[0], [None])
        model.addLayer(2, 1)
        self.assertEqual(len(model._weights), 2)
        self.assertEqual(len(model._layerOutputs), 2)
        self.assertEqual(model._weights[0].shape, (4, 2))
        self.assertEqual(model._weights[1].shape, (3, 1))
        self.assertArrayEqual(model._weights[0], num.array([[0, 0], [0, 0], [0, 0], [0, 0]]))
        self.assertArrayEqual(model._weights[1], num.array([[0], [0], [0]]))
        self.assertArrayEqual(model._layerOutputs[0], [None])
        self.assertArrayEqual(model._layerOutputs[1], [None])

    def test_addLayersAtOnce(self):
        model = MultiPerceptron(activation=self.tanh, weightsInitializer=zeroInitializer)
        model.configureLayers([3, 2, 1])
        self.assertEqual(len(model._weights), 2)
        self.assertEqual(len(model._layerOutputs), 3)
        self.assertEqual(model._weights[0].shape, (4, 2))
        self.assertEqual(model._weights[1].shape, (3, 1))
        self.assertArrayEqual(model._weights[0], num.array([[0, 0], [0, 0], [0, 0], [0, 0]]))
        self.assertArrayEqual(model._weights[1], num.array([[0], [0], [0]]))
        self.assertArrayEqual(model._layerOutputs[0], [None])
        self.assertArrayEqual(model._layerOutputs[1], [None])
        self.assertArrayEqual(model._layerOutputs[2], [None])

    def test_propagateForward(self):
        S = [3, 2, 1]
        Xh = num.array([[1, 0, 1]])
        model = MultiPerceptron(activation=self.tanh, weightsInitializer=zeroInitializer)
        model.configureLayers(S)
        Y = model.propagateForward(Xh)
        W1 = zeroInitializer(S[0] + 1, S[1])
        W2 = zeroInitializer(S[1] + 1, S[2])
        Y0 = num.zeros((1, S[0] + 1))
        Y1 = num.zeros((1, S[1] + 1))
        Y2 = num.zeros((1, S[2]))
        Y0[:] = bias_add(Xh)
        Y1[:] = bias_add(num.tanh(num.dot(Y0, W1)))
        Y2[:] = num.tanh(num.dot(Y1, W2))
        self.assertArrayEqual(model._layerOutputs[0], Y0)
        self.assertArrayEqual(model._layerOutputs[1], Y1)
        self.assertArrayEqual(model._layerOutputs[2], Y2)
        self.assertArrayEqual(Y, Y2)

    def test_propagateBackwards(self):
        S = [3, 2, 1]
        Xh = num.array([[1, 0, 1]])
        Zh = num.array([[1]])
        lr = 0.1
        model = MultiPerceptron(activation=self.tanh, weightsInitializer=zeroInitializer)
        model.configureLayers(S)
        model.propagateForward(Xh)
        W1 = zeroInitializer(S[0] + 1, S[1])
        W2 = zeroInitializer(S[1] + 1, S[2])
        self.assertArrayEqual(model._weights[0], W1)
        self.assertArrayEqual(model._weights[1], W2)
        E = model.propagateBackwards(Zh, lr)
        Y0 = num.zeros((1, S[0] + 1))
        Y1 = num.zeros((1, S[1] + 1))
        Y2 = num.zeros((1, S[2]))
        Y0[:] = bias_add(Xh)
        Y1[:] = bias_add(num.tanh(num.dot(Y0, W1)))
        Y2[:] = num.tanh(num.dot(Y1, W2))
        dW1 = num.zeros_like(W1)
        dW2 = num.zeros_like(W2)
        E2 = Zh - Y2
        dY2 = 1 - num.square(Y2)
        D2 = E2 * dY2
        dW2 += lr * num.dot(Y1.T, D2)
        E1 = num.dot(D2, W2.T)
        dY1 = 1 - num.square(Y1)
        D1 = bias_sub(E1 * dY1)
        dW1 += lr * num.dot(Y0.T, D1)
        W1 += dW1
        W2 += dW2
        self.assertArrayEqual(E, E2)
        self.assertArrayEqual(model._layerOutputs[0], Y0)
        self.assertArrayEqual(model._layerOutputs[1], Y1)
        self.assertArrayEqual(model._layerOutputs[2], Y2)
        self.assertArrayEqual(model._weights[0], W1)
        self.assertArrayEqual(model._weights[1], W2)

    def test_propagateBackwardsWithoutUpdatingWeights(self):
        S = [3, 2, 1]
        Xh = num.array([[1, 0, 1]])
        Zh = num.array([[1]])
        lr = 0.1
        model = MultiPerceptron(activation=self.tanh, weightsInitializer=zeroInitializer)
        model.configureLayers(S)
        model.propagateForward(Xh)
        W1 = zeroInitializer(S[0] + 1, S[1])
        W2 = zeroInitializer(S[1] + 1, S[2])
        self.assertArrayEqual(model._weights[0], W1)
        self.assertArrayEqual(model._weights[1], W2)
        E = model.propagateBackwards(Zh, lr, updateWeights=False)
        Y0 = num.zeros((1, S[0] + 1))
        Y1 = num.zeros((1, S[1] + 1))
        Y2 = num.zeros((1, S[2]))
        Y0[:] = bias_add(Xh)
        Y1[:] = bias_add(num.tanh(num.dot(Y0, W1)))
        Y2[:] = num.tanh(num.dot(Y1, W2))
        E2 = Zh - Y2
        self.assertArrayEqual(E, E2)
        self.assertArrayEqual(model._layerOutputs[0], Y0)
        self.assertArrayEqual(model._layerOutputs[1], Y1)
        self.assertArrayEqual(model._layerOutputs[2], Y2)
        self.assertArrayEqual(model._weights[0], W1)
        self.assertArrayEqual(model._weights[1], W2)

    def test_summary(self):
        model = MultiPerceptron(activation=self.tanh, weightsInitializer=zeroInitializer)
        model.configureLayers([2, 3, 1])
        expectedSummary = """Activation: Tanh
With Bias: True
Layers: 2
Weights: [(3, 3), (4, 1)]
Trainable params: 13"""
        self.assertEqual(model.summary(), expectedSummary)
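# The tests above rely on two helpers imported from the implementation module: bias_add, which
# appends a bias column to a layer output, and bias_sub, which strips the corresponding column
# from a backpropagated error. A minimal sketch consistent with the shapes used in the tests;
# appending a column of ones on the right is an assumption, only the names and resulting shapes
# come from the tests.
import numpy as num

def bias_add_sketch(Y):
    # (1, n) -> (1, n + 1), with the extra column holding the bias input
    return num.hstack([Y, num.ones((Y.shape[0], 1))])

def bias_sub_sketch(D):
    # drop the column that corresponds to the bias unit before propagating further back
    return D[:, :-1]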
def test_description(self):
    tanh = Tanh()
    self.assertEqual(tanh.description(), 'Tanh')
# Build network
num_hidden = 3
weight_init = 'pytorch_default'
bias_init = 'zero'

layers = []
linear = Linear(2, 25, weight_init=weight_init, bias_init=bias_init)
layers.append(linear)
layers.append(Relu())
for i in range(num_hidden - 1):
    layers.append(Linear(25, 25, weight_init=weight_init, bias_init=bias_init))
    layers.append(Relu())
layers.append(Linear(25, 2, weight_init=weight_init, bias_init=bias_init))
layers.append(Tanh())
net_2layer = Network(layers, train_input.shape[0])

# Choose loss
mse = MSE()

# Choose parameters
lr = 0.05
num_iter = 1000
timesteps = []
loss_at_timesteps = []

# Train model