Example #1
def plot_activations():
    '''This function plots all the activation functions implemented.'''
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    x_range = np.arange(-3, 3, 0.01)
    x = torch.Tensor(x_range)
    tanh = Tanh()
    plt.plot(x_range,
             tanh.forward(x).numpy(),
             color='b',
             label='Tanh',
             alpha=0.5)
    plt.plot(x_range,
             tanh.backward(1).numpy(),
             color='b',
             label='Tanh derivative',
             alpha=0.5,
             linestyle=':')
    relu = ReLU()
    plt.plot(x_range,
             relu.forward(x).numpy(),
             color='g',
             label='ReLU (0)',
             alpha=0.5)
    plt.plot(x_range,
             relu.backward(1).numpy(),
             color='g',
             label='ReLU derivative',
             alpha=0.5,
             linestyle=':')
    leakyrelu = LeakyReLU()
    plt.plot(x_range,
             leakyrelu.forward(x).numpy(),
             color='m',
             label='LeakyReLU (0.01)',
             alpha=0.5)
    plt.plot(x_range,
             leakyrelu.backward(1).numpy(),
             color='m',
             label='LeakyReLU derivative',
             alpha=0.5,
             linestyle=':')
    prelu = PReLU(init=0.1)
    plt.plot(x_range,
             prelu.forward(x).numpy(),
             color='y',
             label='PReLU',
             alpha=0.5)
    plt.plot(x_range,
             prelu.backward(1).numpy(),
             color='y',
             label='PReLU derivative (0.1 - trainable)',
             alpha=0.5,
             linestyle=':')
    plt.legend(framealpha=1)
    plt.tight_layout()
    plt.savefig('figures/activations.png', dpi=300)
    plt.show()
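The plotting helper above only assumes that each activation object exposes forward(x) and backward(grad) and returns tensors that support .numpy(). Below is a minimal sketch of such a Tanh module, offered purely to illustrate the assumed interface (the other activations in the plot would follow the same pattern); it is not the implementation this example was written against.

import torch

class Tanh:
    # forward caches the input so that backward(grad) can evaluate the
    # derivative 1 - tanh(x)**2 at the same points
    def forward(self, x):
        self.x = x
        return torch.tanh(x)

    def backward(self, grad):
        return grad * (1 - torch.tanh(self.x) ** 2)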
Example #2
    def __init__(self,input_size,layer_size,act_func=None):
        self.layer_size = layer_size
        self.input_size = input_size
        self.W = np.random.normal(loc=0,scale=1.0,size=(self.input_size,self.layer_size))
        self.b = np.zeros(shape=(1,self.layer_size)) + 0.1
        self.h = None #output
        self.z = None
        self.grad_act_z = None
        self.delta = None
        self.grad_w = np.zeros(shape=self.W.shape)
        self.grad_b = np.zeros(shape=self.b.shape)
        self.grad_w_count = 0
        self.grad_b_count = 0


        if act_func!=None:
            self.act_func = act_func
        else:
            self.act_func = Tanh()
Example #3
    def __init__(self,
                 units=0,
                 input_shape=0,
                 dtype=np.float32,
                 gate_act=Sigmoid):
        super(LSTM, self).__init__(input_shape, units)
        self.gate_acts = {
            "I": gate_act(),
            "F": gate_act(),
            "O": gate_act(),
            "U": Tanh()
        }
        self.act_tanh = Tanh()
        self.Wx = {"I": None, "F": None, "U": None, "O": None}
        self.Wh = {"I": None, "F": None, "U": None, "O": None}
        self.B = {"I": None, "F": None, "U": None, "O": None}
        for k in ["I", "F", "U", "O"]:
            self.Wx[k] = np.random.uniform(-1, 1,
                                           (input_shape, units)).astype(dtype)
            self.Wh[k] = np.random.uniform(-1, 1, (units, units)).astype(dtype)
            self.B[k] = np.random.uniform(-1, 1, 1).astype(dtype)
Example #4
    def trainXORWithTanh(self, training, stopCondition):
        logicStatements = num.array([[0, 0], [0, 1], [1, 0], [1, 1]])
        expectedOutput = num.array([[0], [1], [1], [0]])

        S = [logicStatements.shape[1], 3, expectedOutput.shape[1]]

        model = MultiPerceptron(activation=Tanh(),
                                weightsInitializer=ZeroInitializer())
        model.configureLayers(S)

        training.executeOn(model=model,
                           input=logicStatements,
                           target=expectedOutput,
                           learningRate=0.1,
                           stopCondition=stopCondition)

        return model
Example #5
class Dense:
    def __init__(self,input_size,layer_size,act_func=None):
        self.layer_size = layer_size
        self.input_size = input_size
        self.W = np.random.normal(loc=0,scale=1.0,size=(self.input_size,self.layer_size))
        self.b = np.zeros(shape=(1,self.layer_size)) + 0.1
        self.h = None #output
        self.z = None
        self.grad_act_z = None
        self.delta = None
        self.grad_w = np.zeros(shape=self.W.shape)
        self.grad_b = np.zeros(shape=self.b.shape)
        self.grad_w_count = 0
        self.grad_b_count = 0


        if act_func!=None:
            self.act_func = act_func
        else:
            self.act_func = Tanh()

    def eval_z(self,input_h):

        #Test input shape
        if input_h.shape[1] != self.input_size:
            raise ValueError("input shape: %s, expected (N, %d)"
                             % (str(input_h.shape), self.input_size))

        self.z = np.matmul(input_h,self.W) + self.b

        # Check the pre-activation shape: expect (N, layer_size)
        if self.z.shape[1] != self.layer_size:
            raise ValueError("z shape: %s, expected (N, %d)"
                             % (str(self.z.shape), self.layer_size))

        self.grad_act_z = self.act_func.eval_grad(self.z)

    def eval_h(self,input_h):
        self.eval_z(input_h)
        self.h = self.act_func.eval(self.z)

    def set_delta(self,delta):
        self.delta = delta

    """
    delta^(l-1) = [ (W^l)^T delta^l ] grad(act(z^(l-1)))
    """
    def eval_delta_back(self,grad_act_z_back):
        if self.delta is None:
            raise ValueError("self.delta is None")

        if self.grad_act_z is None:
            raise ValueError("self.grad_act_z is None")

        return np.matmul(self.delta,np.transpose(self.W)) * grad_act_z_back

    def accum_grad(self,input_h):
        self.grad_w += np.matmul(np.transpose(input_h),self.delta)
        self.grad_b += self.delta
        self.grad_w_count += 1
        self.grad_b_count += 1


    def update_w(self,lr):
        self.W = self.W - self.grad_w*lr/self.grad_w_count
        self.b = self.b - self.grad_b*lr/self.grad_b_count
        self.grad_w = np.zeros(shape=self.W.shape)
        self.grad_b = np.zeros(shape=self.b.shape)
        self.grad_w_count = 0
        self.grad_b_count = 0
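The docstring above encodes the backpropagation recursion delta^(l-1) = (W^l)^T delta^l * act'(z^(l-1)). The following is a minimal sketch of how two Dense layers (the class defined above) could be driven through one forward/backward step; the Tanh class with eval / eval_grad and the squared-error output delta are assumptions added for illustration.

import numpy as np

# hypothetical activation matching the eval / eval_grad interface Dense expects
class Tanh:
    def eval(self, z):
        return np.tanh(z)
    def eval_grad(self, z):
        return 1.0 - np.tanh(z) ** 2

x = np.random.randn(1, 4)          # one sample with 4 features
target = np.random.randn(1, 2)

layer1 = Dense(4, 8, Tanh())
layer2 = Dense(8, 2, Tanh())

# forward pass
layer1.eval_h(x)
layer2.eval_h(layer1.h)

# output delta for a squared-error loss, then the recursion from the docstring
layer2.set_delta((layer2.h - target) * layer2.grad_act_z)
layer1.set_delta(layer2.eval_delta_back(layer1.grad_act_z))

# accumulate gradients and take one gradient step
layer2.accum_grad(layer1.h)
layer1.accum_grad(x)
layer2.update_w(0.1)
layer1.update_w(0.1)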
Example #6
	def __init__(self, arg):
		super(LSTM, self).__init__()

		self.input_dim 	= arg["input_dim"]
		self.hidden_dim = arg["hidden_dim"]
		self.output_dim = arg["output_dim"]

		self.w_forget 			= 2*np.random.random((self.hidden_dim, self.input_dim+self.hidden_dim)) - 1
		self.w_memory_weight 	= 2*np.random.random((self.hidden_dim, self.input_dim+self.hidden_dim)) - 1
		self.w_memory_content 	= 2*np.random.random((self.hidden_dim, self.input_dim+self.hidden_dim)) - 1
		self.w_output 			= 2*np.random.random((self.hidden_dim, self.input_dim+self.hidden_dim)) - 1
		self.w_predict 			= 2*np.random.random((self.output_dim, self.hidden_dim)) - 1

		self.activation_forget 			= arg["activation_forget"]() 			if "activation_forget" 			in arg else Sigmoid()
		self.activation_memory_weight 	= arg["activation_memory_weight"]() 	if "activation_memory_weight" 	in arg else Sigmoid()
		self.activation_memory_content 	= arg["activation_memory_content"]() 	if "activation_memory_content" 	in arg else Tanh()
		self.activation_output_weight 	= arg["activation_output_weight"]() 	if "activation_output_weight" 	in arg else Sigmoid()
		self.activation_output_content 	= arg["activation_output_content"]() 	if "activation_output_content" 	in arg else Tanh()
Example #7
    def test_evaluateDerivativeOnVectors(self):
        tanh = Tanh()

        vector = num.array([1, 2, 3])
        num.testing.assert_array_equal(tanh.derivative(vector),
                                       num.array([0, -3, -8]))
Example #8
from activation import Tanh
from gate import AddGate, MultiplyGate
import numpy as np


mulGate = MultiplyGate()
addGate = AddGate()
activation = Tanh()

class RNNLayer:
    def forward(self, x, prev_s, U, W, V):
        self.mulu = mulGate.forward(U, x)
        self.mulw = mulGate.forward(W, prev_s)
        self.add = addGate.forward(self.mulw, self.mulu)
        self.s = activation.forward(self.add)
        self.mulv = mulGate.forward(V, self.s)

    def backward(self, x, prev_s, U, W, V, diff_s, dmulv, forward=True):
        if forward:
            self.forward(x, prev_s, U, W, V)
        dV, dsv = mulGate.backward(V, self.s, dmulv)
        ds = dsv + diff_s
        dadd = activation.backward(self.add, ds)
        dmulw, dmulu = addGate.backward(self.mulw, self.mulu, dadd)
        dW, dprev_s = mulGate.backward(W, prev_s, dmulw)
        dU, dx = mulGate.backward(U, x, dmulu)
        return (dprev_s, dU, dW, dV)
    
    def backward1(self, x, prev_s, U, W, V, delta1, dmulv, forward=True):
        if forward:
            self.forward(x, prev_s, U, W, V)
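The RNN layer above delegates the tensor work to gate objects. The sketch below shows a MultiplyGate and AddGate consistent with how they are called here (forward(W, x) as a matrix product; backward returning gradients with respect to both operands); the original gate module may differ in detail.

import numpy as np

class MultiplyGate:
    # z = W . x
    def forward(self, W, x):
        return np.dot(W, x)

    # dW is the outer product of the incoming gradient and x;
    # dx is routed back through W^T
    def backward(self, W, x, dz):
        dW = np.outer(dz, x)
        dx = np.dot(np.transpose(W), dz)
        return dW, dx

class AddGate:
    # z = x1 + x2
    def forward(self, x1, x2):
        return x1 + x2

    # addition passes the incoming gradient to both inputs unchanged
    def backward(self, x1, x2, dz):
        return dz * np.ones_like(x1), dz * np.ones_like(x2)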
Example #9
    def test_evaluateOnVectors(self):
        tanh = Tanh()

        vector = num.array([1, 2, 3])
        num.testing.assert_array_equal(tanh(vector), num.tanh(vector))
Example #10
    def test_evaluateDerivativeOnNumbers(self):
        tanh = Tanh()

        self.assertEqual(tanh.derivative(1), 0)
        self.assertEqual(tanh.derivative(2), -3)
Example #11
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import scipy as sp

from activation import Sigmoid, Softmax, Tanh

ACTIVATION_MAP = {'tanh': Tanh(), 'sigmoid': Sigmoid(), 'softmax': Softmax()}


class LSTMLayer(object):
    def __init__(self, activation='tanh'):
        self.activation = activation
        self.a = None
        self.h = None
        self.y = None

    def activate(self, x):
        return ACTIVATION_MAP[self.activation].eval(x)

    def backward(self):
        return ACTIVATION_MAP[self.activation].gradient(self.a)

    def loss(self, t):
        return ACTIVATION_MAP[self.activation].loss(t, self.y)


class HiddenLayer(LSTMLayer):
    def __init__(self,
                 hidden_size=10,
                 gate_activation='sigmoid',
Example #12
    def test_evaluateOnNumbers(self):
        tanh = Tanh()

        self.assertEqual(tanh(1), num.tanh(1))
        self.assertEqual(tanh(0.75), num.tanh(0.75))
Example #13
from activation import Tanh
from gate import AddGate, MultiplyGate

mulgate = MultiplyGate()
addgate = AddGate()
tanh = Tanh()


class RNNLayer:
    def forward(self, x, prev_a, waa, wax, wya):
        self.mulax = mulgate.forward(wax, x)
        self.mulaa = mulgate.forward(waa, prev_a)
        self.add = addgate.forward(self.mulax, self.mulaa)
        self.a = tanh.forward(self.add)
        self.mulya = mulgate.forward(wya, self.a)

## dmulya = y_hat_t - y_t   (gradient at the output projection)
## dV = (y_hat_t - y_t) * a_t

    def backward(self, x, prev_a, waa, wax, wya, diff_a, dmulya):
        self.forward(x, prev_a, waa, wax, wya)
        dV, dav = mulgate.backward(wya, self.a, dmulya)
        da = dav + diff_a
Example #14
def main():
    # generate data and translate labels
    train_features, train_targets = generate_all_datapoints_and_labels()
    test_features, test_targets = generate_all_datapoints_and_labels()
    train_labels, test_labels = convert_labels(train_targets), convert_labels(test_targets)


    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + ReLU + Linear + ReLU + Linear + Tanh')
    print('Loss: MSE')
    print('Optimizer: SGD')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')
    # build network, loss and optimizer for Model 1
    my_model_design_1=[Linear(2,25), ReLU(), Linear(25,25), Dropout(p=0.5), ReLU(),
                       Linear(25,25), ReLU(),Linear(25,2),Tanh()]
    my_model_1=Sequential(my_model_design_1)
    optimizer_1=SGD(my_model_1,lr=1e-3)
    criterion_1=LossMSE()

    # train Model 1
    batch_size=1
    for epoch in range(50):
        temp_train_loss_sum=0.
        temp_test_loss_sum=0.
        num_train_correct=0
        num_test_correct=0
        
        # trained in batch-fashion: here batch size = 1
        for temp_batch in range(0,len(train_features), batch_size):
            temp_train_features=train_features.narrow(0, temp_batch, batch_size)  
            temp_train_labels=train_labels.narrow(0, temp_batch, batch_size)  
            
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_1.zero_grad()  
                temp_train_feature=temp_train_features[i]
                temp_train_label=temp_train_labels[i]
                
                # forward pass to compute loss
                temp_train_pred=my_model_1.forward(temp_train_feature)
                temp_train_loss=criterion_1.forward(temp_train_pred,temp_train_label)
                temp_train_loss_sum+=temp_train_loss
                
                _, temp_train_pred_cat=torch.max(temp_train_pred,0)
                _, temp_train_label_cat=torch.max(temp_train_label,0)

                
                if temp_train_pred_cat==temp_train_label_cat:
                    num_train_correct+=1
  
                # calculate gradient according to loss gradient
                temp_train_loss_grad=criterion_1.backward(temp_train_pred,temp_train_label)
                # accumulate parameter gradient in each batch
                my_model_1.backward(temp_train_loss_grad)                       
            
            # update parameters by optimizer
            optimizer_1.step()
            
            
        # evaluate the current model on testing set
        # only forward pass is implemented
        for i_test in range(len(test_features)):
            temp_test_feature=test_features[i_test]
            temp_test_label=test_labels[i_test]

            temp_test_pred=my_model_1.forward(temp_test_feature)
            temp_test_loss=criterion_1.forward(temp_test_pred,temp_test_label)
            temp_test_loss_sum+=temp_test_loss

            
            _, temp_test_pred_cat=torch.max(temp_test_pred,0)
            _, temp_test_label_cat=torch.max(temp_test_label,0)

            if temp_test_pred_cat==temp_test_label_cat:
                num_test_correct+=1
            
            
        temp_train_loss_mean=temp_train_loss_sum/len(train_features)
        temp_test_loss_mean=temp_test_loss_sum/len(test_features)
        
        temp_train_accuracy=num_train_correct/len(train_features)
        temp_test_accuracy=num_test_correct/len(test_features)
        
        print("Epoch: {}/{}..".format(epoch+1, 50),
                      "Training Loss: {:.4f}..".format(temp_train_loss_mean),
                      "Training Accuracy: {:.4f}..".format(temp_train_accuracy), 
                      "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
                      "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),  )
        
        
        
    # visualize the classification performance of Model 1 on testing set
    test_pred_labels_1=[]
    for i in range(1000): 
        temp_test_feature=test_features[i]
        temp_test_label=test_labels[i]

        temp_test_pred=my_model_1.forward(temp_test_feature)

        _, temp_train_pred_cat=torch.max(temp_test_pred,0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_1.append(int(test_targets[i]))
        else:
            test_pred_labels_1.append(2)
            
    fig,axes = plt.subplots(1,1,figsize=(6,6))
    axes.scatter(test_features[:,0], test_features[:,1], c=test_pred_labels_1)
    axes.set_title('Classification Performance of Model 1')
    plt.show()
                      
      
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + Dropout+ SeLU + Linear + Dropout + ReLU + Linear + Sigmoid')
    print('Loss: Cross Entropy')
    print('Optimizer: Adam')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')
    
    # build network, loss function and optimizer for Model 2
    my_model_design_2=[Linear(2,25), ReLU(), Linear(25,25), Dropout(p=0.5), SeLU(),
                       Linear(25,25),Dropout(p=0.5), ReLU(),Linear(25,2),
                       Sigmoid()]
    my_model_2=Sequential(my_model_design_2)
    optimizer_2=Adam(my_model_2,lr=1e-3)
    criterion_2=CrossEntropy()

    # train Model 2
    batch_size=1
    epoch=0
    while(epoch<25):
        temp_train_loss_sum=0.
        temp_test_loss_sum=0.
        num_train_correct=0
        num_test_correct=0
        
        # trained in batch-fashion: here batch size = 1
        for temp_batch in range(0,len(train_features), batch_size):
            temp_train_features=train_features.narrow(0, temp_batch, batch_size)  
            temp_train_labels=train_labels.narrow(0, temp_batch, batch_size)  
            
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_2.zero_grad()  
                temp_train_feature=temp_train_features[i]
                temp_train_label=temp_train_labels[i]
                
                # forward pass to compute loss
                temp_train_pred=my_model_2.forward(temp_train_feature)
                temp_train_loss=criterion_2.forward(temp_train_pred,temp_train_label)
                temp_train_loss_sum+=temp_train_loss
                
                _, temp_train_pred_cat=torch.max(temp_train_pred,0)
                _, temp_train_label_cat=torch.max(temp_train_label,0)

                
                if temp_train_pred_cat==temp_train_label_cat:
                    num_train_correct+=1
       
                
                # calculate gradient according to loss gradient
                temp_train_loss_grad=criterion_2.backward(temp_train_pred,temp_train_label)
                '''
                if (not temp_train_loss_grad[0]>=0) and (not temp_train_loss_grad[0]<0):
                    continue
                '''
                # accumulate parameter gradient in each batch
                my_model_2.backward(temp_train_loss_grad)     
                
            # update parameters by optimizer
            optimizer_2.step()
            
        # evaluate the current model on testing set
        # only forward pass is implemented
        for i_test in range(len(test_features)):
            temp_test_feature=test_features[i_test]
            temp_test_label=test_labels[i_test]

            temp_test_pred=my_model_2.forward(temp_test_feature)
            temp_test_loss=criterion_2.forward(temp_test_pred,temp_test_label)
            temp_test_loss_sum+=temp_test_loss

            
            _, temp_test_pred_cat=torch.max(temp_test_pred,0)
            _, temp_test_label_cat=torch.max(temp_test_label,0)

            if temp_test_pred_cat==temp_test_label_cat:
                num_test_correct+=1
            
            
        temp_train_loss_mean=temp_train_loss_sum/len(train_features)
        temp_test_loss_mean=temp_test_loss_sum/len(test_features)
        
        temp_train_accuracy=num_train_correct/len(train_features)
        temp_test_accuracy=num_test_correct/len(test_features)
        
        # in case of a gradient explosion (the loss gradient became NaN), re-initialize
        # the model and restart training; this situation seldom happens
        if (not temp_train_loss_grad[0]>=0) and (not temp_train_loss_grad[0]<0):
            epoch=0
            my_model_design_2=[Linear(2,25), ReLU(), Linear(25,25), Dropout(p=0.5), SeLU(),
                       Linear(25,25),Dropout(p=0.5), ReLU(),Linear(25,2),Sigmoid()]
            my_model_2=Sequential(my_model_design_2)
            optimizer_2=Adam(my_model_2,lr=1e-3)
            criterion_2=CrossEntropy()
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('Restart training because of gradient explosion')
            continue
        
        print("Epoch: {}/{}..".format(epoch+1, 25),
                      "Training Loss: {:.4f}..".format(temp_train_loss_mean),
                      "Training Accuracy: {:.4f}..".format(temp_train_accuracy), 
                      "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
                      "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),  )
        epoch+=1 
        
    # visualize the classification performance of Model 2 on testing set
    test_pred_labels_2=[]
    for i in range(1000): 
        temp_test_feature=test_features[i]
        temp_test_label=test_labels[i]

        temp_test_pred=my_model_2.forward(temp_test_feature)

        _, temp_train_pred_cat=torch.max(temp_test_pred,0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_2.append(int(test_targets[i]))
        else:
            test_pred_labels_2.append(2)
            
    fig,axes = plt.subplots(1,1,figsize=(6,6))
    axes.scatter(test_features[:,0], test_features[:,1], c=test_pred_labels_2)
    axes.set_title('Classification Performance of Model 2')
    plt.show()
Example #15
from activation import Tanh, ReLU, LeakyReLU, PReLU
from optimizer import SGD
from loss import MSELoss, CrossEntropyLoss
from utils import gen_disc_set, plot_dataset, build_CV_sets, standardise_input, train, test

if __name__ == '__main__':

    lr = 0.01
    k_fold = 10
    CV_sets = build_CV_sets(k_fold, 1000)
    print('CV sets built.')
    test_input, test_target = gen_disc_set(1000)

    for criterion in [MSELoss(), CrossEntropyLoss()]:
        for mini_batch_size in [20]:
            for activation in [Tanh()]:

                print('***')
                print('Criterion: {}, mini_batch_size: {}, activation: {}.'.
                      format(criterion.name(), mini_batch_size,
                             activation.name()))
                print('***')

                training_time_acc = []
                test_error_acc = []
                for i in tqdm(range(k_fold), leave=False):

                    torch.manual_seed(2019)
                    model = Sequential([
                        Linear(2, 25, activation.name()), activation,
                        Linear(25, 25, activation.name()), activation,
Example #16
class LSTM(Layer):
    def __init__(self,
                 units=0,
                 input_shape=0,
                 dtype=np.float32,
                 gate_act=Sigmoid):
        super(LSTM, self).__init__(input_shape, units)
        self.gate_acts = {
            "I": gate_act(),
            "F": gate_act(),
            "O": gate_act(),
            "U": Tanh()
        }
        self.act_tanh = Tanh()
        self.Wx = {"I": None, "F": None, "U": None, "O": None}
        self.Wh = {"I": None, "F": None, "U": None, "O": None}
        self.B = {"I": None, "F": None, "U": None, "O": None}
        for k in ["I", "F", "U", "O"]:
            self.Wx[k] = np.random.uniform(-1, 1,
                                           (input_shape, units)).astype(dtype)
            self.Wh[k] = np.random.uniform(-1, 1, (units, units)).astype(dtype)
            self.B[k] = np.random.uniform(-1, 1, 1).astype(dtype)

    def configure(self, data_shape, phase, prevLayer=None):
        self.batch = data_shape[0]
        for k in self.gate_acts:
            self.gate_acts[k].configure(data_shape, phase, prevLayer)
        self.act_tanh.configure(data_shape, phase, prevLayer)
        self.optimizers = []
        for i in range(8):
            self.optimizers.append(copy.deepcopy(self.optimizer))
        self.buff = {
            "C": None,
            "C_1": None,
            "H": None,
            "H_1": None,
            "I": None,
            "F": None,
            "U": None,
            "O": None,
            "X": None
        }
        for k in self.buff:
            self.buff[k] = np.zeros((self.batch, self.units))
        self.X = np.zeros((self.batch, self.input_shape), dtype=self.dtype)

    def forward(self, x):
        self.X[:] = x

        for k in ["I", "F", "O", "U"]:
            self.buff[k] = self.gate_acts[k].forward(
                self.X.dot(self.Wx[k]) + self.buff["H_1"].dot(self.Wh[k]) +
                self.B[k])
        self.buff["C"] = self.buff["I"] * self.buff["C_1"] + self.buff[
            "U"] * self.buff["I"]
        self.Ctanh = self.act_tanh.forward(self.buff["C"])
        self.buff["H"] = self.Ctanh * self.buff["O"]
        self.buff["C_1"] = self.buff["C"]
        self.buff["H_1"] = self.buff["H"]
        return self.buff["H"]

    def backward(self, e):
        delta = {}
        delta["C"] = self.act_tanh.backward(e) * self.buff["O"]
        delta["C_1"] = delta["C"] * self.buff["F"]
        delta["O"] = self.gate_acts["O"].backward(e) * self.Ctanh
        delta["I"] = self.gate_acts["I"].backward(delta["C"]) * self.buff["U"]
        delta["U"] = self.gate_acts["U"].backward(delta["C"]) * self.buff["I"]
        delta["F"] = self.gate_acts["F"].backward(
            delta["C"]) * self.buff["C_1"]
        delta["H"] = delta["I"].dot(self.Wh["I"].T) + delta["O"].dot(
            self.Wh["O"].T) + delta["U"].dot(self.Wh["U"].T) + delta["F"].dot(
                self.Wh["F"].T)

        #update
        for i, k in enumerate(["I", "F", "U", "O"]):
            np.subtract(
                self.Wx[k], self.optimizers[i](np.sum(np.einsum(
                    "bi,bj->bij", self.X, self.learning_rate * delta[k]),
                                                      axis=0)) / self.batch,
                self.Wx[k])
            np.subtract(
                self.Wh[k], self.optimizers[4 + i](np.sum(
                    np.einsum("bi,bj->bij", self.buff["H_1"],
                              self.learning_rate * delta[k]),
                    axis=0)) / self.batch, self.Wh[k])
            self.B[k] -= np.sum(self.learning_rate * delta[k])

        return delta["H"]
Example #17
class TestMultiPerceptron(unittest.TestCase):
    tanh = Tanh()

    def assertArrayEqual(self, array1, array2):
        num.testing.assert_array_equal(array1, array2)

    def test_initialization(self):
        model = MultiPerceptron(activation=self.tanh,
                                weightsInitializer=zeroInitializer)

        self.assertEqual(model._activation, self.tanh)
        self.assertEqual(model._weightsInitializer, zeroInitializer)
        self.assertTrue(model._useBias)
        self.assertEqual(len(model._weights), 0)
        self.assertEqual(len(model._layerOutputs), 0)

    def test_addLayers(self):
        model = MultiPerceptron(activation=self.tanh,
                                weightsInitializer=zeroInitializer)
        model.addLayer(3, 2)

        self.assertEqual(len(model._weights), 1)
        self.assertEqual(len(model._layerOutputs), 1)

        self.assertEqual(model._weights[0].shape, (4, 2))

        self.assertArrayEqual(model._weights[0],
                              num.array([[0, 0], [0, 0], [0, 0], [0, 0]]))
        self.assertArrayEqual(model._layerOutputs[0], [None])

        model.addLayer(2, 1)

        self.assertEqual(len(model._weights), 2)
        self.assertEqual(len(model._layerOutputs), 2)

        self.assertEqual(model._weights[0].shape, (4, 2))
        self.assertEqual(model._weights[1].shape, (3, 1))

        self.assertArrayEqual(model._weights[0],
                              num.array([[0, 0], [0, 0], [0, 0], [0, 0]]))
        self.assertArrayEqual(model._weights[1], num.array([[0], [0], [0]]))
        self.assertArrayEqual(model._layerOutputs[0], [None])
        self.assertArrayEqual(model._layerOutputs[1], [None])

    def test_addLayersAtOnce(self):
        model = MultiPerceptron(activation=self.tanh,
                                weightsInitializer=zeroInitializer)
        model.configureLayers([3, 2, 1])

        self.assertEqual(len(model._weights), 2)
        self.assertEqual(len(model._layerOutputs), 3)

        self.assertEqual(model._weights[0].shape, (4, 2))
        self.assertEqual(model._weights[1].shape, (3, 1))

        self.assertArrayEqual(model._weights[0],
                              num.array([[0, 0], [0, 0], [0, 0], [0, 0]]))
        self.assertArrayEqual(model._weights[1], num.array([[0], [0], [0]]))
        self.assertArrayEqual(model._layerOutputs[0], [None])
        self.assertArrayEqual(model._layerOutputs[1], [None])
        self.assertArrayEqual(model._layerOutputs[2], [None])

    def test_propagateForward(self):
        S = [3, 2, 1]
        Xh = num.array([[1, 0, 1]])

        model = MultiPerceptron(activation=self.tanh,
                                weightsInitializer=zeroInitializer)
        model.configureLayers(S)
        Y = model.propagateForward(Xh)

        W1 = zeroInitializer(S[0] + 1, S[1])
        W2 = zeroInitializer(S[1] + 1, S[2])
        Y0 = num.zeros((1, S[0] + 1))
        Y1 = num.zeros((1, S[1] + 1))
        Y2 = num.zeros((1, S[2]))

        Y0[:] = bias_add(Xh)
        Y1[:] = bias_add(num.tanh(num.dot(Y0, W1)))
        Y2[:] = num.tanh(num.dot(Y1, W2))

        self.assertArrayEqual(model._layerOutputs[0], Y0)
        self.assertArrayEqual(model._layerOutputs[1], Y1)
        self.assertArrayEqual(model._layerOutputs[2], Y2)
        self.assertArrayEqual(Y, Y2)

    def test_propagateBackwards(self):
        S = [3, 2, 1]
        Xh = num.array([[1, 0, 1]])
        Zh = num.array([[1]])
        lr = 0.1

        model = MultiPerceptron(activation=self.tanh,
                                weightsInitializer=zeroInitializer)
        model.configureLayers(S)
        model.propagateForward(Xh)

        W1 = zeroInitializer(S[0] + 1, S[1])
        W2 = zeroInitializer(S[1] + 1, S[2])

        self.assertArrayEqual(model._weights[0], W1)
        self.assertArrayEqual(model._weights[1], W2)

        E = model.propagateBackwards(Zh, lr)

        Y0 = num.zeros((1, S[0] + 1))
        Y1 = num.zeros((1, S[1] + 1))
        Y2 = num.zeros((1, S[2]))

        Y0[:] = bias_add(Xh)
        Y1[:] = bias_add(num.tanh(num.dot(Y0, W1)))
        Y2[:] = num.tanh(num.dot(Y1, W2))

        dW1 = num.zeros_like(W1)
        dW2 = num.zeros_like(W2)

        E2 = Zh - Y2
        dY2 = 1 - num.square(Y2)
        D2 = E2 * dY2
        dW2 += lr * num.dot(Y1.T, D2)

        E1 = num.dot(D2, W2.T)
        dY1 = 1 - num.square(Y1)
        D1 = bias_sub(E1 * dY1)
        dW1 += lr * num.dot(Y0.T, D1)

        W1 += dW1
        W2 += dW2

        self.assertArrayEqual(E, E2)
        self.assertArrayEqual(model._layerOutputs[0], Y0)
        self.assertArrayEqual(model._layerOutputs[1], Y1)
        self.assertArrayEqual(model._layerOutputs[2], Y2)
        self.assertArrayEqual(model._weights[0], W1)
        self.assertArrayEqual(model._weights[1], W2)

    def test_propagateBackwardsWithoutUpdatingWeights(self):
        S = [3, 2, 1]
        Xh = num.array([[1, 0, 1]])
        Zh = num.array([[1]])
        lr = 0.1

        model = MultiPerceptron(activation=self.tanh,
                                weightsInitializer=zeroInitializer)
        model.configureLayers(S)
        model.propagateForward(Xh)

        W1 = zeroInitializer(S[0] + 1, S[1])
        W2 = zeroInitializer(S[1] + 1, S[2])

        self.assertArrayEqual(model._weights[0], W1)
        self.assertArrayEqual(model._weights[1], W2)

        E = model.propagateBackwards(Zh, lr, updateWeights=False)

        Y0 = num.zeros((1, S[0] + 1))
        Y1 = num.zeros((1, S[1] + 1))
        Y2 = num.zeros((1, S[2]))

        Y0[:] = bias_add(Xh)
        Y1[:] = bias_add(num.tanh(num.dot(Y0, W1)))
        Y2[:] = num.tanh(num.dot(Y1, W2))

        E2 = Zh - Y2

        self.assertArrayEqual(E, E2)
        self.assertArrayEqual(model._layerOutputs[0], Y0)
        self.assertArrayEqual(model._layerOutputs[1], Y1)
        self.assertArrayEqual(model._layerOutputs[2], Y2)
        self.assertArrayEqual(model._weights[0], W1)
        self.assertArrayEqual(model._weights[1], W2)

    def test_summary(self):
        model = MultiPerceptron(activation=self.tanh,
                                weightsInitializer=zeroInitializer)
        model.configureLayers([2, 3, 1])

        expectedSummary = """Activation: Tanh
With Bias: True
Layers: 2
Weights: [(3, 3), (4, 1)]
Trainable params: 13"""

        self.assertEqual(model.summary(), expectedSummary)
Example #18
    def test_description(self):
        tanh = Tanh()

        self.assertEqual(tanh.description(), 'Tanh')
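Taken together, the tests above pin down a small activation interface: the object is callable and evaluates tanh, derivative(x) returns 1 - x**2 (the tanh derivative written in terms of the activation value), and description() returns the class name. Below is a minimal Tanh consistent with those tests, offered only as a sketch, not as the original implementation.

import numpy as np

class Tanh:
    # callable form evaluates the activation itself
    def __call__(self, x):
        return np.tanh(x)

    # derivative expressed in terms of the activation value: tanh'(z) = 1 - tanh(z)**2
    def derivative(self, x):
        return 1 - np.square(x)

    def description(self):
        return 'Tanh'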
Example #19
# Build network

num_hidden = 3
weight_init = 'pytorch_default'
bias_init = 'zero'
layers = []

linear = Linear(2, 25, weight_init=weight_init, bias_init=bias_init)
layers.append(linear)
layers.append(Relu())
for i in range(num_hidden - 1):
    layers.append(Linear(25, 25, weight_init=weight_init, bias_init=bias_init))
    layers.append(Relu())
layers.append(Linear(25, 2, weight_init=weight_init, bias_init=bias_init))
layers.append(Tanh())
net_2layer = Network(layers, train_input.shape[0])

# Choose loss

mse = MSE()

# Choose parameters

lr = 0.05
num_iter = 1000

timesteps = []
loss_at_timesteps = []

# Train model
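The excerpt stops right before the training step. As a rough sketch only: a plain full-batch loop under assumed names (forward, backward and a parameter-update method on Network, compute/compute_grad on MSE, and a train_target tensor defined earlier in the script); none of these names are confirmed by the excerpt.

for t in range(num_iter):
    # forward pass and loss (method names are assumptions for illustration)
    pred = net_2layer.forward(train_input)
    loss = mse.compute(pred, train_target)

    # backward pass and gradient step
    grad = mse.compute_grad(pred, train_target)
    net_2layer.backward(grad)
    net_2layer.update_params(lr)

    # record the learning curve
    timesteps.append(t)
    loss_at_timesteps.append(loss)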