def test_check_setup(self):
    sys.stdout.write('FNN_trainer -> Performing check_setup test ... ')
    sys.stdout.flush()
    numx.random.seed(42)
    l1 = FullConnLayer(2 * 2, 3 * 3)
    l2 = FullConnLayer(3 * 3, 2 * 2)
    l3 = FullConnLayer(2 * 2, 1 * 1)
    model = MODEL.Model([l1, l2, l3])
    trainer = TRAINER.GDTrainer(model)
    res = trainer.check_setup(
        data=numx.arange(4).reshape(1, 4),
        labels=[numx.arange(9).reshape(1, 9),
                numx.arange(4).reshape(1, 4),
                numx.arange(1).reshape(1, 1)],
        epsilon=[0.01, 0.01, 0.01],
        momentum=[0.09, 0.09, 0.09],
        reg_L1Norm=[0.0002, 0.0002, 0.0002],
        reg_L2Norm=[0.0002, 0.0002, 0.0002],
        corruptor=None,
        reg_costs=[1.0, 1.0, 1.0],
        costs=[CFct.SquaredError, CFct.SquaredError, CFct.SquaredError],
        reg_sparseness=[0.1, 0.1, 0.1],
        desired_sparseness=[0.01, 0.01, 0.01],
        costs_sparseness=[CFct.SquaredError, CFct.SquaredError, CFct.SquaredError],
        update_offsets=[0.01, 0.01, 0.01],
        restrict_gradient=[0.01, 0.01, 0.01],
        restriction_norm='Mat')
    assert numx.all(res)
    print('successfully passed!')
    sys.stdout.flush()
def test_calculate_cost(self):
    sys.stdout.write('FNN_trainer -> Performing calculate_cost test ... ')
    sys.stdout.flush()
    numx.random.seed(42)
    l1 = FullConnLayer(2 * 2, 3 * 3)
    l2 = FullConnLayer(3 * 3, 2 * 2)
    l3 = FullConnLayer(2 * 2, 1 * 1)
    model = MODEL.Model([l1, l2, l3])
    trainer = TRAINER.GDTrainer(model)
    model.forward_propagate(numx.arange(4).reshape(1, 4))
    cost = model.calculate_cost(
        [numx.arange(9).reshape(1, 9),
         numx.arange(4).reshape(1, 4),
         numx.arange(1).reshape(1, 1)],
        [CFct.SquaredError, CFct.SquaredError, CFct.SquaredError],
        [1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0],
        [CFct.SquaredError, CFct.SquaredError, CFct.SquaredError],
        [1.0, 1.0, 1.0])
    assert numx.all(numx.abs(cost - 117.72346036) < self.epsilon)
    cost = model.calculate_cost(
        [numx.arange(9).reshape(1, 9),
         numx.arange(4).reshape(1, 4),
         numx.arange(1).reshape(1, 1)],
        [CFct.SquaredError, CFct.SquaredError, CFct.SquaredError],
        [1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0],
        [None, None, None],
        [0.0, 0.0, 0.0])
    assert numx.all(numx.abs(cost - 108.42118343) < self.epsilon)
    cost = model.calculate_cost(
        [None, None, numx.arange(1).reshape(1, 1)],
        [None, None, CFct.SquaredError],
        [0.0, 0.0, 1.0],
        [0.0, 0.0, 1.0],
        [None, None, None],
        [0.0, 0.0, 0.0])
    assert numx.all(numx.abs(cost - 0.10778406) < self.epsilon)
    print('successfully passed!')
    sys.stdout.flush()
def test___init__(self):
    sys.stdout.write('FNN_trainer -> Performing init test ... ')
    sys.stdout.flush()
    l1 = FullConnLayer(2 * 2, 3 * 3)
    l2 = FullConnLayer(3 * 3, 2 * 2)
    l3 = FullConnLayer(2 * 2, 1 * 1)
    model = MODEL.Model([l1, l2, l3])
    trainer = TRAINER.GDTrainer(model)
    assert numx.all(trainer._old_grad[0][0].shape == (4, 9))
    assert numx.all(trainer._old_grad[0][1].shape == (1, 9))
    assert numx.all(trainer._old_grad[1][0].shape == (9, 4))
    assert numx.all(trainer._old_grad[1][1].shape == (1, 4))
    assert numx.all(trainer._old_grad[2][0].shape == (4, 1))
    assert numx.all(trainer._old_grad[2][1].shape == (1, 1))
    print('successfully passed!')
    sys.stdout.flush()
def check(self, data, delta, act1, act2, act3, reg_sparseness,
          desired_sparseness, cost_sparseness, reg_targets, desired_targets,
          cost_targets, full):
    # Builds a 3-layer FNN with the given activations and regularization
    # settings and returns the maximum deviation of the finite-difference
    # gradient check over weights and biases.
    connections = None
    if full is False:
        connections = generate_2d_connection_matrix(6, 6, 3, 3, 2, 2, False)
    model1 = FullConnLayer(6 * 6, 4 * 4,
                           activation_function=act1,
                           initial_weights='AUTO',
                           initial_bias=0.0,
                           initial_offset=0.0,
                           connections=connections,
                           dtype=numx.float64)
    model2 = FullConnLayer(4 * 4, 5 * 5,
                           activation_function=act2,
                           initial_weights='AUTO',
                           initial_bias=0.0,
                           initial_offset=0.5,
                           dtype=numx.float64)
    model3 = FullConnLayer(5 * 5, 6 * 6,
                           activation_function=act3,
                           initial_weights='AUTO',
                           initial_bias=0.0,
                           initial_offset=0.5,
                           dtype=numx.float64)
    model = MODEL.Model([model1, model2, model3])
    trainer = TRAINER.GDTrainer(model)
    _, _, maxw, maxb = model.finit_differences(
        delta, data, desired_targets, cost_targets, reg_targets,
        desired_sparseness, cost_sparseness, reg_sparseness)
    return numx.max([maxw, maxb])
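# A minimal sketch of how the helper above could be exercised from a test
# method. The activation functions, delta, target choices, and tolerance are
# illustrative assumptions only (including the AFct/CFct import aliases),
# not values taken from the original test suite.
def test_finite_differences_sketch(self):
    numx.random.seed(42)
    data = numx.random.rand(10, 6 * 6)
    # Only the last layer (6 * 6 outputs) gets a supervised target here;
    # sparseness terms are switched off.
    max_diff = self.check(data=data,
                          delta=0.001,
                          act1=AFct.Sigmoid,
                          act2=AFct.Sigmoid,
                          act3=AFct.Sigmoid,
                          reg_sparseness=[0.0, 0.0, 0.0],
                          desired_sparseness=[0.0, 0.0, 0.0],
                          cost_sparseness=[None, None, None],
                          reg_targets=[0.0, 0.0, 1.0],
                          desired_targets=[None, None, data],
                          cost_targets=[None, None, CFct.SquaredError],
                          full=True)
    assert max_diff < 1e-4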
                          activation_function=ACT.ExponentialLinear(),
                          initial_weights='AUTO',
                          initial_bias=0.0,
                          initial_offset=numx.mean(train_data, axis=0).reshape(
                              1, train_data.shape[1]),
                          connections=None,
                          dtype=numx.float64)
l2 = LAYER.FullConnLayer(input_dim=1000,
                         output_dim=train_label.shape[1],
                         activation_function=ACT.SoftMax(),
                         initial_weights='AUTO',
                         initial_bias=0.0,
                         initial_offset=0.0,
                         connections=None,
                         dtype=numx.float64)
model = MODEL.Model([l1, l2])

# Choose an optimizer
trainer = TRAINER.ADAGDTrainer(model)
# trainer = TRAINER.GDTrainer(model)

# Train the model
max_epochs = 20
batch_size = 20
eps = 0.1
print('Training')
for epoch in range(1, max_epochs + 1):
    train_data, train_label = npExt.shuffle_dataset(train_data, train_label)
    for b in range(0, train_data.shape[0], batch_size):
        trainer.train(
            data=train_data[b:b + batch_size, :],
def test_FNN_convergence(self):
    sys.stdout.write('FNN_trainer -> Performing several convergence tests ... ')
    sys.stdout.flush()
    numx.random.seed(42)
    x = numx.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
    # One-hot labels: [1, 0] = class 0, [0, 1] = class 1, otherwise SoftMax would not work
    l = numx.array([[1, 0], [0, 1], [0, 1], [1, 0]])
    for act_out in [AFct.SoftMax]:
        for act_in in [AFct.SoftSign, AFct.SoftPlus, AFct.Sigmoid,
                       AFct.HyperbolicTangent]:
            for cost in [CFct.CrossEntropyError, CFct.SquaredError,
                         CFct.NegLogLikelihood]:
                # Case 1: ADAGD trainer without sparseness regularization
                l1 = FullConnLayer(input_dim=2,
                                   output_dim=5,
                                   activation_function=act_in,
                                   initial_weights='AUTO',
                                   initial_bias=0.0,
                                   initial_offset=0.5,
                                   connections=None)
                l2 = FullConnLayer(input_dim=5,
                                   output_dim=2,
                                   activation_function=act_out,
                                   initial_weights='AUTO',
                                   initial_bias=0.0,
                                   initial_offset=0.5,
                                   connections=None)
                model = MODEL.Model([l1, l2])
                trainer = TRAINER.ADAGDTrainer(model)
                for _ in range(1000):
                    trainer.train(data=x,
                                  labels=[None, l],
                                  epsilon=[0.3, 0.3],
                                  reg_L1Norm=[0.000, 0.000],
                                  reg_L2Norm=[0.000, 0.000],
                                  corruptor=None,
                                  reg_costs=[0.0, 1.0],
                                  costs=[None, cost],
                                  reg_sparseness=[0.0, 0.0],
                                  desired_sparseness=[0.0, 0.0],
                                  costs_sparseness=[None, None],
                                  update_offsets=[0.1, 0.1],
                                  restrict_gradient=0.0,
                                  restriction_norm='Mat')
                model.forward_propagate(x)
                assert numx.all(trainer.calculate_errors(l) == 0)

                # Case 2: ADAGD trainer with sparseness regularization on the hidden layer
                l1 = FullConnLayer(input_dim=2,
                                   output_dim=5,
                                   activation_function=act_in,
                                   initial_weights='AUTO',
                                   initial_bias=0.0,
                                   initial_offset=0.5,
                                   connections=None)
                l2 = FullConnLayer(input_dim=5,
                                   output_dim=2,
                                   activation_function=act_out,
                                   initial_weights='AUTO',
                                   initial_bias=0.0,
                                   initial_offset=0.5,
                                   connections=None)
                model = MODEL.Model([l1, l2])
                trainer = TRAINER.ADAGDTrainer(model)
                for _ in range(1000):
                    trainer.train(data=x,
                                  labels=[None, l],
                                  epsilon=[0.3, 0.3],
                                  reg_L1Norm=[0.000, 0.000],
                                  reg_L2Norm=[0.000, 0.000],
                                  corruptor=None,
                                  reg_costs=[0.0, 1.0],
                                  costs=[None, cost],
                                  reg_sparseness=[0.1, 0.0],
                                  desired_sparseness=[0.1, 0.0],
                                  costs_sparseness=[CFct.SquaredError, None],
                                  update_offsets=[0.1, 0.1],
                                  restrict_gradient=0.0,
                                  restriction_norm='Mat')
                model.forward_propagate(x)
                assert numx.all(trainer.calculate_errors(l) == 0)

                # Case 3: plain GD trainer with momentum and sparseness regularization
                l1 = FullConnLayer(input_dim=2,
                                   output_dim=5,
                                   activation_function=act_in,
                                   initial_weights='AUTO',
                                   initial_bias=0.0,
                                   initial_offset=0.5,
                                   connections=None)
                l2 = FullConnLayer(input_dim=5,
                                   output_dim=2,
                                   activation_function=act_out,
                                   initial_weights='AUTO',
                                   initial_bias=0.0,
                                   initial_offset=0.5,
                                   connections=None)
                model = MODEL.Model([l1, l2])
                trainer = TRAINER.GDTrainer(model)
                for _ in range(2000):
                    trainer.train(data=x,
                                  labels=[None, l],
                                  epsilon=[0.3, 0.3],
                                  momentum=[0.9, 0.9],
                                  reg_L1Norm=[0.000, 0.000],
                                  reg_L2Norm=[0.000, 0.000],
                                  corruptor=None,
                                  reg_costs=[0.0, 1.0],
                                  costs=[None, cost],
                                  reg_sparseness=[0.1, 0.0],
                                  desired_sparseness=[0.1, 0.0],
                                  costs_sparseness=[CFct.SquaredError, None],
                                  update_offsets=[0.1, 0.1],
                                  restrict_gradient=[0.0, 0.0],
                                  restriction_norm='Mat')
                model.forward_propagate(x)
                assert numx.all(trainer.calculate_errors(l) == 0)
    print('successfully passed!')
def train_Hebbian_descent_model(train_data, train_label, centered, act, epochs,
                                epsilon, batch_size, weightdecay):
    """ Performs a training trial for Hebbian descent and returns the model and the absolute errors.

    :param train_data: Training data.
    :type train_data: numpy array

    :param train_label: Training labels.
    :type train_label: numpy array

    :param centered: True if centering is used, False otherwise.
    :type centered: bool

    :param act: Activation function to be used.
    :type act: pydeep.base.activationFunction object

    :param epochs: Number of epochs to be used.
    :type epochs: int

    :param epsilon: Learning rate to be used.
    :type epsilon: float

    :param batch_size: Batch size to be used.
    :type batch_size: int

    :param weightdecay: Weight decay to be used.
    :type weightdecay: float

    :return: Trained model and mean absolute training error per datapoint.
    :rtype: pydeep.fnn.model.Model, numpy array
    """
    # Get input and output dimensions and the number of datapoints
    input_dim = train_data.shape[1]
    output_dim = train_label.shape[1]
    num_pattern = train_data.shape[0]
    if train_data.shape[0] != train_label.shape[0]:
        raise Exception("Length of the input and output datasets must match")

    # If not centered, set the offset parameter to zero
    mu = 0
    if centered:
        mu = np.mean(train_data, axis=0).reshape(1, input_dim)

    # Create a single-layer model
    model = fnnmodel.Model([fnnlayer.FullConnLayer(input_dim=input_dim,
                                                   output_dim=output_dim,
                                                   activation_function=act,
                                                   initial_weights='AUTO',
                                                   initial_bias=0.0,
                                                   initial_offset=mu,
                                                   connections=None,
                                                   dtype=np.float64)])

    # Loop over epochs and datapoints
    for e in range(epochs):
        for b in range(0, num_pattern, batch_size):
            # Get the next batch of datapoints
            input_point = np.copy(train_data[b:(b + batch_size), :].reshape(
                batch_size, train_data.shape[1]))
            output_point = np.copy(train_label[b:(b + batch_size), :].reshape(
                batch_size, train_label.shape[1]))

            # Calculate the output
            z = np.dot(input_point - model.layers[0].offset,
                       model.layers[0].weights) + model.layers[0].bias
            h = model.layers[0].activation_function.f(z)

            # Calculate the difference between output and target
            deltas = (h - output_point)

            # Calculate the updates
            update_b_new = np.sum(-deltas, axis=0)
            update_w_new = -np.dot((input_point - model.layers[0].offset).T, deltas)

            # Update the model
            model.layers[0].weights += (epsilon / batch_size * update_w_new
                                        - weightdecay * model.layers[0].weights)
            model.layers[0].bias += epsilon / batch_size * update_b_new

    # Calculate the mean absolute deviation on the training data
    err_train = np.abs(model.forward_propagate(train_data) - train_label)

    # Return model and errors
    return model, np.mean(err_train, axis=1)
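# A minimal usage sketch for the routine above. The toy data, activation
# function, and hyperparameters are illustrative assumptions only; AFct is
# assumed to alias pydeep's activation function module. Note that the number
# of patterns should be divisible by batch_size, since the routine reshapes
# each batch to exactly (batch_size, dim).
if __name__ == '__main__':
    np.random.seed(42)
    train_data = np.random.rand(100, 10)    # 100 patterns, 10 inputs
    train_label = np.random.rand(100, 4)    # 100 patterns, 4 targets
    model, err = train_Hebbian_descent_model(train_data=train_data,
                                             train_label=train_label,
                                             centered=True,
                                             act=AFct.Sigmoid(),
                                             epochs=10,
                                             epsilon=0.1,
                                             batch_size=10,
                                             weightdecay=0.0)
    print('Mean absolute training error: %.4f' % np.mean(err))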