def test_save_model(self):
    """
    :return:
    """
    model = Sequential()
    model.add(Linear(input_size=2, out=24, activation='tanh'))
    model.add(Linear(input_size=24, out=2, activation='tanh'))
    pass
def test_load_model(self):
    """
    :return:
    """
    model = Sequential()
    model.add(Linear(input_size=2, out=24, activation='tanh'))
    model.add(Linear(input_size=24, out=2, activation='tanh'))
    file_name = "model.h5py"
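# The two tests above build a model but stop short of exercising persistence.
# A minimal round-trip sketch is given below. It assumes only that each Linear
# layer exposes its parameters as numpy arrays `W` and `B` (as the plain text
# readers further down do); the helper names `save_weights`/`load_weights` and
# the .npz format are illustrative, not this repo's API.
import numpy as np


def save_weights(layers, path):
    """Dump every layer's W and B into a single .npz archive."""
    arrays = {}
    for i, layer in enumerate(layers):
        arrays['W%d' % i] = layer.W
        arrays['B%d' % i] = layer.B
    np.savez(path, **arrays)


def load_weights(layers, path):
    """Restore W and B into a model with the same architecture."""
    with np.load(path) as data:
        for i, layer in enumerate(layers):
            layer.W = data['W%d' % i]
            layer.B = data['B%d' % i]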
def test_init_not_compatible(self):
    with self.assertRaises(NotCompatibleError):
        model = Sequential([
            Linear(input_size=2, out=22, activation='tanh'),
            # second layer's input_size is not compatible with the previous
            # layer's output size
            Linear(input_size=23, out=22, activation='tanh')
        ])
def get_categorical_model(input_neurons, output_neurons, layers=None):
    """
    Creates a model with Categorical Cross-Entropy loss.

    :param input_neurons: number of input neurons
    :param output_neurons: number of output neurons
    :param layers: list of hidden layer sizes; defaults to [25, 25, 25]
    :return: network with Categorical Cross-Entropy loss
    """
    if layers is None:
        layers = [25, 25, 25]
    default_act = 'relu'
    model = Sequential()
    idx = 1
    layers.insert(0, input_neurons)
    while idx < len(layers):
        model.add(Linear(out=layers[idx], input_size=layers[idx - 1],
                         activation=default_act))
        idx += 1
    # model.add(Dropout(prob=0.2))
    model.add(Linear(out=output_neurons, activation='softmax'))

    # Set loss function to model: Sequential object
    ce = LossCrossEntropy()
    model.loss = ce
    return model
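# Possible usage of get_categorical_model. The dimensions below assume a
# flattened-MNIST-style task (784 inputs, 10 classes) and are illustrative only.
mnist_net = get_categorical_model(input_neurons=784, output_neurons=10,
                                  layers=[128, 64])
# Builds Linear(784->128, relu), Linear(128->64, relu), Linear(64->10, softmax)
# with cross-entropy loss attached.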
def _read_txt_old(path):
    print('loading plain text model from', path)

    with open(path, 'r') as f:  # text mode so the content can be split on '\n' as str
        content = f.read().split('\n')

        modules = []
        c = 0
        line = content[c]

        while len(line) > 0:
            if line.startswith(Linear.__name__):  # @UndefinedVariable import error suppression for PyDev users
                lineparts = line.split()
                m = int(lineparts[1])
                n = int(lineparts[2])
                mod = Linear(m, n)
                for i in range(m):
                    c += 1
                    mod.W[i, :] = np.array([float(val) for val in content[c].split() if len(val) > 0])

                c += 1
                mod.B = np.array([float(val) for val in content[c].split()])
                modules.append(mod)

            elif line.startswith(Rect.__name__):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(Rect())
            elif line.startswith(Tanh.__name__):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(Tanh())
            elif line.startswith(SoftMax.__name__):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(SoftMax())
            elif line.startswith(BinStep.__name__):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(BinStep())
            elif line.startswith(NegAbs.__name__):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(NegAbs())
            else:
                raise ValueError('Layer type ' +
                                 [s for s in line.split() if len(s) > 0][0] +
                                 ' not supported by legacy plain text format.')

            c += 1
            line = content[c]

    return Sequential(modules)
def test_init_not_input_size(self):
    """
    :return:
    """
    with self.assertRaises(InputSizeNotFoundError):
        model = Sequential([
            Linear(out=22, activation='tanh'),  # NO input_size is given
            Linear(input_size=23, out=22, activation='tanh')
        ])
def _convert_to_nn(self, svm_model, y_train, x_val):
    # convert to linear NN
    print('converting {} model to linear NN'.format(self.__class__.__name__))
    W = svm_model.coef_.T
    B = svm_model.intercept_

    if numpy.unique(y_train).size == 2:
        linear_layer = Linear(W.shape[0], 2)
        linear_layer.W = numpy.concatenate([-W, W], axis=1)
        linear_layer.B = numpy.concatenate([-B, B], axis=0)
    else:
        linear_layer = Linear(*(W.shape))
        linear_layer.W = W
        linear_layer.B = B

    svm_model = self.model
    nn_model = Sequential([Flatten(), linear_layer])
    if not self.use_gpu:
        nn_model.to_numpy()

    # sanity check model conversion
    self._sanity_check_model_conversion(svm_model, nn_model, x_val)
    print('model conversion sanity check passed')
    return nn_model
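# Minimal numeric check of the binary-case trick in _convert_to_nn above:
# mapping the single SVM score w.x + b onto two symmetric outputs (-score, +score)
# makes the argmax of the linear layer reproduce the SVM decision. This uses
# scikit-learn's LinearSVC directly and plain numpy; it is illustrative and does
# not rely on the wrapper class or Sequential from this repo.
import numpy
from sklearn.svm import LinearSVC
from sklearn.datasets import make_classification

x, y = make_classification(n_samples=200, n_features=5, random_state=0)
svm = LinearSVC(random_state=0).fit(x, y)

W = svm.coef_.T                              # shape (5, 1)
B = svm.intercept_                           # shape (1,)
W2 = numpy.concatenate([-W, W], axis=1)      # shape (5, 2)
B2 = numpy.concatenate([-B, B], axis=0)      # shape (2,)

scores = x @ W2 + B2                         # two-column "linear NN" output
# column 1 wins exactly when w.x + b > 0, i.e. when the SVM predicts class 1
assert (scores.argmax(axis=1) == (svm.decision_function(x) > 0)).all()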
def _read_txt_helper(path):
    with open(path, 'r') as f:  # text mode so the content can be split on '\n' as str
        content = f.read().split('\n')

        modules = []
        c = 0
        line = content[c]

        while len(line) > 0:
            if line.startswith(Linear.__name__):  # @UndefinedVariable import error suppression for PyDev users
                '''
                Format of linear layer
                Linear <rows_of_W> <columns_of_W>
                <flattened weight matrix W>
                <flattened bias vector>
                '''
                _, m, n = line.split()
                m = int(m)
                n = int(n)
                layer = Linear(m, n)
                layer.W = np.array([float(weightstring) for weightstring
                                    in content[c + 1].split()
                                    if len(weightstring) > 0]).reshape((m, n))
                layer.B = np.array([float(weightstring) for weightstring
                                    in content[c + 2].split()
                                    if len(weightstring) > 0])
                modules.append(layer)
                c += 3  # the description of a linear layer spans three lines

            elif line.startswith(Convolution.__name__):  # @UndefinedVariable import error suppression for PyDev users
                '''
                Format of convolution layer
                Convolution <rows_of_W> <columns_of_W> <depth_of_W> <number_of_filters_W> <stride_axis_0> <stride_axis_1>
                <flattened filter block W>
                <flattened bias vector>
                '''
                _, h, w, d, n, s0, s1 = line.split()
                h = int(h)
                w = int(w)
                d = int(d)
                n = int(n)
                s0 = int(s0)
                s1 = int(s1)
                layer = Convolution(filtersize=(h, w, d, n), stride=(s0, s1))
                layer.W = np.array([float(weightstring) for weightstring
                                    in content[c + 1].split()
                                    if len(weightstring) > 0]).reshape((h, w, d, n))
                layer.B = np.array([float(weightstring) for weightstring
                                    in content[c + 2].split()
                                    if len(weightstring) > 0])
                modules.append(layer)
                c += 3  # the description of a convolution layer spans three lines

            elif line.startswith(SumPool.__name__):  # @UndefinedVariable import error suppression for PyDev users
                '''
                Format of sum pooling layer
                SumPool <mask_height> <mask_width> <stride_axis_0> <stride_axis_1>
                '''
                _, h, w, s0, s1 = line.split()
                h = int(h)
                w = int(w)
                s0 = int(s0)
                s1 = int(s1)
                layer = SumPool(pool=(h, w), stride=(s0, s1))
                modules.append(layer)
                c += 1  # one line of parameterized layer description

            elif line.startswith(MaxPool.__name__):  # @UndefinedVariable import error suppression for PyDev users
                '''
                Format of max pooling layer
                MaxPool <mask_height> <mask_width> <stride_axis_0> <stride_axis_1>
                '''
                _, h, w, s0, s1 = line.split()
                h = int(h)
                w = int(w)
                s0 = int(s0)
                s1 = int(s1)
                layer = MaxPool(pool=(h, w), stride=(s0, s1))
                modules.append(layer)
                c += 1  # one line of parameterized layer description

            elif line.startswith(Flatten.__name__):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(Flatten())
                c += 1  # one line of parameterless layer description
            elif line.startswith(Rect.__name__):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(Rect())
                c += 1  # one line of parameterless layer description
            elif line.startswith(Tanh.__name__):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(Tanh())
                c += 1  # one line of parameterless layer description
            elif line.startswith(SoftMax.__name__):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(SoftMax())
                c += 1  # one line of parameterless layer description
            else:
                raise ValueError('Layer type identifier ' +
                                 [s for s in line.split() if len(s) > 0][0] +
                                 ' not supported for reading from plain text file')

            # skip info of previous layers, read in next layer header
            line = content[c]

    return Sequential(modules)
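# For reference, the inverse operation for the Linear entry of this plain text
# format is sketched below. It simply mirrors _read_txt_helper above (header
# line "Linear <rows> <cols>", then the flattened W in row-major order matching
# the reshape in the reader, then the flattened B). The helper name
# _write_txt_linear is illustrative and not part of the model I/O code shown here.
def _write_txt_linear(layer, f):
    m, n = layer.W.shape
    f.write('Linear {} {}\n'.format(m, n))
    f.write(' '.join(str(v) for v in layer.W.flatten()) + '\n')
    f.write(' '.join(str(v) for v in layer.B.flatten()) + '\n')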
# data = np.load('mnist.npz',)
if os.path.exists('mnist.npz'):  # reuse the cached download when available
    with np.load('mnist.npz', 'r', allow_pickle=True) as data:
        X = data['X']
        y = data['y']
else:
    X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
    # X, y = mnist.data / 255.0, mnist.target
    np.savez('mnist.npz', X=X, y=y)

print("data shape:", X.shape, y.shape)

X, Y = X / 255, one_hot(y)
train_x, test_x, train_y, test_y = X[:60000], X[60000:], Y[:60000], Y[60000:]

####  build model
net = Sequential()
net.add(Dense(784, 400))
net.add(ReLU())
# net.add(Sigmoid())
# net.add(SoftPlus())
# net.add(Dropout())
net.add(Dense(400, 128))
net.add(ReLU())
# net.add(BatchMeanSubtraction())
# net.add(ReLU())
# net.add(Dropout())
net.add(Dense(128, 10))
net.add(SoftMax())

# criterion = MultiLabelCriterion()  # loss function
criterion = CrossEntropyCriterion()
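# `one_hot` is called above but not defined in this excerpt. A minimal sketch
# is given below, assuming the labels arrive as digit strings or ints (as
# fetch_openml('mnist_784') returns); the function name matches the call above
# but the implementation here is illustrative.
import numpy as np


def one_hot(labels, num_classes=10):
    labels = np.asarray(labels).astype(int)        # '3' -> 3
    encoded = np.zeros((labels.size, num_classes))
    encoded[np.arange(labels.size), labels] = 1.0
    return encoded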
# ----- Define the parameters for learning -----
nb_classes = train_labels.shape[0]
features = train_features.size(1)
nb_samples = train_features.size(0)
epsilon = 0.1
eta = .2
# nb_samples is now defined in Sequential()
batch_size = config.batch_size
epochs = int(config.epochs / (nb_samples / batch_size))

# Zeta is used to make this work correctly with the Sigma activation function.
# train_label = train_label.add(0.125).mul(0.8)
# test_label = test_label.add(0.125).mul(0.8)

# ----- Implementation of the architecture -----
architecture = Sequential(Linear(2, 25, ReLU()),
                          Linear(25, 25, ReLU()),
                          Linear(25, 25, ReLU()),
                          Linear(25, 2, Sigma()))

# ----- Training -----
round = 1
prev_loss = math.inf
prev_prev_loss = math.inf
errors = []

for epoch in range(epochs):
    for batch_start in range(0, nb_samples, batch_size):
        features = train_features[batch_start:batch_start + batch_size, :]
        labels = train_labels[batch_start:batch_start + batch_size]
        tr_loss, tr_error = architecture.forward(train_features, train_labels)
        architecture.backward()
        architecture.update(eta)

    loss, error = architecture.forward(test_features, test_labels)
    print(' --- Epoch ', round, ' Test Loss: ', loss.item(), '---',
def test_Sequential(self):
    np.random.seed(42)
    torch.manual_seed(42)
    batch_size, n_in = 2, 4
    for _ in range(100):
        # layers initialization
        # torch updates running stats as (1 - momentum) * running + momentum * batch,
        # so momentum = 1. - alpha presumably matches the custom layer's
        # moving_mean = alpha * moving_mean + (1 - alpha) * batch_mean convention.
        alpha = 0.9
        torch_layer = torch.nn.BatchNorm1d(n_in,
                                           eps=BatchNormalization.EPS,
                                           momentum=1. - alpha,
                                           affine=True)
        torch_layer.bias.data = torch.from_numpy(
            np.random.random(n_in).astype(np.float32))

        custom_layer = Sequential()
        bn_layer = BatchNormalization(alpha)
        bn_layer.moving_mean = torch_layer.running_mean.numpy().copy()
        bn_layer.moving_variance = torch_layer.running_var.numpy().copy()
        custom_layer.add(bn_layer)
        scaling_layer = ChannelwiseScaling(n_in)
        scaling_layer.gamma = torch_layer.weight.data.numpy()
        scaling_layer.beta = torch_layer.bias.data.numpy()
        custom_layer.add(scaling_layer)
        custom_layer.train()

        layer_input = np.random.uniform(
            -5, 5, (batch_size, n_in)).astype(np.float32)
        next_layer_grad = np.random.uniform(
            -5, 5, (batch_size, n_in)).astype(np.float32)

        # 1. check layer output
        custom_layer_output = custom_layer.updateOutput(layer_input)
        layer_input_var = Variable(torch.from_numpy(layer_input),
                                   requires_grad=True)
        torch_layer_output_var = torch_layer(layer_input_var)
        self.assertTrue(
            np.allclose(torch_layer_output_var.data.numpy(),
                        custom_layer_output, atol=1e-6))

        # 2. check layer input grad
        custom_layer_grad = custom_layer.backward(layer_input, next_layer_grad)
        torch_layer_output_var.backward(torch.from_numpy(next_layer_grad))
        torch_layer_grad_var = layer_input_var.grad
        self.assertTrue(
            np.allclose(torch_layer_grad_var.data.numpy(),
                        custom_layer_grad, atol=1e-5))

        # 3. check layer parameters grad
        weight_grad, bias_grad = custom_layer.getGradParameters()[1]
        torch_weight_grad = torch_layer.weight.grad.data.numpy()
        torch_bias_grad = torch_layer.bias.grad.data.numpy()
        self.assertTrue(np.allclose(torch_weight_grad, weight_grad, atol=1e-6))
        self.assertTrue(np.allclose(torch_bias_grad, bias_grad, atol=1e-6))
nn = NN2(X.shape[1], 30, Y.shape[1])
optim_nn = LossMSE()
trainer = Trainer(nn, optim_nn, v=True)
iterations = 200
eta = 0.0001

# In[16]:

cost_nn = trainer.trainBatchGD(X, Y, iterations, eta=eta)
plotCostAndData(nn, X, Y, cost_nn)

# ### Sequential

# In[18]:

nn_seq = Sequential(Linear(D_in, H1), Tanh(),
                    Linear(H1, H2), Tanh(),
                    Linear(H2, D_out))
optim_nn = LossMSE()
trainer = Trainer(nn_seq, optim_nn, v=True)
iterations = 300
eta = 0.0008
nn_seq.modules

# In[19]:

cost_nn_seq = trainer.trainBatchGD(X, Y, iterations, eta=eta)
plotCostAndData(nn_seq, X, Y, cost_nn_seq)

# ## Circular input

# In[248]:
###############################
# Use this example to debug your code, start with logistic regression and then
# test other layers. You do not need to change anything here. This code is
# provided for you to test the layers. Next you will use similar code in the
# MNIST task.
###############################

###############################
#### generate_data
X, Y = generate_two_classes(500)
print("Data dimensions: ", X.shape, Y.shape)
# plt.scatter(X[:,0], X[:,1], c=Y.argmax(axis=-1))
# plt.show()

###############################
#### build model
net = Sequential()
net.add(Dense(2, 4))
net.add(ReLU())
net.add(Dense(4, 2))
net.add(SoftMax())

criterion = MSECriterion()  # loss function

# Optimizer params
optimizer_config = {'learning_rate': 1e-2, 'momentum': 0.9}
optimizer_state = {}

# Looping params
n_epoch = 20
batch_size = 128
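# `generate_two_classes` is not defined in this excerpt. A minimal sketch is
# below, assuming it returns n 2-D points and one-hot targets of shape (n, 2)
# (the commented scatter call above colors points by Y.argmax(axis=-1)); the
# two-Gaussian-blobs construction is illustrative only.
import numpy as np


def generate_two_classes(n):
    half = n // 2
    X0 = np.random.randn(half, 2) + np.array([-2.0, 0.0])     # class 0 blob
    X1 = np.random.randn(n - half, 2) + np.array([2.0, 0.0])  # class 1 blob
    X = np.vstack([X0, X1])
    Y = np.zeros((n, 2))
    Y[:half, 0] = 1.0
    Y[half:, 1] = 1.0
    return X, Y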
"""This file declares the models to be used for testing.""" from modules import Sequential, Linear, ReLU, Tanh, Sigmoid MODEL1 = Sequential("ReLu", Linear(2, 25), ReLU(), Linear(25, 25), ReLU(), Linear(25, 25), ReLU(), Linear(25, 2), Sigmoid()) MODEL2 = Sequential("Tanh", Linear(2, 25), Tanh(), Linear(25, 25), Tanh(), Linear(25, 25), Tanh(), Linear(25, 2), Sigmoid()) MODEL3 = Sequential("ReLu + He", Linear(2, 25, "He"), ReLU(), Linear(25, 25, "He"), ReLU(), Linear(25, 25, "He"), ReLU(), Linear(25, 2, "He"), Sigmoid()) MODEL4 = Sequential("Tanh + Xavier", Linear(2, 25, "Xavier"), Tanh(), Linear(25, 25, "Xavier"), Tanh(), Linear(25, 25, "Xavier"), Tanh(), Linear(25, 2, "Xavier"), Sigmoid()) # Best model is actually almost model 2 MODEL_BEST = Sequential("Best", Linear(2, 25), Tanh(), Linear(25, 25), Tanh(), Linear(25, 25), Tanh(), Linear(25, 2, "He"), Sigmoid())
test_errors = []
for n in range(nb_tries):
    print("Try: " + str(n + 1))
    modules = [
        Linear(2, nb_hidden), ReLU(),
        Linear(nb_hidden, nb_hidden), ReLU(),
        Linear(nb_hidden, nb_hidden), LeakyReLU(0.01),
        Linear(nb_hidden, 2), Tanh()
    ]
    model = Sequential(modules)
    losses, train_errors, validation_errors = train_model(
        model, train_input, train_target, validation_input, validation_target,
        nb_epochs, mini_batch_size, learning_rate, 0, None, 'Adadelta', 'MSE')
    nb_test_errors, test_misclassified = compute_nb_errors(
        model, test_input, test_target, mini_batch_size)
    print('Test error: {:0.2f}% ({:d}/{:d})'.format(
        (100 * nb_test_errors) / test_input.size(0), nb_test_errors,
        test_input.size(0)))
    test_errors.append((100 * nb_test_errors) / test_input.size(0))

    # Plots
    # plot_loss_errors(nb_epochs, losses, train_errors, validation_errors)
    # plot_targets_misclassifications(test_input_not_normalized, test_target, test_misclassified)

# Mean and standard deviation of the model test errors over all the tries
###############################
# Use this example to debug your code, start with logistic regression and then
# test other layers. You do not need to change anything here. This code is
# provided for you to test the layers. Next you will use similar code in the
# MNIST task.
###############################

###############################
#### generate_data
X, Y = generate_spirale(500, 3)
print("Data dimensions: ", X.shape, Y.shape)
plt.scatter(X[:, 0], X[:, 1], c=Y.argmax(axis=-1))
plt.show()

###############################
#### build model
net = Sequential()
net.add(Dense(2, 40))
# net.add(Dropout())
# net.add(BatchMeanSubtraction())
net.add(ReLU())
net.add(Dense(40, 40))
# net.add(Tanh())
net.add(ReLU())
# net.add(Dropout())
net.add(Dense(40, 3))
net.add(SoftMax())

# criterion = MultiLabelCriterion()  # loss function
criterion = CrossEntropyCriterion()

###############################
#### optimizer config
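# As above, `generate_spirale` is not part of this excerpt. A possible sketch,
# assuming a (n, n_classes) signature and one-hot targets; whether n counts
# points per class or in total is not specified here, so this version uses n
# points per class, and the arm geometry and noise level are illustrative.
import numpy as np


def generate_spirale(n, n_classes):
    X, Y = [], []
    for c in range(n_classes):
        r = np.linspace(0.05, 1.0, n)                    # radius grows along the arm
        t = np.linspace(c * 2 * np.pi / n_classes,       # each class gets its own arm
                        c * 2 * np.pi / n_classes + 1.5 * np.pi, n)
        t = t + np.random.randn(n) * 0.1                 # jitter the angle
        X.append(np.stack([r * np.cos(t), r * np.sin(t)], axis=1))
        onehot = np.zeros((n, n_classes))
        onehot[:, c] = 1.0
        Y.append(onehot)
    return np.vstack(X), np.vstack(Y)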
    target = torch.zeros((nb, 2))
    target[(input - 0.5).pow(2).sum(1) < 0.5 / pi, 1] = 1
    target[(input - 0.5).pow(2).sum(1) >= 0.5 / pi, 0] = 1
    return input, target


train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

batch_size = 100
num_batches = len(train_input) // batch_size

# Reset the seeds before each model creation so that
# parameters are initialized the same for a fair comparison.
torch.manual_seed(0)
relu = Sequential(Linear(2, 25), ReLU(),
                  Linear(25, 25), ReLU(),
                  Linear(25, 25), ReLU(),
                  Linear(25, 2))

torch.manual_seed(0)
tanh = Sequential(Linear(2, 25), Tanh(),
                  Linear(25, 25), Tanh(),
                  Linear(25, 25), Tanh(),
                  Linear(25, 2))

criterion = MSE()


def fit(model):
    optimizer = SGD(model.parameters(), model.grads(), lr=0.1)
    losses = []
    print('Epoch | Loss')
    for epoch in range(500):
        epoch_loss = 0
optimizer = adam_optimizer
optimizer_config = {
    'learning_rate': 1e-2,
    'beta1': 9e-1,
    'beta2': 999e-3,
    'epsilon': 10e-8
}
optimizer_state = {}

# Looping params
n_epoch = 20
batch_size = 1024

for activation_name, activation in activations.items():
    nn1 = Sequential()
    nn1.add(Dense(784, 100))
    nn1.add(activation())
    nn1.add(Dense(100, 50))
    nn1.add(activation())
    nn1.add(Dense(50, 10))
    nn1.add(SoftMax())

    print("****************************************************")
    print(f"Training NN with {activation_name} without Batch Normalization")
    print("****************************************************")

    loss_history1 = fit(X_train, y_train, X_val, y_val, nn1, n_epoch,
                        batch_size, criterion, optimizer, optimizer_config,
                        optimizer_state)
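# The config keys above (learning_rate, beta1, beta2, epsilon) follow the
# standard Adam update. `adam_optimizer` itself is not shown in this excerpt;
# the sketch below is an illustrative numpy version with an assumed
# (variables, gradients, config, state) signature, not the repo's actual one.
import numpy as np


def adam_optimizer_sketch(variables, gradients, config, state):
    state.setdefault('t', 0)
    state.setdefault('m', [np.zeros_like(v) for v in variables])
    state.setdefault('v', [np.zeros_like(v) for v in variables])
    state['t'] += 1
    t = state['t']
    lr, b1, b2, eps = (config['learning_rate'], config['beta1'],
                       config['beta2'], config['epsilon'])
    for var, grad, m, v in zip(variables, gradients, state['m'], state['v']):
        m[...] = b1 * m + (1 - b1) * grad          # biased first moment estimate
        v[...] = b2 * v + (1 - b2) * grad ** 2     # biased second moment estimate
        m_hat = m / (1 - b1 ** t)                  # bias correction
        v_hat = v / (1 - b2 ** t)
        var -= lr * m_hat / (np.sqrt(v_hat) + eps)  # in-place parameter update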