def test2layergradientsWBN(samples=1, dimensions=3072):
    print("\n\nTesting 2-layer gradients (WITH BN, NO REG) using a batch size of {}".format(samples))

    trainingData, trainingLabels, encodedTrainingLabels = loadData("Datasets/cifar-10-batches-mat/data_batch_1.mat")

    trainingData = trainingData[0:dimensions, 0:samples]
    trainingLabels = trainingLabels[0:dimensions, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[0:dimensions, 0:samples]

    network = Model()
    linear = Linear(dimensions, 50, regularization=0.00, initializer="xavier")
    network.addLayer(linear)
    bnlayer = BatchNormalization(50)
    network.addLayer(bnlayer)
    network.addLayer(Relu())

    linear2 = Linear(50, 10, regularization=0.00, initializer="xavier")
    network.addLayer(linear2)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")

    #network.fit(trainingData, encodedTrainingLabels, epochs=200, validationData=None, batch_size=samples)
    network.predict(trainingData, updateInternal=True)
    network.backpropagate(encodedTrainingLabels)

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    numerical_gradW1 = compute_grads_w_BN(1e-4, linear.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb1 = compute_grads_w_BN(1e-4, linear.b, trainingData, encodedTrainingLabels, network)
    numerical_gradgamma = compute_grads_w_BN(1e-4, bnlayer.gamma, trainingData, encodedTrainingLabels, network)
    numerical_gradbeta = compute_grads_w_BN(1e-4, bnlayer.beta, trainingData, encodedTrainingLabels, network)
    numerical_gradW2 = compute_grads_w_BN(1e-4, linear2.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb2 = compute_grads_w_BN(1e-4, linear2.b, trainingData, encodedTrainingLabels, network)

    # Each parameter gets its own result variable (the originals shadowed each other).
    print("W1")
    relative_errorW1 = grad_difference(linear.gradW, numerical_gradW1)
    print("b1")
    relative_errorb1 = grad_difference(linear.gradb, numerical_gradb1)
    print("gamma1")
    relative_errorGamma = grad_difference(bnlayer.gradGamma, numerical_gradgamma)
    print("beta1")
    relative_errorBeta = grad_difference(bnlayer.gradBeta, numerical_gradbeta)
    print("W2")
    relative_errorW2 = grad_difference(linear2.gradW, numerical_gradW2)
    print("b2")
    relative_errorb2 = grad_difference(linear2.gradb, numerical_gradb2)
    print("\n")
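# The grad_difference helper used above lives elsewhere in this repo. As a
# hedged sketch (not the repo's actual code), the standard gradient check it
# presumably performs reduces to a relative error of this form:
import numpy as np

def relative_error_sketch(analytic, numerical, eps=1e-9):
    """Max elementwise relative error between analytic and numerical gradients.

    Values around 1e-6 or smaller usually indicate a correct backward pass.
    """
    rel = np.abs(analytic - numerical) / np.maximum(eps, np.abs(analytic) + np.abs(numerical))
    return rel.max()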
def __init__(self, block, layers, width=1, num_classes=10, input_size=32, weight_noise=False,
             act_noise_a=False, act_noise_b=False, rank=5, noise_sd=0.0,
             m_test=1, m_train=1, learn_noise=False):
    super(ResNet_Cifar, self).__init__()
    self.weight_noise = weight_noise
    self.act_noise_a = act_noise_a
    self.act_noise_b = act_noise_b
    self.rank = rank

    inplanes = int(16 * width)
    self.inplanes = inplanes
    self.conv1 = Conv2d(3, inplanes, kernel_size=3, stride=1, padding=1, bias=False,
                        act_dim_a=input_size, act_dim_b=input_size)
    # Use inplanes here (not a hard-coded 16) so bn1 still matches conv1 when width != 1.
    self.bn1 = nn.BatchNorm2d(inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.layer1 = self._make_layer(block, inplanes, layers[0], input_size=input_size)
    self.layer2 = self._make_layer(block, 2 * inplanes, layers[1], stride=2, input_size=input_size)
    self.layer3 = self._make_layer(block, 4 * inplanes, layers[2], stride=2, input_size=input_size // 2)
    self.avgpool = nn.AvgPool2d(8, stride=1)
    self.fc = Linear(4 * inplanes * block.expansion, num_classes)
    self.num_classes = num_classes
    self.learn_noise = learn_noise
    self.noise_sd = torch.tensor(noise_sd, requires_grad=learn_noise)
    self.m_test = m_test
    self.m_train = m_train

    for m in self.modules():
        if isinstance(m, Conv2d):
            nn.init.kaiming_normal_(m.weight.data)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
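# Hypothetical usage sketch (not from the source): the layer counts follow the
# standard CIFAR ResNet recipe, so three stages of 3 blocks each would give a
# ResNet-20. `BasicBlock` is assumed to be the residual block class defined
# elsewhere in this repo.
#
# model = ResNet_Cifar(BasicBlock, [3, 3, 3], width=1, num_classes=10)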
from network import Network
from utils import LOG_INFO, lc_plot
from layers import Relu, Sigmoid, Linear, Gelu
from loss import EuclideanLoss, SoftmaxCrossEntropyLoss, HingeLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import time

NAME = "2layersRelu.HingeLoss"

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architectures
model = Network()
model.add(Linear('fc0', 784, 500, 0.01, 'relu', 'act0'))
model.add(Linear('fc1', 500, 256, 0.01, 'relu', 'act1'))
model.add(Linear('fc2', 256, 10, 0.01, None, 'act2'))

loss = HingeLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
config = {
    'learning_rate': 0.1,
    'weight_decay': 0.0005,
    'momentum': 0.9,
    'batch_size': 500,
# plt.axis('off')

train_data, test_data, train_label, test_label = load_mnist_4d('data')

# Your model definition here
# You should explore different model architectures
model = Network()
model.add(Conv2D('conv1', 1, 12, 3, 1, 1))
model.add(Relu('relu1'))
model.add(AvgPool2D('pool1', 2, 0))  # output shape: N x 12 x 14 x 14
model.add(Conv2D('conv2', 12, 10, 3, 1, 1))
model.add(Relu('relu2'))
model.add(AvgPool2D('pool2', 2, 0))  # output shape: N x 10 x 7 x 7
model.add(Reshape('flatten', (-1, 49 * 10)))
model.add(Linear('fc3', 49 * 10, 10, 0.1))

# loss = EuclideanLoss(name='loss')
loss = SoftmaxCrossEntropyLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
# np.random.seed(1626)
config = {
    'learning_rate': 0.01,
    'weight_decay': 0.000,
    'momentum': 0.9,
def paramSearch(method="range"):
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.20)

    bestLambda = 0.0
    bestLR = 0.0
    bestValAcc = 0.0
    bestLoss = 0.0
    bestModel = None

    data = [[], [], []]

    if method == "range":
        lambdaValues = np.arange(0, 0.05, 0.001)
        lrValues = np.arange(0.04, 0.08, 0.005)
    elif method == "sampling":
        lrValues = np.random.uniform(0.06, 0.07, 15)
        lambdaValues = np.random.uniform(0.001, 0.005, 15)

    data.append((lrValues.shape[0], lambdaValues.shape[0]))  # Append axis dimensions for 3D plotting

    for lambdaValue in lambdaValues:
        for lr in lrValues:
            print("Lambda:{}".format(lambdaValue))
            print("LR:{}".format(lr))

            network = Model()
            network.addLayer(Linear(32*32*3, 50, regularization=lambdaValue, initializer="he"))
            network.addLayer(BatchNormalization(50, trainable=True))
            network.addLayer(Relu())
            network.addLayer(Linear(50, 30, regularization=lambdaValue, initializer="he"))
            network.addLayer(BatchNormalization(30, trainable=True))
            network.addLayer(Relu())
            network.addLayer(Linear(30, 10, regularization=lambdaValue, initializer="he"))
            network.addLayer(Softmax())

            sgd = SGD(lr=lr, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)
            network.compile(sgd, "cce")

            timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
            network.fit(trainingData, trainingLabels, epochs=20,
                        validationData=(validationData, validationLabels),
                        batch_size=100, verbose=False)

            #plotAccuracy(network, "plots/", timestamp)
            #plotLoss(network, "plots/", timestamp)

            loss, acc = network.evaluate(validationData, validationLabels)
            print("Val loss: {} , Val acc: {}".format(loss, acc))
            print("\n\n")

            data[0].append(lr)
            data[1].append(lambdaValue)
            data[2].append(acc)

            if acc > bestValAcc:
                bestLambda = lambdaValue
                bestLR = lr
                bestValAcc = acc
                bestLoss = loss
                bestModel = network

    loss, acc = bestModel.evaluate(testingData, testingLabels)
    print("Test loss: {} , Test acc: {}".format(loss, acc))
    print("\n\n")

    return bestLambda, bestLR, bestValAcc, bestLoss, data
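# A minimal plotting sketch (not part of the source) showing how the `data`
# returned by paramSearch could be rendered as a 3D scatter of validation
# accuracy over the (lr, lambda) grid. Assumes matplotlib is available; the
# trailing shape tuple appended above is ignored here.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (registers the 3d projection on older matplotlib)

def plot_param_search(data):
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.scatter(data[0], data[1], data[2])
    ax.set_xlabel("learning rate")
    ax.set_ylabel("lambda")
    ax.set_zlabel("validation accuracy")
    plt.show()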
from network import Network
from utils import LOG_INFO, make_plot
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss, SoftmaxCrossEntropyLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import time

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architectures
model1 = Network(name='model1')
model1.add(Linear('m1_fc1', 784, 256, 0.01))
model1.add(Sigmoid('m1_fc2'))  # renamed from 'm1_fc1' to avoid a duplicate layer name
model1.add(Linear('m1_fc3', 256, 10, 0.01))

model2 = Network(name='model2')
model2.add(Linear('m2_fc1', 784, 256, 0.01))
model2.add(Relu('m2_fc2'))
model2.add(Linear('m2_fc3', 256, 10, 0.01))

model3 = Network(name='model3')
model3.add(Linear('m3_fc1', 784, 512, 0.01))
model3.add(Sigmoid('m3_fc2'))
model3.add(Linear('m3_fc3', 512, 128, 0.01))
model3.add(Sigmoid('m3_fc4'))
model3.add(Linear('m3_fc5', 128, 10, 0.01))

model4 = Network(name='model4')
model.add(Conv2D('conv1', in_channel=1, out_channel=4, kernel_size=3, pad=1,
                 init_std=0.01))
model.add(Relu('relu1'))
model.add(AvgPool2D('pool1', kernel_size=2, pad=0))  # output shape: N x 4 x 14 x 14
model.add(Conv2D('conv2', in_channel=4, out_channel=4, kernel_size=3, pad=1,
                 init_std=0.01))
model.add(Relu('relu2'))
model.add(AvgPool2D('pool2', kernel_size=2, pad=0))  # output shape: N x 4 x 7 x 7
model.add(Reshape('flatten', (-1, 196)))
model.add(Linear('fc3', in_num=196, out_num=10, init_std=0.1))

'''
# LeNet
# (paddings adjusted to match the annotated shapes: pad=2 keeps 28x28 after conv1,
#  pad=0 gives 10x10 after conv2, so fc3's in_num=400 works out)
model = Network()
model.add(Conv2D('conv1', in_channel=1, out_channel=6, kernel_size=5, pad=2,
                 init_std=0.01))
model.add(Relu('relu1'))
model.add(AvgPool2D('pool1', kernel_size=2, pad=0))  # output shape: N x 14 x 14 x 6
model.add(Conv2D('conv2', in_channel=6, out_channel=16, kernel_size=5, pad=0,
                 init_std=0.01))
model.add(Relu('relu2'))
model.add(AvgPool2D('pool2', kernel_size=2, pad=0))  # output shape: N x 5 x 5 x 16 = N x 400
#model.add(Reshape('flatten', (-1, 196)))
model.add(Linear('fc3', in_num=400, out_num=120, init_std=0.1))
model.add(Relu('relu3'))
model.add(Linear('fc4', in_num=120, out_num=10, init_std=0.1))
'''
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import numpy as np

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architectures
model = Network()
model.add(Linear('fc1', 784, 300, 0.01))
model.add(Sigmoid('Sigmoid1'))
model.add(Linear('fc2', 300, 300, 0.01))
model.add(Relu('Relu1'))
model.add(Linear('fc3', 300, 10, 0.01))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
config = {
    'learning_rate': 0.1,
    'weight_decay': 0.0001,
    'momentum': 0.7,
def __init__(self, nfeat, nhid, nclass, dropout):
    super(MLP, self).__init__()
    self.Linear1 = Linear(nfeat, nhid, dropout)
    self.Linear2 = Linear(nhid, nclass, dropout)
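# A plausible forward pass for this MLP (not shown in the source), assuming the
# custom Linear layer handles its own dropout and the model is used for
# classification with a log-softmax output, as is common in GCN-style code:
#
# def forward(self, x):
#     x = F.relu(self.Linear1(x))
#     x = self.Linear2(x)
#     return F.log_softmax(x, dim=1)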
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss  #, SoftmaxCrossEntropyLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import numpy as np

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architectures
init_std = 0.01
model = Network()
model.add(Linear('fc1', 784, 50, init_std))
model.add(Sigmoid('ac1'))
model.add(Linear('fc2', 50, 10, init_std))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
config = {
    'learning_rate': 0.003,
    'weight_decay': 1e-4,
    'momentum': 0.9,
    'batch_size': 100,
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from datatype import Tensor


def to_one_hot(vector: Tensor) -> Tensor:
    """Create a one-hot encoding of a vector."""
    oh = np.zeros((vector.shape[0], vector.max() + 1))
    oh[np.arange(vector.shape[0]), vector] = 1
    return oh


# Read the CSV once and split off the label column (the original read it twice).
raw = pd.read_csv("mnist_train.csv", header=None).values
train = raw[:, 1:]
train_label = raw[:, 0]

net = Network(layers=[
    Linear(784, 128),
    Sigmoid(),
    Linear(128, 10),
])
loss = CrossEntropyLoss()
optim = SGD(5e-2, 0.001)

x_train, x_val, y_train, y_val = train_test_split(
    train.astype(np.float32) / 255,
    train_label.astype(np.int32),
    test_size=0.2,
    random_state=42,
)

# to_one_hot
y_train = to_one_hot(y_train)
y_val = to_one_hot(y_val)

batch_size = 100
progress_bar = auto.tqdm(range(200))
accuracies: dict = {"train": [], "val": [], "test": []}
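# A hedged sketch of the epoch loop that presumably follows (the source cuts
# off here). The Network/CrossEntropyLoss/SGD interfaces below are assumptions;
# only accuracy_score is a known sklearn call.
#
# for _ in progress_bar:
#     for i in range(0, x_train.shape[0], batch_size):
#         xb, yb = x_train[i:i + batch_size], y_train[i:i + batch_size]
#         preds = net.forward(xb)
#         net.backward(loss.grad(preds, yb))
#         optim.step(net)
#     val_preds = net.forward(x_val)
#     accuracies["val"].append(
#         accuracy_score(y_val.argmax(axis=1), val_preds.argmax(axis=1)))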
from mnist import load_mnist_for_cnn
import theano.tensor as T
import theano

theano.config.floatX = 'float32'

train_data, test_data, train_label, test_label = load_mnist_for_cnn('data')

model = Network()
model.add(Convolution('conv1', 5, 1, 8, 0.01))   # output size: N x 8 x 24 x 24
model.add(Relu('relu1'))
model.add(Pooling('pool1', 2))                   # output size: N x 8 x 12 x 12
model.add(Convolution('conv2', 3, 8, 12, 0.01))  # output size: N x 12 x 10 x 10
model.add(Relu('relu2'))
model.add(Pooling('pool2', 2))                   # output size: N x 12 x 5 x 5
model.add(Linear('fc3', 12 * 5 * 5, 10, 0.01))   # input reshaped to N x 300 in the Linear layer
model.add(Softmax('softmax'))

loss = CrossEntropyLoss(name='xent')

# optim = SGDOptimizer(learning_rate=0.0001, weight_decay=0.005, momentum=0.9)
optim = AdagradOptimizer(learning_rate=0.0002, eps=1e-5)

input_placeholder = T.ftensor4('input')
label_placeholder = T.fmatrix('label')
model.compile(input_placeholder, label_placeholder, loss, optim)

solve_net(model, train_data, train_label, test_data,
from loss import CrossEntropyLoss
from optimizer import SGDOptimizer
from solve_net import solve_net
from mnist import load_mnist_for_cnn
import theano.tensor as T

train_data, test_data, train_label, test_label = load_mnist_for_cnn('data')

model = Network()
model.add(Convolution('conv1', 5, 1, 4, 0.1))  # output size: N x 4 x 24 x 24
model.add(Relu('relu1'))
model.add(Pooling('pool1', 2))                 # output size: N x 4 x 12 x 12
model.add(Convolution('conv2', 3, 4, 8, 0.1))  # output size: N x 8 x 10 x 10
model.add(Relu('relu2'))
model.add(Pooling('pool2', 2))                 # output size: N x 8 x 5 x 5
model.add(Linear('fc3', 200, 10, 0.1))         # input reshaped to N x 200 in the Linear layer
model.add(Softmax('softmax'))

loss = CrossEntropyLoss(name='xent')
optim = SGDOptimizer(learning_rate=0.001, weight_decay=0.005, momentum=0.9)

input_placeholder = T.ftensor4('input')
label_placeholder = T.fmatrix('label')
model.compile(input_placeholder, label_placeholder, loss, optim)

solve_net(model, train_data, train_label, test_data, test_label,
from sklearn.model_selection import train_test_split, KFold

from datatype import Tensor


def to_one_hot(vector: Tensor) -> Tensor:
    """Create a one-hot encoding of a vector."""
    oh = np.zeros((vector.shape[0], vector.max() + 1))
    oh[np.arange(vector.shape[0]), vector] = 1
    return oh


raw = pd.read_csv("mnist_train.csv", header=None).values
train = raw[:, 1:]
train_label = raw[:, 0]

net = Network(layers=[
    Linear(784, 128),
    ReLU(),
    Linear(128, 1),
])
loss = MSE()
optim = SGD(1e-4, 0.0)

x_train, x_val, y_train, y_val = train_test_split(
    train.astype(np.float32) / 255,
    train_label.astype(np.int32),
    test_size=0.2,
    random_state=42,
)

# to_one_hot is not applied here: the single-output network regresses the digit
# value directly, so MSE is computed against the raw integer labels.

batch_size = 100
progress_bar = auto.tqdm(range(10))
""" The canonical example of a function that can't be learned with a simple linear model is XOR """ import numpy as np from train import train from nn import NeuralNet from layers import Linear, Tanh inputs = np.array([[0, 0], [1, 0], [0, 1], [1, 1]]) targets = np.array([[1, 0], [0, 1], [0, 1], [1, 0]]) net = NeuralNet([ Linear(input_size=2, output_size=2), Tanh(), Linear(input_size=2, output_size=2) ]) train(net, inputs, targets) for x, y in zip(inputs, targets): predicted = net.forward(x) print(x, predicted, y)
config = {
    'learning_rate': 0.01,
    'weight_decay': 0.001,
    'momentum': 0.8,
    'batch_size': 64,
    'max_epoch': 50,
    'disp_freq': 1000,
    'test_epoch': 2,
    'stop_time': args.stop_time
}

if args.train_one_layer:  # `Type(...)` was undefined; the flag is truthy on its own
    config['max_epoch'] = 50

model1 = Network()
model1.add(Linear('fc1', 784, 256, 0.01))
model1.add(Sigmoid('sigmoid1'))
model1.add(Linear('fc2', 256, 10, 0.01))
model1.add(Sigmoid('sigmoid2'))
one_hidden_Sigmoid_Euc = {
    'model': model1,
    'loss': loss1,
    'config': config
}

model2 = Network()
model2.add(Linear("fc1", 784, 256, 0.01))
model2.add(Relu("relu1"))
model2.add(Linear("fc2", 256, 10, 0.01))
model2.add(Sigmoid('sigmoid2'))
one_hidden_Relu_Euc = {
def predictor(predictor_name, representation, output_dim, params=None):
    output = Linear('{}.predictor.Linear'.format(predictor_name),
                    representation, output_dim,
                    initializer='glorot_uniform')
    return output
from load_data import load_mnist_2d
import csv
from utils import writer

train_data, test_data, train_label, test_label = load_mnist_2d('data')

writer.writerow(
    ['type', 'middle_layers', 'learning_rate', 'weight_decay', 'momentum'])

# Your model definition here
# You should explore different model architectures
middle_layers1 = 500
middle_layers2 = 300

model = Network()
model.add(Linear('fc1', 784, middle_layers1, 0.01))
model.add(Relu('relu1'))  # renamed from 'sig1': this layer is a Relu, not a Sigmoid
model.add(Linear('fc2', middle_layers1, middle_layers2, 0.01))
model.add(Sigmoid('sig2'))
model.add(Linear('fc3', middle_layers2, 10, 0.01))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
config = {
    'learning_rate': 0.01,
if len(sys.argv) >= 2:
    filename = sys.argv[1]
else:
    filename = "test"

# Your model definition here
# You should explore different model architectures
model = Network()
model.add(Conv2D('conv1', 1, 4, 3, 1, 1))
model.add(Relu('relu1'))
model.add(AvgPool2D('pool1', 2, 0))  # output shape: N x 4 x 14 x 14
model.add(Conv2D('conv2', 4, 4, 3, 1, 1))
model.add(Relu('relu2'))
model.add(AvgPool2D('pool2', 2, 0))  # output shape: N x 4 x 7 x 7
model.add(Reshape('flatten', (-1, 196)))
model.add(Linear('fc3', 196, 10, 0.1))

loss = SoftmaxCrossEntropyLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
config = {
    'learning_rate': 0.01,
    'weight_decay': 0,
    'momentum': 0.9,
    'batch_size': 100,
    'max_epoch': 10,
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architectures
model = Network()
model.add(Linear('fc1', 784, 512, 0.01))
model.add(Relu('relu1'))
model.add(Linear('fc2', 512, 512, 0.01))
model.add(Relu('relu2'))
model.add(Linear('fc3', 512, 10, 0.01))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
config = {
    'learning_rate': 0.01,
    'weight_decay': 5e-4,
    :return:
    '''
    return [x >> i & 1 for i in range(10)]


inputs = np.array([
    binary_encode(x) for x in range(101, 1024)
])

targets = np.array([
    fizz_buzz_encode(x) for x in range(101, 1024)
])

net = NeuralNet([
    Linear(input_size=10, output_size=50),
    Tanh(),
    Linear(input_size=50, output_size=4)
])

train(net, inputs, targets, num_epochs=5000, optimizer=SGD(lr=0.001))

for x in range(1, 101):
    predicted = net.forward(binary_encode(x))
    predicted_idx = np.argmax(predicted)
    actual_idx = np.argmax(fizz_buzz_encode(x))
    labels = [str(x), "fizz", "buzz", "fizzbuzz"]
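# fizz_buzz_encode is used above but presumably defined before this fragment.
# A sketch of what it should return, consistent with the labels order
# [str(x), "fizz", "buzz", "fizzbuzz"] used in the loop above:
def fizz_buzz_encode(x):
    if x % 15 == 0:
        return [0, 0, 0, 1]   # "fizzbuzz"
    elif x % 5 == 0:
        return [0, 0, 1, 0]   # "buzz"
    elif x % 3 == 0:
        return [0, 1, 0, 0]   # "fizz"
    else:
        return [1, 0, 0, 0]   # the number itself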
def __init__(self, state_size: int, n_heads: int, dropout: float = 0.1,
             input_size: Optional[int] = None):
    # input_size is a feature dimension, so the annotation is Optional[int],
    # not Optional[torch.Tensor].
    super().__init__(state_size, n_heads, dropout)

    self.data_to_kv = Linear(state_size, 2 * n_heads * self.projection_size, bias=False)
    self.data_to_q = Linear(state_size if input_size is None else input_size,
                            n_heads * self.projection_size, bias=False)
    self.reset_parameters()
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import numpy as np

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architectures
model = Network()
model.add(Linear('fc1', 784, 100, 0.01))
model.add(Sigmoid('Sigmoid1'))
model.add(Linear('fc2', 100, 40, 0.01))
model.add(Sigmoid('Sigmoid2'))
model.add(Linear('fc3', 40, 10, 0.01))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
config = {
    'learning_rate': 0.08,
    'weight_decay': 0.001,
    'momentum': 0.9,
x = preprocessing(x)
xt = preprocessing(xt)
#x = np.random.random((n, 1, 28, 28))
#y = np.random.randint(2, size=(n))

# Model
net = Net()
net.push(Conv2d(5, 5, 1, 6))   # 1x28x28 -> 6x24x24
net.push(Relu())
net.push(Maxpooling(2, 2))     # 6x24x24 -> 6x12x12
net.push(Conv2d(5, 5, 6, 16))  # 6x12x12 -> 16x8x8
net.push(Relu())
net.push(Maxpooling(2, 2))     # 16x8x8 -> 16x4x4
net.push(Reshape((256,)))      # trailing comma: (256) is just an int, not a tuple
net.push(Linear(256, 84))
net.push(Relu())
net.push(Softmax(84, 10))

# Data
data = DataProvider()
n = 10000
data.train_input(x[:n], y[:n])
data.test_input(xt, yt)
data.batch_size(16)

lr = 0.0009
gamma = 0.9
for epoch in range(50):  # Python 3: xrange and the print statement are gone
    print('Epoch: ', epoch)
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architectures
model = Network()
model.add(Linear('fc1', 784, 10, 0.01))
model.add(Sigmoid('fc2'))
#model.add(Linear('fc3', 10, 10, 0.01))
#model.add(Sigmoid('fc4'))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
config = {
    'learning_rate': 0.01,
    'weight_decay': 0.0,
    'momentum': 0.9,
    'batch_size': 100,
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architectures
model = Network()
model.add(Linear('fc1', 784, 200, 0.01))
model.add(Sigmoid('actv1'))
model.add(Linear('fc2', 200, 10, 0.01))
model.add(Sigmoid('actv2'))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
config = {
    'learning_rate': 5e-5,
    'weight_decay': 1e-3,
    'momentum': 0.0,
    'batch_size': 100,
    'max_epoch': 100,
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import numpy as np

from layers import Linear, Softmax, Tanh, Dropout, Relu, Sigmod
from nn import NetWork
from loss import Cross_emptropy
from train import train, evaluate

iris = load_iris()
X = iris.data
y = iris.target
y = to_categorical(y).astype(int)  # np.int is deprecated; plain int works here

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

net = NetWork([
    Linear(input_dim=4, output_dim=5),
    Sigmod(),
    Linear(input_dim=5, output_dim=3),
    Dropout(p=0.3),
    Softmax(input_dim=3)
])

train(net, X_train, y_train, epochs=2000, loss=Cross_emptropy(), show_info=True)
acc = evaluate(net, X_test, y_test)
print('acc:{:.2f}%'.format(acc * 100))
from network import Network
from utils import LOG_INFO, lc_plot
from layers import Relu, Sigmoid, Linear, Gelu
from loss import EuclideanLoss, SoftmaxCrossEntropyLoss, HingeLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import time

NAME = "1layersSigmoid.Cross"

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architectures
model = Network()
model.add(Linear('fc0', 784, 256, 0.01, 'sigmoid', 'act0'))
model.add(Linear('fc2', 256, 10, 0.01, None, 'act2'))

loss = SoftmaxCrossEntropyLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model runs forward-backward on one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes the number of iterations between information displays within an epoch.
config = {
    'learning_rate': 0.1,
    'weight_decay': 0.0005,
    'momentum': 0.9,
    'batch_size': 500,
    'max_epoch': 50,
from network import Network
from data_preparation import load_data
from solve_rnn import solve_rnn
import theano.tensor as T
# Assumed module layout, mirroring the sibling CNN scripts in this repo:
from layers import RNN, Linear, Softmax
from loss import CrossEntropyLoss
from optimizer import RMSpropOptimizer

X_train, y_train, X_test, y_test = load_data()

HIDDEN_DIM = 32
INPUT_DIM = 20
OUTPUT_DIM = 10

model = Network()
model.add(RNN('rnn1', HIDDEN_DIM, INPUT_DIM, 0.1))    # output shape: 4 x HIDDEN_DIM
model.add(Linear('fc', HIDDEN_DIM, OUTPUT_DIM, 0.1))  # output shape: 4 x OUTPUT_DIM
model.add(Softmax('softmax'))

loss = CrossEntropyLoss('xent')
optim = RMSpropOptimizer(learning_rate=0.01, rho=0.9)

input_placeholder = T.fmatrix('input')
label_placeholder = T.fmatrix('label')
model.compile(input_placeholder, label_placeholder, loss, optim)

MAX_EPOCH = 6
DISP_FREQ = 1000
TEST_FREQ = 10000

solve_rnn(model, X_train, y_train, X_test, y_test,
          MAX_EPOCH, DISP_FREQ,
def bn_9_layer_test(epochs=2, reg=0.0, lr=0.01, momentum=0.7):
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.20)

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    # Baseline: the same 9-layer architecture with the BatchNormalization
    # layers left out (networkBN below inserts one before each Relu).
    network = Model(name="NO BN")
    network.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    network.addLayer(Relu())
    network.addLayer(Linear(50, 30, regularization=reg, initializer="he"))
    network.addLayer(Relu())
    network.addLayer(Linear(30, 20, regularization=reg, initializer="he"))
    network.addLayer(Relu())
    network.addLayer(Linear(20, 20, regularization=reg, initializer="he"))
    network.addLayer(Relu())
    network.addLayer(Linear(20, 10, regularization=reg, initializer="he"))
    network.addLayer(Relu())
    network.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    network.addLayer(Relu())
    network.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    network.addLayer(Relu())
    network.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    network.addLayer(Relu())
    network.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    network.addLayer(Softmax())

    sgd = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)
    network.compile(sgd, "cce")
    network.fit(trainingData, trainingLabels, epochs=epochs, batch_size=100,
                validationData=(validationData, validationLabels))

    networkBN = Model(name="WITH BN")
    networkBN.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(50, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(50, 30, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(30, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(30, 20, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(20, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(20, 20, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(20, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(20, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(10, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(10, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(10, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(10, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(Softmax())

    sgd2 = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)
    networkBN.compile(sgd2, "cce")
    networkBN.fit(trainingData, trainingLabels, epochs=epochs, batch_size=100,
                  validationData=(validationData, validationLabels))

    #plotAccuracy(network, "plots/", timestamp)
    #plotLoss(network, "plots/", timestamp)

    loss, acc = network.evaluate(testingData, testingLabels)
    print("NO BN: Test loss: {} , Test acc: {}".format(loss, acc))

    loss, acc = networkBN.evaluate(testingData, testingLabels)
    print("W BN: Test loss: {} , Test acc: {}".format(loss, acc))

    multiPlotLoss((network, networkBN), "plots/", timestamp,
                  title="9-layer network loss over epochs, eta:{}, lambda:{}".format(lr, reg))
    multiPlotAccuracy((network, networkBN), "plots/", timestamp,
                      title="9-layer network accuracy over epochs, eta:{}, lambda:{}".format(lr, reg))