Example 1
def test2layergradientsWBN(samples=1, dimensions=3072):

    print("\n\nTesting 2-layer gradients (WITH BN, NO REG) using a batch size of {}".format(samples))
    trainingData, trainingLabels, encodedTrainingLabels = loadData("Datasets/cifar-10-batches-mat/data_batch_1.mat")

    
    trainingData = trainingData[0:dimensions, 0:samples]
    trainingLabels = trainingLabels[:, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[:, 0:samples]
    
    

    network = Model()
    linear = Linear(dimensions, 50, regularization=0.00, initializer="xavier")
    network.addLayer(linear)

    bnlayer = BatchNormalization(50)
    network.addLayer(bnlayer)
    network.addLayer(Relu())

    linear2 = Linear(50, 10, regularization=0.00, initializer="xavier")
    network.addLayer(linear2)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")

    #network.fit(trainingData, encodedTrainingLabels, epochs=200, validationData=None, batch_size=samples)

    network.predict(trainingData, updateInternal=True)
    network.backpropagate(encodedTrainingLabels)
    
    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    
    
    numerical_gradW1 = compute_grads_w_BN(1e-4, linear.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb1 = compute_grads_w_BN(1e-4, linear.b, trainingData, encodedTrainingLabels, network)

    numerical_gradgamma = compute_grads_w_BN(1e-4, bnlayer.gamma, trainingData, encodedTrainingLabels, network)
    numerical_gradbeta = compute_grads_w_BN(1e-4, bnlayer.beta, trainingData, encodedTrainingLabels, network)

    numerical_gradW2 = compute_grads_w_BN(1e-4, linear2.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb2 = compute_grads_w_BN(1e-4, linear2.b, trainingData, encodedTrainingLabels, network)


    print("W1")
    relative_errorW = grad_difference(linear.gradW, numerical_gradW1)
    print("b1")
    relative_errorb = grad_difference(linear.gradb, numerical_gradb1)

    print("gamma1")
    relative_errorW = grad_difference(bnlayer.gradGamma, numerical_gradgamma)
    print("beta1")
    relative_errorb = grad_difference(bnlayer.gradBeta, numerical_gradbeta)

    print("W2")
    relative_errorW2 = grad_difference(linear2.gradW, numerical_gradW2)
    print("b2")
    relative_errorb2 = grad_difference(linear2.gradb, numerical_gradb2)

    print("\n")
Example 2
    def __init__(self,
                 block,
                 layers,
                 width=1,
                 num_classes=10,
                 input_size=32,
                 weight_noise=False,
                 act_noise_a=False,
                 act_noise_b=False,
                 rank=5,
                 noise_sd=0.0,
                 m_test=1,
                 m_train=1,
                 learn_noise=False):
        super(ResNet_Cifar, self).__init__()

        self.weight_noise = weight_noise
        self.act_noise_a = act_noise_a
        self.act_noise_b = act_noise_b
        self.rank = rank

        inplanes = int(16 * width)
        self.inplanes = inplanes
        self.conv1 = Conv2d(3,
                            inplanes,
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            bias=False,
                            act_dim_a=input_size,
                            act_dim_b=input_size)
        self.bn1 = nn.BatchNorm2d(inplanes)  # conv1 outputs int(16 * width) channels
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block,
                                       inplanes,
                                       layers[0],
                                       input_size=input_size)
        self.layer2 = self._make_layer(block,
                                       2 * inplanes,
                                       layers[1],
                                       stride=2,
                                       input_size=input_size)
        self.layer3 = self._make_layer(block,
                                       4 * inplanes,
                                       layers[2],
                                       stride=2,
                                       input_size=input_size // 2)
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.fc = Linear(4 * inplanes * block.expansion, num_classes)
        self.num_classes = num_classes
        self.learn_noise = learn_noise
        self.noise_sd = torch.tensor(noise_sd, requires_grad=learn_noise)
        self.m_test = m_test
        self.m_train = m_train

        for m in self.modules():
            if isinstance(m, Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
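
    # _make_layer() is referenced above but not shown. A minimal sketch in the
    # standard ResNet-CIFAR style; the real helper presumably also forwards
    # input_size and the noise-related flags to each block, which is omitted
    # here, and the plain nn.Conv2d/nn.BatchNorm2d in the downsample path is
    # an assumption.
    def _make_layer(self, block, planes, blocks, stride=1, input_size=32):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # 1x1 convolution so the shortcut matches the residual branch's shape
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)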
Example 3
from network import Network
from utils import LOG_INFO, lc_plot
from layers import Relu, Sigmoid, Linear, Gelu
from loss import EuclideanLoss, SoftmaxCrossEntropyLoss, HingeLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import time

NAME = "2layersRelu.HingeLoss"
train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architecture
model = Network()
model.add(Linear('fc0', 784, 500, 0.01, 'relu', 'act0'))
model.add(Linear('fc1', 500, 256, 0.01, 'relu', 'act1'))
model.add(Linear('fc2', 256, 10, 0.01, None, 'act2'))

loss = HingeLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.
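#       e.g. MNIST has 60,000 training samples, so with batch_size = 500 one
#       epoch corresponds to 120 iterations.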

config = {
    'learning_rate': 0.1,
    'weight_decay': 0.0005,
    'momentum': 0.9,
    'batch_size': 500,
    # plt.axis('off')


train_data, test_data, train_label, test_label = load_mnist_4d('data')

# Your model definition here
# You should explore different model architecture
model = Network()
model.add(Conv2D('conv1', 1, 12, 3, 1, 1))
model.add(Relu('relu1'))
model.add(AvgPool2D('pool1', 2, 0))  # output shape: N x 12 x 14 x 14
model.add(Conv2D('conv2', 12, 10, 3, 1, 1))
model.add(Relu('relu2'))
model.add(AvgPool2D('pool2', 2, 0))  # output shape: N x 10 x 7 x 7
model.add(Reshape('flatten', (-1, 49 * 10)))
model.add(Linear('fc3', 49 * 10, 10, 0.1))

# loss = EuclideanLoss(name='loss')
loss = SoftmaxCrossEntropyLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.

# np.random.seed(1626)
config = {
    'learning_rate': 0.01,
    'weight_decay': 0.000,
    'momentum': 0.9,
def paramSearch(method="range"):

    trainingData, trainingLabels, \
    validationData, validationLabels, \
    testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.20)    



    bestLambda = 0.0
    bestLR = 0.0
    bestValAcc = 0.0
    bestLoss = 0.0
    bestModel = None

    data = [[],[],[]]
    
    if method == "range":
        lambdaValues = np.arange(0, 0.05, 0.001)
        lrValues = np.arange(0.04, 0.08, 0.005)

    elif method == "sampling":
        lrValues = np.random.uniform(0.06, 0.07, 15)
        lambdaValues = np.random.uniform(0.001, 0.005, 15)
        
    data.append((lrValues.shape[0], lambdaValues.shape[0])) # Append axis dimensions for 3D plotting



    for lambdaValue in lambdaValues:
        for lr in lrValues:


            print("Lambda:{}".format(lambdaValue))
            print("LR:{}".format(lr))

            network = Model()
            network.addLayer(Linear(32*32*3, 50, regularization=lambdaValue, initializer="he"))
            network.addLayer(BatchNormalization(50, trainable=True))
            network.addLayer(Relu())

            network.addLayer(Linear(50, 30, regularization=lambdaValue, initializer="he"))
            network.addLayer(BatchNormalization(30, trainable=True))
            network.addLayer(Relu())

            network.addLayer(Linear(30,10, regularization=lambdaValue, initializer="he"))
            network.addLayer(Softmax())

            sgd = SGD(lr=lr, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)  
        
            network.compile(sgd, "cce")
            
            timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

            network.fit(trainingData, trainingLabels, epochs=20, validationData=(validationData, validationLabels), batch_size=100, verbose=False)

            
            #plotAccuracy(network, "plots/", timestamp)
            #plotLoss(network, "plots/", timestamp)
            
            loss, acc = network.evaluate(validationData, validationLabels)
            print("Val loss: {} , Val acc: {}".format(loss, acc) )
            print("\n\n")
            
            data[0].append(lr)
            data[1].append(lambdaValue)
            data[2].append(acc)
            
            if acc > bestValAcc:
                bestLambda = lambdaValue
                bestLR = lr
                bestValAcc = acc
                bestLoss = loss
                bestModel = network
    


    loss, acc = bestModel.evaluate(testingData, testingLabels)
    print("Test loss: {} , Test acc: {}".format(loss, acc) )
    print("\n\n")

    return bestLambda, bestLR,  bestValAcc, bestLoss, data
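
# Example usage (illustrative): a coarse grid search first, then random
# sampling in the promising region it finds.
#   bestLambda, bestLR, bestValAcc, bestLoss, data = paramSearch(method="range")
#   bestLambda, bestLR, bestValAcc, bestLoss, data = paramSearch(method="sampling")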
Example 6
from network import Network
from utils import LOG_INFO, make_plot
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss, SoftmaxCrossEntropyLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import time

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model defintion here
# You should explore different model architecture

model1 = Network(name='model1')
model1.add(Linear('m1_fc1', 784, 256, 0.01))
model1.add(Sigmoid('m1_fc2'))
model1.add(Linear('m1_fc3', 256, 10, 0.01))

model2 = Network(name='model2')
model2.add(Linear('m2_fc1', 784, 256, 0.01))
model2.add(Relu('m2_fc2'))
model2.add(Linear('m2_fc3', 256, 10, 0.01))

model3 = Network(name='model3')
model3.add(Linear('m3_fc1', 784, 512, 0.01))
model3.add(Sigmoid('m3_fc2'))
model3.add(Linear('m3_fc3', 512, 128, 0.01))
model3.add(Sigmoid('m3_fc4'))
model3.add(Linear('m3_fc5', 128, 10, 0.01))

model4 = Network(name='model4')
Example 7
           init_std=0.01))
model.add(Relu('relu1'))
model.add(AvgPool2D('pool1', kernel_size=2,
                    pad=0))  # output shape: N x 4 x 14 x 14
model.add(
    Conv2D('conv2',
           in_channel=4,
           out_channel=4,
           kernel_size=3,
           pad=1,
           init_std=0.01))
model.add(Relu('relu2'))
model.add(AvgPool2D('pool2', kernel_size=2,
                    pad=0))  # output shape: N x 4 x 7 x 7
model.add(Reshape('flatten', (-1, 196)))
model.add(Linear('fc3', in_num=196, out_num=10, init_std=0.1))
'''
# LeNet
model = Network()
model.add(Conv2D('conv1', in_channel=1, out_channel=6, kernel_size=5, pad=1, init_std=0.01))
model.add(Relu('relu1'))
model.add(AvgPool2D('pool1', kernel_size=2, pad=0))  # output shape: N x 14 x 14 x 6
model.add(Conv2D('conv2', in_channel=6, out_channel=16, kernel_size=5, pad=1, init_std=0.01))
model.add(Relu('relu2'))
model.add(AvgPool2D('pool2', kernel_size=2, pad=0))  # output shape: N x 5 x 5 x 16 = N x 400
#model.add(Reshape('flatten', (-1, 196)))
model.add(Linear('fc3', in_num=400, out_num=120, init_std=0.1))
model.add(Relu('relu3'))
model.add(Linear('fc4', in_num=120, out_num=10, init_std=0.1))
'''
Example 8
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import numpy as np

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architecture
model = Network()
model.add(Linear('fc1', 784, 300, 0.01))
model.add(Sigmoid('Sigmoid1'))
model.add(Linear('fc2', 300, 300, 0.01))
model.add(Relu('Relu1'))
model.add(Linear('fc3', 300, 10, 0.01))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.

config = {
    'learning_rate': 0.1,
    'weight_decay': 0.0001,
    'momentum': 0.7,
Example 9
    def __init__(self, nfeat, nhid, nclass, dropout):
        super(MLP, self).__init__()

        self.Linear1 = Linear(nfeat, nhid, dropout)
        self.Linear2 = Linear(nhid, nclass, dropout)
Example 10
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss#, SoftmaxCrossEntropyLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d

import numpy as np

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architecture
init_std = 0.01
model = Network()
model.add(Linear('fc1', 784, 50, init_std))
model.add(Sigmoid('ac1'))
model.add(Linear('fc2', 50, 10, init_std))
loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.

config = {
    'learning_rate': 0.003,
    'weight_decay': 1e-4,
    'momentum': 0.9,
    'batch_size': 100,
Example 11
import numpy as np
import pandas as pd
from tqdm import auto
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from datatype import Tensor
# Network, Linear, Sigmoid, CrossEntropyLoss and SGD are assumed to be
# provided by the project's own modules.


def to_one_hot(vector: Tensor) -> Tensor:
    """Create one hot encoding of a vector."""
    oh = np.zeros((vector.shape[0], vector.max() + 1))
    oh[np.arange(vector.shape[0]), vector] = 1
    return oh
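
# Quick check of the helper above:
#   to_one_hot(np.array([0, 2, 1]))
#   -> [[1., 0., 0.],
#       [0., 0., 1.],
#       [0., 1., 0.]]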


train = pd.read_csv("mnist_train.csv", header=None).values[:, 1:]
train_label = pd.read_csv("mnist_train.csv", header=None).values[:, 0]

net = Network(layers=[Linear(784, 128), Sigmoid(), Linear(128, 10),])

loss = CrossEntropyLoss()
optim = SGD(5e-2, 0.001)
x_train, x_val, y_train, y_val = train_test_split(
    train.astype(np.float32) / 255,
    train_label.astype(np.int32),
    test_size=0.2,
    random_state=42,
)  # to_one_hot

y_train = to_one_hot(y_train)
y_val = to_one_hot(y_val)
batch_size = 100
progress_bar = auto.tqdm(range(200))
accuracies: dict = {"train": [], "val": [], "test": []}
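
# The snippet ends before the actual loop; a minimal sketch of one possible
# training loop over the objects prepared above. The framework's method names
# (net.forward, net.backward, loss.grad, optim.step) are assumptions.
for epoch in progress_bar:
    for start in range(0, x_train.shape[0], batch_size):
        xb = x_train[start:start + batch_size]
        yb = y_train[start:start + batch_size]
        preds = net.forward(xb)
        grad = loss.grad(preds, yb)   # dLoss/dpredictions
        net.backward(grad)            # backpropagate through all layers
        optim.step(net)               # SGD update of the parameters
    val_preds = net.forward(x_val)
    accuracies["val"].append(
        accuracy_score(y_val.argmax(axis=1), val_preds.argmax(axis=1)))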
Example 12
from mnist import load_mnist_for_cnn

import theano.tensor as T
import theano

theano.config.floatX = 'float32'

train_data, test_data, train_label, test_label = load_mnist_for_cnn('data')
model = Network()
model.add(Convolution('conv1', 5, 1, 8, 0.01))  # output size: N x 8 x 24 x 24
model.add(Relu('relu1'))
model.add(Pooling('pool1', 2))  # output size: N x 8 x 12 x 12
model.add(Convolution('conv2', 3, 8, 12, 0.01))  # output size: N x 12 x 10 x 10
model.add(Relu('relu2'))
model.add(Pooling('pool2', 2))  # output size: N x 12 x 5 x 5
model.add(Linear('fc3', 12 * 5 * 5, 10,
                 0.01))  # input reshaped to N x 300 in Linear layer
model.add(Softmax('softmax'))

loss = CrossEntropyLoss(name='xent')

# optim = SGDOptimizer(learning_rate=0.0001, weight_decay=0.005, momentum=0.9)
optim = AdagradOptimizer(learning_rate=0.0002, eps=1e-5)

input_placeholder = T.ftensor4('input')
label_placeholder = T.fmatrix('label')
model.compile(input_placeholder, label_placeholder, loss, optim)

solve_net(model,
          train_data,
          train_label,
          test_data,
from loss import CrossEntropyLoss
from optimizer import SGDOptimizer
from solve_net import solve_net
from mnist import load_mnist_for_cnn

import theano.tensor as T

train_data, test_data, train_label, test_label = load_mnist_for_cnn('data')
model = Network()
model.add(Convolution('conv1', 5, 1, 4, 0.1))  # output size: N x 4 x 24 x 24
model.add(Relu('relu1'))
model.add(Pooling('pool1', 2))  # output size: N x 4 x 12 x 12
model.add(Convolution('conv2', 3, 4, 8, 0.1))  # output size: N x 8 x 10 x 10
model.add(Relu('relu2'))
model.add(Pooling('pool2', 2))  # output size: N x 8 x 5 x 5
model.add(Linear('fc3', 200, 10,
                 0.1))  # input reshaped to N x 200 in Linear layer
model.add(Softmax('softmax'))

loss = CrossEntropyLoss(name='xent')

optim = SGDOptimizer(learning_rate=0.001, weight_decay=0.005, momentum=0.9)

input_placeholder = T.ftensor4('input')
label_placeholder = T.fmatrix('label')
model.compile(input_placeholder, label_placeholder, loss, optim)

solve_net(model,
          train_data,
          train_label,
          test_data,
          test_label,
Example 14
import numpy as np
import pandas as pd
from tqdm import auto
from sklearn.model_selection import train_test_split, KFold
from datatype import Tensor
# Network, Linear, ReLU, MSE and SGD are assumed to be provided by the
# project's own modules.


def to_one_hot(vector: Tensor) -> Tensor:
    """Create one hot encoding of a vector."""
    oh = np.zeros((vector.shape[0], vector.max() + 1))
    oh[np.arange(vector.shape[0]), vector] = 1
    return oh


train = pd.read_csv("mnist_train.csv", header=None).values[:, 1:]
train_label = pd.read_csv("mnist_train.csv", header=None).values[:, 0]

net = Network(layers=[
    Linear(784, 128),
    ReLU(),
    Linear(128, 1),
])

loss = MSE()
optim = SGD(1e-4, 0.0)
x_train, x_val, y_train, y_val = train_test_split(
    train.astype(np.float32) / 255,
    train_label.astype(np.int32),
    test_size=0.2,
    random_state=42,
)  # to_one_hot

batch_size = 100
progress_bar = auto.tqdm(range(10))
Example 15
"""
The canonical example of a function that can't be 
learned with a simple linear model is XOR
"""
import numpy as np

from train import train
from nn import NeuralNet
from layers import Linear, Tanh

inputs = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])

targets = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])

net = NeuralNet([
    Linear(input_size=2, output_size=2),
    Tanh(),
    Linear(input_size=2, output_size=2)
])

train(net, inputs, targets)

for x, y in zip(inputs, targets):
    predicted = net.forward(x)
    print(x, predicted, y)
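
# train() is imported from train.py and not shown; a minimal sketch of the
# kind of loop it is assumed to run (loss.loss, loss.grad, net.backward and
# optimizer.step are assumed method names).
def train_sketch(net, inputs, targets, num_epochs, loss, optimizer):
    for epoch in range(num_epochs):
        predicted = net.forward(inputs)         # full-batch forward pass
        epoch_loss = loss.loss(predicted, targets)
        grad = loss.grad(predicted, targets)    # gradient of the loss w.r.t. predictions
        net.backward(grad)                      # backpropagate through the layers
        optimizer.step(net)                     # update every parameter
        if epoch % 500 == 0:
            print(epoch, epoch_loss)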
Example 16
    config = {
        'learning_rate': 0.01,
        'weight_decay': 0.001,
        'momentum': 0.8,
        'batch_size': 64,
        'max_epoch': 50,
        'disp_freq': 1000,
        'test_epoch': 2,
        'stop_time': args.stop_time
    }

    if Type(args.train_one_layer):
        config['max_epoch'] = 50

        model1 = Network()
        model1.add(Linear('fc1', 784, 256, 0.01))
        model1.add(Sigmoid('sigmoid1'))
        model1.add(Linear('fc2', 256, 10, 0.01))
        model1.add(Sigmoid('sigmoid2'))
        one_hidden_Sigmoid_Euc = {
            'model': model1,
            'loss': loss1,
            'config': config
        }

        model2 = Network()
        model2.add(Linear("fc1", 784, 256, 0.01))
        model2.add(Relu("relu1"))
        model2.add(Linear("fc2", 256, 10, 0.01))
        model2.add(Sigmoid('sigmoid2'))
        one_hidden_Relu_Euc = {
Example 17
def predictor(predictor_name, representation, output_dim, params=None):
    output = Linear('{}.predictor.Linear'.format(predictor_name),
                    representation,
                    output_dim,
                    initializer='glorot_uniform')
    return output
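
# Example usage (names are illustrative):
#   action_logits = predictor('policy', encoder_output, output_dim=num_actions)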
Example 18
from load_data import load_mnist_2d
import csv
from utils import writer

train_data, test_data, train_label, test_label = load_mnist_2d('data')

writer.writerow(
    ['type', 'middle_layers', 'learning_rate', 'weight_decay', 'momentum'])

# Your model definition here
# You should explore different model architecture
middle_layers1 = 500
middle_layers2 = 300

model = Network()
model.add(Linear('fc1', 784, middle_layers1, 0.01))
model.add(Relu('sig1'))
model.add(Linear('fc2', middle_layers1, middle_layers2, 0.01))
model.add(Sigmoid('sig2'))
model.add(Linear('fc3', middle_layers2, 10, 0.01))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.

config = {
    'learning_rate': 0.01,
if len(sys.argv) >= 2:
    filename = sys.argv[1]
else:
    filename = "test"

# Your model definition here
# You should explore different model architecture
model = Network()
model.add(Conv2D('conv1', 1, 4, 3, 1, 1))
model.add(Relu('relu1'))
model.add(AvgPool2D('pool1', 2, 0))  # output shape: N x 4 x 14 x 14
model.add(Conv2D('conv2', 4, 4, 3, 1, 1))
model.add(Relu('relu2'))
model.add(AvgPool2D('pool2', 2, 0))  # output shape: N x 4 x 7 x 7
model.add(Reshape('flatten', (-1, 196)))
model.add(Linear('fc3', 196, 10, 0.1))

loss = SoftmaxCrossEntropyLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.

config = {
    'learning_rate': 0.01,
    'weight_decay': 0,
    'momentum': 0.9,
    'batch_size': 100,
    'max_epoch': 10,
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d


train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architecture
model = Network()
model.add(Linear('fc1', 784, 512, 0.01))
model.add(Relu('relu1'))
model.add(Linear('fc2', 512, 512, 0.01))
model.add(Relu('relu2'))
model.add(Linear('fc3', 512, 10, 0.01))


loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.

config = {
    'learning_rate': 0.01,
    'weight_decay': 5e-4,
Example 21
    :return:
    '''
    return [x >> i & 1 for i in range(10)]

inputs = np.array([
    binary_encode(x)
    for x in range(101, 1024)
])

targets = np.array([
    fizz_buzz_encode(x)
    for x in range(101, 1024)
])

net = NeuralNet([
    Linear(input_size = 10, output_size = 50),
    Tanh(),
    Linear(input_size = 50, output_size = 4)
])

train(net,
      inputs,
      targets,
      num_epochs = 5000,
      optimizer = SGD(lr = 0.001))

for x in range(1, 101):
    predicted = net.forward(binary_encode(x))
    predicted_idx = np.argmax(predicted)
    actual_idx = np.argmax(fizz_buzz_encode(x))
    labels = [str(x), "fizz", "buzz", "fizzbuzz"]
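
# fizz_buzz_encode() is used above but not shown; the 4-way one-hot encoding
# it is assumed to implement (index 0 means "print the number itself"),
# matching the labels list above.
def fizz_buzz_encode_sketch(x):
    if x % 15 == 0:
        return [0, 0, 0, 1]   # "fizzbuzz"
    elif x % 5 == 0:
        return [0, 0, 1, 0]   # "buzz"
    elif x % 3 == 0:
        return [0, 1, 0, 0]   # "fizz"
    else:
        return [1, 0, 0, 0]   # the number itself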
Example 22
    def __init__(self, state_size: int, n_heads: int, dropout: float = 0.1,
                 input_size: Optional[int] = None):
        super().__init__(state_size, n_heads, dropout)
        self.data_to_kv = Linear(state_size, 2 * n_heads * self.projection_size, bias=False)
        self.data_to_q = Linear(state_size if input_size is None else input_size,
                                n_heads * self.projection_size, bias=False)
        self.reset_parameters()
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import numpy as np

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architecture
model = Network()
model.add(Linear('fc1', 784, 100, 0.01))
model.add(Sigmoid('Sigmoid1'))
model.add(Linear('fc2', 100, 40, 0.01))
model.add(Sigmoid('Sigmoid2'))
model.add(Linear('fc3', 40, 10, 0.01))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.

config = {
    'learning_rate': 0.08,
    'weight_decay': 0.001,
    'momentum': 0.9,
Example 24
    x = preprocessing(x)
    xt = preprocessing(xt)
    #x = np.random.random((n, 1, 28, 28))
    #y = np.random.randint(2, size=(n))

    # Model
    net = Net()
    net.push(Conv2d(5, 5, 1, 6))  # 1x28x28 -> 6x24x24
    net.push(Relu())
    net.push(Maxpooling(2, 2))  # 6x24x24 -> 6x12x12
    net.push(Conv2d(5, 5, 6, 16))  # 6x12x12 -> 16x8x8
    net.push(Relu())
    net.push(Maxpooling(2, 2))  # 16x8x8 -> 16x4x4
    net.push(Reshape((256)))
    net.push(Linear(256, 84))
    net.push(Relu())
    net.push(Softmax(84, 10))

    # Data
    data = DataProvider()
    n = 10000
    data.train_input(x[:n], y[:n])
    data.test_input(xt, yt)
    data.batch_size(16)

    lr = 0.0009
    gamma = 0.9
    for epoch in xrange(50):
        print 'Epoch: ', epoch
Example 25
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d


train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architecture
model = Network()
model.add(Linear('fc1', 784, 10, 0.01))
model.add(Sigmoid('fc2'))
#model.add(Linear('fc3', 10, 10, 0.01))
#model.add(Sigmoid('fc4'))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.

config = {
    'learning_rate': 0.01,
    'weight_decay': 0.0,
    'momentum': 0.9,
    'batch_size': 100,
Example 26
from network import Network
from utils import LOG_INFO
from layers import Relu, Sigmoid, Linear
from loss import EuclideanLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d

train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architecture
model = Network()
model.add(Linear('fc1', 784, 200, 0.01))
model.add(Sigmoid('actv1'))
model.add(Linear('fc2', 200, 10, 0.01))
model.add(Sigmoid('actv2'))

loss = EuclideanLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.

config = {
    'learning_rate': 5e-5,
    'weight_decay': 1e-3,
    'momentum': 0.0,
    'batch_size': 100,
    'max_epoch': 100,
Example 27
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from layers import Linear, Softmax, Tanh, Dropout, Relu, Sigmod
from nn import NetWork
from loss import Cross_emptropy
from train import train, evaluate
from keras.utils import to_categorical
import numpy as np

iris = load_iris()
X = iris.data
y = iris.target
y = to_categorical(y).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

net = NetWork([
    Linear(input_dim=4, output_dim=5),
    Sigmod(),
    Linear(input_dim=5, output_dim=3),
    Dropout(p=0.3),
    Softmax(input_dim=3)
])

train(net,
      X_train,
      y_train,
      epochs=2000,
      loss=Cross_emptropy(),
      show_info=True)
acc = evaluate(net, X_test, y_test)
print('acc:{:.2f}%'.format(acc * 100))
from network import Network
from utils import LOG_INFO, lc_plot
from layers import Relu, Sigmoid, Linear, Gelu
from loss import EuclideanLoss, SoftmaxCrossEntropyLoss, HingeLoss
from solve_net import train_net, test_net
from load_data import load_mnist_2d
import time

NAME = "1layersSigmoid.Cross"
train_data, test_data, train_label, test_label = load_mnist_2d('data')

# Your model definition here
# You should explore different model architecture
model = Network()
model.add(Linear('fc0', 784, 256, 0.01, 'sigmoid', 'act0'))
model.add(Linear('fc2', 256, 10, 0.01, None, 'act2'))

loss = SoftmaxCrossEntropyLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means model forward-backwards one batch of samples.
#       one epoch means model has gone through all the training samples.
#       'disp_freq' denotes number of iterations in one epoch to display information.

config = {
    'learning_rate': 0.1,
    'weight_decay': 0.0005,
    'momentum': 0.9,
    'batch_size': 500,
    'max_epoch': 50,
Example 29
from network import Network
from data_preparation import load_data
from solve_rnn import solve_rnn
# The following imports are assumed; the module layout mirrors the other
# snippets in this collection (layers / loss / optimizer).
from layers import RNN, Linear, Softmax
from loss import CrossEntropyLoss
from optimizer import RMSpropOptimizer

import theano.tensor as T

X_train, y_train, X_test, y_test = load_data()

HIDDEN_DIM = 32
INPUT_DIM = 20
OUTPUT_DIM = 10

model = Network()
model.add(RNN('rnn1', HIDDEN_DIM, INPUT_DIM,
              0.1))  # output shape: 4 x HIDDEN_DIM
model.add(Linear('fc', HIDDEN_DIM, OUTPUT_DIM,
                 0.1))  # output shape: 4 x OUTPUT_DIM
model.add(Softmax('softmax'))

loss = CrossEntropyLoss('xent')

optim = RMSpropOptimizer(learning_rate=0.01, rho=0.9)
input_placeholder = T.fmatrix('input')
label_placeholder = T.fmatrix('label')

model.compile(input_placeholder, label_placeholder, loss, optim)

MAX_EPOCH = 6
DISP_FREQ = 1000
TEST_FREQ = 10000

solve_rnn(model, X_train, y_train, X_test, y_test, MAX_EPOCH, DISP_FREQ,
Example 30
def bn_9_layer_test(epochs=2, reg=0.0, lr=0.01, momentum=0.7):

    trainingData, trainingLabels, \
    validationData, validationLabels, \
    testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.20)
    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')


    network = Model(name="NO BN")
    network.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    #network.addLayer(BatchNormalization(50, trainable=True, alpha=0.99))
    network.addLayer(Relu())

    network.addLayer(Linear(50, 30, regularization=reg, initializer="he"))
    #network.addLayer(BatchNormalization(30, trainable=True, alpha=0.99))
    network.addLayer(Relu())

    network.addLayer(Linear(30, 20, regularization=reg, initializer="he"))
    #network.addLayer(BatchNormalization(30, trainable=True, alpha=0.99))
    network.addLayer(Relu())

    network.addLayer(Linear(20, 20, regularization=reg, initializer="he"))
    #network.addLayer(BatchNormalization(30, trainable=True, alpha=0.99))
    network.addLayer(Relu())

    network.addLayer(Linear(20, 10, regularization=reg, initializer="he"))
    #network.addLayer(BatchNormalization(30, trainable=True, alpha=0.99))
    network.addLayer(Relu())

    network.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    #network.addLayer(BatchNormalization(30, trainable=True, alpha=0.99))
    network.addLayer(Relu())

    network.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    #network.addLayer(BatchNormalization(30, trainable=True, alpha=0.99))
    network.addLayer(Relu())

    network.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    #network.addLayer(BatchNormalization(30, trainable=True, alpha=0.99))
    network.addLayer(Relu())

    network.addLayer(Linear(10,10, regularization=reg, initializer="he"))
    network.addLayer(Softmax())

    sgd = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)  
 
    network.compile(sgd, "cce")
    network.fit(trainingData, trainingLabels, epochs=epochs, batch_size=100, validationData=(validationData, validationLabels))
    
    


    networkBN = Model(name="WITH BN")
    networkBN.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(50, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())

    networkBN.addLayer(Linear(50, 30, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(30, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())

    networkBN.addLayer(Linear(30, 20, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(20, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())

    networkBN.addLayer(Linear(20, 20, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(20, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())

    networkBN.addLayer(Linear(20, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(10, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())

    networkBN.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(10, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())

    networkBN.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(10, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())

    networkBN.addLayer(Linear(10, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(10, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())

    networkBN.addLayer(Linear(10,10, regularization=reg, initializer="he"))
    networkBN.addLayer(Softmax())

    sgd2 = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)  
 
    networkBN.compile(sgd2, "cce")
    networkBN.fit(trainingData, trainingLabels, epochs=epochs, batch_size=100, validationData=(validationData, validationLabels))
    #plotAccuracy(network, "plots/", timestamp)
    #plotLoss(network, "plots/", timestamp)

    loss, acc = network.evaluate(testingData, testingLabels)
    print("NO BN: Test loss: {} , Test acc: {}".format(loss, acc) )
    
    loss, acc = networkBN.evaluate(testingData, testingLabels)
    print("W BN: Test loss: {} , Test acc: {}".format(loss, acc) )

    multiPlotLoss((network, networkBN), "plots/", timestamp, title="9-layer network loss over epochs, eta:{}, lambda:{}".format(lr, reg))
    multiPlotAccuracy((network, networkBN), "plots/", timestamp, title="9-layer network accuracy over epochs, eta:{}, lambda:{}".format(lr, reg))
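
# Example usage (illustrative):
#   bn_9_layer_test(epochs=20, reg=0.0, lr=0.01, momentum=0.7)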