Example #1
def tanhexp(xTrain, xTest, yTrain, yTest, hots):
    print('EXPERIMENT WITH THE TANH ACTIVATION FUNCTION')
    # First neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Tanh(), Tanh(), Tanh()])
    nn.set_learning_rate([0.6, 0.6, 0.6])

    train_plot_cm(nn, xTrain, xTest, yTrain, yTest,
                  epochs=60, encoding_dict=hots)

    # Second neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Tanh(), Tanh(), Tanh()])
    nn.set_learning_rate([0.3, 0.3, 0.3])

    train_plot_cm(nn, xTrain, xTest, yTrain, yTest,
                  epochs=60, encoding_dict=hots)

    # Third neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Tanh(), Tanh(), Tanh()])
    nn.set_learning_rate([0.2, 0.2, 0.2])

    train_plot_cm(nn, xTrain, xTest, yTrain, yTest,
                  epochs=60, encoding_dict=hots)
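The `train_plot_cm` helper used above is not part of this example. A minimal sketch of what it might look like, assuming the `NeuralNetwork` class exposes `train` and `feed_forward` methods and that scikit-learn and matplotlib are available (all assumptions, not taken from the original source):

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np


def train_plot_cm(nn, xTrain, xTest, yTrain, yTest, epochs, encoding_dict):
    # Hypothetical: train for the given number of epochs, then plot a
    # confusion matrix over the test set.
    for _ in range(epochs):
        nn.train(xTrain, yTrain)
    preds = [np.argmax(nn.feed_forward(x)) for x in xTest]
    truth = [np.argmax(y) for y in yTest]
    cm = confusion_matrix(truth, preds)
    ConfusionMatrixDisplay(cm, display_labels=list(encoding_dict)).plot()
    plt.show()

Example #2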
def test_set_activation(self):
    self.assertEqual(Sigmoid().__class__,
                     self.nn.layers[0].get_activation().__class__)
    self.assertEqual(Sigmoid().__class__,
                     self.nn.layers[1].get_activation().__class__)
    self.assertEqual(Sigmoid().__class__,
                     self.nn.layers[2].get_activation().__class__)
    self.nn.set_activation([Tanh(), Step(), Tanh()])
    self.assertEqual(Tanh().__class__,
                     self.nn.layers[0].get_activation().__class__)
    self.assertEqual(Step().__class__,
                     self.nn.layers[1].get_activation().__class__)
    self.assertEqual(Tanh().__class__,
                     self.nn.layers[2].get_activation().__class__)
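This test assumes a per-layer activation API. A minimal sketch of the classes it exercises, with `Sigmoid` as the default to match the first three assertions (the real `Layer` and `NeuralNetwork` implementations are not shown, so this is an assumption):

class Layer:
    def __init__(self):
        # Sigmoid, Tanh, Step are the activation classes used above.
        self._activation = Sigmoid()

    def get_activation(self):
        return self._activation

    def set_activation(self, activation):
        self._activation = activation


class NeuralNetwork:
    def set_activation(self, activations):
        # One activation object per layer, applied in order.
        for layer, activation in zip(self.layers, activations):
            layer.set_activation(activation)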
Example #3
def twohl(xTrain, xTest, yTrain, yTest, hots):
    print('EXPERIMENT WITH TWO HIDDEN LAYERS')
    # First neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Sigmoid(), Sigmoid(), Sigmoid()])
    nn.set_learning_rate([0.6, 0.6, 0.6])

    train_plot_cm(nn, xTrain, xTest, yTrain, yTest,
                  epochs=50, encoding_dict=hots)

    # Second neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Tanh(), Tanh(), Tanh()])
    nn.set_learning_rate([0.6, 0.6, 0.6])

    train_plot_cm(nn, xTrain, xTest, yTrain, yTest,
                  epochs=50, encoding_dict=hots)

    # Third neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Sigmoid(), Sigmoid(), Sigmoid()])
    nn.set_learning_rate([0.3, 0.3, 0.3])

    train_plot_cm(nn, xTrain, xTest, yTrain, yTest,
                  epochs=50, encoding_dict=hots)

    # Fourth neural network
    nn = NeuralNetwork(7, [10, 10], 2, 3)
    nn.set_activation([Sigmoid(), Sigmoid(), Sigmoid()])
    nn.set_learning_rate([0.6, 0.6, 0.6])

    train_plot_cm(nn, xTrain, xTest, yTrain, yTest,
                  epochs=50, encoding_dict=hots)

    # Fifth neural network
    nn = NeuralNetwork(7, [1, 2], 2, 3)
    nn.set_activation([Sigmoid(), Sigmoid(), Sigmoid()])
    nn.set_learning_rate([0.6, 0.6, 0.6])

    train_plot_cm(nn, xTrain, xTest, yTrain, yTest,
                  epochs=50, encoding_dict=hots)
Example #4
def run_all_model(train_input,
                  train_target,
                  test_input,
                  test_target,
                  Sample_number,
                  save_plot=False):

    # Define constants shared across all the experiments below
    hidden_nb = 25
    std = 0.1
    eta = 3e-1
    batch_size = 200
    epochs_number = 1000

    # Model 1. No dropout; constant learning rate (SGD)
    print('\nModel 1: Optimizer: SGD; No dropout; ReLU; CrossEntropy')

    # Define model name for plots
    mname = 'Model1'

    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)

    model_1 = Sequential(linear_1,
                         relu_1,
                         linear_2,
                         relu_2,
                         linear_3,
                         relu_3,
                         linear_4,
                         loss=CrossEntropy())

    # Initialize weights
    model_1.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Sgd(eta)

    # Train model
    my_loss_1 = train_model(model_1, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_1_perf = evaluate_model(model_1,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_1,
                                  save_plot,
                                  mname=mname)

    # Model 2. No dropout; decreasing learning rate (DecreaseSGD)
    print('\nModel 2: Optimizer: DecreaseSGD; No dropout; ReLU; CrossEntropy')

    # Define model name for plots
    mname = 'Model2'

    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)

    model_2 = Sequential(linear_1,
                         relu_1,
                         linear_2,
                         relu_2,
                         linear_3,
                         relu_3,
                         linear_4,
                         loss=CrossEntropy())

    # Initialize weights
    model_2.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = DecreaseSGD(eta)

    # Train model
    my_loss_2 = train_model(model_2, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)
    # Evaluate model and produce plots
    model_2_perf = evaluate_model(model_2,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_2,
                                  save_plot,
                                  mname=mname)

    # Model 3. No dropout; Adam Optimizer
    print('\nModel 3: Optimizer: Adam; No dropout; ReLU; CrossEntropy')

    # Define model name for plots
    mname = 'Model3'

    # Custom hyperparameters
    eta_adam = 1e-3
    epochs_number_adam = 500

    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)

    model_3 = Sequential(linear_1,
                         relu_1,
                         linear_2,
                         relu_2,
                         linear_3,
                         relu_3,
                         linear_4,
                         loss=CrossEntropy())

    # Initialize weights
    model_3.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Adam(eta_adam, 0.9, 0.99, 1e-8)

    # Train model
    my_loss_3 = train_model(model_3, train_input, train_target, optimizer,
                            epochs_number_adam, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_3_perf = evaluate_model(model_3,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_3,
                                  save_plot,
                                  mname=mname)

    # PLOT TO COMPARE OPTIMIZERS
    if save_plot:
        fig = plt.figure(figsize=(10, 4))
        plt.plot(range(0, epochs_number), my_loss_1, linewidth=1)
        plt.plot(range(0, epochs_number), my_loss_2, linewidth=1)
        plt.plot(range(0, epochs_number_adam), my_loss_3, linewidth=1)
        plt.legend(["SGD", "Decreasing SGD", "Adam"])
        plt.title("Loss")
        plt.xlabel("Epochs")
        plt.savefig('output/compare_optimizers.pdf', bbox_inches='tight')
        plt.close(fig)

    # Model 4. Dropout; SGD
    print('\nModel 4: Optimizer: SGD; Dropout; ReLU; CrossEntropy')

    # Define model name for plots
    mname = 'Model4'

    # Define structure of the network
    dropout = 0.15

    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb, dropout=dropout)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb, dropout=dropout)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)

    model_4 = Sequential(linear_1,
                         relu_1,
                         linear_2,
                         relu_2,
                         linear_3,
                         relu_3,
                         linear_4,
                         loss=CrossEntropy())

    # Initialize weights
    model_4.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Sgd(eta)

    # Train model
    my_loss_4 = train_model(model_4, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_4_perf = evaluate_model(model_4,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_4,
                                  save_plot,
                                  mname=mname)

    # PLOT TO COMPARE DROPOUT AND NO DROPOUT
    if save_plot:
        fig = plt.figure(figsize=(10, 4))
        plt.plot(range(0, epochs_number), my_loss_1, linewidth=1)
        plt.plot(range(0, epochs_number), my_loss_4, linewidth=1)
        plt.legend(["Without Dropout", "With Dropout"])
        plt.title("Loss")
        plt.xlabel("Epochs")
        plt.savefig('output/compare_dropout.pdf', bbox_inches='tight')
        plt.close(fig)

    print('\nEvaluation of different activation functions\n')

    # Model 5. No Dropout; SGD; Tanh
    print('\nModel 5: Optimizer: SGD; No dropout; Tanh; CrossEntropy')

    # Define model name for plots
    mname = 'Model5'

    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    tanh_1 = Tanh()
    linear_2 = Linear(hidden_nb, hidden_nb)
    tanh_2 = Tanh()
    linear_3 = Linear(hidden_nb, hidden_nb)
    tanh_3 = Tanh()
    linear_4 = Linear(hidden_nb, 2)

    model_5 = Sequential(linear_1,
                         tanh_1,
                         linear_2,
                         tanh_2,
                         linear_3,
                         tanh_3,
                         linear_4,
                         loss=CrossEntropy())

    # Initialize weights
    model_5.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Sgd(eta)

    # Train model
    my_loss_5 = train_model(model_5, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_5_perf = evaluate_model(model_5,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_5,
                                  save_plot,
                                  mname=mname)

    # Model 6. Xavier Initialization
    print(
        '\nModel 6: Optimizer: SGD; No dropout; Tanh; Xavier initialization; CrossEntropy'
    )

    # Define model name for plots
    mname = 'Model6'

    # Define network structure
    linear_1 = Linear(2, hidden_nb)
    tanh_1 = Tanh()
    linear_2 = Linear(hidden_nb, hidden_nb)
    tanh_2 = Tanh()
    linear_3 = Linear(hidden_nb, hidden_nb)
    tanh_3 = Tanh()
    linear_4 = Linear(hidden_nb, 2)

    model_6 = Sequential(linear_1,
                         tanh_1,
                         linear_2,
                         tanh_2,
                         linear_3,
                         tanh_3,
                         linear_4,
                         loss=CrossEntropy())

    model_6.xavier_parameters()
    optimizer = Sgd()

    # Train model
    my_loss_6 = train_model(model_6, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_6_perf = evaluate_model(model_6,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_6,
                                  save_plot,
                                  mname=mname)

    # Model 7. Sigmoid
    print('\nModel 7: Optimizer: SGD; No dropout; Sigmoid; CrossEntropy')

    # Define model name for plots
    mname = 'Model7'

    # Define parameter for sigmoid activation
    p_lambda = 0.1

    # Define network structure
    linear_1 = Linear(2, hidden_nb)
    sigmoid_1 = Sigmoid(p_lambda)
    linear_2 = Linear(hidden_nb, hidden_nb)
    sigmoid_2 = Sigmoid(p_lambda)
    linear_3 = Linear(hidden_nb, hidden_nb)
    sigmoid_3 = Sigmoid(p_lambda)
    linear_4 = Linear(hidden_nb, 2)

    model_7 = Sequential(linear_1,
                         sigmoid_1,
                         linear_2,
                         sigmoid_2,
                         linear_3,
                         sigmoid_3,
                         linear_4,
                         loss=CrossEntropy())

    model_7.normalize_parameters(mean=0.5, std=1)
    optimizer = Sgd(eta=0.5)

    # Train model
    my_loss_7 = train_model(model_7, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_7_perf = evaluate_model(model_7,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_7,
                                  save_plot,
                                  mname=mname)

    # PLOT TO COMPARE EFFECT OF DIFFERENT ACTIVATIONS
    if save_plot:
        fig = plt.figure(figsize=(10, 4))
        plt.plot(range(0, epochs_number), my_loss_1, linewidth=0.5)
        plt.plot(range(0, epochs_number), my_loss_5, linewidth=0.5, alpha=0.8)
        plt.plot(range(0, epochs_number), my_loss_6, linewidth=0.5, alpha=0.8)
        plt.plot(range(0, epochs_number), my_loss_7, linewidth=0.5)
        plt.legend(["Relu", "Tanh", "Tanh (Xavier)", "Sigmoid"])
        plt.title("Loss")
        plt.xlabel("Epochs")
        plt.savefig('output/compare_activations.pdf', bbox_inches='tight')
        plt.close(fig)

    print('\nEvaluation of base model with MSE loss\n')

    # Model 8. MSE loss
    print('\nModel 8: Optimizer: SGD; No dropout; ReLU; MSE')

    # Define model name for plots
    mname = 'Model8'
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)
    loss = LossMSE()

    model_8 = Sequential(linear_1,
                         relu_1,
                         linear_2,
                         relu_2,
                         linear_3,
                         relu_3,
                         linear_4,
                         loss=loss)

    model_8.normalize_parameters(mean=0, std=std)
    optimizer = Sgd(eta)

    # Train model
    my_loss_8 = train_model(model_8, train_input, train_target, optimizer,
                            epochs_number, Sample_number, batch_size)

    # Evaluate model and produce plots
    model_8_perf = evaluate_model(model_8,
                                  train_input,
                                  train_target,
                                  test_input,
                                  test_target,
                                  my_loss_8,
                                  save_plot,
                                  mname=mname)

    print('Evaluation done!')

    train_loss = torch.tensor([
        model_1_perf[0], model_2_perf[0], model_3_perf[0], model_4_perf[0],
        model_5_perf[0], model_6_perf[0], model_7_perf[0], model_8_perf[0]
    ])
    train_error = torch.tensor([
        model_1_perf[1], model_2_perf[1], model_3_perf[1], model_4_perf[1],
        model_5_perf[1], model_6_perf[1], model_7_perf[1], model_8_perf[1]
    ])
    test_loss = torch.tensor([
        model_1_perf[2], model_2_perf[2], model_3_perf[2], model_4_perf[2],
        model_5_perf[2], model_6_perf[2], model_7_perf[2], model_8_perf[2]
    ])
    test_error = torch.tensor([
        model_1_perf[3], model_2_perf[3], model_3_perf[3], model_4_perf[3],
        model_5_perf[3], model_6_perf[3], model_7_perf[3], model_8_perf[3]
    ])

    return train_loss, train_error, test_loss, test_error
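`train_model` and `evaluate_model` are defined elsewhere. A plausible sketch of the training loop, matching the call signature above; `model.forward`, `model.compute_loss`, `model.backward`, and `optimizer.step` are assumptions about the framework's API, not its actual methods:

def train_model(model, train_input, train_target, optimizer,
                epochs_number, sample_number, batch_size):
    # Hypothetical mini-batch loop: one accumulated loss value per
    # epoch, which is what the plotting code above expects.
    losses = []
    for _ in range(epochs_number):
        epoch_loss = 0.0
        for b in range(0, sample_number, batch_size):
            output = model.forward(train_input[b:b + batch_size])
            epoch_loss += model.compute_loss(output,
                                             train_target[b:b + batch_size])
            model.backward()
            optimizer.step(model)
        losses.append(epoch_loss)
    return losses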
Example #5
from Activation import Tanh
from Gates import AddGate, MultiplyGate

mulGate = MultiplyGate()
addGate = AddGate()
activation = Tanh()


class Layer:
    def forward(self, x, prev, U, W, V):
        # RNN cell: s_t = tanh(U·x_t + W·s_{t-1}), output o_t = V·s_t
        self.mulu = mulGate.forward(U, x)
        self.mulw = mulGate.forward(W, prev)
        self.add = addGate.forward(self.mulw, self.mulu)
        self.s = activation.forward(self.add)
        self.mulv = mulGate.forward(V, self.s)

    def backward(self, x, prev, U, W, V, diff, dmulv):
        # Recompute the forward pass to restore the cached activations,
        # then backpropagate through V, the tanh, and the U/W branches.
        self.forward(x, prev, U, W, V)
        dV, dsv = mulGate.backward(V, self.s, dmulv)
        ds = dsv + diff
        dadd = activation.backward(self.add, ds)
        dmulw, dmulu = addGate.backward(self.mulw, self.mulu, dadd)
        dW, dprev = mulGate.backward(W, prev, dmulw)
        dU, dx = mulGate.backward(U, x, dmulu)
        return (dprev, dU, dW, dV)
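A short usage sketch of this RNN cell, unrolled over a toy sequence. The gate and activation contracts (matrix multiply, elementwise add, tanh) are assumptions consistent with how `Layer` uses them; minimal NumPy stand-ins are defined here so the sketch is self-contained:

import numpy as np


class MultiplyGate:
    def forward(self, W, x):
        return W.dot(x)

    def backward(self, W, x, dz):
        # Gradients of z = W.dot(x) with respect to W and x.
        return np.outer(dz, x), W.T.dot(dz)


class AddGate:
    def forward(self, a, b):
        return a + b

    def backward(self, a, b, dz):
        return dz, dz


class Tanh:
    def forward(self, x):
        return np.tanh(x)

    def backward(self, x, dz):
        return (1.0 - np.tanh(x) ** 2) * dz


# Rebind the module-level gates that Layer uses.
mulGate = MultiplyGate()
addGate = AddGate()
activation = Tanh()

hidden, in_dim, out_dim = 5, 4, 3
rng = np.random.default_rng(0)
U = rng.normal(size=(hidden, in_dim))
W = rng.normal(size=(hidden, hidden))
V = rng.normal(size=(out_dim, hidden))

layer = Layer()
prev = np.zeros(hidden)
for t in range(3):
    x = rng.normal(size=in_dim)
    layer.forward(x, prev, U, W, V)
    prev_in, prev = prev, layer.s  # hidden state feeds the next step

# One BPTT step with a dummy gradient on the output projection.
dprev, dU, dW, dV = layer.backward(x, prev_in, U, W, V,
                                   np.zeros(hidden), np.ones(out_dim))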
Example #6
###############################################################

if args.load:
    train_conv = False
    train_fc = True
    weights_conv = args.load
    weights_fc = args.load
else:
    train_conv = False
    train_fc = True
    weights_conv = '../vgg_weights/vgg_weights.npy'
    weights_fc = None

if args.act == 'tanh':
    act = Tanh()
elif args.act == 'relu':
    act = Relu()
else:
    raise ValueError('unsupported activation: ' + args.act)

###############################################################

dropout_rate = tf.placeholder(tf.float32, shape=())
learning_rate = tf.placeholder(tf.float32, shape=())

l0 = Convolution(input_sizes=[batch_size, 224, 224, 3],
                 filter_sizes=[3, 3, 3, 64],
                 num_classes=num_classes,
                 init_filters=args.init,
                 strides=[1, 1, 1, 1],
Example #7
def test_set_activation(self):
    self.assertEqual(Sigmoid().__class__,
                     self.layer1.get_activation().__class__)
    self.layer1.set_activation(Tanh())
    self.assertEqual(Tanh().__class__,
                     self.layer1.get_activation().__class__)
Example #8
##############################################

tf.set_random_seed(0)
tf.reset_default_graph()

batch_size = tf.placeholder(tf.int32, shape=())
XTRAIN = tf.placeholder(tf.float32, [None, 32, 32, 3])
YTRAIN = tf.placeholder(tf.float32, [None, 100])
# Standardize into a new tensor instead of rebinding XTRAIN, so the
# placeholder handle can still be used in feed_dict.
xtrain = tf.map_fn(lambda frame: tf.image.per_image_standardization(frame), XTRAIN)

XTEST = tf.placeholder(tf.float32, [None, 32, 32, 3])
YTEST = tf.placeholder(tf.float32, [None, 100])
xtest = tf.map_fn(lambda frame1: tf.image.per_image_standardization(frame1), XTEST)

l0 = Convolution(input_sizes=[batch_size, 32, 32, 3], filter_sizes=[5, 5, 3, 96], num_classes=100, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), last_layer=False)
l1 = MaxPool(size=[batch_size, 32, 32, 96], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
l2 = FeedbackConv(size=[batch_size, 16, 16, 96], num_classes=100, sparse=sparse, rank=rank)

l3 = Convolution(input_sizes=[batch_size, 16, 16, 96], filter_sizes=[5, 5, 96, 128], num_classes=100, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), last_layer=False)
l4 = MaxPool(size=[batch_size, 16, 16, 128], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
l5 = FeedbackConv(size=[batch_size, 8, 8, 128], num_classes=100, sparse=sparse, rank=rank)

l6 = Convolution(input_sizes=[batch_size, 8, 8, 128], filter_sizes=[5, 5, 128, 256], num_classes=100, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), last_layer=False)
l7 = MaxPool(size=[batch_size, 8, 8, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
l8 = FeedbackConv(size=[batch_size, 4, 4, 256], num_classes=100, sparse=sparse, rank=rank)

l9 = ConvToFullyConnected(shape=[4, 4, 256])
l10 = FullyConnected(size=[4*4*256, 2048], num_classes=100, init_weights=args.init, alpha=ALPHA, activation=Tanh(), last_layer=False)
l11 = FeedbackFC(size=[4*4*256, 2048], num_classes=100, sparse=sparse, rank=rank)
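Because `XTRAIN`/`XTEST` stay bound to their placeholders (see the standardization step above), they can still be fed at run time. A hypothetical TF1-style session sketch; `train_op`, `batch_x`, and `batch_y` stand in for code not shown here:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op, feed_dict={XTRAIN: batch_x,
                                  YTRAIN: batch_y,
                                  batch_size: len(batch_x)})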
Example #9
File: imagenet3.py  Project: bcrafton/dfa
    get_conv1_bias = tf.get_default_graph().get_tensor_by_name(os.path.split(conv1.name)[0] + '/bias:0')

    get_conv2_weights = tf.get_default_graph().get_tensor_by_name(os.path.split(conv2.name)[0] + '/kernel:0')
    get_conv2_bias = tf.get_default_graph().get_tensor_by_name(os.path.split(conv2.name)[0] + '/bias:0')

    get_conv3_weights = tf.get_default_graph().get_tensor_by_name(os.path.split(conv3.name)[0] + '/kernel:0')
    get_conv3_bias = tf.get_default_graph().get_tensor_by_name(os.path.split(conv3.name)[0] + '/bias:0')

    get_conv4_weights = tf.get_default_graph().get_tensor_by_name(os.path.split(conv4.name)[0] + '/kernel:0')
    get_conv4_bias = tf.get_default_graph().get_tensor_by_name(os.path.split(conv4.name)[0] + '/bias:0')

    get_conv5_weights = tf.get_default_graph().get_tensor_by_name(os.path.split(conv5.name)[0] + '/kernel:0')
    get_conv5_bias = tf.get_default_graph().get_tensor_by_name(os.path.split(conv5.name)[0] + '/bias:0')

else:
    l0 = Convolution(input_sizes=[batch_size, 227, 227, 3], filter_sizes=[11, 11, 3, 96], num_classes=num_classes, init_filters=args.init, strides=[1, 4, 4, 1], padding="VALID", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
    l1 = MaxPool(size=[batch_size, 55, 55, 96], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

    l2 = Convolution(input_sizes=[batch_size, 27, 27, 96], filter_sizes=[5, 5, 96, 256], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
    l3 = MaxPool(size=[batch_size, 27, 27, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

    l4 = Convolution(input_sizes=[batch_size, 13, 13, 256], filter_sizes=[3, 3, 256, 384], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)

    l5 = Convolution(input_sizes=[batch_size, 13, 13, 384], filter_sizes=[3, 3, 384, 384], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)

    l6 = Convolution(input_sizes=[batch_size, 13, 13, 384], filter_sizes=[3, 3, 384, 256], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
    l7 = MaxPool(size=[batch_size, 13, 13, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

    l8 = ConvToFullyConnected(shape=[6, 6, 256])
    l9 = FullyConnected(size=[6*6*256, 4096], num_classes=num_classes, init_weights=args.init, alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
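The repeated `get_tensor_by_name` lookups earlier in this example could be collapsed into a small helper. A sketch, assuming the same TF1 naming conventions (a `kernel:0` and a `bias:0` tensor in each conv op's scope) and the `os`/`tf` imports this file already uses:

def get_conv_params(conv):
    # Look up the kernel and bias tensors that live in the same
    # variable scope as the given conv op's output tensor.
    scope = os.path.split(conv.name)[0]
    graph = tf.get_default_graph()
    return (graph.get_tensor_by_name(scope + '/kernel:0'),
            graph.get_tensor_by_name(scope + '/bias:0'))

# Usage: get_conv1_weights, get_conv1_bias = get_conv_params(conv1)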