def __init__(self,
                 in_dim,
                 hl_dims,
                 bias=False,
                 activation_fun=tf.nn.relu,
                 batch_normalization=True):
        """Create the fully connected layers that make up this block.

        Args:
            in_dim: Width of the block's input.
            hl_dims: Output width of each layer, in order. The last entry
                must equal ``in_dim``.
            bias: Forwarded as ``bias`` to every ``FullyConnectedLayer``.
            activation_fun: Activation given to the first layer only; all
                later layers are built with ``activation_fun=None``.
            batch_normalization: Stored on the instance; not otherwise used
                by this constructor.
        """
        self.in_dim = in_dim
        self.hl_dims = hl_dims
        self.bias = bias
        self.activation_func = activation_fun
        self.batch_normalization = batch_normalization
        self.fc_layers = []
        # Input and output widths must agree so the residual can be added.
        assert in_dim == hl_dims[-1]

        layer_in = self.in_dim
        for position, layer_out in enumerate(hl_dims):
            # Only the first layer of the block carries the activation.
            layer_activation = activation_fun if position == 0 else None
            self.fc_layers.append(
                FullyConnectedLayer(layer_in,
                                    layer_out,
                                    bias=self.bias,
                                    activation_fun=layer_activation))
            layer_in = layer_out
Exemple #2
0
    def _build_architecture_get_prediction_and_regularization_cost(
            architecture, weight_decay, current_input):
        """Instantiate the layers described by ``architecture`` and chain them.

        Args:
            architecture: Sequence of layer dictionaries, read through the
                keys ``"input"``, ``"output"``, ``"nonlinear"`` and
                ``"regularization"``.
            weight_decay: Value wrapped in a ``Variable`` and multiplied with
                each layer's regularization term.
            current_input: Node passed to the first layer as its input.

        Returns:
            Tuple ``(layers, last_layer, regularization_cost)``: the list of
            built layers, the final layer, and the accumulated cost node.

        Raises:
            AssertionError: If a layer's declared input size differs from the
                previous layer's declared output size.
        """
        layers = list()
        total_regularization = Variable(0.0)
        decay = Variable(weight_decay)  # TODO: constant
        expected_width = architecture[0]['input']

        for spec in architecture:
            declared_input = spec["input"]
            assert expected_width == declared_input, \
                'Inconsistent architecture: can not feed {} outputs to {} inputs'.format(
                    expected_width,
                    declared_input
                )
            activation = activation_function_name_to_class[spec["nonlinear"]]
            regularizer = regularization_method_name_to_class[
                spec["regularization"]]
            layer = FullyConnectedLayer(declared_input, spec["output"],
                                        activation, current_input)
            # Each layer contributes weight_decay * regularizer(weights).
            penalty = Multiply(decay, regularizer(layer.get_weight()))
            total_regularization = Add(total_regularization, penalty)
            layers.append(layer)
            # The layer just built becomes the input of the next one.
            current_input = layer
            expected_width = spec['output']

        return layers, current_input, total_regularization
Exemple #3
0
    def test_forward_backward_1_no_activation(self):
        """Check forward output and all gradients of a 2-in / 1-out layer with Identity activation."""
        inputs = np.arange(6).reshape(3, 2)
        input_node = Variable(inputs)
        layer = FullyConnectedLayer(2, 1, Identity, input_node)
        # Snapshot the parameters before anything runs.
        weights = layer._w._value.copy()
        bias = layer._b._value.copy()

        expected_output = inputs @ weights + bias
        np.testing.assert_almost_equal(layer.forward(), expected_output)

        # Upstream gradient: one value per input row.
        upstream = (6, 7, 8)
        layer.backward(np.array([[float(g)] for g in upstream]))

        actual_dw = layer._w.get_gradient()
        actual_dx = input_node.get_gradient()
        # dL/dW = x^T @ upstream, written out by hand for the 3x2 input.
        expected_dw = np.array([[0 * 6 + 2 * 7 + 4 * 8],
                                [1 * 6 + 3 * 7 + 5 * 8]])
        # dL/dx: each row is the weight column scaled by that row's upstream value.
        expected_dx = np.array(
            [[weights[0, 0] * g, weights[1, 0] * g] for g in upstream])

        np.testing.assert_allclose(actual_dw, expected_dw)
        np.testing.assert_allclose(actual_dx, expected_dx)

        # dL/db is the sum of the upstream gradient over the rows.
        np.testing.assert_allclose(layer._b.get_gradient(),
                                   np.array([sum(upstream)]))
    def __init__(self,
                 in_dim,
                 hl_sizes,
                 out_dim,
                 isClassification=True,
                 learning_rate=.0001):
        """Assemble the graph, its cost and optimizer, then start a session.

        Args:
            in_dim: Width of the rows fed to the ``X`` placeholder.
            hl_sizes: Widths handed to the residual block; ``hl_sizes[0]`` is
                also the output width of the leading plain layer.
            out_dim: Width of the final layer and of the ``Y`` placeholder.
            isClassification: When true, use softmax cross-entropy as the
                cost; otherwise use the summed squared difference.
            learning_rate: Learning rate passed to ``AdamOptimizer``.
        """
        self.X = tf.placeholder(dtype=tf.float32,
                                shape=[None, in_dim],
                                name="X")
        self.Y = tf.placeholder(dtype=tf.float32,
                                shape=[None, out_dim],
                                name="Y")
        self.blocks = []
        self.learning_rate = learning_rate
        self.isClassification = isClassification

        # Plain layer -> residual block -> plain output layer (no activation).
        first_width = hl_sizes[0]
        self.blocks.append(FullyConnectedLayer(in_dim, first_width))
        self.blocks.append(
            FullyConnectedResNetBlock(first_width, hl_sizes, bias=False))
        self.blocks.append(
            FullyConnectedLayer(hl_sizes[-1], out_dim, activation_fun=None))

        # Thread the input placeholder through every block in order.
        activations = self.X
        for block in self.blocks:
            activations = block.forward(activations)
        self.Y_pred = activations

        # Pick the per-example loss; the reduction and optimizer are shared.
        if self.isClassification:
            self.Yk = tf.nn.softmax(self.Y_pred, axis=1)
            per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.Y, logits=self.Y_pred, dim=1)
        else:  # Is Regressive
            per_example_loss = tf.squared_difference(self.Y_pred, self.Y)
        self.cost = tf.reduce_sum(per_example_loss)
        self.optimizer = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.cost)

        # Start the session
        self.set_session(tf.Session())
        self.sess.run(tf.global_variables_initializer())
        print(
            "Session Initialized: Network Params will be dumped to CNN_Parameters.txt"
        )
    def __init__(self, input_dim, output_dim, hidden_layer_sizes = [64], activation_fun = tf.nn.relu, use_res_net_blocks = True, training_rate = 1e-3):
        """Build the graph: hidden stack, linear output layer, cost and train op.

        Args:
            input_dim: Width of the rows fed to the ``X`` placeholder.
            output_dim: Output width of the final layer. The prediction is
                flattened to 1-D and compared with the 1-D ``Y`` placeholder,
                so this presumably needs to be 1 -- TODO confirm.
            hidden_layer_sizes: Output width of each hidden layer, in order.
            activation_fun: Activation passed to every hidden layer/block.
            use_res_net_blocks: When true, every hidden layer after the first
                is a single-layer ``FullyConnectedResNetBlock``; otherwise all
                hidden layers are plain ``FullyConnectedLayer`` instances.
            training_rate: Learning rate passed to ``AdamOptimizer``.
        """
        self.X = tf.placeholder(tf.float32, shape=[None,input_dim], name = "inputs")
        self.Y = tf.placeholder(tf.float32, shape =[None,], name = "actions")
        self.hidden_layers = []
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.training_rate = training_rate
        # Copy so the shared default list (or the caller's list) is never
        # aliased onto the instance.
        self.hl_sizes = list(hidden_layer_sizes)
        self.experiences = {'states':[], 'values': []}
        self.min_experiences = 100
        # NOTE(review): the attribute name is misspelled; it is kept as-is
        # because code outside this block may read it under this name.
        self.max_expereinces = 1000
        self.batch_size = 32
        self.Xs = []
        self.Ys = []

        # The original res-net branch read an undefined name `hl_sizes`, which
        # raised NameError with the default arguments; use self.hl_sizes.
        idim = input_dim
        for i, width in enumerate(self.hl_sizes):
            if use_res_net_blocks and i > 0:
                layer = FullyConnectedResNetBlock(idim, [width], activation_fun = activation_fun, batch_normalization = False)
            else:
                layer = FullyConnectedLayer(idim, width, activation_fun = activation_fun)
            self.hidden_layers.append(layer)
            idim = width

        # Computes the Value of the current state
        self.h_last = FullyConnectedLayer(idim, output_dim, activation_fun = None)

        # Graph abstraction
        Z = self.X
        for hl in self.hidden_layers:
            Z = hl.forward(Z)
        self.Y_pred = self.h_last.forward(Z)
        self.Y_pred = tf.reshape(self.Y_pred, [-1])

        # Cost: summed squared error between targets and predictions.
        self.cost = tf.reduce_sum(tf.square(self.Y - self.Y_pred))
        self.train_op = tf.train.AdamOptimizer(self.training_rate).minimize(self.cost)
 def buildNetwork(self):
     """
     Create the fixed stack of layers and connect each one to the next.

     The last layer is an OutputLayer sized by self.numOutputs; every other
     size is hard-coded here.
     """
     self.inputLayer = InputLayer()
     conv1 = ConvolutionalLayer(5,10)
     pool1 = PoolingLayer(4)
     relu1 = ReluLayer()
     conv2 = ConvolutionalLayer(4,20)
     pool2 = PoolingLayer(2)
     flatten = FlattenLayer()
     relu2 = ReluLayer()
     dense = FullyConnectedLayer(20)
     self.outputLayer = OutputLayer(self.numOutputs)

     # Layers in connection order (presumably a.connect(b) makes a feed b).
     # Note that relu2 is wired in before flatten.
     pipeline = [self.inputLayer, conv1, pool1, relu1, conv2, pool2,
                 relu2, flatten, dense, self.outputLayer]
     # Wire adjacent pairs starting from the output end and working back.
     for source, sink in reversed(list(zip(pipeline, pipeline[1:]))):
         source.connect(sink)
Exemple #7
0
    def test_forward_backward_4_no_activation(self):
        """Check forward output and weight gradient of a 2-in / 4-out layer with Identity activation."""
        inputs = np.arange(6).reshape(3, 2)
        input_node = Variable(inputs)
        layer = FullyConnectedLayer(2, 4, Identity, input_node)
        # Snapshot the parameters before anything runs.
        weights = layer._w._value.copy()
        bias = layer._b._value.copy()

        expected_output = inputs @ weights + bias
        np.testing.assert_almost_equal(layer.forward(), expected_output)

        # Upstream gradient with one row per input row, one column per output.
        upstream = np.arange(6, 6 + 3 * 4).reshape(3, 4)
        layer.backward(upstream)

        actual_dw = layer._w.get_gradient()
        # Entry (i, j) is input column i dotted with upstream column j.
        n_in, n_out = 2, 4
        expected_dw = np.array([
            [inputs[:, i].T @ upstream[:, j] for j in range(n_out)]
            for i in range(n_in)
        ])

        np.testing.assert_allclose(actual_dw, expected_dw)
    def __init__(self, input_dim = 2, 
                output_dim = 1, hl_sizes = [64,64,64], 
                use_res_net = True, activation_fun = tf.nn.relu,
                training_rate = 1e-3, eps = 1.0):
        """Build the policy graph: hidden stack, mean/std layers, sampling op and train op.

        Args:
            input_dim: Width of the rows fed to the ``X`` placeholder.
            output_dim: Output width of the mean and std layers.
            hl_sizes: Output width of each hidden layer, in order.
            use_res_net: When true, every hidden layer after the first is a
                single-layer ``FullyConnectedResNetBlock``; otherwise all
                hidden layers are plain ``FullyConnectedLayer`` instances.
            activation_fun: Activation passed to every hidden layer/block.
            training_rate: Learning rate passed to ``AdamOptimizer``.
            eps: Stored on the instance; not otherwise used here.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.X = tf.placeholder(tf.float32, shape=[None,input_dim], name = "inputs")
        self.actions = tf.placeholder(tf.float32, shape =[None,], name = "actions")
        self.advantages = tf.placeholder(tf.float32, shape=[None,], name = "advantages")
        self.hidden_layers = []
        self.training_rate = training_rate
        self.min_experiences = 100
        self.max_experiences = 1000
        self.batch_size = 32
        self.inner_counter = 0.0
        self.eps = eps
        self.experiences = {'states': [], 'actions': [], 'advantages': []}

        # Hidden stack: the first layer is always plain; later layers are
        # residual blocks only when use_res_net is set.
        width_in = input_dim
        for position, width_out in enumerate(hl_sizes):
            if use_res_net and position > 0:
                hidden = FullyConnectedResNetBlock(width_in, [width_out], activation_fun = activation_fun, batch_normalization = False)
            else:
                hidden = FullyConnectedLayer(width_in, width_out, activation_fun = activation_fun)
            self.hidden_layers.append(hidden)
            width_in = width_out

        # Two output layers share the last hidden output: one for the mean
        # (no activation) and one for the standard deviation (relu).
        self.Y_mean = FullyConnectedLayer(width_in, output_dim, activation_fun = None)
        self.Y_std = FullyConnectedLayer(width_in, output_dim, activation_fun= tf.nn.relu)

        # Roll the input through the hidden stack.
        features = self.X
        for hidden in self.hidden_layers:
            features = hidden.forward(features)

        mean = tf.reshape(self.Y_mean.forward(features), [-1])
        # The small constant keeps the std strictly positive after relu.
        std = tf.reshape(self.Y_std.forward(features), [-1]) + 1e-5

        # Sample an action from the normal distribution and clip to [-1, 1].
        norm = tf.contrib.distributions.Normal(mean, std)
        self.predict_op = tf.clip_by_value(norm.sample(), -1,1)

        # Cost: negative sum of advantage-weighted log-probability plus an
        # entropy term scaled by 0.1.
        log_probs = norm.log_prob(self.actions)
        weighted_log_probs = self.advantages * log_probs
        entropy_term = .1*norm.entropy()
        self.cost = -tf.reduce_sum(weighted_log_probs + entropy_term)
        self.train_op = tf.train.AdamOptimizer(self.training_rate).minimize(self.cost)
Exemple #9
0
        theano.config.floatX = 'float32'
    else:
        print ("Running with a CPU.  If this is not desired, then the modify "+\
            "network3.py to set\nthe GPU flag to True.")

    training_data, validation_data, test_data = load_data_shared()

    mini_batch_size = 10
    from Network import Network
    from FullyConnectedLayer import FullyConnectedLayer
    from SoftmaxLayer import SoftmaxLayer
    from ConvPoolLayer import ConvPoolLayer
    

    net = Network([
            FullyConnectedLayer(n_in=784, n_out=100),
            SoftmaxLayer(n_in=100, n_out=10)
        ], mini_batch_size)

    net.SGD(training_data, 1, mini_batch_size, 0.1, validation_data, test_data)

    # add a convolutional layer: 
    
    net = Network([
            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                        filter_shape=(20, 1, 5, 5),
                        poolsize=(2, 2)),
            FullyConnectedLayer(n_in=20*12*12, n_out=100),
            SoftmaxLayer(n_in=100, n_out=10)], 
                mini_batch_size)
Exemple #10
0
from Network import Network
from FullyConnectedLayer import FullyConnectedLayer
import numpy as np

# Four training rows; the third column is always 1.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])
# Targets: XOR of the first two input columns.
y = np.array([[0], [1], [1], [0]])

# Two-layer network: input width -> 4 -> 1, both layers using 'sigmoid'.
layers = []
layers.append(FullyConnectedLayer(X.shape[1], 4, False, 'sigmoid'))
layers.append(FullyConnectedLayer(4, 1, True, 'sigmoid'))

net = Network(8000, layers)
net.train(X, y)
Exemple #11
0
import numpy as np
from FullyConnectedLayer import FullyConnectedLayer
from OutputLayer import OutputLayer

# Three-layer chain: 17*17 units -> 50 units -> 10-unit output layer.
l1 = FullyConnectedLayer(17*17)
l2 = FullyConnectedLayer(50)
l3 = OutputLayer(10)

# append() is chained, so its return value must itself support append().
l1.append(l2).append(l3)

# Forward pass with an all-ones input vector.
l1.forward(np.ones(17*17))

# Single-argument print(...) behaves the same on Python 2 and Python 3;
# the original print statements were a SyntaxError on Python 3.
print(l1.getUnits())
print(l2.getUnits())
print(l3.getUnits())

# Give the output layer a one-hot target (class 0), then run the backward pass.
l3.setTrainData(np.array([1,0,0,0,0,0,0,0,0,0]))
l3.backward()
Exemple #12
0
    return np.sum(a) / float(X.shape[0]) * 100.

def one_hot(x, size):
    """Return a ``(len(x), size)`` float array with a 1 at column ``x[i]`` of row ``i``.

    Args:
        x: 1-D integer array of class indices.
        size: Number of columns (classes) in the encoding.
    """
    n_samples = x.shape[0]
    encoded = np.zeros((n_samples, size))
    row_index = np.arange(n_samples)
    # Fancy indexing sets exactly one entry per row.
    encoded[row_index, x] = 1.
    return encoded

if __name__ == '__main__':
    batch_size = 20

    # A simple strided convnet
    layers = [
        ConvolutionLayer((4, 4, 1, 20), strides=2, activation=lkrelu, filter_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (28*28 + 13*13*20)) ),
        ConvolutionLayer((5, 5, 20, 40), strides=2, activation=lkrelu, filter_init=lambda shp:  np.random.normal(size=shp) *  np.sqrt(1.0 / (13*13*20 + 5*5*40)) ),
        FlattenLayer((5, 5, 40)),
        FullyConnectedLayer((5*5*40, 100), activation=sigmoid, weight_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (5*5*40 + 100.))),
        FullyConnectedLayer((100, 10), activation=linear, weight_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (110.)))
    ]
    lr = 0.001
    k = 2000
    net = CNNetwork(layers, learning_rate=lr, loss_function=cross_entropy)

    (train_data_X, train_data_Y), v, (tx, ty) = mnist_loader.load_data('./data/mnist.pkl.gz')
    train_data_Y = one_hot(train_data_Y, size=10)
    ty = one_hot(ty, size=10)
    train_data_X = np.reshape(train_data_X, [-1, 28, 28, 1])
    tx = np.reshape(tx, [-1, 28, 28, 1])
    for epoch in range(100000):
        shuffled_index = np.random.permutation(train_data_X.shape[0])

        batch_train_X = train_data_X[shuffled_index[:batch_size]]
Exemple #13
0
testdata = []
for i in numbers:
    number_path_train = os.path.join(dataset_path, 'train', str(i))
    for filename in os.listdir(number_path_train):
        traindata.append((os.path.join(dataset_path, 'train', str(i), filename), i))

    number_path_test = os.path.join(dataset_path, 'test', str(i))
    for filename in os.listdir(number_path_test):
        testdata.append((os.path.join(dataset_path, 'test', str(i), filename), i))


random.shuffle(traindata)
random.shuffle(testdata)

lr = 0.01
l1 = FullyConnectedLayer(28*28, lr)
l2 = FullyConnectedLayer(30, lr)
l3 = OutputLayer(10, lr)

l1.append(l2).append(l3)

if mode == 'train':

    n_iter = 3
    for j in range(0, n_iter):
        i = 0
        for (image_path, label) in traindata:
            im_pil = Image.open(image_path)
            im = np.asarray(im_pil, dtype=np.float)
            im = im / 255
        
class PolicyModel:
    """Gaussian policy network trained from a buffer of (state, action, advantage) samples."""

    def __init__(self, input_dim = 2, 
                output_dim = 1, hl_sizes = [64,64,64], 
                use_res_net = True, activation_fun = tf.nn.relu,
                training_rate = 1e-3, eps = 1.0):
        """Build the policy graph: hidden stack, mean/std layers, sampling op and train op.

        Args:
            input_dim: Width of the rows fed to the ``X`` placeholder.
            output_dim: Output width of the mean and std layers.
            hl_sizes: Output width of each hidden layer, in order.
            use_res_net: When true, every hidden layer after the first is a
                single-layer ``FullyConnectedResNetBlock``; otherwise all
                hidden layers are plain ``FullyConnectedLayer`` instances.
            activation_fun: Activation passed to every hidden layer/block.
            training_rate: Learning rate passed to ``AdamOptimizer``.
            eps: Stored on the instance; not otherwise used in this class.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.X = tf.placeholder(tf.float32, shape=[None,input_dim], name = "inputs")
        self.actions = tf.placeholder(tf.float32, shape =[None,], name = "actions")
        self.advantages = tf.placeholder(tf.float32, shape=[None,], name = "advantages")
        self.hidden_layers = []
        self.training_rate = training_rate
        self.min_experiences = 100
        self.max_experiences = 1000
        self.batch_size = 32
        self.inner_counter = 0.0
        self.eps = eps
        self.experiences = {'states': [], 'actions': [], 'advantages': []}

        # Hidden stack: the first layer is always plain; later layers are
        # residual blocks only when use_res_net is set.
        idim = input_dim
        for i, width in enumerate(hl_sizes):
            if use_res_net and i > 0:
                layer = FullyConnectedResNetBlock(idim, [width], activation_fun = activation_fun, batch_normalization = False)
            else:
                layer = FullyConnectedLayer(idim, width, activation_fun = activation_fun)
            self.hidden_layers.append(layer)
            idim = width

        # Two output layers share the last hidden output: one for the mean
        # (no activation) and one for the standard deviation (relu).
        self.Y_mean = FullyConnectedLayer(idim, output_dim, activation_fun = None)
        self.Y_std = FullyConnectedLayer(idim, output_dim, activation_fun= tf.nn.relu)

        # Rollout Abstraction
        Z = self.X
        for hl in self.hidden_layers:
            Z = hl.forward(Z)
        mean = tf.reshape(self.Y_mean.forward(Z), [-1])
        # The small constant keeps the std strictly positive after relu.
        std = tf.reshape(self.Y_std.forward(Z), [-1]) + 1e-5

        # Sample from the normal distribution and clip to [-1, 1].
        norm = tf.contrib.distributions.Normal(mean, std)
        self.predict_op = tf.clip_by_value(norm.sample(), -1,1)

        # Cost: negative sum of advantage-weighted log-probability plus an
        # entropy term scaled by 0.1.
        log_probs = norm.log_prob(self.actions)
        self.cost = -tf.reduce_sum(self.advantages * log_probs + .1*norm.entropy())
        self.train_op = tf.train.AdamOptimizer(self.training_rate).minimize(self.cost)

    def set_session(self, sess):
        """Store the TensorFlow session used by ``partial_fit`` and ``predict``."""
        self.session = sess

    def partial_fit(self, X, actions, advantages, printOp = True):
        """Buffer one sample and, once enough are stored, train on a random minibatch.

        Args:
            X: State to store; reshaped to ``(batch_size, input_dim)`` together
                with the other sampled states before being fed to the graph.
            actions: Action taken; promoted to at least 1-D before storing.
            advantages: Advantage of the action; promoted to at least 1-D.
            printOp: When true, print the loss of the training step.

        Returns:
            None. Nothing is trained until ``min_experiences`` samples are stored.
        """
        actions = np.atleast_1d(actions)
        advantages = np.atleast_1d(advantages)
        self.experiences['actions'].append(actions)
        self.experiences['advantages'].append(advantages)
        self.experiences['states'].append(X)
        stored = len(self.experiences['advantages'])
        if stored < self.min_experiences:
            return
        if stored > self.max_experiences:
            # Drop the oldest sample from each buffer to cap their length.
            self.experiences['advantages'].pop(0)
            self.experiences['actions'].pop(0)
            self.experiences['states'].pop(0)

        # Draw batch_size buffer indices (np.random.choice samples with
        # replacement by default).
        indxs = np.random.choice(len(self.experiences['advantages']), self.batch_size)
        states = [self.experiences['states'][indx] for indx in indxs]
        states = np.reshape(np.array(states), (self.batch_size, self.input_dim))
        actions = [self.experiences['actions'][indx] for indx in indxs]
        actions = np.reshape(actions, (self.batch_size,))
        advantages = [self.experiences['advantages'][indx] for indx in indxs]
        advantages = np.reshape(advantages, (self.batch_size,))
        loss, _ = self.session.run([self.cost, self.train_op], feed_dict={self.X: states, self.actions: actions, self.advantages: advantages})
        if printOp:
            print("Computed Partial Fit with Loss of: ", loss)

    def predict(self, X):
        """Run the sampling op on ``X`` (promoted to 2-D) and return the result."""
        X = np.atleast_2d(X)
        return self.session.run(self.predict_op, feed_dict={self.X: X})

    def sample_action(self, X):
        """Return the first sampled action for ``X`` as a plain Python scalar."""
        # np.asscalar was deprecated in NumPy 1.16 and removed in 1.23;
        # .item() is the documented equivalent.
        return self.predict(X)[0].item()
Exemple #15
0
import numpy as np

from NeuralNetwork import NeuralNetwork
from FullyConnectedLayer import FullyConnectedLayer
from ActivationLayer import ActivationLayer
from ActivationFunctions import tanh, tanhDerivative
from LossFunction import meanSquaredError, meanSquaredErrorDerivative

# Sample training data: the four XOR input pairs and their expected outputs.
inputData = np.array([[[0, 0]], [[0, 1]], [[1, 0]], [[1, 1]]])
expectedOutput = np.array([[[0]], [[1]], [[1]], [[0]]])

# Creating a neural network with 3 nodes in first hidden layer, 1 node in final layer, with activation functions
# after each layer
network = NeuralNetwork()
network.add(FullyConnectedLayer(2, 3))
network.add(ActivationLayer(tanh, tanhDerivative))
network.add(FullyConnectedLayer(3, 1))
network.add(ActivationLayer(tanh, tanhDerivative))

# Training network
network.setLoss(meanSquaredError, meanSquaredErrorDerivative)
network.train(inputData, expectedOutput, epochs=1000, learningRate=.1)

# Test the network
output = network.predict(inputData)
# Iterate the three sequences in lockstep; this avoids index arithmetic and
# no longer rebinds the builtin name `set` as the loop variable.
for sample, predicted, expected in zip(inputData, output, expectedOutput):
    print("For set {} my prediction is {}. The correct value is {}".format(
        sample, predicted, expected))
Exemple #16
0
    def __init__(self, input_dims, output_dim,
                 fc_hl_sizes, isClassification = True, use_resnet = True ):
        """Build the convolutional + fully connected graph and start a session.

        Args:
            input_dims: Image dimensions; must have exactly three entries, the
                third being the channel count fed to the first conv block.
            output_dim: Width of the final layer and of the ``Y`` placeholder.
            fc_hl_sizes: Output width of each hidden fully connected layer.
            isClassification: When true, use softmax cross-entropy as the
                cost; otherwise use the summed squared difference.
            use_resnet: Selects the ResNet-style block stack instead of the
                VGG-style stack.
        """
        self.isClassification = isClassification
        self.output_dim = output_dim
        self.input_dims = input_dims

        # Containers for the two halves of the network.
        self.CNN_Block_Layers = []
        self.FC_Layers = []
        x_dims = [None] + list(input_dims)

        # Images must be given as 3D (a 2D image gets a channel dimension of 1).
        assert len(input_dims) == 3
        self.X = tf.placeholder(dtype = tf.float32, shape = x_dims, name = "X")
        self.Y = tf.placeholder(dtype = tf.float32, shape = [None, output_dim], name = "Y")

        # Per-block pooling settings; 'Conv_Stride' is created but never filled here.
        self.pool_stride_block_settings = {'Num_Pools': [], 'Conv_Stride': [] ,'Pool_Stride': []}

        if use_resnet:
            # OPTION 2: ResNet-style stack between two VGG blocks.
            conv_stack = [
                CNNBlocks.VGGConvPoolBlock64(),
                CNNBlocks.ResNetBlock128(),
                CNNBlocks.ResNetBlock128(),
                CNNBlocks.ResNetBlock128(),
                CNNBlocks.VGGConvPoolBlock128(),
            ]
        else:
            # OPTION 1: VGG-style stack.
            conv_stack = [
                CNNBlocks.VGGConvPoolBlock64(),
                CNNBlocks.VGGConvPoolBlock64(),
                CNNBlocks.VGGConvPoolBlock128(),
                CNNBlocks.VGGConvPoolBlock256(),
                CNNBlocks.VGGConvPoolBlock512(),
                CNNBlocks.VGGConvPoolBlock512(),
            ]
        self.CNN_Block_Layers.extend(conv_stack)

        self.num_conv_blocks = len(self.CNN_Block_Layers)

        # Chain channel counts: each block's input is the previous block's output.
        channels = self.input_dims[2]
        for block in self.CNN_Block_Layers:
            block.set_layer_input_channel_dim(channels)
            channels = block.get_layer_output_channel_dim()

            self.pool_stride_block_settings['Num_Pools'].append(block.get_num_pools())
            self.pool_stride_block_settings['Pool_Stride'].append(block.get_block_pool_stride())

        # Flattened size of the conv output, derived from the pooling settings.
        FC_INPUT_SIZE = int(self.get_FC_input_size(input_dims,
                                               self.pool_stride_block_settings['Num_Pools'],
                                               self.pool_stride_block_settings['Pool_Stride']))
        print("Fully Connected Input Size: ", FC_INPUT_SIZE)

        # Hidden fully connected layers followed by the output layer (no activation).
        width_in = FC_INPUT_SIZE
        for width_out in fc_hl_sizes:
            self.FC_Layers.append(FullyConnectedLayer(width_in, width_out))
            width_in = width_out
        self.FC_Layers.append(FullyConnectedLayer(width_in, self.output_dim, activation_fun = None))

        # Convolutional rollout.
        Z = self.X
        for block in self.CNN_Block_Layers:
            Z = block.forward(Z)

        # Flatten, then run every fully connected layer except the last.
        Z = tf.reshape(Z,(-1,FC_INPUT_SIZE))
        for layer in self.FC_Layers[:-1]:
            Z = layer.forward(Z)

        self.Y_pred = self.FC_Layers[-1].forward(Z)

        # Pick the per-example loss; the reduction and optimizer are shared.
        if self.isClassification:
            self.Yk = tf.nn.softmax(self.Y_pred, axis = 1)
            per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels = self.Y, logits = self.Y_pred, dim = 1)
        else: # Is Regressive
            per_example_loss = tf.squared_difference(self.Y_pred, self.Y)
        self.cost = tf.reduce_sum(per_example_loss)
        self.optimizer = tf.train.AdamOptimizer(.001).minimize(self.cost)

        # Start the session
        self.set_session(tf.Session())
        self.sess.run(tf.global_variables_initializer())
        print("Session Initialized: Network Params will be dumped to CNN_Parameters.txt")
Exemple #17
0
def testGradient():
    """Test the backprop implementation by checking the gradients on a small network.

    Runs one forward and backward pass on the first image, then estimates the
    gradient of the cost numerically with central differences, perturbing one
    parameter at a time. The backprop gradient and the numerical estimate for
    the convolutional weights are printed for comparison.
    """

    # load the training data
    images, labels = load_mnist()
    images /= 255.0

    grad_images = images[:,:,0:10] #use 10 image subset for gradient checking
    grad_labels = labels[0,0:10] #respective labels for the images--going to have to encode these labels

    # Only the first image is used for the check.
    first_image = grad_images[:,:,0].reshape((28,28,1))

    # create a small network, 1 conv layer + 1 pooling layer + 1 fully connected softmax

    # convolutional layer, taking in a 28x28 image, using 2 9x9 filters
    # output should be 2 28-9+1x28-9+1 = 2 20x20 feature maps in a (20, 20, 2) form
    layer0 = ConvLayer(first_image, (28, 28, 1), (9, 9, 2, 1))
    print("initalized convolutional layer")
    layer0.forwardprop(first_image)
    print("finished forward pass of convolutional layer")

    # pooling layer, taking in 2 20x20 feature maps
    # output should be 2 10x10 feature maps (though may want to downsample 5x for gradient check)
    layer1 = PoolingLayer(layer0.output, (20, 20, 2))
    print("initialized pooling layer")
    layer1.downsample(layer0.output, (20, 20, 2))
    print("finished forward pass of pooling layer")

    # fully-connected softmax layer, taking in 2 10x10 feature maps (if downsampled by 2)
    # or taking in 2 4x4 feature maps (if downsampled by 5)
    # either way, flattened into a long input vector
    full_conn_input = layer1.output.flatten()
    layer2 = FullyConnectedLayer(full_conn_input.reshape((full_conn_input.size, 1)), full_conn_input.size, 10)
    print("initialized fully-conn layer")
    layer2.softmax_output(full_conn_input.reshape((full_conn_input.size, 1)))
    print("finished forward pass of fully-conn layer")

    # perform backpropagation
    # NOTE(review): this sets target[i] when the i-th image's label equals 1,
    # which does not look like a one-hot encoding of the first image's label
    # -- confirm the intended target before relying on this check.
    target = np.zeros((10,1))
    for i in range(0, 10):
        if grad_labels[i] == 1:
            target[i] = 1
    layer2.backprop(0, 0, target)
    print("finished layer 2 backprop")
    layer1.upsample(layer2, 0)
    print("finished layer 1 backprop")
    layer0.backprop(layer1)
    print("finished layer 0 backprop")

    # # after initialization, finish training
    # for i in range(1, grad_labels.size):
    #     # forward propagation
    #     layer0.forwardprop(grad_images[:,:,i].reshape((28,28,1)))
    #     layer1.downsample(layer0.output, (20,20,2))
    #     full_conn_input = layer1.output.flatten()
    #     layer2.softmax_output(full_conn_input.reshape((full_conn_input.size, 1)))
    #
    #     # backpropagation
    #     target = np.zeros((10,1))
    #     for j in range(0,10):
    #         if grad_labels[i] == 1:
    #             target[i] = 1
    #     layer2.backprop(0, 0, target)
    #     layer1.upsample(layer2, 0)
    #     layer0.backprop(layer1)

    # check the gradient
    epsilon = 1.0e-4

    # flatten() returns copies, so these snapshots are unaffected when the
    # layers' parameters are overwritten below.
    layer0_w_vec = layer0.W.flatten()
    layer0_bias_vec = layer0.bias.flatten()
    layer0_gradw = layer0.gradient_w.flatten()
    layer0_gradb = layer0.gradient_b.flatten()

    layer2_w_vec = layer2.W.flatten()
    layer2_bias_vec = layer2.bias.flatten()
    layer2_gradw = layer2.gradient_w.flatten()
    layer2_gradb = layer2.gradient_b.flatten()

    w_vec = np.concatenate((layer0_w_vec, layer0_bias_vec, layer2_w_vec, layer2_bias_vec))
    # Backprop gradient in the same layout as w_vec; not used further in this
    # function, kept so it can be compared against gradient_check.
    backprop_vec = np.concatenate((layer0_gradw, layer0_gradb, layer2_gradw, layer2_gradb))
    print(layer0_gradw)

    # Boundaries of each parameter group inside the flat vector.
    w0_end = layer0_w_vec.size
    b0_end = w0_end + layer0_bias_vec.size
    w2_end = b0_end + layer2_w_vec.size

    def cost_at(params):
        """Load the flat vector `params` into the layers, rerun the forward pass, return J."""
        layer0.W = params[:w0_end].reshape(layer0.filter_shape)
        layer0.bias = params[w0_end:b0_end].reshape(layer0.bias_shape)
        layer2.W = params[b0_end:w2_end].reshape(layer2.W.shape)
        layer2.bias = params[w2_end:].reshape(layer2.bias.shape)

        layer0.forwardprop(first_image)
        layer1.downsample(layer0.output, (20,20,2))
        flat_input = layer1.output.flatten()
        layer2.softmax_output(flat_input.reshape((flat_input.size, 1)))
        return J(layer2.output, grad_labels[0])

    gradient_check = np.zeros(w_vec.size)
    for i in range(0, w_vec.size):
        # Copies are essential: the original bound pos and neg to w_vec
        # itself, so the +epsilon and -epsilon steps cancelled and both
        # evaluations used identical parameters.
        pos = w_vec.copy()
        pos[i] += epsilon
        neg = w_vec.copy()
        neg[i] -= epsilon
        # central difference: (J(w+e) - J(w-e)) / 2e
        gradient_check[i] = (cost_at(pos) - cost_at(neg))/(2*epsilon)

    # print gradient_check
    print(gradient_check[0:layer0_w_vec.size])
Exemple #18
0
def SGD_train(minibatch_size, data, labels, alpha, momentum, epochs):
    """Train the network with stochastic gradient descent

    Builds a three-layer network (convolution -> pooling -> fully-connected
    softmax) and updates its parameters with momentum SGD.  Progress is
    reported with ``print``; the layers are local to this function and
    nothing is returned.

    :type minibatch_size: an integer
    :param minibatch_size: the size of the minibatches (usually something like 256)

    :type data: 3D matrix height x width x num training data pts.
    :param data: A 3D matrix that contains all of the training data points of the set

    :type labels: num training data pts x 1 vector
    :param labels: the labels for each image

    :type alpha: float
    :param alpha: the learning rate (halved after every epoch)

    :type momentum: float
    :param momentum: the initial momentum; it is overwritten with 0.9 once
        the 20th minibatch (counted across epochs) is reached

    :type epochs: an integer
    :param epochs: the number of epochs (ie. iterations) through the training
    """

    it = 0
    # convolutional layer, taking in a 28x28 image, using 2 9x9 filters
    # output should be 2 28-9+1x28-9+1 = 2 20x20 feature maps in a (20, 20, 2) form
    layer0 = ConvLayer((28, 28, 1), (9,9,2))
    print("initialized convolutional layer")
    # pooling layer, taking in 2 20x20 feature maps
    # output should be 2 10x10 feature maps
    layer1 = PoolingLayer((20, 20, 2))
    print("initialized pooling layer")
    # fully-connected softmax layer, taking in 2 10x10 feature maps (if downsampled by 2)
    # flattened into a long input vector
    layer2 = FullyConnectedLayer(200, 10)
    print("initialized fully-connected layer")
    # All trainable parameters unrolled into one vector, in the order
    # [layer0.W, layer0.bias, layer2.W, layer2.bias]; the gradient vector and
    # the write-back below use the same order.
    params = np.concatenate((layer0.W.flatten(), layer0.bias.flatten(), layer2.W.flatten(), layer2.bias.flatten()))
    velocity = np.zeros(params.shape)

    num_points = data.shape[2]

    for i in range(0, epochs):
        correct_class = 0
        cost = 0.0
        # number of samples actually pushed through the network this epoch
        samples_seen = 0
        # shuffle the dataset--shuffle_vec will be used as indices
        shuffle_vec = rand.permutation(num_points)

        # Only full minibatches are used; a trailing partial batch is dropped.
        for j in range(0, num_points - minibatch_size + 1, minibatch_size):
            # perform gradient descent w/each batch
            it += 1

            if it == 20:
                # increase momentum after 20 iterations
                momentum = 0.9

            # grad accumulates the unrolled gradients of every sample in the
            # minibatch and is averaged after the loop
            for k in range(0, minibatch_size):
                sample = shuffle_vec[j+k]

                # forward pass
                layer0.forwardprop(data[:,:,sample].reshape((28,28,1)))
                layer1.downsample(layer0.output, (20,20,2))
                layer2_input = layer1.output.flatten()
                layer2.softmax_output(layer2_input.reshape((layer2_input.size, 1)))
                cost += J(layer2.output, labels[sample])

                if np.argmax(layer2.output) == labels[sample]:
                    correct_class += 1

                # backprop
                layer2.backprop(0, 0, encode_label(labels[sample]))
                layer1.upsample(layer2, 0)
                layer0.backprop(layer1)

                # flatten the gradient vector (np.concatenate returns a fresh
                # array, so accumulating in place never touches layer state)
                sample_grad = np.concatenate((layer0.gradient_w.flatten(), layer0.gradient_b.flatten(), layer2.gradient_w.flatten(), layer2.gradient_b.flatten()))
                if k == 0:
                    grad = sample_grad
                else:
                    grad += sample_grad

            samples_seen += minibatch_size
            grad /= minibatch_size
            # update velocity vector
            velocity = momentum*velocity + alpha*grad
            params = params - velocity

            # write the updated parameters back into the layers, slicing the
            # unrolled vector in the same order it was built
            begin = 0
            end = layer0.W.size
            layer0.W = params[begin:end].reshape(layer0.W.shape)
            begin, end = end, end + layer0.bias.size
            layer0.bias = params[begin:end].reshape(layer0.bias.shape)
            begin, end = end, end + layer2.W.size
            layer2.W = params[begin:end].reshape(layer2.W.shape)
            layer2.bias = params[end:].reshape(layer2.bias.shape)

        # reduce learning rate by half after each epoch
        alpha /= 2.0
        print("%d correct classifications" % correct_class)
        # Average the accumulated cost over the samples that were really
        # processed.  (Dividing by minibatch_size*(N - minibatch_size + 1)
        # under-reports the cost and is zero/negative for tiny datasets.)
        if samples_seen:
            mean_cost = cost / samples_seen
        else:
            # no full minibatch fits in the dataset, so nothing was trained
            mean_cost = 0.0
        print("cost function is  %s" % (mean_cost,))
class ValueModel:
    """Feed-forward regressor trained from a bounded replay buffer.

    The TensorFlow graph (placeholders, hidden layers, squared-error cost and
    Adam training op) is built in ``__init__``.  A session must be supplied
    through ``set_session`` before ``partial_fit`` or ``predict`` is used.
    """

    def __init__(self, input_dim, output_dim, hidden_layer_sizes = [64], activation_fun = tf.nn.relu, use_res_net_blocks = True, training_rate = 1e-3):
        """Build the network graph and its training op.

        :param input_dim: number of features of one input row
        :param output_dim: width of the final (linear) layer
        :param hidden_layer_sizes: sizes of the hidden layers, in order
        :param activation_fun: activation passed to every hidden layer
        :param use_res_net_blocks: when true, every hidden layer after the
            first is a ``FullyConnectedResNetBlock`` instead of a plain
            ``FullyConnectedLayer``
        :param training_rate: learning rate handed to the Adam optimizer
        """
        self.X = tf.placeholder(tf.float32, shape=[None,input_dim], name = "inputs")
        self.Y = tf.placeholder(tf.float32, shape =[None,], name = "actions")
        self.hidden_layers = []
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.training_rate = training_rate
        # Copy so the shared default list (or the caller's list) is never aliased.
        self.hl_sizes = list(hidden_layer_sizes)
        # Replay buffer; training starts once min_experiences samples are
        # stored and the oldest sample is dropped beyond max_expereinces.
        self.experiences = {'states':[], 'values': []}
        self.min_experiences = 100
        # NOTE: attribute name is misspelled but kept, since it is externally visible.
        self.max_expereinces = 1000
        self.batch_size = 32
        self.Xs = []
        self.Ys = []

        idim = input_dim
        if use_res_net_blocks: # Then fill out tensorboard with res net
            # The bare name ``hl_sizes`` was never defined here, so this
            # branch (the default) raised NameError; use the stored attribute.
            for i, size in enumerate(self.hl_sizes):
                if i == 0:
                    layer = FullyConnectedLayer(idim, size, activation_fun = activation_fun)
                else:
                    # NOTE(review): if the res-net block requires its input
                    # width to equal its last hidden width (needed to add the
                    # residual), consecutive hidden sizes must match -- confirm
                    # against FullyConnectedResNetBlock.
                    layer = FullyConnectedResNetBlock(idim, [size], activation_fun = activation_fun, batch_normalization = False)
                self.hidden_layers.append(layer)
                idim = size
        else: # Use regular fully connected layers
            for size in self.hl_sizes:
                self.hidden_layers.append(FullyConnectedLayer(idim, size, activation_fun = activation_fun))
                idim = size
        # Computes the Value of the current state
        self.h_last = FullyConnectedLayer(idim, output_dim, activation_fun = None)

        # Graph abstraction
        Z = self.X
        for hl in self.hidden_layers:
            Z = hl.forward(Z)
        # Flatten the prediction to 1-D so it can be subtracted from self.Y
        # (presumably output_dim is 1 in practice -- TODO confirm).
        self.Y_pred = tf.reshape(self.h_last.forward(Z), [-1])

        # Cost: sum of squared errors over the batch
        self.cost = tf.reduce_sum(tf.square(self.Y - self.Y_pred))
        self.train_op = tf.train.AdamOptimizer(self.training_rate).minimize(self.cost)

    def set_session(self, sess):
        """Store the TensorFlow session used by partial_fit and predict."""
        self.session = sess

    def partial_fit(self, X, Y, printOp = True):
        """Store one (state, value) sample and run one training step.

        The sample is appended to the replay buffer.  No training happens
        until the buffer holds ``min_experiences`` samples; after that, one
        Adam step is run on ``batch_size`` samples drawn from the buffer.

        :param X: one state; must reshape to ``input_dim`` values
        :param Y: the target value for that state
        :param printOp: when true, print the batch loss after the step
        """
        states = self.experiences['states']
        values = self.experiences['values']
        states.append(X)
        values.append(Y)
        if len(states) < self.min_experiences:
            return
        if len(states) > self.max_expereinces:
            # drop the oldest sample to keep the buffer bounded
            states.pop(0)
            values.pop(0)
        # np.random.choice samples with replacement by default
        indxs = np.random.choice(len(states), self.batch_size)
        batch_X = np.reshape(np.array([states[indx] for indx in indxs]), (self.batch_size, self.input_dim))
        batch_Y = np.reshape(np.array([values[indx] for indx in indxs]), (self.batch_size,))
        loss, _ = self.session.run([self.cost, self.train_op], feed_dict = {self.X: batch_X, self.Y: batch_Y})
        if printOp:
            print("Partial Fit Loss is: ", loss)

    def predict(self, X):
        """Return the flattened network output for the input rows ``X``."""
        return self.session.run(self.Y_pred, feed_dict = {self.X: X})