def buildNetwork(self):
     """
     Builds the neural network with a fixed structure,
     and a variable number of outputs.
     """
     self.inputLayer = InputLayer()
     convLayer = ConvolutionalLayer(5,10)
     poolLayer = PoolingLayer(4)
     reluLayer = ReluLayer()
     convLayer2 = ConvolutionalLayer(4,20)
     pool2Layer = PoolingLayer(2)
     flattenLayer = FlattenLayer()
     reluLayer2 = ReluLayer()
     fullLayer = FullyConnectedLayer(20)
     self.outputLayer = OutputLayer(self.numOutputs)
     fullLayer.connect(self.outputLayer)
     flattenLayer.connect(fullLayer)
     reluLayer2.connect(flattenLayer)
     pool2Layer.connect(reluLayer2)
     convLayer2.connect(pool2Layer)
     reluLayer.connect(convLayer2)
     poolLayer.connect(reluLayer)
     convLayer.connect(poolLayer)
     self.inputLayer.connect(convLayer)
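Read bottom-up, the connect() calls above wire the forward path input -> conv(5,10) -> pool(4) -> relu -> conv(4,20) -> pool(2) -> relu -> flatten -> fully connected(20) -> output. A minimal, self-contained sketch of that chaining pattern follows; the stub Layer class is illustrative only and stands in for the real layer classes, which are not defined in this snippet.

# Stand-in Layer stub so this sketch runs on its own.
class Layer(object):
    def __init__(self, name):
        self.name = name
        self.next = None

    def connect(self, next_layer):
        # attach the layer that consumes this layer's output, as in buildNetwork above
        self.next = next_layer

names = ["input", "conv 5,10", "pool 4", "relu", "conv 4,20",
         "pool 2", "relu", "flatten", "fully connected 20", "output"]
layers = [Layer(n) for n in names]
for layer, next_layer in zip(layers, layers[1:]):
    layer.connect(next_layer)

# Walk the chain from the input to confirm the forward order.
node = layers[0]
while node is not None:
    print(node.name)
    node = node.next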
Example #2
def SGD_train(minibatch_size, data, labels, alpha, momentum, epochs):
    """Train the network with stochastic gradient descent

    :type minibatch_size: an integer
    :param minibatch_size: the size of the minibatches (usually something like 256)

    :type data: 3D matrix height x width x num training data pts.
    :param data: A 3D matrix that contains all of the training data points of the set

    :type labels: num training data pts x 1 vector
    :param labels: the labels for each image

    :type alpha: float
    :param alpha: the learning rate

    :type momentum: float
    :param momentum: the momentum

    :type epochs: an integer
    :param epochs: the number of epochs (i.e. full passes through the training data)
    """

    it = 0
    # convolutional layer, taking in a 28x28 image, using 2 9x9 filters
    # output should be 2 feature maps of size (28-9+1) x (28-9+1) = 20x20, stored in a (20, 20, 2) array
    layer0 = ConvLayer((28, 28, 1), (9,9,2))
    print "initialized convolutional layer"
    # pooling layer, taking in 2 20x20 feature maps
    # output should be 2 10x10 feature maps
    layer1 = PoolingLayer((20, 20, 2))
    print "initialized pooling layer"
    # fully-connected softmax layer, taking in 2 10x10 feature maps (if downsampled by 2)
    # flattened into a long input vector
    layer2 = FullyConnectedLayer(200, 10)
    print "initialized fully-connected layer"
    params = np.concatenate((layer0.W.flatten(), layer0.bias.flatten(), layer2.W.flatten(), layer2.bias.flatten()))
    velocity = np.zeros(params.shape)

    for i in range(0, epochs):
        correct_class = 0
        cost = 0.0
        # shuffle the dataset--shuffle_vec will be used as indices
        shuffle_vec = rand.permutation(data.shape[2])

        for j in range(0, data.shape[2] - minibatch_size + 1, minibatch_size):
            # perform gradient descent w/each batch
            it += 1

            if it == 20:
                # increase momentum after 20 iterations
                momentum = 0.9

            # gradient should be an unrolled vector of the avg. sum of the 256 gradients gotten
            # from the forward pass and backprop
            for k in range(0, minibatch_size):
                layer0.forwardprop(data[:,:,shuffle_vec[j+k]].reshape((28,28,1)))
                layer1.downsample(layer0.output, (20,20,2))
                layer2_input = layer1.output.flatten()
                layer2.softmax_output(layer2_input.reshape((layer2_input.size, 1)))
                cost += J(layer2.output, labels[shuffle_vec[j+k]])
                # print "%d %d" % (np.argmax(layer2.output), labels[shuffle_vec[j+k]])

                if np.argmax(layer2.output) == labels[shuffle_vec[j+k]]:
                    correct_class += 1

                # backprop
                layer2.backprop(0, 0, encode_label(labels[shuffle_vec[j+k]]))
                layer1.upsample(layer2, 0)
                layer0.backprop(layer1)
                # flatten the gradient vector
                if k == 0:
                    grad = np.concatenate((layer0.gradient_w.flatten(), layer0.gradient_b.flatten(), layer2.gradient_w.flatten(), layer2.gradient_b.flatten()))
                else:
                    grad += np.concatenate((layer0.gradient_w.flatten(), layer0.gradient_b.flatten(), layer2.gradient_w.flatten(), layer2.gradient_b.flatten()))

            grad /= minibatch_size
            # update velocity vector
            velocity = momentum*velocity + alpha*grad
            params = params - velocity

            # update the parameters
            layer0.W = params[0:layer0.W.flatten().size].reshape(layer0.W.shape)
            next_begin = layer0.W.flatten().size
            layer0.bias = params[next_begin:next_begin+layer0.bias.flatten().size].reshape(layer0.bias.shape)
            next_begin += layer0.bias.flatten().size
            layer2.W = params[next_begin:next_begin+layer2.W.flatten().size].reshape(layer2.W.shape)
            next_begin += layer2.W.flatten().size
            layer2.bias = params[next_begin:].reshape(layer2.bias.shape)

        # reduce learning rate by half after each epoch
        alpha /= 2.0
        print "%d correct classifications" % correct_class
        print "cost function is ", cost/(minibatch_size*(data.shape[2] - minibatch_size + 1))
Example #3
def testGradient():
    """Test the backprop implementation by checking the gradients on a small network"""

    # load the training data
    images, labels = load_mnist()
    images /= 255.0

    grad_images = images[:,:,0:10] #use 10 image subset for gradient checking
    grad_labels = labels[0,0:10] #respective labels for the images--going to have to encode these labels

    # create a small network, 1 conv layer + 1 pooling layer + 1 fully connected softmax

    # convolutional layer, taking in a 28x28 image, using 2 9x9 filters
    # output should be 2 feature maps of size (28-9+1) x (28-9+1) = 20x20, stored in a (20, 20, 2) array
    layer0 = ConvLayer(grad_images[:,:,0].reshape((28,28,1)), (28, 28, 1), (9, 9, 2, 1))
    print "initalized convolutional layer"
    layer0.forwardprop(grad_images[:,:,0].reshape((28,28,1)))
    print "finished forward pass of convolutional layer"

    # pooling layer, taking in 2 20x20 feature maps
    # output should be 2 10x10 feature maps (though may want to downsample 5x for gradient check)
    layer1 = PoolingLayer(layer0.output, (20, 20, 2))
    print "initialized pooling layer"
    layer1.downsample(layer0.output, (20, 20, 2))
    print "finished forward pass of pooling layer"

    # fully-connected softmax layer, taking in 2 10x10 feature maps (if downsampled by 2)
    # or taking in 2 4x4 feature maps (if downsampled by 5)
    # either way, flattened into a long input vector
    full_conn_input = layer1.output.flatten()
    layer2 = FullyConnectedLayer(full_conn_input.reshape((full_conn_input.size, 1)), full_conn_input.size, 10)
    print "initialized fully-conn layer"
    layer2.softmax_output(full_conn_input.reshape((full_conn_input.size, 1)))
    print "finished forward pass of fully-conn layer"

    # perform backpropagation
    # one-hot encode the label of the image that was forward-propagated (image 0)
    target = np.zeros((10,1))
    target[int(grad_labels[0])] = 1
    layer2.backprop(0, 0, target)
    print "finished layer 2 backprop"
    layer1.upsample(layer2, 0)
    print "finished layer 1 backprop"
    layer0.backprop(layer1)
    print "finished layer 0 backprop"

    # # after initialization, finish training
    # for i in range(1, grad_labels.size):
    #     # forward propagation
    #     layer0.forwardprop(grad_images[:,:,i].reshape((28,28,1)))
    #     layer1.downsample(layer0.output, (20,20,2))
    #     full_conn_input = layer1.output.flatten()
    #     layer2.softmax_output(full_conn_input.reshape((full_conn_input.size, 1)))
    #
    #     # backpropagation
    #     target = np.zeros((10,1))
    #     for j in range(0,10):
    #         if grad_labels[i] == 1:
    #             target[i] = 1
    #     layer2.backprop(0, 0, target)
    #     layer1.upsample(layer2, 0)
    #     layer0.backprop(layer1)

    # check the gradient
    epsilon = 1.0e-4
    layer0_check = layer0
    layer1_check = layer1
    layer2_check = layer2

    layer0_w_vec = layer0.W.flatten()
    layer0_bias_vec = layer0.bias.flatten()
    layer0_gradw = layer0.gradient_w.flatten()
    layer0_gradb = layer0.gradient_b.flatten()

    layer2_w_vec = layer2.W.flatten()
    layer2_bias_vec = layer2.bias.flatten()
    layer2_gradw = layer2.gradient_w.flatten()
    layer2_gradb = layer2.gradient_b.flatten()

    w_vec = np.concatenate((layer0_w_vec, layer0_bias_vec, layer2_w_vec, layer2_bias_vec))
    backprop_vec = np.concatenate((layer0_gradw, layer0_gradb, layer2_gradw, layer2_gradb))
    print layer0_gradw
    gradient_check = np.zeros(w_vec.size)
    for i in range(0, w_vec.size):
        pos = w_vec.copy()   # must copy--otherwise pos, neg and w_vec all alias the same array
        pos[i] += epsilon
        neg = w_vec.copy()
        neg[i] -= epsilon
        # feed-forward to get J(w+e), J(w-e), subtract and calculate gradient
        # J(w+e)
        layer0_check.W = pos[0:layer0_w_vec.size].reshape(layer0.filter_shape)
        layer0_check.bias = pos[layer0_w_vec.size : layer0_w_vec.size+layer0_bias_vec.size].reshape(layer0.bias_shape)

        layer2_check.W = pos[layer0_w_vec.size+layer0_bias_vec.size : layer0.W.size+layer0.bias.size+layer2_w_vec.size].reshape(layer2.W.shape)
        layer2_check.bias = pos[layer0.W.size+layer0.bias.size+layer2_w_vec.size:].reshape(layer2.bias.shape)

        layer0_check.forwardprop(grad_images[:,:,0].reshape((28,28,1)))
        layer1_check.downsample(layer0_check.output, (20,20,2))
        full_conn_input = layer1.output.flatten()
        layer2_check.softmax_output(full_conn_input.reshape((full_conn_input.size, 1)))

        pos_out = J(layer2_check.output, grad_labels[0])
        # J(w-e)
        layer0_check.W = neg[0:layer0_w_vec.size].reshape(layer0.filter_shape)
        layer0_check.bias = neg[layer0_w_vec.size : layer0_w_vec.size+layer0_bias_vec.size].reshape(layer0.bias_shape)

        layer2_check.W = neg[layer0_w_vec.size+layer0_bias_vec.size : layer0.W.size+layer0.bias.size+layer2_w_vec.size].reshape(layer2.W.shape)
        layer2_check.bias = neg[layer0.W.size+layer0.bias.size+layer2_w_vec.size:].reshape(layer2.bias.shape)

        layer0_check.forwardprop(grad_images[:,:,0].reshape((28,28,1)))
        layer1_check.downsample(layer0_check.output, (20,20,2))
        full_conn_input = layer1.output.flatten()
        layer2_check.softmax_output(full_conn_input.reshape((full_conn_input.size, 1)))

        neg_out = J(layer2_check.output, grad_labels[0])
        # compute gradient for i
        gradient_check[i] = (pos_out - neg_out)/(2*epsilon)

    # print gradient_check
    print gradient_check[0:layer0_w_vec.size]
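The loop above estimates each partial derivative with the central difference (J(w+e) - J(w-e)) / (2*epsilon) and compares the result against the backprop gradient. A self-contained sketch of the same check on a toy function with a known analytic gradient; the toy function is illustrative only.

import numpy as np

def numeric_gradient(J, w, epsilon=1.0e-4):
    """Central-difference estimate of the gradient of a scalar function J at w."""
    grad = np.zeros_like(w)
    for i in range(w.size):
        pos = w.copy()
        pos[i] += epsilon
        neg = w.copy()
        neg[i] -= epsilon
        grad[i] = (J(pos) - J(neg)) / (2 * epsilon)
    return grad

# Toy check: J(w) = sum(w**3) has the analytic gradient 3*w**2.
w = np.array([0.5, -1.0, 2.0])
analytic = 3 * w ** 2
numeric = numeric_gradient(lambda x: np.sum(x ** 3), w)
print(np.max(np.abs(analytic - numeric)))   # should be on the order of 1e-8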
Example #4
n_sample = 500  # number of samples drawn when estimating error and accuracy
alpha = 1  # how much of the data to learn from in one epoch; 1 means the whole training set

input_train, input_test, correct_train, correct_test = cifar10_call(N)

n_train = input_train.shape[0]
n_test = input_test.shape[0]

img_h = 32
img_w = 32
img_ch = 3

# -- initialize each layer --
cl1 = ConvLayer(img_ch, img_h, img_w, 30, 3, 3, stride=1, pad=1)  # first 3 args: input, middle 3: filters
cl2 = ConvLayer(cl1.y_ch, cl1.y_h, cl1.y_w, 30, 3, 3, stride=1, pad=1)
pl1 = PoolingLayer(cl2.y_ch, cl2.y_h, cl2.y_w, pool=2, pad=0)  # pool: pooling size (2*2), pad: padding width
c_dr1 = fn.dropout(0.25)

cl3 = ConvLayer(pl1.y_ch, pl1.y_h, pl1.y_w, 60, 3, 3, stride=1, pad=1)
pl2 = PoolingLayer(cl3.y_ch, cl3.y_h, cl3.y_w, pool=2, pad=0)
c_dr2 = fn.dropout(0.25)

cl4 = ConvLayer(pl2.y_ch, pl2.y_h, pl2.y_w, 120, 3, 3, stride=1, pad=1)
pl3 = PoolingLayer(cl4.y_ch, cl4.y_h, cl4.y_w, pool=2, pad=0)

n_fc_in = pl3.y_ch * pl3.y_h * pl3.y_w
ml1 = MiddleLayer(n_fc_in, 500)
dr1 = fn.dropout(0.5)
ml2 = MiddleLayer(500, 500)
dr2 = fn.dropout(0.5)
ol1 = OutputLayer(500, 10)
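Each layer exposes its output shape as y_ch, y_h and y_w, which the next constructor consumes. With 3x3 filters, stride 1 and pad 1 every conv layer preserves the 32x32 spatial size, and every 2x2 pooling layer halves it, so n_fc_in works out to 120 * 4 * 4 = 1920. A small sketch of that arithmetic using the standard convolution and pooling output-size formulas; the helper names below are illustrative and not part of this code's API.

def conv_out(size, filt, stride=1, pad=1):
    # standard convolution output size
    return (size + 2 * pad - filt) // stride + 1

def pool_out(size, pool=2):
    # non-overlapping pooling output size
    return size // pool

h = w = 32                               # CIFAR-10 spatial size
h, w = conv_out(h, 3), conv_out(w, 3)    # cl1: 32x32, 30 channels
h, w = conv_out(h, 3), conv_out(w, 3)    # cl2: 32x32
h, w = pool_out(h), pool_out(w)          # pl1: 16x16
h, w = conv_out(h, 3), conv_out(w, 3)    # cl3: 16x16, 60 channels
h, w = pool_out(h), pool_out(w)          # pl2: 8x8
h, w = conv_out(h, 3), conv_out(w, 3)    # cl4: 8x8, 120 channels
h, w = pool_out(h), pool_out(w)          # pl3: 4x4
print(120 * h * w)                       # 1920 inputs to ml1, i.e. n_fc_in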
Example #5
    def __init__(self, phase, start_from_scratch=False):
        encoding_t = 10
        self.encoding_t = encoding_t
        self.phase = phase
        path = dirname(dirname(realpath(__file__)))
        self.path_to_img_folder = path + '/datasets/'+ phase + 'Set/'
        classifier_dataset_path = path + '/datasets/ClassifierSet/'
        self.classifier_training_dataset_path = classifier_dataset_path +'TrainingData.csv'
        self.classifier_testing_dataset_path = classifier_dataset_path +'TestingData.csv'
        self.start_from_scratch = start_from_scratch
        self.path_to_log_file = path+ '/logs/log.csv'

        self.pathWeights = path + '/weights/'

        strides_conv = [1, 1, 1, 1]
        padding = "SAME"
        pooling_type = "MAX"

        stdp_flag = phase == 'Learning'

        ''' After 20 images the first layer converges;
        the second has some weights around 1 and the others
        remain in the initial value range: 0.68 score.
        encoding_t,.02,-.0,-.02, stdp_flag ),
        encoding_t,.006,-.0,-.002, stdp_flag),
        '''

        '''After some images both layers converge,
        with a suspicious symmetry of weights around 0
        and around 1 (the same number of each):
        score 0.67.
        encoding_t,.01,-.0,-.012, stdp_flag ),
        encoding_t,.01,-.0,-.02, stdp_flag),
        Maybe try a slower convergence.
        '''
        '''
        self.layers = [
            ConvolutionalLayer(padding, strides_conv,
                [5,5,1,4],6.5, [1,160,250,1], [1,160,250,4],
                encoding_t,5,.004,-.0,-.008, stdp_flag ),
            PoolingLayer(padding, [6,6], [7,7], pooling_type, [1,27,42,4]),
            ConvolutionalLayer(padding,strides_conv,
                [17,17,4,20], 35., [1,27,42,4], [1,27,42,20],
                encoding_t,25,.002,-.0,-.004, stdp_flag),
            PoolingLayer(padding, [5,5], [5,5], pooling_type, [1,6,9,20]),
            ConvolutionalLayer(padding, strides_conv,
                [5,5,20,20], math.inf , [1,6,9,20], [1,6,9,20],
                encoding_t,0,.0,-.0,-.0, stdp_flag)
            ]

        '''

        self.layers = [
            ConvolutionalLayer(padding, strides_conv,
                [5,5,1,4],3.7, [1,160,250,1], [1,160,250,4],
                encoding_t,18,.004,-.0,-.008, stdp_flag ),
            PoolingLayer(padding, [6,6], [7,7], pooling_type, [1,27,42,4]),
            ConvolutionalLayer(padding,strides_conv,
                [17,17,4,20], 25., [1,27,42,4], [1,27,42,20],
                encoding_t,28,.002,-.0,-.004, stdp_flag),
            PoolingLayer(padding, [5,5], [5,5], pooling_type, [1,6,9,20]),
            ConvolutionalLayer(padding, strides_conv,
                [5,5,20,20], math.inf , [1,6,9,20], [1,6,9,20],
                encoding_t,0,.0,-.0,-.0, stdp_flag)
            ]

        if start_from_scratch:
            self.writeFeatureNamesinDataset( classifier_dataset_path +'TrainingData.csv')
            self.writeFeatureNamesinDataset( classifier_dataset_path +'TestingData.csv')
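The constructor builds its paths by string concatenation two directories up from the current file. A small, self-contained sketch of the same paths built with os.path.join, which avoids missing-separator mistakes; the variable names mirror the snippet, but the use of os.path.join is a suggestion, not the author's code.

import os
from os.path import dirname, realpath

# Two levels up from this file, as in the constructor above.
project_root = dirname(dirname(realpath(__file__)))

phase = 'Learning'   # the snippet enables STDP when phase == 'Learning'
path_to_img_folder = os.path.join(project_root, 'datasets', phase + 'Set')
classifier_dataset_path = os.path.join(project_root, 'datasets', 'ClassifierSet')
classifier_training_dataset_path = os.path.join(classifier_dataset_path, 'TrainingData.csv')
path_to_log_file = os.path.join(project_root, 'logs', 'log.csv')
path_weights = os.path.join(project_root, 'weights')
print(classifier_training_dataset_path)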