def buildNetwork(self):
    """Build the fixed-structure network.

    Topology: input -> conv(5,10) -> pool(4) -> relu -> conv(4,20) ->
    pool(2) -> relu -> flatten -> fully-connected(20) -> output; only the
    number of outputs (self.numOutputs) varies.
    """
    self.inputLayer = InputLayer()
    self.outputLayer = OutputLayer(self.numOutputs)
    # Layers listed in forward (data-flow) order.
    chain = [
        self.inputLayer,
        ConvolutionalLayer(5, 10),
        PoolingLayer(4),
        ReluLayer(),
        ConvolutionalLayer(4, 20),
        PoolingLayer(2),
        ReluLayer(),
        FlattenLayer(),
        FullyConnectedLayer(20),
        self.outputLayer,
    ]
    # Wire each layer to its successor, walking from the output end back to
    # the input end (same connect() call order as the original sequence).
    for idx in range(len(chain) - 2, -1, -1):
        chain[idx].connect(chain[idx + 1])
def SGD_train(minibatch_size, data, labels, alpha, momentum, epochs): """Train the network with stochastic gradient descent :type minibatch_size: an integer :param minibatch_size: the size of the minibatches (usually something like 256) :type data: 3D matrix height x width x num training data pts. :param data: A 3D matrix that contains all of the training data points of the set :type labels: num training data pts x 1 vector :param labels: the labels for each image :type alpha: float :param alpha: the learning rate :type momentum: float :param momentum: the momentum :type epochs: an integer :param epochs: the number of epochs (ie. iterations) through the training """ it = 0 # convolutional layer, taking in a 28x28 image, using 2 9x9 filters # output should be 2 28-9+1x28-9+1 = 2 20x20 feature maps in a (20, 20, 2) form layer0 = ConvLayer((28, 28, 1), (9,9,2)) print "initialized convolutional layer" # pooling layer, taking in 2 20x20 feature maps # output should be 2 10x10 feature maps layer1 = PoolingLayer((20, 20, 2)) print "initialized pooling layer" # fully-connected softmax layer, taking in 2 10x10 feature maps (if downsampled by 2) # flattened into a long input vector layer2 = FullyConnectedLayer(200, 10) print "initialized fully-connected layer" params = np.concatenate((layer0.W.flatten(), layer0.bias.flatten(), layer2.W.flatten(), layer2.bias.flatten())) velocity = np.zeros(params.shape) for i in range(0, epochs): correct_class = 0 cost = 0.0 # shuffle the dataset--shuffle_vec will be used as indices shuffle_vec = rand.permutation(data.shape[2]) for j in range(0, data.shape[2] - minibatch_size + 1, minibatch_size): # perform gradient descent w/each batch it += 1 if it == 20: # increase momentum after 20 iterations momentum = 0.9 # gradient should be an unrolled vector of the avg. 
sum of the 256 gradients gotten # from the forward pass and backprop for k in range(0, minibatch_size): layer0.forwardprop(data[:,:,shuffle_vec[j+k]].reshape((28,28,1))) layer1.downsample(layer0.output, (20,20,2)) layer2_input = layer1.output.flatten() layer2.softmax_output(layer2_input.reshape((layer2_input.size, 1))) cost += J(layer2.output, labels[shuffle_vec[j+k]]) # print "%d %d" % (np.argmax(layer2.output), labels[shuffle_vec[j+k]]) if np.argmax(layer2.output) == labels[shuffle_vec[j+k]]: correct_class += 1 # backprop layer2.backprop(0, 0, encode_label(labels[shuffle_vec[j+k]])) layer1.upsample(layer2, 0) layer0.backprop(layer1) # flatten the gradient vector if k == 0: grad = np.concatenate((layer0.gradient_w.flatten(), layer0.gradient_b.flatten(), layer2.gradient_w.flatten(), layer2.gradient_b.flatten())) else: grad += np.concatenate((layer0.gradient_w.flatten(), layer0.gradient_b.flatten(), layer2.gradient_w.flatten(), layer2.gradient_b.flatten())) grad /= minibatch_size # update velocity vector velocity = momentum*velocity + alpha*grad params = params - velocity # update the parameters layer0.W = params[0:layer0.W.flatten().size].reshape(layer0.W.shape) next_begin = layer0.W.flatten().size layer0.bias = params[next_begin:next_begin+layer0.bias.flatten().size].reshape(layer0.bias.shape) next_begin += layer0.bias.flatten().size layer2.W = params[next_begin:next_begin+layer2.W.flatten().size].reshape(layer2.W.shape) next_begin += layer2.W.flatten().size layer2.bias = params[next_begin:].reshape(layer2.bias.shape) # reduce learning rate by half after each epoch alpha /= 2.0 print "%d correct classifications" % correct_class print "cost function is ", cost/(minibatch_size*(data.shape[2] - minibatch_size + 1))
def testGradient(): """Test the backprop implementation by checking the gradients on a small network""" # load the training data images, labels = load_mnist() images /= 255.0 grad_images = images[:,:,0:10] #use 10 image subset for gradient checking grad_labels = labels[0,0:10] #respective labels for the images--going to have to encode these labels # create a small network, 1 conv layer + 1 pooling layer + 1 fully connected softmax # convolutional layer, taking in a 28x28 image, using 2 9x9 filters # output should be 2 28-9+1x28-9+1 = 2 20x20 feature maps in a (20, 20, 2) form layer0 = ConvLayer(grad_images[:,:,0].reshape((28,28,1)), (28, 28, 1), (9, 9, 2, 1)) print "initalized convolutional layer" layer0.forwardprop(grad_images[:,:,0].reshape((28,28,1))) print "finished forward pass of convolutional layer" # pooling layer, taking in 2 20x20 feature maps # output should be 2 10x10 feature maps (though may want to downsample 5x for gradient check) layer1 = PoolingLayer(layer0.output, (20, 20, 2)) print "initialized pooling layer" layer1.downsample(layer0.output, (20, 20, 2)) print "finished forward pass of pooling layer" # fully-connected softmax layer, taking in 2 10x10 feature maps (if downsampled by 2) # or taking in 2 4x4 feature maps (if downsampled by 5) # either way, flattened into a long input vector full_conn_input = layer1.output.flatten() layer2 = FullyConnectedLayer(full_conn_input.reshape((full_conn_input.size, 1)), full_conn_input.size, 10) print "initialized fully-conn layer" layer2.softmax_output(full_conn_input.reshape((full_conn_input.size, 1))) print "finished forward pass of fully-conn layer" # perform backpropagation target = np.zeros((10,1)) for i in range(0, 10): if grad_labels[i] == 1: target[i] = 1 layer2.backprop(0, 0, target) print "finished layer 2 backprop" layer1.upsample(layer2, 0) print "finished layer 1 backprop" layer0.backprop(layer1) print "finished layer 0 backprop" # # after initialization, finish training # for i in range(1, 
grad_labels.size): # # forward propagation # layer0.forwardprop(grad_images[:,:,i].reshape((28,28,1))) # layer1.downsample(layer0.output, (20,20,2)) # full_conn_input = layer1.output.flatten() # layer2.softmax_output(full_conn_input.reshape((full_conn_input.size, 1))) # # # backpropagation # target = np.zeros((10,1)) # for j in range(0,10): # if grad_labels[i] == 1: # target[i] = 1 # layer2.backprop(0, 0, target) # layer1.upsample(layer2, 0) # layer0.backprop(layer1) # check the gradient epsilon = 1.0e-4 layer0_check = layer0 layer1_check = layer1 layer2_check = layer2 layer0_w_vec = layer0.W.flatten() layer0_bias_vec = layer0.bias.flatten() layer0_gradw = layer0.gradient_w.flatten() layer0_gradb = layer0.gradient_b.flatten() layer2_w_vec = layer2.W.flatten() layer2_bias_vec = layer2.bias.flatten() layer2_gradw = layer2.gradient_w.flatten() layer2_gradb = layer2.gradient_b.flatten() w_vec = np.concatenate((layer0_w_vec, layer0_bias_vec, layer2_w_vec, layer2_bias_vec)) backprop_vec = np.concatenate((layer0_gradw, layer0_gradb, layer2_gradw, layer2_gradb)) print layer0_gradw gradient_check = np.zeros(w_vec.size) for i in range(0, w_vec.size): pos = w_vec pos[i] += epsilon neg = w_vec neg[i] -= epsilon # feed-forward to get J(w+e), J(w-e), subtract and calculate gradient # J(w+e) layer0_check.W = pos[0:layer0_w_vec.size].reshape(layer0.filter_shape) layer0_check.bias = pos[layer0_w_vec.size : layer0_w_vec.size+layer0_bias_vec.size].reshape(layer0.bias_shape) layer2_check.W = pos[layer0_w_vec.size+layer0_bias_vec.size : layer0.W.size+layer0.bias.size+layer2_w_vec.size].reshape(layer2.W.shape) layer2_check.bias = pos[layer0.W.size+layer0.bias.size+layer2_w_vec.size:].reshape(layer2.bias.shape) layer0_check.forwardprop(grad_images[:,:,0].reshape((28,28,1))) layer1_check.downsample(layer0_check.output, (20,20,2)) full_conn_input = layer1.output.flatten() layer2_check.softmax_output(full_conn_input.reshape((full_conn_input.size, 1))) pos_out = J(layer2_check.output, 
grad_labels[0]) # J(w-e) layer0_check.W = neg[0:layer0_w_vec.size].reshape(layer0.filter_shape) layer0_check.bias = neg[layer0_w_vec.size : layer0_w_vec.size+layer0_bias_vec.size].reshape(layer0.bias_shape) layer2_check.W = neg[layer0_w_vec.size+layer0_bias_vec.size : layer0.W.size+layer0.bias.size+layer2_w_vec.size].reshape(layer2.W.shape) layer2_check.bias = neg[layer0.W.size+layer0.bias.size+layer2_w_vec.size:].reshape(layer2.bias.shape) layer0_check.forwardprop(grad_images[:,:,0].reshape((28,28,1))) layer1_check.downsample(layer0_check.output, (20,20,2)) full_conn_input = layer1.output.flatten() layer2_check.softmax_output(full_conn_input.reshape((full_conn_input.size, 1))) neg_out = J(layer2_check.output, grad_labels[0]) # compute gradient for i gradient_check[i] = (pos_out - neg_out)/(2*epsilon) # print gradient_check print gradient_check[0:layer0_w_vec.size]
n_sample = 500 # Using for Error, Accuracy samples alpha = 1 # how many times do you want to learn in 1 epoch. if 1, whole data learns. input_train, input_test, correct_train, correct_test = cifar10_call(N) n_train = input_train.shape[0] n_test = input_test.shape[0] img_h = 32 img_w = 32 img_ch = 3 # -- 각 층의 초기화 -- cl1 = ConvLayer(img_ch, img_h, img_w, 30, 3, 3, stride=1, pad=1) # 앞3개:인풋 중간3개:필터 cl2 = ConvLayer(cl1.y_ch, cl1.y_h, cl1.y_w, 30, 3, 3, stride=1, pad=1) pl1 = PoolingLayer(cl2.y_ch, cl2.y_h, cl2.y_w, pool=2, pad=0) # pool:풀링크기(2*2), pad:패딩 너비 c_dr1 = fn.dropout(0.25) cl3 = ConvLayer(pl1.y_ch, pl1.y_h, pl1.y_w, 60, 3, 3, stride=1, pad=1) pl2 = PoolingLayer(cl3.y_ch, cl3.y_h, cl3.y_w, pool=2, pad=0) c_dr2 = fn.dropout(0.25) cl4 = ConvLayer(pl2.y_ch, pl2.y_h, pl2.y_w, 120, 3, 3, stride=1, pad=1) pl3 = PoolingLayer(cl4.y_ch, cl4.y_h, cl4.y_w, pool=2, pad=0) n_fc_in = pl3.y_ch * pl3.y_h * pl3.y_w ml1 = MiddleLayer(n_fc_in, 500) dr1 = fn.dropout(0.5) ml2 = MiddleLayer(500, 500) dr2 = fn.dropout(0.5) ol1 = OutputLayer(500, 10)
def __init__(self, phase, start_from_scratch=False):
    """Set up dataset/weight paths and build the conv/pool layer stack.

    :param phase: dataset phase name; 'Learning' enables the STDP updates
    :param start_from_scratch: when True, (re)write the classifier CSV headers
    """
    self.encoding_t = 10
    self.phase = phase

    # Resolve project-relative folders (two directory levels above this file).
    root = dirname(dirname(realpath(__file__)))
    self.path_to_img_folder = root + '/datasets/' + phase + 'Set/'
    classifier_dir = root + '/datasets/ClassifierSet/'
    self.classifier_training_dataset_path = classifier_dir + 'TrainingData.csv'
    self.classifier_testing_dataset_path = classifier_dir + 'TestingData.csv'
    self.start_from_scratch = start_from_scratch
    self.path_to_log_file = root + '/logs/log.csv'
    self.pathWeights = root + '/weights/'

    conv_strides = [1, 1, 1, 1]
    pad_mode = "SAME"
    pool_mode = "MAX"
    stdp_on = phase == 'Learning'  # plasticity only while learning

    # NOTE(review): the original kept two earlier hyper-parameter trials as
    # commented-out layer stacks (thresholds 6.5/35./inf with various learning
    # rates; both converged with symmetric weights around 0 and 1, scores
    # ~0.67-0.68).  The values below are the ones currently in use.
    t = self.encoding_t
    self.layers = [
        ConvolutionalLayer(pad_mode, conv_strides, [5,5,1,4], 3.7,
                           [1,160,250,1], [1,160,250,4],
                           t, 18, .004, -.0, -.008, stdp_on),
        PoolingLayer(pad_mode, [6,6], [7,7], pool_mode, [1,27,42,4]),
        ConvolutionalLayer(pad_mode, conv_strides, [17,17,4,20], 25.,
                           [1,27,42,4], [1,27,42,20],
                           t, 28, .002, -.0, -.004, stdp_on),
        PoolingLayer(pad_mode, [5,5], [5,5], pool_mode, [1,6,9,20]),
        ConvolutionalLayer(pad_mode, conv_strides, [5,5,20,20], math.inf,
                           [1,6,9,20], [1,6,9,20],
                           t, 0, .0, -.0, -.0, stdp_on),
    ]

    if start_from_scratch:
        # Fresh run: write the CSV header row for both classifier splits.
        self.writeFeatureNamesinDataset(classifier_dir + 'TrainingData.csv')
        self.writeFeatureNamesinDataset(classifier_dir + 'TestingData.csv')