def __init__(self):
    """Build the network as an ordered list of layer objects in ``self.layers``.

    Layout, as declared by the shapes passed below:

    * conv, 6 kernels of 5x5, zero padding 2, on a 28x28 input
    * 2x2 max pooling with stride 2 on (6, 28, 28)
    * conv, 16 kernels of 5x5 on (6, 14, 14)
    * 2x2 max pooling with stride 2 on (16, 10, 10)
    * conv, 120 kernels of 5x5 on (16, 5, 5)
    * fully connected 120 -> 84 (ReLU) and 84 -> 10 (sigmoid)
    """
    stack = self.layers = []
    add = stack.append

    # Feature extractor: conv / pool / conv / pool / conv.
    add(conv_layer(action=act.relu,
                   zero_padding=2,
                   action_derive=act.relu_derive,
                   input_shape=(28, 28),
                   kernel_stride=1,
                   kernel_shape=(5, 5),
                   kernel_num=6))
    add(pooling_layer(input_shape=(6, 28, 28),
                      kernel_shape=(6, 2, 2),
                      pooling_type='max_pooling',
                      stride=2))
    add(conv_layer(action=act.relu,
                   zero_padding=0,
                   action_derive=act.relu_derive,
                   input_shape=(6, 14, 14),
                   kernel_stride=1,
                   kernel_shape=(6, 5, 5),
                   kernel_num=16))
    add(pooling_layer(input_shape=(16, 10, 10),
                      kernel_shape=(16, 2, 2),
                      pooling_type='max_pooling',
                      stride=2))
    add(conv_layer(action=act.relu,
                   zero_padding=0,
                   action_derive=act.relu_derive,
                   input_shape=(16, 5, 5),
                   kernel_stride=1,
                   kernel_shape=(16, 5, 5),
                   kernel_num=120))

    # Classifier head: two fully connected layers.
    add(fc_layer(action=act.relu,
                 action_derive=act.relu_derive,
                 layers=(120, 84)))
    add(fc_layer(action=act.sigmoid,
                 action_derive=act.sigmoid_derive,
                 layers=(84, 10)))
def __init__(self, inputs, graph, schedule, options=object()):
    """Register the supported layer handlers and build the layer schedule.

    ``inputs`` and ``graph`` are handed to ``compute_ignore_nodes``; its
    result, together with ``graph`` and ``schedule``, is handed to
    ``create_schedule`` and stored in ``self._layers``. ``options`` is
    accepted but not read in this method.
    """
    self.variables = {}
    self.constSet = set()

    # Layer-type name -> handler instance.
    handlers = {
        'Convolution': conv_layer(mode='NCHW'),
        'BiasAdd': matop_layer('BiasAdd', mode="NCHW"),
        # TODO FIXME: every 'Eltwise' node is treated as a SUM (assumes add???).
        'Eltwise': eltwise_layer(operation='SUM', mode='NCHW'),
        # Disabled entry, kept for reference (TODO FIXME assumes avgpool???):
        #   'Mean': reduce_layer('AVG', mode='NCHW')
        'Reshape': reshape_layer(),
        'Scale': scale_layer(mode='NCHW'),
        'ReLU': relu_layer(),
        'Pooling': pool_layer(mode='NCHW'),
        'Concat': concat_layer(1),
        'BatchNorm': batchnorm_layer(),
        'InnerProduct': matop_layer('MatMul'),
        # Disabled entry, kept for reference:
        #   'Mul': matop_layer('MatMul')
        'Sub': matop_layer('Sub'),
        'Identity': identity_layer(),
        'Softmax': softmax_layer()
    }
    self.available_layers = handlers

    skipped_nodes = self.compute_ignore_nodes(inputs, graph)
    self._layers = self.create_schedule(graph, skipped_nodes, schedule)
def TopConv(input_vec, conv_filters_lst, beta, gamma):
    """Chain five ``conv_layer`` calls and return the final output.

    Stage ``i`` (0..4) applies ``conv_filters_lst[i]`` to the previous
    stage's output; ``beta`` and ``gamma`` are passed unchanged to every
    stage.
    """
    n_stages = 5
    activations = input_vec
    # Filters are looked up lazily, one stage at a time, right before use.
    for filters in (conv_filters_lst[i] for i in range(n_stages)):
        activations = conv_layer(activations, filters, beta, gamma)
    return activations
def init_conv_test():
    """Return ``(a, b, cl)``: a fixed input, a fixed array and a conv layer.

    ``a`` is a 3x5x5 integer array, ``b`` is a 2x3x3 integer array and ``cl``
    is a two-kernel ``conv_layer`` (3x3x3 kernels, stride 2, zero padding 1,
    identity activation). Both kernels are overwritten with fixed float64
    weights and the first bias is set to 1; the second bias keeps whatever
    value ``conv_layer`` initialised it with.
    """
    kernel_0 = np.array([[[-1, 1, 0],
                          [0, 1, 0],
                          [0, 1, 1]],
                         [[-1, -1, 0],
                          [0, 0, 0],
                          [0, -1, 0]],
                         [[0, 0, -1],
                          [0, 1, 0],
                          [1, -1, -1]]], dtype=np.float64)
    kernel_1 = np.array([[[1, 1, -1],
                          [-1, -1, 1],
                          [0, -1, 1]],
                         [[0, 1, 0],
                          [-1, 0, -1],
                          [-1, 1, 0]],
                         [[-1, 0, 0],
                          [-1, 0, 1],
                          [-1, 0, 0]]], dtype=np.float64)

    sample_input = np.array([[[0, 1, 1, 0, 2],
                              [2, 2, 2, 2, 1],
                              [1, 0, 0, 2, 0],
                              [0, 1, 1, 0, 0],
                              [1, 2, 0, 0, 2]],
                             [[1, 0, 2, 2, 0],
                              [0, 0, 0, 2, 0],
                              [1, 2, 1, 2, 1],
                              [1, 0, 0, 0, 0],
                              [1, 2, 1, 1, 1]],
                             [[2, 1, 2, 0, 0],
                              [1, 0, 0, 1, 0],
                              [0, 2, 1, 0, 1],
                              [0, 1, 2, 2, 2],
                              [2, 1, 0, 0, 1]]])
    second_array = np.array([[[0, 1, 1],
                              [2, 2, 2],
                              [1, 0, 0]],
                             [[1, 0, 2],
                              [0, 0, 0],
                              [1, 2, 1]]])

    layer = conv_layer(action=identify,
                       zero_padding=1,
                       action_derive=identify_derive,
                       input_shape=(3, 5, 5),
                       kernel_stride=2,
                       kernel_shape=(3, 3, 3),
                       kernel_num=2)
    # Replace the layer's initial parameters with the fixed values above.
    layer.weights[0] = kernel_0
    layer.bias[0] = 1
    layer.weights[1] = kernel_1

    return sample_input, second_array, layer
def test_conv_layer():
    """Run one forward pass of a two-kernel ReLU conv layer and print the result.

    The layer (3x3x3 kernels, stride 2, zero padding 1) gets fixed weights
    and biases (1 and 0) and is applied to a fixed 3x5x5 input. Nothing is
    asserted; the feature map is only printed.
    """
    kernel_a = np.array([[[-1, 1, 0],
                          [0, 1, 0],
                          [0, 1, 1]],
                         [[-1, -1, 0],
                          [0, 0, 0],
                          [0, -1, 0]],
                         [[0, 0, -1],
                          [0, 1, 0],
                          [1, -1, -1]]])
    kernel_b = np.array([[[1, 1, -1],
                          [-1, -1, 1],
                          [0, -1, 1]],
                         [[0, 1, 0],
                          [-1, 0, -1],
                          [-1, 1, 0]],
                         [[-1, 0, 0],
                          [-1, 0, 1],
                          [-1, 0, 0]]])
    offset_a, offset_b = 1, 0

    sample = np.array([[[0, 1, 1, 0, 2],
                        [2, 2, 2, 2, 1],
                        [1, 0, 0, 2, 0],
                        [0, 1, 1, 0, 0],
                        [1, 2, 0, 0, 2]],
                       [[1, 0, 2, 2, 0],
                        [0, 0, 0, 2, 0],
                        [1, 2, 1, 2, 1],
                        [1, 0, 0, 0, 0],
                        [1, 2, 1, 1, 1]],
                       [[2, 1, 2, 0, 0],
                        [1, 0, 0, 1, 0],
                        [0, 2, 1, 0, 1],
                        [0, 1, 2, 2, 2],
                        [2, 1, 0, 0, 1]]])

    layer = conv_layer(action=relu,
                       zero_padding=1,
                       action_derive=relu_derive,
                       input_shape=(3, 5, 5),
                       kernel_stride=2,
                       kernel_shape=(3, 3, 3),
                       kernel_num=2)
    # Targets are assigned left to right: weights first, then biases.
    layer.weights[0], layer.weights[1] = kernel_a, kernel_b
    layer.bias[0], layer.bias[1] = offset_a, offset_b

    print(layer.forward(sample))
def __init__(self, input_shape):
    """Build conv-relu-pool x2, a fully connected layer and a loss layer.

    Each layer is constructed from the previous layer's ``output_shape``
    and that shape is printed as the network is assembled.
    ``input_shape[0]`` is stored as the batch size; the original author's
    note gives the expected input as [batch_size, 28, 28, 1].
    """
    def report(label, layer):
        # Print the layer's output shape and hand it to the next layer.
        shape = layer.output_shape
        print(label, shape)
        return shape

    # Per-batch bookkeeping.
    self.batch_loss = 0
    self.batch_acc = 0
    self.batch_size = input_shape[0]

    print('network structure:')

    # NOTE(review): the positional arguments after the shape are presumably
    # (filters, kernel size, stride, padding) - confirm against conv_layer.
    self.conv1 = conv_layer(input_shape, 8, 5, 1, 2)
    current = report('conv1', self.conv1)
    self.relu1 = relu(current)
    current = report('relu1', self.relu1)
    self.pool1 = max_pooling(current)
    current = report('pool1', self.pool1)

    self.conv2 = conv_layer(current, 16, 3, 1, 1)
    current = report('conv2', self.conv2)
    self.relu2 = relu(current)
    current = report('relu2', self.relu2)
    self.pool2 = max_pooling(current)
    current = report('pool2', self.pool2)

    self.fc = fc_layer(current, 10)
    current = report('fc', self.fc)
    self.loss = loss_layer(current)
import tensorflow.compat.v1 as tf

# Switch to TF1 graph-mode semantics before anything builds graph ops.
tf.disable_v2_behavior()

from conv_layer import conv_layer

# Smoke test: build one 3x3 convolution with 32 output channels on a
# 28x28x1 placeholder inside a throwaway graph.
scratch_graph = tf.Graph()
with scratch_graph.as_default():
    images = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    conv_layer(images,
               name='convtest',
               kernel_size=(3, 3),
               n_output_channels=32)

# Drop the temporaries so they do not linger in the module namespace.
del scratch_graph, images
def evaluate_convnet(learning_rate=0.02, n_epochs=2000,
                     dataset='single_sphere', nkerns=(32, 64, 64, 128),
                     batch_size=128,
                     filter_shapes=((5, 5), (5, 5), (3, 3), (3, 3)),
                     momentum=0.9, half_time=500):
    """Train a convolutional classifier with momentum SGD and early stopping.

    The model is four ``conv_layer`` stages, an ``SPP`` layer, a 500-unit
    fully connected ReLU layer and a 39-way ``LogisticRegression`` output.
    Data is obtained from ``load_char_data()``. Progress, the best
    validation/test error and a final distance statistic are printed;
    nothing is returned.

    :type learning_rate: float
    :param learning_rate: initial learning rate; it is halved every
                          ``half_time`` epochs

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: unused; kept for backward compatibility (the data always
                    comes from ``load_char_data()``)

    :type nkerns: sequence of 4 ints
    :param nkerns: number of kernels of each convolutional layer

    :type batch_size: int
    :param batch_size: number of samples per minibatch

    :type filter_shapes: sequence of 4 (rows, cols) pairs
    :param filter_shapes: filter size of each convolutional layer

    :type momentum: float
    :param momentum: momentum passed to ``gradient_updates_momentum``

    :type half_time: int
    :param half_time: number of epochs between learning-rate halvings
    """
    rng = numpy.random.RandomState(23455)

    # TODO: implement load_latline_dataset() as an alternative data source.
    datasets, max_row, max_col = load_char_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    # (floor division: a trailing partial batch is never used)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.tensor4('x')   # input batch (4D tensor)
    y = T.ivector('y')   # integer class labels
    # NOTE(review): `idxs` is handed to SPP but is never supplied through
    # `givens` or as an input of the compiled functions below. If SPP's
    # output depends on it, Theano cannot compile them - confirm.
    idxs = T.matrix('idxs')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    layer0_input = x

    # Every convolutional layer is declared with the full
    # (max_row, max_col) spatial size; only the channel count changes.
    layer0 = conv_layer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, max_row, max_col),
        filter_shape=(nkerns[0], 3, filter_shapes[0][0], filter_shapes[0][1]),
        pooling=False,
        activation=T.nnet.relu
    )

    # The only stage with pooling enabled. NOTE(review): keepDims=True
    # presumably preserves the spatial size, since the next layer is again
    # declared with (max_row, max_col) - confirm against conv_layer.
    layer1 = conv_layer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], max_row, max_col),
        filter_shape=(nkerns[1], nkerns[0],
                      filter_shapes[1][0], filter_shapes[1][1]),
        pooling=True,
        poolsize=(2, 2),
        activation=T.nnet.relu,
        keepDims=True
    )

    layer1b = conv_layer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], max_row, max_col),
        filter_shape=(nkerns[2], nkerns[1],
                      filter_shapes[2][0], filter_shapes[2][1]),
        pooling=False,
        activation=T.nnet.relu,
        keepDims=True
    )

    layer1c = conv_layer(
        rng,
        input=layer1b.output,
        image_shape=(batch_size, nkerns[2], max_row, max_col),
        filter_shape=(nkerns[3], nkerns[2],
                      filter_shapes[3][0], filter_shapes[3][1]),
        pooling=False,
        activation=T.nnet.relu,
        keepDims=True
    )

    spp_layer = SPP(layer1c.output, idxs)

    # The fully connected layer consumes the SPP output; its input width
    # is spp_layer.M features for each of the nkerns[-1] feature maps.
    layer2_input = spp_layer.output

    # construct a fully-connected ReLU layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=spp_layer.M * nkerns[-1],
        n_out=500,
        activation=T.nnet.relu
    )

    # 39-way classifier on top of the hidden layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=39)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create functions to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # returns the predictions together with the targets of a test batch
    demo_model = theano.function(
        [index],
        [layer3.y_pred, y],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (layer3.params + layer2.params + layer1.params + layer0.params
              + layer1b.params + layer1c.params)

    # The learning rate is a function input rather than a constant so it
    # can be decayed between epochs without recompiling.
    l_r = T.scalar('l_r', dtype=theano.config.floatX)
    updates = gradient_updates_momentum(cost, params, l_r, momentum)

    train_model = theano.function(
        [index, l_r],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        if epoch % half_time == 0:
            learning_rate /= 2
        for minibatch_index in range(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            # called for its parameter updates; the returned cost is not used
            train_model(minibatch_index, learning_rate)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    # Both scores are zero-one error rates; report them as percentages,
    # consistent with the per-epoch messages above.
    print('Best validation error of %f %% obtained at iteration %i, '
          'with test error %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)

    demo_outputs = [
        demo_model(i) for i in range(n_test_batches)
    ]

    sensor_range = [-1.5, 1.5]
    y_range = [0, 1]
    plt.ion()
    plotting = False

    # Mean Euclidean distance between predictions and targets. Outputs are
    # treated as one row per sample, so this works both for (x, y)
    # coordinate pairs and for the scalar class labels this model emits.
    # For labels it is the mean absolute label difference; the zero-one
    # errors printed above remain the meaningful classification metric.
    total_distance = 0.
    n_samples = 0
    for predicted, target in demo_outputs:
        predicted = numpy.asarray(predicted, dtype='float64')
        target = numpy.asarray(target, dtype='float64')
        if predicted.ndim == 1:
            predicted = predicted[:, numpy.newaxis]
        if target.ndim == 1:
            target = target[:, numpy.newaxis]
        total_distance += numpy.sqrt(
            ((target - predicted) ** 2).sum(axis=1)).sum()
        n_samples += predicted.shape[0]

        # The scatter plot only makes sense for 2-D coordinate outputs.
        if plotting and predicted.shape[1] == 2 and target.shape[1] == 2:
            for (x_hat, y_hat), (x_true, y_true) in zip(predicted, target):
                plt.clf()
                plt.plot([x_hat], [y_hat], 'ro')
                plt.plot([x_true], [y_true], 'g+')
                plt.grid()
                plt.axis([sensor_range[0], sensor_range[1],
                          y_range[0], y_range[1]])
                plt.pause(0.05)

    # Average over the samples actually evaluated.
    MED = total_distance / n_samples if n_samples else 0.
    print('MED = %f\n' % MED)
def evaluate_convnet(learning_rate=0.02, n_epochs=2000,
                     dataset='single_sphere', nkerns=(32, 64, 64, 128),
                     batch_size=128,
                     filter_shapes=((5, 5), (5, 5), (3, 3), (3, 3)),
                     momentum=0.9, half_time=500):
    """Train a convolutional classifier with momentum SGD and early stopping.

    The model is four ``conv_layer`` stages, an ``SPP`` layer, a 500-unit
    fully connected ReLU layer and a 39-way ``LogisticRegression`` output.
    Data is obtained from ``load_char_data()``. Progress, the best
    validation/test error and a final distance statistic are printed;
    nothing is returned.

    :type learning_rate: float
    :param learning_rate: initial learning rate; it is halved every
                          ``half_time`` epochs

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: unused; kept for backward compatibility (the data always
                    comes from ``load_char_data()``)

    :type nkerns: sequence of 4 ints
    :param nkerns: number of kernels of each convolutional layer

    :type batch_size: int
    :param batch_size: number of samples per minibatch

    :type filter_shapes: sequence of 4 (rows, cols) pairs
    :param filter_shapes: filter size of each convolutional layer

    :type momentum: float
    :param momentum: momentum passed to ``gradient_updates_momentum``

    :type half_time: int
    :param half_time: number of epochs between learning-rate halvings
    """
    rng = numpy.random.RandomState(23455)

    # TODO: implement load_latline_dataset() as an alternative data source.
    datasets, max_row, max_col = load_char_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    # (floor division: a trailing partial batch is never used)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.tensor4('x')   # input batch (4D tensor)
    y = T.ivector('y')   # integer class labels
    # NOTE(review): `idxs` is handed to SPP but is never supplied through
    # `givens` or as an input of the compiled functions below. If SPP's
    # output depends on it, Theano cannot compile them - confirm.
    idxs = T.matrix('idxs')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    layer0_input = x

    # Every convolutional layer is declared with the full
    # (max_row, max_col) spatial size; only the channel count changes.
    layer0 = conv_layer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, max_row, max_col),
        filter_shape=(nkerns[0], 3, filter_shapes[0][0], filter_shapes[0][1]),
        pooling=False,
        activation=T.nnet.relu
    )

    # The only stage with pooling enabled. NOTE(review): keepDims=True
    # presumably preserves the spatial size, since the next layer is again
    # declared with (max_row, max_col) - confirm against conv_layer.
    layer1 = conv_layer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], max_row, max_col),
        filter_shape=(nkerns[1], nkerns[0],
                      filter_shapes[1][0], filter_shapes[1][1]),
        pooling=True,
        poolsize=(2, 2),
        activation=T.nnet.relu,
        keepDims=True
    )

    layer1b = conv_layer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], max_row, max_col),
        filter_shape=(nkerns[2], nkerns[1],
                      filter_shapes[2][0], filter_shapes[2][1]),
        pooling=False,
        activation=T.nnet.relu,
        keepDims=True
    )

    layer1c = conv_layer(
        rng,
        input=layer1b.output,
        image_shape=(batch_size, nkerns[2], max_row, max_col),
        filter_shape=(nkerns[3], nkerns[2],
                      filter_shapes[3][0], filter_shapes[3][1]),
        pooling=False,
        activation=T.nnet.relu,
        keepDims=True
    )

    spp_layer = SPP(layer1c.output, idxs)

    # The fully connected layer consumes the SPP output; its input width
    # is spp_layer.M features for each of the nkerns[-1] feature maps.
    layer2_input = spp_layer.output

    # construct a fully-connected ReLU layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=spp_layer.M * nkerns[-1],
        n_out=500,
        activation=T.nnet.relu
    )

    # 39-way classifier on top of the hidden layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=39)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create functions to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # returns the predictions together with the targets of a test batch
    demo_model = theano.function(
        [index],
        [layer3.y_pred, y],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (layer3.params + layer2.params + layer1.params + layer0.params
              + layer1b.params + layer1c.params)

    # The learning rate is a function input rather than a constant so it
    # can be decayed between epochs without recompiling.
    l_r = T.scalar('l_r', dtype=theano.config.floatX)
    updates = gradient_updates_momentum(cost, params, l_r, momentum)

    train_model = theano.function(
        [index, l_r],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        if epoch % half_time == 0:
            learning_rate /= 2
        for minibatch_index in range(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            # called for its parameter updates; the returned cost is not used
            train_model(minibatch_index, learning_rate)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    # Both scores are zero-one error rates; report them as percentages,
    # consistent with the per-epoch messages above.
    print('Best validation error of %f %% obtained at iteration %i, '
          'with test error %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)

    demo_outputs = [
        demo_model(i) for i in range(n_test_batches)
    ]

    sensor_range = [-1.5, 1.5]
    y_range = [0, 1]
    plt.ion()
    plotting = False

    # Mean Euclidean distance between predictions and targets. Outputs are
    # treated as one row per sample, so this works both for (x, y)
    # coordinate pairs and for the scalar class labels this model emits.
    # For labels it is the mean absolute label difference; the zero-one
    # errors printed above remain the meaningful classification metric.
    total_distance = 0.
    n_samples = 0
    for predicted, target in demo_outputs:
        predicted = numpy.asarray(predicted, dtype='float64')
        target = numpy.asarray(target, dtype='float64')
        if predicted.ndim == 1:
            predicted = predicted[:, numpy.newaxis]
        if target.ndim == 1:
            target = target[:, numpy.newaxis]
        total_distance += numpy.sqrt(
            ((target - predicted) ** 2).sum(axis=1)).sum()
        n_samples += predicted.shape[0]

        # The scatter plot only makes sense for 2-D coordinate outputs.
        if plotting and predicted.shape[1] == 2 and target.shape[1] == 2:
            for (x_hat, y_hat), (x_true, y_true) in zip(predicted, target):
                plt.clf()
                plt.plot([x_hat], [y_hat], 'ro')
                plt.plot([x_true], [y_true], 'g+')
                plt.grid()
                plt.axis([sensor_range[0], sensor_range[1],
                          y_range[0], y_range[1]])
                plt.pause(0.05)

    # Average over the samples actually evaluated.
    MED = total_distance / n_samples if n_samples else 0.
    print('MED = %f\n' % MED)
def build_cnn(learning_rate=1e-4):
    """Add a two-conv, two-dense classifier to the current default TF1 graph.

    Ops are created in this order: input placeholders, reshape to
    [-1, 28, 28, 1], one-hot labels, conv_1 + max-pool, conv_2 + max-pool,
    fc_3 (1024 units, ReLU) with dropout, fc_4 (10 units, no activation),
    softmax / argmax predictions, mean softmax cross-entropy loss, an Adam
    training op and an accuracy op. Nothing is returned; the named ops
    ('tf_x', 'tf_y', 'fc_keep_prob', 'train_op', 'accuracy', ...) are meant
    to be fetched from the graph by name.

    :param learning_rate: learning rate passed to ``tf.train.AdamOptimizer``
    """
    # Inputs: flattened images and integer class labels.
    flat_images = tf.placeholder(tf.float32, shape=[None, 784], name='tf_x')
    labels = tf.placeholder(tf.int32, shape=[None], name='tf_y')

    # Reshape the flat input to a 4D tensor [batch, 28, 28, 1].
    images = tf.reshape(flat_images, shape=[-1, 28, 28, 1],
                        name='tf_x_reshaped')
    labels_onehot = tf.one_hot(indices=labels, depth=10, dtype=tf.float32,
                               name='tf_y_onehot')

    # Both pooling stages use the same 2x2 window with stride 2.
    pool_args = dict(ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    print('\nBuilding 1st layer: ')
    conv1 = conv_layer(images, name='conv_1', kernel_size=(5, 5),
                       padding_mode='VALID', n_output_channels=32)
    pooled1 = tf.nn.max_pool(conv1, **pool_args)

    print('\nBuilding 2nd layer: ')
    conv2 = conv_layer(pooled1, name='conv_2', kernel_size=(5, 5),
                       padding_mode='VALID', n_output_channels=64)
    pooled2 = tf.nn.max_pool(conv2, **pool_args)

    print('\nBuilding 3rd layer:')
    dense = fc_layer(pooled2, name='fc_3', n_output_units=1024,
                     activation_fn=tf.nn.relu)

    # Dropout keep probability is fed at run time.
    keep_prob = tf.placeholder(tf.float32, name='fc_keep_prob')
    dense_dropped = tf.nn.dropout(dense, keep_prob=keep_prob,
                                  name='dropout_layer')

    print('\nBuilding 4th layer:')
    logits = fc_layer(dense_dropped, name='fc_4', n_output_units=10,
                      activation_fn=None)

    # Predictions: class probabilities first, then the arg-max label.
    probabilities = tf.nn.softmax(logits, name='probabilities')
    predicted_labels = tf.cast(tf.argmax(logits, axis=1), tf.int32,
                               name='labels')
    predictions = {
        'probabilities': probabilities,
        'labels': predicted_labels,
    }

    # Loss and training op.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=labels_onehot)
    cross_entropy_loss = tf.reduce_mean(per_example_loss,
                                        name='cross_entropy_loss')
    adam = tf.train.AdamOptimizer(learning_rate)
    train_op = adam.minimize(cross_entropy_loss, name='train_op')

    # Accuracy: fraction of predictions equal to the fed labels.
    is_correct = tf.equal(predictions['labels'], labels,
                          name='correct_preds')
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32),
                              name='accuracy')