Example #1
    def __init__(self):
        self.layers = []
        self.layers.append(conv_layer(action=act.relu, zero_padding=2,
                                      action_derive=act.relu_derive,
                                      input_shape=(28, 28), kernel_stride=1,
                                      kernel_shape=(5, 5), kernel_num=6))
        self.layers.append(pooling_layer(input_shape=(6, 28, 28),
                                         kernel_shape=(6, 2, 2),
                                         pooling_type='max_pooling', stride=2))
        self.layers.append(conv_layer(action=act.relu, zero_padding=0,
                                      action_derive=act.relu_derive,
                                      input_shape=(6, 14, 14), kernel_stride=1,
                                      kernel_shape=(6, 5, 5), kernel_num=16))
        self.layers.append(pooling_layer(input_shape=(16, 10, 10),
                                         kernel_shape=(16, 2, 2),
                                         pooling_type='max_pooling', stride=2))
        self.layers.append(conv_layer(action=act.relu, zero_padding=0,
                                      action_derive=act.relu_derive,
                                      input_shape=(16, 5, 5), kernel_stride=1,
                                      kernel_shape=(16, 5, 5), kernel_num=120))
        self.layers.append(fc_layer(action=act.relu,
                                    action_derive=act.relu_derive,
                                    layers=(120, 84)))
        self.layers.append(fc_layer(action=act.sigmoid,
                                    action_derive=act.sigmoid_derive,
                                    layers=(84, 10)))
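The constructor above chains into the classic LeNet-5 shape progression. A quick sanity check using the standard output-size formula out = (n + 2*p - k)//s + 1, independent of the example's conv_layer/pooling_layer classes (which are not shown here):

def out_size(n, k, s=1, p=0):
    # Standard convolution/pooling output size.
    return (n + 2 * p - k) // s + 1

n = out_size(28, k=5, p=2)   # conv, 6 kernels:   28 -> 28
n = out_size(n, k=2, s=2)    # max pool 2x2:      28 -> 14
n = out_size(n, k=5)         # conv, 16 kernels:  14 -> 10
n = out_size(n, k=2, s=2)    # max pool 2x2:      10 -> 5
n = out_size(n, k=5)         # conv, 120 kernels:  5 -> 1
print(n)                     # 1, so the fc stage starts from a flat 120-vector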
Example #2
    def __init__(self, inputs, graph, schedule, options=object()):
        self.variables = {}
        self.constSet = set()

        self.available_layers = {
            'Convolution': conv_layer(mode='NCHW'),  # done
            'BiasAdd': matop_layer('BiasAdd', mode="NCHW"),  # done
            'Eltwise': eltwise_layer(operation='SUM',
                                     mode='NCHW'),  # TODO FIXME assumes add???
            # 'Mean': reduce_layer('AVG', mode='NCHW'), # TODO FIXME assumes avgpool???
            'Reshape': reshape_layer(),
            'Scale': scale_layer(mode='NCHW'),  # done
            'ReLU': relu_layer(),  # done
            'Pooling': pool_layer(mode='NCHW'),  # done
            'Concat': concat_layer(1),  # done
            'BatchNorm': batchnorm_layer(),  # done
            'InnerProduct': matop_layer('MatMul'),  # done
            # 'Mul': matop_layer('MatMul'), #done
            'Sub': matop_layer('Sub'),  # done
            'Identity': identity_layer(),
            'Softmax': softmax_layer()
        }

        ignore = self.compute_ignore_nodes(inputs, graph)
        self._layers = self.create_schedule(graph, ignore, schedule)
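The dictionary above is a dispatch table from framework op names to layer objects; compute_ignore_nodes and create_schedule are not shown in this excerpt. A hypothetical sketch of how such a table is typically consumed when lowering a graph (the node tuple layout and the ignore set are assumptions, not the project's actual IR):

def lower_graph(graph_nodes, available_layers, ignore):
    """Map each graph node onto a registered layer implementation."""
    schedule = []
    for name, op_type, inputs in graph_nodes:   # assumed (name, op, inputs) tuples
        if name in ignore:
            continue
        if op_type not in available_layers:
            raise NotImplementedError('no layer registered for op %r' % op_type)
        schedule.append((name, available_layers[op_type], inputs))
    return schedule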
Example #3
def TopConv(input_vec, conv_filters_lst, beta, gamma):
    result_vec = input_vec
    for stage in range(5):
        result_vec = conv_layer(result_vec, conv_filters_lst[stage], beta,
                                gamma)

    return result_vec
def init_conv_test():
    a = np.array([[[0, 1, 1, 0, 2], [2, 2, 2, 2, 1], [1, 0, 0, 2, 0],
                   [0, 1, 1, 0, 0], [1, 2, 0, 0, 2]],
                  [[1, 0, 2, 2, 0], [0, 0, 0, 2, 0], [1, 2, 1, 2, 1],
                   [1, 0, 0, 0, 0], [1, 2, 1, 1, 1]],
                  [[2, 1, 2, 0, 0], [1, 0, 0, 1, 0], [0, 2, 1, 0, 1],
                   [0, 1, 2, 2, 2], [2, 1, 0, 0, 1]]])
    b = np.array([[[0, 1, 1], [2, 2, 2], [1, 0, 0]],
                  [[1, 0, 2], [0, 0, 0], [1, 2, 1]]])
    cl = conv_layer(action=identify,
                    zero_padding=1,
                    action_derive=identify_derive,
                    input_shape=(3, 5, 5),
                    kernel_stride=2,
                    kernel_shape=(3, 3, 3),
                    kernel_num=2)

    cl.weights[0] = np.array([[[-1, 1, 0], [0, 1, 0], [0, 1, 1]],
                              [[-1, -1, 0], [0, 0, 0], [0, -1, 0]],
                              [[0, 0, -1], [0, 1, 0], [1, -1, -1]]],
                             dtype=np.float64)
    cl.bias[0] = 1
    cl.weights[1] = np.array([[[1, 1, -1], [-1, -1, 1], [0, -1, 1]],
                              [[0, 1, 0], [-1, 0, -1], [-1, 1, 0]],
                              [[-1, 0, 0], [-1, 0, 1], [-1, 0, 0]]],
                             dtype=np.float64)
    return a, b, cl
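Because the weights and bias are pinned to fixed values, the forward pass of this layer can be checked against a plain-NumPy reference. The sketch below assumes conv_layer performs zero-padded cross-correlation with the given stride and that identify/identify_derive act as identity activations:

import numpy as np

def reference_forward(x, weights, biases, padding=1, stride=2):
    # x: (C, H, W); weights: list of (C, kH, kW) kernels; zero-padded cross-correlation.
    xp = np.pad(x, ((0, 0), (padding, padding), (padding, padding)), mode='constant')
    kh, kw = weights[0].shape[1:]
    oh = (xp.shape[1] - kh) // stride + 1
    ow = (xp.shape[2] - kw) // stride + 1
    out = np.zeros((len(weights), oh, ow))
    for k, (w, b) in enumerate(zip(weights, biases)):
        for i in range(oh):
            for j in range(ow):
                patch = xp[:, i * stride:i * stride + kh, j * stride:j * stride + kw]
                out[k, i, j] = np.sum(patch * w) + b
    return out

a, b, cl = init_conv_test()
expected = reference_forward(a, [cl.weights[0], cl.weights[1]],
                             [cl.bias[0], cl.bias[1]])
print(expected)   # with an identity activation, cl.forward(a) should match this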
Example #5
def test_conv_layer():
    input_arr = np.array([[[0, 1, 1, 0, 2], [2, 2, 2, 2, 1], [1, 0, 0, 2, 0],
                           [0, 1, 1, 0, 0], [1, 2, 0, 0, 2]],
                          [[1, 0, 2, 2, 0], [0, 0, 0, 2, 0], [1, 2, 1, 2, 1],
                           [1, 0, 0, 0, 0], [1, 2, 1, 1, 1]],
                          [[2, 1, 2, 0, 0], [1, 0, 0, 1, 0], [0, 2, 1, 0, 1],
                           [0, 1, 2, 2, 2], [2, 1, 0, 0, 1]]])
    weights_1 = np.array([[[-1, 1, 0], [0, 1, 0], [0, 1, 1]],
                          [[-1, -1, 0], [0, 0, 0], [0, -1, 0]],
                          [[0, 0, -1], [0, 1, 0], [1, -1, -1]]])
    weights_2 = np.array([[[1, 1, -1], [-1, -1, 1], [0, -1, 1]],
                          [[0, 1, 0], [-1, 0, -1], [-1, 1, 0]],
                          [[-1, 0, 0], [-1, 0, 1], [-1, 0, 0]]])
    bias_1 = 1
    bias_2 = 0
    cl = conv_layer(action=relu,
                    zero_padding=1,
                    action_derive=relu_derive,
                    input_shape=(3, 5, 5),
                    kernel_stride=2,
                    kernel_shape=(3, 3, 3),
                    kernel_num=2)
    cl.weights[0] = weights_1
    cl.weights[1] = weights_2
    cl.bias[0] = bias_1
    cl.bias[1] = bias_2
    feature_map = cl.forward(input_arr)
    print(feature_map)
    pass
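With zero_padding=1, a 3x3 kernel and stride 2 over the 5x5 input, the usual arithmetic gives (5 + 2*1 - 3)//2 + 1 = 3, so feature_map should have shape (2, 3, 3) and, after the ReLU, no negative entries. Two assertions one might append inside test_conv_layer after the print (assuming conv_layer follows standard convolution arithmetic and returns a NumPy array):

    assert feature_map.shape == (2, 3, 3)   # two kernels, each producing a 3x3 map
    assert (feature_map >= 0).all()         # relu output is never negative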
Example #6
    def __init__(self, input_shape):
        # define batch param
        self.batch_loss = 0
        self.batch_acc = 0
        self.batch_size = input_shape[0]

        # define network
        print('network structure:')
        self.conv1 = conv_layer(input_shape, 8, 5, 1, 2)  # [batch_size, 28, 28, 1]
        print('conv1', self.conv1.output_shape)
        self.relu1 = relu(self.conv1.output_shape)
        print('relu1', self.relu1.output_shape)
        self.pool1 = max_pooling(self.relu1.output_shape)
        print('pool1', self.pool1.output_shape)
        self.conv2 = conv_layer(self.pool1.output_shape, 16, 3, 1, 1)
        print('conv2', self.conv2.output_shape)
        self.relu2 = relu(self.conv2.output_shape)
        print('relu2', self.relu2.output_shape)
        self.pool2 = max_pooling(self.relu2.output_shape)
        print('pool2', self.pool2.output_shape)
        self.fc = fc_layer(self.pool2.output_shape, 10)
        print('fc', self.fc.output_shape)
        self.loss = loss_layer(self.fc.output_shape)
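The positional call here is read as conv_layer(input_shape, out_channels, kernel_size, stride, padding), and max_pooling is taken to default to a 2x2 window with stride 2; both are assumptions, since the layer classes are not shown. Under that reading the printed shapes follow the usual formula:

# Assumed shape chain for a [batch, 28, 28, 1] input:
n = (28 + 2 * 2 - 5) // 1 + 1   # conv1 (8 filters, k=5, s=1, p=2):  28 -> 28
n = (n - 2) // 2 + 1            # pool1 (2x2, s=2):                  28 -> 14
n = (n + 2 * 1 - 3) // 1 + 1    # conv2 (16 filters, k=3, s=1, p=1): 14 -> 14
n = (n - 2) // 2 + 1            # pool2 (2x2, s=2):                  14 -> 7
print(n * n * 16)               # 784 features feeding the 10-way fc layer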
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from conv_layer import conv_layer

## testing
g = tf.Graph()
with g.as_default():
    x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    conv_layer(x, name='convtest', kernel_size=(3, 3), n_output_channels=32)

del g, x
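The snippet above only smoke-tests that conv_layer builds a graph for a 28x28x1 placeholder; the wrapper itself lives in conv_layer.py and is not shown. A hypothetical stand-in showing the usual shape of such a TF-v1 helper (variable scope, conv2d, bias, ReLU); the imported implementation may differ:

def conv2d_block(input_tensor, name, kernel_size, n_output_channels,
                 padding_mode='SAME', strides=(1, 1, 1, 1)):
    # Illustrative sketch only -- not the conv_layer imported above.
    with tf.variable_scope(name):
        n_input_channels = input_tensor.get_shape().as_list()[-1]
        weights = tf.get_variable(
            'weights',
            shape=list(kernel_size) + [n_input_channels, n_output_channels],
            initializer=tf.glorot_uniform_initializer())
        biases = tf.get_variable('biases',
                                 initializer=tf.zeros([n_output_channels]))
        conv = tf.nn.conv2d(input_tensor, weights,
                            strides=list(strides), padding=padding_mode)
        return tf.nn.relu(conv + biases, name='activation')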
def evaluate_convnet(learning_rate=0.02, n_epochs=2000,
                     dataset='single_sphere',
                     nkerns=[32, 64, 64, 128], batch_size=128,
                     filter_shapes=[[5, 5], [5, 5], [3, 3], [3, 3]],
                     momentum=0.9, half_time=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets, max_row, max_col = load_char_data() #load_latline_dataset() # << TODO implement

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.tensor4('x')   # the data is presented as spiking of sensors at the lateral line
    y = T.ivector('y')   # the output is the distance (in x- and y-directions) of the sphere

    idxs = T.matrix('idxs')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of sensor detections to a 4D tensor
    layer0_input = x # x.reshape((batch_size, depth_dim, conv_dims[0], conv_dims[1]))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = conv_layer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, max_row, max_col),
        filter_shape=(nkerns[0], 3, filter_shapes[0][0], filter_shapes[0][1]),
        pooling=False,
        activation=T.nnet.relu
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)

    layer1 = conv_layer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], max_row, max_col),
        filter_shape=(nkerns[1], nkerns[0], filter_shapes[1][0], filter_shapes[1][1]),
        pooling=True,
        poolsize=(2, 2),
        activation=T.nnet.relu,
        keepDims=True
    )

    layer1b = conv_layer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], max_row, max_col),
        filter_shape=(nkerns[2], nkerns[1], filter_shapes[2][0], filter_shapes[2][1]),
        pooling=False,
        activation=T.nnet.relu,
        keepDims=True
    )

    layer1c = conv_layer(
        rng,
        input=layer1b.output,
        image_shape=(batch_size, nkerns[2], max_row, max_col),
        filter_shape=(nkerns[3], nkerns[2], filter_shapes[3][0], filter_shapes[3][1]),
        pooling=False,
        activation=T.nnet.relu,
        keepDims=True
    )

    spp_layer = SPP(layer1c.output, idxs)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = spp_layer.output

    # construct a fully-connected ReLU layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=spp_layer.M * nkerns[-1],
        n_out=500,
        activation=T.nnet.relu
    )

    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=39)

    # linear regression by using a fully connected layer
    '''layer3 = HiddenLayer(
        rng,
        input=layer2.output,
        n_in=conv_dims[1] * 2,
        n_out=2,
        activation=None
    )
    '''

    # classify the values of the fully-connected sigmoidal layer
    #layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    demo_model = theano.function(
        [index],
        [layer3.y_pred, y],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params + layer1b.params + layer1c.params

    # create a list of gradients for all model parameters
    #grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    #updates = [
    #    (param_i, param_i - learning_rate * grad_i)
    #    for param_i, grad_i in zip(params, grads)
    #]

    l_r = T.scalar('l_r', dtype=theano.config.floatX)

    updates = gradient_updates_momentum(cost, params, l_r, momentum)

    train_model = theano.function(
        [index, l_r],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        if epoch % half_time == 0:
            learning_rate /= 2

        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index, learning_rate)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation MSE of %f %% obtained at iteration %i, '
          'with test MSE %f ' %
          (best_validation_loss, best_iter + 1, test_score))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)

    demo_outputs = [
        demo_model(i)
        for i in range(n_test_batches)
    ]

    sensor_range = [-1.5, 1.5]
    y_range = [0, 1]
    plt.ion()

    plotting = False

    MED = 0
    for i in range(n_test_batches):
        predicted, target = demo_outputs[i]
        for j in range(predicted.shape[0]):
            x_hat, y_hat = predicted[j]
            x, y = target[j]

            MED += numpy.sqrt((x - x_hat) ** 2 + (y - y_hat) ** 2)

            if plotting:
                plt.clf()
                plt.plot([x_hat], [y_hat], 'ro')
                plt.plot([x], [y], 'g+')
                plt.grid()
                plt.axis([sensor_range[0], sensor_range[1], y_range[0], y_range[1]])
                plt.pause(0.05)
    MED /= 2000

    print('MED = %f\n' % MED)
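gradient_updates_momentum is used above but not defined in this excerpt. A common Theano formulation of momentum SGD, given here as an illustrative sketch only (the project's own helper may differ in initialisation or sign conventions):

import theano
import theano.tensor as T

def gradient_updates_momentum(cost, params, learning_rate, momentum):
    # Hypothetical stand-in for the helper referenced above.
    updates = []
    for param in params:
        # One shared velocity per parameter, initialised to zero.
        velocity = theano.shared(param.get_value() * 0.,
                                 broadcastable=param.broadcastable)
        updates.append((param, param - learning_rate * velocity))
        updates.append((velocity,
                        momentum * velocity + (1. - momentum) * T.grad(cost, param)))
    return updates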
def build_cnn(learning_rate=1e-4):
    ## Placeholders for X and y:
    tf_x = tf.placeholder(tf.float32, shape=[None, 784], name='tf_x')
    tf_y = tf.placeholder(tf.int32, shape=[None], name='tf_y')

    # reshape x to a 4D tensor:
    # [batchsize, width, height, 1]
    tf_x_image = tf.reshape(tf_x, shape=[-1, 28, 28, 1], name='tf_x_reshaped')
    ## One-hot encoding:
    tf_y_onehot = tf.one_hot(indices=tf_y,
                             depth=10,
                             dtype=tf.float32,
                             name='tf_y_onehot')

    ## 1st layer: Conv_1
    print('\nBuilding 1st layer: ')
    h1 = conv_layer(tf_x_image,
                    name='conv_1',
                    kernel_size=(5, 5),
                    padding_mode='VALID',
                    n_output_channels=32)
    ## MaxPooling
    h1_pool = tf.nn.max_pool(h1,
                             ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1],
                             padding='SAME')
    ## 2nd layer: Conv_2
    print('\nBuilding 2nd layer: ')
    h2 = conv_layer(h1_pool,
                    name='conv_2',
                    kernel_size=(5, 5),
                    padding_mode='VALID',
                    n_output_channels=64)
    ## MaxPooling
    h2_pool = tf.nn.max_pool(h2,
                             ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1],
                             padding='SAME')

    ## 3rd layer: Fully Connected
    print('\nBuilding 3rd layer:')
    h3 = fc_layer(h2_pool,
                  name='fc_3',
                  n_output_units=1024,
                  activation_fn=tf.nn.relu)

    ## Dropout
    keep_prob = tf.placeholder(tf.float32, name='fc_keep_prob')
    h3_drop = tf.nn.dropout(h3, keep_prob=keep_prob, name='dropout_layer')

    ## 4th layer: Fully Connected (linear activation)
    print('\nBuilding 4th layer:')
    h4 = fc_layer(h3_drop, name='fc_4', n_output_units=10, activation_fn=None)

    ## Prediction
    predictions = {
        'probabilities': tf.nn.softmax(h4, name='probabilities'),
        'labels': tf.cast(tf.argmax(h4, axis=1), tf.int32, name='labels')
    }

    ## Visualize the graph with TensorBoard:

    ## Loss Function and Optimization
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=h4, labels=tf_y_onehot),
        name='cross_entropy_loss')

    ## Optimizer:
    optimizer = tf.train.AdamOptimizer(learning_rate)
    optimizer = optimizer.minimize(cross_entropy_loss, name='train_op')

    ## Computing the prediction accuracy
    correct_predictions = tf.equal(predictions['labels'],
                                   tf_y,
                                   name='correct_preds')

    accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32),
                              name='accuracy')
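build_cnn only assembles the graph and relies on tensor/op names ('tf_x', 'tf_y', 'fc_keep_prob', 'cross_entropy_loss', 'train_op', 'accuracy') rather than returning handles. A sketch of how such a graph is typically driven in a session, with random data standing in for real MNIST batches (it assumes build_cnn and the layer helpers above are importable):

import numpy as np

# Dummy mini-batch in place of real MNIST data.
batch_x = np.random.rand(64, 784).astype('float32')
batch_y = np.random.randint(0, 10, size=64).astype('int32')

g = tf.Graph()
with g.as_default():
    build_cnn(learning_rate=1e-4)

with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    feed = {'tf_x:0': batch_x, 'tf_y:0': batch_y, 'fc_keep_prob:0': 0.5}
    loss, _ = sess.run(['cross_entropy_loss:0', 'train_op'], feed_dict=feed)
    # Dropout is disabled at evaluation time by feeding keep_prob = 1.0.
    acc = sess.run('accuracy:0', feed_dict={'tf_x:0': batch_x,
                                            'tf_y:0': batch_y,
                                            'fc_keep_prob:0': 1.0})
    print(loss, acc)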