def discriminator_class(inputs):
    with tf.name_scope('discriminator_class'):
        net = layers.fully_connected_layer(1, inputs, 16)
        net = layers.fully_connected_layer(2,
                                           net,
                                           1,
                                           tf.nn.sigmoid,
                                           zero_biases=True,
                                           zero_weights=True)

        return net
def build_mlp_inference_graph(x, dropout_keep_prob, is_training, params):

    num_layers = len(params['num_outputs'])
    prev_layer = None

    for i in range(num_layers):

        num_outputs = params['num_outputs'][i]
        layer_name = 'fc_layer' + str(i + 1)
        act = ACTIVATIONS[params['activations'][i]]

        if i == 0:
            inpt = x
        else:
            inpt = prev_layer

        if params['dropout'][i]:
            with tf.name_scope('dropout'):
                inpt = tf.nn.dropout(inpt, dropout_keep_prob)

        layer = fully_connected_layer(inputs=inpt,
                                      num_outputs=num_outputs,
                                      layer_name=layer_name,
                                      is_training=is_training,
                                      activation_fn=act)
        prev_layer = layer

    return prev_layer
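A minimal usage sketch for the MLP builder above; the params dict, the placeholder shapes, and the assumption that ACTIVATIONS maps string keys such as 'relu' and 'linear' to TensorFlow activation functions are all hypothetical, not taken from the original:
params = {
    'num_outputs': [128, 64, 10],               # two hidden layers plus a 10-way output
    'activations': ['relu', 'relu', 'linear'],  # keys assumed to exist in ACTIVATIONS
    'dropout': [False, True, False],            # dropout only in front of the second layer
}

x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
is_training = tf.placeholder(tf.bool, name='is_training')

logits = build_mlp_inference_graph(x, dropout_keep_prob, is_training, params)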
Example 3
def deep_network_with_batchnorm(x,
                                y=None,
                                number_of_classes=2,
                                filters=(16, 32, 64, 128),
                                strides=(2, 1, 2, 1),
                                is_training=True):
    # TODO: Do the same as with deep_network, but this time add batch norm before each convolution.

    logits = None
    params = {}
    assert len(filters) == len(strides), \
        'The parameters filters and strides must have the same length; got %d and %d' \
        % (len(filters), len(strides))

    update_ops = []  # Fill this with update_ops from batch_norm

    ###### YOUR CODE #######
    # Build your network and output logits
    out = x

    for i, (num_filters, stride) in enumerate(zip(filters, strides), start=1):
        bn_out, bn_params, update_op = batch_norm(out, is_training)
        conv, conv_params = conv2d(bn_out,
                                   number_of_features=num_filters,
                                   stride=stride,
                                   k_size=3)  # k_size given by assignment
        out = tf.nn.relu(conv)

        for key, value in list(conv_params.items()) + list(bn_params.items()):
            params['conv%d/%s' % (i, key)] = value

        update_ops.append(update_op)

    logits, dense_params = fully_connected_layer(out, number_of_classes)

    for key, value in dense_params.items():
        params['fc/%s' % key] = value

    # END OF YOUR CODE

    if y is None:
        return logits, params, update_ops

    # TODO: Calculate softmax cross-entropy
    #  without using any of the softmax or cross-entropy functions from Tensorflow
    loss = None

    ###### YOUR CODE #######
    # Calculate loss
    h = tf.exp(logits - tf.reduce_max(logits, axis=1, keep_dims=True))
    h /= tf.reduce_sum(h, axis=1, keep_dims=True)

    loss = -tf.reduce_sum(y * tf.log(h), axis=1, keep_dims=True)
    loss = tf.reduce_mean(loss)
    #loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y) # For comparison and debug
    # END OF YOUR CODE

    update_op = tf.group(*tuple(update_ops))
    return logits, loss, params, update_op
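A minimal training-wiring sketch for the builder above, assuming the conv2d, batch_norm and fully_connected_layer helpers are in scope; the placeholder shapes and the optimizer choice are assumptions, not part of the original:
x = tf.placeholder(tf.float32, [None, 32, 32, 3], name='x')
y = tf.placeholder(tf.float32, [None, 2], name='y')

logits, loss, params, update_op = deep_network_with_batchnorm(x, y, is_training=True)

optimizer = tf.train.GradientDescentOptimizer(0.01)
# Group the batch-norm moving-average updates with the gradient step so both run together.
train_op = tf.group(optimizer.minimize(loss), update_op)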
def discriminator_latent(inputs, categorical_shape, continuous_shape):
    with tf.name_scope('discriminator_latent'):
        net = layers.fully_connected_layer(1, inputs, 16)

        cat_net = layers.fully_connected_layer(2,
                                               net,
                                               categorical_shape,
                                               tf.nn.softmax,
                                               zero_biases=True,
                                               zero_weights=True)
        con_net = layers.fully_connected_layer(3,
                                               net,
                                               continuous_shape,
                                               tf.nn.tanh,
                                               zero_biases=True,
                                               zero_weights=True)

        return cat_net, con_net
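A usage sketch for the two latent heads above; the feature dimensionality and the categorical/continuous code sizes are assumptions chosen for illustration:
features = tf.placeholder(tf.float32, [None, 1024], name='features')
cat_probs, cont_codes = discriminator_latent(features,
                                             categorical_shape=10,
                                             continuous_shape=2)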
Example 5
def deep_residual_network(x,
                          y=None,
                          number_of_classes=2,
                          filters=(16, 32, 64, 128),
                          strides=(2, 1, 2, 1),
                          is_training=True):
    # TODO: Do the same as with deep_network_with_batchnorm, but this time use the residual_blocks

    logits = None
    params = {}
    assert len(filters) == len(strides), \
        'The parameters filters and strides must have the same length; got %d and %d' \
        % (len(filters), len(strides))

    update_ops = []  # Fill this with update_ops from batch_norm

    ###### YOUR CODE #######
    # Build your network and output logits
    out = x

    for i, (num_filters, stride) in enumerate(zip(filters, strides), start=1):
        out, resnet_params, update_op = resnet_block(out,
                                                     filters=(num_filters, num_filters),
                                                     k_size=(3, 3),
                                                     stride=stride,
                                                     is_training=is_training)

        for key, value in resnet_params.items():
            params['resnet%d/%s' % (i, key)] = value

        update_ops.append(update_op)

    logits, dense_params = fully_connected_layer(out, number_of_classes)

    for key, value in dense_params.items():
        params['fc/%s' % key] = value
    # END OF YOUR CODE

    if y is None:
        return logits, params, update_ops

    # TODO: Calculate softmax cross-entropy loss
    #  without using any of the softmax or cross-entropy functions from Tensorflow
    loss = None

    ###### YOUR CODE #######
    # Calculate loss
    h = tf.exp(logits - tf.reduce_max(logits, axis=1, keep_dims=True))
    h /= tf.reduce_sum(h, axis=1, keep_dims=True)

    loss = -tf.reduce_sum(tf.to_float(y) * tf.log(h), axis=1, keep_dims=True)
    loss = tf.reduce_mean(loss)
    #loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y) # For comparison and debug
    # END OF YOUR CODE

    update_op = tf.group(*tuple(update_ops))
    return logits, loss, params, update_op
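When labels are not supplied, the same builder returns only logits, parameters and the batch-norm update ops, e.g. for inference. The shapes below are assumptions, and variable sharing across calls is left to the resnet_block and fully_connected_layer helpers:
x = tf.placeholder(tf.float32, [None, 32, 32, 3], name='x')
logits, params, update_ops = deep_residual_network(x, y=None, is_training=False)
probabilities = tf.nn.softmax(logits)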
Example 6
def deep_network(x,
                 y=None,
                 number_of_classes=2,
                 filters=(16, 32, 64, 128),
                 strides=(2, 1, 2, 1)):
    # TODO: Use conv2d, tf.nn.relu and fully_connected_layer to create an (n+1)-layer network,
    # where n corresponds to the length of filters and strides.
    # Your first n layers should be convolutional and the last layer fully connected.
    logits = None
    params = {}
    assert len(filters) == len(strides), \
        'The parameters filters and strides must have the same length; got %d and %d' \
        % (len(filters), len(strides))

    ###### YOUR CODE #######
    # Build your network and output logits
    out = x

    for i, (num_filters, stride) in enumerate(zip(filters, strides), start=1):
        conv, conv_params = conv2d(out,
                                   number_of_features=num_filters,
                                   stride=stride,
                                   k_size=3)  # k_size given by assignment
        out = tf.nn.relu(conv)

        for key, value in conv_params.items():
            params['conv%d/%s' % (i, key)] = value

    logits, dense_params = fully_connected_layer(out, number_of_classes)

    for key, value in dense_params.items():
        params['fc/%s' % key] = value

    # END OF YOUR CODE

    if y is None:
        return logits, params

    # TODO: Calculate softmax cross-entropy
    #  without using any of the softmax or cross-entropy functions from Tensorflow
    loss = None

    ###### YOUR CODE #######

    # Calculate loss
    h = tf.exp(logits - tf.reduce_max(logits, axis=1, keep_dims=True))
    h /= tf.reduce_sum(h, axis=1, keep_dims=True)

    loss = -tf.reduce_sum(y * tf.log(h), axis=1, keep_dims=True)
    loss = tf.reduce_mean(loss)
    #loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y) # For comparison and debug
    # END OF YOUR CODE

    return logits, loss, params
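A quick numerical sanity check of the hand-rolled softmax cross-entropy above against TensorFlow's built-in; the toy logits and labels are made up for illustration:
import numpy as np

logits_np = np.array([[2.0, 0.5], [0.1, 1.2]], dtype=np.float32)
labels_np = np.array([[1.0, 0.0], [0.0, 1.0]], dtype=np.float32)

logits_t = tf.constant(logits_np)
labels_t = tf.constant(labels_np)

h = tf.exp(logits_t - tf.reduce_max(logits_t, axis=1, keep_dims=True))
h /= tf.reduce_sum(h, axis=1, keep_dims=True)
manual_loss = tf.reduce_mean(-tf.reduce_sum(labels_t * tf.log(h), axis=1))
builtin_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits_t, labels=labels_t))

with tf.Session() as sess:
    print(sess.run([manual_loss, builtin_loss]))  # the two values should agree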
def generator(inputs, batch_size, training):
    with tf.name_scope('generator'):
        net = layers.fully_connected_layer(1, inputs, 4 * 4 * 512, None)
        net = tf.reshape(net, [batch_size, 4, 4, 512])
        net = layers.batch_norm(net, training, name='bn1')
        net = layers.conv2d_transpose_layer(1,
                                            net, [5, 5, 256],
                                            batch_size,
                                            stride=2)
        net = layers.batch_norm(net, training, name='bn2')
        net = layers.conv2d_transpose_layer(2,
                                            net, [5, 5, 128],
                                            batch_size,
                                            stride=2)
        net = layers.batch_norm(net, training, name='bn3')
        net = layers.conv2d_transpose_layer(3,
                                            net, [5, 5, 1],
                                            batch_size,
                                            tf.nn.sigmoid,
                                            stride=2,
                                            zero_biases=True)

        return net
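A usage sketch for the generator above, assuming a 100-dimensional noise vector as input (the noise size is an assumption). With three stride-2 transposed convolutions, the 4x4 feature map should grow to 32x32 single-channel images, provided conv2d_transpose_layer doubles the spatial size per stride-2 layer:
batch_size = 64
z = tf.placeholder(tf.float32, [batch_size, 100], name='z')
is_training = tf.placeholder(tf.bool, name='is_training')
fake_images = generator(z, batch_size, is_training)  # expected shape: [64, 32, 32, 1]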
Example 8
def conv_net_model_train(learning_rate, train_dir, save_dir):
    """
    The feed forward convolutional neural network model

    Hyper parameters include learning rate, number of convolutional layers and
    fully connected layers. (Currently TBD)

    """
    # Reset graphs
    tf.reset_default_graph()

    # Create placeholders
    x = tf.placeholder(dtype=tf.float32,
                       shape=[
                           None, INPUT_IMAGE_DIMENSION, INPUT_IMAGE_DIMENSION,
                           INPUT_IMAGE_CHANNELS
                       ],
                       name="x")
    y = tf.placeholder(dtype=tf.float32,
                       shape=[None, OUTPUT_VECTOR_SIZE],
                       name="y")
    weight1 = tf.Variable(tf.truncated_normal([4, 4, 3, 16], stddev=0.1),
                          dtype=tf.float32,
                          name="W1")
    bias1 = tf.Variable(tf.constant(0.1, shape=[16]),
                        dtype=tf.float32,
                        name="B1")
    weight2 = tf.Variable(tf.truncated_normal([4, 4, 16, 32], stddev=0.1),
                          dtype=tf.float32,
                          name="W2")
    bias2 = tf.Variable(tf.constant(0.1, shape=[32]),
                        dtype=tf.float32,
                        name="B2")
    weight3 = tf.Variable(tf.truncated_normal([4608, 2], stddev=0.1),
                          dtype=tf.float32,
                          name="W3")
    bias3 = tf.Variable(tf.constant(0.1, shape=[2]),
                        dtype=tf.float32,
                        name="B3")

    # First convolutional layer
    conv1 = ly.conv_layer(x, weight1, bias1, False)

    # First pooling
    pool1 = ly.pool_layer(conv1)

    # Second convolutional layer
    conv2 = ly.conv_layer(pool1, weight2, bias2, True)

    # Second pooling
    pool2 = ly.pool_layer(conv2)

    # Flatten input
    flattened = tf.reshape(pool2, shape=[-1, 12 * 12 * 32])

    # Create fully connected layer
    logits = ly.fully_connected_layer(flattened, weight3, bias3)

    # Create loss function
    with tf.name_scope("cross_entropy"):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))

    # Create optimizer
    with tf.name_scope("train"):
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            cross_entropy)

    # Compute accuracy
    with tf.name_scope("accuracy"):
        # argmax gets the highest value in a given dimension (in this case, dimension 1)
        # equal checks if the label is equal to the computed logits
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        # tf.reduce_mean computes the mean across the vector
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Run model
        sess.run(tf.global_variables_initializer())
        data_reader = dr.DataReader(sess, train_dir, INPUT_IMAGE_DIMENSION,
                                    OUTPUT_VECTOR_SIZE, INPUT_IMAGE_CHANNELS)

        coord = tf.train.Coordinator()

        # Train the model
        for i in range(STEP_SIZE):
            images, labels = data_reader.get_train_batch(coord, BATCH_SIZE)

            if i % 10 == 0:
                a = sess.run(accuracy, feed_dict={x: images, y: labels})

                print("step", i, "of ", STEP_SIZE)
                print("Acc: ", a)

            # Run the training step
            sess.run(train_step, feed_dict={x: images, y: labels})

        saver.save(sess, save_dir)

    coord.request_stop()
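Note that the flattened size 12 * 12 * 32 = 4608 matches the first dimension of weight3. A call sketch with hypothetical hyperparameters and paths:
conv_net_model_train(learning_rate=0.01,
                     train_dir='data/train',             # hypothetical directory
                     save_dir='checkpoints/model.ckpt')  # hypothetical checkpoint path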
Example 9
def conv_net_model_test(save_dir, test_dir, output_dir0, output_dir1):
    """
    The feed forward convolutional neural network model

    Hyper parameters include learning rate, number of convolutional layers and
    fully connected layers. (Currently TBD)

    """
    # Reset graphs
    tf.reset_default_graph()

    # Create placeholders
    x = tf.placeholder(dtype=tf.float32,
                       shape=[
                           None, INPUT_IMAGE_DIMENSION, INPUT_IMAGE_DIMENSION,
                           INPUT_IMAGE_CHANNELS
                       ],
                       name="x")

    weight1 = tf.Variable(tf.truncated_normal([4, 4, 3, 16], stddev=0.1),
                          dtype=tf.float32,
                          name="W1")
    bias1 = tf.Variable(tf.constant(0.1, shape=[16]),
                        dtype=tf.float32,
                        name="B1")
    weight2 = tf.Variable(tf.truncated_normal([4, 4, 16, 32], stddev=0.1),
                          dtype=tf.float32,
                          name="W2")
    bias2 = tf.Variable(tf.constant(0.1, shape=[32]),
                        dtype=tf.float32,
                        name="B2")
    weight3 = tf.Variable(tf.truncated_normal([4608, 2], stddev=0.1),
                          dtype=tf.float32,
                          name="W3")
    bias3 = tf.Variable(tf.constant(0.1, shape=[2]),
                        dtype=tf.float32,
                        name="B3")

    # First convolutional layer
    conv1 = ly.conv_layer(x, weight1, bias1, False)

    # First pooling
    pool1 = ly.pool_layer(conv1)

    # Second convolutional layer
    conv2 = ly.conv_layer(pool1, weight2, bias2, True)

    # Second pooling
    pool2 = ly.pool_layer(conv2)

    # Flatten input
    flattened = tf.reshape(pool2, shape=[-1, 12 * 12 * 32])

    # Create fully connected layer
    logits = ly.fully_connected_layer(flattened, weight3, bias3)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, save_dir)
        # Run model
        test_images = tdl.load_test_data(test_dir)

        coord = tf.train.Coordinator()

        # Test the model
        l = sess.run(tf.argmax(logits, 1), feed_dict={x: test_images})
        od.output(output_dir0, output_dir1, test_images, l)

    coord.request_stop()
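A call sketch for the test path; all paths below are hypothetical:
conv_net_model_test(save_dir='checkpoints/model.ckpt',
                    test_dir='data/test',
                    output_dir0='results/class0',
                    output_dir1='results/class1')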
Example 10
def conv_net_model(learning_rate,
                   graph_dir,
                   train_dir,
                   test_dir,
                   extra_fc_layer=False):
    """
    The feed forward convolutional neural network model

    Hyper parameters include learning rate, number of convolutional layers and
    fully connected layers. (Currently TBD)

    """
    # Reset graphs
    tf.reset_default_graph()

    # Create a tensorflow session
    sess = tf.Session()

    # Create placeholders
    x = tf.placeholder(dtype=tf.float32,
                       shape=[
                           None, INPUT_IMAGE_DIMENSION, INPUT_IMAGE_DIMENSION,
                           INPUT_IMAGE_CHANNELS
                       ],
                       name="x")
    y = tf.placeholder(dtype=tf.float32,
                       shape=[None, OUTPUT_VECTOR_SIZE],
                       name="y")
    # Visualize input x
    tf.summary.image("input", x, BATCH_SIZE)

    # First convolutional layer
    conv1, v = ly.conv_layer(x, INPUT_IMAGE_CHANNELS, 32, name="conv1")

    # Visualize convolution output
    utils.visualize_conv(v, INPUT_IMAGE_DIMENSION, 32, "raw_conv1")
    # Visualize relu activated convolution output
    utils.visualize_conv(conv1, INPUT_IMAGE_DIMENSION, 32, "relu_conv1")

    # First pooling
    pool1 = ly.pool_layer(conv1, name="pool1")
    image_dimension = int(INPUT_IMAGE_DIMENSION / 2)
    # Visualize first pooling
    utils.visualize_conv(pool1, image_dimension, 32, "pool1")

    # Flatten input
    flattened = tf.reshape(pool1,
                           shape=[-1, image_dimension * image_dimension * 32])

    # Create fully connected layer
    if extra_fc_layer:
        fc_layer = ly.fully_connected_layer(flattened,
                                            image_dimension * image_dimension *
                                            32,
                                            EXTRA_FC_LAYER_NODES,
                                            name="fc_layer")
        logits = ly.fully_connected_layer(fc_layer,
                                          EXTRA_FC_LAYER_NODES,
                                          OUTPUT_VECTOR_SIZE,
                                          name="logits")
    else:
        logits = ly.fully_connected_layer(flattened,
                                          image_dimension * image_dimension *
                                          32,
                                          OUTPUT_VECTOR_SIZE,
                                          name="logits")

    # Create loss function
    with tf.name_scope("cross_entropy"):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
        tf.summary.scalar("cross_entropy", cross_entropy)

    # Create optimizer
    with tf.name_scope("train"):
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            cross_entropy)

    # Compute accuracy
    with tf.name_scope("accuracy"):
        # argmax gets the highest value in a given dimension (in this case, dimension 1)
        # equal checks if the label is equal to the computed logits
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        # tf.reduce_mean computes the mean across the vector
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    # Get all summary
    summ = tf.summary.merge_all()

    # Run model
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(graph_dir)
    writer.add_graph(sess.graph)

    data_reader = dr.DataReader(sess, train_dir, test_dir,
                                INPUT_IMAGE_DIMENSION, OUTPUT_VECTOR_SIZE,
                                INPUT_IMAGE_CHANNELS)

    coord = tf.train.Coordinator()

    # Train the model
    for i in range(STEP_SIZE):
        images, labels = data_reader.get_train_batch(coord, BATCH_SIZE)

        if i % 5 == 0:
            [_, s] = sess.run([accuracy, summ],
                              feed_dict={
                                  x: images,
                                  y: labels
                              })
            writer.add_summary(s, i)

        # Run the training step
        sess.run(train_step, feed_dict={x: images, y: labels})

    # For model testing
    with tf.name_scope("test_accuracy"):
        test_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        test_accuracy = tf.reduce_mean(tf.cast(test_prediction, tf.float32))
        test_accuracy_summary = tf.summary.scalar("test_accuracy",
                                                  test_accuracy)

    # Test the model
    for i in range(TEST_STEP_SIZE):
        test_images, test_labels = data_reader.get_train_batch(
            coord, BATCH_SIZE)
        if i % 5 == 0:
            [_, s] = sess.run([test_accuracy, test_accuracy_summary],
                              feed_dict={
                                  x: test_images,
                                  y: test_labels
                              })
            writer.add_summary(s, i)
        sess.run(logits, feed_dict={x: test_images, y: test_labels})

    coord.request_stop()
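A call sketch for the combined train/test routine, with the optional extra fully connected layer enabled; the directories and learning rate are hypothetical:
conv_net_model(learning_rate=0.01,
               graph_dir='logs/conv_net',
               train_dir='data/train',
               test_dir='data/test',
               extra_fc_layer=True)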
Example 11
def build_encoder(x):
    # The encoder uses the deep residual network.
    outputs = []
    pooling = [1, 2, 2, 1]

    shape = x.get_shape().as_list()
    bs = shape[0]
    seq = shape[1]
    temp_shape = [bs * seq] + shape[2:]
    x = tf.reshape(x, temp_shape)
    # print x.get_shape().as_list()

    # layer 0
    with tf.variable_scope("encoder_layer0", reuse=tf.AUTO_REUSE):
        conv0_0 = layers.conv_layer(name="conv0_0",
                                    x=x,
                                    filter_shape=layers.create_variable(
                                        "filter0_0", shape=[7, 7, 3, 96]))
        conv0_0 = layers.batch_normalization(conv0_0, "conv0_0_bn")
        conv0_0 = layers.relu_layer(conv0_0)
        conv0_1 = layers.conv_layer(name="conv0_1",
                                    x=conv0_0,
                                    filter_shape=layers.create_variable(
                                        "filter0_1", shape=[3, 3, 96, 96]))
        conv0_1 = layers.batch_normalization(conv0_1, "conv0_1_bn")
        conv0_1 = layers.relu_layer(conv0_1)
        shortcut0 = layers.conv_layer(name="shortcut",
                                      x=x,
                                      filter_shape=layers.create_variable(
                                          "filter0_2", shape=[1, 1, 3, 96]))
        shortcut0 = layers.batch_normalization(shortcut0, "shortcut0_bn")
        shortcut0 = layers.relu_layer(shortcut0)
        layer0 = layers.pooling_layer("pooling", conv0_1 + shortcut0, pooling)
        outputs.append(layer0)  # [bs * size, 64, 64, 96]

    # layer 1
    with tf.variable_scope("encoder_layer1", reuse=tf.AUTO_REUSE):
        conv1_0 = layers.conv_layer(name="conv1_0",
                                    x=layer0,
                                    filter_shape=layers.create_variable(
                                        "filter1_0", shape=[3, 3, 96, 128]))
        conv1_0 = layers.batch_normalization(conv1_0, "conv1_0_bn")
        conv1_0 = layers.relu_layer(conv1_0)
        conv1_1 = layers.conv_layer(name="conv1_1",
                                    x=conv1_0,
                                    filter_shape=layers.create_variable(
                                        "filter1_1", shape=[3, 3, 128, 128]))
        conv1_1 = layers.batch_normalization(conv1_1, "conv1_1_bn")
        conv1_1 = layers.relu_layer(conv1_1)
        shortcut1 = layers.conv_layer(name="shortcut",
                                      x=layer0,
                                      filter_shape=layers.create_variable(
                                          "filter1_2", shape=[1, 1, 96, 128]))
        shortcut1 = layers.batch_normalization(shortcut1, "shortcut1_bn")
        shortcut1 = layers.relu_layer(shortcut1)
        layer1 = layers.pooling_layer("pooling", conv1_1 + shortcut1, pooling)
        outputs.append(layer1)  # [bs * size, 32, 32, 128]

    # layer 2
    with tf.variable_scope("encoder_layer2", reuse=tf.AUTO_REUSE):
        conv2_0 = layers.conv_layer(name="conv2_0",
                                    x=layer1,
                                    filter_shape=layers.create_variable(
                                        "filter2_0", shape=[3, 3, 128, 256]))
        conv2_0 = layers.batch_normalization(conv2_0, "conv2_0_bn")
        conv2_0 = layers.relu_layer(conv2_0)
        conv2_1 = layers.conv_layer(name="conv2_1",
                                    x=conv2_0,
                                    filter_shape=layers.create_variable(
                                        "filter2_1", shape=[3, 3, 256, 256]))
        conv2_1 = layers.batch_normalization(conv2_1, "conv2_1_bn")
        conv2_1 = layers.relu_layer(conv2_1)
        shortcut2 = layers.conv_layer(name="shortcut",
                                      x=layer1,
                                      filter_shape=layers.create_variable(
                                          "filter2_2", shape=[1, 1, 128, 256]))
        shortcut2 = layers.batch_normalization(shortcut2, "shortcut2_bn")
        shortcut2 = layers.relu_layer(shortcut2)
        layer2 = layers.pooling_layer("pooling", conv2_1 + shortcut2, pooling)
        outputs.append(layer2)  # [bs * size, 16, 16, 256]

    # layer 3
    with tf.variable_scope("encoder_layer3", reuse=tf.AUTO_REUSE):
        conv3_0 = layers.conv_layer(name="conv3_0",
                                    x=layer2,
                                    filter_shape=layers.create_variable(
                                        "filter3_0", shape=[3, 3, 256, 256]))
        conv3_0 = layers.batch_normalization(conv3_0, "conv3_0_bn")
        conv3_0 = layers.relu_layer(conv3_0)
        conv3_1 = layers.conv_layer(name="conv3_1",
                                    x=conv3_0,
                                    filter_shape=layers.create_variable(
                                        "filter3_1", shape=[3, 3, 256, 256]))
        conv3_1 = layers.batch_normalization(conv3_1, "conv3_1_bn")
        conv3_1 = layers.relu_layer(conv3_1)
        layer3 = layers.pooling_layer("pooling", conv3_1, pooling)
        outputs.append(layer3)  # [bs * size, 8, 8, 256]

    # layer 4
    with tf.variable_scope("encoder_layer4", reuse=tf.AUTO_REUSE):
        conv4_0 = layers.conv_layer(name="conv4_0",
                                    x=layer3,
                                    filter_shape=layers.create_variable(
                                        "filter4_0", shape=[3, 3, 256, 256]))
        conv4_0 = layers.batch_normalization(conv4_0, "conv4_0_bn")
        conv4_0 = layers.relu_layer(conv4_0)
        conv4_1 = layers.conv_layer(name="conv4_1",
                                    x=conv4_0,
                                    filter_shape=layers.create_variable(
                                        "filter4_1", shape=[3, 3, 256, 256]))
        conv4_1 = layers.batch_normalization(conv4_1, "conv4_1_bn")
        conv4_1 = layers.relu_layer(conv4_1)
        shortcut4 = layers.conv_layer(name="shortcut",
                                      x=layer3,
                                      filter_shape=layers.create_variable(
                                          "filter4_2", shape=[1, 1, 256, 256]))
        shortcut4 = layers.batch_normalization(shortcut4, "shortcut4_bn")
        shortcut4 = layers.relu_layer(shortcut4)
        layer4 = layers.pooling_layer("pooling", conv4_1 + shortcut4, pooling)
        outputs.append(layer4)  # [bs * size, 4, 4, 256]

    # layer 5
    with tf.variable_scope("encoder_layer5", reuse=tf.AUTO_REUSE):
        conv5_0 = layers.conv_layer(name="conv5_0",
                                    x=layer4,
                                    filter_shape=layers.create_variable(
                                        "filter5_0", shape=[3, 3, 256, 256]))
        conv5_0 = layers.batch_normalization(conv5_0, "conv5_0_bn")
        conv5_0 = layers.relu_layer(conv5_0)
        conv5_1 = layers.conv_layer(name="conv5_1",
                                    x=conv5_0,
                                    filter_shape=layers.create_variable(
                                        "filter5_1", shape=[3, 3, 256, 256]))
        conv5_1 = layers.batch_normalization(conv5_1, "conv5_1_bn")
        conv5_1 = layers.relu_layer(conv5_1)
        shortcut5 = layers.conv_layer(name="shortcut",
                                      x=layer4,
                                      filter_shape=layers.create_variable(
                                          "filter5_2", shape=[1, 1, 256, 256]))
        shortcut5 = layers.batch_normalization(shortcut5, "shortcut5_bn")
        shortcut5 = layers.relu_layer(shortcut5)
        layer5 = layers.pooling_layer("pooling", conv5_1 + shortcut5, pooling)
        outputs.append(layer5)  # [bs * size, 2, 2, 256]

    final_shape = [bs, seq, fc_layer_size[0]]
    # Flatten layer and fully connected layer
    flatten = layers.flatten_layer(layer5)
    outputs.append(flatten)

    with tf.variable_scope("fc_layer", reuse=tf.AUTO_REUSE):
        layer_fc = layers.fully_connected_layer(flatten, fc_layer_size[0],
                                                "fclayer_w", "fclayer_b")
        # layer_fc = layers.batch_normalization(layer_fc, "fc_bn")
        layer_fc = layers.relu_layer(layer_fc)
        outputs.append(layer_fc)  # [bs * size, 1024]

    # [bs, size, 1024]
    return tf.reshape(outputs[-1], final_shape)
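An input-shape sketch for the encoder above, assuming 128x128 RGB frames (inferred from the per-layer shape comments) and that fc_layer_size is defined in the enclosing module; the batch and sequence sizes are arbitrary but must be static, since the function reshapes using them:
batch_size, seq_len = 4, 8
video = tf.placeholder(tf.float32, [batch_size, seq_len, 128, 128, 3], name='video')
features = build_encoder(video)  # expected shape: [4, 8, fc_layer_size[0]]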