Example #1
def _setup_net(placeholder, layers, weights, mean_pixel):
    """
    Returns the cnn built with given weights and normalized with mean_pixel
    """
    net = {}
    placeholder -= mean_pixel
    for i, name in enumerate(layers):
        kind = name[:4]
        with tf.variable_scope(name):
            if kind == 'conv':
                kernels, bias = weights[i][0][0][0][0]
                # matconvnet: [width, height, in_channels, out_channels]
                # tensorflow: [height, width, in_channels, out_channels]
                kernels = tf_utils.get_variable(
                    np.transpose(kernels, (1, 0, 2, 3)),
                    name=name + "_w")
                bias = tf_utils.get_variable(
                    bias.reshape(-1),
                    name=name + "_b")
                placeholder = tf_utils.conv2d(placeholder, kernels, bias)
            elif kind == 'relu':
                placeholder = tf.nn.relu(placeholder, name=name)
                tf_utils.add_activation_summary(placeholder, collections=['train'])
            elif kind == 'pool':
                placeholder = tf_utils.max_pool_2x2(placeholder)
            net[name] = placeholder

    return net


def inference(image, keep_prob):
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_path)

    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))

    weights = np.squeeze(model_data['layers'])

    processed_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        conv_final_layer = image_net["conv5_3"]

        pool5 = utils.max_pool_2x2(conv_final_layer, "pool5")

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6, name="conv6")
        relu6 = tf.nn.relu(conv6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7, name="conv7")
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSES], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSES], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8, name="conv8")

        # now to upscale to actual image size
        deconv_shape1 = image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSES], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, "conv_t1", output_shape=tf.shape(image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")

        deconv_shape2 = image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, "conv_t2", output_shape=tf.shape(image_net["pool3"]))
        fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSES])
        W_t3 = utils.weight_variable([16, 16, NUM_OF_CLASSES, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSES], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, "conv_t3", output_shape=deconv_shape3, stride=8)

        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")

    return tf.expand_dims(annotation_pred, axis=3), conv_t3
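
The inference graph above calls a `vgg_net` builder and project-specific `utils` helpers that are not shown in this snippet. A minimal, hedged sketch of what `conv2d_basic` and `conv2d_transpose_strided` presumably wrap, inferred only from how they are called here:

import tensorflow as tf

# Hypothetical helpers; the real utils module may differ.
def conv2d_basic(x, W, bias, name=None):
    # Stride-1, SAME-padded convolution followed by a bias add.
    conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME", name=name)
    return tf.nn.bias_add(conv, bias)

def conv2d_transpose_strided(x, W, bias, name=None, output_shape=None, stride=2):
    # Fractionally strided (transposed) convolution that upsamples the coarse
    # class scores back toward the input resolution.
    conv = tf.nn.conv2d_transpose(x, W, output_shape,
                                  strides=[1, stride, stride, 1],
                                  padding="SAME", name=name)
    return tf.nn.bias_add(conv, bias)
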
Example #3
def GEDDnet(face,
            left_eye,
            right_eye,
            keep_prob,
            is_train,
            subj_id,
            vgg_path,
            num_subj=15,
            rf=[[2, 2], [3, 3], [5, 5], [11, 11]],
            num_face=[64, 128, 64, 64, 128, 256, 64],
            r=[[2, 2], [3, 3], [4, 5], [5, 11]],
            num_eye=[64, 128, 64, 64, 128, 256],
            num_comb=[0, 256]):

    # Copy rather than mutate the default argument list.
    num_comb = [num_face[-1] + 2 * num_eye[-1], num_comb[1]]

    vgg = np.load(vgg_path)
    with tf.variable_scope("transfer"):
        W_conv1_1 = tf.Variable(vgg['conv1_1_W'])
        b_conv1_1 = tf.Variable(vgg['conv1_1_b'])
        W_conv1_2 = tf.Variable(vgg['conv1_2_W'])
        b_conv1_2 = tf.Variable(vgg['conv1_2_b'])

        W_conv2_1 = tf.Variable(vgg['conv2_1_W'])
        b_conv2_1 = tf.Variable(vgg['conv2_1_b'])
        W_conv2_2 = tf.Variable(vgg['conv2_2_W'])
        b_conv2_2 = tf.Variable(vgg['conv2_2_b'])
    del vgg
    """ define network """
    # face
    face_h_conv1_1 = tf.nn.relu(conv2d(face, W_conv1_1) + b_conv1_1)
    face_h_conv1_2 = tf.nn.relu(conv2d(face_h_conv1_1, W_conv1_2) + b_conv1_2)
    face_h_pool1 = max_pool_2x2(face_h_conv1_2)

    face_h_conv2_1 = tf.nn.relu(conv2d(face_h_pool1, W_conv2_1) + b_conv2_1)
    face_h_conv2_2 = tf.nn.relu(conv2d(face_h_conv2_1, W_conv2_2) +
                                b_conv2_2) / 100.

    with tf.variable_scope("face"):

        face_W_conv2_3 = weight_variable([1, 1, num_face[1], num_face[2]],
                                         std=0.125)
        face_b_conv2_3 = bias_variable([num_face[2]], std=0.001)

        face_W_conv3_1 = weight_variable([3, 3, num_face[2], num_face[3]],
                                         std=0.06)
        face_b_conv3_1 = bias_variable([num_face[3]], std=0.001)
        face_W_conv3_2 = weight_variable([3, 3, num_face[3], num_face[3]],
                                         std=0.06)
        face_b_conv3_2 = bias_variable([num_face[3]], std=0.001)

        face_W_conv4_1 = weight_variable([3, 3, num_face[3], num_face[4]],
                                         std=0.08)
        face_b_conv4_1 = bias_variable([num_face[4]], std=0.001)
        face_W_conv4_2 = weight_variable([3, 3, num_face[4], num_face[4]],
                                         std=0.07)
        face_b_conv4_2 = bias_variable([num_face[4]], std=0.001)

        face_W_fc1 = weight_variable([6 * 6 * num_face[4], num_face[5]],
                                     std=0.035)
        face_b_fc1 = bias_variable([num_face[5]], std=0.001)

        face_W_fc2 = weight_variable([num_face[5], num_face[6]], std=0.1)
        face_b_fc2 = bias_variable([num_face[6]], std=0.001)

        face_h_conv2_3 = tf.nn.relu(
            conv2d(face_h_conv2_2, face_W_conv2_3) + face_b_conv2_3)
        face_h_conv2_3_norm = tf.layers.batch_normalization(face_h_conv2_3,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="f_conv2_3")

        face_h_conv3_1 = tf.nn.relu(
            dilated2d(face_h_conv2_3_norm, face_W_conv3_1, rf[0]) +
            face_b_conv3_1)
        face_h_conv3_1_norm = tf.layers.batch_normalization(face_h_conv3_1,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="f_conv3_1")

        face_h_conv3_2 = tf.nn.relu(
            dilated2d(face_h_conv3_1_norm, face_W_conv3_2, rf[1]) +
            face_b_conv3_2)
        face_h_conv3_2_norm = tf.layers.batch_normalization(face_h_conv3_2,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="f_conv3_2")

        face_h_conv4_1 = tf.nn.relu(
            dilated2d(face_h_conv3_2_norm, face_W_conv4_1, rf[2]) +
            face_b_conv4_1)
        face_h_conv4_1_norm = tf.layers.batch_normalization(face_h_conv4_1,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="f_conv4_1")

        face_h_conv4_2 = tf.nn.relu(
            dilated2d(face_h_conv4_1_norm, face_W_conv4_2, rf[3]) +
            face_b_conv4_2)
        face_h_conv4_2_norm = tf.layers.batch_normalization(face_h_conv4_2,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="f_conv4_2")

        face_h_pool4_flat = tf.reshape(face_h_conv4_2_norm,
                                       [-1, 6 * 6 * num_face[4]])

        face_h_fc1 = tf.nn.relu(
            tf.matmul(face_h_pool4_flat, face_W_fc1) + face_b_fc1)
        face_h_fc1_norm = tf.layers.batch_normalization(face_h_fc1,
                                                        training=is_train,
                                                        scale=False,
                                                        renorm=True,
                                                        name="f_fc1")
        face_h_fc1_drop = tf.nn.dropout(face_h_fc1_norm, keep_prob)

        face_h_fc2 = tf.nn.relu(
            tf.matmul(face_h_fc1_drop, face_W_fc2) + face_b_fc2)
        face_h_fc2_norm = tf.layers.batch_normalization(face_h_fc2,
                                                        training=is_train,
                                                        scale=False,
                                                        renorm=True,
                                                        name="f_fc2")

    eye1_h_conv1_1 = tf.nn.relu(conv2d(left_eye, W_conv1_1) + b_conv1_1)
    eye1_h_conv1_2 = tf.nn.relu(conv2d(eye1_h_conv1_1, W_conv1_2) + b_conv1_2)
    eye1_h_pool1 = max_pool_2x2(eye1_h_conv1_2)

    eye1_h_conv2_1 = tf.nn.relu(conv2d(eye1_h_pool1, W_conv2_1) + b_conv2_1)
    eye1_h_conv2_2 = tf.nn.relu(conv2d(eye1_h_conv2_1, W_conv2_2) +
                                b_conv2_2) / 100.

    eye2_h_conv1_1 = tf.nn.relu(conv2d(right_eye, W_conv1_1) + b_conv1_1)
    eye2_h_conv1_2 = tf.nn.relu(conv2d(eye2_h_conv1_1, W_conv1_2) + b_conv1_2)
    eye2_h_pool1 = max_pool_2x2(eye2_h_conv1_2)

    eye2_h_conv2_1 = tf.nn.relu(conv2d(eye2_h_pool1, W_conv2_1) + b_conv2_1)
    eye2_h_conv2_2 = tf.nn.relu(conv2d(eye2_h_conv2_1, W_conv2_2) +
                                b_conv2_2) / 100.

    with tf.variable_scope("eye"):
        # left eye
        eye_W_conv2_3 = weight_variable([1, 1, num_eye[1], num_eye[2]],
                                        std=0.125)
        eye_b_conv2_3 = bias_variable([num_eye[2]], std=0.001)

        eye_W_conv3_1 = weight_variable([3, 3, num_eye[2], num_eye[3]],
                                        std=0.06)
        eye_b_conv3_1 = bias_variable([num_eye[3]], std=0.001)
        eye_W_conv3_2 = weight_variable([3, 3, num_eye[3], num_eye[3]],
                                        std=0.06)
        eye_b_conv3_2 = bias_variable([num_eye[3]], std=0.001)

        eye_W_conv4_1 = weight_variable([3, 3, num_eye[3], num_eye[4]],
                                        std=0.06)
        eye_b_conv4_1 = bias_variable([num_eye[4]], std=0.001)
        eye_W_conv4_2 = weight_variable([3, 3, num_eye[4], num_eye[4]],
                                        std=0.04)
        eye_b_conv4_2 = bias_variable([num_eye[4]], std=0.001)

        eye1_W_fc1 = weight_variable([4 * 6 * num_eye[4], num_eye[5]],
                                     std=0.026)
        eye1_b_fc1 = bias_variable([num_eye[5]], std=0.001)

        eye2_W_fc1 = weight_variable([4 * 6 * num_eye[4], num_eye[5]],
                                     std=0.026)
        eye2_b_fc1 = bias_variable([num_eye[5]], std=0.001)

        eye1_h_conv2_3 = tf.nn.relu(
            conv2d(eye1_h_conv2_2, eye_W_conv2_3) + eye_b_conv2_3)
        eye1_h_conv2_3_norm = tf.layers.batch_normalization(eye1_h_conv2_3,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="e_conv2_3")

        eye1_h_conv3_1 = tf.nn.relu(
            dilated2d(eye1_h_conv2_3_norm, eye_W_conv3_1, r[0]) +
            eye_b_conv3_1)
        eye1_h_conv3_1_norm = tf.layers.batch_normalization(eye1_h_conv3_1,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="e_conv3_1")

        eye1_h_conv3_2 = tf.nn.relu(
            dilated2d(eye1_h_conv3_1_norm, eye_W_conv3_2, r[1]) +
            eye_b_conv3_2)
        eye1_h_conv3_2_norm = tf.layers.batch_normalization(eye1_h_conv3_2,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="e_conv3_2")

        eye1_h_conv4_1 = tf.nn.relu(
            dilated2d(eye1_h_conv3_2_norm, eye_W_conv4_1, r[2]) +
            eye_b_conv4_1)
        eye1_h_conv4_1_norm = tf.layers.batch_normalization(eye1_h_conv4_1,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="e_conv4_1")

        eye1_h_conv4_2 = tf.nn.relu(
            dilated2d(eye1_h_conv4_1_norm, eye_W_conv4_2, r[3]) +
            eye_b_conv4_2)
        eye1_h_conv4_2_norm = tf.layers.batch_normalization(eye1_h_conv4_2,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="e_conv4_2")

        eye1_h_pool4_flat = tf.reshape(eye1_h_conv4_2_norm,
                                       [-1, 4 * 6 * num_eye[4]])

        eye1_h_fc1 = tf.nn.relu(
            tf.matmul(eye1_h_pool4_flat, eye1_W_fc1) + eye1_b_fc1)
        eye1_h_fc1_norm = tf.layers.batch_normalization(eye1_h_fc1,
                                                        training=is_train,
                                                        scale=False,
                                                        renorm=True,
                                                        name="e1_fc1")

        # right eye
        eye2_h_conv2_3 = tf.nn.relu(
            conv2d(eye2_h_conv2_2, eye_W_conv2_3) + eye_b_conv2_3)
        eye2_h_conv2_3_norm = tf.layers.batch_normalization(eye2_h_conv2_3,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="e_conv2_3",
                                                            reuse=True)

        eye2_h_conv3_1 = tf.nn.relu(
            dilated2d(eye2_h_conv2_3_norm, eye_W_conv3_1, r[0]) +
            eye_b_conv3_1)
        eye2_h_conv3_1_norm = tf.layers.batch_normalization(eye2_h_conv3_1,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="e_conv3_1",
                                                            reuse=True)

        eye2_h_conv3_2 = tf.nn.relu(
            dilated2d(eye2_h_conv3_1_norm, eye_W_conv3_2, r[1]) +
            eye_b_conv3_2)
        eye2_h_conv3_2_norm = tf.layers.batch_normalization(eye2_h_conv3_2,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="e_conv3_2",
                                                            reuse=True)

        eye2_h_conv4_1 = tf.nn.relu(
            dilated2d(eye2_h_conv3_2_norm, eye_W_conv4_1, r[2]) +
            eye_b_conv4_1)
        eye2_h_conv4_1_norm = tf.layers.batch_normalization(eye2_h_conv4_1,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="e_conv4_1",
                                                            reuse=True)

        eye2_h_conv4_2 = tf.nn.relu(
            dilated2d(eye2_h_conv4_1_norm, eye_W_conv4_2, r[3]) +
            eye_b_conv4_2)
        eye2_h_conv4_2_norm = tf.layers.batch_normalization(eye2_h_conv4_2,
                                                            training=is_train,
                                                            scale=False,
                                                            renorm=True,
                                                            name="e_conv4_2",
                                                            reuse=True)

        eye2_h_pool4_flat = tf.reshape(eye2_h_conv4_2_norm,
                                       [-1, 4 * 6 * num_eye[4]])

        eye2_h_fc1 = tf.nn.relu(
            tf.matmul(eye2_h_pool4_flat, eye2_W_fc1) + eye2_b_fc1)
        eye2_h_fc1_norm = tf.layers.batch_normalization(eye2_h_fc1,
                                                        training=is_train,
                                                        scale=False,
                                                        renorm=True,
                                                        name="e2_fc1")

    # combine both eyes and face
    with tf.variable_scope("combine"):

        cls1_W_fc2 = weight_variable([num_comb[0], num_comb[1]], std=0.07)
        cls1_b_fc2 = bias_variable([num_comb[1]], std=0.001)

        cls1_W_fc3 = weight_variable([num_comb[1], 2], std=0.125)
        cls1_b_fc3 = bias_variable([2], std=0.001)

        cls1_h_fc1_norm = tf.concat(
            [face_h_fc2_norm, eye1_h_fc1_norm, eye2_h_fc1_norm], axis=1)
        cls1_h_fc1_drop = tf.nn.dropout(cls1_h_fc1_norm, keep_prob)
        cls1_h_fc2 = tf.nn.relu(
            tf.matmul(cls1_h_fc1_drop, cls1_W_fc2) + cls1_b_fc2)
        cls1_h_fc2_norm = tf.layers.batch_normalization(cls1_h_fc2,
                                                        training=is_train,
                                                        scale=False,
                                                        renorm=True,
                                                        name="c_fc2")
        cls1_h_fc2_drop = tf.nn.dropout(cls1_h_fc2_norm, keep_prob)

        t_hat = tf.matmul(cls1_h_fc2_drop, cls1_W_fc3) + cls1_b_fc3
    """ bias learning from subject id """
    num_bias = (2 * num_subj, )
    with tf.variable_scope("bias"):

        bias_W_fc = weight_variable([num_bias[0], 2], std=0.125)
        b_hat = tf.matmul(subj_id, bias_W_fc)

    g_hat = t_hat + b_hat

    l2_loss = (1e-2 * tf.nn.l2_loss(W_conv1_1) +
               1e-2 * tf.nn.l2_loss(W_conv1_2) +
               1e-2 * tf.nn.l2_loss(W_conv2_1) +
               1e-2 * tf.nn.l2_loss(W_conv2_2) +
               tf.nn.l2_loss(face_W_conv2_3) + tf.nn.l2_loss(face_W_conv3_1) +
               tf.nn.l2_loss(face_W_conv3_2) + tf.nn.l2_loss(face_W_conv4_1) +
               tf.nn.l2_loss(face_W_conv4_2) + tf.nn.l2_loss(face_W_fc1) +
               tf.nn.l2_loss(face_W_fc2) + tf.nn.l2_loss(eye_W_conv2_3) +
               tf.nn.l2_loss(eye_W_conv3_1) + tf.nn.l2_loss(eye_W_conv3_2) +
               tf.nn.l2_loss(eye_W_conv4_1) + tf.nn.l2_loss(eye_W_conv4_2) +
               tf.nn.l2_loss(eye1_W_fc1) + tf.nn.l2_loss(eye2_W_fc1) +
               tf.nn.l2_loss(cls1_W_fc2) + tf.nn.l2_loss(cls1_W_fc3))

    return g_hat, t_hat, bias_W_fc, l2_loss
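
GEDDnet relies on `conv2d`, `dilated2d`, `max_pool_2x2`, `weight_variable` and `bias_variable` helpers defined elsewhere in its repository. A minimal sketch of plausible definitions, inferred from the call sites above and offered only as an assumption:

import tensorflow as tf

# Hypothetical helpers; the original project may implement them differently.
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")

def dilated2d(x, W, rate):
    # SAME-padded convolution with a per-dimension dilation rate, e.g. rate=[2, 2].
    return tf.nn.convolution(x, W, padding="SAME", dilation_rate=rate)

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding="SAME")

def weight_variable(shape, std=0.1):
    return tf.Variable(tf.truncated_normal(shape, stddev=std))

def bias_variable(shape, std=0.1):
    return tf.Variable(tf.truncated_normal(shape, stddev=std))
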
Example #4
def polygon_regressor(x_image, input_res, input_channels, encoding_length,
                      output_vertex_count, weight_decay):
    """
    Builds the graph for a deep net for encoding and decoding polygons.

    Args:
      x_image: input tensor of shape (N_examples, input_res, input_res, input_channels)
      input_res: image resolution
      input_channels: image number of channels
      encoding_length: number of neurons used in the bottleneck to encode the input polygon
      output_vertex_count: number of vertex of the polygon output
      weight_decay: Weight decay coefficient

    Returns:
      y: tensor of shape (N_examples, output_vertex_count, 2), with vertex coordinates
      keep_prob: scalar placeholder for the probability of dropout.
    """
    # with tf.name_scope('reshape'):
    #     x_image = tf.reshape(x, [-1, input_res, input_res, 1])

    # First convolutional layer - maps the input channels to 16 feature maps.
    with tf.name_scope('conv1'):
        conv1 = tf_utils.complete_conv2d(x_image, 3, input_channels, 16,
                                         weight_decay)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope('pool1'):
        h_pool1 = tf_utils.max_pool_2x2(conv1)

    # Second convolutional layer -- maps 16 feature maps to 32.
    with tf.name_scope('conv2'):
        conv2 = tf_utils.complete_conv2d(h_pool1, 3, 16, 32, weight_decay)

    # Second pooling layer.
    with tf.name_scope('pool2'):
        h_pool2 = tf_utils.max_pool_2x2(conv2)

    # Third convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope('conv3'):
        conv3 = tf_utils.complete_conv2d(h_pool2, 3, 32, 64, weight_decay)

    # Third pooling layer.
    with tf.name_scope('pool3'):
        h_pool3 = tf_utils.max_pool_2x2(conv3)

    reduction_factor = 8  # Adjust according to previous layers
    current_data_dimension = int(input_res / reduction_factor) * int(
        input_res / reduction_factor) * 64

    with tf.name_scope('flatten'):
        h_pool3_flat = tf.reshape(h_pool3, [-1, current_data_dimension])

    # Fully connected layer 1 -- after 3 rounds of downsampling, the input_res x input_res
    # image is down to (input_res/8) x (input_res/8) x 64 feature maps -- map this to 1024 features.
    with tf.name_scope('fc1'):
        fc1 = tf_utils.complete_fc(h_pool3_flat, current_data_dimension, 1024,
                                   weight_decay, tf.nn.relu)

    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        # tf.summary.scalar('dropout_keep_probability', keep_prob)
        fc1_drop = tf.nn.dropout(fc1, keep_prob)

    # Map the 1024 features to encoding_length features
    with tf.name_scope('fc2'):
        fc2 = tf_utils.complete_fc(fc1_drop, 1024, encoding_length,
                                   weight_decay, tf.nn.relu)

    # --- Decoder --- #

    # Map the encoding_length features to 512 features
    with tf.name_scope('fc3'):
        fc3 = tf_utils.complete_fc(fc2, encoding_length, 512, weight_decay,
                                   tf.nn.relu)

    # Map the 512 features to the output_vertex_count * 2 output coordinates
    with tf.name_scope('fc4'):
        y_flat = tf_utils.complete_fc(fc3, 512, output_vertex_count * 2,
                                      weight_decay, tf.nn.sigmoid)

    with tf.name_scope('reshape_output'):
        y_coords = tf.reshape(y_flat, [-1, output_vertex_count, 2])

    return y_coords, keep_prob
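
A minimal usage sketch for the regressor; the 64x64 RGB input, encoding length and vertex count below are illustrative assumptions, not values from the original project:

x = tf.placeholder(tf.float32, [None, 64, 64, 3])
y_coords, keep_prob = polygon_regressor(x, input_res=64, input_channels=3,
                                         encoding_length=128,
                                         output_vertex_count=4,
                                         weight_decay=1e-4)
# y_coords has shape (batch, 4, 2); feed keep_prob < 1.0 only while training.
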
Example #5
def main():
    """
    Runs a simple linear regression model on the mnist dataset.
    """

    # Load the MNIST dataset. The returned object stores the train, validation and test sets as numpy arrays.
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Create a TensorFlow session.
    sess = tf.InteractiveSession()

    # Create the computational graph. Start by creating placeholders for the input and output data.
    # Input placeholder.
    input_placeholder = tf.placeholder(tf.float32, shape=[None, 784])
    # Output placeholder.
    labeled_data = tf.placeholder(tf.float32, shape=[None, 10])

    # Reshape input to a 4D tensor of [-1, width, height, channels]. -1 ensures the size remains consistent
    # with the original size.
    image_shape = [-1, 28, 28, 1]
    input_image = tf.reshape(input_placeholder, image_shape)

    # Build the network: two convolutional layers followed by fully connected layers.
    # Layer 1 computes 32 features for each 5x5 patch.
    conv1_weights = tf_utils.weight_variable([5, 5, 1, 32])
    conv1_bias = tf_utils.bias_variable([32])
    # Apply ReLU activation and max pool.
    conv1_act = tf.nn.relu(
        tf_utils.conv2d(input_image, conv1_weights) + conv1_bias)
    conv1_pool = tf_utils.max_pool_2x2(conv1_act)

    # Layer 2 computes 64 features for each 5x5 patch.
    conv2_weights = tf_utils.weight_variable([5, 5, 32, 64])
    conv2_bias = tf_utils.bias_variable([64])
    # Apply ReLU activation and max pool.
    conv2_act = tf.nn.relu(
        tf_utils.conv2d(conv1_pool, conv2_weights) + conv2_bias)
    conv2_pool = tf_utils.max_pool_2x2(conv2_act)

    # Add fully connected layers.
    fc1_weights = tf_utils.weight_variable([7 * 7 * 64, 1024])
    fc1_bias = tf_utils.bias_variable([1024])
    # Apply ReLU activation to the flattened conv2 pool layer.
    conv2_flat = tf.reshape(conv2_pool, [-1, 7 * 7 * 64])
    fc1_act = tf.nn.relu(tf.matmul(conv2_flat, fc1_weights) + fc1_bias)

    # Add dropout before the readout layer.
    keep_prob = tf.placeholder(tf.float32)
    dropout = tf.nn.dropout(fc1_act, keep_prob)

    # Add the readout layer for the 10 classes.
    readout_weights = tf_utils.weight_variable([1024, 10])
    readout_bias = tf_utils.bias_variable([10])
    readout_act = tf.matmul(dropout, readout_weights) + readout_bias

    # Cross entropy loss between the labels and the model output.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labeled_data,
                                                logits=readout_act))

    # Define the training step with a learning rate for gradient descent and our cross entropy loss.
    learning_rate = 1e-4
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

    # Initialize all variables.
    sess.run(tf.global_variables_initializer())

    # Model evaluation ops.
    # Compare the predicted class against the label for each example.
    predictions = tf.equal(tf.argmax(labeled_data, 1),
                           tf.argmax(readout_act, 1))
    accuracy = tf.reduce_mean(tf.cast(predictions, tf.float32))

    # Run the training for n steps.
    steps = 10000
    batch_size = 50
    for step in range(steps):
        # Sample a batch from the mnist dataset.
        batch = mnist.train.next_batch(batch_size)
        # Create a dict of the data from the sampled batch and run one training step.
        train_step.run(feed_dict={
            input_placeholder: batch[0],
            labeled_data: batch[1],
            keep_prob: 0.5
        })

        # Print the training error after every 100 steps.
        if step % 100 == 0:
            train_accuracy = accuracy.eval(
                feed_dict={
                    input_placeholder: batch[0],
                    labeled_data: batch[1],
                    keep_prob: 1.0
                })
            print "Step: ", step, " | Train Accuracy: ", train_accuracy

    print "Accuracy: ", accuracy.eval(
        feed_dict={
            input_placeholder: mnist.test.images,
            labeled_data: mnist.test.labels,
            keep_prob: 1.0
        })
Example #6
def polygon_encoder_decoder(x_image,
                            input_res,
                            encoding_length,
                            output_vertex_count,
                            weight_decay=None):
    """
    Builds the graph for a deep net for encoding and decoding polygons.

    Args:
      x_image: input tensor of shape (N_examples, input_res, input_res, 1)
      input_res: input image resolution
      encoding_length: number of neurons used in the bottleneck to encode the input polygon
      output_vertex_count: number of vertex of the polygon output
      weight_decay: Weight decay coefficient

    Returns:
      y: tensor of shape (N_examples, output_vertex_count, 2), with vertex coordinates
      keep_prob: scalar placeholder for the probability of dropout.
    """
    # with tf.name_scope('reshape'):
    #     x_image = tf.reshape(x, [-1, input_res, input_res, 1])

    # First convolutional layer - maps one grayscale image to 8 feature maps.
    with tf.name_scope('Features'):
        with tf.name_scope('conv1'):
            conv1 = tf_utils.complete_conv2d(x_image, 5, 1, 8, weight_decay)

        # Pooling layer - downsamples by 2X.
        with tf.name_scope('pool1'):
            h_pool1 = tf_utils.max_pool_2x2(conv1)

        # Second convolutional layer -- maps 8 feature maps to 16.
        with tf.name_scope('conv2'):
            conv2 = tf_utils.complete_conv2d(h_pool1, 5, 8, 16, weight_decay)

        # Second pooling layer.
        with tf.name_scope('pool2'):
            h_pool2 = tf_utils.max_pool_2x2(conv2)

        # Third convolutional layer -- maps 16 feature maps to 32.
        with tf.name_scope('conv3'):
            conv3 = tf_utils.complete_conv2d(h_pool2, 5, 16, 32, weight_decay)

        # Third pooling layer.
        with tf.name_scope('pool3'):
            h_pool3 = tf_utils.max_pool_2x2(conv3)

    current_shape = h_pool3.shape
    current_data_dimension = int(current_shape[1] * current_shape[2] *
                                 current_shape[3])

    with tf.name_scope('Encoder'):
        with tf.name_scope('flatten'):
            h_pool3_flat = tf.reshape(h_pool3, [-1, current_data_dimension])

        # Dropout - controls the complexity of the model, prevents co-adaptation of
        # features.
        with tf.name_scope('dropout'):
            keep_prob = tf.placeholder(tf.float32)
            # tf.summary.scalar('dropout_keep_probability', keep_prob)
            h_pool3_flat_drop = tf.nn.dropout(h_pool3_flat, keep_prob)

        with tf.name_scope('fc1'):
            fc1 = tf_utils.complete_fc(h_pool3_flat_drop,
                                       current_data_dimension, encoding_length,
                                       weight_decay, tf.nn.relu)

    y_coords = decode(fc1,
                      encoding_length,
                      output_vertex_count,
                      weight_decay,
                      scope_name="Decode")

    return y_coords, keep_prob
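
The `decode` helper is not shown. A hedged sketch, assuming it mirrors the fc3/fc4/reshape decoder used in the polygon_regressor example above:

def decode(encoding, encoding_length, output_vertex_count, weight_decay,
           scope_name="Decode"):
    # Hypothetical decoder: expand the bottleneck, then map to
    # output_vertex_count (x, y) pairs squashed into [0, 1].
    with tf.name_scope(scope_name):
        fc = tf_utils.complete_fc(encoding, encoding_length, 512,
                                  weight_decay, tf.nn.relu)
        y_flat = tf_utils.complete_fc(fc, 512, output_vertex_count * 2,
                                      weight_decay, tf.nn.sigmoid)
        return tf.reshape(y_flat, [-1, output_vertex_count, 2])
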
Example #7
def create_fcn(placeholder, keep_prob, classes):
    """
    Setup the main conv/deconv network
    """
    with tf.variable_scope('inference'):
        vgg_net = create_vgg19(placeholder)
        conv_final = vgg_net['relu5_4']

        output = tf_utils.max_pool_2x2(conv_final)

        conv_shapes = [[7, 7, 512, 4096], [1, 1, 4096, 4096],
                       [1, 1, 4096, classes]]

        for i, conv_shape in enumerate(conv_shapes):
            name = 'conv%d' % (i + 6)
            with tf.variable_scope(name):
                W = tf_utils.weight_variable(conv_shape, name=name + '_w')
                b = tf_utils.bias_variable(conv_shape[-1:], name=name + '_b')
                output = tf_utils.conv2d(output, W, b)
            with tf.variable_scope('relu%d' % (i + 6)):
                if i < 2:
                    output = tf.nn.relu(output)
                    tf_utils.add_activation_summary(output,
                                                    collections=['train'])
                    output = tf.nn.dropout(output, keep_prob=keep_prob)

        pool4 = vgg_net['pool4']
        pool3 = vgg_net['pool3']

        deconv_shapes = [
            tf.shape(pool4),
            tf.shape(pool3),
            tf.stack([
                tf.shape(placeholder)[0],
                tf.shape(placeholder)[1],
                tf.shape(placeholder)[2], classes
            ])
        ]

        W_shapes = [[4, 4, pool4.get_shape()[3].value, classes],
                    [
                        4, 4,
                        pool3.get_shape()[3].value,
                        pool4.get_shape()[3].value
                    ], [16, 16, classes,
                        pool3.get_shape()[3].value]]

        strides = [2, 2, 8]

        for i in range(3):
            name = 'deconv%d' % (i + 1)
            with tf.variable_scope(name):
                W = tf_utils.weight_variable(W_shapes[i], name=name + '_w')
                output = tf_utils.conv2d_transpose(
                    output,
                    W,
                    None,
                    output_shape=deconv_shapes[i],
                    stride=strides[i])
            with tf.variable_scope('skip%d' % (i + 1)):
                if i < 2:
                    output = tf.add(output, vgg_net['pool%d' % (4 - i)])

        prediction = tf.argmax(output, axis=3, name='prediction')

    return tf.expand_dims(prediction, axis=3), output
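
A minimal usage sketch; the 3-channel placeholder and the 21-class count are assumptions for illustration only:

image = tf.placeholder(tf.float32, [None, None, None, 3], name='image')
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
prediction, logits = create_fcn(image, keep_prob, classes=21)
# prediction holds per-pixel class indices; logits are the raw scores for the loss.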