Example #1
    def stn(self, obj_image, mask_image, stn_filter):
        with tf.variable_scope("g_enerator") as scope:
            obj_image_stn = transformer(obj_image, stn_filter, (256, 256))
            tf.get_variable_scope().reuse_variables()
            mask_image = tf.cast(mask_image, 'float32')

            # self.mask_before = mask_image 
            mask_image_stn = transformer(mask_image, stn_filter, (256, 256))
            # self.mask_after = mask_image_stn 
            mask_image_stn = tf.cast(mask_image_stn, 'bool')
            # self.mask_cast = mask_image_stn
            return obj_image_stn, mask_image_stn
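
For orientation: most of these snippets drive the `transformer` op from the open-source spatial_transformer module (several examples below import it explicitly), whose usual signature is `transformer(U, theta, out_size)`, where `U` is an NHWC image batch and `theta` holds one flattened 2x3 affine matrix per image. A minimal identity-transform sketch, assuming that module and TF 1.x graph mode (all shapes are illustrative):

import tensorflow as tf
from spatial_transformer import transformer  # the module several examples below import

images = tf.placeholder(tf.float32, [4, 256, 256, 3])         # NHWC input batch
identity = tf.constant([1., 0., 0., 0., 1., 0.], tf.float32)  # parameters of the identity affine
theta = tf.tile(tf.expand_dims(identity, 0), [4, 1])          # one 2x3 affine per image
warped = transformer(images, theta, (256, 256))               # same spatial size as the input
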
Example #2
 def generator_flow_joint_base(self, i1i2):
     theta = self.theta_generator_small(i1i2)
     i2hat0, flow0 = transformer(U=i1i2[..., 0:3],
                                 theta=theta,
                                 out_size=[self.h, self.w],
                                 mode='Projective2D',
                                 name='g_transformer')
     flow1 = self.flow_generator_flownet(i1i2, flow0)
     i2hat1 = transformer(U=i1i2[..., 0:3],
                          flow=flow1,
                          out_size=[self.h, self.w],
                          mode='Flow',
                          name='g_transformer')
     return i2hat0, i2hat1, flow0, flow1
Example #3
def conv_spatial_transfo(x, thetas, kernel_size):
    """Run the spatial transformer for each patch of kernel_size * kernel_size.

    Args:
        thetas: the parameters of each spatial transformer
        kernel_size: the size of each patch

    Return:
        The patches glued together after having been
        spatially transformed
    """
    size_x = int(x.get_shape()[1])
    size_y = int(x.get_shape()[2])
    channels = int(x.get_shape()[3])
    # Flatten the parameters
    thetas = tf.reshape(thetas, [-1, size_x * size_y, 6])

    # Extract patches of kernel_size * kernel_size at each
    # pixel of the input image
    x = tf.extract_image_patches(x,
                                 ksizes=[1, kernel_size, kernel_size, 1],
                                 strides=[1, 1, 1, 1],
                                 rates=[1, 1, 1, 1],
                                 padding='SAME')
    # Flatten the patches
    x = tf.reshape(x, [-1, kernel_size, kernel_size, channels])
    # Run through the spatial transformer
    res = transformer(x, thetas, (kernel_size, kernel_size))
    # Reform the image
    res = tf.reshape(
        res, [-1, size_x * kernel_size, size_y * kernel_size, channels])
    return res
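
A hypothetical call sketch for the patch-wise transformer above; the placeholder shapes are illustrative, not taken from the original repository, and `import tensorflow as tf` plus the function defined above are assumed:

x = tf.placeholder(tf.float32, [None, 32, 32, 3])          # input feature map
thetas = tf.placeholder(tf.float32, [None, 32, 32, 6])     # one 2x3 affine predicted per pixel
patches = conv_spatial_transfo(x, thetas, kernel_size=5)   # -> [batch, 32*5, 32*5, 3]
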
def spatial_transformer(x, opt, keep_prob, out_size):
    """
	Generates spatial transformer network by setting up the two-layer localization network
	to figure out the parameters for an affine transformation of the input

	Args:
		x: input vector
		name: name of the spatial transformer
	Returns:
		h_trans: transformed feature map (tensor)

	"""
    x_tensor = tf.reshape(x, [-1, opt.pixels, opt.pixels, CHANNELS])

    # Weights for localization network
    W_fc_loc1 = weight_vector([IMAGE_PIXELS, 20])
    b_fc_loc1 = bias_vector([20])
    W_fc_loc2 = weight_vector([20, 6])
    # starting with identity transformation
    initial = np.array([[1., 0, 0], [0, 1., 0]])
    initial = initial.astype('float32')
    initial = initial.flatten()
    b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')
    # Defining two layer localization network
    h_fc_loc1 = tf.nn.tanh(tf.matmul(x, W_fc_loc1) + b_fc_loc1)
    # h_fc_loc1_drop = tf.nn.dropout(h_fc_loc1, keep_prob)
    h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1, W_fc_loc2) + b_fc_loc2)

    h_trans = transformer(x_tensor, h_fc_loc2, out_size)

    return h_trans
	def build_network(self):

		self.X = tf.placeholder(tf.float32, [self.batch_size, self.img_height, self.img_width, self.channel], name='images')
		self.detection = tf.placeholder(tf.float32, [self.batch_size,2], name='detection')
		self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42], name='landmarks')
		self.visibility = tf.placeholder(tf.float32, [self.batch_size,21], name='visibility')
		self.pose = tf.placeholder(tf.float32, [self.batch_size,3], name='pose')
		self.gender = tf.placeholder(tf.float32, [self.batch_size,2], name='gender')


		theta = self.localization_squeezenet(self.X)
		self.T_mat = tf.reshape(theta, [-1, 2,3])
		self.cropped = transformer(self.X, self.T_mat, [self.out_height, self.out_width])

		net_output = self.hyperface(self.cropped) # (out_detection, out_landmarks, out_visibility, out_pose, out_gender)


		loss_detection = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(net_output[0], self.detection))
		
		visibility_mask = tf.reshape(tf.tile(tf.expand_dims(self.visibility, axis=2), [1,1,2]), [self.batch_size, -1])
		loss_landmarks = tf.reduce_mean(tf.square(visibility_mask*(net_output[1] - self.landmarks)))
		
		loss_visibility = tf.reduce_mean(tf.square(net_output[2] - self.visibility))
		loss_pose = tf.reduce_mean(tf.square(net_output[3] - self.pose))
		loss_gender = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(net_output[4], self.gender))

		self.loss = self.weight_detect*loss_detection + self.weight_landmarks*loss_landmarks  \
					+ self.weight_visibility*loss_visibility + self.weight_pose*loss_pose  \
					+ self.weight_gender*loss_gender
Example #6
    def _spatial_transformer(self, name, x, in_filters, arr_out_filters):
        width = x.get_shape().as_list()[1]
        height = x.get_shape().as_list()[2]

        with tf.variable_scope(name):
            _x = MaxPooling2D(x, name='pool1')
            with tf.variable_scope('conv_1'):
                _x = Conv2D(_x,
                            in_filters,
                            5,
                            arr_out_filters[0],
                            name='conv_1')
                _x = BatchNormalization(_x,
                                        self.mode == 'train',
                                        name='batch1')
                _x = MaxPooling2D(_x, use_relu=True, name='pool2')

            with tf.variable_scope('conv_2'):
                _x = Conv2D(_x,
                            arr_out_filters[0],
                            5,
                            arr_out_filters[1],
                            name='conv_2')
                _x = BatchNormalization(_x,
                                        self.mode == 'train',
                                        name='batch2')
                _x = MaxPooling2D(_x, use_relu=True, name='pool3')

            with tf.variable_scope('fc1'):
                _x_flat, _x_size = Flatten(_x)
                W_fc_loc1 = weight_variable([_x_size, arr_out_filters[2]])
                b_fc_loc1 = bias_variable([arr_out_filters[2]])
                h_fc_loc1 = tf.nn.tanh(
                    tf.matmul(_x_flat, W_fc_loc1) + b_fc_loc1)

            h_fc_loc1 = slim.dropout(h_fc_loc1,
                                     self._dropout,
                                     is_training=(self.mode == 'train'
                                                  and self._dropout > 0),
                                     scope='dropout')

            with tf.variable_scope('fc2'):
                W_fc_loc2 = weight_variable([arr_out_filters[2], 6])
                # Use identity transformation as starting point
                initial = np.array([[1., 0, 0], [0, 1., 0]])
                initial = initial.astype('float32')
                initial = initial.flatten()
                b_fc_loc2 = tf.Variable(initial_value=initial,
                                        name='b_fc_loc2')

                h_fc_loc2 = tf.nn.tanh(
                    tf.matmul(h_fc_loc1, W_fc_loc2) + b_fc_loc2)

            # %% We'll create a spatial transformer module to identify discriminative
            # %% patches
            out_size = (width, height)
            h_trans = transformer(x, h_fc_loc2, out_size)
            h_trans = tf.reshape(
                h_trans, [self.hps.batch_size, width, height, in_filters])
        return h_trans
Example #7
    def stp_transformation(self, prev_image, stp_input, num_masks, reuse= None, suffix = None):
        """Apply spatial transformer predictor (STP) to previous image.

        Args:
          prev_image: previous image to be transformed.
          stp_input: hidden layer to be used for computing STN parameters.
          num_masks: number of masks and hence the number of STP transformations.
          reuse: whether to reuse the fully connected parameter layers.
          suffix: string appended to the parameter scope names.
        Returns:
          List of images transformed by the predicted STP parameters, together
          with the list of predicted parameters.
        """
        # Only import spatial transformer if needed.
        from spatial_transformer import transformer

        identity_params = tf.convert_to_tensor(
            np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], np.float32))
        transformed = []
        trafos = []
        for i in range(num_masks):
            params = slim.layers.fully_connected(
                stp_input, 6, scope='stp_params' + str(i) + suffix,
                activation_fn=None,
                reuse= reuse) + identity_params
            outsize = (prev_image.get_shape()[1], prev_image.get_shape()[2])
            transformed.append(transformer(prev_image, params, outsize))
            trafos.append(params)

        return transformed, trafos
Example #8
    def _spatial_transformer(self, name, x, in_filters, arr_out_filters):
        width = x.get_shape().as_list()[1]
        height = x.get_shape().as_list()[2]

        with tf.variable_scope(name):
            W_fc_loc1 = weight_variable([width * height * in_filters, arr_out_filters[2]])
            b_fc_loc1 = bias_variable([arr_out_filters[2]])

            W_fc_loc2 = weight_variable([arr_out_filters[2], 6])
            # Use identity transformation as starting point
            initial = np.array([[1., 0, 0], [0, 1., 0]])
            initial = initial.astype('float32')
            initial = initial.flatten()
            b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')

            x_reshape = tf.reshape(x, [self.hps.batch_size, -1])
            # %% Define the two layer localisation network
            h_fc_loc1 = tf.nn.tanh(tf.matmul(x_reshape, W_fc_loc1) + b_fc_loc1)
            h_fc_loc1 = self._batch_norm2(name, h_fc_loc1)
            h_fc_loc1 = self._relu(h_fc_loc1)

            h_fc_loc1 = slim.dropout(h_fc_loc1, self._dropout,
                                     is_training=(self.mode == 'train'
                                                  and self._dropout
                                                  and self._dropout > 0),
                                     scope='dropout')

            h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1, W_fc_loc2) + b_fc_loc2)

            # %% We'll create a spatial transformer module to identify discriminative
            # %% patches
            out_size = (width, height)
            h_trans = transformer(x, h_fc_loc2, out_size)
            h_trans = tf.reshape(h_trans, [self.hps.batch_size, width, height, in_filters])
        return h_trans
Example #9
def stp_transformation(prev_image, stp_input, num_masks):
    """Apply spatial transformer predictor (STP) to previous image.

  Args:
    prev_image: previous image to be transformed.
    stp_input: hidden layer to be used for computing STN parameters.
    num_masks: number of masks and hence the number of STP transformations.
  Returns:
    List of images transformed by the predicted STP parameters.
  """
    # Only import spatial transformer if needed.
    import sys
    sys.path.append('../transformer')
    from spatial_transformer import transformer

    height = int(prev_image.get_shape()[1])
    width = int(prev_image.get_shape()[2])

    identity_params = tf.convert_to_tensor(
        np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], np.float32))
    transformed = []
    for i in range(num_masks - 1):
        params = slim.layers.fully_connected(
            stp_input, 6, scope='stp_params' + str(i),
            activation_fn=None) + identity_params
        transformed.append(transformer(prev_image, params, (height, width)))

    return transformed
Example #10
def stp_transformation(prev_image, stp_input, num_masks):
  """Apply spatial transformer predictor (STP) to previous image.
  将STP应用到先前图像

  Args:
    prev_image: previous image to be transformed.
    先前图像
    stp_input: hidden layer to be used for computing STN parameters.
    用于计算STP的隐藏层
    num_masks: number of masks and hence the number of STP transformations.掩码的数量以及STP转换的数量
  Returns:
    List of images transformed by the predicted STP parameters.
    由预测的STP参数转换的图像列表
  """
  # Only import spatial transformer if needed.
  from spatial_transformer import transformer

  identity_params = tf.convert_to_tensor(
      np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], np.float32))
  transformed = []
  for i in range(num_masks - 1):
    params = slim.layers.fully_connected(
        stp_input, 6, scope='stp_params' + str(i),
        activation_fn=None) + identity_params
    # The canonical transformer also expects an output size; keep the input resolution.
    out_size = (int(prev_image.get_shape()[1]), int(prev_image.get_shape()[2]))
    transformed.append(transformer(prev_image, params, out_size))

  return transformed
Example #11
 def generator_theta(self, i1i2):
     theta = self.theta_generator_small(i1i2)
     i2hat, flow = transformer(U=i1i2[..., 0:3],
                               theta=theta,
                               out_size=[self.h, self.w],
                               mode=self.config.transform,
                               name='g_transformer')
     return i2hat, flow
Example #12
 def zoom_image(self, x, new_height, new_width):
     assert len(x.shape) == 4
     delta = tf.zeros((tf.shape(x)[0], 2, new_height * new_width))
     zoomed_x = spatial_transformer.transformer(x, delta,
                                                (new_height, new_width))
     return tf.reshape(
         zoomed_x,
         [tf.shape(x)[0], new_height, new_width, x.shape[-1].value])
Example #13
 def generator_flow(self, i1i2):
     flow = self.flow_generator_flownet(i1i2)
     i2hat = transformer(U=i1i2[..., 0:3],
                         flow=flow,
                         out_size=[self.h, self.w],
                         mode='Flow',
                         name='g_transformer')
     return i2hat, flow
Example #14
    def warp_image(self, x, u, v):
        assert len(x.shape) == 4
        assert len(u.shape) == 3
        assert len(v.shape) == 3
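        # Note: the delta field is interpreted on the transformer's sampling grid in
        # normalized [-1, 1] coordinates, so pixel displacements are rescaled by
        # 2 / (image size) below.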
        u = u / x.shape[2].value * 2
        v = v / x.shape[1].value * 2

        delta = tf.concat(axis=1, values=[u, v])
        return spatial_transformer.transformer(x, delta, (x.shape[-3].value, x.shape[-2].value))
Example #15
 def stn_img(self, batch_img):
     feature_map = self.flatten1(batch_img)
     feature_map = self.fc1(feature_map)
     feature_map = self.dropout1(feature_map)
     feature_map = self.fc2(feature_map)
     feature_map = transformer(U=batch_img,
                               theta=feature_map,
                               out_size=(40, 40))
     return feature_map
Example #16
def stn_idsia_inference_type2(batch_x):
    with tf.name_scope('stn_network_t2'):
        stn_output = stn_locnet_type2(stn_convolve_pool_flatten_type2(batch_x))
        transformed_batch_x = transformer(batch_x, stn_output, (IMAGE_SIZE, IMAGE_SIZE, TF_CONFIG['channels']))

    with tf.name_scope('idsia_classifier'):
        features, batch_act = idsia_convolve_pool_flatten(transformed_batch_x, multiscale=True)
        logits = idsia_fc_logits(features, multiscale=True)

    return logits, transformed_batch_x, batch_act
Example #17
def spatialTransformer(x_img, num_regions):
    """ Create spatial transformer network and return output tensor.

    Args:
    x_img: tensor
        The input image in NCHW format
    num_regions: int
        The number of region proposals generated by the network.

    The dropout probability defaults to `keep_prob=1.0`. The placeholder that
    controls it is `transformer/keep_prob:0`.

    Returns:
    image: tensor
        It has the same shape as the input `x_img`
    """
    # Sanity check: must be a valid NCHW image.
    assert len(x_img.shape) == 4
    _, chan, height, width = x_img.shape.as_list()

    with tf.variable_scope('transformer'):
        # Setup the two-layer localisation network to figure out the
        # parameters for an affine transformation of the input.
        kp = tf.placeholder_with_default(1.0, None, 'keep_prob')

        # Do nothing if the transformer was disabled.
        if num_regions in [None, 0]:
            return x_img

        # Spatial transformer uses NHWC format.
        x_img = tf.transpose(x_img, [0, 2, 3, 1])

        # Create variables for fully connected layer.
        W1, b1 = weights([chan * height * width,
                          num_regions]), bias([num_regions])

        # Weights and bias for spatial transform matrix. Initialise to identity.
        W2 = weights([num_regions, 6])
        initial = np.array([[1, 0, 0], [0, 1, 0]]).astype(np.float32).flatten()
        b2 = tf.Variable(initial_value=initial, name='b2')

        # Define the two layer localisation network.
        x_flat = tf.reshape(x_img, [-1, chan * height * width])
        h1 = tf.nn.tanh(tf.matmul(x_flat, W1) + b1)
        h1_drop = tf.nn.dropout(h1, keep_prob=kp)
        h2 = tf.nn.tanh(tf.matmul(h1_drop, W2) + b2)

        # We'll create a spatial transformer module to identify
        # discriminative patches
        out_flat = spatial_transformer.transformer(x_img, h2, (height, width))
        out_img = tf.reshape(out_flat, [-1, height, width, chan])

        # Return image as NCHW.
        return tf.transpose(out_img, [0, 3, 1, 2])
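
The docstring above notes that dropout is controlled by the `transformer/keep_prob:0` placeholder and defaults to 1.0. A hedged sketch of overriding it during training; `sess`, `train_op`, `x_in`, and `batch_imgs` are assumed to exist elsewhere:

kp = tf.get_default_graph().get_tensor_by_name('transformer/keep_prob:0')
sess.run(train_op, feed_dict={x_in: batch_imgs, kp: 0.9})   # any value < 1.0 enables dropout
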
Example #18
 def stn_only(self, img, flow):
     b, h, w, c = flow.shape
     img = tf.convert_to_tensor(img)
     flow = tf.convert_to_tensor(flow)
     interp = transformer(U=img,
                          flow=flow,
                          out_size=[h, w],
                          mode='Flow',
                          name='stn_only')
     out_interp = self.sess.run([interp])
     return out_interp
Example #19
    def obj_ll(self, images, z_where):
        num_steps = self.conf.num_steps
        patch_h, patch_w = self.conf.patch_height, self.conf.patch_width
        n, scene_h, scene_w, chans = map(int, images.shape)

        # Extract object patches (also referred to as y)
        patches, object_scores = stn.batch_transformer(images, z_where,
                                                       [patch_h, patch_w])
        patches = tf.identity(patches, name='y')

        # Compute background score iteratively by 'cutting out' each object
        cur_bg_score = tf.ones_like(object_scores[:, 0])
        bg_maps = [cur_bg_score]
        obj_visible = []
        for step in range(num_steps):
            # Everything outside the scene is unobserved -> pad bg_score with zeros
            padded_bg_score = tf.pad(cur_bg_score, [[0, 0], [1, 1], [1, 1]])
            padded_bg_score = tf.expand_dims(padded_bg_score, -1)
            shifted_z_where = z_where[:, step] + [0., 0., 1., 0., 0., 1.]
            vis, _ = stn.transformer(padded_bg_score, shifted_z_where,
                                     [patch_h, patch_w])
            obj_visible.append(vis[..., 0])

            cur_bg_score *= 1 - object_scores[:, step]
            # cur_bg_score = tf.clip_by_value(cur_bg_score, 0.0, 1.0)
            bg_maps.append(cur_bg_score)

        tf.identity(cur_bg_score, name='bg_score')
        obj_visible = tf.stack(obj_visible, axis=1)
        overlap_ratio = 1 - tf.reduce_mean(obj_visible, axis=[2, 3])

        flattened_patches = tf.reshape(
            patches, [n * num_steps, patch_h * patch_w * chans])
        spn_input = flattened_patches

        pixels_visible = tf.reshape(obj_visible,
                                    [n, num_steps, patch_h * patch_w, 1])
        channels_visible = tf.tile(pixels_visible, [1, 1, 1, chans])
        channels_visible = tf.reshape(
            channels_visible, [n, num_steps, patch_h * patch_w * chans])
        channels_visible = tf.identity(channels_visible, name='obj_vis')
        marginalize = 1 - channels_visible
        marginalize = tf.reshape(marginalize,
                                 [n * num_steps, patch_h * patch_w * chans])

        spn_output = self.obj_spn.forward(spn_input, marginalize)
        p_ys = spn_output[:, 0]  # tf.reduce_logsumexp(spn_output + tf.log(0.1), axis=1)
        p_ys = tf.reshape(p_ys, [n, num_steps])
        # Scale by patch size to approximate a calibrated likelihood over x
        p_ys *= z_where[:, :, 0] * z_where[:, :, 4]

        return p_ys, bg_maps, overlap_ratio
def get_STL(path, num_batch):
    h = 384
    w = 384
    im = cv2.imread(path[0])
    im = im / 255.
    im = cv2.resize(im, (w, h), interpolation=cv2.INTER_CUBIC)

    im = im.reshape(1, h, w, 3)
    im = im.astype('float32')

    batch = np.append(im, im, axis=0)
    for p in path:
        im = cv2.imread(p)
        im = im / 255.
        im = cv2.resize(im, (w, h), interpolation=cv2.INTER_CUBIC)
        im = im.reshape(1, h, w, 3)
        im = im.astype('float32')
        batch = np.append(batch, im, axis=0)

    batch = batch[2:, :, :, :]

    out_size = (h, w)

    # %% Simulate batch
    x = tf.placeholder(tf.float32, [None, h, w, 3])
    x = tf.cast(batch, 'float32')

    # %% Create localisation network and convolutional layer
    with tf.variable_scope('spatial_transformer_0'):

        # %% Create a fully-connected layer with 6 output nodes
        n_fc = 6
        W_fc1 = tf.Variable(tf.zeros([h * w * 3, n_fc]), name='W_fc1')

        # %% Apply a small random perturbation of the identity transform
        a, b, c, d, e, f = np.random.random(6) / 10

        initial = np.array([[1 - a, b, c], [d, 1 - e, f]])
        initial = initial.astype('float32')
        initial = initial.flatten()

        b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')
        h_fc1 = tf.matmul(tf.zeros([num_batch, h * w * 3]), W_fc1) + b_fc1
        h_trans = transformer(x, h_fc1, out_size)

    # %% Run session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        y = sess.run(h_trans, feed_dict={x: batch})
        sess.close()

    return y
Example #21
    def _spatial_transformer(self, name, x, in_filters, arr_out_filters):
        width = x.get_shape().as_list()[1]
        height = x.get_shape().as_list()[2]

        with tf.variable_scope(name):
            _x = MaxPooling2D(x, name='pool1')
            with tf.variable_scope('conv_1'):
                _x = Conv2D(_x,
                            in_filters,
                            5,
                            arr_out_filters[0],
                            name='conv_1')
                # _x = BatchNormalization(_x, self.mode == 'train', name='batch1')
                _x = MaxPooling2D(_x, use_relu=True, name='pool2')

            with tf.variable_scope('conv_2'):
                _x = Conv2D(_x,
                            arr_out_filters[0],
                            5,
                            arr_out_filters[1],
                            name='conv_2')
                # _x = BatchNormalization(_x, self.mode == 'train', name='batch2')
                _x = MaxPooling2D(_x, use_relu=True, name='pool3')

            with tf.variable_scope('fc1'):
                _x_flat, _x_size = Flatten(_x)
                _x = Dense(_x_flat,
                           _x_size,
                           arr_out_filters[2],
                           use_relu=True,
                           name='fc1')

            _x = slim.dropout(_x,
                              self._dropout,
                              is_training=(self.mode == 'train'
                                           and self._dropout > 0),
                              scope='dropout')

            with tf.variable_scope('fc2'):
                _x = Dense(_x,
                           arr_out_filters[2],
                           6,
                           use_relu=False,
                           trans=True,
                           name='fc2')

            out_size = (width, height)
            h_trans = transformer(x, _x, out_size)
            h_trans = tf.reshape(
                h_trans, [self.hps.batch_size, width, height, in_filters])
        return h_trans
Example #22
def model_sin():
    """
    Create model and return tensors necessary to run the model.

    Creates both training and testing phase tensors.
    """
    x = tf.placeholder(tf.float32, [None, 28, 28, 1])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])
    if RESTRICT_ROTATE:
        initial = 0.0
        theta = tf.Variable(initial_value=initial, name='theta')
        sin = tf.sin(theta)
        cos = tf.cos(theta)
        rot_matrix = [cos, -sin, tf.constant(0.0),
                      sin, cos, tf.constant(0.0)]
    else:
        initial = np.array([[1, 0, 0], [0, 1, 0]])
        initial = initial.astype('float32')
        initial = initial.flatten()
        theta = tf.Variable(initial_value=initial, name='theta')
        rot_matrix = tf.identity(theta)
    h_fc1 = tf.zeros([1, 6]) + rot_matrix  # takes advantage of TF's broadcast
    transformed_x = transformer(x, h_fc1, (28, 28))
    if MODEL == 'LENET':
        net, model_var_dict = lenet(transformed_x)
    elif MODEL == 'BEGINNER':
        transformed_x = tf.reshape(transformed_x, (1, 28, 28, 1))
        W, b, net, model_var_dict = beginner(transformed_x)
    elif MODEL == 'SMALL_FNN':
        transformed_x = tf.reshape(transformed_x, (1, 784))
        net, model_var_dict = small_fnn(transformed_x)
    y = tf.nn.softmax(net)

    # test phase tensors
    test_cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y),
                                   reduction_indices=[1]))
    test_opt = tf.train.GradientDescentOptimizer(10 ** -2)
    test_train_step = test_opt.minimize(test_cross_entropy,
                              var_list=[theta])
    # train phase tensors
    train_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(y, y_)
    train_opt = tf.train.GradientDescentOptimizer(10 ** -2)
    theta_train_step = train_opt.minimize(train_cross_entropy,
                              var_list=model_var_dict.values())

    # return all tensors since references are required to run operations
    return (x, y_, theta, rot_matrix, h_fc1, transformed_x, net,
            model_var_dict, y, test_cross_entropy, test_train_step,
            train_cross_entropy, train_opt, theta_train_step)
    def build_network(self):

        self.X = tf.placeholder(
            tf.float32,
            [self.batch_size, self.img_height, self.img_width, self.channel],
            name='images')
        self.detection = tf.placeholder(tf.float32, [self.batch_size, 2],
                                        name='detection')
        self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42],
                                        name='landmarks')
        self.visibility = tf.placeholder(tf.float32, [self.batch_size, 21],
                                         name='visibility')
        self.pose = tf.placeholder(tf.float32, [self.batch_size, 3],
                                   name='pose')
        self.gender = tf.placeholder(tf.float32, [self.batch_size, 2],
                                     name='gender')

        theta = self.localization_squeezenet(self.X)
        self.T_mat = tf.reshape(theta, [-1, 2, 3])
        self.cropped = transformer(self.X, self.T_mat,
                                   [self.out_height, self.out_width])

        net_output = self.hyperface(
            self.cropped
        )  # (out_detection, out_landmarks, out_visibility, out_pose, out_gender)

        loss_detection = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(net_output[0],
                                                    self.detection))

        visibility_mask = tf.reshape(
            tf.tile(tf.expand_dims(self.visibility, axis=2), [1, 1, 2]),
            [self.batch_size, -1])
        loss_landmarks = tf.reduce_mean(
            tf.square(visibility_mask * (net_output[1] - self.landmarks)))

        loss_visibility = tf.reduce_mean(
            tf.square(net_output[2] - self.visibility))
        loss_pose = tf.reduce_mean(tf.square(net_output[3] - self.pose))
        loss_gender = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(net_output[4],
                                                    self.gender))

        self.loss = self.weight_detect*loss_detection + self.weight_landmarks*loss_landmarks  \
           + self.weight_visibility*loss_visibility + self.weight_pose*loss_pose  \
           + self.weight_gender*loss_gender
Example #24
 def call(self, batch_img):
     """
     Chain the filters together to build the network.
     """
     feature_map = self.flatten1(batch_img)
     feature_map = self.fc1(feature_map)
     feature_map = self.dropout1(feature_map)
     feature_map = self.fc2(feature_map)
     feature_map = transformer(U=batch_img,
                               theta=feature_map,
                               out_size=(40, 40))
     #        feature_map = batch_img###############
     feature_map = self.conv1(feature_map)
     feature_map = self.conv2(feature_map)
     feature_map = self.flatten2(feature_map)
     feature_map = self.fc3(feature_map)
     feature_map = self.fc4(feature_map)
     return feature_map
Example #25
def gen_STN():
    #x = np.reshape(np.arange(5*5*1),(1,5,5,1)).astype(np.float32)
    sess = tf.Session()
    U = tf.range(3 * 6 * 8 * 3)
    U = tf.reshape(U, [3, 6, 8, 3])  # input is NHWC (batch, height, width, channels)

    theta = tf.constant([[1, 0, 0, 0, 1, 0], [1, 0, 0, 0, 1, 0],
                         [1, 0, 0, 0, 1, 0]])
    output = transformer(U, theta, (6, 8))

    dump_data(sess.run(U), "U.data", fmt="binary", data_type="float32")
    dump_data(sess.run(U), "U.txt", fmt="float", data_type="float32")

    dump_data(sess.run(theta), "theta.data", fmt="binary", data_type="float32")
    dump_data(sess.run(theta), "theta.txt", fmt="float", data_type="float32")

    dump_data(sess.run(output),
              "output.data",
              fmt="binary",
              data_type="float32")
    dump_data(sess.run(output), "output.txt", fmt="float", data_type="float32")
Example #26
 def transform(self, inputs, out_height, out_width):
     net = tf.layers.conv2d(inputs, 64, 7, 3, activation=tf.nn.relu)
     net = tf.layers.max_pooling2d(net, 3, 2)
     net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
     net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
     net = tf.layers.max_pooling2d(net, 3, 2)
     net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
     net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
     net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
     net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
     net = tf.reduce_mean(net, [1, 2], name='global_pool')
     net = tf.layers.dense(net, 20, activation=tf.nn.tanh)
     net = tf.layers.dropout(net, 0.8)
     initial = np.array([[1., 0, 0], [0, 1., 0]])
     initial = initial.astype('float32')
     initial = initial.flatten()
     net = tf.layers.dense(net, 6, activation=tf.nn.tanh, bias_initializer=tf.initializers.constant(initial))
     self.localization = net
     for i in range(6):
         tf.summary.scalar("param%d" % i, net[0][i])
     return transformer(inputs, net, (out_height, out_width))
Example #27
    def transform_isotropic(self, inputs, out_height, out_width):
        net = tf.layers.conv2d(inputs, 64, 7, 3, activation=tf.nn.relu)
        net = tf.layers.max_pooling2d(net, 3, 2)
        net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
        net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
        net = tf.layers.max_pooling2d(net, 3, 2)
        net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
        net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
        net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
        net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
        net = tf.reduce_mean(net, [1, 2], name='global_pool')
        net = tf.layers.dense(net, 36, activation=tf.nn.tanh)
        net = tf.layers.dropout(net, 0.8)

        # net = tf.layers.dense(net, 18, activation=tf.nn.tanh)
        # net = tf.layers.dense(net, 12, activation=tf.nn.tanh)
        net = tf.layers.dense(net, 6, activation=tf.nn.tanh)
        self.localization = net
        # for i in range(12):
        for i in range(2):
            # for i in range(18):
            tf.summary.scalar("param%d" % i, net[0][i])
        # bg, title, credit = tf.split(inputs, [3, 4, 4], 3)
        bg, title = tf.split(inputs, [3, 4], 3)
        tf.summary.image("bg", bg, max_outputs=10)
        tf.summary.image("title", title, max_outputs=10)
        # tf.summary.image("credit", credit, max_outputs=10)
        # bg_p, title_p, credit_p = tf.split(net, 3, 1)
        # bg_p, title_p = tf.split(net, 2, 1)
        mul_c = tf.constant([[0., 0., 0., 0., 1., 1.]], tf.float32, shape=[1, 6])
        add_c = tf.constant([[0.5, 0., 0.5, 0., 0., 0.]], tf.float32, shape=[1, 6])

        # bg_trans = transformer(bg, tf.multiply(bg_p, cont_p), (out_height, out_width))
        theta = net * mul_c + add_c
        self.theta = theta
        title_trans = transformer(title, theta, (out_height, out_width))
        # credit_trans = transformer(credit, tf.multiply(credit_p, cont_p), (out_height, out_width))
        # return bg_trans, title_trans, credit_trans
        # return bg_trans, title_trans
        return title_trans
Example #28
 def _spatial_transform(self, x):
     ## x shape: [N, W, H, C=1]
     conv1_loc = tf.layers.conv2d(x,
                                  16,
                                  3,
                                  padding='same',
                                  activation=tf.nn.relu,
                                  name='conv1_loc')
     pool1_loc = tf.layers.max_pooling2d(conv1_loc, 2, 2)
     flat_loc = tf.contrib.layers.flatten(pool1_loc)
     fc1_loc = tf.contrib.layers.fully_connected(flat_loc,
                                                 64,
                                                 scope='fc1_loc')
     ac1_loc = tf.nn.tanh(fc1_loc)
     fc2_loc = tf.contrib.layers.fully_connected(ac1_loc,
                                                 6,
                                                 scope='fc2_loc')
     ac2_loc = tf.nn.tanh(fc2_loc)
     stn = st.transformer(x,
                          ac2_loc,
                          out_size=(self._img_height, self._img_width))
     return stn
Example #29
    def transform_mixed(self, inputs, out_height, out_width):
        net = tf.layers.conv2d(inputs, 64, 7, 3, activation=tf.nn.relu)
        net = tf.layers.max_pooling2d(net, 3, 2)
        net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
        net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
        net = tf.layers.max_pooling2d(net, 3, 2)
        net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
        net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
        net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
        net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
        net = tf.reduce_mean(net, [1, 2], name='global_pool')
        net = tf.layers.dense(net, 36, activation=tf.nn.tanh)
        net = tf.layers.dropout(net, 0.8)
        # initial = np.array([[1., 0, 0], [0, 1., 0], [1., 0, 0], [0, 1., 0], [1., 0, 0], [0, 1., 0]])
        initial = np.array([[1., 0, 0], [0, 1., 0]])
        initial = initial.astype('float32')
        initial = initial.flatten()

        # net = tf.layers.dense(net, 18, activation=tf.nn.tanh, bias_initializer=tf.initializers.constant(initial))
        # net = tf.layers.dense(net, 12, activation=tf.nn.tanh, bias_initializer=tf.initializers.constant(initial))
        net = tf.layers.dense(net, 6, activation=tf.nn.tanh, bias_initializer=tf.initializers.constant(initial))
        self.localization = net
        for i in range(6):
            # for i in range(18):
            tf.summary.scalar("param%d" % i, net[0][i])
        # bg, title, credit = tf.split(inputs, [3, 4, 4], 3)
        bg, title = tf.split(inputs, [3, 4], 3)
        tf.summary.image("bg", bg, max_outputs=10)
        tf.summary.image("title", title, max_outputs=10)
        # tf.summary.image("credit", credit, max_outputs=10)
        # bg_p, title_p, credit_p = tf.split(net, 3, 1)
        # bg_trans = transformer(bg, bg_p, (out_height, out_width))
        self.theta = net
        title_trans = transformer(title, net, (out_height, out_width))
        # credit_trans = transformer(credit, credit_p, (out_height, out_width))
        # return bg_trans, title_trans, credit_trans
        # return bg_trans, title_trans
        return title_trans
Example #30
def stp_transformation(prev_image, stp_input, num_masks):
  """Apply spatial transformer predictor (STP) to previous image.

  Args:
    prev_image: previous image to be transformed.
    stp_input: hidden layer to be used for computing STN parameters.
    num_masks: number of masks and hence the number of STP transformations.
  Returns:
    List of images transformed by the predicted STP parameters.
  """
  # Only import spatial transformer if needed.
  from spatial_transformer import transformer

  identity_params = tf.convert_to_tensor(
      np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], np.float32))
  transformed = []
  for i in range(num_masks - 1):
    params = slim.layers.fully_connected(
        stp_input, 6, scope='stp_params' + str(i),
        activation_fn=None) + identity_params
    # The canonical transformer also expects an output size; keep the input resolution.
    out_size = (int(prev_image.get_shape()[1]), int(prev_image.get_shape()[2]))
    transformed.append(transformer(prev_image, params, out_size))

  return transformed
def spTrans(x_tensor, width, height, channels, n_loc, keep_prob):
    resolution = width * height * channels
    W_fc_loc1 = weight_variable([resolution, n_loc])
    b_fc_loc1 = bias_variable([n_loc])

    W_fc_loc2 = weight_variable([n_loc, 6])
    # Use identity transformation as starting point
    initial = np.array([[1., 0, 0], [0, 1., 0]])
    initial = initial.astype('float32')
    initial = initial.flatten()
    b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')

    # Two layer localisation network (flatten the input first)
    x_flat = tf.reshape(x_tensor, [-1, resolution])
    h_fc_loc1 = tf.nn.tanh(tf.matmul(x_flat, W_fc_loc1) + b_fc_loc1)
    # dropout (reduces overfitting)
    h_fc_loc1_drop = tf.nn.dropout(h_fc_loc1, keep_prob)
    # %% Second layer
    h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1_drop, W_fc_loc2) + b_fc_loc2)
    # spatial transformer
    out_size = (width, height)
    h_trans = transformer(x_tensor, h_fc_loc2, out_size)

    return h_trans, b_fc_loc2, h_fc_loc2
Example #32
    def inference(self):
        self.base_network = layers.base_network(self.img, self.training, 'base_network')
        self.intermediate_layer = layers.intermediate_layer(self.base_network, self.training, 'intermediate_layer')
        self.logits_cls = layers.clf_layer(self.intermediate_layer, self.training,'cls_layer')
        self.scores_cls = tf.nn.sigmoid(self.logits_cls)

        reg = layers.reg_layer(self.intermediate_layer,self.training, 'reg_layer')
        
        self.tx, self.ty, self.tw, self.th = self.parameterize(reg)

        # Faster RCNN additional layers
        scores_cls_flat = tf.reshape(self.logits_cls,[-1, self.scores_cls.shape[1]*self.scores_cls.shape[2]])
        
        # Find the top 2 iou-score locations in each of the batch
        self.values, self.indices = tf.nn.top_k(scores_cls_flat, k=2, sorted=True, name=None)
        self.ind1, self.ind2 = self.indices[:,0:1], self.indices[:,1:2]
        self.ind1 = tf.concat([tf.reshape(tf.range(self.batch_size),[-1,1]), self.ind1], 1)
        self.ind2 = tf.concat([tf.reshape(tf.range(self.batch_size),[-1,1]), self.ind2], 1)
        
        x1, y1, w1, h1 = self.gather(reg[:,:,:,0:1], self.ind1), self.gather(reg[:,:,:,1:2], self.ind1), self.gather(reg[:,:,:,2:3], self.ind1), self.gather(reg[:,:,:,3:4], self.ind1)
        x2, y2, w2, h2 = self.gather(reg[:,:,:,0:1], self.ind2), self.gather(reg[:,:,:,1:2], self.ind2), self.gather(reg[:,:,:,2:3], self.ind2), self.gather(reg[:,:,:,3:4], self.ind2)
        
        x, y, w, h = tf.concat([x1,x2], axis=0), tf.concat([y1,y2], axis=0), tf.concat([w1,w2], axis=0), tf.concat([h1,h2], axis=0)
        
        theta = tf.concat([w*16/128.0, 0.0*w, (x*16 - 64)/64.0, 0.0*h, h*16/128, (y*16 - 64)/64.0],axis=1)
        img = tf.concat([self.base_network, self.base_network], 0)
        label1, label2 = self.gather(self.label, self.ind1), self.gather(self.label, self.ind2)
        label = tf.concat([label1, label2], 0)
        label = tf.one_hot(label, self.n_classes, on_value=1.0, off_value=0.0, axis=-1)
        self.one_hot_label = tf.reshape(label, [-1, self.n_classes])
       
        spatial_transformer_out = spatial_transformer.transformer(img, theta, out_size=(4,4))
        spatial_transformer_out = tf.reshape(spatial_transformer_out, [-1,4,4,128])
       
        self.logits = layers.faster_rcnn(spatial_transformer_out, self.training, 'faster_rcnn',self.n_classes)
        print('hi!')
W_fc_loc2 = weight_variable([20, 6])
initial = np.array([[1.,0, 0],[0,1.,0]]) # Use identity transformation as starting point
initial = initial.astype('float32')
initial = initial.flatten()
b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')

# %% Define the two layer localisation network
h_fc_loc1 = tf.nn.tanh(tf.matmul(x, W_fc_loc1) + b_fc_loc1)
# %% We can add dropout for regularizing and to reduce overfitting like so:
keep_prob = tf.placeholder(tf.float32)
h_fc_loc1_drop = tf.nn.dropout(h_fc_loc1, keep_prob)
# %% Second layer
h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1_drop, W_fc_loc2) + b_fc_loc2)

# %% We'll create a spatial transformer module to identify discriminative patches
h_trans = transformer(x_tensor, h_fc_loc2, downsample_factor=1)

# %% We'll setup the first convolutional layer
# Weight matrix is [height x width x input_channels x output_channels]
filter_size = 3
n_filters_1 = 16
W_conv1 = weight_variable([filter_size, filter_size, 1, n_filters_1])

# %% Bias is [output_channels]
b_conv1 = bias_variable([n_filters_1])

# %% Now we can build a graph which does the first layer of convolution:
# we define our stride as batch x height x width x channels
# instead of pooling, we use strides of 2 and more layers
# with smaller filters.
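
A hedged sketch of the stride-2 first convolution that the comment above describes, reusing `h_trans`, `W_conv1`, and `b_conv1` from this snippet (the original file's exact layer stack is not shown here):

h_conv1 = tf.nn.relu(
    tf.nn.conv2d(h_trans, W_conv1, strides=[1, 2, 2, 1], padding='SAME') + b_conv1)
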
Example #34
    def zoom_image(self, x, new_height, new_width):
        assert len(x.shape) == 4

        delta = tf.zeros((tf.shape(x)[0], 2, new_height * new_width))
        zoomed_x = spatial_transformer.transformer(x, delta, (new_height, new_width))
        return tf.reshape(zoomed_x, [tf.shape(x)[0], new_height, new_width, x.shape[-1].value])
Example #35
# %% Simulate batch
batch = np.append(im, im, axis=0)
batch = np.append(batch, im, axis=0)
num_batch = 3

x = tf.placeholder(tf.float32, [None, 1200, 1600, 3])
x = tf.cast(batch, 'float32')

# %% Create localisation network and convolutional layer
with tf.variable_scope('spatial_transformer_0'):

    # %% Create a fully-connected layer with 6 output nodes
    n_fc = 6
    W_fc1 = tf.Variable(tf.zeros([1200 * 1600 * 3, n_fc]), name='W_fc1')

    # %% Zoom into the image
    initial = np.array([[0.5, 0, 0], [0, 0.5, 0]])
    initial = initial.astype('float32')
    initial = initial.flatten()

    b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')
    h_fc1 = tf.matmul(tf.zeros([num_batch, 1200 * 1600 * 3]), W_fc1) + b_fc1
    h_trans = transformer(x, h_fc1, out_size)

# %% Run session
sess = tf.Session()
sess.run(tf.initialize_all_variables())
y = sess.run(h_trans, feed_dict={x: batch})

# plt.imshow(y[0])
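
Why the `[[0.5, 0, 0], [0, 0.5, 0]]` bias zooms in: the transformer samples the input over a grid expressed in normalized [-1, 1] coordinates, so a scale of 0.5 maps the output onto the central half of the source image. A small self-contained check, assuming the same spatial_transformer module as above:

import numpy as np
import tensorflow as tf
from spatial_transformer import transformer  # assumed to be the module used above

img = np.arange(16, dtype=np.float32).reshape(1, 4, 4, 1)         # tiny 4x4 test image
theta = np.array([[0.5, 0., 0., 0., 0.5, 0.]], dtype=np.float32)  # scale-0.5 affine
zoomed = transformer(tf.constant(img), tf.constant(theta), (4, 4))
with tf.Session() as sess:
    print(sess.run(zoomed).shape)   # (1, 4, 4, 1), resampled from the central region of img
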
Example #36
initial = initial.astype('float32')
initial = initial.flatten()
b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')

# %% Define the two layer localisation network
h_fc_loc1 = tf.nn.tanh(tf.matmul(x, W_fc_loc1) + b_fc_loc1)
# %% We can add dropout for regularizing and to reduce overfitting like so:
keep_prob = tf.placeholder(tf.float32)
h_fc_loc1_drop = tf.nn.dropout(h_fc_loc1, keep_prob)
# %% Second layer
h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1_drop, W_fc_loc2) + b_fc_loc2)

# %% We'll create a spatial transformer module to identify discriminative
# %% patches
out_size = (40, 40)
h_trans = transformer(x_tensor, h_fc_loc2, out_size)

# %% We'll setup the first convolutional layer
# Weight matrix is [height x width x input_channels x output_channels]
filter_size = 3
n_filters_1 = 16
W_conv1 = weight_variable([filter_size, filter_size, 1, n_filters_1])

# %% Bias is [output_channels]
b_conv1 = bias_variable([n_filters_1])

# %% Now we can build a graph which does the first layer of convolution:
# we define our stride as batch x height x width x channels
# instead of pooling, we use strides of 2 and more layers
# with smaller filters.
batch = np.append(batch, im, axis=0)

num_batch = 3
x = tf.placeholder(tf.float32, [None, 1200, 1600, 3])
x = tf.cast(batch,'float32')

# Create localisation network and convolutional layer
with tf.variable_scope('spatial_transformer_0'):

    # %% Create a fully-connected layer:
    n_fc = 6 
    W_fc1 = tf.Variable(tf.zeros([1200 * 1600 * 3, n_fc]), name='W_fc1')
    initial = np.array([[0.5,0, 0],[0,0.5,0]]) 
    initial = initial.astype('float32')
    initial = initial.flatten()
    b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')
    x_flatten = tf.reshape(x,[-1,1200 * 1600 * 3])
    #h_fc1 = tf.nn.relu(tf.matmul(x_flatten, W_fc1) + b_fc1)
    h_fc1 = tf.matmul(tf.zeros([num_batch ,1200 * 1600 * 3]), W_fc1) + b_fc1
    h_trans = transformer(x, h_fc1, downsample_factor=2)

# Run session
sess = tf.Session()
sess.run(tf.initialize_all_variables())
y = sess.run(h_trans, feed_dict={x: batch})

plt.imshow(y[0])