Example #1
        def body(t, seq_img_trans):
            img = tf.image.per_image_standardization(_in[t])  # standardization
            img_trans = transformer(tf.expand_dims(img, 0), tf.stack([[scale_x, 0., 0., 0., scale_y, 0.]]))

            seq_img_trans = seq_img_trans.write(t, img_trans[0])

            return t + 1, seq_img_trans
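A loop body like this only runs inside tf.while_loop. Below is a minimal driver sketch, assuming _in is a [T, H, W, C] sequence tensor, scale_x/scale_y are scalar floats already in scope, and transformer is stn.spatial_transformer_network (all names taken from the body above):

        # hypothetical scaffolding for the body above
        num_frames = tf.shape(_in)[0]
        seq_img_trans = tf.TensorArray(tf.float32, size=num_frames)

        def cond(t, seq_img_trans):
            return t < num_frames

        _, seq_img_trans = tf.while_loop(cond, body,
                                         loop_vars=[0, seq_img_trans])
        seq_img_trans = seq_img_trans.stack()  # [T, H, W, C] transformed frames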
Example #2
def classifier(images, options, learner='cnn', name='classifier'):
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        #         x = relu(conv2d(images, options.nf, ks=5, s=1, name='conv1'))  # 28*28*nf
        #         if learner == 'stn':
        #             theta = linear(tf.reshape(x, [-1, int(options.input_size * options.input_size * options.nf)]), 128,
        #                            name='loc_linear1')
        #             theta = linear(theta, 6, name='loc_linear2')
        #             x = transformer(x, theta)

        if learner == 'stn':
            theta = linear(tf.layers.flatten(images), 128, name='loc_linear1')
            theta = linear(theta, 6, name='loc_linear2')
            x = transformer(images, theta,
                            [options.input_size, options.input_size])
            x = relu(conv2d(x, options.nf, ks=5, s=1,
                            name='conv1'))  # 28*28*nf
        else:
            x = relu(conv2d(images, options.nf, ks=5, s=1,
                            name='conv1'))  # 28*28*nf

        x = relu(conv2d(x, 2 * options.nf, ks=3, s=2,
                        name='conv2'))  # 14*14*(2*nf)
        x = relu(conv2d(x, 4 * options.nf, ks=3, s=2,
                        name='conv3'))  # 7*7*(4*nf)

        x = linear(tf.layers.flatten(x), 128, name='linear1')
        x = dropout(x, 0.5, options.phase)
        x = linear(x, options.label_n, name='linear2')
        return x
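A minimal usage sketch for classifier, assuming the repo's relu/conv2d/linear/dropout helpers and transformer are in scope; the options namespace here is hypothetical:

from argparse import Namespace
import tensorflow as tf

options = Namespace(nf=32, input_size=28, label_n=10, phase=True)
images = tf.placeholder(tf.float32, [None, 28, 28, 1])
logits = classifier(images, options, learner='stn')  # [batch, options.label_n]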
Example #3
    def decoder(self, merged_lv, activation, is_training, batch_size):
        with tf.variable_scope('decoder',
                               reuse=tf.AUTO_REUSE,
                               initializer=xavier_initializer_conv2d(),
                               regularizer=l2_regularizer(0.01)):
            d_conv = tf.reshape(merged_lv, [-1, 8, 8, 256])
            d_conv = tf.image.resize_images(d_conv, (16, 16))

            # stn >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
            n_fc = 6
            initial = np.array([[1., 0, 0], [0, 1., 0]])
            initial = initial.astype('float32').flatten()
            Wd_fc1 = tf.Variable(tf.zeros(shape=[16 * 16 * 256, n_fc]),
                                 name='Wdst1_fc1',
                                 validate_shape=False)
            bd_fc1 = tf.Variable(initial_value=initial, name='bdst1_fc1')
            hd_fc1 = tf.matmul(tf.zeros([batch_size, 16 * 16 * 256]),
                               Wd_fc1) + bd_fc1
            hd_trans = transformer(d_conv, hd_fc1)
            # stn <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

            d_conv = self.coord_conv(hd_trans,
                                     256,
                                     3,
                                     padding='same',
                                     activation=None)
            d_conv = tf.layers.batch_normalization(d_conv,
                                                   training=is_training,
                                                   fused=True)
            d_conv = activation(d_conv)
            d_conv = tf.image.resize_images(d_conv, (32, 32))

            d_conv = self.coord_conv(d_conv,
                                     92,
                                     3,
                                     padding='same',
                                     activation=None)
            d_conv = tf.layers.batch_normalization(d_conv,
                                                   training=is_training,
                                                   fused=True)
            d_conv = activation(d_conv)
            d_conv = tf.image.resize_images(d_conv, (64, 64))

            d_conv = self.coord_conv(d_conv,
                                     48,
                                     3,
                                     padding='same',
                                     activation=None)
            d_conv = tf.layers.batch_normalization(d_conv,
                                                   training=is_training,
                                                   fused=True)
            d_conv = activation(d_conv)
            d_conv = tf.image.resize_images(d_conv, (128, 128))

            d_conv = self.coord_conv(d_conv,
                                     3,
                                     3,
                                     padding='same',
                                     activation=None)
            return d_conv
Example #4
    def __init__(self, batch_size, image_height, image_width):
        input = Input(shape=(image_height, image_width, 3),
                      batch_size=batch_size
                      )  # 3 is number of channels, we're taking in RGB
        #Channel 1
        x = Conv2D(filters=32, kernel_size=3)(input)
        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)  # 2D pooling; MaxPooling3D would reject this 4D tensor
        x = Conv2D(filters=32, kernel_size=5, strides=3, activation="relu")(x)
        x = BatchNormalization()(x)

        #Channel 2
        y = Conv2D(filters=32, kernel_size=5, strides=5,
                   activation="relu")(input)
        y = BatchNormalization()(y)

        #Merge channels
        x = concatenate(inputs=[x, y],
                        axis=-1)  #Channel wise concat, as channel is last dim
        x = Dropout(rate=0.5)(x)  # Dropout is a layer: instantiate it, then call it on the tensor
        x = Flatten()(x)  # flatten so the Dense layers emit one 6-vector of affine params per image
        x = Dense(units=32, activation="tanh")(x)
        x = Dense(units=6, activation="tanh")(x)

        # __init__ cannot return a value; keep the transformer output on the instance instead
        self.out_image = transformer(input, x)
Example #5
    def roi_rotate_tensor_pad(self, feature_map, transform_matrixs, box_masks,
                              box_widths):
        with tf.variable_scope("RoIrotate"):
            max_width = box_widths[tf.argmax(box_widths,
                                             0,
                                             output_type=tf.int32)]
            # box_widths = tf.cast(box_widths, tf.float32)
            tile_feature_maps = []
            # crop_boxes = []
            # crop_sizes = []
            # box_inds = []
            map_shape = tf.shape(feature_map)
            map_shape = tf.cast(map_shape, tf.float32)

            for i, mask in enumerate(
                    box_masks
            ):  # box_masks is a list of num of rois in each feature map
                _feature_map = feature_map[i]
                # _crop_box = tf.constant([0, 0, 8/map_shape[0], box_widths[i]/map_shape[1]])
                # _crop_size = tf.constant([8, tf.cast(box_widths[i], tf.int32)])
                _feature_map = tf.expand_dims(_feature_map, axis=0)
                box_nums = tf.shape(mask)[0]
                _feature_map = tf.tile(_feature_map, [box_nums, 1, 1, 1])
                # crop_boxes.append(_crop_box)
                # crop_sizes.append(_crop_size)
                tile_feature_maps.append(_feature_map)
                # box_inds.append(i)

            tile_feature_maps = tf.concat(
                tile_feature_maps, axis=0)  # N' * H * W * C where N' = N * B
            trans_feature_map = transformer(tile_feature_maps,
                                            transform_matrixs)

            box_nums = tf.shape(box_widths)[0]
            pad_rois = tf.TensorArray(tf.float32, box_nums)
            i = 0

            def cond(pad_rois, i):
                return i < box_nums

            def body(pad_rois, i):
                _affine_feature_map = trans_feature_map[i]
                width_box = box_widths[i]
                # _affine_feature_map = tf.expand_dims(_affine_feature_map, 0)
                # roi = tf.image.crop_and_resize(after_transform, [[0, 0, 8/map_shape[0], width_box/map_shape[1]]], [0], [8, tf.cast(width_box, tf.int32)])
                roi = tf.image.crop_to_bounding_box(_affine_feature_map, 0, 0,
                                                    8, width_box)
                pad_roi = tf.image.pad_to_bounding_box(roi, 0, 0, 8, max_width)
                pad_rois = pad_rois.write(i, pad_roi)
                i += 1

                return pad_rois, i

            pad_rois, _ = tf.while_loop(cond, body, loop_vars=[pad_rois, i])
            pad_rois = pad_rois.stack()

            print "pad_rois shape: ", pad_rois

            return pad_rois
Example #6
    def call(self, inputs):

        x, x_loc = inputs
        if self.out_dims is None:
            B, H, W, C = x.shape.as_list()
            self.out_dims = (H, W)
        h_trans = transformer(x, x_loc, self.out_dims)
        return h_trans
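A usage sketch, assuming this call belongs to a tf.keras custom layer (STNLayer is a hypothetical name) whose constructor stores out_dims and whose inputs are the feature map plus the 6-parameter affine theta:

feat = tf.keras.Input(shape=(64, 64, 3))
theta = tf.keras.Input(shape=(6,))  # affine parameters from a localization net
warped = STNLayer(out_dims=(32, 32))([feat, theta])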
Example #7
    def __call__(self, fixed, moving):
        fixed_features = self.convnet(fixed)
        moving_features = self.convnet(moving)

        params = self.parameter_regressor(fixed_features, moving_features)
        transformation_matrix = self.__transformationMatrix(params)

        warped = transformer(fixed, transformation_matrix)
        return warped
Example #8
def stn_block(name, theta, inp):
    with tf.variable_scope(name):
        theta = tf.reshape(theta, (-1, 2 * 3))

        # define loc net weight and bias
        loc_in = 112 * 112 * 3
        loc_out = 6
        W_loc = tf.Variable(tf.zeros([loc_in, loc_out]), name='W_loc')
        b_loc = theta

        # tie everything together
        fc_loc = tf.matmul(tf.zeros([opts['batch_size'] * 12, loc_in]),
                           W_loc) + b_loc  # [B*12, 6]
        op = transformer(inp, fc_loc)

    return op
Example #9
    def _stn_layer(self, name_scope, inputs, reuse=False):
        # Flatten inputs
        B1, H1, W1, C1 = inputs.get_shape().as_list()
        fln_inputs = tf.reshape(inputs, [-1, H1 * W1 * C1])
        _, D = fln_inputs.get_shape().as_list()
        # Localization + Spatial Transformer
        with tf.variable_scope(name_scope, reuse=reuse):
            # Localization
            w = tf.get_variable(shape=[D, 6],
                                initializer=self.const_initializer,
                                name='weights')
            b = tf.get_variable(shape=[6],
                                initializer=self.ident_initializer,
                                name='biases')
            theta = tf.nn.tanh(tf.matmul(fln_inputs, w) + b)  # Bx6
            output = transformer(U=inputs, theta=theta, out_size=(H1, W1))

            return output
Example #10
def rotate(x, mins, maxes, image_shape=[28, 28, 1]):
    angle = tf.random_uniform(shape=(),
                              minval=mins,
                              maxval=maxes,
                              dtype=tf.float32)
    # Rotation matrix + zero translation (float literals so tf.stack below infers one dtype)
    theta = [tf.cos(angle), -tf.sin(angle), 0., tf.sin(angle), tf.cos(angle), 0.]
    B, H, W, C = x.shape
    # define loc net weight and bias
    loc_in = H * W * C
    loc_out = 6
    # tf.constant cannot wrap tensors (and has no trainable argument), so build
    # the fixed localization parameters directly
    W_loc = tf.zeros([loc_in, loc_out], name='W_loc')
    b_loc = tf.stack(theta, name='b_loc')

    # tie everything together
    fc_loc = tf.matmul(tf.zeros([B, loc_in]), W_loc) + b_loc
    h_trans = transformer(x, fc_loc)
    return h_trans
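A usage sketch for rotate, assuming MNIST-shaped input; the batch size must be static because tf.zeros([B, loc_in]) needs a concrete B:

x = tf.placeholder(tf.float32, [32, 28, 28, 1])
rotated = rotate(x, mins=-np.pi / 4, maxes=np.pi / 4)  # random angle in [-45, 45] degrees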
Example #11
    def roi_rotate_tensor(self,
                          feature_map,
                          transform_matrixs,
                          box_masks,
                          box_widths,
                          is_debug=False):
        """
		Input:
			feature_map: N * H * W * C
			transform_matrixs: N' * 6
			box_masks: list of tensor N'
			box_widths: N'
		"""
        with tf.variable_scope("RoIrotate"):
            max_width = box_widths[tf.argmax(box_widths,
                                             0,
                                             output_type=tf.int32)]
            box_widths = tf.cast(box_widths, tf.float32)
            tile_feature_maps = []
            # crop_boxes = []
            # crop_sizes = []
            # box_inds = []
            map_shape = tf.shape(feature_map)
            map_shape = tf.cast(map_shape, tf.float32)

            for i, mask in enumerate(
                    box_masks
            ):  # box_masks is a list of num of rois in each feature map
                _feature_map = feature_map[i]
                # _crop_box = tf.constant([0, 0, 8/map_shape[0], box_widths[i]/map_shape[1]])
                # _crop_size = tf.constant([8, tf.cast(box_widths[i], tf.int32)])
                _feature_map = tf.expand_dims(_feature_map, axis=0)
                box_nums = tf.shape(mask)[0]
                _feature_map = tf.tile(_feature_map, [box_nums, 1, 1, 1])
                # crop_boxes.append(_crop_box)
                # crop_sizes.append(_crop_size)
                tile_feature_maps.append(_feature_map)
                # box_inds.append(i)

            tile_feature_maps = tf.concat(
                tile_feature_maps, axis=0)  # N' * H * W * C where N' = N * B
            norm_box_widths = box_widths / map_shape[2]
            ones = tf.ones_like(norm_box_widths)
            norm_box_heights = ones * (8.0 / map_shape[1])
            zeros = tf.zeros_like(norm_box_widths)
            crop_boxes = tf.transpose(
                tf.stack([zeros, zeros, norm_box_heights, norm_box_widths]))
            """
			box_height = ones * 8
			box_height = tf.cast(box_height, tf.int32)
			box_width = ones * max_width
			box_width = tf.cast(box_width, tf.int32)
			"""
            crop_size = tf.stack([8, max_width])
            # crop_boxes = tf.stack(crop_boxes, axis=0)
            # crop_sizes = tf.stack(crop_sizes, axis=0)

            trans_feature_map = transformer(tile_feature_maps,
                                            transform_matrixs)

            # box_inds = tf.concat(box_masks, axis=0)
            box_inds = tf.range(tf.shape(trans_feature_map)[0])
            rois = tf.image.crop_and_resize(trans_feature_map, crop_boxes,
                                            box_inds, crop_size)

            pad_rois = tf.image.pad_to_bounding_box(rois, 0, 0, 8, max_width)

            print "pad_rois: ", pad_rois

            return pad_rois
Example #12
from stn import spatial_transformer_network as transformer
import numpy as np
import tensorflow as tf

# params
n_fc = 6
B, H, W, C = (2, 200, 200, 3)

# identity transform
initial = np.array([[1., 0, 0], [0, 1., 0]])
initial = initial.astype('float32').flatten()

# input placeholder
x = tf.placeholder(tf.float32, [B, H, W, C])

# localization network
W_fc1 = tf.Variable(tf.zeros([H * W * C, n_fc]), name='W_fc1')
b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')
h_fc1 = tf.matmul(tf.zeros([B, H * W * C]), W_fc1) + b_fc1

# spatial transformer layer
h_trans = transformer(x, h_fc1)
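Because W_fc1 is all zeros, h_fc1 always equals the identity bias, so the transformer starts out as an identity warp regardless of the input. A quick sanity-check sketch:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    imgs = np.random.rand(B, H, W, C).astype('float32')
    out = sess.run(h_trans, feed_dict={x: imgs})
    print(np.allclose(out, imgs, atol=1e-4))  # identity transform: output ~ input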
Example #13
    def encoder(self, imgs, activation, is_training, batch_size, img_shape,
                channels):
        with tf.variable_scope('encoder',
                               reuse=tf.AUTO_REUSE,
                               initializer=xavier_initializer_conv2d(),
                               regularizer=l2_regularizer(0.01)):
            # stn >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
            n_fc = 6
            initial = np.array([[1., 0, 0], [0, 1., 0]])
            initial = initial.astype('float32').flatten()

            W_fc1 = tf.Variable(
                tf.zeros(shape=[img_shape * img_shape * channels, n_fc]),
                name='W_fc1',
                validate_shape=False)
            b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')
            h_fc1 = tf.matmul(
                tf.zeros([batch_size, img_shape * img_shape * channels]),
                W_fc1) + b_fc1

            h_trans = transformer(imgs, h_fc1)
            # stn <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

            e_conv = self.coord_conv(h_trans,
                                     48,
                                     3,
                                     padding='same',
                                     activation=None)
            e_conv = tf.layers.batch_normalization(e_conv,
                                                   training=is_training,
                                                   fused=True)
            e_conv = activation(e_conv)
            e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

            e_conv = self.coord_conv(e_conv,
                                     92,
                                     3,
                                     padding='same',
                                     activation=None)
            e_conv = tf.layers.batch_normalization(e_conv,
                                                   training=is_training,
                                                   fused=True)
            e_conv = activation(e_conv)
            e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

            e_conv = self.coord_conv(e_conv,
                                     256,
                                     3,
                                     padding='same',
                                     activation=None)
            e_conv = tf.layers.batch_normalization(e_conv,
                                                   training=is_training,
                                                   fused=True)
            e_conv = activation(e_conv)
            e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

            e_conv = self.coord_conv(e_conv,
                                     256,
                                     3,
                                     padding='same',
                                     activation=None)
            e_conv = tf.layers.batch_normalization(e_conv,
                                                   training=is_training,
                                                   fused=True)
            e_conv = activation(e_conv)
            e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

            e_conv = self.coord_conv(e_conv,
                                     256,
                                     3,
                                     padding='same',
                                     activation=None)
            e_conv = tf.layers.batch_normalization(e_conv,
                                                   training=is_training,
                                                   fused=True)
            e_conv = activation(e_conv)
            e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

            e_conv = self.coord_conv(e_conv,
                                     256,
                                     3,
                                     padding='same',
                                     activation=None)
            e_conv = tf.layers.batch_normalization(e_conv,
                                                   training=is_training,
                                                   fused=True)
            e_conv = activation(e_conv)
            e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

            lv = tf.layers.flatten(e_conv)
            return lv
Example #14
    def build_model(self):
        # Helper Variables
        self.global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
        self.global_step_inc = self.global_step_tensor.assign(self.global_step_tensor + 1)
        self.global_epoch_tensor = tf.Variable(0, trainable=False, name='global_epoch')
        self.global_epoch_inc = self.global_epoch_tensor.assign(self.global_epoch_tensor + 1)

        # Inputs to the network
        with tf.variable_scope('inputs'):
            self.x, y, self.length, self.lab_length = self.data_loader.get_input()
            self.y = tf.contrib.layers.dense_to_sparse(y, eos_token=-1)
            self.x = tf.expand_dims(self.x, 3)
            # Center Images
            x_shift = (tf.shape(self.x)[2] - self.length) / tf.constant(2)
            y_shift = tf.zeros_like(x_shift)
            translation_vector = tf.cast(tf.stack([x_shift, y_shift], axis=1), tf.float32)
            self.x = tf.contrib.image.translate(self.x, translation_vector)
            self.length = tf.cast(tf.math.ceil(tf.math.divide(self.length, tf.constant(self.reduce_factor))), tf.int32)
            batch_size = tf.shape(self.x)[0]
            self.is_training = tf.placeholder(tf.bool, name='Training_flag')
        tf.add_to_collection('inputs', self.x)
        tf.add_to_collection('inputs', self.length)
        tf.add_to_collection('inputs', self.lab_length)
        tf.add_to_collection('inputs', y)
        tf.add_to_collection('inputs', self.is_training)

        # Define CNN variables
        initializer = tf.contrib.layers.xavier_initializer_conv2d()

        out_W = tf.Variable(tf.truncated_normal([2 * self.rnn_num_hidden, self.data_loader.num_classes], stddev=0.1),
                            name='out_W')
        out_b = tf.Variable(tf.constant(0., shape=[self.data_loader.num_classes]), name='out_b')

        # localization network
        W_fc1 = tf.Variable(tf.zeros([self.stn_loc_fc, 6]), name='W_fc1')
        b_fc1 = tf.Variable(initial_value=[1., 0., 0., 0., 1., 0.], name='b_fc1')
        with tf.name_scope('Localization'):
            conv_loc = tf.layers.conv2d(self.x, self.stn_loc_conv_d[0], self.stn_loc_conv_s[0], padding='same')
            conv_loc = tf.nn.leaky_relu(conv_loc)
            conv_loc = tf.layers.max_pooling2d(conv_loc, 2, 2, padding='same')
            conv_loc = tf.layers.conv2d(conv_loc, self.stn_loc_conv_d[1], self.stn_loc_conv_s[1], padding='same')
            conv_loc = tf.nn.leaky_relu(conv_loc)

            fc_loc = tf.reduce_mean(conv_loc, axis=[1, 2])
            fc_loc = tf.layers.dense(fc_loc, self.stn_loc_fc)
            fc_loc = tf.nn.leaky_relu(fc_loc)
            theta = tf.matmul(fc_loc, W_fc1) + b_fc1

        # spatial transformer network
        h_trans = transformer(self.x, theta)

        # CNNs
        with tf.name_scope('CNN_Block_1'):
            conv1_out = tf.layers.dropout(h_trans, self.conv_dropouts[0], noise_shape=tf.concat(
                [tf.reshape(batch_size, [-1]), tf.constant(value=[1, 1, 1])], 0), training=self.is_training)
            conv1_out = tf.layers.conv2d(conv1_out, self.conv_depths[0], self.conv_patch_sizes[0], padding='same',
                                         activation=None, kernel_initializer=initializer)
            conv1_out = tf.layers.batch_normalization(conv1_out)
            conv1_out = tf.nn.leaky_relu(conv1_out)
            conv1_out = tf.layers.max_pooling2d(conv1_out, 2, 2, padding='same')

        with tf.name_scope('CNN_Block_2'):
            conv2_out = tf.layers.dropout(conv1_out, self.conv_dropouts[1], noise_shape=tf.concat(
                [tf.reshape(batch_size, [-1]), tf.constant(value=[1, 1, self.conv_depths[0]])], 0), training=self.is_training)
            conv2_out = tf.layers.conv2d(conv2_out, self.conv_depths[1], self.conv_patch_sizes[1], padding='same',
                                         activation=None, kernel_initializer=initializer)
            conv2_out = tf.layers.batch_normalization(conv2_out)
            conv2_out = tf.nn.leaky_relu(conv2_out)
            conv2_out = tf.layers.max_pooling2d(conv2_out, 2, 2, padding='same')

        with tf.name_scope('CNN_Block_3'):
            conv3_out = tf.layers.dropout(conv2_out, self.conv_dropouts[2], noise_shape=tf.concat(
                [tf.reshape(batch_size, [-1]), tf.constant(value=[1, 1, self.conv_depths[1]])], 0), training=self.is_training)
            conv3_out = tf.layers.conv2d(conv3_out, self.conv_depths[2], self.conv_patch_sizes[2], padding='same',
                                         activation=None, kernel_initializer=initializer)
            conv3_out = tf.layers.batch_normalization(conv3_out)
            conv3_out = tf.nn.leaky_relu(conv3_out)
            conv3_out = tf.layers.max_pooling2d(conv3_out, 2, 2, padding='same')

        with tf.name_scope('CNN_Block_4'):
            conv4_out = tf.layers.dropout(conv3_out, self.conv_dropouts[3], noise_shape=tf.concat(
                [tf.reshape(batch_size, [-1]), tf.constant(value=[1, 1, self.conv_depths[2]])], 0), training=self.is_training)
            conv4_out = tf.layers.conv2d(conv4_out, self.conv_depths[3], self.conv_patch_sizes[3], padding='same',
                                         activation=None, kernel_initializer=initializer)
            conv4_out = tf.layers.batch_normalization(conv4_out)
            conv4_out = tf.nn.leaky_relu(conv4_out)

        with tf.name_scope('CNN_Block_5'):
            conv5_out = tf.layers.dropout(conv4_out, self.conv_dropouts[4], noise_shape=tf.concat(
                [tf.reshape(batch_size, [-1]), tf.constant(value=[1, 1, self.conv_depths[3]])], 0), training=self.is_training)
            conv5_out = tf.layers.conv2d(conv5_out, self.conv_depths[4], self.conv_patch_sizes[4], padding='same',
                                         activation=None, kernel_initializer=initializer)
            conv5_out = tf.layers.batch_normalization(conv5_out)
            conv5_out = tf.nn.leaky_relu(conv5_out)

        output = tf.transpose(conv5_out, [2, 0, 1, 3])
        output = tf.reshape(output, [-1, batch_size, (self.config.im_height//self.reduce_factor)*self.conv_depths[4]])
        self.length = tf.tile(tf.expand_dims(tf.shape(output)[0], axis=0), [batch_size])

        # RNN
        with tf.variable_scope('MultiRNN', reuse=tf.AUTO_REUSE):
            for i in range(self.rnn_num_layers):
                output = tf.layers.dropout(output, self.rnn_dropout, training=self.is_training)
                lstm = tf.contrib.cudnn_rnn.CudnnLSTM(1, self.rnn_num_hidden, 'linear_input', 'bidirectional')
                output, state = lstm(output)

        # Fully Connected
        with tf.name_scope('Dense'):
            output = tf.concat(output, 2)
            # Linear dropout
            output = tf.layers.dropout(output, self.linear_dropout, training=self.is_training)
            # Reshaping to apply the same weights over the timesteps
            output = tf.reshape(output, [-1, 2*self.rnn_num_hidden])
            # Doing the affine projection
            logits = tf.matmul(output, out_W) + out_b

        # Reshaping back to the original shape
        self.logits = tf.reshape(logits, [-1, batch_size, self.data_loader.num_classes])

        with tf.variable_scope('loss-acc'):
            self.loss = warpctc_tensorflow.ctc(self.logits, self.y.values, self.lab_length, self.length,
                                               self.data_loader.num_classes - 1)
            self.cost = tf.reduce_mean(self.loss)
            self.prediction = tf.nn.ctc_beam_search_decoder(self.logits, sequence_length=self.length,
                                                            merge_repeated=False)
            self.cer = self.calc_cer(self.prediction[0][0], self.y)

        with tf.variable_scope('train_step'):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_step = tf.train.RMSPropOptimizer(learning_rate=self.config.learning_rate,
                                                            decay=self.config.learning_rate_decay).minimize(
                    self.loss, global_step=self.global_step_tensor)

        tf.add_to_collection('train', self.train_step)
        tf.add_to_collection('train', self.cost)
        tf.add_to_collection('train', self.cer)
Example #15
def spatial_transformer(images,
                        encoder_blocks=[128],
                        is_training=True,
                        reuse=False,
                        is_chief=True,
                        verbose=False,
                        **kwargs):
    """A simple Spatial Transformer Network constrained to TSR style transformation.
    
    Args:
        images: a 4D tensor of input images in [0., 1.]
        encoder_blocks: A list of integers indicating the number of each channel in the encoder. The last layer of the encoder 
            is fully-connected, while the rests are convolutional blocks with leaky ReLU and batch norm.
        is_training: whether we are in training mode or not 
        reuse: Whether to reuse the model variables
        is_chief: whether the model is run by the chief worker
        verbose: verbosity level
        kwargs: remaining keyword arguments (unused here)
        
    Returns:
        A 4D Tensor of images in [0., 1.]
    """
    del is_chief
    del kwargs

    # Use STN from https://github.com/kevinzakka/spatial-transformer-network
    sys.path.append('spatial-transformer-network')
    from stn import spatial_transformer_network as transformer
    with tf.control_dependencies([tf.assert_greater_equal(images, 0.)]):
        with tf.control_dependencies([tf.assert_less_equal(images, 1.)]):
            net = images
            in_dims = images.get_shape().as_list()[1:]

    with tf.variable_scope('localization_network', reuse=reuse):
        ## Encoder
        with tf.contrib.framework.arg_scope(
            [slim.conv2d],
                kernel_size=[3, 3],
                padding='SAME',
                stride=2,
                activation_fn=tf.nn.leaky_relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params={
                    'is_training': is_training,
                    'decay': 0.9,
                    'epsilon': 1e-5
                },
                weights_initializer=tf.random_normal_initializer(0, 0.02)):
            for block_id, block_num_filters in enumerate(encoder_blocks[:-1]):
                scope = 'conv_%d' % (block_id + 1)
                net = slim.conv2d(net, block_num_filters, scope=scope)
                if verbose:
                    print('   \033[34m%s:\033[0m' % scope, net.get_shape())

        ## STN
        # fc 1
        net = tf.layers.flatten(net)
        net = slim.fully_connected(
            net,
            encoder_blocks[-1],
            activation_fn=tf.nn.tanh,
            weights_initializer=tf.zeros_initializer(),
            biases_initializer=tf.truncated_normal_initializer(stddev=0.01))
        if verbose: print('   \033[34mfc1:\033[0m', net.get_shape())
        # rotation angle (init: 0)
        theta = slim.fully_connected(
            net,
            1,
            activation_fn=tf.nn.tanh,
            weights_initializer=tf.zeros_initializer(),
            biases_initializer=tf.truncated_normal_initializer(
                stddev=0.01)) * np.pi
        rotate_matrix = tf.concat([
            tf.cos(theta), -tf.sin(theta),
            tf.zeros(tf.shape(theta)),
            tf.sin(theta),
            tf.cos(theta),
            tf.zeros(tf.shape(theta)),
            tf.zeros(tf.shape(theta)),
            tf.zeros(tf.shape(theta)),
            tf.ones(tf.shape(theta))
        ],
                                  axis=-1)
        rotate_matrix = tf.reshape(rotate_matrix, (-1, 3, 3))
        # translation and scale (init: identity)
        translate_matrix = slim.fully_connected(
            net,
            4,
            activation_fn=None,
            weights_initializer=tf.zeros_initializer(),
            biases_initializer=tf.constant_initializer([1., 1., 0.,
                                                        0.]))  # sx, sy, tx, ty
        translate_matrix = tf.split(translate_matrix, 4, axis=-1)
        translate_matrix = tf.concat([
            translate_matrix[0],
            tf.zeros(tf.shape(theta)), translate_matrix[2],
            tf.zeros(tf.shape(theta)), translate_matrix[1],
            translate_matrix[3],
            tf.zeros(tf.shape(theta)),
            tf.zeros(tf.shape(theta)),
            tf.ones(tf.shape(theta))
        ],
                                     axis=1)
        translate_matrix = tf.reshape(translate_matrix, (-1, 3, 3))
        # final transformation
        transform_matrix = tf.matmul(rotate_matrix, translate_matrix)
        transform_matrix = tf.layers.flatten(transform_matrix)
        transform_matrix = transform_matrix[:, :6]
        images = transformer(images, transform_matrix, out_dims=in_dims)
        images = tf.clip_by_value(images, 0., 1.)
        return images
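A small numpy sketch of the same composition, showing why only the first six entries of the flattened 3x3 product are kept (the third row of an affine matrix is always [0, 0, 1]); the parameter values are hypothetical regressor outputs:

import numpy as np

angle, sx, sy, tx, ty = 0.3, 1.2, 0.9, 0.1, -0.2
R = np.array([[np.cos(angle), -np.sin(angle), 0.],
              [np.sin(angle), np.cos(angle), 0.],
              [0., 0., 1.]])
T = np.array([[sx, 0., tx],
              [0., sy, ty],
              [0., 0., 1.]])
M = (R @ T).flatten()[:6]  # the 2x3 affine the transformer consumes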
Example #16
    def transformer_net(self, x, theta):
        return transformer(x, theta)
Example #17
    def call(self, inputs):

        x_loc = self.localisation_net(inputs)
        h_trans = transformer(inputs, x_loc, self.out_dims)
        return h_trans
Example #18
loss = tf.reduce_mean(tf.square(para[:, 0] - angle_tensor))
error = tf.reduce_mean(tf.abs(para[:, 0] - angle_tensor) * 180)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

from stn import spatial_transformer_network as transformer
pred_image = []
for i in range(16):
    theta = tf.stack(
        [(tf.cos(para[i, 0]), -tf.sin(para[i, 0]), tf.constant(0.0)),
         (tf.sin(para[i, 0]), tf.cos(para[i, 0]), tf.constant(0.0))],
        axis=0)
    iImg = input_img[i, :, :, 0]
    iImg = tf.expand_dims(iImg, axis=0)
    iImg = tf.expand_dims(iImg, axis=3)
    pImg = transformer(iImg, theta, out_dims=[320, 320])
    pred_image.append(pImg)
pred_image = tf.concat(pred_image, axis=0)

optimizer = tf.train.AdamOptimizer(1e-3)

# run the batch-norm update ops together with the training step
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()  # tf.initialize_all_variables is deprecated

sess = tf.Session()
Example #19
input_img = np.concatenate([img1, img2, img3, img4], axis=0)
B, H, W, C = input_img.shape
print("Input Img Shape: {}".format(input_img.shape))

# identity transform
theta = np.array([[1., 0, 0], [0, 1., 0]])

x = tf.placeholder(tf.float32, [None, H, W, C])

with tf.variable_scope('spatial_transformer'):
    theta = theta.astype('float32')
    theta = theta.flatten()

    # define loc net weight and bias
    loc_in = H * W * C
    loc_out = 6
    W_loc = tf.Variable(tf.zeros([loc_in, loc_out]), name='W_loc')
    b_loc = tf.Variable(initial_value=theta, name='b_loc')

    # tie everything together
    fc_loc = tf.matmul(tf.zeros([B, loc_in]), W_loc) + b_loc
    h_trans = transformer(x, fc_loc)

# run session
sess = tf.Session()
sess.run(tf.global_variables_initializer())
y = sess.run(h_trans, feed_dict={x: input_img})
print("y: {}".format(y.shape))
array2img(y[0]).show()