 def testBlockSizeNotDivisibleWidth(self):
   # The block size divides width but not height.
   x_np = [[[[1], [2], [3]],
            [[3], [4], [7]]]]
   block_size = 3
   with self.assertRaises(IndexError):
     _ = tf.space_to_depth(x_np, block_size)
 def testInputWrongDimMissingBatch(self):
   # The input is missing the first dimension ("batch")
   x_np = [[[1], [2]],
           [[3], [4]]]
   block_size = 2
   with self.assertRaises(ValueError):
     _ = tf.space_to_depth(x_np, block_size)
 def testBasic(self):
   x_np = [[[[1], [2]],
            [[3], [4]]]]
   with self.test_session(use_gpu=False):
     block_size = 2
     out_tf = tf.space_to_depth(x_np, block_size)
     self.assertAllEqual(out_tf.eval(), [[[[1, 2, 3, 4]]]])
 def testBlockSizeNotDivisibleBoth(self):
   # The block size divides neither width nor height.
   x_np = [[[[1], [2]],
            [[3], [4]]]]
   block_size = 3
   with self.assertRaises(IndexError):
     _ = tf.space_to_depth(x_np, block_size)
 def testDepthInterleaved(self):
   x_np = [[[[1, 10], [2, 20]],
            [[3, 30], [4, 40]]]]
   with self.test_session(use_gpu=False):
     block_size = 2
     out_tf = tf.space_to_depth(x_np, block_size)
     self.assertAllEqual(out_tf.eval(), [[[[1, 10, 2, 20, 3, 30, 4, 40]]]])
 def testInputWrongDimMissingDepth(self):
   # The input is missing the last dimension ("depth")
   x_np = [[[1, 2],
            [3, 4]]]
   block_size = 2
   with self.assertRaises(ValueError):
     out_tf = tf.space_to_depth(x_np, block_size)
     out_tf.eval()
 def testBlockSize0(self):
   # The block size is 0.
   x_np = [[[[1], [2]],
            [[3], [4]]]]
   block_size = 0
   with self.assertRaises(ValueError):
     out_tf = tf.space_to_depth(x_np, block_size)
     out_tf.eval()
 def testBlockSizeOne(self):
   # The block size is 1. The block size needs to be > 1.
   x_np = [[[[1], [2]],
            [[3], [4]]]]
   block_size = 1
   with self.assertRaises(ValueError):
     out_tf = tf.space_to_depth(x_np, block_size)
     out_tf.eval()
 def testBlockSizeLarger(self):
   # The block size is too large for this input.
   x_np = [[[[1], [2]],
            [[3], [4]]]]
   block_size = 10
   with self.assertRaises(IndexError):
     out_tf = tf.space_to_depth(x_np, block_size)
     out_tf.eval()
 def testBlockSizeNotDivisibleHeight(self):
   # The block size divides height but not width.
   x_np = [[[[1], [2]],
            [[3], [4]],
            [[5], [6]]]]
   block_size = 3
   with self.assertRaises(IndexError):
     _ = tf.space_to_depth(x_np, block_size)
 def testDepthInterleavedDepth3(self):
   x_np = [[[[1, 2, 3], [4, 5, 6]],
            [[7, 8, 9], [10, 11, 12]]]]
   with self.test_session(use_gpu=False):
     block_size = 2
     out_tf = tf.space_to_depth(x_np, block_size)
     self.assertAllEqual(out_tf.eval(),
                         [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]])
 def testSpaceToDepthTranspose(self):
     x = np.arange(5 * 10 * 16 * 7, dtype=np.float32).reshape([5, 10, 16, 7])
     block_size = 2
     paddings = np.zeros((2, 2), dtype=np.int32)
     y1 = tf.space_to_batch(x, paddings, block_size=block_size)
     y2 = tf.transpose(
         tf.space_to_depth(tf.transpose(x, [3, 1, 2, 0]), block_size=block_size),
         [3, 1, 2, 0])
     with self.test_session():
         self.assertAllEqual(y1.eval(), y2.eval())
 def testBlockSizeNotDivisibleDepth(self):
   # The depth is not divisible by the square of the block size.
   x_np = [[[[1, 1, 1, 1],
             [2, 2, 2, 2]],
            [[3, 3, 3, 3],
             [4, 4, 4, 4]]]]
   block_size = 3
   with self.assertRaises(IndexError):
     _ = tf.space_to_depth(x_np, block_size)
 def testLargerInput4x4(self):
   x_np = [[[[1], [2], [5], [6]],
            [[3], [4], [7], [8]],
            [[9], [10], [13], [14]],
            [[11], [12], [15], [16]]]]

   with self.test_session(use_gpu=False):
     block_size = 4
     out_tf = tf.space_to_depth(x_np, block_size)
     self.assertAllEqual(
         out_tf.eval(),
         [[[[1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16]]]])
 def testDepthInterleavedLarge(self):
   x_np = [[[[1, 10], [2, 20], [5, 50], [6, 60]],
            [[3, 30], [4, 40], [7, 70], [8, 80]],
            [[9, 90], [10, 100], [13, 130], [14, 140]],
            [[11, 110], [12, 120], [15, 150], [16, 160]]]]
   with self.test_session(use_gpu=False):
     block_size = 2
     out_tf = tf.space_to_depth(x_np, block_size)
     self.assertAllEqual(out_tf.eval(),
                         [[[[1, 10, 2, 20, 3, 30, 4, 40],
                            [5, 50, 6, 60, 7, 70, 8, 80]],
                           [[9, 90, 10, 100, 11, 110, 12, 120],
                            [13, 130, 14, 140, 15, 150, 16, 160]]]])
 def testNonSquare(self):
   x_np = [[[[1, 10], [2, 20]],
            [[3, 30], [4, 40]],
            [[5, 50], [6, 60]],
            [[7, 70], [8, 80]],
            [[9, 90], [10, 100]],
            [[11, 110], [12, 120]]]]
   with self.test_session(use_gpu=False):
     block_size = 2
     out_tf = tf.space_to_depth(x_np, block_size)
     self.assertAllEqual(out_tf.eval(),
                         [[[[1, 10, 2, 20, 3, 30, 4, 40]],
                           [[5, 50, 6, 60, 7, 70, 8, 80]],
                           [[9, 90, 10, 100, 11, 110, 12, 120]]]])
 def _checkGrad(self, x, block_size):
   assert 4 == x.ndim
   with self.test_session():
     tf_x = tf.convert_to_tensor(x)
     tf_y = tf.space_to_depth(tf_x, block_size)
     epsilon = 1e-2
     (x_jacob_t, x_jacob_n) = tf.test.compute_gradient(
         tf_x,
         x.shape,
         tf_y,
         tf_y.get_shape().as_list(),
         x_init_value=x,
         delta=epsilon)

   self.assertAllClose(x_jacob_t, x_jacob_n, rtol=1e-2, atol=epsilon)
Example #18
 def __call__(self, shape, dtype='float32'):  # tf needs partition_info=None
     shape = list(shape)
     if self.scale == 1:
         return self.initializer(shape)
     new_shape = shape[:3] + [shape[3] // (self.scale ** 2)]
     if type(self.initializer) is dict:
         self.initializer = initializers.deserialize(self.initializer)
     var_x = self.initializer(new_shape, dtype)
     var_x = tf.transpose(var_x, perm=[2, 0, 1, 3])
     var_x = tf.image.resize_nearest_neighbor(
                      var_x,
                      size=(shape[0] * self.scale, shape[1] * self.scale),
                      align_corners=True)
     var_x = tf.space_to_depth(var_x, block_size=self.scale, data_format='NHWC')
     var_x = tf.transpose(var_x, perm=[1, 2, 0, 3])
     return var_x
Example #19
def icnr_keras(shape, dtype=None):
    """
    Custom initializer for subpix upscaling
    From https://github.com/kostyaev/ICNR
    Note: upscale factor is fixed to 2, and the base initializer is fixed to random normal.
    """
    # TODO Roll this into ICNR_init when porting GAN 2.2
    shape = list(shape)
    scale = 2
    initializer = tf.keras.initializers.RandomNormal(0, 0.02)

    new_shape = shape[:3] + [int(shape[3] / (scale ** 2))]
    var_x = initializer(new_shape, dtype)
    var_x = tf.transpose(var_x, perm=[2, 0, 1, 3])
    var_x = tf.image.resize_nearest_neighbor(var_x, size=(shape[0] * scale, shape[1] * scale))
    var_x = tf.space_to_depth(var_x, block_size=scale)
    var_x = tf.transpose(var_x, perm=[1, 2, 0, 3])
    return var_x
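For context, a minimal sketch (not from the original repository, assuming TF 1.x) of calling the initializer directly: the kernel shape is [kh, kw, in_ch, out_ch], out_ch must be a multiple of scale**2 = 4, and the result is typically passed as the kernel_initializer of the convolution that feeds a x2 pixel shuffle (tf.depth_to_space).

import tensorflow as tf

# Hypothetical kernel shape; out_ch = 128 is divisible by 2**2.
kernel = icnr_keras([3, 3, 32, 128])
print(kernel.get_shape().as_list())  # [3, 3, 32, 128]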
Example #20
def read_and_batchify_image(image_path, shape, image_type="jpg"):
    """Return the original image as read from image_path and the image splitted as a batch tensor.
    Args:
        image_path: image path
        shape: batch shape, like: [no_patches_per_side**2, patch_side, patch_side, 3]
        image_type: image type
    Returns:
        original_image, patches
        where original_image is a tensor in the format [width, height, 3]
        and patches is a tensor of processed images, ready to be classified, with size
        [batch_size, w, h, 3]"""

    original_image = read_image(image_path, 3, image_type)

    # extract values from shape
    patch_side = shape[1]
    no_patches_per_side = int(math.sqrt(shape[0]))
    resized_input_side = patch_side * no_patches_per_side

    resized_image = resize_bl(original_image, resized_input_side)

    resized_image = tf.expand_dims(resized_image, 0)
    patches = tf.space_to_depth(resized_image, patch_side)
    print(patches)
    patches = tf.squeeze(patches, [0])  #4,4,192*192*3
    print(patches)
    patches = tf.reshape(patches,
                         [no_patches_per_side**2, patch_side, patch_side, 3])
    print(patches)
    patches_a = tf.split(0, no_patches_per_side**2, patches)
    print(patches_a)
    normalized_patches = []
    for patch in patches_a:
        patch_as_input_image = zm_mp(
            tf.reshape(tf.squeeze(patch, [0]), [patch_side, patch_side, 3]))
        print(patch_as_input_image)
        normalized_patches.append(patch_as_input_image)

    # the last patch is not a "patch" but the whole image resized to patch_side² x 3
    # to give a glance to the whole image, in parallel with the patch analysis
    normalized_patches.append(zm_mp(resize_bl(original_image, patch_side)))
    batch_of_patches = tf.pack(normalized_patches)
    return tf.image.convert_image_dtype(original_image,
                                        tf.uint8), batch_of_patches
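A self-contained sketch (assuming TF 1.x; sizes are illustrative, not the ones used above) of the patch-extraction trick this function relies on: tf.space_to_depth with block_size=patch_side packs each patch_side x patch_side block into the channel axis, so a single reshape recovers the individual patches.

import numpy as np
import tensorflow as tf

patch_side = 4
image = tf.constant(np.arange(8 * 8 * 3, dtype=np.float32).reshape(1, 8, 8, 3))
cells = tf.space_to_depth(image, patch_side)                  # [1, 2, 2, 48]
patches = tf.reshape(cells, [-1, patch_side, patch_side, 3])  # [4, 4, 4, 3]
with tf.Session() as sess:
    print(sess.run(patches).shape)  # (4, 4, 4, 3)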
Example #21
def space_to_depth_x2(x):
    """Thin wrapper for Tensorflow space_to_depth with block_size=2."""
    # Import currently required to make Lambda work.
    # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273
    import tensorflow as tf
    return tf.space_to_depth(x, block_size=2)
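A minimal usage sketch (not part of the original snippet, assuming TF 1.x with tf.keras): the wrapper is what gets handed to a Lambda layer, e.g. for a YOLOv2-style passthrough/reorg connection; the input size below is illustrative.

import tensorflow as tf

inp = tf.keras.layers.Input(shape=(416, 416, 64))
# (416, 416, 64) -> (208, 208, 256): each 2x2 spatial block is folded into channels.
reorg = tf.keras.layers.Lambda(space_to_depth_x2, name='space_to_depth_x2')(inp)
model = tf.keras.Model(inp, reorg)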
Example #22
def dec_down(
        gs, zs_posterior, training, init = False, dropout_p = 0.5,
        n_scales = 1, n_residual_blocks = 2, activation = "elu",
        n_latent_scales = 2):
    assert n_residual_blocks % 2 == 0
    gs = list(gs)
    zs_posterior = list(zs_posterior)
    with model_arg_scope(
            init = init, dropout_p = dropout_p, activation = activation):
        # outputs
        hs = [] # hidden units
        ps = [] # priors
        zs = [] # prior samples
        # prepare input
        n_filters = gs[-1].shape.as_list()[-1]
        h = nn.nin(gs[-1], n_filters)
        for l in range(n_scales):
            # level module
            ## hidden units
            for i in range(n_residual_blocks // 2):
                h = nn.residual_block(h, gs.pop())
                hs.append(h)
            if l < n_latent_scales:
                ## prior
                spatial_shape = h.shape.as_list()[1]
                n_h_channels = h.shape.as_list()[-1]
                if spatial_shape == 1:
                    ### no spatial correlations
                    p = latent_parameters(h)
                    ps.append(p)
                    z_prior = latent_sample(p)
                    zs.append(z_prior)
                else:
                    ### four autoregressively modeled groups
                    if training:
                        z_posterior_groups = nn.split_groups(zs_posterior[0])
                    p_groups = []
                    z_groups = []
                    p_features = tf.space_to_depth(nn.residual_block(h), 2)
                    for i in range(4):
                        p_group = latent_parameters(p_features, num_filters = n_h_channels)
                        p_groups.append(p_group)
                        z_group = latent_sample(p_group)
                        z_groups.append(z_group)
                        # ar feedback sampled from
                        if training:
                            feedback = z_posterior_groups.pop(0)
                        else:
                            feedback = z_group
                        # prepare input for next group
                        if i + 1 < 4:
                            p_features = nn.residual_block(p_features, feedback)
                    if training:
                        assert not z_posterior_groups
                    # complete prior parameters
                    p = nn.merge_groups(p_groups)
                    ps.append(p)
                    # complete prior sample
                    z_prior = nn.merge_groups(z_groups)
                    zs.append(z_prior)
                ## vae feedback sampled from
                if training:
                    ## posterior
                    z = zs_posterior.pop(0)
                else:
                    ## prior
                    z = z_prior
                for i in range(n_residual_blocks // 2):
                    n_h_channels = h.shape.as_list()[-1]
                    h = tf.concat([h, z], axis = -1)
                    h = nn.nin(h, n_h_channels)
                    h = nn.residual_block(h, gs.pop())
                    hs.append(h)
            else:
                for i in range(n_residual_blocks // 2):
                    h = nn.residual_block(h, gs.pop())
                    hs.append(h)
            # prepare input to next level
            if l + 1 < n_scales:
                n_filters = gs[-1].shape.as_list()[-1]
                h = nn.upsample(h, n_filters)

        assert not gs
        if training:
            assert not zs_posterior

        return hs, ps, zs
Example #23
def reorg(x, stride):
    return tf.space_to_depth(x, block_size=stride)
Example #24
def conv_layers(tensor,
                filters,
                kernels,
                strides=None,
                pool_sizes=None,
                pool_strides=None,
                padding="same",
                activation=tf.nn.relu,
                linear_top_layer=False,
                drop_rates=None,
                drop_type="regular",
                conv_method="conv",
                pool_method="conv",
                pool_activation=None,
                dilations=None,
                batch_norm=False,
                training=False,
                weight_decay=0.0,
                weight_regularizer="l2",
                **kwargs):
    """Builds a stack of convolutional layers with dropout and max pooling."""
    if not filters:
        return tensor

    kernels = _to_array(kernels, len(filters), 1)
    pool_sizes = _to_array(pool_sizes, len(filters), 1)
    pool_strides = _to_array(pool_strides, len(filters), 1)
    strides = _to_array(strides, len(filters), 1)
    drop_rates = _to_array(drop_rates, len(filters), 0.)
    dilations = _to_array(dilations, len(filters), 1)
    conv_method = _to_array(conv_method, len(filters), "conv")
    pool_method = _to_array(pool_method, len(filters), "conv")

    kernel_initializer = tf.glorot_uniform_initializer()
    kernel_regularizer = regularizer_ops.weight_regularizer(
        weight_decay, weight_regularizer)

    conv = {
        "conv":
        functools.partial(tf.keras.layers.Conv2D,
                          kernel_initializer=kernel_initializer,
                          kernel_regularizer=kernel_regularizer),
        "transposed":
        functools.partial(tf.keras.layers.Conv2DTranspose,
                          kernel_initializer=kernel_initializer,
                          kernel_regularizer=kernel_regularizer),
        "separable":
        functools.partial(tf.keras.layers.SeparableConv2D,
                          depthwise_initializer=kernel_initializer,
                          pointwise_initializer=kernel_initializer,
                          depthwise_regularizer=kernel_regularizer,
                          pointwise_regularizer=kernel_regularizer),
    }

    for i, (fs, ks, ss, pz, pr, drp, dl, cm, pm) in enumerate(
            zip(filters, kernels, strides, pool_sizes, pool_strides,
                drop_rates, dilations, conv_method, pool_method)):

        with tf.variable_scope("conv_block_%d" % i):
            if i == len(filters) - 1 and linear_top_layer:
                activation = None
                pool_activation = None
            tensor = noise_ops.dropout(tensor,
                                       drp,
                                       training=training,
                                       type=drop_type)
            if dl > 1:
                conv_kwargs = _merge_dicts(kwargs, {"dilation_rate": dl})
            else:
                conv_kwargs = kwargs

            tensor = conv[cm](filters=fs,
                              kernel_size=ks,
                              strides=ss,
                              padding=padding,
                              use_bias=False,
                              name="conv2d",
                              **conv_kwargs).apply(tensor)
            if activation:
                if batch_norm:
                    tensor = tf.layers.batch_normalization(tensor,
                                                           training=training)
                tensor = activation(tensor)
            if pz > 1:
                if pm == "max":
                    tensor = tf.keras.layers.MaxPool2D(
                        pz, pr, padding, name="max_pool").apply(tensor)
                elif pm == "std":
                    tensor = tf.space_to_depth(tensor,
                                               pz,
                                               name="space_to_depth")
                elif pm == "dts":
                    tensor = tf.depth_to_space(tensor,
                                               pz,
                                               name="depth_to_space")
                else:
                    tensor = conv["conv"](fs,
                                          pz,
                                          pr,
                                          padding,
                                          use_bias=False,
                                          name="strided_conv2d",
                                          **kwargs).apply(tensor)
                    if pool_activation:
                        if batch_norm:
                            tensor = tf.layers.batch_normalization(
                                tensor, training=training)
                        tensor = pool_activation(tensor)
    return tensor
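A short side-by-side sketch (assuming TF 1.x; not from the original module) of the two parameter-free pooling paths selectable above: "max" discards values inside each window, while "std" (space_to_depth) keeps every activation by folding the window into the channel axis.

import tensorflow as tf

x = tf.random_normal([8, 32, 32, 16])
max_pooled = tf.keras.layers.MaxPool2D(2, 2, "same")(x)  # [8, 16, 16, 16]
folded = tf.space_to_depth(x, 2)                         # [8, 16, 16, 64]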
Example #25
def conv_layers(tensor,
                filters,
                kernels,
                strides=None,
                pool_sizes=None,
                pool_strides=None,
                padding="same",
                activation=tf.nn.relu,
                use_bias=False,
                linear_top_layer=False,
                drop_rates=None,
                conv_method="conv",
                pool_method="conv",
                pool_activation=None,
                batch_norm=False,
                training=False,
                weight_decay=0.0002,
                **kwargs):
  """Builds a stack of convolutional layers with dropout and max pooling."""
  if pool_sizes is None:
    pool_sizes = [1] * len(filters)
  if pool_strides is None:
    pool_strides = pool_sizes
  if strides is None:
    strides = [1] * len(filters)
  if drop_rates is None:
    drop_rates = [0.] * len(filters)
  elif isinstance(drop_rates, numbers.Number):
    drop_rates = [drop_rates] * len(filters)

  if conv_method == "conv":
    conv = functools.partial(
      tf.layers.conv2d,
      kernel_initializer=tf.glorot_uniform_initializer(),
      kernel_regularizer=tf.contrib.layers.l2_regularizer(weight_decay))
  elif conv_method == "transposed":
    conv = functools.partial(
      tf.layers.conv2d_transpose,
      kernel_initializer=tf.glorot_uniform_initializer(),
      kernel_regularizer=tf.contrib.layers.l2_regularizer(weight_decay))
  elif conv_method == "separable":
    conv = functools.partial(
      tf.layers.separable_conv2d,
      depthwise_initializer=tf.glorot_uniform_initializer(),
      pointwise_initializer=tf.glorot_uniform_initializer(),
      depthwise_regularizer=tf.contrib.layers.l2_regularizer(weight_decay),
      pointwise_regularizer=tf.contrib.layers.l2_regularizer(weight_decay))

  for i, (fs, ks, ss, pz, pr, drp) in enumerate(
    zip(filters, kernels, strides, pool_sizes, pool_strides, drop_rates)):
    with tf.variable_scope("conv_block_%d" % i):
      if i == len(filters) - 1 and linear_top_layer:
        activation = None
        pool_activation = None
      tensor = tf.layers.dropout(tensor, drp)
      tensor = conv(
        tensor, fs, ks, ss, padding, use_bias=use_bias, name="conv2d",
        **kwargs)
      if activation:
        if batch_norm:
          tensor = batch_normalization(tensor, training=training)
        tensor = activation(tensor)
      if pz > 1:
        if pool_method == "max":
          tensor = tf.layers.max_pooling2d(
            tensor, pz, pr, padding, name="max_pool")
        elif pool_method == "std":
          tensor = tf.space_to_depth(tensor, pz, name="space_to_depth")
        elif pool_method == "dts":
          tensor = tf.depth_to_space(tensor, pz, name="depth_to_space")
        else:
          tensor = conv(
            tensor, fs, pz, pr, padding, use_bias=use_bias,
            name="strided_conv2d", **kwargs)
          if pool_activation:
            if batch_norm:
              tensor = batch_normalization(tensor, training=training)
            tensor = pool_activation(tensor)
  return tensor
Example #26
 def build_graph(parameters):
   input_tensor = tf.placeholder(dtype=parameters["dtype"], name="input",
                                 shape=parameters["input_shape"])
   out = tf.space_to_depth(input_tensor, block_size=parameters["block_size"])
   return [input_tensor], [out]
Example #27
 def forward(self, x, **kwargs):
     return tf.space_to_depth(x, self.block_size), None
Example #28
def discriminator_simplified_api(inputs, is_train=True, reuse=False):
    df_dim = 64  # Dimension of discrim filters in first conv layer. [64]
    c_dim = FLAGS.c_dim  # n_color 3
    batch_size = FLAGS.batch_size  # 64
    w_init = tf.random_normal_initializer(stddev=0.02)
    gamma_init = tf.random_normal_initializer(1., 0.02)
    with tf.variable_scope("discriminator", reuse=reuse):
        tl.layers.set_name_reuse(reuse)

        net_in = InputLayer(inputs, name='d/in')

        net_h0 = \
            Conv2d(
                net_in,
                df_dim,
                (5, 5),
                act=lambda x: tl.act.lrelu(x, 0.2),
                padding='VALID',
                W_init=w_init,
                name='d/h0/conv2d',
            )

        net_h1 = \
            Conv2d(
                net_h0,
                df_dim*2,
                (5, 5),
                act=None,
                padding='VALID',
                W_init=w_init,
                b_init=None,
                name='d/h1/conv2d'
            )

        net_h1 = \
            BatchNormLayer(
                net_h1,
                act=lambda x: tl.act.lrelu(x, 0.2),
                is_train=is_train,
                gamma_init=gamma_init,
                name='d/h1/batch_norm'
            )

        net_h1.outputs = tf.space_to_depth(net_h1.outputs, 2)

        net_h2 = \
            Conv2d(
                net_h1,
                df_dim*4,
                (5, 5),
                act=None,
                padding='VALID',
                W_init=w_init,
                b_init=None,
                name='d/h2/conv2d',
            )

        net_h2 = \
            BatchNormLayer(
                net_h2,
                act=lambda x: tl.act.lrelu(x, 0.2),
                is_train=is_train,
                gamma_init=gamma_init,
                name='d/h2/batch_norm',
            )

        net_h2.outputs = tf.space_to_depth(net_h2.outputs, 2)

        net_h3 = \
            Conv2d(
                net_h2,
                df_dim*4,
                (5, 5),
                act=None,
                padding='VALID',
                W_init=w_init,
                b_init=None,
                name='d/h3/conv2d',
            )

        net_h3 = \
            BatchNormLayer(
                net_h3,
                act=lambda x: tl.act.lrelu(x, 0.2),
                is_train=is_train,
                gamma_init=gamma_init,
                name='d/h3/batch_norm',
            )

        net_h3.outputs = tf.space_to_depth(net_h3.outputs, 2)

        net_h3 = \
            Conv2d(
                net_h2,
                df_dim*4,
                (5, 5),
                act=None,
                padding='VALID',
                W_init=w_init,
                b_init=None,
                name='d/h4/conv2d',
            )

        net_h3 = \
            BatchNormLayer(
                net_h3,
                act=lambda x: tl.act.lrelu(x, 0.2),
                is_train=is_train,
                gamma_init=gamma_init,
                name='d/h4/batch_norm',
            )

        net_h4 = \
            FlattenLayer(
                net_h3,
                name='d/h5/flatten',
            )

        net_h4 = \
            DenseLayer(
                net_h4,
                n_units=1,
                act=tf.identity,
                W_init=w_init,
                name='d/h5/lin_sigmoid',
            )

        logits = net_h4.outputs
        net_h4.outputs = tf.nn.sigmoid(net_h4.outputs)
    return net_h4, logits
 def _testOne(self, inputs, block_size, outputs):
   for use_gpu in [False, True]:
     with self.test_session(use_gpu=use_gpu):
       x_tf = tf.space_to_depth(tf.to_float(inputs), block_size)
       self.assertAllEqual(x_tf.eval(), outputs)
 def testUnknownShape(self):
   t = tf.space_to_depth(tf.placeholder(tf.float32), block_size=4)
   self.assertEqual(4, t.get_shape().ndims)
Example #31
def space_to_depth_x2(x):
    """Thin wrapper for Tensorflow space_to_depth with block_size=2."""
    # Import currently required to make Lambda work.
    # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273
    import tensorflow as tf
    return tf.space_to_depth(x, block_size=2)
Example #32
def position_sensitive_crop_regions(image,
                                    boxes,
                                    box_ind,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool,
                                    extrapolation_value=None):
    """Position-sensitive crop and pool rectangular regions from a feature grid.

    The output crops are split into `spatial_bins_y` vertical bins
    and `spatial_bins_x` horizontal bins. For each intersection of a vertical
    and a horizontal bin the output values are gathered by performing
    `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
    channels of the image. This reduces `depth` by a factor of
    `(spatial_bins_y * spatial_bins_x)`.

    When global_pool is True, this function implements a differentiable version
    of position-sensitive RoI pooling used in
    [R-FCN detection system](https://arxiv.org/abs/1605.06409).

    When global_pool is False, this function implements a differentiable version
    of position-sensitive assembling operation used in
    [instance FCN](https://arxiv.org/abs/1603.08678).

    Args:
      image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
        `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
        A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
        Both `image_height` and `image_width` need to be positive.
      boxes: A `Tensor` of type `float32`.
        A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
        specifies the coordinates of a box in the `box_ind[i]` image and is
        specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
        coordinate value of `y` is mapped to the image coordinate at
        `y * (image_height - 1)`, so the `[0, 1]` interval of normalized image
        height is mapped to `[0, image_height - 1]` in image height coordinates.
        We do allow y1 > y2, in which case the sampled crop is an up-down flipped
        version of the original image. The width dimension is treated similarly.
        Normalized coordinates outside the `[0, 1]` range are allowed, in which
        case we use `extrapolation_value` to extrapolate the input image values.
      box_ind:  A `Tensor` of type `int32`.
        A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
        The value of `box_ind[i]` specifies the image that the `i`-th box refers
        to.
      crop_size: A list of two integers `[crop_height, crop_width]`. All
        cropped image patches are resized to this size. The aspect ratio of the
        image content is not preserved. Both `crop_height` and `crop_width` need
        to be positive.
      num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
        Represents the number of position-sensitive bins in y and x directions.
        Both values should be >= 1. `crop_height` should be divisible by
        `spatial_bins_y`, and similarly for width.
        The number of image channels should be divisible by
        (spatial_bins_y * spatial_bins_x).
        Suggested value from R-FCN paper: [3, 3].
      global_pool: A boolean variable.
        If True, we perform average global pooling on the features assembled from
          the position-sensitive score maps.
        If False, we keep the position-pooled features without global pooling
          over the spatial coordinates.
        Note that using global_pool=True is equivalent to but more efficient than
          running the function with global_pool=False and then performing global
          average pooling.
      extrapolation_value: An optional `float`. Defaults to `0`.
        Value used for extrapolation, when applicable.
    Returns:
      position_sensitive_features: A 4-D tensor of shape
        `[num_boxes, K, K, crop_channels]`,
        where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
        where K = 1 when global_pool is True (Average-pooled cropped regions),
        and K = crop_size when global_pool is False.
    Raises:
      ValueError: Raised in four situations:
        `num_spatial_bins` is not >= 1;
        `num_spatial_bins` does not divide `crop_size`;
        `(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
        `bin_crop_size` is not square when global_pool=False due to the
          constraint in function space_to_depth.
    """
    total_bins = 1
    bin_crop_size = []

    for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
        if num_bins < 1:
            raise ValueError('num_spatial_bins should be >= 1')

        if crop_dim % num_bins != 0:
            raise ValueError('crop_size should be divisible by num_spatial_bins')

        total_bins *= num_bins
        bin_crop_size.append(crop_dim // num_bins)

    if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
        raise ValueError('Only support square bin crop size for now.')

    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    spatial_bins_y, spatial_bins_x = num_spatial_bins

    # Split each box into spatial_bins_y * spatial_bins_x bins.
    position_sensitive_boxes = []
    for bin_y in range(spatial_bins_y):
        step_y = (ymax - ymin) / spatial_bins_y
        for bin_x in range(spatial_bins_x):
            step_x = (xmax - xmin) / spatial_bins_x
            box_coordinates = [ymin + bin_y * step_y,
                               xmin + bin_x * step_x,
                               ymin + (bin_y + 1) * step_y,
                               xmin + (bin_x + 1) * step_x,
                               ]
            position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

    image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=3)

    image_crops = []
    for (split, box) in zip(image_splits, position_sensitive_boxes):
        crop = tf.image.crop_and_resize(split, box, box_ind, bin_crop_size,
                                        extrapolation_value=extrapolation_value)
        image_crops.append(crop)

    if global_pool:
        # Average over all bins.
        position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
        # Then average over spatial positions within the bins.
        position_sensitive_features = tf.reduce_mean(
            position_sensitive_features, [1, 2], keep_dims=True)
    else:
        # Reorder height/width to depth channel.
        block_size = bin_crop_size[0]
        if block_size >= 2:
            image_crops = [tf.space_to_depth(
                crop, block_size=block_size) for crop in image_crops]

        # Pack image_crops so that first dimension is for position-sensitive boxes.
        position_sensitive_features = tf.stack(image_crops, axis=0)

        # Unroll the position-sensitive boxes to spatial positions.
        position_sensitive_features = tf.squeeze(
            tf.batch_to_space_nd(position_sensitive_features,
                                 block_shape=[1] + num_spatial_bins,
                                 crops=tf.zeros((3, 2), dtype=tf.int32)),
            squeeze_dims=[0])

        # Reorder back the depth channel.
        if block_size >= 2:
            position_sensitive_features = tf.depth_to_space(
                position_sensitive_features, block_size=block_size)

    return position_sensitive_features
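A hedged usage sketch (assuming TF 1.x; every shape below is made up to satisfy the checks documented above -- 18 channels divide evenly across 3 * 3 bins and crop_size 6 is divisible by 3):

import tensorflow as tf

features = tf.random_normal([2, 20, 20, 18])             # [batch, H, W, depth]
boxes = tf.constant([[0.1, 0.1, 0.6, 0.6],
                     [0.2, 0.0, 0.9, 0.5]], tf.float32)  # normalized [y1, x1, y2, x2]
box_ind = tf.constant([0, 1], tf.int32)                  # image index for each box
pooled = position_sensitive_crop_regions(
    features, boxes, box_ind,
    crop_size=[6, 6], num_spatial_bins=[3, 3], global_pool=True)
# pooled: [2, 1, 1, 2] -- depth 18 / (3 * 3) = 2 channels per box, globally pooled.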
Example #33
def space_to_depth_x4(x):
    """Thin wrapper for Tensorflow space_to_depth with block_size=4."""
    # Import currently required to make Lambda work.
    import tensorflow as tf
    return tf.space_to_depth(x, block_size=4)
def position_sensitive_crop_regions(image,
                                    boxes,
                                    box_ind,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool,
                                    extrapolation_value=None):
    """Position-sensitive crop and pool rectangular regions from a feature grid.

  The output crops are split into `spatial_bins_y` vertical bins
  and `spatial_bins_x` horizontal bins. For each intersection of a vertical
  and a horizontal bin the output values are gathered by performing
  `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
  channels of the image. This reduces `depth` by a factor of
  `(spatial_bins_y * spatial_bins_x)`.

  When global_pool is True, this function implements a differentiable version
  of position-sensitive RoI pooling used in
  [R-FCN detection system](https://arxiv.org/abs/1605.06409).

  When global_pool is False, this function implements a differentiable version
  of position-sensitive assembling operation used in
  [instance FCN](https://arxiv.org/abs/1603.08678).

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
      specifies the coordinates of a box in the `box_ind[i]` image and is
      specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
      coordinate value of `y` is mapped to the image coordinate at
      `y * (image_height - 1)`, so the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
      We do allow y1 > y2, in which case the sampled crop is an up-down flipped
      version of the original image. The width dimension is treated similarly.
      Normalized coordinates outside the `[0, 1]` range are allowed, in which
      case we use `extrapolation_value` to extrapolate the input image values.
    box_ind:  A `Tensor` of type `int32`.
      A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
      The value of `box_ind[i]` specifies the image that the `i`-th box refers
      to.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
      Represents the number of position-sensitive bins in y and x directions.
      Both values should be >= 1. `crop_height` should be divisible by
      `spatial_bins_y`, and similarly for width.
      The number of image channels should be divisible by
      (spatial_bins_y * spatial_bins_x).
      Suggested value from R-FCN paper: [3, 3].
    global_pool: A boolean variable.
      If True, we perform average global pooling on the features assembled from
        the position-sensitive score maps.
      If False, we keep the position-pooled features without global pooling
        over the spatial coordinates.
      Note that using global_pool=True is equivalent to but more efficient than
        running the function with global_pool=False and then performing global
        average pooling.
    extrapolation_value: An optional `float`. Defaults to `0`.
      Value used for extrapolation, when applicable.
  Returns:
    position_sensitive_features: A 4-D tensor of shape
      `[num_boxes, K, K, crop_channels]`,
      where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
      where K = 1 when global_pool is True (Average-pooled cropped regions),
      and K = crop_size when global_pool is False.
  Raises:
    ValueError: Raised in four situations:
      `num_spatial_bins` is not >= 1;
      `num_spatial_bins` does not divide `crop_size`;
      `(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
      `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
  """
    total_bins = 1
    bin_crop_size = []

    for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
        if num_bins < 1:
            raise ValueError('num_spatial_bins should be >= 1')

        if crop_dim % num_bins != 0:
            raise ValueError(
                'crop_size should be divisible by num_spatial_bins')

        total_bins *= num_bins
        bin_crop_size.append(crop_dim // num_bins)

    if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
        raise ValueError('Only support square bin crop size for now.')

    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    spatial_bins_y, spatial_bins_x = num_spatial_bins

    # Split each box into spatial_bins_y * spatial_bins_x bins.
    position_sensitive_boxes = []
    for bin_y in range(spatial_bins_y):
        step_y = (ymax - ymin) / spatial_bins_y
        for bin_x in range(spatial_bins_x):
            step_x = (xmax - xmin) / spatial_bins_x
            box_coordinates = [
                ymin + bin_y * step_y,
                xmin + bin_x * step_x,
                ymin + (bin_y + 1) * step_y,
                xmin + (bin_x + 1) * step_x,
            ]
            position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

    image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=3)

    image_crops = []
    for (split, box) in zip(image_splits, position_sensitive_boxes):
        crop = tf.image.crop_and_resize(
            split,
            box,
            box_ind,
            bin_crop_size,
            extrapolation_value=extrapolation_value)
        image_crops.append(crop)

    if global_pool:
        # Average over all bins.
        position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
        # Then average over spatial positions within the bins.
        position_sensitive_features = tf.reduce_mean(
            position_sensitive_features, [1, 2], keepdims=True)
    else:
        # Reorder height/width to depth channel.
        block_size = bin_crop_size[0]
        if block_size >= 2:
            image_crops = [
                tf.space_to_depth(crop, block_size=block_size)
                for crop in image_crops
            ]

        # Pack image_crops so that first dimension is for position-sensitive boxes.
        position_sensitive_features = tf.stack(image_crops, axis=0)

        # Unroll the position-sensitive boxes to spatial positions.
        position_sensitive_features = tf.squeeze(tf.batch_to_space_nd(
            position_sensitive_features,
            block_shape=[1] + num_spatial_bins,
            crops=tf.zeros((3, 2), dtype=tf.int32)),
                                                 squeeze_dims=[0])

        # Reorder back the depth channel.
        if block_size >= 2:
            position_sensitive_features = tf.depth_to_space(
                position_sensitive_features, block_size=block_size)

    return position_sensitive_features
def decompress_seqcnn(x,
                      targets,
                      targets_vocab_size,
                      dilations_and_kernels,
                      block_size,
                      is_2d=False,
                      embedding_var=None,
                      name=None,
                      reuse=None):
  """Decompress x into targets size using a Sequence CNN at every element."""
  with tf.variable_scope(
      name,
      default_name="decompress_batch_seqcnn",
      values=[x, targets],
      reuse=reuse):
    # We assume targets are [batch x block_size * N x block_size * N x C] if
    # is_2d=True or [batch, block_size * N, 1, C] otherwise, and C is static.
    # Let's shift targets to depth and embed.
    targets_shape, targets_shape_static = tf.shape(targets), targets.get_shape()
    channels = int(targets_shape_static[-1])
    hidden_size = int(x.get_shape()[-1])
    if is_2d:
      depth_targets = tf.space_to_depth(targets, block_size)
      factor = channels * block_size * block_size
    else:
      depth_targets = tf.reshape(targets, [
          targets_shape[0], targets_shape[1] // block_size, 1,
          channels * block_size
      ])
      factor = channels * block_size
    if embedding_var is None:
      embedding_var = tf.get_variable("targets_embedding",
                                      [targets_vocab_size, hidden_size])
    targets_emb = tf.gather(embedding_var, depth_targets)
    # Flatten x and embedded targets. Flat targets are factor* larger on axis=1.
    flat_x = tf.reshape(x, [-1, 1, 1, hidden_size])
    flat_targets = tf.reshape(targets_emb, [-1, factor, 1, hidden_size])
    shifted_targets = shift_left(flat_targets)
    # Run a SeqCNN large-batch to produce factor outputs out of every target.
    flat_x += tf.zeros_like(shifted_targets)  # Broadcast on axis=1.
    flat_outputs = conv_block(
        tf.concat([flat_x, shifted_targets], axis=3),
        hidden_size,
        dilations_and_kernels,
        padding="LEFT")
    # Reshape back to embedded targets shape.
    outputs = tf.reshape(flat_outputs, [
        tf.shape(targets_emb)[0],
        tf.shape(targets_emb)[1],
        tf.shape(targets_emb)[2], factor * hidden_size
    ])
    # Move depth back to target space.
    if is_2d:
      outputs = tf.depth_to_space(outputs, 2)
    else:
      outputs = tf.reshape(outputs, [
          tf.shape(outputs)[0], block_size * tf.shape(outputs)[1], 1,
          hidden_size
      ])
    # Final reshape before prediction to ensure target size.
    outputs = tf.reshape(outputs, [
        targets_shape[0], targets_shape[1], targets_shape[2], channels,
        hidden_size
    ])
    return tf.layers.dense(outputs, targets_vocab_size)
Example #36
    def forward(self, x):

        dk = 3
        activate = tf.nn.leaky_relu
        mf = self.main_channel_nums
        num_block = self.num_blocks
        n, f1, w, h, c = x.shape
        ki = tf.contrib.layers.xavier_initializer()
        ds = 1
        with tf.variable_scope('nlvsr', reuse=tf.AUTO_REUSE) as scope:
            conv0 = Conv2D(mf,
                           5,
                           strides=ds,
                           padding='same',
                           activation=activate,
                           kernel_initializer=ki,
                           name='conv0')
            conv1 = [
                Conv2D(mf,
                       dk,
                       strides=ds,
                       padding='same',
                       activation=activate,
                       kernel_initializer=ki,
                       name='conv1_{}'.format(i)) for i in range(num_block)
            ]
            conv10 = [
                Conv2D(mf,
                       1,
                       strides=ds,
                       padding='same',
                       activation=activate,
                       kernel_initializer=ki,
                       name='conv10_{}'.format(i)) for i in range(num_block)
            ]
            conv2 = [
                Conv2D(mf,
                       dk,
                       strides=ds,
                       padding='same',
                       activation=activate,
                       kernel_initializer=ki,
                       name='conv2_{}'.format(i)) for i in range(num_block)
            ]
            convmerge1 = Conv2D(48,
                                3,
                                strides=ds,
                                padding='same',
                                activation=activate,
                                kernel_initializer=ki,
                                name='convmerge1')
            convmerge2 = Conv2D(12,
                                3,
                                strides=ds,
                                padding='same',
                                activation=None,
                                kernel_initializer=ki,
                                name='convmerge2')

            inp0 = [x[:, i, :, :, :]
                    for i in range(f1)]  # list[7]:Tensor[8, 64, 64, 3]
            inp0 = tf.concat(inp0, axis=-1)  # Tensor:[8,64,64,21]
            inp1 = tf.space_to_depth(inp0, 2)  # Tensor:[8,32,32,84]
            # Tensor:[8,32,32,84]
            with tf.device('/cpu:0'):
                inp1 = NonLocalBlock(inp1,
                                     int(c) * self.num_frames * 4,
                                     sub_sample=self.nonLocal_sub_sample_rate,
                                     nltype=1,
                                     scope='nlblock_{}'.format(0))
            inp1 = tf.depth_to_space(inp1, 2)  # Tensor:[8,64,64,21]
            inp0 += inp1  # Tensor:[8,64,64,21]
            inp0 = tf.split(inp0, num_or_size_splits=self.num_frames,
                            axis=-1)  # list[7]:Tensor[8, 64, 64, 3]
            inp0 = [conv0(f) for f in inp0]  # list[7]:Tensor[8, 64, 64, 64]
            bic = tf.image.resize_images(x[:, self.num_frames // 2, :, :, :],
                                         [w * self.scale, h * self.scale],
                                         method=2)  # Tensor:[8,256,256,3]

            for i in range(num_block):
                inp1 = [conv1[i](f) for f in inp0]
                base = tf.concat(inp1, axis=-1)
                base = conv10[i](base)
                inp2 = [tf.concat([base, f], -1) for f in inp1]
                inp2 = [conv2[i](f) for f in inp2]
                inp0 = [tf.add(inp0[j], inp2[j]) for j in range(f1)]

            merge = tf.concat(
                inp0, axis=-1
            )  # inp0: list[7]:Tensor[8, 64, 64, 64], merge: Tensor[8,64,64,448=7*64]
            merge = convmerge1(merge)  # merge: Tensor[8,64,64,48]

            large1 = tf.depth_to_space(merge, 2)  # large: Tenosr[8,128,128,12]
            out1 = convmerge2(large1)  # out1: Tensor[8,128,128,12]
            out = tf.depth_to_space(out1, 2)  # out: Tensor[8,256,256,3]

        return tf.stack([out + bic], axis=1, name='out')  #out:
def space_to_depth_x2(x):
    return tf.space_to_depth(x, block_size=2)
Example #38
def descriptor_loss(descriptors,
                    warped_descriptors,
                    homographies,
                    valid_mask=None,
                    **config):
    # Compute the position of the center pixel of every cell in the image
    (batch_size, Hc, Wc) = tf.unstack(tf.to_int32(tf.shape(descriptors)[:3]))
    coord_cells = tf.stack(tf.meshgrid(tf.range(Hc),
                                       tf.range(Wc),
                                       indexing='ij'),
                           axis=-1)
    coord_cells = coord_cells * config['grid_size'] + config[
        'grid_size'] // 2  # (Hc, Wc, 2)
    # coord_cells is now a grid containing the coordinates of the Hc x Wc
    # center pixels of the 8x8 cells of the image

    # Compute the position of the warped center pixels
    warped_coord_cells = warp_points(tf.reshape(coord_cells, [-1, 2]),
                                     homographies)
    # warped_coord_cells is now a list of the warped coordinates of all the center
    # pixels of the 8x8 cells of the image, shape (N, Hc x Wc, 2)

    # Compute the pairwise distances and filter the ones less than a threshold
    # The distance is just the pairwise norm of the difference of the two grids
    # Using shape broadcasting, cell_distances has shape (N, Hc, Wc, Hc, Wc)
    coord_cells = tf.to_float(tf.reshape(coord_cells, [1, Hc, Wc, 1, 1, 2]))
    warped_coord_cells = tf.reshape(warped_coord_cells,
                                    [batch_size, 1, 1, Hc, Wc, 2])
    cell_distances = tf.norm(coord_cells - warped_coord_cells, axis=-1)
    s = tf.to_float(tf.less_equal(cell_distances, config['grid_size']))
    # s[id_batch, h, w, h', w'] == 1 if the point of coordinates (h, w) warped by the
    # homography is at a distance from (h', w') less than config['grid_size']
    # and 0 otherwise

    # Compute the pairwise dot product between descriptors: d^t * d'
    descriptors = tf.reshape(descriptors, [batch_size, Hc, Wc, 1, 1, -1])
    warped_descriptors = tf.reshape(warped_descriptors,
                                    [batch_size, 1, 1, Hc, Wc, -1])
    dot_product_desc = tf.reduce_sum(descriptors * warped_descriptors, -1)
    # dot_product_desc[id_batch, h, w, h', w'] is the dot product between the
    # descriptor at position (h, w) in the original descriptors map and the
    # descriptor at position (h', w') in the warped image

    # Compute the loss
    positive_dist = tf.maximum(0.,
                               config['positive_margin'] - dot_product_desc)
    negative_dist = tf.maximum(0.,
                               dot_product_desc - config['negative_margin'])
    loss = config['lambda_d'] * s * positive_dist + (1 - s) * negative_dist

    # Mask the pixels if bordering artifacts appear
    valid_mask = tf.ones([batch_size, Hc, Wc], tf.float32)\
        if valid_mask is None else valid_mask
    valid_mask = tf.to_float(valid_mask[..., tf.newaxis])  # for GPU
    valid_mask = tf.space_to_depth(valid_mask, config['grid_size'])
    valid_mask = tf.reduce_prod(valid_mask,
                                axis=3)  # AND along the channel dim
    valid_mask = tf.reshape(valid_mask, [batch_size, 1, 1, Hc, Wc])

    normalization = tf.reduce_sum(valid_mask) * tf.to_float(Hc * Wc)
    loss = tf.reduce_sum(valid_mask * loss) / normalization
    return loss
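A small standalone sketch (assuming TF 1.x) of the mask trick used above: tf.space_to_depth groups every grid_size x grid_size cell into the channel axis, so reduce_prod over that axis acts as a logical AND -- a cell stays valid only if all of its pixels are valid.

import tensorflow as tf

grid_size = 8
pixel_mask = tf.ones([1, 16, 16, 1], tf.float32)  # per-pixel validity (0 or 1)
cells = tf.space_to_depth(pixel_mask, grid_size)  # [1, 2, 2, 64]
cell_valid = tf.reduce_prod(cells, axis=3)        # [1, 2, 2]: AND within each cell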
Example #39
 def space_to_depth_x2(x):
     return tf.space_to_depth(x, block_size=2)