def testGradient(self):
    # Set graph seed for determinism.
    random_seed = 42
    tf.set_random_seed(random_seed)

    with self.test_session():
      for test_case in self._TEST_CASES:
        np.random.seed(random_seed)
        in_shape = test_case['in_shape']
        in_val = tf.constant(np.random.random(in_shape),
                             dtype=tf.float32)

        for padding in ['VALID', 'SAME']:
          out_val = tf.extract_image_patches(in_val,
                                             test_case['ksizes'],
                                             test_case['strides'],
                                             test_case['rates'],
                                             padding)
          out_shape = out_val.get_shape().as_list()

          err = tf.test.compute_gradient_error(
              in_val, in_shape, out_val, out_shape
          )

          print('extract_image_patches gradient err: %.4e' % err)
          self.assertLess(err, 1e-4)
  def _VerifyValues(self, image, ksizes, strides, rates, padding, patches):
    """Tests input-output pairs for the ExtractImagePatches op.

    Args:
      image: Input tensor with shape: [batch, in_rows, in_cols, depth].
      ksizes: Patch size specified as: [ksize_rows, ksize_cols].
      strides: Output strides, specified as [stride_rows, stride_cols].
      rates: Atrous rates, specified as [rate_rows, rate_cols].
      padding: Padding type.
      patches: Expected output.
    """
    ksizes = [1] + ksizes + [1]
    strides = [1] + strides + [1]
    rates = [1] + rates + [1]

    for use_gpu in [False, True]:
      with self.test_session(use_gpu=use_gpu):
        out_tensor = tf.extract_image_patches(
            tf.constant(image),
            ksizes=ksizes,
            strides=strides,
            rates=rates,
            padding=padding,
            name="im2col")
        self.assertAllClose(patches, out_tensor.eval())
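For reference, the patch layout those assertions rely on can be checked with a tiny standalone run (a sketch assuming TF 1.x; the values are chosen so each patch can be read off by eye):

import numpy as np
import tensorflow as tf

# A 1x4x4x1 image whose pixel values encode their position: 1..16.
image = np.arange(1, 17, dtype=np.float32).reshape(1, 4, 4, 1)

with tf.Session() as sess:
    # Non-overlapping 2x2 patches: strides equal to ksizes.
    patches = tf.extract_image_patches(image,
                                       ksizes=[1, 2, 2, 1],
                                       strides=[1, 2, 2, 1],
                                       rates=[1, 1, 1, 1],
                                       padding='VALID')
    # Output shape is [1, 2, 2, 4]; each length-4 vector is one flattened patch.
    print(sess.run(patches)[0, 0, 0])  # -> [1. 2. 5. 6.]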
Example #3
def read_tensor_from_image_file(file_name):
  input_name = "file_reader"
  output_name = "normalized"
  width = input_size
  height = input_size
  num_channels = 3
  file_reader = tf.read_file(file_name, input_name)
  if file_name.endswith(".png"):
    image_reader = tf.image.decode_png(file_reader, channels=3,
                                       name='png_reader')
  elif file_name.endswith(".gif"):
    image_reader = tf.squeeze(tf.image.decode_gif(file_reader,
                                                  name='gif_reader'))
  elif file_name.endswith(".bmp"):
    image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
  else:
    image_reader = tf.image.decode_jpeg(file_reader, channels=3,
                                        name='jpeg_reader')
  float_caster = tf.cast(image_reader, tf.float32)
  dims_expander = tf.expand_dims(float_caster, 0)
  # resized = tf.image.resize_bilinear(dims_expander, [input_size, input_size])
  normalized = tf.divide(tf.subtract(dims_expander, [input_mean]), [input_std])
  patches = tf.extract_image_patches(normalized,
       ksizes=[1, patch_height, patch_width, 1],
       strides=[1, patch_height // 4, patch_width // 4, 1],  # '//' keeps strides integral under Python 3
       rates=[1,1,1,1],
       padding="VALID")
  patches_shape = tf.shape(patches)
  patches = tf.reshape(patches, [-1, patch_height, patch_width, num_channels])
  patches = tf.image.resize_images(patches, [height, width])
  patches = tf.reshape(patches, [-1, height, width, num_channels])
  sess = tf.Session()
  return sess.run([patches, patches_shape])
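A minimal driver for the function above; it reads several module-level globals (input_size, input_mean, input_std, patch_height, patch_width), so a sketch has to supply them. The values and the file name here are placeholders:

input_size = 224
input_mean = 128
input_std = 128
patch_height = 64
patch_width = 64

patches, patches_shape = read_tensor_from_image_file('example.jpg')
print(patches.shape, patches_shape)  # resized patches and the pre-reshape grid shape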
Example #4
def style_swap(content, style, patch_size, stride):
    '''Efficiently swap content feature patches with nearest-neighbor style patches
       Original paper: https://arxiv.org/abs/1612.04337
       Adapted from: https://github.com/rtqichen/style-swap/blob/master/lib/NonparametricPatchAutoencoderFactory.lua
    '''
    nC = tf.shape(style)[-1]  # Num channels of input content feature and style-swapped output

    ### Extract patches from style image that will be used for conv/deconv layers
    style_patches = tf.extract_image_patches(style, [1,patch_size,patch_size,1], [1,stride,stride,1], [1,1,1,1], 'VALID')
    before_reshape = tf.shape(style_patches)  # NxRowsxColsxPatch_size*Patch_size*nC
    style_patches = tf.reshape(style_patches, [before_reshape[1]*before_reshape[2],patch_size,patch_size,nC])
    style_patches = tf.transpose(style_patches, [1,2,3,0])  # Patch_sizexPatch_sizexIn_CxOut_c

    # Normalize each style patch
    style_patches_norm = tf.nn.l2_normalize(style_patches, dim=3)

    # Compute cross-correlation/nearest neighbors of patches by using style patches as conv filters
    ss_enc = tf.nn.conv2d(content,
                          style_patches_norm,
                          [1,stride,stride,1],
                          'VALID')

    # For each spatial position find index of max along channel/patch dim  
    ss_argmax = tf.argmax(ss_enc, axis=3)
    encC = tf.shape(ss_enc)[-1]  # Num channels in intermediate conv output, same as # of patches
    
    # One-hot encode argmax with same size as ss_enc, with 1's in max channel idx for each spatial pos
    ss_oh = tf.one_hot(ss_argmax, encC, 1., 0., 3)

    # Calc size of transposed conv out
    deconv_out_H = utils.deconv_output_length(tf.shape(ss_oh)[1], patch_size, 'valid', stride)
    deconv_out_W = utils.deconv_output_length(tf.shape(ss_oh)[2], patch_size, 'valid', stride)
    deconv_out_shape = tf.stack([1,deconv_out_H,deconv_out_W,nC])

    # Deconv back to original content size with highest matching (unnormalized) style patch swapped in for each content patch
    ss_dec = tf.nn.conv2d_transpose(ss_oh,
                                    style_patches,
                                    deconv_out_shape,
                                    [1,stride,stride,1],
                                    'VALID')

    ### Interpolate to average overlapping patch locations
    ss_oh_sum = tf.reduce_sum(ss_oh, axis=3, keep_dims=True)

    filter_ones = tf.ones([patch_size,patch_size,1,1], dtype=tf.float32)
    
    deconv_out_shape = tf.stack([1,deconv_out_H,deconv_out_W,1])  # Same spatial size as ss_dec with 1 channel

    counting = tf.nn.conv2d_transpose(ss_oh_sum,
                                         filter_ones,
                                         deconv_out_shape,
                                         [1,stride,stride,1],
                                         'VALID')

    counting = tf.tile(counting, [1,1,1,nC])  # Repeat along channel dim to make same size as ss_dec

    interpolated_dec = tf.divide(ss_dec, counting)

    return interpolated_dec
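The heart of the function is scoring content windows against style patches by turning the patches into convolution filters; that step can be exercised on its own. A sketch with made-up shapes, normalizing the filters the same way the function does:

import tensorflow as tf

# Dummy 1x8x8x16 content and style feature maps.
content = tf.random_normal([1, 8, 8, 16])
style = tf.random_normal([1, 8, 8, 16])

# 3x3 style patches at stride 1: a 6x6 grid, i.e. 36 filters of depth 3*3*16.
p = tf.extract_image_patches(style, [1, 3, 3, 1], [1, 1, 1, 1],
                             [1, 1, 1, 1], 'VALID')
p = tf.reshape(p, [36, 3, 3, 16])   # one filter per patch
p = tf.transpose(p, [1, 2, 3, 0])   # to HWIO filter layout
scores = tf.nn.conv2d(content, tf.nn.l2_normalize(p, dim=3),
                      [1, 1, 1, 1], 'VALID')
with tf.Session() as sess:
    print(sess.run(tf.shape(scores)))  # [1 6 6 36]: one score map per patch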
def extract_patches(sess,data,width,stride):
    '''
    Extract patches from images.
    :data: input image batch
    :width: dimension of the patch
    :stride: stride of patch selection on the image
    '''
    print('Patch extraction with stride=%d and width=%d begins'%(stride,width) )
    data_pl=tf.placeholder(tf.float64, [data.shape[0],data.shape[1],data.shape[2],data.shape[3]], name='data_placeholder')
    data_o=tf.extract_image_patches(images=data_pl,ksizes=[1,width,width,1],strides=[1,stride,stride,1],rates=[1,1,1,1],padding='VALID')
    print('Patch extraction done')
    size_tot=data_o.get_shape().as_list()
    # -1 spans batch*rows*cols; hard-coding rows*cols would break for batch > 1
    data_o = tf.reshape(data_o, [-1, width, width, data.shape[3]])
    
    Data_o = sess.run(data_o, feed_dict={data_pl: data})
    print('%d patches of size %d x %d created as list'%(Data_o.shape[0],Data_o.shape[1],Data_o.shape[2]))
    return Data_o
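A hypothetical driver for the helper above, assuming a single 32x32 grayscale image:

import numpy as np
import tensorflow as tf

data = np.random.rand(1, 32, 32, 1)
with tf.Session() as sess:
    patches = extract_patches(sess, data, width=8, stride=4)
    # (32 - 8) // 4 + 1 = 7 positions per side -> 49 patches of 8x8x1
    print(patches.shape)  # (49, 8, 8, 1)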
    def testGradient(self):
        with self.test_session():
            for test in self.test_cases:
                in_shape = test['in_shape']
                in_val = tf.constant(np.random.random(in_shape),
                                     dtype=tf.float32)

                for padding in ['VALID', 'SAME']:
                    out_val = tf.extract_image_patches(in_val,
                                                       test['ksizes'],
                                                       test['strides'],
                                                       test['rates'],
                                                       padding)
                    out_shape = out_val.get_shape().as_list()

                    err = tf.test.compute_gradient_error(
                        in_val, in_shape, out_val, out_shape
                    )

                    print('extract_image_patches gradient err: %.4e' % err)
                    self.assertLess(err, 1e-4)
Example #7
    def _build_net(self):
        with tf.variable_scope(self.name,
                               values=[self.in_size, self.ksize, self.lr]):

            # input place holders
            self.X = tf.placeholder(
                tf.float32, shape=[None, self.in_size, self.in_size, 1])
            # img 64x64x1 (gray scale)
            self.Y = tf.placeholder(tf.float32,
                                    shape=[None, self.in_size, self.in_size])

            self.keep = tf.placeholder(tf.float32)

            in_x = tf.image.resize_image_with_crop_or_pad(
                self.X, self.in_size + self.ksize - 1,
                self.in_size + self.ksize - 1)
            in_x = tf.extract_image_patches(
                in_x,
                ksizes=[1, self.in_size, self.in_size, 1],
                strides=[1, 1, 1, 1],
                rates=[1, 1, 1, 1],
                padding="VALID")

            w1 = init_w('w1', [1, self.ksize, self.ksize, in_x.shape[3]])
            L1 = tf.reduce_sum(tf.multiply(in_x, w1), axis=(1, 2))
            L1 = tf.reshape(L1, [-1, self.in_size, self.in_size, 1])

            in_L2 = tf.image.resize_image_with_crop_or_pad(
                L1, self.in_size + self.ksize - 1,
                self.in_size + self.ksize - 1)
            in_L2 = tf.extract_image_patches(
                in_L2,
                ksizes=[1, self.in_size, self.in_size, 1],
                strides=[1, 1, 1, 1],
                rates=[1, 1, 1, 1],
                padding="VALID")
            w2 = init_w('w2', [1, self.ksize, self.ksize, in_L2.shape[3]])
            L2 = tf.reduce_sum(tf.multiply(in_L2, w2), axis=(1, 2))
            L2 = tf.reshape(L2, [-1, self.in_size, self.in_size, 1])

            in_L3 = tf.image.resize_image_with_crop_or_pad(
                L2, self.in_size + self.ksize - 1,
                self.in_size + self.ksize - 1)
            in_L3 = tf.extract_image_patches(
                in_L3,
                ksizes=[1, self.in_size, self.in_size, 1],
                strides=[1, 1, 1, 1],
                rates=[1, 1, 1, 1],
                padding="VALID")
            w3 = init_w('w3', [1, self.ksize, self.ksize, in_L3.shape[3]])
            L3 = tf.reduce_sum(tf.multiply(in_L3, w3), axis=(1, 2))
            L3 = tf.reshape(L3, [-1, self.in_size, self.in_size, 1])

            in_L4 = tf.image.resize_image_with_crop_or_pad(
                L3, self.in_size + self.ksize - 1,
                self.in_size + self.ksize - 1)
            in_L4 = tf.extract_image_patches(
                in_L4,
                ksizes=[1, self.in_size, self.in_size, 1],
                strides=[1, 1, 1, 1],
                rates=[1, 1, 1, 1],
                padding="VALID")
            w4 = init_w('w4', [1, self.ksize, self.ksize, in_L4.shape[3]])
            L4 = tf.reduce_sum(tf.multiply(in_L4, w4), axis=(1, 2))

            self.logits = tf.reshape(L4, [-1, self.in_size, self.in_size])

        # define cost/loss & optimizer
        #beta = 0.01
        #self.regularizers = tf.nn.l2_loss(w1)# + tf.nn.l2_loss(w2)
        self.cost = tf.reduce_mean(
            tf.square(self.logits - self.Y))  # + beta*self.regularizers)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.lr).minimize(self.cost)
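The pad-then-extract pattern in each layer enumerates every ksize x ksize shifted copy of the feature map, so the elementwise multiply and reduce_sum over axes (1, 2) implement a ksize x ksize locally connected filter (w carries one weight per output pixel per offset). A shape check with the sizes the comments assume (in_size=64, ksize=5):

import tensorflow as tf

in_size, ksize = 64, 5
x = tf.zeros([2, in_size, in_size, 1])
x = tf.image.resize_image_with_crop_or_pad(x, in_size + ksize - 1,
                                           in_size + ksize - 1)
p = tf.extract_image_patches(x, [1, in_size, in_size, 1], [1, 1, 1, 1],
                             [1, 1, 1, 1], 'VALID')
# One 64x64 window per kernel offset: 5x5 positions, each of depth 64*64.
print(p.shape)  # (2, 5, 5, 4096)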
Example #8
    def compute_stats(self, loss_sampled, var_list=None):
        varlist = var_list
        if varlist is None:
            varlist = tf.trainable_variables()

        gs = tf.gradients(loss_sampled, varlist, name='gradientsSampled')
        self.gs = gs
        factors = self.getFactors(gs, varlist)
        stats = self.getStats(factors, varlist)

        updateOps = []
        statsUpdates = {}
        statsUpdates_cache = {}
        for var in varlist:
            opType = factors[var]['opName']
            fops = factors[var]['op']
            fpropFactor = factors[var]['fpropFactors_concat']
            fpropStats_vars = stats[var]['fprop_concat_stats']
            bpropFactor = factors[var]['bpropFactors_concat']
            bpropStats_vars = stats[var]['bprop_concat_stats']
            SVD_factors = {}
            for stats_var in fpropStats_vars:
                stats_var_dim = int(stats_var.get_shape()[0])
                if stats_var not in statsUpdates_cache:
                    old_fpropFactor = fpropFactor
                    B = (tf.shape(fpropFactor)[0])  # batch size
                    if opType == 'Conv2D':
                        strides = fops.get_attr("strides")
                        padding = fops.get_attr("padding")
                        convkernel_size = var.get_shape()[0:3]

                        KH = int(convkernel_size[0])
                        KW = int(convkernel_size[1])
                        C = int(convkernel_size[2])
                        flatten_size = int(KH * KW * C)

                        Oh = int(bpropFactor.get_shape()[1])
                        Ow = int(bpropFactor.get_shape()[2])

                        if Oh == 1 and Ow == 1 and self._channel_fac:
                                # factorization along the channels
                                # assume independence among input channels
                                # factor = B x 1 x 1 x (KH xKW x C)
                                # patches = B x Oh x Ow x (KH xKW x C)
                            if len(SVD_factors) == 0:
                                if KFAC_DEBUG:
                                    print(('approx %s act factor with rank-1 SVD factors' % (var.name)))
                                # find closest rank-1 approx to the feature map
                                S, U, V = tf.batch_svd(tf.reshape(
                                    fpropFactor, [-1, KH * KW, C]))
                                # get rank-1 approx slides
                                sqrtS1 = tf.expand_dims(tf.sqrt(S[:, 0, 0]), 1)
                                patches_k = U[:, :, 0] * sqrtS1  # B x KH*KW
                                full_factor_shape = fpropFactor.get_shape()
                                patches_k.set_shape(
                                    [full_factor_shape[0], KH * KW])
                                patches_c = V[:, :, 0] * sqrtS1  # B x C
                                patches_c.set_shape([full_factor_shape[0], C])
                                SVD_factors[C] = patches_c
                                SVD_factors[KH * KW] = patches_k
                            fpropFactor = SVD_factors[stats_var_dim]

                        else:
                            # poor mem usage implementation
                            patches = tf.extract_image_patches(fpropFactor, ksizes=[1, convkernel_size[
                                                               0], convkernel_size[1], 1], strides=strides, rates=[1, 1, 1, 1], padding=padding)

                            if self._approxT2:
                                if KFAC_DEBUG:
                                    print(('approxT2 act fisher for %s' % (var.name)))
                                # T^2 terms * 1/T^2, size: B x C
                                fpropFactor = tf.reduce_mean(patches, [1, 2])
                            else:
                                # size: (B x Oh x Ow) x C
                                fpropFactor = tf.reshape(
                                    patches, [-1, flatten_size]) / Oh / Ow
                    fpropFactor_size = int(fpropFactor.get_shape()[-1])
                    if stats_var_dim == (fpropFactor_size + 1) and not self._blockdiag_bias:
                        if opType == 'Conv2D' and not self._approxT2:
                            # correct padding for numerical stability (we
                            # divided out OhxOw from activations for T1 approx)
                            fpropFactor = tf.concat([fpropFactor, tf.ones(
                                [tf.shape(fpropFactor)[0], 1]) / Oh / Ow], 1)
                        else:
                            # use homogeneous coordinates
                            fpropFactor = tf.concat(
                                [fpropFactor, tf.ones([tf.shape(fpropFactor)[0], 1])], 1)

                    # average over the number of data points in a batch
                    # divided by B
                    cov = tf.matmul(fpropFactor, fpropFactor,
                                    transpose_a=True) / tf.cast(B, tf.float32)
                    updateOps.append(cov)
                    statsUpdates[stats_var] = cov
                    if opType != 'Conv2D':
                        # HACK: for convolution we recompute fprop stats for
                        # every layer including forking layers
                        statsUpdates_cache[stats_var] = cov

            for stats_var in bpropStats_vars:
                stats_var_dim = int(stats_var.get_shape()[0])
                if stats_var not in statsUpdates_cache:
                    old_bpropFactor = bpropFactor
                    bpropFactor_shape = bpropFactor.get_shape()
                    B = tf.shape(bpropFactor)[0]  # batch size
                    C = int(bpropFactor_shape[-1])  # num channels
                    if opType == 'Conv2D' or len(bpropFactor_shape) == 4:
                        if fpropFactor is not None:
                            if self._approxT2:
                                if KFAC_DEBUG:
                                    print(('approxT2 grad fisher for %s' % (var.name)))
                                bpropFactor = tf.reduce_sum(
                                    bpropFactor, [1, 2])  # T^2 terms * 1/T^2
                            else:
                                bpropFactor = tf.reshape(
                                    bpropFactor, [-1, C]) * Oh * Ow  # T * 1/T terms
                        else:
                            # just doing block diag approx. spatial independent
                            # structure does not apply here. summing over
                            # spatial locations
                            if KFAC_DEBUG:
                                print(('block diag approx fisher for %s' % (var.name)))
                            bpropFactor = tf.reduce_sum(bpropFactor, [1, 2])

                    # assume sampled loss is averaged. TODO: figure out a
                    # better way to handle this
                    bpropFactor *= tf.to_float(B)
                    ##

                    cov_b = tf.matmul(
                        bpropFactor, bpropFactor, transpose_a=True) / tf.to_float(tf.shape(bpropFactor)[0])

                    updateOps.append(cov_b)
                    statsUpdates[stats_var] = cov_b
                    statsUpdates_cache[stats_var] = cov_b

        if KFAC_DEBUG:
            aKey = list(statsUpdates.keys())[0]
            statsUpdates[aKey] = tf.Print(statsUpdates[aKey],
                                          [tf.convert_to_tensor('step:'),
                                           self.global_step,
                                           tf.convert_to_tensor(
                                               'computing stats'),
                                           ])
        self.statsUpdates = statsUpdates
        return statsUpdates
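How the 'poor mem usage' branch unrolls conv inputs, in miniature (a sketch with made-up sizes): the patches form a (B*Oh*Ow) x (KH*KW*C) matrix whose Gram matrix is the activation statistic.

import tensorflow as tf

acts = tf.random_normal([8, 10, 10, 3])                   # B, H, W, C
patches = tf.extract_image_patches(acts, [1, 3, 3, 1], [1, 1, 1, 1],
                                   [1, 1, 1, 1], 'SAME')  # 8x10x10x27
flat = tf.reshape(patches, [-1, 27])                      # (8*10*10) x 27
cov = tf.matmul(flat, flat, transpose_a=True) / 8.        # divide by B
print(cov.shape)  # (27, 27)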
Example #9
def contextual_attention(f,
                         b,
                         mask=None,
                         ksize=3,
                         stride=1,
                         rate=1,
                         fuse_k=3,
                         softmax_scale=10.,
                         training=True,
                         fuse=True):
    """ Contextual attention layer implementation.

    Contextual attention is first introduced in publication:
        Generative Image Inpainting with Contextual Attention, Yu et al.

    Args:
        x: Input feature to match (foreground).
        t: Input feature for match (background).
        mask: Input mask for t, indicating patches not available.
        ksize: Kernel size for contextual attention.
        stride: Stride for extracting patches from t.
        rate: Dilation for matching.
        softmax_scale: Scaled softmax for attention.
        training: Indicating if current graph is training or inference.

    Returns:
        tf.Tensor: output

    """
    # get shapes
    raw_fs = tf.shape(f)
    raw_int_fs = f.get_shape().as_list()  # B, H, W, C
    raw_int_bs = b.get_shape().as_list()  # B, H, W, C
    # extract patches from background with stride and rate
    kernel = 2 * rate  # 2d
    raw_w = tf.extract_image_patches(
        b,
        ksizes=[1, kernel, kernel, 1],
        strides=[1, rate * stride, rate * stride, 1],
        rates=[1, 1, 1, 1],
        padding='SAME')  # B, H//(d*s), W//(d*s), 2d*2d*C
    raw_w = tf.reshape(raw_w,
                       [raw_int_bs[0], -1, kernel, kernel, raw_int_bs[3]
                        ])  # B, H//(d*s) * W//(d*s), k, k, C
    raw_w = tf.transpose(
        raw_w,
        [0, 2, 3, 4, 1
         ])  # transpose to b*k*k*c*hw --> B, k, k, C, H//(d*s) * W//(d*s)
    # downscaling foreground option: downscaling both foreground and
    # background for matching and use original background for reconstruction.
    f = resize(f, scale=1. / rate,
               func=tf.image.resize_nearest_neighbor)  # B, H//d, W//d, C
    b = resize(b,
               to_shape=[int(raw_int_bs[1] / rate),
                         int(raw_int_bs[2] / rate)],
               func=tf.image.resize_nearest_neighbor
               )  # https://github.com/tensorflow/tensorflow/issues/11651
    # B, H//d, W//d, C
    if mask is not None:
        mask = resize(
            mask, scale=1. / rate,
            func=tf.image.resize_nearest_neighbor)  # B, H//d, W//d, C
    fs = tf.shape(f)
    int_fs = f.get_shape().as_list()  # B, H//d, W//d, C
    f_groups = tf.split(f, int_fs[0],
                        axis=0)  # list[Tensor(H//d, W//d, C)] len: B
    # from t(H*W*C) to w(b*k*k*c*h*w)
    bs = tf.shape(b)
    int_bs = b.get_shape().as_list()  # B, H//d, W//d, C
    w = tf.extract_image_patches(b, [1, ksize, ksize, 1],
                                 [1, stride, stride, 1], [1, 1, 1, 1],
                                 padding='SAME')  # B,  H//d, W//d, k * k * C
    w = tf.reshape(
        w, [int_fs[0], -1, ksize, ksize, int_fs[3]])  # B, H//d * W//d, k, k, C
    w = tf.transpose(w, [0, 2, 3, 4, 1])  # B, k, k, C, H//d * W//d
    # process mask
    if mask is None:
        mask = tf.zeros([1, bs[1], bs[2], 1])
    m = tf.extract_image_patches(mask, [1, ksize, ksize, 1],
                                 [1, stride, stride, 1], [1, 1, 1, 1],
                                 padding='SAME')
    m = tf.reshape(m, [1, -1, ksize, ksize, 1])
    m = tf.transpose(m, [0, 2, 3, 4, 1])  # 1, k, k, 1, H//d * W//d
    m = m[0]  # mask is shared in a batch --> k, k, 1, H//d * W//d
    mm = tf.cast(
        tf.equal(tf.reduce_mean(m, axis=[0, 1, 2], keep_dims=True), 0.),
        tf.float32)
    # (1, 1, 1, H//d * W//d)  --> if zero, this patch is all-blank
    w_groups = tf.split(w, int_bs[0],
                        axis=0)  # list[Tensor(1, k, k, C, H//d * W//d)] len: B
    raw_w_groups = tf.split(
        raw_w, int_bs[0],
        axis=0)  # list[Tensor(2d, 2d, C, H//d * W//d)] len: B
    y = []
    offsets = []
    k = fuse_k
    scale = softmax_scale
    fuse_weight = tf.reshape(tf.eye(k), [k, k, 1, 1])
    for xi, wi, raw_wi in zip(f_groups, w_groups, raw_w_groups):
        # conv for compare
        wi = wi[0]
        wi_normed = wi / tf.maximum(
            tf.sqrt(tf.reduce_sum(tf.square(wi), axis=[0, 1, 2])), 1e-4)
        yi = tf.nn.conv2d(xi, wi_normed, strides=[1, 1, 1, 1],
                          padding="SAME")  # 1, H//d, W//d, H//d * W//d
        # ???: xi is not normalized ???

        # conv implementation for fuse scores to encourage large patches
        if fuse:
            yi = tf.reshape(yi, [1, fs[1] * fs[2], bs[1] * bs[2], 1
                                 ])  # 1, H//d * W//d, H//d * W//d, 1
            yi = tf.nn.conv2d(yi,
                              fuse_weight,
                              strides=[1, 1, 1, 1],
                              padding='SAME')  # 1, H//d * W//d, H//d * W//d, 1
            yi = tf.reshape(
                yi,
                [1, fs[1], fs[2], bs[1], bs[2]])  # 1, H//d, W//d, H//d, W//d
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])  # 1, W//d, H//d, W//d, H//d
            yi = tf.reshape(yi, [1, fs[1] * fs[2], bs[1] * bs[2], 1
                                 ])  # 1, W//d * H//d, W//d * H//d, 1
            yi = tf.nn.conv2d(yi,
                              fuse_weight,
                              strides=[1, 1, 1, 1],
                              padding='SAME')  # 1, W//d * H//d, W//d * H//d, 1
            yi = tf.reshape(yi, [1, fs[2], fs[1], bs[2], bs[1]
                                 ])  # 1, W//d * H//d, W//d * H//d, 1
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])  # 1, H//d, W//d, H//d, W//d
        yi = tf.reshape(
            yi, [1, fs[1], fs[2], bs[1] * bs[2]])  # 1, H//d, W//d, H//d * W//d

        # softmax to match
        yi *= mm  # mask
        yi = tf.nn.softmax(yi * scale, 3)
        yi *= mm  # mask

        offset = tf.argmax(yi, axis=3, output_type=tf.int32)
        offset = tf.stack([offset // fs[2], offset % fs[2]],
                          axis=-1)  # this is the most correlated idx
        # deconv for patch pasting
        # 3.1 paste center
        wi_center = raw_wi[0]  # 2*d, 2*d, C, H//d * W//d
        yi = tf.nn.conv2d_transpose(yi,
                                    wi_center,
                                    tf.concat([[1], raw_fs[1:]], axis=0),
                                    strides=[1, rate, rate, 1]) / 4.
        y.append(yi)
        offsets.append(offset)
    y = tf.concat(y, axis=0)
    y.set_shape(raw_int_fs)
    offsets = tf.concat(offsets, axis=0)
    offsets.set_shape(int_bs[:3] + [2])
    # case1: visualize optical flow: minus current position
    h_add = tf.tile(tf.reshape(tf.range(bs[1]), [1, bs[1], 1, 1]),
                    [bs[0], 1, bs[2], 1])
    w_add = tf.tile(tf.reshape(tf.range(bs[2]), [1, 1, bs[2], 1]),
                    [bs[0], bs[1], 1, 1])
    offsets = offsets - tf.concat([h_add, w_add], axis=3)
    # to flow image
    flow = flow_to_image_tf(offsets)
    # # case2: visualize which pixels are attended
    # flow = highlight_flow_tf(offsets * tf.cast(mask, tf.int32))
    if rate != 1:
        flow = resize(flow, scale=rate, func=tf.image.resize_bilinear)
    return y, flow
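The flat argmax is decoded into 2-D offsets with offset // fs[2] and offset % fs[2]; the same arithmetic in miniature, with a made-up width and hypothetical indices:

import tensorflow as tf

fs2 = 32                             # background width after downscaling
flat = tf.constant([[5, 40, 1023]])  # hypothetical argmax indices
offset = tf.stack([flat // fs2, flat % fs2], axis=-1)
with tf.Session() as sess:
    print(sess.run(offset))  # [[[ 0  5] [ 1  8] [31 31]]]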
Example #10
# -*- coding=utf-8 -*-
import tensorflow as tf


def showParametersInCkpt(ckpt_dir):
    '''
    Print the names of the parameters stored in a checkpoint file.
    :param ckpt_dir: directory containing the checkpoint
    :return:
    '''
    from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file
    latest_ckp = tf.train.latest_checkpoint(ckpt_dir)
    # print_tensors_in_checkpoint_file(latest_ckp, all_tensors=True, tensor_name='', all_tensor_names=True)
    from tensorflow.contrib.framework.python.framework import checkpoint_utils
    var_list = checkpoint_utils.list_variables(latest_ckp)
    for v in var_list:
        print(v)


if __name__ == '__main__':
    # ckpt_dir = '/home/give/PycharmProjects/weakly_label_segmentation/logs/1s_weakly_label-transpose'
    # showParametersInCkpt(ckpt_dir)

    tensor = tf.random_normal([4, 256, 256, 128])
    res = tf.extract_image_patches(tensor, [1, 5, 5, 1], [1, 1, 1, 1],
                                   [1, 1, 1, 1], 'VALID')
    print(res)
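    # Expected: shape (4, 252, 252, 3200) -- 252 = 256 - 5 + 1 with 'VALID',
    # and 3200 = 5 * 5 * 128 values per patch.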
Example #11
def extract_patches(x, ksizes, strides, rates):
    return tf.extract_image_patches(
        x,
        ksizes, strides, rates,
        padding="VALID"
    )
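With 'VALID' padding each output side of this wrapper follows (in - effective) // stride + 1, where the effective kernel extent under dilation is ksize + (ksize - 1) * (rate - 1). A quick check of that arithmetic:

import tensorflow as tf

x = tf.zeros([1, 10, 10, 1])
p = extract_patches(x, [1, 3, 3, 1], [1, 2, 2, 1], [1, 2, 2, 1])
# effective extent = 3 + (3 - 1) * (2 - 1) = 5 -> (10 - 5) // 2 + 1 = 3
print(p.shape)  # (1, 3, 3, 9)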
Example #12
def attention_transfer(f,
                       b1,
                       b2,
                       ksize=3,
                       stride=1,
                       fuse_k=3,
                       softmax_scale=50.,
                       fuse=False):
    # extract patches from background feature maps with rate (1st scale)
    bs1 = tf.shape(b1)
    int_bs1 = b1.get_shape().as_list()
    w_b1 = tf.extract_image_patches(b1, [1, 4, 4, 1], [1, 4, 4, 1],
                                    [1, 1, 1, 1],
                                    padding='SAME')
    w_b1 = tf.reshape(w_b1, [int_bs1[0], -1, 4, 4, int_bs1[3]])
    w_b1 = tf.transpose(w_b1, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    # extract patches from background feature maps with rate (2nd scale)
    bs2 = tf.shape(b2)
    int_bs2 = b2.get_shape().as_list()
    w_b2 = tf.extract_image_patches(b2, [1, 2, 2, 1], [1, 2, 2, 1],
                                    [1, 1, 1, 1],
                                    padding='SAME')
    w_b2 = tf.reshape(w_b2, [int_bs2[0], -1, 2, 2, int_bs2[3]])
    w_b2 = tf.transpose(w_b2, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    # use structure feature maps as foreground for matching and use background feature maps for reconstruction.
    fs = tf.shape(f)
    int_fs = f.get_shape().as_list()
    f_groups = tf.split(f, int_fs[0], axis=0)
    w_f = tf.extract_image_patches(f, [1, ksize, ksize, 1],
                                   [1, stride, stride, 1], [1, 1, 1, 1],
                                   padding='SAME')
    w_f = tf.reshape(w_f, [int_fs[0], -1, ksize, ksize, int_fs[3]])
    w_f = tf.transpose(w_f, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw

    w_f_groups = tf.split(w_f, int_fs[0], axis=0)
    w_b1_groups = tf.split(w_b1, int_bs1[0], axis=0)
    w_b2_groups = tf.split(w_b2, int_bs2[0], axis=0)
    y1 = []
    y2 = []
    k = fuse_k
    scale = softmax_scale
    fuse_weight = tf.reshape(tf.eye(k), [k, k, 1, 1])
    for xi, wi, raw1_wi, raw2_wi in zip(f_groups, w_f_groups, w_b1_groups,
                                        w_b2_groups):
        # conv for compare
        wi = wi[0]  #(k,k,c,hw)
        onesi = tf.ones_like(wi)
        xxi = tf.nn.conv2d(tf.square(xi),
                           onesi,
                           strides=[1, 1, 1, 1],
                           padding="SAME")  #(1,h,w,hw)
        wwi = tf.reduce_sum(tf.square(wi), axis=[0, 1, 2],
                            keep_dims=True)  #(1,1,1,hw)
        xwi = tf.nn.conv2d(xi, wi, strides=[1, 1, 1, 1],
                           padding="SAME")  #(1,h,w,hw)
        di = xxi + wwi - 2 * xwi
        di_mean, di_var = tf.nn.moments(di, 3, keep_dims=True)
        di_std = di_var**0.5
        yi = -1 * tf.nn.tanh((di - di_mean) / di_std)

        # conv implementation for fuse scores to encourage large patches
        if fuse:
            yi = tf.reshape(yi, [1, fs[1] * fs[2], fs[1] * fs[2], 1])
            yi = tf.nn.conv2d(yi,
                              fuse_weight,
                              strides=[1, 1, 1, 1],
                              padding='SAME')
            yi = tf.reshape(yi, [1, fs[1], fs[2], fs[1], fs[2]])
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])
            yi = tf.reshape(yi, [1, fs[1] * fs[2], fs[1] * fs[2], 1])
            yi = tf.nn.conv2d(yi,
                              fuse_weight,
                              strides=[1, 1, 1, 1],
                              padding='SAME')
            yi = tf.reshape(yi, [1, fs[2], fs[1], fs[2], fs[1]])
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])
        yi = tf.reshape(yi, [1, fs[1], fs[2], fs[1] * fs[2]])

        # softmax to match
        yi = tf.nn.softmax(yi * scale, 3)

        wi_center1 = raw1_wi[0]
        wi_center2 = raw2_wi[0]
        y1.append(
            tf.nn.conv2d_transpose(yi,
                                   wi_center1,
                                   tf.concat([[1], bs1[1:]], axis=0),
                                   strides=[1, 4, 4, 1]))
        y2.append(
            tf.nn.conv2d_transpose(yi,
                                   wi_center2,
                                   tf.concat([[1], bs2[1:]], axis=0),
                                   strides=[1, 2, 2, 1]))

    y1 = tf.concat(y1, axis=0)
    y2 = tf.concat(y2, axis=0)

    return y1, y2
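The di computed above is the squared distance ||x - w||^2 expanded as ||x||^2 + ||w||^2 - 2<x, w>, with each term produced by a convolution; the identity itself in scalar form:

import numpy as np

x = np.array([1., 2., 3.])
w = np.array([0., 2., 5.])
lhs = np.sum((x - w) ** 2)
rhs = np.sum(x ** 2) + np.sum(w ** 2) - 2 * np.dot(x, w)
print(lhs == rhs)  # True: both sides equal 5.0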
def _convert_dataset(split_name,
                     filenames,
                     class_names_to_ids,
                     dataset_dir,
                     ksize_rows=299,
                     ksize_cols=299,
                     strides_rows=128,
                     strides_cols=128):
    """Converts the given filenames to a TFRecord dataset.

    Args:
      split_name: The name of the dataset, either 'train' or 'validation'.
      filenames: A list of absolute paths to png or jpg images.
      class_names_to_ids: A dictionary from class names (strings) to ids
        (integers).
      dataset_dir: The directory where the converted datasets are stored.
      ksize_rows, ksize_cols: The height and width of extracted image patches.
      strides_rows, strides_cols: The distance between the centers of two
        consecutive patches.
    """
    assert split_name in ['train', 'validation']

    num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS)))
    number_of_images = 0

    # The size of sliding window
    ksizes = [1, ksize_rows, ksize_cols, 1]

    # How far the centers of 2 consecutive patches are in the image
    strides = [1, strides_rows, strides_cols, 1]

    rates = [1, 1, 1, 1]  # sample pixel consecutively

    padding = 'VALID'  # or 'SAME'

    total_image_number = len(filenames)
    d = dict.fromkeys(class_names_to_ids.values(), 0)  # default 0 so 'd[class_id] += k' works

    with tf.Graph().as_default():

        with tf.Session('') as sess:

            for shard_id in range(_NUM_SHARDS):
                output_filename = _get_dataset_filename(
                    dataset_dir, split_name, shard_id)

                with tf.python_io.TFRecordWriter(
                        output_filename) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard,
                                  len(filenames))
                    for i in range(start_ndx, end_ndx):

                        try:
                            # Read the file contents (binary mode for image bytes):
                            image_data = tf.gfile.FastGFile(filenames[i],
                                                            'rb').read()
                        except Exception as e:
                            sys.stderr.write(
                                "Error in decoding image {} into tensor - {}.".
                                format(filenames[i], str(e)))
                            continue

                        image = tf.image.decode_image(image_data, channels=3)
                        image = tf.expand_dims(image, 0)

                        image_patches = tf.extract_image_patches(
                            image, ksizes, strides, rates, padding)
                        image_patch_shape = sess.run(tf.shape(image_patches))
                        nrows, ncols = (image_patch_shape[1],
                                        image_patch_shape[2])
                        #print('{},{}'.format(nrows,ncols), file=sys.stderr)

                        class_name = os.path.basename(
                            os.path.dirname(filenames[i]))
                        class_id = class_names_to_ids[class_name]

                        for nr in range(nrows):
                            for nc in range(ncols):
                                patch_image = tf.reshape(
                                    image_patches[0, nr, nc],
                                    [ksize_rows, ksize_cols, 3])
                                height, width = ksize_rows, ksize_cols

                                k = 0

                                # original image patch
                                patch_image_data = tf.image.encode_jpeg(
                                    patch_image)
                                example = dataset_utils.image_to_tfexample(
                                    sess.run(patch_image_data), 'jpg', height,
                                    width, class_id)
                                tfrecord_writer.write(
                                    example.SerializeToString())
                                k += 1

                                flipped_image = tf.image.flip_left_right(
                                    patch_image)
                                flipped_image_data = tf.image.encode_jpeg(
                                    flipped_image)
                                example = dataset_utils.image_to_tfexample(
                                    sess.run(flipped_image_data), 'jpg',
                                    height, width, class_id)
                                tfrecord_writer.write(
                                    example.SerializeToString())
                                k += 1

                                k += _rotate_flip(sess, patch_image,
                                                  tfrecord_writer, height,
                                                  width, class_id)
                                k += _image_random_X(sess, patch_image,
                                                     tfrecord_writer, height,
                                                     width, class_id)

                                number_of_images += k
                                d[class_id] += k

                                sys.stdout.write(
                                    '\r>> Converting image {}/{} shard {}, patch {}/{},{}/{}, total patch: {}.'
                                    .format(i + 1, total_image_number,
                                            shard_id, nr, nrows, nc, ncols,
                                            number_of_images))
                                sys.stdout.flush()

                        #sys.stdout.write('\r>> Converting image %d/%d shard %d' % (i+1, len(filenames), shard_id))
                        #sys.stdout.flush()

    sys.stdout.write('\n')
    sys.stdout.flush()

    with open('{}/class_id_number.txt'.format(dataset_dir), 'w') as f:
        for k, v in d.items():
            print('{}:{}'.format(k, v), file=f)

    with open('{}/{}_number_of_images.txt'.format(dataset_dir, split_name),
              'w') as f:
        f.write(str(number_of_images))
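The nrows/ncols grid the converter iterates over follows directly from the 'VALID' sliding-window arithmetic; a worked example with a hypothetical 1024x768 input and the default kernel/stride values:

height, width = 1024, 768
nrows = (height - 299) // 128 + 1  # 6
ncols = (width - 299) // 128 + 1   # 4
print(nrows * ncols)               # 24 patches per image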
def contextual_attention(f, b, mask=None, ksize=3, stride=1, rate=1,
                         fuse_k=3, softmax_scale=10., training=True, fuse=True):
    """ Contextual attention layer implementation.

    Contextual attention is first introduced in publication:
        Generative Image Inpainting with Contextual Attention, Yu et al.

    Args:
        x: Input feature to match (foreground).
        t: Input feature for match (background).
        mask: Input mask for t, indicating patches not available.
        ksize: Kernel size for contextual attention.
        stride: Stride for extracting patches from t.
        rate: Dilation for matching.
        softmax_scale: Scaled softmax for attention.
        training: Indicating if current graph is training or inference.

    Returns:
        tf.Tensor: output

    """
    # get shapes
    raw_fs = tf.shape(f)
    print("raw_fs",raw_fs.shape)
    
    raw_int_fs = f.get_shape().as_list()
    print("raw_int_fs",raw_int_fs)
    #foreground shape
    raw_int_bs = b.get_shape().as_list()
    print("raw_int_bs",raw_int_bs)
    #background shape
    '''
    raw_fs (4,)
    raw_int_fs [2, 64, 64, 128]
    raw_int_bs [2, 64, 64, 128]

    '''
    # extract patches from background with stride and rate
    kernel = 2*rate
    raw_w = tf.extract_image_patches(
        b, [1,kernel,kernel,1], [1,rate*stride,rate*stride,1], [1,1,1,1], padding='SAME')
    print("raw_w or extracted patches",raw_w)


    raw_w = tf.reshape(raw_w, [raw_int_bs[0], -1, kernel, kernel, raw_int_bs[3]])
    raw_w = tf.transpose(raw_w, [0, 2, 3, 4, 1])
    #####background patches. These patches have to be the kernels
    print("transposed raw_w or extracted patches",raw_w.shape)
    # transpose to b*k*k*c*hw


    # downscaling foreground option: downscaling both foreground and
    # background for matching and use original background for reconstruction.

    ######foreground and back ground need to be downscaled because?
    ### both are downscaled in same shape

    f = resize(f, scale=1./rate, func=tf.image.resize_nearest_neighbor)
    print("downscaled f", f.shape)
    b = resize(b, to_shape=[int(raw_int_bs[1]/rate), int(raw_int_bs[2]/rate)], func=tf.image.resize_nearest_neighbor)  # https://github.com/tensorflow/tensorflow/issues/11651
    print("downscaled b", b.shape)
    '''
    downscaled f (2, 32, 32, 128)
    downscaled b (2, 32, 32, 128)
    '''

    #######
    if mask is not None:
        mask = resize(mask, scale=1./rate, func=tf.image.resize_nearest_neighbor)
    print("again resized mask",mask.shape)
    #again resized mask (1, 32, 32, 1) 

    #########after downscaling

    fs = tf.shape(f)
    print("fs:",fs)
    #int_fs: [2, 32, 32, 128]
    int_fs = f.get_shape().as_list()
    print("int_fs:",int_fs)
    f_groups = tf.split(f, int_fs[0], axis=0)
    print("splitted f_groups:",f_groups)

    # tf.split(value, num_or_size_splits, axis) splits a tensor into pieces
    # along a single axis. For example, for a tensor x of size (10, 10),
    # tf.split(x, 2, 0) yields 2 tensors of size (5, 10), while
    # tf.split(x, 2, 1) yields 2 tensors of size (10, 5).
    # from t(H*W*C) to w(b*k*k*c*h*w)
    bs = tf.shape(b)
    print("bs:",bs)
    int_bs = b.get_shape().as_list()
    print("int_bs:",int_bs)

    w = tf.extract_image_patches(
        b, [1,ksize,ksize,1], [1,stride,stride,1], [1,1,1,1], padding='SAME')

    '''
    w or extracted patches (2, 32, 32, 1152)
    transposed w or extracted patches (2, 3, 3, 128, 1024)
    extracted mask patch shape (1, 32, 32, 9)
    transposed extracted mask patch shape (3, 3, 1, 1024)
    temporary (1, 1, 1, 1024)
    mm shape (1, 1, 1, 1024)
    split w_groups: [<tf.Tensor 'inpaint_net/split_1:0' shape=(1, 3, 3, 128, 1024) dtype=float32>, <tf.Tensor 'inpaint_net/split_1:1' shape=(1, 3, 3, 128, 1024) dtype=float32>]
    split raw_w_groups: [<tf.Tensor 'inpaint_net/split_2:0' shape=(1, 4, 4, 128, 1024) dtype=float32>, <tf.Tensor 'inpaint_net/split_2:1' shape=(1, 4, 4, 128, 1024) dtype=float32>]
    yi shape (1, 32, 32, 1024)
    yi shape after multiplying mm Tensor("inpaint_net/mul_8:0", shape=(1, 32, 32, 1024), dtype=float32)
    yi after softmax shape (1, 32, 32, 1024)
    yi shape (1, 32, 32, 1024)
    yi shape after multiplying mm Tensor("inpaint_net/mul_16:0", shape=(1, 32, 32, 1024), dtype=float32)
    yi after softmax shape (1, 32, 32, 1024)
    x_hallu [Dimension(2), Dimension(256), Dimension(256), Dimension(3)]
    '''
    print("w or extracted patches",w.shape)
    w = tf.reshape(w, [int_fs[0], -1, ksize, ksize, int_fs[3]])
    w = tf.transpose(w, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    print("transposed w or extracted patches",w.shape)
    ###again
    # process mask
    if mask is None:
        mask = tf.zeros([1, bs[1], bs[2], 1])


    m = tf.extract_image_patches(
        mask, [1,ksize,ksize,1], [1,stride,stride,1], [1,1,1,1], padding='SAME')
    print("extracted mask patch shape",m.shape)
    m = tf.reshape(m, [1, -1, ksize, ksize, 1])
    m = tf.transpose(m, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    m = m[0]
    print("transposed extracted mask patch shape",m.shape)
    temporary=tf.equal(tf.reduce_mean(m, axis=[0,1,2], keep_dims=True), 0.)
    print('temporary',temporary.shape)
    mm = tf.cast(temporary, tf.float32)
    print("mm shape",mm.shape)
    w_groups = tf.split(w, int_bs[0], axis=0)
    raw_w_groups = tf.split(raw_w, int_bs[0], axis=0)
    print("splitted w_groups:",w_groups)
    print("splitted raw_w_groups:",raw_w_groups)
    y = []
    offsets = []
    k = fuse_k
    scale = softmax_scale
    fuse_weight = tf.reshape(tf.eye(k), [k, k, 1, 1])
    for xi, wi, raw_wi in zip(f_groups, w_groups, raw_w_groups):
        # conv for compare
        
        wi = wi[0]
        wi_normed = wi / tf.maximum(tf.sqrt(tf.reduce_sum(tf.square(wi), axis=[0,1,2])), 1e-4)
        ## normalize each background patch
        yi = tf.nn.conv2d(xi, wi_normed, strides=[1,1,1,1], padding="SAME")

        # conv implementation for fuse scores to encourage large patches
        if fuse:
            yi = tf.reshape(yi, [1, fs[1]*fs[2], bs[1]*bs[2], 1])
            yi = tf.nn.conv2d(yi, fuse_weight, strides=[1,1,1,1], padding='SAME')
            yi = tf.reshape(yi, [1, fs[1], fs[2], bs[1], bs[2]])
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])
            yi = tf.reshape(yi, [1, fs[1]*fs[2], bs[1]*bs[2], 1])
            yi = tf.nn.conv2d(yi, fuse_weight, strides=[1,1,1,1], padding='SAME')
            yi = tf.reshape(yi, [1, fs[2], fs[1], bs[2], bs[1]])
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])
        yi = tf.reshape(yi, [1, fs[1], fs[2], bs[1]*bs[2]])
        #pr=tf.Print(yi,[yi],"Jisa says:")
        print("yi shape",yi.shape)
        # softmax to match
        yi *=  mm  # mask
        print("yi shape after multiplying mm",yi)
        yi = tf.nn.softmax(yi*scale, 3)
        #pr=tf.Print(yi,[yi],"Jisa says:")
        yi *=  mm  # mask
        print("yi after softmax shape",yi.shape)
        offset = tf.argmax(yi, axis=3, output_type=tf.int32)
        offset = tf.stack([offset // fs[2], offset % fs[2]], axis=-1)
        # deconv for patch pasting
        # 3.1 paste center
        wi_center = raw_wi[0]
        yi = tf.nn.conv2d_transpose(yi, wi_center, tf.concat([[1], raw_fs[1:]], axis=0), strides=[1,rate,rate,1]) / 4.
        y.append(yi)
        offsets.append(offset)
    y = tf.concat(y, axis=0)
    y.set_shape(raw_int_fs)
    offsets = tf.concat(offsets, axis=0)
    offsets.set_shape(int_bs[:3] + [2])
    # case1: visualize optical flow: minus current position
    h_add = tf.tile(tf.reshape(tf.range(bs[1]), [1, bs[1], 1, 1]), [bs[0], 1, bs[2], 1])
    w_add = tf.tile(tf.reshape(tf.range(bs[2]), [1, 1, bs[2], 1]), [bs[0], bs[1], 1, 1])
    offsets = offsets - tf.concat([h_add, w_add], axis=3)
    # to flow image
    flow = flow_to_image_tf(offsets)
    # # case2: visualize which pixels are attended
    # flow = highlight_flow_tf(offsets * tf.cast(mask, tf.int32))
    if rate != 1:
        flow = resize(flow, scale=rate, func=tf.image.resize_nearest_neighbor)
    return y, flow
Example #15
def image_to_patches(image, scale=1):
    # Integer division: plain '/' would produce float sizes under Python 3.
    patch_height = 108 // scale
    patch_width = 108 // scale
    patch_overlap = 12 // scale
    patches = tf.extract_image_patches(
        image, [1, patch_height, patch_width, 1],
        [1, patch_height - 2 * patch_overlap, patch_width - 2 * patch_overlap, 1],
        [1, 1, 1, 1], padding='VALID')
    return tf.reshape(patches,
                      [tf.shape(patches)[0] * tf.shape(patches)[1] * tf.shape(patches)[2],
                       patch_height, patch_width, 3])
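A hypothetical driver: with scale=1 the stride is 108 - 2 * 12 = 84, so adjacent patches share a 24-pixel band.

import tensorflow as tf

image = tf.zeros([1, 540, 540, 3])
patches = image_to_patches(image)
with tf.Session() as sess:
    # (540 - 108) // 84 + 1 = 6 positions per side -> a 6x6 grid
    print(sess.run(tf.shape(patches)))  # [ 36 108 108   3]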
Example #16
net, ind = conv2d(net, 256, 512, '_conv9', ind, tiny)
net, ind = conv2d(net, 512, 256, '_conv10', ind, tiny, size=1)
net, ind = conv2d(net, 256, 512, '_conv11', ind, tiny)
net, ind = conv2d(net, 512, 256, '_conv12', ind, tiny, size=1)
net, ind = conv2d(net, 256, 512, '_conv13', ind, tiny)
shortcut = net
net = max_pool(net)
net, ind = conv2d(net, 512, 1024, '_conv14', ind, tiny)
net, ind = conv2d(net, 1024, 512, '_conv15', ind, tiny, size=1)
net, ind = conv2d(net, 512, 1024, '_conv16', ind, tiny)
net, ind = conv2d(net, 1024, 512, '_conv17', ind, tiny, size=1)
net, ind = conv2d(net, 512, 1024, '_conv18', ind, tiny)
net, ind = conv2d(net, 1024, 1024, '_conv19', ind, tiny)
net, ind = conv2d(net, 1024, 1024, '_conv20', ind, tiny)
shortcut, ind = conv2d(shortcut, 512, 64, '_shortcut', ind, tiny, size=1)
shortcut = tf.extract_image_patches(shortcut, [1, 2, 2, 1], [1, 2, 2, 1],
                                    [1, 1, 1, 1], 'VALID')

net = tf.concat([shortcut, net], axis=-1)
net, ind = conv2d(net, 1280, 1024, '_conv21', ind, tiny)
out, ind = conv2d(net,
                  1024,
                  425,
                  '_conv22',
                  ind,
                  tiny,
                  size=1,
                  batchnorm=False)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
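The 2x2/stride-2 extract_image_patches on the shortcut resembles the YOLOv2 "reorg" layer; for non-overlapping patches like this it coincides with tf.space_to_depth, which a quick equivalence check confirms:

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.rand(1, 4, 4, 8), tf.float32)
a = tf.extract_image_patches(x, [1, 2, 2, 1], [1, 2, 2, 1],
                             [1, 1, 1, 1], 'VALID')
b = tf.space_to_depth(x, 2)
with tf.Session() as sess:
    print(np.allclose(*sess.run([a, b])))  # True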
Example #17
def extract_patches(self, image):
    patches = tf.extract_image_patches(image, [1, self.psz, self.psz, 1],
                                       [1, self.stride, self.stride, 1],
                                       [1, 1, 1, 1], 'VALID')
    return patches
Example #18
    def __init__(self, init_image_batch, hyperparams, info):
        self._image_batch = init_image_batch
        # tf.summary.image('input', self._image_batch, max_outputs=5)

        k1 = hyperparams['k1']
        k2 = hyperparams['k2']
        l1 = hyperparams['l1']

        with tf.name_scope("extract_patches1"):
            self.patches1 = tf.extract_image_patches(self._image_batch,
                                                     [1, k1, k2, 1],
                                                     [1, k1, k2, 1],
                                                     [1, 1, 1, 1],
                                                     padding='SAME',
                                                     name='patches')
            self.patches1 = tf.reshape(self.patches1,
                                       [-1, k1 * k2 * info.N_CHANNELS],
                                       name='patches_shaped')
            self.numofpatches = self.patches1.get_shape()[0].value
            # TODO: figure out how to unvectorize for multi-channel images
            # self.patches1 = tf.reshape(self.patches1, [-1, info.N_CHANNELS,  k1 * k2], name='patches_shaped')
            # self.patches1 = tf.transpose(self.patches1, [0, 2, 1])
            # self.zero_mean_patches1 = self.patches1 - tf.reduce_mean(self.patches1, axis=1, keep_dims=True, name='patch_means')
            self.zero_mean_patches1 = self.patches1
            x1 = tf.transpose(self.zero_mean_patches1, [1, 0])
            x1_trans = self.zero_mean_patches1
            self.patches_covariance1 = tf.matmul(x1,
                                                 x1_trans,
                                                 name='patch_covariance')

        with tf.name_scope("eignvalue_decomposition1"):
            self.x_eig_vals1, self.x_eig1 = tf.self_adjoint_eig(
                self.patches_covariance1, name='x_eig')

            self.top_x_eig1_ori = tf.reverse(self.x_eig1, axis=[-1])[:, 0:l1]
            #     self.top_x_eig1 = tf.transpose(tf.reshape(self.top_x_eig1_ori, [info.N_CHANNELS, k1, k2, l1]), [2, 1, 0, 3])
            #     self.top_x_eig2 = tf.transpose(image.rotate(tf.reshape(self.top_x_eig1_ori, [info.N_CHANNELS, k1, k2, l1]), np.pi), [2, 1, 3, 0])
            #
            self.top_x_eig_vals1 = tf.expand_dims(tf.reverse(self.x_eig_vals1,
                                                             axis=[-1])[0:l1],
                                                  axis=1)
        #     # self.top_x_eig_vals1 = self.x_eig_vals1[0:l1]
        #     self.filt1_viz = tf.transpose(self.top_x_eig1, [3, 0, 1, 2])
        #     tf.summary.image('filt1', self.filt1_viz, max_outputs=l1)
        #
        # with tf.name_scope("convolution1"):
        #     self.conv1 = tf.nn.conv2d(self._image_batch, self.top_x_eig1, [1, 1, 1, 1], padding='SAME')
        #
        #     self.conv1 = tf.transpose(self.conv1, [3, 0, 1, 2])
        #     # conv1 is now (l1, batch_size, img_w, img_h)
        #     self.conv1_batch = tf.expand_dims(tf.reshape(self.conv1, [-1, info.IMAGE_W, info.IMAGE_H]), axis=3)
        #     # conv1 batch is (l1 * batch_size, img_w, img_h)
        #
        #     tf.summary.image('conv1', self.conv1_batch, max_outputs=l1)
        #
        # with tf.name_scope("normalization_of_convolution"):
        #     self.conv1_flatten = tf.reshape(self.conv1, [l1, info.batch_size * info.IMAGE_W * info.IMAGE_H])
        #     self.eigen_vals = tf.tile(self.top_x_eig_vals1, [1, info.batch_size * info.IMAGE_W * info.IMAGE_H])
        #     self.conv1_div_vals = tf.divide(self.conv1_flatten, tf.sqrt(tf.sqrt(self.eigen_vals)))
        #     self.conv1_output = tf.transpose(tf.reshape(self.conv1_div_vals, [l1, info.batch_size, info.IMAGE_W, info.IMAGE_H]), [1, 2, 3, 0])
        #     self.outputs = tf.nn.conv2d(self.conv1_output, self.top_x_eig2, [1, 1, 1, 1], padding='SAME')
        #     # self.outputs = self.conv1_flatten

        # We proved that mse_loss = Sum(eigen_vals), thus we do not need any convolutions ops. Modified at 5.24 14:38
        with tf.name_scope('MSE_Scaling_Op'):
            self.eigen_val_sum = tf.reduce_sum(self.top_x_eig_vals1)
            self.outputs = tf.sqrt(1.0 / self.numofpatches *
                                   self.top_x_eig_vals1)

        with tf.name_scope('Decomposition_maps'):
            self.decompatches = []
            for i in range(k1 * k2):
                self.decompatches.append(
                    tf.matmul(
                        tf.matmul(
                            self.zero_mean_patches1,
                            tf.expand_dims(self.top_x_eig1_ori[:, i], axis=1)),
                        tf.expand_dims(self.top_x_eig1_ori[:, i], axis=0)))
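The covariance/eigendecomposition pipeline above in miniature (a sketch with made-up 2x2 grayscale patches): tf.self_adjoint_eig returns eigenvalues in ascending order, which is why the code reverses before taking the top l1 components.

import tensorflow as tf

patches = tf.random_normal([100, 4])    # 100 flattened 2x2 patches
cov = tf.matmul(patches, patches, transpose_a=True)
eig_vals, eig_vecs = tf.self_adjoint_eig(cov)
top = tf.reverse(eig_vecs, axis=[-1])[:, 0:2]  # two leading eigenvectors
with tf.Session() as sess:
    print(sess.run(tf.shape(top)))  # [4 2]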
Example #20
    def run_style_transfer(self,
                           content_path,
                           style_path,
                           content_map_path="",
                           style_map_path="",
                           num_iterations=100,
                           content_weight=1e4,
                           style_weight=1e-2,
                           trans_weight=0):
        # We don't need to (or want to) train any layers of our model, so we set their
        # trainable to false.
        model = self.get_model()
        for layer in model.layers:
            layer.trainable = False

        # Get the style and content feature representations (from our specified intermediate layers)
        style_features, content_features = self.get_feature_representations(
            model, content_path, style_path)
        style_map_features, _ = self.get_feature_representations(
            model, content_map_path, style_map_path)
        content_map_features, _ = self.get_feature_representations(
            model, style_map_path, content_map_path)

        size = 5
        stride = 4
        base_style_patches = []
        i = 0
        style_img = load_img(style_path)

        for style_feat_img, style_map_img in zip(style_features,
                                                 style_map_features):
            print(style_feat_img.shape)
            print(style_map_img.shape)
            style_feat_img = tf.concat([style_feat_img, style_map_img], -1)
            print(style_feat_img.shape)

            li = tf.squeeze(
                tf.extract_image_patches(tf.expand_dims(style_feat_img,
                                                        axis=0),
                                         ksizes=[1, size, size, 1],
                                         strides=[1, stride, stride, 1],
                                         rates=[1, 1, 1, 1],
                                         padding='VALID'), 0)
            li = tf.reshape(
                li, [((style_feat_img.shape[0] - size) // stride + 1) *
                     ((style_feat_img.shape[1] - size) // stride + 1), -1])
            # li = tf.reshape(li, [(style_feat_img.shape[0] - 2) * (style_feat_img.shape[1] - 2), -1])
            base_style_patches.append(li)

            # print( i,len( base_style_patches[i] ), base_style_patches[i][0] )
            i += 1
        # print(len(base_style_patches))

        # Set initial image from the content image (noise initialization is
        # kept below as a commented-out alternative)
        # init_image = load_noise_img(load_and_process_img(content_path))
        init_image = load_and_process_img(content_path)

        init_image = tfe.Variable(init_image, dtype=tf.float32)
        # Create our optimizer
        opt = tf.train.AdamOptimizer(learning_rate=50,
                                     beta1=0.99,
                                     epsilon=1e-1)

        # For displaying intermediate images
        iter_count = 1

        # Store our best result
        best_loss, best_img = float('inf'), None

        # Create a nice config
        loss_weights = (style_weight, content_weight, trans_weight)
        cfg = {
            'model': model,
            'loss_weights': loss_weights,
            'init_image': init_image,
            'base_style_patches': base_style_patches,
            'content_features': content_features,
            'content_map_features': content_map_features
        }

        # For displaying
        num_rows = 2
        num_cols = 10
        display_interval = num_iterations // (num_rows * num_cols)
        start_time = time.time()
        global_start = time.time()

        norm_means = np.array([103.939, 116.779, 123.68])
        min_vals = -norm_means
        max_vals = 255 - norm_means

        imgs = []
        for i in range(num_iterations):
            print("himmat rakho")
            grads, all_loss = self.compute_grads(cfg)
            print("gradient aega")
            loss, style_score, content_score, trans_score = all_loss
            opt.apply_gradients([(grads, init_image)])

            print("gradient agya")
            clipped = tf.clip_by_value(init_image, min_vals, max_vals)
            # print("II 1",init_image)
            init_image.assign(clipped)
            # print("II 2",cfg['init_image'])
            end_time = time.time()

            if loss < best_loss:
                # Update best loss and best image from total loss.
                best_loss = loss
                best_img = deprocess_img(init_image.numpy())

            if i % 1 == 0:
                start_time = time.time()

                # Use the .numpy() method to get the concrete numpy array
                plot_img = init_image.numpy()
                plot_img = deprocess_img(plot_img)

                if i % display_interval == 0:
                    imgs.append(plot_img)

                print('Iteration: {}'.format(i))
                print('Total loss: {:.4e}, '
                      'style loss: {:.4e}, '
                      'content loss: {:.4e}, '
                      'trans loss: {:.4e}, '
                      'time: {:.4f}s'.format(loss, style_score, content_score,
                                             trans_score,
                                             time.time() - start_time))

        print('Total time: {:.4f}s'.format(time.time() - global_start))
        plt.figure(figsize=(14, 4))
        for i, img in enumerate(imgs):
            plt.subplot(num_rows, num_cols, i + 1)
            plt.imshow(img)
            plt.xticks([])
            plt.yticks([])

        plt.savefig(self.results + content_path + '_inter.jpg')

        return best_img, best_loss
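
# Hypothetical driver for the method above; assumes the enclosing style-transfer
# class has been instantiated as `st` and that eager execution is enabled, as
# the tfe.Variable usage above implies. All paths are placeholders.
best_img, best_loss = st.run_style_transfer('content.jpg', 'style.jpg',
                                            content_map_path='content_map.jpg',
                                            style_map_path='style_map.jpg',
                                            num_iterations=100)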
Example #21
def gmm_unsupervised_init(sim_op, templates_var, weights_var):
    """Initialize a similarity layer using gmm unsupervised learning

    Initializes the templates and weights using gmm
    The function returns two ops. The first is used to initialize the learning and the second should be run iteratively
     with all the data.

    Parameters
    ----------
    sim_op : tf.Operation | tf.Tensor
        the similarity operation (or the tensor which is the output of the similarity)
    templates_var : tf.Variable
        the templates variable for this similarity layer
    weights_var : tf.Variable
        the weights variable for this similarity layer

    Returns
    -------
    A tuple (init_op, update_op) where init_op must be executed by a session before using the update op
    and the update_op is the operation that performs the learning.
    """
    if isinstance(sim_op, tf.Tensor):
        sim_op = sim_op.op
    if not sim_op.type == 'Similarity':
        raise ValueError(
            'gmm_unsupervised_init needs a similarity op, got %s instead' %
            sim_op.type)
    assert (isinstance(sim_op, tf.Operation))
    name = sim_op.name + '_gmm_init'
    with tf.name_scope(name):
        input_tensor = sim_op.inputs[0]
        templates_tensor = sim_op.inputs[1]
        num_instances = templates_tensor.get_shape().as_list()[0]

        strides = sim_op.get_attr('strides')
        blocks = sim_op.get_attr('blocks')
        strides = [1, strides[0], strides[1], 1]
        blocks = [1, blocks[0], blocks[1], 1]
        patches = tf.extract_image_patches(
            tf.transpose(input_tensor, (0, 2, 3, 1)),
            ksizes=blocks,
            strides=strides,
            rates=[1, 1, 1, 1],
            padding='VALID')
        _, _, _, patch_size = patches.get_shape().as_list()
        patches = tf.reshape(patches, [-1, patch_size])
        _, _, _, training_op = _gmm(inp=patches,
                                    initial_clusters='random',
                                    random_seed=33,
                                    num_clusters=num_instances,
                                    covariance_type='diag',
                                    params='mc')
        clusters_var = [
            v for v in tf.global_variables()
            if v.name == name + '/' + 'clusters:0'
        ][0]
        clusters = clusters_var.op.outputs[0]
        clusters_covs_var = [
            v for v in tf.global_variables()
            if v.name == name + '/' + 'clusters_covs:0'
        ][0]
        clusters_covs = clusters_covs_var.op.outputs[0]

        # This hacky code makes sure that the gmm code does not add a variable
        # initializer that depends on the input, which is usually a placeholder.
        # Without it, the global initializer must be run with a feed dict, which
        # does not work for Keras and is awkward for other code.
        non_gmm_vars = [
            v for v in tf.global_variables() if not v.name.startswith(name)
        ]
        gmm_vars = [
            v for v in tf.global_variables() if v.name.startswith(name)
        ]
        graph = tf.get_default_graph()
        graph.clear_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        for v in non_gmm_vars:
            graph.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)
        initializer = tf.group(*[v.initializer for v in gmm_vars])

        channels, block_rows, block_cols = templates_tensor.get_shape(
        ).as_list()[1:]
        reshaped_clusters = tf.reshape(
            clusters, (num_instances, block_rows, block_cols, channels))
        reshaped_covs = tf.reshape(
            clusters_covs, (num_instances, block_rows, block_cols, channels))
        transposed_clusters = tf.transpose(reshaped_clusters, [0, 3, 1, 2])
        transposed_covs = tf.sqrt(tf.transpose(reshaped_covs, [0, 3, 1, 2]))
        with tf.control_dependencies([training_op]):
            assign1 = tf.assign(templates_var, transposed_clusters)
            assign2 = tf.assign(weights_var, transposed_covs)
        return initializer, tf.group(assign1, assign2, name='gmm_init_assign')
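
# Hypothetical usage sketch: assumes a SimNets-style `Similarity` op is already
# in the graph (`sim`), together with its `templates` and `weights` variables,
# an input placeholder `images_ph`, and some iterable of data `training_batches`.
init_op, update_op = gmm_unsupervised_init(sim, templates, weights)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(init_op)  # must run once before the update op
    for batch in training_batches:
        sess.run(update_op, feed_dict={images_ph: batch})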
Example #22
def tf_histogram_of_oriented_gradients(img_raw,
                                       x_kernel=[-1.0, 0.0, 1.0],
                                       y_kernel=[-1.0, 0.0, 1.0],
                                       pixels_in_cell=8,
                                       cells_in_block=1,
                                       n_angle_bins=9):
    ## COMPUTE GRADIENT MAGNITUDES/ORIENTATIONS
    img_raw = img_raw if len(img_raw.shape) == 4 else tf.expand_dims(
        img_raw, 0)  # convert single image to batch of 1
    img = tf.to_float(img_raw)  # convert int pixel values to float
    x_kernel = tf.reshape(x_kernel, [3, 1, 1, 1])  # 3x1 conv kernel (spans rows)
    y_kernel = tf.reshape(y_kernel,
                          [1, 3, 1, 1])  # 1x3 conv kernel (spans columns)
    x_grad_by_c = tf.nn.depthwise_conv2d(
        img, tf.tile(x_kernel, [1, 1, 3, 1]), [1, 1, 1, 1],
        "SAME")  # computing channel x/y gradients by convolution
    y_grad_by_c = tf.nn.depthwise_conv2d(
        img, tf.tile(y_kernel, [1, 1, 3, 1]), [1, 1, 1, 1],
        "SAME")  # (kernel tiled across 3 RGB channels)
    grad_mag_by_c = tf.sqrt(tf.square(x_grad_by_c) +
                            tf.square(y_grad_by_c))  # gradient magnitude
    grad_ang_by_c = tf.atan2(y_grad_by_c, x_grad_by_c)  # gradient orientation
    grad_mag = tf.reduce_max(grad_mag_by_c,
                             axis=-1)  # select largest channel gradient
    grad_ang = tf.reduce_sum(grad_ang_by_c *
                             tf.one_hot(tf.argmax(grad_mag_by_c, axis=-1), 3),
                             axis=-1)  # select corresponding orientation

    ## GROUP VALUES INTO CELLS (8x8)
    p = pixels_in_cell
    grad_mag = tf.extract_image_patches(tf.expand_dims(grad_mag,
                                                       -1), [1, p, p, 1],
                                        [1, p, p, 1], [1, 1, 1, 1], "VALID")
    grad_ang = tf.extract_image_patches(tf.expand_dims(grad_ang,
                                                       -1), [1, p, p, 1],
                                        [1, p, p, 1], [1, 1, 1, 1], "VALID")

    ## COMPUTE CELL HISTOGRAMS
    bin_width = np.pi / n_angle_bins
    grad_ang = tf.mod(grad_ang, np.pi)  # unsigned gradients
    grad_ang_idx = grad_ang / bin_width
    lo_bin = tf.floor(grad_ang_idx)
    hi_bin = lo_bin + 1
    lo_weight = (hi_bin - grad_ang_idx) * grad_mag
    hi_weight = (grad_ang_idx - lo_bin) * grad_mag
    hi_bin = tf.mod(hi_bin, n_angle_bins)

    lo_bin = tf.to_int32(lo_bin)
    hi_bin = tf.to_int32(hi_bin)
    cell_hogs = tf.reduce_sum(
        tf.one_hot(lo_bin, n_angle_bins) * tf.expand_dims(lo_weight, -1) +
        tf.one_hot(hi_bin, n_angle_bins) * tf.expand_dims(hi_weight, -1), -2)

    ## ASSEMBLE AND NORMALIZE BLOCK HISTOGRAM VECTORS
    unnormalized_hog = tf.extract_image_patches(
        cell_hogs, [1, cells_in_block, cells_in_block, 1], [1, 1, 1, 1],
        [1, 1, 1, 1], "VALID")
    hog_descriptor = tf.reshape(tf.nn.l2_normalize(unnormalized_hog, -1),
                                [unnormalized_hog.shape[0], -1])

    return cell_hogs, hog_descriptor
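
# Minimal sketch (TF1 graph mode) running the HOG pipeline above on one random
# 64x64 RGB image. With the default 8-pixel cells and 9 bins this yields one
# 9-bin histogram per cell and an 8*8*9 = 576-dimensional descriptor.
import numpy as np
import tensorflow as tf

img = tf.placeholder(tf.uint8, [1, 64, 64, 3])
cell_hogs, descriptor = tf_histogram_of_oriented_gradients(img)
with tf.Session() as sess:
    h, d = sess.run([cell_hogs, descriptor],
                    {img: np.random.randint(0, 255, (1, 64, 64, 3), np.uint8)})
print(h.shape)  # (1, 8, 8, 9)
print(d.shape)  # (1, 576)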
Example #23
def crnn(tensor, kernel_size, stride, out_channels, rnn_n_layers, rnn_type,
         bidirectional, w_std, padding, scope_name):
    with tf.variable_scope(
            scope_name,
            initializer=tf.truncated_normal_initializer(stddev=w_std)):
        # Expand to have 4 dimensions if needed
        if len(tensor.shape) == 3:
            tensor = tf.expand_dims(tensor, 3)

        # Extract the patches (returns [batch, time-steps, 1, patch content flattened])
        batch_size = tensor.shape[0].value
        n_in_features = tensor.shape[2].value
        patches = tf.extract_image_patches(
            images=tensor,
            ksizes=[1, kernel_size, n_in_features, 1],
            strides=[1, stride, n_in_features, 1],
            rates=[1, 1, 1, 1],
            padding=padding)
        patches = patches[:, :, 0, :]

        # Reshape to do:
        # 1) reshape the flattened patches back to [kernel_size, n_in_features]
        # 2) combine the batch and time-steps dimensions (which will be the new 'batch' size, for the RNN)
        # now shape will be [batch * time-steps, kernel_size, n_features]
        time_steps_after_stride = patches.shape[1].value
        patches = tf.reshape(
            patches,
            [batch_size * time_steps_after_stride, kernel_size, n_in_features])

        # Transpose and convert to a list, to fit the tf.contrib.rnn.static_rnn requirements
        # Now will be a list of length kernel_size, each element of shape [batch * time-steps, n_features]
        patches = tf.unstack(tf.transpose(patches, [1, 0, 2]))

        # Create the RNN Cell
        if rnn_type == 'simple':
            rnn_cell_func = tf.contrib.rnn.BasicRNNCell
        elif rnn_type == 'lstm':
            rnn_cell_func = tf.contrib.rnn.LSTMBlockCell
        elif rnn_type == 'gru':
            rnn_cell_func = tf.contrib.rnn.GRUBlockCell
        else:
            raise ValueError("unknown rnn_type '%s'" % rnn_type)
        if not bidirectional:
            rnn_cell = rnn_cell_func(out_channels)
        else:
            rnn_cell_f = rnn_cell_func(out_channels)
            rnn_cell_b = rnn_cell_func(out_channels)

        # Multilayer RNN? (does not appear in the original paper)
        if rnn_n_layers > 1:
            if not bidirectional:
                rnn_cell = tf.contrib.rnn.MultiRNNCell([rnn_cell] *
                                                       rnn_n_layers)
            else:
                rnn_cell_f = tf.contrib.rnn.MultiRNNCell([rnn_cell_f] *
                                                         rnn_n_layers)
                rnn_cell_b = tf.contrib.rnn.MultiRNNCell([rnn_cell_b] *
                                                         rnn_n_layers)

        # The RNN itself
        if not bidirectional:
            outputs, state = tf.contrib.rnn.static_rnn(rnn_cell,
                                                       patches,
                                                       dtype=tf.float32)
        else:
            outputs, output_state_fw, output_state_bw = tf.contrib.rnn.static_bidirectional_rnn(
                rnn_cell_f, rnn_cell_b, patches, dtype=tf.float32)

        # Use only the output of the last time-step (shape will be [batch * time-steps, out_channels]).
        # In the case of a bidirectional RNN, we want to take the last time-step of the forward RNN,
        # and the first time-step of the backward RNN.
        if not bidirectional:
            outputs = outputs[-1]
        else:
            half = int(outputs[0].shape.as_list()[-1] / 2)
            outputs = tf.concat([outputs[-1][:, :half], outputs[0][:, half:]],
                                axis=1)

        # Expand the batch * time-steps back (shape will be [batch_size, time_steps, out_channels])
        if bidirectional:
            out_channels = 2 * out_channels
        outputs = tf.reshape(
            outputs, [batch_size, time_steps_after_stride, out_channels])

        return outputs
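
# Hypothetical usage sketch for the CRNN layer above (TF1 graph mode): a batch
# of 8 sequences with 100 time-steps and 40 features. With kernel_size=5,
# stride=2 and VALID padding, the output has (100 - 5)//2 + 1 = 48 time-steps.
x = tf.placeholder(tf.float32, [8, 100, 40])
y = crnn(x, kernel_size=5, stride=2, out_channels=64, rnn_n_layers=1,
         rnn_type='lstm', bidirectional=False, w_std=0.02,
         padding='VALID', scope_name='crnn1')
print(y.shape)  # (8, 48, 64)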
Example #24
    def compute_stats(self, loss_sampled, var_list=None):
        varlist = var_list
        if varlist is None:
            varlist = tf.trainable_variables()

        gs = tf.gradients(loss_sampled, varlist, name='gradientsSampled')
        self.gs = gs
        factors = self.getFactors(gs, varlist)
        stats = self.getStats(factors, varlist)

        updateOps = []
        statsUpdates = {}
        statsUpdates_cache = {}
        for var in varlist:
            opType = factors[var]['opName']
            fops = factors[var]['op']
            fpropFactor = factors[var]['fpropFactors_concat']
            fpropStats_vars = stats[var]['fprop_concat_stats']
            bpropFactor = factors[var]['bpropFactors_concat']
            bpropStats_vars = stats[var]['bprop_concat_stats']
            SVD_factors = {}
            for stats_var in fpropStats_vars:
                stats_var_dim = int(stats_var.get_shape()[0])
                if stats_var not in statsUpdates_cache:
                    old_fpropFactor = fpropFactor
                    B = (tf.shape(fpropFactor)[0])  # batch size
                    if opType == 'Conv2D':
                        strides = fops.get_attr("strides")
                        padding = fops.get_attr("padding")
                        convkernel_size = var.get_shape()[0:3]

                        KH = int(convkernel_size[0])
                        KW = int(convkernel_size[1])
                        C = int(convkernel_size[2])
                        flatten_size = int(KH * KW * C)

                        Oh = int(bpropFactor.get_shape()[1])
                        Ow = int(bpropFactor.get_shape()[2])

                        if Oh == 1 and Ow == 1 and self._channel_fac:
                            # factorization along the channels:
                            # assume independence among input channels
                            # factor = B x 1 x 1 x (KH x KW x C)
                            # patches = B x Oh x Ow x (KH x KW x C)
                            if len(SVD_factors) == 0:
                                # find closest rank-1 approx to the feature map
                                S, U, V = tf.batch_svd(tf.reshape(
                                    fpropFactor, [-1, KH * KW, C]))
                                # get rank-1 approx slides
                                sqrtS1 = tf.expand_dims(tf.sqrt(S[:, 0, 0]), 1)
                                patches_k = U[:, :, 0] * sqrtS1  # B x KH*KW
                                full_factor_shape = fpropFactor.get_shape()
                                patches_k.set_shape(
                                    [full_factor_shape[0], KH * KW])
                                patches_c = V[:, :, 0] * sqrtS1  # B x C
                                patches_c.set_shape([full_factor_shape[0], C])
                                SVD_factors[C] = patches_c
                                SVD_factors[KH * KW] = patches_k
                            fpropFactor = SVD_factors[stats_var_dim]

                        else:
                            # poor memory usage implementation
                            patches = tf.extract_image_patches(
                                fpropFactor,
                                ksizes=[1, convkernel_size[0], convkernel_size[1], 1],
                                strides=strides,
                                rates=[1, 1, 1, 1],
                                padding=padding)

                            if self._approxT2:
                                # T^2 terms * 1/T^2, size: B x C
                                fpropFactor = tf.reduce_mean(patches, [1, 2])
                            else:
                                # size: (B x Oh x Ow) x C
                                fpropFactor = tf.reshape(
                                    patches, [-1, flatten_size]) / Oh / Ow
                    fpropFactor_size = int(fpropFactor.get_shape()[-1])
                    if stats_var_dim == (fpropFactor_size + 1) and not self._blockdiag_bias:
                        if opType == 'Conv2D' and not self._approxT2:
                            # correct padding for numerical stability (we
                            # divided out OhxOw from activations for T1 approx)
                            fpropFactor = tf.concat([fpropFactor, tf.ones(
                                [tf.shape(fpropFactor)[0], 1]) / Oh / Ow], 1)
                        else:
                            # use homogeneous coordinates
                            fpropFactor = tf.concat(
                                [fpropFactor, tf.ones([tf.shape(fpropFactor)[0], 1])], 1)

                    # average over the number of data points in a batch
                    # divided by B
                    cov = tf.matmul(fpropFactor, fpropFactor,
                                    transpose_a=True) / tf.cast(B, tf.float32)
                    updateOps.append(cov)
                    statsUpdates[stats_var] = cov
                    if opType != 'Conv2D':
                        # HACK: for convolution we recompute fprop stats for
                        # every layer including forking layers
                        statsUpdates_cache[stats_var] = cov

            for stats_var in bpropStats_vars:
                stats_var_dim = int(stats_var.get_shape()[0])
                if stats_var not in statsUpdates_cache:
                    old_bpropFactor = bpropFactor
                    bpropFactor_shape = bpropFactor.get_shape()
                    B = tf.shape(bpropFactor)[0]  # batch size
                    C = int(bpropFactor_shape[-1])  # num channels
                    if opType == 'Conv2D' or len(bpropFactor_shape) == 4:
                        if fpropFactor is not None:
                            if self._approxT2:
                                bpropFactor = tf.reduce_sum(
                                    bpropFactor, [1, 2])  # T^2 terms * 1/T^2
                            else:
                                bpropFactor = tf.reshape(
                                    bpropFactor, [-1, C]) * Oh * Ow  # T * 1/T terms
                        else:
                            # just doing block diag approx. spatial independent
                            # structure does not apply here. summing over
                            # spatial locations
                            bpropFactor = tf.reduce_sum(bpropFactor, [1, 2])

                    # assume sampled loss is averaged. TODO: figure out a
                    # better way to handle this
                    bpropFactor *= tf.to_float(B)
                    ##

                    cov_b = tf.matmul(
                        bpropFactor, bpropFactor, transpose_a=True) / tf.to_float(tf.shape(bpropFactor)[0])

                    updateOps.append(cov_b)
                    statsUpdates[stats_var] = cov_b
                    statsUpdates_cache[stats_var] = cov_b
                    
        self.statsUpdates = statsUpdates
        return statsUpdates
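
# Standalone sketch of the rank-1 SVD factorization used in the channel
# factorization branch above: sqrt(s1) is split between the two leading
# singular vectors, so their outer product is the best rank-1 approximation
# of a KH*KW x C feature block. Shapes here are illustrative.
import numpy as np

M = np.random.rand(9, 16)  # one KH*KW x C block (3x3 kernel, 16 channels)
U, S, Vt = np.linalg.svd(M, full_matrices=False)
patches_k = U[:, 0] * np.sqrt(S[0])  # length KH*KW, like patches_k above
patches_c = Vt[0, :] * np.sqrt(S[0])  # length C, like patches_c above
print(np.linalg.norm(M - np.outer(patches_k, patches_c)))  # rank-1 residual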
Example #25
def tf_hog_descriptor(images,
                      cell_size=8,
                      block_size=2,
                      block_stride=1,
                      n_bins=9,
                      grayscale=False):

    batch_size, height, width, depth = images.shape
    batch_size = tf.shape(images)[0]
    scale_factor = tf.constant(180 / n_bins,
                               name="scale_factor",
                               dtype=tf.float32)

    img = images
    if grayscale and depth == 3:
        img = tf.image.rgb_to_grayscale(img, name="ImgGray")

    # automatically padding height and width to valid size (multiples of cell size)
    if height % cell_size != 0 or width % cell_size != 0:
        height = height + (cell_size - (height % cell_size)) % cell_size
        width = width + (cell_size - (width % cell_size)) % cell_size
        img = tf.image.resize_image_with_crop_or_pad(img, height, width)

    # gradients
    grad = tf_deriv(img)
    g_x = grad[:, :, :, 0::2]
    g_y = grad[:, :, :, 1::2]

    # masking unwanted gradients of edge pixels
    mask_depth = 1 if grayscale else depth
    g_x_mask = np.ones((1, height, width, mask_depth))
    g_y_mask = np.ones((1, height, width, mask_depth))
    g_x_mask[:, :, (0, -1)] = 0
    g_y_mask[:, (0, -1)] = 0
    g_x_mask = tf.constant(g_x_mask, dtype=tf.float32)
    g_y_mask = tf.constant(g_y_mask, dtype=tf.float32)
    g_x_mask = tf.tile(g_x_mask, [batch_size, 1, 1, 1])
    g_y_mask = tf.tile(g_y_mask, [batch_size, 1, 1, 1])

    g_x = g_x * g_x_mask
    g_y = g_y * g_y_mask

    # maximum norm gradient selection
    g_norm = tf.add(tf.abs(g_x), tf.abs(g_y), "GradNorm")

    if not grayscale and depth != 1:
        # maximum norm gradient selection
        idx = tf.argmax(g_norm, 3)
        g_norm = tf.expand_dims(tf_select_by_idx(g_norm, idx, grayscale), -1)
        g_x = tf.expand_dims(tf_select_by_idx(g_x, idx, grayscale), -1)
        g_y = tf.expand_dims(tf_select_by_idx(g_y, idx, grayscale), -1)

    g_dir = tf_rad2deg(tf.atan2(g_y, g_x)) % 180
    g_bin = tf.to_int32(g_dir / scale_factor, name="Bins")

    # cells partitioning
    cell_norm = tf.space_to_depth(g_norm, cell_size, name="GradCells")
    cell_bins = tf.space_to_depth(g_bin, cell_size, name="BinsCells")

    # cells histograms
    hist = list()
    zero = tf.zeros_like(cell_bins, dtype=tf.float32)
    for i in range(n_bins):
        mask = tf.equal(cell_bins, tf.constant(i, name="%i" % i))
        hist.append(tf.reduce_mean(tf.where(mask, cell_norm, zero), 3))
    hist = tf.transpose(tf.stack(hist), [1, 2, 3, 0], name="Hist")

    # blocks partitioning
    block_hist = tf.extract_image_patches(
        hist,
        ksizes=[1, block_size, block_size, 1],
        strides=[1, block_stride, block_stride, 1],
        rates=[1, 1, 1, 1],
        padding='VALID',
        name="BlockHist")

    # block normalization
    block_hist = tf.nn.l2_normalize(block_hist, 3, epsilon=1.0)

    # HOG descriptor
    hog_descriptor = tf.reshape(block_hist,
                                [batch_size,
                                 int(block_hist.get_shape()[1]) * \
                                 int(block_hist.get_shape()[2]) * \
                                 int(block_hist.get_shape()[3])],
                                 name='HOGDescriptor')

    return hog_descriptor
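
# Quick shape check for the descriptor above (a sketch; assumes the module's
# tf_deriv and tf_select_by_idx helpers are in scope). For 64x64 RGB inputs
# with 8-pixel cells, 2x2 blocks and block stride 1 there are (8-2+1)^2 = 49
# blocks of 2*2*9 = 36 values each, i.e. a 1764-dimensional descriptor.
images = tf.placeholder(tf.float32, [None, 64, 64, 3])
hog = tf_hog_descriptor(images)
print(hog.get_shape())  # (?, 1764)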
Example #26
          verbose=1,
          validation_data=(x_val, y_val))

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
#%%predict
for i in range(9, 11):
    qa, b4, b3, b2 = df['BQA'].iloc[i], df['B4'].iloc[i], df['B3'].iloc[i], df[
        'B2'].iloc[i]
    img_t = img_preprocess(qa, b4, b3, b2, tdimn)[0]
    print(img_t.shape)
    with tf.Session() as sess:
        patches3 = tf.extract_image_patches(images=img_t,
                                            ksizes=[1, pn, pn, 1],
                                            strides=[1, 1, 1, 1],
                                            rates=[1, 1, 1, 1],
                                            padding='SAME')
        patches3 = tf.reshape(patches3, [-1, pn, pn, 3])
        val3 = sess.run(patches3)

    d = {0: 0, 1: 1}
    mask_n = np.zeros(tdimn)
    z = 0
    y = 0
    batch = tdimn[0]
    while y < tdimn[0]:
        pred = model.predict(val3[z:z + batch, :, :, :])
        for r in xrange(tdimn[0]):
            mask_n[r, y] = d[np.argmax(pred[r])]
        z += batch
        y += 1  # advance to the next column so the loop terminates
Example #27
			if i == 0 and j == 0:
				parts = part
			else:
				parts = tf.concat([parts,part], 0)
	return parts
"""
print(x_batch)
print(test_x_batch)

#add
patch_size_t = 180
stride = 30
patch_size_v = 80
patch_t = tf.extract_image_patches(x_batch,
                                   ksizes=[1, patch_size_t, patch_size_t, 1],
                                   strides=[1, stride, stride, 1],
                                   rates=[1, 1, 1, 1],
                                   padding='VALID')
patch_v = tf.extract_image_patches(test_x_batch,
                                   ksizes=[1, patch_size_v, patch_size_v, 1],
                                   strides=[1, stride, stride, 1],
                                   rates=[1, 1, 1, 1],
                                   padding='VALID')

print("patch_t shape", patch_t.shape)
print("patch_v shape", patch_v.shape)


def parts(patch, size):
    for i in range(patch.shape[0]):
        one_patch = patch[i, :, :, :]
Example #28
#%%
plt.figure(figsize=(40, 20))
plt.subplot(131)
plt.imshow(misc.imrotate(np.squeeze(orig1, axis=2), 18, interp='nearest'),
           cmap=plt.get_cmap('gist_gray'))
plt.imshow(mask, cmap=plt.get_cmap('Reds'), alpha=0.5)
plt.title('Cloud-2')
plt.show()

#%%patch extraction
img = np.reshape(img, (1, mask.shape[0], mask.shape[1], 3))
img2 = np.reshape(mask, (1, mask.shape[0], mask.shape[1], 1))
with tf.Session() as sess:
    patches = tf.extract_image_patches(images=img,
                                       ksizes=[1, 16, 16, 1],
                                       strides=[1, 1, 1, 1],
                                       rates=[1, 1, 1, 1],
                                       padding='SAME')
    patches = tf.reshape(patches, [-1, 16, 16, 3])

    patches2 = tf.extract_image_patches(images=img2,
                                        ksizes=[1, 16, 16, 1],
                                        strides=[1, 1, 1, 1],
                                        rates=[1, 1, 1, 1],
                                        padding='SAME')
    patches2 = tf.reshape(patches2, [-1, 16, 16, 1])

    val = sess.run(patches)
    val2 = sess.run(patches2)

#%%data preparation
Example #29
def conv_capsule_mat(input_tensor,
                     input_activation,
                     input_dim,
                     output_dim,
                     layer_name,
                     num_routing=3,
                     num_in_atoms=3,
                     num_out_atoms=3,
                     stride=2,
                     kernel_size=5,
                     min_var=0.0005,
                     final_beta=1.0):
    """Convolutional Capsule layer with Pose Matrices."""
    print('caps conv stride: {}'.format(stride))
    in_atom_sq = num_in_atoms * num_in_atoms
    with tf.variable_scope(layer_name):
        input_shape = tf.shape(input_tensor)
        _, _, _, in_height, in_width = input_tensor.get_shape()
        # This Variable will hold the state of the weights for the layer
        kernel = utils.weight_variable(shape=[
            input_dim, kernel_size, kernel_size, num_in_atoms,
            output_dim * num_out_atoms
        ],
                                       stddev=0.3)
        # kernel = tf.clip_by_norm(kernel, 3.0, axes=[1, 2, 3])
        activation_biases = utils.bias_variable(
            [1, 1, output_dim, 1, 1, 1, 1, 1],
            init_value=0.5,
            name='activation_biases')
        sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1, 1, 1],
                                           init_value=.5,
                                           name='sigma_biases')
        with tf.name_scope('conv'):
            print('conv:')
            # input_tensor: [x,128,8, c1,c2] -> [x*128,8, c1,c2]
            print(input_tensor.get_shape())
            input_tensor_reshaped = tf.reshape(input_tensor, [
                input_shape[0] * input_dim * in_atom_sq, input_shape[3],
                input_shape[4], 1
            ])
            input_tensor_reshaped.set_shape(
                (None, input_tensor.get_shape()[3].value,
                 input_tensor.get_shape()[4].value, 1))
            input_act_reshaped = tf.reshape(input_activation, [
                input_shape[0] * input_dim, input_shape[3], input_shape[4], 1
            ])
            input_act_reshaped.set_shape(
                (None, input_tensor.get_shape()[3].value,
                 input_tensor.get_shape()[4].value, 1))
            print(input_tensor_reshaped.get_shape())
            # conv: [x*128,out*out_at, c3,c4]
            conv_patches = tf.extract_image_patches(
                images=input_tensor_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            act_patches = tf.extract_image_patches(
                images=input_act_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            o_height = (in_height.value - kernel_size) // stride + 1
            o_width = (in_width.value - kernel_size) // stride + 1
            patches = tf.reshape(conv_patches,
                                 (input_shape[0], input_dim, in_atom_sq,
                                  o_height, o_width, kernel_size, kernel_size))
            patches.set_shape((None, input_dim, in_atom_sq, o_height, o_width,
                               kernel_size, kernel_size))
            patch_trans = tf.transpose(patches, [1, 5, 6, 0, 3, 4, 2])
            patch_split = tf.reshape(
                patch_trans,
                (input_dim, kernel_size, kernel_size, input_shape[0] *
                 o_height * o_width * num_in_atoms, num_in_atoms))
            patch_split.set_shape(
                (input_dim, kernel_size, kernel_size, None, num_in_atoms))
            a_patches = tf.reshape(act_patches,
                                   (input_shape[0], input_dim, 1, 1, o_height,
                                    o_width, kernel_size, kernel_size))
            a_patches.set_shape((None, input_dim, 1, 1, o_height, o_width,
                                 kernel_size, kernel_size))
            with tf.name_scope('input_act'):
                utils.activation_summary(
                    tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(a_patches,
                                                              axis=1),
                                                axis=-1),
                                  axis=-1))
            with tf.name_scope('Wx'):
                wx = tf.matmul(patch_split, kernel)
                wx = tf.reshape(wx, (input_dim, kernel_size, kernel_size,
                                     input_shape[0], o_height, o_width,
                                     num_in_atoms * num_out_atoms, output_dim))
                wx.set_shape(
                    (input_dim, kernel_size, kernel_size, None, o_height,
                     o_width, num_in_atoms * num_out_atoms, output_dim))
                wx = tf.transpose(wx, [3, 0, 7, 6, 4, 5, 1, 2])
                utils.activation_summary(wx)

        with tf.name_scope('routing'):
            # Routing
            # logits: [x, 128, 10, c3, c4]
            logit_shape = [
                input_dim, output_dim, 1, o_height, o_width, kernel_size,
                kernel_size
            ]
            activation, center = update_conv_routing(
                wx=wx,
                input_activation=a_patches,
                activation_biases=activation_biases,
                sigma_biases=sigma_biases,
                logit_shape=logit_shape,
                num_out_atoms=num_out_atoms * num_out_atoms,
                input_dim=input_dim,
                num_routing=num_routing,
                output_dim=output_dim,
                min_var=min_var,
                final_beta=final_beta,
            )
            # activations: [x, 10, 8, c3, c4]

        out_activation = tf.squeeze(activation, axis=[1, 3, 6, 7])
        out_center = tf.squeeze(center, axis=[1, 6, 7])
        with tf.name_scope('center'):
            utils.activation_summary(out_center)
        return tf.sigmoid(out_activation), out_center
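
# Side note: o_height/o_width above follow the standard VALID-padding output
# rule, (in_size - kernel_size) // stride + 1. A quick standalone check for
# the defaults (kernel_size=5, stride=2) on a hypothetical 14x14 input:
in_size, kernel_size, stride = 14, 5, 2
print((in_size - kernel_size) // stride + 1)  # 5 output positions per axis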
Example #30
    def forward(self, inputs, grid, is_training=True, reuse=False):
        def preprocessing(inputs):
            dims = inputs.get_shape()
            if len(dims) == 3:
                inputs = tf.expand_dims(inputs, dim=0)
            mean_BGR = tf.reshape(self.mean_BGR, [1, 1, 1, 3])
            inputs = inputs[:, :, :, ::-1] + mean_BGR
            return inputs

        ## -----------------------depth and normal FCN--------------------------
        inputs = preprocessing(inputs)
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=tf.nn.relu, stride=1,
                                padding='SAME',
                                weights_initializer=weight_from_caffe(self.pretrain_weight),
                                biases_initializer=bias_from_caffe(self.pretrain_weight)):

            with tf.variable_scope('fcn', reuse=reuse):
                ##---------------------vgg depth------------------------------------
                conv1 = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
                pool1 = slim.max_pool2d(conv1, [3, 3], stride=2, padding='SAME', scope='pool1')

                conv2 = slim.repeat(pool1, 2, slim.conv2d, 128, [3, 3], scope='conv2')
                pool2 = slim.max_pool2d(conv2, [3, 3], stride=2, padding='SAME', scope='pool2')

                conv3 = slim.repeat(pool2, 3, slim.conv2d, 256, [3, 3], scope='conv3')
                pool3 = slim.max_pool2d(conv3, [3, 3], stride=2, padding='SAME', scope='pool3')

                conv4 = slim.repeat(pool3, 3, slim.conv2d, 512, [3, 3], scope='conv4')
                pool4 = slim.max_pool2d(conv4, [3, 3], stride=1, padding='SAME', scope='pool4')

                conv5 = slim.repeat(pool4, 3, slim.conv2d, 512, [3, 3], rate=2, scope='conv5')
                pool5 = slim.max_pool2d(conv5, [3, 3], stride=1, padding='SAME', scope='pool5')
                pool5a = slim.avg_pool2d(pool5, [3, 3], stride=1, padding='SAME', scope='pool5a')

                fc6 = slim.conv2d(pool5a, 1024, [3, 3], stride=1, rate=12, scope='fc6')
                fc6 = slim.dropout(fc6, 0.5, is_training=is_training, scope='drop6')
                fc7 = slim.conv2d(fc6, 1024, [1, 1], scope='fc7')
                fc7 = slim.dropout(fc7, 0.5, is_training=is_training, scope='drop7')

                pool6_1x1 = slim.avg_pool2d(fc7, [61, 81], stride=[61, 81], padding='SAME', scope='pool6_1x1')
                pool6_1x1_norm = slim.unit_norm(pool6_1x1, dim=3, scope='pool6_1x1_norm_new')
                pool6_1x1_norm_scale = pool6_1x1_norm * 10
                pool6_1x1_norm_upsample = tf.tile(pool6_1x1_norm_scale, [1, 61, 81, 1], name='pool6_1x1_norm_upsample')

                out = tf.concat([fc7, pool6_1x1_norm_upsample], axis=-1, name='out')

                out_reduce = slim.conv2d(out, 256, [1, 1], activation_fn=tf.nn.relu, stride=1, scope='out_reduce',
                                         padding='SAME',
                                         weights_initializer=weight_from_caffe(self.pretrain_weight),
                                         biases_initializer=bias_from_caffe(self.pretrain_weight))
                out_conv = slim.conv2d(out_reduce, 256, [3, 3], activation_fn=tf.nn.relu, stride=1, scope='out_conv',
                                       padding='SAME',
                                       weights_initializer=weight_from_caffe(self.pretrain_weight),
                                       biases_initializer=bias_from_caffe(self.pretrain_weight))
                out_conv_increase = slim.conv2d(out_conv, 1024, [1, 1], activation_fn=tf.nn.relu, stride=1,
                                                scope='out_conv_increase',
                                                padding='SAME',
                                                weights_initializer=weight_from_caffe(self.pretrain_weight),
                                                biases_initializer=bias_from_caffe(self.pretrain_weight))

                fc8_nyu_depth = slim.conv2d(out_conv_increase, 1, [1, 1], activation_fn=None, scope='fc8_nyu_depth')
                fc8_upsample = tf.image.resize_images(fc8_nyu_depth, [self.crop_size_h, self.crop_size_w], method=0,
                                                      align_corners=True)
                #---------------------------------------vgg depth end ---------------------------------------
                ## ----------------- vgg norm---------------------------------------------------------------
                conv1_norm = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1_norm')
                pool1_norm = slim.max_pool2d(conv1_norm, [3, 3], stride=2, padding='SAME', scope='pool1_norm')

                conv2_norm = slim.repeat(pool1_norm, 2, slim.conv2d, 128, [3, 3], scope='conv2_norm')
                pool2_norm = slim.max_pool2d(conv2_norm, [3, 3], stride=2, padding='SAME', scope='pool2_norm')

                conv3_norm = slim.repeat(pool2_norm, 3, slim.conv2d, 256, [3, 3], scope='conv3_norm')
                pool3_norm = slim.max_pool2d(conv3_norm, [3, 3], stride=2, padding='SAME', scope='pool3_norm')

                conv4_norm = slim.repeat(pool3_norm, 3, slim.conv2d, 512, [3, 3], scope='conv4_norm')
                pool4_norm = slim.max_pool2d(conv4_norm, [3, 3], stride=1, padding='SAME', scope='pool4_norm')

                conv5_norm = slim.repeat(pool4_norm, 3, slim.conv2d, 512, [3, 3], rate=2, scope='conv5_norm')
                pool5_norm = slim.max_pool2d(conv5_norm, [3, 3], stride=1, padding='SAME', scope='pool5_norm')
                pool5a_norm = slim.avg_pool2d(pool5_norm, [3, 3], stride=1, padding='SAME', scope='pool5a_norm')

                fc6_norm = slim.conv2d(pool5a_norm, 1024, [3, 3], stride=1, rate=12, scope='fc6_norm')
                fc6_norm = slim.dropout(fc6_norm, 0.5, is_training=is_training, scope='drop6_norm')
                fc7_norm = slim.conv2d(fc6_norm, 1024, [1, 1], scope='fc7_norm')
                fc7_norm = slim.dropout(fc7_norm, 0.5, is_training=is_training, scope='drop7_norm')

                pool6_1x1_norm_new = slim.avg_pool2d(fc7_norm, [61, 81], stride=[61, 81], padding='SAME',
                                                     scope='pool6_1x1_norm_new')

                pool6_1x1_norm_norm = slim.unit_norm(pool6_1x1_norm_new, dim=3, scope='pool6_1x1_norm_new')
                pool6_1x1_norm_scale_norm = pool6_1x1_norm_norm * 10
                pool6_1x1_norm_upsample_norm = tf.tile(pool6_1x1_norm_scale_norm, [1, 61, 81, 1],
                                                       name='pool6_1x1_norm_upsample')
                out_norm = tf.concat([fc7_norm, pool6_1x1_norm_upsample_norm], axis=-1, name='out_norm')
                fc8_nyu_norm_norm = slim.conv2d(out_norm, 3, [1, 1], activation_fn=None, scope='fc8_nyu_norm_norm')
                fc8_upsample_norm = tf.image.resize_images(fc8_nyu_norm_norm, [self.crop_size_h, self.crop_size_w],
                                                           method=0, align_corners=True)

                fc8_upsample_norm = slim.unit_norm(fc8_upsample_norm, dim=3)
                #-------------------------------------vgg norm end---------------------------------------------


            # ------------- depth to normal + norm refinement---------------------------------------------------
            with tf.variable_scope('noise', reuse=reuse):

                fc8_upsample_norm = tf.squeeze(fc8_upsample_norm)
                fc8_upsample_norm = tf.reshape(fc8_upsample_norm,
                                               [self.batch_size, self.crop_size_h, self.crop_size_w, 3])

                norm_matrix = tf.extract_image_patches(images=fc8_upsample_norm, ksizes=[1, self.k, self.k, 1],
                                                       strides=[1, 1, 1, 1],
                                                       rates=[1, self.rate, self.rate, 1], padding='SAME')

                matrix_c = tf.reshape(norm_matrix,
                                      [self.batch_size, self.crop_size_h, self.crop_size_w, self.k * self.k, 3])

                fc8_upsample_norm = tf.expand_dims(fc8_upsample_norm, axis=4)

                angle = tf.matmul(matrix_c, fc8_upsample_norm)

                valid_condition = tf.greater(angle, self.thresh)
                valid_condition_all = tf.tile(valid_condition, [1, 1, 1, 1, 3])

                exp_depth = tf.exp(fc8_upsample * 0.69314718056)
                depth_repeat = tf.tile(exp_depth, [1, 1, 1, 3])
                points = tf.multiply(grid, depth_repeat)
                point_matrix = tf.extract_image_patches(images=points, ksizes=[1, self.k, self.k, 1],
                                                        strides=[1, 1, 1, 1],
                                                        rates=[1, self.rate, self.rate, 1], padding='SAME')

                matrix_a = tf.reshape(point_matrix, [self.batch_size, self.crop_size_h, self.crop_size_w, self.k * self.k, 3])

                matrix_a_zero = tf.zeros_like(matrix_a, dtype=tf.float32)
                matrix_a_valid = tf.where(valid_condition_all, matrix_a, matrix_a_zero)

                matrix_a_trans = tf.matrix_transpose(matrix_a_valid, name='matrix_transpose')
                matrix_b = tf.ones(shape=[self.batch_size, self.crop_size_h, self.crop_size_w, self.k * self.k, 1])
                point_multi = tf.matmul(matrix_a_trans, matrix_a_valid, name='matrix_multiplication')
                with tf.device('cpu:0'):
                    matrix_deter = tf.matrix_determinant(point_multi)
                inverse_condition = tf.greater(matrix_deter, 1e-5)
                inverse_condition = tf.expand_dims(inverse_condition, axis=3)
                inverse_condition = tf.expand_dims(inverse_condition, axis=4)
                inverse_condition_all = tf.tile(inverse_condition, [1, 1, 1, 3, 3])

                diag_constant = tf.ones([3], dtype=tf.float32)
                diag_element = tf.diag(diag_constant)
                diag_element = tf.expand_dims(diag_element, axis=0)
                diag_element = tf.expand_dims(diag_element, axis=0)
                diag_element = tf.expand_dims(diag_element, axis=0)


                diag_matrix = tf.tile(diag_element, [self.batch_size, self.crop_size_h, self.crop_size_w, 1, 1])

                inversible_matrix = tf.where(inverse_condition_all, point_multi, diag_matrix)
                with tf.device('cpu:0'):
                    inv_matrix = tf.matrix_inverse(inversible_matrix)

                generated_norm = tf.matmul(tf.matmul(inv_matrix, matrix_a_trans),matrix_b)

                norm_normalize = slim.unit_norm((generated_norm), dim=3)
                norm_normalize = tf.reshape(norm_normalize,
                                            [self.batch_size, self.crop_size_h, self.crop_size_w, 3])
                norm_scale = norm_normalize * 10.0


                conv1_noise = slim.repeat(norm_scale, 2, slim.conv2d, 64, [3, 3], scope='conv1_noise')
                pool1_noise = slim.max_pool2d(conv1_noise, [3, 3], stride=2, padding='SAME', scope='pool1_noise')  #

                conv2_noise = slim.repeat(pool1_noise, 2, slim.conv2d, 128, [3, 3], scope='conv2_noise')
                conv3_noise = slim.repeat(conv2_noise, 3, slim.conv2d, 256, [3, 3], scope='conv3_noise')

                fc1_noise = slim.conv2d(conv3_noise, 512, [1, 1], activation_fn=tf.nn.relu, stride=1,
                                        scope='fc1_noise',
                                        padding='SAME')
                encode_norm_noise = slim.conv2d(fc1_noise, 3, [3, 3], activation_fn=None, stride=1,
                                                scope='encode_norm_noise',
                                                padding='SAME')
                encode_norm_upsample_noise = tf.image.resize_images(encode_norm_noise,
                                                                    [self.crop_size_h, self.crop_size_w], method=0,
                                                                    align_corners=True)

                sum_norm_noise = tf.add(norm_normalize, encode_norm_upsample_noise)

                norm_pred_noise = slim.unit_norm(sum_norm_noise, dim=3)

                norm_pred_all = tf.concat([
                    tf.expand_dims(tf.squeeze(fc8_upsample_norm), axis=0),
                    norm_pred_noise, inputs * 0.00392156862
                ], axis=3)

                norm_pred_all = slim.repeat(norm_pred_all, 3, slim.conv2d, 128, [3, 3],rate=2, weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                              biases_initializer=tf.constant_initializer(0.0),scope='conv1_norm_noise_new')
                norm_pred_all = slim.repeat(norm_pred_all, 3, slim.conv2d, 128, [3, 3],weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                              biases_initializer=tf.constant_initializer(0.0), scope='conv2_norm_noise_new')
                norm_pred_final = slim.conv2d(norm_pred_all, 3, [3, 3], activation_fn=None,
                                              weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                              biases_initializer=tf.constant_initializer(0.0), scope='norm_conv3_noise_new')
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)


            # ------------- normal to depth  + depth refinement---------------------------------------------------
            with tf.variable_scope('norm_depth', reuse=reuse):
                 grid_patch = tf.extract_image_patches(images=grid, ksizes=[1, self.k, self.k, 1], strides=[1, 1, 1, 1],
                                                            rates=[1, self.rate, self.rate, 1], padding='SAME')
                 grid_patch = tf.reshape(grid_patch, [self.batch_size, self.crop_size_h, self.crop_size_w, self.k*self.k, 3])
                 _, _, depth_data = tf.split(value=matrix_a, num_or_size_splits=3, axis=4)
                 tmp_matrix_zero = tf.zeros_like(angle, dtype=tf.float32)
                 valid_angle = tf.where(valid_condition, angle, tmp_matrix_zero)

                 lower_matrix = tf.matmul(matrix_c, tf.expand_dims(grid, axis=4))
                 condition = tf.greater(lower_matrix, 1e-5)
                 tmp_matrix = tf.ones_like(lower_matrix)
                 lower_matrix = tf.where(condition, lower_matrix, tmp_matrix)
                 lower = tf.reciprocal(lower_matrix)
                 valid_angle = tf.where(condition, valid_angle, tmp_matrix_zero)
                 upper = tf.reduce_sum(tf.multiply(matrix_c, grid_patch), [4])
                 ratio = tf.multiply(lower, tf.expand_dims(upper, axis=4))
                 estimate_depth = tf.multiply(ratio, depth_data)

                 valid_angle = tf.multiply(
                     valid_angle,
                     tf.reciprocal(
                         tf.tile(
                             tf.reduce_sum(valid_angle, [3, 4], keep_dims=True) + 1e-5,
                             [1, 1, 1, 81, 1])))

                 depth_stage1 = tf.reduce_sum(tf.multiply(estimate_depth, valid_angle), [3, 4])
                 depth_stage1 = tf.expand_dims(tf.squeeze(depth_stage1), axis=2)
                 depth_stage1 = tf.clip_by_value(depth_stage1, 0, 10.0)
                 exp_depth = tf.expand_dims(tf.squeeze(exp_depth), axis=2)

                 depth_all = tf.expand_dims(
                     tf.concat([depth_stage1, exp_depth,
                                tf.squeeze(inputs) * 0.00392156862], axis=2),
                     axis=0)

                 depth_pred_all = slim.repeat(depth_all, 3, slim.conv2d, 128, [3, 3], rate=2,weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                              biases_initializer=tf.constant_initializer(0.0),
                                             scope='conv1_depth_noise_new')
                 depth_pred_all = slim.repeat(depth_pred_all, 3, slim.conv2d, 128, [3, 3],weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                              biases_initializer=tf.constant_initializer(0.0), scope='conv2_depth_noise_new')
                 final_depth = slim.conv2d(depth_pred_all, 1, [3, 3], activation_fn=None,
                                               weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                               biases_initializer=tf.constant_initializer(0.0),
                                               scope='depth_conv3_noise_new')
            with tf.variable_scope('edge_refinemet', reuse=reuse):
                print(inputs.shape)
                edges = tf.py_func(myfunc_canny, [inputs], tf.float32)
                edges = tf.reshape(edges,[1,self.crop_size_h,self.crop_size_w,1])
                edge_input_depth = final_depth
                edge_input_norm = norm_pred_final

                #edge prediction for depth
                edge_inputs = tf.concat([edges,inputs*0.00784],axis=3)
                edges_encoder = slim.repeat(edge_inputs, 3, slim.conv2d, 32, [3, 3],rate = 2,weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                              biases_initializer=tf.constant_initializer(0.0),
                                             scope='conv1_edge_refinement')
                edges_encoder = slim.repeat(edges_encoder, 3, slim.conv2d, 32, [3, 3],
                                            weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                            biases_initializer=tf.constant_initializer(0.0),
                                            scope='conv2_edge_refinement')

                edges_predictor = slim.conv2d(edges_encoder, 8, [3, 3], activation_fn=None,
                                               weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                               biases_initializer=tf.constant_initializer(0.0),
                                               scope='edge_weight')
                edges_all = edges_predictor + tf.tile(edges,[1,1,1,8])
                edges_all = tf.clip_by_value(edges_all,0.0,1.0)

                dlr, drl, dud, ddu, nlr, nrl, nud, ndu = tf.split(
                    edges_all, num_or_size_splits=8, axis=3)

                # 4 iteration depth
                final_depth = propagate(edge_input_depth,dlr,drl,dud,ddu,1)
                final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1)
                final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1)
                final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1)

                # 4 iteration norm
                norm_pred_final = propagate(edge_input_norm, nlr, nrl, nud, ndu, 3)
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)
                norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud, ndu, 3)
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)
                norm_pred_final = propagate(norm_pred_final, nlr,nrl, nud, ndu, 3)
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)
                norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud, ndu, 3)
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)

        return final_depth, fc8_upsample_norm, norm_pred_final, fc8_upsample
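
# Standalone sketch of the depth-to-normal least squares solved in the 'noise'
# scope above: the k*k neighborhood points (rows of matrix_a) should satisfy
# n . p = const, so the normal is (A^T A)^-1 A^T b with b all-ones, which is
# exactly the matmul chain applied to inv_matrix above. Sizes are illustrative.
import numpy as np

A = np.random.rand(81, 3)  # k*k = 81 neighborhood points for one pixel
b = np.ones((81, 1))
n = np.linalg.inv(A.T.dot(A)).dot(A.T).dot(b)
n = n / np.linalg.norm(n)  # unit-normalize, like slim.unit_norm
print(n.ravel())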
Example #31
def inference(images):
  """Build the CIFAR-10 model.

  Args:
    images: Images returned from distorted_inputs() or inputs().

  Returns:
    Logits.
  """
  # We instantiate all variables using tf.get_variable() instead of
  # tf.Variable() in order to share variables across multiple GPU training runs.
  # If we only ran this model on a single GPU, we could simplify this function
  # by replacing all instances of tf.get_variable() with tf.Variable().
  #
  # conv1
  with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 3, 64],
                                         stddev=5e-2,
                                         wd=0.0)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv1)

  # pool1
  pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                         padding='SAME', name='pool1')
  # norm1
  norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                    name='norm1') # local response normalization

  # conv2
  with tf.variable_scope('conv2') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 64, 64],
                                         stddev=5e-2,
                                         wd=0.0)
    conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv2)

  # norm2
  norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                    name='norm2')
  # pool2
  pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1], padding='SAME', name='pool2')

  """ # original code
  # local3
  with tf.variable_scope('local3') as scope:
    # Move everything into depth so we can perform a single matrix multiply.
    reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
    dim = reshape.get_shape()[1].value
    weights = _variable_with_weight_decay('weights', shape=[dim, 384],
                                          stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
    _activation_summary(local3)
	
  # local4
  with tf.variable_scope('local4') as scope:
    weights = _variable_with_weight_decay('weights', shape=[384, 192],
                                          stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
    _activation_summary(local4)	
  """	
	
  """changed to locally connected layers t construction see the url=https://github.com/akrizhevsky/cuda-convnet2/blob/master/layers/layers-cifar10-11pct.cfg"""
  """ detail of locally connected layers  see the url=https://prateekvjoshi.com/2016/04/12/understanding-locally-connected-layers-in-convolutional-neural-networks/"""
  with tf.variable_scope('local3') as scope:
    #shape = pool2.get_shape()
	#h = shape[1].value
	#w = shape[2].value
	
	sz_local = 3 # kernel_size
    #sz_patch = (sz_local**2)*shape[3].value
	n_channels = 64
	
	# Extract 3x3 tensor patches
	patches = tf.extract_image_patches(pool2,[1,sz_local,sz_local,1],[1,1,1,1],[1,1,1,1],'SAME')
	shape = patches.get_shape()
	h = shape[1].value
	w = shape[2].value
	sz_patch = shape[3].value
	weights = _variable_with_weight_decay('weights',shape=[1,h,w,sz_patch,n_channels], stddev=5e-2, wd=0.04)
	bias = _variable_on_cpu('biases',[h,w,n_channels], tf.constant_initializer(0.1))
	
	# "Filter" each patch with its own kernel
	mul = tf.multiply(tf.expand_dims(patches,axis=-1),weights)
	patches_sum = tf.reduce_sum(mul,axis=3)
	pre_activation = tf.add(patches_sum,bias)
	local3 = tf.nn.relu(pre_activation,name=scope.name)
	_activation_summary(local3)
	
  with tf.variable_scope('local4') as scope:
    sz_local = 3  # kernel size
    n_channels = 32

    # Extract 3x3 patches of the local3 output
    patches = tf.extract_image_patches(local3, [1, sz_local, sz_local, 1],
                                       [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')
    shape = patches.get_shape()
    h = shape[1].value
    w = shape[2].value
    sz_patch = shape[3].value
    weights = _variable_with_weight_decay(
        'weights', shape=[1, h, w, sz_patch, n_channels], stddev=5e-2, wd=0.04)
    bias = _variable_on_cpu('biases', [h, w, n_channels],
                            tf.constant_initializer(0.1))

    # "Filter" each patch with its own kernel
    mul = tf.multiply(tf.expand_dims(patches, axis=-1), weights)
    patches_sum = tf.reduce_sum(mul, axis=3)
    pre_activation = tf.add(patches_sum, bias)
    local4 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(local4)

  # linear layer (WX + b)
  # We don't apply softmax here because
  # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
  # and performs the softmax internally for efficiency.
  with tf.variable_scope('softmax_linear') as scope:
    # local4 is [batch, h, w, n_channels] after the locally connected layer,
    # so flatten it before the final matmul.
    reshape = tf.reshape(local4, [FLAGS.batch_size, -1])
    dim = reshape.get_shape()[1].value
    weights = _variable_with_weight_decay('weights', [dim, NUM_CLASSES],
                                          stddev=1.0 / dim, wd=0.0)
    biases = _variable_on_cpu('biases', [NUM_CLASSES],
                              tf.constant_initializer(0.0))
    softmax_linear = tf.add(tf.matmul(reshape, weights), biases,
                            name=scope.name)
    # Added tf.nn.softmax(); when we do this, we must change the loss, since
    # the standard loss above already expects unscaled logits.
    softmax_linear = tf.nn.softmax(softmax_linear)
    _activation_summary(softmax_linear)
Ejemplo n.º 32
import tensorflow as tf
import numpy as np

ksize = 6
stride = 9
img_size = 55

a = tf.placeholder(tf.float32, [img_size,img_size])

_a = tf.expand_dims(a, axis=0)
_a = tf.expand_dims(_a, axis=3)
b = tf.extract_image_patches(_a,
        ksizes=[1,ksize,ksize,1],
        strides=[1,stride,stride,1],
        rates=[1,1,1,1],
        padding='VALID')

sess = tf.Session()
sess.run(tf.global_variables_initializer())


f = np.arange(img_size**2) * 0.01
f = np.reshape(f, [img_size, img_size])
fd = {a: f}

s = sess.run(a, fd)
for i in range(img_size):
    print(s[i])

# With VALID padding: (55 - 6) // 9 + 1 = 6, so b is a 6x6 grid of flattened
# 6x6 patches (stride 9 > ksize 6 leaves gaps between sampled patches).
ppp = sess.run(b, fd)
print(ppp)
Ejemplo n.º 33
    def _extract_image_patches(self, NHWC_X):
        # returns: N x H x W x (C * P), where P = filter_size**2
        return tf.extract_image_patches(
            NHWC_X, [1, self.filter_size, self.filter_size, 1],
            [1, self.stride, self.stride, 1],
            [1, self.dilation, self.dilation, 1], "VALID")
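A quick shape check for the helper above (a sketch; the concrete sizes, and filter_size=3, stride=1, dilation=1, are assumptions):

import tensorflow as tf
# Hypothetical input: batch 1, 28x28, 2 channels.
x = tf.placeholder(tf.float32, [1, 28, 28, 2])
patches = tf.extract_image_patches(x, [1, 3, 3, 1], [1, 1, 1, 1],
                                   [1, 1, 1, 1], "VALID")
print(patches.get_shape())  # (1, 26, 26, 18): C * P = 2 * 3**2 per position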
Ejemplo n.º 34
    def forward(self):
        inp = self.inp.out
        s = self.lay.stride
        self.out = tf.extract_image_patches(
            inp, [1, s, s, 1], [1, s, s, 1], [1, 1, 1, 1], 'VALID')
Ejemplo n.º 35
def tf_median(x, kernel):
    # Channel-wise median filter; note the reshape below hard-codes a
    # 3-channel NHWC input.
    with tf.name_scope('median_filter'):
        xp = tf.pad(x, [[0, 0], 2*[kernel//2], 2*[kernel//2], [0, 0]], 'REFLECT')
        patches = tf.extract_image_patches(xp, [1, kernel, kernel, 1],
                                           [1, 1, 1, 1], [1, 1, 1, 1], 'VALID')
        patches = tf.reshape(patches, [tf.shape(patches)[0], tf.shape(patches)[1],
                                       tf.shape(patches)[2],
                                       tf.shape(patches)[3]//3, 3])
        return tf.contrib.distributions.percentile(patches, 50, axis=3)
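A minimal usage sketch for tf_median (assumes TF 1.x with tf.contrib available; the sizes here are made up):

import numpy as np
import tensorflow as tf
img = tf.placeholder(tf.float32, [None, 64, 64, 3])
filtered = tf_median(img, kernel=5)  # 5x5 median; REFLECT pad keeps the spatial size
with tf.Session() as sess:
    out = sess.run(filtered, {img: np.random.rand(1, 64, 64, 3).astype(np.float32)})
    print(out.shape)  # (1, 64, 64, 3)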
Ejemplo n.º 36
    def forward(self):
        inp = self.inp.out
        s = self.lay.stride
        self.out = tf.extract_image_patches(inp, [1, s, s, 1], [1, s, s, 1],
                                            [1, 1, 1, 1], 'VALID')
Ejemplo n.º 37
    def _body(i, posterior, activation, center, masses):
        """Body of the EM while loop."""
        del activation
        beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))
        # beta = final_beta
        # route: [outdim, height?, width?, batch, indim]
        vote_conf = posterior * input_activation
        # masses: [batch, 1, outdim, 1, height, width, 1, 1]
        masses = tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
            vote_conf, axis=1, keep_dims=True),
                                             axis=-1,
                                             keep_dims=True),
                               axis=-2,
                               keep_dims=True) + 0.0000001
        preactivate_unrolled = vote_conf * wx
        # center: [batch, 1, outdim, outatom, height, width]
        center = .9 * tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
            preactivate_unrolled, axis=1, keep_dims=True),
                                                  axis=-1,
                                                  keep_dims=True),
                                    axis=-2,
                                    keep_dims=True) / masses + .1 * center

        noise = (wx - center) * (wx - center)
        variance = min_var + tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
            vote_conf * noise, axis=1, keep_dims=True),
                                                         axis=-1,
                                                         keep_dims=True),
                                           axis=-2,
                                           keep_dims=True) / masses
        log_variance = tf.log(variance)
        p_i = -1 * tf.reduce_sum(log_variance, axis=3, keep_dims=True)
        log_2pi = tf.log(2 * math.pi)
        win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0))
        logit = beta * (win - activation_biases * 5000)
        activation_update = tf.minimum(
            0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
        # return activation, center
        log_det_sigma = -1 * p_i
        sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
        exp_update = tf.reduce_sum(noise / (2 * variance),
                                   axis=3,
                                   keep_dims=True)
        prior_update = activation_update - sigma_update - exp_update
        max_prior_update = tf.reduce_max(tf.reduce_max(tf.reduce_max(
            tf.reduce_max(prior_update, axis=-1, keep_dims=True),
            axis=-2,
            keep_dims=True),
                                                       axis=-3,
                                                       keep_dims=True),
                                         axis=-4,
                                         keep_dims=True)
        prior_normal = tf.add(prior_update, -1 * max_prior_update)
        prior_exp = tf.exp(prior_normal)
        t_prior = tf.transpose(prior_exp, [0, 1, 2, 3, 4, 6, 5, 7])
        c_prior = tf.reshape(t_prior, [-1, n * k, n * k, 1])
        pad_prior = tf.pad(c_prior,
                           [[0, 0], [(k - 1) * (k - 1), (k - 1) * (k - 1)],
                            [(k - 1) * (k - 1),
                             (k - 1) * (k - 1)], [0, 0]], 'CONSTANT')
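        # The (k-1)^2 zero padding above and the strided patch extraction
        # below appear to sum the routing priors over every overlapping
        # receptive field, so the posterior can be normalized per input
        # capsule (this reading is inferred from the surrounding code).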
        patch_prior = tf.extract_image_patches(images=pad_prior,
                                               ksizes=[1, k, k, 1],
                                               strides=[1, k, k, 1],
                                               rates=[1, k - 1, k - 1, 1],
                                               padding='VALID')
        sum_prior = tf.reduce_sum(patch_prior, axis=-1, keep_dims=True)
        sum_prior_patch = tf.extract_image_patches(images=sum_prior,
                                                   ksizes=[1, k, k, 1],
                                                   strides=[1, 1, 1, 1],
                                                   rates=[1, 1, 1, 1],
                                                   padding='VALID')
        sum_prior_reshape = tf.reshape(
            sum_prior_patch,
            [-1, input_dim, output_dim, 1, n, n, k, k]) + 0.0000001
        posterior = prior_exp / sum_prior_reshape
        return (posterior, logit, center, masses)
Ejemplo n.º 38
def reorg(x, stride):
    return tf.extract_image_patches(x, [1, stride, stride, 1],
                        [1, stride, stride, 1], [1,1,1,1], padding="VALID")
Ejemplo n.º 39
def reorg(x, stride):
    return tf.extract_image_patches(x, [1, stride, stride, 1],
                                    [1, stride, stride, 1], [1, 1, 1, 1],
                                    padding="VALID")
Ejemplo n.º 40
    def conv(self,
             input,
             k_h,
             k_w,
             c_o,
             s_h,
             s_w,
             name,
             relu=True,
             padding=DEFAULT_PADDING,
             group=1,
             biased=True):
        # Verify that the padding is acceptable
        self.validate_padding(padding)
        # Get the number of channels in the input
        c_i = input.get_shape()[-1]
        # Verify that the grouping parameter is valid
        assert c_i % group == 0
        assert c_o % group == 0
        # Convolution for a given input and kernel
        convolve = lambda i, k: tf.nn.conv2d(
            i, k, [1, s_h, s_w, 1], padding=padding)
        with tf.variable_scope(name) as scope:
            # Get input patches and construct hessian op
            # print '%s' %name
            get_patches_op = tf.extract_image_patches(input, \
                        ksizes=[1, k_h, k_w, 1], \
                        strides=[1, s_h, s_w, 1], \
                        rates=[1, 1, 1, 1], padding=padding)
            self.get_layer_inputs_op[name] = get_patches_op
            print 'Layer %s, input shape: %s' % (name,
                                                 get_patches_op.get_shape())
            # print 'Input shape: %s' % input.get_shape().as_list()
            # First method to calculate hessain
            # print 'Patch shape: %s' % get_patches_op.get_shape().as_list()
            # patches_shape = get_patches_op.get_shape().as_list()
            # n_patches =  batch_size * patches_shape[1] * patches_shape[2] # Number of patches in one batch
            '''
			a = tf.expand_dims(get_patches_op, axis=-1)
			# print 'a shape: %s' %a.get_shape()
			a = tf.concat([a, tf.ones([tf.shape(a)[0], tf.shape(a)[1], tf.shape(a)[2], 1, 1])], axis=3)
			# print 'a shape: %s' %a.get_shape()

			# print 'get_patches_op shape: %s' %get_patches_op.get_shape()
			b = tf.expand_dims(get_patches_op, axis=3)
			b = tf.concat([b, tf.ones([tf.shape(b)[0], tf.shape(b)[1], tf.shape(b)[2], 1, 1])], axis=4)
			# print 'b shape: %s' %b.get_shape()
			outprod = tf.multiply(a, b)
			# print 'outprod shape: %s' %outprod.get_shape()
			self.get_batch_hessian_op[name] = tf.reduce_mean(outprod, axis=[0, 1, 2])
			print 'Layer %s, hessian shape: %s' % (name, self.get_batch_hessian_op[name].get_shape())
			'''
            '''
			patches_shape = get_patches_op.get_shape().as_list()
			Dtensor = tf.reshape(get_patches_op, [-1, patches_shape[1] * patches_shape[2], patches_shape[3], 1])
			print 'Dtensor: %s' % Dtensor.get_shape()
			Dtensor = tf.concat([Dtensor, tf.ones([tf.shape(Dtensor)[0], tf.shape(Dtensor)[1], 1, 1])], axis=2)
			print 'Dtensor after concatenating one: %s' % Dtensor.get_shape()
			print 'Dtensor shape: %s' % Dtensor.get_shape()
			self.get_batch_hessian_op[name] = tf.reduce_mean(
				tf.matmul(Dtensor, Dtensor, transpose_b=True), axis=[0, 1])
			print 'Hessian shape: %s' % self.get_batch_hessian_op[name].get_shape()
			'''
            kernel = self.make_var('weights',
                                   shape=[k_h, k_w, c_i / group, c_o])
            if group == 1:
                # This is the common-case. Convolve the input without any further complications.
                output = convolve(input, kernel)
            else:

                # The following commented lines are the old code worked with old version of Tensorflow

                # # Split the input into groups and then convolve each of them independently
                # input_groups = tf.split(3, group, input)
                # kernel_groups = tf.split(3, group, kernel)
                # output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
                # # Concatenate the groups
                # output = tf.concat(3, output_groups)

                # The following lines are work with current Tensorflow version

                # Split the input into groups and then convolve each of them independently
                input_groups = tf.split(input, group, 3)
                kernel_groups = tf.split(kernel, group, 3)
                output_groups = [
                    convolve(i, k) for i, k in zip(input_groups, kernel_groups)
                ]
                # Concatenate the groups
                output = tf.concat(output_groups, 3)

            # Add the biases
            if biased:
                biases = self.make_var('biases', [c_o])
                output = tf.nn.bias_add(output, biases)
            if relu:
                # ReLU non-linearity
                output = tf.nn.relu(output, name=scope.name)
            return output
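The patches stored in self.get_layer_inputs_op are what the commented-out blocks above turn into a layer-input Hessian estimate; a condensed sketch of the second (Dtensor) variant, where the names outside the class (net, 'conv1') are assumptions:

patches = net.get_layer_inputs_op['conv1']      # [N, H', W', k_h * k_w * c_i]
d = tf.reshape(patches, [-1, patches.get_shape()[-1].value])
d = tf.concat([d, tf.ones([tf.shape(d)[0], 1])], axis=1)  # extra 1 for the bias term
# Mean of the outer products d d^T over every patch in the batch.
hessian = tf.matmul(d, d, transpose_a=True) / tf.cast(tf.shape(d)[0], tf.float32)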
def hlconv(
        x=None, 
        uw=None,
        sw=None,
        ksizes=None, 
        strides=None, 
        padding=None, 
        name=None,
        **kwargs
    ):
    # replace default args with kwargs
    # (with this signature, keyword calls bind to the named parameters
    # directly and never reach **kwargs, so these overrides are effectively
    # dead code)
    if 'x' in kwargs:
        x = kwargs['x']
    if 'uw' in kwargs:
        uw = kwargs['uw']
    if 'sw' in kwargs:
        sw = kwargs['sw']
    if 'ksizes' in kwargs:
        ksizes = kwargs['ksizes']
    if 'strides' in kwargs:
        strides = kwargs['strides']
    if 'padding' in kwargs:
        padding = kwargs['padding']
    if 'name' in kwargs:
        name = kwargs['name']
    # simple validation
    assert len(ksizes)==4
    assert len(x.shape.as_list())==4
    assert ksizes[0]==1

    kh = ksizes[1]
    kw = ksizes[2]
    depth = x.shape.as_list()[3]
    kn = uw.shape.as_list()[0]
    assert kh*kw*depth==uw.shape.as_list()[1]
    
    patches = tf.extract_image_patches(
            x,
            ksizes = ksizes,
            strides = strides,
            rates = [1,1,1,1],
            padding = padding,
            name = name + '-p'
    )
    # create output tensors
    y = list(range(kn))
    fields = list(range(kn))
    arrivals = list(range(kn))
    # construct sub-operator-graph
    for _i in xrange(kn):
        fields[_i] = tf.norm(uw[_i] - patches, axis=3)
        arrivals[_i] = tf.reduce_sum(sw[_i] * patches, axis=3)
    fields_stacked = tf.stack(fields, axis=0)
    fields_polarized = sig_polar(fields_stacked)
    for _i in xrange(kn):
        y[_i] = fields_polarized[_i] * arrivals[_i]
    y_stacked = tf.stack(y, axis=3, name=name + '-hlconv')
    utrain_op, energy = make_unsupervised_train_op(
            uw,
            patches,
            fields_polarized
    )
    return (y_stacked, utrain_op, energy)
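A hedged usage sketch for hlconv; the shapes follow the assertions above (uw and sw are [kn, kh * kw * depth]), while the concrete sizes and names are assumptions, and sig_polar / make_unsupervised_train_op come from the surrounding Python 2 / TF 1.x module:

x = tf.placeholder(tf.float32, [None, 32, 32, 3])
kn, kh, kw = 16, 3, 3
uw = tf.get_variable('uw', [kn, kh * kw * 3])
sw = tf.get_variable('sw', [kn, kh * kw * 3])
# y is [N, H', W', kn]; utrain_op presumably updates uw from the patch statistics.
y, utrain_op, energy = hlconv(x=x, uw=uw, sw=sw,
                              ksizes=[1, kh, kw, 1],
                              strides=[1, 1, 1, 1],
                              padding='SAME',
                              name='hl1')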