def _conv_block(self, inputs, numOut, name = 'conv_block'):
		""" Convolutional Block
		Args:
			inputs	: Input Tensor
			numOut	: Desired number of output channels
			name	: Name of the block
		Returns:
			conv_3	: Output Tensor
		"""
		if self.tiny:
			with tf.name_scope(name):
				norm = tf.contrib.layers.batch_norm(inputs, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu, is_training = self.training)
				pad = tf.pad(norm, np.array([[0,0],[1,1],[1,1],[0,0]]), name= 'pad')
				conv = self._conv(pad, int(numOut), kernel_size=3, strides=1, pad = 'VALID', name= 'conv')
				return conv
		else:
			with tf.name_scope(name):
				with tf.name_scope('norm_1'):
					norm_1 = tf.contrib.layers.batch_norm(inputs, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu, is_training = self.training)
					conv_1 = self._conv(norm_1, int(numOut/2), kernel_size=1, strides=1, pad = 'VALID', name= 'conv')
				with tf.name_scope('norm_2'):
					norm_2 = tf.contrib.layers.batch_norm(conv_1, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu, is_training = self.training)
					pad = tf.pad(norm_2, np.array([[0,0],[1,1],[1,1],[0,0]]), name= 'pad')
					conv_2 = self._conv(pad, int(numOut/2), kernel_size=3, strides=1, pad = 'VALID', name= 'conv')
				with tf.name_scope('norm_3'):
					norm_3 = tf.contrib.layers.batch_norm(conv_2, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu, is_training = self.training)
					conv_3 = self._conv(norm_3, int(numOut), kernel_size=1, strides=1, pad = 'VALID', name= 'conv')
				return conv_3
    def __build(self):
        self.__init_global_epoch()
        self.__init_global_step()
        self.__init_input()

        with tf.name_scope('Preprocessing'):
            red, green, blue = tf.split(self.X, num_or_size_splits=3, axis=3)
            preprocessed_input = tf.concat([
                tf.subtract(blue, ShuffleNet.MEAN[0]) * ShuffleNet.NORMALIZER,
                tf.subtract(green, ShuffleNet.MEAN[1]) * ShuffleNet.NORMALIZER,
                tf.subtract(red, ShuffleNet.MEAN[2]) * ShuffleNet.NORMALIZER,
            ], 3)
        x_padded = tf.pad(preprocessed_input, [[0, 0], [1, 1], [1, 1], [0, 0]], "CONSTANT")
        conv1 = conv2d('conv1', x=x_padded, w=None, num_filters=self.output_channels['conv1'], kernel_size=(3, 3),
                       stride=(2, 2), l2_strength=self.args.l2_strength, bias=self.args.bias,
                       batchnorm_enabled=self.args.batchnorm_enabled, is_training=self.is_training,
                       activation=tf.nn.relu, padding='VALID')
        padded = tf.pad(conv1, [[0, 0], [0, 1], [0, 1], [0, 0]], "CONSTANT")
        max_pool = max_pool_2d(padded, size=(3, 3), stride=(2, 2), name='max_pool')
        stage2 = self.__stage(max_pool, stage=2, repeat=3)
        stage3 = self.__stage(stage2, stage=3, repeat=7)
        stage4 = self.__stage(stage3, stage=4, repeat=3)
        global_pool = avg_pool_2d(stage4, size=(7, 7), stride=(1, 1), name='global_pool', padding='VALID')

        logits_unflattened = conv2d('fc', global_pool, w=None, num_filters=self.args.num_classes,
                                    kernel_size=(1, 1),
                                    l2_strength=self.args.l2_strength,
                                    bias=self.args.bias,
                                    is_training=self.is_training)
        self.logits = flatten(logits_unflattened)

        self.__init_output()
Example #3
def _tf_pad(x, szs, padding='SYMMETRIC'):
    """
    tf.pad's REFLECT and SYMMETRIC modes can't pad a dimension by more than
    its size. This wrapper builds large paddings up successively.
    """
    def get_size(x):
        # Often the batch will be None. Convert these to 0s
        x_szs = x.get_shape().as_list()
        x_szs = [0 if val is None else val for val in x_szs]
        return x_szs

    x_szs = get_size(x)
    gt = [[sz[0] > x_sz, sz[1] > x_sz] for sz,x_sz in zip(szs, x_szs)]
    while np.any(gt):
        # This creates an intermediate padding amount that will bring in
        # dimensions that are too big by the size of x.
        szs_step = np.int32(gt) * np.stack([x_szs, x_szs], axis=-1)
        x = tf.pad(x, szs_step, padding)
        szs = szs - szs_step
        x_szs = get_size(x)
        gt = [[sz[0] > x_sz, sz[1] > x_sz] for sz,x_sz in zip(szs, x_szs)]

    # Pad by the remaining amount
    x = tf.pad(x, szs, padding)
    return x
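
# Usage sketch (added for illustration; assumes TensorFlow 1.x and the
# _tf_pad helper above). SYMMETRIC padding of a 2x2 map by 3 on each side
# exceeds the dimension size, so a single tf.pad call would raise, but the
# wrapper builds the padding up in steps.
def _tf_pad_demo():
    x = tf.constant(np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1))
    szs = np.array([[0, 0], [3, 3], [3, 3], [0, 0]])
    y = _tf_pad(x, szs, padding='SYMMETRIC')
    with tf.Session() as sess:
        print(sess.run(y).shape)  # (1, 8, 8, 1)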
def res_block(x, a=None, filter_size=16, nonlinearity=concat_elu, keep_p=1.0, stride=1, gated=False, name="resnet"):
  orig_x = x
  print(orig_x.get_shape())
  x_1 = conv_layer(nonlinearity(x), 3, stride, filter_size, name + '_conv_1')
  if a is not None:
    shape_a = int_shape(a) 
    shape_x_1 = int_shape(x_1)
    a = tf.pad(
      a, [[0, 0], [0, shape_x_1[1]-shape_a[1]], [0, shape_x_1[2]-shape_a[2]],
      [0, 0]])
    x_1 += nin(nonlinearity(a), filter_size, name + '_nin')
  x_1 = nonlinearity(x_1)
  if keep_p < 1.0:
    x_1 = tf.nn.dropout(x_1, keep_prob=keep_p)
  if not gated:
    x_2 = conv_layer(x_1, 3, 1, filter_size, name + '_conv_2')
  else:
    x_2 = conv_layer(x_1, 3, 1, filter_size*2, name + '_conv_2')
    x_2_1, x_2_2 = tf.split(x_2, 2, axis=3)
    x_2 = x_2_1 * tf.nn.sigmoid(x_2_2)

  if int(orig_x.get_shape()[2]) > int(x_2.get_shape()[2]):
    assert int(orig_x.get_shape()[2]) == 2 * int(x_2.get_shape()[2]), "res net block only supports stride 2"
    orig_x = tf.nn.avg_pool(orig_x, [1,2,2,1], [1,2,2,1], padding='SAME')

  # pad it
  out_filter = filter_size
  in_filter = int(orig_x.get_shape()[3])
  if out_filter != in_filter:
    orig_x = tf.pad(
        orig_x, [[0, 0], [0, 0], [0, 0],
        [(out_filter-in_filter), 0]])

  return orig_x + x_2
def generator(img, scope, gf_dim=64, reuse=False, train=True):

    bn = functools.partial(slim.batch_norm, scale=True, is_training=train,
                           decay=0.9, epsilon=1e-5, updates_collections=None)

    def residule_block(x, dim, scope='res'):
        y = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
        y = relu(instance_norm(conv(y, dim, 3, 1, padding='VALID', scope=scope + '_conv1'), scope=scope + '_instance_norm1'))
        y = tf.pad(y, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
        y = instance_norm(conv(y, dim, 3, 1, padding='VALID', scope=scope + '_conv2'), scope=scope + '_instance_norm2')
        return y + x

    with tf.variable_scope(scope + '_generator', reuse=reuse):
        c0 = tf.pad(img, [[0, 0], [3, 3], [3, 3], [0, 0]], "REFLECT")
        c1 = relu(instance_norm(conv(c0, gf_dim, 7, 1, padding='VALID', scope='c1_conv'), scope='c1_instance_norm'))
        c2 = relu(instance_norm(conv(c1, gf_dim * 2, 3, 2, scope='c2_conv'), scope='c2_instance_norm'))
        c3 = relu(instance_norm(conv(c2, gf_dim * 4, 3, 2, scope='c3_conv'), scope='c3_instance_norm'))

        r1 = residule_block(c3, gf_dim * 4, scope='r1')
        r2 = residule_block(r1, gf_dim * 4, scope='r2')
        r3 = residule_block(r2, gf_dim * 4, scope='r3')
        r4 = residule_block(r3, gf_dim * 4, scope='r4')
        r5 = residule_block(r4, gf_dim * 4, scope='r5')
        r6 = residule_block(r5, gf_dim * 4, scope='r6')
        r7 = residule_block(r6, gf_dim * 4, scope='r7')
        r8 = residule_block(r7, gf_dim * 4, scope='r8')
        r9 = residule_block(r8, gf_dim * 4, scope='r9')

        d1 = relu(instance_norm(deconv(r9, gf_dim * 2, 3, 2, scope='d1_dconv'), scope='d1_instance_norm'))
        d2 = relu(instance_norm(deconv(d1, gf_dim, 3, 2, scope='d2_dconv'), scope='d2_instance_norm'))
        d2 = tf.pad(d2, [[0, 0], [3, 3], [3, 3], [0, 0]], "REFLECT")
        pred = conv(d2, 3, 7, 1, padding='VALID', scope='pred_conv')
        pred = tf.nn.tanh(pred)

        return pred
Example #6
    def build_graph(self, image, label):
        xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
                        for x in range(WARP_TARGET_SIZE)], dtype='float32')
        xys = tf.constant(xys, dtype=tf.float32, name='xys')    # p x 3

        image = image / 255.0 - 0.5  # bhw2

        def get_stn(image):
            stn = (LinearWrap(image)
                   .AvgPooling('downsample', 2)
                   .Conv2D('conv0', 20, 5, padding='VALID')
                   .MaxPooling('pool0', 2)
                   .Conv2D('conv1', 20, 5, padding='VALID')
                   .FullyConnected('fc1', 32)
                   .FullyConnected('fct', 6, activation=tf.identity,
                                   kernel_initializer=tf.constant_initializer(),
                                   bias_initializer=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
            # output 6 parameters for affine transformation
            stn = tf.reshape(stn, [-1, 2, 3], name='affine')  # bx2x3
            stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1])  # 3 x (bx2)
            coor = tf.reshape(tf.matmul(xys, stn),
                              [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
            coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
            sampled = BilinearSample('warp', [image, coor], borderMode='constant')
            return sampled

        with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
            with tf.variable_scope('STN1'):
                sampled1 = get_stn(image)
            with tf.variable_scope('STN2'):
                sampled2 = get_stn(image)

        # For visualization in tensorboard
        with tf.name_scope('visualization'):
            padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
            padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
            img_orig = tf.concat([image[:, :, :, 0], image[:, :, :, 1]], 1)  # b x 2h  x w
            transform1 = tf.concat([padded1[:, :, :, 0], padded1[:, :, :, 1]], 1)
            transform2 = tf.concat([padded2[:, :, :, 0], padded2[:, :, :, 1]], 1)
            stacked = tf.concat([img_orig, transform1, transform2], 2, 'viz')
            tf.summary.image('visualize',
                             tf.expand_dims(stacked, -1), max_outputs=30)

        sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
        logits = (LinearWrap(sampled)
                  .FullyConnected('fc1', 256, activation=tf.nn.relu)
                  .FullyConnected('fc2', 128, activation=tf.nn.relu)
                  .FullyConnected('fct', 19, activation=tf.identity)())
        tf.nn.softmax(logits, name='prob')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), name='incorrect_vector')
        summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        summary.add_moving_summary(cost, wd_cost)
        return tf.add_n([wd_cost, cost], name='cost')
Example #7
def build_generator_resnet_6blocks(inputgen, name="generator"):
    with tf.variable_scope(name):
        f = 7
        ks = 3
        
        pad_input = tf.pad(inputgen, [[0, 0], [ks, ks], [ks, ks], [0, 0]], "REFLECT")
        o_c1 = general_conv2d(pad_input, ngf, f, f, 1, 1, 0.02, name="c1")
        o_c2 = general_conv2d(o_c1, ngf*2, ks, ks, 2, 2, 0.02, "SAME", "c2")
        o_c3 = general_conv2d(o_c2, ngf*4, ks, ks, 2, 2, 0.02, "SAME", "c3")

        o_r1 = build_resnet_block(o_c3, ngf*4, "r1")
        o_r2 = build_resnet_block(o_r1, ngf*4, "r2")
        o_r3 = build_resnet_block(o_r2, ngf*4, "r3")
        o_r4 = build_resnet_block(o_r3, ngf*4, "r4")
        o_r5 = build_resnet_block(o_r4, ngf*4, "r5")
        o_r6 = build_resnet_block(o_r5, ngf*4, "r6")

        o_c4 = general_deconv2d(o_r6, [batch_size, 64, 64, ngf*2], ngf*2, ks, ks, 2, 2, 0.02, "SAME", "c4")
        o_c5 = general_deconv2d(o_c4, [batch_size, 128, 128, ngf], ngf, ks, ks, 2, 2, 0.02, "SAME", "c5")
        o_c5_pad = tf.pad(o_c5, [[0, 0], [ks, ks], [ks, ks], [0, 0]], "REFLECT")
        o_c6 = general_conv2d(o_c5_pad, img_layer, f, f, 1, 1, 0.02, "VALID", "c6", do_relu=False)

        # Adding the tanh layer

        out_gen = tf.nn.tanh(o_c6, "t1")

        return out_gen
def Rk(input, k,  reuse=False, norm='instance', is_training=True, name=None):
  """ A residual block that contains two 3x3 convolutional layers
      with the same number of filters on both layers
  Args:
    input: 4D Tensor
    k: integer, number of filters (output depth)
    reuse: boolean
    name: string
  Returns:
    4D tensor (same shape as input)
  """
  with tf.variable_scope(name, reuse=reuse):
    with tf.variable_scope('layer1', reuse=reuse):
      weights1 = _weights("weights1",
        shape=[3, 3, input.get_shape()[3], k])
      padded1 = tf.pad(input, [[0,0],[1,1],[1,1],[0,0]], 'REFLECT')
      conv1 = tf.nn.conv2d(padded1, weights1,
          strides=[1, 1, 1, 1], padding='VALID')
      normalized1 = _norm(conv1, is_training, norm)
      relu1 = tf.nn.relu(normalized1)

    with tf.variable_scope('layer2', reuse=reuse):
      weights2 = _weights("weights2",
        shape=[3, 3, relu1.get_shape()[3], k])

      padded2 = tf.pad(relu1, [[0,0],[1,1],[1,1],[0,0]], 'REFLECT')
      conv2 = tf.nn.conv2d(padded2, weights2,
          strides=[1, 1, 1, 1], padding='VALID')
      normalized2 = _norm(conv2, is_training, norm)
    output = input+normalized2
    return output
Example #9
def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
    shape2d = tf.shape(image)[2:]
    mult = float(cfg.FPN.RESOLUTION_REQUIREMENT)
    new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult)
    pad_shape2d = new_shape2d - shape2d
    assert len(num_blocks) == 4, num_blocks
    with resnet_argscope():
        chan = image.shape[1]
        pad_base = maybe_reverse_pad(2, 3)
        l = tf.pad(image, tf.stack(
            [[0, 0], [0, 0],
             [pad_base[0], pad_base[1] + pad_shape2d[0]],
             [pad_base[0], pad_base[1] + pad_shape2d[1]]]))
        l.set_shape([None, chan, None, None])
        l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
        l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(0, 1), maybe_reverse_pad(0, 1)])
        l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
        c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)
        if freeze_c2:
            c2 = tf.stop_gradient(c2)
        c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2)
        c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
        c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2)
    # 32x downsampling up to now
    # size of c5: ceil(input/32)
    return c2, c3, c4, c5
Example #10
  def _residual_v1(self,
                   x,
                   kernel_size,
                   in_filter,
                   out_filter,
                   stride,
                   activate_before_residual=False):
    """Residual unit with 2 sub layers, using Plan A for shortcut connection."""

    del activate_before_residual
    with tf.name_scope('residual_v1') as name_scope:
      orig_x = x

      x = self._conv(x, kernel_size, out_filter, stride)
      x = self._batch_norm(x)
      x = self._relu(x)

      x = self._conv(x, kernel_size, out_filter, 1)
      x = self._batch_norm(x)

      if in_filter != out_filter:
        orig_x = self._avg_pool(orig_x, stride, stride)
        pad = (out_filter - in_filter) // 2
        if self._data_format == 'channels_first':
          orig_x = tf.pad(orig_x, [[0, 0], [pad, pad], [0, 0], [0, 0]])
        else:
          orig_x = tf.pad(orig_x, [[0, 0], [0, 0], [0, 0], [pad, pad]])

      x = self._relu(tf.add(x, orig_x))

      tf.logging.info('image after unit %s: %s', name_scope, x.get_shape())
      return x
Example #11
def fixed_padding(inputs, kernel_size, data_format="channels_first"):
  """Pads the input along the spatial dimensions independently of input size.

  Args:
    inputs: `Tensor` of size `[batch, channels, height, width]` or
        `[batch, height, width, channels]` depending on `data_format`.
    kernel_size: `int` kernel size to be used for `conv2d` or `max_pool2d`
        operations. Should be a positive integer.
    data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    A padded `Tensor` of the same `data_format` with size either intact
    (if `kernel_size == 1`) or padded (if `kernel_size > 1`).
  """
  pad_total = kernel_size - 1
  pad_beg = pad_total // 2
  pad_end = pad_total - pad_beg
  if data_format == "channels_first":
    padded_inputs = tf.pad(
        inputs, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]])
  else:
    padded_inputs = tf.pad(
        inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])

  return padded_inputs
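
# Usage sketch (added for illustration; assumes TensorFlow 1.x and
# channels_last input). The usual pattern pairs fixed_padding with a VALID
# convolution, so the effective padding no longer depends on the input size:
def _fixed_padding_demo():
  inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
  padded = fixed_padding(inputs, kernel_size=7, data_format="channels_last")
  conv = tf.layers.conv2d(padded, filters=64, kernel_size=7, strides=2,
                          padding='VALID', use_bias=False)
  return conv  # static shape (?, 112, 112, 64)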
Example #12
def add_edge_padding(x, filter_size):
    assert filter_size[0] % 2 == 1 and filter_size[1] % 2 == 1
    if filter_size[0] == 1 and filter_size[1] == 1:
        return x
    a = (filter_size[0] - 1) // 2  # vertical padding size
    b = (filter_size[1] - 1) // 2  # horizontal padding size
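    # Note (added): the branch below chooses between two equivalent ways of
    # appending a padding-indicator channel; only the first, which caches the
    # indicator in a graph collection, is ever executed.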
    if True:
        x = tf.pad(x, [[0, 0], [a, a], [b, b], [0, 0]])
        name = "_".join([str(dim) for dim in [a, b, *int_shape(x)[1:3]]])
        pads = tf.get_collection(name)
        if not pads:
            if hvd.rank() == 0:
                print("Creating pad", name)
            pad = np.zeros([1] + int_shape(x)[1:3] + [1], dtype='float32')
            pad[:, :a, :, 0] = 1.
            pad[:, -a:, :, 0] = 1.
            pad[:, :, :b, 0] = 1.
            pad[:, :, -b:, 0] = 1.
            pad = tf.convert_to_tensor(pad)
            tf.add_to_collection(name, pad)
        else:
            pad = pads[0]
        pad = tf.tile(pad, [tf.shape(x)[0], 1, 1, 1])
        x = tf.concat([x, pad], axis=3)
    else:
        pad = tf.pad(tf.zeros_like(x[:, :, :, :1]) - 1,
                     [[0, 0], [a, a], [b, b], [0, 0]]) + 1
        x = tf.pad(x, [[0, 0], [a, a], [b, b], [0, 0]])
        x = tf.concat([x, pad], axis=3)
    return x
Example #13
    def _conv(self, x, kernel_size, filters, strides, is_atrous=False):
        """Convolution."""

        padding = 'SAME'
        if not is_atrous and strides > 1:
            pad = kernel_size - 1
            pad_beg = pad // 2
            pad_end = pad - pad_beg
            if self._data_format == 'channels_first':
                x = tf.pad(
                    x,
                    [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]])
            else:
                x = tf.pad(
                    x,
                    [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
            padding = 'VALID'
        return tf.layers.conv2d(
            inputs=x,
            kernel_size=kernel_size,
            filters=filters,
            strides=strides,
            padding=padding,
            use_bias=False,
            data_format=self._data_format)
def pad_to_same_length(x, y, final_length_divisible_by=1, axis=1):
  """Pad tensors x and y on axis 1 so that they have the same length."""
  if axis not in [1, 2]:
    raise ValueError("Only axis=1 and axis=2 supported for now.")
  with tf.name_scope("pad_to_same_length", [x, y]):
    x_length = tf.shape(x)[axis]
    y_length = tf.shape(y)[axis]
    max_length = tf.maximum(x_length, y_length)
    if final_length_divisible_by > 1:
      # Find the nearest larger-or-equal integer divisible by given number.
      max_length += final_length_divisible_by - 1
      max_length //= final_length_divisible_by
      max_length *= final_length_divisible_by
    length_diff1 = max_length - x_length
    length_diff2 = max_length - y_length

    def padding_list(length_diff, arg):
      if axis == 1:
        return [[[0, 0], [0, length_diff]],
                tf.zeros([tf.rank(arg) - 2, 2], dtype=tf.int32)]
      return [[[0, 0], [0, 0], [0, length_diff]],
              tf.zeros([tf.rank(arg) - 3, 2], dtype=tf.int32)]

    paddings1 = tf.concat(padding_list(length_diff1, x), axis=0)
    paddings2 = tf.concat(padding_list(length_diff2, y), axis=0)
    res_x = tf.pad(x, paddings1)
    res_y = tf.pad(y, paddings2)
    # Static shapes are the same except for axis=1.
    x_shape = x.shape.as_list()
    x_shape[axis] = None
    res_x.set_shape(x_shape)
    y_shape = y.shape.as_list()
    y_shape[axis] = None
    res_y.set_shape(y_shape)
    return res_x, res_y
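
# Usage sketch (added for illustration; TF 1.x assumed): pad two batches so
# that axis 1 matches and is also a multiple of 8.
def _pad_to_same_length_demo():
  x = tf.zeros([4, 13, 512])
  y = tf.zeros([4, 21, 512])
  x_p, y_p = pad_to_same_length(x, y, final_length_divisible_by=8)
  return x_p, y_p  # both now have length ceil(21 / 8) * 8 = 24 on axis 1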
Example #15
def fixed_padding(inputs, kernel_size, data_format, conv_time_dim):
  """Pads the input along the spatial dimensions independently of input size.

  Args:
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
                 Should be a positive integer.
    data_format: The input format ('channels_last' or 'channels_first').
    conv_time_dim: If True, the time dimension is left unpadded; padding is
      applied only to the feature dimension.

  Returns:
    A tensor with the same format as the input with the data either intact
    (if kernel_size == 1) or padded (if kernel_size > 1).
  """
  pad_total = kernel_size - 1
  feature_pad_beg = pad_total // 2
  feature_pad_end = pad_total - feature_pad_beg

  if conv_time_dim:
    time_pad_beg = 0
    time_pad_end = 0
  else:
    time_pad_beg = feature_pad_beg
    time_pad_end = feature_pad_end

  if data_format == 'channels_first':
    padded_inputs = tf.pad(inputs, [[0, 0], [0, 0],
                                    [time_pad_beg, time_pad_end],
                                    [feature_pad_beg, feature_pad_end]])
  else:
    padded_inputs = tf.pad(inputs, [[0, 0], [time_pad_beg, time_pad_end],
                                    [feature_pad_beg, feature_pad_end], [0, 0]])
  return padded_inputs
Example #16
def random_transformation2(x, y, padding, phase_train, rnd_vflip=True, rnd_hflip=True, rnd_transpose=True, rnd_colour=False):
    """
    Perform random crop, flip, transpose, hue, saturation, brightness, contrast.

    Args:
        x: [B, H, W, 3]
        y: [B, T, H, W]
        padding: int
        phase_train: bool
    """
    # Random image transformation layers.
    phase_train_f = tf.to_float(phase_train)
    x_shape = tf.shape(x)
    y_shape = tf.shape(y)
    num_ex = x_shape[0]
    inp_height = x_shape[1]
    inp_width = x_shape[2]
    inp_depth_x = x_shape[3]
    inp_depth_y = y_shape[3]

    # Add padding
    x_pad = tf.pad(x, [[0, 0], [padding, padding], [padding, padding], [0, 0]])
    y_pad = tf.pad(y, [[0, 0], [padding, padding], [padding, padding], [0, 0]])

    # Random crop
    offset = tf.random_uniform([2], dtype='int32', maxval=padding * 2)
    x_rand = tf.slice(x_pad, tf.pack([0, offset[0], offset[1], 0]),
                      tf.pack([-1, inp_height, inp_width, inp_depth_x]))
    y_rand = tf.slice(y_pad, tf.pack([0, offset[0], offset[1], 0]),
                      tf.pack([-1, inp_height, inp_width, inp_depth_y]))

    # Center slices (for inference)
    x_ctr = tf.slice(x_pad, [0, padding, padding, 0],
                     tf.pack([-1, inp_height, inp_width, -1]))
    y_ctr = tf.slice(y_pad, [0, padding, padding, 0],
                     tf.pack([-1, inp_height, inp_width, -1]))

    # Random horizontal & vertical flip & transpose
    rand_h = tf.random_uniform([1], 1.0 - float(rnd_hflip), 1.0)
    rand_v = tf.random_uniform([1], 1.0 - float(rnd_vflip), 1.0)
    mirror = tf.pack([1.0, rand_v[0], rand_h[0], 1.0]) < 0.5
    x_rand = tf.reverse(x_rand, mirror)
    y_rand = tf.reverse(y_rand, mirror)
    rand_t = tf.random_uniform([1], 1.0 - float(rnd_transpose), 1.0)
    do_tr = tf.cast(rand_t[0] < 0.5, 'int32')
    x_rand = tf.transpose(x_rand, tf.pack([0, 1 + do_tr, 2 - do_tr, 3]))
    y_rand = tf.transpose(y_rand, tf.pack([0, 1 + do_tr, 2 - do_tr, 3]))

    # Random hue, saturation, brightness, contrast
    if rnd_colour:
        x_rand = random_hue(x_rand, 0.1)
        x_rand = random_saturation(x_rand, 0.9, 1.1)
        x_rand = tf.image.random_brightness(x_rand, 0.1)
        x_rand = tf.image.random_contrast(x_rand, 0.9, 1.1)

    x = (1.0 - phase_train_f) * x_ctr + phase_train_f * x_rand
    y = (1.0 - phase_train_f) * y_ctr + phase_train_f * y_rand

    return x, y
Example #17
    def build_network(self,
                      images,
                      num_outputs,
                      alpha,
                      keep_prob=0.5,
                      is_training=True,
                      scope='yolo'):
        with tf.variable_scope(scope):
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                activation_fn=leaky_relu(alpha),
                weights_regularizer=slim.l2_regularizer(0.0005),
                weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)
            ):
                net = tf.pad(
                    images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),
                    name='pad_1')
                net = slim.conv2d(net, 64, 7, 2, padding='VALID', scope='conv_2')

                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')
                
                net = slim.conv2d(net, 192, 3, scope='conv_4')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_5')

                net = slim.conv2d(net, 128, 1, scope='conv_6')
                net = slim.conv2d(net, 256, 3, scope='conv_7')
                net = slim.conv2d(net, 256, 1, scope='conv_8')
                net = slim.conv2d(net, 512, 3, scope='conv_9')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_10')
                
                net = slim.conv2d(net, 256, 1, scope='conv_11')
                net = slim.conv2d(net, 512, 3, scope='conv_12')
                net = slim.conv2d(net, 256, 1, scope='conv_13')
                net = slim.conv2d(net, 512, 3, scope='conv_14')
                net = slim.conv2d(net, 256, 1, scope='conv_15')
                net = slim.conv2d(net, 512, 3, scope='conv_16')
                net = slim.conv2d(net, 256, 1, scope='conv_17')
                net = slim.conv2d(net, 512, 3, scope='conv_18')
                net = slim.conv2d(net, 512, 1, scope='conv_19')
                net = slim.conv2d(net, 1024, 3, scope='conv_20')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_21')
                
                net = slim.conv2d(net, 512, 1, scope='conv_22')
                net = slim.conv2d(net, 1024, 3, scope='conv_23')
                net = slim.conv2d(net, 512, 1, scope='conv_24')
                net = slim.conv2d(net, 1024, 3, scope='conv_25')
                net = slim.conv2d(net, 1024, 3, scope='conv_26')
                net = tf.pad(net, np.array([[0, 0], [1, 1], [1, 1], [0, 0]]), name='pad_27')
                net = slim.conv2d(net, 1024, 3, 2, padding='VALID', scope='conv_28')
                net = slim.conv2d(net, 1024, 3, scope='conv_29')
                net = slim.conv2d(net, 1024, 3, scope='conv_30')
                net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
                net = slim.flatten(net, scope='flat_32')
                net = slim.fully_connected(net, 512, scope='fc_33')
                net = slim.fully_connected(net, 4096, scope='fc_34')
                net = slim.dropout(net, keep_prob=keep_prob, is_training=is_training, scope='dropout_35')
                net = slim.fully_connected(net, num_outputs, activation_fn=None, scope='fc_36')
        return net
def prepare_decoder(targets, hparams):
  """Prepare decoder for images."""
  targets_shape = common_layers.shape_list(targets)
  channels = hparams.num_channels
  curr_infer_length = None

  # during training, images are [batch, IMG_LEN, IMG_LEN, 3].
  # At inference, they are [batch, curr_infer_length, 1, 1]
  if hparams.mode == tf.contrib.learn.ModeKeys.INFER:
    curr_infer_length = targets_shape[1]
    if hparams.block_raster_scan:
      assert hparams.img_len*channels % hparams.query_shape[1] == 0
      assert hparams.img_len % hparams.query_shape[0] == 0
      total_block_width = hparams.img_len*channels
      # Decoding is in block raster scan order. We divide the image into
      # hparams.query_shape blocks and then decode each block in raster scan.
      # To make that compatible with our inference pipeline, pad the target so
      # that rows is a multiple of query_shape and columns is a multiple of
      # hparams.img_len*channels
      curr_infer_length = targets_shape[1]
      block_padding_factor = total_block_width * hparams.query_shape[0]
      targets = tf.pad(targets, [
          [0, 0], [0, -curr_infer_length % block_padding_factor],
          [0, 0], [0, 0]])

      num_blocks = total_block_width // hparams.query_shape[1]
      # Reshape the image to represent blocks
      target_blocks = tf.reshape(
          targets, [targets_shape[0], -1, num_blocks, hparams.query_shape[0],
                    hparams.query_shape[1]])
      # Transpose to read the image in 2D fashion.
      targets = tf.transpose(target_blocks, [0, 1, 3, 2, 4])
    else:
      # add padding to make sure the size of targets is a multiple of img_height
      # times number of channels. This is needed for positional encodings and
      # for doing the RGB lookup.
      padding_factor = channels * hparams.img_len
      targets = tf.pad(targets, [
          [0, 0], [0, -curr_infer_length % padding_factor], [0, 0], [0, 0]])
    targets = tf.reshape(targets,
                         [targets_shape[0], -1, hparams.img_len, channels])
  # Preprocess image
  x = prepare_image(targets, hparams, name="dec_channels")
  x_shape = common_layers.shape_list(x)
  if (hparams.dec_attention_type == AttentionType.LOCAL_2D or
      hparams.dec_attention_type == AttentionType.LOCAL_BLOCK):
    x = common_attention.right_shift_blockwise(x, hparams.query_shape)
    x = add_pos_signals(x, hparams, "dec_pos")
  else:
    # Add position signals
    x = tf.reshape(x, [targets_shape[0],
                       x_shape[1]*x_shape[2], hparams.hidden_size])
    x = common_layers.shift_right_3d(x)
    x = tf.reshape(x, [targets_shape[0],
                       x_shape[1], x_shape[2], hparams.hidden_size])
    x = add_pos_signals(x, hparams, "dec_pos")
  x = common_layers.cast_like(x, targets)
  return x, x_shape[1], x_shape[2]
Example #19
def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose',
                      pad_mode='REFLECT', align_corners=False):
  """Upsamples the given inputs.

  Args:
    net: A Tensor of size [batch_size, height, width, filters].
    num_outputs: The number of output filters.
    stride: A list of 2 scalars or a 1x2 Tensor indicating the scale,
      relative to the inputs, of the output dimensions. For example, if stride
      is [2, 3], then the output height and width will be twice and three
      times the input size.
    method: The upsampling method: 'nn_upsample_conv', 'bilinear_upsample_conv',
      or 'conv2d_transpose'.
    pad_mode: mode for tf.pad, one of "CONSTANT", "REFLECT", or "SYMMETRIC".
    align_corners: option for method, 'bilinear_upsample_conv'. If true, the
      centers of the 4 corner pixels of the input and output tensors are
      aligned, preserving the values at the corner pixels.

  Returns:
    A Tensor which was upsampled using the specified method.

  Raises:
    ValueError: if `method` is not recognized.
  """
  with tf.variable_scope('upconv'):
    net_shape = tf.shape(net)
    height = net_shape[1]
    width = net_shape[2]

    # Reflection pad by 1 in spatial dimensions (axes 1, 2 = h, w) to make a 3x3
    # 'valid' convolution produce an output with the same dimension as the
    # input.
    spatial_pad_1 = np.array([[0, 0], [1, 1], [1, 1], [0, 0]])

    if method == 'nn_upsample_conv':
      net = tf.image.resize_nearest_neighbor(
          net, [stride[0] * height, stride[1] * width])
      net = tf.pad(net, spatial_pad_1, pad_mode)
      net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid')
    elif method == 'bilinear_upsample_conv':
      net = tf.image.resize_bilinear(
          net, [stride[0] * height, stride[1] * width],
          align_corners=align_corners)
      net = tf.pad(net, spatial_pad_1, pad_mode)
      net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid')
    elif method == 'conv2d_transpose':
      # This corrects 1 pixel offset for images with even width and height.
      # conv2d is left aligned and conv2d_transpose is right aligned for even
      # sized images (while doing 'SAME' padding).
      # Note: This doesn't reflect actual model in paper.
      net = layers.conv2d_transpose(
          net, num_outputs, kernel_size=[3, 3], stride=stride, padding='valid')
      net = net[:, 1:, 1:, :]
    else:
      raise ValueError('Unknown method: [%s]' % method)

    return net
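
# Usage sketch (added for illustration; assumes TF 1.x with tf.contrib.layers
# imported as `layers`, as the function above uses): double the spatial
# resolution with the upsample-then-conv variant, which avoids the
# checkerboard artifacts that conv2d_transpose can introduce.
def _cyclegan_upsample_demo(net):
  return cyclegan_upsample(net, num_outputs=128, stride=[2, 2],
                           method='nn_upsample_conv')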
Example #20
def setup_actor_update(actor):

    with tf.variable_scope("rl"):
        actor.critic_output = tf.placeholder(tf.float32, [None, None, actor.vocab_size], name='critic_output')
        # action_gradients is passed in by Q_network...
        # and in DDPG, it's the gradients of Q w.r.t. policy's chosen actions
        # but in AC, it's the output of Q network w.r.t. all actions
        opt = nlc_model.get_optimizer(FLAGS.optimizer)(actor.learning_rate)

        # update
        params = tf.trainable_variables()

        # TODO: hope this would work
        with tf.variable_scope("Loss"):
            doshape = tf.shape(actor.decoder_output)
            T, batch_size = doshape[0], doshape[1]
            do2d = tf.reshape(actor.decoder_output, [-1, actor.size])
            logits2d = rnn_cell._linear(do2d, actor.vocab_size, True, 1.0)
            # outputs2d = tf.nn.log_softmax(logits2d)

            # apply Q-network's score here (similar to advantage function)
            # 1. reshape critic_output like decoder_output (same shape anyway)
            # TODO: hope this is correct
            critic_do2d = tf.reshape(actor.critic_output, [-1, actor.vocab_size])  # should reshape according to critic
            # 2. multiply this with actor's logitis
            rl_logits2d = logits2d * critic_do2d

            # actor.outputs = tf.reshape(outputs2d, tf.pack([T, batch_size, actor.vocab_size]))

            targets_no_GO = tf.slice(actor.target_tokens, [1, 0], [-1, -1])
            masks_no_GO = tf.slice(actor.target_mask, [1, 0], [-1, -1])
            # easier to pad target/mask than to split decoder input since tensorflow does not support negative indexing
            labels1d = tf.reshape(tf.pad(targets_no_GO, [[0, 1], [0, 0]]), [-1])
            mask1d = tf.reshape(tf.pad(masks_no_GO, [[0, 1], [0, 0]]), [-1])
            losses1d = tf.nn.sparse_softmax_cross_entropy_with_logits(rl_logits2d, labels1d) * tf.to_float(mask1d)
            losses2d = tf.reshape(losses1d, tf.pack([T, batch_size]))

            actor.rl_losses = tf.reduce_sum(losses2d) / tf.to_float(batch_size)

        # http://pemami4911.github.io/blog/2016/08/21/ddpg-rl.html (DDPG update)
        gradients = tf.gradients(actor.rl_losses, params)  # step 7: update
        # Not sure if I understood this part lol

        clipped_gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.max_gradient_norm)

        # clip, then multiply, otherwise we are not learning the signals from critic
        # clipped_gradients: [T, batch_size, vocab_size]

        # updated_gradients = clipped_gradients * actor.critic_output
        # pass in as input

        actor.rl_gradient_norm = tf.global_norm(clipped_gradients)
        actor.rl_param_norm = tf.global_norm(params)

        actor.rl_updates = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=actor.global_step)
Example #21
def convolutional(inputs, output_channels, filter_size, stride, padding, conv_type, scope, init='xavier', regularizer=None, data_format='NHWC', output_shape=None, spectral=False, 
                  power_iterations=1, display=True):
    with tf.variable_scope('conv_layer_%s' % scope):
        # Weight Initlializer.
        if init=='normal':
            weight_init = tf.initializers.random_normal(stddev=0.02)
        elif init=='orthogonal':
            weight_init = tf.initializers.orthogonal()
        else:
            weight_init = tf.contrib.layers.xavier_initializer_conv2d()

        # Shapes.
        current_shape = inputs.get_shape()
        input_channels = current_shape[3]
        if 'transpose' in conv_type or 'upscale' in conv_type: filter_shape = (filter_size, filter_size, output_channels, input_channels)
        else: filter_shape = (filter_size, filter_size, input_channels, output_channels)

        # Weight and Bias Initialization.
        bias = tf.get_variable(name='bias', shape=[output_channels], initializer=tf.constant_initializer(0.0), trainable=True, dtype=tf.float32) 
        filter = tf.get_variable(name='filter_conv', shape=filter_shape, initializer=weight_init, trainable=True, dtype=tf.float32, regularizer=regularizer)    
        
        # Type of convolutional operation.
        if conv_type == 'upscale':
            output_shape = [tf.shape(inputs)[0], current_shape[1]*2, current_shape[2]*2, output_channels]
            # Weight filter initializer.
            filter = tf.pad(filter, ([1,1], [1,1], [0,0], [0,0]), mode='CONSTANT')
            filter = tf.add_n([filter[1:,1:], filter[:-1,1:], filter[1:,:-1], filter[:-1,:-1]])
            if spectral: filter = spectral_normalization(filter, power_iterations)
            strides = [1, 2, 2, 1]
            output = tf.nn.conv2d_transpose(value=inputs, filter=filter, output_shape=tf.stack(output_shape), strides=strides, padding=padding, data_format=data_format)
            
        elif conv_type == 'downscale':
            # Weight filter initializer.
            filter = tf.pad(filter, ([1,1], [1,1], [0,0], [0,0]), mode='CONSTANT')
            filter = tf.add_n([filter[1:,1:], filter[:-1,1:], filter[1:,:-1], filter[:-1,:-1]])
            if spectral: filter = spectral_normalization(filter, power_iterations)
            strides = [1, 2, 2, 1]
            output = tf.nn.conv2d(input=inputs, filter=filter, strides=strides, padding=padding, data_format=data_format)
            
        elif conv_type == 'transpose':
            output_shape = [tf.shape(inputs)[0], current_shape[1]*stride, current_shape[2]*stride, output_channels]
            strides = [1, stride, stride, 1]
            if spectral: filter = spectral_normalization(filter, power_iterations)
            output = tf.nn.conv2d_transpose(value=inputs, filter=filter, output_shape=tf.stack(output_shape), strides=strides, padding=padding, data_format=data_format)
        
        elif conv_type == 'convolutional':
            strides = [1, stride, stride, 1]
            if spectral: filter = spectral_normalization(filter, power_iterations)
            output = tf.nn.conv2d(input=inputs, filter=filter, strides=strides, padding=padding, data_format=data_format)
        
        output = tf.nn.bias_add(output, bias, data_format=data_format)

    if display:
        print('Conv Layer:     Scope=%15s Channels %5s Filter_size=%2s  Stride=%2s Padding=%6s Conv_type=%15s Output Shape: %s' % 
            (str(scope)[:14], output_channels, filter_size, stride, padding, conv_type, output.shape))
    return output
def style_loss(CNN_structure, const_layers, var_layers, content_segs, style_segs, weight):
    loss_styles = []
    layer_count = float(len(const_layers))
    layer_index = 0

    _, content_seg_height, content_seg_width, _ = content_segs[0].get_shape().as_list()
    _, style_seg_height, style_seg_width, _ = style_segs[0].get_shape().as_list()
    for layer_name in CNN_structure:
        layer_name = layer_name[layer_name.find("/") + 1:]

        # downsampling segmentation
        if "pool" in layer_name:
            content_seg_width, content_seg_height = int(math.ceil(content_seg_width / 2)), int(math.ceil(content_seg_height / 2))
            style_seg_width, style_seg_height = int(math.ceil(style_seg_width / 2)), int(math.ceil(style_seg_height / 2))

            for i in range(len(content_segs)):
                content_segs[i] = tf.image.resize_bilinear(content_segs[i], tf.constant((content_seg_height, content_seg_width)))
                style_segs[i] = tf.image.resize_bilinear(style_segs[i], tf.constant((style_seg_height, style_seg_width)))

        elif "conv" in layer_name:
            for i in range(len(content_segs)):
                # have some differences on border with torch
                content_segs[i] = tf.nn.avg_pool(
                    tf.pad(content_segs[i], [[0, 0], [1, 1], [1, 1], [0, 0]], "CONSTANT"),
                    ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='VALID')
                style_segs[i] = tf.nn.avg_pool(
                    tf.pad(style_segs[i], [[0, 0], [1, 1], [1, 1], [0, 0]], "CONSTANT"),
                    ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='VALID')

        if layer_name == var_layers[layer_index].name[var_layers[layer_index].name.find("/") + 1:]:
            print("Setting up style layer: <{}>".format(layer_name))
            const_layer = const_layers[layer_index]
            var_layer = var_layers[layer_index]

            layer_index = layer_index + 1

            layer_style_loss = 0.0
            for content_seg, style_seg in zip(content_segs, style_segs):
                gram_matrix_const = gram_matrix(tf.multiply(const_layer, style_seg))
                style_mask_mean   = tf.reduce_mean(style_seg)
                gram_matrix_const = tf.cond(tf.greater(style_mask_mean, 0.),
                                        lambda: gram_matrix_const / (tf.to_float(tf.size(const_layer)) * style_mask_mean),
                                        lambda: gram_matrix_const
                                    )

                gram_matrix_var   = gram_matrix(tf.multiply(var_layer, content_seg))
                content_mask_mean = tf.reduce_mean(content_seg)
                gram_matrix_var   = tf.cond(tf.greater(content_mask_mean, 0.),
                                        lambda: gram_matrix_var / (tf.to_float(tf.size(var_layer)) * content_mask_mean),
                                        lambda: gram_matrix_var
                                    )

                diff_style_sum    = tf.reduce_mean(tf.squared_difference(gram_matrix_const, gram_matrix_var)) * content_mask_mean

                layer_style_loss += diff_style_sum

            loss_styles.append(layer_style_loss * weight)
    return loss_styles
Example #23
  def _pre_padding(self, x, kernel_size):
    """Padding Based On Kernel_size"""
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    if self.data_format == 'NCHW':
      x = tf.pad(x, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]])
    else:
      x = tf.pad(x, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
    return x
Example #24
def build_resnet_block(inputres, dim, name="resnet"):
    
    with tf.variable_scope(name):

        out_res = tf.pad(inputres, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
        out_res = general_conv2d(out_res, dim, 3, 3, 1, 1, 0.02, "VALID", "c1")
        out_res = tf.pad(out_res, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
        out_res = general_conv2d(out_res, dim, 3, 3, 1, 1, 0.02, "VALID", "c2", do_relu=False)
        
        return tf.nn.relu(out_res + inputres)
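
# Why REFLECT + VALID (added note): a 3x3 VALID convolution shrinks each
# spatial dimension by 2, so reflect-padding by 1 first keeps the residual
# branch the same size as inputres (required for the addition above) while
# avoiding the border artifacts of zero padding. Standalone sketch, TF 1.x:
def _reflect_valid_demo():
    x = tf.zeros([1, 64, 64, 128])
    p = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")  # 66x66
    return tf.layers.conv2d(p, 128, 3, padding='valid')         # back to 64x64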
Example #25
def _provide_data(input_tensors, truncated_length, hparams):
  """Returns tensors for reading batches from provider."""
  (spec, labels, label_weights, length, onsets, filename,
   note_sequence) = input_tensors

  length = tf.to_int32(length)
  labels = tf.reshape(labels, (-1, constants.MIDI_PITCHES))
  label_weights = tf.reshape(label_weights, (-1, constants.MIDI_PITCHES))
  onsets = tf.reshape(onsets, (-1, constants.MIDI_PITCHES))
  spec = tf.reshape(spec, (-1, hparams_frame_size(hparams)))

  truncated_length = (tf.reduce_min([truncated_length, length])
                      if truncated_length else length)

  # Pad or slice specs and labels tensors to have the same lengths,
  # truncating after truncated_length.
  spec_delta = tf.shape(spec)[0] - truncated_length
  spec = tf.case(
      [(spec_delta < 0,
        lambda: tf.pad(spec, tf.stack([(0, -spec_delta), (0, 0)]))),
       (spec_delta > 0, lambda: spec[0:-spec_delta])],
      default=lambda: spec)
  labels_delta = tf.shape(labels)[0] - truncated_length
  labels = tf.case(
      [(labels_delta < 0,
        lambda: tf.pad(labels, tf.stack([(0, -labels_delta), (0, 0)]))),
       (labels_delta > 0, lambda: labels[0:-labels_delta])],
      default=lambda: labels)
  label_weights = tf.case(
      [(labels_delta < 0,
        lambda: tf.pad(label_weights, tf.stack([(0, -labels_delta), (0, 0)]))
       ), (labels_delta > 0, lambda: label_weights[0:-labels_delta])],
      default=lambda: label_weights)
  onsets = tf.case(
      [(labels_delta < 0,
        lambda: tf.pad(onsets, tf.stack([(0, -labels_delta), (0, 0)]))),
       (labels_delta > 0, lambda: onsets[0:-labels_delta])],
      default=lambda: onsets)

  truncated_note_sequence = truncate_note_sequence_op(
      note_sequence, truncated_length, hparams)

  batch_tensors = {
      'spec': tf.reshape(
          spec, (truncated_length, hparams_frame_size(hparams), 1)),
      'labels': tf.reshape(labels, (truncated_length, constants.MIDI_PITCHES)),
      'label_weights': tf.reshape(
          label_weights, (truncated_length, constants.MIDI_PITCHES)),
      'lengths': truncated_length,
      'onsets': tf.reshape(onsets, (truncated_length, constants.MIDI_PITCHES)),
      'filenames': filename,
      'note_sequences': truncated_note_sequence,
  }

  return batch_tensors
	def _pool_layer(self, inputs, numOut, name = 'pool_layer'):
		with tf.name_scope(name):
			bnr_1 = self._bn_relu(inputs)
			pool = tf.contrib.layers.max_pool2d(bnr_1,[2,2],[2,2],padding='VALID')
			pad_1 = tf.pad(pool, np.array([[0,0],[1,1],[1,1],[0,0]]))
			conv_1 = self._conv(pad_1, numOut, kernel_size=3, strides=1, name='conv')
			bnr_2 = self._bn_relu(conv_1)
			pad_2 = tf.pad(bnr_2, np.array([[0,0],[1,1],[1,1],[0,0]]))
			conv_2 = self._conv(pad_2, numOut, kernel_size=3, strides=1, name='conv')
			upsample = tf.image.resize_nearest_neighbor(conv_2, tf.shape(conv_2)[1:3]*2, name = 'upsampling')
		return upsample
Example #27
def kernels_on_grid_summary(kernel, name):
    """ Returns the Summary with kernel filters displayed in a single grid
    Visualize conv. features as an image (mostly for the 1st layer).
    Args:
        kernel: tensor of shape [Y, X, NumChannels, NumKernels]
        name: the name displayed in tensorboard
    """
    #TODO: fixme

    pad = 1
    kernel_height = kernel.get_shape()[0].value + pad
    kernel_width = kernel.get_shape()[1].value + pad
    depth = kernel.get_shape()[2].value
    num_kernels = kernel.get_shape()[3].value
    num_filters = int(num_kernels / depth)

    square_side = math.ceil(math.sqrt(num_kernels))
    grid_height = square_side * kernel_height + 1
    grid_width = square_side * kernel_width + 1

    # split kernel in num_filters filter and put it into the grid
    # pad the extracted filter
    filters = tf.split(kernel, num_filters, axis=3)
    y_pos, x_pos = 0, 0

    # list of tensors
    cells = []
    for inner_filter in filters:
        filter_3d = tf.squeeze(inner_filter, [3])
        # add padding
        padding = tf.constant([[pad, 0], [pad, 0], [0, 0]])
        filter_3d = tf.pad(filter_3d, padding)

        before_padding = tf.constant([[y_pos, 0], [x_pos, 0], [0, 0]])

        bottom_padding = grid_width - y_pos - kernel_width - 1
        right_padding = grid_height - x_pos - kernel_height - 1
        after_padding = tf.constant([[bottom_padding, 1], [right_padding, 1],
                                     [0, 0]])

        cell = tf.pad(filter_3d, before_padding)
        cells.append(tf.pad(cell, after_padding))

        if right_padding == 0:
            # move along y
            y_pos += kernel_height
            # reset x position
            x_pos = 0
        else:
            # move along x
            x_pos += kernel_height

    grid = tf.reshape(tf.add_n(cells), [1, grid_width, grid_height, depth])
    return tf.summary.image(name, grid, max_outputs=1)
Example #28
    def _pad_tensors_to_same_length(x, y):
      """Pad x and y so that the results have the same length (second dimension)."""
      with tf.name_scope("pad_to_same_length"):
        x_length = tf.shape(x)[1]
        y_length = tf.shape(y)[1]

        max_length = tf.maximum(x_length, y_length)

        x = tf.pad(x, [[0, 0], [0, max_length - x_length], [0, 0]])
        y = tf.pad(y, [[0, 0], [0, max_length - y_length]])
        return x, y
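
    # Usage note (added): this helper typically aligns logits
    # [batch, T_logits, vocab] with label ids [batch, T_labels] before a
    # masked cross-entropy; padded label positions are zero, so a weight mask
    # of nonzero labels excludes them from the loss.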
Example #29
  def testPaddingsMaximum(self):
    with self.test_session(use_gpu=True):
      with self.assertRaises(Exception):
        tf.pad(
            tf.constant([1], shape=[2]),
            tf.constant([2, 0], shape=[1, 2]),
            mode="REFLECT").eval()
      with self.assertRaises(Exception):
        tf.pad(
            tf.constant([1], shape=[2]),
            tf.constant([0, 3], shape=[1, 2]),
            mode="SYMMETRIC").eval()
Example #30
def pad_pred_label(predictions, labels):
    num_digit_predictions = tf.shape(predictions)[-1]
    num_digit_labels = tf.shape(labels)[-1]
    
    paddings_mask = tf.constant([[0,0], [0,1]], dtype=labels.dtype)
    paddings = tf.cast(tf.fill([2,2], tf.abs(num_digit_predictions-num_digit_labels)),labels.dtype)
    paddings = paddings * paddings_mask
    # paddings = tf.constant([[0, 0], [0, tf.abs(num_digit_predictions - num_digit_labels)]])
    
    predictions = tf.cond(num_digit_predictions < num_digit_labels,
                          lambda: tf.pad(predictions, paddings, constant_values=-1),
                          lambda: tf.identity(predictions))
    labels = tf.cond(num_digit_labels < num_digit_predictions,
                     lambda: tf.pad(labels, paddings, constant_values=-1),
                     lambda: tf.identity(labels))
    return predictions, labels
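
# Usage sketch (added for illustration; TF 1.x assumed): the shorter tensor is
# padded with -1 on the right of its last axis so the two can be compared
# elementwise.
def _pad_pred_label_demo():
    preds = tf.constant([[3, 1, 4]])
    labs = tf.constant([[3, 1]])
    return pad_pred_label(preds, labs)  # labs becomes [[3, 1, -1]]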
Example #31
def ReflectPadding2D(x, pad=1):
    x = Lambda(lambda x: tf.pad(x, [[0, 0], [pad, pad], [pad, pad], [0, 0]], mode='REFLECT'))(x)
    return x
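
# Usage sketch (added for illustration; assumes Keras with a TensorFlow
# backend and Input, Conv2D, Model imported from keras): reflect-pad before a
# VALID convolution so the spatial size is preserved.
def _reflect_padding_demo():
    inp = Input((64, 64, 3))
    x = ReflectPadding2D(inp, pad=1)         # 64x64 -> 66x66
    out = Conv2D(32, 3, padding='valid')(x)  # back to 64x64
    return Model(inp, out)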
Example #32
def shuffle_block(inputs, stride, scope=None):

    with tf.variable_scope(scope, default_name='shuffle_block'):
        if stride > 1:
            # when stride == 2
            left_inputs, right_inputs = inputs, inputs
        else:
            left_inputs, right_inputs = tf.split(inputs, 2, axis=3)

        # right branch
        right_outputs = slim.conv2d(right_inputs,
                                    right_inputs.shape[3],
                                    1,
                                    stride=1,
                                    data_format='NHWC',
                                    scope='point_conv1')

        kernel_size = 3
        if stride > 1:
            pad_wh = math.floor(kernel_size / 2)
            right_outputs = tf.pad(
                right_outputs,
                [[0, 0], [pad_wh, pad_wh], [pad_wh, pad_wh], [0, 0]])
            right_outputs = slim.separable_conv2d(right_outputs,
                                                  None,
                                                  kernel_size,
                                                  1,
                                                  activation_fn=None,
                                                  stride=stride,
                                                  padding='VALID',
                                                  scope='depth_conv')
        else:
            right_outputs = slim.separable_conv2d(right_outputs,
                                                  None,
                                                  kernel_size,
                                                  1,
                                                  activation_fn=None,
                                                  stride=stride,
                                                  padding='SAME',
                                                  scope='depth_conv')

        right_outputs = slim.conv2d(right_outputs,
                                    right_outputs.shape[3],
                                    1,
                                    stride=1,
                                    data_format='NHWC',
                                    scope='point_conv2')

        # left branch
        if stride > 1:
            pad_wh = math.floor(kernel_size / 2)
            left_outputs = tf.pad(
                left_inputs,
                [[0, 0], [pad_wh, pad_wh], [pad_wh, pad_wh], [0, 0]])
            left_outputs = slim.separable_conv2d(left_outputs,
                                                 None,
                                                 kernel_size,
                                                 1,
                                                 activation_fn=None,
                                                 stride=stride,
                                                 padding='VALID',
                                                 scope='depth_conv_left')

            left_outputs = slim.conv2d(left_outputs,
                                       right_outputs.shape[3],
                                       1,
                                       stride=1,
                                       data_format='NHWC',
                                       scope='point_conv_left')
        else:
            left_outputs = left_inputs

        # shuffle
        outputs = tf.stack([left_outputs, right_outputs],
                           axis=4,
                           name='output')
        output_shape = outputs.shape
        outputs = tf.reshape(
            outputs,
            [-1, output_shape[1], output_shape[2], 2 * output_shape[3].value])
        return outputs
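
# The stack-then-reshape above is the ShuffleNet channel shuffle for two
# groups: it interleaves the left- and right-branch channels. Standalone
# sketch of the same trick (added for illustration; TF 1.x assumed):
def _channel_shuffle_demo():
    left = tf.zeros([1, 4, 4, 8])
    right = tf.ones([1, 4, 4, 8])
    mixed = tf.reshape(tf.stack([left, right], axis=4), [1, 4, 4, 16])
    return mixed  # channels alternate: left_0, right_0, left_1, right_1, ...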
    def _build_graph(self, inputs):
        xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
                        for x in range(WARP_TARGET_SIZE)], dtype='float32')
        xys = tf.constant(xys, dtype=tf.float32, name='xys')    # p x 3

        image, label = inputs

        image = image / 255.0 - 0.5  # bhw2

        def get_stn(image):
            stn = (LinearWrap(image)
                   .AvgPooling('downsample', 2)
                   .Conv2D('conv0', 20, 5, padding='VALID')
                   .MaxPooling('pool0', 2)
                   .Conv2D('conv1', 20, 5, padding='VALID')
                   .FullyConnected('fc1', 32)
                   .FullyConnected('fct', 6, activation=tf.identity,
                                   kernel_initializer=tf.constant_initializer(),
                                   bias_initializer=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
            # output 6 parameters for affine transformation
            stn = tf.reshape(stn, [-1, 2, 3], name='affine')  # bx2x3
            stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1])  # 3 x (bx2)
            coor = tf.reshape(tf.matmul(xys, stn),
                              [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
            coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
            sampled = ImageSample('warp', [image, coor], borderMode='constant')
            return sampled

        with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
            with tf.variable_scope('STN1'):
                sampled1 = get_stn(image)
            with tf.variable_scope('STN2'):
                sampled2 = get_stn(image)

        # For visualization in tensorboard
        with tf.name_scope('visualization'):
            padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
            padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
            img_orig = tf.concat([image[:, :, :, 0], image[:, :, :, 1]], 1)  # b x 2h  x w
            transform1 = tf.concat([padded1[:, :, :, 0], padded1[:, :, :, 1]], 1)
            transform2 = tf.concat([padded2[:, :, :, 0], padded2[:, :, :, 1]], 1)
            stacked = tf.concat([img_orig, transform1, transform2], 2, 'viz')
            tf.summary.image('visualize',
                             tf.expand_dims(stacked, -1), max_outputs=30)

        sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
        logits = (LinearWrap(sampled)
                  .FullyConnected('fc1', 256, activation=tf.nn.relu)
                  .FullyConnected('fc2', 128, activation=tf.nn.relu)
                  .FullyConnected('fct', 19, activation=tf.identity)())
        tf.nn.softmax(logits, name='prob')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), name='incorrect_vector')
        summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        summary.add_moving_summary(cost, wd_cost)
        self.cost = tf.add_n([wd_cost, cost], name='cost')
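
# A hedged sketch (illustrative, batch size 1) of the grid arithmetic used in
# get_stn() above: multiplying homogeneous pixel coordinates by a 2x3 affine
# matrix yields the sampling coordinates for every target pixel.
import numpy as np

H = W = 2
xys = np.array([(y, x, 1) for y in range(H) for x in range(W)], dtype='float32')  # p x 3
affine = np.array([[1., 0., 0.5],
                   [0., 1., 0.5]], dtype='float32')  # identity plus a translation
coords = xys @ affine.T                              # p x 2 sampling coordinates
print(coords)                                        # every pixel shifted by (0.5, 0.5)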
Example #34
    def load(self,
             ckpt_path,
             hparams,
             master='local',
             batch_timeout_micros=80 * 1000,
             buckets=None):
        self.hparams = hparams
        self.buckets = buckets
        self.tpu_graph = tf.Graph()
        tpu_config = tf.ConfigProto(
            operation_timeout_in_ms=600 * 1000,
            allow_soft_placement=True,
            graph_options=tf.GraphOptions(
                rewrite_options=rewriter_config_pb2.RewriterConfig(
                    disable_meta_optimizer=True)),
            isolate_session_state=True)
        # Find tpu master.
        print('master value set to:', master)
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            master, zone=None, project=None)
        master = tpu_cluster_resolver.get_master()
        self.sess = tf.Session(master, graph=self.tpu_graph, config=tpu_config)
        with self.tpu_graph.as_default():
            self.vocab_table = tf.contrib.lookup.index_to_string_table_from_file(
                self.vocab_prefix, default_value=vocab_utils.UNK)

        if self.scenario == 'Offline':
            with self.tpu_graph.as_default():
                self.source = tf.placeholder(shape=(hparams.infer_batch_size,
                                                    hparams.src_max_len_infer),
                                             dtype=tf.int32)
                self.source_sequence_length = tf.placeholder(
                    shape=(hparams.infer_batch_size), dtype=tf.int32)

                inputs = [[self.source, self.source_sequence_length]]
                self.predict_ops.append(self.offline_op(inputs))
        else:
            with self.tpu_graph.as_default():
                self.source = tf.placeholder(
                    shape=[None, hparams.src_max_len_infer], dtype=tf.int32)
                self.source_sequence_length = tf.placeholder(shape=[None],
                                                             dtype=tf.int32)
                inputs = [self.source, self.source_sequence_length]
                for _ in buckets:
                    self.predict_ops.append(
                        self.server_op(
                            inputs,
                            num_batch_threads=16,
                            max_batch_size=hparams.infer_batch_size,
                            batch_timeout_micros=batch_timeout_micros,
                            allowed_batch_sizes=[hparams.infer_batch_size],
                            max_enqueued_batches=10000))
                # Add longest sequence predict op.
                self.predict_ops.append(
                    self.server_op(
                        inputs,
                        num_batch_threads=16,
                        max_batch_size=hparams.infer_batch_size,
                        batch_timeout_micros=5000 * 1000,
                        allowed_batch_sizes=[hparams.infer_batch_size],
                        max_enqueued_batches=10000))

        with self.tpu_graph.as_default():
            vs = tf.global_variables()

            assign_ops = []
            var_map = {}
            with tf.variable_scope('f32', dtype=tf.float32):
                for i in vs:
                    if 'output_projection' in i.name:
                        new_var = tf.get_variable(
                            i.name[:-2], [i.shape[0], hparams.tgt_vocab_size])
                        assign_ops.append(
                            tf.assign(
                                i,
                                tf.pad(
                                    tf.cast(new_var, i.dtype),
                                    [[0, 0],
                                     [
                                         0, 128 *
                                         (hparams.tgt_vocab_size // 128 + 1) -
                                         hparams.tgt_vocab_size
                                     ]])))
                    else:
                        new_var = tf.get_variable(i.name[:-2], i.shape)
                        assign_ops.append(
                            tf.assign(i, tf.cast(new_var, i.dtype)))
                    var_map[i.name[:-2]] = new_var.name[:-2]

            self.sess.run(tpu.initialize_system())
            tf.train.init_from_checkpoint(ckpt_path, var_map)
            self.sess.run(tf.initializers.global_variables())
            self.sess.run(tf.tables_initializer())
            self.sess.run(assign_ops)

        return self
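
# Hedged helper (an assumption, not in the original code): the pad width applied
# to 'output_projection' above rounds the target vocabulary up to the next
# multiple of 128, which keeps the projection shape TPU-friendly. Note it always
# rounds to the *next* multiple, even for an exact multiple of 128.
def padded_vocab_size(vocab_size, multiple=128):
    return multiple * (vocab_size // multiple + 1)

assert padded_vocab_size(32000) == 32128
assert padded_vocab_size(32100) == 32128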
    def encoder(self, inputs, n_layers=3):
        """COnvnet that encodes inputs into mean and std of a gaussian.

    Args:
     inputs: 5-D Tensor, shape (batch_size, num_frames, width, height, channels)
     n_layers: Number of layers.

    Returns:
     z_mu: Mean of the latent gaussians.
     z_log_var: log(var) of the latent gaussians.

    Raises:
      ValueError: If inputs is not a 5-D tensor or not float32.
    """
        latent_dims = self.hparams.z_dim

        shape_as_list = inputs.shape.as_list()
        if len(shape_as_list) != 5:
            raise ValueError("Expected inputs to be 5-D, got %d-D" %
                             len(shape_as_list))
        if inputs.dtype != tf.float32:
            raise ValueError("Expected dtype tf.float32, got %s" %
                             inputs.dtype)

        # Flatten (N,T,W,H,C) into (NT,W,H,C)
        batch_size, _ = shape_as_list[:2]
        inputs = tf.reshape(inputs, [-1] + list(inputs.shape)[2:])
        n_filters = 64
        rectified = None

        # Applies 3 layer conv-net with padding, instance normalization
        # and leaky relu as per the encoder in
        # https://github.com/alexlee-gk/video_prediction
        padding = [[0, 0], [1, 1], [1, 1], [0, 0]]
        for i in range(n_layers):
            with tf.variable_scope("layer_%d" % (i + 1)):
                n_filters *= 2**i
                if i:
                    padded = tf.pad(rectified, padding)
                else:
                    padded = tf.pad(inputs, padding)
                convolved = tf.layers.conv2d(padded,
                                             filters=n_filters,
                                             kernel_size=4,
                                             strides=2,
                                             padding="VALID")
                normalized = tf.contrib.layers.instance_norm(convolved)
                rectified = tf.nn.leaky_relu(normalized, alpha=0.2)

        # Mean pooling across all spatial dimensions.
        pooled = tf.nn.avg_pool(rectified,
                                [1] + rectified.shape[1:3].as_list() + [1],
                                strides=[1, 1, 1, 1],
                                padding="VALID")
        squeezed = tf.squeeze(pooled, [1, 2])

        # Down-project and output the mean and log variance of the latents.
        with tf.variable_scope("z_mu"):
            z_mu = tf.layers.dense(squeezed, latent_dims)
        with tf.variable_scope("z_log_sigma_sq"):
            z_log_var = tf.layers.dense(squeezed, latent_dims)
            z_log_var = tf.clip_by_value(z_log_var, -10, 10)

        # Reshape to (batch_size X num_frames X latent_dims)
        z_mu = tf.reshape(z_mu, (batch_size, -1, latent_dims))
        z_log_var = tf.reshape(z_log_var, (batch_size, -1, latent_dims))
        return z_mu, z_log_var
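
# A hedged usage sketch (not part of the snippet above): sampling latents from
# the (z_mu, z_log_var) returned by encoder(), via the standard VAE
# reparameterization trick, using the same TF1-style APIs as the example.
import tensorflow as tf

def sample_latent(z_mu, z_log_var):
    eps = tf.random_normal(tf.shape(z_mu))       # eps ~ N(0, I)
    return z_mu + tf.exp(0.5 * z_log_var) * eps  # std = exp(log(var) / 2)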
Example #36
def discriminator(inputdisc, name="discriminator"):
    with tf.variable_scope(name):
        f = 4
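
        # NOTE (assumption): `ndf` (the base filter count, commonly 64 in
        # CycleGAN-style code) and `layers.general_conv2d` are expected to be
        # defined in the enclosing module.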

        padw = 2

        pad_input = tf.pad(inputdisc,
                           [[0, 0], [padw, padw], [padw, padw], [0, 0]],
                           "CONSTANT")
        o_c1 = layers.general_conv2d(pad_input,
                                     ndf,
                                     f,
                                     f,
                                     2,
                                     2,
                                     0.02,
                                     "VALID",
                                     "c1",
                                     do_norm=False,
                                     relufactor=0.2)

        pad_o_c1 = tf.pad(o_c1, [[0, 0], [padw, padw], [padw, padw], [0, 0]],
                          "CONSTANT")
        o_c2 = layers.general_conv2d(pad_o_c1,
                                     ndf * 2,
                                     f,
                                     f,
                                     2,
                                     2,
                                     0.02,
                                     "VALID",
                                     "c2",
                                     relufactor=0.2)

        pad_o_c2 = tf.pad(o_c2, [[0, 0], [padw, padw], [padw, padw], [0, 0]],
                          "CONSTANT")
        o_c3 = layers.general_conv2d(pad_o_c2,
                                     ndf * 4,
                                     f,
                                     f,
                                     2,
                                     2,
                                     0.02,
                                     "VALID",
                                     "c3",
                                     relufactor=0.2)

        pad_o_c3 = tf.pad(o_c3, [[0, 0], [padw, padw], [padw, padw], [0, 0]],
                          "CONSTANT")
        o_c4 = layers.general_conv2d(pad_o_c3,
                                     ndf * 8,
                                     f,
                                     f,
                                     1,
                                     1,
                                     0.02,
                                     "VALID",
                                     "c4",
                                     relufactor=0.2)

        pad_o_c4 = tf.pad(o_c4, [[0, 0], [padw, padw], [padw, padw], [0, 0]],
                          "CONSTANT")
        o_c5 = layers.general_conv2d(pad_o_c4,
                                     1,
                                     f,
                                     f,
                                     1,
                                     1,
                                     0.02,
                                     "VALID",
                                     "c5",
                                     do_norm=False,
                                     do_relu=False)

        return o_c5
Example #37
 def pad_fn():
   return tf.pad(
       tensor=serialized_list,
       paddings=[[0, 0], [0, list_size - cur_list_size]],
       constant_values="")
 def call(self, input_tensor, mask=None):
     padding_width, padding_height = self.padding
     return tf.pad(input_tensor, [[0, 0], [padding_height, padding_height],
                                  [padding_width, padding_width], [0, 0]],
                   'REFLECT')
Example #39
    def __init__(self, m, n, dim, n_iterations=100, alpha=None, sigma=None, gamma=None, sparsity=None):
        """
        Initializes all necessary components of the TensorFlow
        Graph.
 
        m X n are the dimensions of the SOM. 'n_iterations' should
        should be an integer denoting the number of iterations undergone
        while training.
        'dim' is the dimensionality of the training inputs.
        'gamma' is the edge length of the hypercube defining each 
        neuron's receptive field. A neurons centroid determines the 
        centroid of its receptive field. 
        'alpha' is a number denoting the initial time(iteration no)-based
        learning rate. Default value is 0.3
        'sigma' is the the initial neighbourhood value, denoting
        the radius of influence of the BMU while training. By default, its
        taken to be half of max(m, n).
        """
 
        #Assign required variables first
        self._m = m
        self._n = n
        if sparsity is None:
            sparsity = 0.02
        else:
            sparsity = float(sparsity)
        if alpha is None:
            alpha = 0.3
        else:
            alpha = float(alpha)
        if sigma is None:
            sigma = max(m, n) / 2.0
        else:
            sigma = float(sigma)
        if gamma is None:
            gamma = 5
        else:
            gamma = float(gamma)
        self._n_iterations = abs(int(n_iterations))


        ##INITIALIZE GRAPH
        self._graph = tf.Graph()

        ##POPULATE GRAPH WITH NECESSARY COMPONENTS
        with self._graph.as_default():

            ##VARIABLES AND CONSTANT OPS FOR DATA STORAGE

            #Randomly initialized weightage vectors for all neurons,
            #stored together as a matrix Variable of size [m*n, dim]
            self._weightage_vects = tf.Variable(tf.random_normal(
                [m*n, dim]))

            #List of distances from each centroid to the input vector
            #self._input_neuron_dist = tf.Variable(m*n)

            #Matrix of size [m*n, 2] for SOM grid locations
            #of neurons
            self._location_vects = tf.constant(np.array(
                list(self._neuron_locations(m, n))))

            #Constant tensor filled with gamma/2 (the receptive-field radius),
            #used later to mask out neurons that are too far from the input.
            self._gamma_mask = tf.constant(gamma/2, shape=[m*n])


            ##PLACEHOLDERS FOR TRAINING INPUTS
            #We need to assign them as attributes to self, since they
            #will be fed in during training

            #The training vector
            self._vect_input = tf.placeholder("float", [dim])
            #Iteration number
            self._iter_input = tf.placeholder("float")

            ##CONSTRUCT TRAINING OP PIECE BY PIECE
            #Only the final, 'root' training op needs to be assigned as
            #an attribute to self, since all the rest will be executed
            #automatically during training


            #To compute the Best Matching Units given a vector, find all
            #neurons that have the input in their receptive fields, then
            #calculate the distance to the input from those neurons. The
            #activation threshold is chosen so that roughly 2% of all
            #neurons are active for each input, and the indices of these
            #winning neurons are extracted.

            vector_differences = tf.norm(
                tf.subtract(self._weightage_vects,
                            tf.stack([self._vect_input for i in range(m*n)])),
                axis=1)
            # tf.print(vector_differences, [vector_differences], "Distances from"
            #                                                   "centroid to input: ")
            # Filter out neurons that do not have the input vector in their
            # receptive field; self._gamma_mask already holds the radius
            # gamma/2, so compare against it directly.
            mask = tf.less(vector_differences, self._gamma_mask)
            # Distances of the neurons that contain the input in their
            # receptive fields.
            containing_input = tf.boolean_mask(vector_differences, mask)
            # Threshold below which the closest sparsity*100% of the candidates
            # fall (tf.contrib.distributions.percentile expects q in [0, 100]).
            activation_distance = tf.contrib.distributions.percentile(
                containing_input, sparsity * 100)

            # Boolean mask over all neurons whose distance is within the
            # activation threshold.
            global_mask = tf.less_equal(vector_differences, activation_distance)
            bmu_indices = tf.where(global_mask)

            # Extract the grid location of the BMU from its index. The slice
            # below expects a single index, so take the first winner.
            slice_input = tf.pad(tf.reshape(bmu_indices[0], [1]),
                                 np.array([[0, 1]]))
            bmu_loc = tf.reshape(tf.slice(self._location_vects, slice_input,
                                          tf.constant(np.array([1, 2]))),
                                 [2])
            # To compute the alpha and sigma values based on iteration
            # number
            learning_rate_op = tf.subtract(1.0, tf.div(self._iter_input,
                                                  self._n_iterations))
            _alpha_op = tf.multiply(alpha, learning_rate_op)
            _sigma_op = tf.multiply(sigma, learning_rate_op)
 
            #Construct the op that will generate a vector with learning
            #rates for all neurons, based on iteration number and location
            #wrt BMU.
            bmu_distance_squares = tf.reduce_sum(tf.pow(tf.subtract(
                self._location_vects, tf.stack(
                    [bmu_loc for i in range(m*n)])), 2), 1)
            neighbourhood_func = tf.exp(tf.negative(tf.div(tf.cast(
                bmu_distance_squares, "float32"), tf.pow(_sigma_op, 2))))
            learning_rate_op = tf.multiply(_alpha_op, neighbourhood_func)
 
            #Finally, the op that will use learning_rate_op to update
            #the weightage vectors of all neurons based on a particular
            #input
            learning_rate_multiplier = tf.stack([tf.tile(tf.slice(
                learning_rate_op, np.array([i]), np.array([1])), [dim])
                                               for i in range(m*n)])
            weightage_delta = tf.multiply(
                learning_rate_multiplier,
                tf.subtract(tf.stack([self._vect_input for i in range(m*n)]),
                       self._weightage_vects))                                         
            new_weightages_op = tf.add(self._weightage_vects,
                                       weightage_delta)
            self._training_op = tf.assign(self._weightage_vects,
                                          new_weightages_op)                                       
 
            ##INITIALIZE SESSION
            self._sess = tf.Session()
 
            ##INITIALIZE VARIABLES
            init_op = tf.global_variables_initializer()
            self._sess.run(init_op)
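
# A hedged NumPy sketch (illustrative) of the winner-selection logic above:
# keep only neurons whose distance falls within the receptive field, then
# activate the closest sparsity-fraction of those candidates.
import numpy as np

dists = np.array([0.1, 0.4, 0.9, 0.2, 3.0])  # distances from input to 5 neurons
gamma_half = 1.0                             # receptive-field radius
candidates = dists[dists < gamma_half]
threshold = np.percentile(candidates, 2.0)   # sparsity = 0.02 -> 2nd percentile
winners = np.where(dists <= threshold)[0]
print(winners)                               # indices of the active neurons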
Example #40
 def deconv(self, x, num_out_layers, kernel_size, scale):
     p_x = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]])
     conv = slim.conv2d_transpose(p_x, num_out_layers, kernel_size, scale,
                                  'SAME')
     return conv[:, 3:-1, 3:-1, :]
Example #41
 def maxpool(self, x, kernel_size):
     p = np.floor((kernel_size - 1) / 2).astype(np.int32)
     p_x = tf.pad(x, [[0, 0], [p, p], [p, p], [0, 0]])
     return slim.max_pool2d(p_x, kernel_size)
def cnnmodel(frame1_xyz, frame1_rgb, frame2_xyz, frame2_rgb):
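    # NOTE (assumption): `get_network`, `encoder`, `decoder`, `correlation`,
    # and the globals `rad` / `dia` (search radius / diameter of the
    # correlation window) are defined in the enclosing module.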
    frame1_feat_rgb, _ = get_network('resnet50',
                                     frame1_rgb,
                                     weight_decay=1e-5,
                                     is_training=True)
    frame2_feat_rgb, _ = get_network('resnet50',
                                     frame2_rgb,
                                     weight_decay=1e-5,
                                     is_training=True,
                                     reuse=True)

    frame1_feat = encoder(frame1_xyz)
    frame2_feat = encoder(frame2_xyz, reuse=True)

    cc_o = correlation(frame2_feat_rgb, frame1_feat_rgb, 1, rad, 1, 1, rad)
    cc = tf.reshape(cc_o, [-1, 30 * 40, dia * dia, 1])
    cc_weight = tf.nn.relu(cc)

    frame1_feat_o = frame1_feat
    frame1_feat = tf.transpose(frame1_feat, [0, 3, 1, 2])
    frame1_feat_padded = tf.pad(frame1_feat,
                                paddings=[[0, 0], [0, 0], [rad, rad],
                                          [rad, rad]])
    frame1_list = []
    for i in range(30):
        for j in range(40):
            tmp = frame1_feat_padded[:, :, 0 + i:2 * rad + 1 + i,
                                     0 + j:2 * rad + 1 + j]
            tmp = tf.reshape(tmp, [-1, 64, dia * dia])
            frame1_list.append(tmp)
    frame1_list = tf.stack(frame1_list, axis=2)
    frame1_list = tf.transpose(frame1_list, [0, 2, 3, 1])

    frame1_list = frame1_list * cc_weight

    frame1_list = tf.nn.max_pool(frame1_list,
                                 ksize=[1, 1, dia * dia, 1],
                                 strides=[1, 1, dia * dia, 1],
                                 padding='VALID')
    frame1_list = tf.reshape(frame1_list, (-1, 30, 40, 64))

    x = tf.concat([frame2_feat, frame1_feat_o, frame1_list], 3)

    x_s = decoder(x)
    x_transl = tflearn.layers.conv.conv_2d(x_s,
                                           3, (3, 3),
                                           strides=1,
                                           activation='linear',
                                           weight_decay=1e-3,
                                           regularizer='L2')
    rot_quaternion = tflearn.layers.conv.conv_2d(x_s,
                                                 4, (3, 3),
                                                 strides=1,
                                                 activation='linear',
                                                 weight_decay=1e-3,
                                                 regularizer='L2')

    ### quaternion normalize
    quaternion_norm = tf.norm(rot_quaternion, axis=3) * tf.sign(
        rot_quaternion[:, :, :, 0])
    quaternion_norm = tf.expand_dims(quaternion_norm, -1)
    x_quaternion = rot_quaternion / (quaternion_norm + 0.000001)

    w1, x1, y1, z1 = tf.unstack(x_quaternion, axis=-1)
    x2, y2, z2 = tf.unstack(frame2_xyz, axis=-1)

    wm = -x1 * x2 - y1 * y2 - z1 * z2
    xm = w1 * x2 + y1 * z2 - z1 * y2
    ym = w1 * y2 + z1 * x2 - x1 * z2
    zm = w1 * z2 + x1 * y2 - y1 * x2

    x = -wm * x1 + xm * w1 - ym * z1 + zm * y1
    y = -wm * y1 + ym * w1 - zm * x1 + xm * z1
    z = -wm * z1 + zm * w1 - xm * y1 + ym * x1

    x_flow = tf.stack((x, y, z), axis=-1)
    x_flow = x_flow + x_transl - frame2_xyz

    x_center = tflearn.layers.conv.conv_2d(x_s,
                                           3, (3, 3),
                                           strides=1,
                                           activation='linear',
                                           weight_decay=1e-3,
                                           regularizer='L2')
    x_score = tflearn.layers.conv.conv_2d(x_s,
                                          2, (3, 3),
                                          strides=1,
                                          activation='linear',
                                          weight_decay=1e-3,
                                          regularizer='L2')
    x_mask = tflearn.layers.conv.conv_2d(x_s,
                                         2, (3, 3),
                                         strides=1,
                                         activation='linear',
                                         weight_decay=1e-3,
                                         regularizer='L2')
    x_boundary = tflearn.layers.conv.conv_2d(x_s,
                                             2, (3, 3),
                                             strides=1,
                                             activation='linear',
                                             weight_decay=1e-3,
                                             regularizer='L2')

    x_center = tf.add(x_center, frame2_xyz)
    xc, yc, zc = tf.unstack(x_center, axis=-1)

    wmc = -x1 * xc - y1 * yc - z1 * zc
    xmc = w1 * xc + y1 * zc - z1 * yc
    ymc = w1 * yc + z1 * xc - x1 * zc
    zmc = w1 * zc + x1 * yc - y1 * xc

    xc = -wmc * x1 + xmc * w1 - ymc * z1 + zmc * y1
    yc = -wmc * y1 + ymc * w1 - zmc * x1 + xmc * z1
    zc = -wmc * z1 + zmc * w1 - xmc * y1 + ymc * x1

    x_center_p = tf.stack((xc, yc, zc), axis=-1) + x_transl
    x_traj = tf.concat([x_center, x_center_p], 3)

    return x_quaternion, x_transl, x_traj, x_flow, x_center, x_mask, x_score, x_boundary
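
# A hedged sanity check (illustrative): the unstacked arithmetic above is the
# standard quaternion rotation q * v * conj(q). A 90-degree rotation about the
# z axis should map (1, 0, 0) to (0, 1, 0).
import numpy as np

w1, x1, y1, z1 = np.cos(np.pi / 4), 0.0, 0.0, np.sin(np.pi / 4)  # 90 deg about z
x2, y2, z2 = 1.0, 0.0, 0.0

wm = -x1 * x2 - y1 * y2 - z1 * z2
xm = w1 * x2 + y1 * z2 - z1 * y2
ym = w1 * y2 + z1 * x2 - x1 * z2
zm = w1 * z2 + x1 * y2 - y1 * x2

x = -wm * x1 + xm * w1 - ym * z1 + zm * y1
y = -wm * y1 + ym * w1 - zm * x1 + xm * z1
z = -wm * z1 + zm * w1 - xm * y1 + ym * x1
print(np.round([x, y, z], 6))  # -> [0. 1. 0.]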
Example #43
    def __call__(self, image, reuse=None):
        with tf.variable_scope(self.name, reuse=reuse):
            act = tf.nn.relu
            kwargs_downsample = {
                "kernel_size": (4, 4),
                "strides": (4, 4),
                "padding": "valid"
            }

            # image is 256x256x3
            image = tf.layers.conv2d(image,
                                     filters=128,
                                     **kwargs_downsample,
                                     activation=act)

            # image is 64x64x128
            image = tf.layers.conv2d(image,
                                     filters=256,
                                     **kwargs_downsample,
                                     activation=act)

            # image is 16x16x256
            image = tf.layers.conv2d(image,
                                     filters=512,
                                     **kwargs_downsample,
                                     activation=act)

            # -------------- image is 4x4x512
            pad = [[0, 0], [2, 2], [2, 2], [0, 0]]
            kwargs_upsample = {
                "kernel_size": (5, 5),
                "strides": (1, 1),
                "padding": "valid"
            }
            res_met = tf.image.ResizeMethod.NEAREST_NEIGHBOR

            image = tf.pad(image, pad, mode="SYMMETRIC")
            image = tf.layers.conv2d(image,
                                     filters=512,
                                     **kwargs_upsample,
                                     activation=act)
            image = tf.image.resize_images(image, (16, 16), method=res_met)

            # image is 16x16x512

            image = tf.pad(image, pad, mode="SYMMETRIC")
            image = tf.layers.conv2d(image,
                                     filters=256,
                                     **kwargs_upsample,
                                     activation=act)
            image = tf.image.resize_images(image, (64, 64), method=res_met)

            # image is 64x64x256

            image = tf.pad(image, pad, mode="SYMMETRIC")
            image = tf.layers.conv2d(image,
                                     filters=128,
                                     **kwargs_upsample,
                                     activation=act)
            image = tf.image.resize_images(image, (256, 256), method=res_met)

            # image is 256x256x128

            image = tf.pad(image, pad, mode="SYMMETRIC")
            image = tf.layers.conv2d(image,
                                     filters=3,
                                     activation=tf.nn.sigmoid,
                                     **kwargs_upsample)

            # image is 256x256x3

            return image
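
# A hedged arithmetic note (illustrative): each upsampling step above pads by 2
# on every side and then applies a 5x5 VALID convolution, which preserves the
# spatial size: out = (in + 2*2) - 5 + 1 = in.
def valid_conv_out(size, pad=2, kernel=5, stride=1):
    return ((size + 2 * pad) - kernel) // stride + 1

assert valid_conv_out(4) == 4 and valid_conv_out(16) == 16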
Example #44
    def __init__(self, data, training=False):
        self.data = data
        self.initializer = tf.orthogonal_initializer()
        q_mask = tf.sequence_mask(self.data.ql, maxlen=25)  # (1, L_q)
        s_mask = tf.sequence_mask(self.data.sl, maxlen=29)  # (N, L_s)
        a_mask = tf.sequence_mask(self.data.al, maxlen=34)  # (5, L_a)

        with tf.variable_scope('Embedding'):
            self.embedding = tf.get_variable('embedding_matrix',
                                             initializer=np.load(
                                                 _mp.embedding_file),
                                             trainable=False)

            self.ques = tf.nn.embedding_lookup(self.embedding,
                                               self.data.ques)  # (1, L_q, E)
            self.ans = tf.nn.embedding_lookup(self.embedding,
                                              self.data.ans)  # (5, L_a, E)
            self.subt = tf.nn.embedding_lookup(self.embedding,
                                               self.data.subt)  # (N, L_s, E)

            # self.ques = tf.layers.dropout(self.ques, hp['dropout_rate'], training=training)  # (1, L_q, E)
            # self.ans = tf.layers.dropout(self.ans, hp['dropout_rate'], training=training)  # (5, L_a, E)
            # self.subt = tf.layers.dropout(self.subt, hp['dropout_rate'], training=training)  # (N, L_s, E)

        with tf.variable_scope('Embedding_Linear'):
            self.ques_embedding = tf.layers.dense(
                self.ques,
                hp['emb_dim'],
                use_bias=False,
                kernel_initializer=self.initializer)  # (1, L_q, E_t)
            self.ans_embedding = tf.layers.dense(self.ans,
                                                 hp['emb_dim'],
                                                 use_bias=False,
                                                 reuse=True)  # (5, L_a, E_t)
            self.subt_embedding = tf.layers.dense(
                self.subt,
                hp['emb_dim'],
                use_bias=False,
                reuse=True,
            )  # (N, L_s, E_t)

        with tf.variable_scope('Language_Encode'):
            position_attn = tf.get_variable(
                'position_attention',
                shape=[hp['pos_len'], hp['emb_dim']],
                initializer=self.initializer,
                trainable=False)
            ques_pos, _ = tf.split(position_attn, [25, hp['pos_len'] - 25])
            ans_pos, _ = tf.split(position_attn, [34, hp['pos_len'] - 34])
            subt_pos, _ = tf.split(position_attn, [29, hp['pos_len'] - 29])

            q_qa_enc, a_qa_enc = language_encode(self.ques, self.ans,
                                                 self.data.ql, self.data.al,
                                                 ques_pos, ans_pos)
            q_qs_enc, s_qs_enc = language_encode(self.ques, self.subt,
                                                 self.data.ql, self.data.sl,
                                                 ques_pos, subt_pos)
            a_as_enc, s_as_enc = language_encode(self.ans, self.subt,
                                                 self.data.al, self.data.sl,
                                                 ans_pos, subt_pos)

            self.ques_enc = tf.layers.dense(
                tf.concat([q_qa_enc, q_qs_enc], axis=-1),
                hp['feat_dim'],
                kernel_initializer=self.initializer,
                activation=tf.nn.tanh)  # (1, L_q, 2 * E_t)
            self.ans_enc = tf.layers.dense(
                tf.concat([a_qa_enc, a_as_enc], axis=-1),
                hp['feat_dim'],
                kernel_initializer=self.initializer,
                activation=tf.nn.tanh)  # (5, L_a, 2 * E_t)
            self.subt_enc = tf.layers.dense(
                tf.concat([s_qs_enc, s_as_enc], axis=-1),
                hp['feat_dim'],
                kernel_initializer=self.initializer,
                activation=tf.nn.tanh)  # (N, L_s, 2 * E_t)

        #
        #     self.ques_enc = tf.layers.dense(self.ques_enc, hp['feat_dim'])  # (1, L_q, 2 * E_t)
        #     self.ans_enc = tf.layers.dense(self.ques_enc, hp['feat_dim'])  # (5, L_a, 2 * E_t)
        #     self.subt_enc = tf.layers.dense(self.ques_enc, hp['feat_dim'])  # (N, L_s, 2 * E_t)
        #
        # self.m_subt = tf.layers.dense(
        #     self.subt_enc, hp['feat_dim'], use_bias=False, name='encode_transform')  # (N, F_t)
        # self.m_ques = tf.layers.dense(
        #     self.ques_enc, hp['feat_dim'], use_bias=False, reuse=True, name='encode_transform')  # (1, F_t)
        # self.m_ans = tf.layers.dense(
        #     self.ans_enc, hp['feat_dim'], use_bias=False, reuse=True, name='encode_transform')  # (5, F_t)
        #
        # self.m_subt = tf.layers.dropout(self.m_subt, hp['dropout_rate'], training=training)
        # self.m_ques = tf.layers.dropout(self.m_ques, hp['dropout_rate'], training=training)
        # self.m_ans = tf.layers.dropout(self.m_ans, hp['dropout_rate'], training=training)
        #
        t_shape = tf.shape(self.subt_enc)
        split_num = tf.cast(tf.ceil(t_shape[0] / 5), dtype=tf.int32)
        pad_num = split_num * 5 - t_shape[0]
        paddings = tf.convert_to_tensor([[0, pad_num], [0, 0]])

        with tf.variable_scope('Memory_Block'):
            self.mem_feat = tf.pad(self.subt_enc, paddings)

            self.mem_block = tf.reshape(self.mem_feat,
                                        [split_num, 5, hp['feat_dim']])

            self.mem_node = tf.reduce_mean(self.mem_block, axis=1)

            self.mem_opt = tf.layers.dense(self.mem_node,
                                           hp['feat_dim'],
                                           activation=tf.nn.tanh,
                                           kernel_initializer=self.initializer)

            self.mem_direct = tf.matmul(
                self.mem_node, self.mem_opt,
                transpose_b=True) / (hp['feat_dim']**0.5)

            self.mem_fw_direct = tf.nn.softmax(self.mem_direct)

            self.mem_bw_direct = tf.nn.softmax(self.mem_direct, axis=0)

            self.mem_self = tf.matmul(self.mem_fw_direct,
                                      self.mem_node) + tf.matmul(
                                          self.mem_bw_direct, self.mem_node)

        self.mem_attn = tf.nn.softmax(
            tf.matmul(self.mem_self, self.ques_enc, transpose_b=True))

        self.mem_output = tf.reduce_sum(self.mem_self * self.mem_attn, axis=0)

        self.output = tf.reduce_sum(self.mem_output * self.ans_enc, axis=1)
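
# A hedged sketch (illustrative) of the Memory_Block padding above: the N
# subtitle encodings are padded up to the next multiple of 5 so they reshape
# cleanly into groups of 5. The same arithmetic in plain Python:
import math

def rows_to_pad(n, k=5):
    split_num = math.ceil(n / k)
    return split_num * k - n       # zero rows appended by tf.pad

assert rows_to_pad(12) == 3        # 12 -> 15, i.e. 3 padded rows
assert rows_to_pad(10) == 0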
Example #45
    def project_mesh(self, args):

        height = args['height']
        orientation = args['orientation']

        # Adjust our height and orientation
        if 'mesh' in self.variants:
            v = self.variants['mesh']
            if 'height' in v:
                height = height + tf.truncated_normal(
                    shape=(),
                    mean=v['height']['mean'],
                    stddev=v['height']['stddev'],
                )
            if 'rotation' in v:
                # Make 3 random euler angles
                rotation = tf.truncated_normal(
                    shape=[3],
                    mean=v['rotation']['mean'],
                    stddev=v['rotation']['stddev'],
                )
                # Cos and sin for everyone!
                ca = tf.cos(rotation[0])
                sa = tf.sin(rotation[0])
                cb = tf.cos(rotation[1])
                sb = tf.sin(rotation[1])
                cc = tf.cos(rotation[2])
                sc = tf.sin(rotation[2])

                # Convert these into a rotation matrix
                rot = [cc*ca, -cc*sa*cb + sc*sb, cc*sa*sb + sc*cb,
                          sa,             ca*cb,         -ca * sb,
                      -sc*ca,  sc*sa*cb + cc*sb, -sc*sa*sb + cc*cb]  # yapf: disable
                rot = tf.reshape(tf.stack(rot), [3, 3])

                # Apply the rotation
                orientation = tf.matmul(rot, orientation)

        # Run the visual mesh to get our values
        pixels, neighbours = VisualMesh(
            tf.shape(args['image']),
            args['projection'],
            args['focal_length'],
            args['fov'],
            orientation,
            height,
            self.geometry,
            self.geometry_params,
            name='ProjectVisualMesh',
        )

        # Round to integer pixels
        # TODO one day someone could do linear interpolation here, like what happens in the OpenCL version
        pixels = tf.cast(tf.round(pixels), dtype=tf.int32)

        # Select the points in the network and discard the old dictionary data
        # We pad one extra point at the end for the offscreen point
        return {
            'X': tf.pad(tf.gather_nd(args['image'], pixels), [[0, 1], [0, 0]]),
            'Y': tf.pad(tf.gather_nd(args['mask'], pixels), [[0, 1], [0, 0]]),
            'G': neighbours,
            'px': pixels,
            'raw': args['raw'],
        }
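
# A hedged sanity check (illustrative): the Euler-angle matrix assembled above
# is a proper rotation, i.e. R @ R.T == I and det(R) == 1.
import numpy as np

a, b, c = 0.3, -0.2, 0.7
ca, sa, cb, sb, cc, sc = np.cos(a), np.sin(a), np.cos(b), np.sin(b), np.cos(c), np.sin(c)
R = np.array([[cc*ca, -cc*sa*cb + sc*sb, cc*sa*sb + sc*cb],
              [   sa,             ca*cb,           -ca*sb],
              [-sc*ca,  sc*sa*cb + cc*sb, -sc*sa*sb + cc*cb]])
assert np.allclose(R @ R.T, np.eye(3))
assert np.isclose(np.linalg.det(R), 1.0)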
Example #46
 def _symmetric_pad(i, x):
   paddings_i = tf.map_fn(lambda e: tf.where(i < e, 1, 0), paddings)
   paddings_i = tf.reshape(paddings_i, [num_dim, 2])
   x = tf.pad(x, paddings_i, 'SYMMETRIC')
   return i + 1, x
    def __init__(
            self, sequence_length, vocab_size, embedding_type, embedding_size, filter_sizes,
            num_filters, fc_hidden_size, num_classes, l2_reg_lambda=0.0, pretrained_embedding=None):

        # Placeholders for input, output, dropout_prob and training_tag
        self.input_x_front = tf.placeholder(tf.int32, [None, sequence_length], name="input_x_front")
        self.input_x_behind = tf.placeholder(tf.int32, [None, sequence_length], name="input_x_behind")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.is_training = tf.placeholder(tf.bool, name="is_training")

        self.global_step = tf.Variable(0, trainable=False, name="Global_Step")

        def _cos_sim(input_x1, input_x2):
            norm1 = tf.sqrt(tf.reduce_sum(tf.square(input_x1), axis=1))
            norm2 = tf.sqrt(tf.reduce_sum(tf.square(input_x2), axis=1))
            dot_products = tf.reduce_sum(input_x1 * input_x2, axis=1, name="cos_sim")
            return dot_products / (norm1 * norm2)

        def _make_attention_mat(input_x1, input_x2):
            # shape of `input_x1` and `input_x2`: [batch_size, embedding_size, sequence_length, 1]
            # input_x2 need to transpose to the [batch_size, embedding_size, 1, sequence_length]
            # shape of output: [batch_size, sequence_length, sequence_length]
            dist = tf.reduce_sum(tf.square(input_x1 - tf.matrix_transpose(input_x2)), axis=1)
            euclidean = tf.sqrt(tf.maximum(dist, 1e-10))
            return 1.0 / (1.0 + euclidean)

        def _w_pool(input_x, attention, filter_size, scope):
            # input_x: [batch_size, num_filters, sequence_length + filter_size - 1, 1]
            # attention: [batch_size, sequence_length + filter_size - 1]
            pools = []

            # [batch_size, 1, sequence_length + filter_size - 1, 1]
            attention = tf.transpose(tf.expand_dims(tf.expand_dims(attention, axis=-1), axis=-1), perm=[0, 2, 1, 3])

            for i in range(sequence_length):
                # [batch_size, num_filters, filter_size, 1]
                # reduce_sum => [batch_size, num_filters, 1, 1]
                pools.append(
                    tf.reduce_sum(input_x[:, :, i:i + filter_size, :] * attention[:, :, i:i + filter_size, :],
                                  axis=2, keepdims=True))
            # [batch_size, num_filters, sequence_length, 1]
            w_ap = tf.concat(pools, axis=2, name="w_ap_" + scope)
            return w_ap

        def _all_pool(input_x, filter_size, scope):
            # input_x: [batch_size, num_filters, sequence_length + filter_size -1, 1]
            all_ap = tf.nn.avg_pool(
                input_x,
                ksize=[1, 1, sequence_length + filter_size - 1, 1],
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="all_pool_" + scope
            )
            all_ap_reshaped = tf.reshape(all_ap, shape=[-1, num_filters])
            return all_ap_reshaped

        def _linear(input_, output_size, scope="SimpleLinear"):
            """
            Linear map: output[k] = sum_i(Matrix[k, i] * args[i] ) + Bias[k]
            Args:
                input_: a tensor or a list of 2D, batch x n, Tensors.
                output_size: int, second dimension of W[i].
                scope: VariableScope for the created subgraph; defaults to "SimpleLinear".
            Returns:
                A 2D Tensor with shape [batch x output_size] equal to
                sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
            Raises:
                ValueError: if some of the arguments has unspecified or wrong shape.
            """

            shape = input_.get_shape().as_list()
            if len(shape) != 2:
                raise ValueError("Linear is expecting 2D arguments: {0}".format(str(shape)))
            if not shape[1]:
                raise ValueError("Linear expects shape[1] of arguments: {0}".format(str(shape)))
            input_size = shape[1]

            # Now the computation.
            with tf.variable_scope(scope):
                W = tf.get_variable("W", [input_size, output_size], dtype=input_.dtype)
                b = tf.get_variable("b", [output_size], dtype=input_.dtype)

            return tf.nn.xw_plus_b(input_, W, b)

        def _highway_layer(input_, size, num_layers=1, bias=-2.0, f=tf.nn.relu):
            """
            Highway Network (cf. http://arxiv.org/abs/1505.00387).
            t = sigmoid(Wy + b)
            z = t * g(Wy + b) + (1 - t) * y
            where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.
            """

            for idx in range(num_layers):
                g = f(_linear(input_, size, scope=("highway_lin_{0}".format(idx))))
                t = tf.sigmoid(_linear(input_, size, scope=("highway_gate_{0}".format(idx))) + bias)
                output = t * g + (1. - t) * input_
                input_ = output

            return output

        # Embedding Layer
        with tf.device("/cpu:0"), tf.name_scope("embedding"):
            # Use random generated the word vector by default
            # Can also be obtained through our own word vectors trained by our corpus
            if pretrained_embedding is None:
                self.embedding = tf.Variable(tf.random_uniform([vocab_size, embedding_size], minval=-1.0, maxval=1.0,
                                                               dtype=tf.float32), trainable=True, name="embedding")
            else:
                if embedding_type == 0:
                    self.embedding = tf.constant(pretrained_embedding, dtype=tf.float32, name="embedding")
                if embedding_type == 1:
                    self.embedding = tf.Variable(pretrained_embedding, trainable=True,
                                                 dtype=tf.float32, name="embedding")
            embedded_sentence_front = tf.nn.embedding_lookup(self.embedding, self.input_x_front)
            embedded_sentence_behind = tf.nn.embedding_lookup(self.embedding, self.input_x_behind)

            # transpose the embedding sentence: [batch_size, embedding_size, sequence_length]
            embedded_sentence_front_trans = tf.transpose(embedded_sentence_front, perm=[0, 2, 1])
            embedded_sentence_behind_trans = tf.transpose(embedded_sentence_behind, perm=[0, 2, 1])

            # [batch_size, embedding_size, sequence_length, 1]
            embedded_sentence_expanded_front_trans = tf.expand_dims(embedded_sentence_front_trans, axis=-1)
            embedded_sentence_expanded_behind_trans = tf.expand_dims(embedded_sentence_behind_trans, axis=-1)

            # shape of `F0_0` and `B0_0`: [batch_size, embedding_size]
            self.F0_0 = tf.reshape(tf.reduce_mean(embedded_sentence_front, axis=1), shape=[-1, embedding_size])
            self.B0_0 = tf.reshape(tf.reduce_mean(embedded_sentence_behind, axis=1), shape=[-1, embedding_size])

        # Attention Layer
        with tf.name_scope("attention_matrix"):
            W_a = tf.Variable(tf.truncated_normal(shape=[sequence_length, embedding_size],
                                                  stddev=0.1, dtype=tf.float32), name="W_a")
            # shape of `attention_matrix`: [batch_size, sequence_length, sequence_length]
            attention_matrix = _make_attention_mat(embedded_sentence_expanded_front_trans,
                                                   embedded_sentence_expanded_behind_trans)

            # [batch_size, sequence_length, sequence_length] * [sequence_length, embedding_size]
            # einsum => [batch_size, sequence_length, embedding_size]
            # matrix transpose => [batch_size, embedding_size, sequence_length]
            # expand dims => [batch_size, embedding_size, sequence_length, 1]
            front_attention = tf.expand_dims(tf.matrix_transpose(
                tf.einsum("ijk,kl->ijl", attention_matrix, W_a)), axis=-1)
            behind_attention = tf.expand_dims(tf.matrix_transpose(
                tf.einsum("ijk,kl->ijl", tf.matrix_transpose(attention_matrix), W_a)), axis=-1)

            # shape of new `embedded_sentence_expanded_trans`: [batch_size, embedding_size, sequence_length, 2]
            embedded_sentence_expanded_front_trans = tf.concat([embedded_sentence_expanded_front_trans,
                                                                front_attention], axis=3)
            embedded_sentence_expanded_behind_trans = tf.concat([embedded_sentence_expanded_behind_trans,
                                                                 behind_attention], axis=3)

        # Convolution layer
        pooled_outputs_wp_front = []
        pooled_outputs_wp_behind = []

        pooled_outputs_ap_front = []
        pooled_outputs_ap_behind = []

        for filter_size in filter_sizes:
            with tf.name_scope("conv-filter{0}".format(filter_size)):
                in_channels = 2  # The in_channels of filter_shape is 2 (two channels, origin + attention)

                # shape of new `embedded_sentence_expanded`
                # [batch_size, embedding_size, sequence_length + filter_size - 1, 2]
                input_x1 = tf.pad(embedded_sentence_expanded_front_trans, np.array(
                    [[0, 0], [0, 0], [filter_size - 1, filter_size - 1], [0, 0]]), mode="CONSTANT")
                input_x2 = tf.pad(embedded_sentence_expanded_behind_trans, np.array(
                    [[0, 0], [0, 0], [filter_size - 1, filter_size - 1], [0, 0]]), mode="CONSTANT")

                filter_shape = [embedding_size, filter_size, in_channels, num_filters]
                W = tf.Variable(tf.truncated_normal(shape=filter_shape, stddev=0.1, dtype=tf.float32), name="W")
                b = tf.Variable(tf.constant(value=0.1, shape=[num_filters], dtype=tf.float32), name="b")
                conv_front = tf.nn.conv2d(
                    input_x1,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv_front")

                conv_behind = tf.nn.conv2d(
                    input_x2,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv_behind")

                # Apply nonlinearity
                # [batch_size, 1, sequence_length + filter_size - 1, num_filters]
                conv_out_front = tf.nn.relu(tf.nn.bias_add(conv_front, b), name="relu_front")
                conv_out_behind = tf.nn.relu(tf.nn.bias_add(conv_behind, b), name="relu_behind")

                # [batch_size, num_filters, sequence_length + filter_size - 1, 1]
                conv_out_front_trans = tf.transpose(conv_out_front, perm=[0, 3, 2, 1])
                conv_out_behind_trans = tf.transpose(conv_out_behind, perm=[0, 3, 2, 1])

            with tf.name_scope("attention-filter{0}".format(filter_size)):
                # [batch_size, sequence_length + filter_size - 1, sequence_length + filter_size - 1]
                attention_matrix_v2 = _make_attention_mat(conv_out_front_trans, conv_out_behind_trans)

                # [batch_size, sequence_length + filter_size - 1]
                front_attention_v2 = tf.reduce_sum(attention_matrix_v2, axis=2)
                behind_attention_v2 = tf.reduce_sum(attention_matrix_v2, axis=1)

            with tf.name_scope("pool-filter{0}".format(filter_size)):
                # shape of `front_wp`: [batch_size, num_filters, sequence_length, 1]
                front_wp = _w_pool(input_x=conv_out_front_trans, attention=front_attention_v2,
                                   filter_size=filter_size, scope="front")
                behind_wp = _w_pool(input_x=conv_out_behind_trans, attention=behind_attention_v2,
                                    filter_size=filter_size, scope="behind")

                # shape of `front_ap`: [batch_size, num_filters]
                front_ap = _all_pool(input_x=conv_out_front_trans, filter_size=filter_size, scope="front")
                behind_ap = _all_pool(input_x=conv_out_behind_trans, filter_size=filter_size, scope="behind")

                pooled_outputs_wp_front.append(front_wp)
                pooled_outputs_wp_behind.append(behind_wp)

                pooled_outputs_ap_front.append(front_ap)
                pooled_outputs_ap_behind.append(behind_ap)

        # shape of `FI_1` & `BI_1`: [batch_size, num_filters_total, sequence_length, 1]
        self.FI_1 = tf.concat(pooled_outputs_wp_front, axis=1)
        self.BI_1 = tf.concat(pooled_outputs_wp_behind, axis=1)

        # shape of `F0_1` & `B0_1`: [batch_size, num_filters_total]
        self.F0_1 = tf.concat(pooled_outputs_ap_front, axis=1)
        self.B0_1 = tf.concat(pooled_outputs_ap_behind, axis=1)

        # Concat Layer
        num_filters_total = num_filters * len(filter_sizes)

        # shape of `conv_front` & `conv_behind`: [batch_size, embedding_size + num_filters_total]
        self.conv_front = tf.concat([self.F0_0, self.F0_1], axis=1)
        self.conv_behind = tf.concat([self.B0_0, self.B0_1], axis=1)

        self.sims = tf.stack([_cos_sim(self.F0_0, self.B0_0), _cos_sim(self.F0_1, self.B0_1)], axis=1)
        # shape of `conv_combine`: [batch_size, 2 * (embedding_size + num_filters_total)]
        self.conv_combine = tf.concat([self.conv_front, self.conv_behind], axis=1)

        # Fully Connected Layer
        with tf.name_scope("fc"):
            W = tf.Variable(tf.truncated_normal(shape=[2 * (embedding_size + num_filters_total), fc_hidden_size],
                                                stddev=0.1, dtype=tf.float32), name="W")
            b = tf.Variable(tf.constant(value=0.1, shape=[fc_hidden_size], dtype=tf.float32), name="b")
            self.fc = tf.nn.xw_plus_b(self.conv_combine, W, b)

            # Apply nonlinearity
            self.fc_out = tf.nn.relu(self.fc, name="relu")

        # Highway Layer
        with tf.name_scope("highway"):
            self.highway = _highway_layer(self.fc_out, self.fc_out.get_shape()[1], num_layers=1, bias=0)

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.highway, self.dropout_keep_prob)

        # Final scores and predictions
        with tf.name_scope("output"):
            W = tf.Variable(tf.truncated_normal(shape=[fc_hidden_size, num_classes],
                                                stddev=0.1, dtype=tf.float32), name="W")
            b = tf.Variable(tf.constant(value=0.1, shape=[num_classes], dtype=tf.float32), name="b")
            self.logits = tf.nn.xw_plus_b(self.h_drop, W, b, name="logits")
            self.softmax_scores = tf.nn.softmax(self.logits, name="softmax_scores")
            self.predictions = tf.argmax(self.logits, 1, name="predictions")
            self.topKPreds = tf.nn.top_k(self.softmax_scores, k=1, sorted=True, name="topKPreds")

        # Calculate mean cross-entropy loss, L2 loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.input_y, logits=self.logits)
            losses = tf.reduce_mean(losses, name="softmax_losses")
            l2_losses = tf.add_n([tf.nn.l2_loss(tf.cast(v, tf.float32)) for v in tf.trainable_variables()],
                                 name="l2_losses") * l2_reg_lambda
            self.loss = tf.add(losses, l2_losses, name="loss")
Example #48
 def nms(normalized_boxes, scores):
     idxs_ = tf.image.non_max_suppression(normalized_boxes, scores, self.proposal_count, self.nms_thresh)
     box = tf.gather(normalized_boxes, idxs_)
     pad_num = tf.maximum(self.proposal_count - tf.shape(normalized_boxes)[0], 0)
     box = tf.pad(box, [(0, pad_num), (0, 0)])
     return box
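
# A hedged NumPy sketch (illustrative) of the proposal padding above: the boxes
# kept by NMS are topped up to a fixed proposal_count so downstream shapes stay
# static.
import numpy as np

proposal_count = 5
boxes = np.ones((3, 4), dtype=np.float32)     # only 3 boxes survived NMS
pad_num = max(proposal_count - boxes.shape[0], 0)
boxes = np.pad(boxes, [(0, pad_num), (0, 0)], mode='constant')
assert boxes.shape == (5, 4)                  # two all-zero rows appended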
Example #49
def cryptonets_train(x):
    """Builds the graph for classifying digits based on Cryptonets

    Args:
        x: an input tensor with the dimensions (N_examples, 784), where 784 is
        the number of pixels in a standard MNIST image.

    Returns:
        A tuple (y, a scalar placeholder). y is a tensor of shape
        (N_examples, 10), with values equal to the logits of classifying the
        digit into one of 10 classes (the digits 0-9).
    """
    # Reshape to use within a conv neural net.
    # Last dimension is for "features" - there is only one here, since images
    # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
    with tf.name_scope('reshape'):
        x_image = tf.reshape(x, [-1, 28, 28, 1])
        paddings = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]],
                               name='pad_const')
        x_image = tf.pad(x_image, paddings)

    # First conv layer
    # CryptoNets's output of the first conv layer has feature map size 13 x 13,
    # therefore, we manually add paddings.
    # Input: N x 28 x 28 x 1
    # Filter: 5 x 5 x 1 x 5
    # Output: N x 12 x 12 x 5
    # Output after padding: N x 13 x 13 x 5
    with tf.name_scope('conv1'):
        W_conv1 = tf.get_variable("W_conv1", [5, 5, 1, 5])
        h_conv1_no_pad = tf.square(
            common.conv2d_stride_2_valid(x_image, W_conv1))
        paddings = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]],
                               name='pad_const')
        h_conv1 = tf.pad(h_conv1_no_pad, paddings)

    # Pooling layer
    # Input: N x 13 x 13 x 5
    # Output: N x 13 x 13 x 5
    with tf.name_scope('pool1'):
        h_pool1 = common.avg_pool_3x3_same_size(h_conv1)

    # Second convolution
    # Input: N x 13 x 13 x 5
    # Filter: 5 x 5 x 5 x 50
    # Output: N x 5 x 5 x 50
    with tf.name_scope('conv2'):
        W_conv2 = tf.get_variable("W_conv2", [5, 5, 5, 50])
        h_conv2 = common.conv2d_stride_2_valid(h_pool1, W_conv2)

    # Second pooling layer
    # Input: N x 5 x 5 x 50
    # Output: N x 5 x 5 x 50
    with tf.name_scope('pool2'):
        h_pool2 = common.avg_pool_3x3_same_size(h_conv2)

    # Fully connected layer 1
    # Input: N x 5 x 5 x 50
    # Input flattened: N x 1250
    # Weight: 1250 x 100
    # Output: N x 100
    with tf.name_scope('fc1'):
        h_pool2_flat = tf.reshape(h_pool2, [-1, 5 * 5 * 50])
        W_fc1 = tf.get_variable("W_fc1", [5 * 5 * 50, 100])
        h_fc1 = tf.square(tf.matmul(h_pool2_flat, W_fc1))

    # Map the 100 features to 10 classes, one for each digit
    # Input: N x 100
    # Weight: 100 x 10
    # Output: N x 10
    with tf.name_scope('fc2'):
        W_fc2 = tf.get_variable("W_fc2", [100, 10])
        y_conv = tf.matmul(h_fc1, W_fc2)
    return y_conv
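
# A hedged arithmetic note (illustrative) for the sizes quoted in the comments
# above, using the VALID-convolution output formula:
def conv_out(size, kernel=5, stride=2):
    return (size - kernel) // stride + 1

assert conv_out(28) == 12   # first conv: 28x28 -> 12x12, then padded to 13x13
assert conv_out(13) == 5    # second conv: 13x13 -> 5x5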
Example #50
def decoding_graph(features, state, mode, params):
    if mode != "train":
        params.residual_dropout = 0.0
        params.attention_dropout = 0.0
        params.relu_dropout = 0.0
        params.label_smoothing = 0.0

    tgt_seq = features["target"]
    src_len = features["source_length"]
    mem_len = features["memory_length"]
    tgt_len = features["target_length"]
    src_mask = tf.sequence_mask(src_len,
                                maxlen=tf.shape(features["source"])[1],
                                dtype=tf.float32)
    mem_mask = tf.sequence_mask(mem_len,
                                maxlen=tf.shape(features["memory"])[1],
                                dtype=tf.float32)
    tgt_mask = tf.sequence_mask(tgt_len,
                                maxlen=tf.shape(features["target"])[1],
                                dtype=tf.float32)

    hidden_size = params.hidden_size
    tvocab = params.vocabulary["target"]
    tgt_vocab_size = len(tvocab)
    initializer = tf.random_normal_initializer(0.0, params.hidden_size**-0.5)

    # if params.use_pretrained_embedding:
    #     trg_emb_initializer = tf.constant_initializer(features['trg_embs'])
    #     if params.shared_source_target_embedding:
    #         with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    #             tgt_embedding = tf.get_variable("shared_embedding",
    #                                             [tgt_vocab_size, hidden_size],
    #                                             initializer=trg_emb_initializer)
    #     else:
    #         tgt_embedding = tf.get_variable("target_embedding",
    #                                         [tgt_vocab_size, hidden_size],
    #                                         initializer=trg_emb_initializer)
    # else:
    if params.shared_source_target_embedding:
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            tgt_embedding = tf.get_variable("shared_embedding",
                                            [tgt_vocab_size, hidden_size],
                                            initializer=initializer)
    else:
        tgt_embedding = tf.get_variable("target_embedding",
                                        [tgt_vocab_size, hidden_size],
                                        initializer=initializer)

    if params.shared_embedding_and_softmax_weights:
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            if params.shared_source_target_embedding:
                weights = tf.get_variable("shared_embedding",
                                          [tgt_vocab_size, hidden_size],
                                          initializer=initializer)
            else:
                weights = tf.get_variable("target_embedding",
                                          [tgt_vocab_size, hidden_size],
                                          initializer=initializer)
    else:
        weights = tf.get_variable("softmax_weights",
                                  [tgt_vocab_size, hidden_size],
                                  initializer=initializer)

    # id => embedding
    # tgt_seq: [batch, max_tgt_length]
    targets = tf.gather(tgt_embedding, tgt_seq) * (hidden_size**0.5)
    targets = targets * tf.expand_dims(tgt_mask, -1)

    # Preparing encoder and decoder input
    #enc_attn_bias = attention_bias(src_mask, "masking")
    enc_attn_bias = attention_bias(tf.concat([src_mask, mem_mask], 1),
                                   "masking")
    dec_attn_bias = attention_bias(tf.shape(targets)[1], "causal")
    # Shift left
    decoder_input = tf.pad(targets, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
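    # The pad-then-slice above prepends an all-zero embedding at position 0
    # and drops the last position, so the decoder at step t only sees targets
    # earlier than t (standard teacher forcing for autoregressive decoding).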
    decoder_input = add_timing_signal(decoder_input)

    if params.residual_dropout is not None and params.residual_dropout > 0:
        decoder_input = tf.nn.dropout(decoder_input,
                                      1.0 - params.residual_dropout)

    encoder_output = state["encoder"]

    if mode != "infer":
        decoder_output = transformer_decoder(decoder_input, encoder_output,
                                             dec_attn_bias, enc_attn_bias,
                                             params)
    else:
        decoder_input = decoder_input[:, -1:, :]
        dec_attn_bias = dec_attn_bias[:, :, -1:, :]
        decoder_outputs = transformer_decoder(decoder_input,
                                              encoder_output,
                                              dec_attn_bias,
                                              enc_attn_bias,
                                              params,
                                              state=state["decoder"])

        decoder_output, decoder_state = decoder_outputs
        decoder_output = decoder_output[:, -1, :]
        logits = tf.matmul(decoder_output, weights, False, True)
        log_prob = tf.nn.log_softmax(logits)

        return log_prob, {"encoder": encoder_output, "decoder": decoder_state}

    # [batch, length, hidden] => [batch * length, hidden]; logits are then
    # [batch * length, vocab_size]
    decoder_output = tf.reshape(decoder_output, [-1, hidden_size])
    logits = tf.matmul(decoder_output, weights, False, True)
    labels = features["target"]

    # label smoothing
    ce = smoothed_softmax_cross_entropy(logits, labels, params.label_smoothing,
                                        True)

    ce = tf.reshape(ce, tf.shape(tgt_seq))

    if mode == "eval":
        return -tf.reduce_sum(ce * tgt_mask, axis=1)

    loss = tf.reduce_sum(ce * tgt_mask) / tf.reduce_sum(tgt_mask)

    return loss
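
# Editorial sketch (not part of the original example; assumes TensorFlow 1.x):
# a tiny demo of the "shift left" padding trick used in decoding_graph above.
import tensorflow as tf

_demo_targets = tf.reshape(tf.range(24, dtype=tf.float32), [2, 3, 4])
_demo_shifted = tf.pad(_demo_targets, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
# Step 0 of _demo_shifted is all zeros; step t holds the embedding of step
# t - 1, so the decoder never sees the token it is asked to predict.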
Beispiel #51
0
def conv0_space_to_depth(inputs,
                         use_fused_bn=False,
                         data_format='channels_last'):
    """Strided 2-D convolution with explicit padding.

  The padding is consistent and is based only on `kernel_size`, not on the
  dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

  Args:
    inputs: `Tensor` of size `[batch, height_in, width_in, channels]`.
    use_fused_bn: `bool`, whether to use fused batch norm variables.
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    A `Tensor` with the same type as `inputs`.
  """
    # Create the conv0 kernel w.r.t. the original image size (no space-to-depth).
    filters = 64
    kernel_size = 7
    space_to_depth_block_size = ssd_constants.SPACE_TO_DEPTH_BLOCK_SIZE
    strides = 2
    conv0 = tf.layers.Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=('SAME' if strides == 1 else 'VALID'),
        use_bias=True if use_fused_bn else False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format,
        name='conv1_1')
    # Use the image size without space-to-depth transform as the input of conv0.
    batch_size, h, w, channel = inputs.get_shape().as_list()
    conv0.build([
        batch_size, h * space_to_depth_block_size,
        w * space_to_depth_block_size, channel // (space_to_depth_block_size**2)
    ])
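    # conv0.build materializes the kernel as if the layer ran on the original
    # (pre-space-to-depth) image, i.e. with shape [7, 7, 3, 64]; the integer
    # division above recovers the original channel count (e.g. 12 // 4 = 3
    # for an RGB input).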

    kernel = conv0.weights[0]
    # [7, 7, 3, 64] --> [8, 8, 3, 64]
    kernel = tf.pad(kernel,
                    paddings=tf.constant([[1, 0], [1, 0], [0, 0], [0, 0]]),
                    mode='CONSTANT',
                    constant_values=0.)
    # Transform kernel follows the space-to-depth logic: http://shortn/_9YvHW96xPJ
    kernel = tf.reshape(kernel, [
        4, space_to_depth_block_size, 4, space_to_depth_block_size, 3, filters
    ])
    kernel = tf.transpose(kernel, [0, 2, 1, 3, 4, 5])
    kernel = tf.reshape(kernel, [4, 4, int(channel), filters])
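    # With block size 2, the padded 8x8x3x64 kernel regroups into a 4x4 kernel
    # over 3 * 2**2 = 12 input channels, matching the space-to-depth input.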
    kernel = tf.cast(kernel, inputs.dtype)

    inputs = space_to_depth_fixed_padding(inputs, kernel_size, data_format,
                                          space_to_depth_block_size)

    outputs = tf.nn.conv2d(
        input=inputs,
        filter=kernel,
        strides=[1, 1, 1, 1],
        padding='VALID',
        data_format='NHWC' if data_format == 'channels_last' else 'NCHW',
        name='conv1_1')
    if use_fused_bn:
        # The additional bias is used as the batch norm is fused into the conv
        # layer.
        return tf.nn.bias_add(outputs,
                              tf.cast(conv0.weights[1], outputs.dtype),
                              data_format='NHWC')
    else:
        return outputs
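
# Editorial sketch (not part of the original example; assumes TensorFlow 1.x):
# space-to-depth with block size 2 halves H and W and multiplies channels by
# 4, which is why a 7x7/stride-2 conv on the raw image can be replaced by a
# 4x4/stride-1 conv on the transformed input.
import tensorflow as tf

_demo_image = tf.zeros([1, 224, 224, 3])
_demo_s2d = tf.nn.space_to_depth(_demo_image, block_size=2)
# _demo_s2d has static shape [1, 112, 112, 12].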
Beispiel #52
0
    def zero_padding(self, input, paddings, name):
        # Symmetric zero padding on height and width only (NHWC layout).
        pad_mat = np.array([[0, 0], [paddings, paddings], [paddings, paddings], [0, 0]])
        return tf.pad(input, paddings=pad_mat, name=name)
Beispiel #53
0
def CNN(x):
    stride = 2
    # Keep probability for tf.nn.dropout: no dropout (1.0) when loading a
    # trained model, 0.5 while training.
    dropout_rate = 1.0
    if not a.load_model:
        dropout_rate = 0.5
    with tf.variable_scope('CNN', reuse=tf.AUTO_REUSE):
        with tf.variable_scope('layer1'):
            # [n,28,28,1] -> [n,14,14,64]
            padded = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]],
                            mode="CONSTANT")
            w = tf.get_variable(name='w1',
                                shape=[4, 4, 1, 64],
                                dtype=tf.float32,
                                initializer=tf.random_normal_initializer(
                                    0, 0.02))
            b = tf.get_variable(name='b1',
                                shape=[64],
                                dtype=tf.float32,
                                initializer=tf.constant_initializer(0.0))
            out = tf.nn.leaky_relu(
                batchnorm(
                    tf.nn.conv2d(
                        padded, w, [1, stride, stride, 1], padding='VALID') +
                    b), 0.2)
        with tf.variable_scope('layer2'):
            # [n,14,14,64] -> [n,7,7,128] ([n,7x7x128])
            padded = tf.pad(out, [[0, 0], [1, 1], [1, 1], [0, 0]],
                            mode="CONSTANT")
            w = tf.get_variable(name='w2',
                                shape=[4, 4, 64, 128],
                                dtype=tf.float32,
                                initializer=tf.random_normal_initializer(
                                    0, 0.02))
            b = tf.get_variable(name='b2',
                                shape=[128],
                                dtype=tf.float32,
                                initializer=tf.constant_initializer(0.0))
            out = tf.nn.leaky_relu(
                batchnorm(
                    tf.nn.conv2d(
                        padded, w, [1, stride, stride, 1], padding='VALID') +
                    b), 0.2)
            out = tf.reshape(out, [-1, 7 * 7 * 128])
        with tf.variable_scope('layer3'):
            # [n,7*7*128] -> [n,1024]
            w = tf.get_variable(name='w3',
                                shape=[7 * 7 * 128, 1024],
                                dtype=tf.float32,
                                initializer=tf.random_normal_initializer(
                                    0, 0.02))
            b = tf.get_variable(name='b3',
                                shape=[1024],
                                dtype=tf.float32,
                                initializer=tf.constant_initializer(0.0))
            out = tf.nn.leaky_relu(batchnorm(tf.matmul(out, w) + b), 0.2)
            # dropout
            out = tf.nn.dropout(out, dropout_rate)
        with tf.variable_scope('layer4'):
            # [n,1024] -> [n,10]
            w = tf.get_variable(name='w4',
                                shape=[1024, 10],
                                dtype=tf.float32,
                                initializer=tf.random_normal_initializer(
                                    0, 0.02))
            b = tf.get_variable(name='b4',
                                shape=[10],
                                dtype=tf.float32,
                                initializer=tf.constant_initializer(0.0))
            out = tf.nn.softmax(batchnorm(tf.matmul(out, w) + b))
            return out
Beispiel #54
0
def pad(tensor, num=1):
    return tf.pad(tensor, [[0, 0], [num, num], [num, num], [0, 0]], "CONSTANT")
Beispiel #55
0
    def encoder_bottleneck_regular(self,
                                   x,
                                   output_depth,
                                   scope,
                                   keep_prob,
                                   proj_ratio=4,
                                   downsampling=False):
        input_shape = x.get_shape().as_list()
        input_depth = input_shape[3]

        internal_depth = int(output_depth / proj_ratio)

        # convolution branch:
        conv_branch = x

        # # 1x1 projection:
        if downsampling:
            W_conv = self.get_variable_weight_decay(
                scope + "/W_proj",
                shape=[2, 2, input_depth, internal_depth],
                # ([filter_height, filter_width, in_depth, out_depth])
                initializer=tf.contrib.layers.xavier_initializer(),
                loss_category="encoder_wd_losses")
            conv_branch = tf.nn.conv2d(conv_branch,
                                       W_conv,
                                       strides=[1, 2, 2, 1],
                                       padding="VALID")  # NOTE! no bias terms
        else:
            W_proj = self.get_variable_weight_decay(
                scope + "/W_proj",
                shape=[1, 1, input_depth, internal_depth],
                # ([filter_height, filter_width, in_depth, out_depth])
                initializer=tf.contrib.layers.xavier_initializer(),
                loss_category="encoder_wd_losses")
            conv_branch = tf.nn.conv2d(conv_branch,
                                       W_proj,
                                       strides=[1, 1, 1, 1],
                                       padding="VALID")  # NOTE! no bias terms
        # # # batch norm and PReLU:
        conv_branch = tf.contrib.slim.batch_norm(conv_branch)
        conv_branch = PReLU(conv_branch, scope=scope + "/proj")

        # # conv:
        W_conv = self.get_variable_weight_decay(
            scope + "/W_conv",
            shape=[3, 3, internal_depth, internal_depth],
            # ([filter_height, filter_width, in_depth, out_depth])
            initializer=tf.contrib.layers.xavier_initializer(),
            loss_category="encoder_wd_losses")
        b_conv = self.get_variable_weight_decay(
            scope + "/b_conv",
            shape=[internal_depth],  # ([out_depth])
            initializer=tf.constant_initializer(0),
            loss_category="encoder_wd_losses")
        conv_branch = tf.nn.conv2d(
            conv_branch, W_conv, strides=[1, 1, 1, 1], padding="SAME") + b_conv
        # # # batch norm and PReLU:
        conv_branch = tf.contrib.slim.batch_norm(conv_branch)
        conv_branch = PReLU(conv_branch, scope=scope + "/conv")

        # # 1x1 expansion:
        shape = [1, 1, internal_depth, output_depth]
        W_exp = self.get_variable_weight_decay(
            scope + "/W_exp",
            shape=shape,
            # ([filter_height, filter_width, in_depth, out_depth])
            initializer=tf.contrib.layers.xavier_initializer(),
            loss_category="encoder_wd_losses")
        W_exp = tf.reshape(drop_connect(W_exp, self.keep_prob_pl), shape=shape)

        conv_branch = tf.nn.conv2d(conv_branch,
                                   W_exp,
                                   strides=[1, 1, 1, 1],
                                   padding="VALID")  # NOTE! no bias terms
        # # # batch norm:
        conv_branch = tf.contrib.slim.batch_norm(conv_branch)
        # NOTE! no PReLU here

        # # regularizer:
        # conv_branch = dropout(conv_branch, self.keep_prob_pl)

        # main branch:
        main_branch = x

        if downsampling:
            # max pooling with argmax (for use in max_unpool in the decoder):
            main_branch, pooling_indices = tf.nn.max_pool_with_argmax(
                main_branch,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding="SAME")
            # (every time we downsample, we also increase the feature block depth)

            # pad with zeros so that the feature block depth matches:
            depth_to_pad = output_depth - input_depth
            paddings = tf.convert_to_tensor([[0, 0], [0, 0], [0, 0],
                                             [0, depth_to_pad]])
            # (paddings is an integer tensor of shape [4, 2] where 4 is the rank
            # of main_branch. For each dimension D (D = 0, 1, 2, 3) of main_branch,
            # paddings[D, 0] is the number of values to add before the contents of
            # main_branch in that dimension, and paddings[D, 1] is the number of
            # values to add after the contents of main_branch in that dimension)
            main_branch = tf.pad(main_branch,
                                 paddings=paddings,
                                 mode="CONSTANT")

        # add the branches:
        merged = conv_branch + main_branch

        # apply PReLU:
        output = PReLU(merged, scope=scope + "/output")

        if downsampling:
            return output, pooling_indices
        else:
            return output
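
    # Editorial note: this is the ENet "regular" encoder bottleneck. The conv
    # branch is 1x1 projection -> 3x3 conv -> 1x1 expansion (with batch norm
    # and PReLU in between); the main branch is the identity, max-pooled and
    # zero-padded in depth when downsampling, and the two branches are summed.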
Beispiel #56
0
    def prepare_processing_graph(self, model_settings, summaries_dir):
        """
    建立张量流图以应用输入失真。
    创建一个图形,加载一个WAVE文件,对其进行解码、缩放体积、平移,
    添加背景噪声,计算一个声谱图,然后从中生成MFCC特征。
    必须在TensorFlow会话运行时调用它,它会创建多个占位符输入和一个输出::

      - wav_filename_placeholder_: 音频文件名
      - foreground_volume_placeholder_: 主剪辑的声音应该有多大
      - time_shift_padding_placeholder_: 在哪个位置剪辑
      - time_shift_offset_placeholder_: 在剪辑上移动多少
      - background_data_placeholder_: 背景噪声的PCM采样数据
      - background_volume_placeholder_: 背景中混音的响度
      - output_: 经过处理后的二维输出

    Args:
      model_settings: 正在训练的当前模型信息
      summaries_dir: 保存训练摘要信息的路径
      
    """
        with tf.get_default_graph().name_scope('data'):
            desired_samples = model_settings['desired_samples']
            self.wav_filename_placeholder_ = tf.placeholder(
                tf.string, [], name='wav_filename')
            wav_loader = io_ops.read_file(self.wav_filename_placeholder_)
            wav_decoder = contrib_audio.decode_wav(
                wav_loader,
                desired_channels=1,
                desired_samples=desired_samples)

            # Allow the volume of the audio sample to be adjusted.

            self.foreground_volume_placeholder_ = tf.placeholder(
                tf.float32, [], name='foreground_volume')
            scaled_foreground = tf.multiply(
                wav_decoder.audio, self.foreground_volume_placeholder_)

            # Shift the sample's start position, padding any gaps with zeros.

            self.time_shift_padding_placeholder_ = tf.placeholder(
                tf.int32, [2, 2], name='time_shift_padding')
            self.time_shift_offset_placeholder_ = tf.placeholder(
                tf.int32, [2], name='time_shift_offset')
            padded_foreground = tf.pad(scaled_foreground,
                                       self.time_shift_padding_placeholder_,
                                       mode='CONSTANT')
            sliced_foreground = tf.slice(padded_foreground,
                                         self.time_shift_offset_placeholder_,
                                         [desired_samples, -1])
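            # Hedged illustration: with desired_samples = 16000, a shift of
            # +100 samples uses padding [[100, 0], [0, 0]] and offset [0, 0],
            # moving the clip later in time with zeros filling the gap.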
            # Mix in background noise.
            self.background_data_placeholder_ = tf.placeholder(
                tf.float32, [desired_samples, 1], name='background_data')
            self.background_volume_placeholder_ = tf.placeholder(
                tf.float32, [], name='background_volume')
            background_mul = tf.multiply(self.background_data_placeholder_,
                                         self.background_volume_placeholder_)
            background_add = tf.add(background_mul, sliced_foreground)
            background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)

            # Run the spectrogram and MFCC nodes to get 2-D features from the audio.

            spectrogram = contrib_audio.audio_spectrogram(
                background_clamp,
                window_size=model_settings['window_size_samples'],
                stride=model_settings['window_stride_samples'],
                magnitude_squared=True)
            tf.summary.image('spectrogram',
                             tf.expand_dims(spectrogram, -1),
                             max_outputs=1)

            # The number of buckets in each FFT row of the spectrogram depends on
            # how many input samples are in each window. We don't need that level
            # of detail for classification, so we shrink them down to produce a
            # smaller result. One way is to average adjacent buckets; a more
            # sophisticated way is to apply the MFCC algorithm to shrink the
            # representation.

            if model_settings['preprocess'] == 'average':
                self.output_ = tf.nn.pool(
                    tf.expand_dims(spectrogram, -1),
                    window_shape=[1, model_settings['average_window_width']],
                    strides=[1, model_settings['average_window_width']],
                    pooling_type='AVG',
                    padding='SAME')
                tf.summary.image('shrunk_spectrogram',
                                 self.output_,
                                 max_outputs=1)
            elif model_settings['preprocess'] == 'mfcc':
                self.output_ = contrib_audio.mfcc(
                    spectrogram,
                    wav_decoder.sample_rate,
                    dct_coefficient_count=model_settings['fingerprint_width'])
                tf.summary.image('mfcc',
                                 tf.expand_dims(self.output_, -1),
                                 max_outputs=1)
            else:
                raise ValueError(
                    'Unknown preprocess mode "%s" (should be "mfcc" or'
                    ' "average")' % (model_settings['preprocess']))

            # Merge all the summaries and write them out to /tmp/retrain_logs.

            self.merged_summaries_ = tf.summary.merge_all(scope='data')
            self.summary_writer_ = tf.summary.FileWriter(
                summaries_dir + '/data', tf.get_default_graph())
Beispiel #57
0
    def _k_grads(self, X1, X2):
        r"""
        Vectorized kernel calc and kernel grad calc.
        Following notation from Beck (2017), i.e have tensors S,D,Kpp,Kp
        Input is two tensors of shape (# strings , # characters)
        and we calc the pair-wise kernel calcs between the elements (i.e n kern calcs for two lists of length n)
        D is the tensor than unrolls the recursion and allows vecotrizaiton
        """

        # turn into one-hot, i.e. shape (# strings, # characters + 1, alphabet size)
        X1 = tf.one_hot(X1, len(self.alphabet) + 1, dtype=tf.float64)
        X2 = tf.one_hot(X2, len(self.alphabet) + 1, dtype=tf.float64)
        # remove the ones in the first column that encode the padding
        # (i.e. we don't want them to count as a match)
        paddings = tf.constant([[0, 0], [0, 0], [0, len(self.alphabet)]])
        X1 = X1 - tf.pad(tf.expand_dims(X1[:, :, 0], 2), paddings, "CONSTANT")
        X2 = X2 - tf.pad(tf.expand_dims(X2[:, :, 0], 2), paddings, "CONSTANT")
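        # E.g. with an alphabet of size 4, index 0 is the padding symbol; the
        # subtraction above zeroes its one-hot column, so padded positions
        # contribute no matches when S is formed below.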
        # store squared match coef
        match_sq = tf.square(self.match_decay)
        # Make S: the similarity tensor of shape (# strings, #characters, # characters)
        S = tf.matmul(X1, tf.transpose(X2, perm=(0, 2, 1)))
        # Main loop, where Kp, Kpp values and gradients are calculated.
        Kp = tf.TensorArray(tf.float64,
                            size=0,
                            dynamic_size=True,
                            clear_after_read=False)
        dKp_dgap = tf.TensorArray(tf.float64,
                                  size=0,
                                  dynamic_size=True,
                                  clear_after_read=False)
        dKp_dmatch = tf.TensorArray(tf.float64,
                                    size=0,
                                    dynamic_size=True,
                                    clear_after_read=False)
        Kp = Kp.write(
            Kp.size(),
            tf.ones(shape=tf.stack([tf.shape(X1)[0], self.maxlen,
                                    self.maxlen]),
                    dtype=tf.float64))
        dKp_dgap = dKp_dgap.write(
            dKp_dgap.size(),
            tf.zeros(shape=tf.stack(
                [tf.shape(X1)[0], self.maxlen, self.maxlen]),
                     dtype=tf.float64))
        dKp_dmatch = dKp_dmatch.write(
            dKp_dmatch.size(),
            tf.zeros(shape=tf.stack(
                [tf.shape(X1)[0], self.maxlen, self.maxlen]),
                     dtype=tf.float64))

        # calc subkernels for each subsequence length
        for i in tf.range(0, self.max_subsequence_length - 1):

            Kp_temp = tf.multiply(S, Kp.read(i))
            Kp_temp0 = match_sq * Kp_temp
            Kp_temp1 = tf.matmul(Kp_temp0, self.D)
            Kp_temp2 = tf.matmul(self.D, Kp_temp1, transpose_a=True)
            Kp = Kp.write(Kp.size(), Kp_temp2)

            dKp_dgap_temp_1 = tf.matmul(self.dD_dgap,
                                        Kp_temp1,
                                        transpose_a=True)
            dKp_dgap_temp_2 = tf.multiply(S, dKp_dgap.read(i))
            dKp_dgap_temp_2 = dKp_dgap_temp_2 * match_sq
            dKp_dgap_temp_2 = tf.matmul(dKp_dgap_temp_2, self.D)
            dKp_dgap_temp_2 = dKp_dgap_temp_2 + tf.matmul(
                Kp_temp0, self.dD_dgap)
            dKp_dgap_temp_2 = tf.matmul(self.D,
                                        dKp_dgap_temp_2,
                                        transpose_a=True)
            dKp_dgap = dKp_dgap.write(dKp_dgap.size(),
                                      dKp_dgap_temp_1 + dKp_dgap_temp_2)

            dKp_dmatch_temp_1 = 2 * tf.divide(Kp_temp2, self.match_decay)
            dKp_dmatch_temp_2 = tf.multiply(S, dKp_dmatch.read(i))
            dKp_dmatch_temp_2 = dKp_dmatch_temp_2 * match_sq
            dKp_dmatch_temp_2 = tf.matmul(dKp_dmatch_temp_2, self.D)
            dKp_dmatch_temp_2 = tf.matmul(self.D,
                                          dKp_dmatch_temp_2,
                                          transpose_a=True)
            dKp_dmatch = dKp_dmatch.write(
                dKp_dmatch.size(), dKp_dmatch_temp_1 + dKp_dmatch_temp_2)

        # Final calculation. We gather all Kps
        Kp_stacked = Kp.stack()
        Kp.close()
        dKp_dgap_stacked = dKp_dgap.stack()
        dKp_dgap.close()
        dKp_dmatch_stacked = dKp_dmatch.stack()
        dKp_dmatch.close()

        # get k
        temp = tf.multiply(S, Kp_stacked)
        temp = tf.reduce_sum(temp, -1)
        sum2 = tf.reduce_sum(temp, -1)
        Ki = sum2 * match_sq
        k = tf.linalg.matvec(tf.transpose(Ki), self.order_coefs)
        k = tf.expand_dims(k, 1)

        # get gap decay grads
        temp = tf.multiply(S, dKp_dgap_stacked)
        temp = tf.reduce_sum(temp, -1)
        temp = tf.reduce_sum(temp, -1)
        temp = temp * match_sq
        dk_dgap = tf.linalg.matvec(tf.transpose(temp), self.order_coefs)
        dk_dgap = tf.expand_dims(dk_dgap, 1)

        # get match decay grads
        temp = tf.multiply(S, dKp_dmatch_stacked)
        temp = tf.reduce_sum(temp, -1)
        temp = tf.reduce_sum(temp, -1)
        temp = temp * match_sq
        temp = temp + 2 * self.match_decay * sum2
        dk_dmatch = tf.linalg.matvec(tf.transpose(temp), self.order_coefs)
        dk_dmatch = tf.expand_dims(dk_dmatch, 1)

        dk_dcoefs = tf.transpose(Ki)

        return k, dk_dgap, dk_dmatch, dk_dcoefs
Beispiel #58
0
def sorted_non_max_suppression_padded(scores, boxes, classes, max_output_size,
                                      iou_threshold):
    """A wrapper that handles non-maximum suppression.

  Assumption:
    * The boxes are sorted by scores unless the box is a dot (all coordinates
      are zero).
    * Boxes with higher scores can be used to suppress boxes with lower scores.

  The overall design of the algorithm is to handle boxes tile-by-tile:

  boxes = boxes.pad_to_multiple_of(tile_size)
  num_tiles = len(boxes) // tile_size
  output_boxes = []
  for i in range(num_tiles):
    box_tile = boxes[i*tile_size : (i+1)*tile_size]
    for j in range(i):
      suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
      iou = bbox_overlap(box_tile, suppressing_tile)
      # if the box is suppressed in iou, clear it to a dot
      box_tile *= _update_boxes(iou)
    # Iteratively handle the diagonal tile.
    iou = bbox_overlap(box_tile, box_tile)
    iou_changed = True
    while iou_changed:
      # boxes that are not suppressed by anything else
      suppressing_boxes = _get_suppressing_boxes(iou)
      # boxes that are suppressed by suppressing_boxes
      suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
      # clear iou to 0 for boxes that are suppressed, as they cannot be used
      # to suppress other boxes any more
      new_iou = _clear_iou(iou, suppressed_boxes)
      iou_changed = (new_iou != iou)
      iou = new_iou
    # remaining boxes that can still suppress others are the selected boxes.
    output_boxes.append(_get_suppressing_boxes(iou))
    if len(output_boxes) >= max_output_size:
      break

  Args:
    scores: a tensor with a shape of [batch_size, anchors].
    boxes: a tensor with a shape of [batch_size, anchors, 4].
    classes: a tensor with a shape of [batch_size, anchors] holding the class
      id of each box.
    max_output_size: a scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non max suppression.
    iou_threshold: a float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.

  Returns:
    nms_scores: a tensor with a shape of [batch_size, max_output_size]. It has
      the same dtype as the input scores.
    nms_proposals: a tensor with a shape of [batch_size, max_output_size, 4].
      It has the same dtype as the input boxes.
    nms_classes: a tensor with a shape of [batch_size, max_output_size]. It has
      the same dtype as the input classes.
  """
    batch_size = tf.shape(boxes)[0]
    num_boxes = tf.shape(boxes)[1]
    pad = tf.cast(tf.math.ceil(tf.cast(num_boxes, tf.float32) / NMS_TILE_SIZE),
                  tf.int32) * NMS_TILE_SIZE - num_boxes
    boxes = tf.pad(tf.cast(boxes, tf.float32), [[0, 0], [0, pad], [0, 0]])
    scores = tf.pad(tf.cast(scores, tf.float32), [[0, 0], [0, pad]],
                    constant_values=-1)
    classes = tf.pad(tf.cast(classes, tf.float32), [[0, 0], [0, pad]],
                     constant_values=-1)
    num_boxes += pad
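    # Padding scores and classes with -1 marks the padded slots as invalid,
    # while zero-padded boxes are "dots" that can never suppress anything.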

    def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
        return tf.logical_and(
            tf.reduce_min(output_size) < max_output_size,
            idx < num_boxes // NMS_TILE_SIZE)

    selected_boxes, _, output_size, _ = tf.while_loop(
        _loop_cond, _suppression_loop_body, [
            boxes, iou_threshold,
            tf.zeros([batch_size], tf.int32),
            tf.constant(0)
        ])
    idx = num_boxes - tf.cast(
        tf.nn.top_k(
            tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) *
            tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0],
        tf.int32)
    idx = tf.minimum(idx, num_boxes - 1)
    idx = tf.reshape(
        idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])
    boxes = tf.reshape(tf.gather(tf.reshape(boxes, [-1, 4]), idx),
                       [batch_size, max_output_size, 4])
    boxes = boxes * tf.cast(
        tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape(
            output_size, [-1, 1, 1]), boxes.dtype)
    scores = tf.reshape(tf.gather(tf.reshape(scores, [-1, 1]), idx),
                        [batch_size, max_output_size])
    scores = scores * tf.cast(
        tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
            output_size, [-1, 1]), scores.dtype)
    classes = tf.reshape(tf.gather(tf.reshape(classes, [-1, 1]), idx),
                         [batch_size, max_output_size])
    classes = classes * tf.cast(
        tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
            output_size, [-1, 1]), classes.dtype)
    return scores, boxes, classes
Beispiel #59
0
def conv2d(input_tensor, kernel, bias):
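    # The incoming kernel is in [out, in, height, width] (Caffe/PyTorch)
    # order; tf.nn.conv2d expects [height, width, in, out].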
    kernel = np.transpose(kernel, [2, 3, 1, 0])
    x = tf.pad(input_tensor, [[0,0], [1,1], [1,1], [0,0]])
    x = tf.nn.conv2d(x, tf.constant(kernel), (1,1,1,1), 'VALID')
    x = tf.nn.bias_add(x, tf.constant(bias))
    return tf.nn.relu(x)
Beispiel #60
0
with tf.variable_scope("block_embedding"):
    block_embedding = tf.get_variable("block_embedding_b",
                                      [NUM_BLOCKS, block_emb_size],
                                      trainable=True)

set_block_embedding = tf_util.create_row_setter(block_embedding,
                                                "set_block_embedding")

# %%

with tf.variable_scope("conv1_1"):
    filter_bank = tf.get_variable("filter_bank",
                                  [2, 2, 2, block_emb_size, layer1_size],
                                  trainable=True)

voxels_padded = tf.pad(voxels % NUM_BLOCKS, [[0, 0], [2, 2], [2, 2], [2, 2]])

voxel_emb_1 = tf.nn.conv3d(tf.nn.embedding_lookup(block_embedding,
                                                  voxels_padded),
                           filter_bank,
                           strides=[1, 1, 1, 1, 1],
                           padding="VALID")

voxel_emb_1s = tf.nn.sigmoid(voxel_emb_1)

# %%

with tf.variable_scope("conv2_1"):
    filter_bank_2 = tf.get_variable("filter_bank",
                                    [4, 4, 4, layer1_size, layer2_size],
                                    trainable=True)