Example 1
 def clip_boxes(self, boxes):
     """Clip boxes to fit in an image."""
     ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
     ymin = tf.clip_by_value(ymin, 0, self._output_size[0] - 1)
     xmin = tf.clip_by_value(xmin, 0, self._output_size[1] - 1)
     ymax = tf.clip_by_value(ymax, 0, self._output_size[0] - 1)
     xmax = tf.clip_by_value(xmax, 0, self._output_size[1] - 1)
     boxes = tf.stack([ymin, xmin, ymax, xmax], axis=1)
     return boxes
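A standalone usage sketch of the same clipping pattern (assuming TF 2.x eager execution and a made-up 480x640 output size; `clip_boxes_standalone` is illustrative, not part of the original class):

import tensorflow as tf

def clip_boxes_standalone(boxes, output_size):
    # boxes: [N, 4] in [ymin, xmin, ymax, xmax] order; output_size: (height, width).
    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    ymin = tf.clip_by_value(ymin, 0, output_size[0] - 1)
    xmin = tf.clip_by_value(xmin, 0, output_size[1] - 1)
    ymax = tf.clip_by_value(ymax, 0, output_size[0] - 1)
    xmax = tf.clip_by_value(xmax, 0, output_size[1] - 1)
    return tf.stack([ymin, xmin, ymax, xmax], axis=1)

boxes = tf.constant([[-10.0, 5.0, 500.0, 700.0]])
print(clip_boxes_standalone(boxes, (480, 640)))  # [[0., 5., 479., 639.]]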
def _refine_motion_field(motion_field, layer):
    """Refines a motion field using features from another layer.

  This function builds an element of a UNet-like architecture. `motion_field`
  has a lower spatial resolution than `layer`. First motion_field is resized to
  `layer`'s spatial resolution using bilinear interpolation, then convolutional
  filters are applied on `layer` and the result is added to the upscaled
  `motion_field`.

  This scheme is inspired by FlowNet (https://arxiv.org/abs/1504.06852), and the
  realization that keeping the bottlenecks at the same (low) dimension as the
  motion field will pressure the network to gradually transfer details from
  depth channels to space.

  The specifics are slightly different from FlowNet: we use two parallel towers,
  a 3x3 convolution, and two successive 3x3 convolutions, as opposed to one
  3x3 convolution in FlowNet. Also, we add the result to the upscaled
  `motion_field`, forming a residual connection, unlike FlowNet. These changes
  seemed to improve the depth prediction metrics, but exploration was far from
  exhaustive.

  Args:
    motion_field: a tf.Tensor of shape [B, h1, w1, m]. m is the number of
      dimensions in the motion field, for example, 3 in case of a 3D translation
      field.
    layer: tf.Tensor of shape [B, h2, w2, c].

  Returns:
    A tf.Tensor of shape [B, h2, w2, m], obtained by upscaling motion_field to
    h2, w2, and mixing it with layer using a few convolutions.

  """
    _, h, w, _ = tf.unstack(tf.shape(layer))
    upsampled_motion_field = tf.image.resize_bilinear(motion_field, [h, w])
    conv_input = tf.concat([upsampled_motion_field, layer], axis=3)
    # Tower 1: a single 3x3 convolution applied to the concatenated input.
    conv_output = layers.conv2d(conv_input,
                                max(4,
                                    layer.shape.as_list()[-1]), [3, 3],
                                stride=1)
    # Tower 2: two successive 3x3 convolutions applied to the same input.
    conv_input = layers.conv2d(conv_input,
                               max(4,
                                   layer.shape.as_list()[-1]), [3, 3],
                               stride=1)
    conv_output2 = layers.conv2d(conv_input,
                                 max(4,
                                     layer.shape.as_list()[-1]), [3, 3],
                                 stride=1)
    # Concatenate the two towers' outputs before the final 1x1 mixing convolution.
    conv_output = tf.concat([conv_output, conv_output2], axis=-1)

    return upsampled_motion_field + layers.conv2d(
        conv_output,
        motion_field.shape.as_list()[-1], [1, 1],
        stride=1,
        activation_fn=None,
        biases_initializer=None,
        scope=layer.op.name + '/MotionBottleneck')
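For readers on TF 2.x, a minimal sketch of the same refinement pattern (bilinear upsample, two parallel conv towers, residual 1x1 mix) written with Keras layers. It is an illustrative re-expression, not the original contrib/slim code: it assumes statically known channel counts and creates fresh weights on every call.

import tensorflow as tf

def refine_motion_field_sketch(motion_field, layer):
    # Upsample the coarse motion field to `layer`'s spatial resolution.
    upsampled = tf.image.resize(motion_field, tf.shape(layer)[1:3])
    conv_input = tf.concat([upsampled, layer], axis=3)
    channels = max(4, layer.shape[-1])
    # Tower 1: one 3x3 convolution; Tower 2: two successive 3x3 convolutions.
    tower1 = tf.keras.layers.Conv2D(channels, 3, padding='same', activation='relu')(conv_input)
    tower2 = tf.keras.layers.Conv2D(channels, 3, padding='same', activation='relu')(conv_input)
    tower2 = tf.keras.layers.Conv2D(channels, 3, padding='same', activation='relu')(tower2)
    mixed = tf.concat([tower1, tower2], axis=-1)
    # Residual connection: a 1x1 convolution back to the motion-field channels.
    delta = tf.keras.layers.Conv2D(motion_field.shape[-1], 1, use_bias=False)(mixed)
    return upsampled + delta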
Example 3
  def unroll(self, actions, env_outputs, core_state):
    """Manual implementation of the network unroll."""
    _, _, done, _ = env_outputs

    torso_outputs = snt.BatchApply(self._torso)((actions, env_outputs))

    # Note, in this implementation we can't use CuDNN RNN to speed things up due
    # to the state reset. This can be XLA-compiled (LSTMBlockCell needs to be
    # changed to implement snt.LSTMCell).
    initial_core_state = self._core.zero_state(tf.shape(actions)[1], tf.float32)
    core_output_list = []
    for input_, d in zip(tf.unstack(torso_outputs), tf.unstack(done)):
      # If the episode ended, the core state should be reset before the next.
      core_state = nest.map_structure(
          functools.partial(tf.where, d), initial_core_state, core_state)
      core_output, core_state = self._core(input_, core_state)
      core_output_list.append(core_output)

    return snt.BatchApply(self._head)(tf.stack(core_output_list)), core_state
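The per-step reset in the loop above is just a `tf.where` over every tensor in the recurrent state. A small eager sketch of that step (`maybe_reset_state` and the shapes are hypothetical; note that TF 2.x `tf.where` needs the done flag broadcast against the state, unlike the legacy row-wise `tf.where` used above):

import tensorflow as tf

def maybe_reset_state(done, initial_state, state):
    # Wherever `done` is True for a batch element, replace that element's state
    # with the initial (zero) state; otherwise keep the current state.
    return tf.nest.map_structure(
        lambda init, cur: tf.where(done[:, tf.newaxis], init, cur),
        initial_state, state)

state = tf.ones([3, 5])             # hypothetical [batch, hidden] state
initial_state = tf.zeros([3, 5])
done = tf.constant([True, False, True])
print(maybe_reset_state(done, initial_state, state))  # rows 0 and 2 are zeroed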
    def buildModel(self, rnn_layer, is_dynamic_rnn):
        """Build Mnist recognition model.

    Args:
      rnn_layer: The RNN layer, either a single RNN cell or a multi-RNN cell.
      is_dynamic_rnn: Use dynamic_rnn or not.

    Returns:
     A tuple containing:

     - Input tensor of the model.
     - Prediction tensor of the model.
     - Output class tensor of the model.
    """
        # Weights and biases for output softmax layer.
        out_weights = tf.Variable(
            tf.random.normal([self.num_units, self.n_classes]))
        out_bias = tf.Variable(tf.random.normal([self.n_classes]))

        # input image placeholder
        x = tf.compat.v1.placeholder("float",
                                     [None, self.time_steps, self.n_input],
                                     name="INPUT_IMAGE")

        # x is shaped [batch_size,time_steps,num_inputs]
        if is_dynamic_rnn:
            rnn_input = tf.transpose(x, perm=[1, 0, 2])
            outputs, _ = tf.compat.v1.lite.experimental.nn.dynamic_rnn(
                rnn_layer, rnn_input, dtype="float32")
            outputs = tf.unstack(outputs, axis=0)
        else:
            rnn_input = tf.unstack(x, self.time_steps, 1)
            outputs, _ = tf.compat.v1.nn.static_rnn(rnn_layer,
                                                    rnn_input,
                                                    dtype="float32")

        # Compute logits by multiplying outputs[-1] of shape [batch_size,num_units]
        # by the softmax layer's out_weight of shape [num_units,n_classes]
        # plus out_bias
        prediction = tf.matmul(outputs[-1], out_weights) + out_bias
        output_class = tf.nn.softmax(prediction, name="OUTPUT_CLASS")

        return x, prediction, output_class
Example 5
def interpolate(features, hparams, decode_hp):
    """Interpolate between the first input frame and last target frame.

  Args:
    features: dict of tensors
    hparams: HParams, training hparams.
    decode_hp: HParams, decode hparams.
  Returns:
    images: interpolated images, 4-D Tensor, shape=(num_interp, H, W, C)
    first_frame: image, 3-D Tensor, shape=(1, H, W, C)
    last_frame: image, 3-D Tensor, shape=(1, H, W, C)
  """
    inputs, targets = features["inputs"], features["targets"]
    inputs = tf.unstack(inputs, axis=1)
    targets = tf.unstack(targets, axis=1)
    coeffs = np.linspace(0.0, 1.0, decode_hp.num_interp)

    # (X_1, X_t) -> (z_1, z_t)
    first_frame, last_frame = inputs[0], targets[-1]
    first_top_z, first_level_eps = frame_to_latents(first_frame, hparams)
    last_top_z, last_level_eps = frame_to_latents(last_frame, hparams)

    # Interpolate latents at all levels.
    first_lats = first_level_eps + [first_top_z]
    last_lats = last_level_eps + [last_top_z]
    interp_lats = []
    lat_iterator = enumerate(zip(first_lats, last_lats))
    for level_ind, (first_lat, last_lat) in lat_iterator:
        if level_ind in decode_hp.level_interp:
            if decode_hp.channel_interp == "all":
                interp_lat = glow_ops.linear_interpolate(
                    first_lat, last_lat, coeffs)
            else:
                interp_lat = glow_ops.linear_interpolate_rank(
                    first_lat, last_lat, coeffs, decode_hp.rank_interp)
        else:
            interp_lat = tf.tile(first_lat, [decode_hp.num_interp, 1, 1, 1])
        interp_lats.append(interp_lat)

    level_eps_interp = interp_lats[:hparams.n_levels - 1]
    z_top_interp = interp_lats[-1]
    images = latents_to_frames(z_top_interp, level_eps_interp, hparams)
    return images, first_frame, last_frame
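The per-level interpolation boils down to a convex combination of the two latents for each coefficient. A hypothetical stand-in for `glow_ops.linear_interpolate` (not the tensor2tensor implementation), just to show the idea:

import numpy as np
import tensorflow as tf

def linear_interpolate_sketch(first_lat, last_lat, coeffs):
    # For each coefficient a in [0, 1], take (1 - a) * first + a * last and stack
    # the results along a new leading "num_interp" axis.
    return tf.stack(
        [(1.0 - float(a)) * first_lat + float(a) * last_lat for a in coeffs],
        axis=0)

first = tf.zeros([8, 8, 4])
last = tf.ones([8, 8, 4])
frames = linear_interpolate_sketch(first, last, np.linspace(0.0, 1.0, 5))
print(frames.shape)  # (5, 8, 8, 4)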
Example 6
def _using_motion_vector(depth, translation, rotation_angles, intrinsic_mat):
  """A helper for using_motion_vector. See docstring therein."""

  if translation.shape.ndims not in (2, 4):
    raise ValueError('\'translation\' should have rank 2 or 4, not %d' %
                     translation.shape.ndims)
  if translation.shape[-1] != 3:
    raise ValueError('translation\'s last dimension should be 3, not %d' %
                     translation.shape[-1])
  if translation.shape.ndims == 2:
    translation = tf.expand_dims(tf.expand_dims(translation, 1), 1)

  _, height, width = tf.unstack(tf.shape(depth))
  grid = tf.squeeze(
      tf.stack(tf.meshgrid(tf.range(width), tf.range(height), (1,))), axis=3)
  grid = tf.to_float(grid)
  intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)

  rot_mat = transform_utils.matrix_from_angles(rotation_angles)
  # We have to treat separately the case of a per-image rotation vector and a
  # per-image rotation field, because the broadcasting capabilities of einsum
  # are limited.
  if rotation_angles.shape.ndims == 2:
    # The calculation here is identical to the one in inverse_warp above.
    # However, we use einsum for better clarity. Under the hood, einsum performs
    # the reshaping and invocation of BatchMatMul, instead of doing it manually,
    # as in inverse_warp.
    projected_rotation = tf.einsum('bij,bjk,bkl->bil', intrinsic_mat, rot_mat,
                                   intrinsic_mat_inv)
    pcoords = tf.einsum('bij,jhw,bhw->bihw', projected_rotation, grid, depth)
  elif rotation_angles.shape.ndims == 4:
    # We push the H and W dimensions to the end, and transpose the rotation
    # matrix elements (as noted above).
    rot_mat = tf.transpose(rot_mat, [0, 3, 4, 1, 2])
    projected_rotation = tf.einsum('bij,bjkhw,bkl->bilhw', intrinsic_mat,
                                   rot_mat, intrinsic_mat_inv)
    pcoords = tf.einsum('bijhw,jhw,bhw->bihw', projected_rotation, grid, depth)

  projected_translation = tf.einsum('bij,bhwj->bihw', intrinsic_mat,
                                    translation)
  pcoords += projected_translation
  x, y, z = tf.unstack(pcoords, axis=1)
  return x / z, y / z, z
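A quick eager check of the einsum used for the per-image (rank-2) rotation case: 'bij,bjk,bkl->bil' is simply the batched product K @ R @ K^-1. The values below are illustrative only.

import tensorflow as tf

intrinsic_mat = tf.constant([[[100.0, 0.0, 32.0],
                              [0.0, 100.0, 24.0],
                              [0.0, 0.0, 1.0]]])          # [B=1, 3, 3]
rot_mat = tf.eye(3, batch_shape=[1])                      # identity rotation
intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)
via_einsum = tf.einsum('bij,bjk,bkl->bil', intrinsic_mat, rot_mat, intrinsic_mat_inv)
via_matmul = intrinsic_mat @ rot_mat @ intrinsic_mat_inv
print(tf.reduce_max(tf.abs(via_einsum - via_matmul)).numpy())  # ~0.0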
Example 7
 def unit(hidden_memory_tuple):
     hidden_state, c_prev = tf.unstack(hidden_memory_tuple)
     # hidden_state : batch x hidden_dim
     for j in range(len(self.Wbo_list) // 2 - 1):
         hidden_state = tf.nn.relu(
             tf.nn.xw_plus_b(hidden_state, self.Wbo_list[2 * j],
                             self.Wbo_list[2 * j + 1]))
     logits = tf.nn.xw_plus_b(hidden_state, self.Wbo_list[-2],
                              self.Wbo_list[-1])
     return logits
Example 8
        def unit(hidden_memory_tuple):
            hidden_state, c_prev = tf.unstack(hidden_memory_tuple)
            hidden_state = tf.nn.relu(tf.nn.xw_plus_b(hidden_state, self.Wbo_list[0], self.Wbo_list[1]))
            for i in range(num_highway):
                tran = tf.nn.relu(tf.nn.xw_plus_b(hidden_state, self.Wbo_list[2 + 4 * i], self.Wbo_list[3 + 4 * i]))
                gate = tf.nn.sigmoid(tf.nn.xw_plus_b(hidden_state, self.Wbo_list[4 + 4 * i], self.Wbo_list[5 + 4 * i]))
                hidden_state = tran * gate + (1. - gate) * hidden_state

            rewards = tf.nn.sigmoid(tf.nn.xw_plus_b(hidden_state, self.Wbo_list[-2], self.Wbo_list[-1]))
            return rewards
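Written out with plain matmuls, one highway step from the loop above looks like the following sketch (`highway_step` and the weight/bias shapes are hypothetical; the transform branch is gated against the identity branch):

import tensorflow as tf

def highway_step(hidden_state, w_tran, b_tran, w_gate, b_gate):
    tran = tf.nn.relu(tf.matmul(hidden_state, w_tran) + b_tran)
    gate = tf.nn.sigmoid(tf.matmul(hidden_state, w_gate) + b_gate)
    # Carry behavior: a gate near 1 uses the transform, a gate near 0 keeps the input.
    return tran * gate + (1.0 - gate) * hidden_state

h = tf.random.normal([4, 16])                        # hypothetical [batch, dim]
w_t, w_g = tf.random.normal([16, 16]), tf.random.normal([16, 16])
b_t, b_g = tf.zeros([16]), tf.zeros([16])
print(highway_step(h, w_t, b_t, w_g, b_g).shape)     # (4, 16)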
Example 9
def _lstm(x, prev_c, prev_h, w_lstm, layer_masks):
    """Multi-layer LSTM.

  Args:
    x: [batch_size, num_steps, hidden_size].
    prev_c: [[batch_size, hidden_size] * num_layers].
    prev_h: [[batch_size, hidden_size] * num_layers].
    w_lstm: [[2 * hidden_size, 4 * hidden_size] * num_layers].
    layer_masks: [([hidden_size, hidden_size] or None)* num_layers].

  Returns:
    next_c: [[batch_size, hidden_size] * num_layers].
    next_h: [[batch_size, hidden_size] * num_layers].
    all_h: [batch_size, num_steps, hidden_size].
  """
    _, num_steps, _ = tf.unstack(tf.shape(x))
    num_layers = len(w_lstm)

    all_h = [
        tf.TensorArray(dtype=tf.float32, size=num_steps, infer_shape=False)
        for _ in range(num_layers)
    ]

    def _condition(step, *unused_args):
        return tf.less(step, num_steps)

    def _body(step, pprev_c, pprev_h, all_h):
        """Apply LSTM at each step."""
        next_c, next_h = [], []
        for layer_id, (p_c, p_h, w, m) in enumerate(
                zip(pprev_c, pprev_h, w_lstm, layer_masks)):
            inp = x[:, step, :] if layer_id == 0 else next_h[-1]
            if m is not None:
                inp *= m
            ifog = tf.matmul(tf.concat([inp, p_h], axis=1), w)
            i, f, o, g = tf.split(ifog, 4, axis=1)
            i = tf.sigmoid(i)
            f = tf.sigmoid(f)
            o = tf.sigmoid(o)
            g = tf.tanh(g)
            c = i * g + f * p_c
            h = o * tf.tanh(c)
            all_h[layer_id] = all_h[layer_id].write(step, h)
            next_c.append(c)
            next_h.append(h)
        return step + 1, next_c, next_h, all_h

    loop_inps = [tf.constant(0, dtype=tf.int32), prev_c, prev_h, all_h]
    _, next_c, next_h, all_h = tf.while_loop(_condition,
                                             _body,
                                             loop_inps,
                                             parallel_iterations=1)
    all_h = [tf.transpose(h.stack(), [1, 0, 2]) for h in all_h]

    return next_c, next_h, all_h
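For reference, a single LSTM step as performed inside `_body`, written standalone (`lstm_step` and the sizes are illustrative; the fused weight is [2 * hidden, 4 * hidden] and is split into the i/f/o/g gates):

import tensorflow as tf

def lstm_step(inp, prev_c, prev_h, w):
    ifog = tf.matmul(tf.concat([inp, prev_h], axis=1), w)
    i, f, o, g = tf.split(ifog, 4, axis=1)
    c = tf.sigmoid(i) * tf.tanh(g) + tf.sigmoid(f) * prev_c
    h = tf.sigmoid(o) * tf.tanh(c)
    return c, h

hidden_size = 8
inp = tf.random.normal([2, hidden_size])
prev_c = prev_h = tf.zeros([2, hidden_size])
w = tf.random.normal([2 * hidden_size, 4 * hidden_size])
print([t.shape for t in lstm_step(inp, prev_c, prev_h, w)])  # [(2, 8), (2, 8)]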
Example 10
def transformCropImage(opt, image, pMtrx):
    with tf.name_scope("transformImage"):
        refMtrx = tf.tile(tf.expand_dims(opt.refMtrx_b, axis=0),
                          [opt.batchSize, 1, 1])
        transMtrx = tf.matmul(refMtrx, pMtrx)
        # warp the canonical coordinates
        X, Y = np.meshgrid(np.linspace(-1, 1, opt.W),
                           np.linspace(-1, 1, opt.H))
        X, Y = X.flatten(), Y.flatten()
        XYhom = np.stack([X, Y, np.ones_like(X)], axis=1).T
        XYhom = np.tile(XYhom, [opt.batchSize, 1, 1]).astype(np.float32)
        XYwarpHom = tf.matmul(transMtrx, XYhom)
        XwarpHom, YwarpHom, ZwarpHom = tf.unstack(XYwarpHom, axis=1)
        Xwarp = tf.reshape(XwarpHom / (ZwarpHom + 1e-8),
                           [opt.batchSize, opt.H, opt.W])
        Ywarp = tf.reshape(YwarpHom / (ZwarpHom + 1e-8),
                           [opt.batchSize, opt.H, opt.W])
        # get the integer sampling coordinates
        Xfloor, Xceil = tf.floor(Xwarp), tf.ceil(Xwarp)
        Yfloor, Yceil = tf.floor(Ywarp), tf.ceil(Ywarp)
        XfloorInt, XceilInt = tf.to_int32(Xfloor), tf.to_int32(Xceil)
        YfloorInt, YceilInt = tf.to_int32(Yfloor), tf.to_int32(Yceil)
        imageIdx = np.tile(
            np.arange(opt.batchSize).reshape([opt.batchSize, 1, 1]),
            [1, opt.H, opt.W])
        imageVec = tf.reshape(image, [-1, 3])
        imageVecOut = tf.concat([imageVec, tf.zeros([1, 3])], axis=0)
        idxUL = (imageIdx * opt.dataH + YfloorInt) * opt.dataW + XfloorInt
        idxUR = (imageIdx * opt.dataH + YfloorInt) * opt.dataW + XceilInt
        idxBL = (imageIdx * opt.dataH + YceilInt) * opt.dataW + XfloorInt
        idxBR = (imageIdx * opt.dataH + YceilInt) * opt.dataW + XceilInt
        idxOutside = tf.fill([opt.batchSize, opt.H, opt.W],
                             opt.batchSize * opt.dataH * opt.dataW)

        def insideIm(Xint, Yint):
            return (Xint >= 0) & (Xint < opt.dataW) & (Yint >= 0) & (Yint <
                                                                     opt.dataH)

        idxUL = tf.where(insideIm(XfloorInt, YfloorInt), idxUL, idxOutside)
        idxUR = tf.where(insideIm(XceilInt, YfloorInt), idxUR, idxOutside)
        idxBL = tf.where(insideIm(XfloorInt, YceilInt), idxBL, idxOutside)
        idxBR = tf.where(insideIm(XceilInt, YceilInt), idxBR, idxOutside)
        # bilinear interpolation
        Xratio = tf.reshape(Xwarp - Xfloor, [opt.batchSize, opt.H, opt.W, 1])
        Yratio = tf.reshape(Ywarp - Yfloor, [opt.batchSize, opt.H, opt.W, 1])
        imageUL = tf.to_float(tf.gather(imageVecOut,
                                        idxUL)) * (1 - Xratio) * (1 - Yratio)
        imageUR = tf.to_float(tf.gather(imageVecOut,
                                        idxUR)) * (Xratio) * (1 - Yratio)
        imageBL = tf.to_float(tf.gather(imageVecOut,
                                        idxBL)) * (1 - Xratio) * (Yratio)
        imageBR = tf.to_float(tf.gather(imageVecOut,
                                        idxBR)) * (Xratio) * (Yratio)
        imageWarp = imageUL + imageUR + imageBL + imageBR
    return imageWarp
Example 11
def time_distributed(incoming, fn, args=None, scope=None):
    """ Time Distributed.

    This layer applies a function to every timestep of the input tensor. The
    custom function's first argument must be the input tensor at every timestep.
    Additional parameters for the custom function may be specified in the
    'args' argument (as a list).

    Examples:
        ```python
        # Applying a fully_connected layer at every timestep
        x = time_distributed(input_tensor, fully_connected, [64])

        # Using a conv layer at every timestep with a scope
        x = time_distributed(input_tensor, conv_2d, [64, 3], scope='tconv')
        ```

    Input:
        (3+)-D Tensor [samples, timestep, input_dim].

    Output:
        (3+)-D Tensor [samples, timestep, output_dim].

    Arguments:
        incoming: `Tensor`. The incoming tensor.
        fn: `function`. A function to apply at every timestep. This function's
            first parameter must be the input tensor per timestep. Additional
            parameters may be specified in the 'args' argument.
        args: `list`. A list of parameters to use with the provided function.
        scope: `str`. A scope to give to each timestep tensor. Useful when
            sharing weights. Each timestep tensor scope will be generated
            as 'scope'-'i' where i represents the timestep id. Note that your
            custom function will be required to have a 'scope' parameter.

    Returns:
        A Tensor.

    """
    if not args: args = list()
    assert isinstance(args, list), "'args' must be a list."

    if not isinstance(incoming, tf.Tensor):
        incoming = tf.transpose(tf.stack(incoming), [1, 0, 2])

    input_shape = utils.get_incoming_shape(incoming)
    timestep = input_shape[1]
    x = tf.unstack(incoming, axis=1)
    if scope:
        x = [fn(x[i], scope=scope+'-'+str(i), *args)
             for i in range(timestep)]
    else:
        x = [fn(x[i], *args) for i in range(timestep)]

    x = list(map(lambda t: tf.reshape(t, [-1, 1]+utils.get_incoming_shape(t)[1:]), x))
    return tf.concat(x, 1)
Example 12
def _match_templates(net_z, net_x, params_names_list, params_values_list):
    # finalize network
    # z, x are [B, H, W, C]
    net_z = tf.transpose(net_z, perm=[1, 2, 0, 3])
    net_x = tf.transpose(net_x, perm=[1, 2, 0, 3])
    # z, x are [H, W, B, C]
    Hz, Wz, B, C = tf.unstack(tf.shape(net_z))
    Hx, Wx, Bx, Cx = tf.unstack(tf.shape(net_x))
    # assert B==Bx, ('Z and X should have same Batch size')
    # assert C==Cx, ('Z and X should have same Channels number')
    net_z = tf.reshape(net_z, (Hz, Wz, B * C, 1))
    net_x = tf.reshape(net_x, (1, Hx, Wx, B * C))
    net_final = tf.nn.depthwise_conv2d(net_x,
                                       net_z,
                                       strides=[1, 1, 1, 1],
                                       padding='VALID')
    #candidates
    candidates = net_final
    # final is [1, Hf, Wf, BC]
    # Note: the hard-coded 3 here assumes a batch size B of 3.
    net_final = tf.concat(tf.split(net_final, 3, axis=3), axis=0)
    # final is [B, Hf, Wf, C]
    net_final = tf.expand_dims(tf.reduce_sum(net_final, axis=3), axis=3)
    # final is [B, Hf, Wf, 1]
    if _bnorm_adjust:
        bn_beta = params_values_list[params_names_list.index('fin_adjust_bnb')]
        bn_gamma = params_values_list[params_names_list.index(
            'fin_adjust_bnm')]
        bn_moments = params_values_list[params_names_list.index(
            'fin_adjust_bnx')]
        bn_moving_mean = bn_moments[:, 0]
        bn_moving_variance = bn_moments[:, 1]**2
        net_final = tf.layers.batch_normalization(
            net_final,
            beta_initializer=tf.constant_initializer(bn_beta),
            gamma_initializer=tf.constant_initializer(bn_gamma),
            moving_mean_initializer=tf.constant_initializer(bn_moving_mean),
            moving_variance_initializer=tf.constant_initializer(
                bn_moving_variance),
            training=False,
            trainable=False)

    return net_final, candidates
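The batch-as-channels trick above can be illustrated in isolation: each exemplar becomes a depthwise filter applied to its own search image by folding the batch dimension into the channel dimension (the shapes below are made up):

import tensorflow as tf

B, C = 2, 4
net_z = tf.random.normal([B, 6, 6, C])       # exemplar features
net_x = tf.random.normal([B, 20, 20, C])     # search features
net_z = tf.transpose(net_z, perm=[1, 2, 0, 3])            # [Hz, Wz, B, C]
net_x = tf.transpose(net_x, perm=[1, 2, 0, 3])            # [Hx, Wx, B, C]
net_z = tf.reshape(net_z, [6, 6, B * C, 1])               # one filter per (image, channel)
net_x = tf.reshape(net_x, [1, 20, 20, B * C])
score = tf.nn.depthwise_conv2d(net_x, net_z, strides=[1, 1, 1, 1], padding='VALID')
print(score.shape)  # (1, 15, 15, 8): per-(image, channel) correlation maps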
Example 13
 def _make_random_serialized_jpeg_images(self, num_frames, image_height,
                                         image_width):
   images = tf.cast(tf.random.uniform(
       [num_frames, image_height, image_width, 3],
       maxval=256,
       dtype=tf.int32), dtype=tf.uint8)
   images_list = tf.unstack(images, axis=0)
   encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
   with tf.Session() as sess:
     encoded_images = sess.run(encoded_images_list)
   return encoded_images
Example 14
def get_RNN_from_words(model, word_idxs, reuse, scope=None):
    with variable_scope.variable_scope(scope or 'RNN_abstraction',
                                       reuse=reuse):
        # get mean word vectors
        word_vecs = tf.nn.embedding_lookup(model.word_emb, word_idxs)
        cell = tf.contrib.rnn.GRUCell(model.embed_size)
        encoder_outputs, encoder_state = tf.nn.static_rnn(cell,
                                                          tf.unstack(word_vecs,
                                                                     axis=1),
                                                          dtype=dtypes.float32)
        return encoder_state, [word_vecs]
Example 15
def concat_tensor_display(tensor, axisToConcat, axisToSplit=3):
    tensors_list = tf.unstack(
        tensor,
        axis=axisToSplit)  #4 * [batch, 256,256,3] Might need to use split
    #if tensors_list[0].get_shape()[1] == 1:
    #    tensors_list = [tf.squeeze (tensor, axis = 1) for tensor in tensors_list]

    tensors = tf.concat(tensors_list,
                        axis=axisToConcat)  #[batch, 256, 256 * 4, 3]

    return tensors
 def graph_fn():
     images = tf.cast(tf.random.uniform(
         [num_frames, image_height, image_width, 3],
         maxval=256,
         dtype=tf.int32),
                      dtype=tf.uint8)
     images_list = tf.unstack(images, axis=0)
     encoded_images = [
         tf.io.encode_jpeg(image) for image in images_list
     ]
     return encoded_images
Example 17
def tensormul(t1, t2):
    """Basically matmul, but t1 can have more dimensions than t2."""
    dim1 = t1.get_shape().as_list()[-1]
    dim2 = t2.get_shape().as_list()[-1]
    result_shape_tensors = tf.unstack(tf.shape(t1))
    result_shape_tensors[-1] = dim2
    result_shape_tensor = tf.stack(result_shape_tensors)
    t1 = tf.reshape(t1, [-1, dim1])
    result = tf.matmul(t1, t2)
    result = tf.reshape(result, result_shape_tensors)
    return result
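A quick usage check (eager TF 2.x, example shapes invented): `t1` may carry extra leading dimensions, and only its last dimension changes from `dim1` to `dim2`.

import tensorflow as tf

t1 = tf.random.normal([2, 5, 3])   # [..., dim1]
t2 = tf.random.normal([3, 7])      # [dim1, dim2]
print(tensormul(t1, t2).shape)     # (2, 5, 7)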
Example 18
def RNN(X, weights, biases):  # inputs, weights, biases
    X = tf.reshape(X, [-1, 28])  # (128 * 28, 28)
    X_in = tf.matmul(X, weights['in']) + biases['in']  # (128 * 28, 128)
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])  # (128, 28, 128), reshaped into a time sequence
    cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    init_state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)
    outputs, final_state = tf.nn.dynamic_rnn(cell, X_in, initial_state=init_state, time_major=False)  # time_major controls whether the time axis comes first
    outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2]))
    results = tf.matmul(outputs[-1], weights['out']) + biases['out']
    # results = tf.matmul(final_state[1], weights['out']) + biases['out']  # presumably this would only use the short-term memory (hidden state)
    return results
Example 19
 def host_call_fn(step, scalar_values):
     values = tf.unstack(scalar_values)
     with tf2.summary.create_file_writer(
             params['checkpoint_dir']).as_default():
         with tf2.summary.record_if(
                 tf.math.equal(step[0] % params['tpu_iterations_per_loop'],
                               0)):
             for key, value in zip(list(tensorboard_scalars.keys()),
                                   values):
                 tf2.summary.scalar(key, value, step=step[0])
             return tf.summary.all_v2_summary_ops()
Example 20
 def crop_image(image_bytes, label):
   """Preprocess the image."""
   shape = tf.shape(image_bytes)
   bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
   bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box(
       shape,
       bbox,
       min_object_covered=0.1,
       aspect_ratio_range=(0.75, 1.33),
       area_range=(0.08, 1.0),
       max_attempts=10,
       use_image_if_no_bounding_boxes=True)
   offset_y, offset_x, _ = tf.unstack(bbox_begin)
   target_height, target_width, _ = tf.unstack(bbox_size)
   image = tf.image.crop_to_bounding_box(image_bytes, offset_y, offset_x,
                                         target_height, target_width)
   image = tf.image.resize_bicubic([image], [image_size, image_size])[0]
   image = tf.image.random_flip_left_right(image)
   image = tf.reshape(image, [image_size, image_size, 3])
   return tf.image.convert_image_dtype(image, dtype), label
Example 21
def gather_nd(params, indices, name=None):
    shape = params.get_shape().as_list()
    rank = len(shape)
    flat_params = tf.reshape(params, [-1])
    multipliers = [
        reduce(lambda x, y: x * y, shape[i + 1:], 1) for i in range(0, rank)
    ]
    indices_unpacked = tf.unstack(
        tf.transpose(indices, [rank - 1] + range(0, rank - 1), name))
    flat_indices = sum([a * b for a, b in zip(multipliers, indices_unpacked)])
    return tf.gather(flat_params, flat_indices, name=name)
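This helper reimplements N-d gathering by flattening; the built-in `tf.gather_nd` gives the same result. A quick eager comparison of the flattening idea (the example values are made up):

import tensorflow as tf

params = tf.constant([[10., 11., 12.],
                      [20., 21., 22.]])
indices = tf.constant([[0, 2], [1, 0]])
print(tf.gather_nd(params, indices).numpy())  # [12. 20.]

# The equivalent manual flattening: flat index = row * num_cols + col.
flat_params = tf.reshape(params, [-1])
flat_indices = indices[:, 0] * 3 + indices[:, 1]
print(tf.gather(flat_params, flat_indices).numpy())  # [12. 20.]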
Example 22
def reinforce_loss(disc_logits, bert_scores, gen_logprobs, gamma, decay):
  """The REINFORCE loss.

  Args:
      disc_logits: float tensor, shape [batch_size, sequence_length].
      bert_scores: float tensor, shape [batch_size, sequence_length].
      gen_logprobs: float32 tensor, shape [batch_size, sequence_length].
      gamma: a float, discount factor for cumulative reward.
      decay: a float, decay rate for the EWMA baseline of REINFORCE.

  Returns:
    A tuple of (loss, cumulative_rewards, ewma_reward). `loss` is a float
    tensor of shape [batch_size, sequence_length] with the REINFORCE loss for
    each timestep.
  """
  # Assume 1 logit for each timestep.
  batch_size, sequence_length = disc_logits.shape.as_list()
  gen_logprobs.shape.assert_is_compatible_with([batch_size, sequence_length])

  disc_predictions = tf.nn.sigmoid(disc_logits)
  bert_predictions = tf.nn.sigmoid(bert_scores)

  # MaskGAN uses log(D), but this is more stable empirically.
  # rewards = 1.5 * disc_predictions + 0.5 * bert_predictions - 1
  rewards = 2 * bert_predictions - 1            # Test whether bert_predictions change with different inputs

  # Compute cumulative rewards.
  rewards_list = tf.unstack(rewards, axis=1)
  cumulative_rewards = []

  # Cheng: the reward of each step is a discounted sum of all following rewards.
  # (The calculation itself could be made more efficient.)
  for t in xrange(sequence_length):
    cum_value = tf.zeros(shape=[batch_size])
    for s in xrange(t, sequence_length):
      cum_value += np.power(gamma, (s - t)) * rewards_list[s]
    cumulative_rewards.append(cum_value)
  cumulative_rewards = tf.stack(cumulative_rewards, axis=1)

  cumulative_rewards.shape.assert_is_compatible_with(
      [batch_size, sequence_length])

  with tf.variable_scope("reinforce", reuse=tf.AUTO_REUSE):
    ewma_reward = tf.get_variable("ewma_reward", initializer=0.0)

  mean_reward = tf.reduce_mean(cumulative_rewards)
  new_ewma_reward = decay * ewma_reward + (1.0 - decay) * mean_reward
  update_op = tf.assign(ewma_reward, new_ewma_reward)

  # REINFORCE
  with tf.control_dependencies([update_op]):
    advantage = cumulative_rewards - ewma_reward
    loss = -tf.stop_gradient(advantage) * gen_logprobs

  loss.shape.assert_is_compatible_with([batch_size, sequence_length])
  return loss, cumulative_rewards, ewma_reward
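As a numeric illustration of the discounted cumulative reward computed in the nested loop above (values made up): with gamma = 0.5 and per-step rewards [1, 1, 1], step t receives the sum over s >= t of gamma^(s - t) * r_s.

import numpy as np

gamma = 0.5
rewards = np.array([1.0, 1.0, 1.0])
cumulative = np.array([
    sum(gamma ** (s - t) * rewards[s] for s in range(t, len(rewards)))
    for t in range(len(rewards))
])
print(cumulative)  # [1.75 1.5  1.  ]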
Example 23
def cdna_transformation(prev_image, cdna_input, num_masks, color_channels,
                        dna_kernel_size, relu_shift):
    """Apply convolutional dynamic neural advection to previous image.

  Args:
    prev_image: previous image to be transformed.
    cdna_input: hidden layer to be used for computing CDNA kernels.
    num_masks: number of masks and hence the number of CDNA transformations.
    color_channels: the number of color channels in the images.
    dna_kernel_size: dna kernel size.
    relu_shift: shift for ReLU function.

  Returns:
    List of images transformed by the predicted CDNA kernels.
  """
    batch_size = tf.shape(cdna_input)[0]
    height = int(prev_image.get_shape()[1])
    width = int(prev_image.get_shape()[2])

    # Predict kernels using linear function of last hidden layer.
    cdna_kerns = tfl.dense(cdna_input,
                           dna_kernel_size * dna_kernel_size * num_masks,
                           name="cdna_params",
                           activation=None)

    # Reshape and normalize.
    cdna_kerns = tf.reshape(
        cdna_kerns,
        [batch_size, dna_kernel_size, dna_kernel_size, 1, num_masks])
    cdna_kerns = (tf.nn.relu(cdna_kerns - relu_shift) + relu_shift)
    norm_factor = tf.reduce_sum(cdna_kerns, [1, 2, 3], keep_dims=True)
    cdna_kerns /= norm_factor

    # Treat the color channel dimension as the batch dimension since the same
    # transformation is applied to each color channel.
    # Treat the batch dimension as the channel dimension so that
    # depthwise_conv2d can apply a different transformation to each sample.
    cdna_kerns = tf.transpose(cdna_kerns, [1, 2, 0, 4, 3])
    cdna_kerns = tf.reshape(
        cdna_kerns, [dna_kernel_size, dna_kernel_size, batch_size, num_masks])
    # Swap the batch and channel dimensions.
    prev_image = tf.transpose(prev_image, [3, 1, 2, 0])

    # Transform image.
    transformed = tf.nn.depthwise_conv2d(prev_image, cdna_kerns, [1, 1, 1, 1],
                                         "SAME")

    # Transpose the dimensions to where they belong.
    transformed = tf.reshape(
        transformed, [color_channels, height, width, batch_size, num_masks])
    transformed = tf.transpose(transformed, [3, 1, 2, 0, 4])
    transformed = tf.unstack(transformed, axis=-1)
    return transformed
Example 24
        def step(hprev, x):
            st_1, ct_1 = tf.unstack(hprev)

            fc_gate = tf.matmul(weight, tf.transpose(tf.concat([x, st_1], -1)))
            fc_gate = tf.transpose(fc_gate) + bias
            i, f, g, o = tf.split(fc_gate, 4, axis=1)
            i, f, g, o = tf.sigmoid(i), tf.sigmoid(f), tf.tanh(g), tf.sigmoid(
                o)
            ct = ct_1 * f + g * i
            st = tf.tanh(ct) * o

            return tf.stack([st, ct])
Example 25
def registerTensorboard(paths, images, nbInputsMax, nbTargets, loss_value,
                        batch_size, targetsRenderings, outputsRenderings):
    inputs = images[0]
    targets = images[1]
    outputs = images[2]

    targetsList = tf.split(targets, batch_size, axis=0)
    inputsList = tf.split(inputs, batch_size, axis=0)
    #print(targetsList[0])
    #inputsList[0] = tf.Print(inputsList[0], [tf.reduce_mean(inputsList[0])], "mean of inputs 0")
    tf.summary.image("targets", targetsList[0], max_outputs=nbTargets)
    tf.summary.image("inputs", inputsList[0], max_outputs=nbInputsMax)
    tf.summary.image("outputs", outputs, max_outputs=nbTargets)
    tf.summary.scalar("loss", loss_value)
    #targetsRenderings is [batchSize,nbRenderings, 256, 256, 3]
    tf.summary.image("targets renderings",
                     tf.unstack(tf.log(targetsRenderings[0] + 0.1), axis=0),
                     max_outputs=9)
    tf.summary.image("outputs renderings",
                     tf.unstack(tf.log(outputsRenderings[0] + 0.1), axis=0),
                     max_outputs=9)
Example 26
def relevants(inputs, lengths, is_concat=True):
    '''Take the outputs for the valid (non-padded) timesteps from an RNN's output.

    Args:
        :type inputs: tensor, shape = [batch_size, time_step, edim]
        :param inputs: the RNN outputs.

        :type lengths: tensor, shape = [batch_size]
        :param lengths: the true length of each input sequence fed to the RNN.

    Returns:
        A tensor of shape [batch_size * real_len, edim]; each element along
        dimension 0 is one of the corresponding RNN's valid outputs.
    '''
    ips = tf.unstack(inputs, axis=0)
    lens = tf.unstack(lengths, axis=0)
    reles = []
    for i in range(len(lens)):
        reles.append(tf.gather(ips[i], tf.range(0, lens[i])))
    if is_concat:
        reles = tf.concat(reles, axis=0)
    return reles
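A small eager check of `relevants` (the batch size must be static, since the Python loop needs `len(lens)` at trace time; the numbers below are illustrative):

import tensorflow as tf

outputs = tf.reshape(tf.range(2 * 3 * 4, dtype=tf.float32), [2, 3, 4])  # [batch, time, edim]
lengths = tf.constant([2, 1])
print(relevants(outputs, lengths).shape)  # (3, 4): two valid steps from row 0, one from row 1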
Example 27
        def calibration_fn(class_predictions_with_background):
            """Calibrate predictions per class via 1-d linear interpolation.

      Prediction scores are linearly interpolated with class-specific function
      approximations. Note that after calibration, an anchor's class scores will
      not necessarily sum to 1, and score ordering may change, depending on each
      class' calibration parameters.

      Args:
        class_predictions_with_background: tf.float32 tensor of shape
          [batch_size, num_anchors, num_classes + 1] containing scores on the
          interval [0,1]. This is usually produced by a sigmoid or softmax layer
          and the result of calling the `predict` method of a detection model.

      Returns:
        tf.float32 tensor of the same shape as the input with values on the
        interval [0, 1].

      Raises:
        KeyError: Calibration parameters are not present for a class.
      """
            class_id_function_dict = _get_class_id_function_dict(
                calibration_config)

            # Tensors are split by class and then recombined at the end to recover
            # the input's original shape. If a class id does not have calibration
            # parameters, it is left unchanged.
            class_tensors = tf.unstack(class_predictions_with_background,
                                       axis=-1)
            calibrated_class_tensors = []
            for class_id, class_tensor in enumerate(class_tensors):
                flat_class_tensor = tf.reshape(class_tensor, shape=[-1])
                if class_id in class_id_function_dict:
                    output_tensor = _tf_linear_interp1d(
                        x_to_interpolate=flat_class_tensor,
                        fn_x=class_id_function_dict[class_id][0],
                        fn_y=class_id_function_dict[class_id][1])
                else:
                    tf.logging.info(
                        'Calibration parameters for class id `%d` not found',
                        class_id)
                    output_tensor = flat_class_tensor
                calibrated_class_tensors.append(output_tensor)

            combined_calibrated_tensor = tf.stack(calibrated_class_tensors,
                                                  axis=1)
            input_shape = shape_utils.combined_static_and_dynamic_shape(
                class_predictions_with_background)
            calibrated_class_predictions_with_background = tf.reshape(
                combined_calibrated_tensor,
                shape=input_shape,
                name='calibrate_scores')
            return calibrated_class_predictions_with_background
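The per-class mapping itself is ordinary piecewise-linear interpolation. A hypothetical NumPy stand-in for `_tf_linear_interp1d` (which lives elsewhere in the Object Detection API), just to show what the calibration points do; the fn_x/fn_y values are invented:

import numpy as np

fn_x = np.array([0.0, 0.5, 1.0])      # raw-score control points
fn_y = np.array([0.0, 0.3, 1.0])      # calibrated-score control points
scores = np.array([0.25, 0.75])
print(np.interp(scores, fn_x, fn_y))  # [0.15 0.65]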
Example 28
    def get_extra_loss(self,
                       latent_means=None,
                       latent_stds=None,
                       true_frames=None,
                       gen_frames=None):
        """Gets extra loss from VAE and GAN."""
        if not self.is_training:
            return 0.0

        vae_loss, d_vae_loss, d_gan_loss = 0.0, 0.0, 0.0
        # Use sv2p's KL divergence computation.
        if self.hparams.use_vae:
            vae_loss = super(NextFrameSavpBase,
                             self).get_extra_loss(latent_means=latent_means,
                                                  latent_stds=latent_stds)

        if self.hparams.use_gan:
            # Strip out the first context_frames for the true_frames
            # Strip out the first context_frames - 1 for the gen_frames
            context_frames = self.hparams.video_num_input_frames
            true_frames = tf.stack(
                tf.unstack(true_frames, axis=0)[context_frames:])

            # discriminator for VAE.
            if self.hparams.use_vae:
                gen_enc_frames = tf.stack(
                    tf.unstack(gen_frames, axis=0)[context_frames - 1:])
                d_vae_loss = self.get_gan_loss(true_frames,
                                               gen_enc_frames,
                                               name="vae")

            # discriminator for GAN.
            gen_prior_frames = tf.stack(
                tf.unstack(self.gen_prior_video, axis=0)[context_frames - 1:])
            d_gan_loss = self.get_gan_loss(true_frames,
                                           gen_prior_frames,
                                           name="gan")

        return (vae_loss + self.hparams.gan_loss_multiplier * d_gan_loss +
                self.hparams.gan_vae_loss_multiplier * d_vae_loss)
    def __call__(self, input_cls, input_reg, is_training=True):
        output_conf = input_cls
        output_bbox = input_reg

        output_conf = self.cls_conv_1(output_conf, is_training=is_training)
        output_conf = self.cls_conv_2(output_conf, is_training=is_training)
        output_conf = self.cls_conv_3(output_conf, is_training=is_training)
        output_bbox = self.reg_conv_1(output_bbox, is_training=is_training)
        output_bbox = self.reg_conv_2(output_bbox, is_training=is_training)
        output_bbox = self.reg_conv_3(output_bbox, is_training=is_training)

        cls_score = self.cls_score(output_conf, is_training=is_training)
        with tf.name_scope('cls_score'):
            cls_B, cls_H, cls_W, cls_C = tf.unstack(
                tf.shape(cls_score, name='shape'))
            cls_score = tf.reshape(cls_score, (cls_B, cls_H * cls_W, cls_C),
                                   name='reshape')

        ctr_score = self.ctr_score(output_conf, is_training=is_training)
        with tf.name_scope('ctr_score'):
            ctr_B, ctr_H, ctr_W, ctr_C = tf.unstack(
                tf.shape(ctr_score, name='shape'))
            ctr_score = tf.reshape(ctr_score, (ctr_B, ctr_H * ctr_W, ctr_C),
                                   name='reshape')

        offset = self.offset(output_bbox, is_training=is_training)
        with tf.name_scope('offset'):
            offset = (self.si * offset + self.bi)
            offset = tf.exp(offset, name='exp') * self.total_stride

            offset_B, offset_H, offset_W, offset_C = tf.unstack(
                tf.shape(offset, name='shape'))
            offset = tf.reshape(offset,
                                (offset_B, offset_H * offset_W, offset_C),
                                name='reshape')
            xy0 = self.fm_ctr - offset[:, :, 0:2]
            xy1 = self.fm_ctr + offset[:, :, 2:]
            bbox = tf.concat([xy0, xy1], axis=2)

        return cls_score, ctr_score, bbox
Example 30
def _clamp_and_filter_result(pixel_x, pixel_y, z):
    """Clamps and masks out out-of-bounds pixel coordinates.
    Args:
      pixel_x: a tf.Tensor containing x pixel coordinates in an image.
      pixel_y: a tf.Tensor containing y pixel coordinates in an image.
      z: a tf.Tensor containing the depth at each (pixel_y, pixel_x). All shapes
        are [B, H, W].
    Returns:
      pixel_x, pixel_y, mask, where pixel_x and pixel_y are the original ones,
      except:
      - Values that fall out of the image bounds, which are [0, W-1) in x and
        [0, H-1) in y, are clamped to the bounds
      - NaN values in pixel_x, pixel_y are replaced by zeros
      mask is False at all points where:
      - Clamping in pixel_x or pixel_y was performed
      - NaNs were replaced by zeros
      - z is non-positive,
      and True everywhere else, that is, where pixel_x, pixel_y are finite and
      fall within the frame.
    """
    with tf.name_scope("Clamp", values=[pixel_x, pixel_y, z]):
        _, height, width = tf.unstack(tf.shape(pixel_x))

        def _tensor(x):
            return tf.to_float(tf.convert_to_tensor(x))

        x_not_underflow = pixel_x >= 0.0
        y_not_underflow = pixel_y >= 0.0
        x_not_overflow = pixel_x < _tensor(width - 1)
        y_not_overflow = pixel_y < _tensor(height - 1)
        z_positive = z > 0.0
        x_not_nan = tf.math.logical_not(tf.is_nan(pixel_x))
        y_not_nan = tf.math.logical_not(tf.is_nan(pixel_y))
        not_nan = tf.logical_and(x_not_nan, y_not_nan)
        not_nan_mask = tf.to_float(not_nan)
        pixel_x *= not_nan_mask
        pixel_y *= not_nan_mask
        pixel_x = tf.clip_by_value(pixel_x, 0.0, _tensor(width - 1))
        pixel_y = tf.clip_by_value(pixel_y, 0.0, _tensor(height - 1))
        mask_stack = tf.stack(
            [
                x_not_underflow,
                y_not_underflow,
                x_not_overflow,
                y_not_overflow,
                z_positive,
                not_nan,
            ],
            axis=0,
        )
        mask = tf.reduce_all(mask_stack, axis=0)
        return pixel_x, pixel_y, mask
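An eager sketch of the clamp-and-mask idea for a single coordinate tensor (a made-up width of 4; the NaN and depth checks from the full function are omitted):

import tensorflow as tf

pixel_x = tf.constant([[-1.0, 0.5, 2.0, 5.0]])
in_bounds = (pixel_x >= 0.0) & (pixel_x < 3.0)       # valid range is [0, W - 1) with W = 4
clamped = tf.clip_by_value(pixel_x, 0.0, 3.0)
print(clamped.numpy())    # [[0.  0.5 2.  3. ]]
print(in_bounds.numpy())  # [[False  True  True False]]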