Example #1
 def _create_model(self, train_triples):
     # Count unique items to determine embedding matrix sizes
     entity_cnt = len(set(train_triples[:,0]).union(train_triples[:,2]))
     rel_cnt = len(set(train_triples[:,1]))
     init_sd = 1.0 / np.sqrt(self.embedding_size)
     # Embedding variables for all entities and relationship types
     entity_embedding_shape = [entity_cnt, self.embedding_size]
     # Relationship embeddings will be stored in flattened format to make 
     # applying maxnorm constraints easier
     rel_embedding_shape = [rel_cnt, self.embedding_size * self.embedding_size]
     entity_init = tf.truncated_normal(entity_embedding_shape, stddev=init_sd)
     rel_init = tf.truncated_normal(rel_embedding_shape, stddev=init_sd)
     if self.maxnorm is not None:
         # Ensure maxnorm constraints are initially satisfied
         entity_init = dense_maxnorm(entity_init, self.maxnorm)
         rel_init = dense_maxnorm(rel_init, self.maxnorm)
     self.entity_embedding_vars = tf.Variable(entity_init)
     self.rel_embedding_vars = tf.Variable(rel_init)
     # Embedding layer for each (head, rel, tail) triple being fed in as input
     head_embed = tf.nn.embedding_lookup(self.entity_embedding_vars, self.head_input)
     tail_embed = tf.nn.embedding_lookup(self.entity_embedding_vars, self.tail_input)
     rel_embed = tf.nn.embedding_lookup(self.rel_embedding_vars, self.rel_input)
     # Reshape rel_embed into square D x D matrices
     rel_embed_square = tf.reshape(rel_embed, (-1, self.embedding_size, self.embedding_size))
     # Reshape head_embed and tail_embed to be suitable for the matrix multiplication
     head_embed_row = tf.expand_dims(head_embed, 1) # embeddings as row vectors
     tail_embed_col = tf.expand_dims(tail_embed, 2) # embeddings as column vectors
     head_rel_mult = tf.batch_matmul(head_embed_row, rel_embed_square)
     # Output needs a squeeze into a 1d vector
     raw_output = tf.squeeze(tf.batch_matmul(head_rel_mult, tail_embed_col)) 
     self.output, self.loss = self._create_output_and_loss(raw_output)
     # Optimization
     self.train_step = self.opt.minimize(self.loss)
     if self.maxnorm is not None:
         # Post-processing to limit embedding vars to L2 ball
         rel_maxnorm = self.maxnorm * self.rel_maxnorm_mult
         unique_ent_indices = tf.unique(tf.concat(0, [self.head_input, self.tail_input]))[0]
         unique_rel_indices = tf.unique(self.rel_input)[0]
         entity_constraint = self._norm_constraint_op(self.entity_embedding_vars, 
                                                      unique_ent_indices, 
                                                      self.maxnorm)
         rel_constraint = self._norm_constraint_op(self.rel_embedding_vars, 
                                                   unique_rel_indices, 
                                                   rel_maxnorm)
         self.post_step = [entity_constraint, rel_constraint]
Example #2
def nearest_neighbor_features_per_object(
    reference_embeddings, query_embeddings, reference_labels,
    max_neighbors_per_object, k_nearest_neighbors, gt_ids=None, n_chunks=100):
  """Calculates the distance to the nearest neighbor per object.

  For every pixel of query_embeddings calculate the distance to the
  nearest neighbor in the (possibly subsampled) reference_embeddings per object.

  Args:
    reference_embeddings: Tensor of shape [height, width, embedding_dim],
      the embedding vectors for the reference frame.
    query_embeddings: Tensor of shape [n_query_images, height, width,
      embedding_dim], the embedding vectors for the query frames.
    reference_labels: Tensor of shape [height, width, 1], the class labels of
      the reference frame.
    max_neighbors_per_object: Integer, the maximum number of candidates
      for the nearest neighbor query per object after subsampling,
      or 0 for no subsampling.
    k_nearest_neighbors: Integer, the number of nearest neighbors to use.
    gt_ids: Int tensor of shape [n_objs] of the sorted unique ground truth
      ids in the first frame. If None, it will be derived from
      reference_labels.
    n_chunks: Integer, the number of chunks to use to save memory
      (set to 1 for no chunking).

  Returns:
    nn_features: A float32 tensor of nearest neighbor features of shape
      [n_query_images, height, width, n_objects, feature_dim].
    gt_ids: An int32 tensor of the unique sorted object ids present
      in the reference labels.
  """
  with tf.name_scope('nn_features_per_object'):
    reference_labels_flat = tf.reshape(reference_labels, [-1])
    if gt_ids is None:
      ref_obj_ids, _ = tf.unique(reference_labels_flat)
      ref_obj_ids = tf.contrib.framework.sort(ref_obj_ids)
      gt_ids = ref_obj_ids
    embedding_dim = resolve_shape(reference_embeddings)[-1]
    reference_embeddings_flat = tf.reshape(reference_embeddings,
                                           [-1, embedding_dim])

    reference_embeddings_flat, reference_labels_flat = (
        subsample_reference_embeddings_and_labels(reference_embeddings_flat,
                                                  reference_labels_flat,
                                                  gt_ids,
                                                  max_neighbors_per_object))
    shape = resolve_shape(query_embeddings)
    query_embeddings_flat = tf.reshape(query_embeddings, [-1, embedding_dim])
    nn_features = _nearest_neighbor_features_per_object_in_chunks(
        reference_embeddings_flat, query_embeddings_flat, reference_labels_flat,
        gt_ids, k_nearest_neighbors, n_chunks)
    nn_features_dim = resolve_shape(nn_features)[-1]
    nn_features_reshaped = tf.reshape(nn_features,
                                      tf.stack(shape[:3] + [tf.size(gt_ids),
                                                            nn_features_dim]))
    return nn_features_reshaped, gt_ids
Example #3
  def testInt32(self):
    x = list(np.random.randint(2, high=10, size=7000))
    with self.test_session() as sess:
      y, idx = tf.unique(x)
      tf_y, tf_idx = sess.run([y, idx])

    self.assertEqual(len(x), len(tf_idx))
    self.assertEqual(len(tf_y), len(np.unique(x)))
    for i in range(len(x)):
      self.assertEqual(x[i], tf_y[tf_idx[i]])
  def testString(self):
    indx = np.random.randint(65, high=122, size=7000)
    x = [chr(i) for i in indx]
    with self.test_session() as sess:
      y, idx = tf.unique(x)
      tf_y, tf_idx = sess.run([y, idx])

    self.assertEqual(len(x), len(tf_idx))
    self.assertEqual(len(tf_y), len(np.unique(x)))
    for i in range(len(x)):
      self.assertEqual(x[i], tf_y[tf_idx[i]].decode("ascii"))
Example #5
def createBatchedIndices(roi_idx, centers, nr_of_points):
    centers = tf.convert_to_tensor(centers, dtype=tf.int32)
    simple_roi_idx = roi_idx

    def inLoop(i, roi_idx):
        new_roi = simple_roi_idx + tf.select(tf.greater(nr_of_points, i), true_values * i,
                                                                      true_values * 0)
        return tf.add(i, 1),\
               tf.concat(0, [roi_idx, new_roi])

    points_max = tf.cast(tf.reduce_max(nr_of_points), tf.int32)

    i = tf.constant(0)
    c = lambda i, nr_of_points: tf.less(i, points_max)
    i2, roi_idx = tf.while_loop(c, inLoop, [i, roi_idx], parallel_iterations=1)

    roi_idx = tf.unique(roi_idx)[0]
    batched_indices = tf.gather(centers, roi_idx)
    unique = tf.unique(batched_indices[:, 0])
    return tf.concat(1, [tf.expand_dims(unique[1], 1), batched_indices[:, 1:]])
Example #6
    def testWatchingUnconnectedOutputTensor(self):
        """Watch an output slot not emitting any edges.

    (Not even control edges from the node.)
    """

        with session.Session() as sess:
            x_init = constant_op.constant([2, 2, 3, 5, 5])
            x = variables.Variable(x_init, name="unconnected/x")

            # The UniqueOp (tf.unique) has two output slots. Use only slot 0 in the
            # graph. Let the debugger watch the unused slot 1.
            unique_x, _ = tf.unique(x, name="unconnected/unique_x")
            y = tf.add(unique_x, [0, 1, 2], name="unconnected/y")

            x.initializer.run()

            # Verify that only slot 0 of unique_x has recipients, while slot 1 of the
            # same node does not have recipients.
            unique_x_slot_0_recipients = []
            unique_x_slot_1_recipients = []
            for op in sess.graph.get_operations():
                for inp in op.inputs:
                    if inp.name == "unconnected/unique_x:0":
                        unique_x_slot_0_recipients.append(op.name)
                    elif inp.name == "unconnected/unique_x:1":
                        unique_x_slot_1_recipients.append(op.name)

            self.assertEqual(["unconnected/y"], unique_x_slot_0_recipients)
            self.assertEqual([], unique_x_slot_1_recipients)

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            result = sess.run(y, options=run_options, run_metadata=run_metadata)
            self.assertAllClose([2, 4, 7], result)

            dump = debug_data.DebugDumpDir(self._dump_root, partition_graphs=run_metadata.partition_graphs)

            # Assert that the connected slot (slot 0) is dumped properly.
            unique_x_slot_0_dumps = dump.watch_key_to_data("unconnected/unique_x:0:DebugIdentity")
            self.assertEqual(1, len(unique_x_slot_0_dumps))
            self.assertEqual("unconnected/unique_x", unique_x_slot_0_dumps[0].node_name)
            self.assertEqual(0, unique_x_slot_0_dumps[0].output_slot)
            self.assertAllClose([2, 3, 5], unique_x_slot_0_dumps[0].get_tensor())

            # Assert that the unconnected slot (slot 1) is dumped properly.
            unique_x_slot_1_dumps = dump.watch_key_to_data("unconnected/unique_x:1:DebugIdentity")
            self.assertEqual(1, len(unique_x_slot_1_dumps))
            self.assertEqual("unconnected/unique_x", unique_x_slot_1_dumps[0].node_name)
            self.assertEqual(1, unique_x_slot_1_dumps[0].output_slot)
            self.assertAllClose([0, 0, 1, 2, 2], unique_x_slot_1_dumps[0].get_tensor())
Example #7
 def _create_model(self, train_triples):
     # Count unique items to determine embedding matrix sizes
     head_cnt = len(set(train_triples[:,0]))
     rel_cnt = len(set(train_triples[:,1]))
     tail_cnt = len(set(train_triples[:,2]))
     init_sd = 1.0 / np.sqrt(self.embedding_size)
     # Embedding matrices for entities and relationship types
     head_init = tf.truncated_normal([head_cnt, self.embedding_size], stddev=init_sd)
     rel_init = tf.truncated_normal([rel_cnt, self.embedding_size], stddev=init_sd)
     tail_init = tf.truncated_normal([tail_cnt, self.embedding_size], stddev=init_sd)
     if self.maxnorm is not None:
         # Ensure maxnorm constraints are initially satisfied
         head_init = dense_maxnorm(head_init, self.maxnorm)
         rel_init = dense_maxnorm(rel_init, self.maxnorm)
         tail_init = dense_maxnorm(tail_init, self.maxnorm)
     self.head_embedding_vars = tf.Variable(head_init)
     self.rel_embedding_vars = tf.Variable(rel_init)
     self.tail_embedding_vars = tf.Variable(tail_init)
     # Embedding layer for each (head, rel, tail) triple being fed in as input
     head_embed = tf.nn.embedding_lookup(self.head_embedding_vars, self.head_input)
     rel_embed = tf.nn.embedding_lookup(self.rel_embedding_vars, self.rel_input)
     tail_embed = tf.nn.embedding_lookup(self.tail_embedding_vars, self.tail_input)
     # Model output
     raw_output = tf.reduce_sum(tf.mul(tf.mul(head_embed, rel_embed), tail_embed), 1)
     self.output, self.loss = self._create_output_and_loss(raw_output)
     # Optimization
     self.train_step = self.opt.minimize(self.loss)
     if self.maxnorm is not None:
         # Post-processing to limit embedding vars to L2 ball
         head_constraint = self._norm_constraint_op(self.head_embedding_vars, 
                                                    tf.unique(self.head_input)[0], 
                                                    self.maxnorm)
         rel_constraint = self._norm_constraint_op(self.rel_embedding_vars, 
                                                   tf.unique(self.rel_input)[0], 
                                                   self.maxnorm)
         tail_constraint = self._norm_constraint_op(self.tail_embedding_vars, 
                                                    tf.unique(self.tail_input)[0], 
                                                    self.maxnorm)
         self.post_step = [head_constraint, rel_constraint, tail_constraint]
def build_vocab(word_tensor, vocab_size):
  unique, idx = tf.unique(word_tensor)

  counts = tf.foldl(
      lambda counts, item: counts + tf.one_hot(
          tf.reshape(item, [-1]),
          tf.shape(unique)[0],
          dtype=tf.int32)[0],
      idx,
      initializer=tf.zeros_like(unique, dtype=tf.int32),
      back_prop=False
  )
  _, indices = tf.nn.top_k(counts, k=vocab_size)
  return tf.gather(unique, indices)
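A minimal usage sketch for build_vocab (added here, not part of the original example), assuming a TF 1.x graph session and an integer word tensor:

import tensorflow as tf

words = tf.constant([3, 1, 3, 2, 3, 1])
vocab = build_vocab(words, vocab_size=2)
with tf.Session() as sess:
    # The two most frequent ids come back first: [3, 1]
    print(sess.run(vocab))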
Example #9
def accumulate_sparse_gradients(grad):
  """Accumulates repeated indices of a sparse gradient update.

  Args:
    grad: a tf.IndexedSlices gradient

  Returns:
    grad_indices: unique indices
    grad_values: gradient values corresponding to the indices
  """

  grad_indices, grad_segments = tf.unique(grad.indices)
  grad_values = tf.unsorted_segment_sum(grad.values, grad_segments,
                                        tf.shape(grad_indices)[0])
  return grad_indices, grad_values
Example #10
def _deduplicate_indexed_slices(values, indices):
    """Sums `values` associated with any non-unique `indices`.
    Args:
      values: A `Tensor` with rank >= 1.
      indices: A one-dimensional integer `Tensor`, indexing into the first
      dimension of `values` (as in an IndexedSlices object).
    Returns:
      A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
      de-duplicated version of `indices` and `summed_values` contains the sum of
      `values` slices associated with each unique index.
    """
    unique_indices, new_index_positions = tf.unique(indices)
    summed_values = tf.unsorted_segment_sum(values,
                                            new_index_positions,
                                            tf.shape(unique_indices)[0])
    return (summed_values, unique_indices)
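A small usage sketch (added for illustration, not from the original snippet), assuming TF 1.x graph mode, showing how values at repeated indices get summed:

import tensorflow as tf

values = tf.constant([[1., 1.], [2., 2.], [3., 3.]])
indices = tf.constant([0, 2, 0])
summed_values, unique_indices = _deduplicate_indexed_slices(values, indices)
with tf.Session() as sess:
    # summed_values -> [[4., 4.], [2., 2.]], unique_indices -> [0, 2]
    print(sess.run([summed_values, unique_indices]))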
Example #11
def create_initial_softmax_from_labels(last_frame_labels, reference_labels,
                                       decoder_output_stride, reduce_labels):
  """Creates initial softmax predictions from last frame labels.

  Args:
    last_frame_labels: last frame labels of shape [1, height, width, 1].
    reference_labels: reference frame labels of shape [1, height, width, 1].
    decoder_output_stride: Integer, the stride of the decoder. Can be None, in
      this case it's assumed that the last_frame_labels and reference_labels
      are already scaled to the decoder output resolution.
    reduce_labels: Boolean, whether to reduce the depth of the softmax one_hot
      encoding to the actual number of labels present in the reference frame
      (otherwise the depth will be the highest label index + 1).

  Returns:
    init_softmax: the initial softmax predictions.
  """
  if decoder_output_stride is None:
    labels_output_size = last_frame_labels
    reference_labels_output_size = reference_labels
  else:
    h = tf.shape(last_frame_labels)[1]
    w = tf.shape(last_frame_labels)[2]
    h_sub = model.scale_dimension(h, 1.0 / decoder_output_stride)
    w_sub = model.scale_dimension(w, 1.0 / decoder_output_stride)
    labels_output_size = tf.image.resize_nearest_neighbor(
        last_frame_labels, [h_sub, w_sub], align_corners=True)
    reference_labels_output_size = tf.image.resize_nearest_neighbor(
        reference_labels, [h_sub, w_sub], align_corners=True)
  if reduce_labels:
    unique_labels, _ = tf.unique(tf.reshape(reference_labels_output_size, [-1]))
    depth = tf.size(unique_labels)
  else:
    depth = tf.reduce_max(reference_labels_output_size) + 1
  one_hot_assertion = tf.assert_less(tf.reduce_max(labels_output_size), depth)
  with tf.control_dependencies([one_hot_assertion]):
    init_softmax = tf.one_hot(tf.squeeze(labels_output_size,
                                         axis=-1),
                              depth=depth,
                              dtype=tf.float32)
  return init_softmax
Example #12
def run_modules(inputs, selection, module_fnc, output_shape):
    batch_size = tf.shape(inputs)[0]
    if output_shape is not None:
        output_shape = [batch_size] + output_shape
    else:
        # This is the only way I am aware of to get the output shape easily
        dummy = module_fnc(inputs, 0)
        output_shape = [batch_size] + dummy.shape[1:].as_list()

    used_modules, _ = tf.unique(tf.reshape(selection, (-1,)))

    def compute_module(accum, module):
        mask = tf.equal(module, selection)
        reduced_mask = tf.reduce_any(mask, axis=-1)
        indices = tf.where(reduced_mask)
        affected_inp = tf.boolean_mask(inputs, reduced_mask)
        output = module_fnc(affected_inp, module)
        return accum + tf.scatter_nd(indices, output, tf.cast(output_shape, tf.int64))

    output = tf.scan(compute_module, used_modules, initializer=tf.zeros(output_shape))[-1]
    return output
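A rough usage sketch (not from the original source), assuming TF 1.x graph mode; toy_module is a made-up stand-in module_fnc that simply scales its input:

import tensorflow as tf

def toy_module(x, module_id):
    # Hypothetical module: scale the input by (module_id + 1)
    return x * tf.cast(module_id + 1, tf.float32)

inputs = tf.random_normal([8, 4])
selection = tf.random_uniform([8, 2], maxval=3, dtype=tf.int32)  # two module picks per row
outputs = run_modules(inputs, selection, toy_module, output_shape=[4])
with tf.Session() as sess:
    print(sess.run(outputs).shape)  # (8, 4)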
Example #13
 def _create_model(self, train_triples):
     # Count unique items to determine embedding matrix sizes
     entity_cnt = len(set(train_triples[:,0]).union(train_triples[:,2]))
     rel_cnt = len(set(train_triples[:,1]))
     init_sd = 1.0 / np.sqrt(self.embedding_size)
     # Embedding variables
     entity_var_shape = [entity_cnt, self.embedding_size]
     rel_var_shape = [rel_cnt, self.embedding_size]
     entity_init  = tf.truncated_normal(entity_var_shape, stddev=init_sd)
     rel_init = tf.truncated_normal(rel_var_shape, stddev=init_sd)
     # Ensure maxnorm constraints are initially satisfied
     entity_init = dense_maxnorm(entity_init, self.maxnorm)
     self.entity_embedding_vars = tf.Variable(entity_init)
     self.rel_embedding_vars = tf.Variable(rel_init)
     # Embedding layer for each (head, rel, tail) triple being fed in as input
     head_embed = tf.nn.embedding_lookup(self.entity_embedding_vars, self.head_input)
     tail_embed = tf.nn.embedding_lookup(self.entity_embedding_vars, self.tail_input)
     rel_embed = tf.nn.embedding_lookup(self.rel_embedding_vars, self.rel_input)
     # Relationship vector acts as a translation in entity embedding space
     diff_vec = tail_embed - (head_embed + rel_embed)
     # negative dist so higher scores are better (important for pairwise loss)
     if self.dist == 'manhattan':
         raw_output = -tf.reduce_sum(tf.abs(diff_vec), 1)
     elif self.dist == 'euclidean':
         # +eps because gradients can misbehave for small values in sqrt
         raw_output = -tf.sqrt(tf.reduce_sum(tf.square(diff_vec), 1) + self.EPS)
     elif self.dist == 'sqeuclidean':
         raw_output = -tf.reduce_sum(tf.square(diff_vec), 1)
     else:
         raise Exception('Unknown distance type')
     # Model output
     self.output, self.loss = ranking_margin_objective(raw_output, self.margin)
     # Optimization with postprocessing to limit embedding vars to L2 ball
     self.train_step = self.opt.minimize(self.loss)
     unique_ent_indices = tf.unique(tf.concat(0, [self.head_input, self.tail_input]))[0]
     self.post_step = self._norm_constraint_op(self.entity_embedding_vars, 
                                               unique_ent_indices, 
                                               self.maxnorm)
Example #14
def find_dup(a):
  """ Find the duplicated elements in 1-D a tensor.
  Args:
    a: 1-D tensor.
    
  Return:
    more_than_one_vals: duplicated value in a.
    indexes_in_a: duplicated value's index in a.
    dups_in_a: duplicated value with duplicate in a.
  """
  unique_a_vals, unique_idx = tf.unique(a)
  count_a_unique = tf.unsorted_segment_sum(tf.ones_like(a),
                                           unique_idx,
                                           tf.shape(a)[0])

  more_than_one = tf.greater(count_a_unique, 1)
  more_than_one_idx = tf.squeeze(tf.where(more_than_one))
  more_than_one_vals = tf.squeeze(tf.gather(unique_a_vals, more_than_one_idx))

  not_duplicated, _ = tf.setdiff1d(a, more_than_one_vals)
  dups_in_a, indexes_in_a = tf.setdiff1d(a, not_duplicated)

  return more_than_one_vals, indexes_in_a, dups_in_a
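A quick usage sketch (added for illustration, not from the original example), assuming TF 1.x graph mode:

import tensorflow as tf

a = tf.constant([5, 2, 3, 5, 2, 7])
dup_vals, dup_idx, dups = find_dup(a)
with tf.Session() as sess:
    # dup_vals -> [5, 2], dup_idx -> [0, 1, 3, 4], dups -> [5, 2, 5, 2]
    print(sess.run([dup_vals, dup_idx, dups]))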
Example #15
n = 100
num_parallel = 5
dtype = tf.int32
queue = tf.FIFOQueue(capacity=n, dtypes=[dtype], shapes=[()])
enqueue_op = queue.enqueue_many(tf.range(n))
size_op = queue.size()

dequeue_ops = []
for i in range(num_parallel):
    dequeue_ops.append(queue.dequeue())

if hasattr(tf, "stack"):
    batch = tf.stack(dequeue_ops)
else:
    batch = tf.pack(dequeue_ops)
all_unique = tf.equal(tf.size(tf.unique(batch)[0]), num_parallel)
sess = create_session()
sess.run(enqueue_op)
print(tf.__version__)
print(tf.__git_version__)
for i in range(n//num_parallel):
    print(sess.run([batch, all_unique, size_op]))
print(tf.get_default_graph().as_graph_def())

# node {
#   name: "fifo_queue"
#   op: "FIFOQueueV2"
#   attr {
#     key: "capacity"
#     value {
#       i: 100
Example #16
def unique(input, return_inverse=False):
    if return_inverse:
        return tf.unique(input)
    else:
        return tf.unique(input).y
Example #17
def refine_detections_graph(rois, probs, deltas, window, config):
    '''
    Refine classified proposals and filter overlaps and return final detections.

    Algorithm:
        refine rois by mrcnn_deltas (rois is produced by refine anchors by rpn_deltas)
        filter out background boxes and low confidence rois
        filter: Apply per-class NMS
        filter: keep at most DETECTION_MAX_INSTANCES predictions according to probs
        concat and zero-padding to config.DETECTION_MAX_INSTANCES

    :param rois:    [num_rois, (y1, x1, y2, x2)] in normalized coordinates
    :param probs:   [num_rois, NUM_CLASSES] classifier probabilities
    :param deltas:  [num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] Deltas to apply to proposal boxes
    :param window:  [4, ] (y1, x1, y2, x2) in normalized coordinates. The part of the image
            that contains the image excluding the padding.
    :param config:  instance of (sub-class of Config)
    :return: [config.DETECTION_MAX_INSTANCES, (y1, x1, y2, x2, class_id, score)] where coordinates are normalized.
    '''

    # Class IDs per ROI
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    # Class probability of the top class of each ROI
    indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)
    class_scores = tf.gather_nd(probs, indices)
    # Class-specific bounding box deltas
    deltas_specific = tf.gather_nd(deltas, indices)
    # Apply bounding box deltas
    # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates
    refined_rois = apply_box_deltas_graph(
        rois, deltas_specific * config.BBOX_STD_DEV)
    # Clip boxes to image window
    refined_rois = clip_boxes_graph(refined_rois, window)

    # TODO: Filter out boxes with zero area

    # Filter out background boxes
    keep = tf.where(class_ids > 0)[:, 0]
    # Filter out low confidence boxes
    if config.DETECTION_MIN_CONFIDENCE:
        conf_keep = tf.where(
            class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(conf_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    # Apply per-class NMS
    # 1. Prepare variables
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois, keep)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

    def nms_keep_map(class_id):
        """Apply Non-Maximum Suppression on ROIs of the given class."""
        # Indices of ROIs of the given class
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        # Apply NMS
        class_keep = tf.image.non_max_suppression(
            tf.gather(pre_nms_rois, ixs),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=config.DETECTION_MAX_INSTANCES,
            iou_threshold=config.DETECTION_NMS_THRESHOLD)
        # Map indices
        class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
        # Pad with -1 so returned tensors have the same shape
        gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)],
                            mode='CONSTANT',
                            constant_values=-1)
        # Set shape so map_fn() can infer result shape
        class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
        return class_keep

    # 2. Map over class IDs
    nms_keep = tf.map_fn(nms_keep_map,
                         unique_pre_nms_class_ids,
                         dtype=tf.int64)
    # 3. Merge results into one list, and remove -1 padding
    nms_keep = tf.reshape(nms_keep, [-1])
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    # 4. Compute intersection between keep and nms_keep
    keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                    tf.expand_dims(nms_keep, 0))
    keep = tf.sparse_tensor_to_dense(keep)[0]
    # Keep top detections
    roi_count = config.DETECTION_MAX_INSTANCES
    class_scores_keep = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
    top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
    # Coordinates are normalized.
    detections = tf.concat([
        tf.gather(refined_rois, keep),
        tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis],
        tf.gather(class_scores, keep)[..., tf.newaxis]
    ],
                           axis=1)

    # Pad with zeros if detections < DETECTION_MAX_INSTANCES
    gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections
Example #18
def unique(input):
    return tf.unique(input).y
Example #19
def encode_annos(labels, bboxes, anchors, num_classes):
    """Encode annotations for losses computations.
  All the output tensors have a fix shape(none dynamic dimention).

  Args:
    labels: 1-D with shape `[num_bounding_boxes]`.
    bboxes: 2-D with shape `[num_bounding_boxes, 4]`. Format [ymin, xmin, ymax, xmax]
    anchors: 4-D tensor with shape `[num_anchors, 4]`. Format [cx, cy, w, h]

  Returns:
    input_mask: 2-D with shape `[num_anchors, 1]`, indicate which anchor to be used to cal loss.
    labels_input: 2-D with shape `[num_anchors, num_classes]`, one hot encode for every anchor.
    box_delta_input: 2-D with shape `[num_anchors, 4]`. Format [dcx, dcy, dw, dh]
    box_input: 2-D with shape '[num_anchors, 4]'. Format [ymin, xmin, ymax, xmax]
  """
    with tf.name_scope("Encode_annotations") as scope:
        num_anchors = config.ANCHORS
        # num_bboxes = tf.shape(bboxes)[0]

        # Cal iou, find the target anchor
        with tf.name_scope("Matching") as subscope:
            ious = batch_iou_fast(xywh_to_yxyx(anchors), bboxes)
            anchor_indices = tf.reshape(tf.arg_max(ious, dimension=1),
                                        shape=[-1, 1])  # target anchor indices
            # anchor_indices = tf.Print(anchor_indices, [anchor_indices], "anchor_indices", summarize=100)

            # discard duplicate # unique_idx wrong
            anchor_indices, idx, count = tf.unique_with_counts(
                tf.reshape(anchor_indices, shape=[-1]))
            ori_idx = tf.cumsum(tf.pad(count, [[1, 0]]))[:-1]
            anchor_indices = tf.reshape(anchor_indices, shape=[-1, 1])
            bboxes = tf.gather(bboxes, tf.unique(ori_idx)[0])
            labels = tf.gather(labels, tf.unique(ori_idx)[0])
            ious = tf.gather(ious, tf.unique(ori_idx)[0])
            num_bboxes = tf.shape(anchor_indices)[0]

            # TODO(shizehao):deal with duplicate
            # with tf.name_scope("Deal_with_duplicate"):
            #   dup_anchor_indices, indices_in_a, dup_anchor_indices_with_dup = find_dup(tf.reshape(anchor_indices, shape=[-1]))
            #
            #   # reset duplicated corresponding anchor
            #   conflicted_ious = tf.gather(ious, indices_in_a)
            #   top_k_anchor_indices = tf.nn.top_k(conflicted_ious, k=20).indices  # shape = [num_conflicted_bboxes, 20]
            #   dup_group_idx = tf.where(tf.equal(dup_anchor_indices_with_dup, tf.reshape(dup_anchor_indices, shape=[-1, 1])))
            #   seg_group = tf.unstack(dup_group_idx, axis=1)[0]

            with tf.name_scope("Deal_with_noneoverlap"):
                # find the none-overlap bbox
                bbox_indices = tf.reshape(tf.range(num_bboxes), shape=[-1, 1])
                # bbox_indices = tf.Print(bbox_indices, [bbox_indices], "bbox_indices", summarize=100)

                # anchor_indices = tf.Print(anchor_indices, [anchor_indices], "anchor_indices", summarize=100)
                iou_indices = tf.concat(
                    [bbox_indices,
                     tf.cast(anchor_indices, dtype=tf.int32)],
                    axis=1)
                # iou_indices = tf.Print(iou_indices, [iou_indices], "iou_indices", summarize=100)

                target_iou = tf.gather_nd(ious, iou_indices)
                # target_iou = tf.Print(target_iou,[target_iou],"target_iou",summarize=100)

                none_overlap_bbox_indices = tf.where(target_iou <= 0)  # 1-D
                # none_overlap_bbox_indices = tf.Print(none_overlap_bbox_indices, [none_overlap_bbox_indices], "none_overlap_bbox_indices", summarize=100)

                # find it's corresponding anchor
                target_bbox = tf.gather_nd(bboxes, none_overlap_bbox_indices)
                # target_bbox = tf.Print(target_bbox, [target_bbox], "target_bbox", summarize=100)

                closest_anchor_indices = arg_closest_anchor(
                    target_bbox, xywh_to_yxyx(anchors))  # 1-D
                # closest_anchor_indices = tf.Print(closest_anchor_indices, [closest_anchor_indices, tf.gather(anchors, closest_anchor_indices)], "closest_anchor_indices", summarize=100)

            with tf.name_scope("Update_anchor_indices"):
                anchor_indices = tf.reshape(anchor_indices, shape=[-1])
                anchor_indices = update_tensor(anchor_indices,
                                               none_overlap_bbox_indices,
                                               closest_anchor_indices)
                anchor_indices = tf.reshape(anchor_indices, shape=[-1, 1])

        with tf.name_scope("Delta") as subscope:
            target_anchors = tf.gather_nd(anchors, anchor_indices)
            bboxes = yxyx_to_xywh(bboxes)
            delta = batch_delta(bboxes, target_anchors)

        with tf.name_scope("Scattering") as subscope:
            # bbox
            box_input = tf.scatter_nd(anchor_indices,
                                      bboxes,
                                      shape=[num_anchors, 4])

            # label
            labels_input = tf.scatter_nd(anchor_indices,
                                         tf.one_hot(labels, num_classes),
                                         shape=[num_anchors, num_classes])

            # delta
            box_delta_input = tf.scatter_nd(anchor_indices,
                                            delta,
                                            shape=[num_anchors, 4])

            # anchor mask
            # unique_indices, _ = tf.unique(tf.reshape(anchor_indices, shape=[-1]))
            # unique_indices = tf.Print(unique_indices, [unique_indices], summarize=100)
            # num_bboxes = tf.Print(num_bboxes, [num_bboxes])
            input_mask = tf.scatter_nd(anchor_indices,
                                       tf.ones([num_bboxes]),
                                       shape=[num_anchors])
            input_mask = tf.reshape(input_mask, shape=[-1, 1])

    return input_mask, labels_input, box_delta_input, box_input
#except Exception as e:
#    print(e)

# Load the class metadata
patient_data_path = "E:/GitHub Program/skin_cancer_data/HAM10000_metadata.csv"
patient_data = pathlib.Path(patient_data_path).read_text()
# Split the data on the delimiter and remove the header
patient_data = patient_data.split("\n")[1:]
# Define the data type of each feature
col_data_type = [str(), str(), str(), str(), float(), str(), str()]
# Build a list of tensors according to the given data types
# Each tensor represents one feature
patient_data = tf.io.decode_csv(patient_data, record_defaults=col_data_type)

#number_of_class          = tf.unique(patient_data[2]).y.shape[0]
label = tf.unique(patient_data[2]).idx
image_id = patient_data[1]
original_data_path = "E:/GitHub Program/skin_cancer_data/HAM10000_images/"
new_data_path = "E:/GitHub Program/skin_cancer_data/HAM10000_images_rename"

number_of_process = 2
number_of_data_to_split = 1
new_size = (200, 200)
batch_size = 500

run_rename_function = True
# Add the class label to the original file names for later training
try:
    os.makedirs(new_data_path)
except FileExistsError:
    print("Folder Exist")
Example #21
lens_tf = tf.reduce_sum(mask, 1)

labels_list = []
for i in range(Bs):
    positions = tf.range(Lmax)
    substitues = tf.random.uniform([1], 1, lens_tf[i] - 1, tf.int32)
    labels = tf.cast(tf.equal(positions, substitues), tf.int32)
    labels_list.append(tf.expand_dims(labels, 0))

labels_tf = tf.concat(labels_list, 0)

splits_list = []
for i in range(Bs):
    #one = tf.constant(np.random.uniform(0, lens_np[i], (Ns * 2)).astype(int))
    one = tf.random.uniform([Ns * 4], 1, lens_tf[i], tf.int32)
    one, _ = tf.unique(one)
    one = tf.cond(
        tf.less(tf.shape(one)[0],
                Ns * 2), lambda: tf.expand_dims(tf.range(Ns * 2)[1::2], 0),
        lambda: tf.sort(tf.reshape(one[:Ns * 2], [1, Ns * 2]))[:, ::2])
    splits_list.append(one)

splits_tf = tf.concat(splits_list, 0)

splits_up = tf.concat(
    [splits_tf,
     tf.expand_dims(tf.constant([Lmax] * Bs, tf.int32), 1)], 1)
splits_lo = tf.concat(
    [tf.expand_dims(tf.constant([0] * Bs, tf.int32), 1), splits_tf], 1)
size_splits = splits_up - splits_lo
Example #22
 def forward(self, data, state):
     conv_sbbox, conv_mbbox, conv_lbbox = data
     batch_size = conv_sbbox.shape[0]
     final_results = []
     for idx in range(batch_size):
         pred_s, pred_m, pred_l = conv_sbbox[idx], conv_mbbox[idx], conv_lbbox[idx]
         pred_s = tf.reshape(pred_s, (-1, 85))
         pred_m = tf.reshape(pred_m, (-1, 85))
         pred_l = tf.reshape(pred_l, (-1, 85))
         preds = tf.concat([pred_s, pred_m, pred_l], axis=0)
         preds = preds[preds[:, 4] >
                       self.conf_threshold]  # filter by confidence
         classes = tf.argmax(preds[:, 5:], axis=-1)
         unique_classes = tf.unique(classes)[0]
         selected_boxes_all_classes = tf.zeros(shape=[0, 6],
                                               dtype=tf.float32)
         for clss in unique_classes:
             tf.autograph.experimental.set_loop_options(
                 shape_invariants=[(selected_boxes_all_classes,
                                    tf.TensorShape([None, 6]))])
             mask = tf.math.equal(classes, clss)
             preds_cls = tf.boolean_mask(preds, mask)
             x1, y1, w, h = preds_cls[:, 0], preds_cls[:, 1], preds_cls[:, 2], preds_cls[:, 3]
             x2, y2 = x1 + w, y1 + h
             conf_score, label = preds_cls[:, 4], tf.boolean_mask(
                 classes, mask)
             selected_bboxes = tf.stack(
                 [y1, x1, y2, x2, conf_score,
                  tf.cast(label, tf.float32)],
                 axis=-1)
             # nms for every class
             nms_keep = tf.image.non_max_suppression(selected_bboxes[:, :4],
                                                     selected_bboxes[:, 4],
                                                     max_output_size=50,
                                                     iou_threshold=0.35)
             selected_bboxes = tf.gather(selected_bboxes, nms_keep)
             selected_boxes_all_classes = tf.concat(
                 [selected_boxes_all_classes, selected_bboxes], axis=0)
         # clip bounding boxes to image size
         y1_abs = tf.clip_by_value(selected_boxes_all_classes[:, 0], 0,
                                   self.height)
         x1_abs = tf.clip_by_value(selected_boxes_all_classes[:, 1], 0,
                                   self.width)
         height_abs = tf.clip_by_value(
             selected_boxes_all_classes[:, 2] - y1_abs, 0,
             self.height - y1_abs)
         width_abs = tf.clip_by_value(
             selected_boxes_all_classes[:, 3] - x1_abs, 0,
             self.width - x1_abs)
         labels_score, labels = selected_boxes_all_classes[:, 4], selected_boxes_all_classes[:, 5]
         # final output: [x1, y1, w, h, label, label_score, select_or_not]
         results_single = [
             x1_abs, y1_abs, width_abs, height_abs, labels, labels_score,
             tf.ones_like(x1_abs)
         ]
         results_single = tf.stack(results_single, axis=-1)
         # pad 0 to other rows to improve performance
         results_single = tf.pad(
             results_single,
             [(0, self.max_outputs - tf.shape(results_single)[0]), (0, 0)])
         final_results.append(results_single)
     final_results = tf.stack(final_results)
     return final_results
    def batch_unique(x, max_labels=25):
        labels, _ = tf.unique(tf.reshape(x, (-1, )))
        if (tf.greater_equal(tf.shape(labels)[0], max_labels)):
            labels = tf.gather(labels, tf.range(0, max_labels))

        return tf.pad(labels, [[0, max_labels - tf.shape(labels)[0]]])
Example #24
def detectionLayer(proposal, probs, bbox, image_shape):
    """
    Detect one image at a time.
    :param proposal: proposals after non-maximum suppression, [batch, num, 4]
    :param probs: [num_boxes, num_classes]
    :param bbox: [num_boxes, num_classes, (dx, dy, log(h), log(w))]
    :param image_shape: [height, width, channels]
    :return: boxes, class ids, probabilities
    """
    with tf.control_dependencies([
            tf.Assert(tf.shape(proposal)[0] == 1,
                      data=["A single picture for evaluation each time"])
    ]):
        proposal = tf.squeeze(proposal, axis=[0])  # [num_boxes, 4]

    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)  # [num_boxes]
    indices = tf.stack([tf.range(probs.shape[0]), class_ids],
                       axis=1)  # [row index, class id]
    class_probs = tf.gather_nd(probs, indices)  # [num_box]
    deltas = tf.gather_nd(bbox, indices)  # [num_box, 4]
    refined_rois = apply_box_deltas(proposal, deltas *
                                    config.RPN_BBOX_STD_DEV)  # [num_boxes, 4]
    refined_rois = tf.clip_by_value(refined_rois, 0, 1)

    keep = tf.where(class_ids > 0)[:, 0]  # keep only foreground boxes
    if config.DETECTION_MIN_CONFIDENCE:
        # If a minimum confidence is configured, take the intersection with it
        conf_keep = tf.where(class_probs >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(conf_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    ########################### Re-gather and re-order everything below using the kept indices ########################
    pre_nms_class_ids = tf.gather(class_ids, keep)  # class ids  [num]
    pre_nms_scores = tf.gather(class_probs, keep)  # corresponding scores  [num]
    pre_nms_rois = tf.gather(refined_rois, keep)  # corresponding boxes  [num, 4]
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]  # distinct classes present  [unique]

    def nms_keep_map(class_id):
        """
        Apply non-maximum suppression only among boxes of the same class.
        :param class_id: the given class id
        :return:
        """
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        class_keep = tf.image.non_max_suppression(
            boxes=tf.gather(pre_nms_rois, ixs),
            scores=tf.gather(pre_nms_scores, ixs),
            max_output_size=config.DETECTION_MAX_INSTANCE,
            iou_threshold=config.DETECTION_NMS_THRESHHOLD)
        # class_keep holds indices relative to ixs; tf.gather(ixs, class_keep) recovers the values of ixs themselves,
        # and the values of ixs are indices into pre_nms_class_ids
        class_keep = tf.gather(ixs, class_keep)
        # Pad with -1 so all results have the same shape
        gap = config.DETECTION_MAX_INSTANCE - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)],
                            mode='CONSTANT',
                            constant_values=-1)
        # Set the shape so that map_fn() can infer the result shape right away
        class_keep.set_shape([config.DETECTION_MAX_INSTANCE])
        return class_keep

    # nms_keep has shape [len(unique_pre_nms_class_ids), config.DETECTION_MAX_INSTANCE]
    nms_keep = tf.map_fn(nms_keep_map,
                         unique_pre_nms_class_ids,
                         dtype=tf.int32)

    nms_keep = tf.reshape(nms_keep, [-1])
    # keep holds indices into pre_nms_class_ids
    keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])

    #
    class_probs2 = tf.gather(pre_nms_scores, keep)
    num_keep = tf.minimum(tf.shape(keep)[0], config.DETECTION_MAX_INSTANCE)
    top_ids = tf.nn.top_k(class_probs2, num_keep, sorted=True).indices
    keep = tf.gather(keep, top_ids)  # keep now holds the top-scoring detections, at most DETECTION_MAX_INSTANCE of them

    return (tf.gather(pre_nms_rois, keep),
            tf.gather(pre_nms_class_ids, keep),
            tf.gather(pre_nms_scores, keep))
Example #25
###############################################################################
# 1f: Create a random 2-d tensor of size 10 x 10 from any distribution.
# Calculate its determinant.
# Hint: Look at tf.matrix_determinant().
###############################################################################
s = tf.random_normal([10, 10])
result = tf.matrix_determinant(s)
print("6th computation -- " + str(sess.run(result)))

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################
t = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
unique = tf.unique(t)
print("6th computation -- " + str(sess.run(unique)))


###############################################################################
# Helper method to calculate huber loss
###############################################################################
def huber_loss(labels, predictions, delta=1.0):
    residual = tf.abs(predictions - labels)
    condition = tf.less(residual, delta)
    small_res = 0.5 * tf.square(residual)
    large_res = delta * residual - 0.5 * tf.square(delta)
    return tf.select(condition, small_res, large_res)


###############################################################################
Example #26
    def _get_bboxes_single(self, rcnn_probs, rcnn_deltas, rois, img_shape):
        '''
        Args
        ---
            rcnn_probs: [num_rois, num_classes]
            rcnn_deltas: [num_rois, num_classes, (dy, dx, log(dh), log(dw))]
            rois: [num_rois, (y1, x1, y2, x2)]
            img_shape: np.ndarray. [2]. (img_height, img_width)       
        '''
        H, W = img_shape
        # Class IDs per ROI
        class_ids = tf.argmax(rcnn_probs, axis=1, output_type=tf.int32)

        # Class probability of the top class of each ROI
        indices = tf.stack([tf.range(rcnn_probs.shape[0]), class_ids], axis=1)
        class_scores = tf.gather_nd(rcnn_probs, indices)
        # Class-specific bounding box deltas
        deltas_specific = tf.gather_nd(rcnn_deltas, indices)
        # Apply bounding box deltas
        # Shape: [num_rois, (y1, x1, y2, x2)] in normalized coordinates
        refined_rois = transforms.delta2bbox(rois, deltas_specific,
                                             self.target_means,
                                             self.target_stds)

        # Clip boxes to image window
        refined_rois *= tf.constant([H, W, H, W], dtype=tf.float32)
        window = tf.constant([0., 0., H * 1., W * 1.], dtype=tf.float32)
        refined_rois = transforms.bbox_clip(refined_rois, window)

        # Filter out background boxes
        keep = tf.where(class_ids > 0)[:, 0]

        # Filter out low confidence boxes
        if self.min_confidence:
            conf_keep = tf.where(class_scores >= self.min_confidence)[:, 0]
            keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                            tf.expand_dims(conf_keep, 0))
            keep = tf.sparse_tensor_to_dense(keep)[0]

        # Apply per-class NMS
        # 1. Prepare variables
        pre_nms_class_ids = tf.gather(class_ids, keep)
        pre_nms_scores = tf.gather(class_scores, keep)
        pre_nms_rois = tf.gather(refined_rois, keep)
        unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

        def nms_keep_map(class_id):
            '''Apply Non-Maximum Suppression on ROIs of the given class.'''
            # Indices of ROIs of the given class
            ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
            # Apply NMS
            class_keep = tf.image.non_max_suppression(
                tf.gather(pre_nms_rois, ixs),
                tf.gather(pre_nms_scores, ixs),
                max_output_size=self.max_instances,
                iou_threshold=self.nms_threshold)
            # Map indices
            class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
            return class_keep

        # 2. Map over class IDs
        nms_keep = []
        for i in range(unique_pre_nms_class_ids.shape[0]):
            nms_keep.append(nms_keep_map(unique_pre_nms_class_ids[i]))
        nms_keep = tf.concat(nms_keep, axis=0)

        # 3. Compute intersection between keep and nms_keep
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(nms_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]
        # Keep top detections
        roi_count = self.max_instances
        class_scores_keep = tf.gather(class_scores, keep)
        num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
        top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
        keep = tf.gather(keep, top_ids)

        detections = tf.concat([
            tf.gather(refined_rois, keep),
            tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis],
            tf.gather(class_scores, keep)[..., tf.newaxis]
        ],
                               axis=1)

        return detections
Example #27
def model_fn(features, labels, mode, params):
	embedding_size = 36
	movie_id = features["movie_id"]
	user_id = features["user_id"]
	rating = features["user_rating"]

	if mode == tf.estimator.ModeKeys.TRAIN:
		lookup_node_list = [
			"/job:ps/replica:0/task:{}/CPU:0".format(i)
			for i in range(params["ps_num"])]
		initializer = tf.keras.initializers.RandomNormal(-1, 1)
	else:
		lookup_node_list = ["/job:localhost/replica:0/task:0/CPU:0"] * params["ps_num"]
		initializer = tf.keras.initializers.Zeros()

	redis_config=tfra.dynamic_embedding.RedisTableConfig(
		redis_config_abs_dir_env="model_tfra_redis_config_path"
	)
	redis_creator=tfra.dynamic_embedding.RedisTableCreator(redis_config)
	user_embeddings = tfra.dynamic_embedding.get_variable(
		name="user_dynamic_embeddings",
		dim=embedding_size,
		devices=lookup_node_list,
		initializer=initializer,
		kv_creator=redis_creator)
	movie_embeddings = tfra.dynamic_embedding.get_variable(
		name="moive_dynamic_embeddings",
		dim=embedding_size,
		devices=lookup_node_list,
		initializer=initializer,
		kv_creator=redis_creator)

	user_id_val, user_id_idx = tf.unique(tf.concat(user_id, axis=0))
	user_id_weights, user_id_trainable_wrapper = tfra.dynamic_embedding.embedding_lookup(
		params=user_embeddings,
		ids=user_id_val,
		name="user-id-weights",
		return_trainable=True)
	user_id_weights = tf.gather(user_id_weights, user_id_idx)

	movie_id_val, movie_id_idx = tf.unique(tf.concat(movie_id, axis=0))
	movie_id_weights, movie_id_trainable_wrapper = tfra.dynamic_embedding.embedding_lookup(
		params=movie_embeddings,
		ids=movie_id_val,
		name="movie-id-weights",
		return_trainable=True)
	movie_id_weights = tf.gather(movie_id_weights, movie_id_idx)

	embeddings = tf.concat([user_id_weights, movie_id_weights], axis=1)
	d0 = Dense(256,
			   activation='relu',
			   kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),
			   bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))
	d1 = Dense(64,
			   activation='relu',
			   kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),
			   bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))
	d2 = Dense(1,
			   kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),
			   bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))
	dnn = d0(embeddings)
	dnn = d1(dnn)
	dnn = d2(dnn)
	out = tf.reshape(dnn, shape=[-1])
#	loss = tf.keras.losses.MeanSquaredError()(rating, out)

	per_example_loss = (out - rating)**2
	loss = tf.nn.compute_average_loss(per_example_loss)

	predictions = {"out": out}
	acc = tf.metrics.Accuracy()
	acc.update_state([0.1, 1.0], [1.0, 0.1])

	tensors_to_log = {"user_id_val": user_id_val.name}
	hook = tf.estimator.LoggingTensorHook(tensors_to_log, every_n_iter=100)

	if mode == tf.estimator.ModeKeys.EVAL:
		eval_metric_ops = {"accuracy": acc}
		return tf.estimator.EstimatorSpec(mode=mode,
										  loss=loss,
										  eval_metric_ops=eval_metric_ops)

	if mode == tf.estimator.ModeKeys.TRAIN:
		optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001)
		optimizer = tfra.dynamic_embedding.DynamicEmbeddingOptimizer(optimizer)
		train_op = optimizer.minimize(
			loss, global_step=tf.compat.v1.train.get_or_create_global_step())
		return tf.estimator.EstimatorSpec(mode=mode,
										  predictions=predictions,
										  loss=loss,
										  train_op=train_op,
											training_hooks=[hook])

	if mode == tf.estimator.ModeKeys.PREDICT:
		predictions_for_net = {"out": out}
		export_outputs = {
			"predict_export_outputs":
				tf.estimator.export.PredictOutput(outputs=predictions_for_net)
		}
		return tf.estimator.EstimatorSpec(mode,
										  predictions=predictions_for_net,
										  export_outputs=export_outputs,
											prediction_hooks=[hook])
Example #28
    def _compute_one_image_loss(self, pbbox_yx, pbbox_hw, abbox_y1x1, abbox_y2x2,
                                abbox_yx, abbox_hw, pconf, ground_truth):
        slice_index = tf.argmin(ground_truth, axis=0)[0]
        ground_truth = tf.gather(ground_truth, tf.range(0, slice_index, dtype=tf.int64))
        gbbox_yx = ground_truth[..., 0:2]
        gbbox_hw = ground_truth[..., 2:4]
        gbbox_y1x1 = gbbox_yx - gbbox_hw / 2.
        gbbox_y2x2 = gbbox_yx + gbbox_hw / 2.
        class_id = tf.cast(ground_truth[..., 4:5], dtype=tf.int32)
        label = class_id

        abbox_hwti = tf.reshape(abbox_hw, [1, -1, 2])
        abbox_y1x1ti = tf.reshape(abbox_y1x1, [1, -1, 2])
        abbox_y2x2ti = tf.reshape(abbox_y2x2, [1, -1, 2])
        gbbox_hwti = tf.reshape(gbbox_hw, [-1, 1, 2])
        gbbox_y1x1ti = tf.reshape(gbbox_y1x1, [-1, 1, 2])
        gbbox_y2x2ti = tf.reshape(gbbox_y2x2, [-1, 1, 2])
        ashape = tf.shape(abbox_hwti)
        gshape = tf.shape(gbbox_hwti)
        abbox_hwti = tf.tile(abbox_hwti, [gshape[0], 1, 1])
        abbox_y1x1ti = tf.tile(abbox_y1x1ti, [gshape[0], 1, 1])
        abbox_y2x2ti = tf.tile(abbox_y2x2ti, [gshape[0], 1, 1])
        gbbox_hwti = tf.tile(gbbox_hwti, [1, ashape[1], 1])
        gbbox_y1x1ti = tf.tile(gbbox_y1x1ti, [1, ashape[1], 1])
        gbbox_y2x2ti = tf.tile(gbbox_y2x2ti, [1, ashape[1], 1])

        gaiou_y1x1ti = tf.maximum(abbox_y1x1ti, gbbox_y1x1ti)
        gaiou_y2x2ti = tf.minimum(abbox_y2x2ti, gbbox_y2x2ti)
        gaiou_area = tf.reduce_prod(tf.maximum(gaiou_y2x2ti - gaiou_y1x1ti, 0), axis=-1)
        aarea = tf.reduce_prod(abbox_hwti, axis=-1)
        garea = tf.reduce_prod(gbbox_hwti, axis=-1)
        gaiou_rate = gaiou_area / (aarea + garea - gaiou_area)

        best_raindex = tf.argmax(gaiou_rate, axis=1)
        best_pbbox_yx = tf.gather(pbbox_yx, best_raindex)
        best_pbbox_hw = tf.gather(pbbox_hw, best_raindex)
        best_pconf = tf.gather(pconf, best_raindex)
        best_abbox_yx = tf.gather(abbox_yx, best_raindex)
        best_abbox_hw = tf.gather(abbox_hw, best_raindex)

        bestmask, _ = tf.unique(best_raindex)
        bestmask = tf.contrib.framework.sort(bestmask)
        bestmask = tf.reshape(bestmask, [-1, 1])
        bestmask = tf.sparse.SparseTensor(tf.concat([bestmask, tf.zeros_like(bestmask)], axis=-1),
                                          tf.squeeze(tf.ones_like(bestmask)), dense_shape=[ashape[1], 1])
        bestmask = tf.reshape(tf.cast(tf.sparse.to_dense(bestmask), tf.float32), [-1])

        othermask = 1. - bestmask
        othermask = othermask > 0.
        other_pbbox_yx = tf.boolean_mask(pbbox_yx, othermask)
        other_pbbox_hw = tf.boolean_mask(pbbox_hw, othermask)
        other_pconf = tf.boolean_mask(pconf, othermask)

        other_abbox_yx = tf.boolean_mask(abbox_yx, othermask)
        other_abbox_hw = tf.boolean_mask(abbox_hw, othermask)

        agiou_rate = tf.transpose(gaiou_rate)
        other_agiou_rate = tf.boolean_mask(agiou_rate, othermask)
        best_agiou_rate = tf.reduce_max(other_agiou_rate, axis=1)
        pos_agiou_mask = best_agiou_rate > 0.5
        neg_agiou_mask = best_agiou_rate < 0.4
        rgindex = tf.argmax(other_agiou_rate, axis=1)
        pos_rgindex = tf.boolean_mask(rgindex, pos_agiou_mask)
        pos_ppox_yx = tf.boolean_mask(other_pbbox_yx, pos_agiou_mask)
        pos_ppox_hw = tf.boolean_mask(other_pbbox_hw, pos_agiou_mask)
        pos_pconf = tf.boolean_mask(other_pconf, pos_agiou_mask)
        pos_abbox_yx = tf.boolean_mask(other_abbox_yx, pos_agiou_mask)
        pos_abbox_hw = tf.boolean_mask(other_abbox_hw, pos_agiou_mask)
        pos_label = tf.gather(label, pos_rgindex)
        pos_gbbox_yx = tf.gather(gbbox_yx, pos_rgindex)
        pos_gbbox_hw = tf.gather(gbbox_hw, pos_rgindex)

        neg_pconf = tf.boolean_mask(other_pconf, neg_agiou_mask)
        neg_shape = tf.shape(neg_pconf)
        num_neg = neg_shape[0]
        neg_class_id = tf.constant([self.num_classes-1])
        neg_label = tf.tile(neg_class_id, [num_neg])

        pos_pbbox_yx = tf.concat([best_pbbox_yx, pos_ppox_yx], axis=0)
        pos_pbbox_hw = tf.concat([best_pbbox_hw, pos_ppox_hw], axis=0)
        pos_pconf = tf.concat([best_pconf, pos_pconf], axis=0)
        pos_label = tf.concat([label, pos_label], axis=0)
        pos_gbbox_yx = tf.concat([gbbox_yx, pos_gbbox_yx], axis=0)
        pos_gbbox_hw = tf.concat([gbbox_hw, pos_gbbox_hw], axis=0)
        pos_abbox_yx = tf.concat([best_abbox_yx, pos_abbox_yx], axis=0)
        pos_abbox_hw = tf.concat([best_abbox_hw, pos_abbox_hw], axis=0)
        conf_loss = self._focal_loss(pos_label, pos_pconf, neg_label, neg_pconf)

        pos_truth_pbbox_yx = (pos_gbbox_yx - pos_abbox_yx) / pos_abbox_hw
        pos_truth_pbbox_hw = tf.log(pos_gbbox_hw / pos_abbox_hw)
        pos_yx_loss = tf.reduce_sum(self._smooth_l1_loss(pos_pbbox_yx - pos_truth_pbbox_yx), axis=-1)
        pos_hw_loss = tf.reduce_sum(self._smooth_l1_loss(pos_pbbox_hw - pos_truth_pbbox_hw), axis=-1)
        pos_coord_loss = tf.reduce_mean(pos_yx_loss + pos_hw_loss)

        total_loss = conf_loss + pos_coord_loss
        return total_loss
Example #29
def _process_segment_and_label(video_matrix, num_frames, contexts,
                               segment_labels, segment_size,
                               num_classes) -> Dict[str, tf.Tensor]:
    """Processes a batched Tensor of frames.

  The same parameters used in process should be used here.
  Args:
    video_matrix: different frame-level features concatenated into one matrix.
    num_frames: Number of frames per subclip.
    contexts: context features extracted by the decoder.
    segment_labels: boolean, whether segment labels are read instead of
      video-level labels.
    segment_size: the segment size used for reading segments, i.e. the segment
      length in frames.
    num_classes: a positive integer for the number of classes.

  Returns:
    output: dictionary containing batch information
  """
    # Partition frame-level feature matrix to segment-level feature matrix.
    batch_video_ids = None
    if segment_labels:
        start_times = contexts["segment_start_times"].values
        # Here we assume all the segments that start at the same start time have
        # the same segment_size.
        uniq_start_times, seg_idxs = tf.unique(start_times,
                                               out_idx=tf.dtypes.int64)
        # Range gather matrix, e.g., [[0,1,2],[1,2,3]] for segment_size == 3.
        range_mtx = tf.expand_dims(uniq_start_times, axis=-1) + tf.expand_dims(
            tf.range(0, segment_size, dtype=tf.int64), axis=0)
        # Shape: [num_segment, segment_size, feature_dim].
        batch_video_matrix = tf.gather_nd(video_matrix,
                                          tf.expand_dims(range_mtx, axis=-1))
        num_segment = tf.shape(batch_video_matrix)[0]
        if "id" in contexts:
            batch_video_ids = tf.reshape(
                tf.tile([contexts["id"]], [num_segment]), (num_segment, ))
        batch_frames = tf.reshape(tf.tile([segment_size], [num_segment]),
                                  (num_segment, ))
        batch_frames = tf.cast(tf.expand_dims(batch_frames, 1), tf.float32)

        # For segment labels, not all labels are exhaustively rated. So we only
        # evaluate the rated labels.

        # Label indices for each segment, shape: [num_segment, 2].
        label_indices = tf.stack([seg_idxs, contexts["segment_labels"].values],
                                 axis=-1)
        label_values = contexts["segment_scores"].values
        sparse_labels = tf.sparse.SparseTensor(label_indices, label_values,
                                               (num_segment, num_classes))
        batch_labels = tf.sparse.to_dense(sparse_labels,
                                          validate_indices=False)

        sparse_label_weights = tf.sparse.SparseTensor(
            label_indices, tf.ones_like(label_values, dtype=tf.float32),
            (num_segment, num_classes))
        batch_label_weights = tf.sparse.to_dense(sparse_label_weights,
                                                 validate_indices=False)
        # output_dict = utils.get_segments(batch_video_matrix, batch_frames, 5)
    else:
        # Process video-level labels.
        label_indices = contexts["labels"].values
        sparse_labels = tf.sparse.SparseTensor(
            tf.expand_dims(label_indices, axis=-1),
            tf.ones_like(contexts["labels"].values, dtype=tf.bool),
            (num_classes, ))
        labels = tf.sparse.to_dense(sparse_labels,
                                    default_value=False,
                                    validate_indices=False)

        # convert to batch format.
        if "id" in contexts:
            batch_video_ids = tf.expand_dims(contexts["id"], 0)
        batch_video_matrix = tf.expand_dims(video_matrix, 0)
        batch_labels = tf.expand_dims(labels, 0)
        batch_frames = tf.expand_dims(num_frames, 0)
        batch_label_weights = None

    output_dict = {
        "video_matrix": batch_video_matrix,
        "labels": batch_labels,
        "num_frames": batch_frames,
    }
    if batch_video_ids is not None:
        output_dict["video_ids"] = batch_video_ids
    if batch_label_weights is not None:
        output_dict["label_weights"] = batch_label_weights

    return output_dict
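# Minimal standalone sketch (toy values, not part of the function above): the
# "range gather matrix" used to partition frame-level features into segments.
# With segment_size == 3 and start times [0, 5, 5], tf.unique yields the unique
# starts [0, 5] plus, for each original segment, the index of its unique start.
import tensorflow as tf

start_times = tf.constant([0, 5, 5], dtype=tf.int64)
uniq_start_times, seg_idxs = tf.unique(start_times, out_idx=tf.dtypes.int64)
range_mtx = tf.expand_dims(uniq_start_times, axis=-1) + tf.expand_dims(
    tf.range(0, 3, dtype=tf.int64), axis=0)
# range_mtx evaluates to [[0, 1, 2], [5, 6, 7]] and seg_idxs to [0, 1, 1];
# gathering video_matrix rows with range_mtx gives one feature block per segment.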
Example #30
0
def get_processed_frame_data(rgb_frame,
                             audio_frame,
                             feature_list,
                             concat_features=False):
    rgb_frame_trans = tf.transpose(rgb_frame, perm=[1, 0])
    audio_frame_trans = tf.transpose(audio_frame, perm=[1, 0])

    video_length = tf.shape(rgb_frame)[0]

    q0_rgb_frame = tf.reduce_min(rgb_frame, reduction_indices=0)
    q1_rgb_frame = tf.reduce_min(tf.nn.top_k(
        rgb_frame_trans,
        k=tf.to_int32(tf.scalar_mul(0.75, tf.to_float(video_length))),
        sorted=False).values,
                                 reduction_indices=1)
    q2_rgb_frame = tf.reduce_min(tf.nn.top_k(
        rgb_frame_trans,
        k=tf.to_int32(tf.scalar_mul(0.50, tf.to_float(video_length))),
        sorted=False).values,
                                 reduction_indices=1)
    q3_rgb_frame = tf.reduce_min(tf.nn.top_k(
        rgb_frame_trans,
        k=tf.to_int32(tf.scalar_mul(0.25, tf.to_float(video_length))),
        sorted=False).values,
                                 reduction_indices=1)
    q4_rgb_frame = tf.reduce_max(rgb_frame, reduction_indices=0)
    mean_rgb_frame = tf.reduce_mean(rgb_frame, reduction_indices=0)
    stddv_rgb_frame = tf.sqrt(
        tf.reduce_mean(tf.square(rgb_frame - mean_rgb_frame),
                       reduction_indices=0))
    skew_rgb_frame = tf.div(
        tf.reduce_mean(tf.pow(rgb_frame - mean_rgb_frame, 3),
                       reduction_indices=0), tf.pow(stddv_rgb_frame, 3))
    kurt_rgb_frame = tf.div(
        tf.reduce_mean(tf.pow(rgb_frame - mean_rgb_frame, 4),
                       reduction_indices=0), tf.pow(stddv_rgb_frame, 4))

    q0_audio_frame = tf.reduce_min(audio_frame, reduction_indices=0)
    q1_audio_frame = tf.reduce_min(tf.nn.top_k(
        audio_frame_trans,
        k=tf.to_int32(tf.scalar_mul(0.75, tf.to_float(video_length))),
        sorted=False).values,
                                   reduction_indices=1)
    q2_audio_frame = tf.reduce_min(tf.nn.top_k(
        audio_frame_trans,
        k=tf.to_int32(tf.scalar_mul(0.50, tf.to_float(video_length))),
        sorted=False).values,
                                   reduction_indices=1)
    q3_audio_frame = tf.reduce_min(tf.nn.top_k(
        audio_frame_trans,
        k=tf.to_int32(tf.scalar_mul(0.25, tf.to_float(video_length))),
        sorted=False).values,
                                   reduction_indices=1)
    q4_audio_frame = tf.reduce_max(audio_frame, reduction_indices=0)
    mean_audio_frame = tf.reduce_mean(audio_frame, reduction_indices=0)
    stddv_audio_frame = tf.sqrt(
        tf.reduce_mean(tf.square(audio_frame - mean_audio_frame),
                       reduction_indices=0))
    skew_audio_frame = tf.div(
        tf.reduce_mean(tf.pow(audio_frame - mean_audio_frame, 3),
                       reduction_indices=0), tf.pow(stddv_audio_frame, 3))
    kurt_audio_frame = tf.div(
        tf.reduce_mean(tf.pow(audio_frame - mean_audio_frame, 4),
                       reduction_indices=0), tf.pow(stddv_audio_frame, 4))

    iqr_rgb_frame = tf.subtract(q3_rgb_frame, q1_rgb_frame)
    rng_rgb_frame = tf.subtract(q4_rgb_frame, q0_rgb_frame)

    iqr_audio_frame = tf.subtract(q3_audio_frame, q1_audio_frame)
    rng_audio_frame = tf.subtract(q4_audio_frame, q0_audio_frame)

    coeffvar_rgb_frame = tf.div(stddv_rgb_frame, mean_rgb_frame)
    efficiency_rgb_frame = tf.div(tf.square(stddv_rgb_frame),
                                  tf.square(mean_rgb_frame))
    midhinge_rgb_frame = tf.add(q3_rgb_frame, q1_rgb_frame)
    qntcoeffdisp_rgb_frame = tf.div(iqr_rgb_frame, midhinge_rgb_frame)

    coeffvar_audio_frame = tf.div(stddv_audio_frame, mean_audio_frame)
    efficiency_audio_frame = tf.div(tf.square(stddv_audio_frame),
                                    tf.square(mean_audio_frame))
    midhinge_audio_frame = tf.add(q3_audio_frame, q1_audio_frame)
    qntcoeffdisp_audio_frame = tf.div(iqr_audio_frame, midhinge_audio_frame)

    # Mean Absolute Difference
    md_rgb_frame = tf.div(
        tf.reduce_sum(tf.abs(
            tf.matrix_band_part(
                tf.subtract(tf.expand_dims(rgb_frame_trans, 2),
                            tf.expand_dims(rgb_frame_trans, 1)), 0, -1)),
                      reduction_indices=[1, 2]),
        tf.cast(tf.multiply(video_length, video_length - 1), tf.float32))
    # Median Absolute Deviation around Median
    abs_dev_median = tf.transpose(tf.abs(tf.subtract(rgb_frame, q2_rgb_frame)),
                                  perm=[1, 0])
    mean_abs_med_rgb_frame = tf.reduce_min(tf.nn.top_k(
        abs_dev_median,
        k=tf.to_int32(tf.scalar_mul(0.50, tf.to_float(video_length))),
        sorted=False).values,
                                           reduction_indices=1)
    # Mean Absolute Deviation around Mean
    mean_abs_mean_rgb_frame = tf.reduce_mean(tf.abs(
        tf.subtract(rgb_frame, mean_rgb_frame)),
                                             reduction_indices=0)

    pairwise_man, _ = tf.unique(
        tf.reshape(
            tf.matrix_band_part(
                tf.reduce_sum(tf.abs(
                    tf.subtract(tf.expand_dims(rgb_frame, 0),
                                tf.expand_dims(rgb_frame, 1))),
                              reduction_indices=[2]), 0, -1), [-1]))

    local_features = locals()
    if concat_features:
        features = []
        for x in feature_list:
            if x != 'video_length':
                features.append(local_features[x])
            else:
                features.append(
                    tf.cast(tf.convert_to_tensor([video_length]), tf.float32))
        features = tf.concat(features, 0)
    else:
        features = {
            feature: local_features[feature]
            for feature in feature_list
        }

    return features
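# Minimal standalone sketch (toy values, not part of the function above): the
# top_k quantile trick used for q1/q2/q3. Taking the smallest value of the top
# 75% of a row approximates that row's first quartile.
import tensorflow as tf

frames = tf.constant([[1.], [2.], [3.], [4.]])   # 4 frames, 1 feature
frames_t = tf.transpose(frames, perm=[1, 0])
n = tf.shape(frames)[0]
q1 = tf.reduce_min(
    tf.nn.top_k(frames_t,
                k=tf.to_int32(tf.scalar_mul(0.75, tf.to_float(n))),
                sorted=False).values,
    reduction_indices=1)
# The top 75% of [1, 2, 3, 4] is [2, 3, 4]; its minimum, 2.0, estimates Q1.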
Example #31
0
  def build_inference_for_training(self):
    """Invokes depth and ego-motion networks and computes clouds if needed."""
    (self.image_stack, self.image_stack_norm, self.seg_stack,
     self.intrinsic_mat, self.intrinsic_mat_inv) = self.reader.read_data()
    with tf.variable_scope('depth_prediction'):
      # Organized by ...[i][scale].  Note that the order is flipped in
      # variables in build_loss() below.
      self.disp = {}
      self.depth = {}
      self.depth_upsampled = {}
      self.inf_loss = 0.0
      # Organized by [i].
      disp_bottlenecks = [None] * self.seq_length

      if self.icp_weight > 0:
        self.cloud = {}
      for i in range(self.seq_length):
        image = self.image_stack_norm[:, :, :, 3 * i:3 * (i + 1)]

        multiscale_disps_i, disp_bottlenecks[i] = nets.disp_net(
            self.architecture, image, self.use_skip,
            self.weight_reg, True)
        multiscale_depths_i = [1.0 / d for d in multiscale_disps_i]
        self.disp[i] = multiscale_disps_i
        self.depth[i] = multiscale_depths_i
        if self.depth_upsampling:
          self.depth_upsampled[i] = []
          # Upsample low-resolution depth maps using differentiable bilinear
          # interpolation.
          for s in range(len(multiscale_depths_i)):
            self.depth_upsampled[i].append(tf.image.resize_bilinear(
                multiscale_depths_i[s], [self.img_height, self.img_width],
                align_corners=True))

        if self.icp_weight > 0:
          multiscale_clouds_i = [
              project.get_cloud(d,
                                self.intrinsic_mat_inv[:, s, :, :],
                                name='cloud%d_%d' % (s, i))
              for (s, d) in enumerate(multiscale_depths_i)
          ]
          self.cloud[i] = multiscale_clouds_i
        # Reuse the same depth graph for all images.
        tf.get_variable_scope().reuse_variables()

    if self.handle_motion:
      # Define egomotion network. This network can see the whole scene except
      # for any moving objects as indicated by the provided segmentation masks.
      # To avoid giving the network motion cues through tracking of those masks,
      # we define the segmentation masks as their temporal union.
      with tf.variable_scope('egomotion_prediction'):
        base_input = self.image_stack_norm  # (B, H, W, 9)
        seg_input = self.seg_stack  # (B, H, W, 9)
        ref_zero = tf.constant(0, dtype=tf.uint8)
        # Motion model is currently defined for three-frame sequences.
        object_mask1 = tf.equal(seg_input[:, :, :, 0], ref_zero)
        object_mask2 = tf.equal(seg_input[:, :, :, 3], ref_zero)
        object_mask3 = tf.equal(seg_input[:, :, :, 6], ref_zero)
        mask_complete = tf.expand_dims(tf.logical_and(  # (B, H, W, 1)
            tf.logical_and(object_mask1, object_mask2), object_mask3), axis=3)
        mask_complete = tf.tile(mask_complete, (1, 1, 1, 9))  # (B, H, W, 9)
        # Now mask out base_input.
        self.mask_complete = tf.to_float(mask_complete)
        self.base_input_masked = base_input * self.mask_complete
        self.egomotion = nets.egomotion_net(
            image_stack=self.base_input_masked,
            disp_bottleneck_stack=None,
            joint_encoder=False,
            seq_length=self.seq_length,
            weight_reg=self.weight_reg)

      # Define object motion network for refinement. This network only sees
      # one object at a time over the whole sequence, and tries to estimate its
      # motion. The sequence of images are the respective warped frames.

      # For each scale, contains batch_size elements of shape (N, 2, 6).
      self.object_transforms = {}
      # For each scale, contains batch_size elements of shape (N, H, W, 9).
      self.object_masks = {}
      self.object_masks_warped = {}
      # For each scale, contains batch_size elements of size N.
      self.object_ids = {}

      self.egomotions_seq = {}
      self.warped_seq = {}
      self.inputs_objectmotion_net = {}
      with tf.variable_scope('objectmotion_prediction'):
        # First, warp raw images according to overall egomotion.
        for s in range(NUM_SCALES):
          self.warped_seq[s] = []
          self.egomotions_seq[s] = []
          for source_index in range(self.seq_length):
            egomotion_mat_i_1 = project.get_transform_mat(
                self.egomotion, source_index, 1)
            warped_image_i_1, _ = (
                project.inverse_warp(
                    self.image_stack[
                        :, :, :, source_index*3:(source_index+1)*3],
                    self.depth_upsampled[1][s],
                    egomotion_mat_i_1,
                    self.intrinsic_mat[:, 0, :, :],
                    self.intrinsic_mat_inv[:, 0, :, :]))

            self.warped_seq[s].append(warped_image_i_1)
            self.egomotions_seq[s].append(egomotion_mat_i_1)

          # Second, for every object in the segmentation mask, take its mask and
          # warp it according to the egomotion estimate. Then threshold the warped
          # result to binarize it. Use this mask to mask out background and
          # other objects, and pass the filtered image to the object motion
          # network.
          self.object_transforms[s] = []
          self.object_masks[s] = []
          self.object_ids[s] = []
          self.object_masks_warped[s] = []
          self.inputs_objectmotion_net[s] = {}

          for i in range(self.batch_size):
            seg_sequence = self.seg_stack[i]  # (H, W, 9=3*3)
            object_ids = tf.unique(tf.reshape(seg_sequence, [-1]))[0]
            self.object_ids[s].append(object_ids)
            color_stack = []
            mask_stack = []
            mask_stack_warped = []
            for j in range(self.seq_length):
              current_image = self.warped_seq[s][j][i]  # (H, W, 3)
              current_seg = seg_sequence[:, :, j * 3:(j+1) * 3]  # (H, W, 3)

              def process_obj_mask_warp(obj_id):
                """Performs warping of the individual object masks."""
                obj_mask = tf.to_float(tf.equal(current_seg, obj_id))
                # Warp obj_mask according to overall egomotion.
                obj_mask_warped, _ = (
                    project.inverse_warp(
                        tf.expand_dims(obj_mask, axis=0),
                        # Middle frame, highest scale, batch element i:
                        tf.expand_dims(self.depth_upsampled[1][s][i], axis=0),
                        # Matrix for warping j into middle frame, batch elem. i:
                        tf.expand_dims(self.egomotions_seq[s][j][i], axis=0),
                        tf.expand_dims(self.intrinsic_mat[i, 0, :, :], axis=0),
                        tf.expand_dims(self.intrinsic_mat_inv[i, 0, :, :],
                                       axis=0)))
                obj_mask_warped = tf.squeeze(obj_mask_warped)
                obj_mask_binarized = tf.greater(  # Threshold to binarize mask.
                    obj_mask_warped, tf.constant(0.5))
                return tf.to_float(obj_mask_binarized)

              def process_obj_mask(obj_id):
                """Returns the individual object masks separately."""
                return tf.to_float(tf.equal(current_seg, obj_id))
              object_masks = tf.map_fn(  # (N, H, W, 3)
                  process_obj_mask, object_ids, dtype=tf.float32)

              if self.size_constraint_weight > 0:
                # The object segmentation masks are all in object_masks.
                # We need to measure the height of every of them, and get the
                # approximate distance.

                # self.depth_upsampled of shape (seq_length, scale, B, H, W).
                depth_pred = self.depth_upsampled[j][s][i]  # (H, W)
                def get_losses(obj_mask):
                  """Get motion constraint loss."""
                  # Find height of segment.
                  coords = tf.where(tf.greater(  # Shape (num_true, 2=yx)
                      obj_mask[:, :, 0], tf.constant(0.5, dtype=tf.float32)))
                  y_max = tf.reduce_max(coords[:, 0])
                  y_min = tf.reduce_min(coords[:, 0])
                  seg_height = y_max - y_min
                  f_y = self.intrinsic_mat[i, 0, 1, 1]
                  approx_depth = ((f_y * self.global_scale_var) /
                                  tf.to_float(seg_height))
                  reference_pred = tf.boolean_mask(
                      depth_pred, tf.greater(
                          tf.reshape(obj_mask[:, :, 0],
                                     (self.img_height, self.img_width, 1)),
                          tf.constant(0.5, dtype=tf.float32)))

                  # Establish loss on approx_depth, a scalar, and
                  # reference_pred, our dense prediction. Normalize both to
                  # prevent degenerative depth shrinking.
                  global_mean_depth_pred = tf.reduce_mean(depth_pred)
                  reference_pred /= global_mean_depth_pred
                  approx_depth /= global_mean_depth_pred
                  spatial_err = tf.abs(reference_pred - approx_depth)
                  mean_spatial_err = tf.reduce_mean(spatial_err)
                  return mean_spatial_err

                losses = tf.map_fn(
                    get_losses, object_masks, dtype=tf.float32)
                self.inf_loss += tf.reduce_mean(losses)
              object_masks_warped = tf.map_fn(  # (N, H, W, 3)
                  process_obj_mask_warp, object_ids, dtype=tf.float32)
              filtered_images = tf.map_fn(
                  lambda mask: current_image * mask, object_masks_warped,
                  dtype=tf.float32)  # (N, H, W, 3)
              color_stack.append(filtered_images)
              mask_stack.append(object_masks)
              mask_stack_warped.append(object_masks_warped)

            # For this batch-element, if there are N moving objects,
            # color_stack, mask_stack and mask_stack_warped contain both
            # seq_length elements of shape (N, H, W, 3).
            # We can now concatenate them on the last axis, creating a tensor of
            # (N, H, W, 3*3 = 9), and, assuming N does not get too large so that
            # we have enough memory, pass them in a single batch to the object
            # motion network.
            mask_stack = tf.concat(mask_stack, axis=3)  # (N, H, W, 9)
            mask_stack_warped = tf.concat(mask_stack_warped, axis=3)
            color_stack = tf.concat(color_stack, axis=3)  # (N, H, W, 9)
            all_transforms = nets.objectmotion_net(
                # We cut the gradient flow here as the object motion gradient
                # should have no say in how the egomotion network behaves.
                # One could try just stopping the gradient for egomotion, but
                # not for the depth prediction network.
                image_stack=tf.stop_gradient(color_stack),
                disp_bottleneck_stack=None,
                joint_encoder=False,  # Joint encoder not supported.
                seq_length=self.seq_length,
                weight_reg=self.weight_reg)
            # all_transforms of shape (N, 2, 6).
            self.object_transforms[s].append(all_transforms)
            self.object_masks[s].append(mask_stack)
            self.object_masks_warped[s].append(mask_stack_warped)
            self.inputs_objectmotion_net[s][i] = color_stack
            tf.get_variable_scope().reuse_variables()
    else:
      # Don't handle motion, classic model formulation.
      with tf.name_scope('egomotion_prediction'):
        if self.joint_encoder:
          # Re-arrange disp_bottleneck_stack to be of shape
          # [B, h_hid, w_hid, c_hid * seq_length]. Currently, it is a list with
          # seq_length elements, each of dimension [B, h_hid, w_hid, c_hid].
          disp_bottleneck_stack = tf.concat(disp_bottlenecks, axis=3)
        else:
          disp_bottleneck_stack = None
        self.egomotion = nets.egomotion_net(
            image_stack=self.image_stack_norm,
            disp_bottleneck_stack=disp_bottleneck_stack,
            joint_encoder=self.joint_encoder,
            seq_length=self.seq_length,
            weight_reg=self.weight_reg)
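# Minimal standalone sketch (toy mask, not part of the method above): how the
# per-batch-element object IDs are extracted, by flattening the segmentation
# stack and taking its unique values (0 is treated as background).
import tensorflow as tf

seg_sequence = tf.constant([[0, 0, 7],
                            [0, 7, 7]], dtype=tf.int32)
object_ids = tf.unique(tf.reshape(seg_sequence, [-1]))[0]
# object_ids evaluates to [0, 7].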
Example #32
0
###############################################################################

# YOUR CODE
x = tf.random_normal((10, 10))
out = tf.matrix_determinant(x)
print(sess.run(out))

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################

# YOUR CODE
x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9], tf.float32)
out, index = tf.unique(x)
print(sess.run(out))

###############################################################################
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
# as long as they are from the same distribution.
# Use tf.cond() to return:
# - The mean squared error of (x - y) if the average of all elements in (x - y)
#   is negative, or
# - The sum of absolute value of all elements in the tensor (x - y) otherwise.
# Hint: see the Huber loss function in the lecture slides 3.
###############################################################################

# YOUR CODE
x = tf.random_normal((300, ))
y = tf.random_normal((300, ))
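# One possible completion (not in the original snippet): branch with tf.cond()
# on the sign of the average of (x - y), as the exercise asks.
average = tf.reduce_mean(x - y)
def f1(): return tf.reduce_mean(tf.square(x - y))   # mean squared error
def f2(): return tf.reduce_sum(tf.abs(x - y))       # sum of absolute values
out = tf.cond(average < 0, f1, f2)
print(sess.run(out))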
Example #33
0
    def process_one(self, collection):
        """Process one episode.

    Args:
      collection: Dictionary that contains the following keys:
        support: np.ndarray. Image ID in the support set.
        flag: np.ndarray. Binary flag indicating whether it is labeled (1) or
          unlabeled (0).
        query: np.ndarray. Image ID in the query set.
    """
        s, flag, q = collection['support'], collection['flag'], collection[
            'query']
        del collection['support']
        del collection['query']
        del collection['flag']
        dataset = self.dataset
        nclasses = self.nclasses
        img_s = dataset.get_images(s)
        lbl_s = np.array(collection['support_label'])
        del collection['support_label']
        T = self.maxlen

        # Mask off unlabeled set.
        labeled = flag == 1
        unlabeled = flag == 0
        lbl_s_l = lbl_s[labeled]
        lbl_s_u = lbl_s[unlabeled]

        # Note numpy does not give the desired behavior here.
        # lbl_map, lbl_s_l = np.unique(lbl_s_l, return_inverse=True)
        lbl_map, lbl_s_l = tf.unique(lbl_s_l)

        def query_tf(x):
            x = tf.expand_dims(x, 1)  # [T, 1]
            x_eq = tf.cast(tf.equal(x, lbl_map), tf.float32)  # [T, N]
            x_valid = tf.reduce_sum(x_eq, [1])  # [T]

            # Everything that has not been found -> fixed unknown.
            # This means it's a distractor.
            x = tf.cast(tf.argmax(x_eq, axis=1), tf.float32)
            x = x_valid * x + (1 - x_valid) * nclasses
            x = tf.cast(x, tf.int32)
            return x

        def query_np(x):
            x = np.expand_dims(x, 1)  # [T, 1]
            x_eq = np.equal(x, lbl_map).astype(np.float32)  # [T, N]
            x_valid = np.sum(x_eq, axis=1)  # [T]

            # Everything that has not been found -> fixed unknown.
            # This means it's a distractor.
            x = np.argmax(x_eq, axis=1).astype(np.float32)
            x = x_valid * x + (1 - x_valid) * nclasses
            x = x.astype(np.int32)
            return x

        # Find distractors.
        lbl_s_eq = tf.cast(tf.equal(tf.expand_dims(lbl_s, 1), lbl_map),
                           tf.float32)
        distractor_flag = tf.cast(1.0 - tf.reduce_sum(lbl_s_eq, [1]), tf.int32)

        # Re-indexed labels.
        lbl_s[labeled] = lbl_s_l
        lbl_s[unlabeled] = query_np(lbl_s_u)

        # Label fed into the network.
        lbl_s_masked = np.copy(lbl_s)
        lbl_s_masked[unlabeled] = nclasses

        # We assume a fixed unknown class.
        # Make the first appearance of each item unknown in the ground truth.
        lbl_s_np = np.copy(lbl_s)
        lbl_s_np2 = np.copy(lbl_s_np)
        lbl_s_np2[unlabeled] = -1
        lbl_s_gt = np.zeros([len(lbl_s_np)], dtype=np.int32)
        cummax = np.maximum.accumulate(lbl_s_np2)
        lbl_s_gt[0] = nclasses
        # Labeled to be trained as target.
        cond = lbl_s_np[1:] > cummax[:-1]
        lbl_s_gt[1:] = np.where(cond, nclasses, lbl_s_np[1:])

        if self.nquery > 0:
            img_q = dataset.get_images(q)
            lbl_q = collection['query_label']
            del collection['query_label']
            lbl_q = query_tf(lbl_q)
        else:
            img_q = None
            lbl_q = None
        epi = {
            'x_s': self.pad_x(img_s, T),
            'y_s': self.pad_y(lbl_s_masked, T),
            'y_gt': self.pad_y(lbl_s_gt, T),
            'y_dis': self.pad_y(distractor_flag, T),
            'y_full': self.pad_y(lbl_s, T),
            'flag_s': self.get_flag(lbl_s, T)
        }
        if self.nquery > 0:
            assert False, 'Not supported'

        # For remaining additional info.
        for k in collection:
            epi[k] = self.pad_y(collection[k], T)

        if self.episode_processor is not None:
            epi = self.episode_processor(epi)
        return epi
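# Minimal standalone sketch (toy labels, not part of the method above): the
# tf.unique re-indexing of the labeled support labels. Raw class IDs are mapped
# to contiguous indices, and lbl_map later lets query/unlabeled IDs be matched
# back (anything absent from lbl_map becomes the unknown class).
import tensorflow as tf

lbl_s_l = tf.constant([7, 3, 7, 9])
lbl_map, lbl_s_l_reindexed = tf.unique(lbl_s_l)
# lbl_map evaluates to [7, 3, 9] and lbl_s_l_reindexed to [0, 1, 0, 2].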
Example #34
0
# YOUR CODE
x = tf.random_normal([10, 10], mean=10)
out = tf.matrix_determinant(x)
print("x=", sess.run(x))
print("out=", sess.run(out))

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################

# YOUR CODE
x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
out = tf.unique(x)

###############################################################################
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
# as long as they are from the same distribution.
# Use tf.cond() to return:
# - The mean squared error of (x - y) if the average of all elements in (x - y)
#   is negative, or
# - The sum of absolute value of all elements in the tensor (x - y) otherwise.
# Hint: see the Huber loss function in the lecture slides 3.
###############################################################################

# YOUR CODE
x = tf.random_normal([300], mean=5, stddev=1)
y = tf.random_normal([300], mean=5, stddev=1)
average = tf.reduce_mean(x - y)
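# One possible completion (not in the original snippet):
def f1(): return tf.reduce_mean(tf.square(x - y))   # mean squared error
def f2(): return tf.reduce_sum(tf.abs(x - y))       # sum of absolute values
out = tf.cond(average < 0, f1, f2)
print("out=", sess.run(out))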
Example #35
0
    def generate_sequence_beam_search(self,
                                      input,
                                      max_words=None,
                                      initial_state=None,
                                      attention_states=None,
                                      beam_size=10,
                                      convert_unk=True,
                                      length_normalization_factor=0.,
                                      input_text=None,
                                      input_text_length=None,
                                      emb=None):
        """
    Out-of-graph beam search; the input should be a single instance (batch_size=1).
    max_words is not actually used here, since it is determined out of graph.
    Returns the top (path, score) pairs.
    TODO: this is hacky. At the first step attention_state, input and state all
    have size 1; afterwards attention_state should stay 1 while input and state
    have size beam_size. The result may also hold fewer than beam_size finished
    beams if that many cannot be found.
    """
        if emb is None:
            emb = self.emb

        tf.add_to_collection('beam_search_beam_size', tf.constant(beam_size))
        if input_text is not None:
            if FLAGS.decode_copy:
                input_text = tf.squeeze(input_text)
                input_text_length = tf.to_int32(tf.squeeze(input_text_length))
                input_text = input_text[0:input_text_length]
                input_text, _ = tf.unique(input_text)
                input_text_length = tf.shape(input_text)[-1]
                # Sort from small to large:
                #input_text, _ = -tf.nn.top_k(-input_text, input_text_length)
                # TODO: may need to keep input_text_length so that more decoding
                # constraints can be applied out of graph, e.g. using a trie.
                beam_size = tf.minimum(beam_size, input_text_length)
            elif FLAGS.decode_use_alignment:
                input_text = tf.squeeze(input_text)
                input_text_length = tf.to_int32(tf.squeeze(input_text_length))
                input_text = input_text[0:input_text_length]
                input_text_length = tf.shape(input_text)[-1]
                beam_size = tf.minimum(beam_size, input_text_length)
            else:
                if FLAGS.gen_only:
                    input_text = None

        batch_size = melt.get_batch_size(input)
        if attention_states is None:
            cell = self.cell
        else:
            cell = self.prepare_attention(
                attention_states,
                initial_state=initial_state,
                score_as_alignment=self.score_as_alignment)
            initial_state = None
        state = cell.zero_state(batch_size, tf.float32) \
            if initial_state is None else initial_state

        ## TODO: hard, since the variables need to be reused for sharing; otherwise:
        ## ValueError: Variable seq2seq/main/decode/memory_layer/kernel already exists, disallowed. Did you mean to set reuse=True in VarScope?
        ## Another way to solve this is to always use tiled_batch attention_states and state; the first step would then choose from the first beam only.
        ## That still does not fully solve the problem, since the fed data might be smaller than the beam size, so keeping attention_states at size 1 is the safe choice.
        #cell2 = self.prepare_attention(tf.contrib.seq2seq.tile_batch(attention_states, beam_size), reuse=True)

        first_state = state

        beam_search_step = functools.partial(self.beam_search_step,
                                             beam_size=beam_size)

        # Since generate_sequence_greedy was hacked in earlier, scope.reuse_variables cannot be set here.
        # NOTICE: in order to use the LSTM that lives in the .../rnn/ namespace, this scope must also be added so the shared variables are reused.
        with tf.variable_scope(self.scope) as scope:
            inital_attention, initial_state, initial_logprobs, initial_ids = \
                  beam_search_step(input, state, cell, input_text=input_text)

            if attention_states is not None:
                tf.add_to_collection(
                    'beam_search_initial_alignments',
                    tf.get_collection('attention_alignments')[-1])

            scope.reuse_variables()
            # In inference mode, use concatenated states for convenient feeding and
            # fetching.
            state_is_tuple = len(initial_state) == 2

            if state_is_tuple:
                initial_state = tf.concat(initial_state,
                                          1,
                                          name="initial_state")
                state_size = sum(self.cell.state_size)
            else:
                state_size = self.cell.state_size

            #output is used only when use attention
            if attention_states is not None:
                initial_state = tf.concat([initial_state, inital_attention],
                                          1,
                                          name="initial_attention_state")
                state_size += self.cell.output_size

            tf.add_to_collection('beam_search_initial_state', initial_state)
            tf.add_to_collection('beam_search_initial_logprobs',
                                 initial_logprobs)
            tf.add_to_collection('beam_search_initial_ids', initial_ids)

            input_feed = tf.placeholder(
                dtype=tf.int64,
                shape=[None],  # batch_size
                name="input_feed")
            tf.add_to_collection('beam_search_input_feed', input_feed)
            input = tf.nn.embedding_lookup(emb, input_feed)

            # Placeholder for feeding a batch of concatenated states.
            state_feed = tf.placeholder(dtype=tf.float32,
                                        shape=[None, state_size],
                                        name="state_feed")
            tf.add_to_collection('beam_search_state_feed', state_feed)

            if attention_states is not None:
                state, attention = tf.split(state_feed, [
                    state_size - self.cell.output_size, self.cell.output_size
                ],
                                            axis=1)
            else:
                state = state_feed

            if state_is_tuple:
                state = tf.split(state, num_or_size_splits=2, axis=1)

            if attention_states is not None:
                state_ = first_state.clone(cell_state=state,
                                           attention=attention)
            else:
                state_ = state

            #--TODO: this is not safe if attention_wrapper changes; note that the batch size of attention_states is 1,
            #--while the cell input and state have batch size beam_size.
            #attention, state, top_logprobs, top_ids = beam_search_step(input, state_, cell2)

            if input_text is not None and not FLAGS.decode_copy:
                input_text = tf.contrib.seq2seq.tile_batch(
                    input_text, melt.get_batch_size(input))

            attention, state, top_logprobs, top_ids = beam_search_step(
                input, state_, cell, input_text=input_text)

            if state_is_tuple:
                # Concatenate the resulting state.
                state = tf.concat(state, 1, name="state")
            if attention_states is not None:
                state = tf.concat([state, attention],
                                  1,
                                  name="attention_state")

            tf.add_to_collection('beam_search_state', state)
            tf.add_to_collection('beam_search_logprobs', top_logprobs)
            tf.add_to_collection('beam_search_ids', top_ids)

            # Just return no-ops in the same (path list, score list) form.
            return tf.no_op(), tf.no_op()
Example #36
0
def merge_boxes_with_multiple_labels(boxes,
                                     classes,
                                     confidences,
                                     num_classes,
                                     quantization_bins=10000):
  """Merges boxes with same coordinates and returns K-hot encoded classes.

  Args:
    boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
      normalized coordinates are allowed.
    classes: A tf.int32 tensor with shape [N] holding class indices.
      The class index starts at 0.
    confidences: A tf.float32 tensor with shape [N] holding class confidences.
    num_classes: total number of classes to use for K-hot encoding.
    quantization_bins: the number of bins used to quantize the box coordinate.

  Returns:
    merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
      where N' <= N.
    class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
      K-hot encodings for the merged boxes.
    confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
      holding encodings of confidences for the merged boxes.
    merged_box_indices: A tf.int32 tensor with shape [N'] holding original
      indices of the boxes.
  """
  boxes_shape = tf.shape(boxes)
  classes_shape = tf.shape(classes)
  confidences_shape = tf.shape(confidences)
  box_class_shape_assert = shape_utils.assert_shape_equal_along_first_dimension(
      boxes_shape, classes_shape)
  box_confidence_shape_assert = (
      shape_utils.assert_shape_equal_along_first_dimension(
          boxes_shape, confidences_shape))
  box_dimension_assert = tf.assert_equal(boxes_shape[1], 4)
  box_normalized_assert = shape_utils.assert_box_normalized(boxes)

  with tf.control_dependencies(
      [box_class_shape_assert, box_confidence_shape_assert,
       box_dimension_assert, box_normalized_assert]):
    quantized_boxes = tf.to_int64(boxes * (quantization_bins - 1))
    ymin, xmin, ymax, xmax = tf.unstack(quantized_boxes, axis=1)
    hashcodes = (
        ymin +
        xmin * quantization_bins +
        ymax * quantization_bins * quantization_bins +
        xmax * quantization_bins * quantization_bins * quantization_bins)
    unique_hashcodes, unique_indices = tf.unique(hashcodes)
    num_boxes = tf.shape(boxes)[0]
    num_unique_boxes = tf.shape(unique_hashcodes)[0]
    merged_box_indices = tf.unsorted_segment_min(
        tf.range(num_boxes), unique_indices, num_unique_boxes)
    merged_boxes = tf.gather(boxes, merged_box_indices)

    def map_box_encodings(i):
      """Produces box K-hot and score encodings for each class index."""
      box_mask = tf.equal(
          unique_indices, i * tf.ones(num_boxes, dtype=tf.int32))
      box_mask = tf.reshape(box_mask, [-1])
      box_indices = tf.boolean_mask(classes, box_mask)
      box_confidences = tf.boolean_mask(confidences, box_mask)
      box_class_encodings = tf.sparse_to_dense(
          box_indices, [num_classes], 1, validate_indices=False)
      box_confidence_encodings = tf.sparse_to_dense(
          box_indices, [num_classes], box_confidences, validate_indices=False)
      return box_class_encodings, box_confidence_encodings

    class_encodings, confidence_encodings = tf.map_fn(
        map_box_encodings,
        tf.range(num_unique_boxes),
        back_prop=False,
        dtype=(tf.int32, tf.float32))

    merged_boxes = tf.reshape(merged_boxes, [-1, 4])
    class_encodings = tf.reshape(class_encodings, [-1, num_classes])
    confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes])
    merged_box_indices = tf.reshape(merged_box_indices, [-1])
    return (merged_boxes, class_encodings, confidence_encodings,
            merged_box_indices)
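# Minimal standalone sketch (toy hashcodes, not part of the function above):
# the de-duplication step. Identical boxes quantize to the same hashcode;
# tf.unique groups them and tf.unsorted_segment_min keeps the first original
# index of each group.
import tensorflow as tf

hashcodes = tf.constant([11, 42, 11, 7], dtype=tf.int64)
unique_hashcodes, unique_indices = tf.unique(hashcodes)
merged_box_indices = tf.unsorted_segment_min(
    tf.range(tf.shape(hashcodes)[0]), unique_indices,
    tf.shape(unique_hashcodes)[0])
# unique_indices evaluates to [0, 1, 0, 2] and merged_box_indices to [0, 1, 3].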
Example #37
0
def efron_estimator_tf(time, censoring, prediction):
    n             = tf.shape(time)[0]
    sort_idx      = tf.nn.top_k(time, k=n, sorted=True).indices

    risk          = tf.gather(prediction, sort_idx)
    events        = tf.gather(censoring, sort_idx)
    otimes        = tf.gather(time, sort_idx)

    # Get unique failure times & exclude zeros
    # NOTE: this assumes that failure times are strictly greater than zero
    otimes_cens   = otimes * events
    unique_ftimes = tf.boolean_mask(otimes_cens, tf.greater(otimes_cens, 0) )
    unique_ftimes = tf.unique(unique_ftimes).y
    m             = tf.shape(unique_ftimes)[0]

    # Define key variables:
    log_lik       = tf.Variable(0., dtype=tf.float32, trainable=False)

    tie_count     = tf.Variable([], dtype=tf.uint8,   trainable=False)
    tie_risk      = tf.Variable([], dtype=tf.float32, trainable=False)
    tie_hazard    = tf.Variable([], dtype=tf.float32, trainable=False)
    cum_hazard    = tf.Variable([], dtype=tf.float32, trainable=False)

    cum_sum       = tf.cumsum(tf.exp(risk))

    # Prepare for looping:
    i = tf.constant(0, tf.int32)
    def loop_cond(i, *args):
        return i < m

    def loop_1_step(i, tc, tr, th, ch):
        idx_b = tf.logical_and(
            tf.equal(otimes, unique_ftimes[i]),
            tf.equal(events, tf.ones_like(events)) )

        idx_i = tf.cast(
            tf.boolean_mask(
                tf.lin_space(0., tf.cast(n-1,tf.float32), n),
                tf.greater(tf.cast(idx_b, tf.int32),0)
            ), tf.int32 )

        tc = tf.concat([tc, [tf.reduce_sum(tf.cast(idx_b, tf.uint8))]], 0)
        tr = tf.concat([tr, [tf.reduce_sum(tf.gather(risk, idx_i))]], 0)
        th = tf.concat([th, [tf.reduce_sum(tf.gather(tf.exp(risk), idx_i))]], 0)

        idx_i = tf.cast(
            tf.boolean_mask(
                tf.lin_space(0., tf.cast(n-1,tf.float32), n),
                tf.greater(tf.cast(tf.equal(otimes, unique_ftimes[i]), tf.int32),0)
            ), tf.int32 )

        ch = tf.concat([ch, [tf.reduce_max(tf.gather( cum_sum, idx_i))]], 0)
        return i + 1, tc, tr, th, ch

    def loop_2_step(i, tc, tr, th, ch, likelihood):
        l = tf.cast(tc[i], tf.float32)
        J = tf.lin_space(0., l-1, tf.cast(l,tf.int32)) / l
        Dm = ch[i] - J * th[i]
        likelihood = likelihood + tr[i] - tf.reduce_sum(tf.log(Dm))
        return i + 1, tc, tr, th, ch, likelihood

    # Loops:
    _, tie_count, tie_risk, tie_hazard, cum_hazard = loop_1 = tf.while_loop(
        loop_cond, loop_1_step,
        loop_vars = [i, tie_count, tie_risk, tie_hazard, cum_hazard],
        shape_invariants = [i.get_shape(),tf.TensorShape([None]),tf.TensorShape([None]),tf.TensorShape([None]),tf.TensorShape([None])]
    )

    loop_2_out = tf.while_loop(
        loop_cond, loop_2_step,
        loop_vars = [i, tie_count, tie_risk, tie_hazard, cum_hazard, log_lik],
        shape_invariants = [i.get_shape(),tf.TensorShape([None]),tf.TensorShape([None]),tf.TensorShape([None]),tf.TensorShape([None]),log_lik.get_shape()]
    )

    log_lik = loop_2_out[-1]
    return tf.negative(log_lik)
Example #38
0
import tensorflow as tf


sess = tf.InteractiveSession() 
x = tf.constant([[2, 5, 3, -5], 
              [0, 3,-2,  5], 
              [4, 3, 5,  3], 
              [6, 1, 4,  0]]) 
listx = tf.constant([1,2,3,4,5,6,7,8]) 
listy = tf.constant([4,5,8,9]) 
 
print("\nx=\n", x.eval())
print("\nlistx=", listx.eval())
print("\nlisty=", listy.eval())
 
boolx = tf.constant([[True,False], [False,True]]) 
 
print("\ntf.argmin(x, 1).eval() ")# Position of the min value of columns
print(tf.argmin(x, 1).eval() )# Position of the min value of columns

print("\ntf.argmax(x, 1).eval() ")# Position of the max value of rows 
print(tf.argmax(x, 1).eval() )# Position of the max value of rows 

print("\ntf.setdiff1d(listx, listy)[0].eval() ")# List differences 
print(tf.setdiff1d(listx, listy)[0].eval() )# List differences 

print(tf.where(boolx).eval() )# Show true values  

print(tf.unique(listx)[0].eval() )# Unique values in list 
Example #39
0
    def prepare_serialized_examples(self,
                                    serialized_example,
                                    max_quantized_value=2,
                                    min_quantized_value=-2):
        """Parse single serialized SequenceExample from the TFRecords."""

        # Read/parse frame/segment-level labels.
        context_features = {
            "id": tf.FixedLenFeature([], tf.string),
        }
        if self.segment_labels:
            context_features.update({
                # There is no need to read the end time, given we always assume all
                # segments have the same size.
                "segment_labels":
                tf.VarLenFeature(tf.int64),
                "segment_start_times":
                tf.VarLenFeature(tf.int64),
                "segment_scores":
                tf.VarLenFeature(tf.float32)
            })
        else:
            context_features.update({"labels": tf.VarLenFeature(tf.int64)})
        sequence_features = {
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in self.feature_names
        }
        contexts, features = tf.parse_single_sequence_example(
            serialized_example,
            context_features=context_features,
            sequence_features=sequence_features)

        # loads (potentially) different types of features and concatenates them
        num_features = len(self.feature_names)
        assert num_features > 0, "No feature selected: feature_names is empty!"

        assert len(self.feature_names) == len(self.feature_sizes), (
            "length of feature_names (={}) != length of feature_sizes (={})".
            format(len(self.feature_names), len(self.feature_sizes)))

        num_frames = -1  # the number of frames in the video
        feature_matrices = [None] * num_features  # an array of different features
        for feature_index in range(num_features):
            feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
                features[self.feature_names[feature_index]],
                self.feature_sizes[feature_index], self.max_frames,
                max_quantized_value, min_quantized_value)
            if num_frames == -1:
                num_frames = num_frames_in_this_feature

            feature_matrices[feature_index] = feature_matrix

        # cap the number of frames at self.max_frames
        num_frames = tf.minimum(num_frames, self.max_frames)

        # concatenate different features
        video_matrix = tf.concat(feature_matrices, 1)

        # Partition frame-level feature matrix to segment-level feature matrix.
        if self.segment_labels:
            start_times = contexts["segment_start_times"].values
            # Here we assume all the segments that start at the same start time have
            # the same segment_size.
            uniq_start_times, seg_idxs = tf.unique(start_times,
                                                   out_idx=tf.dtypes.int64)
            # TODO(zhengxu): Ensure the segment sizes are all the same.
            segment_size = self.segment_size
            # Range gather matrix, e.g., [[0,1,2],[1,2,3]] for segment_size == 3.
            range_mtx = tf.expand_dims(
                uniq_start_times, axis=-1) + tf.expand_dims(
                    tf.range(0, segment_size, dtype=tf.int64), axis=0)
            # Shape: [num_segment, segment_size, feature_dim].
            batch_video_matrix = tf.gather_nd(
                video_matrix, tf.expand_dims(range_mtx, axis=-1))
            num_segment = tf.shape(batch_video_matrix)[0]
            batch_video_ids = tf.reshape(
                tf.tile([contexts["id"]], [num_segment]), (num_segment, ))
            batch_frames = tf.reshape(tf.tile([segment_size], [num_segment]),
                                      (num_segment, ))

            # For segment labels, not all labels are exhaustively rated. So we only
            # evaluate the rated labels.

            # Label indices for each segment, shape: [num_segment, 2].
            label_indices = tf.stack(
                [seg_idxs, contexts["segment_labels"].values], axis=-1)
            label_values = contexts["segment_scores"].values
            sparse_labels = tf.sparse.SparseTensor(
                label_indices, label_values, (num_segment, self.num_classes))
            batch_labels = tf.sparse.to_dense(sparse_labels,
                                              validate_indices=False)

            sparse_label_weights = tf.sparse.SparseTensor(
                label_indices, tf.ones_like(label_values, dtype=tf.float32),
                (num_segment, self.num_classes))
            batch_label_weights = tf.sparse.to_dense(sparse_label_weights,
                                                     validate_indices=False)
        else:
            # Process video-level labels.
            label_indices = contexts["labels"].values
            sparse_labels = tf.sparse.SparseTensor(
                tf.expand_dims(label_indices, axis=-1),
                tf.ones_like(contexts["labels"].values, dtype=tf.bool),
                (self.num_classes, ))
            labels = tf.sparse.to_dense(sparse_labels,
                                        default_value=False,
                                        validate_indices=False)
            # convert to batch format.
            batch_video_ids = tf.expand_dims(contexts["id"], 0)
            batch_video_matrix = tf.expand_dims(video_matrix, 0)
            batch_labels = tf.expand_dims(labels, 0)
            batch_frames = tf.expand_dims(num_frames, 0)
            batch_label_weights = None

        output_dict = {
            "video_ids": batch_video_ids,
            "video_matrix": batch_video_matrix,
            "labels": batch_labels,
            "num_frames": batch_frames,
        }
        if batch_label_weights is not None:
            output_dict["label_weights"] = batch_label_weights

        return output_dict
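# Minimal standalone sketch (toy values, not part of the method above): how the
# sparse segment labels are densified. Each (segment index, class index) pair
# carries its rated score; unrated classes stay at the default value 0.
import tensorflow as tf

label_indices = tf.constant([[0, 2], [1, 0]], dtype=tf.int64)
label_values = tf.constant([0.9, 0.3])
sparse_labels = tf.sparse.SparseTensor(label_indices, label_values, (2, 3))
batch_labels = tf.sparse.to_dense(sparse_labels, validate_indices=False)
# batch_labels evaluates to [[0., 0., 0.9], [0.3, 0., 0.]].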
Example #40
0
###############################################################################
# 1f: Create a random 2-d tensor of size 10 x 10 from any distribution.
# Calculate its determinant.
# Hint: Look at tf.matrix_determinant().
###############################################################################

m = tf.random_normal([10, 10], mean=10, stddev=1)
out = tf.matrix_determinant(m)

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################

x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
unique_values, indices = tf.unique(x)

###############################################################################
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
# as long as they are from the same distribution.
# Use tf.cond() to return:
# - The mean squared error of (x - y) if the average of all elements in (x - y)
#   is negative, or
# - The sum of absolute value of all elements in the tensor (x - y) otherwise.
# Hint: see the Huber loss function in the lecture slides 3.
###############################################################################

x = tf.random_normal([300], mean=5, stddev=1)
y = tf.random_normal([300], mean=5, stddev=1)
average = tf.reduce_mean(x - y)
def f1(): return tf.reduce_mean(tf.square(x - y))
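# One possible completion (not in the original snippet):
def f2(): return tf.reduce_sum(tf.abs(x - y))       # sum of absolute values
out = tf.cond(average < 0, f1, f2)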
Example #41
0
def efron_estimator_tf(y_true, y_pred):
    sort_idx = tf.nn.top_k(y_true[:, 1], k=tf.shape(y_pred)[0],
                           sorted=True).indices

    risk = tf.gather(y_pred, sort_idx)
    risk_exp = tf.exp(risk)
    events = tf.gather(y_true[:, 2], sort_idx)
    ftimes = tf.gather(y_true[:, 1], sort_idx)
    ftimes_cens = ftimes * events

    # Get unique failure times & exclude zeros
    # NOTE: this assumes that failure times are strictly greater than zero
    unique = tf.unique(ftimes_cens).y
    unique_ftimes = tf.boolean_mask(unique, tf.greater(unique, 0))
    m = tf.shape(unique_ftimes)[0]

    # Define key variables:
    log_lik = tf.Variable(0.,
                          dtype=tf.float32,
                          validate_shape=True,
                          trainable=False)
    E_ti = tf.Variable([],
                       dtype=tf.int32,
                       validate_shape=True,
                       trainable=False)
    risk_phi = tf.Variable([],
                           dtype=tf.float32,
                           validate_shape=True,
                           trainable=False)
    tie_phi = tf.Variable([],
                          dtype=tf.float32,
                          validate_shape=True,
                          trainable=False)
    cum_risk = tf.Variable([],
                           dtype=tf.float32,
                           validate_shape=True,
                           trainable=False)
    cum_sum = tf.cumsum(risk_exp)

    # -----------------------------------------------------------------
    # Prepare for looping:
    # -----------------------------------------------------------------
    i = tf.constant(0, tf.int32)

    def loop_cond(i, *args):
        return i < m

    # Step for loop # 1:
    def loop_1_step(i, E, Rp, Tp, Cr, Cs):
        n = tf.shape(Cs)[0]
        idx_b = tf.logical_and(tf.equal(ftimes, unique_ftimes[i]),
                               tf.equal(events, tf.ones_like(events)))

        idx_i = tf.cast(
            tf.boolean_mask(tf.lin_space(0., tf.cast(n - 1, tf.float32), n),
                            tf.greater(tf.cast(idx_b, tf.int32), 0)), tf.int32)

        E = tf.concat([E, [tf.reduce_sum(tf.cast(idx_b, tf.int32))]], 0)
        Rp = tf.concat([Rp, [tf.reduce_sum(tf.gather(risk, idx_i))]], 0)
        Tp = tf.concat([Tp, [tf.reduce_sum(tf.gather(risk_exp, idx_i))]], 0)

        idx_i = tf.cast(
            tf.boolean_mask(
                tf.lin_space(0., tf.cast(n - 1, tf.float32), n),
                tf.greater(
                    tf.cast(tf.equal(ftimes, unique_ftimes[i]), tf.int32), 0)),
            tf.int32)

        Cr = tf.concat([Cr, [tf.reduce_max(tf.gather(Cs, idx_i))]], 0)
        return i + 1, E, Rp, Tp, Cr, Cs

    # Step for loop # 2:
    def loop_2_step(i, E, Rp, Tp, Cr, likelihood):
        l = E_ti[i]
        J = tf.lin_space(0., tf.cast(l - 1, tf.float32), l) / tf.cast(
            l, tf.float32)
        Dm = Cr[i] - J * Tp[i]
        likelihood = likelihood + Rp[i] - tf.reduce_sum(tf.log(Dm))
        return i + 1, E, Rp, Tp, Cr, likelihood

    # -----------------------------------------------------------------

    # Loop # 1:
    _, E_ti, risk_phi, tie_phi, cum_risk, _ = loop_1 = tf.while_loop(
        loop_cond,
        loop_1_step,
        loop_vars=[i, E_ti, risk_phi, tie_phi, cum_risk, cum_sum],
        shape_invariants=[
            i.get_shape(),
            tf.TensorShape([None]),
            tf.TensorShape([None]),
            tf.TensorShape([None]),
            tf.TensorShape([None]),
            cum_sum.get_shape()
        ])

    # Loop # 2:
    loop_2 = tf.while_loop(
        loop_cond,
        loop_2_step,
        loop_vars=[i, E_ti, risk_phi, tie_phi, cum_risk, log_lik],
        shape_invariants=[
            i.get_shape(),
            tf.TensorShape([None]),
            tf.TensorShape([None]),
            tf.TensorShape([None]),
            tf.TensorShape([None]),
            log_lik.get_shape()
        ])

    log_lik = loop_2[5]
    # TODO: Normalize by the number of EVENTS in the batch,
    # NOT number of samples in the batch FIXIT!!
    log_lik = log_lik / tf.cast(tf.shape(y_pred)[0], tf.float32)
    return tf.negative(log_lik)
Example #42
0
W = tf.Variable(tf.random_uniform([4, 5], -1.0, 1.0))

print(W.get_shape())    # Get the shape of W (4, 5)
print(tf.shape(W))      # Wrong. tf.shape(W) returns a tensor that yields the shape at runtime.
                        # Tensor("Shape:0", shape=(2,), dtype=int32)

W = tf.reshape(W, [10, 2])
print(W.get_shape())    # (10, 2)

W = tf.reshape(W, [-1])
print(W.get_shape())    # (20,)

W = tf.reshape(W, [5, -1])
print(W.get_shape())    # (5, 4)

shape_op = tf.shape(W)

c = tf.constant([1, 2, 3, 1])
y, _ = tf.unique(c)     # y only contains the unique elements.

print(y.get_shape())    # (?,) This is a dynamic shape, only known at runtime.

y_shape = tf.shape(y)   # Define an op to get the dynamic shape.

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    print(sess.run(shape_op))  # [5 4]
    print(sess.run(y_shape))   # [3]
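
The same static/dynamic distinction matters for placeholders with unknown dimensions. The short TF1-style sketch below is illustrative only (the placeholder and feed values are made up): get_shape() reports '?' for the batch dimension, while tf.shape() yields the concrete value once data is fed.

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 3])   # batch size unknown at graph-build time
print(x.get_shape())        # (?, 3) -- static shape
dyn_shape = tf.shape(x)     # op that returns the runtime shape
with tf.Session() as sess:
    print(sess.run(dyn_shape, feed_dict={x: [[1., 2., 3.], [4., 5., 6.]]}))  # [2 3]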
Example #43
0
def get(dataset,
        num_frames_per_video,
        crop_size,
        batch_size,
        min_resize_value=None,
        max_resize_value=None,
        resize_factor=None,
        min_scale_factor=1.,
        max_scale_factor=1.,
        scale_factor_step_size=0,
        preprocess_image_and_label=True,
        num_readers=1,
        num_threads=1,
        dataset_split=None,
        is_training=True,
        model_variant=None,
        batch_capacity_factor=32,
        video_frames_are_decoded=False,
        decoder_output_stride=None,
        first_frame_finetuning=False,
        sample_only_first_frame_for_finetuning=False,
        sample_adjacent_and_consistent_query_frames=False,
        remap_labels_to_reference_frame=True,
        generate_prev_frame_mask_by_mask_damaging=False,
        three_frame_dataset=False,
        add_prev_frame_label=True):
  """Gets the dataset split for semantic segmentation.

  This function gets the dataset split for semantic segmentation. In
  particular, it is a wrapper of (1) dataset_data_provider, which returns the
  raw dataset split, (2) input_preprocess, which preprocesses the raw data, and
  (3) the TensorFlow batching of the preprocessed data. The output can then be
  used directly for training, evaluation or visualization.

  Args:
    dataset: An instance of slim Dataset.
    num_frames_per_video: The number of frames used per video
    crop_size: Image crop size [height, width].
    batch_size: Batch size.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    preprocess_image_and_label: Boolean variable specifies if preprocessing of
      image and label will be performed or not.
    num_readers: Number of readers for data provider.
    num_threads: Number of threads for batching data.
    dataset_split: Dataset split.
    is_training: Is training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.
    batch_capacity_factor: Batch capacity factor affecting the training queue
      batch capacity.
    video_frames_are_decoded: Boolean, whether the video frames are already
      decoded.
    decoder_output_stride: Integer, the stride of the decoder output.
    first_frame_finetuning: Boolean, whether to only sample the first frame
      for fine-tuning.
    sample_only_first_frame_for_finetuning: Boolean, whether to only sample the
      first frame during fine-tuning. This should be False when using lucid or
      wonderland data, but true when fine-tuning on the first frame only.
      Only has an effect if first_frame_finetuning is True.
    sample_adjacent_and_consistent_query_frames: Boolean, if true, the query
      frames (all but the first frame which is the reference frame) will be
      sampled such that they are adjacent video frames and have the same
      crop coordinates and flip augmentation.
    remap_labels_to_reference_frame: Boolean, whether to remap the labels of
      the query frames to match the labels of the (downscaled) reference frame.
      If a query frame contains a label which is not present in the reference,
      it will be mapped to background.
    generate_prev_frame_mask_by_mask_damaging: Boolean, whether to generate
      the masks used as guidance from the previous frame by damaging the
      ground truth mask.
    three_frame_dataset: Boolean, whether the dataset has exactly three frames
      per video of which the first is to be used as reference and the two
      others are consecutive frames to be used as query frames.
    add_prev_frame_label: Boolean, whether to sample one more frame before the
      first query frame to obtain a previous frame label. Only has an effect,
      if sample_adjacent_and_consistent_query_frames is True and
      generate_prev_frame_mask_by_mask_damaging is False.

  Returns:
    A dictionary of batched Tensors for semantic segmentation.

  Raises:
    ValueError: dataset_split is None, or Failed to find labels.
  """
  if dataset_split is None:
    raise ValueError('Unknown dataset split.')
  if model_variant is None:
    tf.logging.warning('Please specify a model_variant. See '
                       'feature_extractor.network_map for supported model '
                       'variants.')

  data_provider = dataset_data_provider.DatasetDataProvider(
      dataset,
      num_readers=num_readers,
      num_epochs=None if is_training else 1,
      shuffle=is_training)
  image, label, object_label, image_name, height, width, video_id = _get_data(
      data_provider, dataset_split, video_frames_are_decoded)

  sampling_is_valid = tf.constant(True)
  if num_frames_per_video is not None:
    total_num_frames = tf.shape(image)[0]
    if first_frame_finetuning or three_frame_dataset:
      if sample_only_first_frame_for_finetuning:
        assert not sample_adjacent_and_consistent_query_frames, (
            'this option does not make sense for sampling only first frame.')
        # Sample the first frame num_frames_per_video times.
        sel_indices = tf.tile(tf.constant(0, dtype=tf.int32)[tf.newaxis],
                              multiples=[num_frames_per_video])
      else:
        if sample_adjacent_and_consistent_query_frames:
          if add_prev_frame_label:
            num_frames_per_video += 1
          # Since this is first frame fine-tuning, we'll for now assume that
          # each sequence has exactly 3 images: the ref frame and 2 adjacent
          # query frames.
          assert num_frames_per_video == 3
          with tf.control_dependencies([tf.assert_equal(total_num_frames, 3)]):
            sel_indices = tf.constant([1, 2], dtype=tf.int32)
        else:
          # Sample num_frames_per_video - 1 query frames which are not the
          # first frame.
          sel_indices = tf.random_shuffle(
              tf.range(1, total_num_frames))[:(num_frames_per_video - 1)]
        # Concat first frame as reference frame to the front.
        sel_indices = tf.concat([tf.constant(0, dtype=tf.int32)[tf.newaxis],
                                 sel_indices], axis=0)
    else:
      if sample_adjacent_and_consistent_query_frames:
        if add_prev_frame_label:
          # Sample one more frame which we can use to provide initial softmax
          # feedback.
          num_frames_per_video += 1
        ref_idx = tf.random_shuffle(tf.range(total_num_frames))[0]
        sampling_is_valid = tf.greater_equal(total_num_frames,
                                             num_frames_per_video)
        def sample_query_start_idx():
          return tf.random_shuffle(
              tf.range(total_num_frames - num_frames_per_video + 1))[0]
        query_start_idx = tf.cond(sampling_is_valid, sample_query_start_idx,
                                  lambda: tf.constant(0, dtype=tf.int32))
        def sample_sel_indices():
          return tf.concat(
              [ref_idx[tf.newaxis],
               tf.range(
                   query_start_idx,
                   query_start_idx + (num_frames_per_video - 1))], axis=0)
        sel_indices = tf.cond(
            sampling_is_valid, sample_sel_indices,
            lambda: tf.zeros((num_frames_per_video,), dtype=tf.int32))
      else:
        # Randomly sample some frames from the video.
        sel_indices = tf.random_shuffle(
            tf.range(total_num_frames))[:num_frames_per_video]
    image = tf.gather(image, sel_indices, axis=0)
  if not video_frames_are_decoded:
    image = decode_image_sequence(image)

  if label is not None:
    if num_frames_per_video is not None:
      label = tf.gather(label, sel_indices, axis=0)
    if not video_frames_are_decoded:
      label = decode_image_sequence(label, image_format='png', channels=1)

    # Sometimes, label is saved as [num_frames_per_video, height, width] or
    # [num_frames_per_video, height, width, 1]. We change it to be
    # [num_frames_per_video, height, width, 1].
    if label.shape.ndims == 3:
      label = tf.expand_dims(label, 3)
    elif label.shape.ndims == 4 and label.shape.dims[3] == 1:
      pass
    else:
      raise ValueError('Input label shape must be '
                       '[num_frames_per_video, height, width],'
                       ' or [num_frames, height, width, 1]. '
                       'Got {}'.format(label.shape.ndims))
    label.set_shape([None, None, None, 1])

  # Add size of first dimension since tf can't figure it out automatically.
  image.set_shape((num_frames_per_video, None, None, None))
  if label is not None:
    label.set_shape((num_frames_per_video, None, None, None))

  preceding_frame_label = None
  if preprocess_image_and_label:
    if num_frames_per_video is None:
      raise ValueError('num_frame_per_video must be specified for preproc.')
    original_images = []
    images = []
    labels = []
    if sample_adjacent_and_consistent_query_frames:
      num_frames_individual_preproc = 1
    else:
      num_frames_individual_preproc = num_frames_per_video
    for frame_idx in range(num_frames_individual_preproc):
      original_image_t, image_t, label_t = (
          input_preprocess.preprocess_image_and_label(
              image[frame_idx],
              label[frame_idx],
              crop_height=crop_size[0] if crop_size is not None else None,
              crop_width=crop_size[1] if crop_size is not None else None,
              min_resize_value=min_resize_value,
              max_resize_value=max_resize_value,
              resize_factor=resize_factor,
              min_scale_factor=min_scale_factor,
              max_scale_factor=max_scale_factor,
              scale_factor_step_size=scale_factor_step_size,
              ignore_label=dataset.ignore_label,
              is_training=is_training,
              model_variant=model_variant))
      original_images.append(original_image_t)
      images.append(image_t)
      labels.append(label_t)
    if sample_adjacent_and_consistent_query_frames:
      imgs_for_preproc = [image[frame_idx] for frame_idx in
                          range(1, num_frames_per_video)]
      labels_for_preproc = [label[frame_idx] for frame_idx in
                            range(1, num_frames_per_video)]
      original_image_rest, image_rest, label_rest = (
          input_preprocess.preprocess_images_and_labels_consistently(
              imgs_for_preproc,
              labels_for_preproc,
              crop_height=crop_size[0] if crop_size is not None else None,
              crop_width=crop_size[1] if crop_size is not None else None,
              min_resize_value=min_resize_value,
              max_resize_value=max_resize_value,
              resize_factor=resize_factor,
              min_scale_factor=min_scale_factor,
              max_scale_factor=max_scale_factor,
              scale_factor_step_size=scale_factor_step_size,
              ignore_label=dataset.ignore_label,
              is_training=is_training,
              model_variant=model_variant))
      original_images.extend(original_image_rest)
      images.extend(image_rest)
      labels.extend(label_rest)
    assert len(original_images) == num_frames_per_video
    assert len(images) == num_frames_per_video
    assert len(labels) == num_frames_per_video

    if remap_labels_to_reference_frame:
      # Remap labels to indices into the labels of the (downscaled) reference
      # frame, or 0, i.e. background, for labels which are not present
      # in the reference.
      reference_labels = labels[0][tf.newaxis]
      h, w = train_utils.resolve_shape(reference_labels)[1:3]
      embedding_height = model.scale_dimension(
          h, 1.0 / decoder_output_stride)
      embedding_width = model.scale_dimension(
          w, 1.0 / decoder_output_stride)
      reference_labels_embedding_size = tf.squeeze(
          tf.image.resize_nearest_neighbor(
              reference_labels, tf.stack([embedding_height, embedding_width]),
              align_corners=True),
          axis=0)
      # Get sorted unique labels in the reference frame.
      labels_in_ref_frame, _ = tf.unique(
          tf.reshape(reference_labels_embedding_size, [-1]))
      labels_in_ref_frame = tf.contrib.framework.sort(labels_in_ref_frame)
      for idx in range(1, len(labels)):
        ref_label_mask = tf.equal(
            labels[idx],
            labels_in_ref_frame[tf.newaxis, tf.newaxis, :])
        remapped = tf.argmax(tf.cast(ref_label_mask, tf.uint8), axis=-1,
                             output_type=tf.int32)
        # Set to 0 if label is not present
        is_in_ref = tf.reduce_any(ref_label_mask, axis=-1)
        remapped *= tf.cast(is_in_ref, tf.int32)
        labels[idx] = remapped[..., tf.newaxis]

    if sample_adjacent_and_consistent_query_frames:
      if first_frame_finetuning and generate_prev_frame_mask_by_mask_damaging:
        preceding_frame_label = mask_damaging.damage_masks(labels[1])
      elif add_prev_frame_label:
        # Discard the image of the additional frame and take the label as
        # initialization for softmax feedback.
        original_images = [original_images[0]] + original_images[2:]
        preceding_frame_label = labels[1]
        images = [images[0]] + images[2:]
        labels = [labels[0]] + labels[2:]
        num_frames_per_video -= 1

    original_image = tf.stack(original_images, axis=0)
    image = tf.stack(images, axis=0)
    label = tf.stack(labels, axis=0)
  else:
    if label is not None:
      # Need to set label shape due to batching.
      label.set_shape([num_frames_per_video,
                       None if crop_size is None else crop_size[0],
                       None if crop_size is None else crop_size[1],
                       1])
    original_image = tf.to_float(tf.zeros_like(label))
    if crop_size is None:
      height = tf.shape(image)[1]
      width = tf.shape(image)[2]
    else:
      height = crop_size[0]
      width = crop_size[1]

  sample = {'image': image,
            'image_name': image_name,
            'height': height,
            'width': width,
            'video_id': video_id}
  if label is not None:
    sample['label'] = label

  if object_label is not None:
    sample['object_label'] = object_label

  if preceding_frame_label is not None:
    sample['preceding_frame_label'] = preceding_frame_label

  if not is_training:
    # Original image is only used during visualization.
    sample['original_image'] = original_image

  if is_training:
    if first_frame_finetuning:
      keep_input = tf.constant(True)
    else:
      keep_input = tf.logical_and(sampling_is_valid, tf.logical_and(
          _has_enough_pixels_of_each_object_in_first_frame(
              label, decoder_output_stride),
          _has_foreground_and_background_in_first_frame_2(
              label, decoder_output_stride)))

    batched = tf.train.maybe_batch(sample,
                                   keep_input=keep_input,
                                   batch_size=batch_size,
                                   num_threads=num_threads,
                                   capacity=batch_capacity_factor * batch_size,
                                   dynamic_pad=True)
  else:
    batched = tf.train.batch(sample,
                             batch_size=batch_size,
                             num_threads=num_threads,
                             capacity=batch_capacity_factor * batch_size,
                             dynamic_pad=True)

  # Flatten from [batch, num_frames_per_video, ...] to
  # [batch * num_frames_per_video, ...].
  cropped_height = train_utils.resolve_shape(batched['image'])[2]
  cropped_width = train_utils.resolve_shape(batched['image'])[3]
  if num_frames_per_video is None:
    first_dim = -1
  else:
    first_dim = batch_size * num_frames_per_video
  batched['image'] = tf.reshape(batched['image'],
                                [first_dim, cropped_height, cropped_width, 3])
  if label is not None:
    batched['label'] = tf.reshape(batched['label'],
                                  [first_dim, cropped_height, cropped_width, 1])
  return batched
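
As a reading aid for the remap_labels_to_reference_frame block above, here is a small standalone NumPy sketch of the remapping idea (label values are made up): each query-frame label becomes its index in the sorted list of labels present in the reference frame, and labels absent from the reference are mapped to 0 (background).

import numpy as np

labels_in_ref_frame = np.unique(np.array([0, 0, 7, 7, 12]))      # sorted labels in the reference: [0, 7, 12]
query_labels = np.array([[0, 7], [12, 5]])                        # 5 does not occur in the reference

ref_label_mask = query_labels[..., np.newaxis] == labels_in_ref_frame
remapped = ref_label_mask.argmax(axis=-1)                         # index into labels_in_ref_frame
is_in_ref = ref_label_mask.any(axis=-1)
remapped = remapped * is_in_ref.astype(np.int64)                  # absent labels -> 0 (background)
print(remapped)                                                   # [[0 1] [2 0]]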
Example #44
0
def refine_detections_graph(rois, probs, deltas, window, config):
    """细化分类建议并过滤重叠部分并返回最终结果探测。
    Inputs:
        rois: [N, (y1, x1, y2, x2)] in normalized coordinates
        probs: [N, num_classes]. Class probabilities.
        deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific
                bounding box deltas.
        window: (y1, x1, y2, x2) in normalized coordinates. The part of the image
            that contains the image excluding the padding.

    Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id, score)] where
        coordinates are normalized.
    """
    # Find the class with the highest score for each ROI
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    # Stack (row index, class id) pairs
    indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)
    # Gather the score of the top class for each ROI
    class_scores = tf.gather_nd(probs, indices)
    # ...and the class-specific bounding box deltas
    deltas_specific = tf.gather_nd(deltas, indices)
    # Apply the deltas. Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates
    refined_rois = apply_box_deltas_graph(
        rois, deltas_specific * config.BBOX_STD_DEV)   # rois serve as the classifier reference boxes
    # Clip so coordinates stay within the [0, 1] window
    refined_rois = clip_boxes_graph(refined_rois, window)

    # Filter out background boxes (class id 0)
    keep = tf.where(class_ids > 0)[:, 0]
    # Also filter out low-confidence boxes
    if config.DETECTION_MIN_CONFIDENCE: # config.DETECTION_MIN_CONFIDENCE=0.7
        conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(conf_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    # Gather the remaining (non-background, high-confidence) boxes, their class ids and scores
    # 1. Prepare variables
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois,   keep)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

    def nms_keep_map(class_id):

        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]

        class_keep = tf.image.non_max_suppression(
                tf.gather(pre_nms_rois, ixs),
                tf.gather(pre_nms_scores, ixs),
                max_output_size=config.DETECTION_MAX_INSTANCES,
                iou_threshold=config.DETECTION_NMS_THRESHOLD)

        class_keep = tf.gather(keep, tf.gather(ixs, class_keep))

        gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)],
                            mode='CONSTANT', constant_values=-1)

        class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
        return class_keep

    # 2. Apply per-class non-max suppression
    nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids,
                         dtype=tf.int64)
    # 3. Collect the proposal indices that survive NMS
    nms_keep = tf.reshape(nms_keep, [-1])
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    # 4. Compute intersection between keep and nms_keep
    keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                    tf.expand_dims(nms_keep, 0))
    keep = tf.sparse_tensor_to_dense(keep)[0]

    # Keep the top num_keep boxes by score
    roi_count = config.DETECTION_MAX_INSTANCES
    class_scores_keep = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
    top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    # Stack the refined boxes. Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
    detections = tf.concat([
        tf.gather(refined_rois, keep),
        tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis],
        tf.gather(class_scores, keep)[..., tf.newaxis]
        ], axis=1)

    # Pad with zeros if there are fewer detections than DETECTION_MAX_INSTANCES
    gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections
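
One detail worth calling out in nms_keep_map above is the fixed-shape trick: every class pads its kept indices with -1 up to DETECTION_MAX_INSTANCES so tf.map_fn can stack the per-class results, and the -1 padding is dropped afterwards. A tiny NumPy sketch of that trick (values are illustrative, not from the source):

import numpy as np

DETECTION_MAX_INSTANCES = 5
per_class_keep = [np.array([3, 8]), np.array([1])]       # kept indices for two classes
padded = [np.pad(k, (0, DETECTION_MAX_INSTANCES - len(k)),
                 mode='constant', constant_values=-1) for k in per_class_keep]
nms_keep = np.stack(padded).reshape(-1)                   # fixed-shape stack, then flatten
nms_keep = nms_keep[nms_keep > -1]                        # drop the padding
print(nms_keep)                                           # [3 8 1]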
Example #45
0
import tensorflow as tf 
sess = tf.InteractiveSession()
x = tf.constant([[2, 5, 3, -5], 
                 [0, 3,-2,  5], 
                 [4, 3, 5,  3], 
                 [6, 1, 4,  0]]) 
listx = tf.constant([1,2,3,4,5,6,7,8])
listy = tf.constant([4,5,8,9])

boolx = tf.constant([[True,False], [False,True]])

tf.argmin(x, 1).eval() # Position of the minimum value in each row
tf.argmax(x, 1).eval() # Position of the maximum value in each row
tf.listdiff(listx, listy)[0].eval() # List differences
tf.where(boolx).eval() # Show true values
tf.unique(listx)[0].eval() # Unique values in list
Example #46
0
print('Relational ops ----------')
print(sess.run(tf.equal(1, 2)))
print(sess.run(tf.not_equal(1, 2)))
print(sess.run(tf.less(1, 2)))
print(sess.run(tf.greater(1, 2)))
print(sess.run(tf.greater_equal(1, 2)))

print('Logical ops ----------')
print(sess.run(tf.logical_and(True, False)))
print(sess.run(tf.logical_or(True, False)))
print(sess.run(tf.logical_xor(True, False)))
print(sess.run(tf.logical_not(True)))

print('Unique values ----------')
kbs = tf.constant([1,2,2,2,3])
val, idx = tf.unique(kbs)
print(sess.run(val))
print(sess.run(idx))

# tf.reduce~ : the operation reduces the dimensionality of the result
ar = [[1.,2.],[3.,4.]]
print(tf.reduce_sum(ar).eval(session=tf.Session()))
print(tf.reduce_mean(ar, axis=0).eval(session=tf.Session())) # column-wise (along axis 0)
print(tf.reduce_mean(ar, axis=1).eval(session=tf.Session())) # row-wise (along axis 1)

print()
# Changing dimensions
import numpy as np
t = np.array([[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]])
print(t.shape)  # (2, 2, 3)
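
For reference, tf.unique(kbs) above yields val = [1 2 3] and idx = [0 1 1 1 2]: idx gives, for each input element, its position in val. NumPy's np.unique with return_inverse plays the same role, as in this small sketch (not from the source):

import numpy as np

val, idx = np.unique([1, 2, 2, 2, 3], return_inverse=True)
print(val)  # [1 2 3]
print(idx)  # [0 1 1 1 2]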
Example #47
0
def instance_embedding_npair_loss(embedding,
                                  instance_labels,
                                  crop_min_height,
                                  crop_area,
                                  similarity_strategy='dotproduct',
                                  loss_strategy='softmax'):
    """n-pair loss for a cropped box inside the embedding.

  It uses npair_loss (above) to compute the embedding loss given the
  ground-truth instance_labels. instance_labels contains the ground-truth
  labels. The loss is computed as follows: We compute the dot product between
  the embedding vector of each pixel and every other pixel. If we have N pixels,
  this will give us a [N, N] matrix. In this matrix, we compute the
  softmax (or sigmoid) loss for each row, average the losses and return as
  output. In order to perform the softmax (sigmoid) loss, we need the one-hot
  ground-truth labels for each row. In row i, the pixels that are in the
  same instance as pixel i are set to 1, and the other pixels are set to 0.
  Each row is normalized so that it sums to 1.

  Args:
    embedding: A tf.float32 tensor of [height, width, embedding_size].
    instance_labels: A tf.int32 tensor of [height, width]. Assumed values in
      target start from 0 and cover 0 to N-1.
    crop_min_height: Minimum height of the crop window.
    crop_area: Area of the crop window.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.

  Returns:
    Total loss value.

  Raises:
    ValueError: If loss strategy or similarity strategy are unknown.
  """
    embedding_shape = tf.shape(embedding)
    embedding_height = embedding_shape[0]
    embedding_width = embedding_shape[1]
    embedding_size = embedding_shape[2]
    crop_height = tf.maximum(crop_area // embedding_width, crop_min_height)
    crop_height = tf.maximum(1, tf.minimum(embedding_height - 1, crop_height))
    crop_width = tf.maximum(
        1, tf.minimum(embedding_width - 1, crop_area // crop_height))
    y_start = tf.random.uniform([],
                                minval=0,
                                maxval=tf.maximum(
                                    1, embedding_height - crop_height),
                                dtype=tf.int32)
    x_start = tf.random.uniform([],
                                minval=0,
                                maxval=tf.maximum(1, embedding_width -
                                                  crop_width),
                                dtype=tf.int32)
    embedding = tf.slice(embedding,
                         begin=tf.stack([y_start, x_start, 0]),
                         size=tf.stack([
                             tf.minimum(crop_height,
                                        embedding_height - y_start),
                             tf.minimum(crop_width, embedding_width - x_start),
                             embedding_size
                         ]))
    embedding = tf.reshape(embedding, [-1, embedding_size])
    instance_labels = tf.slice(instance_labels,
                               begin=tf.stack([y_start, x_start]),
                               size=tf.stack([
                                   tf.minimum(crop_height,
                                              embedding_height - y_start),
                                   tf.minimum(crop_width,
                                              embedding_width - x_start)
                               ]))
    instance_labels = tf.reshape(instance_labels, [-1])
    num_instance_labels = tf.reduce_max(instance_labels) + 1
    valid_mask = tf.greater_equal(instance_labels, 0)
    embedding = tf.boolean_mask(embedding, valid_mask)
    instance_labels = tf.boolean_mask(instance_labels, valid_mask)
    unique_labels, _ = tf.unique(instance_labels)
    instance_labels = tf.one_hot(instance_labels,
                                 num_instance_labels,
                                 dtype=tf.float32)
    instance_labels = tf.transpose(
        tf.gather(tf.transpose(instance_labels), unique_labels))
    return weighted_npair_loss(embedding, instance_labels, similarity_strategy,
                               loss_strategy)
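
To make the docstring's target construction concrete, here is a short NumPy sketch (instance ids are made up) of the [N, N] matrix it describes: entry (i, j) is 1 when pixels i and j belong to the same instance, and each row is then normalized to sum to 1.

import numpy as np

instance_labels = np.array([0, 0, 1, 2, 1])
one_hot = np.eye(instance_labels.max() + 1)[instance_labels]   # [N, num_instances]
targets = one_hot @ one_hot.T                                   # [N, N], 1 iff same instance
targets /= targets.sum(axis=1, keepdims=True)                   # normalize each row
print(targets)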
Example #48
0
  def build_inference_for_training(self):
    """Invokes depth and ego-motion networks and computes clouds if needed."""
    (self.image_stack, self.image_stack_norm, self.seg_stack,
     self.intrinsic_mat, self.intrinsic_mat_inv) = self.reader.read_data()
    with tf.variable_scope('depth_prediction'):
      # Organized by ...[i][scale].  Note that the order is flipped in
      # variables in build_loss() below.
      self.disp = {}
      self.depth = {}
      self.depth_upsampled = {}
      self.inf_loss = 0.0
      # Organized by [i].
      disp_bottlenecks = [None] * self.seq_length

      if self.icp_weight > 0:
        self.cloud = {}
      for i in range(self.seq_length):
        image = self.image_stack_norm[:, :, :, 3 * i:3 * (i + 1)]

        multiscale_disps_i, disp_bottlenecks[i] = nets.disp_net(
            self.architecture, image, self.use_skip,
            self.weight_reg, True)
        multiscale_depths_i = [1.0 / d for d in multiscale_disps_i]
        self.disp[i] = multiscale_disps_i
        self.depth[i] = multiscale_depths_i
        if self.depth_upsampling:
          self.depth_upsampled[i] = []
          # Upsample low-resolution depth maps using differentiable bilinear
          # interpolation.
          for s in range(len(multiscale_depths_i)):
            self.depth_upsampled[i].append(tf.image.resize_bilinear(
                multiscale_depths_i[s], [self.img_height, self.img_width],
                align_corners=True))

        if self.icp_weight > 0:
          multiscale_clouds_i = [
              project.get_cloud(d,
                                self.intrinsic_mat_inv[:, s, :, :],
                                name='cloud%d_%d' % (s, i))
              for (s, d) in enumerate(multiscale_depths_i)
          ]
          self.cloud[i] = multiscale_clouds_i
        # Reuse the same depth graph for all images.
        tf.get_variable_scope().reuse_variables()

    if self.handle_motion:
      # Define egomotion network. This network can see the whole scene except
      # for any moving objects as indicated by the provided segmentation masks.
      # To avoid the network getting motion cues from tracking those masks, we
      # define the segmentation mask as the temporal union of the per-frame masks.
      with tf.variable_scope('egomotion_prediction'):
        base_input = self.image_stack_norm  # (B, H, W, 9)
        seg_input = self.seg_stack  # (B, H, W, 9)
        ref_zero = tf.constant(0, dtype=tf.uint8)
        # Motion model is currently defined for three-frame sequences.
        object_mask1 = tf.equal(seg_input[:, :, :, 0], ref_zero)
        object_mask2 = tf.equal(seg_input[:, :, :, 3], ref_zero)
        object_mask3 = tf.equal(seg_input[:, :, :, 6], ref_zero)
        mask_complete = tf.expand_dims(tf.logical_and(  # (B, H, W, 1)
            tf.logical_and(object_mask1, object_mask2), object_mask3), axis=3)
        mask_complete = tf.tile(mask_complete, (1, 1, 1, 9))  # (B, H, W, 9)
        # Now mask out base_input.
        self.mask_complete = tf.to_float(mask_complete)
        self.base_input_masked = base_input * self.mask_complete
        self.egomotion = nets.egomotion_net(
            image_stack=self.base_input_masked,
            disp_bottleneck_stack=None,
            joint_encoder=False,
            seq_length=self.seq_length,
            weight_reg=self.weight_reg)

      # Define object motion network for refinement. This network only sees
      # one object at a time over the whole sequence, and tries to estimate its
      # motion. The sequence of images are the respective warped frames.

      # For each scale, contains batch_size elements of shape (N, 2, 6).
      self.object_transforms = {}
      # For each scale, contains batch_size elements of shape (N, H, W, 9).
      self.object_masks = {}
      self.object_masks_warped = {}
      # For each scale, contains batch_size elements of size N.
      self.object_ids = {}

      self.egomotions_seq = {}
      self.warped_seq = {}
      self.inputs_objectmotion_net = {}
      with tf.variable_scope('objectmotion_prediction'):
        # First, warp raw images according to overall egomotion.
        for s in range(NUM_SCALES):
          self.warped_seq[s] = []
          self.egomotions_seq[s] = []
          for source_index in range(self.seq_length):
            egomotion_mat_i_1 = project.get_transform_mat(
                self.egomotion, source_index, 1)
            warped_image_i_1, _ = (
                project.inverse_warp(
                    self.image_stack[
                        :, :, :, source_index*3:(source_index+1)*3],
                    self.depth_upsampled[1][s],
                    egomotion_mat_i_1,
                    self.intrinsic_mat[:, 0, :, :],
                    self.intrinsic_mat_inv[:, 0, :, :]))

            self.warped_seq[s].append(warped_image_i_1)
            self.egomotions_seq[s].append(egomotion_mat_i_1)

          # Second, for every object in the segmentation mask, take its mask and
          # warp it according to the egomotion estimate. Then put a threshold to
          # binarize the warped result. Use this mask to mask out background and
          # other objects, and pass the filtered image to the object motion
          # network.
          self.object_transforms[s] = []
          self.object_masks[s] = []
          self.object_ids[s] = []
          self.object_masks_warped[s] = []
          self.inputs_objectmotion_net[s] = {}

          for i in range(self.batch_size):
            seg_sequence = self.seg_stack[i]  # (H, W, 9=3*3)
            object_ids = tf.unique(tf.reshape(seg_sequence, [-1]))[0]
            self.object_ids[s].append(object_ids)
            color_stack = []
            mask_stack = []
            mask_stack_warped = []
            for j in range(self.seq_length):
              current_image = self.warped_seq[s][j][i]  # (H, W, 3)
              current_seg = seg_sequence[:, :, j * 3:(j+1) * 3]  # (H, W, 3)

              def process_obj_mask_warp(obj_id):
                """Performs warping of the individual object masks."""
                obj_mask = tf.to_float(tf.equal(current_seg, obj_id))
                # Warp obj_mask according to overall egomotion.
                obj_mask_warped, _ = (
                    project.inverse_warp(
                        tf.expand_dims(obj_mask, axis=0),
                        # Middle frame, highest scale, batch element i:
                        tf.expand_dims(self.depth_upsampled[1][s][i], axis=0),
                        # Matrix for warping j into middle frame, batch elem. i:
                        tf.expand_dims(self.egomotions_seq[s][j][i], axis=0),
                        tf.expand_dims(self.intrinsic_mat[i, 0, :, :], axis=0),
                        tf.expand_dims(self.intrinsic_mat_inv[i, 0, :, :],
                                       axis=0)))
                obj_mask_warped = tf.squeeze(obj_mask_warped)
                obj_mask_binarized = tf.greater(  # Threshold to binarize mask.
                    obj_mask_warped, tf.constant(0.5))
                return tf.to_float(obj_mask_binarized)

              def process_obj_mask(obj_id):
                """Returns the individual object masks separately."""
                return tf.to_float(tf.equal(current_seg, obj_id))
              object_masks = tf.map_fn(  # (N, H, W, 3)
                  process_obj_mask, object_ids, dtype=tf.float32)

              if self.size_constraint_weight > 0:
                # The object segmentation masks are all in object_masks.
                # We need to measure the height of every of them, and get the
                # approximate distance.

                # self.depth_upsampled of shape (seq_length, scale, B, H, W).
                depth_pred = self.depth_upsampled[j][s][i]  # (H, W)
                def get_losses(obj_mask):
                  """Get motion constraint loss."""
                  # Find height of segment.
                  coords = tf.where(tf.greater(  # Shape (num_true, 2=yx)
                      obj_mask[:, :, 0], tf.constant(0.5, dtype=tf.float32)))
                  y_max = tf.reduce_max(coords[:, 0])
                  y_min = tf.reduce_min(coords[:, 0])
                  seg_height = y_max - y_min
                  f_y = self.intrinsic_mat[i, 0, 1, 1]
                  approx_depth = ((f_y * self.global_scale_var) /
                                  tf.to_float(seg_height))
                  reference_pred = tf.boolean_mask(
                      depth_pred, tf.greater(
                          tf.reshape(obj_mask[:, :, 0],
                                     (self.img_height, self.img_width, 1)),
                          tf.constant(0.5, dtype=tf.float32)))

                  # Establish loss on approx_depth, a scalar, and
                  # reference_pred, our dense prediction. Normalize both to
                  # prevent degenerative depth shrinking.
                  global_mean_depth_pred = tf.reduce_mean(depth_pred)
                  reference_pred /= global_mean_depth_pred
                  approx_depth /= global_mean_depth_pred
                  spatial_err = tf.abs(reference_pred - approx_depth)
                  mean_spatial_err = tf.reduce_mean(spatial_err)
                  return mean_spatial_err

                losses = tf.map_fn(
                    get_losses, object_masks, dtype=tf.float32)
                self.inf_loss += tf.reduce_mean(losses)
              object_masks_warped = tf.map_fn(  # (N, H, W, 3)
                  process_obj_mask_warp, object_ids, dtype=tf.float32)
              filtered_images = tf.map_fn(
                  lambda mask: current_image * mask, object_masks_warped,
                  dtype=tf.float32)  # (N, H, W, 3)
              color_stack.append(filtered_images)
              mask_stack.append(object_masks)
              mask_stack_warped.append(object_masks_warped)

            # For this batch-element, if there are N moving objects,
            # color_stack, mask_stack and mask_stack_warped contain both
            # seq_length elements of shape (N, H, W, 3).
            # We can now concatenate them on the last axis, creating a tensor of
            # (N, H, W, 3*3 = 9), and, assuming N does not get too large so that
            # we have enough memory, pass them in a single batch to the object
            # motion network.
            mask_stack = tf.concat(mask_stack, axis=3)  # (N, H, W, 9)
            mask_stack_warped = tf.concat(mask_stack_warped, axis=3)
            color_stack = tf.concat(color_stack, axis=3)  # (N, H, W, 9)
            all_transforms = nets.objectmotion_net(
                # We cut the gradient flow here as the object motion gradient
                # should have no saying in how the egomotion network behaves.
                # One could try just stopping the gradient for egomotion, but
                # not for the depth prediction network.
                image_stack=tf.stop_gradient(color_stack),
                disp_bottleneck_stack=None,
                joint_encoder=False,  # Joint encoder not supported.
                seq_length=self.seq_length,
                weight_reg=self.weight_reg)
            # all_transforms of shape (N, 2, 6).
            self.object_transforms[s].append(all_transforms)
            self.object_masks[s].append(mask_stack)
            self.object_masks_warped[s].append(mask_stack_warped)
            self.inputs_objectmotion_net[s][i] = color_stack
            tf.get_variable_scope().reuse_variables()
    else:
      # Don't handle motion, classic model formulation.
      with tf.name_scope('egomotion_prediction'):
        if self.joint_encoder:
          # Re-arrange disp_bottleneck_stack to be of shape
          # [B, h_hid, w_hid, c_hid * seq_length]. Currently, it is a list with
          # seq_length elements, each of dimension [B, h_hid, w_hid, c_hid].
          disp_bottleneck_stack = tf.concat(disp_bottlenecks, axis=3)
        else:
          disp_bottleneck_stack = None
        self.egomotion = nets.egomotion_net(
            image_stack=self.image_stack_norm,
            disp_bottleneck_stack=disp_bottleneck_stack,
            joint_encoder=self.joint_encoder,
            seq_length=self.seq_length,
            weight_reg=self.weight_reg)
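
The mask_complete construction near the top of the motion-handling branch keeps a pixel for the egomotion network only if it is background (segmentation id 0) in all three frames, so moving objects cannot leak motion cues. A small NumPy sketch of that masking (shapes and values are illustrative only):

import numpy as np

B, H, W = 1, 4, 4
seg_stack = np.random.randint(0, 3, size=(B, H, W, 9)).astype(np.uint8)   # 3 frames x 3 channels
background = np.logical_and.reduce([seg_stack[..., c] == 0 for c in (0, 3, 6)])
mask_complete = np.tile(background[..., np.newaxis], (1, 1, 1, 9)).astype(np.float32)
base_input = np.random.rand(B, H, W, 9).astype(np.float32)
base_input_masked = base_input * mask_complete                             # moving objects zeroed out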
Example #49
0
  def _mini_batch_training_op(self, inputs, cluster_idx_list,
                              cluster_centers, cluster_centers_var,
                              total_counts):
    """Creates an op for training for mini batch case.

    Args:
      inputs: list of input Tensors.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      cluster_centers: Tensor of cluster centers, possibly normalized.
      cluster_centers_var: Tensor Ref of cluster centers.
      total_counts: Tensor Ref of cluster counts.

    Returns:
      An op for doing an update of mini-batch k-means.
    """
    update_ops = []
    for inp, cluster_idx in zip(inputs, cluster_idx_list):
      with ops.colocate_with(inp):
        assert total_counts is not None
        cluster_idx = tf.reshape(cluster_idx, [-1])
        # Dedupe the unique ids of cluster_centers being updated so that updates
        # can be locally aggregated.
        unique_ids, unique_idx = tf.unique(cluster_idx)
        num_unique_cluster_idx = tf.size(unique_ids)
        # Fetch the old values of counts and cluster_centers.
        with ops.colocate_with(total_counts):
          old_counts = tf.gather(total_counts, unique_ids)
        with ops.colocate_with(cluster_centers):
          old_cluster_centers = tf.gather(cluster_centers, unique_ids)
        # Locally aggregate the increment to counts.
        count_updates = tf.unsorted_segment_sum(
            tf.ones_like(unique_idx, dtype=total_counts.dtype),
            unique_idx,
            num_unique_cluster_idx)
        # Locally compute the sum of inputs mapped to each id.
        # For a cluster with old cluster value x, old count n, and with data
        # d_1,...d_k newly assigned to it, we recompute the new value as
        # x += (sum_i(d_i) - k * x) / (n + k).
        # Compute sum_i(d_i), see comment above.
        cluster_center_updates = tf.unsorted_segment_sum(
            inp,
            unique_idx,
            num_unique_cluster_idx)
        # Shape to enable broadcasting count_updates and learning_rate to inp.
        # It extends the shape with 1's to match the rank of inp.
        broadcast_shape = tf.concat(
            0,
            [tf.reshape(num_unique_cluster_idx, [1]),
             tf.ones(tf.reshape(tf.rank(inp) - 1, [1]), dtype=tf.int32)])
        # Subtract k * x, see comment above.
        cluster_center_updates -= tf.cast(
            tf.reshape(count_updates, broadcast_shape),
            inp.dtype) * old_cluster_centers
        learning_rate = tf.inv(tf.cast(old_counts + count_updates, inp.dtype))
        learning_rate = tf.reshape(learning_rate, broadcast_shape)
        # scale by 1 / (n + k), see comment above.
        cluster_center_updates *= learning_rate
        # Apply the updates.
      update_counts = tf.scatter_add(
          total_counts,
          unique_ids,
          count_updates)
      update_cluster_centers = tf.scatter_add(
          cluster_centers_var,
          unique_ids,
          cluster_center_updates)
      update_ops.extend([update_counts, update_cluster_centers])
    return tf.group(*update_ops)
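
The comment inside the loop spells out the mini-batch update rule: for a cluster with old center x and old count n that receives k newly assigned points d_1..d_k, the new center is x + (sum_i d_i - k * x) / (n + k). A quick NumPy check of that rule (numbers are illustrative):

import numpy as np

x, n = np.array([0.0, 0.0]), 3.0              # old center and old count
d = np.array([[1.0, 1.0], [3.0, 1.0]])        # k = 2 newly assigned points
k = d.shape[0]
x_new = x + (d.sum(axis=0) - k * x) / (n + k)
print(x_new)  # [0.8 0.4] -- the running mean over all n + k points when x was their mean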
Example #50
0
    def _process_input_helper(self,
                              update_row_factors,
                              sp_input=None,
                              transpose_input=False,
                              row_weights=None):
        """Creates the graph for processing a sparse slice of input.

    Args:
      update_row_factors: if True, update or project the row_factors, else
        update or project the column factors.
      sp_input: Please refer to comments for update_row_factors,
        update_col_factors, project_row_factors, and project_col_factors for
        restrictions.
      transpose_input: If True, the input is logically transposed and then the
        corresponding rows/columns of the transposed input are updated.
      row_weights: If not None, this is the row/column weights to be used for
        the update or projection. If None, use the corresponding weights from
        the model. Note that the feature (column/row) weights will be
        determined by the model. When not None, it can either be a scalar or
        a rank-1 tensor with the same number of elements as the number of rows
        or columns to be updated/projected.

    Returns:
      A tuple consisting of the following two elements:
      new_values: New values for the row/column factors.
      update_op: An op that assigns the newly computed values to the row/column
        factors.
    """
        assert isinstance(sp_input, tf.SparseTensor)

        if update_row_factors:
            left = self._row_factors
            right_factors = self._col_factors_cache
            row_wt = self._row_wt_cache
            col_wt = self._col_wt_cache
            sharding_func = WALSModel._get_sharding_func(
                self._input_rows, self._num_row_shards)
            gramian = self._col_gramian_cache
        else:
            left = self._col_factors
            right_factors = self._row_factors_cache
            row_wt = self._col_wt_cache
            col_wt = self._row_wt_cache
            sharding_func = WALSModel._get_sharding_func(
                self._input_cols, self._num_col_shards)
            gramian = self._row_gramian_cache
            transpose_input = not transpose_input

        # Note that the row indices of sp_input are based on the original full input.
        # Here we reindex the rows and give them contiguous ids starting at 0.
        # We use tf.unique to achieve this reindexing. Note that this is done so
        # that the downstream kernel can assume that the input is "dense" along the
        # row dimension.
        row_ids, col_ids = tf.split(1, 2, sp_input.indices)
        update_row_indices, all_row_ids = tf.unique(row_ids[:, 0])
        update_col_indices, all_col_ids = tf.unique(col_ids[:, 0])
        col_ids = tf.expand_dims(tf.cast(all_col_ids, tf.int64), 1)
        row_ids = tf.expand_dims(tf.cast(all_row_ids, tf.int64), 1)

        if transpose_input:
            update_indices = update_col_indices
            row_shape = [tf.cast(tf.shape(update_row_indices)[0], tf.int64)]
            gather_indices = update_row_indices
        else:
            update_indices = update_row_indices
            row_shape = [tf.cast(tf.shape(update_col_indices)[0], tf.int64)]
            gather_indices = update_col_indices

        num_rows = tf.cast(tf.shape(update_indices)[0], tf.int64)
        col_shape = [num_rows]
        right = embedding_ops.embedding_lookup(right_factors,
                                               gather_indices,
                                               partition_strategy='div')
        new_sp_indices = tf.concat(1, [row_ids, col_ids])
        new_sp_shape = (tf.concat(0, [row_shape, col_shape]) if transpose_input
                        else tf.concat(0, [col_shape, row_shape]))
        new_sp_input = tf.SparseTensor(indices=new_sp_indices,
                                       values=sp_input.values,
                                       dense_shape=new_sp_shape)

        # Compute lhs and rhs of the normal equations
        total_lhs = (self._unobserved_weight * gramian)
        if self._regularization is not None:
            total_lhs += self._regularization
        if self._row_weights is None:
            # Special case of ALS. Use a much simpler update rule.
            total_rhs = (self._unobserved_weight *
                         tf.sparse_tensor_dense_matmul(
                             new_sp_input, right, adjoint_a=transpose_input))
            # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of
            # transposing explicitly.
            # TODO(rmlarsen): multi-thread tf.matrix_solve.
            new_left_values = tf.transpose(
                tf.matrix_solve(total_lhs, tf.transpose(total_rhs)))
        else:
            if row_weights is None:
                # TODO(yifanchen): Add special handling for single shard without using
                # embedding_lookup and perform benchmarks for those cases. Same for
                # col_weights lookup below.
                row_weights_slice = embedding_ops.embedding_lookup(
                    row_wt, update_indices, partition_strategy='div')
            else:
                with ops.control_dependencies(
                    [tf.assert_less_equal(tf.rank(row_weights), 1)]):
                    row_weights_slice = tf.cond(
                        tf.equal(tf.rank(row_weights), 0), lambda:
                        (tf.ones([tf.shape(update_indices)[0]]) * row_weights),
                        lambda: tf.cast(row_weights, tf.float32))

            col_weights = embedding_ops.embedding_lookup(
                col_wt, gather_indices, partition_strategy='div')
            partial_lhs, total_rhs = wals_compute_partial_lhs_and_rhs(
                right,
                col_weights,
                self._unobserved_weight,
                row_weights_slice,
                new_sp_input.indices,
                new_sp_input.values,
                num_rows,
                transpose_input,
                name="wals_compute_partial_lhs_rhs")
            total_lhs = tf.expand_dims(total_lhs, 0) + partial_lhs
            total_rhs = tf.expand_dims(total_rhs, -1)
            new_left_values = tf.squeeze(tf.matrix_solve(total_lhs, total_rhs),
                                         [2])

        return (new_left_values,
                self.scatter_update(left, update_indices, new_left_values,
                                    sharding_func))
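
The reindexing comment above is the core trick: tf.unique over the original sparse row ids returns both the rows that actually appear (whose factors get updated) and, for every nonzero entry, its new contiguous row id starting at 0. A NumPy sketch of the same idea (np.unique sorts the ids, whereas tf.unique keeps first-appearance order, but the reindexing is equivalent):

import numpy as np

row_ids = np.array([7, 2, 7, 9, 2])                               # original row ids of the sparse entries
update_row_indices, all_row_ids = np.unique(row_ids, return_inverse=True)
print(update_row_indices)  # [2 7 9] -> rows whose factors will be updated
print(all_row_ids)         # [1 0 1 2 0] -> contiguous row ids used to rebuild the sparse input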
Example #51
0
 def test_Unique(self):
     t1, t2 = tf.unique([9, 3, 5, 7, 3, 9, 9])
     self.check(t1)
     self.check(t2)
Example #52
0
def beam_search_decoding(input_encoder, model, tokenizer, config, verbose=0):

    input_encoder = tf.expand_dims(input_encoder, axis=0)
    input_decoder = tf.expand_dims(config.START_TOKEN, axis=0)

    k_scores = [0.0]

    for i in range(config.MAX_LENGTH):
        if verbose:
            print('\nStep', i)
            if config.ENCODING == 'subword':
                for k in range(input_decoder.shape[0]):
                    print(
                        tokenizer.decode([
                            j for j in input_decoder.numpy()[k]
                            if j < config.VOCAB_SIZE - 2
                        ]))
            else:
                for k in range(input_decoder.shape[0]):
                    print(
                        tokenizer.sequences_to_texts([[
                            j for j in input_decoder.numpy()[k]
                            if j < config.VOCAB_SIZE - 2
                        ]]))
        predictions = model(inputs=[input_encoder, input_decoder],
                            training=False)
        predictions = predictions[:, -1:, :]
        values, indices = tf.math.top_k(
            tf.math.log(tf.nn.softmax(predictions)), config.BEAM_SIZE)

        sequences = []
        scores = []

        for k in range(input_decoder.shape[0]):
            for b in range(config.BEAM_SIZE):
                sequences.append(
                    tf.concat([input_decoder[k], [indices[k, 0, b]]], axis=0))
                if i >= config.MAX_REP and len(
                        tf.unique(sequences[-1][-config.MAX_REP:])[0]) == 1:
                    scores.append(k_scores[k] - float('inf'))
                else:
                    scores.append(k_scores[k] + values[k, 0, b])

        values, indices = tf.math.top_k(scores, config.BEAM_SIZE)

        k_scores = []
        input_decoder = []
        for k in range(config.BEAM_SIZE):
            k_scores.append(values[k])
            input_decoder.append(sequences[indices[k]])
        input_decoder = tf.stack(input_decoder)

        if input_encoder.shape[0] == 1:
            input_encoder = tf.repeat(input_encoder, config.BEAM_SIZE, axis=0)

        if tf.equal(input_decoder[0, -1], config.END_TOKEN):
            break

    if verbose:
        print()

    if config.ENCODING == 'subword':
        return tokenizer.decode(
            [i for i in input_decoder[0].numpy() if i < config.VOCAB_SIZE - 2])
    else:
        return tokenizer.sequences_to_texts([[
            i for i in input_decoder[0].numpy() if i < config.VOCAB_SIZE - 2
        ]])[0][::2]
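
The tf.unique call inside the beam loop acts as a simple repetition guard: if the last MAX_REP tokens of a candidate are all identical, its score is pushed to -inf so the beam drops it. Minimal sketch of that check (MAX_REP and the token values are illustrative):

import numpy as np

MAX_REP = 3
candidate = np.array([5, 9, 9, 9])
if len(np.unique(candidate[-MAX_REP:])) == 1:    # only one distinct token in the tail
    score = -float('inf')                        # prune this candidate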
Example #53
0
def encode_annos(labels, bboxes, anchors, num_classes):
  """Encode annotations for losses computations.
  All the output tensors have a fixed shape (no dynamic dimensions).

  Args:
    labels: 1-D with shape `[num_bounding_boxes]`.
    bboxes: 2-D with shape `[num_bounding_boxes, 4]`. Format [ymin, xmin, ymax, xmax]
    anchors: 4-D tensor with shape `[num_anchors, 4]`. Format [cx, cy, w, h]

  Returns:
    input_mask: 2-D with shape `[num_anchors, 1]`, indicates which anchors are used to compute the loss.
    labels_input: 2-D with shape `[num_anchors, num_classes]`, one-hot encoded label for every anchor.
    box_delta_input: 2-D with shape `[num_anchors, 4]`. Format [dcx, dcy, dw, dh]
    box_input: 2-D with shape '[num_anchors, 4]'. Format [ymin, xmin, ymax, xmax]
  """
  with tf.name_scope("Encode_annotations") as scope:
    num_anchors = config.ANCHORS
    # num_bboxes = tf.shape(bboxes)[0]

    # Cal iou, find the target anchor
    with tf.name_scope("Matching") as subscope:
      ious = batch_iou_fast(xywh_to_yxyx(anchors), bboxes)
      anchor_indices = tf.reshape(tf.arg_max(ious, dimension=1), shape=[-1, 1])  # target anchor indices
      # anchor_indices = tf.Print(anchor_indices, [anchor_indices], "anchor_indices", summarize=100)

      # discard duplicate # unique_idx wrong
      anchor_indices, idx, count = tf.unique_with_counts(tf.reshape(anchor_indices, shape=[-1]))
      ori_idx = tf.cumsum(tf.pad(count, [[1, 0]]))[:-1]
      anchor_indices = tf.reshape(anchor_indices, shape=[-1, 1])
      bboxes = tf.gather(bboxes, tf.unique(ori_idx)[0])
      labels = tf.gather(labels, tf.unique(ori_idx)[0])
      ious = tf.gather(ious, tf.unique(ori_idx)[0])
      num_bboxes = tf.shape(anchor_indices)[0]

      # TODO(shizehao):deal with duplicate
      # with tf.name_scope("Deal_with_duplicate"):
      #   dup_anchor_indices, indices_in_a, dup_anchor_indices_with_dup = find_dup(tf.reshape(anchor_indices, shape=[-1]))
      #
      #   # reset duplicated corresponding anchor
      #   conflicted_ious = tf.gather(ious, indices_in_a)
      #   top_k_anchor_indices = tf.nn.top_k(conflicted_ious, k=20).indices  # shape = [num_conflicted_bboxes, 20]
      #   dup_group_idx = tf.where(tf.equal(dup_anchor_indices_with_dup, tf.reshape(dup_anchor_indices, shape=[-1, 1])))
      #   seg_group = tf.unstack(dup_group_idx, axis=1)[0]


      with tf.name_scope("Deal_with_noneoverlap"):
        # find the none-overlap bbox
        bbox_indices = tf.reshape(tf.range(num_bboxes), shape=[-1, 1])
        # bbox_indices = tf.Print(bbox_indices, [bbox_indices], "bbox_indices", summarize=100)

        # anchor_indices = tf.Print(anchor_indices, [anchor_indices], "anchor_indices", summarize=100)
        iou_indices = tf.concat([bbox_indices, tf.cast(anchor_indices, dtype=tf.int32)], axis=1)
        # iou_indices = tf.Print(iou_indices, [iou_indices], "iou_indices", summarize=100)

        target_iou = tf.gather_nd(ious, iou_indices)
        # target_iou = tf.Print(target_iou,[target_iou],"target_iou",summarize=100)

        none_overlap_bbox_indices = tf.where(target_iou <= 0)  # 1-D
        # none_overlap_bbox_indices = tf.Print(none_overlap_bbox_indices, [none_overlap_bbox_indices], "none_overlap_bbox_indices", summarize=100)

        # find it's corresponding anchor
        target_bbox = tf.gather_nd(bboxes, none_overlap_bbox_indices)
        # target_bbox = tf.Print(target_bbox, [target_bbox], "target_bbox", summarize=100)

        closest_anchor_indices = arg_closest_anchor(target_bbox, xywh_to_yxyx(anchors))  # 1-D
        # closest_anchor_indices = tf.Print(closest_anchor_indices, [closest_anchor_indices, tf.gather(anchors, closest_anchor_indices)], "closest_anchor_indices", summarize=100)

      with tf.name_scope("Update_anchor_indices"):
        anchor_indices = tf.reshape(anchor_indices, shape=[-1])
        anchor_indices = update_tensor(anchor_indices, none_overlap_bbox_indices, closest_anchor_indices)
        anchor_indices = tf.reshape(anchor_indices, shape=[-1, 1])


    with tf.name_scope("Delta") as subscope:
      target_anchors = tf.gather_nd(anchors, anchor_indices)
      bboxes = yxyx_to_xywh(bboxes)
      delta = batch_delta(bboxes, target_anchors)



    with tf.name_scope("Scattering") as subscope:
      # bbox
      box_input = tf.scatter_nd(anchor_indices,
                                bboxes,
                                shape=[num_anchors, 4]
                                )

      # label
      labels_input = tf.scatter_nd(anchor_indices,
                                   tf.one_hot(labels, num_classes),
                                   shape=[num_anchors, num_classes]
                                   )

      # delta
      box_delta_input = tf.scatter_nd(anchor_indices,
                                      delta,
                                      shape=[num_anchors, 4]
                                      )





      # anchor mask
      # unique_indices, _ = tf.unique(tf.reshape(anchor_indices, shape=[-1]))
      # unique_indices = tf.Print(unique_indices, [unique_indices], summarize=100)
      # num_bboxes = tf.Print(num_bboxes, [num_bboxes])
      input_mask = tf.scatter_nd(anchor_indices,
                                 tf.ones([num_bboxes]),
                                 shape=[num_anchors])
      input_mask = tf.reshape(input_mask, shape=[-1, 1])

  return input_mask, labels_input, box_delta_input, box_input
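
The Scattering block above writes each ground-truth box's one-hot label, box, delta and a 1-valued mask into the row of its matched anchor, leaving all other anchors at zero, which is what gives the outputs their fixed [num_anchors, ...] shapes. A NumPy sketch of that scatter (anchor count, labels and matches are illustrative):

import numpy as np

num_anchors, num_classes = 6, 3
anchor_indices = np.array([4, 1])                        # matched anchor for each ground-truth box
labels = np.array([2, 0])
labels_input = np.zeros((num_anchors, num_classes), np.float32)
labels_input[anchor_indices] = np.eye(num_classes)[labels]    # one-hot rows at the matched anchors
input_mask = np.zeros((num_anchors, 1), np.float32)
input_mask[anchor_indices] = 1.0
print(input_mask.ravel())  # [0. 1. 0. 0. 1. 0.]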
Example #54
0
    def _one_image_rpn_train(self, pconf, pbbox_yx, pbbox_hw, abbox_yx,
                             abbox_hw, abbox_y1x1, abbox_y2x2, nground_truth):
        slice_index = tf.argmin(nground_truth, axis=0)[0]
        nground_truth = tf.gather(nground_truth, tf.range(0, slice_index, dtype=tf.int64))
        ngbbox_yx = nground_truth[..., 0:2]
        ngbbox_hw = nground_truth[..., 2:4]
        ngbbox_y1x1 = ngbbox_yx - ngbbox_hw / 2
        ngbbox_y2x2 = ngbbox_yx + ngbbox_hw / 2
        rcnn_label = tf.cast(nground_truth[..., 4:], tf.int32)

        dpbbox_yx = pbbox_yx * abbox_hw + abbox_yx
        dpbbox_hw = abbox_hw * tf.exp(pbbox_hw)
        dpbbox_y1x1 = dpbbox_yx - dpbbox_hw / 2
        dpbbox_y2x2 = dpbbox_yx + dpbbox_hw / 2
        dpbbox_y1x1y2x2 = tf.concat([dpbbox_y1x1, dpbbox_y2x2], axis=-1)
        selected_indices = tf.image.non_max_suppression(
            dpbbox_y1x1y2x2, pconf[:, 0], self.post_nms_proposals, iou_threshold=0.5
        )
        # selected_indices2 = tf.image.non_max_suppression(
        #     dpbbox_y1x1y2x2, pconf[:, 1], self.reserve_proposals//2, iou_threshold=0.5
        # )
        # selected_indices = tf.concat([selected_indices1, selected_indices2], axis=0)
        # selected_indices, _ = tf.unique(selected_indices)
        pconf = tf.gather(pconf, selected_indices)
        pbbox_yx = tf.gather(pbbox_yx, selected_indices)
        pbbox_hw = tf.gather(pbbox_hw, selected_indices)
        abbox_yx = tf.gather(abbox_yx, selected_indices)
        abbox_hw = tf.gather(abbox_hw, selected_indices)
        abbox_y1x1 = tf.gather(abbox_y1x1, selected_indices)
        abbox_y2x2 = tf.gather(abbox_y2x2, selected_indices)
        proposal_yx = tf.gather(dpbbox_yx, selected_indices)
        proposal_hw = tf.gather(dpbbox_hw, selected_indices)

        num_ground_truth = tf.shape(ngbbox_yx)[0]
        num_abbox = tf.shape(abbox_yx)[0]

        ngbbox_y1x1ti = tf.reshape(ngbbox_y1x1, [-1, 1, 2])
        ngbbox_y2x2ti = tf.reshape(ngbbox_y2x2, [-1, 1, 2])
        ngbbox_y1x1ti = tf.tile(ngbbox_y1x1ti, [1, num_abbox, 1])
        ngbbox_y2x2ti = tf.tile(ngbbox_y2x2ti, [1, num_abbox, 1])
        abbox_y1x1ti = tf.reshape(abbox_y1x1, [1, -1, 2])
        abbox_y2x2ti = tf.reshape(abbox_y2x2, [1, -1, 2])
        abbox_y1x1ti = tf.tile(abbox_y1x1ti, [num_ground_truth, 1, 1])
        abbox_y2x2ti = tf.tile(abbox_y2x2ti, [num_ground_truth, 1, 1])

        gaiou_y1x1ti = tf.maximum(ngbbox_y1x1ti, abbox_y1x1ti)
        gaiou_y2x2ti = tf.minimum(ngbbox_y2x2ti, abbox_y2x2ti)
        gaiou_area = tf.reduce_prod(tf.maximum(gaiou_y2x2ti - gaiou_y1x1ti, 0), axis=-1)
        aarea = tf.reduce_prod(abbox_y2x2ti - abbox_y1x1ti, axis=-1)
        garea = tf.reduce_prod(ngbbox_y2x2ti - ngbbox_y1x1ti, axis=-1)
        gaiou_rate = gaiou_area / (aarea + garea - gaiou_area + 1e-7)
        best_raindex = tf.argmax(gaiou_rate, axis=1)

        best_pbbox_yx = tf.gather(pbbox_yx, best_raindex)
        best_pbbox_hw = tf.gather(pbbox_hw, best_raindex)
        best_pconf = tf.gather(pconf, best_raindex)
        best_abbox_yx = tf.gather(abbox_yx, best_raindex)
        best_abbox_hw = tf.gather(abbox_hw, best_raindex)
        best_proposal_yx = tf.gather(proposal_yx, best_raindex)
        best_proposal_hw = tf.gather(proposal_hw, best_raindex)
        best_rcnn_label = rcnn_label

        bestmask, _ = tf.unique(best_raindex)
        bestmask = tf.contrib.framework.sort(bestmask)
        bestmask = tf.reshape(bestmask, [-1, 1])
        bestmask = tf.sparse.SparseTensor(tf.concat([bestmask, tf.zeros_like(bestmask)], axis=-1),
                                          tf.squeeze(tf.ones_like(bestmask)), dense_shape=[num_abbox, 1])
        bestmask = tf.reshape(tf.cast(tf.sparse.to_dense(bestmask), tf.float32), [-1])
        othermask = (1. - bestmask) > 0.

        other_pbbox_yx = tf.boolean_mask(pbbox_yx, othermask)
        other_pbbox_hw = tf.boolean_mask(pbbox_hw, othermask)
        other_pconf = tf.boolean_mask(pconf, othermask)
        other_abbox_yx = tf.boolean_mask(abbox_yx, othermask)
        other_abbox_hw = tf.boolean_mask(abbox_hw, othermask)
        other_proposal_yx = tf.boolean_mask(proposal_yx, othermask)
        other_proposal_hw = tf.boolean_mask(proposal_hw, othermask)

        agiou_rate = tf.transpose(gaiou_rate)
        other_agiou_rate = tf.boolean_mask(agiou_rate, othermask)
        best_agiou_rate = tf.reduce_max(other_agiou_rate, axis=1)
        pos_mask = best_agiou_rate > 0.7
        neg_mask = best_agiou_rate < 0.3
        rgindex = tf.argmax(other_agiou_rate, axis=1)
        pos_rgindex = tf.boolean_mask(rgindex, pos_mask)
        pos_rcnn_label = tf.gather(rcnn_label, pos_rgindex)
        pos_ppox_yx = tf.boolean_mask(other_pbbox_yx, pos_mask)
        pos_ppox_hw = tf.boolean_mask(other_pbbox_hw, pos_mask)
        pos_pconf = tf.boolean_mask(other_pconf, pos_mask)
        pos_abbox_yx = tf.boolean_mask(other_abbox_yx, pos_mask)
        pos_abbox_hw = tf.boolean_mask(other_abbox_hw, pos_mask)
        pos_proposal_yx = tf.boolean_mask(other_proposal_yx, pos_mask)
        pos_proposal_hw = tf.boolean_mask(other_proposal_hw, pos_mask)
        pos_gbbox_yx = tf.gather(ngbbox_yx, pos_rgindex)
        pos_gbbox_hw = tf.gather(ngbbox_hw, pos_rgindex)
        neg_pconf = tf.boolean_mask(other_pconf, neg_mask)
        neg_proposal_yx = tf.boolean_mask(other_proposal_yx, neg_mask)
        neg_proposal_hw = tf.boolean_mask(other_proposal_hw, neg_mask)

        pos_rcnn_label = tf.concat([best_rcnn_label, pos_rcnn_label], axis=0)
        pos_pbbox_yx = tf.concat([best_pbbox_yx, pos_ppox_yx], axis=0)
        pos_pbbox_hw = tf.concat([best_pbbox_hw, pos_ppox_hw], axis=0)
        pos_pconf = tf.concat([best_pconf, pos_pconf], axis=0)
        pos_gbbox_yx = tf.concat([ngbbox_yx, pos_gbbox_yx], axis=0)
        pos_gbbox_hw = tf.concat([ngbbox_hw, pos_gbbox_hw], axis=0)
        pos_abbox_yx = tf.concat([best_abbox_yx, pos_abbox_yx], axis=0)
        pos_abbox_hw = tf.concat([best_abbox_hw, pos_abbox_hw], axis=0)
        pos_proposal_yx = tf.concat([best_proposal_yx, pos_proposal_yx], axis=0)
        pos_proposal_hw = tf.concat([best_proposal_hw, pos_proposal_hw], axis=0)

        num_pos = tf.shape(pos_pconf)[0]
        num_neg = tf.shape(neg_pconf)[0]
        chosen_num_pos = tf.cond(num_pos > 128, lambda: 128, lambda: num_pos)
        chosen_num_neg = tf.cond(num_neg > 256 - chosen_num_pos, lambda: 256 - chosen_num_pos, lambda: num_neg)
        pos_rpn_label = tf.tile(tf.constant([0]), [num_pos])
        neg_rpn_label = tf.tile(tf.constant([1]), [num_neg])
        neg_rcnn_label = tf.tile(tf.constant([self.num_classes - 1]), [num_neg])
        neg_rcnn_label = tf.reshape(neg_rcnn_label, [-1, 1])

        pos_conf_loss = tf.losses.sparse_softmax_cross_entropy(labels=pos_rpn_label, logits=pos_pconf, reduction=tf.losses.Reduction.NONE)
        neg_conf_loss = tf.losses.sparse_softmax_cross_entropy(labels=neg_rpn_label, logits=neg_pconf, reduction=tf.losses.Reduction.NONE)
        chosen_pos_loss, chosen_pos_index = tf.nn.top_k(pos_conf_loss, chosen_num_pos)
        chosen_neg_loss, chosen_neg_index = tf.nn.top_k(neg_conf_loss, chosen_num_neg)
        conf_loss = tf.reduce_mean(tf.concat([chosen_pos_loss, chosen_neg_loss], axis=-1))

        pos_gbbox_yx = tf.gather(pos_gbbox_yx, chosen_pos_index)
        pos_gbbox_hw = tf.gather(pos_gbbox_hw, chosen_pos_index)
        pos_abbox_yx = tf.gather(pos_abbox_yx, chosen_pos_index)
        pos_abbox_hw = tf.gather(pos_abbox_hw, chosen_pos_index)
        pos_pbbox_yx = tf.gather(pos_pbbox_yx, chosen_pos_index)
        pos_pbbox_hw = tf.gather(pos_pbbox_hw, chosen_pos_index)
        pos_proposal_yx = tf.gather(pos_proposal_yx, chosen_pos_index)
        pos_proposal_hw = tf.gather(pos_proposal_hw, chosen_pos_index)
        neg_proposal_yx = tf.gather(neg_proposal_yx, chosen_neg_index)
        neg_proposal_hw = tf.gather(neg_proposal_hw, chosen_neg_index)

        pos_truth_pbbox_yx = (pos_gbbox_yx - pos_abbox_yx) / pos_abbox_hw
        pos_truth_pbbox_hw = tf.log(pos_gbbox_hw / pos_abbox_hw)
        pos_yx_loss = tf.reduce_sum(self._smooth_l1_loss(pos_pbbox_yx - pos_truth_pbbox_yx), axis=-1)
        pos_hw_loss = tf.reduce_sum(self._smooth_l1_loss(pos_pbbox_hw - pos_truth_pbbox_hw), axis=-1)
        pos_coord_loss = tf.reduce_mean(pos_yx_loss + pos_hw_loss)

        total_loss = conf_loss + 10.0 * pos_coord_loss

        proposal_yx = tf.concat([pos_proposal_yx, neg_proposal_yx], axis=0)
        proposal_hw = tf.concat([pos_proposal_hw, neg_proposal_hw], axis=0)
        proposal_y1x1 = proposal_yx - proposal_hw / 2.
        proposal_y2x2 = proposal_yx + proposal_hw / 2.
        rcnn_label = tf.concat([pos_rcnn_label, neg_rcnn_label], axis=0)

        return total_loss, proposal_y1x1, proposal_y2x2, pos_proposal_yx, pos_proposal_hw, pos_gbbox_yx, pos_gbbox_hw, rcnn_label
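
Example #54 picks its final training samples by ranking per-proposal losses with tf.nn.top_k (a hard-example-mining style selection), keeping only the hardest positives and negatives. A stripped-down sketch of that selection, assuming TensorFlow 1.x and toy loss values (not the author's exact code):

import tensorflow as tf

per_sample_loss = tf.constant([0.2, 1.5, 0.1, 0.9, 0.05])  # toy per-proposal losses
k = tf.minimum(tf.shape(per_sample_loss)[0], 3)            # cap how many we keep

# Keep the k hardest samples and average the loss only over them.
chosen_loss, chosen_index = tf.nn.top_k(per_sample_loss, k)
mean_hard_loss = tf.reduce_mean(chosen_loss)

with tf.Session() as sess:
    print(sess.run([chosen_index, mean_hard_loss]))  # e.g. indices [1, 3, 0]
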
Example #55
0
    def _build(self, all_anchors, gt_boxes, im_shape):
        """
        We compare anchors to ground-truth boxes and, using the minibatch size
        and the different config settings (clobber, foreground fraction, etc.),
        we end up with training targets *only* for the elements we want to use
        in the batch, while everything else is ignored.

        Basically, we first generate targets for all (valid) anchors, and then
        subsample the positive (foreground) and negative (background) ones
        based on the number of samples of each type that we want.

        Args:
            all_anchors:
                A Tensor with all the bounding boxes coords of the anchors.
                Its shape should be (num_anchors, 4).
            gt_boxes:
                A Tensor with the ground truth bounding boxes of the image of
                the batch being processed. Its shape should be (num_gt, 5).
                The last dimension is used for the label.
            im_shape:
                Shape of original image (height, width) in order to define
                anchor targets with respect to gt_boxes.

        Returns:
            Tuple of the tensors of:
                labels: (1, 0, -1) for each anchor.
                    Shape (num_anchors, 1)
                bbox_targets: 4d bbox targets as specified by paper.
                    Shape (num_anchors, 4)
                max_overlaps: Max IoU overlap with ground truth boxes.
                    Shape (num_anchors, 1)
        """
        # Keep only the coordinates of gt_boxes
        gt_boxes = gt_boxes[:, :4]
        all_anchors = all_anchors[:, :4]

        # Only keep anchors inside the image
        (x_min_anchor, y_min_anchor,
         x_max_anchor, y_max_anchor) = tf.unstack(all_anchors, axis=1)

        anchor_filter = tf.logical_and(
            tf.logical_and(
                tf.greater_equal(x_min_anchor, -self._allowed_border),
                tf.greater_equal(y_min_anchor, -self._allowed_border)
            ),
            tf.logical_and(
                tf.less(x_max_anchor, im_shape[1] + self._allowed_border),
                tf.less(y_max_anchor, im_shape[0] + self._allowed_border)
            )
        )

        # We (force) reshape the filter so that we can use it as a boolean mask
        anchor_filter = tf.reshape(anchor_filter, [-1])
        # Filter anchors.
        anchors = tf.boolean_mask(
            all_anchors, anchor_filter, name='filter_anchors')

        # Generate array with the labels for all_anchors.
        labels = tf.fill((tf.gather(tf.shape(all_anchors), [0])), -1)
        labels = tf.boolean_mask(labels, anchor_filter, name='filter_labels')

        # Intersection over union (IoU) overlap between the anchors and the
        # ground truth boxes.
        overlaps = bbox_overlap_tf(tf.to_float(anchors), tf.to_float(gt_boxes))

        # Generate array with the IoU value of the closest GT box for each
        # anchor.
        max_overlaps = tf.reduce_max(overlaps, axis=1)
        if not self._clobber_positives:
            # Assign bg labels first so that positive labels can clobber them.
            # First we get an array with True where IoU is less than
            # self._negative_overlap
            negative_overlap_nonzero = tf.less(
                max_overlaps, self._negative_overlap)

            # Finally we set 0 at True indices
            labels = tf.where(
                condition=negative_overlap_nonzero,
                x=tf.zeros(tf.shape(labels)), y=tf.to_float(labels)
            )
        # Get the value of the max IoU for the closest anchor for each gt.
        gt_max_overlaps = tf.reduce_max(overlaps, axis=0)

        # Find all the indices that match (at least one, but could be more).
        gt_argmax_overlaps = tf.squeeze(tf.equal(overlaps, gt_max_overlaps))
        gt_argmax_overlaps = tf.where(gt_argmax_overlaps)[:, 0]
        # Eliminate duplicates indices.
        gt_argmax_overlaps, _ = tf.unique(gt_argmax_overlaps)
        # Order the indices for sparse_to_dense compatibility
        gt_argmax_overlaps, _ = tf.nn.top_k(
            gt_argmax_overlaps, k=tf.shape(gt_argmax_overlaps)[-1])
        gt_argmax_overlaps = tf.reverse(gt_argmax_overlaps, [0])

        # Foreground label: for each ground-truth, anchor with highest overlap.
        # When the argmax is shared by several anchors we use all of them (for consistency).
        # We set 1 at gt_argmax_overlaps_cond indices
        gt_argmax_overlaps_cond = tf.sparse_to_dense(
            gt_argmax_overlaps, tf.shape(labels, out_type=tf.int64),
            True, default_value=False
        )

        labels = tf.where(
            condition=gt_argmax_overlaps_cond,
            x=tf.ones(tf.shape(labels)), y=tf.to_float(labels)
        )

        # Foreground label: above threshold Intersection over Union (IoU)
        # First we get an array with True where IoU is greater or equal than
        # self._positive_overlap
        positive_overlap_inds = tf.greater_equal(
            max_overlaps, self._positive_overlap)
        # Finally we set 1 at True indices
        labels = tf.where(
            condition=positive_overlap_inds,
            x=tf.ones(tf.shape(labels)), y=labels
        )

        if self._clobber_positives:
            # Assign background labels last so that negative labels can clobber
            # positives. First we get an array with True where IoU is less than
            # self._negative_overlap
            negative_overlap_nonzero = tf.less(
                max_overlaps, self._negative_overlap)
            # Finally we set 0 at True indices
            labels = tf.where(
                condition=negative_overlap_nonzero,
                x=tf.zeros(tf.shape(labels)), y=labels
            )

        # Subsample positive labels if we have too many
        def subsample_positive():
            # Shuffle the foreground indices
            disable_fg_inds = tf.random_shuffle(fg_inds, seed=self._seed)
            # Select the indices that we have to ignore, this is
            # `tf.shape(fg_inds)[0] - num_fg` because we want to get only
            # `num_fg` foreground labels.
            disable_place = (tf.shape(fg_inds)[0] - num_fg)
            disable_fg_inds = disable_fg_inds[:disable_place]
            # Order the indices for sparse_to_dense compatibility
            disable_fg_inds, _ = tf.nn.top_k(
                disable_fg_inds, k=tf.shape(disable_fg_inds)[-1])
            disable_fg_inds = tf.reverse(disable_fg_inds, [0])
            disable_fg_inds = tf.sparse_to_dense(
                disable_fg_inds, tf.shape(labels, out_type=tf.int64),
                True, default_value=False
            )
            # Put -1 to ignore the anchors in the selected indices
            return tf.where(
                condition=tf.squeeze(disable_fg_inds),
                x=tf.to_float(tf.fill(tf.shape(labels), -1)), y=labels
            )

        num_fg = tf.to_int32(self._foreground_fraction * self._minibatch_size)
        # Get foreground indices, get True in the indices where we have a one.
        fg_inds = tf.equal(labels, 1)
        # We get only the indices where we have True.
        fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
        fg_inds_size = tf.size(fg_inds)
        # Condition to check whether we have too many positive labels.
        subsample_positive_cond = fg_inds_size > num_fg
        # Check the condition and subsample positive labels.
        labels = tf.cond(
            subsample_positive_cond,
            true_fn=subsample_positive, false_fn=lambda: labels
        )

        # Subsample negative labels if we have too many
        def subsample_negative():
            # Shuffle the background indices
            disable_bg_inds = tf.random_shuffle(bg_inds, seed=self._seed)

            # Select the indices that we have to ignore, this is
            # `tf.shape(bg_inds)[0] - num_bg` because we want to get only
            # `num_bg` background labels.
            disable_place = (tf.shape(bg_inds)[0] - num_bg)
            disable_bg_inds = disable_bg_inds[:disable_place]
            # Order the indices for sparse_to_dense compatibility
            disable_bg_inds, _ = tf.nn.top_k(
                disable_bg_inds, k=tf.shape(disable_bg_inds)[-1])
            disable_bg_inds = tf.reverse(disable_bg_inds, [0])
            disable_bg_inds = tf.sparse_to_dense(
                disable_bg_inds, tf.shape(labels, out_type=tf.int64),
                True, default_value=False
            )
            # Put -1 to ignore the anchors in the selected indices
            return tf.where(
                condition=tf.squeeze(disable_bg_inds),
                x=tf.to_float(tf.fill(tf.shape(labels), -1)), y=labels
            )

        # Recalculate the foreground indices after (maybe) disable some of them

        # Get foreground indices, get True in the indices where we have a one.
        fg_inds = tf.equal(labels, 1)
        # We get only the indices where we have True.
        fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
        fg_inds_size = tf.size(fg_inds)

        num_bg = tf.to_int32(self._minibatch_size - fg_inds_size)
        # Get background indices, get True in the indices where we have a zero.
        bg_inds = tf.equal(labels, 0)
        # We get only the indices where we have True.
        bg_inds = tf.squeeze(tf.where(bg_inds), axis=1)
        bg_inds_size = tf.size(bg_inds)
        # Condition to check whether we have too many negative labels.
        subsample_negative_cond = bg_inds_size > num_bg
        # Check the condition and subsample negative labels.
        labels = tf.cond(
            subsample_negative_cond,
            true_fn=subsample_negative, false_fn=lambda: labels
        )

        # Return bbox targets with shape (anchors.shape[0], 4).

        # Find the closest gt box for each anchor.
        argmax_overlaps = tf.argmax(overlaps, axis=1)
        # Eliminate duplicates.
        argmax_overlaps_unique, _ = tf.unique(argmax_overlaps)
        # Filter the gt_boxes.
        # We get only the indices where we have "inside anchors".
        anchor_filter_inds = tf.where(anchor_filter)
        gt_boxes = tf.gather(gt_boxes, argmax_overlaps)

        bbox_targets = encode_tf(anchors, gt_boxes)

        # For the anchors that aren't foreground, we ignore the bbox_targets.
        anchor_foreground_filter = tf.equal(labels, 1)
        bbox_targets = tf.where(
            condition=anchor_foreground_filter,
            x=bbox_targets, y=tf.zeros_like(bbox_targets)
        )

        # We unroll "inside anchors" value for all anchors (for shape
        # compatibility).

        # We complete the missed indices with zeros
        # (because scatter_nd has zeros as default).
        bbox_targets = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=bbox_targets,
            shape=tf.shape(all_anchors)
        )

        labels_scatter = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=labels,
            shape=[tf.shape(all_anchors)[0]]
        )
        # We have to put -1 to ignore the indices with 0 generated by
        # scatter_nd, otherwise it will be considered as background.
        labels = tf.where(
            condition=anchor_filter, x=labels_scatter,
            y=tf.to_float(tf.fill(tf.shape(labels_scatter), -1))
        )

        max_overlaps = tf.scatter_nd(
            indices=tf.to_int32(anchor_filter_inds),
            updates=max_overlaps,
            shape=[tf.shape(all_anchors)[0]]
        )

        return labels, bbox_targets, max_overlaps
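
The label assignment above repeatedly turns a list of possibly duplicated, unsorted anchor indices into a dense boolean mask: deduplicate with tf.unique, sort via tf.nn.top_k plus tf.reverse, then expand with tf.sparse_to_dense. A minimal sketch of that idiom, assuming TensorFlow 1.x and toy indices:

import tensorflow as tf

indices = tf.constant([4, 1, 4, 2], dtype=tf.int64)   # toy anchor indices with a duplicate
output_shape = tf.constant([6], dtype=tf.int64)       # toy total number of anchors

# Deduplicate, then sort ascending (top_k sorts descending, so reverse).
unique_indices, _ = tf.unique(indices)
sorted_desc, _ = tf.nn.top_k(unique_indices, k=tf.shape(unique_indices)[-1])
sorted_asc = tf.reverse(sorted_desc, [0])

# sparse_to_dense expects sorted indices; produce a dense True/False mask.
mask = tf.sparse_to_dense(sorted_asc, output_shape, True, default_value=False)

with tf.Session() as sess:
    print(sess.run(mask))  # [False  True  True False  True False]
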
Example #56
0
def learn(
        make_env,
        make_policy,
        *,
        n_episodes,
        horizon,
        delta,
        gamma,
        max_iters,
        sampler=None,
        use_natural_gradient=False,  # can be 'exact' or 'approximate'
        fisher_reg=1e-2,
        iw_method='is',
        iw_norm='none',
        bound='J',
        line_search_type='parabola',
        save_weights=False,
        improvement_tol=0.,
        center_return=False,
        render_after=None,
        max_offline_iters=100,
        callback=None,
        clipping=False,
        entropy='none',
        positive_return=False,
        reward_clustering='none'):

    np.set_printoptions(precision=3)
    max_samples = horizon * n_episodes

    if line_search_type == 'binary':
        line_search = line_search_binary
    elif line_search_type == 'parabola':
        line_search = line_search_parabola
    else:
        raise ValueError()

    # Building the environment
    env = make_env()
    ob_space = env.observation_space
    ac_space = env.action_space

    # Building the policy
    pi = make_policy('pi', ob_space, ac_space)
    oldpi = make_policy('oldpi', ob_space, ac_space)

    all_var_list = pi.get_trainable_variables()
    var_list = [
        v for v in all_var_list if v.name.split('/')[1].startswith('pol')
    ]

    shapes = [U.intprod(var.get_shape().as_list()) for var in var_list]
    n_parameters = sum(shapes)

    # Placeholders
    ob_ = ob = U.get_placeholder_cached(name='ob')
    ac_ = pi.pdtype.sample_placeholder([max_samples], name='ac')
    mask_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='mask')
    rew_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='rew')
    disc_rew_ = tf.placeholder(dtype=tf.float32,
                               shape=(max_samples),
                               name='disc_rew')
    clustered_rew_ = tf.placeholder(dtype=tf.float32, shape=(n_episodes))
    gradient_ = tf.placeholder(dtype=tf.float32,
                               shape=(n_parameters, 1),
                               name='gradient')
    iter_number_ = tf.placeholder(dtype=tf.int32, name='iter_number')
    losses_with_name = []

    # Policy densities
    target_log_pdf = pi.pd.logp(ac_)
    behavioral_log_pdf = oldpi.pd.logp(ac_)
    log_ratio = target_log_pdf - behavioral_log_pdf

    # Split operations
    disc_rew_split = tf.stack(tf.split(disc_rew_ * mask_, n_episodes))
    rew_split = tf.stack(tf.split(rew_ * mask_, n_episodes))
    log_ratio_split = tf.stack(tf.split(log_ratio * mask_, n_episodes))
    target_log_pdf_split = tf.stack(
        tf.split(target_log_pdf * mask_, n_episodes))
    behavioral_log_pdf_split = tf.stack(
        tf.split(behavioral_log_pdf * mask_, n_episodes))
    mask_split = tf.stack(tf.split(mask_, n_episodes))

    # Renyi divergence
    emp_d2_split = tf.stack(
        tf.split(pi.pd.renyi(oldpi.pd, 2) * mask_, n_episodes))
    emp_d2_cum_split = tf.reduce_sum(emp_d2_split, axis=1)
    empirical_d2 = tf.reduce_mean(tf.exp(emp_d2_cum_split))

    # Return
    ep_return = clustered_rew_  #tf.reduce_sum(mask_split * disc_rew_split, axis=1)
    if clipping:
        rew_split = tf.clip_by_value(rew_split, -1, 1)

    if center_return:
        ep_return = ep_return - tf.reduce_mean(ep_return)
        rew_split = rew_split - (tf.reduce_sum(rew_split) /
                                 (tf.reduce_sum(mask_split) + 1e-24))

    discounter = [pow(gamma, i) for i in range(0, horizon)]  # Discount factors gamma^i (decreasing)
    discounter_tf = tf.constant(discounter)
    disc_rew_split = rew_split * discounter_tf

    #tf.add_to_collection('prints', tf.Print(ep_return, [ep_return], 'ep_return_not_clustered', summarize=20))

    # Reward clustering
    '''
    rew_clustering_options = reward_clustering.split(':')
    if reward_clustering == 'none':
        pass # Do nothing
    elif rew_clustering_options[0] == 'global':
        assert len(rew_clustering_options) == 2, "Reward clustering: Provide the correct number of parameters"
        N = int(rew_clustering_options[1])
        tf.add_to_collection('prints', tf.Print(ep_return, [ep_return], 'ep_return', summarize=20))
        global_rew_min = tf.Variable(float('+inf'), trainable=False)
        global_rew_max = tf.Variable(float('-inf'), trainable=False)
        rew_min = tf.reduce_min(ep_return)
        rew_max = tf.reduce_max(ep_return)
        global_rew_min = tf.assign(global_rew_min, tf.minimum(global_rew_min, rew_min))
        global_rew_max = tf.assign(global_rew_max, tf.maximum(global_rew_max, rew_max))
        interval_size = (global_rew_max - global_rew_min) / N
        ep_return = tf.floordiv(ep_return, interval_size) * interval_size
    elif rew_clustering_options[0] == 'batch':
        assert len(rew_clustering_options) == 2, "Reward clustering: Provide the correct number of parameters"
        N = int(rew_clustering_options[1])
        rew_min = tf.reduce_min(ep_return)
        rew_max = tf.reduce_max(ep_return)
        interval_size = (rew_max - rew_min) / N
        ep_return = tf.floordiv(ep_return, interval_size) * interval_size
    elif rew_clustering_options[0] == 'manual':
        assert len(rew_clustering_options) == 4, "Reward clustering: Provide the correct number of parameters"
        N, rew_min, rew_max = map(int, rew_clustering_options[1:])
        print("N:", N)
        print("Min reward:", rew_min)
        print("Max reward:", rew_max)
        interval_size = (rew_max - rew_min) / N
        print("Interval size:", interval_size)
        # Clip to avoid overflow and cluster
        ep_return = tf.clip_by_value(ep_return, rew_min, rew_max)
        ep_return = tf.cast(tf.floordiv(ep_return, interval_size) * interval_size, tf.float32)
        tf.add_to_collection('prints', tf.Print(ep_return, [ep_return], 'ep_return_clustered', summarize=20))
    else:
        raise Exception('Unrecognized reward clustering scheme.')
    '''

    return_mean = tf.reduce_mean(ep_return)
    return_std = U.reduce_std(ep_return)
    return_max = tf.reduce_max(ep_return)
    return_min = tf.reduce_min(ep_return)
    return_abs_max = tf.reduce_max(tf.abs(ep_return))
    return_step_max = tf.reduce_max(tf.abs(rew_split))  # Max step reward
    return_step_mean = tf.abs(tf.reduce_mean(rew_split))
    positive_step_return_max = tf.maximum(0.0, tf.reduce_max(rew_split))
    negative_step_return_max = tf.maximum(0.0, tf.reduce_max(-rew_split))
    return_step_maxmin = tf.abs(positive_step_return_max -
                                negative_step_return_max)

    losses_with_name.extend([(return_mean, 'InitialReturnMean'),
                             (return_max, 'InitialReturnMax'),
                             (return_min, 'InitialReturnMin'),
                             (return_std, 'InitialReturnStd'),
                             (empirical_d2, 'EmpiricalD2'),
                             (return_step_max, 'ReturnStepMax'),
                             (return_step_maxmin, 'ReturnStepMaxmin')])

    if iw_method == 'pdis':
        # log_ratio_split cumulative sum
        log_ratio_cumsum = tf.cumsum(log_ratio_split, axis=1)
        # Exponentiate
        ratio_cumsum = tf.exp(log_ratio_cumsum)
        # Multiply by the step-wise reward (not episode)
        ratio_reward = ratio_cumsum * disc_rew_split
        # Average on episodes
        ratio_reward_per_episode = tf.reduce_sum(ratio_reward, axis=1)
        w_return_mean = tf.reduce_sum(ratio_reward_per_episode,
                                      axis=0) / n_episodes
        # Get d2(w0:t) with mask
        d2_w_0t = tf.exp(tf.cumsum(emp_d2_split,
                                   axis=1)) * mask_split  # LEAVE THIS OUTSIDE
        # Sum d2(w0:t) over timesteps
        episode_d2_0t = tf.reduce_sum(d2_w_0t, axis=1)
        # Sample variance
        J_sample_variance = (1 / (n_episodes - 1)) * tf.reduce_sum(
            tf.square(ratio_reward_per_episode - w_return_mean))
        losses_with_name.append((J_sample_variance, 'J_sample_variance'))
        losses_with_name.extend([(tf.reduce_max(ratio_cumsum), 'MaxIW'),
                                 (tf.reduce_min(ratio_cumsum), 'MinIW'),
                                 (tf.reduce_mean(ratio_cumsum), 'MeanIW'),
                                 (U.reduce_std(ratio_cumsum), 'StdIW')])
        losses_with_name.extend([(tf.reduce_max(d2_w_0t), 'MaxD2w0t'),
                                 (tf.reduce_min(d2_w_0t), 'MinD2w0t'),
                                 (tf.reduce_mean(d2_w_0t), 'MeanD2w0t'),
                                 (U.reduce_std(d2_w_0t), 'StdD2w0t')])

    elif iw_method == 'is':
        iw = tf.exp(tf.reduce_sum(log_ratio_split, axis=1))
        if iw_norm == 'none':
            iwn = iw / n_episodes
            w_return_mean = tf.reduce_sum(iwn * ep_return)
            J_sample_variance = (1 / (n_episodes - 1)) * tf.reduce_sum(
                tf.square(iw * ep_return - w_return_mean))
            losses_with_name.append((J_sample_variance, 'J_sample_variance'))
        elif iw_norm == 'sn':
            iwn = iw / tf.reduce_sum(iw)
            w_return_mean = tf.reduce_sum(iwn * ep_return)
        elif iw_norm == 'regression':
            iwn = iw / n_episodes
            mean_iw = tf.reduce_mean(iw)
            beta = tf.reduce_sum(
                (iw - mean_iw) * ep_return * iw) / (tf.reduce_sum(
                    (iw - mean_iw)**2) + 1e-24)
            w_return_mean = tf.reduce_mean(iw * ep_return - beta * (iw - 1))
        else:
            raise NotImplementedError()
        ess_classic = tf.linalg.norm(iw, 1)**2 / tf.linalg.norm(iw, 2)**2
        sqrt_ess_classic = tf.linalg.norm(iw, 1) / tf.linalg.norm(iw, 2)
        ess_renyi = n_episodes / empirical_d2
        losses_with_name.extend([(tf.reduce_max(iwn), 'MaxIWNorm'),
                                 (tf.reduce_min(iwn), 'MinIWNorm'),
                                 (tf.reduce_mean(iwn), 'MeanIWNorm'),
                                 (U.reduce_std(iwn), 'StdIWNorm'),
                                 (tf.reduce_max(iw), 'MaxIW'),
                                 (tf.reduce_min(iw), 'MinIW'),
                                 (tf.reduce_mean(iw), 'MeanIW'),
                                 (U.reduce_std(iw), 'StdIW'),
                                 (ess_classic, 'ESSClassic'),
                                 (ess_renyi, 'ESSRenyi')])
    elif iw_method == 'rbis':
        # Get pdfs for episodes
        target_log_pdf_episode = tf.reduce_sum(target_log_pdf_split, axis=1)
        behavioral_log_pdf_episode = tf.reduce_sum(behavioral_log_pdf_split,
                                                   axis=1)
        # Normalize log-probabilities (to avoid overflow as much as possible)
        normalization_factor = tf.reduce_mean(
            tf.stack([target_log_pdf_episode, behavioral_log_pdf_episode]))
        target_norm_log_pdf_episode = target_log_pdf_episode - normalization_factor
        behavioral_norm_log_pdf_episode = behavioral_log_pdf_episode - normalization_factor
        # Exponentiate
        target_pdf_episode = tf.clip_by_value(
            tf.cast(tf.exp(target_norm_log_pdf_episode), tf.float64), 1e-300,
            1e+300)
        behavioral_pdf_episode = tf.clip_by_value(
            tf.cast(tf.exp(behavioral_norm_log_pdf_episode), tf.float64),
            1e-300, 1e+300)
        tf.add_to_collection(
            'asserts',
            tf.assert_positive(target_pdf_episode, name='target_pdf_positive'))
        tf.add_to_collection(
            'asserts',
            tf.assert_positive(behavioral_pdf_episode,
                               name='behavioral_pdf_positive'))
        # Compute the merging matrix (reward-clustering) and the number of clusters
        reward_unique, reward_indexes = tf.unique(ep_return)
        episode_clustering_matrix = tf.cast(
            tf.one_hot(reward_indexes, n_episodes), tf.float64)
        max_index = tf.reduce_max(reward_indexes) + 1
        trajectories_per_cluster = tf.reduce_sum(episode_clustering_matrix,
                                                 axis=0)[:max_index]
        tf.add_to_collection(
            'asserts',
            tf.assert_positive(tf.reduce_sum(episode_clustering_matrix,
                                             axis=0)[:max_index],
                               name='clustering_matrix'))
        # Get the clustered pdfs
        clustered_target_pdf = tf.matmul(
            tf.reshape(target_pdf_episode, (1, -1)),
            episode_clustering_matrix)[0][:max_index]
        clustered_behavioral_pdf = tf.matmul(
            tf.reshape(behavioral_pdf_episode, (1, -1)),
            episode_clustering_matrix)[0][:max_index]
        tf.add_to_collection(
            'asserts',
            tf.assert_positive(clustered_target_pdf,
                               name='clust_target_pdf_positive'))
        tf.add_to_collection(
            'asserts',
            tf.assert_positive(clustered_behavioral_pdf,
                               name='clust_behavioral_pdf_positive'))
        # Compute the J
        ratio_clustered = clustered_target_pdf / clustered_behavioral_pdf
        #ratio_reward = tf.cast(ratio_clustered, tf.float32) * reward_unique                                                  # ---- No cluster cardinality
        ratio_reward = tf.cast(ratio_clustered,
                               tf.float32) * reward_unique * tf.cast(
                                   trajectories_per_cluster,
                                   tf.float32)  # ---- Cluster cardinality
        #w_return_mean = tf.reduce_sum(ratio_reward) / tf.cast(max_index, tf.float32)                                         # ---- No cluster cardinality
        w_return_mean = tf.reduce_sum(ratio_reward) / tf.cast(
            n_episodes, tf.float32)  # ---- Cluster cardinality
        # Divergences
        ess_classic = tf.linalg.norm(ratio_reward, 1)**2 / tf.linalg.norm(
            ratio_reward, 2)**2
        sqrt_ess_classic = tf.linalg.norm(ratio_reward, 1) / tf.linalg.norm(
            ratio_reward, 2)
        ess_renyi = n_episodes / empirical_d2
        # Summaries
        losses_with_name.extend([(tf.reduce_max(ratio_clustered), 'MaxIW'),
                                 (tf.reduce_min(ratio_clustered), 'MinIW'),
                                 (tf.reduce_mean(ratio_clustered), 'MeanIW'),
                                 (U.reduce_std(ratio_clustered), 'StdIW'),
                                 (1 - (max_index / n_episodes),
                                  'RewardCompression'),
                                 (ess_classic, 'ESSClassic'),
                                 (ess_renyi, 'ESSRenyi')])
    else:
        raise NotImplementedError()

    if bound == 'J':
        bound_ = w_return_mean
    elif bound == 'std-d2':
        bound_ = w_return_mean - tf.sqrt(
            (1 - delta) / (delta * ess_renyi)) * return_std
    elif bound == 'max-d2':
        var_estimate = tf.sqrt(
            (1 - delta) / (delta * ess_renyi)) * return_abs_max
        bound_ = w_return_mean - tf.sqrt(
            (1 - delta) / (delta * ess_renyi)) * return_abs_max
    elif bound == 'max-ess':
        bound_ = w_return_mean - tf.sqrt(
            (1 - delta) / delta) / sqrt_ess_classic * return_abs_max
    elif bound == 'std-ess':
        bound_ = w_return_mean - tf.sqrt(
            (1 - delta) / delta) / sqrt_ess_classic * return_std
    elif bound == 'pdis-max-d2':
        # Discount factor
        if gamma >= 1:
            discounter = [
                float(1 + 2 * (horizon - t - 1)) for t in range(0, horizon)
            ]
        else:

            def f(t):
                return pow(gamma, 2 * t) + (
                    2 * pow(gamma, t) *
                    (pow(gamma, t + 1) - pow(gamma, horizon))) / (1 - gamma)

            discounter = [f(t) for t in range(0, horizon)]
        discounter_tf = tf.constant(discounter)
        mean_episode_d2 = tf.reduce_sum(
            d2_w_0t, axis=0) / (tf.reduce_sum(mask_split, axis=0) + 1e-24)
        discounted_d2 = mean_episode_d2 * discounter_tf  # Discounted d2
        discounted_total_d2 = tf.reduce_sum(discounted_d2,
                                            axis=0)  # Sum over time
        bound_ = w_return_mean - tf.sqrt(
            (1 - delta) * discounted_total_d2 /
            (delta * n_episodes)) * return_step_max
    elif bound == 'pdis-mean-d2':
        # Discount factor
        if gamma >= 1:
            discounter = [
                float(1 + 2 * (horizon - t - 1)) for t in range(0, horizon)
            ]
        else:

            def f(t):
                return pow(gamma, 2 * t) + (
                    2 * pow(gamma, t) *
                    (pow(gamma, t + 1) - pow(gamma, horizon))) / (1 - gamma)

            discounter = [f(t) for t in range(0, horizon)]
        discounter_tf = tf.constant(discounter)
        mean_episode_d2 = tf.reduce_sum(
            d2_w_0t, axis=0) / (tf.reduce_sum(mask_split, axis=0) + 1e-24)
        discounted_d2 = mean_episode_d2 * discounter_tf  # Discounted d2
        discounted_total_d2 = tf.reduce_sum(discounted_d2,
                                            axis=0)  # Sum over time
        bound_ = w_return_mean - tf.sqrt(
            (1 - delta) * discounted_total_d2 /
            (delta * n_episodes)) * return_step_mean
    else:
        raise NotImplementedError()

    # Policy entropy for exploration
    ent = pi.pd.entropy()
    meanent = tf.reduce_mean(ent)
    losses_with_name.append((meanent, 'MeanEntropy'))
    # Add policy entropy bonus
    if entropy != 'none':
        scheme, v1, v2 = entropy.split(':')
        if scheme == 'step':
            entcoeff = tf.cond(iter_number_ < int(v2), lambda: float(v1),
                               lambda: float(0.0))
            losses_with_name.append((entcoeff, 'EntropyCoefficient'))
            entbonus = entcoeff * meanent
            bound_ = bound_ + entbonus
        elif scheme == 'lin':
            ip = tf.cast(iter_number_ / max_iters, tf.float32)
            entcoeff_decay = tf.maximum(
                0.0,
                float(v2) + (float(v1) - float(v2)) * (1.0 - ip))
            losses_with_name.append((entcoeff_decay, 'EntropyCoefficient'))
            entbonus = entcoeff_decay * meanent
            bound_ = bound_ + entbonus
        elif scheme == 'exp':
            ent_f = tf.exp(
                -tf.abs(tf.reduce_mean(iw) - 1) * float(v2)) * float(v1)
            losses_with_name.append((ent_f, 'EntropyCoefficient'))
            bound_ = bound_ + ent_f * meanent
        else:
            raise Exception('Unrecognized entropy scheme.')

    losses_with_name.append((w_return_mean, 'ReturnMeanIW'))
    losses_with_name.append((bound_, 'Bound'))
    losses, loss_names = map(list, zip(*losses_with_name))

    if use_natural_gradient:
        p = tf.placeholder(dtype=tf.float32, shape=[None])
        target_logpdf_episode = tf.reduce_sum(target_log_pdf_split *
                                              mask_split,
                                              axis=1)
        grad_logprob = U.flatgrad(
            tf.stop_gradient(iwn) * target_logpdf_episode, var_list)
        dot_product = tf.reduce_sum(grad_logprob * p)
        hess_logprob = U.flatgrad(dot_product, var_list)
        compute_linear_operator = U.function([p, ob_, ac_, disc_rew_, mask_],
                                             [-hess_logprob])

    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv,
                 newv) in zipsame(oldpi.get_variables(), pi.get_variables())
        ])

    assert_ops = tf.group(*tf.get_collection('asserts'))
    print_ops = tf.group(*tf.get_collection('prints'))

    compute_lossandgrad = U.function(
        [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_],
        losses + [U.flatgrad(bound_, var_list), assert_ops, print_ops])
    compute_grad = U.function(
        [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_],
        [U.flatgrad(bound_, var_list), assert_ops, print_ops])
    compute_bound = U.function(
        [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_],
        [bound_, assert_ops, print_ops])
    compute_losses = U.function(
        [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_],
        losses)
    #compute_temp = U.function([ob_, ac_, rew_, disc_rew_, mask_], [ratio_cumsum, discounted_ratio])

    set_parameter = U.SetFromFlat(var_list)
    get_parameter = U.GetFlat(var_list)

    if sampler is None:
        seg_gen = traj_segment_generator(pi,
                                         env,
                                         n_episodes,
                                         horizon,
                                         stochastic=True)
        sampler = type("SequentialSampler", (object, ), {
            "collect": lambda self, _: seg_gen.__next__()
        })()

    U.initialize()

    # Starting optimizing

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=n_episodes)
    rewbuffer = deque(maxlen=n_episodes)

    while True:

        iters_so_far += 1

        if render_after is not None and iters_so_far % render_after == 0:
            if hasattr(env, 'render'):
                render(env, pi, horizon)

        if callback:
            callback(locals(), globals())

        if iters_so_far >= max_iters:
            print('Finished...')
            break

        logger.log('********** Iteration %i ************' % iters_so_far)

        theta = get_parameter()

        with timed('sampling'):
            seg = sampler.collect(theta)

        add_disc_rew(seg, gamma)

        lens, rets = seg['ep_lens'], seg['ep_rets']
        lenbuffer.extend(lens)
        rewbuffer.extend(rets)
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)

        # Get clustered reward
        reward_matrix = np.reshape(seg['disc_rew'] * seg['mask'],
                                   (n_episodes, horizon))
        ep_reward = np.sum(reward_matrix, axis=1)
        if reward_clustering == 'none':
            pass
        elif reward_clustering == 'floor':
            ep_reward = np.floor(ep_reward)
        elif reward_clustering == 'ceil':
            ep_reward = np.ceil(ep_reward)

        args = ob, ac, rew, disc_rew, clustered_rew, mask, iter_number = seg[
            'ob'], seg['ac'], seg['rew'], seg['disc_rew'], ep_reward, seg[
                'mask'], iters_so_far

        assign_old_eq_new()

        def evaluate_loss():
            loss = compute_bound(*args)
            return loss[0]

        def evaluate_gradient():
            gradient = compute_grad(*args)
            return gradient[0]

        if use_natural_gradient:

            def evaluate_fisher_vector_prod(x):
                return compute_linear_operator(x, *args)[0] + fisher_reg * x

            def evaluate_natural_gradient(g):
                return cg(evaluate_fisher_vector_prod,
                          g,
                          cg_iters=10,
                          verbose=0)
        else:
            evaluate_natural_gradient = None

        with timed('summaries before'):
            logger.record_tabular("Iteration", iters_so_far)
            logger.record_tabular("InitialBound", evaluate_loss())
            logger.record_tabular("EpLenMean", np.mean(lenbuffer))
            logger.record_tabular("EpRewMean", np.mean(rewbuffer))
            logger.record_tabular("EpThisIter", len(lens))
            logger.record_tabular("EpisodesSoFar", episodes_so_far)
            logger.record_tabular("TimestepsSoFar", timesteps_so_far)
            logger.record_tabular("TimeElapsed", time.time() - tstart)

        if save_weights:
            logger.record_tabular('Weights', str(get_parameter()))
            import pickle
            with open('checkpoint.pkl', 'wb') as file:
                pickle.dump(theta, file)

        with timed("offline optimization"):
            theta, improvement = optimize_offline(
                theta,
                set_parameter,
                line_search,
                evaluate_loss,
                evaluate_gradient,
                evaluate_natural_gradient,
                max_offline_ite=max_offline_iters)

        set_parameter(theta)

        with timed('summaries after'):
            meanlosses = np.array(compute_losses(*args))
            for (lossname, lossval) in zip(loss_names, meanlosses):
                logger.record_tabular(lossname, lossval)

        logger.dump_tabular()

    env.close()
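
The core off-policy quantity in this example is an importance-sampled return: per-episode weights exp(sum of log ratios) times the episode return, either plain ('is' with iw_norm 'none') or self-normalized ('sn'). A toy NumPy sketch of those two estimators (just the formula, not the author's graph code):

import numpy as np

# Toy per-episode summed log importance ratios and returns.
log_ratio_per_episode = np.array([0.2, -0.1, 0.05])
ep_return = np.array([1.0, 0.5, 2.0])
n_episodes = len(ep_return)

iw = np.exp(log_ratio_per_episode)            # per-episode importance weights
j_is = np.sum(iw / n_episodes * ep_return)    # plain IS estimate (iw_norm='none')
j_sn = np.sum(iw / iw.sum() * ep_return)      # self-normalized estimate (iw_norm='sn')
print(j_is, j_sn)
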
Example #57
0
  def _process_input_helper(self, update_row_factors,
                            sp_input=None, transpose_input=False):
    """Creates the graph for processing a sparse slice of input.

    Args:
      update_row_factors: if True, update the row_factors, else update the
        column factors.
      sp_input: Please refer to comments for update_row_factors and
        update_col_factors.
      transpose_input: If true, the input is logically transposed and then the
        corresponding rows/columns of the transposed input are updated.

    Returns:
      A tuple consisting of the following two elements:
      new_values: New values for the row/column factors.
      update_op: An op that assigns the newly computed values to the row/column
        factors.
    """
    assert isinstance(sp_input, tf.SparseTensor)

    if update_row_factors:
      left = self._row_factors
      right_factors = self._col_factors_cache
      row_wt = self._row_wt_cache
      col_wt = self._col_wt_cache
      sharding_func = WALSModel._get_sharding_func(self._input_rows,
                                                   self._num_row_shards)
      gramian = self._col_gramian_cache
    else:
      left = self._col_factors
      right_factors = self._row_factors_cache
      row_wt = self._col_wt_cache
      col_wt = self._row_wt_cache
      sharding_func = WALSModel._get_sharding_func(self._input_cols,
                                                   self._num_col_shards)
      gramian = self._row_gramian_cache
      transpose_input = not transpose_input

    # Note that the row indices of sp_input are based on the original full input.
    # Here we reindex the rows and give them contiguous ids starting at 0.
    # We use tf.unique to achieve this reindexing. Note that this is done so
    # that the downstream kernel can assume that the input is "dense" along the
    # row dimension.
    row_ids, col_ids = tf.split(1, 2, sp_input.indices)
    update_row_indices, all_row_ids = tf.unique(row_ids[:, 0])
    update_col_indices, all_col_ids = tf.unique(col_ids[:, 0])
    col_ids = tf.expand_dims(tf.cast(all_col_ids, tf.int64), 1)
    row_ids = tf.expand_dims(tf.cast(all_row_ids, tf.int64), 1)

    if transpose_input:
      update_indices = update_col_indices
      row_shape = [tf.cast(tf.shape(update_row_indices)[0], tf.int64)]
      gather_indices = update_row_indices
    else:
      update_indices = update_row_indices
      row_shape = [tf.cast(tf.shape(update_col_indices)[0], tf.int64)]
      gather_indices = update_col_indices

    num_rows = tf.cast(tf.shape(update_indices)[0], tf.int64)
    col_shape = [num_rows]
    right = embedding_ops.embedding_lookup(right_factors, gather_indices,
                                           partition_strategy='div')
    new_sp_indices = tf.concat(1, [row_ids, col_ids])
    new_sp_shape = (tf.concat(0, [row_shape, col_shape]) if transpose_input
                    else tf.concat(0, [col_shape, row_shape]))
    new_sp_input = tf.SparseTensor(indices=new_sp_indices,
                                   values=sp_input.values, shape=new_sp_shape)

    # Compute lhs and rhs of the normal equations
    total_lhs = (self._unobserved_weight * gramian)
    if self._regularization is not None:
      total_lhs += self._regularization
    if self._row_weights is None:
      # Special case of ALS. Use a much simpler update rule.
      total_rhs = (self._unobserved_weight *
                   tf.sparse_tensor_dense_matmul(new_sp_input, right,
                                                 adjoint_a=transpose_input))
      # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of
      # transposing explicitly.
      # TODO(rmlarsen): multi-thread tf.matrix_solve.
      new_left_values = tf.transpose(tf.matrix_solve(total_lhs,
                                                     tf.transpose(total_rhs)))
    else:
      # TODO(yifanchen): Add special handling for single shard without using
      # embedding_lookup and perform benchmarks for those cases.
      row_weights_slice = embedding_ops.embedding_lookup(
          row_wt, update_indices, partition_strategy='div')
      col_weights = embedding_ops.embedding_lookup(
          col_wt, gather_indices, partition_strategy='div')
      partial_lhs, total_rhs = wals_compute_partial_lhs_and_rhs(
          right,
          col_weights,
          self._unobserved_weight,
          row_weights_slice,
          new_sp_input.indices,
          new_sp_input.values,
          num_rows,
          transpose_input,
          name="wals_compute_partial_lhs_rhs")
      total_lhs = tf.expand_dims(total_lhs, 0) + partial_lhs
      total_rhs = tf.expand_dims(total_rhs, -1)
      new_left_values = tf.squeeze(tf.matrix_solve(total_lhs, total_rhs), [2])

    return (new_left_values,
            self.scatter_update(left,
                                update_indices,
                                new_left_values,
                                sharding_func))
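
The WALS input processing above leans on tf.unique to remap the sparse slice's original (non-contiguous) row and column ids to contiguous ids starting at 0, so the downstream solver can treat the slice as dense along that dimension. A minimal sketch of that reindexing trick, assuming TensorFlow 1.x:

import tensorflow as tf

# Toy row ids as they appear in the original full input matrix.
row_ids = tf.constant([10, 42, 10, 7], dtype=tf.int64)

# `unique_ids` lists the distinct original ids; `contiguous_ids` maps each
# entry to its position in `unique_ids`, i.e. a dense 0..k-1 reindexing.
unique_ids, contiguous_ids = tf.unique(row_ids)

with tf.Session() as sess:
    print(sess.run([unique_ids, contiguous_ids]))  # [10 42 7], [0 1 0 2]
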
Example #58
0
def merge_boxes_with_multiple_labels(boxes,
                                     classes,
                                     confidences,
                                     num_classes,
                                     quantization_bins=10000):
  """Merges boxes with same coordinates and returns K-hot encoded classes.

  Args:
    boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
      normalized coordinates are allowed.
    classes: A tf.int32 tensor with shape [N] holding class indices.
      The class index starts at 0.
    confidences: A tf.float32 tensor with shape [N] holding class confidences.
    num_classes: total number of classes to use for K-hot encoding.
    quantization_bins: the number of bins used to quantize the box coordinate.

  Returns:
    merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
      where N' <= N.
    class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
      K-hot encodings for the merged boxes.
    confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
      holding encodings of confidences for the merged boxes.
    merged_box_indices: A tf.int32 tensor with shape [N'] holding original
      indices of the boxes.
  """
  boxes_shape = tf.shape(boxes)
  classes_shape = tf.shape(classes)
  confidences_shape = tf.shape(confidences)
  box_class_shape_assert = shape_utils.assert_shape_equal_along_first_dimension(
      boxes_shape, classes_shape)
  box_confidence_shape_assert = (
      shape_utils.assert_shape_equal_along_first_dimension(
          boxes_shape, confidences_shape))
  box_dimension_assert = tf.assert_equal(boxes_shape[1], 4)
  box_normalized_assert = shape_utils.assert_box_normalized(boxes)

  with tf.control_dependencies(
      [box_class_shape_assert, box_confidence_shape_assert,
       box_dimension_assert, box_normalized_assert]):
    quantized_boxes = tf.to_int64(boxes * (quantization_bins - 1))
    ymin, xmin, ymax, xmax = tf.unstack(quantized_boxes, axis=1)
    hashcodes = (
        ymin +
        xmin * quantization_bins +
        ymax * quantization_bins * quantization_bins +
        xmax * quantization_bins * quantization_bins * quantization_bins)
    unique_hashcodes, unique_indices = tf.unique(hashcodes)
    num_boxes = tf.shape(boxes)[0]
    num_unique_boxes = tf.shape(unique_hashcodes)[0]
    merged_box_indices = tf.unsorted_segment_min(
        tf.range(num_boxes), unique_indices, num_unique_boxes)
    merged_boxes = tf.gather(boxes, merged_box_indices)

    def map_box_encodings(i):
      """Produces box K-hot and score encodings for each class index."""
      box_mask = tf.equal(
          unique_indices, i * tf.ones(num_boxes, dtype=tf.int32))
      box_mask = tf.reshape(box_mask, [-1])
      box_indices = tf.boolean_mask(classes, box_mask)
      box_confidences = tf.boolean_mask(confidences, box_mask)
      box_class_encodings = tf.sparse_to_dense(
          box_indices, [num_classes], 1, validate_indices=False)
      box_confidence_encodings = tf.sparse_to_dense(
          box_indices, [num_classes], box_confidences, validate_indices=False)
      return box_class_encodings, box_confidence_encodings

    class_encodings, confidence_encodings = tf.map_fn(
        map_box_encodings,
        tf.range(num_unique_boxes),
        back_prop=False,
        dtype=(tf.int32, tf.float32))

    merged_boxes = tf.reshape(merged_boxes, [-1, 4])
    class_encodings = tf.reshape(class_encodings, [-1, num_classes])
    confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes])
    merged_box_indices = tf.reshape(merged_box_indices, [-1])
    return (merged_boxes, class_encodings, confidence_encodings,
            merged_box_indices)
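
Example #58 merges duplicate boxes by hashing the quantized coordinates, deduplicating the hashcodes with tf.unique, and mapping each unique hash back to the first original box index with tf.unsorted_segment_min. A small self-contained sketch of that dedup pattern, assuming TensorFlow 1.x and toy hashcodes:

import tensorflow as tf

hashcodes = tf.constant([111, 222, 111, 333], dtype=tf.int64)  # toy box hashcodes
num_boxes = tf.shape(hashcodes)[0]

unique_hashcodes, unique_indices = tf.unique(hashcodes)
num_unique = tf.shape(unique_hashcodes)[0]

# For every unique hash, keep the smallest (i.e. first-seen) original box index.
merged_box_indices = tf.unsorted_segment_min(
    tf.range(num_boxes), unique_indices, num_unique)

with tf.Session() as sess:
    print(sess.run(merged_box_indices))  # [0 1 3]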