def _create_model(self, train_triples):
    # Count unique items to determine embedding matrix sizes
    entity_cnt = len(set(train_triples[:, 0]).union(train_triples[:, 2]))
    rel_cnt = len(set(train_triples[:, 1]))
    init_sd = 1.0 / np.sqrt(self.embedding_size)
    # Embedding variables for all entities and relationship types
    entity_embedding_shape = [entity_cnt, self.embedding_size]
    # Relationship embeddings will be stored in flattened format to make
    # applying maxnorm constraints easier
    rel_embedding_shape = [rel_cnt, self.embedding_size * self.embedding_size]
    entity_init = tf.truncated_normal(entity_embedding_shape, stddev=init_sd)
    rel_init = tf.truncated_normal(rel_embedding_shape, stddev=init_sd)
    if self.maxnorm is not None:
        # Ensure maxnorm constraints are initially satisfied
        entity_init = dense_maxnorm(entity_init, self.maxnorm)
        rel_init = dense_maxnorm(rel_init, self.maxnorm)
    self.entity_embedding_vars = tf.Variable(entity_init)
    self.rel_embedding_vars = tf.Variable(rel_init)
    # Embedding layer for each (head, rel, tail) triple being fed in as input
    head_embed = tf.nn.embedding_lookup(self.entity_embedding_vars, self.head_input)
    tail_embed = tf.nn.embedding_lookup(self.entity_embedding_vars, self.tail_input)
    rel_embed = tf.nn.embedding_lookup(self.rel_embedding_vars, self.rel_input)
    # Reshape rel_embed into square D x D matrices
    rel_embed_square = tf.reshape(rel_embed, (-1, self.embedding_size, self.embedding_size))
    # Reshape head_embed and tail_embed to be suitable for the matrix multiplication
    head_embed_row = tf.expand_dims(head_embed, 1)  # embeddings as row vectors
    tail_embed_col = tf.expand_dims(tail_embed, 2)  # embeddings as column vectors
    head_rel_mult = tf.batch_matmul(head_embed_row, rel_embed_square)
    # Output needs a squeeze into a 1d vector
    raw_output = tf.squeeze(tf.batch_matmul(head_rel_mult, tail_embed_col))
    self.output, self.loss = self._create_output_and_loss(raw_output)
    # Optimization
    self.train_step = self.opt.minimize(self.loss)
    if self.maxnorm is not None:
        # Post-processing to limit embedding vars to L2 ball
        rel_maxnorm = self.maxnorm * self.rel_maxnorm_mult
        unique_ent_indices = tf.unique(tf.concat(0, [self.head_input, self.tail_input]))[0]
        unique_rel_indices = tf.unique(self.rel_input)[0]
        entity_constraint = self._norm_constraint_op(self.entity_embedding_vars,
                                                     unique_ent_indices,
                                                     self.maxnorm)
        rel_constraint = self._norm_constraint_op(self.rel_embedding_vars,
                                                  unique_rel_indices,
                                                  rel_maxnorm)
        self.post_step = [entity_constraint, rel_constraint]
def nearest_neighbor_features_per_object( reference_embeddings, query_embeddings, reference_labels, max_neighbors_per_object, k_nearest_neighbors, gt_ids=None, n_chunks=100): """Calculates the distance to the nearest neighbor per object. For every pixel of query_embeddings calculate the distance to the nearest neighbor in the (possibly subsampled) reference_embeddings per object. Args: reference_embeddings: Tensor of shape [height, width, embedding_dim], the embedding vectors for the reference frame. query_embeddings: Tensor of shape [n_query_images, height, width, embedding_dim], the embedding vectors for the query frames. reference_labels: Tensor of shape [height, width, 1], the class labels of the reference frame. max_neighbors_per_object: Integer, the maximum number of candidates for the nearest neighbor query per object after subsampling, or 0 for no subsampling. k_nearest_neighbors: Integer, the number of nearest neighbors to use. gt_ids: Int tensor of shape [n_objs] of the sorted unique ground truth ids in the first frame. If None, it will be derived from reference_labels. n_chunks: Integer, the number of chunks to use to save memory (set to 1 for no chunking). Returns: nn_features: A float32 tensor of nearest neighbor features of shape [n_query_images, height, width, n_objects, feature_dim]. gt_ids: An int32 tensor of the unique sorted object ids present in the reference labels. """ with tf.name_scope('nn_features_per_object'): reference_labels_flat = tf.reshape(reference_labels, [-1]) if gt_ids is None: ref_obj_ids, _ = tf.unique(reference_labels_flat) ref_obj_ids = tf.contrib.framework.sort(ref_obj_ids) gt_ids = ref_obj_ids embedding_dim = resolve_shape(reference_embeddings)[-1] reference_embeddings_flat = tf.reshape(reference_embeddings, [-1, embedding_dim]) reference_embeddings_flat, reference_labels_flat = ( subsample_reference_embeddings_and_labels(reference_embeddings_flat, reference_labels_flat, gt_ids, max_neighbors_per_object)) shape = resolve_shape(query_embeddings) query_embeddings_flat = tf.reshape(query_embeddings, [-1, embedding_dim]) nn_features = _nearest_neighbor_features_per_object_in_chunks( reference_embeddings_flat, query_embeddings_flat, reference_labels_flat, gt_ids, k_nearest_neighbors, n_chunks) nn_features_dim = resolve_shape(nn_features)[-1] nn_features_reshaped = tf.reshape(nn_features, tf.stack(shape[:3] + [tf.size(gt_ids), nn_features_dim])) return nn_features_reshaped, gt_ids
def testInt32(self):
    x = list(np.random.randint(2, high=10, size=7000))
    with self.test_session() as sess:
        y, idx = tf.unique(x)
        tf_y, tf_idx = sess.run([y, idx])

    self.assertEqual(len(x), len(tf_idx))
    self.assertEqual(len(tf_y), len(np.unique(x)))
    for i in range(len(x)):
        self.assertEqual(x[i], tf_y[tf_idx[i]])
def testString(self):
    indx = np.random.randint(65, high=122, size=7000)
    x = [chr(i) for i in indx]
    with self.test_session() as sess:
        y, idx = tf.unique(x)
        tf_y, tf_idx = sess.run([y, idx])

    self.assertEqual(len(x), len(tf_idx))
    self.assertEqual(len(tf_y), len(np.unique(x)))
    for i in range(len(x)):
        self.assertEqual(x[i], tf_y[tf_idx[i]].decode("ascii"))
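The two tests above rely on the defining property of tf.unique: it returns a pair (y, idx) such that x[i] == y[idx[i]] for every position i, with y holding the unique values in order of first appearance. A minimal standalone sketch of that relationship (the input values here are invented for illustration, TF1-style session assumed):

# Illustrative sketch of the (y, idx) contract of tf.unique; toy input invented here.
import numpy as np
import tensorflow as tf

x = tf.constant([5, 2, 5, 7, 2])
y, idx = tf.unique(x)  # y: unique values in first-appearance order, idx: position of each x[i] in y
with tf.Session() as sess:
    y_val, idx_val = sess.run([y, idx])
print(y_val)    # [5 2 7]
print(idx_val)  # [0 1 0 2 1]
assert np.array_equal(y_val[idx_val], [5, 2, 5, 7, 2])  # x can be reconstructed from (y, idx)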
def createBatchedIndices(roi_idx, centers, nr_of_points):
    centers = tf.convert_to_tensor(centers, dtype=tf.int32)
    simple_roi_idx = roi_idx

    def inLoop(i, roi_idx):
        new_roi = simple_roi_idx + tf.select(tf.greater(nr_of_points, i),
                                             true_values * i, true_values * 0)
        return tf.add(i, 1), \
               tf.concat(0, [roi_idx, new_roi])

    points_max = tf.cast(tf.reduce_max(nr_of_points), tf.int32)
    i = tf.constant(0)
    c = lambda i, nr_of_points: tf.less(i, points_max)
    i2, roi_idx = tf.while_loop(c, inLoop, [i, roi_idx], parallel_iterations=1)
    roi_idx = tf.unique(roi_idx)[0]
    batched_indices = tf.gather(centers, roi_idx)
    unique = tf.unique(batched_indices[:, 0])
    return tf.concat(1, [tf.expand_dims(unique[1], 1), batched_indices[:, 1:]])
def testWatchingUnconnectedOutputTensor(self): """Watch an output slot not emitting any edges. (Not even control edges from the node.) """ with session.Session() as sess: x_init = constant_op.constant([2, 2, 3, 5, 5]) x = variables.Variable(x_init, name="unconnected/x") # The UniqueOp (tf.unique) has two output slots. Use only slot 0 in the # graph. Let the debugger watch the unused slot 1. unique_x, _ = tf.unique(x, name="unconnected/unique_x") y = tf.add(unique_x, [0, 1, 2], name="unconnected/y") x.initializer.run() # Verify that only slot 0 of unique_x has recipients, while slot 1 of the # same node does not have recipients. unique_x_slot_0_recipients = [] unique_x_slot_1_recipients = [] for op in sess.graph.get_operations(): for inp in op.inputs: if inp.name == "unconnected/unique_x:0": unique_x_slot_0_recipients.append(op.name) elif inp.name == "unconnected/unique_x:1": unique_x_slot_1_recipients.append(op.name) self.assertEqual(["unconnected/y"], unique_x_slot_0_recipients) self.assertEqual([], unique_x_slot_1_recipients) run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_utils.watch_graph(run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls=self._debug_urls()) run_metadata = config_pb2.RunMetadata() result = sess.run(y, options=run_options, run_metadata=run_metadata) self.assertAllClose([2, 4, 7], result) dump = debug_data.DebugDumpDir(self._dump_root, partition_graphs=run_metadata.partition_graphs) # Assert that the connected slot (slot 0) is dumped properly. unique_x_slot_0_dumps = dump.watch_key_to_data("unconnected/unique_x:0:DebugIdentity") self.assertEqual(1, len(unique_x_slot_0_dumps)) self.assertEqual("unconnected/unique_x", unique_x_slot_0_dumps[0].node_name) self.assertEqual(0, unique_x_slot_0_dumps[0].output_slot) self.assertAllClose([2, 3, 5], unique_x_slot_0_dumps[0].get_tensor()) # Assert that the unconnected slot (slot 1) is dumped properly. unique_x_slot_1_dumps = dump.watch_key_to_data("unconnected/unique_x:1:DebugIdentity") self.assertEqual(1, len(unique_x_slot_1_dumps)) self.assertEqual("unconnected/unique_x", unique_x_slot_1_dumps[0].node_name) self.assertEqual(1, unique_x_slot_1_dumps[0].output_slot) self.assertAllClose([0, 0, 1, 2, 2], unique_x_slot_1_dumps[0].get_tensor())
def _create_model(self, train_triples):
    # Count unique items to determine embedding matrix sizes
    head_cnt = len(set(train_triples[:, 0]))
    rel_cnt = len(set(train_triples[:, 1]))
    tail_cnt = len(set(train_triples[:, 2]))
    init_sd = 1.0 / np.sqrt(self.embedding_size)
    # Embedding matrices for entities and relationship types
    head_init = tf.truncated_normal([head_cnt, self.embedding_size], stddev=init_sd)
    rel_init = tf.truncated_normal([rel_cnt, self.embedding_size], stddev=init_sd)
    tail_init = tf.truncated_normal([tail_cnt, self.embedding_size], stddev=init_sd)
    if self.maxnorm is not None:
        # Ensure maxnorm constraints are initially satisfied
        head_init = dense_maxnorm(head_init, self.maxnorm)
        rel_init = dense_maxnorm(rel_init, self.maxnorm)
        tail_init = dense_maxnorm(tail_init, self.maxnorm)
    self.head_embedding_vars = tf.Variable(head_init)
    self.rel_embedding_vars = tf.Variable(rel_init)
    self.tail_embedding_vars = tf.Variable(tail_init)
    # Embedding layer for each (head, rel, tail) triple being fed in as input
    head_embed = tf.nn.embedding_lookup(self.head_embedding_vars, self.head_input)
    rel_embed = tf.nn.embedding_lookup(self.rel_embedding_vars, self.rel_input)
    tail_embed = tf.nn.embedding_lookup(self.tail_embedding_vars, self.tail_input)
    # Model output
    raw_output = tf.reduce_sum(tf.mul(tf.mul(head_embed, rel_embed), tail_embed), 1)
    self.output, self.loss = self._create_output_and_loss(raw_output)
    # Optimization
    self.train_step = self.opt.minimize(self.loss)
    if self.maxnorm is not None:
        # Post-processing to limit embedding vars to L2 ball
        head_constraint = self._norm_constraint_op(self.head_embedding_vars,
                                                   tf.unique(self.head_input)[0],
                                                   self.maxnorm)
        rel_constraint = self._norm_constraint_op(self.rel_embedding_vars,
                                                  tf.unique(self.rel_input)[0],
                                                  self.maxnorm)
        tail_constraint = self._norm_constraint_op(self.tail_embedding_vars,
                                                   tf.unique(self.tail_input)[0],
                                                   self.maxnorm)
        self.post_step = [head_constraint, rel_constraint, tail_constraint]
def build_vocab(word_tensor, vocab_size):
    unique, idx = tf.unique(word_tensor)
    counts = tf.foldl(
        lambda counts, item: counts + tf.one_hot(
            tf.reshape(item, [-1]), tf.shape(unique)[0], dtype=tf.int32)[0],
        idx,
        initializer=tf.zeros_like(unique, dtype=tf.int32),
        back_prop=False
    )
    _, indices = tf.nn.top_k(counts, k=vocab_size)
    return tf.gather(unique, indices)
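A possible usage sketch for build_vocab above (the word tensor and expected output are invented for illustration, TF1-style session assumed): the function counts how often each unique token occurs and keeps the vocab_size most frequent ones.

# Hypothetical usage sketch for build_vocab; toy inputs invented here.
import tensorflow as tf

words = tf.constant(["the", "cat", "the", "dog", "the", "cat"])
vocab = build_vocab(words, vocab_size=2)
with tf.Session() as sess:
    print(sess.run(vocab))  # the two most frequent tokens: [b'the' b'cat']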
def accumulate_sparse_gradients(grad):
    """Accumulates repeated indices of a sparse gradient update.

    Args:
      grad: a tf.IndexedSlices gradient

    Returns:
      grad_indices: unique indices
      grad_values: gradient values corresponding to the indices
    """
    grad_indices, grad_segments = tf.unique(grad.indices)
    grad_values = tf.unsorted_segment_sum(grad.values, grad_segments,
                                          tf.shape(grad_indices)[0])
    return grad_indices, grad_values
def _deduplicate_indexed_slices(values, indices):
    """Sums `values` associated with any non-unique `indices`.

    Args:
      values: A `Tensor` with rank >= 1.
      indices: A one-dimensional integer `Tensor`, indexing into the first
        dimension of `values` (as in an IndexedSlices object).

    Returns:
      A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
      de-duplicated version of `indices` and `summed_values` contains the sum of
      `values` slices associated with each unique index.
    """
    unique_indices, new_index_positions = tf.unique(indices)
    summed_values = tf.unsorted_segment_sum(values, new_index_positions,
                                            tf.shape(unique_indices)[0])
    return (summed_values, unique_indices)
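A minimal usage sketch for _deduplicate_indexed_slices (toy values invented here, TF1-style session assumed): index 0 appears twice, so its two gradient rows are summed into a single slice, which is exactly the tf.unique + tf.unsorted_segment_sum pattern used by the two helpers above.

# Hypothetical usage sketch for _deduplicate_indexed_slices; toy values invented here.
import tensorflow as tf

values = tf.constant([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])
indices = tf.constant([0, 2, 0])
summed_values, unique_indices = _deduplicate_indexed_slices(values, indices)
with tf.Session() as sess:
    print(sess.run(unique_indices))  # [0 2]
    print(sess.run(summed_values))   # [[4. 4.] [2. 2.]]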
def create_initial_softmax_from_labels(last_frame_labels, reference_labels,
                                       decoder_output_stride, reduce_labels):
    """Creates initial softmax predictions from last frame labels.

    Args:
      last_frame_labels: last frame labels of shape [1, height, width, 1].
      reference_labels: reference frame labels of shape [1, height, width, 1].
      decoder_output_stride: Integer, the stride of the decoder. Can be None, in
        this case it's assumed that the last_frame_labels and reference_labels
        are already scaled to the decoder output resolution.
      reduce_labels: Boolean, whether to reduce the depth of the softmax one_hot
        encoding to the actual number of labels present in the reference frame
        (otherwise the depth will be the highest label index + 1).

    Returns:
      init_softmax: the initial softmax predictions.
    """
    if decoder_output_stride is None:
        labels_output_size = last_frame_labels
        reference_labels_output_size = reference_labels
    else:
        h = tf.shape(last_frame_labels)[1]
        w = tf.shape(last_frame_labels)[2]
        h_sub = model.scale_dimension(h, 1.0 / decoder_output_stride)
        w_sub = model.scale_dimension(w, 1.0 / decoder_output_stride)
        labels_output_size = tf.image.resize_nearest_neighbor(
            last_frame_labels, [h_sub, w_sub], align_corners=True)
        reference_labels_output_size = tf.image.resize_nearest_neighbor(
            reference_labels, [h_sub, w_sub], align_corners=True)
    if reduce_labels:
        unique_labels, _ = tf.unique(tf.reshape(reference_labels_output_size, [-1]))
        depth = tf.size(unique_labels)
    else:
        depth = tf.reduce_max(reference_labels_output_size) + 1
    one_hot_assertion = tf.assert_less(tf.reduce_max(labels_output_size), depth)
    with tf.control_dependencies([one_hot_assertion]):
        init_softmax = tf.one_hot(tf.squeeze(labels_output_size, axis=-1),
                                  depth=depth,
                                  dtype=tf.float32)
    return init_softmax
def run_modules(inputs, selection, module_fnc, output_shape):
    batch_size = tf.shape(inputs)[0]
    if output_shape is not None:
        output_shape = [batch_size] + output_shape
    else:
        # This is the only way I am aware of to get the output shape easily
        dummy = module_fnc(inputs, 0)
        output_shape = [batch_size] + dummy.shape[1:].as_list()

    used_modules, _ = tf.unique(tf.reshape(selection, (-1,)))

    def compute_module(accum, module):
        mask = tf.equal(module, selection)
        reduced_mask = tf.reduce_any(mask, axis=-1)
        indices = tf.where(reduced_mask)
        affected_inp = tf.boolean_mask(inputs, reduced_mask)
        output = module_fnc(affected_inp, module)
        return accum + tf.scatter_nd(indices, output, tf.cast(output_shape, tf.int64))

    output = tf.scan(compute_module, used_modules,
                     initializer=tf.zeros(output_shape))[-1]
    return output
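A hypothetical usage sketch for run_modules (the toy module function and inputs are invented here, TF1-style session assumed): each example in the batch is routed to one of two "modules" chosen per example by `selection`, tf.unique limits the scan to the modules actually used, and the per-module outputs are scattered back into one batch tensor.

# Hypothetical usage sketch for run_modules; toy_module_fnc and inputs invented here.
import tensorflow as tf

def toy_module_fnc(x, module_id):
    # Module 0 doubles its inputs, module 1 negates them (module_id arrives as a tensor).
    return tf.cond(tf.equal(tf.cast(module_id, tf.int32), 0),
                   lambda: x * 2.0,
                   lambda: -x)

inputs = tf.constant([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])
selection = tf.constant([[0], [1], [0]])  # per-example module choice
outputs = run_modules(inputs, selection, toy_module_fnc, output_shape=[2])
with tf.Session() as sess:
    print(sess.run(outputs))  # [[ 2.  2.] [-2. -2.] [ 6.  6.]]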
def _create_model(self, train_triples):
    # Count unique items to determine embedding matrix sizes
    entity_cnt = len(set(train_triples[:, 0]).union(train_triples[:, 2]))
    rel_cnt = len(set(train_triples[:, 1]))
    init_sd = 1.0 / np.sqrt(self.embedding_size)
    # Embedding variables
    entity_var_shape = [entity_cnt, self.embedding_size]
    rel_var_shape = [rel_cnt, self.embedding_size]
    entity_init = tf.truncated_normal(entity_var_shape, stddev=init_sd)
    rel_init = tf.truncated_normal(rel_var_shape, stddev=init_sd)
    # Ensure maxnorm constraints are initially satisfied
    entity_init = dense_maxnorm(entity_init, self.maxnorm)
    self.entity_embedding_vars = tf.Variable(entity_init)
    self.rel_embedding_vars = tf.Variable(rel_init)
    # Embedding layer for each (head, rel, tail) triple being fed in as input
    head_embed = tf.nn.embedding_lookup(self.entity_embedding_vars, self.head_input)
    tail_embed = tf.nn.embedding_lookup(self.entity_embedding_vars, self.tail_input)
    rel_embed = tf.nn.embedding_lookup(self.rel_embedding_vars, self.rel_input)
    # Relationship vector acts as a translation in entity embedding space
    diff_vec = tail_embed - (head_embed + rel_embed)
    # negative dist so higher scores are better (important for pairwise loss)
    if self.dist == 'manhattan':
        raw_output = -tf.reduce_sum(tf.abs(diff_vec), 1)
    elif self.dist == 'euclidean':
        # +eps because gradients can misbehave for small values in sqrt
        raw_output = -tf.sqrt(tf.reduce_sum(tf.square(diff_vec), 1) + self.EPS)
    elif self.dist == 'sqeuclidean':
        raw_output = -tf.reduce_sum(tf.square(diff_vec), 1)
    else:
        raise Exception('Unknown distance type')
    # Model output
    self.output, self.loss = ranking_margin_objective(raw_output, self.margin)
    # Optimization with postprocessing to limit embedding vars to L2 ball
    self.train_step = self.opt.minimize(self.loss)
    unique_ent_indices = tf.unique(tf.concat(0, [self.head_input, self.tail_input]))[0]
    self.post_step = self._norm_constraint_op(self.entity_embedding_vars,
                                              unique_ent_indices,
                                              self.maxnorm)
def find_dup(a):
    """Finds the duplicated elements in a 1-D tensor.

    Args:
      a: 1-D tensor.

    Returns:
      more_than_one_vals: values that occur more than once in a.
      indexes_in_a: indices in a of the duplicated values.
      dups_in_a: the duplicated values, including their repeats, in a.
    """
    unique_a_vals, unique_idx = tf.unique(a)
    count_a_unique = tf.unsorted_segment_sum(tf.ones_like(a),
                                             unique_idx,
                                             tf.shape(a)[0])
    more_than_one = tf.greater(count_a_unique, 1)
    more_than_one_idx = tf.squeeze(tf.where(more_than_one))
    more_than_one_vals = tf.squeeze(tf.gather(unique_a_vals, more_than_one_idx))
    not_duplicated, _ = tf.setdiff1d(a, more_than_one_vals)
    dups_in_a, indexes_in_a = tf.setdiff1d(a, not_duplicated)

    return more_than_one_vals, indexes_in_a, dups_in_a
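A hypothetical usage sketch for find_dup (toy input invented here, TF1-style session assumed), showing how the three return values relate to each other:

# Hypothetical usage sketch for find_dup; toy input invented here.
import tensorflow as tf

a = tf.constant([1, 2, 2, 3, 3, 3])
more_than_one_vals, indexes_in_a, dups_in_a = find_dup(a)
with tf.Session() as sess:
    print(sess.run(more_than_one_vals))  # values occurring more than once: [2 3]
    print(sess.run(dups_in_a))           # those values with their repeats:  [2 2 3 3 3]
    print(sess.run(indexes_in_a))        # their positions in a:             [1 2 3 4 5]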
n = 100
num_parallel = 5
dtype = tf.int32
queue = tf.FIFOQueue(capacity=n, dtypes=[dtype], shapes=[()])
enqueue_op = queue.enqueue_many(tf.range(n))
size_op = queue.size()
dequeue_ops = []
for i in range(num_parallel):
    dequeue_ops.append(queue.dequeue())
if hasattr(tf, "stack"):
    batch = tf.stack(dequeue_ops)
else:
    batch = tf.pack(dequeue_ops)
all_unique = tf.equal(tf.size(tf.unique(batch)[0]), num_parallel)

sess = create_session()
sess.run(enqueue_op)
print(tf.__version__)
print(tf.__git_version__)
for i in range(n // num_parallel):
    print(sess.run([batch, all_unique, size_op]))
print(tf.get_default_graph().as_graph_def())

# node {
#   name: "fifo_queue"
#   op: "FIFOQueueV2"
#   attr {
#     key: "capacity"
#     value {
#       i: 100
def unique(input, return_inverse=False):
    if return_inverse:
        return tf.unique(input)
    else:
        return tf.unique(input).y
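The wrapper above exposes a numpy/PyTorch-style return_inverse flag on top of tf.unique. A brief usage sketch (the input values here are invented for illustration, TF1-style session assumed):

# Hypothetical usage sketch for the unique() wrapper above; toy input invented here.
import tensorflow as tf

x = tf.constant([3, 1, 3, 2])
values_only = unique(x)
values, inverse = unique(x, return_inverse=True)
with tf.Session() as sess:
    print(sess.run(values_only))  # [3 1 2]
    print(sess.run(inverse))      # [0 1 0 2]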
def refine_detections_graph(rois, probs, deltas, window, config): ''' Refine classified proposals and filter overlaps and return final detections. Algorithm: refine rois by mrcnn_deltas (rois is produced by refine anchors by rpn_deltas) filter out background boxes and low confidence rois filter: Apply per-class NMS filter: keep at most DETECTION_MAX_INSTANCES predictions according to probs concat and zero-padding to config.DETECTION_MAX_INSTANCES :param rois: [num_rois, (y1, x1, y2, x2)] in normalized coordinates :param probs: [num_rois, NUM_CLASSES] classifier probabilities :param deltas: [num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] Deltas to apply to proposal boxes :param window: [4, ] (y1, x1, y2, x2) in normalized coordinates. The part of the image that contains the image excluding the padding. :param config: instance of (sub-class of Config) :return: [config.DETECTION_MAX_INSTANCES, (y1, x1, y2, x2, class_id, score)] where coordinates are normalized. ''' # Class IDs per ROI class_ids = tf.argmax(probs, axis=1, output_type=tf.int32) # Class probability of the top class of each ROI indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1) class_scores = tf.gather_nd(probs, indices) # Class-specific bounding box deltas deltas_specific = tf.gather_nd(deltas, indices) # Apply bounding box deltas # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates refined_rois = apply_box_deltas_graph( rois, deltas_specific * config.BBOX_STD_DEV) # Clip boxes to image window refined_rois = clip_boxes_graph(refined_rois, window) # TODO: Filter out boxes with zero area # Filter out background boxes keep = tf.where(class_ids > 0)[:, 0] # Filter out low confidence boxes if config.DETECTION_MIN_CONFIDENCE: conf_keep = tf.where( class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0] keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), tf.expand_dims(conf_keep, 0)) keep = tf.sparse_tensor_to_dense(keep)[0] # Apply per-class NMS # 1. Prepare variables pre_nms_class_ids = tf.gather(class_ids, keep) pre_nms_scores = tf.gather(class_scores, keep) pre_nms_rois = tf.gather(refined_rois, keep) unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] def nms_keep_map(class_id): """Apply Non-Maximum Suppression on ROIs of the given class.""" # Indices of ROIs of the given class ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0] # Apply NMS class_keep = tf.image.non_max_suppression( tf.gather(pre_nms_rois, ixs), tf.gather(pre_nms_scores, ixs), max_output_size=config.DETECTION_MAX_INSTANCES, iou_threshold=config.DETECTION_NMS_THRESHOLD) # Map indices class_keep = tf.gather(keep, tf.gather(ixs, class_keep)) # Pad with -1 so returned tensors have the same shape gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0] class_keep = tf.pad(class_keep, [(0, gap)], mode='CONSTANT', constant_values=-1) # Set shape so map_fn() can infer result shape class_keep.set_shape([config.DETECTION_MAX_INSTANCES]) return class_keep # 2. Map over class IDs nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, dtype=tf.int64) # 3. Merge results into one list, and remove -1 padding nms_keep = tf.reshape(nms_keep, [-1]) nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0]) # 4. 
Compute intersection between keep and nms_keep keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), tf.expand_dims(nms_keep, 0)) keep = tf.sparse_tensor_to_dense(keep)[0] # Keep top detections roi_count = config.DETECTION_MAX_INSTANCES class_scores_keep = tf.gather(class_scores, keep) num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] keep = tf.gather(keep, top_ids) # Arrange output as [N, (y1, x1, y2, x2, class_id, score)] # Coordinates are normalized. detections = tf.concat([ tf.gather(refined_rois, keep), tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis], tf.gather(class_scores, keep)[..., tf.newaxis] ], axis=1) # Pad with zeros if detections < DETECTION_MAX_INSTANCES gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0] detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT") return detections
def unique(input):
    return tf.unique(input).y
def encode_annos(labels, bboxes, anchors, num_classes): """Encode annotations for losses computations. All the output tensors have a fix shape(none dynamic dimention). Args: labels: 1-D with shape `[num_bounding_boxes]`. bboxes: 2-D with shape `[num_bounding_boxes, 4]`. Format [ymin, xmin, ymax, xmax] anchors: 4-D tensor with shape `[num_anchors, 4]`. Format [cx, cy, w, h] Returns: input_mask: 2-D with shape `[num_anchors, 1]`, indicate which anchor to be used to cal loss. labels_input: 2-D with shape `[num_anchors, num_classes]`, one hot encode for every anchor. box_delta_input: 2-D with shape `[num_anchors, 4]`. Format [dcx, dcy, dw, dh] box_input: 2-D with shape '[num_anchors, 4]'. Format [ymin, xmin, ymax, xmax] """ with tf.name_scope("Encode_annotations") as scope: num_anchors = config.ANCHORS # num_bboxes = tf.shape(bboxes)[0] # Cal iou, find the target anchor with tf.name_scope("Matching") as subscope: ious = batch_iou_fast(xywh_to_yxyx(anchors), bboxes) anchor_indices = tf.reshape(tf.arg_max(ious, dimension=1), shape=[-1, 1]) # target anchor indices # anchor_indices = tf.Print(anchor_indices, [anchor_indices], "anchor_indices", summarize=100) # discard duplicate # unique_idx wrong anchor_indices, idx, count = tf.unique_with_counts( tf.reshape(anchor_indices, shape=[-1])) ori_idx = tf.cumsum(tf.pad(count, [[1, 0]]))[:-1] anchor_indices = tf.reshape(anchor_indices, shape=[-1, 1]) bboxes = tf.gather(bboxes, tf.unique(ori_idx)[0]) labels = tf.gather(labels, tf.unique(ori_idx)[0]) ious = tf.gather(ious, tf.unique(ori_idx)[0]) num_bboxes = tf.shape(anchor_indices)[0] # TODO(shizehao):deal with duplicate # with tf.name_scope("Deal_with_duplicate"): # dup_anchor_indices, indices_in_a, dup_anchor_indices_with_dup = find_dup(tf.reshape(anchor_indices, shape=[-1])) # # # reset duplicated corresponding anchor # conflicted_ious = tf.gather(ious, indices_in_a) # top_k_anchor_indices = tf.nn.top_k(conflicted_ious, k=20).indices # shape = [num_conflicted_bboxes, 20] # dup_group_idx = tf.where(tf.equal(dup_anchor_indices_with_dup, tf.reshape(dup_anchor_indices, shape=[-1, 1]))) # seg_group = tf.unstack(dup_group_idx, axis=1)[0] with tf.name_scope("Deal_with_noneoverlap"): # find the none-overlap bbox bbox_indices = tf.reshape(tf.range(num_bboxes), shape=[-1, 1]) # bbox_indices = tf.Print(bbox_indices, [bbox_indices], "bbox_indices", summarize=100) # anchor_indices = tf.Print(anchor_indices, [anchor_indices], "anchor_indices", summarize=100) iou_indices = tf.concat( [bbox_indices, tf.cast(anchor_indices, dtype=tf.int32)], axis=1) # iou_indices = tf.Print(iou_indices, [iou_indices], "iou_indices", summarize=100) target_iou = tf.gather_nd(ious, iou_indices) # target_iou = tf.Print(target_iou,[target_iou],"target_iou",summarize=100) none_overlap_bbox_indices = tf.where(target_iou <= 0) # 1-D # none_overlap_bbox_indices = tf.Print(none_overlap_bbox_indices, [none_overlap_bbox_indices], "none_overlap_bbox_indices", summarize=100) # find it's corresponding anchor target_bbox = tf.gather_nd(bboxes, none_overlap_bbox_indices) # target_bbox = tf.Print(target_bbox, [target_bbox], "target_bbox", summarize=100) closest_anchor_indices = arg_closest_anchor( target_bbox, xywh_to_yxyx(anchors)) # 1-D # closest_anchor_indices = tf.Print(closest_anchor_indices, [closest_anchor_indices, tf.gather(anchors, closest_anchor_indices)], "closest_anchor_indices", summarize=100) with tf.name_scope("Update_anchor_indices"): anchor_indices = tf.reshape(anchor_indices, shape=[-1]) anchor_indices = update_tensor(anchor_indices, 
none_overlap_bbox_indices, closest_anchor_indices) anchor_indices = tf.reshape(anchor_indices, shape=[-1, 1]) with tf.name_scope("Delta") as subscope: target_anchors = tf.gather_nd(anchors, anchor_indices) bboxes = yxyx_to_xywh(bboxes) delta = batch_delta(bboxes, target_anchors) with tf.name_scope("Scattering") as subscope: # bbox box_input = tf.scatter_nd(anchor_indices, bboxes, shape=[num_anchors, 4]) # label labels_input = tf.scatter_nd(anchor_indices, tf.one_hot(labels, num_classes), shape=[num_anchors, num_classes]) # delta box_delta_input = tf.scatter_nd(anchor_indices, delta, shape=[num_anchors, 4]) # anchor mask # unique_indices, _ = tf.unique(tf.reshape(anchor_indices, shape=[-1])) # unique_indices = tf.Print(unique_indices, [unique_indices], summarize=100) # num_bboxes = tf.Print(num_bboxes, [num_bboxes]) input_mask = tf.scatter_nd(anchor_indices, tf.ones([num_bboxes]), shape=[num_anchors]) input_mask = tf.reshape(input_mask, shape=[-1, 1]) return input_mask, labels_input, box_delta_input, box_input
# except Exception as e:
#     print(e)

# Load the class label data
patient_data_path = "E:/GitHub Program/skin_cancer_data/HAM10000_metadata.csv"
patient_data = pathlib.Path(patient_data_path).read_text()
# Split the data on the delimiter and drop the header
patient_data = patient_data.split("\n")[1:]
# Define the data type of each feature
col_data_type = [str(), str(), str(), str(), float(), str(), str()]
# Build a list of tensors from the given data types,
# one tensor per feature
patient_data = tf.io.decode_csv(patient_data, record_defaults=col_data_type)
# number_of_class = tf.unique(patient_data[2]).y.shape[0]
label = tf.unique(patient_data[2]).idx
image_id = patient_data[1]

original_data_path = "E:/GitHub Program/skin_cancer_data/HAM10000_images/"
new_data_path = "E:/GitHub Program/skin_cancer_data/HAM10000_images_rename"
number_of_process = 2
number_of_data_to_split = 1
new_size = (200, 200)
batch_size = 500
run_rename_function = True

# Append the class label to the original file names for training
try:
    os.makedirs(new_data_path)
except FileExistsError:
    print("Folder Exist")
lens_tf = tf.reduce_sum(mask, 1)
labels_list = []
for i in range(Bs):
    positions = tf.range(Lmax)
    substitues = tf.random.uniform([1], 1, lens_tf[i] - 1, tf.int32)
    labels = tf.cast(tf.equal(positions, substitues), tf.int32)
    labels_list.append(tf.expand_dims(labels, 0))
labels_tf = tf.concat(labels_list, 0)

splits_list = []
for i in range(Bs):
    # one = tf.constant(np.random.uniform(0, lens_np[i], (Ns * 2)).astype(int))
    one = tf.random.uniform([Ns * 4], 1, lens_tf[i], tf.int32)
    one, _ = tf.unique(one)
    one = tf.cond(
        tf.less(tf.shape(one)[0], Ns * 2),
        lambda: tf.expand_dims(tf.range(Ns * 2)[1::2], 0),
        lambda: tf.sort(tf.reshape(one[:Ns * 2], [1, Ns * 2]))[:, ::2])
    splits_list.append(one)
splits_tf = tf.concat(splits_list, 0)

splits_up = tf.concat(
    [splits_tf, tf.expand_dims(tf.constant([Lmax] * Bs, tf.int32), 1)], 1)
splits_lo = tf.concat(
    [tf.expand_dims(tf.constant([0] * Bs, tf.int32), 1), splits_tf], 1)
size_splits = splits_up - splits_lo
def forward(self, data, state): conv_sbbox, conv_mbbox, conv_lbbox = data batch_size = conv_sbbox.shape[0] final_results = [] for idx in range(batch_size): pred_s, pred_m, pred_l = conv_sbbox[idx], conv_mbbox[ idx], conv_lbbox[idx] pred_s, pred_m, pred_l = tf.reshape(pred_s, (-1, 85)), tf.reshape( pred_m, (-1, 85)), tf.reshape(pred_l, (-1, 85)) preds = tf.concat([pred_s, pred_m, pred_l], axis=0) preds = preds[preds[:, 4] > self.conf_threshold] # filter by confidence classes = tf.argmax(preds[:, 5:], axis=-1) unique_classes = tf.unique(classes)[0] selected_boxes_all_classes = tf.zeros(shape=[0, 6], dtype=tf.float32) for clss in unique_classes: tf.autograph.experimental.set_loop_options( shape_invariants=[(selected_boxes_all_classes, tf.TensorShape([None, 6]))]) mask = tf.math.equal(classes, clss) preds_cls = tf.boolean_mask(preds, mask) x1, y1, w, h = preds_cls[:, 0], preds_cls[:, 1], preds_cls[:, 2], preds_cls[:, 3] x2, y2 = x1 + w, y1 + h conf_score, label = preds_cls[:, 4], tf.boolean_mask( classes, mask) selected_bboxes = tf.stack( [y1, x1, y2, x2, conf_score, tf.cast(label, tf.float32)], axis=-1) # nms for every class nms_keep = tf.image.non_max_suppression(selected_bboxes[:, :4], selected_bboxes[:, 4], max_output_size=50, iou_threshold=0.35) selected_bboxes = tf.gather(selected_bboxes, nms_keep) selected_boxes_all_classes = tf.concat( [selected_boxes_all_classes, selected_bboxes], axis=0) # clip bounding boxes to image size y1_abs = tf.clip_by_value(selected_boxes_all_classes[:, 0], 0, self.height) x1_abs = tf.clip_by_value(selected_boxes_all_classes[:, 1], 0, self.width) height_abs = tf.clip_by_value( selected_boxes_all_classes[:, 2] - y1_abs, 0, self.height - y1_abs) width_abs = tf.clip_by_value( selected_boxes_all_classes[:, 3] - x1_abs, 0, self.width - x1_abs) labels_score, labels = selected_boxes_all_classes[:, 4], selected_boxes_all_classes[:, 5] # final output: [x1, y1, w, h, label, label_score, select_or_not] results_single = [ x1_abs, y1_abs, width_abs, height_abs, labels, labels_score, tf.ones_like(x1_abs) ] results_single = tf.stack(results_single, axis=-1) # pad 0 to other rows to improve performance results_single = tf.pad( results_single, [(0, self.max_outputs - tf.shape(results_single)[0]), (0, 0)]) final_results.append(results_single) final_results = tf.stack(final_results) return final_results
def batch_unique(x, max_labels=25):
    labels, _ = tf.unique(tf.reshape(x, (-1,)))
    if (tf.greater_equal(tf.shape(labels)[0], max_labels)):
        labels = tf.gather(labels, tf.range(0, max_labels))
    return tf.pad(labels, [[0, max_labels - tf.shape(labels)[0]]])
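A possible usage sketch for batch_unique (toy labels invented here). Note the Python-level `if` on a tensor condition only behaves as intended under eager execution, where a scalar boolean tensor can be used directly as a Python condition:

# Hypothetical usage sketch for batch_unique, assuming TF2 eager execution.
import tensorflow as tf

seg_map = tf.constant([[0, 0, 3], [3, 7, 0]])
labels = batch_unique(seg_map, max_labels=5)
print(labels.numpy())  # unique labels padded with zeros to length 5: [0 3 7 0 0]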
def detectionLayer(proposal, probs, bbox, image_shape):
    """
    Detects one image at a time.
    :param proposal: proposals after non-max suppression, [batch, num_boxes, 4]
    :param probs: [num_boxes, num_classes]
    :param bbox: [num_boxes, num_classes, (dx, dy, log(h), log(w))]
    :param image_shape: [height, width, channels]
    :return: boxes, class ids, probabilities
    """
    with tf.control_dependencies([
            tf.Assert(tf.shape(proposal)[0] == 1,
                      data=["A single picture for evaluation each time"])]):
        proposal = tf.squeeze(proposal, axis=[0, ])  # [num_boxes, 4]

    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)  # [num_boxes]
    indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)  # [row index, class id]
    class_probs = tf.gather_nd(probs, indices)  # [num_boxes]
    deltas = tf.gather_nd(bbox, indices)  # [num_boxes, 4]
    refined_rois = apply_box_deltas(proposal, deltas * config.RPN_BBOX_STD_DEV)  # [num_boxes, 4]
    refined_rois = tf.clip_by_value(refined_rois, 0, 1)

    keep = tf.where(class_ids > 0)[:, 0]  # keep only foreground boxes
    if config.DETECTION_MIN_CONFIDENCE:
        # If a minimum confidence is used, take the intersection with the confident boxes
        conf_keep = tf.where(class_probs >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(conf_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    ########### Everything below is regathered and reordered ###########
    pre_nms_class_ids = tf.gather(class_ids, keep)    # gathered class ids [num]
    pre_nms_scores = tf.gather(class_probs, keep)     # corresponding probabilities [num]
    pre_nms_rois = tf.gather(refined_rois, keep)      # corresponding boxes [num, 4]
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]  # class ids actually present [unique]

    def nms_keep_map(class_id):
        """
        Apply non-max suppression only among boxes of the same class.
        :param class_id: the given class id
        :return:
        """
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        class_keep = tf.image.non_max_suppression(
            boxes=tf.gather(pre_nms_rois, ixs),
            scores=tf.gather(pre_nms_scores, ixs),
            max_output_size=config.DETECTION_MAX_INSTANCE,
            iou_threshold=config.DETECTION_NMS_THRESHHOLD)
        # class_keep holds indices relative to ixs; tf.gather(ixs, class_keep) recovers the
        # values of ixs themselves, which in turn are indices into pre_nms_class_ids
        class_keep = tf.gather(ixs, class_keep)
        # Pad with -1 so all results have the same shape
        gap = config.DETECTION_MAX_INSTANCE - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)], mode='CONSTANT', constant_values=-1)
        # Set the shape so that map_fn() can infer it immediately
        class_keep.set_shape([config.DETECTION_MAX_INSTANCE])
        return class_keep

    # nms_keep has shape [len(unique_pre_nms_class_ids), config.DETECTION_MAX_INSTANCE]
    nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, dtype=tf.int32)
    nms_keep = tf.reshape(nms_keep, [-1])
    # keep holds indices into pre_nms_class_ids
    keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])

    class_probs2 = tf.gather(pre_nms_scores, keep)
    num_keep = tf.minimum(tf.shape(keep)[0], config.DETECTION_MAX_INSTANCE)
    top_ids = tf.nn.top_k(class_probs2, num_keep, sorted=True).indices
    keep = tf.gather(keep, top_ids)
    # At this point, keep contains the highest-scoring detections,
    # capped at the maximum number of instances
    return tf.gather(pre_nms_rois, keep), tf.gather(pre_nms_class_ids, keep), tf.gather(pre_nms_scores, keep)
###############################################################################
# 1f: Create a random 2-d tensor of size 10 x 10 from any distribution.
# Calculate its determinant.
# Hint: Look at tf.matrix_determinant().
###############################################################################

s = tf.random_normal([10, 10])
result = tf.matrix_determinant(s)
print("6th computation -- " + str(sess.run(result)))

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################

t = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
unique = tf.unique(t)
print("7th computation -- " + str(sess.run(unique)))

###############################################################################
# Helper method to calculate huber loss
###############################################################################

def huber_loss(labels, predictions, delta=1.0):
    residual = tf.abs(predictions - labels)
    condition = tf.less(residual, delta)
    small_res = 0.5 * tf.square(residual)
    large_res = delta * residual - 0.5 * tf.square(delta)
    return tf.select(condition, small_res, large_res)

###############################################################################
def _get_bboxes_single(self, rcnn_probs, rcnn_deltas, rois, img_shape): ''' Args --- rcnn_probs: [num_rois, num_classes] rcnn_deltas: [num_rois, num_classes, (dy, dx, log(dh), log(dw))] rois: [num_rois, (y1, x1, y2, x2)] img_shape: np.ndarray. [2]. (img_height, img_width) ''' H, W = img_shape # Class IDs per ROI class_ids = tf.argmax(rcnn_probs, axis=1, output_type=tf.int32) # Class probability of the top class of each ROI indices = tf.stack([tf.range(rcnn_probs.shape[0]), class_ids], axis=1) class_scores = tf.gather_nd(rcnn_probs, indices) # Class-specific bounding box deltas deltas_specific = tf.gather_nd(rcnn_deltas, indices) # Apply bounding box deltas # Shape: [num_rois, (y1, x1, y2, x2)] in normalized coordinates refined_rois = transforms.delta2bbox(rois, deltas_specific, self.target_means, self.target_stds) # Clip boxes to image window refined_rois *= tf.constant([H, W, H, W], dtype=tf.float32) window = tf.constant([0., 0., H * 1., W * 1.], dtype=tf.float32) refined_rois = transforms.bbox_clip(refined_rois, window) # Filter out background boxes keep = tf.where(class_ids > 0)[:, 0] # Filter out low confidence boxes if self.min_confidence: conf_keep = tf.where(class_scores >= self.min_confidence)[:, 0] keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), tf.expand_dims(conf_keep, 0)) keep = tf.sparse_tensor_to_dense(keep)[0] # Apply per-class NMS # 1. Prepare variables pre_nms_class_ids = tf.gather(class_ids, keep) pre_nms_scores = tf.gather(class_scores, keep) pre_nms_rois = tf.gather(refined_rois, keep) unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] def nms_keep_map(class_id): '''Apply Non-Maximum Suppression on ROIs of the given class.''' # Indices of ROIs of the given class ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0] # Apply NMS class_keep = tf.image.non_max_suppression( tf.gather(pre_nms_rois, ixs), tf.gather(pre_nms_scores, ixs), max_output_size=self.max_instances, iou_threshold=self.nms_threshold) # Map indices class_keep = tf.gather(keep, tf.gather(ixs, class_keep)) return class_keep # 2. Map over class IDs nms_keep = [] for i in range(unique_pre_nms_class_ids.shape[0]): nms_keep.append(nms_keep_map(unique_pre_nms_class_ids[i])) nms_keep = tf.concat(nms_keep, axis=0) # 3. Compute intersection between keep and nms_keep keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), tf.expand_dims(nms_keep, 0)) keep = tf.sparse_tensor_to_dense(keep)[0] # Keep top detections roi_count = self.max_instances class_scores_keep = tf.gather(class_scores, keep) num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] keep = tf.gather(keep, top_ids) detections = tf.concat([ tf.gather(refined_rois, keep), tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis], tf.gather(class_scores, keep)[..., tf.newaxis] ], axis=1) return detections
def model_fn(features, labels, mode, params): embedding_size = 36 movie_id = features["movie_id"] user_id = features["user_id"] rating = features["user_rating"] if mode == tf.estimator.ModeKeys.TRAIN: lookup_node_list = [ "/job:ps/replica:0/task:{}/CPU:0".format(i) for i in range(params["ps_num"])] initializer = tf.keras.initializers.RandomNormal(-1, 1) else: lookup_node_list = ["/job:localhost/replica:0/task:0/CPU:0"] * params["ps_num"] initializer = tf.keras.initializers.Zeros() redis_config=tfra.dynamic_embedding.RedisTableConfig( redis_config_abs_dir_env="model_tfra_redis_config_path" ) redis_creator=tfra.dynamic_embedding.RedisTableCreator(redis_config) user_embeddings = tfra.dynamic_embedding.get_variable( name="user_dynamic_embeddings", dim=embedding_size, devices=lookup_node_list, initializer=initializer, kv_creator=redis_creator) movie_embeddings = tfra.dynamic_embedding.get_variable( name="moive_dynamic_embeddings", dim=embedding_size, devices=lookup_node_list, initializer=initializer, kv_creator=redis_creator) user_id_val, user_id_idx = tf.unique(tf.concat(user_id, axis=0)) user_id_weights, user_id_trainable_wrapper = tfra.dynamic_embedding.embedding_lookup( params=user_embeddings, ids=user_id_val, name="user-id-weights", return_trainable=True) user_id_weights = tf.gather(user_id_weights, user_id_idx) movie_id_val, movie_id_idx = tf.unique(tf.concat(movie_id, axis=0)) movie_id_weights, movie_id_trainable_wrapper = tfra.dynamic_embedding.embedding_lookup( params=movie_embeddings, ids=movie_id_val, name="movie-id-weights", return_trainable=True) movie_id_weights = tf.gather(movie_id_weights, movie_id_idx) embeddings = tf.concat([user_id_weights, movie_id_weights], axis=1) d0 = Dense(256, activation='relu', kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1), bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1)) d1 = Dense(64, activation='relu', kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1), bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1)) d2 = Dense(1, kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1), bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1)) dnn = d0(embeddings) dnn = d1(dnn) dnn = d2(dnn) out = tf.reshape(dnn, shape=[-1]) # loss = tf.keras.losses.MeanSquaredError()(rating, out) per_example_loss = (out - rating)**2 loss = tf.nn.compute_average_loss(per_example_loss) predictions = {"out": out} acc = tf.metrics.Accuracy() acc.update_state([0.1, 1.0], [1.0, 0.1]) tensors_to_log = {"user_id_val": user_id_val.name} hook = tf.estimator.LoggingTensorHook(tensors_to_log, every_n_iter=100) if mode == tf.estimator.ModeKeys.EVAL: eval_metric_ops = {"accuracy": acc} return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops) if mode == tf.estimator.ModeKeys.TRAIN: optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001) optimizer = tfra.dynamic_embedding.DynamicEmbeddingOptimizer(optimizer) train_op = optimizer.minimize( loss, global_step=tf.compat.v1.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, train_op=train_op, training_hooks=[hook]) if mode == tf.estimator.ModeKeys.PREDICT: predictions_for_net = {"out": out} export_outputs = { "predict_export_outputs": tf.estimator.export.PredictOutput(outputs=predictions_for_net) } return tf.estimator.EstimatorSpec(mode, predictions=predictions_for_net, export_outputs=export_outputs, prediction_hooks=[hook])
def _compute_one_image_loss(self, pbbox_yx, pbbox_hw, abbox_y1x1, abbox_y2x2, abbox_yx, abbox_hw, pconf, ground_truth): slice_index = tf.argmin(ground_truth, axis=0)[0] ground_truth = tf.gather(ground_truth, tf.range(0, slice_index, dtype=tf.int64)) gbbox_yx = ground_truth[..., 0:2] gbbox_hw = ground_truth[..., 2:4] gbbox_y1x1 = gbbox_yx - gbbox_hw / 2. gbbox_y2x2 = gbbox_yx + gbbox_hw / 2. class_id = tf.cast(ground_truth[..., 4:5], dtype=tf.int32) label = class_id abbox_hwti = tf.reshape(abbox_hw, [1, -1, 2]) abbox_y1x1ti = tf.reshape(abbox_y1x1, [1, -1, 2]) abbox_y2x2ti = tf.reshape(abbox_y2x2, [1, -1, 2]) gbbox_hwti = tf.reshape(gbbox_hw, [-1, 1, 2]) gbbox_y1x1ti = tf.reshape(gbbox_y1x1, [-1, 1, 2]) gbbox_y2x2ti = tf.reshape(gbbox_y2x2, [-1, 1, 2]) ashape = tf.shape(abbox_hwti) gshape = tf.shape(gbbox_hwti) abbox_hwti = tf.tile(abbox_hwti, [gshape[0], 1, 1]) abbox_y1x1ti = tf.tile(abbox_y1x1ti, [gshape[0], 1, 1]) abbox_y2x2ti = tf.tile(abbox_y2x2ti, [gshape[0], 1, 1]) gbbox_hwti = tf.tile(gbbox_hwti, [1, ashape[1], 1]) gbbox_y1x1ti = tf.tile(gbbox_y1x1ti, [1, ashape[1], 1]) gbbox_y2x2ti = tf.tile(gbbox_y2x2ti, [1, ashape[1], 1]) gaiou_y1x1ti = tf.maximum(abbox_y1x1ti, gbbox_y1x1ti) gaiou_y2x2ti = tf.minimum(abbox_y2x2ti, gbbox_y2x2ti) gaiou_area = tf.reduce_prod(tf.maximum(gaiou_y2x2ti - gaiou_y1x1ti, 0), axis=-1) aarea = tf.reduce_prod(abbox_hwti, axis=-1) garea = tf.reduce_prod(gbbox_hwti, axis=-1) gaiou_rate = gaiou_area / (aarea + garea - gaiou_area) best_raindex = tf.argmax(gaiou_rate, axis=1) best_pbbox_yx = tf.gather(pbbox_yx, best_raindex) best_pbbox_hw = tf.gather(pbbox_hw, best_raindex) best_pconf = tf.gather(pconf, best_raindex) best_abbox_yx = tf.gather(abbox_yx, best_raindex) best_abbox_hw = tf.gather(abbox_hw, best_raindex) bestmask, _ = tf.unique(best_raindex) bestmask = tf.contrib.framework.sort(bestmask) bestmask = tf.reshape(bestmask, [-1, 1]) bestmask = tf.sparse.SparseTensor(tf.concat([bestmask, tf.zeros_like(bestmask)], axis=-1), tf.squeeze(tf.ones_like(bestmask)), dense_shape=[ashape[1], 1]) bestmask = tf.reshape(tf.cast(tf.sparse.to_dense(bestmask), tf.float32), [-1]) othermask = 1. - bestmask othermask = othermask > 0. 
other_pbbox_yx = tf.boolean_mask(pbbox_yx, othermask) other_pbbox_hw = tf.boolean_mask(pbbox_hw, othermask) other_pconf = tf.boolean_mask(pconf, othermask) other_abbox_yx = tf.boolean_mask(abbox_yx, othermask) other_abbox_hw = tf.boolean_mask(abbox_hw, othermask) agiou_rate = tf.transpose(gaiou_rate) other_agiou_rate = tf.boolean_mask(agiou_rate, othermask) best_agiou_rate = tf.reduce_max(other_agiou_rate, axis=1) pos_agiou_mask = best_agiou_rate > 0.5 neg_agiou_mask = best_agiou_rate < 0.4 rgindex = tf.argmax(other_agiou_rate, axis=1) pos_rgindex = tf.boolean_mask(rgindex, pos_agiou_mask) pos_ppox_yx = tf.boolean_mask(other_pbbox_yx, pos_agiou_mask) pos_ppox_hw = tf.boolean_mask(other_pbbox_hw, pos_agiou_mask) pos_pconf = tf.boolean_mask(other_pconf, pos_agiou_mask) pos_abbox_yx = tf.boolean_mask(other_abbox_yx, pos_agiou_mask) pos_abbox_hw = tf.boolean_mask(other_abbox_hw, pos_agiou_mask) pos_label = tf.gather(label, pos_rgindex) pos_gbbox_yx = tf.gather(gbbox_yx, pos_rgindex) pos_gbbox_hw = tf.gather(gbbox_hw, pos_rgindex) neg_pconf = tf.boolean_mask(other_pconf, neg_agiou_mask) neg_shape = tf.shape(neg_pconf) num_neg = neg_shape[0] neg_class_id = tf.constant([self.num_classes-1]) neg_label = tf.tile(neg_class_id, [num_neg]) pos_pbbox_yx = tf.concat([best_pbbox_yx, pos_ppox_yx], axis=0) pos_pbbox_hw = tf.concat([best_pbbox_hw, pos_ppox_hw], axis=0) pos_pconf = tf.concat([best_pconf, pos_pconf], axis=0) pos_label = tf.concat([label, pos_label], axis=0) pos_gbbox_yx = tf.concat([gbbox_yx, pos_gbbox_yx], axis=0) pos_gbbox_hw = tf.concat([gbbox_hw, pos_gbbox_hw], axis=0) pos_abbox_yx = tf.concat([best_abbox_yx, pos_abbox_yx], axis=0) pos_abbox_hw = tf.concat([best_abbox_hw, pos_abbox_hw], axis=0) conf_loss = self._focal_loss(pos_label, pos_pconf, neg_label, neg_pconf) pos_truth_pbbox_yx = (pos_gbbox_yx - pos_abbox_yx) / pos_abbox_hw pos_truth_pbbox_hw = tf.log(pos_gbbox_hw / pos_abbox_hw) pos_yx_loss = tf.reduce_sum(self._smooth_l1_loss(pos_pbbox_yx - pos_truth_pbbox_yx), axis=-1) pos_hw_loss = tf.reduce_sum(self._smooth_l1_loss(pos_pbbox_hw - pos_truth_pbbox_hw), axis=-1) pos_coord_loss = tf.reduce_mean(pos_yx_loss + pos_hw_loss) total_loss = conf_loss + pos_coord_loss return total_loss
def _process_segment_and_label(video_matrix, num_frames, contexts, segment_labels, segment_size, num_classes) -> Dict[str, tf.Tensor]: """Processes a batched Tensor of frames. The same parameters used in process should be used here. Args: video_matrix: different features concatenated into one matrix num_frames: Number of frames per subclip. contexts: context information extracted from decoder segment_labels: if we read segment labels instead. segment_size: the segment_size used for reading segments. Segment length. num_classes: a positive integer for the number of classes. Returns: output: dictionary containing batch information """ # Partition frame-level feature matrix to segment-level feature matrix. batch_video_ids = None if segment_labels: start_times = contexts["segment_start_times"].values # Here we assume all the segments that started at the same start time has # the same segment_size. uniq_start_times, seg_idxs = tf.unique(start_times, out_idx=tf.dtypes.int64) # Range gather matrix, e.g., [[0,1,2],[1,2,3]] for segment_size == 3. range_mtx = tf.expand_dims(uniq_start_times, axis=-1) + tf.expand_dims( tf.range(0, segment_size, dtype=tf.int64), axis=0) # Shape: [num_segment, segment_size, feature_dim]. batch_video_matrix = tf.gather_nd(video_matrix, tf.expand_dims(range_mtx, axis=-1)) num_segment = tf.shape(batch_video_matrix)[0] if "id" in contexts: batch_video_ids = tf.reshape( tf.tile([contexts["id"]], [num_segment]), (num_segment, )) batch_frames = tf.reshape(tf.tile([segment_size], [num_segment]), (num_segment, )) batch_frames = tf.cast(tf.expand_dims(batch_frames, 1), tf.float32) # For segment labels, all labels are not exhaustively rated. So we only # evaluate the rated labels. # Label indices for each segment, shape: [num_segment, 2]. label_indices = tf.stack([seg_idxs, contexts["segment_labels"].values], axis=-1) label_values = contexts["segment_scores"].values sparse_labels = tf.sparse.SparseTensor(label_indices, label_values, (num_segment, num_classes)) batch_labels = tf.sparse.to_dense(sparse_labels, validate_indices=False) sparse_label_weights = tf.sparse.SparseTensor( label_indices, tf.ones_like(label_values, dtype=tf.float32), (num_segment, num_classes)) batch_label_weights = tf.sparse.to_dense(sparse_label_weights, validate_indices=False) # output_dict = utils.get_segments(batch_video_matrix, batch_frames, 5) else: # Process video-level labels. label_indices = contexts["labels"].values sparse_labels = tf.sparse.SparseTensor( tf.expand_dims(label_indices, axis=-1), tf.ones_like(contexts["labels"].values, dtype=tf.bool), (num_classes, )) labels = tf.sparse.to_dense(sparse_labels, default_value=False, validate_indices=False) # convert to batch format. if "id" in contexts: batch_video_ids = tf.expand_dims(contexts["id"], 0) batch_video_matrix = tf.expand_dims(video_matrix, 0) batch_labels = tf.expand_dims(labels, 0) batch_frames = tf.expand_dims(num_frames, 0) batch_label_weights = None output_dict = { "video_matrix": batch_video_matrix, "labels": batch_labels, "num_frames": batch_frames, } if batch_video_ids is not None: output_dict["video_ids"] = batch_video_ids if batch_label_weights is not None: output_dict["label_weights"] = batch_label_weights return output_dict
def get_processed_frame_data(rgb_frame, audio_frame, feature_list, concat_features=False): rgb_frame_trans = tf.transpose(rgb_frame, perm=[1, 0]) audio_frame_trans = tf.transpose(audio_frame, perm=[1, 0]) video_length = tf.shape(rgb_frame)[0] q0_rgb_frame = tf.reduce_min(rgb_frame, reduction_indices=0) q1_rgb_frame = tf.reduce_min(tf.nn.top_k( rgb_frame_trans, k=tf.to_int32(tf.scalar_mul(0.75, tf.to_float(video_length))), sorted=False).values, reduction_indices=1) q2_rgb_frame = tf.reduce_min(tf.nn.top_k( rgb_frame_trans, k=tf.to_int32(tf.scalar_mul(0.50, tf.to_float(video_length))), sorted=False).values, reduction_indices=1) q3_rgb_frame = tf.reduce_min(tf.nn.top_k( rgb_frame_trans, k=tf.to_int32(tf.scalar_mul(0.25, tf.to_float(video_length))), sorted=False).values, reduction_indices=1) q4_rgb_frame = tf.reduce_max(rgb_frame, reduction_indices=0) mean_rgb_frame = tf.reduce_mean(rgb_frame, reduction_indices=0) stddv_rgb_frame = tf.sqrt( tf.reduce_mean(tf.square(rgb_frame - mean_rgb_frame), reduction_indices=0)) skew_rgb_frame = tf.div( tf.reduce_mean(tf.pow(rgb_frame - mean_rgb_frame, 3), reduction_indices=0), tf.pow(stddv_rgb_frame, 3)) kurt_rgb_frame = tf.div( tf.reduce_mean(tf.pow(rgb_frame - mean_rgb_frame, 4), reduction_indices=0), tf.pow(stddv_rgb_frame, 4)) q0_audio_frame = tf.reduce_min(audio_frame, reduction_indices=0) q1_audio_frame = tf.reduce_min(tf.nn.top_k( audio_frame_trans, k=tf.to_int32(tf.scalar_mul(0.75, tf.to_float(video_length))), sorted=False).values, reduction_indices=1) q2_audio_frame = tf.reduce_min(tf.nn.top_k( audio_frame_trans, k=tf.to_int32(tf.scalar_mul(0.50, tf.to_float(video_length))), sorted=False).values, reduction_indices=1) q3_audio_frame = tf.reduce_min(tf.nn.top_k( audio_frame_trans, k=tf.to_int32(tf.scalar_mul(0.25, tf.to_float(video_length))), sorted=False).values, reduction_indices=1) q4_audio_frame = tf.reduce_max(audio_frame, reduction_indices=0) mean_audio_frame = tf.reduce_mean(audio_frame, reduction_indices=0) stddv_audio_frame = tf.sqrt( tf.reduce_mean(tf.square(audio_frame - mean_audio_frame), reduction_indices=0)) skew_audio_frame = tf.div( tf.reduce_mean(tf.pow(audio_frame - mean_audio_frame, 3), reduction_indices=0), tf.pow(stddv_audio_frame, 3)) kurt_audio_frame = tf.div( tf.reduce_mean(tf.pow(audio_frame - mean_audio_frame, 4), reduction_indices=0), tf.pow(stddv_audio_frame, 4)) iqr_rgb_frame = tf.subtract(q3_rgb_frame, q1_rgb_frame) rng_rgb_frame = tf.subtract(q4_rgb_frame, q0_rgb_frame) iqr_audio_frame = tf.subtract(q3_audio_frame, q1_audio_frame) rng_audio_frame = tf.subtract(q4_audio_frame, q0_audio_frame) coeffvar_rgb_frame = tf.div(stddv_rgb_frame, mean_rgb_frame) efficiency_rgb_frame = tf.div(tf.square(stddv_rgb_frame), tf.square(mean_rgb_frame)) midhinge_rgb_frame = tf.add(q3_rgb_frame, q1_rgb_frame) qntcoeffdisp_rgb_frame = tf.div(iqr_rgb_frame, midhinge_rgb_frame) coeffvar_audio_frame = tf.div(stddv_audio_frame, mean_audio_frame) efficiency_audio_frame = tf.div(tf.square(stddv_audio_frame), tf.square(mean_audio_frame)) midhinge_audio_frame = tf.add(q3_audio_frame, q1_audio_frame) qntcoeffdisp_audio_frame = tf.div(iqr_audio_frame, midhinge_audio_frame) # Mean Absolute Difference md_rgb_frame = tf.div( tf.reduce_sum(tf.abs( tf.matrix_band_part( tf.subtract(tf.expand_dims(rgb_frame_trans, 2), tf.expand_dims(rgb_frame_trans, 1)), 0, -1)), reduction_indices=[1, 2]), tf.cast(tf.multiply(video_length, video_length - 1), tf.float32)) # Median Absolute Deviation around Median abs_dev_median = tf.transpose(tf.abs(tf.subtract(rgb_frame, 
q2_rgb_frame)), perm=[1, 0]) mean_abs_med_rgb_frame = tf.reduce_min(tf.nn.top_k( abs_dev_median, k=tf.to_int32(tf.scalar_mul(0.50, tf.to_float(video_length))), sorted=False).values, reduction_indices=1) # Mean Absolute Deviation around Mean mean_abs_mean_rgb_frame = tf.reduce_mean(tf.abs( tf.subtract(rgb_frame, mean_rgb_frame)), reduction_indices=0) # Mean Absolute Deviation around Median mean_abs_mean_rgb_frame = tf.reduce_mean(tf.abs( tf.subtract(rgb_frame, mean_rgb_frame)), reduction_indices=0) # Mean Absolute Deviation around Mode mean_abs_mean_rgb_frame = tf.reduce_mean(tf.abs( tf.subtract(rgb_frame, mean_rgb_frame)), reduction_indices=0) pairwise_man, _ = tf.unique( tf.reshape( tf.matrix_band_part( tf.reduce_sum(tf.abs( tf.subtract(tf.expand_dims(rgb_frame, 0), tf.expand_dims(rgb_frame, 1))), reduction_indices=[2]), 0, -1), [-1])) local_features = locals() if (concat_features): features = [] for x in feature_list: if x != 'video_length': features.append(local_features[x]) else: features.append( tf.cast(tf.convert_to_tensor([video_length]), tf.float32)) features = tf.concat(features, 0) else: features = { feature: local_features[feature] for feature in feature_list } return (features)
def build_inference_for_training(self): """Invokes depth and ego-motion networks and computes clouds if needed.""" (self.image_stack, self.image_stack_norm, self.seg_stack, self.intrinsic_mat, self.intrinsic_mat_inv) = self.reader.read_data() with tf.variable_scope('depth_prediction'): # Organized by ...[i][scale]. Note that the order is flipped in # variables in build_loss() below. self.disp = {} self.depth = {} self.depth_upsampled = {} self.inf_loss = 0.0 # Organized by [i]. disp_bottlenecks = [None] * self.seq_length if self.icp_weight > 0: self.cloud = {} for i in range(self.seq_length): image = self.image_stack_norm[:, :, :, 3 * i:3 * (i + 1)] multiscale_disps_i, disp_bottlenecks[i] = nets.disp_net( self.architecture, image, self.use_skip, self.weight_reg, True) multiscale_depths_i = [1.0 / d for d in multiscale_disps_i] self.disp[i] = multiscale_disps_i self.depth[i] = multiscale_depths_i if self.depth_upsampling: self.depth_upsampled[i] = [] # Upsample low-resolution depth maps using differentiable bilinear # interpolation. for s in range(len(multiscale_depths_i)): self.depth_upsampled[i].append(tf.image.resize_bilinear( multiscale_depths_i[s], [self.img_height, self.img_width], align_corners=True)) if self.icp_weight > 0: multiscale_clouds_i = [ project.get_cloud(d, self.intrinsic_mat_inv[:, s, :, :], name='cloud%d_%d' % (s, i)) for (s, d) in enumerate(multiscale_depths_i) ] self.cloud[i] = multiscale_clouds_i # Reuse the same depth graph for all images. tf.get_variable_scope().reuse_variables() if self.handle_motion: # Define egomotion network. This network can see the whole scene except # for any moving objects as indicated by the provided segmentation masks. # To avoid the network getting clues of motion by tracking those masks, we # define the segmentation masks as the union temporally. with tf.variable_scope('egomotion_prediction'): base_input = self.image_stack_norm # (B, H, W, 9) seg_input = self.seg_stack # (B, H, W, 9) ref_zero = tf.constant(0, dtype=tf.uint8) # Motion model is currently defined for three-frame sequences. object_mask1 = tf.equal(seg_input[:, :, :, 0], ref_zero) object_mask2 = tf.equal(seg_input[:, :, :, 3], ref_zero) object_mask3 = tf.equal(seg_input[:, :, :, 6], ref_zero) mask_complete = tf.expand_dims(tf.logical_and( # (B, H, W, 1) tf.logical_and(object_mask1, object_mask2), object_mask3), axis=3) mask_complete = tf.tile(mask_complete, (1, 1, 1, 9)) # (B, H, W, 9) # Now mask out base_input. self.mask_complete = tf.to_float(mask_complete) self.base_input_masked = base_input * self.mask_complete self.egomotion = nets.egomotion_net( image_stack=self.base_input_masked, disp_bottleneck_stack=None, joint_encoder=False, seq_length=self.seq_length, weight_reg=self.weight_reg) # Define object motion network for refinement. This network only sees # one object at a time over the whole sequence, and tries to estimate its # motion. The sequence of images are the respective warped frames. # For each scale, contains batch_size elements of shape (N, 2, 6). self.object_transforms = {} # For each scale, contains batch_size elements of shape (N, H, W, 9). self.object_masks = {} self.object_masks_warped = {} # For each scale, contains batch_size elements of size N. self.object_ids = {} self.egomotions_seq = {} self.warped_seq = {} self.inputs_objectmotion_net = {} with tf.variable_scope('objectmotion_prediction'): # First, warp raw images according to overall egomotion. 
for s in range(NUM_SCALES): self.warped_seq[s] = [] self.egomotions_seq[s] = [] for source_index in range(self.seq_length): egomotion_mat_i_1 = project.get_transform_mat( self.egomotion, source_index, 1) warped_image_i_1, _ = ( project.inverse_warp( self.image_stack[ :, :, :, source_index*3:(source_index+1)*3], self.depth_upsampled[1][s], egomotion_mat_i_1, self.intrinsic_mat[:, 0, :, :], self.intrinsic_mat_inv[:, 0, :, :])) self.warped_seq[s].append(warped_image_i_1) self.egomotions_seq[s].append(egomotion_mat_i_1) # Second, for every object in the segmentation mask, take its mask and # warp it according to the egomotion estimate. Then put a threshold to # binarize the warped result. Use this mask to mask out background and # other objects, and pass the filtered image to the object motion # network. self.object_transforms[s] = [] self.object_masks[s] = [] self.object_ids[s] = [] self.object_masks_warped[s] = [] self.inputs_objectmotion_net[s] = {} for i in range(self.batch_size): seg_sequence = self.seg_stack[i] # (H, W, 9=3*3) object_ids = tf.unique(tf.reshape(seg_sequence, [-1]))[0] self.object_ids[s].append(object_ids) color_stack = [] mask_stack = [] mask_stack_warped = [] for j in range(self.seq_length): current_image = self.warped_seq[s][j][i] # (H, W, 3) current_seg = seg_sequence[:, :, j * 3:(j+1) * 3] # (H, W, 3) def process_obj_mask_warp(obj_id): """Performs warping of the individual object masks.""" obj_mask = tf.to_float(tf.equal(current_seg, obj_id)) # Warp obj_mask according to overall egomotion. obj_mask_warped, _ = ( project.inverse_warp( tf.expand_dims(obj_mask, axis=0), # Middle frame, highest scale, batch element i: tf.expand_dims(self.depth_upsampled[1][s][i], axis=0), # Matrix for warping j into middle frame, batch elem. i: tf.expand_dims(self.egomotions_seq[s][j][i], axis=0), tf.expand_dims(self.intrinsic_mat[i, 0, :, :], axis=0), tf.expand_dims(self.intrinsic_mat_inv[i, 0, :, :], axis=0))) obj_mask_warped = tf.squeeze(obj_mask_warped) obj_mask_binarized = tf.greater( # Threshold to binarize mask. obj_mask_warped, tf.constant(0.5)) return tf.to_float(obj_mask_binarized) def process_obj_mask(obj_id): """Returns the individual object masks separately.""" return tf.to_float(tf.equal(current_seg, obj_id)) object_masks = tf.map_fn( # (N, H, W, 3) process_obj_mask, object_ids, dtype=tf.float32) if self.size_constraint_weight > 0: # The object segmentation masks are all in object_masks. # We need to measure the height of every of them, and get the # approximate distance. # self.depth_upsampled of shape (seq_length, scale, B, H, W). depth_pred = self.depth_upsampled[j][s][i] # (H, W) def get_losses(obj_mask): """Get motion constraint loss.""" # Find height of segment. coords = tf.where(tf.greater( # Shape (num_true, 2=yx) obj_mask[:, :, 0], tf.constant(0.5, dtype=tf.float32))) y_max = tf.reduce_max(coords[:, 0]) y_min = tf.reduce_min(coords[:, 0]) seg_height = y_max - y_min f_y = self.intrinsic_mat[i, 0, 1, 1] approx_depth = ((f_y * self.global_scale_var) / tf.to_float(seg_height)) reference_pred = tf.boolean_mask( depth_pred, tf.greater( tf.reshape(obj_mask[:, :, 0], (self.img_height, self.img_width, 1)), tf.constant(0.5, dtype=tf.float32))) # Establish loss on approx_depth, a scalar, and # reference_pred, our dense prediction. Normalize both to # prevent degenerative depth shrinking. 
global_mean_depth_pred = tf.reduce_mean(depth_pred) reference_pred /= global_mean_depth_pred approx_depth /= global_mean_depth_pred spatial_err = tf.abs(reference_pred - approx_depth) mean_spatial_err = tf.reduce_mean(spatial_err) return mean_spatial_err losses = tf.map_fn( get_losses, object_masks, dtype=tf.float32) self.inf_loss += tf.reduce_mean(losses) object_masks_warped = tf.map_fn( # (N, H, W, 3) process_obj_mask_warp, object_ids, dtype=tf.float32) filtered_images = tf.map_fn( lambda mask: current_image * mask, object_masks_warped, dtype=tf.float32) # (N, H, W, 3) color_stack.append(filtered_images) mask_stack.append(object_masks) mask_stack_warped.append(object_masks_warped) # For this batch-element, if there are N moving objects, # color_stack, mask_stack and mask_stack_warped contain both # seq_length elements of shape (N, H, W, 3). # We can now concatenate them on the last axis, creating a tensor of # (N, H, W, 3*3 = 9), and, assuming N does not get too large so that # we have enough memory, pass them in a single batch to the object # motion network. mask_stack = tf.concat(mask_stack, axis=3) # (N, H, W, 9) mask_stack_warped = tf.concat(mask_stack_warped, axis=3) color_stack = tf.concat(color_stack, axis=3) # (N, H, W, 9) all_transforms = nets.objectmotion_net( # We cut the gradient flow here as the object motion gradient # should have no saying in how the egomotion network behaves. # One could try just stopping the gradient for egomotion, but # not for the depth prediction network. image_stack=tf.stop_gradient(color_stack), disp_bottleneck_stack=None, joint_encoder=False, # Joint encoder not supported. seq_length=self.seq_length, weight_reg=self.weight_reg) # all_transforms of shape (N, 2, 6). self.object_transforms[s].append(all_transforms) self.object_masks[s].append(mask_stack) self.object_masks_warped[s].append(mask_stack_warped) self.inputs_objectmotion_net[s][i] = color_stack tf.get_variable_scope().reuse_variables() else: # Don't handle motion, classic model formulation. with tf.name_scope('egomotion_prediction'): if self.joint_encoder: # Re-arrange disp_bottleneck_stack to be of shape # [B, h_hid, w_hid, c_hid * seq_length]. Currently, it is a list with # seq_length elements, each of dimension [B, h_hid, w_hid, c_hid]. disp_bottleneck_stack = tf.concat(disp_bottlenecks, axis=3) else: disp_bottleneck_stack = None self.egomotion = nets.egomotion_net( image_stack=self.image_stack_norm, disp_bottleneck_stack=disp_bottleneck_stack, joint_encoder=self.joint_encoder, seq_length=self.seq_length, weight_reg=self.weight_reg)
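# Hedged, self-contained sketch of the per-object masking idea used in
# build_inference_for_training above: tf.unique lists the object ids present
# in a segmentation map, and tf.map_fn turns each id into a binary mask that
# isolates one object at a time. The toy segmentation and image below are
# illustrative only.
import tensorflow as tf

seg = tf.constant([[0, 0, 7],
                   [0, 7, 7],
                   [0, 0, 5]])              # (H, W) object ids, 0 = background
image = tf.ones([3, 3, 3])                  # (H, W, 3) dummy RGB frame

object_ids, _ = tf.unique(tf.reshape(seg, [-1]))

def make_mask(obj_id):
  # Binary (H, W) mask selecting the pixels belonging to one object id.
  return tf.to_float(tf.equal(seg, obj_id))

object_masks = tf.map_fn(make_mask, object_ids, dtype=tf.float32)  # (N, H, W)
# Mask the image so a motion network would see one object per batch element.
masked_images = tf.expand_dims(object_masks, -1) * image            # (N, H, W, 3)

with tf.Session() as sess:
  ids, masks = sess.run([object_ids, object_masks])
  print(ids)           # [0 7 5]
  print(masks.shape)   # (3, 3, 3)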
############################################################################### # YOUR CODE x = tf.random_normal((10, 10)) out = tf.matrix_determinant(x) print(sess.run(out)) ############################################################################### # 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9]. # Return the unique elements in x # Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple. ############################################################################### # YOUR CODE x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9], tf.float32) out, index = tf.unique(x) print(sess.run(out)) ############################################################################### # 1h: Create two tensors x and y of shape 300 from any normal distribution, # as long as they are from the same distribution. # Use tf.cond() to return: # - The mean squared error of (x - y) if the average of all elements in (x - y) # is negative, or # - The sum of absolute value of all elements in the tensor (x - y) otherwise. # Hint: see the Huber loss function in the lecture slides 3. ############################################################################### # YOUR CODE x = tf.random_normal((300, )) y = tf.random_normal((300, ))
def process_one(self, collection): """Process one episode. Args: Collection dictionary that contains the following keys: support: np.ndarray. Image ID in the support set. flag: np.ndarray. Binary flag indicating whether it is labeled (1) or unlabeled (0). query: np.ndarray. Image ID in the query set. """ s, flag, q = collection['support'], collection['flag'], collection[ 'query'] del collection['support'] del collection['query'] del collection['flag'] dataset = self.dataset nclasses = self.nclasses img_s = dataset.get_images(s) lbl_s = np.array(collection['support_label']) del collection['support_label'] T = self.maxlen # Mask off unlabeled set. labeled = flag == 1 unlabeled = flag == 0 lbl_s_l = lbl_s[labeled] lbl_s_u = lbl_s[unlabeled] # Note numpy does not give the desired behavior here. # lbl_map, lbl_s_l = np.unique(lbl_s_l, return_inverse=True) lbl_map, lbl_s_l = tf.unique(lbl_s_l) def query_tf(x): x = tf.expand_dims(x, 1) # [T, 1] x_eq = tf.cast(tf.equal(x, lbl_map), tf.float32) # [T, N] x_valid = tf.reduce_sum(x_eq, [1]) # [T] # Everything that has not been found -> fixed unknown. # This means it's a distractor. x = tf.cast(tf.argmax(x_eq, axis=1), tf.float32) x = x_valid * x + (1 - x_valid) * nclasses x = tf.cast(x, tf.int32) return x def query_np(x): x = np.expand_dims(x, 1) # [T, 1] x_eq = np.equal(x, lbl_map).astype(np.float32) # [T, N] x_valid = np.sum(x_eq, axis=1) # [T] # Everything that has not been found -> fixed unknown. # This means it's a distractor. x = np.argmax(x_eq, axis=1).astype(np.float32) x = x_valid * x + (1 - x_valid) * nclasses x = x.astype(np.int32) return x # Find distractors. lbl_s_eq = tf.cast(tf.equal(tf.expand_dims(lbl_s, 1), lbl_map), tf.float32) distractor_flag = tf.cast(1.0 - tf.reduce_sum(lbl_s_eq, [1]), tf.int32) # Re-indexed labels. lbl_s[labeled] = lbl_s_l lbl_s[unlabeled] = query_np(lbl_s_u) # Label fed into the network. lbl_s_masked = np.copy(lbl_s) lbl_s_masked[unlabeled] = nclasses # We assumed fix unknown. # Make the first appearing item to be unknown in groundtruth. lbl_s_np = np.copy(lbl_s) lbl_s_np2 = np.copy(lbl_s_np) lbl_s_np2[unlabeled] = -1 lbl_s_gt = np.zeros([len(lbl_s_np)], dtype=np.int32) cummax = np.maximum.accumulate(lbl_s_np2) lbl_s_gt[0] = nclasses # Labeled to be trained as target. cond = lbl_s_np[1:] > cummax[:-1] lbl_s_gt[1:] = np.where(cond, nclasses, lbl_s_np[1:]) if self.nquery > 0: img_q = dataset.get_images(q) lbl_q = collection['query_label'] del collection['query_label'] lbl_q = query_tf(lbl_q) else: img_q = None lbl_q = None epi = { 'x_s': self.pad_x(img_s, T), 'y_s': self.pad_y(lbl_s_masked, T), 'y_gt': self.pad_y(lbl_s_gt, T), 'y_dis': self.pad_y(distractor_flag, T), 'y_full': self.pad_y(lbl_s, T), 'flag_s': self.get_flag(lbl_s, T) } if self.nquery > 0: assert False, 'Not supported' # For remaining additional info. for k in collection: epi[k] = self.pad_y(collection[k], T) if self.episode_processor is not None: epi = self.episode_processor(epi) return epi
# YOUR CODE
x = tf.random_normal([10, 10], mean=10)
out = tf.matrix_determinant(x)
# Run x and out in a single sess.run() call so the printed matrix and its
# determinant come from the same random sample.
x_val, out_val = sess.run([x, out])
print("x=", x_val)
print("out=", out_val)

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################

# YOUR CODE
x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
out, idx = tf.unique(x)  # tf.unique() returns a (values, indices) tuple

###############################################################################
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
# as long as they are from the same distribution.
# Use tf.cond() to return:
# - The mean squared error of (x - y) if the average of all elements in (x - y)
#   is negative, or
# - The sum of absolute value of all elements in the tensor (x - y) otherwise.
# Hint: see the Huber loss function in the lecture slides 3.
###############################################################################

# YOUR CODE
x = tf.random_normal([300], mean=5, stddev=1)
y = tf.random_normal([300], mean=5, stddev=1)
average = tf.reduce_mean(x - y)
def generate_sequence_beam_search(self, input, max_words=None, initial_state=None, attention_states=None, beam_size=10, convert_unk=True, length_normalization_factor=0., input_text=None, input_text_length=None, emb=None): """ outgraph beam search, input should be one instance only batch_size=1 max_words actually not used here... for it is determined outgraph.. return top (path, score) TODO this is hacky, first step attention_state, input , state all size 1, then should be attention_state 1, input, state size is beam_size, also might be less then beam_size.. if not possible to find beam_size un done """ if emb is None: emb = self.emb tf.add_to_collection('beam_search_beam_size', tf.constant(beam_size)) if input_text is not None: if FLAGS.decode_copy: input_text = tf.squeeze(input_text) input_text_length = tf.to_int32(tf.squeeze(input_text_length)) input_text = input_text[0:input_text_length] input_text, _ = tf.unique(input_text) input_text_length = tf.shape(input_text)[-1] #sort from small to large #input_text, _ = -tf.nn.top_k(-input_text, input_text_length) #TODO may be need to be input_text_length, so as to do more decode limit out graph like using trie! beam_size = tf.minimum(beam_size, input_text_length) elif FLAGS.decode_use_alignment: input_text = tf.squeeze(input_text) input_text_length = tf.to_int32(tf.squeeze(input_text_length)) input_text = input_text[0:input_text_length] input_text_length = tf.shape(input_text)[-1] beam_size = tf.minimum(beam_size, input_text_length) else: if FLAGS.gen_only: input_text = None batch_size = melt.get_batch_size(input) if attention_states is None: cell = self.cell else: cell = self.prepare_attention( attention_states, initial_state=initial_state, score_as_alignment=self.score_as_alignment) initial_state = None state = cell.zero_state(batch_size, tf.float32) \ if initial_state is None else initial_state ##--TODO hard.. since need to reuse to share ValueError: ##Variable seq2seq/main/decode/memory_layer/kernel already exists, disallowed. Did you mean to set reuse=True in VarScope? ##another way to solve is always using tiled_batch attention_states and state, the first step will choose from only first beam ##will not all solve the problem since feed data might be less than beam size, so attention states always be 1 is safe #cell2 = self.prepare_attention(tf.contrib.seq2seq.tile_batch(attention_states, beam_size), reuse=True) first_state = state beam_search_step = functools.partial(self.beam_search_step, beam_size=beam_size) #since before hack using generate_sequence_greedy, here can not set scope.reuse_variables #NOTICE inorder to use lstm which is in .../rnn/ nameapce here you must also add this scope to use the shared with tf.variable_scope(self.scope) as scope: inital_attention, initial_state, initial_logprobs, initial_ids = \ beam_search_step(input, state, cell, input_text=input_text) if attention_states is not None: tf.add_to_collection( 'beam_search_initial_alignments', tf.get_collection('attention_alignments')[-1]) scope.reuse_variables() # In inference mode, use concatenated states for convenient feeding and # fetching. 
state_is_tuple = len(initial_state) == 2 if state_is_tuple: initial_state = tf.concat(initial_state, 1, name="initial_state") state_size = sum(self.cell.state_size) else: state_size = self.cell.state_size #output is used only when use attention if attention_states is not None: initial_state = tf.concat([initial_state, inital_attention], 1, name="initial_attention_state") state_size += self.cell.output_size tf.add_to_collection('beam_search_initial_state', initial_state) tf.add_to_collection('beam_search_initial_logprobs', initial_logprobs) tf.add_to_collection('beam_search_initial_ids', initial_ids) input_feed = tf.placeholder( dtype=tf.int64, shape=[None], # batch_size name="input_feed") tf.add_to_collection('beam_search_input_feed', input_feed) input = tf.nn.embedding_lookup(emb, input_feed) # Placeholder for feeding a batch of concatenated states. state_feed = tf.placeholder(dtype=tf.float32, shape=[None, state_size], name="state_feed") tf.add_to_collection('beam_search_state_feed', state_feed) if attention_states is not None: state, attention = tf.split(state_feed, [ state_size - self.cell.output_size, self.cell.output_size ], axis=1) else: state = state_feed if state_is_tuple: state = tf.split(state, num_or_size_splits=2, axis=1) if attention_states is not None: state_ = first_state.clone(cell_state=state, attention=attention) else: state_ = state #--TODO here is not safe if change attention_wrapper, notice batch size of attention states is 1 #--but cell input and state is beam_size #attention, state, top_logprobs, top_ids = beam_search_step(input, state_, cell2) if input_text is not None and not FLAGS.decode_copy: input_text = tf.contrib.seq2seq.tile_batch( input_text, melt.get_batch_size(input)) attention, state, top_logprobs, top_ids = beam_search_step( input, state_, cell, input_text=input_text) if state_is_tuple: # Concatentate the resulting state. state = tf.concat(state, 1, name="state") if attention_states is not None: state = tf.concat([state, attention], 1, name="attention_state") tf.add_to_collection('beam_search_state', state) tf.add_to_collection('beam_search_logprobs', top_logprobs) tf.add_to_collection('beam_search_ids', top_ids) #just same return like return path list, score list return tf.no_op(), tf.no_op()
def merge_boxes_with_multiple_labels(boxes, classes, confidences, num_classes, quantization_bins=10000): """Merges boxes with same coordinates and returns K-hot encoded classes. Args: boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only normalized coordinates are allowed. classes: A tf.int32 tensor with shape [N] holding class indices. The class index starts at 0. confidences: A tf.float32 tensor with shape [N] holding class confidences. num_classes: total number of classes to use for K-hot encoding. quantization_bins: the number of bins used to quantize the box coordinate. Returns: merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes, where N' <= N. class_encodings: A tf.int32 tensor with shape [N', num_classes] holding K-hot encodings for the merged boxes. confidence_encodings: A tf.float32 tensor with shape [N', num_classes] holding encodings of confidences for the merged boxes. merged_box_indices: A tf.int32 tensor with shape [N'] holding original indices of the boxes. """ boxes_shape = tf.shape(boxes) classes_shape = tf.shape(classes) confidences_shape = tf.shape(confidences) box_class_shape_assert = shape_utils.assert_shape_equal_along_first_dimension( boxes_shape, classes_shape) box_confidence_shape_assert = ( shape_utils.assert_shape_equal_along_first_dimension( boxes_shape, confidences_shape)) box_dimension_assert = tf.assert_equal(boxes_shape[1], 4) box_normalized_assert = shape_utils.assert_box_normalized(boxes) with tf.control_dependencies( [box_class_shape_assert, box_confidence_shape_assert, box_dimension_assert, box_normalized_assert]): quantized_boxes = tf.to_int64(boxes * (quantization_bins - 1)) ymin, xmin, ymax, xmax = tf.unstack(quantized_boxes, axis=1) hashcodes = ( ymin + xmin * quantization_bins + ymax * quantization_bins * quantization_bins + xmax * quantization_bins * quantization_bins * quantization_bins) unique_hashcodes, unique_indices = tf.unique(hashcodes) num_boxes = tf.shape(boxes)[0] num_unique_boxes = tf.shape(unique_hashcodes)[0] merged_box_indices = tf.unsorted_segment_min( tf.range(num_boxes), unique_indices, num_unique_boxes) merged_boxes = tf.gather(boxes, merged_box_indices) def map_box_encodings(i): """Produces box K-hot and score encodings for each class index.""" box_mask = tf.equal( unique_indices, i * tf.ones(num_boxes, dtype=tf.int32)) box_mask = tf.reshape(box_mask, [-1]) box_indices = tf.boolean_mask(classes, box_mask) box_confidences = tf.boolean_mask(confidences, box_mask) box_class_encodings = tf.sparse_to_dense( box_indices, [num_classes], 1, validate_indices=False) box_confidence_encodings = tf.sparse_to_dense( box_indices, [num_classes], box_confidences, validate_indices=False) return box_class_encodings, box_confidence_encodings class_encodings, confidence_encodings = tf.map_fn( map_box_encodings, tf.range(num_unique_boxes), back_prop=False, dtype=(tf.int32, tf.float32)) merged_boxes = tf.reshape(merged_boxes, [-1, 4]) class_encodings = tf.reshape(class_encodings, [-1, num_classes]) confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes]) merged_box_indices = tf.reshape(merged_box_indices, [-1]) return (merged_boxes, class_encodings, confidence_encodings, merged_box_indices)
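# Minimal sketch (plain TF 1.x, no object_detection utilities) of the dedup
# trick used by merge_boxes_with_multiple_labels above: quantize each box's
# coordinates, hash them into a single integer, and let tf.unique group
# identical boxes. tf.unsorted_segment_min then recovers, for each unique box,
# the first original index it came from. Toy boxes only.
import tensorflow as tf

boxes = tf.constant([[0.1, 0.1, 0.5, 0.5],
                     [0.2, 0.2, 0.9, 0.9],
                     [0.1, 0.1, 0.5, 0.5]])   # boxes 0 and 2 are identical
bins = 10000
quantized = tf.to_int64(boxes * (bins - 1))
ymin, xmin, ymax, xmax = tf.unstack(quantized, axis=1)
hashcodes = ymin + xmin * bins + ymax * bins**2 + xmax * bins**3

unique_hashcodes, unique_indices = tf.unique(hashcodes)
num_boxes = tf.shape(boxes)[0]
num_unique = tf.shape(unique_hashcodes)[0]
merged_box_indices = tf.unsorted_segment_min(
    tf.range(num_boxes), unique_indices, num_unique)
merged_boxes = tf.gather(boxes, merged_box_indices)

with tf.Session() as sess:
  print(sess.run(merged_box_indices))  # [0 1]
  print(sess.run(merged_boxes))        # the two distinct boxes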
def efron_estimator_tf(time, censoring, prediction): n = tf.shape(time)[0] sort_idx = tf.nn.top_k(time, k=n, sorted=True).indices risk = tf.gather(prediction, sort_idx) events = tf.gather(censoring, sort_idx) otimes = tf.gather(time, sort_idx) # Get unique failure times & Exclude zeros # NOTE: this assumes that falure times start from > 0 (greater than zero) otimes_cens = otimes * events unique_ftimes = tf.boolean_mask(otimes_cens, tf.greater(otimes_cens, 0) ) unique_ftimes = tf.unique(unique_ftimes).y m = tf.shape(unique_ftimes)[0] # Define key variables: log_lik = tf.Variable(0., dtype=tf.float32, trainable=False) tie_count = tf.Variable([], dtype=tf.uint8, trainable=False) tie_risk = tf.Variable([], dtype=tf.float32, trainable=False) tie_hazard = tf.Variable([], dtype=tf.float32, trainable=False) cum_hazard = tf.Variable([], dtype=tf.float32, trainable=False) cum_sum = tf.cumsum(tf.exp(risk)) # Prepare for looping: i = tf.constant(0, tf.int32) def loop_cond(i, *args): return i < m def loop_1_step(i, tc, tr, th, ch): idx_b = tf.logical_and( tf.equal(otimes, unique_ftimes[i]), tf.equal(events, tf.ones_like(events)) ) idx_i = tf.cast( tf.boolean_mask( tf.lin_space(0., tf.cast(n-1,tf.float32), n), tf.greater(tf.cast(idx_b, tf.int32),0) ), tf.int32 ) tc = tf.concat([tc, [tf.reduce_sum(tf.cast(idx_b, tf.uint8))]], 0) tr = tf.concat([tr, [tf.reduce_sum(tf.gather(risk, idx_i))]], 0) th = tf.concat([th, [tf.reduce_sum(tf.gather(tf.exp(risk), idx_i))]], 0) idx_i = tf.cast( tf.boolean_mask( tf.lin_space(0., tf.cast(n-1,tf.float32), n), tf.greater(tf.cast(tf.equal(otimes, unique_ftimes[i]), tf.int32),0) ), tf.int32 ) ch = tf.concat([ch, [tf.reduce_max(tf.gather( cum_sum, idx_i))]], 0) return i + 1, tc, tr, th, ch def loop_2_step(i, tc, tr, th, ch, likelihood): l = tf.cast(tc[i], tf.float32) J = tf.lin_space(0., l-1, tf.cast(l,tf.int32)) / l Dm = ch[i] - J * th[i] likelihood = likelihood + tr[i] - tf.reduce_sum(tf.log(Dm)) return i + 1, tc, tr, th, ch, likelihood # Loops: _, tie_count, tie_risk, tie_hazard, cum_hazard = loop_1 = tf.while_loop( loop_cond, loop_1_step, loop_vars = [i, tie_count, tie_risk, tie_hazard, cum_hazard], shape_invariants = [i.get_shape(),tf.TensorShape([None]),tf.TensorShape([None]),tf.TensorShape([None]),tf.TensorShape([None])] ) loop_2_out = tf.while_loop( loop_cond, loop_2_step, loop_vars = [i, tie_count, tie_risk, tie_hazard, cum_hazard, log_lik], shape_invariants = [i.get_shape(),tf.TensorShape([None]),tf.TensorShape([None]),tf.TensorShape([None]),tf.TensorShape([None]),log_lik.get_shape()] ) log_lik = loop_2_out[-1] return tf.negative(log_lik)
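# Small sketch (toy data, not the estimator itself) of the step above that
# isolates the unique failure times: multiply times by the event indicator so
# censored entries become zero, drop the zeros, and deduplicate with tf.unique.
import tensorflow as tf

times = tf.constant([5., 3., 3., 8., 2.])
events = tf.constant([1., 0., 1., 1., 1.])   # 1 = event observed, 0 = censored

event_times = times * events                  # censored times become 0
nonzero = tf.boolean_mask(event_times, tf.greater(event_times, 0))
unique_ftimes = tf.unique(nonzero).y

with tf.Session() as sess:
  print(sess.run(unique_ftimes))  # [5. 3. 8. 2.]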
import tensorflow as tf

sess = tf.InteractiveSession()
x = tf.constant([[2, 5, 3, -5],
                 [0, 3, -2, 5],
                 [4, 3, 5, 3],
                 [6, 1, 4, 0]])
listx = tf.constant([1, 2, 3, 4, 5, 6, 7, 8])
listy = tf.constant([4, 5, 8, 9])

print("\nx=\n", x.eval())
print("\nlistx=", listx.eval())
print("\nlisty=", listy.eval())

boolx = tf.constant([[True, False], [False, True]])

print("\ntf.argmin(x, 1).eval()")    # Position of the minimum value in each row
print(tf.argmin(x, 1).eval())
print("\ntf.argmax(x, 1).eval()")    # Position of the maximum value in each row
print(tf.argmax(x, 1).eval())
print("\ntf.setdiff1d(listx, listy)[0].eval()")  # List differences
print(tf.setdiff1d(listx, listy)[0].eval())
print(tf.where(boolx).eval())        # Coordinates of True values
print(tf.unique(listx)[0].eval())    # Unique values in the list
def prepare_serialized_examples(self, serialized_example, max_quantized_value=2, min_quantized_value=-2): """Parse single serialized SequenceExample from the TFRecords.""" # Read/parse frame/segment-level labels. context_features = { "id": tf.FixedLenFeature([], tf.string), } if self.segment_labels: context_features.update({ # There is no need to read end-time given we always assume the segment # has the same size. "segment_labels": tf.VarLenFeature(tf.int64), "segment_start_times": tf.VarLenFeature(tf.int64), "segment_scores": tf.VarLenFeature(tf.float32) }) else: context_features.update({"labels": tf.VarLenFeature(tf.int64)}) sequence_features = { feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string) for feature_name in self.feature_names } contexts, features = tf.parse_single_sequence_example( serialized_example, context_features=context_features, sequence_features=sequence_features) # loads (potentially) different types of features and concatenates them num_features = len(self.feature_names) assert num_features > 0, "No feature selected: feature_names is empty!" assert len(self.feature_names) == len(self.feature_sizes), ( "length of feature_names (={}) != length of feature_sizes (={})". format(len(self.feature_names), len(self.feature_sizes))) num_frames = -1 # the number of frames in the video feature_matrices = [None ] * num_features # an array of different features for feature_index in range(num_features): feature_matrix, num_frames_in_this_feature = self.get_video_matrix( features[self.feature_names[feature_index]], self.feature_sizes[feature_index], self.max_frames, max_quantized_value, min_quantized_value) if num_frames == -1: num_frames = num_frames_in_this_feature feature_matrices[feature_index] = feature_matrix # cap the number of frames at self.max_frames num_frames = tf.minimum(num_frames, self.max_frames) # concatenate different features video_matrix = tf.concat(feature_matrices, 1) # Partition frame-level feature matrix to segment-level feature matrix. if self.segment_labels: start_times = contexts["segment_start_times"].values # Here we assume all the segments that started at the same start time has # the same segment_size. uniq_start_times, seg_idxs = tf.unique(start_times, out_idx=tf.dtypes.int64) # TODO(zhengxu): Ensure the segment_sizes are all same. segment_size = self.segment_size # Range gather matrix, e.g., [[0,1,2],[1,2,3]] for segment_size == 3. range_mtx = tf.expand_dims( uniq_start_times, axis=-1) + tf.expand_dims( tf.range(0, segment_size, dtype=tf.int64), axis=0) # Shape: [num_segment, segment_size, feature_dim]. batch_video_matrix = tf.gather_nd( video_matrix, tf.expand_dims(range_mtx, axis=-1)) num_segment = tf.shape(batch_video_matrix)[0] batch_video_ids = tf.reshape( tf.tile([contexts["id"]], [num_segment]), (num_segment, )) batch_frames = tf.reshape(tf.tile([segment_size], [num_segment]), (num_segment, )) # For segment labels, all labels are not exhausively rated. So we only # evaluate the rated labels. # Label indices for each segment, shape: [num_segment, 2]. 
label_indices = tf.stack( [seg_idxs, contexts["segment_labels"].values], axis=-1) label_values = contexts["segment_scores"].values sparse_labels = tf.sparse.SparseTensor( label_indices, label_values, (num_segment, self.num_classes)) batch_labels = tf.sparse.to_dense(sparse_labels, validate_indices=False) sparse_label_weights = tf.sparse.SparseTensor( label_indices, tf.ones_like(label_values, dtype=tf.float32), (num_segment, self.num_classes)) batch_label_weights = tf.sparse.to_dense(sparse_label_weights, validate_indices=False) else: # Process video-level labels. label_indices = contexts["labels"].values sparse_labels = tf.sparse.SparseTensor( tf.expand_dims(label_indices, axis=-1), tf.ones_like(contexts["labels"].values, dtype=tf.bool), (self.num_classes, )) labels = tf.sparse.to_dense(sparse_labels, default_value=False, validate_indices=False) # convert to batch format. batch_video_ids = tf.expand_dims(contexts["id"], 0) batch_video_matrix = tf.expand_dims(video_matrix, 0) batch_labels = tf.expand_dims(labels, 0) batch_frames = tf.expand_dims(num_frames, 0) batch_label_weights = None output_dict = { "video_ids": batch_video_ids, "video_matrix": batch_video_matrix, "labels": batch_labels, "num_frames": batch_frames, } if batch_label_weights is not None: output_dict["label_weights"] = batch_label_weights return output_dict
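# Hedged sketch (toy numbers) of the segment-gathering logic in
# prepare_serialized_examples above: the unique segment start times are
# expanded into a [num_segment, segment_size] index matrix, and tf.gather_nd
# pulls the corresponding rows out of the frame-level feature matrix.
import tensorflow as tf

video_matrix = tf.reshape(tf.range(12, dtype=tf.float32), [6, 2])  # 6 frames
start_times = tf.constant([0, 0, 3], dtype=tf.int64)               # two ratings at t=0
segment_size = 3

uniq_start_times, seg_idxs = tf.unique(start_times, out_idx=tf.dtypes.int64)
range_mtx = tf.expand_dims(uniq_start_times, axis=-1) + tf.expand_dims(
    tf.range(0, segment_size, dtype=tf.int64), axis=0)
batch_video_matrix = tf.gather_nd(video_matrix,
                                  tf.expand_dims(range_mtx, axis=-1))

with tf.Session() as sess:
  # (2, 3, 2): [num_segment, segment_size, feature_dim]
  print(sess.run(batch_video_matrix).shape)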
# 1f: Create a random 2-d tensor of size 10 x 10 from any distribution. # Calculate its determinant. # Hint: Look at tf.matrix_determinant(). ############################################################################### m = tf.random_normal([10, 10], mean=10, stddev=1) out = tf.matrix_determinant(m) ############################################################################### # 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9]. # Return the unique elements in x # Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple. ############################################################################### x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9]) unique_values, indices = tf.unique(x) ############################################################################### # 1h: Create two tensors x and y of shape 300 from any normal distribution, # as long as they are from the same distribution. # Use tf.cond() to return: # - The mean squared error of (x - y) if the average of all elements in (x - y) # is negative, or # - The sum of absolute value of all elements in the tensor (x - y) otherwise. # Hint: see the Huber loss function in the lecture slides 3. ############################################################################### x = tf.random_normal([300], mean=5, stddev=1) y = tf.random_normal([300], mean=5, stddev=1) average = tf.reduce_mean(x - y) def f1(): return tf.reduce_mean(tf.square(x - y))
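# A possible completion of exercise 1h (a sketch, not the official solution):
# define the second branch and let tf.cond() switch on the sign of the average
# difference. f1, x, y and average are the names defined just above.
def f2():
  return tf.reduce_sum(tf.abs(x - y))

out = tf.cond(average < 0, f1, f2)

with tf.Session() as sess:
  print(sess.run(out))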
def efron_estimator_tf(y_true, y_pred): sort_idx = tf.nn.top_k(y_true[:, 1], k=tf.shape(y_pred)[0], sorted=True).indices risk = tf.gather(y_pred, sort_idx) risk_exp = tf.exp(risk) events = tf.gather(y_true[:, 2], sort_idx) ftimes = tf.gather(y_true[:, 1], sort_idx) ftimes_cens = ftimes * events # Get unique failure times & Exclude zeros # NOTE: this assumes that falure times start from > 0 (greater than zero) unique = tf.unique(ftimes_cens).y unique_ftimes = tf.boolean_mask(unique, tf.greater(unique, 0)) m = tf.shape(unique_ftimes)[0] # Define key variables: log_lik = tf.Variable(0., dtype=tf.float32, validate_shape=True, trainable=False) E_ti = tf.Variable([], dtype=tf.int32, validate_shape=True, trainable=False) risk_phi = tf.Variable([], dtype=tf.float32, validate_shape=True, trainable=False) tie_phi = tf.Variable([], dtype=tf.float32, validate_shape=True, trainable=False) cum_risk = tf.Variable([], dtype=tf.float32, validate_shape=True, trainable=False) cum_sum = tf.cumsum(risk_exp) # ----------------------------------------------------------------- # Prepare for looping: # ----------------------------------------------------------------- i = tf.constant(0, tf.int32) def loop_cond(i, *args): return i < m # Step for loop # 1: def loop_1_step(i, E, Rp, Tp, Cr, Cs): n = tf.shape(Cs)[0] idx_b = tf.logical_and(tf.equal(ftimes, unique_ftimes[i]), tf.equal(events, tf.ones_like(events))) idx_i = tf.cast( tf.boolean_mask(tf.lin_space(0., tf.cast(n - 1, tf.float32), n), tf.greater(tf.cast(idx_b, tf.int32), 0)), tf.int32) E = tf.concat([E, [tf.reduce_sum(tf.cast(idx_b, tf.int32))]], 0) Rp = tf.concat([Rp, [tf.reduce_sum(tf.gather(risk, idx_i))]], 0) Tp = tf.concat([Tp, [tf.reduce_sum(tf.gather(risk_exp, idx_i))]], 0) idx_i = tf.cast( tf.boolean_mask( tf.lin_space(0., tf.cast(n - 1, tf.float32), n), tf.greater( tf.cast(tf.equal(ftimes, unique_ftimes[i]), tf.int32), 0)), tf.int32) Cr = tf.concat([Cr, [tf.reduce_max(tf.gather(Cs, idx_i))]], 0) return i + 1, E, Rp, Tp, Cr, Cs # Step for loop # 1: def loop_2_step(i, E, Rp, Tp, Cr, likelihood): l = E_ti[i] J = tf.lin_space(0., tf.cast(l - 1, tf.float32), l) / tf.cast( l, tf.float32) Dm = Cr[i] - J * Tp[i] likelihood = likelihood + Rp[i] - tf.reduce_sum(tf.log(Dm)) return i + 1, E, Rp, Tp, Cr, likelihood # ----------------------------------------------------------------- # Loop # 1: _, E_ti, risk_phi, tie_phi, cum_risk, _ = loop_1 = tf.while_loop( loop_cond, loop_1_step, loop_vars=[i, E_ti, risk_phi, tie_phi, cum_risk, cum_sum], shape_invariants=[ i.get_shape(), tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None]), cum_sum.get_shape() ]) # Loop # 2: loop_2 = tf.while_loop( loop_cond, loop_2_step, loop_vars=[i, E_ti, risk_phi, tie_phi, cum_risk, log_lik], shape_invariants=[ i.get_shape(), tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None]), log_lik.get_shape() ]) log_lik = loop_2[5] # TODO: Normalize by the number of EVENTS in the batch, # NOT number of samples in the batch FIXIT!! log_lik = log_lik / tf.cast(tf.shape(y_pred)[0], tf.float32) return tf.negative(log_lik)
W = tf.Variable(tf.random_uniform([4, 5], -1.0, 1.0)) print(W.get_shape()) # Get the shape of W (4, 5) print(tf.shape(W)) # Wrong. tf.shape(W) returns an tensor that in runtime returns the shape. # Tensor("Shape:0", shape=(2,), dtype=int32) W = tf.reshape(W, [10, 2]) print(W.get_shape()) # (10, 2) W = tf.reshape(W, [-1]) print(W.get_shape()) # (20,) W = tf.reshape(W, [5, -1]) print(W.get_shape()) # (5, 4) shape_op = tf.shape(W) c = tf.constant([1, 2, 3, 1]) y, _ = tf.unique(c) # y only contains the unique elements. print(y.get_shape()) # (?,) This is a dynamic shape. Only know in runtime y_shape = tf.shape(y) # Define an op to get the dynamic shape. init = tf.global_variables_initializer() with tf.Session() as sess: sess.run(init) print(sess.run(shape_op)) # [5 4] print(sess.run(y_shape)) # [3]
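# Related sketch: tf.unique_with_counts additionally returns how many times
# each unique value occurs; like y above, its outputs only get a concrete
# shape at run time.
c2 = tf.constant([1, 2, 3, 1])
vals, idx, counts = tf.unique_with_counts(c2)
print(vals.get_shape())                 # (?,) dynamic, unknown until run time
with tf.Session() as sess:
    print(sess.run([vals, idx, counts]))  # [1 2 3], [0 1 2 0], [2 1 1]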
def get(dataset, num_frames_per_video, crop_size, batch_size, min_resize_value=None, max_resize_value=None, resize_factor=None, min_scale_factor=1., max_scale_factor=1., scale_factor_step_size=0, preprocess_image_and_label=True, num_readers=1, num_threads=1, dataset_split=None, is_training=True, model_variant=None, batch_capacity_factor=32, video_frames_are_decoded=False, decoder_output_stride=None, first_frame_finetuning=False, sample_only_first_frame_for_finetuning=False, sample_adjacent_and_consistent_query_frames=False, remap_labels_to_reference_frame=True, generate_prev_frame_mask_by_mask_damaging=False, three_frame_dataset=False, add_prev_frame_label=True): """Gets the dataset split for semantic segmentation. This functions gets the dataset split for semantic segmentation. In particular, it is a wrapper of (1) dataset_data_provider which returns the raw dataset split, (2) input_preprcess which preprocess the raw data, and (3) the Tensorflow operation of batching the preprocessed data. Then, the output could be directly used by training, evaluation or visualization. Args: dataset: An instance of slim Dataset. num_frames_per_video: The number of frames used per video crop_size: Image crop size [height, width]. batch_size: Batch size. min_resize_value: Desired size of the smaller image side. max_resize_value: Maximum allowed size of the larger image side. resize_factor: Resized dimensions are multiple of factor plus one. min_scale_factor: Minimum scale factor value. max_scale_factor: Maximum scale factor value. scale_factor_step_size: The step size from min scale factor to max scale factor. The input is randomly scaled based on the value of (min_scale_factor, max_scale_factor, scale_factor_step_size). preprocess_image_and_label: Boolean variable specifies if preprocessing of image and label will be performed or not. num_readers: Number of readers for data provider. num_threads: Number of threads for batching data. dataset_split: Dataset split. is_training: Is training or not. model_variant: Model variant (string) for choosing how to mean-subtract the images. See feature_extractor.network_map for supported model variants. batch_capacity_factor: Batch capacity factor affecting the training queue batch capacity. video_frames_are_decoded: Boolean, whether the video frames are already decoded decoder_output_stride: Integer, the stride of the decoder output. first_frame_finetuning: Boolean, whether to only sample the first frame for fine-tuning. sample_only_first_frame_for_finetuning: Boolean, whether to only sample the first frame during fine-tuning. This should be False when using lucid or wonderland data, but true when fine-tuning on the first frame only. Only has an effect if first_frame_finetuning is True. sample_adjacent_and_consistent_query_frames: Boolean, if true, the query frames (all but the first frame which is the reference frame) will be sampled such that they are adjacent video frames and have the same crop coordinates and flip augmentation. remap_labels_to_reference_frame: Boolean, whether to remap the labels of the query frames to match the labels of the (downscaled) reference frame. If a query frame contains a label which is not present in the reference, it will be mapped to background. generate_prev_frame_mask_by_mask_damaging: Boolean, whether to generate the masks used as guidance from the previous frame by damaging the ground truth mask. 
three_frame_dataset: Boolean, whether the dataset has exactly three frames per video of which the first is to be used as reference and the two others are consecutive frames to be used as query frames. add_prev_frame_label: Boolean, whether to sample one more frame before the first query frame to obtain a previous frame label. Only has an effect, if sample_adjacent_and_consistent_query_frames is True and generate_prev_frame_mask_by_mask_damaging is False. Returns: A dictionary of batched Tensors for semantic segmentation. Raises: ValueError: dataset_split is None, or Failed to find labels. """ if dataset_split is None: raise ValueError('Unknown dataset split.') if model_variant is None: tf.logging.warning('Please specify a model_variant. See ' 'feature_extractor.network_map for supported model ' 'variants.') data_provider = dataset_data_provider.DatasetDataProvider( dataset, num_readers=num_readers, num_epochs=None if is_training else 1, shuffle=is_training) image, label, object_label, image_name, height, width, video_id = _get_data( data_provider, dataset_split, video_frames_are_decoded) sampling_is_valid = tf.constant(True) if num_frames_per_video is not None: total_num_frames = tf.shape(image)[0] if first_frame_finetuning or three_frame_dataset: if sample_only_first_frame_for_finetuning: assert not sample_adjacent_and_consistent_query_frames, ( 'this option does not make sense for sampling only first frame.') # Sample the first frame num_frames_per_video times. sel_indices = tf.tile(tf.constant(0, dtype=tf.int32)[tf.newaxis], multiples=[num_frames_per_video]) else: if sample_adjacent_and_consistent_query_frames: if add_prev_frame_label: num_frames_per_video += 1 # Since this is first frame fine-tuning, we'll for now assume that # each sequence has exactly 3 images: the ref frame and 2 adjacent # query frames. assert num_frames_per_video == 3 with tf.control_dependencies([tf.assert_equal(total_num_frames, 3)]): sel_indices = tf.constant([1, 2], dtype=tf.int32) else: # Sample num_frames_per_video - 1 query frames which are not the # first frame. sel_indices = tf.random_shuffle( tf.range(1, total_num_frames))[:(num_frames_per_video - 1)] # Concat first frame as reference frame to the front. sel_indices = tf.concat([tf.constant(0, dtype=tf.int32)[tf.newaxis], sel_indices], axis=0) else: if sample_adjacent_and_consistent_query_frames: if add_prev_frame_label: # Sample one more frame which we can use to provide initial softmax # feedback. num_frames_per_video += 1 ref_idx = tf.random_shuffle(tf.range(total_num_frames))[0] sampling_is_valid = tf.greater_equal(total_num_frames, num_frames_per_video) def sample_query_start_idx(): return tf.random_shuffle( tf.range(total_num_frames - num_frames_per_video + 1))[0] query_start_idx = tf.cond(sampling_is_valid, sample_query_start_idx, lambda: tf.constant(0, dtype=tf.int32)) def sample_sel_indices(): return tf.concat( [ref_idx[tf.newaxis], tf.range( query_start_idx, query_start_idx + (num_frames_per_video - 1))], axis=0) sel_indices = tf.cond( sampling_is_valid, sample_sel_indices, lambda: tf.zeros((num_frames_per_video,), dtype=tf.int32)) else: # Randomly sample some frames from the video. 
sel_indices = tf.random_shuffle( tf.range(total_num_frames))[:num_frames_per_video] image = tf.gather(image, sel_indices, axis=0) if not video_frames_are_decoded: image = decode_image_sequence(image) if label is not None: if num_frames_per_video is not None: label = tf.gather(label, sel_indices, axis=0) if not video_frames_are_decoded: label = decode_image_sequence(label, image_format='png', channels=1) # Sometimes, label is saved as [num_frames_per_video, height, width] or # [num_frames_per_video, height, width, 1]. We change it to be # [num_frames_per_video, height, width, 1]. if label.shape.ndims == 3: label = tf.expand_dims(label, 3) elif label.shape.ndims == 4 and label.shape.dims[3] == 1: pass else: raise ValueError('Input label shape must be ' '[num_frames_per_video, height, width],' ' or [num_frames, height, width, 1]. ' 'Got {}'.format(label.shape.ndims)) label.set_shape([None, None, None, 1]) # Add size of first dimension since tf can't figure it out automatically. image.set_shape((num_frames_per_video, None, None, None)) if label is not None: label.set_shape((num_frames_per_video, None, None, None)) preceding_frame_label = None if preprocess_image_and_label: if num_frames_per_video is None: raise ValueError('num_frame_per_video must be specified for preproc.') original_images = [] images = [] labels = [] if sample_adjacent_and_consistent_query_frames: num_frames_individual_preproc = 1 else: num_frames_individual_preproc = num_frames_per_video for frame_idx in range(num_frames_individual_preproc): original_image_t, image_t, label_t = ( input_preprocess.preprocess_image_and_label( image[frame_idx], label[frame_idx], crop_height=crop_size[0] if crop_size is not None else None, crop_width=crop_size[1] if crop_size is not None else None, min_resize_value=min_resize_value, max_resize_value=max_resize_value, resize_factor=resize_factor, min_scale_factor=min_scale_factor, max_scale_factor=max_scale_factor, scale_factor_step_size=scale_factor_step_size, ignore_label=dataset.ignore_label, is_training=is_training, model_variant=model_variant)) original_images.append(original_image_t) images.append(image_t) labels.append(label_t) if sample_adjacent_and_consistent_query_frames: imgs_for_preproc = [image[frame_idx] for frame_idx in range(1, num_frames_per_video)] labels_for_preproc = [label[frame_idx] for frame_idx in range(1, num_frames_per_video)] original_image_rest, image_rest, label_rest = ( input_preprocess.preprocess_images_and_labels_consistently( imgs_for_preproc, labels_for_preproc, crop_height=crop_size[0] if crop_size is not None else None, crop_width=crop_size[1] if crop_size is not None else None, min_resize_value=min_resize_value, max_resize_value=max_resize_value, resize_factor=resize_factor, min_scale_factor=min_scale_factor, max_scale_factor=max_scale_factor, scale_factor_step_size=scale_factor_step_size, ignore_label=dataset.ignore_label, is_training=is_training, model_variant=model_variant)) original_images.extend(original_image_rest) images.extend(image_rest) labels.extend(label_rest) assert len(original_images) == num_frames_per_video assert len(images) == num_frames_per_video assert len(labels) == num_frames_per_video if remap_labels_to_reference_frame: # Remap labels to indices into the labels of the (downscaled) reference # frame, or 0, i.e. background, for labels which are not present # in the reference. 
reference_labels = labels[0][tf.newaxis] h, w = train_utils.resolve_shape(reference_labels)[1:3] embedding_height = model.scale_dimension( h, 1.0 / decoder_output_stride) embedding_width = model.scale_dimension( w, 1.0 / decoder_output_stride) reference_labels_embedding_size = tf.squeeze( tf.image.resize_nearest_neighbor( reference_labels, tf.stack([embedding_height, embedding_width]), align_corners=True), axis=0) # Get sorted unique labels in the reference frame. labels_in_ref_frame, _ = tf.unique( tf.reshape(reference_labels_embedding_size, [-1])) labels_in_ref_frame = tf.contrib.framework.sort(labels_in_ref_frame) for idx in range(1, len(labels)): ref_label_mask = tf.equal( labels[idx], labels_in_ref_frame[tf.newaxis, tf.newaxis, :]) remapped = tf.argmax(tf.cast(ref_label_mask, tf.uint8), axis=-1, output_type=tf.int32) # Set to 0 if label is not present is_in_ref = tf.reduce_any(ref_label_mask, axis=-1) remapped *= tf.cast(is_in_ref, tf.int32) labels[idx] = remapped[..., tf.newaxis] if sample_adjacent_and_consistent_query_frames: if first_frame_finetuning and generate_prev_frame_mask_by_mask_damaging: preceding_frame_label = mask_damaging.damage_masks(labels[1]) elif add_prev_frame_label: # Discard the image of the additional frame and take the label as # initialization for softmax feedback. original_images = [original_images[0]] + original_images[2:] preceding_frame_label = labels[1] images = [images[0]] + images[2:] labels = [labels[0]] + labels[2:] num_frames_per_video -= 1 original_image = tf.stack(original_images, axis=0) image = tf.stack(images, axis=0) label = tf.stack(labels, axis=0) else: if label is not None: # Need to set label shape due to batching. label.set_shape([num_frames_per_video, None if crop_size is None else crop_size[0], None if crop_size is None else crop_size[1], 1]) original_image = tf.to_float(tf.zeros_like(label)) if crop_size is None: height = tf.shape(image)[1] width = tf.shape(image)[2] else: height = crop_size[0] width = crop_size[1] sample = {'image': image, 'image_name': image_name, 'height': height, 'width': width, 'video_id': video_id} if label is not None: sample['label'] = label if object_label is not None: sample['object_label'] = object_label if preceding_frame_label is not None: sample['preceding_frame_label'] = preceding_frame_label if not is_training: # Original image is only used during visualization. sample['original_image'] = original_image if is_training: if first_frame_finetuning: keep_input = tf.constant(True) else: keep_input = tf.logical_and(sampling_is_valid, tf.logical_and( _has_enough_pixels_of_each_object_in_first_frame( label, decoder_output_stride), _has_foreground_and_background_in_first_frame_2( label, decoder_output_stride))) batched = tf.train.maybe_batch(sample, keep_input=keep_input, batch_size=batch_size, num_threads=num_threads, capacity=batch_capacity_factor * batch_size, dynamic_pad=True) else: batched = tf.train.batch(sample, batch_size=batch_size, num_threads=num_threads, capacity=batch_capacity_factor * batch_size, dynamic_pad=True) # Flatten from [batch, num_frames_per_video, ...] to # batch * num_frames_per_video, ...]. 
cropped_height = train_utils.resolve_shape(batched['image'])[2] cropped_width = train_utils.resolve_shape(batched['image'])[3] if num_frames_per_video is None: first_dim = -1 else: first_dim = batch_size * num_frames_per_video batched['image'] = tf.reshape(batched['image'], [first_dim, cropped_height, cropped_width, 3]) if label is not None: batched['label'] = tf.reshape(batched['label'], [first_dim, cropped_height, cropped_width, 1]) return batched
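# Standalone sketch of the label-remapping step used in get() above: map the
# labels of a query frame onto indices into the reference frame's sorted
# unique labels, sending labels absent from the reference to 0 (background).
# Toy labels only.
import tensorflow as tf

reference_labels = tf.constant([[0, 0, 4],
                                [0, 9, 9]])
query_labels = tf.constant([[0, 4, 7],
                            [9, 9, 7]])     # 7 does not occur in the reference

labels_in_ref, _ = tf.unique(tf.reshape(reference_labels, [-1]))
labels_in_ref = tf.contrib.framework.sort(labels_in_ref)   # [0, 4, 9]

# Compare every query pixel against every reference label.
match = tf.equal(query_labels[..., tf.newaxis],
                 labels_in_ref[tf.newaxis, tf.newaxis, :])
remapped = tf.argmax(tf.cast(match, tf.uint8), axis=-1, output_type=tf.int32)
is_in_ref = tf.reduce_any(match, axis=-1)
remapped *= tf.cast(is_in_ref, tf.int32)    # absent labels -> 0 (background)

with tf.Session() as sess:
  print(sess.run(remapped))
  # [[0 1 0]
  #  [2 2 0]]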
def refine_detections_graph(rois, probs, deltas, window, config):
    """Refines classified proposals, filters overlaps and returns final detections.

    Inputs:
        rois: [N, (y1, x1, y2, x2)] in normalized coordinates
        probs: [N, num_classes]. Class probabilities.
        deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific
            bounding box deltas.
        window: (y1, x1, y2, x2) in normalized coordinates. The part of the image
            that contains the image excluding the padding.

    Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id, score)]
        where coordinates are normalized.
    """
    # Class id with the highest score for each ROI.
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    # Stack ROI index and class id together.
    indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)
    # Score of the top class for each ROI...
    class_scores = tf.gather_nd(probs, indices)
    # ...and the corresponding class-specific box refinement deltas.
    deltas_specific = tf.gather_nd(deltas, indices)
    # Apply the deltas. Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates.
    refined_rois = apply_box_deltas_graph(
        rois, deltas_specific * config.BBOX_STD_DEV)
    # refined_rois are the boxes refined by the classifier head.
    # Clip boxes to the window so coordinates stay within the 0-1 range.
    refined_rois = clip_boxes_graph(refined_rois, window)

    # Filter out background boxes (class id 0).
    keep = tf.where(class_ids > 0)[:, 0]
    # Also filter out low-confidence boxes.
    if config.DETECTION_MIN_CONFIDENCE:
        # e.g. config.DETECTION_MIN_CONFIDENCE = 0.7
        conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(conf_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    # keep now holds the non-background, high-scoring boxes with their classes
    # and scores.
    # 1. Prepare variables
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois, keep)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

    def nms_keep_map(class_id):
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        class_keep = tf.image.non_max_suppression(
            tf.gather(pre_nms_rois, ixs),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=config.DETECTION_MAX_INSTANCES,
            iou_threshold=config.DETECTION_NMS_THRESHOLD)
        class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
        gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)],
                            mode='CONSTANT', constant_values=-1)
        class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
        return class_keep

    # 2. Apply per-class non-max suppression.
    nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids,
                         dtype=tf.int64)
    # 3. Collect the proposal indices that survived NMS.
    nms_keep = tf.reshape(nms_keep, [-1])
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    # 4. Compute intersection between keep and nms_keep
    keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                    tf.expand_dims(nms_keep, 0))
    keep = tf.sparse_tensor_to_dense(keep)[0]

    # Keep the top roi_count boxes by score.
    roi_count = config.DETECTION_MAX_INSTANCES
    class_scores_keep = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
    top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    # Stack the refined boxes. Arrange output as
    # [N, (y1, x1, y2, x2, class_id, score)]
    detections = tf.concat([
        tf.gather(refined_rois, keep),
        tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis],
        tf.gather(class_scores, keep)[..., tf.newaxis]
        ], axis=1)

    # Pad with zeros if there are fewer detections than DETECTION_MAX_INSTANCES.
    gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections
import tensorflow as tf

sess = tf.InteractiveSession()
x = tf.constant([[2, 5, 3, -5],
                 [0, 3, -2, 5],
                 [4, 3, 5, 3],
                 [6, 1, 4, 0]])
listx = tf.constant([1, 2, 3, 4, 5, 6, 7, 8])
listy = tf.constant([4, 5, 8, 9])
boolx = tf.constant([[True, False], [False, True]])

tf.argmin(x, 1).eval()               # Position of the minimum value in each row
tf.argmax(x, 1).eval()               # Position of the maximum value in each row
tf.listdiff(listx, listy)[0].eval()  # List differences
tf.where(boolx).eval()               # Coordinates of True values
tf.unique(listx)[0].eval()           # Unique values in the list
print('Relational ops----------')
print(sess.run(tf.equal(1, 2)))
print(sess.run(tf.not_equal(1, 2)))
print(sess.run(tf.less(1, 2)))
print(sess.run(tf.greater(1, 2)))
print(sess.run(tf.greater_equal(1, 2)))

print('Logical ops----------')
print(sess.run(tf.logical_and(True, False)))
print(sess.run(tf.logical_or(True, False)))
print(sess.run(tf.logical_xor(True, False)))
print(sess.run(tf.logical_not(True)))

print('Unique values----------')
kbs = tf.constant([1, 2, 2, 2, 3])
val, idx = tf.unique(kbs)
print(sess.run(val))
print(sess.run(idx))

# tf.reduce~ : the result has fewer dimensions than the input
ar = [[1., 2.], [3., 4.]]
print(tf.reduce_sum(ar).eval(session=tf.Session()))
print(tf.reduce_mean(ar, axis=0).eval(session=tf.Session()))  # column-wise
print(tf.reduce_mean(ar, axis=1).eval(session=tf.Session()))  # row-wise
print()

# Changing dimensions (reshaping)
import numpy as np
t = np.array([[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]])
print(t.shape)  # (2, 2, 3)
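# Follow-up sketch (continues the snippet above): the (val, idx) pair returned
# by tf.unique is enough to rebuild the original tensor, since idx records the
# position of every element of kbs inside val.
print(sess.run(tf.gather(val, idx)))  # [1 2 2 2 3] == kbs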
def instance_embedding_npair_loss(embedding, instance_labels, crop_min_height, crop_area, similarity_strategy='dotproduct', loss_strategy='softmax'): """n-pair loss for a cropped box inside the embedding. It uses npair_loss (above) to compute the embedding loss given the ground-truth instance_labels. instance_labels contains the ground-truth labels. The loss is computed as follows: We compute the dot product between the embedding vector of each pixel and every other pixel. If we have N pixels, this will give us a [N, N] matrix. In this matrix, we compute the softmax (or sigmoid) loss for each row, average the losses and return as output. In order to perform the softmax (sigmoid) loss, we need the one-hot ground-truth labels for each row. In the row i, the pixels that in the same instance as the pixel i, will be set to 1, and other pixels will be set to 0. Each row is normalized so the sum of each row is equal to 1. Args: embedding: A tf.float32 tensor of [height, width, embedding_size]. instance_labels: A tf.int32 tensor of [height, width]. Assumed values in target start from 0 and cover 0 to N-1. crop_min_height: Minimum height of the crop window. crop_area: Area of the crop window. similarity_strategy: Defines the method for computing similarity between embedding vectors. Possible values are 'dotproduct' and 'distance'. loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'. Returns: Total loss value. Raises: ValueError: If loss strategy or similarity strategy are unknown. """ embedding_shape = tf.shape(embedding) embedding_height = embedding_shape[0] embedding_width = embedding_shape[1] embedding_size = embedding_shape[2] crop_height = tf.maximum(crop_area // embedding_width, crop_min_height) crop_height = tf.maximum(1, tf.minimum(embedding_height - 1, crop_height)) crop_width = tf.maximum( 1, tf.minimum(embedding_width - 1, crop_area // crop_height)) y_start = tf.random.uniform([], minval=0, maxval=tf.maximum( 1, embedding_height - crop_height), dtype=tf.int32) x_start = tf.random.uniform([], minval=0, maxval=tf.maximum(1, embedding_width - crop_width), dtype=tf.int32) embedding = tf.slice(embedding, begin=tf.stack([y_start, x_start, 0]), size=tf.stack([ tf.minimum(crop_height, embedding_height - y_start), tf.minimum(crop_width, embedding_width - x_start), embedding_size ])) embedding = tf.reshape(embedding, [-1, embedding_size]) instance_labels = tf.slice(instance_labels, begin=tf.stack([y_start, x_start]), size=tf.stack([ tf.minimum(crop_height, embedding_height - y_start), tf.minimum(crop_width, embedding_width - x_start) ])) instance_labels = tf.reshape(instance_labels, [-1]) num_instance_labels = tf.reduce_max(instance_labels) + 1 valid_mask = tf.greater_equal(instance_labels, 0) embedding = tf.boolean_mask(embedding, valid_mask) instance_labels = tf.boolean_mask(instance_labels, valid_mask) unique_labels, _ = tf.unique(instance_labels) instance_labels = tf.one_hot(instance_labels, num_instance_labels, dtype=tf.float32) instance_labels = tf.transpose( tf.gather(tf.transpose(instance_labels), unique_labels)) return weighted_npair_loss(embedding, instance_labels, similarity_strategy, loss_strategy)
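# Self-contained sketch of the label-compaction step at the end of the function
# above: one-hot encode the instance labels, then keep only the columns of the
# labels that actually occur in the crop, in order of first appearance.
import tensorflow as tf

instance_labels = tf.constant([2, 0, 2, 5])
num_instance_labels = tf.reduce_max(instance_labels) + 1
unique_labels, _ = tf.unique(instance_labels)

one_hot = tf.one_hot(instance_labels, num_instance_labels, dtype=tf.float32)
# Select the columns corresponding to unique_labels ([2, 0, 5] here).
compact = tf.transpose(tf.gather(tf.transpose(one_hot), unique_labels))

with tf.Session() as sess:
  print(sess.run(compact))
  # [[1. 0. 0.]
  #  [0. 1. 0.]
  #  [1. 0. 0.]
  #  [0. 0. 1.]]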
def _mini_batch_training_op(self, inputs, cluster_idx_list, cluster_centers, cluster_centers_var, total_counts): """Creates an op for training for mini batch case. Args: inputs: list of input Tensors. cluster_idx_list: A vector (or list of vectors). Each element in the vector corresponds to an input row in 'inp' and specifies the cluster id corresponding to the input. cluster_centers: Tensor of cluster centers, possibly normalized. cluster_centers_var: Tensor Ref of cluster centers. total_counts: Tensor Ref of cluster counts. Returns: An op for doing an update of mini-batch k-means. """ update_ops = [] for inp, cluster_idx in zip(inputs, cluster_idx_list): with ops.colocate_with(inp): assert total_counts is not None cluster_idx = tf.reshape(cluster_idx, [-1]) # Dedupe the unique ids of cluster_centers being updated so that updates # can be locally aggregated. unique_ids, unique_idx = tf.unique(cluster_idx) num_unique_cluster_idx = tf.size(unique_ids) # Fetch the old values of counts and cluster_centers. with ops.colocate_with(total_counts): old_counts = tf.gather(total_counts, unique_ids) with ops.colocate_with(cluster_centers): old_cluster_centers = tf.gather(cluster_centers, unique_ids) # Locally aggregate the increment to counts. count_updates = tf.unsorted_segment_sum( tf.ones_like(unique_idx, dtype=total_counts.dtype), unique_idx, num_unique_cluster_idx) # Locally compute the sum of inputs mapped to each id. # For a cluster with old cluster value x, old count n, and with data # d_1,...d_k newly assigned to it, we recompute the new value as # x += (sum_i(d_i) - k * x) / (n + k). # Compute sum_i(d_i), see comment above. cluster_center_updates = tf.unsorted_segment_sum( inp, unique_idx, num_unique_cluster_idx) # Shape to enable broadcasting count_updates and learning_rate to inp. # It extends the shape with 1's to match the rank of inp. broadcast_shape = tf.concat( 0, [tf.reshape(num_unique_cluster_idx, [1]), tf.ones(tf.reshape(tf.rank(inp) - 1, [1]), dtype=tf.int32)]) # Subtract k * x, see comment above. cluster_center_updates -= tf.cast( tf.reshape(count_updates, broadcast_shape), inp.dtype) * old_cluster_centers learning_rate = tf.inv(tf.cast(old_counts + count_updates, inp.dtype)) learning_rate = tf.reshape(learning_rate, broadcast_shape) # scale by 1 / (n + k), see comment above. cluster_center_updates *= learning_rate # Apply the updates. update_counts = tf.scatter_add( total_counts, unique_ids, count_updates) update_cluster_centers = tf.scatter_add( cluster_centers_var, unique_ids, cluster_center_updates) update_ops.extend([update_counts, update_cluster_centers]) return tf.group(*update_ops)
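# Toy, self-contained sketch of the local aggregation used above: tf.unique
# deduplicates the cluster ids touched by the mini-batch, and
# tf.unsorted_segment_sum accumulates per-cluster counts and input sums that
# can then be scattered back into the full center/count variables.
import tensorflow as tf

points = tf.constant([[0., 0.], [2., 2.], [4., 4.], [6., 6.]])
cluster_idx = tf.constant([3, 1, 3, 1])          # assignments for this batch

unique_ids, unique_pos = tf.unique(cluster_idx)  # ids touched by the batch
num_unique = tf.size(unique_ids)

count_updates = tf.unsorted_segment_sum(
    tf.ones_like(unique_pos, dtype=tf.float32), unique_pos, num_unique)
sum_updates = tf.unsorted_segment_sum(points, unique_pos, num_unique)

with tf.Session() as sess:
  print(sess.run(unique_ids))      # [3 1]
  print(sess.run(count_updates))   # [2. 2.]
  print(sess.run(sum_updates))     # [[4. 4.] [8. 8.]]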
def _process_input_helper(self, update_row_factors, sp_input=None, transpose_input=False, row_weights=None): """Creates the graph for processing a sparse slice of input. Args: update_row_factors: if True, update or project the row_factors, else update or project the column factors. sp_input: Please refer to comments for update_row_factors, update_col_factors, project_row_factors, and project_col_factors for restrictions. transpose_input: If True, the input is logically transposed and then the corresponding rows/columns of the transposed input are updated. row_weights: If not None, this is the row/column weights to be used for the update or projection. If None, use the corresponding weights from the model. Note that the feature (column/row) weights will be determined by the model. When not None, it can either be a scalar or a rank-1 tensor with the same number of elements as the number of rows of columns to be updated/projected. Returns: A tuple consisting of the following two elements: new_values: New values for the row/column factors. update_op: An op that assigns the newly computed values to the row/column factors. """ assert isinstance(sp_input, tf.SparseTensor) if update_row_factors: left = self._row_factors right_factors = self._col_factors_cache row_wt = self._row_wt_cache col_wt = self._col_wt_cache sharding_func = WALSModel._get_sharding_func( self._input_rows, self._num_row_shards) gramian = self._col_gramian_cache else: left = self._col_factors right_factors = self._row_factors_cache row_wt = self._col_wt_cache col_wt = self._row_wt_cache sharding_func = WALSModel._get_sharding_func( self._input_cols, self._num_col_shards) gramian = self._row_gramian_cache transpose_input = not transpose_input # Note that the row indices of sp_input are based on the original full input # Here we reindex the rows and give them contiguous ids starting at 0. # We use tf.unique to achieve this reindexing. Note that this is done so # that the downstream kernel can assume that the input is "dense" along the # row dimension. row_ids, col_ids = tf.split(1, 2, sp_input.indices) update_row_indices, all_row_ids = tf.unique(row_ids[:, 0]) update_col_indices, all_col_ids = tf.unique(col_ids[:, 0]) col_ids = tf.expand_dims(tf.cast(all_col_ids, tf.int64), 1) row_ids = tf.expand_dims(tf.cast(all_row_ids, tf.int64), 1) if transpose_input: update_indices = update_col_indices row_shape = [tf.cast(tf.shape(update_row_indices)[0], tf.int64)] gather_indices = update_row_indices else: update_indices = update_row_indices row_shape = [tf.cast(tf.shape(update_col_indices)[0], tf.int64)] gather_indices = update_col_indices num_rows = tf.cast(tf.shape(update_indices)[0], tf.int64) col_shape = [num_rows] right = embedding_ops.embedding_lookup(right_factors, gather_indices, partition_strategy='div') new_sp_indices = tf.concat(1, [row_ids, col_ids]) new_sp_shape = (tf.concat(0, [row_shape, col_shape]) if transpose_input else tf.concat(0, [col_shape, row_shape])) new_sp_input = tf.SparseTensor(indices=new_sp_indices, values=sp_input.values, dense_shape=new_sp_shape) # Compute lhs and rhs of the normal equations total_lhs = (self._unobserved_weight * gramian) if self._regularization is not None: total_lhs += self._regularization if self._row_weights is None: # Special case of ALS. Use a much simpler update rule. total_rhs = (self._unobserved_weight * tf.sparse_tensor_dense_matmul( new_sp_input, right, adjoint_a=transpose_input)) # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of # transposing explicitly. 
# TODO(rmlarsen): multi-thread tf.matrix_solve. new_left_values = tf.transpose( tf.matrix_solve(total_lhs, tf.transpose(total_rhs))) else: if row_weights is None: # TODO(yifanchen): Add special handling for single shard without using # embedding_lookup and perform benchmarks for those cases. Same for # col_weights lookup below. row_weights_slice = embedding_ops.embedding_lookup( row_wt, update_indices, partition_strategy='div') else: with ops.control_dependencies( [tf.assert_less_equal(tf.rank(row_weights), 1)]): row_weights_slice = tf.cond( tf.equal(tf.rank(row_weights), 0), lambda: (tf.ones([tf.shape(update_indices)[0]]) * row_weights), lambda: tf.cast(row_weights, tf.float32)) col_weights = embedding_ops.embedding_lookup( col_wt, gather_indices, partition_strategy='div') partial_lhs, total_rhs = wals_compute_partial_lhs_and_rhs( right, col_weights, self._unobserved_weight, row_weights_slice, new_sp_input.indices, new_sp_input.values, num_rows, transpose_input, name="wals_compute_partial_lhs_rhs") total_lhs = tf.expand_dims(total_lhs, 0) + partial_lhs total_rhs = tf.expand_dims(total_rhs, -1) new_left_values = tf.squeeze(tf.matrix_solve(total_lhs, total_rhs), [2]) return (new_left_values, self.scatter_update(left, update_indices, new_left_values, sharding_func))
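# A minimal sketch (assuming TF 2.x) of the reindexing trick used above: tf.unique
# returns both the original row/column ids (used to gather and later scatter the
# factor rows) and contiguous ids starting at 0 (used to build a compact sparse
# slice that is "dense" along the dimension being updated).
import tensorflow as tf

indices = tf.constant([[7, 3], [7, 9], [42, 3]], dtype=tf.int64)
values = tf.constant([1., 2., 3.])

update_rows, new_rows = tf.unique(indices[:, 0])      # [7, 42], [0, 0, 1]
update_cols, new_cols = tf.unique(indices[:, 1])      # [3, 9],  [0, 1, 0]
compact = tf.SparseTensor(
    indices=tf.cast(tf.stack([new_rows, new_cols], axis=1), tf.int64),
    values=values,
    dense_shape=tf.cast(tf.stack([tf.size(update_rows), tf.size(update_cols)]), tf.int64))
# update_rows / update_cols address rows of the full factor matrices, while
# `compact` is the small slice handed to the least-squares solve.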
def test_Unique(self): t1, t2 = tf.unique([9, 3, 5, 7, 3, 9, 9]) self.check(t1) self.check(t2)
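# For reference, tf.unique returns the distinct values in order of first occurrence
# together with, for every input element, the position of its value in that list,
# so tf.gather(y, idx) reconstructs the input:
import tensorflow as tf

y, idx = tf.unique(tf.constant([9, 3, 5, 7, 3, 9, 9]))
# y   -> [9, 3, 5, 7]
# idx -> [0, 1, 2, 3, 1, 0, 0]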
def beam_search_decoding(input_encoder, model, tokenizer, config, verbose=0): input_encoder = tf.expand_dims(input_encoder, axis=0) input_decoder = tf.expand_dims(config.START_TOKEN, axis=0) k_scores = [0.0] for i in range(config.MAX_LENGTH): if verbose: print('\nStep', i) if config.ENCODING == 'subword': for k in range(input_decoder.shape[0]): print( tokenizer.decode([ j for j in input_decoder.numpy()[k] if j < config.VOCAB_SIZE - 2 ])) else: for k in range(input_decoder.shape[0]): print( tokenizer.sequences_to_texts([[ j for j in input_decoder.numpy()[k] if j < config.VOCAB_SIZE - 2 ]])) predictions = model(inputs=[input_encoder, input_decoder], training=False) predictions = predictions[:, -1:, :] values, indices = tf.math.top_k( tf.math.log(tf.nn.softmax(predictions)), config.BEAM_SIZE) sequences = [] scores = [] for k in range(input_decoder.shape[0]): for b in range(config.BEAM_SIZE): sequences.append( tf.concat([input_decoder[k], [indices[k, 0, b]]], axis=0)) if i >= config.MAX_REP and len( tf.unique(sequences[-1][-config.MAX_REP:])[0]) == 1: scores.append(k_scores[k] - float('inf')) else: scores.append(k_scores[k] + values[k, 0, b]) values, indices = tf.math.top_k(scores, config.BEAM_SIZE) k_scores = [] input_decoder = [] for k in range(config.BEAM_SIZE): k_scores.append(values[k]) input_decoder.append(sequences[indices[k]]) input_decoder = tf.stack(input_decoder) if input_encoder.shape[0] == 1: input_encoder = tf.repeat(input_encoder, config.BEAM_SIZE, axis=0) if tf.equal(input_decoder[0, -1], config.END_TOKEN): break if verbose: print() if config.ENCODING == 'subword': return tokenizer.decode( [i for i in input_decoder[0].numpy() if i < config.VOCAB_SIZE - 2]) else: return tokenizer.sequences_to_texts([[ i for i in input_decoder[0].numpy() if i < config.VOCAB_SIZE - 2 ]])[0][::2]
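# A sketch of the repetition check inside the loop above (the MAX_REP value here is
# made up for illustration): a candidate whose last MAX_REP tokens are all the same
# id has only one unique value in that window, so its beam score is pushed to -inf
# and the hypothesis is effectively discarded.
import tensorflow as tf

MAX_REP = 3
candidate = tf.constant([5, 8, 8, 2, 7, 7, 7])
is_degenerate = tf.size(tf.unique(candidate[-MAX_REP:])[0]) == 1   # True here
score = tf.where(is_degenerate, tf.constant(float('-inf')), tf.constant(0.0))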
def encode_annos(labels, bboxes, anchors, num_classes): """Encode annotations for losses computations. All the output tensors have a fix shape(none dynamic dimention). Args: labels: 1-D with shape `[num_bounding_boxes]`. bboxes: 2-D with shape `[num_bounding_boxes, 4]`. Format [ymin, xmin, ymax, xmax] anchors: 4-D tensor with shape `[num_anchors, 4]`. Format [cx, cy, w, h] Returns: input_mask: 2-D with shape `[num_anchors, 1]`, indicate which anchor to be used to cal loss. labels_input: 2-D with shape `[num_anchors, num_classes]`, one hot encode for every anchor. box_delta_input: 2-D with shape `[num_anchors, 4]`. Format [dcx, dcy, dw, dh] box_input: 2-D with shape '[num_anchors, 4]'. Format [ymin, xmin, ymax, xmax] """ with tf.name_scope("Encode_annotations") as scope: num_anchors = config.ANCHORS # num_bboxes = tf.shape(bboxes)[0] # Cal iou, find the target anchor with tf.name_scope("Matching") as subscope: ious = batch_iou_fast(xywh_to_yxyx(anchors), bboxes) anchor_indices = tf.reshape(tf.arg_max(ious, dimension=1), shape=[-1, 1]) # target anchor indices # anchor_indices = tf.Print(anchor_indices, [anchor_indices], "anchor_indices", summarize=100) # discard duplicate # unique_idx wrong anchor_indices, idx, count = tf.unique_with_counts(tf.reshape(anchor_indices, shape=[-1])) ori_idx = tf.cumsum(tf.pad(count, [[1, 0]]))[:-1] anchor_indices = tf.reshape(anchor_indices, shape=[-1, 1]) bboxes = tf.gather(bboxes, tf.unique(ori_idx)[0]) labels = tf.gather(labels, tf.unique(ori_idx)[0]) ious = tf.gather(ious, tf.unique(ori_idx)[0]) num_bboxes = tf.shape(anchor_indices)[0] # TODO(shizehao):deal with duplicate # with tf.name_scope("Deal_with_duplicate"): # dup_anchor_indices, indices_in_a, dup_anchor_indices_with_dup = find_dup(tf.reshape(anchor_indices, shape=[-1])) # # # reset duplicated corresponding anchor # conflicted_ious = tf.gather(ious, indices_in_a) # top_k_anchor_indices = tf.nn.top_k(conflicted_ious, k=20).indices # shape = [num_conflicted_bboxes, 20] # dup_group_idx = tf.where(tf.equal(dup_anchor_indices_with_dup, tf.reshape(dup_anchor_indices, shape=[-1, 1]))) # seg_group = tf.unstack(dup_group_idx, axis=1)[0] with tf.name_scope("Deal_with_noneoverlap"): # find the none-overlap bbox bbox_indices = tf.reshape(tf.range(num_bboxes), shape=[-1, 1]) # bbox_indices = tf.Print(bbox_indices, [bbox_indices], "bbox_indices", summarize=100) # anchor_indices = tf.Print(anchor_indices, [anchor_indices], "anchor_indices", summarize=100) iou_indices = tf.concat([bbox_indices, tf.cast(anchor_indices, dtype=tf.int32)], axis=1) # iou_indices = tf.Print(iou_indices, [iou_indices], "iou_indices", summarize=100) target_iou = tf.gather_nd(ious, iou_indices) # target_iou = tf.Print(target_iou,[target_iou],"target_iou",summarize=100) none_overlap_bbox_indices = tf.where(target_iou <= 0) # 1-D # none_overlap_bbox_indices = tf.Print(none_overlap_bbox_indices, [none_overlap_bbox_indices], "none_overlap_bbox_indices", summarize=100) # find it's corresponding anchor target_bbox = tf.gather_nd(bboxes, none_overlap_bbox_indices) # target_bbox = tf.Print(target_bbox, [target_bbox], "target_bbox", summarize=100) closest_anchor_indices = arg_closest_anchor(target_bbox, xywh_to_yxyx(anchors)) # 1-D # closest_anchor_indices = tf.Print(closest_anchor_indices, [closest_anchor_indices, tf.gather(anchors, closest_anchor_indices)], "closest_anchor_indices", summarize=100) with tf.name_scope("Update_anchor_indices"): anchor_indices = tf.reshape(anchor_indices, shape=[-1]) anchor_indices = update_tensor(anchor_indices, 
none_overlap_bbox_indices, closest_anchor_indices) anchor_indices = tf.reshape(anchor_indices, shape=[-1, 1]) with tf.name_scope("Delta") as subscope: target_anchors = tf.gather_nd(anchors, anchor_indices) bboxes = yxyx_to_xywh(bboxes) delta = batch_delta(bboxes, target_anchors) with tf.name_scope("Scattering") as subscope: # bbox box_input = tf.scatter_nd(anchor_indices, bboxes, shape=[num_anchors, 4] ) # label labels_input = tf.scatter_nd(anchor_indices, tf.one_hot(labels, num_classes), shape=[num_anchors, num_classes] ) # delta box_delta_input = tf.scatter_nd(anchor_indices, delta, shape=[num_anchors, 4] ) # anchor mask # unique_indices, _ = tf.unique(tf.reshape(anchor_indices, shape=[-1])) # unique_indices = tf.Print(unique_indices, [unique_indices], summarize=100) # num_bboxes = tf.Print(num_bboxes, [num_bboxes]) input_mask = tf.scatter_nd(anchor_indices, tf.ones([num_bboxes]), shape=[num_anchors]) input_mask = tf.reshape(input_mask, shape=[-1, 1]) return input_mask, labels_input, box_delta_input, box_input
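# The "discard duplicate" block above keeps a single ground-truth box per anchor via
# tf.unique_with_counts and a cumsum over the counts (which assumes the duplicate
# assignments are contiguous). A standalone alternative sketch (assuming TF 2.x)
# that keeps the first box claiming each anchor:
import tensorflow as tf

anchor_ids = tf.constant([12, 7, 12, 3, 7])             # target anchor per gt box
uniq, idx = tf.unique(anchor_ids)                       # [12, 7, 3]
first_pos = tf.math.unsorted_segment_min(
    tf.range(tf.size(anchor_ids)), idx, tf.size(uniq))  # [0, 1, 3]
# bboxes / labels / ious can then be gathered with first_pos so that each anchor is
# matched to exactly one box before the scatter_nd calls.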
def _one_image_rpn_train(self, pconf, pbbox_yx, pbbox_hw, abbox_yx, abbox_hw, abbox_y1x1, abbox_y2x2, nground_truth): slice_index = tf.argmin(nground_truth, axis=0)[0] nground_truth = tf.gather(nground_truth, tf.range(0, slice_index, dtype=tf.int64)) ngbbox_yx = nground_truth[..., 0:2] ngbbox_hw = nground_truth[..., 2:4] ngbbox_y1x1 = ngbbox_yx - ngbbox_hw / 2 ngbbox_y2x2 = ngbbox_yx + ngbbox_hw / 2 rcnn_label = tf.cast(nground_truth[..., 4:], tf.int32) dpbbox_yx = pbbox_yx * abbox_hw + abbox_yx dpbbox_hw = abbox_hw * tf.exp(pbbox_hw) dpbbox_y1x1 = dpbbox_yx - dpbbox_hw / 2 dpbbox_y2x2 = dpbbox_yx + dpbbox_hw / 2 dpbbox_y1x1y2x2 = tf.concat([dpbbox_y1x1, dpbbox_y2x2], axis=-1) selected_indices = tf.image.non_max_suppression( dpbbox_y1x1y2x2, pconf[:, 0], self.post_nms_proposals, iou_threshold=0.5 ) # selected_indices2 = tf.image.non_max_suppression( # dpbbox_y1x1y2x2, pconf[:, 1], self.reserve_proposals//2, iou_threshold=0.5 # ) # selected_indices = tf.concat([selected_indices1, selected_indices2], axis=0) # selected_indices, _ = tf.unique(selected_indices) pconf = tf.gather(pconf, selected_indices) pbbox_yx = tf.gather(pbbox_yx, selected_indices) pbbox_hw = tf.gather(pbbox_hw, selected_indices) abbox_yx = tf.gather(abbox_yx, selected_indices) abbox_hw = tf.gather(abbox_hw, selected_indices) abbox_y1x1 = tf.gather(abbox_y1x1, selected_indices) abbox_y2x2 = tf.gather(abbox_y2x2, selected_indices) proposal_yx = tf.gather(dpbbox_yx, selected_indices) proposal_hw = tf.gather(dpbbox_hw, selected_indices) num_ground_truth = tf.shape(ngbbox_yx)[0] num_abbox = tf.shape(abbox_yx)[0] ngbbox_y1x1ti = tf.reshape(ngbbox_y1x1, [-1, 1, 2]) ngbbox_y2x2ti = tf.reshape(ngbbox_y2x2, [-1, 1, 2]) ngbbox_y1x1ti = tf.tile(ngbbox_y1x1ti, [1, num_abbox, 1]) ngbbox_y2x2ti = tf.tile(ngbbox_y2x2ti, [1, num_abbox, 1]) abbox_y1x1ti = tf.reshape(abbox_y1x1, [1, -1, 2]) abbox_y2x2ti = tf.reshape(abbox_y2x2, [1, -1, 2]) abbox_y1x1ti = tf.tile(abbox_y1x1ti, [num_ground_truth, 1, 1]) abbox_y2x2ti = tf.tile(abbox_y2x2ti, [num_ground_truth, 1, 1]) gaiou_y1x1ti = tf.maximum(ngbbox_y1x1ti, abbox_y1x1ti) gaiou_y2x2ti = tf.minimum(ngbbox_y2x2ti, abbox_y2x2ti) gaiou_area = tf.reduce_prod(tf.maximum(gaiou_y2x2ti - gaiou_y1x1ti, 0), axis=-1) aarea = tf.reduce_prod(abbox_y2x2ti - abbox_y1x1ti, axis=-1) garea = tf.reduce_prod(ngbbox_y2x2ti - ngbbox_y1x1ti, axis=-1) gaiou_rate = gaiou_area / (aarea + garea - gaiou_area + 1e-7) best_raindex = tf.argmax(gaiou_rate, axis=1) best_pbbox_yx = tf.gather(pbbox_yx, best_raindex) best_pbbox_hw = tf.gather(pbbox_hw, best_raindex) best_pconf = tf.gather(pconf, best_raindex) best_abbox_yx = tf.gather(abbox_yx, best_raindex) best_abbox_hw = tf.gather(abbox_hw, best_raindex) best_proposal_yx = tf.gather(proposal_yx, best_raindex) best_proposal_hw = tf.gather(proposal_hw, best_raindex) best_rcnn_label = rcnn_label bestmask, _ = tf.unique(best_raindex) bestmask = tf.contrib.framework.sort(bestmask) bestmask = tf.reshape(bestmask, [-1, 1]) bestmask = tf.sparse.SparseTensor(tf.concat([bestmask, tf.zeros_like(bestmask)], axis=-1), tf.squeeze(tf.ones_like(bestmask)), dense_shape=[num_abbox, 1]) bestmask = tf.reshape(tf.cast(tf.sparse.to_dense(bestmask), tf.float32), [-1]) othermask = (1. - bestmask) > 0. 
other_pbbox_yx = tf.boolean_mask(pbbox_yx, othermask) other_pbbox_hw = tf.boolean_mask(pbbox_hw, othermask) other_pconf = tf.boolean_mask(pconf, othermask) other_abbox_yx = tf.boolean_mask(abbox_yx, othermask) other_abbox_hw = tf.boolean_mask(abbox_hw, othermask) other_proposal_yx = tf.boolean_mask(proposal_yx, othermask) other_proposal_hw = tf.boolean_mask(proposal_hw, othermask) agiou_rate = tf.transpose(gaiou_rate) other_agiou_rate = tf.boolean_mask(agiou_rate, othermask) best_agiou_rate = tf.reduce_max(other_agiou_rate, axis=1) pos_mask = best_agiou_rate > 0.7 neg_mask = best_agiou_rate < 0.3 rgindex = tf.argmax(other_agiou_rate, axis=1) pos_rgindex = tf.boolean_mask(rgindex, pos_mask) pos_rcnn_label = tf.gather(rcnn_label, pos_rgindex) pos_ppox_yx = tf.boolean_mask(other_pbbox_yx, pos_mask) pos_ppox_hw = tf.boolean_mask(other_pbbox_hw, pos_mask) pos_pconf = tf.boolean_mask(other_pconf, pos_mask) pos_abbox_yx = tf.boolean_mask(other_abbox_yx, pos_mask) pos_abbox_hw = tf.boolean_mask(other_abbox_hw, pos_mask) pos_proposal_yx = tf.boolean_mask(other_proposal_yx, pos_mask) pos_proposal_hw = tf.boolean_mask(other_proposal_hw, pos_mask) pos_gbbox_yx = tf.gather(ngbbox_yx, pos_rgindex) pos_gbbox_hw = tf.gather(ngbbox_hw, pos_rgindex) neg_pconf = tf.boolean_mask(other_pconf, neg_mask) neg_proposal_yx = tf.boolean_mask(other_proposal_yx, neg_mask) neg_proposal_hw = tf.boolean_mask(other_proposal_hw, neg_mask) pos_rcnn_label = tf.concat([best_rcnn_label, pos_rcnn_label], axis=0) pos_pbbox_yx = tf.concat([best_pbbox_yx, pos_ppox_yx], axis=0) pos_pbbox_hw = tf.concat([best_pbbox_hw, pos_ppox_hw], axis=0) pos_pconf = tf.concat([best_pconf, pos_pconf], axis=0) pos_gbbox_yx = tf.concat([ngbbox_yx, pos_gbbox_yx], axis=0) pos_gbbox_hw = tf.concat([ngbbox_hw, pos_gbbox_hw], axis=0) pos_abbox_yx = tf.concat([best_abbox_yx, pos_abbox_yx], axis=0) pos_abbox_hw = tf.concat([best_abbox_hw, pos_abbox_hw], axis=0) pos_proposal_yx = tf.concat([best_proposal_yx, pos_proposal_yx], axis=0) pos_proposal_hw = tf.concat([best_proposal_hw, pos_proposal_hw], axis=0) num_pos = tf.shape(pos_pconf)[0] num_neg = tf.shape(neg_pconf)[0] chosen_num_pos = tf.cond(num_pos > 128, lambda: 128, lambda: num_pos) chosen_num_neg = tf.cond(num_neg > 256 - chosen_num_pos, lambda: 256 - chosen_num_pos, lambda: num_neg) pos_rpn_label = tf.tile(tf.constant([0]), [num_pos]) neg_rpn_label = tf.tile(tf.constant([1]), [num_neg]) neg_rcnn_label = tf.tile(tf.constant([self.num_classes - 1]), [num_neg]) neg_rcnn_label = tf.reshape(neg_rcnn_label, [-1, 1]) pos_conf_loss = tf.losses.sparse_softmax_cross_entropy(labels=pos_rpn_label, logits=pos_pconf, reduction=tf.losses.Reduction.NONE) neg_conf_loss = tf.losses.sparse_softmax_cross_entropy(labels=neg_rpn_label, logits=neg_pconf, reduction=tf.losses.Reduction.NONE) chosen_pos_loss, chosen_pos_index = tf.nn.top_k(pos_conf_loss, chosen_num_pos) chosen_neg_loss, chosen_neg_index = tf.nn.top_k(neg_conf_loss, chosen_num_neg) conf_loss = tf.reduce_mean(tf.concat([chosen_pos_loss, chosen_neg_loss], axis=-1)) pos_gbbox_yx = tf.gather(pos_gbbox_yx, chosen_pos_index) pos_gbbox_hw = tf.gather(pos_gbbox_hw, chosen_pos_index) pos_abbox_yx = tf.gather(pos_abbox_yx, chosen_pos_index) pos_abbox_hw = tf.gather(pos_abbox_hw, chosen_pos_index) pos_pbbox_yx = tf.gather(pos_pbbox_yx, chosen_pos_index) pos_pbbox_hw = tf.gather(pos_pbbox_hw, chosen_pos_index) pos_proposal_yx = tf.gather(pos_proposal_yx, chosen_pos_index) pos_proposal_hw = tf.gather(pos_proposal_hw, chosen_pos_index) neg_proposal_yx = 
tf.gather(neg_proposal_yx, chosen_neg_index) neg_proposal_hw = tf.gather(neg_proposal_hw, chosen_neg_index) pos_truth_pbbox_yx = (pos_gbbox_yx - pos_abbox_yx) / pos_abbox_hw pos_truth_pbbox_hw = tf.log(pos_gbbox_hw / pos_abbox_hw) pos_yx_loss = tf.reduce_sum(self._smooth_l1_loss(pos_pbbox_yx - pos_truth_pbbox_yx), axis=-1) pos_hw_loss = tf.reduce_sum(self._smooth_l1_loss(pos_pbbox_hw - pos_truth_pbbox_hw), axis=-1) pos_coord_loss = tf.reduce_mean(pos_yx_loss + pos_hw_loss) total_loss = conf_loss + 10.0 * pos_coord_loss proposal_yx = tf.concat([pos_proposal_yx, neg_proposal_yx], axis=0) proposal_hw = tf.concat([pos_proposal_hw, neg_proposal_hw], axis=0) proposal_y1x1 = proposal_yx - proposal_hw / 2. proposal_y2x2 = proposal_yx + proposal_hw / 2. rcnn_label = tf.concat([pos_rcnn_label, neg_rcnn_label], axis=0) return total_loss, proposal_y1x1, proposal_y2x2, pos_proposal_yx, pos_proposal_hw, pos_gbbox_yx, pos_gbbox_hw, rcnn_label
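# A sketch (assuming TF 2.x) of the "best anchor" mask built above with tf.unique
# and a sparse-to-dense conversion: anchors that are already the best match for some
# ground-truth box are marked, and the remaining anchors flow through `othermask`.
import tensorflow as tf

num_abbox = 6
best_raindex = tf.constant([4, 1, 4])                   # best anchor per gt box
best_unique, _ = tf.unique(best_raindex)                # [4, 1]
bestmask = tf.scatter_nd(tf.reshape(best_unique, [-1, 1]),
                         tf.ones_like(best_unique, dtype=tf.float32),
                         shape=[num_abbox])             # [0, 1, 0, 0, 1, 0]
othermask = (1. - bestmask) > 0.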
def _build(self, all_anchors, gt_boxes, im_shape): """ We compare anchors to GT and using the minibatch size and the different config settings (clobber, foreground fraction, etc), we end up with training targets *only* for the elements we want to use in the batch, while everything else is ignored. Basically what it does is, first generate the targets for all (valid) anchors, and then start subsampling the positive (foreground) and the negative ones (background) based on the number of samples of each type that we want. Args: all_anchors: A Tensor with all the bounding boxes coords of the anchors. Its shape should be (num_anchors, 4). gt_boxes: A Tensor with the ground truth bounding boxes of the image of the batch being processed. Its shape should be (num_gt, 5). The last dimension is used for the label. im_shape: Shape of original image (height, width) in order to define anchor targers in respect with gt_boxes. Returns: Tuple of the tensors of: labels: (1, 0, -1) for each anchor. Shape (num_anchors, 1) bbox_targets: 4d bbox targets as specified by paper. Shape (num_anchors, 4) max_overlaps: Max IoU overlap with ground truth boxes. Shape (num_anchors, 1) """ # Keep only the coordinates of gt_boxes gt_boxes = gt_boxes[:, :4] all_anchors = all_anchors[:, :4] # Only keep anchors inside the image (x_min_anchor, y_min_anchor, x_max_anchor, y_max_anchor) = tf.unstack(all_anchors, axis=1) anchor_filter = tf.logical_and( tf.logical_and( tf.greater_equal(x_min_anchor, -self._allowed_border), tf.greater_equal(y_min_anchor, -self._allowed_border) ), tf.logical_and( tf.less(x_max_anchor, im_shape[1] + self._allowed_border), tf.less(y_max_anchor, im_shape[0] + self._allowed_border) ) ) # We (force) reshape the filter so that we can use it as a boolean mask anchor_filter = tf.reshape(anchor_filter, [-1]) # Filter anchors. anchors = tf.boolean_mask( all_anchors, anchor_filter, name='filter_anchors') # Generate array with the labels for all_anchors. labels = tf.fill((tf.gather(tf.shape(all_anchors), [0])), -1) labels = tf.boolean_mask(labels, anchor_filter, name='filter_labels') # Intersection over union (IoU) overlap between the anchors and the # ground truth boxes. overlaps = bbox_overlap_tf(tf.to_float(anchors), tf.to_float(gt_boxes)) # Generate array with the IoU value of the closest GT box for each # anchor. max_overlaps = tf.reduce_max(overlaps, axis=1) if not self._clobber_positives: # Assign bg labels first so that positive labels can clobber them. # First we get an array with True where IoU is less than # self._negative_overlap negative_overlap_nonzero = tf.less( max_overlaps, self._negative_overlap) # Finally we set 0 at True indices labels = tf.where( condition=negative_overlap_nonzero, x=tf.zeros(tf.shape(labels)), y=tf.to_float(labels) ) # Get the value of the max IoU for the closest anchor for each gt. gt_max_overlaps = tf.reduce_max(overlaps, axis=0) # Find all the indices that match (at least one, but could be more). gt_argmax_overlaps = tf.squeeze(tf.equal(overlaps, gt_max_overlaps)) gt_argmax_overlaps = tf.where(gt_argmax_overlaps)[:, 0] # Eliminate duplicates indices. gt_argmax_overlaps, _ = tf.unique(gt_argmax_overlaps) # Order the indices for sparse_to_dense compatibility gt_argmax_overlaps, _ = tf.nn.top_k( gt_argmax_overlaps, k=tf.shape(gt_argmax_overlaps)[-1]) gt_argmax_overlaps = tf.reverse(gt_argmax_overlaps, [0]) # Foreground label: for each ground-truth, anchor with highest overlap. # When the argmax is many items we use all of them (for consistency). 
# We set 1 at gt_argmax_overlaps_cond indices gt_argmax_overlaps_cond = tf.sparse_to_dense( gt_argmax_overlaps, tf.shape(labels, out_type=tf.int64), True, default_value=False ) labels = tf.where( condition=gt_argmax_overlaps_cond, x=tf.ones(tf.shape(labels)), y=tf.to_float(labels) ) # Foreground label: above threshold Intersection over Union (IoU) # First we get an array with True where IoU is greater or equal than # self._positive_overlap positive_overlap_inds = tf.greater_equal( max_overlaps, self._positive_overlap) # Finally we set 1 at True indices labels = tf.where( condition=positive_overlap_inds, x=tf.ones(tf.shape(labels)), y=labels ) if self._clobber_positives: # Assign background labels last so that negative labels can clobber # positives. First we get an array with True where IoU is less than # self._negative_overlap negative_overlap_nonzero = tf.less( max_overlaps, self._negative_overlap) # Finally we set 0 at True indices labels = tf.where( condition=negative_overlap_nonzero, x=tf.zeros(tf.shape(labels)), y=labels ) # Subsample positive labels if we have too many def subsample_positive(): # Shuffle the foreground indices disable_fg_inds = tf.random_shuffle(fg_inds, seed=self._seed) # Select the indices that we have to ignore, this is # `tf.shape(fg_inds)[0] - num_fg` because we want to get only # `num_fg` foreground labels. disable_place = (tf.shape(fg_inds)[0] - num_fg) disable_fg_inds = disable_fg_inds[:disable_place] # Order the indices for sparse_to_dense compatibility disable_fg_inds, _ = tf.nn.top_k( disable_fg_inds, k=tf.shape(disable_fg_inds)[-1]) disable_fg_inds = tf.reverse(disable_fg_inds, [0]) disable_fg_inds = tf.sparse_to_dense( disable_fg_inds, tf.shape(labels, out_type=tf.int64), True, default_value=False ) # Put -1 to ignore the anchors in the selected indices return tf.where( condition=tf.squeeze(disable_fg_inds), x=tf.to_float(tf.fill(tf.shape(labels), -1)), y=labels ) num_fg = tf.to_int32(self._foreground_fraction * self._minibatch_size) # Get foreground indices, get True in the indices where we have a one. fg_inds = tf.equal(labels, 1) # We get only the indices where we have True. fg_inds = tf.squeeze(tf.where(fg_inds), axis=1) fg_inds_size = tf.size(fg_inds) # Condition for check if we have too many positive labels. subsample_positive_cond = fg_inds_size > num_fg # Check the condition and subsample positive labels. labels = tf.cond( subsample_positive_cond, true_fn=subsample_positive, false_fn=lambda: labels ) # Subsample negative labels if we have too many def subsample_negative(): # Shuffle the background indices disable_bg_inds = tf.random_shuffle(bg_inds, seed=self._seed) # Select the indices that we have to ignore, this is # `tf.shape(bg_inds)[0] - num_bg` because we want to get only # `num_bg` background labels. disable_place = (tf.shape(bg_inds)[0] - num_bg) disable_bg_inds = disable_bg_inds[:disable_place] # Order the indices for sparse_to_dense compatibility disable_bg_inds, _ = tf.nn.top_k( disable_bg_inds, k=tf.shape(disable_bg_inds)[-1]) disable_bg_inds = tf.reverse(disable_bg_inds, [0]) disable_bg_inds = tf.sparse_to_dense( disable_bg_inds, tf.shape(labels, out_type=tf.int64), True, default_value=False ) # Put -1 to ignore the anchors in the selected indices return tf.where( condition=tf.squeeze(disable_bg_inds), x=tf.to_float(tf.fill(tf.shape(labels), -1)), y=labels ) # Recalculate the foreground indices after (maybe) disable some of them # Get foreground indices, get True in the indices where we have a one. 
fg_inds = tf.equal(labels, 1) # We get only the indices where we have True. fg_inds = tf.squeeze(tf.where(fg_inds), axis=1) fg_inds_size = tf.size(fg_inds) num_bg = tf.to_int32(self._minibatch_size - fg_inds_size) # Get background indices, get True in the indices where we have a zero. bg_inds = tf.equal(labels, 0) # We get only the indices where we have True. bg_inds = tf.squeeze(tf.where(bg_inds), axis=1) bg_inds_size = tf.size(bg_inds) # Condition for check if we have too many positive labels. subsample_negative_cond = bg_inds_size > num_bg # Check the condition and subsample positive labels. labels = tf.cond( subsample_negative_cond, true_fn=subsample_negative, false_fn=lambda: labels ) # Return bbox targets with shape (anchors.shape[0], 4). # Find the closest gt box for each anchor. argmax_overlaps = tf.argmax(overlaps, axis=1) # Eliminate duplicates. argmax_overlaps_unique, _ = tf.unique(argmax_overlaps) # Filter the gt_boxes. # We get only the indices where we have "inside anchors". anchor_filter_inds = tf.where(anchor_filter) gt_boxes = tf.gather(gt_boxes, argmax_overlaps) bbox_targets = encode_tf(anchors, gt_boxes) # For the anchors that arent foreground, we ignore the bbox_targets. anchor_foreground_filter = tf.equal(labels, 1) bbox_targets = tf.where( condition=anchor_foreground_filter, x=bbox_targets, y=tf.zeros_like(bbox_targets) ) # We unroll "inside anchors" value for all anchors (for shape # compatibility). # We complete the missed indices with zeros # (because scatter_nd has zeros as default). bbox_targets = tf.scatter_nd( indices=tf.to_int32(anchor_filter_inds), updates=bbox_targets, shape=tf.shape(all_anchors) ) labels_scatter = tf.scatter_nd( indices=tf.to_int32(anchor_filter_inds), updates=labels, shape=[tf.shape(all_anchors)[0]] ) # We have to put -1 to ignore the indices with 0 generated by # scatter_nd, otherwise it will be considered as background. labels = tf.where( condition=anchor_filter, x=labels_scatter, y=tf.to_float(tf.fill(tf.shape(labels_scatter), -1)) ) max_overlaps = tf.scatter_nd( indices=tf.to_int32(anchor_filter_inds), updates=max_overlaps, shape=[tf.shape(all_anchors)[0]] ) return labels, bbox_targets, max_overlaps
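# A standalone sketch (assuming TF 2.x, where tf.sparse_to_dense is deprecated) of
# the dedupe-then-mark idiom used for the foreground labels above: the anchors tying
# for the best IoU with each ground-truth box are collected, duplicates removed with
# tf.unique, and those positions forced to the foreground label.
import tensorflow as tf

num_anchors = 12
gt_argmax_overlaps = tf.constant([5, 2, 5, 9], dtype=tf.int64)
uniq, _ = tf.unique(gt_argmax_overlaps)                 # [5, 2, 9]
hits = tf.scatter_nd(tf.reshape(uniq, [-1, 1]),
                     tf.ones([tf.size(uniq)]),
                     shape=[num_anchors])
labels = tf.where(hits > 0, tf.ones([num_anchors]), tf.fill([num_anchors], -1.))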
def learn( make_env, make_policy, *, n_episodes, horizon, delta, gamma, max_iters, sampler=None, use_natural_gradient=False, #can be 'exact', 'approximate' fisher_reg=1e-2, iw_method='is', iw_norm='none', bound='J', line_search_type='parabola', save_weights=False, improvement_tol=0., center_return=False, render_after=None, max_offline_iters=100, callback=None, clipping=False, entropy='none', positive_return=False, reward_clustering='none'): np.set_printoptions(precision=3) max_samples = horizon * n_episodes if line_search_type == 'binary': line_search = line_search_binary elif line_search_type == 'parabola': line_search = line_search_parabola else: raise ValueError() # Building the environment env = make_env() ob_space = env.observation_space ac_space = env.action_space # Building the policy pi = make_policy('pi', ob_space, ac_space) oldpi = make_policy('oldpi', ob_space, ac_space) all_var_list = pi.get_trainable_variables() var_list = [ v for v in all_var_list if v.name.split('/')[1].startswith('pol') ] shapes = [U.intprod(var.get_shape().as_list()) for var in var_list] n_parameters = sum(shapes) # Placeholders ob_ = ob = U.get_placeholder_cached(name='ob') ac_ = pi.pdtype.sample_placeholder([max_samples], name='ac') mask_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='mask') rew_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='rew') disc_rew_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='disc_rew') clustered_rew_ = tf.placeholder(dtype=tf.float32, shape=(n_episodes)) gradient_ = tf.placeholder(dtype=tf.float32, shape=(n_parameters, 1), name='gradient') iter_number_ = tf.placeholder(dtype=tf.int32, name='iter_number') losses_with_name = [] # Policy densities target_log_pdf = pi.pd.logp(ac_) behavioral_log_pdf = oldpi.pd.logp(ac_) log_ratio = target_log_pdf - behavioral_log_pdf # Split operations disc_rew_split = tf.stack(tf.split(disc_rew_ * mask_, n_episodes)) rew_split = tf.stack(tf.split(rew_ * mask_, n_episodes)) log_ratio_split = tf.stack(tf.split(log_ratio * mask_, n_episodes)) target_log_pdf_split = tf.stack( tf.split(target_log_pdf * mask_, n_episodes)) behavioral_log_pdf_split = tf.stack( tf.split(behavioral_log_pdf * mask_, n_episodes)) mask_split = tf.stack(tf.split(mask_, n_episodes)) # Renyi divergence emp_d2_split = tf.stack( tf.split(pi.pd.renyi(oldpi.pd, 2) * mask_, n_episodes)) emp_d2_cum_split = tf.reduce_sum(emp_d2_split, axis=1) empirical_d2 = tf.reduce_mean(tf.exp(emp_d2_cum_split)) # Return ep_return = clustered_rew_ #tf.reduce_sum(mask_split * disc_rew_split, axis=1) if clipping: rew_split = tf.clip_by_value(rew_split, -1, 1) if center_return: ep_return = ep_return - tf.reduce_mean(ep_return) rew_split = rew_split - (tf.reduce_sum(rew_split) / (tf.reduce_sum(mask_split) + 1e-24)) discounter = [pow(gamma, i) for i in range(0, horizon)] # Decreasing gamma discounter_tf = tf.constant(discounter) disc_rew_split = rew_split * discounter_tf #tf.add_to_collection('prints', tf.Print(ep_return, [ep_return], 'ep_return_not_clustered', summarize=20)) # Reward clustering ''' rew_clustering_options = reward_clustering.split(':') if reward_clustering == 'none': pass # Do nothing elif rew_clustering_options[0] == 'global': assert len(rew_clustering_options) == 2, "Reward clustering: Provide the correct number of parameters" N = int(rew_clustering_options[1]) tf.add_to_collection('prints', tf.Print(ep_return, [ep_return], 'ep_return', summarize=20)) global_rew_min = tf.Variable(float('+inf'), trainable=False) global_rew_max = 
tf.Variable(float('-inf'), trainable=False) rew_min = tf.reduce_min(ep_return) rew_max = tf.reduce_max(ep_return) global_rew_min = tf.assign(global_rew_min, tf.minimum(global_rew_min, rew_min)) global_rew_max = tf.assign(global_rew_max, tf.maximum(global_rew_max, rew_max)) interval_size = (global_rew_max - global_rew_min) / N ep_return = tf.floordiv(ep_return, interval_size) * interval_size elif rew_clustering_options[0] == 'batch': assert len(rew_clustering_options) == 2, "Reward clustering: Provide the correct number of parameters" N = int(rew_clustering_options[1]) rew_min = tf.reduce_min(ep_return) rew_max = tf.reduce_max(ep_return) interval_size = (rew_max - rew_min) / N ep_return = tf.floordiv(ep_return, interval_size) * interval_size elif rew_clustering_options[0] == 'manual': assert len(rew_clustering_options) == 4, "Reward clustering: Provide the correct number of parameters" N, rew_min, rew_max = map(int, rew_clustering_options[1:]) print("N:", N) print("Min reward:", rew_min) print("Max reward:", rew_max) interval_size = (rew_max - rew_min) / N print("Interval size:", interval_size) # Clip to avoid overflow and cluster ep_return = tf.clip_by_value(ep_return, rew_min, rew_max) ep_return = tf.cast(tf.floordiv(ep_return, interval_size) * interval_size, tf.float32) tf.add_to_collection('prints', tf.Print(ep_return, [ep_return], 'ep_return_clustered', summarize=20)) else: raise Exception('Unrecognized reward clustering scheme.') ''' return_mean = tf.reduce_mean(ep_return) return_std = U.reduce_std(ep_return) return_max = tf.reduce_max(ep_return) return_min = tf.reduce_min(ep_return) return_abs_max = tf.reduce_max(tf.abs(ep_return)) return_step_max = tf.reduce_max(tf.abs(rew_split)) # Max step reward return_step_mean = tf.abs(tf.reduce_mean(rew_split)) positive_step_return_max = tf.maximum(0.0, tf.reduce_max(rew_split)) negative_step_return_max = tf.maximum(0.0, tf.reduce_max(-rew_split)) return_step_maxmin = tf.abs(positive_step_return_max - negative_step_return_max) losses_with_name.extend([(return_mean, 'InitialReturnMean'), (return_max, 'InitialReturnMax'), (return_min, 'InitialReturnMin'), (return_std, 'InitialReturnStd'), (empirical_d2, 'EmpiricalD2'), (return_step_max, 'ReturnStepMax'), (return_step_maxmin, 'ReturnStepMaxmin')]) if iw_method == 'pdis': # log_ratio_split cumulative sum log_ratio_cumsum = tf.cumsum(log_ratio_split, axis=1) # Exponentiate ratio_cumsum = tf.exp(log_ratio_cumsum) # Multiply by the step-wise reward (not episode) ratio_reward = ratio_cumsum * disc_rew_split # Average on episodes ratio_reward_per_episode = tf.reduce_sum(ratio_reward, axis=1) w_return_mean = tf.reduce_sum(ratio_reward_per_episode, axis=0) / n_episodes # Get d2(w0:t) with mask d2_w_0t = tf.exp(tf.cumsum(emp_d2_split, axis=1)) * mask_split # LEAVE THIS OUTSIDE # Sum d2(w0:t) over timesteps episode_d2_0t = tf.reduce_sum(d2_w_0t, axis=1) # Sample variance J_sample_variance = (1 / (n_episodes - 1)) * tf.reduce_sum( tf.square(ratio_reward_per_episode - w_return_mean)) losses_with_name.append((J_sample_variance, 'J_sample_variance')) losses_with_name.extend([(tf.reduce_max(ratio_cumsum), 'MaxIW'), (tf.reduce_min(ratio_cumsum), 'MinIW'), (tf.reduce_mean(ratio_cumsum), 'MeanIW'), (U.reduce_std(ratio_cumsum), 'StdIW')]) losses_with_name.extend([(tf.reduce_max(d2_w_0t), 'MaxD2w0t'), (tf.reduce_min(d2_w_0t), 'MinD2w0t'), (tf.reduce_mean(d2_w_0t), 'MeanD2w0t'), (U.reduce_std(d2_w_0t), 'StdD2w0t')]) elif iw_method == 'is': iw = tf.exp(tf.reduce_sum(log_ratio_split, axis=1)) if iw_norm == 'none': iwn 
= iw / n_episodes w_return_mean = tf.reduce_sum(iwn * ep_return) J_sample_variance = (1 / (n_episodes - 1)) * tf.reduce_sum( tf.square(iw * ep_return - w_return_mean)) losses_with_name.append((J_sample_variance, 'J_sample_variance')) elif iw_norm == 'sn': iwn = iw / tf.reduce_sum(iw) w_return_mean = tf.reduce_sum(iwn * ep_return) elif iw_norm == 'regression': iwn = iw / n_episodes mean_iw = tf.reduce_mean(iw) beta = tf.reduce_sum( (iw - mean_iw) * ep_return * iw) / (tf.reduce_sum( (iw - mean_iw)**2) + 1e-24) w_return_mean = tf.reduce_mean(iw * ep_return - beta * (iw - 1)) else: raise NotImplementedError() ess_classic = tf.linalg.norm(iw, 1)**2 / tf.linalg.norm(iw, 2)**2 sqrt_ess_classic = tf.linalg.norm(iw, 1) / tf.linalg.norm(iw, 2) ess_renyi = n_episodes / empirical_d2 losses_with_name.extend([(tf.reduce_max(iwn), 'MaxIWNorm'), (tf.reduce_min(iwn), 'MinIWNorm'), (tf.reduce_mean(iwn), 'MeanIWNorm'), (U.reduce_std(iwn), 'StdIWNorm'), (tf.reduce_max(iw), 'MaxIW'), (tf.reduce_min(iw), 'MinIW'), (tf.reduce_mean(iw), 'MeanIW'), (U.reduce_std(iw), 'StdIW'), (ess_classic, 'ESSClassic'), (ess_renyi, 'ESSRenyi')]) elif iw_method == 'rbis': # Get pdfs for episodes target_log_pdf_episode = tf.reduce_sum(target_log_pdf_split, axis=1) behavioral_log_pdf_episode = tf.reduce_sum(behavioral_log_pdf_split, axis=1) # Normalize log_proba (avoid as overflows as possible) normalization_factor = tf.reduce_mean( tf.stack([target_log_pdf_episode, behavioral_log_pdf_episode])) target_norm_log_pdf_episode = target_log_pdf_episode - normalization_factor behavioral_norm_log_pdf_episode = behavioral_log_pdf_episode - normalization_factor # Exponentiate target_pdf_episode = tf.clip_by_value( tf.cast(tf.exp(target_norm_log_pdf_episode), tf.float64), 1e-300, 1e+300) behavioral_pdf_episode = tf.clip_by_value( tf.cast(tf.exp(behavioral_norm_log_pdf_episode), tf.float64), 1e-300, 1e+300) tf.add_to_collection( 'asserts', tf.assert_positive(target_pdf_episode, name='target_pdf_positive')) tf.add_to_collection( 'asserts', tf.assert_positive(behavioral_pdf_episode, name='behavioral_pdf_positive')) # Compute the merging matrix (reward-clustering) and the number of clusters reward_unique, reward_indexes = tf.unique(ep_return) episode_clustering_matrix = tf.cast( tf.one_hot(reward_indexes, n_episodes), tf.float64) max_index = tf.reduce_max(reward_indexes) + 1 trajectories_per_cluster = tf.reduce_sum(episode_clustering_matrix, axis=0)[:max_index] tf.add_to_collection( 'asserts', tf.assert_positive(tf.reduce_sum(episode_clustering_matrix, axis=0)[:max_index], name='clustering_matrix')) # Get the clustered pdfs clustered_target_pdf = tf.matmul( tf.reshape(target_pdf_episode, (1, -1)), episode_clustering_matrix)[0][:max_index] clustered_behavioral_pdf = tf.matmul( tf.reshape(behavioral_pdf_episode, (1, -1)), episode_clustering_matrix)[0][:max_index] tf.add_to_collection( 'asserts', tf.assert_positive(clustered_target_pdf, name='clust_target_pdf_positive')) tf.add_to_collection( 'asserts', tf.assert_positive(clustered_behavioral_pdf, name='clust_behavioral_pdf_positive')) # Compute the J ratio_clustered = clustered_target_pdf / clustered_behavioral_pdf #ratio_reward = tf.cast(ratio_clustered, tf.float32) * reward_unique # ---- No cluster cardinality ratio_reward = tf.cast(ratio_clustered, tf.float32) * reward_unique * tf.cast( trajectories_per_cluster, tf.float32) # ---- Cluster cardinality #w_return_mean = tf.reduce_sum(ratio_reward) / tf.cast(max_index, tf.float32) # ---- No cluster cardinality w_return_mean = 
tf.reduce_sum(ratio_reward) / tf.cast( n_episodes, tf.float32) # ---- Cluster cardinality # Divergences ess_classic = tf.linalg.norm(ratio_reward, 1)**2 / tf.linalg.norm( ratio_reward, 2)**2 sqrt_ess_classic = tf.linalg.norm(ratio_reward, 1) / tf.linalg.norm( ratio_reward, 2) ess_renyi = n_episodes / empirical_d2 # Summaries losses_with_name.extend([(tf.reduce_max(ratio_clustered), 'MaxIW'), (tf.reduce_min(ratio_clustered), 'MinIW'), (tf.reduce_mean(ratio_clustered), 'MeanIW'), (U.reduce_std(ratio_clustered), 'StdIW'), (1 - (max_index / n_episodes), 'RewardCompression'), (ess_classic, 'ESSClassic'), (ess_renyi, 'ESSRenyi')]) else: raise NotImplementedError() if bound == 'J': bound_ = w_return_mean elif bound == 'std-d2': bound_ = w_return_mean - tf.sqrt( (1 - delta) / (delta * ess_renyi)) * return_std elif bound == 'max-d2': var_estimate = tf.sqrt( (1 - delta) / (delta * ess_renyi)) * return_abs_max bound_ = w_return_mean - tf.sqrt( (1 - delta) / (delta * ess_renyi)) * return_abs_max elif bound == 'max-ess': bound_ = w_return_mean - tf.sqrt( (1 - delta) / delta) / sqrt_ess_classic * return_abs_max elif bound == 'std-ess': bound_ = w_return_mean - tf.sqrt( (1 - delta) / delta) / sqrt_ess_classic * return_std elif bound == 'pdis-max-d2': # Discount factor if gamma >= 1: discounter = [ float(1 + 2 * (horizon - t - 1)) for t in range(0, horizon) ] else: def f(t): return pow(gamma, 2 * t) + ( 2 * pow(gamma, t) * (pow(gamma, t + 1) - pow(gamma, horizon))) / (1 - gamma) discounter = [f(t) for t in range(0, horizon)] discounter_tf = tf.constant(discounter) mean_episode_d2 = tf.reduce_sum( d2_w_0t, axis=0) / (tf.reduce_sum(mask_split, axis=0) + 1e-24) discounted_d2 = mean_episode_d2 * discounter_tf # Discounted d2 discounted_total_d2 = tf.reduce_sum(discounted_d2, axis=0) # Sum over time bound_ = w_return_mean - tf.sqrt( (1 - delta) * discounted_total_d2 / (delta * n_episodes)) * return_step_max elif bound == 'pdis-mean-d2': # Discount factor if gamma >= 1: discounter = [ float(1 + 2 * (horizon - t - 1)) for t in range(0, horizon) ] else: def f(t): return pow(gamma, 2 * t) + ( 2 * pow(gamma, t) * (pow(gamma, t + 1) - pow(gamma, horizon))) / (1 - gamma) discounter = [f(t) for t in range(0, horizon)] discounter_tf = tf.constant(discounter) mean_episode_d2 = tf.reduce_sum( d2_w_0t, axis=0) / (tf.reduce_sum(mask_split, axis=0) + 1e-24) discounted_d2 = mean_episode_d2 * discounter_tf # Discounted d2 discounted_total_d2 = tf.reduce_sum(discounted_d2, axis=0) # Sum over time bound_ = w_return_mean - tf.sqrt( (1 - delta) * discounted_total_d2 / (delta * n_episodes)) * return_step_mean else: raise NotImplementedError() # Policy entropy for exploration ent = pi.pd.entropy() meanent = tf.reduce_mean(ent) losses_with_name.append((meanent, 'MeanEntropy')) # Add policy entropy bonus if entropy != 'none': scheme, v1, v2 = entropy.split(':') if scheme == 'step': entcoeff = tf.cond(iter_number_ < int(v2), lambda: float(v1), lambda: float(0.0)) losses_with_name.append((entcoeff, 'EntropyCoefficient')) entbonus = entcoeff * meanent bound_ = bound_ + entbonus elif scheme == 'lin': ip = tf.cast(iter_number_ / max_iters, tf.float32) entcoeff_decay = tf.maximum( 0.0, float(v2) + (float(v1) - float(v2)) * (1.0 - ip)) losses_with_name.append((entcoeff_decay, 'EntropyCoefficient')) entbonus = entcoeff_decay * meanent bound_ = bound_ + entbonus elif scheme == 'exp': ent_f = tf.exp( -tf.abs(tf.reduce_mean(iw) - 1) * float(v2)) * float(v1) losses_with_name.append((ent_f, 'EntropyCoefficient')) bound_ = bound_ + ent_f * meanent 
else: raise Exception('Unrecognized entropy scheme.') losses_with_name.append((w_return_mean, 'ReturnMeanIW')) losses_with_name.append((bound_, 'Bound')) losses, loss_names = map(list, zip(*losses_with_name)) if use_natural_gradient: p = tf.placeholder(dtype=tf.float32, shape=[None]) target_logpdf_episode = tf.reduce_sum(target_log_pdf_split * mask_split, axis=1) grad_logprob = U.flatgrad( tf.stop_gradient(iwn) * target_logpdf_episode, var_list) dot_product = tf.reduce_sum(grad_logprob * p) hess_logprob = U.flatgrad(dot_product, var_list) compute_linear_operator = U.function([p, ob_, ac_, disc_rew_, mask_], [-hess_logprob]) assign_old_eq_new = U.function( [], [], updates=[ tf.assign(oldv, newv) for (oldv, newv) in zipsame(oldpi.get_variables(), pi.get_variables()) ]) assert_ops = tf.group(*tf.get_collection('asserts')) print_ops = tf.group(*tf.get_collection('prints')) compute_lossandgrad = U.function( [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_], losses + [U.flatgrad(bound_, var_list), assert_ops, print_ops]) compute_grad = U.function( [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_], [U.flatgrad(bound_, var_list), assert_ops, print_ops]) compute_bound = U.function( [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_], [bound_, assert_ops, print_ops]) compute_losses = U.function( [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_], losses) #compute_temp = U.function([ob_, ac_, rew_, disc_rew_, mask_], [ratio_cumsum, discounted_ratio]) set_parameter = U.SetFromFlat(var_list) get_parameter = U.GetFlat(var_list) if sampler is None: seg_gen = traj_segment_generator(pi, env, n_episodes, horizon, stochastic=True) sampler = type("SequentialSampler", (object, ), { "collect": lambda self, _: seg_gen.__next__() })() U.initialize() # Starting optimizing episodes_so_far = 0 timesteps_so_far = 0 iters_so_far = 0 tstart = time.time() lenbuffer = deque(maxlen=n_episodes) rewbuffer = deque(maxlen=n_episodes) while True: iters_so_far += 1 if render_after is not None and iters_so_far % render_after == 0: if hasattr(env, 'render'): render(env, pi, horizon) if callback: callback(locals(), globals()) if iters_so_far >= max_iters: print('Finised...') break logger.log('********** Iteration %i ************' % iters_so_far) theta = get_parameter() with timed('sampling'): seg = sampler.collect(theta) add_disc_rew(seg, gamma) lens, rets = seg['ep_lens'], seg['ep_rets'] lenbuffer.extend(lens) rewbuffer.extend(rets) episodes_so_far += len(lens) timesteps_so_far += sum(lens) # Get clustered reward reward_matrix = np.reshape(seg['disc_rew'] * seg['mask'], (n_episodes, horizon)) ep_reward = np.sum(reward_matrix, axis=1) if reward_clustering == 'none': pass elif reward_clustering == 'floor': ep_reward = np.floor(ep_reward) elif reward_clustering == 'ceil': ep_reward = np.ceil(ep_reward) args = ob, ac, rew, disc_rew, clustered_rew, mask, iter_number = seg[ 'ob'], seg['ac'], seg['rew'], seg['disc_rew'], ep_reward, seg[ 'mask'], iters_so_far assign_old_eq_new() def evaluate_loss(): loss = compute_bound(*args) return loss[0] def evaluate_gradient(): gradient = compute_grad(*args) return gradient[0] if use_natural_gradient: def evaluate_fisher_vector_prod(x): return compute_linear_operator(x, *args)[0] + fisher_reg * x def evaluate_natural_gradient(g): return cg(evaluate_fisher_vector_prod, g, cg_iters=10, verbose=0) else: evaluate_natural_gradient = None with timed('summaries before'): logger.record_tabular("Iteration", iters_so_far) 
logger.record_tabular("InitialBound", evaluate_loss()) logger.record_tabular("EpLenMean", np.mean(lenbuffer)) logger.record_tabular("EpRewMean", np.mean(rewbuffer)) logger.record_tabular("EpThisIter", len(lens)) logger.record_tabular("EpisodesSoFar", episodes_so_far) logger.record_tabular("TimestepsSoFar", timesteps_so_far) logger.record_tabular("TimeElapsed", time.time() - tstart) if save_weights: logger.record_tabular('Weights', str(get_parameter())) import pickle file = open('checkpoint.pkl', 'wb') pickle.dump(theta, file) with timed("offline optimization"): theta, improvement = optimize_offline( theta, set_parameter, line_search, evaluate_loss, evaluate_gradient, evaluate_natural_gradient, max_offline_ite=max_offline_iters) set_parameter(theta) with timed('summaries after'): meanlosses = np.array(compute_losses(*args)) for (lossname, lossval) in zip(loss_names, meanlosses): logger.record_tabular(lossname, lossval) logger.dump_tabular() env.close()
def _process_input_helper(self, update_row_factors, sp_input=None, transpose_input=False): """Creates the graph for processing a sparse slice of input. Args: update_row_factors: if True, update the row_factors, else update the column factors. sp_input: Please refer to comments for update_row_factors and update_col_factors. transpose_input: If true, the input is logically transposed and then the corresponding rows/columns of the transposed input are updated. Returns: A tuple consisting of the following two elements: new_values: New values for the row/column factors. update_op: An op that assigns the newly computed values to the row/column factors. """ assert isinstance(sp_input, tf.SparseTensor) if update_row_factors: left = self._row_factors right_factors = self._col_factors_cache row_wt = self._row_wt_cache col_wt = self._col_wt_cache sharding_func = WALSModel._get_sharding_func(self._input_rows, self._num_row_shards) gramian = self._col_gramian_cache else: left = self._col_factors right_factors = self._row_factors_cache row_wt = self._col_wt_cache col_wt = self._row_wt_cache sharding_func = WALSModel._get_sharding_func(self._input_cols, self._num_col_shards) gramian = self._row_gramian_cache transpose_input = not transpose_input # Note that the row indices of sp_input are based on the original full input # Here we reindex the rows and give them contiguous ids starting at 0. # We use tf.unique to achieve this reindexing. Note that this is done so # that the downstream kernel can assume that the input is "dense" along the # row dimension. row_ids, col_ids = tf.split(1, 2, sp_input.indices) update_row_indices, all_row_ids = tf.unique(row_ids[:, 0]) update_col_indices, all_col_ids = tf.unique(col_ids[:, 0]) col_ids = tf.expand_dims(tf.cast(all_col_ids, tf.int64), 1) row_ids = tf.expand_dims(tf.cast(all_row_ids, tf.int64), 1) if transpose_input: update_indices = update_col_indices row_shape = [tf.cast(tf.shape(update_row_indices)[0], tf.int64)] gather_indices = update_row_indices else: update_indices = update_row_indices row_shape = [tf.cast(tf.shape(update_col_indices)[0], tf.int64)] gather_indices = update_col_indices num_rows = tf.cast(tf.shape(update_indices)[0], tf.int64) col_shape = [num_rows] right = embedding_ops.embedding_lookup(right_factors, gather_indices, partition_strategy='div') new_sp_indices = tf.concat(1, [row_ids, col_ids]) new_sp_shape = (tf.concat(0, [row_shape, col_shape]) if transpose_input else tf.concat(0, [col_shape, row_shape])) new_sp_input = tf.SparseTensor(indices=new_sp_indices, values=sp_input.values, shape=new_sp_shape) # Compute lhs and rhs of the normal equations total_lhs = (self._unobserved_weight * gramian) if self._regularization is not None: total_lhs += self._regularization if self._row_weights is None: # Special case of ALS. Use a much simpler update rule. total_rhs = (self._unobserved_weight * tf.sparse_tensor_dense_matmul(new_sp_input, right, adjoint_a=transpose_input)) # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of # transposing explicitly. # TODO(rmlarsen): multi-thread tf.matrix_solve. new_left_values = tf.transpose(tf.matrix_solve(total_lhs, tf.transpose(total_rhs))) else: # TODO(yifanchen): Add special handling for single shard without using # embedding_lookup and perform benchmarks for those cases. 
row_weights_slice = embedding_ops.embedding_lookup( row_wt, update_indices, partition_strategy='div') col_weights = embedding_ops.embedding_lookup( col_wt, gather_indices, partition_strategy='div') partial_lhs, total_rhs = wals_compute_partial_lhs_and_rhs( right, col_weights, self._unobserved_weight, row_weights_slice, new_sp_input.indices, new_sp_input.values, num_rows, transpose_input, name="wals_compute_partial_lhs_rhs") total_lhs = tf.expand_dims(total_lhs, 0) + partial_lhs total_rhs = tf.expand_dims(total_rhs, -1) new_left_values = tf.squeeze(tf.matrix_solve(total_lhs, total_rhs), [2]) return (new_left_values, self.scatter_update(left, update_indices, new_left_values, sharding_func))
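# A simplified dense sketch (assuming TF 2.x; the real code works on the reindexed
# sparse slice) of the ALS special case above: with no per-row weights, the new
# factors solve (w0 * V^T V + reg * I) X^T = (w0 * A V)^T, where V holds the fixed
# factors for the other dimension and A is the input slice being updated.
import tensorflow as tf

right = tf.random.normal([5, 3])                # fixed factors V for the other dimension
a_slice = tf.random.normal([2, 5])              # densified input rows being updated
unobserved_weight, reg = 0.1, 1e-3
gramian = tf.matmul(right, right, transpose_a=True)            # V^T V, [3, 3]
total_lhs = unobserved_weight * gramian + reg * tf.eye(3)
total_rhs = unobserved_weight * tf.matmul(a_slice, right)      # A V, [2, 3]
new_left = tf.transpose(tf.linalg.solve(total_lhs, tf.transpose(total_rhs)))   # [2, 3]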