def create_id3_embedding(videos):
  """Embeds the given videos using the Inflated 3D Convolution network.

  Downloads the graph of the I3D from tf.hub and adds it to the graph on the
  first call for a given input tensor.

  Args:
    videos: <float32>[batch_size, num_frames, height=224, width=224, depth=3].
      Expected range is [-1, 1]. The batch size must be 16; a statically
      unknown batch dimension is accepted and checked at graph-execution time.

  Returns:
    embedding: <float32>[batch_size, embedding_size]. embedding_size depends
      on the model used.

  Raises:
    AssertionError: if the statically known batch dimension of `videos` is
      different from the expected batch size.
  """
  batch_size = 16
  module_spec = "https://tfhub.dev/deepmind/i3d-kinetics-400/1"

  # Making sure that we import the graph separately for
  # each different input video tensor.
  module_name = "fvd_kinetics-400_id3_module_" + videos.name.replace(":", "_")

  # Run-time checks on the value range and on the dynamic batch size.
  assert_ops = [
      tf.Assert(
          tf.reduce_max(videos) <= 1.001,
          ["max value in frame is > 1", videos]),
      tf.Assert(
          tf.reduce_min(videos) >= -1.001,
          ["min value in frame is < -1", videos]),
      tf.assert_equal(
          tf.shape(videos)[0],
          batch_size, ["invalid frame batch size: ",
                       tf.shape(videos)],
          summarize=6),
  ]
  with tf.control_dependencies(assert_ops):
    videos = tf.identity(videos)

  module_scope = "%s_apply_default/" % module_name

  # To check whether the module has already been loaded into the graph, we look
  # for a given tensor name. If this tensor name exists, we assume the function
  # has been called before and the graph was imported. Otherwise we import it.
  # Note: in theory, the tensor could exist, but have wrong shapes.
  # This will happen if create_id3_embedding is called with a frames
  # placeholder of wrong size/batch size, because even though that will throw a
  # tf.Assert on graph-execution time, it will insert the tensor (with wrong
  # shape) into the graph. This is why we need the following assert.
  #
  # The leading dimension is either a Dimension object exposing `.value` or a
  # plain int/None depending on the TensorFlow shape behavior in use; calling
  # int() directly on an unknown dimension raises TypeError, so unwrap first.
  leading_dim = videos.shape[0]
  static_batch_size = getattr(leading_dim, "value", leading_dim)
  if static_batch_size is not None:
    static_batch_size = int(static_batch_size)
  assert static_batch_size in [batch_size, -1, None], "Invalid batch size"

  tensor_name = module_scope + "RGB/inception_i3d/Mean:0"
  if not _is_in_graph(tensor_name):
    i3d_model = hub.Module(module_spec, name=module_name)
    i3d_model(videos)

  # Fetch the I3D output by name (described upstream as the
  # kinetics-i3d-400-logits layer).
  tensor = tf.get_default_graph().get_tensor_by_name(tensor_name)
  return tensor
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   additional_fields=None,
                                   scope=None):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes. It operates independently for each class for
  which scores are provided, pruning boxes with score less than a provided
  threshold prior to applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be
      either number of classes or 1 depending on whether a separate box is
      predicted per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each
      of the k detections. The second dimension must be statically known.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default (0) returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided).
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. Every entry is
      attached to the per-class BoxList and therefore filtered and gathered
      together with the boxes.
    scope: name scope.

  Returns:
    a BoxList holding M boxes with a rank-1 scores field representing
    corresponding scores for each box with scores sorted in decreasing order
    and a rank-1 classes field representing a class label for each box.
    Masks and additional fields, when supplied, are carried along.

  Raises:
    ValueError: if iou_thresh is not in [0, 1]; if scores is not rank 2 or its
      second dimension is not statically defined; if boxes is not rank 3, its
      second dimension is neither 1 nor the number of classes, or its last
      dimension is not 4; or if change_coordinate_frame is True without a
      clip_window.
  """
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('iou_thresh must be between 0 and 1')
  if scores.shape.ndims != 2:
    raise ValueError('scores field must be of rank 2')
  if scores.shape[1].value is None:
    raise ValueError('scores must have statically defined second '
                     'dimension')
  if boxes.shape.ndims != 3:
    raise ValueError('boxes must be of rank 3.')
  if not (boxes.shape[1].value == scores.shape[1].value or
          boxes.shape[1].value == 1):
    raise ValueError('second dimension of boxes must be either 1 or equal '
                     'to the second dimension of scores')
  if boxes.shape[2].value != 4:
    raise ValueError('last dimension of boxes must be of size 4.')
  if change_coordinate_frame and clip_window is None:
    raise ValueError(
        'if change_coordinate_frame is True, then a clip_window '
        'must be specified.')

  with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
    num_boxes = tf.shape(boxes)[0]
    num_scores = tf.shape(scores)[0]
    # Validated above to be statically known, so this is a plain int.
    num_classes = scores.shape[1].value

    length_assert = tf.Assert(tf.equal(num_boxes, num_scores), [
        'Incorrect scores field length: actual vs expected.', num_scores,
        num_boxes
    ])

    selected_boxes_list = []
    per_class_boxes_list = tf.unstack(boxes, axis=1)
    if masks is not None:
      per_class_masks_list = tf.unstack(masks, axis=1)
    # When a single box is shared by all classes (q == 1), every class reads
    # slot 0; otherwise each class reads its own slot.
    boxes_ids = (range(num_classes)
                 if len(per_class_boxes_list) > 1 else [0] * num_classes)
    for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
      per_class_boxes = per_class_boxes_list[boxes_idx]
      boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
      with tf.control_dependencies([length_assert]):
        class_scores = tf.reshape(
            tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
      boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                         class_scores)
      if masks is not None:
        per_class_masks = per_class_masks_list[boxes_idx]
        boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                           per_class_masks)
      if additional_fields is not None:
        # dict.items() works on both Python 2 and 3 (iteritems() is 2-only).
        for key, tensor in additional_fields.items():
          boxlist_and_class_scores.add_field(key, tensor)
      boxlist_filtered = box_list_ops.filter_greater_than(
          boxlist_and_class_scores, score_thresh)
      if clip_window is not None:
        boxlist_filtered = box_list_ops.clip_to_window(
            boxlist_filtered, clip_window)
        if change_coordinate_frame:
          boxlist_filtered = box_list_ops.change_coordinate_frame(
              boxlist_filtered, clip_window)
      max_selection_size = tf.minimum(max_size_per_class,
                                      boxlist_filtered.num_boxes())
      selected_indices = tf.image.non_max_suppression(
          boxlist_filtered.get(),
          boxlist_filtered.get_field(fields.BoxListFields.scores),
          max_selection_size,
          iou_threshold=iou_thresh)
      nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
      # Label every surviving box with the index of the class being processed.
      nms_result.add_field(
          fields.BoxListFields.classes,
          (tf.zeros_like(nms_result.get_field(fields.BoxListFields.scores)) +
           class_idx))
      selected_boxes_list.append(nms_result)
    selected_boxes = box_list_ops.concatenate(selected_boxes_list)
    sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                              fields.BoxListFields.scores)
    if max_total_size:
      max_total_size = tf.minimum(max_total_size, sorted_boxes.num_boxes())
      sorted_boxes = box_list_ops.gather(sorted_boxes,
                                         tf.range(max_total_size))
    return sorted_boxes
def build_graph(mode, config, sequence_example_file_paths=None):
  """Constructs the TensorFlow graph for an event-sequence RNN.

  Only the ops relevant to `mode` are created. Tensors and ops that callers
  need are published through graph collections ('loss', 'perplexity',
  'accuracy', 'global_step', 'learning_rate', 'train_op', 'summary_op',
  'inputs', 'temperature', 'softmax', 'initial_state', 'final_state').

  Args:
    mode: 'train', 'eval', or 'generate'.
    config: An EventSequenceRnnConfig containing the encoder/decoder and
      HParams to use.
    sequence_example_file_paths: A list of paths to TFRecord files containing
      tf.train.SequenceExample protos. Only read in 'train' and 'eval' modes.

  Returns:
    A tf.Graph instance which contains the TF ops.

  Raises:
    ValueError: If mode is not 'train', 'eval', or 'generate'.
  """
  if mode not in ('train', 'eval', 'generate'):
    raise ValueError("The mode parameter must be 'train', 'eval', "
                     "or 'generate'. The mode parameter was: %s" % mode)

  generating = mode == 'generate'

  hparams = config.hparams
  encoder_decoder = config.encoder_decoder

  tf.logging.info('hparams = %s', hparams.values())

  input_size = encoder_decoder.input_size
  num_classes = encoder_decoder.num_classes
  no_event_label = encoder_decoder.default_event_label

  with tf.Graph().as_default() as graph:
    inputs = labels = lengths = None

    # Inputs come from padded batches of examples, or from a placeholder that
    # is fed at generation time.
    if not generating:
      inputs, labels, lengths = magenta.common.get_padded_batch(
          sequence_example_file_paths, hparams.batch_size, input_size)
    else:
      inputs = tf.placeholder(tf.float32,
                              [hparams.batch_size, None, input_size])

    # Dropout is disabled (keep probability 1.0) while generating.
    cell = make_rnn_cell(
        hparams.rnn_layer_sizes,
        dropout_keep_prob=(1.0 if generating else hparams.dropout_keep_prob),
        attn_length=hparams.attn_length)

    initial_state = cell.zero_state(hparams.batch_size, tf.float32)

    outputs, final_state = tf.nn.dynamic_rnn(
        cell, inputs, initial_state=initial_state, swap_memory=True)

    outputs_flat = tf.reshape(outputs, [-1, cell.output_size])
    logits_flat = tf.contrib.layers.linear(outputs_flat, num_classes)

    if not generating:
      labels_flat = tf.reshape(labels, [-1])

      # Mask out padding and, optionally, the first N steps of each sequence.
      step_mask = tf.sequence_mask(lengths)
      if hparams.skip_first_n_losses:
        skip = tf.minimum(lengths, hparams.skip_first_n_losses)
        skip_mask = tf.sequence_mask(skip, maxlen=tf.reduce_max(lengths))
        step_mask = tf.logical_and(step_mask, tf.logical_not(skip_mask))
      step_mask = tf.cast(step_mask, tf.float32)
      mask_flat = tf.reshape(step_mask, [-1])

      num_logits = tf.to_float(tf.reduce_sum(lengths))

      def per_logit(total):
        # Normalizes a summed quantity by the total sequence length.
        return tf.reduce_sum(total) / num_logits

      def hit_rate(positions):
        # Percentage of correct predictions among the selected positions.
        hits = tf.reduce_sum(tf.multiply(correct_predictions, positions))
        return hits / tf.reduce_sum(positions) * 100

      nonempty_assert = tf.Assert(tf.greater(num_logits, 0.), [num_logits])
      with tf.control_dependencies([nonempty_assert]):
        softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels_flat, logits=logits_flat)
      loss = per_logit(mask_flat * softmax_cross_entropy)
      perplexity = per_logit(mask_flat * tf.exp(softmax_cross_entropy))

      correct_predictions = tf.to_float(
          tf.nn.in_top_k(logits_flat, labels_flat, 1)) * mask_flat
      accuracy = per_logit(correct_predictions) * 100

      event_positions = (
          tf.to_float(tf.not_equal(labels_flat, no_event_label)) * mask_flat)
      event_accuracy = hit_rate(event_positions)

      no_event_positions = (
          tf.to_float(tf.equal(labels_flat, no_event_label)) * mask_flat)
      no_event_accuracy = hit_rate(no_event_positions)

      global_step = tf.Variable(0, trainable=False, name='global_step')

      for collection_name, value in (('loss', loss),
                                     ('perplexity', perplexity),
                                     ('accuracy', accuracy),
                                     ('global_step', global_step)):
        tf.add_to_collection(collection_name, value)

      summaries = [
          tf.summary.scalar('loss', loss),
          tf.summary.scalar('perplexity', perplexity),
          tf.summary.scalar('accuracy', accuracy),
          tf.summary.scalar('event_accuracy', event_accuracy),
          tf.summary.scalar('no_event_accuracy', no_event_accuracy),
      ]

      if mode == 'train':
        learning_rate = tf.train.exponential_decay(
            hparams.initial_learning_rate,
            global_step,
            hparams.decay_steps,
            hparams.decay_rate,
            staircase=True,
            name='learning_rate')

        optimizer = tf.train.AdamOptimizer(learning_rate)
        trainable = tf.trainable_variables()
        raw_gradients = tf.gradients(loss, trainable)
        clipped_gradients, _ = tf.clip_by_global_norm(raw_gradients,
                                                      hparams.clip_norm)
        train_op = optimizer.apply_gradients(
            zip(clipped_gradients, trainable), global_step)
        tf.add_to_collection('learning_rate', learning_rate)
        tf.add_to_collection('train_op', train_op)

        summaries.append(tf.summary.scalar('learning_rate', learning_rate))

      # A merged summary op is only published for evaluation.
      if mode == 'eval':
        tf.add_to_collection('summary_op', tf.summary.merge(summaries))

    else:
      temperature = tf.placeholder(tf.float32, [])
      scaled_logits = tf.div(logits_flat,
                             tf.fill([num_classes], temperature))
      softmax_flat = tf.nn.softmax(scaled_logits)
      softmax = tf.reshape(softmax_flat,
                           [hparams.batch_size, -1, num_classes])

      tf.add_to_collection('inputs', inputs)
      tf.add_to_collection('temperature', temperature)
      tf.add_to_collection('softmax', softmax)
      # Flatten state tuples for metagraph compatibility.
      for state_tensor in tf_nest.flatten(initial_state):
        tf.add_to_collection('initial_state', state_tensor)
      for state_tensor in tf_nest.flatten(final_state):
        tf.add_to_collection('final_state', state_tensor)

  return graph
def assert_constraints(outputs,
                       monotonicity,
                       output_min,
                       output_max,
                       clamp_min=False,
                       clamp_max=False,
                       debug_tensors=None,
                       eps=1e-6):
  """Builds assertions that `outputs` satisfy the given constraints.

  Args:
    outputs: Tensor of shape `(num_output_values, units)` holding outputs of a
      pwl calibration layer to be tested. If monotonicity is specified these
      outputs must be for consecutive inputs.
    monotonicity: One of {-1, 0, 1}. -1 for decreasing, 1 for increasing,
      0 means no monotonicity checks.
    output_min: Lower bound or None.
    output_max: Upper bound or None.
    clamp_min: Whether one of outputs must match output_min.
    clamp_max: Whether one of outputs must match output_max.
    debug_tensors: None or list of anything convertible to tensor (for example
      tensors or strings) which will be printed in case of a constraint
      violation.
    eps: Allowed constraints violation.

  Raises:
    ValueError: If monotonicity is not one of {-1, 0, 1}.

  Returns:
    List of assertion ops in graph mode or immediately asserts in eager mode.
  """
  context = ["Outputs: ", outputs, "Epsilon: ", eps]
  if debug_tensors:
    context.extend(debug_tensors)

  checks = []

  def add_check(condition, header):
    # Every assertion shares the same diagnostic payload and summarize size.
    checks.append(
        tf.Assert(
            condition, data=header + context, summarize=outputs.shape[0]))

  if output_min is not None:
    lowest = tf.reduce_min(outputs, axis=0)
    if clamp_min:
      # The smallest output must coincide with the bound (within eps).
      condition = tf.reduce_all(tf.abs(lowest - output_min) <= eps)
      header = ["Clamp_min violation.", "output_min:", output_min]
    else:
      condition = tf.reduce_all(lowest >= output_min - eps)
      header = ["Lower bound violation.", "output_min:", output_min]
    add_check(condition, header)

  if output_max is not None:
    highest = tf.reduce_max(outputs, axis=0)
    if clamp_max:
      # The largest output must coincide with the bound (within eps).
      condition = tf.reduce_all(tf.abs(highest - output_max) <= eps)
      header = ["Clamp_max violation.", "output_max:", output_max]
    else:
      condition = tf.reduce_all(highest <= output_max + eps)
      header = ["Upper bound violation.", "output_max:", output_max]
    add_check(condition, header)

  if monotonicity not in [-1, 0, 1]:
    raise ValueError(
        "'monotonicity' must be one of: [-1, 0, 1]. It is: %s" % monotonicity)

  if monotonicity == 0:
    return checks

  # Signed differences of consecutive rows must not fall below -eps.
  steps = outputs[1:] - outputs[0:-1]
  add_check(
      tf.reduce_min(steps * monotonicity) >= -eps,
      ["Monotonicity violation.", "monotonicity:", monotonicity])
  return checks
def _random_crop(image_list, crop_height, crop_width): """Crops the given list of images. The function applies the same crop to each image in the list. This can be effectively applied when there are multiple image inputs of the same dimension such as: image, depths, normals = _random_crop([image, depths, normals], 120, 150) Args: image_list: a list of image tensors of the same dimension but possibly varying channel. crop_height: the new height. crop_width: the new width. Returns: the image_list with cropped images. Raises: ValueError: if there are multiple image inputs provided with different size or the images are smaller than the crop dimensions. """ if not image_list: raise ValueError('Empty image_list.') # Compute the rank assertions. rank_assertions = [] for i in range(len(image_list)): image_rank = tf.rank(image_list[i]) rank_assert = tf.Assert(tf.equal(image_rank, 3), [ 'Wrong rank for tensor %s [expected] [actual]', image_list[i].name, 3, image_rank ]) rank_assertions.append(rank_assert) with tf.control_dependencies([rank_assertions[0]]): image_shape = tf.shape(image_list[0]) image_height = image_shape[0] image_width = image_shape[1] crop_size_assert = tf.Assert( tf.logical_and(tf.greater_equal(image_height, crop_height), tf.greater_equal(image_width, crop_width)), ['Crop size greater than the image size.']) asserts = [rank_assertions[0], crop_size_assert] for i in range(1, len(image_list)): image = image_list[i] asserts.append(rank_assertions[i]) with tf.control_dependencies([rank_assertions[i]]): shape = tf.shape(image) height = shape[0] width = shape[1] height_assert = tf.Assert(tf.equal(height, image_height), [ 'Wrong height for tensor %s [expected][actual]', image.name, height, image_height ]) width_assert = tf.Assert(tf.equal(width, image_width), [ 'Wrong width for tensor %s [expected][actual]', image.name, width, image_width ]) asserts.extend([height_assert, width_assert]) # Create a random bounding box. 
# # Use tf.random_uniform and not numpy.random.rand as doing the former would # generate random numbers at graph eval time, unlike the latter which # generates random numbers at graph definition time. with tf.control_dependencies(asserts): max_offset_height = tf.reshape(image_height - crop_height + 1, []) with tf.control_dependencies(asserts): max_offset_width = tf.reshape(image_width - crop_width + 1, []) offset_height = tf.random_uniform([], maxval=max_offset_height, dtype=tf.int32) offset_width = tf.random_uniform([], maxval=max_offset_width, dtype=tf.int32) return [ _crop(image, offset_height, offset_width, crop_height, crop_width) for image in image_list ]
def assert_in_range(x, min_value, max_value):
  """Returns a tf.Assert op checking that all of `x` lies in a closed range.

  Args:
    x: Tensor whose minimum and maximum are checked; also used as the data
      attached to the assertion.
    min_value: Inclusive lower bound for the minimum of `x`.
    max_value: Inclusive upper bound for the maximum of `x`.

  Returns:
    The tf.Assert op.
  """
  above_lower = tf.greater_equal(tf.reduce_min(x), min_value)
  below_upper = tf.less_equal(tf.reduce_max(x), max_value)
  within_bounds = tf.logical_and(above_lower, below_upper)
  return tf.Assert(within_bounds, [x])
def _raise():
  """Emits an always-failing tf.Assert carrying `str(e)`; returns `()`.

  `e` is not defined in this function; it is read from the surrounding scope.
  """
  message = str(e)
  tf.Assert(False, [message])
  return ()
def f(x):
  """Adds `x` to itself on each of ten iterations of a tf.range loop.

  Every iteration also prints the loop counter with tf.print and asserts that
  the counter is below 10.

  Args:
    x: Value to accumulate into.

  Returns:
    The value of `x` after the loop.
  """
  for step in tf.range(10):
    tf.print(step)
    tf.Assert(step < 10, ['a'])
    x += x
  return x
def generate_trips(self, min_gap=1, max_gap=5):
  """Builds a tf.data.Dataset of frame groups sampled at several strides.

  For every stride in [min_gap, max_gap] and every valid center index, four
  entries are stacked along axis 1 for each field: the frame `stride` before
  the center, the frame `stride` after it, the center frame itself, and a
  frame at the center index jittered by a random offset in [-40, 40) and
  clamped to the sequence bounds.

  Args:
    min_gap: (int) the minimum offset between two frames of a sampled group.
    max_gap: (int) the maximum offset between two frames of a sampled group.

  Returns:
    A tf.data.Dataset whose elements are ViewTrip objects built from the
    stacked timestamp, rgb, pano, depth, normal and pose fields.
  """
  field_names = ('timestamp', 'rgb', 'pano', 'depth', 'normal', 'pose')

  def to_view_trip(timestamps, rgbs, panos, depths, normals, poses):
    """Wraps one sliced data tuple into a ViewTrip."""
    return ViewTrip(self.scene_id, self.sequence_id, timestamps, rgbs, panos,
                    depths, normals, tf.zeros([1]), poses,
                    self.intrinsics[0], self.resolution[0])

  def stack_frames(name, stride, rand_inds):
    """Stacks the before / after / center / jittered frames of one field."""
    return tf.stack([
        getattr(self, name)[:-2 * stride],
        getattr(self, name)[2 * stride:],
        getattr(self, name)[stride:-stride],
        tf.gather(getattr(self, name), rand_inds)
    ], axis=1)

  gap_assert = tf.Assert(
      tf.less(max_gap, self.length()), [max_gap, self.length()])
  with tf.control_dependencies([gap_assert]):
    collected = {name: [] for name in field_names}

    # Generate groups with an offset that ranges from 'min_gap' to 'max_gap'.
    for stride in range(min_gap, max_gap + 1):
      centers = tf.range(stride, self.length() - stride)
      jitter = tf.random.uniform(
          minval=-40,
          maxval=40,
          shape=[self.length() - 2 * stride],
          dtype=tf.int32)
      # Clamp the jittered indices into [0, length - 1].
      rand_inds = tf.minimum(
          tf.maximum(centers + jitter, 0), self.length() - 1)
      for name in field_names:
        collected[name].append(stack_frames(name, stride, rand_inds))

    # Join the per-stride pieces of every field along the first axis.
    merged = tuple(tf.concat(collected[name], 0) for name in field_names)
    dataset = tf.data.Dataset.from_tensor_slices(merged)
    return dataset.map(to_view_trip)
def train_uvf(train_dir,
              environment=None,
              num_bin_actions=3,
              agent_class=None,
              meta_agent_class=None,
              state_preprocess_class=None,
              inverse_dynamics_class=None,
              exp_action_wrapper=None,
              replay_buffer=None,
              meta_replay_buffer=None,
              replay_num_steps=1,
              meta_replay_num_steps=1,
              critic_optimizer=None,
              actor_optimizer=None,
              meta_critic_optimizer=None,
              meta_actor_optimizer=None,
              repr_optimizer=None,
              relabel_contexts=False,
              meta_relabel_contexts=False,
              batch_size=64,
              repeat_size=0,
              num_episodes_train=2000,
              initial_episodes=2,
              initial_steps=None,
              num_updates_per_observation=1,
              num_collect_per_update=1,
              num_collect_per_meta_update=1,
              gamma=1.0,
              meta_gamma=1.0,
              reward_scale_factor=1.0,
              target_update_period=1,
              should_stop_early=None,
              clip_gradient_norm=0.0,
              summarize_gradients=False,
              debug_summaries=False,
              log_every_n_steps=100,
              prefetch_queue_capacity=2,
              policy_save_dir='policy',
              save_policy_every_n_steps=1000,
              save_policy_interval_secs=0,
              replay_context_ratio=0.0,
              next_state_as_context_ratio=0.0,
              state_index=0,
              zero_timer_ratio=0.0,
              timer_index=-1,
              debug=False,
              max_policies_to_save=None,
              max_steps_per_episode=None,
              load_path=LOAD_PATH):
  """Train an agent.

  Summary of what this function builds, in order:

  1. The low-level agent, the meta agent, the state preprocessor and the
     inverse-dynamics model, each in its own variable scope.
  2. Counter variables and their summaries.
  3. Two identical "collect experience" ops: one used for the initial
     collection phase and one used during training.
  4. For the meta agent ('meta') and then the low-level agent ('nometa'):
     a. fetch a random replay batch and push it through a prefetch queue;
     b. dequeue a batch (optionally tiling it `repeat_size + 1` times);
     c. preprocess the states and the next states;
     d. for 'meta', optionally re-label the meta action according to
        FLAGS.goal_sample_strategy ('None', 'FuN' or 'sample');
     e. for 'meta', if the state preprocessor is trainable, create the
        representation training op;
     f. sample the current and next contexts (replaced by the contexts
        stored in the batch when the mode's relabel flag is False);
     g. compute context rewards and discounts ('meta' uses raw states and
        the environment rewards, 'nometa' uses the state representations);
     h. scale the discounts by the context entry at `agent.gamma_index`, or
        by the discount factor (`gamma` / `meta_gamma`) if that is None;
     i. create the critic and actor losses and their training ops.
  5. Target-network update ops that run after the corresponding train ops.
  6. The low-level train op (actor + critic + representation op) and the
     meta train op (meta actor + meta critic).
  7. Savers for the policy variables and for the low-level variables.
  8. The train-step function and the initialization function, which runs
     the local initializer, syncs target networks, optionally restores the
     low-level variables from `load_path`, checks that the global step is
     zero, and collects `initial_steps` steps of experience.

  Args:
    train_dir: Directory passed to slim.learning.train and used as the root
      of the policy checkpoint path.
    environment: Environment wrapped in create_maze_env.TFPyEnvironment.
    agent_class: Class of the low-level agent; its ACTION_TYPE must be
      'continuous' and equal to that of `meta_agent_class`.
    meta_agent_class: Class of the meta agent.
    state_preprocess_class: Class of the state preprocessor, constructed
      with no arguments.
    inverse_dynamics_class: Class of the inverse-dynamics model.
    replay_buffer: Replay buffer of the low-level agent.
    meta_replay_buffer: Replay buffer of the meta agent.
    replay_num_steps: `num_steps` used when sampling low-level batches.
    meta_replay_num_steps: `num_steps` used when sampling meta batches.
    critic_optimizer: Optimizer of the low-level critic.
    actor_optimizer: Optimizer of the low-level actor.
    meta_critic_optimizer: Optimizer of the meta critic.
    meta_actor_optimizer: Optimizer of the meta actor.
    repr_optimizer: Optimizer of the representation loss.
    relabel_contexts: If False, low-level contexts are taken from the tail
      of the dequeued batch instead of from `agent.sample_contexts`.
    meta_relabel_contexts: Same as `relabel_contexts`, for the meta agent.
    batch_size: Batch size requested from the replay buffers.
    repeat_size: If > 0, each dequeued tensor is tiled `repeat_size + 1`
      times along the first axis.
    num_episodes_train: Number of training episodes; together with
      `max_steps_per_episode` it bounds the number of steps.
    initial_episodes: Used to derive `initial_steps` when that is None.
    initial_steps: Number of experience-collection steps run during
      initialization.
    num_updates_per_observation: Forwarded to train_utils.TrainStep.
    num_collect_per_update: Forwarded to train_utils.TrainStep.
    num_collect_per_meta_update: Forwarded to train_utils.TrainStep.
    gamma: Discount factor of the low-level agent.
    meta_gamma: Discount factor of the meta agent.
    reward_scale_factor: Multiplier applied to sampled rewards.
    target_update_period: Period of target updates; the actor loss is
      zeroed on update counts that are not a multiple of it.
    should_stop_early: Forwarded to train_utils.TrainStep.
    clip_gradient_norm: Forwarded to slim.learning.create_train_op.
    summarize_gradients: Forwarded to slim.learning.create_train_op.
    debug_summaries: Passed to the agents; also enables extra batch and
      variable histogram summaries.
    log_every_n_steps: Forwarded to train_utils.TrainStep.
    prefetch_queue_capacity: Capacity of the batch prefetch queues.
    policy_save_dir: Sub-directory of `train_dir` for policy checkpoints.
    save_policy_every_n_steps: Forwarded to train_utils.TrainStep.
    save_policy_interval_secs: If > 0, seconds to sleep after each policy
      save.
    max_policies_to_save: `max_to_keep` of the policy saver.
    max_steps_per_episode: Steps per episode; defaults to
      `tf_env.pyenv.max_episode_steps`.
    load_path: If truthy, checkpoint from which the low-level variables are
      restored during initialization.
    num_bin_actions: Not referenced in this function body.
    exp_action_wrapper: Not referenced in this function body.
    replay_context_ratio: Not referenced in this function body.
    next_state_as_context_ratio: Not referenced in this function body.
    state_index: Not referenced in this function body.
    zero_timer_ratio: Not referenced in this function body.
    timer_index: Not referenced in this function body.
    debug: Not referenced in this function body.

  Returns:
    The return value of slim.learning.train.

  Raises:
    AssertionError: if `max_steps_per_episode` cannot be determined, if the
      agents' action types are not both 'continuous', if
      FLAGS.goal_sample_strategy is not a supported value, if the number of
      created train ops is not four, or (during initialization) if the
      global step is not zero.
  """
  tf_env = create_maze_env.TFPyEnvironment(environment)
  observation_spec = [tf_env.observation_spec()]
  action_spec = [tf_env.action_spec()]

  max_steps_per_episode = (
      max_steps_per_episode or tf_env.pyenv.max_episode_steps)

  assert max_steps_per_episode, 'max_steps_per_episode need to be set'

  if initial_steps is None:
    initial_steps = initial_episodes * max_steps_per_episode

  # Only continuous action spaces are supported.
  if agent_class.ACTION_TYPE == 'discrete':
    assert False
  else:
    assert agent_class.ACTION_TYPE == 'continuous'

  assert agent_class.ACTION_TYPE == meta_agent_class.ACTION_TYPE
  with tf.variable_scope('meta_agent'):
    meta_agent = meta_agent_class(
        observation_spec,
        action_spec,
        tf_env,
        debug_summaries=debug_summaries)
  meta_agent.set_replay(replay=meta_replay_buffer)

  with tf.variable_scope('uvf_agent'):
    uvf_agent = agent_class(
        observation_spec,
        action_spec,
        tf_env,
        debug_summaries=debug_summaries)
    uvf_agent.set_meta_agent(agent=meta_agent)
    uvf_agent.set_replay(replay=replay_buffer)

  with tf.variable_scope('state_preprocess'):
    state_preprocess = state_preprocess_class()

  with tf.variable_scope('inverse_dynamics'):
    inverse_dynamics = inverse_dynamics_class(
        meta_agent.sub_context_as_action_specs[0])

  # Create counter variables
  global_step = tf.contrib.framework.get_or_create_global_step()
  num_episodes = tf.Variable(0, dtype=tf.int64, name='num_episodes')
  num_resets = tf.Variable(0, dtype=tf.int64, name='num_resets')
  num_updates = tf.Variable(0, dtype=tf.int64, name='num_updates')
  num_meta_updates = tf.Variable(0, dtype=tf.int64, name='num_meta_updates')
  episode_rewards = tf.Variable([0.] * 100, name='episode_rewards')
  episode_meta_rewards = tf.Variable([0.] * 100, name='episode_meta_rewards')

  # Create counter variables summaries
  train_utils.create_counter_summaries([
      ('environment_steps', global_step),
      ('num_episodes', num_episodes),
      ('num_resets', num_resets),
      ('num_updates', num_updates),
      ('num_meta_updates', num_meta_updates),
      ('replay_buffer_adds', replay_buffer.get_num_adds()),
      ('meta_replay_buffer_adds', meta_replay_buffer.get_num_adds()),
  ])

  tf.summary.scalar('avg_episode_rewards',
                    tf.reduce_mean(episode_rewards[1:]))
  tf.summary.scalar('avg_episode_meta_rewards',
                    tf.reduce_mean(episode_meta_rewards[1:]))
  tf.summary.histogram('episode_rewards', episode_rewards[1:])
  tf.summary.histogram('episode_meta_rewards', episode_meta_rewards[1:])

  # Create init ops
  action_fn = uvf_agent.action
  action_fn = uvf_agent.add_noise_fn(action_fn, global_step=None)
  meta_action_fn = meta_agent.action
  meta_action_fn = meta_agent.add_noise_fn(meta_action_fn, global_step=None)
  meta_actions_fn = meta_agent.actions
  meta_actions_fn = meta_agent.add_noise_fn(meta_actions_fn, global_step=None)
  init_collect_experience_op = collect_experience(
      tf_env,
      uvf_agent,
      meta_agent,
      state_preprocess,
      replay_buffer,
      meta_replay_buffer,
      action_fn,
      meta_action_fn,
      environment_steps=global_step,
      num_episodes=num_episodes,
      num_resets=num_resets,
      episode_rewards=episode_rewards,
      episode_meta_rewards=episode_meta_rewards,
      store_context=True,
      disable_agent_reset=False,
  )

  # Create train ops (built with exactly the same arguments as the init op).
  collect_experience_op = collect_experience(
      tf_env,
      uvf_agent,
      meta_agent,
      state_preprocess,
      replay_buffer,
      meta_replay_buffer,
      action_fn,
      meta_action_fn,
      environment_steps=global_step,
      num_episodes=num_episodes,
      num_resets=num_resets,
      episode_rewards=episode_rewards,
      episode_meta_rewards=episode_meta_rewards,
      store_context=True,
      disable_agent_reset=False,
  )

  train_op_list = []
  repr_train_op = tf.constant(0.0)
  for mode in ['meta', 'nometa']:
    if mode == 'meta':
      agent = meta_agent
      buff = meta_replay_buffer
      critic_opt = meta_critic_optimizer
      actor_opt = meta_actor_optimizer
      relabel = meta_relabel_contexts
      num_steps = meta_replay_num_steps
      # Plain scalar (not a 1-tuple), so multiplying the discounts below
      # cannot introduce an extra dimension.
      my_gamma = meta_gamma
      n_updates = num_meta_updates
    else:
      agent = uvf_agent
      buff = replay_buffer
      critic_opt = critic_optimizer
      actor_opt = actor_optimizer
      relabel = relabel_contexts
      num_steps = replay_num_steps
      my_gamma = gamma
      n_updates = num_updates

    with tf.name_scope(mode):
      batch = buff.get_random_batch(batch_size, num_steps=num_steps)
      states, actions, rewards, discounts, next_states = batch[:5]
      with tf.name_scope('Reward'):
        tf.summary.scalar('average_step_reward', tf.reduce_mean(rewards))
      rewards *= reward_scale_factor
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [states, actions, rewards, discounts, next_states] + batch[5:],
          capacity=prefetch_queue_capacity,
          name='batch_queue')

      batch_dequeue = batch_queue.dequeue()
      if repeat_size > 0:
        # Tile along the first axis only. The comprehension variable is named
        # differently from `batch` so the sampled batch is never clobbered.
        batch_dequeue = [
            tf.tile(tensor,
                    (repeat_size + 1,) + (1,) * (tensor.shape.ndims - 1))
            for tensor in batch_dequeue
        ]
        # NOTE(review): batch_size is rescaled on every loop iteration, so
        # with repeat_size > 0 the 'nometa' pass samples and tiles from the
        # already enlarged value - confirm this compounding is intended.
        batch_size *= (repeat_size + 1)
      states, actions, rewards, discounts, next_states = batch_dequeue[:5]
      if mode == 'meta':
        low_states = batch_dequeue[5]
        low_actions = batch_dequeue[6]
        low_state_reprs = state_preprocess(low_states)
      state_reprs = state_preprocess(states)
      next_state_reprs = state_preprocess(next_states)

      if mode == 'meta':  # Re-label meta-action
        prev_actions = actions
        if FLAGS.goal_sample_strategy == 'None':
          pass
        elif FLAGS.goal_sample_strategy == 'FuN':
          actions = inverse_dynamics.sample(
              state_reprs, next_state_reprs, 1, prev_actions, sc=0.1)
          actions = tf.stop_gradient(actions)
        elif FLAGS.goal_sample_strategy == 'sample':
          actions = sample_best_meta_actions(
              state_reprs, next_state_reprs, prev_actions,
              low_states, low_actions, low_state_reprs,
              inverse_dynamics, uvf_agent, k=10)
        else:
          assert False

      if state_preprocess.trainable and mode == 'meta':
        # Representation learning is based on meta-transitions, but is trained
        # along with low-level policy updates.
        repr_loss, _, _ = state_preprocess.loss(
            states, next_states, low_actions, low_states)
        repr_train_op = slim.learning.create_train_op(
            repr_loss,
            repr_optimizer,
            global_step=None,
            update_ops=None,
            summarize_gradients=summarize_gradients,
            clip_gradient_norm=clip_gradient_norm,
            variables_to_train=state_preprocess.get_trainable_vars(),
        )

      # Get contexts for training
      contexts, next_contexts = agent.sample_contexts(
          mode='train',
          batch_size=batch_size,
          state=states,
          next_state=next_states,
      )
      if not relabel:
        # Use the contexts stored at the tail of the dequeued batch instead
        # of the freshly sampled ones (re-labelling in the style of TDM or
        # HER is skipped).
        contexts, next_contexts = (
            batch_dequeue[-2 * len(contexts):-1 * len(contexts)],
            batch_dequeue[-1 * len(contexts):])

      merged_states = agent.merged_states(states, contexts)
      merged_next_states = agent.merged_states(next_states, next_contexts)
      if mode == 'nometa':
        context_rewards, context_discounts = agent.compute_rewards(
            'train', state_reprs, actions, rewards, next_state_reprs,
            contexts)
      elif mode == 'meta':
        # Meta-agent uses sum of rewards, not context-specific rewards.
        _, context_discounts = agent.compute_rewards(
            'train', states, actions, rewards, next_states, contexts)
        context_rewards = rewards

      if agent.gamma_index is not None:
        context_discounts *= tf.cast(
            tf.reshape(contexts[agent.gamma_index], (-1,)),
            dtype=context_discounts.dtype)
      else:
        context_discounts *= my_gamma

      critic_loss = agent.critic_loss(merged_states, actions,
                                      context_rewards, context_discounts,
                                      merged_next_states)

      critic_loss = tf.reduce_mean(critic_loss)

      actor_loss = agent.actor_loss(merged_states, actions,
                                    context_rewards, context_discounts,
                                    merged_next_states)
      actor_loss *= tf.to_float(  # Only update actor every N steps.
          tf.equal(n_updates % target_update_period, 0))

      critic_train_op = slim.learning.create_train_op(
          critic_loss,
          critic_opt,
          global_step=n_updates,
          update_ops=None,
          summarize_gradients=summarize_gradients,
          clip_gradient_norm=clip_gradient_norm,
          variables_to_train=agent.get_trainable_critic_vars(),
      )
      critic_train_op = uvf_utils.tf_print(
          critic_train_op, [critic_train_op],
          message='critic_loss',
          print_freq=1000,
          name='critic_loss')
      train_op_list.append(critic_train_op)
      if actor_loss is not None:
        actor_train_op = slim.learning.create_train_op(
            actor_loss,
            actor_opt,
            global_step=None,
            update_ops=None,
            summarize_gradients=summarize_gradients,
            clip_gradient_norm=clip_gradient_norm,
            variables_to_train=agent.get_trainable_actor_vars(),
        )
        actor_train_op = uvf_utils.tf_print(
            actor_train_op, [actor_train_op],
            message='actor_loss',
            print_freq=1000,
            name='actor_loss')
        train_op_list.append(actor_train_op)

  # Entries 0-1 are the meta critic/actor ops, entries 2-3 the low-level ones.
  assert len(train_op_list) == 4
  # Update targets should happen after the networks have been updated:
  # tf.control_dependencies makes the listed ops run before any op created
  # inside the with-block.
  with tf.control_dependencies(train_op_list[2:]):
    update_targets_op = uvf_utils.periodically(
        uvf_agent.update_targets, target_update_period, 'update_targets')
  if meta_agent is not None:
    with tf.control_dependencies(train_op_list[:2]):
      update_meta_targets_op = uvf_utils.periodically(
          meta_agent.update_targets, target_update_period, 'update_targets')

  assert_op = tf.Assert(  # Hack to get training to stop.
      tf.less_equal(global_step,
                    200 + num_episodes_train * max_steps_per_episode),
      [global_step])
  with tf.control_dependencies([update_targets_op, assert_op]):
    train_op = tf.add_n(train_op_list[2:], name='post_update_targets')
    # Representation training steps on every low-level policy training step.
    train_op += repr_train_op
  with tf.control_dependencies([update_meta_targets_op, assert_op]):
    meta_train_op = tf.add_n(train_op_list[:2],
                             name='post_update_meta_targets')

  if debug_summaries:
    # NOTE(review): `train_` is not defined anywhere in this function -
    # confirm it is a real module alias and not a typo for `train_utils`.
    train_.gen_debug_batch_summaries(batch)
    slim.summaries.add_histogram_summaries(
        uvf_agent.get_trainable_critic_vars(), 'critic_vars')
    slim.summaries.add_histogram_summaries(
        uvf_agent.get_trainable_actor_vars(), 'actor_vars')

  train_ops = train_utils.TrainOps(train_op, meta_train_op,
                                   collect_experience_op)

  policy_save_path = os.path.join(train_dir, policy_save_dir, 'model.ckpt')
  policy_vars = uvf_agent.get_actor_vars() + meta_agent.get_actor_vars() + [
      global_step, num_episodes, num_resets
  ] + list(uvf_agent.context_vars) + list(
      meta_agent.context_vars) + state_preprocess.get_trainable_vars()
  # add critic vars, since some test evaluation depends on them
  policy_vars += (uvf_agent.get_trainable_critic_vars() +
                  meta_agent.get_trainable_critic_vars())
  policy_saver = tf.train.Saver(
      policy_vars, max_to_keep=max_policies_to_save, sharded=False)

  lowlevel_vars = (uvf_agent.get_actor_vars() +
                   uvf_agent.get_trainable_critic_vars() +
                   state_preprocess.get_trainable_vars())
  lowlevel_saver = tf.train.Saver(lowlevel_vars)

  def policy_save_fn(sess):
    """Saves the policy variables, then optionally sleeps."""
    policy_saver.save(
        sess, policy_save_path, global_step=global_step,
        write_meta_graph=False)
    if save_policy_interval_secs > 0:
      tf.logging.info(
          'Wait %d secs after save policy.' % save_policy_interval_secs)
      time.sleep(save_policy_interval_secs)

  train_step_fn = train_utils.TrainStep(
      max_number_of_steps=num_episodes_train * max_steps_per_episode + 100,
      num_updates_per_observation=num_updates_per_observation,
      num_collect_per_update=num_collect_per_update,
      num_collect_per_meta_update=num_collect_per_meta_update,
      log_every_n_steps=log_every_n_steps,
      policy_save_fn=policy_save_fn,
      save_policy_every_n_steps=save_policy_every_n_steps,
      should_stop_early=should_stop_early).train_step

  local_init_op = tf.local_variables_initializer()
  init_targets_op = tf.group(uvf_agent.update_targets(1.0),
                             meta_agent.update_targets(1.0))

  def initialize_training_fn(sess):
    """Initialize training function."""
    sess.run(local_init_op)
    sess.run(init_targets_op)
    if load_path:
      tf.logging.info('Restoring low-level from %s' % load_path)
      lowlevel_saver.restore(sess, load_path)
    global_step_value = sess.run(global_step)
    assert global_step_value == 0, 'Global step should be zero.'
    collect_experience_call = sess.make_callable(
        init_collect_experience_op)

    # Collect the initial experience before training starts.
    for _ in range(initial_steps):
      collect_experience_call()

  train_saver = tf.train.Saver(max_to_keep=2, sharded=True)
  tf.logging.info('train dir: %s', train_dir)
  return slim.learning.train(
      train_ops,
      train_dir,
      train_step_fn=train_step_fn,
      save_interval_secs=FLAGS.save_interval_secs,
      saver=train_saver,
      log_every_n_steps=0,
      global_step=global_step,
      master="",
      is_chief=(FLAGS.task == 0),
      save_summaries_secs=FLAGS.save_summaries_secs,
      init_fn=initialize_training_fn)
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Only 256x256 inputs are accepted. A statically known size is validated
    immediately; an unknown size is validated by an assertion that runs with
    the graph.

    Args:
        preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.

    Returns:
        feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i]

    Raises:
        ValueError: if the statically known image height or width is not
            256 pixels.
    """
    image_shape = preprocessed_inputs.get_shape()
    image_shape.assert_has_rank(4)
    image_height = image_shape[1].value
    image_width = image_shape[2].value

    if image_height is None or image_width is None:
        # Spatial size is unknown at graph-construction time: defer the check
        # to run time and route the input through the assertion.
        shape_assert = tf.Assert(
            tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                           tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
            ['image size must be 256 in both height and width.'])
        with tf.control_dependencies([shape_assert]):
            preprocessed_inputs = tf.identity(preprocessed_inputs)
    elif image_height != 256 or image_width != 256:
        raise ValueError(
            'image size must be = 256 in both height and width;'
            ' image dim = %d,%d' % (image_height, image_width))

    # Layout handed to multi_resolution_feature_maps: two named MobileNet
    # endpoints followed by three entries with explicit depth / kernel size.
    # NOTE(review): '' and -1 presumably mean "create a new layer" and
    # "keep the endpoint's own depth/kernel" -- confirm against
    # feature_map_generators.
    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       ''],
        'layer_depth': [-1, -1, 512, 256, 256],
        'conv_kernel_size': [-1, -1, 3, 3, 2],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
            # The caller's conv hyperparameters are applied to the base
            # network only when explicitly requested.
            with (slim.arg_scope(self._conv_hyperparams_fn())
                  if self._override_base_feature_extractor_hyperparams
                  else context_manager.IdentityContextManager()):
                _, image_features = mobilenet_v1.mobilenet_v1_base(
                    ops.pad_to_multiple(preprocessed_inputs,
                                        self._pad_to_multiple),
                    final_endpoint='Conv2d_13_pointwise',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    use_explicit_padding=self._use_explicit_padding,
                    scope=scope)
        with slim.arg_scope(self._conv_hyperparams_fn()):
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

    # dict.values() is a view on Python 3; materialize it so callers really
    # receive the list promised by the docstring (indexable, len()-able).
    return list(feature_maps.values())
def VQVAE_layer(self, inputs):
    """Quantize `inputs` against the codebook `self._w` using sampled indices.

    Steps performed by the visible code:
      1. Assert (at graph run time) that the last dimension of `inputs`
         equals `self._embedding_dim`, then merge axes 1 and 2 of `inputs`.
      2. Call `self.variable_def()` and increment
         `self.embedding_total_count`.
      3. Compute, per batch element, `|a|^2 - 2 a.w + |w|^2` between every
         flattened input row and `self._w` (squared Euclidean distance).
      4. Pick code indices with `self.temperature_sampler` and quantize them
         with `self.quantize`; also count how many sampled indices coincide
         with the plain argmin index.
      5. Build the loss and a straight-through output whose forward value is
         the quantized tensor while gradients flow to `inputs`.

    The `print` calls emit the symbolic tensors each time this method runs.

    Args:
        inputs: tensor whose last dimension must equal
            `self._embedding_dim`. Presumably [batch, H, W, embedding_dim]
            (the result is reshaped back to four dimensions) -- TODO confirm.

    Returns:
        dict with keys:
            'quantized_embd_out': straight-through quantized output.
            'VQ_loss': mean squared difference between the (gradient-stopped)
                quantized output and `inputs`.
            'encodings': multi-hot encodings of the sampled indices.
            'encoding_indices': NOTE -- currently also the multi-hot
                encodings, not integer indices.
            'assign_moving_avg_op': result of `self.loop_assign_moving_avg`.
            'temp_decay_op': result of `self.temperature_decay()`.
            'top_k_idx': number of sampled indices equal to the argmin index.
    """
    # Assert last dimension is same as self._embedding_dim
    print("inputs:", inputs)
    input_shape = tf.shape(inputs)
    with tf.control_dependencies([
            tf.Assert(tf.equal(input_shape[-1], self._embedding_dim), [input_shape])]):
        # Merge axes 1 and 2 so each batch element becomes a matrix of rows.
        flat_inputs = tf.reshape(inputs, [-1, input_shape[1] * input_shape[2], self._embedding_dim])
    print("flat_inputs:", flat_inputs)
    self.variable_def()  # set all variables
    self.embedding_total_count += 1

    # Author's note: _w is already quantized -- each index (latent variable
    # digit) has its own value, and the values of _w are the quantized
    # embedding output.
    def dist_fn(tensor_apart):
        # Expanded squared distance |a|^2 - 2ab + |b|^2; the three terms have
        # different shapes and rely on broadcasting when combined.
        # Assumes self._w is [embedding_dim, num_embeddings] (required by the
        # matmul) -- TODO confirm.
        a2 = tf.reduce_sum(tensor_apart ** 2, 1, keepdims=True)
        b2 = tf.reduce_sum(self._w ** 2, 0, keepdims=True)
        ab = tf.matmul(tensor_apart, self._w)
        return a2 - 2 * ab + b2

    # Applied per batch element; per the author's notes the result is
    # [b, H*W, num_embeddings].
    distances = tf.map_fn(dist_fn, flat_inputs)
    print("distances:", distances)

    # NOTE: two alternative variants used to live here as commented-out code:
    #   * "Gradient Based update": argmin indices, one-hot encodings and
    #     VQ_loss = e_latent_loss + self.commit_loss_coef * q_latent_loss.
    #   * "EMA Moving average(argmin)": argmin indices, one-hot encodings and
    #     VQ_loss = e_latent_loss.
    # Only the sampled ("non max") EMA variant below is active.

    ####
    #### EMA Moving average(non max)
    ####
    # Author's note: sampled indices are [b, H*W, top_k].
    non_max_encoding_indices = self.temperature_sampler(distances, self.sampling_temperature)
    print("non_max_encoding_indices",non_max_encoding_indices)
    # Plain nearest-code index, used only to measure agreement with the
    # sampled indices. (The print label says "argmax" but this is an argmin.)
    encoding_indices = tf.expand_dims(tf.argmin(distances,2),-1)
    print("non_max_encoding_indices(argmax)", encoding_indices)
    # Number of sampled indices that equal the argmin index.
    same_idx =tf.reduce_sum(tf.cast(tf.equal(non_max_encoding_indices,tf.cast(encoding_indices,tf.int32)),tf.float32))
    # One-hot every sampled index and sum over the second-to-last axis to get
    # a multi-hot vector; the transposes move axis 1 to the front for map_fn
    # and back afterwards.
    multi_hot_encodings = tf.map_fn(lambda x: tf.reduce_sum(tf.one_hot(x, self._num_embeddings), axis=-2),
                                    tf.transpose(non_max_encoding_indices, perm=[1, 0, 2]),
                                    dtype=tf.float32)
    multi_hot_encodings = tf.transpose(multi_hot_encodings, perm=[1, 0, 2])
    print("multi_hot_encodings:", multi_hot_encodings)
    non_max_quantized_embd_out = self.quantize(non_max_encoding_indices)
    # Restore the leading three dimensions of `inputs`; the last dimension is
    # taken from the static shape of the quantized tensor.
    non_max_quantized_embd_out = tf.reshape(non_max_quantized_embd_out,
                                            [tf.shape(inputs)[0],
                                             tf.shape(inputs)[1],
                                             tf.shape(inputs)[2],
                                             non_max_quantized_embd_out.get_shape().as_list()[2]])
    e_latent_loss = tf.reduce_mean((tf.stop_gradient(non_max_quantized_embd_out) - inputs) ** 2)  # embedding loss
    # The q_latent_loss term is intentionally not used in this variant.
    VQ_loss = e_latent_loss
    # Straight-through estimator: forward value is the quantized tensor,
    # gradient is passed to `inputs` unchanged.
    non_max_quantized_embd_out = inputs + tf.stop_gradient(
        non_max_quantized_embd_out - inputs)
    assign_moving_avg_op = self.loop_assign_moving_avg(multi_hot_encodings, flat_inputs)
    temp_decay_op = self.temperature_decay()
    return {
        "quantized_embd_out": non_max_quantized_embd_out,
        'VQ_loss': VQ_loss,
        'encodings': multi_hot_encodings,
        # NOTE(review): this key carries the multi-hot encodings rather than
        # integer indices -- confirm callers expect that.
        'encoding_indices': multi_hot_encodings,
        'assign_moving_avg_op': assign_moving_avg_op,
        'temp_decay_op': temp_decay_op,
        'top_k_idx':same_idx
    }
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width, pad_value):
    """Pad `image` up to a target size, filling with `pad_value`.

    Behaves like tf.image.pad_to_bounding_box but supports an arbitrary fill
    value and images whose size is unknown at graph-construction time. The
    fill is implemented by subtracting `pad_value`, zero-padding, and adding
    `pad_value` back.

    Args:
        image: 3-D tensor [height, width, channels]; a 4-D batched tensor is
            accepted as well.
        offset_height: Number of rows of padding to add on top.
        offset_width: Number of columns of padding to add on the left.
        target_height: Height of output image.
        target_width: Width of output image.
        pad_value: Value to pad the image tensor with.

    Returns:
        Tensor of the same rank and dtype as `image` with spatial size
        [target_height, target_width].

    Raises:
        ValueError: If the static rank of `image` is known and is neither 3
            nor 4. Size/offset incompatibilities are reported by run-time
            assertions.
    """
    with tf.name_scope(None, 'pad_to_bounding_box', [image]):
        image = tf.convert_to_tensor(image, name='image')
        input_dtype = image.dtype
        is_float = input_dtype == tf.float32 or input_dtype == tf.float64
        if not is_float:
            # Non-float images are widened to int32 so the subtraction of
            # pad_value below cannot overflow.
            image = tf.cast(image, tf.int32)

        rank_is_3 = tf.equal(tf.rank(image), 3)
        rank_is_4 = tf.equal(tf.rank(image), 4)
        rank_check = tf.Assert(tf.logical_or(rank_is_3, rank_is_4),
                               ['Wrong image tensor rank.'])
        with tf.control_dependencies([rank_check]):
            # Shift so that zero padding corresponds to pad_value.
            image = image - pad_value

        static_rank = image.get_shape().ndims
        if static_rank not in (None, 3, 4):
            raise ValueError('Input image must have either 3 or 4 dimensions.')
        is_batch = static_rank == 4
        if not is_batch:
            # Work on a batch of one; the extra axis is removed at the end.
            image = tf.expand_dims(image, 0)
            if static_rank is None:
                image.set_shape([None] * 4)

        _, height, width, _ = _image_dimensions(image, rank=4)

        width_check = tf.Assert(tf.greater_equal(target_width, width),
                                ['target_width must be >= width'])
        height_check = tf.Assert(tf.greater_equal(target_height, height),
                                 ['target_height must be >= height'])
        with tf.control_dependencies([width_check]):
            pad_right = target_width - offset_width - width
        with tf.control_dependencies([height_check]):
            pad_bottom = target_height - offset_height - height
        offsets_check = tf.Assert(
            tf.logical_and(tf.greater_equal(pad_right, 0),
                           tf.greater_equal(pad_bottom, 0)),
            ['target size not possible with the given target offsets'])

        # One [before, after] pair per axis: batch, height, width, channels.
        per_axis = [
            tf.stack([0, 0]),
            tf.stack([offset_height, pad_bottom]),
            tf.stack([offset_width, pad_right]),
            tf.stack([0, 0]),
        ]
        with tf.control_dependencies([offsets_check]):
            paddings = tf.stack(per_axis)

        result = tf.pad(image, paddings)
        if not is_batch:
            result = tf.squeeze(result, axis=[0])
        # Undo the earlier shift and restore the caller's dtype.
        result = result + pad_value
        if result.dtype != input_dtype:
            result = tf.cast(result, input_dtype)
        return result
def nearest_patch_swapping(content_features, style_features, patch_size=3):
    """Swap each content patch for its best-matching style patch.

    Style patches are extracted and used in two ways: L2-normalized as
    convolution kernels to score every content location, and unnormalized as
    transposed-convolution kernels to paint the winning patch back. The
    painted result is divided by the number of patches overlapping each
    pixel.

    Args:
        content_features: 4-D feature tensor; axes 1, 2 and 3 are used as
            height, width and channels.
        style_features: 4-D feature tensor with the same channel count. Its
            batch axis is squeezed away, so it must hold a single example,
            and the spatial size of its patch grid must be statically known.
        patch_size: side length of the square patches.

    Returns:
        Tensor with the dynamic shape of `content_features` holding the
        swapped feature maps.
    """
    content_shape = tf.shape(content_features)
    style_shape = tf.shape(style_features)
    # Both feature maps must share the channel dimension.
    same_channels = tf.Assert(tf.equal(content_shape[3], style_shape[3]),
                              ['number of channels must be the same'])

    with tf.control_dependencies([same_channels]):
        height = content_shape[1]
        width = content_shape[2]
        channels = content_shape[3]

        # Turn the style features into one flattened patch per location.
        patches = tf.extract_image_patches(
            style_features,
            ksizes=[1, patch_size, patch_size, 1],
            strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1],
            padding='SAME')
        patches = tf.squeeze(patches, axis=0)
        patches = tf.transpose(patches, perm=[2, 0, 1])

        # Every style location becomes one kernel.
        grid_rows, grid_cols = patches.get_shape().as_list()[1:3]
        num_patches = grid_rows * grid_cols
        swap_kernels = tf.reshape(
            patches, shape=(patch_size, patch_size, channels, num_patches))
        patch_norms = tf.norm(patches, axis=0, keep_dims=True)
        patch_norms = tf.reshape(patch_norms, shape=(1, 1, 1, num_patches))

        # Count how many patches cover each output pixel by accumulating a
        # mask of ones shifted to every position inside the patch window.
        ones = tf.ones((height, width), tf.float32)
        coverage = tf.zeros(
            (height + patch_size - 1, width + patch_size - 1), tf.float32)
        for row_shift in range(patch_size):
            for col_shift in range(patch_size):
                shift = [[row_shift, patch_size - row_shift - 1],
                         [col_shift, patch_size - col_shift - 1]]
                coverage = coverage + tf.pad(ones, paddings=shift,
                                             mode="CONSTANT")
        crop_offset = int((patch_size - 1) / 2)
        overlap_count = tf.slice(coverage, [crop_offset, crop_offset],
                                 [height, width])
        overlap_count = tf.reshape(overlap_count, shape=(1, height, width, 1))

        # Reflect-pad the content so a VALID convolution keeps its size.
        pad_total = patch_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        reflect_pad = [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]

        # Score every content location against every normalized style patch.
        scores = tf.pad(content_features, paddings=reflect_pad,
                        mode="REFLECT")
        scores = tf.nn.conv2d(scores,
                              tf.div(swap_kernels, patch_norms + 1e-7),
                              strides=[1, 1, 1, 1],
                              padding='VALID')

        # Keep only the best-scoring patch per location as a one-hot map.
        winners = tf.argmax(scores, axis=3)
        winners = tf.cast(tf.one_hot(winners, depth=num_patches),
                          dtype=tf.float32)

        # Paint the winning (unnormalized) patches back, crop the border and
        # average the overlapping contributions.
        painted = tf.nn.conv2d_transpose(
            value=winners,
            filter=swap_kernels,
            output_shape=(content_shape[0], height + pad_total,
                          width + pad_total, channels),
            strides=[1, 1, 1, 1],
            padding='VALID')
        painted = tf.slice(painted, [0, pad_beg, pad_beg, 0], content_shape)
        swapped = tf.div(painted, overlap_count)
        swapped = tf.reshape(swapped, shape=content_shape)

        return swapped
def detection_targets_graph2(proposals, gt_class_ids, gt_boxes, config):
    """Generates detection targets for one image (variant without masks).

    Subsamples proposals and generates target class IDs, bounding box deltas
    and the matched ground-truth boxes for each selected ROI. Unlike the
    mask-producing variant, no mask targets are computed here.

    Inputs:
        proposals: [num_proposals, (y1, x1, y2, x2)] in normalized
            coordinates. Might be zero padded if there are not enough
            proposals.
        gt_class_ids: [MAX_GT_INSTANCES] int class IDs. Negative IDs mark
            COCO crowd boxes.
        gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized
            coordinates. Might be zero padded.
        config: object providing TRAIN_ROIS_PER_IMAGE, ROI_POSITIVE_RATIO
            and BBOX_STD_DEV.

    Returns:
        rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized
            coordinates; positives first, then negatives, then zero padding.
        roi_gt_class_ids: [TRAIN_ROIS_PER_IMAGE] integer class IDs, zero for
            negatives and padding.
        roi_gt_deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
            box refinements divided by BBOX_STD_DEV, zero for negatives and
            padding.
        roi_gt_boxes: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] ground-truth
            box matched to each positive ROI, zero for negatives and padding.
    """
    # Assertions
    asserts = [
        tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                  name="roi_assertion"),
    ]
    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)

    # Remove zero padding. non_zeros marks the valid gt boxes and is reused
    # to trim gt_class_ids consistently.
    proposals, _ = utils.trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = utils.trim_zeros_graph(gt_boxes,
                                                 name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros,
                                   name="trim_gt_class_ids")

    # Handle COCO crowds.
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    # tf.where returns [num_true, 1] coordinates, hence the [:, 0].
    crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
    non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_ix)
    gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
    gt_boxes = tf.gather(gt_boxes, non_crowd_ix)

    # Compute overlaps with crowd boxes [proposals, crowds]
    crowd_overlaps = overlaps_graph2(proposals, crowd_boxes)
    crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
    no_crowd_bool = (crowd_iou_max < 0.001)

    # Compute overlaps matrix [proposals, gt_boxes] against the non-crowd
    # ground-truth boxes and the best IoU per proposal.
    overlaps = overlaps_graph2(proposals, gt_boxes)
    roi_iou_max = tf.reduce_max(overlaps, axis=1)

    # 1. Positive ROIs are those with >= 0.5 IoU with some GT box.
    positive_roi_bool = (roi_iou_max >= 0.5)
    positive_indices = tf.where(positive_roi_bool)[:, 0]

    # 2. Negative ROIs are those with < 0.5 IoU with every GT box that also
    #    do not overlap a crowd box.
    negative_indices = tf.where(
        tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

    # 3. Subsample positive ROIs: at most
    #    TRAIN_ROIS_PER_IMAGE * ROI_POSITIVE_RATIO of them.
    positive_count = int(config.TRAIN_ROIS_PER_IMAGE *
                         config.ROI_POSITIVE_RATIO)
    positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]

    # 4. Add negative ROIs. Add enough to maintain the positive:negative
    #    ratio relative to the positives actually selected.
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32),
                             tf.int32) - positive_count
    negative_indices = tf.random_shuffle(negative_indices)[:negative_count]

    # 5. Gather selected positive and negative ROIs.
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)

    # 6. Assign positive ROIs to GT boxes: for each positive ROI pick the GT
    #    box with the largest overlap. When no GT box is left (all padding or
    #    crowds) the overlap matrix has zero columns and argmax over an empty
    #    axis is an error, so fall back to an empty assignment.
    positive_overlaps = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.cond(
        tf.greater(tf.shape(positive_overlaps)[1], 0),
        true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
        false_fn=lambda: tf.cast(tf.constant([]), tf.int64))
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

    # 7. Compute bbox refinement for positive ROIs.
    roi_gt_deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    roi_gt_deltas /= config.BBOX_STD_DEV

    # 8. Append negative ROIs and zero-pad the per-positive targets so every
    #    output has one row per ROI slot.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    num_negative = tf.shape(negative_rois)[0]
    num_padding = tf.maximum(
        config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, num_padding), (0, 0)])
    roi_gt_boxes = tf.pad(roi_gt_boxes,
                          [(0, num_negative + num_padding), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids,
                              [(0, num_negative + num_padding)])
    roi_gt_deltas = tf.pad(roi_gt_deltas,
                           [(0, num_negative + num_padding), (0, 0)])

    return rois, roi_gt_class_ids, roi_gt_deltas, roi_gt_boxes
def __init__(self, pdfs: List[ZfitPDF], fracs: Optional[ztyping.ParamTypeInput] = None,
             obs: ztyping.ObsTypeInput = None,
             name: str = "SumPDF"):
    """Create the sum of the `pdfs` with `fracs` as coefficients.

    The constructor distinguishes four cases, derived from which `pdfs` are
    extended and from the length of `fracs`:

    - all pdfs extended, no `fracs`: the yields of the pdfs are used.
    - all but one pdf extended, no `fracs`: the yields of the extended pdfs
      act as fractions and the remaining pdf gets 1 - sum(fractions).
    - no pdf extended, len(fracs) == len(pdfs): each pdf becomes an extended
      pdf with the given yield.
    - no pdf extended, len(fracs) == len(pdfs) - 1: non-extended sum; the
      last coefficient equals 1 - sum(fracs).

    Args:
        pdfs (pdf): The pdfs to add. At least two are required.
        fracs (iterable): coefficients for the linear combination of the
            pdfs. Must be None if all or all but one of the pdfs are
            extended, otherwise an error is raised.
        obs: forwarded unchanged to the base class constructor.
        name (str): forwarded unchanged to the base class constructor.

    Raises:
        ValueError: if fewer than two pdfs are given.
        ModelIncompatibleError: if the combination of extended pdfs and
            `fracs` matches none of the cases above.
    """
    # Check user input, improve TODO
    self._fracs = None

    set_yield_at_end = False
    pdfs = convert_to_container(pdfs)
    # Set early because self.pdfs_extended is read below.
    # NOTE(review): presumably pdfs_extended is derived from self.pdfs --
    # confirm.
    self.pdfs = pdfs
    if len(pdfs) < 2:
        raise ValueError("Cannot build a sum of a single pdf")
    if fracs is not None:
        fracs = convert_to_container(fracs)
        fracs = [convert_to_parameter(frac) for frac in fracs]

    # Classify the input: `implicit` means the coefficients come from the
    # pdfs' own yields, `extended` means the resulting sum is extended.
    extended_pdfs = self.pdfs_extended
    implicit = None
    extended = None
    # all pdfs extended -> use their yields
    if all(extended_pdfs):
        implicit = True
        extended = True

    # all extended except one -> fraction
    elif sum(extended_pdfs) == len(extended_pdfs) - 1:
        implicit = True
        extended = False

    # no pdf is extended -> using `fracs`
    elif not any(extended_pdfs) and fracs is not None:
        # make extended
        if len(fracs) == len(pdfs):
            implicit = False
            extended = True
        elif len(fracs) == len(pdfs) - 1:
            implicit = False
            extended = False

    # catch if args don't fit known case
    value_error = implicit is None or extended is None
    if (implicit and fracs is not None) or value_error:
        raise ModelIncompatibleError("Wrong arguments. Either"
                                     "\n a) `pdfs` are not extended and `fracs` is given with length pdfs "
                                     "(-> pdfs get extended) or pdfs - 1 (fractions)"
                                     "\n b) all or all except 1 `pdfs` are extended and fracs is None.")

    # create fracs if one is not extended
    if not extended and implicit:
        fracs = []
        not_extended_position = None
        new_pdfs = []
        for i, pdf in enumerate(pdfs):
            if pdf.is_extended:
                # The yield becomes the fraction; work on a copy so the
                # caller's pdf keeps its yield.
                fracs.append(pdf.get_yield())
                pdf = pdf.copy()
                pdf._set_yield_inplace(None)  # make non-extended
            else:
                # Placeholder so the sum below ignores this slot; it is
                # replaced by the remaining fraction afterwards.
                fracs.append(tf.constant(0., dtype=ztypes.float))
                not_extended_position = i
            new_pdfs.append(pdf)
        pdfs = new_pdfs
        # Snapshot of the list: `fracs` is modified below, the closure must
        # keep seeing the placeholder version.
        copied_fracs = fracs.copy()
        remaining_frac_func = lambda: tf.constant(1., dtype=ztypes.float) - tf.add_n(copied_fracs)
        remaining_frac = convert_to_parameter(remaining_frac_func,
                                              dependents=[convert_to_parameter(f) for f in copied_fracs])
        # NOTE(review): `deps` is assigned but not used anywhere else in this
        # method, so the assertion is not attached to anything visible here.
        if run.numeric_checks:
            assert_op = tf.Assert(tf.greater_equal(remaining_frac, tf.constant(0., dtype=ztypes.float)),
                                  data=[remaining_frac])  # check fractions
            deps = [assert_op]
        else:
            deps = []
        fracs[not_extended_position] = remaining_frac
        implicit = False  # now it's explicit

    elif not extended and not implicit:
        # Snapshot of the list: `fracs` gets the remaining fraction appended
        # below, which must not be part of the sum.
        copied_fracs = fracs.copy()

        def remaining_frac_func():
            return tf.constant(1., dtype=ztypes.float) - tf.add_n(copied_fracs)

        remaining_frac = convert_to_parameter(remaining_frac_func,
                                              dependents=[convert_to_parameter(f) for f in copied_fracs])
        # NOTE(review): as above, `deps` is not used afterwards.
        if run.numeric_checks:
            assert_op = tf.Assert(tf.greater_equal(remaining_frac, tf.constant(0., dtype=ztypes.float)),
                                  data=[remaining_frac])  # check fractions
            deps = [assert_op]
        else:
            deps = []
        fracs.append(remaining_frac)

    # make extended
    elif extended and not implicit:
        yields = fracs
        pdfs = [pdf.create_extended(yield_) for pdf, yield_ in zip(pdfs, yields)]

        implicit = True

    elif extended and implicit:
        yields = [pdf.get_yield() for pdf in pdfs]

    if extended:
        # TODO(Mayou36): convert to correct dtype
        def sum_yields_func():
            return tf.reduce_sum(
                input_tensor=[tf.convert_to_tensor(value=y, dtype_hint=ztypes.float) for y in yields.copy()])

        sum_yields = convert_to_parameter(sum_yields_func, dependents=yields)
        # `yield_=yield_` binds the current element as a default argument so
        # every lambda keeps its own yield instead of the last loop value.
        yield_fracs = [convert_to_parameter(lambda yield_=yield_: yield_ / sum_yields, dependents=yield_)
                       for yield_ in yields]

        self.fracs = yield_fracs
        # The total yield can only be set once the base class is initialized.
        set_yield_at_end = True
        self._maybe_extended_fracs = [tf.constant(1, dtype=ztypes.float)] * len(self.pdfs)
    else:
        self._maybe_extended_fracs = fracs

    # Store the possibly replaced pdfs (copies / extended versions).
    self.pdfs = pdfs

    params = OrderedDict()
    # TODO(Mayou36): this is not right. Where to create the params if extended? The correct fracs?
    for i, frac in enumerate(self._maybe_extended_fracs):
        params['frac_{}'.format(i)] = frac

    super().__init__(pdfs=pdfs, obs=obs, params=params, name=name)
    if set_yield_at_end:
        self._set_yield_inplace(sum_yields)
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config):
    """Builds the detection-head training targets for a single image.

    Samples positive and negative ROIs from the proposals and, for each
    positive ROI, derives the class ID, box refinement and mask target of
    the ground-truth instance it overlaps most.

    Inputs:
    proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized
        coordinates. May be zero padded when there are too few proposals.
    gt_class_ids: [MAX_GT_INSTANCES] int class IDs.
    gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.
    gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type.
    config: configuration object. This function reads TRAIN_ROIS_PER_IMAGE,
        ROI_POSITIVE_RATIO, BBOX_STD_DEV, USE_MINI_MASK and MASK_SHAPE.

    Returns: Target ROIs with their class IDs, box shifts and masks.
    rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates.
    class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
    deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
    masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to the box
        boundaries and resized to the network output size.

    Note: the returned arrays are zero padded when there are fewer target
    ROIs than TRAIN_ROIS_PER_IMAGE.
    """
    # Fail at run time when the proposals tensor has no rows at all.
    roi_check = tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                          name="roi_assertion")
    with tf.control_dependencies([roi_check]):
        proposals = tf.identity(proposals)

    # Drop the zero padding from the proposals and from every GT input.
    proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, gt_keep = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids, gt_keep,
                                   name="trim_gt_class_ids")
    gt_masks = tf.gather(gt_masks, tf.where(gt_keep)[:, 0], axis=2,
                         name="trim_gt_masks")

    # COCO crowd boxes surround several instances and carry a negative
    # class ID. They are removed from the GT set but kept aside so that
    # proposals touching them are not sampled as negatives.
    crowd_rows = tf.where(gt_class_ids < 0)[:, 0]
    instance_rows = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_rows)
    gt_class_ids = tf.gather(gt_class_ids, instance_rows)
    gt_boxes = tf.gather(gt_boxes, instance_rows)
    gt_masks = tf.gather(gt_masks, instance_rows, axis=2)

    # IoU matrix of shape [proposals, gt_boxes].
    iou = overlaps_graph(proposals, gt_boxes)

    # IoU against the crowd boxes, shape [proposals, crowd_boxes].
    crowd_iou = overlaps_graph(proposals, crowd_boxes)
    best_crowd_iou = tf.reduce_max(crowd_iou, axis=1)
    clear_of_crowds = (best_crowd_iou < 0.001)

    # Split proposals into positives and negatives by their best IoU.
    best_iou = tf.reduce_max(iou, axis=1)
    # 1. Positive: IoU >= 0.5 with at least one GT box.
    is_positive = (best_iou >= 0.5)
    pos_rows = tf.where(is_positive)[:, 0]
    # 2. Negative: IoU < 0.5 with every GT box and not overlapping a crowd.
    neg_rows = tf.where(
        tf.logical_and(best_iou < 0.5, clear_of_crowds))[:, 0]

    # Subsample. The positive share is capped by config.ROI_POSITIVE_RATIO.
    pos_limit = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO)
    pos_rows = tf.random.shuffle(pos_rows)[:pos_limit]
    pos_count = tf.shape(pos_rows)[0]
    # Take as many negatives as needed to keep the positive:negative ratio.
    inverse_ratio = 1.0 / config.ROI_POSITIVE_RATIO
    neg_limit = tf.cast(inverse_ratio * tf.cast(pos_count, tf.float32),
                        tf.int32) - pos_count
    neg_rows = tf.random.shuffle(neg_rows)[:neg_limit]

    # Gather the sampled ROIs.
    pos_rois = tf.gather(proposals, pos_rows)
    neg_rois = tf.gather(proposals, neg_rows)

    # Match each positive ROI with the GT box of highest IoU. The cond
    # covers the case where no GT box is left after trimming.
    pos_iou = tf.gather(iou, pos_rows)
    matched_gt = tf.cond(
        tf.greater(tf.shape(pos_iou)[1], 0),
        true_fn=lambda: tf.argmax(pos_iou, axis=1),
        false_fn=lambda: tf.cast(tf.constant([]), tf.int64))
    matched_boxes = tf.gather(gt_boxes, matched_gt)
    matched_class_ids = tf.gather(gt_class_ids, matched_gt)

    # Box refinement targets for the positive ROIs, normalized by the
    # configured standard deviation.
    deltas = utils.box_refinement_graph(pos_rois, matched_boxes)
    deltas /= config.BBOX_STD_DEV

    # Rearrange masks to [N, height, width, 1] and pick one per ROI.
    masks_nhw1 = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
    matched_masks = tf.gather(masks_nhw1, matched_gt)

    # Boxes used to crop the mask targets.
    crop_boxes = pos_rois
    if config.USE_MINI_MASK:
        # Mini masks are stored relative to their GT box, so the ROI
        # coordinates are re-expressed in that box's normalized space.
        y1, x1, y2, x2 = tf.split(pos_rois, 4, axis=1)
        gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(matched_boxes, 4, axis=1)
        gt_h = gt_y2 - gt_y1
        gt_w = gt_x2 - gt_x1
        y1 = (y1 - gt_y1) / gt_h
        x1 = (x1 - gt_x1) / gt_w
        y2 = (y2 - gt_y1) / gt_h
        x2 = (x2 - gt_x1) / gt_w
        crop_boxes = tf.concat([y1, x1, y2, x2], 1)
    mask_rows = tf.range(0, tf.shape(matched_masks)[0])
    masks = tf.image.crop_and_resize(tf.cast(matched_masks, tf.float32),
                                     crop_boxes, mask_rows,
                                     config.MASK_SHAPE)
    # Drop the trailing channel dimension.
    masks = tf.squeeze(masks, axis=3)

    # Round to 0/1 so the targets suit a binary cross entropy loss.
    masks = tf.round(masks)

    # Append the negative ROIs, then zero pad everything to
    # TRAIN_ROIS_PER_IMAGE rows. Negative ROIs get all-zero class IDs,
    # deltas and masks.
    rois = tf.concat([pos_rois, neg_rois], axis=0)
    neg_count = tf.shape(neg_rois)[0]
    pad_count = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, pad_count), (0, 0)])
    matched_boxes = tf.pad(matched_boxes, [(0, neg_count + pad_count), (0, 0)])
    matched_class_ids = tf.pad(matched_class_ids, [(0, neg_count + pad_count)])
    deltas = tf.pad(deltas, [(0, neg_count + pad_count), (0, 0)])
    masks = tf.pad(masks, [[0, neg_count + pad_count], (0, 0), (0, 0)])

    return rois, matched_class_ids, deltas, masks
def _random_crop(image_list, label_list, crop_height, crop_width):
    """Crops every image and label with one shared random offset.

    A single (offset_height, offset_width) pair is drawn at graph execution
    time and applied to all entries, so images and labels stay aligned.

    Args:
      image_list: non-empty sequence of image tensors. Each one is asserted
        at run time to have rank 3 and the same height and width as the
        first image.
      label_list: sequence of label tensors cropped with the same offsets.
        Their shapes are not validated by this function.
      crop_height: height of the crop window.
      crop_width: width of the crop window.

    Returns:
      A tuple `(cropped_images, cropped_labels)` of lists holding the result
      of `_crop` for each input, in input order.

    Raises:
      ValueError: if `image_list` is empty.
    """
    if not image_list:
        raise ValueError('Empty image_list.')

    # One run-time rank check per image.
    rank_assertions = []
    for image in image_list:
        image_rank = tf.rank(image)
        rank_assertions.append(
            tf.Assert(tf.equal(image_rank, 3), [
                'Wrong rank for tensor %s [expected] [actual]',
                image.name, 3, image_rank
            ]))

    # The first image defines the reference height and width.
    image_shape = control_flow_ops.with_dependencies(
        [rank_assertions[0]], tf.shape(image_list[0]))
    image_height = image_shape[0]
    image_width = image_shape[1]
    crop_size_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(image_height, crop_height),
                       tf.greater_equal(image_width, crop_width)), [
                           'Crop size greater than the image size.',
                           image_height, image_width, crop_height, crop_width
                       ])

    asserts = [rank_assertions[0], crop_size_assert]

    # Every further image must match the reference size.
    for image, rank_assertion in zip(image_list[1:], rank_assertions[1:]):
        asserts.append(rank_assertion)
        shape = control_flow_ops.with_dependencies([rank_assertion],
                                                   tf.shape(image))
        height = shape[0]
        width = shape[1]

        # The data follows the '[expected][actual]' label in the message:
        # the reference size first, this image's size second.
        height_assert = tf.Assert(tf.equal(height, image_height), [
            'Wrong height for tensor %s [expected][actual]',
            image.name, image_height, height
        ])
        width_assert = tf.Assert(tf.equal(width, image_width), [
            'Wrong width for tensor %s [expected][actual]',
            image.name, image_width, width
        ])
        asserts.extend([height_assert, width_assert])

    # Create a random bounding box.
    #
    # Use tf.random_uniform and not numpy.random.rand as doing the former
    # would generate random numbers at graph eval time, unlike the latter
    # which generates random numbers at graph definition time.
    max_offset_height = control_flow_ops.with_dependencies(
        asserts, tf.reshape(image_height - crop_height + 1, []))
    max_offset_width = control_flow_ops.with_dependencies(
        asserts, tf.reshape(image_width - crop_width + 1, []))
    offset_height = tf.random_uniform([], maxval=max_offset_height,
                                      dtype=tf.int32)
    offset_width = tf.random_uniform([], maxval=max_offset_width,
                                     dtype=tf.int32)

    cropped_images = [
        _crop(image, offset_height, offset_width, crop_height, crop_width)
        for image in image_list
    ]
    cropped_labels = [
        _crop(label, offset_height, offset_width, crop_height, crop_width)
        for label in label_list
    ]
    return cropped_images, cropped_labels
def minimize(value_and_gradients_function,
             initial_position,
             tolerance=1e-8,
             x_tolerance=0,
             f_relative_tolerance=0,
             initial_inverse_hessian_estimate=None,
             max_iterations=50,
             parallel_iterations=1,
             name=None):
    """Minimizes a differentiable function with the BFGS algorithm.

    Runs unconstrained minimization using the BFGS scheme; the algorithm is
    described in [Nocedal and Wright(2006)][1].

    ### Usage:

    The example below minimizes a simple two dimensional quadratic.

    ```python
      minimum = np.array([1.0, 1.0])  # The center of the quadratic bowl.
      scales = np.array([2.0, 3.0])  # The scales along the two axes.

      # The objective function and the gradient.
      def quadratic(x):
        value = tf.reduce_sum(scales * (x - minimum) ** 2)
        return value, tf.gradients(value, x)[0]

      start = tf.constant([0.6, 0.8])  # Starting point for the search.
      optim_results = tfp.optimizer.bfgs_minimize(
          quadratic, initial_position=start, tolerance=1e-8)

      with tf.Session() as session:
        results = session.run(optim_results)
        # Check that the search converged
        assert(results.converged)
        # Check that the argmin is close to the actual value.
        np.testing.assert_allclose(results.position, minimum)
        # Print out the total number of function evaluations it took.
        # Should be 6.
        print ("Function evaluations: %d" % results.num_objective_evaluations)
    ```

    ### References:
    [1]: Jorge Nocedal, Stephen Wright. Numerical Optimization. Springer
      Series in Operations Research. pp 136-140. 2006
      http://pages.mtu.edu/~struther/Courses/OLD/Sp2013/5630/Jorge_Nocedal_Numerical_optimization_267490.pdf

    Args:
      value_and_gradients_function: Python callable taking a point as a real
        `Tensor` and returning a tuple `(value, gradient)` of real `Tensor`s.
        This is the function to minimize. The value should be a real scalar
        `Tensor`; the gradient should have the shape of the input point.
      initial_position: `Tensor` of real dtype. Starting point of the
        search. The function value and the gradient norm should be finite
        there.
      tolerance: Scalar `Tensor` of real dtype. Gradient tolerance: the
        algorithm stops once the supremum norm of the gradient falls below
        this number.
      x_tolerance: Scalar `Tensor` of real dtype. The algorithm stops when
        the absolute change in position between two consecutive iterations
        is smaller than this number.
      f_relative_tolerance: Scalar `Tensor` of real dtype. The algorithm
        stops when the relative change in the objective value between two
        consecutive iterations is smaller than this value.
      initial_inverse_hessian_estimate: Optional `Tensor` with the dtype of
        the outputs of `value_and_gradients_function`. Starting estimate of
        the inverse Hessian at the initial point. Its shape should be
        `initial_position.shape` * 2: `[n, n]` for a position of shape
        `[n]`, `[n, m, n, m]` for a position of shape `[n, m]`. It must be
        symmetric and positive definite; both properties are asserted at
        run time. When omitted, the identity matrix is used.
      max_iterations: Scalar positive int32 `Tensor`. Maximum number of
        BFGS update iterations.
      parallel_iterations: Positive integer. Number of iterations allowed
        to run in parallel.
      name: (Optional) Python str. Name prefixed to the ops created by this
        function. Defaults to 'minimize'.

    Returns:
      optimizer_results: A namedtuple containing the following items:
        converged: Scalar boolean tensor indicating whether the minimum was
          found within tolerance.
        failed: Scalar boolean tensor indicating whether a line search step
          failed to find a suitable step size satisfying Wolfe conditions.
          Without a limit on the number of objective evaluations this is the
          complement of `converged`. If there is such a limit and the search
          stopped because the evaluations were exhausted, both `failed` and
          `converged` are False.
        num_iterations: The number of BFGS iterations performed.
        num_objective_evaluations: The total number of objective
          evaluations performed.
        position: The last argument value found during the search. If the
          search converged, this is the argmin of the objective function.
        objective_value: The objective value at `position`. If the search
          converged, this is the (local) minimum of the objective function.
        objective_gradient: The objective gradient at `position`. If the
          search converged, its max-norm should be below the tolerance.
        inverse_hessian_estimate: The inverse of the estimated Hessian.
    """
    with tf.name_scope(name, 'minimize', [initial_position, tolerance,
                                          initial_inverse_hessian_estimate]):
        initial_position = tf.convert_to_tensor(initial_position,
                                                name='initial_position')
        dtype = initial_position.dtype.base_dtype
        tolerance = tf.convert_to_tensor(tolerance, dtype=dtype,
                                         name='grad_tolerance')
        f_relative_tolerance = tf.convert_to_tensor(
            f_relative_tolerance, dtype=dtype, name='f_relative_tolerance')
        x_tolerance = tf.convert_to_tensor(x_tolerance, dtype=dtype,
                                           name='x_tolerance')
        max_iterations = tf.convert_to_tensor(max_iterations,
                                              name='max_iterations')

        domain_shape = distribution_util.prefer_static_shape(initial_position)

        if initial_inverse_hessian_estimate is None:
            # No estimate supplied: start from the identity, reshaped to
            # `domain_shape * 2`.
            inv_hessian_shape = tf.concat([domain_shape, domain_shape], 0)
            identity = tf.eye(tf.size(initial_position), dtype=dtype)
            initial_inv_hessian = tf.reshape(identity, inv_hessian_shape,
                                             name='initial_inv_hessian')
            f0, df0 = value_and_gradients_function(initial_position)
        else:
            initial_inv_hessian = tf.convert_to_tensor(
                initial_inverse_hessian_estimate,
                dtype=dtype,
                name='initial_inv_hessian')

            # A supplied estimate must be symmetric positive definite.
            # Positive definiteness is checked through the Cholesky
            # decomposition. A bare control dependency on the factor does
            # not seem to be enough to trigger it, hence the explicit
            # assert on the result.
            # The estimate may not be of rank 2, so flatten it to a square
            # matrix first.
            square_shape = tf.stack(
                [tf.size(initial_position), tf.size(initial_position)],
                axis=0)
            inv_hessian_matrix = tf.reshape(initial_inverse_hessian_estimate,
                                            square_shape)

            # The decomposition fails for a matrix that is not positive
            # definite.
            cholesky_factor = tf.cholesky(inv_hessian_matrix)
            is_positive_definite = tf.reduce_all(
                tf.is_finite(cholesky_factor))
            asymmetry = tf.norm(
                inv_hessian_matrix - tf.transpose(inv_hessian_matrix),
                np.inf)
            is_symmetric = tf.equal(asymmetry, 0)
            hessian_checks = [
                tf.Assert(is_positive_definite,
                          ['Initial inverse Hessian is not positive definite.',
                           initial_inverse_hessian_estimate]),
                tf.Assert(is_symmetric,
                          ['Initial inverse Hessian is not symmetric',
                           initial_inverse_hessian_estimate]),
            ]
            # The first objective evaluation is gated on the checks.
            with tf.control_dependencies(hessian_checks):
                f0, df0 = value_and_gradients_function(initial_position)

        initial_convergence = _initial_convergence_test(df0, tolerance)

        def _cond(converged, failed, iteration, *ignored_args):  # pylint: disable=unused-argument
            """Returns True while the search should continue."""
            done = converged | failed | (iteration >= max_iterations)
            return tf.logical_not(done)

        def _body(converged,  # pylint: disable=unused-argument
                  stopped,  # pylint: disable=unused-argument
                  iteration,
                  total_evals,
                  position,
                  objective_value,
                  objective_gradient,
                  input_inv_hessian_estimate):
            """Performs one BFGS iteration."""
            proposed_direction = _get_search_direction(
                input_inv_hessian_estimate, objective_gradient)
            proposed_slope = tf.reduce_sum(
                objective_gradient * proposed_direction)

            # A non-negative slope means the direction is not a descent
            # direction: fall back to the initial inverse Hessian and
            # recompute the direction from it.
            needs_reset = proposed_slope >= 0

            def _reset_search_dirn():
                reset_direction = _get_search_direction(initial_inv_hessian,
                                                        objective_gradient)
                return reset_direction, initial_inv_hessian

            def _keep_search_dirn():
                return proposed_direction, input_inv_hessian_estimate

            search_direction, inv_hessian_estimate = (
                tf.contrib.framework.smart_cond(
                    needs_reset,
                    true_fn=_reset_search_dirn,
                    false_fn=_keep_search_dirn))

            line_search_value_grad_func = _restrict_along_direction(
                value_and_gradients_function, position, search_direction)
            derivative_at_start_pt = tf.reduce_sum(
                objective_gradient * search_direction)

            ls_result = linesearch.hager_zhang(
                line_search_value_grad_func,
                initial_step_size=tf.convert_to_tensor(1, dtype=dtype),
                objective_at_zero=objective_value,
                grad_objective_at_zero=derivative_at_start_pt)

            # The iteration fails when the line search did not converge.
            ls_failed = ~ls_result.converged
            evals_so_far = total_evals + ls_result.func_evals

            def _failed_fn():
                """Stops the search, keeping the current point."""
                return BfgsOptimizerResults(
                    converged=False,
                    failed=True,
                    num_iterations=iteration + 1,
                    num_objective_evaluations=evals_so_far,
                    position=position,
                    objective_value=objective_value,
                    objective_gradient=objective_gradient,
                    inverse_hessian_estimate=inv_hessian_estimate)

            def _success_fn():
                """Applies the BFGS update at the point found."""
                return _bfgs_update(value_and_gradients_function,
                                    position,
                                    objective_value,
                                    objective_gradient,
                                    search_direction,
                                    inv_hessian_estimate,
                                    ls_result.left_pt,
                                    iteration,
                                    evals_so_far,
                                    tolerance,
                                    f_relative_tolerance,
                                    x_tolerance)

            return tf.contrib.framework.smart_cond(
                ls_failed,
                true_fn=_failed_fn,
                false_fn=_success_fn)

        initial_values = BfgsOptimizerResults(
            converged=initial_convergence,
            failed=False,
            num_iterations=tf.convert_to_tensor(0),
            num_objective_evaluations=1,
            position=initial_position,
            objective_value=f0,
            objective_gradient=df0,
            inverse_hessian_estimate=initial_inv_hessian)

        return tf.while_loop(_cond, _body, initial_values,
                             parallel_iterations=parallel_iterations)
def lossfunc(x, alpha, scale, approximate=False, epsilon=1e-6):
    r"""Evaluates the general robust loss rho(x, \alpha, c).

    This is the loss from "A General and Adaptive Robust Loss Function",
    Jonathan T. Barron, https://arxiv.org/abs/1701.03077.

    Args:
      x: The residual the loss is computed for. It can have any shape;
        `alpha` and `scale` are broadcast to the shape of `x`. Must be a
        tensorflow tensor or numpy array of floats.
      alpha: The shape parameter of the loss (\alpha in the paper). More
        negative values give a more robust loss (outliers "cost" less),
        more positive values give a less robust loss (outliers are
        penalized more heavily). Any value in [-infinity, infinity] is
        accepted, but the gradient of the loss with respect to alpha is 0
        at -infinity, infinity, 0, and 2. Must have the same dtype as `x`
        (asserted). Varying alpha interpolates smoothly between several
        discrete robust losses:
          alpha=-Infinity: Welsch/Leclerc Loss.
          alpha=-2: Geman-McClure loss.
          alpha=0: Cauchy/Lorentzian loss.
          alpha=1: Charbonnier/pseudo-Huber loss.
          alpha=2: L2 loss.
      scale: The scale parameter of the loss. For |x| < scale the loss is
        an L2-like quadratic bowl; for |x| > scale its shape depends on
        alpha. Must be strictly positive (asserted at run time) and have
        the same dtype as `x` (asserted).
      approximate: a bool. If True, an approximate and faster form of the
        loss is returned, as described in the appendix of the paper. The
        approximation holds well everywhere except as x and alpha approach
        zero.
      epsilon: A float controlling how inaccurate the "approximate" form
        is. Larger values are less accurate but more numerically stable.
        Must be greater than single-precision machine epsilon; only
        checked when `approximate` is True.

    Returns:
      The loss for each element of x, in the same shape as x, as a
      TensorFlow graph node.
    """
    # `scale` and `alpha` must share the dtype of `x`.
    float_dtype = x.dtype
    tf.debugging.assert_type(scale, float_dtype)
    tf.debugging.assert_type(alpha, float_dtype)
    # `scale` must be > 0.
    positive_scale = tf.Assert(tf.reduce_all(tf.greater(scale, 0.)), [scale])
    with tf.control_dependencies([positive_scale]):
        # Give `alpha` and `scale` the shape of `x`.
        alpha = tf.broadcast_to(alpha, tf.shape(x))
        scale = tf.broadcast_to(scale, tf.shape(x))

        if approximate:
            # `epsilon` must exceed single-precision machine epsilon.
            assert epsilon > np.finfo(np.float32).eps
            # Faster closed form, inaccurate when x and alpha are near 0.
            # Both denominators are pushed away from zero by `epsilon`.
            b = tf.abs(alpha - tf.cast(2., float_dtype)) + epsilon
            d = tf.where(
                tf.greater_equal(alpha, 0.), alpha + epsilon, alpha - epsilon)
            loss = (b / d) * (
                tf.pow(tf.square(x / scale) / b + 1., 0.5 * d) - 1.)
        else:
            # Exact loss: evaluate every special case, then select
            # elementwise according to alpha.
            z_sq = tf.square(x / scale)
            # alpha == 2.
            loss_two = 0.5 * z_sq
            # alpha == 0.
            loss_zero = util.log1p_safe(0.5 * z_sq)
            # alpha == -infinity.
            loss_neginf = -tf.math.expm1(-0.5 * z_sq)
            # alpha == +infinity.
            loss_posinf = util.expm1_safe(0.5 * z_sq)

            # General case, with both divisors clamped away from zero.
            machine_epsilon = tf.cast(np.finfo(np.float32).eps, float_dtype)
            # |2 - alpha|, at least machine epsilon.
            beta_safe = tf.maximum(machine_epsilon, tf.abs(alpha - 2.))
            # alpha with its magnitude at least machine epsilon, sign kept
            # (zero counts as positive).
            alpha_sign = tf.where(
                tf.greater_equal(alpha, 0.), tf.ones_like(alpha),
                -tf.ones_like(alpha))
            alpha_magnitude = tf.maximum(machine_epsilon, tf.abs(alpha))
            alpha_safe = alpha_sign * alpha_magnitude
            loss_otherwise = (beta_safe / alpha_safe) * (
                tf.pow(z_sq / beta_safe + 1., 0.5 * alpha) - 1.)

            # Pick the case that applies to each element.
            is_neginf = tf.equal(alpha, -tf.cast(float('inf'), float_dtype))
            is_zero = tf.equal(alpha, 0.)
            is_two = tf.equal(alpha, 2.)
            is_posinf = tf.equal(alpha, tf.cast(float('inf'), float_dtype))
            loss = tf.where(is_posinf, loss_posinf, loss_otherwise)
            loss = tf.where(is_two, loss_two, loss)
            loss = tf.where(is_zero, loss_zero, loss)
            loss = tf.where(is_neginf, loss_neginf, loss)

        return loss
def polymorphic_distribution_fn(example):
    """Converts `example` to action inputs, checks them, and calls `distribution_fn`.

    `input_fn_and_spec`, `action_fn_input_spec` and `distribution_fn` are not
    defined in this function; they are resolved from the enclosing scope.

    Args:
      example: value passed to the callable stored at `input_fn_and_spec[0]`.

    Returns:
      The result of `distribution_fn(*action_inputs)`.
    """
    action_inputs = input_fn_and_spec[0](example)

    def _assert_matches_spec(spec, t):
        # The spec is compared against the first element of `t`.
        # NOTE(review): presumably index 0 strips a batch dimension - confirm.
        return tf.Assert(spec.is_compatible_with(t[0]), [t])

    tf.nest.map_structure(_assert_matches_spec, action_fn_input_spec,
                          action_inputs)
    return distribution_fn(*action_inputs)
def MyFn(x):
    """Returns `tf.identity(x)`, gated on an assertion that `x <= 10.0`."""
    upper_bound_check = tf.Assert(tf.less_equal(x, 10.0), [x])
    # The identity op is created under the assertion, so evaluating the
    # returned tensor also evaluates the check.
    with tf.control_dependencies([upper_bound_check]):
        return tf.identity(x)
def assert_in_range(x, *, min, max):
    """Builds an assert op checking that every element of x lies in [min, max].

    Args:
      x: tensor to check; it is also the data reported when the assertion
        fails.
      min: inclusive lower bound, compared against the smallest element.
      max: inclusive upper bound, compared against the largest element.

    Returns:
      The `tf.Assert` op. It is returned as is, not attached to anything.
    """
    # Checking only the two extremes covers every element.
    not_below_min = tf.greater_equal(tf.reduce_min(x), min)
    not_above_max = tf.less_equal(tf.reduce_max(x), max)
    within_bounds = tf.logical_and(not_below_min, not_above_max)
    return tf.Assert(within_bounds, [x])
def _brent(objective_fn,
           left_bracket,
           right_bracket,
           value_at_left_bracket=None,
           value_at_right_bracket=None,
           absolute_root_tolerance=2e-7,
           relative_root_tolerance=None,
           function_tolerance=2e-7,
           max_iterations=100,
           stopping_policy_fn=None,
           validate_args=False,
           name=None):
    r"""Finds root(s) of a function of a single variable using Brent's method.

    [Brent's method](https://en.wikipedia.org/wiki/Brent%27s_method) is a
    root-finding algorithm combining the bisection method, the secant method
    and extrapolation. Like bisection it is guaranteed to converge towards a
    root if one exists, but that convergence is superlinear and on par with
    less reliable methods.

    This implementation is a translation of the algorithm described in the
    [original article](https://academic.oup.com/comjnl/article/14/4/422/325237).

    Args:
      objective_fn: Python callable for which roots are searched. It takes a
        single `Tensor` and returns a `Tensor` of the same shape and dtype
        as `left_bracket`.
      left_bracket: `Tensor` or Python float with the first starting points.
        Roots are searched between each pair of points defined by
        `left_bracket` and `right_bracket`. Its shape should match that of
        the input to `objective_fn`.
      right_bracket: `Tensor` of the same shape and dtype as `left_bracket`,
        or Python float, with the second starting points.
      value_at_left_bracket: Optional `Tensor` or Python float with the
        value of `objective_fn` at `left_bracket`. If given, it must have
        the shape of `left_bracket`. If omitted, the value is evaluated
        during the search.
        Default value: None.
      value_at_right_bracket: Optional `Tensor` or Python float with the
        value of `objective_fn` at `right_bracket`. If given, it must have
        the shape of `right_bracket`. If omitted, the value is evaluated
        during the search.
        Default value: None.
      absolute_root_tolerance: Optional `Tensor` with the absolute tolerance
        for estimated roots. The total tolerance is
        `(absolute_root_tolerance + relative_root_tolerance * |root|) / 2`.
        If given, it must be positive, broadcast with the shape of
        `left_bracket` and have the same dtype.
        Default value: `2e-7`.
      relative_root_tolerance: Optional `Tensor` with the relative tolerance
        for estimated roots, used in the total tolerance above. If given, it
        must be positive, broadcast with the shape of `left_bracket` and
        have the same dtype.
        Default value: `None` which translates to
          `4 * numpy.finfo(left_bracket.dtype.as_numpy_dtype).eps`.
      function_tolerance: Optional `Tensor` with the tolerance used to check
        for roots. An estimate is considered a root when the absolute value
        of `objective_fn` there is smaller than or equal to
        `function_tolerance`. If given, it must broadcast with the shape of
        `left_bracket` and have the same dtype. Set it to zero to match
        Brent's original algorithm and keep searching until an exact root
        is found.
        Default value: `2e-7`.
      max_iterations: Optional `Tensor` of an integral dtype or Python
        integer with the maximum number of steps per initial point. Must
        broadcast with the shape of `left_bracket`. Where an element is
        zero, no search is done for the corresponding points and the best
        estimate from the inputs is returned.
        Default value: `100`.
      stopping_policy_fn: Python `callable` controlling termination. It
        takes a boolean `Tensor` with the shape of `left_bracket` (whether
        the search is finished for each starting point) and returns a
        scalar boolean `Tensor` (whether the overall search should stop).
        Typical values are `tf.reduce_all` (stop once every search is
        finished) and `tf.reduce_any` (stop as soon as one is finished).
        Default value: `None` which translates to `tf.reduce_all`.
      validate_args: Python `bool` indicating whether to validate arguments
        such as `left_bracket`, `right_bracket`, `absolute_root_tolerance`,
        `relative_root_tolerance`, `function_tolerance`, and
        `max_iterations`.
        Default value: `False`.
      name: Python `str` name prefixed to ops created by this function.

    Returns:
      brent_results: A Python object containing the following attributes:
        estimated_root: `Tensor` containing the best estimate explored. If
          the search succeeded within the specified tolerance, this is a
          root of the objective function.
        objective_at_estimated_root: `Tensor` with the value of the
          objective function at `estimated_root`; close to 0 if the search
          succeeded. Same dtype and shape as `estimated_root`.
        num_iterations: `Tensor` with the number of iterations performed.
          Same dtype as `max_iterations` and shape as `estimated_root`.
        converged: Boolean `Tensor`, computed elementwise as
          `|objective_at_estimated_root| <= function_tolerance`, indicating
          whether `estimated_root` is a root within the tolerance.

    Raises:
      ValueError: if the `stopping_policy_fn` is not callable.
    """
    scope_values = [
        left_bracket, right_bracket, value_at_left_bracket,
        value_at_right_bracket, max_iterations
    ]
    with tf.name_scope(name, "brent_root", scope_values):
        state, params, constants = _prepare_brent_args(
            objective_fn, left_bracket, right_bracket, value_at_left_bracket,
            value_at_right_bracket, absolute_root_tolerance,
            relative_root_tolerance, function_tolerance, max_iterations,
            stopping_policy_fn)

        assertions = []
        if validate_args:
            # The objective values at the two initial estimates must not
            # have the same sign.
            brackets_root = tf.reduce_all(
                state.value_at_last_estimate * state.value_at_best_estimate
                <= constants.zero_value)
            assertions.extend([
                tf.Assert(brackets_root,
                          [state.value_at_last_estimate,
                           state.value_at_best_estimate]),
                tf.Assert(
                    tf.reduce_all(
                        params.absolute_root_tolerance > constants.zero),
                    [params.absolute_root_tolerance]),
                tf.Assert(
                    tf.reduce_all(
                        params.relative_root_tolerance > constants.zero),
                    [params.relative_root_tolerance]),
                tf.Assert(
                    tf.reduce_all(
                        params.function_tolerance >= constants.zero),
                    [params.function_tolerance]),
                tf.Assert(
                    tf.reduce_all(
                        params.max_iterations >= state.num_iterations),
                    [params.max_iterations]),
            ])

        def _search_continues(loop_vars):
            # Negate `_should_stop` to decide whether to keep searching.
            # In particular tf.reduce_*all* stops only once the search is
            # finished for *all* starting points.
            return ~_should_stop(loop_vars, params.stopping_policy_fn)

        def _advance(loop_state):
            return _brent_loop_body(loop_state, params, constants)

        with tf.control_dependencies(assertions):
            result = tf.while_loop(
                _search_continues, _advance, loop_vars=[state])

    final_state = result[0]
    converged = (
        tf.math.abs(final_state.value_at_best_estimate) <= function_tolerance)

    return BrentResults(
        estimated_root=final_state.best_estimate,
        objective_at_estimated_root=final_state.value_at_best_estimate,
        num_iterations=final_state.num_iterations,
        converged=converged)
def _project_distribution(self, supports, weights, target_support,
                          validate_args=False):
    """Project a batch of (support, weights) pairs onto `target_support`.

    Implements equation (7) of Bellemare et al., 2017
    (https://arxiv.org/abs/1707.06887), referred to as Eq7 below.

    The `Ex:` comments in the body follow this running example:
      * supports       = [[0, 2, 4, 6, 8], [1, 3, 4, 5, 6]]
      * weights        = [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.2, 0.5, 0.1, 0.1]]
      * target_support = [4, 5, 6, 7, 8]
      * result         = [[0.8, 0.0, 0.1, 0.0, 0.1], [0.8, 0.1, 0.1, 0.0, 0.0]]

    Args:
      supports: Tensor of shape (batch_size, num_dims) with the support points
        of each distribution in the batch.
      weights: Tensor of shape (batch_size, num_dims) with the weight attached
        to each support point. They do not have to be probabilities.
      target_support: Tensor of shape (num_dims) with the support to project
        onto. Must be increasing and equally spaced; Vmin and Vmax are taken
        from its first and last elements.
      validate_args: When true, run-time assertions on the shapes and on the
        contents of `target_support` are attached to the returned tensor.

    Returns:
      A Tensor of shape (batch_size, num_dims): the weights re-distributed onto
      `target_support`.

    Raises:
      ValueError: If the static shapes of `supports`, `weights` and
        `target_support` are incompatible, or `target_support` is not rank 1.
    """
    # Gap between each pair of neighbouring atoms of the target support.
    spacings = target_support[1:] - target_support[:-1]
    # bin_width = `Delta z` in Eq7.
    bin_width = spacings[0]

    # Static checks, performed while the graph is being built.
    supports.shape.assert_is_compatible_with(weights.shape)
    supports[0].shape.assert_is_compatible_with(target_support.shape)
    target_support.shape.assert_has_rank(1)

    # Dynamic checks, evaluated only when the projection is actually run.
    runtime_checks = []
    if validate_args:
        runtime_checks = [
            # `supports` and `weights` have the same shape.
            tf.Assert(
                tf.reduce_all(tf.equal(tf.shape(supports), tf.shape(weights))),
                [supports, weights]),
            # Each row of `supports` is as long as `target_support`.
            tf.Assert(
                tf.reduce_all(
                    tf.equal(tf.shape(supports)[1], tf.shape(target_support))),
                [supports, target_support]),
            # `target_support` has exactly one dimension.
            tf.Assert(
                tf.equal(tf.size(tf.shape(target_support)), 1),
                [target_support]),
            # `target_support` is strictly increasing.
            tf.Assert(tf.reduce_all(spacings > 0), [target_support]),
            # `target_support` is equally spaced (exact equality).
            tf.Assert(
                tf.reduce_all(tf.equal(spacings, bin_width)),
                [target_support]),
        ]

    with tf.control_dependencies(runtime_checks):
        # Ex: `lowest, highest = 4, 8`.
        lowest, highest = target_support[0], target_support[-1]
        # Ex: `rows = 2`.
        rows = tf.shape(supports)[0]
        # `N` in Eq7.  Ex: `atoms = 5`.
        atoms = tf.shape(target_support)[0]

        # Supports clamped to [Vmin, Vmax], with a singleton middle axis.
        # Ex: `[[[4. 4. 4. 6. 8.]], [[4. 4. 4. 5. 6.]]]`.
        clamped = tf.clip_by_value(supports, lowest, highest)[:, None, :]
        # One copy of every clamped row per target atom; wrapping in a list
        # adds a leading axis of size 1: (1, rows, atoms, atoms).
        clamped_grid = tf.tile([clamped], [1, 1, atoms, 1])

        # Target atoms laid out as a column for every batch row:
        # (rows, atoms, 1).  Ex: `[[[4.] [5.] [6.] [7.] [8.]], [...same...]]`.
        target_column = tf.tile(target_support[:, None], [rows, 1])
        target_column = tf.reshape(target_column, [rows, atoms, 1])

        # `|clipped_support - z_i|` in Eq7.
        distance = tf.abs(clamped_grid - target_column)
        # `[1 - distance / (Delta z)]_0^1` in Eq7: the share of each source
        # point's weight that lands on each target atom.
        share = tf.clip_by_value(1 - (distance / bin_width), 0, 1)

        # Broadcast the weights along the target-atom axis.
        weights = weights[:, None, :]
        # Summand of Eq7, then the sum over source points (last axis).
        weighted_share = share * weights
        # Ex: `[[0.8 0.0 0.1 0.0 0.1], [0.8 0.1 0.1 0.0 0.0]]`.
        projected = tf.reduce_sum(weighted_share, 3)
        # Drop the leading axis of size 1.
        projected = tf.reshape(projected, [rows, atoms])
        return projected
def __call__(self, t, x):
    """Return `x` routed through an identity op that depends on an assertion.

    A `tf.Assert` on `self.ph` is created under the "/cpu:0" device scope, with
    `t` as the data reported when the assertion fails. The returned tensor is
    `tf.identity(x)` with a control dependency on that assertion.

    Args:
      t: Value handed to `tf.Assert` as its data list (reported on failure).
      x: Tensor to pass through.

    Returns:
      `tf.identity(x)`, gated on the assertion.
    """
    # presumably `self.ph` is a boolean placeholder/tensor — verify where it is
    # assigned.
    # NOTE(review): this block was reviewed from whitespace-collapsed source;
    # whether the identity op belongs inside the device scope could not be
    # determined. Confirm the nesting below against the original file.
    with tf.device("/cpu:0"):
        check = tf.Assert(self.ph, [t])
    with tf.control_dependencies([check]):
        y = tf.identity(x)
    return y
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, config):
    """Generates detection targets for one image.

    Subsamples proposals and generates a target class ID and bounding box
    deltas for each selected ROI.

    Args:
        proposals: [N, (y1, x1, y2, x2)] in normalized coordinates. Might be
            zero padded if there are not enough proposals.
        gt_class_ids: [MAX_GT_INSTANCES] int class IDs. Negative IDs mark
            crowd boxes, zero entries are dropped.
        gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized
            coordinates.
        config: Object providing TRAIN_ROIS_PER_IMAGE, ROI_POSITIVE_RATIO and
            BBOX_STD_DEV.

    Returns:
        rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized
            coordinates; positive ROIs first, then negative ROIs.
        class_ids: [TRAIN_ROIS_PER_IMAGE] integer class IDs. Zero for negative
            ROIs and padding.
        deltas: [TRAIN_ROIS_PER_IMAGE, 4] box refinements for the positive
            ROIs, divided by config.BBOX_STD_DEV. Zero for negative ROIs and
            padding.

    Note: Returned arrays are zero padded if there are not enough target ROIs.
    """
    # Fail at run time if no proposals were supplied at all.
    asserts = [
        tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                  name="roi_assertion"),
    ]
    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)

    # Remove zero padding.
    proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros,
                                   name="trim_gt_class_ids")

    # Handle COCO crowds.
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
    non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_ix)
    gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
    gt_boxes = tf.gather(gt_boxes, non_crowd_ix)

    # Compute overlaps matrix [proposals, gt_boxes].
    overlaps = overlaps_graph(proposals, gt_boxes)

    # Compute overlaps with crowd boxes [proposals, crowds].
    crowd_overlaps = overlaps_graph(proposals, crowd_boxes)
    crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
    no_crowd_bool = (crowd_iou_max < 0.001)

    # Determine positive and negative ROIs.
    roi_iou_max = tf.reduce_max(overlaps, axis=1)
    # 1. Positive ROIs are those with >= 0.5 IoU with a GT box.
    positive_roi_bool = (roi_iou_max >= 0.5)
    positive_indices = tf.where(positive_roi_bool)[:, 0]
    # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds.
    negative_indices = tf.where(
        tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

    # Subsample ROIs so that positives make up about
    # config.ROI_POSITIVE_RATIO of the selection.
    # Positive ROIs.
    positive_count = int(config.TRAIN_ROIS_PER_IMAGE *
                         config.ROI_POSITIVE_RATIO)
    positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]
    # Negative ROIs. Add enough to maintain positive:negative ratio.
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32),
                             tf.int32) - positive_count
    negative_indices = tf.random_shuffle(negative_indices)[:negative_count]
    # Gather selected ROIs.
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)

    # Assign positive ROIs to GT boxes.
    positive_overlaps = tf.gather(overlaps, positive_indices)
    # argmax over an empty axis is an error, so when the image has no
    # (non-crowd) GT boxes fall back to an empty assignment. In that case
    # there are no positive ROIs either, so the shapes stay consistent.
    roi_gt_box_assignment = tf.cond(
        tf.greater(tf.shape(positive_overlaps)[1], 0),
        true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
        false_fn=lambda: tf.constant([], dtype=tf.int64))
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

    # Compute bbox refinement for positive ROIs.
    deltas = KerasRFCN.Utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas /= config.BBOX_STD_DEV

    # Append negative ROIs, then zero pad. Class IDs and deltas only exist for
    # the positive ROIs, so they are padded by N (negatives) + P (padding)
    # rows, while `rois` already contains the negatives and needs only P.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    deltas = tf.pad(deltas, [(0, N + P), (0, 0)])

    return rois, roi_gt_class_ids, deltas
def extract_features(self, preprocessed_inputs, init_extraction=False):
    """Extract features from preprocessed inputs.

    Both modes run the MobilenetV1 base network up to 'Conv2d_13_pointwise'
    and require inputs of at least 33x33 pixels (checked at run time).

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
      init_extraction: selects which head is built on top of the backbone.
        When true, a single feature tensor is produced from
        'Conv2d_13_pointwise' (3x3 conv with 512 outputs followed by a 4x4
        stride-1 VALID average pool). When false, the top-down feature maps
        from `self._topdown_feature_maps` are produced.

    Returns:
      If `init_extraction` is true, a single feature tensor. Otherwise a list
      with the values returned by `self._topdown_feature_maps`.

    Raises:
      ValueError: if `preprocessed_inputs` does not have static rank 4.
    """
    # Validation is identical for both modes, so it is built once.
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
            tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    # NOTE(review): this block was reviewed from whitespace-collapsed source.
    # Both heads are assumed to be built inside the conv arg_scope and the
    # 'MobilenetV1' variable scope — confirm against existing checkpoints.
    with tf.control_dependencies([shape_assert]):
        with slim.arg_scope(self._conv_hyperparams):
            with tf.variable_scope('MobilenetV1',
                                   reuse=self._reuse_weights) as scope:
                _, image_features = mobilenet_v1.mobilenet_v1_base(
                    preprocessed_inputs,
                    final_endpoint='Conv2d_13_pointwise',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)

                if init_extraction:
                    feature_head = image_features['Conv2d_13_pointwise']
                    # The scope name does not match the layer (3x3, 512
                    # outputs); presumably it is what existing checkpoints
                    # use — do not rename without checking.
                    feature_head = slim.conv2d(
                        feature_head,
                        512, [3, 3],
                        stride=1,
                        padding='SAME',
                        scope='Conv2d_Append_1x1_256')
                    feature_head = tf.nn.avg_pool(
                        feature_head,
                        strides=[1, 1, 1, 1],
                        ksize=[1, 4, 4, 1],
                        padding='VALID')
                    return feature_head

                bottomup_features_names = [
                    'Conv2d_11_pointwise', 'Conv2d_13_pointwise'
                ]
                num_appended_layers = 0
                appended_channel_num = [512]
                topdown_features = self._topdown_feature_maps(
                    image_features,
                    bottomup_features_names=bottomup_features_names,
                    num_appended_layers=num_appended_layers,
                    appended_channel_num=appended_channel_num)

    # Materialize the values: under Python 3 `dict.values()` is a view, not
    # the list that callers are documented to receive.
    return list(topdown_features.values())
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
    """Refines boxes by score-weighted voting over overlapping pool boxes.

    Box voting as described in 'Object detection via a multi-region & semantic
    segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015.

    For every box B in `selected_boxes`, let S be the boxes of `pool_boxes`
    whose IOU with B is strictly greater than `iou_thresh`. B's coordinates
    become the score-weighted mean of the coordinates in S, and B's score
    becomes the plain mean of the scores in S.

    Args:
      selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
        boxes are usually selected from pool_boxes using non max suppression.
      pool_boxes: BoxList containing a set of (possibly redundant) boxes. Must
        carry a 'scores' field.
      iou_thresh: (float scalar) iou threshold for matching boxes in
        selected_boxes and pool_boxes.

    Returns:
      BoxList with the voted locations and averaged scores, one entry per box
      in selected_boxes. Extra fields are copied over from selected_boxes.

    Raises:
      ValueError: if
        a) iou_thresh is not in [0, 1].
        b) selected_boxes or pool_boxes is not a BoxList.
        c) pool_boxes does not have a scores field.
    """
    if not 0.0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if not isinstance(selected_boxes, box_list.BoxList):
        raise ValueError('selected_boxes must be a BoxList')
    if not isinstance(pool_boxes, box_list.BoxList):
        raise ValueError('pool_boxes must be a BoxList')
    if not pool_boxes.has_field('scores'):
        raise ValueError('pool_boxes must have a \'scores\' field')

    # match_weights[i, j] is 1.0 when pool box j votes for selected box i.
    pairwise_iou = iou(selected_boxes, pool_boxes)
    match_weights = tf.cast(tf.greater(pairwise_iou, iou_thresh), tf.float32)
    votes_per_box = tf.reduce_sum(match_weights, 1)

    # TODO: Handle the case where some boxes in selected_boxes do not
    # match to any boxes in pool_boxes. For such boxes without any matches, we
    # should return the original boxes without voting.
    every_box_has_votes = tf.Assert(
        tf.reduce_all(tf.greater(votes_per_box, 0)),
        [
            'Each box in selected_boxes must match with at least one box '
            'in pool_boxes.'
        ])

    # Column vector of pool scores so it can be used in the matmuls below.
    pool_scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
    scores_are_valid = tf.Assert(
        tf.reduce_all(tf.greater_equal(pool_scores, 0)),
        ['Scores must be non negative.'])

    # Both results below are derived from `score_totals`, so gating this one
    # op is enough to make them depend on the assertions.
    with tf.control_dependencies([scores_are_valid, every_box_has_votes]):
        score_totals = tf.matmul(match_weights, pool_scores)

    mean_scores = tf.reshape(score_totals, [-1]) / votes_per_box
    # Only non-negativity of the scores is asserted, so `score_totals` can
    # still be zero for a box whose voters all have score zero.
    voted_locations = tf.matmul(
        match_weights, pool_boxes.get() * pool_scores) / score_totals

    voted_boxes = box_list.BoxList(voted_locations)
    _copy_extra_fields(voted_boxes, selected_boxes)
    voted_boxes.add_field('scores', mean_scores)
    return voted_boxes
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks,
                            config):
    """Builds training targets (ROIs, class IDs, deltas, masks) for one image.

    Proposals are split into positives (IoU >= 0.5 with some ground-truth box)
    and negatives (IoU < 0.5 with every ground-truth box and almost no overlap
    with crowd boxes), subsampled to the ratio given by
    config.ROI_POSITIVE_RATIO, and each positive is paired with its
    best-overlapping ground-truth box.

    Args:
        proposals: [N, (y1, x1, y2, x2)] proposal boxes; all-zero rows are
            treated as padding and removed.
        gt_class_ids: per-instance integer class IDs; negative IDs mark crowd
            boxes, which are excluded from training.
        gt_boxes: [instances, (y1, x1, y2, x2)] ground-truth boxes; all-zero
            rows are treated as padding and removed.
        gt_masks: [height, width, instances] ground-truth masks.
        config: object providing TRAIN_ROIS_PER_IMAGE, ROI_POSITIVE_RATIO,
            BBOX_STD_DEV, USE_MINI_MASK and MASK_SHAPE.

    Returns:
        rois: selected ROIs, positives first, then negatives.
        roi_gt_class_ids: class ID of each positive ROI, zero elsewhere.
        deltas: box refinements of the positive ROIs divided by
            config.BBOX_STD_DEV, zero elsewhere.
        masks: [rows, MASK_SHAPE[0], MASK_SHAPE[1]] mask targets of the
            positive ROIs, rounded to 0/1, zero elsewhere.
        All four are zero padded up to config.TRAIN_ROIS_PER_IMAGE rows when
        fewer ROIs were selected.
    """
    # Fail at run time if no proposals were supplied at all.
    has_proposals = tf.Assert(tf.greater(tf.shape(proposals)[0], 0),
                              [proposals], name="roi_assertion")
    with tf.control_dependencies([has_proposals]):
        proposals = tf.identity(proposals)

    # Strip the zero padding added upstream.
    proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, kept_gt = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids, kept_gt,
                                   name="trim_gt_class_ids")
    kept_gt_ix = tf.where(kept_gt)[:, 0]
    gt_masks = tf.gather(gt_masks, kept_gt_ix, axis=2, name="trim_gt_masks")

    # Handle COCO crowds.
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    crowd_rows = tf.where(gt_class_ids < 0)[:, 0]
    instance_rows = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_rows)
    gt_class_ids = tf.gather(gt_class_ids, instance_rows)
    gt_boxes = tf.gather(gt_boxes, instance_rows)
    gt_masks = tf.gather(gt_masks, instance_rows, axis=2)

    # Overlap of every proposal with every ground-truth box
    # [proposals, gt_boxes].
    iou_matrix = overlaps_graph(proposals, gt_boxes)

    # Overlap of every proposal with every crowd box [proposals, crowd_boxes].
    crowd_iou = overlaps_graph(proposals, crowd_boxes)
    worst_crowd_iou = tf.reduce_max(crowd_iou, axis=1)
    clear_of_crowds = (worst_crowd_iou < 0.001)

    # Determine positive and negative ROIs.
    best_iou = tf.reduce_max(iou_matrix, axis=1)
    # 1. Positives: IoU of at least 0.5 with some ground-truth box.
    is_positive = (best_iou >= 0.5)
    pos_ix = tf.where(is_positive)[:, 0]
    # 2. Negatives: IoU below 0.5 with every ground-truth box. Skip crowds.
    is_negative = tf.logical_and(best_iou < 0.5, clear_of_crowds)
    neg_ix = tf.where(is_negative)[:, 0]

    # Balance positives and negatives.
    # Keep at most TRAIN_ROIS_PER_IMAGE * ROI_POSITIVE_RATIO positives.
    max_positives = int(config.TRAIN_ROIS_PER_IMAGE *
                        config.ROI_POSITIVE_RATIO)
    pos_ix = tf.random_shuffle(pos_ix)[:max_positives]
    num_positives = tf.shape(pos_ix)[0]
    # Take enough negatives to keep the positive:negative ratio.
    inverse_ratio = 1.0 / config.ROI_POSITIVE_RATIO
    num_negatives = tf.cast(
        inverse_ratio * tf.cast(num_positives, tf.float32),
        tf.int32) - num_positives
    neg_ix = tf.random_shuffle(neg_ix)[:num_negatives]

    # Gather the selected positive and negative ROIs.
    positive_rois = tf.gather(proposals, pos_ix)
    negative_rois = tf.gather(proposals, neg_ix)

    # Overlaps of the positives only.
    positive_iou = tf.gather(iou_matrix, pos_ix)
    # Guard against images without ground-truth boxes: argmax over an empty
    # axis is not allowed, so use an empty assignment instead.
    gt_assignment = tf.cond(
        tf.greater(tf.shape(positive_iou)[1], 0),
        true_fn=lambda: tf.argmax(positive_iou, axis=1),
        false_fn=lambda: tf.cast(tf.constant([]), tf.int64)
    )

    # Ground-truth box and class matched to each positive ROI.
    roi_gt_boxes = tf.gather(gt_boxes, gt_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, gt_assignment)

    # Box refinement the network should predict for each positive ROI.
    deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas /= config.BBOX_STD_DEV

    # Rearrange the masks to [N, height, width, 1].
    masks_nhw1 = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
    # Pick the mask of the ground-truth instance matched to each ROI.
    roi_masks = tf.gather(masks_nhw1, gt_assignment)

    # Compute mask targets.
    crop_boxes = positive_rois
    if config.USE_MINI_MASK:
        # Transform ROI coordinates from normalized image space
        # to normalized mini-mask space.
        y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
        gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
        gt_height = gt_y2 - gt_y1
        gt_width = gt_x2 - gt_x1
        y1 = (y1 - gt_y1) / gt_height
        x1 = (x1 - gt_x1) / gt_width
        y2 = (y2 - gt_y1) / gt_height
        x2 = (x2 - gt_x1) / gt_width
        crop_boxes = tf.concat([y1, x1, y2, x2], 1)
    # Each ROI crops from its own gathered mask.
    crop_source_ids = tf.range(0, tf.shape(roi_masks)[0])
    masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32),
                                     crop_boxes, crop_source_ids,
                                     config.MASK_SHAPE)
    # Remove the extra dimension from masks.
    masks = tf.squeeze(masks, axis=3)

    # Resizing can produce values other than 0 and 1; round them back.
    masks = tf.round(masks)

    # Training expects config.TRAIN_ROIS_PER_IMAGE ROIs; zero pad when there
    # are fewer. Per-positive tensors are padded by the number of negatives
    # plus the padding rows, `rois` (which already holds the negatives) only
    # by the padding rows.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    num_neg_rows = tf.shape(negative_rois)[0]
    num_pad_rows = tf.maximum(
        config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, num_pad_rows), (0, 0)])
    # Padded for symmetry with the other targets; not part of the return.
    roi_gt_boxes = tf.pad(roi_gt_boxes,
                          [(0, num_neg_rows + num_pad_rows), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids,
                              [(0, num_neg_rows + num_pad_rows)])
    deltas = tf.pad(deltas, [(0, num_neg_rows + num_pad_rows), (0, 0)])
    masks = tf.pad(masks,
                   [[0, num_neg_rows + num_pad_rows], (0, 0), (0, 0)])

    return rois, roi_gt_class_ids, deltas, masks