def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu, pretrained_param_names, freeze_pretrained_steps, restart_warmup_after_unfreeze=True, lr_after_restarting=0.): """Creates an optimizer training op.""" global_step = tf.train.get_or_create_global_step() global_steps_int = tf.cast(global_step, tf.int32) num_train_steps_int = tf.constant(num_train_steps, dtype=tf.int32) warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32) current_step_in_decay = global_steps_int - warmup_steps_int num_decay_steps = num_train_steps_int - warmup_steps_int global_steps_float = tf.cast(global_steps_int, tf.float32) if freeze_pretrained_steps and restart_warmup_after_unfreeze: freeze_pretrained_steps_int = tf.cast(freeze_pretrained_steps, tf.int32) global_steps_int -= (tf.cast( global_steps_int >= freeze_pretrained_steps_int, tf.int32) * freeze_pretrained_steps_int) if lr_after_restarting <= 0.: raise ValueError( "Learning rate after restarting should not be zero: " + str(lr_after_restarting)) learning_rate = tf.cond(global_step < freeze_pretrained_steps, lambda: init_lr, lambda: lr_after_restarting) current_step_in_decay = tf.cond( global_step < freeze_pretrained_steps, lambda: current_step_in_decay, lambda: global_steps_int - warmup_steps_int) after_unfreeze_decay_steps = num_train_steps_int - ( freeze_pretrained_steps + warmup_steps_int) num_decay_steps = tf.cond(global_step < freeze_pretrained_steps, lambda: num_decay_steps, lambda: after_unfreeze_decay_steps) after_unfreeze_steps = global_steps_float - tf.cast( freeze_pretrained_steps_int, tf.float32) global_steps_float = tf.cond(global_step < freeze_pretrained_steps, lambda: global_steps_float, lambda: after_unfreeze_steps) tf.summary.scalar( "is pretraining", tf.cast(global_step < freeze_pretrained_steps, tf.int32)) else: learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32) tf.summary.scalar("global step count", global_steps_float) tf.summary.scalar("current base learning rate", learning_rate) tf.summary.scalar("global decay step", current_step_in_decay) tf.summary.scalar("total decay steps", num_decay_steps) # Implements linear decay of the learning rate. learning_rate = tf.train.polynomial_decay(learning_rate, tf.cast(current_step_in_decay, tf.float32), tf.cast(num_decay_steps, tf.float32), end_learning_rate=0.0, power=1.0, cycle=False) tf.summary.scalar("decayed learning rate", learning_rate) # Implements linear warmup. I.e., if global_step < num_warmup_steps, the # learning rate will be `global_step/num_warmup_steps * init_lr`. if num_warmup_steps: warmup_steps_float = tf.cast(warmup_steps_int, tf.float32) warmup_percent_done = global_steps_float / warmup_steps_float tf.summary.scalar("warmup percent done", warmup_percent_done) warmup_learning_rate = learning_rate * warmup_percent_done is_warmup = global_steps_int < warmup_steps_int tf.summary.scalar("is warmup", tf.cast(is_warmup, tf.float32)) learning_rate = tf.cond(is_warmup, lambda: warmup_learning_rate, lambda: learning_rate) tf.summary.scalar("learning rate", learning_rate) # It is recommended that you use this optimizer for fine tuning, since this # is how the model was trained (note that the Adam m/v variables are NOT # loaded from init_checkpoint.) 
optimizer = AdamWeightDecayOptimizer( learning_rate=learning_rate, weight_decay_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-6, exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"], pretrained_param_names=pretrained_param_names, freeze_pretrained_steps=freeze_pretrained_steps) if use_tpu: optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer) tvars = tf.trainable_variables() grads = tf.gradients(loss, tvars) # This is how the model was pre-trained. (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) train_op = optimizer.apply_gradients(list(zip(grads, tvars)), global_step=global_step) # Normally the global step update is done inside of `apply_gradients`. # However, `AdamWeightDecayOptimizer` doesn't do this. But if you use # a different optimizer, you should probably take this line out. new_global_step = global_step + 1 train_op = tf.group(train_op, [global_step.assign(new_global_step)]) return train_op
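# A minimal sketch of the warmup-vs-decay switch used in create_optimizer() above,
# assuming TF1-style graph mode and hypothetical values for init_lr,
# num_train_steps and num_warmup_steps (not the full optimizer setup).
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

init_lr = 1e-4
num_train_steps = 1000
num_warmup_steps = 100

global_step = tf.train.get_or_create_global_step()
global_step_float = tf.cast(global_step, tf.float32)

# Linear decay after warmup: from init_lr down to 0 over the remaining steps.
decayed_lr = tf.train.polynomial_decay(
    init_lr,
    tf.maximum(global_step - num_warmup_steps, 0),
    num_train_steps - num_warmup_steps,
    end_learning_rate=0.0,
    power=1.0)

# Linear warmup: from 0 up to init_lr over the first num_warmup_steps.
warmup_lr = init_lr * global_step_float / float(num_warmup_steps)

# tf.cond picks whichever phase the current step falls in.
learning_rate = tf.cond(global_step < num_warmup_steps,
                        lambda: warmup_lr,
                        lambda: decayed_lr)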
def train(train_dir, config, dataset_fn, checkpoints_to_keep=5, keep_checkpoint_every_n_hours=1, num_steps=None, master='', num_sync_workers=0, num_ps_tasks=0, task=0): """Train loop.""" tf.gfile.MakeDirs(train_dir) is_chief = (task == 0) if is_chief: _trial_summary(config.hparams, config.train_examples_path or config.tfds_name, train_dir) with tf.Graph().as_default(): with tf.device( tf.train.replica_device_setter(num_ps_tasks, merge_devices=True)): model = config.model model.build(config.hparams, config.data_converter.output_depth, is_training=True) optimizer = model.train(**_get_input_tensors(dataset_fn(), config)) hooks = [] if num_sync_workers: optimizer = tf.train.SyncReplicasOptimizer( optimizer, num_sync_workers) hooks.append(optimizer.make_session_run_hook(is_chief)) grads, var_list = list( zip(*optimizer.compute_gradients(model.loss))) global_norm = tf.global_norm(grads) tf.summary.scalar('global_norm', global_norm) if config.hparams.clip_mode == 'value': g = config.hparams.grad_clip clipped_grads = [ tf.clip_by_value(grad, -g, g) for grad in grads ] elif config.hparams.clip_mode == 'global_norm': clipped_grads = tf.cond( global_norm < config.hparams.grad_norm_clip_to_zero, lambda: tf.clip_by_global_norm( # pylint:disable=g-long-lambda grads, config.hparams.grad_clip, use_norm=global_norm)[0], lambda: [tf.zeros(tf.shape(g)) for g in grads]) else: raise ValueError('Unknown clip_mode: {}'.format( config.hparams.clip_mode)) train_op = optimizer.apply_gradients(list( zip(clipped_grads, var_list)), global_step=model.global_step, name='train_step') logging_dict = { 'global_step': model.global_step, 'loss': model.loss } hooks.append( tf.train.LoggingTensorHook(logging_dict, every_n_iter=100)) if num_steps: hooks.append(tf.train.StopAtStepHook(last_step=num_steps)) scaffold = tf.train.Scaffold(saver=tf.train.Saver( max_to_keep=checkpoints_to_keep, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)) tf_slim.training.train(train_op=train_op, logdir=train_dir, scaffold=scaffold, hooks=hooks, save_checkpoint_secs=60, master=master, is_chief=is_chief)
def build_learning_rate_schedule( learning_rate, decay_type, warmup_start_epoch, max_learning_rate_epoch, decay_end_epoch, global_step, steps_per_epoch, **decay_type_specific_kwargs): """Build learning rate from base learning rate and other details. We note that warmup_start_epoch <= max_learning_rate_epoch < decay_end_epoch since the warmup happens at the start of learning rate schedule. Args: learning_rate: Learning rate for the model. decay_type: Name of the decay that should be applied to the learning rate. warmup_start_epoch: Epoch at which learning rate warmup starts. max_learning_rate_epoch: Epoch at which learning rate warmup ends and the decay kicks in. decay_end_epoch: Epoch at which learning rate decays ends, at which point learning rate becomes 0. global_step: The global step to use for learning rate computation. steps_per_epoch: Integer which defines the number of steps that are run for every epoch. **decay_type_specific_kwargs: Specific key-word arguments which are unique to a said `decay_type`. Returns: Scalar tensor which stores the learning rate at a given global step. """ if decay_end_epoch == max_learning_rate_epoch: # This stage of training is 0 epochs long, so just return learning_rate and # avoid potential divide by 0 problems. if warmup_start_epoch < max_learning_rate_epoch: raise ValueError( 'Cannot have warmup for a 0-step learning rate schedule.') return learning_rate assert warmup_start_epoch <= max_learning_rate_epoch assert max_learning_rate_epoch < decay_end_epoch max_learning_rate_epoch_tensor = tf.convert_to_tensor(max_learning_rate_epoch) warmup_start_epoch_tensor = tf.convert_to_tensor( warmup_start_epoch, max_learning_rate_epoch_tensor.dtype) decay_end_epoch_tensor = tf.convert_to_tensor( decay_end_epoch, max_learning_rate_epoch_tensor.dtype) steps_per_epoch_tensor = tf.cast(steps_per_epoch, max_learning_rate_epoch_tensor.dtype) # Learning rate decay kicks in starting max_learning_rate_epoch # Before max_learning_rate_epoch either there is a warmup or the learning rate # is set to the constant value of `initial_lr`. learning_rate_step = global_step - tf.cast( max_learning_rate_epoch_tensor * steps_per_epoch_tensor, global_step.dtype) def _no_decay_fn(initial_lr, *args, **kwargs): del args, kwargs return initial_lr decay_type_fn_map = { enums.DecayType.EXPONENTIAL: exponential_decay, enums.DecayType.COSINE: cosine_decay, enums.DecayType.PIECEWISE_LINEAR: piecewise_linear_decay, enums.DecayType.NO_DECAY: _no_decay_fn, } if decay_type not in decay_type_fn_map: raise ValueError(f'Unknown decay type {decay_type}') decayed_learning_rate = decay_type_fn_map[decay_type]( initial_lr=learning_rate, global_step=learning_rate_step, total_epochs=decay_end_epoch_tensor - max_learning_rate_epoch_tensor, steps_per_epoch=steps_per_epoch, **decay_type_specific_kwargs) # The learning rate is set to 0 once global_step is more than total_steps. total_steps = tf.cast( steps_per_epoch_tensor * ( decay_end_epoch_tensor - max_learning_rate_epoch_tensor), global_step.dtype) decayed_learning_rate = tf.cond( learning_rate_step <= total_steps, lambda: decayed_learning_rate, lambda: 0.0) warmup_step_counter = global_step - tf.cast( warmup_start_epoch_tensor * steps_per_epoch_tensor, global_step.dtype) return maybe_add_warmup_to_lr( learning_rate, decayed_learning_rate, warmup_step_counter, max_learning_rate_epoch - warmup_start_epoch_tensor, steps_per_epoch_tensor)
def _buckets(data, bucket_count=None): """Create a TensorFlow op to group data into histogram buckets. Arguments: data: A `Tensor` of any shape. Must be castable to `float64`. bucket_count: Optional positive `int` or scalar `int32` `Tensor`. Returns: A `Tensor` of shape `[k, 3]` and type `float64`. The `i`th row is a triple `[left_edge, right_edge, count]` for a single bucket. The value of `k` is either `bucket_count` or `1` or `0`. """ # TODO(nickfelt): remove on-demand imports once dep situation is fixed. import tensorflow.compat.v1 as tf if bucket_count is None: bucket_count = summary_v2.DEFAULT_BUCKET_COUNT with tf.name_scope("buckets", values=[data, bucket_count]), tf.control_dependencies([ tf.assert_scalar(bucket_count), tf.assert_type(bucket_count, tf.int32) ]): data = tf.reshape(data, shape=[-1]) # flatten data = tf.cast(data, tf.float64) is_empty = tf.equal(tf.size(input=data), 0) def when_empty(): return tf.constant([], shape=(0, 3), dtype=tf.float64) def when_nonempty(): min_ = tf.reduce_min(input_tensor=data) max_ = tf.reduce_max(input_tensor=data) range_ = max_ - min_ is_singular = tf.equal(range_, 0) def when_nonsingular(): bucket_width = range_ / tf.cast(bucket_count, tf.float64) offsets = data - min_ bucket_indices = tf.cast(tf.floor(offsets / bucket_width), dtype=tf.int32) clamped_indices = tf.minimum(bucket_indices, bucket_count - 1) # Use float64 instead of float32 to avoid accumulating floating point error # later in tf.reduce_sum when summing more than 2^24 individual `1.0` values. # See https://github.com/tensorflow/tensorflow/issues/51419 for details. one_hots = tf.one_hot(clamped_indices, depth=bucket_count, dtype=tf.float64) bucket_counts = tf.cast( tf.reduce_sum(input_tensor=one_hots, axis=0), dtype=tf.float64, ) edges = tf.linspace(min_, max_, bucket_count + 1) left_edges = edges[:-1] right_edges = edges[1:] return tf.transpose( a=tf.stack([left_edges, right_edges, bucket_counts])) def when_singular(): center = min_ bucket_starts = tf.stack([center - 0.5]) bucket_ends = tf.stack([center + 0.5]) bucket_counts = tf.stack( [tf.cast(tf.size(input=data), tf.float64)]) return tf.transpose( a=tf.stack([bucket_starts, bucket_ends, bucket_counts])) return tf.cond(is_singular, when_singular, when_nonsingular) return tf.cond(is_empty, when_empty, when_nonempty)
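# A minimal sketch of the nested tf.cond guard used in _buckets() above, reduced
# to a "safe range" computation; assumes TF1-style graph mode and a hypothetical
# fallback value of 1.0 for degenerate (all-equal) input.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

data = tf.placeholder(tf.float64, shape=[None])
is_empty = tf.equal(tf.size(data), 0)

def when_empty():
    return tf.constant(0.0, dtype=tf.float64)

def when_nonempty():
    min_ = tf.reduce_min(data)
    max_ = tf.reduce_max(data)
    range_ = max_ - min_
    # All-equal data would otherwise produce a zero bucket width downstream.
    return tf.cond(tf.equal(range_, 0),
                   lambda: tf.constant(1.0, dtype=tf.float64),
                   lambda: range_)

safe_range = tf.cond(is_empty, when_empty, when_nonempty)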
def build_sample_graph(self, input_pianorolls=None, outer_masks=None, total_gibbs_steps=None): """Builds the tf.while_loop based sampling graph. Args: input_pianorolls: Optional input pianorolls override. If None, uses the pianorolls placeholder. outer_masks: Optional input outer_masks override. If None, uses the outer_masks placeholder. total_gibbs_steps: Optional input total_gibbs_steps override. If None, uses the total_gibbs_steps placeholder. Returns: The output op of the graph. """ if input_pianorolls is None: input_pianorolls = self.inputs["pianorolls"] if outer_masks is None: outer_masks = self.inputs["outer_masks"] tt = tf.shape(input_pianorolls)[1] sample_steps = tf.to_float(self.inputs["sample_steps"]) if total_gibbs_steps is None: total_gibbs_steps = self.inputs["total_gibbs_steps"] temperature = self.inputs["temperature"] input_pianorolls = tf.to_float(input_pianorolls) outer_masks = self.make_outer_masks(outer_masks, input_pianorolls) # Calculate total_gibbs_steps as steps * num_instruments if not given. total_gibbs_steps = tf.cond( tf.equal(total_gibbs_steps, 0), lambda: tf.to_float(tt * self.hparams.num_instruments), lambda: tf.to_float(total_gibbs_steps)) # sample_steps is set to total_gibbs_steps if not given. sample_steps = tf.cond(tf.equal(sample_steps, 0), lambda: total_gibbs_steps, lambda: tf.to_float(sample_steps)) def infer_step(pianorolls, step_count): """Called by tf.while_loop, takes a Gibbs step.""" mask_prob = compute_mask_prob_from_yao_schedule( step_count, total_gibbs_steps) # 1 indicates mask out, 0 is not mask. masks = make_bernoulli_masks(tf.shape(pianorolls), mask_prob, outer_masks) logits = self.predict(pianorolls, masks) samples = sample_with_temperature(logits, temperature=temperature) outputs = pianorolls * (1 - masks) + samples * masks check_completion_op = tf.assert_equal( tf.where(tf.equal(tf.reduce_max(masks, axis=2), 1.), tf.reduce_max(outputs, axis=2), tf.reduce_max(pianorolls, axis=2)), 1.) with tf.control_dependencies([check_completion_op]): outputs = tf.identity(outputs) step_count += 1 return outputs, step_count current_step = tf.to_float(self.inputs["current_step"]) # Initializes pianorolls by evaluating the model once to fill in all gaps. logits = self.predict(tf.to_float(input_pianorolls), outer_masks) samples = sample_with_temperature(logits, temperature=temperature) tf.get_variable_scope().reuse_variables() self.samples, current_step = tf.while_loop( lambda samples, current_step: current_step < sample_steps, infer_step, [samples, current_step], shape_invariants=[ tf.TensorShape([None, None, None, None]), tf.TensorShape(None), ], back_prop=False, parallel_iterations=1, name="coco_while") self.samples.set_shape(input_pianorolls.shape) return self.samples
def parse_fn(filename, output_sequence_length=IMAGES_PER_SEQUENCE): """Read data from single files stored in directories. Args: filename: the filename of the set of files to be loaded. output_sequence_length: Length of the output sequence. If less than IMAGES_PER_SEQUENCE, only the first `output_sequence_length` frames will be kept. Returns: A dictionary that maps strings to tf.Tensors of type float32: 'rgb': an RGB image of shape H, W, 3. Each channel value is between 0.0 and 1.0. 'intrinsics': a list of intrinsics values. """ if output_sequence_length > IMAGES_PER_SEQUENCE or output_sequence_length < 1: raise ValueError( 'Invalid output_sequence_length %d: must be within [1, ' '%d].' % (output_sequence_length, IMAGES_PER_SEQUENCE)) image_file = tf.strings.join([filename, '.png']) intrinsics_file = tf.strings.join([filename, '_cam.txt']) mask_file = tf.strings.join([filename, '-fseg.png']) # Read files. encoded_image = tf.io.read_file(image_file) encoded_mask = tf.io.read_file(mask_file) intrinsics_content = tf.io.read_file(intrinsics_file) content_is_empty = tf.math.equal(intrinsics_content, '') filename_matches = tf.strings.regex_full_match( filename, '.*%s$' % KITTI_CORRUPT_FILE) file_is_corrupt = tf.math.logical_and(content_is_empty, filename_matches) intrinsics_content = tf.cond(file_is_corrupt, lambda: KITTI_CORRUPT_FILE_INTRINSICS, lambda: intrinsics_content) # Parse intrinsics data to a tensor representing a 3x3 matrix. intrinsics = tf.strings.split([intrinsics_content], ',').values intrinsics = tf.strings.to_number(intrinsics) intrinsics.set_shape([9]) fx, _, x0, _, fy, y0, _, _, _ = tf.unstack(intrinsics) intrinsics = tf.stack([IMAGE_WIDTH, IMAGE_HEIGHT, fx, fy, x0, y0]) # Decode and normalize images. decoded_image = tf.image.decode_png(encoded_image, channels=3) decoded_image = tf.to_float(decoded_image) * (1 / 255.0) split_image_sequence = tf.split(decoded_image, IMAGES_PER_SEQUENCE, axis=1) decoded_mask = tf.image.decode_png(encoded_mask, channels=3) mask_r, mask_g, mask_b = tf.unstack(tf.to_int32(decoded_mask), axis=-1) # Since TPU does not support images of type uint8, we encode the 3 RGB uint8 # values into one int32 value. mask = mask_r * (256 * 256) + mask_g * 256 + mask_b # All images in our pipeline have 3 dimensions (height, width, channels), so # we add a third dimension to the mask too. mask = tf.expand_dims(mask, -1) split_mask_sequence = tf.split(mask, IMAGES_PER_SEQUENCE, axis=1) return { 'rgb': tf.stack(split_image_sequence[:output_sequence_length]), 'intrinsics': tf.stack([intrinsics] * output_sequence_length), 'mask': tf.stack(split_mask_sequence[:output_sequence_length]), }
def _parse_train_data(self, data): """Parses data for training. Args: data: the decoded tensor dictionary from TfExampleDecoder. Returns: image: image tensor that is preproessed to have normalized value and dimension [output_size[0], output_size[1], 3] labels: a dictionary of tensors used for training. The following describes {key: value} pairs in the dictionary. image_info: a 2D `Tensor` that encodes the information of the image and the applied preprocessing. It is in the format of [[original_height, original_width], [scaled_height, scaled_width], anchor_boxes: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with shape [height_l, width_l, 4] representing anchor boxes at each level. rpn_score_targets: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with shape [height_l, width_l, anchors_per_location]. The height_l and width_l represent the dimension of class logits at l-th level. rpn_box_targets: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with shape [height_l, width_l, anchors_per_location * 4]. The height_l and width_l represent the dimension of bounding box regression output at l-th level. gt_boxes: Groundtruth bounding box annotations. The box is represented in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled image that is fed to the network. The tennsor is padded with -1 to the fixed dimension [self._max_num_instances, 4]. gt_classes: Groundtruth classes annotations. The tennsor is padded with -1 to the fixed dimension [self._max_num_instances]. gt_masks: groundtrugh masks cropped by the bounding box and resized to a fixed size determined by mask_crop_size. """ classes = data['groundtruth_classes'] boxes = data['groundtruth_boxes'] if self._include_mask: masks = data['groundtruth_instance_masks'] is_crowds = data['groundtruth_is_crowd'] # Skips annotations with `is_crowd` = True. if self._skip_crowd_during_training and self._is_training: num_groundtrtuhs = tf.shape(classes)[0] with tf.control_dependencies([num_groundtrtuhs, is_crowds]): indices = tf.cond( tf.greater(tf.size(is_crowds), 0), lambda: tf.where(tf.logical_not(is_crowds))[:, 0], lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64)) classes = tf.gather(classes, indices) boxes = tf.gather(boxes, indices) if self._include_mask: masks = tf.gather(masks, indices) # Gets original image and its size. image = data['image'] image_shape = tf.shape(image)[0:2] # Normalizes image with mean and std pixel values. image = input_utils.normalize_image(image) # Flips image randomly during training. if self._aug_rand_hflip: if self._include_mask: image, boxes, masks = input_utils.random_horizontal_flip( image, boxes, masks) else: image, boxes = input_utils.random_horizontal_flip(image, boxes) # Converts boxes from normalized coordinates to pixel coordinates. # Now the coordinates of boxes are w.r.t. the original image. boxes = box_utils.denormalize_boxes(boxes, image_shape) # Resizes and crops image. image, image_info = input_utils.resize_and_crop_image( image, self._output_size, padded_size=input_utils.compute_padded_size( self._output_size, 2**self._max_level), aug_scale_min=self._aug_scale_min, aug_scale_max=self._aug_scale_max) image_height, image_width, _ = image.get_shape().as_list() # Resizes and crops boxes. # Now the coordinates of boxes are w.r.t the scaled image. 
image_scale = image_info[2, :] offset = image_info[3, :] boxes = input_utils.resize_and_crop_boxes(boxes, image_scale, (image_height, image_width), offset) if self._include_mask: masks = input_utils.resize_and_crop_masks( tf.expand_dims(masks, axis=-1), image_scale, (image_height, image_width), offset) masks = tf.squeeze(masks, axis=-1) # Filters out ground truth boxes that are all zeros. indices = input_utils.get_non_empty_box_indices(boxes) boxes = tf.gather(boxes, indices) classes = tf.gather(classes, indices) if self._include_mask: masks = tf.gather(masks, indices) num_masks = tf.shape(masks)[0] masks = tf.image.crop_and_resize( tf.expand_dims(masks, axis=-1), box_utils.normalize_boxes(boxes, tf.shape(image)[0:2]), box_ind=tf.range(num_masks, dtype=tf.int32), crop_size=[self._mask_crop_size, self._mask_crop_size], method='bilinear') masks = tf.squeeze(masks, axis=-1) # Assigns anchor targets. # Note that after the target assignment, box targets are absolute pixel # offsets w.r.t. the scaled image. input_anchor = anchor.Anchor(self._min_level, self._max_level, self._num_scales, self._aspect_ratios, self._anchor_size, (image_height, image_width)) anchor_labeler = anchor.RpnAnchorLabeler(input_anchor, self._rpn_match_threshold, self._rpn_unmatched_threshold, self._rpn_batch_size_per_im, self._rpn_fg_fraction) rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors( boxes, tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32)) # If bfloat16 is used, casts input image to tf.bfloat16. if self._use_bfloat16: image = tf.cast(image, dtype=tf.bfloat16) # Packs labels for model_fn outputs. labels = { 'anchor_boxes': input_anchor.multilevel_boxes, 'image_info': image_info, 'rpn_score_targets': rpn_score_targets, 'rpn_box_targets': rpn_box_targets, } labels['gt_boxes'] = input_utils.pad_to_fixed_size( boxes, self._max_num_instances, -1) labels['gt_classes'] = input_utils.pad_to_fixed_size( classes, self._max_num_instances, -1) if self._include_mask: labels['gt_masks'] = input_utils.pad_to_fixed_size( masks, self._max_num_instances, -1) return image, labels
def proposal(*args):
    return tf.cond(
        pred=no_crop_check(),
        true_fn=no_crop_proposal,
        false_fn=crop_proposal,
    )
def get_train_ops(loss,
                  tf_variables,
                  train_step,
                  clip_mode=None,
                  grad_bound=None,
                  l2_reg=1e-4,
                  lr_warmup_val=None,
                  lr_warmup_steps=100,
                  lr_init=0.1,
                  lr_dec_start=0,
                  lr_dec_every=10000,
                  lr_dec_rate=0.1,
                  lr_dec_min=None,
                  lr_cosine=False,
                  lr_max=None,
                  lr_min=None,
                  lr_T_0=None,
                  lr_T_mul=None,
                  num_train_batches=None,
                  optim_algo=None,
                  sync_replicas=False,
                  num_aggregate=None,
                  num_replicas=None,
                  get_grad_norms=False,
                  moving_average=None):
    """
    Args:
      clip_mode: "global", "norm", or None.
      moving_average: store the moving average of parameters.
    """
    if l2_reg > 0:
        l2_losses = []
        for var in tf_variables:
            l2_losses.append(tf.reduce_sum(var**2))
        l2_loss = tf.add_n(l2_losses)
        loss += l2_reg * l2_loss

    grads = tf.gradients(loss, tf_variables)
    grad_norm = tf.global_norm(grads)

    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values**2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g**2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    # IndexedSlices takes (values, indices); keep the original indices.
                    c_g = tf.IndexedSlices(c_g, g.indices)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                # Append the clipped gradient, not the original one.
                clipped.append(c_g)
            grads = clipped
        else:
            raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        # train_step is incremented once per batch, so this is the current epoch.
        curr_epoch = train_step // num_train_batches

        last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _update():
            update_last_reset = tf.assign(last_reset, curr_epoch, use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
                return lr

        def _no_update():
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(tf.greater_equal(T_curr, T_i), _update, _no_update)
    else:
        learning_rate = tf.train.exponential_decay(
            lr_init,
            tf.maximum(train_step - lr_dec_start, 0),
            lr_dec_every,
            lr_dec_rate,
            staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val,
                                lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_locking=True,
                                         use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(learning_rate, use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."
        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(zip(grads, tf_variables),
                                   global_step=train_step)

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    return train_op, learning_rate, grad_norm, opt
def body(x):
    # Note: the incoming loop variable x is discarded and replaced with a
    # constant, and z is unused; this is a toy tf.cond example.
    x = tf.constant(7)
    z = tf.constant(20)
    res = tf.cond(tf.less(x, 10),
                  lambda: tf.add(10, 20),
                  lambda: tf.square(10))
    return tf.multiply(res, x)
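# For reference on the semantics this toy function relies on: tf.cond traces
# both branch callables into the graph, but only the selected branch runs at
# session time, and both branches must produce matching dtypes/shapes.
# A minimal, self-contained check (TF1-style graph mode, hypothetical feed values):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

x = tf.placeholder(tf.int32, shape=[])
res = tf.cond(tf.less(x, 10),
              lambda: tf.add(10, 20),   # taken when x < 10
              lambda: tf.square(10))    # taken otherwise

with tf.Session() as sess:
    print(sess.run(res, feed_dict={x: 7}))   # 30
    print(sess.run(res, feed_dict={x: 12}))  # 100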
def D(data_source, x_real, x_fake, dropout_rate, is_training, reuse=True, print_summary=True): # data_source is a string, either "fake" or "real", which determines whether do to the word # embedding lookup to avoid non-differentiability issues. # discriminator (x -> n + 1 class) with tf.variable_scope('Discriminator', reuse=reuse) as scope: # Embedding layer # Input x has shape [batch_size, 63] where 63 is the sequence length W_embed = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name="W_embed") embedded_chars = tf.nn.embedding_lookup(W_embed, x_real) # Add a channel dimension: embedded_char_expanded = tf.expand_dims(embedded_chars, -1) # Output size: [batch_size, sequence_length, embedding_size, 1] print('fake shape is!') print(x_fake.get_shape()) print('embed_char_expand shape is!') print(embedded_char_expanded.get_shape()) # conditional pipeline! def f1(): return embedded_char_expanded def f2(): return x_fake real_or_fake = tf.math.equal('real', data_source) input_x = tf.cond(real_or_fake, f1, f2) print('input_x shape is!') # [batch, seq_len, embed_size, 1] print(input_x.get_shape()) pooled_outputs = [ ] # As per the paper, the pooling layer takes the max of each filter's featuremaps # NOTE: We are using multiple filter sizes as per the paper's specs for i, filter_size in enumerate(filter_sizes): #with tf.name_scope("conv-maxpool-filter_size-"+str(filter_size)): # Define W as the filter matrix (NOTE: different namescope from the W above) # Initialized with truncated normal parameters # The W filter has shape: [height, width, input_channels, output_channels] W = tf.Variable( tf.truncated_normal( [filter_size, embedding_size, 1, num_filters], stddev=0.1)) # Conv layer: valid padding yields output of shape: # [none, sequence_length - filter_size + 1, 1, num_filters] # for dimensions: [none, height, width, channel] # TF document: "(conv2d) has the same type as input and the same outer batch shape." conv = tf.nn.conv2d(input_x, W, strides=[1, 1, 1, 1], padding="VALID", name="conv") # Biase vector: 1d vector with length=number of output channels of conv b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b") # Relu h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu') lrelu3 = tf.maximum(0.2 * h, h) # TF document: "ksize: The size of the window for each dimension of the input tensor." pooled = tf.nn.max_pool( lrelu3, ksize=[1, sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding="VALID", name="pool") # The output now has size: [none, 1, 1, num_filters] pooled_outputs.append(pooled) num_filters_total = num_filters * len(filter_sizes) h_pool = tf.concat(pooled_outputs, 3) # Concatenate on the forth dimension h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total]) # The output now has shape: [none, num_filters_total] #with tf.name_scope("dropout"): h_drop = tf.nn.dropout(h_pool_flat, rate=dropout_rate) #with tf.name_scope("output"): # Fully connected layer # Matrix multiplication: (none, num_filters_total)x(num_filters_total, num_classes) = (none, num_classes) W = tf.Variable(tf.truncated_normal( [num_filters_total, num_classes + 1], stddev=0.1), name="W") # NOTE: b has dimension of the channels (in this case, num_classes) b = tf.Variable(tf.constant(0.1, shape=[num_classes + 1]), name="b") fc = tf.nn.xw_plus_b(h_drop, W, b, name="scores") #Logits output = tf.nn.softmax(fc) return h_pool_flat, fc, output, real_or_fake
def _dataset_parser(value): """Parse data to a fixed dimension input image and learning targets. Args: value: A dictionary contains an image and groundtruth annotations. Returns: image: Image tensor that is preproessed to have normalized value and fixed dimension [image_size, image_size, 3] cls_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with shape [height_l, width_l, num_anchors]. The height_l and width_l represent the dimension of class logits at l-th level. box_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with shape [height_l, width_l, num_anchors * 4]. The height_l and width_l represent the dimension of bounding box regression output at l-th level. num_positives: Number of positive anchors in the image. source_id: Source image id. Default value -1 if the source id is empty in the groundtruth annotation. image_scale: Scale of the proccessed image to the original image. image_info: image information that includes the original height and width, the scale of the proccessed image to the original image, and the scaled height and width. boxes: Groundtruth bounding box annotations. The box is represented in [y1, x1, y2, x2] format. The tennsor is padded with -1 to the fixed dimension [self._max_num_instances, 4]. is_crowds: Groundtruth annotations to indicate if an annotation represents a group of instances by value {0, 1}. The tennsor is padded with 0 to the fixed dimension [self._max_num_instances]. areas: Groundtruth areas annotations. The tennsor is padded with -1 to the fixed dimension [self._max_num_instances]. classes: Groundtruth classes annotations. The tennsor is padded with -1 to the fixed dimension [self._max_num_instances]. """ with tf.name_scope('parser'): data = example_decoder.decode(value) data['groundtruth_is_crowd'] = tf.cond( tf.greater(tf.size(data['groundtruth_is_crowd']), 0), lambda: data['groundtruth_is_crowd'], lambda: tf.zeros_like(data['groundtruth_classes'], dtype=tf.bool)) source_id = data['source_id'] image = data['image'] boxes = data['groundtruth_boxes'] classes = data['groundtruth_classes'] classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1]) areas = data['groundtruth_area'] is_crowds = data['groundtruth_is_crowd'] classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1]) input_height = tf.shape(image)[0] input_width = tf.shape(image)[1] if params['skip_crowd_during_training'] and self._is_training: indices = tf.where( tf.logical_not(data['groundtruth_is_crowd'])) classes = tf.gather_nd(classes, indices) boxes = tf.gather_nd(boxes, indices) input_processor = DetectionInputProcessor( image, params['image_size'], boxes, classes) input_processor.normalize_image() if self._is_training and params['input_rand_hflip']: input_processor.random_horizontal_flip() if self._is_training: input_processor.set_training_random_scale_factors( params['train_scale_min'], params['train_scale_max']) else: input_processor.set_scale_factors_to_output_size() image = input_processor.resize_and_crop_image() boxes, classes = input_processor.resize_and_crop_boxes() # Assign anchors. (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(boxes, classes) source_id = tf.where(tf.equal(source_id, tf.constant('')), '-1', source_id) source_id = tf.string_to_number(source_id) # Pad groundtruth data for evaluation. 
image_scale = input_processor.image_scale_to_original scaled_height = tf.to_float( input_height) * input_processor.image_scale scaled_width = tf.to_float( input_width) * input_processor.image_scale image_info = tf.stack([ tf.cast(scaled_height, dtype=tf.float32), tf.cast(scaled_width, dtype=tf.float32), image_scale, tf.cast(input_height, dtype=tf.float32), tf.cast(input_width, dtype=tf.float32), ]) boxes *= image_scale is_crowds = tf.cast(is_crowds, dtype=tf.float32) boxes = pad_to_fixed_size(boxes, -1, [self._max_num_instances, 4]) is_crowds = pad_to_fixed_size(is_crowds, 0, [self._max_num_instances, 1]) areas = pad_to_fixed_size(areas, -1, [self._max_num_instances, 1]) classes = pad_to_fixed_size(classes, -1, [self._max_num_instances, 1]) if params['use_bfloat16']: image = tf.cast(image, dtype=tf.bfloat16) return (image, cls_targets, box_targets, num_positives, source_id, image_scale, image_info, boxes, is_crowds, areas, classes)
def compute_total_loss(self, pd_new, pd_old, value_tensor, return_tensor, batch_advantage_norm, policy_old_neg_logprob_tensor, policy_action_tensor): """Defines the total loss function. Args: pd_new: The current policy distribution (a multivariate normal distribution). This policy distribution gets updated in the course of training. pd_old: The old policy distribution that we use during sampling the trajectory (a multivariate normal distribution). value_tensor: The values associated to the rollout trajectory. return_tensor: The return values computed for the rollout trajectory. batch_advantage_norm: The normalized advantage tensor computed for a batch of data. For advantage calculation, we use generalized advantage estimation (GAE) formula. policy_old_neg_logprob_tensor: The negative log probabilities from the policy rollouts. policy_action_tensor: The actions from the policy rollouts. """ # Policy loss ppo_policy_loss_out = ppo_loss.ppo_policy_loss( neg_logprobs_old=policy_old_neg_logprob_tensor, actions=policy_action_tensor, advantages=batch_advantage_norm, dist_new=pd_new, mcts_sampling=self.mcts_sampling_enable) (self.policy_loss, self.approxkl, self.clipfrac, self.policy_ratio) = ppo_policy_loss_out # Value Loss if self._ppo2_enable: self.value_loss = ppo_loss.ppo2_value_loss( value_old=value_tensor, pred_value=self.value_new, returns=return_tensor) else: self.value_loss = ppo_loss.ppo1_value_loss( pred_value=self.value_new, returns=return_tensor) # MSE loss between mean and standard deviations self.mean_mse_loss, self.logstd_mse_loss = ppo_loss.l2_norm_policy_loss( policy_mean=self.mean_new, policy_logstd=self.logstd_new, mcts_mean=self.mean_old, mcts_logstd=self.logstd_old) mcts_dist = distributions.MultiVariateNormalDiag( mean=self.mean_old, logstd=self.logstd_old) policy_dist = distributions.MultiVariateNormalDiag( mean=self.mean_new, logstd=self.logstd_new) self.imitation_kl_divergence = tf.reduce_mean( policy_dist.kl_divergence(mcts_dist)) # Calculate KL divergence and entropy of new distribution self.kl_divergence = tf.reduce_mean(pd_new.kl_divergence(pd_old)) self.entropy = pd_new.entropy() # Calculate entropy loss self.entropy_loss = tf.reduce_mean(self.entropy) # Calulate total loss total_loss_ppo = (self._policy_coeff * self.policy_loss) + ( self._value_coeff * self.value_loss) - (self._entropy_coeff * self.entropy_loss) total_loss_mcts = (self._value_coeff * self.value_loss) + ( self._mse_loss_coeff * (self.imitation_kl_divergence + self.entropy_loss)) self.total_loss = tf.cond(tf.equal(self.mcts_sampling_enable, True), lambda: total_loss_mcts, lambda: total_loss_ppo)
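# One detail worth noting about the predicate above: tf.cond only needs a scalar
# boolean, so if self.mcts_sampling_enable is already a boolean tensor it can be
# passed directly, without wrapping it in tf.equal(..., True). A minimal sketch
# with stand-in constants for the two loss formulations (TF1-style graph mode):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

mcts_sampling_enable = tf.placeholder(tf.bool, shape=[])
total_loss_ppo = tf.constant(1.0)   # stand-in for the PPO total loss
total_loss_mcts = tf.constant(2.0)  # stand-in for the MCTS total loss

total_loss = tf.cond(mcts_sampling_enable,
                     lambda: total_loss_mcts,
                     lambda: total_loss_ppo)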
def create_train_op(optimizer,
                    grads_and_vars,
                    max_grad=1.0,
                    mixed_precision=False,
                    gradient_accumulation_steps=1):
    global_step = tf.train.get_or_create_global_step()

    if gradient_accumulation_steps > 1:
        local_step = tf.get_variable(name="local_step", shape=[], dtype=tf.int32,
                                     trainable=False,
                                     initializer=tf.zeros_initializer)
        batch_finite = tf.get_variable(name="batch_finite", shape=[], dtype=tf.bool,
                                       trainable=False,
                                       initializer=tf.ones_initializer)
        accum_vars = [
            tf.get_variable(name=tvar.name.split(":")[0] + "/accum",
                            shape=tvar.shape.as_list(),
                            dtype=tf.float32,
                            trainable=False,
                            initializer=tf.zeros_initializer())
            for tvar in tf.trainable_variables()
        ]

        reset_step = tf.cast(
            tf.math.equal(local_step % gradient_accumulation_steps, 0),
            dtype=tf.bool)
        local_step = tf.cond(reset_step,
                             lambda: local_step.assign(tf.ones_like(local_step)),
                             lambda: local_step.assign_add(1))

        grads_and_vars_and_accums = [(gv[0], gv[1], accum_vars[i])
                                     for i, gv in enumerate(grads_and_vars)
                                     if gv[0] is not None]
        grads, tvars, accum_vars = list(zip(*grads_and_vars_and_accums))

        all_are_finite = tf.reduce_all(
            [tf.reduce_all(tf.is_finite(g)) for g in grads
            ]) if mixed_precision else tf.constant(True, dtype=tf.bool)
        batch_finite = tf.cond(
            reset_step,
            lambda: batch_finite.assign(
                tf.math.logical_and(tf.constant(True, dtype=tf.bool),
                                    all_are_finite)),
            lambda: batch_finite.assign(
                tf.math.logical_and(batch_finite, all_are_finite)))

        # This is how the model was pre-trained.
        # Ensure the global norm is a finite number to keep clip_by_global_norm
        # from throwing a hissy fit.
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=max_grad)

        accum_vars = tf.cond(
            reset_step,
            lambda: [accum_vars[i].assign(grad)
                     for i, grad in enumerate(clipped_grads)],
            lambda: [accum_vars[i].assign_add(grad)
                     for i, grad in enumerate(clipped_grads)])

        def update(accum_vars):
            return optimizer.apply_gradients(list(zip(accum_vars, tvars)))

        update_step = tf.identity(
            tf.cast(tf.math.equal(local_step % gradient_accumulation_steps, 0),
                    dtype=tf.bool),
            name="update_step")
        update_op = tf.cond(update_step,
                            lambda: update(accum_vars),
                            lambda: tf.no_op())

        new_global_step = tf.cond(tf.math.logical_and(update_step, batch_finite),
                                  lambda: global_step + 1,
                                  lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(update_op, [global_step.assign(new_global_step)])
    else:
        grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
        grads, tvars = list(zip(*grads_and_vars))
        all_are_finite = tf.reduce_all(
            [tf.reduce_all(tf.is_finite(g)) for g in grads
            ]) if mixed_precision else tf.constant(True, dtype=tf.bool)

        # This is how the model was pre-trained.
        # Ensure the global norm is a finite number to keep clip_by_global_norm
        # from throwing a hissy fit.
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=max_grad)

        # Do not pass global_step here: this Adam variant does not increment the
        # global step internally, unlike TF's built-in Adam and other optimizers;
        # passing it would make the global step advance twice per update.
        train_op = optimizer.apply_gradients(list(zip(clipped_grads, tvars)))

        new_global_step = tf.cond(all_are_finite,
                                  lambda: global_step + 1,
                                  lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(train_op, [global_step.assign(new_global_step)])
    return train_op
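# A minimal sketch of the accumulate-then-apply gate used above, assuming
# TF1-style graph mode; accum_var and grad stand in for a single
# (accumulator, gradient) pair and N for gradient_accumulation_steps.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

N = 4
local_step = tf.get_variable("local_step", shape=[], dtype=tf.int32,
                             trainable=False, initializer=tf.zeros_initializer)
accum_var = tf.get_variable("accum", shape=[10], dtype=tf.float32,
                            trainable=False, initializer=tf.zeros_initializer())
grad = tf.random.normal([10])  # stand-in for a computed gradient

# On the first micro-step of every cycle, restart the accumulator; otherwise add.
reset_step = tf.equal(local_step % N, 0)
local_step = tf.cond(reset_step,
                     lambda: local_step.assign(tf.ones_like(local_step)),
                     lambda: local_step.assign_add(1))
accum_var = tf.cond(reset_step,
                    lambda: accum_var.assign(grad),
                    lambda: accum_var.assign_add(grad))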
def __init__(self, num_unique_documents, vocab_size, num_topics, freqs, embedding_size=128, num_sampled=40, learning_rate=1e-3, lmbda=150.0, alpha=None, power=0.75, batch_size=32, clip_gradients=5.0, **kwargs): device = get_device(**kwargs) _graph = tf.Graph() with _graph.as_default(): with tf.device(device): moving_avgs = tf.train.ExponentialMovingAverage(0.9) self.batch_size = batch_size self.freqs = freqs self.X = tf.placeholder(tf.int32, shape=[None]) self.Y = tf.placeholder(tf.int64, shape=[None]) self.DOC = tf.placeholder(tf.int32, shape=[None]) self.switch_loss = tf.Variable(0, trainable=False) train_labels = tf.reshape(self.Y, [-1, 1]) sampler = tf.nn.fixed_unigram_candidate_sampler( train_labels, num_true=1, num_sampled=num_sampled, unique=True, range_max=vocab_size, distortion=power, unigrams=self.freqs, ) self.word_embedding = tf.Variable( tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0)) self.nce_weights = tf.Variable( tf.truncated_normal( [vocab_size, embedding_size], stddev=tf.sqrt(1 / embedding_size), )) self.nce_biases = tf.Variable(tf.zeros([vocab_size])) scalar = 1 / np.sqrt(num_unique_documents + num_topics) self.doc_embedding = tf.Variable( tf.random_normal( [num_unique_documents, num_topics], mean=0, stddev=50 * scalar, )) self.topic_embedding = tf.get_variable( 'topic_embedding', shape=[num_topics, embedding_size], dtype=tf.float32, initializer=tf.orthogonal_initializer(gain=scalar), ) pivot = tf.nn.embedding_lookup(self.word_embedding, self.X) proportions = tf.nn.embedding_lookup(self.doc_embedding, self.DOC) doc = tf.matmul(proportions, self.topic_embedding) doc_context = doc word_context = pivot context = tf.add(word_context, doc_context) loss_word2vec = tf.reduce_mean( tf.nn.nce_loss( weights=self.nce_weights, biases=self.nce_biases, labels=self.Y, inputs=context, num_sampled=num_sampled, num_classes=vocab_size, num_true=1, sampled_values=sampler, )) self.fraction = tf.Variable(1, trainable=False, dtype=tf.float32) n_topics = self.doc_embedding.get_shape()[1].value log_proportions = tf.nn.log_softmax(self.doc_embedding) if alpha is None: alpha = 1.0 / n_topics loss = (alpha - 1) * log_proportions prior = tf.reduce_sum(loss) loss_lda = lmbda * self.fraction * prior global_step = tf.Variable(0, trainable=False, name='global_step') self.cost = tf.cond( global_step < self.switch_loss, lambda: loss_word2vec, lambda: loss_word2vec + loss_lda, ) loss_avgs_op = moving_avgs.apply( [loss_lda, loss_word2vec, self.cost]) with tf.control_dependencies([loss_avgs_op]): optimizer = tf.train.AdamOptimizer( learning_rate=learning_rate) gvs = optimizer.compute_gradients(self.cost) capped_gvs = [( tf.clip_by_value(grad, -clip_gradients, clip_gradients), var, ) for grad, var in gvs] self.optimizer = optimizer.apply_gradients(capped_gvs) self.sess = generate_session(_graph, **kwargs) self.sess.run(tf.global_variables_initializer())
def decode(self, tf_example_string_tensor): """Decodes serialized tensorflow example and returns a tensor dictionary. Args: tf_example_string_tensor: a string tensor holding a serialized tensorflow example proto. Returns: A dictionary of the following tensors. fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3] containing image. fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of shape [2] containing shape of the image. fields.InputDataFields.source_id - string tensor containing original image id. fields.InputDataFields.key - string tensor with unique sha256 hash key. fields.InputDataFields.filename - string tensor with original dataset filename. fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape [None, 4] containing box corners. fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape [None] containing classes for the boxes. fields.InputDataFields.groundtruth_weights - 1D float32 tensor of shape [None] indicating the weights of groundtruth boxes. fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape [None] containing containing object mask area in pixel squared. fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape [None] indicating if the boxes enclose a crowd. Optional: fields.InputDataFields.groundtruth_image_confidences - 1D float tensor of shape [None] indicating if a class is present in the image (1.0) or a class is not present in the image (0.0). fields.InputDataFields.image_additional_channels - 3D uint8 tensor of shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim is width; 3rd dim is the number of additional channels. fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape [None] indicating if the boxes represent `difficult` instances. fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape [None] indicating if the boxes represent `group_of` instances. fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of shape [None, num_keypoints, 2] containing keypoints, where the coordinates of the keypoints are ordered (y, x). fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool tensor of shape [None, num_keypoints] containing keypoint visibilites. fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of shape [None, None, None] containing instance masks. fields.InputDataFields.groundtruth_image_classes - 1D int64 of shape [None] containing classes for the boxes. fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape [None * num_classes] containing flattened multiclass scores for groundtruth boxes. 
fields.InputDataFields.context_features - 1D float32 tensor of shape [context_feature_length * num_context_features] fields.InputDataFields.context_feature_length - int32 tensor specifying the length of each feature in context_features """ serialized_example = tf.reshape(tf_example_string_tensor, shape=[]) decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features, self.items_to_handlers) keys = decoder.list_items() tensors = decoder.decode(serialized_example, items=keys) tensor_dict = dict(zip(keys, tensors)) is_crowd = fields.InputDataFields.groundtruth_is_crowd tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool) tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3]) tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape( tensor_dict[fields.InputDataFields.image])[:2] if fields.InputDataFields.image_additional_channels in tensor_dict: channels = tensor_dict[fields.InputDataFields.image_additional_channels] channels = tf.squeeze(channels, axis=3) channels = tf.transpose(channels, perm=[1, 2, 0]) tensor_dict[fields.InputDataFields.image_additional_channels] = channels def default_groundtruth_weights(): return tf.ones( [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]], dtype=tf.float32) tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond( tf.greater( tf.shape( tensor_dict[fields.InputDataFields.groundtruth_weights])[0], 0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights], default_groundtruth_weights) if fields.InputDataFields.groundtruth_keypoints in tensor_dict: # Set all keypoints that are not labeled to NaN. gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities visibilities_tiled = tf.tile( tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2]) tensor_dict[gt_kpt_fld] = tf.where( visibilities_tiled, tensor_dict[gt_kpt_fld], np.nan * tf.ones_like(tensor_dict[gt_kpt_fld])) if self._expand_hierarchy_labels: input_fields = fields.InputDataFields image_classes, image_confidences = self._expand_image_label_hierarchy( tensor_dict[input_fields.groundtruth_image_classes], tensor_dict[input_fields.groundtruth_image_confidences]) tensor_dict[input_fields.groundtruth_image_classes] = image_classes tensor_dict[input_fields.groundtruth_image_confidences] = ( image_confidences) box_fields = [ fields.InputDataFields.groundtruth_group_of, fields.InputDataFields.groundtruth_is_crowd, fields.InputDataFields.groundtruth_difficult, fields.InputDataFields.groundtruth_area, fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_weights, ] def expand_field(field_name): return self._expansion_box_field_labels( tensor_dict[input_fields.groundtruth_classes], tensor_dict[field_name]) # pylint: disable=cell-var-from-loop for field in box_fields: if field in tensor_dict: tensor_dict[field] = tf.cond( tf.size(tensor_dict[field]) > 0, lambda: expand_field(field), lambda: tensor_dict[field]) # pylint: enable=cell-var-from-loop tensor_dict[input_fields.groundtruth_classes] = ( self._expansion_box_field_labels( tensor_dict[input_fields.groundtruth_classes], tensor_dict[input_fields.groundtruth_classes], True)) if fields.InputDataFields.groundtruth_group_of in tensor_dict: group_of = fields.InputDataFields.groundtruth_group_of tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool) if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict: 
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast( tensor_dict[fields.InputDataFields.groundtruth_dp_num_points], dtype=tf.int32) tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast( tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids], dtype=tf.int32) if fields.InputDataFields.groundtruth_track_ids in tensor_dict: tensor_dict[fields.InputDataFields.groundtruth_track_ids] = tf.cast( tensor_dict[fields.InputDataFields.groundtruth_track_ids], dtype=tf.int32) return tensor_dict
def call(self, x): input_image, y_pred, y_true, true_boxes = x # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class] y_pred = tf.reshape( y_pred, tf.concat([tf.shape(input=y_pred)[:3], tf.constant([3, -1])], axis=0)) # initialize the masks object_mask = tf.expand_dims(y_true[..., 4], 4) # the variable to keep track of number of batches processed batch_seen = tf.Variable(0.) # compute grid factor and net factor grid_h = tf.shape(input=y_true)[1] grid_w = tf.shape(input=y_true)[2] grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1, 1, 1, 1, 2]) net_h = tf.shape(input=input_image)[1] net_w = tf.shape(input=input_image)[2] net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1, 1, 1, 1, 2]) """ Adjust prediction """ pred_box_xy = (self.cell_grid[:, :grid_h, :grid_w, :, :] + tf.sigmoid(y_pred[..., :2])) # sigma(t_xy) + c_xy pred_box_wh = y_pred[..., 2:4] # t_wh pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4) # adjust confidence pred_box_class = y_pred[..., 5:] # adjust class probabilities """ Adjust ground truth """ true_box_xy = y_true[..., 0:2] # (sigma(t_xy) + c_xy) true_box_wh = y_true[..., 2:4] # t_wh true_box_conf = tf.expand_dims(y_true[..., 4], 4) true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1) """ Compare each predicted box to all true boxes """ # initially, drag all objectness of all boxes to 0 conf_delta = pred_box_conf - 0 # then, ignore the boxes which have good overlap with some true box true_xy = true_boxes[..., 0:2] / grid_factor true_wh = true_boxes[..., 2:4] / net_factor true_wh_half = true_wh / 2. true_mins = true_xy - true_wh_half true_maxes = true_xy + true_wh_half pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4) pred_wh = tf.expand_dims( tf.exp(pred_box_wh) * self.anchors / net_factor, 4) pred_wh_half = pred_wh / 2. pred_mins = pred_xy - pred_wh_half pred_maxes = pred_xy + pred_wh_half intersect_mins = tf.maximum(pred_mins, true_mins) intersect_maxes = tf.minimum(pred_maxes, true_maxes) intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] true_areas = true_wh[..., 0] * true_wh[..., 1] pred_areas = pred_wh[..., 0] * pred_wh[..., 1] union_areas = pred_areas + true_areas - intersect_areas iou_scores = tf.truediv(intersect_areas, union_areas) best_ious = tf.reduce_max(input_tensor=iou_scores, axis=4) conf_delta *= tf.expand_dims( tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4) """ Compute some online statistics """ true_xy = true_box_xy / grid_factor true_wh = tf.exp(true_box_wh) * self.anchors / net_factor true_wh_half = true_wh / 2. true_mins = true_xy - true_wh_half true_maxes = true_xy + true_wh_half pred_xy = pred_box_xy / grid_factor pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor pred_wh_half = pred_wh / 2. pred_mins = pred_xy - pred_wh_half pred_maxes = pred_xy + pred_wh_half intersect_mins = tf.maximum(pred_mins, true_mins) intersect_maxes = tf.minimum(pred_maxes, true_maxes) intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] true_areas = true_wh[..., 0] * true_wh[..., 1] pred_areas = pred_wh[..., 0] * pred_wh[..., 1] union_areas = pred_areas + true_areas - intersect_areas iou_scores = tf.truediv(intersect_areas, union_areas) iou_scores = object_mask * tf.expand_dims(iou_scores, 4) count = tf.reduce_sum(input_tensor=object_mask) count_noobj = tf.reduce_sum(input_tensor=1 - object_mask) detect_mask = tf.cast((pred_box_conf * object_mask) >= 0.5, dtype=tf.float32) class_mask = tf.expand_dims( tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1), true_box_class), dtype=tf.float32), 4) recall50 = tf.reduce_sum( input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3) recall75 = tf.reduce_sum( input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3) avg_iou = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3) avg_obj = tf.reduce_sum(input_tensor=pred_box_conf * object_mask) / (count + 1e-3) avg_noobj = tf.reduce_sum(input_tensor=pred_box_conf * (1 - object_mask)) / (count_noobj + 1e-3) avg_cat = tf.reduce_sum(input_tensor=object_mask * class_mask) / (count + 1e-3) """ Warm-up training """ batch_seen = tf.assign_add(batch_seen, 1.) true_box_xy, true_box_wh, xywh_mask = tf.cond( pred=tf.less(batch_seen, self.warmup_batches + 1), true_fn=lambda: [ true_box_xy + (0.5 + self.cell_grid[:, :grid_h, :grid_w, :, :]) * (1 - object_mask), true_box_wh + tf.zeros_like(true_box_wh) * (1 - object_mask), tf.ones_like(object_mask) ], false_fn=lambda: [true_box_xy, true_box_wh, object_mask]) """ Compare each true box to all anchor boxes """ wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor wh_scale = tf.expand_dims( 2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale xy_delta = xywh_mask * (pred_box_xy - true_box_xy) * wh_scale * self.xywh_scale wh_delta = xywh_mask * (pred_box_wh - true_box_wh) * wh_scale * self.xywh_scale conf_delta = object_mask * ( pred_box_conf - true_box_conf) * self.obj_scale + ( 1 - object_mask) * conf_delta * self.noobj_scale class_delta = object_mask * \ tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \ self.class_scale loss_xy = tf.reduce_sum(input_tensor=tf.square(xy_delta), axis=list(range(1, 5))) loss_wh = tf.reduce_sum(input_tensor=tf.square(wh_delta), axis=list(range(1, 5))) loss_conf = tf.reduce_sum(input_tensor=tf.square(conf_delta), axis=list(range(1, 5))) loss_class = tf.reduce_sum(input_tensor=class_delta, axis=list(range(1, 5))) loss = loss_xy + loss_wh + loss_conf + loss_class loss = tf.Print(loss, [grid_h, avg_obj], message='avg_obj \t\t', summarize=1000) loss = tf.Print(loss, [grid_h, avg_noobj], message='avg_noobj \t\t', summarize=1000) loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000) loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000) loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000) loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000) loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000) loss = tf.Print(loss, [ grid_h, tf.reduce_sum(input_tensor=loss_xy), tf.reduce_sum(input_tensor=loss_wh), tf.reduce_sum(input_tensor=loss_conf), tf.reduce_sum(input_tensor=loss_class) ], message='loss xy, wh, conf, class: \t', summarize=1000) return loss * self.grid_scale
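# The warm-up switch above also shows that tf.cond branches may return a list of
# tensors, as long as both branches return the same structure and dtypes.
# A minimal sketch with made-up shapes (TF1-style graph mode):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

warmup_batches = 100.
batch_seen = tf.Variable(0., trainable=False)
batch_seen = tf.assign_add(batch_seen, 1.)

true_xy = tf.zeros([2, 13, 13, 3, 2])
true_wh = tf.zeros([2, 13, 13, 3, 2])
object_mask = tf.zeros([2, 13, 13, 3, 1])

xy, wh, mask = tf.cond(
    pred=tf.less(batch_seen, warmup_batches + 1),
    true_fn=lambda: [true_xy + 0.5 * (1 - object_mask),  # nudge empty cells toward their centers
                     true_wh,
                     tf.ones_like(object_mask)],
    false_fn=lambda: [true_xy, true_wh, object_mask])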
def call(self, y_pred, mask=None): ''' Returns: 3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded to always yield `top_k` predictions per batch item. The last axis contains the coordinates for each predicted box in the format `[class_id, confidence, xmin, ymin, xmax, ymax]`. ''' ##################################################################################### # 1. Convert the box coordinates from predicted anchor box offsets to predicted # absolute coordinates ##################################################################################### # Extract the predicted class IDs as the indices of the highest confidence values. class_ids = tf.expand_dims(tf.to_float( tf.argmax(y_pred[..., :-12], axis=-1)), axis=-1) # Extract the confidences of the maximal classes. confidences = tf.reduce_max(y_pred[..., :-12], axis=-1, keep_dims=True) # Convert anchor box offsets to image offsets. cx = y_pred[..., -12] * y_pred[..., -4] * y_pred[..., -6] + y_pred[ ..., -8] # cx = cx_pred * cx_variance * w_anchor + cx_anchor cy = y_pred[..., -11] * y_pred[..., -3] * y_pred[..., -5] + y_pred[ ..., -7] # cy = cy_pred * cy_variance * h_anchor + cy_anchor w = tf.exp(y_pred[..., -10] * y_pred[..., -2]) * y_pred[ ..., -6] # w = exp(w_pred * variance_w) * w_anchor h = tf.exp(y_pred[..., -9] * y_pred[..., -1]) * y_pred[ ..., -5] # h = exp(h_pred * variance_h) * h_anchor # Convert 'centroids' to 'corners'. xmin = cx - 0.5 * w ymin = cy - 0.5 * h xmax = cx + 0.5 * w ymax = cy + 0.5 * h # If the model predicts box coordinates relative to the image dimensions and they are supposed # to be converted back to absolute coordinates, do that. def normalized_coords(): xmin1 = tf.expand_dims(xmin * self.tf_img_width, axis=-1) ymin1 = tf.expand_dims(ymin * self.tf_img_height, axis=-1) xmax1 = tf.expand_dims(xmax * self.tf_img_width, axis=-1) ymax1 = tf.expand_dims(ymax * self.tf_img_height, axis=-1) return xmin1, ymin1, xmax1, ymax1 def non_normalized_coords(): return tf.expand_dims(xmin, axis=-1), tf.expand_dims( ymin, axis=-1), tf.expand_dims(xmax, axis=-1), tf.expand_dims(ymax, axis=-1) xmin, ymin, xmax, ymax = tf.cond(self.tf_normalize_coords, normalized_coords, non_normalized_coords) # Concatenate the one-hot class confidences and the converted box coordinates to form the decoded predictions tensor. y_pred = tf.concat( values=[class_ids, confidences, xmin, ymin, xmax, ymax], axis=-1) ##################################################################################### # 2. Perform confidence thresholding, non-maximum suppression, and top-k filtering. ##################################################################################### batch_size = tf.shape(y_pred)[0] # Output dtype: tf.int32 n_boxes = tf.shape(y_pred)[1] n_classes = y_pred.shape[2] - 4 class_indices = tf.range(1, n_classes) # Create a function that filters the predictions for the given batch item. Specifically, it performs: # - confidence thresholding # - non-maximum suppression (NMS) # - top-k filtering def filter_predictions(batch_item): # Keep only the non-background boxes. positive_boxes = tf.not_equal(batch_item[..., 0], 0.0) predictions = tf.boolean_mask(tensor=batch_item, mask=positive_boxes) def perform_confidence_thresholding(): # Apply confidence thresholding. 
threshold_met = predictions[:, 1] > self.tf_confidence_thresh return tf.boolean_mask(tensor=predictions, mask=threshold_met) def no_positive_boxes(): return tf.constant(value=0.0, shape=(1, 6)) # If there are any positive predictions, perform confidence thresholding. predictions_conf_thresh = tf.cond( tf.equal(tf.size(predictions), 0), no_positive_boxes, perform_confidence_thresholding) def perform_nms(): scores = predictions_conf_thresh[..., 1] # `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`. xmin = tf.expand_dims(predictions_conf_thresh[..., -4], axis=-1) ymin = tf.expand_dims(predictions_conf_thresh[..., -3], axis=-1) xmax = tf.expand_dims(predictions_conf_thresh[..., -2], axis=-1) ymax = tf.expand_dims(predictions_conf_thresh[..., -1], axis=-1) boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1) maxima_indices = tf.image.non_max_suppression( boxes=boxes, scores=scores, max_output_size=self.tf_nms_max_output_size, iou_threshold=self.iou_threshold, name='non_maximum_suppresion') maxima = tf.gather(params=predictions_conf_thresh, indices=maxima_indices, axis=0) return maxima def no_confident_predictions(): return tf.constant(value=0.0, shape=(1, 6)) # If any boxes made the threshold, perform NMS. predictions_nms = tf.cond( tf.equal(tf.size(predictions_conf_thresh), 0), no_confident_predictions, perform_nms) # Perform top-k filtering for this batch item or pad it in case there are # fewer than `self.top_k` boxes left at this point. Either way, produce a # tensor of length `self.top_k`. By the time we return the final results tensor # for the whole batch, all batch items must have the same number of predicted # boxes so that the tensor dimensions are homogenous. If fewer than `self.top_k` # predictions are left after the filtering process above, we pad the missing # predictions with zeros as dummy entries. def top_k(): return tf.gather(params=predictions_nms, indices=tf.nn.top_k(predictions_nms[:, 1], k=self.tf_top_k, sorted=True).indices, axis=0) def pad_and_top_k(): padded_predictions = tf.pad(tensor=predictions_nms, paddings=[[ 0, self.tf_top_k - tf.shape(predictions_nms)[0] ], [0, 0]], mode='CONSTANT', constant_values=0.0) return tf.gather(params=padded_predictions, indices=tf.nn.top_k(padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices, axis=0) top_k_boxes = tf.cond( tf.greater_equal(tf.shape(predictions_nms)[0], self.tf_top_k), top_k, pad_and_top_k) return top_k_boxes # Iterate `filter_predictions()` over all batch items. output_tensor = tf.map_fn(fn=lambda x: filter_predictions(x), elems=y_pred, dtype=None, parallel_iterations=128, back_prop=False, swap_memory=False, infer_shape=True, name='loop_over_batch') return output_tensor
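# Hedged sketch (not part of the layer) of the anchor-offset decoding used
# above, written out for a single anchor with plain Python floats. All values
# and names are made up; only the arithmetic mirrors the code.
import math

cx_anchor, cy_anchor, w_anchor, h_anchor = 0.5, 0.5, 0.2, 0.1
cx_var, cy_var, w_var, h_var = 0.1, 0.1, 0.2, 0.2
cx_pred, cy_pred, w_pred, h_pred = 1.0, -1.0, 0.5, 0.5

cx = cx_pred * cx_var * w_anchor + cx_anchor   # 0.52
cy = cy_pred * cy_var * h_anchor + cy_anchor   # 0.49
w = math.exp(w_pred * w_var) * w_anchor        # ~0.221
h = math.exp(h_pred * h_var) * h_anchor        # ~0.111
xmin, ymin = cx - 0.5 * w, cy - 0.5 * h
xmax, ymax = cx + 0.5 * w, cy + 0.5 * h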
def patch_image(image, bboxes=None, offset_height=0, offset_width=0,
                target_height=None, target_width=None):
    """Gets a patch using tf.image.crop_to_bounding_box and adjusts bboxes.

    If patching would leave us with zero bboxes, we return the image and
    bboxes unchanged.

    Args:
        image: Float32 Tensor with shape (H, W, 3).
        bboxes: Tensor with the ground-truth boxes. Shaped (total_boxes, 5).
            The last element in each box is the category label.
        offset_height: Height of the upper-left corner of the patch with
            respect to the original image. Non-negative.
        offset_width: Width of the upper-left corner of the patch with
            respect to the original image. Non-negative.
        target_height: Height of the patch. If set to None, it will be the
            maximum (tf.shape(image)[0] - offset_height - 1). Positive.
        target_width: Width of the patch. If set to None, it will be the
            maximum (tf.shape(image)[1] - offset_width - 1). Positive.

    Returns:
        image: Patch of the original image.
        bboxes: Adjusted bboxes (only those whose centers are inside the
            patch). The key isn't set if bboxes is None.
    """
    # TODO: make this function safe with respect to senseless inputs (i.e.
    # having an offset_height that's larger than tf.shape(image)[0], etc.)
    # As of now we only use it inside random_patch, which already makes sure
    # the arguments are legal.
    im_shape = tf.shape(image)
    if target_height is None:
        target_height = im_shape[0] - offset_height - 1
    if target_width is None:
        target_width = im_shape[1] - offset_width - 1

    new_image = tf.image.crop_to_bounding_box(
        image,
        offset_height=offset_height,
        offset_width=offset_width,
        target_height=target_height,
        target_width=target_width,
    )
    patch_shape = tf.shape(new_image)

    # Return if we didn't have bboxes.
    if bboxes is None:
        # Resize the patch to the original image's size. This is to make sure
        # we respect restrictions in image size in the models.
        new_image_resized = tf.image.resize_images(
            new_image, im_shape[:2], method=tf.image.ResizeMethod.BILINEAR)
        return_dict = {"image": new_image_resized}
        return return_dict

    # Now we will remove all bboxes whose centers are not inside the cropped
    # image.

    # First get the x and y coordinates of the center of each of the bboxes.
    bboxes_center_x = tf.reduce_mean(
        tf.concat(
            [
                # bboxes[:, 0] gets a Tensor with shape (20,).
                # We do this to get a Tensor with shape (20, 1).
                bboxes[:, 0:1],
                bboxes[:, 2:3],
            ],
            axis=1,
        ),
        axis=1,  # Reduce per box, not over the whole tensor.
    )
    bboxes_center_y = tf.reduce_mean(
        tf.concat([bboxes[:, 1:2], bboxes[:, 3:4]], axis=1), axis=1)

    # Now we get a boolean tensor holding for each of the bboxes' centers
    # whether they are inside the patch.
    center_x_is_inside = tf.logical_and(
        tf.greater(bboxes_center_x, offset_width),
        tf.less(bboxes_center_x, tf.add(target_width, offset_width)))
    center_y_is_inside = tf.logical_and(
        tf.greater(bboxes_center_y, offset_height),
        tf.less(bboxes_center_y, tf.add(target_height, offset_height)))
    center_is_inside = tf.logical_and(center_x_is_inside, center_y_is_inside)

    # Now we mask the bboxes, removing all those whose centers are outside
    # the patch.
    masked_bboxes = tf.boolean_mask(bboxes, center_is_inside)

    # We move the bboxes to the right place, clipping them if necessary.
    new_bboxes_unclipped = tf.concat(
        [
            tf.subtract(masked_bboxes[:, 0:1], offset_width),
            tf.subtract(masked_bboxes[:, 1:2], offset_height),
            tf.subtract(masked_bboxes[:, 2:3], offset_width),
            tf.subtract(masked_bboxes[:, 3:4], offset_height),
        ],
        axis=1,
    )

    # Finally, we clip the boxes and add back the labels.
    new_bboxes = tf.concat(
        [
            tf.to_int32(
                clip_boxes(new_bboxes_unclipped, imshape=patch_shape[:2]),
            ),
            masked_bboxes[:, 4:],
        ],
        axis=1,
    )

    # Now resize the image to the original size and adjust bboxes accordingly.
    new_image_resized = tf.image.resize_images(
        new_image, im_shape[:2], method=tf.image.ResizeMethod.BILINEAR)

    # adjust_bboxes requires height and width values with dtype=float32.
    new_bboxes_resized = adjust_bboxes(
        new_bboxes,
        old_height=tf.to_float(patch_shape[0]),
        old_width=tf.to_float(patch_shape[1]),
        new_height=tf.to_float(im_shape[0]),
        new_width=tf.to_float(im_shape[1]),
    )

    # Finally, set up the return dict, but only update the image and bboxes
    # if our patch has at least one bbox in it.
    update_condition = tf.greater_equal(tf.shape(new_bboxes_resized)[0], 1)
    return_dict = {}
    return_dict["image"] = tf.cond(
        update_condition, lambda: new_image_resized, lambda: image)
    return_dict["bboxes"] = tf.cond(
        update_condition, lambda: new_bboxes_resized, lambda: bboxes)

    return return_dict
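# A minimal, self-contained sketch (assuming TF 1.x) of the "keep a box only
# if its center lies inside the patch" rule used above. The boxes, offsets,
# and names are made up for illustration.
boxes_toy = tf.constant([[0., 0., 40., 40.],       # center (20, 20) -> outside
                         [60., 60., 120., 120.]])  # center (90, 90) -> inside
offset_w, offset_h, target_w, target_h = 50., 50., 100., 100.
centers_x = (boxes_toy[:, 0] + boxes_toy[:, 2]) / 2.
centers_y = (boxes_toy[:, 1] + boxes_toy[:, 3]) / 2.
center_inside = tf.logical_and(
    tf.logical_and(centers_x > offset_w, centers_x < offset_w + target_w),
    tf.logical_and(centers_y > offset_h, centers_y < offset_h + target_h))
kept_boxes = tf.boolean_mask(boxes_toy, center_inside)  # only the second box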
def main(unused_argv=None): tf.logging.set_verbosity(FLAGS.log) if FLAGS.config is None: raise RuntimeError("No config name specified.") config = utils.get_module("wavenet." + FLAGS.config).Config( FLAGS.train_path) logdir = FLAGS.logdir tf.logging.info("Saving to %s" % logdir) with tf.Graph().as_default(): total_batch_size = FLAGS.total_batch_size assert total_batch_size % FLAGS.worker_replicas == 0 worker_batch_size = total_batch_size / FLAGS.worker_replicas # Run the Reader on the CPU cpu_device = "/job:localhost/replica:0/task:0/cpu:0" if FLAGS.ps_tasks: cpu_device = "/job:worker/cpu:0" with tf.device(cpu_device): inputs_dict = config.get_batch(worker_batch_size) with tf.device( tf.train.replica_device_setter(ps_tasks=FLAGS.ps_tasks, merge_devices=True)): global_step = tf.get_variable( "global_step", [], tf.int32, initializer=tf.constant_initializer(0), trainable=False) # pylint: disable=cell-var-from-loop lr = tf.constant(config.learning_rate_schedule[0]) for key, value in config.learning_rate_schedule.items(): lr = tf.cond(tf.less(global_step, key), lambda: lr, lambda: tf.constant(value)) # pylint: enable=cell-var-from-loop tf.summary.scalar("learning_rate", lr) # build the model graph outputs_dict = config.build(inputs_dict, is_training=True) loss = outputs_dict["loss"] tf.summary.scalar("train_loss", loss) worker_replicas = FLAGS.worker_replicas ema = tf.train.ExponentialMovingAverage(decay=0.9999, num_updates=global_step) opt = tf.train.SyncReplicasOptimizer( tf.train.AdamOptimizer(lr, epsilon=1e-8), worker_replicas, total_num_replicas=worker_replicas, variable_averages=ema, variables_to_average=tf.trainable_variables()) train_op = opt.minimize(loss, global_step=global_step, name="train", colocate_gradients_with_ops=True) session_config = tf.ConfigProto(allow_soft_placement=True) is_chief = (FLAGS.task == 0) local_init_op = opt.chief_init_op if is_chief else opt.local_step_init_op slim.learning.train( train_op=train_op, logdir=logdir, is_chief=is_chief, master=FLAGS.master, number_of_steps=config.num_iters, global_step=global_step, log_every_n_steps=250, local_init_op=local_init_op, save_interval_secs=300, sync_optimizer=opt, session_config=session_config, )
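# Hedged, standalone sketch (TF 1.x) of the piecewise-constant learning-rate
# schedule built above with chained `tf.cond`s; the toy schedule and step
# value are made up. Because `tf.cond` builds both branches at graph
# construction time, capturing `lr` and `value` inside the loop is safe here.
toy_schedule = {0: 1e-3, 10000: 5e-4, 20000: 1e-4}
toy_step = tf.constant(12000, dtype=tf.int64)
lr = tf.constant(toy_schedule[0])
for boundary, value in sorted(toy_schedule.items()):
    # pylint: disable=cell-var-from-loop
    lr = tf.cond(tf.less(toy_step, boundary),
                 lambda: lr, lambda: tf.constant(value))
    # pylint: enable=cell-var-from-loop
# At step 12000 this evaluates to 5e-4.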
def train(flags): """Training entry point.""" log_dir = flags.log_dir flags.pretrained_model_dir = log_dir log_dir = os.path.join(log_dir, 'train') flags.eval_interval_secs = 0 with tf.Graph().as_default(): global_step = tf.Variable( 0, trainable=False, name='global_step', dtype=tf.int64) global_step_confidence = tf.Variable( 0, trainable=False, name='global_step_confidence', dtype=tf.int64) model = build_model(flags) images_query_pl, labels_query_pl, \ images_support_pl, labels_support_pl = \ build_episode_placeholder(flags) # Augments the input. if flags.dataset == 'cifar10' or flags.dataset == 'cifar100': images_query_pl_aug = data_loader.augment_cifar( images_query_pl, is_training=True) images_support_pl_aug = data_loader.augment_cifar( images_support_pl, is_training=True) elif flags.dataset == 'tinyimagenet': images_query_pl_aug = data_loader.augment_tinyimagenet( images_query_pl, is_training=True) images_support_pl_aug = data_loader.augment_tinyimagenet( images_support_pl, is_training=True) logits, logits_z = build_proto_train_graph( images_query=images_query_pl_aug, images_support=images_support_pl_aug, flags=flags, is_training=True, model=model) # Losses and optimizer ## Classification loss loss_classification = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits=logits, labels=tf.one_hot(labels_query_pl, flags.num_classes_train))) # Confidence loss _, top_k_indices = tf.nn.top_k(logits, k=1) pred = tf.squeeze(top_k_indices) incorrect_mask = tf.math.logical_not(tf.math.equal(pred, labels_query_pl)) incorrect_logits_z = tf.boolean_mask(logits_z, incorrect_mask) incorrect_labels_z = tf.boolean_mask(labels_query_pl, incorrect_mask) signal_variance = tf.math.reduce_sum(tf.cast(incorrect_mask, tf.int32)) loss_variance_incorrect = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits=incorrect_logits_z, labels=tf.one_hot(incorrect_labels_z, flags.num_classes_train))) loss_variance_zero = 0.0 loss_confidence = tf.cond( tf.greater(signal_variance, 0), lambda: loss_variance_incorrect, lambda: loss_variance_zero) regu_losses = tf.losses.get_regularization_losses() loss = tf.add_n([loss_classification] + regu_losses) # Learning rate if flags.lr_anneal == 'const': learning_rate = flags.init_learning_rate elif flags.lr_anneal == 'pwc': learning_rate = get_pwc_learning_rate(global_step, flags) elif flags.lr_anneal == 'exp': lr_decay_step = flags.number_of_steps // flags.n_lr_decay learning_rate = tf.train.exponential_decay( flags.init_learning_rate, global_step, lr_decay_step, 1.0 / flags.lr_decay_rate, staircase=True) else: raise Exception('Not implemented') # Optimizer optimizer = tf.train.MomentumOptimizer( learning_rate=learning_rate, momentum=0.9) optimizer_confidence = tf.train.MomentumOptimizer( learning_rate=learning_rate, momentum=0.9) train_op = contrib_slim.learning.create_train_op( total_loss=loss, optimizer=optimizer, global_step=global_step, clip_gradient_norm=flags.clip_gradient_norm) variable_variance = [] for v in tf.trainable_variables(): if 'fc_variance' in v.name: variable_variance.append(v) train_op_confidence = contrib_slim.learning.create_train_op( total_loss=loss_confidence, optimizer=optimizer_confidence, global_step=global_step_confidence, clip_gradient_norm=flags.clip_gradient_norm, variables_to_train=variable_variance) tf.summary.scalar('loss', loss) tf.summary.scalar('loss_classification', loss_classification) tf.summary.scalar('loss_variance', loss_confidence) tf.summary.scalar('regu_loss', tf.add_n(regu_losses)) 
tf.summary.scalar('learning_rate', learning_rate) # Merges all summaries except for pretrain summary = tf.summary.merge( tf.get_collection('summaries', scope='(?!pretrain).*')) # Gets datasets few_shot_data_train, test_dataset, train_dataset = get_train_datasets(flags) # Defines session and logging summary_writer_train = tf.summary.FileWriter(log_dir, flush_secs=1) saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True) print(saver.saver_def.filename_tensor_name) print(saver.saver_def.restore_op_name) # pylint: disable=unused-variable run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() supervisor = tf.train.Supervisor( logdir=log_dir, init_feed_dict=None, summary_op=None, init_op=tf.global_variables_initializer(), summary_writer=summary_writer_train, saver=saver, global_step=global_step, save_summaries_secs=flags.save_summaries_secs, save_model_secs=0) with supervisor.managed_session() as sess: checkpoint_step = sess.run(global_step) if checkpoint_step > 0: checkpoint_step += 1 eval_interval_steps = flags.eval_interval_steps for step in range(checkpoint_step, flags.number_of_steps): # Computes the classification loss using a batch of data. images_query, labels_query,\ images_support, labels_support = \ few_shot_data_train.next_few_shot_batch( query_batch_size_per_task=flags.train_batch_size, num_classes_per_task=flags.num_classes_train, num_supports_per_class=flags.num_shots_train, num_tasks=flags.num_tasks_per_batch) feed_dict = { images_query_pl: images_query.astype(dtype=np.float32), labels_query_pl: labels_query, images_support_pl: images_support.astype(dtype=np.float32), labels_support_pl: labels_support } t_batch = time.time() dt_batch = time.time() - t_batch t_train = time.time() loss, loss_confidence = sess.run([train_op, train_op_confidence], feed_dict=feed_dict) dt_train = time.time() - t_train if step % 100 == 0: summary_str = sess.run(summary, feed_dict=feed_dict) summary_writer_train.add_summary(summary_str, step) summary_writer_train.flush() logging.info('step %d, loss : %.4g, dt: %.3gs, dt_batch: %.3gs', step, loss, dt_train, dt_batch) if float(step) / flags.number_of_steps > 0.5: eval_interval_steps = flags.eval_interval_fine_steps if eval_interval_steps > 0 and step % eval_interval_steps == 0: saver.save(sess, os.path.join(log_dir, 'model'), global_step=step) eval( flags=flags, train_dataset=train_dataset, test_dataset=test_dataset) if float( step ) > 0.5 * flags.number_of_steps + flags.number_of_steps_to_early_stop: break
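# Hedged, self-contained sketch (TF 1.x) of the pattern used above for the
# confidence loss: average only over incorrectly-classified examples and fall
# back to 0.0 when there are none. The toy logits/labels and names are
# illustrative, not taken from the training code.
logits_toy = tf.constant([[2.0, 0.1], [0.3, 1.5]])
labels_toy = tf.constant([0, 0])
pred_toy = tf.argmax(logits_toy, axis=1, output_type=tf.int32)
incorrect = tf.logical_not(tf.equal(pred_toy, labels_toy))
num_incorrect = tf.reduce_sum(tf.cast(incorrect, tf.int32))
incorrect_logits = tf.boolean_mask(logits_toy, incorrect)
incorrect_labels = tf.boolean_mask(labels_toy, incorrect)
masked_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=incorrect_labels, logits=incorrect_logits))
safe_loss = tf.cond(tf.greater(num_incorrect, 0),
                    lambda: masked_loss, lambda: 0.0)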
def _parse_train_data(self, data): """Parses data for training and evaluation.""" classes = data['groundtruth_classes'] boxes = data['groundtruth_boxes'] is_crowds = data['groundtruth_is_crowd'] # Skips annotations with `is_crowd` = True. if self._skip_crowd_during_training and self._is_training: num_groundtrtuhs = tf.shape(classes)[0] with tf.control_dependencies([num_groundtrtuhs, is_crowds]): indices = tf.cond( tf.greater(tf.size(is_crowds), 0), lambda: tf.where(tf.logical_not(is_crowds))[:, 0], lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64)) classes = tf.gather(classes, indices) boxes = tf.gather(boxes, indices) # Gets original image and its size. image = data['image'] if self._aug_policy: if AUTOAUG_IMPORTED: image, boxes = autoaugment_utils.distort_image_with_autoaugment( image, boxes, self._aug_policy) else: raise ImportError( 'Unable to get autoaugment_utils, likely due ' 'to imcompatability with TF 2.X.') image_shape = tf.shape(image)[0:2] # Normalizes image with mean and std pixel values. image = input_utils.normalize_image(image) # Flips image randomly during training. if self._aug_rand_hflip: image, boxes = input_utils.random_horizontal_flip(image, boxes) # Converts boxes from normalized coordinates to pixel coordinates. # Now the coordinates of boxes are w.r.t. the original image. boxes = box_utils.denormalize_boxes(boxes, image_shape) # Resizes and crops image. image, image_info = input_utils.resize_and_crop_image( image, self._output_size, padded_size=input_utils.compute_padded_size( self._output_size, 2**self._max_level), aug_scale_min=self._aug_scale_min, aug_scale_max=self._aug_scale_max) image_height, image_width, _ = image.get_shape().as_list() # Resizes and crops boxes. # Now the coordinates of boxes are w.r.t the scaled image. image_scale = image_info[2, :] offset = image_info[3, :] boxes = input_utils.resize_and_crop_boxes(boxes, image_scale, image_info[1, :], offset) # Filters out ground truth boxes that are all zeros. indices = box_utils.get_non_empty_box_indices(boxes) boxes = tf.gather(boxes, indices) classes = tf.gather(classes, indices) # Assigns anchor targets. # Note that after the target assignment, box targets are absolute pixel # offsets w.r.t. the scaled image. input_anchor = anchor.Anchor(self._min_level, self._max_level, self._num_scales, self._aspect_ratios, self._anchor_size, (image_height, image_width)) anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold, self._unmatched_threshold) (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors( boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32)) # If bfloat16 is used, casts input image to tf.bfloat16. if self._use_bfloat16: image = tf.cast(image, dtype=tf.bfloat16) # Packs labels for model_fn outputs. labels = { 'cls_targets': cls_targets, 'box_targets': box_targets, 'anchor_boxes': input_anchor.multilevel_boxes, 'num_positives': num_positives, 'image_info': image_info, } return image, labels
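# Hedged sketch of what "denormalize boxes" amounts to. The real
# `box_utils.denormalize_boxes` helper is not shown in this file, so this is
# only the implied arithmetic for [ymin, xmin, ymax, xmax] boxes in [0, 1];
# the function name is illustrative.
def denormalize_boxes_sketch(boxes, image_shape):
    """Scales normalized boxes to pixel coordinates of `image_shape` (h, w)."""
    height = tf.cast(image_shape[0], boxes.dtype)
    width = tf.cast(image_shape[1], boxes.dtype)
    ymin, xmin, ymax, xmax = tf.unstack(boxes, num=4, axis=-1)
    return tf.stack(
        [ymin * height, xmin * width, ymax * height, xmax * width], axis=-1)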
# Change 11: clip gradients. The original pre-training code clipped with:
#   (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
grads, tvars = list(zip(*grads_and_vars))
all_are_finite = tf.reduce_all(
    [tf.reduce_all(tf.is_finite(g)) for g in grads]
) if use_fp16 or manual_fp16 else tf.constant(True, dtype=tf.bool)

# This is how the model was pre-trained.
# Ensure the global norm is a finite number to prevent clip_by_global_norm
# from having a hissy fit.
(clipped_grads, _) = tf.clip_by_global_norm(
    grads, clip_norm=1.0,
    use_norm=tf.cond(all_are_finite,
                     lambda: tf.global_norm(grads),
                     lambda: tf.constant(1.0)))

# Change 12: apply gradients using the clipped grads. The original was:
#   train_op = optimizer.apply_gradients(
#       list(zip(grads, tvars)), global_step=global_step)
train_op = optimizer.apply_gradients(
    list(zip(clipped_grads, tvars)), global_step=global_step)

# Normally the global step update is done inside of `apply_gradients`.
# However, neither `AdamWeightDecayOptimizer` nor `LAMBOptimizer` do this.
# But if you use a different optimizer, you should probably take this line
# out.
new_global_step = global_step + 1
train_op = tf.group(train_op, [global_step.assign(new_global_step)])
return train_op
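# Hedged toy sketch of the finite-norm guard above: when any gradient is
# non-finite, `use_norm` is forced to 1.0 so `clip_by_global_norm` does not
# spread NaN/Inf from one gradient into all of the clipped gradients. The
# values are made up.
toy_grads = [tf.constant([3.0, 4.0]), tf.constant([float('nan')])]
finite = tf.reduce_all([tf.reduce_all(tf.is_finite(g)) for g in toy_grads])
clipped_toy, _ = tf.clip_by_global_norm(
    toy_grads, clip_norm=1.0,
    use_norm=tf.cond(finite,
                     lambda: tf.global_norm(toy_grads),
                     lambda: tf.constant(1.0)))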
def _generate_detections_tf(cls_outputs, box_outputs, anchor_boxes, indices, classes, image_id, image_scale, num_classes, min_score_thresh=0.2, max_boxes_to_draw=50, soft_nms_sigma=0.0, iou_threshold=0.5, use_native_nms=True): """Generates detections with model outputs and anchors. Args: cls_outputs: a numpy array with shape [N, 1], which has the highest class scores on all feature levels. The N is the number of selected top-K total anchors on all levels. (k being MAX_DETECTION_POINTS) box_outputs: a numpy array with shape [N, 4], which stacks box regression outputs on all feature levels. The N is the number of selected top-k total anchors on all levels. (k being MAX_DETECTION_POINTS) anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all feature levels. The N is the number of selected top-k total anchors on all levels. indices: a numpy array with shape [N], which is the indices from top-k selection. classes: a numpy array with shape [N], which represents the class prediction on all selected anchors from top-k selection. image_id: an integer number to specify the image id. image_scale: a float tensor representing the scale between original image and input image for the detector. It is used to rescale detections for evaluating with the original groundtruth annotations. num_classes: a integer that indicates the number of classes. min_score_thresh: A float representing the threshold for deciding when to remove boxes based on score. max_boxes_to_draw: Max number of boxes to draw. soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter; See Bodla et al, https://arxiv.org/abs/1704.04503). When `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard) NMS. iou_threshold: A float representing the threshold for deciding whether boxes overlap too much with respect to IOU. use_native_nms: a bool that indicates whether to use native nms. Returns: detections: detection results in a tensor with each row representing [image_id, y, x, height, width, score, class] """ anchor_boxes = tf.gather(anchor_boxes, indices) scores = tf.math.sigmoid(cls_outputs) # apply bounding box regression to anchors boxes = decode_box_outputs_tf(tf.transpose(box_outputs, [1, 0]), tf.transpose(anchor_boxes, [1, 0])) def _else(detections, class_id, indices): """Else branch for generating detections.""" boxes_cls = tf.gather(boxes, indices) scores_cls = tf.gather(scores, indices) # Select top-scoring boxes in each class and apply non-maximum suppression # (nms) for boxes in the same class. The selected boxes from each class are # then concatenated for the final detection outputs. 
if use_native_nms: top_detection_idx, scores_cls = tf.image.non_max_suppression_with_scores( boxes_cls, scores_cls, max_boxes_to_draw, iou_threshold=iou_threshold, score_threshold=min_score_thresh, soft_nms_sigma=soft_nms_sigma) scores_cls = tf.expand_dims(scores_cls, axis=1) boxes_cls = tf.gather(boxes_cls, top_detection_idx) top_detections_cls = tf.concat([boxes_cls, scores_cls], axis=1) else: scores_cls = tf.expand_dims(scores_cls, axis=1) all_detections_cls = tf.concat([boxes_cls, scores_cls], axis=1) top_detection_idx = nms_tf(all_detections_cls, iou_threshold) top_detections_cls = tf.gather(all_detections_cls, top_detection_idx) height = top_detections_cls[:, 2] - top_detections_cls[:, 0] width = top_detections_cls[:, 3] - top_detections_cls[:, 1] top_detections_cls = tf.stack([ top_detections_cls[:, 0] * image_scale, top_detections_cls[:, 1] * image_scale, height * image_scale, width * image_scale, top_detections_cls[:, 4] ], axis=-1) top_detections_cls = tf.stack([ tf.cast(tf.repeat(image_id, tf.size(top_detection_idx)), tf.float32), *tf.unstack(top_detections_cls, 5, axis=1), tf.repeat(class_id + 1.0, tf.size(top_detection_idx)) ], axis=1) detections = tf.concat([detections, top_detections_cls], axis=0) return detections detections = tf.constant([], tf.float32, [0, 7]) for c in range(num_classes): indices_cls = tf.squeeze(tf.where_v2(tf.equal(classes, c)), axis=-1) detections = tf.cond( tf.equal(tf.size(indices), 0), lambda: detections, lambda id=c, id_cls=indices_cls: _else(detections, id, id_cls)) indices_final = tf.argsort(detections[:, -2], direction='DESCENDING') detections = tf.gather(detections, indices_final[:max_boxes_to_draw], name='detection') return detections
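# Hedged, standalone sketch of the soft-NMS call used in the native-NMS
# branch above (the API requires a TF version that provides
# `tf.image.non_max_suppression_with_scores`). Boxes are made up and given
# in [ymin, xmin, ymax, xmax] order; with `soft_nms_sigma=0.0` this reduces
# to hard NMS.
toy_boxes = tf.constant([[0., 0., 10., 10.],
                         [1., 1., 11., 11.],
                         [50., 50., 60., 60.]])
toy_scores = tf.constant([0.9, 0.8, 0.7])
selected_idx, selected_scores = tf.image.non_max_suppression_with_scores(
    toy_boxes, toy_scores, max_output_size=10,
    iou_threshold=0.5, score_threshold=0.2, soft_nms_sigma=0.0)
kept_detections = tf.gather(toy_boxes, selected_idx)
# The second box overlaps the first with IoU ~0.68 and is dropped by hard NMS.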
def _generate_detections_tf(cls_outputs, box_outputs, anchor_boxes, indices, classes, image_id, image_scale, num_classes, use_native_nms=False): """Generates detections with model outputs and anchors. Args: cls_outputs: a numpy array with shape [N, 1], which has the highest class scores on all feature levels. The N is the number of selected top-K total anchors on all levels. (k being MAX_DETECTION_POINTS) box_outputs: a numpy array with shape [N, 4], which stacks box regression outputs on all feature levels. The N is the number of selected top-k total anchors on all levels. (k being MAX_DETECTION_POINTS) anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all feature levels. The N is the number of selected top-k total anchors on all levels. indices: a numpy array with shape [N], which is the indices from top-k selection. classes: a numpy array with shape [N], which represents the class prediction on all selected anchors from top-k selection. image_id: an integer number to specify the image id. image_scale: a float tensor representing the scale between original image and input image for the detector. It is used to rescale detections for evaluating with the original groundtruth annotations. num_classes: a integer that indicates the number of classes. use_native_nms: a bool that indicates whether to use native nms. Returns: detections: detection results in a tensor with each row representing [image_id, y, x, height, width, score, class] """ anchor_boxes = tf.gather(anchor_boxes, indices) scores = tf.math.sigmoid(cls_outputs) # apply bounding box regression to anchors boxes = decode_box_outputs_tf(tf.transpose(box_outputs, [1, 0]), tf.transpose(anchor_boxes, [1, 0])) def _else(detections, class_id): """Else branch forr generating detections.""" boxes_cls = tf.gather(boxes, indices) scores_cls = tf.gather(scores, indices) # Select top-scoring boxes in each class and apply non-maximum suppression # (nms) for boxes in the same class. The selected boxes from each class are # then concatenated for the final detection outputs. all_detections_cls = tf.concat( [tf.reshape(boxes_cls, [-1, 4]), scores_cls], axis=1) if use_native_nms: top_detection_idx = tf.image.non_max_suppression( all_detections_cls[:, :4], all_detections_cls[:, 4], MAX_DETECTIONS_PER_IMAGE, iou_threshold=0.5) else: top_detection_idx = nms_tf(all_detections_cls, 0.5) top_detections_cls = tf.gather(all_detections_cls, top_detection_idx) height = top_detections_cls[:, 2] - top_detections_cls[:, 0] width = top_detections_cls[:, 3] - top_detections_cls[:, 1] top_detections_cls = tf.stack([ top_detections_cls[:, 0] * image_scale, top_detections_cls[:, 1] * image_scale, height * image_scale, width * image_scale, top_detections_cls[:, 4] ], axis=-1) top_detections_cls = tf.stack([ tf.cast(tf.repeat(image_id, tf.size(top_detection_idx)), tf.float32), *tf.unstack(top_detections_cls, 5, axis=1), tf.repeat(class_id + 1.0, tf.size(top_detection_idx)) ], axis=1) detections = tf.concat([detections, top_detections_cls], axis=0) return detections detections = tf.constant([], tf.float32, [0, 7]) for c in range(num_classes): indices = tf.where(tf.equal(classes, c)) detections = tf.cond(tf.equal(tf.shape(indices)[0], 0), lambda: detections, lambda class_id=c: _else(detections, class_id)) return tf.identity(detections, name='detection')
def last_value_quantize(self, inputs, per_channel=False, init_min=-6.0, init_max=6.0, name_prefix='FixedValueQuant', reuse=None, is_training=False, num_bits=8, narrow_range=False, relative_quantile=0, freeze=False, quant_delay=False): """Adds a layer that collects quantization ranges as last input ranges. LastValueQuantize creates variables called 'min' and 'max', representing the interval used for quantization and clamping. Args: inputs: a tensor containing values to be quantized. per_channel: (Optional) a boolean specifying whether to use different quantization ranges per output channel. init_min: a float scalar, the initial value for variable min. init_max: a float scalar, the initial value for variable max. name_prefix: name_prefix for created nodes. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. is_training: Whether the op is applied to a training or eval graph. num_bits: Number of bits to use for quantization, must be between 2 and 8. narrow_range: Whether to use the narrow quantization range [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1]. relative_quantile: Specify the location of quantization min and max parameters. relative_quantile = 0 is equivalent to using min and max of input; relative_quantile = 1 set min and max the optimal location assuming the input distribution is uniform. In reality, a good value should be in the range [0 1]. freeze: If True, the min and max variables are calculated once at the begining of training and then freeze. This is used for quantized fine-tuning of a pretrained checkpoint. If False, the min and max are calculated and updated every cycle. quant_delay: The number of global steps after which the fake quantization are turned on. Used for performing fine-tuning experiment without starting from a pre-trained checkpoint. Returns: a tensor containing quantized values. """ with tf.variable_scope( None, default_name=name_prefix, values=[inputs], reuse=reuse) as scope: scope.set_partitioner(None) input_shape = inputs.get_shape() input_dim = len(input_shape) if per_channel: # Only support quantizing 1-, 2- and 4-dimensional tensors. 
assert input_dim in [1, 2, 4] min_max_shape = [input_shape[-1]] else: min_max_shape = [] min_var = tf.get_variable('min', min_max_shape, tf.float32, initializer=tf.constant_initializer(init_min), trainable=False) max_var = tf.get_variable('max', min_max_shape, tf.float32, initializer=tf.constant_initializer(init_max), trainable=False) if not is_training: return self.delayed_quant( inputs, min_var, max_var, per_channel=per_channel, num_bits=num_bits, narrow_range=narrow_range, quant_delay=None) if per_channel: if input_dim == 2: reduce_dims = [0] elif input_dim == 4: reduce_dims = [0, 1, 2] if num_bits >= 4: quantile = 0 else: quantile = (1.0 / 2.0**(num_bits + 1.0)) * relative_quantile * 100 if per_channel: if input_dim >= 2: batch_min = tfp.stats.percentile( inputs, q=quantile, axis=reduce_dims, name='BatchMin') else: batch_min = inputs else: batch_min = tfp.stats.percentile( inputs, q=quantile, name='BatchMin') if per_channel: if input_dim >= 2: batch_max = tfp.stats.percentile( inputs, q=100 - quantile, axis=reduce_dims, name='BatchMax') else: batch_max = inputs else: batch_max = tfp.stats.percentile( inputs, q=100 - quantile, name='BatchMax') if narrow_range: multiplier = 1.0 else: multiplier = 1.0 + 1.0 / (2.0**(num_bits-1.0) - 1.0) batch_abs_max = tf.maximum(tf.abs(batch_min), tf.abs(batch_max)) if narrow_range: batch_adjusted_min = 0 - batch_abs_max else: multiplier = 1.0 + 1.0 / (2.0**(num_bits-1.0) - 1.0) batch_adjusted_min = 0 - tf.scalar_mul(multiplier, batch_abs_max) batch_abs_max = tf.cast(batch_abs_max, tf.float32) batch_adjusted_min = tf.cast(batch_adjusted_min, tf.float32) if freeze: def make_var_op(var): def f(): return var return f quant_step = common.CreateOrGetQuantizationStep() min_max_assign = tf.less_equal( quant_step, 1, name='MinMaxAssign') min_value = tf.cond(min_max_assign, make_var_op(batch_adjusted_min), make_var_op(min_var), name='AssignMinCond') max_value = tf.cond(min_max_assign, make_var_op(batch_abs_max), make_var_op(max_var), name='AssignMaxCond') else: min_value = batch_adjusted_min max_value = batch_abs_max assign_min = tf.assign(min_var, min_value) assign_max = tf.assign(max_var, max_value) return self.delayed_quant( inputs, assign_min, assign_max, per_channel=per_channel, num_bits=num_bits, narrow_range=narrow_range, quant_delay=quant_delay)
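# Hedged sketch (assuming `tensorflow_probability` is available, as implied by
# the `tfp.stats.percentile` calls above) of how the clipped quantization
# range is derived: take symmetric percentiles, then widen the negative side
# by the two's-complement multiplier. All numbers and names are illustrative.
import tensorflow_probability as tfp

toy_num_bits = 3
toy_relative_quantile = 0.5
toy_quantile = (1.0 / 2.0 ** (toy_num_bits + 1.0)) * toy_relative_quantile * 100  # 3.125
activations = tf.random_normal([1024])
toy_min = tfp.stats.percentile(activations, q=toy_quantile)
toy_max = tfp.stats.percentile(activations, q=100 - toy_quantile)
toy_abs_max = tf.maximum(tf.abs(toy_min), tf.abs(toy_max))
toy_multiplier = 1.0 + 1.0 / (2.0 ** (toy_num_bits - 1.0) - 1.0)  # 4/3 for 3 bits
toy_adjusted_min = -tf.scalar_mul(toy_multiplier, toy_abs_max)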
def _match(self, similarity_matrix): """Tries to match each column of the similarity matrix to a row. Args: similarity_matrix: tensor of shape [N, M] representing any similarity metric. Returns: Match object with corresponding matches for each of M columns. """ def _match_when_rows_are_empty(): """Performs matching when the rows of similarity matrix are empty. When the rows are empty, all detections are false positives. So we return a tensor of -1's to indicate that the columns do not match to any rows. Returns: matches: int32 tensor indicating the row each column matches to. """ similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape( similarity_matrix) return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32) def _match_when_rows_are_non_empty(): """Performs matching when the rows of similarity matrix are non empty. Returns: matches: int32 tensor indicating the row each column matches to. """ # Matches for each column matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32) # Deal with matched and unmatched threshold if self._matched_threshold is not None: # Get logical indices of ignored and unmatched columns as tf.int64 matched_vals = tf.reduce_max(similarity_matrix, 0) below_unmatched_threshold = tf.greater( self._unmatched_threshold, matched_vals) between_thresholds = tf.logical_and( tf.greater_equal(matched_vals, self._unmatched_threshold), tf.greater(self._matched_threshold, matched_vals)) if self._negatives_lower_than_unmatched: matches = self._set_values_using_indicator( matches, below_unmatched_threshold, -1) matches = self._set_values_using_indicator( matches, between_thresholds, -2) else: matches = self._set_values_using_indicator( matches, below_unmatched_threshold, -2) matches = self._set_values_using_indicator( matches, between_thresholds, -1) if self._force_match_for_each_row: similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape( similarity_matrix) force_match_column_ids = tf.argmax(similarity_matrix, 1, output_type=tf.int32) force_match_column_indicators = tf.one_hot( force_match_column_ids, depth=similarity_matrix_shape[1]) force_match_row_ids = tf.argmax(force_match_column_indicators, 0, output_type=tf.int32) force_match_column_mask = tf.cast( tf.reduce_max(force_match_column_indicators, 0), tf.bool) final_matches = tf.where(force_match_column_mask, force_match_row_ids, matches) return final_matches else: return matches if similarity_matrix.shape.is_fully_defined(): if similarity_matrix.shape[0].value == 0: return _match_when_rows_are_empty() else: return _match_when_rows_are_non_empty() else: return tf.cond(tf.greater(tf.shape(similarity_matrix)[0], 0), _match_when_rows_are_non_empty, _match_when_rows_are_empty)
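# Hedged toy sketch (TF 1.x) of the threshold logic above, using the
# "negatives lower than unmatched" convention: each column takes its best row
# via argmax, then matches below the unmatched threshold become -1 and those
# between the thresholds become -2. The similarity values are made up.
similarity_toy = tf.constant([[0.8, 0.3, 0.1],
                              [0.2, 0.45, 0.05]])
matches_toy = tf.argmax(similarity_toy, 0, output_type=tf.int32)  # [0, 1, 0]
matched_vals = tf.reduce_max(similarity_toy, 0)                   # [0.8, 0.45, 0.1]
matched_thresh, unmatched_thresh = 0.5, 0.4
below_unmatched = matched_vals < unmatched_thresh
between = tf.logical_and(matched_vals >= unmatched_thresh,
                         matched_vals < matched_thresh)
matches_toy = tf.where(below_unmatched,
                       -1 * tf.ones_like(matches_toy), matches_toy)
matches_toy = tf.where(between,
                       -2 * tf.ones_like(matches_toy), matches_toy)
# -> [0, -2, -1]: column 0 matches row 0, column 1 is "between" (ignored),
#    column 2 is unmatched.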
tensor_dict[fields.InputDataFields.image])[:2] if fields.InputDataFields.image_additional_channels in tensor_dict: channels = tensor_dict[fields.InputDataFields.image_additional_channels] channels = tf.squeeze(channels, axis=3) channels = tf.transpose(channels, perm=[1, 2, 0]) tensor_dict[fields.InputDataFields.image_additional_channels] = channels def default_groundtruth_weights(): return tf.ones( [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]], dtype=tf.float32) tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond( tf.greater( tf.shape( tensor_dict[fields.InputDataFields.groundtruth_weights])[0], 0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights], default_groundtruth_weights) if fields.InputDataFields.groundtruth_keypoints in tensor_dict: # Set all keypoints that are not labeled to NaN. gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities visibilities_tiled = tf.tile( tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2]) tensor_dict[gt_kpt_fld] = tf.where( visibilities_tiled, tensor_dict[gt_kpt_fld], np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))
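# Hedged, self-contained sketch (TF 1.x) of the "fall back to unit weights"
# pattern above, with made-up tensors standing in for the `tensor_dict`
# fields.
gt_boxes_toy = tf.constant([[0.1, 0.1, 0.5, 0.5],
                            [0.2, 0.2, 0.6, 0.6]])
gt_weights_toy = tf.constant([], dtype=tf.float32)  # nothing provided
weights_or_default = tf.cond(
    tf.greater(tf.shape(gt_weights_toy)[0], 0),
    lambda: gt_weights_toy,
    lambda: tf.ones([tf.shape(gt_boxes_toy)[0]], dtype=tf.float32))
# -> [1.0, 1.0] because the provided weights tensor was empty.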
def random_crop_image(image, boxes, labels, masks=None, keypoints=None, min_object_covered=1.0, aspect_ratio_range=(0.75, 1.33), area_range=(0.1, 1.0), overlap_thresh=0.3, clip_boxes=True, random_coef=0.0, seed=None): """Randomly crops the image. Given the input image and its bounding boxes, this op randomly crops a subimage. Given a user-provided set of input constraints, the crop window is resampled until it satisfies these constraints. If within 100 trials it is unable to find a valid crop, the original image is returned. See the Args section for a description of the input constraints. Both input boxes and returned Boxes are in normalized form (e.g., lie in the unit square [0, 1]). This function will return the original image with probability random_coef. Note: Keypoint coordinates that are outside the crop will be set to NaN, which is consistent with the original keypoint encoding for non-existing keypoints. Args: image: rank 3 float32 tensor contains 1 image -> [height, width, channels] with pixel values varying between [0, 1]. boxes: rank 2 float32 tensor containing the bounding boxes with shape [num_instances, 4]. Boxes are in normalized form meaning their coordinates vary between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax]. labels: rank 1 int32 tensor containing the object classes. masks: (optional) rank 3 float32 tensor with shape [num_instances, height, width] containing instance masks. The masks are of the same height, width as the input `image`. keypoints: (optional) rank 3 float32 tensor with shape [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized coordinates. min_object_covered: the cropped image must cover at least this fraction of at least one of the input bounding boxes. aspect_ratio_range: allowed range for aspect ratio of cropped image. area_range: allowed range for area ratio between cropped image and the original image. overlap_thresh: minimum overlap thresh with new cropped image to keep the box. clip_boxes: whether to clip the boxes to the cropped image. random_coef: a random coefficient that defines the chance of getting the original image. If random_coef is 0, we will always get the cropped image, and if it is 1.0, we will always get the original image. seed: random seed. Returns: image: Image shape will be [new_height, new_width, channels]. boxes: boxes which is the same rank as input boxes. Boxes are in normalized form. labels: new labels. If label_weights, multiclass_scores, masks, or keypoints is not None, the function also returns: masks: rank 3 float32 tensor with shape [num_instances, height, width] containing instance masks. keypoints: rank 3 float32 tensor with shape [num_instances, num_keypoints, 2] """ def strict_random_crop_image_fn(): return _strict_random_crop_image( image, boxes, labels, masks=masks, keypoints=keypoints, min_object_covered=min_object_covered, aspect_ratio_range=aspect_ratio_range, area_range=area_range, overlap_thresh=overlap_thresh, clip_boxes=clip_boxes) # avoids tf.cond to make faster RCNN training on borg. See b/140057645. if random_coef < sys.float_info.min: result = strict_random_crop_image_fn() else: do_a_crop_random = tf.greater(tf.random_uniform([], seed=seed), random_coef) outputs = [image, boxes, labels] if masks is not None: outputs.append(masks) if keypoints is not None: outputs.append(keypoints) result = tf.cond(do_a_crop_random, strict_random_crop_image_fn, lambda: tuple(outputs)) return result
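# Hedged sketch (TF 1.x) of the `random_coef` gate used above: with
# probability `random_coef` the original input is kept, otherwise the crop
# function runs. `toy_crop` stands in for `_strict_random_crop_image`, which
# is defined elsewhere; all values and names here are made up.
random_coef_toy = 0.3
image_toy = tf.zeros([20, 20, 3])

def toy_crop():
    # Any deterministic stand-in crop; the real code crops image, boxes,
    # labels (and optionally masks / keypoints) together.
    return image_toy[:10, :10, :]

do_a_crop = tf.greater(tf.random_uniform([], seed=0), random_coef_toy)
maybe_cropped = tf.cond(do_a_crop, toy_crop, lambda: image_toy)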