def upsampling_tpu_compatible(data, scale):
    """Nearest neighbor upsampling TPU-compatible implementation.

    This implementation is TPU compatible as opposed to
    tf.image.resize_nearest_neighbor().

    Args:
      data: A 4D float32 tensor of shape [batch, height, width, channels].
        The channel size is always read from the static shape.
      scale: An integer multiple to scale resolution of input data.

    Returns:
      A 4D float32 tensor of shape [batch, height*scale, width*scale,
      channels].
    """
    with tf.name_scope('upsampling_tpu_compatible'):
        static_shape = data.get_shape()
        if static_shape.is_fully_defined():
            # `as_list()` returns plain ints regardless of whether the shape
            # elements are `Dimension` objects or already ints, so this does
            # not depend on the `.value` attribute being available.
            bs, height, width, _ = static_shape.as_list()
        else:
            # Fall back to dynamic (tensor-valued) sizes when any dimension
            # is unknown at graph-construction time.
            dynamic_shape = tf.shape(data)
            bs, height, width = (
                dynamic_shape[0], dynamic_shape[1], dynamic_shape[2])
        channels = static_shape.as_list()[3]

        # Use reshape to quickly upsample the input. The nearest pixel is
        # selected implicitly via broadcasting.
        expanded = tf.reshape(data, [bs, height, 1, width, 1, channels])
        tiled = expanded * tf.ones(
            [1, 1, scale, 1, scale, 1], dtype=data.dtype)
        return tf.reshape(
            tiled, [bs, height * scale, width * scale, channels])
def generate_detections(self, cls_ouputs, box_outputs, image_id):
    """Flattens per-level outputs and runs `_generate_detections` on them.

    Args:
      cls_ouputs: Mapping from pyramid level to the class output tensor of
        that level.
      box_outputs: Mapping from pyramid level to the box output tensor of
        that level.
      image_id: Passed through unchanged to `_generate_detections`.

    Returns:
      The float32 result of `_generate_detections`, wrapped in `tf.py_func`.
    """
    first_level = self._anchors.min_level
    last_level = self._anchors.max_level

    flat_cls = []
    flat_boxes = []
    for level in range(first_level, last_level + 1):
        # One row per anchor: [num_anchors_at_level, num_classes] and
        # [num_anchors_at_level, 4].
        level_cls = tf.reshape(cls_ouputs[level], [-1, self._num_classes])
        flat_cls.append(level_cls)
        level_boxes = tf.reshape(box_outputs[level], [-1, 4])
        flat_boxes.append(level_boxes)

    # Stack all levels along the anchor axis.
    all_cls = tf.concat(flat_cls, 0)
    all_boxes = tf.concat(flat_boxes, 0)

    py_func_inputs = [all_cls, all_boxes, self._anchors.boxes, image_id]
    return tf.py_func(_generate_detections, py_func_inputs, tf.float32)
def _transform_images(self, params, features, labels=None):
    """Rearranges `features['images']` for TPU and optionally casts it.

    Args:
      params: Dict read for 'conv0_space_to_depth_block_size',
        'transpose_input', 'image_size' and 'use_bfloat16'.
      features: Dict holding 'images' (and 'image_info' when space-to-depth
        is enabled). It is updated in place with the transformed images.
      labels: Optional labels passed through unchanged.

    Returns:
      A `(features, labels)` tuple. When `labels` is None, a zero tensor of
      shape [batch_size] is returned in its place.
    """
    images = features['images']
    batch_size, _, _, c = images.get_shape().as_list()
    block_size = params['conv0_space_to_depth_block_size']

    if block_size != 0:
        # Transforms (space-to-depth) images for TPU performance.
        def _fused_transform(imgs, size):
            return spatial_transform.fused_transpose_and_space_to_depth(
                imgs, size, params['conv0_space_to_depth_block_size'],
                params['transpose_input'])

        # The image size is reversed for the second branch of the cond.
        images = tf.cond(
            tf.less(features['image_info'][0, 3],
                    features['image_info'][0, 4]),
            lambda: _fused_transform(images, params['image_size']),
            lambda: _fused_transform(images, params['image_size'][::-1]))
    else:
        # Transposes images for TPU performance.
        image_area = params['image_size'][0] * params['image_size'][1]
        if params['transpose_input']:
            images = tf.transpose(images, [1, 2, 0, 3])
            # Flattens spatial dimensions so that the image tensor has a
            # static shape.
            flat_shape = [image_area, batch_size, c]
        else:
            flat_shape = [batch_size, image_area, c]
        images = tf.reshape(images, flat_shape)

    if params['use_bfloat16']:
        images = tf.cast(images, dtype=tf.bfloat16)

    features['images'] = images

    if labels is None:
        return features, tf.zeros([batch_size])
    return features, labels
def _unpack_labels(self, labels):
    """Unpacks an array of labels into multiscales labels.

    Args:
      labels: Tensor whose first axis enumerates all anchors, ordered level
        by level from `min_level` to `max_level`.

    Returns:
      An OrderedDict keyed by level; each value is the slice of `labels`
      for that level reshaped to [feat_size, feat_size, -1].
    """
    anchors = self._anchors
    per_level = OrderedDict()
    offset = 0
    for level in range(anchors.min_level, anchors.max_level + 1):
        feat_size = int(anchors.image_size / 2**level)
        num_rows = feat_size**2 * anchors.get_anchors_per_location()
        # Rows [offset, offset + num_rows) belong to this level.
        row_ids = tf.range(offset, offset + num_rows)
        offset += num_rows
        level_rows = tf.gather(labels, row_ids)
        per_level[level] = tf.reshape(
            level_rows, [feat_size, feat_size, -1])
    return per_level
def pad_to_fixed_size(data, pad_value, output_shape):
    """Pad data to a fixed length at the first dimension.

    The padding rows are created with the same dtype as `data`, so the
    function works for any numeric dtype rather than only float32.

    Args:
      data: Tensor to be padded to output_shape. It is first reshaped to
        [-1, output_shape[1]].
      pad_value: A constant value assigned to the paddings.
      output_shape: The output shape of a 2D tensor,
        [max_num_instances, dimension].

    Returns:
      The Padded tensor with output_shape [max_num_instances, dimension].
    """
    max_num_instances = output_shape[0]
    dimension = output_shape[1]
    data = tf.reshape(data, [-1, dimension])
    num_instances = tf.shape(data)[0]
    # Fail at run time if there are more rows than the fixed size allows.
    assert_length = tf.Assert(
        tf.less_equal(num_instances, max_num_instances), [num_instances])
    with tf.control_dependencies([assert_length]):
        pad_length = max_num_instances - num_instances
    # Match the dtype of `data`; tf.ones defaults to float32, which would make
    # the concat below fail for non-float32 inputs.
    paddings = pad_value * tf.ones([pad_length, dimension], dtype=data.dtype)
    padded_data = tf.concat([data, paddings], axis=0)
    # Reshape to restore the static shape lost by the dynamic padding.
    padded_data = tf.reshape(padded_data, output_shape)
    return padded_data
def resize_and_crop_boxes(self):
    """Resize boxes and crop it to the self._output dimension.

    Returns:
      A `(boxes, classes)` tuple containing only the entries whose clipped
      box coordinates do not sum to zero.
    """
    source_boxes = preprocessor.box_list.BoxList(self._boxes)
    scaled_boxlist = preprocessor.box_list_scale(
        source_boxes, self._scaled_height, self._scaled_width)
    boxes = scaled_boxlist.get()

    # Adjust box coordinates based on the offset.
    offset_y = self._crop_offset_y
    offset_x = self._crop_offset_x
    box_offset = tf.stack([offset_y, offset_x, offset_y, offset_x])
    boxes -= tf.to_float(tf.reshape(box_offset, [1, 4]))

    # Clip the boxes.
    boxes = self.clip_boxes(boxes)

    # Filter out ground truth boxes that are all zeros.
    coordinate_sums = tf.reduce_sum(boxes, axis=1)
    keep = tf.where(tf.not_equal(coordinate_sums, 0))
    boxes = tf.gather_nd(boxes, keep)
    classes = tf.gather_nd(self._classes, keep)
    return boxes, classes
def label_anchors(self, gt_boxes, gt_labels):
    """Labels anchors with ground truth inputs.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth
        boxes. For each row, it stores [y0, x0, y1, x1] for four corners of
        a box.
      gt_labels: A integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      cls_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * num_classes]. The height_l
        and width_l represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and width_l
        represent the dimension of bounding box regression output at l-th
        level.
      num_positives: scalar tensor storing number of positives in an image.
    """
    anchors_as_boxlist = box_list.BoxList(self._anchors.boxes)
    groundtruth_as_boxlist = box_list.BoxList(gt_boxes)

    # The second and fourth outputs (cls_weights, box_weights) are not used.
    assignment = self._target_assigner.assign(
        anchors_as_boxlist, groundtruth_as_boxlist, gt_labels)
    cls_targets, _, box_targets, _, matches = assignment

    # class labels start from 1 and the background class = -1
    cls_targets -= 1

    # create one-hot labels
    class_ids = tf.cast(cls_targets, dtype=tf.int32)
    one_hot = tf.one_hot(class_ids, self._num_classes)
    one_hot = tf.reshape(one_hot, [-1, self._num_classes])

    cls_targets_dict = self._unpack_labels(one_hot)
    box_targets_dict = self._unpack_labels(box_targets)

    # Every anchor whose match result is not -1 counts as a positive.
    is_matched = tf.not_equal(matches.match_results, -1)
    num_positives = tf.reduce_sum(tf.cast(is_matched, tf.float32))
    return cls_targets_dict, box_targets_dict, num_positives
def _process_example(images, cls_targets, box_targets, num_positives,
                     source_ids, image_scales, boxes, is_crowds, areas,
                     classes):
    """Processes one batch of data.

    Packs the batched parser outputs into a `labels` dict. `batch_size` and
    `params` are taken from the enclosing scope.

    Returns:
      A `(images, labels)` tuple.
    """
    # Count num_positives in a batch, then repeat the mean once per example
    # so the tensor has a leading batch dimension.
    mean_positives = tf.reduce_mean(num_positives)
    tiled_mean = tf.tile(tf.expand_dims(mean_positives, 0), [batch_size])
    labels = {
        'mean_num_positives': tf.reshape(tiled_mean, [batch_size, 1]),
    }

    min_level = params['min_level']
    max_level = params['max_level']
    for level in range(min_level, max_level + 1):
        labels['cls_targets_%d' % level] = cls_targets[level]
        labels['box_targets_%d' % level] = box_targets[level]

    # Concatenate groundtruth annotations to a tensor.
    annotation_parts = [boxes, is_crowds, areas, classes]
    groundtruth_data = tf.concat(annotation_parts, axis=2)

    labels['source_ids'] = source_ids
    labels['groundtruth_data'] = groundtruth_data
    labels['image_scales'] = image_scales
    return images, labels
def _dataset_parser(value):
    """Parse data to a fixed dimension input image and learning targets.

    `example_decoder`, `params` and `anchor_labeler` are taken from the
    enclosing scope.

    Args:
      value: A serialized example accepted by `example_decoder.decode`.

    Returns:
      A tuple (image, cls_targets, box_targets, num_positives, source_id,
      image_scale).
    """
    with tf.name_scope('parser'):
        decoded = example_decoder.decode(value)
        source_id = decoded['source_id']
        image = decoded['image']
        boxes = decoded['groundtruth_boxes']
        classes = tf.reshape(
            tf.cast(decoded['groundtruth_classes'], dtype=tf.float32),
            [-1, 1])

        # the image normalization is identical to Cloud TPU ResNet-50
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        image = _normalize_image(image)

        if params['input_rand_hflip']:
            image, boxes = preprocessor.random_horizontal_flip(
                image, boxes=boxes)

        # Remember the pre-resize shape so the scale back to the original
        # image can be computed from the heights.
        image_original_shape = tf.shape(image)
        target_size = params['image_size']
        image, _ = preprocessor.resize_to_range(
            image, min_dimension=target_size, max_dimension=target_size)
        original_height = tf.to_float(image_original_shape[0])
        resized_height = tf.to_float(tf.shape(image)[0])
        image_scale = original_height / resized_height

        image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
            image, boxes, keypoints=None)

        image = tf.image.pad_to_bounding_box(
            image, 0, 0, params['image_size'], params['image_size'])

        cls_targets, box_targets, num_positives = (
            anchor_labeler.label_anchors(boxes, classes))

        source_id = tf.string_to_number(source_id, out_type=tf.float32)
        return (image, cls_targets, box_targets, num_positives, source_id,
                image_scale)
def _dataset_parser(value):
    """Parse data to a fixed dimension input image and learning targets.

    `example_decoder`, `params`, `image_size`, `anchor_labeler`,
    `height_long_side_anchor_labeler` and `self` are taken from the
    enclosing scope.

    Args:
      value: A dictionary contains an image and groundtruth annotations.

    Returns:
      In PREDICT mode, a single `features` dictionary. Otherwise a
      `(features, labels)` tuple.

      features: A dictionary with the following keys.
        images: The preprocessed image tensor (cast to bfloat16 when
          params['use_bfloat16'] is set).
        image_info: The value of `input_processor.get_image_info()`. This
          parser uses element 2 to rescale the boxes and compares elements
          3 and 4 to pick the anchor labeler.
        source_ids: Source image id. Default value -1 if the source id is
          empty in the groundtruth annotation.
      labels: A dictionary with the following keys.
        score_targets_<level>, box_targets_<level>: One entry per level in
          [min_level, ..., max_level], taken from the anchor labeler.
        gt_boxes: Groundtruth bounding box annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances, 4].
        gt_classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances, 1].
        cropped_gt_masks: Groundtruth masks padded with -1 to
          [self._max_num_instances, gt_mask_size + 4, gt_mask_size + 4].
    """
    with tf.name_scope('parser'):
        data = example_decoder.decode(value)
        image = data['image']

        # An empty source id is replaced by '-1' before numeric conversion.
        source_id = data['source_id']
        source_id = tf.where(
            tf.equal(source_id, tf.constant('')), '-1', source_id)
        source_id = tf.string_to_number(source_id)

        if self._mode == tf.estimator.ModeKeys.PREDICT:
            input_processor = InstanceSegmentationInputProcessor(
                image, image_size, params['short_side_image_size'],
                params['long_side_max_image_size'])
            input_processor.normalize_image()
            input_processor.set_scale_factors_to_mlperf_reference_size()
            image = input_processor.resize_and_crop_image()
            if params['use_bfloat16']:
                image = tf.cast(image, dtype=tf.bfloat16)

            image_info = input_processor.get_image_info()
            return {
                'images': image,
                'image_info': image_info,
                'source_ids': source_id,
            }

        # The following part is for training.
        instance_masks = data['groundtruth_instance_masks']
        boxes = data['groundtruth_boxes']
        classes = tf.reshape(
            tf.cast(data['groundtruth_classes'], dtype=tf.float32), [-1, 1])
        if not params['use_category']:
            # Collapse all categories into a binary indicator.
            classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

        drop_crowds = (params['skip_crowd_during_training'] and
                       self._mode == tf.estimator.ModeKeys.TRAIN)
        if drop_crowds:
            indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
            classes = tf.gather_nd(classes, indices)
            boxes = tf.gather_nd(boxes, indices)
            instance_masks = tf.gather_nd(instance_masks, indices)

        input_processor = InstanceSegmentationInputProcessor(
            image, image_size, params['short_side_image_size'],
            params['long_side_max_image_size'], boxes, classes,
            instance_masks)
        input_processor.normalize_image()
        if params['input_rand_hflip']:
            input_processor.random_horizontal_flip()

        input_processor.set_scale_factors_to_mlperf_reference_size()
        image = input_processor.resize_and_crop_image()

        boxes, classes = input_processor.resize_and_crop_boxes()
        cropped_gt_masks = input_processor.crop_gt_masks(
            params['gt_mask_size'])

        image_info = input_processor.get_image_info()

        # Assign anchors.
        is_height_short_side = tf.less(image_info[3], image_info[4])
        score_targets, box_targets = tf.cond(
            is_height_short_side,
            lambda: anchor_labeler.label_anchors(boxes, classes),
            lambda: height_long_side_anchor_labeler.label_anchors(
                boxes, classes))

        # Pad groundtruth data.
        boxes *= image_info[2]
        boxes = pad_to_fixed_size(boxes, -1, [self._max_num_instances, 4])
        classes = pad_to_fixed_size(
            classes, -1, [self._max_num_instances, 1])

        # Pads cropped_gt_masks. Masks are flattened to rows for padding and
        # then restored to square form.
        mask_side = params['gt_mask_size'] + 4
        cropped_gt_masks = tf.reshape(
            cropped_gt_masks, [-1, (params['gt_mask_size'] + 4)**2])
        cropped_gt_masks = pad_to_fixed_size(
            cropped_gt_masks, -1,
            [self._max_num_instances, (params['gt_mask_size'] + 4)**2])
        cropped_gt_masks = tf.reshape(
            cropped_gt_masks,
            [self._max_num_instances, mask_side, mask_side])

        if params['use_bfloat16']:
            image = tf.cast(image, dtype=tf.bfloat16)

        features = {
            'images': image,
            'image_info': image_info,
            'source_ids': source_id,
        }

        labels = {}
        for level in range(params['min_level'], params['max_level'] + 1):
            labels['score_targets_%d' % level] = score_targets[level]
            labels['box_targets_%d' % level] = box_targets[level]
        labels['gt_boxes'] = boxes
        labels['gt_classes'] = classes
        labels['cropped_gt_masks'] = cropped_gt_masks
        return features, labels
def padded_bounding_box_fn():
    """Reshapes `self._masks` to [-1, ori_height, ori_width, 1].

    `self` is taken from the enclosing scope.
    """
    mask_shape = [-1, self._ori_height, self._ori_width, 1]
    return tf.reshape(self._masks, mask_shape)
def _dataset_parser(value):
    """Parse data to a fixed dimension input image and learning targets.

    `example_decoder`, `params`, `anchor_labeler` and `self` are taken from
    the enclosing scope.

    Args:
      value: A dictionary contains an image and groundtruth annotations.

    Returns:
      image: Image tensor that is preprocessed to have normalized value and
        fixed dimension [image_size, image_size, 3]
      cls_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
      num_positives: Number of positive anchors in the image.
      source_id: Source image id. Default value -1 if the source id is empty
        in the groundtruth annotation.
      image_scale: Scale of the processed image to the original image.
      boxes: Groundtruth bounding box annotations. The box is represented in
        [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
        dimension [self._max_num_instances, 4].
      is_crowds: Groundtruth annotations to indicate if an annotation
        represents a group of instances by value {0, 1}. The tensor is
        padded with 0 to the fixed dimension [self._max_num_instances, 1].
      areas: Groundtruth areas annotations. The tensor is padded with -1 to
        the fixed dimension [self._max_num_instances, 1].
      classes: Groundtruth classes annotations. The tensor is padded with -1
        to the fixed dimension [self._max_num_instances, 1].
    """
    with tf.name_scope('parser'):
        data = example_decoder.decode(value)
        source_id = data['source_id']
        image = data['image']
        boxes = data['groundtruth_boxes']
        classes = data['groundtruth_classes']
        # Cast and reshape once; repeating this on the already converted
        # tensor would add ops without changing the result.
        classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])
        areas = data['groundtruth_area']
        is_crowds = data['groundtruth_is_crowd']

        if params['skip_crowd_during_training'] and self._is_training:
            # Only boxes and classes are filtered here; `areas` and
            # `is_crowds` keep their original rows.
            indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
            classes = tf.gather_nd(classes, indices)
            boxes = tf.gather_nd(boxes, indices)

        # NOTE: The autoaugment method works best when used alongside the
        # standard horizontal flipping of images along with size jittering
        # and normalization.
        if params.get('autoaugment_policy', None) and self._is_training:
            image, boxes = autoaugment.distort_image_with_autoaugment(
                image, boxes, params['autoaugment_policy'])

        input_processor = DetectionInputProcessor(
            image, params['image_size'], boxes, classes)
        input_processor.normalize_image()
        if self._is_training and params['input_rand_hflip']:
            input_processor.random_horizontal_flip()
        if self._is_training:
            input_processor.set_training_random_scale_factors(
                params['train_scale_min'], params['train_scale_max'])
        else:
            input_processor.set_scale_factors_to_output_size()
        image = input_processor.resize_and_crop_image()
        boxes, classes = input_processor.resize_and_crop_boxes()

        # Assign anchors.
        (cls_targets, box_targets,
         num_positives) = anchor_labeler.label_anchors(boxes, classes)

        # An empty source id is replaced by '-1' before numeric conversion.
        source_id = tf.where(
            tf.equal(source_id, tf.constant('')), '-1', source_id)
        source_id = tf.string_to_number(source_id)

        # Pad groundtruth data for evaluation.
        image_scale = input_processor.image_scale_to_original
        boxes *= image_scale
        is_crowds = tf.cast(is_crowds, dtype=tf.float32)
        boxes = pad_to_fixed_size(boxes, -1, [self._max_num_instances, 4])
        is_crowds = pad_to_fixed_size(
            is_crowds, 0, [self._max_num_instances, 1])
        areas = pad_to_fixed_size(areas, -1, [self._max_num_instances, 1])
        classes = pad_to_fixed_size(
            classes, -1, [self._max_num_instances, 1])
        if params['use_bfloat16']:
            image = tf.cast(image, dtype=tf.bfloat16)
        return (image, cls_targets, box_targets, num_positives, source_id,
                image_scale, boxes, is_crowds, areas, classes)
def model_fn(features, labels, mode, params):
    """Actual model function.

    Builds the seq2seq loss selected by `FLAGS.seq2seq_type` and returns a
    TPUEstimatorSpec for evaluation or training. `n_token`, `metric_fn` and
    `model_func_builder` are taken from the enclosing scope.

    Args:
      features: Input features forwarded to the loss builder.
      labels: Labels forwarded to the loss builder.
      mode: A `tf.estimator.ModeKeys` value.
      params: Unused by this function.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec`.

    Raises:
      NotImplementedError: If `FLAGS.seq2seq_type` is neither "dec_only" nor
        "encdec".
    """
    #### Training or Evaluation
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    #### Get loss from inputs
    seq2seq_type = FLAGS.seq2seq_type
    if seq2seq_type == "dec_only":
        seq2seq_loss = model_func_builder.joint_loss
    elif seq2seq_type == "encdec":
        seq2seq_loss = model_func_builder.encdec_loss
    else:
        raise NotImplementedError

    total_loss, monitor_dict = seq2seq_loss(
        features, labels, n_token, is_training)

    #### Check model parameters
    num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
    tf.logging.info("#params: %d", num_params)

    if FLAGS.verbose:
        # Left-align variable names to the width of the longest one.
        longest_name = max([len(v.name) for v in tf.trainable_variables()])
        format_str = "{{:<{0}s}}\t{{}}".format(longest_name)
        for v in tf.trainable_variables():
            tf.logging.info(format_str.format(v.name, v.get_shape()))

    #### Evaluation mode
    if mode == tf.estimator.ModeKeys.EVAL:
        #### Reduce sum losses from all TPU cores
        with tf.colocate_with(total_loss):
            total_loss = tf.contrib.tpu.cross_replica_sum(total_loss)
            total_loss = (
                total_loss / FLAGS.num_hosts / FLAGS.num_core_per_host)
        metric_loss = tf.reshape(total_loss, [1])

        #### Constructing evaluation TPUEstimatorSpec.
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, [metric_loss]))

    #### Customized initial checkpoint
    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if FLAGS.init_checkpoint:
        # A path ending in "latest" means: use the newest checkpoint in that
        # directory.
        if FLAGS.init_checkpoint.endswith("latest"):
            ckpt_dir = os.path.dirname(FLAGS.init_checkpoint)
            init_checkpoint = tf.train.latest_checkpoint(ckpt_dir)
        else:
            init_checkpoint = FLAGS.init_checkpoint

        tf.logging.info("Initialize from the ckpt %s", init_checkpoint)

        (assignment_map, initialized_variable_names
        ) = model_utils.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if FLAGS.use_tpu:
            # On TPU the restore is deferred to the scaffold function.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Log customized initialization
        tf.logging.info("**** Global Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

    #### Get the train op
    train_op, optim_dict = optimization.get_train_op(total_loss)
    monitor_dict.update(optim_dict)

    #### Creating host calls
    host_call = model_utils.construct_scalar_host_call(
        monitor_dict=monitor_dict,
        model_dir=FLAGS.model_dir,
        prefix="train/",
        reduce_fn=tf.reduce_mean,
        log_freq=FLAGS.log_freq)

    #### Constructing training TPUEstimatorSpec.
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        host_call=host_call,
        scaffold_fn=scaffold_fn)
def __call__(self, params):
    """Builds the input pipeline and returns one batch of images and labels.

    Args:
      params: Dict read for the anchor configuration ('min_level',
        'max_level', 'num_scales', 'aspect_ratios', 'anchor_scale',
        'image_size'), 'num_classes', 'input_rand_hflip' and 'batch_size'.

    Returns:
      An `(images, labels)` tuple, where `labels` is a dict holding
      'mean_num_positives', per-level 'cls_targets_<level>' and
      'box_targets_<level>', 'source_ids' and 'image_scales'.
    """
    input_anchors = anchors.Anchors(
        params['min_level'], params['max_level'], params['num_scales'],
        params['aspect_ratios'], params['anchor_scale'],
        params['image_size'])
    anchor_labeler = anchors.AnchorLabeler(
        input_anchors, params['num_classes'])
    example_decoder = tf_example_decoder.TfExampleDecoder()

    def get_dataset_for_mode(data_dir, is_training):
        """Return the location of input samples for a given mode."""
        if is_training:
            return '%s/coco_train2017_nocrowd-*' % data_dir
        return '%s/coco_val2017-*' % data_dir

    def _dataset_parser(value):
        """Parse data to a fixed dimension input image and learning targets."""
        with tf.name_scope('parser'):
            decoded = example_decoder.decode(value)
            source_id = decoded['source_id']
            image = decoded['image']
            boxes = decoded['groundtruth_boxes']
            classes = tf.reshape(
                tf.cast(decoded['groundtruth_classes'], dtype=tf.float32),
                [-1, 1])

            # the image normalization is identical to Cloud TPU ResNet-50
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
            image = _normalize_image(image)

            if params['input_rand_hflip']:
                image, boxes = preprocessor.random_horizontal_flip(
                    image, boxes=boxes)

            # Keep the pre-resize shape to derive the scale from the heights.
            image_original_shape = tf.shape(image)
            image, _ = preprocessor.resize_to_range(
                image,
                min_dimension=params['image_size'],
                max_dimension=params['image_size'])
            original_height = tf.to_float(image_original_shape[0])
            resized_height = tf.to_float(tf.shape(image)[0])
            image_scale = original_height / resized_height

            image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
                image, boxes, keypoints=None)

            image = tf.image.pad_to_bounding_box(
                image, 0, 0, params['image_size'], params['image_size'])

            cls_targets, box_targets, num_positives = (
                anchor_labeler.label_anchors(boxes, classes))

            source_id = tf.string_to_number(source_id, out_type=tf.float32)
            return (image, cls_targets, box_targets, num_positives,
                    source_id, image_scale)

    def prefetch_dataset(filename):
        return tf.data.TFRecordDataset(filename).prefetch(1)

    batch_size = params['batch_size']

    # File-level pipeline: list, shuffle and (for training) repeat the shards.
    data_file_pattern = get_dataset_for_mode(
        self._data_dir, self._is_training)
    dataset = tf.data.Dataset.list_files(data_file_pattern)
    dataset = dataset.shuffle(buffer_size=1024)
    if self._is_training:
        dataset = dataset.repeat()

    # Record-level pipeline: interleave shards, parse and batch.
    dataset = dataset.apply(
        tf.contrib.data.parallel_interleave(
            prefetch_dataset, cycle_length=32, sloppy=True))
    dataset = dataset.shuffle(20)

    dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
    dataset = dataset.prefetch(batch_size)
    dataset = dataset.apply(
        tf.contrib.data.batch_and_drop_remainder(batch_size))
    dataset = dataset.prefetch(1)

    batch = dataset.make_one_shot_iterator().get_next()
    (images, cls_targets, box_targets, num_positives, source_ids,
     image_scales) = batch

    # count num_positives in a batch, repeated once per example
    num_positives_batch = tf.reduce_mean(num_positives)
    tiled_mean = tf.tile(
        tf.expand_dims(num_positives_batch, 0), [batch_size])
    labels = {
        'mean_num_positives': tf.reshape(tiled_mean, [batch_size, 1]),
    }

    for level in range(params['min_level'], params['max_level'] + 1):
        labels['cls_targets_%d' % level] = cls_targets[level]
        labels['box_targets_%d' % level] = box_targets[level]
    labels['source_ids'] = source_ids
    labels['image_scales'] = image_scales
    return images, labels
def model_fn(features, labels, mode, params):
    """Builds the language-model loss and a TPUEstimatorSpec carrying a cache.

    `n_token` and `model_func_builder` are taken from the enclosing scope.

    Args:
      features: Input features forwarded to `get_lm_loss`.
      labels: Not used; deleted immediately.
      mode: A `tf.estimator.ModeKeys` value.
      params: Dict; `params["cache"]` is read when `FLAGS.mem_len > 0`.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec` whose `cache` attribute is set to
      the new memory list (empty when `FLAGS.mem_len` is not positive).
    """
    # not used
    del labels

    #### Training or Evaluation
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    #### Retrieve `mems` from `params["cache"]`
    use_memory = FLAGS.mem_len > 0
    mems = params["cache"] if use_memory else None

    #### Get loss from inputs
    total_loss, new_mems, monitor_dict = model_func_builder.get_lm_loss(
        features, mems, n_token, is_training)

    #### Put `new_mems` into `new_cache`
    new_cache = []
    if FLAGS.mem_len > 0:
        new_cache += new_mems

    #### Check model parameters
    num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
    tf.logging.info("#params: %d", num_params)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(loss):
            """Evaluation metric Fn which runs on CPU."""
            # NOTE(review): the 1.2 factor is not explained in this block;
            # confirm its meaning before changing it.
            perplexity = tf.exp(tf.reduce_mean(loss) * 1.2)
            return {
                "perplexity": tf.metrics.mean(perplexity),
            }

        metric_loss = tf.reshape(total_loss, [1, 1])
        eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, [metric_loss]))

        eval_spec.cache = new_cache
        return eval_spec

    #### Configuring the optimizer
    train_op, optim_dict = optimization.get_train_op(total_loss)
    monitor_dict.update(optim_dict)

    #### Customized initial checkpoint
    tvars = tf.global_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if FLAGS.init_checkpoint is not None:
        # A path ending in "latest" means: use the newest checkpoint in that
        # directory.
        if FLAGS.init_checkpoint.endswith("latest"):
            ckpt_dir = os.path.dirname(FLAGS.init_checkpoint)
            init_checkpoint = tf.train.latest_checkpoint(ckpt_dir)
        else:
            init_checkpoint = FLAGS.init_checkpoint

        tf.logging.info("Initialize from the ckpt %s", init_checkpoint)

        (assignment_map, initialized_variable_names
        ) = model_utils.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if FLAGS.use_tpu:
            # On TPU the restore is deferred to the scaffold function.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Log customized initialization
        tf.logging.info("**** Global Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

    #### Creating host calls
    host_call = model_utils.construct_scalar_host_call(
        monitor_dict=monitor_dict,
        model_dir=FLAGS.model_dir,
        prefix="train/",
        reduce_fn=tf.reduce_mean,
        log_freq=FLAGS.log_freq)

    #### Constructing training TPUEstimatorSpec with new cache.
    train_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        host_call=host_call,
        scaffold_fn=scaffold_fn)
    train_spec.cache = new_cache
    return train_spec
def _model_fn(features, labels, mode, params, model):
    """Model definition for the RetinaNet model based on ResNet-50.

    Args:
      features: the input image tensor with shape [batch_size, height,
        width, 3]. The height and width are fixed and equal.
      labels: the input labels in a dictionary. The labels include class
        targets and box targets which are dense label maps. The labels are
        generated from get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the RetinaNet model outputs class logits and box regression
        outputs.

    Returns:
      tpu_spec: the TPUEstimatorSpec to run training or evaluation. In
        PREDICT mode a plain `tf.estimator.EstimatorSpec` is returned.
    """
    # `list * int` repeats the list, so this equals
    # len(aspect_ratios) * num_scales.
    num_anchors = len(params['aspect_ratios'] * params['num_scales'])
    cls_outputs, box_outputs = model(
        features,
        min_level=params['min_level'],
        max_level=params['max_level'],
        num_classes=params['num_classes'],
        num_anchors=num_anchors,
        is_training_bn=params['is_training_bn'])
    levels = cls_outputs.keys()

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'image': features,
        }
        for level in levels:
            predictions['cls_outputs_%d' % level] = cls_outputs[level]
            predictions['box_outputs_%d' % level] = box_outputs[level]
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Load pretrained model from checkpoint.
    scaffold_fn = None
    if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
                '/': 'resnet50/',
            })
            return tf.train.Scaffold()

    # Set up training loss and learning rate.
    global_step = tf.train.get_global_step()
    learning_rate = _learning_rate_schedule(
        params['learning_rate'], params['lr_warmup_step'],
        params['lr_drop_step'], global_step)
    # cls_loss and box_loss are for logging. only total_loss is optimized.
    total_loss, cls_loss, box_loss = _detection_loss(
        cls_outputs, box_outputs, labels, params)

    train_op = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.MomentumOptimizer(
            learning_rate, momentum=params['momentum'])
        if params['use_tpu']:
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_loss, global_step)

    # Evaluation only works on GPU/CPU host and batch_size=1
    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
            eval_anchors = anchors.Anchors(
                params['min_level'], params['max_level'],
                params['num_scales'], params['aspect_ratios'],
                params['anchor_scale'], params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(
                eval_anchors, params['num_classes'])
            cls_loss_metric = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss_metric = tf.metrics.mean(kwargs['box_loss_repeat'])

            # add metrics to output
            level_cls_outputs = {}
            level_box_outputs = {}
            for level in range(params['min_level'], params['max_level'] + 1):
                level_cls_outputs[level] = kwargs['cls_outputs_%d' % level]
                level_box_outputs[level] = kwargs['box_outputs_%d' % level]
            detections = anchor_labeler.generate_detections(
                level_cls_outputs, level_box_outputs, kwargs['source_ids'])
            eval_metric = coco_metric.EvaluationMetric(
                params['val_json_file'])
            coco_metrics = eval_metric.estimator_metric_fn(
                detections, kwargs['image_scales'])

            # Add metrics to output.
            output_metrics = {
                'cls_loss': cls_loss_metric,
                'box_loss': box_loss_metric,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        # The scalar losses are repeated once per example so that they have
        # a leading batch dimension like the other metric inputs.
        batch_size = params['batch_size']
        cls_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(cls_loss, 0), [batch_size]),
            [batch_size, 1])
        box_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(box_loss, 0), [batch_size]),
            [batch_size, 1])

        metric_fn_inputs = {
            'cls_loss_repeat': cls_loss_repeat,
            'box_loss_repeat': box_loss_repeat,
            'source_ids': labels['source_ids'],
            'image_scales': labels['image_scales'],
        }
        for level in range(params['min_level'], params['max_level'] + 1):
            metric_fn_inputs['cls_outputs_%d' % level] = cls_outputs[level]
            metric_fn_inputs['box_outputs_%d' % level] = box_outputs[level]
        eval_metrics = (metric_fn, metric_fn_inputs)

    return tpu_estimator.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):
  """Build the TPUEstimatorSpec used for pretraining or evaluation.

  Args:
    features: model inputs, forwarded unchanged to the selected loss builder.
    labels: model labels, forwarded unchanged to the selected loss builder.
    mode: a `tf.estimator.ModeKeys` value. EVAL returns an evaluation spec;
      any other mode builds the training spec.
    params: dict supplied by the estimator. `params["cache"]` is read when
      memory is enabled (presumably a flat list of per-layer memory tensors;
      verify against the caller).

  Returns:
    A `tf.contrib.tpu.TPUEstimatorSpec` whose `cache` attribute carries the
    updated memory list.

  Raises:
    NotImplementedError: if `FLAGS.loss_type` is not one of "electra", "mlm"
      or "xlnet".
  """
  training = mode == tf.estimator.ModeKeys.TRAIN

  def _uses_memory(length):
    # Memory is cached only for a non-empty object and a positive `mem_len`.
    return length > 0 and FLAGS.mem_len > 0

  # (length, key) pairs of the objects whose memory lives in the cache.
  cached_objects = list(zip([FLAGS.seq_len], ["mems"]))

  #### Slice `params["cache"]` into one run of layers per cached object.
  mems = {}
  slot = 0
  for length, name in cached_objects:
    if not _uses_memory(length):
      continue
    layers = FLAGS.n_layer + 1 if FLAGS.use_extra_layer else FLAGS.n_layer
    mems[name] = params["cache"][slot * layers:(slot + 1) * layers]
    slot += 1

  #### Pick the loss builder by name; only the chosen attribute is looked up.
  loss_builder_names = {
      "electra": "electra_loss",
      "mlm": "mlm_loss",
      "xlnet": "xlnet_loss",
  }
  if FLAGS.loss_type not in loss_builder_names:
    raise NotImplementedError
  build_loss = getattr(model_func_builder, loss_builder_names[FLAGS.loss_type])
  # `n_token` is not defined in this function; it comes from an outer scope.
  total_loss, new_mems, monitor_dict = build_loss(
      features, labels, mems, n_token, training)

  #### Flatten the updated memories back into a single cache list.
  new_cache = []
  for length, name in cached_objects:
    if _uses_memory(length):
      new_cache.extend(new_mems[name])

  #### Report model size (and optionally every trainable variable).
  trainable = tf.trainable_variables()
  num_params = sum(np.prod(v.shape) for v in trainable)
  tf.logging.info("#params: %d", num_params)

  if FLAGS.verbose:
    name_width = max([len(v.name) for v in trainable])
    row_format = "{{:<{0}s}}\t{{}}".format(name_width)
    for v in trainable:
      tf.logging.info(row_format.format(v.name, v.get_shape()))

  #### Evaluation mode
  if mode == tf.estimator.ModeKeys.EVAL:
    # Sum the per-core losses, then divide by the number of cores.
    with tf.colocate_with(total_loss):
      total_loss = tf.contrib.tpu.cross_replica_sum(total_loss)
      total_loss = total_loss / FLAGS.num_hosts / FLAGS.num_core_per_host
    metric_loss = tf.reshape(total_loss, [1])

    # `metric_fn` is not defined in this function; it comes from an outer
    # scope.
    eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=(metric_fn, [metric_loss]))
    eval_spec.cache = new_cache
    return eval_spec

  #### Training op; optimizer statistics are monitored alongside the loss.
  train_op, optim_dict = optimization.get_train_op(total_loss)
  monitor_dict.update(optim_dict)

  #### Optional warm start from a user-specified checkpoint.
  tvars = tf.global_variables()
  initialized_variable_names = {}
  scaffold_fn = None
  if FLAGS.init_checkpoint is not None:
    # A path ending in "latest" means: newest checkpoint in that directory.
    if FLAGS.init_checkpoint.endswith("latest"):
      ckpt_dir = os.path.dirname(FLAGS.init_checkpoint)
      init_checkpoint = tf.train.latest_checkpoint(ckpt_dir)
    else:
      init_checkpoint = FLAGS.init_checkpoint
    tf.logging.info("Initialize from the ckpt %s", init_checkpoint)

    map_and_names = model_utils.get_assignment_map_from_checkpoint(
        tvars, init_checkpoint)
    assignment_map, initialized_variable_names = map_and_names

    if FLAGS.use_tpu:
      # On TPU the restore is deferred into the scaffold function.
      def tpu_scaffold():
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    # Log which global variables were restored from the checkpoint.
    tf.logging.info("**** Global Variables ****")
    for var in tvars:
      if var.name in initialized_variable_names:
        suffix = ", *INIT_FROM_CKPT*"
      else:
        suffix = ""
      tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                      suffix)

  #### Host call that reports the monitored scalars under "train/".
  host_call = model_utils.construct_scalar_host_call(
      monitor_dict=monitor_dict,
      model_dir=FLAGS.model_dir,
      prefix="train/",
      reduce_fn=tf.reduce_mean,
      log_freq=FLAGS.log_freq)

  #### Training TPUEstimatorSpec carrying the new cache.
  train_spec = tf.contrib.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      host_call=host_call,
      scaffold_fn=scaffold_fn)
  train_spec.cache = new_cache
  return train_spec
def _local_perm(inputs, targets, is_masked, perm_size, seq_len):
  """Sample a factorization order and build the matching attention mask.

  Args:
    inputs: int64 Tensor in shape [seq_len], input ids.
    targets: int64 Tensor in shape [seq_len], target ids.
    is_masked: bool Tensor in shape [seq_len]. True means the position was
      selected for partial prediction.
    perm_size: length of the longest permutation. Could be set to reuse_len;
      should not be larger than reuse_len or there will be data leaks.
      `seq_len` has to be divisible by it for the reshape below to succeed.
    seq_len: int, sequence length.

  Returns:
    A tuple `(perm_mask, new_targets, target_mask, inputs_k, inputs_q)`:
      perm_mask: float32 Tensor; entry [i, j] is 1 when position i cannot
        attend to position j and 0 when it can.
      new_targets: the first input id followed by `targets` without its last
        element.
      target_mask: float32 Tensor, 1 at masked non-functional positions.
      inputs_k: `inputs`, returned unchanged.
      inputs_q: the same tensor as `target_mask`.
  """
  # Draw the factorization order. Positions are laid out as
  # [num_chunks, perm_size], transposed so that the shuffle (which acts on
  # the first axis) permutes the offsets inside each chunk, then restored.
  positions = tf.range(seq_len, dtype=tf.int64)
  by_offset = tf.transpose(tf.reshape(positions, [-1, perm_size]))
  shuffled = tf.random_shuffle(by_offset)
  order = tf.reshape(tf.transpose(shuffled), [-1])

  # Functional tokens are [SEP] and [CLS]; everything else is a regular
  # token.
  is_sep = tf.equal(inputs, SEP_ID)
  is_cls = tf.equal(inputs, CLS_ID)
  regular = tf.logical_not(tf.logical_or(is_sep, is_cls))

  # Regular tokens that were not selected for prediction.
  unmasked = tf.logical_and(tf.logical_not(is_masked), regular)
  masked_or_func = tf.logical_not(unmasked)

  # Non-masked, non-functional tokens get the smallest order index (-1):
  # (1) they can be seen by all other positions
  # (2) they cannot see masked positions, so there is no information leak
  minus_one = -tf.ones([seq_len], dtype=tf.int64)
  rev_order = tf.where(unmasked, minus_one, order)

  # `target_mask`: masked and non-functional tokens.
  # 1: use mask as input and have loss
  # 0: use token (or [SEP], [CLS]) as input and do not have loss
  predicted = tf.logical_and(masked_or_func, regular)
  target_mask = tf.cast(predicted, tf.float32)

  # Predicted tokens must not see themselves; every other position has its
  # own index raised by one so that the `<=` test below fails on itself.
  self_rev_order = tf.where(predicted, rev_order, rev_order + 1)

  # 1: cannot attend if i <= j and j is masked or functional
  # 0: can attend if i > j or j is non-masked
  not_later = self_rev_order[:, None] <= rev_order[None, :]
  perm_mask = tf.cast(tf.logical_and(not_later, masked_or_func), tf.float32)

  # New target: [next token] for LM and [curr token] (self) for PLM.
  new_targets = tf.concat([inputs[0:1], targets[:-1]], axis=0)

  # The content stream uses the raw inputs and the query stream the target
  # mask.
  return perm_mask, new_targets, target_mask, inputs, target_mask
def parser(record):
  """Parse one serialized tfrecord into a dict of fixed-shape features.

  Reads `seq_len`, `reuse_len`, `perm_size`, `num_predict` and `use_bfloat16`
  from the enclosing scope.

  Args:
    record: a serialized example, as accepted by `tf.parse_single_example`.

  Returns:
    The parsed example dict. "input", "target" and "is_masked" are consumed
    and replaced by "perm_mask", "target", "target_mask", "input_k" and
    "input_q" (plus "target_mapping" when `num_predict` is set). The other
    parsed features ("seg_id", "label") are left in the dict.
  """
  # Every stored feature is int64; only "label" is not of length `seq_len`.
  feature_lengths = (
      ("input", seq_len),
      ("target", seq_len),
      ("seg_id", seq_len),
      ("label", 1),
      ("is_masked", seq_len),
  )
  record_spec = {
      name: tf.FixedLenFeature([length], tf.int64)
      for name, length in feature_lengths
  }

  # retrieve serialized example
  example = tf.parse_single_example(serialized=record, features=record_spec)

  inputs = example.pop("input")
  target = example.pop("target")
  is_masked = tf.cast(example.pop("is_masked"), tf.bool)

  non_reuse_len = seq_len - reuse_len
  assert perm_size <= reuse_len
  assert perm_size <= non_reuse_len

  # Permute the reused prefix and the remaining suffix independently.
  head = _local_perm(
      inputs[:reuse_len],
      target[:reuse_len],
      is_masked[:reuse_len],
      perm_size,
      reuse_len)
  tail = _local_perm(
      inputs[reuse_len:],
      target[reuse_len:],
      is_masked[reuse_len:],
      perm_size,
      non_reuse_len)
  head_perm, head_target, head_target_mask, head_k, head_q = head
  tail_perm, tail_target, tail_target_mask, tail_k, tail_q = tail

  # Assemble the full [seq_len, seq_len] mask from the two diagonal blocks.
  # The prefix rows are padded with ones over the suffix columns and the
  # suffix rows with zeros over the prefix columns (assuming 1 means "cannot
  # attend", as documented in `_local_perm`).
  head_rows = tf.concat(
      [head_perm, tf.ones([reuse_len, non_reuse_len])], axis=1)
  tail_rows = tf.concat(
      [tf.zeros([non_reuse_len, reuse_len]), tail_perm], axis=1)
  perm_mask = tf.concat([head_rows, tail_rows], axis=0)
  target = tf.concat([head_target, tail_target], axis=0)
  target_mask = tf.concat([head_target_mask, tail_target_mask], axis=0)
  input_k = tf.concat([head_k, tail_k], axis=0)
  input_q = tf.concat([head_q, tail_q], axis=0)

  if num_predict is None:
    example["target"] = tf.reshape(target, [seq_len])
    example["target_mask"] = tf.reshape(target_mask, [seq_len])
  else:
    # Keep only the positions to predict, padded up to `num_predict` rows.
    all_positions = tf.range(seq_len, dtype=tf.int64)
    is_target = tf.cast(target_mask, tf.bool)
    target_positions = tf.boolean_mask(all_positions, is_target)

    ##### extra padding due to CLS/SEP introduced after prepro
    actual_num_predict = tf.shape(target_positions)[0]
    pad_len = num_predict - actual_num_predict

    ##### target_mapping: one one-hot row per predicted position
    one_hot_rows = tf.one_hot(target_positions, seq_len, dtype=tf.float32)
    row_padding = tf.zeros([pad_len, seq_len], dtype=one_hot_rows.dtype)
    target_mapping = tf.concat([one_hot_rows, row_padding], axis=0)
    example["target_mapping"] = tf.reshape(
        target_mapping, [num_predict, seq_len])

    ##### target
    kept_target = tf.boolean_mask(target, is_target)
    target_padding = tf.zeros([pad_len], dtype=kept_target.dtype)
    padded_target = tf.concat([kept_target, target_padding], axis=0)
    example["target"] = tf.reshape(padded_target, [num_predict])

    ##### target mask: ones for real predictions, zeros for the padding
    real_part = tf.ones([actual_num_predict], dtype=tf.float32)
    pad_part = tf.zeros([pad_len], dtype=tf.float32)
    padded_mask = tf.concat([real_part, pad_part], axis=0)
    example["target_mask"] = tf.reshape(padded_mask, [num_predict])

  # reshape back to fixed shape
  example["perm_mask"] = tf.reshape(perm_mask, [seq_len, seq_len])
  example["input_k"] = tf.reshape(input_k, [seq_len])
  example["input_q"] = tf.reshape(input_q, [seq_len])

  # The return value is ignored, so `_convert_example` presumably mutates
  # `example` in place (verify against its definition).
  _convert_example(example, use_bfloat16)

  for name, tensor in example.items():
    tf.logging.info("%s: %s", name, tensor)

  return example
def model_fn(features, labels, mode, params):
  """Build the TPUEstimatorSpec for language-model training or evaluation.

  Args:
    features: model inputs, forwarded to `model_function.get_lm_loss`.
    labels: unused.
    mode: a `tf.estimator.ModeKeys` value. EVAL returns an evaluation spec;
      any other mode builds the training spec.
    params: dict supplied by the estimator. `params["cache"]` is read as the
      memory when `FLAGS.mem_len > 0`.

  Returns:
    A `tf.contrib.tpu.TPUEstimatorSpec` whose `cache` attribute carries the
    updated memory list.
  """
  del labels  # not used

  training = mode == tf.estimator.ModeKeys.TRAIN
  use_memory = FLAGS.mem_len > 0

  #### The memory comes from the cache only when it is enabled.
  mems = params["cache"] if use_memory else None

  #### Loss, updated memory and the scalars to monitor.
  total_loss, new_mems, monitor_dict = model_function.get_lm_loss(
      features, mems, training)

  #### The new cache is a fresh list holding the updated memories.
  new_cache = list(new_mems) if use_memory else []

  #### Report model size.
  trainable = tf.trainable_variables()
  num_params = sum(np.prod(v.shape) for v in trainable)
  tf.logging.info("#params: %d", num_params)

  if mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(loss):
      """Evaluation metric Fn which runs on CPU."""
      # NOTE(review): the mean loss is scaled by 1.2 before exponentiation.
      # The reason is not visible here (possibly a token-to-word length
      # ratio); confirm before relying on the reported perplexity.
      scaled_mean_loss = tf.reduce_mean(loss) * 1.2
      perplexity = tf.exp(scaled_mean_loss)
      return {
          "perplexity": tf.metrics.mean(perplexity),
      }

    metric_loss = tf.reshape(total_loss, [1, 1])
    eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=(metric_fn, [metric_loss]))
    eval_spec.cache = new_cache
    return eval_spec

  #### Training op; optimizer statistics are monitored alongside the loss.
  train_op, optim_dict = optimization.get_train_op(total_loss)
  monitor_dict.update(optim_dict)

  #### Customized initial checkpoint
  scaffold_fn = model_utils.init_from_checkpoint(global_vars=True)

  #### Host call that reports the monitored scalars under "train/".
  host_call = model_function.construct_scalar_host_call(
      monitor_dict=monitor_dict,
      model_dir=FLAGS.model_dir,
      prefix="train/",
      reduce_fn=tf.reduce_mean)

  #### Training TPUEstimatorSpec carrying the new cache.
  train_spec = tf.contrib.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      host_call=host_call,
      scaffold_fn=scaffold_fn)
  train_spec.cache = new_cache
  return train_spec