def tf_apply_to_image_or_images(fn, image_or_images, **map_kw):
    """Applies a function to a single image or each image in a batch of them.

    Args:
        fn: the function to apply, receives an image, returns an image.
        image_or_images: Either a single image, or a batch of images.
        **map_kw: Arguments passed through to tf.map_fn if called.

    Returns:
        The result of applying the function to the image or batch of images.

    Raises:
        ValueError: if the input's static rank is less than 3.
    """
    static_rank = image_or_images.shape.rank
    if static_rank == 3:  # A single image: HWC
        return fn(image_or_images)
    elif static_rank == 4:  # A batch of images: BHWC
        return tf.map_fn(fn, image_or_images, **map_kw)
    elif static_rank > 4:  # A batch of images: ...HWC
        input_shape = tf.shape(image_or_images)
        h, w, c = image_or_images.get_shape().as_list()[-3:]
        image_or_images = tf.reshape(image_or_images, [-1, h, w, c])
        image_or_images = tf.map_fn(fn, image_or_images, **map_kw)
        return tf.reshape(image_or_images, input_shape)
    else:
        raise ValueError("Unsupported image rank: %d" % static_rank)

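# Usage sketch for tf_apply_to_image_or_images (hedged: assumes TensorFlow is
# imported as `tf`; the shapes are made up for illustration). A rank-5 input
# exercises the ...HWC branch: leading dims are collapsed, mapped over with
# tf.map_fn, then restored.
images = tf.zeros([2, 5, 8, 8, 3])  # e.g. [batch, time, H, W, C]
flipped = tf_apply_to_image_or_images(tf.image.flip_left_right, images)
# `flipped` has the same shape as `images`: (2, 5, 8, 8, 3).
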
def display_images_fetches_fullTest(paths, inputs, targets, nbTargets):
    tensorOneInput, SurfaceLightFixedView, HemishpereLightFixedView = deprocess_images_fullTest(
        inputs, targets, nbTargets)  # , HemishpereLightHemisphereView
    with tf.name_scope("encode_images"):
        display_fetches = {
            "paths": paths,
            "tensorOneInput": tf.map_fn(
                tf.image.encode_png, tensorOneInput, dtype=tf.string,
                name="tensorOneInput_pngs"),
            "SurfaceLightFixedView": tf.map_fn(
                tf.image.encode_png, SurfaceLightFixedView, dtype=tf.string,
                name="SurfaceLightFixedView_pngs"),
            "HemishpereLightFixedView": tf.map_fn(
                tf.image.encode_png, HemishpereLightFixedView, dtype=tf.string,
                name="HemishpereLightFixedView_pngs"),
            # "HemishpereLightHemisphereView": tf.map_fn(
            #     tf.image.encode_png, HemishpereLightHemisphereView,
            #     dtype=tf.string, name="HemishpereLightHemisphereView_pngs"),
        }
    return display_fetches

def build_network(self, images, is_training, reuse):
    if is_training:
        keep_prob = 0.7
        with tf.variable_scope(self.scope, reuse=reuse):
            # Data augmentation. `seed` must be a Python integer.
            if self.image_shape[2] == 1:
                random_flip = lambda x: tf.image.random_flip_left_right(x, seed=0)
                augmented_images = tf.map_fn(random_flip, images)
            else:
                random_flip = lambda x: tf.image.random_flip_left_right(x, seed=0)
                random_brightness = lambda x: tf.image.random_brightness(x, max_delta=0.5, seed=0)
                random_hue = lambda x: tf.image.random_hue(x, 0.08, seed=0)
                random_saturation = lambda x: tf.image.random_saturation(x, 0.5, 1.5, seed=0)
                augmented_images = tf.map_fn(random_flip, images)
                augmented_images = tf.map_fn(random_hue, augmented_images)
                augmented_images = tf.map_fn(random_saturation, augmented_images)
                augmented_images = tf.map_fn(random_brightness, augmented_images)
                augmented_images = tf.image.random_contrast(
                    augmented_images, lower=0.5, upper=1.5, seed=0)
            logits = self.inference(augmented_images, is_training, reuse, keep_prob)
    else:
        keep_prob = 0.7
        with tf.variable_scope(self.scope, reuse=reuse):
            logits = self.inference(images, is_training, reuse, keep_prob)
    return tf.nn.softmax(logits), logits

def display_images_fetches(paths, inputs, targets, gammaCorrectedInputs,
                           outputs, nbTargets, logAlbedo):
    converted_inputs, converted_targets, converted_outputs, converted_gammaCorrectedInputs = deprocess_images(
        inputs, targets, outputs, gammaCorrectedInputs, nbTargets, logAlbedo)
    with tf.name_scope("encode_images"):
        display_fetches = {
            "paths": paths,
            "inputs": tf.map_fn(tf.image.encode_png, converted_inputs,
                                dtype=tf.string, name="input_pngs"),
            "targets": tf.map_fn(tf.image.encode_png, converted_targets,
                                 dtype=tf.string, name="target_pngs"),
            "outputs": tf.map_fn(tf.image.encode_png, converted_outputs,
                                 dtype=tf.string, name="output_pngs"),
            "gammaCorrectedInputs": tf.map_fn(
                tf.image.encode_png, converted_gammaCorrectedInputs,
                dtype=tf.string, name="gammaInput_pngs"),
        }
    images = [converted_inputs, converted_targets, converted_outputs]
    return display_fetches, images

def _add_ips_example_weights_to_targets(self, targets):
    """Add ips_example_weights to targets. Used in ips baseline model."""
    # Add subgroup information to targets.
    target_subgroups = (targets[self.target_column_name],
                        targets[self.sensitive_column_names[1]],
                        targets[self.sensitive_column_names[0]])
    targets[SUBGROUP_TARGET_COLUMN_NAME] = tf.map_fn(
        lambda x: (2 * x[1]) + (1 * x[2]), target_subgroups, dtype=tf.float32)

    # Load precomputed IPS weights into a HashTable.
    ips_with_label_table = self._load_json_dict_into_hashtable(
        self._ips_with_label_file)
    ips_without_label_table = self._load_json_dict_into_hashtable(
        self._ips_without_label_file)

    # Adding IPS example weights to targets.
    # pylint: disable=g-long-lambda
    targets[IPS_WITH_LABEL_TARGET_COLUMN_NAME] = tf.map_fn(
        lambda x: ips_with_label_table.lookup(
            tf.cast((4 * x[0]) + (2 * x[1]) + (1 * x[2]), dtype=tf.int64)),
        target_subgroups, dtype=tf.float32)
    targets[IPS_WITHOUT_LABEL_TARGET_COLUMN_NAME] = tf.map_fn(
        lambda x: ips_without_label_table.lookup(
            tf.cast((2 * x[1]) + (1 * x[2]), dtype=tf.int64)),
        target_subgroups, dtype=tf.float32)
    # pylint: enable=g-long-lambda

    return targets

def __init__(self, raw_cifar10data, sess, model):
    assert isinstance(raw_cifar10data, CIFAR10Data)
    self.image_size = 32

    # Create the augmentation computational graph.
    self.x_input_placeholder = tf.placeholder(tf.float32,
                                              shape=[None, 32, 32, 3])
    padded = tf.map_fn(
        lambda img: tf.image.resize_image_with_crop_or_pad(
            img, self.image_size + 4, self.image_size + 4),
        self.x_input_placeholder)
    cropped = tf.map_fn(
        lambda img: tf.random_crop(img, [self.image_size, self.image_size, 3]),
        padded)
    flipped = tf.map_fn(lambda img: tf.image.random_flip_left_right(img),
                        cropped)
    self.augmented = flipped

    self.train_data = AugmentedDataSubset(raw_cifar10data.train_data, sess,
                                          self.x_input_placeholder,
                                          self.augmented)
    self.eval_data = AugmentedDataSubset(raw_cifar10data.eval_data, sess,
                                         self.x_input_placeholder,
                                         self.augmented)
    self.label_names = raw_cifar10data.label_names

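# Hedged usage sketch of the pad/crop/flip pipeline above, run directly on
# random data (TF1 graph mode; CIFAR10Data / AugmentedDataSubset are
# project-specific and not needed for the ops themselves).
import numpy as np

x_in = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
padded = tf.map_fn(
    lambda img: tf.image.resize_image_with_crop_or_pad(img, 36, 36), x_in)
cropped = tf.map_fn(lambda img: tf.random_crop(img, [32, 32, 3]), padded)
augmented = tf.map_fn(tf.image.random_flip_left_right, cropped)

with tf.Session() as sess:
    batch = np.random.rand(4, 32, 32, 3).astype(np.float32)
    out = sess.run(augmented, feed_dict={x_in: batch})
    print(out.shape)  # (4, 32, 32, 3)
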
def _pre_process(self):
    resize = tf.map_fn(
        lambda frame: tf.image.resize_images(frame, (60, 120)),
        self._observation)
    and_standardize = tf.map_fn(
        lambda frame: tf.image.per_image_standardization(frame), resize)
    self._preprocessed_state = and_standardize

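# Hedged usage sketch for _pre_process (TF1 graph mode; `self._observation`
# is assumed to be a float batch of frames, e.g. Atari-sized 210x160x3).
obs = tf.placeholder(tf.float32, [None, 210, 160, 3])
resized = tf.map_fn(lambda f: tf.image.resize_images(f, (60, 120)), obs)
standardized = tf.map_fn(tf.image.per_image_standardization, resized)
# standardized: [batch, 60, 120, 3]. Note tf.image.resize_images also accepts
# a 4-D batch directly, so the first map_fn is optional; the map_fn is needed
# for per_image_standardization in older TF1, which expects a 3-D image.
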
def build_network(self, images, is_training, reuse):
    if is_training:
        with TowerContext("", is_training=True):
            with tf.variable_scope(self.scope, reuse=reuse):
                # Data augmentation.
                if self.image_shape[2] == 1:
                    random_flip = lambda x: tf.image.random_flip_left_right(x, seed=0)
                    standardize = lambda x: tf.image.per_image_standardization(x)
                    augmented_images = tf.map_fn(random_flip, images)
                    augmented_images = tf.map_fn(standardize, augmented_images)
                else:
                    random_flip = lambda x: tf.image.random_flip_left_right(x, seed=0)
                    random_brightness = lambda x: tf.image.random_brightness(x, max_delta=63, seed=0)
                    random_hue = lambda x: tf.image.random_hue(x, 0.08, seed=0)
                    random_saturation = lambda x: tf.image.random_saturation(x, 0.5, 1.5, seed=0)
                    standardize = lambda x: tf.image.per_image_standardization(x)
                    augmented_images = tf.map_fn(random_flip, images)
                    augmented_images = tf.map_fn(random_hue, augmented_images)
                    augmented_images = tf.map_fn(random_saturation, augmented_images)
                    augmented_images = tf.map_fn(random_brightness, augmented_images)
                    augmented_images = tf.image.random_contrast(
                        augmented_images, lower=0.2, upper=1.8, seed=0)
                    augmented_images = tf.map_fn(standardize, augmented_images)
                logits = self.inference(augmented_images)
    else:
        with TowerContext("", is_training=False):
            with tf.variable_scope(self.scope, reuse=reuse):
                standardize = lambda x: tf.image.per_image_standardization(x)
                augmented_images = tf.map_fn(standardize, images)
                logits = self.inference(augmented_images)
    return tf.nn.softmax(logits), logits

def MD_parallel(image, units):
    """Arranges the input into the 4 directions and stacks them into a single
    tensor to be processed by fast_MD."""
    _, height, width, inp_size = image.get_shape().as_list()

    # Four orientations.
    tl = image
    tr = tf.map_fn(tf.image.flip_left_right, image)
    bl = tf.map_fn(tf.image.flip_up_down, image)
    br = tf.map_fn(tf.image.flip_left_right,
                   tf.map_fn(tf.image.flip_up_down, image))
    all_together = tf.stack([tl, tr, bl, br], 4)

    # all_activations is b x height x width x units x dir.
    all_activations = fast_MD_dynamic(all_together, units)

    # Separate to reorient the activations.
    tl, tr, bl, br = tf.split(all_activations, num_or_size_splits=4, axis=4)

    # Flip back to align the activations correctly.
    tl = tl[:, :, :, :, 0]
    tr = tf.map_fn(tf.image.flip_left_right, tr[:, :, :, :, 0])
    bl = tf.map_fn(tf.image.flip_up_down, bl[:, :, :, :, 0])
    br = tf.map_fn(tf.image.flip_up_down,
                   tf.map_fn(tf.image.flip_left_right, br[:, :, :, :, 0]))

    # Stack into a single tensor.
    all_together = tf.stack([tl, tr, bl, br], 4)
    all_together.set_shape([None, height, width, units, 4])
    return all_together

def parse_fn(sequence_example):
    """Parses a Kinetics example."""
    context_features = {
        ms.get_example_id_key(): ms.get_example_id_default_parser(),
    }
    if parse_labels:
        context_features[ms.get_clip_label_string_key()] = tf.FixedLenFeature(
            (), tf.string)
        context_features[ms.get_clip_label_index_key()] = tf.FixedLenFeature(
            (), tf.int64)

    sequence_features = {
        ms.get_image_encoded_key(): ms.get_image_encoded_default_parser(),
        ms.get_forward_flow_encoded_key():
            ms.get_forward_flow_encoded_default_parser(),
    }
    parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
        sequence_example, context_features, sequence_features)

    images = tf.image.convert_image_dtype(
        tf.map_fn(tf.image.decode_jpeg,
                  parsed_sequence[ms.get_image_encoded_key()],
                  back_prop=False,
                  dtype=tf.uint8), tf.float32)
    num_frames = tf.shape(images)[0]

    flow = tf.image.convert_image_dtype(
        tf.map_fn(tf.image.decode_jpeg,
                  parsed_sequence[ms.get_forward_flow_encoded_key()],
                  back_prop=False,
                  dtype=tf.uint8), tf.float32)
    # The flow is quantized for storage in JPEGs by the FlowToImageCalculator.
    # The quantization needs to be inverted.
    flow = (flow[:, :, :, :2] - 0.5) * 2 * 20.

    output_dict = {
        "images": images,
        "flow": flow,
        "num_frames": num_frames,
    }
    if parse_labels:
        target = tf.one_hot(parsed_context[ms.get_clip_label_index_key()], 700)
        output_dict["labels"] = target
    return output_dict

def aggregate_argmax(z_mean, z_logvar, new_mean, new_log_var, labels,
                     kl_per_point):
    """Argmax aggregation with adaptive k.

    The bottom k dimensions in terms of distance are not averaged. K is
    estimated adaptively by binning the distance into two bins of equal width.

    Args:
        z_mean: Mean of the encoder distribution for the original image.
        z_logvar: Logvar of the encoder distribution for the original image.
        new_mean: Average mean of the encoder distribution of the pair of
            images.
        new_log_var: Average logvar of the encoder distribution of the pair of
            images.
        labels: One-hot-encoding with the position of the dimension that
            should not be shared.
        kl_per_point: Distance between the two encoder distributions.

    Returns:
        Mean and logvariance for the new observation.
    """
    del labels
    mask = tf.equal(tf.map_fn(discretize_in_bins, kl_per_point, tf.int32), 1)
    z_mean_averaged = tf.where(mask, z_mean, new_mean)
    z_logvar_averaged = tf.where(mask, z_logvar, new_log_var)
    return z_mean_averaged, z_logvar_averaged

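# `discretize_in_bins` is not defined in this snippet. A plausible sketch,
# matching the docstring ("binning the distance into two bins of equal
# width"), is given below; this is an assumption, not the original
# implementation.
def discretize_in_bins(x):
    """Discretizes a vector into two bins of equal width over its range."""
    return tf.histogram_fixed_width_bins(
        x, [tf.reduce_min(x), tf.reduce_max(x)], nbins=2)
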
def process_tensors_from_config(tensors, data_config):
    """Apply filters and maps to an existing dataset, based on the config."""

    def wrap_ensemble_fn(data, i):
        """Function to be mapped over the ensemble dimension."""
        d = data.copy()
        fns = ensembled_map_fns(data_config)
        fn = compose(fns)
        d['ensemble_index'] = i
        return fn(d)

    eval_cfg = data_config.eval
    tensors = compose(nonensembled_map_fns(data_config))(tensors)

    tensors_0 = wrap_ensemble_fn(tensors, tf.constant(0))
    num_ensemble = eval_cfg.num_ensemble
    if data_config.common.resample_msa_in_recycling:
        # Separate batch per ensembling & recycling step.
        num_ensemble *= data_config.common.num_recycle + 1

    if isinstance(num_ensemble, tf.Tensor) or num_ensemble > 1:
        fn_output_signature = tree.map_structure(tf.TensorSpec.from_tensor,
                                                 tensors_0)
        tensors = tf.map_fn(
            lambda x: wrap_ensemble_fn(tensors, x),
            tf.range(num_ensemble),
            parallel_iterations=1,
            fn_output_signature=fn_output_signature)
    else:
        tensors = tree.map_structure(lambda x: x[None], tensors_0)
    return tensors

def parse_fn(sequence_example):
    """Parses a clip classification example."""
    context_features = {
        ms.get_example_id_key(): ms.get_example_id_default_parser(),
        ms.get_clip_label_index_key():
            ms.get_clip_label_index_default_parser(),
        ms.get_clip_label_string_key():
            ms.get_clip_label_string_default_parser()
    }
    sequence_features = {
        ms.get_image_encoded_key(): ms.get_image_encoded_default_parser(),
    }
    parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
        sequence_example, context_features, sequence_features)

    example_id = parsed_context[ms.get_example_id_key()]
    classification_target = tf.one_hot(
        tf.sparse_tensor_to_dense(
            parsed_context[ms.get_clip_label_index_key()]),
        NUM_CLASSES)
    images = tf.map_fn(
        tf.image.decode_jpeg,
        parsed_sequence[ms.get_image_encoded_key()],
        back_prop=False,
        dtype=tf.uint8)
    return {
        "id": example_id,
        "labels": classification_target,
        "images": images,
    }

def _get_bbox_pred(self, proposed_boxes, gt_boxes_per_class):
    """Computes valid bbox_pred from proposals and gt_boxes for each class.

    Args:
        proposed_boxes: Tensor with shape (num_proposals, 4).
        gt_boxes_per_class: Tensor holding the ground truth boxes for each
            class. Has shape (num_classes, num_gt_boxes_per_class, 4).

    Returns:
        A tensor with shape (num_proposals, num_classes * 4), holding the
        correct bbox_preds.
    """

    def bbox_encode(gt_boxes):
        return encode(proposed_boxes, gt_boxes)

    bbox_pred_tensor = tf.map_fn(bbox_encode, gt_boxes_per_class,
                                 dtype=tf.float32)
    # We need to explicitly unstack the tensor so that tf.concat works
    # properly.
    bbox_pred_list = tf.unstack(bbox_pred_tensor)
    return tf.concat(bbox_pred_list, 1)

def get2d_histogram(x, y, value_range, nbins=100, dtype=tf.dtypes.int32):
    """Bins x, y coordinates of points onto a simple square 2d histogram.

    Given the tensors x and y of point coordinates, this operation returns a
    rank-2 `Tensor` holding the counts of a 2d histogram. The bins are of
    equal width and determined by the arguments `value_range` and `nbins`.

    Args:
        x: Numeric `Tensor`. x coordinates of points.
        y: Numeric `Tensor`. y coordinates of points.
        value_range: value_range[0] holds the limits for x, value_range[1]
            the limits for y.
        nbins: Scalar `int32 Tensor`. Number of histogram bins.
        dtype: dtype for the returned histogram.
    """
    x_range = value_range[0]
    y_range = value_range[1]

    histy_bins = tf.histogram_fixed_width_bins(y, y_range, nbins=nbins,
                                               dtype=dtype)
    # For each y-bin, histogram the x values that fall into it. Boolean-mask
    # selection is used instead of `x[histy_bins == i]`, which is not valid
    # graph-tensor indexing.
    H = tf.map_fn(
        lambda i: tf.histogram_fixed_width(
            tf.boolean_mask(x, tf.equal(histy_bins, i)), x_range, nbins=nbins),
        tf.range(nbins))
    return H  # Matrix!

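# Hedged usage sketch: 10k random points binned into a 100x100 histogram
# (values are illustrative).
x_pts = tf.random.uniform([10000], minval=-1.0, maxval=1.0)
y_pts = tf.random.uniform([10000], minval=-1.0, maxval=1.0)
hist2d = get2d_histogram(x_pts, y_pts,
                         value_range=[[-1.0, 1.0], [-1.0, 1.0]],
                         nbins=100)
# hist2d[i, j] counts the points whose y falls in y-bin i and x in x-bin j.
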
def sample_partial_sequence_batch(features, constant_values=0):
    """Samples partial sequences from a batch of expression sequences.

    Args:
        features: Dict of tensors. This dict needs to have:
            * 'expression_sequence': an int32 tensor with shape
                [batch_size, max_length].
            * 'expression_sequence_mask': a boolean tensor with shape
                [batch_size, max_length].
        constant_values: Integer. The value to pad at the end of the partial
            sequence to the same length as the expression sequence.

    Returns:
        A feature dict. The following keys are added to the dict.
            * 'partial_sequence': an int32 tensor with shape
                [batch_size, max_length].
            * 'partial_sequence_mask': a boolean tensor with shape
                [batch_size, max_length].
            * 'partial_sequence_length': an int32 tensor with shape
                [batch_size].
            * 'next_production_rule': an int32 tensor with shape [batch_size].
    """
    (partial_sequences, partial_sequence_masks, partial_sequence_lengths,
     next_production_rules) = tf.map_fn(
         functools.partial(sample_partial_sequence,
                           constant_values=constant_values),
         (features['expression_sequence'],
          features['expression_sequence_mask']),
         dtype=(tf.int32, tf.bool, tf.int32, tf.int32))
    features['partial_sequence'] = partial_sequences
    features['partial_sequence_mask'] = partial_sequence_masks
    features['partial_sequence_length'] = partial_sequence_lengths
    features['next_production_rule'] = next_production_rules
    return features

def tf_example_input(batch_size, desired_image_size, padding_stride):
    """tf.Example input."""
    placeholder = tf.placeholder(dtype=tf.string, shape=(batch_size,))

    def _prepare(tf_example_string):
        return preprocess_image(
            convert_image(decode_image(parse_tf_example(tf_example_string))),
            desired_image_size, padding_stride)

    if batch_size == 1:
        tf_example_string = tf.squeeze(placeholder, axis=0)
        image, image_info = _prepare(tf_example_string)
        images = tf.expand_dims(image, axis=0)
        images_info = tf.expand_dims(image_info, axis=0)
    else:
        images, images_info = tf.map_fn(
            _prepare, placeholder, back_prop=False,
            dtype=(tf.float32, tf.float32))
    return placeholder, {'images': images, 'image_info': images_info}

def tf_shift_logprobs(mat, axis):
    """
    Shifts the log-probs per-batch row-wise.

    :param mat: (B, T, U, V)
    :param axis: the time axis to shift along (usually T)
    :return: (B, T, U+T, V), where the U axis is padded by T
    """
    # The batch axis has to be first.
    max_time = tf.shape(mat)[axis]  # T

    def fn(args):
        """Computes the shift per diagonal and pads accordingly."""
        x, shift = args  # x: (B, U, V)
        padded = tf.pad(
            x,
            [
                [0, 0],  # B
                [shift, max_time - shift],  # U -> U+T
                [0, 0],  # V
            ],
            constant_values=0)
        return padded, shift

    elems0 = tf.transpose(mat, [1, 0, 2, 3])  # [T, B, U, V]
    elems1 = tf.range(max_time)  # [T]
    t, _ = tf.map_fn(fn, elems=(elems0, elems1))  # [T, B, U+T, V]
    t = tf.transpose(t, [1, 0, 2, 3])  # [B, T, U+T, V]
    return t

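# Hedged shape sanity check for tf_shift_logprobs with toy sizes B=2, T=3,
# U=4, V=5: diagonal t is shifted by t along the U axis, giving U+T=7.
mat = tf.zeros([2, 3, 4, 5])
shifted = tf_shift_logprobs(mat, axis=1)
# When evaluated, `shifted` has shape (2, 3, 7, 5).
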
def build_detection(self, reuse=False):
    with tf.variable_scope('detection', reuse=reuse):

        def _translation_match(x, z):
            # Translation match for one example within a batch.
            x = tf.expand_dims(x, 0)  # [1, in_height, in_width, in_channels]
            z = tf.expand_dims(z, -1)  # [filter_height, filter_width, in_channels, 1]
            return tf.nn.conv2d(x, z, strides=[1, 1, 1, 1], padding='VALID',
                                name='translation_match')

        output = tf.map_fn(
            lambda x: _translation_match(x[0], x[1]),
            (self.instance_embeds, self.templates),
            dtype=self.instance_embeds.dtype)
        output = tf.squeeze(output, [1, 4])  # of shape e.g., [8, 15, 15]

        # Adjust score; this is required to make training possible.
        config = self.model_config['adjust_response_config']
        bias = tf.get_variable(
            'biases', [1],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32),
            trainable=config['train_bias'])
        response = config['scale'] * output + bias
        self.response = response

def decode_and_resize_img(img_bytes, height, width, depth):
    # type: (tf.Tensor, int, int, int) -> tf.Tensor
    """Decodes and resizes input image to return a float representation.

    Args:
        img_bytes: tensor for input bytes.
        height: Desired height of image.
        width: Desired width of image.
        depth: Desired number of color channels.

    Returns:
        float_pixels: Tensor storing the image as float. This is the input
            tensor that we'll reference in the Explainable AI feature to show
            how output changes with input.
    """
    features = tf.squeeze(img_bytes, axis=1, name='input_squeeze')
    float_pixels = tf.map_fn(
        # pylint: disable=g-long-lambda
        lambda img: tf.image.resize_with_crop_or_pad(
            tf.io.decode_image(img, channels=depth, dtype=tf.float32),
            height, width),
        features,
        dtype=tf.float32,
        name='input_convert')
    float_pixels = tf.ensure_shape(float_pixels, (None, height, width, depth))
    float_pixels = tf.identity(float_pixels, name=input_pixels_tensor_name)
    return float_pixels

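# Hedged usage sketch (TF1 graph mode): one JPEG/PNG byte string per row; the
# extra axis=1 matches the `input_squeeze` above. `input_pixels_tensor_name`
# is assumed to be defined elsewhere in the module.
img_bytes = tf.placeholder(tf.string, shape=[None, 1], name='input_bytes')
pixels = decode_and_resize_img(img_bytes, height=224, width=224, depth=3)
# pixels: float32 tensor of shape [batch, 224, 224, 3] with values in [0, 1],
# since decode_image with dtype=tf.float32 rescales pixel values.
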
def augment_train(img_tensor, hps):
    def augment_each(img):
        if hps.random_crop:
            img = tf.random_crop(img, [hps.height, hps.width, hps.n_col])
        else:
            img = tf.image.central_crop(img, hps.height / hps.height_pad)
        if hps.dataset not in ['mnist', 'gts', 'svhn'] and hps.fl_mirroring:
            img = tf.image.random_flip_left_right(img)
        img = tf.image.random_brightness(img, max_delta=0.1)
        img = tf.minimum(tf.maximum(img, 0.0), 1.0)
        img = tf.image.random_contrast(img, lower=0.6, upper=1.4)
        img = tf.minimum(tf.maximum(img, 0.0), 1.0)
        return img

    with tf.device('/cpu:0'):
        if hps.fl_rotations:
            rand_angles = tf.random_uniform([hps.batch_size],
                                            minval=-hps.max_rotate_angle,
                                            maxval=hps.max_rotate_angle)
            img_tensor = tf.contrib.image.rotate(img_tensor, rand_angles)
        img_tensor = tf.map_fn(augment_each, img_tensor)
        if hps.gauss_noise_flag:
            expected_noise_norm = 2.0 if hps.dataset in ['mnist', 'fmnist'] else 1.0
            gauss_noise = tf.random_normal(tf.shape(img_tensor),
                                           stddev=expected_noise_norm / hps.n_in)
            img_tensor += gauss_noise
            img_tensor = tf.minimum(tf.maximum(img_tensor, 0.0), 1.0)
    return img_tensor

def compress(tensor):
    tensor_uint8 = tf.image.convert_image_dtype(tensor, tf.uint8)
    return tf.map_fn(
        lambda x: tf.io.encode_jpeg(x, quality=quality),
        tensor_uint8,
        dtype=tf.string,
        parallel_iterations=tensor.shape[0])

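# Hedged usage sketch: `quality` is captured from the enclosing scope, so
# define it before calling. The input is assumed to be float images in [0, 1]
# with a static batch dimension (parallel_iterations needs a known size).
quality = 85
frames = tf.random.uniform([8, 64, 64, 3])  # float32 in [0, 1)
jpegs = compress(frames)  # string tensor of shape [8], one JPEG per frame
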
def infer_latent(self, hiddens, y=None, use_mean_y=False):
    """Performs inference over the latent variable z.

    Args:
        hiddens: The shared encoder activations, 4D `Tensor` of size
            `[B, ...]`.
        y: Categorical cluster variable, `Tensor` of size `[B, ...]`.
        use_mean_y: Boolean, whether to take the mean encoding over all y.

    Returns:
        The distribution `q(z|x, y)`, which on sample produces tensors of size
        `[N, B, ...]` where `B` is the batch size of `x` and `y`, and `N` is
        the number of samples and `...` represents the shape of the latent
        variables.
    """
    with tf.control_dependencies([tfc.assert_rank(hiddens, 2)]):
        if y is None:
            y = tf.to_float(self.infer_cluster(hiddens).mode())

    if use_mean_y:
        # If use_mean_y, then y must be probabilities.
        all_y = tf.tile(
            tf.expand_dims(tf.one_hot(tf.range(y.shape[1]), y.shape[1]),
                           axis=1),
            multiples=[1, y.shape[0], 1])

        # Compute z KL from x (for all possible y), and keep z's around.
        z_all = tf.map_fn(
            fn=lambda y: self._latent_encoder(
                hiddens, y, is_training=self._is_training).mean(),
            elems=all_y,
            dtype=tf.float32)
        return tf.einsum('ij,jik->ik', y, z_all)
    else:
        return self._latent_encoder(hiddens, y,
                                    is_training=self._is_training)

def _decode_png_instance_masks(self, keys_to_tensors):
    """Decode PNG instance segmentation masks and stack into dense tensor.

    The instance segmentation masks are reshaped to
    [num_instances, height, width].

    Args:
        keys_to_tensors: a dictionary from keys to tensors.

    Returns:
        A 3-D float tensor of shape [num_instances, height, width] with values
        in {0, 1}.
    """

    def decode_png_mask(image_buffer):
        image = tf.squeeze(
            tf.image.decode_image(image_buffer, channels=1), axis=2)
        image.set_shape([None, None])
        image = tf.cast(tf.greater(image, 0), dtype=tf.float32)
        return image

    png_masks = keys_to_tensors['image/object/mask']
    height = keys_to_tensors['image/height']
    width = keys_to_tensors['image/width']
    if isinstance(png_masks, tf.SparseTensor):
        png_masks = tf.sparse_tensor_to_dense(png_masks, default_value='')
    return tf.cond(
        tf.greater(tf.size(png_masks), 0),
        lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
        lambda: tf.zeros(tf.cast(tf.stack([0, height, width]),
                                 dtype=tf.int32)))

def image_tensor_input(batch_size, desired_image_size, padding_stride):
    """Image tensor input."""
    desired_image_height, desired_image_width = desired_image_size
    placeholder = tf.placeholder(
        dtype=tf.uint8,
        shape=(batch_size, desired_image_height, desired_image_width, 3))

    def _prepare(image):
        return preprocess_image(
            convert_image(image), desired_image_size, padding_stride)

    if batch_size == 1:
        image = tf.squeeze(placeholder, axis=0)
        image, image_info = _prepare(image)
        images = tf.expand_dims(image, axis=0)
        images_info = tf.expand_dims(image_info, axis=0)
    else:
        images, images_info = tf.map_fn(
            _prepare, placeholder, back_prop=False,
            dtype=(tf.float32, tf.float32))
    return placeholder, {'images': images, 'image_info': images_info}

def batch_image_preprocess(raw_images,
                           image_size: Union[int, Tuple[int, int]],
                           batch_size: int = None):
    """Preprocess batched images for inference.

    Args:
        raw_images: a list of images, each image can be a tensor or a numpy
            array.
        image_size: single integer of image size for square image or tuple of
            two integers, in the format of (image_height, image_width).
        batch_size: if None, use map_fn to deal with dynamic batch size.

    Returns:
        (image, scale): a tuple of processed images and scales.
    """
    if not batch_size:
        # map_fn is a little bit slower due to some extra overhead.
        map_fn = functools.partial(image_preprocess, image_size=image_size)
        images, scales = tf.map_fn(
            map_fn, raw_images, dtype=(tf.float32, tf.float32),
            back_prop=False)
        return (images, scales)

    # If batch size is known, use a simple loop.
    scales, images = [], []
    for i in range(batch_size):
        image, scale = image_preprocess(raw_images[i], image_size)
        scales.append(scale)
        images.append(image)
    images = tf.stack(images)
    scales = tf.stack(scales)
    return (images, scales)

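# Hedged usage sketch: `image_preprocess` is project-specific, so only the
# call pattern is shown. With batch_size=None the tf.map_fn path handles a
# dynamic batch; with a known batch_size a Python loop is unrolled instead.
raw = tf.zeros([4, 512, 512, 3])
imgs, scales = batch_image_preprocess(raw, image_size=(640, 640),
                                      batch_size=4)
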
def lovasz_softmax(probas, labels, only_present=True, per_image=False,
                   ignore=None, order='BHWC'):
    """
    Multi-class Lovasz-Softmax loss

    probas: [B, H, W, C] or [B, C, H, W] Variable, class probabilities at
        each prediction (between 0 and 1)
    labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)
    only_present: average only on classes present in ground truth
    per_image: compute the loss per image instead of per batch
    ignore: void class labels
    order: use BHWC or BCHW
    """
    probas = tf.nn.softmax(probas, 3)
    labels = helpers.reverse_one_hot(labels)

    if per_image:
        # tf.map_fn passes the (prob, lab) pair as a single argument.
        def treat_image(prob_lab):
            prob, lab = prob_lab
            prob, lab = tf.expand_dims(prob, 0), tf.expand_dims(lab, 0)
            prob, lab = _flatten_probas(prob, lab, ignore, order)
            return _lovasz_softmax_flat(prob, lab, only_present=only_present)

        losses = tf.map_fn(treat_image, (probas, labels), dtype=tf.float32)
    else:
        losses = _lovasz_softmax_flat(
            *_flatten_probas(probas, labels, ignore, order),
            only_present=only_present)
    return losses

def _create_targets(self, groundtruth):
    """
    Arguments:
        groundtruth: a dict with the following keys
            'boxes': a float tensor with shape [batch_size, N, 4].
            'num_boxes': an int tensor with shape [batch_size].
    Returns:
        regression_targets: a float tensor with shape
            [batch_size, num_anchors, 4].
        matches: an int tensor with shape [batch_size, num_anchors],
            `-1` means that an anchor box is negative (background),
            and `-2` means that we must ignore this anchor box.
    """
    def fn(x):
        boxes, num_boxes = x
        boxes = boxes[:num_boxes]
        regression_targets, matches = get_training_targets(
            self.anchors, boxes,
            positives_threshold=POSITIVES_THRESHOLD,
            negatives_threshold=NEGATIVES_THRESHOLD)
        return regression_targets, matches

    with tf.name_scope('target_creation'):
        regression_targets, matches = tf.map_fn(
            fn, [groundtruth['boxes'], groundtruth['num_boxes']],
            dtype=(tf.float32, tf.int32),
            parallel_iterations=PARALLEL_ITERATIONS,
            back_prop=False, swap_memory=False, infer_shape=True)
        return regression_targets, matches

def _encoded_image_string_tensor_input_placeholder(input_shape=None):
    """Returns input that accepts a batch of PNG or JPEG strings.

    Args:
        input_shape: the shape to resize the output decoded images to
            (optional).

    Returns:
        a tuple of input placeholder and the output decoded images.
    """
    batch_image_str_placeholder = tf.placeholder(
        dtype=tf.string,
        shape=[None],
        name='encoded_image_string_tensor')

    def decode(encoded_image_string_tensor):
        image_tensor = tf.image.decode_image(encoded_image_string_tensor,
                                             channels=3)
        image_tensor.set_shape((None, None, 3))
        if input_shape is not None:
            image_tensor = tf.image.resize(image_tensor, input_shape[1:3])
        return image_tensor

    return (batch_image_str_placeholder,
            tf.map_fn(
                decode,
                elems=batch_image_str_placeholder,
                dtype=tf.uint8,
                parallel_iterations=32,
                back_prop=False))

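# Hedged usage sketch (TF1 graph mode): encode an image to PNG, then feed it
# through the string placeholder (leaving input_shape=None keeps the decoded
# dtype uint8, matching the map_fn dtype above).
placeholder, decoded = _encoded_image_string_tensor_input_placeholder()
png = tf.image.encode_png(tf.zeros([16, 16, 3], dtype=tf.uint8))
with tf.Session() as sess:
    png_str = sess.run(png)
    out = sess.run(decoded, feed_dict={placeholder: [png_str, png_str]})
    print(out.shape)  # (2, 16, 16, 3)
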
def decode_image(key, raw_bytes):
    """Decodes single or batches of JPEG- or PNG-encoded string tensors.

    Args:
        key: String key specified in feature map.
        raw_bytes: String tensor to decode as JPEG or PNG.

    Returns:
        Decoded image tensor with shape specified by tensor spec.

    Raises:
        ValueError: If dtype other than uint8 or uint16 is supplied for image
            specs.
    """
    img_batch_dims = tf.shape(raw_bytes)
    # The spatial + channel dimensions of a single image, assumed to be the
    # last 3 entries of the image feature's tensor spec.
    if len(tensor_spec_dict[key].shape) < 3:
        raise ValueError(
            'Shape of tensor spec for image feature "%s" must '
            'be 3 dimensional (h, w, c), but is %s' %
            (tensor_spec_dict[key].name, tensor_spec_dict[key].shape))
    single_img_dims = tensor_spec_dict[key].shape[-3:]
    num_channels = single_img_dims[2]
    if num_channels not in [1, 3]:
        raise ValueError(
            'Last dimension of shape of tensor spec for image '
            'feature "%s" must be 1 or 3, but the shape is %s' %
            (tensor_spec_dict[key].name, tensor_spec_dict[key].shape))

    # Collapse (possibly multiple) batch dims to a single batch dim for
    # decoding purposes.
    raw_bytes = tf.reshape(raw_bytes, [-1])
    data_type = tensor_spec_dict[key].dtype
    if data_type not in SUPPORTED_PIXEL_ENCODINGS:
        raise ValueError('Decoding an image requires tensorspec.data_type '
                         'to be uint8 or uint16.')

    def _decode_images(image_bytes):
        """Decodes a single image."""

        def _zero_image():
            return tf.zeros(single_img_dims, dtype=data_type)

        def _tf_decode_image():
            return tf.image.decode_image(
                image_bytes, channels=num_channels, dtype=data_type)

        image = tf.cond(
            tf.equal(image_bytes, ''), _zero_image, _tf_decode_image)
        image.set_shape(single_img_dims)
        return image

    img = tf.map_fn(
        _decode_images, raw_bytes, dtype=data_type, back_prop=False)
    img.set_shape(raw_bytes.shape.concatenate(single_img_dims))

    # Expand the collapsed batch dim back to the original img_batch_dims.
    img = tf.reshape(img, tf.concat([img_batch_dims, single_img_dims], 0))
    return img