def _gif_and_image_summary(name, images, fps, saturate=False, step=None):
  """Writes a GIF summary and a tiled still-image summary under `name`.

  The input is converted to uint8, unstacked along its first axis and the
  pieces are concatenated along axis 2. That tensor (with a new leading axis)
  feeds the GIF summary. For the still image the same tensor is unstacked once
  more, concatenated along axis 0, and given a new leading axis.

  Args:
    name: Summary name handed to both summary writers.
    images: Image tensor. NOTE(review): presumably laid out as
      [batch, time, height, width, channels] -- confirm against callers.
    fps: Frame rate forwarded to `gif_utils.gif_summary_v2`.
    saturate: Forwarded to `tf.image.convert_image_dtype`.
    step: Summary step forwarded to both summary writers.
  """
  as_uint8 = tf.image.convert_image_dtype(images, tf.uint8, saturate=saturate)

  # Lay the entries of the first axis next to each other along axis 2.
  side_by_side = tf.concat(tf.unstack(as_uint8), axis=2)
  gif_utils.gif_summary_v2(
      name, side_by_side[tf.newaxis], 1, fps, step=step)

  # Additionally tile the entries of the (new) first axis along axis 0 so
  # everything fits into a single still image.
  tiled = tf.concat(tf.unstack(side_by_side), axis=0)
  tf.contrib.summary.image(name, tiled[tf.newaxis], step=step)
def true_fn(images):
  """Overwrites random rectangles of the second image with its mean color.

  Reads `augment_entire_batch`, `width`, `height`, `min_size`, `max_size`,
  `max_operations` and `probability_additional_operations` from the enclosing
  scope.

  Args:
    images: Either the tensor to augment as a whole (when
      `augment_entire_batch` is set) or a tensor that unstacks into exactly
      two images, of which only the second is modified.

  Returns:
    A tensor with the same layout as `images` after the erase operations.
  """
  if augment_entire_batch:
    image_2 = images
    # Mean over axes 1 and 2, kept as size-1 axes so it broadcasts back.
    mean_color = tf.reduce_mean(image_2, axis=[1, 2], keepdims=True)
  else:
    image_1, image_2 = tf.unstack(images)
    mean_color = tf.reduce_mean(image_2, axis=[0, 1], keepdims=True)

  def body(var_img, mean_color):
    """Replaces one random axis-aligned rectangle with `mean_color`."""
    x0 = tf.random.uniform([], 0, width, dtype=tf.int32)
    y0 = tf.random.uniform([], 0, height, dtype=tf.int32)
    dx = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
    dy = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
    # Build 1-D interval masks; a rectangle reaching past the border is
    # clipped implicitly because the ranges stop at width/height.
    x = tf.range(width)
    x_mask = (x0 <= x) & (x < x0 + dx)
    y = tf.range(height)
    y_mask = (y0 <= y) & (y < y0 + dy)
    # Outer "and" of the row and column masks yields a [height, width] mask;
    # the trailing axis lets it broadcast against the image.
    mask = x_mask & y_mask[:, tf.newaxis]
    mask = tf.cast(mask[:, :, tf.newaxis], image_2.dtype)
    return var_img * (1 - mask) + mean_color * mask

  # Perform at least one erase operation.
  image_2 = body(image_2, mean_color)
  # Perform additional erase operations, each with the given probability.
  for _ in range(max_operations - 1):
    perform_erase = tf.less(
        tf.random.uniform([]), probability_additional_operations)
    # tf.cond invokes the lambdas right away, so they see the current
    # `image_2` of this iteration.
    image_2 = tf.cond(perform_erase,
                      lambda: body(image_2, mean_color),
                      lambda: image_2)

  if augment_entire_batch:
    return image_2
  return tf.stack([image_1, image_2])
def filter_before_first_step(time_steps, actions=None):
  """Overwrites steps that precede a FIRST step with that FIRST step.

  The sequence is walked backwards along axis 1. Once a step whose
  `step_type` equals `ts.StepType.FIRST` has been seen for a batch entry,
  every earlier step of that entry is replaced by the (already filtered) step
  that follows it, and the corresponding action is replaced by zeros.

  Args:
    time_steps: A nest of tensors whose axis 1 is the one being unstacked and
      whose packed per-step structure exposes `step_type`. NOTE(review):
      presumably a batched TF-Agents `TimeStep` of shape [batch, time, ...] --
      confirm against callers.
    actions: Optional tensor unstacked along axis 1 in lockstep with
      `time_steps`.

  Returns:
    The filtered time steps if `actions` is None, otherwise a tuple
    `(filtered_time_steps, filtered_actions)`.
  """
  # Decide the return shape from the argument itself rather than from
  # whatever the loop variable happens to hold after the loop.
  has_actions = actions is not None

  # Split every tensor of the nest along axis 1 and regroup per step.
  structure = time_steps
  per_tensor_slices = [
      tf.unstack(tensor, axis=1) for tensor in tf.nest.flatten(structure)]
  time_steps = [
      tf.nest.pack_sequence_as(structure, step_tensors)
      for step_tensors in zip(*per_tensor_slices)]

  if has_actions:
    actions = tf.unstack(actions, axis=1)
  else:
    actions = [None] * len(time_steps)
  assert len(time_steps) == len(actions)

  filtered_time_steps = []
  filtered_actions = []
  # Walk from the last step to the first one.
  for t, (time_step, action) in enumerate(
      zip(reversed(time_steps), reversed(actions))):
    if t == 0:
      # The final step is never replaced; it only seeds the mask.
      reset_mask = tf.equal(time_step.step_type, ts.StepType.FIRST)
    else:
      # Where a FIRST step was already seen, copy the following step over
      # this one and zero the action.
      time_step = tf.nest.map_structure(
          lambda x, y: tf.where(reset_mask, x, y), last_time_step, time_step)
      if action is not None:
        action = tf.where(reset_mask, tf.zeros_like(action), action)
    filtered_time_steps.append(time_step)
    filtered_actions.append(action)
    reset_mask = tf.logical_or(
        reset_mask, tf.equal(time_step.step_type, ts.StepType.FIRST))
    last_time_step = time_step

  # Restore chronological order.
  filtered_time_steps.reverse()
  filtered_actions.reverse()

  # Re-stack the per-step nests into one nest of tensors with axis 1 restored.
  flat_per_step = [tf.nest.flatten(step) for step in filtered_time_steps]
  stacked = [
      tf.stack(step_tensors, axis=1) for step_tensors in zip(*flat_per_step)]
  filtered_time_steps = tf.nest.pack_sequence_as(
      filtered_time_steps[0], stacked)

  if not has_actions:
    return filtered_time_steps
  return filtered_time_steps, tf.stack(filtered_actions, axis=1)
def _make_masked_autoregressive_shift_and_log_scale_fn(
    masked_autoregressive_model):
  """Builds the callable that yields shift and log-scale coefficients.

  NOTE(review): `self` is not a parameter here; it is presumably captured
  from an enclosing method -- confirm.

  Args:
    masked_autoregressive_model: A Keras model that computes the fprop.

  Returns:
    A function of `x`. If `self.num_params_per_input` is 1 it returns
    `(shift, None)`, where shift is index 0 of the model output's last axis.
    Otherwise it returns the model output unstacked along its last axis.
  """

  def _shift_only(x):
    # A single parameter per input means there is no log-scale term.
    model_output = masked_autoregressive_model(x)
    return model_output[Ellipsis, 0], None

  def _unstacked_params(x):
    return tf.unstack(masked_autoregressive_model(x), axis=-1)

  if self.num_params_per_input == 1:
    return _shift_only
  return _unstacked_params
def _get_joint_loss_outputs(self, inputs):
  """Runs every sub-model, mixes their outputs and sums the projections.

  Each model in `self.ids_to_models` is called on its own inputs with
  `apply_projection_layer=False`. The stacked outputs pass through the
  dropout and transformer layers with the first two axes swapped, are swapped
  back, projected via `self._project_with_submodels`, and finally summed over
  axis 0.

  Args:
    inputs: Passed to `self._get_model_inputs` together with each model id.

  Returns:
    The sum over axis 0 of the projected outputs.
  """
  per_model_outputs = [
      model(self._get_model_inputs(model_id, inputs),
            apply_projection_layer=False)
      for model_id, model in self.ids_to_models.items()
  ]
  stacked = tf.stack(per_model_outputs)

  # Swap the first two axes for the dropout/transformer layers, then undo it.
  # NOTE(review): presumably [model, batch, dim] <-> [batch, model, dim] --
  # confirm.
  swapped = tf.transpose(stacked, perm=[1, 0, 2])
  mixed = self.transformer_layer(self.dropout_layer(swapped))
  restored = tf.transpose(mixed, perm=[1, 0, 2])

  projected = self._project_with_submodels(tf.unstack(restored))
  return tf.reduce_sum(projected, axis=0)
def random_scale_second(images, flow=None, mask=None, min_scale=1.0,
                        max_scale=1.0):
  """Randomly rescales the second image of a pair and adapts flow and mask.

  Args:
    images: Tensor that unstacks into exactly two images; height and width
      are read from axes -3 and -2.
    flow: Optional flow field to be adapted to the rescaled second image.
    mask: Mask multiplied onto the adapted flow; it is used whenever `flow`
      is given.
    min_scale: Lower bound of the uniformly sampled scale factor.
    max_scale: Upper bound of the uniformly sampled scale factor.

  Returns:
    A tuple `(images, flow, mask)`.
  """
  # Resolution before scaling, taken from the dynamic shape.
  orig_height = tf.shape(images)[-3]
  orig_width = tf.shape(images)[-2]

  # One scale factor for both axes; the target size is rounded up.
  scale = tf.random.uniform(
      [], minval=min_scale, maxval=max_scale, dtype=tf.float32)
  height_f = tf.cast(orig_height, tf.float32)
  width_f = tf.cast(orig_width, tf.float32)
  new_height = tf.cast(tf.math.ceil(height_f * scale), tf.int32)
  new_width = tf.cast(tf.math.ceil(width_f * scale), tf.int32)

  # Only the second image is resized.
  first, second = tf.unstack(images)
  second = uflow_utils.resize(second, new_height, new_width, is_flow=False)

  # Center-crop whichever image is larger so both have the same size again.
  if scale < 1.0:
    first = _center_crop(first, new_height, new_width)
  else:
    second = _center_crop(second, orig_height, orig_width)
  images = tf.stack([first, second])

  if flow is not None:
    # Pixel locations with the origin in the image center.
    positions = _positions_center_origin(orig_height, orig_width)

    # Use the scale that was actually realized after rounding the size.
    scale_flow = tf.stack([
        tf.cast(new_height, tf.float32) / height_f,
        tf.cast(new_width, tf.float32) / width_f,
    ])

    # Scale the flow targets; multiplying by the mask zeroes invalid flow.
    flow = ((positions + flow) * scale_flow - positions) * mask

    if scale < 1.0:
      # When downsampling, the reference image was cropped, so flow and mask
      # are cropped the same way to keep matching shapes.
      flow = _center_crop(flow, new_height, new_width)
      mask = _center_crop(mask, new_height, new_width)
  return images, flow, mask
def true_fn(images, flow, mask):
  """Randomly rescales the second image and adapts flow and mask to it.

  Reads `min_scale` and `max_scale` from the enclosing scope.

  Args:
    images: Tensor that unstacks into exactly two images; height and width
      are read from axes -3 and -2.
    flow: Flow field to adapt, or None.
    mask: Mask multiplied onto the adapted flow; used whenever `flow` is
      given.

  Returns:
    A tuple `(images, flow, mask)`.
  """
  # Resolution before scaling, taken from the dynamic shape.
  orig_height = tf.shape(images)[-3]
  orig_width = tf.shape(images)[-2]

  # Sample the scale and target resolution; stretching is switched off.
  new_height, new_width, scale = _get_random_scaled_resolution(
      orig_height=orig_height,
      orig_width=orig_width,
      min_scale=min_scale,
      max_scale=max_scale,
      max_strech=0.0,
      probability_strech=0.0)

  # Only the second image is resized.
  first, second = tf.unstack(images)
  second = smurf_utils.resize(second, new_height, new_width, is_flow=False)

  # Center-crop whichever image is larger so both have the same size again.
  if scale < 1.0:
    first = _center_crop(first, new_height, new_width)
  else:
    second = _center_crop(second, orig_height, orig_width)
  images = tf.stack([first, second])

  if flow is not None:
    # Pixel locations with the origin in the image center.
    positions = _positions_center_origin(orig_height, orig_width)

    # Use the scale that corresponds to the actual new resolution.
    height_ratio = tf.cast(new_height, tf.float32) / tf.cast(
        orig_height, tf.float32)
    width_ratio = tf.cast(new_width, tf.float32) / tf.cast(
        orig_width, tf.float32)
    scale_flow = tf.stack([height_ratio, width_ratio])

    # Scale the flow targets; multiplying by the mask zeroes invalid flow.
    flow = ((positions + flow) * scale_flow - positions) * mask

    if scale < 1.0:
      # When downsampling, the reference image was cropped, so flow and mask
      # are cropped the same way to keep matching shapes.
      flow = _center_crop(flow, new_height, new_width)
      mask = _center_crop(mask, new_height, new_width)
  return images, flow, mask
def true_fn(images):
  """Applies a random brightness change to each image of the pair.

  Reads `max_delta` from the enclosing scope. `tf.image.random_brightness` is
  called once per image, first image first.

  Args:
    images: Tensor that unstacks into exactly two images.

  Returns:
    The two adjusted images stacked back together.
  """
  first, second = tf.unstack(images)
  return tf.stack([
      tf.image.random_brightness(first, max_delta),
      tf.image.random_brightness(second, max_delta),
  ])
def true_fn(images):
  """Applies a random contrast change to each image of the pair.

  Reads `min_bound` and `max_bound` from the enclosing scope.
  `tf.image.random_contrast` is called once per image, first image first.

  Args:
    images: Tensor that unstacks into exactly two images.

  Returns:
    The two adjusted images stacked back together.
  """
  first, second = tf.unstack(images)
  return tf.stack([
      tf.image.random_contrast(first, min_bound, max_bound),
      tf.image.random_contrast(second, min_bound, max_bound),
  ])
def true_fn(images, flow, mask):
  """Rotates the second image by a random angle and adapts flow and mask.

  Reads `max_rotation` (in degrees) and `not_empty_crop` from the enclosing
  scope.

  Args:
    images: Tensor that unstacks into exactly two images; height and width
      are read from axes -3 and -2.
    flow: Flow field to adapt, or None.
    mask: Mask multiplied onto the adapted flow; used whenever `flow` is
      given.

  Returns:
    A tuple `(images, flow, mask)`. With `not_empty_crop` set, all returned
    tensors are cropped to the same centered window.
  """
  # Sample the angle in degrees and convert it to radians.
  angle_radian = tf.random.uniform(
      [], minval=-max_rotation, maxval=max_rotation,
      dtype=tf.float32) * math.pi / 180.0

  # Only the second image is rotated.
  image_1, image_2 = tf.unstack(images)
  image_2 = rotate(image_2, angle_radian, is_flow=False, mask=None)
  images = tf.stack([image_1, image_2])

  # The uncropped resolution is needed by the crop AND by the flow update, so
  # it must be computed regardless of `not_empty_crop`.
  orig_height = tf.shape(images)[-3]
  orig_width = tf.shape(images)[-2]

  if not_empty_crop:
    # Abbreviations for shorter notation; the angle is reduced modulo pi.
    angle_mod = angle_radian % math.pi
    cos_mod = tf.math.cos(angle_mod)
    sin_mod = tf.math.sin(angle_mod)
    h = tf.cast(orig_height, tf.float32)
    w = tf.cast(orig_width, tf.float32)

    # Compute the required scale factor of the crop window.
    scale = tf.cond(
        tf.math.less(angle_mod, math.pi / 2.0),
        lambda: tf.math.maximum((w / h) * sin_mod + cos_mod,
                                (h / w) * sin_mod + cos_mod),
        lambda: tf.math.maximum((w / h) * sin_mod - cos_mod,
                                (h / w) * sin_mod - cos_mod))
    new_height = tf.math.floor(h / scale)
    new_width = tf.math.floor(w / scale)

    # Centered crop window of size new_height x new_width.
    offset_height = tf.cast((h - new_height) / 2, tf.int32)
    offset_width = tf.cast((w - new_width) / 2, tf.int32)
    target_height = tf.cast(new_height, tf.int32)
    target_width = tf.cast(new_width, tf.int32)
    images = tf.image.crop_to_bounding_box(
        images,
        offset_height=offset_height,
        offset_width=offset_width,
        target_height=target_height,
        target_width=target_width)

  if flow is not None:
    # Pixel locations with the origin in the image center.
    positions = _positions_center_origin(orig_height, orig_width)

    # Rotate the flow targets; multiplying by the mask zeroes invalid flow.
    cos = tf.math.cos(angle_radian)
    sin = tf.math.sin(angle_radian)
    rotation_matrix = tf.reshape([cos, sin, -sin, cos], [2, 2])
    flow = (tf.linalg.matmul(
        (positions + flow), rotation_matrix) - positions) * mask

    if not_empty_crop:
      # Crop flow and mask with the same window as the images.
      flow = tf.image.crop_to_bounding_box(
          flow,
          offset_height=offset_height,
          offset_width=offset_width,
          target_height=target_height,
          target_width=target_width)
      mask = tf.image.crop_to_bounding_box(
          mask,
          offset_height=offset_height,
          offset_width=offset_width,
          target_height=target_height,
          target_width=target_width)
  return images, flow, mask
def random_crop(images, flow, mask, crop_height, crop_width, relative_offset,
                probability_crop_offset):
  """Performs a random crop with the given height and width.

  Both images are first upscaled if the requested crop does not fit. The
  first image is cropped at a random "joint" offset. The second image is
  cropped at the same offset or, with probability `probability_crop_offset`,
  at an offset shifted by up to `relative_offset` pixels per axis. Flow and
  mask are cropped at the joint offset and the flow is corrected for the
  relative shift.

  Args:
    images: Tensor that unstacks into exactly two images; height and width
      are read from axes -3 and -2.
    flow: Flow field to crop and correct, or None.
    mask: Mask belonging to `flow`; resized and cropped together with it.
    crop_height: Target height, or None to skip cropping.
    crop_width: Target width, or None to skip cropping.
    relative_offset: Maximum per-axis shift between the two crop windows.
    probability_crop_offset: Probability of using a shifted window for the
      second image.

  Returns:
    A tuple `(images, flow, mask, joint_offset_h, joint_offset_w)`. When
    cropping is skipped the inputs are returned unchanged with zero offsets.
  """
  # Early return if crop_height or crop_width is not specified. The offsets
  # are reported as zero so that every path returns the same 5-tuple.
  if crop_height is None or crop_width is None:
    no_offset = tf.constant(0, dtype=tf.int32)
    return images, flow, mask, no_offset, no_offset

  orig_height = tf.shape(images)[-3]
  orig_width = tf.shape(images)[-2]

  # Check if the crop size fits the image size; never scale below 1.
  scale = 1.0
  ratio = tf.cast(crop_height, tf.float32) / tf.cast(orig_height, tf.float32)
  scale = tf.math.maximum(scale, ratio)
  ratio = tf.cast(crop_width, tf.float32) / tf.cast(orig_width, tf.float32)
  scale = tf.math.maximum(scale, ratio)

  # Compute the minimum required height and width.
  new_height = tf.cast(
      tf.math.ceil(tf.cast(orig_height, tf.float32) * scale), tf.int32)
  new_width = tf.cast(
      tf.math.ceil(tf.cast(orig_width, tf.float32) * scale), tf.int32)

  # Perform the resize (scales with 1 if not required).
  images = smurf_utils.resize(images, new_height, new_width, is_flow=False)

  # Compute the joint offset; `maxval` is exclusive, hence the +1.
  max_offset_h = new_height - tf.cast(crop_height, dtype=tf.int32)
  max_offset_w = new_width - tf.cast(crop_width, dtype=tf.int32)
  joint_offset_h = tf.random.uniform([], maxval=max_offset_h + 1,
                                     dtype=tf.int32)
  joint_offset_w = tf.random.uniform([], maxval=max_offset_w + 1,
                                     dtype=tf.int32)

  # Compute the relative offset, clamped so the window stays in the image.
  min_relative_offset_h = tf.math.maximum(
      joint_offset_h - relative_offset, 0)
  max_relative_offset_h = tf.math.minimum(
      joint_offset_h + relative_offset, max_offset_h)
  min_relative_offset_w = tf.math.maximum(
      joint_offset_w - relative_offset, 0)
  max_relative_offset_w = tf.math.minimum(
      joint_offset_w + relative_offset, max_offset_w)
  relative_offset_h = tf.random.uniform(
      [], minval=min_relative_offset_h, maxval=max_relative_offset_h + 1,
      dtype=tf.int32)
  relative_offset_w = tf.random.uniform(
      [], minval=min_relative_offset_w, maxval=max_relative_offset_w + 1,
      dtype=tf.int32)

  # Fall back to the joint offset unless the shifted window was selected.
  set_crop_offset = tf.random.uniform([]) < probability_crop_offset
  relative_offset_h = tf.cond(
      set_crop_offset, lambda: relative_offset_h, lambda: joint_offset_h)
  relative_offset_w = tf.cond(
      set_crop_offset, lambda: relative_offset_w, lambda: joint_offset_w)

  # Crop both images.
  image_1, image_2 = tf.unstack(images)
  image_1 = tf.image.crop_to_bounding_box(
      image_1,
      offset_height=joint_offset_h,
      offset_width=joint_offset_w,
      target_height=crop_height,
      target_width=crop_width)
  image_2 = tf.image.crop_to_bounding_box(
      image_2,
      offset_height=relative_offset_h,
      offset_width=relative_offset_w,
      target_height=crop_height,
      target_width=crop_width)
  images = tf.stack([image_1, image_2])

  if flow is not None:
    # Perform the resize (scales with 1 if not required).
    flow, mask = smurf_utils.resize(
        flow, new_height, new_width, is_flow=True, mask=mask)

    # Crop flow and mask with the window of the first image.
    flow = tf.image.crop_to_bounding_box(
        flow,
        offset_height=joint_offset_h,
        offset_width=joint_offset_w,
        target_height=crop_height,
        target_width=crop_width)
    mask = tf.image.crop_to_bounding_box(
        mask,
        offset_height=joint_offset_h,
        offset_width=joint_offset_w,
        target_height=crop_height,
        target_width=crop_width)

    # Correct the flow for the relative shift between the two crop windows.
    flow_delta = tf.stack(
        [tf.cast(relative_offset_h - joint_offset_h, tf.float32),
         tf.cast(relative_offset_w - joint_offset_w, tf.float32)])
    flow = (flow - flow_delta) * mask
  return images, flow, mask, joint_offset_h, joint_offset_w
def true_fn(images):
  """Runs `potential_asymmetric_augmentations` on each image separately.

  The helper is called once per image, first image first.

  Args:
    images: Tensor that unstacks into exactly two images.

  Returns:
    The two augmented images stacked back together.
  """
  first, second = tf.unstack(images)
  return tf.stack([
      potential_asymmetric_augmentations(first),
      potential_asymmetric_augmentations(second),
  ])
def photometric_augmentation(images,
                             augment_color_swap=True,
                             augment_hue_shift=True,
                             augment_saturation=False,
                             augment_brightness=False,
                             augment_contrast=False,
                             augment_gaussian_noise=False,
                             augment_brightness_individual=False,
                             augment_contrast_individual=False,
                             max_delta_hue=0.5,
                             min_bound_saturation=0.8,
                             max_bound_saturation=1.2,
                             max_delta_brightness=0.1,
                             min_bound_contrast=0.8,
                             max_bound_contrast=1.2,
                             min_bound_gaussian_noise=0.0,
                             max_bound_gaussian_noise=0.02,
                             max_delta_brightness_individual=0.02,
                             min_bound_contrast_individual=0.95,
                             max_bound_contrast_individual=1.05):
  """Applies photometric augmentations to an image pair.

  Joint augmentations (color swap, hue, saturation, brightness, contrast,
  Gaussian noise) act on the stacked pair. The `*_individual` augmentations
  are then applied with a separate call per image. Finally both images are
  clipped to [0, 1].

  Args:
    images: Tensor that unstacks into exactly two images with the color
      channels on the last axis.
    augment_color_swap: Whether to randomly permute the color channels.
    augment_hue_shift: Whether to apply `tf.image.random_hue`.
    augment_saturation: Whether to apply `tf.image.random_saturation`.
    augment_brightness: Whether to apply `tf.image.random_brightness`.
    augment_contrast: Whether to apply `tf.image.random_contrast`.
    augment_gaussian_noise: Whether to add zero-mean Gaussian noise.
    augment_brightness_individual: Whether to apply a per-image random
      brightness change.
    augment_contrast_individual: Whether to apply a per-image random
      contrast change.
    max_delta_hue: `max_delta` for the hue shift.
    min_bound_saturation: Lower bound for the saturation factor.
    max_bound_saturation: Upper bound for the saturation factor.
    max_delta_brightness: `max_delta` for the joint brightness change.
    min_bound_contrast: Lower bound for the joint contrast factor.
    max_bound_contrast: Upper bound for the joint contrast factor.
    min_bound_gaussian_noise: Lower bound for the sampled noise stddev.
    max_bound_gaussian_noise: Upper bound for the sampled noise stddev.
    max_delta_brightness_individual: `max_delta` for the per-image brightness
      change.
    min_bound_contrast_individual: Lower bound for the per-image contrast
      factor.
    max_bound_contrast_individual: Upper bound for the per-image contrast
      factor.

  Returns:
    The two augmented images, clipped to [0, 1] and stacked together.
  """
  # Randomly permute colors by rolling and reversing.
  # This covers all permutations.
  if augment_color_swap:
    shift = tf.random.uniform([], maxval=3, dtype=tf.int32)
    images = tf.roll(images, shift, axis=-1)
    do_reverse = tf.equal(tf.random.uniform([], maxval=2, dtype=tf.int32), 1)
    images = tf.cond(pred=do_reverse,
                     true_fn=lambda: tf.reverse(images, axis=[-1]),
                     false_fn=lambda: images)

  if augment_hue_shift:
    images = tf.image.random_hue(images, max_delta_hue)

  if augment_saturation:
    images = tf.image.random_saturation(
        images, min_bound_saturation, max_bound_saturation)

  if augment_brightness:
    images = tf.image.random_brightness(images, max_delta_brightness)

  if augment_contrast:
    images = tf.image.random_contrast(
        images, min_bound_contrast, max_bound_contrast)

  if augment_gaussian_noise:
    # One stddev is sampled for the whole pair.
    sigma = tf.random.uniform([],
                              minval=min_bound_gaussian_noise,
                              maxval=max_bound_gaussian_noise,
                              dtype=tf.float32)
    noise = tf.random.normal(
        tf.shape(input=images), stddev=sigma, dtype=tf.float32)
    images = images + noise

  # Perform relative photometric augmentation (individually per image).
  # Each flag gates the operation it is named after; the contrast-then-
  # brightness order of the operations is kept as it was.
  image_1, image_2 = tf.unstack(images)
  if augment_contrast_individual:
    image_1 = tf.image.random_contrast(
        image_1, min_bound_contrast_individual, max_bound_contrast_individual)
    image_2 = tf.image.random_contrast(
        image_2, min_bound_contrast_individual, max_bound_contrast_individual)
  if augment_brightness_individual:
    image_1 = tf.image.random_brightness(
        image_1, max_delta_brightness_individual)
    image_2 = tf.image.random_brightness(
        image_2, max_delta_brightness_individual)

  # Clip values to ensure they lie in [0, 1] (some augmentations can violate
  # this).
  image_1 = tf.clip_by_value(image_1, 0.0, 1.0)
  image_2 = tf.clip_by_value(image_2, 0.0, 1.0)
  return tf.stack([image_1, image_2])