def _test_gradient(device):
    # Expect failure (rather than an error) when a GPU is requested but none is visible.
    if device == "gpu" and not visible_gpu():
        pytest.xfail("no gpu is visible")

    with NumpySeed(100):
        with tf.device('/{}:0'.format(device)):
            sprites, scales, offsets, backgrounds = get_data(random_alpha=True, squash=0.99)

            sprites_tf = constant_op.constant(sprites)
            scales_tf = constant_op.constant(scales)
            offsets_tf = constant_op.constant(offsets)
            backgrounds_tf = constant_op.constant(backgrounds)

            images = render_sprites.render_sprites(sprites_tf, scales_tf, offsets_tf, backgrounds_tf)

            sess = get_session()
            with sess.as_default():
                with tf.device(device):
                    err = gradient_checker.compute_gradient_error(
                        [sprites_tf, scales_tf, offsets_tf, backgrounds_tf],
                        [sprites.shape, scales.shape, offsets.shape, backgrounds.shape],
                        images,
                        backgrounds.shape,
                        [sprites, scales, offsets, backgrounds],
                        delta=0.002)

            print("Jacobian error: {}".format(err))
            threshold = 2e-4
            assert err < threshold, "Jacobian error ({}) exceeded threshold ({})".format(err, threshold)
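A minimal sketch of how the gradient-check helper above could be wired into pytest; the wrapper name and the parametrize decorator are assumptions, not part of the original test module.

import pytest

# Hypothetical wrapper: run the Jacobian check on both devices; the helper itself
# calls pytest.xfail when "gpu" is requested but no GPU is visible.
@pytest.mark.parametrize("device", ["cpu", "gpu"])
def test_render_sprites_gradient(device):
    _test_gradient(device)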
def render(self, tensors, background):
    render_tensors = {}

    # --- Compute sprite maps from box parameters ---

    objects = tf.reshape(
        tensors["objects"],
        (self.batch_size, self.HWB, *self.object_shape, self.image_depth + 1))

    obj_img, obj_alpha = tf.split(objects, [3, 1], axis=-1)

    if "alpha" in self.no_gradient:
        obj_alpha = tf.stop_gradient(obj_alpha)

    if "alpha" in self.fixed_values:
        obj_alpha = float(self.fixed_values["alpha"]) * tf.ones_like(obj_alpha)

    obj_alpha *= tf.reshape(tensors['obj'], (self.batch_size, self.HWB, 1, 1, 1))

    z = tf.reshape(tensors['z'], (self.batch_size, self.HWB, 1, 1, 1))
    obj_importance = tf.maximum(obj_alpha * z, 0.01)

    objects = tf.concat([obj_img, obj_alpha, obj_importance], axis=-1)

    # --- Compose images ---

    ys, xs, yt, xt = tensors["ys"], tensors["xs"], tensors["yt"], tensors["xt"]

    scales = tf.concat([ys, xs], axis=-1)
    scales = tf.reshape(scales, (self.batch_size, self.HWB, 2))

    offsets = tf.concat([yt, xt], axis=-1)
    offsets = tf.reshape(offsets, (self.batch_size, self.HWB, 2))

    output = render_sprites.render_sprites(
        objects, tensors["n_objects"], scales, offsets, background)

    # --- Store values ---

    render_tensors['area'] = (ys * float(self.image_height)) * (xs * float(self.image_width))
    render_tensors['output'] = output

    return render_tensors
def run(device, show_plots, process_data=None, **get_data_kwargs):
    with NumpySeed(100):
        data = get_data(**get_data_kwargs)

        if process_data is None:
            process_data = lambda *x: x

        sprites, scales, offsets, backgrounds = process_data(*data)

        with tf.device('/{}:0'.format(device)):
            images = render_sprites.render_sprites(sprites, scales, offsets, backgrounds)
            sess = get_session()
            result = sess.run(images)

        result = np.clip(result, 1e-6, 1 - 1e-6)

        if show_plots:
            import matplotlib.pyplot as plt
            fig, (ax1, ax2) = plt.subplots(1, 2)
            ax1.imshow(result[0])
            ax2.imshow(result[1])
            plt.show()
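A hedged usage sketch for the helper above: rendering on CPU with plotting disabled is enough for a quick smoke test; any extra keyword arguments would be forwarded to get_data.

# Hypothetical invocation (not in the original source).
run("cpu", show_plots=False)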
def _build_program_interpreter(self, tensors):
    # --- Get object attributes using object encoder ---

    max_objects = tensors["max_objects"]

    yt, xt, ys, xs = tf.split(tensors["normalized_box"], 4, axis=-1)

    transform_constraints = snt.AffineWarpConstraints.no_shear_2d()
    warper = snt.AffineGridWarper(
        (self.image_height, self.image_width), self.object_shape, transform_constraints)

    # Convert (scale, top-left offset) box parameters to the (scale, centre in [-1, 1])
    # parameterization expected by the affine grid warper.
    _boxes = tf.concat([xs, 2 * (xt + xs / 2) - 1, ys, 2 * (yt + ys / 2) - 1], axis=-1)
    _boxes = tf.reshape(_boxes, (self.batch_size * max_objects, 4))

    grid_coords = warper(_boxes)
    grid_coords = tf.reshape(
        grid_coords, (self.batch_size, max_objects, *self.object_shape, 2))

    glimpse = tf.contrib.resampler.resampler(tensors["inp"], grid_coords)

    object_encoder_in = tf.reshape(
        glimpse, (self.batch_size * max_objects, *self.object_shape, self.image_depth))

    attr = self.object_encoder(object_encoder_in, (1, 1, 2 * self.A), self.is_training)
    attr = tf.reshape(attr, (self.batch_size, max_objects, 2 * self.A))

    attr_mean, attr_log_std = tf.split(attr, [self.A, self.A], axis=-1)
    attr_std = tf.exp(attr_log_std)

    if not self.noisy:
        attr_std = tf.zeros_like(attr_std)

    attr, attr_kl = normal_vae(attr_mean, attr_std, self.attr_prior_mean, self.attr_prior_std)

    object_decoder_in = tf.reshape(attr, (self.batch_size * max_objects, 1, 1, self.A))

    # --- Compute sprites from attr using object decoder ---

    object_logits = self.object_decoder(
        object_decoder_in, self.object_shape + (self.image_depth,), self.is_training)

    objects = tf.nn.sigmoid(tf.clip_by_value(object_logits, -10., 10.))
    objects = tf.reshape(
        objects, (self.batch_size, max_objects, *self.object_shape, self.image_depth))

    alpha = tensors["obj"][:, :, :, None, None] * tf.ones_like(objects[:, :, :, :, :1])
    importance = tf.ones_like(objects[:, :, :, :, :1])

    objects = tf.concat([objects, alpha, importance], axis=-1)

    # --- Reconstruct image ---

    scales = tf.concat([ys, xs], axis=-1)
    scales = tf.reshape(scales, (self.batch_size, max_objects, 2))

    offsets = tf.concat([yt, xt], axis=-1)
    offsets = tf.reshape(offsets, (self.batch_size, max_objects, 2))

    output = render_sprites.render_sprites(
        objects, tensors["n_objects"], scales, offsets, tensors["background"])

    return dict(
        output=output,
        glimpse=tf.reshape(
            glimpse, (self.batch_size, max_objects, *self.object_shape, self.image_depth)),
        attr=tf.reshape(attr, (self.batch_size, max_objects, self.A)),
        attr_kl=tf.reshape(attr_kl, (self.batch_size, max_objects, self.A)),
        # The sprites now carry alpha and importance channels in addition to the colors.
        objects=tf.reshape(
            objects, (self.batch_size, max_objects, *self.object_shape, self.image_depth + 2)))
sprites_ph = [
    tf.placeholder(tf.float32, (None, *s.shape[1:])) for s in sprites
]
scales_ph = [
    tf.placeholder(tf.float32, (None, *s.shape[1:])) for s in scales
]
offsets_ph = [
    tf.placeholder(tf.float32, (None, *s.shape[1:])) for s in offsets
]
backgrounds_ph = tf.placeholder(tf.float32, (None, *backgrounds.shape[1:]))

with tf.device('/{}:0'.format(device)):
    images = render_sprites.render_sprites(sprites_ph, scales_ph, offsets_ph, backgrounds_ph)

d = {}
d.update({ph: a for ph, a in zip(sprites_ph, sprites)})
d.update({ph: a for ph, a in zip(scales_ph, scales)})
d.update({ph: a for ph, a in zip(offsets_ph, offsets)})
d[backgrounds_ph] = backgrounds

result = sess.run(images, feed_dict=d)

from dps.utils import image_to_string
print(image_to_string(result[0, ..., 0]))
print()
print(image_to_string(result[0, ..., 1]))
print()
print(image_to_string(result[0, ..., 2]))
def _call(self, objects, background, is_training, appearance_only=False):
    if not self.initialized:
        self.image_depth = tf_shape(background)[-1]

    self.maybe_build_subnet("object_decoder")

    # --- compute sprite appearance from attr using object decoder ---

    appearance_logit = apply_object_wise(
        self.object_decoder, objects.attr,
        output_size=self.object_shape + (self.image_depth+1,),
        is_training=is_training)

    appearance_logit = appearance_logit * ([self.color_logit_scale] * 3 + [self.alpha_logit_scale])
    appearance_logit = appearance_logit + ([0.] * 3 + [self.alpha_logit_bias])

    appearance = tf.nn.sigmoid(tf.clip_by_value(appearance_logit, -10., 10.))

    if appearance_only:
        return dict(appearance=appearance)

    appearance_for_output = appearance

    batch_size, *obj_leading_shape, _, _, _ = tf_shape(appearance)
    n_objects = np.prod(obj_leading_shape)
    appearance = tf.reshape(
        appearance, (batch_size, n_objects, *self.object_shape, self.image_depth+1))

    obj_colors, obj_alpha = tf.split(appearance, [self.image_depth, 1], axis=-1)

    obj_alpha *= tf.reshape(objects.render_obj, (batch_size, n_objects, 1, 1, 1))

    z = tf.reshape(objects.z, (batch_size, n_objects, 1, 1, 1))
    obj_importance = tf.maximum(obj_alpha * z / self.importance_temp, 0.01)

    object_maps = tf.concat([obj_colors, obj_alpha, obj_importance], axis=-1)

    *_, image_height, image_width, _ = tf_shape(background)

    yt, xt, ys, xs = coords_to_image_space(
        objects.yt, objects.xt, objects.ys, objects.xs,
        (image_height, image_width), self.anchor_box, top_left=True)

    scales = tf.concat([ys, xs], axis=-1)
    scales = tf.reshape(scales, (batch_size, n_objects, 2))

    offsets = tf.concat([yt, xt], axis=-1)
    offsets = tf.reshape(offsets, (batch_size, n_objects, 2))

    # --- Compose images ---

    n_objects_per_image = tf.fill((batch_size,), int(n_objects))

    output = render_sprites.render_sprites(
        object_maps, n_objects_per_image, scales, offsets, background)

    return dict(appearance=appearance_for_output, output=output)
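For reference, a minimal sketch of the tensor shapes that the _call above feeds to render_sprites.render_sprites; the concrete sizes (batch of 2, three 14x14 RGB sprites on a 48x48 background) are illustrative assumptions, and render_sprites is assumed to be in scope as in the surrounding code.

import tensorflow as tf

batch_size, n_objects, obj_h, obj_w, depth = 2, 3, 14, 14, 3   # assumed sizes

# Last dimension holds colors, alpha and importance, as built above.
object_maps = tf.zeros((batch_size, n_objects, obj_h, obj_w, depth + 2))
scales = tf.fill((batch_size, n_objects, 2), 0.25)    # (height, width) scale per object
offsets = tf.zeros((batch_size, n_objects, 2))        # (y, x) offset per object
background = tf.zeros((batch_size, 48, 48, depth))
n_objects_per_image = tf.fill((batch_size,), n_objects)

images = render_sprites.render_sprites(
    object_maps, n_objects_per_image, scales, offsets, background)
# The rendered images have the same shape as the background tensor.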
def _call(self, objects, background, is_training, appearance_only=False):
    if not self.initialized:
        self.image_depth = tf_shape(background)[-1]

    self.maybe_build_subnet("object_decoder")

    # --- compute sprite appearance from attr using object decoder ---

    appearance_logits = apply_object_wise(
        self.object_decoder, objects.attr,
        self.object_shape + (self.image_depth + 1,), is_training)

    appearance_logits = appearance_logits * ([self.color_logit_scale] * 3 + [self.alpha_logit_scale])
    appearance_logits = appearance_logits + ([0.] * 3 + [self.alpha_logit_bias])

    appearance = tf.nn.sigmoid(tf.clip_by_value(appearance_logits, -10., 10.))

    if appearance_only:
        return dict(appearance=appearance)

    appearance_for_output = appearance

    batch_size, *obj_leading_shape, _, _, _ = tf_shape(appearance)
    n_objects = np.prod(obj_leading_shape)
    appearance = tf.reshape(
        appearance, (batch_size, n_objects, *self.object_shape, self.image_depth + 1))

    obj_colors, obj_alpha = tf.split(appearance, [self.image_depth, 1], axis=-1)

    if "alpha" in self.no_gradient:
        obj_alpha = tf.stop_gradient(obj_alpha)

    if "alpha" in self.fixed_values:
        obj_alpha = float(self.fixed_values["alpha"]) * tf.ones_like(obj_alpha)

    obj_alpha *= tf.reshape(objects.obj, (batch_size, n_objects, 1, 1, 1))

    z = tf.reshape(objects.z, (batch_size, n_objects, 1, 1, 1))
    obj_importance = tf.maximum(obj_alpha * z, 0.01)

    object_maps = tf.concat([obj_colors, obj_alpha, obj_importance], axis=-1)

    ys, xs, yt, xt = objects.ys, objects.xs, objects.yt, objects.xt

    scales = tf.concat([ys, xs], axis=-1)
    scales = tf.reshape(scales, (batch_size, n_objects, 2))

    offsets = tf.concat([yt, xt], axis=-1)
    offsets = tf.reshape(offsets, (batch_size, n_objects, 2))

    # --- Compose images ---

    n_objects_per_image = tf.fill((batch_size,), int(n_objects))

    output = render_sprites.render_sprites(
        object_maps, n_objects_per_image, scales, offsets, background)

    return dict(appearance=appearance_for_output, output=output)
def _call(self, objects, background, is_training, appearance_only=False, mask_only=False):
    """ If mask_only==True, then we ignore the provided background, using a black
        background instead, and also ignore the computed appearance, using
        all-white appearances instead.

    """
    if not self.initialized:
        self.image_depth = tf_shape(background)[-1]

    single = False
    if isinstance(objects, dict):
        single = True
        objects = [objects]

    _object_maps = []
    _scales = []
    _offsets = []
    _appearance = []

    for i, obj in enumerate(objects):
        anchor_box = self.anchor_boxes[i]
        object_shape = self.object_shapes[i]

        object_decoder = self.maybe_build_subnet(
            "object_decoder_for_flight_{}".format(i), builder_name='build_object_decoder')

        # --- compute sprite appearance from attr using object decoder ---

        appearance_logit = apply_object_wise(
            object_decoder, obj.attr,
            output_size=object_shape + (self.image_depth+1,),
            is_training=is_training)

        appearance_logit = appearance_logit * (
            [self.color_logit_scale] * self.image_depth + [self.alpha_logit_scale])
        appearance_logit = appearance_logit + (
            [0.] * self.image_depth + [self.alpha_logit_bias])

        appearance = tf.nn.sigmoid(tf.clip_by_value(appearance_logit, -10., 10.))
        _appearance.append(appearance)

        if appearance_only:
            continue

        batch_size, *obj_leading_shape, _, _, _ = tf_shape(appearance)
        n_objects = np.prod(obj_leading_shape)
        appearance = tf.reshape(
            appearance, (batch_size, n_objects, *object_shape, self.image_depth+1))

        obj_colors, obj_alpha = tf.split(appearance, [self.image_depth, 1], axis=-1)

        if mask_only:
            obj_colors = tf.ones_like(obj_colors)

        obj_alpha *= tf.reshape(obj.obj, (batch_size, n_objects, 1, 1, 1))

        z = tf.reshape(obj.z, (batch_size, n_objects, 1, 1, 1))
        obj_importance = tf.maximum(obj_alpha * z / self.importance_temp, 0.01)

        object_maps = tf.concat([obj_colors, obj_alpha, obj_importance], axis=-1)

        *_, image_height, image_width, _ = tf_shape(background)

        yt, xt, ys, xs = coords_to_image_space(
            obj.yt, obj.xt, obj.ys, obj.xs,
            (image_height, image_width), anchor_box, top_left=True)

        scales = tf.concat([ys, xs], axis=-1)
        scales = tf.reshape(scales, (batch_size, n_objects, 2))

        offsets = tf.concat([yt, xt], axis=-1)
        offsets = tf.reshape(offsets, (batch_size, n_objects, 2))

        _object_maps.append(object_maps)
        _scales.append(scales)
        _offsets.append(offsets)

    if single:
        _appearance = _appearance[0]

    if appearance_only:
        return dict(appearance=_appearance)

    if mask_only:
        background = tf.zeros_like(background)

    # --- Compose images ---

    output = render_sprites.render_sprites(_object_maps, _scales, _offsets, background)

    return dict(appearance=_appearance, output=output)
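A sketch of the list-based call used above, where each flight contributes its own sprite maps, scales and offsets; the per-flight shapes are assumptions chosen only to illustrate the layout, and render_sprites is assumed to be in scope as in the surrounding code.

import tensorflow as tf

batch_size, depth = 2, 3                           # assumed sizes
flight_shapes = [(3, 14, 14), (5, 7, 7)]           # (n_objects, obj_h, obj_w) per flight, assumed

object_maps = [tf.zeros((batch_size, n, h, w, depth + 2)) for n, h, w in flight_shapes]
scales = [tf.fill((batch_size, n, 2), 0.25) for n, _, _ in flight_shapes]
offsets = [tf.zeros((batch_size, n, 2)) for n, _, _ in flight_shapes]
background = tf.zeros((batch_size, 48, 48, depth))

# One list entry per flight, mirroring _object_maps/_scales/_offsets above.
images = render_sprites.render_sprites(object_maps, scales, offsets, background)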