def generator_setup(self):
    """Build the ensemble of generator networks and their optimizers.

    Creates ``self.opt.n_G`` independent encoder/decoder pairs (one pair
    per ensemble member) plus one Adam optimizer for each network, and
    stores them in the parallel lists ``self.net_Ens``, ``self.net_Des``,
    ``self.optimizer_Ens`` and ``self.optimizer_Des``.
    """
    self.net_Ens = []
    self.net_Des = []
    self.optimizer_Ens = []
    self.optimizer_Des = []
    # One encoder/decoder pair per ensemble member; the loop index itself
    # is unused, so use the idiomatic throwaway name.
    for _ in range(self.opt.n_G):
        net_En = Encoder(self.opt.isize, self.opt.nz, self.opt.nc,
                         self.opt.ngf, self.opt.ngpu,
                         self.opt.extralayers).to('cuda')
        net_De = Decoder(self.opt.isize, self.opt.nz, self.opt.nc,
                         self.opt.ngf, self.opt.ngpu,
                         self.opt.extralayers).to('cuda')
        # TODO: initialize weights with prior N(0, 0.02) [from Bayesian GAN]
        net_En.apply(weights_init)
        net_De.apply(weights_init)
        optimizer_En = torch.optim.Adam(net_En.parameters(),
                                        lr=self.opt.lr,
                                        betas=(self.opt.beta1, 0.999))
        optimizer_De = torch.optim.Adam(net_De.parameters(),
                                        lr=self.opt.lr,
                                        betas=(self.opt.beta1, 0.999))
        self.net_Ens.append(net_En)
        self.net_Des.append(net_De)
        self.optimizer_Ens.append(optimizer_En)
        self.optimizer_Des.append(optimizer_De)
def __init__(self, zdim, point_dim, use_deterministic_encoder=False):
    """Set up the rotation-aware point encoder.

    Args:
        zdim: Dimensionality of the latent code.
        point_dim: Dimensionality of each input point.
        use_deterministic_encoder: If True, build the wrapped feature
            encoder in its deterministic variant.
    """
    super(Rot_Encoder, self).__init__()
    self.zdim = zdim
    self.point_dim = point_dim
    self.use_deterministic_encoder = use_deterministic_encoder
    # Spatial transformer sub-network applied before feature extraction.
    self.transform = STN3d()
    # Feature extractor producing the latent code.
    self.feature = Encoder(zdim, point_dim, use_deterministic_encoder)
def __init__(self, input_shape, latent_size=100, n_filters=64,
             n_extra_layers=0, **kwargs):
    """Build the model from a DCGAN-style encoder/decoder pair.

    Args:
        input_shape (tuple): shape of one input datum (without batch size)
        latent_size (int, optional): Size of the latent space. Defaults to 100.
        n_filters (int, optional): Filter count of the initial convolution
            layer. Defaults to 64.
        n_extra_layers (int, optional): Count of additional layers.
            Defaults to 0.
    """
    kwargs['name'] = type(self).__name__
    super().__init__(**kwargs)
    # Reuse the DCGAN encoder/decoder building blocks.
    enc = Encoder(input_shape, latent_size, n_filters, n_extra_layers,
                  name='encoder')
    dec = Decoder(input_shape, latent_size, n_filters, n_extra_layers,
                  name='decoder')
    self.net_enc = enc.model
    self.net_dec = dec.model
def __init__(self, input_shape, latent_size=100, n_filters=64,
             n_extra_layers=0, **kwargs):
    """Build a variational autoencoder from DCGAN encoder/decoder models.

    Args:
        input_shape (tuple): shape of one input datum (without batch size)
        latent_size (int, optional): Size of the latent space. Defaults to 100.
        n_filters (int, optional): Filter count of the initial convolution
            layer. Defaults to 64.
        n_extra_layers (int, optional): Count of additional layers.
            Defaults to 0.
    """
    kwargs['name'] = type(self).__name__
    super().__init__(**kwargs)
    # Use the DCGAN encoder/decoder models
    encoder = Encoder(input_shape, latent_size, n_filters, n_extra_layers,
                      name='encoder').model
    decoder = Decoder(input_shape, latent_size, n_filters, n_extra_layers,
                      name='decoder').model
    # build the encoder as simple sequential
    self.net_enc = tf.keras.Sequential(
        [
            # drop the latent convolution layer (last layer) from the encoder
            *encoder.layers[:-1],
            # preprocess before variational sampling
            tf.keras.layers.Flatten(name='encoder_flatten'),
            tf.keras.layers.Dense(
                latent_size, activation='relu', name='encoder_fc')
        ],
        name='encoder')
    # Shape of the last conv feature map (index -3 skips the Dense and
    # Flatten layers appended above); the decoder must reshape back to it.
    decoder_input_size = self.net_enc.layers[-3].output_shape[1:]
    # build the decoder as simple sequential
    self.net_dec = tf.keras.Sequential(
        [
            # postprocess after variational sampling
            tf.keras.layers.Dense(np.prod(decoder_input_size),
                                  activation='relu',
                                  name='decoder_fc'),
            tf.keras.layers.Reshape(decoder_input_size,
                                    name='decoder_reshape'),
            # drop the latent convolution layer with normalization and activation
            # (first three layers) from the decoder
            *decoder.layers[3:]
        ],
        name='decoder')
    # Use input size as reconstruction loss weight
    self.loss_weight = tf.cast(tf.math.reduce_prod(input_shape), tf.float32)
def __init__(self, input_shape, latent_size=100, n_filters=64,
             n_extra_layers=0, **kwargs):
    """GANomaly Generator Model

    NOTE(review): despite the docstring title, the structure (feature
    extractor + 1-unit sigmoid classifier) matches a discriminator head —
    confirm against the GANomaly paper / call sites.

    Args:
        input_shape (tuple): shape of one input datum (without batch size)
        latent_size (int, optional): Size of the decoder input or of the
            latent space. Defaults to 100.
        n_filters (int, optional): Filter count of the initial convolution
            layer. Defaults to 64.
        n_extra_layers (int, optional): Count of additional layers.
            Defaults to 0.
    """
    kwargs['name'] = type(self).__name__
    super().__init__(**kwargs)
    # Encoder with a single output channel; its last layer becomes the
    # 1-unit classification head, the rest the feature extractor.
    model = Encoder(input_shape, 1, n_filters, n_extra_layers).model
    layers = list(model.layers)
    self.features = tf.keras.Sequential(layers[:-1], name='features')
    # NOTE(review): a single layer (not a list) is passed here; tf.keras
    # wraps a lone layer, but `[layers[-1]]` would be more explicit.
    self.classifier = tf.keras.Sequential(layers[-1], name='classifier')
    self.classifier.add(tf.keras.layers.Reshape((1,)))
    # Sigmoid squashes the single logit into a probability.
    self.classifier.add(tf.keras.layers.Activation('sigmoid'))
def into_depth_and_rgb_block(self, raw_src_image, raw_src_depth, pose):
    """Encode an RGB-D input, transform its geometric latent by `pose`,
    and decode depth and pixel predictions.

    Args:
        raw_src_image: source image batch, NHWC (rank-4) per the
            `get_shape` unpack below.
        raw_src_depth: source depth map; rank 3 or 4 (a channel axis is
            added when rank != 4).
        pose: transform matrix multiplied into the geometric latent
            (shapes must be compatible with [b, -1, 4] — TODO confirm).

    Returns:
        dict with keys 'out_depth' and 'out_pixel'.
    """
    b, h, w, _ = raw_src_image.get_shape().as_list()
    # Latent split: first z_geo_size channels are "geometry", rest "appearance".
    z_size = 856
    z_geo_size = 600
    with tf.name_scope('preprocessing'):
        src_image = self.image2tensor(raw_src_image)
        # Ensure the depth map has an explicit channel axis.
        if len(raw_src_depth.get_shape()) != 4:
            src_depth = tf.expand_dims(raw_src_depth, axis=3)
        else:
            src_depth = raw_src_depth
        # self.manual_check = pose
    with tf.name_scope('concat_rgbd'):
        #conv_depth = conv2d(raw_src_depth, 32, is_train=True, k_h=3, k_w=3, s=1)
        #conv_rgb = conv2d(src_image, 32*3, is_train=True, k_h=3, k_w=3, s=1)
        # Channel-wise concat: RGB (3) + depth (1) -> 4-channel input.
        input_rgbd = tf.concat([src_image, src_depth], axis=3)
    with tf.name_scope('Encoder'):
        z_enc_out = Encoder(input_rgbd, num_outputs=z_size,
                            reuse_weights=tf.AUTO_REUSE)
        _, z_h, z_w, _ = z_enc_out.get_shape().as_list()
        # print('encoder out', z_enc_out)
        # transform latent vector: geometry part is reshaped to [b, N, 4]
        # so it can be multiplied by the pose matrix.
        z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size], [b, -1, 4])
        z_app = z_enc_out[:, :, :, z_geo_size:]
        # print('z geo', z_geo)
        # print('z app', z_app)
        z_geo_tf = tf.matmul(z_geo, pose)
        # print('z geo tf', z_geo_tf)
        # print('inv_RT', inv_RT)
        z_geo_tf = tf.reshape(
            z_geo_tf, [b, 1, 1, 600])  # TODO: solving z_h and z_w values
        # Recombine transformed geometry with untouched appearance.
        z_tf = tf.concat([z_geo_tf, z_app], axis=3)
    with tf.name_scope('Depth'):
        # NOTE(review): depth_bias/depth_scale are only assigned for
        # self.data == 'car'; any other dataset raises NameError below.
        if self.data == 'car':
            depth_bias = 2
            depth_scale = 1.0
            # self.depth_scale_vis = 125. / depth_scale
            # self.depth_bias_vis = depth_bias - depth_scale
        depth_dec_out = Decoder(z_geo_tf, 1, variable_scope='Depth_Decoder',
                                reuse_weights=tf.AUTO_REUSE)
        # tanh output rescaled/shifted into the dataset's depth range.
        depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias
    with tf.name_scope('Pixel'):
        pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder',
                                reuse_weights=tf.AUTO_REUSE)
        pixel_pred = tf.nn.tanh(pixel_dec_out)
        # print('pixel pred', pixel_pred)
    # with tf.name_scope('prediction'):
    #     warped_pred = projective_inverse_warp(src_image, tf.squeeze(depth_pred), RT, intrinsic, ret_flows=False)
    #     print('warped pred', warped_pred)
    #     tgt_img_tf = projective_inverse_warp(src_image, raw_tgt_depth, RT, intrinsic, ret_flows=False)
    # Collect output tensors
    pred = {}
    pred['out_depth'] = depth_pred
    pred['out_pixel'] = pixel_pred
    # pred['warped_image'] = warped_pred
    # pred['inverse_warping_image'] = tgt_img_tf
    # pred['tgt_image'] = fake_tgt
    return pred
def infer_tgt_views(self, raw_src_image, RT, intrinsic):
    """Synthesize a target view from a source image and relative pose.

    Encodes the source image, transforms the geometric part of the latent
    by the inverse pose, then decodes depth, a blending mask, and pixels;
    the final target is a mask-weighted blend of decoded pixels and the
    depth-based inverse warp of the source image.

    Args:
        raw_src_image: source image batch, NHWC (rank-4).
        RT: relative pose; reshaped by self.reshape_posematrix into
            forward and inverse forms.
        intrinsic: camera intrinsics for the projective warp.

    Returns:
        dict with keys 'out_depth', 'out_mask', 'out_pixel',
        'warped_image', 'tgt_image'.
    """
    b, h, w, _ = raw_src_image.get_shape().as_list()
    # Latent size: first 600 channels geometry, remaining 256 appearance.
    z_size = 856
    with tf.name_scope('preprocessing'):
        src_image = self.image2tensor(raw_src_image)
        self.manual_check = RT
        RT, inv_RT = self.reshape_posematrix(RT)
    with tf.name_scope('Encoder'):
        z_enc_out = Encoder(src_image, num_outputs=z_size)
        _, z_h, z_w, _ = z_enc_out.get_shape().as_list()
        # print('encoder out', z_enc_out)
        # transform latent vector: geometry reshaped to [b, N, 4] so it
        # can be multiplied by the inverse pose matrix.
        z_geo = tf.reshape(z_enc_out[:, :, :, :600], [b, -1, 4])
        z_app = z_enc_out[:, :, :, 600:]
        # print('z geo', z_geo)
        # print('z app', z_app)
        z_geo_tf = tf.matmul(z_geo, inv_RT)
        # print('z geo tf', z_geo_tf)
        # print('inv_RT', inv_RT)
        z_geo_tf = tf.reshape(
            z_geo_tf, [b, 1, 1, 600])  # TODO: solving z_h and z_w values
        # Recombine transformed geometry with untouched appearance.
        z_tf = tf.concat([z_geo_tf, z_app], axis=3)
    with tf.name_scope('Depth'):
        # NOTE(review): depth_bias/depth_scale are only assigned for
        # self.data == 'car'; any other dataset raises NameError below.
        if self.data == 'car':
            depth_bias = 2
            depth_scale = 1.0
            # self.depth_scale_vis = 125. / depth_scale
            # self.depth_bias_vis = depth_bias - depth_scale
        depth_dec_out = Decoder(z_geo_tf, 1, variable_scope='Depth_Decoder')
        # tanh output rescaled/shifted into the dataset's depth range.
        depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias
    with tf.name_scope('Mask'):
        mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
        # Sigmoid mask in [0, 1] used to blend decoded vs warped pixels.
        mask_pred = tf.nn.sigmoid(mask_dec_out)
        # print('mask pred', mask_pred)
    with tf.name_scope('Pixel'):
        pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
        pixel_pred = tf.nn.tanh(pixel_dec_out)
        # print('pixel pred', pixel_pred)
    with tf.name_scope('prediction'):
        warped_pred = projective_inverse_warp(src_image,
                                              tf.squeeze(depth_pred),
                                              RT,
                                              intrinsic,
                                              ret_flows=False)
        # print('warped pred', warped_pred)
        # Blend: mask selects decoded pixels, (1 - mask) the warped image.
        fake_tgt = tf.multiply(pixel_pred, mask_pred) + tf.multiply(
            warped_pred, 1 - mask_pred)
    # Collect output tensors
    pred = {}
    pred['out_depth'] = depth_pred
    pred['out_mask'] = mask_pred
    pred['out_pixel'] = pixel_pred
    pred['warped_image'] = warped_pred
    pred['tgt_image'] = fake_tgt
    return pred
def __init__(self, input_shape, latent_size=100, n_filters=64,
             n_extra_layers=0, **kwargs):
    """Build the encoder-decoder-encoder pipeline.

    Args:
        input_shape (tuple): shape of one input datum (without batch size)
        latent_size (int, optional): Size of the latent space. Defaults to 100.
        n_filters (int, optional): Filter count of the initial convolution
            layer. Defaults to 64.
        n_extra_layers (int, optional): Count of additional layers.
            Defaults to 0.
    """
    kwargs['name'] = type(self).__name__
    super().__init__(**kwargs)
    common = (input_shape, latent_size, n_filters, n_extra_layers)
    # image -> latent_i
    self.encoder_i = Encoder(*common, name='encoder_i').model
    # latent_i -> reconstructed image
    self.decoder = Decoder(*common, name='decoder').model
    # reconstructed image -> latent_o
    self.encoder_o = Encoder(*common, name='encoder_o').model
class Generator(tf.keras.Model):
    """GANomaly Generator Model

    Encoder-decoder-encoder pipeline; the distance between the two latent
    codes serves as the anomaly score.

    Args:
        input_shape (tuple): shape of one input datum (without batch size)
        latent_size (int, optional): Size of the decoder input or of the
            latent space. Defaults to 100.
        n_filters (int, optional): Filter count of the initial convolution
            layer. Defaults to 64.
        n_extra_layers (int, optional): Count of additional layers.
            Defaults to 0.
    """

    def __init__(self, input_shape, latent_size=100, n_filters=64,
                 n_extra_layers=0, **kwargs):
        kwargs['name'] = type(self).__name__
        super().__init__(**kwargs)
        common = (input_shape, latent_size, n_filters, n_extra_layers)
        self.encoder_i = Encoder(*common, name='encoder_i').model
        self.decoder = Decoder(*common, name='decoder').model
        self.encoder_o = Encoder(*common, name='encoder_o').model

    def summary(self, **kwargs):
        """Print a summary of the model and of each sub-model."""
        print_model(self, print_fn=kwargs.get('print_fn') or print)
        super().summary(**kwargs)
        for sub_model in (self.encoder_i, self.decoder, self.encoder_o):
            sub_model.summary(**kwargs)

    def call(self, x, training=False):
        """Run encode -> decode -> re-encode; return (fake, latent_i, latent_o)."""
        latent_i = self.encoder_i(x, training)
        fake = self.decoder(latent_i, training)
        latent_o = self.encoder_o(fake, training)
        return fake, latent_i, latent_o

    def _latent_error(self, x):
        """Per-sample MSE between the two latent codes, shaped (batch, 1)."""
        _, latent_i, latent_o = self(x, training=False)
        # latent_*.shape: (batchsize, 1, 1, latent_size); mean over the
        # last axis yields one error value per sample.
        err = tf.keras.backend.mean(
            tf.keras.backend.square(latent_i - latent_o), axis=-1)
        return tf.reshape(err, (-1, 1))

    def test_step(self, data):
        """Evaluation step: report per-sample losses with their labels."""
        # test_step(): https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/engine/training.py#L1148-L1180
        # evaluate(): https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/engine/training.py#L1243-L1394
        # fit(): https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/engine/training.py#L824-L1146
        x, y, _ = tf.keras.utils.unpack_x_y_sample_weight(data)
        return {
            "losses": self._latent_error(x),
            "labels": tf.reshape(y, (-1, 1))
        }

    def predict_step(self, data):
        """Prediction step: return per-sample anomaly scores."""
        # https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/engine/training.py#L1396
        x, _, _ = tf.keras.utils.unpack_x_y_sample_weight(data)
        return self._latent_error(x)
def build_train_graph(self, is_train=True):
    """Build the TF1-style training graph: encoder, three decoders
    (depth, mask, pixel), L1 losses, summaries, and the Adam train op.

    Args:
        is_train: unused here — TODO confirm whether callers rely on it.

    Side effects: sets self.eval_loss, self.total_loss, self.optimizer,
    self.train_op (and, for 'car' data, depth visualization attributes).
    """
    # Latent size: first 600 channels geometry, remaining 256 appearance.
    z_size = 856
    with tf.name_scope('Encoder'):
        z_enc_out = Encoder(self.src_image, num_outputs=z_size)
        _, z_h, z_w, _ = z_enc_out.get_shape().as_list()
        print('encoder out', z_enc_out)
        # transform latent vector: geometry reshaped to [b, N, 4] so it
        # can be multiplied by the inverse pose matrix.
        z_geo = tf.reshape(z_enc_out[:, :, :, :600],
                           [self.batch_size, -1, 4])
        z_app = z_enc_out[:, :, :, 600:]
        print('z geo', z_geo)
        print('z app', z_app)
        z_geo_tf = tf.matmul(z_geo, self.inv_RT)
        print('z geo tf', z_geo_tf)
        print('inv_RT', self.inv_RT)
        z_geo_tf = tf.reshape(z_geo_tf, [self.batch_size, 1, 1, 600])  # TODO: solving z_h and z_w values
        # Recombine transformed geometry with untouched appearance.
        z_tf = tf.concat([z_geo_tf, z_app], axis=3)
        print('z tf', z_tf)
    with tf.name_scope('Depth'):
        # NOTE(review): depth_bias/depth_scale are only assigned for
        # self.data == 'car'; any other dataset raises NameError below.
        if self.data == 'car':
            depth_bias = 2
            depth_scale = 1.0
            self.depth_scale_vis = 125. / depth_scale
            self.depth_bias_vis = depth_bias - depth_scale
        depth_dec_out = Decoder(z_geo_tf, 1, variable_scope='Depth_Decoder')
        # tanh output rescaled/shifted into the dataset's depth range.
        depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias
    with tf.name_scope('Mask'):
        mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
        # Sigmoid mask in [0, 1] used to blend decoded vs warped pixels.
        mask_pred = tf.nn.sigmoid(mask_dec_out)
        print('mask pred', mask_pred)
    with tf.name_scope('Pixel'):
        pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
        pixel_pred = tf.nn.tanh(pixel_dec_out)
        print('pixel pred', pixel_pred)
    with tf.name_scope('prediction'):
        warped_pred = projective_inverse_warp(self.src_image,
                                              tf.squeeze(depth_pred),
                                              self.RT,
                                              self.intrinsic,
                                              ret_flows=False)
        print('warped pred', warped_pred)
        # Blend: mask selects decoded pixels, (1 - mask) the warped image.
        fake_tgt = tf.multiply(pixel_pred, mask_pred) + tf.multiply(warped_pred, 1 - mask_pred)
    with tf.name_scope('loss'):
        self.eval_loss = {}
        # All three are L1 losses against the target image, each scaled
        # by the same loss weight.
        depth_loss = tf.reduce_mean(tf.abs(self.tgt_image - warped_pred)) * self.loss_weight
        pixel_loss = tf.reduce_mean(tf.abs(self.tgt_image - pixel_pred)) * self.loss_weight
        mask_loss = tf.reduce_mean(tf.abs(self.tgt_image - fake_tgt)) * self.loss_weight
        self.total_loss = depth_loss + pixel_loss + mask_loss
        self.eval_loss['depth_loss'] = depth_loss
        self.eval_loss['pixel_loss'] = pixel_loss
        self.eval_loss['mask_loss'] = mask_loss
        self.eval_loss['total_loss'] = self.total_loss
    # Summaries
    tf.summary.image('src_image', self.deprocess_image(self.src_image))
    tf.summary.image('tgt_image', self.deprocess_image(self.tgt_image))
    tf.summary.image('fake_tgt_image', self.deprocess_image(fake_tgt))
    tf.summary.image('pixel_pred_image', self.deprocess_image(pixel_pred))
    tf.summary.image('warped_pred_image', warped_pred)
    tf.summary.scalar('total_loss', self.total_loss)
    # Define optimizers
    with tf.name_scope('train_optimizers'):
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate, self.beta1)
        train_vars = [var for var in tf.trainable_variables()]
        grads_and_vars = self.optimizer.compute_gradients(self.total_loss, var_list=train_vars)
        self.train_op = self.optimizer.apply_gradients(grads_and_vars)
class CAE(tf.keras.Model):
    """Convolutional autoencoder built from DCGAN encoder/decoder models.

    The per-image reconstruction error is used as the anomaly score in
    test_step/predict_step.
    """

    def __init__(self, input_shape, latent_size=100, n_filters=64,
                 n_extra_layers=0, **kwargs):
        kwargs['name'] = type(self).__name__
        super().__init__(**kwargs)
        # Use the DCGAN encoder/decoder models
        common = (input_shape, latent_size, n_filters, n_extra_layers)
        self.net_enc = Encoder(*common, name='encoder').model
        self.net_dec = Decoder(*common, name='decoder').model

    def summary(self, **kwargs):
        """Print a summary of the model and of both sub-models."""
        print_model(self)
        super().summary(**kwargs)
        for sub_model in (self.net_enc, self.net_dec):
            sub_model.summary(**kwargs)

    def load_weights(self, path):
        """Load encoder/decoder weights from `path`; warn when missing."""
        enc_index = os.path.join(path, 'encoder.index')
        dec_index = os.path.join(path, 'decoder.index')
        if not (os.path.isfile(enc_index) and os.path.isfile(dec_index)):
            warning(
                'No valid pre-trained network weights in: "{}"'.format(path))
            return
        self.net_enc.load_weights(os.path.join(path, 'encoder'))
        self.net_dec.load_weights(os.path.join(path, 'decoder'))
        info('Loaded pre-trained network weights from: "{}"'.format(path))

    def save_weights(self, path):
        """Save encoder and decoder weights below `path`."""
        self.net_enc.save_weights(os.path.join(path, 'encoder'))
        self.net_dec.save_weights(os.path.join(path, 'decoder'))
        info('Saved pre-trained network weights to: "{}"'.format(path))

    def call(self, x, training=False):
        """Encode then decode `x`; return the reconstruction."""
        latent = self.net_enc(x, training=training)
        return self.net_dec(latent, training=training)

    def train_step(self, data):
        """Train with the input as its own target (autoencoder)."""
        x, _, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data)
        return super().train_step((x, x, sample_weight))

    def _reconstruction_losses(self, x):
        """Per-image MSE over (width, height, depth), shape (batchsize,)."""
        # compiled_loss only yields a scalar; we need one value per image,
        # so compute the squared error reduction directly.
        x_pred = self(x, training=False)
        return tf.keras.backend.mean(tf.keras.backend.square(x - x_pred),
                                     axis=[1, 2, 3])

    def test_step(self, data):
        """Evaluation step: report per-image losses with their labels."""
        x, y, _ = tf.keras.utils.unpack_x_y_sample_weight(data)
        losses = self._reconstruction_losses(x)
        return {
            "losses": tf.reshape(losses, (-1, 1)),
            "labels": tf.reshape(y, (-1, 1))
        }

    def predict_step(self, data):
        """Prediction step: return per-image reconstruction losses."""
        x, _, _ = tf.keras.utils.unpack_x_y_sample_weight(data)
        losses = self._reconstruction_losses(x)
        return tf.reshape(losses, (-1, 1))
def __init__(self, input_shape, latent_size=100, n_filters=64,
             n_extra_layers=0, intermediate_size=0, **kwargs):
    """Build a variational autoencoder with an optional intermediate
    fully connected layer between the convolutional encoder and the
    variational sampling head.

    Args:
        input_shape (tuple): shape of one input datum (without batch size)
        latent_size (int, optional): Size of the latent space. Defaults to 100.
        n_filters (int, optional): Filter count of the initial convolution
            layer. Defaults to 64.
        n_extra_layers (int, optional): Count of additional layers.
            Defaults to 0.
        intermediate_size (int, optional): If > 0, size of an extra Dense
            layer appended to the encoder. Defaults to 0 (disabled).
    """
    kwargs['name'] = type(self).__name__
    super().__init__(**kwargs)
    # Use the DCGAN encoder/decoder models
    encoder = Encoder(input_shape, latent_size, n_filters, n_extra_layers,
                      name='encoder').model
    decoder = Decoder(input_shape, latent_size, n_filters, n_extra_layers,
                      name='decoder').model
    # build the encoder as simple sequential
    self.net_enc = tf.keras.Sequential(
        [
            # drop the latent convolution layer (last layer) from the encoder
            *encoder.layers[:-1],
            # preprocess before variational sampling
            tf.keras.layers.Flatten(name='encoder_flatten')
        ],
        name='encoder')
    # Flattened size of the last conv feature map (index -2 skips the
    # Flatten layer appended above); feeds the variational sub-model.
    variational_input_size = (np.prod(
        self.net_enc.layers[-2].output_shape[1:]), )
    # Unflattened feature-map shape the decoder must reshape back to.
    decoder_input_size = self.net_enc.layers[-2].output_shape[1:]
    # add an optional fully connected intermediate layer
    if intermediate_size and intermediate_size > 0:
        variational_input_size = (intermediate_size, )
        self.net_enc.add(
            tf.keras.layers.Dense(intermediate_size,
                                  activation='relu',
                                  name='encoder_intermediate'))
    # build the variational part with the functional api
    variational_input = tf.keras.Input(shape=variational_input_size,
                                       name='input_variational')
    z_mean = tf.keras.layers.Dense(latent_size,
                                   name='z_mean')(variational_input)
    z_log_var = tf.keras.layers.Dense(latent_size,
                                      name='z_log_var')(variational_input)
    # sample z from z_mean and z_log_var
    z = Sampling(name='sampling_z')([z_mean, z_log_var])
    self.net_var = tf.keras.Model(variational_input,
                                  [z_mean, z_log_var, z],
                                  name='variational')
    # build the decoder as simple sequential
    self.net_dec = tf.keras.Sequential(
        [
            # postprocess after variational sampling
            tf.keras.layers.Dense(np.prod(decoder_input_size),
                                  activation='relu',
                                  name='decoder_intermediate'),
            tf.keras.layers.Reshape(decoder_input_size,
                                    name='decoder_reshape'),
            # drop the latent convolution layer with normalization and activation
            # (first three layers) from the decoder
            *decoder.layers[3:]
        ],
        name='decoder')
    # Use input size as reconstruction loss weight
    self.loss_weight = tf.cast(tf.math.reduce_prod(input_shape), tf.float32)