Example #1
0
 def generator_setup(self):
     """Create ``self.opt.n_G`` encoder/decoder pairs, each with its own Adam optimizer.

     Fills four parallel lists on the instance (``net_Ens``, ``net_Des``,
     ``optimizer_Ens``, ``optimizer_Des``); entries at the same index belong
     to the same generator. Every network is moved to ``'cuda'`` and has
     ``weights_init`` applied before its optimizer is created.
     """
     self.net_Ens, self.net_Des = [], []
     self.optimizer_Ens, self.optimizer_Des = [], []
     opt = self.opt

     def make_adam(net):
         # One Adam instance per network; the second beta is fixed at 0.999.
         return torch.optim.Adam(net.parameters(),
                                 lr=opt.lr,
                                 betas=(opt.beta1, 0.999))

     for _ in range(opt.n_G):
         # Encoder and decoder are built from the same option values.
         net_args = (opt.isize, opt.nz, opt.nc, opt.ngf, opt.ngpu,
                     opt.extralayers)
         encoder = Encoder(*net_args).to('cuda')
         decoder = Decoder(*net_args).to('cuda')
         # TODO: initialized weight with prior N(0, 0.02) [From bayesian GAN]
         encoder.apply(weights_init)
         decoder.apply(weights_init)
         encoder_optimizer = make_adam(encoder)
         decoder_optimizer = make_adam(decoder)
         self.net_Ens.append(encoder)
         self.net_Des.append(decoder)
         self.optimizer_Ens.append(encoder_optimizer)
         self.optimizer_Des.append(decoder_optimizer)
Example #2
0
 def __init__(self, zdim, point_dim, use_deterministic_encoder=False):
     """Store the configuration and create the two sub-networks.

     Args:
         zdim: stored as ``self.zdim`` and forwarded to ``Encoder``
             (presumably the latent size -- TODO confirm).
         point_dim: stored as ``self.point_dim`` and forwarded to ``Encoder``.
         use_deterministic_encoder: stored on the instance and forwarded to
             ``Encoder``. Defaults to False.
     """
     super(Rot_Encoder, self).__init__()
     self.zdim, self.point_dim = zdim, point_dim
     self.use_deterministic_encoder = use_deterministic_encoder
     # Sub-networks are created in this order: transform first, then feature.
     self.transform = STN3d()
     self.feature = Encoder(zdim, point_dim, use_deterministic_encoder)
    def __init__(self, input_shape, latent_size=100, n_filters=64,
                 n_extra_layers=0, **kwargs):
        """Create the encoder and decoder networks of the model.

        Args:
            input_shape: forwarded to ``Encoder`` and ``Decoder``.
            latent_size: forwarded to ``Encoder`` and ``Decoder``. Defaults to 100.
            n_filters: forwarded to ``Encoder`` and ``Decoder``. Defaults to 64.
            n_extra_layers: forwarded to ``Encoder`` and ``Decoder``. Defaults to 0.
            **kwargs: passed to the parent constructor; ``name`` is always
                overwritten with the class name.
        """
        kwargs.update(name=type(self).__name__)
        super().__init__(**kwargs)

        # Use the DCGAN encoder/decoder models; both get identical settings.
        dcgan_args = (input_shape, latent_size, n_filters, n_extra_layers)
        self.net_enc = Encoder(*dcgan_args, name='encoder').model
        self.net_dec = Decoder(*dcgan_args, name='decoder').model
    def __init__(self, input_shape, latent_size=100, n_filters=64,
                 n_extra_layers=0, **kwargs):
        """Build encoder/decoder networks with a fully connected bottleneck.

        The DCGAN encoder loses its last layer and gains a flatten and a dense
        layer of ``latent_size`` units; the DCGAN decoder loses its first three
        layers and gains a dense and a reshape layer in front.

        Args:
            input_shape: forwarded to ``Encoder``/``Decoder``; its product is
                also stored as ``self.loss_weight``.
            latent_size: forwarded to ``Encoder``/``Decoder`` and used as the
                unit count of the ``encoder_fc`` layer. Defaults to 100.
            n_filters: forwarded to ``Encoder``/``Decoder``. Defaults to 64.
            n_extra_layers: forwarded to ``Encoder``/``Decoder``. Defaults to 0.
            **kwargs: passed to the parent constructor; ``name`` is always
                overwritten with the class name.
        """
        kwargs.update(name=type(self).__name__)
        super().__init__(**kwargs)

        # Use the DCGAN encoder/decoder models as layer donors.
        dcgan_args = (input_shape, latent_size, n_filters, n_extra_layers)
        encoder = Encoder(*dcgan_args, name='encoder').model
        decoder = Decoder(*dcgan_args, name='decoder').model

        # Encoder: all donor layers except the latent convolution (last layer),
        # then flatten + dense.
        encoder_layers = list(encoder.layers[:-1])
        encoder_layers.append(tf.keras.layers.Flatten(name='encoder_flatten'))
        encoder_layers.append(
            tf.keras.layers.Dense(latent_size,
                                  activation='relu',
                                  name='encoder_fc'))
        self.net_enc = tf.keras.Sequential(encoder_layers, name='encoder')

        # Output shape (without batch axis) of the layer that sits right
        # before the flatten/dense pair, i.e. third from the end.
        decoder_input_size = self.net_enc.layers[-3].output_shape[1:]

        # Decoder: dense + reshape restore that shape, followed by the donor
        # layers after the latent convolution, its normalization and its
        # activation (first three layers).
        decoder_layers = [
            tf.keras.layers.Dense(np.prod(decoder_input_size),
                                  activation='relu',
                                  name='decoder_fc'),
            tf.keras.layers.Reshape(decoder_input_size,
                                    name='decoder_reshape'),
        ]
        decoder_layers.extend(decoder.layers[3:])
        self.net_dec = tf.keras.Sequential(decoder_layers, name='decoder')

        # Use input size as reconstruction loss weight
        n_input_values = tf.math.reduce_prod(input_shape)
        self.loss_weight = tf.cast(n_input_values, tf.float32)
Example #5
0
    def __init__(self, input_shape, latent_size=100, n_filters=64, n_extra_layers=0, **kwargs):
        """Build a feature extractor and a sigmoid classifier head from an ``Encoder``.

        An ``Encoder`` model with an output size of 1 is created. All of its
        layers except the last become ``self.features``; the last layer,
        followed by a reshape to ``(1,)`` and a sigmoid activation, becomes
        ``self.classifier``.

        Args:
            input_shape (tuple): shape of one input datum (without batch size)
            latent_size (int, optional): Not used by this constructor; the
                encoder is always built with an output size of 1. Defaults to 100.
            n_filters (int, optional): Filter count of the initial convolution layer. Defaults to 64.
            n_extra_layers (int, optional): Count of additional layers. Defaults to 0.
            **kwargs: passed to the parent constructor; ``name`` is always
                overwritten with the class name.
        """
        kwargs['name'] = type(self).__name__
        super().__init__(**kwargs)
        model = Encoder(input_shape, 1, n_filters, n_extra_layers).model
        layers = list(model.layers)

        self.features = tf.keras.Sequential(layers[:-1], name='features')
        # Pass a list, as the documented `layers` argument expects, instead of
        # relying on the constructor wrapping a bare layer.
        self.classifier = tf.keras.Sequential(
            [
                layers[-1],
                tf.keras.layers.Reshape((1,)),
                tf.keras.layers.Activation('sigmoid'),
            ],
            name='classifier')
Example #6
0
    def into_depth_and_rgb_block(self, raw_src_image, raw_src_depth, pose):
        """Predict a depth map and an RGB image from a source RGB-D pair and a pose.

        The source image and depth are concatenated along the channel axis and
        encoded. The first ``z_geo_size`` latent channels are multiplied by
        ``pose`` and decoded into depth; the transformed channels concatenated
        with the remaining channels are decoded into pixels.

        Args:
            raw_src_image: 4-D image tensor; the batch size is read from its
                static shape and the tensor is passed through
                ``self.image2tensor``.
            raw_src_depth: depth tensor; a channel axis is appended when it is
                not already 4-D.
            pose: right-hand operand of the matmul applied to the geometry
                latent reshaped to ``[b, -1, 4]`` (presumably a batch of 4x4
                matrices -- TODO confirm against caller).

        Returns:
            dict with the keys ``'out_depth'`` and ``'out_pixel'``.

        Raises:
            ValueError: if ``self.data`` is not ``'car'``; no depth bias/scale
                is defined for any other value.
        """
        # Depth range constants exist only for the 'car' data set. Fail with a
        # clear message up front instead of hitting an unbound local after the
        # depth decoder has already been added to the graph.
        if self.data != 'car':
            raise ValueError(
                "No depth bias/scale defined for data set {!r}; "
                "only 'car' is supported".format(self.data))
        depth_bias = 2
        depth_scale = 1.0

        # Only the batch size is needed; unpacking also checks the rank.
        b, _, _, _ = raw_src_image.get_shape().as_list()
        z_size = 856  # total number of latent channels
        z_geo_size = 600  # leading latent channels treated as geometry

        with tf.name_scope('preprocessing'):
            src_image = self.image2tensor(raw_src_image)
            if len(raw_src_depth.get_shape()) != 4:
                src_depth = tf.expand_dims(raw_src_depth, axis=3)
            else:
                src_depth = raw_src_depth

        with tf.name_scope('concat_rgbd'):
            input_rgbd = tf.concat([src_image, src_depth], axis=3)

        with tf.name_scope('Encoder'):
            z_enc_out = Encoder(input_rgbd,
                                num_outputs=z_size,
                                reuse_weights=tf.AUTO_REUSE)

            # Split the latent into geometry and appearance channels and
            # transform the geometry part with the pose.
            z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size], [b, -1, 4])
            z_app = z_enc_out[:, :, :, z_geo_size:]
            z_geo_tf = tf.matmul(z_geo, pose)

            # TODO: solving z_h and z_w values -- the reshape hard-codes a
            # 1x1 spatial size for the encoder output.
            z_geo_tf = tf.reshape(z_geo_tf, [b, 1, 1, z_geo_size])
            z_tf = tf.concat([z_geo_tf, z_app], axis=3)

        with tf.name_scope('Depth'):
            depth_dec_out = Decoder(z_geo_tf,
                                    1,
                                    variable_scope='Depth_Decoder',
                                    reuse_weights=tf.AUTO_REUSE)
            # tanh output is scaled and shifted by the constants above.
            depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

        with tf.name_scope('Pixel'):
            pixel_dec_out = Decoder(z_tf,
                                    3,
                                    variable_scope='Pixel_Decoder',
                                    reuse_weights=tf.AUTO_REUSE)
            pixel_pred = tf.nn.tanh(pixel_dec_out)

        # Collect output tensors
        return {'out_depth': depth_pred, 'out_pixel': pixel_pred}
Example #7
0
    def infer_tgt_views(self, raw_src_image, RT, intrinsic):
        """Predict depth, mask, pixels and the blended target view for a source image.

        The encoded latent is split into geometry and appearance channels; the
        geometry part is multiplied by the inverse pose returned by
        ``self.reshape_posematrix``. Depth and mask are decoded from the
        transformed geometry, pixels from geometry plus appearance. The final
        image blends the pixel prediction with the source image warped by the
        predicted depth, weighted by the mask.

        Args:
            raw_src_image: 4-D image tensor; the batch size is read from its
                static shape and the tensor is passed through
                ``self.image2tensor``.
            RT: pose input; stored unchanged in ``self.manual_check`` and then
                converted with ``self.reshape_posematrix``.
            intrinsic: forwarded to ``projective_inverse_warp``.

        Returns:
            dict with the keys ``'out_depth'``, ``'out_mask'``,
            ``'out_pixel'``, ``'warped_image'`` and ``'tgt_image'``.

        Raises:
            ValueError: if ``self.data`` is not ``'car'``; no depth bias/scale
                is defined for any other value.
        """
        # Depth range constants exist only for the 'car' data set. Fail with a
        # clear message up front instead of hitting an unbound local after the
        # depth decoder has already been added to the graph.
        if self.data != 'car':
            raise ValueError(
                "No depth bias/scale defined for data set {!r}; "
                "only 'car' is supported".format(self.data))
        depth_bias = 2
        depth_scale = 1.0

        # Only the batch size is needed; unpacking also checks the rank.
        b, _, _, _ = raw_src_image.get_shape().as_list()
        z_size = 856  # total number of latent channels
        z_geo_size = 600  # leading latent channels treated as geometry

        with tf.name_scope('preprocessing'):
            src_image = self.image2tensor(raw_src_image)
            self.manual_check = RT
            RT, inv_RT = self.reshape_posematrix(RT)

        with tf.name_scope('Encoder'):
            z_enc_out = Encoder(src_image, num_outputs=z_size)

            # Split the latent into geometry and appearance channels and
            # transform the geometry part with the inverse pose.
            z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size], [b, -1, 4])
            z_app = z_enc_out[:, :, :, z_geo_size:]
            z_geo_tf = tf.matmul(z_geo, inv_RT)

            # TODO: solving z_h and z_w values -- the reshape hard-codes a
            # 1x1 spatial size for the encoder output.
            z_geo_tf = tf.reshape(z_geo_tf, [b, 1, 1, z_geo_size])
            z_tf = tf.concat([z_geo_tf, z_app], axis=3)

        with tf.name_scope('Depth'):
            depth_dec_out = Decoder(z_geo_tf,
                                    1,
                                    variable_scope='Depth_Decoder')
            # tanh output is scaled and shifted by the constants above.
            depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

        with tf.name_scope('Mask'):
            mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
            mask_pred = tf.nn.sigmoid(mask_dec_out)

        with tf.name_scope('Pixel'):
            pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
            pixel_pred = tf.nn.tanh(pixel_dec_out)

        with tf.name_scope('prediction'):
            warped_pred = projective_inverse_warp(src_image,
                                                  tf.squeeze(depth_pred),
                                                  RT,
                                                  intrinsic,
                                                  ret_flows=False)

            # Mask-weighted blend of generated pixels and the warped source.
            fake_tgt = tf.multiply(pixel_pred, mask_pred) + tf.multiply(
                warped_pred, 1 - mask_pred)

        # Collect output tensors
        return {
            'out_depth': depth_pred,
            'out_mask': mask_pred,
            'out_pixel': pixel_pred,
            'warped_image': warped_pred,
            'tgt_image': fake_tgt,
        }
Example #8
0
 def __init__(self, input_shape, latent_size=100, n_filters=64, n_extra_layers=0, **kwargs):
     """Create the input encoder, the decoder and the output encoder.

     All three networks receive the same ``input_shape``, ``latent_size``,
     ``n_filters`` and ``n_extra_layers``. ``name`` in ``kwargs`` is always
     overwritten with the class name before calling the parent constructor.
     """
     kwargs.update(name=type(self).__name__)
     super().__init__(**kwargs)
     net_args = (input_shape, latent_size, n_filters, n_extra_layers)
     self.encoder_i = Encoder(*net_args, name='encoder_i').model
     self.decoder = Decoder(*net_args, name='decoder').model
     self.encoder_o = Encoder(*net_args, name='encoder_o').model
Example #9
0
class Generator(tf.keras.Model):
    """GANomaly Generator Model

    Chains an input encoder, a decoder and an output encoder. At test and
    predict time the per-sample score is the mean squared difference between
    the two latent codes.

    Args:
        input_shape (tuple): shape of one input datum (without batch size)
        latent_size (int, optional): Size of the decoder input or of the latent space. Defaults to 100.
        n_filters (int, optional): Filter count of the initial convolution layer. Defaults to 64.
        n_extra_layers (int, optional): Count of additional layers. Defaults to 0.
    """
    def __init__(self, input_shape, latent_size=100, n_filters=64, n_extra_layers=0, **kwargs):
        kwargs.update(name=type(self).__name__)
        super().__init__(**kwargs)
        # All three sub-networks are configured identically.
        net_args = (input_shape, latent_size, n_filters, n_extra_layers)
        self.encoder_i = Encoder(*net_args, name='encoder_i').model
        self.decoder = Decoder(*net_args, name='decoder').model
        self.encoder_o = Encoder(*net_args, name='encoder_o').model

    def summary(self, **kwargs):
        """Print the model overview followed by the summary of every sub-network."""
        printer = kwargs.get('print_fn')
        if not printer:
            printer = print
        print_model(self, print_fn=printer)
        super().summary(**kwargs)
        for sub_network in (self.encoder_i, self.decoder, self.encoder_o):
            sub_network.summary(**kwargs)

    def call(self, x, training=False):
        """Return ``(fake, latent_i, latent_o)`` for the input batch ``x``."""
        code_in = self.encoder_i(x, training)
        reconstruction = self.decoder(code_in, training)
        code_out = self.encoder_o(reconstruction, training)
        return reconstruction, code_in, code_out

    def _latent_error(self, x):
        """Return the latent-space error of ``x`` reshaped to ``(-1, 1)``."""
        _, code_in, code_out = self(x, training=False)
        # Mean squared difference of the two latent codes over the last axis.
        squared_diff = tf.keras.backend.square(code_in - code_out)
        error = tf.keras.backend.mean(squared_diff, axis=-1)
        return tf.reshape(error, (-1, 1))

    def test_step(self, data):
        """Return per-sample losses and the matching labels instead of metrics."""
        # test_step():  https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/engine/training.py#L1148-L1180
        # evaluate():   https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/engine/training.py#L1243-L1394
        # fit():        https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/engine/training.py#L824-L1146

        x, y, _ = tf.keras.utils.unpack_x_y_sample_weight(data)
        # Both entries are flattened to one column so they line up row by row.
        return {
            "losses": self._latent_error(x),
            "labels": tf.reshape(y, (-1, 1))
        }

    def predict_step(self, data):
        """Return the per-sample latent error as a single column."""
        # https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/engine/training.py#L1396

        x, _, _ = tf.keras.utils.unpack_x_y_sample_weight(data)
        return self._latent_error(x)
Example #10
0
    def build_train_graph(self, is_train=True):
        """Build the training graph: networks, losses, summaries and the train op.

        Reads ``self.src_image``, ``self.tgt_image``, ``self.RT``,
        ``self.inv_RT``, ``self.intrinsic``, ``self.batch_size``,
        ``self.loss_weight``, ``self.learning_rate``, ``self.beta1`` and
        ``self.data``. Sets ``self.depth_scale_vis``, ``self.depth_bias_vis``,
        ``self.eval_loss``, ``self.total_loss``, ``self.optimizer`` and
        ``self.train_op``.

        Args:
            is_train: accepted but not used by this method.

        Raises:
            ValueError: if ``self.data`` is not ``'car'``; no depth bias/scale
                is defined for any other value.
        """
        # Depth range constants exist only for the 'car' data set. Fail with a
        # clear message up front instead of hitting an unbound local after the
        # depth decoder has already been added to the graph.
        if self.data != 'car':
            raise ValueError(
                "No depth bias/scale defined for data set {!r}; "
                "only 'car' is supported".format(self.data))
        depth_bias = 2
        depth_scale = 1.0
        self.depth_scale_vis = 125. / depth_scale
        self.depth_bias_vis = depth_bias - depth_scale

        z_size = 856  # total number of latent channels
        z_geo_size = 600  # leading latent channels treated as geometry

        with tf.name_scope('Encoder'):
            z_enc_out = Encoder(self.src_image, num_outputs=z_size)
            print('encoder out', z_enc_out)

            # Split the latent into geometry and appearance channels and
            # transform the geometry part with the inverse pose.
            z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size],
                               [self.batch_size, -1, 4])
            z_app = z_enc_out[:, :, :, z_geo_size:]
            print('z geo', z_geo)
            print('z app', z_app)

            z_geo_tf = tf.matmul(z_geo, self.inv_RT)
            print('z geo tf', z_geo_tf)
            print('inv_RT', self.inv_RT)

            # TODO: solving z_h and z_w values -- the reshape hard-codes a
            # 1x1 spatial size for the encoder output.
            z_geo_tf = tf.reshape(z_geo_tf,
                                  [self.batch_size, 1, 1, z_geo_size])
            z_tf = tf.concat([z_geo_tf, z_app], axis=3)
            print('z tf', z_tf)

        with tf.name_scope('Depth'):
            depth_dec_out = Decoder(z_geo_tf, 1, variable_scope='Depth_Decoder')
            # tanh output is scaled and shifted by the constants above.
            depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

        with tf.name_scope('Mask'):
            mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
            mask_pred = tf.nn.sigmoid(mask_dec_out)
            print('mask pred', mask_pred)

        with tf.name_scope('Pixel'):
            pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
            pixel_pred = tf.nn.tanh(pixel_dec_out)
            print('pixel pred', pixel_pred)

        with tf.name_scope('prediction'):
            warped_pred = projective_inverse_warp(self.src_image,
                                                  tf.squeeze(depth_pred),
                                                  self.RT,
                                                  self.intrinsic,
                                                  ret_flows=False)
            print('warped pred', warped_pred)

            # Mask-weighted blend of generated pixels and the warped source.
            fake_tgt = tf.multiply(pixel_pred, mask_pred) + tf.multiply(
                warped_pred, 1 - mask_pred)

        with tf.name_scope('loss'):
            # Each term is a weighted mean absolute error against the target
            # image: warped source, decoded pixels, and their masked blend.
            depth_loss = tf.reduce_mean(
                tf.abs(self.tgt_image - warped_pred)) * self.loss_weight
            pixel_loss = tf.reduce_mean(
                tf.abs(self.tgt_image - pixel_pred)) * self.loss_weight
            mask_loss = tf.reduce_mean(
                tf.abs(self.tgt_image - fake_tgt)) * self.loss_weight

            self.total_loss = depth_loss + pixel_loss + mask_loss

            self.eval_loss = {
                'depth_loss': depth_loss,
                'pixel_loss': pixel_loss,
                'mask_loss': mask_loss,
                'total_loss': self.total_loss,
            }

        # Summaries
        tf.summary.image('src_image', self.deprocess_image(self.src_image))
        tf.summary.image('tgt_image', self.deprocess_image(self.tgt_image))

        tf.summary.image('fake_tgt_image', self.deprocess_image(fake_tgt))
        tf.summary.image('pixel_pred_image', self.deprocess_image(pixel_pred))
        tf.summary.image('warped_pred_image', warped_pred)
        tf.summary.scalar('total_loss', self.total_loss)

        # Define optimizers
        with tf.name_scope('train_optimizers'):
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                                    self.beta1)
            # All trainable variables of the graph are optimized together.
            train_vars = tf.trainable_variables()
            grads_and_vars = self.optimizer.compute_gradients(
                self.total_loss, var_list=train_vars)
            self.train_op = self.optimizer.apply_gradients(grads_and_vars)
class CAE(tf.keras.Model):
    """Autoencoder built from the DCGAN encoder/decoder models.

    Training fits the reconstruction to the input itself. Test and predict
    steps report one mean squared reconstruction error per sample.
    """

    def __init__(self, input_shape, latent_size=100, n_filters=64,
                 n_extra_layers=0, **kwargs):
        kwargs.update(name=type(self).__name__)
        super().__init__(**kwargs)

        # Use the DCGAN encoder/decoder models; both get identical settings.
        dcgan_args = (input_shape, latent_size, n_filters, n_extra_layers)
        self.net_enc = Encoder(*dcgan_args, name='encoder').model
        self.net_dec = Decoder(*dcgan_args, name='decoder').model

    def summary(self, **kwargs):
        """Print the model overview followed by the encoder and decoder summaries."""
        print_model(self)
        super().summary(**kwargs)
        for sub_network in (self.net_enc, self.net_dec):
            sub_network.summary(**kwargs)

    def load_weights(self, path):
        """Load encoder and decoder weights from ``path`` if both index files exist.

        Emits a warning and leaves the networks untouched otherwise.
        """
        index_files = (os.path.join(path, 'encoder.index'),
                       os.path.join(path, 'decoder.index'))
        if not all(os.path.isfile(index_file) for index_file in index_files):
            warning(
                'No valid pre-trained network weights in: "{}"'.format(path))
            return
        self.net_enc.load_weights(os.path.join(path, 'encoder'))
        self.net_dec.load_weights(os.path.join(path, 'decoder'))
        info('Loaded pre-trained network weights from: "{}"'.format(path))

    def save_weights(self, path):
        """Save encoder and decoder weights below ``path``."""
        for network, prefix in ((self.net_enc, 'encoder'),
                                (self.net_dec, 'decoder')):
            network.save_weights(os.path.join(path, prefix))
        info('Saved pre-trained network weights to: "{}"'.format(path))

    def call(self, x, training=False):
        """Encode ``x`` and return its reconstruction."""
        latent = self.net_enc(x, training=training)
        return self.net_dec(latent, training=training)

    def train_step(self, data):
        """Run the parent training step with the input used as its own target."""
        x, _, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data)
        autoencoder_batch = (x, x, sample_weight)
        return super().train_step(autoencoder_batch)

    def _reconstruction_error(self, x):
        """Return the per-sample mean squared reconstruction error as one column."""
        x_pred = self(x, training=False)
        # we need a loss value per image which isn't provided by compiled_loss
        # assume we always have a shape of (batch_size, width, height, depth)
        squared_diff = tf.keras.backend.square(x - x_pred)
        losses = tf.keras.backend.mean(squared_diff, axis=[1, 2, 3])
        return tf.reshape(losses, (-1, 1))

    def test_step(self, data):
        """Return per-sample losses and the matching labels instead of metrics."""
        x, y, _ = tf.keras.utils.unpack_x_y_sample_weight(data)
        return {
            "losses": self._reconstruction_error(x),
            "labels": tf.reshape(y, (-1, 1))
        }

    def predict_step(self, data):
        """Return the per-sample reconstruction error as a single column."""
        x, _, _ = tf.keras.utils.unpack_x_y_sample_weight(data)
        return self._reconstruction_error(x)
    def __init__(self, input_shape, latent_size=100, n_filters=64,
                 n_extra_layers=0, intermediate_size=0, **kwargs):
        """Build the encoder, the variational part and the decoder.

        The DCGAN encoder loses its last layer and is flattened (optionally
        followed by a dense layer). The variational model maps that output to
        ``z_mean``, ``z_log_var`` and a sampled ``z``. The DCGAN decoder loses
        its first three layers and gains a dense and a reshape layer in front.

        Args:
            input_shape: forwarded to ``Encoder``/``Decoder``; its product is
                also stored as ``self.loss_weight``.
            latent_size: forwarded to ``Encoder``/``Decoder`` and used as the
                unit count of ``z_mean`` and ``z_log_var``. Defaults to 100.
            n_filters: forwarded to ``Encoder``/``Decoder``. Defaults to 64.
            n_extra_layers: forwarded to ``Encoder``/``Decoder``. Defaults to 0.
            intermediate_size: when truthy and positive, a dense layer with
                this many units is appended to the encoder. Defaults to 0.
            **kwargs: passed to the parent constructor; ``name`` is always
                overwritten with the class name.
        """
        kwargs.update(name=type(self).__name__)
        super().__init__(**kwargs)

        # Use the DCGAN encoder/decoder models as layer donors.
        dcgan_args = (input_shape, latent_size, n_filters, n_extra_layers)
        encoder = Encoder(*dcgan_args, name='encoder').model
        decoder = Decoder(*dcgan_args, name='decoder').model

        # Encoder: all donor layers except the latent convolution (last layer),
        # flattened before variational sampling.
        encoder_layers = list(encoder.layers[:-1])
        encoder_layers.append(tf.keras.layers.Flatten(name='encoder_flatten'))
        self.net_enc = tf.keras.Sequential(encoder_layers, name='encoder')

        # Output shape (without batch axis) of the layer in front of the
        # flatten; the decoder restores it, the variational input flattens it.
        decoder_input_size = self.net_enc.layers[-2].output_shape[1:]
        variational_input_size = (np.prod(decoder_input_size), )

        # add an optional fully connected intermediate layer
        if intermediate_size and intermediate_size > 0:
            self.net_enc.add(
                tf.keras.layers.Dense(intermediate_size,
                                      activation='relu',
                                      name='encoder_intermediate'))
            variational_input_size = (intermediate_size, )

        # build the variational part with the functional api
        variational_input = tf.keras.Input(shape=variational_input_size,
                                           name='input_variational')
        mean_layer = tf.keras.layers.Dense(latent_size, name='z_mean')
        z_mean = mean_layer(variational_input)
        log_var_layer = tf.keras.layers.Dense(latent_size, name='z_log_var')
        z_log_var = log_var_layer(variational_input)
        # sample z from z_mean and z_log_var
        z = Sampling(name='sampling_z')([z_mean, z_log_var])
        self.net_var = tf.keras.Model(variational_input,
                                      [z_mean, z_log_var, z],
                                      name='variational')

        # Decoder: dense + reshape restore the convolutional shape, followed
        # by the donor layers after the latent convolution, its normalization
        # and its activation (first three layers).
        decoder_layers = [
            tf.keras.layers.Dense(np.prod(decoder_input_size),
                                  activation='relu',
                                  name='decoder_intermediate'),
            tf.keras.layers.Reshape(decoder_input_size,
                                    name='decoder_reshape'),
        ]
        decoder_layers.extend(decoder.layers[3:])
        self.net_dec = tf.keras.Sequential(decoder_layers, name='decoder')

        # Use input size as reconstruction loss weight
        n_input_values = tf.math.reduce_prod(input_shape)
        self.loss_weight = tf.cast(n_input_values, tf.float32)