def _build(self, x):
     h = snt.Conv2D(
         output_channels=self._num_hiddens,
         kernel_shape=(3, 3),
         stride=(1, 1),
         name="dec_1")(x)
     
     h = residual_stack(
         h,
         self._num_hiddens,
         self._num_residual_layers,
         self._num_residual_hiddens)
                
     h = snt.Conv2DTranspose(
         output_channels=int(self._num_hiddens / 2),
         output_shape=None,
         kernel_shape=(4, 4),
         stride=(2, 2),
         name="dec_2")(h)
     h = tf.nn.relu(h)
     
     x_recon = snt.Conv2DTranspose(
         output_channels=1,
         output_shape=None,
         kernel_shape=(4, 4),
         stride=(2, 2),
         name="dec_3")(h)
     
     return x_recon
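
# A minimal usage sketch for the TF1-Sonnet decoder above. `Decoder` and its
# constructor arguments are assumptions: the example only shows _build, which
# Sonnet runs when the module instance is called.
decoder = Decoder(num_hiddens=128,
                  num_residual_layers=2,
                  num_residual_hiddens=32)
z = tf.placeholder(tf.float32, [None, 8, 8, 128])  # e.g. quantized latents
x_recon = decoder(z)  # invokes _build under the module's variable scope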
Example #2
    def __init__(self, n_latent=4, kernel_size=4, name=None):
        super(VariationalAutoEncoder, self).__init__(name=name)

        self.n_latent = n_latent
        self.encoder = snt.Sequential([
            snt.Conv2D(4, kernel_size, stride=4, padding='SAME'),
            tf.nn.relu,  # [b, 250, 250, 4]
            snt.Conv2D(16, kernel_size, stride=4, padding='SAME'),
            tf.nn.relu,  # [b, 63, 63, 16]
            snt.Conv2D(32, kernel_size, stride=4, padding='SAME'),
            tf.nn.relu,  # [b, 16, 16, 32]
            snt.Conv2D(64, kernel_size, stride=2, padding='SAME'),
            tf.nn.relu,  # [b, 8, 8, 64]
            snt.Flatten()
        ])

        self.mn = snt.nets.MLP([n_latent], activation=tf.nn.relu)
        self.std = snt.nets.MLP([n_latent], activation=tf.nn.relu)

        self.decoder = snt.Sequential([
            snt.nets.MLP([8 * 8 * 64], activation=tf.nn.leaky_relu),
            snt.Reshape([8, 8, 64]),
            snt.Conv2DTranspose(64, kernel_size, stride=2, padding='SAME'),
            tf.nn.relu,  # [b, 16, 16, 64]
            snt.Conv2DTranspose(32, kernel_size, stride=4, padding='SAME'),
            tf.nn.relu,  # [b, 64, 64, 32]
            snt.Conv2DTranspose(16, kernel_size, stride=4, padding='SAME'),
            tf.nn.relu,  # [b, 256, 256, 16]
            snt.Conv2DTranspose(4, kernel_size, stride=4, padding='SAME'),
            tf.nn.relu,  # [b, 1024, 1024, 4]
            snt.Conv2D(1, kernel_size, padding='SAME')
        ])  # [b, 1024, 1024, 1]
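
    # A hypothetical forward pass for the VariationalAutoEncoder above; the
    # example only shows __init__, so the sampling step below is an assumption.
    def __call__(self, x):
        h = self.encoder(x)
        mn = self.mn(h)
        std = self.std(h)
        eps = tf.random.normal(tf.shape(std))
        z = mn + std * eps  # reparameterization trick
        return self.decoder(z), mn, std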
Example #3
 def decode(self, in_tensor):
     in_tensor = snt.Conv2DTranspose(output_channels=64,
                                     kernel_shape=3,
                                     stride=[1, 2],
                                     name='decode_conv',
                                     padding='SAME')(in_tensor)
     in_tensor = tf.nn.relu(in_tensor)
     in_tensor = snt.Conv2DTranspose(output_channels=64,
                                     kernel_shape=3,
                                     stride=[1, 2],
                                     name='decode_conv',
                                     padding='SAME')(in_tensor)
     in_tensor = tf.nn.relu(in_tensor)
     in_tensor = snt.Conv2DTranspose(output_channels=32,
                                     kernel_shape=3,
                                     stride=[1, 2],
                                     name='decode_conv',
                                     padding='SAME')(in_tensor)
     in_tensor = tf.nn.relu(in_tensor)
     in_tensor = snt.Conv2DTranspose(output_channels=32,
                                     kernel_shape=3,
                                     stride=[1, 2],
                                     name='decode_conv',
                                     padding='SAME')(in_tensor)
     in_tensor = tf.nn.relu(in_tensor)
     in_tensor = snt.Conv2DTranspose(output_channels=self._target_num_way,
                                     kernel_shape=3,
                                     stride=[1, 2],
                                     name='decode_conv',
                                     padding='SAME')(in_tensor)
     in_tensor = snt.Conv2D(output_channels=1,
                            kernel_shape=1,
                            name='decode_conv',
                            padding='SAME')(in_tensor)
     return in_tensor
Example #4
    def _build(self, x, x_sigma, x_psf):
        h = snt.Conv2D(output_channels=self._num_hiddens,
                       kernel_shape=(3, 3),
                       stride=(1, 1),
                       name="dec_1")(x)

        h = residual_stack(h, self._num_hiddens, self._num_residual_layers,
                           self._num_residual_hiddens)

        h = snt.Conv2DTranspose(output_channels=int(self._num_hiddens / 2),
                                output_shape=None,
                                kernel_shape=(4, 4),
                                stride=(2, 2),
                                name="dec_2")(h)
        h = tf.nn.relu(h)
        """ x_recon_de: reconstructed images without noise and PSF
            x_recon: output to calculate the reconstructed loss """
        x_recon_de = snt.Conv2DTranspose(output_channels=1,
                                         output_shape=None,
                                         kernel_shape=(4, 4),
                                         stride=(2, 2),
                                         name="dec_3")(h)
        # add a PSF convolution layer and noise layer
        x_recon_de = psf_layer(x_recon_de, x_psf)
        x_recon = noise_layer(x_recon_de, x_sigma)

        return x_recon
Example #5
    def __init__(self, name='MNIST_Generator', regularization=1.e-4):
        super(MNISTGenerator, self).__init__(name=name)

        reg = {
            'w': l2_regularizer(scale=regularization),
            'b': l2_regularizer(scale=regularization)
        }

        with self._enter_variable_scope():
            self.linear = snt.Linear(name='linear',
                                     output_size=3136,
                                     regularizers=reg)
            self.bn1 = snt.BatchNorm(name='batch_norm_1')
            self.reshape = snt.BatchReshape(name='reshape', shape=[7, 7, 64])
            self.deconv1 = snt.Conv2DTranspose(name='tr-conv2d_1',
                                               output_channels=64,
                                               kernel_shape=5,
                                               stride=2,
                                               regularizers=reg)
            self.bn2 = snt.BatchNorm(name='batch_norm_2')
            self.deconv2 = snt.Conv2DTranspose(name='tr-conv2d_2',
                                               output_channels=32,
                                               kernel_shape=5,
                                               stride=1,
                                               regularizers=reg)
            self.bn3 = snt.BatchNorm(name='batch_norm_3')
            self.deconv3 = snt.Conv2DTranspose(name='tr-conv2d_3',
                                               output_channels=3,
                                               kernel_shape=5,
                                               stride=2,
                                               regularizers=reg)
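
    # A plausible _build for the generator above (not shown in the example).
    # The ordering linear -> batch norm -> reshape -> deconv stack, and the
    # final tanh, are assumptions.
    def _build(self, z, is_training=True):
        h = tf.nn.relu(self.bn1(self.linear(z), is_training=is_training))
        h = self.reshape(h)
        h = tf.nn.relu(self.bn2(self.deconv1(h), is_training=is_training))
        h = tf.nn.relu(self.bn3(self.deconv2(h), is_training=is_training))
        return tf.nn.tanh(self.deconv3(h))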
Example #6
    def _build(self, x):
        h = snt.Conv2D(output_channels=self._num_hiddens,
                       kernel_shape=(3, 3),
                       stride=(1, 1),
                       name="dec_1")(x)

        h = self._dropout(h, training=self._is_training)
        h = tf.layers.batch_normalization(
            h,
            training=self._is_training,
            momentum=self._bn_momentum,
            renorm=self._bn_renormalization,
            renorm_momentum=self._bn_momentum,
            renorm_clipping=self._renorm_clipping,
            name="batch_norm_1")

        h = residual_stack(h,
                           self._num_hiddens,
                           self._num_residual_layers,
                           self._num_residual_hiddens,
                           activation=self._activation,
                           training=self._is_training,
                           prob_drop=self._prob_drop,
                           momentum=self._bn_momentum,
                           renorm=self._bn_renormalization,
                           renorm_momentum=self._bn_momentum,
                           renorm_clipping=self._renorm_clipping)

        h = snt.Conv2DTranspose(output_channels=int(self._num_hiddens / 2),
                                output_shape=None,
                                kernel_shape=(4, 4),
                                stride=(2, 2),
                                name="dec_2")(h)

        h = self._dropout(h, training=self._is_training)
        h = tf.layers.batch_normalization(
            h,
            training=self._is_training,
            momentum=self._bn_momentum,
            renorm=self._bn_renormalization,
            renorm_momentum=self._bn_momentum,
            renorm_clipping=self._renorm_clipping,
            name="batch_norm_2")

        h = self._activation(h)

        x_recon = snt.Conv2DTranspose(output_channels=3,
                                      output_shape=None,
                                      kernel_shape=(4, 4),
                                      stride=(2, 2),
                                      name="dec_3")(h)

        return x_recon
Example #7
 def _build(self, x):
     for i, l in enumerate(self.padding_linear_layers):
         x = snt.Linear(l)(x)
     for i, l in enumerate(self.layers):
         if i == 0:
             h = snt.Linear(l[1] * l[2] * l[0])(x)
             h = tf.reshape(h, [-1, l[1], l[2], l[0]])
         elif i == len(self.layers) - 1:
             h = snt.Conv2DTranspose(l[0], None, l[1], l[2])(h)
         else:
             h = tf.nn.relu(snt.Conv2DTranspose(l[0], None, l[1], l[2])(h))
     logits = h
     return logits
Example #8
  def _build_layer(self, build_recursive_skips, inputs, output_channels, layer_kwargs, is_training, decoder_skips,
                   skip_connection_filters=None, is_final=False, use_batchnorm=True):
    
    if build_recursive_skips:
      res_mask = self._build_residual_mask(inputs, stride=layer_kwargs.stride)
    else:
      res_mask = None
      
    outputs = snt.Conv2DTranspose(
      output_channels=output_channels,
      kernel_shape=4,
      data_format=self._data_format,
      **layer_kwargs)(inputs)
  
    if use_batchnorm:
      outputs = self.norm_fn(outputs)
  
    if not is_final:
      outputs = self._activation_fn(outputs)
      if skip_connection_filters is not None:
        outputs = self._connect_skips(
          outputs,
          skip_connection_filters,
          res_mask)

      decoder_skips.append(outputs)
    
      if self._dropout_rate > 0.0:
        outputs = tf.layers.dropout(
          outputs, rate=self._dropout_rate, training=is_training)
        
    return res_mask, outputs
Example #9
    def __init__(self, kernel_size=4, name=None):
        super(AutoEncoder, self).__init__(name=name)
        self.encoder = snt.Sequential([
            snt.Conv2D(4, kernel_size, stride=4, padding='SAME'), tf.nn.relu,
            snt.Conv2D(16, kernel_size, stride=4, padding='SAME'), tf.nn.relu,
            snt.Conv2D(64, kernel_size, stride=4, padding='SAME'), tf.nn.relu
        ])

        self.decoder = snt.Sequential([
            snt.Conv2DTranspose(64, kernel_size, stride=4,
                                padding='SAME'), tf.nn.relu,
            snt.Conv2DTranspose(16, kernel_size, stride=4,
                                padding='SAME'), tf.nn.relu,
            snt.Conv2DTranspose(4, kernel_size, stride=4, padding='SAME'),
            tf.nn.relu,
            snt.Conv2D(1, kernel_size, padding='SAME')
        ])
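
# Hypothetical standalone usage of the AutoEncoder above on a dummy batch;
# the 256x256 single-channel input shape is an assumption.
ae = AutoEncoder(kernel_size=4)
x = tf.zeros([2, 256, 256, 1])
recon = ae.decoder(ae.encoder(x))  # -> [2, 256, 256, 1]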
Example #10
    def __init__(self, name='Generator', latent_size=50, image_size=64, ngf=64, regularization=1.e-4):
        super(Generator, self).__init__(name=name)

        reg = {'w': l2_regularizer(scale=regularization)}

        self.conv_trs = []
        self.batch_norms = []
        self.latent_size = latent_size

        cngf, tisize = ngf // 2, 4
        while tisize != image_size:
            cngf = cngf * 2
            tisize = tisize * 2

        with self._enter_variable_scope():
            self.reshape = snt.BatchReshape(name='batch_reshape', shape=[1, 1, latent_size])
            self.conv_trs.append(snt.Conv2DTranspose(name='tr-conv2d_1',
                                                     output_channels=cngf,
                                                     kernel_shape=4,
                                                     stride=1,
                                                     padding='VALID',
                                                     regularizers=reg,
                                                     use_bias=False))
            self.batch_norms.append(snt.BatchNorm(name='batch_norm_1'))
            csize, cndf = 4, cngf
            n_layer = 2
            while csize < image_size // 2:
                self.conv_trs.append(snt.Conv2DTranspose(name='tr-conv2d_{}'.format(n_layer),
                                                         output_channels=cndf // 2,
                                                         kernel_shape=4,
                                                         stride=2,
                                                         padding='SAME',
                                                         regularizers=reg,
                                                         use_bias=False))
                self.batch_norms.append(snt.BatchNorm(name='batch_norm_{}'.format(n_layer)))
                n_layer += 1
                cndf = cndf // 2
                csize = csize * 2

            self.conv_trs.append(snt.Conv2DTranspose(name='tr-conv2d_{}'.format(n_layer),
                                                     output_channels=3,
                                                     kernel_shape=4,
                                                     stride=2,
                                                     padding='SAME',
                                                     regularizers=reg,
                                                     use_bias=False))
Example #11
def _build_conv_t_layer(conv_spec, data_format):
    return snt.Conv2DTranspose(output_channels=conv_spec.output_channels,
                               kernel_shape=conv_spec.kernel_shape,
                               stride=conv_spec.stride,
                               padding=snt.SAME,
                               use_bias=True,
                               data_format=data_format,
                               initializers=_DEFAULT_CONV_INITIALIZERS,
                               regularizers=_DEFAULT_CONV_REGULARIZERS)
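
# Hypothetical usage of the builder above. `ConvSpec` stands in for whatever
# conv_spec type the surrounding code defines, and the initializer/regularizer
# constants are assumed to exist as in the example.
import collections

ConvSpec = collections.namedtuple(
    'ConvSpec', ['output_channels', 'kernel_shape', 'stride'])

inputs = tf.zeros([1, 16, 16, 32])  # any NHWC feature map
layer = _build_conv_t_layer(ConvSpec(64, 4, 2), data_format='NHWC')
outputs = layer(inputs)  # -> [1, 32, 32, 64]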
Example #12
    def __init__(self, kernel_size=4, name=None):
        super(AutoEncoder, self).__init__(name=name)
        self.encoder = snt.Sequential([snt.Conv2D(4, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu,    # [4, 128, 128]
                                       snt.Conv2D(8, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu,    # [8, 64, 64]
                                       snt.Conv2D(16, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu,    # [16, 32, 32]
                                       snt.Conv2D(32, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu])    # [32, 16, 16]
                                       # snt.Conv2D(32, kernel_size, stride=2, padding='SAME'), tf.nn.relu,
                                       # snt.Conv2D(64, kernel_size, stride=2, padding='SAME'), tf.nn.relu])

        # self.decoder = snt.Sequential([snt.Conv2DTranspose(64, kernel_size, stride=2, padding='SAME'), tf.nn.relu,
        #                                snt.Conv2DTranspose(32, kernel_size, stride=2, padding='SAME'), tf.nn.relu,
        #                                snt.Conv2DTranspose(16, kernel_size, stride=2, padding='SAME'), tf.nn.relu,

        self.decoder = snt.Sequential([snt.Conv2DTranspose(32, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu, # [32, 16, 16]
                                       snt.Conv2DTranspose(16, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu, # [16, 32, 32]
                                       snt.Conv2DTranspose(8, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu, # [8, 64, 64]
                                       snt.Conv2DTranspose(4, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu, # [4, 128, 128]
                                       snt.Conv2D(1, kernel_size, padding='SAME')])    # [1, 256, 256]
Example #13
 def __init__(self,
              out_channel,
              dec_channel,
              num_res_blocks,
              residual_hiddens,
              the_stride,
              name='decoder'):
      super(Decoder, self).__init__(name=name)
     self.model = snt.Sequential([
         snt.Conv2D(output_channels=dec_channel,
                    kernel_shape=(3, 3),
                    stride=(1, 1),
                    name="dec_0"),
         tf.nn.relu,
     ])
     for _ in range(num_res_blocks):
         self.model = snt.Sequential(
             [self.model,
              residual_block(dec_channel, residual_hiddens)])
     if the_stride == 4:
         self.model = snt.Sequential([
             self.model,
             snt.Conv2DTranspose(output_channels=dec_channel // 2,
                                 output_shape=None,
                                 kernel_shape=(4, 4),
                                 stride=(2, 2),
                                 name="dec_1"),
             tf.nn.relu,
             snt.Conv2DTranspose(output_channels=out_channel,
                                 output_shape=None,
                                 kernel_shape=(4, 4),
                                 stride=(2, 2),
                                 name="dec_2"),
         ])
     elif the_stride == 2:
         self.model = snt.Sequential([
             self.model,
             snt.Conv2DTranspose(output_channels=out_channel,
                                 output_shape=None,
                                 kernel_shape=(4, 4),
                                 stride=(2, 2),
                                 name="dec_1"),
         ])
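
 # The example builds self.model but omits the forward pass; presumably it is
 # a single application of the Sequential (a sketch, assuming a plain snt.Module):
 def __call__(self, x):
     return self.model(x)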
Example #14
    def __init__(self,
                 dim,
                 factor=2,
                 filter_size=5,
                 num_filters=16,
                 act='relu',
                 name='upsampler'):
        super(Upsampler, self).__init__(name=name)
        self._act = Activation(act=act, verbose=True)

        with self._enter_variable_scope():
            self._conv = snt.Conv2DTranspose(num_filters,
                                             [e * factor for e in dim[1:-1]],
                                             filter_size,
                                             stride=2,
                                             use_bias=False)

            dim2 = [dim[0], dim[1] * factor, dim[2] * factor, num_filters]
            self._conv2 = snt.Conv2DTranspose(num_filters,
                                              [e * factor for e in dim2[1:-1]],
                                              filter_size,
                                              stride=2,
                                              use_bias=False)

            dim3 = [
                dim[0], dim[1] * factor * factor, dim[2] * factor * factor,
                num_filters
            ]
            self._conv3 = snt.Conv2DTranspose(num_filters,
                                              [e * factor for e in dim3[1:-1]],
                                              filter_size,
                                              stride=2,
                                              use_bias=False)

            self._seq = snt.Sequential([
                self._conv,
                self._act,
                self._conv2,
                self._act,
                self._conv3,
                self._act,
            ])
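
    # _build is not shown in the example; since __init__ assembles self._seq,
    # the forward pass is presumably a single application of it (an assumption).
    def _build(self, x):
        return self._seq(x)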
Example #15
 def __init__(
     self,
     in_channel=3,
     main_channel=128,
     num_res_blocks=2,
     residual_hiddens=32,
     embed_dim=64,
     n_embed=512,
     decay=0.99,
     commitment_cost=0.25,
 ):
     super(VQModel, self).__init__()
     self.enc_b = Encoder(main_channel,
                          num_res_blocks,
                          residual_hiddens,
                          the_stride=4)
     self.enc_t = Encoder(main_channel,
                          num_res_blocks,
                          residual_hiddens,
                          the_stride=2)
     self.quantize_conv_t = snt.Conv2D(output_channels=embed_dim,
                                       kernel_shape=(1, 1),
                                       stride=(1, 1),
                                       name="enc_1")
     self.vq_t = snt.nets.VectorQuantizerEMA(
         embedding_dim=embed_dim,
         num_embeddings=n_embed,
         commitment_cost=commitment_cost,
         decay=decay)
     self.dec_t = Decoder(embed_dim,
                          main_channel,
                          num_res_blocks,
                          residual_hiddens,
                          the_stride=2)
     self.quantize_conv_b = snt.Conv2D(output_channels=embed_dim,
                                       kernel_shape=(1, 1),
                                       stride=(1, 1),
                                       name="enc_1")
     self.vq_b = snt.nets.VectorQuantizerEMA(
         embedding_dim=embed_dim,
         num_embeddings=n_embed,
         commitment_cost=commitment_cost,
         decay=decay)
     self.upsample_t = snt.Conv2DTranspose(output_channels=embed_dim,
                                           output_shape=None,
                                           kernel_shape=(4, 4),
                                           stride=(2, 2),
                                           name="up_1")
     self.dec = Decoder(in_channel,
                        main_channel,
                        num_res_blocks,
                        residual_hiddens,
                        the_stride=4)
Example #16
 def func(name, data_format, custom_getter=None):
   conv = snt.Conv2DTranspose(
       name=name,
       output_channels=self.OUT_CHANNELS,
       kernel_shape=self.KERNEL_SHAPE,
       output_shape=(self.INPUT_SHAPE.input_height,
                     self.INPUT_SHAPE.input_width),
       use_bias=use_bias,
       initializers=create_initializers(use_bias),
       data_format=data_format,
       custom_getter=custom_getter)
   if data_format == "NHWC":
     batch_norm = snt.BatchNorm(scale=True, update_ops_collection=None)
   else:  # data_format == "NCHW"
     batch_norm = snt.BatchNorm(scale=True, update_ops_collection=None,
                                fused=True, axis=(0, 2, 3))
   return snt.Sequential([conv,
                           functools.partial(batch_norm, is_training=True)])
Example #17
  def _build_residual_mask(self, mask_inputs, stride=2):
    """Builds a spatial output mask using the input to the last layer."""
    output_mask_logits = snt.Conv2DTranspose(
      name="output_mask_logits",
      output_channels=1,
      kernel_shape=4,
      stride=stride,
      padding=snt.SAME,
      use_bias=True,
      data_format=self._data_format,
      initializers=_MASK_INITIALIZERS,
      regularizers=self._regularizers)(mask_inputs)

    output_mask = tf.sigmoid(
        output_mask_logits,
        name="output_mask")

    return output_mask
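
# A hypothetical way such a sigmoid mask can gate a skip connection (the
# example's _connect_skips is not shown); `skip_feats` and `decoder_feats`
# are stand-in tensors of matching shape.
blended = res_mask * skip_feats + (1.0 - res_mask) * decoder_feats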
Example #18
    def _build(self, x):
        h = snt.Conv2D(output_channels=self._num_hiddens,
                       kernel_shape=(3, 3),
                       stride=(1, 1),
                       initializers=self._initializers,
                       data_format=self._data_format,
                       name="pre_stack")(x)

        h = ResidualStack(  # pylint: disable=not-callable
            self._num_hiddens,
            self._num_residual_layers,
            self._num_residual_hiddens,
            initializers=self._initializers,
            data_format=self._data_format,
            name="residual_stack")(h)

        for i in range(self._num_steps):
            # Does reverse striding -- puts stride-2s after stride-1s.
            stride = (2 if (self._num_steps - 1 - i) < self._num_steps_h else 1,
                      2 if (self._num_steps - 1 - i) < self._num_steps_w else 1)
            h = snt.Conv2DTranspose(output_channels=self._num_hiddens,
                                    output_shape=None,
                                    kernel_shape=(4, 4),
                                    stride=stride,
                                    initializers=self._initializers,
                                    data_format=self._data_format,
                                    name="strided_transpose_{}".format(i))(h)
            h = tf.nn.relu(h)

        x_recon = snt.Conv2D(output_channels=self._num_output_channels,
                             kernel_shape=(3, 3),
                             stride=(1, 1),
                             initializers=self._initializers,
                             data_format=self._data_format,
                             name="final")(h)

        return x_recon
Example #19
    def __init__(self,
                 embedding_dim=64,
                 num_embeddings=64,
                 kernel_size=4,
                 name=None):
        super(VectorQuantizerVariationalAutoEncoder, self).__init__(name=name)
        # self.residual_enc = ResidualStack(num_hiddens=64, num_residual_layers=2, num_residual_hiddens=32)
        # self.residual_dec = ResidualStack(num_hiddens=64, num_residual_layers=2, num_residual_hiddens=32)
        self.residual_enc = ResidualStack(num_hiddens=32,
                                          num_residual_layers=2,
                                          num_residual_hiddens=32)
        self.residual_dec = ResidualStack(num_hiddens=32,
                                          num_residual_layers=2,
                                          num_residual_hiddens=32)

        self.embedding_dim = embedding_dim
        self.num_embeddings = num_embeddings

        self.encoder = snt.Sequential([
            snt.Conv2D(4, kernel_size, stride=2, padding='SAME', name='conv4'),
            tf.nn.leaky_relu,  # [b, 128, 128, 4]
            snt.Conv2D(8, kernel_size, stride=2, padding='SAME', name='conv8'),
            tf.nn.leaky_relu,  # [b, 64, 64, 8]
            snt.Conv2D(16,
                       kernel_size,
                       stride=2,
                       padding='SAME',
                       name='conv16'),
            tf.nn.leaky_relu,  # [b, 32, 32, 16]
            snt.Conv2D(32,
                       kernel_size,
                       stride=2,
                       padding='SAME',
                       name='conv32'),
            tf.nn.leaky_relu,  # [b, 16, 16, 32]
            # snt.Conv2D(64, kernel_size, stride=2, padding='SAME', name='conv64'), tf.nn.leaky_relu,   # [b, 8, 8, 64]
            # snt.Conv2D(64, kernel_shape=3, stride=1, padding='SAME', name='conv_enc_1'), tf.nn.leaky_relu,   # [b, 8, 8, 64]
            snt.Conv2D(32,
                       kernel_shape=3,
                       stride=1,
                       padding='SAME',
                       name='conv_enc_1'),
            tf.nn.leaky_relu,  # [b, 16, 16, 32]
            self.residual_enc
        ])

        self.VQVAE = snt.nets.VectorQuantizerEMA(embedding_dim=embedding_dim,
                                                 num_embeddings=num_embeddings,
                                                 commitment_cost=0.25,
                                                 decay=0.994413,
                                                 name='VQ')

        # self.decoder = snt.Sequential([snt.Conv2D(64, kernel_shape=3, stride=1, padding='SAME', name='conv_dec_1'), tf.nn.leaky_relu,    # [b, 8, 8, 64]
        #                                self.residual_dec,
        self.decoder = snt.Sequential([
            snt.Conv2D(32,
                       kernel_shape=3,
                       stride=1,
                       padding='SAME',
                       name='conv_dec_1'),
            tf.nn.leaky_relu,  # [b, 16, 16, 32]
            self.residual_dec,
            # snt.Conv2DTranspose(32, kernel_size, stride=2, padding='SAME', name='convt32'), tf.nn.leaky_relu,    # [b, 16, 16, 32]
            snt.Conv2DTranspose(16,
                                kernel_size,
                                stride=2,
                                padding='SAME',
                                name='convt16'),
            tf.nn.leaky_relu,  # [b, 32, 32, 16]
            snt.Conv2DTranspose(8,
                                kernel_size,
                                stride=2,
                                padding='SAME',
                                name='convt8'),
            tf.nn.leaky_relu,  # [b, 64, 64, 8]
            snt.Conv2DTranspose(4,
                                kernel_size,
                                stride=2,
                                padding='SAME',
                                name='convt4'),
            tf.nn.leaky_relu,  # [b, 128, 128, 4]
            snt.Conv2DTranspose(1,
                                kernel_size,
                                stride=2,
                                padding='SAME',
                                name='convt1'),
            tf.nn.leaky_relu,  # [b, 256, 256, 1]
        ])  # [b, 256, 256, 1]
Example #20
    def _build(self, inputs, verbose=VERBOSITY, keep_dropout_prop=0.9):
        filter_sizes = [
            EncodeProcessDecode_v8_edge_segmentation.n_conv_filters,
            EncodeProcessDecode_v8_edge_segmentation.n_conv_filters * 2
        ]

        if EncodeProcessDecode_v8_edge_segmentation.convnet_tanh:
            activation = tf.nn.tanh
        else:
            activation = tf.nn.relu
        """ get image data, get everything >except< last n elements which are non-visual (position and velocity) """
        # image_data = inputs[:, :-EncodeProcessDecode_v5_no_skip_no_core.n_neurons_nodes_non_visual]
        image_data = inputs
        """ in order to apply 2D convolutions, transform shape (batch_size, features) -> shape (batch_size, 1, 1, features)"""
        image_data = tf.expand_dims(image_data, axis=1)
        image_data = tf.expand_dims(image_data,
                                    axis=1)  # yields shape (?,1,1,latent_dim)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            image_data = snt.BatchNorm()(image_data,
                                         is_training=self._is_training)
        ''' layer 0 (1,1,latent_dim) -> (2,2,filter_sizes[1])'''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1],
                                      kernel_shape=2,
                                      stride=1,
                                      padding="VALID")(image_data)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l01_shape = outputs.get_shape()
        ''' layer 0_1 (2,2,latent_dim) -> (4,4,filter_sizes[1])'''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1],
                                      kernel_shape=2,
                                      stride=2,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l02_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
        ''' layer 0_2 (4,4,latent_dim) -> (7,10,filter_sizes[1])'''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1],
                                      output_shape=[7, 10],
                                      kernel_shape=4,
                                      stride=[1, 2],
                                      padding="VALID")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l1_shape = outputs.get_shape()
        ''' layer 2 (7,10,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1],
                                      output_shape=[15, 20],
                                      kernel_shape=[3, 2],
                                      stride=2,
                                      padding="VALID")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l2_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
        ''' layer 3 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1],
                                      kernel_shape=2,
                                      stride=1,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l3_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
        ''' layer 5 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[0]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0],
                                      kernel_shape=2,
                                      stride=1,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l5_shape = outputs.get_shape()
        ''' layer 6 (15,20,filter_sizes[0])  -> (15,20,filter_sizes[0]) '''
        outputs = snt.Conv2D(output_channels=filter_sizes[0],
                             kernel_shape=3,
                             stride=1,
                             padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        ''' layer 7 (15,20,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0],
                                      kernel_shape=2,
                                      stride=2,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l6_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
        ''' layer 8 (30,40,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0],
                                      kernel_shape=3,
                                      stride=1,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l8_shape = outputs.get_shape()
        ''' layer 9 (30,40,filter_sizes[0])  -> (30,40,filter_sizes[0]) '''
        outputs = snt.Conv2D(output_channels=filter_sizes[0],
                             kernel_shape=3,
                             stride=1,
                             padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        ''' layer 9 (30,40,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0],
                                      kernel_shape=3,
                                      stride=2,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l9_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
        ''' layer 11 (60,80,filter_sizes[0])  -> (60,80,128) '''
        outputs = snt.Conv2DTranspose(output_channels=128,
                                      kernel_shape=3,
                                      stride=1,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l11_shape = outputs.get_shape()
        ''' layer 12 (60,80,128)  -> (60,80,128) '''
        outputs = snt.Conv2D(output_channels=128,
                             kernel_shape=3,
                             stride=1,
                             padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)
        ''' layer 12 (60,80,128) -> (120,160,64) '''
        outputs = snt.Conv2DTranspose(output_channels=64,
                                      kernel_shape=3,
                                      stride=2,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l12_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)

        # outputs = outputs1 + outputs
        ''' layer 14 (120,160,64) -> (120,160,2) '''
        outputs = snt.Conv2D(output_channels=2,
                             kernel_shape=3,
                             stride=1,
                             padding="SAME")(outputs)

        if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

        l14_shape = outputs.get_shape()

        visual_latent_output = snt.BatchFlatten()(outputs)

        if verbose:
            print("Latent visual data shape", image_data.get_shape())
            print("Layer01 decoder output shape", l01_shape)
            print("Layer02 decoder output shape", l02_shape)
            print("Layer1 decoder output shape", l1_shape)
            print("Layer2 decoder output shape", l2_shape)
            print("Layer3 decoder output shape", l3_shape)
            print("Layer4 decoder output shape", l5_shape)
            print("Layer5 decoder output shape", l6_shape)
            print("Layer7 decoder output shape", l8_shape)
            print("Layer8 decoder output shape", l9_shape)
            print("Layer10 decoder output shape", l11_shape)
            print("Layer11 decoder output shape", l12_shape)
            print("Layer13 decoder output shape", l14_shape)
            print(
                "decoder shape before adding non-visual data",
                visual_latent_output.get_shape())
            # print("shape before skip3 {}".format(l1_shape))
            # print("shape after skip3 {}".format(after_skip3))
            # print("shape before skip2 {}".format(l11_shape))
            # print("shape after skip2 {}".format(after_skip2))
            # print("shape before skip1 {}".format(l17_shape))
            # print("shape after skip1 {}".format(after_skip1))

        # we assume we don't have access to pos/vel; for compatibility we fill this space with zeros
        n_non_visual_elements = 6
        non_visual_decoded_output = tf.zeros(
            shape=(tf.shape(visual_latent_output)[0], n_non_visual_elements))
        """ concatenate 6d space latent data with visual data 
        (dimensions if segmentation image only: (?, 19200)) """
        outputs = tf.concat([visual_latent_output, non_visual_decoded_output],
                            axis=1)

        if verbose:
            print("shape decoded output (visual):",
                  visual_latent_output.get_shape())
            print("shape decoded output (latent):",
                  non_visual_decoded_output.get_shape())
            print("final decoder output shape after including non-visual data",
                  outputs.get_shape())

        return outputs
Example #21
    def _build(self, inputs):
        """
        Args:
            inputs (type): node of input.
            is_training (type): tells to batchnorm if to generate the update ops.

        Returns:
            logits

        """

        net = inputs

        # LINEAR BLOCK WITH RESHAPE IF NEEDED
        # if linear_first is set, add extra Linear layers
        if self._linear_first is not None:
            self.linear_layers = [
                snt.Linear(name="linear_{}".format(i),
                           output_size=self._linear_first_sizes[i],
                           use_bias=True,
                           **self._extra_params)
                for i in range(len(self._linear_first_sizes))
            ]

            for i, layer in enumerate(self.linear_layers):
                net = layer(net)
                net = self._dropout(net, training=self._is_training)
                net = tf.layers.batch_normalization(
                    net,
                    training=self._is_training,
                    momentum=self._bn_momentum,
                    renorm=self._bn_renormalization,
                    renorm_momentum=self._bn_momentum,
                    renorm_clipping=self._renorm_clipping,
                    name="batch_norm_lin_{}".format(i))
                net = self._activation(net)

            net = snt.BatchReshape(shape=self._linear_first_reshape)(net)

        #CONV BLOCKS FROM HERE
        self.layers = [
            snt.Conv2DTranspose(name="conv_2d_T_{}".format(i),
                                output_channels=self._hidden_channels[i],
                                kernel_shape=self._kernel_shape,
                                stride=self._decide_stride(i),
                                padding=self._padding,
                                use_bias=True,
                                **self._extra_params)
            for i in range(self._num_layers - 1)
        ]

        li = self._num_layers - 1

        if self._output_shape is None:
            lastlayer = snt.Conv2DTranspose(
                name="conv_2d_T_{}".format(li),
                output_channels=self._hidden_channels[li],
                kernel_shape=self._kernel_shape,
                stride=self._decide_stride(li),
                padding=self._padding,
                use_bias=True,
                **self._extra_params)
        else:
            lastlayer = snt.Conv2DTranspose(
                name="conv_2d_T_{}".format(li),
                output_channels=self._hidden_channels[li],
                kernel_shape=self._kernel_shape,
                output_shape=self._output_shape,
                use_bias=True,
                **self._extra_params)

        self.layers.append(lastlayer)

        # connect them to the graph, adding batch norm and non-linearity
        for i, layer in enumerate(self.layers):
            net = layer(net)
            net = self._dropout(net, training=self._is_training)
            net = tf.layers.batch_normalization(
                net,
                training=self._is_training,
                momentum=self._bn_momentum,
                renorm=self._bn_renormalization,
                renorm_momentum=self._bn_momentum,
                renorm_clipping=self._renorm_clipping,
                name="batch_norm_{}".format(i))

            # no activation at the end
            if i < li:
                net = self._activation(net)

        if self._final_activation:
            net = self._activation(net)

        return net
Example #22
    def __init__(self,
                 embedding_dim=64,
                 num_embeddings=512,
                 kernel_size=4,
                 num_layers=5,
                 num_residual_layers=2,
                 name=None):
        super(VectorQuantizerVariationalAutoEncoder, self).__init__(name=name)

        self.embedding_dim = embedding_dim
        self.num_embeddings = num_embeddings
        self.num_layers = num_layers

        self.num_residual_layers = num_residual_layers

        encoder_layers = []
        decoder_layers = []
        for i in range(self.num_layers):
            num_filters = 4 * 2**i
            conv_layer = snt.Conv2D(output_channels=num_filters,
                                    kernel_shape=kernel_size,
                                    stride=2,
                                    padding='SAME',
                                    name=f'conv{num_filters}')
            residual_layer = ResidualStack(
                num_hiddens=num_filters,
                num_residual_layers=self.num_residual_layers,
                num_residual_hiddens=num_filters,
                residual_name=f'enc_{num_filters}')
            encoder_layers.append(conv_layer)
            encoder_layers.append(tf.nn.relu)
            encoder_layers.append(residual_layer)

        for i in range(self.num_layers - 2, -1, -1):
            num_filters = 4 * 2**i
            conv_layer = snt.Conv2DTranspose(output_channels=num_filters,
                                             kernel_shape=kernel_size,
                                             stride=2,
                                             padding='SAME',
                                             name=f'convt{num_filters}')
            residual_layer = ResidualStack(
                num_hiddens=num_filters,
                num_residual_layers=self.num_residual_layers,
                num_residual_hiddens=num_filters,
                residual_name=f'dec_{num_filters}')
            decoder_layers.append(conv_layer)
            decoder_layers.append(tf.nn.relu)
            decoder_layers.append(residual_layer)
        decoder_layers.append(
            snt.Conv2DTranspose(1,
                                kernel_size,
                                stride=2,
                                padding='SAME',
                                name='convt1'))
        decoder_layers.append(tf.nn.relu)
        decoder_layers.append(
            snt.Conv2D(1, kernel_size, padding='SAME', name='conv1'))

        self.encoder = snt.Sequential(encoder_layers)
        self.decoder = snt.Sequential(decoder_layers)

        self.VQVAE = snt.nets.VectorQuantizerEMA(embedding_dim=embedding_dim,
                                                 num_embeddings=num_embeddings,
                                                 commitment_cost=0.25,
                                                 decay=0.994413,
                                                 name='VQ')
Example #23
    def _build(self, inputs, verbose=VERBOSITY, keep_dropout_prop=0.9):
        filter_sizes = [EncodeProcessDecode_v6_no_core.n_conv_filters,
                        EncodeProcessDecode_v6_no_core.n_conv_filters * 2]

        if EncodeProcessDecode_v6_no_core.convnet_tanh:
            activation = tf.nn.tanh
        else:
            activation = tf.nn.relu

        """ get image data, get everything >except< last n elements which are non-visual (position and velocity) """
        # image_data = inputs[:, :-EncodeProcessDecode_v5_no_skip_no_core.n_neurons_nodes_non_visual]
        image_data = inputs

        """ in order to apply 2D convolutions, transform shape (batch_size, features) -> shape (batch_size, 1, 1, features)"""
        image_data = tf.expand_dims(image_data, axis=1)
        image_data = tf.expand_dims(image_data, axis=1)  # yields shape (?,1,1,latent_dim)

        ''' layer 0 (1,1,latent_dim) -> (2,2,filter_sizes[1])'''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], kernel_shape=2, stride=1, padding="VALID")(image_data)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(image_data, filters=filter_sizes[1], kernel_size=2, strides=2, padding='valid',
        #                                    activation=activation, use_bias=False,
        #                                     kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)
        l01_shape = outputs.get_shape()

        ''' layer 0_1 (2,2,latent_dim) -> (4,4,filter_sizes[1])'''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], kernel_shape=2, stride=2, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=2, strides=2, padding='valid',
        #                                     activation=activation, use_bias=False,
        #                                     kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)
        l02_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)

        ''' layer 0_2 (4,4,latent_dim) -> (7,10,filter_sizes[1])'''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], output_shape=[7, 10], kernel_shape=4, stride=[1, 2], padding="VALID")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=[4, 4], strides=[1, 2], padding='valid',
        #                                     activation=activation, use_bias=False,
        #                                     kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l1_shape = outputs.get_shape()

        ''' layer 2 (7,10,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], output_shape=[15, 20], kernel_shape=[3, 2], stride=2, padding="VALID")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=(3, 2), strides=2, padding='valid',
        #                                     activation=activation, use_bias=False,
        #                                     kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l2_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)

        ''' layer 3 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], kernel_shape=2, stride=1, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=2, strides=1, padding='same',
        #                                     activation=activation, use_bias=False,
        #                                     kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l3_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)

        ''' layer 5 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[0]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=2, stride=1, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=2, strides=1, padding='same',
        #                                     activation=activation, use_bias=False,
        #                                     kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)
        l5_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)

        ''' layer 6 (15,20,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=2, stride=2, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=2, strides=2, padding='same',
        #                                     activation=activation, use_bias=False,
        #                                     kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l6_shape = outputs.get_shape()

        ''' layer 7 (30,40,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
        outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation,
        #                           use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l7_shape = outputs.get_shape()

        #if is_training:
        #    outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
        #else:
        #    outputs = tf.nn.dropout(outputs, keep_prob=1.0)

        ''' layer 8 (30,40,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same',
        #                                     activation=activation, use_bias=False,
        #                                     kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l8_shape = outputs.get_shape()

        ''' layer 9 (30,40,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
        outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=3, stride=2, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=2, padding='same',
        #                                     activation=activation, use_bias=False,
        #                                     kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l9_shape = outputs.get_shape()

        ''' layer 10 (60,80,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
        outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same', activation=activation,
        #                           use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l10_shape = outputs.get_shape()

        ''' layer 11 (60,80,filter_sizes[0]) -> (60,80,128) '''
        outputs = snt.Conv2DTranspose(output_channels=128, kernel_shape=3, stride=1, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=64, kernel_size=3, strides=1, padding='same', activation=activation,
        #                                     use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l11_shape = outputs.get_shape()

        ''' layer 12 (60,80,128) -> (120,160,128) '''
        outputs = snt.Conv2DTranspose(output_channels=128, kernel_shape=3, stride=2, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=64, kernel_size=3, strides=2, padding='same', activation=activation,
        #                                     use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l12_shape = outputs.get_shape()

        ''' layer 13 (120,160,128) -> (120,160,128) '''
        outputs = snt.Conv2D(output_channels=128, kernel_shape=3, stride=1, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d(outputs, filters=64, kernel_size=3, strides=1, padding='same', activation=activation, use_bias=False,
        #                           kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l13_shape = outputs.get_shape()

        # outputs = outputs1 + outputs

        ''' layer 14 (120,160,128) -> (120,160,128) '''
        outputs = snt.Conv2DTranspose(output_channels=128, kernel_shape=3, stride=1, padding="SAME")(outputs)
        outputs = activation(outputs)
        #outputs = tf.layers.conv2d_transpose(outputs, filters=64, kernel_size=3, strides=1, padding='same', activation=activation,
        #                                     use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))

        if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
            #outputs = tf.contrib.layers.instance_norm(outputs)

        l14_shape = outputs.get_shape()

        ''' layer 15 (120,160,128) -> (120,160,2) '''
        outputs = snt.Conv2D(output_channels=2, kernel_shape=3, stride=1, padding="SAME")(outputs)
        # no activation on the final output layer
        #outputs = tf.layers.conv2d(outputs, filters=2, kernel_size=3, strides=1, padding='same', activation=None, use_bias=False,
        #                           kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
        l15_shape = outputs.get_shape()

        #visual_latent_output = tf.layers.flatten(outputs)
        visual_latent_output = snt.BatchFlatten()(outputs)
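        # flattens (?,120,160,2) to (?, 120*160*2) = (?, 38400); the (?, 19200)
        # figure mentioned below refers to a single-channel segmentation output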


        if verbose:
            print("Latent visual data shape", image_data.get_shape())
            print("Layer01 decoder output shape", l01_shape)
            print("Layer02 decoder output shape", l02_shape)
            print("Layer1 decoder output shape", l1_shape)
            print("Layer2 decoder output shape", l2_shape)
            print("Layer3 decoder output shape", l3_shape)
            print("Layer4 decoder output shape", l5_shape)
            print("Layer5 decoder output shape", l6_shape)
            print("Layer6 decoder output shape", l7_shape)
            print("Layer7 decoder output shape", l8_shape)
            print("Layer8 decoder output shape", l9_shape)
            print("Layer9 decoder output shape", l10_shape)
            print("Layer10 decoder output shape", l11_shape)
            print("Layer11 decoder output shape", l12_shape)
            print("Layer12 decoder output shape", l13_shape)
            print("Layer13 decoder output shape", l14_shape)
            print("Layer14 decoder output shape", l15_shape)
            print("decoder shape before adding non-visual data", visual_latent_output.get_shape())  # print("shape before skip3 {}".format(l1_shape))  # print("shape after skip3 {}".format(after_skip3))  # print("shape before skip2 {}".format(l11_shape))  # print("shape after skip2 {}".format(after_skip2))  # print("shape before skip1 {}".format(l17_shape))  # print("shape after skip1 {}".format(after_skip1))


        n_non_visual_elements = 6
        """ get x,y,z-position and x,y,z-velocity from n_neurons_nodes_non_visual-dimensional space """
        non_visual_latent_output = inputs[:, -EncodeProcessDecode_v6_no_core.n_neurons_nodes_non_visual:]

        # Transforms the outputs into the appropriate shape.
        """ map latent position/velocity (nodes) from 32d to original 6d space """
        n_neurons = EncodeProcessDecode_v6_no_core.n_neurons_nodes_non_visual
        n_layers = 2
        net = snt.nets.MLP([n_neurons] * n_layers, activate_final=False)
        non_visual_decoded_output = snt.Sequential([net, snt.LayerNorm(), snt.Linear(n_non_visual_elements)])(non_visual_latent_output)

        """ concatenate 6d space latent data with visual data 
        (dimensions if segmentation image only: (?, 19200)) """
        outputs = tf.concat([visual_latent_output, non_visual_decoded_output], axis=1)

        if verbose:
            print("shape decoded output (visual):", visual_latent_output.get_shape())
            print("shape decoded output (latent):", non_visual_decoded_output.get_shape())
            print("final decoder output shape after including non-visual data", outputs.get_shape())

        return outputs
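For reference, a minimal standalone sketch (assuming Sonnet 1.x and TF 1.x, as in the snippet above; the tensor and layer parameters here are illustrative, not part of the original code): with padding="SAME", snt.Conv2DTranspose multiplies each spatial dimension by its stride, which is exactly what the per-layer shape comments rely on.

import tensorflow as tf
import sonnet as snt

x = tf.zeros([1, 15, 20, 16])  # (batch, height, width, channels)
up = snt.Conv2DTranspose(output_channels=8,
                         kernel_shape=3,
                         stride=2,
                         padding="SAME")(x)
print(up.get_shape())  # (1, 30, 40, 8) -- spatial dims doubled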
Example #24
    def _build(self, inputs):
        """Constructs the generator graph.

        Args:
          inputs: `tf.Tensor` with the input of the generator.

        Returns:
          `tf.Tensor`, the generated samples.
        """
        leaky_relu_activation = lambda x: tf.maximum(0.2 * x, x)
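        # Equivalent to tf.nn.leaky_relu(x, alpha=0.2).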
        init_dict = {
            'w': tf.truncated_normal_initializer(seed=547, stddev=0.02),
            'b': tf.constant_initializer(0.3)
        }
        layer1 = snt.Linear(output_size=1024, initializers=init_dict)(inputs)
        layer2 = leaky_relu_activation(
            snt.BatchNorm(offset=1, scale=1,
                          decay_rate=0.9)(layer1,
                                          is_training=True,
                                          test_local_stats=True))
        layer3 = snt.Linear(output_size=128 * 7 * 7,
                            initializers=init_dict)(layer2)
        layer4 = leaky_relu_activation(
            snt.BatchNorm(offset=1, scale=1,
                          decay_rate=0.9)(layer3,
                                          is_training=True,
                                          test_local_stats=True))
        layer5 = snt.BatchReshape((7, 7, 128))(layer4)
        # ("Conv2DTranspose" ,{ "output_channels" : 64 ,"output_shape" : [14,14], "kernel_shape" : [4,4], "stride" : 2, "padding":"SAME" },    0),
        layer6 = snt.Conv2DTranspose(output_channels=64,
                                     output_shape=[14, 14],
                                     kernel_shape=[4, 4],
                                     stride=2,
                                     padding="SAME",
                                     initializers=init_dict)(layer5)
        layer7 = leaky_relu_activation(
            snt.BatchNorm(offset=1, scale=1,
                          decay_rate=0.9)(layer6,
                                          is_training=True,
                                          test_local_stats=True))
        # ("Conv2DTranspose" ,{ "output_channels" : 1 ,"output_shape" : [28,28], "kernel_shape" : [4,4], "stride" : 2, "padding":"SAME" },    0),
        layer8 = snt.Conv2DTranspose(output_channels=1,
                                     output_shape=[28, 28],
                                     kernel_shape=[4, 4],
                                     stride=2,
                                     padding="SAME",
                                     initializers=init_dict)(layer7)

        # net = snt.nets.ConvNet2DTranspose(
        #     output_channels=[32, 1],
        #     output_shapes=[[14, 14], [28, 28]],
        #     strides=[2],
        #     paddings=[snt.SAME],
        #     kernel_shapes=[[5, 5]],
        #     use_batch_norm=False,
        #     initializers=init_dict)

        # A sigmoid ensures that the generated samples are in the same
        # range as the data (i.e. in [0, 1]).
        return tf.nn.sigmoid(layer8)
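A minimal sketch isolating the generator's two upsampling steps (assuming Sonnet 1.x / TF 1.x as above; initializers and batch norm omitted, values illustrative): BatchReshape produces a 7x7x128 feature map, and the two SAME-padded, stride-2 transposed convolutions take it 7x7 -> 14x14 -> 28x28.

import tensorflow as tf
import sonnet as snt

h = tf.zeros([32, 7 * 7 * 128])          # batch of flat features
h = snt.BatchReshape((7, 7, 128))(h)     # -> (32, 7, 7, 128)
h = snt.Conv2DTranspose(output_channels=64, output_shape=[14, 14],
                        kernel_shape=[4, 4], stride=2, padding="SAME")(h)
h = snt.Conv2DTranspose(output_channels=1, output_shape=[28, 28],
                        kernel_shape=[4, 4], stride=2, padding="SAME")(h)
print(h.get_shape())                     # (32, 28, 28, 1)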
    def _build(self, inputs, verbose=VERBOSITY):

        if EncodeProcessDecode_v7_dropout.convnet_tanh:
            activation = tf.nn.tanh
        else:
            activation = tf.nn.relu

        image_data = tf.expand_dims(inputs, axis=1)
        image_data = tf.expand_dims(
            image_data, axis=1)  # yields shape (?,1,1,n_neurons_edges)
        if verbose: print(image_data.get_shape())
        ''' 1,1,128 --> 2, 2, 64 '''
        outputs = snt.Conv2DTranspose(output_channels=64,
                                      kernel_shape=2,
                                      stride=1,
                                      padding="VALID")(image_data)
        outputs = activation(outputs)
        if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
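            # as in the v6 decoder above: batch norm despite the flag name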
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        if verbose: print(outputs.get_shape())
        ''' 2,2,64 --> 4, 4, 64 '''
        outputs = snt.Conv2DTranspose(output_channels=64,
                                      kernel_shape=2,
                                      stride=2,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)
        if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        if verbose: print(outputs.get_shape())
        ''' 4,4,64 --> 7, 10, 32 '''
        outputs = snt.Conv2DTranspose(output_channels=32,
                                      output_shape=[7, 10],
                                      kernel_shape=4,
                                      stride=[1, 2],
                                      padding="VALID")(outputs)
        outputs = activation(outputs)
        if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        if verbose: print(outputs.get_shape())
        ''' 7,10,32 --> 15, 20, 16 '''
        outputs = snt.Conv2DTranspose(output_channels=16,
                                      output_shape=[15, 20],
                                      kernel_shape=[3, 2],
                                      stride=2,
                                      padding="VALID")(outputs)
        outputs = activation(outputs)
        if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        if verbose: print(outputs.get_shape())
        ''' 15, 20, 16 --> 30, 40, 8 '''
        outputs = snt.Conv2DTranspose(output_channels=8,
                                      kernel_shape=2,
                                      stride=2,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)
        if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        if verbose: print(outputs.get_shape())
        ''' 30, 40, 8 --> 60, 80, 2 '''
        outputs = snt.Conv2DTranspose(output_channels=2,
                                      kernel_shape=2,
                                      stride=2,
                                      padding="SAME")(outputs)
        outputs = activation(outputs)
        if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        if verbose: print(outputs.get_shape())
        ''' 60, 80, 2 --> 120, 160, 2 '''
        outputs = snt.Conv2DTranspose(output_channels=2,
                                      kernel_shape=2,
                                      stride=2,
                                      padding="SAME")(outputs)
        # no activation
        if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
            outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        if verbose: print(outputs.get_shape())

        outputs = snt.BatchFlatten()(outputs)
        if verbose: print(outputs.get_shape())
        return outputs
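The shape comments in this decoder follow the standard transposed-convolution size formulas. A small plain-Python helper (not part of the original code) makes the arithmetic explicit: with SAME padding the output size is input * stride, and with VALID padding it is (input - 1) * stride + kernel.

def deconv_out_size(in_size, kernel, stride, padding):
    """Output size of a transposed convolution along one spatial axis."""
    if padding == "SAME":
        return in_size * stride
    return (in_size - 1) * stride + kernel  # VALID

print(deconv_out_size(1, 2, 1, "VALID"))   # 2  -> the 1,1 -> 2,2 layer
print(deconv_out_size(4, 4, 2, "VALID"))   # 10 -> width of the 4,4 -> 7,10 layer
print(deconv_out_size(15, 2, 2, "SAME"))   # 30 -> the 15,20 -> 30,40 layer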