def _build(self, x):
    h = snt.Conv2D(
        output_channels=self._num_hiddens,
        kernel_shape=(3, 3),
        stride=(1, 1),
        name="dec_1")(x)
    h = residual_stack(
        h,
        self._num_hiddens,
        self._num_residual_layers,
        self._num_residual_hiddens)
    h = snt.Conv2DTranspose(
        output_channels=int(self._num_hiddens / 2),
        output_shape=None,
        kernel_shape=(4, 4),
        stride=(2, 2),
        name="dec_2")(h)
    h = tf.nn.relu(h)
    x_recon = snt.Conv2DTranspose(
        output_channels=1,
        output_shape=None,
        kernel_shape=(4, 4),
        stride=(2, 2),
        name="dec_3")(h)
    return x_recon
def __init__(self, n_latent=4, kernel_size=4, name=None):
    super(VariationalAutoEncoder, self).__init__(name=name)
    self.n_latent = n_latent
    self.encoder = snt.Sequential([
        snt.Conv2D(4, kernel_size, stride=4, padding='SAME'), tf.nn.relu,    # [b, 250, 250, 4]
        snt.Conv2D(16, kernel_size, stride=4, padding='SAME'), tf.nn.relu,   # [b, 63, 63, 16]
        snt.Conv2D(32, kernel_size, stride=4, padding='SAME'), tf.nn.relu,   # [b, 16, 16, 32]
        snt.Conv2D(64, kernel_size, stride=2, padding='SAME'), tf.nn.relu,   # [b, 8, 8, 64]
        snt.Flatten()
    ])
    self.mn = snt.nets.MLP([n_latent], activation=tf.nn.relu)
    self.std = snt.nets.MLP([n_latent], activation=tf.nn.relu)
    self.decoder = snt.Sequential([
        snt.nets.MLP([8 * 8 * 64], activation=tf.nn.leaky_relu),
        snt.Reshape([8, 8, 64]),
        snt.Conv2DTranspose(64, kernel_size, stride=2, padding='SAME'), tf.nn.relu,   # [b, 16, 16, 64]
        snt.Conv2DTranspose(32, kernel_size, stride=4, padding='SAME'), tf.nn.relu,   # [b, 64, 64, 32]
        snt.Conv2DTranspose(16, kernel_size, stride=4, padding='SAME'), tf.nn.relu,   # [b, 256, 256, 16]
        snt.Conv2DTranspose(4, kernel_size, stride=4, padding='SAME'), tf.nn.relu,    # [b, 1024, 1024, 4]
        snt.Conv2D(1, kernel_size, padding='SAME')                                    # [b, 1024, 1024, 1]
    ])
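A minimal sketch of how these submodules could be wired together in a forward pass, assuming Sonnet 2 in eager mode; the `__call__` method and the softplus on the scale head are assumptions (the snippet above only builds the submodules), and the sampling is the standard reparameterization trick:

def __call__(self, x):
    h = self.encoder(x)
    mn = self.mn(h)
    std = tf.nn.softplus(self.std(h))  # assumption: keep the scale strictly positive
    eps = tf.random.normal(tf.shape(mn))
    z = mn + std * eps                 # reparameterization: z = mu + sigma * eps
    return self.decoder(z), mn, std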
def decode(self, in_tensor):
    # Each stride-[1, 2] layer doubles the width only; the layer names are made
    # distinct so every transpose conv gets its own scope.
    in_tensor = snt.Conv2DTranspose(output_channels=64, kernel_shape=3, stride=[1, 2],
                                    name='decode_conv_1', padding='SAME')(in_tensor)
    in_tensor = tf.nn.relu(in_tensor)
    in_tensor = snt.Conv2DTranspose(output_channels=64, kernel_shape=3, stride=[1, 2],
                                    name='decode_conv_2', padding='SAME')(in_tensor)
    in_tensor = tf.nn.relu(in_tensor)
    in_tensor = snt.Conv2DTranspose(output_channels=32, kernel_shape=3, stride=[1, 2],
                                    name='decode_conv_3', padding='SAME')(in_tensor)
    in_tensor = tf.nn.relu(in_tensor)
    in_tensor = snt.Conv2DTranspose(output_channels=32, kernel_shape=3, stride=[1, 2],
                                    name='decode_conv_4', padding='SAME')(in_tensor)
    in_tensor = tf.nn.relu(in_tensor)
    in_tensor = snt.Conv2DTranspose(output_channels=self._target_num_way, kernel_shape=3,
                                    stride=[1, 2], name='decode_conv_5', padding='SAME')(in_tensor)
    in_tensor = snt.Conv2D(output_channels=1, kernel_shape=1,
                           name='decode_conv_out', padding='SAME')(in_tensor)
    return in_tensor
def _build(self, x, x_sigma, x_psf):
    h = snt.Conv2D(output_channels=self._num_hiddens, kernel_shape=(3, 3),
                   stride=(1, 1), name="dec_1")(x)
    h = residual_stack(h, self._num_hiddens, self._num_residual_layers,
                       self._num_residual_hiddens)
    h = snt.Conv2DTranspose(output_channels=int(self._num_hiddens / 2),
                            output_shape=None, kernel_shape=(4, 4),
                            stride=(2, 2), name="dec_2")(h)
    h = tf.nn.relu(h)
    # x_recon_de: reconstructed images without noise and PSF.
    # x_recon: output used to compute the reconstruction loss.
    x_recon_de = snt.Conv2DTranspose(output_channels=1, output_shape=None,
                                     kernel_shape=(4, 4), stride=(2, 2),
                                     name="dec_3")(h)
    # Add a PSF convolution layer and a noise layer.
    x_recon_de = psf_layer(x_recon_de, x_psf)
    x_recon = noise_layer(x_recon_de, x_sigma)
    return x_recon
def __init__(self, name='MNIST_Generator', regularization=1.e-4):
    super(MNISTGenerator, self).__init__(name=name)
    reg = {
        'w': l2_regularizer(scale=regularization),
        'b': l2_regularizer(scale=regularization)
    }
    with self._enter_variable_scope():
        self.linear = snt.Linear(name='linear', output_size=3136, regularizers=reg)
        self.bn1 = snt.BatchNorm(name='batch_norm_1')
        self.reshape = snt.BatchReshape(name='reshape', shape=[7, 7, 64])
        self.deconv1 = snt.Conv2DTranspose(name='tr-conv2d_1', output_channels=64,
                                           kernel_shape=5, stride=2, regularizers=reg)
        self.bn2 = snt.BatchNorm(name='batch_norm_2')
        self.deconv2 = snt.Conv2DTranspose(name='tr-conv2d_2', output_channels=32,
                                           kernel_shape=5, stride=1, regularizers=reg)
        self.bn3 = snt.BatchNorm(name='batch_norm_3')
        self.deconv3 = snt.Conv2DTranspose(name='tr-conv2d_3', output_channels=3,
                                           kernel_shape=5, stride=2, regularizers=reg)
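The constructor only registers the submodules; a plausible `_build` that chains them, as a sketch under Sonnet 1 conventions (the activation choices are illustrative, not from the source):

def _build(self, z, is_training=True):
    h = tf.nn.relu(self.bn1(self.linear(z), is_training=is_training))
    h = self.reshape(h)                                                 # [b, 7, 7, 64]
    h = tf.nn.relu(self.bn2(self.deconv1(h), is_training=is_training))  # [b, 14, 14, 64]
    h = tf.nn.relu(self.bn3(self.deconv2(h), is_training=is_training))  # [b, 14, 14, 32]
    return tf.nn.tanh(self.deconv3(h))                                  # [b, 28, 28, 3]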
def _build(self, x):
    h = snt.Conv2D(output_channels=self._num_hiddens, kernel_shape=(3, 3),
                   stride=(1, 1), name="dec_1")(x)
    h = self._dropout(h, training=self._is_training)
    h = tf.layers.batch_normalization(
        h,
        training=self._is_training,
        momentum=self._bn_momentum,
        renorm=self._bn_renormalization,
        renorm_momentum=self._bn_momentum,
        renorm_clipping=self._renorm_clipping,
        name="batch_norm_1")
    h = residual_stack(h,
                       self._num_hiddens,
                       self._num_residual_layers,
                       self._num_residual_hiddens,
                       activation=self._activation,
                       training=self._is_training,
                       prob_drop=self._prob_drop,
                       momentum=self._bn_momentum,
                       renorm=self._bn_renormalization,
                       renorm_momentum=self._bn_momentum,
                       renorm_clipping=self._renorm_clipping)
    h = snt.Conv2DTranspose(output_channels=int(self._num_hiddens / 2),
                            output_shape=None, kernel_shape=(4, 4),
                            stride=(2, 2), name="dec_2")(h)
    h = self._dropout(h, training=self._is_training)
    h = tf.layers.batch_normalization(
        h,
        training=self._is_training,
        momentum=self._bn_momentum,
        renorm=self._bn_renormalization,
        renorm_momentum=self._bn_momentum,
        renorm_clipping=self._renorm_clipping,
        name="batch_norm_2")
    h = self._activation(h)
    x_recon = snt.Conv2DTranspose(output_channels=3, output_shape=None,
                                  kernel_shape=(4, 4), stride=(2, 2),
                                  name="dec_3")(h)
    return x_recon
def _build(self, x):
    for l in self.padding_linear_layers:
        x = snt.Linear(l)(x)
    for i, l in enumerate(self.layers):
        if i == 0:
            # Project to the first feature map and reshape to [b, H, W, C].
            h = snt.Linear(l[1] * l[2] * l[0])(x)
            h = tf.reshape(h, [-1, l[1], l[2], l[0]])
        elif i == len(self.layers) - 1:
            # Last transpose conv: leave the logits un-activated.
            h = snt.Conv2DTranspose(l[0], None, l[1], l[2])(h)
        else:
            h = tf.nn.relu(snt.Conv2DTranspose(l[0], None, l[1], l[2])(h))
    logits = h
    return logits
def _build_layer(self, build_recursive_skips, inputs, output_channels,
                 layer_kwargs, is_training, decoder_skips,
                 skip_connection_filters=None, is_final=False,
                 use_batchnorm=True):
    if build_recursive_skips:
        res_mask = self._build_residual_mask(inputs, stride=layer_kwargs.stride)
    else:
        res_mask = None
    outputs = snt.Conv2DTranspose(
        output_channels=output_channels,
        kernel_shape=4,
        data_format=self._data_format,
        **layer_kwargs)(inputs)
    if use_batchnorm:
        outputs = self.norm_fn(outputs)
    if not is_final:
        outputs = self._activation_fn(outputs)
    if skip_connection_filters is not None:
        outputs = self._connect_skips(outputs, skip_connection_filters, res_mask)
    decoder_skips.append(outputs)
    if self._dropout_rate > 0.0:
        outputs = tf.layers.dropout(outputs, rate=self._dropout_rate,
                                    training=is_training)
    return res_mask, outputs
def __init__(self, kernel_size=4, name=None):
    super(AutoEncoder, self).__init__(name=name)
    self.encoder = snt.Sequential([
        snt.Conv2D(4, kernel_size, stride=4, padding='SAME'), tf.nn.relu,
        snt.Conv2D(16, kernel_size, stride=4, padding='SAME'), tf.nn.relu,
        snt.Conv2D(64, kernel_size, stride=4, padding='SAME'), tf.nn.relu
    ])
    self.decoder = snt.Sequential([
        snt.Conv2DTranspose(64, kernel_size, stride=4, padding='SAME'), tf.nn.relu,
        snt.Conv2DTranspose(16, kernel_size, stride=4, padding='SAME'), tf.nn.relu,
        snt.Conv2DTranspose(4, kernel_size, stride=4, padding='SAME'), tf.nn.relu,
        snt.Conv2D(1, kernel_size, padding='SAME')
    ])
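A quick usage sketch, assuming Sonnet 2 in eager mode; the 256x256 single-channel input is illustrative (each stride-4 stage divides or multiplies the spatial size by 4):

import numpy as np
import sonnet as snt
import tensorflow as tf

model = AutoEncoder(kernel_size=4)
x = tf.constant(np.random.rand(2, 256, 256, 1), dtype=tf.float32)
z = model.encoder(x)        # [2, 4, 4, 64] after three stride-4 convs
x_recon = model.decoder(z)  # [2, 256, 256, 1] after three stride-4 transpose convs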
def __init__(self, name='Generator', latent_size=50, image_size=64, ngf=64,
             regularization=1.e-4):
    super(Generator, self).__init__(name=name)
    reg = {'w': l2_regularizer(scale=regularization)}
    self.conv_trs = []
    self.batch_norms = []
    self.latent_size = latent_size
    # Double the channel count while the initial 4x4 target is doubled up to image_size.
    cngf, tisize = ngf // 2, 4
    while tisize != image_size:
        cngf = cngf * 2
        tisize = tisize * 2
    with self._enter_variable_scope():
        self.reshape = snt.BatchReshape(name='batch_reshape', shape=[1, 1, latent_size])
        self.conv_trs.append(snt.Conv2DTranspose(name='tr-conv2d_1', output_channels=cngf,
                                                 kernel_shape=4, stride=1, padding='VALID',
                                                 regularizers=reg, use_bias=False))
        self.batch_norms.append(snt.BatchNorm(name='batch_norm_1'))
        csize, cndf = 4, cngf
        n_layer = 2
        while csize < image_size // 2:
            self.conv_trs.append(snt.Conv2DTranspose(name='tr-conv2d_{}'.format(n_layer),
                                                     output_channels=cndf // 2,
                                                     kernel_shape=4, stride=2,
                                                     padding='SAME', regularizers=reg,
                                                     use_bias=False))
            self.batch_norms.append(snt.BatchNorm(name='batch_norm_{}'.format(n_layer)))
            n_layer += 1
            cndf = cndf // 2
            csize = csize * 2
        self.conv_trs.append(snt.Conv2DTranspose(name='tr-conv2d_{}'.format(n_layer),
                                                 output_channels=3, kernel_shape=4,
                                                 stride=2, padding='SAME',
                                                 regularizers=reg, use_bias=False))
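For concreteness, the schedule the two loops produce for the defaults (image_size=64, ngf=64), traced by hand from the code above:

# cngf, tisize: (32, 4) -> (64, 8) -> (128, 16) -> (256, 32) -> (512, 64)
#   first layer: 1x1x50 latent -> 4x4x512 (kernel 4, stride 1, VALID)
# csize, cndf : (4, 512) -> (8, 256) -> (16, 128) -> (32, 64)
#   stride-2 layers: 8x8x256 -> 16x16x128 -> 32x32x64
# final layer : 32x32x64 -> 64x64x3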
def _build_conv_t_layer(conv_spec, data_format):
    return snt.Conv2DTranspose(output_channels=conv_spec.output_channels,
                               kernel_shape=conv_spec.kernel_shape,
                               stride=conv_spec.stride,
                               padding=snt.SAME,
                               use_bias=True,
                               data_format=data_format,
                               initializers=_DEFAULT_CONV_INITIALIZERS,
                               regularizers=_DEFAULT_CONV_REGULARIZERS)
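A hypothetical call site; `ConvSpec` is an illustrative stand-in, since the source only shows that `conv_spec` carries these three attributes:

import collections

ConvSpec = collections.namedtuple('ConvSpec', ['output_channels', 'kernel_shape', 'stride'])
layer = _build_conv_t_layer(ConvSpec(output_channels=64, kernel_shape=4, stride=2),
                            data_format='NHWC')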
def __init__(self, kernel_size=4, name=None):
    super(AutoEncoder, self).__init__(name=name)
    self.encoder = snt.Sequential([
        snt.Conv2D(4, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu,    # [4, 128, 128]
        snt.Conv2D(8, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu,    # [8, 64, 64]
        snt.Conv2D(16, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu,   # [16, 32, 32]
        snt.Conv2D(32, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu])  # [32, 16, 16]
        # snt.Conv2D(32, kernel_size, stride=2, padding='SAME'), tf.nn.relu,
        # snt.Conv2D(64, kernel_size, stride=2, padding='SAME'), tf.nn.relu])
    # self.decoder = snt.Sequential([snt.Conv2DTranspose(64, kernel_size, stride=2, padding='SAME'), tf.nn.relu,
    #                                snt.Conv2DTranspose(32, kernel_size, stride=2, padding='SAME'), tf.nn.relu,
    #                                snt.Conv2DTranspose(16, kernel_size, stride=2, padding='SAME'), tf.nn.relu,
    self.decoder = snt.Sequential([
        snt.Conv2DTranspose(32, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu,  # [32, 16, 16]
        snt.Conv2DTranspose(16, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu,  # [16, 32, 32]
        snt.Conv2DTranspose(8, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu,   # [8, 64, 64]
        snt.Conv2DTranspose(4, kernel_size, stride=2, padding='SAME'), tf.nn.leaky_relu,   # [4, 128, 128]
        snt.Conv2D(1, kernel_size, padding='SAME')])                                       # [1, 256, 256]
def __init__(self, out_channel, dec_channel, num_res_blocks, residual_hiddens,
             the_stride, name='decoder'):
    super(Decoder, self).__init__(name=name)
    self.model = snt.Sequential([
        snt.Conv2D(output_channels=dec_channel, kernel_shape=(3, 3),
                   stride=(1, 1), name="dec_0"),
        tf.nn.relu,
    ])
    for _ in range(num_res_blocks):
        self.model = snt.Sequential(
            [self.model, residual_block(dec_channel, residual_hiddens)])
    if the_stride == 4:
        self.model = snt.Sequential([
            self.model,
            snt.Conv2DTranspose(output_channels=dec_channel // 2, output_shape=None,
                                kernel_shape=(4, 4), stride=(2, 2), name="dec_1"),
            tf.nn.relu,
            snt.Conv2DTranspose(output_channels=out_channel, output_shape=None,
                                kernel_shape=(4, 4), stride=(2, 2), name="dec_2"),
        ])
    elif the_stride == 2:
        self.model = snt.Sequential([
            self.model,
            snt.Conv2DTranspose(output_channels=out_channel, output_shape=None,
                                kernel_shape=(4, 4), stride=(2, 2), name="dec_1"),
        ])
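A shape sanity check of the two branches; this is a sketch only, the input sizes are illustrative, and `residual_block` is assumed to come from the surrounding module:

dec4 = Decoder(out_channel=3, dec_channel=128, num_res_blocks=2,
               residual_hiddens=32, the_stride=4)
y4 = dec4.model(tf.zeros([1, 16, 16, 64]))  # two stride-2 transposes: -> [1, 64, 64, 3]

dec2 = Decoder(out_channel=64, dec_channel=128, num_res_blocks=2,
               residual_hiddens=32, the_stride=2)
y2 = dec2.model(tf.zeros([1, 8, 8, 64]))    # one stride-2 transpose: -> [1, 16, 16, 64]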
def __init__(self, dim, factor=2, filter_size=5, num_filters=16, act='relu',
             name='upsampler'):
    super(Upsampler, self).__init__(name=name)
    self._act = Activation(act=act, verbose=True)
    with self._enter_variable_scope():
        # Each transpose conv doubles the spatial size; the output_shape is
        # passed explicitly as [H * factor, W * factor] at every stage.
        self._conv = snt.Conv2DTranspose(num_filters,
                                         [e * factor for e in dim[1:-1]],
                                         filter_size, stride=2, use_bias=False)
        dim2 = [dim[0], dim[1] * factor, dim[2] * factor, num_filters]
        self._conv2 = snt.Conv2DTranspose(num_filters,
                                          [e * factor for e in dim2[1:-1]],
                                          filter_size, stride=2, use_bias=False)
        dim3 = [dim[0], dim[1] * factor * factor, dim[2] * factor * factor,
                num_filters]
        self._conv3 = snt.Conv2DTranspose(num_filters,
                                          [e * factor for e in dim3[1:-1]],
                                          filter_size, stride=2, use_bias=False)
        self._seq = snt.Sequential([
            self._conv, self._act,
            self._conv2, self._act,
            self._conv3, self._act,
        ])
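Traced for the default factor=2 with input [b, H, W, C], the three stacked stages give 8x total upsampling:

# _conv : [b, H,  W,  C ] -> [b, 2H, 2W, num_filters]
# _conv2: [b, 2H, 2W, nf] -> [b, 4H, 4W, num_filters]
# _conv3: [b, 4H, 4W, nf] -> [b, 8H, 8W, num_filters]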
def __init__(self,
             in_channel=3,
             main_channel=128,
             num_res_blocks=2,
             residual_hiddens=32,
             embed_dim=64,
             n_embed=512,
             decay=0.99,
             commitment_cost=0.25):
    super(VQModel, self).__init__()
    self.enc_b = Encoder(main_channel, num_res_blocks, residual_hiddens, the_stride=4)
    self.enc_t = Encoder(main_channel, num_res_blocks, residual_hiddens, the_stride=2)
    self.quantize_conv_t = snt.Conv2D(output_channels=embed_dim, kernel_shape=(1, 1),
                                      stride=(1, 1), name="enc_1")
    self.vq_t = snt.nets.VectorQuantizerEMA(embedding_dim=embed_dim,
                                            num_embeddings=n_embed,
                                            commitment_cost=commitment_cost,
                                            decay=decay)
    self.dec_t = Decoder(embed_dim, main_channel, num_res_blocks, residual_hiddens,
                         the_stride=2)
    self.quantize_conv_b = snt.Conv2D(output_channels=embed_dim, kernel_shape=(1, 1),
                                      stride=(1, 1), name="enc_2")
    self.vq_b = snt.nets.VectorQuantizerEMA(embedding_dim=embed_dim,
                                            num_embeddings=n_embed,
                                            commitment_cost=commitment_cost,
                                            decay=decay)
    self.upsample_t = snt.Conv2DTranspose(output_channels=embed_dim, output_shape=None,
                                          kernel_shape=(4, 4), stride=(2, 2),
                                          name="up_1")
    self.dec = Decoder(in_channel, main_channel, num_res_blocks, residual_hiddens,
                       the_stride=4)
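The constructor mirrors the two-level VQ-VAE-2 layout; a plausible forward pass, as a sketch only (the concatenations follow the standard architecture, `Encoder`/`Decoder` are assumed to expose their stacks as `.model` like the Decoder above, and `snt.nets.VectorQuantizerEMA` returns a dict with 'quantize' and 'loss' entries):

def encode_decode(self, x, is_training):
    enc_b = self.enc_b.model(x)                       # bottom features, 4x downsampled
    enc_t = self.enc_t.model(enc_b)                   # top features, further 2x downsampled
    vq_t = self.vq_t(self.quantize_conv_t(enc_t), is_training)
    dec_t = self.dec_t.model(vq_t['quantize'])        # decode top back to bottom resolution
    vq_b = self.vq_b(self.quantize_conv_b(tf.concat([dec_t, enc_b], axis=-1)),
                     is_training)
    quant = tf.concat([self.upsample_t(vq_t['quantize']), vq_b['quantize']], axis=-1)
    x_recon = self.dec.model(quant)
    return x_recon, vq_t['loss'] + vq_b['loss']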
def func(name, data_format, custom_getter=None):
    conv = snt.Conv2DTranspose(
        name=name,
        output_channels=self.OUT_CHANNELS,
        kernel_shape=self.KERNEL_SHAPE,
        output_shape=(self.INPUT_SHAPE.input_height, self.INPUT_SHAPE.input_width),
        use_bias=use_bias,
        initializers=create_initializers(use_bias),
        data_format=data_format,
        custom_getter=custom_getter)
    if data_format == "NHWC":
        batch_norm = snt.BatchNorm(scale=True, update_ops_collection=None)
    else:  # data_format == "NCHW"
        batch_norm = snt.BatchNorm(scale=True, update_ops_collection=None,
                                   fused=True, axis=(0, 2, 3))
    return snt.Sequential([conv, functools.partial(batch_norm, is_training=True)])
def _build_residual_mask(self, mask_inputs, stride=2):
    """Builds a spatial output mask using the input to the last layer."""
    output_mask_logits = snt.Conv2DTranspose(
        name="output_mask_logits",
        output_channels=1,
        kernel_shape=4,
        stride=stride,
        padding=snt.SAME,
        use_bias=True,
        data_format=self._data_format,
        initializers=_MASK_INITIALIZERS,
        regularizers=self._regularizers)(mask_inputs)
    output_mask = tf.sigmoid(output_mask_logits, name="output_mask")
    return output_mask
def _build(self, x):
    h = snt.Conv2D(output_channels=self._num_hiddens,
                   kernel_shape=(3, 3),
                   stride=(1, 1),
                   initializers=self._initializers,
                   data_format=self._data_format,
                   name="pre_stack")(x)
    h = ResidualStack(  # pylint: disable=not-callable
        self._num_hiddens,
        self._num_residual_layers,
        self._num_residual_hiddens,
        initializers=self._initializers,
        data_format=self._data_format,
        name="residual_stack")(h)
    for i in range(self._num_steps):
        # Does reverse striding -- puts stride-2s after stride-1s.
        stride = (2 if (self._num_steps - 1 - i) < self._num_steps_h else 1,
                  2 if (self._num_steps - 1 - i) < self._num_steps_w else 1)
        h = snt.Conv2DTranspose(output_channels=self._num_hiddens,
                                output_shape=None,
                                kernel_shape=(4, 4),
                                stride=stride,
                                initializers=self._initializers,
                                data_format=self._data_format,
                                name="strided_transpose_{}".format(i))(h)
        h = tf.nn.relu(h)
    x_recon = snt.Conv2D(output_channels=self._num_output_channels,
                         kernel_shape=(3, 3),
                         stride=(1, 1),
                         initializers=self._initializers,
                         data_format=self._data_format,
                         name="final")(h)
    return x_recon
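To make the reverse striding concrete, here is the stride schedule for the illustrative values num_steps=3, num_steps_h=2, num_steps_w=1:

# i = 0: num_steps - 1 - i = 2 -> stride (1, 1)
# i = 1: num_steps - 1 - i = 1 -> stride (2, 1)
# i = 2: num_steps - 1 - i = 0 -> stride (2, 2)
# Net effect: 4x upsampling in height, 2x in width, with the strided steps last.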
def __init__(self, embedding_dim=64, num_embeddings=64, kernel_size=4, name=None):
    super(VectorQuantizerVariationalAutoEncoder, self).__init__(name=name)
    # self.residual_enc = ResidualStack(num_hiddens=64, num_residual_layers=2, num_residual_hiddens=32)
    # self.residual_dec = ResidualStack(num_hiddens=64, num_residual_layers=2, num_residual_hiddens=32)
    self.residual_enc = ResidualStack(num_hiddens=32, num_residual_layers=2,
                                      num_residual_hiddens=32)
    self.residual_dec = ResidualStack(num_hiddens=32, num_residual_layers=2,
                                      num_residual_hiddens=32)
    self.embedding_dim = embedding_dim
    self.num_embeddings = num_embeddings
    self.encoder = snt.Sequential([
        snt.Conv2D(4, kernel_size, stride=2, padding='SAME', name='conv4'), tf.nn.leaky_relu,    # [b, 128, 128, 4]
        snt.Conv2D(8, kernel_size, stride=2, padding='SAME', name='conv8'), tf.nn.leaky_relu,    # [b, 64, 64, 8]
        snt.Conv2D(16, kernel_size, stride=2, padding='SAME', name='conv16'), tf.nn.leaky_relu,  # [b, 32, 32, 16]
        snt.Conv2D(32, kernel_size, stride=2, padding='SAME', name='conv32'), tf.nn.leaky_relu,  # [b, 16, 16, 32]
        # snt.Conv2D(64, kernel_size, stride=2, padding='SAME', name='conv64'), tf.nn.leaky_relu,         # [b, 8, 8, 64]
        # snt.Conv2D(64, kernel_shape=3, stride=1, padding='SAME', name='conv_enc_1'), tf.nn.leaky_relu,  # [b, 8, 8, 64]
        snt.Conv2D(32, kernel_shape=3, stride=1, padding='SAME', name='conv_enc_1'), tf.nn.leaky_relu,    # [b, 16, 16, 32]
        self.residual_enc
    ])
    self.VQVAE = snt.nets.VectorQuantizerEMA(embedding_dim=embedding_dim,
                                             num_embeddings=num_embeddings,
                                             commitment_cost=0.25,
                                             decay=0.994413,
                                             name='VQ')
    # self.decoder = snt.Sequential([snt.Conv2D(64, kernel_shape=3, stride=1, padding='SAME', name='conv_dec_1'), tf.nn.leaky_relu,  # [b, 8, 8, 64]
    #                                self.residual_dec,
    self.decoder = snt.Sequential([
        snt.Conv2D(32, kernel_shape=3, stride=1, padding='SAME', name='conv_dec_1'), tf.nn.leaky_relu,  # [b, 16, 16, 32]
        self.residual_dec,
        # snt.Conv2DTranspose(32, kernel_size, stride=2, padding='SAME', name='convt32'), tf.nn.leaky_relu,  # [b, 16, 16, 32]
        snt.Conv2DTranspose(16, kernel_size, stride=2, padding='SAME', name='convt16'), tf.nn.leaky_relu,   # [b, 32, 32, 16]
        snt.Conv2DTranspose(8, kernel_size, stride=2, padding='SAME', name='convt8'), tf.nn.leaky_relu,     # [b, 64, 64, 8]
        snt.Conv2DTranspose(4, kernel_size, stride=2, padding='SAME', name='convt4'), tf.nn.leaky_relu,     # [b, 128, 128, 4]
        snt.Conv2DTranspose(1, kernel_size, stride=2, padding='SAME', name='convt1'), tf.nn.leaky_relu,     # [b, 256, 256, 1]
    ])
def _build(self, inputs, verbose=VERBOSITY, keep_dropout_prop=0.9):
    filter_sizes = [EncodeProcessDecode_v8_edge_segmentation.n_conv_filters,
                    EncodeProcessDecode_v8_edge_segmentation.n_conv_filters * 2]

    if EncodeProcessDecode_v8_edge_segmentation.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu

    """ get the image data, i.e. everything >except< the last n elements, which are non-visual (position and velocity) """
    # image_data = inputs[:, :-EncodeProcessDecode_v5_no_skip_no_core.n_neurons_nodes_non_visual]
    image_data = inputs

    """ in order to apply 2D convolutions, transform shape (batch_size, features) -> (batch_size, 1, 1, features) """
    image_data = tf.expand_dims(image_data, axis=1)
    image_data = tf.expand_dims(image_data, axis=1)  # yields shape (?, 1, 1, latent_dim)

    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        image_data = snt.BatchNorm()(image_data, is_training=self._is_training)

    ''' layer 0 (1,1,latent_dim) -> (2,2,filter_sizes[1]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], kernel_shape=2,
                                  stride=1, padding="VALID")(image_data)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l01_shape = outputs.get_shape()

    ''' layer 0_1 (2,2,filter_sizes[1]) -> (4,4,filter_sizes[1]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], kernel_shape=2,
                                  stride=2, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l02_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 0_2 (4,4,filter_sizes[1]) -> (7,10,filter_sizes[1]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], output_shape=[7, 10],
                                  kernel_shape=4, stride=[1, 2], padding="VALID")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l1_shape = outputs.get_shape()

    ''' layer 2 (7,10,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], output_shape=[15, 20],
                                  kernel_shape=[3, 2], stride=2, padding="VALID")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l2_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 3 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], kernel_shape=2,
                                  stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l3_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 5 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[0]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=2,
                                  stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l5_shape = outputs.get_shape()

    ''' layer 6 (15,20,filter_sizes[0]) -> (15,20,filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1,
                         padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

    ''' layer 6 (15,20,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=2,
                                  stride=2, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l6_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 8 (30,40,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=3,
                                  stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l8_shape = outputs.get_shape()

    ''' layer 9 (30,40,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1,
                         padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

    ''' layer 9 (30,40,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=3,
                                  stride=2, padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l9_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 11 (60,80,filter_sizes[0]) -> (60,80,128) '''
    outputs = snt.Conv2DTranspose(output_channels=128, kernel_shape=3, stride=1,
                                  padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l11_shape = outputs.get_shape()

    ''' layer 12 (60,80,128) -> (60,80,128) '''
    outputs = snt.Conv2D(output_channels=128, kernel_shape=3, stride=1,
                         padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)

    ''' layer 12 (60,80,128) -> (120,160,64) '''
    outputs = snt.Conv2DTranspose(output_channels=64, kernel_shape=3, stride=2,
                                  padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l12_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)
    # outputs = outputs1 + outputs

    ''' layer 14 (120,160,64) -> (120,160,2) '''
    outputs = snt.Conv2D(output_channels=2, kernel_shape=3, stride=1,
                         padding="SAME")(outputs)
    if EncodeProcessDecode_v8_edge_segmentation.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    l14_shape = outputs.get_shape()

    visual_latent_output = snt.BatchFlatten()(outputs)

    if verbose:
        print("Latent visual data shape", image_data.get_shape())
        print("Layer01 decoder output shape", l01_shape)
        print("Layer02 decoder output shape", l02_shape)
        print("Layer1 decoder output shape", l1_shape)
        print("Layer2 decoder output shape", l2_shape)
        print("Layer3 decoder output shape", l3_shape)
        print("Layer4 decoder output shape", l5_shape)
        print("Layer5 decoder output shape", l6_shape)
        print("Layer7 decoder output shape", l8_shape)
        print("Layer8 decoder output shape", l9_shape)
        print("Layer10 decoder output shape", l11_shape)
        print("Layer11 decoder output shape", l12_shape)
        print("Layer13 decoder output shape", l14_shape)
        print("decoder shape before adding non-visual data",
              visual_latent_output.get_shape())
        # print("shape before skip3 {}".format(l1_shape))
        # print("shape after skip3 {}".format(after_skip3))
        # print("shape before skip2 {}".format(l11_shape))
        # print("shape after skip2 {}".format(after_skip2))
        # print("shape before skip1 {}".format(l17_shape))
        # print("shape after skip1 {}".format(after_skip1))

    # We assume we don't have access to pos/vel; for compatibility reasons this
    # space is filled with zeros.
    n_non_visual_elements = 6
    non_visual_decoded_output = tf.zeros(
        shape=(tf.shape(visual_latent_output)[0], n_non_visual_elements))

    """ concatenate the 6d latent data with the visual data (dimensions, if segmentation image only: (?, 19200)) """
    outputs = tf.concat([visual_latent_output, non_visual_decoded_output], axis=1)

    if verbose:
        print("shape decoded output (visual):", visual_latent_output.get_shape())
        print("shape decoded output (latent):", non_visual_decoded_output.get_shape())
        print("final decoder output shape after including non-visual data",
              outputs.get_shape())
    return outputs
def _build(self, inputs):
    """Builds the decoder graph.

    Args:
        inputs: input node.

    Returns:
        logits
    """
    net = inputs

    # LINEAR BLOCK, WITH RESHAPE IF NEEDED.
    # If linear_first is set, extra Linear layers are prepended.
    if self._linear_first is not None:
        self.linear_layers = [
            snt.Linear(name="linear_{}".format(i),
                       output_size=self._linear_first_sizes[i],
                       use_bias=True,
                       **self._extra_params)
            for i in range(len(self._linear_first_sizes))
        ]
        for i, layer in enumerate(self.linear_layers):
            net = layer(net)
            net = self._dropout(net, training=self._is_training)
            net = tf.layers.batch_normalization(
                net,
                training=self._is_training,
                momentum=self._bn_momentum,
                renorm=self._bn_renormalization,
                renorm_momentum=self._bn_momentum,
                renorm_clipping=self._renorm_clipping,
                name="batch_norm_lin_{}".format(i))
            net = self._activation(net)
        net = snt.BatchReshape(shape=self._linear_first_reshape)(net)

    # CONV BLOCKS FROM HERE.
    self.layers = [
        snt.Conv2DTranspose(name="conv_2d_T_{}".format(i),
                            output_channels=self._hidden_channels[i],
                            kernel_shape=self._kernel_shape,
                            stride=self._decide_stride(i),
                            padding=self._padding,
                            use_bias=True,
                            **self._extra_params)
        for i in range(self._num_layers - 1)
    ]
    li = self._num_layers - 1
    if self._output_shape is None:
        lastlayer = snt.Conv2DTranspose(name="conv_2d_T_{}".format(li),
                                        output_channels=self._hidden_channels[li],
                                        kernel_shape=self._kernel_shape,
                                        stride=self._decide_stride(li),
                                        padding=self._padding,
                                        use_bias=True,
                                        **self._extra_params)
    else:
        lastlayer = snt.Conv2DTranspose(name="conv_2d_T_{}".format(li),
                                        output_channels=self._hidden_channels[li],
                                        kernel_shape=self._kernel_shape,
                                        output_shape=self._output_shape,
                                        use_bias=True,
                                        **self._extra_params)
    self.layers.append(lastlayer)

    # Connect them to the graph, adding batch norm and the non-linearity.
    for i, layer in enumerate(self.layers):
        net = layer(net)
        net = self._dropout(net, training=self._is_training)
        net = tf.layers.batch_normalization(
            net,
            training=self._is_training,
            momentum=self._bn_momentum,
            renorm=self._bn_renormalization,
            renorm_momentum=self._bn_momentum,
            renorm_clipping=self._renorm_clipping,
            name="batch_norm_{}".format(i))
        # No activation at the end.
        if i < li:
            net = self._activation(net)
    if self._final_activation:
        net = self._activation(net)
    return net
def __init__(self, embedding_dim=64, num_embeddings=512, kernel_size=4,
             num_layers=5, num_residual_layers=2, name=None):
    super(VectorQuantizerVariationalAutoEncoder, self).__init__(name=name)
    self.embedding_dim = embedding_dim
    self.num_embeddings = num_embeddings
    self.num_layers = num_layers
    self.num_residual_layers = num_residual_layers
    encoder_layers = []
    decoder_layers = []
    for i in range(self.num_layers):
        num_filters = 4 * 2**i
        conv_layer = snt.Conv2D(output_channels=num_filters,
                                kernel_shape=kernel_size,
                                stride=2,
                                padding='SAME',
                                name=f'conv{num_filters}')
        residual_layer = ResidualStack(num_hiddens=num_filters,
                                       num_residual_layers=self.num_residual_layers,
                                       num_residual_hiddens=num_filters,
                                       residual_name=f'enc_{num_filters}')
        encoder_layers.append(conv_layer)
        encoder_layers.append(tf.nn.relu)
        encoder_layers.append(residual_layer)
    for i in range(self.num_layers - 2, -1, -1):
        num_filters = 4 * 2**i
        conv_layer = snt.Conv2DTranspose(output_channels=num_filters,
                                         kernel_shape=kernel_size,
                                         stride=2,
                                         padding='SAME',
                                         name=f'convt{num_filters}')
        residual_layer = ResidualStack(num_hiddens=num_filters,
                                       num_residual_layers=self.num_residual_layers,
                                       num_residual_hiddens=num_filters,
                                       residual_name=f'dec_{num_filters}')
        decoder_layers.append(conv_layer)
        decoder_layers.append(tf.nn.relu)
        decoder_layers.append(residual_layer)
    decoder_layers.append(
        snt.Conv2DTranspose(1, kernel_size, stride=2, padding='SAME', name='convt1'))
    decoder_layers.append(tf.nn.relu)
    decoder_layers.append(
        snt.Conv2D(1, kernel_size, padding='SAME', name='conv1'))
    self.encoder = snt.Sequential(encoder_layers)
    self.decoder = snt.Sequential(decoder_layers)
    self.VQVAE = snt.nets.VectorQuantizerEMA(embedding_dim=embedding_dim,
                                             num_embeddings=num_embeddings,
                                             commitment_cost=0.25,
                                             decay=0.994413,
                                             name='VQ')
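For the default num_layers=5, the two loops produce this filter schedule (traced from the code above; each stride-2 stage halves or doubles the spatial size):

# encoder: conv4 -> conv8 -> conv16 -> conv32 -> conv64     (5 stride-2 convs: 32x downsampling)
# decoder: convt32 -> convt16 -> convt8 -> convt4 -> convt1 (5 stride-2 transposes: 32x upsampling),
#          followed by a stride-1 conv1 output layer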
def _build(self, inputs, verbose=VERBOSITY, keep_dropout_prop=0.9):
    filter_sizes = [EncodeProcessDecode_v6_no_core.n_conv_filters,
                    EncodeProcessDecode_v6_no_core.n_conv_filters * 2]

    if EncodeProcessDecode_v6_no_core.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu

    """ get the image data, i.e. everything >except< the last n elements, which are non-visual (position and velocity) """
    # image_data = inputs[:, :-EncodeProcessDecode_v5_no_skip_no_core.n_neurons_nodes_non_visual]
    image_data = inputs

    """ in order to apply 2D convolutions, transform shape (batch_size, features) -> (batch_size, 1, 1, features) """
    image_data = tf.expand_dims(image_data, axis=1)
    image_data = tf.expand_dims(image_data, axis=1)  # yields shape (?, 1, 1, latent_dim)

    ''' layer 0 (1,1,latent_dim) -> (2,2,filter_sizes[1]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], kernel_shape=2,
                                  stride=1, padding="VALID")(image_data)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(image_data, filters=filter_sizes[1], kernel_size=2, strides=2,
    #                                      padding='valid', activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l01_shape = outputs.get_shape()

    ''' layer 0_1 (2,2,filter_sizes[1]) -> (4,4,filter_sizes[1]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], kernel_shape=2,
                                  stride=2, padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=2, strides=2,
    #                                      padding='valid', activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l02_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 0_2 (4,4,filter_sizes[1]) -> (7,10,filter_sizes[1]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], output_shape=[7, 10],
                                  kernel_shape=4, stride=[1, 2], padding="VALID")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=[4, 4], strides=[1, 2],
    #                                      padding='valid', activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l1_shape = outputs.get_shape()

    ''' layer 2 (7,10,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], output_shape=[15, 20],
                                  kernel_shape=[3, 2], stride=2, padding="VALID")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=(3, 2), strides=2,
    #                                      padding='valid', activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l2_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 3 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[1]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[1], kernel_shape=2,
                                  stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[1], kernel_size=2, strides=1,
    #                                      padding='same', activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l3_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 5 (15,20,filter_sizes[1]) -> (15,20,filter_sizes[0]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=2,
                                  stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=2, strides=1,
    #                                      padding='same', activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l5_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 6 (15,20,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=2,
                                  stride=2, padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=2, strides=2,
    #                                      padding='same', activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l6_shape = outputs.get_shape()

    ''' layer 7 (30,40,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1,
                         padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same',
    #                            activation=activation, use_bias=False,
    #                            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l7_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 8 (30,40,filter_sizes[0]) -> (30,40,filter_sizes[0]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=3,
                                  stride=1, padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=1,
    #                                      padding='same', activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l8_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 9 (30,40,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = snt.Conv2DTranspose(output_channels=filter_sizes[0], kernel_shape=3,
                                  stride=2, padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=filter_sizes[0], kernel_size=3, strides=2,
    #                                      padding='same', activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l9_shape = outputs.get_shape()

    ''' layer 10 (60,80,filter_sizes[0]) -> (60,80,filter_sizes[0]) '''
    outputs = snt.Conv2D(output_channels=filter_sizes[0], kernel_shape=3, stride=1,
                         padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d(outputs, filters=filter_sizes[0], kernel_size=3, strides=1, padding='same',
    #                            activation=activation, use_bias=False,
    #                            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l10_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 11 (60,80,filter_sizes[0]) -> (60,80,128) '''
    outputs = snt.Conv2DTranspose(output_channels=128, kernel_shape=3, stride=1,
                                  padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=64, kernel_size=3, strides=1, padding='same',
    #                                      activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l11_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 12 (60,80,128) -> (120,160,128) '''
    outputs = snt.Conv2DTranspose(output_channels=128, kernel_shape=3, stride=2,
                                  padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=64, kernel_size=3, strides=2, padding='same',
    #                                      activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l12_shape = outputs.get_shape()

    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 13 (120,160,128) -> (120,160,128) '''
    outputs = snt.Conv2D(output_channels=128, kernel_shape=3, stride=1,
                         padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d(outputs, filters=64, kernel_size=3, strides=1, padding='same',
    #                            activation=activation, use_bias=False,
    #                            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l13_shape = outputs.get_shape()

    # outputs = outputs1 + outputs
    # if is_training:
    #     outputs = tf.nn.dropout(outputs, keep_prob=keep_dropout_prop)
    # else:
    #     outputs = tf.nn.dropout(outputs, keep_prob=1.0)

    ''' layer 14 (120,160,128) -> (120,160,128) '''
    outputs = snt.Conv2DTranspose(output_channels=128, kernel_shape=3, stride=1,
                                  padding="SAME")(outputs)
    outputs = activation(outputs)
    # outputs = tf.layers.conv2d_transpose(outputs, filters=64, kernel_size=3, strides=1, padding='same',
    #                                      activation=activation, use_bias=False,
    #                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    if EncodeProcessDecode_v6_no_core.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
        # outputs = tf.contrib.layers.instance_norm(outputs)
    l14_shape = outputs.get_shape()

    ''' layer 15 (120,160,128) -> (120,160,2) '''
    outputs = snt.Conv2D(output_channels=2, kernel_shape=3, stride=1,
                         padding="SAME")(outputs)
    # outputs = activation(outputs)
    # outputs = tf.layers.conv2d(outputs, filters=2, kernel_size=3, strides=1, padding='same',
    #                            activation=None, use_bias=False,
    #                            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-05))
    l15_shape = outputs.get_shape()

    # visual_latent_output = tf.layers.flatten(outputs)
    visual_latent_output = snt.BatchFlatten()(outputs)

    if verbose:
        print("Latent visual data shape", image_data.get_shape())
        print("Layer01 decoder output shape", l01_shape)
        print("Layer02 decoder output shape", l02_shape)
        print("Layer1 decoder output shape", l1_shape)
        print("Layer2 decoder output shape", l2_shape)
        print("Layer3 decoder output shape", l3_shape)
        print("Layer4 decoder output shape", l5_shape)
        print("Layer5 decoder output shape", l6_shape)
        print("Layer6 decoder output shape", l7_shape)
        print("Layer7 decoder output shape", l8_shape)
        print("Layer8 decoder output shape", l9_shape)
        print("Layer9 decoder output shape", l10_shape)
        print("Layer10 decoder output shape", l11_shape)
        print("Layer11 decoder output shape", l12_shape)
        print("Layer12 decoder output shape", l13_shape)
        print("Layer13 decoder output shape", l14_shape)
        print("Layer14 decoder output shape", l15_shape)
        print("decoder shape before adding non-visual data",
              visual_latent_output.get_shape())
        # print("shape before skip3 {}".format(l1_shape))
        # print("shape after skip3 {}".format(after_skip3))
        # print("shape before skip2 {}".format(l11_shape))
        # print("shape after skip2 {}".format(after_skip2))
        # print("shape before skip1 {}".format(l17_shape))
        # print("shape after skip1 {}".format(after_skip1))

    n_non_visual_elements = 6
    """ get x,y,z-position and x,y,z-velocity from the n_neurons_nodes_non_visual-dimensional space """
    non_visual_latent_output = inputs[:, -EncodeProcessDecode_v6_no_core.n_neurons_nodes_non_visual:]

    # Transform the outputs into the appropriate shape.
    """ map latent position/velocity (nodes) from 32d back to the original 6d space """
    n_neurons = EncodeProcessDecode_v6_no_core.n_neurons_nodes_non_visual
    n_layers = 2
    net = snt.nets.MLP([n_neurons] * n_layers, activate_final=False)
    non_visual_decoded_output = snt.Sequential(
        [net, snt.LayerNorm(), snt.Linear(n_non_visual_elements)])(non_visual_latent_output)

    """ concatenate the 6d latent data with the visual data (dimensions, if segmentation image only: (?, 19200)) """
    outputs = tf.concat([visual_latent_output, non_visual_decoded_output], axis=1)

    if verbose:
        print("shape decoded output (visual):", visual_latent_output.get_shape())
        print("shape decoded output (latent):", non_visual_decoded_output.get_shape())
        print("final decoder output shape after including non-visual data",
              outputs.get_shape())
    return outputs
def _build(self, inputs):
    """Constructs the generator graph.

    Args:
        inputs: `tf.Tensor` with the input of the generator.

    Returns:
        `tf.Tensor`, the generated samples.
    """
    leaky_relu_activation = lambda x: tf.maximum(0.2 * x, x)
    init_dict = {
        'w': tf.truncated_normal_initializer(seed=547, stddev=0.02),
        'b': tf.constant_initializer(0.3)
    }
    layer1 = snt.Linear(output_size=1024, initializers=init_dict)(inputs)
    layer2 = leaky_relu_activation(
        snt.BatchNorm(offset=1, scale=1, decay_rate=0.9)(layer1, is_training=True,
                                                         test_local_stats=True))
    layer3 = snt.Linear(output_size=128 * 7 * 7, initializers=init_dict)(layer2)
    layer4 = leaky_relu_activation(
        snt.BatchNorm(offset=1, scale=1, decay_rate=0.9)(layer3, is_training=True,
                                                         test_local_stats=True))
    layer5 = snt.BatchReshape((7, 7, 128))(layer4)
    # ("Conv2DTranspose", {"output_channels": 64, "output_shape": [14, 14],
    #                      "kernel_shape": [4, 4], "stride": 2, "padding": "SAME"}, 0),
    layer6 = snt.Conv2DTranspose(output_channels=64, output_shape=[14, 14],
                                 kernel_shape=[4, 4], stride=2, padding="SAME",
                                 initializers=init_dict)(layer5)
    layer7 = leaky_relu_activation(
        snt.BatchNorm(offset=1, scale=1, decay_rate=0.9)(layer6, is_training=True,
                                                         test_local_stats=True))
    # ("Conv2DTranspose", {"output_channels": 1, "output_shape": [28, 28],
    #                      "kernel_shape": [4, 4], "stride": 2, "padding": "SAME"}, 0),
    layer8 = snt.Conv2DTranspose(output_channels=1, output_shape=[28, 28],
                                 kernel_shape=[4, 4], stride=2, padding="SAME",
                                 initializers=init_dict)(layer7)
    # Reshape the data to have rank 4.
    # inputs = leaky_relu_activation(inputs)
    # net = snt.nets.ConvNet2DTranspose(
    #     output_channels=[32, 1],
    #     output_shapes=[[14, 14], [28, 28]],
    #     strides=[2],
    #     paddings=[snt.SAME],
    #     kernel_shapes=[[5, 5]],
    #     use_batch_norm=False,
    #     initializers=init_dict)
    # # We use tanh to ensure that the generated samples are in the same range
    # # as the data.
    return tf.nn.sigmoid(layer8)
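Why the explicit output_shape arguments: a stride-2 SAME transpose convolution does not determine its output size uniquely, so Sonnet lets the caller pin it down. Traced for the shapes above:

# A stride-2 SAME forward conv maps input height o to ceil(o / 2), so both o = 13
# and o = 14 land on 7; passing output_shape=[14, 14] here (and [28, 28] in the
# next layer) resolves that ambiguity in the transpose direction.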
def _build(self, inputs, verbose=VERBOSITY):
    if EncodeProcessDecode_v7_dropout.convnet_tanh:
        activation = tf.nn.tanh
    else:
        activation = tf.nn.relu

    image_data = tf.expand_dims(inputs, axis=1)
    image_data = tf.expand_dims(image_data, axis=1)  # yields shape (?, 1, 1, n_neurons_edges)
    if verbose:
        print(image_data.get_shape())

    ''' 1,1,128 --> 2,2,64 '''
    outputs = snt.Conv2DTranspose(output_channels=64, kernel_shape=2, stride=1,
                                  padding="VALID")(image_data)
    outputs = activation(outputs)
    if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    if verbose:
        print(outputs.get_shape())

    ''' 2,2,64 --> 4,4,64 '''
    outputs = snt.Conv2DTranspose(output_channels=64, kernel_shape=2, stride=2,
                                  padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    if verbose:
        print(outputs.get_shape())

    ''' 4,4,64 --> 7,10,32 '''
    outputs = snt.Conv2DTranspose(output_channels=32, output_shape=[7, 10],
                                  kernel_shape=4, stride=[1, 2],
                                  padding="VALID")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    if verbose:
        print(outputs.get_shape())

    ''' 7,10,32 --> 15,20,16 '''
    outputs = snt.Conv2DTranspose(output_channels=16, output_shape=[15, 20],
                                  kernel_shape=[3, 2], stride=2,
                                  padding="VALID")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    if verbose:
        print(outputs.get_shape())

    ''' 15,20,16 --> 30,40,8 '''
    outputs = snt.Conv2DTranspose(output_channels=8, kernel_shape=2, stride=2,
                                  padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    if verbose:
        print(outputs.get_shape())

    ''' 30,40,8 --> 60,80,2 '''
    outputs = snt.Conv2DTranspose(output_channels=2, kernel_shape=2, stride=2,
                                  padding="SAME")(outputs)
    outputs = activation(outputs)
    if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    if verbose:
        print(outputs.get_shape())

    ''' 60,80,2 --> 120,160,2 '''
    outputs = snt.Conv2DTranspose(output_channels=2, kernel_shape=2, stride=2,
                                  padding="SAME")(outputs)  # no activation
    if EncodeProcessDecode_v7_dropout.conv_layer_instance_norm:
        outputs = snt.BatchNorm()(outputs, is_training=self._is_training)
    if verbose:
        print(outputs.get_shape())

    outputs = snt.BatchFlatten()(outputs)
    if verbose:
        print(outputs.get_shape())
    return outputs