def dense_aac_network(x, ac_space, name='dense_aac', linear_layer_ref=noisy_linear, reuse=False):
    """
    Stage3 network: maps flattened LSTM output to advantage actor-critic heads.

    Args:
        x:                  input tensor, batch-wise flattened features
        ac_space:           int, action space dimensionality
        name:               str, variable scope name
        linear_layer_ref:   linear layer class to use for both heads
        reuse:              bool, whether to reuse variables in scope

    Returns:
        logits tensor (centered via norm_layer)
        value function tensor, shape [batch]
        action sampling tensor
    """
    with tf.variable_scope(name, reuse=reuse):
        # Policy head; logits are passed through norm_layer with centering
        # only (center=True, scale=False):
        logits = norm_layer(
            linear_layer_ref(
                x=x,
                size=ac_space,
                name='action',
                initializer=normalized_columns_initializer(0.01),
                reuse=reuse
            ),
            center=True,
            scale=False,
        )
        # Value head, flattened to shape [batch]:
        vf = tf.reshape(
            linear_layer_ref(
                x=x,
                size=1,
                name="value",
                initializer=normalized_columns_initializer(1.0),
                reuse=reuse
            ),
            [-1]
        )
        # Single action sample drawn from the categorical policy:
        sample = categorical_sample(logits, ac_space)[0, :]

    return logits, vf, sample
def conv2d_autoencoder(inputs, layer_config, resize_method=tf.image.ResizeMethod.BILINEAR, pad='SAME', linear_layer_ref=linear, name='base_conv2d_autoencoder', reuse=False, **kwargs):
    """
    Basic convolutional autoencoder whose hidden state is passed through a
    dense linear layer.

    Args:
        inputs:             input tensor
        layer_config:       layers configuration list: [layer_1_config, layer_2_config,...], where:
                            layer_i_config = [num_filters(int), filter_size(list), stride(list)];
                            this list describes the encoder part of the autoencoder bottleneck,
                            decoder part is inferred symmetrically
        resize_method:      up-sampling method, one of supported tf.image.ResizeMethod's
        pad:                str, padding scheme: 'SAME' or 'VALID'
        linear_layer_ref:   linear layer class to use
        name:               str, name scope
        reuse:              bool

    Returns:
        list of tensors holding encoded features, layer-wise from outer to inner
        tensor holding batch-wise flattened hidden state vector
        list of tensors holding decoded features, layer-wise from inner to outer
        tensor holding reconstructed output
        None value
    """
    with tf.variable_scope(name, reuse=reuse):
        # Encoder stack:
        encoded, conv_shapes = conv2d_encoder(x=inputs, layer_config=layer_config, pad=pad, reuse=reuse)

        # Dense bottleneck over the flattened innermost feature map:
        height, width, channels = encoded[-1].get_shape().as_list()[1:]
        hidden = linear_layer_ref(
            x=batch_flatten(encoded[-1]),
            size=height * width * channels,
            name='hidden_dense',
            initializer=normalized_columns_initializer(1.0),
            reuse=reuse
        )

        # Restore spatial shape and decode symmetrically:
        decoded = conv2d_decoder(
            z=tf.reshape(hidden, [-1, height, width, channels]),
            layer_config=layer_config,
            layer_shapes=conv_shapes,
            pad=pad,
            resize_method=resize_method,
            reuse=reuse
        )
        reconstruction = decoded[-1]

    return encoded, hidden, decoded, reconstruction, None
def dense_rp_network(x, linear_layer_ref=noisy_linear):
    """
    Stage3 network: from shared convolutions to reward-prediction task output.

    Args:
        x:                  input tensor, batch-wise flattened features
        linear_layer_ref:   linear layer class to use

    Returns:
        3-way classification logits tensor; note softmax is not applied
        here but inside the loss operation (see losses.py).
    """
    # Fully connected x128 followed by 3-way classifier, as in paper:
    x = tf.nn.elu(
        linear_layer_ref(x=x, size=128, name='rp_dense', initializer=normalized_columns_initializer(0.01))
    )
    logits = linear_layer_ref(x=x, size=3, name='rp_classifier', initializer=normalized_columns_initializer(0.01))
    return logits
def beta_var_conv2d_autoencoder(
        inputs,
        layer_config,
        resize_method=tf.image.ResizeMethod.BILINEAR,
        pad='SAME',
        linear_layer_ref=linear,
        name='vae_conv2d',
        max_batch_size=256,
        reuse=False
):
    """
    Variational autoencoder.

    Papers:
        https://arxiv.org/pdf/1312.6114.pdf
        https://arxiv.org/pdf/1606.05908.pdf
        http://www.matthey.me/pdf/betavae_iclr_2017.pdf

    Args:
        inputs:             input tensor
        layer_config:       layers configuration list: [layer_1_config, layer_2_config,...], where:
                            layer_i_config = [num_filters(int), filter_size(list), stride(list)];
                            this list describes the encoder part of the autoencoder bottleneck,
                            decoder part is inferred symmetrically
        resize_method:      up-sampling method, one of supported tf.image.ResizeMethod's
        pad:                str, padding scheme: 'SAME' or 'VALID'
        linear_layer_ref:   linear layer class - not used
        name:               str, name scope
        max_batch_size:     int, dynamic batch size should be no greater than this value
        reuse:              bool

    Returns:
        list of tensors holding encoded features, layer-wise from outer to inner
        tensor holding batch-wise flattened sampled hidden state vector
        list of tensors holding decoded features, layer-wise from inner to outer
        tensor holding reconstructed output
        tensor holding estimated KL divergence
    """
    with tf.variable_scope(name, reuse=reuse):
        # Encoder stack:
        encoder_layers, shapes = conv2d_encoder(
            x=inputs,
            layer_config=layer_config,
            pad=pad,
            reuse=reuse
        )
        # Flatten innermost feature map and pass through dense layer:
        z_flat = batch_flatten(encoder_layers[-1])
        h, w, c = encoder_layers[-1].get_shape().as_list()[1:]
        z = tf.nn.elu(
            linear(
                x=z_flat,
                size=h * w * c,
                name='enc_dense',
                initializer=normalized_columns_initializer(1.0),
                reuse=reuse
            )
        )
        # Latent dimensionality is half the flattened feature size; the
        # dense layer below emits [mu, log_sigma] concatenated, hence 2x.
        # TODO(review): original note says "revert back to doubled Z-size"
        # (i.e. size_z = h * w * c) — confirm intended latent size.
        size_z = int(h * w * c / 2)
        z = tf.nn.elu(
            linear(
                x=z,
                size=size_z * 2,
                name='hidden_dense',
                initializer=normalized_columns_initializer(1.0),
                reuse=reuse
            )
        )
        # Gaussian posterior parameters:
        mu, log_sigma = tf.split(z, [size_z, size_z], axis=-1)

        # Oversized noise generator, sliced down to the dynamic batch size
        # (which therefore must not exceed max_batch_size):
        eps = tf.random_normal(shape=[max_batch_size, size_z], mean=0., stddev=1.)
        eps = eps[:tf.shape(z)[0], :]

        # Reparameterization trick, sample z ~ Q(z|X):
        z_sampled = mu + tf.exp(log_sigma / 2) * eps

        # Element-wise D_KL(Q(z|X) || P(z)).
        # NOTE(review): the analytic KL term is normally summed over latent
        # dimensions; no reduction is done here — presumably it happens in
        # the loss operation (see losses.py) — TODO confirm.
        d_kl = 0.5 * (tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma)

        # Project sample back to conv feature shape and decode:
        z_sampled_dec = tf.nn.elu(
            linear(
                x=z_sampled,
                size=h * w * c,
                name='dec_dense',
                initializer=normalized_columns_initializer(1.0),
                reuse=reuse
            )
        )
        decoder_layers = conv2d_decoder(
            z=tf.reshape(z_sampled_dec, [-1, h, w, c]),
            layer_config=layer_config,
            layer_shapes=shapes,
            pad=pad,
            resize_method=resize_method,
            reuse=reuse
        )
        y_hat = decoder_layers[-1]
    return encoder_layers, z_sampled, decoder_layers, y_hat, d_kl