def _build_graph(self, from_input):
    latent_dim = self.latent_space.flat_dim
    small = 1e-5

    with self._variable_scope:
        with tf.variable_scope("dist_params"):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(latent_dim),
                    np.full(latent_dim, self._init_std_param)
                ], axis=0)
                b = tf.constant_initializer(b)
                # b = tf.truncated_normal_initializer(
                #     mean=b, stddev=small)
                mean_std_network = mlp(
                    with_input=from_input,
                    output_dim=latent_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    output_nonlinearity=self._output_nonlinearity,
                    output_b_init=b,
                    name="mean_std_network")
                with tf.variable_scope("mean_network"):
                    mean_network = mean_std_network[..., :latent_dim]
                with tf.variable_scope("std_network"):
                    std_network = mean_std_network[..., latent_dim:]
            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    with_input=from_input,
                    output_dim=latent_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    output_nonlinearity=self._output_nonlinearity,
                    name="mean_network")

                # std network
                if self._adaptive_std:
                    b = tf.constant_initializer(self._init_std_param)
                    # b = tf.truncated_normal_initializer(
                    #     mean=self._init_std_param, stddev=small)
                    std_network = mlp(
                        with_input=from_input,
                        output_dim=latent_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        output_b_init=b,
                        name="std_network")
                else:
                    p = tf.constant_initializer(self._init_std_param)
                    # p = tf.truncated_normal_initializer(
                    #     mean=self._init_std_param, stddev=small)
                    std_network = parameter(
                        with_input=from_input,
                        length=latent_dim,
                        initializer=p,
                        trainable=self._learn_std,
                        name="std_network")

        mean_var = mean_network
        std_param_var = std_network

        with tf.variable_scope("std_limits"):
            # compare against None so a limit of 0. is still applied
            if self._min_std_param is not None:
                std_param_var = tf.maximum(std_param_var,
                                           self._min_std_param)
            if self._max_std_param is not None:
                std_param_var = tf.minimum(std_param_var,
                                           self._max_std_param)

        with tf.variable_scope("std_parameterization"):
            # build std_var with std parameterization
            if self._std_parameterization == "exp":
                std_var = tf.exp(std_param_var)
            elif self._std_parameterization == "softplus":
                std_var = tf.log(1. + tf.exp(std_param_var))
            else:
                raise NotImplementedError

        dist = tf.contrib.distributions.MultivariateNormalDiag(
            mean_var, std_var)

        latent_var = dist.sample(seed=deterministic.get_seed())

        return latent_var, mean_var, std_param_var, dist
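# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the graph above): both settings of
# self._std_parameterization map the unconstrained std_param_var to a
# positive std. The values below are examples only.
#
#   import numpy as np
#   p = np.array([-2.0, 0.0, 2.0])   # hypothetical std_param values
#   np.exp(p)                        # "exp":      [0.135, 1.000, 7.389]
#   np.log1p(np.exp(p))              # "softplus": [0.127, 0.693, 2.127]
#
# exp is closer to linear near p = 0 in log-space; softplus grows roughly
# linearly for large p, which can make the std easier to bound.
# ---------------------------------------------------------------------------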
def _build_graph(self, from_input):
    latent_dim = self.latent_space.flat_dim

    with self._variable_scope:
        with tf.variable_scope("word2vec"):
            lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
                num_units=self._sentence_embedding_dict_dim)
            self.sentence_embedding_dict = tf.Variable(
                np.zeros((self._sentence_code_dim,
                          self._sentence_embedding_dict_dim)),
                dtype=tf.float32)
            # (bs, max_sentence_len, sentence_embedding_dict_dim)
            self.sentence_embedding = tf.nn.embedding_lookup(
                params=self.sentence_embedding_dict, ids=from_input)
            # token id 0 marks padding, so each sentence's length is its
            # number of nonzero ids
            data_mask = tf.cast(from_input, tf.bool)
            data_len = tf.reduce_sum(tf.cast(data_mask, tf.int32), axis=1)
            initial_state = lstm_cell.zero_state(
                tf.shape(self.sentence_embedding)[0], tf.float32)
            # embed the sentence as the LSTM output at the last time step;
            # note that dynamic_rnn zero-pads outputs past sequence_length,
            # so this assumes sentences fill max_sentence_len
            input_vec = tf.nn.dynamic_rnn(
                lstm_cell,
                self.sentence_embedding,
                sequence_length=data_len,
                initial_state=initial_state)[0][:, -1]

        with tf.variable_scope("dist_params"):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(latent_dim),
                    np.full(latent_dim, self._init_std_param)
                ], axis=0)
                b = tf.constant_initializer(b)
                mean_std_network = mlp(
                    with_input=input_vec,
                    output_dim=latent_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    output_nonlinearity=self._output_nonlinearity,
                    # hidden_w_init=tf.orthogonal_initializer(1.0),
                    # output_w_init=tf.orthogonal_initializer(1.0),
                    output_b_init=b,
                    name="mean_std_network")
                with tf.variable_scope("mean_network"):
                    mean_network = mean_std_network[..., :latent_dim]
                with tf.variable_scope("std_network"):
                    std_network = mean_std_network[..., latent_dim:]
            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    with_input=input_vec,
                    output_dim=latent_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    output_nonlinearity=self._output_nonlinearity,
                    name="mean_network")

                # std network
                if self._adaptive_std:
                    b = tf.constant_initializer(self._init_std_param)
                    std_network = mlp(
                        with_input=input_vec,
                        output_dim=latent_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        output_b_init=b,
                        name="std_network")
                else:
                    p = tf.constant_initializer(self._init_std_param)
                    std_network = parameter(
                        with_input=input_vec,
                        length=latent_dim,
                        initializer=p,
                        trainable=self._learn_std,
                        name="std_network")

        if self._mean_scale != 1.:
            mean_var = tf.identity(mean_network * self._mean_scale,
                                   "mean_scale")
        else:
            mean_var = mean_network

        if self._mean_output_nonlinearity is not None:
            mean_var = self._mean_output_nonlinearity(mean_var)

        std_param_var = std_network

        with tf.variable_scope("std_limits"):
            # compare against None so a limit of 0. is still applied
            if self._min_std_param is not None:
                std_param_var = tf.maximum(std_param_var,
                                           self._min_std_param)
            if self._max_std_param is not None:
                std_param_var = tf.minimum(std_param_var,
                                           self._max_std_param)

        with tf.variable_scope("std_parameterization"):
            # build std_var with std parameterization
            if self._std_parameterization == "exp":
                std_var = tf.exp(std_param_var)
            elif self._std_parameterization == "softplus":
                std_var = tf.log(1. + tf.exp(std_param_var))
            else:
                raise NotImplementedError

        if self._normalize:
            mean_var = tf.nn.l2_normalize(mean_var)
            # std_var = tf.nn.l2_normalize(std_var)

        dist = tf.contrib.distributions.MultivariateNormalDiag(
            mean_var, std_var)

        latent_var = dist.sample(seed=deterministic.get_seed())

        return latent_var, mean_var, std_param_var, dist
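# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the graph above) of the padding convention
# assumed in the "word2vec" scope: token id 0 is padding, so casting the id
# matrix to bool and summing along the time axis recovers each sentence's
# true length, which is what sequence_length receives in dynamic_rnn.
#
#   import numpy as np
#   ids = np.array([[4, 7, 2, 0, 0],   # true length 3
#                   [9, 1, 5, 3, 8]])  # true length 5
#   ids.astype(bool).sum(axis=1)       # -> array([3, 5])
# ---------------------------------------------------------------------------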