Example #1
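    # Assumed context (not shown in this snippet): `import numpy as np`,
    # `import tensorflow as tf` (1.x, graph mode), and the project's own
    # `mlp`, `parameter`, and `deterministic` helpers.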
    def _build_graph(self, from_input):
        latent_dim = self.latent_space.flat_dim
        small = 1e-5
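        # `small` is referenced only by the commented-out truncated-normal
        # initializers below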

        with self._variable_scope:
            with tf.variable_scope("dist_params"):
                if self._std_share_network:
                    # mean and std networks share an MLP
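                    # output bias starts at zero for the mean half and at
                    # init_std_param for the std half, so the encoder begins
                    # with the requested initial std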
                    b = np.concatenate([
                        np.zeros(latent_dim),
                        np.full(latent_dim, self._init_std_param)
                    ], axis=0)
                    b = tf.constant_initializer(b)
                    # b = tf.truncated_normal_initializer(
                    #     mean=b, stddev=small)
                    mean_std_network = mlp(
                        with_input=from_input,
                        output_dim=latent_dim * 2,
                        hidden_sizes=self._hidden_sizes,
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        output_b_init=b,
                        name="mean_std_network")
                    with tf.variable_scope("mean_network"):
                        mean_network = mean_std_network[..., :latent_dim]
                    with tf.variable_scope("std_network"):
                        std_network = mean_std_network[..., latent_dim:]
                else:
                    # separate MLPs for mean and std networks
                    # mean network
                    mean_network = mlp(
                        with_input=from_input,
                        output_dim=latent_dim,
                        hidden_sizes=self._hidden_sizes,
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        name="mean_network")

                    # std network
                    if self._adaptive_std:
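                        # input-dependent std: a separate MLP on the same input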
                        b = tf.constant_initializer(self._init_std_param)
                        # b = tf.truncated_normal_initializer(
                        #     mean=self._init_std_param, stddev=small)
                        std_network = mlp(
                            with_input=from_input,
                            output_dim=latent_dim,
                            hidden_sizes=self._std_hidden_sizes,
                            hidden_nonlinearity=self._std_hidden_nonlinearity,
                            output_nonlinearity=self._output_nonlinearity,
                            output_b_init=b,
                            name="std_network")
                    else:
                        p = tf.constant_initializer(self._init_std_param)
                        # p = tf.truncated_normal_initializer(
                        #     mean=self._init_std_param, stddev=small)
                        std_network = parameter(with_input=from_input,
                                                length=latent_dim,
                                                initializer=p,
                                                trainable=self._learn_std,
                                                name="std_network")

                mean_var = mean_network
                std_param_var = std_network

                with tf.variable_scope("std_limits"):
                    if self._min_std_param is not None:
                        std_param_var = tf.maximum(std_param_var,
                                                   self._min_std_param)
                    if self._max_std_param is not None:
                        std_param_var = tf.minimum(std_param_var,
                                                   self._max_std_param)

            with tf.variable_scope("std_parameterization"):
                # build std_var with std parameterization
                if self._std_parameterization == "exp":
                    std_var = tf.exp(std_param_var)
                elif self._std_parameterization == "softplus":
                    std_var = tf.log(1. + tf.exp(std_param_var))
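                    # equivalent to tf.nn.softplus(std_param_var)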
                else:
                    raise NotImplementedError

            # diagonal Gaussian over the latent space; sampling with a fixed
            # seed keeps runs reproducible
            dist = tf.contrib.distributions.MultivariateNormalDiag(
                mean_var, std_var)

            latent_var = dist.sample(seed=deterministic.get_seed())

            return latent_var, mean_var, std_param_var, dist

Example #2

    def _build_graph(self, from_input):
        latent_dim = self.latent_space.flat_dim
        small = 1e-5  # unused in this variant

        with self._variable_scope:
            with tf.variable_scope("word2vec"):
                lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
                    num_units=self._sentence_embedding_dict_dim)
                self.sentence_embedding_dict = tf.Variable(
                    np.zeros((self._sentence_code_dim,
                              self._sentence_embedding_dict_dim)),
                    dtype=tf.float32)

                # (bs, max_sentence_len, sentence_embedding_dict_dim)
                self.sentence_embedding = tf.nn.embedding_lookup(
                    params=self.sentence_embedding_dict, ids=from_input)

                # ids equal to 0 are padding: mask them and count each
                # sentence's true length
                data_mask = tf.cast(from_input, tf.bool)
                data_len = tf.reduce_sum(tf.cast(data_mask, tf.int32), axis=1)
                initial_state = lstm_cell.zero_state(
                    tf.shape(self.sentence_embedding)[0], tf.float32)
                # use the final hidden state as the sentence encoding;
                # outputs[:, -1] would read zero padding for any sentence
                # shorter than the batch's max length
                input_vec = tf.nn.dynamic_rnn(
                    lstm_cell,
                    self.sentence_embedding,
                    sequence_length=data_len,
                    initial_state=initial_state)[1].h

            with tf.variable_scope("dist_params"):
                if self._std_share_network:
                    # mean and std networks share an MLP
                    b = np.concatenate([
                        np.zeros(latent_dim),
                        np.full(latent_dim, self._init_std_param)
                    ], axis=0)
                    b = tf.constant_initializer(b)
                    mean_std_network = mlp(
                        with_input=input_vec,
                        output_dim=latent_dim * 2,
                        hidden_sizes=self._hidden_sizes,
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        # hidden_w_init=tf.orthogonal_initializer(1.0),
                        # output_w_init=tf.orthogonal_initializer(1.0),
                        output_b_init=b,
                        name="mean_std_network")
                    with tf.variable_scope("mean_network"):
                        mean_network = mean_std_network[..., :latent_dim]
                    with tf.variable_scope("std_network"):
                        std_network = mean_std_network[..., latent_dim:]
                else:
                    # separate MLPs for mean and std networks
                    # mean network
                    mean_network = mlp(
                        with_input=input_vec,
                        output_dim=latent_dim,
                        hidden_sizes=self._hidden_sizes,
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        name="mean_network")

                    # std network
                    if self._adaptive_std:
                        b = tf.constant_initializer(self._init_std_param)
                        std_network = mlp(
                            with_input=input_vec,
                            output_dim=latent_dim,
                            hidden_sizes=self._std_hidden_sizes,
                            hidden_nonlinearity=self._std_hidden_nonlinearity,
                            output_nonlinearity=self._output_nonlinearity,
                            output_b_init=b,
                            name="std_network")
                    else:
                        p = tf.constant_initializer(self._init_std_param)
                        std_network = parameter(with_input=input_vec,
                                                length=latent_dim,
                                                initializer=p,
                                                trainable=self._learn_std,
                                                name="std_network")

                # optionally rescale and squash the mean output
                if self._mean_scale != 1.:
                    mean_var = tf.identity(mean_network * self._mean_scale,
                                           "mean_scale")
                else:
                    mean_var = mean_network

                if self._mean_output_nonlinearity is not None:
                    mean_var = self._mean_output_nonlinearity(mean_var)

                std_param_var = std_network

                with tf.variable_scope("std_limits"):
                    if self._min_std_param is not None:
                        std_param_var = tf.maximum(std_param_var,
                                                   self._min_std_param)
                    if self._max_std_param is not None:
                        std_param_var = tf.minimum(std_param_var,
                                                   self._max_std_param)

            with tf.variable_scope("std_parameterization"):
                # build std_var with std parameterization
                if self._std_parameterization == "exp":
                    std_var = tf.exp(std_param_var)
                elif self._std_parameterization == "softplus":
                    std_var = tf.log(1. + tf.exp(std_param_var))
                else:
                    raise NotImplementedError

            if self._normalize:
                # normalize each sample's mean vector; without an explicit
                # axis the whole batch would be normalized as one tensor
                mean_var = tf.nn.l2_normalize(mean_var, axis=-1)
                # std_var = tf.nn.l2_normalize(std_var)

            dist = tf.contrib.distributions.MultivariateNormalDiag(
                mean_var, std_var)

            latent_var = dist.sample(seed=deterministic.get_seed())

            return latent_var, mean_var, std_param_var, dist