Ejemplo n.º 1
0
 def make_latent_distributions(self, s, ac, s_next):
     """Build the proposal and prior latent distributions.

     Runs the proposal network on (s, ac, s_next) and the prior network
     on (s, ac); both raw outputs are parsed into tfp Normal
     distributions with a minimum sigma of 1e-3.

     Returns:
         (proposal, prior): two tfp distribution objects.
     """
     proposal = normal_parse_params(
         self.proposal_network([s, ac, s_next]), 1e-3)
     prior = normal_parse_params(
         self.prior_network([s, ac]), 1e-3)
     return proposal, prior
Ejemplo n.º 2
0
    def make_latent_distributions(self, s, ac, s_next):
        """Return (proposal, prior) as tfp distributions; the prior is fixed.

        The proposal is parsed from the proposal network's output with a
        minimum sigma of 1e-3. The prior is a fixed standard normal built
        from the prior network's output (presumably only its shape is
        used — confirm against `standard_normal`).
        """
        raw_proposal = self.proposal_network([s, ac, s_next])
        proposal = normal_parse_params(raw_proposal, 1e-3)

        # Fixed prior: standard normal derived from the prior network output.
        raw_prior = self.prior_network([s, ac])
        prior = standard_normal(raw_prior)
        return proposal, prior
Ejemplo n.º 3
0
    def batch_var_mean(self, s, ac, s_next, k):
        """Mean decoder variance over k latent samples (intrinsic reward).

        Draws k reparameterized samples from the proposal distribution,
        decodes each through the generative network, and averages the
        decoder variances over the feature and sample axes.

        Args:
            s, ac, s_next: inputs forwarded to the latent networks.
            k: number of latent samples to draw.

        Returns:
            Tensor of shape (None, None) — both the feature axis and the
            sample axis are reduced away. Usable directly as an intrinsic
            reward.
        """
        # The prior is not needed here, only the proposal.
        proposal, _ = self.make_latent_distributions(s, ac, s_next)
        var_estimates = []
        for _ in range(k):
            latent = proposal.sample()  # reparameterized sample, (None, None, 128)
            rec_params = self.generative_network(latent)  # (None, None, 1024)
            rec_distr = normal_parse_params(rec_params, min_sigma=1e-2)
            var_estimates.append(rec_distr.variance())  # (None, None, 512)

        # Stack to (None, None, 512, k), then reduce over feature axis 2
        # and sample axis 3.
        # NOTE(review): the original comment claimed the result is
        # (None, None, 512), but reducing axes [2, 3] of a rank-4 tensor
        # yields (None, None).
        var_estimates_tensor = tf.stack(var_estimates, axis=-1)
        var_mean = tf.reduce_mean(var_estimates_tensor, axis=[2, 3])
        return var_mean
Ejemplo n.º 4
0
    def batch_pred_var(self, s, ac, s_next, k):
        """Variance of k decoded predictions (intrinsic reward).

        Draws k reparameterized samples from the proposal distribution,
        decodes each to a predictive distribution, takes that
        distribution's mean as the prediction, then measures the variance
        of the k predictions and averages it over the feature axis.

        Args:
            s, ac, s_next: inputs forwarded to the latent networks.
            k: number of latent samples to draw.

        Returns:
            Tensor of shape (None, None); usable directly as an intrinsic
            reward.
        """
        # The prior is not needed here, only the proposal.
        proposal, _ = self.make_latent_distributions(s, ac, s_next)
        mean_estimates = []
        for _ in range(k):
            latent = proposal.sample()  # reparameterized sample, (None, None, 128)
            rec_params = self.generative_network(latent)  # (None, None, 1024)
            rec_distr = normal_parse_params(rec_params, min_sigma=1e-2)
            # Use the distribution mean as the prediction. A stochastic
            # alternative would be rec_distr.sample().
            mean_estimates.append(rec_distr.mean())  # (None, None, 512)

        mean_estimates_tensor = tf.stack(
            mean_estimates, axis=-1)  # (None, None, 512, k)
        # tf.nn.moments returns (mean, variance); keep the variance taken
        # over the sample axis. Explicit unpack replaces the opaque [-1].
        _, var = tf.nn.moments(mean_estimates_tensor,
                               axes=-1)  # (None, None, 512)
        var_mean = tf.reduce_mean(var, axis=-1)  # (None, None)
        return var_mean