def get_fn_sample():
    """Build TF1 callables for sampling from, and evaluating, a diagonal Gaussian.

    NOTE(review): relies on module-level names `dim` and `batch_size` being
    defined elsewhere in this file — confirm before moving this function.

    Returns:
        fn_sample: callable(mu, logsigma, n) -> n draws from the Gaussian
            parameterized by (mu, logsigma), via ``DiagGaussianPd.sample``.
        fn_p: callable(mu, logsigma, actions) -> per-action values of
            ``DiagGaussianPd.p`` (presumably the density — verify against
            the DiagGaussianPd implementation).
    """
    ph_mu = tf.placeholder(shape=[dim], dtype=tf.float32)
    ph_logsigma = tf.placeholder(shape=[dim], dtype=tf.float32)
    ph_actions = tf.placeholder(shape=[batch_size, dim], dtype=tf.float32)
    ph_n = tf.placeholder(shape=(), dtype=tf.int32)

    # Single parameter vector [mu; logsigma] as DiagGaussianPd expects.
    gaussian = DiagGaussianPd(tf.concat((ph_mu, ph_logsigma), axis=0))

    fn_sample = U.function([ph_mu, ph_logsigma, ph_n], gaussian.sample(ph_n))
    fn_p = U.function([ph_mu, ph_logsigma, ph_actions], gaussian.p(ph_actions))
    return fn_sample, fn_p
def get_func_cons(batch_size):
    """Build a TF1 callable evaluating the KL-based constraint value.

    Args:
        batch_size: leading dimension of the (mu, logsigma) parameter batch.

    Returns:
        callable(mu_logsigma, delta) -> ``candidate.kl(standard) - delta``,
        where `standard` is a DiagGaussianPd built from all-zero parameters
        (KL direction follows DiagGaussianPd.kl's own convention).
    """
    ph_params = tf.placeholder(shape=[batch_size, 2], dtype=tf.float32)
    ph_delta = tf.placeholder(shape=(1, ), dtype=tf.float32)

    standard = DiagGaussianPd(tf.zeros(shape=[batch_size, 2]))
    candidate = DiagGaussianPd(ph_params)
    constraint = candidate.kl(standard) - ph_delta

    return U.function([ph_params, ph_delta], constraint)
def get_com_batch(dim, sess, batch_size, share_size, sharelogsigma):
    """Build objective/constraint graph ops for a batch of diagonal Gaussians.

    Args:
        dim: action dimensionality; distribution parameters are (mu, logsigma),
            i.e. 2 * dim values per batch row.
        sess: TF session whose graph the ops are added to.
        batch_size: number of rows; must be concrete here — using None
            triggers a bug (original author's note).
        share_size: when `sharelogsigma`, how many consecutive rows share one
            logsigma row (assumes batch_size % share_size == 0 — TODO confirm).
        sharelogsigma: if True, optimize one logsigma per group of `share_size`
            rows instead of one per row.

    Returns:
        (f, con, ratio, x, ffs, pls):
            f     - per-row neglogp objective,
            con   - KL constraint minus delta,
            ratio - p / p0 density ratio,
            x     - the optimized (mu, logsigma) tensor,
            ffs   - DotMap with p and p0,
            pls   - DotMap of placeholders (x0, a, delta, plus the initializers).
    """
    with sess.as_default(), sess.graph.as_default():
        pls = DotMap()
        # Reference distribution parameters (fed, not optimized).
        x0 = tf.placeholder(dtype=tf.float32, shape=(batch_size, dim * 2), name='x0')
        if not sharelogsigma:
            # One independent (mu, logsigma) variable per batch row.
            # FIX: placeholder was misnamed 'x0' (copy-paste), colliding with the
            # real x0 above and getting silently uniquified by TF.
            x_initial = tf.placeholder(dtype=tf.float32, shape=x0.shape, name='x_initial')
            x = tf.Variable(x_initial, name='x')
            pls.x_initial = x_initial
        else:
            # Per-row mu, but logsigma shared across groups of `share_size` rows.
            # FIX: both initializer placeholders were misnamed 'x0' (copy-paste).
            mu_initial = tf.placeholder(dtype=tf.float32, shape=(batch_size, dim), name='mu_initial')
            independent_size = batch_size // share_size
            logsigma_initial = tf.placeholder(dtype=tf.float32, shape=(independent_size, dim),
                                              name='logsigma_initial')
            mu = tf.Variable(mu_initial, name='mu')
            logsigma = tf.Variable(logsigma_initial, name='logsigma')
            # Repeat each logsigma row share_size times:
            # (independent_size, dim) -> (independent_size, dim*share_size)
            # -> (independent_size*share_size, dim) == (batch_size, dim).
            logsigma_all = tf.tile(logsigma, [1, share_size])
            logsigma_all = tf.reshape(logsigma_all, [-1, dim])
            x = tf.concat((mu, logsigma_all), axis=-1)
            pls.mu_initial = mu_initial
            pls.logsigma_initial = logsigma_initial
        a = tf.placeholder(dtype=tf.float32, shape=(batch_size, dim), name='a')
        delta = tf.placeholder(dtype=tf.float32, shape=(), name='delta')

        # --- objective function
        dist = DiagGaussianPd(x)
        f = dist.neglogp(a)
        p = dist.p(a)
        dist0 = DiagGaussianPd(x0)
        # NOTE: KL direction is deliberately dist0.kl(dist), not dist.kl(dist0)
        # ("fit lambda" — original author's note).
        con = dist0.kl(dist) - delta
        p0 = dist0.p(a)
        ratio = p / p0

        pls_new = DotMap(x0=x0, a=a, delta=delta)
        pls.update(pls_new)
        ffs = DotMap(p=p, p0=p0)
        return f, con, ratio, x, ffs, pls
def get_fn_ratio():
    """Build a TF1 callable computing density ratios against a standard Gaussian.

    Returns:
        callable(action, mu_logstd_min, mu_logstd_max) ->
        (p_min(action) / p0(action), p_max(action) / p0(action)),
        where p0 comes from a DiagGaussianPd with all-zero parameters.
        All inputs have a variable leading (batch) dimension.
    """
    ph_action = tf.placeholder(shape=[None, 1], dtype=tf.float32)
    ph_params_min = tf.placeholder(shape=[None, 2], dtype=tf.float32)
    ph_params_max = tf.placeholder(shape=[None, 2], dtype=tf.float32)

    base = DiagGaussianPd(tf.zeros_like(ph_params_min))
    base_p = base.p(ph_action)
    ratios = (
        DiagGaussianPd(ph_params_min).p(ph_action) / base_p,
        DiagGaussianPd(ph_params_max).p(ph_action) / base_p,
    )
    return U.function([ph_action, ph_params_min, ph_params_max], ratios)