コード例 #1
0
def nature_cnn_v2(unscaled_images, **conv_kwargs):
    """Build a three-layer convolutional stack followed by a dense layer.

    The input is cast to float32 and divided by 255 before being fed through
    the convolutions 'c1', 'c2' and 'c3', each followed by a ReLU.

    Args:
        unscaled_images: Input tensor; presumably raw 0-255 image pixels
            (TODO confirm against callers).
        **conv_kwargs: Extra keyword arguments forwarded unchanged to every
            ``conv`` call.

    Returns:
        A 2-tuple ``(dense_out, flat_conv)`` where ``dense_out`` is the
        ReLU-activated output of the 512-unit 'fc1' layer and ``flat_conv``
        is the last convolution's activation passed through
        ``tf.layers.flatten``.
    """
    features = tf.cast(unscaled_images, tf.float32) / 255.
    relu = tf.nn.relu
    gain = np.sqrt(2)

    # One row per conv layer, in build order: (name, nf, rf, stride).
    layer_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )
    for layer_name, nf, rf, stride in layer_specs:
        features = relu(
            conv(features,
                 layer_name,
                 nf=nf,
                 rf=rf,
                 stride=stride,
                 init_scale=gain,
                 **conv_kwargs))

    dense_in = conv_to_fc(features)
    dense_out = relu(fc(dense_in, 'fc1', nh=512, init_scale=gain))
    # The flattened conv activation is returned alongside the dense output.
    return dense_out, tf.layers.flatten(features)
コード例 #2
0
ファイル: policies.py プロジェクト: guikarist/rl-asc
    def __init__(self,
                 env,
                 observations,
                 latent,
                 f_features,
                 estimate_q=False,
                 vf_latent=None,
                 sess=None,
                 **tensors):
        """Attach action-distribution and value outputs to latent tensors.

        Args:
            env: Environment object; only ``env.action_space`` is read here.
            observations: Indexable collection stored as ``self.Xs``; the
                element at index 1 is stored as ``self.X``.
            latent: Tensor that is flattened and used to build the action
                distribution.
            f_features: Stored unchanged as ``self.f_features``.
            estimate_q: When truthy, build a 'q' head with one output per
                discrete action and use it as ``self.vf``; otherwise build
                a single-output 'vf' head.
            vf_latent: Tensor feeding the value head; falls back to
                ``latent`` when ``None``.
            sess: Session to keep on the instance; a falsy value selects
                ``tf.get_default_session()``.
            **tensors: Extra keyword arguments copied onto the instance as
                attributes.
        """
        self.Xs = observations
        self.X = observations[1]
        self.f_features = f_features

        self.state = tf.constant([])
        self.initial_state = None
        # Any additional tensors become plain instance attributes.
        self.__dict__.update(tensors)

        # The value head shares the policy latent unless given its own.
        if vf_latent is None:
            vf_latent = latent

        value_input = tf.layers.flatten(vf_latent)
        policy_input = tf.layers.flatten(latent)

        # The distribution type is chosen from the action space.
        self.pdtype = make_pdtype(env.action_space)

        dist, dist_out = self.pdtype.pdfromlatent(policy_input,
                                                  init_scale=0.01)
        self.pd = dist
        self.pi = dist_out

        # Sample an action and compute its negative log-probability.
        self.action = self.pd.sample()
        self.neglogp = self.pd.neglogp(self.action)
        self.sess = sess or tf.get_default_session()

        if not estimate_q:
            value_out = fc(value_input, 'vf', 1)
            # Keep only column 0 of the single-output value head.
            self.vf = value_out[:, 0]
        else:
            assert isinstance(env.action_space, gym.spaces.Discrete)
            self.q = fc(value_input, 'q', env.action_space.n)
            self.vf = self.q
コード例 #3
0
 def fc_func(x):
     """Apply ``fc`` to ``x`` under the name ``'mlp_fc<i>'``.

     ``i`` and ``num_hidden`` are not parameters: they are read from the
     enclosing scope at the time this function is called.
     """
     layer_name = 'mlp_fc{}'.format(i)
     gain = np.sqrt(2)
     return fc(x, layer_name, nh=num_hidden, init_scale=gain)
コード例 #4
0
 def h4_func(x):
     """Return ``activ`` applied to the 512-unit 'fc1' layer built on ``x``.

     ``activ`` is read from the enclosing scope.
     """
     dense = fc(x, 'fc1', nh=512, init_scale=np.sqrt(2))
     return activ(dense)