def nature_cnn_v2(unscaled_images, **conv_kwargs):
    """Build a three-layer conv stack followed by a 512-unit dense layer.

    The input is cast to float32 and divided by 255 before the first
    convolution. Every conv layer and the dense layer is followed by ReLU
    and initialised with ``init_scale=sqrt(2)``.

    Args:
        unscaled_images: input tensor fed to the first convolution.
        **conv_kwargs: extra keyword arguments forwarded unchanged to each
            ``conv`` call.

    Returns:
        A 2-tuple ``(dense_out, conv_flat)``: the activated output of the
        ``'fc1'`` layer, and the activated output of the ``'c3'`` layer
        passed through ``tf.layers.flatten``.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    # (scope name, number of filters, filter size, stride) per conv layer.
    conv_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )

    net = tf.cast(unscaled_images, tf.float32) / 255.
    for scope, n_filters, filter_size, step in conv_specs:
        net = relu(conv(net, scope, nf=n_filters, rf=filter_size,
                        stride=step, init_scale=gain, **conv_kwargs))

    # Keep the last conv activation: it feeds both the dense head and the
    # flattened feature output.
    conv_out = net
    dense_in = conv_to_fc(conv_out)
    dense_out = relu(fc(dense_in, 'fc1', nh=512, init_scale=gain))
    return dense_out, tf.layers.flatten(conv_out)
def __init__(self, env, observations, latent, f_features, estimate_q=False,
             vf_latent=None, sess=None, **tensors):
    """Wire up the action distribution and the value (or Q) head.

    Args:
        env: object exposing ``action_space``; the space selects the
            distribution type via ``make_pdtype``.
        observations: indexable collection of observation tensors. The
            whole collection is stored as ``self.Xs`` and element ``[1]``
            as ``self.X``.
        latent: tensor used (after flattening) as input to the policy head.
        f_features: stored unchanged as ``self.f_features``.
        estimate_q: when true, build a ``'q'`` head with one output per
            discrete action and expose it as both ``self.q`` and
            ``self.vf``; otherwise build a single-output ``'vf'`` head.
        vf_latent: tensor used (after flattening) as input to the value
            head; falls back to ``latent`` when ``None``.
        sess: session to keep on the instance; when falsy,
            ``tf.get_default_session()`` is used instead.
        **tensors: extra name/value pairs copied straight into the
            instance ``__dict__``.
    """
    self.Xs = observations
    self.X = observations[1]
    self.f_features = f_features
    self.state = tf.constant([])
    self.initial_state = None
    # Applied after the assignments above, so a key in `tensors` with the
    # same name replaces the attribute set there.
    self.__dict__.update(tensors)

    if vf_latent is None:
        vf_latent = latent
    value_input = tf.layers.flatten(vf_latent)
    policy_input = tf.layers.flatten(latent)

    action_space = env.action_space

    # The distribution type is chosen from the environment's action space.
    self.pdtype = make_pdtype(action_space)
    self.pd, self.pi = self.pdtype.pdfromlatent(policy_input, init_scale=0.01)

    # Sample an action and compute its negative log-probability.
    self.action = self.pd.sample()
    self.neglogp = self.pd.neglogp(self.action)

    self.sess = sess or tf.get_default_session()

    if not estimate_q:
        # Single-output head; keep only column 0 of its output.
        self.vf = fc(value_input, 'vf', 1)[:, 0]
    else:
        assert isinstance(action_space, gym.spaces.Discrete)
        self.q = fc(value_input, 'q', action_space.n)
        self.vf = self.q
def fc_func(x):
    """Apply the fully connected layer named ``'mlp_fc<i>'`` to ``x``.

    ``i`` and ``num_hidden`` are not parameters; they are read from the
    surrounding scope at call time.
    """
    layer_name = 'mlp_fc{}'.format(i)
    return fc(x, layer_name, nh=num_hidden, init_scale=np.sqrt(2))
def h4_func(x):
    """Apply the 512-unit ``'fc1'`` layer to ``x`` and then ``activ``.

    ``activ`` is not a parameter; it is read from the surrounding scope.
    """
    dense = fc(x, 'fc1', nh=512, init_scale=np.sqrt(2))
    return activ(dense)