Exemple #1
0
    def add_head(self, h):
        """
        Attach the output heads to the latent tensor ``h``.

        Inside the "model" variable scope (with ``reuse=self.reuse``) this builds:

        * ``self.vf``: column 0 of an ``fc`` layer named 'v' with ``self.n_v`` units,
        * ``self.pd``: the distribution returned by ``self.pdtype.pdfromlatent``,
        * ``self.wf``: softplus of a one-unit ``fc`` layer named 'w'.

        Outside the scope it samples ``self.a0`` from ``self.pd``, computes
        ``self.neglogp0`` for that sample, and records the tensor names of the
        action, value and neglogp outputs in ``self.pub_names``.

        :param h: latent tensor fed to every head
        """
        with tf.variable_scope("model", reuse=self.reuse):
            self.vf = fc(h, 'v',
                         self.n_v)[:, 0]  # TODO: This is a hack, to be fixed
            self.pd, _ = self.pdtype.pdfromlatent(h, init_scale=0.01)
            # See implementation details in page 9 of 1904.08473
            # softplus(x) == log(1 + exp(x)); the built-in op does not overflow
            # to inf for large x the way the hand-written expression does.
            self.wf = tf.nn.softplus(fc(h, 'w', 1))  # shape (batch_size, 1)

        self.a0 = self.pd.sample()
        self.neglogp0 = self.pd.neglogp(self.a0)
        self.pub_names = {
            'a': self.a0.name,
            'v': self.vf.name,
            'p': self.neglogp0.name
        }
Exemple #2
0
def mlp_net(x, nh=(), activ=tf.nn.tanh):
    """
    MLP net

    Flattens ``x`` to ``[x.shape[0], -1]`` and feeds it through one ``fc``
    layer per entry of ``nh``; the layers are named 'fc0', 'fc1', ... and each
    is initialised with ``init_scale=sqrt(2)`` and followed by ``activ``.

    :param x: input tensor
        (NOTE(review): the reshape uses ``x.shape[0]`` directly, so this
        presumably needs a statically known leading dimension - confirm)
    :param nh: sequence of layer widths; empty means no layers are added
    :param activ: activation applied to the output of every layer
    :return: output of the last layer, or the flattened input if ``nh`` is empty
    """
    h = tf.reshape(x, [x.shape[0], -1])
    for i, width in enumerate(nh):
        h = activ(fc(h, 'fc%d' % i, nh=width, init_scale=np.sqrt(2)))
    return h
Exemple #3
0
def nature_cnn(unscaled_images, images_format='NHWC', **conv_kwargs):
    """
    CNN from Nature paper.

    The input is cast to float32 and divided by 255, brought to NHWC layout,
    passed through three ReLU-activated convolutions ('c1', 'c2', 'c3'),
    flattened with ``conv_to_fc`` and fed into a 512-unit ReLU ``fc`` layer
    named 'fc1'.

    :param unscaled_images: image tensor before the division by 255
    :param images_format: 'NHWC' or 'NCHW'; NCHW input is transposed to NHWC
    :param conv_kwargs: extra keyword arguments forwarded to every ``conv`` call
    :return: activated output of the 'fc1' layer
    :raises RuntimeError: if ``images_format`` is neither 'NHWC' nor 'NCHW'
    """
    net = tf.cast(unscaled_images, tf.float32) / 255.
    # We require the input format to be NHWC
    if images_format == 'NCHW':
        net = tf.transpose(net, perm=[0, 2, 3, 1])
    elif images_format != 'NHWC':
        raise RuntimeError("Unknown images format")

    relu = tf.nn.relu
    # (scope name, number of filters, filter size, stride) for each conv layer
    conv_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )
    for scope, filters, size, step in conv_specs:
        net = relu(
            conv(net,
                 scope,
                 nf=filters,
                 rf=size,
                 stride=step,
                 init_scale=np.sqrt(2),
                 **conv_kwargs))

    flat = conv_to_fc(net)
    return relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
Exemple #4
0
 def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0, temperature=1.0):
     """
     Build a distribution from a latent vector.

     An ``fc`` layer named 'pi' with ``self.ncat`` outputs produces the flat
     parameters, which are then multiplied by ``temperature``.

     :param latent_vector: input tensor for the 'pi' layer
     :param init_scale: forwarded to ``fc``
     :param init_bias: forwarded to ``fc``
     :param temperature: scalar multiplier (Python number or 0-D tensor)
         applied to the flat parameters
     :return: tuple ``(distribution, scaled flat parameters)``
     """
     pdparam = fc(latent_vector, 'pi', self.ncat, init_scale=init_scale, init_bias=init_bias)
     # The default is a plain float rather than a tensor: a tensor default would
     # be created once at definition time and stay bound to the graph active then.
     # tf.scalar_mul converts the scalar to a tensor inside the current graph.
     pdparam = tf.scalar_mul(temperature, pdparam)
     return self.pdfromflat(pdparam), pdparam
Exemple #5
0
 def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
     """
     Build a distribution from a latent vector.

     The mean comes from an ``fc`` layer named 'pi' with ``self.size`` outputs;
     the log standard deviation is a zero-initialised variable 'logstd' of
     shape ``[1, self.size]``. Both are concatenated along axis 1 and passed
     to ``self.pdfromflat``.

     :param latent_vector: input tensor for the 'pi' layer
     :param init_scale: forwarded to ``fc``
     :param init_bias: forwarded to ``fc``
     :return: tuple ``(distribution, mean)``
     """
     mu = fc(latent_vector, 'pi', self.size, init_scale=init_scale, init_bias=init_bias)
     log_sigma = tf.get_variable(name='logstd', shape=[1, self.size], initializer=tf.zeros_initializer())
     # Adding to a zeroed copy of the mean broadcasts logstd to the mean's shape.
     log_sigma_like_mu = mu * 0.0 + log_sigma
     flat_params = tf.concat([mu, log_sigma_like_mu], axis=1)
     distribution = self.pdfromflat(flat_params)
     return distribution, mu