def add_head(self, h):
    """Attach the value, policy and weight heads to the latent tensor ``h``.

    The fully-connected layers are created under the ``"model"`` variable
    scope, honouring ``self.reuse``.  The method sets:

    * ``self.vf`` -- column 0 of the ``'v'`` layer (``self.n_v`` outputs).
    * ``self.pd`` -- first element returned by ``self.pdtype.pdfromlatent``.
    * ``self.wf`` -- softplus of the single-unit ``'w'`` layer.
    * ``self.a0`` -- a sample drawn from ``self.pd``.
    * ``self.neglogp0`` -- ``self.pd.neglogp`` evaluated at ``self.a0``.
    * ``self.pub_names`` -- graph names of ``a0``, ``vf`` and ``neglogp0``
      under the keys ``'a'``, ``'v'`` and ``'p'``.

    Args:
        h: latent tensor fed to every head.
    """
    with tf.variable_scope("model", reuse=self.reuse):
        # TODO: This is a hack, to be fixed
        self.vf = fc(h, 'v', self.n_v)[:, 0]
        self.pd, _ = self.pdtype.pdfromlatent(h, init_scale=0.01)
        # See implementation details in page 9 of 1904.08473
        # softplus(x) == log(1 + exp(x)), but the fused op stays finite for
        # large x, where the hand-written form overflows in exp() and yields
        # inf values / NaN gradients.
        self.wf = tf.nn.softplus(fc(h, 'w', 1))  # shape (batch_size, 1)

    # NOTE(review): sampling ops are built outside the variable scope, which
    # is assumed to match the original layout -- confirm.
    self.a0 = self.pd.sample()
    self.neglogp0 = self.pd.neglogp(self.a0)
    self.pub_names = {
        'a': self.a0.name,
        'v': self.vf.name,
        'p': self.neglogp0.name,
    }
def mlp_net(x, nh=(), activ=tf.nn.tanh):
    """MLP net.

    Reshapes ``x`` to ``[x.shape[0], -1]`` and then stacks one
    fully-connected layer per entry of ``nh`` (scopes ``'fc0'``, ``'fc1'``,
    ...), each followed by ``activ``.

    Args:
        x: input tensor; the leading dimension is kept, the rest flattened.
        nh: sequence of hidden-layer widths.  The default is an immutable
            empty tuple so no mutable object is shared between calls.
        activ: activation applied to the output of every layer.

    Returns:
        The output of the last layer, or the reshaped input when ``nh`` is
        empty.
    """
    out = tf.reshape(x, [x.shape[0], -1])
    for i, width in enumerate(nh):
        out = activ(fc(out, 'fc%d' % i, nh=width, init_scale=np.sqrt(2)))
    return out
def nature_cnn(unscaled_images, images_format='NHWC', **conv_kwargs):
    """CNN from Nature paper.

    Args:
        unscaled_images: image tensor; it is cast to float32 and divided
            by 255 before the first convolution.
        images_format: ``'NHWC'`` (used as is) or ``'NCHW'`` (transposed to
            NHWC with ``perm=[0, 2, 3, 1]``).
        **conv_kwargs: forwarded unchanged to every ``conv`` call.

    Returns:
        ReLU output of the 512-unit ``'fc1'`` layer.

    Raises:
        RuntimeError: if ``images_format`` is neither ``'NHWC'`` nor
            ``'NCHW'``.
    """
    net = tf.cast(unscaled_images, tf.float32) / 255.

    # We require the input format to be NHWC
    if images_format == 'NCHW':
        net = tf.transpose(net, perm=[0, 2, 3, 1])
    elif images_format != 'NHWC':
        raise RuntimeError("Unknown images format")

    relu = tf.nn.relu
    # (scope, number of filters, filter size, stride) for each conv layer.
    conv_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )
    for scope, filters, size, step in conv_specs:
        net = relu(conv(net, scope, nf=filters, rf=size, stride=step,
                        init_scale=np.sqrt(2), **conv_kwargs))

    flat = conv_to_fc(net)
    return relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0,
                 temperature=1.0):
    """Build a distribution from a latent vector via the ``'pi'`` layer.

    Args:
        latent_vector: input to the fully-connected ``'pi'`` layer, which
            has ``self.ncat`` outputs.
        init_scale: forwarded to ``fc`` as ``init_scale``.
        init_bias: forwarded to ``fc`` as ``init_bias``.
        temperature: scalar that the ``'pi'`` output is multiplied by.  The
            default is a plain Python float rather than a tensor: a tensor
            default would be created once, at definition time, in whichever
            graph was the default then, and could not be combined with
            layers built later in a different ``tf.Graph``.

    Returns:
        A pair ``(pd, pdparam)``: the result of ``self.pdfromflat`` on the
        scaled ``'pi'`` output, and that scaled output itself.
    """
    pdparam = fc(latent_vector, 'pi', self.ncat,
                 init_scale=init_scale, init_bias=init_bias)
    pdparam = tf.scalar_mul(temperature, pdparam)
    return self.pdfromflat(pdparam), pdparam
def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
    """Build a distribution from a latent vector and a ``logstd`` variable.

    Args:
        latent_vector: input to the fully-connected ``'pi'`` layer, which
            has ``self.size`` outputs.
        init_scale: forwarded to ``fc`` as ``init_scale``.
        init_bias: forwarded to ``fc`` as ``init_bias``.

    Returns:
        A pair ``(pd, mean)``: the result of ``self.pdfromflat`` on the
        ``'pi'`` output concatenated (axis 1) with the broadcast ``logstd``,
        and the ``'pi'`` output itself.
    """
    mu = fc(latent_vector, 'pi', self.size,
            init_scale=init_scale, init_bias=init_bias)
    log_sigma = tf.get_variable(
        name='logstd',
        shape=[1, self.size],
        initializer=tf.zeros_initializer(),
    )
    # mu * 0.0 has mu's shape, so the addition broadcasts the single
    # logstd row across every row of mu.
    log_sigma_rows = mu * 0.0 + log_sigma
    flat_params = tf.concat([mu, log_sigma_rows], axis=1)
    return self.pdfromflat(flat_params), mu