def nature_cnn(unscaled_images, scope, **conv_kwargs):
    """
    CNN from Nature paper, built inside the variable scope ``scope``.

    The input is cast to float32 and divided by 255, passed through three
    ReLU-activated ``conv`` layers ('c1', 'c2', 'c3'), flattened with
    ``conv_to_fc`` and fed to the ReLU-activated ``fc`` layer 'fc1' (nh=512).
    Extra keyword arguments are forwarded to every ``conv`` call.
    """
    gain = np.sqrt(2)
    # (scope name, nf, rf, stride); the first layer looks at the input image
    # with an 8x8 filter.
    layer_specs = (('c1', 32, 8, 4), ('c2', 64, 4, 2), ('c3', 64, 3, 1))
    with tf.variable_scope(scope):
        net = tf.cast(unscaled_images, tf.float32) / 255.
        for layer_name, nf, rf, stride in layer_specs:
            net = tf.nn.relu(
                conv(net, layer_name, nf=nf, rf=rf, stride=stride,
                     init_scale=gain, **conv_kwargs))
        flat = conv_to_fc(net)
        return tf.nn.relu(fc(flat, 'fc1', nh=512, init_scale=gain))
def custom_cnn(unscaled_images, **conv_kwargs):
    """
    Three-layer conv feature extractor using leaky-ReLU activations.

    The input is cast to float32 and divided by 255. Layers 'c1', 'c2' and
    'c3' are followed by ``conv_to_fc`` and the leaky-ReLU-activated ``fc``
    layer 'fc1' (nh=512), whose output is returned. Extra keyword arguments
    are forwarded to every ``conv`` call.
    """
    def conv_block(inputs, name, nf, rf, stride):
        # One conv layer followed by the leaky-ReLU non-linearity.
        return tf.nn.leaky_relu(
            conv(inputs, name, nf=nf, rf=rf, stride=stride,
                 init_scale=np.sqrt(2), **conv_kwargs))

    images = tf.cast(unscaled_images, tf.float32) / 255.
    c1 = conv_block(images, 'c1', 32, 8, 4)
    c2 = conv_block(c1, 'c2', 64, 4, 2)
    c3 = conv_block(c2, 'c3', 64, 3, 1)
    flat = conv_to_fc(c3)
    return tf.nn.leaky_relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
def vqn_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from VQN paper.

    The input is cast to float32 and divided by 255, then passed through
    three ReLU-activated ``conv`` layers ('c1', 'c2', 'c3'). The activated
    output of 'c3' is returned as-is (it is not flattened here). Extra
    keyword arguments are forwarded to every ``conv`` call.
    """
    gain = np.sqrt(2)
    net = tf.cast(unscaled_images, tf.float32) / 255.
    # (scope name, nf, rf, stride)
    for layer_name, nf, rf, stride in (('c1', 32, 3, 1),
                                       ('c2', 32, 3, 1),
                                       ('c3', 64, 4, 2)):
        net = tf.nn.relu(
            conv(net, layer_name, nf=nf, rf=rf, stride=stride,
                 init_scale=gain, **conv_kwargs))
    return net
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, nlstm=256, reuse=False, scope_name="model"):
    """
    Build a conv + LSTM policy/value graph and expose ``step`` / ``value``.

    Placeholders are created for observations (uint8), the done-mask and the
    recurrent state. Inside ``scope_name`` the observations go through
    conv layers 'c1'..'c3', ``fc`` 'fc1', and ``lstm`` 'lstm1'; two linear
    ``fc`` heads produce the policy parameters ('pi') and the value ('v').

    The instance exposes ``X``, ``M``, ``S``, ``pi``, ``vf``, ``pdtype``,
    ``pd``, ``initial_state`` and the callables ``step`` and ``value``.
    """
    nenv = nbatch // nsteps
    height, width, channels = ob_space.shape
    n_actions = ac_space.n
    gain = np.sqrt(2)

    X = tf.placeholder(tf.uint8, (nbatch, height, width, channels))  # obs
    M = tf.placeholder(tf.float32, [nbatch])  # mask (done t-1)
    S = tf.placeholder(tf.float32, [nenv, nlstm * 2])  # states

    with tf.variable_scope(scope_name, reuse=reuse):
        feats = tf.cast(X, tf.float32) / 255.
        for layer_name, nf, rf, stride in (('c1', 32, 8, 4),
                                           ('c2', 64, 4, 2),
                                           ('c3', 64, 3, 1)):
            feats = conv(feats, layer_name, nf=nf, rf=rf, stride=stride,
                         init_scale=gain)
        feats = fc(conv_to_fc(feats), 'fc1', nh=512, init_scale=gain)

        seq_inputs = batch_to_seq(feats, nenv, nsteps)
        seq_masks = batch_to_seq(M, nenv, nsteps)
        seq_outputs, snew = lstm(seq_inputs, seq_masks, S, 'lstm1', nh=nlstm)
        lstm_out = seq_to_batch(seq_outputs)

        pi = fc(lstm_out, 'pi', n_actions, act=lambda x: x)
        vf = fc(lstm_out, 'v', 1, act=lambda x: x)

    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(pi)

    v0 = vf[:, 0]
    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

    def step(ob, state, mask):
        # Returns sampled action, value, new recurrent state and -log p(action).
        feed = {X: ob, S: state, M: mask}
        return sess.run([a0, v0, snew, neglogp0], feed)

    def value(ob, state, mask):
        # Value estimate only.
        feed = {X: ob, S: state, M: mask}
        return sess.run(v0, feed)

    self.X, self.M, self.S = X, M, S
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.

    Casts the input to float32, divides it by 255 and applies three
    ReLU-activated ``conv`` layers followed by ``conv_to_fc`` and a
    ReLU-activated ``fc`` layer 'fc1' (nh=512). Every layer is created with
    ``init_scale=np.sqrt(2)``; extra keyword arguments go to each ``conv``.
    """
    def relu_conv(inputs, name, nf, rf, stride):
        # conv layer followed by the rectified linear unit
        return tf.nn.relu(
            conv(inputs, name, nf=nf, rf=rf, stride=stride,
                 init_scale=np.sqrt(2), **conv_kwargs))

    images = tf.cast(unscaled_images, tf.float32) / 255.
    out1 = relu_conv(images, 'c1', 32, 8, 4)
    out2 = relu_conv(out1, 'c2', 64, 4, 2)
    out3 = relu_conv(out2, 'c3', 64, 3, 1)
    flat = conv_to_fc(out3)
    return tf.nn.relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.

    Input is cast to float32 and divided by 255 before three ReLU conv
    layers ('c1', 'c2', 'c3'), ``conv_to_fc`` and the ReLU ``fc`` layer
    'fc1' (nh=512). ``conv_kwargs`` is passed to every ``conv`` call.
    """
    gain = np.sqrt(2)
    relu = tf.nn.relu
    net = tf.cast(unscaled_images, tf.float32) / 255.

    # 8x8 filter size is common on the very 1st conv layer, looking at the
    # input image.
    net = relu(conv(net, 'c1', nf=32, rf=8, stride=4, init_scale=gain,
                    **conv_kwargs))
    net = relu(conv(net, 'c2', nf=64, rf=4, stride=2, init_scale=gain,
                    **conv_kwargs))
    net = relu(conv(net, 'c3', nf=64, rf=3, stride=1, init_scale=gain,
                    **conv_kwargs))

    return relu(fc(conv_to_fc(net), 'fc1', nh=512, init_scale=gain))
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
    """
    Build a stateless conv policy/value graph over stacked frames.

    The observation placeholder has ``nenv * nsteps`` rows and
    ``nc * nstack`` channels. Inside the "model" scope the observations are
    scaled by 1/255 and passed through conv layers 'c1'..'c3' and ``fc``
    'fc1'; linear ``fc`` heads 'pi' and 'v' sit on top.

    Exposes ``X``, ``pi``, ``vf``, ``initial_state`` (an empty list) and
    the callables ``step`` and ``value``.
    """
    height, width, channels = ob_space.shape
    nbatch = nenv * nsteps
    n_actions = ac_space.n
    gain = np.sqrt(2)
    X = tf.placeholder(tf.uint8, (nbatch, height, width, channels * nstack))  # obs

    with tf.variable_scope("model", reuse=reuse):
        net = tf.cast(X, tf.float32) / 255.
        for layer_name, nf, rf, stride in (('c1', 32, 8, 4),
                                           ('c2', 64, 4, 2),
                                           ('c3', 64, 3, 1)):
            net = conv(net, layer_name, nf=nf, rf=rf, stride=stride,
                       init_scale=gain)
        hidden = fc(conv_to_fc(net), 'fc1', nh=512, init_scale=gain)
        pi = fc(hidden, 'pi', n_actions, act=lambda x: x)
        vf = fc(hidden, 'v', 1, act=lambda x: x)

    v0 = vf[:, 0]
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        action, val = sess.run([a0, v0], {X: ob})
        return action, val, []  # dummy state

    def value(ob, *_args, **_kwargs):
        return sess.run(v0, {X: ob})

    self.X = X
    self.pi, self.vf = pi, vf
    self.step, self.value = step, value
def network_fn(X):
    """
    Conv network whose first feature map is multiplied by ``fls_module``.

    ``X`` is cast to float32 and divided by 255. After the ReLU-activated
    'c1' layer, ``fls_module`` is applied inside variable scope 'h1' and its
    result multiplies the 'c1' features. Layers 'c2' and 'c3' follow, then
    ``conv_to_fc`` and ``final_linear``.

    Returns a ``NetworkOutput`` with the final features as
    ``policy_latent``, no recurrent tensors, and the ``fls_module`` output
    in ``extra``.
    """
    init = {'init_scale': np.sqrt(2)}

    def relu_conv(inputs, name, nf, rf, stride):
        return tf.nn.relu(conv(inputs, name, nf=nf, rf=rf, stride=stride, **init))

    net = relu_conv(tf.cast(X, tf.float32) / 255., 'c1', 32, 8, 4)

    with tf.variable_scope('h1'):
        h1 = fls_module(net)
    net = net * h1

    net = relu_conv(net, 'c2', 64, 4, 2)
    net = relu_conv(net, 'c3', 64, 3, 1)
    latent = final_linear(conv_to_fc(net))

    return NetworkOutput(
        policy_latent=latent,
        recurrent_tensors=None,
        extra=h1,
    )
def nature_cnn(unscaled_images, keep_probs, **conv_kwargs):
    """
    CNN from Nature paper, with dropout on the final features.

    The input is used as given (no cast and no division by 255 happens
    here). Three ReLU conv layers with rf=3, stride=1 are followed by
    ``conv_to_fc``, the ReLU ``fc`` layer 'fc1' (nh=512) and
    ``tf.nn.dropout`` with ``keep_prob=keep_probs``.
    """
    gain = np.sqrt(2)
    net = unscaled_images
    # (scope name, nf); every layer uses rf=3 and stride=1
    for layer_name, nf in (('c1', 32), ('c2', 64), ('c3', 64)):
        net = tf.nn.relu(
            conv(net, layer_name, nf=nf, rf=3, stride=1, init_scale=gain,
                 **conv_kwargs))
    dense = tf.nn.relu(fc(conv_to_fc(net), 'fc1', nh=512, init_scale=gain))
    return tf.nn.dropout(dense, keep_prob=keep_probs)
def nature_cnn(input_shape, **conv_kwargs):
    """
    CNN from Nature paper, assembled as a ``tf.keras.Model``.

    A uint8 ``tf.keras.Input`` of shape ``input_shape`` is cast to float32
    and divided by 255, then passed through layers built by ``conv``
    ('c1', 'c2', 'c3', each with relu activation), a ``Flatten`` layer and
    a relu ``Dense`` layer 'fc1' with 512 units initialised by
    ``ortho_init(np.sqrt(2))``.

    ``conv_kwargs`` is accepted but not used by this function. The input
    shape is printed as a side effect.
    """
    print('input shape is {}'.format(input_shape))
    gain = np.sqrt(2)
    x_input = tf.keras.Input(shape=input_shape, dtype=tf.uint8)

    net = tf.cast(x_input, tf.float32) / 255.
    # (layer name, nf, rf, stride)
    for layer_name, nf, rf, stride in (('c1', 32, 8, 4),
                                       ('c2', 64, 4, 2),
                                       ('c3', 64, 3, 1)):
        layer = conv(layer_name, nf=nf, rf=rf, stride=stride,
                     activation='relu', init_scale=gain)
        net = layer(net)

    net = tf.keras.layers.Flatten()(net)
    dense = tf.keras.layers.Dense(units=512,
                                  kernel_initializer=ortho_init(gain),
                                  name='fc1',
                                  activation='relu')
    return tf.keras.Model(inputs=[x_input], outputs=[dense(net)])
def __call__(self, obs, action, reuse=False):
    """
    Build the network output for an (observation, action) pair.

    Everything is created under ``tf.variable_scope(self.name)`` with
    ``tf.AUTO_REUSE``; the ``reuse`` argument is accepted but not read.

    * ``self.network == 'mlp'``: ``obs`` and ``action`` are concatenated on
      the last axis, passed through ``self.network_builder`` and a 1-unit
      dense layer named 'output'.
    * ``self.network == 'cloth_cnn'``: ``obs`` goes through three
      tanh-activated conv layers and ``conv_to_fc``; the result is
      concatenated with ``action`` and passed through a 200-unit tanh dense
      layer and a 1-unit dense layer named 'output'.

    Raises ``ValueError`` for any other ``self.network`` value.
    """
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        if self.network == 'mlp':
            x = tf.concat([obs, action], axis=-1) # this assumes observation and action can be concatenated
            x = self.network_builder(x)
            x = tf.layers.dense(x, 1, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3), name='output')
        elif self.network == 'cloth_cnn':
            # --------------------------------------------------------------
            # To keep this simple, the net is designed explicitly here.
            # ASSUMES the caller (ddpg.py) has ALREADY DIVIDED BY 255, so obs
            # is tf.float32 -- TODO confirm against the caller.
            # The design is open to debate; e.g. the input should probably be
            # cropped. There is a separate `cloth_cnn`, but that one is for
            # PPO with only states as input.
            # --------------------------------------------------------------
            activ = tf.nn.tanh
            h = obs
            with tf.variable_scope("convnet"):
                h = activ(conv(h, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2)))
                h = activ(conv(h, 'c2', nf=32, rf=4, stride=2, init_scale=np.sqrt(2)))
                h = activ(conv(h, 'c3', nf=32, rf=3, stride=1, init_scale=np.sqrt(2)))
                h = conv_to_fc(h)
            # NOTE(review): the 'output' layer is assumed to live inside the
            # "fcnet" scope -- verify against the checkpoint variable names.
            with tf.variable_scope("fcnet"):
                h = tf.concat([h, action], axis=-1) # this assumes observation and action can be concatenated
                h = activ(tf.layers.dense(h, 200, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3), name='fc1'))
                x = tf.layers.dense(h, 1, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3), name='output')
        else:
            raise ValueError(self.network)
    return x
def network_fn(X):
    """
    Conv features re-weighted by a per-pixel map before the final linear layer.

    ``X`` is cast to float32, divided by 255 and padded by one element on
    each side of axes 1 and 2. ``sparse_block`` features are L2-normalised
    along axis 3. Two 1x1 'SAME' conv layers ('h1' with ELU, then 'h2') feed
    ``softmax_pixelwise``; the result is summed over its last axis (keeping
    the dimension) and multiplies the features. The product is reshaped to
    ``(-1, 3136)`` and passed to ``final_linear``.

    Returns a ``NetworkOutput`` with the final features as
    ``policy_latent``, no recurrent tensors, and the per-pixel map in
    ``extra``.
    """
    init = {'init_scale': np.sqrt(2)}

    feats = tf.cast(X, tf.float32) / 255.
    feats = tf.pad(feats, ((0, 0), (1, 1), (1, 1), (0, 0)))
    feats = tf.nn.l2_normalize(sparse_block(feats), axis=3)

    pixel_map = tf.nn.elu(
        conv(feats, 'h1', nf=512, rf=1, stride=1, pad='SAME', **init))
    pixel_map = softmax_pixelwise(
        conv(pixel_map, 'h2', nf=2, rf=1, stride=1, pad='SAME', **init))
    pixel_map = tf.reduce_sum(pixel_map, axis=-1, keepdims=True)

    weighted = tf.reshape(feats * pixel_map, (-1, 3136))
    latent = final_linear(weighted)

    return NetworkOutput(
        policy_latent=latent,
        recurrent_tensors=None,
        extra=pixel_map,
    )
def dcgan_cnn(images, dout, **conv_kwargs):
    """
    Conv encoder with batch-norm + leaky-ReLU after every layer.

    ``images`` is used as given (no scaling happens here). Each of the three
    conv layers is followed by ``batch_norm`` and leaky ReLU (alpha=0.2).
    The flattened result goes through ``fc`` 'final' (nh=512) and then
    ``fc`` 'fc1' (nh=``dout``), which is again batch-normed and activated.

    Returns a tuple ``(out, l3.shape)`` where ``l3`` is the activated output
    of the third conv layer.
    """
    gain = np.sqrt(2)

    def bn_lrelu(name, inpt):
        return tf.nn.leaky_relu(batch_norm(name, inpt), alpha=0.2)

    def conv_layer(inputs, name, nf, rf, stride):
        return conv(inputs, name, nf=nf, rf=rf, stride=stride,
                    init_scale=gain, **conv_kwargs)

    l1 = bn_lrelu('l1', conv_layer(images, 'c1', 32, 8, 4))
    l2 = bn_lrelu('l2', conv_layer(l1, 'c2', 64, 4, 2))
    l3 = bn_lrelu('l3', conv_layer(l2, 'c3', 64, 3, 1))

    hidden = fc(conv_to_fc(l3), nh=512, scope='final')
    out = bn_lrelu('out', fc(hidden, 'fc1', nh=dout, init_scale=gain))
    return out, l3.shape
def network_fn(X):
    """
    Combine an image branch with a force vector.

    ``X`` is unpacked into ``(forces, im)``. An example recorded by the
    original author: forces of shape (1, 6) and an image of shape
    (1, 50, 50, 3), both float32.

    The image goes through two ReLU conv layers ('c1', 'c2'; rf=2,
    stride=2) and ``conv_to_fc``; the result is concatenated with ``forces``
    along axis 1 and passed through ReLU ``fc`` layers 'fc1' (nh=30) and
    'fc2' (nh=6). ``conv_kwargs`` comes from the enclosing scope.
    """
    forces, im = X
    gain = np.sqrt(2)
    relu = tf.nn.relu

    # (scope name, nf); both layers use rf=2, stride=2
    for layer_name, nf in (('c1', 8), ('c2', 32)):
        im = relu(conv(im, layer_name, nf=nf, rf=2, stride=2,
                       init_scale=gain, **conv_kwargs))

    joint = tf.concat([conv_to_fc(im), forces], axis=1)
    joint = relu(fc(joint, 'fc1', nh=30, init_scale=gain))
    return relu(fc(joint, 'fc2', nh=6, init_scale=gain))
def dcgan_cnn(unscaled_images, **conv_kwargs):
    """
    Three conv layers, each followed by batch-norm and leaky ReLU.

    The input is cast to float32 and divided by 255. The activated output of
    the last conv layer is flattened with ``conv_to_fc`` and returned.
    Extra keyword arguments are forwarded to every ``conv`` call.
    """
    gain = np.sqrt(2)
    net = tf.cast(unscaled_images, tf.float32) / 255.
    # (batch-norm name, conv scope, nf, rf, stride)
    stages = (('l1', 'c1', 32, 8, 4),
              ('l2', 'c2', 64, 4, 2),
              ('l3', 'c3', 64, 3, 1))
    for bn_name, conv_name, nf, rf, stride in stages:
        pre_act = conv(net, conv_name, nf=nf, rf=rf, stride=stride,
                       init_scale=gain, **conv_kwargs)
        net = tf.nn.leaky_relu(batch_norm(pre_act, bn_name))
    return conv_to_fc(net)
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False):  #pylint: disable=W0613
    """
    Build a stateless conv policy/value graph.

    Observations (uint8 placeholder) are scaled by 1/255 and passed through
    conv layers 'c1'..'c3' and ``fc`` 'fc1' inside the "model" scope. The
    linear ``fc`` head 'pi' (init_scale=0.01) feeds the probability
    distribution from ``make_pdtype``; column 0 of the linear head 'v' is
    the value.

    Exposes ``X``, ``pi``, ``vf``, ``pdtype``, ``pd``, ``initial_state``
    (None) and the callables ``step`` and ``value``. ``nsteps`` is unused.
    """
    height, width, channels = ob_space.shape
    n_actions = ac_space.n
    gain = np.sqrt(2)
    X = tf.placeholder(tf.uint8, (nbatch, height, width, channels))  # obs

    with tf.variable_scope("model", reuse=reuse):
        net = tf.cast(X, tf.float32) / 255.
        for layer_name, nf, rf, stride in (('c1', 32, 8, 4),
                                           ('c2', 64, 4, 2),
                                           ('c3', 64, 3, 1)):
            net = conv(net, layer_name, nf=nf, rf=rf, stride=stride,
                       init_scale=gain)
        hidden = fc(conv_to_fc(net), 'fc1', nh=512, init_scale=gain)
        pi = fc(hidden, 'pi', n_actions, act=lambda x: x, init_scale=0.01)
        vf = fc(hidden, 'v', 1, act=lambda x: x)[:, 0]

    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(pi)

    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = None

    def step(ob, *_args, **_kwargs):
        action, val, neglogp = sess.run([a0, vf, neglogp0], {X: ob})
        return action, val, self.initial_state, neglogp

    def value(ob, *_args, **_kwargs):
        return sess.run(vf, {X: ob})

    self.X = X
    self.pi, self.vf = pi, vf
    self.step, self.value = step, value
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.

    Scales the input (float32 cast, divide by 255), stacks three ReLU
    ``conv`` layers, flattens with ``conv_to_fc`` and returns the ReLU
    output of ``fc`` 'fc1' (nh=512). ``conv_kwargs`` is forwarded to every
    ``conv`` call.
    """
    gain = np.sqrt(2)
    net = tf.cast(unscaled_images, tf.float32) / 255.
    # (scope name, nf, rf, stride)
    for layer_name, nf, rf, stride in (('c1', 32, 8, 4),
                                       ('c2', 64, 4, 2),
                                       ('c3', 64, 3, 1)):
        net = tf.nn.relu(
            conv(net, layer_name, nf=nf, rf=rf, stride=stride,
                 init_scale=gain, **conv_kwargs))
    return tf.nn.relu(fc(conv_to_fc(net), 'fc1', nh=512, init_scale=gain))
def nature_cnn(scaled_images, **conv_kwargs):
    """
    Model used in the paper "Human-level control through deep reinforcement learning"
    https://www.nature.com/articles/nature14236

    ``scaled_images`` is fed to the first conv layer as given; no scaling is
    done here. Three ReLU conv layers are followed by ``conv_to_fc`` and
    the ReLU ``fc`` layer 'fc1' (nh=512).
    """
    gain = np.sqrt(2)
    relu = tf.nn.relu

    conv1 = relu(conv(scaled_images, 'c1', nf=32, rf=8, stride=4,
                      init_scale=gain, **conv_kwargs))
    conv2 = relu(conv(conv1, 'c2', nf=64, rf=4, stride=2,
                      init_scale=gain, **conv_kwargs))
    conv3 = relu(conv(conv2, 'c3', nf=64, rf=3, stride=1,
                      init_scale=gain, **conv_kwargs))

    flat = conv_to_fc(conv3)
    return relu(fc(flat, 'fc1', nh=512, init_scale=gain))
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False):  #pylint: disable=W0613
    """
    Build a conv encoder with separate MLP policy and value heads.

    ``self.is_discrete`` is True when ``ac_space`` is a
    ``gym.spaces.Discrete``; the action dimension is then ``ac_space.n``,
    otherwise ``ac_space.shape[0]``.

    Inside the "model" scope the float32 observation placeholder 'Ob' goes
    through conv layers 'c1' and 'c2' and ``conv_to_fc``. Two tanh ``fc``
    stacks ('pi_fc1'/'pi_fc2' and 'vf_fc1'/'vf_fc2') feed the 'pi' and
    'vf' heads. A 'logstd' variable is always created; it is only used to
    build the distribution parameters for non-discrete action spaces.

    Exposes ``X``, ``pi``, ``vf``, ``pdtype``, ``pd``, ``initial_state``
    (None) and the callables ``step`` and ``value``. ``nsteps`` is unused.

    NOTE(review): 'pi' and 'vf' are created without an explicit ``act``;
    whether they are linear depends on the default of ``fc`` -- confirm.
    """
    self.is_discrete = isinstance(ac_space, gym.spaces.Discrete)
    ob_shape = (nbatch, ) + ob_space.shape
    actdim = ac_space.n if self.is_discrete else ac_space.shape[0]
    X = tf.placeholder(tf.float32, ob_shape, name='Ob')  #obs

    with tf.variable_scope("model", reuse=reuse):
        h_c = conv(X, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
        h2_c = conv(h_c, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
        # Bug fix: flatten the output of 'c2'. The original flattened h_c,
        # which left the second conv layer disconnected from both heads.
        h2_c = conv_to_fc(h2_c)

        # Policy head.
        h1 = fc(h2_c, 'pi_fc1', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
        h2 = fc(h1, 'pi_fc2', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
        pi = fc(h2, 'pi', actdim, init_scale=0.01)

        # Value head (does not share the fc layers with the policy).
        h1 = fc(h2_c, 'vf_fc1', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
        h2 = fc(h1, 'vf_fc2', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
        vf = fc(h2, 'vf', 1)[:, 0]

        logstd = tf.get_variable(name="logstd", shape=[1, actdim],
                                 initializer=tf.zeros_initializer())

    # `pi * 0.0 + logstd` gives logstd the same leading dimension as pi.
    pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)

    self.pdtype = make_pdtype(ac_space)
    if self.is_discrete:
        self.pd = self.pdtype.pdfromflat(pi)
    else:
        self.pd = self.pdtype.pdfromflat(pdparam)
    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = None

    def step(ob, *_args, **_kwargs):
        # NOTE(review): step() divides observations by 255 but value() feeds
        # them unchanged -- confirm which scaling the training feed uses.
        a, v, neglogp = sess.run([a0, vf, neglogp0],
                                 {X: ob.astype(np.float32) / 255.0})
        a = a[0]  # only the first sampled action is returned
        return a, v, self.initial_state, neglogp

    def value(ob, *_args, **_kwargs):
        return sess.run(vf, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False, is_discrete=False):  #pylint: disable=W0613
    """
    Build a conv policy/value graph for discrete or continuous actions.

    ``self.is_discrete`` is derived from ``ac_space`` (True for
    ``gym.spaces.Discrete``); the ``is_discrete`` argument is not read.
    ``nbatch`` is printed as a side effect.

    Inside the "model" scope the float32 observation placeholder goes
    through conv layers 'c1'..'c3' and ``conv_to_fc``. Separate ``fc``
    layers 'fc1' and 'fc_vf' feed the 'pi' head and the linear 'v' head.
    For non-discrete spaces a 'logstd' variable is created and
    concatenated with 'pi' to form the distribution parameters.

    Exposes ``X``, ``pi``, ``vf``, ``pdtype``, ``pd``, ``initial_state``
    (None) and the callables ``step`` and ``value``. ``nsteps`` is unused.
    """
    self.is_discrete = bool(isinstance(ac_space, gym.spaces.Discrete))
    print("nbatch%d" % (nbatch))

    height, width, channels = ob_space.shape
    nact = ac_space.n if self.is_discrete else ac_space.shape[0]
    gain = np.sqrt(2)
    X = tf.placeholder(tf.float32, (nbatch, height, width, channels))  # obs

    with tf.variable_scope("model", reuse=reuse):
        net = X
        for layer_name, nf, rf, stride in (('c1', 32, 8, 4),
                                           ('c2', 64, 4, 2),
                                           ('c3', 64, 3, 1)):
            net = conv(net, layer_name, nf=nf, rf=rf, stride=stride,
                       init_scale=gain)
        flat = conv_to_fc(net)
        pi_hidden = fc(flat, 'fc1', nh=512, init_scale=gain)
        vf_hidden = fc(flat, 'fc_vf', nh=512, init_scale=gain)
        pi = fc(pi_hidden, 'pi', nact, init_scale=0.05)
        vf = fc(vf_hidden, 'v', 1, act=lambda x: x)[:, 0]
        if not self.is_discrete:
            logstd = tf.get_variable(name="logstd", shape=[1, nact],
                                     initializer=tf.zeros_initializer())

    self.pdtype = make_pdtype(ac_space)
    if self.is_discrete:
        flat_params = pi
    else:
        # `pi * 0.0 + logstd` gives logstd the same leading dimension as pi.
        flat_params = tf.concat([pi, pi * 0.0 + logstd], axis=1)
    self.pd = self.pdtype.pdfromflat(flat_params)
    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = None

    def step(ob, *_args, **_kwargs):
        # NOTE(review): step() divides observations by 255 but value() feeds
        # them unchanged -- confirm which scaling the training feed uses.
        feed = {X: ob.astype(np.float32) / 255.0}
        a, v, neglogp = sess.run([a0, vf, neglogp0], feed)
        assert (a.shape[0] == 1)  # make sure a = a[0] doesn't throw away actions
        a = a[0]
        print(a, v, neglogp)
        return a, v, self.initial_state, neglogp

    def value(ob, *_args, **_kwargs):
        return sess.run(vf, {X: ob})

    self.X = X
    self.pi, self.vf = pi, vf
    self.step, self.value = step, value
def nature_cnn(X):
    """
    Three ReLU-activated conv layers applied to ``X``.

    ``X`` is used as given (no cast or scaling happens here). Layers 'x1',
    'x2' and 'x3' are all created with ``init_scale=1.0``. The activated
    output of 'x3' is returned without flattening.
    """
    net = X
    # (scope name, nf, rf, stride)
    for layer_name, nf, rf, stride in (('x1', 128, 8, 4),
                                       ('x2', 64, 4, 2),
                                       ('x3', 64, 4, 2)):
        net = tf.nn.relu(
            conv(net, layer_name, nf=nf, rf=rf, stride=stride, init_scale=1.0))
    return net
def network_fn(X, mode="pi"):
    """
    Nature-style conv stack with optional batch-norm and dropout per mode.

    Only the 'pad', 'data_format' and 'one_dim_bias' entries of the
    enclosing ``conv_kwargs`` are forwarded to ``conv``. ``X`` is cast to
    float32 and divided by 255.

    After each conv layer: batch-norm is applied when the flag for the
    current ``mode`` (``batchnormpi`` / ``batchnormvf``) is set, then ReLU,
    then dropout when the corresponding ``dropoutpi`` / ``dropoutvf`` value
    is below 1.0. All these settings come from the enclosing scope.

    The result is flattened and passed through the ReLU ``fc`` layer 'fc1'
    (nh=512); neither batch-norm nor dropout is applied after that layer.
    """
    filtered_conv_kwargs = {
        key: conv_kwargs[key]
        for key in ("pad", "data_format", "one_dim_bias")
        if key in conv_kwargs
    }
    gain = np.sqrt(2)
    relu = tf.nn.relu

    def addbndrp(h):
        # Optional batch-norm -> ReLU -> optional dropout, chosen by `mode`.
        if (batchnormpi and mode == "pi") or (batchnormvf and mode == "vf"):
            training_flag = isbnpitrainmode if mode == "pi" else isbnvftrainmode
            h = tf.contrib.layers.batch_norm(h,
                                             center=True,
                                             scale=True,
                                             is_training=training_flag,
                                             updates_collections=None)
        h = relu(h)
        if dropoutpi < 1.0 and mode == "pi":
            h = tf.nn.dropout(h, keep_prob=dropoutpi_keep_prob)
        if dropoutvf < 1.0 and mode == "vf":
            h = tf.nn.dropout(h, keep_prob=dropoutvf_keep_prob)
        return h

    net = tf.cast(X, tf.float32) / 255.
    for layer_name, nf, rf, stride in (('c1', 32, 8, 4),
                                       ('c2', 64, 4, 2),
                                       ('c3', 64, 3, 1)):
        net = addbndrp(
            conv(net, layer_name, nf=nf, rf=rf, stride=stride,
                 init_scale=gain, **filtered_conv_kwargs))
    return relu(fc(conv_to_fc(net), 'fc1', nh=512, init_scale=gain))
def network_fn(X):
    """
    Small conv network ending in one output per action.

    ``X`` is used as given. Two ReLU "VALID" conv layers ('c1', 'c2') are
    flattened with ``conv_to_fc`` and followed by the ReLU ``fc`` layer
    'fc1' (nh=30). The result of ``fc`` 'fc3' with
    ``nh=net_kwargs["nactions"]`` is returned with no activation applied
    here. ``net_kwargs`` comes from the enclosing scope.
    """
    relu = tf.nn.relu
    net = relu(conv(X, 'c1', nf=16, rf=8, stride=4, pad="VALID"))
    net = relu(conv(net, 'c2', nf=32, rf=4, stride=2, pad="VALID"))
    hidden = relu(fc(conv_to_fc(net), 'fc1', nh=30))
    return fc(hidden, 'fc3', nh=net_kwargs["nactions"])
def network_fn(X):
    """
    Two conv layers followed by a funnel of fully connected layers.

    ``X`` is used as given. Conv layers 'c1' and 'c2' (nf=64, rf=3,
    stride=2, "VALID") and ``fc`` layers 'fc1', 'fc2', 'fc4' (nh=128, 64,
    32) are all ReLU-activated. The result of ``fc`` 'fc5' with
    ``nh=net_kwargs["nactions"]`` is returned with no activation applied
    here. ``net_kwargs`` comes from the enclosing scope.
    """
    net = X
    for layer_name in ('c1', 'c2'):
        net = tf.nn.relu(conv(net, layer_name, nf=64, rf=3, stride=2, pad="VALID"))

    net = conv_to_fc(net)
    # (scope name, nh)
    for layer_name, width in (('fc1', 128), ('fc2', 64), ('fc4', 32)):
        net = tf.nn.relu(fc(net, layer_name, nh=width))

    return fc(net, 'fc5', nh=net_kwargs["nactions"])
def network_fn(X):
    """
    Compact two-layer conv network.

    ``X`` is cast to float32 and divided by 255. ReLU conv layers 'c1'
    (nf=8) and 'c2' (nf=16) are flattened with ``conv_to_fc`` and followed
    by the ReLU ``fc`` layer 'fc1' (nh=128). ``conv_kwargs`` comes from the
    enclosing scope and is forwarded to both ``conv`` calls.
    """
    gain = np.sqrt(2)
    relu = tf.nn.relu

    scaled = tf.cast(X, tf.float32) / 255.
    conv1 = relu(conv(scaled, 'c1', nf=8, rf=8, stride=4, init_scale=gain,
                      **conv_kwargs))
    conv2 = relu(conv(conv1, 'c2', nf=16, rf=4, stride=2, init_scale=gain,
                      **conv_kwargs))
    return relu(fc(conv_to_fc(conv2), 'fc1', nh=128, init_scale=gain))
def feature_net(unscaled_images):
    """
    Small conv feature extractor.

    The input is cast to float32 but NOT divided by 255. Three ReLU conv
    layers ('c1', 'c2', 'c3') are flattened with ``conv_to_fc`` and
    followed by the ReLU ``fc`` layer 'fc1' (nh=128).
    """
    gain = np.sqrt(2)
    net = tf.cast(unscaled_images, tf.float32)  # deliberately no / 255.
    # (scope name, nf, rf, stride)
    for layer_name, nf, rf, stride in (('c1', 8, 8, 4),
                                       ('c2', 16, 4, 2),
                                       ('c3', 16, 3, 1)):
        net = tf.nn.relu(
            conv(net, layer_name, nf=nf, rf=rf, stride=stride, init_scale=gain))
    return tf.nn.relu(fc(conv_to_fc(net), 'fc1', nh=128, init_scale=gain))
def sparse_block(x, relu=True):
    """
    Three strided conv layers ('c1', 'c2', 'c3').

    ReLU follows 'c1' and 'c2' unconditionally; it follows 'c3' only when
    ``relu`` is truthy. Every layer uses ``init_scale=np.sqrt(2)``.
    """
    init = {'init_scale': np.sqrt(2)}
    # (scope name, nf, rf, stride, apply ReLU afterwards?)
    plan = (('c1', 32, 8, 4, True),
            ('c2', 64, 4, 2, True),
            ('c3', 64, 3, 1, relu))
    for layer_name, nf, rf, stride, activate in plan:
        x = conv(x, layer_name, nf=nf, rf=rf, stride=stride, **init)
        if activate:
            x = tf.nn.relu(x)
    return x
def dense_block(x, relu=True):
    """
    Three stride-1 'SAME'-padded conv layers ('c1', 'c2', 'c3').

    ReLU follows 'c1' and 'c2' unconditionally; it follows 'c3' only when
    ``relu`` is truthy. Every layer uses ``init_scale=np.sqrt(2)``.
    """
    init = {'init_scale': np.sqrt(2)}

    def same_conv(inputs, name, nf, rf):
        return conv(inputs, name, nf=nf, rf=rf, stride=1, pad='SAME', **init)

    x = tf.nn.relu(same_conv(x, 'c1', 32, 7))
    x = tf.nn.relu(same_conv(x, 'c2', 64, 5))
    x = same_conv(x, 'c3', 64, 3)
    return tf.nn.relu(x) if relu else x
def preprocess(X):
    """
    Scale ``X`` and run it through conv layers 'c1', 'c2', 'c3'.

    ``X`` is cast to float32 and divided by 255. No activation is applied
    here between the ``conv`` calls. The output of 'c3' is flattened with
    ``conv_to_fc`` and returned.
    """
    gain = np.sqrt(2)
    net = tf.cast(X, tf.float32) / 255.
    # (scope name, nf, rf, stride)
    for layer_name, nf, rf, stride in (('c1', 32, 8, 4),
                                       ('c2', 64, 4, 2),
                                       ('c3', 64, 3, 1)):
        net = conv(net, layer_name, nf=nf, rf=rf, stride=stride, init_scale=gain)
    return conv_to_fc(net)
def lenet_cnn(scaled_images, **conv_kwargs):
    """
    LeNet-style network with leaky-ReLU activations.

    ``scaled_images`` is used as given. Conv layers 'c1' (nf=6) and 'c2'
    (nf=16) are each followed by a 2x2 'SAME' max-pool with strides of 1
    ('p1', 'p2'); conv layer 'c3' (nf=120) is flattened with ``conv_to_fc``
    and followed by the leaky-ReLU ``fc`` layer 'fc1' (nh=128).
    ``conv_kwargs`` is forwarded to every ``conv`` call.
    """
    gain = np.sqrt(2)

    def lrelu_conv(inputs, name, nf):
        # All conv layers share rf=5, stride=1.
        return tf.nn.leaky_relu(
            conv(inputs, name, nf=nf, rf=5, stride=1, init_scale=gain,
                 **conv_kwargs))

    def pool(inputs, name):
        return tf.nn.max_pool(inputs, [1, 2, 2, 1], [1, 1, 1, 1],
                              padding='SAME', name=name)

    net = pool(lrelu_conv(scaled_images, 'c1', 6), 'p1')
    net = pool(lrelu_conv(net, 'c2', 16), 'p2')
    net = lrelu_conv(net, 'c3', 120)
    return tf.nn.leaky_relu(fc(conv_to_fc(net), 'fc1', nh=128, init_scale=gain))
def nature_cnn(unscaled_images):
    """
    CNN from Nature paper.

    Casts the input to float32 and divides by 255, applies three ReLU
    ``conv`` layers, flattens with ``conv_to_fc`` and returns the ReLU
    output of ``fc`` 'fc1' (nh=512).
    """
    gain = np.sqrt(2)

    def relu_conv(inputs, name, nf, rf, stride):
        return tf.nn.relu(
            conv(inputs, name, nf=nf, rf=rf, stride=stride, init_scale=gain))

    net = tf.cast(unscaled_images, tf.float32) / 255.
    net = relu_conv(net, 'c1', 32, 8, 4)
    net = relu_conv(net, 'c2', 64, 4, 2)
    net = relu_conv(net, 'c3', 64, 3, 1)
    return tf.nn.relu(fc(conv_to_fc(net), 'fc1', nh=512, init_scale=gain))
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, nlstm=256, reuse=False):
    """
    Build a conv + ``lnlstm`` policy/value graph over stacked frames.

    Placeholders are created for observations (uint8, ``nc * nstack``
    channels), the done-mask and the recurrent state. Inside the "model"
    scope the observations are scaled by 1/255 and passed through conv
    layers 'c1'..'c3', ``fc`` 'fc1' and ``lnlstm`` 'lstm1'; linear ``fc``
    heads 'pi' and 'v' sit on top.

    Exposes ``X``, ``M``, ``S``, ``pi``, ``vf``, ``initial_state`` (zeros
    of shape ``(nenv, nlstm * 2)``) and the callables ``step``, ``value``.
    """
    nbatch = nenv * nsteps
    height, width, channels = ob_space.shape
    n_actions = ac_space.n
    gain = np.sqrt(2)

    X = tf.placeholder(tf.uint8, (nbatch, height, width, channels * nstack))  # obs
    M = tf.placeholder(tf.float32, [nbatch])  # mask (done t-1)
    S = tf.placeholder(tf.float32, [nenv, nlstm * 2])  # states

    with tf.variable_scope("model", reuse=reuse):
        net = tf.cast(X, tf.float32) / 255.
        net = conv(net, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
        net = conv(net, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
        net = conv(net, 'c3', nf=64, rf=3, stride=1, init_scale=gain)
        dense = fc(conv_to_fc(net), 'fc1', nh=512, init_scale=gain)

        seq_inputs = batch_to_seq(dense, nenv, nsteps)
        seq_masks = batch_to_seq(M, nenv, nsteps)
        seq_outputs, snew = lnlstm(seq_inputs, seq_masks, S, 'lstm1', nh=nlstm)
        recurrent_out = seq_to_batch(seq_outputs)

        pi = fc(recurrent_out, 'pi', n_actions, act=lambda x: x)
        vf = fc(recurrent_out, 'v', 1, act=lambda x: x)

    v0 = vf[:, 0]
    a0 = sample(pi)
    self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

    def step(ob, state, mask):
        action, val, new_state = sess.run([a0, v0, snew],
                                          {X: ob, S: state, M: mask})
        return action, val, new_state

    def value(ob, state, mask):
        return sess.run(v0, {X: ob, S: state, M: mask})

    self.X, self.M, self.S = X, M, S
    self.pi, self.vf = pi, vf
    self.step, self.value = step, value
def nature_cnn(unscaled_images):
    """Convolutional parts of CNN from Nature paper

    Taken from baselines.ppo2.policies.py

    Parameters
    -------
    unscaled_images : tensorflow tensor
        Frame of shape (batchsize, x, y, c); cast to float32 and divided by
        255 before the first layer.

    Returns
    -------
    tensorflow tensor
        Output features of last convolutional layer with flattened x/y/c
        dimensions
    """
    gain = np.sqrt(2)
    net = tf.cast(unscaled_images, tf.float32) / 255.
    # (scope name, nf, rf, stride)
    for layer_name, nf, rf, stride in (('c1', 32, 8, 4),
                                       ('c2', 64, 4, 2),
                                       ('c3', 64, 3, 1)):
        net = tf.nn.relu(
            conv(net, layer_name, nf=nf, rf=rf, stride=stride, init_scale=gain))
    return conv_to_fc(net)
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
    """
    Build a stateless policy with one action head and two spatial heads.

    The observation size (32, 32, 3) and the number of actions (3) are
    hard-coded; ``ob_space`` and ``ac_space`` are not read.

    Graph, all under the "model" scope:
      * "common": two 'SAME'-padded conv layers on the float32-cast
        observations (no division by 255 here).
      * "pi1": flattened conv features -> ``fc`` 'fc1' (nh=256) -> softmax
        over the ``fc`` 'pi' output, plus the ``fc`` 'v' value head.
      * "xy0" / "xy1": a 1x1 conv with nf=1 on the shared conv features,
        flattened and soft-maxed.

    Exposes ``X``, ``pi``, ``pi_xy0``, ``pi_xy1``, ``vf``,
    ``initial_state`` (an empty list) and the callables ``step``, ``value``.

    NOTE(review): nesting of "pi1"/"xy0"/"xy1" under "model" is assumed --
    verify against the checkpoint variable names.
    """
    nbatch = nenv * nsteps
    nh, nw, nc = (32, 32, 3)
    ob_shape = (nbatch, nh, nw, nc * nstack)
    nact = 3
    X = tf.placeholder(tf.uint8, ob_shape) #obs
    with tf.variable_scope("model", reuse=reuse):
        with tf.variable_scope("common", reuse=reuse):
            # Shared trunk; 'SAME' padding with stride 1 on both layers.
            h = conv( tf.cast(X, tf.float32), 'c1', nf=32, rf=5, stride=1, init_scale=np.sqrt(2), pad="SAME")
            h2 = conv( h, 'c2', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), pad="SAME")
        with tf.variable_scope("pi1", reuse=reuse):
            h3 = conv_to_fc(h2)
            h4 = fc(h3, 'fc1', nh=256, init_scale=np.sqrt(2))
            pi_ = fc( h4, 'pi', nact)
            pi = tf.nn.softmax(pi_)
            vf = fc( h4, 'v', 1)
        with tf.variable_scope("xy0", reuse=reuse):
            # 1 x 1 convolution for dimensionality reduction
            pi_xy0_ = conv( h2, 'xy0', nf=1, rf=1, stride=1, init_scale=np.sqrt(2))
            pi_xy0__ = conv_to_fc(pi_xy0_)
            pi_xy0 = tf.nn.softmax(pi_xy0__)
        with tf.variable_scope("xy1", reuse=reuse):
            # Same structure as "xy0", with its own weights.
            pi_xy1_ = conv( h2, 'xy1', nf=1, rf=1, stride=1, init_scale=np.sqrt(2))
            pi_xy1__ = conv_to_fc(pi_xy1_)
            pi_xy1 = tf.nn.softmax(pi_xy1__)
    v0 = vf[:, 0]
    # NOTE(review): a0 is built from the soft-maxed `pi` and is not fetched
    # by step() or value() -- confirm whether it is still needed.
    a0 = sample(pi)
    self.initial_state = [] #not stateful
    def step(ob, *_args, **_kwargs):
        # Returns the three soft-maxed head outputs, the value and a dummy state.
        _pi1, _xy0, _xy1, _v = sess.run([pi, pi_xy0, pi_xy1, v0], {X: ob})
        return _pi1, _xy0, _xy1, _v, [] #dummy state
    def value(ob, *_args, **_kwargs):
        return sess.run(v0, {X: ob})
    self.X = X
    self.pi = pi
    self.pi_xy0 = pi_xy0
    self.pi_xy1 = pi_xy1
    self.vf = vf
    self.step = step
    self.value = value