def mujoco_cnn(unscaled_images, name, nbatch, add_flownet,
               unscaled_previous_images, flownet, train_from_scratch,
               large_cnn, diff_frames):
    """
    Nature-paper-style CNN, with an optional FlowNet input branch and an
    optional larger residual variant.
    """
    # scale image input
    scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    if add_flownet:
        # concatenate scaled_images with flow from flownet
        assert unscaled_previous_images is not None
        assert flownet is not None
        scaled_previous_images = tf.cast(unscaled_previous_images, tf.float32) / 255.
        img_stack = tf.concat([scaled_previous_images, scaled_images], axis=3)
        flow, _ = flownet(img_stack, trainable=train_from_scratch, size=nbatch)
        if not train_from_scratch:
            flow = tf.stop_gradient(flow)
        scaled_images = tf.concat([flow, scaled_images], axis=3)
    if diff_frames:
        # replace the previous frame channels with the frame difference
        half_size = scaled_images.get_shape().as_list()[-1] // 2
        img, pre_img = tf.split(scaled_images, [half_size, half_size], axis=3)
        pre_img = pre_img - img
        scaled_images = tf.concat([img, pre_img], axis=3)
    activ = tf.nn.relu
    if not large_cnn:
        h = activ(conv(scaled_images, name + '_c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2)))
        h2 = activ(conv(h, name + '_c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2)))
        h3 = activ(conv(h2, name + '_c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2)))
        return conv_to_fc(h3)
    else:
        h = activ(conv(scaled_images, name + '_c1', nf=32, rf=3, stride=1, init_scale=np.sqrt(2)))
        skip = conv(h, name + '_c2', nf=64, rf=3, stride=2, init_scale=np.sqrt(2))
        h = activ(skip)
        h = activ(conv(h, name + '_c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), pad='SAME'))
        h = activ(conv(h, name + '_c4', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), pad='SAME') + skip)
        skip = conv(h, name + '_c5', nf=64, rf=3, stride=2, init_scale=np.sqrt(2))
        h = activ(skip)
        h = activ(conv(h, name + '_c6', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), pad='SAME'))
        h = activ(conv(h, name + '_c7', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), pad='SAME') + skip)
        h = activ(conv(h, name + '_c8', nf=64, rf=3, stride=2, init_scale=np.sqrt(2)))
        h = conv_to_fc(h)
        return activ(fc(h, 'fc1', nh=110, init_scale=np.sqrt(2)))
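# A minimal call sketch for the non-FlowNet path (hypothetical input shape;
# `obs_ph` is an assumed 84x84 RGB placeholder defined by the caller):
obs_ph = tf.placeholder(tf.uint8, [None, 84, 84, 3])
features = mujoco_cnn(obs_ph, 'pi', nbatch=None,
                      add_flownet=False, unscaled_previous_images=None,
                      flownet=None, train_from_scratch=False,
                      large_cnn=False, diff_frames=False)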
def network_fn(X):
    activ = tf.nn.relu
    h = activ(conv(X, 'c1', nf=32, rf=3, stride=1, pad='VALID', init_scale=np.sqrt(2), **conv_kwargs))
    h2 = activ(conv(h, 'c2', nf=32, rf=3, stride=1, pad='VALID', init_scale=np.sqrt(2), **conv_kwargs))
    h3 = activ(conv(h2, 'c3', nf=32, rf=3, stride=1, pad='VALID', init_scale=np.sqrt(2), **conv_kwargs))
    h4 = activ(conv(h3, 'c4', nf=64, rf=3, stride=1, pad='VALID', init_scale=np.sqrt(2), **conv_kwargs))
    h5 = activ(conv(h4, 'c5', nf=64, rf=2, stride=1, pad='VALID', init_scale=np.sqrt(2), **conv_kwargs))
    h5 = conv_to_fc(h5)
    x = conv_to_fc(X)
    x = tf.concat(axis=1, values=[x, h5])
    return activ(fc(x, 'fc1', nh=1, init_scale=np.sqrt(2)))
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False):
    ob_shape = (nbatch,) + ob_space.shape
    nact = ac_space.n
    # observation input
    X = tf.placeholder(tf.float32, ob_shape, name='Ob')  # obs
    # scale input by maximum (=3 for multiple snakes)
    Xscaled = X / 3
    with tf.variable_scope("model", reuse=reuse):
        activ = tf.nn.relu
        # policy network: 2 conv layers
        ph1 = activ(conv(Xscaled, 'pi_c1', nf=16, rf=3, stride=1, init_scale=np.sqrt(2)))
        ph2 = activ(conv(ph1, 'pi_c2', nf=32, rf=3, stride=1, init_scale=np.sqrt(2)))
        # convert to fully connected layer
        ph2 = conv_to_fc(ph2)
        # one fully connected hidden layer
        ph3 = activ(fc(ph2, 'pi_fc', nh=256, init_scale=np.sqrt(2)))
        # linear output layer to policy logits;
        # initialize with small weights to ensure large initial policy entropy
        pi = fc(ph3, 'pi', nact, init_scale=0.01)
        # value network: 2 conv layers
        vh1 = activ(conv(Xscaled, 'vf_c1', nf=16, rf=3, stride=1, init_scale=np.sqrt(2)))
        vh2 = activ(conv(vh1, 'vf_c2', nf=32, rf=3, stride=1, init_scale=np.sqrt(2)))
        # convert to fully connected layer
        vh2 = conv_to_fc(vh2)
        # one fully connected hidden layer
        vh3 = activ(fc(vh2, 'vf_fc', nh=128, init_scale=np.sqrt(2)))
        # linear output of value function
        vf = fc(vh3, 'vf', 1)[:, 0]

    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(pi)

    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = None

    def step(ob, *_args, **_kwargs):
        a, v, neglogp = sess.run([a0, vf, neglogp0], {X: ob})
        return a, v, self.initial_state, neglogp

    def value(ob, *_args, **_kwargs):
        return sess.run(vf, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
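# Hedged usage sketch (hypothetical `policy` instance of this class and a batch
# of observations `ob`): step() samples actions and also returns the value
# estimate and the sampled action's negative log-probability.
a, v, _, neglogp = policy.step(ob)
v_only = policy.value(ob)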
def nature_cnn(unscaled_images, keep_probs, **conv_kwargs):
    """
    CNN in the style of the Nature paper, with 3x3 filters and dropout on the
    final hidden layer.
    """
    # scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    activ = tf.nn.relu
    h = activ(conv(unscaled_images, 'c1', nf=32, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    h2 = activ(conv(h, 'c2', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = conv_to_fc(h3)
    h4 = activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
    return tf.nn.dropout(h4, keep_prob=keep_probs)
def network_fn(X):
    x = tf.cast(X, tf.float32) / 255.
    init = {'init_scale': np.sqrt(2)}
    x = conv(x, 'c1', nf=32, rf=8, stride=4, **init)
    x = tf.nn.relu(x)
    with tf.variable_scope('h1'):
        h1 = fls_module(x)
    x = x * h1
    x = conv(x, 'c2', nf=64, rf=4, stride=2, **init)
    x = tf.nn.relu(x)
    x = conv(x, 'c3', nf=64, rf=3, stride=1, **init)
    x = tf.nn.relu(x)
    x = conv_to_fc(x)
    x = final_linear(x)
    return NetworkOutput(
        policy_latent=x,
        recurrent_tensors=None,
        extra=h1,
    )
def dcgan_cnn(images, dout, **conv_kwargs):
    activ = lambda name, inpt: tf.nn.leaky_relu(batch_norm(name, inpt), alpha=0.2)
    l1 = activ('l1', conv(images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), **conv_kwargs))
    l2 = activ('l2', conv(l1, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), **conv_kwargs))
    l3 = activ('l3', conv(l2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    h = fc(conv_to_fc(l3), nh=512, scope='final')
    out = activ('out', fc(h, 'fc1', nh=dout, init_scale=np.sqrt(2)))
    return out, l3.shape
def network_fn(X):
    # X: [<tf.Tensor 'ppo2_model/Ob:0' shape=(1, 6) dtype=float32>,
    #     <tf.Tensor 'ppo2_model/Ob_1:0' shape=(1, 50, 50, 3) dtype=float32>]
    forces, im = X
    activ = tf.nn.relu
    im = activ(conv(im, 'c1', nf=8, rf=2, stride=2, init_scale=np.sqrt(2), **conv_kwargs))
    # h = activ(tf.layers.MaxPooling2D(2, 2)(h))
    im = activ(conv(im, 'c2', nf=32, rf=2, stride=2, init_scale=np.sqrt(2), **conv_kwargs))
    # h = activ(tf.layers.MaxPooling2D(2, 2)(h))
    im = conv_to_fc(im)
    h = tf.concat([im, forces], axis=1)
    h = activ(fc(h, 'fc1', nh=30, init_scale=np.sqrt(2)))
    h = activ(fc(h, 'fc2', nh=6, init_scale=np.sqrt(2)))
    return h
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.
    """
    scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    activ = tf.nn.relu
    # an 8x8 filter size is common on the very 1st conv layer, looking at the input image
    h = activ(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), **conv_kwargs))
    h2 = activ(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = conv_to_fc(h3)
    return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
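# A minimal usage sketch (assumed: a standard 84x84x4 Atari frame stack, and
# the baselines-style `conv`/`fc`/`conv_to_fc` helpers used above). The result
# is a (batch, 512) feature tensor ready for policy/value heads.
obs_ph = tf.placeholder(tf.uint8, [None, 84, 84, 4], name='obs')
with tf.variable_scope('model'):
    features = nature_cnn(obs_ph)  # shape (?, 512)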
def custom_cnn(unscaled_images, **conv_kwargs):
    scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    activ = tf.nn.leaky_relu
    h = activ(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), **conv_kwargs))
    h2 = activ(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = conv_to_fc(h3)
    return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
def __call__(self, obs, action, reuse=False):
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        if self.network == 'mlp':
            x = tf.concat([obs, action], axis=-1)  # this assumes observation and action can be concatenated
            x = self.network_builder(x)
            x = tf.layers.dense(x, 1,
                                kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3),
                                name='output')
        elif self.network == 'cloth_cnn':
            # --------------------------------------------------------------
            # To make this a little easier, here we're just going to explicitly
            # design our net. ASSUME WE ALREADY DIVIDE BY 255 IN DDPG.PY, so
            # obs uses tf.float32. This design is up for debate. For example,
            # we really should crop the net. We actually have a `cloth_cnn`,
            # but that's for PPO with only states as input.
            # --------------------------------------------------------------
            activ = tf.nn.tanh
            h = obs
            with tf.variable_scope("convnet"):
                h = activ(conv(h, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2)))
                h = activ(conv(h, 'c2', nf=32, rf=4, stride=2, init_scale=np.sqrt(2)))
                h = activ(conv(h, 'c3', nf=32, rf=3, stride=1, init_scale=np.sqrt(2)))
                h = conv_to_fc(h)
            with tf.variable_scope("fcnet"):
                h = tf.concat([h, action], axis=-1)  # this assumes observation and action can be concatenated
                h = activ(tf.layers.dense(h, 200,
                                          kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3),
                                          name='fc1'))
                x = tf.layers.dense(h, 1,
                                    kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3),
                                    name='output')
        else:
            raise ValueError(self.network)
    return x
def network_fn(X, nenv=1):
    nbatch = X.shape[0]
    nsteps = nbatch // nenv
    fm = nature_cnn(X, **conv_kwargs)
    fm_flat = conv_to_fc(fm)
    h = tf.nn.relu(fc(fm_flat, 'fc1', nh=nh, init_scale=np.sqrt(2)))

    M = tf.placeholder(tf.float32, [nbatch])           # mask (done t-1)
    S = tf.placeholder(tf.float32, [nenv, 2 * nlstm])  # states

    xs = batch_to_seq(h, nenv, nsteps)
    ms = batch_to_seq(M, nenv, nsteps)

    if layer_norm:
        h5, snew = utils.lnlstm(xs, ms, S, scope='lnlstm', nh=nlstm)
    else:
        h5, snew = utils.lstm(xs, ms, S, scope='lstm', nh=nlstm)

    h = seq_to_batch(h5)
    initial_state = np.zeros(S.shape.as_list(), dtype=float)

    return fm, h, {'S': S, 'M': M, 'state': snew, 'initial_state': initial_state}
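# Shape sketch of the batch<->sequence reshaping used above (semantics of
# baselines' batch_to_seq/seq_to_batch; the concrete numbers are hypothetical):
h = tf.zeros([4 * 5, 16])   # nenv=4, nsteps=5, 16 features per step
xs = batch_to_seq(h, 4, 5)  # list of 5 tensors, each of shape (4, 16)
h_back = seq_to_batch(xs)   # concatenated back to shape (20, 16)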
def CNN7(unscaled_images, index, filmObj):
    with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.contrib.layers.variance_scaling_initializer()):
        scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
        activ = tf.nn.relu
        # per-task FiLM biases, sliced out of the shared parameter vectors
        # w_1 = tf.slice(filmObj.film_w_1, index*32, [32])
        b_1 = tf.slice(filmObj.film_b_1, index*32, [32])
        # w_2 = tf.slice(filmObj.film_w_2, index*64, [64])
        b_2 = tf.slice(filmObj.film_b_2, index*64, [64])
        # w_3 = tf.slice(filmObj.film_w_3, index*48, [48])
        b_3 = tf.slice(filmObj.film_b_3, index*48, [48])
        h = slim.separable_conv2d(scaled_images, 32, 8, 1, 4)
        # h = tf.math.add(tf.multiply(h, temp['weights_1']), temp['bias_1'])
        h = tf.math.add(h, b_1)
        h2 = slim.separable_conv2d(h, 64, 4, 1, 2)
        # h2 = tf.math.add(tf.multiply(h2, temp['weights_2']), temp['bias_2'])
        h2 = tf.math.add(h2, b_2)
        h3 = slim.separable_conv2d(h2, 48, 3, 1, 1)
        # h3 = tf.math.add(tf.multiply(h3, temp['weights_3']), temp['bias_3'])
        h3 = tf.math.add(h3, b_3)
        h3 = conv_to_fc(h3)
        return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
def cnn7(unscaled_images, **conv_kwargs):
    """
    Network, 96x96 input:
        model/SeparableConv2d/depthwise_weights:0    (8, 8, 4, 1)
        model/SeparableConv2d/pointwise_weights:0    (1, 1, 4, 32)
        model/SeparableConv2d/biases:0               (32,)
        model/SeparableConv2d_1/depthwise_weights:0  (4, 4, 32, 1)
        model/SeparableConv2d_1/pointwise_weights:0  (1, 1, 32, 64)
        model/SeparableConv2d_1/biases:0             (64,)
        model/SeparableConv2d_2/depthwise_weights:0  (3, 3, 64, 1)
        model/SeparableConv2d_2/pointwise_weights:0  (1, 1, 64, 48)
        model/SeparableConv2d_2/biases:0             (48,)
        model/fc1/w:0                                (6912, 512)
        model/fc1/b:0                                (512,)
        model/v/w:0                                  (512, 1)
        model/v/b:0                                  (1,)
        model/pi/w:0                                 (512, 7)
        model/pi/b:0                                 (7,)
    Trainable variables: 3550296
    """
    with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.contrib.layers.variance_scaling_initializer()):
        scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
        activ = tf.nn.relu
        h = slim.separable_conv2d(scaled_images, 32, 8, 1, 4)
        h2 = slim.separable_conv2d(h, 64, 4, 1, 2)
        h3 = slim.separable_conv2d(h2, 48, 3, 1, 1)
        h3 = conv_to_fc(h3)
        return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
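# Shape check for the 96x96 input documented above (slim's default 'SAME'
# padding): 96/4 = 24 after the first separable conv, 24/2 = 12 after the
# second, still 12 after the stride-1 third, so the flattened size is
# 12 * 12 * 48 = 6912 -- matching model/fc1/w:0 of shape (6912, 512).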
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
    nbatch = nenv * nsteps
    nh, nw, nc = ob_space.shape
    ob_shape = (nbatch, nh, nw, nc * nstack)
    nact = ac_space.n
    X = tf.placeholder(tf.uint8, ob_shape)  # obs
    with tf.variable_scope("model", reuse=reuse):
        h = conv(tf.cast(X, tf.float32) / 255., 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
        h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
        h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
        h3 = conv_to_fc(h3)
        h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
        pi = fc(h4, 'pi', nact, act=lambda x: x)
        vf = fc(h4, 'v', 1, act=lambda x: x)

    v0 = vf[:, 0]
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        a, v = sess.run([a0, v0], {X: ob})
        return a, v, []  # dummy state

    def value(ob, *_args, **_kwargs):
        return sess.run(v0, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def dcgan_cnn(unscaled_images, **conv_kwargs):
    scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    activ = lambda name, inpt: tf.nn.leaky_relu(batch_norm(inpt, name))
    h = activ('l1', conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), **conv_kwargs))
    h2 = activ('l2', conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = activ('l3', conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    return conv_to_fc(h3)
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False):  # pylint: disable=W0613
    nh, nw, nc = ob_space.shape
    ob_shape = (nbatch, nh, nw, nc)
    nact = ac_space.n
    X = tf.placeholder(tf.uint8, ob_shape)  # obs
    with tf.variable_scope("model", reuse=reuse):
        h = conv(tf.cast(X, tf.float32) / 255., 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
        h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
        h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
        h3 = conv_to_fc(h3)
        h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
        pi = fc(h4, 'pi', nact, act=lambda x: x, init_scale=0.01)
        vf = fc(h4, 'v', 1, act=lambda x: x)[:, 0]

    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(pi)

    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = None

    def step(ob, *_args, **_kwargs):
        a, v, neglogp = sess.run([a0, vf, neglogp0], {X: ob})
        return a, v, self.initial_state, neglogp

    def value(ob, *_args, **_kwargs):
        return sess.run(vf, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def nature_cnn(unscaled_images, scope, **conv_kwargs):
    """
    CNN from Nature paper.
    """
    # unscaled_images = tf.placeholder(tf.float32, shape=[None, 84, 84, 1], name='unscaled_images')
    with tf.variable_scope(scope):
        scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
        activ = tf.nn.relu
        # an 8x8 filter size is common on the very 1st conv layer, looking at the input image
        h = activ(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), **conv_kwargs))
        h2 = activ(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), **conv_kwargs))
        h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
        h3 = conv_to_fc(h3)
        return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
def nature_cnn(scaled_images, **conv_kwargs):
    """
    Model used in the paper "Human-level control through deep
    reinforcement learning", https://www.nature.com/articles/nature14236
    """
    def activ(curr):
        return tf.nn.relu(curr)

    h = activ(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), **conv_kwargs))
    h2 = activ(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = conv_to_fc(h3)
    return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.
    """
    scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    activ = tf.nn.relu
    h = activ(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), **conv_kwargs))
    h2 = activ(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = conv_to_fc(h3)
    return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.
    """
    scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    activ = tf.nn.relu  # Rectified Linear Unit
    # init_scale=np.sqrt(2) is the gain used to scale the weight
    # initialization, a standard choice for ReLU activations
    h = activ(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), **conv_kwargs))
    h2 = activ(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    h3 = conv_to_fc(h3)
    return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
def cnn(unscaled_images, scope, activ=None, nfeat=None, reuse=False):
    scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    activ = activ or tf.nn.leaky_relu
    nfeat = nfeat or 512
    h = activ(conv(scaled_images, scope + '_conv1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), reuse=reuse))
    h2 = activ(conv(h, scope + '_conv2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), reuse=reuse))
    h3 = activ(conv(h2, scope + '_conv3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), reuse=reuse))
    h3 = conv_to_fc(h3)
    return fc(h3, scope + '_conv_to_fc', nh=nfeat, init_scale=np.sqrt(2), reuse=reuse)
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, nlstm=256, reuse=False, scope_name="model"):
    nenv = nbatch // nsteps
    nh, nw, nc = ob_space.shape
    ob_shape = (nbatch, nh, nw, nc)
    nact = ac_space.n
    X = tf.placeholder(tf.uint8, ob_shape)             # obs
    M = tf.placeholder(tf.float32, [nbatch])           # mask (done t-1)
    S = tf.placeholder(tf.float32, [nenv, nlstm * 2])  # states
    with tf.variable_scope(scope_name, reuse=reuse):
        h = conv(tf.cast(X, tf.float32) / 255., 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
        h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
        h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
        h3 = conv_to_fc(h3)
        h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
        xs = batch_to_seq(h4, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)
        h5, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
        h5 = seq_to_batch(h5)
        pi = fc(h5, 'pi', nact, act=lambda x: x)
        vf = fc(h5, 'v', 1, act=lambda x: x)

    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(pi)

    v0 = vf[:, 0]
    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

    def step(ob, state, mask):
        return sess.run([a0, v0, snew, neglogp0], {X: ob, S: state, M: mask})

    def value(ob, state, mask):
        return sess.run(v0, {X: ob, S: state, M: mask})

    self.X = X
    self.M = M
    self.S = S
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
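# Hedged usage sketch for the recurrent policy above (hypothetical `policy`
# instance and observation batch `obs`): the LSTM state is threaded through
# step(), and the mask carries the previous step's done flags so the state can
# be reset at episode boundaries.
state = policy.initial_state                # zeros of shape (nenv, 2*nlstm)
mask = np.zeros((nenv,), dtype=np.float32)  # no episodes done yet
a, v, state, neglogp = policy.step(obs, state, mask)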
def __init__(self, params, ob_space, ac_space, nbatch, nsteps):  # pylint: disable=W0613
    nenv = nbatch // nsteps
    ob_shape = (nbatch,) + ob_space.shape
    nact = ac_space.n
    X = tf.compat.v1.placeholder(tf.float32, ob_shape, name='Ob')  # obs
    with tf.name_scope('policy_new'):
        activ = tf.compat.v1.nn.relu
        h1 = activ(tf.compat.v1.nn.conv2d(X / 255.0, params['policy/c1/w:0'], [1, 4, 4, 1], 'VALID')
                   + params['policy/c1/b:0'])
        h2 = activ(tf.compat.v1.nn.conv2d(h1, params['policy/c2/w:0'], [1, 2, 2, 1], 'VALID')
                   + params['policy/c2/b:0'])
        h3 = activ(tf.compat.v1.nn.conv2d(h2, params['policy/c3/w:0'], [1, 1, 1, 1], 'VALID')
                   + params['policy/c3/b:0'])
        h3 = conv_to_fc(h3)
        h4 = activ(tf.compat.v1.nn.xw_plus_b(h3, params['policy/fc1/w:0'], params['policy/fc1/b:0']))
        pi = tf.compat.v1.nn.xw_plus_b(h4, params['policy/pi/w:0'], params['policy/pi/b:0'])

    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(pi)
    self.X = X
def __call__(self, obs, reuse=False):
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        assert self.network == 'cloth_cnn', self.network
        # --------------------------------------------------------------
        # To make this a little easier, we're just going to explicitly
        # design our net. ASSUME WE ALREADY DIVIDE BY 255 IN DDPG.PY, so
        # obs uses tf.float32. This design is up for debate. Similar to Jan
        # Matas' network, which I think is fair, but we give an option to
        # pretrain from ResNet. In either case, get a TENSOR as input.
        # --------------------------------------------------------------
        h = obs
        activ = tf.nn.tanh
        if self.use_keras:
            # The `h`, before and after `h = conv_to_fc(h)`, is:
            #   Tensor("obs0_f_imgs:0", shape=(?, 7, 7, 2048), dtype=float32)
            #   Tensor("actor/Reshape:0", shape=(?, 100352), dtype=float32)
            h = conv_to_fc(h)
            h = activ(tf.layers.dense(h, 256, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3), name='fc1'))
            h = activ(tf.layers.dense(h, 256, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3), name='fc2'))
            h = activ(tf.layers.dense(h, 256, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3), name='fc3'))
            h = tf.layers.dense(h, self.nb_actions, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3), name='fc4')
            x = tf.nn.tanh(h)  # restrict actions
        else:
            # Here's what the tensors look like:
            #   Tensor("truediv:0", shape=(?, 100, 100, 3), dtype=float32)
            #   Tensor("actor/convnet/Tanh:0", shape=(?, 49, 49, 32), dtype=float32)
            #   Tensor("actor/convnet/Tanh_1:0", shape=(?, 24, 24, 32), dtype=float32)
            #   Tensor("actor/convnet/Tanh_2:0", shape=(?, 22, 22, 32), dtype=float32)
            #   Tensor("actor/convnet/Tanh_3:0", shape=(?, 20, 20, 32), dtype=float32)
            #   Tensor("actor/convnet/Reshape:0", shape=(?, 12800), dtype=float32)
            #   Tensor("actor/fcnet/Tanh:0", shape=(?, 256), dtype=float32)
            #   Tensor("actor/fcnet/Tanh_1:0", shape=(?, 256), dtype=float32)
            #   Tensor("actor/fcnet/Tanh_2:0", shape=(?, 256), dtype=float32)
            #   Tensor("actor/fcnet/fc4/BiasAdd:0", shape=(?, 4), dtype=float32)
            with tf.variable_scope("convnet"):
                h = activ(conv(h, 'c1', nf=32, rf=3, stride=2, init_scale=np.sqrt(2)))
                h = activ(conv(h, 'c2', nf=32, rf=3, stride=2, init_scale=np.sqrt(2)))
                h = activ(conv(h, 'c3', nf=32, rf=3, stride=1, init_scale=np.sqrt(2)))
                h = activ(conv(h, 'c4', nf=32, rf=3, stride=1, init_scale=np.sqrt(2)))
                h = conv_to_fc(h)
            with tf.variable_scope("fcnet"):
                h = activ(tf.layers.dense(h, 256, name='fc1'))
                h = activ(tf.layers.dense(h, 256, name='fc2'))
                h = activ(tf.layers.dense(h, 256, name='fc3'))
                h = tf.layers.dense(h, self.nb_actions, name='fc4')
                x = tf.nn.tanh(h)  # restrict actions
    return x
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False, is_discrete=False):  # pylint: disable=W0613
    if isinstance(ac_space, gym.spaces.Discrete):
        self.is_discrete = True
    else:
        self.is_discrete = False
    print("nbatch %d" % nbatch)
    nh, nw, nc = ob_space.shape
    ob_shape = (nbatch, nh, nw, nc)
    if self.is_discrete:
        nact = ac_space.n
    else:
        nact = ac_space.shape[0]
    X = tf.placeholder(tf.float32, ob_shape)  # obs
    with tf.variable_scope("model", reuse=reuse):
        h = conv(X, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
        h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
        h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
        h3 = conv_to_fc(h3)
        h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
        h5 = fc(h3, 'fc_vf', nh=512, init_scale=np.sqrt(2))
        pi = fc(h4, 'pi', nact, init_scale=0.05)
        vf = fc(h5, 'v', 1, act=lambda x: x)[:, 0]
        if not self.is_discrete:
            logstd = tf.get_variable(name="logstd", shape=[1, nact],
                                     initializer=tf.zeros_initializer())

    self.pdtype = make_pdtype(ac_space)
    if self.is_discrete:
        self.pd = self.pdtype.pdfromflat(pi)
        a0 = self.pd.sample()
    else:
        pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)
        self.pd = self.pdtype.pdfromflat(pdparam)
        a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = None

    def step(ob, *_args, **_kwargs):
        a, v, neglogp = sess.run([a0, vf, neglogp0], {X: ob.astype(np.float32) / 255.0})
        assert a.shape[0] == 1  # make sure a = a[0] doesn't throw away actions
        a = a[0]
        print(a, v, neglogp)
        return a, v, self.initial_state, neglogp

    def value(ob, *_args, **_kwargs):
        # note: step() scales observations by 255 before feeding them, so
        # callers should be consistent about what they pass in here
        return sess.run(vf, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False):  # pylint: disable=W0613
    if isinstance(ac_space, gym.spaces.Discrete):
        self.is_discrete = True
    else:
        self.is_discrete = False
    ob_shape = (nbatch,) + ob_space.shape
    if self.is_discrete:
        actdim = ac_space.n
    else:
        actdim = ac_space.shape[0]
    X = tf.placeholder(tf.float32, ob_shape, name='Ob')  # obs
    with tf.variable_scope("model", reuse=reuse):
        h_c = conv(X, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
        h2_c = conv(h_c, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
        h2_c = conv_to_fc(h2_c)  # flatten the second conv layer's features
        h1 = fc(h2_c, 'pi_fc1', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
        h2 = fc(h1, 'pi_fc2', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
        pi = fc(h2, 'pi', actdim, init_scale=0.01)
        h1 = fc(h2_c, 'vf_fc1', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
        h2 = fc(h1, 'vf_fc2', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
        vf = fc(h2, 'vf', 1)[:, 0]
        logstd = tf.get_variable(name="logstd", shape=[1, actdim],
                                 initializer=tf.zeros_initializer())

    pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)
    self.pdtype = make_pdtype(ac_space)
    if self.is_discrete:
        self.pd = self.pdtype.pdfromflat(pi)
        a0 = self.pd.sample()
    else:
        self.pd = self.pdtype.pdfromflat(pdparam)
        a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = None

    def step(ob, *_args, **_kwargs):
        a, v, neglogp = sess.run([a0, vf, neglogp0], {X: ob.astype(np.float32) / 255.0})
        a = a[0]
        return a, v, self.initial_state, neglogp

    def value(ob, *_args, **_kwargs):
        return sess.run(vf, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def vgg19_cnn(scaled_images, **conv_kwargs):
    activ = tf.nn.relu
    # stage 1
    c1_1 = activ(conv(scaled_images, 'c1_1', nf=16, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c1_2 = activ(conv(c1_1, 'c1_2', nf=16, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    p1 = tf.nn.max_pool(c1_2, [1, 2, 2, 1], [1, 1, 1, 1], padding='SAME', name='p1')
    # stage 2
    c2_1 = activ(conv(p1, 'c2_1', nf=32, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c2_2 = activ(conv(c2_1, 'c2_2', nf=32, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    p2 = tf.nn.max_pool(c2_2, [1, 2, 2, 1], [1, 1, 1, 1], padding='SAME', name='p2')
    # stage 3
    c3_1 = activ(conv(p2, 'c3_1', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c3_2 = activ(conv(c3_1, 'c3_2', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c3_3 = activ(conv(c3_2, 'c3_3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c3_4 = activ(conv(c3_3, 'c3_4', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    p3 = tf.nn.max_pool(c3_4, [1, 2, 2, 1], [1, 1, 1, 1], padding='SAME', name='p3')
    # stage 4
    c4_1 = activ(conv(p3, 'c4_1', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c4_2 = activ(conv(c4_1, 'c4_2', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c4_3 = activ(conv(c4_2, 'c4_3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c4_4 = activ(conv(c4_3, 'c4_4', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    p4 = tf.nn.max_pool(c4_4, [1, 2, 2, 1], [1, 1, 1, 1], padding='SAME', name='p4')
    # stage 5
    c5_1 = activ(conv(p4, 'c5_1', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c5_2 = activ(conv(c5_1, 'c5_2', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c5_3 = activ(conv(c5_2, 'c5_3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    c5_4 = activ(conv(c5_3, 'c5_4', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    p5 = tf.nn.max_pool(c5_4, [1, 2, 2, 1], [1, 1, 1, 1], padding='SAME', name='p5')
    print(p5.shape)
    h1 = conv_to_fc(p5)
    print(h1.shape)
    # h1 = activ(fc(h1, 'fc1', nh=256, init_scale=np.sqrt(2)))
    # h2 = activ(fc(h1, 'fc2', nh=128, init_scale=np.sqrt(2)))
    # h3 = activ(fc(h2, 'fc2', nh=128, init_scale=np.sqrt(2)))
    # each bias size must match the output dimension of its weight matrix
    h1 = activ(fully_connected(h1, weight_variable([409600, 1024]), bias_variable([1024])))
    h2 = activ(fully_connected(h1, weight_variable([1024, 256]), bias_variable([256])))
    print(h2.shape)
    return h2
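# Size check (inferred from the hard-coded 409600 above): every max_pool here
# uses stride [1, 1, 1, 1], so the spatial resolution never shrinks; with 64
# channels after stage 5, 409600 / 64 = 6400 = 80 * 80, i.e. this network
# appears to be wired for 80x80 inputs.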
def network_fn(X, mode="pi"):
    filtered_conv_kwargs = {}

    def filter_kwargs(k):
        if k in conv_kwargs.keys():
            filtered_conv_kwargs[k] = conv_kwargs[k]

    filter_kwargs("pad")
    filter_kwargs("data_format")
    filter_kwargs("one_dim_bias")

    scaled_images = tf.cast(X, tf.float32) / 255.
    activ = tf.nn.relu
    bn = tf.contrib.layers.batch_norm
    drp = tf.nn.dropout

    def addbndrp(h):
        if (batchnormpi and mode == "pi") or (batchnormvf and mode == "vf"):
            h = bn(h, center=True, scale=True,
                   is_training=isbnpitrainmode if mode == "pi" else isbnvftrainmode,
                   updates_collections=None)
        h = activ(h)
        if dropoutpi < 1.0 and mode == "pi":
            h = drp(h, keep_prob=dropoutpi_keep_prob)
        if dropoutvf < 1.0 and mode == "vf":
            h = drp(h, keep_prob=dropoutvf_keep_prob)
        return h

    h = addbndrp(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), **filtered_conv_kwargs))
    h2 = addbndrp(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), **filtered_conv_kwargs))
    h3 = addbndrp(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **filtered_conv_kwargs))
    h3 = conv_to_fc(h3)
    return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
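# Note on the closure variables above (batchnormpi, batchnormvf, dropoutpi,
# dropoutvf, dropoutpi_keep_prob, dropoutvf_keep_prob, isbnpitrainmode,
# isbnvftrainmode): they are assumed to be defined in the enclosing builder.
# A training step would then feed something like (hypothetical names):
#   sess.run(train_op, {isbnpitrainmode: True, dropoutpi_keep_prob: dropoutpi, ...})
# while evaluation would feed isbnpitrainmode=False and a keep_prob of 1.0.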
def network_fn(X):
    h = conv(X, 'c1', nf=16, rf=8, stride=4, pad="VALID")
    h = tf.nn.relu(h)
    h = conv(h, 'c2', nf=32, rf=4, stride=2, pad="VALID")
    h = tf.nn.relu(h)
    h = conv_to_fc(h)
    h = tf.nn.relu(fc(h, 'fc1', nh=30))
    h = fc(h, 'fc3', nh=net_kwargs["nactions"])
    return h
def network_fn(X):
    h = tf.nn.relu(conv(X, 'c1', nf=64, rf=3, stride=2, pad="VALID"))
    h = tf.nn.relu(conv(h, 'c2', nf=64, rf=3, stride=2, pad="VALID"))
    h = conv_to_fc(h)
    h = tf.nn.relu(fc(h, 'fc1', nh=128))
    h = tf.nn.relu(fc(h, 'fc2', nh=64))
    h = tf.nn.relu(fc(h, 'fc4', nh=32))
    h = fc(h, 'fc5', nh=net_kwargs["nactions"])
    return h
def nature_cnn(unscaled_images):
    """
    CNN from Nature paper.
    """
    scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    activ = tf.nn.relu
    h = activ(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2)))
    h2 = activ(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2)))
    h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2)))
    h3 = conv_to_fc(h3)
    return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))
def nature_cnn(unscaled_images): """Convolutional parts of CNN from Nature paper Taken from baselines.ppo2.policies.py Parameters ------- unscaled_images : tensorflow tensor Frame of shape (batchsize, x, y, c) Returns ------- tensorflow tensor Output features of last convolutional layer with flattened x/y/c dimensions """ scaled_images = tf.cast(unscaled_images, tf.float32) / 255. activ = tf.nn.relu h = activ(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))) h2 = activ(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))) h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))) h3 = conv_to_fc(h3) return h3
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, nlstm=256, reuse=False):
    nbatch = nenv * nsteps
    nh, nw, nc = ob_space.shape
    ob_shape = (nbatch, nh, nw, nc * nstack)
    nact = ac_space.n
    X = tf.placeholder(tf.uint8, ob_shape)             # obs
    M = tf.placeholder(tf.float32, [nbatch])           # mask (done t-1)
    S = tf.placeholder(tf.float32, [nenv, nlstm * 2])  # states
    with tf.variable_scope("model", reuse=reuse):
        h = conv(tf.cast(X, tf.float32) / 255., 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
        h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
        h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
        h3 = conv_to_fc(h3)
        h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
        xs = batch_to_seq(h4, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)
        h5, snew = lnlstm(xs, ms, S, 'lstm1', nh=nlstm)
        h5 = seq_to_batch(h5)
        pi = fc(h5, 'pi', nact, act=lambda x: x)
        vf = fc(h5, 'v', 1, act=lambda x: x)

    v0 = vf[:, 0]
    a0 = sample(pi)
    self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

    def step(ob, state, mask):
        a, v, s = sess.run([a0, v0, snew], {X: ob, S: state, M: mask})
        return a, v, s

    def value(ob, state, mask):
        return sess.run(v0, {X: ob, S: state, M: mask})

    self.X = X
    self.M = M
    self.S = S
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
    nbatch = nenv * nsteps
    nh, nw, nc = (32, 32, 3)
    ob_shape = (nbatch, nh, nw, nc * nstack)
    nact = 3  # 524
    # nsub3 = 2
    # nsub4 = 5
    # nsub5 = 10
    # nsub6 = 4
    # nsub7 = 2
    # nsub8 = 4
    # nsub9 = 500
    # nsub10 = 4
    # nsub11 = 10
    # nsub12 = 500
    # (64, 64, 13)  # 80 * 24
    X = tf.placeholder(tf.uint8, ob_shape)  # obs
    with tf.variable_scope("model", reuse=reuse):
        with tf.variable_scope("common", reuse=reuse):
            h = conv(tf.cast(X, tf.float32), 'c1', nf=32, rf=5, stride=1,
                     init_scale=np.sqrt(2), pad="SAME")  # ?, 32, 32, 16
            h2 = conv(h, 'c2', nf=64, rf=3, stride=1,
                      init_scale=np.sqrt(2), pad="SAME")  # ?, 32, 32, 32
        with tf.variable_scope("pi1", reuse=reuse):
            h3 = conv_to_fc(h2)  # 131072
            h4 = fc(h3, 'fc1', nh=256, init_scale=np.sqrt(2))  # ?, 256
            pi_ = fc(h4, 'pi', nact)  # (nenv * nsteps, 524)  # ?, 524
            pi = tf.nn.softmax(pi_)
            vf = fc(h4, 'v', 1)  # (nenv * nsteps, 1)  # ?, 1
            # vf = tf.nn.l2_normalize(vf_, 1)
        with tf.variable_scope("xy0", reuse=reuse):
            # 1x1 convolution for dimensionality reduction
            pi_xy0_ = conv(h2, 'xy0', nf=1, rf=1, stride=1, init_scale=np.sqrt(2))  # (nenv * nsteps, 32, 32, 1)
            pi_xy0__ = conv_to_fc(pi_xy0_)  # 32 x 32 => 1024
            pi_xy0 = tf.nn.softmax(pi_xy0__)
        with tf.variable_scope("xy1", reuse=reuse):
            pi_xy1_ = conv(h2, 'xy1', nf=1, rf=1, stride=1, init_scale=np.sqrt(2))  # (nenv * nsteps, 32, 32, 1)
            pi_xy1__ = conv_to_fc(pi_xy1_)  # 32 x 32 => 1024
            pi_xy1 = tf.nn.softmax(pi_xy1__)

    v0 = vf[:, 0]
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        # obs, states, rewards, masks, actions, actions2, x1, y1, x2, y2, values
        _pi1, _xy0, _xy1, _v = sess.run([pi, pi_xy0, pi_xy1, v0], {X: ob})
        return _pi1, _xy0, _xy1, _v, []  # dummy state

    def value(ob, *_args, **_kwargs):
        return sess.run(v0, {X: ob})

    self.X = X
    self.pi = pi
    # self.pi_sub3 = pi_sub3
    # self.pi_sub4 = pi_sub4
    # self.pi_sub5 = pi_sub5
    # self.pi_sub6 = pi_sub6
    # self.pi_sub7 = pi_sub7
    # self.pi_sub8 = pi_sub8
    # self.pi_sub9 = pi_sub9
    # self.pi_sub10 = pi_sub10
    # self.pi_sub11 = pi_sub11
    # self.pi_sub12 = pi_sub12
    self.pi_xy0 = pi_xy0
    self.pi_xy1 = pi_xy1
    # self.pi_y0 = pi_y0
    # self.pi_x1 = pi_x1
    # self.pi_y1 = pi_y1
    # self.pi_x2 = pi_x2
    # self.pi_y2 = pi_y2
    self.vf = vf
    self.step = step
    self.value = value
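# Hedged usage sketch (hypothetical `policy` instance and observation batch
# `obs`): unlike the single-head policies above, step() returns the full
# softmax over base actions plus two 1024-way (32x32) spatial distributions
# and the value estimate.
pi1, xy0, xy1, v, _ = policy.step(obs)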