def __init__(self, sess, obvs, ac_space, noise_size, batch_size, nstack,
             v_envs, nenvs, reuse=False):
    """Build a policy/value network whose hidden layer also sees noise.

    The convolutional features of ``obvs`` are concatenated along axis 1
    with a ``noise`` placeholder and with ``v_envs`` before the shared
    fully connected layer that feeds the ``pi`` and ``v`` heads.

    Args:
        sess: session used by ``step`` to evaluate the graph.
        obvs: observation tensor; cast to float32 and divided by 255.
        ac_space: action space; only ``ac_space.n`` is read.
        noise_size: width of the ``noise`` placeholder.
        batch_size: accepted but not used by this constructor.
        nstack: accepted but not used by this constructor.
        v_envs: tensor concatenated with the features and the noise.
        nenvs: accepted but not used by this constructor.
        reuse: forwarded to ``tf.variable_scope("model", ...)``.
    """

    def _linear(x):
        # Pass-through activation for the output heads.
        return x

    gain = np.sqrt(2)
    n_actions = ac_space.n
    noise = tf.placeholder(tf.float32, [None, noise_size], name='noise')

    with tf.variable_scope("model", reuse=reuse):
        feat = tf.cast(obvs, tf.float32) / 255.
        for layer, filters, size, step_px in (('c1', 32, 8, 4),
                                              ('c2', 64, 4, 2),
                                              ('c3', 32, 3, 1)):
            feat = conv(feat, layer, nf=filters, rf=size, stride=step_px,
                        init_scale=gain)
        feat = conv_to_fc(feat)
        generator_input = tf.concat([feat, noise, v_envs], axis=1)
        hidden = fc(generator_input, 'fc1', nh=512, init_scale=gain)
        pi = fc(hidden, 'pi', n_actions, act=_linear)
        vf = fc(hidden, 'v', 1, act=_linear)

    value_out = vf[:, 0]
    action_out = sample(pi)
    self.initial_state = []  # not stateful

    def step(feed_dict, *_args, **_kwargs):
        """Evaluate action and value using the caller-supplied feed dict."""
        actions, values = sess.run([action_out, value_out], feed_dict)
        return actions, values, []  # dummy state

    self.pi = pi
    self.vf = vf
    self.noise = noise
    self.step = step
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack,
             reuse=False):
    """Build a feed-forward convolutional policy/value network.

    Args:
        sess: session used by ``step`` and ``value``.
        ob_space: observation space; ``ob_space.shape`` is unpacked as
            ``(nh, nw, nc)``.
        ac_space: action space; only ``ac_space.n`` is read.
        nenv: multiplied by ``nsteps`` to get the placeholder batch size.
        nsteps: see ``nenv``.
        nstack: multiplier applied to the channel count of the placeholder.
        reuse: forwarded to ``tf.variable_scope("model", ...)``.
    """

    def _linear(x):
        # Pass-through activation for the output heads.
        return x

    gain = np.sqrt(2)
    height, width, channels = ob_space.shape
    batch = nenv * nsteps
    n_actions = ac_space.n
    X = tf.placeholder(tf.uint8,
                       (batch, height, width, channels * nstack))  # obs

    with tf.variable_scope("model", reuse=reuse):
        feat = tf.cast(X, tf.float32) / 255.
        for layer, filters, size, step_px in (('c1', 32, 8, 4),
                                              ('c2', 64, 4, 2),
                                              ('c3', 32, 3, 1)):
            feat = conv(feat, layer, nf=filters, rf=size, stride=step_px,
                        init_scale=gain)
        feat = conv_to_fc(feat)
        hidden = fc(feat, 'fc1', nh=512, init_scale=gain)
        pi = fc(hidden, 'pi', n_actions, act=_linear)
        vf = fc(hidden, 'v', 1, act=_linear)

    value_out = vf[:, 0]
    action_out = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        """Return (actions, values, dummy state) for observations ``ob``."""
        actions, values = sess.run([action_out, value_out], {X: ob})
        return actions, values, []  # dummy state

    def value(ob, *_args, **_kwargs):
        """Return only the value head output for observations ``ob``."""
        return sess.run(value_out, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack,
             reuse=False):
    """Construct the three-conv-layer policy and value heads.

    Args:
        sess: session that ``step`` and ``value`` run the graph in.
        ob_space: observation space; ``ob_space.shape`` is unpacked as
            ``(nh, nw, nc)``.
        ac_space: action space; only ``ac_space.n`` is read.
        nenv: with ``nsteps`` gives the placeholder batch size.
        nsteps: with ``nenv`` gives the placeholder batch size.
        nstack: multiplier applied to the channel count of the placeholder.
        reuse: forwarded to ``tf.variable_scope("model", ...)``.
    """

    def _identity(x):
        # The output heads apply no non-linearity.
        return x

    scale = np.sqrt(2)
    rows, cols, chans = ob_space.shape
    frames = nenv * nsteps
    num_actions = ac_space.n
    X = tf.placeholder(tf.uint8,
                       (frames, rows, cols, chans * nstack))  # obs

    conv_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 32, 3, 1),
    )
    with tf.variable_scope("model", reuse=reuse):
        net = tf.cast(X, tf.float32) / 255.
        for scope_name, n_filters, kernel, stride in conv_specs:
            net = conv(net, scope_name, nf=n_filters, rf=kernel,
                       stride=stride, init_scale=scale)
        net = conv_to_fc(net)
        shared = fc(net, 'fc1', nh=512, init_scale=scale)
        pi = fc(shared, 'pi', num_actions, act=_identity)
        vf = fc(shared, 'v', 1, act=_identity)

    v_tensor = vf[:, 0]
    a_tensor = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        """Run the action and value tensors on ``ob``; state is a dummy."""
        act_out, val_out = sess.run([a_tensor, v_tensor], {X: ob})
        return act_out, val_out, []  # dummy state

    def value(ob, *_args, **_kwargs):
        """Run only the value tensor on ``ob``."""
        return sess.run(v_tensor, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack,
             nlstm=128, reuse=False):
    """Build a recurrent (LSTM) convolutional policy/value network.

    Args:
        sess: session used by ``step``, ``value``, ``save`` and ``load``.
        ob_space: observation space; ``ob_space.shape`` is unpacked as
            ``(nh, nw, nc)``.
        ac_space: action space; only ``ac_space.n`` is read.
        nenv: number of rows of the state placeholder; with ``nsteps``
            gives the batch size.
        nsteps: with ``nenv`` gives the batch size.
        nstack: multiplier applied to the channel count of the placeholder.
        nlstm: passed to ``lstm`` as ``nh``; the state placeholder is
            ``nlstm * 2`` wide.
        reuse: forwarded to ``tf.variable_scope``.
    """
    scope = "model"
    nbatch = nenv * nsteps
    nh, nw, nc = ob_space.shape
    ob_shape = (nbatch, nh, nw, nc * nstack)
    nact = ac_space.n
    X = tf.placeholder(tf.uint8, ob_shape, name="observations")  # obs
    M = tf.placeholder(tf.float32, [nbatch], name="mask")  # mask (done t-1)
    S = tf.placeholder(tf.float32, [nenv, nlstm * 2], name="states")  # states

    with tf.variable_scope(scope, reuse=reuse):
        h = conv(tf.cast(X, tf.float32) / 255., 'c1', nf=32, rf=8, stride=4,
                 init_scale=np.sqrt(2))
        h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
        h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
        h3 = conv_to_fc(h3)
        h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
        # Re-arrange the flat batch into per-step sequences for the LSTM.
        xs = batch_to_seq(h4, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)
        h5, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
        h5 = seq_to_batch(h5)
        pi = fc(h5, 'pi', nact, act=lambda x: x)
        vf = fc(h5, 'v', 1, act=lambda x: x)

    # Only the trainable variables under ``scope`` are saved and restored.
    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope=scope)
    self._saver = tf.train.Saver(trainable_vars)

    v0 = vf[:, 0]
    a0 = sample(pi)
    self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

    def step(ob, state, mask):
        """Return (actions, values, new state) for one forward pass."""
        a, v, s = sess.run([a0, v0, snew], {X: ob, S: state, M: mask})
        return a, v, s

    def value(ob, state, mask):
        """Return only the value head output."""
        return sess.run(v0, {X: ob, S: state, M: mask})

    def save(path, name):
        """Save the variables under the checkpoint prefix ``path + name``."""
        # exist_ok=True tolerates an existing directory but still raises
        # when ``path`` exists as a regular file.
        os.makedirs(path, exist_ok=True)
        self._saver.save(sess, path + name)

    def load(path, name):
        """Restore from ``path + name`` if its ``.index`` file exists."""
        if os.path.exists(path + name + '.index'):
            self._saver.restore(sess, path + name)
        else:
            tf.logging.warn('Failed restoring vars from %s' % path)

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
    self.save = save
    self.load = load
def nature_cnn(unscaled_images):
    """
    CNN from Nature paper.

    Casts the input to float32, divides by 255, applies three ReLU
    convolution layers and returns the ReLU output of a 512-unit ``fc1``.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)
    net = tf.cast(unscaled_images, tf.float32) / 255.
    # (scope name, filters, kernel size, stride) for each conv layer.
    for layer, filters, size, step_px in (('c1', 32, 8, 4),
                                          ('c2', 64, 4, 2),
                                          ('c3', 64, 3, 1)):
        net = relu(conv(net, layer, nf=filters, rf=size, stride=step_px,
                        init_scale=gain))
    flat = conv_to_fc(net)
    return relu(fc(flat, 'fc1', nh=512, init_scale=gain))
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack,
             reuse=False):
    """Build a convolutional policy/value network with save/load helpers.

    Args:
        sess: session used by ``step``, ``value``, ``save`` and ``load``.
        ob_space: observation space; ``ob_space.shape`` is unpacked as
            ``(nh, nw, nc)``.
        ac_space: action space; only ``ac_space.n`` is read.
        nenv: with ``nsteps`` gives the placeholder batch size.
        nsteps: with ``nenv`` gives the placeholder batch size.
        nstack: multiplier applied to the channel count of the placeholder.
        reuse: forwarded to ``tf.variable_scope``.
    """

    def _linear(x):
        # Pass-through activation for the output heads.
        return x

    scope = "model"
    gain = np.sqrt(2)
    height, width, channels = ob_space.shape
    batch = nenv * nsteps
    n_actions = ac_space.n
    X = tf.placeholder(tf.uint8,
                       (batch, height, width, channels * nstack))  # obs

    with tf.variable_scope(scope, reuse=reuse):
        feat = tf.cast(X, tf.float32) / 255.
        for layer, filters, size, step_px in (('c1', 32, 8, 4),
                                              ('c2', 64, 4, 2),
                                              ('c3', 32, 3, 1)):
            feat = conv(feat, layer, nf=filters, rf=size, stride=step_px,
                        init_scale=gain)
        feat = conv_to_fc(feat)
        hidden = fc(feat, 'fc1', nh=512, init_scale=gain)
        pi = fc(hidden, 'pi', n_actions, act=_linear)
        vf = fc(hidden, 'v', 1, act=_linear)

    # The saver covers only the trainable variables under ``scope``.
    self._saver = tf.train.Saver(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope))

    value_out = vf[:, 0]
    action_out = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        """Return (actions, values, dummy state) for observations ``ob``."""
        actions, values = sess.run([action_out, value_out], {X: ob})
        return actions, values, []  # dummy state

    def value(ob, *_args, **_kwargs):
        """Return only the value head output for observations ``ob``."""
        return sess.run(value_out, {X: ob})

    def save(path, name):
        """Create ``path`` if needed and save under ``path + name``."""
        os.makedirs(path, exist_ok=True)
        self._saver.save(sess, path + name)

    def load(path, name):
        """Restore from ``path + name`` when its ``.index`` file exists."""
        checkpoint = path + name
        if not os.path.exists(checkpoint + '.index'):
            tf.logging.warn('Failed restoring vars from %s' % path)
            return
        self._saver.restore(sess, checkpoint)

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
    self.save = save
    self.load = load
def __init__(self, sess, obvs, action_pi, batch_size, nstack, v_envs, nenvs,
             reuse=False):
    """Build a discriminator over (observation, action, v_envs) inputs.

    Args:
        sess: accepted but not used by this constructor.
        obvs: observation tensor; cast to float32 and divided by 255.
        action_pi: tensor concatenated with the convolutional features.
        batch_size: accepted but not used by this constructor.
        nstack: accepted but not used by this constructor.
        v_envs: tensor concatenated with the features and ``action_pi``.
        nenvs: accepted but not used by this constructor.
        reuse: forwarded to ``tf.variable_scope("model", ...)``.
    """
    gain = np.sqrt(2)
    with tf.variable_scope("model", reuse=reuse):
        # Convolutional feature extractor.
        feat = tf.cast(obvs, tf.float32) / 255.
        for layer, filters, size, step_px in (('c1', 32, 8, 4),
                                              ('c2', 64, 4, 2),
                                              ('c3', 32, 3, 1)):
            feat = conv(feat, layer, nf=filters, rf=size, stride=step_px,
                        init_scale=gain)
        feat = conv_to_fc(feat)

        # Join the features with the input action and v_envs along axis 1.
        discriminator_input = tf.concat([feat, action_pi, v_envs], axis=1)
        hidden = fc(discriminator_input, 'fc1', nh=512, init_scale=gain)
        decision = fc(hidden, 'decision', 1, act=lambda x: x)

    self.initial_state = []  # not stateful - ???
    self.discriminator_decision = decision
    # Could also have a CNN feature detector that feeds into both a
    # generator and a discriminator.
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack,
             reuse=False):
    """Build a policy with a softmax action head and two spatial heads.

    The observation size (32, 32, 3) and the action count (2) are
    hard-coded, so ``ob_space`` and ``ac_space`` are accepted but not read.

    Args:
        sess: session used by ``step`` and ``value``.
        ob_space: unused.
        ac_space: unused.
        nenv: with ``nsteps`` gives the placeholder batch size.
        nsteps: with ``nenv`` gives the placeholder batch size.
        nstack: multiplier applied to the channel count of the placeholder.
        reuse: forwarded to every ``tf.variable_scope``.
    """

    def _linear(x):
        # Pass-through activation for heads that emit raw outputs.
        return x

    def _spatial_head(features, name):
        # 1 x 1 convolution for dimensionality reduction, flattened and
        # passed through a softmax.
        with tf.variable_scope(name, reuse=reuse):
            reduced = conv(features, name, nf=1, rf=1, stride=1,
                           init_scale=np.sqrt(2))
            flat = conv_to_fc(reduced)
            return tf.nn.softmax(flat)

    gain = np.sqrt(2)
    height, width, channels = (32, 32, 3)
    batch = nenv * nsteps
    n_actions = 2
    X = tf.placeholder(tf.uint8,
                       (batch, height, width, channels * nstack))  # obs

    with tf.variable_scope("model", reuse=reuse):
        with tf.variable_scope("common", reuse=reuse):
            # Unlike the Atari policies, the input is not divided by 255.
            c1 = conv(tf.cast(X, tf.float32), 'c1', nf=16, rf=5, stride=1,
                      init_scale=gain, pad="SAME")
            c2 = conv(c1, 'c2', nf=32, rf=3, stride=1, init_scale=gain,
                      pad="SAME")

        with tf.variable_scope("pi1", reuse=reuse):
            flat = conv_to_fc(c2)
            hidden = fc(flat, 'fc1', nh=256, init_scale=gain)
            pi_raw = fc(hidden, 'pi', n_actions, act=_linear)
            pi = tf.nn.softmax(pi_raw)
            # NOTE(review): SOURCE's indentation is lost, so whether 'v' is
            # created inside the "pi1" scope or directly under "model"
            # cannot be confirmed from here; verify against checkpoints.
            vf = fc(hidden, 'v', 1, act=_linear)

        # The original carried commented-out sub-policy heads
        # (sub3 .. sub12) at this point; they were never built.
        pi_xy0 = _spatial_head(c2, "xy0")
        pi_xy1 = _spatial_head(c2, "xy1")

    value_out = vf[:, 0]
    # Not fetched by step(); kept so the graph contains the same ops.
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        """Return (pi, pi_xy0, pi_xy1, values, dummy state) for ``ob``."""
        probs, xy0_probs, xy1_probs, values = sess.run(
            [pi, pi_xy0, pi_xy1, value_out], {X: ob})
        return probs, xy0_probs, xy1_probs, values, []  # dummy state

    def value(ob, *_args, **_kwargs):
        """Return only the value head output for observations ``ob``."""
        return sess.run(value_out, {X: ob})

    self.X = X
    self.pi = pi
    self.pi_xy0 = pi_xy0
    self.pi_xy1 = pi_xy1
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack,
             reuse=False):
    """Build a policy with one main head, three sub heads and two xy heads.

    The observation size (64, 64, 13) and all head widths are hard-coded,
    so ``ob_space`` and ``ac_space`` are accepted but not read.

    Args:
        sess: session used by ``step`` and ``value``.
        ob_space: unused.
        ac_space: unused.
        nenv: with ``nsteps`` gives the placeholder batch size.
        nsteps: with ``nenv`` gives the placeholder batch size.
        nstack: multiplier applied to the channel count of the placeholder.
        reuse: forwarded to ``tf.variable_scope("model", ...)``.
    """

    def _linear(x):
        # Pass-through activation for the output heads.
        return x

    gain = np.sqrt(2)
    height, width, channels = (64, 64, 13)
    batch = nenv * nsteps
    # The placeholder puts the channel axis before height and width.
    # NOTE(review): confirm that `conv` expects this layout.
    X = tf.placeholder(tf.uint8,
                       (batch, channels * nstack, height, width))  # obs
    n_actions = 524
    n_sub1, n_sub2, n_sub3 = 2, 10, 500

    with tf.variable_scope("model", reuse=reuse):
        c1 = conv(tf.cast(X, tf.float32), 'c1', nf=16, rf=5, stride=2,
                  init_scale=gain, pad="SAME")
        c2 = conv(c1, 'c2', nf=32, rf=3, stride=1, init_scale=gain,
                  pad="SAME")
        flat = conv_to_fc(c2)
        hidden = fc(flat, 'fc1', nh=256, init_scale=gain)
        pi = fc(hidden, 'pi', n_actions, act=_linear)
        pi_sub1 = fc(hidden, 'pi_sub1', n_sub1, act=_linear)
        pi_sub2 = fc(hidden, 'pi_sub2', n_sub2, act=_linear)
        pi_sub3 = fc(hidden, 'pi_sub3', n_sub3, act=_linear)
        vf = fc(hidden, 'v', 1, act=_linear)

        # 1 x 1 convolution for dimensionality reduction; each coordinate
        # head reads a single slice (index 0 of the other spatial axis).
        xy1 = conv(c2, 'xy1', nf=1, rf=1, stride=1, init_scale=gain)
        pi_x1 = xy1[:, :, 0, 0]
        x1 = sample(pi_x1)
        pi_y1 = xy1[:, 0, :, 0]
        y1 = sample(pi_y1)

        xy2 = conv(c2, 'xy2', nf=1, rf=1, stride=1, init_scale=gain)
        pi_x2 = xy2[:, :, 0, 0]
        x2 = sample(pi_x2)
        pi_y2 = xy2[:, 0, :, 0]
        y2 = sample(pi_y2)

    value_out = vf[:, 0]
    # Not fetched by step(); kept so the graph contains the same ops.
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    fetches = [pi, pi_sub1, pi_sub2, pi_sub3, x1, y1, x2, y2, value_out]

    def step(ob, *_args, **_kwargs):
        """Return the nine fetched outputs for ``ob`` plus a dummy state.

        Order: pi, pi_sub1, pi_sub2, pi_sub3, x1, y1, x2, y2, value, [].
        """
        outputs = sess.run(fetches, {X: ob})
        return tuple(outputs) + ([],)  # dummy state

    def value(ob, *_args, **_kwargs):
        """Return only the value head output for observations ``ob``."""
        return sess.run(value_out, {X: ob})

    self.X = X
    self.pi = pi
    self.pi_sub1 = pi_sub1
    self.pi_sub2 = pi_sub2
    self.pi_sub3 = pi_sub3
    self.pi_x1 = pi_x1
    self.pi_y1 = pi_y1
    self.pi_x2 = pi_x2
    self.pi_y2 = pi_y2
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack,
             reuse=False):
    """Build a policy with a main head, ten sub heads and three xy heads.

    The observation size (64, 64, 1) and all head widths are hard-coded,
    so ``ob_space`` and ``ac_space`` are accepted but not read. Every sub
    head takes the main ``pi`` output as its input.

    Args:
        sess: session used by ``step`` and ``value``.
        ob_space: unused.
        ac_space: unused.
        nenv: with ``nsteps`` gives the placeholder batch size.
        nsteps: with ``nenv`` gives the placeholder batch size.
        nstack: multiplier applied to the channel count of the placeholder.
        reuse: forwarded to every ``tf.variable_scope``.
    """

    def _linear(x):
        # Pass-through activation for the output heads.
        return x

    gain = np.sqrt(2)
    height, width, channels = (64, 64, 1)
    batch = nenv * nsteps
    n_actions = 524
    # (sub-head index, output width), in creation order.
    sub_head_widths = ((3, 2), (4, 5), (5, 10), (6, 4), (7, 2), (8, 4),
                       (9, 500), (10, 4), (11, 10), (12, 500))

    X = tf.placeholder(tf.uint8,
                       (batch, height, width, channels * nstack))  # obs

    def _xy_head(features, name, stride):
        # 1 x 1 convolution for dimensionality reduction; each coordinate
        # output reads a single slice (index 0 of the other spatial axis)
        # and is passed to `sample`.
        with tf.variable_scope(name, reuse=reuse):
            grid = conv(features, name, nf=1, rf=1, stride=stride,
                        init_scale=gain)
            along_x = grid[:, :, 0, 0]
            picked_x = sample(along_x)
            along_y = grid[:, 0, :, 0]
            picked_y = sample(along_y)
        return along_x, picked_x, along_y, picked_y

    with tf.variable_scope("model", reuse=reuse):
        with tf.variable_scope("common", reuse=reuse):
            c1 = conv(tf.cast(X, tf.float32), 'c1', nf=16, rf=5, stride=2,
                      init_scale=gain, pad="SAME")
            c2 = conv(c1, 'c2', nf=32, rf=3, stride=1, init_scale=gain,
                      pad="SAME")

        with tf.variable_scope("pi1", reuse=reuse):
            flat = conv_to_fc(c2)
            hidden = fc(flat, 'fc1', nh=256, init_scale=gain)
            pi = fc(hidden, 'pi', n_actions, act=_linear)

        sub_heads = []
        for index, head_width in sub_head_widths:
            with tf.variable_scope("sub%d" % index, reuse=reuse):
                sub_heads.append(
                    fc(pi, 'pi_sub%d' % index, head_width, act=_linear))

        # NOTE(review): SOURCE's indentation is lost, so whether 'v' is
        # created directly under "model" (as here) or inside the "sub12"
        # scope cannot be confirmed; verify against existing checkpoints.
        vf = fc(hidden, 'v', 1, act=_linear)

        # NOTE(review): xy0 uses stride=2 while xy1 and xy2 use stride=1;
        # kept as in the original, confirm this is intended.
        pi_x0, x0, pi_y0, y0 = _xy_head(c2, "xy0", 2)
        pi_x1, x1, pi_y1, y1 = _xy_head(c2, "xy1", 1)
        pi_x2, x2, pi_y2, y2 = _xy_head(c2, "xy2", 1)

    value_out = vf[:, 0]
    # Not fetched by step(); kept so the graph contains the same ops.
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    fetches = [pi] + sub_heads + [x0, y0, x1, y1, x2, y2, value_out]

    def step(ob, *_args, **_kwargs):
        """Return the eighteen fetched outputs for ``ob`` plus a dummy state.

        Order: pi, pi_sub3 .. pi_sub12, x0, y0, x1, y1, x2, y2, value, [].
        """
        outputs = sess.run(fetches, {X: ob})
        return tuple(outputs) + ([],)  # dummy state

    def value(ob, *_args, **_kwargs):
        """Return only the value head output for observations ``ob``."""
        return sess.run(value_out, {X: ob})

    self.X = X
    self.pi = pi
    (self.pi_sub3, self.pi_sub4, self.pi_sub5, self.pi_sub6, self.pi_sub7,
     self.pi_sub8, self.pi_sub9, self.pi_sub10, self.pi_sub11,
     self.pi_sub12) = sub_heads
    self.pi_x0 = pi_x0
    self.pi_y0 = pi_y0
    self.pi_x1 = pi_x1
    self.pi_y1 = pi_y1
    self.pi_x2 = pi_x2
    self.pi_y2 = pi_y2
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, obvs, batch_size, nstack, num_Gaussians,
             reuse=False):
    """Build an encoder / sampled-latent / decoder graph over ``obvs``.

    The encoder emits ``z_mean`` and ``z_log_sigma_sq``; a latent sample
    is drawn as ``z_mean + sqrt(exp(z_log_sigma_sq)) * eps`` and decoded
    through two fully connected layers and three ``deconv`` layers.

    Args:
        sess: accepted but not used by this constructor.
        obvs: observation tensor; cast to float32 and divided by 255.
        batch_size: number of rows of the random normal ``eps`` tensor.
        nstack: accepted but not used by this constructor.
        num_Gaussians: width of ``z_mean``, ``z_log_sigma_sq`` and ``eps``.
        reuse: forwarded to ``tf.variable_scope("model", ...)``.
    """

    def _linear(x):
        # Pass-through activation for the latent heads.
        return x

    def _static_dims(tensor):
        # Static sizes of axes 1..3, as plain Python values.
        shape = tensor.get_shape()
        return [shape[1].value, shape[2].value, shape[3].value]

    gain = np.sqrt(2)
    with tf.variable_scope("model", reuse=reuse):
        # Encoder.
        enc1 = conv(tf.cast(obvs, tf.float32) / 255., 'c1', nf=32, rf=8,
                    stride=4, init_scale=gain)
        enc2 = conv(enc1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
        enc3 = conv(enc2, 'c3', nf=32, rf=3, stride=1, init_scale=gain)
        enc_flat = conv_to_fc(enc3)
        enc_hidden = fc(enc_flat, 'fc1', nh=512, init_scale=gain)
        z_mean = fc(enc_hidden, 'z_mean', nh=num_Gaussians, act=_linear)
        z_log_sigma_sq = fc(enc_hidden, 'z_sigma', nh=num_Gaussians,
                            act=_linear)

        # Latent sample.
        eps = tf.random_normal((batch_size, num_Gaussians), 0, 1,
                               dtype=tf.float32)
        sigma = tf.sqrt(tf.exp(z_log_sigma_sq))
        z_sample = tf.add(z_mean, tf.multiply(sigma, eps))

        # Decoder: fully connected layers back to the flattened conv size.
        dec_hidden = fc(z_sample, 'g_fc1', nh=512, init_scale=gain)
        dec_flat = fc(dec_hidden, 'g_fc2', nh=enc_flat.get_shape()[-1],
                      init_scale=gain)

        # From here on the batch size is read from the graph at run time
        # rather than taken from the ``batch_size`` argument.
        run_batch = tf.shape(dec_flat)[0]
        dec0 = tf.reshape(dec_flat, [run_batch] + _static_dims(enc3))

        # Each deconv targets the spatial shape of the mirrored encoder
        # tensor; the last one targets the shape of the observations.
        dec1 = deconv(dec0, 'g_c1', nf=64, rf=3, stride=1,
                      output_size=[run_batch] + _static_dims(enc2),
                      init_scale=gain)
        dec2 = deconv(dec1, 'g_c2', nf=32, rf=4, stride=2,
                      output_size=[run_batch] + _static_dims(enc1),
                      init_scale=gain)
        reconstruction_mean = deconv(
            dec2, 'g_c3', nf=4, rf=8, stride=4,
            output_size=[run_batch] + _static_dims(obvs),
            init_scale=gain, act=tf.nn.sigmoid)

    self.z_mean = z_mean
    self.z_log_sigma_sq = z_log_sigma_sq
    self.z_sample = z_sample
    self.reconstruction = reconstruction_mean