Exemple #1
0
    def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
        """Build a feed-forward convolutional policy/value graph.

        A uint8 placeholder of shape ``(nenv * nsteps, nh, nw, nc * nstack)``
        (``nh, nw, nc`` come from ``ob_space.shape``) is cast to float32,
        divided by 255 and passed through three ``conv`` layers and one
        512-unit ``fc`` layer. Two identity-activation ``fc`` heads follow:
        ``pi`` with ``ac_space.n`` outputs and ``vf`` with a single output.
        All layers are created under the "model" variable scope with the
        given ``reuse`` flag.

        Exposes ``X``, ``pi``, ``vf``, ``initial_state`` (an empty list) and
        the closures ``step`` / ``value``, which evaluate the graph in ``sess``.
        """
        batch = nenv * nsteps
        frame_h, frame_w, frame_c = ob_space.shape
        n_actions = ac_space.n
        obs_ph = tf.placeholder(
            tf.uint8, (batch, frame_h, frame_w, frame_c * nstack))  # obs
        gain = np.sqrt(2)

        def identity(t):
            return t

        with tf.variable_scope("model", reuse=reuse):
            scaled = tf.cast(obs_ph, tf.float32) / 255.
            c1 = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
            c2 = conv(c1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
            c3 = conv(c2, 'c3', nf=32, rf=3, stride=1, init_scale=gain)
            flat = conv_to_fc(c3)
            hidden = fc(flat, 'fc1', nh=512, init_scale=gain)
            logits = fc(hidden, 'pi', n_actions, act=identity)
            critic = fc(hidden, 'v', 1, act=identity)

        value_op = critic[:, 0]
        action_op = sample(logits)

        def step(ob, *_args, **_kwargs):
            """Return ``(sample(pi) output, vf[:, 0], [])`` for ``ob``."""
            action, estimate = sess.run([action_op, value_op], {obs_ph: ob})
            return action, estimate, []  # dummy state

        def value(ob, *_args, **_kwargs):
            """Return ``vf[:, 0]`` evaluated on ``ob``."""
            return sess.run(value_op, {obs_ph: ob})

        self.initial_state = []  # not stateful
        self.X = obs_ph
        self.pi = logits
        self.vf = critic
        self.step = step
        self.value = value
Exemple #2
0
    def __init__(self, sess, obvs, ac_space, noise_size, batch_size, nstack, v_envs, nenvs, reuse=False):
        """Build a noise-conditioned convolutional policy/value graph.

        ``obvs`` is cast to float32, divided by 255 and run through three
        ``conv`` layers. The flattened features are concatenated along axis 1
        with a float32 ``noise`` placeholder of shape ``[None, noise_size]``
        and with ``v_envs``, then fed to a 512-unit ``fc`` layer. Two
        identity-activation heads follow: ``pi`` (``ac_space.n`` outputs) and
        ``vf`` (one output). ``batch_size``, ``nstack`` and ``nenvs`` are
        accepted but not read in this constructor.

        ``step(feed_dict)`` evaluates the sampler and value ops with the
        caller-supplied feed dict and returns ``(action, value, [])``.
        """
        n_actions = ac_space.n
        noise_ph = tf.placeholder(tf.float32, [None, noise_size], name='noise')
        gain = np.sqrt(2)

        def identity(t):
            return t

        with tf.variable_scope("model", reuse=reuse):
            scaled = tf.cast(obvs, tf.float32) / 255.
            c1 = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
            c2 = conv(c1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
            c3 = conv(c2, 'c3', nf=32, rf=3, stride=1, init_scale=gain)
            flat = conv_to_fc(c3)
            # Image features, noise and v_envs are joined feature-wise.
            joined = tf.concat([flat, noise_ph, v_envs], axis=1)
            hidden = fc(joined, 'fc1', nh=512, init_scale=gain)
            logits = fc(hidden, 'pi', n_actions, act=identity)
            critic = fc(hidden, 'v', 1, act=identity)

        value_op = critic[:, 0]
        action_op = sample(logits)

        def step(feed_dict, *_args, **_kwargs):
            """Run the sampler and value ops with ``feed_dict``."""
            action, estimate = sess.run([action_op, value_op], feed_dict)
            return action, estimate, []  # dummy state

        self.initial_state = []  # not stateful
        self.pi = logits
        self.vf = critic
        self.noise = noise_ph
        self.step = step
Exemple #3
0
    def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
        """Construct the CNN actor-critic graph and its session closures.

        Observations enter through a uint8 placeholder shaped
        ``(nenv * nsteps, nh, nw, nc * nstack)`` where ``nh, nw, nc`` are
        read from ``ob_space.shape``. Inside the "model" variable scope the
        input is cast to float32 and divided by 255, then goes through
        ``conv`` layers 'c1', 'c2', 'c3', is flattened by ``conv_to_fc`` and
        fed to the 512-unit 'fc1'. The 'pi' head has ``ac_space.n`` outputs
        and the 'v' head has one; both use an identity activation.

        ``step(ob)`` returns ``(sample(pi) output, vf[:, 0], [])`` and
        ``value(ob)`` returns ``vf[:, 0]``; extra arguments are ignored.
        """
        total_frames = nenv * nsteps
        rows, cols, chans = ob_space.shape
        placeholder_shape = (total_frames, rows, cols, chans * nstack)
        action_count = ac_space.n
        inputs = tf.placeholder(tf.uint8, placeholder_shape)  # obs
        init_gain = np.sqrt(2)

        def passthrough(t):
            return t

        with tf.variable_scope("model", reuse=reuse):
            net = tf.cast(inputs, tf.float32) / 255.
            net = conv(net, 'c1', nf=32, rf=8, stride=4, init_scale=init_gain)
            net = conv(net, 'c2', nf=64, rf=4, stride=2, init_scale=init_gain)
            net = conv(net, 'c3', nf=32, rf=3, stride=1, init_scale=init_gain)
            net = conv_to_fc(net)
            net = fc(net, 'fc1', nh=512, init_scale=init_gain)
            policy_out = fc(net, 'pi', action_count, act=passthrough)
            value_out = fc(net, 'v', 1, act=passthrough)

        flat_value = value_out[:, 0]
        sampled_action = sample(policy_out)

        def step(ob, *_args, **_kwargs):
            """Evaluate the sampler and value ops on ``ob``."""
            act, val = sess.run([sampled_action, flat_value], {inputs: ob})
            return act, val, []  # dummy state

        def value(ob, *_args, **_kwargs):
            """Evaluate only the value op on ``ob``."""
            return sess.run(flat_value, {inputs: ob})

        self.initial_state = []  # not stateful
        self.X = inputs
        self.pi = policy_out
        self.vf = value_out
        self.step = step
        self.value = value
Exemple #4
0
    def __init__(self,
                 sess,
                 features,
                 ac_space,
                 noise_size,
                 batch_size,
                 nstack,
                 reuse=False):
        """Build policy/value heads on top of an existing feature tensor.

        ``features`` is fed to a 512-unit ``fc`` layer ('fc1') inside the
        "model" variable scope, followed by two identity-activation heads:
        'pi' with ``ac_space.n`` outputs and 'v' with one output.
        ``noise_size``, ``batch_size`` and ``nstack`` are accepted but not
        read in this constructor.

        ``step(feed_dict)`` evaluates the sampler and value ops with the
        caller-supplied feed dict and returns ``(action, value, [])``.
        """
        n_actions = ac_space.n

        def identity(t):
            return t

        with tf.variable_scope("model", reuse=reuse):
            hidden = fc(features, 'fc1', nh=512, init_scale=np.sqrt(2))
            logits = fc(hidden, 'pi', n_actions, act=identity)
            critic = fc(hidden, 'v', 1, act=identity)

        value_op = critic[:, 0]
        action_op = sample(logits)

        def step(feed_dict, *_args, **_kwargs):
            """Run the sampler and value ops with ``feed_dict``."""
            action, estimate = sess.run([action_op, value_op], feed_dict)
            return action, estimate, []  # dummy state

        self.initial_state = []  # not stateful
        self.pi = logits
        self.vf = critic
        self.step = step


# Could also have a CNN feature detector that feeds into both a generator and a discriminator
Exemple #5
0
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False):  #pylint: disable=W0613
        """Build a ``nature_cnn``-based policy/value graph.

        A uint8 placeholder of shape ``(nbatch, nh, nw, nc)`` (dimensions
        from ``ob_space.shape``) feeds ``nature_cnn`` inside the "model"
        variable scope. The 'pi' head has ``ac_space.n`` outputs and is built
        with ``init_scale=0.01``; the 'v' head has one output, of which
        column 0 is kept as ``vf``. The action distribution comes from
        ``make_pdtype(ac_space).pdfromflat(pi)``.

        ``step(ob)`` returns ``(action, value, self.initial_state, neglogp)``
        and ``value(ob)`` returns the value estimate; extra arguments are
        ignored. ``nsteps`` is not read here.
        """
        frame_h, frame_w, frame_c = ob_space.shape
        obs_shape = (nbatch, frame_h, frame_w, frame_c)
        n_actions = ac_space.n
        obs_ph = tf.placeholder(tf.uint8, obs_shape)  # obs
        with tf.variable_scope("model", reuse=reuse):
            features = nature_cnn(obs_ph)
            logits = fc(features, 'pi', n_actions, init_scale=0.01)
            value_op = fc(features, 'v', 1)[:, 0]

        self.pdtype = make_pdtype(ac_space)
        self.pd = self.pdtype.pdfromflat(logits)

        action_op = self.pd.sample()
        neglogp_op = self.pd.neglogp(action_op)
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            """Sample an action for ``ob`` with its value and neg-log-prob."""
            fetched = sess.run([action_op, value_op, neglogp_op], {obs_ph: ob})
            action, estimate, neglogp = fetched
            # initial_state is looked up at call time, as in the attribute.
            return action, estimate, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            """Return the value estimate for ``ob``."""
            return sess.run(value_op, {obs_ph: ob})

        self.X = obs_ph
        self.pi = logits
        self.vf = value_op
        self.step = step
        self.value = value
Exemple #6
0
    def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, nlstm=128, reuse=False):
        """Build a recurrent (CNN + LSTM) policy/value graph with checkpoint helpers.

        Placeholders:
          * ``X`` -- uint8 observations, shape ``(nenv * nsteps, nh, nw, nc * nstack)``
            with ``nh, nw, nc`` taken from ``ob_space.shape``.
          * ``M`` -- float32 mask of shape ``[nenv * nsteps]``.
          * ``S`` -- float32 recurrent state of shape ``[nenv, nlstm * 2]``.

        Inside the "model" variable scope the observations are cast to
        float32, divided by 255 and passed through three ``conv`` layers and
        a 512-unit ``fc`` layer. The result is split into a sequence with
        ``batch_to_seq``, run through ``lstm`` (``nh=nlstm``) together with
        the mask sequence and ``S``, and merged back with ``seq_to_batch``.
        Identity-activation heads 'pi' (``ac_space.n`` outputs) and 'v' (one
        output) follow. A ``tf.train.Saver`` is created over the
        TRAINABLE_VARIABLES collection filtered by scope "model".

        Exposes ``X``, ``pi``, ``vf``, ``initial_state`` (float32 zeros of
        shape ``(nenv, nlstm * 2)``) and the closures ``step``, ``value``,
        ``save`` and ``load``.
        """
        scope = "model"
        nbatch = nenv*nsteps
        nh, nw, nc = ob_space.shape
        ob_shape = (nbatch, nh, nw, nc*nstack)
        nact = ac_space.n
        X = tf.placeholder(tf.uint8, ob_shape, name="observations") #obs
        M = tf.placeholder(tf.float32, [nbatch], name="mask") #mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, nlstm*2], name="states") #states
        with tf.variable_scope(scope, reuse=reuse):
            h = conv(tf.cast(X, tf.float32)/255., 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
            h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
            h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
            h3 = conv_to_fc(h3)
            h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
            xs = batch_to_seq(h4, nenv, nsteps)
            ms = batch_to_seq(M, nenv, nsteps)
            h5, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
            h5 = seq_to_batch(h5)
            pi = fc(h5, 'pi', nact, act=lambda x:x)
            vf = fc(h5, 'v', 1, act=lambda x:x)

            # Only the trainable variables under this scope are checkpointed.
            trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
            self._saver = tf.train.Saver(trainable_vars)

        v0 = vf[:, 0]
        a0 = sample(pi)
        self.initial_state = np.zeros((nenv, nlstm*2), dtype=np.float32)

        def step(ob, state, mask):
            """Return ``(sample(pi) output, vf[:, 0], new LSTM state)``."""
            a, v, s = sess.run([a0, v0, snew], {X:ob, S:state, M:mask})
            return a, v, s

        def value(ob, state, mask):
            """Return ``vf[:, 0]`` for the given observation, state and mask."""
            return sess.run(v0, {X:ob, S:state, M:mask})

        def save(path, name):
            """Create ``path`` if needed and save the checkpoint to ``path + name``."""
            # exist_ok=True tolerates an existing directory but, unlike the
            # former blanket ``except FileExistsError: pass``, still raises
            # when ``path`` exists and is not a directory.
            os.makedirs(path, exist_ok=True)
            self._saver.save(sess, path+name)

        def load(path, name):
            """Restore from ``path + name`` if its '.index' file exists, else warn."""
            if os.path.exists(path+name+'.index'):
                self._saver.restore(sess, path+name)
            else:
                tf.logging.warn('Failed restoring vars from %s' % path)

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
        self.save = save
        self.load = load
Exemple #7
0
    def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
        """Build a CNN policy/value graph with checkpoint save/load helpers.

        A uint8 placeholder of shape ``(nenv * nsteps, nh, nw, nc * nstack)``
        (``nh, nw, nc`` from ``ob_space.shape``) is cast to float32, divided
        by 255 and passed through three ``conv`` layers and a 512-unit ``fc``
        layer inside the "model" variable scope. Identity-activation heads
        'pi' (``ac_space.n`` outputs) and 'v' (one output) follow. A
        ``tf.train.Saver`` is created over the TRAINABLE_VARIABLES collection
        filtered by scope "model".

        Exposes ``X``, ``pi``, ``vf``, ``initial_state`` (an empty list) and
        the closures ``step``, ``value``, ``save`` and ``load``.
        """
        scope_name = "model"
        batch = nenv * nsteps
        frame_h, frame_w, frame_c = ob_space.shape
        n_actions = ac_space.n
        obs_ph = tf.placeholder(
            tf.uint8, (batch, frame_h, frame_w, frame_c * nstack))  # obs
        gain = np.sqrt(2)

        def identity(t):
            return t

        with tf.variable_scope(scope_name, reuse=reuse):
            scaled = tf.cast(obs_ph, tf.float32) / 255.
            c1 = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
            c2 = conv(c1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
            c3 = conv(c2, 'c3', nf=32, rf=3, stride=1, init_scale=gain)
            flat = conv_to_fc(c3)
            hidden = fc(flat, 'fc1', nh=512, init_scale=gain)
            logits = fc(hidden, 'pi', n_actions, act=identity)
            critic = fc(hidden, 'v', 1, act=identity)

            # Only the trainable variables under this scope are checkpointed.
            self._saver = tf.train.Saver(
                tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_name))

        value_op = critic[:, 0]
        action_op = sample(logits)

        def step(ob, *_args, **_kwargs):
            """Return ``(sample(pi) output, vf[:, 0], [])`` for ``ob``."""
            action, estimate = sess.run([action_op, value_op], {obs_ph: ob})
            return action, estimate, []  # dummy state

        def value(ob, *_args, **_kwargs):
            """Return ``vf[:, 0]`` evaluated on ``ob``."""
            return sess.run(value_op, {obs_ph: ob})

        def save(path, name):
            """Create ``path`` if needed and save the checkpoint to ``path + name``."""
            os.makedirs(path, exist_ok=True)
            checkpoint = path + name
            self._saver.save(sess, checkpoint)

        def load(path, name):
            """Restore from ``path + name`` if its '.index' file exists, else warn."""
            checkpoint = path + name
            if not os.path.exists(checkpoint + '.index'):
                tf.logging.warn('Failed restoring vars from %s' % path)
                return
            self._saver.restore(sess, checkpoint)

        self.initial_state = []  # not stateful
        self.X = obs_ph
        self.pi = logits
        self.vf = critic
        self.step = step
        self.value = value
        self.save = save
        self.load = load
Exemple #8
0
    def __init__(self, sess, obvs, action_pi, batch_size, nstack, v_envs, nenvs, reuse=False):
        """Build a discriminator graph over (observation, action, v_envs).

        ``obvs`` is cast to float32, divided by 255 and run through three
        ``conv`` layers inside the "model" variable scope. The flattened
        features are concatenated along axis 1 with ``action_pi`` and
        ``v_envs``, passed through a 512-unit ``fc`` layer and reduced to a
        single identity-activation output, stored as
        ``self.discriminator_decision``. ``sess``, ``batch_size``, ``nstack``
        and ``nenvs`` are accepted but not read in this constructor.
        """
        gain = np.sqrt(2)

        def identity(t):
            return t

        with tf.variable_scope("model", reuse=reuse):
            # Convolutional feature extractor.
            scaled = tf.cast(obvs, tf.float32) / 255.
            c1 = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
            c2 = conv(c1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
            c3 = conv(c2, 'c3', nf=32, rf=3, stride=1, init_scale=gain)
            flat = conv_to_fc(c3)
            # Join the image features with the input action and v_envs.
            joined = tf.concat([flat, action_pi, v_envs], axis=1)
            hidden = fc(joined, 'fc1', nh=512, init_scale=gain)
            decision = fc(hidden, 'decision', 1, act=identity)

        self.initial_state = []  # not stateful - ???
        self.discriminator_decision = decision

# Could also have a CNN feature detector that feeds into both a generator and a discriminator
Exemple #9
0
def nature_cnn(unscaled_images):
    """
    CNN from Nature paper.

    Casts ``unscaled_images`` to float32 and divides by 255, applies three
    ReLU-activated ``conv`` layers ('c1', 'c2', 'c3'), flattens the result
    with ``conv_to_fc`` and returns the ReLU of a 512-unit ``fc`` layer
    ('fc1'). Every layer is built with ``init_scale=sqrt(2)``.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)
    # (scope name, number of filters, filter size, stride) for each conv layer.
    conv_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )
    net = tf.cast(unscaled_images, tf.float32) / 255.
    for layer_name, filters, kernel, step in conv_specs:
        net = relu(conv(net, layer_name, nf=filters, rf=kernel, stride=step,
                        init_scale=gain))
    flat = conv_to_fc(net)
    return relu(fc(flat, 'fc1', nh=512, init_scale=gain))
Exemple #10
0
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nenv,
                 nsteps,
                 nstack,
                 reuse=False):
        """Build a fully connected policy/value graph for flat observations.

        A float32 placeholder of shape
        ``(nenv * nsteps, ob_space.shape[0] * nstack)`` feeds three 256-unit
        ``fc`` layers named 'fc1', 'fc2' and 'fc4' inside the "model"
        variable scope (there is no active 'fc3' layer). Identity-activation
        heads 'pi' (``ac_space.n`` outputs) and 'v' (one output) follow.

        ``step(ob)`` returns ``(sample(pi) output, vf[:, 0], [])`` and
        ``value(ob)`` returns ``vf[:, 0]``; extra arguments are ignored.
        """
        batch = nenv * nsteps
        flat_dim = ob_space.shape[0]
        n_actions = ac_space.n
        obs_ph = tf.placeholder(tf.float32, (batch, flat_dim * nstack))  # obs
        gain = np.sqrt(2)

        def identity(t):
            return t

        with tf.variable_scope("model", reuse=reuse):
            hidden = obs_ph
            for layer_name in ('fc1', 'fc2', 'fc4'):
                hidden = fc(hidden, layer_name, nh=256, init_scale=gain)
            logits = fc(hidden, 'pi', n_actions, act=identity)
            critic = fc(hidden, 'v', 1, act=identity)

        value_op = critic[:, 0]
        action_op = sample(logits)

        def step(ob, *_args, **_kwargs):
            """Evaluate the sampler and value ops on ``ob``."""
            action, estimate = sess.run([action_op, value_op], {obs_ph: ob})
            return action, estimate, []  # dummy state

        def value(ob, *_args, **_kwargs):
            """Evaluate only the value op on ``ob``."""
            return sess.run(value_op, {obs_ph: ob})

        self.initial_state = []  # not stateful
        self.X = obs_ph
        self.pi = logits
        self.vf = critic
        self.step = step
        self.value = value
Exemple #11
0
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nenv,
                 nsteps,
                 nstack,
                 reuse=False):
        """Build a policy graph with one action head and two spatial heads.

        The observation placeholder is uint8 with the hard-coded shape
        ``(nenv * nsteps, 32, 32, 3 * nstack)``; ``ob_space`` and ``ac_space``
        are accepted but not read, and the action head width is fixed at 2.
        The input is cast to float32 (without rescaling) and passed through
        two ``conv`` layers with ``pad="SAME"`` under "model/common".

        Heads:
          * "model/pi1": flattened features -> 256-unit ``fc`` -> 'pi'
            (softmax of a 2-output identity-activation ``fc``) and 'v'
            (one output, identity activation).
          * "model/xy0" and "model/xy1": a single-filter 1x1 ``conv`` on the
            shared conv features, flattened and passed through softmax.

        ``step(ob)`` returns ``(pi, pi_xy0, pi_xy1, vf[:, 0], [])`` evaluated
        on ``ob``; ``value(ob)`` returns ``vf[:, 0]``.
        """
        batch = nenv * nsteps
        frame_h, frame_w, frame_c = (32, 32, 3)
        n_actions = 2
        obs_ph = tf.placeholder(
            tf.uint8, (batch, frame_h, frame_w, frame_c * nstack))  # obs
        gain = np.sqrt(2)

        def identity(t):
            return t

        def spatial_head(head_name):
            """Softmax over the flattened output of a 1x1, single-filter conv."""
            with tf.variable_scope(head_name, reuse=reuse):
                # 1 x 1 convolution for dimensionality reduction
                reduced = conv(trunk, head_name, nf=1, rf=1, stride=1,
                               init_scale=gain)
                return tf.nn.softmax(conv_to_fc(reduced))

        with tf.variable_scope("model", reuse=reuse):
            with tf.variable_scope("common", reuse=reuse):
                c1 = conv(tf.cast(obs_ph, tf.float32), 'c1', nf=16, rf=5,
                          stride=1, init_scale=gain, pad="SAME")
                trunk = conv(c1, 'c2', nf=32, rf=3, stride=1,
                             init_scale=gain, pad="SAME")

            with tf.variable_scope("pi1", reuse=reuse):
                flat = conv_to_fc(trunk)
                hidden = fc(flat, 'fc1', nh=256, init_scale=gain)
                logits = fc(hidden, 'pi', n_actions, act=identity)
                probs = tf.nn.softmax(logits)
                critic = fc(hidden, 'v', 1, act=identity)

            pi_xy0 = spatial_head("xy0")
            pi_xy1 = spatial_head("xy1")

        value_op = critic[:, 0]
        # NOTE(review): this sampled tensor is never fetched or stored. It is
        # also built from the softmax output, whereas the sibling constructors
        # in this file pass raw fc outputs to `sample` -- confirm intent. The
        # call is kept so the graph matches the original.
        unused_action_op = sample(probs)

        def step(ob, *_args, **_kwargs):
            """Return the three probability tensors, the value and a dummy state."""
            out_pi, out_xy0, out_xy1, out_v = sess.run(
                [probs, pi_xy0, pi_xy1, value_op], {obs_ph: ob})
            return out_pi, out_xy0, out_xy1, out_v, []  # dummy state

        def value(ob, *_args, **_kwargs):
            """Return ``vf[:, 0]`` evaluated on ``ob``."""
            return sess.run(value_op, {obs_ph: ob})

        self.initial_state = []  # not stateful
        self.X = obs_ph
        self.pi = probs
        self.pi_xy0 = pi_xy0
        self.pi_xy1 = pi_xy1
        self.vf = critic
        self.step = step
        self.value = value
Exemple #12
0
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nenv,
                 nsteps,
                 nstack,
                 reuse=False):
        """Build a policy graph with a main head, three sub-heads and two coordinate heads.

        The observation placeholder is uint8 with the hard-coded shape
        ``(nenv * nsteps, 13 * nstack, 64, 64)``; ``ob_space`` and
        ``ac_space`` are accepted but not read. The input is cast to float32
        (without rescaling) and passed through two ``conv`` layers with
        ``pad="SAME"`` inside the "model" variable scope, then flattened and
        fed to a 256-unit ``fc`` layer.

        Identity-activation ``fc`` heads: 'pi' (524 outputs), 'pi_sub1' (2),
        'pi_sub2' (10), 'pi_sub3' (500) and 'v' (1). Two single-filter 1x1
        ``conv`` layers ('xy1', 'xy2') on the conv features provide the
        coordinate tensors: ``pi_x*`` is the slice ``[:, :, 0, 0]`` and
        ``pi_y*`` is ``[:, 0, :, 0]``, each passed to ``sample``.

        ``step(ob)`` returns ``(pi, pi_sub1, pi_sub2, pi_sub3, x1, y1, x2,
        y2, vf[:, 0], [])`` evaluated on ``ob``.
        """
        batch = nenv * nsteps
        frame_h, frame_w, frame_c = (64, 64, 13)
        # NOTE(review): channels come before the spatial dims here, unlike the
        # other constructors in this file -- confirm `conv` expects this layout.
        obs_shape = (batch, frame_c * nstack, frame_h, frame_w)
        n_actions = 524
        n_sub1, n_sub2, n_sub3 = 2, 10, 500
        gain = np.sqrt(2)

        def identity(t):
            return t

        def coordinate_head(head_name):
            """Return (x slice, sampled x, y slice, sampled y) from a 1x1 conv."""
            grid = conv(trunk, head_name, nf=1, rf=1, stride=1, init_scale=gain)
            slice_x = grid[:, :, 0, 0]
            drawn_x = sample(slice_x)
            slice_y = grid[:, 0, :, 0]
            drawn_y = sample(slice_y)
            return slice_x, drawn_x, slice_y, drawn_y

        obs_ph = tf.placeholder(tf.uint8, obs_shape)  # obs
        with tf.variable_scope("model", reuse=reuse):
            c1 = conv(tf.cast(obs_ph, tf.float32), 'c1', nf=16, rf=5, stride=2,
                      init_scale=gain, pad="SAME")
            trunk = conv(c1, 'c2', nf=32, rf=3, stride=1, init_scale=gain,
                         pad="SAME")
            flat = conv_to_fc(trunk)
            hidden = fc(flat, 'fc1', nh=256, init_scale=gain)
            logits = fc(hidden, 'pi', n_actions, act=identity)
            sub1 = fc(hidden, 'pi_sub1', n_sub1, act=identity)
            sub2 = fc(hidden, 'pi_sub2', n_sub2, act=identity)
            sub3 = fc(hidden, 'pi_sub3', n_sub3, act=identity)

            critic = fc(hidden, 'v', 1, act=identity)

            # 1 x 1 convolutions for dimensionality reduction
            pi_x1, x1, pi_y1, y1 = coordinate_head('xy1')
            pi_x2, x2, pi_y2, y2 = coordinate_head('xy2')

        value_op = critic[:, 0]
        # Not fetched by step/value below; kept so the graph is unchanged.
        unused_action_op = sample(logits)

        fetches = [logits, sub1, sub2, sub3, x1, y1, x2, y2, value_op]

        def step(ob, *_args, **_kwargs):
            """Evaluate all heads on ``ob`` and append a dummy state."""
            results = sess.run(list(fetches), {obs_ph: ob})
            (out_pi, out_sub1, out_sub2, out_sub3,
             out_x1, out_y1, out_x2, out_y2, out_v) = results
            return (out_pi, out_sub1, out_sub2, out_sub3,
                    out_x1, out_y1, out_x2, out_y2, out_v, [])  # dummy state

        def value(ob, *_args, **_kwargs):
            """Return ``vf[:, 0]`` evaluated on ``ob``."""
            return sess.run(value_op, {obs_ph: ob})

        self.initial_state = []  # not stateful
        self.X = obs_ph
        self.pi = logits
        self.pi_sub1 = sub1
        self.pi_sub2 = sub2
        self.pi_sub3 = sub3
        self.pi_x1 = pi_x1
        self.pi_y1 = pi_y1
        self.pi_x2 = pi_x2
        self.pi_y2 = pi_y2
        self.vf = critic
        self.step = step
        self.value = value
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nenv,
                 nsteps,
                 nstack,
                 reuse=False):
        """Build a policy graph with a main head, ten sub-heads and three coordinate heads.

        The observation placeholder is uint8 with the hard-coded shape
        ``(nenv * nsteps, 64, 64, 1 * nstack)``; ``ob_space`` and
        ``ac_space`` are accepted but not read. The input is cast to float32
        (without rescaling) and passed through two ``conv`` layers with
        ``pad="SAME"`` under "model/common".

        Heads (all ``fc`` heads use an identity activation):
          * "model/pi1": flattened features -> 256-unit ``fc`` -> 'pi' (524).
          * "model/sub3" ... "model/sub12": ``fc`` layers 'pi_sub3' ...
            'pi_sub12' taking ``pi`` as input, with 2, 5, 10, 4, 2, 4, 500,
            4, 10 and 500 outputs respectively.
          * 'v': one output from the 256-unit layer, directly under "model".
          * "model/xy0", "model/xy1", "model/xy2": single-filter 1x1 ``conv``
            on the conv features; ``pi_x*`` is the slice ``[:, :, 0, 0]`` and
            ``pi_y*`` is ``[:, 0, :, 0]``, each passed to ``sample``.

        ``step(ob)`` returns ``pi``, the ten sub-head outputs, the six
        sampled coordinates, ``vf[:, 0]`` and a trailing ``[]``.
        """
        batch = nenv * nsteps
        frame_h, frame_w, frame_c = (64, 64, 1)
        obs_shape = (batch, frame_h, frame_w, frame_c * nstack)
        n_actions = 524
        # (sub-head index, output width); index N maps to scope "subN" and
        # layer name 'pi_subN'.
        sub_specs = ((3, 2), (4, 5), (5, 10), (6, 4), (7, 2),
                     (8, 4), (9, 500), (10, 4), (11, 10), (12, 500))
        gain = np.sqrt(2)

        def identity(t):
            return t

        def coordinate_head(head_name, stride):
            """Return (x slice, sampled x, y slice, sampled y) from a 1x1 conv."""
            with tf.variable_scope(head_name, reuse=reuse):
                # 1 x 1 convolution for dimensionality reduction
                grid = conv(trunk, head_name, nf=1, rf=1, stride=stride,
                            init_scale=gain)
                slice_x = grid[:, :, 0, 0]
                drawn_x = sample(slice_x)
                slice_y = grid[:, 0, :, 0]
                drawn_y = sample(slice_y)
            return slice_x, drawn_x, slice_y, drawn_y

        obs_ph = tf.placeholder(tf.uint8, obs_shape)  # obs
        with tf.variable_scope("model", reuse=reuse):
            with tf.variable_scope("common", reuse=reuse):
                c1 = conv(tf.cast(obs_ph, tf.float32), 'c1', nf=16, rf=5,
                          stride=2, init_scale=gain, pad="SAME")
                trunk = conv(c1, 'c2', nf=32, rf=3, stride=1,
                             init_scale=gain, pad="SAME")

            with tf.variable_scope("pi1", reuse=reuse):
                flat = conv_to_fc(trunk)
                hidden = fc(flat, 'fc1', nh=256, init_scale=gain)
                logits = fc(hidden, 'pi', n_actions, act=identity)

            # The sub-heads take the main head's output as input, not `hidden`.
            sub_heads = []
            for index, width in sub_specs:
                with tf.variable_scope("sub%d" % index, reuse=reuse):
                    sub_heads.append(
                        fc(logits, 'pi_sub%d' % index, width, act=identity))

            critic = fc(hidden, 'v', 1, act=identity)

            # NOTE(review): xy0 uses stride 2 while xy1/xy2 use stride 1 --
            # confirm this asymmetry is intended.
            pi_x0, x0, pi_y0, y0 = coordinate_head("xy0", 2)
            pi_x1, x1, pi_y1, y1 = coordinate_head("xy1", 1)
            pi_x2, x2, pi_y2, y2 = coordinate_head("xy2", 1)

        value_op = critic[:, 0]
        # Not fetched by step/value below; kept so the graph is unchanged.
        unused_action_op = sample(logits)

        fetches = [logits] + sub_heads + [x0, y0, x1, y1, x2, y2, value_op]

        def step(ob, *_args, **_kwargs):
            """Evaluate every head on ``ob`` and append a dummy state."""
            results = sess.run(list(fetches), {obs_ph: ob})
            return tuple(results) + ([],)  # trailing [] is the dummy state

        def value(ob, *_args, **_kwargs):
            """Return ``vf[:, 0]`` evaluated on ``ob``."""
            return sess.run(value_op, {obs_ph: ob})

        self.initial_state = []  # not stateful
        self.X = obs_ph
        self.pi = logits
        for (index, _width), head in zip(sub_specs, sub_heads):
            setattr(self, 'pi_sub%d' % index, head)
        self.pi_x0 = pi_x0
        self.pi_y0 = pi_y0
        self.pi_x1 = pi_x1
        self.pi_y1 = pi_y1
        self.pi_x2 = pi_x2
        self.pi_y2 = pi_y2
        self.vf = critic
        self.step = step
        self.value = value
Exemple #14
0
    def __init__(self,
                 sess,
                 obvs,
                 batch_size,
                 nstack,
                 num_Gaussians,
                 reuse=False):
        """Build a convolutional variational auto-encoder graph over ``obvs``.

        Encoder (inside the "model" variable scope): ``obvs`` is cast to
        float32, divided by 255 and passed through three ``conv`` layers, a
        flatten and a 512-unit ``fc`` layer. Two identity-activation ``fc``
        heads of width ``num_Gaussians`` give ``z_mean`` and
        ``z_log_sigma_sq``.

        Sampling: ``z_sample = z_mean + sqrt(exp(z_log_sigma_sq)) * eps``,
        where ``eps`` is standard-normal noise of static shape
        ``(batch_size, num_Gaussians)``.

        Decoder: two ``fc`` layers bring ``z_sample`` back to the flattened
        encoder width, the result is reshaped to the third conv layer's
        static shape, and three ``deconv`` layers target the static shapes
        of the second conv layer, the first conv layer and ``obvs``. The
        last ``deconv`` uses a sigmoid activation.

        ``sess`` and ``nstack`` are accepted but not read. Exposes
        ``z_mean``, ``z_log_sigma_sq``, ``z_sample`` and ``reconstruction``.
        """
        gain = np.sqrt(2)

        def identity(t):
            return t

        def static_dims(tensor):
            """Static sizes of axes 1..3 of ``tensor`` as a list."""
            shape = tensor.get_shape()
            return [shape[1].value, shape[2].value, shape[3].value]

        with tf.variable_scope("model", reuse=reuse):
            # ---- encoder ----
            scaled = tf.cast(obvs, tf.float32) / 255.
            enc1 = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
            enc2 = conv(enc1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
            enc3 = conv(enc2, 'c3', nf=32, rf=3, stride=1, init_scale=gain)
            flat = conv_to_fc(enc3)
            hidden = fc(flat, 'fc1', nh=512, init_scale=gain)
            z_mean = fc(hidden, 'z_mean', nh=num_Gaussians, act=identity)
            z_log_sigma_sq = fc(hidden, 'z_sigma', nh=num_Gaussians,
                                act=identity)

            # ---- sampling ----
            eps = tf.random_normal((batch_size, num_Gaussians), 0, 1,
                                   dtype=tf.float32)
            sigma = tf.sqrt(tf.exp(z_log_sigma_sq))
            z_sample = tf.add(z_mean, tf.multiply(sigma, eps))

            # ---- decoder ----
            dec_fc1 = fc(z_sample, 'g_fc1', nh=512, init_scale=gain)
            dec_fc2 = fc(dec_fc1, 'g_fc2', nh=flat.get_shape()[-1],
                         init_scale=gain)

            # The leading dimension for reshape/deconv is taken from the graph
            # at run time; it deliberately uses a new name instead of
            # rebinding the ``batch_size`` argument.
            dynamic_batch = tf.shape(dec_fc2)[0]
            dec_map = tf.reshape(dec_fc2, [dynamic_batch] + static_dims(enc3))

            dec1 = deconv(dec_map, 'g_c1', nf=64, rf=3, stride=1,
                          output_size=[dynamic_batch] + static_dims(enc2),
                          init_scale=gain)
            dec2 = deconv(dec1, 'g_c2', nf=32, rf=4, stride=2,
                          output_size=[dynamic_batch] + static_dims(enc1),
                          init_scale=gain)
            # NOTE(review): nf=4 is hard-coded while the output shape takes
            # its last dimension from obvs -- presumably obvs has 4 channels;
            # confirm against callers.
            reconstruction_mean = deconv(
                dec2, 'g_c3', nf=4, rf=8, stride=4,
                output_size=[dynamic_batch] + static_dims(obvs),
                init_scale=gain, act=tf.nn.sigmoid)

        self.z_mean = z_mean
        self.z_log_sigma_sq = z_log_sigma_sq
        self.z_sample = z_sample
        self.reconstruction = reconstruction_mean