Esempio n. 1
0
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False): #pylint: disable=W0613
        """Build a feed-forward policy with separate policy and value towers.

        Both towers read the same flattened observation and each consists of
        two 64-unit tanh layers; the value tower ends in a single linear unit.

        Args:
            sess: session object whose ``run`` method evaluates the graph.
            ob_space: observation space, forwarded to ``observation_input``.
            ac_space: action space, forwarded to ``make_pdtype``.
            nbatch: batch size, forwarded to ``observation_input``.
            nsteps: unused here; kept for a signature shared with other policies.
            reuse: forwarded to ``tf.variable_scope`` for the "model" scope.
        """
        self.pdtype = make_pdtype(ac_space)
        hidden_gain = np.sqrt(2)
        with tf.variable_scope("model", reuse=reuse):
            obs_ph, obs = observation_input(ob_space, nbatch)
            obs = tf.layers.flatten(obs)

            # Policy tower.
            pi_latent = tf.tanh(fc(obs, 'pi_fc1', nh=64, init_scale=hidden_gain))
            pi_latent = tf.tanh(fc(pi_latent, 'pi_fc2', nh=64, init_scale=hidden_gain))

            # Value tower; the trailing [:, 0] drops the unit-sized last axis.
            v_latent = tf.tanh(fc(obs, 'vf_fc1', nh=64, init_scale=hidden_gain))
            v_latent = tf.tanh(fc(v_latent, 'vf_fc2', nh=64, init_scale=hidden_gain))
            value_out = fc(v_latent, 'vf', 1)[:, 0]

            self.pd, self.pi = self.pdtype.pdfromlatent(pi_latent, init_scale=0.01)

        sampled_action = self.pd.sample()
        sampled_neglogp = self.pd.neglogp(sampled_action)
        # This policy carries no recurrent state.
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            """Return (action, value, state, neglogp) for observations `ob`."""
            fetches = [sampled_action, value_out, sampled_neglogp]
            action, val, nlp = sess.run(fetches, {obs_ph: ob})
            return action, val, self.initial_state, nlp

        def value(ob, *_args, **_kwargs):
            """Return the value estimate for observations `ob`."""
            return sess.run(value_out, {obs_ph: ob})

        self.step = step
        self.value = value
        self.X = obs_ph
        self.vf = value_out
Esempio n. 2
0
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, nlstm=256, reuse=False):
        """Build a recurrent (CNN + LSTM) policy and value graph.

        Args:
            sess: session object whose ``run`` method evaluates the graph.
            ob_space: observation space, forwarded to ``observation_input``.
            ac_space: action space, forwarded to ``make_pdtype``.
            nbatch: total batch size; must be a multiple of ``nsteps`` for the
                batch/sequence reshapes to line up (nenv = nbatch // nsteps).
            nsteps: number of time steps per environment in one batch.
            nlstm: LSTM width; the state placeholder has ``2 * nlstm`` columns.
            reuse: forwarded to ``tf.variable_scope`` for the "model" scope.
        """
        nenv = nbatch // nsteps
        self.pdtype = make_pdtype(ac_space)
        X, processed_x = observation_input(ob_space, nbatch)

        M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, nlstm*2]) #states
        with tf.variable_scope("model", reuse=reuse):
            # Feed the processed observation (not the raw placeholder) so this
            # policy agrees with the non-recurrent CNN policy, which does the
            # same; previously `processed_x` was computed and then ignored.
            h = nature_cnn(processed_x)
            xs = batch_to_seq(h, nenv, nsteps)
            ms = batch_to_seq(M, nenv, nsteps)
            h5, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
            h5 = seq_to_batch(h5)
            vf = fc(h5, 'v', 1)
            self.pd, self.pi = self.pdtype.pdfromlatent(h5)

        # Drop the unit-sized last axis for the values returned by step/value.
        v0 = vf[:, 0]
        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = np.zeros((nenv, nlstm*2), dtype=np.float32)

        def step(ob, state, mask):
            """Return [action, value, new_state, neglogp] for one batch."""
            return sess.run([a0, v0, snew, neglogp0], {X:ob, S:state, M:mask})

        def value(ob, state, mask):
            """Return the value estimate for one batch."""
            return sess.run(v0, {X:ob, S:state, M:mask})

        self.X = X
        self.M = M
        self.S = S
        # NOTE(review): `vf` keeps its trailing unit axis here, whereas the
        # feed-forward policies expose `vf[:, 0]` — confirm callers expect this.
        self.vf = vf
        self.step = step
        self.value = value
Esempio n. 3
0
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nbatch,
                 nsteps,
                 reuse=False,
                 **conv_kwargs):  #pylint: disable=W0613
        """Build a convolutional policy whose heads share one CNN encoder.

        Args:
            sess: session object whose ``run`` method evaluates the graph.
            ob_space: observation space, forwarded to ``observation_input``.
            ac_space: action space, forwarded to ``make_pdtype``.
            nbatch: batch size, forwarded to ``observation_input``.
            nsteps: unused here; kept for a signature shared with other policies.
            reuse: forwarded to ``tf.variable_scope`` for the "model" scope.
            **conv_kwargs: extra keyword arguments handed to ``nature_cnn``.
        """
        self.pdtype = make_pdtype(ac_space)
        obs_ph, obs = observation_input(ob_space, nbatch)
        with tf.variable_scope("model", reuse=reuse):
            features = nature_cnn(obs, **conv_kwargs)
            # Single linear value unit; [:, 0] drops the unit-sized last axis.
            value_out = fc(features, 'v', 1)[:, 0]
            self.pd, self.pi = self.pdtype.pdfromlatent(features, init_scale=0.01)

        sampled_action = self.pd.sample()
        sampled_neglogp = self.pd.neglogp(sampled_action)
        # This policy carries no recurrent state.
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            """Return (action, value, state, neglogp) for observations `ob`."""
            fetches = [sampled_action, value_out, sampled_neglogp]
            action, val, nlp = sess.run(fetches, {obs_ph: ob})
            return action, val, self.initial_state, nlp

        def value(ob, *_args, **_kwargs):
            """Return the value estimate for observations `ob`."""
            return sess.run(value_out, {obs_ph: ob})

        self.step = step
        self.value = value
        self.X = obs_ph
        self.vf = value_out
Esempio n. 4
0
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.

    Casts the input to float32 and divides by 255, applies three ReLU
    convolutions ('c1', 'c2', 'c3'), flattens the result with ``conv_to_fc``
    and returns a 512-unit ReLU fully connected layer ('fc1').
    Any ``conv_kwargs`` are forwarded to every ``conv`` call.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)
    # (scope name, number of filters, filter size, stride) per conv layer.
    conv_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )

    features = tf.cast(unscaled_images, tf.float32) / 255.
    for scope, n_filters, filter_size, stride in conv_specs:
        features = relu(
            conv(features,
                 scope,
                 nf=n_filters,
                 rf=filter_size,
                 stride=stride,
                 init_scale=gain,
                 **conv_kwargs))

    flat = conv_to_fc(features)
    return relu(fc(flat, 'fc1', nh=512, init_scale=gain))
Esempio n. 5
0
             # however, I like the idea of activation as feature existence probability
             # Q: could these be good for transfer learning? Maybe.
         # Tensor valued working memories (generalizing NTM)
 # List of job application plans
     # OpenAI fellowship
     # AI2 software engineer
     # AI for Brain Science -- look up positions
     # Google DeepMind -- research engineer
 X, processed_x = observation_input(ob_space, nbatch)
 M = tf.placeholder(tf.float32, [nbatch]) #mask
 self.pdtype = make_pdtype(ac_space)
 with tf.variable_scope('model', reuse=reuse):
     h = caps_cnn(processed_x)
     h = capsule_conv(h, 'capsconv', 4, 2, 32, 8)
     h = capsule(h, 'caps', 16, 8, from_conv=True)
     vf = fc(h, 'v', 1)[:, 0] # value function
     # for discrete action spaces, create a final capsule layer
     # one capsule for each possible action
     if isinstance(ac_space, spaces.Discrete): 
         p = capsule(h, 'pcaps', ac_space.n, 4, from_conv=False)
         pnorm = tf.reduce_sum(tf.square(p), axis=2)
         self.pd, self.pi = self.pdtype.pdfromflat(pnorm), pnorm
     else:
         self.pd, self.pi = self.pdtype.pdfromlatent(h)
     
 a0 = self.pd.sample()
 neglogp0 = self.pd.neglogp(a0)
 self.initial_state = None
 
 def step(ob, *_args, **_kwargs):
     a, v, neglogp = sess.run([a0, vf, neglogp0], {X:ob})
Esempio n. 6
0
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nbatch,
                 nsteps,
                 reuse=False,
                 **conv_kwargs):  #pylint: disable=W0613
        """Build a capsule-network policy and value graph.

        Pipeline: ``caps_cnn`` encoder -> convolutional capsule layer ->
        8-capsule encoding layer -> one policy capsule per action. The
        squared norm of each policy capsule is used as the flat input of the
        action distribution; the value head is a linear unit on the flattened
        encoding capsules.

        Args:
            sess: session object whose ``run`` method evaluates the graph.
            ob_space: observation space, forwarded to ``observation_input``.
            ac_space: action space, forwarded to ``make_pdtype``; its ``n``
                attribute (when present) sets the number of policy capsules.
            nbatch: batch size, forwarded to ``observation_input``.
            nsteps: unused here; kept for a signature shared with other policies.
            reuse: forwarded to ``tf.variable_scope`` for the "model" scope.
            **conv_kwargs: extra keyword arguments handed to ``caps_cnn``.
        """
        self.pdtype = make_pdtype(ac_space)
        # One policy capsule per action. This used to be hard-coded to 4, which
        # only matches 4-action spaces; spaces without an `n` attribute keep
        # the legacy value of 4.
        n_actions = getattr(ac_space, 'n', 4)
        X, processed_x = observation_input(ob_space, nbatch)
        with tf.variable_scope("model", reuse=reuse):
            conv1 = caps_cnn(processed_x, **conv_kwargs)
            conv1 = tf.transpose(
                conv1, [0, 3, 1, 2])  # reshape to expected input format
            # Insert a singleton axis at position 1 (matches input_dim=1 below).
            conv1 = tf.expand_dims(conv1, 1)
            capsule1 = layers.conv_slim_capsule(
                conv1,
                input_dim=1,
                output_dim=32,
                layer_name='conv_capsule1',
                num_routing=1,
                input_atoms=256,
                output_atoms=8,
                stride=2,
                kernel_size=9,
                padding='VALID',
                leaky=False,
            )
            # Move axis 2 (presumably the atom axis — TODO confirm) last, then
            # collapse everything between the batch and that axis.
            capsule1_atom_last = tf.transpose(capsule1, [0, 1, 3, 4, 2])
            capsule1_3d = tf.reshape(capsule1_atom_last,
                                     [tf.shape(conv1)[0], -1, 8])
            _, _, _, height, width = capsule1.get_shape()
            input_dim1 = 32 * height.value * width.value
            # main encoding layer
            h = layers.capsule(
                input_tensor=capsule1_3d,
                input_dim=input_dim1,
                output_dim=8,
                layer_name='capsule2',
                input_atoms=8,
                output_atoms=16,
                num_routing=3,
                leaky=False,
            )
            # capsule policy layer
            hpi = layers.capsule(
                input_tensor=h,
                input_dim=8,
                output_dim=n_actions,
                layer_name='capsule_pi',
                input_atoms=16,
                output_atoms=4,
                num_routing=3,
                leaky=False,
            )
            pnorm = tf.reduce_sum(tf.square(hpi), axis=-1)
            # value function
            hvf = conv_to_fc(h)
            vf = fc(hvf, 'v', 1)[:, 0]
            # policy based on pnorm (the squared norms of policy capsule vecs)
            self.pd, self.pi = self.pdtype.pdfromflat(pnorm), pnorm

        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        # This policy carries no recurrent state.
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            """Return (action, value, state, neglogp) for observations `ob`."""
            a, v, neglogp = sess.run([a0, vf, neglogp0], {X: ob})
            return a, v, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            """Return the value estimate for observations `ob`."""
            return sess.run(vf, {X: ob})

        self.X = X
        self.vf = vf
        self.step = step
        self.value = value