Exemple #1
0
def nature_cnn(unscaled_images, scope, **conv_kwargs):
    """
    CNN from Nature paper, built inside the variable scope `scope`.

    The input is cast to float32 and divided by 255, passed through three
    ReLU-activated `conv` layers ('c1', 'c2', 'c3'), flattened with
    `conv_to_fc` and fed to the 512-unit `fc` layer 'fc1'. The ReLU of that
    layer is returned. Extra keyword arguments are forwarded to every `conv`
    call.
    """
    relu = tf.nn.relu

    def conv_relu(inputs, name, nf, rf, stride):
        # Every conv layer shares the sqrt(2) init scale and the caller kwargs.
        return relu(
            conv(inputs,
                 name,
                 nf=nf,
                 rf=rf,
                 stride=stride,
                 init_scale=np.sqrt(2),
                 **conv_kwargs))

    with tf.variable_scope(scope):
        net = tf.cast(unscaled_images, tf.float32) / 255.
        # 8x8 filter size is common on the very 1st conv layer, looking at the input image
        net = conv_relu(net, 'c1', 32, 8, 4)
        net = conv_relu(net, 'c2', 64, 4, 2)
        net = conv_relu(net, 'c3', 64, 3, 1)
        flat = conv_to_fc(net)
        return relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
def custom_cnn(unscaled_images, **conv_kwargs):
    """
    Three conv layers followed by a 512-unit dense layer, with leaky ReLU.

    The input is cast to float32 and divided by 255. Layers 'c1', 'c2' and
    'c3' are built with `conv` (extra keyword arguments are forwarded to each
    call), the result is flattened with `conv_to_fc`, and the leaky-ReLU
    output of the `fc` layer 'fc1' is returned.
    """
    # (scope name, nf, rf, stride) of each conv layer, in build order.
    conv_specs = (('c1', 32, 8, 4), ('c2', 64, 4, 2), ('c3', 64, 3, 1))

    net = tf.cast(unscaled_images, tf.float32) / 255.
    for name, nf, rf, stride in conv_specs:
        net = tf.nn.leaky_relu(
            conv(net,
                 name,
                 nf=nf,
                 rf=rf,
                 stride=stride,
                 init_scale=np.sqrt(2),
                 **conv_kwargs))
    flat = conv_to_fc(net)
    return tf.nn.leaky_relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
Exemple #3
0
def vqn_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from VQN paper.

    Casts the input to float32, divides by 255 and applies three
    ReLU-activated `conv` layers ('c1', 'c2', 'c3'). The activated output of
    'c3' is returned as is (it is not flattened here). Extra keyword
    arguments are forwarded to every `conv` call.
    """
    relu = tf.nn.relu

    def conv_relu(inputs, name, nf, rf, stride):
        return relu(
            conv(inputs,
                 name,
                 nf=nf,
                 rf=rf,
                 stride=stride,
                 init_scale=np.sqrt(2),
                 **conv_kwargs))

    net = tf.cast(unscaled_images, tf.float32) / 255.
    net = conv_relu(net, 'c1', 32, 3, 1)
    net = conv_relu(net, 'c2', 32, 3, 1)
    return conv_relu(net, 'c3', 64, 4, 2)
Exemple #4
0
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nbatch,
                 nsteps,
                 nlstm=256,
                 reuse=False,
                 scope_name="model"):
        """Build a conv + LSTM policy/value graph under `scope_name`.

        Creates placeholders for observations (`X`), done-masks (`M`) and
        LSTM states (`S`), stacks three `conv` layers, an `fc` layer and an
        `lstm` layer, and adds policy ('pi') and value ('v') heads on top.
        The number of environments is `nbatch // nsteps`.

        Exposes `X`, `M`, `S`, `pi`, `vf`, `pd`, `pdtype`, `initial_state`
        and the `step` / `value` callables, which run the graph in `sess`.
        """
        nenv = nbatch // nsteps

        height, width, channels = ob_space.shape
        nact = ac_space.n
        sqrt2 = np.sqrt(2)

        def identity(x):
            return x

        X = tf.placeholder(tf.uint8, (nbatch, height, width, channels))  #obs
        M = tf.placeholder(tf.float32, [nbatch])  #mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, nlstm * 2])  #states
        with tf.variable_scope(scope_name, reuse=reuse):
            scaled_obs = tf.cast(X, tf.float32) / 255.
            conv1 = conv(scaled_obs,
                         'c1',
                         nf=32,
                         rf=8,
                         stride=4,
                         init_scale=sqrt2)
            conv2 = conv(conv1, 'c2', nf=64, rf=4, stride=2, init_scale=sqrt2)
            conv3 = conv(conv2, 'c3', nf=64, rf=3, stride=1, init_scale=sqrt2)
            features = fc(conv_to_fc(conv3), 'fc1', nh=512, init_scale=sqrt2)
            xs = batch_to_seq(features, nenv, nsteps)
            ms = batch_to_seq(M, nenv, nsteps)
            lstm_out, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
            lstm_out = seq_to_batch(lstm_out)
            pi = fc(lstm_out, 'pi', nact, act=identity)
            vf = fc(lstm_out, 'v', 1, act=identity)

        self.pdtype = make_pdtype(ac_space)
        self.pd = self.pdtype.pdfromflat(pi)

        v0 = vf[:, 0]
        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

        def step(ob, state, mask):
            # Returns [action, value, new LSTM state, neglogp] as run by sess.
            feed = {X: ob, S: state, M: mask}
            return sess.run([a0, v0, snew, neglogp0], feed)

        def value(ob, state, mask):
            feed = {X: ob, S: state, M: mask}
            return sess.run(v0, feed)

        self.X = X
        self.M = M
        self.S = S
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Exemple #5
0
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.

    Scales the input by 1/255 after casting to float32, applies three
    ReLU-activated `conv` layers, flattens with `conv_to_fc`, and returns the
    ReLU of the 512-unit `fc` layer 'fc1'. Extra keyword arguments are
    forwarded to every `conv` call.
    """
    # np.sqrt(2) is the scalar passed as `init_scale` to every layer below.
    sqrt2 = np.sqrt(2)
    # (scope name, nf, rf, stride) of each conv layer, in build order.
    conv_specs = (('c1', 32, 8, 4), ('c2', 64, 4, 2), ('c3', 64, 3, 1))

    net = tf.cast(unscaled_images, tf.float32) / 255.
    for name, nf, rf, stride in conv_specs:
        pre_activation = conv(net,
                              name,
                              nf=nf,
                              rf=rf,
                              stride=stride,
                              init_scale=sqrt2,
                              **conv_kwargs)
        net = tf.nn.relu(pre_activation)  # rectified linear unit

    flat = conv_to_fc(net)
    return tf.nn.relu(fc(flat, 'fc1', nh=512, init_scale=sqrt2))
Exemple #6
0
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.

    Input is cast to float32 and divided by 255, then run through conv layers
    'c1', 'c2', 'c3' (each followed by ReLU), flattened via `conv_to_fc` and
    passed to the 512-unit `fc` layer 'fc1', whose ReLU output is returned.
    `conv_kwargs` is forwarded unchanged to each `conv` call.
    """
    relu = tf.nn.relu

    def conv_relu(inputs, name, nf, rf, stride):
        return relu(
            conv(inputs,
                 name,
                 nf=nf,
                 rf=rf,
                 stride=stride,
                 init_scale=np.sqrt(2),
                 **conv_kwargs))

    scaled = tf.cast(unscaled_images, tf.float32) / 255.
    # 8x8 filter size is common on the very 1st conv layer, looking at the input image
    out1 = conv_relu(scaled, 'c1', 32, 8, 4)
    out2 = conv_relu(out1, 'c2', 64, 4, 2)
    out3 = conv_relu(out2, 'c3', 64, 3, 1)
    return relu(fc(conv_to_fc(out3), 'fc1', nh=512, init_scale=np.sqrt(2)))
Exemple #7
0
    def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
        """Build a feed-forward conv policy/value graph under scope "model".

        The observation placeholder has shape
        (nenv * nsteps, nh, nw, nc * nstack) with dtype uint8. It is cast to
        float32 and divided by 255 before three `conv` layers and one `fc`
        layer; the 'pi' and 'v' heads are built on that `fc` output.

        Exposes `X`, `pi`, `vf`, `initial_state` (an empty list) and the
        `step` / `value` callables, which run the graph in `sess`.
        """
        nbatch = nenv * nsteps
        height, width, channels = ob_space.shape
        nact = ac_space.n
        sqrt2 = np.sqrt(2)

        def identity(x):
            return x

        X = tf.placeholder(tf.uint8, (nbatch, height, width, channels * nstack))  # obs
        with tf.variable_scope("model", reuse=reuse):
            scaled_obs = tf.cast(X, tf.float32) / 255.
            conv1 = conv(scaled_obs, 'c1', nf=32, rf=8, stride=4, init_scale=sqrt2)
            conv2 = conv(conv1, 'c2', nf=64, rf=4, stride=2, init_scale=sqrt2)
            conv3 = conv(conv2, 'c3', nf=64, rf=3, stride=1, init_scale=sqrt2)
            features = fc(conv_to_fc(conv3), 'fc1', nh=512, init_scale=sqrt2)
            pi = fc(features, 'pi', nact, act=identity)
            vf = fc(features, 'v', 1, act=identity)

        v0 = vf[:, 0]
        a0 = sample(pi)
        self.initial_state = []  # not stateful

        def step(ob, *_args, **_kwargs):
            action, val = sess.run([a0, v0], {X: ob})
            return action, val, []  # dummy state

        def value(ob, *_args, **_kwargs):
            return sess.run(v0, {X: ob})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Exemple #8
0
    def network_fn(X):
        """Build a three-layer conv net whose first layer is gated by `fls_module`.

        `X` is cast to float32 and divided by 255. After the ReLU of conv
        layer 'c1', `fls_module` is evaluated on the features inside variable
        scope 'h1' and its output multiplies those features. Two more
        conv + ReLU layers follow, then `conv_to_fc` and `final_linear`.

        Returns a `NetworkOutput` whose `policy_latent` is the `final_linear`
        output and whose `extra` is the `fls_module` output.
        """
        net = tf.cast(X, tf.float32) / 255.

        sqrt2_init = dict(init_scale=np.sqrt(2))

        net = tf.nn.relu(conv(net, 'c1', nf=32, rf=8, stride=4, **sqrt2_init))
        with tf.variable_scope('h1'):
            gate = fls_module(net)
        net = net * gate

        net = tf.nn.relu(conv(net, 'c2', nf=64, rf=4, stride=2, **sqrt2_init))
        net = tf.nn.relu(conv(net, 'c3', nf=64, rf=3, stride=1, **sqrt2_init))

        latent = final_linear(conv_to_fc(net))

        return NetworkOutput(
            policy_latent=latent,
            recurrent_tensors=None,
            extra=gate,
        )
Exemple #9
0
def nature_cnn(unscaled_images, keep_probs, **conv_kwargs):
    """
    CNN from Nature paper, with dropout on the final dense layer.

    The input is fed to the first conv layer as given (no cast or 1/255
    scaling is applied here). Three ReLU-activated `conv` layers, all with
    rf=3 and stride=1, are followed by `conv_to_fc` and the ReLU-activated
    512-unit `fc` layer 'fc1'. The result of `tf.nn.dropout` with
    `keep_prob=keep_probs` on that layer is returned.
    """
    sqrt2 = np.sqrt(2)
    relu = tf.nn.relu

    net = unscaled_images
    # (scope name, nf) of each conv layer; rf and stride are shared.
    for name, nf in (('c1', 32), ('c2', 64), ('c3', 64)):
        net = relu(
            conv(net,
                 name,
                 nf=nf,
                 rf=3,
                 stride=1,
                 init_scale=sqrt2,
                 **conv_kwargs))

    dense = relu(fc(conv_to_fc(net), 'fc1', nh=512, init_scale=sqrt2))
    return tf.nn.dropout(dense, keep_prob=keep_probs)
Exemple #10
0
def nature_cnn(input_shape, **conv_kwargs):
    """
    CNN from Nature paper, returned as a `tf.keras.Model`.

    A uint8 `tf.keras.Input` of shape `input_shape` is cast to float32 and
    divided by 255, then passed through the layers produced by `conv` for
    'c1', 'c2' and 'c3', a `Flatten` layer and a 512-unit relu `Dense` layer
    named 'fc1'. The input shape is printed when the model is built.

    `conv_kwargs` is accepted but not used by this function.
    """
    print('input shape is {}'.format(input_shape))
    sqrt2 = np.sqrt(2)
    # (layer name, nf, rf, stride) of each conv layer, in build order.
    conv_specs = (('c1', 32, 8, 4), ('c2', 64, 4, 2), ('c3', 64, 3, 1))

    x_input = tf.keras.Input(shape=input_shape, dtype=tf.uint8)
    net = tf.cast(x_input, tf.float32) / 255.
    for name, nf, rf, stride in conv_specs:
        layer = conv(name,
                     nf=nf,
                     rf=rf,
                     stride=stride,
                     activation='relu',
                     init_scale=sqrt2)
        net = layer(net)

    net = tf.keras.layers.Flatten()(net)
    dense = tf.keras.layers.Dense(units=512,
                                  kernel_initializer=ortho_init(sqrt2),
                                  name='fc1',
                                  activation='relu')
    return tf.keras.Model(inputs=[x_input], outputs=[dense(net)])
Exemple #11
0
 def __call__(self, obs, action, reuse=False):
     """Build the single-output head for `obs` and `action` under scope `self.name`.

     The variable scope is always opened with `tf.AUTO_REUSE`; the `reuse`
     argument is accepted but not consulted.

     For `self.network == 'mlp'` the observation and action are concatenated
     and passed through `self.network_builder` and a 1-unit dense layer.
     For `'cloth_cnn'` the observation goes through three tanh-activated
     `conv` layers, is flattened, concatenated with the action, and passed
     through a 200-unit tanh dense layer and a 1-unit dense layer.
     Any other value raises `ValueError`.
     """
     def small_uniform():
         # Fresh initializer per layer, as each dense layer had its own.
         return tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)

     with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
         if self.network == 'mlp':
             # this assumes observation and action can be concatenated
             joined = tf.concat([obs, action], axis=-1)
             hidden = self.network_builder(joined)
             out = tf.layers.dense(hidden, 1, kernel_initializer=small_uniform(), name='output')
         elif self.network == 'cloth_cnn':
             # --------------------------------------------------------------
             # The net is designed explicitly here to keep things simple.
             # ASSUME WE ALREADY DIVIDE BY 255 IN DDPG.PY, so obs uses tf.float32.
             # This design is up for debate. For example we really should crop the net.
             # There is a separate `cloth_cnn`, but that is for PPO with only states as input.
             # --------------------------------------------------------------
             tanh = tf.nn.tanh
             sqrt2 = np.sqrt(2)
             feat = obs
             with tf.variable_scope("convnet"):
                 for name, rf, stride in (('c1', 8, 4), ('c2', 4, 2), ('c3', 3, 1)):
                     feat = tanh(conv(feat, name, nf=32, rf=rf, stride=stride, init_scale=sqrt2))
                 feat = conv_to_fc(feat)
             with tf.variable_scope("fcnet"):
                 # this assumes observation and action can be concatenated
                 feat = tf.concat([feat, action], axis=-1)
                 feat = tanh(tf.layers.dense(feat, 200, kernel_initializer=small_uniform(), name='fc1'))
                 out = tf.layers.dense(feat, 1, kernel_initializer=small_uniform(), name='output')
         else:
             raise ValueError(self.network)
     return out
Exemple #12
0
    def network_fn(X):
        """Build a `sparse_block` feature extractor weighted by a per-position map.

        `X` is cast to float32, divided by 255 and padded by one element on
        each side of axes 1 and 2. The `sparse_block` output is
        L2-normalized along axis 3. Two `conv` layers with rf=1 ('h1' with
        elu, then 'h2') followed by `softmax_pixelwise` and a sum over the
        last axis give a weight map that multiplies the features. The
        result is reshaped to (-1, 3136) and passed to `final_linear`.

        Returns a `NetworkOutput` with the `final_linear` output as
        `policy_latent` and the weight map as `extra`.
        """
        feats = tf.cast(X, tf.float32) / 255.

        sqrt2_init = dict(init_scale=np.sqrt(2))

        feats = tf.pad(feats, ((0, 0), (1, 1), (1, 1), (0, 0)))
        feats = tf.nn.l2_normalize(sparse_block(feats), axis=3)

        weights = tf.nn.elu(
            conv(feats, 'h1', nf=512, rf=1, stride=1, pad='SAME', **sqrt2_init))
        weights = conv(weights, 'h2', nf=2, rf=1, stride=1, pad='SAME', **sqrt2_init)
        weights = tf.reduce_sum(softmax_pixelwise(weights), axis=-1, keepdims=True)

        weighted = tf.reshape(feats * weights, (-1, 3136))
        latent = final_linear(weighted)

        return NetworkOutput(
            policy_latent=latent,
            recurrent_tensors=None,
            extra=weights,
        )
Exemple #13
0
def dcgan_cnn(images, dout, **conv_kwargs):
    """Three conv layers with batch norm + leaky ReLU, then two `fc` layers.

    Each `conv` output goes through `batch_norm(name, x)` and
    `tf.nn.leaky_relu` with alpha=0.2. The last conv activation is flattened
    with `conv_to_fc`, passed to a 512-unit `fc` in scope 'final', and then to
    the `dout`-unit `fc` layer 'fc1', which is batch-normed and activated the
    same way.

    Returns a tuple of the final activation and the `.shape` of the last
    conv activation.
    """
    def bn_lrelu(name, inpt):
        return tf.nn.leaky_relu(batch_norm(name, inpt), alpha=0.2)

    sqrt2 = np.sqrt(2)
    # (batch-norm name, conv scope, nf, rf, stride), in build order.
    layer_specs = (
        ('l1', 'c1', 32, 8, 4),
        ('l2', 'c2', 64, 4, 2),
        ('l3', 'c3', 64, 3, 1),
    )

    net = images
    for bn_name, conv_name, nf, rf, stride in layer_specs:
        net = bn_lrelu(
            bn_name,
            conv(net,
                 conv_name,
                 nf=nf,
                 rf=rf,
                 stride=stride,
                 init_scale=sqrt2,
                 **conv_kwargs))

    hidden = fc(conv_to_fc(net), nh=512, scope='final')
    out = bn_lrelu('out', fc(hidden, 'fc1', nh=dout, init_scale=sqrt2))
    return out, net.shape
Exemple #14
0
 def network_fn(X):
     """Combine an image branch and a force vector into a 6-unit latent.

     `X` is unpacked as `(forces, im)`. The image goes through two
     ReLU-activated `conv` layers (rf=2, stride=2) and `conv_to_fc`, is
     concatenated with `forces` along axis 1, and passes through the
     ReLU-activated `fc` layers 'fc1' (30 units) and 'fc2' (6 units).
     `conv_kwargs` comes from the enclosing scope.
     """
     # Example input noted by the original author:
     # [<tf.Tensor 'ppo2_model/Ob:0' shape=(1, 6) dtype=float32>, <tf.Tensor 'ppo2_model/Ob_1:0' shape=(1, 50, 50, 3) dtype=float32>]
     forces, im = X
     relu = tf.nn.relu
     sqrt2 = np.sqrt(2)

     for name, nf in (('c1', 8), ('c2', 32)):
         im = relu(
             conv(im,
                  name,
                  nf=nf,
                  rf=2,
                  stride=2,
                  init_scale=sqrt2,
                  **conv_kwargs))

     merged = tf.concat([conv_to_fc(im), forces], axis=1)
     for name, units in (('fc1', 30), ('fc2', 6)):
         merged = relu(fc(merged, name, nh=units, init_scale=sqrt2))
     return merged
Exemple #15
0
def dcgan_cnn(unscaled_images, **conv_kwargs):
    """Three conv layers, each followed by batch norm and leaky ReLU.

    The input is cast to float32 and divided by 255. Every `conv` output is
    passed through `batch_norm(x, name)` and `tf.nn.leaky_relu`. The last
    activation is flattened with `conv_to_fc` and returned. Extra keyword
    arguments are forwarded to every `conv` call.
    """
    def bn_lrelu(name, inpt):
        return tf.nn.leaky_relu(batch_norm(inpt, name))

    sqrt2 = np.sqrt(2)
    # (batch-norm name, conv scope, nf, rf, stride), in build order.
    layer_specs = (
        ('l1', 'c1', 32, 8, 4),
        ('l2', 'c2', 64, 4, 2),
        ('l3', 'c3', 64, 3, 1),
    )

    net = tf.cast(unscaled_images, tf.float32) / 255.
    for bn_name, conv_name, nf, rf, stride in layer_specs:
        net = bn_lrelu(
            bn_name,
            conv(net,
                 conv_name,
                 nf=nf,
                 rf=rf,
                 stride=stride,
                 init_scale=sqrt2,
                 **conv_kwargs))
    return conv_to_fc(net)
Exemple #16
0
    def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
        """Create the conv policy and value heads inside variable scope "model".

        Observations are fed through a uint8 placeholder of shape
        (nenv * nsteps, nh, nw, nc * nstack), cast to float32 and divided by
        255. Three `conv` layers and the `fc` layer 'fc1' produce the shared
        features for the 'pi' (ac_space.n outputs) and 'v' (1 output) heads.

        Sets `X`, `pi`, `vf`, `initial_state` (empty list) and the `step` /
        `value` callables bound to `sess`.
        """
        nbatch = nenv * nsteps
        nh, nw, nc = ob_space.shape
        nact = ac_space.n
        init = np.sqrt(2)

        def linear(x):
            return x

        X = tf.placeholder(tf.uint8, (nbatch, nh, nw, nc * nstack))  # obs
        with tf.variable_scope("model", reuse=reuse):
            net = tf.cast(X, tf.float32) / 255.
            for name, nf, rf, stride in (('c1', 32, 8, 4), ('c2', 64, 4, 2), ('c3', 64, 3, 1)):
                net = conv(net, name, nf=nf, rf=rf, stride=stride, init_scale=init)
            net = conv_to_fc(net)
            hidden = fc(net, 'fc1', nh=512, init_scale=init)
            pi = fc(hidden, 'pi', nact, act=linear)
            vf = fc(hidden, 'v', 1, act=linear)

        v0 = vf[:, 0]
        a0 = sample(pi)
        self.initial_state = []  # not stateful

        def step(ob, *_args, **_kwargs):
            acts, vals = sess.run([a0, v0], {X: ob})
            return acts, vals, []  # dummy state

        def value(ob, *_args, **_kwargs):
            return sess.run(v0, {X: ob})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Exemple #17
0
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False): #pylint: disable=W0613
        """Build a conv policy with a probability distribution over actions.

        A uint8 placeholder of shape (nbatch, nh, nw, nc) is cast to float32
        and divided by 255, then passed through three `conv` layers and the
        `fc` layer 'fc1'. The 'pi' head (init_scale=0.01) parameterizes the
        distribution from `make_pdtype(ac_space)`; the 'v' head is sliced to
        its first column.

        Sets `pdtype`, `pd`, `X`, `pi`, `vf`, `initial_state` (None) and the
        `step` / `value` callables bound to `sess`. `nsteps` is unused.
        """
        height, width, channels = ob_space.shape
        nact = ac_space.n
        sqrt2 = np.sqrt(2)

        def identity(x):
            return x

        X = tf.placeholder(tf.uint8, (nbatch, height, width, channels))  # obs
        with tf.variable_scope("model", reuse=reuse):
            scaled_obs = tf.cast(X, tf.float32) / 255.
            conv1 = conv(scaled_obs, 'c1', nf=32, rf=8, stride=4, init_scale=sqrt2)
            conv2 = conv(conv1, 'c2', nf=64, rf=4, stride=2, init_scale=sqrt2)
            conv3 = conv(conv2, 'c3', nf=64, rf=3, stride=1, init_scale=sqrt2)
            features = fc(conv_to_fc(conv3), 'fc1', nh=512, init_scale=sqrt2)
            pi = fc(features, 'pi', nact, act=identity, init_scale=0.01)
            vf = fc(features, 'v', 1, act=identity)[:, 0]

        self.pdtype = make_pdtype(ac_space)
        self.pd = self.pdtype.pdfromflat(pi)

        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            action, val, neglogp = sess.run([a0, vf, neglogp0], {X: ob})
            return action, val, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            return sess.run(vf, {X: ob})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Exemple #18
0
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.

    Cast the input to float32 and divide by 255, apply conv layers 'c1',
    'c2' and 'c3' with ReLU after each, flatten with `conv_to_fc`, and return
    the ReLU of the 512-unit `fc` layer 'fc1'. `conv_kwargs` is forwarded to
    each `conv` call.
    """
    sqrt2 = np.sqrt(2)
    net = tf.cast(unscaled_images, tf.float32) / 255.

    # (scope name, nf, rf, stride) of each conv layer, in build order.
    for name, nf, rf, stride in (('c1', 32, 8, 4),
                                 ('c2', 64, 4, 2),
                                 ('c3', 64, 3, 1)):
        net = conv(net,
                   name,
                   nf=nf,
                   rf=rf,
                   stride=stride,
                   init_scale=sqrt2,
                   **conv_kwargs)
        net = tf.nn.relu(net)

    dense = fc(conv_to_fc(net), 'fc1', nh=512, init_scale=sqrt2)
    return tf.nn.relu(dense)
Exemple #19
0
def nature_cnn(scaled_images, **conv_kwargs):
    """
    Model used in the paper "Human-level control through deep reinforcement learning"
    https://www.nature.com/articles/nature14236

    `scaled_images` is fed to the first conv layer as given; no scaling is
    applied here. Three ReLU-activated `conv` layers are followed by
    `conv_to_fc` and the ReLU-activated 512-unit `fc` layer 'fc1', which is
    returned. `conv_kwargs` is forwarded to each `conv` call.
    """
    relu = tf.nn.relu

    def conv_relu(inputs, name, nf, rf, stride):
        return relu(
            conv(inputs,
                 name,
                 nf=nf,
                 rf=rf,
                 stride=stride,
                 init_scale=np.sqrt(2),
                 **conv_kwargs))

    out1 = conv_relu(scaled_images, 'c1', 32, 8, 4)
    out2 = conv_relu(out1, 'c2', 64, 4, 2)
    out3 = conv_relu(out2, 'c3', 64, 3, 1)
    flat = conv_to_fc(out3)
    return relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
Exemple #20
0
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False):  #pylint: disable=W0613
        """Build a conv policy with separate MLP heads for policy and value.

        Two `conv` layers ('c1', 'c2') feed, after flattening, a two-layer
        tanh MLP for the policy ('pi_fc1', 'pi_fc2', 'pi') and another one for
        the value function ('vf_fc1', 'vf_fc2', 'vf'). A `logstd` variable of
        shape [1, actdim] is always created; it is only used to build the
        distribution parameters when the action space is not discrete.

        Sets `is_discrete`, `pdtype`, `pd`, `X`, `pi`, `vf`, `initial_state`
        (None) and the `step` / `value` callables bound to `sess`.
        `nsteps` is unused.
        """
        self.is_discrete = isinstance(ac_space, gym.spaces.Discrete)

        ob_shape = (nbatch, ) + ob_space.shape
        actdim = ac_space.n if self.is_discrete else ac_space.shape[0]

        X = tf.placeholder(tf.float32, ob_shape, name='Ob')  #obs
        with tf.variable_scope("model", reuse=reuse):
            h_c = conv(X, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
            h2_c = conv(h_c,
                        'c2',
                        nf=64,
                        rf=4,
                        stride=2,
                        init_scale=np.sqrt(2))
            # Flatten the output of the *second* conv layer. Flattening `h_c`
            # here would leave layer 'c2' built but never connected to the
            # heads.
            conv_flat = conv_to_fc(h2_c)
            h1 = fc(conv_flat, 'pi_fc1', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
            h2 = fc(h1, 'pi_fc2', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
            pi = fc(h2, 'pi', actdim, init_scale=0.01)
            h1 = fc(conv_flat, 'vf_fc1', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
            h2 = fc(h1, 'vf_fc2', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
            vf = fc(h2, 'vf', 1)[:, 0]
            # Created for both kinds of action space so the set of variables
            # in the scope does not depend on the action space.
            logstd = tf.get_variable(name="logstd",
                                     shape=[1, actdim],
                                     initializer=tf.zeros_initializer())

        self.pdtype = make_pdtype(ac_space)
        if self.is_discrete:
            self.pd = self.pdtype.pdfromflat(pi)
        else:
            # `pi * 0.0 + logstd` broadcasts logstd to the shape of `pi`.
            pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)
            self.pd = self.pdtype.pdfromflat(pdparam)
        a0 = self.pd.sample()

        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            a, v, neglogp = sess.run([a0, vf, neglogp0],
                                     {X: ob.astype(np.float32) / 255.0})
            # Only the first sampled action is returned.
            a = a[0]
            return a, v, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            # NOTE(review): unlike `step`, `ob` is fed without the
            # float32 / 255 scaling -- confirm which of the two matches the
            # inputs used during training.
            return sess.run(vf, {X: ob})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Exemple #21
0
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False, is_discrete=False):    #pylint: disable=W0613
        """Build a conv policy with separate dense layers for policy and value.

        `self.is_discrete` is derived from `ac_space` being a
        `gym.spaces.Discrete`; the `is_discrete` argument is not consulted.
        Three `conv` layers are flattened and feed two 512-unit `fc` layers,
        'fc1' for the 'pi' head and 'fc_vf' for the 'v' head. For
        non-discrete action spaces a `logstd` variable is created and
        concatenated to `pi` to form the distribution parameters.

        Sets `pdtype`, `pd`, `X`, `pi`, `vf`, `initial_state` (None) and the
        `step` / `value` callables bound to `sess`. `nbatch` is printed at
        construction, and `step` prints its outputs on every call.
        """
        self.is_discrete = isinstance(ac_space, gym.spaces.Discrete)

        print("nbatch%d" % (nbatch))

        height, width, channels = ob_space.shape
        nact = ac_space.n if self.is_discrete else ac_space.shape[0]
        X = tf.placeholder(tf.float32, (nbatch, height, width, channels))    #obs

        sqrt2 = np.sqrt(2)

        def identity(x):
            return x

        with tf.variable_scope("model", reuse=reuse):
            conv1 = conv(X, 'c1', nf=32, rf=8, stride=4, init_scale=sqrt2)
            conv2 = conv(conv1, 'c2', nf=64, rf=4, stride=2, init_scale=sqrt2)
            conv3 = conv(conv2, 'c3', nf=64, rf=3, stride=1, init_scale=sqrt2)
            flat = conv_to_fc(conv3)
            pi_hidden = fc(flat, 'fc1', nh=512, init_scale=sqrt2)
            vf_hidden = fc(flat, 'fc_vf', nh=512, init_scale=sqrt2)

            pi = fc(pi_hidden, 'pi', nact, init_scale=0.05)
            vf = fc(vf_hidden, 'v', 1, act=identity)[:, 0]

            if not self.is_discrete:
                logstd = tf.get_variable(name="logstd",
                                         shape=[1, nact],
                                         initializer=tf.zeros_initializer())

        self.pdtype = make_pdtype(ac_space)
        if self.is_discrete:
            flat_params = pi
        else:
            # `pi * 0.0 + logstd` broadcasts logstd to the shape of `pi`.
            flat_params = tf.concat([pi, pi * 0.0 + logstd], axis=1)
        self.pd = self.pdtype.pdfromflat(flat_params)
        a0 = self.pd.sample()

        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            feed = {X: ob.astype(np.float32) / 255.0}
            a, v, neglogp = sess.run([a0, vf, neglogp0], feed)
            assert (a.shape[0] == 1)    # make sure a = a[0] doesn't throw away actions
            a = a[0]
            print(a, v, neglogp)
            return a, v, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            # NOTE(review): `ob` is fed without the / 255 scaling that `step`
            # applies -- confirm this is intended.
            return sess.run(vf, {X: ob})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Exemple #22
0
def nature_cnn(X):
    """Apply three ReLU-activated `conv` layers ('x1', 'x2', 'x3') to `X`.

    All layers use init_scale=1.0. The activated output of 'x3' is returned
    without flattening.
    """
    # (scope name, nf, rf, stride) of each conv layer, in build order.
    conv_specs = (('x1', 128, 8, 4), ('x2', 64, 4, 2), ('x3', 64, 4, 2))

    net = X
    for name, nf, rf, stride in conv_specs:
        net = tf.nn.relu(
            conv(net, name, nf=nf, rf=rf, stride=stride, init_scale=1.0))
    return net
Exemple #23
0
    def network_fn(X, mode="pi"):
        """Nature-style CNN with optional batch norm and dropout per head.

        `mode` selects which set of enclosing-scope options applies: "pi"
        uses `batchnormpi`, `isbnpitrainmode`, `dropoutpi` and
        `dropoutpi_keep_prob`; "vf" uses the `vf` counterparts. Only the
        "pad", "data_format" and "one_dim_bias" entries of the enclosing
        `conv_kwargs` are forwarded to `conv`.

        Each conv output is optionally batch-normalized, passed through
        ReLU, and optionally passed through dropout. The flattened result
        feeds the ReLU-activated 512-unit `fc` layer 'fc1', which is returned
        without batch norm or dropout.
        """
        forwarded_keys = ("pad", "data_format", "one_dim_bias")
        filtered_conv_kwargs = {
            key: conv_kwargs[key]
            for key in forwarded_keys if key in conv_kwargs.keys()
        }

        scaled_images = tf.cast(X, tf.float32) / 255.
        relu = tf.nn.relu
        batch_norm_fn = tf.contrib.layers.batch_norm
        dropout_fn = tf.nn.dropout
        sqrt2 = np.sqrt(2)

        def addbndrp(h):
            # Batch norm (if enabled for this mode) -> ReLU -> dropout.
            if (batchnormpi and mode == "pi") or (batchnormvf
                                                  and mode == "vf"):
                if mode == "pi":
                    training_flag = isbnpitrainmode
                else:
                    training_flag = isbnvftrainmode
                h = batch_norm_fn(h,
                                  center=True,
                                  scale=True,
                                  is_training=training_flag,
                                  updates_collections=None)
            h = relu(h)
            if dropoutpi < 1.0 and mode == "pi":
                h = dropout_fn(h, keep_prob=dropoutpi_keep_prob)
            if dropoutvf < 1.0 and mode == "vf":
                h = dropout_fn(h, keep_prob=dropoutvf_keep_prob)
            return h

        net = scaled_images
        # (scope name, nf, rf, stride) of each conv layer, in build order.
        for name, nf, rf, stride in (('c1', 32, 8, 4),
                                     ('c2', 64, 4, 2),
                                     ('c3', 64, 3, 1)):
            net = addbndrp(
                conv(net,
                     name,
                     nf=nf,
                     rf=rf,
                     stride=stride,
                     init_scale=sqrt2,
                     **filtered_conv_kwargs))
        flat = conv_to_fc(net)
        return relu(fc(flat, 'fc1', nh=512, init_scale=sqrt2))
 def network_fn(X):
     """Two conv layers plus two `fc` layers ending in one output per action.

     `X` is fed to `conv` as given. Both conv layers use pad="VALID" and are
     followed by ReLU. After `conv_to_fc`, the ReLU-activated 30-unit `fc`
     layer 'fc1' feeds the `fc` layer 'fc3' with
     `net_kwargs["nactions"]` units, which is returned without an extra ReLU.
     """
     relu = tf.nn.relu
     net = relu(conv(X, 'c1', nf=16, rf=8, stride=4, pad="VALID"))
     net = relu(conv(net, 'c2', nf=32, rf=4, stride=2, pad="VALID"))
     hidden = relu(fc(conv_to_fc(net), 'fc1', nh=30))
     return fc(hidden, 'fc3', nh=net_kwargs["nactions"])
 def network_fn(X):
     """Two conv layers plus four `fc` layers ending in one output per action.

     Both conv layers use nf=64, rf=3, stride=2, pad="VALID" and ReLU. After
     `conv_to_fc`, ReLU-activated `fc` layers 'fc1' (128), 'fc2' (64) and
     'fc4' (32) feed the `fc` layer 'fc5' with `net_kwargs["nactions"]`
     units, which is returned without an extra ReLU.
     """
     net = X
     for name in ('c1', 'c2'):
         net = tf.nn.relu(conv(net, name, nf=64, rf=3, stride=2, pad="VALID"))
     net = conv_to_fc(net)
     for name, units in (('fc1', 128), ('fc2', 64), ('fc4', 32)):
         net = tf.nn.relu(fc(net, name, nh=units))
     return fc(net, 'fc5', nh=net_kwargs["nactions"])
Exemple #26
0
    def network_fn(X):
        """Small two-layer conv net with a 128-unit dense output.

        `X` is cast to float32 and divided by 255. Conv layers 'c1' and 'c2'
        (each followed by ReLU) receive the enclosing `conv_kwargs`. The
        flattened result feeds the ReLU-activated `fc` layer 'fc1'.
        """
        relu = tf.nn.relu
        sqrt2 = np.sqrt(2)

        net = tf.cast(X, tf.float32) / 255.
        for name, nf, rf, stride in (('c1', 8, 8, 4), ('c2', 16, 4, 2)):
            net = relu(conv(net, name, nf=nf, rf=rf, stride=stride, init_scale=sqrt2, **conv_kwargs))
        flat = conv_to_fc(net)
        return relu(fc(flat, 'fc1', nh=128, init_scale=sqrt2))
def feature_net(unscaled_images):
    """Three-layer conv feature extractor with a 128-unit dense output.

    The input is cast to float32 but NOT divided by 255. Conv layers 'c1',
    'c2' and 'c3' are each followed by ReLU; the flattened result feeds the
    ReLU-activated `fc` layer 'fc1', which is returned.
    """
    relu = tf.nn.relu
    sqrt2 = np.sqrt(2)
    # (scope name, nf, rf, stride) of each conv layer, in build order.
    conv_specs = (('c1', 8, 8, 4), ('c2', 16, 4, 2), ('c3', 16, 3, 1))

    net = tf.cast(unscaled_images, tf.float32)  # no / 255. scaling here
    for name, nf, rf, stride in conv_specs:
        net = relu(
            conv(net, name, nf=nf, rf=rf, stride=stride, init_scale=sqrt2))
    return relu(fc(conv_to_fc(net), 'fc1', nh=128, init_scale=sqrt2))
Exemple #28
0
def sparse_block(x, relu=True):
    """Apply conv layers 'c1', 'c2', 'c3' with strides 4, 2 and 1.

    ReLU follows 'c1' and 'c2' unconditionally; it follows 'c3' only when
    `relu` is truthy.
    """
    sqrt2 = np.sqrt(2)
    out = tf.nn.relu(conv(x, 'c1', nf=32, rf=8, stride=4, init_scale=sqrt2))
    out = tf.nn.relu(conv(out, 'c2', nf=64, rf=4, stride=2, init_scale=sqrt2))
    out = conv(out, 'c3', nf=64, rf=3, stride=1, init_scale=sqrt2)
    return tf.nn.relu(out) if relu else out
Exemple #29
0
def dense_block(x, relu=True):
    """Apply conv layers 'c1', 'c2', 'c3' with stride 1 and pad='SAME'.

    ReLU follows 'c1' and 'c2' unconditionally; it follows 'c3' only when
    `relu` is truthy.
    """
    shared = dict(stride=1, pad='SAME', init_scale=np.sqrt(2))
    out = tf.nn.relu(conv(x, 'c1', nf=32, rf=7, **shared))
    out = tf.nn.relu(conv(out, 'c2', nf=64, rf=5, **shared))
    out = conv(out, 'c3', nf=64, rf=3, **shared)
    if not relu:
        return out
    return tf.nn.relu(out)
Exemple #30
0
 def preprocess(X):
     """Run `X` through conv layers 'c1', 'c2', 'c3' and flatten the result.

     `X` is cast to float32 and divided by 255 first. No activation is
     applied explicitly between the `conv` calls in this function.
     """
     sqrt2 = np.sqrt(2)
     net = tf.cast(X, tf.float32) / 255.
     # (scope name, nf, rf, stride) of each conv layer, in build order.
     for name, nf, rf, stride in (('c1', 32, 8, 4),
                                  ('c2', 64, 4, 2),
                                  ('c3', 64, 3, 1)):
         net = conv(net, name, nf=nf, rf=rf, stride=stride, init_scale=sqrt2)
     return conv_to_fc(net)
Exemple #31
0
def lenet_cnn(scaled_images, **conv_kwargs):
    """LeNet-style net: conv/pool, conv/pool, conv, then a 128-unit dense layer.

    `scaled_images` is fed to the first conv layer as given. All conv layers
    use rf=5, stride=1 and leaky ReLU. The two `tf.nn.max_pool` calls use a
    [1, 2, 2, 1] window with [1, 1, 1, 1] strides and 'SAME' padding. The
    leaky-ReLU output of the `fc` layer 'fc1' is returned.
    """
    lrelu = tf.nn.leaky_relu
    sqrt2 = np.sqrt(2)
    window = [1, 2, 2, 1]
    unit_strides = [1, 1, 1, 1]

    def conv_lrelu(inputs, name, nf):
        return lrelu(conv(inputs, name, nf=nf, rf=5, stride=1, init_scale=sqrt2,
                          **conv_kwargs))

    net = conv_lrelu(scaled_images, 'c1', 6)
    net = tf.nn.max_pool(net, window, unit_strides, padding='SAME', name='p1')
    net = conv_lrelu(net, 'c2', 16)
    net = tf.nn.max_pool(net, window, unit_strides, padding='SAME', name='p2')
    net = conv_lrelu(net, 'c3', 120)
    return lrelu(fc(conv_to_fc(net), 'fc1', nh=128, init_scale=sqrt2))
Exemple #32
0
def nature_cnn(unscaled_images):
    """
    CNN from Nature paper.

    Casts the input to float32, divides by 255, applies three ReLU-activated
    `conv` layers, flattens with `conv_to_fc` and returns the ReLU of the
    512-unit `fc` layer 'fc1'.
    """
    relu = tf.nn.relu
    sqrt2 = np.sqrt(2)
    # (scope name, nf, rf, stride) of each conv layer, in build order.
    conv_specs = (('c1', 32, 8, 4), ('c2', 64, 4, 2), ('c3', 64, 3, 1))

    net = tf.cast(unscaled_images, tf.float32) / 255.
    for name, nf, rf, stride in conv_specs:
        net = relu(conv(net, name, nf=nf, rf=rf, stride=stride, init_scale=sqrt2))
    return relu(fc(conv_to_fc(net), 'fc1', nh=512, init_scale=sqrt2))
Exemple #33
0
    def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, nlstm=256, reuse=False):
        """Build a conv + `lnlstm` policy/value graph under scope "model".

        Creates placeholders for observations (`X`, uint8, shape
        (nenv * nsteps, nh, nw, nc * nstack)), done-masks (`M`) and recurrent
        states (`S`, shape [nenv, nlstm * 2]). Three `conv` layers and the
        `fc` layer 'fc1' feed `lnlstm`; the 'pi' and 'v' heads are built on
        its output.

        Sets `X`, `M`, `S`, `pi`, `vf`, `initial_state` (zeros) and the
        `step` / `value` callables bound to `sess`.
        """
        nbatch = nenv * nsteps
        height, width, channels = ob_space.shape
        nact = ac_space.n
        sqrt2 = np.sqrt(2)

        def identity(x):
            return x

        X = tf.placeholder(tf.uint8, (nbatch, height, width, channels * nstack))  # obs
        M = tf.placeholder(tf.float32, [nbatch])  # mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, nlstm * 2])  # states
        with tf.variable_scope("model", reuse=reuse):
            scaled_obs = tf.cast(X, tf.float32) / 255.
            conv1 = conv(scaled_obs, 'c1', nf=32, rf=8, stride=4, init_scale=sqrt2)
            conv2 = conv(conv1, 'c2', nf=64, rf=4, stride=2, init_scale=sqrt2)
            conv3 = conv(conv2, 'c3', nf=64, rf=3, stride=1, init_scale=sqrt2)
            features = fc(conv_to_fc(conv3), 'fc1', nh=512, init_scale=sqrt2)
            xs = batch_to_seq(features, nenv, nsteps)
            ms = batch_to_seq(M, nenv, nsteps)
            rnn_out, snew = lnlstm(xs, ms, S, 'lstm1', nh=nlstm)
            rnn_out = seq_to_batch(rnn_out)
            pi = fc(rnn_out, 'pi', nact, act=identity)
            vf = fc(rnn_out, 'v', 1, act=identity)

        v0 = vf[:, 0]
        a0 = sample(pi)
        self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

        def step(ob, state, mask):
            action, val, new_state = sess.run([a0, v0, snew],
                                              {X: ob, S: state, M: mask})
            return action, val, new_state

        def value(ob, state, mask):
            return sess.run(v0, {X: ob, S: state, M: mask})

        self.X = X
        self.M = M
        self.S = S
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Exemple #34
0
def nature_cnn(unscaled_images):
    """Convolutional part of the CNN from the Nature paper.

    Adapted from baselines.ppo2.policies.py. The input is cast to float32 and
    divided by 255, passed through three ReLU-activated ``conv`` layers and
    then flattened with ``conv_to_fc``. No fully connected layer is applied.

    Parameters
    ----------
    unscaled_images : tensorflow tensor
        Frame of shape (batchsize, x, y, c)

    Returns
    -------
    tensorflow tensor
        Output features of last convolutional layer with flattened x/y/c dimensions
    """
    # (scope name, number of filters, filter size, stride) per conv layer.
    layer_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )

    features = tf.cast(unscaled_images, tf.float32) / 255.
    for layer_name, n_filters, filter_size, stride in layer_specs:
        pre_activation = conv(features, layer_name, nf=n_filters, rf=filter_size,
                              stride=stride, init_scale=np.sqrt(2))
        features = tf.nn.relu(pre_activation)

    return conv_to_fc(features)
Exemple #35
0
  def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
    """Build a feed-forward policy with one action head and two spatial heads.

    A two-layer ``conv`` trunk (scope ``model/common``) feeds:
      * ``model/pi1``: an ``fc`` hidden layer followed by a softmax action
        head (``pi``) and a value head (``vf``);
      * ``model/xy0`` and ``model/xy1``: a 1x1 ``conv`` with a single filter,
        flattened and passed through softmax.

    Note that ``ob_space`` and ``ac_space`` are accepted but never read: the
    observation geometry (32, 32, 3) and the action count (3) are hard-coded.

    Args:
      sess: session object used by ``step`` / ``value`` to run the graph.
      ob_space: unused.
      ac_space: unused.
      nenv: number of environments.
      nsteps: steps per environment; the batch size is ``nenv * nsteps``.
      nstack: multiplier applied to the channel dimension of the input.
      reuse: forwarded to every ``tf.variable_scope`` opened here.
    """
    height, width, channels = 32, 32, 3
    frame_shape = (nenv * nsteps, height, width, channels * nstack)
    n_base_actions = 3  # the original author's note mentions 524 as an alternative
    init_scale = np.sqrt(2)

    X = tf.placeholder(tf.uint8, frame_shape)  # observations

    def spatial_head(features, name):
      # 1x1 convolution down to one channel, flattened, then softmax.
      # The variable scope and the conv layer share the same name.
      with tf.variable_scope(name, reuse=reuse):
        single_channel = conv(
            features, name, nf=1, rf=1, stride=1, init_scale=init_scale)
        return tf.nn.softmax(conv_to_fc(single_channel))

    with tf.variable_scope("model", reuse=reuse):
      with tf.variable_scope("common", reuse=reuse):
        # The input is cast to float32 but, unlike the usual image
        # pipelines, is not rescaled by 255 here.
        # With stride 1 and pad="SAME" the spatial size is presumably
        # preserved through both layers -- TODO confirm against `conv`.
        trunk_1 = conv(
            tf.cast(X, tf.float32), 'c1', nf=32, rf=5, stride=1,
            init_scale=init_scale, pad="SAME")
        trunk_2 = conv(
            trunk_1, 'c2', nf=64, rf=3, stride=1,
            init_scale=init_scale, pad="SAME")

      with tf.variable_scope("pi1", reuse=reuse):
        flat = conv_to_fc(trunk_2)
        hidden = fc(flat, 'fc1', nh=256, init_scale=init_scale)
        action_logits = fc(hidden, 'pi', n_base_actions)
        pi = tf.nn.softmax(action_logits)
        vf = fc(hidden, 'v', 1)

      pi_xy0 = spatial_head(trunk_2, "xy0")
      pi_xy1 = spatial_head(trunk_2, "xy1")

    v0 = vf[:, 0]
    # Still added to the graph, but neither step() nor value() fetches it.
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
      """Return (pi, pi_xy0, pi_xy1, value, []) for the given observations.

      The first three entries are the softmax outputs themselves, not sampled
      actions; the trailing empty list is a dummy state.
      """
      fetches = [pi, pi_xy0, pi_xy1, v0]
      base_probs, xy0_probs, xy1_probs, values = sess.run(fetches, {X: ob})
      return base_probs, xy0_probs, xy1_probs, values, []

    def value(ob, *_args, **_kwargs):
      """Return only the value estimate for the given observations."""
      return sess.run(v0, {X: ob})

    # Expose the placeholder, heads and run helpers on the instance.
    self.X = X
    self.pi = pi
    self.pi_xy0, self.pi_xy1 = pi_xy0, pi_xy1
    self.vf = vf
    self.step, self.value = step, value