Esempio n. 1
0
def mujoco_cnn(unscaled_images, name, nbatch, add_flownet,
               unscaled_previous_images, flownet,
               train_from_scratch,
               large_cnn, diff_frames):
    """
    Build a convolutional feature extractor over (optionally flow-augmented) frames.

    The input is cast to float32 and divided by 255. When `add_flownet` is
    set, the previous frames are scaled the same way, concatenated with the
    current frames on axis 3 and handed to `flownet`; the flow it returns is
    concatenated in front of the current frames. When `diff_frames` is set,
    axis 3 is split into two equal halves and the second half is replaced by
    (second half - first half).

    `large_cnn` selects the architecture: a three-layer conv stack returning
    the flattened conv features, or an eight-layer stack with two additive
    skip connections followed by a 110-unit dense layer. Conv layers are
    created under `name + '_cN'`; the dense layer uses the fixed name 'fc1'.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    # scaling image input
    net_input = tf.cast(unscaled_images, tf.float32) / 255.

    if add_flownet:
        # both extra inputs are mandatory on this path
        assert unscaled_previous_images is not None
        assert flownet is not None

        previous = tf.cast(unscaled_previous_images, tf.float32) / 255.
        frame_pair = tf.concat([previous, net_input], axis=3)
        flow, _ = flownet(frame_pair, trainable=train_from_scratch, size=nbatch)

        # a flownet that is not trained from scratch must not receive gradients
        if not train_from_scratch:
            flow = tf.stop_gradient(flow)

        net_input = tf.concat([flow, net_input], axis=3)

    if diff_frames:
        n_half = net_input.get_shape().as_list()[-1] // 2
        first, second = tf.split(net_input, [n_half, n_half], axis=3)
        net_input = tf.concat([first, second - first], axis=3)

    if not large_cnn:
        feat = net_input
        for suffix, nf, rf, stride in (('_c1', 32, 8, 4),
                                       ('_c2', 64, 4, 2),
                                       ('_c3', 64, 3, 1)):
            feat = relu(conv(feat, name + suffix, nf=nf, rf=rf, stride=stride, init_scale=gain))
        return conv_to_fc(feat)

    def conv3(inp, suffix, nf=64, stride=1, **extra):
        # every layer of the large net uses rf=3 and the same init scale
        return conv(inp, name + suffix, nf=nf, rf=3, stride=stride, init_scale=gain, **extra)

    feat = relu(conv3(net_input, '_c1', nf=32))

    # two identical stages: strided conv, then two SAME convs with the
    # pre-activation output of the strided conv added back in
    for down, mid, last in (('_c2', '_c3', '_c4'), ('_c5', '_c6', '_c7')):
        shortcut = conv3(feat, down, stride=2)
        feat = relu(shortcut)
        feat = relu(conv3(feat, mid, pad='SAME'))
        feat = relu(conv3(feat, last, pad='SAME') + shortcut)

    feat = relu(conv3(feat, '_c8', stride=2))
    return relu(fc(conv_to_fc(feat), 'fc1', nh=110, init_scale=gain))
Esempio n. 2
0
 def network_fn(X):
     """
     Five stride-1 VALID conv layers whose flattened output is joined with the raw input.

     Both the conv features and `X` itself are flattened with `conv_to_fc`,
     concatenated on axis 1 (raw input first) and fed to a single-unit
     ReLU dense layer named 'fc1'. `conv_kwargs` comes from the enclosing
     scope and is forwarded to every `conv` call.
     """
     relu = tf.nn.relu
     gain = np.sqrt(2)

     # (scope, nf, rf) per conv layer
     layer_specs = (
         ('c1', 32, 3),
         ('c2', 32, 3),
         ('c3', 32, 3),
         ('c4', 64, 3),
         ('c5', 64, 2),
     )

     feat = X
     for scope, n_filters, size in layer_specs:
         feat = relu(
             conv(feat, scope, nf=n_filters, rf=size, stride=1, pad='VALID',
                  init_scale=gain, **conv_kwargs))

     conv_flat = conv_to_fc(feat)
     raw_flat = conv_to_fc(X)
     joined = tf.concat(axis=1, values=[raw_flat, conv_flat])
     return relu(fc(joined, 'fc1', nh=1, init_scale=gain))
Esempio n. 3
0
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False):
        """
        Build separate policy and value towers over a float32 observation placeholder.

        The observation is divided by 3 before entering either tower. Each
        tower is two rf=3, stride-1 conv layers followed by one dense hidden
        layer (256 units for the policy, 128 for the value). All variables
        are created under the "model" scope with the given `reuse` flag.
        `nsteps` is accepted but not read.
        """
        ob_shape = (nbatch,) + ob_space.shape
        n_actions = ac_space.n
        gain = np.sqrt(2)

        # observation input
        obs_ph = tf.placeholder(tf.float32, ob_shape, name='Ob')
        # scale input by maximum (=3 for multiple snakes)
        scaled_obs = obs_ph / 3

        def tower(conv1, conv2, dense, hidden):
            # conv -> conv -> flatten -> dense, ReLU after each learned layer
            feat = tf.nn.relu(conv(scaled_obs, conv1, nf=16, rf=3, stride=1, init_scale=gain))
            feat = tf.nn.relu(conv(feat, conv2, nf=32, rf=3, stride=1, init_scale=gain))
            return tf.nn.relu(fc(conv_to_fc(feat), dense, nh=hidden, init_scale=gain))

        with tf.variable_scope("model", reuse=reuse):
            # policy logits: small init scale to ensure large initial policy entropy
            pi = fc(tower('pi_c1', 'pi_c2', 'pi_fc', 256), 'pi', n_actions, init_scale=0.01)
            # value head: single linear output, second axis dropped
            vf = fc(tower('vf_c1', 'vf_c2', 'vf_fc', 128), 'vf', 1)[:, 0]

        self.pdtype = make_pdtype(ac_space)
        self.pd = self.pdtype.pdfromflat(pi)

        sampled_action = self.pd.sample()
        sampled_neglogp = self.pd.neglogp(sampled_action)
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            action, val, nlp = sess.run([sampled_action, vf, sampled_neglogp], {obs_ph: ob})
            return action, val, self.initial_state, nlp

        def value(ob, *_args, **_kwargs):
            return sess.run(vf, {obs_ph: ob})

        self.X = obs_ph
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Esempio n. 4
0
def nature_cnn(unscaled_images, keep_probs, **conv_kwargs):
    """
    Three rf=3, stride-1 conv layers, a 512-unit dense layer, then dropout.

    Despite the parameter name, the input is passed to the first conv layer
    unchanged: no cast and no division by 255 happens in this function.
    `keep_probs` is forwarded to `tf.nn.dropout` as `keep_prob`; extra
    keyword arguments go to every `conv` call.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    feat = unscaled_images
    for scope, n_filters in (('c1', 32), ('c2', 64), ('c3', 64)):
        feat = relu(
            conv(feat, scope, nf=n_filters, rf=3, stride=1, init_scale=gain,
                 **conv_kwargs))

    dense = relu(fc(conv_to_fc(feat), 'fc1', nh=512, init_scale=gain))
    return tf.nn.dropout(dense, keep_prob=keep_probs)
Esempio n. 5
0
    def network_fn(X):
        """
        Three-layer conv stack whose first-layer activations are multiplied by `fls_module` output.

        The input is cast to float32 and divided by 255. After the first
        conv + ReLU, `fls_module` is evaluated under the 'h1' variable scope
        and its result multiplies the activations element-wise. The
        flattened features go through `final_linear`; the `fls_module`
        output is returned in the `extra` field.
        """
        gain = np.sqrt(2)

        feat = tf.cast(X, tf.float32) / 255.

        feat = tf.nn.relu(conv(feat, 'c1', nf=32, rf=8, stride=4, init_scale=gain))
        with tf.variable_scope('h1'):
            h1 = fls_module(feat)
        feat = feat * h1

        for scope, n_filters, size, step in (('c2', 64, 4, 2), ('c3', 64, 3, 1)):
            feat = tf.nn.relu(
                conv(feat, scope, nf=n_filters, rf=size, stride=step, init_scale=gain))

        latent = final_linear(conv_to_fc(feat))

        return NetworkOutput(
            policy_latent=latent,
            recurrent_tensors=None,
            extra=h1,
        )
Esempio n. 6
0
def dcgan_cnn(images, dout, **conv_kwargs):
    """
    Three conv layers with batch norm + leaky ReLU, then two dense layers.

    Every conv output, and the final `dout`-unit dense output, passes
    through `batch_norm(name, x)` followed by `tf.nn.leaky_relu` with
    alpha=0.2. The intermediate 512-unit dense layer (scope 'final') gets
    neither. `images` is used as given (no rescaling here).

    Returns a pair: the output tensor and the shape of the last conv
    activation.
    """
    def bn_lrelu(name, inpt):
        return tf.nn.leaky_relu(batch_norm(name, inpt), alpha=0.2)

    gain = np.sqrt(2)

    # (batch-norm name, conv scope, nf, rf, stride)
    layer_specs = (
        ('l1', 'c1', 32, 8, 4),
        ('l2', 'c2', 64, 4, 2),
        ('l3', 'c3', 64, 3, 1),
    )

    feat = images
    for bn_name, scope, n_filters, size, step in layer_specs:
        feat = bn_lrelu(
            bn_name,
            conv(feat, scope, nf=n_filters, rf=size, stride=step,
                 init_scale=gain, **conv_kwargs))

    hidden = fc(conv_to_fc(feat), nh=512, scope='final')
    out = bn_lrelu('out', fc(hidden, 'fc1', nh=dout, init_scale=gain))
    return out, feat.shape
Esempio n. 7
0
 def network_fn(X):
     """
     Encode an image with two conv layers and fuse it with a force vector.

     `X` unpacks into (forces, image). The image goes through two rf=2,
     stride-2 conv layers and is flattened, concatenated with `forces` on
     axis 1 (image features first), then passed through two ReLU dense
     layers of 30 and 6 units. `conv_kwargs` comes from the enclosing scope.
     """
     # Example X recorded by the original author:
     # [<tf.Tensor 'ppo2_model/Ob:0' shape=(1, 6) dtype=float32>,
     #  <tf.Tensor 'ppo2_model/Ob_1:0' shape=(1, 50, 50, 3) dtype=float32>]
     forces, im = X
     relu = tf.nn.relu
     gain = np.sqrt(2)

     for scope, n_filters in (('c1', 8), ('c2', 32)):
         im = relu(
             conv(im, scope, nf=n_filters, rf=2, stride=2, init_scale=gain,
                  **conv_kwargs))

     h = tf.concat([conv_to_fc(im), forces], axis=1)
     for scope, units in (('fc1', 30), ('fc2', 6)):
         h = relu(fc(h, scope, nh=units, init_scale=gain))
     return h
Esempio n. 8
0
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    Three-layer conv stack plus a 512-unit ReLU dense layer.

    The input is cast to float32 and divided by 255 first. Extra keyword
    arguments are forwarded to every `conv` call.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    # (scope, nf, rf, stride); per the original author, an 8x8 filter size is
    # common on the very first conv layer, which looks at the input image
    layer_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )

    feat = tf.cast(unscaled_images, tf.float32) / 255.
    for scope, n_filters, size, step in layer_specs:
        feat = relu(
            conv(feat, scope, nf=n_filters, rf=size, stride=step,
                 init_scale=gain, **conv_kwargs))

    return relu(fc(conv_to_fc(feat), 'fc1', nh=512, init_scale=gain))
def custom_cnn(unscaled_images, **conv_kwargs):
    """
    Three-layer conv stack plus a 512-unit dense layer, all with leaky ReLU.

    The input is cast to float32 and divided by 255 first. Extra keyword
    arguments are forwarded to every `conv` call.
    """
    lrelu = tf.nn.leaky_relu
    gain = np.sqrt(2)

    def conv_block(inp, scope, n_filters, size, step):
        # one conv layer followed by the leaky ReLU
        return lrelu(
            conv(inp, scope, nf=n_filters, rf=size, stride=step,
                 init_scale=gain, **conv_kwargs))

    scaled = tf.cast(unscaled_images, tf.float32) / 255.
    feat = conv_block(scaled, 'c1', 32, 8, 4)
    feat = conv_block(feat, 'c2', 64, 4, 2)
    feat = conv_block(feat, 'c3', 64, 3, 1)

    flat = conv_to_fc(feat)
    return lrelu(fc(flat, 'fc1', nh=512, init_scale=gain))
Esempio n. 10
0
 def __call__(self, obs, action, reuse=False):
     """
     Build a single-output network over an (observation, action) pair.

     Variables are created under `self.name` with `tf.AUTO_REUSE`; the
     `reuse` argument is accepted but not read. `self.network` selects the
     body: 'mlp' concatenates obs and action and runs `self.network_builder`;
     'cloth_cnn' runs an explicit three-layer tanh conv net on `obs` and
     concatenates `action` after flattening. Any other value raises
     ValueError(self.network).
     """
     def small_uniform():
         # a fresh initializer per layer, uniform in [-3e-3, 3e-3]
         return tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)

     with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
         if self.network == 'mlp':
             # this assumes observation and action can be concatenated
             joint = tf.concat([obs, action], axis=-1)
             body = self.network_builder(joint)
             x = tf.layers.dense(body, 1, kernel_initializer=small_uniform(), name='output')
         elif self.network == 'cloth_cnn':
             # --------------------------------------------------------------
             # The net is designed explicitly here. Per the original author,
             # obs is ALREADY DIVIDED BY 255 IN DDPG.PY and is tf.float32.
             # The design is up for debate (e.g. the input could be cropped).
             # There is a separate `cloth_cnn` for PPO with state-only input.
             # --------------------------------------------------------------
             squash = tf.nn.tanh
             gain = np.sqrt(2)
             feat = obs
             with tf.variable_scope("convnet"):
                 for scope, size, step in (('c1', 8, 4), ('c2', 4, 2), ('c3', 3, 1)):
                     feat = squash(conv(feat, scope, nf=32, rf=size, stride=step, init_scale=gain))
                 feat = conv_to_fc(feat)
             with tf.variable_scope("fcnet"):
                 # this assumes flattened features and action can be concatenated
                 joint = tf.concat([feat, action], axis=-1)
                 hidden = squash(tf.layers.dense(joint, 200, kernel_initializer=small_uniform(), name='fc1'))
                 x = tf.layers.dense(hidden, 1, kernel_initializer=small_uniform(), name='output')
         else:
             raise ValueError(self.network)
     return x
Esempio n. 11
0
    def network_fn(X, nenv=1):
        """
        Run `nature_cnn` on X, then a dense layer and an LSTM over the batch.

        Returns a triple: the `nature_cnn` output, the LSTM output converted
        back to batch layout, and a dict holding the state placeholder 'S',
        the mask placeholder 'M', the new LSTM state 'state' and a zero
        'initial_state' array shaped like S. `conv_kwargs`, `nh`, `nlstm`
        and `layer_norm` come from the enclosing scope.
        """
        nbatch = X.shape[0]
        nsteps = nbatch // nenv

        fm = nature_cnn(X, **conv_kwargs)
        dense = tf.nn.relu(fc(conv_to_fc(fm), 'fc1', nh=nh, init_scale=np.sqrt(2)))

        mask_ph = tf.placeholder(tf.float32, [nbatch])  # mask (done t-1)
        state_ph = tf.placeholder(tf.float32, [nenv, 2 * nlstm])  # states

        seq_inputs = batch_to_seq(dense, nenv, nsteps)
        seq_masks = batch_to_seq(mask_ph, nenv, nsteps)

        # pick the recurrent cell builder and its variable scope together
        if layer_norm:
            cell_fn, cell_scope = utils.lnlstm, 'lnlstm'
        else:
            cell_fn, cell_scope = utils.lstm, 'lstm'
        seq_out, new_state = cell_fn(seq_inputs, seq_masks, state_ph, scope=cell_scope, nh=nlstm)

        h = seq_to_batch(seq_out)
        zero_state = np.zeros(state_ph.shape.as_list(), dtype=float)

        extras = {
            'S': state_ph,
            'M': mask_ph,
            'state': new_state,
            'initial_state': zero_state
        }
        return fm, h, extras
def CNN7(unscaled_images,index,filmObj):
    """
    Three separable conv layers, each followed by an added offset taken from `filmObj`.

    The input is cast to float32 and divided by 255. For layers of 32, 64
    and 48 outputs, a slice of that length starting at `index * length` is
    taken from `filmObj.film_b_1`, `film_b_2` and `film_b_3` respectively
    and added to the layer output. The flattened result feeds a 512-unit
    ReLU dense layer named 'fc1'. Only offsets are applied; no
    multiplicative term is used.
    """
    with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.contrib.layers.variance_scaling_initializer()):
        feat = tf.cast(unscaled_images, tf.float32) / 255.

        widths = (32, 64, 48)
        banks = (filmObj.film_b_1, filmObj.film_b_2, filmObj.film_b_3)

        # all three slices are taken up front, before any conv layer is built
        offsets = [tf.slice(bank, index * width, [width])
                   for bank, width in zip(banks, widths)]

        # separable_conv2d positional args: num_outputs, kernel_size,
        # depth_multiplier (always 1 here), stride
        for width, kernel, step, offset in zip(widths, (8, 4, 3), (4, 2, 1), offsets):
            feat = tf.math.add(slim.separable_conv2d(feat, width, kernel, 1, step), offset)

        flat = conv_to_fc(feat)
        return tf.nn.relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
Esempio n. 13
0
def cnn7(unscaled_images, **conv_kwargs):
    """
    Three separable conv layers followed by a 512-unit ReLU dense layer.

    The input is cast to float32 and divided by 255. `conv_kwargs` is
    accepted but not read.

    Variable listing recorded by the original author for a 96x96 network:
    model/SeparableConv2d/depthwise_weights:0 (8, 8, 4, 1)
    model/SeparableConv2d/pointwise_weights:0 (1, 1, 4, 32)
    model/SeparableConv2d/biases:0 (32,)
    model/SeparableConv2d_1/depthwise_weights:0 (4, 4, 32, 1)
    model/SeparableConv2d_1/pointwise_weights:0 (1, 1, 32, 64)
    model/SeparableConv2d_1/biases:0 (64,)
    model/SeparableConv2d_2/depthwise_weights:0 (3, 3, 64, 1)
    model/SeparableConv2d_2/pointwise_weights:0 (1, 1, 64, 48)
    model/SeparableConv2d_2/biases:0 (48,)
    model/fc1/w:0 (6912, 512)
    model/fc1/b:0 (512,)
    model/v/w:0 (512, 1)
    model/v/b:0 (1,)
    model/pi/w:0 (512, 7)
    model/pi/b:0 (7,)
    Trainable variables:
    3550296
    """
    with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.contrib.layers.variance_scaling_initializer()):
        feat = tf.cast(unscaled_images, tf.float32) / 255.

        # (num_outputs, kernel_size, stride); depth_multiplier is always 1
        for n_out, kernel, step in ((32, 8, 4), (64, 4, 2), (48, 3, 1)):
            feat = slim.separable_conv2d(feat, n_out, kernel, 1, step)

        flat = conv_to_fc(feat)
        return tf.nn.relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
Esempio n. 14
0
    def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
        """
        Build a feed-forward conv policy with shared trunk and pi / v heads.

        The uint8 observation placeholder has `nc * nstack` channels and a
        batch of `nenv * nsteps`. Inside the "model" scope it is cast to
        float32, divided by 255 and passed through three conv layers and a
        512-unit dense layer. No activation is applied explicitly between
        layers here.
        NOTE(review): presumably `conv`/`fc` apply a default activation,
        given the explicit identity `act=` on the heads; confirm against
        the helper definitions.
        """
        nbatch = nenv * nsteps
        height, width, channels = ob_space.shape
        n_actions = ac_space.n
        gain = np.sqrt(2)

        def identity(tensor):
            return tensor

        obs_ph = tf.placeholder(tf.uint8, (nbatch, height, width, channels * nstack))  # obs
        with tf.variable_scope("model", reuse=reuse):
            feat = tf.cast(obs_ph, tf.float32) / 255.
            for scope, n_filters, size, step in (('c1', 32, 8, 4),
                                                 ('c2', 64, 4, 2),
                                                 ('c3', 64, 3, 1)):
                feat = conv(feat, scope, nf=n_filters, rf=size, stride=step, init_scale=gain)
            hidden = fc(conv_to_fc(feat), 'fc1', nh=512, init_scale=gain)
            pi = fc(hidden, 'pi', n_actions, act=identity)
            vf = fc(hidden, 'v', 1, act=identity)

        value_out = vf[:, 0]
        action_out = sample(pi)
        self.initial_state = []  # not stateful

        def step(ob, *_args, **_kwargs):
            action, val = sess.run([action_out, value_out], {obs_ph: ob})
            return action, val, []  # dummy state

        def value(ob, *_args, **_kwargs):
            return sess.run(value_out, {obs_ph: ob})

        self.X = obs_ph
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Esempio n. 15
0
    def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
        """
        Construct the conv policy graph and bind `step` / `value` callables.

        Observations enter through a uint8 placeholder of shape
        (nenv * nsteps, nh, nw, nc * nstack), are cast to float32 and divided
        by 255 inside the "model" scope, then run through conv layers c1-c3
        and dense layer fc1. The 'pi' and 'v' heads are created with an
        identity `act`. `step` returns (action, value, []) and `value`
        returns the value estimate only.
        """
        nbatch = nenv * nsteps
        nh, nw, nc = ob_space.shape
        stacked_shape = (nbatch, nh, nw, nc * nstack)
        nact = ac_space.n
        init = np.sqrt(2)
        frames = tf.placeholder(tf.uint8, stacked_shape)  # obs

        with tf.variable_scope("model", reuse=reuse):
            scaled = tf.cast(frames, tf.float32) / 255.
            conv1 = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=init)
            conv2 = conv(conv1, 'c2', nf=64, rf=4, stride=2, init_scale=init)
            conv3 = conv(conv2, 'c3', nf=64, rf=3, stride=1, init_scale=init)
            flat = conv_to_fc(conv3)
            dense = fc(flat, 'fc1', nh=512, init_scale=init)
            # both heads are linear: the activation is the identity
            pi = fc(dense, 'pi', nact, act=lambda t: t)
            vf = fc(dense, 'v', 1, act=lambda t: t)

        # drop the second axis of the value head
        v0 = vf[:, 0]
        a0 = sample(pi)
        self.initial_state = []  # not stateful

        def step(ob, *_args, **_kwargs):
            fetched = sess.run([a0, v0], {frames: ob})
            act, val = fetched
            return act, val, []  # dummy state

        def value(ob, *_args, **_kwargs):
            return sess.run(v0, {frames: ob})

        self.X = frames
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Esempio n. 16
0
def dcgan_cnn(unscaled_images, **conv_kwargs):
    """
    Three conv layers, each followed by batch norm and leaky ReLU; returns flattened features.

    The input is cast to float32 and divided by 255. Each conv output goes
    through `batch_norm(x, name)` and then `tf.nn.leaky_relu` with its
    default alpha. Extra keyword arguments are forwarded to every `conv` call.
    """
    gain = np.sqrt(2)

    # (batch-norm name, conv scope, nf, rf, stride)
    layer_specs = (
        ('l1', 'c1', 32, 8, 4),
        ('l2', 'c2', 64, 4, 2),
        ('l3', 'c3', 64, 3, 1),
    )

    feat = tf.cast(unscaled_images, tf.float32) / 255.
    for bn_name, scope, n_filters, size, step in layer_specs:
        pre_act = conv(feat, scope, nf=n_filters, rf=size, stride=step,
                       init_scale=gain, **conv_kwargs)
        feat = tf.nn.leaky_relu(batch_norm(pre_act, bn_name))

    return conv_to_fc(feat)
Esempio n. 17
0
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False): #pylint: disable=W0613
        """
        Build a conv policy with a probability-distribution head.

        The uint8 observation placeholder is cast to float32 and divided by
        255 inside the "model" scope, then passed through conv layers c1-c3
        and the 512-unit dense layer fc1. The policy head uses
        init_scale=0.01; the value head is indexed with [:, 0]. Actions and
        their negative log-probabilities come from the distribution built by
        `make_pdtype(ac_space)`. `step` returns
        (action, value, initial_state, neglogp).
        """
        height, width, channels = ob_space.shape
        obs_shape = (nbatch, height, width, channels)
        n_actions = ac_space.n
        gain = np.sqrt(2)

        def linear(tensor):
            # identity activation for the output heads
            return tensor

        obs_ph = tf.placeholder(tf.uint8, obs_shape)  # obs
        with tf.variable_scope("model", reuse=reuse):
            feat = tf.cast(obs_ph, tf.float32) / 255.
            for scope, n_filters, size, step_px in (('c1', 32, 8, 4),
                                                    ('c2', 64, 4, 2),
                                                    ('c3', 64, 3, 1)):
                feat = conv(feat, scope, nf=n_filters, rf=size, stride=step_px, init_scale=gain)
            hidden = fc(conv_to_fc(feat), 'fc1', nh=512, init_scale=gain)
            pi = fc(hidden, 'pi', n_actions, act=linear, init_scale=0.01)
            vf = fc(hidden, 'v', 1, act=linear)[:, 0]

        self.pdtype = make_pdtype(ac_space)
        self.pd = self.pdtype.pdfromflat(pi)

        sampled_action = self.pd.sample()
        sampled_neglogp = self.pd.neglogp(sampled_action)
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            action, val, nlp = sess.run([sampled_action, vf, sampled_neglogp], {obs_ph: ob})
            return action, val, self.initial_state, nlp

        def value(ob, *_args, **_kwargs):
            return sess.run(vf, {obs_ph: ob})

        self.X = obs_ph
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Esempio n. 18
0
def nature_cnn(unscaled_images, scope, **conv_kwargs):
    """
    Three-layer conv stack plus a 512-unit ReLU dense layer, built under `scope`.

    Everything, including the cast to float32 and the division by 255, is
    created inside `tf.variable_scope(scope)`. Extra keyword arguments are
    forwarded to every `conv` call.
    """
    gain = np.sqrt(2)

    # (layer scope, nf, rf, stride); per the original author, an 8x8 filter
    # size is common on the very first conv layer, looking at the input image
    layer_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )

    with tf.variable_scope(scope):
        feat = tf.cast(unscaled_images, tf.float32) / 255.
        for layer_scope, n_filters, size, step in layer_specs:
            feat = tf.nn.relu(
                conv(feat, layer_scope, nf=n_filters, rf=size, stride=step,
                     init_scale=gain, **conv_kwargs))
        flat = conv_to_fc(feat)
        return tf.nn.relu(fc(flat, 'fc1', nh=512, init_scale=gain))
Esempio n. 19
0
def nature_cnn(scaled_images, **conv_kwargs):
    """
    Model used in the paper "Human-level control through deep reinforcement learning"
    https://www.nature.com/articles/nature14236

    The input is used as given: this variant performs no cast or rescaling.
    Three ReLU conv layers are followed by a 512-unit ReLU dense layer
    named 'fc1'. Extra keyword arguments are forwarded to every `conv` call.
    """
    gain = np.sqrt(2)

    def relu_conv(inp, scope, n_filters, size, step):
        # one conv layer followed by ReLU
        return tf.nn.relu(
            conv(inp, scope, nf=n_filters, rf=size, stride=step,
                 init_scale=gain, **conv_kwargs))

    feat = relu_conv(scaled_images, 'c1', 32, 8, 4)
    feat = relu_conv(feat, 'c2', 64, 4, 2)
    feat = relu_conv(feat, 'c3', 64, 3, 1)

    flat = conv_to_fc(feat)
    return tf.nn.relu(fc(flat, 'fc1', nh=512, init_scale=gain))
Esempio n. 20
0
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    Conv feature extractor: three ReLU conv layers and a 512-unit ReLU dense layer.

    The images are cast to float32 and divided by 255 before the first
    layer. Extra keyword arguments are forwarded to every `conv` call.
    """
    gain = np.sqrt(2)
    scopes = ('c1', 'c2', 'c3')
    filters = (32, 64, 64)
    sizes = (8, 4, 3)
    strides = (4, 2, 1)

    feat = tf.cast(unscaled_images, tf.float32) / 255.
    for scope, n_filters, size, step in zip(scopes, filters, sizes, strides):
        pre_act = conv(feat, scope, nf=n_filters, rf=size, stride=step,
                       init_scale=gain, **conv_kwargs)
        feat = tf.nn.relu(pre_act)

    dense_in = conv_to_fc(feat)
    dense_out = fc(dense_in, 'fc1', nh=512, init_scale=gain)
    return tf.nn.relu(dense_out)
Esempio n. 21
0
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    Three ReLU conv layers followed by a 512-unit ReLU dense layer.

    The images are cast to float32 and divided by 255 first. Every learned
    layer is created with init_scale=sqrt(2), which is passed straight to
    the external `conv` / `fc` helpers. Extra keyword arguments are
    forwarded to every `conv` call.
    """
    rectify = tf.nn.relu
    # np.sqrt(2) is a scalar here; it is only handed to the helpers as init_scale
    gain = np.sqrt(2)

    def block(inp, scope, **shape):
        # conv + ReLU with the shared init scale
        return rectify(conv(inp, scope, init_scale=gain, **shape))

    scaled = tf.cast(unscaled_images, tf.float32) / 255.

    out1 = block(scaled, 'c1', nf=32, rf=8, stride=4, **conv_kwargs)
    out2 = block(out1, 'c2', nf=64, rf=4, stride=2, **conv_kwargs)
    out3 = block(out2, 'c3', nf=64, rf=3, stride=1, **conv_kwargs)

    return rectify(fc(conv_to_fc(out3), 'fc1', nh=512, init_scale=gain))
Esempio n. 22
0
def cnn(unscaled_images, scope, activ=None, nfeat=None, reuse=False):
    """
    Three conv layers with a configurable activation, then a dense projection.

    The images are cast to float32 and divided by 255. A falsy `activ`
    selects `tf.nn.leaky_relu`; a falsy `nfeat` selects 512. Layer names
    are `scope` plus the suffixes '_conv1', '_conv2', '_conv3' and
    '_conv_to_fc', and `reuse` is forwarded to every layer. No activation
    is applied to the returned dense output in this function.
    """
    scaled = tf.cast(unscaled_images, tf.float32) / 255.
    nonlinearity = activ if activ else tf.nn.leaky_relu
    n_out = nfeat if nfeat else 512
    gain = np.sqrt(2)

    feat = scaled
    for suffix, n_filters, size, step in (('_conv1', 32, 8, 4),
                                          ('_conv2', 64, 4, 2),
                                          ('_conv3', 64, 3, 1)):
        feat = nonlinearity(
            conv(feat, scope + suffix, nf=n_filters, rf=size, stride=step,
                 init_scale=gain, reuse=reuse))

    flat = conv_to_fc(feat)
    return fc(flat, scope + '_conv_to_fc', nh=n_out, init_scale=gain, reuse=reuse)
Esempio n. 23
0
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nbatch,
                 nsteps,
                 nlstm=256,
                 reuse=False,
                 scope_name="model"):
        """
        Build a recurrent conv policy: conv trunk, dense layer, LSTM, pi / v heads.

        Three placeholders are created outside the variable scope: uint8
        observations, a float32 mask of length `nbatch` and a float32 state
        of shape (nbatch // nsteps, 2 * nlstm). Inside `scope_name` the
        observations are cast to float32, divided by 255, run through conv
        layers c1-c3 and dense layer fc1, converted to a sequence, passed
        through `lstm` (scope 'lstm1') and converted back to a batch.
        `step` returns the `sess.run` result for
        [action, value, new state, neglogp].
        """
        nenv = nbatch // nsteps

        height, width, channels = ob_space.shape
        n_actions = ac_space.n
        gain = np.sqrt(2)

        def linear(tensor):
            # identity activation for the output heads
            return tensor

        obs_ph = tf.placeholder(tf.uint8, (nbatch, height, width, channels))  # obs
        mask_ph = tf.placeholder(tf.float32, [nbatch])  # mask (done t-1)
        state_ph = tf.placeholder(tf.float32, [nenv, nlstm * 2])  # states

        with tf.variable_scope(scope_name, reuse=reuse):
            feat = tf.cast(obs_ph, tf.float32) / 255.
            for scope, n_filters, size, step_px in (('c1', 32, 8, 4),
                                                    ('c2', 64, 4, 2),
                                                    ('c3', 64, 3, 1)):
                feat = conv(feat, scope, nf=n_filters, rf=size, stride=step_px, init_scale=gain)
            dense = fc(conv_to_fc(feat), 'fc1', nh=512, init_scale=gain)

            seq_inputs = batch_to_seq(dense, nenv, nsteps)
            seq_masks = batch_to_seq(mask_ph, nenv, nsteps)
            seq_out, new_state = lstm(seq_inputs, seq_masks, state_ph, 'lstm1', nh=nlstm)
            recurrent = seq_to_batch(seq_out)

            pi = fc(recurrent, 'pi', n_actions, act=linear)
            vf = fc(recurrent, 'v', 1, act=linear)

        self.pdtype = make_pdtype(ac_space)
        self.pd = self.pdtype.pdfromflat(pi)

        value_out = vf[:, 0]
        action_out = self.pd.sample()
        neglogp_out = self.pd.neglogp(action_out)
        self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

        def step(ob, state, mask):
            feed = {obs_ph: ob, state_ph: state, mask_ph: mask}
            return sess.run([action_out, value_out, new_state, neglogp_out], feed)

        def value(ob, state, mask):
            feed = {obs_ph: ob, state_ph: state, mask_ph: mask}
            return sess.run(value_out, feed)

        self.X = obs_ph
        self.M = mask_ph
        self.S = state_ph
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Esempio n. 24
0
    def __init__(self, params, ob_space, ac_space, nbatch, nsteps):  #pylint: disable=W0613
        """
        Rebuild the policy network from an explicit dictionary of parameters.

        No variables are created here: every weight and bias is looked up in
        `params` under the 'policy/<layer>/w:0' and 'policy/<layer>/b:0'
        keys. The float32 observation placeholder is divided by 255.0 and
        passed through three VALID convolutions (strides 4, 2, 1) with ReLU,
        a ReLU dense layer and a linear output layer, all under the
        'policy_new' name scope. Only `self.pdtype`, `self.pd` and `self.X`
        are set.
        """
        nenv = nbatch // nsteps  # not read below; kept so the division still runs
        ob_shape = (nbatch, ) + ob_space.shape
        nact = ac_space.n  # not read below; kept so the attribute is still accessed
        X = tf.compat.v1.placeholder(tf.float32, ob_shape, name='Ob')  #obs

        nn = tf.compat.v1.nn
        # (weight key, bias key, spatial stride) per conv layer
        conv_layers = (
            ('policy/c1/w:0', 'policy/c1/b:0', 4),
            ('policy/c2/w:0', 'policy/c2/b:0', 2),
            ('policy/c3/w:0', 'policy/c3/b:0', 1),
        )

        with tf.name_scope('policy_new'):
            activ = nn.relu
            feat = X / 255.0
            for w_key, b_key, stride in conv_layers:
                pre_act = nn.conv2d(feat, params[w_key],
                                    [1, stride, stride, 1], 'VALID') + params[b_key]
                feat = activ(pre_act)
            flat = conv_to_fc(feat)
            hidden = activ(
                nn.xw_plus_b(flat, params['policy/fc1/w:0'],
                             params['policy/fc1/b:0']))
            pi = nn.xw_plus_b(hidden, params['policy/pi/w:0'],
                              params['policy/pi/b:0'])

        self.pdtype = make_pdtype(ac_space)
        self.pd = self.pdtype.pdfromflat(pi)

        self.X = X
Esempio n. 25
0
 def __call__(self, obs, reuse=False):
     """
     Build the action output (tanh-squashed) from an observation tensor.

     Variables are created under `self.name` with `tf.AUTO_REUSE`; the
     `reuse` argument is accepted but not read. `self.network` must equal
     'cloth_cnn' (checked with an assert). With `self.use_keras` set, `obs`
     is flattened directly and the dense layers use a uniform [-3e-3, 3e-3]
     kernel initializer. Otherwise `obs` goes through four tanh conv layers
     under "convnet" and the dense layers, under "fcnet", use the default
     initializer. Both paths end in three 256-unit tanh layers (fc1-fc3)
     and an fc4 layer of `self.nb_actions` units followed by tanh.
     """
     def dense_head(feat, layer_kwargs):
         # fc1-fc3 (256 units, tanh) then fc4 with tanh to restrict actions;
         # layer_kwargs() is called once per layer so each gets fresh kwargs
         for layer_name in ('fc1', 'fc2', 'fc3'):
             feat = tf.nn.tanh(tf.layers.dense(feat, 256, name=layer_name, **layer_kwargs()))
         logits = tf.layers.dense(feat, self.nb_actions, name='fc4', **layer_kwargs())
         return tf.nn.tanh(logits)

     def small_uniform_kwargs():
         return {'kernel_initializer': tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)}

     def default_kwargs():
         return {}

     with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
         assert self.network == 'cloth_cnn', self.network
         # --------------------------------------------------------------
         # The net is designed explicitly. Per the original author, obs is
         # ALREADY DIVIDED BY 255 IN DDPG.PY and is tf.float32, and the
         # design is similar to Jan Matas' network, with an option to use
         # pretrained ResNet features. Either way the input is a TENSOR.
         # --------------------------------------------------------------
         squash = tf.nn.tanh
         if self.use_keras:
             # Shapes recorded by the original author before and after
             # conv_to_fc: (?, 7, 7, 2048) -> (?, 100352)
             x = dense_head(conv_to_fc(obs), small_uniform_kwargs)
         else:
             # Shapes recorded by the original author for this path:
             #   input            (?, 100, 100, 3)
             #   convnet c1..c4   (?, 49, 49, 32), (?, 24, 24, 32),
             #                    (?, 22, 22, 32), (?, 20, 20, 32)
             #   flattened        (?, 12800)
             #   fcnet fc1..fc3   (?, 256) each
             #   fcnet fc4        (?, 4)
             gain = np.sqrt(2)
             feat = obs
             with tf.variable_scope("convnet"):
                 for scope, step in (('c1', 2), ('c2', 2), ('c3', 1), ('c4', 1)):
                     feat = squash(conv(feat, scope, nf=32, rf=3, stride=step, init_scale=gain))
                 feat = conv_to_fc(feat)
             with tf.variable_scope("fcnet"):
                 x = dense_head(feat, default_kwargs)
     return x
Esempio n. 26
0
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False, is_discrete=False):    #pylint: disable=W0613
        """Build a convolutional actor-critic graph for image observations.

        Args:
            sess: TensorFlow session used by ``step`` and ``value`` to run the graph.
            ob_space: observation space; ``ob_space.shape`` is unpacked as
                ``(height, width, channels)``.
            ac_space: action space. A ``gym.spaces.Discrete`` space yields
                ``ac_space.n`` policy outputs; anything else yields
                ``ac_space.shape[0]`` outputs plus a ``logstd`` variable.
            nbatch: leading (batch) dimension of the observation placeholder.
            nsteps: unused.
            reuse: forwarded to the ``"model"`` variable scope.
            is_discrete: ignored; the flag is always derived from ``ac_space``.

        Attributes set: ``is_discrete``, ``pdtype``, ``pd``, ``initial_state``
        (always ``None``), ``X``, ``pi``, ``vf``, ``step`` and ``value``.

        ``step`` and ``value`` both divide the observations by 255 before
        feeding them, so the two value estimates are computed on the same input
        scale. Previously only ``step`` normalised its input.
        NOTE(review): anything else that feeds ``self.X`` directly (e.g. a
        training op) must apply the same scaling -- confirm against the caller.
        """
        # The action-space type decides which head is built; the argument is overridden.
        self.is_discrete = isinstance(ac_space, gym.spaces.Discrete)

        print("nbatch%d" % (nbatch))

        nh, nw, nc = ob_space.shape
        ob_shape = (nbatch, nh, nw, nc)
        nact = ac_space.n if self.is_discrete else ac_space.shape[0]
        X = tf.placeholder(tf.float32, ob_shape)    #obs

        with tf.variable_scope("model", reuse=reuse):
            # No activation is passed to conv/fc below, so the helpers' own
            # default (whatever it is) applies.
            h = conv(X, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
            h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
            h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
            h3 = conv_to_fc(h3)
            # Policy and value heads share the conv features but have separate
            # 512-unit hidden layers.
            h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
            h5 = fc(h3, 'fc_vf', nh=512, init_scale=np.sqrt(2))

            pi = fc(h4, 'pi', nact, init_scale=0.05)
            vf = fc(h5, 'v', 1, act=lambda x: x)[:, 0]

            if not self.is_discrete:
                logstd = tf.get_variable(name="logstd",
                                         shape=[1, nact],
                                         initializer=tf.zeros_initializer())

        self.pdtype = make_pdtype(ac_space)
        if self.is_discrete:
            pdparam = pi
        else:
            # `pi * 0.0 + logstd` broadcasts the [1, nact] variable over the batch.
            pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)
        self.pd = self.pdtype.pdfromflat(pdparam)
        a0 = self.pd.sample()

        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = None

        def normalize(ob):
            # Single definition of the input scaling shared by step() and value().
            return np.asarray(ob, dtype=np.float32) / 255.0

        def step(ob, *_args, **_kwargs):
            a, v, neglogp = sess.run([a0, vf, neglogp0], {X: normalize(ob)})
            assert (a.shape[0] == 1)    # make sure a = a[0] don't throw away actions
            a = a[0]
            return a, v, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            return sess.run(vf, {X: normalize(ob)})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Esempio n. 27
0
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False):  #pylint: disable=W0613
        """Build a small conv + MLP actor-critic graph.

        Args:
            sess: TensorFlow session used by ``step`` and ``value`` to run the graph.
            ob_space: observation space; the placeholder shape is
                ``(nbatch,) + ob_space.shape``.
            ac_space: action space. A ``gym.spaces.Discrete`` space yields
                ``ac_space.n`` policy outputs, anything else ``ac_space.shape[0]``.
            nbatch: leading (batch) dimension of the observation placeholder.
            nsteps: unused.
            reuse: forwarded to the ``"model"`` variable scope.

        Attributes set: ``is_discrete``, ``pdtype``, ``pd``, ``initial_state``
        (always ``None``), ``X``, ``pi``, ``vf``, ``step`` and ``value``.

        Fixes relative to the previous version:
          * The fully connected heads now consume the output of conv ``c2``.
            Before, the ``c2`` result was overwritten by flattening the ``c1``
            output, so ``c2`` was built but never used. This changes the input
            width of ``pi_fc1`` / ``vf_fc1``, so checkpoints saved from the old
            graph will not load into those layers.
          * ``value`` now divides observations by 255, as ``step`` already did.
            NOTE(review): anything else feeding ``self.X`` directly (e.g. a
            training op) must apply the same scaling -- confirm against the caller.
        """
        self.is_discrete = isinstance(ac_space, gym.spaces.Discrete)

        ob_shape = (nbatch, ) + ob_space.shape
        actdim = ac_space.n if self.is_discrete else ac_space.shape[0]

        X = tf.placeholder(tf.float32, ob_shape, name='Ob')  #obs
        with tf.variable_scope("model", reuse=reuse):
            h_c = conv(X, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
            h2_c = conv(h_c,
                        'c2',
                        nf=64,
                        rf=4,
                        stride=2,
                        init_scale=np.sqrt(2))
            # Flatten the *second* conv layer's output (was mistakenly `h_c`).
            flat = conv_to_fc(h2_c)
            # Policy head: two 64-unit tanh layers.
            h1 = fc(flat, 'pi_fc1', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
            h2 = fc(h1, 'pi_fc2', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
            pi = fc(h2, 'pi', actdim, init_scale=0.01)
            # Value head: same layout, separate weights.
            h1 = fc(flat, 'vf_fc1', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
            h2 = fc(h1, 'vf_fc2', nh=64, init_scale=np.sqrt(2), act=tf.tanh)
            vf = fc(h2, 'vf', 1)[:, 0]
            logstd = tf.get_variable(name="logstd",
                                     shape=[1, actdim],
                                     initializer=tf.zeros_initializer())

        # `pi * 0.0 + logstd` broadcasts the [1, actdim] variable over the batch.
        pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)

        self.pdtype = make_pdtype(ac_space)
        # Discrete spaces are parameterised by `pi` alone; otherwise `pi` and logstd.
        self.pd = self.pdtype.pdfromflat(pi if self.is_discrete else pdparam)
        a0 = self.pd.sample()

        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = None

        def normalize(ob):
            # Single definition of the input scaling shared by step() and value().
            return np.asarray(ob, dtype=np.float32) / 255.0

        def step(ob, *_args, **_kwargs):
            a, v, neglogp = sess.run([a0, vf, neglogp0], {X: normalize(ob)})
            a = a[0]
            return a, v, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            return sess.run(vf, {X: normalize(ob)})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Esempio n. 28
0
def vgg19_cnn(scaled_images, **conv_kwargs):
    """Build a VGG-style feature extractor: 16 3x3 convs in five stages plus two dense layers.

    Stages use 16, 32, 64, 64 and 64 filters with 2, 2, 4, 4 and 4 conv layers
    respectively. Every conv is followed by ReLU and every stage by a 2x2
    max-pool. The pools use stride 1 with SAME padding, so they do not
    downsample.

    Args:
        scaled_images: input image tensor; as the name suggests it is expected
            to be already scaled (no scaling is applied here).
        **conv_kwargs: extra keyword arguments forwarded to every ``conv`` call.

    Returns:
        The ReLU output of the second dense layer (its weight matrix is
        1024 x 256).

    Fixes relative to the previous version:
      * Bias sizes now match the output width of their weight matrices
        (1024 and 256; they were 256 and 128). This assumes
        ``fully_connected(x, W, b)`` adds ``b`` to ``x @ W`` -- confirm against
        its definition.
      * The first dense layer's input width is read from the flattened tensor
        instead of being hard-coded to 409600, so other input resolutions work.
    """
    activ = tf.nn.relu

    def conv_relu(x, scope, nf):
        # All convolutions share kernel size, stride and init scale.
        return activ(conv(x, scope, nf=nf, rf=3, stride=1, init_scale=np.sqrt(2),
                          **conv_kwargs))

    # (stage number, filters per conv, number of conv layers)
    stages = ((1, 16, 2), (2, 32, 2), (3, 64, 4), (4, 64, 4), (5, 64, 4))

    h = scaled_images
    for stage, nf, depth in stages:
        for layer in range(1, depth + 1):
            h = conv_relu(h, 'c%d_%d' % (stage, layer), nf)
        h = tf.nn.max_pool(h, [1, 2, 2, 1], [1, 1, 1, 1], padding='SAME',
                           name='p%d' % stage)

    print(np.shape(h))
    h1 = conv_to_fc(h)
    print(np.shape(h1))

    flat_dim = h1.get_shape().as_list()[-1]
    if flat_dim is None:
        # Static size unknown: fall back to the width the original code assumed.
        flat_dim = 409600

    h1 = activ(fully_connected(h1, weight_variable([flat_dim, 1024]), bias_variable([1024])))
    h2 = activ(fully_connected(h1, weight_variable([1024, 256]), bias_variable([256])))
    print(np.shape(h2))

    return h2
Esempio n. 29
0
    def network_fn(X, mode="pi"):
        """Build a three-conv + one-dense feature network with optional batch norm and dropout.

        ``X`` is cast to float32 and divided by 255. Each conv output then goes
        through: batch norm (if enabled for ``mode``), ReLU, dropout (if enabled
        for ``mode``). ``mode`` is ``"pi"`` or ``"vf"`` and selects which of the
        enclosing scope's batch-norm / dropout settings apply. The final dense
        layer ``fc1`` (512 units, ReLU) gets neither batch norm nor dropout.

        Only the ``pad``, ``data_format`` and ``one_dim_bias`` entries of the
        enclosing ``conv_kwargs`` are forwarded to ``conv``.
        """
        # Forward just the conv options that the caller actually supplied.
        forwarded = {
            key: conv_kwargs[key]
            for key in ("pad", "data_format", "one_dim_bias")
            if key in conv_kwargs
        }

        def normalize_activate_drop(t):
            use_bn = (batchnormpi and mode == "pi") or (batchnormvf
                                                        and mode == "vf")
            if use_bn:
                if mode == "pi":
                    training_flag = isbnpitrainmode
                else:
                    training_flag = isbnvftrainmode
                t = tf.contrib.layers.batch_norm(t,
                                                 center=True,
                                                 scale=True,
                                                 is_training=training_flag,
                                                 updates_collections=None)
            t = tf.nn.relu(t)
            if dropoutpi < 1.0 and mode == "pi":
                t = tf.nn.dropout(t, keep_prob=dropoutpi_keep_prob)
            if dropoutvf < 1.0 and mode == "vf":
                t = tf.nn.dropout(t, keep_prob=dropoutvf_keep_prob)
            return t

        features = tf.cast(X, tf.float32) / 255.

        # (scope, filters, kernel size, stride) for each conv layer, in order.
        conv_specs = (
            ('c1', 32, 8, 4),
            ('c2', 64, 4, 2),
            ('c3', 64, 3, 1),
        )
        for scope, filters, kernel, step in conv_specs:
            features = normalize_activate_drop(
                conv(features,
                     scope,
                     nf=filters,
                     rf=kernel,
                     stride=step,
                     init_scale=np.sqrt(2),
                     **forwarded))

        flat = conv_to_fc(features)
        return tf.nn.relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
 def network_fn(X):
     """Two VALID-padded conv layers, a 30-unit dense layer, then a linear-call output layer.

     The output layer 'fc3' has ``net_kwargs["nactions"]`` units and its
     result is returned without a further ReLU in this function.
     """
     conv1 = tf.nn.relu(conv(X, 'c1', nf=16, rf=8, stride=4, pad="VALID"))
     conv2 = tf.nn.relu(conv(conv1, 'c2', nf=32, rf=4, stride=2, pad="VALID"))
     flat = conv_to_fc(conv2)
     hidden = tf.nn.relu(fc(flat, 'fc1', nh=30))
     return fc(hidden, 'fc3', nh=net_kwargs["nactions"])
 def network_fn(X):
     """Two 64-filter conv layers followed by a 128-64-32 dense stack and an output layer.

     The output layer 'fc5' has ``net_kwargs["nactions"]`` units and its
     result is returned without a further ReLU in this function.
     """
     features = X
     for scope in ('c1', 'c2'):
         features = conv(features, scope, nf=64, rf=3, stride=2, pad="VALID")
         features = tf.nn.relu(features)
     features = conv_to_fc(features)
     # Scope names are kept exactly as they were (note there is no 'fc3').
     for scope, width in (('fc1', 128), ('fc2', 64), ('fc4', 32)):
         features = tf.nn.relu(fc(features, scope, nh=width))
     return fc(features, 'fc5', nh=net_kwargs["nactions"])
Esempio n. 32
0
def nature_cnn(unscaled_images):
    """
    CNN from Nature paper: three ReLU conv layers and a 512-unit ReLU dense layer.

    The input is cast to float32 and divided by 255 before the first conv.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    features = tf.cast(unscaled_images, tf.float32) / 255.
    # (scope, filters, kernel size, stride) for each conv layer, in order.
    for scope, filters, kernel, step in (('c1', 32, 8, 4),
                                         ('c2', 64, 4, 2),
                                         ('c3', 64, 3, 1)):
        features = relu(conv(features, scope, nf=filters, rf=kernel, stride=step, init_scale=gain))

    flat = conv_to_fc(features)
    return relu(fc(flat, 'fc1', nh=512, init_scale=gain))
Esempio n. 33
0
def nature_cnn(unscaled_images):
    """Convolutional part of the CNN from the Nature paper (no dense layer).

    Taken from baselines.ppo2.policies.py

    Parameters
    ----------
    unscaled_images : tensorflow tensor
        Frame of shape (batchsize, x, y, c); cast to float32 and divided by
        255 inside this function.

    Returns
    -------
    tensorflow tensor
        Result of passing the last conv layer's ReLU output through
        ``conv_to_fc``.
    """
    init = np.sqrt(2)
    net = tf.cast(unscaled_images, tf.float32) / 255.

    net = conv(net, 'c1', nf=32, rf=8, stride=4, init_scale=init)
    net = tf.nn.relu(net)

    net = conv(net, 'c2', nf=64, rf=4, stride=2, init_scale=init)
    net = tf.nn.relu(net)

    net = conv(net, 'c3', nf=64, rf=3, stride=1, init_scale=init)
    net = tf.nn.relu(net)

    return conv_to_fc(net)
Esempio n. 34
0
    def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, nlstm=256, reuse=False):
        """Build a recurrent (``lnlstm``) actor-critic graph over stacked image frames.

        Args:
            sess: TensorFlow session used by ``step`` and ``value``.
            ob_space: observation space; ``ob_space.shape`` is unpacked as
                ``(height, width, channels)``.
            ac_space: action space; ``ac_space.n`` sets the policy output width.
            nenv: number of environments; rows of the state placeholder.
            nsteps: steps per environment; the batch size is ``nenv * nsteps``.
            nstack: multiplier applied to the channel dimension of the
                observation placeholder.
            nlstm: hidden size passed to ``lnlstm``; the state placeholder is
                ``2 * nlstm`` wide.
            reuse: forwarded to the ``"model"`` variable scope.

        Attributes set: ``initial_state`` (float32 zeros of shape
        ``(nenv, 2 * nlstm)``), ``X``, ``M``, ``S``, ``pi``, ``vf``, ``step``
        and ``value``.
        """
        nbatch = nenv * nsteps
        height, width, channels = ob_space.shape
        ob_shape = (nbatch, height, width, channels * nstack)
        nact = ac_space.n
        gain = np.sqrt(2)

        def identity(x):
            # Passed as `act` so the output heads stay linear.
            return x

        X = tf.placeholder(tf.uint8, ob_shape) #obs
        M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, nlstm*2]) #states
        with tf.variable_scope("model", reuse=reuse):
            scaled = tf.cast(X, tf.float32)/255.
            features = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
            features = conv(features, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
            features = conv(features, 'c3', nf=64, rf=3, stride=1, init_scale=gain)
            dense = fc(conv_to_fc(features), 'fc1', nh=512, init_scale=gain)
            # Convert features and masks to sequence form for the recurrent layer.
            xs = batch_to_seq(dense, nenv, nsteps)
            ms = batch_to_seq(M, nenv, nsteps)
            lstm_out, snew = lnlstm(xs, ms, S, 'lstm1', nh=nlstm)
            lstm_out = seq_to_batch(lstm_out)
            pi = fc(lstm_out, 'pi', nact, act=identity)
            vf = fc(lstm_out, 'v', 1, act=identity)

        v0 = vf[:, 0]
        a0 = sample(pi)
        self.initial_state = np.zeros((nenv, nlstm*2), dtype=np.float32)

        def step(ob, state, mask):
            action, val, new_state = sess.run([a0, v0, snew], {X:ob, S:state, M:mask})
            return action, val, new_state

        def value(ob, state, mask):
            return sess.run(v0, {X:ob, S:state, M:mask})

        self.X, self.M, self.S = X, M, S
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Esempio n. 35
0
  def __init__(self,
               sess,
               ob_space,
               ac_space,
               nenv,
               nsteps,
               nstack,
               reuse=False):
    """Build a policy graph with one categorical head, two spatial heads and a value head.

    The observation placeholder is hard-coded to ``32 x 32`` with
    ``3 * nstack`` channels, and the categorical head to 3 outputs.
    ``ob_space`` and ``ac_space`` are accepted but not read here.

    Args:
      sess: TensorFlow session used by ``step`` and ``value``.
      ob_space: unused.
      ac_space: unused.
      nenv: number of environments; batch size is ``nenv * nsteps``.
      nsteps: steps per environment.
      nstack: multiplier applied to the channel dimension of the placeholder.
      reuse: forwarded to every variable scope created here.

    Attributes set: ``initial_state`` (empty list), ``X``, ``pi``, ``pi_xy0``,
    ``pi_xy1``, ``vf``, ``step`` and ``value``.

    NOTE(review): ``X`` is only cast to float32, not rescaled, and no explicit
    activation is passed to ``fc`` for the 'pi' / 'v' layers -- confirm both
    are intended given the defaults of the helpers.
    """
    nbatch = nenv * nsteps
    nh, nw, nc = (32, 32, 3)
    ob_shape = (nbatch, nh, nw, nc * nstack)
    nact = 3
    gain = np.sqrt(2)

    def spatial_head(scope, features):
      # 1x1 conv down to a single channel, flattened, then softmax over positions.
      with tf.variable_scope(scope, reuse=reuse):
        reduced = conv(features, scope, nf=1, rf=1, stride=1, init_scale=gain)
        return tf.nn.softmax(conv_to_fc(reduced))

    X = tf.placeholder(tf.uint8, ob_shape)  #obs
    with tf.variable_scope("model", reuse=reuse):
      # Shared convolutional trunk.
      with tf.variable_scope("common", reuse=reuse):
        h = conv(tf.cast(X, tf.float32), 'c1', nf=32, rf=5, stride=1,
                 init_scale=gain, pad="SAME")
        h2 = conv(h, 'c2', nf=64, rf=3, stride=1, init_scale=gain, pad="SAME")

      # Categorical policy and value estimate share one 256-unit dense layer.
      with tf.variable_scope("pi1", reuse=reuse):
        hidden = fc(conv_to_fc(h2), 'fc1', nh=256, init_scale=gain)
        pi = tf.nn.softmax(fc(hidden, 'pi', nact))
        vf = fc(hidden, 'v', 1)

      pi_xy0 = spatial_head("xy0", h2)
      pi_xy1 = spatial_head("xy1", h2)

    v0 = vf[:, 0]
    # The sampled action is never fetched below; the op is still created so
    # the graph stays the same as before.
    a0 = sample(pi)
    self.initial_state = []  #not stateful

    def step(ob, *_args, **_kwargs):
      # Returns the three probability tensors, the value estimate and a dummy state.
      probs, xy0, xy1, val = sess.run([pi, pi_xy0, pi_xy1, v0], {X: ob})
      return probs, xy0, xy1, val, []

    def value(ob, *_args, **_kwargs):
      return sess.run(v0, {X: ob})

    self.X = X
    self.pi = pi
    self.pi_xy0 = pi_xy0
    self.pi_xy1 = pi_xy1
    self.vf = vf
    self.step = step
    self.value = value