Code Example #1
File: vi_module.py Project: hou-yz/baselines
def rollout_step(s, a, n_actions):
    state_shape = s.shape.as_list()[1:]
    op_trans_state = tf.make_template('trans_state',
                                      vi_trans,
                                      n_actions=n_actions)
    r = tf.layers.dense(s,
                        n_actions,
                        activation=None,
                        name='reward',
                        kernel_initializer=ortho_init(1))
    s = op_trans_state(s)
    v = tf.reshape(
        tf.layers.dense(s,
                        1,
                        activation=None,
                        name='value',
                        kernel_initializer=ortho_init(1)), [-1, n_actions])
    if a is not None:
        # Build [batch_index, action_index] pairs and gather the reward,
        # value, and next state corresponding to the chosen action a.
        l = tf.expand_dims(tf.range(0, tf.shape(r)[0]), 1)
        l = tf.concat([l, tf.dtypes.cast(tf.reshape(a, [-1, 1]), tf.int32)],
                      axis=1)
        r = tf.gather_nd(r, l)
        s = tf.gather_nd(tf.reshape(s, [-1, n_actions] + state_shape), l)
        v = tf.gather_nd(v, l)
    return r, v, s
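
Every snippet on this page depends on an `ortho_init` helper that is not shown. For reference, here is a minimal sketch of an orthogonal initializer along the lines of OpenAI baselines' a2c/utils.py; the exact implementation may differ slightly across the forks listed here:

import numpy as np

def ortho_init(scale=1.0):
    def _ortho_init(shape, dtype=None, partition_info=None):
        # Flatten the requested shape to 2-D, take the SVD of a random
        # Gaussian matrix, and reshape the orthogonal factor back.
        shape = tuple(shape)
        if len(shape) == 2:        # dense kernel: [nin, nout]
            flat_shape = shape
        elif len(shape) == 4:      # 4-D conv kernel: flatten all but the last axis
            flat_shape = (np.prod(shape[:-1]), shape[-1])
        else:
            raise NotImplementedError
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        q = u if u.shape == flat_shape else v  # pick the factor with the right shape
        q = q.reshape(shape)
        return (scale * q[:shape[0], :shape[1]]).astype(np.float32)
    return _ortho_init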
Code Example #2
File: base.py Project: wwxFromTju/rl-generalization
def gru(xs, ms, s, scope, nh, init_scale=1.0, activ='tanh'):
    """ Implements a gated recurrent unit """
    nbatch, nin = [v.value for v in xs[0].get_shape()]
    nsteps = len(xs)

    with tf.variable_scope(scope):
        wx1 = tf.get_variable("wx1", [nin, nh * 2],
                              initializer=ortho_init(init_scale))
        wh1 = tf.get_variable("wh1", [nh, nh * 2],
                              initializer=ortho_init(init_scale))
        b1 = tf.get_variable("b1", [nh * 2],
                             initializer=tf.constant_initializer(0.0))
        wx2 = tf.get_variable("wx2", [nin, nh],
                              initializer=ortho_init(init_scale))
        wh2 = tf.get_variable("wh2", [nh, nh],
                              initializer=ortho_init(init_scale))
        b2 = tf.get_variable("b2", [nh],
                             initializer=tf.constant_initializer(0.0))

    for idx, (x, m) in enumerate(zip(xs, ms)):
        s = s * (1 - m)  # resets hidden state of RNN
        y = tf.matmul(x, wx1) + tf.matmul(s, wh1) + b1
        z, r = tf.split(axis=1, num_or_size_splits=2, value=y)
        z = tf.nn.sigmoid(z)
        r = tf.nn.sigmoid(r)
        h = tf.matmul(x, wx2) + tf.matmul(s * r, wh2) + b2
        if activ == 'tanh':
            h = tf.tanh(h)
        elif activ == 'relu':
            h = tf.nn.relu(h)
        else:
            raise ValueError(activ)
        s = (1 - z) * h + z * s
        xs[idx] = s
    return xs, s
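
Here `xs` and `ms` are per-timestep lists: each `x` is a [nbatch, nin] input and each `m` is a [nbatch, 1] done-mask, so multiplying by (1 - m) zeroes the hidden state at episode boundaries. A hypothetical TF1-style usage sketch (the sizes below are made up):

n_batch, n_in, n_hidden, n_steps = 4, 16, 32, 5
xs = [tf.placeholder(tf.float32, [n_batch, n_in]) for _ in range(n_steps)]  # one input per step
ms = [tf.placeholder(tf.float32, [n_batch, 1]) for _ in range(n_steps)]     # one done-mask per step
s0 = tf.placeholder(tf.float32, [n_batch, n_hidden])                        # initial hidden state
hs, s_final = gru(xs, ms, s0, scope='gru1', nh=n_hidden)                    # hs: per-step hidden states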
Code Example #3
File: models.py Project: tanzhenyu/baselines-tf2
def nature_cnn(input_shape, **conv_kwargs):
    """
    CNN from Nature paper.
    """
    print('input shape is {}'.format(input_shape))
    x_input = tf.keras.Input(shape=input_shape, dtype=tf.uint8)
    h = x_input
    h = tf.cast(h, tf.float32) / 255.
    h = conv('c1',
             nf=32,
             rf=8,
             stride=4,
             activation='relu',
             init_scale=np.sqrt(2))(h)
    h2 = conv('c2',
              nf=64,
              rf=4,
              stride=2,
              activation='relu',
              init_scale=np.sqrt(2))(h)
    h3 = conv('c3',
              nf=64,
              rf=3,
              stride=1,
              activation='relu',
              init_scale=np.sqrt(2))(h2)
    h3 = tf.keras.layers.Flatten()(h3)
    h3 = tf.keras.layers.Dense(units=512,
                               kernel_initializer=ortho_init(np.sqrt(2)),
                               name='fc1',
                               activation='relu')(h3)
    network = tf.keras.Model(inputs=[x_input], outputs=[h3])
    return network
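
A hypothetical usage of the factory above, assuming Atari-style 84x84x4 uint8 frame stacks (the shape is an assumption, not part of the snippet):

model = nature_cnn((84, 84, 4))
obs = np.zeros((1, 84, 84, 4), dtype=np.uint8)  # dummy batch of one frame stack
features = model(obs)                           # feature tensor of shape (1, 512)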
Code Example #4
File: utils.py Project: AnonymousSubmittedCode/SVIB
def deconv(x,
           scope,
           *,
           nf,
           rf,
           stride,
           output_shape,
           pad='VALID',
           init_scale=1.0,
           data_format='NHWC'):
    if data_format == 'NHWC':
        channel_ax = 3
        strides = [1, stride, stride, 1]
        bshape = [1, 1, 1, nf]
    elif data_format == 'NCHW':
        channel_ax = 1
        strides = [1, 1, stride, stride]
        bshape = [1, nf, 1, 1]
    else:
        raise NotImplementedError
    nin = x.get_shape()[channel_ax].value
    wshape = [rf, rf, nf, nin]
    with tf.variable_scope(scope):
        w = tf.get_variable("w", wshape, initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [1, nf, 1, 1],
                            initializer=tf.constant_initializer(0.0))
        if data_format == 'NHWC': b = tf.reshape(b, bshape)
        return b + tf.nn.conv2d_transpose(x,
                                          w,
                                          output_shape,
                                          strides=strides,
                                          padding=pad,
                                          data_format=data_format)
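
Unlike a forward convolution, `tf.nn.conv2d_transpose` requires an explicit `output_shape`. A hypothetical TF1-style call to the helper above (sizes made up; with 'VALID' padding the output side must equal (in - 1) * stride + rf):

x = tf.placeholder(tf.float32, [8, 5, 5, 64])   # NHWC: batch of 5x5 feature maps with 64 channels
# rf=4, stride=2, 'VALID'  ->  output side = (5 - 1) * 2 + 4 = 12
y = deconv(x, 'd1', nf=32, rf=4, stride=2, output_shape=[8, 12, 12, 32])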
Code Example #5
def ppo_cnn_model(state):
    kernel_initializer = ortho_init(np.sqrt(2))
    conv_kwargs = {
        "activation": tf.nn.relu,
        "kernel_initializer": kernel_initializer,
        # "padding": "same",
    }
    pool_kwargs = {
        # "padding": "same",
    }

    c1 = tf.layers.conv2d(state, filters=32, kernel_size=8, strides=1,
                          name="c1", **conv_kwargs)
    p1 = tf.layers.max_pooling2d(c1, pool_size=2, strides=2, name="p1",
                                 **pool_kwargs)
    c2 = tf.layers.conv2d(p1, filters=64, kernel_size=4, strides=1,
                          name="c2", **conv_kwargs)
    p2 = tf.layers.max_pooling2d(c2, pool_size=2, strides=2, name="p2",
                                 **pool_kwargs)
    c3 = tf.layers.conv2d(p2, filters=64, kernel_size=3, strides=1,
                          name="c3", **conv_kwargs)
    p3 = tf.layers.max_pooling2d(c3, pool_size=2, strides=2, name="p3",
                                 **pool_kwargs)
    f = tf.layers.flatten(p3)
    return tf.layers.dense(f, units=512, activation=tf.nn.relu,
                           kernel_initializer=kernel_initializer)
Code Example #6
def fc(x, scope, nh, act=tf.nn.relu, init_scale=1.0):
    with tf.variable_scope(scope):
        nin = x.get_shape()[1].value
        w = tf.get_variable("w", [nin, nh], initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [nh], initializer=tf.constant_initializer(0.0))
        z = tf.matmul(x, w)+b
        h = act(z)
        return h
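
A hypothetical TF1-style usage, stacking two of these layers under distinct variable scopes (names and sizes are made up):

obs = tf.placeholder(tf.float32, [None, 64])
h1 = fc(obs, 'fc1', nh=128, init_scale=np.sqrt(2))
h2 = fc(h1, 'fc2', nh=64, init_scale=np.sqrt(2))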
Code Example #7
def fc(x, scope, nh, *, init_scale=1.0, init_bias=0.0, collections=None, trainable=True):
    with tf.variable_scope(scope):
        nin = x.get_shape()[1].value
        w = tf.get_variable("w", [nin, nh], initializer=ortho_init(init_scale), collections=collections, trainable=trainable)
        b = tf.get_variable("b", [nh], initializer=tf.constant_initializer(init_bias), collections=collections, trainable=trainable)

        if trainable and not collections:
            tf.add_to_collection("l2_losses", tf.contrib.layers.l2_regularizer(1.0)(w))
            tf.add_to_collection("l2_losses", tf.contrib.layers.l2_regularizer(1.0)(b))

        return tf.matmul(x, w)+b
Code Example #8
File: models.py Project: tanzhenyu/baselines-tf2
    def network_fn(input_shape):
        print('input shape is {}'.format(input_shape))
        x_input = tf.keras.Input(shape=input_shape)
        # h = tf.keras.layers.Flatten(x_input)
        h = x_input
        for i in range(num_layers):
            h = tf.keras.layers.Dense(units=num_hidden,
                                      kernel_initializer=ortho_init(
                                          np.sqrt(2)),
                                      name='mlp_fc{}'.format(i),
                                      activation=activation)(h)

        network = tf.keras.Model(inputs=[x_input], outputs=[h])
        return network
Code Example #9
File: utils.py Project: AnonymousSubmittedCode/SVIB
def deconv_unequ_size(x,
                      scope,
                      *,
                      nf,
                      rf,
                      stride,
                      output_shape,
                      pad='VALID',
                      init_scale=1.0,
                      data_format='NHWC'):
    '''
    :param x: input tensor
    :param scope: variable scope name
    :param nf: number of output filters
    :param rf: a 2-element list, [kernel_h, kernel_w]
    :param stride: a 2-element list, [h, w]
    :param pad: padding mode for conv2d_transpose ('VALID' or 'SAME')
    :param init_scale: scale passed to ortho_init
    :param data_format: 'NHWC' or 'NCHW'
    :return: bias plus the transposed-convolution output
    '''
    if data_format == 'NHWC':
        channel_ax = 3
        strides = [1, stride[0], stride[1], 1]
        bshape = [1, 1, 1, nf]
    elif data_format == 'NCHW':
        channel_ax = 1
        strides = [1, 1, stride[0], stride[1]]
        bshape = [1, nf, 1, 1]
    else:
        raise NotImplementedError
    nin = x.get_shape()[channel_ax].value
    wshape = [rf[0], rf[1], nf, nin]
    with tf.variable_scope(scope):
        w = tf.get_variable("w", wshape, initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [1, nf, 1, 1],
                            initializer=tf.constant_initializer(0.0))
        if data_format == 'NHWC': b = tf.reshape(b, bshape)
        return b + tf.nn.conv2d_transpose(x,
                                          w,
                                          output_shape,
                                          strides=strides,
                                          padding=pad,
                                          data_format=data_format)
Code Example #10
File: models.py Project: tremonia/ma_neural_map
        def context_read(nm, s_flat, r, c_dim, initializer):
            # input: neural map (nm), flattened state (s_flat), r
            # output: c-dimensional context read vector (c)

            scope = 'cr'

            with tf.variable_scope(scope):
                input = tf.concat([s_flat, r], 1)

                no_rows_W = input.get_shape()[1].value
                if initializer == 'ortho_init':
                    W = tf.get_variable("W", [no_rows_W, c_dim],
                                        initializer=ortho_init(np.sqrt(2)))
                elif initializer == 'random_normal':
                    W = tf.get_variable(
                        "W", [no_rows_W, c_dim],
                        initializer=tf.random_normal_initializer(mean=0.0,
                                                                 stddev=0.1))
                elif initializer == 'glorot_uniform':
                    W = tf.get_variable(
                        "W", [no_rows_W, c_dim],
                        initializer=tf.glorot_uniform_initializer())
                batch_size = s_flat.shape[0]
                nm_reshaped = tf.reshape(nm, [batch_size, -1, c_dim])

                q = tf.matmul(input, W, name='cr_matmul')
                a = tf.keras.backend.batch_dot(nm_reshaped, q, (2, 1))
                #a_exp = tf.math.exp(a)
                #norm_fac = tf.reduce_sum(a_exp, 1)
                #norm_fac_expanded = tf.expand_dims(norm_fac, -1)
                #alpha = tf.math.divide(a_exp, norm_fac_expanded)
                alpha = tf.nn.softmax(a, name='cr_softmax')
                alpha_expanded = tf.expand_dims(alpha, -1)
                nm_scored = tf.math.multiply(alpha_expanded,
                                             nm_reshaped,
                                             name='cr_multiply')
                c = tf.reduce_sum(nm_scored, 1, name='cr_reduce_sum')

                return c
Code Example #11
    def network_fn(X):

        assert X.get_shape()[1].value == 45
        batch_size = X.get_shape()[0].value
        h = list()
        for i in range(1, 10):
            h.append(tf.layers.flatten(X[:, 5 * (i - 1):5 * i]))
        h.append(X[:, -1:])
        h.append(tf.layers.flatten(X[:, 40:-1]))
        nin_edge = h[0].get_shape()[1].value
        # nin_cloud = h[8].get_shape()[1].value
        nh = 4

        with tf.variable_scope("forMEC_fc_edge") as scope:
            w = tf.get_variable("w", [nin_edge, nh],
                                initializer=ortho_init(np.sqrt(2)))
            b = tf.get_variable("b", [nh],
                                initializer=tf.constant_initializer(0.0))

        for i in range(8):
            with tf.variable_scope("forMEC_fc_edge") as scope:
                scope.reuse_variables()
                w = tf.get_variable("w", [nin_edge, nh])
                b = tf.get_variable("b", [nh])
                h[i] = tf.reshape(tf.matmul(h[i], w) + b, [batch_size, -1])
        h_e = tf.concat(axis=1, values=h[:9])
        h_e = fc(h_e,
                 'forMEC_fc_edge2',
                 nh=int(num_hidden) // 2,  # integer division: nh must be an int
                 init_scale=np.sqrt(2))
        h_e = activation(h_e)

        h_c = fc(h[9], 'forMEC_fc_cloud', nh=nh, init_scale=np.sqrt(2))
        h_c = tf.reshape(h_c, [batch_size, -1])
        h = tf.concat(axis=1, values=[h_e, h_c])
        h = activation(h)
        h = fc(h, 'forMEC_fc2', nh=num_hidden, init_scale=np.sqrt(2))
        h = activation(h)
        return h
Code Example #12
def conv(x, scope, *, nf, rf, stride, pad='VALID', init_scale=1.0, data_format='NHWC', collections=None, trainable=True):
    if data_format == 'NHWC':
        channel_ax = 3
        strides = [1, stride, stride, 1]
        bshape = [1, 1, 1, nf]
    elif data_format == 'NCHW':
        channel_ax = 1
        strides = [1, 1, stride, stride]
        bshape = [1, nf, 1, 1]
    else:
        raise NotImplementedError
    nin = x.get_shape()[channel_ax].value
    wshape = [rf, rf, nin, nf]
    with tf.variable_scope(scope):
        w = tf.get_variable("w", wshape, initializer=ortho_init(init_scale), collections=collections, trainable=trainable)
        b = tf.get_variable("b", [1, nf, 1, 1], initializer=tf.constant_initializer(0.0), collections=collections, trainable=trainable)

        if trainable and not collections:
            tf.add_to_collection("l2_losses", tf.contrib.layers.l2_regularizer(1.0)(w))
            tf.add_to_collection("l2_losses", tf.contrib.layers.l2_regularizer(1.0)(b))

        if data_format == 'NHWC': b = tf.reshape(b, bshape)
        return b + tf.nn.conv2d(x, w, strides=strides, padding=pad, data_format=data_format)
Code Example #13
    def policy_fn(nbatch=None,
                  nsteps=None,
                  sess=None,
                  state_placeholder=None,
                  goal_placeholder=None,
                  summary_stats=False):
        assert state_placeholder is not None
        X = state_placeholder
        extra_tensors = {}

        if normalize_observations and X.dtype == tf.float32:
            encoded_x, rms = _normalize_clip_observation(X)
            extra_tensors['rms'] = rms
        else:
            encoded_x = X

        encoded_x = tf.to_float(encoded_x)

        with tf.variable_scope('pi', reuse=tf.AUTO_REUSE):
            policy_latent = policy_network(encoded_x)
            if goal_placeholder is not None:
                logger.info("concat obs and goals on latent")
                addition_layers = True
                if addition_layers:
                    nin = policy_latent.get_shape()[1].value
                    nh = 64
                    activ = tf.tanh
                    w = tf.get_variable("addition_fc_w", [nin, nh],
                                        initializer=ortho_init(np.sqrt(2)))
                    b = tf.get_variable(
                        "addition_fc_b", [nh],
                        initializer=tf.constant_initializer(0.))
                    policy_latent = activ(tf.matmul(policy_latent, w) + b)
                    logger.info('additional mlp on policy latent')
                if summary_stats:
                    variable_summaries(policy_latent, 'policy_latent')
                    variable_summaries(goal_placeholder, 'goal_placeholder')
                policy_latent = tf.concat([policy_latent, goal_placeholder],
                                          axis=-1,
                                          name="concat_latent")
            if isinstance(policy_latent, tuple):
                policy_latent, recurrent_tensors = policy_latent

                if recurrent_tensors is not None:
                    # recurrent architecture, need a few more steps
                    nenv = nbatch // nsteps
                    assert nenv > 0, 'Bad input for recurrent policy: batch size {} smaller than nsteps {}'.format(
                        nbatch, nsteps)
                    policy_latent, recurrent_tensors = policy_network(
                        encoded_x, nenv)
                    extra_tensors.update(recurrent_tensors)

        _v_net = value_network

        if _v_net is None or _v_net == 'shared':
            vf_latent = policy_latent
        else:
            if _v_net == 'copy':
                _v_net = policy_network
            else:
                assert callable(_v_net)

            with tf.variable_scope('vf', reuse=tf.AUTO_REUSE):
                # TODO recurrent architectures are not supported with value_network=copy yet
                vf_latent = _v_net(encoded_x)

        policy = PolicyWithValue(env=env,
                                 observations=state_placeholder,
                                 goals=goal_placeholder,
                                 latent=policy_latent,
                                 vf_latent=vf_latent,
                                 sess=sess,
                                 estimate_q=estimate_q,
                                 **extra_tensors)
        return policy