Code Example #1
def gaussian_ff_arch(obs, env_spec):
    action_dim = env_spec.action_space.flat_dim

    features = tf.nn.relu(linear(obs, dout=64, name='pol_feats1'))
    features = tf.nn.relu(linear(features, dout=64, name='pol_feats2'))

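    # small multipliers keep the initial head outputs near zero; the -1 shift
    # on log_std gives an initial policy std of roughly exp(-1)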
    mean = 0.1 * linear(features, dout=action_dim, name='pol_mean')
    log_std = 0.1 * linear(features, dout=action_dim, name='pol_log_std') - 1
    return mean, log_std
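Note: the linear and relu_layer helpers used throughout these examples come from inverse_rl.models.tf_util (imported explicitly in Code Example #4). A minimal sketch of what they plausibly look like, with signatures inferred from the call sites in this section (TF1-style variable scoping; not the project's exact implementation):

import tensorflow as tf

def linear(x, dout, name, bias=True, w_init=None):
    # fully connected layer: x @ W (+ b)
    with tf.variable_scope(name):
        din = int(x.get_shape()[-1])
        W = tf.get_variable('W', shape=(din, dout), initializer=w_init)
        out = tf.matmul(x, W)
        if bias:
            b = tf.get_variable('b', shape=(dout,),
                                initializer=tf.zeros_initializer())
            out = out + b
    return out

def relu_layer(x, dout, name):
    # linear layer followed by a ReLU nonlinearity
    return tf.nn.relu(linear(x, dout=dout, name=name))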
Code Example #2
def relu_net(x, layers=2, dout=1, d_hidden=32, vae=False, is_train=None):
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    if vae:
        assert is_train is not None
        out, mean, logstd = vae_layer(out, is_train=is_train)
        out = linear(out, dout=dout, name='lfinal')
        return out, mean, logstd
    out = linear(out, dout=dout, name='lfinal')
    return out
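A brief usage sketch for the vae branch (is_train must be a scalar boolean tensor, since vae_layer in Code Example #5 branches on it via tf.case; shapes here are illustrative):

x = tf.placeholder(tf.float32, [None, 10])
is_train = tf.placeholder(tf.bool, [])
out, mean, logstd = relu_net(x, layers=2, dout=1, vae=True, is_train=is_train)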
Code Example #3
def gaussian_conv_jnt_arch(obs,
                           env_spec,
                           ff_layers=2,
                           ff_d_hidden=64,
                           obs_img_shape=(64, 64, 3),
                           obs_jnt_dims=7):
    #obs_shape = env_spec.observation_space.shape
    action_dim = env_spec.action_space.flat_dim

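    # split the flat observation into image pixels and trailing joint values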
    x_img = obs[:, :-obs_jnt_dims]
    x_jnt = obs[:, -obs_jnt_dims:]
    if len(obs_img_shape) == 2:
        x_img = tf.reshape(x_img, (-1, ) + obs_img_shape + (1, ))
    else:
        x_img = tf.reshape(x_img, (-1, ) + obs_img_shape)

    out = x_img
    out = tf.layers.conv2d(inputs=out,
                           filters=2,
                           kernel_size=[5, 5],
                           strides=2,
                           padding='valid',
                           activation=tf.nn.relu,
                           name='pol_conv_l1')
    out = tf.layers.conv2d(inputs=out,
                           filters=2,
                           kernel_size=[5, 5],
                           strides=2,
                           padding='valid',
                           activation=tf.nn.relu,
                           name='pol_conv_l2')
    out_size = np.prod([int(size) for size in out.shape[1:]])
    out_flat = tf.reshape(out, [-1, out_size])

    # concatenate joint-state input with the flattened conv features
    out = tf.concat([out_flat, x_jnt], axis=1)
    for i in range(ff_layers):
        out = relu_layer(out, dout=ff_d_hidden, name='pol_ff_l%d' % i)
    out = linear(out, dout=ff_d_hidden, name='pol_ff_lfinal')
    features = out

    mean = 0.1 * linear(features, dout=action_dim, name='pol_mean')
    log_std = 0.1 * linear(features, dout=action_dim, name='pol_log_std') - 1
    return mean, log_std
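This architecture assumes obs arrives as a flat vector of image pixels followed by obs_jnt_dims joint values. An illustrative call (placeholder shape assumed, not from the source):

# 64*64*3 flattened RGB pixels plus 7 joint values per row
obs = tf.placeholder(tf.float32, [None, 64 * 64 * 3 + 7])
mean, log_std = gaussian_conv_jnt_arch(obs, env_spec)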
Code Example #4
File: gmm_policy.py  Project: qxcv/vdb-irl
def test_ff_relu_arch(obs,
                      clusters=2,
                      dout=1,
                      name='ff_relu',
                      reuse=False,
                      cluster_hack=True):
    from inverse_rl.models.tf_util import linear
    with tf.variable_scope(name, reuse=reuse):
        features = linear(obs, dout=32, name='features')
        features = tf.nn.relu(features)

        cluster_wts = tf.nn.softmax(
            linear(features, dout=clusters, name='cluster_wts'))
        mu_flat = linear(features, dout=clusters * dout, name='mu_flat') * 1e-1
        log_std_flat = linear(
            features, dout=clusters * dout, name='log_std_flat') * 1e-1
    mu = tf.reshape(mu_flat, [-1, clusters, dout])
    log_std = tf.reshape(log_std_flat, [-1, clusters, dout])
    return mu, log_std, cluster_wts
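The three outputs parameterize a per-sample Gaussian mixture: mu and log_std have shape [batch, clusters, dout], and cluster_wts is a softmax over mixture components. A hedged sketch of evaluating the mixture log-density from these tensors (the helper name is illustrative, not part of gmm_policy.py; assumes numpy imported as np):

def gmm_log_prob(x, mu, log_std, cluster_wts):
    # x: [batch, dout]; broadcast against the [batch, clusters, dout] parameters
    x = tf.expand_dims(x, axis=1)
    # diagonal-Gaussian log-density per component: [batch, clusters]
    log_comp = -0.5 * tf.reduce_sum(
        tf.square(x - mu) / tf.exp(2.0 * log_std)
        + 2.0 * log_std + np.log(2.0 * np.pi), axis=2)
    # log sum_k w_k * N(x | mu_k, std_k)
    return tf.reduce_logsumexp(log_comp + tf.log(cluster_wts), axis=1)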
Code Example #5
def vae_layer(x, is_train, name=None):
    if name is None:
        name = 'vae'
    else:
        name = name + '/vae'
    with tf.name_scope(name):
        in_dim = x.get_shape().as_list()[-1]
        assert in_dim is not None, "input tensor has variable last dim (?)"
        mean = linear(x,
                      in_dim,
                      name='mean',
                      w_init=tf.initializers.random_uniform(-0.01, 0.01))
        logstd = linear(x,
                        in_dim,
                        name='logstd',
                        w_init=tf.initializers.random_uniform(-0.01, 0.01))
        std = tf.exp(logstd, name='exp')
        noise = tf.random_normal(tf.shape(x), name='eps')
        noise_zeros = tf.zeros_like(noise)
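        # reparameterization trick: use mean + std * eps at train time,
        # pass the mean straight through (zero noise) at test time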
        noise_cond = tf.case([(is_train, lambda: noise)],
                             default=lambda: noise_zeros)
        reparam = std * noise_cond + mean
        return reparam, mean, logstd
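Usage sketch (shapes illustrative; is_train must be a scalar boolean tensor for the tf.case above):

h = tf.placeholder(tf.float32, [None, 32])
is_train = tf.placeholder(tf.bool, [])
z, mean, logstd = vae_layer(h, is_train=is_train)
# feed is_train=True to sample z = mean + std * eps; False passes the mean through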
Code Example #6
def conv_net_airl(x, dout=1, ff_layers=1, ff_d_hidden=16, env_spec=None):
    # undo reshaping based on env_spec
    obs_shape = env_spec.observation_space.shape
    dA = env_spec.action_space.flat_dim
    # x_obs = x[:, :-dA]
    # x_act = x[:, -dA:]
    x_obs = x
    if len(obs_shape) == 2:
        x_obs = tf.reshape(x_obs, (-1, ) + obs_shape + (1, ))
    else:
        x_obs = tf.reshape(x_obs, (-1, ) + obs_shape)

    out = x_obs

    out = tf.layers.conv2d(
        inputs=out,
        filters=2,
        kernel_size=[5, 5],
        strides=2,
        padding='valid',
        activation=tf.nn.relu,
        name='conv_l1')
    out = tf.layers.conv2d(
        inputs=out,
        filters=2,
        kernel_size=[5, 5],
        strides=2,
        padding='valid',
        activation=tf.nn.relu,
        name='conv_l2')
    out_size = np.prod([int(size) for size in out.shape[1:]])
    out_flat = tf.reshape(out, [-1, out_size])

    # action concat disabled: score observations only
    # out = tf.concat([out_flat, x_act], axis=1)
    out = out_flat
    for i in range(ff_layers):
        out = relu_layer(out, dout=ff_d_hidden, name='ff_l%d' % i)
    out = linear(out, dout=dout, name='ff_lfinal')
    return out
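Unlike the policy networks above, this AIRL reward/discriminator net takes env_spec as a keyword argument and, with the concat disabled, scores observations only. An illustrative call, assuming an rllab-style env_spec:

reward = conv_net_airl(x, dout=1, ff_layers=1, ff_d_hidden=16, env_spec=env_spec)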
Code Example #7
File: architectures.py  Project: morrimax/inverse_rl
def relu_net(x, layers=2, dout=1, d_hidden=32, last_layer_bias=True):
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    out = linear(out, dout=dout, name='lfinal', bias=last_layer_bias)
    return out
Code Example #8
File: architectures.py  Project: morrimax/inverse_rl
def linear_net(x, dout=1):
    out = x
    out = linear(out, dout=dout, name='lfinal')
    return out
Code Example #9
File: architectures.py  Project: morrimax/inverse_rl
def relu_net(x, layers=2, dout=1, d_hidden=32):
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    out = linear(out, dout=dout, name='lfinal')
    return out
Code Example #10
File: architectures.py  Project: Usaywook/irl
def relu_net_dropout(x, layers=2, dout=1, d_hidden=32):
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
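    # TF1-style dropout: the second positional argument is keep_prob (keep 70% of units)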
    out = tf.nn.dropout(linear(out, dout=dout, name='lfinal'), 0.7)
    return out
Code Example #11
def ff_relu_net(obs_act):
    l1 = relu_layer(obs_act, dout=32, name='l1')
    l2 = relu_layer(l1, dout=32, name='l2')
    return linear(l2, dout=1, name='lfinal')