def gaussian_ff_arch(obs, env_spec):
    """Feed-forward Gaussian policy head.

    Two shared 64-unit ReLU layers feed two linear heads producing the
    action distribution's mean and log standard deviation.

    Args:
        obs: observation tensor, presumably (batch, obs_dim) — TODO confirm.
        env_spec: environment spec; only `action_space.flat_dim` is read.

    Returns:
        Tuple (mean, log_std), each of width `action_space.flat_dim`.
        Both heads are scaled by 0.1 and log_std is shifted by -1 so the
        initial policy is low-variance and near zero-mean.
    """
    dA = env_spec.action_space.flat_dim
    hidden = tf.nn.relu(linear(obs, dout=64, name='pol_feats1'))
    hidden = tf.nn.relu(linear(hidden, dout=64, name='pol_feats2'))
    mean = 0.1 * linear(hidden, dout=dA, name='pol_mean')
    log_std = 0.1 * linear(hidden, dout=dA, name='pol_log_std') - 1
    return mean, log_std
def relu_net(x, layers=2, dout=1, d_hidden=32, vae=False, is_train=None):
    """ReLU MLP with an optional VAE bottleneck before the output layer.

    Args:
        x: input tensor.
        layers: number of hidden ReLU layers.
        dout: width of the final linear layer.
        d_hidden: width of each hidden layer.
        vae: if True, insert a reparameterized `vae_layer` bottleneck and
            additionally return its (mean, logstd).
        is_train: boolean tensor gating the VAE sampling noise; required
            when vae=True.

    Returns:
        The output tensor, or (out, mean, logstd) when vae=True.
    """
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    if vae:
        assert is_train is not None
        out, mean, logstd = vae_layer(out, is_train=is_train)
    # Single final projection; the original duplicated this call in both
    # return paths, which invites the two copies drifting apart.
    out = linear(out, dout=dout, name='lfinal')
    if vae:
        return out, mean, logstd
    return out
def gaussian_conv_jnt_arch(obs, env_spec, ff_layers=2, ff_d_hidden=64, obs_img_shape=(64, 64, 3), obs_jnt_dims=7):
    """Gaussian policy over image + joint-state observations.

    Splits the flat observation into a trailing joint-state vector
    (`obs_jnt_dims` entries) and a leading flattened image, runs the image
    through two small conv layers, concatenates the conv features with the
    joint state, and applies a ReLU MLP before the mean / log-std heads.

    Args:
        obs: flat observation tensor; assumes layout [image..., joints] —
            TODO confirm against the env's observation packing.
        env_spec: env spec; only `action_space.flat_dim` is read.
        ff_layers: number of ReLU layers after the conv/joint concat.
        ff_d_hidden: width of those layers (and the final feature layer).
        obs_img_shape: (H, W) or (H, W, C) shape of the image portion.
        obs_jnt_dims: number of trailing joint-state dimensions.

    Returns:
        Tuple (mean, log_std), each of width `action_space.flat_dim`;
        both scaled by 0.1 and log_std shifted by -1.
    """
    #obs_shape = env_spec.observation_space.shape
    action_dim = env_spec.action_space.flat_dim
    # Trailing obs_jnt_dims entries are joint state; the rest is the image.
    x_img = obs[:, :-obs_jnt_dims]
    x_jnt = obs[:, -obs_jnt_dims:]
    if len(obs_img_shape) == 2:
        # Grayscale: append an explicit single channel axis.
        x_img = tf.reshape(x_img, (-1, ) + obs_img_shape + (1, ))
    else:
        x_img = tf.reshape(x_img, (-1, ) + obs_img_shape)
    out = x_img
    out = tf.layers.conv2d(inputs=out, filters=2, kernel_size=[5, 5], strides=2, padding='valid', activation=tf.nn.relu, name='pol_conv_l1')
    out = tf.layers.conv2d(inputs=out, filters=2, kernel_size=[5, 5], strides=2, padding='valid', activation=tf.nn.relu, name='pol_conv_l2')
    # Static flatten of the conv output (all dims after batch are known).
    out_size = np.prod([int(size) for size in out.shape[1:]])
    out_flat = tf.reshape(out, [-1, out_size])
    # Concatenate the joint state with the flattened conv features.
    out = tf.concat([out_flat, x_jnt], axis=1)
    for i in range(ff_layers):
        out = relu_layer(out, dout=ff_d_hidden, name='pol_ff_l%d' % i)
    out = linear(out, dout=ff_d_hidden, name='pol_ff_lfinal')
    features = out
    mean = 0.1 * linear(features, dout=action_dim, name='pol_mean')
    log_std = 0.1 * linear(features, dout=action_dim, name='pol_log_std') - 1
    return mean, log_std
def test_ff_relu_arch(obs, clusters=2, dout=1, name='ff_relu', reuse=False, cluster_hack=True):
    """Mixture-of-Gaussians head over a single 32-unit ReLU feature layer.

    Args:
        obs: observation tensor.
        clusters: number of mixture components.
        dout: dimensionality of each component's mean / log-std.
        name: variable scope for all created variables.
        reuse: passed through to the variable scope.
        cluster_hack: accepted but never read — kept for caller
            compatibility (NOTE(review): candidate for removal).

    Returns:
        mu: (batch, clusters, dout) component means.
        log_std: (batch, clusters, dout) component log standard deviations.
        cluster_wts: (batch, clusters) softmax mixture weights.
    """
    from inverse_rl.models.tf_util import linear
    with tf.variable_scope(name, reuse=reuse):
        hidden = tf.nn.relu(linear(obs, dout=32, name='features'))
        cluster_wts = tf.nn.softmax(linear(hidden, dout=clusters, name='cluster_wts'))
        flat_dim = clusters * dout
        # 1e-1 scaling keeps the initial outputs close to zero.
        mu_flat = 1e-1 * linear(hidden, dout=flat_dim, name='mu_flat')
        log_std_flat = 1e-1 * linear(hidden, dout=flat_dim, name='log_std_flat')
        mu = tf.reshape(mu_flat, [-1, clusters, dout])
        log_std = tf.reshape(log_std_flat, [-1, clusters, dout])
        return mu, log_std, cluster_wts
def vae_layer(x, is_train, name=None):
    """Reparameterized Gaussian (VAE-style) bottleneck layer.

    Projects x to a mean and log-std of the same width, then returns
    `mean + std * eps` where eps ~ N(0, 1) during training and 0 otherwise.

    Args:
        x: input tensor with a statically-known last dimension.
        is_train: scalar boolean tensor; when true, sampling noise is
            injected, otherwise the layer is deterministic (returns mean).
        name: optional name-scope prefix; ops live under '<name>/vae'.

    Returns:
        Tuple (reparam, mean, logstd).

    NOTE(review): `tf.name_scope` only prefixes op names; the variables
    created by `linear` ('mean'/'logstd') are presumably scoped by
    `linear`'s own `name` handling — verify two vae_layer calls in one
    graph do not collide.
    """
    if name is None:
        name = 'vae'
    else:
        name = name + '/vae'
    with tf.name_scope(name):
        in_dim = x.get_shape().as_list()[-1]
        assert in_dim is not None, "input tensor has variable last dim (?)"
        # Near-zero init so the bottleneck starts close to the identity in
        # distribution (small mean, logstd ~ 0 offset from the init range).
        mean = linear(x, in_dim, name='mean', w_init=tf.initializers.random_uniform(-0.01, 0.01))
        logstd = linear(x, in_dim, name='logstd', w_init=tf.initializers.random_uniform(-0.01, 0.01))
        std = tf.exp(logstd, name='exp')
        noise = tf.random_normal(tf.shape(x), name='eps')
        noise_zeros = tf.zeros_like(noise)
        # Graph-level gate: noise when is_train is true, zeros otherwise
        # (tf.case with a single predicate acts like tf.cond here).
        noise_cond = tf.case([(is_train, lambda: noise)], default=lambda: noise_zeros)
        reparam = std * noise_cond + mean
        return reparam, mean, logstd
def conv_net_airl(x, dout=1, ff_layers=1, ff_d_hidden=16, env_spec=None):
    """Convolutional reward/discriminator net for AIRL over image observations.

    Reshapes the flat input back to the env's image shape, applies two small
    conv layers, flattens, and finishes with a ReLU MLP and linear head.

    Args:
        x: flat observation tensor (action part currently unused — see the
            commented-out split below).
        dout: width of the final linear output.
        ff_layers: number of ReLU layers after the conv stack.
        ff_d_hidden: width of those layers.
        env_spec: env spec providing `observation_space.shape`.
            NOTE(review): despite the None default, env_spec is dereferenced
            unconditionally — callers must always pass it.

    Returns:
        Output tensor of width `dout`.
    """
    # undo reshaping based on env_spec
    obs_shape = env_spec.observation_space.shape
    dA = env_spec.action_space.flat_dim
    # x_obs = x[:, :-dA]
    # x_act = x[:, -dA:]
    x_obs = x
    if len(obs_shape) == 2:
        # Grayscale: append an explicit single channel axis.
        x_obs = tf.reshape(x_obs, (-1, ) + obs_shape + (1, ))
    else:
        x_obs = tf.reshape(x_obs, (-1, ) + obs_shape)
    out = x_obs
    out = tf.layers.conv2d(
        inputs=out, filters=2, kernel_size=[5, 5], strides=2,
        padding='valid', activation=tf.nn.relu, name='conv_l1')
    out = tf.layers.conv2d(
        inputs=out, filters=2, kernel_size=[5, 5], strides=2,
        padding='valid', activation=tf.nn.relu, name='conv_l2')
    # Static flatten of the conv output (all dims after batch are known).
    out_size = np.prod([int(size) for size in out.shape[1:]])
    out_flat = tf.reshape(out, [-1, out_size])
    # concat action
    #out = tf.concat([out_flat, x_act], axis=1)
    out = out_flat
    for i in range(ff_layers):
        out = relu_layer(out, dout=ff_d_hidden, name='ff_l%d' % i)
    out = linear(out, dout=dout, name='ff_lfinal')
    return out
def relu_net(x, last_layer_bias=True, layers=2, dout=1, d_hidden=32):
    """ReLU MLP with a configurable bias on the final linear layer.

    Bug fix: the original referenced `layers`, `d_hidden`, and `dout`
    without defining them (NameError unless module globals happened to
    exist); they are now keyword parameters with the same defaults as the
    sibling `relu_net` definitions, so existing call sites are unaffected.

    Args:
        x: input tensor.
        last_layer_bias: whether the final linear layer has a bias term.
        layers: number of hidden ReLU layers.
        dout: width of the final linear layer.
        d_hidden: width of each hidden layer.

    Returns:
        Output tensor of width `dout`.
    """
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    out = linear(out, dout=dout, name='lfinal', bias=last_layer_bias)
    return out
def linear_net(x, dout=1):
    """Single affine layer mapping x to `dout` outputs (no nonlinearity)."""
    return linear(x, dout=dout, name='lfinal')
def relu_net(x, layers=2, dout=1, d_hidden=32):
    """MLP: `layers` ReLU layers of width `d_hidden`, then a linear head.

    Args:
        x: input tensor.
        layers: number of hidden ReLU layers.
        dout: width of the final linear layer.
        d_hidden: width of each hidden layer.

    Returns:
        Output tensor of width `dout`.
    """
    hidden = x
    for layer_idx in range(layers):
        hidden = relu_layer(hidden, dout=d_hidden, name='l%d' % layer_idx)
    return linear(hidden, dout=dout, name='lfinal')
def relu_net_dropout(x, layers=2, dout=1, d_hidden=32, keep_prob=0.7):
    """ReLU MLP with dropout applied to the final linear output.

    Generalization: the keep probability (previously hard-coded to 0.7) is
    now a parameter with the same default, so existing callers see
    identical behavior.

    Args:
        x: input tensor.
        layers: number of hidden ReLU layers.
        dout: width of the final linear layer.
        d_hidden: width of each hidden layer.
        keep_prob: dropout keep probability for the output.
            NOTE(review): dropout is applied unconditionally — there is no
            train/eval gate; confirm this is intended at inference time.

    Returns:
        Output tensor of width `dout` with dropout applied.
    """
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    out = tf.nn.dropout(linear(out, dout=dout, name='lfinal'), keep_prob)
    return out
def ff_relu_net(obs_act):
    """Two 32-unit ReLU layers over (obs, action) input, scalar linear output."""
    hidden = obs_act
    for layer_name in ('l1', 'l2'):
        hidden = relu_layer(hidden, dout=32, name=layer_name)
    return linear(hidden, dout=1, name='lfinal')
def relu_net(x, last_layer_bias=True, layers=2, dout=1, d_hidden=32):
    """ReLU MLP with a configurable bias on the final linear layer.

    Bug fix: `layers`, `d_hidden`, and `dout` were referenced without being
    defined (NameError unless module globals happened to exist); they are
    now keyword parameters defaulting to the values used by the other
    `relu_net` definitions, keeping existing call sites working.

    Args:
        x: input tensor.
        last_layer_bias: whether the final linear layer has a bias term.
        layers: number of hidden ReLU layers.
        dout: width of the final linear layer.
        d_hidden: width of each hidden layer.

    Returns:
        Output tensor of width `dout`.
    """
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    out = linear(out, dout=dout, name='lfinal', bias=last_layer_bias)
    return out
def linear_net(x, dout=1):
    """Purely linear network: one affine layer of width `dout`."""
    output = linear(x, dout=dout, name='lfinal')
    return output
def relu_net(x, layers=2, dout=1, d_hidden=32):
    """Stack of `layers` ReLU layers (width `d_hidden`) plus a linear head.

    Args:
        x: input tensor.
        layers: number of hidden ReLU layers.
        dout: width of the final linear layer.
        d_hidden: width of each hidden layer.

    Returns:
        Output tensor of width `dout`.
    """
    activations = x
    i = 0
    while i < layers:
        activations = relu_layer(activations, dout=d_hidden, name='l%d' % i)
        i += 1
    activations = linear(activations, dout=dout, name='lfinal')
    return activations