import numpy as np
import tensorflow as tf


def relu_net(x, layers=2, dout=1, d_hidden=32, vae=False, is_train=None):
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    if vae:
        # Insert a stochastic VAE bottleneck before the final linear layer.
        assert is_train is not None
        out, mean, logstd = vae_layer(out, is_train=is_train)
        out = linear(out, dout=dout, name='lfinal')
        return out, mean, logstd
    out = linear(out, dout=dout, name='lfinal')
    return out
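# The helpers relu_layer, linear, and vae_layer are used throughout this file
# but defined elsewhere. What follows is a minimal sketch of plausible
# implementations, assuming the TF1 style used above; signatures and the
# vae_layer behavior are assumptions, not the actual code.
def linear(x, dout, name, bias=True):
    # Plain affine layer: x @ W (+ b).
    with tf.variable_scope(name):
        din = int(x.get_shape()[-1])
        W = tf.get_variable('W', shape=(din, dout))
        out = tf.matmul(x, W)
        if bias:
            b = tf.get_variable('b', shape=(dout,),
                                initializer=tf.zeros_initializer())
            out = out + b
        return out


def relu_layer(x, dout, name):
    # Affine layer followed by a ReLU nonlinearity.
    return tf.nn.relu(linear(x, dout=dout, name=name))


def vae_layer(x, is_train, name='vae'):
    # Assumed sketch of a stochastic bottleneck via the reparameterization
    # trick: predict a mean and log-std, sample with Gaussian noise at train
    # time, and pass the mean through at test time. is_train is assumed to
    # be a scalar boolean tensor.
    with tf.variable_scope(name):
        dout = int(x.get_shape()[-1])
        mean = linear(x, dout=dout, name='mean')
        logstd = linear(x, dout=dout, name='logstd')
        noise = tf.random_normal(tf.shape(mean))
        sample = mean + tf.exp(logstd) * noise
        out = tf.cond(is_train, lambda: sample, lambda: mean)
        return out, mean, logstd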
def conv_net_airl_softplus(x, dout=1, ff_layers=1, ff_d_hidden=16, env_spec=None):
    # Undo the upstream flattening of the observation, based on env_spec.
    obs_shape = env_spec.observation_space.shape
    dA = env_spec.action_space.flat_dim
    # x_obs = x[:, :-dA]
    # x_act = x[:, -dA:]
    x_obs = x
    if len(obs_shape) == 2:
        # Grayscale image: append a channel dimension.
        x_obs = tf.reshape(x_obs, (-1,) + obs_shape + (1,))
    else:
        x_obs = tf.reshape(x_obs, (-1,) + obs_shape)
    out = x_obs
    out = tf.layers.conv2d(
        inputs=out, filters=2, kernel_size=[5, 5], strides=2,
        padding='valid', activation=tf.nn.relu, name='conv_l1')
    out = tf.layers.conv2d(
        inputs=out, filters=2, kernel_size=[5, 5], strides=2,
        padding='valid', activation=tf.nn.relu, name='conv_l2')
    out_size = np.prod([int(size) for size in out.shape[1:]])
    out_flat = tf.reshape(out, [-1, out_size])
    # concat action
    # out = tf.concat([out_flat, x_act], axis=1)
    out = out_flat
    for i in range(ff_layers):
        out = relu_layer(out, dout=ff_d_hidden, name='ff_l%d' % i)
    out = softplus_layer(out, dout=dout, name='lfinal')
    return out
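# softplus_layer is referenced above but not defined in this section. A
# minimal sketch, assuming it is simply an affine layer with a softplus
# activation (keeping the output positive); the real implementation may
# differ.
def softplus_layer(x, dout, name):
    return tf.nn.softplus(linear(x, dout=dout, name=name))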
def gaussian_conv_jnt_arch(obs, env_spec, ff_layers=2, ff_d_hidden=64,
                           obs_img_shape=(64, 64, 3), obs_jnt_dims=7):
    # obs_shape = env_spec.observation_space.shape
    action_dim = env_spec.action_space.flat_dim
    # Split the flat observation into an image part and a joint-state part.
    x_img = obs[:, :-obs_jnt_dims]
    x_jnt = obs[:, -obs_jnt_dims:]
    if len(obs_img_shape) == 2:
        x_img = tf.reshape(x_img, (-1,) + obs_img_shape + (1,))
    else:
        x_img = tf.reshape(x_img, (-1,) + obs_img_shape)
    out = x_img
    out = tf.layers.conv2d(
        inputs=out, filters=2, kernel_size=[5, 5], strides=2,
        padding='valid', activation=tf.nn.relu, name='pol_conv_l1')
    out = tf.layers.conv2d(
        inputs=out, filters=2, kernel_size=[5, 5], strides=2,
        padding='valid', activation=tf.nn.relu, name='pol_conv_l2')
    out_size = np.prod([int(size) for size in out.shape[1:]])
    out_flat = tf.reshape(out, [-1, out_size])
    # Concatenate the joint state onto the flattened conv features.
    out = tf.concat([out_flat, x_jnt], axis=1)
    for i in range(ff_layers):
        out = relu_layer(out, dout=ff_d_hidden, name='pol_ff_l%d' % i)
    out = linear(out, dout=ff_d_hidden, name='pol_ff_lfinal')
    features = out
    # Scale the heads down and bias log_std toward small initial noise.
    mean = 0.1 * linear(features, dout=action_dim, name='pol_mean')
    log_std = 0.1 * linear(features, dout=action_dim, name='pol_log_std') - 1
    return mean, log_std
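# Hedged usage sketch: how the Gaussian head above is typically consumed.
# sample_action is a hypothetical helper, not part of the original code; it
# draws an action from the policy via the reparameterization trick.
def sample_action(obs_ph, env_spec):
    mean, log_std = gaussian_conv_jnt_arch(obs_ph, env_spec)
    noise = tf.random_normal(tf.shape(mean))
    return mean + tf.exp(log_std) * noise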
def relu_net(x, layers=2, dout=1, d_hidden=32, last_layer_bias=True):
    # Variant with an optional bias on the final linear layer.
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    out = linear(out, dout=dout, name='lfinal', bias=last_layer_bias)
    return out
def relu_net(x, layers=2, dout=1, d_hidden=32):
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    out = linear(out, dout=dout, name='lfinal')
    return out
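# Minimal usage sketch (the input shape and scope name are assumptions): the
# relu_net variants here are built inside a variable scope and fed a flat
# batch of features.
#
#   x_ph = tf.placeholder(tf.float32, shape=(None, 10))
#   with tf.variable_scope('reward'):
#       energy = relu_net(x_ph, layers=2, dout=1, d_hidden=32)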
def relu_net_dropout(x, layers=2, dout=1, d_hidden=32):
    out = x
    for i in range(layers):
        out = relu_layer(out, dout=d_hidden, name='l%d' % i)
    # TF1 semantics: the second argument is keep_prob, so 30% of the final
    # layer's units are dropped.
    out = tf.nn.dropout(linear(out, dout=dout, name='lfinal'), 0.7)
    return out
def ff_relu_net(obs_act):
    l1 = relu_layer(obs_act, dout=32, name='l1')
    l2 = relu_layer(l1, dout=32, name='l2')
    return linear(l2, dout=1, name='lfinal')