# Imports assumed from rllab's sandbox.rocky.tf layout; HalfCheetahTargEnv is
# a project-specific env whose import path is not shown in the source.
import numpy as np
import tensorflow as tf

import sandbox.rocky.tf.core.layers as L
from rllab.core.serializable import Serializable
from sandbox.rocky.tf.core.layers_powered import LayersPowered
from sandbox.rocky.tf.core.network import LSTMNetwork
from sandbox.rocky.tf.distributions.recurrent_categorical import RecurrentCategorical
from sandbox.rocky.tf.distributions.recurrent_diagonal_gaussian import RecurrentDiagonalGaussian
from sandbox.rocky.tf.misc import tensor_utils
from sandbox.rocky.tf.spaces.discrete import Discrete


def __init__(
        self,
        name,
        model,
        abstract_dim,
        reward_fn=None,  # possible to pass in reward_fn?
        hidden_dim=32,
        hidden_nonlinearity=tf.tanh,
        output_nonlinearity=None,
        lstm_layer_cls=L.LSTMLayer,
):
    with tf.variable_scope(name):
        # `self.model` is used by the loss below but was never assigned in
        # the original snippet.
        self.model = model
        self.obs_dim = abstract_dim
        # The original passed `input_layer=l_feature` with `l_feature`
        # undefined in this scope; letting the network build its own input
        # layer instead. `input_shape` is a tuple, matching the LSTMNetwork
        # calls in the policies below.
        self.net = LSTMNetwork(
            input_shape=(self.obs_dim, ),
            output_dim=self.obs_dim,
            hidden_dim=hidden_dim,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=output_nonlinearity,
            lstm_layer_cls=lstm_layer_cls,
            name="planner",
        )
        self.obs_var = self.net.input_layer.input_var
        self.output = L.get_output(self.net.output_layer, self.obs_var)

        env = HalfCheetahTargEnv()
        target_init = tf.constant(env.TARGET)
        # The original referenced an undefined `init`; `target_init` is meant.
        target = tf.get_variable(
            'target', initializer=target_init, trainable=False)

        # Negative model log-likelihood of the planned states, scaled by
        # their distance from the fixed target.
        self.loss = -self.model.get_loglikelihood(
            self.obs_var, self.output) * tf.norm(target - self.output)

        # `optim` was undefined in the original; Adam is assumed here.
        optim = tf.train.AdamOptimizer()
        self.optimizer = optim
        self.train_op = optim.minimize(self.loss)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())
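# Usage sketch for the planner above (not from the source). `Planner` stands
# in for the enclosing class, whose name the snippet does not show, and
# `dynamics_model` for any object exposing get_loglikelihood(obs_var, output)
# as the loss requires; both names and the dimensions are hypothetical.
#
# dynamics_model = ...  # must provide get_loglikelihood(obs_var, output)
# planner = Planner(name="planner",
#                   model=dynamics_model,
#                   abstract_dim=20,  # illustrative abstract-state dimension
#                   hidden_dim=32)
# # obs_batch: float32 array of shape (batch, time, abstract_dim)
# _, loss = planner.sess.run([planner.train_op, planner.loss],
#                            feed_dict={planner.obs_var: obs_batch})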
def __init__(
        self,
        name,
        env_spec,
        hidden_dim=32,
        feature_network=None,
        state_include_action=True,
        hidden_nonlinearity=tf.tanh,
        learn_std=True,
        init_std=1.0,
        output_nonlinearity=None,
        lstm_layer_cls=L.LSTMLayer,
):
    """
    :param env_spec: A spec for the env.
    :param hidden_dim: dimension of hidden layer
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :return:
    """
    with tf.variable_scope(name):
        Serializable.quick_init(self, locals())
        super(GaussianLSTMPolicy, self).__init__(env_spec)

        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim

        if state_include_action:
            input_dim = obs_dim + action_dim
        else:
            input_dim = obs_dim

        l_input = L.InputLayer(shape=(None, None, input_dim), name="input")

        if feature_network is None:
            feature_dim = input_dim
            l_flat_feature = None
            l_feature = l_input
        else:
            feature_dim = feature_network.output_layer.output_shape[-1]
            l_flat_feature = feature_network.output_layer
            l_feature = L.OpLayer(
                l_flat_feature,
                extras=[l_input],
                name="reshape_feature",
                op=lambda flat_feature, input: tf.reshape(
                    flat_feature,
                    tf.stack([
                        tf.shape(input)[0],
                        tf.shape(input)[1],
                        feature_dim
                    ])),
                shape_op=lambda _, input_shape: (
                    input_shape[0], input_shape[1], feature_dim))

        mean_network = LSTMNetwork(
            input_shape=(feature_dim, ),
            input_layer=l_feature,
            output_dim=action_dim,
            hidden_dim=hidden_dim,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=output_nonlinearity,
            lstm_layer_cls=lstm_layer_cls,
            name="mean_network")

        l_log_std = L.ParamLayer(
            mean_network.input_layer,
            num_units=action_dim,
            param=tf.constant_initializer(np.log(init_std)),
            name="output_log_std",
            trainable=learn_std,
        )

        l_step_log_std = L.ParamLayer(
            mean_network.step_input_layer,
            num_units=action_dim,
            param=l_log_std.param,
            name="step_output_log_std",
            trainable=learn_std,
        )

        self.mean_network = mean_network
        self.feature_network = feature_network
        self.l_input = l_input
        self.state_include_action = state_include_action

        flat_input_var = tf.placeholder(
            dtype=tf.float32, shape=(None, input_dim), name="flat_input")
        if feature_network is None:
            feature_var = flat_input_var
        else:
            feature_var = L.get_output(
                l_flat_feature,
                {feature_network.input_layer: flat_input_var})

        self.f_step_mean_std = tensor_utils.compile_function(
            [
                flat_input_var,
                mean_network.step_prev_state_layer.input_var,
            ],
            L.get_output([
                mean_network.step_output_layer, l_step_log_std,
                mean_network.step_hidden_layer, mean_network.step_cell_layer
            ], {mean_network.step_input_layer: feature_var}))

        self.l_log_std = l_log_std

        self.input_dim = input_dim
        self.action_dim = action_dim
        self.hidden_dim = hidden_dim

        self.prev_actions = None
        self.prev_hiddens = None
        self.prev_cells = None
        self.dist = RecurrentDiagonalGaussian(action_dim)

        out_layers = [mean_network.output_layer, l_log_std]
        if feature_network is not None:
            out_layers.append(feature_network.output_layer)

        LayersPowered.__init__(self, out_layers)
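# Construction sketch for GaussianLSTMPolicy (not from the source), assuming
# rllab's TfEnv/GymEnv/normalize wrappers; the environment name is
# illustrative only.
#
# from rllab.envs.gym_env import GymEnv
# from rllab.envs.normalized_env import normalize
# from sandbox.rocky.tf.envs.base import TfEnv
#
# env = TfEnv(normalize(GymEnv("Pendulum-v0")))
# policy = GaussianLSTMPolicy(name="policy",
#                             env_spec=env.spec,
#                             hidden_dim=32,
#                             state_include_action=True)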
def __init__(
        self,
        name,
        env_spec,
        hidden_dim=32,
        feature_network=None,
        prob_network=None,
        state_include_action=True,
        hidden_nonlinearity=tf.tanh,
        forget_bias=1.0,
        use_peepholes=False,
        lstm_layer_cls=L.LSTMLayer,
):
    """
    :param env_spec: A spec for the env.
    :param hidden_dim: dimension of hidden layer
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :return:
    """
    with tf.variable_scope(name):
        assert isinstance(env_spec.action_space, Discrete)
        Serializable.quick_init(self, locals())
        super(CategoricalLSTMPolicy, self).__init__(env_spec)

        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim

        if state_include_action:
            input_dim = obs_dim + action_dim
        else:
            input_dim = obs_dim

        l_input = L.InputLayer(shape=(None, None, input_dim), name="input")

        if feature_network is None:
            feature_dim = input_dim
            l_flat_feature = None
            l_feature = l_input
        else:
            feature_dim = feature_network.output_layer.output_shape[-1]
            l_flat_feature = feature_network.output_layer
            l_feature = L.OpLayer(
                l_flat_feature,
                extras=[l_input],
                name="reshape_feature",
                op=lambda flat_feature, input: tf.reshape(
                    flat_feature,
                    tf.stack([
                        tf.shape(input)[0],
                        tf.shape(input)[1],
                        feature_dim
                    ])),
                shape_op=lambda _, input_shape: (
                    input_shape[0], input_shape[1], feature_dim))

        if prob_network is None:
            prob_network = LSTMNetwork(
                input_shape=(feature_dim, ),
                input_layer=l_feature,
                output_dim=env_spec.action_space.n,
                hidden_dim=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=tf.nn.softmax,
                forget_bias=forget_bias,
                use_peepholes=use_peepholes,
                lstm_layer_cls=lstm_layer_cls,
                name="prob_network")

        self.prob_network = prob_network
        self.feature_network = feature_network
        self.l_input = l_input
        self.state_include_action = state_include_action

        flat_input_var = tf.placeholder(
            dtype=tf.float32, shape=(None, input_dim), name="flat_input")
        if feature_network is None:
            feature_var = flat_input_var
        else:
            feature_var = L.get_output(
                l_flat_feature,
                {feature_network.input_layer: flat_input_var})

        self.f_step_prob = tensor_utils.compile_function(
            [
                flat_input_var,
                # prob_network.step_prev_hidden_layer.input_var,
                # prob_network.step_prev_cell_layer.input_var
                prob_network.step_prev_state_layer.input_var,
            ],
            L.get_output([
                prob_network.step_output_layer,
                prob_network.step_hidden_layer,
                prob_network.step_cell_layer
            ], {prob_network.step_input_layer: feature_var}))

        self.input_dim = input_dim
        self.action_dim = action_dim
        self.hidden_dim = hidden_dim

        self.prev_actions = None
        self.prev_hiddens = None
        self.prev_cells = None
        self.dist = RecurrentCategorical(env_spec.action_space.n)

        out_layers = [prob_network.output_layer]
        if feature_network is not None:
            out_layers.append(feature_network.output_layer)

        LayersPowered.__init__(self, out_layers)
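# Construction sketch for CategoricalLSTMPolicy (not from the source). The
# constructor asserts a Discrete action space, so a discrete-control env is
# required; the env name and TfEnv/GymEnv wrappers are assumed from rllab.
#
# from rllab.envs.gym_env import GymEnv
# from sandbox.rocky.tf.envs.base import TfEnv
#
# env = TfEnv(GymEnv("CartPole-v0"))
# policy = CategoricalLSTMPolicy(name="policy",
#                                env_spec=env.spec,
#                                hidden_dim=32)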