Ejemplo n.º 1
0
 def update_objective_params(self, params=None, oid=None):
     """
     Refresh the local objective's parameters from explicit values and / or an object id
     :param params: when not None, handed to ``self.locals.objective.set_variables``
         together with the session
     :param oid: when not None, passed to ``utils.plasma_prefetch`` and then fed as
         ``plasma_var_oid`` while running the ``plasma_read_vars`` op
     """
     if params is not None:
         objective = self.locals.objective
         objective.set_variables(self.sess, params)

     # Nothing further to do unless an object id was supplied.
     if oid is None:
         return

     utils.plasma_prefetch([oid])
     feeds = {self.locals.plasma_var_oid: oid}
     self.sess.run(self.locals.plasma_read_vars, feeds)
Ejemplo n.º 2
0
 def update(
     self,
     t,
     var_oid=None,
     grad_oid=None,
     critic=False,
     policy=False,
     objective_local=False,
     objective_grads=False,
 ):
     """
     Run the requested update steps of the agent: critic, policy, and / or objective
     :param t: current time step
     :param var_oid: object id for the objective parameters; when not None it is stored
         on ``self.objective_vars_oid`` and passed to ``utils.plasma_prefetch``
     :param grad_oid: object id forwarded to ``compute_objective_gradients``
     :param critic: whether to update the critic
     :param policy: whether to update the policy
     :param objective_local: whether to update the objective locally
     :param objective_grads: whether to compute gradients for the objective to update globally
     :return: the result of ``compute_objective_gradients`` when ``objective_grads`` is
         truthy, otherwise None
     """
     if var_oid is not None:
         self.objective_vars_oid = var_oid
         utils.plasma_prefetch([var_oid])

     # TODO merge critic, policy, and objective OPs to single graph call?
     # The steps below run in a fixed order: critic, policy, local objective, gradients.
     if critic:
         self.update_critic(t)
     if policy:
         self.update_policy()
     if objective_local:
         self.local_update_objective()

     if not objective_grads:
         return None
     return self.compute_objective_gradients(t, grad_oid)
Ejemplo n.º 3
0
    def apply_gradients(self, grad_oids, var_oid):
        """
        Run the ``update_vars`` op with the given gradient and variable object ids
        :param grad_oids: object ids of the gradients; passed to ``utils.plasma_prefetch``
            before the run and to ``utils.plasma_free`` afterwards
        :param var_oid: object id fed as ``plasma_vars_oid``; remembered on
            ``self.var_oid`` once the run has finished
        """
        utils.plasma_prefetch(grad_oids)
        feeds = {
            self.plasma_grads_oids: grad_oids,
            self.plasma_vars_oid: var_oid,
        }
        self.sess.run(self.update_vars, feeds)

        # Free resources: first the previously remembered variable object (if any),
        # then the gradient objects that were just used.
        previous_var_oid = self.var_oid
        if previous_var_oid is not None:
            utils.plasma_free([previous_var_oid])
        self.var_oid = var_oid
        utils.plasma_free(grad_oids)