Beispiel #1
0
    def __init__(self, hyperparams):
        """
        Initialize from the ALG_MF defaults overlaid with hyperparams,
        then create the policy optimizer and the baseline.

        Args:
            hyperparams: Mapping whose entries override the ALG_MF
                defaults (applied with a top-level dict update).
        """
        # Deep-copy the defaults so that updating them here cannot
        # modify the shared ALG_MF object.
        merged = copy.deepcopy(ALG_MF)
        merged.update(hyperparams)
        Algorithm.__init__(self, merged)

        # The optimizer is constructed by calling the 'type' entry of
        # its own config, which also receives that config plus dO/dU.
        opt_class = self._hyperparams['policy_opt']['type']
        opt_config = self._hyperparams['policy_opt']
        self.policy_opt = opt_class(opt_config, self.dO, self.dU)

        self.baseline = LinearFeatureBaseline()
 def _advance_iteration_variables(self):
     """
     Advance to the next iteration.

     Delegates to Algorithm._advance_iteration_variables first (which,
     per the original description, moves 'cur' variables to 'prev',
     reinitializes 'cur', and advances the iteration counter). Then,
     for each index below self.M, copies last_kl_step from the 'prev'
     entry to the 'cur' entry and gives 'cur' a deep copy of the
     previous pol_info.
     """
     Algorithm._advance_iteration_variables(self)
     for idx in range(self.M):
         previous = self.prev[idx]
         current = self.cur[idx]
         current.traj_info.last_kl_step = previous.traj_info.last_kl_step
         # Deep copy so that 'cur' and 'prev' do not share one
         # pol_info object.
         current.pol_info = copy.deepcopy(previous.pol_info)
    def __init__(self, hyperparams):
        """
        Initialize from the ALG_MDGPS defaults overlaid with
        hyperparams, give every 'cur' entry a PolicyInfo with its own
        policy prior, and create the policy optimizer.

        Args:
            hyperparams: Mapping whose entries override the ALG_MDGPS
                defaults (applied with a top-level dict update).
        """
        # Deep-copy the defaults so that updating them here cannot
        # modify the shared ALG_MDGPS object.
        settings = copy.deepcopy(ALG_MDGPS)
        settings.update(hyperparams)
        Algorithm.__init__(self, settings)

        prior_config = self._hyperparams['policy_prior']
        for idx in range(self.M):
            entry = self.cur[idx]
            entry.pol_info = PolicyInfo(self._hyperparams)
            # A separate prior instance is built for each entry by
            # calling the configured 'type' with the prior config.
            entry.pol_info.policy_prior = prior_config['type'](prior_config)

        # The optimizer is constructed by calling the 'type' entry of
        # its own config, which also receives that config plus dO/dU.
        opt_class = self._hyperparams['policy_opt']['type']
        opt_config = self._hyperparams['policy_opt']
        self.policy_opt = opt_class(opt_config, self.dO, self.dU)