예제 #1
0
 def _advance_iteration_variables(self):
     """
     Roll the algorithm state forward by one iteration.

     A deep copy of the current per-condition data becomes 'prev',
     fresh 'cur' entries are created, and every per-iteration history
     dictionary receives a new, initially empty entry keyed by the
     incremented iteration count. 'self.new_traj_distr' is consumed
     and removed at the end.
     """
     self.iteration_count += 1
     step = self.iteration_count

     self.prev = copy.deepcopy(self.cur)
     self.cur = [IterationData() for _ in range(self.M)]

     # Open a new slot for this iteration in each history.
     self.traj_distr[step] = []
     self.traj_info[step] = []
     self.kl_div[step] = []
     self.dists_to_target[step] = []
     self.previous_cost = []

     for m in range(self.M):
         fresh = self.cur[m]
         info = TrajectoryInfo()
         fresh.traj_info = info

         # Carry dynamics (as an independent copy) and the step
         # parameters over from the previous iteration.
         stale = self.prev[m]
         info.dynamics = copy.deepcopy(stale.traj_info.dynamics)
         fresh.step_mult = stale.step_mult
         fresh.eta = stale.eta

         # The updated distribution becomes the current one and is
         # also recorded in the history.
         fresh.traj_distr = self.new_traj_distr[m]
         self.traj_distr[step].append(self.new_traj_distr[m])
         self.traj_info[step].append(info)

         if self._hyperparams['ioc']:
             fresh.prevcost_traj_info = TrajectoryInfo()
             self.previous_cost.append(self.cost[m].copy())

     delattr(self, 'new_traj_distr')
예제 #2
0
    def __init__(self, hyperparams):
        """
        Set up the algorithm state from a hyperparameter dictionary.

        The supplied settings are layered over a deep copy of the ALG
        defaults and stored as 'self._hyperparams'.

        Args:
            hyperparams: Settings dictionary. 'train_conditions' (or,
                when absent, 'conditions'), 'traj_opt' and 'cost' are
                read directly from it; all other keys are read from
                the merged configuration. 'cost' is either a single
                config dict shared by every condition or a list (or
                list subclass) with one config dict per condition.
        """
        config = copy.deepcopy(ALG)
        config.update(hyperparams)
        self._hyperparams = config

        if 'train_conditions' in hyperparams:
            self._cond_idx = hyperparams['train_conditions']
            self.M = len(self._cond_idx)
        else:
            # No explicit training conditions: use all of them for
            # both training and testing.
            self.M = hyperparams['conditions']
            self._cond_idx = range(self.M)
            self._hyperparams['train_conditions'] = self._cond_idx
            self._hyperparams['test_conditions'] = self._cond_idx
        self.iteration_count = 0

        # Grab a few values from the agent.
        agent = self._hyperparams['agent']
        self.T = self._hyperparams['T'] = agent.T
        self.dU = self._hyperparams['dU'] = agent.dU
        self.dX = self._hyperparams['dX'] = agent.dX
        self.dO = self._hyperparams['dO'] = agent.dO

        # These writes go into the 'init_traj_distr' dict itself;
        # dict.update() above is shallow, so a dict supplied through
        # hyperparams is modified in place.
        init_traj_distr = config['init_traj_distr']
        init_traj_distr['x0'] = agent.x0
        init_traj_distr['dX'] = agent.dX
        init_traj_distr['dU'] = agent.dU
        del self._hyperparams['agent']  # Don't want to pickle this.

        # IterationData objects for each condition.
        self.cur = [IterationData() for _ in range(self.M)]
        self.prev = [IterationData() for _ in range(self.M)]

        # The dynamics config is only looked up when dynamics fitting
        # is enabled.
        if self._hyperparams['fit_dynamics']:
            dynamics = self._hyperparams['dynamics']

        for m in range(self.M):
            self.cur[m].traj_info = TrajectoryInfo()
            if self._hyperparams['fit_dynamics']:
                self.cur[m].traj_info.dynamics = dynamics['type'](dynamics)
            init_traj_distr = extract_condition(
                self._hyperparams['init_traj_distr'], self._cond_idx[m]
            )
            self.cur[m].traj_distr = init_traj_distr['type'](init_traj_distr)

        self.traj_opt = hyperparams['traj_opt']['type'](
            hyperparams['traj_opt']
        )
        # isinstance (rather than an exact type comparison) so that
        # list subclasses are also treated as per-condition configs.
        if isinstance(hyperparams['cost'], list):
            self.cost = [
                hyperparams['cost'][i]['type'](hyperparams['cost'][i])
                for i in range(self.M)
            ]
        else:
            self.cost = [
                hyperparams['cost']['type'](hyperparams['cost'])
                for _ in range(self.M)
            ]
        self.base_kl_step = self._hyperparams['kl_step']

        # Starts out empty; the original note reads "For initialize",
        # so it is presumably filled in outside this method.
        self.mpc = []
예제 #3
0
파일: algorithm.py 프로젝트: qiyuanpang/lto
    def _advance_iteration_variables(self):
        """
        Shift the current per-condition data into 'prev', create fresh
        'cur' entries seeded from it, and bump the iteration counter.

        Dynamics and policy info are deep-copied into the new entries,
        except for their 'prior' / 'policy_prior' attributes, which are
        handed over to the new entries without being copied.
        'self.new_traj_distr' is consumed and removed.
        """
        def clone_sharing(obj, attr):
            # Deep-copy obj while keeping obj.<attr> out of the copy:
            # the attribute is detached first and then attached to
            # the clone. The source object is left with <attr> = None.
            shared = getattr(obj, attr)
            setattr(obj, attr, None)
            duplicate = copy.deepcopy(obj)
            setattr(duplicate, attr, shared)
            return duplicate

        self.iteration_count += 1
        self.prev = self.cur
        # TODO: change IterationData to reflect new stuff better
        for m in range(self.M):
            self.prev[m].new_traj_distr = self.new_traj_distr[m]

        self.cur = [IterationData() for _ in range(self.M)]
        for m in range(self.M):
            fresh = self.cur[m]
            fresh.traj_info = TrajectoryInfo()

            stale = self.prev[m]
            fresh.traj_info.dynamics = clone_sharing(
                stale.traj_info.dynamics, 'prior')

            fresh.step_mult = stale.step_mult
            fresh.eta = stale.eta
            fresh.traj_distr = self.new_traj_distr[m]

        delattr(self, 'new_traj_distr')

        for m in range(self.M):
            stale, fresh = self.prev[m], self.cur[m]
            fresh.traj_info.last_kl_step = stale.traj_info.last_kl_step
            fresh.pol_info = clone_sharing(stale.pol_info, 'policy_prior')
예제 #4
0
    def __init__(self, hyperparams):
        """
        Initialize the algorithm from a hyperparameter dictionary.

        The given settings are layered over a deep copy of the ALG
        defaults. T, dU, dX and dO are copied from the agent, which is
        then removed from the stored hyperparameters. Per-condition
        iteration data and per-iteration histories are created,
        followed by the trajectory optimizer, the cost objects and,
        when 'ioc' is set, the 'gt_cost' objects.
        """
        params = copy.deepcopy(ALG)
        params.update(hyperparams)
        self._hyperparams = params

        if 'train_conditions' not in hyperparams:
            self.M = hyperparams['conditions']
            self._cond_idx = range(self.M)
        else:
            self._cond_idx = hyperparams['train_conditions']
            self.M = len(self._cond_idx)
        self.iteration_count = 0

        # Copy a few values from the agent onto both the instance and
        # the stored hyperparameters.
        agent = params['agent']
        for name in ('T', 'dU', 'dX', 'dO'):
            value = getattr(agent, name)
            setattr(self, name, value)
            params[name] = value

        seed_distr = params['init_traj_distr']
        seed_distr['x0'] = agent.x0
        seed_distr['dX'] = agent.dX
        seed_distr['dU'] = agent.dU
        del params['agent']  # Keep the agent out of pickled state.

        # One IterationData per condition, for 'cur' and 'prev' alike.
        self.cur = [IterationData() for _ in range(self.M)]
        self.prev = [IterationData() for _ in range(self.M)]

        # Histories keyed by iteration count, starting with this one.
        first = self.iteration_count
        self.traj_distr = {first: []}
        self.traj_info = {first: []}
        self.kl_div = {first: []}
        self.dists_to_target = {first: []}
        self.sample_list = dict(
            (i, SampleList([])) for i in range(self.M))

        for m in range(self.M):
            info = TrajectoryInfo()
            self.cur[m].traj_info = info
            dyn_spec = params['dynamics']
            info.dynamics = dyn_spec['type'](dyn_spec)

            distr_spec = extract_condition(
                params['init_traj_distr'], self._cond_idx[m])
            distr = distr_spec['type'](distr_spec)
            self.cur[m].traj_distr = distr

            self.traj_distr[first].append(distr)
            self.traj_info[first].append(info)

        def build(key):
            # Instantiate hyperparams[key] through its own 'type' entry.
            spec = hyperparams[key]
            return spec['type'](spec)

        self.traj_opt = build('traj_opt')
        self.cost = [build('cost') for _ in range(self.M)]
        if params['ioc']:
            self.gt_cost = [build('gt_cost') for _ in range(self.M)]
        self.base_kl_step = params['kl_step']
예제 #5
0
파일: algorithm.py 프로젝트: DiddiZ/gps
    def __init__(self, hyperparams):
        """
        Initialize the algorithm from a hyperparameter dictionary.

        The given settings are layered over a deep copy of the ALG
        defaults. When no 'train_conditions' are supplied, the number
        of conditions comes from hyperparams['tac']['clusters'] if a
        'tac' entry exists, and from hyperparams['conditions']
        otherwise.
        """
        merged = copy.deepcopy(ALG)
        merged.update(hyperparams)
        self._hyperparams = merged
        self.timers = OrderedDict()

        if 'train_conditions' in hyperparams:
            self._cond_idx = hyperparams['train_conditions']
            self.M = len(self._cond_idx)
        else:
            if 'tac' in hyperparams:
                self.M = hyperparams['tac']['clusters']
            else:
                self.M = hyperparams['conditions']
            self._cond_idx = range(self.M)
            merged['train_conditions'] = self._cond_idx
            merged['test_conditions'] = self._cond_idx
        self.iteration_count = 0

        # Copy a few values from the agent onto both the instance and
        # the stored hyperparameters.
        agent = merged['agent']
        for name in ('T', 'dU', 'dX', 'dO'):
            value = getattr(agent, name)
            setattr(self, name, value)
            merged[name] = value

        seed_distr = merged['init_traj_distr']
        seed_distr['x0'] = agent.x0
        seed_distr['dX'] = agent.dX
        seed_distr['dU'] = agent.dU
        del merged['agent']  # Keep the agent out of pickled state.

        # One IterationData per condition, for 'cur' and 'prev' alike.
        self.cur = [IterationData() for _ in range(self.M)]
        self.prev = [IterationData() for _ in range(self.M)]

        self.new_mu = [None] * self.M
        self.new_sigma = [None] * self.M

        dynamics = merged['dynamics']
        for m in range(self.M):
            entry = self.cur[m]
            entry.traj_info = TrajectoryInfo()
            if dynamics is not None:
                entry.traj_info.dynamics = dynamics['type'](dynamics)
            # Every condition is initialized from the first condition
            # index.  TODO Global x0
            distr_spec = extract_condition(
                merged['init_traj_distr'], self._cond_idx[0])
            entry.traj_distr = distr_spec['type'](distr_spec)

        # No 'traj_opt' object is constructed here; that step is
        # disabled in this version.
        self.cost = [
            hyperparams['cost']['type'](hyperparams['cost'])
            for _ in range(self.M)
        ]
        self.base_kl_step = merged['kl_step']
예제 #6
0
 def _advance_iteration_variables(self):
     """
     Archive a deep copy of the current per-condition data as 'prev',
     replace 'cur' with fresh entries that each hold a new
     TrajectoryInfo, and increment the iteration counter.
     """
     self.iteration_count += 1
     self.prev = copy.deepcopy(self.cur)

     entries = [IterationData() for _ in range(self.M)]
     self.cur = entries
     for entry in entries:
         entry.traj_info = TrajectoryInfo()
예제 #7
0
파일: algorithm.py 프로젝트: turinglife/gps
 def _advance_iteration_variables(self):
     """
     Make the current per-condition data the previous data, create
     fresh current entries, and increment the iteration counter.

     Nothing is copied: 'prev' is the old 'cur' list itself, and each
     new entry references the same dynamics object as its
     predecessor. 'self.new_traj_distr' is consumed and removed.
     """
     self.iteration_count += 1
     self.prev = self.cur
     self.cur = [IterationData() for _ in range(self.M)]

     for m in range(self.M):
         successor = self.cur[m]
         successor.traj_info = TrajectoryInfo()

         predecessor = self.prev[m]
         successor.traj_info.dynamics = predecessor.traj_info.dynamics
         successor.step_mult = predecessor.step_mult
         successor.eta = predecessor.eta
         successor.traj_distr = self.new_traj_distr[m]

     delattr(self, 'new_traj_distr')
예제 #8
0
파일: algorithm.py 프로젝트: DiddiZ/gps
 def _advance_iteration_variables(self):
     """
     Archive a deep copy of 'cur' as 'prev', rebuild 'cur', and
     increment the iteration counter.

     The updated trajectory distributions are recorded on the archived
     entries and installed as the new current distributions;
     'self.new_traj_distr' is removed afterwards.
     """
     self.iteration_count += 1
     self.prev = copy.deepcopy(self.cur)

     # TODO: change IterationData to reflect new stuff better
     for m in range(self.M):
         archived = self.prev[m]
         archived.new_traj_distr = self.new_traj_distr[m]

     self.cur = [IterationData() for _ in range(self.M)]
     for m in range(self.M):
         fresh = self.cur[m]
         fresh.traj_info = TrajectoryInfo()

         archived = self.prev[m]
         fresh.traj_info.dynamics = copy.deepcopy(
             archived.traj_info.dynamics)
         fresh.step_mult = archived.step_mult
         fresh.eta = archived.eta
         fresh.traj_distr = self.new_traj_distr[m]

     delattr(self, 'new_traj_distr')
예제 #9
0
파일: algorithm.py 프로젝트: turinglife/gps
    def __init__(self, hyperparams):
        """
        Initialize the algorithm from a hyperparameter dictionary.

        The given settings are layered over a deep copy of the ALG
        defaults. T, dU, dX and dO are copied from the agent, which is
        then removed from the stored hyperparameters. One
        IterationData with trajectory info, dynamics and an initial
        trajectory distribution is created per condition, followed by
        the trajectory optimizer and the per-condition cost objects.
        """
        params = copy.deepcopy(ALG)
        params.update(hyperparams)
        self._hyperparams = params

        self.M = hyperparams['conditions']
        self.iteration_count = 0

        # Copy a few values from the agent onto both the instance and
        # the stored hyperparameters.
        agent = params['agent']
        for name in ('T', 'dU', 'dX', 'dO'):
            value = getattr(agent, name)
            setattr(self, name, value)
            params[name] = value

        seed_distr = params['init_traj_distr']
        seed_distr['x0'] = agent.x0
        seed_distr['dX'] = agent.dX
        seed_distr['dU'] = agent.dU
        del params['agent']  # Keep the agent out of pickled state.

        # One IterationData per condition, for 'cur' and 'prev' alike.
        self.cur = [IterationData() for _ in range(self.M)]
        self.prev = [IterationData() for _ in range(self.M)]

        for m in range(self.M):
            entry = self.cur[m]
            entry.traj_info = TrajectoryInfo()
            dyn_spec = params['dynamics']
            entry.traj_info.dynamics = dyn_spec['type'](dyn_spec)
            distr_spec = extract_condition(params['init_traj_distr'], m)
            entry.traj_distr = distr_spec['type'](distr_spec)

        def build(key):
            # Instantiate hyperparams[key] through its own 'type' entry.
            spec = hyperparams[key]
            return spec['type'](spec)

        self.traj_opt = build('traj_opt')
        self.cost = [build('cost') for _ in range(self.M)]
        self.base_kl_step = params['kl_step']
예제 #10
0
    def reset_alg(self):
        """
        Reset the per-condition trajectory state to its initial values.

        Rebuilds 'cur' (trajectory info, dynamics when 'fit_dynamics'
        is set, and the initial trajectory distributions), the
        trajectory optimizer, the cost objects and 'base_kl_step' from
        the stored hyperparameters. Each condition's 'last_pol' is
        carried over as a deep copy. 'iteration_count', 'prev' and
        'policy_opt' are not touched by this method.
        """
        # Snapshot every last_pol before self.cur is replaced. One
        # deep copy per condition is enough: the snapshot list is
        # local and each element is handed to exactly one new entry.
        last_pol = [
            copy.deepcopy(self.cur[m].last_pol) for m in range(self.M)
        ]
        self.cur = [IterationData() for _ in range(self.M)]

        # The dynamics config is only looked up when dynamics fitting
        # is enabled.
        fit_dynamics = self._hyperparams['fit_dynamics']
        if fit_dynamics:
            dynamics = self._hyperparams['dynamics']

        for m in range(self.M):
            self.cur[m].last_pol = last_pol[m]
            self.cur[m].traj_info = TrajectoryInfo()
            if fit_dynamics:
                self.cur[m].traj_info.dynamics = dynamics['type'](dynamics)
            init_traj_distr = extract_condition(
                self._hyperparams['init_traj_distr'], self._cond_idx[m]
            )
            self.cur[m].traj_distr = init_traj_distr['type'](init_traj_distr)

        traj_opt = self._hyperparams['traj_opt']
        self.traj_opt = traj_opt['type'](traj_opt)

        cost = self._hyperparams['cost']
        # isinstance (rather than an exact type comparison) so that
        # list subclasses are also treated as per-condition configs.
        if isinstance(cost, list):
            self.cost = [cost[i]['type'](cost[i]) for i in range(self.M)]
        else:
            self.cost = [cost['type'](cost) for _ in range(self.M)]
        self.base_kl_step = self._hyperparams['kl_step']
예제 #11
0
    def __init__(self, hyperparams):
        """
        Build the algorithm state from a hyperparameter dictionary.

        The supplied settings are layered over a deep copy of the ALG
        defaults. Inline notes marked "example config" come from the
        original author's walkthrough of one particular experiment
        setup; they illustrate typical values and are not guaranteed
        by this code.

        Args:
            hyperparams: Settings dictionary. 'train_conditions' (or,
                when absent, 'conditions'), 'traj_opt' and 'cost' are
                read directly from it; all other keys are read from
                the merged configuration. 'cost' is either a single
                config dict shared by every condition or a list (or
                list subclass) with one config dict per condition.
        """
        config = copy.deepcopy(ALG)
        config.update(hyperparams)
        self._hyperparams = config

        # Example config: 'train_conditions' is absent, so the else
        # branch runs with conditions == 2 and _cond_idx == [0, 1].
        if 'train_conditions' in hyperparams:
            self._cond_idx = hyperparams['train_conditions']
            self.M = len(self._cond_idx)
        else:
            self.M = hyperparams['conditions']
            self._cond_idx = range(self.M)
            self._hyperparams['train_conditions'] = self._cond_idx
            self._hyperparams['test_conditions'] = self._cond_idx

        self.iteration_count = 0

        # Grab a few values from the agent.
        agent = self._hyperparams['agent']
        self.T = self._hyperparams['T'] = agent.T
        self.dU = self._hyperparams['dU'] = agent.dU
        self.dX = self._hyperparams['dX'] = agent.dX
        self.dO = self._hyperparams['dO'] = agent.dO

        init_traj_distr = config['init_traj_distr']
        init_traj_distr['x0'] = agent.x0
        init_traj_distr['dX'] = agent.dX
        init_traj_distr['dU'] = agent.dU
        del self._hyperparams['agent']  # Don't want to pickle this.

        # IterationData objects for each condition. Per the original
        # annotations an IterationData bundles: sample_list, traj_info,
        # pol_info, traj_distr, new_traj_distr, cs, step_mult (1.0)
        # and eta (1.0) -- verify against the class definition.
        self.cur = [IterationData() for _ in range(self.M)]
        self.prev = [IterationData() for _ in range(self.M)]

        # Example config: 'fit_dynamics' is False, so no dynamics
        # objects are created. The config is only looked up when the
        # flag is set.
        fit_dynamics = self._hyperparams['fit_dynamics']
        if fit_dynamics:
            dynamics = self._hyperparams['dynamics']

        for m in range(self.M):
            # Per the original annotations a TrajectoryInfo bundles:
            # dynamics, x0mu, x0sigma, cc, cv, Cm and last_kl_step
            # (float('inf')) -- verify against the class definition.
            self.cur[m].traj_info = TrajectoryInfo()

            if fit_dynamics:
                self.cur[m].traj_info.dynamics = dynamics['type'](dynamics)

            # Example config: 'init_traj_distr' has 'type': init_lqr
            # plus gain, variance, stiffness, 'dt' and 'T' settings.
            # Per the original annotations, extract_condition returns
            # a new dict in which each list-valued entry is replaced
            # by its element for the given condition index and other
            # values pass through unchanged -- verify against its
            # definition.
            init_traj_distr = extract_condition(
                self._hyperparams['init_traj_distr'], self._cond_idx[m])

            # The extracted config builds the initial trajectory
            # distribution through its own 'type' entry.
            self.cur[m].traj_distr = init_traj_distr['type'](init_traj_distr)

        # Example config: 'traj_opt' has 'type': TrajOptLQRPython.
        self.traj_opt = hyperparams['traj_opt']['type'](
            hyperparams['traj_opt'])

        # Example config: 'cost' is a single dict with 'type': CostSum,
        # so the else branch builds one CostSum per condition.
        # isinstance (rather than an exact type comparison) so that
        # list subclasses are also treated as per-condition configs.
        if isinstance(hyperparams['cost'], list):
            self.cost = [
                hyperparams['cost'][i]['type'](hyperparams['cost'][i])
                for i in range(self.M)
            ]
        else:
            self.cost = [
                hyperparams['cost']['type'](hyperparams['cost'])
                for _ in range(self.M)
            ]

        # Example config: 'kl_step' is 5.0.
        self.base_kl_step = self._hyperparams['kl_step']
예제 #12
0
파일: algorithm.py 프로젝트: qiyuanpang/lto
    def __init__(self, hyperparams):
        """
        Initialize the algorithm from a hyperparameter dictionary.

        The given settings are layered over a deep copy of the ALG
        defaults. The agent is kept on the instance as 'self.agent'
        and removed from the stored hyperparameters. Per-condition
        trajectory info, dynamics, initial trajectory distributions,
        cost objects and policy info (with a policy prior) are
        created, along with the trajectory optimizer and the policy
        optimizer.
        """
        merged = copy.deepcopy(ALG)
        merged.update(hyperparams)
        self._hyperparams = merged

        if 'train_conditions' not in hyperparams:
            self.M = hyperparams['conditions']
            self._cond_idx = range(self.M)
            merged['train_conditions'] = self._cond_idx
            merged['test_conditions'] = self._cond_idx
        else:
            self._cond_idx = hyperparams['train_conditions']
            self.M = len(self._cond_idx)
        self.iteration_count = 0

        # Keep the agent and copy a few of its values onto both the
        # instance and the stored hyperparameters.
        agent = merged['agent']
        self.agent = agent
        for name in ('T', 'dU', 'dX', 'dO'):
            value = getattr(agent, name)
            setattr(self, name, value)
            merged[name] = value

        base_distr = merged['init_traj_distr']
        base_distr['x0'] = agent.x0
        base_distr['dX'] = agent.dX
        base_distr['dU'] = agent.dU
        del merged['agent']  # Keep the agent out of the pickled dict.

        # One IterationData per condition, for 'cur' and 'prev' alike.
        self.cur = [IterationData() for _ in range(self.M)]
        self.prev = [IterationData() for _ in range(self.M)]

        dyn_spec = merged['dynamics']
        for m in range(self.M):
            entry = self.cur[m]
            entry.traj_info = TrajectoryInfo()
            entry.traj_info.dynamics = dyn_spec['type'](dyn_spec)

            # The per-condition config also records which condition
            # index it was built for.
            cond = self._cond_idx[m]
            distr_spec = extract_condition(base_distr, cond)
            distr_spec['cur_cond_idx'] = cond
            entry.traj_distr = distr_spec['type'](distr_spec, agent)

        self.traj_opt = hyperparams['traj_opt']['type'](
            hyperparams['traj_opt'])

        # Each cost object gets its own shallow copy of the cost
        # config, tagged with its condition index.
        self.cost = []
        for m in range(self.M):
            cost_spec = hyperparams['cost'].copy()
            cost_spec['cur_cond_idx'] = self._cond_idx[m]
            self.cost.append(hyperparams['cost']['type'](cost_spec))

        self.base_kl_step = merged['kl_step']

        prior_spec = merged['policy_prior']
        for m in range(self.M):
            entry = self.cur[m]
            entry.pol_info = PolicyInfo(merged)
            entry.pol_info.policy_prior = prior_spec['type'](prior_spec)

        self.policy_opt = merged['policy_opt']['type'](
            merged['policy_opt'], self.dO, self.dU)