Beispiel #1
0
    def _run_rollout(self, itr):
        T = params['probcoll']['T']
        label_with_noise = params['probcoll']['label_with_noise']

        samples = []
        world_infos = []
        mpc_infos = []

        self._conditions.reset()
        for cond in xrange(self._conditions.length):
            rep = 0
            while rep < self._conditions.repeats:
                self._logger.info(
                    '\t\tStarting cond {0} rep {1} itr {2}'.format(
                        cond, rep, itr))
                start = time.time()
                if (cond == 0 and rep == 0) or self._world.randomize:
                    self._reset_world(itr, cond, rep)

                x0 = self._conditions.get_cond(cond, rep=rep)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                for t in xrange(T):
                    self._update_world(sample_T, t)

                    x0 = sample_T.get_X(t=t)

                    rollout, rollout_no_noise = self._agent.sample_policy(
                        x0, self._mpc_policy, T=1, only_noise=label_with_noise)

                    o = rollout.get_O(t=0)
                    if label_with_noise:
                        u = rollout.get_U(t=0)
                    else:
                        u = rollout_no_noise.get_U(t=0)

                    sample_T.set_U(u, t=t)
                    sample_T.set_O(o, t=t)

                    if not self._use_dynamics:
                        sample_T.set_X(rollout.get_X(t=0), t=t)

                    if self._world.is_collision(sample_T, t=t):
                        self._logger.warning(
                            '\t\t\tCrashed at t={0}'.format(t))
                        break

                    if self._use_dynamics:
                        if t < T - 1:
                            x_tp1 = self._dynamics.evolve(x0, u)
                            sample_T.set_X(x_tp1, t=t + 1)

                    if hasattr(self._mpc_policy, '_curr_traj'):
                        self._world.update_visualization(
                            sample_T, self._mpc_policy._curr_traj, t)

                else:
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))

                sample = sample_T.match(slice(0, t + 1))
                world_info = self._get_world_info()
                mpc_info = self._mpc_policy.get_info()

                if not self._is_good_rollout(sample, t):
                    self._logger.warning(
                        '\t\t\tNot good rollout. Repeating rollout.'.format(t))
                    continue

                samples.append(sample)
                world_infos.append(world_info)
                mpc_infos.append(mpc_info)

                assert (samples[-1].isfinite())
                elapsed = time.time() - start
                self._logger.info(
                    '\t\t\tFinished cond {0} rep {1} ({2:.1f}s, {3:.3f}x real-time)'
                    .format(cond, rep, elapsed, t * params['dt'] / elapsed))
                rep += 1

        self._itr_save_samples(itr, samples)
        self._itr_save_worlds(itr, world_infos)
        self._itr_save_mpcs(itr, mpc_infos)

        self._reset_world(itr, 0, 0)  # leave the world as it was
Beispiel #2
0
    def _run_testing(self, itr):
        if (itr != 0 and (itr == self._max_iter - 1 \
                or itr % params['world']['testing']['itr_freq'] == 0)):
            self._logger.info('Itr {0} testing'.format(itr))
            if self._agent.sim:
                #                if self._async_on:
                #                    self._logger.debug('Recovering probcoll model')
                #                    self._probcoll_model.recover()
                T = params['probcoll']['T']
                conditions = params['world']['testing']['positions']
                samples = []
                reset_pos, reset_quat = self._agent.get_sim_state_data()
                for cond in xrange(len(conditions)):
                    self._logger.info('\t\tTesting cond {0} itr {1}'.format(
                        cond, itr))
                    start = time.time()
                    self._agent.execute_control(None,
                                                reset=True,
                                                pos=conditions[cond])
                    x0 = self._conditions.get_cond(0)
                    sample_T = Sample(meta_data=params, T=T)
                    sample_T.set_X(x0, t=0)
                    for t in xrange(T):
                        x0 = sample_T.get_X(t=t)

                        rollout, rollout_no_noise = self._agent.sample_policy(
                            x0, self._mpc_policy, T=1, use_noise=False)

                        o = rollout.get_O(t=0)
                        u = rollout_no_noise.get_U(t=0)

                        sample_T.set_U(u, t=t)
                        sample_T.set_O(o, t=t)

                        if not self._use_dynamics:
                            sample_T.set_X(rollout.get_X(t=0), t=t)

                        if self._world.is_collision(sample_T, t=t):
                            self._logger.warning(
                                '\t\t\tCrashed at t={0}'.format(t))
                            break

                        if self._use_dynamics:
                            if t < T - 1:
                                x_tp1 = self._dynamics.evolve(x0, u)
                                sample_T.set_X(x_tp1, t=t + 1)

                    else:
                        self._logger.info('\t\t\tLasted for t={0}'.format(t))

                    sample = sample_T.match(slice(0, t + 1))

                    if not self._is_good_rollout(sample, t):
                        self._logger.warning(
                            '\t\t\tNot good rollout. Repeating rollout.'.
                            format(t))
                        continue

                    samples.append(sample)

                    assert (samples[-1].isfinite())
                    elapsed = time.time() - start
                    self._logger.info(
                        '\t\t\tFinished cond {0} of testing ({1:.1f}s, {2:.3f}x real-time)'
                        .format(cond, elapsed, t * params['dt'] / elapsed))

                self._itr_save_samples(itr, samples, prefix='testing_')
                self._agent.execute_control(None,
                                            reset=True,
                                            pos=reset_pos,
                                            quat=reset_quat)
            else:
                pass