def _run_rollout(self, itr): T = params['probcoll']['T'] label_with_noise = params['probcoll']['label_with_noise'] samples = [] world_infos = [] mpc_infos = [] self._conditions.reset() for cond in xrange(self._conditions.length): rep = 0 while rep < self._conditions.repeats: self._logger.info( '\t\tStarting cond {0} rep {1} itr {2}'.format( cond, rep, itr)) start = time.time() if (cond == 0 and rep == 0) or self._world.randomize: self._reset_world(itr, cond, rep) x0 = self._conditions.get_cond(cond, rep=rep) sample_T = Sample(meta_data=params, T=T) sample_T.set_X(x0, t=0) for t in xrange(T): self._update_world(sample_T, t) x0 = sample_T.get_X(t=t) rollout, rollout_no_noise = self._agent.sample_policy( x0, self._mpc_policy, T=1, only_noise=label_with_noise) o = rollout.get_O(t=0) if label_with_noise: u = rollout.get_U(t=0) else: u = rollout_no_noise.get_U(t=0) sample_T.set_U(u, t=t) sample_T.set_O(o, t=t) if not self._use_dynamics: sample_T.set_X(rollout.get_X(t=0), t=t) if self._world.is_collision(sample_T, t=t): self._logger.warning( '\t\t\tCrashed at t={0}'.format(t)) break if self._use_dynamics: if t < T - 1: x_tp1 = self._dynamics.evolve(x0, u) sample_T.set_X(x_tp1, t=t + 1) if hasattr(self._mpc_policy, '_curr_traj'): self._world.update_visualization( sample_T, self._mpc_policy._curr_traj, t) else: self._logger.info('\t\t\tLasted for t={0}'.format(t)) sample = sample_T.match(slice(0, t + 1)) world_info = self._get_world_info() mpc_info = self._mpc_policy.get_info() if not self._is_good_rollout(sample, t): self._logger.warning( '\t\t\tNot good rollout. Repeating rollout.'.format(t)) continue samples.append(sample) world_infos.append(world_info) mpc_infos.append(mpc_info) assert (samples[-1].isfinite()) elapsed = time.time() - start self._logger.info( '\t\t\tFinished cond {0} rep {1} ({2:.1f}s, {3:.3f}x real-time)' .format(cond, rep, elapsed, t * params['dt'] / elapsed)) rep += 1 self._itr_save_samples(itr, samples) self._itr_save_worlds(itr, world_infos) self._itr_save_mpcs(itr, mpc_infos) self._reset_world(itr, 0, 0) # leave the world as it was
def _run_testing(self, itr): if (itr != 0 and (itr == self._max_iter - 1 \ or itr % params['world']['testing']['itr_freq'] == 0)): self._logger.info('Itr {0} testing'.format(itr)) if self._agent.sim: # if self._async_on: # self._logger.debug('Recovering probcoll model') # self._probcoll_model.recover() T = params['probcoll']['T'] conditions = params['world']['testing']['positions'] samples = [] reset_pos, reset_quat = self._agent.get_sim_state_data() for cond in xrange(len(conditions)): self._logger.info('\t\tTesting cond {0} itr {1}'.format( cond, itr)) start = time.time() self._agent.execute_control(None, reset=True, pos=conditions[cond]) x0 = self._conditions.get_cond(0) sample_T = Sample(meta_data=params, T=T) sample_T.set_X(x0, t=0) for t in xrange(T): x0 = sample_T.get_X(t=t) rollout, rollout_no_noise = self._agent.sample_policy( x0, self._mpc_policy, T=1, use_noise=False) o = rollout.get_O(t=0) u = rollout_no_noise.get_U(t=0) sample_T.set_U(u, t=t) sample_T.set_O(o, t=t) if not self._use_dynamics: sample_T.set_X(rollout.get_X(t=0), t=t) if self._world.is_collision(sample_T, t=t): self._logger.warning( '\t\t\tCrashed at t={0}'.format(t)) break if self._use_dynamics: if t < T - 1: x_tp1 = self._dynamics.evolve(x0, u) sample_T.set_X(x_tp1, t=t + 1) else: self._logger.info('\t\t\tLasted for t={0}'.format(t)) sample = sample_T.match(slice(0, t + 1)) if not self._is_good_rollout(sample, t): self._logger.warning( '\t\t\tNot good rollout. Repeating rollout.'. format(t)) continue samples.append(sample) assert (samples[-1].isfinite()) elapsed = time.time() - start self._logger.info( '\t\t\tFinished cond {0} of testing ({1:.1f}s, {2:.3f}x real-time)' .format(cond, elapsed, t * params['dt'] / elapsed)) self._itr_save_samples(itr, samples, prefix='testing_') self._agent.execute_control(None, reset=True, pos=reset_pos, quat=reset_quat) else: pass