def sample_policy(self, x0, policy, T=None, **policy_args):
    """Execute ``policy`` on the robot for ``T`` steps and record the trajectory.

    Runs a real-time control loop at 1/dt Hz (ROS rate), sending each control
    to the hardware and logging state/observation/control into a Sample.

    :param x0: initial state vector (stored at t=0)
    :param policy: object with ``act(x, o, t, noise=...)`` and a ``_T`` horizon
    :param T: number of timesteps to run; defaults to ``policy._T``
    :param policy_args: may contain ``noise`` (control-noise object);
        defaults to ``ZeroNoise(params)``
    :rtype: Sample
    """
    if T is None:
        T = policy._T
    policy_sample = Sample(meta_data=params, T=T)
    noise = policy_args.get('noise', ZeroNoise(params))
    # params['dt'] is the loop period, so 1/dt is the loop frequency in Hz
    rate = rospy.Rate(1. / params['dt'])
    policy_sample.set_X(x0, t=0)
    for t in xrange(T):
        # get observation and act
        x_t = policy_sample.get_X(t=t)
        o_t = self.get_observation(x_t)
        u_t = policy.act(x_t, o_t, t, noise=noise)
        # under teleop a human drives, so the computed control is not sent
        if params['probcoll']['planner_type'] != 'teleop':  # TODO hack
            self.execute_control(u_t)
        # record
        policy_sample.set_X(x_t, t=t)
        policy_sample.set_O(o_t, t=t)
        policy_sample.set_U(u_t, t=t)
        # propagate dynamics
        if t < T-1:
            x_tp1 = self._dynamics.evolve(x_t, u_t)
            policy_sample.set_X(x_tp1, t=t+1)
        # sleep out the rest of the dt cycle BEFORE reading the collision
        # callback, so the flag covers the whole cycle just executed
        rate.sleep()
        # see if collision in the past cycle
        policy_sample.set_O([int(self.coll_callback.get() is not None)],
                            t=t, sub_obs='collision')
    return policy_sample
def _run_itr_analyze(self, itr):
    """Run one single-step (T=1) MPC rollout per condition/repeat and save
    the resulting samples, world infos, and MPC infos for iteration ``itr``.

    :param itr: iteration index used for resets and for naming saved data
    """
    T = 1
    samples = []
    world_infos = []
    mpc_infos = []
    self._conditions.reset()
    for cond in xrange(self._conditions.length):
        for rep in xrange(self._conditions.repeats):
            self._logger.info('\t\tStarting cond {0} rep {1}'.format(
                cond, rep))
            # reset once at the start, or every time if the world randomizes
            if (cond == 0 and rep == 0) or self._world.randomize:
                self._reset_world(itr, cond, rep)
            x0 = self._conditions.get_cond(cond, rep=rep)
            sample_T = Sample(meta_data=params, T=T)
            sample_T.set_X(x0, t=0)
            mpc_policy = self._create_mpc(itr, x0)
            control_noise = self._create_control_noise(
            )  # create each time b/c may not be memoryless
            for t in xrange(T):
                self._update_world(sample_T, t)
                x0 = sample_T.get_X(t=t)
                rollout = self._agent.sample_policy(x0,
                                                    mpc_policy,
                                                    noise=control_noise,
                                                    T=1)
                u = rollout.get_U(t=0)
                o = rollout.get_O(t=0)
                u_label = u
                sample_T.set_U(u_label, t=t)
                sample_T.set_O(o, t=t)
                if hasattr(mpc_policy, '_curr_traj'):
                    self._world.update_visualization(
                        sample_T, mpc_policy._curr_traj, t)
            else:
                # for-else: runs when the loop completes without break
                self._logger.info('\t\t\tLasted for t={0}'.format(t))
            samples.append(sample_T)
            world_infos.append(self._get_world_info())
            mpc_infos.append(mpc_policy.get_info())
            assert (samples[-1].isfinite())
            # NOTE: removed a stale `rep += 1` here -- incrementing the
            # for-loop variable is a no-op in Python (leftover from the
            # while-loop form used in _run_rollout)
    self._itr_save_samples(itr, samples)
    self._itr_save_worlds(itr, world_infos)
    self._itr_save_mpcs(itr, mpc_infos)
def sample_policy(self, x0, policy, T=None, use_noise=True, only_noise=False,
                  **policy_args):
    """Execute ``policy`` for ``T`` steps, recording both the noisy and
    noise-free trajectories.

    In simulation (``self.sim``) the state is augmented from the simulator
    and collision comes from ``self.sim_coll``; on hardware the loop runs at
    1/dt Hz and collision comes from the ROS collision callback.

    :param x0: initial state vector
    :param policy: object with ``act(x, o, t, only_noise=...)`` returning
        (noisy control, noise-free control) and a ``_T`` horizon
    :param T: number of timesteps; defaults to ``policy._T``
    :param use_noise: if True execute the noisy control, else the noise-free one
    :param only_noise: if True, skip recording the noise-free controls
    :return: (policy_sample, policy_sample_no_noise)
    """
    if T is None:
        T = policy._T
    # params['dt'] is the loop period, so 1/dt is the loop frequency in Hz
    rate = rospy.Rate(1. / params['dt'])
    policy_sample = Sample(meta_data=params, T=T)
    policy_sample.set_X(x0, t=0)
    policy_sample_no_noise = Sample(meta_data=params, T=T)
    for t in xrange(T):
        # get observation and act
        x_t = policy_sample.get_X(t=t)
        o_t = self.get_observation(x_t)
        if self.sim:
            # fill in state from the simulator pose
            x_t = self._get_sim_state(x_t)
        start = time.time()
        u_t, u_t_no_noise = policy.act(x_t, o_t, t, only_noise=only_noise)
        # log how long the policy took to act
        self._logger.debug(time.time() - start)
        # only execute control if no collision
        # TODO is this necessary
        if int(o_t[policy_sample.get_O_idxs(sub_obs='collision')][0]) == 0:
            if use_noise:
                self.execute_control(u_t)
            else:
                self.execute_control(u_t_no_noise)
        # record
        policy_sample.set_X(x_t, t=t)
        policy_sample.set_O(o_t, t=t)
        policy_sample.set_U(u_t, t=t)
        if not only_noise:
            policy_sample_no_noise.set_U(u_t_no_noise, t=t)
        # In sim we do not have cycles
        if self.sim:
            policy_sample.set_O([int(self.sim_coll)], t=t,
                                sub_obs='collision')
        else:
            # propagate dynamics
            if t < T - 1:
                x_tp1 = self._dynamics.evolve(x_t, u_t)
                policy_sample.set_X(x_tp1, t=t + 1)
            rate.sleep()
            # see if collision in the past cycle
            policy_sample.set_O(
                [int(self.coll_callback.get() is not None)],
                t=t,
                sub_obs='collision')
    return policy_sample, policy_sample_no_noise
def _get_sim_state(self, xt):
    """Combine ``xt`` with the current simulator position/orientation.

    Reads ``self.sim_state`` (a message with ``position`` and ``orientation``
    fields), packs it into a one-step Sample's state, then returns the
    element-wise sum of ``xt`` and that state with NaNs replaced by zero on
    both sides -- presumably so NaN entries in ``xt`` are filled from the
    simulator (TODO confirm against callers).
    """
    msg = self.sim_state
    pose = np.array([
        msg.position.x, msg.position.y, msg.position.z,
        msg.orientation.x, msg.orientation.y, msg.orientation.z,
        msg.orientation.w
    ])
    sim_sample = Sample(meta_data=params, T=1)
    sim_sample.set_X(pose[:3], t=0, sub_state='position')
    sim_sample.set_X(pose[3:], t=0, sub_state='orientation')
    return np.nan_to_num(xt) + np.nan_to_num(sim_sample.get_X(t=0))
def sample_policy(self, x0, policy, T=None, only_noise=False, **policy_args):
    """
    Run the policy and collect the trajectory data

    :param x0: initial state
    :param policy: to execute; ``act`` returns (noisy, noise-free) controls
    :param T: horizon; defaults to ``policy._T``
    :param only_noise: if True, skip recording the noise-free controls
    :param policy_args: e.g. ref_traj, noise, etc
    :return: (noisy Sample, noise-free Sample)
    """
    if T is None:
        T = policy._T
    traj = Sample(meta_data=params, T=T)
    traj.set_X(x0, t=0)
    traj_no_noise = Sample(meta_data=params, T=T)
    for step in xrange(T):
        # observe the current state and query the policy
        state = traj.get_X(t=step)
        obs = self.get_observation(state)
        control, control_no_noise = policy.act(state, obs, step)
        # log state, observation, and control at this step
        traj.set_X(state, t=step)
        traj.set_O(obs, t=step)
        traj.set_U(control, t=step)
        if not only_noise:
            traj_no_noise.set_U(control_no_noise, t=step)
        # roll the dynamics forward to seed the next step's state
        if step < T - 1:
            traj.set_X(self._dynamics.evolve(state, control), t=step + 1)
    return traj, traj_no_noise
def act(self, x, obs, t, noise, ref_traj=None):
    """Return the first control of a straight-line trajectory that radiates
    outward at heading ``self.theta`` and speed ``self.speed``.

    Builds the full desired trajectory (stored in ``self._curr_traj`` for
    visualization) by evolving the dynamics under a constant linear velocity.

    :param x: current state (stored at t=0 of the planned trajectory)
    :param obs: current observation (stored at t=0)
    :param t: current timestep (unused by this policy)
    :param noise: control-noise object; its sample is added to the control
    :param ref_traj: unused
    :return: control u plus sampled noise
    """
    ### create desired path (radiates outward)
    # NOTE: dropped an unused local `dt` that was read from meta_data
    T, theta, speed = self._T, self.theta, self.speed
    traj = Sample(meta_data=self._meta_data, T=T)
    ### initialize
    traj.set_X(x, t=0)
    traj.set_O(obs, t=0)
    ### create desired path (radiates outward)
    linearvel = [speed * np.cos(theta), speed * np.sin(theta)]
    # loop variable renamed from `t` so it no longer shadows the parameter
    for ti in xrange(T - 1):
        x_t = traj.get_X(t=ti)
        x_tp1 = self._dynamics.evolve(x_t, linearvel)
        traj.set_X(x_tp1, t=ti + 1)
        traj.set_U(linearvel, t=ti)
    # last timestep gets the same constant control
    traj.set_U(linearvel, t=-1)
    self._curr_traj = traj
    u = traj.get_U(t=0)
    return u + noise.sample(u)
def _cem_step(self, x0, obs, mean, cov, M, K, lower_bound, upper_bound, dU, T, dynamics, cost_funcs, plot_env=None): """ :param mean: mean of trajectory controls Gaussian :param obs: observation :param cov: covariance of trajectory controls Gaussian :param M: sample M controls :param K: keep K lowest cost trajectories :return: mean, cov """ ### sample trajectories M_controls = [] while len(M_controls) < M: control = np.random.multivariate_normal(mean, cov) if np.all(control < upper_bound) and np.all(control > lower_bound): M_controls.append(control) samples = [] for U_flat in M_controls: sample = Sample(meta_data=self._meta_data, T=T) U = U_flat.reshape(T, dU) U[:, -1] = 0. # TODO hack sample.set_U(U, t=slice(0, T)) sample.set_O(obs, t=0) sample.set_X(x0, t=0) for sub_control, u_sub in self._config['fixed'].items(): sample.set_U(np.array([list(u_sub) * T]).T, t=slice(0, T), sub_control=sub_control) sample.rollout(dynamics) samples.append(sample) ### keep lowest K cost trajectories costs = [0.] * len(samples) for cost_func in cost_funcs: if hasattr(cost_func, 'eval_batch'): costs = [ c + cost_func_approx.J for c, cost_func_approx in zip( costs, cost_func.eval_batch(samples)) ] else: costs = [ c + cost_func.eval(sample).J for c, sample in zip(costs, samples) ] samples_costs = zip(samples, costs) samples_costs = sorted(samples_costs, key=lambda x: x[1]) K_samples = [x[0] for x in samples_costs[:K]] ### fit Gaussian data = np.vstack([s.get_U().ravel() for s in K_samples]) new_mean = np.mean(data, axis=0) new_cov = np.cov(data, rowvar=0) if plot_env: plot_env.rave_env.clear_plots() colors = [(0, 1, 0)] * K + [(1, 0, 0)] * (M - K) for sample, color in zip([x[0] for x in samples_costs], colors): for t in xrange(T - 1): plot_env.rave_env.plot_segment( sample.get_X(t=t, sub_state='position'), sample.get_X(t=t + 1, sub_state='position'), color=color) return new_mean, new_cov
def _run_rollout(self, itr):
    """Collect one rollout per condition/repeat with the MPC policy, retrying
    any rollout that fails ``_is_good_rollout``, and save samples, world
    infos, and MPC infos for iteration ``itr``.

    :param itr: iteration index used for resets, logging, and saved data
    """
    T = params['probcoll']['T']
    label_with_noise = params['probcoll']['label_with_noise']
    samples = []
    world_infos = []
    mpc_infos = []
    self._conditions.reset()
    for cond in xrange(self._conditions.length):
        rep = 0
        # while (not for) so a bad rollout can be repeated via `continue`
        # without advancing rep
        while rep < self._conditions.repeats:
            self._logger.info(
                '\t\tStarting cond {0} rep {1} itr {2}'.format(
                    cond, rep, itr))
            start = time.time()
            # reset once at the start, or every time if the world randomizes
            if (cond == 0 and rep == 0) or self._world.randomize:
                self._reset_world(itr, cond, rep)
            x0 = self._conditions.get_cond(cond, rep=rep)
            sample_T = Sample(meta_data=params, T=T)
            sample_T.set_X(x0, t=0)
            for t in xrange(T):
                self._update_world(sample_T, t)
                x0 = sample_T.get_X(t=t)
                # one-step rollout of the MPC policy from the current state
                rollout, rollout_no_noise = self._agent.sample_policy(
                    x0, self._mpc_policy, T=1, only_noise=label_with_noise)
                o = rollout.get_O(t=0)
                # label with the noisy or noise-free control per config
                if label_with_noise:
                    u = rollout.get_U(t=0)
                else:
                    u = rollout_no_noise.get_U(t=0)
                sample_T.set_U(u, t=t)
                sample_T.set_O(o, t=t)
                if not self._use_dynamics:
                    # use the agent's measured state instead of a model
                    sample_T.set_X(rollout.get_X(t=0), t=t)
                if self._world.is_collision(sample_T, t=t):
                    self._logger.warning(
                        '\t\t\tCrashed at t={0}'.format(t))
                    break
                if self._use_dynamics:
                    if t < T - 1:
                        x_tp1 = self._dynamics.evolve(x0, u)
                        sample_T.set_X(x_tp1, t=t + 1)
                if hasattr(self._mpc_policy, '_curr_traj'):
                    self._world.update_visualization(
                        sample_T, self._mpc_policy._curr_traj, t)
            else:
                # for-else: runs only when the loop finished without a crash
                self._logger.info('\t\t\tLasted for t={0}'.format(t))
            # truncate the sample to the timesteps actually executed
            sample = sample_T.match(slice(0, t + 1))
            world_info = self._get_world_info()
            mpc_info = self._mpc_policy.get_info()
            if not self._is_good_rollout(sample, t):
                self._logger.warning(
                    '\t\t\tNot good rollout. Repeating rollout.'.format(t))
                continue
            samples.append(sample)
            world_infos.append(world_info)
            mpc_infos.append(mpc_info)
            assert (samples[-1].isfinite())
            elapsed = time.time() - start
            self._logger.info(
                '\t\t\tFinished cond {0} rep {1} ({2:.1f}s, {3:.3f}x real-time)'
                .format(cond, rep, elapsed, t * params['dt'] / elapsed))
            rep += 1
    self._itr_save_samples(itr, samples)
    self._itr_save_worlds(itr, world_infos)
    self._itr_save_mpcs(itr, mpc_infos)
    self._reset_world(itr, 0, 0)  # leave the world as it was
def _run_testing(self, itr):
    """Evaluate the noise-free MPC policy on the configured testing positions
    (sim only), saving the resulting samples with a ``testing_`` prefix.

    Runs only on the last iteration or every ``itr_freq`` iterations (never
    on itr 0); restores the simulator pose afterwards.

    :param itr: iteration index used for scheduling, logging, and saved data
    """
    if (itr != 0 and (itr == self._max_iter - 1 \
            or itr % params['world']['testing']['itr_freq'] == 0)):
        self._logger.info('Itr {0} testing'.format(itr))
        if self._agent.sim:
            # if self._async_on:
            #     self._logger.debug('Recovering probcoll model')
            #     self._probcoll_model.recover()
            T = params['probcoll']['T']
            conditions = params['world']['testing']['positions']
            samples = []
            # remember the current sim pose so it can be restored at the end
            reset_pos, reset_quat = self._agent.get_sim_state_data()
            for cond in xrange(len(conditions)):
                self._logger.info('\t\tTesting cond {0} itr {1}'.format(
                    cond, itr))
                start = time.time()
                # teleport the agent to the testing position
                self._agent.execute_control(None,
                                            reset=True,
                                            pos=conditions[cond])
                x0 = self._conditions.get_cond(0)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)
                for t in xrange(T):
                    x0 = sample_T.get_X(t=t)
                    # noise-free execution for evaluation
                    rollout, rollout_no_noise = self._agent.sample_policy(
                        x0, self._mpc_policy, T=1, use_noise=False)
                    o = rollout.get_O(t=0)
                    u = rollout_no_noise.get_U(t=0)
                    sample_T.set_U(u, t=t)
                    sample_T.set_O(o, t=t)
                    if not self._use_dynamics:
                        # use the agent's measured state instead of a model
                        sample_T.set_X(rollout.get_X(t=0), t=t)
                    if self._world.is_collision(sample_T, t=t):
                        self._logger.warning(
                            '\t\t\tCrashed at t={0}'.format(t))
                        break
                    if self._use_dynamics:
                        if t < T - 1:
                            x_tp1 = self._dynamics.evolve(x0, u)
                            sample_T.set_X(x_tp1, t=t + 1)
                else:
                    # for-else: runs only when the loop finished without a crash
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))
                # truncate the sample to the timesteps actually executed
                sample = sample_T.match(slice(0, t + 1))
                if not self._is_good_rollout(sample, t):
                    self._logger.warning(
                        '\t\t\tNot good rollout. Repeating rollout.'.
                        format(t))
                    continue
                samples.append(sample)
                assert (samples[-1].isfinite())
                elapsed = time.time() - start
                self._logger.info(
                    '\t\t\tFinished cond {0} of testing ({1:.1f}s, {2:.3f}x real-time)'
                    .format(cond, elapsed, t * params['dt'] / elapsed))
            self._itr_save_samples(itr, samples, prefix='testing_')
            # restore the simulator pose recorded before testing
            self._agent.execute_control(None,
                                        reset=True,
                                        pos=reset_pos,
                                        quat=reset_quat)
        else:
            pass