def build_action_space(self, v_pref):
    """
    Build a discrete action space of speed_samples * rotation_samples + 1 actions:
    one stop action plus every combination of uniformly spaced speeds and rotations.
    """
    holonomic = self.kinematics == 'holonomic'
    # Exponentially spaced speeds (disabled in favor of uniform spacing):
    # speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref
    #           for i in range(self.speed_samples)]
    speeds = [(i + 1) / self.speed_samples * v_pref for i in range(self.speed_samples)]
    if holonomic:
        rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False)
    else:
        if self.rotation_constraint == np.pi:
            rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint,
                                    self.rotation_samples, endpoint=False)
        else:
            rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint,
                                    self.rotation_samples)

    action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)]
    self.action_group_index.append(0)
    for j, speed in enumerate(speeds):
        for i, rotation in enumerate(rotations):
            action_index = j * self.rotation_samples + i + 1
            self.action_group_index.append(action_index)
            if holonomic:
                action_space.append(ActionXY(speed * np.cos(rotation),
                                             speed * np.sin(rotation)))
            else:
                action_space.append(ActionRot(speed, rotation))

    self.speeds = speeds
    self.rotations = rotations
    self.action_space = action_space

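# Hedged usage sketch, not repo code: with the common settings speed_samples = 5 and
# rotation_samples = 16, the loop above yields one stop action plus 5 * 16 sampled
# actions, i.e. 81 in total. ActionXY is assumed here to be a simple namedtuple.
import numpy as np
from collections import namedtuple

ActionXY = namedtuple('ActionXY', ['vx', 'vy'])

speed_samples, rotation_samples, v_pref = 5, 16, 1.0
speeds = [(i + 1) / speed_samples * v_pref for i in range(speed_samples)]
rotations = np.linspace(0, 2 * np.pi, rotation_samples, endpoint=False)
action_space = [ActionXY(0, 0)]
for speed in speeds:
    for rotation in rotations:
        action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation)))
assert len(action_space) == speed_samples * rotation_samples + 1  # 81
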
def predict(self, state):
    if self.phase is None or self.device is None:
        raise AttributeError('Phase, device attributes have to be set!')
    if self.phase == 'train' and self.epsilon is None:
        raise AttributeError('Epsilon attribute has to be set in training phase')
    if self.reach_destination(state):
        return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)

    self.last_state = self.transform(state)
    action, feats = self.model(self.last_state[0].unsqueeze(0),
                               self.last_state[1].unsqueeze(0))
    action = action.squeeze()
    if self.kinematics == 'holonomic':
        action = ActionXY(action[0].item(), action[1].item())
    else:
        action = ActionRot(action[0].item(), action[1].item())
    # keep the transformed state together with the network features for training
    self.last_state = (self.last_state, feats)
    return action

def predict(self, state):
    self_state = state.self_state
    close_obst = []
    for a in state.human_states:
        distance = math.sqrt((a.px - self_state.px) ** 2 + (a.py - self_state.py) ** 2)
        if distance <= 1:
            close_obst.append([a.px, a.py, a.radius])

    theta = np.arctan2(self_state.gy - self_state.py, self_state.gx - self_state.px)
    if len(close_obst) == 0:
        # No obstacle in the sensor skirt: head straight to the goal at preferred speed
        vx = np.cos(theta) * self_state.v_pref
        vy = np.sin(theta) * self_state.v_pref
    else:
        # Cautious behavior: keep heading to the goal at half the preferred speed
        vx = 0.5 * np.cos(theta) * self_state.v_pref
        vy = 0.5 * np.sin(theta) * self_state.v_pref
    return ActionXY(vx, vy)

def predict(self, state): """ A base class for all methods that takes pairwise joint state as input to value network. The input to the value network is always of shape (batch_size, # humans, rotated joint state length) """ if self.phase is None or self.device is None: raise AttributeError('Phase, device attributes have to be set!') if self.phase == 'train' and self.epsilon is None: raise AttributeError('Epsilon attribute has to be set in training phase') if self.phase == 'train': self.last_state = self.transform(state) if self.reach_destination(state): return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) state_tensor = state.to_tensor(add_batch_size=True, device=self.device) if self.phase == 'train': action = ( self.actor(state_tensor).squeeze().detach().numpy() + np.random.normal(0, self.max_action * self.expl_noise, size=self.action_dim) ).clip(-self.max_action, self.max_action) speed = action[0] theta = action[1] Action = ActionXY(speed * np.cos(theta), speed * np.sin(theta)) \ if self.kinematics == 'holonomic' else ActionRot(speed, theta) return Action, torch.tensor(action).float() else: with torch.no_grad(): action = self.actor(state_tensor).squeeze().numpy() speed = action[0] theta = action[1] Action = ActionXY(speed * np.cos(theta), speed * np.sin(theta)) \ if self.kinematics == 'holonomic' else ActionRot(speed, theta) return Action, torch.tensor(action).float()
def build_action_space(self, v_pref):
    """
    Build a discrete action space of speed_samples * rotation_samples + 1 actions:
    one stop action plus every combination of exponentially spaced speeds and
    uniformly spaced rotations.
    """
    holonomic = self.kinematics == 'holonomic'
    speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref
              for i in range(self.speed_samples)]
    if holonomic:
        rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False)
    else:
        rotations = np.linspace(-np.pi / 2, np.pi / 2, self.rotation_samples)

    action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)]
    for rotation, speed in itertools.product(rotations, speeds):
        if holonomic:
            action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation)))
        else:
            action_space.append(ActionRot(speed, rotation))

    self.speeds = speeds
    self.rotations = rotations
    self.action_space = action_space

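# Illustrative sketch (assumption, not repo code): the exponential spacing used above
# concentrates more speed samples near zero than uniform spacing, giving the policy
# finer control at low speeds.
import numpy as np

speed_samples, v_pref = 5, 1.0
exp_speeds = [(np.exp((i + 1) / speed_samples) - 1) / (np.e - 1) * v_pref
              for i in range(speed_samples)]
lin_speeds = [(i + 1) / speed_samples * v_pref for i in range(speed_samples)]
# exp_speeds ≈ [0.129, 0.286, 0.478, 0.713, 1.0]
# lin_speeds  = [0.2,   0.4,   0.6,   0.8,   1.0]
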
def predict(self, state): """ A base class for all methods that takes pairwise joint state as input to value network. The input to the value network is always of shape (batch_size, # humans, rotated joint state length) """ if self.phase is None or self.device is None: raise AttributeError('Phase, device attributes have to be set!') if self.phase == 'train' and self.epsilon is None: raise AttributeError('Epsilon attribute has to be set in training phase') if self.reach_destination(state): return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) if self.action_space is None: self.build_action_space(state.self_state.v_pref) occupancy_maps = None probability = np.random.random() if self.phase == 'train' and probability < self.epsilon: max_action = self.action_space[np.random.choice(len(self.action_space))] else: self.action_values = list() max_value = float('-inf') max_action = None for action in self.action_space: next_self_state = self.propagate(state.self_state, action) if self.query_env: next_human_states, reward, done, info = self.env.onestep_lookahead(action) else: next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy)) for human_state in state.human_states] reward = self.compute_reward(next_self_state, next_human_states) batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device) for next_human_state in next_human_states], dim=0) #print('multi human_rl --- line 45 ') #print('prediction phase! ----> in RL ----') #print('rotation is callled ---------------------------------------------------------------------------------------------------------------------!') rotated_batch_input = self.rotate(batch_next_states).unsqueeze(0) if self.with_om: if occupancy_maps is None: occupancy_maps = self.build_occupancy_maps(next_human_states).unsqueeze(0) rotated_batch_input = torch.cat([rotated_batch_input, occupancy_maps], dim=2) # VALUE UPDATE next_state_value = self.model(rotated_batch_input).data.item() value = reward + pow(self.gamma, self.time_step * state.self_state.v_pref) * next_state_value self.action_values.append(value) if value > max_value: max_value = value max_action = action if max_action is None: raise ValueError('Value network is not well trained. ') if self.phase == 'train': #print('---------multi_human_rl.py line 64 ----->transformation done!') self.last_state = self.transform(state) return max_action
def predict(self, state): """ A base class for all methods that takes pairwise joint state as input to value network. The input to the value network is always of shape (batch_size, # humans, rotated joint state length) """ if self.phase is None or self.device is None: raise AttributeError('Phase, device attributes have to be set!') if self.phase == 'train' and self.epsilon is None: raise AttributeError('Epsilon attribute has to be set in training phase') if self.reach_destination(state): return ActionXY(0, 0) if self.action_space is None: build_action_space() occupancy_maps = None probability = np.random.random() if self.phase == 'train' and probability < self.epsilon: max_action_id = np.random.choice(self.action_space.shape[0]) max_action = self.action_space[max_action_id] else: self.action_values = list() max_value = float('-inf') max_action = None max_action_id = None for action_id, action in enumerate(self.action_space): action = ActionXY(action[0], action[1]) next_self_state = self.propagate(state.self_state, action) if self.query_env: next_human_states, reward, done, info = self.env.onestep_lookahead(action) else: next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy)) for human_state in state.human_states] reward = self.compute_reward(next_self_state, next_human_states) next_human_states = self.obstacle_to_human(next_human_states) batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device) for next_human_state in next_human_states if isinstance(next_human_state, ObservableState)], dim=0) rotated_batch_input = self.rotate(batch_next_states).unsqueeze(0) if self.with_om: if occupancy_maps is None: self_state = np.array([next_self_state.px.numpy(), next_self_state.py.numpy(), next_self_state.vx.numpy(), next_self_state.vx.numpy()]) occupancy_maps = self.build_state_space(self_state, next_human_states).unsqueeze(0).to(self.device) #occupancy_maps = self.build_occupancy_maps(next_human_states).unsqueeze(0).to(self.device) rotated_batch_input = torch.cat([rotated_batch_input, occupancy_maps], dim=2) # VALUE UPDATE max_action, max_value, max_action_id = self.value_update(state, action, max_action, max_value, reward, rotated_batch_input, action_id, max_action_id) if max_action is None: raise ValueError('Value network is not well trained. ') if self.phase == 'train': self.last_state = self.transform(state) self.last_action_id = max_action_id return max_action
def predict(self, state): """ Input state is the joint state of robot concatenated by the observable state of other agents To predict the best action, agent samples actions and propagates one step to see how good the next state is thus the reward function is needed """ if self.phase is None or self.device is None: raise AttributeError('Phase, device attributes have to be set!') if self.phase == 'train' and self.epsilon is None: raise AttributeError('Epsilon attribute has to be set in training phase') if self.reach_destination(state): return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) if self.action_space is None: self.build_action_space(state.self_state.v_pref) if not state.human_states: assert self.phase != 'train' return self.select_greedy_action(state.self_state) probability = np.random.random() if self.phase == 'train' and probability < self.epsilon: max_action = self.action_space[np.random.choice(len(self.action_space))] else: self.action_values = list() max_min_value = float('-inf') max_action = None for action in self.action_space: next_self_state = self.propagate(state.self_state, action) if self.query_env: next_human_states, reward, done, info = self.env.onestep_lookahead(action) else: next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy)) for human_state in state.human_states] reward = self.compute_reward(next_self_state, next_human_states) batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device) for next_human_state in next_human_states], dim=0) # VALUE UPDATE outputs = self.model(self.rotate(batch_next_states)) min_output, min_index = torch.min(outputs, 0) min_value = reward + pow(self.gamma, self.time_step * state.self_state.v_pref) * min_output.data.item() self.action_values.append(min_value) if min_value > max_min_value: max_min_value = min_value max_action = action if self.phase == 'train': self.last_state = self.transform(state) return max_action
def build_action_space(v_pref=1.0, speed_samples=5, rotation_samples=16):
    """
    Build a holonomic action space as a tensor: one stop action plus
    speed_samples * rotation_samples actions with exponentially spaced speeds and
    uniformly spaced rotations.
    """
    speeds = [(np.exp((i + 1) / speed_samples) - 1) / (np.e - 1) * v_pref
              for i in range(speed_samples)]
    rotations = np.linspace(0, 2 * np.pi, rotation_samples, endpoint=False)
    action_space = [ActionXY(0, 0)]
    for rotation, speed in itertools.product(rotations, speeds):
        action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation)))
    return torch.FloatTensor(action_space)

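# Hedged usage sketch: with the defaults above, the returned tensor holds the stop
# action plus 16 * 5 sampled actions as (vx, vy) rows. This assumes ActionXY is a
# two-field namedtuple, so torch.FloatTensor can consume the list directly.
space = build_action_space(v_pref=1.0, speed_samples=5, rotation_samples=16)
assert space.shape == (81, 2)
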
def build_action_space(self, v_pref):
    """
    Build a discrete action space of speed_samples * rotation_samples + 1 actions and
    assign each action a coarse group index used for sparse action-space search.
    """
    holonomic = self.kinematics == "holonomic"
    speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref
              for i in range(self.speed_samples)]
    if holonomic:
        rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False)
    else:
        rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint,
                                self.rotation_samples)

    action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)]
    for j, speed in enumerate(speeds):
        if j == 0:
            # group index for the stop action (0, 0)
            self.action_group_index.append(0)
        # speeds fall into only two coarse groups
        speed_index = 0 if j < 3 else 1
        for i, rotation in enumerate(rotations):
            rotation_index = i // 2
            action_index = speed_index * self.sparse_rotation_samples + rotation_index
            self.action_group_index.append(action_index)
            if holonomic:
                action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation)))
            else:
                action_space.append(ActionRot(speed, rotation))

    self.speeds = speeds
    self.rotations = rotations
    self.action_space = action_space

def optimize_epoch(self, num_epochs):
    if self.v_optimizer is None:
        raise ValueError('Learning rate is not set!')
    if self.data_loader is None:
        self.data_loader = DataLoader(self.memory, self.batch_size, shuffle=True)

    for epoch in range(num_epochs):
        epoch_v_loss = 0
        epoch_s_loss = 0
        logging.debug('{}-th epoch starts'.format(epoch))

        update_counter = 0
        for data in self.data_loader:
            (robot_states, human_states, actions, values, dones, rewards,
             next_robot_state, next_human_states) = data

            # optimize value estimator
            self.v_optimizer.zero_grad()
            actions = actions.to(self.device)
            outputs = self.value_estimator((robot_states, human_states)).gather(1, actions.unsqueeze(1))
            values = values.to(self.device)
            loss = self.criterion(outputs, values)
            loss.backward()
            self.v_optimizer.step()
            epoch_v_loss += loss.data.item()

            # optimize state predictor
            if self.state_predictor.trainable:
                update_state_predictor = True
                if update_counter % self.state_predictor_update_interval != 0:
                    update_state_predictor = False
                if update_state_predictor:
                    self.s_optimizer.zero_grad()
                    _, next_human_states_est = self.state_predictor((robot_states, human_states), None)
                    loss = self.criterion(next_human_states_est, next_human_states)
                    loss.backward()
                    self.s_optimizer.step()
                    epoch_s_loss += loss.data.item()
                update_counter += 1
            else:
                _, next_human_states_est = self.state_predictor((robot_states, human_states), ActionXY(0, 0))
                loss = self.criterion(next_human_states_est, next_human_states)
                epoch_s_loss += loss.data.item()

        logging.debug('{}-th epoch ends'.format(epoch))
        self.writer.add_scalar('IL/epoch_v_loss', epoch_v_loss / len(self.memory), epoch)
        self.writer.add_scalar('IL/epoch_s_loss', epoch_s_loss / len(self.memory), epoch)
        logging.info('Average loss in epoch %d: %.2E, %.2E', epoch,
                     epoch_v_loss / len(self.memory), epoch_s_loss / len(self.memory))

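# Tiny sketch (illustrative tensors) of the gather(...) pattern used above: it picks,
# for every sample in the batch, the value-network output of the action actually taken.
import torch

outputs = torch.tensor([[0.1, 0.7, 0.2],
                        [0.4, 0.3, 0.9]])          # (batch, num_actions)
actions = torch.tensor([1, 2])                     # chosen action index per sample
picked = outputs.gather(1, actions.unsqueeze(1))   # tensor([[0.7], [0.9]])
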
def predict(self, state):
    sf_state = []
    for agent_state in state:
        sf_state.append((agent_state.px, agent_state.py, agent_state.vx, agent_state.vy,
                         agent_state.gx, agent_state.gy))

    sim = socialforce.Simulator(np.array(sf_state), delta_t=self.time_step,
                                initial_speed=self.initial_speed, v0=self.v0, sigma=self.sigma)
    sim.step()
    actions = [ActionXY(sim.state[i, 2], sim.state[i, 3]) for i in range(len(state))]
    del sim
    return actions

def clip_action(self, raw_action, v_pref):
    """
    Clip a raw network action to the permitted range: for holonomic kinematics the
    (vx, vy) vector is rescaled so its norm does not exceed v_pref; otherwise the
    changes of speed and heading are clamped for sim2real safety.
    """
    holonomic = self.config.action_space.kinematics == 'holonomic'
    if holonomic:
        act_norm = np.linalg.norm(raw_action)
        if act_norm > v_pref:
            raw_action[0] = raw_action[0] / act_norm * v_pref
            raw_action[1] = raw_action[1] / act_norm * v_pref
        return ActionXY(raw_action[0], raw_action[1])
    else:
        # for sim2real
        raw_action[0] = np.clip(raw_action[0], -0.1, 0.1)  # action[0] is change of v
        # raw[0, 1] = np.clip(raw[0, 1], -0.25, 0.25)  # action[1] is change of w
        # raw[0, 0] = np.clip(raw[0, 0], -state.self_state.v_pref, state.self_state.v_pref)  # action[0] is v
        raw_action[1] = np.clip(raw_action[1], -0.1, 0.1)  # action[1] is change of theta
        return ActionRot(raw_action[0], raw_action[1])

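# Numeric sketch (made-up values) of the holonomic branch above: an action whose
# magnitude exceeds v_pref is rescaled onto the circle of radius v_pref, preserving
# its direction.
import numpy as np

raw_action, v_pref = np.array([3.0, 4.0]), 1.0
act_norm = np.linalg.norm(raw_action)  # 5.0
if act_norm > v_pref:
    raw_action = raw_action / act_norm * v_pref
# raw_action is now [0.6, 0.8], with norm 1.0
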
def predict(self, state): """ :param state: :return: """ sf_state = [] self_state = state.self_state sf_state.append((self_state.px, self_state.py, self_state.vx, self_state.vy, self_state.gx, self_state.gy)) for human_state in state.human_states: # approximate desired direction with current velocity if human_state.vx == 0 and human_state.vy == 0: gx = np.random.random() gy = np.random.random() else: gx = human_state.px + human_state.vx gy = human_state.py + human_state.vy sf_state.append((human_state.px, human_state.py, human_state.vx, human_state.vy, gx, gy)) sim = socialforce.Simulator(np.array(sf_state), delta_t=self.time_step, initial_speed=self.initial_speed, v0=self.v0, sigma=self.sigma) sim.step() action = ActionXY(sim.state[0, 2], sim.state[0, 3]) self.last_state = state return action
def predict(self, state): """ Centralized planning for all agents """ params = self.neighbor_dist, self.max_neighbors, self.time_horizon, self.time_horizon_obst if self.sim is not None and self.sim.getNumAgents() != len(state): del self.sim self.sim = None if self.sim is None: self.sim = rvo2.PyRVOSimulator(self.time_step, *params, self.radius, self.max_speed) for agent_state in state: self.sim.addAgent(agent_state.position, *params, agent_state.radius + 0.01 + self.safety_space, self.max_speed, agent_state.velocity) else: for i, agent_state in enumerate(state): self.sim.setAgentPosition(i, agent_state.position) self.sim.setAgentVelocity(i, agent_state.velocity) # Set the preferred velocity to be a vector of unit magnitude (speed) in the direction of the goal. for i, agent_state in enumerate(state): velocity = np.array((agent_state.gx - agent_state.px, agent_state.gy - agent_state.py)) speed = np.linalg.norm(velocity) pref_vel = velocity / speed if speed > 1 else velocity self.sim.setAgentPrefVelocity(i, (pref_vel[0], pref_vel[1])) self.sim.doStep() actions = [ActionXY(*self.sim.getAgentVelocity(i)) for i in range(len(state))] return actions
def test_propagate_occupancy_map_agent_center_movement_left_low_channel_size_three(self):
    cell_num = 4
    cell_size = 1
    om_channel_size = 3
    time_step = .1
    human = Human()
    human.set(px=0, py=0, vx=0, vy=0, gx=0, gy=0, theta=0)
    other_agents = np.zeros((1, 4))
    other_agents[0, 0] = -1.5
    other_agents[0, 1] = 1.5
    occupancy_map = build_occupancy_map(human, other_agents, cell_num, cell_size, om_channel_size)
    action = ActionXY(-1, -1)
    next_state = propagate(human.get_observable_state(), action, time_step, kinematics='holonomic')
    next_occupancy_map = propagate_occupancy_map(occupancy_map, human.get_observable_state(),
                                                 action, time_step, 'holonomic',
                                                 cell_num, cell_size, om_channel_size)
    expected_next_occupancy_map = build_occupancy_map(next_state, other_agents,
                                                      cell_num, cell_size, om_channel_size)
    self.assertTrue(np.array_equal(next_occupancy_map, expected_next_occupancy_map))

def act(self, state_tensor):
    # state_tensor is a batch of tensors rather than a joint state.
    # A Gaussian (MultivariateNormal) policy head was replaced here by two independent
    # Beta distributions over vx and vy; Beta samples lie in [0, 1] and are mapped to [-1, 1].
    value, alpha_beta_1, alpha_beta_2 = self.value_action_predictor(state_tensor)
    vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
    vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
    actions = torch.cat([vx_dist.sample().unsqueeze(1), vy_dist.sample().unsqueeze(1)], dim=1)
    action_log_probs = (vx_dist.log_prob(actions[:, 0]).unsqueeze(1)
                        + vy_dist.log_prob(actions[:, 1]).unsqueeze(1))
    action_to_take = [ActionXY(action[0] * 2 - 1, action[1] * 2 - 1)
                      for action in actions.cpu().numpy()]
    return value, actions, action_log_probs, action_to_take

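# Sketch (assumed shapes) of the Beta-policy trick above: Beta samples live in [0, 1],
# so the affine map 2 * x - 1 stretches them onto the action range [-1, 1]; log-probs
# of independent per-axis distributions add up to the joint log-prob of the raw sample.
import torch
from torch.distributions import Beta

dist = Beta(torch.tensor([2.0]), torch.tensor([2.0]))
x = dist.sample()            # in [0, 1]
action = 2 * x - 1           # in [-1, 1]
log_prob = dist.log_prob(x)  # log-density of the raw (unmapped) sample
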
def compute_action_vals_activs(self, action, state):
    next_self_state = self.propagate(state.self_state, action)
    if self.query_env:
        next_human_states, reward, done, info = self.env.onestep_lookahead(action)
    else:
        next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy))
                             for human_state in state.human_states]
        reward = self.compute_reward(next_self_state, next_human_states)
    batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device)
                                   for next_human_state in next_human_states], dim=0)
    rotated_batch_input = self.rotate(batch_next_states)
    if self.with_om:
        occupancy_maps = self.build_occupancy_maps(next_human_states).unsqueeze(0)
        rotated_batch_input = torch.cat([rotated_batch_input, occupancy_maps.to(self.device)], dim=2)
    rotated_batch_input = rotated_batch_input.unsqueeze(0)
    return action, rotated_batch_input, reward

def test_overlap_no_movement(self):
    cell_num = 4
    cell_size = 1
    om_channel_size = 1
    human = Human()
    human.set(px=0, py=0, vx=0, vy=0, gx=0, gy=0, theta=0)
    other_agents = np.zeros((1, 4))
    occupancy_map = build_occupancy_map(human, other_agents, cell_num, cell_size, om_channel_size)
    radius = 1
    state = ObservableState(0, 0, 0, 0, radius)
    action = ActionXY(0, 0)
    next_state = propagate(state, action, time_step=.1, kinematics='holonomic')
    next_occupancy_map = propagate_occupancy_map(occupancy_map, state, action, .1, 'holonomic',
                                                 cell_num, cell_size, om_channel_size)
    expected_next_occupancy_map = build_occupancy_map(next_state, other_agents,
                                                      cell_num, cell_size, om_channel_size)
    self.assertTrue(np.array_equal(next_occupancy_map, expected_next_occupancy_map))

def __init__(self, robot_data):
    self.robot_radius = robot_data['radius']
    self.robot_pref_speed = robot_data['pref_speed']  # TODO: make this a dynamic variable
    self.robot_mpc = robot_data['mpc']
    if self.robot_mpc:
        self.dt = 0.1
        self.times_steps = int(1.0 / self.dt)
        self.mpc = traj_opt(dt=self.dt, time_steps=self.times_steps)
        self.mpc_state = ModelState()
        self.mpc_psi = None

    self.stop_moving_flag = False
    self.state = ModelState()
    self.STATE_SET_FLAG = False

    self.goal = PoseStamped()
    self.GOAL_RECEIVED_FLAG = False
    self.GOAL_THRESH = 0.5

    # External agent(s) state
    self.other_agents_state = [ObservableState(float("inf"), float("inf"), 0, 0, 0.3)]
    self.OBS_RECEIVED_FLAG = False

    # what we use to send commands
    self.desired_action = ActionXY(0, 0)

def predict(self, state): """ Produce action for agent with circular specification of social force model. """ # Pull force to goal delta_x = state.self_state.gx - state.self_state.px delta_y = state.self_state.gy - state.self_state.py dist_to_goal = np.sqrt(delta_x**2 + delta_y**2) desired_vx = (delta_x / dist_to_goal) * state.self_state.v_pref desired_vy = (delta_y / dist_to_goal) * state.self_state.v_pref KI = self.config.sf.KI # Inverse of relocation time K_i curr_delta_vx = KI * (desired_vx - state.self_state.vx) curr_delta_vy = KI * (desired_vy - state.self_state.vy) # Push force(s) from other agents A = self.config.sf.A # Other observations' interaction strength: 1.5 B = self.config.sf.B # Other observations' interaction range: 1.0 interaction_vx = 0 interaction_vy = 0 for other_human_state in state.human_states: delta_x = state.self_state.px - other_human_state.px delta_y = state.self_state.py - other_human_state.py dist_to_human = np.sqrt(delta_x**2 + delta_y**2) interaction_vx += A * np.exp( (state.self_state.radius + other_human_state.radius - dist_to_human) / B) * (delta_x / dist_to_human) interaction_vy += A * np.exp( (state.self_state.radius + other_human_state.radius - dist_to_human) / B) * (delta_y / dist_to_human) # Sum of push & pull forces total_delta_vx = (curr_delta_vx + interaction_vx) * self.config.env.time_step total_delta_vy = (curr_delta_vy + interaction_vy) * self.config.env.time_step # clip the speed so that sqrt(vx^2 + vy^2) <= v_pref new_vx = state.self_state.vx + total_delta_vx new_vy = state.self_state.vy + total_delta_vy act_norm = np.linalg.norm([new_vx, new_vy]) if act_norm > state.self_state.v_pref: return ActionXY(new_vx / act_norm * state.self_state.v_pref, new_vy / act_norm * state.self_state.v_pref) else: return ActionXY(new_vx, new_vy)
def predict(self, state): """ A base class for all methods that takes pairwise joint state as input to value network. The input to the value network is always of shape (batch_size, # humans, rotated joint state length) """ if self.phase is None or self.device is None: raise AttributeError('Phase, device attributes have to be set!') if self.phase == 'train' and self.epsilon is None: raise AttributeError( 'Epsilon attribute has to be set in training phase') if self.reach_destination(state): return ActionXY( 0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) if self.action_space is None: self.build_action_space(state.robot_state.v_pref) probability = np.random.random() if self.phase == 'train' and probability < self.epsilon: max_action = self.action_space[np.random.choice( len(self.action_space))] else: max_action = None max_value = float('-inf') max_traj = None if self.do_action_clip: state_tensor = state.to_tensor(add_batch_size=True, device=self.device) action_space_clipped = self.action_clip( state_tensor, self.action_space, self.planning_width) else: action_space_clipped = self.action_space for action in action_space_clipped: state_tensor = state.to_tensor(add_batch_size=True, device=self.device) next_state = self.state_predictor(state_tensor, action) max_next_return, max_next_traj = self.V_planning( next_state, self.planning_depth, self.planning_width) reward_est = self.estimate_reward(state, action) value = reward_est + self.get_normalized_gamma( ) * max_next_return if value > max_value: max_value = value max_action = action max_traj = [(state_tensor, action, reward_est) ] + max_next_traj if max_action is None: raise ValueError('Value network is not well trained.') if self.phase == 'train': self.last_state = self.transform(state) else: self.traj = max_traj return max_action
def predict(self, state): """ Create a rvo2 simulation at each time step and run one step Python-RVO2 API: https://github.com/sybrenstuvel/Python-RVO2/blob/master/src/rvo2.pyx How simulation is done in RVO2: https://github.com/sybrenstuvel/Python-RVO2/blob/master/src/Agent.cpp Agent doesn't stop moving after it reaches the goal, because once it stops moving, the reciprocal rule is broken :param state: :return: """ self_state = state.self_state params = self.neighbor_dist, self.max_neighbors, self.time_horizon, self.time_horizon_obst if self.sim is not None and self.sim.getNumAgents() != len( state.human_states) + 1: del self.sim self.sim = None if self.sim is None: self.sim = rvo2.PyRVOSimulator(self.time_step, *params, self.radius, self.max_speed) self.sim.addAgent(self_state.position, *params, self_state.radius + 0.01 + self.safety_space, self_state.v_pref, self_state.velocity) for human_state in state.human_states: self.sim.addAgent( human_state.position, *params, human_state.radius + 0.01 + self.safety_space, self.max_speed, human_state.velocity) else: self.sim.setAgentPosition(0, self_state.position) self.sim.setAgentVelocity(0, self_state.velocity) for i, human_state in enumerate(state.human_states): self.sim.setAgentPosition(i + 1, human_state.position) self.sim.setAgentVelocity(i + 1, human_state.velocity) # Set the preferred velocity to be a vector of unit magnitude (speed) in the direction of the goal. velocity = np.array( (self_state.gx - self_state.px, self_state.gy - self_state.py)) speed = np.linalg.norm(velocity) pref_vel = velocity / speed if speed > 1 else velocity # Perturb a little to avoid deadlocks due to perfect symmetry. # perturb_angle = np.random.random() * 2 * np.pi # perturb_dist = np.random.random() * 0.01 # perturb_vel = np.array((np.cos(perturb_angle), np.sin(perturb_angle))) * perturb_dist # pref_vel += perturb_vel self.sim.setAgentPrefVelocity(0, tuple(pref_vel)) for i, human_state in enumerate(state.human_states): # unknown goal position of other humans self.sim.setAgentPrefVelocity(i + 1, (0, 0)) self.sim.doStep() action = ActionXY(*self.sim.getAgentVelocity(0)) self.last_state = state return action
def test_no_movement(self):
    radius = 1
    state = ObservableState(0, 0, 0, 0, radius)
    action = ActionXY(0, 0)
    next_state = propagate(state, action, time_step=.1, kinematics='holonomic')
    self.assertEqual(next_state, state)

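# The propagate helper exercised by these tests is not part of this collection; a
# minimal holonomic version consistent with them might look like the sketch below
# (assumptions: ObservableState(px, py, vx, vy, radius) and constant velocity over
# one step; the name propagate_holonomic is hypothetical).
def propagate_holonomic(state, action, time_step):
    return ObservableState(state.px + action.vx * time_step,
                           state.py + action.vy * time_step,
                           action.vx, action.vy, state.radius)
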
def predict(self, state):
    vx = 0
    vy = 0
    if self.move[0]:
        vx = -1
    if self.move[1]:
        vx = 1
    if self.move[2]:
        vy = 1
    if self.move[3]:
        vy = -1
    return ActionXY(vx, vy)

def predict(self, state):
    self_state = state.self_state
    theta = np.arctan2(self_state.gy - self_state.py, self_state.gx - self_state.px)
    vx = np.cos(theta) * self_state.v_pref
    vy = np.sin(theta) * self_state.v_pref
    return ActionXY(vx, vy)

def act(self, ob):
    if self.policy is None:
        raise AttributeError('Policy attribute has to be set!')
    state = JointState(self.get_full_state(), ob)
    action = self.policy.predict(state)
    # NOTE: the predicted action is overridden with a stop action to keep the robot
    # static while generating simulation data; remove the next line for normal use.
    action = ActionXY(0, 0)
    return action

def test_holonomic_diagonal_up_movement(self):
    radius = 1
    time_step = .1
    state = ObservableState(0, 0, 0, 0, radius)
    action = ActionXY(np.sqrt(2), np.sqrt(2))
    next_state = propagate(state, action, time_step=time_step, kinematics='holonomic')
    expected_state = ObservableState(time_step * np.sqrt(2), time_step * np.sqrt(2),
                                     action.vx, action.vy, radius)
    self.assertEqual(next_state, expected_state)

def action_clip(self, state, action_space, width, depth=1):
    values = []
    if self.kinematics == "holonomic":
        actions = [ActionXY(0, 0)]
    else:
        actions = [ActionRot(0, 0)]

    next_robot_states = None
    next_human_states = None
    # Human states are predicted once with a stop action; only the robot state
    # depends on the candidate action.
    pre_next_state = self.state_predictor(state, actions)
    for action in action_space:
        next_robot_state = self.compute_next_robot_state(state[0], action)
        next_human_state = pre_next_state[1]
        if next_robot_states is None and next_human_states is None:
            next_robot_states = next_robot_state
            next_human_states = next_human_state
        else:
            next_robot_states = torch.cat((next_robot_states, next_robot_state), dim=0)
            next_human_states = torch.cat((next_human_states, next_human_state), dim=0)
        next_state_tensor = (next_robot_state, next_human_state)
        next_state = tensor_to_joint_state(next_state_tensor)
        reward_est, _ = self.reward_estimator.estimate_reward_on_predictor(state, next_state)
        values.append(reward_est)

    next_return = self.value_estimator((next_robot_states, next_human_states)).squeeze()
    next_return = np.array(next_return.data.detach())
    values = np.array(values) + self.get_normalized_gamma() * next_return
    values = values.tolist()

    if self.sparse_search:
        # search in a sparsely grained action space: keep the best action per group
        added_groups = set()
        max_indices = np.argsort(np.array(values))[::-1]
        clipped_action_space = []
        for index in max_indices:
            if self.action_group_index[index] not in added_groups:
                clipped_action_space.append(action_space[index])
                added_groups.add(self.action_group_index[index])
                if len(clipped_action_space) == width:
                    break
    else:
        max_indexes = np.argpartition(np.array(values), -width)[-width:]
        clipped_action_space = [action_space[i] for i in max_indexes]

    return clipped_action_space

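# Micro-sketch (illustrative values) of the top-k selection in the dense branch above:
# np.argpartition returns the indices of the `width` largest values without fully
# sorting the array.
import numpy as np

values = np.array([0.3, 0.9, 0.1, 0.7, 0.5])
width = 2
top_k = np.argpartition(values, -width)[-width:]  # indices {1, 3}, in arbitrary order
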
def predict(self, state): """ Input state is the joint state of robot concatenated by the observable state of other agents To predict the best action, agent samples actions and propagates one step to see how good the next state is thus the reward function is needed """ if self.phase is None or self.device is None: raise AttributeError('Phase, device attributes have to be set!') if self.phase == 'train' and self.epsilon is None: raise AttributeError( 'Epsilon attribute has to be set in training phase') if self.reach_destination(state): return ActionXY( 0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) host_agent = agent.Agent(state.self_state.px, state.self_state.py, state.self_state.gx, state.self_state.gy, state.self_state.radius, state.self_state.v_pref, state.self_state.theta, 0) host_agent.vel_global_frame = np.array( [state.self_state.vx, state.self_state.vy]) other_agents = [] for i, human_state in enumerate(state.human_states): if self.human_state_in_FOV(state.self_state, human_state): x = human_state.px y = human_state.py v_x = human_state.vx v_y = human_state.vy heading_angle = np.arctan2(v_y, v_x) pref_speed = np.linalg.norm(np.array([v_x, v_y])) goal_x = x + 5.0 goal_y = y + 5.0 other_agents.append( agent.Agent(x, y, goal_x, goal_y, human_state.radius, pref_speed, heading_angle, i + 1)) other_agents[-1].vel_global_frame = np.array([v_x, v_y]) obs = host_agent.observe(other_agents)[1:] obs = np.expand_dims(obs, axis=0) predictions = self.net.predict_p(obs, None)[0] raw_action = self.possible_actions.actions[np.argmax(predictions)] action = ActionRot(host_agent.pref_speed * raw_action[0], util.wrap(raw_action[1])) return action