def _state_to_condition(self, state):
    """Converts a state into equality condition(s) over the state factors.

    One EqualityCondition is built per key of self._state_factors, pairing
    the state factor stored under that key with the value 'state' holds for
    the same key. Each one is added to a ConjunctionCondition.

    NOTE: If the conjunction ends up holding exactly one sub-condition, that
    sub-condition is returned on its own instead of the conjunction.

    Args:
        state: The state to convert, indexable by the keys of
            self._state_factors

    Returns:
        cond: The single sub-condition if there is exactly one, otherwise
            the ConjunctionCondition
    """
    conjunction = ConjunctionCondition()
    for name in self._state_factors:
        conjunction.add_cond(
            EqualityCondition(self._state_factors[name], state[name]))

    # A conjunction over a single factor is unwrapped to the bare equality
    sub_conds = conjunction._cond_list
    return sub_conds[0] if len(sub_conds) == 1 else conjunction
def make_sink_state_transitions(start_node, state_factors, node_to_loc_sv):
    """ Makes the two transitions that move the robot into the sink state.

    Both transitions set the 'location' state factor to -1 with probability
    1.0:
        'finish': enabled when 'location' equals the value for 'start_node'
            and 'num_people_found' equals NUM_PEOPLE
        'run_out_of_time': enabled when 'time' is greater than
            TIME_HORIZON - 1 and 'location' is greater than -1

    Args:
        start_node: The N object corresponding to the start location
        state_factors: A dictionary mapping from state factor names to state
            factor objects
        node_to_loc_sv: A dictionary mapping from N objects to values for the
            'location' state factor.

    Returns:
        A list [finish_transition, run_out_of_time_transition] of
        ProbTransition objects
    """
    location_sf = state_factors['location']

    # Task completed: back at the start location with everybody found
    finish_transition = ProbTransition(
        pre_cond=ConjunctionCondition(
            EqualityCondition(location_sf, node_to_loc_sv[start_node]),
            EqualityCondition(state_factors['num_people_found'], NUM_PEOPLE),
        ),
        action_name='finish',
        prob_post_conds={EqualityCondition(location_sf, -1): 1.0})

    # Time is up: the location > -1 guard keeps this disabled once the
    # location has already been set to -1
    no_time_transition = ProbTransition(
        pre_cond=ConjunctionCondition(
            GreaterThanCondition(state_factors['time'], TIME_HORIZON - 1),
            GreaterThanCondition(location_sf, -1),
        ),
        action_name='run_out_of_time',
        prob_post_conds={EqualityCondition(location_sf, -1): 1.0})

    return [finish_transition, no_time_transition]
def add_rubble_clear_costs(cost, room_info, state_factors, node_to_loc_sv):
    """ Appends the costs of the 'clear_rubble' action to 'cost'

    For every doorway edge, two entries are appended (small pile first, then
    large pile). Each applies when the robot is at the edge's 'n1' node and
    the room's rubble factor has the corresponding pile value.

    Args:
        cost: The StateActionCost object for the SSP
        room_info: A list of tuples (doorway_edge, person_factor,
            rubble_factor) where 'doorway_edge' is an edge that may be blocked
            by rubble, 'person_factor' corresponds to a state factor
            'room_i_person' for some i and similarly 'rubble_factor'
            corresponds to a state factor 'room_i_rubble'
        state_factors: A dictionary mapping from state factor names to state
            factor objects
        node_to_loc_sv: A dictionary mapping from N objects to values for the
            'location' state factor.

    Returns:
        Nothing, 'cost' is updated in place
    """
    # (rubble value, cost of clearing it), in the order they are appended
    pile_costs = (
        ('small_pile', SMALL_RUBBLE_PILE_CLEARING_TIME_COST),
        ('large_pile', LARGE_RUBBLE_PILE_CLEARING_TIME_COST),
    )

    for doorway_edge, _, rubble_sf in room_info:
        for pile_size, clearing_cost in pile_costs:
            at_doorway_with_pile = ConjunctionCondition(
                EqualityCondition(state_factors['location'],
                                  node_to_loc_sv[doorway_edge.n1]),
                EqualityCondition(rubble_sf, pile_size),
            )
            cost.append(pre_cond=at_doorway_with_pile,
                        action_name='clear_rubble',
                        cost_value=clearing_cost)
def to_ssp(self, add_time=False):
    """ Builds an SSP for the deep sea treasure environment

    The state factors are 'x' and 'y' and, when 'add_time' is set, a step
    counter 't'. With 't' present, every movement result condition handed to
    self._add_transitions is conjoined with a +1 CumulativeCondition on 't',
    and every movement pre-condition additionally requires 't' to be less
    than self.max_steps.

    Args:
        add_time: If True, include the 't' state factor as described above

    Returns:
        An SSP whose initial state is x=0, y=0 (and t=0 if 'add_time'), with
        two costs: a per-move cost of self.distance_reward_scale for "left",
        "right", "up" and "down", and a "collect_bounty" cost of
        -self.treasure[col] * self.treasure_reward_scale for each column
    """
    # Position state factors, plus a step counter when time is modelled
    x_sf = IntegerStateFactor(name='x', min=-2, max=self.num_cols)
    y_sf = IntegerStateFactor(name='y', min=-1, max=self.num_rows)
    state_factors = {'x': x_sf, 'y': y_sf}
    start_values = {'x': 0, 'y': 0}
    if add_time:
        t_sf = IntegerStateFactor(name='t', min=0, max=self.max_steps)
        tick = CumulativeCondition(t_sf, value=1)
        state_factors['t'] = t_sf
        start_values['t'] = 0

    def with_tick(move_cond):
        # Pair a movement result with the time progression (if modelled)
        if add_time:
            return ConjunctionCondition(move_cond, tick)
        return move_cond

    def before_deadline(region_cond):
        # Restrict a movement pre-condition to t < max_steps (if modelled)
        if add_time:
            return ConjunctionCondition(
                region_cond,
                LessThanCondition(t_sf, value=self.max_steps))
        return region_cond

    # "In" a column: below the top row (y > 0) and above that column's
    # treasure (y < depth of the column)
    below_top = GreaterThanCondition(y_sf, value=0)
    in_col = [
        ConjunctionCondition(
            EqualityCondition(x_sf, value=col),
            below_top,
            LessThanCondition(y_sf, value=self.depths[col]))
        for col in range(self.num_cols)
    ]

    # Inside any column but the first and last: can move in any direction
    free_space = DisjunctionCondition(*in_col[1:-1])

    # The two top corners
    top_left = ConjunctionCondition(
        EqualityCondition(x_sf, value=0),
        EqualityCondition(y_sf, value=0))
    top_right = ConjunctionCondition(
        EqualityCondition(x_sf, value=self.num_cols-1),
        EqualityCondition(y_sf, value=0))

    # The top row, corners excluded
    top_row = ConjunctionCondition(
        GreaterThanCondition(x_sf, value=0),
        LessThanCondition(x_sf, value=self.num_cols-1),
        EqualityCondition(y_sf, value=0))

    # Standing on any one of the treasure locations
    at_any_treasure = DisjunctionCondition(*[
        ConjunctionCondition(
            EqualityCondition(x_sf, value=col),
            EqualityCondition(y_sf, value=self.depths[col]))
        for col in range(self.num_cols)
    ])

    # Results of moving; the 'stay' variants are used for blocked moves
    go_left = with_tick(CumulativeCondition(x_sf, value=-1))
    go_right = with_tick(CumulativeCondition(x_sf, value=1))
    go_up = with_tick(CumulativeCondition(y_sf, value=-1))
    go_down = with_tick(CumulativeCondition(y_sf, value=1))
    stay_x = with_tick(CumulativeCondition(x_sf, value=0))
    stay_y = with_tick(CumulativeCondition(y_sf, value=0))

    # Result of collecting a treasure
    finished = ConjunctionCondition(
        EqualityCondition(x_sf, value=-1),
        EqualityCondition(y_sf, value=-1))

    # (region, result of left, result of right, result of up, result of down)
    move_table = [
        # free space (move in all directions)
        (free_space, go_left, go_right, go_up, go_down),
        # left col (cant move left)
        (in_col[0], stay_x, go_right, go_up, go_down),
        # right col (cant move right)
        (in_col[-1], go_left, stay_x, go_up, go_down),
        # top row (cant move up)
        (top_row, go_left, go_right, stay_y, go_down),
        # top left corner (cant move up or left)
        (top_left, stay_x, go_right, stay_y, go_down),
        # top right corner (cant move up or right)
        (top_right, go_left, stay_x, stay_y, go_down),
    ]

    transitions = []
    for region, on_left, on_right, on_up, on_down in move_table:
        self._add_transitions(transitions=transitions,
                              pre_cond=before_deadline(region),
                              move_left_result_cond=on_left,
                              move_right_result_cond=on_right,
                              move_up_result_cond=on_up,
                              move_down_result_cond=on_down)

    # Collecting a treasure is enabled at any treasure location
    transitions.append(ProbTransition(
        action_name="collect_bounty",
        pre_cond=at_any_treasure,
        prob_post_conds={finished: 1.0},
    ))

    # Treasure reward, expressed as a negative cost keyed on the column
    treasure_cost = StateActionCost(sac_tuples=[
        (EqualityCondition(x_sf, value=col),
         "collect_bounty",
         -self.treasure[col] * self.treasure_reward_scale)
        for col in range(self.num_cols)
    ])

    # Cost for each step taken, whatever the state
    distance_cost = StateActionCost(sac_tuples=[
        (None, direction, self.distance_reward_scale)
        for direction in ("left", "right", "up", "down")
    ])

    # Make and return the SSP
    return SSP(state_factors=state_factors,
               initial_state_probs={State(start_values): 1.0},
               transitions=transitions,
               costs=[distance_cost, treasure_cost],
               bypass_sanity_checks=True)
def make_search_and_rescue_ssp(use_real_map=True):
    """ Builds the SSP that encapsulates a search and rescue mission

    Args:
        use_real_map: If True the topological map comes from
            read_in_topo_map(), otherwise from build_spoof_topo_map()

    Returns:
        An SSP object combining the traversal, rubble, doorway, person-check
        and sink state transitions with a single StateActionCost
    """
    # Read in topological map from ROS topic/spoof function
    map_source = read_in_topo_map if use_real_map else build_spoof_topo_map
    node_to_loc_sv, _, topological_edges = map_source()

    # Nodes indexed by their 'location' value. Rooms are labelled in a
    # CLOCKWISE order, with rooms 1 and 2 being closest to the robots
    # starting position:
    #   0: start, 1: outside room 1, 2: room 1, 3: outside room 2, 4: room 2,
    #   5: outside room 3, 6: room 3, 7: outside room 4, 8: room 4
    # Only the start node is needed below, but all nine are looked up so a
    # map missing any of them fails here with a KeyError.
    loc_sv_to_node = {loc_sv: node for node, loc_sv in node_to_loc_sv.items()}
    nodes = [loc_sv_to_node[loc_sv] for loc_sv in range(9)]
    start_node = nodes[0]

    # Edges by index:
    #   0: start - room 1, 1: start - room 2, 2: room 1 - room 2,
    #   3: room 1 - room 3, 4: room 1 - room 4, 5: room 2 - room 3,
    #   6: room 2 - room 4, 7: room 3 - room 4,
    #   8 to 11: doorways of rooms 1 to 4, 12: tunnel
    edges = [topological_edges[idx] for idx in range(13)]
    non_doorway_edges = edges[0:8] + [edges[12]]
    doorway_edges = edges[8:12]

    # Build state factors
    state_factors = make_search_and_rescue_state_factors(topological_edges)

    # Create the initial state
    initial_state = State({
        'location': 0,
        'time': 0,
        'num_people_found': 0,
        'num_rooms_searched': 0,
        'room_1_person': 'unknown',
        'room_2_person': 'unknown',
        'room_3_person': 'unknown',
        'room_4_person': 'unknown',
        'room_1_rubble': 'unknown',
        'room_2_rubble': 'unknown',
        'room_3_rubble': 'unknown',
        'room_4_rubble': 'unknown',
    })

    # Defining transitions - traversal excluding doorways
    transitions = list(make_non_doorway_traversal_transitions(
        non_doorway_edges, state_factors, node_to_loc_sv))

    # Each doorway paired with the person/rubble state factors of its room
    room_factor_names = (
        ('room_1_person', 'room_1_rubble'),
        ('room_2_person', 'room_2_rubble'),
        ('room_3_person', 'room_3_rubble'),
        ('room_4_person', 'room_4_rubble'),
    )
    room_info = [
        (doorway_edge, state_factors[person_name], state_factors[rubble_name])
        for doorway_edge, (person_name, rubble_name)
        in zip(doorway_edges, room_factor_names)
    ]

    # Defining transitions - rubble observations
    transitions += make_rubble_transitions(
        room_info, state_factors, node_to_loc_sv)

    # Defining transitions - doorway traversal + checking for people
    transitions += make_doorway_transitions(
        room_info, state_factors, node_to_loc_sv)
    transitions += make_check_for_person_transitions(
        room_info, state_factors, node_to_loc_sv)

    # Defining transitions - enforcing the time horizon
    # Every transition built so far gets the extra pre condition that the
    # time is below the horizon, so any state with time having surpassed the
    # horizon has none of these actions enabled. The sink state transitions
    # are added afterwards and are therefore not restricted.
    for transition in transitions:
        transition.pre_cond = ConjunctionCondition(
            transition.pre_cond,
            LessThanCondition(state_factors['time'], TIME_HORIZON),
        )

    # Defining transitions - sink state
    transitions += make_sink_state_transitions(
        start_node, state_factors, node_to_loc_sv)

    # Defining costs
    cost = StateActionCost()
    add_traversal_costs(cost, topological_edges, state_factors,
                        node_to_loc_sv)
    add_rubble_clear_costs(cost, room_info, state_factors, node_to_loc_sv)
    add_not_finishing_cost(cost, topological_edges, state_factors)

    # Finally, make and return the SSP object
    return SSP(state_factors=state_factors,
               initial_state_probs={initial_state: 1.0},
               transitions=transitions,
               costs=[cost],
               index_factors_names=['location', 'num_people_found'],
               bypass_sanity_checks=True)
def make_doorway_transitions(room_info, state_factors, node_to_loc_sv):
    """ Makes transitions for traversing through doorways, in both directions

    A doorway can only be traversed once its rubble state factor is known to
    be 'cleared'. Each traversal moves 'location' to the far end of the edge
    and advances 'time' by ceil(edge length * EDGE_TIME_COST_TO_LEN_RATIO).
    The action is named after the node being moved to.

    Args:
        room_info: A list of tuples (doorway_edge, person_factor,
            rubble_factor) where 'doorway_edge' is an edge that may be blocked
            by rubble, 'person_factor' corresponds to a state factor
            'room_i_person' for some i and similarly 'rubble_factor'
            corresponds to a state factor 'room_i_rubble'
        state_factors: A dictionary mapping from state factor names to state
            factor objects
        node_to_loc_sv: A dictionary mapping from N objects to values for the
            'location' state factor.

    Returns:
        A list of ProbTransition objects, two per doorway: n1 to n2 (moving
        into the room) followed by n2 to n1 (moving out of the room)
    """
    def traversal(origin, destination, rubble_sf, duration):
        # One direction of travel through a doorway known to be clear
        return ProbTransition(
            pre_cond=ConjunctionCondition(
                EqualityCondition(state_factors['location'],
                                  node_to_loc_sv[origin]),
                EqualityCondition(rubble_sf, 'cleared'),
            ),
            action_name=destination.name,
            prob_post_conds={
                ConjunctionCondition(
                    EqualityCondition(state_factors['location'],
                                      node_to_loc_sv[destination]),
                    CumulativeCondition(state_factors['time'],
                                        duration)): 1.0
            })

    transitions = []
    for doorway_edge, _, rubble_sf in room_info:
        duration = math.ceil(
            doorway_edge.length() * EDGE_TIME_COST_TO_LEN_RATIO)
        outside, inside = doorway_edge.n1, doorway_edge.n2

        transitions.append(traversal(outside, inside, rubble_sf, duration))
        transitions.append(traversal(inside, outside, rubble_sf, duration))

    return transitions
def make_rubble_transitions(room_info, state_factors, node_to_loc_sv):
    """ Makes transitions for observing and clearing rubble in doorways

    All transitions require the robot to be at the 'n1' node of the doorway
    edge. Per doorway, three transitions are produced, in this order:
        'check_for_rubble': enabled while the rubble factor is 'unknown',
            resolves it to 'cleared', 'small_pile' or 'large_pile'
        'clear_rubble' for a 'large_pile': sets the rubble factor to
            'cleared' and adds LARGE_RUBBLE_PILE_CLEARING_TIME_COST to 'time'
        'clear_rubble' for a 'small_pile': sets the rubble factor to
            'cleared' and adds SMALL_RUBBLE_PILE_CLEARING_TIME_COST to 'time'

    Args:
        room_info: A list of tuples (doorway_edge, person_factor,
            rubble_factor) where 'doorway_edge' is an edge that may be blocked
            by rubble, 'person_factor' corresponds to a state factor
            'room_i_person' for some i and similarly 'rubble_factor'
            corresponds to a state factor 'room_i_rubble'
        state_factors: A dictionary mapping from state factor names to state
            factor objects
        node_to_loc_sv: A dictionary mapping from N objects to values for the
            'location' state factor.

    Returns:
        A list of ProbTransition objects for transitions related to observing
        and clearing rubble
    """
    transitions = []
    for doorway_edge, _, rubble_sf in room_info:
        doorway_loc_sv = node_to_loc_sv[doorway_edge.n1]

        # observing what rubble there is
        transitions.append(ProbTransition(
            pre_cond=ConjunctionCondition(
                EqualityCondition(state_factors['location'], doorway_loc_sv),
                EqualityCondition(rubble_sf, 'unknown'),
            ),
            action_name='check_for_rubble',
            prob_post_conds={
                EqualityCondition(rubble_sf, 'cleared'):
                    RUBBLE_PILE_CLEAR_PROB,
                EqualityCondition(rubble_sf, 'small_pile'):
                    (1.0 - RUBBLE_PILE_CLEAR_PROB - LARGE_RUBBLE_PILE_PROB),
                EqualityCondition(rubble_sf, 'large_pile'):
                    LARGE_RUBBLE_PILE_PROB,
            }))

        # clearing rubble: large piles first, then small piles
        for pile_size, clearing_time in (
                ('large_pile', LARGE_RUBBLE_PILE_CLEARING_TIME_COST),
                ('small_pile', SMALL_RUBBLE_PILE_CLEARING_TIME_COST)):
            transitions.append(ProbTransition(
                pre_cond=ConjunctionCondition(
                    EqualityCondition(state_factors['location'],
                                      doorway_loc_sv),
                    EqualityCondition(rubble_sf, pile_size),
                ),
                action_name='clear_rubble',
                prob_post_conds={
                    ConjunctionCondition(
                        EqualityCondition(rubble_sf, 'cleared'),
                        CumulativeCondition(state_factors['time'],
                                            clearing_time)): 1.0,
                }))

    return transitions
def make_non_doorway_traversal_transitions(edges, state_factors,
                                           node_to_loc_sv):
    """ Make ProbTransition objects for traversing edges in 'edges'.

    We assume that there is no restrictions on being able to traverse these
    edges, and that they are bidirectional. Traversing an edge in either
    direction moves 'location' to the node at the other end and advances
    'time' by ceil(edge length * EDGE_TIME_COST_TO_LEN_RATIO). The action is
    named after the node being moved to.

    Args:
        edges: A list of E objects to make traversal transitions for
        state_factors: A dictionary mapping from state factor names to state
            factor objects
        node_to_loc_sv: A dictionary mapping from N objects to values for the
            'location' state factor.

    Returns:
        A list of ProbTransition objects for traversal around the map, two
        per edge: n1 to n2 followed by n2 to n1
    """
    transitions = []
    for edge in edges:
        # Same time cost in both directions, so work it out once per edge
        time_cost = math.ceil(edge.length() * EDGE_TIME_COST_TO_LEN_RATIO)

        for origin, destination in ((edge.n1, edge.n2), (edge.n2, edge.n1)):
            pre_cond = EqualityCondition(state_factors['location'],
                                         node_to_loc_sv[origin])
            prob_post_conds = {
                ConjunctionCondition(
                    EqualityCondition(state_factors['location'],
                                      node_to_loc_sv[destination]),
                    CumulativeCondition(state_factors['time'],
                                        time_cost)): 1.0
            }
            transitions.append(ProbTransition(
                pre_cond=pre_cond,
                action_name=destination.name,
                prob_post_conds=prob_post_conds))

    return transitions