def __call__(self, domain, state, timeout):
    act_preds = [domain.predicates[a] for a in list(domain.actions)]
    act_space = LiteralSpace(
        act_preds, type_to_parent_types=domain.type_to_parent_types)
    # Write the domain and problem out to temporary PDDL files.
    dom_file = tempfile.NamedTemporaryFile(delete=False).name
    prob_file = tempfile.NamedTemporaryFile(delete=False).name
    domain.write(dom_file)
    lits = set(state.literals)
    if not domain.operators_as_actions:
        lits |= set(act_space.all_ground_literals(state, valid_only=False))
    PDDLProblemParser.create_pddl_file(
        prob_file, state.objects, lits, "myproblem",
        domain.domain_name, state.goal, fast_downward_order=True)
    # Invoke the external planner, then clean up the temporary files.
    cmd_str = self._get_cmd_str(dom_file, prob_file, timeout)
    start_time = time.time()
    output = subprocess.getoutput(cmd_str)
    self._cleanup()
    os.remove(dom_file)
    os.remove(prob_file)
    if time.time() - start_time > timeout:
        raise PlanningTimeout("Planning timed out!")
    # Parse the planner output into ground action literals.
    pddl_plan = self._output_to_plan(output)
    plan = [self._plan_step_to_action(domain, state, act_preds, plan_step)
            for plan_step in pddl_plan]
    return plan
def __call__(self, domain, state, horizon=np.inf, timeout=10,
             return_files=False):
    act_predicates = [domain.predicates[a] for a in list(domain.actions)]
    act_space = LiteralSpace(
        act_predicates, type_to_parent_types=domain.type_to_parent_types)
    # Write the domain and problem out to temporary PDDL files.
    dom_file = tempfile.NamedTemporaryFile(delete=False).name
    prob_file = tempfile.NamedTemporaryFile(delete=False).name
    domain.write(dom_file)
    lits = set(state.literals)
    if not domain.operators_as_actions:
        lits |= set(act_space.all_ground_literals(state, valid_only=False))
    PDDLProblemParser.create_pddl_file(
        prob_file, state.objects, lits, "myproblem",
        domain.domain_name, state.goal, fast_downward_order=True)
    # Delegate planning to plan_from_pddl; keep the files if requested.
    pddl_plan = self.plan_from_pddl(dom_file, prob_file, horizon=horizon,
                                    timeout=timeout,
                                    remove_files=(not return_files))
    plan = [self._plan_step_to_action(domain, state, act_predicates,
                                      plan_step)
            for plan_step in pddl_plan]
    if return_files:
        return plan, dom_file, prob_file
    return plan
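

# A hedged usage sketch for the two planner __call__ variants above. The FD
# class and its import path follow the pddlgym_planners companion package;
# treat those names as assumptions rather than confirmed API. The planner
# object is called on a (domain, state) pair and returns a list of ground
# action literals that can be fed to env.step. The keyword-timeout call
# below matches the second variant's signature.
def _example_plan_and_execute(env):
    from pddlgym_planners.fd import FD  # assumed import path
    obs, _ = env.reset()
    planner = FD()  # Fast Downward wrapper implementing __call__ above
    plan = planner(env.domain, obs, timeout=10)
    reward, done = 0., False
    for act in plan:
        obs, reward, done, _ = env.step(act)
    return obs, reward, done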
def test_hierarchical_spaces():
    dir_path = os.path.dirname(os.path.realpath(__file__))
    domain_file = os.path.join(dir_path, 'pddl',
                               'hierarchical_type_test_domain.pddl')
    problem_file = os.path.join(dir_path, 'pddl',
                                'hierarchical_type_test_domain',
                                'hierarchical_type_test_problem.pddl')
    domain = PDDLDomainParser(domain_file)
    problem = PDDLProblemParser(problem_file, domain.domain_name,
                                domain.types, domain.predicates,
                                domain.actions)
    actions = list(domain.actions)
    action_predicates = [domain.predicates[a] for a in actions]
    space = LiteralSpace(
        set(domain.predicates.values()) - set(action_predicates),
        type_to_parent_types=domain.type_to_parent_types)
    all_ground_literals = space.all_ground_literals(
        State(problem.initial_state, problem.objects, problem.goal))
    ispresent = Predicate("ispresent", 1, [Type("entity")])
    islight = Predicate("islight", 1, [Type("object")])
    isfurry = Predicate("isfurry", 1, [Type("animal")])
    ishappy = Predicate("ishappy", 1, [Type("animal")])
    attending = Predicate("attending", 2, [Type("animal"), Type("object")])
    nomsy = Type("jindo")("nomsy")
    rover = Type("corgi")("rover")
    rene = Type("cat")("rene")
    block1 = Type("block")("block1")
    block2 = Type("block")("block2")
    cylinder1 = Type("cylinder")("cylinder1")
    assert all_ground_literals == {
        ispresent(nomsy), ispresent(rover), ispresent(rene),
        ispresent(block1), ispresent(block2), ispresent(cylinder1),
        islight(block1), islight(block2), islight(cylinder1),
        isfurry(nomsy), isfurry(rover), isfurry(rene),
        ishappy(nomsy), ishappy(rover), ishappy(rene),
        attending(nomsy, block1), attending(nomsy, block2),
        attending(nomsy, cylinder1),
        attending(rover, block1), attending(rover, block2),
        attending(rover, cylinder1),
        attending(rene, block1), attending(rene, block2),
        attending(rene, cylinder1),
    }
    print("Test passed.")
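

# Assumed entry point so the test above can be run directly as a script;
# pytest would also discover test_hierarchical_spaces by name.
if __name__ == "__main__":
    test_hierarchical_spaces()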
def __init__(self, domain_file, problem_dir, render=None, seed=0,
             raise_error_on_invalid_action=False,
             operators_as_actions=False,
             dynamic_action_space=False):
    self._state = None
    self._domain_file = domain_file
    self._problem_dir = problem_dir
    self._render = render
    self.seed(seed)
    self._raise_error_on_invalid_action = raise_error_on_invalid_action
    self.operators_as_actions = operators_as_actions

    # Set by self.fix_problem_index
    self._problem_index_fixed = False
    self._problem_idx = None

    # Parse the PDDL files
    self.domain, self.problems = self.load_pddl(
        domain_file, problem_dir,
        operators_as_actions=self.operators_as_actions)

    # Determine if the domain is STRIPS
    self._domain_is_strips = _check_domain_for_strips(self.domain)
    self._inference_mode = "csp" if self._domain_is_strips else "prolog"

    # Initialize action space with problem-independent components
    actions = list(self.domain.actions)
    self.action_predicates = [self.domain.predicates[a] for a in actions]
    self._dynamic_action_space = dynamic_action_space
    if dynamic_action_space:
        if self.domain.operators_as_actions and self._domain_is_strips:
            self._action_space = LiteralActionSpace(
                self.domain, self.action_predicates,
                type_hierarchy=self.domain.type_hierarchy,
                type_to_parent_types=self.domain.type_to_parent_types)
        else:
            self._action_space = LiteralSpace(
                self.action_predicates,
                lit_valid_test=self._action_valid_test,
                type_hierarchy=self.domain.type_hierarchy,
                type_to_parent_types=self.domain.type_to_parent_types)
    else:
        self._action_space = LiteralSpace(
            self.action_predicates,
            type_to_parent_types=self.domain.type_to_parent_types)

    # Initialize observation space with problem-independent components
    self._observation_space = LiteralSetSpace(
        set(self.domain.predicates.values()) - set(self.action_predicates),
        type_hierarchy=self.domain.type_hierarchy,
        type_to_parent_types=self.domain.type_to_parent_types)
def __init__(self, domain_file, problem_dir, render=None, seed=0,
             raise_error_on_invalid_action=False,
             operators_as_actions=False,
             dynamic_action_space=False,
             shape_reward_mode=None,
             shaping_discount=1.):
    self._state = None
    self._domain_file = domain_file
    self._problem_dir = problem_dir
    self._render = render
    self.seed(seed)
    self._raise_error_on_invalid_action = raise_error_on_invalid_action
    self.operators_as_actions = operators_as_actions
    if shape_reward_mode == "none":
        shape_reward_mode = None
    self._shape_reward_mode = shape_reward_mode
    self._shaping_discount = shaping_discount
    self._current_heuristic = None
    self._heuristic = None

    # Set by self.fix_problem_index
    self._problem_index_fixed = False
    self._problem_idx = None

    # Parse the PDDL files
    self.domain, self.problems = self.load_pddl(
        domain_file, problem_dir,
        operators_as_actions=self.operators_as_actions)

    # Initialize action space with problem-independent components
    actions = list(self.domain.actions)
    self.action_predicates = [self.domain.predicates[a] for a in actions]
    if dynamic_action_space:
        self._action_space = LiteralSpace(
            self.action_predicates,
            lit_valid_test=self._action_valid_test,
            type_to_parent_types=self.domain.type_to_parent_types)
    else:
        self._action_space = LiteralSpace(
            self.action_predicates,
            type_to_parent_types=self.domain.type_to_parent_types)

    # Initialize observation space with problem-independent components
    self._observation_space = LiteralSetSpace(
        set(self.domain.predicates.values()) - set(self.action_predicates),
        type_to_parent_types=self.domain.type_to_parent_types)
def __init__(self, domain_file, problem_dir, render=None, seed=0,
             raise_error_on_invalid_action=False,
             operators_as_actions=False,
             dynamic_action_space=False,
             compute_approx_reachable_set=False,
             shape_reward_mode=None):
    self._state = None
    self._domain_file = domain_file
    self._problem_dir = problem_dir
    self._render = render
    self.seed(seed)
    self._raise_error_on_invalid_action = raise_error_on_invalid_action
    self.operators_as_actions = operators_as_actions
    self._compute_approx_reachable_set = compute_approx_reachable_set
    self._shape_reward_mode = shape_reward_mode

    # Set by self.fix_problem_index
    self._problem_index_fixed = False

    # Parse the PDDL files
    self.domain, self.problems = self.load_pddl(
        domain_file, problem_dir,
        operators_as_actions=self.operators_as_actions)

    # Initialize action space with problem-independent components
    actions = list(self.domain.actions)
    self.action_predicates = [self.domain.predicates[a] for a in actions]
    if dynamic_action_space:
        self._action_space = LiteralSpace(
            self.action_predicates,
            lit_valid_test=self._action_valid_test,
            type_to_parent_types=self.domain.type_to_parent_types)
    else:
        self._action_space = LiteralSpace(
            self.action_predicates,
            type_to_parent_types=self.domain.type_to_parent_types)

    # Initialize observation space with problem-independent components
    self._observation_space = LiteralSetSpace(
        set(self.domain.predicates.values()) - set(self.action_predicates),
        type_to_parent_types=self.domain.type_to_parent_types)

    if self._compute_approx_reachable_set:
        self._delete_relaxed_ops = self._get_delete_relaxed_ops()
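

# A minimal construction sketch for the __init__ variants above. The file
# paths are placeholders, and PDDLEnv refers to the class defined elsewhere
# in this file. With dynamic_action_space=True, the action space only
# proposes actions whose operator preconditions hold in the current state.
def _example_construct_env():
    env = PDDLEnv("pddl/blocks.pddl",   # placeholder domain file
                  "pddl/blocks/",       # placeholder problem directory
                  operators_as_actions=True,
                  dynamic_action_space=True)
    obs, debug_info = env.reset()
    return env, obs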
class PDDLEnv(gym.Env):
    """
    Parameters
    ----------
    domain_file : str
        Path to a PDDL domain file.
    problem_dir : str
        Path to a directory of PDDL problem files.
    render : fn or None
        An optional render function (obs -> img).
    seed : int
        Random seed used to sample new problems upon reset.
    raise_error_on_invalid_action : bool
        When an action is taken for which no operator's preconditions hold,
        raise InvalidAction() if True; otherwise silently make no changes
        to the state.
    dynamic_action_space : bool
        Let self.action_space dynamically change on each iteration to
        include only valid actions (must match operator preconditions).
    compute_approx_reachable_set : bool
        On each step, compute the approximate reachable set of literals
        under the delete-relaxed version of the domain. Add it to the
        info dict.
    shape_reward : bool
        Whether to shape the reward by evaluating the state against the
        cost given by an optimal planner. The shaped reward is 1 for the
        goal state and 0 for the initial state; any other state yields a
        reward in (-inf, 1), normalized by the optimal distance from the
        initial state to the goal.
    """
    def __init__(self, domain_file, problem_dir, render=None, seed=0,
                 raise_error_on_invalid_action=False,
                 dynamic_action_space=False,
                 compute_approx_reachable_set=False,
                 shape_reward=False):
        self._domain_file = domain_file
        self._problem_dir = problem_dir
        self._render = render
        self.seed(seed)
        self._raise_error_on_invalid_action = raise_error_on_invalid_action
        self._compute_approx_reachable_set = compute_approx_reachable_set
        self._shape_reward = shape_reward

        # Set by self.fix_problem_index
        self._problem_index_fixed = False

        # Parse the PDDL files
        self.domain, self.problems = self.load_pddl(domain_file, problem_dir)

        # Initialize action space with problem-independent components
        actions = list(self.domain.actions)
        self.action_predicates = [self.domain.predicates[a] for a in actions]
        if dynamic_action_space:
            self._action_space = LiteralSpace(
                self.action_predicates,
                lit_valid_test=self._action_valid_test)
        else:
            self._action_space = LiteralSpace(self.action_predicates)

        # Initialize observation space with problem-independent components
        self._observation_space = LiteralSetSpace(
            set(self.domain.predicates.values()) -
            set(self.action_predicates))

        if self._compute_approx_reachable_set:
            self._delete_relaxed_ops = self._get_delete_relaxed_ops()

    @staticmethod
    def load_pddl(domain_file, problem_dir):
        """
        Parse domain and problem PDDL files.

        Parameters
        ----------
        domain_file : str
            Path to a PDDL domain file.
        problem_dir : str
            Path to a directory of PDDL problem files.

        Returns
        -------
        domain : PDDLDomainParser
        problems : [ PDDLProblemParser ]
        """
        domain = PDDLDomainParser(domain_file)
        problems = []
        problem_files = [f for f in
                         glob.glob(os.path.join(problem_dir, "*.pddl"))]
        for problem_file in sorted(problem_files):
            problem = PDDLProblemParser(problem_file, domain.domain_name,
                                        domain.types, domain.predicates)
            problems.append(problem)
        return domain, problems

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def action_space(self):
        return self._action_space

    def seed(self, seed):
        self._seed = seed
        self.rng = np.random.RandomState(seed)

    def fix_problem_index(self, problem_idx):
        """
        Fix the PDDL problem used when reset is called.

        Useful for reproducible testing.

        The order of PDDL problems is determined by the names of their
        files. See PDDLEnv.load_pddl.

        Parameters
        ----------
        problem_idx : int
        """
        self._problem_idx = problem_idx
        self._problem_index_fixed = True

    def reset(self, rend=False):
        """
        Set up a new PDDL problem and start a new episode.

        Note that the PDDL files are included in debug_info.

        Returns
        -------
        obs : { Literal }
            See self._get_observation()
        debug_info : dict
            See self._get_debug_info()
        """
        if not self._problem_index_fixed:
            self._problem_idx = self.rng.choice(len(self.problems))
        self._problem = self.problems[self._problem_idx]
        self._state = self._problem.initial_state
        self._goal = self._problem.goal
        # The action and observation spaces depend on the objects
        self._action_space.update(self._problem.objects)
        self._observation_space.update(self._problem.objects)
        debug_info = self._get_debug_info(rend)
        if self._shape_reward:
            # Update problem file by ordering objects, init, and goal properly.
            with open(debug_info["problem_file"], "r") as f:
                problem = f.read().lower()
            obj_ind = problem.index("(:objects")
            objects = PDDLParser._find_balanced_expression(problem, obj_ind)
            init_ind = problem.index("(:init")
            init = PDDLParser._find_balanced_expression(problem, init_ind)
            goal_ind = problem.index("(:goal")
            goal = PDDLParser._find_balanced_expression(problem, goal_ind)
            start = problem[:min(obj_ind, init_ind, goal_ind)]
            updated_problem = (start + objects + "\n" + init + "\n" +
                               goal + "\n)")
            with open("updated_problem.pddl", "w") as f:
                f.write(updated_problem)
            self._initial_distance = get_fd_optimal_plan_cost(
                debug_info["domain_file"], "updated_problem.pddl")
            os.remove("updated_problem.pddl")
        return self._get_observation(), debug_info

    def _get_observation(self):
        """
        Observations are sets of ground literals.

        Action literals are not included in the observation.

        Returns
        -------
        obs : { Literal }
        """
        obs = set()
        for lit in self._state:
            if lit.predicate.name in self.domain.actions:
                continue
            obs.add(lit)
        return obs

    def _get_debug_info(self, rend=False):
        """
        Contains the problem file, domain file, and objects for
        interaction with a planner.
        """
        info = {'problem_file': self._problem.problem_fname,
                'domain_file': self.domain.domain_fname,
                'objects': self._problem.objects}
        if rend:
            info['image'] = self.render()
        else:
            info['image'] = None
        if self._compute_approx_reachable_set:
            info['approx_reachable_set'] = self._get_approx_reachable_set()
        return info

    def _get_delete_relaxed_ops(self):
        relaxed_ops = {}
        for name, operator in self.domain.operators.items():
            relaxed_op = copy.deepcopy(operator)
            for precond in operator.preconds.literals:
                if precond.is_negative:
                    relaxed_op.preconds.literals.remove(precond)
                assert not precond.is_anti, "Should be impossible"
            for effect in operator.effects.literals:
                assert not effect.is_negative, "Should be impossible"
                if effect.is_anti:
                    relaxed_op.effects.literals.remove(effect)
            relaxed_ops[name] = relaxed_op
        return relaxed_ops

    def _get_approx_reachable_set(self):
        obs = self._get_observation()
        old_ops = self.domain.operators
        self.domain.operators = self._delete_relaxed_ops
        prev_len = 0
        while prev_len != len(obs):  # do the fixed-point iteration
            prev_len = len(obs)
            for action in self.action_space.all_ground_literals():
                selected_operator, assignment = self._select_operator(action)
                if assignment is not None:
                    for lifted_effect in selected_operator.effects.literals:
                        effect = ground_literal(lifted_effect, assignment)
                        assert not effect.is_anti  # should be relaxed
                        obs.add(effect)
        self.domain.operators = old_ops
        return obs

    def _select_operator(self, action):
        """
        Helper function for step.
        """
        # Knowledge base: literals in the state + action taken
        kb = self._get_observation() | {action}

        selected_operator = None
        assignment = None
        for operator in self.domain.operators.values():
            conds = operator.preconds.literals
            assignments = find_satisfying_assignments(kb, conds)
            num_assignments = len(assignments)
            if num_assignments > 0:
                assert num_assignments == 1, \
                    "Nondeterministic envs not supported"
                selected_operator = operator
                assignment = assignments[0]
                break
        return selected_operator, assignment

    def step(self, action):
        """
        Execute an action and update the state.

        Tries to find a ground operator for which the preconditions hold
        when this action is taken. If none exist, optionally raises
        InvalidAction. If multiple exist, raises an AssertionError, since
        we assume deterministic environments only. Once the operator is
        found, the ground effects are executed to update the state.

        Parameters
        ----------
        action : Literal

        Returns
        -------
        obs : { Literal }
            See self._get_observation.
        reward : float
            1 if the goal is reached and 0 otherwise.
        done : bool
            True if the goal is reached.
        debug_info : dict
            See self._get_debug_info.
        """
        selected_operator, assignment = self._select_operator(action)

        # A ground operator was found; execute the ground effects
        if assignment is not None:
            self._execute_effects(selected_operator.effects.literals,
                                  assignment)
        # No operator was found
        elif self._raise_error_on_invalid_action:
            raise InvalidAction()

        return self._finish_step()

    def _finish_step(self):
        """Helper for step."""
        obs = self._get_observation()
        goal_reached = self._is_goal_reached()
        debug_info = self._get_debug_info()
        if goal_reached:
            reward = 1.
            done = True
        else:
            reward = 0.
            done = False
        if self._shape_reward:
            # Update problem file to contain current state.
            with open(debug_info["problem_file"], "r") as f:
                problem = f.read().lower()
            obj_ind = problem.index("(:objects")
            objects = PDDLParser._find_balanced_expression(problem, obj_ind)
            init_ind = problem.index("(:init")
            init = "(:init\n"
            for lit in obs:
                init += lit.pddl_str() + "\n"
            for lit in self._state:
                if lit.predicate.name in self.domain.actions:
                    init += lit.pddl_str() + "\n"
            init += "\n)"
            goal_ind = problem.index("(:goal")
            goal = PDDLParser._find_balanced_expression(problem, goal_ind)
            start = problem[:min(obj_ind, init_ind, goal_ind)]
            updated_problem = (start + objects + "\n" + init + "\n" +
                               goal + "\n)")
            with open("updated_problem.pddl", "w") as f:
                f.write(updated_problem)
            distance = get_fd_optimal_plan_cost(debug_info["domain_file"],
                                                "updated_problem.pddl")
            os.remove("updated_problem.pddl")
            reward = 1.0 - distance / self._initial_distance  # range: (-inf, 1]
        return obs, reward, done, debug_info

    def _is_goal_reached(self):
        """
        Used to calculate reward.
        """
        return self._goal.holds(self._state)

    def _action_valid_test(self, action):
        _, assignment = self._select_operator(action)
        return assignment is not None

    def _execute_effects(self, lifted_effects, assignments):
        """
        Update the state given lifted operator effects and assignments
        of variables to objects.

        Parameters
        ----------
        lifted_effects : { Literal }
        assignments : { TypedEntity : TypedEntity }
            Maps variables to objects.
        """
        new_state = {lit for lit in self._state}

        for lifted_effect in lifted_effects:
            effect = ground_literal(lifted_effect, assignments)
            # Negative effect
            if effect.is_anti:
                literal = effect.inverted_anti
                if literal in new_state:
                    new_state.remove(literal)
            else:
                new_state.add(effect)
        self._state = new_state

    def render(self, *args, **kwargs):
        if self._render:
            obs = self._get_observation()
            return self._render(obs, *args, **kwargs)
class PDDLEnv(gym.Env):
    """
    Parameters
    ----------
    domain_file : str
        Path to a PDDL domain file.
    problem_dir : str
        Path to a directory of PDDL problem files.
    render : fn or None
        An optional render function (obs -> img).
    seed : int
        Random seed used to sample new problems upon reset.
    raise_error_on_invalid_action : bool
        When an action is taken for which no operator's preconditions hold,
        raise InvalidAction() if True; otherwise silently make no changes
        to the state.
    operators_as_actions : bool
        If True, the PDDL operators are treated as the actions.
        Otherwise, actions must be specified separately in the PDDL file.
    dynamic_action_space : bool
        Let self.action_space dynamically change on each iteration to
        include only valid actions (must match operator preconditions).
    compute_approx_reachable_set : bool
        On each step, compute the approximate reachable set of literals
        under the delete-relaxed version of the domain. Add it to the
        info dict.
    shape_reward_mode : str or None
        Method for reward shaping.
        - None: no reward shaping is done.
        - "optimal": Evaluates the current state against the distance to
          the goal given by an optimal planner. Gives rewards in the range
          (-inf, 1), with 1 for the goal state and 0 for the initial state.
        - any heuristic in pyperplan.HEURISTICS:
          https://github.com/aibasel/pyperplan/tree/master/src/heuristics
          e.g., "blind", "hadd", "hff", "hmax", "hsa", "lmcut".
          Note that "blind" is equivalent to None, and that "landmark" is
          path-dependent and so disallowed. Gives rewards in the range
          (-inf, 1), with 1 for the goal state and 0 for the initial state.
    """
    def __init__(self, domain_file, problem_dir, render=None, seed=0,
                 raise_error_on_invalid_action=False,
                 operators_as_actions=False,
                 dynamic_action_space=False,
                 compute_approx_reachable_set=False,
                 shape_reward_mode=None):
        self._state = None
        self._domain_file = domain_file
        self._problem_dir = problem_dir
        self._render = render
        self.seed(seed)
        self._raise_error_on_invalid_action = raise_error_on_invalid_action
        self.operators_as_actions = operators_as_actions
        self._compute_approx_reachable_set = compute_approx_reachable_set
        self._shape_reward_mode = shape_reward_mode

        # Set by self.fix_problem_index
        self._problem_index_fixed = False

        # Parse the PDDL files
        self.domain, self.problems = self.load_pddl(
            domain_file, problem_dir,
            operators_as_actions=self.operators_as_actions)

        # Initialize action space with problem-independent components
        actions = list(self.domain.actions)
        self.action_predicates = [self.domain.predicates[a] for a in actions]
        if dynamic_action_space:
            self._action_space = LiteralSpace(
                self.action_predicates,
                lit_valid_test=self._action_valid_test,
                type_to_parent_types=self.domain.type_to_parent_types)
        else:
            self._action_space = LiteralSpace(
                self.action_predicates,
                type_to_parent_types=self.domain.type_to_parent_types)

        # Initialize observation space with problem-independent components
        self._observation_space = LiteralSetSpace(
            set(self.domain.predicates.values()) -
            set(self.action_predicates),
            type_to_parent_types=self.domain.type_to_parent_types)

        if self._compute_approx_reachable_set:
            self._delete_relaxed_ops = self._get_delete_relaxed_ops()

    @staticmethod
    def load_pddl(domain_file, problem_dir, operators_as_actions=False):
        """
        Parse domain and problem PDDL files.

        Parameters
        ----------
        domain_file : str
            Path to a PDDL domain file.
        problem_dir : str
            Path to a directory of PDDL problem files.
        operators_as_actions : bool
            See class docstring.

        Returns
        -------
        domain : PDDLDomainParser
        problems : [ PDDLProblemParser ]
        """
        domain = PDDLDomainParser(
            domain_file,
            expect_action_preds=(not operators_as_actions),
            operators_as_actions=operators_as_actions)
        problems = []
        problem_files = [f for f in
                         glob.glob(os.path.join(problem_dir, "*.pddl"))]
        for problem_file in sorted(problem_files):
            problem = PDDLProblemParser(problem_file, domain.domain_name,
                                        domain.types, domain.predicates)
            problems.append(problem)
        return domain, problems

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def action_space(self):
        return self._action_space

    def seed(self, seed):
        self._seed = seed
        self.rng = np.random.RandomState(seed)

    def fix_problem_index(self, problem_idx):
        """
        Fix the PDDL problem used when reset is called.

        Useful for reproducible testing.

        The order of PDDL problems is determined by the names of their
        files. See PDDLEnv.load_pddl.

        Parameters
        ----------
        problem_idx : int
        """
        self._problem_idx = problem_idx
        self._problem_index_fixed = True

    def reset(self):
        """
        Set up a new PDDL problem and start a new episode.

        Note that the PDDL files are included in debug_info.

        Returns
        -------
        obs : { Literal }
            See self._get_observation()
        debug_info : dict
            See self._get_debug_info()
        """
        if not self._problem_index_fixed:
            self._problem_idx = self.rng.choice(len(self.problems))
        self._problem = self.problems[self._problem_idx]
        self._state = {lit for lit in self._problem.initial_state
                       if lit.predicate.name not in self.domain.actions}
        self._action_lits = {lit for lit in self._problem.initial_state
                             if lit.predicate.name in self.domain.actions}
        self._goal = self._problem.goal
        # The action and observation spaces depend on the objects
        self._action_space.update(self._problem.objects)
        self._observation_space.update(self._problem.objects)
        debug_info = self._get_debug_info()
        self._initialize_reward_shaping_data(debug_info)
        return self._get_observation(self._state), debug_info

    def _get_observation(self, state):
        """
        Observations are sets of ground literals.

        Action literals are not included in the observation.

        Returns
        -------
        obs : { Literal }
        """
        return state

    def _initialize_reward_shaping_data(self, debug_info):
        """At the start of each episode, initialize whatever is needed to
        perform reward shaping.
        """
        if self._shape_reward_mode is not None:
            # Update problem file by ordering objects, init, and goal properly.
            with open(debug_info["problem_file"], "r") as f:
                problem = f.read().lower()
            obj_ind = problem.index("(:objects")
            objects = PDDLParser._find_balanced_expression(problem, obj_ind)
            init_ind = problem.index("(:init")
            init = PDDLParser._find_balanced_expression(problem, init_ind)
            goal_ind = problem.index("(:goal")
            goal = PDDLParser._find_balanced_expression(problem, goal_ind)
            start = problem[:min(obj_ind, init_ind, goal_ind)]
            updated_problem = (start + objects + "\n" + init + "\n" +
                               goal + "\n)")
            with open("updated_problem.pddl", "w") as f:
                f.write(updated_problem)
            self._initial_distance = self._get_distance(
                debug_info["domain_file"], "updated_problem.pddl")
            os.remove("updated_problem.pddl")

    def _get_debug_info(self):
        """
        Contains the problem file, domain file, and objects for
        interaction with a planner.
        """
        info = {'problem_file': self._problem.problem_fname,
                'domain_file': self.domain.domain_fname,
                'objects': self._problem.objects,
                'image': self.render()}
        if self._compute_approx_reachable_set:
            info['approx_reachable_set'] = self._get_approx_reachable_set()
        return info

    def _get_delete_relaxed_ops(self):
        relaxed_ops = {}
        for name, operator in self.domain.operators.items():
            relaxed_op = copy.deepcopy(operator)
            for precond in operator.preconds.literals:
                if precond.is_negative:
                    relaxed_op.preconds.literals.remove(precond)
                assert not precond.is_anti, "Should be impossible"
            for effect in operator.effects.literals:
                assert not effect.is_negative, "Should be impossible"
                if effect.is_anti:
                    relaxed_op.effects.literals.remove(effect)
            relaxed_ops[name] = relaxed_op
        return relaxed_ops

    def _get_approx_reachable_set(self):
        obs = self._get_observation(self._state)
        old_ops = self.domain.operators
        self.domain.operators = self._delete_relaxed_ops
        prev_len = 0
        while prev_len != len(obs):  # do the fixed-point iteration
            prev_len = len(obs)
            for action in self.action_space.all_ground_literals():
                selected_operator, assignment = self._select_operator(
                    self._state, action)
                if assignment is not None:
                    for lifted_effect in selected_operator.effects.literals:
                        effect = ground_literal(lifted_effect, assignment)
                        assert not effect.is_anti  # should be relaxed
                        obs.add(effect)
        self.domain.operators = old_ops
        return obs

    def _select_operator(self, state, action):
        """
        Helper function for step.
        """
        if self.operators_as_actions:
            # There should be only one possible operator if actions are
            # operators
            possible_operators = set()
            for name, operator in self.domain.operators.items():
                if name.lower() == action.predicate.name.lower():
                    assert len(possible_operators) == 0
                    possible_operators.add(operator)
        else:
            # Possibly multiple operators per action
            possible_operators = set(self.domain.operators.values())

        # Knowledge base: literals in the state + action taken
        kb = self._get_observation(state) | {action}

        selected_operator = None
        assignment = None
        for operator in possible_operators:
            if isinstance(operator.preconds, Literal):
                conds = [operator.preconds]
            else:
                conds = operator.preconds.literals
            # Necessary for binding the operator arguments to the variables
            if self.operators_as_actions:
                conds = [action.predicate(*operator.params)] + conds
            # Check whether action is in the preconditions
            action_literal = None
            for lit in conds:
                if lit.predicate == action.predicate:
                    action_literal = lit
                    break
            if action_literal is None:
                continue
            # For proving, consider action variables first
            action_variables = action_literal.variables
            variable_sort_fn = lambda v: (v not in action_variables, v)
            assignments = find_satisfying_assignments(
                kb, conds, variable_sort_fn=variable_sort_fn,
                type_to_parent_types=self.domain.type_to_parent_types)
            num_assignments = len(assignments)
            if num_assignments > 0:
                assert num_assignments == 1, \
                    "Nondeterministic envs not supported"
                selected_operator = operator
                assignment = assignments[0]
                break
        return selected_operator, assignment

    def step(self, action):
        """
        Execute an action and update the state.

        Tries to find a ground operator for which the preconditions hold
        when this action is taken. If none exist, optionally raises
        InvalidAction. If multiple exist, raises an AssertionError, since
        we assume deterministic environments only. Once the operator is
        found, the ground effects are executed to update the state.

        Parameters
        ----------
        action : Literal

        Returns
        -------
        obs : { Literal }
            See self._get_observation.
        reward : float
            1 if the goal is reached and 0 otherwise.
        done : bool
            True if the goal is reached.
        debug_info : dict
            See self._get_debug_info.
        """
        selected_operator, assignment = self._select_operator(self._state,
                                                              action)
        # A ground operator was found; execute the ground effects
        if assignment is not None:
            self._state = _apply_effects(
                self._state,
                selected_operator.effects.literals,
                assignment,
            )
        # No operator was found
        elif self._raise_error_on_invalid_action:
            raise InvalidAction()

        obs = self._get_observation(self._state)
        done = self._is_goal_reached(self._state)
        debug_info = self._get_debug_info()
        reward = self._reward(self._state, done)
        return obs, reward, done, debug_info

    def sample_transition(self, action):
        selected_operator, assignment = self._select_operator(self._state,
                                                              action)
        state = self._state
        # A ground operator was found; execute the ground effects
        if assignment is not None:
            state = _apply_effects(
                self._state,
                selected_operator.effects.literals,
                assignment,
            )
        # No operator was found
        elif self._raise_error_on_invalid_action:
            raise InvalidAction()

        obs = self._get_observation(state)
        done = self._is_goal_reached(state)
        reward = self._reward(state, done)
        return obs, reward, done, {}

    def _reward(self, state, done):
        if done:
            reward = 1.
        else:
            reward = 0.
        if self._shape_reward_mode is not None:
            # Update problem file to contain current state.
            with open(self._problem.problem_fname, "r") as f:
                problem = f.read().lower()
            obj_ind = problem.index("(:objects")
            objects = PDDLParser._find_balanced_expression(problem, obj_ind)
            init_ind = problem.index("(:init")
            init = "(:init\n"
            for lit in state:
                init += lit.pddl_str() + "\n"
            for lit in self._action_lits:
                init += lit.pddl_str() + "\n"
            init += "\n)"
            goal_ind = problem.index("(:goal")
            goal = PDDLParser._find_balanced_expression(problem, goal_ind)
            start = problem[:min(obj_ind, init_ind, goal_ind)]
            updated_problem = (start + objects + "\n" + init + "\n" +
                               goal + "\n)")
            with open("updated_problem.pddl", "w") as f:
                f.write(updated_problem)
            distance = self._get_distance(self.domain.domain_fname,
                                          "updated_problem.pddl")
            os.remove("updated_problem.pddl")
            reward = 1.0 - distance / self._initial_distance  # range: (-inf, 1]
        return reward

    def _get_distance(self, domain_file, problem_file):
        """Used by reward shaping methods. Get the distance from the
        initial state to the goal in the problem_file.
        """
        if self._shape_reward_mode == "optimal":
            return get_fd_optimal_plan_cost(domain_file, problem_file)
        return get_pyperplan_heuristic(self._shape_reward_mode,
                                       domain_file, problem_file)

    def _is_goal_reached(self, state):
        """
        Used to calculate reward.
        """
        return self._goal.holds(state)

    def _action_valid_test(self, action):
        _, assignment = self._select_operator(self._state, action)
        return assignment is not None

    def render(self, *args, **kwargs):
        if self._render:
            obs = self._get_observation(self._state)
            return self._render(obs, *args, **kwargs)
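

# A hedged sketch of the reward-shaping option above. "hadd" is one of the
# pyperplan heuristic names listed in the class docstring; that pyperplan is
# installed is an assumption of this example. Shaped rewards follow
# reward = 1 - distance / initial_distance, so a state whose heuristic
# distance is 6 when the initial distance was 8 yields reward 0.25.
def _example_shaped_env(domain_file, problem_dir):
    env = PDDLEnv(domain_file, problem_dir,
                  shape_reward_mode="hadd")  # or "optimal" for exact costs
    obs, debug_info = env.reset()
    return env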
class PDDLEnv(gym.Env):
    """
    Parameters
    ----------
    domain_file : str
        Path to a PDDL domain file.
    problem_dir : str
        Path to a directory of PDDL problem files.
    render : fn or None
        An optional render function (obs -> img).
    seed : int
        Random seed used to sample new problems upon reset.
    raise_error_on_invalid_action : bool
        When an action is taken for which no operator's preconditions hold,
        raise InvalidAction() if True; otherwise silently make no changes
        to the state.
    operators_as_actions : bool
        If True, the PDDL operators are treated as the actions.
        Otherwise, actions must be specified separately in the PDDL file.
    dynamic_action_space : bool
        Let self.action_space dynamically change on each iteration to
        include only valid actions (must match operator preconditions).
    """
    def __init__(self, domain_file, problem_dir, render=None, seed=0,
                 raise_error_on_invalid_action=False,
                 operators_as_actions=False,
                 dynamic_action_space=False):
        self._state = None
        self._domain_file = domain_file
        self._problem_dir = problem_dir
        self._render = render
        self.seed(seed)
        self._raise_error_on_invalid_action = raise_error_on_invalid_action
        self.operators_as_actions = operators_as_actions

        # Set by self.fix_problem_index
        self._problem_index_fixed = False
        self._problem_idx = None

        # Parse the PDDL files
        self.domain, self.problems = self.load_pddl(
            domain_file, problem_dir,
            operators_as_actions=self.operators_as_actions)

        # Determine if the domain is STRIPS
        self._domain_is_strips = _check_domain_for_strips(self.domain)
        self._inference_mode = "csp" if self._domain_is_strips else "prolog"

        # Initialize action space with problem-independent components
        actions = list(self.domain.actions)
        self.action_predicates = [self.domain.predicates[a] for a in actions]
        self._dynamic_action_space = dynamic_action_space
        if dynamic_action_space:
            if self.domain.operators_as_actions and self._domain_is_strips:
                self._action_space = LiteralActionSpace(
                    self.domain, self.action_predicates,
                    type_hierarchy=self.domain.type_hierarchy,
                    type_to_parent_types=self.domain.type_to_parent_types)
            else:
                self._action_space = LiteralSpace(
                    self.action_predicates,
                    lit_valid_test=self._action_valid_test,
                    type_hierarchy=self.domain.type_hierarchy,
                    type_to_parent_types=self.domain.type_to_parent_types)
        else:
            self._action_space = LiteralSpace(
                self.action_predicates,
                type_to_parent_types=self.domain.type_to_parent_types)

        # Initialize observation space with problem-independent components
        self._observation_space = LiteralSetSpace(
            set(self.domain.predicates.values()) -
            set(self.action_predicates),
            type_hierarchy=self.domain.type_hierarchy,
            type_to_parent_types=self.domain.type_to_parent_types)

    @staticmethod
    def load_pddl(domain_file, problem_dir, operators_as_actions=False):
        """
        Parse domain and problem PDDL files.

        Parameters
        ----------
        domain_file : str
            Path to a PDDL domain file.
        problem_dir : str
            Path to a directory of PDDL problem files.
        operators_as_actions : bool
            See class docstring.

        Returns
        -------
        domain : PDDLDomainParser
        problems : [ PDDLProblemParser ]
        """
        domain = PDDLDomainParser(
            domain_file,
            expect_action_preds=(not operators_as_actions),
            operators_as_actions=operators_as_actions)
        problems = []
        problem_files = [f for f in
                         glob.glob(os.path.join(problem_dir, "*.pddl"))]
        for problem_file in sorted(problem_files):
            problem = PDDLProblemParser(problem_file, domain.domain_name,
                                        domain.types, domain.predicates,
                                        domain.actions, domain.constants)
            problems.append(problem)
        return domain, problems

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def action_space(self):
        return self._action_space

    def set_state(self, state):
        self._state = state

    def get_state(self):
        return self._state

    def seed(self, seed):
        self._seed = seed
        self.rng = np.random.RandomState(seed)

    def fix_problem_index(self, problem_idx):
        """
        Fix the PDDL problem used when reset is called.

        Useful for reproducible testing.

        The order of PDDL problems is determined by the names of their
        files. See PDDLEnv.load_pddl.

        Parameters
        ----------
        problem_idx : int
        """
        self._problem_idx = problem_idx
        self._problem_index_fixed = True

    def reset(self):
        """
        Set up a new PDDL problem and start a new episode.

        Note that the PDDL files are included in debug_info.

        Returns
        -------
        obs : State
            The initial state: the set of active predicates, the objects,
            and the goal.
        debug_info : dict
            See self._get_debug_info()
        """
        if not self._problem_index_fixed:
            self._problem_idx = self.rng.choice(len(self.problems))
        self._problem = self.problems[self._problem_idx]
        initial_state = State(frozenset(self._problem.initial_state),
                              frozenset(self._problem.objects),
                              self._problem.goal)
        initial_state = self._handle_derived_literals(initial_state)
        self.set_state(initial_state)
        self._goal = self._problem.goal
        debug_info = self._get_debug_info()
        self._action_space.reset_initial_state(initial_state)
        return self.get_state(), debug_info

    def _get_debug_info(self):
        """
        Contains the problem file and domain file for interaction with a
        planner.
        """
        info = {'problem_file': self._problem.problem_fname,
                'domain_file': self.domain.domain_fname}
        return info

    def step(self, action):
        """
        Execute an action and update the state.

        Tries to find a ground operator for which the preconditions hold
        when this action is taken. If none exist, optionally raises
        InvalidAction. If multiple exist, raises an AssertionError, since
        we assume deterministic environments only. Once the operator is
        found, the ground effects are executed to update the state.

        Parameters
        ----------
        action : Literal

        Returns
        -------
        state : State
            The set of active predicates.
        reward : float
            1 if the goal is reached and 0 otherwise.
        done : bool
            True if the goal is reached.
        debug_info : dict
            See self._get_debug_info.
        """
        state, reward, done, debug_info = self.sample_transition(action)
        self.set_state(state)
        return state, reward, done, debug_info

    def _get_new_state_info(self, state):
        state = self._handle_derived_literals(state)
        done = self._is_goal_reached(state)
        reward = self.extrinsic_reward(state, done)
        debug_info = self._get_debug_info()
        return state, reward, done, debug_info

    def sample_transition(self, action):
        state = self._get_successor_state(
            self._state, action, self.domain,
            inference_mode=self._inference_mode,
            raise_error_on_invalid_action=self._raise_error_on_invalid_action)
        return self._get_new_state_info(state)

    def _get_successor_state(self, *args, **kwargs):
        """Separated out to allow for overrides in subclasses."""
        return get_successor_state(*args, **kwargs)

    def _get_successor_states(self, *args, **kwargs):
        """Separated out to allow for overrides in subclasses."""
        return get_successor_states(*args, **kwargs)

    def get_all_possible_transitions(self, action, return_probs=False):
        assert self.domain.is_probabilistic
        states = self._get_successor_states(
            self._state, action, self.domain,
            inference_mode=self._inference_mode,
            raise_error_on_invalid_action=self._raise_error_on_invalid_action,
            return_probs=return_probs)
        if return_probs:
            return [(self._get_new_state_info(state), prob)
                    for state, prob in states.items()]
        return [self._get_new_state_info(state) for state in states]

    def extrinsic_reward(self, state, done):
        if done:
            reward = 1.
        else:
            reward = 0.
        return reward

    def _is_goal_reached(self, state):
        """
        Check if the terminal condition is met, i.e., the goal is reached.
        """
        return check_goal(state, self._goal)

    def _action_valid_test(self, state, action):
        _, assignment = _select_operator(state, action, self.domain,
                                         inference_mode=self._inference_mode)
        return assignment is not None

    def render(self, *args, **kwargs):
        if self._render:
            return self._render(self._state.literals, *args, **kwargs)

    def _handle_derived_literals(self, state):
        # First remove any old derived literals, since they're outdated.
        to_remove = set()
        for lit in state.literals:
            if lit.predicate.is_derived:
                to_remove.add(lit)
        state = state.with_literals(state.literals - to_remove)
        while True:  # loop, because derived predicates can be recursive
            new_derived_literals = set()
            for pred in self.domain.predicates.values():
                if not pred.is_derived:
                    continue
                assignments = find_satisfying_assignments(
                    state.literals, pred.body,
                    type_to_parent_types=self.domain.type_to_parent_types,
                    constants=self.domain.constants,
                    mode="prolog", max_assignment_count=99999)
                for assignment in assignments:
                    objects = [assignment[param_type(param_name)]
                               for param_name, param_type in zip(
                                   pred.param_names, pred.var_types)]
                    derived_literal = pred(*objects)
                    if derived_literal not in state.literals:
                        new_derived_literals.add(derived_literal)
            if new_derived_literals:
                state = state.with_literals(
                    state.literals | new_derived_literals)
            else:  # terminate
                break
        return state
def __call__(self, domain, state, timeout):
    act_preds = [domain.predicates[a] for a in list(domain.actions)]
    act_space = LiteralSpace(
        act_preds, type_to_parent_types=domain.type_to_parent_types)
    dom_file = tempfile.NamedTemporaryFile(delete=False).name
    prob_file = tempfile.NamedTemporaryFile(delete=False).name
    domain.write(dom_file)
    lits = set(state.literals)
    if not domain.operators_as_actions:
        lits |= set(act_space.all_ground_literals(state, valid_only=False))
    PDDLProblemParser.create_pddl_file(
        prob_file, state.objects, lits, "myproblem",
        domain.domain_name, state.goal, fast_downward_order=True)
    cur_objects = set()
    start_time = time.time()
    if self._force_include_goal_objects:
        # Always start off considering objects in the goal.
        for lit in state.goal.literals:
            cur_objects |= set(lit.variables)
    # Get scores once.
    object_to_score = {
        obj: self._guidance.score_object(obj, state)
        for obj in state.objects if obj not in cur_objects
    }
    # Initialize threshold.
    threshold = self._gamma
    for _ in range(self._max_iterations):
        # Find new objects by incrementally lowering threshold.
        unused_objs = sorted(list(state.objects - cur_objects))
        new_objs = set()
        while unused_objs:
            # Geometrically lower threshold.
            threshold *= self._gamma
            # See if there are any new objects.
            new_objs = {o for o in unused_objs
                        if object_to_score[o] > threshold}
            # If there are, try planning with them.
            if new_objs:
                break
        cur_objects |= new_objs
        # Keep only literals referencing currently considered objects.
        cur_lits = set()
        for lit in state.literals:
            if all(var in cur_objects for var in lit.variables):
                cur_lits.add(lit)
        dummy_state = State(cur_lits, cur_objects, state.goal)
        # Try planning with only this object set.
        print("[Trying to plan with {} objects of {} total, "
              "threshold is {}...]".format(len(cur_objects),
                                           len(state.objects), threshold),
              flush=True)
        try:
            time_elapsed = time.time() - start_time
            # Get a plan from base planner & validate it.
            plan = self._planner(domain, dummy_state, timeout - time_elapsed)
            if not validate_strips_plan(domain_file=dom_file,
                                        problem_file=prob_file,
                                        plan=plan):
                raise PlanningFailure("Invalid plan")
        except PlanningFailure:
            # Try again with more objects.
            if len(cur_objects) == len(state.objects):
                # We already tried with all objects, give up.
                break
            continue
        return plan
    raise PlanningFailure("Plan not found! Reached max_iterations.")
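

# A hedged sketch of wiring up the incremental-grounding planner above. Its
# constructor arguments (a base planner, a guidance object exposing
# score_object, gamma, max_iterations, force_include_goal_objects) are
# inferred from the attributes used in __call__ and should be treated as
# assumptions rather than confirmed API.
def _example_incremental_planning(planner, env):
    obs, _ = env.reset()
    try:
        return planner(env.domain, obs, timeout=60)
    except PlanningFailure:
        return None  # no plan found within max_iterations / timeout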