def __init__(
    self,
    n_rooms: int,
    fluents: AbstractFluents,
    saved_automaton: str,
    reward: float,
    save_to: Optional[str] = None,
):
    """Initialize.

    :param n_rooms: the number of rooms in this environment. From this
        will follow the fluents and automaton states.
    :param fluents: this object contains the fluents valuation function.
        This method will check that the valuated fluents are the
        expected ones.
    :param saved_automaton: path to a saved automaton corresponding to a
        temporal goal. Fluents must match.
    :param reward: reward supplied when the goal is reached.
    :param save_to: path where the automaton should be exported.
    """
    # Define the propositional symbols: the global fluents plus one
    # "at{i}" and one "in{i}" symbol per room.
    expected_fluents = {"bip", "person", "closed"}
    for i in range(1, n_rooms + 1):
        expected_fluents.add(f"at{i}")
        expected_fluents.add(f"in{i}")

    # Load automaton.
    # NOTE(review): pickle.load is unsafe on untrusted files; only load
    # automata produced by this project itself.
    with open(saved_automaton, "rb") as f:
        automaton: SimpleDFA = pickle.load(f)

    # Check that the valuated fluents are exactly the expected ones.
    if fluents.fluents != expected_fluents:
        raise ValueError(
            f"Symbols do not match: {fluents.fluents} != {expected_fluents}"
        )
    # NOTE: not checking also for automaton. Assuming correct

    # Super
    TemporalGoal.__init__(
        self,
        formula=None,  # Providing automaton directly
        reward=reward,
        automaton=automaton,
        labels=fluents,
        extract_fluents=fluents.evaluate,
        reward_shaping=False,
        zero_terminal_state=False,
    )

    # Maybe export a rendering of the automaton.
    if save_to:
        self.automaton.to_graphviz().render(save_to)
def make_env_from_dfa(config: BreakoutConfiguration, dfa: DFA, goal_reward: float = 1000.0, reward_shaping: bool = True) -> gym.Env:
    """
    Make the Breakout environment.

    :param config: the Breakout configuration.
    :param dfa: the automaton that constitutes the goal.
    :param goal_reward: the reward associated to the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    def _features(obs, action):
        # Paddle position plus full ball state.
        return (
            obs["paddle_x"],
            obs["ball_x"],
            obs["ball_y"],
            obs["ball_x_speed"],
            obs["ball_y_speed"],
        )

    temporal_goal = TemporalGoal(
        automaton=dfa,
        reward=goal_reward,
        reward_shaping=reward_shaping,
        zero_terminal_state=False,
        extract_fluents=extract_breakout_fluents,
    )
    base_env = BreakoutLearner2Wrapper(config)
    return TemporalGoalWrapper(
        base_env,
        [temporal_goal],
        combine=lambda obs, qs: tuple((*obs, *qs)),
        feature_extractor=_features,
    )
def make_env_from_dfa(config: SapientinoConfiguration, dfa: pythomata.dfa.DFA, goal_reward: float = 1000.0, reward_shaping: bool = True) -> gym.Env:
    """
    Make the Sapientino environment.

    :param config: the Sapientino configuration.
    :param dfa: the automaton that constitutes the goal.
    :param goal_reward: the reward associated to the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    tg = TemporalGoal(
        automaton=dfa,
        reward=goal_reward,
        reward_shaping=reward_shaping,
        zero_terminal_state=False,
        extract_fluents=extract_sapientino_fluents,
    )
    env = SapientinoTemporalWrapper(
        SapientinoWrapper(SapientinoDictSpace(config)),
        [tg],
        combine=lambda obs, qs: tuple((*obs, *qs)),
        # Orientation (theta) only matters for the differential action space.
        feature_extractor=lambda obs, action: (obs["x"], obs["y"], obs["theta"])
        if config.differential else (obs["x"], obs["y"]),
    )
    return env
def make_env_from_dfa(config: MinecraftConfiguration, dfa: pythomata.dfa.DFA, goal_reward: float = 1000.0, reward_shaping: bool = True) -> gym.Env:
    """
    Make the Minecraft environment.

    :param config: the Minecraft configuration.
    :param dfa: the automaton that constitutes the goal.
    :param goal_reward: the reward associated to the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    tg = TemporalGoal(
        automaton=dfa,
        reward=goal_reward,
        reward_shaping=reward_shaping,
        zero_terminal_state=False,
        extract_fluents=extract_minecraft_fluents,
    )
    env = MinecraftTemporalWrapper(
        MinecraftLearnerWrapper(config),
        [tg],
        combine=lambda obs, qs: tuple((*obs, *qs)),
        # Orientation (theta) only matters for the differential action space.
        feature_extractor=lambda obs, action: (obs["x"], obs["y"], obs["theta"])
        if config.action_space_type == ActionSpaceType.DIFFERENTIAL
        else (obs["x"], obs["y"]),
    )
    return env
def __init__(
    self,
    colors: Sequence[str],
    fluents: AbstractFluents,
    reward: Optional[float] = 1.0,
    save_to: Optional[str] = None,
):
    """Initialize.

    :param colors: a sequence of colors, these are the positions that
        the agent must reach with the correct order.
    :param fluents: a fluents evaluator. All colors must be fluents,
        so that we know when the agent is in each position.
    :param reward: reward supplied when the goal is reached.
    :param save_to: path where the automaton should be exported.
    """
    # Every requested color needs a fluent so it can be evaluated.
    if not all(color in fluents.fluents for color in colors):
        raise ValueError(
            "Some color has no associated fluent to evaluate it")

    # Make automaton for this sequence
    automaton = self._make_sapientino_automaton(colors)

    # Super
    TemporalGoal.__init__(
        self,
        formula=None,  # Providing automaton directly
        reward=reward,
        automaton=automaton,
        labels=set(colors),
        extract_fluents=fluents.evaluate,
        reward_shaping=False,
        zero_terminal_state=False,
    )

    # Maybe export a rendering of the automaton.
    if save_to:
        self.automaton.to_graphviz().render(save_to)
def make_env(config: MinecraftConfiguration, output_dir, goal_reward: float = 1000.0, reward_shaping: bool = True) -> gym.Env:
    """
    Make the Minecraft environment.

    :param config: the Minecraft configuration.
    :param output_dir: the path to the output directory.
    :param goal_reward: the reward associated to each task goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    # Build one temporal goal per configured task.
    temporal_goals = []
    for t in config.tasks:
        formula_string = make_goal(t)
        print("Formula: {}".format(formula_string))
        formula = LDLfParser()(formula_string)
        tg = TemporalGoal(
            formula=formula,
            reward=goal_reward,
            # labels=LABELS,
            reward_shaping=reward_shaping,
            zero_terminal_state=False,
            extract_fluents=extract_minecraft_fluents)
        temporal_goals.append(tg)
        # NOTE(review): reaches into the private attribute `_automaton`
        # to export a rendering; confirm no public accessor exists.
        tg._automaton.to_dot(
            os.path.join(output_dir, "true_automaton_{}".format(t.name)))
        print("Original automaton at {}".format(
            os.path.join(output_dir, "true_automaton_{}.svg".format(t.name))))

    env = MinecraftTemporalWrapper(
        MinecraftExpertWrapper(config),
        temporal_goals,
        combine=lambda obs, qs: tuple((*obs, *qs)),
        # Orientation (theta) only matters for the differential action space.
        feature_extractor=lambda obs, action: (obs["x"], obs["y"], obs["theta"])
        if config.action_space_type == ActionSpaceType.DIFFERENTIAL
        else (obs["x"], obs["y"]))

    # Log positive/negative traces for automata learning.
    positive_traces_path = Path(output_dir, "positive_traces.txt")
    negative_traces_path = Path(output_dir, "negative_traces.txt")
    env = TemporalGoalWrapperLogTraces(env, extract_minecraft_fluents,
                                       positive_traces_path,
                                       negative_traces_path)
    return env
def make_env(config: BreakoutConfiguration, output_dir, goal_reward: float = 1000.0, reward_shaping: bool = True) -> gym.Env:
    """
    Make the Breakout environment.

    :param config: the Breakout configuration.
    :param output_dir: the path to the output directory.
    :param goal_reward: the reward associated to the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    unwrapped_env = BreakoutExpertWrapper(config)
    # One propositional symbol per brick column.
    formula_string = make_goal(config.brick_cols)
    formula = LDLfParser()(formula_string)
    labels = {"c{}".format(i) for i in range(config.brick_cols)}
    tg = TemporalGoal(formula=formula,
                      reward=goal_reward,
                      labels=labels,
                      reward_shaping=reward_shaping,
                      zero_terminal_state=False,
                      extract_fluents=extract_breakout_fluents)
    print("Formula: {}".format(formula_string))
    # NOTE(review): reaches into the private attribute `_automaton` to
    # export a rendering; confirm no public accessor exists.
    tg._automaton.to_dot(os.path.join(output_dir, "true_automaton"))
    print("Original automaton at {}".format(
        os.path.join(output_dir, "true_automaton.svg")))
    env = TemporalGoalWrapper(
        unwrapped_env,
        [tg],
        combine=lambda obs, qs: tuple((*obs, *qs)),
        # Only the paddle position is observed by the expert.
        feature_extractor=(lambda obs, action: (obs["paddle_x"],)))

    # Log positive/negative traces for automata learning.
    positive_traces_path = Path(output_dir, "positive_traces.txt")
    negative_traces_path = Path(output_dir, "negative_traces.txt")
    env = TemporalGoalWrapperLogTraces(env, extract_breakout_fluents,
                                       positive_traces_path,
                                       negative_traces_path)
    return env
def make_env(config: SapientinoConfiguration, output_dir, goal_reward: float = 1000.0, reward_shaping: bool = True) -> gym.Env:
    """
    Make the Sapientino environment.

    :param config: the Sapientino configuration.
    :param output_dir: the path to the output directory.
    :param goal_reward: the reward associated to the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    formula_string = make_goal()
    print("Formula: {}".format(formula_string))
    formula = LDLfParser()(formula_string)
    tg = TemporalGoal(formula=formula,
                      reward=goal_reward,
                      # All colors plus the "bad_beep" failure symbol.
                      labels=set(colors) | {"bad_beep"},
                      reward_shaping=reward_shaping,
                      zero_terminal_state=False,
                      extract_fluents=extract_sapientino_fluents)
    # NOTE(review): reaches into the private attribute `_automaton` to
    # export a rendering; confirm no public accessor exists.
    tg._automaton.to_dot(os.path.join(output_dir, "true_automaton"))
    print("Original automaton at {}".format(
        os.path.join(output_dir, "true_automaton.svg")))
    env = SapientinoTemporalWrapper(
        SapientinoWrapper(SapientinoDictSpace(config)),
        [tg],
        combine=lambda obs, qs: tuple((*obs, *qs)),
        # Orientation (th) only matters for the differential action space.
        feature_extractor=lambda obs, action: (obs["x"], obs["y"], obs["th"])
        if config.differential else (obs["x"], obs["y"]))

    # Log positive/negative traces for automata learning.
    positive_traces_path = Path(output_dir, "positive_traces.txt")
    negative_traces_path = Path(output_dir, "negative_traces.txt")
    env = TemporalGoalWrapperLogTraces(env, extract_sapientino_fluents,
                                       positive_traces_path,
                                       negative_traces_path)
    return env