Example #1
    def __init__(
        self,
        n_rooms: int,
        fluents: AbstractFluents,
        saved_automaton: str,
        reward: float,
        save_to: Optional[str] = None,
    ):
        """Initialize.

        :param n_rooms: the number of rooms in this environment. The fluents
            and automaton states are derived from this.
        :param fluents: the object containing the fluents valuation function.
            The constructor checks that the valuated fluents are the expected
            ones.
        :param saved_automaton: path to a saved automaton corresponding
            to a temporal goal. Its fluents must match.
        :param reward: the reward supplied when the goal is reached.
        :param save_to: path where the automaton should be exported.
        """
        # Define the propositional symbols
        expected_fluents = {"bip", "person", "closed"}
        for i in range(1, n_rooms + 1):
            at_room, in_room = f"at{i}", f"in{i}"
            expected_fluents.add(at_room)
            expected_fluents.add(in_room)

        # Load automaton
        with open(saved_automaton, "rb") as f:
            automaton: SimpleDFA = pickle.load(f)

        # Check same fluents in valuations
        if fluents.fluents != expected_fluents:
            raise ValueError(
                f"Symbols do not match: {fluents.fluents} != {expected_fluents}"
            )
        # NOTE: the automaton's alphabet is not checked as well; it is assumed correct

        # Super
        TemporalGoal.__init__(
            self,
            formula=None,  # Providing the automaton directly
            reward=reward,
            automaton=automaton,
            labels=fluents,
            extract_fluents=fluents.evaluate,
            reward_shaping=False,
            zero_terminal_state=False,
        )

        # Maybe save
        if save_to:
            self.automaton.to_graphviz().render(save_to)
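A minimal usage sketch for the constructor above; OfficeTemporalGoal and OfficeFluents are placeholder names, since the snippet only shows __init__ and not the enclosing class or a concrete AbstractFluents implementation:

# Hypothetical usage; OfficeTemporalGoal stands for the class whose __init__
# is shown above, and OfficeFluents for a concrete AbstractFluents
# implementation that valuates bip, person, closed, at1..atN, in1..inN.
fluents = OfficeFluents(n_rooms=3)
goal = OfficeTemporalGoal(
    n_rooms=3,
    fluents=fluents,
    saved_automaton="automata/office_goal.pickle",  # pickled SimpleDFA
    reward=1.0,
    save_to="out/office_goal",  # optional: render the DFA with graphviz
)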
Example #2
def make_env_from_dfa(config: BreakoutConfiguration,
                      dfa: DFA,
                      goal_reward: float = 1000.0,
                      reward_shaping: bool = True) -> gym.Env:
    """
    Make the Breakout environment.

    :param config: the Breakout configuration.
    :param dfa: the automaton that constitutes the goal.
    :param goal_reward: the reward associated with the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    unwrapped_env = BreakoutLearner2Wrapper(config)

    tg = TemporalGoal(automaton=dfa,
                      reward=goal_reward,
                      reward_shaping=reward_shaping,
                      zero_terminal_state=False,
                      extract_fluents=extract_breakout_fluents)

    env = TemporalGoalWrapper(unwrapped_env, [tg],
                              combine=lambda obs, qs: tuple((*obs, *qs)),
                              feature_extractor=(lambda obs, action: (
                                  obs["paddle_x"],
                                  obs["ball_x"],
                                  obs["ball_y"],
                                  obs["ball_x_speed"],
                                  obs["ball_y_speed"],
                              )))

    return env
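A sketch of how this factory could be driven; my_dfa is a placeholder for an automaton over the Breakout fluents, and the BreakoutConfiguration arguments are illustrative:

# Hypothetical usage; my_dfa must be an automaton whose alphabet matches
# the fluents extracted by extract_breakout_fluents.
config = BreakoutConfiguration(brick_rows=3, brick_cols=3)  # illustrative parameters
env = make_env_from_dfa(config, my_dfa, goal_reward=1000.0, reward_shaping=True)

obs = env.reset()
done = False
while not done:
    # Each observation is the extracted feature tuple plus the automaton state(s).
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()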
Example #3
def make_env_from_dfa(config: SapientinoConfiguration,
                      dfa: pythomata.dfa.DFA,
                      goal_reward: float = 1000.0,
                      reward_shaping: bool = True) -> gym.Env:
    """
    Make the Sapientino environment.

    :param config: the Sapientino configuration.
    :param dfa: the automaton that constitutes the goal.
    :param goal_reward: the reward associated with the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    tg = TemporalGoal(automaton=dfa,
                      reward=goal_reward,
                      reward_shaping=reward_shaping,
                      zero_terminal_state=False,
                      extract_fluents=extract_sapientino_fluents)

    env = SapientinoTemporalWrapper(
        SapientinoWrapper(SapientinoDictSpace(config)),
        [tg],
        combine=lambda obs, qs: tuple((*obs, *qs)),
        feature_extractor=lambda obs, action: (
            (obs["x"], obs["y"], obs["theta"])
            if config.differential
            else (obs["x"], obs["y"])
        ),
    )

    return env
Example #4
def make_env_from_dfa(config: MinecraftConfiguration,
                      dfa: pythomata.dfa.DFA,
                      goal_reward: float = 1000.0,
                      reward_shaping: bool = True) -> gym.Env:
    """
    Make the Minecraft environment.

    :param config: the Minecraft configuration.
    :param dfa: the automaton that constitutes the goal.
    :param goal_reward: the reward associated with the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    tg = TemporalGoal(automaton=dfa,
                      reward=goal_reward,
                      reward_shaping=reward_shaping,
                      zero_terminal_state=False,
                      extract_fluents=extract_minecraft_fluents)

    env = MinecraftTemporalWrapper(
        MinecraftLearnerWrapper(config),
        [tg],
        combine=lambda obs, qs: tuple((*obs, *qs)),
        feature_extractor=lambda obs, action: (
            (obs["x"], obs["y"], obs["theta"])
            if config.action_space_type == ActionSpaceType.DIFFERENTIAL
            else (obs["x"], obs["y"])
        ),
    )

    return env
Example #5
    def __init__(
        self,
        colors: Sequence[str],
        fluents: AbstractFluents,
        reward: Optional[float] = 1.0,
        save_to: Optional[str] = None,
    ):
        """Initialize.

        :param colors: a sequence of colors: the positions that the agent
            must visit, in the given order.
        :param fluents: a fluents evaluator. All colors must be fluents, so
            that we know when the agent is in each position.
        :param reward: the reward supplied when the goal is reached.
        :param save_to: path where the automaton should be exported.
        """
        # Check
        if not all(color in fluents.fluents for color in colors):
            raise ValueError(
                "Some colors have no associated fluent to evaluate them")

        # Make automaton for this sequence
        automaton = self._make_sapientino_automaton(colors)

        # Super
        TemporalGoal.__init__(
            self,
            formula=None,  # Providing the automaton directly
            reward=reward,
            automaton=automaton,
            labels=set(colors),
            extract_fluents=fluents.evaluate,
            reward_shaping=False,
            zero_terminal_state=False,
        )

        # Maybe save
        if save_to:
            self.automaton.to_graphviz().render(save_to)
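A usage sketch; SapientinoColorsGoal stands for the class whose __init__ is shown above, and SapientinoFluents for a concrete AbstractFluents implementation with one fluent per color:

# Hypothetical usage; both class names are placeholders.
fluents = SapientinoFluents({"red", "green", "blue"})
goal = SapientinoColorsGoal(
    colors=["red", "green", "blue"],  # positions to visit, in this order
    fluents=fluents,
    reward=1.0,
    save_to="out/colors_goal",  # optional graphviz export of the DFA
)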
Example #6
def make_env(config: MinecraftConfiguration,
             output_dir,
             goal_reward: float = 1000.0,
             reward_shaping: bool = True) -> gym.Env:
    """
    Make the Minecraft environment.

    :param config: the Minecraft configuration.
    :param output_dir: the path to the output directory.
    :param goal_reward: the reward associated with the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    temporal_goals = []
    for t in config.tasks:
        formula_string = make_goal(t)
        print("Formula: {}".format(formula_string))
        formula = LDLfParser()(formula_string)
        tg = TemporalGoal(
            formula=formula,
            reward=goal_reward,
            # labels=LABELS,
            reward_shaping=reward_shaping,
            zero_terminal_state=False,
            extract_fluents=extract_minecraft_fluents)
        temporal_goals.append(tg)

        tg._automaton.to_dot(
            os.path.join(output_dir, "true_automaton_{}".format(t.name)))
        print("Original automaton at {}".format(
            os.path.join(output_dir, "true_automaton_{}.svg".format(t.name))))

    env = MinecraftTemporalWrapper(
        MinecraftExpertWrapper(config),
        temporal_goals,
        combine=lambda obs, qs: tuple((*obs, *qs)),
        feature_extractor=lambda obs, action: (
            (obs["x"], obs["y"], obs["theta"])
            if config.action_space_type == ActionSpaceType.DIFFERENTIAL
            else (obs["x"], obs["y"])
        ),
    )

    positive_traces_path = Path(output_dir, "positive_traces.txt")
    negative_traces_path = Path(output_dir, "negative_traces.txt")
    env = TemporalGoalWrapperLogTraces(env, extract_minecraft_fluents,
                                       positive_traces_path,
                                       negative_traces_path)

    return env
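A sketch of a driver for this function; the MinecraftConfiguration arguments are omitted for brevity, and the trace files are written by TemporalGoalWrapperLogTraces as episodes are played:

# Hypothetical driver; configuration arguments are illustrative.
config = MinecraftConfiguration()
env = make_env(config, output_dir="out", goal_reward=1000.0, reward_shaping=True)

for episode in range(10):
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())

# out/positive_traces.txt and out/negative_traces.txt now contain the fluent
# traces logged by TemporalGoalWrapperLogTraces.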
Example #7
def make_env(config: BreakoutConfiguration,
             output_dir,
             goal_reward: float = 1000.0,
             reward_shaping: bool = True) -> gym.Env:
    """
    Make the Breakout environment.

    :param config: the Breakout configuration.
    :param output_dir: the path to the output directory.
    :param goal_reward: the reward associated with the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """
    unwrapped_env = BreakoutExpertWrapper(config)

    formula_string = make_goal(config.brick_cols)
    formula = LDLfParser()(formula_string)
    labels = {"c{}".format(i) for i in range(config.brick_cols)}
    tg = TemporalGoal(formula=formula,
                      reward=goal_reward,
                      labels=labels,
                      reward_shaping=reward_shaping,
                      zero_terminal_state=False,
                      extract_fluents=extract_breakout_fluents)

    print("Formula: {}".format(formula_string))
    tg._automaton.to_dot(os.path.join(output_dir, "true_automaton"))
    print("Original automaton at {}".format(
        os.path.join(output_dir, "true_automaton.svg")))

    env = TemporalGoalWrapper(
        unwrapped_env,
        [tg],
        combine=lambda obs, qs: tuple((*obs, *qs)),
        feature_extractor=(
            lambda obs, action: (
                obs["paddle_x"],  #obs["paddleup_x"],
            )))

    positive_traces_path = Path(output_dir, "positive_traces.txt")
    negative_traces_path = Path(output_dir, "negative_traces.txt")
    env = TemporalGoalWrapperLogTraces(env, extract_breakout_fluents,
                                       positive_traces_path,
                                       negative_traces_path)

    return env
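Both make_env variants above log fluent traces next to the rendered automaton; a minimal way to inspect them afterwards (the exact line format is whatever TemporalGoalWrapperLogTraces writes):

from pathlib import Path

output_dir = "out"  # the same directory passed to make_env
for name in ("positive_traces.txt", "negative_traces.txt"):
    path = Path(output_dir) / name
    if path.exists():
        lines = path.read_text().splitlines()
        print("{}: {} lines".format(name, len(lines)))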
Example #8
def make_env(config: SapientinoConfiguration,
             output_dir,
             goal_reward: float = 1000.0,
             reward_shaping: bool = True) -> gym.Env:
    """
    Make the Sapientino environment.

    :param config: the Sapientino configuration.
    :param output_dir: the path to the output directory.
    :param goal_reward: the reward associated with the goal.
    :param reward_shaping: apply automata-based reward shaping.
    :return: the Gym environment.
    """

    formula_string = make_goal()
    print("Formula: {}".format(formula_string))
    formula = LDLfParser()(formula_string)
    tg = TemporalGoal(formula=formula,
                      reward=goal_reward,
                      labels=set(colors) | {"bad_beep"},
                      reward_shaping=reward_shaping,
                      zero_terminal_state=False,
                      extract_fluents=extract_sapientino_fluents)

    tg._automaton.to_dot(os.path.join(output_dir, "true_automaton"))
    print("Original automaton at {}".format(
        os.path.join(output_dir, "true_automaton.svg")))

    env = SapientinoTemporalWrapper(
        SapientinoWrapper(SapientinoDictSpace(config)),
        [tg],
        combine=lambda obs, qs: tuple((*obs, *qs)),
        feature_extractor=lambda obs, action: (
            (obs["x"], obs["y"], obs["th"])
            if config.differential
            else (obs["x"], obs["y"])
        ),
    )

    positive_traces_path = Path(output_dir, "positive_traces.txt")
    negative_traces_path = Path(output_dir, "negative_traces.txt")
    env = TemporalGoalWrapperLogTraces(env, extract_sapientino_fluents,
                                       positive_traces_path,
                                       negative_traces_path)

    return env