Example #1
def replay_entrypoint(
    save_directory,
    id,
    wrapped_agent_locator,
    wrapped_agent_params=None,
    read=False,
):
    if wrapped_agent_params is None:
        wrapped_agent_params = {}
    from .replay_agent import ReplayAgent

    internal_spec = make(wrapped_agent_locator, **wrapped_agent_params)
    global social_index
    global replay_save_dir
    global replay_read
    spec = AgentSpec(
        interface=internal_spec.interface,
        agent_params={
            "save_directory": replay_save_dir,
            "id": f"{id}_{social_index}",
            "internal_spec": internal_spec,
            "wrapped_agent_params": wrapped_agent_params,
            "read": replay_read,
        },
        agent_builder=ReplayAgent,
    )
    social_index += 1
    return spec
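
Note that save_directory and read are shadowed by the module-level globals replay_save_dir and replay_read, so those globals (and social_index) must be initialized before the first call. A minimal sketch, assuming it runs in the same module that defines replay_entrypoint; the path and locator strings are placeholders:

# Sketch only: initialize the globals the entrypoint reads, then build a spec.
social_index = 0
replay_save_dir = "./replay_data"  # placeholder path
replay_read = False

spec = replay_entrypoint(
    save_directory=replay_save_dir,
    id="agent",
    wrapped_agent_locator="zoo.policies:keep-lane-agent-v0",  # placeholder locator
)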
Example #2
 def _agent_spec_callback(self, ros_agent_spec: AgentSpec):
     assert (len(ros_agent_spec.tasks) == 1
             ), "more than 1 task per agent is not yet supported"
     task = ros_agent_spec.tasks[0]
     task_params = json.loads(task.params_json) if task.params_json else {}
     task_version = task.task_ver or "latest"
     agent_locator = f"{self._zoo_module}:{task.task_ref}-{task_version}"
     agent_spec = None
     try:
         agent_spec = registry.make(agent_locator, **task_params)
     except ImportError as ie:
         rospy.logerr(
             f"Unable to locate agent with locator={agent_locator}:  {ie}")
     if not agent_spec:
         rospy.logwarn(
             f"got unknown task_ref '{task.task_ref}' in AgentSpec message with params='{task.param_json}'.  ignoring."
         )
         return
     if (ros_agent_spec.end_pose.position.x != 0.0
             or ros_agent_spec.end_pose.position.y != 0.0):
         goal = PositionalGoal(
             (
                 ros_agent_spec.end_pose.position.x,
                 ros_agent_spec.end_pose.position.y,
             ),
             ros_agent_spec.veh_length,
         )
     else:
         goal = EndlessGoal()
     mission = Mission(
         start=Start.from_pose(
             ROSDriver._pose_from_ros(ros_agent_spec.start_pose)),
         goal=goal,
         # TODO:  how to prevent them from spawning on top of another existing vehicle? (see how it's done in SUMO traffic)
         entry_tactic=default_entry_tactic(ros_agent_spec.start_speed),
         vehicle_spec=VehicleSpec(
             veh_id=f"veh_for_agent_{ros_agent_spec.agent_id}",
             veh_config_type=ROSDriver._decode_vehicle_type(
                 ros_agent_spec.veh_type),
             dimensions=Dimensions(
                 ros_agent_spec.veh_length,
                 ros_agent_spec.veh_width,
                 ros_agent_spec.veh_height,
             ),
         ),
     )
     with self._reset_lock:
         if (ros_agent_spec.agent_id in self._agents
                 or ros_agent_spec.agent_id in self._agents_to_add):
             rospy.logwarn(
                 f"trying to add new agent with existing agent_id '{ros_agent_spec.agent_id}'.  ignoring."
             )
             return
         self._agents_to_add[ros_agent_spec.agent_id] = (agent_spec,
                                                         mission)
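
The locator assembled above follows the registry's "<module>:<name>-<version>" pattern; a minimal sketch of building one by hand (module and task names are placeholders):

# Sketch only: mirrors the f-string used in the callback above.
zoo_module = "my_zoo.policies"                 # placeholder module path
agent_locator = f"{zoo_module}:my-task-latest"  # "<module>:<name>-<version>", version defaults to "latest"
agent_spec = registry.make(agent_locator)       # raises ImportError if the module cannot be imported (caught in the callback above)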
Example #3
    def test_check_agents_from_pool(self):
        seed = 2
        policy = ""

        with open("ultra/agent_pool.json") as f:
            data = json.load(f)
            for policy in data["agents"].keys():
                policy_path = data["agents"][policy]["path"]
                policy_locator = data["agents"][policy]["locator"]
                policy_class = str(policy_path) + ":" + str(policy_locator)
                try:
                    spec = make(locator=policy_class)
                    agent = spec.build_agent()
                except ImportError as err:
                    self.fail(f"Failed to build agent '{policy}' from locator '{policy_class}': {err}")
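
The test above assumes each entry in ultra/agent_pool.json provides a "path" and a "locator" that concatenate into a full agent locator. An illustrative (not authoritative) shape of that data, written as the Python dict it loads into:

# Illustrative structure only; the real file ships with the repository.
data = {
    "agents": {
        "sac": {"path": "ultra.baselines.sac", "locator": "sac-v0"},
        "ppo": {"path": "ultra.baselines.ppo", "locator": "ppo-v0"},
    }
}
# path + ":" + locator  ->  "ultra.baselines.sac:sac-v0", as passed to make().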
Example #4
def prepare_test_env_agent(headless=True):
    timestep_sec = 0.1
    # [throttle, brake, steering]
    policy_class = "ultra.baselines.ppo:ppo-v0"
    spec = make(locator=policy_class)
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: spec},
        scenario_info=("00", "easy"),
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )
    agent = spec.build_agent()
    return agent, env
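
A hedged usage sketch of the helper above, driving one short evaluation rollout; AGENT_ID and seed are assumed to be module-level constants in the original test file:

# Sketch only: AGENT_ID must match the constant used inside prepare_test_env_agent
# (e.g. "007" as in the single-agent training example further below).
AGENT_ID = "007"
agent, env = prepare_test_env_agent(headless=True)

observations = env.reset()
dones = {"__all__": False}
while not dones["__all__"]:
    action = agent.act(observations[AGENT_ID], explore=False)
    observations, rewards, dones, infos = env.step({AGENT_ID: action})

env.close()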
Example #5
def build_agents(policy_classes, policy_ids, max_episode_steps):
    # Make agent_ids in the form of 000, 001, ..., 010, 011, ..., 999, 1000, ...;
    # or use the provided policy_ids if available.
    agent_ids = ([
        "0" * max(0, 3 - len(str(i))) + str(i)
        for i in range(len(policy_classes))
    ] if not policy_ids else policy_ids)
    # Ensure there is an ID for each policy, and a policy for each ID.
    assert len(agent_ids) == len(policy_classes), (
        "The number of agent IDs provided ({}) must be equal to "
        "the number of policy classes provided ({}).".format(
            len(agent_ids), len(policy_classes)))

    # Assign the policy classes to their associated ID.
    agent_classes = {
        agent_id: policy_class
        for agent_id, policy_class in zip(agent_ids, policy_classes)
    }
    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(locator=policy_class,
                       max_episode_steps=max_episode_steps)
        for agent_id, policy_class in agent_classes.items()
    }
    # Create the agents matched with their associated ID.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # Define an 'etag' for this experiment's data directory based on policy_classes.
    # E.g., the policy_classes list ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # yields the etag "dqn-v0:ppo-v0".
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in policy_classes])

    return agent_ids, agent_classes, agent_specs, agents, etag
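
A usage sketch for build_agents with two of the baseline locators referenced in the comment above; all returned dictionaries are keyed by the generated three-digit agent IDs:

agent_ids, agent_classes, agent_specs, agents, etag = build_agents(
    policy_classes=["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"],
    policy_ids=None,            # fall back to generated IDs "000", "001", ...
    max_episode_steps=200,      # illustrative value
)
# agent_ids == ["000", "001"], etag == "dqn-v0:ppo-v0"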
Example #6
 def test_agent_is_instance_policy(self):
     policy_class = "ultra.baselines.sac:sac-v0"
     spec = make(locator=policy_class)
     agent = spec.build_agent()
     self.assertIsInstance(agent, SACPolicy)
Example #7
 def test_spec_is_instance_agentspec(self):
     policy_class = "ultra.baselines.sac:sac-v0"
     spec = make(locator=policy_class)
     self.assertIsInstance(spec, AgentSpec)
Example #8
def train(
    scenario_info,
    num_episodes,
    policy_classes,
    max_episode_steps,
    eval_info,
    timestep_sec,
    headless,
    seed,
    log_dir,
    policy_ids=None,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    # Make agent_ids in the form of 000, 001, ..., 010, 011, ..., 999, 1000, ...;
    # or use the provided policy_ids if available.
    agent_ids = (
        ["0" * max(0, 3 - len(str(i))) + str(i) for i in range(len(policy_classes))]
        if not policy_ids
        else policy_ids
    )
    # Ensure there is an ID for each policy, and a policy for each ID.
    assert len(agent_ids) == len(policy_classes), (
        "The number of agent IDs provided ({}) must be equal to "
        "the number of policy classes provided ({}).".format(
            len(agent_ids), len(policy_classes)
        )
    )

    # Assign the policy classes to their associated ID.
    agent_classes = {
        agent_id: policy_class
        for agent_id, policy_class in zip(agent_ids, policy_classes)
    }
    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(locator=policy_class, max_episode_steps=max_episode_steps)
        for agent_id, policy_class in agent_classes.items()
    }
    # Create the agents matched with their associated ID.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # Create the environment.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )

    # Define an 'etag' for this experiment's data directory based on policy_classes.
    # E.g., the policy_classes list ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # yields the etag "dqn-v0:ppo-v0".
    etag = ":".join([policy_class.split(":")[-1] for policy_class in policy_classes])

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save relevant agent metadata.
        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl", "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            # Break if any agent's step count has reached 1000000.
            if any([episode.get_itr(agent_id) >= 1000000 for agent_id in agents]):
                finished = True
                break

            # Perform the evaluation check.
            evaluation_check(
                agents=agents,
                agent_ids=agent_ids,
                policy_classes=agent_classes,
                episode=episode,
                log_dir=log_dir,
                max_episode_steps=max_episode_steps,
                **eval_info,
                **env.info,
            )

            # Request and perform actions on each agent that received an observation.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Active agents are those that receive observations in this step and the next
            # step. Step each active agent (obtaining their network loss if applicable).
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            # Record the data from this episode.
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            # Update variables for the next step.
            total_step += 1
            observations = next_observations

        # Normalize the data and record this episode on tensorboard.
        episode.record_episode()
        episode.record_tensorboard()

        if finished:
            break

    env.close()
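
A hedged invocation sketch for train; the keyword arguments evaluation_check expects inside eval_info are not shown in this example, so that dict is left abstract, and the remaining values are placeholders:

# Sketch only.
eval_info = {}  # fill with the keyword arguments evaluation_check expects (not shown here)

train(
    scenario_info=("00", "easy"),
    num_episodes=100,
    policy_classes=["ultra.baselines.ppo:ppo-v0"],
    max_episode_steps=200,
    eval_info=eval_info,
    timestep_sec=0.1,
    headless=True,
    seed=2,
    log_dir="logs",
)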
Example #9
def evaluate(
    experiment_dir,
    seed,
    agent_ids,
    policy_classes,
    checkpoint_dirs,
    scenario_info,
    num_episodes,
    max_episode_steps,
    headless,
    timestep_sec,
    log_dir,
    eval_mode=True,
):
    torch.set_num_threads(1)

    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(
            locator=policy_classes[agent_id],
            checkpoint_dir=checkpoint_dirs[agent_id],
            experiment_dir=experiment_dir,
            max_episode_steps=max_episode_steps,
            agent_id=agent_id,
        )
        for agent_id in agent_ids
    }

    # Create the environment with the specified agents.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
        eval_mode=eval_mode,
    )

    # Build each agent from its specification.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # A dictionary to hold the evaluation data for each agent.
    summary_log = {agent_id: LogInfo() for agent_id in agent_ids}

    # Define an 'etag' for this experiment's data directory based on the policy class
    # locators. E.g., ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # yields the etag "dqn-v0:ppo-v0".
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in policy_classes])

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset(mode="Evaluation")

        while not dones["__all__"]:
            # Get and perform the available agents' actions.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=False)
                for agent_id, observation in observations.items()
            }
            observations, rewards, dones, infos = env.step(actions)

            # Record the data from this episode.
            episode.record_step(agent_ids_to_record=infos.keys(),
                                infos=infos,
                                rewards=rewards)

        episode.record_episode()

        for agent_id, agent_data in episode.info[episode.active_tag].items():
            for key, value in agent_data.data.items():
                if not isinstance(value, (list, tuple, np.ndarray)):
                    summary_log[agent_id].data[key] += value

    # Normalize by the number of evaluation episodes.
    for agent_id, agent_data in summary_log.items():
        for key, value in agent_data.data.items():
            if not isinstance(value, (list, tuple, np.ndarray)):
                summary_log[agent_id].data[key] /= num_episodes

    env.close()

    return summary_log
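
A hedged call sketch for the multi-agent evaluate above; every per-agent dictionary is keyed by the same agent IDs, and the checkpoint and experiment paths are placeholders:

# Sketch only: paths, episode counts, and step limits are illustrative.
agent_ids = ["000", "001"]
policy_classes = {
    "000": "ultra.baselines.dqn:dqn-v0",
    "001": "ultra.baselines.ppo:ppo-v0",
}
checkpoint_dirs = {
    "000": "experiment/000/models",
    "001": "experiment/001/models",
}
summary_log = evaluate(
    experiment_dir="experiment",
    seed=2,
    agent_ids=agent_ids,
    policy_classes=policy_classes,
    checkpoint_dirs=checkpoint_dirs,
    scenario_info=("00", "easy"),
    num_episodes=5,
    max_episode_steps=200,
    headless=True,
    timestep_sec=0.1,
    log_dir="logs",
)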
Example #10
def evaluate(
    experiment_dir,
    seed,
    agent_id,
    policy_class,
    itr_count,
    checkpoint_dir,
    scenario_info,
    num_episodes,
    headless,
    timestep_sec,
):

    torch.set_num_threads(1)
    spec = make(
        locator=policy_class,
        checkpoint_dir=checkpoint_dir,
        experiment_dir=experiment_dir,
    )

    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={agent_id: spec},
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
        eval_mode=True,
    )

    agent = spec.build_agent()
    summary_log = LogInfo()
    logs = []

    for episode in episodes(num_episodes):
        observations = env.reset()
        state = observations[agent_id]
        dones, infos = {"__all__": False}, None

        episode.reset(mode="Evaluation")
        while not dones["__all__"]:
            action = agent.act(state, explore=False)
            observations, rewards, dones, infos = env.step({agent_id: action})

            next_state = observations[agent_id]

            state = next_state

            episode.record_step(agent_id=agent_id, infos=infos, rewards=rewards)

        episode.record_episode()
        logs.append(episode.info[episode.active_tag].data)

        for key, value in episode.info[episode.active_tag].data.items():
            if not isinstance(value, (list, tuple, np.ndarray)):
                summary_log.data[key] += value

    for key, val in summary_log.data.items():
        if not isinstance(val, (list, tuple, np.ndarray)):
            summary_log.data[key] /= num_episodes

    env.close()

    return summary_log
Example #11
def train(
    scenario_info,
    num_episodes,
    max_episode_steps,
    policy_class,
    eval_info,
    timestep_sec,
    headless,
    seed,
    log_dir,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    AGENT_ID = "007"

    spec = make(locator=policy_class, max_episode_steps=max_episode_steps)
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: spec},
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )

    agent = spec.build_agent()

    for episode in episodes(num_episodes, etag=policy_class, log_dir=log_dir):
        observations = env.reset()
        state = observations[AGENT_ID]
        dones, infos = {"__all__": False}, None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # save entire spec [ policy_params, reward_adapter, observation_adapter]
        if not os.path.exists(f"{experiment_dir}/spec.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/spec.pkl", "wb") as spec_output:
                dill.dump(spec, spec_output, pickle.HIGHEST_PROTOCOL)

        while not dones["__all__"]:
            if episode.get_itr(AGENT_ID) >= 1000000:
                finished = True
                break
            evaluation_check(
                agent=agent,
                agent_id=AGENT_ID,
                policy_class=policy_class,
                episode=episode,
                log_dir=log_dir,
                max_episode_steps=max_episode_steps,
                **eval_info,
                **env.info,
            )
            action = agent.act(state, explore=True)
            observations, rewards, dones, infos = env.step({AGENT_ID: action})
            next_state = observations[AGENT_ID]

            loss_output = agent.step(
                state=state,
                action=action,
                reward=rewards[AGENT_ID],
                next_state=next_state,
                done=dones[AGENT_ID],
            )
            episode.record_step(
                agent_id=AGENT_ID,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_output=loss_output,
            )
            total_step += 1
            state = next_state

        episode.record_episode()
        episode.record_tensorboard(agent_id=AGENT_ID)
        if finished:
            break

    env.close()
Example #12
 def to_agent_spec(self) -> AgentSpec:
     return make(locator=self.agent_locator, **self.policy_kwargs)
Example #13
 def to_agent_spec(self) -> AgentSpec:
     """Generate an agent spec."""
     return make(locator=self.agent_locator, **self.policy_kwargs)
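
Examples #12 and #13 are methods of a small configuration object; a hypothetical dataclass illustrating the two fields the method relies on (the class name and defaults are assumptions):

from dataclasses import dataclass, field
from typing import Any, Dict

@dataclass
class AgentConfig:  # hypothetical container; only the two fields below are implied by the method
    agent_locator: str
    policy_kwargs: Dict[str, Any] = field(default_factory=dict)

    def to_agent_spec(self) -> AgentSpec:
        """Generate an agent spec."""
        return make(locator=self.agent_locator, **self.policy_kwargs)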
Example #14
def tune_train(
    config,
    scenario_info,
    num_episodes,
    policy_classes,
    max_episode_steps,
    save_rate,
    timestep_sec,
    headless,
    seed,
    log_dir,
    metric,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    assert len(policy_classes) == 1, "Can only tune with single-agent experiments."

    # Make agent_ids in the form of 000, 001, ..., 010, 011, ..., 999, 1000, ...
    agent_ids = [
        "0" * max(0, 3 - len(str(i))) + str(i)
        for i in range(len(policy_classes))
    ]
    # Assign the policy classes to their associated ID.
    agent_classes = {
        agent_id: policy_class
        for agent_id, policy_class in zip(agent_ids, policy_classes)
    }
    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(
            locator=policy_class,
            agent_params=config,
            max_episode_steps=max_episode_steps,
        )
        for agent_id, policy_class in agent_classes.items()
    }
    # Create the agents matched with their associated ID.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # Create the environment.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )

    # Define an 'etag' for this experiment's data directory based on policy_classes.
    # E.g., the policy_classes list ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # yields the etag "dqn-v0:ppo-v0".
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in policy_classes])

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save relevant agent metadata.
        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl",
                      "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            # Break if any agent's step count has reached 1000000.
            if any(
                [episode.get_itr(agent_id) >= 1000000 for agent_id in agents]):
                finished = True
                break

            # Request and perform actions on each agent that received an observation.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Active agents are those that receive observations in this step and the next
            # step. Step each active agent (obtaining their network loss if applicable).
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            # Record the data from this episode.
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            # Update variables for the next step.
            total_step += 1
            observations = next_observations

        # Normalize the data and record this episode on tensorboard.
        episode.record_episode()
        episode.record_tensorboard(recording_step=episode.index)

        # Save the agent if we have reached its save rate.
        if (episode.index + 1) % save_rate == 0:
            for agent_id in agent_ids:
                checkpoint_directory = episode.checkpoint_dir(
                    agent_id, episode.index)
                agents[agent_id].save(checkpoint_directory)

        # Average the metric over the number of agents (1 agent).
        tune_value = sum([
            episode.info[episode.active_tag][agent_id].data[metric]
            for agent_id in agent_ids
        ]) / len(agent_ids)
        tune.report(**{metric: tune_value})

        if finished:
            break

    env.close()
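
Because tune_train reports its metric through tune.report, it is intended to run under Ray Tune. A hedged launch sketch; the search-space keys inside config depend on the baseline's agent_params, and the metric name must match a key recorded by the episode, so both are placeholders:

from ray import tune

analysis = tune.run(
    tune.with_parameters(
        tune_train,
        scenario_info=("00", "easy"),
        num_episodes=100,
        policy_classes=["ultra.baselines.ppo:ppo-v0"],
        max_episode_steps=200,
        save_rate=10,
        timestep_sec=0.1,
        headless=True,
        seed=2,
        log_dir="logs",
        metric="episode_reward",                 # placeholder metric name
    ),
    config={"lr": tune.loguniform(1e-5, 1e-3)},  # placeholder search space
    metric="episode_reward",
    mode="max",
)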