Example #1
def run(agent_spec, callback):
    AGENT_ID = "Agent-007"
    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=["scenarios/intersections/2lane"],
        agent_specs={AGENT_ID: agent_spec},
        headless=True,
        timestep_sec=0.01,
        seed=42,
    )
    i = 0
    for episode in episodes(n=EPISODE_COUNT):
        agent = agent_spec.build_agent()
        observations = env.reset()

        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})

            episode.record_step(observations, rewards, dones, infos)

            if i % CAPTURE_STEP == 0:
                callback(rewards, agent_obs, dones, int(i / CAPTURE_STEP))
            i += 1

    env.close()
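
Example #1 references the module-level constants EPISODE_COUNT and CAPTURE_STEP and leaves agent_spec and callback to the caller. The sketch below shows one minimal way run() could be driven; the constant values, the keep-lane behaviour, and the on_capture callback are illustrative assumptions (not part of the example above), and the imports follow the older smarts.core.agent API these examples use.

from smarts.core.agent import Agent, AgentSpec
from smarts.core.agent_interface import AgentInterface, AgentType

# Assumed module-level constants referenced inside run().
EPISODE_COUNT = 10
CAPTURE_STEP = 100


def on_capture(rewards, agent_obs, dones, capture_index):
    # Hypothetical callback: report rewards at every captured step.
    print(f"capture {capture_index}: rewards={rewards}")


if __name__ == "__main__":
    spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=500),
        # The Laner action space accepts lane-following commands such as "keep_lane".
        agent_builder=lambda: Agent.from_function(lambda obs: "keep_lane"),
    )
    run(spec, on_capture)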
Example #2
def main(scenarios,
         sim_name,
         headless,
         num_episodes,
         seed,
         max_episode_steps=None):
    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={},
        sim_name=sim_name,
        headless=headless,
        sumo_headless=True,
        visdom=False,
        seed=seed,
        timestep_sec=0.1,
    )

    if max_episode_steps is None:
        max_episode_steps = 1000

    for episode in episodes(n=num_episodes):
        env.reset()
        episode.record_scenario(env.scenario_log)

        for _ in range(max_episode_steps):
            env.step({})
            episode.record_step({}, {}, {}, {})

    env.close()
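
Example #2 builds the environment with agent_specs={}, so only background traffic runs, which is handy for inspecting scenarios. A minimal command-line driver for it might look like the sketch below; the argument names and defaults are illustrative, and the upstream SMARTS examples use a shared default_argument_parser helper rather than plain argparse.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser("observe-traffic")
    parser.add_argument("scenarios", type=str, nargs="+",
                        help="scenario directories to run")
    parser.add_argument("--sim-name", type=str, default=None)
    parser.add_argument("--headless", action="store_true",
                        help="run the simulation without Envision")
    parser.add_argument("--episodes", type=int, default=3)
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    main(
        scenarios=args.scenarios,
        sim_name=args.sim_name,
        headless=args.headless,
        num_episodes=args.episodes,
        seed=args.seed,
    )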
Example #3
def main(scenarios, headless, num_episodes, max_episode_steps=None):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.LanerWithSpeed, max_episode_steps=max_episode_steps
        ),
        agent_builder=ChaseViaPointsAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={"SingleAgent": agent_spec},
        headless=headless,
        sumo_headless=True,
    )

    # Convert `env.step()` and `env.reset()` from multi-agent interface to
    # single-agent interface.
    env = SingleAgent(env=env)

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observation = env.reset()
        episode.record_scenario(env.scenario_log)

        done = False
        while not done:
            agent_action = agent.act(observation)
            observation, reward, done, info = env.step(agent_action)
            episode.record_step(observation, reward, done, info)

    env.close()
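
ChaseViaPointsAgent and the SingleAgent wrapper used in Example #3 are defined elsewhere in the SMARTS examples. The sketch below is a simplified, illustrative stand-in for such an agent: it assumes the LanerWithSpeed interface accepts a (target_speed, lane_change) action pair and that the observation's via_data.near_via_points entries expose a required_speed, as in the upstream example.

from smarts.core.agent import Agent


class SimpleViaPointsAgent(Agent):
    """Illustrative stand-in for ChaseViaPointsAgent; not the original implementation."""

    def act(self, obs):
        near = obs.via_data.near_via_points
        if near:
            # Aim for the speed required by the nearest via point, staying in lane.
            return (near[0].required_speed, 0)
        # No via point nearby: cruise at a fixed speed in the current lane.
        return (10.0, 0)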
Example #4
def test_social_agents(env, agent_spec):
    for episode in episodes(n=MAX_EPISODES):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            obs = observations[AGENT_ID]
            observations, rewards, dones, infos = env.step({AGENT_ID: agent.act(obs)})
            episode.record_step(observations, rewards, dones, infos)

            assert SOCIAL_AGENT_ID not in observations
            assert SOCIAL_AGENT_ID not in dones

            # Reward is currently the delta in distance travelled by this agent.
            # We want to make sure that this is in fact a delta and not the total
            # distance travelled, since this bug has appeared a few times.
            #
            # The way to verify this is to check that the reward does not grow without bound.
            assert -3 < rewards[AGENT_ID] < 3

    assert episode.index == (
        MAX_EPISODES - 1
    ), "Simulation must cycle through to the final episode"
Example #5
def train(args, agent_specs, eval_interval: int = None):
    scenario = Path(args.scenario).absolute()
    eval_scenario = Path(args.eval_scenario).absolute()
    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=[scenario],
        agent_specs=agent_specs,
        headless=False,
        visdom=False,
        timestep_sec=0.1,
    )

    agents = {_id: agent_spec.build_agent() for _id, agent_spec in agent_specs.items()}

    for episode in episodes(n=50):
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_actions = {
                _id: agents[_id].act(obs) for _id, obs in observations.items()
            }
            observations, rewards, dones, infos = env.step(agent_actions)
            episode.record_step(observations, rewards, dones, infos)

        if eval_interval and episode.index % eval_interval == 0:
            # Block for evaluation
            ray.wait([evaluate.remote(episode.index, eval_scenario, agent_specs)])
            # Optionally, instead, you can run your evaluation concurrently by omitting the `ray.wait([..])`.
            #
            #   evaluate.remote(episode.index, args.eval_scenario, agent)

    env.close()
Example #6
def main(scenarios, sim_name, headless, num_episodes, seed, max_episode_steps=None):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.LanerWithSpeed, max_episode_steps=max_episode_steps
        ),
        agent_builder=ChaseViaPointsAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
        # zoo_addrs=[("10.193.241.236", 7432)], # Sample server address (ip, port), to distribute social agents in remote server.
        # envision_record_data_replay_path="./data_replay",
    )

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)

    env.close()
Example #7
def main(
    scenarios,
    headless,
    num_episodes,
    seed,
):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.StandardWithAbsoluteSteering, max_episode_steps=3000),
        policy_builder=HumanKeyboardPolicy,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        headless=headless,
        timestep_sec=0.1,
        seed=seed,
    )

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step(
                {AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)

    env.close()
Example #8
def test_hiway_env(env, agent_spec):
    for episode in episodes(n=MAX_EPISODES):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            obs = observations[AGENT_ID]
            observations, rewards, dones, infos = env.step({AGENT_ID: agent.act(obs)})
            episode.record_step(observations, rewards, dones, infos)

            assert (
                OBSERVATION_EXPECTED in observations[AGENT_ID]
            ), "Failed to apply observation adapter"

            assert (
                REWARD_EXPECTED == rewards[AGENT_ID]
            ), "Failed to apply reward adapter"

            assert INFO_EXTRA_KEY in infos[AGENT_ID], "Failed to apply info adapter"

    assert episode.index == (
        MAX_EPISODES - 1
    ), "Simulation must cycle through to the final episode."
Example #9
def run(agent_spec, callback, scenarios, episode_count, capture_step):
    AGENT_ID = "Agent-007"
    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=[scenarios],
        agent_specs={AGENT_ID: agent_spec},
        headless=True,
        fixed_timestep_sec=0.01,
        seed=42,
    )
    i = 0
    for episode in episodes(n=episode_count):
        agent = agent_spec.build_agent()
        observations = env.reset()

        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})

            episode.record_step(observations, rewards, dones, infos)

            if i % capture_step == 0:
                callback(rewards, agent_obs, dones, int(i / capture_step))
            i += 1

    env.close()
Example #10
def main(scenarios, sim_name, headless, num_episodes, seed):
    open_agent_spec = open_agent.entrypoint(debug=False, aggressiveness=3)
    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: open_agent_spec},
        sim_name=sim_name,
        headless=headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
        # envision_record_data_replay_path="./data_replay",
    )

    for episode in episodes(n=num_episodes):
        agent = open_agent_spec.build_agent()

        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step(
                {AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)

        del agent

    env.close()
Example #11
def main(scenarios, headless, num_episodes, seed):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None),
        agent_builder=KeepLaneAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        headless=headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
        # envision_record_data_replay_path="./data_replay",
    )

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)

    env.close()
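
The KeepLaneAgent built in Example #11 is the simplest SMARTS agent. For reference, a sketch of its usual definition (matching the AgentType.Laner action space, which accepts lane-following commands such as "keep_lane") is:

from smarts.core.agent import Agent


class KeepLaneAgent(Agent):
    def act(self, obs):
        # Always issue the lane-following command; SMARTS handles steering and speed.
        return "keep_lane"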
Example #12
def train(training_scenarios, evaluation_scenarios, sim_name, headless,
          num_episodes, seed):
    agent_params = {"input_dims": 4, "hidden_dims": 7, "output_dims": 3}
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Standard,
                                           max_episode_steps=5000),
        agent_params=agent_params,
        agent_builder=PyTorchAgent,
        observation_adapter=observation_adapter,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=training_scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        fixed_timestep_sec=0.1,
        seed=seed,
    )

    steps = 0
    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step(
                {AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)
            steps += 1

            if steps % 500 == 0:
                print("Evaluating agent")

                # We construct an evaluation agent based on the saved
                # state of the agent in training.
                model_path = tempfile.mktemp()
                agent.save(model_path)

                eval_agent_spec = agent_spec.replace(
                    agent_params=dict(agent_params, model_path=model_path))

                # Remove the call to ray.wait if you want evaluation to run
                # in parallel with training
                ray.wait([
                    evaluate.remote(eval_agent_spec, evaluation_scenarios,
                                    headless, seed)
                ])

    env.close()
Example #13
def main(
    scenarios,
    sim_name,
    headless,
    num_episodes,
    seed,
    auth_key=None,
    max_episode_steps=None,
):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.Laner, max_episode_steps=max_episode_steps),
        agent_builder=ChaseViaPointsAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
        # zoo_workers=[("143.110.210.157", 7432)], # Distribute social agents across these workers
        auth_key=auth_key,
        # envision_record_data_replay_path="./data_replay",
    )

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step(
                {AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)

    env.close()
Example #14
def main(scenarios,
         sim_name,
         headless,
         num_episodes,
         seed,
         max_episode_steps=None):
    agent_specs = {
        agent_id: AgentSpec(
            interface=AgentInterface.from_type(
                AgentType.Laner, max_episode_steps=max_episode_steps),
            agent_builder=KeepLaneAgent,
        )
        for agent_id in AGENT_IDS
    }

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs=agent_specs,
        sim_name=sim_name,
        headless=headless,
        seed=seed,
    )

    for episode in episodes(n=num_episodes):
        agents = {
            agent_id: agent_spec.build_agent()
            for agent_id, agent_spec in agent_specs.items()
        }
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            actions = {
                agent_id: agents[agent_id].act(agent_obs)
                for agent_id, agent_obs in observations.items()
            }

            observations, rewards, dones, infos = env.step(actions)
            episode.record_step(observations, rewards, dones, infos)

    env.close()
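
Example #14 builds one AgentSpec per id in a module-level AGENT_IDS collection and steps all agents together. A typical definition of that collection is sketched below; the number of agents and the id format are illustrative.

# Assumed module-level ids; any unique, hashable ids work.
N_AGENTS = 4
AGENT_IDS = ["Agent %i" % i for i in range(N_AGENTS)]

Each id then receives its own spec and agent through the dict comprehensions at the top of the example.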
Example #15
def main(scenarios,
         sim_name,
         headless,
         num_episodes,
         seed,
         max_episode_steps=None):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.LanerWithSpeed, max_episode_steps=max_episode_steps),
        agent_builder=ChaseViaPointsAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        visdom=False,
        fixed_timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
        # zoo_addrs=[("10.193.241.236", 7432)], # Sample server address (ip, port), to distribute social agents in remote server.
        # envision_record_data_replay_path="./data_replay",
    )

    # Wrap a single-agent env with SingleAgent wrapper to make `step` and `reset`
    # output compliant with gym spaces.
    env = SingleAgent(env)

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observation = env.reset()
        episode.record_scenario(env.scenario_log)

        done = False
        while not done:
            agent_action = agent.act(observation)
            observation, reward, done, info = env.step(agent_action)
            episode.record_step(observation, reward, done, info)

    env.close()
Example #16
def main(scenarios, headless, num_episodes, max_episode_steps=None):
    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={},
        headless=headless,
        sumo_headless=True,
    )

    if max_episode_steps is None:
        max_episode_steps = 1000

    for episode in episodes(n=num_episodes):
        env.reset()
        episode.record_scenario(env.scenario_log)

        for _ in range(max_episode_steps):
            env.step({})
            episode.record_step({}, {}, {}, {})

    env.close()
Example #17
def test_env_frame_test(scenarios, seed):
    env, agent_spec = env_and_spec(scenarios, seed)
    episode_counter = 0
    for episode in episodes(n=10):
        episode_counter += 1
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}

        maximum_frame_rate = 0
        minimum_frame_rate = float("inf")
        step_counter = 0
        fps_sum = 0

        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            step_start_time = int(time.time() * 1000)
            observations, rewards, dones, infos = env.step(
                {AGENT_ID: agent_action})
            step_end_time = int(time.time() * 1000)
            delta = step_end_time - step_start_time
            step_fps = round(1000 / delta, 2)
            maximum_frame_rate = max(maximum_frame_rate, step_fps)
            minimum_frame_rate = min(minimum_frame_rate, step_fps)
            fps_sum += step_fps
            test_logger.info(
                f"The time delta at episode {episode_counter}, step {step_counter+1} is {delta} milliseconds which is {step_fps} fps."
            )

            episode.record_step(observations, rewards, dones, infos)
            step_counter += 1
        avg_frame_rate = fps_sum / (step_counter or 1)
        test_logger.info(
            f"Episode {episode_counter}, Minimum fps: {minimum_frame_rate}, Maximum fps: {maximum_frame_rate}, Average fps: {avg_frame_rate}."
        )
        assert (minimum_frame_rate >= 10) and (avg_frame_rate >= 20)
    env.close()
Example #18
def main(scenarios,
         sim_name,
         headless,
         num_episodes,
         seed,
         max_episode_steps=None):
    agent_spec = AgentSpec(
        interface=AgentInterface(
            waypoints=True,
            action=ActionSpaceType.LaneWithContinuousSpeed,
            neighborhood_vehicles=True,
            rgb=True),
        agent_builder=perpetual_rider_agent.PerpetualRiderAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        visdom=True,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
    )

    camera_pose = make_camera_pose()
    camera_intrinsic = np.array(
        (250.0, 0.0, FRAME_WIDTH / 2, 0.0, 250.0, FRAME_HEIGHT / 2)).reshape(
            (1, 2, 3))
    color_rng = RandomColor(10)

    scene_idx = 7001
    end_scene_idx = 8001

    rm(f"{OUTPUT_DIR}")
    mkdir(f"{OUTPUT_DIR}/annotations/")
    mkdir(f"{OUTPUT_DIR}/ego_poses/")
    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)
        episode_sim_time_epoch = 0
        episode_sim_time_frame_with_visible_object = 0

        mkdir(f"{OUTPUT_DIR}/frames/scene-{scene_idx:04d}/")
        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step(
                {AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)
            timestamp = episode.start_time + episode.sim_time

            # at most 18 seconds total
            # if episode.sim_time < 2.:
            #     continue
            if episode.sim_time > 31.99:
                scene_idx += 1
                break

            # 10 seconds for each scene
            if episode.sim_time - episode_sim_time_epoch > 9.99:
                scene_idx += 1
                episode_sim_time_epoch = episode.sim_time
                mkdir(f"{OUTPUT_DIR}/frames/scene-{scene_idx:04d}/")

            # generate ego_poses
            ego_rot_quat = Rotation.from_euler(
                'z', agent_obs.ego_vehicle_state.heading,
                degrees=False).as_quat().flatten()
            ego_translate = agent_obs.ego_vehicle_state.position.flatten()
            ego_pose = ', '.join([
                str(x) for x in np.concatenate((ego_rot_quat,
                                                ego_translate)).tolist()
            ])
            with open(
                    f'{OUTPUT_DIR}/ego_poses/scene-{scene_idx:04d}_ego_pose.csv',
                    'a') as ego_pose_file:
                ego_pose_file.write(f'{timestamp}, {ego_pose}\n')

            # generate frame
            frame_ego = np.zeros((FRAME_HEIGHT, FRAME_WIDTH, 3),
                                 dtype=np.uint8)
            ego_vehicle_pose = np.array(
                (0, 0, agent_obs.ego_vehicle_state.heading,
                 *agent_obs.ego_vehicle_state.position))
            color_rng.reset()
            visible_object_counter = 0
            for object_uid, neighborhood_vehicle_state in enumerate(
                    agent_obs.neighborhood_vehicle_states):
                other_vehicle_pose = np.array(
                    (0, 0, neighborhood_vehicle_state.heading,
                     *neighborhood_vehicle_state.position))
                other_vehicle_size = neighborhood_vehicle_state.bounding_box
                color = color_rng()
                frame_ego, xyxy = project(ego_vehicle_pose, other_vehicle_pose,
                                          camera_pose, camera_intrinsic,
                                          other_vehicle_size, frame_ego, color)
                # generate annotations
                if xyxy is not None:
                    with open(
                            f'{OUTPUT_DIR}/annotations/scene-{scene_idx:04d}_instances_ann.csv',
                            'a') as annotation_file:
                        annotation_file.write(f"{timestamp}, {object_uid}, " +
                                              ", ".join([str(x)
                                                         for x in xyxy]) +
                                              "\n")
                    visible_object_counter += 1

            # drop the scene if no objects have been visible for too long

            if visible_object_counter < 1:
                if episode.sim_time - episode_sim_time_frame_with_visible_object > 0.5:
                    break
            else:
                episode_sim_time_frame_with_visible_object = episode.sim_time

        # remove the last scene if it ended before a full scene duration elapsed
        if episode.sim_time - episode_sim_time_epoch < 9.99:
            rm(f"{OUTPUT_DIR}/frames/scene-{scene_idx:04d}/")
            rm(f'{OUTPUT_DIR}/annotations/scene-{scene_idx:04d}_instances_ann.csv'
               )
            rm(f'{OUTPUT_DIR}/ego_poses/scene-{scene_idx:04d}_ego_pose.csv')

        time.sleep(2)
        if scene_idx >= end_scene_idx:
            break

    env.close()
Example #19
def test(test_scenarios, sim_name, headless, num_episodes, seed):
    config = HyperParameters()
    configProto = init_tensorflow()
    # init env
    agent_spec = AgentSpec(
        # you can customize AgentInterface to control what observation information you need and the action type
        interface=cross_interface,
        # agent_builder=actor,
        # you can customize your observation adapter, reward adapter, info adapter, action adapter, and so on.
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
        action_adapter=action_adapter,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=test_scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        timestep_sec=0.1,
        seed=seed,
    )
    # init nets structure
    if WITH_SOC_MT:
        model_name = "Soc_Mt_TD3Network"
        actor = SocMtActorNetwork(name="actor")
        critic_1 = SocMtCriticNetwork(name="critic_1")
        critic_2 = SocMtCriticNetwork(name="critic_2")
    else:
        model_name = "TD3Network"
        actor = ActorNetwork(name="actor")
        critic_1 = CriticNetwork(name="critic_1")
        critic_2 = CriticNetwork(name="critic_2")
    saver = tf.compat.v1.train.Saver()
    with tf.compat.v1.Session(config=configProto) as sess:
        # load network
        saver = tf.compat.v1.train.import_meta_graph("models/" + model_name +
                                                     ".ckpt" + ".meta")
        saver.restore(sess, "models/" + model_name + ".ckpt")
        if saver is None:
            print("did not load")

        # init testing params
        test_num = 100
        test_ep = 0
        # results record
        success = 0
        failure = 0
        passed_case = 0

        collision = 0
        trouble_collision = 0
        time_exceed = 0
        episode_time_record = []

        # start testing
        for episode in episodes(n=num_episodes):
            episode_reward = 0
            env_steps = 0  # step in one episode
            observations = env.reset()  # states of all vehs
            state = observations[AGENT_ID]  # ego state
            episode.record_scenario(env.scenario_log)
            dones = {"__all__": False}
            while not dones["__all__"]:
                action = actor.get_action_noise(sess, state, rate=-1)
                observations, rewards, dones, infos = env.step(
                    {AGENT_ID: action})  # states of all vehs in next step

                # ego state in next step
                state = observations[AGENT_ID]
                if WITH_SOC_MT:
                    reward = rewards[AGENT_ID]
                else:
                    reward = np.sum(list(rewards.values()))
                done = dones[AGENT_ID]
                info = infos[AGENT_ID]
                aux_info = get_aux_info(infos[AGENT_ID]["env_obs"])
                episode.record_step(observations, rewards, dones, infos)
                if WITH_SOC_MT:
                    episode_reward += np.sum(reward)
                else:
                    episode_reward += reward
                env_steps += 1

                if done:
                    test_ep += 1
                    # record result
                    if aux_info == "collision":
                        collision += 1
                        failure += 1
                    elif aux_info == "trouble_collision":
                        trouble_collision += 1
                        passed_case += 1
                    elif aux_info == "time_exceed":
                        time_exceed += 1
                        failure += 1
                    else:
                        # get episode time
                        episode_time_record.append(env_steps * 0.1)
                        success += 1
                    # print
                    print(
                        episode.index,
                        "EPISODE ended",
                        "TOTAL REWARD {:.4f}".format(episode_reward),
                        "Result:",
                        aux_info,
                    )
                    print("total step of this episode: ", env_steps)
                    episode_reward = 0
                    env_steps = 0
                    observations = env.reset()  # states of all vehs
                    state = observations[AGENT_ID]  # ego state
        env.close()

        print("-*" * 15, " result ", "-*" * 15)
        print("success: ", success, "/", test_num)
        print("collision: ", collision, "/", test_num)
        print("time_exceed: ", time_exceed, "/", test_num)
        print("passed_case: ", passed_case, "/", test_num)
        print("average time: ", np.mean(episode_time_record))
Example #20
def train(
    training_scenarios,
    sim_name,
    headless,
    num_episodes,
    seed,
    without_soc_mt,
    session_dir,
):
    WITH_SOC_MT = without_soc_mt
    config = HyperParameters()
    configProto = init_tensorflow()

    # init env
    agent_spec = AgentSpec(
        # you can customize AgentInterface to control what observation information you need and the action type
        interface=cross_interface,
        # agent_builder=actor,
        # you can customize your observation adapter, reward adapter, info adapter, action adapter, and so on.
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
        action_adapter=action_adapter,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=training_scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        timestep_sec=0.1,
        seed=seed,
    )

    # init nets structure
    if WITH_SOC_MT:
        model_name = "Soc_Mt_TD3Network"
        actor = SocMtActorNetwork(name="actor")
        critic_1 = SocMtCriticNetwork(name="critic_1")
        critic_2 = SocMtCriticNetwork(name="critic_2")
    else:
        model_name = "TD3Network"
        actor = ActorNetwork(name="actor")
        critic_1 = CriticNetwork(name="critic_1")
        critic_2 = CriticNetwork(name="critic_2")
    # tensorflow summary for tensorboard visualization
    writer = tf.compat.v1.summary.FileWriter("summary")
    # losses
    tf.compat.v1.summary.scalar("Loss", critic_1.loss)
    tf.compat.v1.summary.scalar("Hubor_loss", critic_1.loss_2)
    tf.compat.v1.summary.histogram("ISWeights", critic_1.ISWeights)
    write_op = tf.compat.v1.summary.merge_all()
    saver = tf.compat.v1.train.Saver(max_to_keep=1000)

    # init memory buffer
    buffer = Buffer(config.buffer_size, config.pretrain_length)
    if config.load_buffer:  # !!! the buffer's capacity is limited by the loaded buffer file
        buffer = buffer.load_buffer(config.buffer_load_path)
        print("BUFFER: Buffer Loaded")
    else:
        buffer.fill_buffer(env, AGENT_ID)
        print("BUFFER: Buffer Filled")
        buffer.save_buffer(config.buffer_save_path, buffer)
    print("BUFFER: Buffer initialize")

    with tf.compat.v1.Session(config=configProto) as sess:
        # init nets params
        sess.run(tf.compat.v1.global_variables_initializer())
        writer.add_graph(sess.graph)
        # update params of the target network
        actor.update_target(sess)
        critic_1.update_target(sess)
        critic_2.update_target(sess)

        # Reinforcement Learning loop
        print("Training Starts...")
        # experiment results
        recent_rewards = []  # rewards from recent 100 episodes
        avarage_rewards = []  # average reward over the recent 100 episodes
        recent_success = []
        recent_success_rate = []
        EPSILON = 1

        for episode in episodes(n=num_episodes):
            env_steps = 0
            # save the model from time to time
            if config.model_save_frequency:
                if episode.index % config.model_save_frequency == 0:
                    save_path = saver.save(sess,
                                           f"{session_dir}/{model_name}.ckpt")
                    print("latest model saved")
                if episode.index % config.model_save_frequency_no_paste == 0:
                    saver.save(
                        sess,
                        f"{session_dir}/{model_name}_{str(episode.index)}.ckpt",
                    )
                    print("model saved")

            # initialize
            EPSILON = (config.noised_episodes -
                       episode.index) / config.noised_episodes
            episode_reward = 0

            observations = env.reset()  # states of all vehs
            state = observations[AGENT_ID]  # ego state
            episode.record_scenario(env.scenario_log)
            dones = {"__all__": False}
            while not dones["__all__"]:
                action_noise = actor.get_action_noise(sess,
                                                      state,
                                                      rate=EPSILON)
                observations, rewards, dones, infos = env.step(
                    {AGENT_ID:
                     action_noise})  # states of all vehs in next step

                # ego state in next step
                next_state = observations[AGENT_ID]
                if WITH_SOC_MT:
                    reward = rewards[AGENT_ID]
                else:
                    reward = np.sum(list(rewards.values()))
                done = dones[AGENT_ID]
                info = infos[AGENT_ID]
                aux_info = get_aux_info(infos[AGENT_ID]["env_obs"])
                episode.record_step(observations, rewards, dones, infos)
                if WITH_SOC_MT:
                    episode_reward += np.sum(reward)
                else:
                    episode_reward += reward

                # store the experience
                experience = state, action_noise, reward, next_state, done
                # print(state)
                buffer.store(experience)

                ## Model training STARTS
                if env_steps % config.train_frequency == 0:
                    # "Delayed" Policy Updates
                    policy_delayed = 2
                    for _ in range(policy_delayed):
                        # First we need a mini-batch with experiences (s, a, r, s', done)
                        tree_idx, batch, ISWeights_mb = buffer.sample(
                            config.batch_size)
                        s_mb, a_mb, r_mb, next_s_mb, dones_mb = get_split_batch(
                            batch)
                        task_mb = s_mb[:, -config.task_size:]
                        next_task_mb = next_s_mb[:, -config.task_size:]

                        # Get q_target values for next_state from the critic_target
                        if WITH_SOC_MT:
                            a_target_next_state = actor.get_action_target(
                                sess,
                                next_s_mb)  # with Target Policy Smoothing
                            q_target_next_state_1 = critic_1.get_q_value_target(
                                sess, next_s_mb, a_target_next_state)
                            q_target_next_state_1 = (q_target_next_state_1 *
                                                     next_task_mb
                                                     )  # multi task q value
                            q_target_next_state_2 = critic_2.get_q_value_target(
                                sess, next_s_mb, a_target_next_state)
                            q_target_next_state_2 = (q_target_next_state_2 *
                                                     next_task_mb
                                                     )  # multi task q value
                            q_target_next_state = np.minimum(
                                q_target_next_state_1, q_target_next_state_2)
                        else:
                            a_target_next_state = actor.get_action_target(
                                sess,
                                next_s_mb)  # with Target Policy Smoothing
                            q_target_next_state_1 = critic_1.get_q_value_target(
                                sess, next_s_mb, a_target_next_state)
                            q_target_next_state_2 = critic_2.get_q_value_target(
                                sess, next_s_mb, a_target_next_state)
                            q_target_next_state = np.minimum(
                                q_target_next_state_1, q_target_next_state_2)

                        # Set Q_target = r if the episode ends at s+1, otherwise Q_target = r + gamma * Qtarget(s',a')
                        target_Qs_batch = []
                        for i in range(0, len(dones_mb)):
                            terminal = dones_mb[i]
                            # if we are in a terminal state, Q_target only equals the reward
                            if terminal:
                                target_Qs_batch.append((r_mb[i] * task_mb[i]))
                            else:
                                # take the Q target for action a'
                                target = (
                                    r_mb[i] * task_mb[i] +
                                    config.gamma * q_target_next_state[i])
                                target_Qs_batch.append(target)
                        targets_mb = np.array(
                            [each for each in target_Qs_batch])

                        # critic train
                        if len(a_mb.shape) > 2:
                            a_mb = np.squeeze(a_mb, axis=1)
                        loss, absolute_errors = critic_1.train(
                            sess, s_mb, a_mb, targets_mb, ISWeights_mb)
                        loss_2, absolute_errors_2 = critic_2.train(
                            sess, s_mb, a_mb, targets_mb, ISWeights_mb)
                    # actor train
                    a_for_grad = actor.get_action(sess, s_mb)
                    a_gradients = critic_1.get_gradients(
                        sess, s_mb, a_for_grad)
                    # print(a_gradients)
                    actor.train(sess, s_mb, a_gradients[0])
                    # target train
                    actor.update_target(sess)
                    critic_1.update_target(sess)
                    critic_2.update_target(sess)

                    # update replay memory priorities
                    if WITH_SOC_MT:
                        absolute_errors = np.sum(absolute_errors, axis=1)
                    buffer.batch_update(tree_idx, absolute_errors)
                    ## Model training ENDS

                if done:
                    # visualize reward data
                    recent_rewards.append(episode_reward)
                    if len(recent_rewards) > 100:
                        recent_rewards.pop(0)
                    avarage_rewards.append(np.mean(recent_rewards))
                    avarage_rewards_data = np.array(avarage_rewards)
                    d = {"avarage_rewards": avarage_rewards_data}
                    with open(os.path.join("results", "reward_data" + ".pkl"),
                              "wb") as f:
                        pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
                    # visualize success rate data
                    if aux_info == "success":
                        recent_success.append(1)
                    else:
                        recent_success.append(0)
                    if len(recent_success) > 100:
                        recent_success.pop(0)
                    avarage_success_rate = recent_success.count(1) / len(
                        recent_success)
                    recent_success_rate.append(avarage_success_rate)
                    recent_success_rate_data = np.array(recent_success_rate)
                    d = {"recent_success_rates": recent_success_rate_data}
                    with open(
                            os.path.join("results",
                                         "success_rate_data" + ".pkl"),
                            "wb") as f:
                        pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
                    # print results on the terminal
                    print("Episode total reward:", episode_reward)
                    print("Episode time:", env_steps * 0.1)
                    print("Success rate:", avarage_success_rate)
                    print(episode.index, "episode finished.")
                    buffer.measure_utilization()
                    print("---" * 15)
                    break
                else:
                    state = next_state
                    env_steps += 1
        env.close()
Example #21
    def get_total_actions(self):
        """ Returns the total number of actions an agent could ever take """
        return self.n_actions

    def get_stats(self):
        return None

    def render(self):
        raise NotImplementedError

    def close(self):
        pass

    def seed(self):
        raise NotImplementedError


if __name__ == "__main__":
    env = SMARTSEnv()
    base_env = env.base_env

    for episode in episodes(n=100):
        observations = env.reset()
        episode.record_scenario(env.base_env.scenario_log)

        dones = {"__all__": False}
        while not all(dones.values()):
            observations, rewards, dones, infos = env.step([0, 1])
            episode.record_step(observations, rewards, dones, infos)
Example #22
        type=str,
        nargs="+",
    )
    parser.add_argument("--headless",
                        help="run simulation in headless mode",
                        action="store_true")
    args = parser.parse_args()

    data_replay_path = (
        f"./{args.replay_data}/{args.scenarios[0].split('/')[-1]}/data_replay")
    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=args.scenarios,
        agent_specs={},
        headless=args.headless,
        visdom=False,
        fixed_timestep_sec=0.1,
        endless_traffic=False,
        envision_record_data_replay_path=data_replay_path,
    )

    for episode in episodes(n=1):
        env.reset()
        episode.record_scenario(env.scenario_log)

        for _ in range(600):
            env.step({})
            episode.record_step({}, {}, {}, {})

    env.close()
Example #23
def main(scenarios, sim_name, headless, seed, speed, max_steps, save_dir, write):
    from zoo import policies

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    policies.replay_save_dir = save_dir
    policies.replay_read = not write

    # This is how you can wrap an agent in the replay-agent-v0 wrapper to store its inputs and actions,
    # and later load and replay them
    agent_spec = zoo_make(
        "zoo.policies:replay-agent-v0",
        save_directory=save_dir,
        id="agent_007",
        wrapped_agent_locator="zoo.policies:keep-left-with-speed-agent-v0",
        wrapped_agent_params={"speed": speed},
    )
    # copy the scenarios to the replay directory to make sure they are not changed
    copy_scenarios(save_dir, scenarios)

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
    )

    # Carry out the experiment
    episode = next(episodes(n=1))
    agent = agent_spec.build_agent()
    observations = env.reset()

    dones = {"__all__": False}
    MAX_STEPS = 2550
    i = 0
    try:
        while not dones["__all__"] and i < max_steps:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
            i += 1
            if i % 10 == 0:
                print("Step: ", i)
            episode.record_step(observations, rewards, dones, infos)
    except KeyboardInterrupt:
        # discard result
        i = MAX_STEPS
    finally:
        if dones["__all__"]:
            i = MAX_STEPS
        try:
            episode.record_scenario(env.scenario_log)
            env.close()
        finally:
            sys.exit(i // 10)