Example #1
def test_agents(self):
    params = ParameterServer()
    params["ML"]["BaseAgent"]["NumSteps"] = 2
    params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2

    bp = DiscreteHighwayBlueprint(params, num_scenarios=10, random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False)

    # IQN Agent
    iqn_agent = IQNAgent(env=env, test_env=env, params=params)
    env.ml_behavior = iqn_agent
    self.assertEqual(env.ml_behavior.set_action_externally, False)
    iqn_agent.run()
    self.assertEqual(env.ml_behavior.set_action_externally, True)

    # FQF Agent
    fqf_agent = FQFAgent(env=env, test_env=env, params=params)
    env.ml_behavior = fqf_agent
    self.assertEqual(env.ml_behavior.set_action_externally, False)
    fqf_agent.run()
    self.assertEqual(env.ml_behavior.set_action_externally, True)

    # QRDQN Agent
    qrdqn_agent = QRDQNAgent(env=env, test_env=env, params=params)
    env.ml_behavior = qrdqn_agent
    self.assertEqual(env.ml_behavior.set_action_externally, False)
    qrdqn_agent.run()
    self.assertEqual(env.ml_behavior.set_action_externally, True)
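All three agents follow the same contract here: `set_action_externally` starts out False, and calling `run()` hands action selection to the training loop, which flips the flag. A toy illustration of that contract (the `StubBehavior` class below is illustrative only, not part of bark-ml):

class StubBehavior:
    """Toy stand-in for an ML behavior whose trainer sets actions externally."""

    def __init__(self):
        # Until training starts, the behavior plans its own actions.
        self.set_action_externally = False

    def run(self):
        # The training loop takes over action selection.
        self.set_action_externally = True

behavior = StubBehavior()
assert not behavior.set_action_externally
behavior.run()
assert behavior.set_action_externally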
Example #2
File: fqf.py, Project: eeshakumar/bark-ml
def run_configuration(argv):
    params = ParameterServer()
    params["ML"]["BaseAgent"][
        "SummaryPath"] = "/home/mansoor/Study/Werkstudent/fortiss/code/bark-ml/summaries"
    params["ML"]["BaseAgent"][
        "CheckpointPath"] = "/home/mansoor/Study/Werkstudent/fortiss/code/bark-ml/checkpoints"

    env = gym.make(FLAGS.env, params=params)
    agent = FQFAgent(env=env, test_env=env, params=params)

    if FLAGS.load and params["ML"]["BaseAgent"]["CheckpointPath"]:
        agent.load_models(
            os.path.join(params["ML"]["BaseAgent"]["CheckpointPath"], "best"))

    if FLAGS.mode == "train":
        agent.train()

    elif FLAGS.mode == "visualize":
        agent.visualize()

    elif FLAGS.mode == "evaluate":
        # writes evaluation data using the summary writer in SummaryPath
        agent.evaluate()

    else:
        raise Exception("Invalid argument for --mode")
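`FLAGS` is never defined in this excerpt. Assuming the script uses absl (as bark-ml run scripts typically do), the missing boilerplate would look roughly like the sketch below; the flag names `env`, `mode`, and `load` are inferred from the usage above, and the defaults are assumptions:

from absl import app, flags

FLAGS = flags.FLAGS
# Flag names inferred from run_configuration(); defaults are assumptions.
flags.DEFINE_string("env", "highway-v1", "Registered gym environment id.")
flags.DEFINE_enum("mode", "train", ["train", "visualize", "evaluate"],
                  "Whether to train, visualize, or evaluate the agent.")
flags.DEFINE_bool("load", False, "Load model weights from CheckpointPath.")

if __name__ == "__main__":
    app.run(run_configuration)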
Example #3
def main():
    args = configure_args()
    exp_dir = args.exp_dir or "results/training/toy_evaluation"
    params_filename = glob.glob(os.path.join(exp_dir, "params_*"))
    params = ParameterServer(filename=params_filename[0])
    behavior_params_filename = glob.glob(
        os.path.join(exp_dir, "behavior_params*"))
    if behavior_params_filename:
        splits = 8
        behavior_params = ParameterServer(filename=behavior_params_filename[0])
        behavior_space = BehaviorSpace(behavior_params)
        hypothesis_set, _ = behavior_space.create_hypothesis_set_fixed_split(
            split=splits)
        observer = BeliefObserver(params, hypothesis_set, splits=splits)
        behavior = BehaviorDiscreteMacroActionsML(behavior_params)
    else:
        behavior = BehaviorDiscreteMacroActionsML(params)
        observer = NearestAgentsObserver(params)

    evaluator = GoalReached(params)

    scenario_params = ParameterServer(
        filename=
        "configuration/database/scenario_sets/interaction_merging_light_dense_1D.json"
    )
    scenario_generator = ConfigurableScenarioGeneration(params=scenario_params,
                                                        num_scenarios=5)
    scenario_file = glob.glob(os.path.join(exp_dir, "scenarios_list*"))
    scenario_generator.load_scenario_list(scenario_file[0])
    viewer = MPViewer(params=params,
                      x_range=[-35, 35],
                      y_range=[-35, 35],
                      follow_agent_id=True)
    env = HyDiscreteHighway(behavior=behavior,
                            observer=observer,
                            evaluator=evaluator,
                            viewer=viewer,
                            scenario_generation=scenario_generator,
                            render=True)

    env.reset()
    actions = [0, 1, 2, 3, 4, 5, 6]
    for action in actions:
        print(action)
        env.step(action)
    agent = FQFAgent(env=env, test_env=env, params=params)

    agent.load_models(os.path.join(exp_dir, "agent/checkpoints/final"))
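`configure_args()` is not shown in this excerpt. Since `main()` only reads `args.exp_dir`, a minimal argparse-based stand-in could look like this (the flag name `--exp_dir` is inferred from the attribute access; everything else is an assumption):

import argparse

def configure_args():
    # Only exp_dir is consumed by main(); a None default triggers the
    # "results/training/toy_evaluation" fallback above.
    parser = argparse.ArgumentParser()
    parser.add_argument("--exp_dir", type=str, default=None,
                        help="Experiment directory containing params_* files.")
    return parser.parse_args()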
Example #4
def test_agent_wrapping(self):
    params = ParameterServer()
    env = gym.make("highway-v1", params=params)
    env.reset()
    agent = IQNAgent(env=env, test_env=env, params=params)
    agent = FQFAgent(env=env, test_env=env, params=params)
    agent = QRDQNAgent(env=env, test_env=env, params=params)
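Snippets like this one assume the bark-ml environments are already registered with gym. In the bark-ml examples, that registration happens as an import side effect, roughly as below (treat the exact import path as an assumption if your version differs):

import gym
# Importing this module registers ids such as "highway-v1" with gym.
import bark_ml.environments.gym  # noqa: F401

env = gym.make("highway-v1")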
Example #5
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = DiscreteHighwayBlueprint(params,
                                      num_scenarios=10,
                                      random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        #env = gym.make("highway-v1", params=params)
        ml_behaviors = []
        # ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
        ml_behaviors.append(FQFAgent(env=env, params=params))
        # ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            action = np.random.randint(low=0, high=env.action_space.n)
            observed_next_state, reward, done, info = env.step(action)
            print(
                f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
            )

            # action is set externally
            ml_behavior._set_action_externally = True
            agent_id = list(env._world.agents.keys())[0]
            observed_world = env._world.Observe([agent_id])[0]

            # do a random action and plan trajectory
            action = np.random.randint(low=1, high=env.action_space.n)
            ml_behavior.ActionToBehavior(action)
            a = ml_behavior.Plan(0.2, observed_world)

            # sample a different random action
            another_action = action
            while another_action == action:
                another_action = np.random.randint(low=1,
                                                   high=env.action_space.n)

            # plan a trajectory for the other action
            ml_behavior.ActionToBehavior(another_action)
            b = ml_behavior.Plan(0.2, observed_world)

            # the trajectories generated by two different actions should differ
            self.assertTrue(np.any(np.not_equal(a, b)))

            # the action will be calculated within the Plan(..) function
            ml_behavior._set_action_externally = False
            a = ml_behavior.Plan(0.2, observed_world)
            b = ml_behavior.Plan(0.2, observed_world)
            last_action = ml_behavior.GetLastAction()
            self.assertTrue(isinstance(last_action, float))

            # same trajectory for same state
            np.testing.assert_array_equal(a, b)
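Two assertion idioms appear above: numpy's `assert_array_equal` for the "same trajectory" direction, and an any-element-differs check for the "different trajectory" direction. A standalone illustration of both:

import numpy as np

a = np.array([[0.0, 1.0], [2.0, 3.0]])
b = a.copy()
c = a + 1e-3

# Equality, with an informative failure message on mismatch:
np.testing.assert_array_equal(a, b)

# Inequality: it is enough for any single element to differ.
assert np.any(np.not_equal(a, c))
assert not np.array_equal(a, c)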
Example #6
def test_agent_wrapping(self):
    params = ParameterServer()
    env = gym.make("highway-v1", params=params)
    env.reset()
    # agent = IQNAgent(env=env, test_env=env, params=params)
    params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2
    params["ML"]["BaseAgent"]["NumEvalEpisodes"] = 2
    agent = FQFAgent(env=env, params=params)
    agent.train_episode()
    agent.evaluate()
Example #7
def test_agent_wrapping(self):
    params = ParameterServer()
    env = gym.make("highway-v1", params=params)
    env.reset()
    params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2
    params["ML"]["BaseAgent"]["NumEvalEpisodes"] = 2
    train_bench = TrainingBenchmarkDatabase()
    agent = FQFAgent(env=env,
                     agent_save_dir="./save_dir",
                     params=params,
                     training_benchmark=train_bench)
    agent.train_episode()
    agent.evaluate()
Example #8
def configure_agent(params, env):
    agent = FQFAgent(env=env, test_env=env, params=params)
    return agent
Example #9
  def test_model_loader(self):
    # env using default params
    env = gym.make("highway-v1")

    networks = ["iqn", "fqf", "qrdqn"]

    action_space_size = env.action_space.n
    state_space_size = env.observation_space.shape[0]

    # a random sample state in [0, 1] used to evaluate actions
    random_state = np.random.rand(state_space_size).tolist()

    # test all networks
    for network in networks:
      # Do inference using C++ wrapped model
      model = pytorch_script_wrapper.ModelLoader(
          os.path.join(
              os.path.dirname(__file__),
              "lib_fqf_iqn_qrdqn_test_data/{}/online_net_script.pt"
              .format(network)), action_space_size, state_space_size)
      model.LoadModel()

      num_iters = 1000  # number of repetitions used to estimate runtime

      # Time num_iters iterations for inference using C++ model
      start = time.time()
      for _ in range(num_iters):
        actions_cpp = model.Inference(random_state)
      end = time.time()
      time_cpp = end - start  # TODO: decide how to compare Python vs. C++ runtimes within tests

      # Load and perform inference using python model
      if network == "iqn":
        agent = IQNAgent(env=env, test_env=env, params=ParameterServer())

      elif network == "fqf":
        agent = FQFAgent(env=env, test_env=env, params=ParameterServer())

      elif network == "qrdqn":
        agent = QRDQNAgent(env=env, test_env=env, params=ParameterServer())

      agent.load_models(
          os.path.join(
              os.path.dirname(__file__),
              "lib_fqf_iqn_qrdqn_test_data",
              network))

      # Time num_iters iterations for inference using python model
      start = time.time()
      for _ in range(num_iters):
        actions_py = agent.calculate_actions(random_state)

      end = time.time()
      time_py = end - start

      # assert that the Python and C++ model outputs match to 6 decimal places
      np.testing.assert_array_almost_equal(
          actions_py.flatten().numpy(),
          np.asarray(actions_cpp),
          decimal=6,
          err_msg="C++ and python models don't match")
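The TODO about runtime comparison could be handled by reporting a per-call average from a monotonic clock instead of wall-clock `time.time()`. A small sketch (the helper name and usage are illustrative, not part of the test suite):

import time

def average_latency(fn, num_iters=1000):
    """Mean seconds per call of fn(), measured with a monotonic clock."""
    start = time.perf_counter()
    for _ in range(num_iters):
        fn()
    return (time.perf_counter() - start) / num_iters

# Possible usage inside the test body above:
# time_cpp = average_latency(lambda: model.Inference(random_state))
# time_py = average_latency(lambda: agent.calculate_actions(random_state))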
Example #10
    def test_agents(self):
        params = ParameterServer()
        params["ML"]["BaseAgent"]["NumSteps"] = 2
        params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2

        bp = DiscreteHighwayBlueprint(params,
                                      num_scenarios=10,
                                      random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        env._observer = NearestAgentsObserver(params)
        env._action_wrapper = BehaviorDiscreteMacroActionsML(params)

        fqf_agent = FQFAgent(agent_save_dir="./save_dir",
                             env=env,
                             params=params)
        fqf_agent.train_episode()

        fqf_agent.save(checkpoint_type="best")
        fqf_agent.save(checkpoint_type="last")

        loaded_agent = FQFAgent(agent_save_dir="./save_dir",
                                checkpoint_load="best")
        loaded_agent2 = FQFAgent(agent_save_dir="./save_dir",
                                 checkpoint_load="last")

        loaded_agent_with_env = FQFAgent(env=env,
                                         agent_save_dir="./save_dir",
                                         checkpoint_load="last")
        loaded_agent_with_env.train_episode()

        self.assertEqual(loaded_agent.ml_behavior.action_space.n,
                         fqf_agent.ml_behavior.action_space.n)
        self.assertEqual(loaded_agent.ent_coef, fqf_agent.ent_coef)
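Beyond comparing `action_space.n` and `ent_coef`, a save/load round trip can be checked by diffing network weights. A hedged sketch, assuming the agent exposes a PyTorch module as `online_net` (the attribute name is a guess based on the `online_net_script.pt` filename in Example #9):

import torch

def assert_same_weights(module_a, module_b):
    # Two modules carry identical parameters iff every state_dict entry matches.
    state_a, state_b = module_a.state_dict(), module_b.state_dict()
    assert state_a.keys() == state_b.keys()
    for name, tensor in state_a.items():
        assert torch.equal(tensor, state_b[name]), f"mismatch in {name}"

# assert_same_weights(fqf_agent.online_net, loaded_agent.online_net)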