def test_agents(self):
    params = ParameterServer()
    params["ML"]["BaseAgent"]["NumSteps"] = 2
    params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2

    bp = DiscreteHighwayBlueprint(params, num_scenarios=10, random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False)

    # IQN Agent
    iqn_agent = IQNAgent(env=env, test_env=env, params=params)
    env.ml_behavior = iqn_agent
    self.assertEqual(env.ml_behavior.set_action_externally, False)
    iqn_agent.run()
    self.assertEqual(env.ml_behavior.set_action_externally, True)

    # FQF Agent
    fqf_agent = FQFAgent(env=env, test_env=env, params=params)
    env.ml_behavior = fqf_agent
    self.assertEqual(env.ml_behavior.set_action_externally, False)
    fqf_agent.run()
    self.assertEqual(env.ml_behavior.set_action_externally, True)

    # QRDQN Agent
    qrdqn_agent = QRDQNAgent(env=env, test_env=env, params=params)
    env.ml_behavior = qrdqn_agent
    self.assertEqual(env.ml_behavior.set_action_externally, False)
    qrdqn_agent.run()
    self.assertEqual(env.ml_behavior.set_action_externally, True)
Example 2
def run_configuration(argv):

    params = ParameterServer(
        filename="examples/example_params/iqn_params.json")
    params["ML"]["BaseAgent"][
        "SummaryPath"] = "/home/mansoor/Study/Werkstudent/fortiss/code/bark-ml/summaries"
    params["ML"]["BaseAgent"][
        "CheckpointPath"] = "/home/mansoor/Study/Werkstudent/fortiss/code/bark-ml/checkpoints"

    env = gym.make(FLAGS.env, params=params)
    agent = IQNAgent(env=env, test_env=env, params=params)

    if FLAGS.load and params["ML"]["BaseAgent"]["CheckpointPath"]:
        agent.load_models(
            os.path.join(params["ML"]["BaseAgent"]["CheckpointPath"], "best"))

    if FLAGS.mode == "train":
        agent.run()

    elif FLAGS.mode == "visualize":
        agent.visualize()

    elif FLAGS.mode == "evaluate":
        # writes evaluation data using the summary writer in the summary path
        agent.evaluate()

    else:
        raise Exception("Invalid argument for --mode")
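
For reference, run_configuration above reads absl-style command-line FLAGS. A minimal sketch of a matching entry point, assuming absl.flags (flag names inferred from FLAGS.env, FLAGS.mode, and FLAGS.load; defaults are illustrative only):

from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_string("env", "highway-v1", "Gym environment id to create.")
flags.DEFINE_enum("mode", "train", ["train", "visualize", "evaluate"],
                  "Whether to train, visualize, or evaluate the agent.")
flags.DEFINE_bool("load", False, "Load weights from the configured checkpoint path.")

if __name__ == "__main__":
    app.run(run_configuration)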
  def test_agent_wrapping(self):
    params = ParameterServer()
    env = gym.make("highway-v1", params=params)
    env.reset()
    # construction only: each assignment replaces the previously created agent
    agent = IQNAgent(env=env, test_env=env, params=params)
    agent = FQFAgent(env=env, test_env=env, params=params)
    agent = QRDQNAgent(env=env, test_env=env, params=params)
Example 4
def configure_agent(params, env):
    agent_save_dir = os.path.join(params["Experiment"]["dir"], "agent")
    training_benchmark = None  #TrainingBenchmarkDatabase()
    agent = IQNAgent(env=env,
                     params=params,
                     agent_save_dir=agent_save_dir,
                     training_benchmark=training_benchmark)
    return agent
def configure_agent(params, env, checkpoint_load=None, is_online_demo=False):
    agent_save_dir = os.path.join(params["Experiment"]["dir"], "agent")
    training_benchmark = None #TrainingBenchmarkDatabase()
    agent = IQNAgent(env=env, params=params, agent_save_dir=agent_save_dir,
                     training_benchmark=training_benchmark,
                     checkpoint_load=checkpoint_load, 
                     is_learn_from_demonstrations=True,
                     is_online_demo=is_online_demo,
                     is_common_taus=True)
    return agent
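
The second configure_agent variant enables learning from demonstrations and optional checkpoint loading on top of the first. A hypothetical usage sketch (the experiment directory below is an assumption; the parameter file and environment id are taken from the other examples):

params = ParameterServer(filename="examples/example_params/iqn_params.json")
params["Experiment"]["dir"] = "/tmp/iqn_experiment"  # hypothetical experiment root
env = gym.make("highway-v1", params=params)

agent = configure_agent(params, env, checkpoint_load="last", is_online_demo=False)
agent.run()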
Example 6
def pick_agent(exp_root, env, params):
    agent_save_dir = os.path.join(exp_root, "agent")
    agent = IQNAgent(env=env,
                     params=params,
                     agent_save_dir=agent_save_dir,
                     checkpoint_load="trained_only_demonstrations",
                     is_online_demo=True)
    print(agent.memory.capacity, agent.memory.agent_capacity, agent.memory._an,
          agent.memory._dn)
    return agent
Example 7
def configure_agent(params, env):
    agent_save_dir = os.path.join(trained_only_demonstrations_path, "agent")
    training_benchmark = None  #TrainingBenchmarkDatabase()
    agent = IQNAgent(env=env,
                     params=params,
                     agent_save_dir=agent_save_dir,
                     training_benchmark=training_benchmark,
                     is_learn_from_demonstrations=True,
                     checkpoint_load="trained_only_demonstrations",
                     is_online_demo=True)
    return agent
  def test_behavior_wrapping(self):
    # create scenario
    params = ParameterServer()
    bp = DiscreteHighwayBlueprint(params, num_scenarios=10, random_seed=0)
    env = SingleAgentRuntime(blueprint=bp, render=False)
    #env = gym.make("highway-v1", params=params)
    ml_behaviors = []
    ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
    ml_behaviors.append(FQFAgent(env=env, test_env=env, params=params))
    ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))

    for ml_behavior in ml_behaviors:
      # set agent
      env.ml_behavior = ml_behavior
      env.reset()
      done = False
      while not done:
        action = np.random.randint(low=0, high=env.action_space.n)
        observed_next_state, reward, done, info = env.step(action)
        print(
            f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
        )

      # action is set externally
      ml_behavior._set_action_externally = True
      agent_id = list(env._world.agents.keys())[0]
      observed_world = env._world.Observe([agent_id])[0]

      # do a random action and plan trajectory
      action = np.random.randint(low=1, high=env.action_space.n)
      ml_behavior.ActionToBehavior(action)
      a = ml_behavior.Plan(0.2, observed_world)

      # sample a different random action
      another_action = action
      while another_action == action:
        another_action = np.random.randint(low=1, high=env.action_space.n)

      # plan a trajectory for the other action
      ml_behavior.ActionToBehavior(another_action)
      b = ml_behavior.Plan(0.2, observed_world)

      # trajectories generated by two different actions should differ
      self.assertTrue(np.any(np.not_equal(a, b)))

      # the action is calculated internally within the Plan(..) function
      ml_behavior._set_action_externally = False
      a = ml_behavior.Plan(0.2, observed_world)
      b = ml_behavior.Plan(0.2, observed_world)

      # same trajectory for same state
      np.testing.assert_array_equal(a, b)
Example 9
def load_agent(params, env, exp_dir, checkpoint='last'):
    agent_dir = os.path.join(exp_dir, 'agent')
    return IQNAgent(params=params, env=env, agent_save_dir=agent_dir, checkpoint_load=checkpoint)
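Here checkpoint selects which saved model to restore; the other examples pass "last", "best", or a named checkpoint such as "trained_only_demonstrations".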
Example 10
else:
  behavior = BehaviorDiscreteMacroActionsML(params)
  observer = NearestAgentsObserver(params)

evaluator = GoalReached(params)

viewer = MPViewer(
  params=params,
  center=[960, 1000.8],
  enforce_x_length=True,
  x_length=100.0,
  use_world_bounds=False)

# load env
env = HyDiscreteHighway(params=params,
                        scenario_generation=scenario_generator,
                        behavior=behavior,
                        evaluator=evaluator,
                        observer=observer,
                        viewer=viewer,
                        render=False)

# agent save directory
agent_dir = os.path.join(exp_dir, 'agent')

# load agent
agent = IQNAgent(env=env, params=params, agent_save_dir=agent_dir, 
                 is_checkpoint_run=True, is_online_demo=False)
agent.load_models(IQNAgent.check_point_directory(agent.agent_save_dir, "best"))

agent.evaluate()
  def test_model_loader(self):
    # env using default params
    env = gym.make("highway-v1")

    networks = ["iqn", "fqf", "qrdqn"]

    action_space_size = env.action_space.n
    state_space_size = env.observation_space.shape[0]

    # a random state with values in [0, 1) used to compare the two models' outputs
    random_state = np.random.rand(state_space_size).tolist()

    # test all networks
    for network in networks:
      # Do inference using C++ wrapped model
      model = pytorch_script_wrapper.ModelLoader(
          os.path.join(
              os.path.dirname(__file__),
              "lib_fqf_iqn_qrdqn_test_data/{}/online_net_script.pt"
              .format(network)), action_space_size, state_space_size)
      model.LoadModel()

      num_iters = 1000  # number of repetitions used to estimate inference runtime

      # Time num_iters iterations for inference using C++ model
      start = time.time()
      for _ in range(num_iters):
        actions_cpp = model.Inference(random_state)
      end = time.time()
      time_cpp = end - start  # TODO: how to compare Python vs. C++ runtimes within tests?

      # Load and perform inference using python model
      if network == "iqn":
        agent = IQNAgent(env=env, test_env=env, params=ParameterServer())

      elif network == "fqf":
        agent = FQFAgent(env=env, test_env=env, params=ParameterServer())

      elif network == "qrdqn":
        agent = QRDQNAgent(env=env, test_env=env, params=ParameterServer())

      agent.load_models(
          os.path.join(
              os.path.dirname(__file__),
              "lib_fqf_iqn_qrdqn_test_data",
              network))

      # Time num_iters iterations for inference using python model
      start = time.time()
      for _ in range(num_iters):
        actions_py = agent.calculate_actions(random_state)

      end = time.time()
      time_py = end - start
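      # time_cpp and time_py are only collected here, not compared; see the TODO above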

      # assert that the Python and C++ model outputs agree to 6 decimal places
      np.testing.assert_array_almost_equal(
          actions_py.flatten().numpy(),
          np.asarray(actions_cpp),
          decimal=6,
          err_msg="C++ and python models don't match")
Example 12
    def test_iqn_agent(self):
        params = ParameterServer()
        params["ML"]["BaseAgent"]["NumSteps"] = 2
        params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2

        bp = DiscreteHighwayBlueprint(params,
                                      number_of_senarios=10,
                                      random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        env._observer = NearestAgentsObserver(params)
        env._action_wrapper = BehaviorDiscreteMacroActionsML(params)

        iqn_agent = IQNAgent(agent_save_dir="./save_dir",
                             env=env,
                             params=params)
        iqn_agent.train_episode()

        iqn_agent.save(checkpoint_type="best")
        iqn_agent.save(checkpoint_type="last")

        loaded_agent = IQNAgent(agent_save_dir="./save_dir",
                                checkpoint_load="best")
        loaded_agent2 = IQNAgent(agent_save_dir="./save_dir",
                                 checkpoint_load="last")

        loaded_agent_with_env = IQNAgent(env=env,
                                         agent_save_dir="./save_dir",
                                         checkpoint_load="last")
        loaded_agent_with_env.train_episode()

        self.assertEqual(loaded_agent.ml_behavior.action_space.n,
                         iqn_agent.ml_behavior.action_space.n)
        return
Example 13
# build a runtime only to pass the observer and behavior to the agent; the remaining arguments are dummy placeholders
env_to_pass_observer_behavior = SingleAgentRuntime(ml_behavior=behavior,
                                                   observer=observer,
                                                   step_time=-1.0,
                                                   viewer=-1.0,
                                                   scenario_generator=-1.0,
                                                   evaluator=-1.0)

# agent save directory
agent_dir = os.path.join(exp_dir, 'agent')

# load agent
agent = IQNAgent(env=env_to_pass_observer_behavior,
                 params=params,
                 agent_save_dir=agent_dir,
                 is_checkpoint_run=is_checkpoint_run,
                 is_be_obs=is_belief_observer,
                 checkpoint_load='best',
                 is_online_demo=False)
agent.is_be_obs = is_belief_observer

behaviors = {"behavior_iqn_agent": agent}
benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                   evaluators=evaluators,
                                   terminal_when=terminal_when,
                                   behaviors=behaviors,
                                   num_scenarios=num_scenarios,
                                   log_eval_avg_every=1,
                                   checkpoint_dir="checkpoints",
                                   deepcopy=False)
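
The excerpt ends after constructing the runner; presumably it is then executed along these lines (BenchmarkRunner.run is assumed from BARK's benchmark tooling and is not shown in the original):

result = benchmark_runner.run()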