def test_agents(self):
  params = ParameterServer()
  params["ML"]["BaseAgent"]["NumSteps"] = 2
  params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2
  bp = DiscreteHighwayBlueprint(params, num_scenarios=10, random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)

  # IQN Agent
  iqn_agent = IQNAgent(env=env, test_env=env, params=params)
  env.ml_behavior = iqn_agent
  self.assertEqual(env.ml_behavior.set_action_externally, False)
  iqn_agent.run()
  self.assertEqual(env.ml_behavior.set_action_externally, True)

  # FQF Agent
  fqf_agent = FQFAgent(env=env, test_env=env, params=params)
  env.ml_behavior = fqf_agent
  self.assertEqual(env.ml_behavior.set_action_externally, False)
  fqf_agent.run()
  self.assertEqual(env.ml_behavior.set_action_externally, True)

  # QRDQN Agent
  qrdqn_agent = QRDQNAgent(env=env, test_env=env, params=params)
  env.ml_behavior = qrdqn_agent
  self.assertEqual(env.ml_behavior.set_action_externally, False)
  qrdqn_agent.run()
  self.assertEqual(env.ml_behavior.set_action_externally, True)

def run_configuration(argv):
  params = ParameterServer()
  params["ML"]["BaseAgent"][
      "SummaryPath"] = "/home/mansoor/Study/Werkstudent/fortiss/code/bark-ml/summaries"
  params["ML"]["BaseAgent"][
      "CheckpointPath"] = "/home/mansoor/Study/Werkstudent/fortiss/code/bark-ml/checkpoints"

  env = gym.make(FLAGS.env, params=params)
  agent = FQFAgent(env=env, test_env=env, params=params)

  if FLAGS.load and params["ML"]["BaseAgent"]["CheckpointPath"]:
    agent.load_models(
        os.path.join(params["ML"]["BaseAgent"]["CheckpointPath"], "best"))

  if FLAGS.mode == "train":
    agent.train()
  elif FLAGS.mode == "visualize":
    agent.visualize()
  elif FLAGS.mode == "evaluate":
    # writes evaluation data via the summary writer into SummaryPath
    agent.evaluate()
  else:
    raise Exception("Invalid argument for --mode")

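# A minimal sketch of the command-line flags run_configuration() relies on.
# The flag names (--env, --mode, --load) are inferred from their use above;
# the defaults shown here are assumptions, not the repository's actual values.
from absl import app, flags

flags.DEFINE_string("env", "highway-v1", "Gym environment id to run.")
flags.DEFINE_enum("mode", "train", ["train", "visualize", "evaluate"],
                  "Whether to train, visualize, or evaluate the agent.")
flags.DEFINE_boolean("load", False,
                     "Load model weights from CheckpointPath before running.")
FLAGS = flags.FLAGS

if __name__ == "__main__":
  app.run(run_configuration)
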
def main():
  args = configure_args()
  exp_dir = args.exp_dir or "results/training/toy_evaluation"

  # load the parameter files written during training
  params_filename = glob.glob(os.path.join(exp_dir, "params_*"))
  params = ParameterServer(filename=params_filename[0])
  behavior_params_filename = glob.glob(
      os.path.join(exp_dir, "behavior_params*"))
  if behavior_params_filename:
    splits = 8
    behavior_params = ParameterServer(filename=behavior_params_filename[0])
    behavior_space = BehaviorSpace(behavior_params)
    hypothesis_set, _ = behavior_space.create_hypothesis_set_fixed_split(
        split=splits)
    observer = BeliefObserver(params, hypothesis_set, splits=splits)
    behavior = BehaviorDiscreteMacroActionsML(behavior_params)
  else:
    behavior = BehaviorDiscreteMacroActionsML(params)
    observer = NearestAgentsObserver(params)
  evaluator = GoalReached(params)

  scenario_params = ParameterServer(
      filename=
      "configuration/database/scenario_sets/interaction_merging_light_dense_1D.json"
  )
  scenario_generator = ConfigurableScenarioGeneration(params=scenario_params,
                                                      num_scenarios=5)
  scenario_file = glob.glob(os.path.join(exp_dir, "scenarios_list*"))
  scenario_generator.load_scenario_list(scenario_file[0])
  viewer = MPViewer(params=params,
                    x_range=[-35, 35],
                    y_range=[-35, 35],
                    follow_agent_id=True)
  env = HyDiscreteHighway(behavior=behavior,
                          observer=observer,
                          evaluator=evaluator,
                          viewer=viewer,
                          scenario_generation=scenario_generator,
                          render=True)

  env.reset()
  # step once through every discrete macro action
  actions = [0, 1, 2, 3, 4, 5, 6]
  for action in actions:
    print(action)
    env.step(action)

  agent = FQFAgent(env=env, test_env=env, params=params)
  agent.load_models(os.path.join(exp_dir, "agent/checkpoints/final"))

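# A minimal sketch of the configure_args() helper used by main(). Only the
# --exp_dir option is inferred from its use above; everything else here is an
# assumption rather than the repository's actual argument parser.
import argparse

def configure_args():
  parser = argparse.ArgumentParser(description="Evaluate a trained FQF agent.")
  parser.add_argument("--exp_dir", type=str, default=None,
                      help="Experiment directory containing params_*, "
                           "behavior_params*, and scenarios_list* files.")
  return parser.parse_args()
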
def test_agent_wrapping(self):
  params = ParameterServer()
  env = gym.make("highway-v1", params=params)
  env.reset()
  # smoke test: each agent type can wrap the same environment
  agent = IQNAgent(env=env, test_env=env, params=params)
  agent = FQFAgent(env=env, test_env=env, params=params)
  agent = QRDQNAgent(env=env, test_env=env, params=params)

def test_behavior_wrapping(self):
  # create scenario
  params = ParameterServer()
  bp = DiscreteHighwayBlueprint(params, num_scenarios=10, random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)
  # env = gym.make("highway-v1", params=params)
  ml_behaviors = []
  # ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
  ml_behaviors.append(FQFAgent(env=env, params=params))
  # ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))

  for ml_behavior in ml_behaviors:
    # set agent
    env.ml_behavior = ml_behavior
    env.reset()
    action = np.random.randint(low=0, high=env.action_space.n)
    observed_next_state, reward, done, info = env.step(action)
    print(f"Observed state: {observed_next_state}, "
          f"Reward: {reward}, Done: {done}")

    # action is set externally
    ml_behavior._set_action_externally = True
    agent_id = list(env._world.agents.keys())[0]
    observed_world = env._world.Observe([agent_id])[0]

    # take a random action and plan the trajectory
    action = np.random.randint(low=1, high=env.action_space.n)
    ml_behavior.ActionToBehavior(action)
    a = ml_behavior.Plan(0.2, observed_world)

    # sample a different random action
    another_action = action
    while another_action == action:
      another_action = np.random.randint(low=1, high=env.action_space.n)

    # plan the trajectory for the other action
    ml_behavior.ActionToBehavior(another_action)
    b = ml_behavior.Plan(0.2, observed_world)

    # the trajectories generated by two different actions should differ
    self.assertEqual(np.any(np.not_equal(a, b)), True)

    # the action is calculated within the Plan(..) function
    ml_behavior._set_action_externally = False
    a = ml_behavior.Plan(0.2, observed_world)
    b = ml_behavior.Plan(0.2, observed_world)
    last_action = ml_behavior.GetLastAction()
    self.assertTrue(isinstance(last_action, float))
    # same trajectory for the same state
    np.testing.assert_array_equal(a, b)

def test_agent_wrapping(self):
  params = ParameterServer()
  env = gym.make("highway-v1", params=params)
  env.reset()
  # agent = IQNAgent(env=env, test_env=env, params=params)
  params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2
  params["ML"]["BaseAgent"]["NumEvalEpisodes"] = 2
  agent = FQFAgent(env=env, params=params)
  agent.train_episode()
  agent.evaluate()

def test_agent_wrapping(self):
  params = ParameterServer()
  env = gym.make("highway-v1", params=params)
  env.reset()
  params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2
  params["ML"]["BaseAgent"]["NumEvalEpisodes"] = 2
  train_bench = TrainingBenchmarkDatabase()
  agent = FQFAgent(env=env,
                   agent_save_dir="./save_dir",
                   params=params,
                   training_benchmark=train_bench)
  agent.train_episode()
  agent.evaluate()

def configure_agent(params, env):
  agent = FQFAgent(env=env, test_env=env, params=params)
  return agent

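# A minimal usage sketch for configure_agent(). The "highway-v1" environment id
# matches the one used in the tests above, but wiring it up this way is an
# assumption for illustration, not the repository's actual entry point.
params = ParameterServer()
env = gym.make("highway-v1", params=params)
agent = configure_agent(params, env)
agent.train()
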
def test_model_loader(self):
  # env using default params
  env = gym.make("highway-v1")
  networks = ["iqn", "fqf", "qrdqn"]
  action_space_size = env.action_space.n
  state_space_size = env.observation_space.shape[0]

  # a random sample state in [0, 1] to evaluate actions on
  random_state = np.random.rand(state_space_size).tolist()

  # test all networks
  for network in networks:
    # do inference using the C++-wrapped model
    model = pytorch_script_wrapper.ModelLoader(
        os.path.join(
            os.path.dirname(__file__),
            "lib_fqf_iqn_qrdqn_test_data/{}/online_net_script.pt".format(
                network)), action_space_size, state_space_size)
    model.LoadModel()

    num_iters = 1000  # number of repetitions for the runtime measurement

    # time num_iters inference iterations of the C++ model
    start = time.time()
    for _ in range(num_iters):
      actions_cpp = model.Inference(random_state)
    end = time.time()
    time_cpp = end - start
    # todo - how to analyze python vs cpp runtime within tests?

    # load and perform inference using the Python model
    if network == "iqn":
      agent = IQNAgent(env=env, test_env=env, params=ParameterServer())
    elif network == "fqf":
      agent = FQFAgent(env=env, test_env=env, params=ParameterServer())
    elif network == "qrdqn":
      agent = QRDQNAgent(env=env, test_env=env, params=ParameterServer())
    agent.load_models(
        os.path.join(os.path.dirname(__file__), "lib_fqf_iqn_qrdqn_test_data",
                     network))

    # time num_iters inference iterations of the Python model
    start = time.time()
    for _ in range(num_iters):
      actions_py = agent.calculate_actions(random_state)
    end = time.time()
    time_py = end - start

    # assert that the Python and C++ models agree to 6 decimal places
    np.testing.assert_array_almost_equal(
        actions_py.flatten().numpy(),
        np.asarray(actions_cpp),
        decimal=6,
        err_msg="C++ and python models don't match")

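# One way to address the todo above about comparing Python and C++ runtimes:
# log the measured times instead of asserting on them, since wall-clock
# assertions are flaky in CI. This line is a suggestion to add inside the
# per-network loop of test_model_loader(), not code from the repository.
#
# print("{}: C++ {:.4f}s, Python {:.4f}s, speedup x{:.2f}".format(
#     network, time_cpp, time_py, time_py / time_cpp))
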
def test_agents(self):
  params = ParameterServer()
  params["ML"]["BaseAgent"]["NumSteps"] = 2
  params["ML"]["BaseAgent"]["MaxEpisodeSteps"] = 2
  bp = DiscreteHighwayBlueprint(params, num_scenarios=10, random_seed=0)
  env = SingleAgentRuntime(blueprint=bp, render=False)
  env._observer = NearestAgentsObserver(params)
  env._action_wrapper = BehaviorDiscreteMacroActionsML(params)

  # train and checkpoint an agent
  fqf_agent = FQFAgent(agent_save_dir="./save_dir", env=env, params=params)
  fqf_agent.train_episode()
  fqf_agent.save(checkpoint_type="best")
  fqf_agent.save(checkpoint_type="last")

  # reload from both checkpoint types, with and without an environment
  loaded_agent = FQFAgent(agent_save_dir="./save_dir", checkpoint_load="best")
  loaded_agent2 = FQFAgent(agent_save_dir="./save_dir", checkpoint_load="last")
  loaded_agent_with_env = FQFAgent(env=env,
                                   agent_save_dir="./save_dir",
                                   checkpoint_load="last")
  loaded_agent_with_env.train_episode()

  self.assertEqual(loaded_agent.ml_behavior.action_space.n,
                   fqf_agent.ml_behavior.action_space.n)
  self.assertEqual(loaded_agent.ent_coef, fqf_agent.ent_coef)
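
# Standard unittest entry point, shown as a minimal sketch. The test methods
# above are assumed to live in a unittest.TestCase subclass, as their `self`
# argument and assert* calls imply.
import unittest

if __name__ == "__main__":
  unittest.main()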