# TODO: CHOICE TO ϕ DICTIONARY???

# Init the Agent's environment
env = Environment()

# Init the expert agent and feed it the expert trajectories
a = Agent(type='expert',
          action_list=['a', 'b', 'c', 'd', 'e', 'f', 'g'],
          environment=env,
          trajectories=[['a', 'b', 'c', 'e', 'g', 'b', 'c', 'e', 'g', 'c'],
                        ['a', 'b', 'c', 'a', 'g', 'g', 'a', 'g', 'g', 'c'],
                        ['c', 'd', 'f', 'b', 'c', 'a', 'd', 'f', 'b', 'c']])

# Build said expert trajectories
a.build_trajectories()

# Build the Agent's initial state distribution
a.build_D()

# Init a standalone environment for the simulation itself
simul_env = Environment()

# Init the simulation
sim = Simulation(agents=a, environment=simul_env, alpha=1)

# Initialize Q(s, a): this builds a matrix of state-action pairs and their
# values (currently all initialized to 0), covering every state the expert
# agent has visited
sim.reset_q(trajectories=sim.agents['expert'].state_trajectories)
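
# For reference, a minimal sketch of the kind of table reset_q presumably
# builds. Assumption: Q is stored as a nested dict keyed by visited state,
# then by action, with every value initialized to 0.0 -- init_q_table is a
# hypothetical helper for illustration, not the library's actual internals.
def init_q_table(state_trajectories, action_list):
    """Initialize Q(s, a) = 0 for every state the expert has visited."""
    q = {}
    for trajectory in state_trajectories:
        for state in trajectory:
            # Only add a row the first time a state is seen
            q.setdefault(state, {action: 0.0 for action in action_list})
    return q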
# Feed it the expert trajectories
a = Agent(type='expert', action_list=e_action_list, environment=e_env, trajectories=e_trajs)

# a.environment._reset(action_list=a.action_list, attribute_based=True)
#
# a.environment._update_state(action='1', attribute_based=True)
#
# print(e_action_map['1'])
# print(a.environment.current_state)

# Build said expert trajectories
a.build_trajectories(attribute_based=True)

# Build the Agent's initial state distribution
a.build_D()

# Building the feature expectations for the expert

# Init a standalone environment for the simulation itself
simul_env = Environment(attribute_mapping=e_action_map)

# Init the simulation
sim = Simulation(agents=a, environment=simul_env, alpha=.2)

# Compute the feature expectations of the expert
mu_e = sim.μ_estimate(trajectories=sim.agents['expert'].trajectories, gamma=0.99)
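
# For reference, a minimal sketch of the discounted feature-expectation
# estimate from Abbeel & Ng (2004): mu_E = (1/m) * sum_i sum_t gamma^t * phi(s_t).
# Assumptions: phi maps a state to a feature vector, and estimate_mu is a
# hypothetical stand-in -- not necessarily how Simulation.μ_estimate is
# implemented internally.
import numpy as np

def estimate_mu(trajectories, phi, gamma=0.99):
    """Average the discounted sums of feature vectors over m expert trajectories."""
    mu = np.zeros_like(phi(trajectories[0][0]), dtype=float)
    for trajectory in trajectories:
        for t, state in enumerate(trajectory):
            # Each visit contributes its feature vector, discounted by time step
            mu += (gamma ** t) * np.asarray(phi(state), dtype=float)
    return mu / len(trajectories)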