Code Example #1
# TODO: CHOICE TO ϕ DICTIONARY???

# Init the Agent's environment
env = Environment()

# Init the expert agent
# Feed it the expert trajectories
a = Agent(type='expert',
          action_list=['a', 'b', 'c', 'd', 'e', 'f', 'g'],
          environment=env,
          trajectories=[['a', 'b', 'c', 'e', 'g', 'b', 'c', 'e', 'g', 'c'],
                        ['a', 'b', 'c', 'a', 'g', 'g', 'a', 'g', 'g', 'c'],
                        ['c', 'd', 'f', 'b', 'c', 'a', 'd', 'f', 'b', 'c']])

# Build the expert's state trajectories from the supplied action trajectories
a.build_trajectories()

# Build the Agent's initial state distribution
a.build_D()

# Init a standalone environment for the state itself
simul_env = Environment()

# Init the simulation
sim = Simulation(agents=a, environment=simul_env, alpha=1)

# Need to initialize Q(s,a)

# This method initializes a matrix of state-action pairs and their values (currently all
# initialized to 0). The matrix covers every state that the expert agent has visited.
sim.reset_q(trajectories=sim.agents['expert'].state_trajectories)
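
To make the last step more concrete, here is a rough sketch of what a zero-initialized Q-table over the expert's visited states could look like. The helper name init_q_table and its layout are hypothetical illustrations, not the Simulation class's internals.

# Hypothetical sketch, not the library's internals: one row per visited state,
# one column per action, every Q(s, a) starting at 0.
import numpy as np

def init_q_table(state_trajectories, action_list):
    # Collect every state the expert visited, in a stable order
    visited_states = sorted({s for traj in state_trajectories for s in traj})
    s_index = {s: i for i, s in enumerate(visited_states)}
    a_index = {a: j for j, a in enumerate(action_list)}
    # All state-action values start at zero
    q = np.zeros((len(s_index), len(a_index)))
    return q, s_index, a_index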
Code Example #2
# Init the expert agent and feed it the expert trajectories
a = Agent(type='expert',
          action_list=e_action_list,
          environment=e_env,
          trajectories=e_trajs)

# Optional sanity check (left commented out): reset the attribute-based environment,
# apply action '1', and compare the mapped attributes against the resulting state.
# a.environment._reset(action_list=a.action_list, attribute_based=True)
# a.environment._update_state(action='1', attribute_based=True)
# print(e_action_map['1'])
# print(a.environment.current_state)

# Build the expert's state trajectories from the supplied action trajectories
a.build_trajectories(attribute_based=True)

# Build the Agent's initial state distribution
a.build_D()

# Building the feature expectations for the expert
# Init a standalone environment for the state itself
simul_env = Environment(attribute_mapping=e_action_map)

# Init the simulation
sim = Simulation(agents=a, environment=simul_env, alpha=.2)

# Computing the feature expectation of the expert
mu_e = sim.μ_estimate(trajectories=sim.agents['expert'].trajectories,
                      gamma=0.99)
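
For reference, the quantity being estimated is the expert's discounted feature expectation, mu_E = E[sum_t gamma^t * phi(s_t)], averaged over the expert trajectories. The sketch below shows one way to compute it empirically; the feature map phi and the helper name are assumptions for illustration, not the mu_estimate implementation itself.

import numpy as np

def estimate_feature_expectation(trajectories, phi, gamma=0.99):
    # Hypothetical helper: mu ~= (1/N) * sum_i sum_t gamma**t * phi(s_t)
    mu = None
    for traj in trajectories:
        for t, state in enumerate(traj):
            weighted = (gamma ** t) * np.asarray(phi(state), dtype=float)
            mu = weighted if mu is None else mu + weighted
    return mu / len(trajectories)

With gamma = 0.99, later steps are only mildly discounted, so states visited late in a trajectory still contribute noticeably to the estimate.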