# Script: run a Model-Free ('MF') actor-critic agent in a gridworld environment.
#
# NOTE(review): no imports are visible in this fragment — it relies on
# gym, basic_agent_params, nets, Memory, Agent, and ex being brought into
# scope elsewhere in the file. Confirm against the full source.
# NOTE(review): a sibling script in this file uses env id
# 'gym_grid:gridworld-v1'; verify which registration string is correct.

env_name = 'gridworld:gridworld-v1'
expt_type = 'MF'  # 'MF' = model-free control, 'EC' = episodic control

env = gym.make(env_name)

# get parameters to specific Model Free Network
params = basic_agent_params(env)

# build network & memory modules
network = nets.ActorCritic(params)
memory = Memory(entry_size=params.action_dims, cache_limit=400)

# construct agent instance with network and memory modules
agent = Agent(network, memory=memory)

# choose action controller based on the experiment type
if expt_type == 'EC':
    agent.get_action = agent.EC_action
elif expt_type == 'MF':
    agent.get_action = agent.MF_action

# instantiate experiment
run = ex(agent, env)
print(env.obstacles_list)

#run.run(5000,250, printfreq=10, render=False)
#run.record_log(expt_type, env_name, n_trials=5000, dir='../../Data/', file='test_environments.csv')

# FIX: the original opened a triple-quoted block here ("''' # show examples ...")
# that is never closed in this fragment — an unterminated string literal is a
# SyntaxError. The truncated dead code is preserved below as comments instead:
# # show examples from
# state_obs = []
# states = []
# for i in range(env.nstates):
#     state = np.zeros(env.nstates)
#     state[i] = 1
# Train a model-free actor-critic agent in a gridworld and plot the results.
# (gridworld environment + actor-critic network architecture)
import gym
from basic.modules.Agents.Networks import params as basic_agent_params
import basic.modules.Agents.Networks as nets
from basic.modules.Agents.EpisodicMemory import EpisodicMemory as Memory
from basic.modules.Agents import Agent
from basic.modules.Experiments import Experiment as ex
import matplotlib.pyplot as plt
import basic.modules.Utils.gridworld_plotting as gp

# Make Environment to Test Agent in
env = gym.make('gym_grid:gridworld-v1')

# Build the agent: actor-critic network + episodic memory cache.
params = basic_agent_params(env)
network = nets.ActorCritic(params)
memory = Memory(entry_size=params.action_dims, cache_limit=400)
agent = Agent(network, memory=memory)

# Use the model-free action controller.
agent.get_action = agent.MF_action

# Run the experiment: 100 trials, 250 steps each.
run = ex(agent, env)
run.run(100, 250, printfreq=10, render=False)

# Plot total reward per trial (top) and the two loss components (bottom).
fig, ax = plt.subplots(2, 1, sharex=True)
ax[0].plot(run.data['total_reward'])
ax[1].plot(run.data['loss'][0], label='p')  # policy loss
ax[1].plot(run.data['loss'][1], label='v')  # value loss
ax[1].legend(bbox_to_anchor=(1.05, 0.95))
plt.show()