Example 1
# imports (same modules as in Example 2)
import gym
import numpy as np

from basic.modules.Agents.Networks import params as basic_agent_params
import basic.modules.Agents.Networks as nets
from basic.modules.Agents.EpisodicMemory import EpisodicMemory as Memory
from basic.modules.Agents import Agent
from basic.modules.Experiments import Experiment as ex

env_name = 'gridworld:gridworld-v1'
expt_type = 'MF'
env = gym.make(env_name)

# get parameters specific to the Model-Free network
params = basic_agent_params(env)
# build network & memory modules
network = nets.ActorCritic(params)
memory = Memory(entry_size=params.action_dims, cache_limit=400)

# construct agent instance with network and memory modules
agent = Agent(network, memory=memory)

# choose action controller: episodic control (EC) or model-free (MF)
if expt_type == 'EC':
    agent.get_action = agent.EC_action
elif expt_type == 'MF':
    agent.get_action = agent.MF_action

# instantiate experiment
run = ex(agent, env)
print(env.obstacles_list)
#run.run(5000,250, printfreq=10, render=False)
#run.record_log(expt_type, env_name, n_trials=5000, dir='../../Data/', file='test_environments.csv')
'''
# show example one-hot state observations
state_obs = []
states = []
for i in range(env.nstates):
    state = np.zeros(env.nstates)
    state[i] = 1
    state_obs.append(state)
'''
Example 2
# using gridworld and the actor-critic network architecture
import gym
from basic.modules.Agents.Networks import params as basic_agent_params

import basic.modules.Agents.Networks as nets
from basic.modules.Agents.EpisodicMemory import EpisodicMemory as Memory
from basic.modules.Agents import Agent
from basic.modules.Experiments import Experiment as ex

import matplotlib.pyplot as plt
import basic.modules.Utils.gridworld_plotting as gp

# Make Environment to Test Agent in
env = gym.make('gym_grid:gridworld-v1')

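# build network, memory, and agent as in Example 1, using the model-free action controller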
params = basic_agent_params(env)
network = nets.ActorCritic(params)
memory = Memory(entry_size=params.action_dims, cache_limit=400)
agent = Agent(network, memory=memory)
agent.get_action = agent.MF_action

run = ex(agent, env)

run.run(100, 250, printfreq=10, render=False)

fig, ax = plt.subplots(2, 1, sharex=True)
ax[0].plot(run.data['total_reward'])
ax[1].plot(run.data['loss'][0], label='p')
ax[1].plot(run.data['loss'][1], label='v')
ax[1].legend(bbox_to_anchor=(1.05, 0.95))
plt.show()
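# Optionally, persist this run. The call below reuses the record_log signature
# shown (commented out) in Example 1; this is a sketch, assuming the same
# Experiment API and that the output directory exists. The figure filename is a
# placeholder.
fig.savefig('actor_critic_learning_curve.png')
run.record_log('MF', 'gym_grid:gridworld-v1', n_trials=100,
               dir='../../Data/', file='test_environments.csv')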