Example #1
actor      = 'EC'
ntrials    = 5000


# create environment
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')

memory = Memory.EpisodicMemory(cache_limit=400, entry_size=env.action_space.n)
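# episodic memory: entry_size matches the action count (presumably one stored value per action);
# cache_limit caps how many states can be cached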

agent = Agent(network, memory=memory)
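# the actor flag below switches between the network's model-free policy ('MF') and
# episodic-control retrieval ('EC'); naming inferred from the agent method names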

if actor == 'MF':
    agent.get_action = agent.MF_action
elif actor == 'EC':
    agent.get_action = agent.EC_action

run = expt(agent, env)
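# NUM_EVENTS presumably caps the number of steps per trial; results are written to the log afterwards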
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=250)
run.record_log(f'{actor}', env_name, n_trials=ntrials)


Example #2
# generate network
if network_id is None:
    params = nets.fc_params(env)
    params.lr = 0.005
    params.hidden_types = ['linear']
    params.hidden_dims = [40]
    params.temp = 1.1
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')

memtemp = 1
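# mem_temp is presumably the softmax temperature used when reading actions from episodic memory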
memory = Memory.EpisodicMemory(cache_limit=400,
                               entry_size=env.action_space.n,
                               mem_temp=memtemp)

agent = Agent(network, memory=memory)
agent.get_action = agent.MF_action

opt_values = np.zeros(env.nstates)
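# optimal-value baseline: seed the reward state with its reward value, then back up
# step penalization plus discounted successor values (presumably assuming the flattened
# state indices are ordered along the path to the reward)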
reward_loc = env.twoD2oneD(list(env.rewards.keys())[0])
opt_values[reward_loc] = list(env.rewards.values())[0]

for ind in reversed(range(len(opt_values) - 1)):
    opt_values[ind] = env.step_penalization + agent.gamma * opt_values[ind + 1]

ntrials = 1000
nevents = 100
run = expt(agent, env)
run.data['opt_values'] = opt_values

run.run(NUM_TRIALS=ntrials, NUM_EVENTS=nevents)
run.record_log('MF', env_name, n_trials=ntrials, dir='../../../Data/')


Example #3
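# build two state-representation lookups keyed by the flattened (1-D) state index:
# one-hot vectors and place-cell activity vectors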
pc_state_reps = {}
oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))
    pc_state_reps[env.twoD2oneD(state)] = place_cells.get_activities([state])[0]

place_cells.plot_placefields(env_states_to_map=env.useable)

#oh_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
#oh_network = torch.load(data_dir+f'agents/{load_id}.pt')
#oh_agent = Agent(oh_network, state_representations=oh_state_reps)

#pc_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
pc_network = torch.load(data_dir + f'agents/{load_id}.pt')
pc_agent = Agent(pc_network,
                 state_representations=pc_state_reps,
                 memory=memory)
pc_agent.get_action = pc_agent.EC_action

# retraining
env.set_reward({(15, 15): 10})
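# the reward is moved to grid cell (15, 15) with value 10, so the loaded agent has to relearn the goal location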

ex = expt(pc_agent, env)
ntrials = 2000
nsteps = 250
#ex.run(ntrials, nsteps, printfreq=1)
#ex.data['place_cells'] = place_cells
#ex.record_log('pc_episodic',env_name,ntrials,nsteps, dir=data_dir,file='ac_representation.csv')
# save place cells
Example #4
memory = EpisodicMemory(env.action_space.n, cache_limit=env.nstates)
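# cache_limit equal to env.nstates presumably lets every state receive its own memory entry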

oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))

oh_network = Network(input_dims=[input_dims],
                     fc1_dims=200,
                     fc2_dims=200,
                     output_dims=env.action_space.n,
                     lr=0.0005)
oh_network = torch.load(data_dir + f'agents/{load_id}.pt')
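# note: the loaded pretrained network replaces the freshly constructed one above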
oh_agent = Agent(oh_network,
                 state_representations=oh_state_reps,
                 memory=memory)
oh_agent.get_action = oh_agent.EC_action

# retraining
env.set_reward({(15, 15): 10})

ex = expt(oh_agent, env)
ntrials = 2000
nsteps = 250
ex.run(ntrials, nsteps, printfreq=1)
ex.record_log('oh_episodic',
              env_name,
              ntrials,
              nsteps,
              dir=data_dir,
              file='ac_representation.csv')
# save place cells