# --- Run a single agent (model-free or episodic-control) in a gym environment ---
actor = 'EC'        # action-selection policy to bind: 'MF' (model-free) or 'EC' (episodic control)
ntrials = 5000      # number of training trials

# create environment
env = gym.make(env_name)
plt.close()

# generate network: build fresh from the environment, or load a saved agent
if network_id is None:  # fixed: identity comparison with None (`is`, not `==`)
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')

# episodic memory with one entry slot per action in the env's action space
memory = Memory.EpisodicMemory(cache_limit=400, entry_size=env.action_space.n)
agent = Agent(network, memory=memory)

# bind the requested action-selection method onto the agent
if actor == 'MF':
    agent.get_action = agent.MF_action
elif actor == 'EC':
    agent.get_action = agent.EC_action

run = expt(agent, env)
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=250)
# `actor` is already a str — the redundant f-string wrapper was dropped
run.record_log(actor, env_name, n_trials=ntrials)
# NOTE(review): this chunk starts mid-`if` — the `if network_id ...:` header that
# governs the first branch below is outside this view; the dangling `else:` pairs
# with it. Code tokens are preserved exactly as found.
    # build a small fully-connected actor-critic sized from the env's observations
    params = nets.fc_params(env)
    params.lr = 0.005                 # learning rate
    params.hidden_types = ['linear']  # single linear hidden layer
    params.hidden_dims = [40]
    params.temp = 1.1                 # softmax temperature for the policy
    network = nets.ActorCritic(params)
else:
    # load a previously saved agent network instead of building one
    network = torch.load(f=f'./Data/agents/load_agents/{network_id}.pt')

memtemp = 1  # episodic-memory softmax temperature
memory = Memory.EpisodicMemory(cache_limit=400, entry_size=env.action_space.n, mem_temp=memtemp)
agent = Agent(network, memory=memory)
agent.get_action = agent.MF_action  # use model-free action selection

# Analytic reference values: put the (single) reward at its 1-D state index,
# then sweep backwards, each state worth the step penalty plus the discounted
# value of the next index. NOTE(review): this assumes consecutive 1-D indices
# are one step apart (a linear-track layout) — confirm against the env used.
opt_values = np.zeros(env.nstates)
reward_loc = env.twoD2oneD(list(env.rewards.keys())[0])
opt_values[reward_loc] = list(env.rewards.values())[0]
for ind in reversed(range(len(opt_values) - 1)):
    opt_values[ind] = env.step_penalization + agent.gamma * opt_values[ind + 1]

ntrials = 1000
nevents = 100
run = expt(agent, env)
run.data['opt_values'] = opt_values  # store analytic values alongside run data
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=nevents)
# NOTE(review): this call is truncated — its remaining arguments lie outside this view
run.record_log(dir='../../../Data/',
# --- Place-cell state representations: build reps, load agent, EC retraining ---
pc_state_reps = {}  # 1-D state index -> place-cell activity vector
oh_state_reps = {}  # 1-D state index -> one-hot vector
for state in env.useable:
    sid = env.twoD2oneD(state)  # hoisted: (x, y) -> 1-D index computed once per state
    oh_state_reps[sid] = one_hot_state(sid)
    pc_state_reps[sid] = place_cells.get_activities([state])[0]

place_cells.plot_placefields(env_states_to_map=env.useable)

# load a pre-trained network and wrap it in an episodic-control agent
# (dead commented-out network/agent constructions removed)
pc_network = torch.load(data_dir + f'agents/{load_id}.pt')
pc_agent = Agent(pc_network, state_representations=pc_state_reps, memory=memory)
pc_agent.get_action = pc_agent.EC_action

# retraining: relocate the reward, then (optionally) rerun the experiment
env.set_reward({(15, 15): 10})
ex = expt(pc_agent, env)
ntrials = 2000
nsteps = 250
# The retraining run itself is disabled; uncomment to execute and log it.
#ex.run(ntrials, nsteps, printfreq=1)
#ex.data['place_cells'] = place_cells
#ex.record_log('pc_episodic',env_name,ntrials,nsteps, dir=data_dir,file='ac_representation.csv')
# save place cells
# --- One-hot state representations: load agent, EC retraining, log results ---
memory = EpisodicMemory(env.action_space.n, cache_limit=env.nstates)

oh_state_reps = {}  # 1-D state index -> one-hot vector
for state in env.useable:
    sid = env.twoD2oneD(state)  # hoisted: (x, y) -> 1-D index computed once per state
    oh_state_reps[sid] = one_hot_state(sid)

# load the pre-trained network; the original also constructed a fresh
# Network(...) here and immediately overwrote it — dead construction removed
oh_network = torch.load(data_dir + f'agents/{load_id}.pt')
oh_agent = Agent(oh_network, state_representations=oh_state_reps, memory=memory)
oh_agent.get_action = oh_agent.EC_action

# retraining: relocate the reward and rerun the experiment
env.set_reward({(15, 15): 10})
ex = expt(oh_agent, env)
ntrials = 2000
nsteps = 250
ex.run(ntrials, nsteps, printfreq=1)
ex.record_log('oh_episodic', env_name, ntrials, nsteps,
              dir=data_dir, file='ac_representation.csv')
# save place cells