actor = 'EC'
ntrials = 5000

# create environment
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')

memory = Memory.EpisodicMemory(cache_limit=400, entry_size=env.action_space.n)
agent = Agent(network, memory=memory)

# select the action policy: model-free (network) or episodic control (memory)
if actor == 'MF':
    agent.get_action = agent.MF_action
elif actor == 'EC':
    agent.get_action = agent.EC_action

run = expt(agent, env)
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=250)
run.record_log(actor, env_name, n_trials=ntrials)
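# The `actor` flag above swaps the agent's action selector at runtime by
# rebinding get_action. A minimal, self-contained sketch of that pattern
# (ToyAgent is hypothetical and only stands in for the project's Agent):
import numpy as np

class ToyAgent:
    def MF_action(self, obs):
        # model-free: act from the policy network (random placeholder here)
        return np.random.randint(4)

    def EC_action(self, obs):
        # episodic control: act from a memory lookup (fixed placeholder here)
        return 0

toy = ToyAgent()
toy.get_action = toy.MF_action if actor == 'MF' else toy.EC_action
print(toy.get_action(obs=None))  # with actor == 'EC' above, prints 0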
                25: 96},
    'gridworld:gridworld-v51': {100: 286,
                                75: 214,
                                50: 143,
                                25: 71}
}

# scale the cache to the requested percentage of the env's full (100%) limit
cache_size_for_env = int(cache_limits[test_env][100] * (cache_size / 100))
memory = Memory(entry_size=env.action_space.n,
                cache_limit=cache_size_for_env,
                distance=distance_metric)

# reinitialize agent with new network
agent = Agent(network, memory, state_representations=latent_state_reps)
#verify_env = gym.make(env_name)
#ver_ex = expt(agent, verify_env)

# expt - redefines logging function to keep track of network details
ex = expt(agent, env)
ex.run(num_trials, num_events)
ex.record_log(env_name=test_env,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file,
              load_from=agent_id)
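# Sanity check of the percentage scaling above against the v51 row of the
# table: int() truncation reproduces the hand-set 75/50/25 entries exactly.
_v51 = {100: 286, 75: 214, 50: 143, 25: 71}
for pct in (100, 75, 50, 25):
    assert int(_v51[100] * (pct / 100)) == _v51[pct]
# e.g. 75%: int(286 * 0.75) = int(214.5) = 214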
pc_state_reps = {}
oh_state_reps = {}
for state in env.useable:
    # key each representation by the state's 1-D index
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))
    pc_state_reps[env.twoD2oneD(state)] = place_cells.get_activities([state])[0]

place_cells.plot_placefields(env_states_to_map=env.useable)

#oh_network = Network(input_dims=[input_dims], fc1_dims=200, fc2_dims=200, output_dims=env.action_space.n, lr=0.0005)
#oh_network = torch.load(data_dir + f'agents/{load_id}.pt')
#oh_agent = Agent(oh_network, state_representations=oh_state_reps)
#pc_network = Network(input_dims=[input_dims], fc1_dims=200, fc2_dims=200, output_dims=env.action_space.n, lr=0.0005)
pc_network = torch.load(data_dir + f'agents/{load_id}.pt')
pc_agent = Agent(pc_network, state_representations=pc_state_reps, memory=memory)
pc_agent.get_action = pc_agent.EC_action

# retraining with the reward relocated to (15, 15)
env.set_reward({(15, 15): 10})

ex = expt(pc_agent, env)
ntrials = 2000
nsteps = 250
#ex.run(ntrials, nsteps, printfreq=1)
#ex.data['place_cells'] = place_cells
#ex.record_log('pc_episodic', env_name, ntrials, nsteps, dir=data_dir, file='ac_representation.csv')

# save place cells
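# Both dictionaries above map 1-D state indices to feature vectors. A
# self-contained sketch with a Gaussian place-field activity standing in for
# the project's place_cells.get_activities (the centres, sigma, and the
# 20x20 grid size are assumptions for illustration only):
import numpy as np

_grid = 20
_centres = np.random.uniform(0, _grid, size=(100, 2))  # 100 random fields

def _place_activity(state, sigma=1.5):
    # each cell's activity falls off with distance from its field centre
    d2 = np.sum((_centres - np.asarray(state)) ** 2, axis=1)
    return np.exp(-d2 / (2 * sigma ** 2))

_useable = [(r, c) for r in range(_grid) for c in range(_grid)]
_pc_reps = {r * _grid + c: _place_activity((r, c)) for (r, c) in _useable}
print(len(_pc_reps), _pc_reps[0].shape)  # 400 states, 100-dim vectors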
# remap the donor network's shared-output keys onto the head's pol/val layers
top_layer_dict = {}
top_layer_dict['pol.weight'] = state_dict['output.0.weight']
top_layer_dict['pol.bias'] = state_dict['output.0.bias']
top_layer_dict['val.weight'] = state_dict['output.1.weight']
top_layer_dict['val.bias'] = state_dict['output.1.bias']
AC_head_agent.load_state_dict(top_layer_dict)

# get state inputs
h0, h1 = get_net_activity(env_name, rep_type, net)
state_reps, representation_name = h1, f'h1_{rep_type}_latents'

memory = None  #Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)
agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)

test_env_name = env_name + '1'
test_env = gym.make(test_env_name)
plt.close()
print(test_env.rewards)

# get test env state inputs
h0, h1 = get_net_activity(test_env_name, rep_type, net)
state_reps, representation_name = h1, f'h1_{rep_type}_latents'

# update agent with new state_reps and continue training in the test env
agent.state_reps = state_reps
test_run = expt(agent, test_env)
test_run.data = run.data
test_run.run(NUM_TRIALS=num_trials * 2, NUM_EVENTS=num_events)
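# The block above renames shared-head keys (output.0.*, output.1.*) to the
# separately named pol/val layers of the head network. A self-contained
# PyTorch sketch of that remapping (hidden size and action count are
# assumptions; the real layer shapes come from the saved agent):
import torch.nn as nn

_hidden, _n_actions = 200, 4

_donor = nn.Module()
_donor.output = nn.ModuleList([nn.Linear(_hidden, _n_actions),  # output.0 = policy
                               nn.Linear(_hidden, 1)])          # output.1 = value
_sd = _donor.state_dict()

class _Head(nn.Module):
    def __init__(self):
        super().__init__()
        self.pol = nn.Linear(_hidden, _n_actions)
        self.val = nn.Linear(_hidden, 1)

_head = _Head()
_head.load_state_dict({'pol.weight': _sd['output.0.weight'],
                       'pol.bias':   _sd['output.0.bias'],
                       'val.weight': _sd['output.1.weight'],
                       'val.bias':   _sd['output.1.bias']})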
memory = Memory.EpisodicMemory(cache_limit=400,
                               entry_size=env.action_space.n,
                               mem_temp=memtemp)
agent = Agent(network, memory)
#agent.get_action = agent.MF_action

# compute the optimal value of each track state by backward induction
# from the reward location
opt_values = np.zeros(env.nstates)
reward_loc = env.twoD2oneD(list(env.rewards.keys())[0])
opt_values[reward_loc] = list(env.rewards.values())[0]
for ind in reversed(range(len(opt_values) - 1)):
    opt_values[ind] = env.step_penalization + agent.gamma * opt_values[ind + 1]

ntrials = 1000
nevents = 50

run = expt(environment=env, agent=agent)
run.run(ntrials, nevents, printfreq=10)
run.data['opt_values'] = opt_values
run.record_log(dir='../../../Data/',
               file='linear_track.csv',
               expt_type=f'{type(run).__name__}_lr{params.lr}',
               env_name=env_name,
               n_trials=ntrials,
               n_steps=nevents,
               extra=[params.lr])
#plt.plot(rm(run.data['total_reward'], 10))
#plt.show()
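# Tiny standalone check of the backward induction above: on a 5-state track
# with reward 10 at the last state (the gamma and step-penalty values here
# are assumptions; the script reads them from agent.gamma and the env):
import numpy as np

_gamma, _pen = 0.98, -0.01
_v = np.zeros(5)
_v[-1] = 10.0
for i in reversed(range(len(_v) - 1)):
    # each state is worth one step penalty plus the discounted next value
    _v[i] = _pen + _gamma * _v[i + 1]
print(_v)  # ~ [9.1849  9.3825  9.5842  9.79  10.]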
oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))

# load pretrained weights; the freshly built network was dead code, since it
# was immediately overwritten by torch.load, so it is commented out here
#oh_network = Network(input_dims=[input_dims], fc1_dims=200, fc2_dims=200, output_dims=env.action_space.n, lr=0.0005)
oh_network = torch.load(data_dir + f'agents/{load_id}.pt')
oh_agent = Agent(oh_network, state_representations=oh_state_reps, memory=memory)
oh_agent.get_action = oh_agent.EC_action

# retraining with the reward relocated to (15, 15)
env.set_reward({(15, 15): 10})

ex = expt(oh_agent, env)
ntrials = 2000
nsteps = 250
ex.run(ntrials, nsteps, printfreq=1)
ex.record_log('oh_episodic', env_name, ntrials, nsteps,
              dir=data_dir, file='ac_representation.csv')

# save place cells
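# A minimal sketch of what one_hot_state plausibly computes (the body is an
# assumption; only the helper's name appears in the scripts above):
import numpy as np

def _one_hot_sketch(state_1d, nstates=400):
    vec = np.zeros(nstates)  # all-zero vector over the flattened state space
    vec[state_1d] = 1.0      # 1.0 at the state's 1-D index
    return vec

print(_one_hot_sketch(3, nstates=8))  # [0. 0. 0. 1. 0. 0. 0. 0.]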