actor = 'EC'
ntrials = 5000

# create environment
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')

memory = Memory.EpisodicMemory(cache_limit=400, entry_size=env.action_space.n)
agent = Agent(network, memory=memory)

# select model-free (MF) or episodic-control (EC) action selection
if actor == 'MF':
    agent.get_action = agent.MF_action
elif actor == 'EC':
    agent.get_action = agent.EC_action

run = expt(agent, env)
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=250)
run.record_log(f'{actor}', env_name, n_trials=ntrials)
        25: 96
    },
    'gridworld:gridworld-v51': {
        100: 286, 75: 214, 50: 143, 25: 71
    }
}

# scale the cache limit to the requested percentage of the full cache for this environment
cache_size_for_env = int(cache_limits[test_env][100] * (cache_size / 100))
memory = Memory(entry_size=env.action_space.n,
                cache_limit=cache_size_for_env,
                distance=distance_metric)

# reinitialize agent with new network
agent = Agent(network, memory, state_representations=latent_state_reps)

#verify_env = gym.make(env_name)
#ver_ex = expt(agent,verify_env)

# expt - redefines logging function to keep track of network details
ex = expt(agent, env)
ex.run(num_trials, num_events)
ex.record_log(env_name=test_env,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file,
              load_from=agent_id)
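# Worked example of the cache scaling above: for 'gridworld:gridworld-v51' with
# cache_size = 75, cache_size_for_env = int(286 * 0.75) = 214, which matches the
# 75% entry in the cache_limits table.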
        'gridworld:gridworld-v1': 'c34544ac-45ed-492c-b2eb-4431b403a3a8',
        'gridworld:gridworld-v3': '32301262-cd74-4116-b776-57354831c484',
        'gridworld:gridworld-v4': 'b50926a2-0186-4bb9-81ec-77063cac6861',
        'gridworld:gridworld-v5': '15b5e27b-444f-4fc8-bf25-5b7807df4c7f'
    }
    run_id = conv_ids[f'gridworld:gridworld-v{version}']
    agent_path = relative_path_to_data + f'agents/saved_agents/{run_id}.pt'
    state_reps, representation_name, input_dims, _ = latents(env, agent_path)
else:
    state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

AC_head_agent = head_AC(input_dims, env.action_space.n, lr=learning_rate)

memory = Memory(entry_size=env.action_space.n,
                cache_limit=cache_size_for_env,
                distance=distance_metric)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

ex = flat_expt(agent, env)
print(f"Experiment running {env.unwrapped.spec.id} \n"
      f"Representation: {representation_name} \n"
      f"Cache Limit:{cache_size_for_env} \n"
      f"Distance: {distance_metric}")
ex.run(num_trials, num_events, snapshot_logging=False)
ex.record_log(env_name=test_env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file)
num_events = 250

# valid representation types for this experiment
rep_types = {'conv': convs, 'rwd_conv': reward_convs}
param_set = {'conv': conv_PO_params, 'rwd_conv': conv_FO_params}

# instantiate the environment for the experiment
env = gym.make(env_name)
plt.close()

# get representation type, associated parameters to specify the network dimensions
state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)
params = param_set[representation_type]
network_parameters = params(env)

# make a new network instance
network = Network(network_parameters, softmax_temp=1)

# reinitialize agent with new network
agent = Agent(network, memory=None, state_representations=state_reps)

# expt - redefines logging function to keep track of network details
ex = conv_expt(agent, env)
ex.run(num_trials, num_events)
ex.record_log(env_name=env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file)
pc_state_reps = {}
oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))
    pc_state_reps[env.twoD2oneD(state)] = place_cells.get_activities([state])[0]

place_cells.plot_placefields(env_states_to_map=env.useable)

#oh_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
#oh_network = torch.load(data_dir+f'agents/{load_id}.pt')
#oh_agent = Agent(oh_network, state_representations=oh_state_reps)

#pc_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
pc_network = torch.load(data_dir + f'agents/{load_id}.pt')
pc_agent = Agent(pc_network, state_representations=pc_state_reps, memory=memory)
pc_agent.get_action = pc_agent.EC_action

# retraining
env.set_reward({(15, 15): 10})
ex = expt(pc_agent, env)
ntrials = 2000
nsteps = 250
#ex.run(ntrials, nsteps, printfreq=1)
#ex.data['place_cells'] = place_cells
#ex.record_log('pc_episodic',env_name,ntrials,nsteps, dir=data_dir,file='ac_representation.csv')

# save place cells
## generate the environment object
env = gym.make(env_name)
plt.close()

## get state representations to be used
state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)

## create an actor-critic network and associated agent
network = Network(input_dims=[input_dims], fc1_dims=200, fc2_dims=200,
                  output_dims=env.action_space.n, lr=0.0005)
memory = Memory(entry_size=env.action_space.n, cache_limit=400, mem_temp=1)
agent = Agent(network, state_representations=state_reps, memory=memory)

# create an experiment class instance
ex = expt(agent, env)
ex.run(num_trials, num_events)
ex.record_log(env_name=env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir='../../Data/',
              file=write_to_file)

'''
# print results of training
fig, ax = plt.subplots(2,1, sharex=True)
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    params.lr = 0.001
    params.temp = 1.1
    print(params.__dict__)
    network = nets.ActorCritic(params)
else:
    network = torch.load(f=f'./Data/agents/load_agents/{network_id}.pt')

memtemp = 1
memory = Memory.EpisodicMemory(cache_limit=400,
                               entry_size=env.action_space.n,
                               mem_temp=memtemp)
agent = Agent(network, memory=memory)

run = expt(agent, env)
ntrials = 1000
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=100)
#run.record_log(f'mf_ec_t{memtemp}', env_name, n_trials=ntrials)

# plot smoothed reward curves
smoothing = 10
plt.figure()
plt.plot(rm(run.data['total_reward'], smoothing), c='k', alpha=0.5)
if 'bootstrap_reward' in run.data.keys():
    plt.plot(rm(run.data['bootstrap_reward'], smoothing), c='r')
plt.show()
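# Note: the `rm` smoothing helper used in the plotting code above is not defined in
# this snippet. A minimal sketch, assuming it is a simple running (moving) mean:
import numpy as np

def rm(data, window):
    # hypothetical moving-average smoother over a 1-D reward trace
    data = np.asarray(data, dtype=float)
    kernel = np.ones(window) / window
    return np.convolve(data, kernel, mode='valid')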
load_id = 'd80ea92c-422c-436a-b0ff-84673d43a30d'
memory = EpisodicMemory(env.action_space.n, cache_limit=env.nstates)

oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))

oh_network = Network(input_dims=[input_dims], fc1_dims=200, fc2_dims=200,
                     output_dims=env.action_space.n, lr=0.0005)
oh_network = torch.load(data_dir + f'agents/{load_id}.pt')
oh_agent = Agent(oh_network, state_representations=oh_state_reps, memory=memory)
oh_agent.get_action = oh_agent.EC_action

# retraining
env.set_reward({(15, 15): 10})
ex = expt(oh_agent, env)
ntrials = 2000
nsteps = 250
ex.run(ntrials, nsteps, printfreq=1)
ex.record_log('oh_episodic', env_name, ntrials, nsteps, dir=data_dir,
# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    params.lr = 0.1
    network = nets.ActorCritic(params)
else:
    network = torch.load(f=f'./Data/agents/load_agents/{network_id}.pt')

## build a memory module that knows all the right actions
memory = Memory.EpisodicMemory(cache_limit=400,
                               entry_size=env.action_space.n,
                               mem_temp=1)
agent = Agent(network, memory=memory)
agent.EC_storage = EC_pass

run = expt(agent, env)
for coord, rep in zip(run.sample_states, run.sample_reps):
    actions = correct_actions(coord)
    if len(actions) == 0:
        item = {}
        item['activity'] = tuple(rep)
        item['action'] = 0
        item['delta'] = 0
        item['timestamp'] = 0
        item['trial'] = 0
        item['readable'] = coord
        run.agent.EC.add_mem(item)
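# Note: `EC_pass` and `correct_actions` are defined elsewhere in this script. A
# plausible reading (assumption, not the confirmed implementation) is that `EC_pass`
# is a no-op replacing the agent's usual episodic-storage step, so the memory
# pre-loaded above is not overwritten while the experiment runs, e.g.:
def EC_pass(*args, **kwargs):
    # do nothing instead of writing new entries to episodic memory
    pass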
num_trials = 25000
num_events = 250
relative_path_to_data = '../../Data'  # ../../Data if you are in Tests/CH2

# make gym environment
env = gym.make(env_name)
plt.close()

rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)

# load weights to head_ac network from previously learned agent
AC_head_agent = head_AC(input_dims, test_env.action_space.n, lr=0.0005)

agent = Agent(AC_head_agent, state_representations=state_reps)

ex = flat_expt(agent, test_env)
ex.run(num_trials, num_events, snapshot_logging=False)
ex.record_log(env_name=test_env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file)
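# The `rep_types` functions above all return (state_reps, representation_name,
# input_dims, _). A hypothetical sketch of that shared interface, using the one-hot
# case and the env.useable / env.twoD2oneD / env.nstates attributes seen elsewhere
# in these scripts (names here are illustrative, not the repo's implementation):
import numpy as np

def onehot_sketch(env):
    # map each 1-D state index to a one-hot feature vector
    state_reps = {}
    for state in env.useable:
        idx = env.twoD2oneD(state)
        vec = np.zeros(env.nstates)
        vec[idx] = 1.0
        state_reps[idx] = vec
    return state_reps, 'onehot', env.nstates, None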
# valid representation types for this experiment
rep_types = {'conv': convs, 'rwd_conv': reward_convs}
param_set = {'conv': conv_PO_params, 'rwd_conv': conv_FO_params}

# instantiate the environment for the experiment
env = gym.make(env_name)
plt.close()

# get representation type, associated parameters to specify the network dimensions
state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)
params = param_set[representation_type]
network_parameters = params(env)
print(network_parameters.__dict__)

# make a new network instance
network = Network(network_parameters)
#from torchsummary import summary
#print(summary(network, (2,20,20)))

# reinitialize agent with new network
agent = Agent(network, state_representations=state_reps)

# expt - redefines logging function to keep track of network details
ex = conv_expt(agent, env)
ex.run(num_trials, num_events)
ex.record_log(env_name=env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file)
place_cells = (pickle.load(f))['place_cells']

pc_state_reps = {}
oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))
    pc_state_reps[env.twoD2oneD(state)] = place_cells.get_activities([state])[0]

#oh_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
#oh_network = torch.load(data_dir+f'agents/{load_id}.pt')
#oh_agent = Agent(oh_network, state_representations=oh_state_reps)

#pc_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
pc_network = torch.load(data_dir + f'agents/{load_id}.pt')
pc_agent = Agent(pc_network, state_representations=pc_state_reps)

# retraining
env.set_reward({(15, 15): 10})
ex = expt(pc_agent, env)
ntrials = 10000
nsteps = 250
ex.run(ntrials, nsteps)
ex.data['place_cells'] = place_cells
ex.record_log('pc_retraining', env_name, ntrials, nsteps,
              dir=data_dir, file='ac_representation.csv')
import gym
import numpy as np
import matplotlib.pyplot as plt

import modules.Agents.Networks as nets
from modules.Agents import Agent
from Tests.agent_test import agent_test
from Tests.representation_learn_test import rep_learning, latent_space_distance

# create environment
env = gym.make('gym_grid:gridworld-v1')
plt.close()

# generate parameters for network from environment observation shape
params = nets.params(env)

# generate network
network = nets.ActorCritic(params)
agent = Agent(network, memory=None)

# learn a latent representation from one-hot observations
autoencoder, data, loss = rep_learning('onehot', env, n_samples=1000, training_cycles=500)

# build a one-hot vector for every state in the environment
states = []
for i in range(env.nstates):
    s = np.zeros((1, env.nstates))
    s[0, i] = 1
    states.append(s)

actions = data[1][0:400]
latent_states, _, __ = autoencoder(states, actions)
from modules.Agents import Agent
from modules.Experiments import flat_expt

sys.path.append('../../../')

write_to_file = 'flat_ac_training.csv'
version = 1
env_name = f'gridworld:gridworld-v{version}'
representation_type = 'latent'
num_trials = 5000
num_events = 250

# make gym environment
env = gym.make(env_name)
plt.close()

state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)

for _ in range(1):
    empty_net = head_AC(input_dims, env.action_space.n, lr=0.0005)
    memory = Memory(entry_size=4, cache_limit=400)
    agent = Agent(empty_net, memory, state_representations=state_reps)

    ex = flat_expt(agent, env)
    ex.run(num_trials, num_events, snapshot_logging=False)
    ex.record_log(env_name=env_name,
                  representation_type=representation_name,
                  n_trials=num_trials,
                  n_steps=num_events,
                  dir='./Data/',
                  file=write_to_file)