plt.close()

rep_types = {
    'unstructured': onehot,
    'random': random,
    'place_cell': place_cell,
    'structured': sr,
    'conv': convs
}

if rep == 'conv':
    # saved weights
    saved_network = torch.load(
        '../../Data/agents/6a956906-c06c-47ef-aad1-3593fb9068d1.pt')

    # load agent weights into new network
    network = nets.shallow_ActorCritic(input_dims=600,
                                       hidden_dims=400,
                                       output_dims=4,
                                       lr=5e-4)

    # rename saved keys ('output.0.*' -> 'pol.*', 'output.1.*' -> 'val.*',
    # 'hidden.5.*' -> 'hidden.*') to match the new network's layout
    new_state_dict = {}
    for key in saved_network.keys():
        if key[0:6] == 'output':
            if key[7] == '0':
                new_key = 'pol' + key[8:]
                new_state_dict[new_key] = saved_network[key]
            elif key[7] == '1':
                new_key = 'val' + key[8:]
                new_state_dict[new_key] = saved_network[key]
        elif key[0:8] == 'hidden.5':
            new_key = 'hidden' + key[8:]
            new_state_dict[new_key] = saved_network[key]

    network.load_state_dict(new_state_dict)
# set up parameters
env_name = 'gridworld:gridworld-v1'  # v1 novel, v11 moved reward
network_id = None  # '97b5f281-a60e-4738-895d-191a04edddd6'
actor = 'EC'
ntrials = 5000

# create environment
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')

memory = Memory.EpisodicMemory(cache_limit=400, entry_size=env.action_space.n)

# select model-free or episodic-control action selection
agent = Agent(network, memory=memory)
if actor == 'MF':
    agent.get_action = agent.MF_action
elif actor == 'EC':
    agent.get_action = agent.EC_action

run = expt(agent, env)
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=250)
directory = '../../Data/'  # ../../Data if you are in Tests/CH2
env_name = f'gridworld:gridworld-v{version}'

# make gym environment
env = gym.make(env_name)
plt.close()
print(env.rewards)

num_trials = 10
num_events = 250

# load in trained network
net, state_dict = load_network(version, rep_type, directory)

# load weights to head_ac network from previously learned agent
AC_head_agent = nets.flat_ActorCritic(400,
                                      output_dims=env.action_space.n,
                                      lr=learning_rate)

top_layer_dict = {}
top_layer_dict['pol.weight'] = state_dict['output.0.weight']
top_layer_dict['pol.bias'] = state_dict['output.0.bias']
top_layer_dict['val.weight'] = state_dict['output.1.weight']
top_layer_dict['val.bias'] = state_dict['output.1.bias']
AC_head_agent.load_state_dict(top_layer_dict)

# get state inputs
h0, h1 = get_net_activity(env_name, rep_type, net)
state_reps, representation_name = h1, f'h1_{rep_type}_latents'

memory = None  # Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)
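# The get_net_activity call above returns the trained network's layer
# activations (h0, h1) to use as state representations. Below is a minimal
# sketch of one way to collect such activations with standard PyTorch forward
# hooks; the helper name, the choice of layer, and the batched-observation
# input are illustrative assumptions, not the project's actual implementation.
import torch

def collect_layer_activations(network, layer, observations):
    """Run observations through network and return the given layer's outputs."""
    captured = []
    hook = layer.register_forward_hook(
        lambda module, inputs, output: captured.append(output.detach()))
    with torch.no_grad():
        network(observations)
    hook.remove()
    return torch.cat(captured, dim=0)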
plt.close()

cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)

rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# build a fully connected actor-critic head on the chosen state representation
AC_head_agent = nets.fc_ActorCritic([input_dims],
                                    fc1_dims=200,
                                    fc2_dims=200,
                                    output_dims=env.action_space.n,
                                    lr=learning_rate)

memory = None  # Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)

run.record_log(env_name, representation_name, num_trials, num_events,
               dir=directory, file=write_to_file)
env = gym.make(env_name)
plt.close()

cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)

rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# build a two-layer actor-critic head on the chosen state representation
AC_head_agent = nets.shallow_ActorCritic(input_dims,
                                         hidden_dims=200,
                                         output_dims=env.action_space.n,
                                         lr=learning_rate)

memory = None  # Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

# from torchsummary import summary
# print(summary(AC_head_agent, (2, 20, 20)))

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)

run.record_log(env_name, representation_name, num_trials, num_events,
               dir=directory, file=write_to_file)
env = gym.make(env_name)
plt.close()

# episodic memory capacity as a percentage of the environment's usable states
cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)

rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# load weights to head_ac network from previously learned agent
AC_head_agent = nets.flat_ActorCritic(input_dims, env.action_space.n, lr=learning_rate)

if load_from is not None:
    AC_head_agent.load_state_dict(
        torch.load(directory + f'agents/{load_from}.pt'))
    print(f"weights loaded from {load_from}")

memory = Memory.EpisodicMemory(cache_limit=cache_size_for_env,
                               entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)

run.record_log(env_name, representation_name, num_trials, num_events,
               dir=directory, file=write_to_file, load_from=load_from)
num_trials = 1000
num_events = 250

# make gym environment
env = gym.make(env_name)
plt.close()

cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)

rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# load weights to head_ac network from previously learned agent
AC_head_agent = nets.shallow_ActorCritic(input_dims, 200, env.action_space.n,
                                         lr=learning_rate)

if load_from is not None:
    AC_head_agent.load_state_dict(torch.load(directory + f'agents/{load_from}.pt'))
    print(f"weights loaded from {load_from}")

memory = None  # Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)

print([(x, len(run.data[x])) for x in run.data.keys()])

run.record_log(env_name, representation_name, num_trials, num_events,
               dir=directory, file=write_to_file,
               load_from=load_from, extra=['randomwalk'])