# NOTE: these fragments assume the surrounding repo's imports are in scope
# (gym, matplotlib.pyplot as plt, torch, nets, Memory, Agent, expt, and the
# representation constructors onehot, random, place_cell, sr, latents, convs).

# make gym environment
env = gym.make(env_name)
plt.close()

# size the episodic cache as a percentage of the environment's useable states
cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)

# map representation-type names to their constructor functions
rep_types = {'onehot': onehot,
             'random': random,
             'place_cell': place_cell,
             'sr': sr,
             'latent': latents}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# load weights to head_ac network from previously learned agent
AC_head_agent = nets.shallow_ActorCritic(input_dims,
                                         hidden_dims=200,
                                         output_dims=env.action_space.n,
                                         lr=learning_rate)

# run without episodic memory; uncomment to restore the episodic cache
memory = None  # Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

# from torchsummary import summary
# print(summary(AC_head_agent, (2, 20, 20)))

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)
run.record_log(env_name, representation_name, num_trials, num_events,
               dir=directory, file=write_to_file)
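# A minimal sketch of the episodic-memory variant suggested by the commented-out
# line above; it assumes Memory.EpisodicMemory accepts exactly the
# cache_limit/entry_size arguments shown there (not verified against the
# Memory module).
episodic_memory = Memory.EpisodicMemory(cache_limit=cache_size_for_env,
                                        entry_size=env.action_space.n)
agent_with_memory = Agent(AC_head_agent, memory=episodic_memory,
                          state_representations=state_reps)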
plt.close()

# map representation-type names to their constructor functions
rep_types = {'unstructured': onehot,
             'random': random,
             'place_cell': place_cell,
             'structured': sr,
             'conv': convs}

if rep == 'conv':
    # saved weights
    saved_network = torch.load('../../Data/agents/6a956906-c06c-47ef-aad1-3593fb9068d1.pt')

    # load agent weights into new network
    network = nets.shallow_ActorCritic(input_dims=600, hidden_dims=400, output_dims=4, lr=5e-4)

    # rename the saved keys to match the new network: 'output.0.*' holds the
    # policy head ('pol.*'), 'output.1.*' the value head ('val.*'), and
    # 'hidden.5.*' becomes the single hidden layer ('hidden.*')
    new_state_dict = {}
    for key in saved_network.keys():
        if key.startswith('output'):
            if key[7] == '0':
                new_key = 'pol' + key[8:]
                new_state_dict[new_key] = saved_network[key]
            elif key[7] == '1':
                new_key = 'val' + key[8:]
                new_state_dict[new_key] = saved_network[key]
        elif key.startswith('hidden.5'):
            new_key = 'hidden' + key[8:]
            new_state_dict[new_key] = saved_network[key]

    network.load_state_dict(new_state_dict)
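# A hedged sanity check for the key remapping above, using only standard
# torch.nn.Module APIs: confirm that the renamed keys and tensor shapes line
# up with the freshly built network before calling load_state_dict.
target_state = network.state_dict()
missing = set(target_state) - set(new_state_dict)
unexpected = set(new_state_dict) - set(target_state)
assert not missing and not unexpected, (missing, unexpected)
for k, v in new_state_dict.items():
    assert v.shape == target_state[k].shape, f"shape mismatch at {k}"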
num_trials = 1000
num_events = 250

# make gym environment
env = gym.make(env_name)
plt.close()

# size the episodic cache as a percentage of the environment's useable states
cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)

rep_types = {'onehot': onehot,
             'random': random,
             'place_cell': place_cell,
             'sr': sr,
             'latent': latents}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# load weights to head_ac network from previously learned agent
AC_head_agent = nets.shallow_ActorCritic(input_dims, 200, env.action_space.n, lr=learning_rate)
if load_from is not None:
    AC_head_agent.load_state_dict(torch.load(directory + f'agents/{load_from}.pt'))
    print(f"weights loaded from {load_from}")

memory = None  # Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)
# report how many entries each log field accumulated
print([(x, len(run.data[x])) for x in run.data.keys()])
run.record_log(env_name, representation_name, num_trials, num_events,
               dir=directory, file=write_to_file,
               load_from=load_from, extra=['randomwalk'])
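# A sketch of inspecting the learning curve after run.run(). The 'total_reward'
# key is an assumption about run.data (check the print above for the actual
# field names); everything else is plain numpy/matplotlib.
import numpy as np

rewards = np.asarray(run.data['total_reward'])
window = 50
smoothed = np.convolve(rewards, np.ones(window) / window, mode='valid')
plt.plot(smoothed)
plt.xlabel('trial')
plt.ylabel(f'reward ({window}-trial running mean)')
plt.show()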