Example #1
import gym
import matplotlib.pyplot as plt

# nets, Agent, expt, Memory, and the representation functions (onehot, random,
# place_cell, sr, latents) are assumed to be imported from the surrounding project
env = gym.make(env_name)
plt.close()
cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)
rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# load weights to head_ac network from previously learned agent
AC_head_agent = nets.shallow_ActorCritic(input_dims,
                                         hidden_dims=200,
                                         output_dims=env.action_space.n,
                                         lr=learning_rate)

memory = None  # episodic memory disabled here; to enable: Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)
#from torchsummary import summary
#print(summary(AC_head_agent, (2,20,20)))

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)
run.record_log(env_name,
               representation_name,
               num_trials,
               num_events,
               dir=directory)
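
Each function in `rep_types` is expected to return a 4-tuple of `(state_reps, representation_name, input_dims, _)`. As a rough illustration of that contract, below is a minimal sketch of a one-hot builder; the function name, the toy environment, and the return values are assumptions for illustration, not the project's actual implementation.

import numpy as np

def onehot_sketch(env):
    # Hypothetical one-hot representation builder matching the
    # (state_reps, name, input_dims, _) return signature used above.
    states = list(env.useable)                      # usable grid positions
    input_dims = len(states)
    state_reps = {s: np.eye(input_dims)[i] for i, s in enumerate(states)}
    return state_reps, 'onehot', input_dims, None

class _ToyEnv:
    # stand-in for gym.make(env_name), for illustration only
    useable = [(0, 0), (0, 1), (1, 0)]

state_reps, name, input_dims, _ = onehot_sketch(_ToyEnv())
print(name, input_dims, state_reps[(0, 1)])         # onehot 3 [0. 1. 0.]
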
Example #2
import torch
import matplotlib.pyplot as plt

plt.close()
rep_types = {
    'unstructured': onehot,
    'random': random,
    'place_cell': place_cell,
    'structured': sr,
    'conv': convs
}
if rep == 'conv':
    # state dict saved from a previously trained agent
    saved_network = torch.load(
        '../../Data/agents/6a956906-c06c-47ef-aad1-3593fb9068d1.pt')

    # load agent weights into new network
    network = nets.shallow_ActorCritic(input_dims=600,
                                       hidden_dims=400,
                                       output_dims=4,
                                       lr=5e-4)
    # remap keys from the saved network's layout to shallow_ActorCritic's:
    #   'output.0.*' -> 'pol.*'    (policy head)
    #   'output.1.*' -> 'val.*'    (value head)
    #   'hidden.5.*' -> 'hidden.*' (single hidden layer)
    new_state_dict = {}
    for key in saved_network.keys():
        if key[0:6] == 'output':
            if key[7] == '0':
                new_key = 'pol' + key[8:]
                new_state_dict[new_key] = saved_network[key]
            elif key[7] == '1':
                new_key = 'val' + key[8:]
                new_state_dict[new_key] = saved_network[key]
        elif key[0:8] == 'hidden.5':
            new_key = 'hidden' + key[8:]
            new_state_dict[new_key] = saved_network[key]

    network.load_state_dict(new_state_dict)
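
To make the renaming concrete, the sketch below runs the same key-remapping logic over a dummy state dict with the key layout assumed for the saved network ('hidden.5.*', 'output.0.*', 'output.1.*') and prints the resulting keys; the tensor shapes are placeholders.

import torch

# dummy state dict with the key layout assumed for the saved network
saved = {
    'hidden.5.weight': torch.zeros(400, 600), 'hidden.5.bias': torch.zeros(400),
    'output.0.weight': torch.zeros(4, 400),   'output.0.bias': torch.zeros(4),
    'output.1.weight': torch.zeros(1, 400),   'output.1.bias': torch.zeros(1),
}

remapped = {}
for key, value in saved.items():
    if key.startswith('output.0'):
        remapped['pol' + key[8:]] = value      # policy head
    elif key.startswith('output.1'):
        remapped['val' + key[8:]] = value      # value head
    elif key.startswith('hidden.5'):
        remapped['hidden' + key[8:]] = value   # single hidden layer
print(sorted(remapped))
# ['hidden.bias', 'hidden.weight', 'pol.bias', 'pol.weight', 'val.bias', 'val.weight']
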
Example #3

import gym
import torch
import matplotlib.pyplot as plt

num_trials = 1000
num_events = 250

# make gym environment
env = gym.make(env_name)
plt.close()
cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)
rep_types = {'onehot': onehot, 'random': random, 'place_cell': place_cell, 'sr': sr, 'latent': latents}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)


# load weights to head_ac network from previously learned agent
AC_head_agent = nets.shallow_ActorCritic(input_dims, 200, env.action_space.n, lr=learning_rate)


if load_from is not None:
    AC_head_agent.load_state_dict(torch.load(directory + f'agents/{load_from}.pt'))
    print(f"weights loaded from {load_from}")

memory = None  # episodic memory disabled here; to enable: Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)


run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)
print([(x, len(run.data[x])) for x in run.data.keys()])
run.record_log(env_name, representation_name, num_trials, num_events,
               dir=directory, file=write_to_file, load_from=load_from,
               extra=['randomwalk'])
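
The snippet refers to several names (env_name, rep_type, learning_rate, cache_size, load_from, directory, write_to_file) that are defined elsewhere in the original script. A minimal sketch of how they might be supplied via argparse is shown below; the flag names and default values are assumptions, not the project's actual interface.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--env_name', default='gridworld:gridworld-v1')   # placeholder env id
parser.add_argument('--rep_type', default='onehot',
                    choices=['onehot', 'random', 'place_cell', 'sr', 'latent'])
parser.add_argument('--learning_rate', type=float, default=5e-4)
parser.add_argument('--cache_size', type=int, default=100)            # percent of usable states
parser.add_argument('--load_from', default=None)                      # id of agent to warm-start from
parser.add_argument('--directory', default='../../Data/')             # placeholder output directory
parser.add_argument('--write_to_file', default='results.csv')         # placeholder log file name
args = parser.parse_args()

env_name, rep_type = args.env_name, args.rep_type
learning_rate, cache_size = args.learning_rate, args.cache_size
load_from, directory, write_to_file = args.load_from, args.directory, args.write_to_file
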