Example #1
plt.close()
rep_types = {
    'unstructured': onehot,
    'random': random,
    'place_cell': place_cell,
    'structured': sr,
    'conv': convs
}
if rep == 'conv':
    # saved weights
    saved_network = torch.load(
        '../../Data/agents/6a956906-c06c-47ef-aad1-3593fb9068d1.pt')

    # load agent weights into new network
    network = nets.shallow_ActorCritic(input_dims=600,
                                       hidden_dims=400,
                                       output_dims=4,
                                       lr=5e-4)
    new_state_dict = {}
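    # rename saved keys to match shallow_ActorCritic's layout:
    # 'output.0.*' -> 'pol.*' (policy head), 'output.1.*' -> 'val.*' (value head),
    # 'hidden.5.*' -> 'hidden.*'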
    for key in saved_network.keys():
        if key[0:6] == 'output':
            if key[7] == '0':
                new_key = 'pol' + key[8:]
                new_state_dict[new_key] = saved_network[key]
            elif key[7] == '1':
                new_key = 'val' + key[8:]
                new_state_dict[new_key] = saved_network[key]
        elif key[0:8] == 'hidden.5':
            new_key = 'hidden' + key[8:]
            new_state_dict[new_key] = saved_network[key]

    network.load_state_dict(new_state_dict)
Example #2
# set up parameters
env_name   = 'gridworld:gridworld-v1'  # v1: novel environment; v11: moved reward
network_id = None # '97b5f281-a60e-4738-895d-191a04edddd6'
actor      = 'EC'
ntrials    = 5000


# create environment
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')

memory = Memory.EpisodicMemory(cache_limit=400, entry_size=env.action_space.n)

agent = Agent(network, memory=memory)

if actor == 'MF':
    agent.get_action = agent.MF_action
elif actor == 'EC':
    agent.get_action = agent.EC_action

run = expt(agent, env)
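# run the experiment for ntrials trials of NUM_EVENTS events each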
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=250)
Example #3
directory = '../../Data/'  # ../../Data if you are in Tests/CH2
env_name = f'gridworld:gridworld-v{version}'
# make gym environment
env = gym.make(env_name)
plt.close()
print(env.rewards)

num_trials = 10
num_events = 250

# load in trained network
net, state_dict = load_network(version, rep_type, directory)

# load weights to head_ac network from previously learned agent
AC_head_agent = nets.flat_ActorCritic(400,
                                      output_dims=env.action_space.n,
                                      lr=learning_rate)
top_layer_dict = {}
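# keep only the saved output layers: 'output.0' -> policy head ('pol'), 'output.1' -> value head ('val')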
top_layer_dict['pol.weight'] = state_dict['output.0.weight']
top_layer_dict['pol.bias'] = state_dict['output.0.bias']
top_layer_dict['val.weight'] = state_dict['output.1.weight']
top_layer_dict['val.bias'] = state_dict['output.1.bias']
AC_head_agent.load_state_dict(top_layer_dict)

# get state inputs
h0, h1 = get_net_activity(env_name, rep_type, net)
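# use the second hidden layer's activations (h1) as the state representation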
state_reps, representation_name = h1, f'h1_{rep_type}_latents'

memory = None  #Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)
Example #4
plt.close()
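# episodic cache size is a percentage of the environment's usable states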
cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)
rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
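# build the chosen state representation: per-state input vectors, a name, and the network input size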
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# build the actor-critic head network
AC_head_agent = nets.fc_ActorCritic([input_dims],
                                    fc1_dims=200,
                                    fc2_dims=200,
                                    output_dims=env.action_space.n,
                                    lr=learning_rate)

memory = None  #Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)
run.record_log(env_name,
               representation_name,
               num_trials,
               num_events,
               dir=directory,
               file=write_to_file)
Example #5
env = gym.make(env_name)
plt.close()
cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)
rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# build the actor-critic head network
AC_head_agent = nets.shallow_ActorCritic(input_dims,
                                         hidden_dims=200,
                                         output_dims=env.action_space.n,
                                         lr=learning_rate)

memory = None  #Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)
#from torchsummary import summary
#print(summary(AC_head_agent, (2,20,20)))

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)
run.record_log(env_name,
               representation_name,
               num_trials,
               num_events,
               dir=directory,
               file=write_to_file)
Example #6
env = gym.make(env_name)
plt.close()
cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)
rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# load weights to head_ac network from previously learned agent
AC_head_agent = nets.flat_ActorCritic(input_dims,
                                      env.action_space.n,
                                      lr=learning_rate)

if load_from is not None:
    AC_head_agent.load_state_dict(
        torch.load(directory + f'agents/{load_from}.pt'))
    print(f"weights loaded from {load_from}")

memory = Memory.EpisodicMemory(cache_limit=cache_size_for_env,
                               entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)
run.record_log(env_name,
               representation_name,
               num_trials,
               num_events,
               dir=directory,
               file=write_to_file)
Example #7

num_trials = 1000
num_events = 250

# make gym environment
env = gym.make(env_name)
plt.close()
cache_size_for_env = int(len(env.useable)*(cache_size/100))
print(env.rewards)
rep_types = {'onehot':onehot, 'random':random, 'place_cell':place_cell, 'sr':sr, 'latent':latents}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)


# load weights to head_ac network from previously learned agent
AC_head_agent = nets.shallow_ActorCritic(input_dims, 200, env.action_space.n, lr=learning_rate)


if load_from is not None:
    AC_head_agent.load_state_dict(torch.load(directory+f'agents/{load_from}.pt'))
    print(f"weights loaded from {load_from}")

memory = None  # Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)


run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)
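# print how many entries were logged for each field in run.data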
print([(x, len(run.data[x])) for x in run.data.keys()])
run.record_log(env_name, representation_name, num_trials, num_events, dir=directory, file=write_to_file, load_from=load_from, extra=['randomwalk'])