Example #1
actor      = 'EC'
ntrials    = 5000


# create environment
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')

memory = Memory.EpisodicMemory(cache_limit=400, entry_size=env.action_space.n)

agent = Agent(network, memory=memory)

if actor == 'MF':
    agent.get_action = agent.MF_action
elif actor == 'EC':
    agent.get_action = agent.EC_action

run = expt(agent, env)
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=250)
run.record_log(f'{actor}', env_name, n_trials=ntrials)
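
The actor switch above chooses between the network's model-free policy (`MF_action`) and an episodic-control lookup (`EC_action`). Below is a minimal sketch of what an episodic-control choice can look like, assuming the memory exposes a table of state-key to per-action values; the names `memory_table`, `softmax_policy` and `ec_action_sketch` are illustrative, not the project's API.

import numpy as np

def softmax_policy(values, temperature=1.0):
    # turn per-action values into selection probabilities
    prefs = np.exp(np.asarray(values, dtype=float) / temperature)
    return prefs / prefs.sum()

def ec_action_sketch(memory_table, state_key, n_actions, temperature=1.0):
    # recall cached action values for this state; unseen states fall back to zeros
    values = memory_table.get(state_key, np.zeros(n_actions))
    return np.random.choice(n_actions, p=softmax_policy(values, temperature))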


Example #2
        25: 96
    },
    'gridworld:gridworld-v51': {
        100: 286,
        75: 214,
        50: 143,
        25: 71
    }
}
cache_size_for_env = int(cache_limits[test_env][100] * (cache_size / 100))
memory = Memory(entry_size=env.action_space.n,
                cache_limit=cache_size_for_env,
                distance=distance_metric)

# reinitialize agent with new network
agent = Agent(network, memory, state_representations=latent_state_reps)

#verify_env = gym.make(env_name)
#ver_ex = expt(agent,verify_env)

# expt - redefines logging function to keep track of network details
ex = expt(agent, env)
ex.run(num_trials, num_events)
ex.record_log(env_name=test_env,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file,
              load_from=agent_id)
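
For reference, `cache_size_for_env` rescales the full-size cache for the chosen environment by the requested percentage; the values in the table above are consistent with that rule.

# worked example using the gridworld-v51 row of cache_limits above
assert int(286 * (50 / 100)) == 143
assert int(286 * (25 / 100)) == 71
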
Example #3
        'gridworld:gridworld-v1': 'c34544ac-45ed-492c-b2eb-4431b403a3a8',
        'gridworld:gridworld-v3': '32301262-cd74-4116-b776-57354831c484',
        'gridworld:gridworld-v4': 'b50926a2-0186-4bb9-81ec-77063cac6861',
        'gridworld:gridworld-v5': '15b5e27b-444f-4fc8-bf25-5b7807df4c7f'
    }
    run_id = conv_ids[f'gridworld:gridworld-v{version}']
    agent_path = relative_path_to_data + f'agents/saved_agents/{run_id}.pt'
    state_reps, representation_name, input_dims, _ = latents(env, agent_path)
else:
    state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

AC_head_agent = head_AC(input_dims, env.action_space.n, lr=learning_rate)

memory = Memory(entry_size=env.action_space.n,
                cache_limit=cache_size_for_env,
                distance=distance_metric)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

ex = flat_expt(agent, env)
print(
    f"Experiment running {env.unwrapped.spec.id} \nRepresentation: {representation_name} \nCache Limit:{cache_size_for_env} \nDistance: {distance_metric}"
)
ex.run(num_trials, num_events, snapshot_logging=False)
ex.record_log(env_name=test_env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file)
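
`latents` (selected when the latent representation is requested) is used here as returning a dictionary of per-state activity vectors read out of a previously trained network, plus a representation name and input size. The sketch below assumes that interface; the forward-pass details and the position of the latent in the network's outputs are guesses, not the project's implementation.

import torch

def latents_sketch(env, agent_path):
    # load a previously trained network and use its activity as the state representation
    net = torch.load(agent_path)
    net.eval()
    state_reps = {}
    with torch.no_grad():
        for state_2d in env.useable:
            idx = env.twoD2oneD(state_2d)
            obs = torch.eye(env.nstates)[idx].unsqueeze(0)    # assumes one-hot observations
            state_reps[idx] = net(obs)[0].squeeze(0).numpy()  # assumes first output is the latent
    input_dims = len(next(iter(state_reps.values())))
    return state_reps, 'latent', input_dims, None
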
Example #4
num_events = 250

# valid representation types for this experiment
rep_types = {'conv': convs, 'rwd_conv': reward_convs}
param_set = {'conv': conv_PO_params, 'rwd_conv': conv_FO_params}

# instantiate the environment for the experiment
env = gym.make(env_name)
plt.close()

# get representation type, associated parameters to specify the network dimensions
state_reps, representation_name, input_dims, _ = rep_types[
    representation_type](env)
params = param_set[representation_type]
network_parameters = params(env)

# make a new network instance
network = Network(network_parameters, softmax_temp=1)
# reinitialize agent with new network
agent = Agent(network, memory=None, state_representations=state_reps)

# expt - redefines logging function to keep track of network details
ex = conv_expt(agent, env)
ex.run(num_trials, num_events)
ex.record_log(env_name=env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file)

Example #5
pc_state_reps = {}
oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))
    pc_state_reps[env.twoD2oneD(state)] = place_cells.get_activities([state])[0]

place_cells.plot_placefields(env_states_to_map=env.useable)

#oh_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
#oh_network = torch.load(data_dir+f'agents/{load_id}.pt')
#oh_agent = Agent(oh_network, state_representations=oh_state_reps)

#pc_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
pc_network = torch.load(data_dir + f'agents/{load_id}.pt')
pc_agent = Agent(pc_network,
                 state_representations=pc_state_reps,
                 memory=memory)
pc_agent.get_action = pc_agent.EC_action

# retraining
env.set_reward({(15, 15): 10})

ex = expt(pc_agent, env)
ntrials = 2000
nsteps = 250
#ex.run(ntrials, nsteps, printfreq=1)
#ex.data['place_cells'] = place_cells
#ex.record_log('pc_episodic',env_name,ntrials,nsteps, dir=data_dir,file='ac_representation.csv')
# save place cells
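
The `place_cells` object above supplies a population activity vector per 2D state via `get_activities`. A minimal sketch of such a population code is given below, assuming Gaussian tuning curves over grid coordinates; the field centres and width are illustrative, not the saved object's parameters.

import numpy as np

class PlaceCellsSketch:
    def __init__(self, n_cells, grid_size, field_width=2.0, seed=0):
        rng = np.random.default_rng(seed)
        # scatter field centres uniformly over the grid
        self.centres = rng.uniform(0, grid_size, size=(n_cells, 2))
        self.width = field_width

    def get_activities(self, states):
        # Gaussian bump of activity for each cell, for each queried (x, y) state
        states = np.atleast_2d(np.array(states, dtype=float))
        dists = np.linalg.norm(states[:, None, :] - self.centres[None, :, :], axis=-1)
        return np.exp(-(dists ** 2) / (2 * self.width ** 2))

For example, PlaceCellsSketch(n_cells=100, grid_size=20).get_activities([(15, 15)])[0] returns a 100-dimensional activity vector for that location.
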
Example #6
File: fc_training.py Project: annikc/MEMRL
    ## generate the environment object
    env = gym.make(env_name)
    plt.close()

    ## get state representations to be used
    state_reps, representation_name, input_dims, _ = rep_types[
        representation_type](env)

    ## create an actor-critic network and associated agent
    network = Network(input_dims=[input_dims],
                      fc1_dims=200,
                      fc2_dims=200,
                      output_dims=env.action_space.n,
                      lr=0.0005)
    memory = Memory(entry_size=env.action_space.n, cache_limit=400, mem_temp=1)
    agent = Agent(network, state_representations=state_reps, memory=memory)

    # create an experiment class instance
    ex = expt(agent, env)

    ex.run(num_trials, num_events)

    ex.record_log(env_name=env_name,
                  representation_type=representation_name,
                  n_trials=num_trials,
                  n_steps=num_events,
                  dir='../../Data/',
                  file=write_to_file)
'''
# print results of training
fig, ax = plt.subplots(2,1, sharex=True)
Example #7
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    params.lr = 0.001
    params.temp = 1.1
    print(params.__dict__)
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')
memtemp = 1
memory = Memory.EpisodicMemory(cache_limit=400,
                               entry_size=env.action_space.n,
                               mem_temp=memtemp)

agent = Agent(network, memory=memory)

run = expt(agent, env)
ntrials = 1000
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=100)
#run.record_log(f'mf_ec_t{memtemp}', env_name, n_trials=ntrials)

smoothing = 10
plt.figure()
plt.plot(rm(run.data['total_reward'], smoothing), c='k', alpha=0.5)
if 'bootstrap_reward' in run.data.keys():
    plt.plot(rm(run.data['bootstrap_reward'], smoothing), c='r')
plt.show()
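
`rm` above is a running-mean smoother applied to the reward curve before plotting. If it is not already available, an equivalent helper could look like the following simple moving average (not necessarily the project's exact definition).

import numpy as np

def rm(data, window):
    # simple moving average over a sliding window
    return np.convolve(np.asarray(data, dtype=float), np.ones(window) / window, mode='valid')
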
Example #8
load_id = 'd80ea92c-422c-436a-b0ff-84673d43a30d'

memory = EpisodicMemory(env.action_space.n, cache_limit=env.nstates)

oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))

oh_network = Network(input_dims=[input_dims],
                     fc1_dims=200,
                     fc2_dims=200,
                     output_dims=env.action_space.n,
                     lr=0.0005)
# the freshly constructed network is immediately replaced by previously saved weights
oh_network = torch.load(data_dir + f'agents/{load_id}.pt')
oh_agent = Agent(oh_network,
                 state_representations=oh_state_reps,
                 memory=memory)
oh_agent.get_action = oh_agent.EC_action

# retraining
env.set_reward({(15, 15): 10})

ex = expt(oh_agent, env)
ntrials = 2000
nsteps = 250
ex.run(ntrials, nsteps, printfreq=1)
ex.record_log('oh_episodic',
              env_name,
              ntrials,
              nsteps,
              dir=data_dir,
Example #9
# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    params.lr = 0.1
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')

## build a memory module that knows all the right actions
memory = Memory.EpisodicMemory(cache_limit=400,
                               entry_size=env.action_space.n,
                               mem_temp=1)

agent = Agent(network, memory=memory)
agent.EC_storage = EC_pass

run = expt(agent, env)

for coord, rep in zip(run.sample_states, run.sample_reps):
    actions = correct_actions(coord)
    if len(actions) == 0:
        item = {}
        item['activity'] = tuple(rep)
        item['action'] = 0
        item['delta'] = 0
        item['timestamp'] = 0
        item['trial'] = 0
        item['readable'] = coord
        run.agent.EC.add_mem(item)
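
`correct_actions` and `EC_pass` come from outside this snippet. Plausibly, `correct_actions(coord)` returns the action indices that move the agent toward the reward from `coord`, and `EC_pass` replaces `EC_storage` with a no-op so the hand-built cache is not overwritten during the run. The sketch below is written under those assumptions; the goal location and action indexing are illustrative.

def EC_pass(*args, **kwargs):
    # no-op replacement for EC_storage: keep the pre-loaded memory fixed during the run
    return None

def correct_actions(coord, goal=(15, 15)):
    # actions that reduce distance to the goal; indexing (0=down, 1=up, 2=right, 3=left) is illustrative
    x, y = coord
    gx, gy = goal
    actions = []
    if gx > x: actions.append(0)
    if gx < x: actions.append(1)
    if gy > y: actions.append(2)
    if gy < y: actions.append(3)
    return actions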
Example #10
num_trials = 25000
num_events = 250
relative_path_to_data = '../../Data'  # ../../Data if you are in Tests/CH2

# make gym environment
env = gym.make(env_name)
plt.close()

rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
state_reps, representation_name, input_dims, _ = rep_types[
    representation_type](env)

# load weights to head_ac network from previously learned agent
AC_head_agent = head_AC(input_dims, test_env.action_space.n, lr=0.0005)

agent = Agent(AC_head_agent, state_representations=state_reps)

ex = flat_expt(agent, test_env)
ex.run(num_trials, num_events, snapshot_logging=False)
ex.record_log(env_name=test_env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file)
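
The `rep_types` entries share one interface: given the environment, they return (state_reps, representation_name, input_dims, _). A minimal sketch of the one-hot case is shown below, following the one_hot_state/twoD2oneD usage elsewhere on this page; the function name and the None in the fourth slot are assumptions.

import numpy as np

def onehot_sketch(env):
    # one-hot vector over flattened grid states, indexed by the 1D state id
    state_reps = {}
    for state_2d in env.useable:
        idx = env.twoD2oneD(state_2d)
        vec = np.zeros(env.nstates)
        vec[idx] = 1.0
        state_reps[idx] = vec
    return state_reps, 'onehot', env.nstates, None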
Example #11
# valid representation types for this experiment
rep_types = {'conv': convs, 'rwd_conv': reward_convs}
param_set = {'conv': conv_PO_params, 'rwd_conv': conv_FO_params}

# instantiate the environment for the experiment
env = gym.make(env_name)
plt.close()

# get representation type, associated parameters to specify the network dimensions
state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)
params = param_set[representation_type]
network_parameters = params(env)
print(network_parameters.__dict__)

# make a new network instance
network = Network(network_parameters)
#from torchsummary import summary
#print(summary(network, (2,20,20)))

# reinitialize agent with new network
agent = Agent(network, state_representations=state_reps)

# expt - redefines logging function to keep track of network details
ex = conv_expt(agent, env)
ex.run(num_trials, num_events)
ex.record_log(env_name=env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file)

Example #12
    place_cells = pickle.load(f)['place_cells']

pc_state_reps = {}
oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))
    pc_state_reps[env.twoD2oneD(state)] = place_cells.get_activities([state])[0]

#oh_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
#oh_network = torch.load(data_dir+f'agents/{load_id}.pt')
#oh_agent = Agent(oh_network, state_representations=oh_state_reps)

#pc_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
pc_network = torch.load(data_dir + f'agents/{load_id}.pt')
pc_agent = Agent(pc_network, state_representations=pc_state_reps)

# retraining
env.set_reward({(15, 15): 10})

ex = expt(pc_agent, env)
ntrials = 10000
nsteps = 250
ex.run(ntrials, nsteps)
ex.data['place_cells'] = place_cells
ex.record_log('pc_retraining',
              env_name,
              ntrials,
              nsteps,
              dir=data_dir,
              file='ac_representation.csv')
Example #13
import modules.Agents.Networks as nets
from modules.Agents import Agent
from Tests.agent_test import agent_test
from Tests.representation_learn_test import rep_learning, latent_space_distance

import matplotlib.pyplot as plt

# create environment
env = gym.make('gym_grid:gridworld-v1')
plt.close()
# generate parameters for network from environment observation shape
params = nets.params(env)
# generate network
network = nets.ActorCritic(params)

agent = Agent(network, memory=None)

autoencoder, data, loss = rep_learning('onehot',
                                       env,
                                       n_samples=1000,
                                       training_cycles=500)

states = []

for i in range(env.nstates):
    s = np.zeros((1, env.nstates))
    s[0, i] = 1
    states.append(s)

actions = data[1][0:400]
latent_states, _, __ = autoencoder(states, actions)
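
Note that `states` above is a Python list of (1, nstates) one-hot rows. If the autoencoder expects a single batch array or tensor rather than a list, the rows could be stacked first; whether this is needed depends on the autoencoder's forward signature, which is not shown in this snippet.

import numpy as np
import torch

# optional: collapse the list of one-hot rows into one (nstates, nstates) batch
state_batch = torch.as_tensor(np.vstack(states), dtype=torch.float32)
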
Example #14
from modules.Agents import Agent
from modules.Experiments import flat_expt
sys.path.append('../../../')

write_to_file = 'flat_ac_training.csv'
version = 1
env_name = f'gridworld:gridworld-v{version}'
representation_type = 'latent'
num_trials = 5000
num_events = 250

# make gym environment
env = gym.make(env_name)
plt.close()

state_reps, representation_name, input_dims, _ = rep_types[
    representation_type](env)

for _ in range(1):
    empty_net = head_AC(input_dims, env.action_space.n, lr=0.0005)
    memory = Memory(entry_size=4, cache_limit=400)
    agent = Agent(empty_net, memory, state_representations=state_reps)

    ex = flat_expt(agent, env)
    ex.run(num_trials, num_events, snapshot_logging=False)
    ex.record_log(env_name=env_name,
                  representation_type=representation_name,
                  n_trials=num_trials,
                  n_steps=num_events,
                  dir='./Data/',
                  file=write_to_file)
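
After a run, the per-trial rewards collected by the experiment can be inspected the same way Example #7 does, e.g. with a smoothed plot; this assumes ex.data['total_reward'] is populated as in that example.

import numpy as np
import matplotlib.pyplot as plt

window = 10
rewards = np.asarray(ex.data['total_reward'], dtype=float)
smoothed = np.convolve(rewards, np.ones(window) / window, mode='valid')
plt.plot(smoothed, c='k')
plt.show()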