import sys
sys.path.append('../../../')

import gym
import matplotlib.pyplot as plt

from basic.modules.Agents import Agent
from basic.modules.Experiments import flat_expt
# head_AC and rep_types are assumed to be imported from this repo's network
# and representation modules (see the rep_types definition further below)

write_to_file = 'flat_ac_training.csv'
version = 1
env_name = f'gridworld:gridworld-v{version}'
representation_type = 'latent'
num_trials = 25000
num_events = 250

# make gym environment
env = gym.make(env_name)
plt.close()

state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)

for _ in range(1):
    empty_net = head_AC(input_dims, env.action_space.n, lr=0.0005)
    agent = Agent(empty_net, state_representations=state_reps)

    ex = flat_expt(agent, env)
    ex.run(num_trials, num_events, snapshot_logging=True)
    ex.record_log(env_name=env_name, representation_type=representation_name,
                  n_trials=num_trials, n_steps=num_events,
                  dir='../../Data/', file=write_to_file, mock_log=True)
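# ---------------------------------------------------------------------------
# A minimal sketch of the contract the rep_types builders above are assumed
# to satisfy: called with the env, each returns (state_reps, name,
# input_dims, extra). onehot_sketch below is hypothetical, not the repo's
# onehot builder.
import numpy as np

def onehot_sketch(env):
    # assumes a discrete gridworld observation space
    n_states = env.observation_space.n
    state_reps = {s: np.eye(n_states)[s] for s in range(n_states)}
    return state_reps, 'onehot', n_states, None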
import gym
import torch
import matplotlib.pyplot as plt

import basic.modules.Utils.gridworld_plotting as gp
# basic_agent_params, nets, Memory, Agent, and the experiment class ex are
# assumed to be imported from this repo's Agents/Experiments modules

# make environment to test agent in
env_name = 'gridworld:gridworld-v1'
expt_type = 'MF'
env = gym.make(env_name)

# get parameters for this specific model-free network
params = basic_agent_params(env)

# build network & memory modules
network = nets.ActorCritic(params)
memory = Memory(entry_size=params.action_dims, cache_limit=400)

# construct agent instance with network and memory modules
agent = Agent(network, memory=memory)

# choose action controller: episodic control (EC) or model-free (MF)
if expt_type == 'EC':
    agent.get_action = agent.EC_action
elif expt_type == 'MF':
    agent.get_action = agent.MF_action

# instantiate experiment
run = ex(agent, env)
print(env.obstacles_list)

#run.run(5000, 250, printfreq=10, render=False)
#run.record_log(expt_type, env_name, n_trials=5000, dir='../../Data/', file='test_environments.csv')

'''
# show examples from
state_obs = []
'''
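# A minimal sketch (not the repo's Experiments class) of why rebinding
# agent.get_action above switches controllers: the episode loop only ever
# calls agent.get_action(obs), so EC_action and MF_action are interchangeable.
# Assumes the pre-0.26 gym step API used elsewhere in this repo.
def sketch_episode(agent, env, max_steps=250):
    obs = env.reset()
    total_reward = 0.0
    for _ in range(max_steps):
        action = agent.get_action(obs)  # EC_action or MF_action, chosen above
        obs, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward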
num_trials = 25000
num_events = 250
relative_path_to_data = '../../Data'  # ../../Data if you are in Tests/CH2

# make gym environment (test_env_name is set elsewhere in this script)
test_env = gym.make(test_env_name)
plt.close()

rep_types = {'onehot': onehot,
             'random': random,
             'place_cell': place_cell,
             'sr': sr,
             'latent': saved_latents}

state_reps, representation_name, input_dims, _ = rep_types[representation_type](test_env)

# build the actor-critic head network (weights from a previously learned
# agent can be loaded here -- see the sketch below)
AC_head_agent = head_AC(input_dims, test_env.action_space.n, lr=0.0005)

agent = Agent(AC_head_agent, state_representations=state_reps)

ex = flat_expt(agent, test_env)
ex.run(num_trials, num_events, snapshot_logging=False)
ex.record_log(env_name=test_env_name, representation_type=representation_name,
              n_trials=num_trials, n_steps=num_events,
              dir=relative_path_to_data, file=write_to_file)
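# A minimal sketch of the weight-loading step the comment above refers to,
# using standard PyTorch calls; the checkpoint path and filename are
# hypothetical, and head_AC is assumed to be an nn.Module. It would sit just
# after AC_head_agent is constructed:
# import torch
# saved_weights = torch.load(relative_path_to_data + '/agents/saved_agent.pt')  # hypothetical path
# AC_head_agent.load_state_dict(saved_weights)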
for _ in range(1):
    ## generate the environment object
    env = gym.make(env_name)
    plt.close()

    ## get state representations to be used
    state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)

    ## create an actor-critic network and associated agent
    network = Network(input_dims=[input_dims], fc1_dims=200, fc2_dims=200,
                      output_dims=env.action_space.n, lr=0.0005)
    memory = Memory(entry_size=env.action_space.n, cache_limit=400, mem_temp=1)
    agent = Agent(network, state_representations=state_reps, memory=memory)

    # create an experiment class instance
    ex = expt(agent, env)
    ex.run(num_trials, num_events)
    ex.record_log(env_name=env_name, representation_type=representation_name,
                  n_trials=num_trials, n_steps=num_events,
                  dir='../../Data/', file=write_to_file)

'''
# print results of training
fig, ax = plt.subplots(2, 1, sharex=True)
ax[0].plot(ex.data['total_reward'])
ax[1].plot(ex.data['loss'][0], label='P_loss')
ax[1].plot(ex.data['loss'][1], label='V_loss')
'''
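# A minimal sketch for inspecting the csv log written by record_log above.
# Assumes record_log appends one row per call; the exact column schema is the
# repo's, not shown here.
import pandas as pd

log = pd.read_csv('../../Data/' + write_to_file)
print(log.tail())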
num_events = 250

# valid representation types for this experiment
rep_types = {'conv': convs, 'rwd_conv': reward_convs}
param_set = {'conv': conv_PO_params, 'rwd_conv': conv_FO_params}

# instantiate the environment for the experiment
env = gym.make(env_name)
plt.close()

# get representation type and associated parameters to specify the network dimensions
state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)
params = param_set[representation_type]
network_parameters = params(env)

# make a new network instance
network = Network(network_parameters)

# reinitialize agent with the new network
agent = Agent(network, state_representations=state_reps)

# conv_expt redefines the logging function to keep track of network details
ex = conv_expt(agent, env)
ex.run(num_trials, num_events, printfreq=10)
ex.record_log(env_name=env_name, representation_type=representation_name,
              n_trials=num_trials, n_steps=num_events,
              dir=relative_path_to_data, file=write_to_file)
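# A hypothetical sketch of the parameter-container pattern assumed above:
# conv_PO_params / conv_FO_params take the env and return an object whose
# fields give the network its dimensions. Field names here are illustrative,
# not the repo's exact attributes.
class SketchConvParams:
    def __init__(self, env):
        self.input_dims = env.observation_space.shape  # e.g. image frames
        self.action_dims = env.action_space.n
        self.lr = 0.0005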
env_name = 'gym_grid:gridworld-v1'

# create environment
env = gym.make(env_name)
plt.close()

network_params = nets.fc_params(env)
network_params.lr = 0.005
print(network_params.__dict__)
network = nets.ActorCritic(network_params)

# generate agent
ntrials = 20000
nevents = 250
agent = Agent(network, None)

run = expt(agent, env)
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=nevents, printfreq=10, snap=False)
run.record_log(dir='../../../Data/', file='actor_critic_learning.csv',
               expt_type='MFonly', env_name=env_name,
               n_trials=ntrials, n_steps=nevents)

plt.figure()
plt.plot(run.data['total_reward'])

"""
plt.figure()
ac_pref = pref_ac_action(run.data['P_snap'][-1])
plt.imshow(ac_pref)
"""
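# Per-trial rewards over 20000 trials are noisy; a short smoothing sketch
# using a running average (window size is an arbitrary choice):
import numpy as np

window = 100
smoothed = np.convolve(run.data['total_reward'], np.ones(window) / window, mode='valid')
plt.figure()
plt.plot(smoothed)
plt.show()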