def test_speed_dataset(env):
    """Benchmark batch-generation speed of `env` through ngym.Dataset.

    Prints the average wall-clock time per batch and per (sample, timestep).
    """
    n_batch, batch_size, seq_len = 100, 16, 100
    dataset = ngym.Dataset(env, env_kwargs={}, batch_size=batch_size,
                           seq_len=seq_len)
    t_start = time.time()
    for _ in range(n_batch):
        dataset()
    elapsed = time.time() - t_start
    # A "step" is one timestep of one sequence in the batch.
    time_per_batch = elapsed / n_batch
    time_per_step = elapsed / (n_batch * batch_size * seq_len)
    print('Time/batch {:0.3f}us [with dataset]'.format(time_per_batch * 1e6))
    print('Time/step {:0.3f}us [with dataset]'.format(time_per_step * 1e6))
def _test_examples_different(env):
    """Test that each example in a batch is different."""
    # Sequences must be long enough for variability in inputs or target
    # to show up.
    n_examples, n_steps = 32, 1000
    dataset = ngym.Dataset(env, batch_size=n_examples, seq_len=n_steps)
    inputs, target = dataset()
    # Deviation of every example from the batch mean (axis 1 is the batch
    # axis); a zero sum of squares would mean all examples are identical.
    dev_inputs = inputs - np.mean(inputs, axis=1, keepdims=True)
    dev_target = target - np.mean(target, axis=1, keepdims=True)
    assert np.sum(dev_inputs ** 2) > 0
    assert np.sum(dev_target ** 2) > 0
def _test_env(env):
    """Test if one environment can at least be run with Dataset."""
    n_examples, n_steps = 32, 40
    dataset = ngym.Dataset(env, env_kwargs={'dt': 100},
                           batch_size=n_examples, seq_len=n_steps)
    inputs = target = None
    for _ in range(2):
        inputs, target = dataset()
        # Default layout is time-major: (seq_len, batch_size, ...).
        assert inputs.shape[:2] == (n_steps, n_examples)
        assert target.shape[:2] == (n_steps, n_examples)
    return inputs, target
def run(alg, alg_kwargs, task, task_kwargs, wrappers_kwargs, expl_params,
        rollout, num_trials, folder, n_thrds, n_lstm, rerun=False,
        test_kwargs=None, num_retrains=10, seed=0, train_mode=None,
        sl_kwargs=None):
    """Train (RL or SL) an agent on `task` and optionally run test passes.

    Args:
        alg: str, RL algorithm name ('A2C', 'ACER', 'ACKTR' or 'PPO2').
        alg_kwargs: dict, extra keyword args for the algorithm constructor.
        task: str, environment ID.
        task_kwargs: dict, keyword args for the environment.
        wrappers_kwargs: dict, per-wrapper keyword args.
        expl_params: exploration parameters (saved to params.npz; not used
            directly here).
        rollout: int, rollout/sequence length.
        num_trials: int, total number of trials to train for.
        folder: str, output folder for models, params and logs.
        n_thrds: int, number of parallel environments (RL mode).
        n_lstm: int, LSTM hidden size.
        rerun: bool, retrain even if model files already exist.
        test_kwargs: optional dict mapping a suffix key to test settings;
            defaults to no testing. (Previously a mutable `{}` default that
            the body mutated via test_kwargs[key]['seed'] = seed, leaking
            state across calls — fixed to None.)
        num_retrains: unused here; kept for interface compatibility.
        seed: int, random seed.
        train_mode: 'RL' (default) or 'SL'.
        sl_kwargs: dict of SL-specific settings (required when 'SL').
    """
    # BUGFIX: mutable default argument. The body mutates test_kwargs, so a
    # shared `{}` default would carry state between calls.
    test_kwargs = {} if test_kwargs is None else test_kwargs
    train_mode = train_mode or 'RL'
    env = test_env(task, kwargs=task_kwargs, num_steps=1000)
    num_timesteps = int(1000 * num_trials / env.num_tr)
    files = glob.glob(folder + '/*model*')
    vars_ = {'alg': alg, 'alg_kwargs': alg_kwargs, 'task': task,
             'task_kwargs': task_kwargs, 'wrappers_kwargs': wrappers_kwargs,
             'expl_params': expl_params, 'rollout': rollout, 'folder': folder,
             'num_trials': num_trials, 'n_thrds': n_thrds, 'n_lstm': n_lstm}
    np.savez(folder + '/params.npz', **vars_)
    if len(files) == 0 or rerun:
        if train_mode == 'RL':
            if alg == "A2C":
                from stable_baselines import A2C as algo
            elif alg == "ACER":
                from stable_baselines import ACER as algo
            elif alg == "ACKTR":
                from stable_baselines import ACKTR as algo
            elif alg == "PPO2":
                from stable_baselines import PPO2 as algo
            env = SubprocVecEnv([make_env(env_id=task, rank=i, seed=seed,
                                          wrapps=wrappers_kwargs,
                                          **task_kwargs)
                                 for i in range(n_thrds)])
            model = algo(LstmPolicy, env, verbose=0, n_steps=rollout,
                         n_cpu_tf_sess=n_thrds, tensorboard_log=None,
                         policy_kwargs={"feature_extraction": "mlp",
                                        "n_lstm": n_lstm},
                         **alg_kwargs)
            # this assumes 1 trial ~ 10 steps
            sv_freq = 5 * wrappers_kwargs['MonitorExtended-v0']['sv_per']
            chckpnt_cllbck = CheckpointCallback(save_freq=sv_freq,
                                                save_path=folder,
                                                name_prefix='model')
            model.learn(total_timesteps=num_timesteps,
                        callback=chckpnt_cllbck)
            model.save(f"{folder}/model_{num_timesteps}_steps.zip")
            plotting.plot_rew_across_training(folder=folder)
        elif train_mode == 'SL':
            stps_ep = sl_kwargs['steps_per_epoch']
            # SL training does not use the action/reward/monitor wrappers.
            wraps_sl = deepc(wrappers_kwargs)
            del wraps_sl['PassAction-v0']
            del wraps_sl['PassReward-v0']
            del wraps_sl['MonitorExtended-v0']
            env = make_env(env_id=task, rank=0, seed=seed, wrapps=wraps_sl,
                           **task_kwargs)()
            dataset = ngym.Dataset(env, batch_size=sl_kwargs['btch_s'],
                                   seq_len=rollout, batch_first=True)
            obs_size = env.observation_space.shape[0]
            act_size = env.action_space.n
            model = define_model(seq_len=rollout, num_h=n_lstm,
                                 obs_size=obs_size, act_size=act_size,
                                 batch_size=sl_kwargs['btch_s'],
                                 stateful=sl_kwargs['stateful'],
                                 loss=sl_kwargs['loss'])
            # Train network
            data_generator = (dataset() for i in range(stps_ep))
            model.fit(data_generator, verbose=1, steps_per_epoch=stps_ep)
            model.save(f"{folder}/model_{stps_ep}_steps")
    # NOTE(review): the testing pass below is reconstructed at function level
    # (runs whether or not training was skipped) — confirm against original
    # indentation.
    if len(test_kwargs) != 0:
        for key in test_kwargs.keys():
            sv_folder = folder + key
            test_kwargs[key]['seed'] = seed
            if train_mode == 'RL':
                if '_all' not in key:
                    ga.get_activity(folder, alg, sv_folder,
                                    **test_kwargs[key])
                else:
                    # Evaluate every saved checkpoint separately.
                    files = glob.glob(folder + '/model_*_steps.zip')
                    for f in files:
                        model_name = os.path.basename(f)
                        sv_f = folder + key + '_' + model_name[:-4]
                        ga.get_activity(folder, alg, sv_folder=sv_f,
                                        model_name=model_name,
                                        **test_kwargs[key])
            elif train_mode == 'SL':
                stps_ep = sl_kwargs['steps_per_epoch']
                wraps_sl = deepc(wrappers_kwargs)
                wraps_sl.update(test_kwargs[key]['wrappers'])
                del wraps_sl['PassAction-v0']
                del wraps_sl['PassReward-v0']
                env = make_env(env_id=task, rank=0, seed=seed,
                               wrapps=wraps_sl, **task_kwargs)()
                obs_size = env.observation_space.shape[0]
                act_size = env.action_space.n
                # Stateful single-step model for online evaluation.
                model_test = define_model(seq_len=1, batch_size=1,
                                          obs_size=obs_size,
                                          act_size=act_size,
                                          stateful=sl_kwargs['stateful'],
                                          num_h=n_lstm,
                                          loss=sl_kwargs['loss'])
                # BUGFIX: .replace previously applied only to the literal
                # '_steps' (a no-op); normalize the whole weights path.
                ld_f = (folder + 'model_' + str(stps_ep)
                        + '_steps').replace('//', '/')
                model_test.load_weights(ld_f)
                env.reset()
                for ind_stp in range(sl_kwargs['test_steps']):
                    obs = env.ob_now
                    # Add batch and time axes: (1, 1, obs_size).
                    obs = obs[np.newaxis]
                    obs = obs[np.newaxis]
                    action = model_test.predict(obs)
                    action = np.argmax(action, axis=-1)[0]
                    _, _, _, _ = env.step(action)
# Tutorial-style script: build a supervised neurogym dataset, visualize the
# environment, and set up tensors for a network.
import network_pytorch1 as net1
import json
import torch
import numpy as np
import neurogym as ngym
# NOTE(review): `plt` conventionally aliases matplotlib.pyplot; importing
# the top-level matplotlib package under that name looks like a typo —
# verify downstream usage.
import matplotlib as plt

# Environment
task = 'PerceptualDecisionMaking-v0'
kwargs = {'dt': 100}
seq_len = 100

# Make supervised dataset
dataset = ngym.Dataset(task, env_kwargs=kwargs, batch_size=16,
                       seq_len=seq_len)
# A sample environment from dataset
env = dataset.env
# Visualize the environment with 2 sample trials
_ = ngym.utils.plot_env(env, num_trials=2)

# Network input and output size
input_size = env.observation_space.shape[0]
output_size = env.action_space.n

# NOTE(review): x is (20, 32) while target is (20, 33) — confirm the shape
# mismatch is intentional.
x = torch.empty(20, 32, device='cpu')
target = torch.ones(20, 33, device='cpu')
torch.nn.init.xavier_normal_(x)

# NOTE(review): the body of this `with` block continues beyond this chunk.
with open('model_tf/debug/hp.json') as f:
# --- Fragment: interior of a Dataset __call__-style method; the enclosing
# `def` lies outside this chunk. Returns one (inputs, target) batch sliced
# from a pre-generated cache. ---
        # Advance the sliding window over the cached arrays; regenerate the
        # cache when the window would run past the cached length.
        self._seq_end = self._seq_start + self.seq_len
        if self._seq_end >= self._cache_len:
            self._cache(**kwargs)
        if self.batch_first:
            # batch_first layout: slice along axis 1 (the time axis).
            inputs = self._inputs[:, self._seq_start:self._seq_end, ...]
            target = self._target[:, self._seq_start:self._seq_end, ...]
        else:
            # Time-major layout: time is the leading axis.
            inputs = self._inputs[self._seq_start:self._seq_end]
            target = self._target[self._seq_start:self._seq_end]
        # Next call continues where this window ended.
        self._seq_start = self._seq_end
        return inputs, target
        # return inputs, np.expand_dims(target, axis=2)


if __name__ == '__main__':
    # Smoke test: draw two batches from a sample environment.
    import neurogym as ngym
    dataset = ngym.Dataset('PerceptualDecisionMaking-v0',
                           env_kwargs={'dt': 100}, batch_size=32, seq_len=40)
    inputs_list = list()
    for i in range(2):
        inputs, target = dataset()
        inputs_list.append(inputs)
        # print(inputs.shape)
        # print(target.shape)
# Script chunk: build a multitask supervised dataset from the 'yang19'
# collection and set up an RNN model plus optimizer. The module-level names
# defined here (env, model, criterion, optimizer, print_step) appear to be
# used by a training loop beyond this chunk.
from neurogym.wrappers import ScheduleEnvs
from neurogym.utils.scheduler import RandomSchedule
from models import RNNNet, get_performance

# Environment
kwargs = {'dt': 100}
# kwargs = {'dt': 100, 'sigma': 0, 'dim_ring': 2, 'cohs': [0.1, 0.3, 0.6, 1.0]}
seq_len = 100

# Make supervised dataset: one environment per yang19 task, interleaved
# under a random schedule. NOTE(review): env_input=True presumably appends
# a task-identity signal to the observation — verify in ScheduleEnvs docs.
tasks = ngym.get_collection('yang19')
envs = [gym.make(task, **kwargs) for task in tasks]
schedule = RandomSchedule(len(envs))
env = ScheduleEnvs(envs, schedule=schedule, env_input=True)
dataset = ngym.Dataset(env, batch_size=4, seq_len=seq_len)
env = dataset.env
ob_size = env.observation_space.shape[0]
act_size = env.action_space.n

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
model = RNNNet(input_size=ob_size, hidden_size=256,
               output_size=act_size, dt=env.dt).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Interval (in training steps) between progress reports.
print_step = 200
def train_task(task, pretrain_task=None):
    """Train an RNN on `task` with supervised learning.

    Args:
        task: str, environment ID to train on.
        pretrain_task: optional str; when given, the model is initialized
            from the weights previously saved for that task (fine-tuning).

    Side effects: writes the training log (np.savez_compressed) and the
    final model state_dict under 'files/'.
    """
    # Environment
    kwargs = {'dt': 100}
    seq_len = 100

    env = gym.make(task, **kwargs)
    dataset = ngym.Dataset(env, batch_size=4, seq_len=seq_len)
    env = dataset.env
    ob_size = env.observation_space.shape[0]
    act_size = env.action_space.n

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    model = RNNNet(input_size=ob_size, hidden_size=256,
                   output_size=act_size, dt=env.dt).to(device)
    if pretrain_task is not None:
        # Warm-start from the pretrained task's saved weights.
        fname = os.path.join('files', get_modelname(pretrain_task))
        model.load_state_dict(
            torch.load(fname, map_location=torch.device(device)))

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    print_step = 200
    running_loss = 0.0
    running_task_time = 0
    running_train_time = 0
    log = defaultdict(list)
    for i in range(2000):
        # Time data generation separately from the optimization step.
        task_time_start = time.time()
        inputs, labels = dataset()
        running_task_time += time.time() - task_time_start
        inputs = torch.from_numpy(inputs).type(torch.float).to(device)
        labels = torch.from_numpy(
            labels.flatten()).type(torch.long).to(device)

        train_time_start = time.time()
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs, _ = model(inputs)
        loss = criterion(outputs.view(-1, act_size), labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        running_train_time += time.time() - train_time_start

        # print statistics
        running_loss += loss.item()
        if i % print_step == (print_step - 1):
            running_loss /= print_step
            log['step'].append(i)
            log['loss'].append(running_loss)
            print('{:d} loss: {:0.5f}'.format(i + 1, running_loss))
            running_loss = 0.0
            # Dead `if True:` conditional removed — the timing report always
            # ran, so print it unconditionally.
            print('Task/Train time {:0.1f}/{:0.1f} ms/step'.format(
                running_task_time / print_step * 1e3,
                running_train_time / print_step * 1e3))
            running_task_time, running_train_time = 0, 0

            perf = get_performance(model, env, num_trial=200, device=device)
            log['perf'].append(perf)
            print('{:d} perf: {:0.2f}'.format(i + 1, perf))

    # Persist the training log and the final model weights.
    fname = os.path.join('files', get_logname(task, pretrain_task))
    np.savez_compressed(fname, **log)
    fname = os.path.join('files', get_modelname(task, pretrain_task))
    torch.save(model.state_dict(), fname)
    print('Finished Training')
def train_network(envid):
    """Supervised training networks.

    Save network in a path determined by environment ID.

    Args:
        envid: str, environment ID.
    """
    modelpath = get_modelpath(envid)
    config = {
        'dt': 100,
        'hidden_size': 64,
        'lr': 1e-2,
        'batch_size': 16,
        'seq_len': 100,
        'envid': envid,
    }
    config['env_kwargs'] = {'dt': config['dt']}

    # Persist the configuration next to the model weights.
    with open(modelpath / 'config.json', 'w') as f:
        json.dump(config, f)

    # Supervised dataset for the requested environment.
    dataset = ngym.Dataset(envid, env_kwargs=config['env_kwargs'],
                           batch_size=config['batch_size'],
                           seq_len=config['seq_len'])
    env = dataset.env
    act_size = env.action_space.n

    # Network, loss and optimizer.
    net = Net(input_size=env.observation_space.shape[0],
              hidden_size=config['hidden_size'],
              output_size=act_size).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config['lr'])

    print('Training task ', envid)
    loss_sum = 0.0
    for step in range(2000):
        batch_in, batch_lbl = dataset()
        batch_in = torch.from_numpy(batch_in).type(torch.float).to(device)
        batch_lbl = torch.from_numpy(
            batch_lbl.flatten()).type(torch.long).to(device)

        # Standard zero-grad / forward / backward / step cycle.
        optimizer.zero_grad()
        logits, _ = net(batch_in)
        loss = criterion(logits.view(-1, act_size), batch_lbl)
        loss.backward()
        optimizer.step()

        # Report the mean loss every 200 steps.
        loss_sum += loss.item()
        if step % 200 == 199:
            print('{:d} loss: {:0.5f}'.format(step + 1, loss_sum / 200))
            loss_sum = 0.0

    torch.save(net.state_dict(), modelpath / 'net.pth')
    print('Finished Training')