Example No. 1
def test_speed_dataset(env):
    """Benchmark how quickly ngym.Dataset generates batches for an environment."""
    batch_size = 16
    seq_len = 100
    kwargs = {}
    dataset = ngym.Dataset(
        env, env_kwargs=kwargs, batch_size=batch_size, seq_len=seq_len)
    n_batch = 100
    start_time = time.time()
    for _ in range(n_batch):
        _, _ = dataset()
    total_time = time.time() - start_time
    time_per_batch = total_time / n_batch
    time_per_step = total_time / n_batch / batch_size / seq_len
    print('Time/batch {:0.3f}us [with dataset]'.format(time_per_batch * 1e6))
    print('Time/step {:0.3f}us [with dataset]'.format(time_per_step * 1e6))
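A usage sketch (not part of the original test module): `ngym.Dataset` accepts a registered task ID string as well as an environment instance (as in Example No. 9), so the benchmark can be invoked directly with a task name, provided `time` and `neurogym` are imported at module level.

import time
import neurogym as ngym

# Invoke the benchmark with a registered task ID (illustrative choice).
test_speed_dataset('PerceptualDecisionMaking-v0')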
Example No. 2
def _test_examples_different(env):
    """Test that each example in a batch is different."""
    batch_size = 32
    # Sequences need to be long enough to guarantee variability in inputs and targets.
    seq_len = 1000
    dataset = ngym.Dataset(env, batch_size=batch_size, seq_len=seq_len)
    inputs, target = dataset()
    # Average across the batch dimension (axis=1 in the default (seq, batch, ...) layout)
    batch_mean_inputs = np.mean(inputs, axis=1, keepdims=True)
    batch_mean_target = np.mean(target, axis=1, keepdims=True)

    batch_diff_inputs = inputs - batch_mean_inputs
    batch_diff_target = target - batch_mean_target

    # If every example in the batch were identical, these deviations would all be zero.
    assert np.sum(batch_diff_inputs**2) > 0
    assert np.sum(batch_diff_target**2) > 0
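A sketch of running this check across tasks, assuming neurogym's `all_envs()` registry helper:

import neurogym as ngym

# Hypothetical sweep: apply the variability test to every registered task.
for env_id in ngym.all_envs():
    _test_examples_different(env_id)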
Example No. 3
def _test_env(env):
    """Test if one environment can at least be run with Dataset."""
    batch_size = 32
    seq_len = 40
    dataset = ngym.Dataset(env,
                           env_kwargs={'dt': 100},
                           batch_size=batch_size,
                           seq_len=seq_len)
    for _ in range(2):
        inputs, target = dataset()
        # Default layout is (seq_len, batch_size, ...).
        assert inputs.shape[0] == seq_len
        assert inputs.shape[1] == batch_size
        assert target.shape[0] == seq_len
        assert target.shape[1] == batch_size

    return inputs, target
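For comparison, a minimal sketch of the batch-first layout, reusing the arguments above (`batch_first` is the same flag used in Examples No. 4 and 6):

dataset = ngym.Dataset('PerceptualDecisionMaking-v0',
                       env_kwargs={'dt': 100},
                       batch_size=32,
                       seq_len=40,
                       batch_first=True)
inputs, target = dataset()
# With batch_first=True the first two axes are swapped relative to the test above.
assert inputs.shape[0] == 32  # batch_size
assert inputs.shape[1] == 40  # seq_len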
Example No. 4
def run(alg,
        alg_kwargs,
        task,
        task_kwargs,
        wrappers_kwargs,
        expl_params,
        rollout,
        num_trials,
        folder,
        n_thrds,
        n_lstm,
        rerun=False,
        test_kwargs=None,
        num_retrains=10,
        seed=0,
        train_mode=None,
        sl_kwargs=None):
    # Use None rather than a mutable dict as the default argument.
    test_kwargs = test_kwargs or {}
    train_mode = train_mode or 'RL'
    env = test_env(task, kwargs=task_kwargs, num_steps=1000)
    # Estimate total timesteps from the trials-per-step ratio of a 1000-step probe run.
    num_timesteps = int(1000 * num_trials / env.num_tr)
    files = glob.glob(folder + '/*model*')
    vars_ = {
        'alg': alg,
        'alg_kwargs': alg_kwargs,
        'task': task,
        'task_kwargs': task_kwargs,
        'wrappers_kwargs': wrappers_kwargs,
        'expl_params': expl_params,
        'rollout': rollout,
        'folder': folder,
        'num_trials': num_trials,
        'n_thrds': n_thrds,
        'n_lstm': n_lstm
    }
    np.savez(folder + '/params.npz', **vars_)
    if len(files) == 0 or rerun:
        if train_mode == 'RL':
            if alg == "A2C":
                from stable_baselines import A2C as algo
            elif alg == "ACER":
                from stable_baselines import ACER as algo
            elif alg == "ACKTR":
                from stable_baselines import ACKTR as algo
            elif alg == "PPO2":
                from stable_baselines import PPO2 as algo
            env = SubprocVecEnv([
                make_env(env_id=task,
                         rank=i,
                         seed=seed,
                         wrapps=wrappers_kwargs,
                         **task_kwargs) for i in range(n_thrds)
            ])
            model = algo(LstmPolicy,
                         env,
                         verbose=0,
                         n_steps=rollout,
                         n_cpu_tf_sess=n_thrds,
                         tensorboard_log=None,
                         policy_kwargs={
                             "feature_extraction": "mlp",
                             "n_lstm": n_lstm
                         },
                         **alg_kwargs)
            # this assumes 1 trial ~ 10 steps
            sv_freq = 5 * wrappers_kwargs['MonitorExtended-v0']['sv_per']
            chckpnt_cllbck = CheckpointCallback(save_freq=sv_freq,
                                                save_path=folder,
                                                name_prefix='model')
            model.learn(total_timesteps=num_timesteps, callback=chckpnt_cllbck)
            model.save(f"{folder}/model_{num_timesteps}_steps.zip")
            plotting.plot_rew_across_training(folder=folder)
        elif train_mode == 'SL':
            stps_ep = sl_kwargs['steps_per_epoch']
            wraps_sl = deepc(wrappers_kwargs)
            del wraps_sl['PassAction-v0']
            del wraps_sl['PassReward-v0']
            del wraps_sl['MonitorExtended-v0']
            env = make_env(env_id=task,
                           rank=0,
                           seed=seed,
                           wrapps=wraps_sl,
                           **task_kwargs)()
            dataset = ngym.Dataset(env,
                                   batch_size=sl_kwargs['btch_s'],
                                   seq_len=rollout,
                                   batch_first=True)
            obs_size = env.observation_space.shape[0]
            act_size = env.action_space.n
            model = define_model(seq_len=rollout,
                                 num_h=n_lstm,
                                 obs_size=obs_size,
                                 act_size=act_size,
                                 batch_size=sl_kwargs['btch_s'],
                                 stateful=sl_kwargs['stateful'],
                                 loss=sl_kwargs['loss'])
            # Train network
            data_generator = (dataset() for _ in range(stps_ep))
            model.fit(data_generator, verbose=1, steps_per_epoch=stps_ep)
            model.save(f"{folder}/model_{stps_ep}_steps")

    if test_kwargs:
        for key in test_kwargs:
            sv_folder = folder + key
            test_kwargs[key]['seed'] = seed
            if train_mode == 'RL':
                if '_all' not in key:
                    ga.get_activity(folder, alg, sv_folder, **test_kwargs[key])
                else:
                    files = glob.glob(folder + '/model_*_steps.zip')
                    for f in files:
                        model_name = os.path.basename(f)
                        sv_f = folder + key + '_' + model_name[:-4]
                        ga.get_activity(folder,
                                        alg,
                                        sv_folder=sv_f,
                                        model_name=model_name,
                                        **test_kwargs[key])

            elif train_mode == 'SL':
                stps_ep = sl_kwargs['steps_per_epoch']
                wraps_sl = deepc(wrappers_kwargs)
                wraps_sl.update(test_kwargs[key]['wrappers'])
                del wraps_sl['PassAction-v0']
                del wraps_sl['PassReward-v0']
                env = make_env(env_id=task,
                               rank=0,
                               seed=seed,
                               wrapps=wraps_sl,
                               **task_kwargs)()
                obs_size = env.observation_space.shape[0]
                act_size = env.action_space.n
                model_test = define_model(seq_len=1,
                                          batch_size=1,
                                          obs_size=obs_size,
                                          act_size=act_size,
                                          stateful=sl_kwargs['stateful'],
                                          num_h=n_lstm,
                                          loss=sl_kwargs['loss'])
                ld_f = (folder + 'model_' + str(stps_ep) +
                        '_steps').replace('//', '/')
                model_test.load_weights(ld_f)
                env.reset()
                for ind_stp in range(sl_kwargs['test_steps']):
                    obs = env.ob_now
                    # Add batch and time dimensions: (obs,) -> (1, 1, obs).
                    obs = obs[np.newaxis, np.newaxis]
                    action = model_test.predict(obs)
                    action = np.argmax(action, axis=-1)[0]
                    _, _, _, _ = env.step(action)
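The `make_env` helper called above is not shown in this excerpt; below is a minimal sketch of the factory pattern it follows when used with `SubprocVecEnv`. The wrapper-application step is an assumption based on the `wrapps` argument.

import gym

def make_env(env_id, rank, seed=0, wrapps=None, **kwargs):
    """Hypothetical factory returning a thunk that builds and seeds one env."""
    def _init():
        env = gym.make(env_id, **kwargs)
        env.seed(seed + rank)
        # Wrappers listed in `wrapps` (assumed {wrapper_id: kwargs}) would be
        # applied here before returning the environment.
        return env
    return _init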
Example No. 5
import network_pytorch1 as net1
import json
import torch
import numpy as np
import neurogym as ngym
import matplotlib.pyplot as plt

# Environment
task = 'PerceptualDecisionMaking-v0'
kwargs = {'dt': 100}
seq_len = 100

# Make supervised dataset
dataset = ngym.Dataset(task, env_kwargs=kwargs, batch_size=16,
                       seq_len=seq_len)

# A sample environment from dataset
env = dataset.env
# Visualize the environment with 2 sample trials
_ = ngym.utils.plot_env(env, num_trials=2)

# Network input and output size
input_size = env.observation_space.shape[0]
output_size = env.action_space.n


x = torch.empty(20, 32, device='cpu')
target = torch.ones(20, 33, device='cpu')
torch.nn.init.xavier_normal_(x)

with open('model_tf/debug/hp.json') as f:
    hp = json.load(f)  # load hyperparameters saved by the TensorFlow model
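A sketch of the step that typically follows this setup (mirroring the training loops in Examples No. 8 and 9): converting one batch from the dataset into torch tensors.

inputs, labels = dataset()
inputs = torch.from_numpy(inputs).type(torch.float)
labels = torch.from_numpy(labels.flatten()).type(torch.long)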
Example No. 6
        # Advance the sliding window over the cached data.
        self._seq_end = self._seq_start + self.seq_len

        # Regenerate the cache when the window would run past its end.
        if self._seq_end >= self._cache_len:
            self._cache(**kwargs)

        if self.batch_first:
            # (batch, seq, ...) layout
            inputs = self._inputs[:, self._seq_start:self._seq_end, ...]
            target = self._target[:, self._seq_start:self._seq_end, ...]
        else:
            # (seq, batch, ...) layout
            inputs = self._inputs[self._seq_start:self._seq_end]
            target = self._target[self._seq_start:self._seq_end]

        self._seq_start = self._seq_end
        return inputs, target
        # return inputs, np.expand_dims(target, axis=2)


if __name__ == '__main__':
    import neurogym as ngym
    dataset = ngym.Dataset('PerceptualDecisionMaking-v0',
                           env_kwargs={'dt': 100},
                           batch_size=32,
                           seq_len=40)
    inputs_list = []
    for _ in range(2):
        inputs, target = dataset()
        inputs_list.append(inputs)
    # print(inputs.shape)
    # print(target.shape)
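    # Added sanity check: with the default batch_first=False, inputs has shape
    # (seq_len, batch_size, obs_dim) and target has shape (seq_len, batch_size).
    assert inputs.shape[:2] == (40, 32)
    assert target.shape[:2] == (40, 32)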
Example No. 7
import gym
import torch
import torch.nn as nn
import neurogym as ngym

from neurogym.wrappers import ScheduleEnvs
from neurogym.utils.scheduler import RandomSchedule

from models import RNNNet, get_performance

# Environment
kwargs = {'dt': 100}
# kwargs = {'dt': 100, 'sigma': 0, 'dim_ring': 2, 'cohs': [0.1, 0.3, 0.6, 1.0]}
seq_len = 100

# Make supervised dataset
tasks = ngym.get_collection('yang19')
envs = [gym.make(task, **kwargs) for task in tasks]
schedule = RandomSchedule(len(envs))
env = ScheduleEnvs(envs, schedule=schedule, env_input=True)
dataset = ngym.Dataset(env, batch_size=4, seq_len=seq_len)

env = dataset.env
ob_size = env.observation_space.shape[0]
act_size = env.action_space.n

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
model = RNNNet(input_size=ob_size,
               hidden_size=256,
               output_size=act_size,
               dt=env.dt).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

print_step = 200
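The snippet ends at the loop setup; the training loop that typically follows is the one shown in Example No. 8, in brief:

running_loss = 0.0
for i in range(2000):
    inputs, labels = dataset()
    inputs = torch.from_numpy(inputs).type(torch.float).to(device)
    labels = torch.from_numpy(labels.flatten()).type(torch.long).to(device)

    optimizer.zero_grad()
    outputs, _ = model(inputs)
    loss = criterion(outputs.view(-1, act_size), labels)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    if i % print_step == print_step - 1:
        print('{:d} loss: {:0.5f}'.format(i + 1, running_loss / print_step))
        running_loss = 0.0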
Example No. 8
def train_task(task, pretrain_task=None):
    # Environment
    kwargs = {'dt': 100}
    seq_len = 100

    env = gym.make(task, **kwargs)
    dataset = ngym.Dataset(env, batch_size=4, seq_len=seq_len)

    env = dataset.env
    ob_size = env.observation_space.shape[0]
    act_size = env.action_space.n

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    model = RNNNet(input_size=ob_size,
                   hidden_size=256,
                   output_size=act_size,
                   dt=env.dt).to(device)

    if pretrain_task is not None:
        fname = os.path.join('files', get_modelname(pretrain_task))
        model.load_state_dict(
            torch.load(fname, map_location=torch.device(device)))

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    print_step = 200
    running_loss = 0.0
    running_task_time = 0
    running_train_time = 0
    log = defaultdict(list)
    for i in range(2000):
        task_time_start = time.time()
        inputs, labels = dataset()
        running_task_time += time.time() - task_time_start
        inputs = torch.from_numpy(inputs).type(torch.float).to(device)
        labels = torch.from_numpy(labels.flatten()).type(torch.long).to(device)

        train_time_start = time.time()
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs, _ = model(inputs)

        loss = criterion(outputs.view(-1, act_size), labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        running_train_time += time.time() - train_time_start
        # print statistics
        running_loss += loss.item()
        if i % print_step == (print_step - 1):
            running_loss /= print_step
            log['step'].append(i)
            log['loss'].append(running_loss)
            print('{:d} loss: {:0.5f}'.format(i + 1, running_loss))
            running_loss = 0.0
            print('Task/Train time {:0.1f}/{:0.1f} ms/step'.format(
                running_task_time / print_step * 1e3,
                running_train_time / print_step * 1e3))
            running_task_time, running_train_time = 0, 0

            perf = get_performance(model, env, num_trial=200, device=device)
            log['perf'].append(perf)
            print('{:d} perf: {:0.2f}'.format(i + 1, perf))

            fname = os.path.join('files', get_logname(task, pretrain_task))
            np.savez_compressed(fname, **log)

            fname = os.path.join('files', get_modelname(task, pretrain_task))
            torch.save(model.state_dict(), fname)

    print('Finished Training')
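A usage sketch for the transfer setup this function enables (the task names here are illustrative):

# Train from scratch, then fine-tune on a second task from the saved weights.
train_task('PerceptualDecisionMaking-v0')
train_task('DelayComparison-v0', pretrain_task='PerceptualDecisionMaking-v0')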
Example No. 9
def train_network(envid):
    """Supervised training networks.

    Save network in a path determined by environment ID.

    Args:
        envid: str, environment ID.
    """
    modelpath = get_modelpath(envid)
    config = {
        'dt': 100,
        'hidden_size': 64,
        'lr': 1e-2,
        'batch_size': 16,
        'seq_len': 100,
        'envid': envid,
    }

    env_kwargs = {'dt': config['dt']}
    config['env_kwargs'] = env_kwargs

    # Save config
    with open(modelpath / 'config.json', 'w') as f:
        json.dump(config, f)

    # Make supervised dataset
    dataset = ngym.Dataset(envid,
                           env_kwargs=env_kwargs,
                           batch_size=config['batch_size'],
                           seq_len=config['seq_len'])
    env = dataset.env
    act_size = env.action_space.n
    # Train network
    net = Net(input_size=env.observation_space.shape[0],
              hidden_size=config['hidden_size'],
              output_size=act_size)
    net = net.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config['lr'])

    print('Training task', envid)

    running_loss = 0.0
    for i in range(2000):
        inputs, labels = dataset()
        inputs = torch.from_numpy(inputs).type(torch.float).to(device)
        labels = torch.from_numpy(labels.flatten()).type(torch.long).to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs, _ = net(inputs)

        loss = criterion(outputs.view(-1, act_size), labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 200 == 199:
            print('{:d} loss: {:0.5f}'.format(i + 1, running_loss / 200))
            running_loss = 0.0
            torch.save(net.state_dict(), modelpath / 'net.pth')

    print('Finished Training')
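A sketch of reloading the saved weights for evaluation (assumes the same constructor arguments, `env`, `config`, and `modelpath` used during training are in scope):

net = Net(input_size=env.observation_space.shape[0],
          hidden_size=config['hidden_size'],
          output_size=act_size)
net.load_state_dict(torch.load(modelpath / 'net.pth'))
net.eval()  # disable training-only behavior such as dropout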