Example #1
import random

from ray import tune

# algo_select, load_data_from_neorl, and training_function are project-local
# helpers imported elsewhere in the codebase.
def run_algo(**kwargs):
    config = {}
    config["kwargs"] = kwargs
    # Draw a fresh random seed for this launch.
    config["kwargs"]["seed"] = random.randint(0, 1000000)
    _, _, algo_config = algo_select(kwargs)
    # Prepare the dataset once up front so the parallel trials reuse the cache.
    load_data_from_neorl(algo_config["task"], algo_config["task_data_type"],
                         algo_config["task_train_num"])
    # Expand every tunable hyperparameter into a Ray Tune grid search.
    grid_tune = algo_config["grid_tune"]
    for k, v in grid_tune.items():
        config[k] = tune.grid_search(v)

    analysis = tune.run(
        training_function,
        config=config,
        resources_per_trial={"gpu": 1},
        queue_trials=True,
    )
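
A minimal sketch of how this entry point might be invoked; the keyword arguments below are assumptions about what algo_select expects, not values taken from the project:

if __name__ == "__main__":
    # Hypothetical arguments; the real keys depend on algo_select.
    run_algo(algo_name="cql",
             task="HalfCheetah-v3",
             task_data_type="low",
             task_train_num=99)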
Example #2
def run_algo(**kwargs):
    # Resolve the algorithm's init function, trainer class, and default config.
    algo_init_fn, algo_trainer_obj, algo_config = algo_select(kwargs)
    train_buffer, val_buffer = load_data_from_neorl(
        algo_config["task"], algo_config["task_data_type"],
        algo_config["task_train_num"])
    algo_init = algo_init_fn(algo_config)
    algo_trainer = algo_trainer_obj(algo_init, algo_config)
    # Evaluate the policy online in the task environment during training.
    callback = OnlineCallBackFunction()
    callback.initialize(train_buffer=train_buffer,
                        val_buffer=val_buffer,
                        task=algo_config["task"])

    algo_trainer.train(train_buffer, val_buffer, callback_fn=callback)
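
One common way to expose such an entry point on the command line is python-fire, which maps CLI flags onto the function's keyword arguments; this is a sketch of that pattern, not necessarily how the project wires it up:

import fire

if __name__ == "__main__":
    # e.g. python train_task.py --algo_name=cql --task=HalfCheetah-v3
    fire.Fire(run_algo)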
Example #3
def training_function(config):
    # Per-trial entry point for Ray Tune: config carries the original kwargs
    # plus one sampled value for every grid-searched hyperparameter.
    algo_init_fn, algo_trainer_obj, algo_config = algo_select(config["kwargs"])
    train_buffer, val_buffer = load_data_from_neorl(
        algo_config["task"], algo_config["task_data_type"],
        algo_config["task_train_num"])
    # Overwrite the defaults with this trial's sampled hyperparameters.
    algo_config.update(config)
    algo_config["device"] = "cuda"
    algo_init = algo_init_fn(algo_config)
    algo_trainer = algo_trainer_obj(algo_init, algo_config)

    callback = OnlineCallBackFunction()
    callback.initialize(train_buffer=train_buffer,
                        val_buffer=val_buffer,
                        task=algo_config["task"])

    score = algo_trainer.train(train_buffer, val_buffer, callback_fn=callback)

    return score
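
Once tune.run from Example #1 has run this function over the whole grid, the best configuration can be read back from the returned analysis object; the metric name below is an assumption about how the trainer's score is recorded:

# Hypothetical post-processing; "score" assumes the trainer reports its
# return value under that key.
best_config = analysis.get_best_config(metric="score", mode="max")
print(best_config)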
Example #4
import os
import time

def training_function(config):
    """Run a single trial on one seed."""
    config["kwargs"]["seed"] = config["seed"]
    algo_init_fn, algo_trainer_obj, algo_config = algo_select(config["kwargs"])
    train_buffer, val_buffer = load_data_from_neorl(
        algo_config["task"], algo_config["task_data_type"],
        algo_config["task_train_num"])
    algo_config.update(config)
    algo_config["device"] = "cuda"
    # Point the trainer at the pretrained dynamics and behavior models for
    # this task/level/size/seed combination.
    algo_config["dynamics_path"] = os.path.join(
        config["dynamics_root"],
        f'{algo_config["task"]}-{algo_config["task_data_type"]}-{algo_config["task_train_num"]}-{config["seed"]}.pt'
    )
    algo_config["behavior_path"] = os.path.join(
        config["behavior_root"],
        f'{algo_config["task"]}-{algo_config["task_data_type"]}-{algo_config["task_train_num"]}-{config["seed"]}.pt'
    )
    algo_init = algo_init_fn(algo_config)
    algo_trainer = algo_trainer_obj(algo_init, algo_config)

    # Run the online evaluation callback only periodically (every 50 calls).
    callback = PeriodicCallBack(OnlineCallBackFunction(), 50)
    callback.initialize(train_buffer=train_buffer,
                        val_buffer=val_buffer,
                        task=algo_config["task"],
                        number_of_runs=1000)

    algo_trainer.train(train_buffer, val_buffer, callback_fn=callback)
    algo_trainer.exp_logger.flush()
    # Sleep to ensure the log is flushed even if the disks or CPUs are busy.
    time.sleep(10)

    result, parameter = find_result(algo_trainer.index_path)

    return {
        "reward": result,
        "parameter": parameter,
        "seed": config["seed"],
    }
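
PeriodicCallBack is a project-local wrapper; a minimal sketch of the behavior its name and usage suggest, assuming it forwards to the wrapped callback once every period calls (the real implementation may differ):

class PeriodicCallBack:
    """Illustrative only: forward to `callback` once every `period` calls."""

    def __init__(self, callback, period):
        self._callback = callback
        self._period = period
        self._count = 0

    def initialize(self, **kwargs):
        # Pass initialization straight through to the wrapped callback.
        self._callback.initialize(**kwargs)

    def __call__(self, *args, **kwargs):
        self._count += 1
        if self._count % self._period == 0:
            return self._callback(*args, **kwargs)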
Example #5
import os

import numpy as np
import torch

# setup_seed, EnsembleTransition, _train_transition, _eval_transition, and
# _select_best_indexes are project-local helpers defined elsewhere.
def training_dynamics(config):
    # Skip this task/size combination and report an empty result.
    if config["task"] == "finance" and config["amount"] == 10000:
        return {
            "performance": [],
            "path": "",
        }

    seed = config["seed"]
    setup_seed(seed)

    train_buffer, val_buffer = load_data_from_neorl(config["task"],
                                                    config["level"],
                                                    config["amount"])

    obs_shape = train_buffer["obs"].shape[-1]
    action_shape = train_buffer["act"].shape[-1]

    device = "cuda"

    # Larger hidden layers for the higher-dimensional tasks.
    hidden_units = 1024 if config["task"] in ["ib", "finance", "citylearn"] else 256
    # An ensemble of 7 transition models.
    transition = EnsembleTransition(obs_shape, action_shape, hidden_units, 4,
                                    7).to(device)
    transition_optim = torch.optim.AdamW(transition.parameters(),
                                         lr=1e-3,
                                         weight_decay=0.000075)

    # Hold out a validation split of at most 1000 transitions.
    data_size = len(train_buffer)
    val_size = min(int(data_size * 0.2) + 1, 1000)
    train_size = data_size - val_size
    train_splits, val_splits = torch.utils.data.random_split(
        range(data_size), (train_size, val_size))
    valdata = train_buffer[val_splits.indices]
    train_buffer = train_buffer[train_splits.indices]

    batch_size = 256

    # Best validation loss seen so far for each of the 7 ensemble members.
    val_losses = [float("inf")] * 7

    epoch = 0
    cnt = 0

    # Train until no ensemble member has improved for 5 consecutive epochs.
    while True:
        epoch += 1
        # Each member trains on its own bootstrap resample of the data.
        idxs = np.random.randint(train_buffer.shape[0],
                                 size=[7, train_buffer.shape[0]])
        for batch_num in range(int(np.ceil(idxs.shape[-1] / batch_size))):
            batch_idxs = idxs[:, batch_num * batch_size:(batch_num + 1) *
                              batch_size]
            batch = train_buffer[batch_idxs]
            _train_transition(transition, batch, transition_optim, device)
        new_val_losses = _eval_transition(transition, valdata, device)

        # Snapshot every member whose validation loss improved this epoch.
        indexes = []
        for i, new_loss, old_loss in zip(range(len(val_losses)),
                                         new_val_losses, val_losses):
            if new_loss < old_loss:
                indexes.append(i)
                val_losses[i] = new_loss

        if len(indexes) > 0:
            transition.update_save(indexes)
            cnt = 0
        else:
            cnt += 1

        if cnt >= 5:
            break

    # Keep the 5 members with the lowest validation loss as the elite set.
    indexes = _select_best_indexes(val_losses, n=5)
    transition.set_select(indexes)
    performance = _eval_transition(transition, valdata, device)
    transition_path = os.path.join(
        config["dynamics_path"],
        f'{config["task"]}-{config["level"]}-{config["amount"]}-{seed}.pt')

    torch.save(transition, transition_path)

    return {
        "performance": performance,
        "path": transition_path,
    }
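
_select_best_indexes is another project-local helper; given how it is used above, a plausible implementation (an assumption, not the project's code) picks the n ensemble members with the smallest validation loss:

def _select_best_indexes(metrics, n):
    # Sort member indices by metric value and keep the n smallest.
    pairs = sorted(enumerate(metrics), key=lambda pair: pair[1])
    return [index for index, _ in pairs[:n]]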