def run_algo(**kwargs):
    """Launch a Ray Tune grid search over the algorithm's tunable hyper-parameters.

    Each trial receives the original CLI kwargs (with a random seed injected)
    plus one point of the grid defined by the algorithm's ``grid_tune`` spec.
    """
    config = {"kwargs": kwargs}
    config["kwargs"]["seed"] = random.randint(0, 1000000)
    _, _, algo_config = algo_select(kwargs)

    # Prepare dataset up front so every trial finds it locally cached.
    load_data_from_neorl(algo_config["task"],
                         algo_config["task_data_type"],
                         algo_config["task_train_num"])

    # Turn every tunable hyper-parameter into a grid axis for Tune.
    for param, candidates in algo_config["grid_tune"].items():
        config[param] = tune.grid_search(candidates)

    analysis = tune.run(
        training_function,
        config=config,
        resources_per_trial={"gpu": 1},
        queue_trials=True,
    )
def run_algo(**kwargs):
    """Train one offline-RL algorithm end to end on a NeoRL dataset.

    Selects the algorithm from ``kwargs``, loads the train/validation
    buffers, and runs the trainer with an online-evaluation callback.
    """
    algo_init_fn, algo_trainer_obj, algo_config = algo_select(kwargs)

    train_buffer, val_buffer = load_data_from_neorl(
        algo_config["task"],
        algo_config["task_data_type"],
        algo_config["task_train_num"],
    )

    # Build the trainer from the algorithm-specific initializer.
    trainer = algo_trainer_obj(algo_init_fn(algo_config), algo_config)

    callback = OnlineCallBackFunction()
    callback.initialize(train_buffer=train_buffer,
                        val_buffer=val_buffer,
                        task=algo_config["task"])

    trainer.train(train_buffer, val_buffer, callback_fn=callback)
def training_function(config):
    """Ray Tune trial entry point.

    Trains once with the sampled ``config`` (merged over the algorithm's
    defaults, forced onto CUDA) and returns the trainer's score so Tune
    can rank trials.
    """
    algo_init_fn, algo_trainer_obj, algo_config = algo_select(config["kwargs"])

    train_buffer, val_buffer = load_data_from_neorl(
        algo_config["task"],
        algo_config["task_data_type"],
        algo_config["task_train_num"],
    )

    # Sampled hyper-parameters override the algorithm defaults.
    algo_config.update(config)
    algo_config["device"] = "cuda"

    trainer = algo_trainer_obj(algo_init_fn(algo_config), algo_config)

    callback = OnlineCallBackFunction()
    callback.initialize(train_buffer=train_buffer,
                        val_buffer=val_buffer,
                        task=algo_config["task"])

    return trainer.train(train_buffer, val_buffer, callback_fn=callback)
def training_function(config):
    """Run one seeded training trial and report its best result.

    Propagates ``config['seed']`` into the algorithm kwargs, points the
    algorithm at pre-trained dynamics/behavior checkpoints for that seed,
    trains with a periodic online-evaluation callback, then reads the best
    reward and its parameters back from the trial's log directory.
    """
    config["kwargs"]['seed'] = config['seed']
    algo_init_fn, algo_trainer_obj, algo_config = algo_select(config["kwargs"])

    train_buffer, val_buffer = load_data_from_neorl(
        algo_config["task"],
        algo_config["task_data_type"],
        algo_config["task_train_num"],
    )

    algo_config.update(config)
    algo_config["device"] = "cuda"

    # Dynamics and behavior checkpoints share the same per-seed filename.
    ckpt_name = f'{algo_config["task"]}-{algo_config["task_data_type"]}-{algo_config["task_train_num"]}-{config["seed"]}.pt'
    algo_config['dynamics_path'] = os.path.join(config['dynamics_root'], ckpt_name)
    algo_config['behavior_path'] = os.path.join(config['behavior_root'], ckpt_name)

    trainer = algo_trainer_obj(algo_init_fn(algo_config), algo_config)

    # Evaluate online only every 50 steps to keep the trial cheap.
    callback = PeriodicCallBack(OnlineCallBackFunction(), 50)
    callback.initialize(train_buffer=train_buffer,
                        val_buffer=val_buffer,
                        task=algo_config["task"],
                        number_of_runs=1000)

    trainer.train(train_buffer, val_buffer, callback_fn=callback)

    trainer.exp_logger.flush()
    # Sleep to ensure the log is flushed even if the disks or cpus are busy.
    time.sleep(10)

    result, parameter = find_result(trainer.index_path)
    return {
        'reward': result,
        'parameter': parameter,
        'seed': config['seed'],
    }
def training_dynamics(config):
    """Fit an ensemble dynamics model for one (task, level, amount, seed) setting.

    Trains a 7-member ``EnsembleTransition`` with early stopping on a held-out
    validation slice, keeps the 5 best members, saves the model to disk, and
    returns its validation performance together with the checkpoint path.
    """
    # finance @ 10000 is deliberately skipped — return an empty placeholder.
    if config["task"] == 'finance' and config["amount"] == 10000:
        return {
            'performance': [],
            'path': '',
        }

    seed = config['seed']
    setup_seed(seed)

    train_buffer, val_buffer = load_data_from_neorl(config["task"],
                                                    config["level"],
                                                    config["amount"])

    obs_shape = train_buffer['obs'].shape[-1]
    action_shape = train_buffer['act'].shape[-1]
    device = 'cuda'

    # Larger tasks get a wider network.
    wide_tasks = ['ib', 'finance', 'citylearn']
    hidden_units = 1024 if config["task"] in wide_tasks else 256

    transition = EnsembleTransition(obs_shape, action_shape, hidden_units, 4, 7).to(device)
    transition_optim = torch.optim.AdamW(transition.parameters(),
                                         lr=1e-3,
                                         weight_decay=0.000075)

    # Hold out up to 1000 transitions (at least ~20%) for validation.
    data_size = len(train_buffer)
    val_size = min(int(data_size * 0.2) + 1, 1000)
    train_splits, val_splits = torch.utils.data.random_split(
        range(data_size), (data_size - val_size, val_size))
    valdata = train_buffer[val_splits.indices]
    train_buffer = train_buffer[train_splits.indices]

    batch_size = 256
    val_losses = [float('inf')] * 7  # best-so-far loss per ensemble member
    epoch = 0
    stall = 0  # epochs since any member improved

    while True:
        epoch += 1
        # Independent bootstrap sampling for each of the 7 members.
        shuffle_idxs = np.random.randint(train_buffer.shape[0],
                                         size=[7, train_buffer.shape[0]])
        for start in range(0, shuffle_idxs.shape[-1], batch_size):
            batch = train_buffer[shuffle_idxs[:, start:start + batch_size]]
            _train_transition(transition, batch, transition_optim, device)

        new_val_losses = _eval_transition(transition, valdata, device)

        # Snapshot any member whose validation loss improved.
        improved = []
        for member, (new_loss, old_loss) in enumerate(zip(new_val_losses, val_losses)):
            if new_loss < old_loss:
                improved.append(member)
                val_losses[member] = new_loss

        if improved:
            transition.update_save(improved)
            stall = 0
        else:
            stall += 1
            # Stop after 5 consecutive epochs with no improvement anywhere.
            if stall >= 5:
                break

    # Keep only the 5 best members for the final model.
    best = _select_best_indexes(val_losses, n=5)
    transition.set_select(best)
    performance = _eval_transition(transition, valdata, device)

    transition_path = os.path.join(
        config['dynamics_path'],
        f'{config["task"]}-{config["level"]}-{config["amount"]}-{seed}.pt')
    # NOTE(review): saves the full module via pickle, matching existing loaders.
    torch.save(transition, transition_path)

    return {
        'performance': performance,
        'path': transition_path,
    }