def test_L1Ball(algorithm, lr):
    # Setup
    constraint = chop.constraints.L1Ball(alpha)
    prox = constraint.prox
    lmo = constraint.lmo
    assert (constraint.prox(w) == w).all()
    w_t = Variable(torch.zeros_like(w), requires_grad=True)

    constraint_oracles = {
        stochastic.PGD.name: {'prox': [prox]},
        stochastic.PGDMadry.name: {'prox': [prox], 'lmo': [lmo]},
        stochastic.FrankWolfe.name: {'lmo': [lmo]},
        stochastic.S3CM.name: {'prox1': [prox], 'prox2': [prox]}
    }

    optimizer = algorithm([w_t], **constraint_oracles[algorithm.name], lr=lr)
    criterion = torch.nn.MSELoss(reduction='mean')

    # Logging
    store = Store(OUT_DIR)
    store.add_table('metadata', {'algorithm': str, 'lr': float})
    store['metadata'].append_row({'algorithm': optimizer.name, 'lr': lr})
    store.add_table(optimizer.name, {
        'func_val': float,
        'certificate': float,
        'norm(w_t)': float
    })

    cert = torch.tensor(np.inf)
    for ii in range(MAX_ITER):
        optimizer.zero_grad()
        loss = criterion(X.mv(w_t), y)
        loss.backward()
        optimizer.step()
        try:
            cert = next(optimizer.certificate)  # only one parameter here
        except AttributeError:
            cert = torch.tensor(np.nan)
        store.log_table_and_tb(optimizer.name, {
            'func_val': loss.item(),
            'certificate': cert.item(),
            'norm(w_t)': sum(abs(w_t)).item()
        })
        store[optimizer.name].flush_row()
    store.close()
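# A minimal sketch (not from the original file) of how the test above could be
# parametrized with pytest. The algorithm list mirrors the keys of
# constraint_oracles; the learning-rate grid is an assumed placeholder.
import pytest

@pytest.mark.parametrize('lr', [0.1, 0.01])
@pytest.mark.parametrize('algorithm', [stochastic.PGD, stochastic.PGDMadry,
                                       stochastic.FrankWolfe, stochastic.S3CM])
def test_L1Ball_parametrized(algorithm, lr):
    test_L1Ball(algorithm, lr)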
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--out-dir', type=str, required=True,
                        help='Out directory to save results to')
    parser.add_argument('--df-path', type=str, required=True,
                        help='Input dataframe to draw annotations from')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    out_dir = Path(args.out_dir)
    out_dir.mkdir(exist_ok=True)

    s = Store(str(args.out_dir))  # held_outs
    s.add_table('out', {'dists': s.OBJECT})

    df = load_dfs(path=args.df_path)
    dists = model_fit(args.debug, NUM_BETAS, df)
    s['out'].append_row({'dists': dists})

    print('store located in:', s.path)
    print(f"In Beta-Binomial Model Analysis.ipynb: set INPUT_DATA = '{s.path}'")
def train_configs(configs: List[Config], n_seeds: List[int], targets: List[int],
                  n_epochs: int, save_dir_artifacts: str, save_dir_models: str,
                  data_aug: bool = False, save_models: bool = True,
                  adv_acc: bool = False, run_time_str: str = None):
    loaders_BIN, normalization_function_BIN, label_map_BIN = utils.get_binary_dataset(
        batch_size=256,
        transfer=True,
        data_aug=data_aug,
        targets=targets,
        per_target=128,
        random=False
    )
    train_loader_BIN, test_loader_BIN = loaders_BIN
    run_time_str = run_time_str if run_time_str is not None \
        else datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

    for seed in n_seeds:
        for config in configs:
            dir_name = f"{config.name}_{run_time_str}_{seed}"
            store = Store(save_dir_artifacts, dir_name)
            writer = store.tensorboard

            if adv_acc:
                metrics = {
                    'test_accuracy_True': float,
                    'test_accuracy_False': float,
                    'epoch': int}
            else:
                metrics = {
                    'test_accuracy_False': float,
                    'epoch': int}
            store.add_table('result', metrics)

            # std_linear_net = utils.Linear(Nfeatures=3*32*32, Nclasses=2).cuda()
            model = get_new_model(config.robust_model)
            if not config.fine_tune:
                for param in model.parameters():
                    param.requires_grad = False
            model.fc = nn.Linear(512, len(targets)).cuda()
            model.train()

            train_model(model, train_loader_BIN, test_loader_BIN,
                        train_loop, eval_loop,
                        step_size=config.step_size,
                        epochs=n_epochs,
                        normalization=normalization_function_BIN,
                        store=store,
                        adv_train=config.adv_train,
                        log_iterations=10,
                        adv_acc=adv_acc)

            if save_models:
                path = f"{save_dir_models}/{dir_name}.pt"
                torch.save(model.state_dict(), path)
def train_model(args, model, optim, train_dl: DataLoader, valid_dl: DataLoader,
                store: Store, device, attack=None, ratio: int = 0):
    """
    Generic training routine, flexible enough to allow both standard and
    adversarial training.
    """
    start_time = time.time()

    # Initial setup
    store.add_table(consts.LOGS_TABLE, consts.LOGS_SCHEMA)
    store.add_table(consts.ARGS_TABLE, consts.ARGS_SCHEMA)
    args_info = {
        'epochs': args['--epochs'],
        'batch_size': args['--batch-size'],
        'model': 'mnist'
    }
    # store[consts.ARGS_TABLE].append_row(args_info)

    model.to(device)
    for epoch in range(args['--epochs']):
        # Train for one epoch
        train_acc, train_loss = _internal_loop(args, True, model, optim,
                                               train_dl, epoch, store, device)
        # Evaluate on validation
        with torch.no_grad():
            valid_acc, valid_loss = _internal_loop(args, False, model, None,
                                                   valid_dl, epoch, store, device)
        # Log
        log_info = {
            'epoch': epoch,
            'train_loss': train_loss,
            'valid_loss': valid_loss,
            'train_error_rate': 1 - train_acc,
            'valid_error_rate': 1 - valid_acc,
            'valid_adv_error_rate': -1,
            'time': time.time() - start_time
        }
        # store[consts.LOGS_TABLE].append_row(log_info)

    return model
def plot_metric(metrics: List[str], metric_labels: List[str], labels: List[str],
                n_seeds: int, config_groups: List[str], plot_name: str,
                save_dir_artifacts: str, save_dir_plots: str,
                figsize: List[int] = None):
    # plt.figure(figsize=(8, 6))
    conf_group_n = len(config_groups)
    metrics_n = len(metrics)
    n_rows = metrics_n * conf_group_n // 2
    n_cols = 2 if len(config_groups) > 1 else 1
    fig, axs = plt.subplots(n_rows, n_cols,
                            figsize=figsize if figsize else (6 * conf_group_n, 6 * metrics_n),
                            sharex=False, sharey='row')
    if n_rows == 1 and n_cols == 1:
        axs = [axs]
    elif n_rows > 1:
        axs = [ax for ax_row in axs for ax in ax_row]

    for metric_idx, metric in enumerate(metrics):
        for conf_group_idx, config_group in enumerate(config_groups):
            for config_idx, run_id in enumerate(config_group.run_ids):
                # Shifted-data one-pass mean/variance across seeds, using the
                # first seed's dataframe K as the shift. (The original divided
                # by a hardcoded 3; generalized here to n_seeds.)
                df_sum = None
                df_sum_squared = None
                K = None
                for seed in range(n_seeds):
                    dir_path = f'{run_id}_{seed}'
                    df = Store(save_dir_artifacts, dir_path)['result'].df
                    # plt.plot(df['epoch'], df['test_accuracy_False'], label=config.name)
                    if K is None:
                        K = df
                        df_sum = df - K
                        df_sum_squared = (df - K) ** 2
                    else:
                        df_sum += df - K
                        df_sum_squared += (df - K) ** 2
                df_mean = K + df_sum / n_seeds
                df_var = (df_sum_squared - df_sum * df_sum / n_seeds) / (n_seeds - 1)
                mean = df_mean[metric]
                stddev = df_var[metric] ** 0.5

                label = labels[config_idx] if labels is not None else run_id
                linestyle = 'dashed' if 'STD_network' in run_id else 'solid'
                color = '#ff7f0e' if 'STD_train' in run_id else '#1f77b4'
                ax = axs[metric_idx * conf_group_n + conf_group_idx]
                ax.plot(df['epoch'], mean, label=label,
                        linestyle=linestyle, color=color)
                ax.fill_between(df['epoch'], mean - stddev, mean + stddev,
                                alpha=0.2, color=color)
                ax.set_title(config_group.title)
                ax.set_xlabel('Epoch')
                if conf_group_idx == 0:
                    ax.set_ylabel(metric_labels[metric_idx])
                if metric_idx * conf_group_n + conf_group_idx == 1:
                    ax.legend(loc='best')

    fig.tight_layout()
    fig.savefig(f'{save_dir_plots}/{plot_name}.pdf')
def initialize_cox_store(cox_dir='cox') -> Store:
    store = Store(cox_dir)
    store.add_table('experiments', {
        'k': int,
        'random_state': int,
        'Train AUC': float,
        'Validation AUC': float,
        'Test AUC': float,
        'Test MCC': float,
        'start_time': str,
        # 'runtime(sec)': float,
        'classifier': str,
        'classifier_full': str
    })
    return store
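# A minimal usage sketch (not part of the original module), assuming the cox
# Store API used throughout these snippets. The directory, metric values, and
# classifier names below are placeholder examples.
def example_log_experiment():
    store = initialize_cox_store('/tmp/cox_example_store')
    store['experiments'].append_row({
        'k': 5,
        'random_state': 0,
        'Train AUC': 0.91,
        'Validation AUC': 0.88,
        'Test AUC': 0.87,
        'Test MCC': 0.62,
        'start_time': '2021-01-01T00:00:00',
        'classifier': 'logreg',
        'classifier_full': 'LogisticRegression(C=1.0)'
    })
    store.close()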
def test_L1Ball(algorithm, step_size):
    # Setup
    constraint = constopt.constraints.L1Ball(alpha)
    assert (constraint.prox(w) == w).all()
    w_t = Variable(torch.zeros_like(w), requires_grad=True)
    optimizer = algorithm([w_t], constraint)
    criterion = torch.nn.MSELoss(reduction='mean')

    # Logging
    store = Store(OUT_DIR)
    store.add_table('metadata', {'algorithm': str, 'step-size': float})
    store['metadata'].append_row({
        'algorithm': optimizer.name,
        'step-size': step_size
    })
    store.add_table(optimizer.name, {
        'func_val': float,
        'FW gap': float,
        'norm(w_t)': float
    })

    gap = torch.tensor(np.inf)
    for ii in range(MAX_ITER):
        optimizer.zero_grad()
        loss = criterion(X.mv(w_t), y)
        loss.backward()
        # Compute gap
        with torch.no_grad():
            gap = constraint.fw_gap(w_t.grad, w_t)
        optimizer.step(step_size)
        store.log_table_and_tb(optimizer.name, {
            'func_val': loss.item(),
            'FW gap': gap.item(),
            'norm(w_t)': sum(abs(w_t)).item()
        })
        store[optimizer.name].flush_row()
    store.close()
def test_L1Ball(algorithm, lr):
    # Setup
    constraint = chop.constraints.L1Ball(alpha)
    assert (constraint.prox(w) == w).all()
    w_t = Variable(torch.zeros_like(w), requires_grad=True)
    optimizer = algorithm([w_t], constraint, lr=lr)
    criterion = torch.nn.MSELoss(reduction='mean')

    # Logging
    store = Store(OUT_DIR)
    store.add_table('metadata', {'algorithm': str, 'lr': float})
    store['metadata'].append_row({'algorithm': optimizer.name, 'lr': lr})
    store.add_table(optimizer.name, {
        'func_val': float,
        'certificate': float,
        'norm(w_t)': float
    })

    cert = torch.tensor(np.inf)
    for ii in range(MAX_ITER):
        optimizer.zero_grad()
        loss = criterion(X.mv(w_t), y)
        loss.backward()
        optimizer.step()
        cert = next(optimizer.certificate)  # only one parameter here
        store.log_table_and_tb(optimizer.name, {
            'func_val': loss.item(),
            'certificate': cert.item(),
            'norm(w_t)': sum(abs(w_t)).item()
        })
        store[optimizer.name].flush_row()
    store.close()
def run(args):
    if args['--local']:
        args['--base-path'] = '/Users/andrei/Google Drive/_Facultate/MPhil Cambridge/Dissertation/project'
    else:
        args['--base-path'] = '/content/drive/My Drive/_Facultate/MPhil Cambridge/Dissertation/project'

    initialize(args, seed=0)
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    OUT_DIR = path.join(args['--base-path'], 'logs')
    store = Store(OUT_DIR)

    if args['--model-to-train'] == 'mnist':
        model = MnistClassifier()
        optim = torch.optim.Adam(model.parameters(), lr=1e-3)
        train_dl = get_mnist_dl(args, train=True)
        valid_dl = get_mnist_dl(args, train=False)
        train_model(args, model, optim, train_dl, valid_dl, store, device)
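# A hypothetical invocation sketch (not in the original script): run() indexes
# args with docopt-style keys, so a minimal dict like the one below would
# exercise the MNIST branch. Only keys referenced in these snippets are shown;
# initialize() and get_mnist_dl() may expect more.
if __name__ == '__main__':
    run({
        '--local': True,
        '--model-to-train': 'mnist',
        '--epochs': 10,
        '--batch-size': 128,
    })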
def main(params): for k, v in zip(params.keys(), params.values()): assert v is not None, f"Value for {k} is None" # # # Setup logging # # metadata_schema = schema_from_dict(params) base_directory = params['out_dir'] store = Store(base_directory) # redirect stderr, stdout to file """ def make_err_redirector(stream_name): tee = Tee(os.path.join(store.path, stream_name + '.txt'), stream_name) return tee stderr_tee = make_err_redirector('stderr') stdout_tee = make_err_redirector('stdout') """ # Store the experiment path and the git commit for this experiment metadata_schema.update({ 'store_path':str, 'git_commit':str }) repo = git.Repo(path=os.path.dirname(os.path.realpath(__file__)), search_parent_directories=True) metadata_table = store.add_table('metadata', metadata_schema) metadata_table.update_row(params) metadata_table.update_row({ 'store_path':store.path, 'git_commit':repo.head.object.hexsha }) metadata_table.flush_row() # Table for checkpointing models and envs if params['save_iters'] > 0: store.add_table('checkpoints', { 'val_model':store.PYTORCH_STATE, 'policy_model':store.PYTORCH_STATE, 'envs':store.PICKLE, 'policy_opt': store.PYTORCH_STATE, 'val_opt': store.PYTORCH_STATE, 'iteration':int }) # The trainer object is in charge of sampling trajectories and # taking PPO/TRPO optimization steps p = Trainer.agent_from_params(params, store=store) if 'load_model' in params and params['load_model']: print('Loading pretrained model', params['load_model']) pretrained_models = torch.load(params['load_model']) p.policy_model.load_state_dict(pretrained_models['policy_model']) p.val_model.load_state_dict(pretrained_models['val_model']) # Load optimizer states. Note that # p.POLICY_ADAM.load_state_dict(pretrained_models['policy_opt']) # p.val_opt.load_state_dict(pretrained_models['val_opt']) # Restore environment parameters, like mean and std. p.envs = pretrained_models['envs'] rewards = [] # Table for final results final_table = store.add_table('final_results', { 'iteration':int, '5_rewards':float, 'terminated_early':bool, 'val_model':store.PYTORCH_STATE, 'policy_model':store.PYTORCH_STATE, 'envs':store.PICKLE, 'policy_opt': store.PYTORCH_STATE, 'val_opt': store.PYTORCH_STATE, 'iteration':int }) def finalize_table(iteration, terminated_early, rewards): final_5_rewards = np.array(rewards)[-5:].mean() final_table.append_row({ 'iteration':iteration, '5_rewards':final_5_rewards, 'terminated_early':terminated_early, 'iteration':iteration, 'val_model': p.val_model.state_dict(), 'policy_model': p.policy_model.state_dict(), 'policy_opt': p.POLICY_ADAM.state_dict(), 'val_opt': p.val_opt.state_dict(), 'envs':p.envs }) # Try-except so that we save if the user interrupts the process try: for i in range(params['train_steps']): print('Step %d' % (i,)) if params['save_iters'] > 0 and i % params['save_iters'] == 0: store['checkpoints'].append_row({ 'iteration':i, 'val_model': p.val_model.state_dict(), 'policy_model': p.policy_model.state_dict(), 'policy_opt': p.POLICY_ADAM.state_dict(), 'val_opt': p.val_opt.state_dict(), 'envs':p.envs }) mean_reward = p.train_step() rewards.append(mean_reward) finalize_table(i, False, rewards) except KeyboardInterrupt: torch.save(p.val_model, 'saved_experts/%s-expert-vf' % (params['game'],)) torch.save(p.policy_model, 'saved_experts/%s-expert-pol' % (params['game'],)) finalize_table(i, True, rewards) store.close()
def main(params): for k, v in zip(params.keys(), params.values()): assert v is not None, f"Value for {k} is None" # # # Setup logging # # metadata_schema = schema_from_dict(params) base_directory = params['out_dir'] store = Store(base_directory) # redirect stderr, stdout to file """ def make_err_redirector(stream_name): tee = Tee(os.path.join(store.path, stream_name + '.txt'), stream_name) return tee stderr_tee = make_err_redirector('stderr') stdout_tee = make_err_redirector('stdout') """ # Store the experiment path and the git commit for this experiment metadata_schema.update({ 'store_path': str, 'git_commit': str }) repo = git.Repo(path=os.path.dirname(os.path.realpath(__file__)), search_parent_directories=True) metadata_table = store.add_table('metadata', metadata_schema) metadata_table.update_row(params) metadata_table.update_row({ 'store_path': store.path, 'git_commit': repo.head.object.hexsha }) metadata_table.flush_row() # Extra items in table when minimax training is enabled. if params['mode'] == "adv_ppo" or params['mode'] == 'adv_trpo' or params['mode'] == 'adv_sa_ppo': adversary_table_dict = { 'adversary_policy_model': store.PYTORCH_STATE, 'adversary_policy_opt': store.PYTORCH_STATE, 'adversary_val_model': store.PYTORCH_STATE, 'adversary_val_opt': store.PYTORCH_STATE, } else: adversary_table_dict = {} # Table for checkpointing models and envs if params['save_iters'] > 0: checkpoint_dict = { 'val_model': store.PYTORCH_STATE, 'policy_model': store.PYTORCH_STATE, 'envs': store.PICKLE, 'policy_opt': store.PYTORCH_STATE, 'val_opt': store.PYTORCH_STATE, 'iteration': int, '5_rewards': float, } checkpoint_dict.update(adversary_table_dict) store.add_table('checkpoints', checkpoint_dict) # The trainer object is in charge of sampling trajectories and # taking PPO/TRPO optimization steps p = Trainer.agent_from_params(params, store=store) if params['initial_std'] != 1.0: p.policy_model.log_stdev.data[:] = np.log(params['initial_std']) if 'load_model' in params and params['load_model']: print('Loading pretrained model', params['load_model']) pretrained_model = torch.load(params['load_model']) if 'policy_model' in pretrained_model: p.policy_model.load_state_dict(pretrained_model['policy_model']) if params['deterministic']: print('Policy runs in deterministic mode. Ignoring Gaussian noise.') p.policy_model.log_stdev.data[:] = -100 else: print('Policy runs in non deterministic mode with Gaussian noise.') if 'val_model' in pretrained_model: p.val_model.load_state_dict(pretrained_model['val_model']) if 'policy_opt' in pretrained_model: p.POLICY_ADAM.load_state_dict(pretrained_model['policy_opt']) if 'val_opt' in pretrained_model: p.val_opt.load_state_dict(pretrained_model['val_opt']) # Load adversary models. if 'no_load_adv_policy' in params and params['no_load_adv_policy']: print('Skipping loading adversary models.') else: if 'adversary_policy_model' in pretrained_model and hasattr(p, 'adversary_policy_model'): p.adversary_policy_model.load_state_dict(pretrained_model['adversary_policy_model']) if 'adversary_val_model' in pretrained_model and hasattr(p, 'adversary_val_model'): p.adversary_val_model.load_state_dict(pretrained_model['adversary_val_model']) if 'adversary_policy_opt' in pretrained_model and hasattr(p, 'adversary_policy_opt'): p.adversary_policy_opt.load_state_dict(pretrained_model['adversary_policy_opt']) if 'adversary_val_opt' in pretrained_model and hasattr(p, 'adversary_val_opt'): p.adversary_val_opt.load_state_dict(pretrained_model['adversary_val_opt']) # Load optimizer states. 
# p.POLICY_ADAM.load_state_dict(pretrained_models['policy_opt']) # p.val_opt.load_state_dict(pretrained_models['val_opt']) # Restore environment parameters, like mean and std. if 'envs' in pretrained_model: p.envs = pretrained_model['envs'] for e in p.envs: e.setup_visualization(params['show_env'], params['save_frames'], params['save_frames_path']) rewards = [] # Table for final results final_dict = { 'iteration': int, '5_rewards': float, 'terminated_early': bool, 'val_model': store.PYTORCH_STATE, 'policy_model': store.PYTORCH_STATE, 'envs': store.PICKLE, 'policy_opt': store.PYTORCH_STATE, 'val_opt': store.PYTORCH_STATE, } final_dict.update(adversary_table_dict) final_table = store.add_table('final_results', final_dict) def add_adversary_to_table(p, table_dict): if params['mode'] == "adv_ppo" or params['mode'] == 'adv_trpo' or params['mode'] == 'adv_sa_ppo': table_dict["adversary_policy_model"] = p.adversary_policy_model.state_dict() table_dict["adversary_policy_opt"] = p.ADV_POLICY_ADAM.state_dict() table_dict["adversary_val_model"] = p.adversary_val_model.state_dict() table_dict["adversary_val_opt"] = p.adversary_val_opt.state_dict() return table_dict def finalize_table(iteration, terminated_early, rewards): final_5_rewards = np.array(rewards)[-5:].mean() final_dict = { 'iteration': iteration, '5_rewards': final_5_rewards, 'terminated_early': terminated_early, 'val_model': p.val_model.state_dict(), 'policy_model': p.policy_model.state_dict(), 'policy_opt': p.POLICY_ADAM.state_dict(), 'val_opt': p.val_opt.state_dict(), 'envs': p.envs } final_dict = add_adversary_to_table(p, final_dict) final_table.append_row(final_dict) ret = 0 # Try-except so that we save if the user interrupts the process try: for i in range(params['train_steps']): print('Step %d' % (i,)) if params['save_iters'] > 0 and i % params['save_iters'] == 0 and i != 0: final_5_rewards = np.array(rewards)[-5:].mean() print(f'Saving checkpoints to {store.path} with reward {final_5_rewards:.5g}') checkpoint_dict = { 'iteration': i, 'val_model': p.val_model.state_dict(), 'policy_model': p.policy_model.state_dict(), 'policy_opt': p.POLICY_ADAM.state_dict(), 'val_opt': p.val_opt.state_dict(), 'envs': p.envs, '5_rewards': final_5_rewards, } checkpoint_dict = add_adversary_to_table(p, checkpoint_dict) store['checkpoints'].append_row(checkpoint_dict) mean_reward = p.train_step() rewards.append(mean_reward) # For debugging and tuning, we can break in the middle. if i == params['force_stop_step']: print('Terminating early because --force-stop-step is set.') raise KeyboardInterrupt finalize_table(i, False, rewards) except KeyboardInterrupt: finalize_table(i, True, rewards) ret = 1 except: print("An error occurred during training:") traceback.print_exc() # Other errors, make sure to finalize the cox store before exiting. finalize_table(i, True, rewards) ret = -1 print(f'Models saved to {store.path}') store.close() return ret
import os
import shutil

from cox.store import Store
from cox.readers import CollectionReader

## Code sample to go alongside Walkthrough #2 in README.md

OUT_DIR = '/tmp/cox_example/'
try:
    shutil.rmtree(OUT_DIR)
except:
    pass
os.mkdir(OUT_DIR)

if __name__ == "__main__":
    for slope in range(5):
        store = Store(OUT_DIR)
        store.add_table('metadata', {'slope': int})
        store.add_table('line_graphs', {'mx': int, 'mx^2': int})
        store['metadata'].append_row({'slope': slope})
        for x in range(100):
            store.log_table_and_tb('line_graphs', {
                'mx': slope * x,
                'mx^2': slope * (x**2)
            })
            store['line_graphs'].flush_row()
        store.close()

    ### Collection reading
    print("Done experiments, printing results...")
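    # The sample stops before the collection-reading step it announces. A
    # minimal sketch, assuming cox's CollectionReader API: reader.df(name)
    # aggregates the named table across every store under OUT_DIR.
    reader = CollectionReader(OUT_DIR)
    print(reader.df('metadata'))
    print(reader.df('line_graphs'))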
def main(params):
    override_params = copy.deepcopy(params)
    excluded_params = [
        'config_path', 'out_dir_prefix', 'num_episodes', 'row_id', 'exp_id',
        'load_model', 'seed', 'deterministic', 'scan_config',
        'compute_kl_cert', 'use_full_backward'
    ]
    sarsa_params = [
        'sarsa_enable', 'sarsa_steps', 'sarsa_eps', 'sarsa_reg',
        'sarsa_model_path'
    ]
    # override_params contains all flags in config files that are overridden
    # via the command line.
    for k in list(override_params.keys()):
        if k in excluded_params:
            del override_params[k]

    # Append a prefix for output path.
    if params['out_dir_prefix']:
        params['out_dir'] = os.path.join(params['out_dir_prefix'],
                                         params['out_dir'])
        print(f"setting output dir to {params['out_dir']}")

    if params['config_path']:
        # Load from a pretrained model using an existing config.
        # First we need to create the model using the given config file.
        json_params = json.load(open(params['config_path']))
        params = override_json_params(params, json_params,
                                      excluded_params + sarsa_params)

        if params['sarsa_enable']:
            assert params['attack_method'] == "none" or params['attack_method'] is None, \
                "--train-sarsa is only available when --attack-method=none, but got {}".format(params['attack_method'])

        if 'load_model' in params and params['load_model']:
            for k, v in zip(params.keys(), params.values()):
                assert v is not None, f"Value for {k} is None"

            # Create the agent from config file.
            p = Trainer.agent_from_params(params, store=None)
            print('Loading pretrained model', params['load_model'])
            pretrained_model = torch.load(params['load_model'])
            if 'policy_model' in pretrained_model:
                p.policy_model.load_state_dict(pretrained_model['policy_model'])
            if 'val_model' in pretrained_model:
                p.val_model.load_state_dict(pretrained_model['val_model'])
            if 'policy_opt' in pretrained_model:
                p.POLICY_ADAM.load_state_dict(pretrained_model['policy_opt'])
            if 'val_opt' in pretrained_model:
                p.val_opt.load_state_dict(pretrained_model['val_opt'])
            # Restore environment parameters, like mean and std.
            if 'envs' in pretrained_model:
                p.envs = pretrained_model['envs']
            for e in p.envs:
                e.normalizer_read_only = True
                e.setup_visualization(params['show_env'],
                                      params['save_frames'],
                                      params['save_frames_path'])
    else:
        # Load from experiment directory. No need to use a config.
        base_directory = params['out_dir']
        store = Store(base_directory, params['exp_id'], mode='r')
        if params['row_id'] < 0:
            row = store['final_results'].df
        else:
            checkpoints = store['checkpoints'].df
            row_id = params['row_id']
            row = checkpoints.iloc[row_id:row_id + 1]
        print("row to test: ", row)
        if params['cpu'] is None:
            cpu = False
        else:
            cpu = params['cpu']
        p, _ = Trainer.agent_from_data(store, row, cpu, extra_params=params,
                                       override_params=override_params,
                                       excluded_params=excluded_params)
        store.close()

    rewards = []

    if params['sarsa_enable']:
        num_steps = params['sarsa_steps']
        # Learning rate scheduler: linearly anneal the learning rate after
        # two thirds of the steps.
        lr_decrease_point = num_steps * 2 / 3
        decreasing_steps = num_steps - lr_decrease_point
        lr_sch = lambda epoch: 1.0 if epoch < lr_decrease_point else (
            decreasing_steps - epoch + lr_decrease_point) / decreasing_steps
        # Robust training scheduler. Currently using 1/3 of the epochs for
        # warmup, 1/3 for the schedule, and 1/3 for final training.
        eps_start_point = int(num_steps * 1 / 3)
        robust_eps_scheduler = LinearScheduler(
            params['sarsa_eps'],
            f"start={eps_start_point},length={eps_start_point}")
        robust_beta_scheduler = LinearScheduler(
            1.0, f"start={eps_start_point},length={eps_start_point}")
        # Reinitialize the value model and run value function learning steps.
        p.setup_sarsa(lr_schedule=lr_sch,
                      eps_scheduler=robust_eps_scheduler,
                      beta_scheduler=robust_beta_scheduler)
        # Run Sarsa training.
        for i in range(num_steps):
            print(f'Step {i+1} / {num_steps}, lr={p.sarsa_scheduler.get_last_lr()}')
            mean_reward = p.sarsa_step()
            rewards.append(mean_reward)
            # for w in p.val_model.parameters():
            #     print(f'{w.size()}, {torch.norm(w.view(-1), 2)}')
        # Save the Sarsa model.
        saved_model = {
            'state_dict': p.sarsa_model.state_dict(),
            'metadata': params,
        }
        torch.save(saved_model, params['sarsa_model_path'])
    else:
        print('Gaussian noise in policy:')
        print(torch.exp(p.policy_model.log_stdev))
        if params['deterministic']:
            print('Policy runs in deterministic mode. Ignoring Gaussian noise.')
            p.policy_model.log_stdev.data[:] = -100
        num_episodes = params['num_episodes']
        all_rewards = []
        all_lens = []
        all_kl_certificates = []

        for i in range(num_episodes):
            print('Episode %d / %d' % (i + 1, num_episodes))
            ep_length, ep_reward, actions, action_means, states, kl_certificates = p.run_test(
                compute_bounds=params['compute_kl_cert'],
                use_full_backward=params['use_full_backward'])
            if i == 0:
                all_actions = actions.copy()
                all_states = states.copy()
            else:
                all_actions = np.concatenate((all_actions, actions), axis=0)
                all_states = np.concatenate((all_states, states), axis=0)
            if params['compute_kl_cert']:
                print('Epoch KL certificates:', kl_certificates)
                all_kl_certificates.append(kl_certificates)
            all_rewards.append(ep_reward)
            all_lens.append(ep_length)

        attack_dir = 'attack-{}-eps-{}'.format(params['attack_method'],
                                               params['attack_eps'])
        if 'sarsa' in params['attack_method']:
            attack_dir += '-sarsa_steps-{}-sarsa_eps-{}-sarsa_reg-{}'.format(
                params['sarsa_steps'], params['sarsa_eps'], params['sarsa_reg'])
        if 'action' in params['attack_method']:
            attack_dir += '-attack_sarsa_action_ratio-{}'.format(
                params['attack_sarsa_action_ratio'])
        save_path = os.path.join(params['out_dir'], params['exp_id'], attack_dir)
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        for name, value in [('actions', all_actions), ('states', all_states),
                            ('rewards', all_rewards), ('length', all_lens)]:
            with open(os.path.join(save_path, '{}.pkl'.format(name)), 'wb') as f:
                pickle.dump(value, f)
        print(params)
        with open(os.path.join(save_path, 'params.json'), 'w') as f:
            json.dump(params, f, indent=4)

        print('\n')
        print('all rewards:', all_rewards)
        print('rewards stats:\nmean: {}, std:{}, min:{}, max:{}'.format(
            np.mean(all_rewards), np.std(all_rewards), np.min(all_rewards),
            np.max(all_rewards)))
        if params['compute_kl_cert']:
            print('KL certificates stats: mean: {}, std: {}, min: {}, max: {}'.format(
                np.mean(all_kl_certificates), np.std(all_kl_certificates),
                np.min(all_kl_certificates), np.max(all_kl_certificates)))
def main(params):
    override_params = copy.deepcopy(params)
    excluded_params = [
        'config_path', 'out_dir_prefix', 'num_episodes', 'row_id', 'exp_id',
        'load_model', 'seed', 'deterministic', 'noise_factor',
        'compute_kl_cert', 'use_full_backward', 'sqlite_path', 'early_terminate'
    ]
    sarsa_params = [
        'sarsa_enable', 'sarsa_steps', 'sarsa_eps', 'sarsa_reg',
        'sarsa_model_path'
    ]
    imit_params = ['imit_enable', 'imit_epochs', 'imit_model_path', 'imit_lr']
    # override_params contains all flags in config files that are overridden
    # via the command line.
    for k in list(override_params.keys()):
        if k in excluded_params:
            del override_params[k]

    if params['sqlite_path']:
        print(f"Will save results in sqlite database in {params['sqlite_path']}")
        connection = sqlite3.connect(params['sqlite_path'])
        cur = connection.cursor()
        cur.execute('''create table if not exists attack_results
              (method varchar(20),
               mean_reward real,
               std_reward real,
               min_reward real,
               max_reward real,
               sarsa_eps real,
               sarsa_reg real,
               sarsa_steps integer,
               deterministic bool,
               early_terminate bool)''')
        connection.commit()
    # We will set this flag to True if we break early.
    early_terminate = False

    # Append a prefix for output path.
    if params['out_dir_prefix']:
        params['out_dir'] = os.path.join(params['out_dir_prefix'],
                                         params['out_dir'])
        print(f"setting output dir to {params['out_dir']}")

    if params['config_path']:
        # Load from a pretrained model using an existing config.
        # First we need to create the model using the given config file.
        json_params = json.load(open(params['config_path']))
        params = override_json_params(
            params, json_params, excluded_params + sarsa_params + imit_params)

        if params['sarsa_enable']:
            assert params['attack_method'] == "none" or params['attack_method'] is None, \
                "--train-sarsa is only available when --attack-method=none, but got {}".format(params['attack_method'])

        if 'load_model' in params and params['load_model']:
            for k, v in zip(params.keys(), params.values()):
                assert v is not None, f"Value for {k} is None"

            # Create the agent from config file.
            p = Trainer.agent_from_params(params, store=None)
            print('Loading pretrained model', params['load_model'])
            pretrained_model = torch.load(params['load_model'])
            if 'policy_model' in pretrained_model:
                p.policy_model.load_state_dict(pretrained_model['policy_model'])
            if 'val_model' in pretrained_model:
                p.val_model.load_state_dict(pretrained_model['val_model'])
            if 'policy_opt' in pretrained_model:
                p.POLICY_ADAM.load_state_dict(pretrained_model['policy_opt'])
            if 'val_opt' in pretrained_model:
                p.val_opt.load_state_dict(pretrained_model['val_opt'])
            # Restore environment parameters, like mean and std.
            if 'envs' in pretrained_model:
                p.envs = pretrained_model['envs']
            for e in p.envs:
                e.normalizer_read_only = True
                e.setup_visualization(params['show_env'],
                                      params['save_frames'],
                                      params['save_frames_path'])
    else:
        # Load from experiment directory. No need to use a config.
        base_directory = params['out_dir']
        store = Store(base_directory, params['exp_id'], mode='r')
        if params['row_id'] < 0:
            row = store['final_results'].df
        else:
            checkpoints = store['checkpoints'].df
            row_id = params['row_id']
            row = checkpoints.iloc[row_id:row_id + 1]
        print("row to test: ", row)
        if params['cpu'] is None:
            cpu = False
        else:
            cpu = params['cpu']
        p, _ = Trainer.agent_from_data(store, row, cpu, extra_params=params,
                                       override_params=override_params,
                                       excluded_params=excluded_params)
        store.close()

    rewards = []

    print('Gaussian noise in policy:')
    print(torch.exp(p.policy_model.log_stdev))
    original_stdev = p.policy_model.log_stdev.clone().detach()
    if params['noise_factor'] != 1.0:
        p.policy_model.log_stdev.data[:] += np.log(params['noise_factor'])
    if params['deterministic']:
        print('Policy runs in deterministic mode. Ignoring Gaussian noise.')
        p.policy_model.log_stdev.data[:] = -100
    print('Gaussian noise in policy (after adjustment):')
    print(torch.exp(p.policy_model.log_stdev))

    if params['sarsa_enable']:
        num_steps = params['sarsa_steps']
        # Learning rate scheduler: linearly anneal the learning rate after
        # two thirds of the steps.
        lr_decrease_point = num_steps * 2 / 3
        decreasing_steps = num_steps - lr_decrease_point
        lr_sch = lambda epoch: 1.0 if epoch < lr_decrease_point else (
            decreasing_steps - epoch + lr_decrease_point) / decreasing_steps
        # Robust training scheduler. Currently using 1/3 of the epochs for
        # warmup, 1/3 for the schedule, and 1/3 for final training.
        eps_start_point = int(num_steps * 1 / 3)
        robust_eps_scheduler = LinearScheduler(
            params['sarsa_eps'],
            f"start={eps_start_point},length={eps_start_point}")
        robust_beta_scheduler = LinearScheduler(
            1.0, f"start={eps_start_point},length={eps_start_point}")
        # Reinitialize the value model and run value function learning steps.
        p.setup_sarsa(lr_schedule=lr_sch,
                      eps_scheduler=robust_eps_scheduler,
                      beta_scheduler=robust_beta_scheduler)
        # Run Sarsa training.
        for i in range(num_steps):
            print(f'Step {i+1} / {num_steps}, lr={p.sarsa_scheduler.get_last_lr()}')
            mean_reward = p.sarsa_step()
            rewards.append(mean_reward)
            # for w in p.val_model.parameters():
            #     print(f'{w.size()}, {torch.norm(w.view(-1), 2)}')
        # Save the Sarsa model.
        saved_model = {
            'state_dict': p.sarsa_model.state_dict(),
            'metadata': params,
        }
        torch.save(saved_model, params['sarsa_model_path'])
    elif params['imit_enable']:
        num_epochs = params['imit_epochs']
        num_episodes = params['num_episodes']
        print('\n\n' + 'Start collecting data\n' + '-' * 80)
        for i in range(num_episodes):
            print('Collecting %d / %d episodes' % (i + 1, num_episodes))
            ep_length, ep_reward, actions, action_means, states, kl_certificates = p.run_test(
                compute_bounds=params['compute_kl_cert'],
                use_full_backward=params['use_full_backward'],
                original_stdev=original_stdev)
            not_dones = np.ones(len(actions))
            not_dones[-1] = 0
            if i == 0:
                all_actions = actions.copy()
                all_states = states.copy()
                all_not_dones = not_dones.copy()
            else:
                all_actions = np.concatenate((all_actions, actions), axis=0)
                all_states = np.concatenate((all_states, states), axis=0)
                all_not_dones = np.concatenate((all_not_dones, not_dones))
        print('Collected actions shape:', all_actions.shape)
        print('Collected states shape:', all_states.shape)
        p.setup_imit(lr=params['imit_lr'])
        p.imit_steps(torch.from_numpy(all_actions),
                     torch.from_numpy(all_states),
                     torch.from_numpy(all_not_dones), num_epochs)
        saved_model = {
            'state_dict': p.imit_network.state_dict(),
            'metadata': params,
        }
        torch.save(saved_model, params['imit_model_path'])
    else:
        num_episodes = params['num_episodes']
        all_rewards = []
        all_lens = []
        all_kl_certificates = []

        for i in range(num_episodes):
            print('Episode %d / %d' % (i + 1, num_episodes))
            ep_length, ep_reward, actions, action_means, states, kl_certificates = p.run_test(
                compute_bounds=params['compute_kl_cert'],
                use_full_backward=params['use_full_backward'],
                original_stdev=original_stdev)
            if i == 0:
                all_actions = actions.copy()
                all_states = states.copy()
            else:
                all_actions = np.concatenate((all_actions, actions), axis=0)
                all_states = np.concatenate((all_states, states), axis=0)
            if params['compute_kl_cert']:
                print('Epoch KL certificates:', kl_certificates)
                all_kl_certificates.append(kl_certificates)
            all_rewards.append(ep_reward)
            all_lens.append(ep_length)
            # Current running mean, std, min and max
            mean_reward, std_reward, min_reward, max_reward = (
                np.mean(all_rewards), np.std(all_rewards),
                np.min(all_rewards), np.max(all_rewards))
            if (i > num_episodes // 5 and params['early_terminate']
                    and params['sqlite_path']
                    and params['attack_method'] != 'none'):
                # Attempt to terminate early if some other attack has already
                # finished with low reward.
                cur.execute(
                    "SELECT MIN(mean_reward) FROM attack_results WHERE deterministic=?;",
                    (params['deterministic'], ))
                current_best_reward = cur.fetchone()[0]
                print(f'current best: {current_best_reward}, ours: {mean_reward} +/- {std_reward}, min: {min_reward}')
                # Terminate if mean - 2*std is worse than the best, or our min
                # is worse than the best.
                if current_best_reward is not None and (
                        (current_best_reward < mean_reward - 2 * std_reward)
                        or (min_reward > current_best_reward)):
                    print('terminating early!')
                    early_terminate = True
                    break

        attack_dir = 'attack-{}-eps-{}'.format(params['attack_method'],
                                               params['attack_eps'])
        if 'sarsa' in params['attack_method']:
            attack_dir += '-sarsa_steps-{}-sarsa_eps-{}-sarsa_reg-{}'.format(
                params['sarsa_steps'], params['sarsa_eps'], params['sarsa_reg'])
        if 'action' in params['attack_method']:
            attack_dir += '-attack_sarsa_action_ratio-{}'.format(
                params['attack_sarsa_action_ratio'])
        save_path = os.path.join(params['out_dir'], params['exp_id'], attack_dir)
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        for name, value in [('actions', all_actions), ('states', all_states),
                            ('rewards', all_rewards), ('length', all_lens)]:
            with open(os.path.join(save_path, '{}.pkl'.format(name)), 'wb') as f:
                pickle.dump(value, f)
        print(params)
        with open(os.path.join(save_path, 'params.json'), 'w') as f:
            json.dump(params, f, indent=4)

        mean_reward, std_reward, min_reward, max_reward = (
            np.mean(all_rewards), np.std(all_rewards),
            np.min(all_rewards), np.max(all_rewards))
        if params['compute_kl_cert']:
            print('KL certificates stats: mean: {}, std: {}, min: {}, max: {}'.format(
                np.mean(all_kl_certificates), np.std(all_kl_certificates),
                np.min(all_kl_certificates), np.max(all_kl_certificates)))

        # Write results to sqlite.
        if params['sqlite_path']:
            method = params['attack_method']
            if params['attack_method'] == "sarsa":
                # Load sarsa parameters from checkpoint
                sarsa_ckpt = torch.load(params['attack_sarsa_network'])
                sarsa_meta = sarsa_ckpt['metadata']
                sarsa_eps = sarsa_meta['sarsa_eps'] if 'sarsa_eps' in sarsa_meta else -1.0
                sarsa_reg = sarsa_meta['sarsa_reg'] if 'sarsa_reg' in sarsa_meta else -1.0
                sarsa_steps = sarsa_meta['sarsa_steps'] if 'sarsa_steps' in sarsa_meta else -1
            elif params['attack_method'] == "sarsa+action":
                sarsa_eps = -1.0
                sarsa_reg = params['attack_sarsa_action_ratio']
                sarsa_steps = -1
            else:
                sarsa_eps = -1.0
                sarsa_reg = -1.0
                sarsa_steps = -1
            try:
                cur.execute(
                    "INSERT INTO attack_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);",
                    (method, mean_reward, std_reward, min_reward, max_reward,
                     sarsa_eps, sarsa_reg, sarsa_steps,
                     params['deterministic'], early_terminate))
                connection.commit()
            except sqlite3.OperationalError as e:
                import traceback
                traceback.print_exc()
                print('Cannot insert into the SQLite table. Give up.')
            else:
                print(f'results saved to database {params["sqlite_path"]}')
            connection.close()

        print('\n')
        print('all rewards:', all_rewards)
        print('rewards stats:\nmean: {}, std:{}, min:{}, max:{}'.format(
            mean_reward, std_reward, min_reward, max_reward))
def main(args):
    base_directory = args.base_directory
    exp_id_list = os.listdir(base_directory)
    best_exp_id = None
    all_rew = []
    all_exp_id = []
    train_eps = []
    if args.exp_id == '':
        for exp_id in exp_id_list:
            s = None
            try:
                s = Store(base_directory, exp_id)
                rew = s['final_results'].df['5_rewards'][0]
                # train_eps.append(s['metadata'].df['robust_ppo_eps'][0])
                all_rew.append(rew)
                print(f"rew={rew}")
                all_exp_id.append(exp_id)
                s.close()
            except Exception as e:
                print(f'Load result error for {exp_id}: {e}')
                if s is not None:
                    s.close()
                continue
        n_exps = len(all_rew)
        all_rew = np.array(all_rew)
        all_exp_id = np.array(all_exp_id)
        ind = np.argsort(all_rew)
        for i in range(len(train_eps)):
            if train_eps[i] == 0.075:
                print(all_exp_id[i])
        print(f'Read {n_exps} models. Avg reward is {all_rew.mean()}, '
              f'median is {all_rew[ind[n_exps//2]]}')

    def dump_one_exp_id(best_exp_id):
        print('\n\n>>>selected id', best_exp_id, 'args.best', args.best, '\n\n')
        if best_exp_id is not None:
            env_name = get_env_name(base_directory)
            alg_name = get_alg_name(base_directory)
            store = Store(base_directory, best_exp_id)
            if 'final_results' in store.tables and not args.all_ckpts:
                table_name = 'final_results'
                index_id = 0
            else:
                table_name = 'checkpoints'
                print(f'Warning: final_results table not found for exp id '
                      f'{best_exp_id}, using the last checkpoint')
                index_id = -1  # use last checkpoint
            ckpts = store[table_name]
            print('loading from exp id:', best_exp_id, ' reward: ',
                  ckpts.df['5_rewards'].iloc[index_id]
                  if '5_rewards' in ckpts.df else "training not finished")

            def dump_model(sel_ckpts, sel_index_id, sel_path):
                P = {}
                # mapper = ch.device('cuda:0')
                for name in [
                        'val_model', 'policy_model', 'val_opt', 'policy_opt',
                        'adversary_policy_model', 'adversary_val_model',
                        'adversary_policy_opt', 'adversary_val_opt'
                ]:
                    if name in sel_ckpts.df:
                        print(f'Saving {name} out of {len(sel_ckpts.df[name])}')
                        P[name] = sel_ckpts.get_state_dict(
                            sel_ckpts.df[name].iloc[sel_index_id])
                P['envs'] = sel_ckpts.get_pickle(
                    sel_ckpts.df['envs'].iloc[sel_index_id])
                ch.save(P, sel_path)
                print('\n', sel_path, 'saved.\n')

            if not args.all_ckpts:
                if args.output is None:
                    path = f"best_model-{alg_name}-{env_name}.{best_exp_id[:8]}.model"
                else:
                    path = args.output
                dump_model(ckpts, index_id, path)
            else:
                iters = ckpts.df['iteration']
                for i, it in enumerate(iters):
                    if i % args.dump_step != 0:
                        continue
                    path = f"best_model-{alg_name}-{env_name}.{best_exp_id[:8]}.iter{it}.model"
                    if args.output is not None:
                        if not os.path.exists(args.output):
                            os.makedirs(args.output)
                        path = os.path.join(args.output, path)
                    dump_model(ckpts, i, path)
            store.close()
        else:
            raise ValueError('no usable exp found! Cannot load.')

    if not args.all_exp:
        if args.best:
            if args.attack:
                # Under attack evaluation, the lowest reward is the best run.
                sel_exp_id = all_exp_id[ind[0]]
            else:
                sel_exp_id = all_exp_id[ind[-1]]
        else:
            if args.exp_id:
                sel_exp_id = args.exp_id
            else:
                sel_exp_id = all_exp_id[ind[n_exps // 2]]
        dump_one_exp_id(sel_exp_id)
    else:
        for sel_exp_id in all_exp_id:
            dump_one_exp_id(sel_exp_id)
try:
    shutil.rmtree(OUT_DIR)
except:
    pass
os.mkdir(OUT_DIR)


def f(x):
    return (x - 2.03)**2 + 3


if __name__ == "__main__":
    # Three parameters: initial guess for x, step size, tolerance
    combos = itertools.product(np.linspace(-15, 15, 3),
                               np.linspace(1, 1e-5, 3),
                               [1e-5])
    for x, step, tol in combos:
        store = Store(OUT_DIR)
        store.add_table('metadata', {
            'step_size': float,
            'tolerance': float,
            'initial_x': float,
            'out_dir': str
        })
        store.add_table('result', {'final_x': float, 'final_opt': float})
        store.add_table('running_log', {
            'current_x': float,
            'current_f': float
        })
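        # The sample is truncated here. A minimal completion sketch, assuming
        # a plain gradient-descent loop on f (df/dx is 2 * (x - 2.03)); the
        # iteration cap is an added safeguard so that divergent step sizes
        # still terminate.
        store['metadata'].append_row({
            'step_size': step,
            'tolerance': tol,
            'initial_x': x,
            'out_dir': OUT_DIR
        })
        for _ in range(10000):
            store.log_table_and_tb('running_log', {
                'current_x': x,
                'current_f': f(x)
            })
            store['running_log'].flush_row()
            grad = 2 * (x - 2.03)
            if abs(grad) < tol:
                break
            x -= step * grad
        store['result'].append_row({'final_x': x, 'final_opt': f(x)})
        store.close()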