Example #1
def test_L1Ball(algorithm, lr):
    # Setup
    constraint = chop.constraints.L1Ball(alpha)
    prox = constraint.prox
    lmo = constraint.lmo
    assert (constraint.prox(w) == w).all()
    w_t = Variable(torch.zeros_like(w), requires_grad=True)

    constraint_oracles = {
        stochastic.PGD.name: {
            'prox': [prox]
        },
        stochastic.PGDMadry.name: {
            'prox': [prox],
            'lmo': [lmo]
        },
        stochastic.FrankWolfe.name: {
            'lmo': [lmo]
        },
        stochastic.S3CM.name: {
            'prox1': [prox],
            'prox2': [prox]
        }
    }

    optimizer = algorithm([w_t], **(constraint_oracles[algorithm.name]), lr=lr)
    criterion = torch.nn.MSELoss(reduction='mean')

    # Logging
    store = Store(OUT_DIR)
    store.add_table('metadata', {'algorithm': str, 'lr': float})

    store['metadata'].append_row({'algorithm': optimizer.name, 'lr': lr})
    store.add_table(optimizer.name, {
        'func_val': float,
        'certificate': float,
        'norm(w_t)': float
    })
    cert = torch.tensor(np.inf)
    for ii in range(MAX_ITER):
        optimizer.zero_grad()
        loss = criterion(X.mv(w_t), y)
        loss.backward()

        optimizer.step()

        try:
            cert = next(optimizer.certificate)  # only one parameter here
        except AttributeError:
            cert = torch.tensor(np.nan)

        store.log_table_and_tb(
            optimizer.name, {
                'func_val': loss.item(),
                'certificate': cert.item(),
                'norm(w_t)': sum(abs(w_t)).item()
            })
        store[optimizer.name].flush_row()

    store.close()
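Once this test has run, the logged tables can be reopened from the store directory. A minimal read-back sketch (assuming OUT_DIR is the same base directory used above; the subdirectory lookup is illustrative):

import os
from cox.store import Store

exp_id = os.listdir(OUT_DIR)[0]            # each Store(OUT_DIR) creates its own experiment subdirectory
reader = Store(OUT_DIR, exp_id, mode='r')  # reopen an existing store read-only
print(reader['metadata'].df)               # cox exposes each table as a pandas DataFrame
reader.close()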
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--out-dir',
                        type=str,
                        required=True,
                        help='Out directory to save results to')
    parser.add_argument('--df-path',
                        type=str,
                        required=True,
                        help='Input dataframe to draw annotations from')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    out_dir = Path(args.out_dir)
    out_dir.mkdir(exist_ok=True)

    s = Store(str(args.out_dir))

    # held_outs
    s.add_table('out', {'dists': s.OBJECT})

    df = load_dfs(path=args.df_path)
    dists = model_fit(args.debug, NUM_BETAS, df)

    s['out'].append_row({'dists': dists})

    print('store located in:', s.path)
    print(
        f"In Beta-Binomial Model Analysis.ipynb: set INPUT_DATA = '{s.path}'"
    )
Example #3
def train_configs(configs: List[Config],
                  n_seeds: List[int],
                  targets: List[int],
                  n_epochs: int,
                  save_dir_artifacts: str,
                  save_dir_models: str,
                  data_aug: bool = False,
                  save_models: bool = True,
                  adv_acc: bool = False,
                  run_time_str: str = None):

    loaders_BIN, normalization_function_BIN, label_map_BIN = utils.get_binary_dataset(
        batch_size=256, transfer=True, data_aug=data_aug, targets=targets, per_target=128, random=False
    )
    train_loader_BIN, test_loader_BIN = loaders_BIN

    run_time_str = run_time_str if run_time_str is not None else datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    for seed in n_seeds:
        for config in configs:
            dir_name = f"{config.name}_{run_time_str}_{seed}"
            store = Store(save_dir_artifacts, dir_name)
            writer = store.tensorboard

            if adv_acc:
                metrics = {
                    'test_accuracy_True': float,
                    'test_accuracy_False': float,
                    'epoch': int}
            else:
                metrics = {
                    'test_accuracy_False': float,
                    'epoch': int}

            store.add_table('result', metrics)

            # std_linear_net = utils.Linear(Nfeatures=3*32*32, Nclasses=2).cuda()
            model = get_new_model(config.robust_model)
            if not config.fine_tune:
                for param in model.parameters():
                    param.requires_grad = False
            model.fc = nn.Linear(512, len(targets)).cuda()
            model.train()

            train_model(model,
                        train_loader_BIN,
                        test_loader_BIN,
                        train_loop,
                        eval_loop,
                        step_size=config.step_size,
                        epochs=n_epochs,
                        normalization=normalization_function_BIN,
                        store=store,
                        adv_train=config.adv_train,
                        log_iterations=10,
                        adv_acc=adv_acc
                        )
            if save_models:
                path = f"{save_dir_models}/{dir_name}.pt"
                torch.save(model.state_dict(), path)
Example #4
def train_model(args,
                model,
                optim,
                train_dl: DataLoader,
                valid_dl: DataLoader,
                store: Store,
                device,
                attack=None,
                ratio: int = 0):
    """
    Generic training routine, which is flexible to allow both standard and adversarial training.
    """
    start_time = time.time()

    # Initial setup
    store.add_table(consts.LOGS_TABLE, consts.LOGS_SCHEMA)
    store.add_table(consts.ARGS_TABLE, consts.ARGS_SCHEMA)
    args_info = {
        'epochs': args['--epochs'],
        'batch_size': args['--batch-size'],
        'model': 'mnist'
    }

    # store[consts.ARGS_TABLE].append_row(args_info)

    model.to(device)

    for epoch in range(args['--epochs']):
        # Train for one epoch
        train_acc, train_loss = _internal_loop(args, True, model, optim,
                                               train_dl, epoch, store, device)

        # Evaluate on validation
        with torch.no_grad():
            valid_acc, valid_loss = _internal_loop(args, False, model, None,
                                                   valid_dl, epoch, store,
                                                   device)

        # Log
        log_info = {
            'epoch': epoch,
            'train_loss': train_loss,
            'valid_loss': valid_loss,
            'train_error_rate': 1 - train_acc,
            'valid_error_rate': 1 - valid_acc,
            'valid_adv_error_rate': -1,
            'time': time.time() - start_time
        }

        # store[consts.LOGS_TABLE].append_row(log_info)

    return model
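The routine above pulls consts.LOGS_TABLE and consts.LOGS_SCHEMA from the surrounding project. A hypothetical definition consistent with the log_info dict it builds (an assumption, not the project's actual constants):

# Hypothetical constants matching the keys logged in log_info above; the real
# definitions live in the project's consts module.
LOGS_TABLE = 'logs'
LOGS_SCHEMA = {
    'epoch': int,
    'train_loss': float,
    'valid_loss': float,
    'train_error_rate': float,
    'valid_error_rate': float,
    'valid_adv_error_rate': float,
    'time': float,
}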
Example #5
def plot_metric(metrics: List[str],
                metric_labels: List[str],
                labels: List[str],
                n_seeds: int,
                config_groups: List[str],
                plot_name: str,
                save_dir_artifacts: str,
                save_dir_plots: str,
                figsize: List[int] = None):
    # plt.figure(figsize=(8, 6))
    conf_group_n = len(config_groups)
    metrics_n = len(metrics)

    n_rows = metrics_n * conf_group_n // 2
    n_cols = 2 if len(config_groups) > 1 else 1
    fig, axs = plt.subplots(n_rows, n_cols,
                            figsize=figsize if figsize else (6 * conf_group_n, 6 * metrics_n),
                            sharex=False, sharey='row')
    if n_rows == 1 and n_cols == 1:
        axs = [axs]
    elif n_rows > 1:
        axs = [ax for ax_row in axs for ax in ax_row]
    for metric_idx, metric in enumerate(metrics):
        for conf_group_idx, config_group in enumerate(config_groups):
            for config_idx, run_id in enumerate(config_group.run_ids):
                df_sum = None
                df_sum_squared = None
                K = None
                for seed in range(n_seeds):
                    dir_path = f'{run_id}_{seed}'
                    df = Store(save_dir_artifacts, dir_path)['result'].df
                    # plt.plot(df['epoch'], df['test_accuracy_False'], label=config.name)
                    if K is None:
                        K = df
                        df_sum = df - K
                        df_sum_squared = (df - K) ** 2
                    else:
                        df_sum += df - K
                        df_sum_squared += (df - K) ** 2

                # mean/variance via the shifted-data formula, using the first seed's run as the shift K
                df_mean = K + df_sum / n_seeds
                df_var = (df_sum_squared - df_sum * df_sum / n_seeds) / (n_seeds - 1)
                mean = df_mean[metric]
                stddev = df_var[metric] ** (0.5)
                label = labels[config_idx] if labels is not None else run_id
                linestyle = 'dashed' if 'STD_network' in run_id else 'solid'
                color = '#ff7f0e' if 'STD_train' in run_id else '#1f77b4'
                axs[metric_idx * conf_group_n + conf_group_idx].plot(df['epoch'], mean, label=label,
                                                                     linestyle=linestyle, color=color)
                axs[metric_idx * conf_group_n + conf_group_idx].fill_between(df['epoch'], mean - stddev, mean + stddev,
                                                                             alpha=0.2, color=color)
            axs[metric_idx * conf_group_n + conf_group_idx].set_title(config_group.title)
            axs[metric_idx * conf_group_n + conf_group_idx].set_xlabel('Epoch')
            if conf_group_idx == 0:
                axs[metric_idx * conf_group_n + conf_group_idx].set_ylabel(metric_labels[metric_idx])
            if metric_idx * conf_group_n + conf_group_idx == 1:
                axs[metric_idx * conf_group_n + conf_group_idx].legend(loc='best')
    fig.tight_layout()
    fig.savefig(f'{save_dir_plots}/{plot_name}.pdf')
Example #6
def initialize_cox_store(cox_dir='cox') -> Store:
    store = Store(cox_dir)
    store.add_table(
        'experiments',
        {
            'k': int,
            'random_state': int,
            'Train AUC': float,
            'Validation AUC': float,
            'Test AUC': float,
            'Test MCC': float,
            'start_time': str,
            # 'runtime(sec)': float,
            'classifier': str,
            'classifier_full': str
        })
    return store
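A minimal usage sketch for the store returned above (the metric values are placeholders, not results from the original project):

store = initialize_cox_store('cox')
store['experiments'].append_row({
    'k': 5,
    'random_state': 0,
    'Train AUC': 0.91,
    'Validation AUC': 0.88,
    'Test AUC': 0.87,
    'Test MCC': 0.55,
    'start_time': '2021-01-01 12:00:00',
    'classifier': 'rf',
    'classifier_full': 'RandomForestClassifier(n_estimators=100)'
})
store.close()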
Example #7
def test_L1Ball(algorithm, step_size):
    # Setup
    constraint = constopt.constraints.L1Ball(alpha)
    assert (constraint.prox(w) == w).all()
    w_t = Variable(torch.zeros_like(w), requires_grad=True)

    optimizer = algorithm([w_t], constraint)
    criterion = torch.nn.MSELoss(reduction='mean')

    # Logging
    store = Store(OUT_DIR)
    store.add_table('metadata', {'algorithm': str, 'step-size': float})

    store['metadata'].append_row({
        'algorithm': optimizer.name,
        'step-size': step_size
    })
    store.add_table(optimizer.name, {
        'func_val': float,
        'FW gap': float,
        'norm(w_t)': float
    })
    gap = torch.tensor(np.inf)
    for ii in range(MAX_ITER):
        optimizer.zero_grad()
        loss = criterion(X.mv(w_t), y)
        loss.backward()

        # Compute gap
        with torch.no_grad():
            gap = constraint.fw_gap(w_t.grad, w_t)

        optimizer.step(step_size)
        store.log_table_and_tb(
            optimizer.name, {
                'func_val': loss.item(),
                'FW gap': gap.item(),
                'norm(w_t)': sum(abs(w_t)).item()
            })
        store[optimizer.name].flush_row()

    store.close()
Example #8
def test_L1Ball(algorithm, lr):
    # Setup
    constraint = chop.constraints.L1Ball(alpha)
    assert (constraint.prox(w) == w).all()
    w_t = Variable(torch.zeros_like(w), requires_grad=True)

    optimizer = algorithm([w_t], constraint, lr=lr)
    criterion = torch.nn.MSELoss(reduction='mean')

    # Logging
    store = Store(OUT_DIR)
    store.add_table('metadata', {'algorithm': str, 'lr': float})

    store['metadata'].append_row({'algorithm': optimizer.name, 'lr': lr})
    store.add_table(optimizer.name, {
        'func_val': float,
        'certificate': float,
        'norm(w_t)': float
    })
    cert = torch.tensor(np.inf)
    for ii in range(MAX_ITER):
        optimizer.zero_grad()
        loss = criterion(X.mv(w_t), y)
        loss.backward()

        optimizer.step()

        cert = next(optimizer.certificate)  # only one parameter here

        store.log_table_and_tb(
            optimizer.name, {
                'func_val': loss.item(),
                'certificate': cert.item(),
                'norm(w_t)': sum(abs(w_t)).item()
            })
        store[optimizer.name].flush_row()

    store.close()
Example #9
def run(args):
    if args['--local']:
        args['--base-path'] = ('/Users/andrei/Google Drive/_Facultate/'
                               'MPhil Cambridge/Dissertation/project')
    else:
        args['--base-path'] = ('/content/drive/My Drive/_Facultate/'
                               'MPhil Cambridge/Dissertation/project')

    initialize(args, seed=0)

    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    OUT_DIR = path.join(args['--base-path'], 'logs')
    store = Store(OUT_DIR)

    if args['--model-to-train'] == 'mnist':
        model = MnistClassifier()
        optim = torch.optim.Adam(model.parameters(), lr=1e-3)

        train_dl = get_mnist_dl(args, train=True)
        valid_dl = get_mnist_dl(args, train=False)

        train_model(args, model, optim, train_dl, valid_dl, store, device)
Example #10
def main(params):
    for k, v in zip(params.keys(), params.values()):
        assert v is not None, f"Value for {k} is None"

    # #
    # Setup logging
    # #
    metadata_schema = schema_from_dict(params)
    base_directory = params['out_dir']
    store = Store(base_directory)

    # redirect stderr, stdout to file
    """
    def make_err_redirector(stream_name):
        tee = Tee(os.path.join(store.path, stream_name + '.txt'), stream_name)
        return tee

    stderr_tee = make_err_redirector('stderr')
    stdout_tee = make_err_redirector('stdout')
    """

    # Store the experiment path and the git commit for this experiment
    metadata_schema.update({
        'store_path':str,
        'git_commit':str
    })

    repo = git.Repo(path=os.path.dirname(os.path.realpath(__file__)),
                    search_parent_directories=True)

    metadata_table = store.add_table('metadata', metadata_schema)
    metadata_table.update_row(params)
    metadata_table.update_row({
        'store_path':store.path,
        'git_commit':repo.head.object.hexsha
    })

    metadata_table.flush_row()

    # Table for checkpointing models and envs

    if params['save_iters'] > 0:
        store.add_table('checkpoints', {
            'val_model':store.PYTORCH_STATE,
            'policy_model':store.PYTORCH_STATE,
            'envs':store.PICKLE,
            'policy_opt': store.PYTORCH_STATE,
            'val_opt': store.PYTORCH_STATE,
            'iteration':int
        })

    # The trainer object is in charge of sampling trajectories and
    # taking PPO/TRPO optimization steps

    p = Trainer.agent_from_params(params, store=store)
    if 'load_model' in params and params['load_model']:
        print('Loading pretrained model', params['load_model'])
        pretrained_models = torch.load(params['load_model'])
        p.policy_model.load_state_dict(pretrained_models['policy_model'])
        p.val_model.load_state_dict(pretrained_models['val_model'])
        # Load optimizer states.
        # p.POLICY_ADAM.load_state_dict(pretrained_models['policy_opt'])
        # p.val_opt.load_state_dict(pretrained_models['val_opt'])
        # Restore environment parameters, like mean and std.
        p.envs = pretrained_models['envs']
    rewards = []

    # Table for final results
    final_table = store.add_table('final_results', {
        'iteration':int,
        '5_rewards':float,
        'terminated_early':bool,
        'val_model':store.PYTORCH_STATE,
        'policy_model':store.PYTORCH_STATE,
        'envs':store.PICKLE,
        'policy_opt': store.PYTORCH_STATE,
        'val_opt': store.PYTORCH_STATE
    })


    def finalize_table(iteration, terminated_early, rewards):
        final_5_rewards = np.array(rewards)[-5:].mean()
        final_table.append_row({
            'iteration':iteration,
            '5_rewards':final_5_rewards,
            'terminated_early':terminated_early,
            'val_model': p.val_model.state_dict(),
            'policy_model': p.policy_model.state_dict(),
            'policy_opt': p.POLICY_ADAM.state_dict(),
            'val_opt': p.val_opt.state_dict(),
            'envs':p.envs
        })

    # Try-except so that we save if the user interrupts the process
    try:
        for i in range(params['train_steps']):
            print('Step %d' % (i,))
            if params['save_iters'] > 0 and i % params['save_iters'] == 0:
                store['checkpoints'].append_row({
                    'iteration':i,
                    'val_model': p.val_model.state_dict(),
                    'policy_model': p.policy_model.state_dict(),
                    'policy_opt': p.POLICY_ADAM.state_dict(),
                    'val_opt': p.val_opt.state_dict(),
                    'envs':p.envs
                })
            
            mean_reward = p.train_step()
            rewards.append(mean_reward)

        finalize_table(i, False, rewards)
    except KeyboardInterrupt:
        torch.save(p.val_model, 'saved_experts/%s-expert-vf' % (params['game'],))
        torch.save(p.policy_model, 'saved_experts/%s-expert-pol' % (params['game'],))

        finalize_table(i, True, rewards)
    store.close()
Example #11
def main(params):
    for k, v in zip(params.keys(), params.values()):
        assert v is not None, f"Value for {k} is None"

    # #
    # Setup logging
    # #
    metadata_schema = schema_from_dict(params)
    base_directory = params['out_dir']
    store = Store(base_directory)

    # redirect stderr, stdout to file
    """
    def make_err_redirector(stream_name):
        tee = Tee(os.path.join(store.path, stream_name + '.txt'), stream_name)
        return tee

    stderr_tee = make_err_redirector('stderr')
    stdout_tee = make_err_redirector('stdout')
    """

    # Store the experiment path and the git commit for this experiment
    metadata_schema.update({
        'store_path': str,
        'git_commit': str
    })

    repo = git.Repo(path=os.path.dirname(os.path.realpath(__file__)),
                    search_parent_directories=True)

    metadata_table = store.add_table('metadata', metadata_schema)
    metadata_table.update_row(params)
    metadata_table.update_row({
        'store_path': store.path,
        'git_commit': repo.head.object.hexsha
    })

    metadata_table.flush_row()

    # Extra items in table when minimax training is enabled.
    if params['mode'] == "adv_ppo" or params['mode'] == 'adv_trpo' or params['mode'] == 'adv_sa_ppo':
        adversary_table_dict = {
            'adversary_policy_model': store.PYTORCH_STATE,
            'adversary_policy_opt': store.PYTORCH_STATE,
            'adversary_val_model': store.PYTORCH_STATE,
            'adversary_val_opt': store.PYTORCH_STATE,
        }
    else:
        adversary_table_dict = {}

    # Table for checkpointing models and envs
    if params['save_iters'] > 0:
        checkpoint_dict = {
            'val_model': store.PYTORCH_STATE,
            'policy_model': store.PYTORCH_STATE,
            'envs': store.PICKLE,
            'policy_opt': store.PYTORCH_STATE,
            'val_opt': store.PYTORCH_STATE,
            'iteration': int,
            '5_rewards': float,
        }
        checkpoint_dict.update(adversary_table_dict)
        store.add_table('checkpoints', checkpoint_dict)

    # The trainer object is in charge of sampling trajectories and
    # taking PPO/TRPO optimization steps

    p = Trainer.agent_from_params(params, store=store)
    if params['initial_std'] != 1.0:
        p.policy_model.log_stdev.data[:] = np.log(params['initial_std'])
    if 'load_model' in params and params['load_model']:
        print('Loading pretrained model', params['load_model'])
        pretrained_model = torch.load(params['load_model'])
        if 'policy_model' in pretrained_model:
            p.policy_model.load_state_dict(pretrained_model['policy_model'])
        if params['deterministic']:
            print('Policy runs in deterministic mode. Ignoring Gaussian noise.')
            p.policy_model.log_stdev.data[:] = -100
        else:
            print('Policy runs in non deterministic mode with Gaussian noise.')
        if 'val_model' in pretrained_model:
            p.val_model.load_state_dict(pretrained_model['val_model'])
        if 'policy_opt' in pretrained_model:
            p.POLICY_ADAM.load_state_dict(pretrained_model['policy_opt'])
        if 'val_opt' in pretrained_model:
            p.val_opt.load_state_dict(pretrained_model['val_opt'])
        # Load adversary models.
        if 'no_load_adv_policy' in params and params['no_load_adv_policy']:
            print('Skipping loading adversary models.')
        else:
            if 'adversary_policy_model' in pretrained_model and hasattr(p, 'adversary_policy_model'):
                p.adversary_policy_model.load_state_dict(pretrained_model['adversary_policy_model'])
            if 'adversary_val_model' in pretrained_model and hasattr(p, 'adversary_val_model'):
                p.adversary_val_model.load_state_dict(pretrained_model['adversary_val_model'])
            if 'adversary_policy_opt' in pretrained_model and hasattr(p, 'adversary_policy_opt'):
                p.adversary_policy_opt.load_state_dict(pretrained_model['adversary_policy_opt'])
            if 'adversary_val_opt' in pretrained_model and hasattr(p, 'adversary_val_opt'):
                p.adversary_val_opt.load_state_dict(pretrained_model['adversary_val_opt'])
        # Load optimizer states.
        # p.POLICY_ADAM.load_state_dict(pretrained_models['policy_opt'])
        # p.val_opt.load_state_dict(pretrained_models['val_opt'])
        # Restore environment parameters, like mean and std.
        if 'envs' in pretrained_model:
            p.envs = pretrained_model['envs']
        for e in p.envs:
            e.setup_visualization(params['show_env'], params['save_frames'], params['save_frames_path'])
    rewards = []

    # Table for final results
    final_dict = {
        'iteration': int,
        '5_rewards': float,
        'terminated_early': bool,
        'val_model': store.PYTORCH_STATE,
        'policy_model': store.PYTORCH_STATE,
        'envs': store.PICKLE,
        'policy_opt': store.PYTORCH_STATE,
        'val_opt': store.PYTORCH_STATE,
    }
    final_dict.update(adversary_table_dict)
    final_table = store.add_table('final_results', final_dict)

    def add_adversary_to_table(p, table_dict):
        if params['mode'] == "adv_ppo" or params['mode'] == 'adv_trpo' or params['mode'] == 'adv_sa_ppo':
            table_dict["adversary_policy_model"] = p.adversary_policy_model.state_dict()
            table_dict["adversary_policy_opt"] = p.ADV_POLICY_ADAM.state_dict()
            table_dict["adversary_val_model"] = p.adversary_val_model.state_dict()
            table_dict["adversary_val_opt"] = p.adversary_val_opt.state_dict()
        return table_dict

    def finalize_table(iteration, terminated_early, rewards):
        final_5_rewards = np.array(rewards)[-5:].mean()
        final_dict = {
            'iteration': iteration,
            '5_rewards': final_5_rewards,
            'terminated_early': terminated_early,
            'val_model': p.val_model.state_dict(),
            'policy_model': p.policy_model.state_dict(),
            'policy_opt': p.POLICY_ADAM.state_dict(),
            'val_opt': p.val_opt.state_dict(),
            'envs': p.envs
        }
        final_dict = add_adversary_to_table(p, final_dict)
        final_table.append_row(final_dict)

    ret = 0
    # Try-except so that we save if the user interrupts the process
    try:
        for i in range(params['train_steps']):
            print('Step %d' % (i,))
            if params['save_iters'] > 0 and i % params['save_iters'] == 0 and i != 0:
                final_5_rewards = np.array(rewards)[-5:].mean()
                print(f'Saving checkpoints to {store.path} with reward {final_5_rewards:.5g}')
                checkpoint_dict = {
                    'iteration': i,
                    'val_model': p.val_model.state_dict(),
                    'policy_model': p.policy_model.state_dict(),
                    'policy_opt': p.POLICY_ADAM.state_dict(),
                    'val_opt': p.val_opt.state_dict(),
                    'envs': p.envs,
                    '5_rewards': final_5_rewards,
                }
                checkpoint_dict = add_adversary_to_table(p, checkpoint_dict)
                store['checkpoints'].append_row(checkpoint_dict)

            mean_reward = p.train_step()
            rewards.append(mean_reward)

            # For debugging and tuning, we can break in the middle.
            if i == params['force_stop_step']:
                print('Terminating early because --force-stop-step is set.')
                raise KeyboardInterrupt

        finalize_table(i, False, rewards)
    except KeyboardInterrupt:
        finalize_table(i, True, rewards)
        ret = 1
    except Exception:
        print("An error occurred during training:")
        traceback.print_exc()
        # Other errors, make sure to finalize the cox store before exiting.
        finalize_table(i, True, rewards)
        ret = -1
    print(f'Models saved to {store.path}')
    store.close()
    return ret
Example #12
import os
import shutil

from cox.readers import CollectionReader
from cox.store import Store

## Code sample to go alongside Walkthrough #2 in README.md

OUT_DIR = '/tmp/cox_example/'

try:
    shutil.rmtree(OUT_DIR)
except:
    pass

os.mkdir(OUT_DIR)

if __name__ == "__main__":
    for slope in range(5):
        store = Store(OUT_DIR)
        store.add_table('metadata', {'slope': int})
        store.add_table('line_graphs', {'mx': int, 'mx^2': int})
        store['metadata'].append_row({'slope': slope})

        for x in range(100):
            store.log_table_and_tb('line_graphs', {
                'mx': slope * x,
                'mx^2': slope * (x**2)
            })
            store['line_graphs'].flush_row()

        store.close()

    ### Collection reading
    print("Done experiments, printing results...")
Example #13
File: test.py  Project: Saeid-jhn/SA_PPO
def main(params):
    override_params = copy.deepcopy(params)
    excluded_params = [
        'config_path', 'out_dir_prefix', 'num_episodes', 'row_id', 'exp_id',
        'load_model', 'seed', 'deterministic', 'scan_config',
        'compute_kl_cert', 'use_full_backward'
    ]
    sarsa_params = [
        'sarsa_enable', 'sarsa_steps', 'sarsa_eps', 'sarsa_reg',
        'sarsa_model_path'
    ]
    # override_params keeps only the flags that may override values in the config file; drop the excluded bookkeeping flags.
    for k in list(override_params.keys()):
        if k in excluded_params:
            del override_params[k]

    # Append a prefix for output path.
    if params['out_dir_prefix']:
        params['out_dir'] = os.path.join(params['out_dir_prefix'],
                                         params['out_dir'])
        print(f"setting output dir to {params['out_dir']}")

    if params['config_path']:
        # Load from a pretrained model using existing config.
        # First we need to create the model using the given config file.
        json_params = json.load(open(params['config_path']))

        params = override_json_params(params, json_params,
                                      excluded_params + sarsa_params)

    if params['sarsa_enable']:
        assert params['attack_method'] == "none" or params['attack_method'] is None, \
                "--train-sarsa is only available when --attack-method=none, but got {}".format(params['attack_method'])

    if 'load_model' in params and params['load_model']:
        for k, v in zip(params.keys(), params.values()):
            assert v is not None, f"Value for {k} is None"

        # Create the agent from config file.
        p = Trainer.agent_from_params(params, store=None)
        print('Loading pretrained model', params['load_model'])
        pretrained_model = torch.load(params['load_model'])
        if 'policy_model' in pretrained_model:
            p.policy_model.load_state_dict(pretrained_model['policy_model'])
        if 'val_model' in pretrained_model:
            p.val_model.load_state_dict(pretrained_model['val_model'])
        if 'policy_opt' in pretrained_model:
            p.POLICY_ADAM.load_state_dict(pretrained_model['policy_opt'])
        if 'val_opt' in pretrained_model:
            p.val_opt.load_state_dict(pretrained_model['val_opt'])
        # Restore environment parameters, like mean and std.
        if 'envs' in pretrained_model:
            p.envs = pretrained_model['envs']
        for e in p.envs:
            e.normalizer_read_only = True
            e.setup_visualization(params['show_env'], params['save_frames'],
                                  params['save_frames_path'])
    else:
        # Load from experiment directory. No need to use a config.
        base_directory = params['out_dir']
        store = Store(base_directory, params['exp_id'], mode='r')
        if params['row_id'] < 0:
            row = store['final_results'].df
        else:
            checkpoints = store['checkpoints'].df
            row_id = params['row_id']
            row = checkpoints.iloc[row_id:row_id + 1]
        print("row to test: ", row)
        if params['cpu'] is None:
            cpu = False
        else:
            cpu = params['cpu']
        p, _ = Trainer.agent_from_data(store,
                                       row,
                                       cpu,
                                       extra_params=params,
                                       override_params=override_params,
                                       excluded_params=excluded_params)
        store.close()

    rewards = []

    if params['sarsa_enable']:
        num_steps = params['sarsa_steps']
        # learning rate scheduler: linearly anneal the learning rate after 2/3 of the steps
        lr_decrease_point = num_steps * 2 / 3
        decreasing_steps = num_steps - lr_decrease_point
        lr_sch = lambda epoch: 1.0 if epoch < lr_decrease_point else (
            decreasing_steps - epoch + lr_decrease_point) / decreasing_steps
        # robust training scheduler. Currently using 1/3 epochs for warmup, 1/3 for schedule and 1/3 for final training.
        eps_start_point = int(num_steps * 1 / 3)
        robust_eps_scheduler = LinearScheduler(
            params['sarsa_eps'],
            f"start={eps_start_point},length={eps_start_point}")
        robust_beta_scheduler = LinearScheduler(
            1.0, f"start={eps_start_point},length={eps_start_point}")
        # reinitialize value model, and run value function learning steps.
        p.setup_sarsa(lr_schedule=lr_sch,
                      eps_scheduler=robust_eps_scheduler,
                      beta_scheduler=robust_beta_scheduler)
        # Run Sarsa training.
        for i in range(num_steps):
            print(
                f'Step {i+1} / {num_steps}, lr={p.sarsa_scheduler.get_last_lr()}'
            )
            mean_reward = p.sarsa_step()
            rewards.append(mean_reward)
            # for w in p.val_model.parameters():
            #     print(f'{w.size()}, {torch.norm(w.view(-1), 2)}')
        # Save Sarsa model.
        saved_model = {
            'state_dict': p.sarsa_model.state_dict(),
            'metadata': params,
        }
        torch.save(saved_model, params['sarsa_model_path'])
    else:
        print('Gaussian noise in policy:')
        print(torch.exp(p.policy_model.log_stdev))
        if params['deterministic']:
            print(
                'Policy runs in deterministic mode. Ignoring Gaussian noise.')
            p.policy_model.log_stdev.data[:] = -100
        num_episodes = params['num_episodes']
        all_rewards = []
        all_lens = []
        all_kl_certificates = []

        for i in range(num_episodes):
            print('Episode %d / %d' % (i + 1, num_episodes))
            ep_length, ep_reward, actions, action_means, states, kl_certificates = p.run_test(
                compute_bounds=params['compute_kl_cert'],
                use_full_backward=params['use_full_backward'])
            if i == 0:
                all_actions = actions.copy()
                all_states = states.copy()
            else:
                all_actions = np.concatenate((all_actions, actions), axis=0)
                all_states = np.concatenate((all_states, states), axis=0)
            if params['compute_kl_cert']:
                print('Epoch KL certificates:', kl_certificates)
                all_kl_certificates.append(kl_certificates)
            all_rewards.append(ep_reward)
            all_lens.append(ep_length)

        attack_dir = 'attack-{}-eps-{}'.format(params['attack_method'],
                                               params['attack_eps'])
        if 'sarsa' in params['attack_method']:
            attack_dir += '-sarsa_steps-{}-sarsa_eps-{}-sarsa_reg-{}'.format(
                params['sarsa_steps'], params['sarsa_eps'],
                params['sarsa_reg'])
            if 'action' in params['attack_method']:
                attack_dir += '-attack_sarsa_action_ratio-{}'.format(
                    params['attack_sarsa_action_ratio'])
        save_path = os.path.join(params['out_dir'], params['exp_id'],
                                 attack_dir)
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        for name, value in [('actions', all_actions), ('states', all_states),
                            ('rewards', all_rewards), ('length', all_lens)]:
            with open(os.path.join(save_path, '{}.pkl'.format(name)),
                      'wb') as f:
                pickle.dump(value, f)
        print(params)
        with open(os.path.join(save_path, 'params.json'), 'w') as f:
            json.dump(params, f, indent=4)

        print('\n')
        print('all rewards:', all_rewards)
        print('rewards stats:\nmean: {}, std:{}, min:{}, max:{}'.format(
            np.mean(all_rewards), np.std(all_rewards), np.min(all_rewards),
            np.max(all_rewards)))
        if params['compute_kl_cert']:
            print('KL certificates stats: mean: {}, std: {}, min: {}, max: {}'.
                  format(np.mean(all_kl_certificates),
                         np.std(all_kl_certificates),
                         np.min(all_kl_certificates),
                         np.max(all_kl_certificates)))
Example #14
def main(params):
    override_params = copy.deepcopy(params)
    excluded_params = [
        'config_path', 'out_dir_prefix', 'num_episodes', 'row_id', 'exp_id',
        'load_model', 'seed', 'deterministic', 'noise_factor',
        'compute_kl_cert', 'use_full_backward', 'sqlite_path',
        'early_terminate'
    ]
    sarsa_params = [
        'sarsa_enable', 'sarsa_steps', 'sarsa_eps', 'sarsa_reg',
        'sarsa_model_path'
    ]
    imit_params = ['imit_enable', 'imit_epochs', 'imit_model_path', 'imit_lr']

    # override_params keeps only the flags that may override values in the config file; drop the excluded bookkeeping flags.
    for k in list(override_params.keys()):
        if k in excluded_params:
            del override_params[k]

    if params['sqlite_path']:
        print(
            f"Will save results in sqlite database in {params['sqlite_path']}")
        connection = sqlite3.connect(params['sqlite_path'])
        cur = connection.cursor()
        cur.execute('''create table if not exists attack_results
              (method varchar(20),
              mean_reward real,
              std_reward real,
              min_reward real,
              max_reward real,
              sarsa_eps real,
              sarsa_reg real,
              sarsa_steps integer,
              deterministic bool,
              early_terminate bool)''')
        connection.commit()
        # We will set this flag to True if we break early.
        early_terminate = False

    # Append a prefix for output path.
    if params['out_dir_prefix']:
        params['out_dir'] = os.path.join(params['out_dir_prefix'],
                                         params['out_dir'])
        print(f"setting output dir to {params['out_dir']}")

    if params['config_path']:
        # Load from a pretrained model using existing config.
        # First we need to create the model using the given config file.
        json_params = json.load(open(params['config_path']))

        params = override_json_params(
            params, json_params, excluded_params + sarsa_params + imit_params)

    if params['sarsa_enable']:
        assert params['attack_method'] == "none" or params['attack_method'] is None, \
                "--train-sarsa is only available when --attack-method=none, but got {}".format(params['attack_method'])

    if 'load_model' in params and params['load_model']:
        for k, v in zip(params.keys(), params.values()):
            assert v is not None, f"Value for {k} is None"

        # Create the agent from config file.
        p = Trainer.agent_from_params(params, store=None)
        print('Loading pretrained model', params['load_model'])
        pretrained_model = torch.load(params['load_model'])
        if 'policy_model' in pretrained_model:
            p.policy_model.load_state_dict(pretrained_model['policy_model'])
        if 'val_model' in pretrained_model:
            p.val_model.load_state_dict(pretrained_model['val_model'])
        if 'policy_opt' in pretrained_model:
            p.POLICY_ADAM.load_state_dict(pretrained_model['policy_opt'])
        if 'val_opt' in pretrained_model:
            p.val_opt.load_state_dict(pretrained_model['val_opt'])
        # Restore environment parameters, like mean and std.
        if 'envs' in pretrained_model:
            p.envs = pretrained_model['envs']
        for e in p.envs:
            e.normalizer_read_only = True
            e.setup_visualization(params['show_env'], params['save_frames'],
                                  params['save_frames_path'])
    else:
        # Load from experiment directory. No need to use a config.
        base_directory = params['out_dir']
        store = Store(base_directory, params['exp_id'], mode='r')
        if params['row_id'] < 0:
            row = store['final_results'].df
        else:
            checkpoints = store['checkpoints'].df
            row_id = params['row_id']
            row = checkpoints.iloc[row_id:row_id + 1]
        print("row to test: ", row)
        if params['cpu'] is None:
            cpu = False
        else:
            cpu = params['cpu']
        p, _ = Trainer.agent_from_data(store,
                                       row,
                                       cpu,
                                       extra_params=params,
                                       override_params=override_params,
                                       excluded_params=excluded_params)
        store.close()

    rewards = []

    print('Gaussian noise in policy:')
    print(torch.exp(p.policy_model.log_stdev))
    original_stdev = p.policy_model.log_stdev.clone().detach()
    if params['noise_factor'] != 1.0:
        p.policy_model.log_stdev.data[:] += np.log(params['noise_factor'])
    if params['deterministic']:
        print('Policy runs in deterministic mode. Ignoring Gaussian noise.')
        p.policy_model.log_stdev.data[:] = -100
    print('Gaussian noise in policy (after adjustment):')
    print(torch.exp(p.policy_model.log_stdev))

    if params['sarsa_enable']:
        num_steps = params['sarsa_steps']
        # learning rate scheduler: linearly anneal the learning rate after 2/3 of the steps
        lr_decrease_point = num_steps * 2 / 3
        decreasing_steps = num_steps - lr_decrease_point
        lr_sch = lambda epoch: 1.0 if epoch < lr_decrease_point else (
            decreasing_steps - epoch + lr_decrease_point) / decreasing_steps
        # robust training scheduler. Currently using 1/3 epochs for warmup, 1/3 for schedule and 1/3 for final training.
        eps_start_point = int(num_steps * 1 / 3)
        robust_eps_scheduler = LinearScheduler(
            params['sarsa_eps'],
            f"start={eps_start_point},length={eps_start_point}")
        robust_beta_scheduler = LinearScheduler(
            1.0, f"start={eps_start_point},length={eps_start_point}")
        # reinitialize value model, and run value function learning steps.
        p.setup_sarsa(lr_schedule=lr_sch,
                      eps_scheduler=robust_eps_scheduler,
                      beta_scheduler=robust_beta_scheduler)
        # Run Sarsa training.
        for i in range(num_steps):
            print(
                f'Step {i+1} / {num_steps}, lr={p.sarsa_scheduler.get_last_lr()}'
            )
            mean_reward = p.sarsa_step()
            rewards.append(mean_reward)
            # for w in p.val_model.parameters():
            #     print(f'{w.size()}, {torch.norm(w.view(-1), 2)}')
        # Save Sarsa model.
        saved_model = {
            'state_dict': p.sarsa_model.state_dict(),
            'metadata': params,
        }
        torch.save(saved_model, params['sarsa_model_path'])
    elif params['imit_enable']:
        num_epochs = params['imit_epochs']
        num_episodes = params['num_episodes']
        print('\n\n' + 'Start collecting data\n' + '-' * 80)
        for i in range(num_episodes):
            print('Collecting %d / %d episodes' % (i + 1, num_episodes))
            ep_length, ep_reward, actions, action_means, states, kl_certificates = p.run_test(
                compute_bounds=params['compute_kl_cert'],
                use_full_backward=params['use_full_backward'],
                original_stdev=original_stdev)
            not_dones = np.ones(len(actions))
            not_dones[-1] = 0
            if i == 0:
                all_actions = actions.copy()
                all_states = states.copy()
                all_not_dones = not_dones.copy()
            else:
                all_actions = np.concatenate((all_actions, actions), axis=0)
                all_states = np.concatenate((all_states, states), axis=0)
                all_not_dones = np.concatenate((all_not_dones, not_dones))
        print('Collected actions shape:', all_actions.shape)
        print('Collected states shape:', all_states.shape)
        p.setup_imit(lr=params['imit_lr'])
        p.imit_steps(torch.from_numpy(all_actions),
                     torch.from_numpy(all_states),
                     torch.from_numpy(all_not_dones), num_epochs)
        saved_model = {
            'state_dict': p.imit_network.state_dict(),
            'metadata': params,
        }
        torch.save(saved_model, params['imit_model_path'])
    else:
        num_episodes = params['num_episodes']
        all_rewards = []
        all_lens = []
        all_kl_certificates = []

        for i in range(num_episodes):
            print('Episode %d / %d' % (i + 1, num_episodes))
            ep_length, ep_reward, actions, action_means, states, kl_certificates = p.run_test(
                compute_bounds=params['compute_kl_cert'],
                use_full_backward=params['use_full_backward'],
                original_stdev=original_stdev)
            if i == 0:
                all_actions = actions.copy()
                all_states = states.copy()
            else:
                all_actions = np.concatenate((all_actions, actions), axis=0)
                all_states = np.concatenate((all_states, states), axis=0)
            if params['compute_kl_cert']:
                print('Epoch KL certificates:', kl_certificates)
                all_kl_certificates.append(kl_certificates)
            all_rewards.append(ep_reward)
            all_lens.append(ep_length)
            # Current step mean, std, min and max
            mean_reward, std_reward, min_reward, max_reward = np.mean(
                all_rewards), np.std(all_rewards), np.min(all_rewards), np.max(
                    all_rewards)

            if i > num_episodes // 5 and params['early_terminate'] and params[
                    'sqlite_path'] and params['attack_method'] != 'none':
                # Attempt to terminate early if another attack has already finished with a low reward.
                cur.execute(
                    "SELECT MIN(mean_reward) FROM attack_results WHERE deterministic=?;",
                    (params['deterministic'], ))
                current_best_reward = cur.fetchone()[0]
                print(
                    f'current best: {current_best_reward}, ours: {mean_reward} +/- {std_reward}, min: {min_reward}'
                )
                # Terminate if mean - 2*std is worse than the best, or our min is worse than the best.
                if current_best_reward is not None and (
                    (current_best_reward < mean_reward - 2 * std_reward) or
                    (min_reward > current_best_reward)):
                    print('terminating early!')
                    early_terminate = True
                    break

        attack_dir = 'attack-{}-eps-{}'.format(params['attack_method'],
                                               params['attack_eps'])
        if 'sarsa' in params['attack_method']:
            attack_dir += '-sarsa_steps-{}-sarsa_eps-{}-sarsa_reg-{}'.format(
                params['sarsa_steps'], params['sarsa_eps'],
                params['sarsa_reg'])
            if 'action' in params['attack_method']:
                attack_dir += '-attack_sarsa_action_ratio-{}'.format(
                    params['attack_sarsa_action_ratio'])
        save_path = os.path.join(params['out_dir'], params['exp_id'],
                                 attack_dir)
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        for name, value in [('actions', all_actions), ('states', all_states),
                            ('rewards', all_rewards), ('length', all_lens)]:
            with open(os.path.join(save_path, '{}.pkl'.format(name)),
                      'wb') as f:
                pickle.dump(value, f)
        print(params)
        with open(os.path.join(save_path, 'params.json'), 'w') as f:
            json.dump(params, f, indent=4)

        mean_reward, std_reward, min_reward, max_reward = np.mean(
            all_rewards), np.std(all_rewards), np.min(all_rewards), np.max(
                all_rewards)
        if params['compute_kl_cert']:
            print('KL certificates stats: mean: {}, std: {}, min: {}, max: {}'.
                  format(np.mean(all_kl_certificates),
                         np.std(all_kl_certificates),
                         np.min(all_kl_certificates),
                         np.max(all_kl_certificates)))
        # write results to sqlite.
        if params['sqlite_path']:
            method = params['attack_method']
            if params['attack_method'] == "sarsa":
                # Load sarsa parameters from checkpoint
                sarsa_ckpt = torch.load(params['attack_sarsa_network'])
                sarsa_meta = sarsa_ckpt['metadata']
                sarsa_eps = sarsa_meta.get('sarsa_eps', -1.0)
                sarsa_reg = sarsa_meta.get('sarsa_reg', -1.0)
                sarsa_steps = sarsa_meta.get('sarsa_steps', -1)
            elif params['attack_method'] == "sarsa+action":
                sarsa_eps = -1.0
                sarsa_reg = params['attack_sarsa_action_ratio']
                sarsa_steps = -1
            else:
                sarsa_eps = -1.0
                sarsa_reg = -1.0
                sarsa_steps = -1
            try:
                cur.execute(
                    "INSERT INTO attack_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);",
                    (method, mean_reward, std_reward, min_reward, max_reward,
                     sarsa_eps, sarsa_reg, sarsa_steps,
                     params['deterministic'], early_terminate))
                connection.commit()
            except sqlite3.OperationalError as e:
                import traceback
                traceback.print_exc()
                print('Cannot insert into the SQLite table. Give up.')
            else:
                print(f'results saved to database {params["sqlite_path"]}')
            connection.close()
        print('\n')
        print('all rewards:', all_rewards)
        print('rewards stats:\nmean: {}, std:{}, min:{}, max:{}'.format(
            mean_reward, std_reward, min_reward, max_reward))
Example #16
def main(args):
    base_directory = args.base_directory
    exp_id_list = os.listdir(base_directory)
    best_exp_id = None
    all_rew = []
    all_exp_id = []
    train_eps = []
    if args.exp_id == '':
        for exp_id in exp_id_list:
            s = None
            try:
                s = Store(base_directory, exp_id)
                rew = s['final_results'].df['5_rewards'][0]
                # train_eps.append(s['metadata'].df['robust_ppo_eps'][0])
                all_rew.append(rew)
                print(f"rew={rew}")
                all_exp_id.append(exp_id)
                s.close()
            except Exception as e:
                print(f'Load result error for {exp_id}: {e}')
                if s is not None:
                    s.close()
                continue
        n_exps = len(all_rew)
        all_rew = np.array(all_rew)
        all_exp_id = np.array(all_exp_id)
        ind = np.argsort(all_rew)
        for i in range(len(train_eps)):
            if train_eps[i] == 0.075:
                print(all_exp_id[i])
        print(
            f'Read {n_exps} models. Avg reward is {all_rew.mean()}, median is {all_rew[ind[n_exps//2]]}'
        )

    def dump_one_exp_id(best_exp_id):
        print('\n\n>>>selected id', best_exp_id, 'args.best', args.best,
              '\n\n')
        if best_exp_id is not None:
            env_name = get_env_name(base_directory)
            alg_name = get_alg_name(base_directory)
            store = Store(base_directory, best_exp_id)
            if 'final_results' in store.tables and not args.all_ckpts:
                table_name = 'final_results'
                index_id = 0
            else:
                table_name = 'checkpoints'
                print(
                    f'Warning: final_results table not found for expid {best_exp_id}, using last checkpoints'
                )
                index_id = -1  # use last checkpoint
            ckpts = store[table_name]
            print(
                'loading from exp id:', best_exp_id, ' reward: ',
                ckpts.df['5_rewards'].iloc[index_id]
                if '5_rewards' in ckpts.df else "training not finished")

            def dump_model(sel_ckpts, sel_index_id, sel_path):
                P = {}
                # mapper = ch.device('cuda:0')
                for name in [
                        'val_model', 'policy_model', 'val_opt', 'policy_opt',
                        'adversary_policy_model', 'adversary_val_model',
                        'adversary_policy_opt', 'adversary_val_opt'
                ]:
                    if name in sel_ckpts.df:
                        print(
                            f'Saving {name} out of {len(sel_ckpts.df[name])}')
                        P[name] = sel_ckpts.get_state_dict(
                            sel_ckpts.df[name].iloc[sel_index_id])
                P['envs'] = sel_ckpts.get_pickle(
                    sel_ckpts.df['envs'].iloc[sel_index_id])

                ch.save(P, sel_path)
                print('\n', sel_path, 'saved.\n')

            if not args.all_ckpts:
                if args.output is None:
                    path = f"best_model-{alg_name}-{env_name}.{best_exp_id[:8]}.model"
                else:
                    path = args.output
                dump_model(ckpts, index_id, path)
            else:
                iters = ckpts.df['iteration']

                for i, it in enumerate(iters):
                    if i % args.dump_step != 0:
                        continue
                    path = f"best_model-{alg_name}-{env_name}.{best_exp_id[:8]}.iter{it}.model"
                    if args.output is not None:
                        if not os.path.exists(args.output):
                            os.makedirs(args.output)
                        path = os.path.join(args.output, path)
                    dump_model(ckpts, i, path)

            store.close()
        else:
            raise ValueError('no usable exp found! Cannot load.')

    if not args.all_exp:
        if args.best:
            if args.attack:
                sel_exp_id = all_exp_id[ind[0]]
            else:
                sel_exp_id = all_exp_id[ind[-1]]
        else:
            if args.exp_id:
                sel_exp_id = args.exp_id
            else:
                sel_exp_id = all_exp_id[ind[n_exps // 2]]
        dump_one_exp_id(sel_exp_id)
    else:
        for sel_exp_id in all_exp_id:
            dump_one_exp_id(sel_exp_id)
Example #17
    pass

os.mkdir(OUT_DIR)


def f(x):
    return (x - 2.03)**2 + 3


if __name__ == "__main__":
    # Three parameters: initial guess for x, step size, tolerance
    combos = itertools.product(np.linspace(-15, 15, 3),
                               np.linspace(1, 1e-5, 3), [1e-5])

    for x, step, tol in combos:
        store = Store(OUT_DIR)
        store.add_table(
            'metadata', {
                'step_size': float,
                'tolerance': float,
                'initial_x': float,
                'out_dir': str
            })

        store.add_table('result', {'final_x': float, 'final_opt': float})

        store.add_table('running_log', {
            'current_x': float,
            'current_f': float
        })
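        # The example is truncated here. A sketch of the remainder, inferred from
        # the table schemas above (not the original file): log the run's
        # parameters, run plain gradient descent on f, and record the result.
        store['metadata'].append_row({
            'step_size': step,
            'tolerance': tol,
            'initial_x': x,
            'out_dir': OUT_DIR
        })

        curr_x = x
        for _ in range(10000):              # cap iterations; a step size of 1 can oscillate
            grad = 2 * (curr_x - 2.03)      # derivative of f
            if abs(grad) < tol:
                break
            curr_x -= step * grad
            store.log_table_and_tb('running_log', {
                'current_x': curr_x,
                'current_f': f(curr_x)
            })
            store['running_log'].flush_row()

        store['result'].append_row({'final_x': curr_x, 'final_opt': f(curr_x)})
        store.close()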