Example #1
def main():
    parser = argparse.ArgumentParser(
        description='run a particular evaluation job')
    parser.add_argument('params', type=str)
    parser.add_argument('dir',
                        type=str,
                        help="direction to search for threshold")
    parser.add_argument('outputfile', type=str)
    # `bigint` is a large default defined elsewhere in the source module.
    parser.add_argument('--num-steps', type=int, default=bigint)
    parser.add_argument('--num-episodes', type=int, default=bigint)
    parser.add_argument('--device', type=str, default="cpu")
    parser.add_argument('--key', type=str, default="episode_rewards")
    parser.add_argument('--tolerance', type=float, required=True)

    args = parser.parse_args()

    info = json.load(open(Path(args.params).parent.parent / "info.json"))
    params = [
        v.cpu().detach().numpy() for v in torch.load(
            args.params, map_location=torch.device('cpu')).values()
    ]
    dir = readz(args.dir)

    up_bound = search(info, params, dir, args.num_steps, args.num_episodes,
                      args.key, args.device, args.tolerance)

    out_dict = {
        "offset": up_bound,
    }
    with open(args.outputfile, 'w') as file:
        file.write(json.dumps(out_dict))
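A note on the `search` call above: it is defined elsewhere in the repository and is not shown on this page. The sketch below illustrates one plausible shape for it, a bracketing line search along `dir` that returns the largest offset whose evaluation stays within `tolerance` of the unperturbed score. The helper `eval_at_offset` and the doubling/bisection scheme are illustrative assumptions, not the repository's actual implementation.

def search_sketch(info, params, direction, num_steps, num_episodes, key,
                  device, tolerance):
    # Hypothetical helper: evaluate the agent at params + offset * direction
    # and return the mean of `key` (e.g. mean episode reward).
    def score(offset):
        return eval_at_offset(info, params, direction, offset, num_steps,
                              num_episodes, key, device)

    base = score(0.0)
    low, high = 0.0, 1e-3
    # Doubling phase: grow the offset until performance degrades past tolerance.
    while score(high) > base - tolerance:
        low, high = high, high * 2.0
    # Bisection phase: tighten the bracket around the threshold offset.
    for _ in range(20):
        mid = (low + high) / 2.0
        if score(mid) > base - tolerance:
            low = mid
        else:
            high = mid
    return high  # upper bound on the acceptable offset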
Example #2
def main():
    # NOTE: the top of this snippet was cut off on the source page. The parser
    # setup below is reconstructed from how `args` is used later (checkpoint,
    # output_path, grad_path); it is an inferred stand-in, not the original code.
    parser = argparse.ArgumentParser(description='generate heatmap jobs')
    parser.add_argument('checkpoint', type=str)
    parser.add_argument('output_path', type=str)
    parser.add_argument('grad_path', type=str)
    parser.add_argument('--num-steps',
                        type=int,
                        default=200000,
                        help="Number of steps to evaluate each point")
    parser.add_argument('--grid-size',
                        type=int,
                        default=31,
                        help="Width and height of the heatmap grid")
    args = parser.parse_args()

    trained_checkpoint = Path(args.checkpoint)
    output_path = Path(args.output_path)

    # Load gradient direction
    grad_dir_fname = Path(args.grad_path) / "grad.npz"
    grad_dir = readz(grad_dir_fname)

    # Load random filter normalized direction
    rand_dir_fname = trained_checkpoint / "parameters.th"
    param_values = torch.load(str(rand_dir_fname),
                              map_location=torch.device('cpu')).values()
    rand_dir = [
        filter_normalize(v.cpu().detach().numpy()) for v in param_values
    ]

    train_info = json.load(open(trained_checkpoint.parent / "info.json"))
    # TODO: Make option for grad normalization
    flat_grad = [d.flatten() for d in grad_dir]
    grad_cat = np.concatenate(flat_grad, axis=0)
    grad_magnitude = np.linalg.norm(grad_cat)
    dir1 = [d / grad_magnitude for d in grad_dir]
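`filter_normalize` (used here and in Example #3) follows the filter-wise normalization idea from Li et al.'s "Visualizing the Loss Landscape of Neural Nets": draw a random direction shaped like the parameter tensor, then rescale each filter to the norm of the corresponding weight filter so the direction respects the network's scale. A minimal NumPy sketch of that idea, which may differ in detail from the repository's implementation:

import numpy as np

def filter_normalize_sketch(param):
    # Random Gaussian direction with the same shape as the parameter tensor.
    direction = np.random.normal(size=param.shape)
    if param.ndim <= 1:
        # Biases and other 1-D parameters: match the whole vector's norm.
        scale = np.linalg.norm(param) / (np.linalg.norm(direction) + 1e-10)
        return direction * scale
    # Convolution/linear weights: rescale each leading-axis "filter" slice
    # to the norm of the corresponding slice of the trained weights.
    for i in range(param.shape[0]):
        scale = np.linalg.norm(param[i]) / (np.linalg.norm(direction[i]) + 1e-10)
        direction[i] *= scale
    return direction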
Example #3
def main():
    parser = argparse.ArgumentParser(
        description='run a particular evaluation job')
    parser.add_argument('params', type=str)
    parser.add_argument('dir',
                        type=str,
                        help="direction to search for threshold")
    parser.add_argument('outputfile', type=str)
    parser.add_argument('--num-steps', type=int, default=bigint)
    parser.add_argument('--num-episodes', type=int, default=bigint)
    parser.add_argument('--device', type=str, default="cpu")
    parser.add_argument('--length', type=int, default=5)
    parser.add_argument('--max-magnitude', type=float, default=0.1)

    args = parser.parse_args()

    info = json.load(
        open(PurePath(args.outputfile).parent.parent / "info.json"))
    checkpoint = os.path.basename(args.outputfile)

    agent, steps = make_agent(info['agent_name'],
                              info['env'],
                              PurePath(args.params).parent.parent,
                              info['hyperparameters'],
                              device=args.device)

    params = [
        v.cpu().detach().numpy() for v in torch.load(
            args.params, map_location=torch.device('cpu')).values()
    ]
    if info['random_dir_seed'] is not None:
        seed = info['random_dir_seed']
        # NOTE: hash() of a string is salted per process unless PYTHONHASHSEED
        # is fixed, so this derived seed is not reproducible across runs.
        np.random.seed(seed + hash(args.outputfile) % (1 << 30))
        direction = [filter_normalize(p) for p in params]
    else:
        direction = readz(args.dir)

    if info['scale_dir']:
        dir_sum = sum(np.sum(x) for x in direction)
        if dir_sum != 0:
            direction = [d / dir_sum for d in direction]

    # High resolution in first segment
    for i in range(1, 10):
        mm = args.max_magnitude
        weights = [
            p + d * i * mm / (10 * args.length)
            for p, d in zip(params, direction)
        ]
        agent.set_weights(weights)
        evaluator = agent.evaluator()
        eval_results = evaluate(evaluator, args.num_episodes, args.num_steps)
        eval_results['checkpoint'] = checkpoint
        out_fname = f"{args.outputfile},0.{i}.json"
        eval_results["offset"] = i * mm / (10 * args.length)

        with open(out_fname, 'w') as file:
            file.write(json.dumps(eval_results))

    for i in range(args.length):
        mm = args.max_magnitude
        weights = [
            p + d * i * mm / args.length for p, d in zip(params, direction)
        ]
        agent.set_weights(weights)
        evaluator = agent.evaluator()
        eval_results = evaluate(evaluator, args.num_episodes, args.num_steps)
        eval_results['checkpoint'] = checkpoint
        out_fname = f"{args.outputfile},{i}.json"
        eval_results["offset"] = i * mm / args.length

        with open(out_fname, 'w') as file:
            file.write(json.dumps(eval_results))
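Taken together, the two loops sample the direction non-uniformly: nine finely spaced points inside the first coarse interval, then `--length` coarse points starting at the unperturbed weights (offset 0). For example, with the defaults `--max-magnitude 0.1` and `--length 5`:

mm, length = 0.1, 5
fine = [i * mm / (10 * length) for i in range(1, 10)]
coarse = [i * mm / length for i in range(length)]
print(fine)    # [0.002, 0.004, 0.006, ..., 0.018]
print(coarse)  # [0.0, 0.02, 0.04, 0.06, 0.08]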
Example #4
from reward_surfaces.experiments import generate_plane_data
from reward_surfaces.plotting import plot_plane
from reward_surfaces.runners.run_jobs_multiproc import run_job_list
from reward_surfaces.utils.job_results_to_csv import job_results_to_csv
from reward_surfaces.utils.surface_utils import readz
from reward_surfaces.agents.make_agent import make_agent

import json
import os
import shutil

trained_checkpoint = "train_results/bullet/hopper/0040000/"
generated_dirs_dir = "generated_dirs/test_script/"

dir1_fname = "generated_dirs/hopper_eig_vecs/results/0040000/mineigvec.npz"
dir2_fname = "generated_dirs/hopper_eig_vecs/results/0040000/maxeigvec.npz"

dir1 = readz(dir1_fname)
dir2 = readz(dir2_fname)

train_info = json.load(open(trained_checkpoint+"../info.json"))

if os.path.exists(generated_dirs_dir):
    shutil.rmtree(generated_dirs_dir)
    print("removed")

generate_plane_data(trained_checkpoint, generated_dirs_dir, dir1, dir2, train_info, num_steps=1000)
run_job_list(generated_dirs_dir+"jobs.sh")
job_results_to_csv(generated_dirs_dir)
plot_plane(generated_dirs_dir+"results.csv")
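This script chains the full pipeline: generate_plane_data writes one evaluation job per grid point into jobs.sh, run_job_list executes them, job_results_to_csv gathers the per-job outputs into results.csv, and plot_plane renders the surface. To inspect the surface manually instead of calling plot_plane, something like the sketch below works; the column names dim0, dim1, and episode_rewards are guesses and should be checked against the actual results.csv header.

import pandas as pd
import matplotlib.pyplot as plt

# Column names are assumptions -- check the header of results.csv.
df = pd.read_csv("generated_dirs/test_script/results.csv")
grid = df.pivot(index="dim1", columns="dim0", values="episode_rewards")
plt.imshow(grid.values, origin="lower", cmap="viridis")
plt.colorbar(label="episode_rewards")
plt.xlabel("dir1 offset")
plt.ylabel("dir2 offset")
plt.show()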
Example #5
def main():
    parser = argparse.ArgumentParser(description='generate jobs for plane')
    parser.add_argument('checkpoint_dir', type=str)
    parser.add_argument('output_path', type=str)
    parser.add_argument('--directions',
                        type=str,
                        default="filter",
                        help="'filter' is the only option right now")
    parser.add_argument(
        '--copy-directions',
        type=str,
        help="overrides directions with directions from the specified folder. "
        "Does not copy any other data.")
    parser.add_argument(
        '--scale-vec',
        type=str,
        help="A .npz file of the same shape as the directions, indicating how "
        "much each dimension should be scaled by.")
    parser.add_argument('--dir1',
                        type=str,
                        help="overrides dir1 with vector from specified path.")
    parser.add_argument('--dir2',
                        type=str,
                        help="overrides dir2 with vector from specified path.")
    parser.add_argument('--magnitude',
                        type=float,
                        default=1.,
                        help="scales directions by given amount")
    parser.add_argument('--grid-size', type=int, default=5)
    parser.add_argument('--num-steps', type=int)
    parser.add_argument('--num-episodes', type=int)
    parser.add_argument('--device', type=str, default='cpu')
    parser.add_argument(
        '--use_offset_critic',
        action='store_true',
        help="use the critic at the offset point rather than at the center "
        "for value estimation")
    parser.add_argument('--est-hesh', action='store_true')
    parser.add_argument('--est-grad', action='store_true')
    parser.add_argument('--calc-hesh', action='store_true')
    parser.add_argument('--calc-grad', action='store_true')
    parser.add_argument('--batch-grad', action='store_true')

    args = parser.parse_args()

    assert args.copy_directions is None or args.directions == "copy", \
        "if --copy-directions is set, --directions=copy must also be set"
    output_path = Path(args.output_path)
    checkpoint_dir = Path(args.checkpoint_dir)
    folder_argname = Path(
        os.path.dirname(strip_lagging_slash(args.checkpoint_dir)))
    checkpoint_fname = next(fname for fname in os.listdir(checkpoint_dir)
                            if "checkpoint" in fname)
    checkpoint_path = checkpoint_dir / checkpoint_fname

    info_fname = "info.json"
    info = json.load(open(folder_argname / info_fname))

    agent, steps = make_agent(info['agent_name'],
                              info['env'],
                              output_path,
                              info['hyperparameters'],
                              device="cpu")
    agent.load_weights(checkpoint_path)

    # Generate directions normally
    dir1_vec, dir2_vec = find_unscaled_alts(agent, args.directions)

    # copy directions
    if args.copy_directions is not None:
        dir_path = Path(args.copy_directions)
        dir1_vec = readz(dir_path / "dir1.npz")
        dir2_vec = readz(dir_path / "dir2.npz")
    if args.dir1 is not None:
        dir1_vec = readz(args.dir1)
        info['dir1'] = args.dir1
    if args.dir2 is not None:
        dir2_vec = readz(args.dir2)
        info['dir2'] = args.dir2

    if args.scale_vec is not None:
        scale_vec = readz(args.scale_vec)
        dir1_vec = scale_dir(dir1_vec, scale_vec)
        dir2_vec = scale_dir(dir2_vec, scale_vec)

    if args.magnitude is not None:
        info['magnitude'] = m = args.magnitude
        dir1_vec = [m * v for v in dir1_vec]
        dir2_vec = [m * v for v in dir2_vec]

    info['directions'] = args.directions if args.copy_directions is None else "copy"

    generate_plane_data(args.checkpoint_dir,
                        args.output_path,
                        dir1_vec,
                        dir2_vec,
                        args.magnitude,
                        info,
                        grid_size=args.grid_size,
                        num_steps=args.num_steps,
                        num_episodes=args.num_episodes,
                        device=args.device,
                        use_offset_critic=args.use_offset_critic,
                        est_hesh=args.est_hesh,
                        est_grad=args.est_grad,
                        calc_hesh=args.calc_hesh,
                        calc_grad=args.calc_grad,
                        batch_grad=args.batch_grad)
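`scale_dir` is imported from the repository and not shown here; the natural reading is an elementwise rescaling of each layer of a direction by the matching array in the scale vector. A minimal sketch of that assumption:

import numpy as np

def scale_dir_sketch(direction, scale_vec):
    # Elementwise-scale each layer of `direction` by the matching array
    # in `scale_vec` (assumed behavior, not the repository's code).
    assert len(direction) == len(scale_vec), "layer counts must match"
    return [np.asarray(d) * np.asarray(s) for d, s in zip(direction, scale_vec)]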