import argparse
import json
import math
import os
from glob import glob
from pathlib import Path

import torch

# Project-local helpers (make_agent, evaluate, generate_plane_data,
# find_unscaled_alts, readz, scale_dir, strip_lagging_slash) are assumed to
# be importable from the surrounding package.


def compute_baselines(environment, agent_name, checkpoint, episodes, trials,
                      device):
    directory = f"./runs/{agent_name}_checkpoints"
    pretraining = {
        "latest": f"{directory}/{checkpoint}/checkpoint.zip",
        "best": f"./runs/{agent_name}/best/checkpoint.zip",
        "trained_steps": int(checkpoint),
    }

    # Baseline
    baseline_scores = []
    for _ in range(trials):
        agent, steps = make_agent("SB3_ON",
                                  environment,
                                  directory,
                                  json.loads('{"ALGO": "A2C"}'),
                                  eval_freq=100000000,
                                  n_eval_episodes=1,
                                  pretraining=pretraining,
                              device=device)
        agent.load_weights(f"{directory}/{checkpoint}/checkpoint.zip")
        evaluator = agent.evaluator()
        baseline = evaluate(evaluator, episodes, 100000000)
        print("Baseline: ", baseline["episode_rewards"])
        baseline_scores.append(baseline["episode_rewards"])
    return sum(baseline_scores) / len(baseline_scores)
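
# A hedged usage sketch (environment, run name, and checkpoint step are
# hypothetical): average the checkpoint's episode reward over three
# independent evaluations.
#
#   baseline = compute_baselines("CartPole-v1", "a2c_run", "50000",
#                                episodes=10, trials=3, device="cpu")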


# Example #2
def calc_result(offset, params, info, direction, device, key, num_steps,
                num_episodes):
    """Evaluate the agent after offsetting its weights along `direction`."""
    agent, steps = make_agent(info['agent_name'],
                              info['env'],
                              direction,
                              info['hyperparameters'],
                              device=device)

    # Shift every parameter tensor by `offset` along its direction vector.
    weights = [p + g * offset for p, g in zip(params, direction)]
    agent.set_weights(weights)

    evaluator = agent.evaluator()
    eval_results = evaluate(evaluator, num_episodes, num_steps)

    return eval_results[key]
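
# A minimal usage sketch (all values hypothetical): sample the reward
# landscape at several offsets along a single direction vector.
#
#   offsets = [0.1 * i for i in range(-5, 6)]
#   rewards = [calc_result(o, params, info, direction, "cpu",
#                          "episode_rewards", num_steps=10000, num_episodes=5)
#              for o in offsets]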


# Example #3
def main():
    parser = argparse.ArgumentParser(
        description='Train an agent and keep track of important information.')
    parser.add_argument('save_dir',
                        type=str,
                        help="Directory where checkpoints will be saved")
    parser.add_argument(
        'agent_name',
        type=str,
        help="One of 'rainbow', 'SB3_OFF', 'SB3_ON', or 'SB3_HER'")
    parser.add_argument('env', type=str, help="Environment name")
    parser.add_argument('device',
                        type=str,
                        help="Device used for training ('cpu' or 'cuda')")
    parser.add_argument(
        'hyperparameters',
        type=str,
        help="Dictionary of hyperparameters for training. Should include the "
        "intended training algorithm (e.g. {'ALGO': 'PPO'})")
    parser.add_argument('--save_freq',
                        type=int,
                        default=10000,
                        help="Training steps between each saved checkpoint.")
    parser.add_argument('--eval_freq',
                        type=int,
                        default=10000,
                        help="Training steps between each evaluations.")
    parser.add_argument('--resume',
                        action='store_true',
                        help="Continue training from last checkpoint")

    args = parser.parse_args()
    assert args.agent_name in [
        'rainbow', 'SB3_OFF', 'SB3_ON', 'SB3_HER'
    ], "Name must be one of 'rainbow', 'SB3_OFF', 'SB3_ON', or 'SB3_HER'"

    torch.set_num_threads(1)

    zip_path = ""
    timesteps = 0
    pretraining = None
    if args.resume:
        # Checkpoint subdirectories are named after their step counts;
        # resume from the one with the most training steps.
        checkpoint_steps = []
        for subdir in glob(args.save_dir + "/*/"):
            for part in subdir.split("/"):
                if part.isdigit():
                    checkpoint_steps.append(int(part))
        timesteps = max(checkpoint_steps)
        zip_path = f"{args.save_dir}/{timesteps}/checkpoint.zip"
        best_path = f"{args.save_dir}/best/checkpoint.zip"
        pretraining = {
            "latest": zip_path,
            "best": best_path,
            "trained_steps": timesteps,
        }
        print(zip_path)

    hyperparams = json.loads(args.hyperparameters)
    agent, steps = make_agent(args.agent_name,
                              args.env,
                              args.save_dir,
                              hyperparams,
                              pretraining=pretraining,
                              device=args.device,
                              eval_freq=args.eval_freq)

    os.makedirs(args.save_dir, exist_ok=True)

    run_info = {
        "agent_name": args.agent_name,
        "env": args.env,
        "hyperparameters": hyperparams,
    }
    run_info_fname = os.path.join(args.save_dir, "info.json")
    with open(run_info_fname, 'w') as file:
        json.dump(run_info, file, indent=4)

    agent.train(steps, args.save_dir, save_freq=args.save_freq)
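

# A hypothetical invocation of the training entry point above (the script
# name, run directory, and hyperparameter values are illustrative, not from
# the source):
#
#   python train.py ./runs/ppo_cartpole SB3_ON CartPole-v1 cpu \
#       '{"ALGO": "PPO"}' --save_freq 10000 --eval_freq 10000
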
def compare_a2c_ppo(environment, agent_name, checkpoint, episodes, trials,
                    baseline_reward, device):
    directory = f"./runs/{agent_name}_checkpoints"
    pretraining = {
        "latest": f"{directory}/{checkpoint}/checkpoint.zip",
        "best": f"./runs/{agent_name}/best/checkpoint.zip",
        "trained_steps": int(checkpoint),
    }

    # One step A2C
    a2c_scores = []
    for _ in range(trials):
        agent, steps = make_agent(
            "SB3_ON",
            environment,
            directory,
            json.loads(
                '{"ALGO": "A2C", "learning_rate": 0.000001, "n_steps": 128}'),
            eval_freq=100000000,
            n_eval_episodes=1,
            pretraining=pretraining,
            device=device)
        agent.load_weights(f"{directory}/{checkpoint}/checkpoint.zip")
        evaluator = agent.evaluator()
        agent.train(2048,
                    f"./runs/vpg/{agent_name}/{checkpoint}",
                    save_freq=10000)
        a2c = evaluate(evaluator, episodes, 100000000)
        print("A2C: ", a2c["episode_rewards"])
        a2c_scores.append(a2c["episode_rewards"])

    # One step PPO
    ppo_scores = []
    for _ in range(trials):
        agent, steps = make_agent(
            "SB3_ON",
            environment,
            directory,
            json.loads(
                '{"ALGO": "PPO", "learning_rate": 0.000001, "n_steps": 128}'),
            eval_freq=100000000,
            n_eval_episodes=1,
            pretraining=pretraining,
            device=device)
        evaluator = agent.evaluator()
        agent.load_weights(f"{directory}/{checkpoint}/checkpoint.zip")
        agent.train(2048,
                    f"./runs/vpg/{agent_name}/{checkpoint}",
                    save_freq=10000)
        ppo = evaluate(evaluator, episodes, 100000000)
        print("PPO: ", ppo["episode_rewards"])
        ppo_scores.append(ppo["episode_rewards"])

    # Mean reward per variant, then the relative change versus the baseline,
    # sign-corrected so that an improvement stays positive even when the
    # baseline reward is negative.
    a2c_reward = sum(a2c_scores) / len(a2c_scores)
    ppo_reward = sum(ppo_scores) / len(ppo_scores)
    ppo_percent = math.copysign(1, baseline_reward) * (
        (ppo_reward / baseline_reward) - 1)
    a2c_percent = math.copysign(1, baseline_reward) * (
        (a2c_reward / baseline_reward) - 1)

    return ppo_percent, a2c_percent
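
# A hedged usage sketch (hypothetical values): feed the mean reward from
# compute_baselines above in as baseline_reward, then compare the one-step
# fine-tuned A2C and PPO variants against it.
#
#   ppo_pct, a2c_pct = compare_a2c_ppo("CartPole-v1", "a2c_run", "50000",
#                                      episodes=10, trials=3,
#                                      baseline_reward=baseline, device="cpu")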


# Example #5
def main():
    parser = argparse.ArgumentParser(description='generate jobs for plane')
    parser.add_argument('checkpoint_dir', type=str)
    parser.add_argument('output_path', type=str)
    parser.add_argument('--directions',
                        type=str,
                        default="filter",
                        help="'filter' is only option right now")
    parser.add_argument(
        '--copy-directions',
        type=str,
        help="Overrides directions with directions from the specified "
        "folder. Does not copy any other data.")
    parser.add_argument(
        '--scale-vec',
        type=str,
        help="A .npz file of the same shape as the directions, indicating "
        "how much each dimension should be scaled by.")
    parser.add_argument('--dir1',
                        type=str,
                        help="overrides dir1 with vector from specified path.")
    parser.add_argument('--dir2',
                        type=str,
                        help="overrides dir2 with vector from specified path.")
    parser.add_argument('--magnitude',
                        type=float,
                        default=1.,
                        help="scales directions by given amount")
    parser.add_argument('--grid-size', type=int, default=5)
    parser.add_argument('--num-steps', type=int)
    parser.add_argument('--num-episodes', type=int)
    parser.add_argument('--device', type=str, default='cpu')
    parser.add_argument(
        '--use_offset_critic',
        action='store_true',
        help="Use the critic at the offset point, rather than at the center, "
        "for value estimation")
    parser.add_argument('--est-hesh', action='store_true')
    parser.add_argument('--est-grad', action='store_true')
    parser.add_argument('--calc-hesh', action='store_true')
    parser.add_argument('--calc-grad', action='store_true')
    parser.add_argument('--batch-grad', action='store_true')

    args = parser.parse_args()

    assert args.copy_directions is None or args.directions == "copy", \
        "if --copy-directions is set, --directions must be 'copy'"
    output_path = Path(args.output_path)
    checkpoint_dir = Path(args.checkpoint_dir)
    folder_argname = Path(
        os.path.dirname(strip_lagging_slash(args.checkpoint_dir)))
    checkpoint_fname = next(fname for fname in os.listdir(checkpoint_dir)
                            if "checkpoint" in fname)
    checkpoint_path = checkpoint_dir / checkpoint_fname

    info_fname = "info.json"
    info = json.load(open((folder_argname / info_fname)))

    agent, steps = make_agent(info['agent_name'],
                              info['env'],
                              output_path,
                              info['hyperparameters'],
                              device="cpu")
    agent.load_weights(checkpoint_path)

    # Generate directions normally
    dir1_vec, dir2_vec = find_unscaled_alts(agent, args.directions)

    # copy directions
    if args.copy_directions is not None:
        dir_path = Path(args.copy_directions)
        dir1_vec = readz(dir_path / "dir1.npz")
        dir2_vec = readz(dir_path / "dir2.npz")
    if args.dir1 is not None:
        dir1_vec = readz(args.dir1)
        info['dir1'] = args.dir1
    if args.dir2 is not None:
        dir2_vec = readz(args.dir2)
        info['dir2'] = args.dir2

    if args.scale_vec is not None:
        scale_vec = readz(args.scale_vec)
        dir1_vec = scale_dir(dir1_vec, scale_vec)
        dir2_vec = scale_dir(dir2_vec, scale_vec)

    if args.magnitude is not None:
        info['magnitude'] = m = args.magnitude
        dir1_vec = [m * v for v in dir1_vec]
        dir2_vec = [m * v for v in dir2_vec]

    info['directions'] = (args.directions
                          if args.copy_directions is None else "copy")

    generate_plane_data(args.checkpoint_dir,
                        args.output_path,
                        dir1_vec,
                        dir2_vec,
                        args.magnitude,
                        info,
                        grid_size=args.grid_size,
                        num_steps=args.num_steps,
                        num_episodes=args.num_episodes,
                        device=args.device,
                        use_offset_critic=args.use_offset_critic,
                        est_hesh=args.est_hesh,
                        est_grad=args.est_grad,
                        calc_hesh=args.calc_hesh,
                        calc_grad=args.calc_grad,
                        batch_grad=args.batch_grad)
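

# A hypothetical invocation of the plane-job generator above (script name and
# paths are illustrative, not from the source):
#
#   python generate_plane_jobs.py ./runs/ppo_cartpole/50000 ./planes/out \
#       --num-steps 10000 --num-episodes 5 --magnitude 0.5 --grid-size 5 \
#       --device cpu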