Example #1
def main():
    args = get_args()

    init_logging(args.log_level)

    get_logger().info("Running with args {}".format(args))

    ptitle("Master: {}".format(
        "Training" if args.test_date is None else "Testing"))

    cfg, srcs = load_config(args)

    if args.test_date is None:
        # No test date given, so run the training pipeline.
        OnPolicyRunner(
            config=cfg,
            output_dir=args.output_dir,
            loaded_config_src_files=srcs,
            seed=args.seed,
            mode="train",
            deterministic_cudnn=args.deterministic_cudnn,
            deterministic_agents=args.deterministic_agents,
            extra_tag=args.extra_tag,
            disable_tensorboard=args.disable_tensorboard,
            disable_config_saving=args.disable_config_saving,
        ).start_train(
            checkpoint=args.checkpoint,
            restart_pipeline=args.restart_pipeline,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
        )
    else:
        # A test date was given, so evaluate checkpoints from that training run.
        OnPolicyRunner(
            config=cfg,
            output_dir=args.output_dir,
            loaded_config_src_files=srcs,
            seed=args.seed,
            mode="test",
            deterministic_cudnn=args.deterministic_cudnn,
            deterministic_agents=args.deterministic_agents,
            extra_tag=args.extra_tag,
            disable_tensorboard=args.disable_tensorboard,
            disable_config_saving=args.disable_config_saving,
        ).start_test(
            experiment_date=args.test_date,
            checkpoint=args.checkpoint,
            skip_checkpoints=args.skip_checkpoints,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
        )
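
The train and test branches above differ only in the runner's mode and in which start method is called; every argument passed to the OnPolicyRunner constructor is identical. Below is a minimal, equivalent sketch with the shared keyword arguments factored out. The function name run is hypothetical, the import path is an assumption based on the AllenAct package layout, and args, cfg, and srcs are the same objects as in main() above.

from allenact.algorithms.onpolicy_sync.runner import OnPolicyRunner  # assumed import path


def run(args, cfg, srcs):
    # Keyword arguments common to both the training and the testing runner.
    runner_kwargs = dict(
        config=cfg,
        output_dir=args.output_dir,
        loaded_config_src_files=srcs,
        seed=args.seed,
        deterministic_cudnn=args.deterministic_cudnn,
        deterministic_agents=args.deterministic_agents,
        extra_tag=args.extra_tag,
        disable_tensorboard=args.disable_tensorboard,
        disable_config_saving=args.disable_config_saving,
    )

    if args.test_date is None:
        OnPolicyRunner(mode="train", **runner_kwargs).start_train(
            checkpoint=args.checkpoint,
            restart_pipeline=args.restart_pipeline,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
        )
    else:
        OnPolicyRunner(mode="test", **runner_kwargs).start_test(
            experiment_date=args.test_date,
            checkpoint=args.checkpoint,
            skip_checkpoints=args.skip_checkpoints,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
        )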
Example #2

    def test_ppo_trains(self, tmpdir):
        cfg = PPOBabyAIGoToObjExperimentConfig()

        output_dir = tmpdir.mkdir("experiment_output")

        train_runner = OnPolicyRunner(
            config=cfg,
            output_dir=output_dir,
            loaded_config_src_files=None,
            seed=1,
            mode="train",
            deterministic_cudnn=True,
        )

        start_time_str = train_runner.start_train(
            max_sampler_processes_per_worker=1)

        test_runner = OnPolicyRunner(
            config=cfg,
            output_dir=output_dir,
            loaded_config_src_files=None,
            seed=1,
            mode="test",
            deterministic_cudnn=True,
        )
        test_results = test_runner.start_test(
            experiment_date=start_time_str,
            skip_checkpoints=1,
            max_sampler_processes_per_worker=1,
        )

        assert len(test_results) == 1, f"Too many or too few test results ({test_results})"

        tr = test_results[0]
        assert (tr["training_steps"] == round(
            math.ceil(cfg.TOTAL_RL_TRAIN_STEPS /
                      (cfg.ROLLOUT_STEPS * cfg.NUM_TRAIN_SAMPLERS))) *
                cfg.ROLLOUT_STEPS *
                cfg.NUM_TRAIN_SAMPLERS), "Incorrect number of training steps"
        assert len(tr["tasks"]
                   ) == cfg.NUM_TEST_TASKS, "Incorrect number of test tasks"
        assert tr["success"] == sum(
            task["success"] for task in tr["tasks"]) / len(
                tr["tasks"]), "Success counts don't seem to match"
        assert (
            tr["success"] > 0.95
        ), "PPO did not seem to converge for the go_to_obj task (success {}).".format(
            tr["success"])