def main():
    args = get_args()

    init_logging(args.log_level)

    get_logger().info("Running with args {}".format(args))

    # Name the master process after its role so it is easy to find in `ps`/`top`.
    ptitle("Master: {}".format("Training" if args.test_date is None else "Testing"))

    cfg, srcs = load_config(args)

    if args.test_date is None:
        # No test date given: train, optionally resuming from a checkpoint.
        OnPolicyRunner(
            config=cfg,
            output_dir=args.output_dir,
            loaded_config_src_files=srcs,
            seed=args.seed,
            mode="train",
            deterministic_cudnn=args.deterministic_cudnn,
            deterministic_agents=args.deterministic_agents,
            extra_tag=args.extra_tag,
            disable_tensorboard=args.disable_tensorboard,
            disable_config_saving=args.disable_config_saving,
        ).start_train(
            checkpoint=args.checkpoint,
            restart_pipeline=args.restart_pipeline,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
        )
    else:
        # A test date was given: evaluate checkpoints from that training run.
        OnPolicyRunner(
            config=cfg,
            output_dir=args.output_dir,
            loaded_config_src_files=srcs,
            seed=args.seed,
            mode="test",
            deterministic_cudnn=args.deterministic_cudnn,
            deterministic_agents=args.deterministic_agents,
            extra_tag=args.extra_tag,
            disable_tensorboard=args.disable_tensorboard,
            disable_config_saving=args.disable_config_saving,
        ).start_test(
            experiment_date=args.test_date,
            checkpoint=args.checkpoint,
            skip_checkpoints=args.skip_checkpoints,
            max_sampler_processes_per_worker=args.max_sampler_processes_per_worker,
        )
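# A minimal sketch of the standard module guard, assuming main() is meant to be
# the script's entry point (plain Python, nothing codebase-specific):
if __name__ == "__main__":
    main()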
def test_ppo_trains(self, tmpdir):
    cfg = PPOBabyAIGoToObjExperimentConfig()

    output_dir = tmpdir.mkdir("experiment_output")

    # Train a PPO agent on the BabyAI go_to_obj task.
    train_runner = OnPolicyRunner(
        config=cfg,
        output_dir=output_dir,
        loaded_config_src_files=None,
        seed=1,
        mode="train",
        deterministic_cudnn=True,
    )
    start_time_str = train_runner.start_train(max_sampler_processes_per_worker=1)

    # Evaluate the checkpoints written during the training run above.
    test_runner = OnPolicyRunner(
        config=cfg,
        output_dir=output_dir,
        loaded_config_src_files=None,
        seed=1,
        mode="test",
        deterministic_cudnn=True,
    )
    test_results = test_runner.start_test(
        experiment_date=start_time_str,
        skip_checkpoints=1,
        max_sampler_processes_per_worker=1,
    )

    # With skip_checkpoints=1 and this short run, exactly one checkpoint (the
    # final one) should have been evaluated.
    assert len(test_results) == 1, f"Too many or too few test results ({test_results})"

    tr = test_results[0]

    # Training advances in whole iterations of ROLLOUT_STEPS * NUM_TRAIN_SAMPLERS
    # steps, so the recorded step count is TOTAL_RL_TRAIN_STEPS rounded up to a
    # full iteration.
    assert (
        tr["training_steps"]
        == math.ceil(cfg.TOTAL_RL_TRAIN_STEPS / (cfg.ROLLOUT_STEPS * cfg.NUM_TRAIN_SAMPLERS))
        * cfg.ROLLOUT_STEPS
        * cfg.NUM_TRAIN_SAMPLERS
    ), "Incorrect number of training steps"

    assert len(tr["tasks"]) == cfg.NUM_TEST_TASKS, "Incorrect number of test tasks"

    # The aggregate success rate must equal the mean of the per-task successes.
    assert tr["success"] == sum(task["success"] for task in tr["tasks"]) / len(
        tr["tasks"]
    ), "Success counts don't seem to match"

    assert (
        tr["success"] > 0.95
    ), "PPO did not seem to converge for the go_to_obj task (success {}).".format(
        tr["success"]
    )
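# A minimal sketch (not part of the test suite; the helper name is hypothetical)
# of the step bookkeeping the "training_steps" assertion above checks: the step
# budget is rounded up to a whole number of rollout iterations.
import math

def expected_training_steps(
    total_rl_train_steps: int, rollout_steps: int, num_train_samplers: int
) -> int:
    # Steps collected per iteration across all parallel task samplers.
    steps_per_iteration = rollout_steps * num_train_samplers
    # Round the requested budget up to the nearest full iteration.
    return math.ceil(total_rl_train_steps / steps_per_iteration) * steps_per_iteration

# E.g., with hypothetical values rollout_steps=128, num_train_samplers=16, and a
# budget of 1_000_000 steps: ceil(1_000_000 / 2048) * 2048 == 1_001_472.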