Example #1
def main(args):
    parser = create_parser()
    args = parser.parse_args(args)

    # load experiment configuration
    with open(args.parameters.strip(), "r") as f:
        params = json.load(f)

    checkpoint_freq = params["run_details"]["checkpoint_after_ts"]
    # train agent
    dataset = (
        RLDataset(args.file_path)
        if checkpoint_freq != 0 and args.file_path
        else None
    )

    # log experiment info to Tensorboard
    evaluation_file = args.evaluation_file_path
    config_file = args.parameters.strip()
    experiment_name = config_file[config_file.rfind('/') +
                                  1:config_file.rfind('.json')]
    os.environ["TENSORBOARD_DIR"] = os.path.join(evaluation_file,
                                                 experiment_name)

    start_time = time.time()
    (
        average_reward_train,
        num_episodes_train,
        average_reward_eval,
        num_episodes_eval,
        timesteps_history,
        trainer,
        predictor,
        env,
    ) = horizon_runner.run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.gpu_id,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )

    if dataset:
        dataset.save()
    end_time = time.time()

    # save runtime
    runtime_file = os.path.join(evaluation_file, 'runtime', 'runtime.csv')
    with open(runtime_file, 'a+') as f:
        f.write(experiment_name + ', ' + str(end_time - start_time) + '\n')

    # inference testing
    try:
        num_inference_steps = params["run_details"]["num_inference_steps"]
        if num_inference_steps:
            print("--- STARTING HORIZON CARTPOLE INFERENCE EXPERIMENT ---")
            start_time = time.time()
            _ = env.run_n_steps(num_inference_steps, predictor, test=True)
            end_time = time.time()
            print("--- HORIZON CARTPOLE INFERENCE EXPERIMENT COMPLETED ---")
            inference_file = os.path.join(evaluation_file, 'runtime',
                                          'inference.csv')
            with open(inference_file, 'a+') as f:
                f.write(experiment_name + ', ' +
                        str(end_time - start_time) + '\n')
    except KeyError:
        pass

    return average_reward_eval
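
A minimal entry-point sketch for wiring this main into a script, assuming the snippet lives in a runnable module (the sys.argv slicing is the conventional pattern, not something shown in the example itself):

import sys

if __name__ == "__main__":
    # Forward everything after the script name so parse_args sees only the flags.
    main(sys.argv[1:])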
Example #2
def test_complete_experiment(env_name, config):
    """
    Smoke test that runs a small Park QOpt experiment and fails if any
    exception is raised during its execution.
    """
    try:
        SummaryWriterContext._reset_globals()

        with open(config) as f:
            params = json.load(f)

        checkpoint_freq = params["run_details"]["checkpoint_after_ts"]
        # train agent
        dataset = RLDataset(FILE_PATH)
        # log experiment info to Tensorboard
        evaluation_file = EVALUATION_PATH
        config_file = config
        experiment_name = config_file[config_file.rfind('/') +
                                      1:config_file.rfind('.json')]
        os.environ["TENSORBOARD_DIR"] = os.path.join(evaluation_file,
                                                     experiment_name)
        (
            average_reward_train,
            num_episodes_train,
            average_reward_eval,
            num_episodes_eval,
            timesteps_history,
            trainer,
            predictor,
            env,
        ) = horizon_runner.run_gym(params, False, None, -1, dataset)

        if dataset:
            dataset.save()

        SummaryWriterContext._reset_globals()
    except Exception:
        pytest.fail('Running a small ' + str(env_name) +
                    ' experiment in Horizon failed!')
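
Since the test takes (env_name, config) as arguments, pytest has to supply them via fixtures or parametrization; a hedged sketch with placeholder values (the config path is hypothetical):

import pytest

@pytest.mark.parametrize(
    "env_name, config",
    [("ParkQOpt", "configs/park_qopt_small.json")],  # hypothetical values
)
def test_complete_experiment(env_name, config):
    ...  # body as in Example #2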
Example #3
def mdnrnn_gym(
    params: OpenAiGymParameters,
    feature_importance: bool = False,
    feature_sensitivity: bool = False,
    save_embedding_to_path: Optional[str] = None,
    seed: Optional[int] = None,
):
    assert params.mdnrnn is not None
    use_gpu = params.use_gpu
    logger.info("Running gym with params")
    logger.info(params)

    env_type = params.env
    env = OpenAIGymEnvironment(
        env_type, epsilon=1.0, softmax_policy=False, gamma=0.99, random_seed=seed
    )

    # create test data once
    assert params.run_details.max_steps is not None
    test_replay_buffer = get_replay_buffer(
        params.run_details.num_test_episodes,
        params.run_details.seq_len,
        params.run_details.max_steps,
        env,
    )
    test_batch = test_replay_buffer.sample_memories(
        test_replay_buffer.memory_size, use_gpu=use_gpu, batch_first=True
    )

    trainer = create_trainer(params, env, use_gpu)
    _, _, trainer = train_sgd(
        env,
        trainer,
        use_gpu,
        "{} test run".format(env_type),
        params.mdnrnn.minibatch_size,
        params.run_details,
        test_batch=test_batch,
    )
    feature_importance_map, feature_sensitivity_map, dataset = None, None, None
    if feature_importance:
        feature_importance_map = calculate_feature_importance(
            env, trainer, use_gpu, params.run_details, test_batch=test_batch
        )
    if feature_sensitivity:
        feature_sensitivity_map = calculate_feature_sensitivity_by_actions(
            env, trainer, use_gpu, params.run_details, test_batch=test_batch
        )
    if save_embedding_to_path:
        dataset = RLDataset(save_embedding_to_path)
        create_embed_rl_dataset(env, trainer, dataset, use_gpu, params.run_details)
        dataset.save()
    return env, trainer, feature_importance_map, feature_sensitivity_map, dataset
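
A hedged usage sketch: assuming params has already been parsed into a valid OpenAiGymParameters (for instance via json_to_object, as in Example #5), a run with both analyses and embedding export enabled might look like this; the output path is a placeholder:

# Hypothetical call; `params` must have mdnrnn and run_details populated.
env, trainer, importance_map, sensitivity_map, dataset = mdnrnn_gym(
    params,
    feature_importance=True,
    feature_sensitivity=True,
    save_embedding_to_path="embedding_dataset.json",  # placeholder path
    seed=0,
)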
Example #4
def mdnrnn_gym(
    params: OpenAiGymParameters,
    feature_importance: bool = False,
    feature_sensitivity: bool = False,
    save_embedding_to_path: Optional[str] = None,
):
    assert params.mdnrnn is not None
    use_gpu = params.use_gpu
    logger.info("Running gym with params")
    logger.info(params)

    env_type = params.env
    env = OpenAIGymEnvironment(env_type,
                               epsilon=1.0,
                               softmax_policy=True,
                               gamma=0.99)

    trainer = create_trainer(params, env, use_gpu)
    _, _, trainer = train_sgd(
        env,
        trainer,
        use_gpu,
        "{} test run".format(env_type),
        params.mdnrnn.minibatch_size,
        params.run_details,
    )
    feature_importance_map, feature_sensitivity_map, dataset = None, None, None
    if feature_importance:
        feature_importance_map = calculate_feature_importance(
            env, trainer, use_gpu, params.run_details)
    if feature_sensitivity:
        feature_sensitivity_map = calculate_feature_sensitivity_by_actions(
            env, trainer, use_gpu, params.run_details)
    if save_embedding_to_path:
        dataset = RLDataset(save_embedding_to_path)
        create_embed_rl_dataset(env, trainer, dataset, use_gpu,
                                params.run_details)
        dataset.save()
    return env, trainer, feature_importance_map, feature_sensitivity_map, dataset
Example #5
def main(args):
    parser = argparse.ArgumentParser(
        description="Train a RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_score",
        type=int,
        help="If file_path is set, start saving episodes after this score is hit.",
        default=None,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--offline_train",
        action="store_true",
        help="If set, collect data using a random policy then train RL offline.",
    )
    parser.add_argument(
        "--path_to_pickled_transitions",
        help="Path to saved transitions to load into replay buffer.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--seed",
        help="Seed for the test (numpy, torch, and gym).",
        type=int,
        default=None,
    )
    parser.add_argument(
        "--use_gpu",
        help="Use GPU, if available; set the device with CUDA_VISIBLE_DEVICES",
        action="store_true",
    )

    args = parser.parse_args(args)

    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception("Logging level {} not valid level.".format(args.log_level))
    else:
        logging.getLogger().setLevel(getattr(logging, args.log_level.upper()))

    if args.seed is not None:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        random.seed(args.seed)

    assert (
        not args.path_to_pickled_transitions or args.offline_train
    ), "path_to_pickled_transitions is provided so you must run offline training"

    with open(args.parameters, "r") as f:
        params = json_to_object(f.read(), OpenAiGymParameters)

    if args.use_gpu:
        assert torch.cuda.is_available(), "CUDA requested but not available"
        params = params._replace(use_gpu=True)

    dataset = RLDataset(args.file_path) if args.file_path else None

    reward_history, iteration_history, trainer, predictor, env = run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.seed,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )

    if dataset:
        dataset.save()
        logger.info("Saving dataset to {}".format(args.file_path))
        final_score_exploit, _ = env.run_ep_n_times(
            params.run_details.avg_over_num_episodes, predictor, test=True
        )
        final_score_explore, _ = env.run_ep_n_times(
            params.run_details.avg_over_num_episodes, predictor, test=False
        )
        logger.info(
            "Final policy scores {} with epsilon={} and {} with epsilon=0 over {} eps.".format(
                final_score_explore,
                env.epsilon,
                final_score_exploit,
                params.run_details.avg_over_num_episodes,
            )
        )

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, iteration_history)
    return reward_history
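
For illustration, the flags this parser accepts could be exercised as below; the JSON config path is a placeholder, not a file shipped with the library:

# Hypothetical invocation with placeholder paths.
reward_history = main([
    "-p", "configs/cartpole_dqn.json",
    "--seed", "0",
    "-f", "collected_samples.json",
    "--offline_train",
])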
Example #6
def main(args):
    parser = argparse.ArgumentParser(
        description="Train a RL net to play in an OpenAI Gym environment.")
    parser.add_argument("-p",
                        "--parameters",
                        help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=USE_CPU,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_episode",
        type=int,
        help="If file_path is set, start saving episodes from this episode num.",
        default=0,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    args = parser.parse_args(args)

    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception("Logging level {} not valid level.".format(
            args.log_level))
    else:
        logger.setLevel(getattr(logging, args.log_level.upper()))

    with open(args.parameters, "r") as f:
        params = json.load(f)

    dataset = RLDataset(args.file_path) if args.file_path else None
    reward_history, timestep_history, trainer, predictor = run_gym(
        params, args.score_bar, args.gpu_id, dataset,
        args.start_saving_from_episode)
    if dataset:
        dataset.save()
    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history,
                           timestep_history)
    return reward_history
Example #7
def main(args):
    parser = argparse.ArgumentParser(
        description="Train a RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=USE_CPU,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_score",
        type=int,
        help="If file_path is set, start saving episodes after this score is hit.",
        default=None,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--offline_train",
        action="store_true",
        help="If set, collect data using a random policy then train RL offline.",
    )
    parser.add_argument(
        "--path_to_pickled_transitions",
        help="Path to saved transitions to load into replay buffer.",
        type=str,
        default=None,
    )
    args = parser.parse_args(args)

    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception("Logging level {} not valid level.".format(args.log_level))
    else:
        logger.setLevel(getattr(logging, args.log_level.upper()))

    assert (
        not args.path_to_pickled_transitions or args.offline_train
    ), "path_to_pickled_transitions is provided so you must run offline training"

    with open(args.parameters, "r") as f:
        params = json.load(f)

    dataset = RLDataset(args.file_path) if args.file_path else None

    reward_history, iteration_history, trainer, predictor, env = run_gym(
        params,
        args.offline_train,
        args.score_bar,
        args.gpu_id,
        dataset,
        args.start_saving_from_score,
        args.path_to_pickled_transitions,
    )

    if dataset:
        dataset.save()
        logger.info("Saving dataset to {}".format(args.file_path))
        final_score_exploit, _ = env.run_ep_n_times(
            params["run_details"]["avg_over_num_episodes"], predictor, test=True
        )
        final_score_explore, _ = env.run_ep_n_times(
            params["run_details"]["avg_over_num_episodes"], predictor, test=False
        )
        logger.info(
            "Final policy scores {} with epsilon={} and {} with epsilon=0 over {} eps.".format(
                final_score_explore,
                env.epsilon,
                final_score_exploit,
                params["run_details"]["avg_over_num_episodes"],
            )
        )

    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, iteration_history)
    return reward_history
Example #8
def main(args):
    parser = argparse.ArgumentParser(
        description="Train a RL net to play in an OpenAI Gym environment."
    )
    parser.add_argument("-p", "--parameters", help="Path to JSON parameters file.")
    parser.add_argument(
        "-s",
        "--score-bar",
        help="Bar for averaged tests scores.",
        type=float,
        default=None,
    )
    parser.add_argument(
        "-g",
        "--gpu_id",
        help="If set, will use GPU with specified ID. Otherwise will use CPU.",
        default=USE_CPU,
    )
    parser.add_argument(
        "-l",
        "--log_level",
        help="If set, use logging level specified (debug, info, warning, error, "
        "critical). Else defaults to info.",
        default="info",
    )
    parser.add_argument(
        "-f",
        "--file_path",
        help="If set, save all collected samples as an RLDataset to this file.",
        default=None,
    )
    parser.add_argument(
        "-e",
        "--start_saving_from_episode",
        type=int,
        help="If file_path is set, start saving episodes from this episode num.",
        default=0,
    )
    parser.add_argument(
        "-r",
        "--results_file_path",
        help="If set, save evaluation results to file.",
        type=str,
        default=None,
    )
    args = parser.parse_args(args)

    if args.log_level not in ("debug", "info", "warning", "error", "critical"):
        raise Exception("Logging level {} not valid level.".format(args.log_level))
    else:
        logger.setLevel(getattr(logging, args.log_level.upper()))

    with open(args.parameters, "r") as f:
        params = json.load(f)

    dataset = RLDataset(args.file_path) if args.file_path else None
    reward_history, timestep_history, trainer, predictor = run_gym(
        params, args.score_bar, args.gpu_id, dataset, args.start_saving_from_episode
    )
    if dataset:
        dataset.save()
    if args.results_file_path:
        write_lists_to_csv(args.results_file_path, reward_history, timestep_history)
    return reward_history