예제 #1
0
 def get_modular_sarsa_trainer_exporter(
     self,
     environment,
     reward_shape,
     dueling,
     categorical,
     quantile,
     use_gpu=False,
     use_all_avail_gpus=False,
     clip_grad_norm=None,
 ):
     """Build a modular SARSA trainer together with a DQN exporter for it.

     The SARSA parameters are obtained from ``self.get_sarsa_parameters`` and
     the trainer from ``self.get_modular_sarsa_trainer_reward_boost``; both
     receive the same environment / reward-shape / model-variant arguments.
     The exporter wraps the trainer's ``q_network`` with a feature extractor
     built from ``environment.normalization`` and an output transformer built
     from the parameters' ``actions``.

     Args:
         environment: Object exposing a ``normalization`` attribute; also
             forwarded to the parameter and trainer factories.
         reward_shape: Forwarded unchanged to both factories.
         dueling: Forwarded unchanged to both factories.
         categorical: Forwarded unchanged to both factories.
         quantile: Forwarded unchanged to both factories.
         use_gpu: Forwarded to the trainer factory only.
         use_all_avail_gpus: Forwarded to the trainer factory only.
         clip_grad_norm: Forwarded unchanged to both factories.

     Returns:
         A ``(trainer, exporter)`` tuple.
     """
     sarsa_params = self.get_sarsa_parameters(
         environment,
         reward_shape,
         dueling,
         categorical,
         quantile,
         clip_grad_norm,
     )

     # Keyword options shared with the trainer factory, gathered in one place.
     trainer_options = dict(
         dueling=dueling,
         categorical=categorical,
         quantile=quantile,
         use_gpu=use_gpu,
         use_all_avail_gpus=use_all_avail_gpus,
         clip_grad_norm=clip_grad_norm,
     )
     sarsa_trainer = self.get_modular_sarsa_trainer_reward_boost(
         environment, reward_shape, **trainer_options
     )

     extractor = PredictorFeatureExtractor(
         state_normalization_parameters=environment.normalization
     )
     transformer = DiscreteActionOutputTransformer(sarsa_params.actions)

     return (
         sarsa_trainer,
         DQNExporter(sarsa_trainer.q_network, extractor, transformer),
     )
예제 #2
0
def main(params):
    """Train a discrete-action DQN from JSON datasets and export the result.

    The function builds the model parameters from ``params``, loads the state
    normalization, trains a ``DqnWorkflow`` for the requested number of epochs
    while logging to TensorBoard, and finally exports the trainer and a
    predictor built from the trained Q-network.

    Args:
        params: Nested configuration mapping. Keys read here are
            ``"training"`` (must contain ``"minibatch_size"``), ``"rl"``,
            ``"rainbow"``, ``"actions"``, ``"use_gpu"``,
            ``"use_all_avail_gpus"``, ``"state_norm_data_path"``,
            ``"model_output_path"``, ``"training_data_path"``,
            ``"eval_data_path"`` and ``"epochs"``. The mapping is not
            modified.

    Returns:
        Whatever ``export_trainer_and_predictor`` returns.
    """
    # Set minibatch size based on # of devices being used to train.
    # The scaling is applied to a shallow copy of the training section so the
    # caller's ``params`` is left untouched; scaling in place would compound
    # the multiplier if the same ``params`` were passed in again.
    training_config = dict(params["training"])
    training_config["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**training_config)
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = DiscreteActionModelParameters(
        actions=params["actions"],
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )
    state_normalization = BaseWorkflow.read_norm_file(
        params["state_norm_data_path"])

    # TensorBoard events are written next to the exported model.
    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    # NOTE(review): the literal ``False`` is passed positionally to
    # Preprocessor; its meaning is not visible from here - confirm.
    preprocess_handler = DqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        np.array(model_params.actions),
        PandasSparseToDenseProcessor(),
    )

    workflow = DqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size)
    # Evaluation uses a fixed batch size of 16, independent of the (scaled)
    # training minibatch size.
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset,
                               int(params["epochs"]))

    exporter = DQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization),
        DiscreteActionOutputTransformer(model_params.actions),
    )

    return export_trainer_and_predictor(workflow.trainer,
                                        params["model_output_path"],
                                        exporter=exporter)  # noqa
예제 #3
0
def single_process_main(gpu_index, *args):
    """Run DQN training in one process and export from the lead process.

    The signature ``(gpu_index, *args)`` takes the configuration mapping as
    the first extra positional argument. When ``params["use_all_avail_gpus"]``
    is truthy, multiprocessing is initialised through
    ``BaseWorkflow.init_multiprocessing`` before the workflow is created.
    After training, only the process with ``node_index == 0`` and
    ``gpu_index == 0`` exports the trainer and predictor.

    Args:
        gpu_index: Index of this process's GPU; forwarded to
            ``BaseWorkflow.init_multiprocessing`` and compared with 0 to
            decide whether this process performs the export.
        *args: ``args[0]`` is the nested configuration mapping. Keys read
            here are ``"training"`` (must contain ``"minibatch_size"``),
            ``"rl"``, ``"rainbow"``, ``"actions"``, ``"use_gpu"``,
            ``"use_all_avail_gpus"``, ``"state_norm_data_path"``,
            ``"model_output_path"``, ``"training_data_path"``,
            ``"eval_data_path"``, ``"epochs"`` and ``"node_index"``; with
            ``"use_all_avail_gpus"`` set, also ``"num_processes_per_node"``,
            ``"num_nodes"`` and ``"init_method"``. The mapping is not
            modified.

    Returns:
        None.
    """
    params = args[0]
    # Set minibatch size based on # of devices being used to train.
    # The scaling is applied to a shallow copy of the training section so the
    # caller's mapping is left untouched; scaling in place would compound the
    # multiplier if the same mapping were passed in again by a direct caller.
    training_config = dict(params["training"])
    training_config["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    action_names = params["actions"]

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**training_config)
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = DiscreteActionModelParameters(
        actions=action_names,
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )
    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])

    # TensorBoard events are written next to the exported model.
    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    if params["use_all_avail_gpus"]:
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = DqnWorkflow(
        model_params,
        state_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    # The sparse-to-dense processor is given the feature ordering derived
    # from the normalization; the second return value is not needed here.
    sorted_features, _ = sort_features_by_normalization(state_normalization)
    preprocess_handler = DiscreteDqnPreprocessHandler(
        action_names, PandasSparseToDenseProcessor(sorted_features)
    )

    # Training and evaluation readers share the batch size and preprocessing.
    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(
        params["eval_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = DQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(state_normalization_parameters=state_normalization),
        DiscreteActionOutputTransformer(model_params.actions),
    )

    # Only the first process on the first node writes the exported artifacts.
    if int(params["node_index"]) == 0 and gpu_index == 0:
        export_trainer_and_predictor(
            workflow.trainer, params["model_output_path"], exporter=exporter
        )  # noqa