def get_critic_exporter(self, trainer, environment):
        feature_extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        )
        output_transformer = ParametricActionOutputTransformer()

        return ParametricDQNExporter(trainer.q1_network, feature_extractor,
                                     output_transformer)
Exemple #2
0
def main(params):
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = ContinuousActionModelParameters(
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters)
    state_normalization = BaseWorkflow.read_norm_file(
        params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(
        params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = ParametricDqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        Preprocessor(action_normalization, False),
        PandasSparseToDenseProcessor(),
    )

    workflow = ParametricDqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size)
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset,
                               int(params["epochs"]))

    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization,
            action_normalization_parameters=action_normalization,
        ),
        ParametricActionOutputTransformer(),
    )
    return export_trainer_and_predictor(workflow.trainer,
                                        params["model_output_path"],
                                        exporter=exporter)  # noqa
Exemple #3
0
    def test_create_net(self):
        q_value_blob = core.BlobReference("q_values")
        N = 10
        # NOTE: We add `b` prefix here to match the return type of FetchBlob
        actions = [b"Q"]
        q_values = np.random.randn(N, len(actions)).astype(np.float32)
        workspace.FeedBlob(q_value_blob, q_values)
        ot = ParametricActionOutputTransformer()
        output_record = schema.Struct(("q_value", schema.Scalar(blob=q_value_blob)))
        nets = ot.create_net(output_record)
        workspace.RunNetOnce(nets.init_net)
        workspace.RunNetOnce(nets.net)

        external_outputs = {str(b) for b in nets.net.external_outputs}

        def fetch_blob(b):
            self.assertIn(b, external_outputs)
            return workspace.FetchBlob(b)

        feature_lengths = fetch_blob(
            "output/string_weighted_multi_categorical_features.lengths"
        )
        feature_keys = fetch_blob(
            "output/string_weighted_multi_categorical_features.keys"
        )
        values_lengths = fetch_blob(
            "output/string_weighted_multi_categorical_features.values.lengths"
        )
        values_keys = fetch_blob(
            "output/string_weighted_multi_categorical_features.values.keys"
        )
        values_values = fetch_blob(
            "output/string_weighted_multi_categorical_features.values.values"
        )

        npt.assert_array_equal(np.ones(N, dtype=np.int32), feature_lengths)
        npt.assert_array_equal(np.zeros(N, dtype=np.int64), feature_keys)
        npt.assert_array_equal([len(actions)] * N, values_lengths)
        npt.assert_array_equal(np.array(actions * N, dtype=np.object), values_keys)
        npt.assert_array_equal(q_values.reshape(-1), values_values)
Exemple #4
0
    def get_modular_sarsa_trainer_exporter(self,
                                           environment,
                                           parameters=None,
                                           use_gpu=False,
                                           use_all_avail_gpus=False):
        parameters = parameters or self.get_sarsa_parameters()
        q_network = FullyConnectedParametricDQN(
            state_dim=get_num_output_features(environment.normalization),
            action_dim=get_num_output_features(
                environment.normalization_action),
            sizes=parameters.training.layers[1:-1],
            activations=parameters.training.activations[:-1],
        )
        reward_network = FullyConnectedParametricDQN(
            state_dim=get_num_output_features(environment.normalization),
            action_dim=get_num_output_features(
                environment.normalization_action),
            sizes=parameters.training.layers[1:-1],
            activations=parameters.training.activations[:-1],
        )
        if use_gpu:
            q_network = q_network.cuda()
            reward_network = reward_network.cuda()
            if use_all_avail_gpus:
                q_network = q_network.get_data_parallel_model()
                reward_network = reward_network.get_data_parallel_model()

        q_network_target = q_network.get_target_network()
        trainer = _ParametricDQNTrainer(q_network, q_network_target,
                                        reward_network, parameters)
        state_preprocessor = Preprocessor(environment.normalization, False,
                                          True)
        action_preprocessor = Preprocessor(environment.normalization_action,
                                           False, True)
        feature_extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        )
        output_transformer = ParametricActionOutputTransformer()
        exporter = ParametricDQNExporter(
            q_network,
            feature_extractor,
            output_transformer,
            state_preprocessor,
            action_preprocessor,
        )
        return (trainer, exporter)
Exemple #5
0
 def from_state_action_normalization(
     cls,
     dnn,
     state_normalization,
     action_normalization,
     state_preprocessor=None,
     action_preprocessor=None,
     **kwargs,
 ):
     return cls(
         dnn=dnn,
         feature_extractor=PredictorFeatureExtractor(
             state_normalization_parameters=state_normalization,
             action_normalization_parameters=action_normalization,
         ),
         output_transformer=ParametricActionOutputTransformer(),
         state_preprocessor=state_preprocessor,
         action_preprocessor=action_preprocessor,
         **kwargs,
     )
def single_process_main(gpu_index, *args):
    params = args[0]
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = ContinuousActionModelParameters(
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters)
    state_normalization = BaseWorkflow.read_norm_file(
        params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(
        params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    if params["use_all_avail_gpus"]:
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = ParametricDqnWorkflow(
        model_params,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    state_sorted_features, _ = sort_features_by_normalization(
        state_normalization)
    action_sorted_features, _ = sort_features_by_normalization(
        action_normalization)
    preprocess_handler = ParametricDqnPreprocessHandler(
        PandasSparseToDenseProcessor(state_sorted_features),
        PandasSparseToDenseProcessor(action_sorted_features),
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(params["eval_data_path"],
                                     batch_size=16,
                                     preprocess_handler=preprocess_handler)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset,
                               int(params["epochs"]))

    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization,
            action_normalization_parameters=action_normalization,
        ),
        ParametricActionOutputTransformer(),
    )

    if int(params["node_index"]) == 0 and gpu_index == 0:
        export_trainer_and_predictor(workflow.trainer,
                                     params["model_output_path"],
                                     exporter=exporter)  # noqa