Example 1
    def test_create_net(self):
        q_value_blob = core.BlobReference("q_values")
        N = 10
        # NOTE: We add `b` prefix here to match the return type of FetchBlob
        actions = [b"yes", b"no"]
        q_values = np.random.randn(N, len(actions)).astype(np.float32)
        workspace.FeedBlob(q_value_blob, q_values)
        ot = DiscreteActionOutputTransformer(actions)
        output_record = schema.Struct(("q_values", schema.Scalar(blob=q_value_blob)))
        nets = ot.create_net(output_record)
        workspace.RunNetOnce(nets.init_net)
        workspace.RunNetOnce(nets.net)

        external_outputs = {str(b) for b in nets.net.external_outputs}

        def fetch_blob(b):
            self.assertIn(b, external_outputs)
            return workspace.FetchBlob(b)

        feature_lengths = fetch_blob(
            "output/string_weighted_multi_categorical_features.lengths"
        )
        feature_keys = fetch_blob(
            "output/string_weighted_multi_categorical_features.keys"
        )
        values_lengths = fetch_blob(
            "output/string_weighted_multi_categorical_features.values.lengths"
        )
        values_keys = fetch_blob(
            "output/string_weighted_multi_categorical_features.values.keys"
        )
        values_values = fetch_blob(
            "output/string_weighted_multi_categorical_features.values.values"
        )
        action_lengths = fetch_blob("output/string_single_categorical_features.lengths")
        action_keys = fetch_blob("output/string_single_categorical_features.keys")
        action_values = fetch_blob("output/string_single_categorical_features.values")

        npt.assert_array_equal(np.ones(N, dtype=np.int32), feature_lengths)
        npt.assert_array_equal(np.zeros(N, dtype=np.int64), feature_keys)
        npt.assert_array_equal([len(actions)] * N, values_lengths)
        npt.assert_array_equal(np.array(actions * N, dtype=object), values_keys)
        npt.assert_array_equal(q_values.reshape(-1), values_values)
        npt.assert_array_equal([len(actions)] * N, action_lengths)
        npt.assert_array_equal(list(range(len(actions))) * N, action_keys)

        # We can only assert max-Q policy
        max_q_actions = action_values.reshape(-1, len(actions))[:, 0]
        npt.assert_array_equal(
            [actions[i] for i in np.argmax(q_values, axis=1)], max_q_actions
        )
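
The test above exercises the full Caffe2 round trip: feed a q_values blob, build the transformer's init and run nets from a schema record, run both nets once, and fetch the exported output blobs. Distilled from that test, a minimal sketch of the same wiring outside the unittest harness (the import path for DiscreteActionOutputTransformer is not shown in the snippet and depends on your checkout):

# Minimal sketch distilled from the test above.
# DiscreteActionOutputTransformer is assumed to be importable from the same
# module the test targets; the exact path varies by version of the codebase.
import numpy as np
from caffe2.python import core, schema, workspace

actions = [b"yes", b"no"]
q_values = np.random.randn(10, len(actions)).astype(np.float32)

q_value_blob = core.BlobReference("q_values")
workspace.FeedBlob(q_value_blob, q_values)

ot = DiscreteActionOutputTransformer(actions)
output_record = schema.Struct(("q_values", schema.Scalar(blob=q_value_blob)))
nets = ot.create_net(output_record)
workspace.RunNetOnce(nets.init_net)
workspace.RunNetOnce(nets.net)

# As asserted in the test, the first entry of each row of this blob is the
# max-Q action for that row.
ranked_actions = workspace.FetchBlob("output/string_single_categorical_features.values")
max_q_actions = ranked_actions.reshape(-1, len(actions))[:, 0]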
Example 2
    def get_modular_sarsa_trainer_exporter(
        self,
        environment,
        reward_shape,
        dueling,
        use_gpu=False,
        use_all_avail_gpus=False,
        clip_grad_norm=None,
    ):
        parameters = self.get_sarsa_parameters(
            environment, reward_shape, dueling, clip_grad_norm
        )
        trainer = self.get_modular_sarsa_trainer_reward_boost(
            environment,
            reward_shape,
            dueling,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
            clip_grad_norm=clip_grad_norm,
        )
        feature_extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization
        )
        output_transformer = DiscreteActionOutputTransformer(parameters.actions)
        exporter = DQNExporter(trainer.q_network, feature_extractor, output_transformer)
        return (trainer, exporter)
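
The (trainer, exporter) pair returned here is what Examples 3 and 4 ultimately pass to export_trainer_and_predictor. A hedged usage sketch inside the same test class (argument values and the output path are placeholders, not library defaults):

# Hedged usage sketch; environment, reward_shape, and the output path are
# placeholders taken from the surrounding examples, not prescribed values.
trainer, exporter = self.get_modular_sarsa_trainer_exporter(
    environment, reward_shape, dueling=False
)
export_trainer_and_predictor(trainer, "/tmp/predictor_output", exporter=exporter)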
Example 3
def main(params):
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = DiscreteActionModelParameters(
        actions=params["actions"],
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )
    state_normalization = BaseWorkflow.read_norm_file(
        params["state_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = DqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        np.array(model_params.actions),
        PandasSparseToDenseProcessor(),
    )

    workflow = DqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size)
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset,
                               int(params["epochs"]))

    exporter = DQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization),
        DiscreteActionOutputTransformer(model_params.actions),
    )

    return export_trainer_and_predictor(workflow.trainer,
                                        params["model_output_path"],
                                        exporter=exporter)  # noqa
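
main reads a nested params dict; the keys below are exactly the ones the function body accesses. The values are illustrative placeholders only, and the empty dicts stand in for whatever keyword arguments RLParameters, TrainingParameters, and RainbowDQNParameters actually require:

# Keys taken from the function above; values are hypothetical placeholders.
params = {
    "use_gpu": False,
    "use_all_avail_gpus": False,
    "actions": ["action_A", "action_B"],    # hypothetical action names
    "epochs": 10,
    "rl": {},                               # kwargs forwarded to RLParameters(**...)
    "training": {"minibatch_size": 1024},   # kwargs forwarded to TrainingParameters(**...)
    "rainbow": {},                          # kwargs forwarded to RainbowDQNParameters(**...)
    "state_norm_data_path": "path/to/state_norm.json",      # placeholder
    "training_data_path": "path/to/training_data.json",     # placeholder
    "eval_data_path": "path/to/eval_data.json",             # placeholder
    "model_output_path": "path/to/output_dir",              # placeholder
}
# main(params) would be called once the real parameter values are filled in.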
Example 4
def single_process_main(gpu_index, *args):
    params = args[0]
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    action_names = params["actions"]

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = DiscreteActionModelParameters(
        actions=action_names,
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )
    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    if params["use_all_avail_gpus"]:
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = DqnWorkflow(
        model_params,
        state_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    sorted_features, _ = sort_features_by_normalization(state_normalization)
    preprocess_handler = DiscreteDqnPreprocessHandler(
        action_names, PandasSparseToDenseProcessor(sorted_features)
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(
        params["eval_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = DQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(state_normalization_parameters=state_normalization),
        DiscreteActionOutputTransformer(model_params.actions),
    )

    if int(params["node_index"]) == 0 and gpu_index == 0:
        export_trainer_and_predictor(
            workflow.trainer, params["model_output_path"], exporter=exporter
        )  # noqa
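
Compared with Example 3, this variant reads the distributed-training keys num_processes_per_node, num_nodes, node_index, and init_method when use_all_avail_gpus is set, takes gpu_index as its first positional argument, and only the node-0/GPU-0 process exports the final predictor. A hedged sketch of launching one process per local GPU; torch.multiprocessing is an assumption here, not necessarily the launcher the original workflow uses:

# Hedged sketch: mp.spawn calls single_process_main(gpu_index, params) once
# per process, which matches the signature above.
import torch.multiprocessing as mp

mp.spawn(
    single_process_main,
    args=(params,),
    nprocs=int(params["num_processes_per_node"]),
)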