    def test_parametric_wrapper(self):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        action_normalization_parameters = {i: _cont_norm() for i in range(5, 9)}
        state_preprocessor = Preprocessor(state_normalization_parameters, False)
        action_preprocessor = Preprocessor(action_normalization_parameters, False)
        dqn = FullyConnectedParametricDQN(
            state_dim=len(state_normalization_parameters),
            action_dim=len(action_normalization_parameters),
            sizes=[16],
            activations=["relu"],
        )
        dqn_with_preprocessor = ParametricDqnWithPreprocessor(
            dqn,
            state_preprocessor=state_preprocessor,
            action_preprocessor=action_preprocessor,
        )
        wrapper = ParametricDqnPredictorWrapper(dqn_with_preprocessor)

        input_prototype = dqn_with_preprocessor.input_prototype()
        output_action_names, q_value = wrapper(*input_prototype)
        self.assertEqual(output_action_names, ["Q"])
        self.assertEqual(q_value.shape, (1, 1))

        expected_output = dqn(
            rlt.PreprocessedStateAction.from_tensors(
                state=state_preprocessor(*input_prototype[0]),
                action=action_preprocessor(*input_prototype[1]),
            )
        ).q_value
        self.assertTrue((expected_output == q_value).all())
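
# The _cont_norm() and _cont_action_norm() helpers used by these wrapper tests
# are not shown in this excerpt. A minimal sketch of what they presumably
# return (an assumption; the real helpers may pick different constants), using
# the same NormalizationParameters class the examples import:
def _cont_norm():
    # plain continuous feature, standardized by the Preprocessor
    return NormalizationParameters(feature_type="CONTINUOUS", mean=0.0, stddev=1.0)

def _cont_action_norm():
    # continuous action feature, rescaled into [-1, 1] by the Preprocessor
    return NormalizationParameters(
        feature_type="CONTINUOUS_ACTION", min_value=-3.0, max_value=3.0
    )
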
    def build_serving_module(
        self,
        actor: ModelBase,
        state_normalization_data: NormalizationData,
        action_normalization_data: NormalizationData,
    ) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module
        """
        state_normalization_parameters = (
            state_normalization_data.dense_normalization_parameters)
        action_normalization_parameters = (
            action_normalization_data.dense_normalization_parameters)
        assert state_normalization_parameters is not None
        assert action_normalization_parameters is not None

        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          use_gpu=False)
        postprocessor = Postprocessor(action_normalization_parameters,
                                      use_gpu=False)
        actor_with_preprocessor = ActorWithPreprocessor(
            actor.cpu_model().eval(), state_preprocessor, postprocessor)
        action_features = Preprocessor(action_normalization_parameters,
                                       use_gpu=False).sorted_features
        return ActorPredictorWrapper(actor_with_preprocessor, action_features)
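
# Hedged usage sketch for build_serving_module above (the builder argument,
# actor, parameter dicts, and output path are placeholders; the
# NormalizationData fields mirror what the method unpacks). The docstring says
# the result is already a TorchScript module, so it can be saved directly and
# reloaded later with torch.jit.load.
def export_actor(builder, actor, state_norm_params, action_norm_params,
                 path="actor_predictor.pt"):
    serving_module = builder.build_serving_module(
        actor,
        NormalizationData(dense_normalization_parameters=state_norm_params),
        NormalizationData(dense_normalization_parameters=action_norm_params),
    )
    torch.jit.save(serving_module, path)
    return serving_module
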
    def test_preprocessing_network(self):
        feature_value_map = read_data()

        normalization_parameters = {}
        name_preprocessed_blob_map = {}

        for feature_name, feature_values in feature_value_map.items():
            normalization_parameters[feature_name] = normalization.identify_parameter(
                feature_values, feature_type=self._feature_type_override(feature_name)
            )
            # Set one entry to MISSING_VALUE to test that missing values are handled
            feature_values[0] = MISSING_VALUE

            preprocessor = Preprocessor(
                {feature_name: normalization_parameters[feature_name]}, False
            )
            preprocessor.clamp = False
            feature_values_matrix = np.expand_dims(feature_values, -1)
            normalized_feature_values = preprocessor.forward(feature_values_matrix)
            name_preprocessed_blob_map[feature_name] = normalized_feature_values.numpy()

        test_features = self.preprocess(feature_value_map, normalization_parameters)

        for feature_name in feature_value_map:
            normalized_features = name_preprocessed_blob_map[feature_name]
            if feature_name != ENUM_FEATURE_ID:
                normalized_features = np.squeeze(normalized_features, -1)

            tolerance = 0.01
            if feature_name == BOXCOX_FEATURE_ID:
                # At the limit, boxcox has some numerical instability
                tolerance = 0.5
            non_matching = np.where(
                np.logical_not(
                    np.isclose(
                        normalized_features.flatten(),
                        test_features[feature_name].flatten(),
                        rtol=tolerance,
                        atol=tolerance,
                    )
                )
            )
            self.assertTrue(
                np.all(
                    np.isclose(
                        normalized_features.flatten(),
                        test_features[feature_name].flatten(),
                        rtol=tolerance,
                        atol=tolerance,
                    )
                ),
                "{} does not match: {} \n!=\n {}".format(
                    feature_name,
                    normalized_features.flatten()[non_matching],
                    test_features[feature_name].flatten()[non_matching],
                ),
            )
Example #4
def main(params):
    # Set minibatch size based on # of devices being used to train
    params["shared_training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = DDPGTrainingParameters(**params["shared_training"])
    actor_parameters = DDPGNetworkParameters(**params["actor_training"])
    critic_parameters = DDPGNetworkParameters(**params["critic_training"])

    model_params = DDPGModelParameters(
        rl=rl_parameters,
        shared_training=training_parameters,
        actor_training=actor_parameters,
        critic_training=critic_parameters,
    )

    state_normalization = BaseWorkflow.read_norm_file(
        params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(
        params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = ContinuousPreprocessHandler(
        Preprocessor(state_normalization, False),
        Preprocessor(action_normalization, False),
        PandasSparseToDenseProcessor(),
    )

    workflow = ContinuousWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size)
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset,
                               int(params["epochs"]))
    return export_trainer_and_predictor(
        workflow.trainer,
        params["model_output_path"],
        exporter=_get_actor_exporter(
            trainer=workflow.trainer,
            state_normalization=state_normalization,
            action_normalization=action_normalization,
        ),
    )  # noqa
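
# A minimal params skeleton for main() above, inferred from the keys the
# function reads; nested values are placeholders, not a validated config.
params = {
    "use_gpu": False,
    "use_all_avail_gpus": False,
    "rl": {},                                      # RLParameters kwargs
    "shared_training": {"minibatch_size": 1024},   # DDPGTrainingParameters kwargs
    "actor_training": {},                          # DDPGNetworkParameters kwargs
    "critic_training": {},                         # DDPGNetworkParameters kwargs
    "state_norm_data_path": "state_norm.json",
    "action_norm_data_path": "action_norm.json",
    "training_data_path": "train.json",
    "eval_data_path": "eval.json",
    "model_output_path": "/tmp/ddpg_model",
    "epochs": 10,
}
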
Example #5
def main(params):
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = ContinuousActionModelParameters(
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters)
    state_normalization = BaseWorkflow.read_norm_file(
        params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(
        params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = ParametricDqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        Preprocessor(action_normalization, False),
        PandasSparseToDenseProcessor(),
    )

    workflow = ParametricDqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size)
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset,
                               int(params["epochs"]))

    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization,
            action_normalization_parameters=action_normalization,
        ),
        ParametricActionOutputTransformer(),
    )
    return export_trainer_and_predictor(workflow.trainer,
                                        params["model_output_path"],
                                        exporter=exporter)  # noqa
Example #6
    def test_preprocessing_network_onnx(self):
        feature_value_map = read_data()

        for feature_name, feature_values in feature_value_map.items():
            normalization_parameters = normalization.identify_parameter(
                feature_name,
                feature_values,
                feature_type=self._feature_type_override(feature_name),
            )
            # Set one entry to MISSING_VALUE to test that missing values are handled
            feature_values[0] = MISSING_VALUE
            feature_values_matrix = np.expand_dims(feature_values, -1)

            preprocessor = Preprocessor({feature_name: normalization_parameters}, False)
            normalized_feature_values = preprocessor.forward(feature_values_matrix)

            input_blob, output_blob, netdef = PytorchCaffe2Converter.pytorch_net_to_caffe2_netdef(
                preprocessor, 1, False, float_input=True
            )

            preproc_workspace = netdef.workspace

            preproc_workspace.FeedBlob(input_blob, feature_values_matrix)

            preproc_workspace.RunNetOnce(core.Net(netdef.init_net))
            preproc_workspace.RunNetOnce(core.Net(netdef.predict_net))

            normalized_feature_values_onnx = netdef.workspace.FetchBlob(output_blob)
            tolerance = 0.0001
            non_matching = np.where(
                np.logical_not(
                    np.isclose(
                        normalized_feature_values,
                        normalized_feature_values_onnx,
                        rtol=tolerance,
                        atol=tolerance,
                    )
                )
            )
            self.assertTrue(
                np.all(
                    np.isclose(
                        normalized_feature_values,
                        normalized_feature_values_onnx,
                        rtol=tolerance,
                        atol=tolerance,
                    )
                ),
                "{} does not match: {} \n!=\n {}".format(
                    feature_name,
                    normalized_feature_values[non_matching].tolist()[0:10],
                    normalized_feature_values_onnx[non_matching].tolist()[0:10],
                ),
            )
    def test_preprocessing_network(self):
        features, feature_value_map = preprocessing_util.read_data()
        normalization_parameters = {}
        name_preprocessed_blob_map = {}

        for feature_name, feature_values in feature_value_map.items():
            normalization_parameters[feature_name] = normalization.identify_parameter(
                feature_values
            )

            preprocessor = Preprocessor(
                {feature_name: normalization_parameters[feature_name]}, False
            )
            preprocessor.clamp = False
            feature_values_matrix = np.expand_dims(feature_values, -1)
            normalized_feature_values = preprocessor.forward(feature_values_matrix)
            name_preprocessed_blob_map[feature_name] = normalized_feature_values.numpy()

        test_features = self.preprocess(feature_value_map, normalization_parameters)

        for feature_name in feature_value_map:
            normalized_features = name_preprocessed_blob_map[feature_name]
            if feature_name != identify_types.ENUM:
                normalized_features = np.squeeze(normalized_features, -1)

            tolerance = 0.01
            if feature_name == BOXCOX:
                # At the limit, boxcox has some numerical instability
                tolerance = 0.5
            non_matching = np.where(
                np.logical_not(
                    np.isclose(
                        normalized_features,
                        test_features[feature_name],
                        rtol=tolerance,
                        atol=tolerance,
                    )
                )
            )
            self.assertTrue(
                np.all(
                    np.isclose(
                        normalized_features,
                        test_features[feature_name],
                        rtol=tolerance,
                        atol=tolerance,
                    )
                ),
                "{} does not match: {} {}".format(
                    feature_name,
                    normalized_features[non_matching].tolist()[0:10],
                    test_features[feature_name][non_matching].tolist()[0:10],
                ),
            )
Example #8
    def get_predictor(self, trainer, environment):
        state_preprocessor = Preprocessor(environment.normalization, False)
        action_preprocessor = Preprocessor(environment.normalization_action, False)
        q_network = self.current_predictor_network
        dqn_with_preprocessor = ParametricDqnWithPreprocessor(
            q_network.cpu_model().eval(), state_preprocessor, action_preprocessor
        )
        serving_module = ParametricDqnPredictorWrapper(
            dqn_with_preprocessor=dqn_with_preprocessor
        )
        predictor = ParametricDqnTorchPredictor(serving_module)
        return predictor
    def get_critic_exporter(self, trainer, environment):
        feature_extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        )
        output_transformer = ParametricActionOutputTransformer()

        return ParametricDQNExporter(
            trainer.q1_network,
            feature_extractor,
            output_transformer,
            Preprocessor(environment.normalization, False, True),
            Preprocessor(environment.normalization_action, False, True),
        )
Example #10
    def test_discrete_wrapper(self):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        action_dim = 2
        dqn = FullyConnectedDQN(
            state_dim=len(state_normalization_parameters),
            action_dim=action_dim,
            sizes=[16],
            activations=["relu"],
        )
        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
            dqn, state_preprocessor)
        action_names = ["L", "R"]
        wrapper = DiscreteDqnPredictorWrapper(dqn_with_preprocessor,
                                              action_names)
        input_prototype = dqn_with_preprocessor.input_prototype()
        output_action_names, q_values = wrapper(*input_prototype)
        self.assertEqual(action_names, output_action_names)
        self.assertEqual(q_values.shape, (1, 2))

        expected_output = dqn(
            rlt.PreprocessedState.from_tensor(
                state_preprocessor(*input_prototype[0]))).q_values
        self.assertTrue((expected_output == q_values).all())
Example #11
    def __init__(
        self,
        model_params: DiscreteActionModelParameters,
        state_normalization: Dict[int, NormalizationParameters],
        use_gpu: bool,
        use_all_avail_gpus: bool,
    ):
        logger.info("Running DQN workflow with params:")
        logger.info(model_params)

        trainer = create_dqn_trainer_from_params(
            model_params,
            state_normalization,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )
        trainer = update_model_for_warm_start(trainer)
        assert type(trainer) == DQNTrainer, "Warm started wrong model type: " + str(
            type(trainer)
        )

        evaluator = Evaluator(
            model_params.actions,
            model_params.rl.gamma,
            trainer,
            metrics_to_score=trainer.metrics_to_score,
        )

        super().__init__(
            DiscreteDqnBatchPreprocessor(Preprocessor(state_normalization, use_gpu)),
            trainer,
            evaluator,
            model_params.training.minibatch_size,
        )
    def test_actor_wrapper(self):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        action_normalization_parameters = {
            i: _cont_action_norm() for i in range(101, 105)
        }
        state_preprocessor = Preprocessor(state_normalization_parameters, False)
        postprocessor = Postprocessor(action_normalization_parameters, False)

        # Test with FullyConnectedActor to make behavior deterministic
        actor = FullyConnectedActor(
            state_dim=len(state_normalization_parameters),
            action_dim=len(action_normalization_parameters),
            sizes=[16],
            activations=["relu"],
        )
        actor_with_preprocessor = ActorWithPreprocessor(
            actor, state_preprocessor, postprocessor
        )
        wrapper = ActorPredictorWrapper(actor_with_preprocessor)
        input_prototype = actor_with_preprocessor.input_prototype()
        action = wrapper(*input_prototype)
        self.assertEqual(action.shape, (1, len(action_normalization_parameters)))

        expected_output = postprocessor(
            actor(
                rlt.PreprocessedState.from_tensor(
                    state_preprocessor(*input_prototype[0])
                )
            ).action
        )
        self.assertTrue((expected_output == action).all())
Example #13
    def get_modular_sarsa_trainer_exporter(self,
                                           environment,
                                           parameters=None,
                                           use_gpu=False,
                                           use_all_avail_gpus=False):
        parameters = parameters or self.get_sarsa_parameters()
        q_network = FullyConnectedParametricDQN(
            state_dim=get_num_output_features(environment.normalization),
            action_dim=get_num_output_features(
                environment.normalization_action),
            sizes=parameters.training.layers[1:-1],
            activations=parameters.training.activations[:-1],
        )
        reward_network = FullyConnectedParametricDQN(
            state_dim=get_num_output_features(environment.normalization),
            action_dim=get_num_output_features(
                environment.normalization_action),
            sizes=parameters.training.layers[1:-1],
            activations=parameters.training.activations[:-1],
        )
        if use_gpu:
            q_network = q_network.cuda()
            reward_network = reward_network.cuda()
            if use_all_avail_gpus:
                q_network = q_network.get_data_parallel_model()
                reward_network = reward_network.get_data_parallel_model()

        q_network_target = q_network.get_target_network()
        trainer = _ParametricDQNTrainer(q_network, q_network_target,
                                        reward_network, parameters)
        state_preprocessor = Preprocessor(environment.normalization, False,
                                          True)
        action_preprocessor = Preprocessor(environment.normalization_action,
                                           False, True)
        feature_extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        )
        output_transformer = ParametricActionOutputTransformer()
        exporter = ParametricDQNExporter(
            q_network,
            feature_extractor,
            output_transformer,
            state_preprocessor,
            action_preprocessor,
        )
        return (trainer, exporter)
Example #14
    def __init__(
        self,
        state_preprocessor: Preprocessor,
        value_network: Optional[nn.Module],
        action_names: List[str],
    ) -> None:
        super().__init__()

        self.state_sorted_features_t = state_preprocessor.sorted_features

        self.state_preprocessor = torch.jit.trace(
            state_preprocessor, (state_preprocessor.input_prototype()))

        value_network_sample_input = self.state_preprocessor(
            *state_preprocessor.input_prototype())
        self.value_network = torch.jit.trace(value_network,
                                             value_network_sample_input)
        self.action_names = torch.jit.Attribute(action_names, List[str])
Example #15
    def build_serving_module(
        self,
        q_network: ModelBase,
        state_normalization_parameters: Dict[int, NormalizationParameters],
        action_normalization_parameters: Dict[int, NormalizationParameters],
    ) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module
        """
        state_preprocessor = Preprocessor(state_normalization_parameters, False)
        action_preprocessor = Preprocessor(action_normalization_parameters, False)
        dqn_with_preprocessor = ParametricDqnWithPreprocessor(
            q_network.cpu_model().eval(), state_preprocessor, action_preprocessor
        )
        return ParametricDqnPredictorWrapper(
            dqn_with_preprocessor=dqn_with_preprocessor
        )
    def save_models(self, path: str):
        export_time = round(time.time())
        output_path = os.path.expanduser(path)
        pytorch_output_path = os.path.join(output_path,
                                           "trainer_{}.pt".format(export_time))
        torchscript_output_path = os.path.join(
            path, "model_{}.torchscript".format(export_time))

        state_preprocessor = Preprocessor(self.state_normalization, False)
        action_preprocessor = Preprocessor(self.action_normalization, False)
        q_network = self.trainer.q_network
        dqn_with_preprocessor = ParametricDqnWithPreprocessor(
            q_network.cpu_model().eval(), state_preprocessor,
            action_preprocessor)
        serving_module = ParametricDqnPredictorWrapper(
            dqn_with_preprocessor=dqn_with_preprocessor)
        logger.info("Saving PyTorch trainer to {}".format(pytorch_output_path))
        save_model_to_file(self.trainer, pytorch_output_path)
        self.save_torchscript_model(serving_module, torchscript_output_path)
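
# Hedged follow-up to save_models above: reload the exported TorchScript file
# for serving. This assumes save_torchscript_model writes a file that
# torch.jit.load can read, as the .torchscript suffix suggests; the predictor
# constructor matches its use elsewhere in these examples.
def load_parametric_predictor(torchscript_output_path):
    loaded_module = torch.jit.load(torchscript_output_path)
    return ParametricDqnTorchPredictor(loaded_module)
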
    def test_normalize_dense_matrix_enum(self):
        normalization_parameters = {
            1:
            NormalizationParameters(
                identify_types.ENUM,
                None,
                None,
                None,
                None,
                [12, 4, 2],
                None,
                None,
                None,
            ),
            2:
            NormalizationParameters(identify_types.CONTINUOUS, None, 0, 0, 1,
                                    None, None, None, None),
            3:
            NormalizationParameters(identify_types.ENUM, None, None, None,
                                    None, [15, 3], None, None, None),
        }
        preprocessor = Preprocessor(normalization_parameters, False)
        preprocessor.clamp = False

        inputs = np.zeros([4, 3], dtype=np.float32)
        feature_ids = [2, 1, 3]  # Sorted according to feature type
        inputs[:, feature_ids.index(1)] = [12, 4, 2, 2]
        inputs[:, feature_ids.index(2)] = [1.0, 2.0, 3.0, 3.0]
        inputs[:, feature_ids.index(3)] = [
            15, 3, 15, normalization.MISSING_VALUE
        ]
        normalized_feature_matrix = preprocessor.forward(inputs)

        np.testing.assert_allclose(
            np.array([
                [1.0, 1, 0, 0, 1, 0],
                [2.0, 0, 1, 0, 0, 1],
                [3.0, 0, 0, 1, 1, 0],
                [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
            ]),
            normalized_feature_matrix,
        )
Example #18
    def test_quantile_boundary_logic(self):
        """Test quantile logic when feaure value == quantile boundary."""
        input = torch.tensor([[0.0], [80.0], [100.0]])
        norm_params = NormalizationParameters(
            feature_type="QUANTILE",
            boxcox_lambda=None,
            boxcox_shift=None,
            mean=0,
            stddev=1,
            possible_values=None,
            quantiles=[0.0, 80.0, 100.0],
            min_value=0.0,
            max_value=100.0,
        )
        preprocessor = Preprocessor({1: norm_params}, False)
        output = preprocessor._preprocess_QUANTILE(0, input.float(), [norm_params])

        expected_output = torch.tensor([[0.0], [0.5], [1.0]])

        self.assertTrue(np.all(np.isclose(output, expected_output)))
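
# A small standalone sketch of the quantile mapping exercised above. It is an
# approximation of the Preprocessor's QUANTILE logic, not its implementation:
# a value is interpolated between its neighboring quantile boundaries, so a
# value sitting exactly on a boundary lands on k / (num_boundaries - 1).
import numpy as np

def value_to_quantile_sketch(value, quantiles):
    q = np.asarray(quantiles, dtype=np.float64)
    rank = np.clip(np.searchsorted(q, value, side="right") - 1, 0, len(q) - 2)
    lo, hi = q[rank], q[rank + 1]
    frac = 0.0 if hi == lo else (value - lo) / (hi - lo)
    return (rank + frac) / (len(q) - 1)

# Matches the expected output above: boundaries [0, 80, 100] -> 0.0, 0.5, 1.0
assert [value_to_quantile_sketch(v, [0.0, 80.0, 100.0]) for v in (0.0, 80.0, 100.0)] == [0.0, 0.5, 1.0]
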
    def get_predictor(self, trainer, environment):
        state_preprocessor = Preprocessor(environment.normalization, False)
        q_network = trainer.q_network
        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
            q_network.cpu_model().eval(), state_preprocessor
        )
        serving_module = DiscreteDqnPredictorWrapper(
            dqn_with_preprocessor=dqn_with_preprocessor,
            action_names=environment.ACTIONS,
        )
        predictor = DiscreteDqnTorchPredictor(serving_module)
        return predictor
Example #20
    def test_quantile_boundary_logic(self):
        """Test quantile logic when feaure value == quantile boundary."""
        input = torch.tensor([[0.0], [80.0], [100.0]])
        norm_params = NormalizationParameters(
            feature_type="QUANTILE",
            boxcox_lambda=None,
            boxcox_shift=None,
            mean=0,
            stddev=1,
            possible_values=None,
            quantiles=[0.0, 80.0, 100.0],
            min_value=0.0,
            max_value=100.0,
        )
        preprocessor = Preprocessor({1: norm_params}, False)
        output = preprocessor._preprocess_QUANTILE(0, input.float(), [norm_params])

        expected_output = torch.tensor([[0.0], [0.5], [1.0]])

        self.assertTrue(np.all(np.isclose(output, expected_output)))
Example #21
    def test_do_not_preprocess(self):
        normalization_parameters = {
            i: NormalizationParameters(feature_type=DO_NOT_PREPROCESS)
            for i in range(1, 5)
        }
        preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
        postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

        x = torch.randn(3, 4)
        presence = torch.ones_like(x, dtype=torch.uint8)
        y = postprocessor(preprocessor(x, presence))
        npt.assert_allclose(x, y)
Example #22
    def test_normalize_dense_matrix_enum(self):
        normalization_parameters = {
            1: NormalizationParameters(
                identify_types.ENUM,
                None,
                None,
                None,
                None,
                [12, 4, 2],
                None,
                None,
                None,
            ),
            2: NormalizationParameters(
                identify_types.CONTINUOUS, None, 0, 0, 1, None, None, None, None
            ),
            3: NormalizationParameters(
                identify_types.ENUM, None, None, None, None, [15, 3], None, None, None
            ),
        }
        preprocessor = Preprocessor(normalization_parameters, False)

        inputs = np.zeros([4, 3], dtype=np.float32)
        feature_ids = [2, 1, 3]  # Sorted according to feature type
        inputs[:, feature_ids.index(1)] = [12, 4, 2, 2]
        inputs[:, feature_ids.index(2)] = [1.0, 2.0, 3.0, 3.0]
        inputs[:, feature_ids.index(3)] = [15, 3, 15, normalization.MISSING_VALUE]
        normalized_feature_matrix = preprocessor.forward(inputs)

        np.testing.assert_allclose(
            np.array(
                [
                    [1.0, 1, 0, 0, 1, 0],
                    [2.0, 0, 1, 0, 0, 1],
                    [3.0, 0, 0, 1, 1, 0],
                    [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
                ]
            ),
            normalized_feature_matrix,
        )
Example #23
    def test_predictor_torch_export(self):
        """Verify that q-values before model export equal q-values after
        model export. Meant to catch issues with export logic."""
        environment = Gridworld()
        samples = Samples(
            mdp_ids=["0"],
            sequence_numbers=[0],
            sequence_number_ordinals=[1],
            states=[{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 15: 1.0, 24: 1.0}],
            actions=["D"],
            action_probabilities=[0.5],
            rewards=[0],
            possible_actions=[["R", "D"]],
            next_states=[{5: 1.0}],
            next_actions=["U"],
            terminals=[False],
            possible_next_actions=[["R", "U", "D"]],
        )
        tdps = environment.preprocess_samples(samples, 1)
        assert len(tdps) == 1, "Invalid number of data pages"

        trainer, exporter = self.get_modular_sarsa_trainer_exporter(
            environment, {}, False
        )
        input = rlt.PreprocessedState.from_tensor(tdps[0].states)

        pre_export_q_values = trainer.q_network(input).q_values.detach().numpy()

        preprocessor = Preprocessor(environment.normalization, False)
        cpu_q_network = trainer.q_network.cpu_model()
        cpu_q_network.eval()
        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(cpu_q_network, preprocessor)
        serving_module = DiscreteDqnPredictorWrapper(
            dqn_with_preprocessor, action_names=environment.ACTIONS
        )

        with tempfile.TemporaryDirectory() as tmpdirname:
            buf = export_module_to_buffer(serving_module)
            tmp_path = os.path.join(tmpdirname, "model")
            with open(tmp_path, "wb") as f:
                f.write(buf.getvalue())
            predictor = DiscreteDqnTorchPredictor(torch.jit.load(tmp_path))

        post_export_q_values = predictor.predict([samples.states[0]])

        for i, action in enumerate(environment.ACTIONS):
            self.assertAlmostEqual(
                float(pre_export_q_values[0][i]),
                float(post_export_q_values[0][action]),
                places=4,
            )
Example #24
    def __init__(
        self,
        state_preprocessor: Preprocessor,
        action_preprocessor: Preprocessor,
        value_network: Optional[nn.Module],
    ) -> None:
        super().__init__()

        self.state_sorted_features_t = state_preprocessor.sorted_features
        self.state_preprocessor = torch.jit.trace(
            state_preprocessor, (state_preprocessor.input_prototype()))

        self.action_sorted_features_t = action_preprocessor.sorted_features
        self.action_preprocessor = torch.jit.trace(
            action_preprocessor, (action_preprocessor.input_prototype()))

        value_network_sample_input = (
            self.state_preprocessor(*state_preprocessor.input_prototype()),
            self.action_preprocessor(*action_preprocessor.input_prototype()),
        )
        self.value_network = torch.jit.trace(value_network,
                                             value_network_sample_input)
Example #25
    def get_actor_predictor(self, trainer, environment):
        state_preprocessor = Preprocessor(environment.normalization, False)
        postprocessor = Postprocessor(
            environment.normalization_continuous_action, False
        )
        actor_with_preprocessor = ActorWithPreprocessor(
            trainer.actor_network.cpu_model().eval(), state_preprocessor, postprocessor
        )
        serving_module = ActorPredictorWrapper(actor_with_preprocessor)
        predictor = ActorTorchPredictor(
            serving_module,
            sort_features_by_normalization(
                environment.normalization_continuous_action
            )[0],
        )
        return predictor
    def test_discrete_wrapper_with_id_list(self):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        action_dim = 2
        state_feature_config = rlt.ModelFeatureConfig(
            float_feature_infos=[
                rlt.FloatFeatureInfo(name=str(i), feature_id=i)
                for i in range(1, 5)
            ],
            id_list_feature_configs=[
                rlt.IdListFeatureConfig(name="A",
                                        feature_id=10,
                                        id_mapping_name="A_mapping")
            ],
            id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
        )
        dqn = FullyConnectedDQNWithEmbedding(
            state_dim=len(state_normalization_parameters),
            action_dim=action_dim,
            sizes=[16],
            activations=["relu"],
            model_feature_config=state_feature_config,
            embedding_dim=8,
        )
        dqn_with_preprocessor = DiscreteDqnWithPreprocessorWithIdList(
            dqn, state_preprocessor, state_feature_config)
        action_names = ["L", "R"]
        wrapper = DiscreteDqnPredictorWrapperWithIdList(
            dqn_with_preprocessor, action_names, state_feature_config)
        input_prototype = dqn_with_preprocessor.input_prototype()
        output_action_names, q_values = wrapper(*input_prototype)
        self.assertEqual(action_names, output_action_names)
        self.assertEqual(q_values.shape, (1, 2))

        feature_id_to_name = {
            config.feature_id: config.name
            for config in state_feature_config.id_list_feature_configs
        }
        state_id_list_features = {
            feature_id_to_name[k]: v
            for k, v in input_prototype[1].items()
        }
        expected_output = dqn(
            rlt.PreprocessedState(state=rlt.PreprocessedFeatureVector(
                float_features=state_preprocessor(*input_prototype[0]),
                id_list_features=state_id_list_features,
            ))).q_values
        self.assertTrue((expected_output == q_values).all())
Example #27
    def test_continuous_action(self):
        normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS_ACTION,
                                       min_value=-5.0 * i,
                                       max_value=10.0 * i)
            for i in range(1, 5)
        }
        preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
        postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

        x = torch.rand(3, 4) * torch.tensor([15, 30, 45, 60]) + torch.tensor(
            [-5, -10, -15, -20])
        presence = torch.ones_like(x, dtype=torch.uint8)
        y = postprocessor(preprocessor(x, presence))
        npt.assert_allclose(x, y, rtol=1e-5)
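
# Standalone sketch of the CONTINUOUS_ACTION round trip the test above relies
# on (an approximation; the library may shrink the range slightly to stay off
# the [-1, 1] boundaries). Feature i is declared with min=-5*i, max=10*i.
import torch

def scale_action(x, lo, hi):
    return 2.0 * (x - lo) / (hi - lo) - 1.0  # raw action -> [-1, 1]

def unscale_action(y, lo, hi):
    return (y + 1.0) / 2.0 * (hi - lo) + lo  # [-1, 1] -> raw action

lo = torch.tensor([-5.0, -10.0, -15.0, -20.0])
hi = torch.tensor([10.0, 20.0, 30.0, 40.0])
x = torch.rand(3, 4) * (hi - lo) + lo
assert torch.allclose(unscale_action(scale_action(x, lo, hi), lo, hi), x, rtol=1e-5)
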
    def build_serving_module(
        self,
        q_network: ModelBase,
        state_normalization_parameters: Dict[int, NormalizationParameters],
        action_names: List[str],
        state_feature_config: rlt.ModelFeatureConfig,
    ) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module
        """
        state_preprocessor = Preprocessor(state_normalization_parameters, False)
        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
            q_network.cpu_model().eval(), state_preprocessor
        )
        return DiscreteDqnPredictorWrapper(
            dqn_with_preprocessor, action_names, state_feature_config
        )
    def get_actor_predictor(self, trainer, environment):
        feature_extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization)

        output_transformer = ActorOutputTransformer(
            sort_features_by_normalization(
                environment.normalization_action)[0],
            environment.max_action_range.reshape(-1),
            environment.min_action_range.reshape(-1),
        )

        predictor = ActorExporter(
            trainer.actor_network,
            feature_extractor,
            output_transformer,
            Preprocessor(environment.normalization, False, True),
        ).export()
        return predictor
    def preprocess_samples(
        self,
        samples: Samples,
        minibatch_size: int,
        use_gpu: bool = False,
        one_hot_action: bool = True,
        normalize_actions: bool = True,
    ) -> List[TrainingDataPage]:
        logger.info("Shuffling...")
        samples.shuffle()

        logger.info("Sparse2Dense...")
        net = core.Net("gridworld_preprocessing")
        C2.set_net(net)
        saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
        sorted_state_features, _ = sort_features_by_normalization(self.normalization)
        state_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_state_features
        )
        saa = StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
        next_state_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_state_features
        )
        sorted_action_features, _ = sort_features_by_normalization(
            self.normalization_action
        )
        saa = StackedAssociativeArray.from_dict_list(samples.actions, "action")
        action_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_action_features
        )
        saa = StackedAssociativeArray.from_dict_list(
            samples.next_actions, "next_action"
        )
        next_action_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_action_features
        )
        action_probabilities = torch.tensor(
            samples.action_probabilities, dtype=torch.float32
        ).reshape(-1, 1)
        rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)

        pnas_lengths_list = []
        pnas_flat: List[List[str]] = []
        for pnas in samples.possible_next_actions:
            pnas_lengths_list.append(len(pnas))
            pnas_flat.extend(pnas)
        saa = StackedAssociativeArray.from_dict_list(pnas_flat, "possible_next_actions")

        pnas_lengths = torch.tensor(pnas_lengths_list, dtype=torch.int32)
        pna_lens_blob = "pna_lens_blob"
        workspace.FeedBlob(pna_lens_blob, pnas_lengths.numpy())

        possible_next_actions_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_action_features
        )

        state_pnas_tile_blob = C2.LengthsTile(next_state_matrix, pna_lens_blob)

        workspace.RunNetOnce(net)

        logger.info("Preprocessing...")
        state_preprocessor = Preprocessor(self.normalization, False)
        action_preprocessor = Preprocessor(self.normalization_action, False)

        states_ndarray = workspace.FetchBlob(state_matrix)
        states_ndarray = state_preprocessor.forward(states_ndarray)

        actions_ndarray = torch.from_numpy(workspace.FetchBlob(action_matrix))
        if normalize_actions:
            actions_ndarray = action_preprocessor.forward(actions_ndarray)

        next_states_ndarray = workspace.FetchBlob(next_state_matrix)
        next_states_ndarray = state_preprocessor.forward(next_states_ndarray)

        next_actions_ndarray = torch.from_numpy(workspace.FetchBlob(next_action_matrix))
        if normalize_actions:
            next_actions_ndarray = action_preprocessor.forward(next_actions_ndarray)

        logged_possible_next_actions = action_preprocessor.forward(
            workspace.FetchBlob(possible_next_actions_matrix)
        )

        state_pnas_tile = state_preprocessor.forward(
            workspace.FetchBlob(state_pnas_tile_blob)
        )
        logged_possible_next_state_actions = torch.cat(
            (state_pnas_tile, logged_possible_next_actions), dim=1
        )

        logger.info("Reward Timeline to Torch...")
        possible_next_actions_ndarray = logged_possible_next_actions
        possible_next_actions_state_concat = logged_possible_next_state_actions
        time_diffs = torch.ones([len(samples.states), 1])

        tdps = []
        pnas_start = 0
        logger.info("Batching...")
        for start in range(0, states_ndarray.shape[0], minibatch_size):
            end = start + minibatch_size
            if end > states_ndarray.shape[0]:
                break
            pnas_end = pnas_start + torch.sum(pnas_lengths[start:end])
            pnas = possible_next_actions_ndarray[pnas_start:pnas_end]
            pnas_concat = possible_next_actions_state_concat[pnas_start:pnas_end]
            pnas_start = pnas_end
            tdp = TrainingDataPage(
                states=states_ndarray[start:end],
                actions=actions_ndarray[start:end],
                propensities=action_probabilities[start:end],
                rewards=rewards[start:end],
                next_states=next_states_ndarray[start:end],
                next_actions=next_actions_ndarray[start:end],
                possible_next_actions=None,
                not_terminals=(pnas_lengths[start:end] > 0).reshape(-1, 1),
                time_diffs=time_diffs[start:end],
                possible_next_actions_lengths=pnas_lengths[start:end],
                possible_next_actions_state_concat=pnas_concat,
            )
            tdp.set_type(torch.cuda.FloatTensor if use_gpu else torch.FloatTensor)
            tdps.append(tdp)
        return tdps
Example #31
    def test_prepare_normalization_and_normalize(self):
        feature_value_map = read_data()

        normalization_parameters = {}
        for name, values in feature_value_map.items():
            normalization_parameters[name] = normalization.identify_parameter(
                name, values, 10, feature_type=self._feature_type_override(name)
            )
        for k, v in normalization_parameters.items():
            if id_to_type(k) == CONTINUOUS:
                self.assertEqual(v.feature_type, CONTINUOUS)
                self.assertIs(v.boxcox_lambda, None)
                self.assertIs(v.boxcox_shift, None)
            elif id_to_type(k) == BOXCOX:
                self.assertEqual(v.feature_type, BOXCOX)
                self.assertIsNot(v.boxcox_lambda, None)
                self.assertIsNot(v.boxcox_shift, None)
            else:
                assert v.feature_type == id_to_type(k)

        preprocessor = Preprocessor(normalization_parameters, False)
        sorted_features, _ = sort_features_by_normalization(normalization_parameters)
        input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)
        for i, feature in enumerate(sorted_features):
            input_matrix[:, i] = feature_value_map[feature]
        normalized_feature_matrix = preprocessor.forward(input_matrix)

        normalized_features = {}
        on_column = 0
        for feature in sorted_features:
            norm = normalization_parameters[feature]
            if norm.feature_type == ENUM:
                column_size = len(norm.possible_values)
            else:
                column_size = 1
            normalized_features[feature] = normalized_feature_matrix[
                :, on_column : (on_column + column_size)
            ]
            on_column += column_size

        self.assertTrue(
            all(
                [
                    np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
                    for parameter in normalization_parameters.values()
                ]
            )
        )
        for k, v in six.iteritems(normalized_features):
            v = v.numpy()
            self.assertTrue(np.all(np.isfinite(v)))
            feature_type = normalization_parameters[k].feature_type
            if feature_type == identify_types.PROBABILITY:
                sigmoidv = special.expit(v)
                self.assertTrue(
                    np.all(
                        np.logical_and(np.greater(sigmoidv, 0), np.less(sigmoidv, 1))
                    )
                )
            elif feature_type == identify_types.ENUM:
                possible_values = normalization_parameters[k].possible_values
                self.assertEqual(v.shape[0], len(feature_value_map[k]))
                self.assertEqual(v.shape[1], len(possible_values))

                possible_value_map = {}
                for i, possible_value in enumerate(possible_values):
                    possible_value_map[possible_value] = i

                for i, row in enumerate(v):
                    original_feature = feature_value_map[k][i]
                    if abs(original_feature - MISSING_VALUE) < 0.01:
                        self.assertEqual(0.0, np.sum(row))
                    else:
                        self.assertEqual(
                            possible_value_map[original_feature],
                            np.where(row == 1)[0][0],
                        )
            elif feature_type == identify_types.QUANTILE:
                for i, feature in enumerate(v[0]):
                    original_feature = feature_value_map[k][i]
                    expected = NumpyFeatureProcessor.value_to_quantile(
                        original_feature, normalization_parameters[k].quantiles
                    )
                    self.assertAlmostEqual(feature, expected, 2)
            elif feature_type == identify_types.BINARY:
                pass
            elif (
                feature_type == identify_types.CONTINUOUS
                or feature_type == identify_types.BOXCOX
            ):
                one_stddev = np.isclose(np.std(v, ddof=1), 1, atol=0.01)
                zero_stddev = np.isclose(np.std(v, ddof=1), 0, atol=0.01)
                zero_mean = np.isclose(np.mean(v), 0, atol=0.01)
                self.assertTrue(
                    np.all(zero_mean),
                    "mean of feature {} is {}, not 0".format(k, np.mean(v)),
                )
                self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
            elif feature_type == identify_types.CONTINUOUS_ACTION:
                less_than_max = v < 1
                more_than_min = v > -1
                self.assertTrue(
                    np.all(less_than_max),
                    "values are not less than 1: {}".format(v[less_than_max == False]),
                )
                self.assertTrue(
                    np.all(more_than_min),
                    "values are not more than -1: {}".format(v[more_than_min == False]),
                )
            else:
                raise NotImplementedError()
def benchmark(num_forward_passes):
    """
    Benchmark preprocessor speeds:
        1 - PyTorch
        2 - PyTorch -> ONNX -> C2
        3 - C2
    """

    feature_value_map = gen_data(
        num_binary_features=10,
        num_boxcox_features=10,
        num_continuous_features=10,
        num_enum_features=10,
        num_prob_features=10,
        num_quantile_features=10,
    )

    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, 10
        )

    sorted_features, _ = sort_features_by_normalization(normalization_parameters)

    # Dummy input
    input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)

    # PyTorch Preprocessor
    pytorch_preprocessor = Preprocessor(normalization_parameters, False)
    for i, feature in enumerate(sorted_features):
        input_matrix[:, i] = feature_value_map[feature]

    #################### time pytorch ############################
    start = time.time()
    for _ in range(num_forward_passes):
        _ = pytorch_preprocessor.forward(input_matrix)
    end = time.time()
    logger.info(
        "PyTorch: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )

    ################ time pytorch -> ONNX -> caffe2 ####################
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        pytorch_preprocessor, len(sorted_features), False
    )
    input_blob, output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    torch_workspace = caffe2_netdef.workspace
    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))
    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    workspace.FeedBlob(input_blob, input_matrix)
    workspace.RunNetOnce(torch_init_net)
    start = time.time()
    for _ in range(num_forward_passes):
        workspace.RunNetOnce(torch_predict_net)
        _ = workspace.FetchBlob(output_blob)
    end = time.time()
    logger.info(
        "PyTorch -> ONNX -> Caffe2: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )

    #################### time caffe2 ############################
    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()
    input_matrix_blob = "input_matrix_blob"
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, sorted_features, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)
    start = time.time()
    for _ in range(num_forward_passes):
        workspace.RunNetOnce(norm_net)
        _ = workspace.FetchBlob(output_blob)
    end = time.time()
    logger.info(
        "Caffe2: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )
Example #33
    def preprocess_samples_discrete(
        self,
        samples: Samples,
        minibatch_size: int,
        one_hot_action: bool = True,
        use_gpu: bool = False,
    ) -> List[TrainingDataPage]:
        logger.info("Shuffling...")
        samples = shuffle_samples(samples)
        logger.info("Preprocessing...")

        if self.sparse_to_dense_net is None:
            self.sparse_to_dense_net = core.Net("gridworld_sparse_to_dense")
            C2.set_net(self.sparse_to_dense_net)
            saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
            sorted_features, _ = sort_features_by_normalization(self.normalization)
            self.state_matrix, _ = sparse_to_dense(
                saa.lengths, saa.keys, saa.values, sorted_features
            )
            saa = StackedAssociativeArray.from_dict_list(
                samples.next_states, "next_states"
            )
            self.next_state_matrix, _ = sparse_to_dense(
                saa.lengths, saa.keys, saa.values, sorted_features
            )
            C2.set_net(None)
        else:
            StackedAssociativeArray.from_dict_list(samples.states, "states")
            StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
        workspace.RunNetOnce(self.sparse_to_dense_net)

        logger.info("Converting to Torch...")
        actions_one_hot = torch.tensor(
            (np.array(samples.actions).reshape(-1, 1) == np.array(self.ACTIONS)).astype(
                np.int64
            )
        )
        actions = actions_one_hot.argmax(dim=1, keepdim=True)
        rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)
        action_probabilities = torch.tensor(
            samples.action_probabilities, dtype=torch.float32
        ).reshape(-1, 1)
        next_actions_one_hot = torch.tensor(
            (
                np.array(samples.next_actions).reshape(-1, 1) == np.array(self.ACTIONS)
            ).astype(np.int64)
        )
        logger.info("Converting PA to Torch...")
        possible_action_strings = np.array(
            list(itertools.zip_longest(*samples.possible_actions, fillvalue=""))
        ).T
        possible_actions_mask = torch.zeros([len(samples.actions), len(self.ACTIONS)])
        for i, action in enumerate(self.ACTIONS):
            possible_actions_mask[:, i] = torch.tensor(
                np.max(possible_action_strings == action, axis=1).astype(np.int64)
            )
        logger.info("Converting PNA to Torch...")
        possible_next_action_strings = np.array(
            list(itertools.zip_longest(*samples.possible_next_actions, fillvalue=""))
        ).T
        possible_next_actions_mask = torch.zeros(
            [len(samples.next_actions), len(self.ACTIONS)]
        )
        for i, action in enumerate(self.ACTIONS):
            possible_next_actions_mask[:, i] = torch.tensor(
                np.max(possible_next_action_strings == action, axis=1).astype(np.int64)
            )
        terminals = torch.tensor(samples.terminals, dtype=torch.int32).reshape(-1, 1)
        not_terminal = 1 - terminals
        logger.info("Converting RT to Torch...")

        time_diffs = torch.ones([len(samples.states), 1])

        logger.info("Preprocessing...")
        preprocessor = Preprocessor(self.normalization, False)

        states_ndarray = workspace.FetchBlob(self.state_matrix)
        states_ndarray = preprocessor.forward(states_ndarray)

        next_states_ndarray = workspace.FetchBlob(self.next_state_matrix)
        next_states_ndarray = preprocessor.forward(next_states_ndarray)

        logger.info("Batching...")
        tdps = []
        for start in range(0, states_ndarray.shape[0], minibatch_size):
            end = start + minibatch_size
            if end > states_ndarray.shape[0]:
                break
            tdp = TrainingDataPage(
                states=states_ndarray[start:end],
                actions=actions_one_hot[start:end]
                if one_hot_action
                else actions[start:end],
                propensities=action_probabilities[start:end],
                rewards=rewards[start:end],
                next_states=next_states_ndarray[start:end],
                not_terminal=not_terminal[start:end],
                next_actions=next_actions_one_hot[start:end],
                possible_actions_mask=possible_actions_mask[start:end],
                possible_next_actions_mask=possible_next_actions_mask[start:end],
                time_diffs=time_diffs[start:end],
            )
            tdp.set_type(torch.cuda.FloatTensor if use_gpu else torch.FloatTensor)
            tdps.append(tdp)
        return tdps
Example #34
    def test_prepare_normalization_and_normalize(self):
        feature_value_map = read_data()

        normalization_parameters = {}
        for name, values in feature_value_map.items():
            normalization_parameters[name] = normalization.identify_parameter(
                values, 10, feature_type=self._feature_type_override(name)
            )
        for k, v in normalization_parameters.items():
            if id_to_type(k) == CONTINUOUS:
                self.assertEqual(v.feature_type, CONTINUOUS)
                self.assertIs(v.boxcox_lambda, None)
                self.assertIs(v.boxcox_shift, None)
            elif id_to_type(k) == BOXCOX:
                self.assertEqual(v.feature_type, BOXCOX)
                self.assertIsNot(v.boxcox_lambda, None)
                self.assertIsNot(v.boxcox_shift, None)
            else:
                assert v.feature_type == id_to_type(k)

        preprocessor = Preprocessor(normalization_parameters, False)
        sorted_features, _ = sort_features_by_normalization(normalization_parameters)
        input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)
        for i, feature in enumerate(sorted_features):
            input_matrix[:, i] = feature_value_map[feature]
        normalized_feature_matrix = preprocessor.forward(input_matrix)

        normalized_features = {}
        on_column = 0
        for feature in sorted_features:
            norm = normalization_parameters[feature]
            if norm.feature_type == ENUM:
                column_size = len(norm.possible_values)
            else:
                column_size = 1
            normalized_features[feature] = normalized_feature_matrix[
                :, on_column : (on_column + column_size)
            ]
            on_column += column_size

        self.assertTrue(
            all(
                [
                    np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
                    for parameter in normalization_parameters.values()
                ]
            )
        )
        for k, v in six.iteritems(normalized_features):
            v = v.numpy()
            self.assertTrue(np.all(np.isfinite(v)))
            feature_type = normalization_parameters[k].feature_type
            if feature_type == identify_types.PROBABILITY:
                sigmoidv = special.expit(v)
                self.assertTrue(
                    np.all(
                        np.logical_and(np.greater(sigmoidv, 0), np.less(sigmoidv, 1))
                    )
                )
            elif feature_type == identify_types.ENUM:
                possible_values = normalization_parameters[k].possible_values
                self.assertEqual(v.shape[0], len(feature_value_map[k]))
                self.assertEqual(v.shape[1], len(possible_values))

                possible_value_map = {}
                for i, possible_value in enumerate(possible_values):
                    possible_value_map[possible_value] = i

                for i, row in enumerate(v):
                    original_feature = feature_value_map[k][i]
                    if abs(original_feature - MISSING_VALUE) < 0.01:
                        self.assertEqual(0.0, np.sum(row))
                    else:
                        self.assertEqual(
                            possible_value_map[original_feature],
                            np.where(row == 1)[0][0],
                        )
            elif feature_type == identify_types.QUANTILE:
                for i, feature in enumerate(v[0]):
                    original_feature = feature_value_map[k][i]
                    expected = self._value_to_quantile(
                        original_feature, normalization_parameters[k].quantiles
                    )
                    self.assertAlmostEqual(feature, expected, 2)
            elif feature_type == identify_types.BINARY:
                pass
            elif (
                feature_type == identify_types.CONTINUOUS
                or feature_type == identify_types.BOXCOX
            ):
                one_stddev = np.isclose(np.std(v, ddof=1), 1, atol=0.01)
                zero_stddev = np.isclose(np.std(v, ddof=1), 0, atol=0.01)
                zero_mean = np.isclose(np.mean(v), 0, atol=0.01)
                self.assertTrue(
                    np.all(zero_mean),
                    "mean of feature {} is {}, not 0".format(k, np.mean(v)),
                )
                self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
            elif feature_type == identify_types.CONTINUOUS_ACTION:
                less_than_max = v < 1
                more_than_min = v > -1
                self.assertTrue(
                    np.all(less_than_max),
                    "values are not less than 1: {}".format(v[less_than_max == False]),
                )
                self.assertTrue(
                    np.all(more_than_min),
                    "values are not more than -1: {}".format(v[more_than_min == False]),
                )
            else:
                raise NotImplementedError()
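
To make the ENUM branch of the test above more concrete, here is a hedged, standalone illustration (the possible values and the MISSING_VALUE sentinel below are invented for this sketch; the real constant comes from the library) of the behaviour being asserted: each raw value becomes a one-hot row indexed by its position in possible_values, and a missing value becomes an all-zero row.

import numpy as np

MISSING_VALUE = -1337.0  # assumed sentinel for this sketch only
possible_values = [3, 7, 9]
raw = np.array([7.0, 3.0, MISSING_VALUE, 9.0])

one_hot = np.zeros((len(raw), len(possible_values)))
for row, value in enumerate(raw):
    if abs(value - MISSING_VALUE) >= 0.01:  # missing rows stay all zero
        one_hot[row, possible_values.index(int(value))] = 1.0

print(one_hot)
# [[0. 1. 0.]
#  [1. 0. 0.]
#  [0. 0. 0.]
#  [0. 0. 1.]]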
Example #35
    def test_preprocessing_network(self):
        feature_value_map = read_data()

        normalization_parameters = {}
        name_preprocessed_blob_map = {}

        for feature_name, feature_values in feature_value_map.items():
            normalization_parameters[feature_name] = normalization.identify_parameter(
                feature_name,
                feature_values,
                feature_type=self._feature_type_override(feature_name),
            )
            # Set one entry to MISSING_VALUE to test that missing values are handled
            feature_values[0] = MISSING_VALUE

            preprocessor = Preprocessor(
                {feature_name: normalization_parameters[feature_name]}, False
            )
            feature_values_matrix = np.expand_dims(feature_values, -1)
            normalized_feature_values = preprocessor.forward(feature_values_matrix)
            name_preprocessed_blob_map[feature_name] = normalized_feature_values.numpy()

        test_features = NumpyFeatureProcessor.preprocess(
            feature_value_map, normalization_parameters
        )

        for feature_name in feature_value_map:
            normalized_features = name_preprocessed_blob_map[feature_name]
            if feature_name != ENUM_FEATURE_ID:
                normalized_features = np.squeeze(normalized_features, -1)

            tolerance = 0.01
            if feature_name == BOXCOX_FEATURE_ID:
                # At the limit, boxcox has some numerical instability
                tolerance = 0.5
            non_matching = np.where(
                np.logical_not(
                    np.isclose(
                        normalized_features.flatten(),
                        test_features[feature_name].flatten(),
                        rtol=tolerance,
                        atol=tolerance,
                    )
                )
            )
            self.assertTrue(
                np.all(
                    np.isclose(
                        normalized_features.flatten(),
                        test_features[feature_name].flatten(),
                        rtol=tolerance,
                        atol=tolerance,
                    )
                ),
                "{} does not match: {} \n!=\n {}".format(
                    feature_name,
                    normalized_features.flatten()[non_matching],
                    test_features[feature_name].flatten()[non_matching],
                ),
            )
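
The assertion above computes the isclose mask twice, once for the non-matching indices and once for the check itself. Below is a compact sketch of the same reporting pattern (toy arrays, not test data) that evaluates the mask once and surfaces only the offending entries in the failure message.

import numpy as np

expected = np.array([0.00, 1.00, 2.00, 3.00])
actual = np.array([0.005, 1.01, 1.995, 3.00])

close = np.isclose(actual, expected, rtol=0.01, atol=0.01)
non_matching = np.where(~close)  # indices of out-of-tolerance entries

# Passes here; on failure only the mismatching values would be shown.
assert close.all(), "does not match: {} != {}".format(
    actual[non_matching], expected[non_matching]
)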
Example #36
    def preprocess_samples_discrete(
            self,
            samples: Samples,
            minibatch_size: int,
            one_hot_action: bool = True) -> List[TrainingDataPage]:
        logger.info("Shuffling...")
        samples.shuffle()
        logger.info("Preprocessing...")

        net = core.Net("gridworld_preprocessing")
        C2.set_net(net)
        preprocessor = PreprocessorNet(True)
        saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
        state_matrix, _ = preprocessor.normalize_sparse_matrix(
            saa.lengths,
            saa.keys,
            saa.values,
            self.normalization,
            "state_norm",
            False,
            False,
            False,
        )
        saa = StackedAssociativeArray.from_dict_list(samples.next_states,
                                                     "next_states")
        next_state_matrix, _ = preprocessor.normalize_sparse_matrix(
            saa.lengths,
            saa.keys,
            saa.values,
            self.normalization,
            "next_state_norm",
            False,
            False,
            False,
        )
        workspace.RunNetOnce(net)

        logger.info("Converting to Torch...")
        actions_one_hot = torch.tensor((np.array(samples.actions).reshape(
            -1, 1) == np.array(self.ACTIONS)).astype(np.int64))
        actions = actions_one_hot.argmax(dim=1, keepdim=True)
        rewards = torch.tensor(samples.rewards,
                               dtype=torch.float32).reshape(-1, 1)
        action_probabilities = torch.tensor(samples.action_probabilities,
                                            dtype=torch.float32).reshape(
                                                -1, 1)
        next_actions_one_hot = torch.tensor(
            (np.array(samples.next_actions).reshape(-1, 1) == np.array(
                self.ACTIONS)).astype(np.int64))
        logger.info("Converting PNA to Torch...")
        possible_next_action_strings = np.array(
            list(
                itertools.zip_longest(*samples.possible_next_actions,
                                      fillvalue=""))).T
        possible_next_actions_mask = torch.zeros(
            [len(samples.next_actions),
             len(self.ACTIONS)])
        for i, action in enumerate(self.ACTIONS):
            possible_next_actions_mask[:, i] = torch.tensor(
                np.max(possible_next_action_strings == action,
                       axis=1).astype(np.int64))
        terminals = torch.tensor(samples.terminals,
                                 dtype=torch.int32).reshape(-1, 1)
        not_terminals = 1 - terminals
        logger.info("Converting RT to Torch...")
        episode_values = torch.tensor(samples.episode_values,
                                      dtype=torch.float32).reshape(-1, 1)

        time_diffs = torch.ones([len(samples.states), 1])

        logger.info("Preprocessing...")
        preprocessor = Preprocessor(self.normalization, False)

        states_ndarray = workspace.FetchBlob(state_matrix)
        states_ndarray = preprocessor.forward(states_ndarray)

        next_states_ndarray = workspace.FetchBlob(next_state_matrix)
        next_states_ndarray = preprocessor.forward(next_states_ndarray)

        logger.info("Batching...")
        tdps = []
        for start in range(0, states_ndarray.shape[0], minibatch_size):
            end = start + minibatch_size
            if end > states_ndarray.shape[0]:
                break
            tdp = TrainingDataPage(
                states=states_ndarray[start:end],
                actions=actions_one_hot[start:end]
                if one_hot_action else actions[start:end],
                propensities=action_probabilities[start:end],
                rewards=rewards[start:end],
                next_states=next_states_ndarray[start:end],
                not_terminals=not_terminals[start:end],
                next_actions=next_actions_one_hot[start:end],
                possible_next_actions=possible_next_actions_mask[start:end],
                episode_values=episode_values[start:end]
                if episode_values is not None else None,
                time_diffs=time_diffs[start:end],
            )
            tdp.set_type(torch.FloatTensor)
            tdps.append(tdp)
        return tdps
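
The one-hot conversion near the top of the example above relies on NumPy broadcasting. A small hedged demo (toy action names, not the gridworld's) of how comparing a column of logged action strings against the action set yields the [batch, num_actions] one-hot matrix, and how argmax recovers integer action indices:

import numpy as np
import torch

ACTIONS = ["L", "R", "U", "D"]  # hypothetical action set
logged = ["R", "L", "D"]

# reshape(-1, 1) makes a column vector; comparing it to the 1-D action array
# broadcasts into a [batch, num_actions] boolean matrix.
actions_one_hot = torch.tensor(
    (np.array(logged).reshape(-1, 1) == np.array(ACTIONS)).astype(np.int64)
)
actions = actions_one_hot.argmax(dim=1, keepdim=True)

print(actions_one_hot.tolist())     # [[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1]]
print(actions.squeeze(1).tolist())  # [1, 0, 3]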
Example #37
    def preprocess_samples(
        self,
        samples: Samples,
        minibatch_size: int,
        use_gpu: bool = False,
        one_hot_action: bool = True,
        normalize_actions: bool = True,
    ) -> List[TrainingDataPage]:
        logger.info("Shuffling...")
        samples = shuffle_samples(samples)

        logger.info("Sparse2Dense...")
        net = core.Net("gridworld_preprocessing")
        C2.set_net(net)
        saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
        sorted_state_features, _ = sort_features_by_normalization(self.normalization)
        state_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_state_features
        )
        saa = StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
        next_state_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_state_features
        )
        sorted_action_features, _ = sort_features_by_normalization(
            self.normalization_action
        )
        saa = StackedAssociativeArray.from_dict_list(samples.actions, "action")
        action_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_action_features
        )
        saa = StackedAssociativeArray.from_dict_list(
            samples.next_actions, "next_action"
        )
        next_action_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_action_features
        )
        action_probabilities = torch.tensor(
            samples.action_probabilities, dtype=torch.float32
        ).reshape(-1, 1)
        rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)

        max_action_size = 4

        pnas_mask_list: List[List[int]] = []
        pnas_flat: List[Dict[str, float]] = []
        for pnas in samples.possible_next_actions:
            pnas_mask_list.append([1] * len(pnas) + [0] * (max_action_size - len(pnas)))
            pnas_flat.extend(pnas)
            for _ in range(max_action_size - len(pnas)):
                pnas_flat.append({})  # Filler
        saa = StackedAssociativeArray.from_dict_list(pnas_flat, "possible_next_actions")
        pnas_mask = torch.Tensor(pnas_mask_list)

        possible_next_actions_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_action_features
        )

        workspace.RunNetOnce(net)

        logger.info("Preprocessing...")
        state_preprocessor = Preprocessor(self.normalization, False)
        action_preprocessor = Preprocessor(self.normalization_action, False)

        states_ndarray = workspace.FetchBlob(state_matrix)
        states_ndarray = state_preprocessor.forward(states_ndarray)

        actions_ndarray = torch.from_numpy(workspace.FetchBlob(action_matrix))
        if normalize_actions:
            actions_ndarray = action_preprocessor.forward(actions_ndarray)

        next_states_ndarray = workspace.FetchBlob(next_state_matrix)
        next_states_ndarray = state_preprocessor.forward(next_states_ndarray)

        state_pnas_tile = next_states_ndarray.repeat(1, max_action_size).reshape(
            -1, next_states_ndarray.shape[1]
        )

        next_actions_ndarray = torch.from_numpy(workspace.FetchBlob(next_action_matrix))
        if normalize_actions:
            next_actions_ndarray = action_preprocessor.forward(next_actions_ndarray)

        logged_possible_next_actions = action_preprocessor.forward(
            workspace.FetchBlob(possible_next_actions_matrix)
        )

        assert state_pnas_tile.shape[0] == logged_possible_next_actions.shape[0], (
            "Invalid shapes: "
            + str(state_pnas_tile.shape)
            + " != "
            + str(logged_possible_next_actions.shape)
        )
        logged_possible_next_state_actions = torch.cat(
            (state_pnas_tile, logged_possible_next_actions), dim=1
        )

        logger.info("Reward Timeline to Torch...")
        time_diffs = torch.ones([len(samples.states), 1])

        tdps = []
        pnas_start = 0
        logger.info("Batching...")
        for start in range(0, states_ndarray.shape[0], minibatch_size):
            end = start + minibatch_size
            if end > states_ndarray.shape[0]:
                break
            pnas_end = pnas_start + (minibatch_size * max_action_size)
            tdp = TrainingDataPage(
                states=states_ndarray[start:end],
                actions=actions_ndarray[start:end],
                propensities=action_probabilities[start:end],
                rewards=rewards[start:end],
                next_states=next_states_ndarray[start:end],
                next_actions=next_actions_ndarray[start:end],
                not_terminal=(pnas_mask[start:end, :].sum(dim=1, keepdim=True) > 0),
                time_diffs=time_diffs[start:end],
                possible_next_actions_mask=pnas_mask[start:end, :],
                possible_next_actions_state_concat=logged_possible_next_state_actions[
                    pnas_start:pnas_end, :
                ],
            )
            pnas_start = pnas_end
            tdp.set_type(torch.cuda.FloatTensor if use_gpu else torch.FloatTensor)
            tdps.append(tdp)
        return tdps
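
The state_pnas_tile step above pairs every next state with each of its max_action_size padded candidate actions before state-action pairs are fed to the parametric Q-network. A minimal sketch with made-up dimensions (none of these shapes come from the library) of that tiling and concatenation:

import torch

batch_size, state_dim, action_dim, max_action_size = 3, 5, 2, 4

next_states = torch.randn(batch_size, state_dim)
# Flattened candidates: max_action_size rows per state, fillers included.
possible_next_actions = torch.randn(batch_size * max_action_size, action_dim)

# Repeat each state row max_action_size times so rows line up with the
# flattened candidate actions, then concatenate along the feature axis.
state_tile = next_states.repeat(1, max_action_size).reshape(-1, state_dim)
assert state_tile.shape[0] == possible_next_actions.shape[0]

state_action_concat = torch.cat((state_tile, possible_next_actions), dim=1)
print(state_action_concat.shape)  # torch.Size([12, 7])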