Beispiel #1
0
    def test_normalize_dense_matrix_enum(self):
        """Normalize a dense matrix that mixes ENUM and CONTINUOUS columns.

        'f1' and 'f3' are ENUM features and 'f2' is CONTINUOUS. The expected
        matrix holds one one-hot group per ENUM feature with the 'f2' value
        in between; a MISSING_VALUE entry for an ENUM feature is expected to
        produce an all-zero group.
        """
        enum_f1 = NormalizationParameters(
            identify_types.ENUM, None, None, None, None, [12.0, 4.2, 2.1])
        continuous_f2 = NormalizationParameters(
            identify_types.CONTINUOUS, None, 0, 0, 1, None)
        enum_f3 = NormalizationParameters(
            identify_types.ENUM, None, None, None, None, [15.1, -3.2])
        normalization_parameters = {
            'f1': enum_f1,
            'f2': continuous_f2,
            'f3': enum_f3,
        }
        features = list(normalization_parameters)
        norm_net = core.Net("net")
        blobname_template = '{}_blob'
        blob_map = prepare_normalization(
            norm_net, normalization_parameters, features, blobname_template,
            False)

        # One row per example; the columns are laid out as f1, f2, f3.
        raw_rows = [
            [12.0, 1.0, 15.1],
            [4.2, 2.0, -3.2],
            [2.1, 3.0, 15.1],
            [2.1, 3.0, normalization.MISSING_VALUE],
        ]
        inputs = np.array(raw_rows, dtype=np.float32)
        normalized_outputs = normalize_dense_matrix(
            inputs, features, normalization_parameters, blob_map, norm_net,
            blobname_template)

        expected = np.array([
            [1, 0, 0, 1.0, 1, 0],
            [0, 1, 0, 2.0, 0, 1],
            [0, 0, 1, 3.0, 1, 0],
            [0, 0, 1, 3.0, 0, 0]  # Missing values should go to all 0
        ])
        np.testing.assert_array_equal(expected, normalized_outputs)
Beispiel #2
0
    def test_normalize_feature_map_enum(self):
        """Normalize a per-feature value map with ENUM and CONTINUOUS features.

        Every normalized array must be finite. ENUM features 'f1' and 'f3'
        are expected to come back one-hot encoded (all zeros for a
        MISSING_VALUE entry), while the CONTINUOUS feature 'f2' is expected
        to keep its input values.
        """
        enum_a, continuous_b, enum_c = 'f1', 'f2', 'f3'
        normalization_parameters = {
            enum_a: NormalizationParameters(
                identify_types.ENUM, None, None, None, None,
                [12.0, 4.2, 2.1]),
            continuous_b: NormalizationParameters(
                identify_types.CONTINUOUS, None, 0, 0, 1, None),
            enum_c: NormalizationParameters(
                identify_types.ENUM, None, None, None, None, [15.1, -3.2]),
        }

        feature_value_map = {
            enum_a: np.array([2.1, 4.2, 12.0, 12.0], dtype=np.float32),
            continuous_b: np.array([1.9, 2.2, 5.0, 1.0], dtype=np.float32),
            enum_c: np.array(
                [-3.2, -3.2, 15.1, normalization.MISSING_VALUE],
                dtype=np.float32),
        }

        features = list(feature_value_map)
        norm_net = core.Net("net")
        blobname_template = '{}_blob'
        blob_map = prepare_normalization(
            norm_net, normalization_parameters, features, blobname_template,
            False)
        normalized_features = normalize_feature_map(
            feature_value_map, norm_net, features, blob_map,
            blobname_template)

        for normalized in normalized_features.values():
            self.assertTrue(np.all(np.isfinite(normalized)))

        expected_by_feature = [
            (enum_a, np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 0, 0]])),
            (continuous_b,
             np.array([[1.9, 2.2, 5.0, 1.0]], dtype=np.float32)),
            (enum_c,
             np.array([
                 [0, 1],
                 [0, 1],
                 [1, 0],
                 [0, 0]  # Missing value should go to all 0
             ])),
        ]
        for name, expected in expected_by_feature:
            np.testing.assert_array_equal(expected, normalized_features[name])
Beispiel #3
0
    def test_normalize_dense_matrix_enum(self):
        """Normalize a dense matrix through PreprocessorNet and check the result.

        Features 1 and 3 are ENUM and feature 2 is CONTINUOUS. The input
        columns are laid out in ``feature_ids`` order (2, 1, 3), and the
        expected output holds the feature-2 value followed by the one-hot
        groups for features 1 and 3. A MISSING_VALUE entry for an ENUM
        feature is expected to yield an all-zero group.
        """
        normalization_parameters = {
            1:
            NormalizationParameters(
                identify_types.ENUM,
                None,
                None,
                None,
                None,
                [12, 4, 2],
                None,
                None,
                None,
            ),
            2:
            NormalizationParameters(identify_types.CONTINUOUS, None, 0, 0, 1,
                                    None, None, None, None),
            3:
            NormalizationParameters(identify_types.ENUM, None, None, None,
                                    None, [15, 3], None, None, None),
        }
        norm_net = core.Net("net")
        C2.set_net(norm_net)
        preprocessor = PreprocessorNet()

        # Fill the 4x3 input column by column; each column index is looked up
        # from the feature's position in feature_ids.
        inputs = np.zeros([4, 3], dtype=np.float32)
        feature_ids = [2, 1, 3]  # Sorted according to feature type
        inputs[:, feature_ids.index(1)] = [12, 4, 2, 2]
        inputs[:, feature_ids.index(2)] = [1.0, 2.0, 3.0, 3.0]
        inputs[:, feature_ids.index(3)] = [
            15, 3, 15, normalization.MISSING_VALUE
        ]
        input_blob = C2.NextBlob("input_blob")
        # A placeholder value is fed before normalize_dense_matrix is called
        # and replaced by the real inputs afterwards.
        # NOTE(review): presumably the blob must exist in the workspace while
        # the net is being built -- confirm against PreprocessorNet.
        workspace.FeedBlob(input_blob, np.array([0], dtype=np.float32))
        normalized_output_blob, _ = preprocessor.normalize_dense_matrix(
            input_blob, feature_ids, normalization_parameters, "", False)
        workspace.FeedBlob(input_blob, inputs)
        workspace.RunNetOnce(norm_net)
        normalized_feature_matrix = workspace.FetchBlob(normalized_output_blob)

        np.testing.assert_allclose(
            np.array([
                [1.0, 1, 0, 0, 1, 0],
                [2.0, 0, 1, 0, 0, 1],
                [3.0, 0, 0, 1, 1, 0],
                [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
            ]),
            normalized_feature_matrix,
        )
Beispiel #4
0
def only_continuous_normalizer(feats, min_value=None, max_value=None):
    """Build CONTINUOUS normalization parameters for every feature in ``feats``.

    Each feature gets ``mean=0`` and ``stddev=1`` with no Box-Cox, enum or
    quantile settings.

    Args:
        feats: Sequence of feature identifiers; their order is preserved in
            the returned mapping.
        min_value: Lower bound(s). ``None`` or a single ``int``/``float`` is
            applied to every feature; a ``list`` or ``np.ndarray`` supplies
            one value per feature, indexed by position.
        max_value: Upper bound(s), given in the same form as ``min_value``.
            Scalar bounds may mix ``int`` and ``float`` (e.g. ``0`` and
            ``1.0``).

    Returns:
        A ``collections.OrderedDict`` mapping each feature to its
        ``NormalizationParameters``.

    Raises:
        AssertionError: If a bound has an unsupported type, or the two bounds
            are not given in the same form (both ``None``, both scalar, or
            both sequences).
        IndexError: If per-feature bounds are shorter than ``feats``.
    """

    def _form(bound):
        # Classify a bound so that int/float (and list/ndarray) pairs are
        # treated as compatible instead of requiring identical types.
        if bound is None:
            return "none"
        if isinstance(bound, (int, float)):
            return "scalar"
        if isinstance(bound, (list, np.ndarray)):
            return "sequence"
        return None

    min_form = _form(min_value)
    assert min_form is not None and min_form == _form(max_value), (
        "min_value and max_value must both be None, both be int/float, "
        "or both be list/np.ndarray"
    )
    if min_form != "sequence":
        # Broadcast the shared bound to one entry per feature.
        min_value = [min_value] * len(feats)
        max_value = [max_value] * len(feats)

    normalization = collections.OrderedDict()
    for i, feat in enumerate(feats):
        normalization[feat] = NormalizationParameters(
            feature_type="CONTINUOUS",
            boxcox_lambda=None,
            boxcox_shift=None,
            mean=0,
            stddev=1,
            possible_values=None,
            quantiles=None,
            min_value=min_value[i],
            max_value=max_value[i],
        )
    return normalization
 def get_state_normalization_parameters(self):
     """Return normalization parameters for state features 1 through 4.

     Odd feature ids are PROBABILITY and even ids are CONTINUOUS; every
     feature uses mean 0 and stddev 1.
     """
     parameters = {}
     for feature_id in range(1, 5):
         if feature_id % 2:
             feature_type = PROBABILITY
         else:
             feature_type = CONTINUOUS
         parameters[feature_id] = NormalizationParameters(
             feature_type=feature_type, mean=0, stddev=1
         )
     return parameters
 def get_action_normalization_parameters(self):
     """Return normalization parameters for action features 11 through 13.

     Odd feature ids are CONTINUOUS and even ids are PROBABILITY.
     """
     # Sorted order: 12, 11, 13
     parameters = {}
     for feature_id in range(11, 14):
         if feature_id % 2:
             feature_type = CONTINUOUS
         else:
             feature_type = PROBABILITY
         parameters[feature_id] = NormalizationParameters(
             feature_type=feature_type)
     return parameters
Beispiel #7
0
    def test_normalize_dense_matrix_enum(self):
        """Run a dense matrix with ENUM and CONTINUOUS columns through Preprocessor.

        Features 1 and 3 are ENUM and feature 2 is CONTINUOUS. Input columns
        follow ``feature_ids`` order (2, 1, 3). The expected output holds the
        feature-2 value followed by the one-hot groups of features 1 and 3,
        with an all-zero group where the ENUM input is MISSING_VALUE.
        """
        enum_1 = NormalizationParameters(
            identify_types.ENUM, None, None, None, None, [12, 4, 2],
            None, None, None)
        continuous_2 = NormalizationParameters(
            identify_types.CONTINUOUS, None, 0, 0, 1, None, None, None, None)
        enum_3 = NormalizationParameters(
            identify_types.ENUM, None, None, None, None, [15, 3],
            None, None, None)
        normalization_parameters = {1: enum_1, 2: continuous_2, 3: enum_3}
        preprocessor = Preprocessor(normalization_parameters, False)

        feature_ids = [2, 1, 3]  # Sorted according to feature type
        column_values = {
            1: [12, 4, 2, 2],
            2: [1.0, 2.0, 3.0, 3.0],
            3: [15, 3, 15, normalization.MISSING_VALUE],
        }
        raw = np.zeros([4, 3], dtype=np.float32)
        for feature_id, values in column_values.items():
            raw[:, feature_ids.index(feature_id)] = values
        inputs = torch.from_numpy(raw)
        # Presence mask: 1.0 wherever the entry is not the missing marker.
        presence = (inputs != MISSING_VALUE).float()
        normalized_feature_matrix = preprocessor(inputs, presence)

        expected = np.array([
            [1.0, 1, 0, 0, 1, 0],
            [2.0, 0, 1, 0, 0, 1],
            [3.0, 0, 0, 1, 1, 0],
            [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
        ])
        np.testing.assert_allclose(expected, normalized_feature_matrix)
Beispiel #8
0
 def normalization(self):
     """Return ENUM normalization parameters for the single feature 'f'.

     The possible values are the floats 0.0, 1.0, ... with one entry per
     element of ``self.STATES``.
     """
     state_count = len(self.STATES)
     possible_values = [float(index) for index in range(state_count)]
     enum_parameters = NormalizationParameters(
         feature_type="ENUM",
         boxcox_lambda=None,
         boxcox_shift=None,
         mean=None,
         stddev=None,
         possible_values=possible_values,
     )
     return {'f': enum_parameters}
    def test_normalize_dense_matrix_enum(self):
        """Run a dense matrix keyed by string feature names through Preprocessor.

        'f1' and 'f3' are ENUM and 'f2' is CONTINUOUS; clamping is switched
        off on the preprocessor. Input columns follow ``feature_ids`` order
        ('f2', 'f1', 'f3'). The expected output holds the 'f2' value followed
        by the one-hot groups of 'f1' and 'f3', with an all-zero group where
        the ENUM input is MISSING_VALUE.
        """
        enum_f1 = NormalizationParameters(
            identify_types.ENUM, None, None, None, None, [12, 4, 2],
            None, None, None,
        )
        continuous_f2 = NormalizationParameters(
            identify_types.CONTINUOUS, None, 0, 0, 1, None, None, None, None
        )
        enum_f3 = NormalizationParameters(
            identify_types.ENUM, None, None, None, None, [15, 3],
            None, None, None,
        )
        normalization_parameters = {
            "f1": enum_f1,
            "f2": continuous_f2,
            "f3": enum_f3,
        }
        preprocessor = Preprocessor(normalization_parameters, False)
        preprocessor.clamp = False

        feature_ids = ["f2", "f1", "f3"]  # Sorted according to feature type
        column_values = {
            "f1": [12, 4, 2, 2],
            "f2": [1.0, 2.0, 3.0, 3.0],
            "f3": [15, 3, 15, normalization.MISSING_VALUE],
        }
        inputs = np.zeros([4, 3], dtype=np.float32)
        for feature_name, values in column_values.items():
            inputs[:, feature_ids.index(feature_name)] = values
        normalized_feature_matrix = preprocessor.forward(inputs)

        expected = np.array(
            [
                [1.0, 1, 0, 0, 1, 0],
                [2.0, 0, 1, 0, 0, 1],
                [3.0, 0, 0, 1, 1, 0],
                [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
            ]
        )
        np.testing.assert_allclose(expected, normalized_feature_matrix)
Beispiel #10
0
 def normalization(self):
     """Return ENUM normalization parameters for the single feature id 0.

     The possible values are the integers 0, 1, ... with one entry per
     element of ``self.STATES``.
     """
     state_count = len(self.STATES)
     enum_parameters = NormalizationParameters(
         feature_type="ENUM",
         boxcox_lambda=None,
         boxcox_shift=None,
         mean=None,
         stddev=None,
         possible_values=list(range(state_count)),
         quantiles=None,
     )
     return {0: enum_parameters}
Beispiel #11
0
def default_normalizer(feats):
    """Map every feature in ``feats`` to CONTINUOUS normalization parameters.

    Each feature gets ``boxcox_shift=0``, ``mean=0`` and ``stddev=1`` with no
    Box-Cox lambda, possible values or quantiles. The result is a
    ``collections.OrderedDict`` that keeps the order of ``feats``.
    """
    normalization = collections.OrderedDict()
    for feat in feats:
        normalization[feat] = NormalizationParameters(
            feature_type="CONTINUOUS",
            boxcox_lambda=None,
            boxcox_shift=0,
            mean=0,
            stddev=1,
            possible_values=None,
            quantiles=None,
        )
    return normalization
Beispiel #12
0
    def test_do_not_preprocess(self):
        """Round-trip DO_NOT_PREPROCESS features through pre- and postprocessing.

        With every feature marked DO_NOT_PREPROCESS and all values present,
        the postprocessed output is expected to match the original input.
        """
        normalization_parameters = {}
        for feature_id in range(1, 5):
            normalization_parameters[feature_id] = NormalizationParameters(
                feature_type=DO_NOT_PREPROCESS)
        preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
        postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

        x = torch.randn(3, 4)
        # Mark every entry as present.
        presence = torch.ones_like(x, dtype=torch.uint8)
        preprocessed = preprocessor(x, presence)
        y = postprocessor(preprocessed)
        npt.assert_allclose(x, y)
Beispiel #13
0
    def test_continuous_action(self):
        """Round-trip CONTINUOUS_ACTION features through pre- and postprocessing.

        Feature ``i`` (1 through 4) is bounded to ``[-5 * i, 10 * i]``. Inputs
        are drawn uniformly inside those bounds, and the postprocessed output
        is expected to match the input within a relative tolerance of 1e-5.
        """
        normalization_parameters = {}
        for feature_id in range(1, 5):
            normalization_parameters[feature_id] = NormalizationParameters(
                feature_type=CONTINUOUS_ACTION,
                min_value=-5.0 * feature_id,
                max_value=10.0 * feature_id,
            )
        preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
        postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

        unit_samples = torch.rand(3, 4)
        # Per-feature range width (max - min) and lower bound.
        widths = torch.tensor([15, 30, 45, 60])
        lower_bounds = torch.tensor([-5, -10, -15, -20])
        x = unit_samples * widths + lower_bounds
        presence = torch.ones_like(x, dtype=torch.uint8)
        preprocessed = preprocessor(x, presence)
        y = postprocessor(preprocessed)
        npt.assert_allclose(x, y, rtol=1e-5)
Beispiel #14
0
def only_continuous_action_normalizer(feats, min_value=None, max_value=None):
    """Map every feature in ``feats`` to CONTINUOUS_ACTION normalization parameters.

    All features share the same ``min_value`` and ``max_value`` and use
    ``mean=0`` and ``stddev=1``. The result is a ``collections.OrderedDict``
    that keeps the order of ``feats``.
    """
    normalization = collections.OrderedDict()
    for feat in feats:
        normalization[feat] = NormalizationParameters(
            feature_type="CONTINUOUS_ACTION",
            boxcox_lambda=None,
            boxcox_shift=None,
            mean=0,
            stddev=1,
            possible_values=None,
            quantiles=None,
            min_value=min_value,
            max_value=max_value,
        )
    return normalization
Beispiel #15
0
    def test_quantile_boundary_logic(self):
        """Test quantile logic when a feature value equals a quantile boundary."""
        boundaries = [0.0, 80.0, 100.0]
        # One row per boundary value, as a single-column batch.
        feature_values = torch.tensor([[boundary] for boundary in boundaries])
        norm_params = NormalizationParameters(
            feature_type="QUANTILE",
            boxcox_lambda=None,
            boxcox_shift=None,
            mean=0,
            stddev=1,
            possible_values=None,
            quantiles=boundaries,
            min_value=0.0,
            max_value=100.0,
        )
        preprocessor = Preprocessor({1: norm_params}, False)
        output = preprocessor._preprocess_QUANTILE(
            0, feature_values.float(), [norm_params])

        expected_output = torch.tensor([[0.0], [0.5], [1.0]])
        matches = np.isclose(output, expected_output)
        self.assertTrue(np.all(matches))
Beispiel #16
0
    def test_get_predictor_export_meta_and_workspace_with_feature_extractor(
            self):
        """Export a model with a feature extractor and compare net vs. model outputs.

        Builds the predictor export meta with a PredictorFeatureExtractor
        (normalization disabled), checks the parameter/input/output counts,
        saves it to a temporary minidb, reloads it as a prediction net, feeds
        the prototype's float features and asserts that the net outputs match
        the outputs of calling the model directly.
        """
        model = Model()

        state_normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS)
            for i in range(1, 5)
        }
        action_normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS)
            for i in range(5, 9)
        }

        extractor = PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization_parameters,
            action_normalization_parameters=action_normalization_parameters,
            normalize=False,
        )

        pem, ws = model.get_predictor_export_meta_and_workspace(
            feature_extractor=extractor)
        # model has 2 params + 1 const. extractor has 1 const.
        self.assertEqual(4, len(pem.parameters))
        for p in pem.parameters:
            self.assertTrue(ws.HasBlob(p))
        self.assertEqual(3, len(pem.inputs))
        self.assertEqual(4, len(pem.outputs))

        input_prototype = model.input_prototype()

        with tempfile.TemporaryDirectory() as tmpdirname:
            db_path = os.path.join(tmpdirname, "model")
            # Pass the path as a %-style argument; without a placeholder the
            # logging module reports a formatting error instead of the path.
            logger.info("DB path: %s", db_path)
            db_type = "minidb"
            with ws._ctx:
                save_to_db(db_type, db_path, pem)

            # Load the model from DB file and run it
            net = prepare_prediction_net(db_path, db_type)

            # Flatten state and action features into one sparse example whose
            # keys are the feature ids 1..8.
            state_features = input_prototype.state.float_features
            action_features = input_prototype.action.float_features
            float_features_values = (torch.cat(
                (state_features, action_features), dim=1).reshape(-1).numpy())
            float_features_keys = np.arange(1, 9)
            float_features_lengths = np.array([8], dtype=np.int32)

            workspace.FeedBlob("input/float_features.keys",
                               float_features_keys)
            workspace.FeedBlob("input/float_features.values",
                               float_features_values)
            workspace.FeedBlob("input/float_features.lengths",
                               float_features_lengths)

            workspace.RunNet(net)
            net_sum = workspace.FetchBlob("sum")
            net_mul = workspace.FetchBlob("mul")
            net_plus_one = workspace.FetchBlob("plus_one")
            net_linear = workspace.FetchBlob("linear")

            model_sum, model_mul, model_plus_one, model_linear = model(
                input_prototype)

            npt.assert_array_equal(model_sum.numpy(), net_sum)
            npt.assert_array_equal(model_mul.numpy(), net_mul)
            npt.assert_array_equal(model_plus_one.numpy(), net_plus_one)
            npt.assert_allclose(model_linear.detach().numpy(),
                                net_linear,
                                rtol=1e-4)
Beispiel #17
0
    def test_get_predictor_export_meta_and_workspace_full(self):
        """Export a model with feature extractor and output transformer, then run it.

        Builds the predictor export meta with a PredictorFeatureExtractor
        (normalization disabled) and a TestOutputTransformer, checks the
        parameter/input/output sets, saves it to a temporary minidb, reloads
        it as a prediction net, feeds the prototype's float features and
        verifies the transformed outputs against the model's linear output.
        """
        model = Model()

        state_normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS)
            for i in range(1, 5)
        }
        action_normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS)
            for i in range(5, 9)
        }

        extractor = PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization_parameters,
            action_normalization_parameters=action_normalization_parameters,
            normalize=False,
        )
        output_transformer = TestOutputTransformer()

        pem, ws = model.get_predictor_export_meta_and_workspace(
            feature_extractor=extractor, output_transformer=output_transformer)
        # model has 2 params + 1 const. extractor has 1 const. output_transformer has 1 const.
        self.assertEqual(5, len(pem.parameters))
        for p in pem.parameters:
            self.assertTrue(ws.HasBlob(p))
        self.assertEqual(3, len(pem.inputs))
        self.assertEqual(5, len(pem.outputs))
        self.assertEqual(
            {
                "output/string_weighted_multi_categorical_features.lengths",
                "output/string_weighted_multi_categorical_features.keys",
                "output/string_weighted_multi_categorical_features.values.lengths",
                "output/string_weighted_multi_categorical_features.values.keys",
                "output/string_weighted_multi_categorical_features.values.values",
            },
            set(pem.outputs),
        )

        input_prototype = model.input_prototype()

        with tempfile.TemporaryDirectory() as tmpdirname:
            db_path = os.path.join(tmpdirname, "model")
            logger.info("DB path: %s", db_path)
            db_type = "minidb"
            with ws._ctx:
                save_to_db(db_type, db_path, pem)

            # Load the model from DB file and run it
            net = prepare_prediction_net(db_path, db_type)

            # Flatten state and action features into one sparse example whose
            # keys are the feature ids 1..8.
            state_features = input_prototype.state.float_features
            action_features = input_prototype.action.float_features
            float_features_values = (torch.cat(
                (state_features, action_features), dim=1).reshape(-1).numpy())
            float_features_keys = np.arange(1, 9)
            float_features_lengths = np.array([8], dtype=np.int32)

            workspace.FeedBlob("input/float_features.keys",
                               float_features_keys)
            workspace.FeedBlob("input/float_features.values",
                               float_features_values)
            workspace.FeedBlob("input/float_features.lengths",
                               float_features_lengths)

            workspace.RunNet(net)

            # Only the linear output is compared below.
            _, _, _, model_linear = model(input_prototype)

            lengths = workspace.FetchBlob(
                "output/string_weighted_multi_categorical_features.lengths")
            keys = workspace.FetchBlob(
                "output/string_weighted_multi_categorical_features.keys")
            values_lengths = workspace.FetchBlob(
                "output/string_weighted_multi_categorical_features.values.lengths"
            )
            values_keys = workspace.FetchBlob(
                "output/string_weighted_multi_categorical_features.values.keys"
            )
            values_values = workspace.FetchBlob(
                "output/string_weighted_multi_categorical_features.values.values"
            )

            N = 1
            npt.assert_array_equal(np.ones(N, dtype=np.int32), lengths)
            npt.assert_array_equal(np.zeros(N, dtype=np.int64), keys)
            npt.assert_array_equal([1] * N, values_lengths)
            # Use the builtin `object`: the `np.object` alias was deprecated
            # in NumPy 1.20 and removed in 1.24.
            npt.assert_array_equal(np.array([b"TestAction"], dtype=object),
                                   values_keys)
            npt.assert_array_equal(model_linear.detach().numpy().reshape(-1),
                                   values_values)
def _cont_action_norm():
    """Return CONTINUOUS_ACTION normalization parameters bounded to [-3.0, 3.0]."""
    bounds = {"min_value": -3.0, "max_value": 3.0}
    return NormalizationParameters(feature_type=CONTINUOUS_ACTION, **bounds)
def _cont_norm():
    """Return CONTINUOUS normalization parameters with mean 0.0 and stddev 1.0."""
    moments = {"mean": 0.0, "stddev": 1.0}
    return NormalizationParameters(feature_type=CONTINUOUS, **moments)
Beispiel #20
0
def default_normalizer(feats, min_value=None, max_value=None):
    """Assign normalization parameters to ``feats`` by cycling through six types.

    Features are assigned, by position, BINARY, PROBABILITY, CONTINUOUS,
    BOXCOX, QUANTILE and ENUM parameters in turn, wrapping around after the
    sixth. Every parameter set uses ``mean=0``, ``stddev=1`` and the given
    ``min_value`` / ``max_value``. The result is a
    ``collections.OrderedDict`` that keeps the order of ``feats``.
    """

    def _make(feature_type, boxcox_lambda=None, boxcox_shift=None,
              possible_values=None, quantiles=None):
        # Fill in the settings shared by all six parameter sets.
        return NormalizationParameters(
            feature_type=feature_type,
            boxcox_lambda=boxcox_lambda,
            boxcox_shift=boxcox_shift,
            mean=0,
            stddev=1,
            possible_values=possible_values,
            quantiles=quantiles,
            min_value=min_value,
            max_value=max_value,
        )

    normalization_types = [
        _make("BINARY"),
        _make("PROBABILITY"),
        _make("CONTINUOUS"),
        _make("BOXCOX", boxcox_lambda=1, boxcox_shift=1),
        _make("QUANTILE", quantiles=[0, 1]),
        _make("ENUM", possible_values=[0, 1]),
    ]
    cycle_length = len(normalization_types)

    normalization = collections.OrderedDict()
    for position, feat in enumerate(feats):
        normalization[feat] = normalization_types[position % cycle_length]
    return normalization