Ejemplo n.º 1
0
def read_norm_file(path):
    """Load normalization parameters from a JSON file, optionally gzipped.

    Args:
        path: Location of the file; a leading ``~`` is expanded. A path
            ending in ``.gz`` is decompressed with ``gzip`` before parsing.

    Returns:
        Whatever ``normalization.deserialize`` returns for the parsed JSON.
    """
    path = os.path.expanduser(path)
    # Choose the opener from the suffix so the parsing code exists only once.
    # ``endswith`` also avoids treating a bare file named "gz" as compressed.
    opener = gzip.open if path.endswith(".gz") else open
    # Read bytes in both cases: ``json.load`` then detects UTF-8/16/32 itself
    # instead of depending on the platform's default text encoding.
    with opener(path, "rb") as f:
        norm_json = json.load(f)
    return normalization.deserialize(norm_json)
Ejemplo n.º 2
0
    def test_persistency(self):
        """Parameters must survive a serialize/deserialize round trip."""
        # Only the second element returned by read_data() is used here.
        _, values_by_feature = preprocessing_util.read_data()
        expected = {
            feature: normalization.identify_parameter(column)
            for feature, column in values_by_feature.items()
        }

        restored = normalization.deserialize(normalization.serialize(expected))
        self.assertEqual(restored, expected)
Ejemplo n.º 3
0
 def read_norm_file(path) -> Dict[int, NormalizationParameters]:
     """Load normalization parameters from a JSON file, optionally gzipped.

     Args:
         path: Location of the file; a leading ``~`` is expanded. A path
             ending in ``.gz`` is decompressed with ``gzip`` before parsing.

     Returns:
         The result of ``normalization.deserialize`` on the parsed JSON.
     """
     path = os.path.expanduser(path)
     # One opener, one parse: the suffix decides whether to decompress.
     # ``endswith`` also avoids treating a bare file named "gz" as compressed.
     opener = gzip.open if path.endswith(".gz") else open
     # Binary mode for both openers lets ``json.load`` detect UTF-8/16/32
     # itself rather than relying on the platform's default text encoding.
     with opener(path, "rb") as f:
         norm_json = json.load(f)
     return normalization.deserialize(norm_json)
Ejemplo n.º 4
0
    def test_persistency(self):
        """Parameters must survive a serialize/deserialize round trip."""
        expected = {
            feature: normalization.identify_parameter(
                column, feature_type=self._feature_type_override(feature)
            )
            for feature, column in read_data().items()
        }

        payload = normalization.serialize(expected)
        restored = normalization.deserialize(payload)
        self.assertEqual(restored, expected)
Ejemplo n.º 5
0
    def test_persistency(self):
        """Parameters must survive a serialize/deserialize round trip."""
        expected = normalization.identify_parameters(
            preprocessing_util.read_data()
        )

        payload = normalization.serialize(expected)
        self.assertEqual(normalization.deserialize(payload), expected)
Ejemplo n.º 6
0
    def test_persistency(self):
        """Parameters must survive a serialize/deserialize round trip."""
        expected = {}
        for feature, column in read_data().items():
            expected[feature] = normalization.identify_parameter(column)
            # Overwrite the first entry with MISSING_VALUE. This happens after
            # the parameters for this feature were identified.
            # NOTE(review): the original comment was cut off ("to test
            # that ...") and nothing below reads the data again; confirm
            # what this was meant to exercise.
            column[0] = MISSING_VALUE

        payload = normalization.serialize(expected)
        restored = normalization.deserialize(payload)
        self.assertEqual(restored, expected)
Ejemplo n.º 7
0
    def test_persistency(self):
        """Parameters must survive a serialize/deserialize round trip.

        Type and possible values are compared exactly; the numeric fields are
        compared with a tolerance unless the original value is ``None``.
        """
        approx_fields = (
            "boxcox_lambda",
            "boxcox_shift",
            "mean",
            "stddev",
            "quantiles",
            "min_value",
            "max_value",
        )

        expected = {}
        for feature, column in read_data().items():
            expected[feature] = normalization.identify_parameter(
                feature,
                column,
                feature_type=self._feature_type_override(feature),
            )
            # Overwrite the first entry with MISSING_VALUE. This happens after
            # the parameters for this feature were identified.
            # NOTE(review): the original comment was cut off ("to test
            # that ...") and nothing below reads the data again; confirm
            # what this was meant to exercise.
            column[0] = MISSING_VALUE

        restored = normalization.deserialize(normalization.serialize(expected))

        # Thrift serialization seems to lose a bit of precision, so comparing
        # the whole objects with `==` would fail; compare field by field.
        self.assertEqual(restored.keys(), expected.keys())
        for feature, want in expected.items():
            got = restored[feature]
            self.assertEqual(got.feature_type, want.feature_type)
            self.assertEqual(got.possible_values, want.possible_values)
            for field in approx_fields:
                want_value = getattr(want, field)
                got_value = getattr(got, field)
                if want_value is None:
                    # Unset fields must come back unset.
                    self.assertEqual(got_value, want_value)
                else:
                    npt.assert_allclose(got_value, want_value)
Ejemplo n.º 8
0
    def test_persistency(self):
        """Round-trip normalization parameters through serialize/deserialize.

        ``feature_type`` and ``possible_values`` must match exactly; the
        remaining numeric fields must match within ``assert_allclose``
        tolerance, or be equal when the original value is ``None``.
        """
        original = {}
        for name, values in read_data().items():
            override = self._feature_type_override(name)
            original[name] = normalization.identify_parameter(
                name, values, feature_type=override
            )
            # Overwrite the first entry with MISSING_VALUE. This happens after
            # the parameters for this feature were identified.
            # NOTE(review): the original comment was cut off ("to test
            # that ...") and nothing below reads the data again; confirm
            # what this was meant to exercise.
            values[0] = MISSING_VALUE

        serialized = normalization.serialize(original)
        round_tripped = normalization.deserialize(serialized)

        # Thrift serialization seems to lose a bit of precision, so a plain
        # `==` on the parameter objects would be false.
        self.assertEqual(round_tripped.keys(), original.keys())

        exact_fields = ("feature_type", "possible_values")
        tolerant_fields = (
            "boxcox_lambda",
            "boxcox_shift",
            "mean",
            "stddev",
            "quantiles",
            "min_value",
            "max_value",
        )
        for name, before in original.items():
            after = round_tripped[name]
            for field in exact_fields:
                self.assertEqual(getattr(after, field), getattr(before, field))
            for field in tolerant_fields:
                before_value = getattr(before, field)
                after_value = getattr(after, field)
                if before_value is not None:
                    npt.assert_allclose(after_value, before_value)
                else:
                    # Unset fields must come back unset.
                    self.assertEqual(after_value, before_value)
Ejemplo n.º 9
0
def read_norm_file(path):
    """Load normalization parameters from a JSON file.

    Args:
        path: Location of the JSON file; a leading ``~`` is expanded.

    Returns:
        Whatever ``normalization.deserialize`` returns for the parsed JSON.
    """
    path = os.path.expanduser(path)
    # Read bytes so ``json.load`` detects UTF-8/16/32 itself; text mode would
    # decode with the platform's default encoding, which may not be UTF-8.
    with open(path, "rb") as f:
        norm_json = json.load(f)
    return normalization.deserialize(norm_json)
Ejemplo n.º 10
0
def read_norm_params(table_output):
    """Deserialize normalization parameters from a two-column table.

    Pairs each entry of ``table_output["feature"]`` with the entry at the
    same position in ``table_output["normalization"]`` and passes the
    resulting dict to ``normalization.deserialize``.
    """
    features = table_output["feature"]
    serialized = table_output["normalization"]
    return normalization.deserialize(dict(zip(features, serialized)))