Beispiel #1
0
def get_value_meta(value,
                   meta_conf,
                   tracking_config,
                   value_type=None,
                   target=None):
    # type: ( Any, ValueMetaConf, TrackingConfig, Optional[ValueType], Optional[Target]) -> Optional[ValueMeta]
    """
    Compute the value meta that tracking should log for ``value``.

    The user supplied meta config is resolved through ``tracking_config``
    (together with the value type and the target), and the resolved config
    is then handed to the value type, which builds the value meta.

    @param value: the value to describe; ``None`` short-circuits to ``None``
    @param meta_conf: meta config supplied by the user
    @param tracking_config: TrackingConfig used to resolve the effective meta config
    @param value_type: optional value type, if it is already known
    @param target: optional target that contains the value; passed along to the config resolution
    @return: the calculated value meta, or ``None`` when ``value`` is ``None``
    """
    if value is None:
        return None

    # The concrete value type is detected from the value itself whenever the
    # given type is the default one or a TargetValueType, so the value can be
    # reported the right way. Targets are future types, and at this point
    # their actual value is available for logging.
    must_detect_type = (_is_default_value_type(value_type)
                        or isinstance(value_type, TargetValueType))
    if must_detect_type:
        value_type = get_value_type_of_obj(
            value, default_value_type=ObjectValueType())

    resolved_conf = tracking_config.get_value_meta_conf(
        meta_conf, value_type, target)
    return value_type.get_value_meta(value, meta_conf=resolved_conf)
    def test_log_schema(
        self, tracking_config, param_log_schema, config_log_schema,
        expected_log_schema
    ):  # type: (Callable[[], TrackingConfig], bool, bool, bool) -> None
        """
        Check the ``log_schema`` flag of the resolved value meta conf.

        ``tracking_config`` is a factory; the config-level setting is only
        overridden when ``config_log_schema`` is not ``None``.
        """
        # Arrange
        config = tracking_config()
        requested_conf = ValueMetaConf(log_schema=param_log_schema)
        if config_log_schema is not None:
            config.log_value_schema = config_log_schema

        # Act
        resolved_conf = config.get_value_meta_conf(requested_conf,
                                                   ObjectValueType())

        # Assert
        assert resolved_conf.log_schema == expected_log_schema
Beispiel #3
0
    def dump(self, value, value_type=None, **kwargs):
        """
        Write ``value`` through the marshaller of its value type and cache it.

        The value type is taken from ``value_type`` when one is given
        (truthy), otherwise it is derived from the value itself with
        ``ObjectValueType()`` as the default. Any exception raised while
        obtaining the marshaller or dumping is re-raised as the
        ``failed_to_write_task_output`` friendly error. On success the value
        is stored in ``TARGET_CACHE`` under a key built from this target and
        the value type.
        """
        from targets.values import ObjectValueType
        from targets.values import get_value_type_of_obj, get_value_type_of_type

        value_type = (
            get_value_type_of_type(value_type)
            if value_type
            else get_value_type_of_obj(value, ObjectValueType())
        )

        try:
            marshaller = get_marshaller_ctrl(self, value_type=value_type)
            marshaller.dump(value, **kwargs)
        except Exception as ex:
            raise friendly_error.failed_to_write_task_output(
                ex, self, value_type=value_type
            )

        # reached only when the dump above did not raise
        TARGET_CACHE[TargetCacheKey(target=self, value_type=value_type)] = value
    def test_log_preview_size(
        self,
        tracking_config,
        param_log_preview_size,
        config_log_preview_size,
        expected_log_preview_size,
    ):  # type: (Callable[[], TrackingConfig], int, int, int) -> None
        """
        Check the ``log_preview_size`` of the resolved value meta conf.

        The config-level ``log_value_preview_max_len`` is only overridden
        when ``config_log_preview_size`` is not ``None``.
        """
        # Arrange
        config = tracking_config()
        requested_conf = ValueMetaConf(
            log_preview_size=param_log_preview_size)
        if config_log_preview_size is not None:
            config.log_value_preview_max_len = config_log_preview_size

        # Act
        resolved_conf = config.get_value_meta_conf(requested_conf,
                                                   ObjectValueType())

        # Assert
        assert resolved_conf.log_preview_size == expected_log_preview_size
    def test_log_preview(
            self,
            tracking_config,  # type: Callable[[], TrackingConfig]
            param_log_preview,  # type: bool
            config_log_preview,  # type: bool
            expected_log_preview,  # type: bool
    ):
        """
        Check the ``log_preview`` flag of the resolved value meta conf.

        The config-level ``log_value_preview`` is only overridden when
        ``config_log_preview`` is not ``None``.
        """
        # Arrange
        config = tracking_config()
        requested_conf = ValueMetaConf(log_preview=param_log_preview)
        if config_log_preview is not None:
            config.log_value_preview = config_log_preview

        # Act
        resolved_conf = config.get_value_meta_conf(requested_conf,
                                                   ObjectValueType())

        # Assert
        assert resolved_conf.log_preview == expected_log_preview
    def test_log_stats(
        self, tracking_config, param_log_stats, config_log_stats,
        expected_log_stats
    ):  # type: (Callable[[], TrackingConfig], bool, bool, bool) -> None
        """
        Check the ``log_stats`` request of the resolved value meta conf.

        The boolean expectation is expanded into a ``LogDataRequest`` whose
        boolean, numeric and string "include all" flags share that value.
        """
        # Arrange
        config = tracking_config()
        requested_conf = ValueMetaConf(log_stats=param_log_stats)
        if config_log_stats is not None:
            config.log_value_stats = config_log_stats
        expected_request = LogDataRequest(
            include_all_boolean=expected_log_stats,
            include_all_numeric=expected_log_stats,
            include_all_string=expected_log_stats,
        )

        # Act
        resolved_conf = config.get_value_meta_conf(requested_conf,
                                                   ObjectValueType())

        # Assert
        assert resolved_conf.log_stats == expected_request
    def test_log_histograms(
            self,
            tracking_config,  # type: Callable[[], TrackingConfig]
            param_log_histograms,  # type: bool
            config_log_histograms,  # type: bool
            expected_log_histograms,  # type: bool
    ):
        """
        Check the ``log_histograms`` request of the resolved value meta conf.

        The boolean expectation is expanded into a ``LogDataRequest`` whose
        boolean, numeric and string "include all" flags share that value.
        """
        # Arrange
        config = tracking_config()
        requested_conf = ValueMetaConf(log_histograms=param_log_histograms)
        if config_log_histograms is not None:
            config.log_histograms = config_log_histograms
        expected_request = LogDataRequest(
            include_all_boolean=expected_log_histograms,
            include_all_numeric=expected_log_histograms,
            include_all_string=expected_log_histograms,
        )

        # Act
        resolved_conf = config.get_value_meta_conf(requested_conf,
                                                   ObjectValueType())

        # Assert
        assert resolved_conf.log_histograms == expected_request
Beispiel #8
0
 def dump(self, value, value_type=None, **kwargs):
     """
     Keep ``value`` on the instance and record its value type.

     The value type comes from ``value_type`` when one is given (truthy);
     otherwise it is derived from the value, with ``ObjectValueType()``
     passed as the default. Extra keyword arguments are accepted but unused.
     """
     self._obj = value
     self.value_type = (
         get_value_type_of_type(value_type)
         if value_type
         else get_value_type_of_obj(value, ObjectValueType())
     )
Beispiel #9
0
class TestTrackingConfig:
    @pytest.mark.parametrize(
        "value, value_type, target, expected_value_preview, expected_data_schema",
        [
            (10, None, None, "10", {
                "type": "int"
            }),
            (10, ObjectValueType(), None, "10", {
                "type": "int"
            }),
            (10, TargetValueType(), target_factory("/path"), "10", {
                "type": "int"
            }),
            (10, StrValueType(), None, "10", {
                "type": "str"
            }),
            (
                [10],
                ListValueType(),
                None,
                "[10]",
                {
                    "columns": [],
                    "dtypes": {},
                    "shape": (1, 0),
                    "size.bytes": 48,
                    "type": "List",
                },
            ),
        ],
    )
    def test_get_value_meta(self, value, value_type, target,
                            expected_value_preview, expected_data_schema):
        """
        Build the value meta with a default ``ValueMetaConf`` and the
        ``ValueTrackingLevel.ALL`` reporting strategy, then compare its
        preview and its data schema (as a dict) with the expected values.
        """
        config = TrackingConfig.from_databand_context()
        config.value_reporting_strategy = ValueTrackingLevel.ALL

        value_meta = get_value_meta(value,
                                    ValueMetaConf(),
                                    config,
                                    value_type=value_type,
                                    target=target)

        assert value_meta.value_preview == expected_value_preview
        assert value_meta.data_schema.as_dict() == expected_data_schema

    @pytest.mark.parametrize(
        "value, value_type, target, expected_value_preview, expected_data_schema",
        [(
            [
                {
                    "test": "test",
                    "num": 10,
                    "bool": True
                },
                {
                    "test": "test_2",
                    "num": 20,
                    "bool": False
                },
            ],
            ListValueType(),
            None,
            '[{"test": "test", "num": 10, "bool": true}, {"test": "test_2", "num": 20, '
            '"bool": false}]',
            {
                "columns": ["bool", "num", "test"],
                "dtypes": {
                    "bool": "<class 'bool'>",
                    "num": "<class 'int'>",
                    "test": "<class 'str'>",
                },
                "shape": (2, 3),
                "size.bytes": 56,
                "type": "List",
            },
        )],
    )
    def test_get_value_meta_list_of_flat_dict(self, value, value_type, target,
                                              expected_value_preview,
                                              expected_data_schema):
        """Run the shared ``test_get_value_meta`` checks for this parameter set."""
        self.test_get_value_meta(
            value=value,
            value_type=value_type,
            target=target,
            expected_value_preview=expected_value_preview,
            expected_data_schema=expected_data_schema,
        )

    @pytest.mark.parametrize(
        "value, value_type, target, expected_value_preview, expected_data_schema",
        [(
            [
                {
                    "test": "test",
                    "num": 10,
                    "bool": True,
                    "test_object": {
                        "test_object_name": "some_name",
                        "test_id": 12345,
                        "inner": {
                            "foo": "bar"
                        },
                    },
                },
                {
                    "test": "test_2",
                    "num": 20,
                    "bool": False,
                    "test_object": {
                        "test_object_name": "some_other_name",
                        "test_id": 56789,
                        "inner": {
                            "foo": "bar_2"
                        },
                    },
                },
            ],
            ListValueType(),
            None,
            '[{"test": "test", "num": 10, "bool": true, "test_object": '
            '{"test_object_name": "some_name", "test_id": 12345, "inner": {"foo": '
            '"bar"}}}, {"test": "test_2", "num": 20, "bool": false, "test_object": '
            '{"test_object_name": "some_other_name", "test_id": 56789, "inner": {"foo": '
            '"bar_2"}}}]',
            {
                "columns": [
                    "bool",
                    "num",
                    "test",
                    "test_object",
                    "test_object.inner.foo",
                    "test_object.test_id",
                    "test_object.test_object_name",
                ],
                "dtypes": {
                    "bool": "<class 'bool'>",
                    "num": "<class 'int'>",
                    "test": "<class 'str'>",
                    "test_object": "<class 'dict'>",
                    "test_object.inner.foo": "<class 'str'>",
                    "test_object.test_id": "<class 'int'>",
                    "test_object.test_object_name": "<class 'str'>",
                },
                "shape": (2, 7),
                "size.bytes":
                56,
                "type":
                "List",
            },
        )],
    )
    def test_get_value_meta_list_of_nested_dict(self, value, value_type,
                                                target, expected_value_preview,
                                                expected_data_schema):
        """Run the shared ``test_get_value_meta`` checks for this parameter set."""
        self.test_get_value_meta(
            value=value,
            value_type=value_type,
            target=target,
            expected_value_preview=expected_value_preview,
            expected_data_schema=expected_data_schema,
        )

    @pytest.mark.parametrize(
        "value, value_type, target, expected_value_preview, expected_data_schema",
        [(
            [
                {
                    "test":
                    "test",
                    "num":
                    10,
                    "bool":
                    True,
                    "test_objects": [{
                        "test_object_name": "some_name",
                        "test_id": 12345
                    }],
                },
                {
                    "test":
                    "test_2",
                    "num":
                    20,
                    "bool":
                    False,
                    "test_objects": [{
                        "test_object_name": "some_other_name",
                        "test_id": 56789
                    }],
                },
            ],
            ListValueType(),
            None,
            '[{"test": "test", "num": 10, "bool": true, "test_objects": '
            '[{"test_object_name": "some_name", "test_id": 12345}]}, {"test": "test_2", '
            '"num": 20, "bool": false, "test_objects": [{"test_object_name": '
            '"some_other_name", "test_id": 56789}]}]',
            {
                "columns": ["bool", "num", "test", "test_objects"],
                "dtypes": {
                    "bool": "<class 'bool'>",
                    "num": "<class 'int'>",
                    "test": "<class 'str'>",
                    "test_objects": "<class 'list'>",
                },
                "shape": (2, 4),
                "size.bytes": 56,
                "type": "List",
            },
        )],
    )
    def test_get_value_meta_list_of_dict_with_list_of_flat_dict(
            self, value, value_type, target, expected_value_preview,
            expected_data_schema):
        """Run the shared ``test_get_value_meta`` checks for this parameter set."""
        self.test_get_value_meta(
            value=value,
            value_type=value_type,
            target=target,
            expected_value_preview=expected_value_preview,
            expected_data_schema=expected_data_schema,
        )

    @pytest.mark.parametrize(
        "value, value_type, target, expected_value_preview, expected_data_schema",
        [(
            [
                {
                    "test":
                    "test",
                    "num":
                    10,
                    "bool":
                    True,
                    "test_object": {
                        "test_object_name": "some_other_name",
                        "test_id": 56789,
                        "inner": {
                            "foo": "bar_3"
                        },
                    },
                    "test_objects": [
                        {
                            "test_object_name": "some_name",
                            "test_id": 12345,
                            "inner": {
                                "foo": "bar"
                            },
                        },
                        {
                            "test_object_name": "some_name_1",
                            "test_id": 54321,
                            "inner": {
                                "foo": "bar_2"
                            },
                        },
                    ],
                },
                {
                    "test":
                    "test_2",
                    "num":
                    20,
                    "bool":
                    False,
                    "test_object": {
                        "test_object_name": "some_name",
                        "test_id": 11111,
                        "inner": {
                            "foo": "bar_6"
                        },
                    },
                    "test_objects": [
                        {
                            "test_object_name": "some_other_name",
                            "test_id": 56789,
                            "inner": {
                                "foo": "bar_3"
                            },
                        },
                        {
                            "test_object_name": "some_other_name",
                            "test_id": 98765,
                            "inner": {
                                "foo": "bar_4"
                            },
                        },
                    ],
                },
            ],
            ListValueType(),
            None,
            '[{"test": "test", "num": 10, "bool": true, "test_object": '
            '{"test_object_name": "some_other_name", "test_id": 56789, "inner": {"foo": '
            '"bar_3"}}, "test_objects": [{"test_object_name": "some_name", "test_id": '
            '12345, "inner": {"foo": "bar"}}, {"test_object_name": "some_name_1", '
            '"test_id": 54321, "inner": {"foo": "bar_2"}}]}, {"test": "test_2", "num": '
            '20, "bool": false, "test_object": {"test_object_name": "some_name", '
            '"test_id": 11111, "inner": {"foo": "bar_6"}}, "test_objects": '
            '[{"test_object_name": "some_other_name", "test_id": 56789, "inner": {"foo": '
            '"bar_3"}}, {"test_object_name": "some_other_name", "test_id": 98765, '
            '"inner": {"foo": "bar_4"}}]}]',
            {
                "columns": [
                    "bool",
                    "num",
                    "test",
                    "test_object",
                    "test_object.inner.foo",
                    "test_object.test_id",
                    "test_object.test_object_name",
                    "test_objects",
                ],
                "dtypes": {
                    "bool": "<class 'bool'>",
                    "num": "<class 'int'>",
                    "test": "<class 'str'>",
                    "test_object": "<class 'dict'>",
                    "test_object.inner.foo": "<class 'str'>",
                    "test_object.test_id": "<class 'int'>",
                    "test_object.test_object_name": "<class 'str'>",
                    "test_objects": "<class 'list'>",
                },
                "shape": (2, 8),
                "size.bytes":
                56,
                "type":
                "List",
            },
        )],
    )
    def test_get_value_meta_list_of_dict_with_list_of_nested_dicts(
            self, value, value_type, target, expected_value_preview,
            expected_data_schema):
        """Run the shared ``test_get_value_meta`` checks for this parameter set."""
        self.test_get_value_meta(
            value=value,
            value_type=value_type,
            target=target,
            expected_value_preview=expected_value_preview,
            expected_data_schema=expected_data_schema,
        )

    @pytest.mark.parametrize(
        "value, value_type, target, expected_value_preview, expected_data_schema",
        [(
            [
                {
                    "test":
                    "test",
                    "num":
                    10,
                    "bool":
                    True,
                    "test_object": {
                        "test_object_name": "some_other_name",
                        "test_id": 56789,
                        "inner": {
                            "foo": "bar_3"
                        },
                    },
                    "test_objects": [
                        {
                            "test_object_name": "some_name",
                            "test_id": 12345,
                            "inner": {
                                "foo": "bar"
                            },
                        },
                        {
                            "test_object_name": "some_name_1",
                            "test_id": 54321,
                            "inner": {
                                "foo": "bar_2"
                            },
                        },
                    ],
                },
                {
                    "test_1":
                    "test_2",
                    "num":
                    20,
                    "bool":
                    False,
                    "test_object": {
                        "test_object_name": "some_name",
                        "test_id": 11111,
                        "inner": {
                            "foo": "bar_6",
                            "bar": "foo"
                        },
                    },
                    "test_objects": [
                        {
                            "test_object_name": "some_other_name",
                            "test_id": 56789,
                            "inner": {
                                "foo": "bar_3"
                            },
                        },
                        {
                            "test_object_name": "some_other_name",
                            "test_id": 98765,
                            "inner": {
                                "foo": "bar_4"
                            },
                        },
                    ],
                },
            ],
            ListValueType(),
            None,
            '[{"test": "test", "num": 10, "bool": true, "test_object": '
            '{"test_object_name": "some_other_name", "test_id": 56789, "inner": {"foo": '
            '"bar_3"}}, "test_objects": [{"test_object_name": "some_name", "test_id": '
            '12345, "inner": {"foo": "bar"}}, {"test_object_name": "some_name_1", '
            '"test_id": 54321, "inner": {"foo": "bar_2"}}]}, {"test_1": "test_2", "num": '
            '20, "bool": false, "test_object": {"test_object_name": "some_name", '
            '"test_id": 11111, "inner": {"foo": "bar_6", "bar": "foo"}}, "test_objects": '
            '[{"test_object_name": "some_other_name", "test_id": 56789, "inner": {"foo": '
            '"bar_3"}}, {"test_object_name": "some_other_name", "test_id": 98765, '
            '"inner": {"foo": "bar_4"}}]}]',
            {
                "columns": [
                    "bool",
                    "num",
                    "test",
                    "test_1",
                    "test_object",
                    "test_object.inner.bar",
                    "test_object.inner.foo",
                    "test_object.test_id",
                    "test_object.test_object_name",
                    "test_objects",
                ],
                "dtypes": {
                    "bool": "<class 'bool'>",
                    "num": "<class 'int'>",
                    "test": "<class 'str'>",
                    "test_1": "<class 'str'>",
                    "test_object": "<class 'dict'>",
                    "test_object.inner.bar": "<class 'str'>",
                    "test_object.inner.foo": "<class 'str'>",
                    "test_object.test_id": "<class 'int'>",
                    "test_object.test_object_name": "<class 'str'>",
                    "test_objects": "<class 'list'>",
                },
                "shape": (2, 10),
                "size.bytes":
                56,
                "type":
                "List",
            },
        )],
    )
    def test_get_value_meta_list_of_dict_with_list_of_nested_dicts_with_different_keys(
            self, value, value_type, target, expected_value_preview,
            expected_data_schema):
        """Run the shared ``test_get_value_meta`` checks for this parameter set."""
        self.test_get_value_meta(
            value=value,
            value_type=value_type,
            target=target,
            expected_value_preview=expected_value_preview,
            expected_data_schema=expected_data_schema,
        )

    @pytest.mark.parametrize(
        "value, value_type, target, expected_value_preview",
        [(
            [
                {
                    "a": "a",
                    "b": "b"
                },
                {
                    "a": "c",
                    "b": "d"
                },
                {
                    "a": "e",
                    "b": "f"
                },
                {
                    "a": "g",
                    "b": "h"
                },
                {
                    "a": "i",
                    "b": "j"
                },
                {
                    "a": "k",
                    "b": "l"
                },
                {
                    "a": "m",
                    "b": "n"
                },
                {
                    "a": "o",
                    "b": "p"
                },
                {
                    "a": "q",
                    "b": "r"
                },
                {
                    "a": "s",
                    "b": "t"
                },
                {
                    "a": "u",
                    "b": "v"
                },
                {
                    "a": "w",
                    "b": "x"
                },
                {
                    "a": "y",
                    "b": "z"
                },
            ],
            ListValueType(),
            None,
            '[{"a": "a", "b": "b"}, {"a": "c", "b": "d"}, {"a": "e", "b": "f"}, {"a": '
            '"g", "b": "h"}, {"a": "i", "b": "j"}, {"a": "k", "b": "l"}, {"a": "m", "b": '
            '"n"}, {"a": "o", "b": "p"}, {"a": "q", "b": "r"}, {"a": "s", "b": "t"}]',
        )],
    )
    def test_get_value_meta_preview_size_default_max_elements(
            self, value, value_type, target, expected_value_preview):
        """
        Compare the preview produced with a default ``ValueMetaConf`` (and the
        ``ValueTrackingLevel.ALL`` strategy) against the expected string.
        """
        config = TrackingConfig.from_databand_context()
        config.value_reporting_strategy = ValueTrackingLevel.ALL

        value_meta = get_value_meta(value,
                                    ValueMetaConf(),
                                    config,
                                    value_type=value_type,
                                    target=target)

        assert value_meta.value_preview == expected_value_preview

    @pytest.mark.parametrize(
        "value, value_type, target, expected_value_preview",
        [(
            [
                {
                    "a": "a",
                    "b": "b"
                },
                {
                    "a": "c",
                    "b": "d"
                },
                {
                    "a": "e",
                    "b": "f"
                },
                {
                    "a": "g",
                    "b": "h"
                },
                {
                    "a": "i",
                    "b": "j"
                },
                {
                    "a": "k",
                    "b": "l"
                },
                {
                    "a": "m",
                    "b": "n"
                },
                {
                    "a": "o",
                    "b": "p"
                },
                {
                    "a": "q",
                    "b": "r"
                },
                {
                    "a": "s",
                    "b": "t"
                },
                {
                    "a": "u",
                    "b": "v"
                },
                {
                    "a": "w",
                    "b": "x"
                },
                {
                    "a": "y",
                    "b": "z"
                },
            ],
            ListValueType(),
            None,
            '[{"a": "a", "b": "b"}, {"a": "c", "b": "d"}, {"a": "e", "b": "f"}, {"a": '
            '"g", "b": "h"}, {"a": "i", "b": "j"}]',
        )],
    )
    def test_get_value_meta_preview_size_config_max_elements(
            self, value, value_type, target, expected_value_preview):
        """
        Compare the preview produced with ``ValueMetaConf(log_preview_size=5)``
        (and the ``ValueTrackingLevel.ALL`` strategy) against the expected
        string.
        """
        config = TrackingConfig.from_databand_context()
        config.value_reporting_strategy = ValueTrackingLevel.ALL

        value_meta = get_value_meta(value,
                                    ValueMetaConf(log_preview_size=5),
                                    config,
                                    value_type=value_type,
                                    target=target)

        assert value_meta.value_preview == expected_value_preview

    @pytest.mark.parametrize(
        "value, value_type, target, expected_value_preview",
        [([{
            "a": "a",
            "b": "b"
        }], ListValueType(), None, '[{"a": "a", "b": "b"}]')],
    )
    def test_get_value_meta_preview_small_size(self, value, value_type, target,
                                               expected_value_preview):
        """
        Compare the preview of a small value, produced with a default
        ``ValueMetaConf`` and the ``ValueTrackingLevel.ALL`` strategy,
        against the expected string.
        """
        config = TrackingConfig.from_databand_context()
        config.value_reporting_strategy = ValueTrackingLevel.ALL

        value_meta = get_value_meta(value,
                                    ValueMetaConf(),
                                    config,
                                    value_type=value_type,
                                    target=target)

        assert value_meta.value_preview == expected_value_preview
                    log_histograms=False,
                ),
            ),
        ],
    )
    def test_summing(self, meta_conf_list, expected):
        """
        Fold ``meta_conf_list`` left to right with ``merge_if_none`` and
        compare the merged result with ``expected``.
        """
        merged = reduce(lambda acc, conf: acc.merge_if_none(conf),
                        meta_conf_list)
        assert merged == expected


@pytest.mark.parametrize(
    "level, value_type, target, expected",
    [
        # ValueTrackingLevel.NONE
        (
            ValueTrackingLevel.NONE,
            ObjectValueType(),
            None,
            ValueMetaConf(
                log_preview=False, log_histograms=False, log_stats=False, log_size=False
            ),
        ),
        (
            ValueTrackingLevel.NONE,
            LazyValueType(),
            None,
            ValueMetaConf(
                log_preview=False, log_histograms=False, log_stats=False, log_size=False
            ),
        ),
        (
            ValueTrackingLevel.NONE,