def test_usage_statistics_handler_validate_message_success(
        caplog, in_memory_data_context_config_usage_stats_enabled,
        sample_partial_message):

    # caplog captures WARNING and above by default; we want DEBUG-level messages for this test
    caplog.set_level(
        level=logging.DEBUG,
        logger="great_expectations.core.usage_statistics.usage_statistics",
    )

    context: BaseDataContext = BaseDataContext(
        in_memory_data_context_config_usage_stats_enabled)

    usage_statistics_handler = UsageStatisticsHandler(
        data_context=context,
        data_context_id=in_memory_data_context_config_usage_stats_enabled.anonymous_usage_statistics.data_context_id,
        usage_statistics_url=in_memory_data_context_config_usage_stats_enabled.anonymous_usage_statistics.usage_statistics_url,
    )

    assert (usage_statistics_handler._data_context_id ==
            "00000000-0000-0000-0000-000000000001")

    envelope = usage_statistics_handler.build_envelope(sample_partial_message)
    validated_message = usage_statistics_handler.validate_message(
        envelope, anonymized_usage_statistics_record_schema)

    assert validated_message
    assert not usage_stats_invalid_messages_exist(caplog.messages)
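
# NOTE: usage_stats_invalid_messages_exist comes from the suite's test utilities
# and is not shown in this excerpt. A minimal sketch of such a helper, assuming
# the handler logs the phrase "invalid message" when schema validation of an
# outgoing usage-statistics message fails:
def usage_stats_invalid_messages_exist(log_messages) -> bool:
    # Scan the captured log lines for the assumed failure marker.
    return any("invalid message" in message.lower() for message in log_messages)
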
def test_usage_statistics_handler_build_envelope(
        in_memory_data_context_config_usage_stats_enabled,
        sample_partial_message):
    """This test is for a happy path only but will fail if there is an exception thrown in build_envelope"""

    context: BaseDataContext = BaseDataContext(
        in_memory_data_context_config_usage_stats_enabled)

    usage_statistics_handler = UsageStatisticsHandler(
        data_context=context,
        data_context_id=in_memory_data_context_config_usage_stats_enabled.anonymous_usage_statistics.data_context_id,
        usage_statistics_url=in_memory_data_context_config_usage_stats_enabled.anonymous_usage_statistics.usage_statistics_url,
    )

    assert (usage_statistics_handler._data_context_id ==
            "00000000-0000-0000-0000-000000000001")

    envelope = usage_statistics_handler.build_envelope(sample_partial_message)
    required_keys = [
        "event",
        "event_payload",
        "version",
        "ge_version",
        "data_context_id",
        "data_context_instance_id",
        "event_time",
    ]
    assert all(key in envelope for key in required_keys)

    assert envelope["version"] == "1.0.0"
    assert envelope["data_context_id"] == "00000000-0000-0000-0000-000000000001"
def test_in_memory_runtime_context_configured_with_usage_stats_handler(
        mock_emit, in_memory_runtime_context, test_pandas_df):
    context: DataContext = in_memory_runtime_context

    # manually set usage statistics handler
    context._usage_statistics_handler = UsageStatisticsHandler(
        data_context=context,
        data_context_id=context._data_context_id,
        usage_statistics_url="http://fakeendpoint.com",
    )

    catch_exceptions: bool = False  # expect exceptions to be raised
    result_format: dict = {
        "result_format": "SUMMARY",
    }
    runtime_environment_arguments = {
        "catch_exceptions": catch_exceptions,
        "result_format": result_format,
    }

    suite: ExpectationSuite = in_memory_runtime_context.create_expectation_suite(
        "test_suite", overwrite_existing=True)

    expectation_configuration: ExpectationConfiguration

    expectation_meta: dict = {"Notes": "Some notes"}

    expectation_arguments_without_meta: dict

    expectation_arguments_column: dict = {
        "include_config": True,
        "column": "Name",  # use correct column to avoid error
    }
    expectation_arguments_without_meta = dict(
        **runtime_environment_arguments, **expectation_arguments_column
    )
    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_values_to_not_be_null",
        kwargs=expectation_arguments_without_meta,
        meta=expectation_meta,
    )
    suite.add_expectation(expectation_configuration=expectation_configuration)

    # add_expectation emits exactly one usage-stats event
    assert mock_emit.call_count == 1
    assert mock_emit.call_args_list == [
        mock.call({
            "event": "expectation_suite.add_expectation",
            "event_payload": {},
            "success": True,
        })
    ]
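
# NOTE: mock_emit above is injected by a mock.patch decorator that is not shown
# in this excerpt. A hedged sketch of how such patching is typically wired up;
# the patch target below is an assumption and may differ in the repository:
from unittest import mock


@mock.patch(
    "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit"
)
def example_test_with_patched_emit(mock_emit):
    # mock.patch replaces the handler's emit method and passes the resulting
    # MagicMock as the first argument, so assertions on mock_emit.call_count and
    # mock_emit.call_args_list work without any HTTP traffic being sent.
    ...
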
def test_get_profiler_run_usage_statistics_with_handler_invalid_payload(
    mock_data_context: mock.MagicMock,
):
    # Ensure that a real handler gets passed down by the context
    handler: UsageStatisticsHandler = UsageStatisticsHandler(
        mock_data_context, "my_id", "my_url")
    mock_data_context.usage_statistics_handler = handler

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        name="my_profiler", config_version=1.0, data_context=mock_data_context)

    payload: dict = get_profiler_run_usage_statistics(profiler=profiler)

    # Payload won't pass schema validation due to a lack of rules, but we can confirm that it is anonymized
    assert payload == {
        "anonymized_name": "a0061ec021855cd2b3a994dd8d90fe5d",
        "config_version": 1.0,
        "rule_count": 0,
        "variable_count": 0,
    }
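
# NOTE: The hard-coded anonymized_name above relies on anonymization being
# deterministic for a fixed salt. A rough sketch of that idea, assuming an MD5
# digest over salt + value (the real Anonymizer's salting scheme may differ):
import hashlib


def _anonymize(salt: str, value: str) -> str:
    # The same salt and value always produce the same hex digest, which is why
    # the test can assert an exact anonymized_name for the profiler "my_profiler".
    return hashlib.md5((salt + value).encode("utf-8")).hexdigest()
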
def test_get_profiler_run_usage_statistics_with_handler_valid_payload(
    mock_data_context: mock.MagicMock,
):
    # Ensure that a real handler gets passed down by the context
    handler: UsageStatisticsHandler = UsageStatisticsHandler(
        mock_data_context, "my_id", "my_url")
    mock_data_context.usage_statistics_handler = handler

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        name="my_profiler", config_version=1.0, data_context=mock_data_context)

    override_rules: Dict[str, dict] = {
        "my_override_rule": {
            "domain_builder": {
                "class_name": "ColumnDomainBuilder",
                "module_name": "great_expectations.rule_based_profiler.domain_builder",
            },
            "parameter_builders": [
                {
                    "class_name": "MetricMultiBatchParameterBuilder",
                    "module_name": "great_expectations.rule_based_profiler.parameter_builder",
                    "name": "my_parameter",
                    "metric_name": "my_metric",
                },
                {
                    "class_name": "NumericMetricRangeMultiBatchParameterBuilder",
                    "module_name": "great_expectations.rule_based_profiler.parameter_builder",
                    "name": "my_other_parameter",
                    "metric_name": "my_other_metric",
                },
            ],
            "expectation_configuration_builders": [
                {
                    "class_name": "DefaultExpectationConfigurationBuilder",
                    "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
                    "expectation_type": "expect_column_pair_values_A_to_be_greater_than_B",
                    "column_A": "$domain.domain_kwargs.column_A",
                    "column_B": "$domain.domain_kwargs.column_B",
                    "my_one_arg": "$parameter.my_parameter.value[0]",
                    "meta": {
                        "details": {
                            "my_parameter_estimator": "$parameter.my_parameter.details",
                            "note": "Important remarks about estimation algorithm.",
                        },
                    },
                },
                {
                    "class_name": "DefaultExpectationConfigurationBuilder",
                    "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
                    "expectation_type": "expect_column_min_to_be_between",
                    "column": "$domain.domain_kwargs.column",
                    "my_another_arg": "$parameter.my_other_parameter.value[0]",
                    "meta": {
                        "details": {
                            "my_other_parameter_estimator": "$parameter.my_other_parameter.details",
                            "note": "Important remarks about estimation algorithm.",
                        },
                    },
                },
            ],
        },
    }

    payload: dict = get_profiler_run_usage_statistics(
        profiler=profiler, rules=override_rules
    )

    assert payload == {
        "anonymized_name": "a0061ec021855cd2b3a994dd8d90fe5d",
        "anonymized_rules": [
            {
                "anonymized_domain_builder": {"parent_class": "ColumnDomainBuilder"},
                "anonymized_expectation_configuration_builders": [
                    {
                        "expectation_type": "expect_column_pair_values_A_to_be_greater_than_B",
                        "parent_class": "DefaultExpectationConfigurationBuilder",
                    },
                    {
                        "expectation_type": "expect_column_min_to_be_between",
                        "parent_class": "DefaultExpectationConfigurationBuilder",
                    },
                ],
                "anonymized_name": "bd8a8b4465a94b363caf2b307c080547",
                "anonymized_parameter_builders": [
                    {
                        "anonymized_name": "25dac9e56a1969727bc0f90db6eaa833",
                        "parent_class": "MetricMultiBatchParameterBuilder",
                    },
                    {
                        "anonymized_name": "be5baa3f1064e6e19356f2168968cbeb",
                        "parent_class": "NumericMetricRangeMultiBatchParameterBuilder",
                    },
                ],
            }
        ],
        "config_version": 1.0,
        "rule_count": 1,
        "variable_count": 0,
    }
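
# NOTE: Both profiler tests above depend on a mock_data_context fixture that is
# not shown here. An illustrative stand-in (the attribute set is an assumption,
# not the suite's actual fixture):
from unittest import mock

import pytest


@pytest.fixture
def mock_data_context() -> mock.MagicMock:
    # A MagicMock standing in for a DataContext: just enough surface for
    # UsageStatisticsHandler and RuleBasedProfiler to be constructed against it.
    context = mock.MagicMock()
    context.data_context_id = "00000000-0000-0000-0000-000000000001"
    return context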