Beispiel #1
0
    def test_validate_numeric_stats(self, *mocks):
        # Exercises numeric-stats validation in two steps: variance enabled
        # without sum (expects a ValueError), then numeric stats disabled
        # entirely (expects a UserWarning).
        profiler_opts = ProfilerOptions()

        # Expected message: sum must be toggled on whenever variance is.
        variance_without_sum = (
            "ProfilerOptions.structured_options.int: The numeric stats must "
            "toggle on the sum if the variance is toggled on.\n"
            "ProfilerOptions.structured_options.float: The numeric stats must "
            "toggle on the sum if the variance is toggled on.\n"
            "ProfilerOptions.structured_options.text: The numeric stats must "
            "toggle on the sum if the variance is toggled on.")
        profiler_opts.set({
            "histogram_and_quantiles.is_enabled": False,
            "min.is_enabled": False,
            "max.is_enabled": False,
            "sum.is_enabled": False,
            "variance.is_enabled": True
        })

        self.assertRaisesRegex(
            ValueError, variance_without_sum, profiler_opts.validate)

        # Validation should warn once is_numeric_stats_enabled is False.
        profiler_opts.set({
            "is_numeric_stats_enabled": False,
        })
        stats_disabled_warning = (
            'ProfilerOptions.structured_options.int.'
            'numeric_stats: The numeric stats are '
            'completely disabled.')
        with self.assertWarnsRegex(UserWarning, stats_disabled_warning):
            profiler_opts.validate()
Beispiel #2
0
    def test_data_labeler(self, *mocks):
        # Checks that the data labeler options (directory path, max sample
        # size, and an explicitly supplied labeler object) end up on the
        # data labeler column profile of the first profiled column.
        profiler_opts = ProfilerOptions()
        structured = profiler_opts.structured_options
        structured.data_labeler.data_labeler_dirpath = "Test_Dirpath"
        structured.data_labeler.max_sample_size = 50
        structured.multiprocess.is_enabled = False

        profile = Profiler(self.data, profiler_options=profiler_opts)

        # mocks[0] is the DataLabeler object mock.
        labeler_mock = mocks[0]
        labeler_mock.assert_called_with(
            dirpath='Test_Dirpath',
            labeler_type='structured',
            load_options=None,
        )
        # profile -> column profile -> compiler -> column profiler
        label_compiler = profile._profile[0].profiles['data_label_profile']
        label_column = label_compiler._profiles["data_labeler"]
        self.assertEqual(label_column._max_sample_size, 50)

        # Supplying a labeler object directly should trigger a warning that
        # it is used instead of the default model directory.
        custom_labeler = mock.Mock(spec=BaseDataLabeler)
        custom_labeler.reverse_label_mapping = {}
        custom_labeler.model.num_labels = 0
        profiler_opts.set({'data_labeler.data_labeler_object': custom_labeler})
        labeler_object_warning = (
            "The data labeler passed in will be used,"
            " not through the directory of the default"
            " model")
        with self.assertWarnsRegex(UserWarning, labeler_object_warning):
            profiler_opts.validate()

        profile = Profiler(self.data, profiler_options=profiler_opts)
        label_compiler = profile._profile[0].profiles['data_label_profile']
        label_column = label_compiler._profiles["data_labeler"]
        self.assertEqual(custom_labeler, label_column.data_labeler)
Beispiel #3
0
    def test_improper_profile_options(self, *mocks):
        # Checks that improperly typed profiler options are reported with a
        # ValueError, both when constructing a Profiler and when calling
        # validate() on the options.

        # The constructor is expected to raise, so its result is not bound
        # to a name (the binding could never be used).
        with self.assertRaisesRegex(
                ValueError, "The profile options must be passed as a "
                             "ProfileOptions object."):
            Profiler(self.data, options="Strings are not accepted")

        # Build the invalid options outside the assertion context so that
        # only validate() can satisfy the expected ValueError; an error
        # raised during setup would otherwise be attributed to validate().
        profile_options = ProfilerOptions()
        profile_options.structured_options.text.max.is_enabled = "String"
        with self.assertRaisesRegex(
                ValueError, "ProfilerOptions.structured_options.text.max."
                            "is_enabled must be a Boolean."):
            profile_options.validate()
Beispiel #4
0
    def test_validate(self, *mocks):
        # Sets three invalid option values and expects validate() to report
        # all of them in one ValueError message.
        profiler_opts = ProfilerOptions()
        structured = profiler_opts.structured_options

        structured.data_labeler.is_enabled = "Invalid"
        structured.data_labeler.data_labeler_dirpath = 5
        structured.int.max = "Invalid"

        combined_message = (
            "ProfilerOptions.structured_options.int.max must be a "
            "BooleanOption.\n"
            "ProfilerOptions.structured_options.data_labeler.is_enabled must be"
            " a Boolean.\n"
            "ProfilerOptions.structured_options.data_labeler."
            "data_labeler_dirpath must be a string.")
        self.assertRaisesRegex(
            ValueError, combined_message, profiler_opts.validate)