Example #1
    def test_predict(self, mock_stdout):
        parameters = {
            'regex_patterns': {
                'PAD': [r'\W'],
                'BACKGROUND': [r'\w']
            },
            'ignore_case': True,
            'default_label': 'BACKGROUND',
        }
        model = RegexModel(label_mapping=self.label_mapping,
                           parameters=parameters)

        # test pad-only and background-only samples separately; predict()
        # returns one 3-column one-hot row per input character
        expected_output = {
            'pred': [
                np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0]]),
                np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0],
                          [0, 1, 0]])
            ]
        }
        model_output = model.predict(['   ', 'hello'])
        self.assertIn('pred', model_output)
        for expected, output in zip(expected_output['pred'],
                                    model_output['pred']):
            self.assertTrue(np.array_equal(expected, output))

        # check verbose printing
        self.assertIn('Data Samples', mock_stdout.getvalue())

        # test pad and background interleaved in one sample
        expected_output = {
            'pred': [
                np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0],
                          [1, 0, 0]])
            ]
        }
        model_output = model.predict([' h w.'])
        self.assertIn('pred', model_output)
        for expected, output in zip(expected_output['pred'],
                                    model_output['pred']):
            self.assertTrue(np.array_equal(expected, output))

        # test show confidences
        expected_output = {
            'pred': [
                np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0],
                          [1, 0, 0]])
            ],
            'conf': [
                np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0],
                          [1, 0, 0]])
            ]
        }
        model_output = model.predict([' h w.'], show_confidences=True)
        self.assertIn('pred', model_output)
        self.assertIn('conf', model_output)
        for expected, output in zip(expected_output['pred'],
                                    model_output['pred']):
            self.assertTrue(np.array_equal(expected, output))
        for expected, output in zip(expected_output['conf'],
                                    model_output['conf']):
            self.assertTrue(np.array_equal(expected, output))

        # test verbose = False
        # clear stdout
        mock_stdout.seek(0)
        mock_stdout.truncate(0)
        model_output = model.predict(['hello world.'], verbose=False)
        self.assertNotIn('Data Samples', mock_stdout.getvalue())
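These listings assume class-level scaffolding that the example page strips out. Below is a minimal sketch of that assumed context: a StringIO patch on sys.stdout (implied by mock_stdout.getvalue() and seek/truncate) and a hypothetical three-label mapping (only the 3-column width of the one-hot arrays is implied by the asserts; the third label name 'OTHER' and the exact import path are assumptions, not from the original):

import unittest
from io import StringIO
from unittest import mock

import numpy as np
from dataprofiler.labelers.regex_model import RegexModel  # assumed path, matching the logger name


@mock.patch("sys.stdout", new_callable=StringIO)  # each test method receives mock_stdout
class TestRegexModel(unittest.TestCase):

    # Hypothetical mapping; Example #2 below uses "UNKNOWN" in place of
    # "BACKGROUND". Only the 3-label width is implied by the asserts.
    label_mapping = {"PAD": 0, "BACKGROUND": 1, "OTHER": 2}

    def test_predict(self, mock_stdout):
        ...  # body as in Example #1 above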
Example #2
    def test_predict(self, mock_stdout):
        parameters = {
            "regex_patterns": {
                "PAD": [r"\W"],
                "UNKNOWN": [r"\w"]
            },
            "ignore_case": True,
            "default_label": "UNKNOWN",
        }
        model = RegexModel(label_mapping=self.label_mapping,
                           parameters=parameters)

        # test pad-only and unknown-only samples separately
        expected_output = {
            "pred": [
                np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0]]),
                np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0],
                          [0, 1, 0]]),
            ]
        }
        with self.assertLogs("DataProfiler.labelers.regex_model",
                             level="INFO") as logs:
            model_output = model.predict(["   ", "hello"])
        self.assertIn("pred", model_output)
        for expected, output in zip(expected_output["pred"],
                                    model_output["pred"]):
            self.assertTrue(np.array_equal(expected, output))

        # check verbose printing
        self.assertIn("Data Samples", mock_stdout.getvalue())
        # check verbose logging
        self.assertTrue(len(logs.output))

        # test pad and unknown interleaved in one sample
        expected_output = {
            "pred": [
                np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0],
                          [1, 0, 0]])
            ]
        }
        model_output = model.predict([" h w."])
        self.assertIn("pred", model_output)
        for expected, output in zip(expected_output["pred"],
                                    model_output["pred"]):
            self.assertTrue(np.array_equal(expected, output))

        # test show confidences
        expected_output = {
            "pred": [
                np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0],
                          [1, 0, 0]])
            ],
            "conf": [
                np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0],
                          [1, 0, 0]])
            ],
        }
        model_output = model.predict([" h w."], show_confidences=True)
        self.assertIn("pred", model_output)
        self.assertIn("conf", model_output)
        for expected, output in zip(expected_output["pred"],
                                    model_output["pred"]):
            self.assertTrue(np.array_equal(expected, output))
        for expected, output in zip(expected_output["conf"],
                                    model_output["conf"]):
            self.assertTrue(np.array_equal(expected, output))

        # clear stdout
        mock_stdout.seek(0)
        mock_stdout.truncate(0)

        # test verbose=False; ensure no INFO-level records are logged
        with self.assertRaisesRegex(
                AssertionError,
                "no logs of level INFO or higher triggered "
                "on DataProfiler.labelers.regex_model",
        ):
            with self.assertLogs("DataProfiler.labelers.regex_model",
                                 level="INFO"):
                model.predict(["hello world."], verbose=False)

        # Not in stdout
        self.assertNotIn("Data Samples", mock_stdout.getvalue())