def test_get_output(self):
    # Verify that get_output() calls strip() on the value returned by the
    # wrapped JVM object's toString(). A Mock stands in for the JVM.
    stream = ByteArrayOutputStream()
    stream.jvm = Mock()

    stream.get_output()

    strip_mock = stream.jvm_obj.toString().strip
    strip_mock.assert_called()
Example #2
0
    def test_get_output(self):
        # A Mock replaces the JVM so no real Java gateway is needed; the test
        # only checks that get_output() strips the toString() result.
        fake_jvm = Mock()
        stream = ByteArrayOutputStream()
        stream.jvm = fake_jvm

        stream.get_output()

        to_string_result = stream.jvm_obj.toString()
        to_string_result.strip.assert_called()
    def test_output(self):
        check = Check(self.df).hasUniqueKey("_1").hasUniqueKey("_1", "_2")
        baos = ByteArrayOutputStream()
        reporter = ConsoleReporter(baos)
        check.run([reporter])
        expected_output = """
\x1b[34mChecking [_1: bigint, _2: string]\x1b[0m
\x1b[34mIt has a total number of 2 columns and 3 rows.\x1b[0m
\x1b[31m- Column _1 is not a key (1 non-unique tuple).\x1b[0m
\x1b[32m- Columns _1, _2 are a key.\x1b[0m
""".strip()
        self.assertEqual(baos.get_output(), expected_output)
    def test_output(self):
        check = Check(self.df).hasUniqueKey("_1").hasUniqueKey("_1", "_2")
        baos = ByteArrayOutputStream()
        reporter = ConsoleReporter(baos)
        check.run([reporter])
        expected_output = """
\x1b[34mChecking [_1: bigint, _2: string]\x1b[0m
\x1b[34mIt has a total number of 2 columns and 3 rows.\x1b[0m
\x1b[31m- Column _1 is not a key (1 non-unique tuple).\x1b[0m
\x1b[32m- Columns _1, _2 are a key.\x1b[0m
""".strip()
        self.assertEqual(baos.get_output(), expected_output)
Example #5
0
    def test_jvm_obj(self):
        # Checks two things about ByteArrayOutputStream.jvm_obj:
        #   1. accessing it before a JVM is attached raises AttributeError;
        #   2. once a JVM is attached, repeated accesses return the same object.
        jvm = Mock()

        baos = ByteArrayOutputStream()
        with self.assertRaises(AttributeError):
            _ = baos.jvm_obj

        # check that on the second call ByteArrayOutputStream returns the same jvm_obj
        # NOTE: the keyword must be ``side_effect`` (singular). With the
        # misspelled ``side_effects`` the mock returns the same child mock on
        # every call, so the equality below would hold even without caching.
        # With ``side_effect=[1, 2]`` a second constructor call would yield 2
        # and make the assertion fail.
        jvm.java.io.ByteArrayOutputStream = Mock(side_effect=[1, 2])
        baos.jvm = jvm
        jvm_obj1 = baos.jvm_obj
        jvm_obj2 = baos.jvm_obj
        self.assertEqual(jvm_obj1, jvm_obj2)
    def test_output(self):
        check = Check(self.df).hasUniqueKey("_1").hasUniqueKey("_1", "_2")
        baos = ByteArrayOutputStream()
        reporter = MarkdownReporter(baos)
        check.run([reporter])
        expected_output = """
**Checking [_1: bigint, _2: string]**

It has a total number of 2 columns and 3 rows.

- *FAILURE*: Column _1 is not a key (1 non-unique tuple).
- *SUCCESS*: Columns _1, _2 are a key.
""".strip()
        self.assertEqual(baos.get_output(), expected_output)
    def test_output(self):
        check = Check(self.df).hasUniqueKey("_1").hasUniqueKey("_1", "_2")
        baos = ByteArrayOutputStream()
        reporter = MarkdownReporter(baos)
        check.run([reporter])
        expected_output = """
**Checking [_1: bigint, _2: string]**

It has a total number of 2 columns and 3 rows.

- *FAILURE*: Column _1 is not a key (1 non-unique tuple).
- *SUCCESS*: Columns _1, _2 are a key.
""".strip()
        self.assertEqual(baos.get_output(), expected_output)
    def test_jvm_obj(self):
        # Checks two things about ByteArrayOutputStream.jvm_obj:
        #   1. accessing it before a JVM is attached raises AttributeError;
        #   2. once a JVM is attached, repeated accesses return the same object.
        jvm = Mock()

        baos = ByteArrayOutputStream()
        with self.assertRaises(AttributeError):
            _ = baos.jvm_obj

        # check that on the second call ByteArrayOutputStream returns the same jvm_obj
        # NOTE: the keyword must be ``side_effect`` (singular). The misspelled
        # ``side_effects`` leaves the mock returning the same child mock on
        # every call, which makes the assertion below pass even if jvm_obj
        # were re-created on each access.
        jvm.java.io.ByteArrayOutputStream = Mock(
            side_effect=[1, 2]
        )
        baos.jvm = jvm
        jvm_obj1 = baos.jvm_obj
        jvm_obj2 = baos.jvm_obj
        self.assertEqual(jvm_obj1, jvm_obj2)
    def test_output(self):
        with patch("pyddq.reporters.get_field") as get_field:
            baos = ByteArrayOutputStream()
            baos.jvm = self.df._sc._jvm

            get_field.return_value = baos.jvm_obj
            check = Check(self.df).hasUniqueKey("_1").hasUniqueKey("_1", "_2")
            z = Mock()
            reporter = ZeppelinReporter(z)
            check.run([reporter])
            expected_output = """
%html
</p>
<h4>Checking [_1: bigint, _2: string]</h4>
<h5>It has a total number of 2 columns and 3 rows.</h5>
<table>
<tr><td style="padding:3px">&#10060;</td><td style="padding:3px">Column _1 is not a key (1 non-unique tuple).</td></tr>
<tr><td style="padding:3px">&#9989;</td><td style="padding:3px">Columns _1, _2 are a key.</td></tr>
</table>
<p hidden>
""".strip()
            self.assertEqual(baos.get_output(), expected_output)
    def test_output(self):
        with patch("pyddq.reporters.get_field") as get_field:
            baos = ByteArrayOutputStream()
            baos.jvm = self.df._sc._jvm

            get_field.return_value = baos.jvm_obj
            check = Check(self.df).hasUniqueKey("_1").hasUniqueKey("_1", "_2")
            z = Mock()
            reporter = ZeppelinReporter(z)
            check.run([reporter])
            expected_output = """
%html
</p>
<h4>Checking [_1: bigint, _2: string]</h4>
<h5>It has a total number of 2 columns and 3 rows.</h5>
<table>
<tr><td style="padding:3px">&#10060;</td><td style="padding:3px">Column _1 is not a key (1 non-unique tuple).</td></tr>
<tr><td style="padding:3px">&#9989;</td><td style="padding:3px">Columns _1, _2 are a key.</td></tr>
</table>
<p hidden>
""".strip()
            self.assertEqual(baos.get_output(), expected_output)
Example #11
0
    def run(self, reporters=None):
        """
        Execute the check with all previously specified constraints and hand
        the result to each of the given reporters.
        Args:
            reporters (List[reporters.Reporter]): iterable of reporters that
                receive the check result. When empty or omitted, a
                reporters.ConsoleReporter writing to an in-memory stream is
                used and the captured text is printed afterwards.
        Returns: None
        """
        console_stream = None
        if not reporters:
            # Fall back to a console report captured in memory so that it can
            # be printed from the Python side once the check has run.
            console_stream = ByteArrayOutputStream()
            reporters = [ConsoleReporter(console_stream)]

        # Convert each Python reporter to its JVM counterpart, then wrap the
        # collection in a Scala list for the JVM-side check.
        jvm_reporter_objs = []
        for reporter in reporters:
            jvm_reporter_objs.append(reporter.get_jvm_reporter(self._jvm))
        scala_reporters = jc.iterable_to_scala_list(self._jvm, jvm_reporter_objs)
        self.jvmCheck.run(scala_reporters)

        if console_stream:
            print(console_stream.get_output())
Example #12
0
    def run(self, reporters=None):
        """
        Runs check with all the previously specified constraints and report to
        every reporter passed as an argument
        Args:
            reporters (List[reporters.Reporter]): iterable of reporters
                to produce output on the check result. If not specified,
                reporters.ConsoleReporter is used
        Returns: None
        """
        baos = None
        if not reporters:
            # No reporter supplied: capture a console report in memory so it
            # can be printed from Python after the JVM check has run.
            baos = ByteArrayOutputStream()
            reporters = [ConsoleReporter(baos)]

        jvm_reporters = jc.iterable_to_scala_list(
            self._jvm,
            [reporter.get_jvm_reporter(self._jvm) for reporter in reporters]
        )
        self.jvmCheck.run(jvm_reporters)

        if baos:
            # Call form of print: valid on Python 3 and, with a single
            # argument, prints the same text on Python 2. The former
            # ``print x`` statement is a SyntaxError on Python 3.
            print(baos.get_output())
Example #13
0
 def setUp(self):
     # Create (or reuse) a local 4-thread Spark session named "Testing" and
     # a Markdown reporter that writes to an in-memory stream.
     session_builder = SparkSession.builder.appName("Testing")
     session_builder = session_builder.master("local[4]")
     self.spark = session_builder.getOrCreate()

     output_stream = ByteArrayOutputStream()
     self.reporter = MarkdownReporter(output_stream)
 def setUp(self):
     # Build a SparkContext with its SQLContext, plus a Markdown reporter
     # that writes to an in-memory stream.
     spark_context = SparkContext()
     self.sc = spark_context
     self.sqlContext = SQLContext(spark_context)

     output_stream = ByteArrayOutputStream()
     self.reporter = MarkdownReporter(output_stream)