Example #1
0
 def test_run_invalid_scan(self, monkeypatch):
     """Expect ``run()`` to raise ``AttributeError`` for an invalid scan definition."""
     spark = SparkSession.builder.getOrCreate()
     rows = [
         {"id": 123, "value": "foo"},
         {"id": 456, "value": "bar"},
     ]
     task = SodaSparkScan(
         scan_def="invalid scan definition",
         df=spark.createDataFrame(rows),
     )
     # Construction sits outside the ``raises`` block, so only ``run()`` may fail.
     with pytest.raises(AttributeError):
         task.run()
Example #2
0
    def test_run_no_scan(self):
        """Expect ``run()`` to raise ``ValueError`` when no ``scan_def`` was given.

        The task is built with a data frame only, and the error message must
        mention that ``scan_def`` cannot be ``None``.
        """
        df = SparkSession.builder.getOrCreate().createDataFrame([{
            "id": 123,
            "value": "foo"
        }, {
            "id": 456,
            "value": "bar"
        }])
        soda_spark_scan_task = SodaSparkScan(df=df)

        with pytest.raises(ValueError) as exc:
            soda_spark_scan_task.run()
        # ``exc`` is pytest's ExceptionInfo wrapper; check the message of the
        # exception it captured (``exc.value``) rather than the wrapper's own
        # string form, which is not the exception message.
        assert "scan_def cannot be None" in str(exc.value)
Example #3
0
 def test_run_invalid_df(self, monkeypatch):
     """Expect ``run()`` to raise ``AttributeError`` when ``df`` is a plain string."""
     scan_def = """
     table_name: demodata
     metrics:
     - row_count
     - max
     - min_length
     tests:
     - row_count > 0
     """
     # A string stands in for the data frame argument.
     task = SodaSparkScan(scan_def=scan_def, df="not a valid df")
     with pytest.raises(AttributeError):
         task.run()
Example #4
0
    def test_run_valid_scan_and_df_with_measurements(self):
        """Run the task with a scan definition and a data frame.

        The object returned by ``run()`` must expose a ``measurements``
        attribute.
        """
        scan_def = """
        table_name: demodata
        metrics:
        - row_count
        - max
        - min_length
        tests:
        - row_count > 0
        """
        spark = SparkSession.builder.getOrCreate()
        rows = [
            {"id": 123, "value": "foo"},
            {"id": 456, "value": "bar"},
        ]
        task = SodaSparkScan(scan_def=scan_def, df=spark.createDataFrame(rows))
        scan_result = task.run()

        assert hasattr(scan_result, "measurements")
Example #5
0
    def test_construction_provide_scan_and_df(self):
        """Check the constructor exposes the given ``scan_def`` and ``df``."""
        expected_scan_def = "/foo/bar.yaml"
        spark = SparkSession.builder.getOrCreate()
        rows = [
            {"id": 123, "value": "foo"},
            {"id": 456, "value": "bar"},
        ]
        expected_df = spark.createDataFrame(rows)

        task = SodaSparkScan(scan_def=expected_scan_def, df=expected_df)

        assert task.scan_def == expected_scan_def
        assert task.df == expected_df
Example #6
0
 def test_run_no_df(self):
     """Expect ``run()`` to raise ``ValueError`` when no ``df`` was given.

     The task is built with a scan definition only, and the error message
     must mention that ``df`` cannot be ``None``.
     """
     soda_spark_scan_task = SodaSparkScan(scan_def="/foo/bar.yaml")
     with pytest.raises(ValueError) as exc:
         soda_spark_scan_task.run()
     # ``exc`` is pytest's ExceptionInfo wrapper; check the message of the
     # exception it captured (``exc.value``) rather than the wrapper's own
     # string form, which is not the exception message.
     assert "df cannot be None" in str(exc.value)
Example #7
0
    def test_construction_no_scan_and_df(self):
        """Check a task built without arguments has ``scan_def`` and ``df`` of ``None``."""
        task = SodaSparkScan()

        assert task.scan_def is None
        assert task.df is None