# Shared imports assumed for the examples below (each example originally
# came from its own test module). test_sqldw uses pytest's built-in
# monkeypatch fixture; test_feature_engineering needs the pytest-mock
# plugin for the mocker fixture.
import databricks_test
import pandas as pd
import pyspark.sql.functions as F
import pyspark.sql.readwriter
import pytest
from databricks_test import SessionAlreadyExistsException
from pandas.testing import assert_frame_equal
from tempfile import TemporaryDirectory


def test_forbidden_concurrent_sessions():
    with databricks_test.session() as dbrickstest:  # noqa: F841
        try:
            with databricks_test.session() as dbrickstest2:  # noqa: F841
                assert False, "should have failed"
        except SessionAlreadyExistsException:
            pass
Example #2
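The multiple-runs tests call a project-local run_notebook helper that is
not part of this listing. A minimal sketch of what it might look like,
assuming the notebook picks up its run number from a "run" widget (the
widget name is an assumption, not from the source):

def run_notebook(notebook, run, dbrickstest):
    # Hypothetical helper: expose the run number through the mocked
    # widgets, then execute the notebook from the current directory.
    dbrickstest.dbutils.widgets.get.side_effect = \
        lambda name: str(run) if name == "run" else ""
    dbrickstest.run_notebook(".", notebook)
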
def test_multiple_runs_in_same_session_1():
    with databricks_test.session() as dbrickstest:
        run_notebook("multiple_runs_notebook", 1, dbrickstest)
        run_notebook("multiple_runs_notebook", 2, dbrickstest)

    with databricks_test.session() as dbrickstest:
        run_notebook("multiple_runs_notebook", 3, dbrickstest)
Example #3
def test_patch():
    with databricks_test.session() as dbrickstest:
        # Provide input and output location as widgets to notebook
        switcher = {
            "input": "input_value",
            "output": "output_value",
        }
        dbrickstest.dbutils.widgets.get.side_effect = \
            lambda name: switcher.get(name, "")

        # Run notebook
        dbrickstest.run_notebook(".", "patch_notebook")

    with databricks_test.session() as dbrickstest:
        dbrickstest.run_notebook(".", "patch_notebook2")
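
For context, a hypothetical sketch of what a notebook such as
patch_notebook might contain (the actual notebook is not shown). The test
feeds it the "input" and "output" widget values through the mocked dbutils:

# patch_notebook (hypothetical)
input_path = dbutils.widgets.get("input")    # returns "input_value"
output_path = dbutils.widgets.get("output")  # returns "output_value"
assert input_path == "input_value"
assert output_path == "output_value"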
Example #4
def test_sqldw(monkeypatch):
    with databricks_test.session() as dbrickstest, TemporaryDirectory() as tmp:

        out_dir = f"{tmp}/out"

        # Mock SQL DW loader, creating a Spark DataFrame instead
        def mock_load(reader):
            return (
                dbrickstest.spark.range(10)
                .withColumn("age", F.col("id") * 6)
                .withColumn("salary", F.col("id") * 10000)
            )

        monkeypatch.setattr(
            pyspark.sql.readwriter.DataFrameReader, "load", mock_load)

        # Mock SQL DW writer, writing to a local Parquet file instead.
        # monkeypatch.undo() first removes the patches, so the
        # writer.save() call below reaches the real implementation
        # rather than recursing back into mock_save.
        def mock_save(writer):
            monkeypatch.undo()
            writer.format("parquet")
            writer.save(out_dir)

        monkeypatch.setattr(
            pyspark.sql.readwriter.DataFrameWriter, "save", mock_save)

        # Run notebook
        dbrickstest.run_notebook(".", "sqldw_notebook")

        # Notebook produces a Parquet file (directory)
        resultDF = pd.read_parquet(out_dir)

        # Compare produced Parquet file and expected CSV file
        expectedDF = pd.read_csv("tests/sqldw_expected.csv")
        assert_frame_equal(expectedDF, resultDF, check_dtype=False)
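
For orientation, a hypothetical sketch of sqldw_notebook (the real
notebook is not shown). Since the test patches DataFrameReader.load and
DataFrameWriter.save, the SQL DW connector options are never exercised:

# sqldw_notebook (hypothetical)
df = (spark.read
      .format("com.databricks.spark.sqldw")
      .option("url", "jdbc:sqlserver://example")  # placeholder option
      .load())                                    # intercepted by mock_load

(df.select("id", "age", "salary")
 .write
 .format("com.databricks.spark.sqldw")
 .save())                                         # intercepted by mock_save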
Example #5
def test_results_do_not_match():
    with databricks_test.session() as dbrickstest:
        actual_query = """
        SELECT col1,col2
        FROM
        (VALUES 
          (100,'foo'),
          (101,'bar'),
          (102,'baz')
        ) AS v (col1, col2)
        """

        expected_query = """
        SELECT col1,col2
        FROM
        (VALUES 
          (100,'foo'),
          (110,'bar'),
          (999,'qux')
        ) AS v (col1, col2)
        """

        with pytest.raises(Exception) as exception_message:
            dbrickstest.assert_queries_are_equal(actual_query, expected_query)

        assert str(exception_message.value).startswith(
            "the result sets did not match:")
def test_rows_returned():
    with databricks_test.session() as dbrickstest:
        query = """
        SELECT col1,col2
        FROM
        (VALUES 
          (100,'foo'),
          (101,'bar'),
          (102,'baz')
        ) AS v (col1, col2)
        ORDER BY col1
        """

        expected_message = """the result set was not empty:
+----+----+
|col1|col2|
+----+----+
|100 |foo |
|101 |bar |
|102 |baz |
+----+----+
"""
        with pytest.raises(Exception) as exception_message:
            dbrickstest.assert_query_returns_no_rows(query)

        assert str(exception_message.value) == expected_message
Example #7
def test_deltalake_write():
    with databricks_test.session() as dbrickstest:
        with TemporaryDirectory() as tmp_dir:
            out_dir = f"{tmp_dir}/delta_out"

            # Provide input and output location as widgets to notebook
            switch = {
                "output": out_dir,
            }
            dbrickstest.dbutils.widgets.get.side_effect = \
                lambda name: switch.get(name, "")

            # Run notebook
            dbrickstest.run_notebook(".", "deltalake_write_notebook")

            # Read delta
            df = dbrickstest.spark.read.format("delta").load(out_dir)

            # Validate dataframe contains the expected values
            rg = range(0, 5)
            for n in rg:
                assert df.filter(df["id"] == n).count() == 1

            # Validate dataframe contains no unexpected values
            assert df.count() == 5
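
A hypothetical sketch of deltalake_write_notebook, consistent with the
assertions above (ids 0 through 4, five rows in total); the actual
notebook is not shown:

# deltalake_write_notebook (hypothetical)
out_dir = dbutils.widgets.get("output")
spark.range(5).write.format("delta").save(out_dir)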
Example #8
def test_missing_result():
    with databricks_test.session() as dbrickstest:
        actual_query = """
        SELECT col1,col2
        FROM
        (VALUES 
          (100,'foo')
        ) AS v (col1, col2)
        """

        expected_query = """
        SELECT col1,col2
        FROM
        (VALUES 
          (100,'foo'),
          (101,'bar')
        ) AS v (col1, col2)
        """

        expected_message = """the result sets did not match:
+---+----+----+
|m  |col1|col2|
+---+----+----+
|=  |100 |foo |
|<  |101 |bar |
+---+----+----+
"""

        with pytest.raises(Exception) as exception_message:
            dbrickstest.assert_queries_are_equal(actual_query, expected_query)

        assert str(exception_message.value) == expected_message
Example #9
def test_results_match():
    with databricks_test.session() as dbrickstest:
        query = """
        SELECT col1,col2
        FROM
        (VALUES 
          (100,'foo'),
          (101,'bar'),
          (102,'baz')
        ) AS v (col1, col2)
        """

        dbrickstest.assert_queries_are_equal(query, query)
def test_no_rows_returned():
    with databricks_test.session() as dbrickstest:
        query = """
        SELECT col1,col2
        FROM
        (VALUES 
          (100,'foo'),
          (101,'bar'),
          (102,'baz')
        ) AS v (col1, col2)
        WHERE 1=2
        """

        dbrickstest.assert_query_returns_no_rows(query)
Example #11
def test_etl():
    with databricks_test.session() as dbrickstest:
        with TemporaryDirectory() as tmp_dir:
            out_dir = f"{tmp_dir}/out"

            # Provide input and output location as widgets to notebook
            switch = {
                "input": "tests/etl_input.csv",
                "output": out_dir,
            }
            dbrickstest.dbutils.widgets.get.side_effect = \
                lambda name: switch.get(name, "")

            # Run notebook
            dbrickstest.run_notebook(".", "etl_notebook")

            # Notebook produces a Parquet file (directory)
            resultDF = pd.read_parquet(out_dir)

        # Compare produced Parquet file and expected CSV file
        expectedDF = pd.read_csv("tests/etl_expected.csv")
        assert_frame_equal(expectedDF, resultDF, check_dtype=False)
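
A hypothetical sketch of etl_notebook, consistent with the test above:
read a CSV from the "input" widget path, transform it, and write Parquet
to the "output" widget path (the transformation itself is not shown):

# etl_notebook (hypothetical)
in_path = dbutils.widgets.get("input")
out_path = dbutils.widgets.get("output")
df = spark.read.option("header", "true").csv(in_path)
# ...project-specific transformations here...
df.write.parquet(out_path)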
def test_feature_engineering(mocker):
    mocker.patch("azureml.core.Run")

    with databricks_test.session() as dbrickstest, \
            TemporaryDirectory() as out_dir:

        # Provide input and output location as widgets to notebook
        switcher = {
            "training": "code/tests/diabetes_missing_values.csv",
            "feature_engineered": out_dir,
        }
        dbrickstest.dbutils.widgets.get = lambda x: switcher.get(x, "")

        capture_files = {}

        def mock_cp(src, dst):
            # The notebook copies its output from local disk with
            # dbutils.fs.cp("file:...", dst); intercept the copy and
            # capture the file contents for the assertions below.
            prefix = "file:"
            assert src.startswith(prefix)
            capture_files[dst] = pd.read_csv(src)
            return True

        dbrickstest.dbutils.fs.cp.side_effect = mock_cp

        # Run notebook
        dbrickstest.run_notebook("./code/prepare", "feature_engineering")

    expected_name = "engineered.csv"
    expected_file = f"{out_dir}/{expected_name}"
    assert expected_file in capture_files
    resultDF = capture_files[expected_file]

    # Compare produced and expected CSV files
    expectedDF = pd.read_csv(
        "code/tests/feature_engineering_expected.csv")
    assert_frame_equal(
        expectedDF, resultDF, check_dtype=False, check_categorical=False)
def test_library():
    with databricks_test.session() as dbrickstest:
        # Run notebook
        dbrickstest.run_notebook(".", "library_notebook")
Example #14
def test_fs():
    with databricks_test.session() as dbrickstest:
        # Run notebook
        dbrickstest.run_notebook(".", "fs_notebook")
Example #15
def test_multiple_runs_in_same_session_and_run_other_session():
    with databricks_test.session() as dbrickstest:
        run_notebook("multiple_runs_notebook", 4, dbrickstest)
Example #16
def test_multiple_runs_in_multiple_test_cases2():
    with databricks_test.session() as dbrickstest:
        run_notebook("multiple_runs_notebook2", 6, dbrickstest)
Example #17
def test_workflow():
    with databricks_test.session() as dbrickstest:
        # Run notebook
        dbrickstest.run_notebook(".", "workflow_notebook")