from tempfile import TemporaryDirectory

import pandas as pd
import pyspark.sql
import pytest
from pandas.testing import assert_frame_equal
from pyspark.sql import functions as F

import databricks_test
from databricks_test import SessionAlreadyExistsException


def test_forbidden_concurrent_sessions():
    with databricks_test.session() as dbrickstest:  # noqa: F841
        try:
            with databricks_test.session() as dbrickstest2:  # noqa: F841
                assert False, "should have failed"
        except SessionAlreadyExistsException:
            pass


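# The multiple-runs tests below use a small run_notebook helper whose
# definition is not part of this excerpt. The version here is a hypothetical
# reconstruction, assuming the notebook receives the run number through a
# widget (the widget name "run" is a guess) and is executed from the current
# directory.
def run_notebook(notebook_name, run_count, dbrickstest):
    dbrickstest.dbutils.widgets.get.side_effect = (
        lambda x: str(run_count) if x == "run" else "")
    dbrickstest.run_notebook(".", notebook_name)

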
def test_multiple_runs_in_same_session_1():
    with databricks_test.session() as dbrickstest:
        run_notebook("multiple_runs_notebook", 1, dbrickstest)
        run_notebook("multiple_runs_notebook", 2, dbrickstest)
    with databricks_test.session() as dbrickstest:
        run_notebook("multiple_runs_notebook", 3, dbrickstest)


def test_patch():
    with databricks_test.session() as dbrickstest:
        # Provide input and output location as widgets to notebook
        switcher = {
            "input": "input_value",
            "output": "output_value",
        }
        dbrickstest.dbutils.widgets.get.side_effect = lambda x: switcher.get(
            x, "")

        # Run notebook
        dbrickstest.run_notebook(".", "patch_notebook")

    with databricks_test.session() as dbrickstest:
        dbrickstest.run_notebook(".", "patch_notebook2")


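# Hypothetical sketch (not part of this excerpt) of what "patch_notebook"
# might contain: under databricks_test, the notebook-side dbutils is the mock
# configured above, so the widget values round-trip as plain strings.
def _patch_notebook_sketch(dbutils):
    input_value = dbutils.widgets.get("input")
    output_value = dbutils.widgets.get("output")
    assert input_value == "input_value"
    assert output_value == "output_value"

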
def test_sqldw(monkeypatch):
    with databricks_test.session() as dbrickstest, TemporaryDirectory() as tmp:
        out_dir = f"{tmp}/out"

        # Mock SQL DW loader, creating a Spark DataFrame instead
        def mock_load(reader):
            return (dbrickstest.spark
                    .range(10)
                    .withColumn("age", F.col("id") * 6)
                    .withColumn("salary", F.col("id") * 10000))
        monkeypatch.setattr(
            pyspark.sql.readwriter.DataFrameReader, "load", mock_load)

        # Mock SQL DW writer, writing to a local Parquet file instead
        def mock_save(writer):
            monkeypatch.undo()
            writer.format("parquet")
            writer.save(out_dir)
        monkeypatch.setattr(
            pyspark.sql.readwriter.DataFrameWriter, "save", mock_save)

        # Run notebook
        dbrickstest.run_notebook(".", "sqldw_notebook")

        # Notebook produces a Parquet file (directory)
        resultDF = pd.read_parquet(out_dir)

        # Compare produced Parquet file and expected CSV file
        expectedDF = pd.read_csv("tests/sqldw_expected.csv")
        assert_frame_equal(expectedDF, resultDF, check_dtype=False)


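# Hypothetical sketch of the kind of notebook "sqldw_notebook" could be (the
# real notebook is not included here): it reads from Azure SQL Data Warehouse
# through the "com.databricks.spark.sqldw" connector and writes results back.
# The mocks above intercept DataFrameReader.load and DataFrameWriter.save, so
# no real warehouse is needed. The connection options below are placeholders.
def _sqldw_notebook_sketch(spark):
    df = (spark.read
          .format("com.databricks.spark.sqldw")
          .option("url", "jdbc:sqlserver://<placeholder>")  # placeholder
          .option("dbTable", "dbo.people")                  # placeholder
          .load())
    (df.write
       .format("com.databricks.spark.sqldw")
       .option("dbTable", "dbo.people_out")                 # placeholder
       .save())

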
def test_results_do_not_match():
    with databricks_test.session() as dbrickstest:
        actual_query = """
            SELECT col1,col2 FROM (VALUES
                (100,'foo'),
                (101,'bar'),
                (102,'baz')
            ) AS v (col1, col2)
        """
        expected_query = """
            SELECT col1,col2 FROM (VALUES
                (100,'foo'),
                (110,'bar'),
                (999,'qux')
            ) AS v (col1, col2)
        """
        with pytest.raises(Exception) as exception_message:
            dbrickstest.assert_queries_are_equal(actual_query, expected_query)
        assert str(exception_message.value).startswith(
            "the result sets did not match:")


def test_rows_returned():
    with databricks_test.session() as dbrickstest:
        query = """
            SELECT col1,col2 FROM (VALUES
                (100,'foo'),
                (101,'bar'),
                (102,'baz')
            ) AS v (col1, col2)
            ORDER BY col1
        """
        expected_message = """the result set was not empty:
+----+----+
|col1|col2|
+----+----+
|100 |foo |
|101 |bar |
|102 |baz |
+----+----+
"""
        with pytest.raises(Exception) as exception_message:
            dbrickstest.assert_query_returns_no_rows(query)
        assert str(exception_message.value) == expected_message


def test_deltalake_write():
    with databricks_test.session() as dbrickstest:
        with TemporaryDirectory() as tmp_dir:
            out_dir = f"{tmp_dir}/delta_out"

            # Provide input and output location as widgets to notebook
            switch = {
                "output": out_dir,
            }
            dbrickstest.dbutils.widgets.get.side_effect = lambda x: switch.get(
                x, "")

            # Run notebook
            dbrickstest.run_notebook(".", "deltalake_write_notebook")

            # Read delta
            df = dbrickstest.spark.read.format("delta").load(out_dir)

            # Validate dataframe contains the expected values
            for n in range(0, 5):
                assert df.filter(df["id"] == n).count() == 1

            # Validate dataframe contains no unexpected values
            assert df.count() == 5


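# Hypothetical sketch of "deltalake_write_notebook" (the real notebook is not
# in this excerpt): given the assertions above, it plausibly writes ids 0..4
# in Delta format to the path supplied through the "output" widget.
def _deltalake_write_notebook_sketch(spark, dbutils):
    out_dir = dbutils.widgets.get("output")
    spark.range(5).write.format("delta").save(out_dir)

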
def test_missing_result():
    with databricks_test.session() as dbrickstest:
        actual_query = """
            SELECT col1,col2 FROM (VALUES
                (100,'foo')
            ) AS v (col1, col2)
        """
        expected_query = """
            SELECT col1,col2 FROM (VALUES
                (100,'foo'),
                (101,'bar')
            ) AS v (col1, col2)
        """
        expected_message = """the result sets did not match:
+---+----+----+
|m  |col1|col2|
+---+----+----+
|=  |100 |foo |
|<  |101 |bar |
+---+----+----+
"""
        with pytest.raises(Exception) as exception_message:
            dbrickstest.assert_queries_are_equal(actual_query, expected_query)
        assert str(exception_message.value) == expected_message


def test_results_match():
    with databricks_test.session() as dbrickstest:
        query = """
            SELECT col1,col2 FROM (VALUES
                (100,'foo'),
                (101,'bar'),
                (102,'baz')
            ) AS v (col1, col2)
        """
        dbrickstest.assert_queries_are_equal(query, query)


def test_no_rows_returned():
    with databricks_test.session() as dbrickstest:
        query = """
            SELECT col1,col2 FROM (VALUES
                (100,'foo'),
                (101,'bar'),
                (102,'baz')
            ) AS v (col1, col2)
            WHERE 1=2
        """
        dbrickstest.assert_query_returns_no_rows(query)


def test_etl():
    with databricks_test.session() as dbrickstest:
        with TemporaryDirectory() as tmp_dir:
            out_dir = f"{tmp_dir}/out"

            # Provide input and output location as widgets to notebook
            switch = {
                "input": "tests/etl_input.csv",
                "output": out_dir,
            }
            dbrickstest.dbutils.widgets.get.side_effect = lambda x: switch.get(
                x, "")

            # Run notebook
            dbrickstest.run_notebook(".", "etl_notebook")

            # Notebook produces a Parquet file (directory)
            resultDF = pd.read_parquet(out_dir)

            # Compare produced Parquet file and expected CSV file
            expectedDF = pd.read_csv("tests/etl_expected.csv")
            assert_frame_equal(expectedDF, resultDF, check_dtype=False)


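# Hypothetical sketch of "etl_notebook" (not included in this excerpt):
# consistent with the test above, it would read a CSV from the "input" widget
# path, apply some transformation, and write Parquet to the "output" widget
# path. The transformation step is omitted here as a placeholder.
def _etl_notebook_sketch(spark, dbutils):
    input_path = dbutils.widgets.get("input")
    output_path = dbutils.widgets.get("output")
    df = spark.read.option("header", True).csv(input_path)
    df.write.parquet(output_path)  # placeholder: transformation not shown

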
def test_feature_engineering(mocker):
    mocker.patch("azureml.core.Run")
    with databricks_test.session() as dbrickstest, \
            TemporaryDirectory() as out_dir:

        # Provide input and output location as widgets to notebook
        switcher = {
            "training": "code/tests/diabetes_missing_values.csv",
            "feature_engineered": out_dir,
        }
        dbrickstest.dbutils.widgets.get = lambda x: switcher.get(x, "")

        capture_files = {}

        def mock_cp(src, dst, capture_files=capture_files):
            prefix = "file:"
            assert src.startswith(prefix)
            capture_files[dst] = pd.read_csv(src)
            return True
        dbrickstest.dbutils.fs.cp.side_effect = mock_cp

        # Run notebook
        dbrickstest.run_notebook("./code/prepare", "feature_engineering")

        expected_name = "engineered.csv"
        expected_file = f"{out_dir}/{expected_name}"
        assert expected_file in capture_files
        resultDF = capture_files[expected_file]

        # Compare produced and expected CSV files
        expectedDF = pd.read_csv(
            "code/tests/feature_engineering_expected.csv")
        assert_frame_equal(
            expectedDF, resultDF,
            check_dtype=False, check_categorical=False)


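# Hypothetical sketch of the save step inside the "feature_engineering"
# notebook (not part of this excerpt): the mock_cp above implies the notebook
# writes a local CSV and then copies it out with dbutils.fs.cp using a "file:"
# source path. The local scratch path and parameter names are illustrative.
def _feature_engineering_save_sketch(dbutils, engineered_pdf, out_dir):
    local_path = "/tmp/engineered.csv"               # assumed local scratch
    engineered_pdf.to_csv(local_path, index=False)   # pandas DataFrame
    dbutils.fs.cp("file:" + local_path, f"{out_dir}/engineered.csv")

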
def test_library():
    with databricks_test.session() as dbrickstest:
        # Run notebook
        dbrickstest.run_notebook(".", "library_notebook")


def test_fs():
    with databricks_test.session() as dbrickstest:
        # Run notebook
        dbrickstest.run_notebook(".", "fs_notebook")


def test_multiple_runs_in_same_session_and_run_other_session():
    with databricks_test.session() as dbrickstest:
        run_notebook("multiple_runs_notebook", 4, dbrickstest)


def test_multiple_runs_in_multiple_test_cases2():
    with databricks_test.session() as dbrickstest:
        run_notebook("multiple_runs_notebook2", 6, dbrickstest)


def test_workflow():
    with databricks_test.session() as dbrickstest:
        # Run notebook
        dbrickstest.run_notebook(".", "workflow_notebook")


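# Hypothetical sketch of "workflow_notebook" (not in this excerpt): a workflow
# notebook typically chains other notebooks with dbutils.notebook.run, which
# databricks_test redirects to local execution. The child notebook name and
# timeout below are placeholders.
def _workflow_notebook_sketch(dbutils):
    dbutils.notebook.run("child_notebook", 600)  # placeholder child notebook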