def test_data_set_describe():
    """Check the string form of a LambdaDataSet for each callable slot.

    Each single-callable data set is expected to mention only that callable;
    the combined case is compared for exact equality.
    """

    def _dummy_load():
        pass  # pragma: no cover

    def _dummy_save():
        pass  # pragma: no cover

    def _dummy_exists():
        return False  # pragma: no cover

    def _dummy_release():
        pass  # pragma: no cover

    load_only = str(LambdaDataSet(_dummy_load, None))
    assert (
        "LambdaDataSet(load=<tests.io.test_lambda_data_set._dummy_load>)"
        in load_only
    )

    save_only = str(LambdaDataSet(None, _dummy_save))
    assert (
        "LambdaDataSet(save=<tests.io.test_lambda_data_set._dummy_save>)"
        in save_only
    )

    exists_only = str(LambdaDataSet(None, None, _dummy_exists))
    assert (
        "LambdaDataSet(exists=<tests.io.test_lambda_data_set._dummy_exists>)"
        in exists_only
    )

    release_only = str(LambdaDataSet(None, None, None, _dummy_release))
    assert (
        "LambdaDataSet(release=<tests.io.test_lambda_data_set._dummy_release>)"
        in release_only
    )

    # __init__ keys alphabetically sorted, None values not shown
    expected = (
        "LambdaDataSet(exists=<tests.io.test_lambda_data_set._dummy_exists>, "
        "load=<tests.io.test_lambda_data_set._dummy_load>, "
        "save=<tests.io.test_lambda_data_set._dummy_save>)"
    )
    combined = LambdaDataSet(_dummy_load, _dummy_save, _dummy_exists, None)
    assert str(combined) == expected
def test_exists_invocation(self, mocker):
    """Check that `exists` calls the supplied callable and returns its value."""
    exists_stub = mocker.Mock(return_value=True)

    outcome = LambdaDataSet(None, None, exists_stub).exists()

    # The callable must be invoked exactly once, with no arguments.
    exists_stub.assert_called_once_with()
    assert outcome is True
def test_load_invocation(self, mocker):
    """Check that `load` calls the supplied callable and returns its value."""
    load_stub = mocker.Mock(return_value=42)

    loaded = LambdaDataSet(load_stub, None).load()

    # The callable must be invoked exactly once, with no arguments.
    load_stub.assert_called_once_with()
    assert loaded == 42
def test_release_raises_error(self, mocker):
    """Check that an exception from `release` surfaces as a DataSetError."""
    error_message = "File not found"
    # The release callable fails with a FileNotFoundError carrying the message.
    failing_release = mocker.Mock(side_effect=FileNotFoundError(error_message))
    data_set = LambdaDataSet(None, None, None, failing_release)

    with pytest.raises(DataSetError, match=error_message):
        data_set.release()

    failing_release.assert_called_once_with()
def test_load_raises_error(self):
    """Check that an exception raised while loading surfaces as a DataSetError."""
    error_message = "Internal load exception message"

    def internal_load():
        # Simulate a failing user-provided load function.
        raise FileNotFoundError(error_message)

    failing_data_set = LambdaDataSet(internal_load, None)

    with pytest.raises(DataSetError, match=error_message):
        failing_data_set.load()
def test_result_saved_not_returned(self, saving_result_pipeline):
    """The pipeline runs ds->dsX but save does not save the output."""

    def _load():
        return 0

    def _save(arg):
        assert arg == 0

    # Both data sets are registered in the catalog, each as its own
    # LambdaDataSet instance, and the run is expected to return nothing.
    catalog = DataCatalog(
        {name: LambdaDataSet(load=_load, save=_save) for name in ("ds", "dsX")}
    )

    run_output = SequentialRunner().run(saving_result_pipeline, catalog)

    assert run_output == {}
def csv_dataset_same_file(context):
    """Attach a CSV-backed LambdaDataSet that reads and writes the same file.

    The sample CSV path is stored on ``context`` as both the read and the
    write path; the data set's callables look those paths up at call time.
    """
    sample_path = create_sample_csv()
    context.read_csv_path = sample_path
    context.write_csv_path = sample_path

    def _read_csv():
        return pd.read_csv(context.read_csv_path)

    def _write_csv(df):
        # The index is not written to the file.
        return df.to_csv(context.write_csv_path, index=False)

    context.csv_data_set = LambdaDataSet(load=_read_csv, save=_write_csv)
def define_dataset_with_load_save(context):
    """Attach a CSV-backed LambdaDataSet with separate read and write paths.

    The read path comes from ``create_sample_csv`` and the write path from
    ``create_temp_csv``; both are stored on ``context`` and looked up by the
    data set's callables at call time.
    """
    context.read_csv_path = create_sample_csv()
    context.write_csv_path = create_temp_csv()

    def _read_csv():
        return pd.read_csv(context.read_csv_path)

    def _write_csv(df):
        return df.to_csv(context.write_csv_path)

    context.csv_data_set = LambdaDataSet(load=_read_csv, save=_write_csv)
def test_release_not_callable(self):
    """Check that a non-callable `release` argument is rejected on construction."""
    expected_error = (
        r"`release` function for LambdaDataSet must be a Callable\. "
        r"Object of type `str` provided instead\."
    )
    not_a_callable = "release"

    with pytest.raises(DataSetError, match=expected_error):
        LambdaDataSet(None, None, None, not_a_callable)
def test_load_not_callable(self):
    """Check that a non-callable `load` argument is rejected on construction."""
    expected_error = (
        r"`load` function for LambdaDataSet must be a Callable\. "
        r"Object of type `str` provided instead\."
    )
    not_a_callable = "load"

    with pytest.raises(DataSetError, match=expected_error):
        LambdaDataSet(not_a_callable, None)
def test_data_set_describe():
    """Check the string form of a LambdaDataSet for load, save and exists."""

    def _dummy_load():
        pass  # pragma: no cover

    def _dummy_save():
        pass  # pragma: no cover

    def _dummy_exists():
        return False  # pragma: no cover

    load_only = str(LambdaDataSet(_dummy_load, None))
    assert (
        "LambdaDataSet(load=<tests.io.test_lambda_data_set._dummy_load>)"
        in load_only
    )

    save_only = str(LambdaDataSet(None, _dummy_save))
    assert (
        "LambdaDataSet(save=<tests.io.test_lambda_data_set._dummy_save>)"
        in save_only
    )

    exists_only = str(LambdaDataSet(None, None, _dummy_exists))
    assert (
        "LambdaDataSet(exists=<tests.io.test_lambda_data_set._dummy_exists>)"
        in exists_only
    )
def _make_catalog(
    existent=None, non_existent=None, no_exists_method=None, feed_dict=None
):
    """Build a DataCatalog of existent and non-existent DataSets.

    Args:
        existent: Names registered with an ``exists`` callable returning True.
        non_existent: Names registered with an ``exists`` callable returning
            False.
        no_exists_method: Names registered without any ``exists`` callable.
        feed_dict: Passed straight through to ``DataCatalog``.

    Returns:
        The populated catalog.
    """
    if existent is None:
        existent = []
    if non_existent is None:
        non_existent = []
    if no_exists_method is None:
        no_exists_method = []

    catalog = DataCatalog(feed_dict=feed_dict)

    for name in existent:
        catalog.add(name, LambdaDataSet(None, None, lambda: True))

    for name in non_existent:
        catalog.add(name, LambdaDataSet(None, None, lambda: False))

    # Some LambdaDataSet do not have exists() method
    for name in no_exists_method:
        catalog.add(name, LambdaDataSet(None, None))

    return catalog
def test_exists_not_implemented(self, caplog):
    """Check `catalog.exists` for a data set that did not implement `exists`.

    The first captured log record is expected to be a warning, and the
    catalog is expected to report the data set as missing.
    """
    data_sets = {"test": LambdaDataSet(None, None)}
    catalog = DataCatalog(data_sets=data_sets)

    exists = catalog.exists("test")

    first_record = caplog.records[0]
    expected_text = (
        "`exists()` not implemented for `LambdaDataSet`. "
        "Assuming output does not exist."
    )
    assert first_record.levelname == "WARNING"
    assert expected_text in first_record.message
    assert exists is False
def test_data_set_not_serializable(self, is_async, fan_out_fan_in):
    """Data set A cannot be serializable because _load and _save are not
    defined in global scope.

    The run is expected to fail with an AttributeError whose message
    contains the literal text ``['A']``.
    """

    def _load():
        return 0  # pragma: no cover

    def _save(arg):
        assert arg == 0  # pragma: no cover

    # Data set A cannot be serialized
    catalog = DataCatalog({"A": LambdaDataSet(load=_load, save=_save)})
    pipeline = Pipeline([fan_out_fan_in])

    # `match` is applied as a regular-expression search. Unescaped, "['A']"
    # is a character class matching a single quote or an "A", which almost
    # any message satisfies. Escape the brackets to require the literal text.
    with pytest.raises(AttributeError, match=r"\['A'\]"):
        ParallelRunner(is_async=is_async).run(pipeline, catalog)
def test_save_undefined(self):
    """Check that saving fails with a DataSetError when no `save` is given."""
    expected_error = "Cannot save to data set"
    payload = 42

    with pytest.raises(DataSetError, match=expected_error):
        LambdaDataSet(None, None).save(payload)
def test_load_undefined(self):
    """Check that loading fails with a DataSetError when no `load` is given."""
    expected_error = "Cannot load data set"

    with pytest.raises(DataSetError, match=expected_error):
        LambdaDataSet(None, None).load()
def test_run(mocker):  # pylint: disable=too-many-locals
    """Check that AirflowRunner turns pipeline nodes into chained operators.

    ``create_task`` and ``PythonOperator`` are patched so the test can map
    node -> task -> operator and then assert on the constructor calls and
    the upstream wiring.
    """
    # The nodes. These stay as lambdas because the task ids asserted below
    # contain "lambda".
    first_node = Node(lambda: None, [], "a")
    middle_node = Node(lambda a: None, ["a"], "b")
    last_node = Node(lambda b: None, ["b"], [])

    # Nodes get turned into tasks by create_task.
    first_task, middle_task, last_task = Mock(), Mock(), Mock()
    task_by_node = {
        first_node: first_task,
        middle_node: middle_task,
        last_node: last_task,
    }

    def _task_for(node, catalog):  # pylint: disable=unused-argument
        return task_by_node[node]

    create_task = mocker.patch("kedro_airflow.runner.AirflowRunner.create_task")
    create_task.side_effect = _task_for

    # Tasks get turned into operators by the runner.
    first_op, middle_op, last_op = Mock(), Mock(), Mock()
    op_by_task = {
        first_task: first_op,
        middle_task: middle_op,
        last_task: last_op,
    }

    def _operator_for(python_callable, **kwargs):  # pylint: disable=unused-argument
        return op_by_task[python_callable]

    operator = mocker.patch("kedro_airflow.runner.PythonOperator")
    operator.side_effect = _operator_for

    def operator_arguments(task_id):
        # Built on every call so each caller receives fresh dictionaries.
        extra_by_task_id = {
            "lambda-none-a": {"retries": 1},
            "lambda-b-none": {"retries": 2},
        }
        return extra_by_task_id.get(task_id, {})

    # Actually call the runner to do the conversion. The nodes are passed
    # out of order on purpose.
    dag = Mock()
    pipeline = Pipeline([first_node, last_node, middle_node])
    catalog = DataCatalog(
        {
            "a": LambdaDataSet(load=None, save=None),
            "b": LambdaDataSet(load=None, save=None),
        }
    )
    AirflowRunner(dag, None, operator_arguments).run(pipeline, catalog)

    # Check the create_task calls.
    expected_task_calls = [
        call(first_node, catalog),
        call(middle_node, catalog),
        call(last_node, catalog),
    ]
    create_task.assert_has_calls(expected_task_calls, any_order=True)

    # Check the operator constructor calls.
    expected_operator_calls = [
        call(
            dag=dag,
            provide_context=True,
            python_callable=first_task,
            task_id="lambda-none-a",
            retries=1,
        ),
        call(
            dag=dag,
            provide_context=True,
            python_callable=middle_task,
            task_id="lambda-a-b",
        ),
        call(
            dag=dag,
            provide_context=True,
            python_callable=last_task,
            task_id="lambda-b-none",
            retries=2,
        ),
    ]
    operator.assert_has_calls(expected_operator_calls, any_order=True)

    # Check the dependency hookup.
    first_op.set_upstream.assert_not_called()
    middle_op.set_upstream.assert_called_once_with(first_op)
    last_op.set_upstream.assert_called_once_with(middle_op)
def data_set_with_no_save(context):
    """Attach to ``context`` a LambdaDataSet with neither `load` nor `save`."""
    empty_data_set = LambdaDataSet(load=None, save=None)
    context.csv_data_set = empty_data_set
def test_exists_not_implemented(self):
    """Check that `exists` method is not implemented by default"""
    bare_data_set = LambdaDataSet(None, None)
    has_exists = hasattr(bare_data_set, "exists")
    assert not has_exists
def mocked_data_set(mocked_save):
    """Return a LambdaDataSet with no `load` and ``mocked_save`` as `save`."""
    save_only_data_set = LambdaDataSet(None, mocked_save)
    return save_only_data_set
def prepare_missing_csv(context):
    """Attach to ``context`` a load-only data set reading a fixed CSV path.

    NOTE(review): judging by the name, the path is presumably expected not
    to exist; nothing here checks that.
    """
    sample_csv = "/var/missing_csv_file.csv"

    def _read_csv():
        return pd.read_csv(sample_csv)

    context.csv_data_set = LambdaDataSet(load=_read_csv, save=None)
def one_in_two_out(arg):
    """Return two LambdaDataSets that share one mocked load/save pair.

    ``arg`` is accepted but not used. ``mocker`` is not a parameter, so it
    must come from an enclosing scope that is not visible here.
    """
    load = mocker.Mock(return_value=42)
    save = mocker.Mock()
    # Two distinct data set objects backed by the same two mocks.
    return [LambdaDataSet(load, save) for _ in range(2)]
def test_release_not_implemented(self):
    """Check that `release` can be called when no release callable is given."""
    # The test passes as long as the call does not raise.
    LambdaDataSet(None, None).release()
def test_release_invocation(self, mocker):
    """Check that `release` calls the supplied callable exactly once."""
    release_stub = mocker.Mock()

    LambdaDataSet(None, None, None, release_stub).release()

    # The callable must have been invoked with no arguments.
    release_stub.assert_called_once_with()
def test_exists_not_implemented(self):
    """Check that `exists` method returns False by default"""
    bare_data_set = LambdaDataSet(None, None)
    reported = bare_data_set.exists()
    assert not reported
def mocked_dataset(mocker):
    """Return a LambdaDataSet backed by mocks; the load mock returns 42."""
    load_stub = mocker.Mock(return_value=42)
    save_stub = mocker.Mock()
    data_set = LambdaDataSet(load_stub, save_stub)
    return data_set