def test_basic_instantiation(tmp_path_factory):
    """Check the self-check report and reference counts for one "alpha" asset.

    Three CSV files are created in a temporary directory, a connector is
    configured with a single asset named "alpha", and the test asserts on
    the report returned by ``self_check()``, on the data reference counts
    after a cache refresh, and on the ``ValueError`` expected for a batch
    request that names a different datasource and data asset.
    """
    # NOTE(review): a second function named test_basic_instantiation is
    # defined later in this file; within one module the later definition
    # replaces this one, so this test may never be collected -- confirm and
    # rename one of them.
    csv_names = ("alpha-1.csv", "alpha-2.csv", "alpha-3.csv")

    base_directory = str(tmp_path_factory.mktemp("test_basic_instantiation"))
    create_files_in_directory(
        directory=base_directory,
        file_name_list=list(csv_names),
    )

    regex_config = {
        "pattern": "alpha-(.*)\\.csv",
        "group_names": ["index"],
    }
    connector = ConfiguredAssetFilesystemDataConnector(
        name="my_data_connector",
        datasource_name="FAKE_DATASOURCE_NAME",
        execution_engine=PandasExecutionEngine(),
        default_regex=regex_config,
        base_directory=base_directory,
        assets={"alpha": {}},
    )

    expected_report = {
        "class_name": "ConfiguredAssetFilesystemDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["alpha"],
        "data_assets": {
            "alpha": {
                "example_data_references": list(csv_names),
                "batch_definition_count": 3,
            },
        },
        "example_unmatched_data_references": [],
        "unmatched_data_reference_count": 0,
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {},
    }
    assert connector.self_check() == expected_report

    # noinspection PyProtectedMember
    connector._refresh_data_references_cache()
    assert connector.get_data_reference_list_count() == 3
    assert connector.get_unmatched_data_references() == []

    # The request names datasource "something" (the connector was built with
    # "FAKE_DATASOURCE_NAME") and an unconfigured asset; a ValueError is
    # expected from the lookup.
    mismatched_request = BatchRequest(
        datasource_name="something",
        data_connector_name="my_data_connector",
        data_asset_name="something",
    )
    with pytest.raises(ValueError):
        print(
            connector.get_batch_definition_list_from_batch_request(
                mismatched_request
            )
        )
def test__file_object_caching_for_FileDataConnector(tmp_path_factory):
    """Check that data references only appear after the cache is refreshed.

    Before ``_refresh_data_references_cache()`` is called the connector is
    expected to report zero data references and zero unmatched references;
    afterwards it is expected to report four references (one per file
    created below) and still no unmatched ones.
    """
    fixture_files = [
        "pretend/path/A-100.csv",
        "pretend/path/A-101.csv",
        "pretend/directory/B-1.csv",
        "pretend/directory/B-2.csv",
    ]

    tmp_dir = tmp_path_factory.mktemp(
        "basic_data_connector__filesystem_data_connector"
    )
    base_directory = str(tmp_dir)
    # presumably the helper creates the intermediate directories named in
    # each relative path -- verify against create_files_in_directory
    create_files_in_directory(
        directory=base_directory,
        file_name_list=fixture_files,
    )

    regex_config = {
        "pattern": "(.*).csv",
        "group_names": ["name"],
    }
    connector = ConfiguredAssetFilesystemDataConnector(
        name="my_data_connector",
        datasource_name="FAKE_DATASOURCE",
        execution_engine=PandasExecutionEngine(),
        base_directory=base_directory,
        glob_directive="*/*/*.csv",
        default_regex=regex_config,
        assets={"stuff": {}},
    )

    # Nothing has been cached yet.
    assert connector.get_data_reference_list_count() == 0
    assert len(connector.get_unmatched_data_references()) == 0

    # noinspection PyProtectedMember
    connector._refresh_data_references_cache()

    # After the refresh every file is accounted for and none is unmatched.
    assert len(connector.get_unmatched_data_references()) == 0
    assert connector.get_data_reference_list_count() == 4
def test_basic_instantiation(tmp_path_factory):
    """Smoke test: the connector can be constructed over an empty directory.

    No assertions are made; the test passes as long as the constructor does
    not raise.
    """
    # NOTE(review): a function with this same name is defined earlier in this
    # file; within one module this later definition replaces the earlier one,
    # so only one of the two would run -- confirm and rename one of them.
    tmp_dir = tmp_path_factory.mktemp(
        "basic_data_connector__filesystem_data_connector"
    )
    base_directory = str(tmp_dir)

    regex_config = {
        "pattern": "(.*)",
        "group_names": ["file_name"],
    }

    # noinspection PyUnusedLocal
    connector = ConfiguredAssetFilesystemDataConnector(
        name="my_data_connector",
        base_directory=base_directory,
        glob_directive="*.csv",
        datasource_name="FAKE_DATASOURCE",
        default_regex=regex_config,
        assets={"my_asset_name": {}},
    )
def test_basic_instantiation_with_nested_directories(tmp_path_factory):
    """Check three ways of pointing a connector at files in a sub-directory.

    Three CSV files are created under ``<tmp>/foo``. The connector is then
    configured in three different ways and the ``self_check()`` report is
    compared against the expected dictionary each time:

    1. ``base_directory`` is the ``foo`` directory itself.
    2. ``base_directory`` is the parent and the asset sets
       ``base_directory`` to ``"foo"``.
    3. ``base_directory`` is the parent and the regex pattern carries the
       ``foo/`` prefix; here ``"foo"`` is expected to be reported as one
       unmatched data reference.
    """

    def regex_for(pattern):
        # A fresh dict per connector, as in the inline literals this replaces.
        return {"pattern": pattern, "group_names": ["index"]}

    def expected_report(references, unmatched):
        # Expected self_check() payload for a single asset named "alpha".
        return {
            "class_name": "ConfiguredAssetFilesystemDataConnector",
            "data_asset_count": 1,
            "example_data_asset_names": ["alpha"],
            "data_assets": {
                "alpha": {
                    "example_data_references": references,
                    "batch_definition_count": 3,
                },
            },
            "example_unmatched_data_references": unmatched,
            "unmatched_data_reference_count": len(unmatched),
            "example_data_reference": {},
        }

    base_directory = str(
        tmp_path_factory.mktemp("test_basic_instantiation_with_nested_directories")
    )
    nested_directory = os.path.join(base_directory, "foo")
    os.makedirs(nested_directory)
    create_files_in_directory(
        directory=nested_directory,
        file_name_list=["alpha-1.csv", "alpha-2.csv", "alpha-3.csv"],
    )

    # 1. Connector rooted directly at the nested directory.
    connector = ConfiguredAssetFilesystemDataConnector(
        name="my_data_connector",
        datasource_name="FAKE_DATASOURCE_NAME",
        default_regex=regex_for("alpha-(.*)\\.csv"),
        base_directory=nested_directory,
        assets={"alpha": {}},
    )
    assert connector.self_check() == expected_report(
        ["alpha-1.csv", "alpha-2.csv", "alpha-3.csv"], []
    )

    # 2. Connector rooted at the parent; the asset supplies the sub-directory.
    connector = ConfiguredAssetFilesystemDataConnector(
        name="my_data_connector",
        datasource_name="FAKE_DATASOURCE_NAME",
        default_regex=regex_for("alpha-(.*)\\.csv"),
        base_directory=base_directory,
        assets={"alpha": {"base_directory": "foo"}},
    )
    assert connector.self_check() == expected_report(
        ["alpha-1.csv", "alpha-2.csv", "alpha-3.csv"], []
    )

    # 3. Connector rooted at the parent; the pattern includes the "foo/" prefix.
    connector = ConfiguredAssetFilesystemDataConnector(
        name="my_data_connector",
        datasource_name="FAKE_DATASOURCE_NAME",
        default_regex=regex_for("foo/alpha-(.*)\\.csv"),
        base_directory=base_directory,
        assets={"alpha": {}},
    )
    assert connector.self_check() == expected_report(
        ["foo/alpha-1.csv", "foo/alpha-2.csv", "foo/alpha-3.csv"], ["foo"]
    )