def test_create_from_env_with_dl_service_auth(auth_mock, adl_client_mock):
    """Service-principal auth: the ADLSecret fields must be forwarded to the
    auth call along with the datalake resource URL, and the resulting token
    ("123" from the mock) passed to the client constructor."""
    secret = ADLSecret("tenant_id", "client_id", "client_secret")
    ADLGen1FileSystem.create_from_env("dlstore", adl_secret=secret)
    # Credentials are expanded into keyword arguments for lib.auth().
    auth_mock.assert_called_once_with(
        tenant_id="tenant_id",
        client_id="client_id",
        client_secret="client_secret",
        resource="https://datalake.azure.net/",
    )
    # The mocked token "123" should reach the AzureDLFileSystem constructor.
    adl_client_mock.assert_called_once_with("123", store_name="dlstore")
def test_walk_with_info(adl_client_mock):
    """walk(with_info=True) yields (path, FileInfo) pairs: the directory
    entry first (zero size, no timestamps), then files with their sizes
    and millisecond-precision access/modify times."""
    fs = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")

    dir_info = FileInfo(
        file_type=FileType.DIRECTORY, size=0, access_time=None, modify_time=None
    )
    out_json_info = FileInfo(
        file_type=FileType.FILE,
        size=983432,
        access_time=datetime(2020, 6, 14, 9, 33, 26, 477000),
        modify_time=datetime(2020, 9, 9, 7, 36, 14, 996000),
    )
    nested_file_info = FileInfo(
        file_type=FileType.FILE,
        size=142453,
        access_time=datetime(2020, 9, 8, 22, 21, 39, 143000),
        modify_time=datetime(2020, 9, 8, 22, 40, 56, 564000),
    )

    entries = list(fs.walk("/path", with_info=True))

    assert entries == [
        ("/path/to", dir_info),
        ("/path/out.json", out_json_info),
        ("/path/to/file.json", nested_file_info),
    ]
def ncs_reader(assets_config):
    """Build an NcsReader over the mocked datalake filesystem, looking up
    yearly parquet first and falling back to CSV, partitioned by year."""
    mock_fs = ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1")
    return NcsReader(
        mock_fs,
        assets_config=assets_config,
        lookup_for=["yearly_parquet", "csv"],
        partition_by=PartitionBy.YEAR,
    )
def test_walk_without_info(adl_client_mock):
    """walk(with_info=False) yields the same paths as with_info=True but
    pairs each one with None instead of a FileInfo."""
    fs = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    walked = list(fs.walk("/path", with_info=False))
    expected_paths = ("/path/to", "/path/out.json", "/path/to/file.json")
    assert walked == [(path, None) for path in expected_paths]
def test_can_handle_tag_non_supported_asset_with_base_path(
    ncs_reader, assets_config
):
    """A tag from an unknown asset is rejected by a plain reader, but
    accepted once the reader is configured with an explicit dl_base_path
    (the base path bypasses asset lookup)."""
    unknown_tag = SensorTag("WEIRD-123", "UNKNOWN-ASSET")

    # Without a base path the asset cannot be resolved.
    assert not ncs_reader.can_handle_tag(unknown_tag)

    reader_with_base_path = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        dl_base_path="/this/is/a/base/path",
    )
    # With a base path any tag is considered handleable.
    assert reader_with_base_path.can_handle_tag(unknown_tag)
def test_info_directory(adl_client_mock):
    """A DIRECTORY info response maps to FileType.DIRECTORY with size 0 and
    no access/modify timestamps."""
    adl_client_mock.info.return_value = {"type": "DIRECTORY", "length": 0}
    fs = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")

    result = fs.info("/path/to/file.json")

    adl_client_mock.info.assert_called_once_with("/path/to/file.json")
    assert result.file_type == FileType.DIRECTORY
    assert result.size == 0
    # Directories carry no timestamps in the mocked response.
    assert result.access_time is None
    assert result.modify_time is None
def test_info_file(adl_client_mock):
    """A FILE info response maps epoch-millisecond accessTime/modificationTime
    fields onto datetime attributes and 'length' onto size."""
    raw_info = {
        "type": "FILE",
        "length": 304254,
        "accessTime": 1599631062424,
        "modificationTime": 1599631097160,
    }
    adl_client_mock.info.return_value = raw_info
    fs = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")

    result = fs.info("/path/to/file.json")

    adl_client_mock.info.assert_called_once_with("/path/to/file.json")
    assert result.file_type == FileType.FILE
    assert result.size == 304254
    # Millisecond epoch values converted to datetimes.
    assert result.access_time.isoformat() == "2020-09-09T05:57:42.424000"
    assert result.modify_time.isoformat() == "2020-09-09T05:58:17.160000"
def test_with_conflicted_file_types_with_preferable_csv(dates, assets_config):
    """When lookup_for is restricted to CSV only, the CSV file for TRC-324
    is used even though other file types exist for the tag — the loaded
    series has the single row present in the CSV fixture.
    """
    ncs_reader = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        remove_status_codes=[0],
        lookup_for=["csv"],
        # Use the enum for consistency with the other NcsReader
        # constructions in this file (was the raw string "year").
        partition_by=PartitionBy.YEAR,
    )
    valid_tag_list = normalize_sensor_tags(["TRC-324"])
    series_gen = ncs_reader.load_series(dates[0], dates[1], valid_tag_list)
    tags_series = list(series_gen)
    assert len(tags_series) == 1
    trc_324_series = tags_series[0]
    # CSV file should be with 1 row
    assert len(trc_324_series) == 1
def test_parquet_files_lookup(dates, assets_config):
    """With yearly parquet preferred over CSV, TRC-323 loads from the
    parquet fixture: a float64 series of 20 rows named after the tag."""
    reader = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        remove_status_codes=[0],
        lookup_for=["yearly_parquet", "csv"],
        partition_by=PartitionBy.YEAR,
    )
    tags = normalize_sensor_tags(["TRC-323"])
    loaded = list(reader.load_series(dates[0], dates[1], tags))

    assert len(loaded) == 1
    series = loaded[0]
    assert series.name == "TRC-323"
    assert series.dtype.name == "float64"
    assert len(series) == 20
def test_load_series_with_filter_bad_data(dates, remove_status_codes, assets_config):
    """Rows whose status code is in remove_status_codes are dropped: the
    TRC-322 fixture has 20 rows, 5 of them with status 0, so filtering on
    [0] leaves 15 while an empty filter keeps all 20."""
    reader = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        remove_status_codes=remove_status_codes,
        lookup_for=["yearly_parquet", "csv"],
        partition_by=PartitionBy.YEAR,
    )
    tags = normalize_sensor_tags(["TRC-322"])
    series_gen = reader.load_series(dates[0], dates[1], tags)

    # Checks if the bad data from the files under tests/gordo/data_provider/data/datalake/TRC-322
    # are filtered out. 20 rows exists, 5 of then have the value 0.
    if remove_status_codes != []:
        expected_rows = 15
    else:
        expected_rows = 20
    assert all(len(series) == expected_rows for series in series_gen)
def test_monthly_parquet(dates, assets_config):
    """With default lookup settings, TRC-325 loads from monthly parquet
    fixtures: 10 one-minute samples from May plus 10 from June 2001."""
    reader = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
    )
    tags = normalize_sensor_tags(["TRC-325"])
    loaded = list(reader.load_series(dates[0], dates[1], tags))
    assert len(loaded) == 1

    index = loaded[0].index
    assert len(index) == 20
    # The expected index is two 10-sample minute-frequency ranges, one per month.
    may_range = pd.date_range(start="2001-05-10T00:00:00+00:00", periods=10, freq="1T")
    june_range = pd.date_range(start="2001-06-10T00:00:00+00:00", periods=10, freq="1T")
    assert index.equals(may_range.append(june_range))
def test_ls_with_info(adl_client_mock):
    """ls(with_info=True) lists only the immediate children of /path (no
    recursion), pairing each path with its FileInfo."""
    fs = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")

    dir_entry = (
        "/path/to",
        FileInfo(
            file_type=FileType.DIRECTORY, size=0, access_time=None, modify_time=None
        ),
    )
    file_entry = (
        "/path/out.json",
        FileInfo(
            file_type=FileType.FILE,
            size=983432,
            access_time=datetime(2020, 6, 14, 9, 33, 26, 477000),
            modify_time=datetime(2020, 9, 9, 7, 36, 14, 996000),
        ),
    )

    listing = list(fs.ls("/path", with_info=True))
    assert listing == [dir_entry, file_entry]
def test_load_series_need_base_path(ncs_reader, dates, assets_config):
    """A tag for BASE-PATH-ASSET raises ValueError on a plain reader, but
    loads 20 rows once the reader is given the fixture directory as its
    dl_base_path."""
    base_path_tag = SensorTag("WEIRD-123", "BASE-PATH-ASSET")

    # Without a base path the asset cannot be resolved -> ValueError.
    with pytest.raises(ValueError):
        for _ in ncs_reader.load_series(dates[0], dates[1], [base_path_tag]):
            pass

    fixture_base_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "data",
        "datalake",
        "base_path_asset",
    )
    reader_with_base_path = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        dl_base_path=fixture_base_path,
        lookup_for=["yearly_parquet", "csv"],
        partition_by=PartitionBy.YEAR,
    )
    for series in reader_with_base_path.load_series(
        dates[0], dates[1], [base_path_tag]
    ):
        assert len(series) == 20
def test_ncs_reader_valid_tag_path():
    """_verify_tag_path_exist raises FileNotFoundError for a path that does
    not exist on the mocked filesystem."""
    mock_fs = ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1")
    with pytest.raises(FileNotFoundError):
        NcsReader._verify_tag_path_exist(mock_fs, "not/valid/path")
def test_exists(adl_client_mock):
    """exists() delegates to the underlying client's exists() with the
    same path and returns its boolean result."""
    adl_client_mock.exists.return_value = True
    fs = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    result = fs.exists("/path/to/file.json")
    assert result
    adl_client_mock.exists.assert_called_once_with("/path/to/file.json")
def test_create_from_env_with_invalid_dl_service(auth_mock, adl_client_mock):
    """Non-interactive creation without an ADLSecret is a configuration
    error and must raise ConfigException."""
    with pytest.raises(ConfigException):
        ADLGen1FileSystem.create_from_env("dlstore", adl_secret=None)
def test_isdir(adl_client_mock):
    """isdir() is True when the client's info() reports type DIRECTORY,
    and the path is forwarded to info() unchanged."""
    adl_client_mock.info.return_value = {"type": "DIRECTORY"}
    fs = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    is_directory = fs.isdir("/path/to/file.json")
    assert is_directory
    adl_client_mock.info.assert_called_once_with("/path/to/file.json")
def test_create_from_env_interactive(auth_mock, adl_client_mock):
    """Interactive creation calls the auth function with no arguments and
    passes the resulting token ("123" from the mock) to the client."""
    ADLGen1FileSystem.create_from_env("dlstore", interactive=True)
    # Interactive mode uses default (device-code style) auth: no kwargs.
    auth_mock.assert_called_once_with()
    adl_client_mock.assert_called_once_with("123", store_name="dlstore")
def test_open_in_text_mode(adl_client_mock):
    """Opening in text mode ("r") opens the underlying client file in
    binary mode ("rb") and wraps it in a TextIOWrapper."""
    fs = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    handle = fs.open("/path/to/file.json", mode="r")
    adl_client_mock.open.assert_called_once_with("/path/to/file.json", mode="rb")
    assert isinstance(handle, TextIOWrapper)