예제 #1
0
def test_create_from_env_with_dl_service_auth(auth_mock, adl_client_mock):
    """Check that an explicit ``ADLSecret`` is forwarded to the auth call.

    ``create_from_env`` is invoked with service credentials; the auth mock
    must receive the three secret fields plus the Data Lake resource URL,
    and the client mock must be called once for the requested store.
    """
    tenant, client, secret = "tenant_id", "client_id", "client_secret"
    ADLGen1FileSystem.create_from_env(
        "dlstore", adl_secret=ADLSecret(tenant, client, secret)
    )

    expected_auth_kwargs = {
        "tenant_id": tenant,
        "client_id": client,
        "client_secret": secret,
        "resource": "https://datalake.azure.net/",
    }
    auth_mock.assert_called_once_with(**expected_auth_kwargs)
    # NOTE(review): "123" is presumably the token the auth_mock fixture
    # returns -- confirm against the fixture definition.
    adl_client_mock.assert_called_once_with("123", store_name="dlstore")
예제 #2
0
def test_walk_with_info(adl_client_mock):
    """Check that ``walk(..., with_info=True)`` yields ``(path, FileInfo)`` pairs.

    The expected listing is one directory followed by two files, in the
    order the walk is expected to produce them.
    """
    directory_info = FileInfo(
        file_type=FileType.DIRECTORY, size=0, access_time=None, modify_time=None
    )
    out_json_info = FileInfo(
        file_type=FileType.FILE,
        size=983432,
        access_time=datetime(2020, 6, 14, 9, 33, 26, 477000),
        modify_time=datetime(2020, 9, 9, 7, 36, 14, 996000),
    )
    nested_file_info = FileInfo(
        file_type=FileType.FILE,
        size=142453,
        access_time=datetime(2020, 9, 8, 22, 21, 39, 143000),
        modify_time=datetime(2020, 9, 8, 22, 40, 56, 564000),
    )

    filesystem = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    walked = list(filesystem.walk("/path", with_info=True))

    assert walked == [
        ("/path/to", directory_info),
        ("/path/out.json", out_json_info),
        ("/path/to/file.json", nested_file_info),
    ]
예제 #3
0
def ncs_reader(assets_config):
    """Return an ``NcsReader`` on top of the mocked ADL Gen1 file system.

    The reader looks up yearly parquet files first, then CSV, and is
    partitioned by year.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible in this excerpt.
    """
    mocked_fs = ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1")
    reader_options = {
        "assets_config": assets_config,
        "lookup_for": ["yearly_parquet", "csv"],
        "partition_by": PartitionBy.YEAR,
    }
    return NcsReader(mocked_fs, **reader_options)
예제 #4
0
def test_walk_without_info(adl_client_mock):
    """Check that ``walk(..., with_info=False)`` pairs every path with ``None``."""
    expected_paths = ("/path/to", "/path/out.json", "/path/to/file.json")

    filesystem = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    walked = list(filesystem.walk("/path", with_info=False))

    assert walked == [(path, None) for path in expected_paths]
예제 #5
0
def test_can_handle_tag_non_supported_asset_with_base_path(
        ncs_reader, assets_config):
    """Check that a tag for an unknown asset is accepted only with a base path.

    The default reader must reject the tag, while a reader constructed
    with ``dl_base_path`` must accept the very same tag.
    """
    unknown_asset_tag = SensorTag("WEIRD-123", "UNKNOWN-ASSET")

    handled_without_base = ncs_reader.can_handle_tag(unknown_asset_tag)
    assert not handled_without_base

    mocked_fs = ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1")
    reader_with_base_path = NcsReader(
        mocked_fs,
        assets_config=assets_config,
        dl_base_path="/this/is/a/base/path",
    )
    handled_with_base = reader_with_base_path.can_handle_tag(unknown_asset_tag)
    assert handled_with_base
예제 #6
0
def test_info_directory(adl_client_mock):
    """Check ``info`` on a directory entry that carries no timestamps.

    The client reports only ``type`` and ``length``; the resulting info
    must be a zero-sized directory with both time fields set to ``None``.
    """
    target = "/path/to/file.json"
    raw_directory_entry = {
        "type": "DIRECTORY",
        "length": 0,
    }
    adl_client_mock.info.return_value = raw_directory_entry

    filesystem = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    file_info = filesystem.info(target)

    adl_client_mock.info.assert_called_once_with(target)
    assert file_info.file_type == FileType.DIRECTORY
    assert file_info.size == 0
    assert file_info.access_time is None
    assert file_info.modify_time is None
예제 #7
0
def test_info_file(adl_client_mock):
    """Check ``info`` on a file entry, including timestamp conversion.

    The client reports ``accessTime`` / ``modificationTime`` as integer
    epoch values in milliseconds; the test pins the ISO strings they must
    be converted to.
    """
    target = "/path/to/file.json"
    raw_file_entry = {
        "type": "FILE",
        "length": 304254,
        "accessTime": 1599631062424,
        "modificationTime": 1599631097160,
    }
    adl_client_mock.info.return_value = raw_file_entry

    filesystem = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    file_info = filesystem.info(target)

    adl_client_mock.info.assert_called_once_with(target)
    assert file_info.file_type == FileType.FILE
    assert file_info.size == 304254
    accessed_iso = file_info.access_time.isoformat()
    modified_iso = file_info.modify_time.isoformat()
    assert accessed_iso == "2020-09-09T05:57:42.424000"
    assert modified_iso == "2020-09-09T05:58:17.160000"
예제 #8
0
def test_with_conflicted_file_types_with_preferable_csv(dates, assets_config):
    """Check that restricting ``lookup_for`` to CSV loads the CSV data.

    The reader is limited to ``["csv"]`` and partitioned by the string
    ``"year"``; the single series loaded for TRC-324 must have one row.
    """
    mocked_fs = ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1")
    csv_only_reader = NcsReader(
        mocked_fs,
        assets_config=assets_config,
        remove_status_codes=[0],
        lookup_for=["csv"],
        partition_by="year",
    )

    start, end = dates[0], dates[1]
    tags = normalize_sensor_tags(["TRC-324"])
    loaded = list(csv_only_reader.load_series(start, end, tags))

    assert len(loaded) == 1
    # The CSV file is expected to contain exactly one row.
    (trc_324_series,) = loaded
    assert len(trc_324_series) == 1
예제 #9
0
def test_parquet_files_lookup(dates, assets_config):
    """Check the series loaded for TRC-323 with parquet-then-CSV lookup.

    Exactly one series must be produced, named after the tag, with dtype
    ``float64`` and 20 rows.
    """
    mocked_fs = ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1")
    reader = NcsReader(
        mocked_fs,
        assets_config=assets_config,
        remove_status_codes=[0],
        lookup_for=["yearly_parquet", "csv"],
        partition_by=PartitionBy.YEAR,
    )

    start, end = dates[0], dates[1]
    tags = normalize_sensor_tags(["TRC-323"])
    loaded = list(reader.load_series(start, end, tags))

    assert len(loaded) == 1
    trc_323_series = loaded[0]
    assert trc_323_series.name == "TRC-323"
    assert trc_323_series.dtype.name == "float64"
    assert len(trc_323_series) == 20
예제 #10
0
def test_load_series_with_filter_bad_data(dates, remove_status_codes,
                                          assets_config):
    """Check that rows with unwanted status codes are filtered out.

    ``remove_status_codes`` is supplied by the caller (presumably a pytest
    parametrization -- not visible here). When it is an empty list no
    filtering is expected and all 20 rows remain; otherwise 15 rows remain.
    """
    ncs_reader = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        remove_status_codes=remove_status_codes,
        lookup_for=["yearly_parquet", "csv"],
        partition_by=PartitionBy.YEAR,
    )

    valid_tag_list = normalize_sensor_tags(["TRC-322"])
    # Materialize the generator so that emptiness can be checked explicitly.
    loaded_series = list(
        ncs_reader.load_series(dates[0], dates[1], valid_tag_list)
    )
    # all() over an empty iterable is True, so without this guard the test
    # would pass even if nothing was loaded at all.
    assert loaded_series, "load_series yielded no series for TRC-322"

    # Checks if the bad data from the files under
    # tests/gordo/data_provider/data/datalake/TRC-322 are filtered out.
    # 20 rows exist, 5 of them have the value 0.
    n_expected = 15 if remove_status_codes != [] else 20
    assert all(len(series) == n_expected for series in loaded_series)
예제 #11
0
def test_monthly_parquet(dates, assets_config):
    """Check the index of the series loaded for TRC-325 with default lookup.

    The reader is built without ``lookup_for`` / ``partition_by``, so its
    defaults apply. The single resulting series must have 20 timestamps:
    ten one-minute steps starting 2001-05-10 and ten starting 2001-06-10
    (both UTC).
    """
    ncs_reader = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
    )

    valid_tag_list = normalize_sensor_tags(["TRC-325"])
    series_gen = ncs_reader.load_series(dates[0], dates[1], valid_tag_list)
    tags_series = list(series_gen)
    assert len(tags_series) == 1
    index = tags_series[0].index
    assert len(index) == 20
    # "1min" is the non-deprecated spelling of the former "1T" minute alias;
    # it produces the same timestamps.
    dr1 = pd.date_range(start="2001-05-10T00:00:00+00:00",
                        periods=10,
                        freq="1min")
    dr2 = pd.date_range(start="2001-06-10T00:00:00+00:00",
                        periods=10,
                        freq="1min")
    dr = dr1.append(dr2)
    assert index.equals(dr)
예제 #12
0
def test_ls_with_info(adl_client_mock):
    """Check that ``ls(..., with_info=True)`` yields ``(path, FileInfo)`` pairs.

    The expected listing of ``/path`` is one directory and one file.
    """
    directory_info = FileInfo(
        file_type=FileType.DIRECTORY, size=0, access_time=None, modify_time=None
    )
    out_json_info = FileInfo(
        file_type=FileType.FILE,
        size=983432,
        access_time=datetime(2020, 6, 14, 9, 33, 26, 477000),
        modify_time=datetime(2020, 9, 9, 7, 36, 14, 996000),
    )
    expected_listing = [
        ("/path/to", directory_info),
        ("/path/out.json", out_json_info),
    ]

    filesystem = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    listing = list(filesystem.ls("/path", with_info=True))

    assert listing == expected_listing
예제 #13
0
def test_load_series_need_base_path(ncs_reader, dates, assets_config):
    """Check that BASE-PATH-ASSET tags load only when a base path is given.

    The default reader must raise ``ValueError`` while its series generator
    is consumed. A reader constructed with ``dl_base_path`` pointing at the
    local ``data/datalake/base_path_asset`` directory must yield series of
    20 rows each.
    """
    tag = SensorTag("WEIRD-123", "BASE-PATH-ASSET")
    with pytest.raises(ValueError):
        # load_series is iterated lazily, so it has to be consumed for the
        # error to surface.
        for _ in ncs_reader.load_series(dates[0], dates[1], [tag]):
            pass

    path_to_weird_base_path_asset = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "data",
        "datalake",
        "base_path_asset",
    )
    ncs_reader_with_base = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        dl_base_path=path_to_weird_base_path_asset,
        lookup_for=["yearly_parquet", "csv"],
        partition_by=PartitionBy.YEAR,
    )
    loaded_series = list(
        ncs_reader_with_base.load_series(dates[0], dates[1], [tag])
    )
    # Guard against a vacuous pass: the per-series assertion below never
    # runs if nothing is yielded.
    assert loaded_series, "load_series yielded no series for the base path"
    for tag_series in loaded_series:
        assert len(tag_series) == 20
예제 #14
0
def test_ncs_reader_valid_tag_path():
    """Check that ``_verify_tag_path_exist`` raises for a path that is missing."""
    missing_path = "not/valid/path"
    with pytest.raises(FileNotFoundError):
        mocked_fs = ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1")
        NcsReader._verify_tag_path_exist(mocked_fs, missing_path)
예제 #15
0
def test_exists(adl_client_mock):
    """Check that ``exists`` passes the path to the client and returns truthy."""
    target = "/path/to/file.json"
    adl_client_mock.exists.return_value = True

    filesystem = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    found = filesystem.exists(target)

    assert found
    adl_client_mock.exists.assert_called_once_with(target)
예제 #16
0
def test_create_from_env_with_invalid_dl_service(auth_mock, adl_client_mock):
    """Check that ``create_from_env`` raises ``ConfigException`` with no secret."""
    store_name = "dlstore"
    with pytest.raises(ConfigException):
        ADLGen1FileSystem.create_from_env(store_name, adl_secret=None)
예제 #17
0
def test_isdir(adl_client_mock):
    """Check that ``isdir`` is truthy when the client reports a DIRECTORY."""
    target = "/path/to/file.json"
    adl_client_mock.info.return_value = {"type": "DIRECTORY"}

    filesystem = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")
    is_directory = filesystem.isdir(target)

    assert is_directory
    adl_client_mock.info.assert_called_once_with(target)
예제 #18
0
def test_create_from_env_interactive(auth_mock, adl_client_mock):
    """Check that interactive mode calls auth with no arguments at all.

    The client mock must still be called once for the requested store.
    """
    store_name = "dlstore"
    ADLGen1FileSystem.create_from_env(store_name, interactive=True)

    auth_mock.assert_called_once_with()
    # NOTE(review): "123" is presumably the token the auth_mock fixture
    # returns -- confirm against the fixture definition.
    adl_client_mock.assert_called_once_with("123", store_name=store_name)
예제 #19
0
def test_open_in_text_mode(adl_client_mock):
    """Check that text mode opens the client in binary and wraps the result.

    Requesting ``mode="r"`` must call the client's ``open`` with
    ``mode="rb"`` and hand back a ``TextIOWrapper``.
    """
    target = "/path/to/file.json"
    filesystem = ADLGen1FileSystem(adl_client_mock, store_name="dlstore")

    text_handle = filesystem.open(target, mode="r")

    adl_client_mock.open.assert_called_once_with(target, mode="rb")
    assert isinstance(text_handle, TextIOWrapper)