Ejemplo n.º 1
0
def test_with_conflicted_file_types_with_preferable_csv(dates, assets_config):
    """When only "csv" is looked up, the CSV variant of the tag is loaded."""
    reader = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        remove_status_codes=[0],
        lookup_for=["csv"],
        partition_by="year",
    )

    tag_list = normalize_sensor_tags(["TRC-324"])
    loaded = list(reader.load_series(dates[0], dates[1], tag_list))
    assert len(loaded) == 1
    # The CSV fixture for TRC-324 holds exactly one row.
    assert len(loaded[0]) == 1
Ejemplo n.º 2
0
def test_parquet_files_lookup(dates, assets_config):
    """Yearly parquet files are picked up when listed ahead of csv."""
    reader = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        remove_status_codes=[0],
        lookup_for=["yearly_parquet", "csv"],
        partition_by=PartitionBy.YEAR,
    )

    tag_list = normalize_sensor_tags(["TRC-323"])
    loaded = list(reader.load_series(dates[0], dates[1], tag_list))
    assert len(loaded) == 1
    series = loaded[0]
    assert series.name == "TRC-323"
    assert series.dtype.name == "float64"
    assert len(series) == 20
Ejemplo n.º 3
0
def test_load_series_with_filter_bad_data(dates, remove_status_codes,
                                          assets_config):
    """Rows with a status code in ``remove_status_codes`` are dropped."""
    reader = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        remove_status_codes=remove_status_codes,
        lookup_for=["yearly_parquet", "csv"],
        partition_by=PartitionBy.YEAR,
    )

    tag_list = normalize_sensor_tags(["TRC-322"])
    series_iter = reader.load_series(dates[0], dates[1], tag_list)
    # The fixture files under tests/gordo/data_provider/data/datalake/TRC-322
    # contain 20 rows; 5 of them have the value 0 and are filtered out
    # unless the remove_status_codes list is empty.
    expected_len = 20 if remove_status_codes == [] else 15
    assert all(len(series) == expected_len for series in series_iter)
Ejemplo n.º 4
0
def test_monthly_parquet(dates, assets_config):
    """A default-configured reader stitches monthly partitions together."""
    reader = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
    )

    tag_list = normalize_sensor_tags(["TRC-325"])
    loaded = list(reader.load_series(dates[0], dates[1], tag_list))
    assert len(loaded) == 1

    index = loaded[0].index
    assert len(index) == 20
    # Ten one-minute samples from each of two consecutive months.
    may_part = pd.date_range(
        start="2001-05-10T00:00:00+00:00", periods=10, freq="1T"
    )
    june_part = pd.date_range(
        start="2001-06-10T00:00:00+00:00", periods=10, freq="1T"
    )
    assert index.equals(may_part.append(june_part))
Ejemplo n.º 5
0
def test_load_series_need_base_path(ncs_reader, dates, assets_config):
    """An asset outside the standard layout needs an explicit dl_base_path."""
    tag = SensorTag("WEIRD-123", "BASE-PATH-ASSET")

    # Without a base path the tag cannot be located and loading fails.
    with pytest.raises(ValueError):
        for _ in ncs_reader.load_series(dates[0], dates[1], [tag]):
            pass

    base_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "data",
        "datalake",
        "base_path_asset",
    )
    reader_with_base = NcsReader(
        ADLGen1FileSystem(AzureDLFileSystemMock(), "adl1"),
        assets_config=assets_config,
        dl_base_path=base_path,
        lookup_for=["yearly_parquet", "csv"],
        partition_by=PartitionBy.YEAR,
    )
    # With the base path supplied, the same tag loads its 20 fixture rows.
    for series in reader_with_base.load_series(dates[0], dates[1], [tag]):
        assert len(series) == 20