Example #1
def dataset():
    return RandomDataset(
        train_start_date="2017-12-25 06:00:00Z",
        train_end_date="2017-12-29 06:00:00Z",
        tag_list=[SensorTag("Tag 1", None),
                  SensorTag("Tag 2", None)],
    )
Example #2
def test_aggregation_methods():
    """Tests that it works to set aggregation method(s)"""

    kwargs = dict(
        data_provider=MockDataProvider(),
        tag_list=[
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        train_start_date=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        train_end_date=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
    )

    # Default aggregation gives no extra columns
    X, _ = TimeSeriesDataset(**kwargs).get_data()
    assert (83, 3) == X.shape

    # The default single aggregation method gives the tag-names as columns
    assert list(X.columns) == ["Tag 1", "Tag 2", "Tag 3"]

    # Using two aggregation methods gives multi-level columns with tag names
    # on top and aggregation_method as the second level
    X, _ = TimeSeriesDataset(aggregation_methods=["mean", "max"], **kwargs).get_data()

    assert (83, 6) == X.shape
    assert list(X.columns) == [
        ("Tag 1", "mean"),
        ("Tag 1", "max"),
        ("Tag 2", "mean"),
        ("Tag 2", "max"),
        ("Tag 3", "mean"),
        ("Tag 3", "max"),
    ]
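The multi-level columns asserted above fall straight out of pandas' resample/agg machinery. A minimal standalone sketch of that mechanism (plain pandas, assumed to mirror what TimeSeriesDataset does internally; not the library's actual code):

import numpy as np
import pandas as pd

index = pd.date_range("2017-12-25 06:00", periods=60, freq="1T", tz="UTC")
df = pd.DataFrame(np.random.rand(60, 2), index=index, columns=["Tag 1", "Tag 2"])

# A single aggregation method keeps the plain tag-name columns ...
single = df.resample("10T").agg("mean")
print(list(single.columns))  # ['Tag 1', 'Tag 2']

# ... while a list of methods yields (tag, method) MultiIndex columns,
# the same layout the test asserts.
multi = df.resample("10T").agg(["mean", "max"])
print(list(multi.columns))  # [('Tag 1', 'mean'), ('Tag 1', 'max'), ...]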
Example #3
def get_random_data():
    data = {
        "type": "RandomDataset",
        "train_start_date": dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        "train_end_date": dateutil.parser.isoparse("2017-12-30 06:00:00Z"),
        "tag_list": [SensorTag("Tag 1", None), SensorTag("Tag 2", None)],
        "target_tag_list": [SensorTag("Tag 1", None), SensorTag("Tag 2", None)],
    }
    return data
Example #4
def test_from_timeseries_to_gordo():
    info = IntermediateFormat()
    info.from_time_series_api(tsapi_datas)
    # logger.info(f"tagnames: {info.tag_names} tagmap {info.tag_names_map} tagdata {info.tag_names_data}")
    assert info.tag_names == expected_intermediate_tag_names
    assert info.tag_names_map == expected_intermediate_tag_map
    assert info.tag_names_data == expected_intermediate_tag_data
    gordo_data = info.to_gordo(
        tags=[SensorTag(name="tag_1", asset="asset_1")],
        target_tags=[SensorTag(name="tag_2", asset="asset_2")],
    )
    # logger.info(f"gordo_data: {gordo_data}")
    assert gordo_data == expected_gordo_data
Example #5
def test_load_from_multiple_providers(self):
    """Two tags, each belonging to a different data provider, and both get loaded"""
    series_collection = list(
        load_series_from_multiple_providers(
            [self.ab_producer, self.containing_b_producer],
            None,
            None,
            [SensorTag("abba", None), SensorTag("cba", None)],
        )
    )
    self.assertEqual(series_collection[0].name, "ab.*")
    self.assertEqual(series_collection[1].name, ".*b.*")
Example #6
def test_load_multiple_raises_with_no_matches(self):
    """If no provider matches a tag then load_series_from_multiple_providers
    raises a ValueError when the generator is realized"""
    with self.assertRaises(ValueError):
        list(
            load_series_from_multiple_providers(
                [self.ab_producer, self.containing_b_producer],
                None,
                None,
                [
                    SensorTag("ab", None),
                    SensorTag("tag_not_matching_any_of_the_regexps", None),
                ],
            )
        )
Example #7
def test_trigger_tags(mock_tag_normalizer):
    data_provider = MockDataProvider()
    dataset = TimeSeriesDataset(
        data_provider=data_provider,
        tag_list=[
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
        ],
        target_tag_list=[
            SensorTag("Tag 5", None),
        ],
        train_start_date=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        train_end_date=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
        row_filter="`Tag 3` > 0 & `Tag 4` > 1",
        tag_normalizer=mock_tag_normalizer,
    )
    X, y = dataset.get_data()
    assert X is not None
    assert y is not None
    assert set(data_provider.last_tag_list) == {
        SensorTag("Tag 1", None),
        SensorTag("Tag 2", None),
        SensorTag("Tag 3", None),
        SensorTag("Tag 4", None),
        SensorTag("Tag 5", None),
    }
    assert set(X.columns.values) == {"Tag 1", "Tag 2"}
    assert set(y.columns.values) == {"Tag 5"}
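Note that "Tag 3" and "Tag 4" appear only inside row_filter, yet the assertions show they are requested from the data provider and then dropped from X. A hedged sketch of how such trigger tags can be extracted from a pandas-query-style filter string (the helper name is hypothetical, not the library's API):

import re

def tags_referenced_in_filter(row_filter: str) -> set:
    """Hypothetical helper: collect the backtick-quoted column names
    used in a pandas-query-style row_filter expression."""
    return set(re.findall(r"`([^`]+)`", row_filter))

assert tags_referenced_in_filter("`Tag 3` > 0 & `Tag 4` > 1") == {"Tag 3", "Tag 4"}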
Example #8
def test_timeseries_dataset_compat():
    """
    There are accepted keywords in the config file when using type: TimeSeriesDataset
    which don't actually match the kwargs of the dataset's __init__; for compatibility
    :func:`gordo.machine.dataset.datasets.compat` should adjust for these differences.
    """
    dataset = TimeSeriesDataset(
        data_provider=MockDataProvider(),
        train_start_date="2017-12-25 06:00:00Z",
        train_end_date="2017-12-29 06:00:00Z",
        tags=[SensorTag("Tag 1", None)],
    )
    assert dataset.train_start_date == dateutil.parser.isoparse("2017-12-25 06:00:00Z")
    assert dataset.train_end_date == dateutil.parser.isoparse("2017-12-29 06:00:00Z")
    assert dataset.tag_list == [SensorTag("Tag 1", None)]
Example #9
def test_time_series_no_resolution():
    kwargs = dict(
        data_provider=MockDataProvider(),
        tag_list=[
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        train_start_date=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        train_end_date=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
    )

    no_resolution, _ = TimeSeriesDataset(resolution=None, **kwargs).get_data()
    wi_resolution, _ = TimeSeriesDataset(resolution="10T", **kwargs).get_data()
    assert len(no_resolution) > len(wi_resolution)
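The test only compares row counts: resampling to a coarser resolution collapses rows, so the resolution=None frame must be longer. A standalone pandas illustration of the mechanism (assumed to match the dataset's internal resampling):

import numpy as np
import pandas as pd

index = pd.date_range("2017-12-25 06:00", periods=240, freq="1T", tz="UTC")
series = pd.Series(np.random.rand(240), index=index)

# "10T" is the pandas offset alias for 10 minutes: 240 one-minute rows
# collapse into 24 ten-minute buckets.
print(len(series), len(series.resample("10T").mean()))  # 240 24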
Example #10
def test_load_series_need_asset_hint(dates, ncs_reader):
    with pytest.raises(ValueError):
        for _ in ncs_reader.load_series(dates[0], dates[1],
                                        [SensorTag("XYZ-123", None)]):
            pass

    path_to_xyz = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               "data", "datalake", "gordoplatform")
    with patch(
            "gordo.machine.dataset.data_provider.ncs_reader.NcsReader.ASSET_TO_PATH",
        {"gordoplatform": path_to_xyz},
    ):
        valid_tag_list_with_asset = [SensorTag("XYZ-123", "gordoplatform")]
        for frame in ncs_reader.load_series(dates[0], dates[1],
                                            valid_tag_list_with_asset):
            assert len(frame) == 20
Example #11
def test_can_handle_tag_non_supported_asset_with_base_path(ncs_reader):
    tag = SensorTag("WEIRD-123", "UNKNOWN-ASSET")
    assert not ncs_reader.can_handle_tag(tag)

    ncs_reader_with_base = NcsReader(AzureDLFileSystemMock(),
                                     dl_base_path="/this/is/a/base/path")
    assert ncs_reader_with_base.can_handle_tag(tag)
Example #12
def test_gordo_to_latigo_tag():
    name = "some_name"
    asset = "some_asset"
    gordo_tag = SensorTag(name=name, asset=asset)
    assert isinstance(gordo_tag, SensorTag)
    latigo_tag = _gordo_to_latigo_tag(gordo_tag)
    assert isinstance(latigo_tag, LatigoSensorTag)
    assert type(latigo_tag.name) == type(name)
    assert type(latigo_tag.asset) == type(asset)
    assert latigo_tag.name == name
    assert latigo_tag.asset == asset
Example #13
def test_load_multiple_matches_loads_from_first(self):
    """When a tag can be read from multiple providers, the first matching
    provider in the list does the loading"""
    series_collection = list(
        load_series_from_multiple_providers(
            [self.ab_producer, self.containing_b_producer],
            None,
            None,
            [SensorTag("abba", None)],
        )
    )
    self.assertEqual(series_collection[0].name, "ab.*")
Example #14
def test_load_series_no_data(self, _mocked_method):
    """load_series will raise ValueError if it does not find any tags"""
    iroc_reader = IrocReader(client=None, threads=1)
    with self.assertRaises(ValueError):
        list(
            iroc_reader.load_series(
                train_start_date=isoparse("2018-05-02T01:56:00+00:00"),
                train_end_date=isoparse("2018-05-03T01:56:00+00:00"),
                tag_list=[SensorTag("jalla", None)],  # Not a tag in the input
            )
        )
Example #15
def test_load_series_checks_date(self):
    """load_series will raise ValueError if train_end_date < train_start_date"""
    iroc_reader = IrocReader(client=None, threads=1)
    with self.assertRaises(ValueError):
        list(
            iroc_reader.load_series(
                train_start_date=isoparse("2018-05-03T01:56:00+00:00"),
                train_end_date=isoparse("2018-05-02T01:56:00+00:00"),
                tag_list=[SensorTag("jalla", None)],  # Not a tag in the input
            )
        )
Example #16
def _machine(name: str) -> Machine:
    """
    Helper to build a basic Machine, only defining its name
    """
    from gordo.machine.dataset.sensor_tag import SensorTag

    return Machine.from_config(
        config={
            "name": name,
            "dataset": {
                "tag_list":
                [SensorTag("tag-1", "foo"),
                 SensorTag("tag-2", "foo")],
                "train_start_date": "2016-01-01T00:00:00Z",
                "train_end_date": "2016-01-05T00:00:00Z",
            },
            "model": "sklearn.linear_model.LinearRegression",
        },
        project_name="test-project",
    )
Example #17
def test_insufficient_data_after_row_filtering(n_samples_threshold, filter_value):
    """
    Test that dataframe after row_filter scenarios raise appropriate
    InsufficientDataAfterRowFilteringError
    """

    kwargs = dict(
        data_provider=MockDataProvider(),
        tag_list=[
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        train_start_date=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        train_end_date=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
        n_samples_threshold=n_samples_threshold,
    )

    with pytest.raises(InsufficientDataAfterRowFilteringError):
        TimeSeriesDataset(row_filter=f"`Tag 1` < {filter_value}", **kwargs).get_data()
Example #18
def test_row_filter():
    """Tests that row_filter filters away rows"""
    kwargs = dict(
        data_provider=MockDataProvider(),
        tag_list=[
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        train_start_date=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        train_end_date=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
    )
    X, _ = TimeSeriesDataset(**kwargs).get_data()
    assert 577 == len(X)

    X, _ = TimeSeriesDataset(row_filter="`Tag 1` < 5000", **kwargs).get_data()
    assert 8 == len(X)

    X, _ = TimeSeriesDataset(row_filter="`Tag 1` / `Tag 3` < 0.999",
                             **kwargs).get_data()
    assert 3 == len(X)
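The row_filter strings use pandas DataFrame.query() syntax, where column names containing spaces must be backtick-quoted (supported since pandas 0.25). A minimal illustration, independent of TimeSeriesDataset:

import pandas as pd

df = pd.DataFrame({"Tag 1": [4000.0, 6000.0], "Tag 3": [5000.0, 5000.0]})

print(df.query("`Tag 1` < 5000"))             # keeps only the first row
print(df.query("`Tag 1` / `Tag 3` < 0.999"))  # arithmetic between columns works too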
Example #19
def test_load_series_missing_columns_data(self, _mocked_method):
    """load_series will raise ValueError if there is a single tag it cannot
    find"""
    iroc_reader = IrocReader(client=None, threads=1)
    with self.assertRaises(ValueError):
        list(
            iroc_reader.load_series(
                train_start_date=isoparse("2018-05-02T01:56:00+00:00"),
                train_end_date=isoparse("2018-05-03T01:56:00+00:00"),
                tag_list=IROC_HAPPY_TAG_LIST + [SensorTag("jalla", None)],
                # "jalla" is not a tag
            )
        )
Example #20
def test_insufficient_data_after_automatic_filtering():
    """
    Test that dataframe after row_filter scenarios raise appropriate
    InsufficientDataError
    """

    kwargs = dict(
        data_provider=MockDataProvider(),
        tag_list=[
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        train_start_date=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        train_end_date=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
        n_samples_threshold=84,
        filter_periods={"filter_method": "median"},
    )

    with pytest.raises(InsufficientDataError):
        TimeSeriesDataset(**kwargs).get_data()
Example #21
def test_metadata_statistics():
    """Tests that it works to set aggregation method(s)"""

    kwargs = dict(
        data_provider=MockDataProvider(),
        tag_list=[
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        train_start_date=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        train_end_date=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
    )

    # Default aggregation gives no extra columns
    dataset = TimeSeriesDataset(**kwargs)
    X, _ = dataset.get_data()
    assert (83, 3) == X.shape
    metadata = dataset.get_metadata()
    assert isinstance(metadata["x_hist"], dict)
    assert len(metadata["x_hist"].keys()) == 3
Example #22
def test_insufficient_data_after_known_filter_periods_filtering():
    """
    Test that dataframe after row_filter scenarios raise appropriate
    InsufficientDataError
    """

    kwargs = dict(
        data_provider=MockDataProvider(),
        tag_list=[
            SensorTag("Tag 1", None),
            SensorTag("Tag 2", None),
            SensorTag("Tag 3", None),
        ],
        train_start_date=dateutil.parser.isoparse("2017-12-25 06:00:00Z"),
        train_end_date=dateutil.parser.isoparse("2017-12-29 06:00:00Z"),
        n_samples_threshold=10,
        known_filter_periods=[
            "~('2017-12-25 07:00:00+00:00' <= index <= '2017-12-29 06:00:00+00:00')"
        ],
    )

    with pytest.raises(InsufficientDataError):
        TimeSeriesDataset(**kwargs).get_data()
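known_filter_periods entries are likewise pandas query expressions, evaluated against the index; the leading ~ negates the mask, so everything inside the stated period is dropped. A sketch using @-variables instead of the inline timestamp literals above (both forms are assumed to be accepted):

import numpy as np
import pandas as pd

index = pd.date_range("2017-12-25 06:00", periods=5, freq="1D", tz="UTC")
df = pd.DataFrame({"Tag 1": np.arange(5.0)}, index=index)

start = pd.Timestamp("2017-12-25 07:00:00+00:00")
end = pd.Timestamp("2017-12-29 06:00:00+00:00")

# Rows whose index falls inside [start, end] are removed; only the very
# first row (06:00 on Dec 25) survives.
print(df.query("~((index >= @start) & (index <= @end))"))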
Example #23
def test_load_series_need_base_path(ncs_reader, dates):
    tag = SensorTag("WEIRD-123", "BASE-PATH-ASSET")
    with pytest.raises(ValueError):
        for _ in ncs_reader.load_series(dates[0], dates[1], [tag]):
            pass

    path_to_weird_base_path_asset = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "data",
        "datalake",
        "base_path_asset",
    )
    ncs_reader_with_base = NcsReader(
        AzureDLFileSystemMock(), dl_base_path=path_to_weird_base_path_asset)
    for tag_series in ncs_reader_with_base.load_series(dates[0], dates[1],
                                                       [tag]):
        assert len(tag_series) == 20
Example #24
def test_join_timeseries_empty_series(value, n_rows, resolution, error):
    """
    Test that empty data scenarios raise appropriate errors
    """
    train_start_date = dateutil.parser.isoparse("2018-01-01 00:00:00+00:00")
    train_end_date = dateutil.parser.isoparse("2018-01-05 00:00:00+00:00")
    tag_list = [SensorTag(name=n, asset=None) for n in ["Tag 1", "Tag 2", "Tag 3"]]

    kwargs = {
        "train_start_date": train_start_date,
        "train_end_date": train_end_date,
        "tag_list": tag_list,
        "resolution": resolution,
        "data_provider": MockDataProvider(value=np.NaN, n_rows=n_rows),
    }

    with pytest.raises(error):
        TimeSeriesDataset(**kwargs).get_data()
Example #25
def dataset():
    return RandomDataset(
        train_start_date="2017-01-01 00:00:00Z",
        train_end_date="2018-01-01 00:00:00Z",
        tag_list=[SensorTag("Tag 1", None)],
    )
Example #26
@pytest.fixture
def ncs_reader():
    return NcsReader(AzureDLFileSystemMock())


@pytest.fixture
def dates():
    return (
        dateutil.parser.isoparse("2000-01-01T08:56:00+00:00"),
        dateutil.parser.isoparse("2001-09-01T10:01:00+00:00"),
    )


@pytest.mark.parametrize(
    "tag_to_check",
    [normalize_sensor_tags(["TRC-123"])[0],
     SensorTag("XYZ-123", "1776-TROC")],
)
def test_can_handle_tag_ok(tag_to_check, ncs_reader):
    assert ncs_reader.can_handle_tag(tag_to_check)


@pytest.mark.parametrize(
    "tag_to_check",
    [SensorTag("TRC-123", None),
     SensorTag("XYZ-123", "123-XXX")])
def test_can_handle_tag_notok(tag_to_check, ncs_reader):
    assert not ncs_reader.can_handle_tag(tag_to_check)


def test_can_handle_tag_unknow_prefix_raise(ncs_reader):
    with pytest.raises(ValueError):
        # The function body was truncated in the source; normalizing a tag
        # name with an unknown prefix is assumed to be what raises here.
        ncs_reader.can_handle_tag(normalize_sensor_tags(["XYZ-123"])[0])
Example #27
def test_can_handle_tag_no_asset(self):
    iroc_reader = IrocReader(client=None, threads=1)
    assert not iroc_reader.can_handle_tag(SensorTag("UON_EF.xxx", None))
Example #28
def test_can_handle_tag_unknown_asset(self):
    iroc_reader = IrocReader(client=None, threads=1)
    assert not iroc_reader.can_handle_tag(SensorTag("UON_EF.xxx", "UNKNOWN_ASSET"))
Example #29
def test_can_handle_tag_ok(self):
    iroc_reader = IrocReader(client=None, threads=1)
    assert iroc_reader.can_handle_tag(SensorTag("UON_EF.xxx", "UON_EF"))
Example #30
import unittest
from io import StringIO
from unittest import mock

from dateutil.parser import isoparse  # type: ignore

from gordo.machine.dataset.data_provider.iroc_reader import IrocReader, read_iroc_file
from gordo.machine.dataset.sensor_tag import normalize_sensor_tags
from gordo.machine.dataset.sensor_tag import SensorTag

IROC_HAPPY_TAG_LIST = [
    SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1410J0", "NINENINE"),
    SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1840C1J0", "NINENINE"),
    SensorTag("NINENINE.OPCIS::NNFCDPC01.AI1840E1J0", "NINENINE"),
]

HAPPY_FROM_TS = isoparse("2018-05-02T01:56:00+00:00")
HAPPY_TO_TS = isoparse("2018-05-03T01:56:00+00:00")

IROC_MANY_ASSETS_TAG_LIST = [
    "NINENINE.OPCIS::NNFCDPC01.AI1410J0",
    "NINENINE.OPCIS::NNFCDPC01.AI1840C1J0",
    "NINENINE.OPCIS::NNFCDPC01.AI1840E1J0",
    "UON_EF.OPCIS::LO006-B1H.PRCASXIN",
    "UON_EF.OPCIS::LO006-B1H.PRTUBXIN",
    "UON_EF.OPCIS::LO006-B1H_M1.PRSTAXIN",
    "UON_EF.OPCIS::LO006-B1H_M1.RTGASDIN",
]

IROC_NO_ASSET_TAG_LIST = [
    SensorTag("NOT.OPCIS::NNFCDPC01.AI1410J0", "NOT"),