Example #1
    def __init__(
        self,
        destination_influx_uri: Optional[str] = None,
        destination_influx_api_key: Optional[str] = None,
        destination_influx_recreate: bool = False,
    ):
        """
        Create an instance which, when called, is a coroutine capable of
        being sent dataframes generated from the '/anomaly/prediction' endpoint

        Parameters
        ----------
        destination_influx_uri: str
            Connection string for destination influx -
            format: <username>:<password>@<host>:<port>/<optional-path>/<db_name>
        destination_influx_api_key: str
            API key if needed for destination db
        destination_influx_recreate: bool
            Drop the database before filling it with data?
        """
        # Create df client if provided
        self.dataframe_client = (influx_client_from_uri(
            destination_influx_uri,
            api_key=destination_influx_api_key,
            recreate=destination_influx_recreate,
            dataframe_client=True,
        ) if destination_influx_uri else None)
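A minimal construction sketch under the docstring above; the class name ForwardPredictionsIntoInflux is taken from the tests later in this section, and the connection values are hypothetical placeholders following the documented format:

# Hypothetical URI following <username>:<password>@<host>:<port>/<optional-path>/<db_name>
forwarder = ForwardPredictionsIntoInflux(
    destination_influx_uri="user:password@localhost:8086/testdb",
    destination_influx_recreate=False,  # keep any existing database contents
)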
Example #2
    def __init__(
        self,
        measurement: str,
        value_name: str = "Value",
        api_key: Optional[str] = None,
        api_key_header: Optional[str] = None,
        client: Optional[DataFrameClient] = None,
        uri: Optional[str] = None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        measurement: str
            Name of the measurement to select from in Influx
        value_name: str
            Name of the value to select, defaults to 'Value'
        api_key: str
            API key to use in the request header
        api_key_header: str
            Name of the header to insert the API key under for requests
        client: DataFrameClient
            Optional pre-configured client to use; if not given, one is
            created from 'uri' or from the remaining kwargs
        uri: str
            Create a client from a URI
            format: <username>:<password>@<host>:<port>/<optional-path>/<db_name>
        kwargs: dict
            These are passed directly to the init args of influxdb.DataFrameClient
        """
        super().__init__(**kwargs)
        self.measurement = measurement
        self.value_name = value_name
        self.influx_client = client
        if kwargs.pop("threads", None):
            logger.warning(
                "InfluxDataProvider got parameter 'threads' which is not supported, it "
                "will be ignored.")

        if self.influx_client is None:
            if uri:

                # Import here to avoid a circular import: importing
                # TimeSeriesDataset imports this provider, which would import
                # Client via the package __init__, which in turn would try to
                # import TimeSeriesDataset again.
                from gordo_components.client.utils import influx_client_from_uri

                self.influx_client = influx_client_from_uri(  # type: ignore
                    uri,
                    api_key=api_key,
                    api_key_header=api_key_header,
                    dataframe_client=True,
                )
            else:
                self.influx_client = DataFrameClient(**kwargs)
                if api_key is not None:
                    if not api_key_header:
                        raise ValueError(
                            "If supplying an api key, you must supply the header key to insert it under."
                        )
                    self.influx_client._headers[api_key_header] = api_key
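Two construction sketches implied by the branches above; host, port, and database values are hypothetical placeholders:

from influxdb import DataFrameClient

# Option 1: supply a pre-built DataFrameClient directly
provider = InfluxDataProvider(
    measurement="sensors",
    value_name="Value",
    client=DataFrameClient(host="localhost", port=8086, database="testdb"),
)

# Option 2: let the provider create its own client from the URI
provider = InfluxDataProvider(
    measurement="sensors",
    value_name="Value",
    uri="user:password@localhost:8086/testdb",
)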
Example #3
def test_client_cli_predict(
    influxdb, watchman_service, forwarder_args, output_dir, data_provider
):
    """
    Test ability for client to get predictions via CLI
    """
    runner = CliRunner()

    args = [
        "client",
        "--metadata",
        "key,value",
        "--project",
        tu.GORDO_PROJECT,
        "predict",
        "2016-01-01T00:00:00Z",
        "2016-01-01T01:00:00Z",
    ]

    influx_client = client_utils.influx_client_from_uri(
        uri=tu.INFLUXDB_URI, dataframe_client=True
    )
    query = """
        SELECT *
        FROM "resampled"
        """

    # Do we have forwarder args?
    if forwarder_args is not None:
        args.extend(forwarder_args)
        vals = influx_client.query(query)
        # There should be no data present before we start
        assert len(vals) == 0

    # Should it write out the predictions to dataframes in an output directory?
    if output_dir is not None:
        args.extend(["--output-dir", output_dir.name])

    # If we have a data provider, use POST requests; otherwise GET
    if data_provider is not None:
        args.extend(["--data-provider", json.dumps(data_provider.to_dict())])

    # Run without any error
    out = runner.invoke(cli.gordo, args=args)
    assert out.exit_code == 0, f"{out.output}"

    # If we activated forwarder and we had any actual data then there should
    # be resampled values in the influx
    if forwarder_args and data_provider:
        vals = influx_client.query(query)
        assert len(vals) == 1
        assert len(vals["resampled"]) == 28
        influx_client.drop_measurement("resampled")

    # Did it save dataframes to output dir if specified?
    if output_dir is not None:
        assert os.path.exists(
            os.path.join(output_dir.name, f"{tu.GORDO_SINGLE_TARGET}.csv.gz")
        )
Example #4
async def test_influx_forwarder(influxdb):
    """
    Test that the forwarder creates correct points from a
    multi-indexed series
    """
    endpoint = EndpointMetadata(
        "some-target-name",
        healthy=True,
        endpoint="/some-endpoint",
        tag_list=tu.SENSORTAG_LIST,
        target_tag_list=tu.SENSORTAG_LIST,
        resolution="10T",
        model_offset=0,
    )

    # Feature outputs which match the length of the tags
    # These should then be re-mapped to the sensor tag names
    keys = [("name1", i) for i, _ in enumerate(tu.SENSORTAG_LIST)]

    # Feature outputs which don't match the length of the tags
    # These will be kept at 0..N as field names
    keys.extend([("name2", i) for i in range(len(tu.SENSORTAG_LIST) * 2)])

    # Build an empty dataframe with the multi-index columns and a datetime index
    columns = pd.MultiIndex.from_tuples(keys)
    index = pd.date_range("2019-01-01", "2019-01-02", periods=4)
    df = pd.DataFrame(columns=columns, index=index)

    # Generate some unique values for each key, and insert it into that column
    for i, key in enumerate(keys):
        df[key] = range(i, i + 4)

    # Create the forwarder and forward the 'predictions' to influx.
    forwarder = ForwardPredictionsIntoInflux(
        destination_influx_uri=tu.INFLUXDB_URI)
    await forwarder.forward_predictions(predictions=df, endpoint=endpoint)

    # Client to manually verify the points written
    client = influx_client_from_uri(tu.INFLUXDB_URI, dataframe_client=True)

    name1_results = client.query("SELECT * FROM name1")["name1"]

    # Should have the tag names as column names since the shape matched
    assert all(c in name1_results.columns
               for c in ["machine"] + tu.SENSORS_STR_LIST)
    for i, tag in enumerate(tu.SENSORS_STR_LIST):
        assert np.allclose(df[("name1", i)].values, name1_results[tag].values)

    # Now check that the other top-level name, "name2", is a measurement with the correct points written
    name2_results = client.query("SELECT * FROM name2")["name2"]

    # Should not have the tag names as columns, since the shape was twice as
    # long; columns should just be the numeric positions
    assert all(
        str(c) in name2_results.columns
        for c in ["machine"] + list(range(len(tu.SENSORTAG_LIST) * 2))
    )
    for key in filter(lambda k: k[0] == "name2", keys):
        assert np.allclose(df[key].values, name2_results[str(key[1])].values)
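The remapping rule this test asserts can be summarized in a few lines. This is only an illustration of the expected behavior, not the forwarder's actual implementation:

# Stand-ins for tu.SENSORS_STR_LIST
tags = ["tag-0", "tag-1", "tag-2"]

def expected_field_names(sub_columns, tags):
    """Field names the test expects for a given second-level column set."""
    sub_columns = list(sub_columns)
    if len(sub_columns) == len(tags):
        return tags                        # shape matches: use the tag names
    return [str(c) for c in sub_columns]   # otherwise keep numeric positions

assert expected_field_names(range(3), tags) == ["tag-0", "tag-1", "tag-2"]
assert expected_field_names(range(6), tags) == ["0", "1", "2", "3", "4", "5"]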
Example #5
    def __init__(
        self,
        destination_influx_uri: Optional[str] = None,
        destination_influx_api_key: Optional[str] = None,
        destination_influx_recreate: bool = False,
    ):
        """
        Create an instance which, when called, is a coroutine capable of
        being sent autoencoder prediction dataframes, which it will forward to influx

        By autoencoder prediction dataframes, we mean the columns are prefixed
        with 'output_' and 'input_' followed by the tag/sensor name, and the
        dataframe has a DatetimeIndex

        Parameters
        ----------
        destination_influx_uri: str
            Connection string for destination influx -
            format: <username>:<password>@<host>:<port>/<optional-path>/<db_name>
        destination_influx_api_key: str
            API key if needed for destination db
        destination_influx_recreate: bool
            Drop the database before filling it with data?
        """
        # Create clients if provided
        self.destination_client = (
            influx_client_from_uri(
                destination_influx_uri,
                api_key=destination_influx_api_key,
                recreate=destination_influx_recreate,
            )
            if destination_influx_uri
            else None
        )
        self.dataframe_client = (
            influx_client_from_uri(
                destination_influx_uri,
                api_key=destination_influx_api_key,
                recreate=destination_influx_recreate,
                dataframe_client=True,
            )
            if destination_influx_uri
            else None
        )
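A sketch of the dataframe shape the docstring describes, mirroring the usage in the forwarder test above; the tag name, values, and URI are made up:

import pandas as pd

# Columns prefixed with 'input_' / 'output_' plus the tag name, DatetimeIndex
index = pd.date_range("2019-01-01", periods=4, freq="10T")
df = pd.DataFrame(
    {"input_tag-0": range(4), "output_tag-0": range(4)},
    index=index,
)

forwarder = ForwardPredictionsIntoInflux(
    destination_influx_uri="user:password@localhost:8086/testdb"
)
# The instance is then awaited with the predictions, as in the test above:
# await forwarder.forward_predictions(predictions=df, endpoint=endpoint)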
Example #6
def test_get_list_of_tags(influxdb):
    ds = InfluxDataProvider(
        measurement="sensors",
        value_name="Value",
        client=influx_client_from_uri(uri=tu.INFLUXDB_URI, dataframe_client=True),
    )
    expected_tags = set(tu.SENSORS_STR_LIST)

    tags = set(ds.get_list_of_tags())
    assert expected_tags == tags

    # Calling again should hit the cache and still return the same tags
    tags = set(ds.get_list_of_tags())
    assert expected_tags == tags
Example #7
def test__list_of_tags_from_influx_validate_tag_names(influxdb):
    """
    Test that the expected tags match the ones actually in influx.
    """
    ds = InfluxDataProvider(
        measurement="sensors",
        value_name="Value",
        client=influx_client_from_uri(uri=tu.INFLUXDB_URI,
                                      dataframe_client=True),
    )
    list_of_tags = ds._list_of_tags_from_influx()
    expected_tags = tu.SENSORS_STR_LIST
    tags = set(list_of_tags)
    assert set(expected_tags) == tags, (f"Expected tags {expected_tags}, "
                                        f"but got {tags}")
Example #8
def test_read_single_sensor_empty_data_invalid_tag_name_valueerror(influxdb):
    """
    Asserts that a ValueError is raised because the supplied tag name is invalid
    """
    from_ts = dateutil.parser.isoparse("2016-01-01T09:11:00+00:00")
    to_ts = dateutil.parser.isoparse("2016-01-01T10:30:00+00:00")

    ds = InfluxDataProvider(
        measurement="sensors",
        value_name="Value",
        client=influx_client_from_uri(uri=tu.INFLUXDB_URI, dataframe_client=True),
    )
    with pytest.raises(ValueError):
        ds.read_single_sensor(
            from_ts=from_ts,
            to_ts=to_ts,
            tag="tag-does-not-exist",
            measurement="sensors",
        )
Example #9
def test_read_single_sensor_empty_data_time_range_indexerror(influxdb, caplog):
    """
    Asserts that an IndexError is raised because the dates requested are outside the existing time period
    """
    from_ts = dateutil.parser.isoparse("2017-01-01T09:11:00+00:00")
    to_ts = dateutil.parser.isoparse("2017-01-01T10:30:00+00:00")

    ds = InfluxDataProvider(
        measurement="sensors",
        value_name="Value",
        client=influx_client_from_uri(uri=tu.INFLUXDB_URI, dataframe_client=True),
    )

    with caplog.at_level(logging.CRITICAL):
        with pytest.raises(IndexError):
            ds.read_single_sensor(
                from_ts=from_ts,
                to_ts=to_ts,
                tag=tu.SENSORS_STR_LIST[0],
                measurement="sensors",
            )
Example #10
def test_influx_dataset_attrs(influxdb):
    """
    Test expected attributes
    """
    from_ts = dateutil.parser.isoparse("2016-01-01T09:11:00+00:00")
    to_ts = dateutil.parser.isoparse("2016-01-01T10:30:00+00:00")
    tag_list = tu.SENSORTAG_LIST
    config = {
        "type": "TimeSeriesDataset",
        "from_ts": from_ts,
        "to_ts": to_ts,
        "tag_list": tag_list,
    }
    config["data_provider"] = InfluxDataProvider(
        measurement="sensors",
        value_name="Value",
        client=influx_client_from_uri(uri=tu.INFLUXDB_URI, dataframe_client=True),
    )
    dataset = _get_dataset(config)
    assert hasattr(dataset, "get_metadata")

    metadata = dataset.get_metadata()
    assert isinstance(metadata, dict)
Example #11
def test_client_predictions_diff_batch_sizes_and_toggle_data_provider(
    influxdb, watchman_service, use_data_provider: bool, batch_size: int
):
    """
    Run the prediction client with different batch-sizes and whether to use
    a data provider or not.
    """
    # Time range used in this test
    start, end = (
        isoparse("2016-01-01T00:00:00+00:00"),
        isoparse("2016-01-01T12:00:00+00:00"),
    )

    # Client only used within this test
    test_client = client_utils.influx_client_from_uri(tu.INFLUXDB_URI)

    # Query for measurements created by the prediction client in the destination influx
    query = f"""
    SELECT *
    FROM "model-output"
    WHERE("machine" =~ /^{tu.GORDO_SINGLE_TARGET}$/)
    """

    # Before predicting, the influx destination db should have no data in the 'model-output' measurement
    vals = test_client.query(query)
    assert len(vals) == 0

    data_provider = (
        providers.InfluxDataProvider(
            measurement=tu.INFLUXDB_MEASUREMENT,
            value_name="Value",
            client=client_utils.influx_client_from_uri(
                uri=tu.INFLUXDB_URI, dataframe_client=True
            ),
        )
        if use_data_provider
        else None
    )

    prediction_client = Client(
        project=tu.GORDO_PROJECT,
        data_provider=data_provider,
        prediction_forwarder=ForwardPredictionsIntoInflux(
            destination_influx_uri=tu.INFLUXDB_URI
        ),
        batch_size=batch_size,
    )

    # Should have discovered machine-1
    assert len(prediction_client.endpoints) == 1

    # All endpoints should be healthy
    assert all(ep.healthy for ep in prediction_client.endpoints)

    # Get predictions
    predictions = prediction_client.predict(start=start, end=end)
    assert isinstance(predictions, list)
    assert len(predictions) == 1

    name, predictions, error_messages = predictions[0]  # First prediction result
    assert isinstance(name, str)
    assert isinstance(predictions, pd.DataFrame)
    assert isinstance(error_messages, list)

    assert isinstance(predictions.index, pd.core.indexes.datetimes.DatetimeIndex)

    # This should have resulted in writing predictions to influx
    vals = test_client.query(query)
    assert (
        len(vals) > 0
    ), f"Expected new values in 'model-output' measurement, but found {vals}"