Beispiel #1
0
def test_exponential_sleep_time(caplog, gordo_project, ml_server):
    """
    Repeated timeouts make the client back off with doubling sleep arguments.

    ``gordo_client.client.sleep`` is patched so nothing really waits, and
    ``gordo_client.client._handle_response`` is patched to raise
    ``TimeoutError`` on every call. The patched ``sleep`` must then have been
    called with 8, 16, 32, 64 and 128, consecutively and in that order.
    """
    start = isoparse("2016-01-01T00:00:00+00:00")
    end = isoparse("2016-01-01T12:00:00+00:00")

    # Context managers are entered left to right: log capture first, then the
    # sleep patch, then the response handler that simulates repeating timeouts.
    with caplog.at_level(logging.CRITICAL), patch(
        "gordo_client.client.sleep", return_value=None
    ) as sleep_mock, patch(
        "gordo_client.client._handle_response", side_effect=TimeoutError()
    ):
        Client(project=gordo_project)._send_prediction_request(
            X=pd.DataFrame([123]),
            y=None,
            chunk=slice(0, 1),
            machine=_machine("t1"),
            start=start,
            end=end,
            revision="1234",
        )

        sleep_mock.assert_has_calls([call(delay) for delay in (8, 16, 32, 64, 128)])
Beispiel #2
0
def test_client_set_revision_error(ml_server, gordo_project):
    """
    Client will raise an error if asking for a revision that doesn't exist
    """
    # Build the client outside the ``raises`` block so that only the request
    # for the unknown revision can satisfy the expected exception.
    client = Client(project=gordo_project)
    with pytest.raises(ResourceGone):
        client.get_machine_names(revision="does-not-exist")
Beispiel #3
0
def predict(
    ctx: click.Context,
    start: datetime,
    end: datetime,
    target: List[str],
    data_provider: providers.GordoBaseDataProvider,
    output_dir: str,
    influx_uri: str,
    influx_api_key: str,
    influx_recreate_db: bool,
    forward_resampled_sensors: bool,
    n_retries: int,
    parquet: bool,
):
    """Run some predictions against the target."""
    # NOTE(review): presumably registered as a click command (decorators are
    # not visible here), in which case the docstring above doubles as the
    # --help text; it is therefore left unchanged and details live in comments.
    #
    # Flow: build a Client from ctx.obj, predict for [start, end] on `target`,
    # print every error message in red, optionally write one gzipped CSV per
    # result into `output_dir`, then exit with 1 if any error was reported
    # and 0 otherwise.

    # Predictions are only forwarded when an influx URI was supplied.
    prediction_forwarder = None
    if influx_uri is not None:
        prediction_forwarder = ForwardPredictionsIntoInflux(
            destination_influx_uri=influx_uri,
            destination_influx_api_key=influx_api_key,
            destination_influx_recreate=influx_recreate_db,
            n_retries=n_retries,
        )

    # ctx.obj["kwargs"] is updated in place, so these entries stay visible to
    # anything else that shares the same ctx.obj.
    ctx.obj["kwargs"].update(
        {
            "data_provider": data_provider,
            "forward_resampled_sensors": forward_resampled_sensors,
            "n_retries": n_retries,
            "use_parquet": parquet,
            "prediction_forwarder": prediction_forwarder,
        }
    )

    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])

    # Fire off getting predictions. The results are walked twice below (error
    # summary, then CSV export); materialising them once guarantees the second
    # pass still sees every item even if a one-shot iterable is returned.
    predictions = list(
        client.predict(start, end, targets=target)
    )  # type: List[Tuple[str, pd.DataFrame, List[str]]]

    # Loop over all error messages for each result and log them
    click.secho(
        f"\n{'-' * 20} Summary of failed predictions (if any) {'-' * 20}")
    exit_code = 0
    for _name, _df, error_messages in predictions:
        for err_msg in error_messages:
            # Any error message indicates we encountered at least one error
            exit_code = 1
            click.secho(err_msg, fg="red")

    # Shall we write the predictions out?
    if output_dir is not None:
        for name, prediction_df, _err_msgs in predictions:
            prediction_df.to_csv(
                os.path.join(output_dir, f"{name}.csv.gz"),
                compression="gzip",
            )
    sys.exit(exit_code)
Beispiel #4
0
def test_client_get_metadata(gordo_project, ml_server):
    """
    The client returns metadata as a dict with no entry for an unknown target
    """
    client = Client(project=gordo_project)

    all_metadata = client.get_metadata()
    assert isinstance(all_metadata, dict)

    # A freshly fetched mapping has nothing under a target that doesn't exist
    refetched = client.get_metadata()
    assert refetched.get("no-such-target") is None
Beispiel #5
0
def metadata(ctx: click.Context, output_file: Optional[IO[str]],
             target: List[str]):
    """Get metadata from a given endpoint."""
    # Returns a mapping of each key reported by the client to the plain-dict
    # form of its metadata; the same mapping is either dumped as JSON into
    # `output_file` or pretty-printed when no file is given.
    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])
    fetched = client.get_metadata(targets=target)  # type: ignore

    target_metadata = {}
    for name, meta in fetched.items():  # type: ignore
        target_metadata[name] = meta.dict()  # type: ignore

    if not output_file:
        pprint(target_metadata)
        return target_metadata

    json.dump(target_metadata, output_file)
    click.secho(f"Saved metadata json to file: '{output_file}'")
    return target_metadata
Beispiel #6
0
def test_predict_single_machine_all_columns(data_provider, mocked_responses, machine):
    """
    A client created with ``all_columns=True`` yields a ``PredictionResult``
    from ``predict_single_machine`` when the anomaly prediction URL carrying
    ``all_columns=true`` is mocked.
    """
    client = Client(project="gordo-test", data_provider=data_provider, all_columns=True)

    # Predict over the seven days leading up to now
    end = datetime.now(tz=UTC)
    start = end - timedelta(days=7)

    _mock_response(
        mocked_responses,
        "/gordo/v0/gordo-test/gordo-test/anomaly/prediction?format=json&revision=1604861479899&all_columns=true",
        "anomaly",
    )

    result = client.predict_single_machine(
        start=start,
        end=end,
        revision="1604861479899",
        machine=machine,
    )

    assert isinstance(result, PredictionResult)
Beispiel #7
0
def test_client_get_dataset(gordo_project, metadata, ml_server):
    """
    ``Client._get_dataset`` hands back a dataset with filtering switched off.

    The machine's dataset is given a row filter buffer size of 12 and a sample
    threshold of 10 beforehand; the dataset obtained through the client must
    report 0 for both and ``None`` for the low and high thresholds.
    """
    client = Client(
        project=gordo_project,
        data_provider=providers.RandomDataProvider(min_size=10),
    )
    start, end = (
        isoparse("2016-01-01T00:00:00+00:00"),
        isoparse("2016-01-01T12:00:00+00:00"),
    )

    machine = Machine(**metadata)
    source_dataset = machine.dataset
    assert type(source_dataset) is TimeSeriesDataset
    source_dataset.row_filter_buffer_size = 12
    source_dataset.n_samples_threshold = 10

    dataset = client._get_dataset(ClientMachine(**machine.to_dict()), start, end)

    assert dataset.row_filter_buffer_size == 0
    assert dataset.n_samples_threshold == 0
    assert dataset.low_threshold is None
    assert dataset.high_threshold is None
Beispiel #8
0
def download_model(ctx: click.Context, output_dir: str, target: List[str]):
    """Download the actual model from the target and write to an output directory."""
    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])

    # Every downloaded model is dumped into a sub-directory of output_dir
    # named after the model; os.mkdir raises if that directory already exists.
    for model_name, model in client.download_model(targets=target).items():
        model_out_dir = os.path.join(output_dir, model_name)
        os.mkdir(model_out_dir)

        progress = f"Writing model '{model_name}' to directory: '{model_out_dir}'..."
        click.secho(progress, nl=False)  # "done" is appended on the same line
        _dump_model(model, model_out_dir)
        click.secho("done")

    click.secho(f"Wrote all models to directory: {output_dir}", fg="green")
Beispiel #9
0
def test_client_predict_specific_targets(gordo_project, gordo_single_target,
                                         ml_server):
    """
    Client.predict should filter any endpoints given to it.
    """
    client = Client(project=gordo_project)
    with mock.patch.object(
            Client,
            "predict_single_machine",
            return_value=PredictionResult("test-name", [], []),
    ) as patched:

        # Both bounds are plain datetimes (a stray trailing comma would turn
        # ``start`` into a 1-tuple).
        start = isoparse("2016-01-01T00:00:00+00:00")
        end = isoparse("2016-01-01T12:00:00+00:00")

        # Should not call any predictions because this machine name doesn't exist
        with pytest.raises(NotFound):
            client.predict(start=start,
                           end=end,
                           targets=["non-existent-machine"])
        # Checked after the ``raises`` block: a statement placed behind the
        # raising call inside that block would never be executed.
        patched.assert_not_called()

        # Should be called once, for this machine.
        client.predict(start=start, end=end, targets=[gordo_single_target])
        patched.assert_called_once()
Beispiel #10
0
def test_client_download_model(gordo_project, gordo_single_target, ml_server):
    """
    Test client's ability to download the model
    """
    client = Client(project=gordo_project)

    models = client.download_model()
    assert isinstance(models, dict)
    assert isinstance(models[gordo_single_target], BaseEstimator)

    # Can't download model for non-existent target. The fresh client is built
    # outside the ``raises`` block so that only the download request itself
    # can satisfy the expected exception.
    fresh_client = Client(project=gordo_project)
    with pytest.raises(NotFound):
        fresh_client.download_model(targets=["non-existent-target"])
Beispiel #11
0
def test_client_predictions_diff_batch_sizes(
    gordo_project,
    gordo_single_target,
    influxdb,
    influxdb_uri,
    influxdb_measurement,
    ml_server,
    batch_size: int,
    use_parquet: bool,
):
    """
    Predict with the given ``batch_size`` / ``use_parquet`` combination while
    forwarding the predictions into influx, and check both the returned
    results and that something was written to the destination.
    """
    start = isoparse("2016-01-01T00:00:00+00:00")
    end = isoparse("2016-01-01T12:00:00+00:00")

    # Separate influx client, used by this test only to inspect the destination
    inspector = client_utils.influx_client_from_uri(influxdb_uri)

    # Selects what the prediction client forwards for the single target
    query = f"""
    SELECT *
    FROM "model-output"
    WHERE("machine" =~ /^{gordo_single_target}$/)
    """

    # Nothing has been forwarded yet, so the query must come back empty
    assert len(inspector.query(query)) == 0

    data_provider = providers.InfluxDataProvider(
        measurement=influxdb_measurement,
        value_name="Value",
        client=client_utils.influx_client_from_uri(
            uri=influxdb_uri, dataframe_client=True
        ),
    )
    forwarder = ForwardPredictionsIntoInflux(  # type: ignore
        destination_influx_uri=influxdb_uri
    )
    prediction_client = Client(
        project=gordo_project,
        data_provider=data_provider,
        prediction_forwarder=forwarder,
        batch_size=batch_size,
        use_parquet=use_parquet,
        parallelism=10,
    )

    assert len(prediction_client.get_machine_names()) == 2

    # One result per machine is expected back, as a list
    results = prediction_client.predict(start=start, end=end)
    assert isinstance(results, list)
    assert len(results) == 2

    # Inspect the first (name, dataframe, error messages) result
    name, prediction_df, error_messages = results[0]
    assert isinstance(name, str)
    assert isinstance(prediction_df, pd.DataFrame)
    assert isinstance(error_messages, list)
    assert isinstance(prediction_df.index, pd.core.indexes.datetimes.DatetimeIndex)

    # Predicting should have written to influx, so the same query that was
    # empty before must now return values
    vals = inspector.query(query)
    assert (
        len(vals) > 0
    ), f"Expected new values in 'predictions' measurement, but found {vals}"
Beispiel #12
0
def test_client_metadata_revision(gordo_project, gordo_single_target, ml_server):
    """
    The client's available-machines result contains a "revision" entry
    """
    client = Client(project=gordo_project)
    available_machines = client.get_available_machines()
    assert "revision" in available_machines
Beispiel #13
0
def client(data_provider):
    """Return a ``Client`` for the "gordo-test" project backed by ``data_provider``."""
    return Client(project="gordo-test", data_provider=data_provider)