def test_exponential_sleep_time(caplog, gordo_project, ml_server):
    start, end = (
        isoparse("2016-01-01T00:00:00+00:00"),
        isoparse("2016-01-01T12:00:00+00:00"),
    )

    with caplog.at_level(logging.CRITICAL):
        with patch("gordo.client.client.sleep", return_value=None) as time_sleep:
            # We simulate repeating timeouts
            with patch("gordo.client.client._handle_response") as handle_response_mock:
                handle_response_mock.side_effect = TimeoutError()

                client = Client(project=gordo_project)
                client._send_prediction_request(
                    X=pd.DataFrame([123]),
                    y=None,
                    chunk=slice(0, 1),
                    machine=_machine("t1"),
                    start=start,
                    end=end,
                    revision="1234",
                )

            expected_calls = [call(8), call(16), call(32), call(64), call(128)]
            time_sleep.assert_has_calls(expected_calls)
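
# For illustration only: a minimal sketch of the backoff pattern the test above
# pins down. The sleep times it asserts (8, 16, 32, 64, 128) double on each
# retry, i.e. 2 ** (attempt + 3) seconds for attempts 0..4. The helper name is
# hypothetical and not part of the gordo client API.
def _expected_backoff_seconds(n_retries: int = 5) -> typing.List[int]:
    """Sleep durations a doubling backoff starting at 8 seconds would produce."""
    return [2 ** (attempt + 3) for attempt in range(n_retries)]  # [8, 16, 32, 64, 128]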
def test_client_set_revision_error(ml_server, gordo_project):
    """
    Client should raise an error when asked for a revision that doesn't exist
    """
    with pytest.raises(ResourceGone):
        client = Client(project=gordo_project)
        client.get_machine_names(revision="does-not-exist")
def test_client_metadata_revision(gordo_project, gordo_single_target, ml_server):
    prediction_client = Client(project=gordo_project)
    assert "revision" in prediction_client.get_available_machines()
def predict(
    ctx: click.Context,
    start: datetime,
    end: datetime,
    target: typing.List[str],
    data_provider: providers.GordoBaseDataProvider,
    output_dir: str,
    influx_uri: str,
    influx_api_key: str,
    influx_recreate_db: bool,
    forward_resampled_sensors: bool,
    n_retries: int,
    parquet: bool,
):
    """
    Run some predictions against the target
    """
    ctx.obj["kwargs"].update(
        {
            "data_provider": data_provider,
            "forward_resampled_sensors": forward_resampled_sensors,
            "n_retries": n_retries,
            "use_parquet": parquet,
        }
    )

    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])

    if influx_uri is not None:
        client.prediction_forwarder = ForwardPredictionsIntoInflux(  # type: ignore
            destination_influx_uri=influx_uri,
            destination_influx_api_key=influx_api_key,
            destination_influx_recreate=influx_recreate_db,
            n_retries=n_retries,
        )

    # Fire off getting predictions
    predictions = client.predict(
        start, end, targets=target
    )  # type: typing.Iterable[typing.Tuple[str, pd.DataFrame, typing.List[str]]]

    # Loop over all error messages for each result and log them
    click.secho(f"\n{'-' * 20} Summary of failed predictions (if any) {'-' * 20}")
    exit_code = 0
    for _name, _df, error_messages in predictions:
        for err_msg in error_messages:
            # Any error message indicates we encountered at least one error
            exit_code = 1
            click.secho(err_msg, fg="red")

    # Shall we write the predictions out?
    if output_dir is not None:
        for name, prediction_df, _err_msgs in predictions:
            prediction_df.to_csv(
                os.path.join(output_dir, f"{name}.csv.gz"), compression="gzip"
            )
    sys.exit(exit_code)
def test_client_get_metadata(gordo_project, ml_server):
    """
    Test client's ability to get metadata from some target
    """
    client = Client(project=gordo_project)
    metadata = client.get_metadata()
    assert isinstance(metadata, dict)

    # Can't get metadata for a non-existent target
    assert client.get_metadata().get("no-such-target", None) is None
def test_client_get_dataset(gordo_project, metadata, ml_server):
    data_provider = providers.RandomDataProvider(min_size=10)
    client = Client(project=gordo_project, data_provider=data_provider)
    start = isoparse("2016-01-01T00:00:00+00:00")
    end = isoparse("2016-01-01T12:00:00+00:00")
    machine = Machine(**metadata)
    assert type(machine.dataset) is TimeSeriesDataset

    machine.dataset.row_filter_buffer_size = 12
    machine.dataset.n_samples_threshold = 10

    # The client should reset training-time filtering/threshold settings
    # before running predictions
    dataset = client._get_dataset(machine, start, end)
    assert dataset.row_filter_buffer_size == 0
    assert dataset.n_samples_threshold == 0
    assert dataset.low_threshold is None
    assert dataset.high_threshold is None
def download_model(ctx: click.Context, output_dir: str, target: typing.List[str]):
    """
    Download the actual model from the target and write to an output directory
    """
    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])
    models = client.download_model(targets=target)

    # Iterate over mapping of models and save into their own sub dirs of the output_dir
    for model_name, model in models.items():
        model_out_dir = os.path.join(output_dir, model_name)
        os.mkdir(model_out_dir)
        click.secho(
            f"Writing model '{model_name}' to directory: '{model_out_dir}'...", nl=False
        )
        serializer.dump(model, model_out_dir)
        click.secho("done")

    click.secho(f"Wrote all models to directory: {output_dir}", fg="green")
def metadata(
    ctx: click.Context,
    output_file: typing.Optional[typing.IO[str]],
    target: typing.List[str],
):
    """
    Get metadata from a given endpoint
    """
    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])
    metadata = {
        k: v.to_dict()
        for k, v in client.get_metadata(targets=target).items()  # type: ignore
    }
    if output_file:
        json.dump(metadata, output_file)
        click.secho(f"Saved metadata json to file: '{output_file}'")
    else:
        pprint(metadata)
    return metadata
def test_client_predict_specific_targets(gordo_project, gordo_single_target, ml_server):
    """
    Client.predict should filter any endpoints given to it.
    """
    client = Client(project=gordo_project)

    with mock.patch.object(
        Client,
        "predict_single_machine",
        return_value=PredictionResult("test-name", [], []),
    ) as patched:

        start = isoparse("2016-01-01T00:00:00+00:00")
        end = isoparse("2016-01-01T12:00:00+00:00")

        # Should not call any predictions because this machine name doesn't exist
        with pytest.raises(NotFound):
            client.predict(start=start, end=end, targets=["non-existent-machine"])
        patched.assert_not_called()

        # Should be called once, for this machine.
        client.predict(start=start, end=end, targets=[gordo_single_target])
        patched.assert_called_once()
def test_client_download_model(gordo_project, gordo_single_target, ml_server):
    """
    Test client's ability to download the model
    """
    client = Client(project=gordo_project)
    models = client.download_model()
    assert isinstance(models, dict)
    assert isinstance(models[gordo_single_target], BaseEstimator)

    # Can't download model for non-existent target
    with pytest.raises(NotFound):
        client = Client(project=gordo_project)
        client.download_model(targets=["non-existent-target"])
def test_client_predictions_diff_batch_sizes(
    gordo_project,
    gordo_single_target,
    influxdb,
    influxdb_uri,
    influxdb_measurement,
    ml_server,
    batch_size: int,
    use_parquet: bool,
):
    """
    Run the prediction client with different batch sizes, with and without
    a data provider.
    """
    # Time range used in this test
    start, end = (
        isoparse("2016-01-01T00:00:00+00:00"),
        isoparse("2016-01-01T12:00:00+00:00"),
    )

    # Client only used within this test
    test_client = client_utils.influx_client_from_uri(influxdb_uri)

    # Measurements created by the prediction client with an influx destination
    query = f"""
    SELECT *
    FROM "model-output"
    WHERE ("machine" =~ /^{gordo_single_target}$/)
    """

    # Before predicting, influx destination db should be empty for 'predictions' measurement
    vals = test_client.query(query)
    assert len(vals) == 0

    data_provider = providers.InfluxDataProvider(
        measurement=influxdb_measurement,
        value_name="Value",
        client=client_utils.influx_client_from_uri(
            uri=influxdb_uri, dataframe_client=True
        ),
    )

    prediction_client = Client(
        project=gordo_project,
        data_provider=data_provider,
        prediction_forwarder=ForwardPredictionsIntoInflux(  # type: ignore
            destination_influx_uri=influxdb_uri
        ),
        batch_size=batch_size,
        use_parquet=use_parquet,
        parallelism=10,
    )

    assert len(prediction_client.get_machine_names()) == 2

    # Get predictions
    predictions = prediction_client.predict(start=start, end=end)
    assert isinstance(predictions, list)
    assert len(predictions) == 2

    name, predictions, error_messages = predictions[0]  # First prediction result
    assert isinstance(name, str)
    assert isinstance(predictions, pd.DataFrame)
    assert isinstance(error_messages, list)
    assert isinstance(predictions.index, pd.core.indexes.datetimes.DatetimeIndex)

    # This should have resulted in writing predictions to influx:
    # the destination db should now contain new values
    vals = test_client.query(query)
    assert (
        len(vals) > 0
    ), f"Expected new values in 'predictions' measurement, but found {vals}"