def predict(
    ctx: click.Context,
    start: datetime,
    end: datetime,
    data_provider: providers.GordoBaseDataProvider,
    output_dir: str,
    influx_uri: str,
    influx_api_key: str,
    influx_recreate_db: bool,
    forward_resampled_sensors: bool,
    ignore_unhealthy_targets: bool,
    n_retries: int,
    parquet: bool,
):
    """
    Run some predictions against the target.

    Builds a ``Client`` from the CLI context's stored args/kwargs, optionally
    attaches an influx prediction forwarder, runs predictions over the
    ``start``/``end`` time range, logs any per-target error messages, and
    optionally writes each target's prediction DataFrame to a gzipped CSV in
    ``output_dir``. Exits the process with code 1 if any prediction produced
    an error message, 0 otherwise.
    """
    ctx.obj["kwargs"].update(
        {
            "data_provider": data_provider,
            "forward_resampled_sensors": forward_resampled_sensors,
            "ignore_unhealthy_targets": ignore_unhealthy_targets,
            "n_retries": n_retries,
            "use_parquet": parquet,
        }
    )

    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])

    if influx_uri is not None:
        client.prediction_forwarder = ForwardPredictionsIntoInflux(
            destination_influx_uri=influx_uri,
            destination_influx_api_key=influx_api_key,
            destination_influx_recreate=influx_recreate_db,
            n_retries=n_retries,
        )

    # Fire off getting predictions.
    # NOTE: materialize into a list here. The result is typed as an Iterable,
    # and we traverse it twice below (error summary, then CSV output); a lazy
    # iterable would be exhausted after the first pass and the CSV loop would
    # silently write nothing.
    predictions = list(
        client.predict(start, end)
    )  # type: typing.List[typing.Tuple[str, pd.DataFrame, typing.List[str]]]

    # Loop over all error messages for each result and log them
    click.secho(
        f"\n{'-' * 20} Summary of failed predictions (if any) {'-' * 20}")

    exit_code = 0
    for (_name, _df, error_messages) in predictions:
        for err_msg in error_messages:
            # Any error message indicates we encountered at least one error
            exit_code = 1
            click.secho(err_msg, fg="red")

    # Shall we write the predictions out?
    if output_dir is not None:
        for (name, prediction_df, _err_msgs) in predictions:
            prediction_df.to_csv(
                os.path.join(output_dir, f"{name}.csv.gz"), compression="gzip"
            )

    sys.exit(exit_code)
def test_client_predictions_diff_batch_sizes_and_toggle_data_provider(
    influxdb, watchman_service, use_data_provider: bool, batch_size: int
):
    """
    Run the prediction client with different batch-sizes and whether to use
    a data provider or not.
    """
    # Time range used in this test
    start = isoparse("2016-01-01T00:00:00+00:00")
    end = isoparse("2016-01-01T12:00:00+00:00")

    # Influx client scoped to this test; used to inspect the destination db.
    verification_client = client_utils.influx_client_from_uri(tu.INFLUXDB_URI)

    # Created measurements by prediction client with dest influx
    query = f"""
    SELECT *
    FROM "model-output"
    WHERE("machine" =~ /^{tu.GORDO_SINGLE_TARGET}$/)
    """

    # Before predicting, influx destination db should be empty for 'predictions' measurement
    assert len(verification_client.query(query)) == 0

    # Optionally wire up a data provider, depending on the parametrized flag.
    if use_data_provider:
        data_provider = providers.InfluxDataProvider(
            measurement=tu.INFLUXDB_MEASUREMENT,
            value_name="Value",
            client=client_utils.influx_client_from_uri(
                uri=tu.INFLUXDB_URI, dataframe_client=True
            ),
        )
    else:
        data_provider = None

    prediction_client = Client(
        project=tu.GORDO_PROJECT,
        data_provider=data_provider,
        prediction_forwarder=ForwardPredictionsIntoInflux(
            destination_influx_uri=tu.INFLUXDB_URI
        ),
        batch_size=batch_size,
    )

    # Should have discovered machine-1, and every endpoint should be healthy.
    endpoints = prediction_client.endpoints
    assert len(endpoints) == 1
    assert all(ep.healthy for ep in endpoints)

    # Get predictions
    results = prediction_client.predict(start=start, end=end)
    assert isinstance(results, list)
    assert len(results) == 1

    # Unpack the single result without shadowing the results list.
    name, prediction_df, error_messages = results[0]
    assert isinstance(name, str)
    assert isinstance(prediction_df, pd.DataFrame)
    assert isinstance(error_messages, list)
    assert isinstance(prediction_df.index, pd.core.indexes.datetimes.DatetimeIndex)

    # This should have resulted in writting predictions to influx
    # Before predicting, influx destination db should be empty
    vals = verification_client.query(query)
    assert (
        len(vals) > 0
    ), f"Expected new values in 'predictions' measurement, but found {vals}"