def test_dataframe_parquet_serializers(df):
    """Round-trip a dataframe through parquet bytes; it should come back unchanged."""
    round_tripped = server_utils.dataframe_from_parquet_bytes(
        server_utils.dataframe_into_parquet_bytes(df.copy())
    )
    # Columns, index, and values must all survive the round trip
    assert df.columns.tolist() == round_tripped.columns.tolist()
    assert df.index.tolist() == round_tripped.index.tolist()
    assert np.allclose(df.values, round_tripped.values)
def _create_anomaly_response(self, start_time: float = None):
    """
    Build an anomaly-specific response from the current ``g.X`` and ``g.y``
    using the trained ML model's ``.anomaly()`` method.

    Parameters
    ----------
    start_time: Optional[float]
        Start time to use when timing the processing time of the request,
        will construct a new one if not provided.

    Returns
    -------
    flask.Response
        The formatted anomaly representation response object.
    """
    if start_time is None:
        start_time = timeit.default_timer()

    # This endpoint needs a 'y' to calculate the errors against.
    if g.y is None:
        return make_response(
            (
                jsonify(
                    {
                        "message": "Cannot perform anomaly without 'y' to compare against."
                    }
                ),
                400,
            )
        )

    # Build an anomaly dataframe from the base response dataframe
    try:
        anomaly_df = g.model.anomaly(g.X, g.y, frequency=self.frequency)
    except AttributeError:
        # 422 Unprocessable Entity: model lacks an .anomaly() method
        return make_response(
            jsonify(
                {
                    "message": f"Model is not an AnomalyDetector, it is of type: {type(g.model)}"
                }
            ),
            422,
        )

    # Unless the caller asked for everything, strip internal columns
    if request.args.get("all_columns") is None:
        to_drop = [
            column
            for column in anomaly_df
            if column[0] in DELETED_FROM_RESPONSE_COLUMNS
        ]
        anomaly_df = anomaly_df.drop(columns=to_drop)

    if request.args.get("format") == "parquet":
        return send_file(
            io.BytesIO(utils.dataframe_into_parquet_bytes(anomaly_df)),
            mimetype="application/octet-stream",
        )

    context: typing.Dict[typing.Any, typing.Any] = dict()
    context["data"] = utils.dataframe_to_dict(anomaly_df)
    context["time-seconds"] = f"{timeit.default_timer() - start_time:.4f}"
    return make_response(jsonify(context), context.pop("status-code", 200))
def test_prediction_endpoint_post_ok(
    base_route,
    sensors,
    sensors_str,
    gordo_ml_server_client,
    data_size,
    to_dict_arg,
    resp_format,
    send_as_parquet,
):
    """
    Test the expected successful data posts, by sending a variety of valid
    JSON formats of a dataframe, as well as parquet serializations.
    """
    payload = np.random.random(size=(data_size, len(sensors))).tolist()
    if to_dict_arg is not None:
        payload = pd.DataFrame(payload, columns=sensors_str).to_dict(to_dict_arg)

    endpoint = f"{base_route}/prediction"
    if resp_format is not None:
        endpoint = f"{endpoint}?format={resp_format}"

    # Either ship the data as a parquet file upload or as JSON
    if send_as_parquet:
        frame = pd.DataFrame.from_dict(payload)
        serialized = server_utils.dataframe_into_parquet_bytes(frame)
        kwargs = dict(data={"X": (io.BytesIO(serialized), "X")})
    else:
        kwargs = dict(json={"X": payload})

    resp = gordo_ml_server_client.post(endpoint, **kwargs)
    assert resp.status_code == 200

    if resp_format in (None, "json"):
        data = server_utils.dataframe_from_dict(resp.json["data"])
    else:
        data = server_utils.dataframe_from_parquet_bytes(resp.data)

    # Expected column names
    assert all(key in data for key in ("model-output", "model-input"))
def _send_prediction_request(
    self,
    X: pd.DataFrame,
    y: typing.Optional[pd.DataFrame],
    chunk: slice,
    machine: Machine,
    start: datetime,
    end: datetime,
    revision: str,
):
    """
    Post a slice of data to the machine

    Parameters
    ----------
    X: pandas.core.DataFrame
        The data for the model, in pandas representation
    y: Optional[pandas.core.DataFrame]
        Optional target data; serialized alongside ``X`` when provided
    chunk: slice
        The slice to take from DataFrame.iloc for the batch size
    machine: Machine
    start: datetime
    end: datetime
    revision: str
        Model revision, forwarded to the server as a query parameter

    Notes
    -----
    PredictionResult.predictions may be None if the prediction process fails

    Returns
    -------
    PredictionResult

    Raises
    -----
    ResourceGone
        If the sever returns a 410, most likely because the revision is too old
    """
    kwargs: Dict[str, Any] = dict(
        url=f"{self.base_url}/gordo/v0/{self.project_name}/{machine.name}{self.prediction_path}",
        params={"format": self.format, "revision": revision},
    )

    # We're going to serialize the data as either JSON or Arrow
    if self.use_parquet:
        kwargs["files"] = {
            "X": server_utils.dataframe_into_parquet_bytes(X.iloc[chunk]),
            "y": server_utils.dataframe_into_parquet_bytes(y.iloc[chunk])
            if y is not None
            else None,
        }
    else:
        kwargs["json"] = {
            "X": server_utils.dataframe_to_dict(X.iloc[chunk]),
            "y": server_utils.dataframe_to_dict(y.iloc[chunk])
            if y is not None
            else None,
        }

    # Start attempting to get predictions for this batch
    for current_attempt in itertools.count(start=1):
        try:
            try:
                resp = _handle_response(self.session.post(**kwargs))
            except HttpUnprocessableEntity:
                # Server rejected the current prediction path with a 422;
                # fall back to the plain '/prediction' path and retry the
                # same request once. The updated self.prediction_path will
                # be used directly for subsequent batches.
                self.prediction_path = "/prediction"
                kwargs[
                    "url"
                ] = f"{self.base_url}/gordo/v0/{self.project_name}/{machine.name}{self.prediction_path}"
                resp = _handle_response(self.session.post(**kwargs))

        # If it was an IO or TimeoutError, we can retry
        except (
            IOError,
            TimeoutError,
            requests.ConnectionError,
            requests.HTTPError,
        ) as exc:
            if current_attempt <= self.n_retries:
                # Exponential backoff (starting at 8s), capped at 300 seconds
                time_to_sleep = min(2 ** (current_attempt + 2), 300)
                logger.warning(
                    f"Failed to get response on attempt {current_attempt} out of {self.n_retries} attempts."
                )
                sleep(time_to_sleep)
                continue
            else:
                # Retries exhausted: give up on this batch with an error result
                msg = (
                    f"Failed to get predictions for dates {start} -> {end} "
                    f"for target: '{machine.name}' Error: {exc}"
                )
                logger.error(msg)
                return PredictionResult(
                    name=machine.name, predictions=None, error_messages=[msg]
                )

        # No point in retrying a BadGordoRequest
        except (BadGordoRequest, NotFound) as exc:
            msg = (
                f"Failed with bad request or not found for dates {start} -> {end} "
                f"for target: '{machine.name}' Error: {exc}"
            )
            logger.error(msg)
            return PredictionResult(
                name=machine.name, predictions=None, error_messages=[msg]
            )
        except ResourceGone:
            # Propagate: caller is documented to handle the 410 case
            raise

        # Process response and return if no exception
        else:
            predictions = self.dataframe_from_response(resp)

            # Forward predictions to any other consumer if registered.
            if self.prediction_forwarder is not None:
                self.prediction_forwarder(  # type: ignore
                    predictions=predictions, machine=machine, metadata=self.metadata
                )
            return PredictionResult(
                name=machine.name, predictions=predictions, error_messages=[]
            )
def post(self):
    """
    Process a POST request by using provided user data

    A typical response might look like this

    .. code-block:: python

        {
            'data': [
                {
                    'end': ['2016-01-01T00:10:00+00:00'],
                    'model-output': [0.0005317790200933814,
                                     -0.0001525811239844188,
                                     0.0008310950361192226,
                                     0.0015755111817270517],
                    'original-input': [0.9135588550070414,
                                       0.3472517774179448,
                                       0.8994921857179736,
                                       0.11982773108991263],
                    'start': ['2016-01-01T00:00:00+00:00'],
                },
                ...
            ],
            'tags': [
                {'asset': None, 'name': 'tag-0'},
                {'asset': None, 'name': 'tag-1'},
                {'asset': None, 'name': 'tag-2'},
                {'asset': None, 'name': 'tag-3'}
            ],
            'time-seconds': '0.1937'
        }
    """
    context: typing.Dict[typing.Any, typing.Any] = dict()
    X = g.X
    started_at_s = timeit.default_timer()

    try:
        output = model_io.get_model_output(model=g.model, X=X)
    except ValueError as err:
        tb = traceback.format_exc()
        logger.error(
            f"Failed to predict or transform; error: {err} - \nTraceback: {tb}"
        )
        context["error"] = f"ValueError: {str(err)}"
        return make_response((jsonify(context), 400))
    # Model may only be a transformer, probably an AttributeError, but catch all to avoid logging other
    # exceptions twice if it happens.
    except Exception as exc:
        tb = traceback.format_exc()
        logger.error(
            f"Failed to predict or transform; error: {exc} - \nTraceback: {tb}"
        )
        context["error"] = "Something unexpected happened; check your input data"
        return make_response((jsonify(context), 400))

    # Both handlers above return, so reaching here means the model ran cleanly.
    finished_at_s = timeit.default_timer()
    logger.debug(
        f"Calculating model output took " f"{finished_at_s-started_at_s} s"
    )
    data = model_utils.make_base_dataframe(
        tags=self.tags,
        model_input=X.values if isinstance(X, pd.DataFrame) else X,
        model_output=output,
        target_tag_list=self.target_tags,
        index=X.index,
    )

    if request.args.get("format") == "parquet":
        return send_file(
            io.BytesIO(server_utils.dataframe_into_parquet_bytes(data)),
            mimetype="application/octet-stream",
        )

    context["data"] = server_utils.dataframe_to_dict(data)
    return make_response((jsonify(context), context.pop("status-code", 200)))