Example 1
    def download_model(
        self,
        revision: Optional[str] = None,
        targets: Optional[List[str]] = None,
    ) -> typing.Dict[str, BaseEstimator]:
        """
        Download the actual model(s) from the ML server /download-model

        Parameters
        ----------
        revision: Optional[str]
            Revision of the models to download; defaults to the server's latest.
        targets: Optional[List[str]]
            Optional list of machine names; defaults to all machines in the project.

        Returns
        -------
        Dict[str, BaseEstimator]
            Mapping of target name to the model
        """
        models = dict()
        for machine_name in targets or self.get_machine_names(revision=revision):
            resp = self.session.get(
                f"{self.base_url}/gordo/v0/{self.project_name}/{machine_name}/download-model"
            )
            content = _handle_response(
                resp, resource_name=f"Model download for model {machine_name}")
            if isinstance(content, bytes):
                models[machine_name] = serializer.loads(content)
            else:
                raise ValueError(
                    f"Got unexpected return type: {type(content)} when attempting to"
                    f" download the model {machine_name}.")
        return models
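
A minimal usage sketch, assuming a client class exposing this method; the constructor argument shown is hypothetical:

# `Client` and its `project` argument are assumptions for illustration.
client = Client(project="my-project")
models = client.download_model(targets=["machine-a"])
for name, model in models.items():
    print(name, type(model))  # each value is a deserialized BaseEstimator
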
Example 2
    def _machine_from_server(self, name: str, revision: str) -> Machine:
        resp = self.session.get(
            f"{self.base_url}/gordo/v0/{self.project_name}/{name}/metadata",
            params={"revision": revision},
        )
        metadata = _handle_response(
            resp=resp, resource_name=f"Machine metadata for {name}"
        )
        if isinstance(metadata, dict) and metadata.get("metadata"):
            return Machine(**metadata["metadata"])
        else:
            raise NotFound(f"Machine {name} not found")
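
The method only trusts responses that nest the machine definition under a "metadata" key; a sketch of the shape it unpacks (the inner fields are assumptions, since Machine's constructor is not shown here):

# Hypothetical payload; only the outer "metadata" key is implied by the code above.
payload = {"metadata": {"name": "machine-a"}}
if isinstance(payload, dict) and payload.get("metadata"):
    machine = Machine(**payload["metadata"])  # unpacked as keyword arguments
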
Example 3
    def _get_available_machines(self, revision: str):
        req = requests.Request(
            "GET",
            f"{self.base_url}/gordo/v0/{self.project_name}/models",
            params={"revision": revision},
        )
        resp = self.session.send(req.prepare())
        model_response = _handle_response(
            resp=resp, resource_name=f"Model name listing for revision {revision}"
        )
        if "models" not in model_response:
            raise ValueError(
                f"Invalid response from server, key 'models' not found in: {model_response}"
            )
        model_response["revision"] = model_response.get("revision", revision)
        return model_response
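
Since nothing extra is added between Request() and prepare(), the same call could be written with the session shortcut used in the other examples; a sketch of the equivalent form inside the same method:

resp = self.session.get(
    f"{self.base_url}/gordo/v0/{self.project_name}/models",
    params={"revision": revision},
)
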
Example 4
    def get_revisions(self):
        """
        Gets the available revisions served by the server.

        Returns
        -------
        dict
            Dictionary with two keys, `available-revisions` and `latest`. The first is
            a list of all available revisions, and `latest` is the latest and default
            revision.
        """
        req = requests.Request(
            "GET", f"{self.base_url}/gordo/v0/{self.project_name}/revisions")
        resp = self.session.send(req.prepare())
        resp_json = _handle_response(
            resp=resp, resource_name="List of available revisions from server")
        return resp_json
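
Going by the docstring, the response carries the keys `available-revisions` and `latest`; a hedged consumption sketch, where `client` is a hypothetical instance of this class:

revisions = client.get_revisions()
latest = revisions["latest"]
assert latest in revisions["available-revisions"]
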
Example 5
import pytest
import requests

# The exceptions are assumed to live alongside _handle_response in gordo.client.io.
from gordo.client.io import BadGordoRequest, HttpUnprocessableEntity, _handle_response


def test__handle_response_errors():
    """
    Test expected error raising from gordo.client.io._handle_response
    """
    resp = requests.Response()
    resp.status_code = 422
    with pytest.raises(HttpUnprocessableEntity):
        _handle_response(resp)

    resp = requests.Response()
    resp.status_code = 403
    with pytest.raises(BadGordoRequest):
        _handle_response(resp)

    resp = requests.Response()
    resp.status_code = 502
    with pytest.raises(IOError):
        _handle_response(resp)
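
The test pins down a status-code-to-exception mapping; a minimal sketch consistent with those assertions (not the actual gordo.client.io implementation, which also parses and returns the response body):

def _handle_response_sketch(resp: requests.Response):
    # Mapping taken from the test above; everything else is assumption.
    if resp.status_code == 422:
        raise HttpUnprocessableEntity(resp.reason)
    if resp.status_code == 403:
        raise BadGordoRequest(resp.reason)
    if resp.status_code >= 500:
        raise IOError(resp.reason)
    return resp.content
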
Example 6
    def _send_prediction_request(
        self,
        X: pd.DataFrame,
        y: typing.Optional[pd.DataFrame],
        chunk: slice,
        machine: Machine,
        start: datetime,
        end: datetime,
        revision: str,
    ):
        """
        Post a slice of data to the machine

        Parameters
        ----------
        X: pandas.core.DataFrame
            The data for the model, in pandas representation
        chunk: slice
            The slice to take from DataFrame.iloc for the batch size
        machine: Machine
        start: datetime
        end: datetime

        Notes
        -----
        PredictionResult.predictions may be None if the prediction process fails

        Returns
        -------
        PredictionResult

        Raises
        -----
        ResourceGone
            If the sever returns a 410, most likely because the revision is too old
        """

        kwargs: Dict[str, Any] = dict(
            url=f"{self.base_url}/gordo/v0/{self.project_name}/{machine.name}{self.prediction_path}",
            params={"format": self.format, "revision": revision},
        )

        # Serialize the data as either JSON or parquet bytes
        if self.use_parquet:
            kwargs["files"] = {
                "X": server_utils.dataframe_into_parquet_bytes(X.iloc[chunk]),
                "y": server_utils.dataframe_into_parquet_bytes(y.iloc[chunk])
                if y is not None
                else None,
            }
        else:
            kwargs["json"] = {
                "X": server_utils.dataframe_to_dict(X.iloc[chunk]),
                "y": server_utils.dataframe_to_dict(y.iloc[chunk])
                if y is not None
                else None,
            }

        # Start attempting to get predictions for this batch
        for current_attempt in itertools.count(start=1):
            try:
                try:
                    resp = _handle_response(self.session.post(**kwargs))
                except HttpUnprocessableEntity:
                    # On 422, fall back to the plain /prediction endpoint and
                    # retry the POST with the rebuilt URL.
                    self.prediction_path = "/prediction"
                    kwargs["url"] = (
                        f"{self.base_url}/gordo/v0/{self.project_name}"
                        f"/{machine.name}{self.prediction_path}"
                    )
                    resp = _handle_response(self.session.post(**kwargs))
            # If it was an IO or TimeoutError, we can retry
            except (
                IOError,
                TimeoutError,
                requests.ConnectionError,
                requests.HTTPError,
            ) as exc:
                if current_attempt <= self.n_retries:
                    # Exponential backoff, capped at 300 seconds
                    time_to_sleep = min(2 ** (current_attempt + 2), 300)
                    logger.warning(
                        f"Failed to get response on attempt {current_attempt} "
                        f"out of {self.n_retries} attempts."
                    )
                    sleep(time_to_sleep)
                    continue
                else:
                    msg = (
                        f"Failed to get predictions for dates {start} -> {end} "
                        f"for target: '{machine.name}' Error: {exc}"
                    )
                    logger.error(msg)
                    return PredictionResult(
                        name=machine.name, predictions=None, error_messages=[msg]
                    )

            # No point in retrying a BadGordoRequest
            except (BadGordoRequest, NotFound) as exc:
                msg = (
                    f"Failed with bad request or not found for dates {start} -> {end} "
                    f"for target: '{machine.name}' Error: {exc}"
                )
                logger.error(msg)
                return PredictionResult(
                    name=machine.name, predictions=None, error_messages=[msg]
                )
            except ResourceGone:
                raise

            # Process response and return if no exception
            else:
                predictions = self.dataframe_from_response(resp)

                # Forward predictions to any other consumer if registered.
                if self.prediction_forwarder is not None:
                    self.prediction_forwarder(  # type: ignore
                        predictions=predictions,
                        machine=machine,
                        metadata=self.metadata,
                    )
                return PredictionResult(
                    name=machine.name, predictions=predictions, error_messages=[]
                )
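
The retry delay grows exponentially as min(2 ** (current_attempt + 2), 300); a quick sketch of the resulting schedule in seconds:

for attempt in range(1, 8):
    print(attempt, min(2 ** (attempt + 2), 300))
# 1 -> 8, 2 -> 16, 3 -> 32, 4 -> 64, 5 -> 128, 6 -> 256, 7 -> 300 (capped)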