Example #1
0
    def _cross_verify_optimization(self, goal: dict, constraints: List[dict]):
        """Cross-check the goal and the constraints of an optimization together.

        Every entry is counted either as a factor (its name is in
        `self.factor_names`) or as a target (anything else).  Cost-function
        entries always count as factors.

        :param goal: The goal configuration of the optimization
        :type goal: dict
        :param constraints: The constraint configurations of the optimization
        :type constraints: List[dict]
        :raises FeroError: Raised if there are no constraints, if no target is
            referenced, or if more than three factors are referenced.
        """
        cost_mode = goal.get("type", None) == "COST"

        if len(constraints) == 0:
            raise FeroError("A constraint must be specified")

        # Only the number of entries in each bucket matters for the checks below.
        factor_entries = []
        target_entries = []

        if cost_mode:
            factor_entries.extend(
                entry["name"] for entry in goal["cost_function"])
        elif goal["factor"]["name"] in self.factor_names:
            factor_entries.append(goal["goal"])
        else:
            target_entries.append(goal["goal"])

        for constraint in constraints:
            label = constraint["name"]
            bucket = (factor_entries
                      if label in self.factor_names else target_entries)
            bucket.append(label)

        if not target_entries:
            raise FeroError("No Targets specified")

        if len(factor_entries) > 3:
            raise FeroError(
                "A maximum of three factors can be specified in an optimization"
            )
Example #2
0
    def get_results(self,
                    format="dataframe") -> Union[pd.DataFrame, List[dict]]:
        """Serializes the results of the prediction.

        By default the results are returned as a pandas DataFrame, but
        specifying `format="records"` will instead return a list of
        dictionaries where each key specifies a column and the value is the
        prediction.  The requested format is honored for both bulk and
        non-bulk predictions.

        :param format: The format to return the result as, either "dataframe"
            or "records", defaults to "dataframe"
        :type format: str, optional
        :raises FeroError: Raised if the prediction is not yet complete or is completed and failed.
        :return: The results of the prediction
        :rtype: Union[pd.DataFrame, List[dict]]
        """
        # NOTE: `format` shadows the builtin, but it is part of the public
        # signature and is kept for backward compatibility.
        if not self.complete:
            raise FeroError("Prediction is not complete.")
        if self.status != "SUCCESS":
            raise FeroError(
                f"Prediction failed with the following message: {self._data['result_data']['message']}"
            )

        as_records = format == "records"

        if self.prediction_type == BULK_PREDICTION_TYPE:
            # Bulk results are not inlined; they are fetched from a download url.
            data_url = self._data["result_data"]["data"]["download_url"]
            data = self._client.get_preauthenticated(data_url)
            frame = pd.DataFrame(**data)
            # Like the non-bulk branch, the records form omits the index.
            return frame.to_dict("records") if as_records else frame

        data = self._data["result_data"]["data"]["values"]
        if as_records:
            return [dict(zip(data["columns"], row)) for row in data["data"]]
        return pd.DataFrame(data["data"],
                            columns=data["columns"],
                            index=data["index"])
Example #3
0
 def _verify_cost_goal(self, goal: dict):
     """Check every entry of a cost goal's cost function.

     :param goal: The cost goal configuration; its "cost_function" entries
         are looked up by their "name"
     :type goal: dict
     :raises FeroError: Raised if an entry has no factor dtype or its dtype
         is neither "factor_float" nor "factor_int".
     """
     numeric_dtypes = ("factor_float", "factor_int")
     for entry in goal["cost_function"]:
         name = entry["name"]
         dtype = self._get_factor_dtype(name)
         # A dtype of None is treated as "this name is not a factor".
         if dtype is None:
             raise FeroError(f'"{name}" is not a factor')
         if dtype not in numeric_dtypes:
             raise FeroError(
                 "Cost functions factors must be floats or integers")
Example #4
0
    def _verify_standard_goal(self, goal: dict):
        """Check a standard (non-cost) goal against the analysis.

        :param goal: The goal configuration; the name is read from
            `goal["factor"]["name"]`
        :type goal: dict
        :raises FeroError: Raised if the name is neither a target nor a factor,
            or if it is a factor whose dtype is not a float or an integer.
        """
        name = goal["factor"]["name"]

        # The goal must reference a known target or factor.
        if name not in self.target_names + self.factor_names:
            raise FeroError(f'"{name}" is not a valid goal')

        # Targets need no dtype check.
        if name in self.target_names:
            return

        # A factor used as a goal must be numeric (float or integer).
        if self._get_factor_dtype(name) not in ["factor_float", "factor_int"]:
            raise FeroError("Goal must be a float or integer")
Example #5
0
    def make_prediction_serial(
        self, prediction_data: Union[pd.DataFrame, List[dict]]
    ) -> Union[pd.DataFrame, List[dict]]:
        """Makes predictions one row at a time using the latest completed model of the analysis.

        Each row is posted to the server in its own request.  This is slower than `make_prediction`,
        but works for analyses that do not support bulk predictions and for inputs that are too
        large to be transferred in a single call.

        `make_prediction_serial` takes either a data frame or a list of dictionaries of values.  Each
        server response is combined with the row that produced it by `_flatten_result`, and the
        combined rows are returned in the same container type that was passed in.

        :param prediction_data:  Either a data frame or list of dictionaries specifying values to be used in the model.
        :type prediction_data: Union[pd.DataFrame, List[dict]]
        :raises FeroError: Raised if no model has been trained or the server returns an error message
        :return: A data frame or list of dictionaries depending on how the function was called
        :rtype: Union[pd.DataFrame, List[dict]]
        """
        if not self.has_trained_model:
            raise FeroError("No model has been trained on this analysis.")

        from_frame = isinstance(prediction_data, pd.DataFrame)

        # Data frames are turned into one dictionary per row so they can be serialized.
        rows = ([dict(series) for _, series in prediction_data.iterrows()]
                if from_frame else prediction_data)

        flattened = []
        for values in rows:
            response = self._client.post(
                f"/api/revision_models/{str(self.latest_completed_model)}/predict/",
                {"values": values},
            )
            # Stop at the first row the server does not report as successful.
            if response.get("status") != "SUCCESS":
                raise FeroError(
                    response.get(
                        "message",
                        "The prediction failed for unknown reasons."))
            flattened.append(self._flatten_result(response, values))

        # Hand back the same container type the caller provided.
        if from_frame:
            return pd.DataFrame(flattened)
        return flattened
Example #6
0
    def replace_csv(self, file_path: str, wait_until_complete: bool = False):
        """Uploads a csv file to the data source as a replacement.

        An inbox url is requested with action "R", the file is uploaded to it and a status
        object for the upload is created.

        :param file_path: Location of the csv file to upload
        :type file_path: str
        :param wait_until_complete: When true, the result of the status object's
            `wait_until_complete()` is returned instead of the status object itself, defaults to False
        :type wait_until_complete: bool, optional
        :raises FeroError: Raised if the file does not match a naive csv check
        :return: The upload status object, or the result of waiting on it
        """
        # Naive check: only the file extension is inspected.
        if not file_path.endswith(".csv"):
            raise FeroError("Fero only supports csv appends")

        csv_name = os.path.basename(file_path)
        inbox = self._client.post(
            f"/api/v2/data_source/{self.uuid}/inbox_url/",
            {"file_name": csv_name, "action": "R"},
        )

        with open(file_path) as handle:
            self._client.upload_file(inbox, csv_name, handle)

        tracker = UploadedFileStatus(self._client, inbox["upload_uuid"])
        if wait_until_complete:
            return tracker.wait_until_complete()
        return tracker
Example #7
0
 def _verify_constraints(self, constraints: List[dict]):
     """Check that every constraint names a factor or target of the analysis.

     :param constraints: The constraint configurations, each with a "name"
     :type constraints: List[dict]
     :raises FeroError: Raised for the first constraint whose name is unknown
     """
     known_names = self.factor_names + self.target_names
     for name in (entry["name"] for entry in constraints):
         if name in known_names:
             continue
         raise FeroError(
             f'Constraint "{name}" is not in this analysis')
Example #8
0
    def make_prediction(
        self, prediction_data: Union[pd.DataFrame, List[dict]]
    ) -> Union[pd.DataFrame, List[dict]]:
        """Makes a bulk prediction from the provided data using the most recent trained model for the analysis.

        The whole input is serialized to JSON, uploaded in one file and the resulting prediction is
        polled for.  For analyses that do not support bulk prediction, use `make_prediction_serial`.

        `make_prediction` takes either a data frame or list of dictionaries of values.  The results
        are returned as a data frame when a data frame was passed in, and as a list of dictionaries
        (one per result row) when a list was passed in.

        :param prediction_data:  Either a data frame or list of dictionaries specifying values to be used in the model.
        :type prediction_data: Union[pd.DataFrame, List[dict]]
        :raises FeroError: Raised if no model has been trained or the server returns an error message
        :return: A data frame or list of dictionaries depending on how the function was called
        :rtype: Union[pd.DataFrame, List[dict]]
        """
        if not self.has_trained_model:
            raise FeroError("No model has been trained on this analysis.")

        from_records = isinstance(prediction_data, list)
        frame = pd.DataFrame(prediction_data) if from_records else prediction_data

        # Serialize the frame into an in-memory buffer and rewind it for the upload.
        buffer = io.StringIO()
        frame.to_json(buffer, orient="split")
        buffer.seek(0)

        # Each upload gets a fresh random identifier.
        workspace_id = self._upload_file(buffer, str(uuid.uuid4()),
                                         BULK_PREDICTION_TYPE)
        prediction = self._poll_workspace_for_prediction(workspace_id)
        if prediction.status != "SUCCESS":
            raise FeroError(
                prediction.result_data.get(
                    "message", "The prediction failed for unknown reasons"))

        results = prediction.get_results()
        if not from_records:
            return results
        # One dictionary per result row, keyed by column name.
        return list(results.T.to_dict().values())
Example #9
0
    def _check_status_complete(status: Optional[dict]) -> bool:
        """Reports whether the latest uploaded file status is complete.

        Returns False when there is no status or its state is neither done nor error, True when
        it is done, and raises when it is an error.

        :param status: The uploaded file status, or None if there is none
        :type status: Optional[dict]
        :raises FeroError: Raised with the quoted global and parsing notices if the status is an error
        :return: Whether the upload is complete
        :rtype: bool
        """
        if status is None:
            return False

        current = status["status"]
        if current == UploadedFilesSchema.ERROR:
            notices = status["error_notices"]
            quoted = ", ".join(
                f'"{str(notice)}"'
                for notice in notices["global_notices"] + notices["parsing_notices"]
            )
            raise FeroError(
                f"Unable to upload file.  The following error(s) occurred: {quoted}"
            )

        # Any state other than DONE means the upload is still in progress.
        return current == UploadedFilesSchema.DONE
Example #10
0
    def make_optimization(
        self,
        name: str,
        goal: dict,
        constraints: List[dict],
        fixed_factors: Optional[dict] = None,
        include_confidence_intervals: bool = False,
        synchronous: bool = True,
    ) -> Prediction:
        """Perform an optimization using the most recent model for the analysis.

        By default this function will block until the optimization is complete, however specifying `synchronous=False`
        will instead return a prediction object referencing the optimization being made.  This prediction will not contain
        results until the `complete` property is true.

        The expected config input looks as follows:

        Example configuration for a standard (without cost) optimization
        {
            "goal": "maximize",
            "factor": {"name": "factor1", "min": 5, "max": 10}
        }

        Example configuration for a cost optimization
        {
            "type": "COST",
            "goal": "minimize",
            "cost_function": [
                {"min": 5, "max": 10, "cost": 1000, "name": "factor1"},
                {"min": 5, "max": 10, "cost": 500, "name": "factor2"}
            ]
        }

        The constraints configuration is a list of factors and targets and their constraints
        [
            {"name": "factor2", "min": 10, "max": 10},
            {"name": "target1", "min": 100, "max": 500}
        ]

        :param name: Name for this optimization
        :type name: str
        :param goal: A dictionary describing the goal of the optimization
        :type goal: dict
        :param constraints: A list of dictionaries describing the constraints of the optimization
        :type constraints: List[dict]
        :param fixed_factors: Values of factors to stay fixed if not provided the mean values are used, defaults to None
        :type fixed_factors: dict, optional
        :param include_confidence_intervals: Forwarded to the optimization request; presumably controls whether
            confidence intervals are included in the results (confirm against the server API), defaults to False
        :type include_confidence_intervals: bool, optional
        :param synchronous: Whether the optimization should return only after being complete.  This can take a bit, defaults to True
        :type synchronous: bool, optional
        :raises FeroError: Raised for fault analyses, if the goal or constraints fail schema validation, or if
            they are inconsistent with the analysis
        :return: The results of the optimization
        :rtype: Prediction
        """

        if self.blueprint_name == "fault":
            raise FeroError("Fault analysis optimization are not supported")

        # Any goal carrying a "type" key is validated as a cost goal.
        cost_goal = "type" in goal
        if fixed_factors is None:
            fixed_factors = {}

        goal_schema = CostOptimizeGoal() if cost_goal else StandardOptimizeGoal()
        # A truthy validation result is treated as a description of the errors.
        goal_validation = goal_schema.validate(goal)
        if goal_validation:
            raise FeroError(f"Error validating goal <{goal_validation}>")

        constraints_schema = FactorSchema(many=True)
        constraints_validation = constraints_schema.validate(constraints)
        if constraints_validation:
            raise FeroError(
                f"Error validating constraints <{constraints_validation}>")

        # Schema validation only checks structure; the checks below compare
        # the configuration against this analysis.
        if cost_goal:
            self._verify_cost_goal(goal)
        else:
            self._verify_standard_goal(goal)

        self._verify_constraints(constraints)
        self._cross_verify_optimization(goal, constraints)
        self._verify_fixed_factors(fixed_factors)

        optimize_request = self._build_optimize_request(
            name, goal, constraints, fixed_factors,
            include_confidence_intervals)
        return self._request_prediction(optimize_request, synchronous)
Example #11
0
 def _verify_fixed_factors(self, fixed_factors: dict):
     """Check that every fixed factor key is a column of the analysis.

     :param fixed_factors: Mapping whose keys are checked against the
         target and factor names
     :type fixed_factors: dict
     :raises FeroError: Raised for the first key that is neither a target
         nor a factor name
     """
     known_columns = self.target_names + self.factor_names
     for column in fixed_factors:
         if column in known_columns:
             continue
         raise FeroError(f'"{column}" is not a valid factor')
Example #12
0
    def predict(
        self,
        specified_values: Optional[Union[pd.DataFrame, Mapping[str,
                                                               list]]] = None
    ) -> Union[pd.DataFrame, Mapping[str, list]]:
        """Makes predictions using the most recent trained asset configuration. Predictions are made
        at regular intervals for the specified horizon time following the end of the training set.

        Without arguments `predict` returns the default predictions.  `predict` optionally accepts a
        DataFrame or a mapping of factor names to value lists representing values for one or more
        controllable factors. If provided, the values are sent to Fero and the returned predictions
        replace the matching columns of the default predictions.  For every specified factor the
        "mean:", "p5:", "p25:", "p75:" and "p95:" columns are dropped and a "specified:<factor>" column
        holding the provided values is added instead.

        The result is a DataFrame when a DataFrame (or nothing) was passed in.  When a mapping was
        passed in, the result is a dict of column name to value list with the timestamps under an
        additional "index" key.

        :param specified_values:  Either a data frame or mapping of factors to value lists, specifying values
            to use for controllable factors in the predictions.
        :type specified_values: Union[pd.DataFrame, Mapping[str, list]]
        :raises FeroError: Raised if no model has been trained, the server returns an error message or the
            returned predictions do not line up with the default prediction timestamps
        :return: A data frame or mapping of column names to lists depending on how the function was called
        :rtype: Union[pd.DataFrame, Mapping[str, list]]
        """
        if not self.has_trained_model:
            raise FeroError("No model has been trained for this asset.")

        if specified_values is None:
            return self._default_predictions

        is_df = isinstance(specified_values, pd.DataFrame)

        # convert to dictionary for serialization
        if is_df:
            specified_values = specified_values.to_dict("list")

        prediction_result = self._client.post(
            f"/api/configuration_models/{str(self.latest_trained_configuration_model)}/predict/",
            {"values": specified_values},
        )
        if prediction_result.get("status") != "SUCCESS":
            raise FeroError(
                prediction_result.get(
                    "message", "The prediction failed for unknown reasons."))

        # Work on a copy so the default predictions are left untouched.
        result = self._default_predictions.copy()

        prediction_results = pd.DataFrame(**prediction_result["data"])
        # Check the lengths first: comparing indexes of different lengths
        # element-wise raises a pandas ValueError instead of the FeroError
        # callers are told to expect.
        if (len(result.index) != len(prediction_results.index)
                or (result.index != prediction_results.index).any()):
            raise FeroError("Predictions include mismatched timestamps.")
        result[prediction_results.columns] = prediction_results

        # Replace the predicted statistics of each specified factor with the
        # values that were provided for it.
        prefixes = ["mean:", "p5:", "p25:", "p75:", "p95:"]
        drop_cols = [f"{p}{c}" for c in specified_values for p in prefixes]
        result.drop(drop_cols, axis=1, inplace=True)
        specified_data = pd.DataFrame(
            {f"specified:{k}": v
             for k, v in specified_values.items()}).set_index(result.index)
        result = result.join(specified_data)

        if not is_df:
            dict_result = result.to_dict("list")
            dict_result["index"] = result.index.to_list()
            return dict_result
        return result