def download_feature_info_for_stage(
            self, pipeline_id: int, stage_id: int) -> Dict[str, pd.DataFrame]:
        """
        Downloads the feature data as a data frame.

        WARNING: The total download size is limited to 100 MB per horizon (i.e. 2 GB if you do not
        override the class's check on the maximum number of horizons that can be selected!).

        :param pipeline_id: ID of a pipeline
        :param stage_id: ID of a stage
        :return: Dictionary mapping each horizon (as a string) to a DataFrame of feature data, with the transformed features as columns.
        """
        pipeline = self.get_single_pipeline(pipeline_id)
        problem_specification_stage = pipeline.find_stage_by_type(
            StageType.problem_specification)[0]
        horizons = cast(ProblemSpecificationConfig,
                        problem_specification_stage.config).horizons
        feature_df_dict = {}
        for horizon in tqdm(horizons, desc="Fetching Data"):
            data = self.client.get(
                Endpoints.FEATURE_DATA_FOR_STAGE(pipeline_id=pipeline_id,
                                                 stage_id=stage_id,
                                                 horizon=horizon),
                download=True,
            )
            feature_df_dict[str(horizon)] = pd.read_csv(StringIO(data),
                                                        index_col="time")

        terminal_messages.print_success(
            f"Retrieved Feature Data for Pipeline {pipeline_id} and Stage {stage_id}"
        )
        return feature_df_dict
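
    # A minimal usage sketch (hypothetical IDs; assumes `interface` is an
    # authenticated instance of this class and the stage has finished running):
    #
    #     feature_dfs = interface.download_feature_info_for_stage(
    #         pipeline_id=42, stage_id=7)
    #     for horizon, df in feature_dfs.items():  # keys are horizons as strings
    #         print(horizon, df.columns.tolist())
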
    def post(
        self,
        endpoint: str,
        body: dict = None,
        files: Dict = None,
        on_success_message: str = None,
    ) -> HorizonResponse:
        """Make a POST request to Horizon with a JSON body.

        Args:
            endpoint: Endpoint for the request (will be appended to the server_url).
            body: Request body in JSON format.
            files: File for file upload.
            on_success_message: message to print if successful request

        Returns:
            The :class:`.HorizonResponse` to the request.

        Raises:
            :class:`.HorizonError` if an error response is received.
        """

        response = self._session.post(
            urljoin(base=self._root_url, url=endpoint),
            data=body,
            files=files,
        )

        if on_success_message and response.ok:
            print_success(on_success_message)

        return HorizonResponse(response)
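
    # A minimal usage sketch (hypothetical endpoint and payload; assumes
    # `client` is an instance of this class):
    #
    #     response = client.post(
    #         endpoint="pipelines/42/run",
    #         body={"synchronous": False},
    #         on_success_message="Pipeline queued.",
    #     )
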
    def download_backtest_info_for_stage(
            self,
            pipeline_id: int,
            stage_id: int,
            verbose=True) -> Dict[str, pd.DataFrame]:
        """
        Downloads the backtest data of a backtest stage as a data frame. Only validation data is shown.

        df columns:
            - truth: the true value at the given time stamp
            - mean: mean prediction at the given time stamp
            - bound_low: lower bound prediction at the given time stamp (3std)
            - bound_high: higher bound prediction at the given time stamp (3std)
            - backtest: The backtest number. This is set by the n_backtests configuration in the backtest stage.
            - verbose: Log output to terminal?


        WARNING: This is not the same as the expert_backtests; the backtests are finite and discrete here.
        For every-point-rolling retrain backtests please run the expert backtest function, which can
        backtest with retrains between any two arbitrary rows.


        :param pipeline_id: ID of a pipeline
        :param stage_id: ID of a stage - MUST BE A BACKTEST STAGE
        :return: Dictionary of Dataframe of backtest data, indexed by Horizon.
        """
        pipeline = self.get_single_pipeline(pipeline_id)
        problem_specification_stage = pipeline.find_stage_by_type(
            StageType.problem_specification)[0]
        horizons = cast(ProblemSpecificationConfig,
                        problem_specification_stage.config).horizons
        backtest_df_dict = {}

        pbar = tqdm(total=len(horizons), desc="Fetching Data") if verbose else None

        for horizon in horizons:
            data = self.client.get(
                Endpoints.BACKTEST_DATA_FOR_STAGE(pipeline_id=pipeline_id,
                                                  stage_id=stage_id,
                                                  horizon=horizon),
                download=True,
            )
            backtest_df_dict[str(horizon)] = pd.read_csv(StringIO(data),
                                                         index_col="time")
            if pbar:
                pbar.update()

        if pbar:
            pbar.close()
            terminal_messages.print_success(
                f"Retrieved Backtest Data for Pipeline {pipeline_id} and Stage {stage_id}"
            )
        return backtest_df_dict
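
    # A minimal usage sketch (hypothetical IDs; assumes `interface` is an
    # authenticated instance of this class and stage 9 is a completed backtest stage):
    #
    #     backtests = interface.download_backtest_info_for_stage(
    #         pipeline_id=42, stage_id=9, verbose=False)
    #     df = backtests["1"]  # horizon keys are strings
    #     errors = df["truth"] - df["mean"]
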
    def wait_for_pipeline_completion(self,
                                     pipeline_ids: List[int],
                                     _progress_bars=None,
                                     verbose=True):
        """
        Function that waits until a running pipeline is complete before returning

        :param pipeline_ids:
        :param _progress_bars: List of TQDM progress bars (only used in recursive calls)
        :param verbose: If true then show output
        :return:
        """
        def should_return(pipeline: Pipeline):
            if pipeline.is_complete:
                return True
            if pipeline.is_errored:
                terminal_messages.print_failure(
                    f"Pipeline {pipeline.summary.id_} ({pipeline.summary.name}) errored!"
                )
                return True
            return False

        # Poll in a loop rather than recursing, so that long waits cannot
        # exhaust Python's recursion limit.
        while True:
            sleep(1)  # Give the api some time to recover from being ambushed
            pipelines = [
                self.get_single_pipeline(pipeline_id=pipeline_id)
                for pipeline_id in pipeline_ids
            ]

            if not _progress_bars and verbose:
                _progress_bars = [
                    initialise_progress_bar(pipeline) for pipeline in pipelines
                ]

            if all(should_return(pipeline) for pipeline in pipelines):
                if not verbose:
                    return
                for pipeline, progress_bar in zip(pipelines, _progress_bars):
                    terminal_messages.print_success(
                        f"Pipeline {pipeline.summary.id_} ({pipeline.summary.name}) successfully completed!"
                    )
                    progress_bar.clear()
                    progress_bar.close()
                return

            if verbose:
                compute_status = convert_dict_from_camel_to_snake(
                    self.client.horizon_compute_status())
                update_single_pipeline_status(pipelines, _progress_bars,
                                              compute_status)
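
    # A minimal usage sketch (hypothetical IDs; assumes `interface` is an
    # authenticated instance of this class and the pipelines are already running):
    #
    #     interface.run_pipeline(pipeline_id=42, synchronous=False)
    #     interface.wait_for_pipeline_completion([42, 43], verbose=True)
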
    def delete_all_datasets(self):
        """
        Deletes all data sets previously uploaded by the authorised user.

        WARNING: All associated pipelines will also be deleted.
        WARNING: Calling this endpoint is effectively the same as resetting Horizon for a user.

        :return:
        """

        datasets = self.list_datasets()
        dataset_ids = [dataset.id_ for dataset in datasets]
        self.delete_datasets(dataset_ids)
        print_success("All data successfully deleted from Horizon!")
    def run_multitarget_forecast_with_target_specific_feature_set(
        self,
        pipeline_template: Pipeline,
        *,
        column_names: List = -1,  # type: ignore
        column_ids: List = -1,  # type: ignore
        n_training_rows_for_one_point_backtest=None,
        one_point_backtests=False,
    ) -> Dict[str, pd.DataFrame]:
        """

        DEPRECATED - NOW SUPPORTED NATIVELY IN THE PROBLEM SPECIFICATION STAGE CONFIG

        Creates a multi target forecast by looping through all specified targets.

        Feature engineering is run ONCE for the specified target

        :param one_point_backtests: Runs expert backtests. If false then n_training_rows_for_one_point_backtest is ignored.
        :param n_training_rows_for_one_point_backtest: Number of training rows to use for the regressor
        :param pipeline_template: The pipeline template to be used for creating new pipelines
        :param column_names: List of names of columns to run analysis with. Do not specify this and ids together.
        :param column_ids: List of ids of columns to run analysis with. Do not specify this and names together.
        :return: Dictionary of results
        """

        pipeline_columns = pipeline_template.dataset.columns

        if column_names == -1:
            # -1 acts as a "not provided" sentinel: resolve the names from the
            # supplied column ids instead.
            column_names = [
                column.name for column in pipeline_columns
                if str(column.id_) in column_ids
            ]  # type: ignore

        pipeline = self.build_pipeline_from_template(
            target_column_name=column_names[0],
            pipeline_template=pipeline_template)

        for stage, template_stage in zip(pipeline.stages,
                                         pipeline_template.stages):
            self.update_config(pipeline_id=pipeline.summary.id_,
                               stage_id=stage.id_,
                               config=template_stage.config)

        terminal_messages.print_update(
            "Running Template Pipeline for Feature Discovery")
        self.run_pipeline(pipeline_id=pipeline.summary.id_, synchronous=True)

        pipeline = self.get_single_pipeline(pipeline_id=pipeline.summary.id_)
        terminal_messages.print_success(
            "Successfully run feature generation. Exporting Data.")

        features = self.download_feature_info_for_stage(
            pipeline_id=pipeline.summary.id_,
            stage_id=pipeline.last_completed_stage.id_,
        )

        original_data = self.download_feature_info_for_stage(
            pipeline_id=pipeline.summary.id_,
            stage_id=pipeline.stages[0].id_,
        )

        augmented_features = pd.concat(features.values(), axis=1, sort=False)
        augmented_features = pd.concat(
            [augmented_features, *original_data.values()], axis=1, sort=False)
        augmented_features_no_duplicates = augmented_features.loc[
            :, ~augmented_features.columns.duplicated()]
        augmented_features_no_duplicates.reset_index(inplace=True)

        data_interface = DataInterface(self.client)

        augmented_dataset = data_interface.upload_data(
            data=augmented_features_no_duplicates,
            name=f"Features {pipeline_template.summary.name}",
        )

        template_pipeline_regression_only = self.create_pipeline(
            dataset_id=augmented_dataset.summary.id_,
            blueprint=BlueprintType.time_series_regression,
            name=pipeline_template.summary.name,
            delete_after_creation=True,
        )

        regression_template_problem_spec_config = cast(
            ProblemSpecificationConfig,
            template_pipeline_regression_only.stages[0].config,
        )

        original_template_problem_spec_config = cast(
            ProblemSpecificationConfig,
            pipeline_template.stages[0].config,
        )

        regression_template_problem_spec_config.data_split = original_template_problem_spec_config.data_split
        regression_template_problem_spec_config.horizons = original_template_problem_spec_config.horizons

        return self.run_multitarget_forecast(
            pipeline_template=template_pipeline_regression_only,
            column_names=column_names,
            one_point_backtests=one_point_backtests,
            n_training_rows_for_one_point_backtest=n_training_rows_for_one_point_backtest,
        )
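
    # A minimal usage sketch (hypothetical column names; assumes `interface` is an
    # authenticated instance of this class and `template` is an existing Pipeline).
    # Note this method is deprecated in favour of the problem specification config:
    #
    #     results = interface.run_multitarget_forecast_with_target_specific_feature_set(
    #         pipeline_template=template,
    #         column_names=["price_a", "price_b"],
    #     )
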
    def run_expert_backtest_between_two_rows(self,
                                             horizon: int,
                                             start_row: int,
                                             end_row: int,
                                             n_training_rows_for_backtest: int,
                                             pipeline_id: int,
                                             stage_id: int,
                                             verbose=True):
        """

        EXPERT FUNCTIONALITY - Not exposed in the Horizon User Interface!

        WARNING: This function contains no guards to ensure that the rows are not in the feature training data. The method
                 run_expert_backtest_for_validation_data ensures that the backtests are run over valid rows.

        Runs a rolling retrain between two rows. This is a synchronous request that might take a very long
        time to compute; n different models are trained, where there are n points in the training data.

        df columns:
            - truth: the true value at the given time stamp
            - mean: mean prediction at the given time stamp
            - bound_low: lower bound prediction at the given time stamp (3std)
            - bound_high: higher bound prediction at the given time stamp (3std)
            - backtest: The backtest number. This is set by the n_backtests configuration in the backtest stage.
            - timestamps: Timestamp

        :param horizon: Forecast horizon to run backtests over
        :param start_row: Row to start backtest
        :param end_row: Row to backtest to
                :param n_training_rows_for_backtest: Number of rows to train on for each rolling train / backtest
        :param pipeline_id: ID of a pipeline
        :param stage_id: ID of a stage
        :param verbose: print to console

        :return:  Dataframe of backtest results
        """

        if verbose:
            terminal_messages.print_expert_message(
                f"Initialising Backtest from row {start_row} to row {end_row} (Pipeline {pipeline_id})"
            )

        response = self.client.get(
            Endpoints.EXPERT_BACKTEST_FOR_STAGE_AND_HORIZON(
                pipeline_id=pipeline_id,
                horizon=horizon,
                first_row=start_row,
                last_row=end_row,
                n_training_rows=n_training_rows_for_backtest,
                stage_id=stage_id,
            ))

        if verbose:
            terminal_messages.print_success("Expert Backtest Complete")

        # Convert the camelCase response into a time-indexed DataFrame.
        df = pd.DataFrame.from_dict(convert_dict_from_camel_to_snake(response))
        df.drop("neg_rmse", axis=1, inplace=True)  # not one of the documented output columns
        df.set_index("timestamps", inplace=True)
        df.index = pd.to_datetime(df.index)
        return df
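
    # A minimal usage sketch (hypothetical IDs and rows; assumes `interface` is an
    # authenticated instance of this class and rows 500-600 lie outside the
    # feature training data):
    #
    #     df = interface.run_expert_backtest_between_two_rows(
    #         horizon=1,
    #         start_row=500,
    #         end_row=600,
    #         n_training_rows_for_backtest=400,
    #         pipeline_id=42,
    #         stage_id=9,
    #     )
    #     print(df[["truth", "mean"]].head())
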