Example #1
def run_mlbase_xgboost_regression(context: mlrun.MLClientCtx):
    import json

    import xgboost as xgb

    from mlrun.frameworks.xgboost import apply_mlrun

    model = xgb.XGBRegressor()
    X_train, X_test, y_train, y_test = get_dataset(classification=False)
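    # apply_mlrun wraps the model so that training parameters, metrics and the
    # trained model are logged automatically to the MLRun context.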
    model_handler = apply_mlrun(
        model,
        context,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )
    model.fit(X_train, y_train)

    json_artifact = "test.json"
    with open(json_artifact, "w") as json_file:
        json.dump({"test": 0}, json_file, indent=4)

    # Register the extra artifact with the model handler so it is attached to
    # the logged model when the model record is updated below.
    model_handler.register_artifacts(
        context.log_artifact(
            json_artifact,
            local_path=json_artifact,
            artifact_path=context.artifact_path,
            db_key=False,
        )
    )
    model_handler.update()
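
The snippet above calls a get_dataset helper that is not shown here. Below is a
minimal sketch of what such a helper might look like, assuming scikit-learn's
synthetic dataset generators; the sample sizes and split ratio are illustrative,
not taken from the original.

from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split


def get_dataset(classification: bool = True):
    # Build a small synthetic dataset and return it split into train/test sets,
    # matching the X_train, X_test, y_train, y_test unpacking used above.
    if classification:
        x, y = make_classification(n_samples=1000, n_features=20)
    else:
        x, y = make_regression(n_samples=1000, n_features=20, noise=0.1)
    return train_test_split(x, y, test_size=0.2)
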
Example #2
def custom_hyper_func(context: mlrun.MLClientCtx):
    best_accuracy = 0
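    # Each parameter value runs as its own child context (iteration); the most
    # accurate child is marked as the best iteration.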
    for param in [1, 2, 4, 3]:
        with context.get_child_context(myparam=param) as child:
            accuracy = child.get_param("myparam")
            child.log_result("accuracy", accuracy)
            if accuracy > best_accuracy:
                child.mark_as_best()
                best_accuracy = accuracy
Example #3
def _extract_properties_from_context(
        context: mlrun.MLClientCtx) -> Dict[str, Any]:
    """
    Extract the properties of the run this context belongs to.

    :param context: The context whose run properties should be extracted.

    :return: The properties as a dictionary where each key is the property name.
    """
    run = mlrun.RunObject.from_dict(context.to_dict())
    runs = mlrun.lists.RunList([run.to_dict()])
    info = {}
    # Transpose the rows into (property name, property value) pairs.
    for property_name, property_value in list(zip(*runs.to_rows())):
        info[property_name] = property_value
    return info
Example #4
# Module-level imports assumed by this snippet (not shown in the original source):
import os

import numpy as np
import skmultiflow.drift_detection
from cloudpickle import dumps
from nuclio.triggers import V3IOStreamTrigger

from mlrun import DataItem, MLClientCtx, import_function, mlconf, mount_v3io


def concept_drift_deployer(
    context: MLClientCtx,
    base_dataset: DataItem,
    input_stream: str,
    output_stream: str,
    output_tsdb: str,
    tsdb_batch_size: int,
    callbacks: list,
    models: list = ["ddm", "eddm", "pagehinkley"],
    models_dest="models",
    pagehinkley_threshold: float = 10,
    ddm_warning_level: float = 2,
    ddm_out_control_level: float = 3,
    label_col="label",
    prediction_col="prediction",
    hub_url: str = mlconf.hub_url,
    fn_tag: str = "master",
):
    """Deploy a streaming Concept Drift detector on a labeled stream
       This function is the Deployment step for the Streaming Concept Drift Detector.
       It will load the selected drift detectors and initialize them with the
       base_dataset's statistics.  Then it will deploy the concept_drift_streaming
       function and pass the models to it for streaming concept-drift detection on top
       of a labeled stream.

    :param context:         MLRun context
    :param base_dataset:    Dataset containing label_col and prediction_col to initialize the detectors
    :param input_stream:    labeled stream to track.
                            Should contain label_col and prediction_col
    :param output_stream:   Output stream to push the detector's alerts
    :param output_tsdb:     Output TSDB table to allow analysis and display
    :param tsdb_batch_size: Batch size of alerts to buffer before pushing to the TSDB
    :param callbacks:       Additional rest endpoints to send the alert data to
    :param models:          List of the detectors to deploy
                            Defaults to ['ddm', 'eddm', 'pagehinkley'].
    :param models_dest:     Location for saving the detectors
                            Defaults to 'models' (in relation to artifact_path).
    :param pagehinkley_threshold:  Drift level threshold for the PageHinkley detector. Defaults to 10.
    :param ddm_warning_level:      Warning level alert for the DDM detector. Defaults to 2.
    :param ddm_out_control_level:  Drift level alert for the DDM detector. Defaults to 3.
    :param label_col:       Label column to be used on base_dataset and input_stream
                            Defaults to 'label'.
    :param prediction_col:  Prediction column to be used on base_dataset and input_stream
                            Defaults to 'prediction'.
    :param hub_url:         Hub URL to load concept_drift_streaming from, in case
                            the default is not used.
                            Defaults to mlconf.hub_url.
    :param fn_tag:          Hub tag to use.
                            Defaults to 'master'.
    """

    mlconf.dbpath = mlconf.dbpath or "http://mlrun-api:8080"
    mlconf.hub_url = hub_url
    fn = import_function(url="hub://concept_drift_streaming")

    context.logger.info("Loading base dataset")
    base_df = base_dataset.as_df()
    error_stream = np.where(
        base_df[prediction_col].values == base_df[label_col].values, 0, 1
    )

    context.logger.info("Creating models")
    models = [
        model.strip()
        for model in os.getenv("models", "pagehinkley, ddm, eddm").split(",")
    ]
    models = {
        "eddm": skmultiflow.drift_detection.EDDM(),
        "pagehinkley": skmultiflow.drift_detection.PageHinkley(
            min_instances=len(error_stream), threshold=pagehinkley_threshold
        ),
        "ddm": skmultiflow.drift_detection.DDM(
            min_num_instances=len(error_stream),
            warning_level=ddm_warning_level,
            out_control_level=ddm_out_control_level,
        ),
    }

    context.logger.info("Streaming data to models")
    for error in error_stream:
        for model in models.values():
            model.add_element(error)

    context.logger.info("Logging ready models")
    for name, model in models.items():
        data = dumps(model)
        model_file = f"{name}.pkl"
        context.log_model(
            f"{name}_concept_drift",
            body=data,
            labels={"framework": "skmultiflow", "workflow": "concept-drift"},
            model_file=model_file,
            model_dir=models_dest,
            tag="latest",
        )
        fn.set_envs(
            {
                f"{name}_model_path": os.path.join(
                    context.artifact_path, models_dest, model_file
                )
            }
        )

    context.logger.info("Deploying Concept Drift Streaming function")
    fn.set_envs(
        {
            "label_col": label_col,
            "prediction_col": prediction_col,
            "drift_stream": output_stream,
            "tsdb_table": output_tsdb,
            "pagehinkley_threshold": pagehinkley_threshold,
            "ddm_warning_level": ddm_warning_level,
            "ddm_out_control": ddm_out_control_level,
        }
    )
    fn.add_trigger(
        "labeled_stream", V3IOStreamTrigger(url=input_stream, name="labeled_stream")
    )
    fn.apply(mount_v3io())
    fn.deploy(project=context.project)
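
A minimal sketch of how the deployer above might be invoked as an MLRun job. The
hub function name, stream paths, dataset URL and artifact path are placeholders,
not taken from the example.

import mlrun

fn = mlrun.import_function("hub://concept_drift")  # assumed hub name
fn.apply(mlrun.mount_v3io())
fn.run(
    name="concept_drift_deployer",
    params={
        "input_stream": "users/admin/demo/labeled_stream",   # placeholder
        "output_stream": "users/admin/demo/drift_alerts",    # placeholder
        "output_tsdb": "users/admin/demo/drift_tsdb",        # placeholder
        "tsdb_batch_size": 10,
        "callbacks": [],
        "models": ["ddm", "eddm", "pagehinkley"],
        "label_col": "label",
        "prediction_col": "prediction",
    },
    inputs={"base_dataset": "store://artifacts/demo/labeled_base_dataset"},  # placeholder
    artifact_path=mlrun.mlconf.artifact_path,
)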