def add_v3io_stream_trigger(
    self,
    stream_path,
    name="stream",
    group="serving",
    seek_to="earliest",
    shards=1,
):
    """Register a v3io stream trigger on this function and pin its replicas.

    :param stream_path: stream location; when it contains "://" it is first
                        run through ``parse_v3io_path`` to separate the
                        endpoint from the path
    :param name:        trigger name (also used as the trigger key)
    :param group:       consumer group handed to the trigger
    :param seek_to:     value handed to the trigger as ``seekTo``
    :param shards:      replica count; a falsy value is treated as 1 and the
                        result is written to both min and max replicas
    """
    webapi_url = None
    if "://" in stream_path:
        webapi_url, stream_path = parse_v3io_path(stream_path, suffix="")
    container, stream_key = split_path(stream_path)
    replica_count = shards if shards else 1

    trigger_args = {
        "name": name,
        "container": container,
        # drop the first character of the split path
        # (presumably the leading "/" - confirm against split_path)
        "path": stream_key[1:],
        "consumerGroup": group,
        "seekTo": seek_to,
        # fall back to the default web-API address when none was parsed
        "webapi": webapi_url if webapi_url else "http://v3io-webapi:8081",
    }
    self.add_trigger(name, V3IOStreamTrigger(**trigger_args))

    # one fixed replica count: min == max
    self.spec.min_replicas = replica_count
    self.spec.max_replicas = replica_count
def init_functions(functions: dict, project=None, secrets=None):
    """Prepare every function in ``functions`` and wire the 's2p' stream trigger.

    Each function gets the v3io mount applied and its image pull policy set
    to 'Always'. Afterwards the function stored under the 's2p' key receives a
    'labeled_stream' V3IO stream trigger.

    :param functions: mapping of function name to function object; must
                      contain an 's2p' entry
    :param project:   not used by this function
    :param secrets:   not used by this function
    """
    for fn in functions.values():
        # Add V3IO Mount
        fn.apply(mount_v3io())
        # Always pull images to keep updates
        fn.spec.image_pull_policy = 'Always'

    # Define inference-stream related triggers
    s2p_fn = functions['s2p']
    s2p_fn.add_trigger(
        'labeled_stream',
        V3IOStreamTrigger(url=f'{labeled_stream_url}@s2p'),
    )
def add_v3io_stream_trigger(
    self,
    stream_path,
    name="stream",
    group="serving",
    seek_to="earliest",
    shards=1,
    extra_attributes=None,
    **kwargs,
):
    """Register a v3io stream trigger on this function and pin its replicas.

    :param stream_path:      v3io stream path
                             (e.g. 'v3io:///projects/myproj/stream1')
    :param name:             trigger name
    :param group:            consumer group
    :param seek_to:          start seek from: "earliest", "latest", "time",
                             "sequence"
    :param shards:           number of shards (used to set number of
                             replicas); a falsy value is treated as 1
    :param extra_attributes: key/value dict with extra trigger attributes
    :param kwargs:           extra V3IOStreamTrigger class attributes
    """
    webapi_url = None
    if "://" in stream_path:
        webapi_url, stream_path = parse_v3io_path(stream_path, suffix="")
    container, stream_key = split_path(stream_path)
    replica_count = shards if shards else 1

    trigger_args = {
        "name": name,
        "container": container,
        # drop the first character of the split path
        # (presumably the leading "/" - confirm against split_path)
        "path": stream_key[1:],
        "consumerGroup": group,
        "seekTo": seek_to,
        # fall back to the default web-API address when none was parsed
        "webapi": webapi_url if webapi_url else "http://v3io-webapi:8081",
        "extra_attributes": extra_attributes,
    }
    # kwargs is unpacked separately so a key that collides with one of the
    # explicit arguments still raises TypeError instead of overriding it
    self.add_trigger(name, V3IOStreamTrigger(**trigger_args, **kwargs))

    # one fixed replica count: min == max
    self.spec.min_replicas = replica_count
    self.spec.max_replicas = replica_count
def add_v3io_stream_trigger(
    self,
    stream_path,
    name="stream",
    group="serving",
    seek_to="earliest",
    shards=1,
):
    """Register a v3io stream trigger on this function and pin its replicas.

    :param stream_path: stream path, split into container and path by
                        ``split_path``
    :param name:        trigger name (also used as the trigger key)
    :param group:       consumer group handed to the trigger
    :param seek_to:     value handed to the trigger as ``seekTo``
    :param shards:      replica count; a falsy value is treated as 1 and the
                        result is written to both min and max replicas
    """
    container, stream_key = split_path(stream_path)
    replica_count = shards if shards else 1

    trigger_args = {
        "name": name,
        "container": container,
        # drop the first character of the split path
        # (presumably the leading "/" - confirm against split_path)
        "path": stream_key[1:],
        "consumerGroup": group,
        "seekTo": seek_to,
    }
    self.add_trigger(name, V3IOStreamTrigger(**trigger_args))

    # one fixed replica count: min == max
    self.spec.min_replicas = replica_count
    self.spec.max_replicas = replica_count
def concept_drift_deployer(
    context: MLClientCtx,
    base_dataset: DataItem,
    input_stream: str,
    output_stream: str,
    output_tsdb: str,
    tsdb_batch_size: int,
    callbacks: list,
    models: list = None,
    models_dest="models",
    pagehinkley_threshold: float = 10,
    ddm_warning_level: float = 2,
    ddm_out_control_level: float = 3,
    label_col="label",
    prediction_col="prediction",
    hub_url: str = mlconf.hub_url,
    fn_tag: str = "master",
):
    """Deploy a streaming Concept Drift detector on a labeled stream

    This function is the Deployment step for the Streaming Concept Drift
    Detector. It builds the selected drift detectors, warms them up with the
    prediction-error stream derived from base_dataset, logs each one as a
    model, and then deploys the concept_drift_streaming function with the
    model paths and settings passed as environment variables.

    NOTE(review): ``tsdb_batch_size`` and ``callbacks`` are accepted but are
    not forwarded to the deployed function by this code.

    :param context:               MLRun context
    :param base_dataset:          Dataset containing label_col and
                                  prediction_col to initialize the detectors
    :param input_stream:          labeled stream to track. Should contain
                                  label_col and prediction_col
    :param output_stream:         Output stream to push the detector's alerts
    :param output_tsdb:           Output TSDB table to allow analysis and
                                  display
    :param tsdb_batch_size:       Batch size of alerts to buffer before
                                  pushing to the TSDB (currently unused here)
    :param callbacks:             Additional rest endpoints to send the alert
                                  data to (currently unused here)
    :param models:                Detectors to deploy, as a list of names or a
                                  comma-separated string. None or empty
                                  selects all of 'ddm', 'eddm', 'pagehinkley'.
    :param models_dest:           Location for saving the detectors
                                  Defaults to 'models' (in relation to
                                  artifact_path).
    :param pagehinkley_threshold: Drift level threshold for PH detector
                                  Defaults to 10.
    :param ddm_warning_level:     Warning level alert for DDM detector
                                  Defaults to 2.
    :param ddm_out_control_level: Drift level alert for DDM detector
                                  Defaults to 3.
    :param label_col:             Label column to be used on base_dataset and
                                  input_stream. Defaults to 'label'.
    :param prediction_col:        Prediction column to be used on base_dataset
                                  and input_stream. Defaults to 'prediction'.
    :param hub_url:               hub_url in case the default is not used,
                                  concept_drift_streaming will be loaded by
                                  this url. Defaults to mlconf.hub_url.
    :param fn_tag:                hub tag to use
                                  Defaults to 'master'

    :raises ValueError: if ``models`` names a detector that is not supported
    """
    mlconf.dbpath = mlconf.dbpath or "http://mlrun-api:8080"
    mlconf.hub_url = hub_url

    # Honor the requested hub tag (it was previously accepted but ignored).
    hub_uri = "hub://concept_drift_streaming"
    if fn_tag:
        hub_uri = f"{hub_uri}:{fn_tag}"
    fn = import_function(url=hub_uri)

    context.logger.info("Loading base dataset")
    base_df = base_dataset.as_df()
    # 0 where the prediction matches the label, 1 where it does not
    error_stream = np.where(
        base_df[prediction_col].values == base_df[label_col].values, 0, 1
    )

    context.logger.info("Creating models")
    sample_count = len(error_stream)
    # Factories rather than instances, so only requested detectors are built.
    detector_factories = {
        "eddm": lambda: skmultiflow.drift_detection.EDDM(),
        "pagehinkley": lambda: skmultiflow.drift_detection.PageHinkley(
            min_instances=sample_count, threshold=pagehinkley_threshold
        ),
        "ddm": lambda: skmultiflow.drift_detection.DDM(
            min_num_instances=sample_count,
            warning_level=ddm_warning_level,
            out_control_level=ddm_out_control_level,
        ),
    }

    # Resolve the caller's selection (it was previously overwritten and
    # every detector was always deployed).
    if isinstance(models, str):
        models = models.split(",")
    requested = {str(model_name).strip() for model_name in (models or ())}
    requested.discard("")
    if not requested:
        requested = set(detector_factories)
    unknown = sorted(requested - set(detector_factories))
    if unknown:
        raise ValueError(
            f"unsupported drift detectors {unknown}, "
            f"supported: {sorted(detector_factories)}"
        )
    detectors = {
        model_name: build()
        for model_name, build in detector_factories.items()
        if model_name in requested
    }

    context.logger.info("Streaming data to models")
    for error in error_stream:
        for detector in detectors.values():
            detector.add_element(error)

    context.logger.info("Logging ready models")
    for name, detector in detectors.items():
        data = dumps(detector)
        model_file = f"{name}.pkl"
        context.log_model(
            f"{name}_concept_drift",
            body=data,
            labels={"framework": "skmultiflow", "workflow": "concept-drift"},
            model_file=model_file,
            model_dir=models_dest,
            tag="latest",
        )
        # Tell the streaming function where this detector was stored.
        fn.set_envs(
            {
                f"{name}_model_path": os.path.join(
                    context.artifact_path, models_dest, model_file
                )
            }
        )

    context.logger.info("Deploying Concept Drift Streaming function")
    fn.set_envs(
        {
            "label_col": label_col,
            "prediction_col": prediction_col,
            "drift_stream": output_stream,
            "tsdb_table": output_tsdb,
            "pagehinkley_threshold": pagehinkley_threshold,
            "ddm_warning_level": ddm_warning_level,
            "ddm_out_control": ddm_out_control_level,
        }
    )
    fn.add_trigger(
        "labeled_stream", V3IOStreamTrigger(url=input_stream, name="labeled_stream")
    )
    fn.apply(mount_v3io())
    fn.deploy(project=context.project)