Example no. 1
    def create_demo_project(self) -> mlrun.projects.MlrunProject:
        self._logger.debug("Creating horovod project")
        demo_project = mlrun.new_project(self.project_name,
                                         str(self.assets_path),
                                         init_git=True)

        mlrun.mount_v3io()

        self._logger.debug("Uploading training file")
        trainer_src_path = str(self.assets_path / "horovod_training.py")
        trainer_dest_path = pathlib.Path("/assets/horovod_training.py")
        stores = mlrun.datastore.store_manager.set()
        datastore, subpath = stores.get_or_create_store(
            self._get_v3io_user_store_path(trainer_dest_path))
        datastore.upload(subpath, trainer_src_path)

        self._logger.debug("Creating iris-generator function")
        function_path = str(self.assets_path / "utils_functions.py")
        utils = mlrun.code_to_function(
            name="utils",
            kind="job",
            filename=function_path,
            image="mlrun/mlrun",
        )

        utils.spec.remote = True
        utils.spec.replicas = 1
        utils.spec.service_type = "NodePort"
        utils.spec.command = function_path

        self._logger.debug("Setting project functions")
        demo_project.set_function(utils)

        trainer = mlrun.new_function(
            name="trainer",
            kind="mpijob",
            command=self._get_v3io_user_store_path(trainer_dest_path,
                                                   remote=False),
            image="mlrun/ml-models",
        )
        trainer.spec.remote = True
        trainer.spec.replicas = 4
        trainer.spec.service_type = "NodePort"

        demo_project.set_function(trainer)
        demo_project.set_function("hub://tf2_serving", "serving")

        demo_project.log_artifact(
            "images",
            target_path="http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip",
            artifact_path=mlrun.mlconf.artifact_path,
        )

        self._logger.debug("Setting project workflow")
        demo_project.set_workflow("main",
                                  str(self.assets_path / "workflow.py"),
                                  embed=True)

        return demo_project
Example no. 2
def init_functions(functions: dict, project=None, secrets=None):
    for f in functions.values():
        # Add V3IO Mount
        f.apply(mount_v3io())

        # Always pull images to keep updates
        f.spec.image_pull_policy = 'Always'

    # Define inference-stream related triggers
    functions['sentiment_analysis_server'].add_model('bert_classifier_v1',
                                                     model_filepath)
    functions['sentiment_analysis_server'].spec.readiness_timeout = 500
    functions['sentiment_analysis_server'].set_config(
        'readinessTimeoutSeconds', 500)

    # Add triggers
    functions['stocks_reader'].add_trigger('cron',
                                           CronTrigger(readers_cron_interval))
    functions['news_reader'].add_trigger('cron',
                                         CronTrigger(readers_cron_interval))

    # Set max replicas for resource limits
    functions['sentiment_analysis_server'].spec.max_replicas = max_replicas
    functions['news_reader'].spec.max_replicas = max_replicas
    functions['stocks_reader'].spec.max_replicas = max_replicas

    # Add GPU for training
    functions['bert_sentiment_classifier_trainer'].gpus(training_gpus)
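
This snippet assumes several module-level settings defined elsewhere in the workflow file (model_filepath, readers_cron_interval, max_replicas, training_gpus, CronTrigger). A minimal sketch of what those definitions might look like; the import path and all values here are assumptions, not from the original project:

import os
from nuclio.triggers import CronTrigger  # assumed import (nuclio-jupyter package)

# Hypothetical settings; names match the snippet above, values are illustrative
model_filepath = os.getenv('MODEL_PATH', '/User/models/bert_classifier_v1.pt')
readers_cron_interval = '300s'  # run the reader functions every 5 minutes
max_replicas = 2                # cap auto-scaling for each function
training_gpus = 1               # GPUs requested for the BERT trainer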
Example no. 3
def init_functions(functions: dict, project=None, secrets=None):
    for f in functions.values():
        # Add V3IO Mount
        f.apply(mount_v3io())

        # Always pull images to keep updates
        f.spec.image_pull_policy = 'Always'
Example no. 4
def init_functions(functions: dict, project=None, secrets=None):
    # Mount V3IO data layer to pipeline components
    for f in functions.values():
        f.apply(mount_v3io())

    # Configuration for training function
    image = lambda gpu: 'mlrun/ml-models-gpu' if gpu else 'mlrun/ml-models'
    functions['trainer'].spec.image = image(
        config['trainer']['resources']['use_gpu'])
    functions['trainer'].with_requests(
        cpu=config['trainer']['resources']['requests']['cpu'],
        mem=config['trainer']['resources']['requests']['mem'])
    functions['trainer'].with_limits(
        cpu=config['trainer']['resources']['limits']['cpu'],
        mem=config['trainer']['resources']['limits']['mem'])
    functions['trainer'].spec.replicas = config['trainer']['resources'][
        'replicas']
    if config['trainer']['resources']['use_gpu']:
        functions['trainer'].gpus(1)

    # Configuration for serving function
    functions['serving'].set_env('MODEL_CLASS',
                                 config['serving']['model_class'])
    functions['serving'].set_env('IMAGE_HEIGHT',
                                 config['serving']['image_height'])
    functions['serving'].set_env('IMAGE_WIDTH',
                                 config['serving']['image_width'])
    functions['serving'].set_env('ENABLE_EXPLAINER',
                                 config['serving']['enable_explainer'])
    functions["serving"].spec.base_spec['spec']['loggerSinks'] = [{
        'level':
        'info'
    }]
    functions['serving'].spec.min_replicas = config['serving']['replicas']
    functions['serving'].spec.max_replicas = config['serving']['replicas']
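
This snippet drives everything from a config mapping, typically loaded from the project's YAML file. A hedged sketch of the shape the lookups above imply; the values are illustrative placeholders:

# Illustrative config structure inferred from the lookups above
config = {
    'trainer': {
        'resources': {
            'use_gpu': False,
            'requests': {'cpu': 1, 'mem': '2G'},
            'limits': {'cpu': 2, 'mem': '4G'},
            'replicas': 1,
        }
    },
    'serving': {
        'model_class': 'TFModel',
        'image_height': '224',
        'image_width': '224',
        'enable_explainer': 'False',
        'replicas': 1,
    },
}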
Example no. 5
def init_functions(functions: dict, project=None, secrets=None):
    for f in functions.values():
        f.apply(mount_v3io())

    functions["server"].set_env(
        "INFERENCE_STREAM", "users/admin/artifacts/churn/model_stream"
    )
Example no. 6
def init_functions(functions: dict, project=None, secrets=None):
    '''
    This function runs before the project workflow starts.
    It lets us apply system-specific configuration to the functions,
    such as mounts or secrets, if needed.

    In this case we add Iguazio's user mount to our functions using the
    `mount_v3io()` function, which automatically sets up the mount with the
    needed variables taken from the environment.
    * mount_v3io can be replaced with mlrun.platforms.mount_pvc() for a
      non-Iguazio mount (see the sketch after this example)

    @param functions: <function_name: function_yaml> dict of functions in the
                        workflow
    @param project: project object
    @param secrets: secrets required by the functions, e.g. for S3
                    connections
    '''
    for f in functions.values():
        f.apply(mount_v3io())  # On Iguazio (Auto-mount /User)
        # f.apply(mlrun.platforms.mount_pvc()) # Non-Iguazio mount

    functions['serving'].set_env('MODEL_CLASS', 'TFModel')
    functions['serving'].set_env('IMAGE_HEIGHT', '224')
    functions['serving'].set_env('IMAGE_WIDTH', '224')
    functions['serving'].set_env('ENABLE_EXPLAINER', 'False')
    functions['serving'].spec.min_replicas = 1
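
As the docstring notes, on non-Iguazio clusters the v3io mount can be swapped for a PVC mount. A minimal sketch, assuming a pre-provisioned PVC; the PVC name, volume name, and mount path are illustrative:

import mlrun

for f in functions.values():
    # Hypothetical PVC mount for non-Iguazio clusters
    f.apply(mlrun.platforms.mount_pvc(pvc_name='shared-data',
                                      volume_name='shared-data',
                                      volume_mount_path='/home/jovyan/data'))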
Example no. 7
def main():
    # Load environment variables
    load_dotenv()

    # Setup project
    project_name, artifact_path = set_environment(
        project="remote-model-deployment",
        artifact_path=os.getenv("MLRUN_ARTIFACT_PATH"),
        api_path=os.getenv("MLRUN_DBPATH"),
        access_key=os.getenv("V3IO_ACCESS_KEY"),
    )
    print(f"Creating project '{project_name}'")

    # Push assets to V3IO
    v3io_client = v3io.dataplane.Client()
    push_to_v3io(v3io_client, "assets/model.pkl", "nick/tmp/model.pkl")

    # Create MLRun function
    serving_fn = code_to_function(
        name="serving",
        kind="serving",
        image="mlrun/mlrun",
        filename="assets/model_server.py",
    ).apply(mount_v3io())
    print(f"Creating function '{serving_fn.metadata.name}'")

    # Configure MLRun function
    serving_fn.spec.default_class = "ClassifierModel"
    serving_fn.add_model("my_model", model_path="/User/tmp/model.pkl")

    # Deploy
    addr = serving_fn.deploy()

    # Test model inference
    test_inference(addr)
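
push_to_v3io and test_inference are project helpers not shown here. A hedged sketch of what test_inference might look like, assuming the model is served over MLRun's v2 REST protocol; the payload is illustrative:

import requests

def test_inference(addr: str):
    # Hypothetical helper: POST a sample to the deployed endpoint using
    # the /v2/models/<name>/infer route; the input vector is illustrative
    resp = requests.post(
        f"{addr}/v2/models/my_model/infer",
        json={"inputs": [[5.1, 3.5, 1.4, 0.2]]},
    )
    resp.raise_for_status()
    print("Inference response:", resp.json())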
Example no. 8
def init_context(context):
    setattr(context, "batch", [])
    setattr(context, "window", int(os.getenv("window", 10)))
    setattr(context, "save_to", os.getenv("save_to", "/bigdata/inference_pq/"))
    os.makedirs(context.save_to, exist_ok=True)

    mlrun.mlconf.dbpath = mlrun.mlconf.dbpath or "http://mlrun-api:8080"
    artifact_path = os.getenv("artifact_path", None)
    if artifact_path:
        mlrun.mlconf.artifact_path = artifact_path
    if "hub_url" in os.environ:
        mlrun.mlconf.hub_url = os.environ["hub_url"]
    virtual_drift_fn = mlrun.import_function("hub://virtual_drift")
    virtual_drift_fn.apply(
        mlrun.mount_v3io(
            mount_path=os.getenv("mount_path", "~/"),
            remote=os.getenv("mount_remote", "/User"),
        ))
    setattr(context, "virtual_drift_fn", virtual_drift_fn)

    predictions_col = os.getenv("predictions", None)
    label_col = os.getenv("label_col", None)
    setattr(context, "base_dataset", os.getenv("base_dataset", ""))
    setattr(context, "indexes", json.loads(os.environ.get("indexes", "[]")))
    setattr(context, "predictions_col", predictions_col)
    setattr(context, "label_col", label_col)
    setattr(context, "results_tsdb_container",
            os.getenv("results_tsdb_container", None))
    setattr(context, "results_tsdb_table", os.getenv("results_tsdb_table",
                                                     None))
Example no. 9
def init_functions(functions: dict, project=None, secrets=None):

    image = f"docker-registry.{os.getenv('IGZ_NAMESPACE_DOMAIN')}:80/{config['project']['docker_image']}"

    for fn in functions.values():

        # Set resources for jobs
        if fn.to_dict()["kind"] == "job":
            fn.spec.build.image = image

        # Set resources for nuclio functions
        elif fn.to_dict()["kind"] == "remote":
            fn.with_http(workers=1)
            fn.spec.base_spec['spec']['build']['baseImage'] = image
            fn.spec.base_spec['spec']['loggerSinks'] = [{'level': 'info'}]
            fn.spec.min_replicas = 1
            fn.spec.max_replicas = 1

        # Apply environment variables
        fn.set_env('V3IO_ACCESS_KEY', os.getenv('V3IO_ACCESS_KEY'))
        fn.set_env('V3IO_USERNAME', os.getenv('V3IO_USERNAME'))
        fn.set_env('IGZ_NAMESPACE_DOMAIN', os.getenv('IGZ_NAMESPACE_DOMAIN'))
        fn.set_env('RAW_VIDEO_STREAM', config['stream']['raw_video_stream'])
        fn.set_env('TAGGED_VIDEO_STREAM',
                   config['stream']['tagged_video_stream'])
        fn.set_env('IGZ_CONTAINER', config['project']['container'])
        fn.set_env('CAMERA_LIST_TBL', config['camera']['list_table'])
        fn.set_env('CAMERA_ID', config['camera']['id'])
        fn.set_env('SHARD_ID', config['stream']['shard_id'])
        fn.set_env('CAMERA_URL', config['camera']['url'])
        fn.set_env('ROTATE_180', config['stream']['rotate_180'])
        fn.set_env('FACIAL_RECOGNITION_FUNCTION',
                   config['api']['facial_recognition_function'])
        fn.set_env('GET_IMAGE_FUNCTION', config['api']['get_image_function'])
        fn.set_env('API_GATEWAY', config['api']['gateway'])
        fn.set_env('PROJECT', config['project']['name'])
        fn.set_env('IGZ_AUTH', config['api']['auth'])

        # Set default handler
        fn.spec.default_handler = "handler"

        # Apply V3IO mount
        fn.apply(mount_v3io())

    # Apply V3IO trigger
    facial_recognition_trigger_spec = {
        'kind': 'v3ioStream',
        'url': f"http://*****:*****@processorgrp",
        "password": os.getenv('V3IO_ACCESS_KEY'),
        'attributes': {
            "pollingIntervalMs": 500,
            "seekTo": "earliest",
            "readBatchSize": 100,
            "partitions": "0-100",
        }
    }
    functions['deploy-facial-recognition'].add_trigger(
        'image-proc', facial_recognition_trigger_spec)
Example no. 10
def init_functions(functions: dict, project=None, secrets=None):
    for f in functions.values():
        # Add V3IO Mount
        f.apply(mount_v3io())
        
        # Always pull images to keep updates
        f.spec.image_pull_policy = 'Always'
    
    # Define inference-stream related triggers
    functions['s2p'].add_trigger('labeled_stream', V3IOStreamTrigger(url=f'{labeled_stream_url}@s2p'))
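
The snippet assumes V3IOStreamTrigger and labeled_stream_url are defined earlier in the workflow file. A minimal sketch of plausible definitions; the import path and the stream URL are assumptions:

from mlrun.platforms.iguazio import V3IOStreamTrigger  # assumed import location

# Hypothetical labeled-stream URL (container and path are illustrative);
# the snippet appends '@s2p' as the consumer-group name
labeled_stream_url = 'http://v3io-webapi:8081/users/admin/labeled_stream'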
Example no. 11
    def custom_setup(self):
        self._logger.debug("Creating dask function")
        self.dask_function = code_to_function(
            "mydask", kind="dask", filename=str(self.assets_path / "dask_function.py"),
        ).apply(mount_v3io())

        self.dask_function.spec.image = "mlrun/ml-models"
        self.dask_function.spec.remote = True
        self.dask_function.spec.replicas = 1
        self.dask_function.spec.service_type = "NodePort"
        self.dask_function.spec.command = str(self.assets_path / "dask_function.py")
Example no. 12
def init_functions(functions: dict, project=None, secrets=None):
    for fn in functions.values():
        # Mount V3IO filesystem
        fn.apply(mount_v3io())

    functions["deploy-model"].spec.base_spec['spec']['loggerSinks'] = [{
        'level':
        'info'
    }]
    functions["deploy-model"].spec.min_replicas = 1
    functions["deploy-model"].spec.max_replicas = 1
    functions["deploy-model"].spec.default_class = 'MNISTModel'
Example no. 13
def init_functions(functions: dict, project=None, secrets=None):

    for fn in functions.values():
        # Mount V3IO filesystem
        fn.apply(mount_v3io())
        fn.apply(
            mount_v3io(name="csv",
                       remote=config["csv"]["s3_images_csv_remote_path"],
                       mount_path=config["csv"]["s3_images_csv_mount_path"]))
        fn.apply(
            mount_v3io(name="data",
                       remote=config["data"]["remote_download_path"],
                       mount_path=config["data"]["mount_download_path"]))

    # Set env var configuration for S3 functions
    s3_functions = ['download-s3', 'upload-s3']
    for func in s3_functions:
        functions[func].set_env('AWS_ACCESS_KEY_ID',
                                config['aws']['aws_access_key_id'])
        functions[func].set_env('AWS_SECRET_ACCESS_KEY',
                                config['aws']['aws_secret_access_key'])
        functions[func].set_env('AWS_DEFAULT_REGION',
                                config['aws']['aws_default_region'])

    # Set GPU resources for model training
    #functions['train-model'].with_limits(gpus="1", gpu_type='nvidia.com/gpu')

    # Set resources for model deployment
    functions["deploy-model"].spec.base_spec['spec']['build'][
        'baseImage'] = "mlrun/ml-models-gpu"
    functions["deploy-model"].spec.base_spec['spec']['loggerSinks'] = [{
        'level':
        'info'
    }]
    functions["deploy-model"].spec.min_replicas = 1
    functions["deploy-model"].spec.max_replicas = 1
Example no. 14
    def create_demo_project(self) -> mlrun.projects.MlrunProject:
        self._logger.debug("Creating churn project")
        demo_project = mlrun.new_project(self.project_name,
                                         str(self.assets_path),
                                         init_git=True)

        data_url = (
            "https://raw.githubusercontent.com/mlrun/demos/master/customer-churn-prediction/WA_Fn-UseC_-Telco-"
            "Customer-Churn.csv")
        demo_project.log_artifact("raw-data", target_path=data_url)

        self._logger.debug("Creating clean-data function")
        function_path = str(self.assets_path / "data_clean_function.py")
        clean_data_function = mlrun.code_to_function(
            name="clean_data",
            kind="job",
            filename=function_path,
            image="mlrun/ml-models-gpu"
            if self.use_gpus else "mlrun/ml-models",
            description="clean and encode raw data",
            categories=["data-prep"],
            labels={
                "author": "yasha",
                "framework": "xgboost"
            },
        ).apply(mlrun.mount_v3io())

        clean_data_function.spec.remote = True
        clean_data_function.spec.replicas = 1
        clean_data_function.spec.service_type = "NodePort"
        clean_data_function.spec.command = function_path

        self._logger.debug("Setting project functions")
        demo_project.set_function(clean_data_function)
        demo_project.set_function("hub://describe", "describe")
        demo_project.set_function("hub://xgb_trainer", "classify")
        demo_project.set_function("hub://xgb_test", "xgbtest")
        demo_project.set_function("hub://coxph_trainer", "survive")
        demo_project.set_function("hub://coxph_test", "coxtest")
        demo_project.set_function("hub://churn_server", "server")

        self._logger.debug("Setting project workflow")
        demo_project.set_workflow("main",
                                  str(self.assets_path / "workflow.py"),
                                  embed=True)

        return demo_project
Example no. 15
def test_mount_v3io():
    username = "******"
    access_key = "access-key"
    env = os.environ
    env["V3IO_USERNAME"] = username
    env["V3IO_ACCESS_KEY"] = access_key
    function = mlrun.new_function("function-name",
                                  "function-project",
                                  kind=mlrun.runtimes.RuntimeKinds.job)
    function.apply(mlrun.mount_v3io())
    expected_volume = {
        "flexVolume": {
            "driver": "v3io/fuse",
            "options": {
                "accessKey": access_key,
                "container": "users",
                "subPath": f"/{username}",
            },
        },
        "name": "v3io",
    }
    expected_volume_mount = {
        "mountPath": "/User",
        "name": "v3io",
        "subPath": ""
    }
    assert (deepdiff.DeepDiff(
        [expected_volume],
        function.spec.volumes,
        ignore_order=True,
    ) == {})
    assert (deepdiff.DeepDiff(
        [expected_volume_mount],
        function.spec.volume_mounts,
        ignore_order=True,
    ) == {})
Example no. 16
def init_functions(functions: dict, project=None, secrets=None):
    for f in functions.values():
        f.apply(mount_v3io())

    functions['serving'].metadata.name = 'getting-started-serving'
Example no. 17
def concept_drift_deployer(
    context: MLClientCtx,
    base_dataset: DataItem,
    input_stream: str,
    output_stream: str,
    output_tsdb: str,
    tsdb_batch_size: int,
    callbacks: list,
    models: list = ["ddm", "eddm", "pagehinkley"],
    models_dest="models",
    pagehinkley_threshold: float = 10,
    ddm_warning_level: float = 2,
    ddm_out_control_level: float = 3,
    label_col="label",
    prediction_col="prediction",
    hub_url: str = mlconf.hub_url,
    fn_tag: str = "master",
):
    """Deploy a streaming Concept Drift detector on a labeled stream
       This function is the Deployment step for the Streaming Concept Drift Detector.
       It will load the selected drift detectors and initialize them with the
       base_dataset's statistics.  Then it will deploy the concept_drift_streaming
       function and pass the models to it for streaming concept-drift detection on top
       of a labeled stream.

    :param context:         MLRun context
    :param base_dataset:    Dataset containing label_col and prediction_col to initialize the detectors
    :param input_stream:    labeled stream to track.
                            Should contain label_col and prediction_col
    :param output_stream:   Output stream to push the detector's alerts
    :param output_tsdb:     Output TSDB table to allow analysis and display
    :param tsdb_batch_size: Batch size of alerts to buffer before pushing to the TSDB
    :param callbacks:       Additional rest endpoints to send the alert data to
    :param models:          List of the detectors to deploy
                            Defaults to ['ddm', 'eddm', 'pagehinkley'].
    :param models_dest:     Location for saving the detectors
                            Defaults to 'models' (in relation to artifact_path).
    :param pagehinkley_threshold:  Drift level threshold for PH detector Defaults to 10.
    :param ddm_warning_level:      Warning level alert for DDM detector Defaults to 2.
    :param ddm_out_control_level:  Drift level alert for DDM detector Defaults to 3.
    :param label_col:       Label column to be used on base_dataset and input_stream
                            Defaults to 'label'.
    :param prediction_col:  Prediction column to be used on base_dataset and input_stream
                            Defaults to 'prediction'.
    :param hub_url:         hub URL to load concept_drift_streaming from, in
                            case the default is not used
                            Defaults to mlconf.hub_url.
    :param fn_tag:          hub tag to use
                            Defaults to 'master'
    """

    mlconf.dbpath = mlconf.dbpath or "http://mlrun-api:8080"
    mlconf.hub_url = hub_url
    fn = import_function(url="hub://concept_drift_streaming")

    context.logger.info("Loading base dataset")
    base_df = base_dataset.as_df()
    error_stream = np.where(
        base_df[prediction_col].values == base_df[label_col].values, 0, 1
    )

    context.logger.info("Creating models")
    models = [
        model.strip()
        for model in os.getenv("models", "pagehinkley, ddm, eddm").split(",")
    ]
    models = {
        "eddm": skmultiflow.drift_detection.EDDM(),
        "pagehinkley": skmultiflow.drift_detection.PageHinkley(
            min_instances=len(error_stream), threshold=pagehinkley_threshold
        ),
        "ddm": skmultiflow.drift_detection.DDM(
            min_num_instances=len(error_stream),
            warning_level=ddm_warning_level,
            out_control_level=ddm_out_control_level,
        ),
    }

    context.logger.info("Streaming data to models")
    for i in range(len(error_stream)):
        for model_name, model in models.items():
            model.add_element(error_stream[i])

    context.logger.info("Logging ready models")
    for name, model in models.items():
        data = dumps(model)
        model_file = f"{name}.pkl"
        context.log_model(
            f"{name}_concept_drift",
            body=data,
            labels={"framework": "skmultiflow", "workflow": "concept-drift"},
            model_file=model_file,
            model_dir=models_dest,
            tag="latest",
        )
        fn.set_envs(
            {
                f"{name}_model_path": os.path.join(
                    context.artifact_path, models_dest, model_file
                )
            }
        )

    context.logger.info("Deploying Concept Drift Streaming function")
    fn.set_envs(
        {
            "label_col": label_col,
            "prediction_col": prediction_col,
            "drift_stream": output_stream,
            "tsdb_table": output_tsdb,
            "pagehinkley_threshold": pagehinkley_threshold,
            "ddm_warning_level": ddm_warning_level,
            "ddm_out_control": ddm_out_control_level,
        }
    )
    fn.add_trigger(
        "labeled_stream", V3IOStreamTrigger(url=input_stream, name="labeled_stream")
    )
    fn.apply(mount_v3io())
    fn.deploy(project=context.project)
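
A hedged sketch of how this deployer might be invoked as a hub function from a notebook or pipeline; the project paths, stream URLs, and dataset URI are illustrative:

import mlrun

deployer = mlrun.import_function('hub://concept_drift_deployer')
deployer.apply(mlrun.mount_v3io())
deployer.run(
    name='concept-drift-deployer',
    params={
        'input_stream': 'http://v3io-webapi:8081/users/admin/labeled_stream@cg0',
        'output_stream': 'users/admin/drift_alerts',
        'output_tsdb': 'users/admin/drift_tsdb',
        'tsdb_batch_size': 100,
        'callbacks': [],
        'label_col': 'label',
        'prediction_col': 'prediction',
    },
    inputs={'base_dataset': 'store://artifacts/my-project/labeled-base-set'},
    artifact_path=mlrun.mlconf.artifact_path,
)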
Example no. 18
def init_functions(functions: dict, project=None, secrets=None):
    for f in functions.values():
        f.apply(mount_v3io())
Example no. 19
# In[28]:

db.list_functions(project='properties-management')

# ### Import a specific version of a function

# In[37]:

fn = import_function(url='db://properties-management/properties-management')

# In[38]:

print(fn.to_yaml())

# ### Execute function

# In[35]:

fn.apply(mount_v3io())

# In[39]:

fn.run(artifact_path=artifact_path,
       handler='save_properties',
       params={
           "source_file": '/User/igztraining/mlrun/Day-4/data/demo.csv',
           "target_file": "/User/igztraining/mlrun/data/condos.csv"
       })

Example no. 20
def _deploy_v1alpha1(project_graph=''):
    for function in project_graph['project']['functions']:
        fn = composer(project_graph['apiVersion'],
                      function['function_name'],
                      project=project_graph['project']['name'])

        fn.with_http(workers=1).apply(mount_v3io())

        GPU = bool(function['gpu'])
        fn.spec.base_spec['spec']['build']['baseImage'] = function[
            'docker_image']
        fn.spec.build.commands = ['pip install v3io==0.4.0']

        fn.spec.min_replicas = function['minReplicas']
        fn.spec.max_replicas = function['maxReplicas']

        if GPU:
            fn.spec.base_spec['spec']['resources'] = {}
            fn.spec.base_spec['spec']['resources']['limits'] = {
                'nvidia.com/gpu': function['num_gpus']
            }

        fn.set_env('V3IO_ACCESS_KEY', os.getenv('V3IO_ACCESS_KEY'))
        INPUT_STREAM = function['input_stream']
        consumer_group = function['function_name'].replace('-', '_')
        #consumer_group='inferencegrp'

        maxWorkers = function['maxWorkers']

        trigger_spec = {
            'kind': 'v3ioStream',
            'url': "http://%s/%s/%s" % ('v3io-webapi:8081',
                                        function['input_stream_container'],
                                        f'{INPUT_STREAM}@{consumer_group}'),
            "password": os.getenv('V3IO_ACCESS_KEY'),
            "maxWorkers": maxWorkers,
            'attributes': {
                "pollingIntervalMs": 500,
                "seekTo": "latest",
                "readBatchSize": 100,
            }
        }
        fn.add_trigger('input-stream', trigger_spec)

        # These should be set in your YAML
        fn.set_env('MODULE_PATHS', function['module_paths'])
        fn.set_env('IMPORT_MODULES', function['import_modules'])
        fn.set_env('CLASS_LOAD_FUNCTION', function['class_load_function'])
        fn.set_env('PROCESSING_FUNCTION', function['processing_function'])
        fn.set_env('STEP_NAME', function['function_name'])
        fn.set_env('POST_PROCESS_FUNCTION', function['post_process_function'])
        fn.set_env('OUTPUT_STREAM', function['output_stream'])
        fn.set_env('OUTPUT_STREAM_CONTAINER',
                   function['output_stream_container'])

        if 'env_custom' in function:
            for env_var in function['env_custom']:
                fn.set_env(env_var['name'], env_var['value'])

        fn.apply(mount_v3io())

        addr = fn.deploy(project=project_graph['project']['name'])
Example no. 21
def _deploy_v0_1(project_graph=''):
    for function in project_graph['project']['functions']:
        fn = composer(project_graph['apiVersion'],
                      function['function_name'],
                      project=project_graph['project']['name'])

        fn.spec.base_spec['spec']['build']['baseImage'] = function[
            'docker_image']

        fn.spec.build.commands = format_pip_libraries(function)

        fn.spec.min_replicas = function['minReplicas']
        fn.spec.max_replicas = function['maxReplicas']

        GPU = bool(function['gpu'])

        if GPU:
            fn.spec.base_spec['spec']['resources'] = {}
            fn.spec.base_spec['spec']['resources']['limits'] = {
                'nvidia.com/gpu': function['num_gpus']
            }

        fn.set_env('V3IO_ACCESS_KEY', os.getenv('V3IO_ACCESS_KEY'))
        consumer_group = function['function_name'].replace('-', '_')

        _input_streams = function['input_streams']
        for _stream in _input_streams.keys():
            _container = project_graph['project']['v3io_streams'][_stream][
                'container']
            _stream_path = project_graph['project']['v3io_streams'][_stream][
                'path']

            _maxWorkers = _input_streams[_stream]['maxWorkers']

            try:
                _v3io_access_key = _input_streams[_stream]['v3io_access_key']
            except KeyError:
                print("Using default v3io_access_key from environment")
                _v3io_access_key = os.getenv('V3IO_ACCESS_KEY')

            try:
                _pollingIntervalMs = _input_streams[_stream][
                    'pollingIntervalMs']
            except KeyError:
                print('Using default pollingIntervalMs')
                _pollingIntervalMs = 500

            try:
                _seekTo = _input_streams[_stream]['seekTo']
            except KeyError:
                print('Using default seek to latest')
                _seekTo = 'latest'

            try:
                _readBatchSize = _input_streams[_stream]['readBatchSize']
            except KeyError:
                print('Using default readBatchSize 100')
                _readBatchSize = 100

            trigger_spec = {
                'kind': 'v3ioStream',
                'url': "http://%s/%s/%s" % ('v3io-webapi:8081', _container,
                                            f'{_stream_path}@{consumer_group}'),
                "password": _v3io_access_key,
                "maxWorkers": _maxWorkers,
                'attributes': {
                    "pollingIntervalMs": _pollingIntervalMs,
                    "seekTo": _seekTo,
                    "readBatchSize": _readBatchSize,
                }
            }
            fn.add_trigger(_stream, trigger_spec)

        # These should be set in your YAML
        _step_config = {
            'MODULE_PATHS': function['module_paths'],
            'IMPORT_MODULES': function['import_modules'],
            'CLASS_LOAD_FUNCTION': function['class_load_function'],
            'PROCESSING_FUNCTION': function['processing_function'],
            'STEP_NAME': function['function_name'],
            'OUTPUT_STREAM_CONTAINER': function['output_stream_container'],
            'OUTPUTS': function['outputs']
        }

        fn.set_env("STEP_CONFIG", json.dumps(_step_config))
        if 'env_custom' in function:
            for env_var in function['env_custom']:
                fn.set_env(env_var['name'], env_var['value'])

        # Mount v3io volumes
        if 'v3io_volumes' in project_graph['project']:
            _volumes = project_graph['project']['v3io_volumes']
            for volume in _volumes.keys():
                fn.apply(
                    mount_v3io(name=volume,
                               remote=_volumes[volume]['remote'],
                               mount_path=_volumes[volume]['mount_path']))

        if 'class_init' in function:
            fn.set_env("CLASS_INIT", json.dumps(function['class_init']))

        if 'loggerSinks' in function:
            fn.spec.base_spec['spec']['loggerSinks'] = function['loggerSinks']

        deployment_url = fn.deploy(project=project_graph['project']['name'])
        print(f'Function deployed: {deployment_url}')
Example no. 22
def _deploy_v2alpha3(project_graph=''):
    for function in project_graph['project']['functions']:
        fn = composer(project_graph['apiVersion'],
                      function['function_name'],
                      project=project_graph['project']['name'])

        #fn.with_http(workers=1)

        fn.spec.base_spec['spec']['build']['baseImage'] = function[
            'docker_image']
        fn.spec.build.commands = ['pip install v3io==0.5.0']

        fn.spec.min_replicas = function['minReplicas']
        fn.spec.max_replicas = function['maxReplicas']

        GPU = bool(function['gpu'])

        if GPU:
            fn.spec.base_spec['spec']['resources'] = {}
            fn.spec.base_spec['spec']['resources']['limits'] = {
                'nvidia.com/gpu': function['num_gpus']
            }

        fn.set_env('V3IO_ACCESS_KEY', os.getenv('V3IO_ACCESS_KEY'))
        INPUT_STREAM = function['input_stream']
        consumer_group = function['function_name'].replace('-', '_')
        #consumer_group='inferencegrp'

        maxWorkers = function['maxWorkers']

        trigger_spec = {
            'kind': 'v3ioStream',
            'url': "http://%s/%s/%s" % ('v3io-webapi:8081',
                                        function['input_stream_container'],
                                        f'{INPUT_STREAM}@{consumer_group}'),
            "password": os.getenv('V3IO_ACCESS_KEY'),
            "maxWorkers": maxWorkers,
            'attributes': {
                "pollingIntervalMs": 500,
                "seekTo": "earliest",
                "readBatchSize": 100,
            }
        }
        fn.add_trigger('input-stream', trigger_spec)

        # These should be set in your YAML
        _step_config = {}
        _step_config['MODULE_PATHS'] = function['module_paths']
        _step_config['IMPORT_MODULES'] = function['import_modules']
        _step_config['CLASS_LOAD_FUNCTION'] = function['class_load_function']
        _step_config['PROCESSING_FUNCTION'] = function['processing_function']
        _step_config['STEP_NAME'] = function['function_name']
        _step_config['OUTPUT_STREAM_CONTAINER'] = function[
            'output_stream_container']
        _step_config['OUTPUTS'] = function['outputs']

        fn.set_env("STEP_CONFIG", json.dumps(_step_config))
        if 'env_custom' in function:
            for env_var in function['env_custom']:
                fn.set_env(env_var['name'], env_var['value'])

        # Mount v3io volumes
        if 'v3io_volumes' in project_graph['project']:
            _volumes = project_graph['project']['v3io_volumes']
            for volume in _volumes.keys():
                fn.apply(
                    mount_v3io(name=volume,
                               remote=_volumes[volume]['remote'],
                               mount_path=_volumes[volume]['mount_path']))

        if 'class_init' in function:
            fn.set_env("CLASS_INIT", json.dumps(function['class_init']))

        addr = fn.deploy(project=project_graph['project']['name'])
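
Examples 20-22 all walk a project_graph mapping, typically parsed from a YAML file. A hedged sketch of the structure implied by the keys these _deploy_* helpers read; every value is a placeholder:

# Illustrative project graph inferred from the keys read above
project_graph = {
    'apiVersion': 'v2alpha3',
    'project': {
        'name': 'video-pipeline',
        'functions': [{
            'function_name': 'frame-processor',
            'docker_image': 'myrepo/frame-processor:latest',
            'gpu': False,
            'num_gpus': 0,
            'minReplicas': 1,
            'maxReplicas': 2,
            'maxWorkers': 4,
            'input_stream': 'raw-video',
            'input_stream_container': 'bigdata',
            'output_stream_container': 'bigdata',
            'outputs': ['tagged-video'],
            'module_paths': '/opt/nuclio/modules',
            'import_modules': 'processor',
            'class_load_function': 'load_processor',
            'processing_function': 'process_frame',
        }],
        'v3io_volumes': {
            'data': {'remote': 'users/admin/data', 'mount_path': '/data'},
        },
    },
}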
Example no. 23
     os.environ["V3IO_USERNAME"] = username
     os.environ["V3IO_ACCESS_KEY"] = "access-key"
 else:
     os.environ.pop("V3IO_USERNAME", None)
     os.environ.pop("V3IO_ACCESS_KEY", None)
 mount_v3io_kwargs = {
     "remote": case.get("remote"),
     "mount_path": case.get("mount_path"),
     "volume_mounts": case.get("volume_mounts"),
 }
 mount_v3io_kwargs = {k: v for k, v in mount_v3io_kwargs.items() if v}
 if case.get("expect_failure"):
     with pytest.raises(mlrun.errors.MLRunInvalidArgumentError):
         tested_function.apply(mlrun.mount_v3io(**mount_v3io_kwargs))
 else:
     tested_function.apply(mlrun.mount_v3io(**mount_v3io_kwargs))
     if not case.get("volume_mounts") and case.get("remote"):
         expectation_modifier = mlrun.mount_v3io_legacy
         expectation_modifier_kwargs = {
             "remote": case.get("remote"),
             "mount_path": case.get("mount_path"),
         }
     else:
         expectation_modifier = mlrun.mount_v3io_extended
         expectation_modifier_kwargs = {
             "remote": case.get("remote"),
             "mounts": case.get("volume_mounts"),
         }
     expectation_modifier_kwargs = {
         k: v
         for k, v in expectation_modifier_kwargs.items() if v
Example no. 24
def test_mount_v3io():
    cases = [
        {
            "mount_path": "/custom-mount-path",
            "volume_mounts":
                [mlrun.VolumeMount("/volume-mount-path", "volume-sub-path")],
            "expect_failure": True,
        },
        {
            "mount_path": "/custom-mount-path",
            "volume_mounts":
                [mlrun.VolumeMount("/volume-mount-path", "volume-sub-path")],
            "remote": "~/custom-remote",
            "expect_failure": True,
        },
        {
            "mount_path": "/custom-mount-path",
            "remote": "~/custom-remote",
            "set_user": True,
        },
        {"mount_path": "/custom-mount-path", "set_user": True},
        {"remote": "~/custom-remote", "set_user": True},
        {
            "remote": "~/custom-remote",
            "volume_mounts":
                [mlrun.VolumeMount("/volume-mount-path", "volume-sub-path")],
            "set_user": True,
        },
        {
            "volume_mounts":
                [mlrun.VolumeMount("/volume-mount-path", "volume-sub-path")],
            "set_user": True,
        },
        {"set_user": True},
    ]
    for case in cases:
        username = "******"
        tested_function = mlrun.new_function(
            "tested-function-name",
            "function-project",
            kind=mlrun.runtimes.RuntimeKinds.job,
        )
        expectation_function = mlrun.new_function(
            "expectation-function-name",
            "function-project",
            kind=mlrun.runtimes.RuntimeKinds.job,
        )
        if case.get("set_user"):
            os.environ["V3IO_USERNAME"] = username
            os.environ["V3IO_ACCESS_KEY"] = "access-key"
        else:
            os.environ.pop("V3IO_USERNAME", None)
            os.environ.pop("V3IO_ACCESS_KEY", None)
        mount_v3io_kwargs = {
            "remote": case.get("remote"),
            "mount_path": case.get("mount_path"),
            "volume_mounts": case.get("volume_mounts"),
        }
        mount_v3io_kwargs = {k: v for k, v in mount_v3io_kwargs.items() if v}
        if case.get("expect_failure"):
            with pytest.raises(mlrun.errors.MLRunInvalidArgumentError):
                tested_function.apply(mlrun.mount_v3io(**mount_v3io_kwargs))