예제 #1
0
파일: test_dask.py 프로젝트: rajacsp/mlrun
def test_dask_local_hyper():
    """Run ``my_func`` over a list of ``p1`` values on a ``dask://`` function.

    The run must report one more iteration entry than there are ``p1``
    values (presumably a header row — confirm against the runtime).
    """
    p1_values = [5, 2, 3]
    expected_rows = len(p1_values) + 1
    hyper_task = NewTask().with_hyper_params({'p1': p1_values}, 'max.accuracy')
    tagged_spec = tag_test(hyper_task, 'test_dask_local_hyper')
    dask_fn = new_function(command='dask://')
    result = dask_fn.run(tagged_spec, handler=my_func)
    verify_state(result)
    iteration_rows = result.status.iterations
    assert len(iteration_rows) == expected_rows, 'hyper parameters test failed'
    pprint(result.to_dict())
예제 #2
0
def test_local_py():
    """Load the example training script as a module and call ``my_job`` directly."""
    training_script = f'{examples_path}/training.py'
    training_module = function_to_module(training_script)
    input_file = f'{examples_path}/infile.txt'
    ctx = get_or_create_ctx('myfunc',
                            spec=NewTask(inputs={'infile.txt': input_file}))
    training_module.my_job(ctx, p1=2, p2='x')
    logged_accuracy = ctx.results['accuracy']
    assert logged_accuracy == 4, 'failed to run'
예제 #3
0
def kfpipeline():
    """Define the pipeline steps: ingest, train, test, deploy and live-test.

    Every step is created inside a ``dsl.ExitHandler`` context built from a
    ``git_utils`` step that runs the ``run_summary_comment`` handler.
    """
    # task handed to the exit-handler step
    exit_task = NewTask(handler='run_summary_comment')
    exit_task.with_params(
        workflow_id='{{workflow.uid}}',
        repo=this_project.params.get('git_repo'),
        issue=this_project.params.get('git_issue'),
    )
    github_secrets = {'GITHUB_TOKEN': this_project.get_secret('GITHUB_TOKEN')}
    exit_task.with_secrets('inline', github_secrets)
    exit_step = funcs['git_utils'].as_step(exit_task, name='exit-handler')

    with dsl.ExitHandler(exit_step):

        # run the ingestion function with the new image and params
        ingest = funcs['gen-iris'].as_step(
            name="get-data",
            handler='iris_generator',
            params={'format': 'pq'},
            outputs=[DATASET],
        )

        # train with hyper-parameters: one iteration per candidate model class
        candidate_models = [
            "sklearn.ensemble.RandomForestClassifier",
            "sklearn.linear_model.LogisticRegression",
            "sklearn.ensemble.AdaBoostClassifier",
        ]
        train = funcs["train"].as_step(
            name="train",
            params={"sample": -1, "label_column": LABELS, "test_size": 0.10},
            hyperparams={'model_pkg_class': candidate_models},
            selector='max.accuracy',
            inputs={"dataset": ingest.outputs[DATASET]},
            labels={"commit": this_project.params.get('commit', '')},
            outputs=['model', 'test_set'],
        )
        trained_model = train.outputs['model']
        held_out_set = train.outputs['test_set']

        # test and visualize our model
        test = funcs["test"].as_step(
            name="test",
            params={"label_column": LABELS},
            inputs={"models_path": trained_model, "test_set": held_out_set},
        )

        # deploy our model as a serverless function
        model_key = f"{DATASET}_v1"
        # the tag is the first six characters of the commit parameter
        commit_tag = this_project.params.get('commit', 'v1')[:6]
        deploy = funcs["serving"].deploy_step(
            models={model_key: trained_model},
            tag=commit_tag,
        )

        # test out new model server (via REST API calls)
        tester = funcs["live_tester"].as_step(
            name='model-tester',
            params={'addr': deploy.outputs['endpoint'], 'model': model_key},
            inputs={'table': held_out_set},
        )
예제 #4
0
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from os import path

import pytest

from tests.conftest import (examples_path, has_secrets, here, out_path,
                            tag_test, verify_state)
from mlrun import NewTask, run_local, code_to_function

# Task template reused by the tests in this module: a single parameter
# (p1=8) with outputs directed to the test ``out_path``.
base_spec = NewTask(params={'p1': 8}, out_path=out_path)
# The input is given as a bare file name (no directory part).
base_spec.spec.inputs = {'infile.txt': 'infile.txt'}


def test_run_local():
    """Run the example ``training.py`` script locally and verify the run state."""
    tagged_spec = tag_test(base_spec, 'test_run_local')
    training_script = f'{examples_path}/training.py'
    run = run_local(tagged_spec,
                    command=training_script,
                    workdir=examples_path)
    verify_state(run)


def test_run_local_handler():
    spec = tag_test(base_spec, 'test_run_local_handler')
    spec.spec.handler = 'my_func'
    result = run_local(spec,
예제 #5
0
                      verify_state)
from mlrun import NewTask, get_run_db, new_function


def my_func(context, p1=1, p2='a-string'):
    """Sample handler: print the run details and log a few outputs.

    Args:
        context: run context; must provide ``name``, ``uid``, ``get_input``,
            ``log_result``, ``logger``, ``log_metric`` and ``log_artifact``.
        p1: value whose double is logged as the ``accuracy`` result.
        p2: only echoed in the printed parameter summary.
    """
    run_header = f'Run: {context.name} (uid={context.uid})'
    print(run_header)
    print(f'Params: p1={p1}, p2={p2}\n')

    # read the 'infile.txt' input and echo its content
    infile_body = context.get_input('infile.txt').get()
    print('file\n{}\n'.format(infile_body))

    accuracy = p1 * 2
    context.log_result('accuracy', accuracy)
    context.logger.info('some info')
    context.log_metric('loss', 7)
    context.log_artifact('chart', body='abc')


# Task template reused by the tests below: a single parameter (p1=8) with
# outputs directed to the test ``out_path``.
base_spec = NewTask(params={'p1': 8}, out_path=out_path)
base_spec.spec.inputs = {'infile.txt': 'infile.txt'}

# Copy of the template with a 'file' secrets source ('secrets.txt') attached
# and the same input key pointed at an S3 URL instead of a local file name.
s3_spec = base_spec.copy().with_secrets('file', 'secrets.txt')
s3_spec.spec.inputs = {'infile.txt': 's3://yarons-tests/infile.txt'}


def test_noparams():
    """Run ``my_func`` with its default arguments and check what it logged."""
    run = new_function().run(handler=my_func)

    # my_func logs accuracy as p1 * 2, and p1 defaults to 1
    logged_accuracy = run.output('accuracy')
    assert logged_accuracy == 2, 'failed to run'

    first_artifact = run.status.artifacts[0]
    assert first_artifact.get('key') == 'chart', 'failed to run'


def test_with_params():
    spec = tag_test(base_spec, 'test_with_params')
예제 #6
0
파일: test_dask.py 프로젝트: yaronha/mlrun
def test_dask_local():
    """Run ``my_func`` once on a ``dask`` kind function with explicit params."""
    task = NewTask(params={'p1': 3, 'p2': 'vv'})
    tagged_spec = tag_test(task, 'test_dask_local')
    dask_fn = new_function(kind='dask')
    result = dask_fn.run(tagged_spec, handler=my_func)
    verify_state(result)
예제 #7
0
# Base output directory: the configured artifact path, or ./data (made absolute) when none is configured.
out = mlconf.artifact_path or path.abspath('./data')
# {{run.uid}} will be substituted with the run id, so output will be written to a different directory per run
artifact_path = path.join(out, '{{run.uid}}')

# #### _running and linking multiple tasks_
# In this example we run two functions, ```training``` and ```validation```, and pass the result from one to the other.
# We will see in the ```job``` example that linking works even when the tasks run in a workflow on different processes or containers.

# ```run_local()``` will run our task on a local function:

# Run the training function. Functions can have multiple handlers/methods; here we call the ```training``` handler:

# In[ ]:

# NOTE(review): this task is given `out`, not the per-run `artifact_path` built above — confirm that is intended.
train_run = run_local(
    NewTask(handler=training, params={'p1': 5}, artifact_path=out))

# After the function runs it generates the result widget; you can click the `model` artifact to see its content.

# In[ ]:

train_run.outputs

# The output from the first training function is passed to the validation function; let's run it:

# In[ ]:

# The training run's 'model' output becomes the input of the validation run below.
model_path = train_run.outputs['model']

validation_run = run_local(
    NewTask(handler=validation,
예제 #8
0
# See the License for the specific language governing permissions and
# limitations under the License.

import csv
from os import listdir
from tempfile import mktemp

import yaml

from conftest import out_path
from mlrun.artifacts import ChartArtifact, TableArtifact
from mlrun import NewTask, new_function


# Task template: one parameter (p1=5), outputs written under ``out_path``,
# three named outputs, and a tests=kfp label.
run_spec = NewTask(
    params={'p1': 5},
    out_path=out_path,
    outputs=['model.txt', 'chart.html', 'iteration_results']).set_label('tests', 'kfp')


def my_job(context, p1=1, p2='a-string'):
    """Example job handler: echo the run inputs and log an ``accuracy`` result.

    Args:
        context: run context; must provide ``name``, ``uid``, ``get_secret``,
            ``get_input`` and ``log_result``.
        p1: value whose double is logged as the ``accuracy`` result.
        p2: only echoed in the printed parameter summary.
    """
    # access input metadata, values, files, and secrets (passwords)
    run_header = f'Run: {context.name} (uid={context.uid})'
    print(run_header)
    print(f'Params: p1={p1}, p2={p2}')
    # NOTE(review): this echoes the secret value to stdout — acceptable for a
    # demo, but confirm before reusing in a real job.
    access_key = context.get_secret('ACCESS_KEY')
    print('accesskey = {}'.format(access_key))
    infile_body = context.get_input('infile.txt').get()
    print('file\n{}\n'.format(infile_body))

    # RUN some useful code e.g. ML training, data prep, etc.

    # log scalar result values (job result metrics)
    accuracy = p1 * 2
    context.log_result('accuracy', accuracy)
예제 #9
0
# Setup Project
# The project name, artifact location and MLRun DB path are all read from
# the environment; each value is None when its variable is unset.
project_name = os.getenv("MLRUN_PROJECT_NAME")
project_path = "./hadoop_hack"
skproj = new_project(name=project_name, context=project_path)
artifact_path = os.getenv("MLRUN_ARTIFACT_PATH")
mlconf.dbpath = os.getenv("MLRUN_DBPATH")

print(f"Project name: {project_name}")
print(f"Artifacts path: {artifact_path}\nMLRun DB path: {mlconf.dbpath}")

# Setup task
# The task runs the "hdfs_submit" handler with a fixed HDFS listing command
# and the name of the shell pod passed as parameters.
task = NewTask(
    name="hdfs-submit",
    # reuse the value read above rather than a second environment lookup,
    # so the task and the project always agree on the project name
    project=project_name,
    params={
        "hdfs_cmd": "hadoop fs -ls",
        "shell_pod_name": "nick-shell"
    },
    handler="hdfs_submit",
    artifact_path=artifact_path,
)

# Create MLRun job
# The job is built from hadoop.py inside the project directory.
fn = code_to_function(
    name="hdfs-submit",
    filename=f"{project_path}/hadoop.py",
    handler="hdfs_submit",
    kind="job",
)

# Service account and image set on the job spec.
fn.spec.service_account = "mlrun-api"
fn.spec.build.image = "mlrun/mlrun"
예제 #10
0
from mlrun import run_local, RunTemplate, NewTask, mlconf
from os import path
# Fall back to the current directory when no DB path is configured.
mlconf.dbpath = mlconf.dbpath or './'
# Base output directory: the configured artifact path, else ./data (absolute).
out = mlconf.artifact_path or path.abspath('./data')
# {{run.uid}} will be substituted with the run id, so output will be written to a different directory per run
artifact_path = path.join(out, '{{run.uid}}')

# Build the task first, then attach the secrets source and the label.
task = NewTask(name='demo', params={'p1': 5}, artifact_path=artifact_path)
task = task.with_secrets('file', 'secrets.txt').set_label('type', 'demo')

# run our task using our new function
run_object = run_local(task, command='training.py')

# Inspect the finished run (notebook-style expression statements).
run_object.uid()
run_object.to_dict()
run_object.state()
run_object.show()
run_object.outputs
run_object.logs()
run_object.artifact('dataset')
예제 #11
0
from conftest import (
    examples_path, has_secrets, here, out_path, tag_test, verify_state
)
from mlrun import NewTask, run_local, code_to_function
from mlrun import NewTask, get_run_db, new_function

# Task template shared by the tests below; all values are passed as run
# parameters and outputs go to the test ``out_path``.
# NOTE(review): 'token' looks like a hard-coded credential and 'frames_url'
# points at a specific lab host — confirm whether these should come from the
# environment or a secrets source instead of being committed in source.
base_spec = NewTask(params={'artifacts_path': 'faces/artifacts/',
                            'frames_url': "https://framesd.default-tenant.app.vmdev22.lab.iguazeng.com",
                            'token': '4c76b197-713f-4e2f-8d72-48a46b2c053b',
                            'models_path': '../notebooks/functions/models.py',
                            'encodings_path': 'avia/encodings7'},
                    out_path=out_path
                    )


def test_encode_images():
    """Run ``encode_images.py`` locally from the shared spec and verify the run state."""
    tagged_spec = tag_test(base_spec, 'test_run_local_encode_images')
    run = run_local(
        tagged_spec,
        command='../notebooks/functions/encode_images.py',
        workdir='./',
        artifact_path='./faces/artifacts',
    )
    verify_state(run)


def test_train():
    """Run ``train.py`` locally from the shared spec and verify the run state."""
    tagged_spec = tag_test(base_spec, 'test_run_local_train')
    run_options = {
        'command': '../notebooks/functions/train.py',
        'workdir': './',
        'artifact_path': './faces/artifacts',
    }
    run = run_local(tagged_spec, **run_options)
    verify_state(run)
# Setup Project
# The project name, artifact location and MLRun DB path are all read from
# the environment; each value is None when its variable is unset.
project_name = os.getenv("MLRUN_PROJECT_NAME")
project_path = "./python_to_mlrun"
skproj = new_project(name=project_name, context=project_path)
artifact_path = os.getenv("MLRUN_ARTIFACT_PATH")
mlconf.dbpath = os.getenv("MLRUN_DBPATH")

print(f"Project name: {project_name}")
print(f"Artifacts path: {artifact_path}\nMLRun DB path: {mlconf.dbpath}")

# Setup task
# Single input named "data", given as a remote CSV URL.
inputs = {"data": "https://datahub.io/machine-learning/iris/r/iris.csv"}
task = NewTask(
    name="training-demo",
    project=project_name,
    handler="handler",
    artifact_path=artifact_path,
    inputs=inputs,
)

# Create MLRun job
# The job is built from training_gregory.py inside the project directory.
fn = code_to_function(
    name="train_model",
    filename=f"{project_path}/training_gregory.py",
    handler="handler",
    kind="job",
)
# Set the image, apply the v3io mount, then deploy the function.
fn.spec.build.image = "mlrun/mlrun"
fn.apply(mount_v3io())
fn.deploy()
def kfpipeline(
        bucket_name: str = config['aws']['bucket_name'],
        s3_images_csv: str = (
            f'{config["csv"]["s3_images_csv_mount_path"]}/{config["csv"]["s3_images_csv"]}'
        ),
        data_download_path: str = config['data']['mount_download_path'],
        results_upload_path: str = config['aws']['results_upload_path'],
        download_data: bool = config['data']['download_data'],
        batch_size: int = config['data']['batch_size'],
        img_dimensions: int = config['data']['img_dimensions'],
        train_pct: float = config['data']['train_pct'],
        val_pct: float = config['data']['val_pct'],
        test_pct: float = config['data']['test_pct'],
        epochs: int = config['train']['epochs'],
        lr: list = config['train']['lr'],
        layer_size: list = config['train']['layer_size'],
        hyper_param_runs: int = config['train']['hyper_param_runs'],
        device: str = config['train']['device'],
        debug_logs: bool = config['project']['debug_logs']):
    """Define the image pipeline: download, prep, train, evaluate, deploy, upload.

    Every parameter defaults to the matching entry of the module-level
    ``config`` mapping, read once when this function is defined.

    Args:
        bucket_name: bucket passed to the download and upload steps.
        s3_images_csv: path of the images CSV passed to the download step.
        data_download_path: download location passed to the download step.
        results_upload_path: destination passed to the upload step.
        download_data: flag passed to the download step.
        batch_size: passed to the prep and train steps.
        img_dimensions: passed to the prep step and the deployed function.
        train_pct: passed to the prep step.
        val_pct: passed to the prep step.
        test_pct: passed to the prep step.
        epochs: passed to the train step.
        lr: hyper-parameter values for ``lr``.
        layer_size: hyper-parameter values for ``layer_size``.
        hyper_param_runs: passed as the ``MAX_EVALS`` hyper-parameter.
        device: passed to the train and eval steps and the deployed function.
        debug_logs: forwarded as ``verbose`` to every ``as_step`` call.
    """
    # Download Data from S3
    inputs = {
        "bucket_name": bucket_name,
        "s3_images_csv": s3_images_csv,
        "data_download_path": data_download_path,
        "download_data": download_data
    }
    download_s3 = funcs['download-s3'].as_step(
        handler="handler",
        inputs=inputs,
        outputs=["s3_image_csv_local", "data_download_path"],
        verbose=debug_logs)

    # Prep Data
    inputs = {
        "data_download_path": download_s3.outputs['data_download_path'],
        "batch_size": batch_size,
        "img_dimensions": img_dimensions,
        "train_pct": train_pct,
        "val_pct": val_pct,
        "test_pct": test_pct
    }
    prep_data = funcs['prep-data'].as_step(handler="handler",
                                           inputs=inputs,
                                           outputs=[
                                               "train_data_loader",
                                               "validation_data_loader",
                                               "test_data_loader"
                                           ],
                                           verbose=debug_logs)

    # Train Model
    inputs = {
        "train_data_loader": prep_data.outputs["train_data_loader"],
        # validate on the validation loader, not on the training loader:
        # the selector below picks the best run by validation accuracy
        "validation_data_loader": prep_data.outputs["validation_data_loader"],
        "epochs": epochs,
        "batch_size": batch_size,
        "device": device
    }
    hyper_params = {
        'lr': lr,
        "layer_size": layer_size,
        "MAX_EVALS": hyper_param_runs
    }
    train_model = funcs['train-model'].as_step(
        handler="handler",
        inputs=inputs,
        hyperparams=hyper_params,
        runspec=NewTask(tuning_strategy="random"),
        selector="max.validation_accuracy",
        outputs=["model"],
        verbose=debug_logs)

    # Evaluate Model
    inputs = {
        "test_data_loader": prep_data.outputs["test_data_loader"],
        "model": train_model.outputs["model"],
        "device": device
    }
    eval_model = funcs['eval-model'].as_step(handler="handler",
                                             inputs=inputs,
                                             verbose=debug_logs)

    # Deploy Model
    env = {
        "model_url": train_model.outputs["model"],
        "device": device,
        "img_dimensions": img_dimensions
    }
    deploy = funcs['deploy-model'].deploy_step(env=env)

    # Upload Model/Metrics to S3
    inputs = {
        "model": train_model.outputs["model"],
        "bucket_name": bucket_name,
        "results_upload_path": results_upload_path
    }
    upload_s3 = funcs['upload-s3'].as_step(handler="handler",
                                           inputs=inputs,
                                           verbose=debug_logs)
    # the upload has no data dependency on evaluation, so order it explicitly
    upload_s3.after(eval_model)