def test_dask_local_hyper():
    task = NewTask().with_hyper_params({'p1': [5, 2, 3]}, 'max.accuracy')
    spec = tag_test(task, 'test_dask_local_hyper')
    run = new_function(command='dask://').run(spec, handler=my_func)
    verify_state(run)
    assert len(run.status.iterations) == 3 + 1, 'hyper parameters test failed'
    pprint(run.to_dict())
def test_local_py():
    file_path = f'{examples_path}/training.py'
    mod = function_to_module(file_path)
    task = NewTask(inputs={'infile.txt': f'{examples_path}/infile.txt'})
    context = get_or_create_ctx('myfunc', spec=task)
    mod.my_job(context, p1=2, p2='x')
    assert context.results['accuracy'] == 4, 'failed to run'
def kfpipeline():
    exit_task = NewTask(handler='run_summary_comment')
    exit_task.with_params(workflow_id='{{workflow.uid}}',
                          repo=this_project.params.get('git_repo'),
                          issue=this_project.params.get('git_issue'))
    exit_task.with_secrets(
        'inline', {'GITHUB_TOKEN': this_project.get_secret('GITHUB_TOKEN')})
    with dsl.ExitHandler(funcs['git_utils'].as_step(exit_task,
                                                    name='exit-handler')):

        # run the ingestion function with the new image and params
        ingest = funcs['gen-iris'].as_step(
            name="get-data",
            handler='iris_generator',
            params={'format': 'pq'},
            outputs=[DATASET])

        # train with hyper-parameters
        train = funcs["train"].as_step(
            name="train",
            params={"sample": -1,
                    "label_column": LABELS,
                    "test_size": 0.10},
            hyperparams={'model_pkg_class': [
                "sklearn.ensemble.RandomForestClassifier",
                "sklearn.linear_model.LogisticRegression",
                "sklearn.ensemble.AdaBoostClassifier"]},
            selector='max.accuracy',
            inputs={"dataset": ingest.outputs[DATASET]},
            labels={"commit": this_project.params.get('commit', '')},
            outputs=['model', 'test_set'])

        # test and visualize our model
        test = funcs["test"].as_step(
            name="test",
            params={"label_column": LABELS},
            inputs={"models_path": train.outputs['model'],
                    "test_set": train.outputs['test_set']})

        # deploy our model as a serverless function
        deploy = funcs["serving"].deploy_step(
            models={f"{DATASET}_v1": train.outputs['model']},
            tag=this_project.params.get('commit', 'v1')[:6])

        # test out the new model server (via REST API calls)
        tester = funcs["live_tester"].as_step(
            name='model-tester',
            params={'addr': deploy.outputs['endpoint'],
                    'model': f"{DATASET}_v1"},
            inputs={'table': train.outputs['test_set']})
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from os import path

import pytest
from tests.conftest import (examples_path, has_secrets, here, out_path,
                            tag_test, verify_state)

from mlrun import NewTask, run_local, code_to_function

base_spec = NewTask(params={'p1': 8}, out_path=out_path)
base_spec.spec.inputs = {'infile.txt': 'infile.txt'}


def test_run_local():
    spec = tag_test(base_spec, 'test_run_local')
    result = run_local(spec,
                       command='{}/training.py'.format(examples_path),
                       workdir=examples_path)
    verify_state(result)


def test_run_local_handler():
    spec = tag_test(base_spec, 'test_run_local_handler')
    spec.spec.handler = 'my_func'
    result = run_local(spec,
from tests.conftest import (examples_path, has_secrets, here, out_path,
                            tag_test, verify_state)

from mlrun import NewTask, get_run_db, new_function


def my_func(context, p1=1, p2='a-string'):
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}\n')
    print('file\n{}\n'.format(context.get_input('infile.txt').get()))

    context.log_result('accuracy', p1 * 2)
    context.logger.info('some info')
    context.log_metric('loss', 7)
    context.log_artifact('chart', body='abc')


base_spec = NewTask(params={'p1': 8}, out_path=out_path)
base_spec.spec.inputs = {'infile.txt': 'infile.txt'}

s3_spec = base_spec.copy().with_secrets('file', 'secrets.txt')
s3_spec.spec.inputs = {'infile.txt': 's3://yarons-tests/infile.txt'}


def test_noparams():
    result = new_function().run(handler=my_func)
    assert result.output('accuracy') == 2, 'failed to run'
    assert result.status.artifacts[0].get('key') == 'chart', 'failed to run'


def test_with_params():
    spec = tag_test(base_spec, 'test_with_params')
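# get_run_db is imported above but unused in this excerpt; a minimal sketch of
# querying the runs these tests create. The connect() call and the 'default'
# project name are assumptions about this MLRun version, not part of the file.
def list_test_runs():
    db = get_run_db().connect()
    for run in db.list_runs(project='default', last=10):
        print(run['metadata']['uid'], run['status'].get('state'))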
def test_dask_local():
    spec = tag_test(NewTask(params={'p1': 3, 'p2': 'vv'}), 'test_dask_local')
    run = new_function(kind='dask').run(spec, handler=my_func)
    verify_state(run)
out = mlconf.artifact_path or path.abspath('./data')
# {{run.uid}} will be substituted with the run id, so output will be written
# to a different directory per run
artifact_path = path.join(out, '{{run.uid}}')

# #### _running and linking multiple tasks_
# In this example we run two functions, ```training``` and ```validation```,
# and pass the result from one to the other.
# We will see in the ```job``` example that linking works even when the tasks
# run in a workflow on different processes or containers.

# ```run_local()``` will run our task on a local function:
# Run the training function. Functions can have multiple handlers/methods;
# here we call the ```training``` handler:

# In[ ]:

train_run = run_local(
    NewTask(handler=training, params={'p1': 5}, artifact_path=out))

# After the function runs it generates the result widget; you can click the
# `model` artifact to see its content.

# In[ ]:

train_run.outputs

# The output from the first training function is passed to the validation
# function; let's run it:

# In[ ]:

model_path = train_run.outputs['model']

validation_run = run_local(
    NewTask(handler=validation, inputs={'model': model_path}))
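# The training and validation handlers themselves are not part of this
# excerpt; below is a minimal sketch of what they might look like. The bodies
# are assumptions, only the handler names and the model hand-off come from
# the code above.
def training(context, p1=1):
    # log a result metric and a 'model' artifact for the next task to consume
    context.log_result('accuracy', p1 * 2)
    context.log_artifact('model', body=b'model-bytes', local_path='model.txt')


def validation(context, model):
    # 'model' arrives as a data item pointing at the artifact logged above
    context.logger.info(f'validating model at {model.url}')
    context.log_result('validation_score', 1.0)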
# See the License for the specific language governing permissions and
# limitations under the License.
import csv
from os import listdir
from tempfile import mktemp

import yaml
from conftest import out_path
from mlrun.artifacts import ChartArtifact, TableArtifact

from mlrun import NewTask, new_function

run_spec = NewTask(
    params={'p1': 5},
    out_path=out_path,
    outputs=['model.txt', 'chart.html', 'iteration_results']).set_label(
        'tests', 'kfp')


def my_job(context, p1=1, p2='a-string'):
    # access input metadata, values, files, and secrets (passwords)
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(context.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(context.get_input('infile.txt').get()))

    # RUN some useful code, e.g. ML training, data prep, etc.

    # log scalar result values (job result metrics)
    context.log_result('accuracy', p1 * 2)
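# A sketch of how run_spec and my_job are typically wired together with
# new_function(); the original file continues beyond this excerpt, so this
# exact invocation is an assumption.
result = new_function().run(run_spec, handler=my_job)
print(result.outputs)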
# Setup Project
project_name = os.getenv("MLRUN_PROJECT_NAME")
project_path = "./hadoop_hack"
skproj = new_project(name=project_name, context=project_path)

artifact_path = os.getenv("MLRUN_ARTIFACT_PATH")
mlconf.dbpath = os.getenv("MLRUN_DBPATH")

print(f"Project name: {project_name}")
print(f"Artifacts path: {artifact_path}\nMLRun DB path: {mlconf.dbpath}")

# Setup task
task = NewTask(
    name="hdfs-submit",
    project=os.getenv("MLRUN_PROJECT_NAME"),
    params={
        "hdfs_cmd": "hadoop fs -ls",
        "shell_pod_name": "nick-shell"
    },
    handler="hdfs_submit",
    artifact_path=artifact_path,
)

# Create MLRun job
fn = code_to_function(
    name="hdfs-submit",
    filename=f"{project_path}/hadoop.py",
    handler="hdfs_submit",
    kind="job",
)
fn.spec.service_account = "mlrun-api"
fn.spec.build.image = "mlrun/mlrun"
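# The excerpt ends before the job is submitted; a sketch of the usual next
# step with the function and task defined above (fn.run() is standard MLRun,
# but this call is not part of the original file).
run = fn.run(task)
print(run.state())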
from mlrun import run_local, RunTemplate, NewTask, mlconf
from os import path

mlconf.dbpath = mlconf.dbpath or './'

out = mlconf.artifact_path or path.abspath('./data')
# {{run.uid}} will be substituted with the run id, so output will be written
# to a different directory per run
artifact_path = path.join(out, '{{run.uid}}')

task = NewTask(name='demo',
               params={'p1': 5},
               artifact_path=artifact_path).with_secrets(
                   'file', 'secrets.txt').set_label('type', 'demo')

# run our task using our new function
run_object = run_local(task, command='training.py')

# inspect the run object
run_object.uid()                # unique run id
run_object.to_dict()            # full run spec and status as a dict
run_object.state()              # current state (e.g. completed, error)
run_object.show()               # render the results widget
run_object.outputs              # dict of results and artifact paths
run_object.logs()               # print the run log
run_object.artifact('dataset')  # fetch a specific artifact
from conftest import (examples_path, has_secrets, here, out_path, tag_test,
                      verify_state)

from mlrun import (NewTask, run_local, code_to_function, get_run_db,
                   new_function)

base_spec = NewTask(params={'artifacts_path': 'faces/artifacts/',
                            'frames_url': "https://framesd.default-tenant.app.vmdev22.lab.iguazeng.com",
                            'token': '4c76b197-713f-4e2f-8d72-48a46b2c053b',
                            'models_path': '../notebooks/functions/models.py',
                            'encodings_path': 'avia/encodings7'},
                    out_path=out_path)


def test_encode_images():
    spec = tag_test(base_spec, 'test_run_local_encode_images')
    result = run_local(spec,
                       command='../notebooks/functions/encode_images.py',
                       workdir='./',
                       artifact_path='./faces/artifacts')
    verify_state(result)


def test_train():
    spec = tag_test(base_spec, 'test_run_local_train')
    result = run_local(spec,
                       command='../notebooks/functions/train.py',
                       workdir='./',
                       artifact_path='./faces/artifacts')
    verify_state(result)
# Setup Project
project_name = os.getenv("MLRUN_PROJECT_NAME")
project_path = "./python_to_mlrun"
skproj = new_project(name=project_name, context=project_path)

artifact_path = os.getenv("MLRUN_ARTIFACT_PATH")
mlconf.dbpath = os.getenv("MLRUN_DBPATH")

print(f"Project name: {project_name}")
print(f"Artifacts path: {artifact_path}\nMLRun DB path: {mlconf.dbpath}")

# Setup task
inputs = {"data": "https://datahub.io/machine-learning/iris/r/iris.csv"}
task = NewTask(
    name="training-demo",
    project=project_name,
    handler="handler",
    artifact_path=artifact_path,
    inputs=inputs,
)

# Create MLRun job
fn = code_to_function(
    name="train_model",
    filename=f"{project_path}/training_gregory.py",
    handler="handler",
    kind="job",
)
fn.spec.build.image = "mlrun/mlrun"
fn.apply(mount_v3io())
fn.deploy()
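# Once the image build triggered by fn.deploy() completes, the task defined
# above can be submitted; this run call is a sketch, not part of the excerpt.
run = fn.run(task)
print(run.outputs)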
def kfpipeline(
        bucket_name: str = config['aws']['bucket_name'],
        s3_images_csv: str = f'{config["csv"]["s3_images_csv_mount_path"]}/{config["csv"]["s3_images_csv"]}',
        data_download_path: str = config['data']['mount_download_path'],
        results_upload_path: str = config['aws']['results_upload_path'],
        download_data: bool = config['data']['download_data'],
        batch_size: int = config['data']['batch_size'],
        img_dimensions: int = config['data']['img_dimensions'],
        train_pct: float = config['data']['train_pct'],
        val_pct: float = config['data']['val_pct'],
        test_pct: float = config['data']['test_pct'],
        epochs: int = config['train']['epochs'],
        lr: list = config['train']['lr'],
        layer_size: list = config['train']['layer_size'],
        hyper_param_runs: int = config['train']['hyper_param_runs'],
        device: str = config['train']['device'],
        debug_logs: bool = config['project']['debug_logs']):

    # Download Data from S3
    inputs = {
        "bucket_name": bucket_name,
        "s3_images_csv": s3_images_csv,
        "data_download_path": data_download_path,
        "download_data": download_data
    }
    download_s3 = funcs['download-s3'].as_step(
        handler="handler",
        inputs=inputs,
        outputs=["s3_image_csv_local", "data_download_path"],
        verbose=debug_logs)

    # Prep Data
    inputs = {
        "data_download_path": download_s3.outputs['data_download_path'],
        "batch_size": batch_size,
        "img_dimensions": img_dimensions,
        "train_pct": train_pct,
        "val_pct": val_pct,
        "test_pct": test_pct
    }
    prep_data = funcs['prep-data'].as_step(
        handler="handler",
        inputs=inputs,
        outputs=["train_data_loader",
                 "validation_data_loader",
                 "test_data_loader"],
        verbose=debug_logs)

    # Train Model
    inputs = {
        "train_data_loader": prep_data.outputs["train_data_loader"],
        "validation_data_loader": prep_data.outputs["validation_data_loader"],
        "epochs": epochs,
        "batch_size": batch_size,
        "device": device
    }
    hyper_params = {
        'lr': lr,
        "layer_size": layer_size,
        "MAX_EVALS": hyper_param_runs
    }
    train_model = funcs['train-model'].as_step(
        handler="handler",
        inputs=inputs,
        hyperparams=hyper_params,
        runspec=NewTask(tuning_strategy="random"),
        selector="max.validation_accuracy",
        outputs=["model"],
        verbose=debug_logs)

    # Evaluate Model
    inputs = {
        "test_data_loader": prep_data.outputs["test_data_loader"],
        "model": train_model.outputs["model"],
        "device": device
    }
    eval_model = funcs['eval-model'].as_step(
        handler="handler",
        inputs=inputs,
        verbose=debug_logs)

    # Deploy Model
    env = {
        "model_url": train_model.outputs["model"],
        "device": device,
        "img_dimensions": img_dimensions
    }
    deploy = funcs['deploy-model'].deploy_step(env=env)

    # Upload Model/Metrics to S3
    inputs = {
        "model": train_model.outputs["model"],
        "bucket_name": bucket_name,
        "results_upload_path": results_upload_path
    }
    upload_s3 = funcs['upload-s3'].as_step(
        handler="handler",
        inputs=inputs,
        verbose=debug_logs)
    upload_s3.after(eval_model)
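# A sketch of how a pipeline function like this is usually registered and run
# as an MLRun project workflow; the project object and the workflow file name
# here are assumptions, while set_workflow() and run() are standard project
# APIs.
project.set_workflow('main', 'pipeline.py')
project.run('main', arguments={}, dirty=True)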