def soojin_pipeline():
    """Iris pipeline: load the dataset, train/evaluate a model, then report
    whether the measured accuracy beats a fixed baseline."""
    # Load the Iris CSV and expose it as the 'iris' file output.
    load_step = dsl.ContainerOp(
        name="load iris data pipeline",
        image="lsjsj92/soojin-iris-load:0.7",
        arguments=['--data_path', './Iris.csv'],
        file_outputs={'iris': '/iris.csv'},
    )

    # Train + evaluate on the loaded data; emits an accuracy value and KFP metrics.
    train_and_eval = dsl.ContainerOp(
        name="training pipeline",
        image="lsjsj92/soojin-iris-train_and_eval:0.4",
        arguments=['--data', load_step.outputs['iris']],
        file_outputs={
            'accuracy': '/accuracy.json',
            'mlpipeline-metrics': '/mlpipeline-metrics.json',
        },
    )
    train_and_eval.after(load_step)

    baseline = 0.7
    # NOTE(review): accuracy exactly equal to the baseline matches neither branch — confirm intended.
    with dsl.Condition(train_and_eval.outputs['accuracy'] > baseline):
        print_op(
            f"accuracy는 {train_and_eval.outputs['accuracy']}로 accuracy baseline인 {baseline}보다 큽니다!"
        )
    with dsl.Condition(train_and_eval.outputs['accuracy'] < baseline):
        print_op(
            f"accuracy는 {train_and_eval.outputs['accuracy']}로 accuracy baseline인 {baseline}보다 작습니다."
        )
def my_pipeline(text_parameter: str = json.dumps([
    {'p_a': -1, 'p_b': 'hello'},
    {'p_a': 2, 'p_b': 'halo'},
    {'p_a': 3, 'p_b': 'ni hao'},
], sort_keys=True)):
    """Exercises conditions nested inside and around ParallelFor loops.

    *text_parameter* defaults to a JSON list of dicts so the final loop can
    fan out over its items and reference ``item.p_a`` / ``item.p_b``.
    """
    flip1 = flip_coin_op()
    with dsl.Condition(flip1.output != 'no-such-result'):  # always true
        args_generator = args_generator_op()
        with dsl.ParallelFor(args_generator.output) as item:
            print_op(text_parameter)
            # Branch on the earlier coin flip inside the loop body.
            with dsl.Condition(flip1.output == 'heads'):
                print_op(item.A_a)
            with dsl.Condition(flip1.output == 'tails'):
                print_op(item.B_b)
            # Inner loop deliberately shadows the outer loop variable `item`.
            with dsl.Condition(item.A_a == '1'):
                with dsl.ParallelFor([{'a':'-1'}, {'a':'-2'}]) as item:
                    print_op(item)
    # Loop directly over the JSON pipeline parameter.
    with dsl.ParallelFor(text_parameter) as item:
        with dsl.Condition(item.p_a > 0):
            print_op(item.p_a)
        print_op(item.p_b)
def train_test_knn():
    """Load and split data, randomly select a classifier (KNN vs. logistic
    regression), train it, and evaluate on the held-out test set."""
    # All component functions run on the same sklearn base image.
    image = 'qiuosier/sklearn'
    load_op = func_to_container_op(load_data, base_image=image)
    split_op = func_to_container_op(split_data, base_image=image)
    test_op = func_to_container_op(test_model, base_image=image)
    train_knn_op = func_to_container_op(train_knn, base_image=image)
    train_logistics_op = func_to_container_op(train_logistics, base_image=image)

    load_task = load_op()
    split_task = split_op(load_task.outputs['x'], load_task.outputs['y'])
    # Never serve a cached split — force re-execution every run.
    split_task.execution_options.caching_strategy.max_cache_staleness = "P0D"

    # Random integer in [0, 9] decides which classifier branch runs.
    selector = select_classifier(0, 9)
    selector.execution_options.caching_strategy.max_cache_staleness = "P0D"

    with dsl.Condition(selector.output >= 5):
        knn_task = train_knn_op(split_task.outputs['train_x'],
                                split_task.outputs['train_y'])
        test_op(split_task.outputs['test_x'], split_task.outputs['test_y'],
                knn_task.output)
    with dsl.Condition(selector.output < 5):
        logit_task = train_logistics_op(split_task.outputs['train_x'],
                                        split_task.outputs['train_y'])
        test_op(split_task.outputs['test_x'], split_task.outputs['test_y'],
                logit_task.output)
def parameters_pipeline(predict: str = 'heads'):
    """Flip a coin and print win/lose depending on whether it matches *predict*."""
    outcome = flip_coin_op()
    with dsl.Condition(outcome.output == predict):
        print_op('YOU WIN')
    with dsl.Condition(outcome.output != predict):
        print_op('YOU LOSE')
def det_train_pipeline(
        detmaster,
        mlrepo="https://github.com/determined-ai/determined.git",
        branch="0.13.0",
        config="examples/official/trial/mnist_pytorch/const.yaml",
        context="examples/official/trial/mnist_pytorch/",
        model_name="mnist-prod",
        deployment_name="mnist-prod-kf",
        deployment_namespace="david",
        image="davidhershey/seldon-mnist:1.6"):
    """Clone the ML repo onto a fresh volume, run a Determined training job,
    and deploy the model via Seldon only if it improved on the previous one."""
    # Volume holding the cloned repository, shared by downstream steps.
    repo_volume = dsl.VolumeOp(
        name="create pipeline volume",
        resource_name="mlrepo-pvc",
        modes=["ReadWriteOnce"],
        size="3Gi",
    )
    clone = clone_mlrepo(mlrepo, branch, repo_volume.volume)

    # Launch the Determined experiment and block until it finishes.
    train = (
        run_det_and_wait_op(detmaster, config, context)
        .add_pvolumes({"/src/": clone.pvolume})
        .after(clone)
    )

    # Decide whether the new model beats the registered one.
    decide = decide_op(detmaster, train.output, model_name)
    # NOTE: '== True' builds a DSL condition on the step's output, not a Python identity check.
    with dsl.Condition(decide.output == True, name="Deploy"):
        create_seldon_op(
            detmaster,
            deployment_name,
            deployment_namespace,
            model_name,
            image,
        )
    with dsl.Condition(decide.output == False, name="No-Deploy"):
        print_op('Model Not Deployed -- Performance was not better than previous version')
def condition_pipeline():
    """Flip a coin and print 'YOU WIN' on heads, 'YOU LOSE' on tails."""
    flip = flip_coin_op()
    with dsl.Condition(flip.output == 'heads'):
        # Fixed user-facing typo: was 'YOUT WIN' (sibling pipelines print 'YOU WIN').
        print_op('YOU WIN')
    with dsl.Condition(flip.output == 'tails'):
        print_op('YOU LOSE')
def conditions_with_global_params(n1='5', threshold='10', lower_bound='15'):
    """Add *n1* and *lower_bound*, print the sum, then notify success or
    failure depending on how the printed value compares to *threshold*."""
    sum_task = add_numbers(n1, lower_bound)
    printed = print_number(sum_task.output)
    with dsl.Condition(printed.output > threshold):
        notify_success()
    with dsl.Condition(printed.output <= threshold):
        notify_failure()
def nested_condition_test(a: int, b: int, c: int):
    """Print "a < b < c" only when both pairwise CEL comparisons evaluate true."""
    # Evaluate both comparisons as CEL expressions in one step.
    cel = CEL_Exprs(
        ab=f"{a} < {b}",
        bc=f"{b} < {c}",
    )
    # Nested conditions express the conjunction of the two results.
    with dsl.Condition(cel.outputs["ab"] == 'true'):
        with dsl.Condition(cel.outputs["bc"] == 'true'):
            PrintOp(f"{a} < {b} < {c}")
def flipcoin_pipeline():
    """Flip a coin, gate on an (effectively always-true) CEL check, then draw a
    random number in [6, 9] and print when it exceeds 5."""
    flip = flip_coin_op()
    not_hey = CEL_ConditionOp("'%s' != 'hey'" % flip.output)
    with dsl.Condition(not_hey.output == 'true'):
        random_num_head = random_num_op(6, 9)
        gt_five = CEL_ConditionOp("%s > 5" % random_num_head.output)
        with dsl.Condition(gt_five.output == 'true'):
            print_op('heads and %s > 5!' % random_num_head.output)
def conditional_pipeline():
    """Route a random number in [0, 100] to a small/medium/large processor.

    Bug fix: the medium branch was guarded with
    ``dsl.Condition(number > 10 and number < 50)``.  ``dsl.Condition`` accepts a
    single comparison; Python's ``and`` evaluates the truthiness of the first
    ConditionOperator (always truthy) and yields only the second operand, so the
    ``number > 10`` bound was silently dropped.  Nesting two Conditions expresses
    the conjunction correctly.
    """
    number = get_random_int_op(0, 100).output
    with dsl.Condition(number < 10):
        process_small_op(number)
    with dsl.Condition(number > 10):
        # Both conditions must hold: 10 < number < 50.
        with dsl.Condition(number < 50):
            process_medium_op(number)
    with dsl.Condition(number > 50):
        process_large_op(number)
    # NOTE(review): number == 10 or number == 50 matches no branch — confirm
    # whether a boundary should be inclusive.
def conditions_and_loops(n='3', threshold='20'):
    """Produce *n* numbers; for each one, add 10, print the sum, and notify
    success or failure against *threshold*."""
    numbers_task = produce_numbers(n)
    with dsl.ParallelFor(numbers_task.output) as loop_item:
        sum_task = add_numbers(loop_item, '10')
        printed = print_number(sum_task.output)
        with dsl.Condition(printed.output > threshold):
            notify_success()
        with dsl.Condition(printed.output <= threshold):
            notify_failure()
def conditions_with_global_params(n: int = 5, threshold: int = 10, lower_bound: int = 15):
    """Add *n* and *lower_bound*, print the sum, then branch on whether the
    printed value exceeds *threshold* (integer-typed variant)."""
    total_task = add_numbers(n, lower_bound)
    shown = print_number(total_task.output)
    with dsl.Condition(shown.output > threshold):
        notify_success()
    with dsl.Condition(shown.output <= threshold):
        notify_failure()
def flipcoin(forced_result1: str = 'heads', forced_result2: str = 'tails'):
    """Flip a (forceable) coin; on heads flip again and print when the second
    flip is tails, on tails print the first result directly."""
    first = FlipCoinOp('flip', str(forced_result1))
    with dsl.Condition(first.output == 'heads'):
        second = FlipCoinOp('flip-again', str(forced_result2))
        with dsl.Condition(second.output == 'tails'):
            PrintOp('print1', second.output)
    with dsl.Condition(first.output == 'tails'):
        PrintOp('print2', first.output)
def flipcoin():
    """Flip a coin; on heads flip again and print when the second is tails,
    on tails print directly."""
    first = FlipCoinOp('flip')
    with dsl.Condition(first.output == 'heads'):
        second = FlipCoinOp('flip-again')
        with dsl.Condition(second.output == 'tails'):
            PrintOp('print1')
    with dsl.Condition(first.output == 'tails'):
        PrintOp('print2')
def flipcoin(forced_result1: str = 'heads', forced_result2: str = 'tails'):
    """Flip a (forceable) coin with nested condition branches, then print
    'done' once both top-level branches have resolved."""
    first_flip = flip_coin_op(str(forced_result1))
    with dsl.Condition(first_flip.outputs['output'] == 'heads') as heads_branch:
        second_flip = flip_coin_op(str(forced_result2))
        with dsl.Condition(second_flip.outputs['output'] == 'tails'):
            print_op(second_flip.outputs['output'])
    with dsl.Condition(first_flip.outputs['output'] == 'tails') as tails_branch:
        print_op(first_flip.outputs['output'])
    # Final step waits for both condition groups, whichever was taken.
    print_op('done').after(heads_branch).after(tails_branch)
def flip_component(flip_result, maxVal):
    """Recursive graph component: re-flips the coin until the stopping branch
    is taken, with an inner recursive sub-component for the 'heads' path.

    NOTE(review): maxVal is threaded through the recursion but not visibly used
    as a bound here — presumably consumed by print_op/flip_coin_op; confirm.
    """
    # Inner recursive sub-graph: keeps flipping while the result is heads.
    @dsl._component.graph_component
    def flip_component_b(flip_result_b, maxVal_b):
        with dsl.Condition(flip_result_b == 'heads'):
            print_flip_b = print_op(flip_result_b)
            flipB = flip_coin_op().after(print_flip_b)
            # Recurse on the new flip result.
            flip_component_b(flipB.output, maxVal_b)
    with dsl.Condition(flip_result == 'heads'):
        flip_component_b(flip_result, maxVal)
    with dsl.Condition(flip_result == 'tails'):
        print_flip = print_op(flip_result)
        flipA = flip_coin_op().after(print_flip)
        # Outer recursion on the fresh flip.
        flip_component(flipA.output, maxVal)
def my_pipeline():
    """Flip and print several coins; inside an always-true condition, flip a
    third coin and, when it matches the second, flip and print a fourth."""
    coin_a = flip_coin_op()
    print_op(coin_a.output)
    coin_b = flip_coin_op()
    print_op(coin_b.output)
    with dsl.Condition(coin_a.output != 'no-such-result'):  # always true
        coin_c = flip_coin_op()
        print_op(coin_c.output)
        with dsl.Condition(coin_b.output == coin_c.output):
            coin_d = flip_coin_op()
            print_op(coin_d.output)
def flipcoin(forced_result1: str = 'heads', forced_result2: str = 'tails'):
    """Flip a (forceable) coin with nested branches, then print 'done' after
    both top-level condition groups have resolved."""
    first = FlipCoinOp('flip', str(forced_result1))
    with dsl.Condition(first.output == 'heads') as heads_branch:
        second = FlipCoinOp('flip-again', str(forced_result2))
        with dsl.Condition(second.output == 'tails'):
            PrintOp('print1', second.output)
    with dsl.Condition(first.output == 'tails') as tails_branch:
        PrintOp('print2', first.output)
    # Final print waits on both branches regardless of which was taken.
    PrintOp('print3', 'done').after(heads_branch).after(tails_branch)
def pipeline(dataset_location='/mnt/data/manipulated_fashion_mnist.csv',
             test_size=0.3,
             random_state=42,
             input_shape_height=28,
             input_shape_width=28,
             use_pretrained_model='False',
             model_units_num=128,
             model_outputs_num=10,
             model_activation_func_layer2='relu',
             model_activation_func_layer3='softmax',
             optimizer='adam',
             loss='binary_crossentropy',
             metrics='accuracy',
             num_epochs=10,
             location_prepared_dataset='/mnt/data/prep_fashion_mnist.csv',
             location_improved_dataset='/mnt/data/impr_fasion_mnist.csv',
             location_training_images='/mnt/data/train_img.csv',
             location_training_labels='/mnt/data/train_labels.csv',
             location_test_images='/mnt/data/test_img.csv',
             location_test_labels='/mnt/data/test_labels.csv',
             location_base_model='/mnt/model/base_model.h5',
             location_trained_model='/mnt/model/trained_model.h5'):
    """Fashion-MNIST pipeline: prepare data, engineer features, split, then
    train and evaluate either a downloaded (pretrained) or freshly built model.

    Every step mounts the same PVC ("fashion-mnist-vol") at /mnt so the steps
    can exchange data through the file locations passed as parameters.
    use_pretrained_model is a string flag ('True'/'False') selecting the branch.
    """
    # Prepare the raw CSV into the shared volume.
    data_preparation = data_prep_op(dataset_location, location_prepared_dataset).apply(onprem.mount_pvc("fashion-mnist-vol", 'local-storage', "/mnt"))
    # Feature engineering on the prepared dataset.
    feature_engineering = feature_eng_op(data_preparation.outputs['output'], location_improved_dataset).apply(onprem.mount_pvc("fashion-mnist-vol", 'local-storage', "/mnt"))
    # Train/test split writes four CSVs (images/labels for train and test).
    data_split = data_split_op(feature_engineering.outputs['output'], test_size, random_state, location_training_images, location_training_labels, location_test_images, location_test_labels).apply(onprem.mount_pvc("fashion-mnist-vol", 'local-storage', "/mnt"))
    # Branch 1: download a pretrained base model, then fine-tune and evaluate it.
    with dsl.Condition(use_pretrained_model == 'True'):
        model_building = model_download_op(input_shape_height, input_shape_width, location_base_model).apply(onprem.mount_pvc("fashion-mnist-vol", 'local-storage', "/mnt"))
        model_training = model_train_op(data_split.outputs['train_img'], data_split.outputs['train_label'], input_shape_height, input_shape_width, model_building.outputs['output_model_loc'], num_epochs, location_trained_model).apply(onprem.mount_pvc("fashion-mnist-vol", 'local-storage', "/mnt"))
        model_evaluation = model_eval_op(data_split.outputs['test_img'], data_split.outputs['test_label'],
                                         input_shape_height, input_shape_width, model_training.outputs['output_model_loc'], '/mlpipeline-ui-metadata.json').apply(onprem.mount_pvc("fashion-mnist-vol", 'local-storage', "/mnt"))
    # Branch 2: build a new model from the architecture parameters, then train and evaluate.
    with dsl.Condition(use_pretrained_model == 'False'):
        model_building = model_build_op(input_shape_height, input_shape_width, model_units_num, model_outputs_num, model_activation_func_layer2, model_activation_func_layer3, optimizer, loss, metrics, location_base_model).apply(onprem.mount_pvc("fashion-mnist-vol", 'local-storage', "/mnt"))
        model_training = model_train_op(data_split.outputs['train_img'], data_split.outputs['train_label'], input_shape_height, input_shape_width, model_building.outputs['output_model_loc'], num_epochs, location_trained_model).apply(onprem.mount_pvc("fashion-mnist-vol", 'local-storage', "/mnt"))
        model_evaluation = model_eval_op(data_split.outputs['test_img'], data_split.outputs['test_label'], input_shape_height, input_shape_width, model_training.outputs['output_model_loc'], '/mlpipeline-ui-metadata.json').apply(onprem.mount_pvc("fashion-mnist-vol", 'local-storage', "/mnt"))
def flipcoin(forced_result1: str = 'heads', forced_result2: str = 'tails', forced_result3: str = 'heads'):
    """Three (forceable) coin flips with nested conditions.

    Consistency fix: the final condition referenced ``flip.output`` while every
    other access in this pipeline uses ``flip.outputs['output']``; both name the
    same single output of the op, so this is behavior-preserving.
    """
    flip = flip_coin_op(str(forced_result1))
    flip3 = flip_coin_op(str(forced_result3))
    with dsl.Condition(flip.outputs['output'] == 'heads'):
        flip2 = flip_coin_op(str(forced_result2))
        # Print only when flip is heads AND flip2 is tails AND flip3 is heads.
        with dsl.Condition(flip2.outputs['output'] == 'tails'):
            with dsl.Condition(flip3.outputs['output'] == 'heads'):
                print_op(flip2.outputs['output'])
    with dsl.Condition(flip.outputs['output'] == 'tails'):
        print_op(flip.outputs['output'])
def bikes_weather_hptune(  #pylint: disable=unused-argument
    tune_epochs: int = 2,
    train_epochs: int = 5,
    num_tuners: int = 8,
    bucket_name: str = 'YOUR_BUCKET_NAME',  # used for the HP dirs; don't include the 'gs://'
    tuner_dir_prefix: str = 'hptest',
    tuner_proj: str = 'p1',
    max_trials: int = 128,
    working_dir: str = 'gs://YOUR/GCS/PATH',  # for the full training jobs
    data_dir: str = 'gs://aju-dev-demos-codelabs/bikes_weather/',
    steps_per_epoch: int = -1,  # if -1, don't override normal calcs based on dataset size
    num_best_hps: int = 2,  # the N best parameter sets for full training
    # the indices to the best param sets; necessary in addition to the above param because of
    # how KFP loops work currently. Must be consistent with the above param.
    # NOTE(review): mutable list default is part of the serialized pipeline
    # signature here (KFP compiles it into the spec) — deliberate, do not "fix".
    num_best_hps_list: list = [0, 1],
    thresholds: str = '{"root_mean_squared_error": 2000}'):
    """Hyperparameter-tune the bikes/weather model, then fully train the best
    parameter sets in parallel and serve those whose metrics pass *thresholds*."""
    # Distributed Keras-tuner search; writes the best HP sets to /tmp/hps.json.
    hptune = dsl.ContainerOp(
        name='ktune',
        image='gcr.io/google-samples/ml-pipeline-bikes-dep:b97ee76',
        arguments=[
            '--epochs', tune_epochs, '--num-tuners', num_tuners,
            '--tuner-dir', '%s/%s' % (tuner_dir_prefix, dsl.RUN_ID_PLACEHOLDER),
            '--tuner-proj', tuner_proj, '--bucket-name', bucket_name,
            '--max-trials', max_trials, '--namespace', 'default',
            '--num-best-hps', num_best_hps, '--executions-per-trial', 2,
            '--deploy'
        ],
        file_outputs={'hps': '/tmp/hps.json'},
    )
    # create TensorBoard viz for the parent directory of all training runs, so that we can
    # compare them.
    tb_viz = tb_op(
        log_dir_uri='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER)
    )
    with dsl.ParallelFor(num_best_hps_list) as idx:  # start the full training runs in parallel
        train = train_op(
            data_dir=data_dir,
            workdir='%s/%s' % (tb_viz.outputs['log_dir_uri'], idx),
            tb_dir=tb_viz.outputs['log_dir_uri'],
            epochs=train_epochs,
            steps_per_epoch=steps_per_epoch,
            hp_idx=idx,
            hptune_results=hptune.outputs['hps']
        )
        # Check the training metrics against the JSON thresholds.
        eval_metrics = eval_metrics_op(
            thresholds=thresholds,
            metrics=train.outputs['metrics_output_path'],
        )
        # Serve only the runs the metrics check marked deployable.
        with dsl.Condition(eval_metrics.outputs['deploy'] == 'deploy'):
            serve = serve_op(
                model_path=train.outputs['train_output_path'],
                model_name='bikesw',
                namespace='default'
            )
        train.set_gpu_limit(2)
def kfpipeline(model_pkg_class=default_pkg_class, build=0):
    """Optionally rebuild the prep-data image, then prep data, train a model,
    and evaluate it on the generated test set."""
    # Rebuild the function image only when build == 1.
    with dsl.Condition(build == 1) as build_cond:
        funcs["prep-data"].deploy_step()

    # Local data-prep step; waits on the (possibly skipped) build.
    prep_data = funcs["prep-data"].as_step(
        name="prep_data",
        inputs={"source_url": project.get_artifact_uri("data")},
        outputs=["cleaned_data"],
    ).after(build_cond)

    # Train using a library (hub://) function on the cleaned data.
    train = funcs["train"].as_step(
        name="train",
        inputs={"dataset": prep_data.outputs["cleaned_data"]},
        params={
            "model_pkg_class": model_pkg_class,
            "label_column": project.get_param("label", "label"),
        },
        outputs=["model", "test_set"],
    )

    # Test the trained model with a library (hub://) function.
    funcs["test"].as_step(
        name="test",
        params={"label_column": "label"},
        inputs={
            "models_path": train.outputs["model"],
            "test_set": train.outputs["test_set"],
        },
    )
def train_until_low_error(starting_model, training_data, true_values):
    """Recursively retrain an XGBoost regressor until mean squared error is
    no longer above 0.01."""
    # Continue training from the supplied starting model.
    trained_model = xgboost_train_on_csv_op(
        training_data=training_data,
        starting_model=starting_model,
        label_column=0,
        objective='reg:squarederror',
        num_iterations=50,
    ).outputs['model']

    # Score the training data with the new model.
    predictions = xgboost_predict_on_csv_op(
        data=training_data,
        model=trained_model,
        label_column=0,
    ).output

    # Compare predictions against the ground truth.
    metrics_task = calculate_regression_metrics_from_csv_op(
        true_values=true_values,
        predicted_values=predictions,
    )

    # Recurse while the error is still above the threshold.
    with dsl.Condition(metrics_task.outputs['mean_squared_error'] > 0.01):
        train_until_low_error(
            starting_model=trained_model,
            training_data=training_data,
            true_values=true_values,
        )
def flip_component(flip_result, maxVal, my_pipe_param):
    """Recursive component: while the flip is heads, print it, fan out over
    nested ParallelFor loops of echo steps, then re-flip and recurse.

    NOTE(review): maxVal is passed through the recursion but not visibly used
    as a bound in this block — confirm against the enclosing pipeline.
    """
    with dsl.Condition(flip_result == 'heads'):
        print_flip = print_op(flip_result)
        flipA = flip_coin_op().after(print_flip)
        loop_args = [{'a': 1, 'b': 2}, {'a': 10, 'b': 20}]
        with dsl.ParallelFor(loop_args) as item:
            # Echo the 'a' field plus the pipeline parameter.
            op1 = dsl.ContainerOp(
                name="my-in-coop1",
                image="library/bash:4.4.23",
                command=["sh", "-c"],
                arguments=["echo op1 %s %s" % (item.a, my_pipe_param)],
            )
            # Inner loop over plain integers, combined with the outer item.
            with dsl.ParallelFor([100, 200, 300]) as inner_item:
                op11 = dsl.ContainerOp(
                    name="my-inner-inner-coop",
                    image="library/bash:4.4.23",
                    command=["sh", "-c"],
                    arguments=[
                        "echo op1 %s %s %s" % (item.a, inner_item, my_pipe_param)
                    ],
                )
            # Echo the 'b' field of the outer loop item.
            op2 = dsl.ContainerOp(
                name="my-in-coop2",
                image="library/bash:4.4.23",
                command=["sh", "-c"],
                arguments=["echo op2 %s" % item.b],
            )
        # Recurse on the fresh flip result.
        flip_component(flipA.output, maxVal, my_pipe_param)
def pipeline():
    """Emit a JSON list of dicts, fan out over its items, and run two chained
    echo steps for items whose 'a' field equals '1'; finally echo the raw output."""
    # Produce [{'a': 1, 'b': 2}, {'a': 10, 'b': 20}] as a JSON file output.
    out_op = dsl.ContainerOp(
        name="my-out-cop0",
        image='python:alpine3.6',
        command=["sh", "-c"],
        arguments=[
            'python -c "import json; import sys; json.dump([{\'a\': 1, \'b\': 2}, {\'a\': 10, \'b\': 20}], open(\'/tmp/out.json\', \'w\'))"'
        ],
        file_outputs={'out': '/tmp/out.json'},
    )
    with dsl.ParallelFor(out_op.output) as item:
        with dsl.Condition(item.a == '1'):
            first_echo = dsl.ContainerOp(
                name="my-in-cop1",
                image="library/bash:4.4.23",
                command=["sh", "-c"],
                arguments=["echo do output op1 item.a: %s" % item.a],
            )
            # Second echo runs after the first.
            # NOTE(review): both steps share the name "my-in-cop1" — looks like a
            # copy-paste; kept byte-identical because the step name is runtime data.
            dsl.ContainerOp(
                name="my-in-cop1",
                image="library/bash:4.4.23",
                command=["sh", "-c"],
                arguments=["echo do output op1 item.a: %s" % item.a],
            ).after(first_echo)
    # Echo the whole JSON output outside the loop.
    dsl.ContainerOp(
        name="my-out-cop2",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo do output op2, outp: %s" % out_op.output],
    )
def gh_summ(  #pylint: disable=unused-argument
    train_steps: 'Integer' = 2019300,
    project: String = 'YOUR_PROJECT_HERE',
    github_token: String = 'YOUR_GITHUB_TOKEN_HERE',
    working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE',
    checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',
    deploy_webapp: String = 'true',
    data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'):
    """GitHub-issue summarization pipeline: copy data/checkpoints, train a T2T
    model on GPU, log metadata, serve with TF-Serving, and conditionally launch
    the demo webapp when the training step says to."""
    # Copy the dataset/checkpoints into the run's model dir.
    copydata = copydata_op(
        data_dir=data_dir,
        checkpoint_dir=checkpoint_dir,
        model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER),
        action=COPY_ACTION,
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
    # Record the dataset in the metadata store.
    log_dataset = metadata_log_op(
        log_type=DATASET,
        workspace_name=WORKSPACE_NAME,
        run_name=dsl.RUN_ID_PLACEHOLDER,
        data_uri=data_dir
    )
    train = train_op(
        data_dir=data_dir,
        model_dir=copydata.outputs['copy_output_path'],
        action=TRAIN_ACTION,
        train_steps=train_steps,
        deploy_webapp=deploy_webapp
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
    # Record the trained model in the metadata store.
    log_model = metadata_log_op(
        log_type=MODEL,
        workspace_name=WORKSPACE_NAME,
        run_name=dsl.RUN_ID_PLACEHOLDER,
        model_uri=train.outputs['train_output_path']
    )
    # Serve the trained model via TF-Serving.
    serve = dsl.ContainerOp(
        name='serve',
        image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve:v2',
        arguments=[
            "--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER, ),
            "--model_path", train.outputs['train_output_path']
        ]).apply(gcp.use_gcp_secret('user-gcp-sa'))
    log_dataset.after(copydata)
    log_model.after(train)
    train.set_gpu_limit(1)
    train.set_memory_limit('48G')
    # Launch the webapp only when the training step asked for a server.
    with dsl.Condition(train.outputs['launch_server'] == 'true'):
        webapp = dsl.ContainerOp(
            name='webapp',
            image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v3ap',
            arguments=[
                "--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER, ),
                "--github_token", github_token
            ])
        webapp.after(serve)
def my_pipeline(text: str = 'condition test'):
    """Flip an uncached coin; on heads, flip again and print both the new
    result and *text*."""
    coin1 = flip_coin_op().set_caching_options(False)
    print_op(msg=coin1.output)
    with dsl.Condition(coin1.output == 'heads'):
        coin2 = flip_coin_op().set_caching_options(False)
        print_op(msg=coin2.output)
        print_op(msg=text)
def pipeline(param: int = 10):
    """Loop sequentially over [1, 2] and break out of the loop (Tekton) once
    the item equals 2."""
    items = [1, 2]
    # parallelism=1 forces sequential iteration so Break is meaningful.
    with dsl.ParallelFor(items, parallelism=1) as item:
        component_template = components.load_component_from_text(op1_yaml)
        component_template(item, param)
        is_two = tekton.CEL_ConditionOp(f"{item} == 2").output
        with dsl.Condition(is_two == 'true'):
            tekton.Break()
def condition(text: str = 'condition test', force_flip_result: str = ''):
    """Flip a (optionally forced) coin; on heads, flip again and print the new
    result followed by *text*."""
    coin1 = flip_coin(force_flip_result=force_flip_result)
    print_msg(msg=coin1.output)
    with dsl.Condition(coin1.output == 'heads'):
        coin2 = flip_coin()
        print_msg(msg=coin2.output)
        print_msg(msg=text)
def my_pipeline(text: str = 'condition test', force_flip_result: str = ''):
    """Flip a (optionally forced) coin; on heads, flip again and print the new
    result followed by *text*."""
    coin1 = flip_coin_op(force_flip_result)
    print_op(coin1.output)
    with dsl.Condition(coin1.output == 'heads'):
        coin2 = flip_coin_op()
        print_op(coin2.output)
        print_op(text)