def _assert_compiled_pipeline_equals_golden(
        self, kfp_compiler: compiler.Compiler, pipeline_func: Callable,
        golden_yaml_filename: str):
    compiled_file = os.path.join(tempfile.mkdtemp(), 'workflow.yaml')
    kfp_compiler.compile(pipeline_func, package_path=compiled_file)

    test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
    golden_file = os.path.join(test_data_dir, golden_yaml_filename)
    # Uncomment the following to update goldens.
    # TODO: place this behind some --update_goldens flag.
    # kfp_compiler.compile(pipeline_func, package_path=golden_file)

    with open(golden_file, 'r') as f:
        golden = yaml.safe_load(f)

    with open(compiled_file, 'r') as f:
        compiled = yaml.safe_load(f)

    for workflow in golden, compiled:
        del workflow['metadata']
        for template in workflow['spec']['templates']:
            template.pop('metadata', None)

    self.maxDiff = None
    self.assertDictEqual(golden, compiled)
def _assert_compiled_pipeline_equals_golden(
        self, kfp_compiler: compiler.Compiler, pipeline_func: Callable,
        golden_yaml_filename: str):
    compiled_file = os.path.join(tempfile.mkdtemp(), 'workflow.yaml')
    kfp_compiler.compile(pipeline_func, package_path=compiled_file)

    test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
    golden_file = os.path.join(test_data_dir, golden_yaml_filename)
    # Uncomment the following to update goldens.
    # TODO: place this behind some --update_goldens flag.
    # kfp_compiler.compile(pipeline_func, package_path=golden_file)

    with open(golden_file, 'r') as f:
        golden = yaml.safe_load(f)

    with open(compiled_file, 'r') as f:
        compiled = yaml.safe_load(f)

    for workflow in golden, compiled:
        del workflow['metadata']
        for template in workflow['spec']['templates']:
            template.pop('metadata', None)
            if 'initContainers' not in template:
                continue
            # Strip off the launcher image label before comparison
            for initContainer in template['initContainers']:
                initContainer['image'] = initContainer['image'].split(':')[0]

    self.maxDiff = None
    self.assertDictEqual(golden, compiled)
def test__resolve_task_pipeline_param(self):
    p = PipelineParam(name='param2')
    resolved = Compiler._resolve_task_pipeline_param(p, group_type=None)
    self.assertEqual(resolved, "{{workflow.parameters.param2}}")

    p = PipelineParam(name='param1', op_name='op1')
    resolved = Compiler._resolve_task_pipeline_param(p, group_type=None)
    self.assertEqual(resolved, "{{tasks.op1.outputs.parameters.op1-param1}}")

    p = PipelineParam(name='param1', op_name='op1')
    resolved = Compiler._resolve_task_pipeline_param(p, group_type="subgraph")
    self.assertEqual(resolved, "{{inputs.parameters.op1-param1}}")
def _upload_pipeline_version(self, pipeline_func, pipeline_id, pipeline_name):
    version_name = f"{_clean_name(pipeline_name)}-{uuid.uuid4()}"[:100]
    with NamedTemporaryFile(suffix=".yaml") as f:
        Compiler().compile(pipeline_func, f.name)
        return self.client.pipeline_uploads.upload_pipeline_version(
            f.name, name=version_name, pipelineid=pipeline_id).id
def compile(self, pipeline, image, output, image_pull_policy="IfNotPresent"):
    Compiler().compile(
        self.generate_pipeline(pipeline, image, image_pull_policy), output)
    self.log.info("Generated pipeline definition was saved to %s" % output)
def _assert_compiled_pipeline_equals_golden(self,
                                            kfp_compiler: compiler.Compiler,
                                            pipeline_func: Callable,
                                            golden_yaml_filename: str):
    compiled_file = os.path.join(tempfile.mkdtemp(), 'workflow.yaml')
    kfp_compiler.compile(pipeline_func, package_path=compiled_file)

    test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
    golden_file = os.path.join(test_data_dir, golden_yaml_filename)

    def _load_compiled_template(filename: str) -> Dict:
        with open(filename, 'r') as f:
            workflow = yaml.safe_load(f)

        del workflow['metadata']
        for template in workflow['spec']['templates']:
            template.pop('metadata', None)
            if 'initContainers' not in template:
                continue
            # Strip off the launcher image label before comparison
            for initContainer in template['initContainers']:
                initContainer['image'] = initContainer['image'].split(':')[0]
            if 'container' in template:
                template['container'] = json.loads(
                    re.sub(r"'kfp==(\d+).(\d+).(\d+)'", 'kfp',
                           json.dumps(template['container'])))

        return workflow

    golden = _load_compiled_template(golden_file)
    compiled = _load_compiled_template(compiled_file)

    # Devs can run the following command to update golden files:
    # UPDATE_GOLDENS=True python3 -m unittest kfp/compiler/v2_compatible_compiler_test.py
    # If UPDATE_GOLDENS=True and the diff is different, update the golden
    # file and reload it.
    update_goldens = os.environ.get('UPDATE_GOLDENS', False)
    if golden != compiled and update_goldens:
        kfp_compiler.compile(pipeline_func, package_path=golden_file)
        golden = _load_compiled_template(golden_file)

    self.assertDictEqual(golden, compiled)
def _upload_pipeline(self, pipeline_func):
    with NamedTemporaryFile(suffix=".yaml") as f:
        Compiler().compile(pipeline_func, f.name)
        pipeline = self.client.pipeline_uploads.upload_pipeline(
            f.name,
            name=self.project_name,
            description=self.pipeline_description,
            _request_timeout=10000,
        )
    return (pipeline.id, pipeline.default_version.id)
def compile_pipeline(pipeline_source, pipeline_name):
    """Read in the generated python script and compile it to a KFP package."""
    # create a tmp folder
    tmp_dir = tempfile.mkdtemp()
    # copy generated script to temp dir
    copyfile(pipeline_source, tmp_dir + '/' + "pipeline_code.py")

    path = tmp_dir + '/' + 'pipeline_code.py'
    spec = importlib.util.spec_from_file_location(tmp_dir.split('/')[-1], path)
    foo = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(foo)

    # path to generated pipeline package
    pipeline_package = pipeline_name + '.pipeline.tar.gz'

    Compiler().compile(foo.auto_generated_pipeline, pipeline_package)
    return pipeline_package
def _get_workflow(self):
    """
    Calculate the Argo workflow from the execution of the Pipeline function
    """
    # Go and compile the workflow, which will mean executing our
    # pipeline function. We store a global reference to this pipeline
    # while we are compiling to allow us to easily bind the pipeline
    # to the `HmlContainerOp`, without damaging the re-usability of the
    # op.
    _pipeline_enter(self)
    workflow = Compiler()._create_workflow(self.pipeline_func)

    # print("WORKFLOW ->")
    # print(yaml.dump(workflow))
    # print("<- WORKFLOW")
    _pipeline_exit()

    return workflow
pvc_path=pvc_path, vol=train_model_op_task.pvolume, TAG=TAG)

from scripts2.D99docker_setup.push_all_images import push_all_images

TAG = "test30"
repos = {
    "get_data_image": r"C:\Users\nicog\Documents\rs-thesis\Code\Movie_Recommender\scripts2\D01get_data",
    "train_model_image": r"C:\Users\nicog\Documents\rs-thesis\Code\Movie_Recommender\scripts2\D03train_model",
    "prepare_data_image": r"C:\Users\nicog\Documents\rs-thesis\Code\Movie_Recommender\scripts2\D02prepare_data",
    "evaluate_model_image": r"C:\Users\nicog\Documents\rs-thesis\Code\Movie_Recommender\scripts2\D05_evaluate_model",
}
# push_all_images(origtag=TAG, repos=repos)

# compiling the created pipeline
pipelineConfig = dsl.PipelineConf()
pipelineConfig.set_image_pull_policy("Always")
print(pipelineConfig.image_pull_policy)
# pipelineConfig.add_op_transformer(gcp.use_gcp_secret('user-gcp-sa'))

Compiler().compile(train_recommender_model_pipeline,
                   'train_modell_pipeline3.zip',
                   pipeline_conf=pipelineConfig)
import lady_gaga


def my_func():
    lady_gaga.my_func_impl()


def next_func(a):
    lady_gaga.next_func_impl()


my_op = components.func_to_container_op(my_func)
next_op = components.func_to_container_op(next_func)


@dsl.pipeline(
    name='simple pipeline',
    description='A trainer that does end-to-end distributed training for XGBoost models.'
)
def my_pipeline():
    my_task = my_op()
    next_task = next_op("111")
    next_task.after(my_task)


if __name__ == '__main__':
    compiler = Compiler()
    compiler.compile(my_pipeline, "hallo.zip")
    # client = kfp.Client()
    kfp.Client().create_run_from_pipeline_func(my_pipeline, arguments=None)
    res18_confusion = storage_op(get_confusion, fit_res18.output)
    res_confusion = storage_op(get_confusion, fit_res.output)

    squeeze_confusion.after(fit_squeeze)
    res18_confusion.after(fit_res18)
    res_confusion.after(fit_res)

    # save the best model for later deployment
    models = np.array([SQUEEZE_FILE_PATH, RES18_FILE_PATH, RESNET_FILE_PATH])
    best = np.argmax([squeeze_acc.output, res18_acc.output, res_acc.output])
    globals()['DEPLOYED_MODEL'] = models[best]


if __name__ == '__main__':
    Compiler().compile(pipeline)


def markdown_metadata(result: str) -> str:
    return json.dumps({
        'outputs': [{
            'type': 'markdown',
            'source': 'The result: %s' % result,
            'storage': 'inline',
        }]
    })


def demo_op(name: str, metadata=markdown_metadata, is_exit_handler=False) -> ContainerOp:
def compile_pipeline(name: str):
    Compiler().compile(
        pipeline_func=get_pipeline(name=name),
        package_path=f"../data/11_kubeflow_files/{name}.yaml",
    )
import kfp
from kfp import dsl
from kfp.compiler import Compiler

# sb_op = kfp.components.load_component_from_url("https://raw.githubusercontent.com/demotto/my-pipeline/master/expr001/component.yaml")
import requests

url = "https://raw.githubusercontent.com/demotto/my-pipeline/master/expr001/component.yaml"
resp = requests.get(url)
sb_op = kfp.components.load_component_from_text(resp.text)


@dsl.pipeline(
    name='simple pipeline',
    description='A trainer that does end-to-end distributed training for XGBoost models.'
)
def my_pipeline():
    my_task = sb_op(
        input1="xxxxxxxxxx",
        input2="oooooooooo"
    )


if __name__ == '__main__':
    compiler = Compiler()
    compiler.compile(my_pipeline, "my_brick.yaml")
def _upload_pipeline(self, pipeline_func, pipeline_name):
    with NamedTemporaryFile(suffix=".yaml") as f:
        Compiler().compile(pipeline_func, f.name)
        pipeline = self.client.pipeline_uploads.upload_pipeline(
            f.name, name=pipeline_name)
    return (pipeline.id, pipeline.default_version.id)
def run():
    logger.info("Building pipeline into {}", Pipeline.FILE)
    Compiler().compile(pipeline_func=Pipeline.__run, package_path=Pipeline.FILE)
from kfp.dsl import pipeline


# defining pipeline meta
@pipeline(name='Calculate Average', description='This pipeline calculates average')
# stitch the steps
def train_recommender_modell():
    # importing container operation
    import kfp.dsl as dsl

    step_1 = dsl.ContainerOp(
        name='get_data',  # name of the operation
        image='rsthesis/get_data_image:latest',  # docker location in registry
        # arguments=[context],  # passing context as argument
        file_outputs={
            'context': '/output.txt'  # name of the file with result
        })

    step_2 = dsl.ContainerOp(
        name='get_data',  # name of operation
        image='rsthesis/get_data_image:latest',  # docker location in registry
        arguments=step_1.output,  # passing step_1.output as argument
        file_outputs={
            'context': '/output.txt'  # name of the file with result
        })


# importing KFP compiler
from kfp.compiler import Compiler

# compiling the created pipeline
Compiler().compile(train_recommender_modell, 'train_modell_pipeline.zip')
# 3 print data
@dsl.pipeline(name="hello world pipeline", description="demo pipe")
def hello_word():
    vop = dsl.VolumeOp(name="create_pvc",
                       resource_name="my-pvc",
                       size="2Gi",
                       modes=dsl.VOLUME_MODE_RWM)

    step1 = dsl.ContainerOp(name="download",
                            image="google/cloud-sdk:295.0.0-slim",
                            command=["gsutil", "cp", "-r"],
                            arguments=["gs://raw_movie_data", "/mnt"],
                            pvolumes={"/mnt": vop.volume})

    step2 = dsl.ContainerOp(name="step2",
                            image="library/bash:4.4.23",
                            command=["sh", "-c"],
                            arguments=["ls", "/mnt"],
                            pvolumes={"/mnt": step1.pvolume})

    step3 = dsl.ContainerOp(name="step3",
                            image="library/bash:4.4.23",
                            command=[
                                "cat", "/mnt/raw_movie_data/links.csv",
                                "/mnt/raw_movie_data/ratings.csv"
                            ],
                            pvolumes={"/mnt": step2.pvolume})


Compiler().compile(hello_word, 'volume_check.zip')