from mlrun.artifacts import ChartArtifact, TableArtifact


def my_job(context, p1=1, p2='a-string'):
    # access input metadata, values, files, and secrets (passwords)
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(context.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(context.get_input('infile.txt').get()))

    # run some useful code, e.g. ML training or data prep

    # log scalar result values (job result metrics)
    context.log_result('accuracy', p1 * 2)
    context.log_result('loss', p1 * 3)

    # log various types of artifacts (file, web page, table);
    # they are versioned and visible in the UI
    context.log_artifact('model.txt', body=b'abc is 123')
    context.log_artifact('results.html', body=b'<b> Some HTML </b>',
                         viewer='web-app')
    context.log_artifact(
        TableArtifact('dataset.csv', '1,2,3\n4,5,6\n',
                      viewer='table', header=['A', 'B', 'C']))

    # create a chart output (will show in the pipelines UI)
    chart = ChartArtifact('chart.html')
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for i in range(1, 8):
        chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20])
    context.log_artifact(chart)
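# --- usage sketch (not part of the original example; assumes MLRun is
# installed, the handler above is importable, and './infile.txt' exists).
# One way to execute the handler in-process and inspect its logged results:
import mlrun

fn = mlrun.new_function()  # default (local) runtime
run = fn.run(handler=my_job,
             params={'p1': 5, 'p2': 'another-string'},
             inputs={'infile.txt': './infile.txt'})
print(run.outputs)  # result metrics and artifact URIs logged by the handler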
from mlrun import get_or_create_ctx
from mlrun.artifacts import ChartArtifact, TableArtifact


def my_job():
    # load the MLRun runtime context (set by the runtime framework, e.g. Kubeflow)
    context = get_or_create_ctx('train')

    # get parameters from the runtime context (or use defaults)
    p1 = context.get_param('p1', 1)
    p2 = context.get_param('p2', 'a-string')

    # access input metadata, values, files, and secrets (passwords)
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(context.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(context.get_input('infile.txt', 'infile.txt').get()))

    # run some useful code, e.g. ML training or data prep

    # log scalar result values (job result metrics)
    context.log_result('accuracy', p1 * 2)
    context.log_result('loss', p1 * 3)
    context.set_label('framework', 'sklearn')

    # log various types of artifacts (file, web page, table);
    # they are versioned and visible in the UI
    context.log_artifact('model', body=b'abc is 123', target_path='model.txt',
                         labels={'framework': 'xgboost'})
    context.log_artifact('html_result', body=b'<b> Some HTML </b>',
                         target_path='result.html', viewer='web-app')
    context.log_artifact(TableArtifact('dataset', '1,2,3\n4,5,6\n',
                                       viewer='table', header=['A', 'B', 'C']),
                         target_path='dataset.csv')

    # create a chart output (will show in the pipelines UI)
    chart = ChartArtifact('chart.html')
    chart.labels = {'type': 'roc'}
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for i in range(1, 8):
        chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20])
    context.log_artifact(chart)
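# --- usage sketch (assumption: the function above is saved as 'train.py'
# and the script calls my_job() when executed, e.g. under a __main__ guard).
# Because this variant pulls its parameters from the runtime context itself,
# it is typically launched as a script job rather than called directly:
import mlrun

fn = mlrun.new_function(name='train', command='train.py', kind='job',
                        image='mlrun/mlrun')
run = fn.run(params={'p1': 8}, inputs={'infile.txt': './infile.txt'})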
from mlrun.artifacts import ChartArtifact, TableArtifact


def my_func(ctx):
    # get parameters from the context (or use defaults)
    p1 = ctx.get_param('p1', 1)
    p2 = ctx.get_param('p2', 'a-string')

    # access input metadata, values, and inputs
    print(f'Run: {ctx.name} (uid={ctx.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(ctx.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(ctx.get_object('infile.txt').get()))

    # log scalar values (KFP metrics)
    ctx.log_result('accuracy', p1 * 2)
    ctx.log_result('latency', p1 * 3)

    # log various types of artifacts (and set UI viewers)
    ctx.log_artifact('test.txt', body=b'abc is 123')
    ctx.log_artifact('test.html', body=b'<b> Some HTML </b>', viewer='web-app')

    table = TableArtifact('tbl.csv', '1,2,3\n4,5,6\n',
                          viewer='table', header=['A', 'B', 'C'])
    ctx.log_artifact(table)

    # chart with dummy values, one row per hour
    chart = ChartArtifact('chart.html')
    chart.header = ['Hour', 'One', 'Two']
    for i in range(1, 4):
        chart.add_row([i, 1 + 2, 2 * i])
    ctx.log_artifact(chart)
import pandas as pd

from mlrun.artifacts import ChartArtifact, TableArtifact


def my_job(context, p1=1, p2='x'):
    # the MLRun runtime context and parameters (or their defaults) are
    # injected through the handler arguments by the runtime framework
    # (e.g. Kubeflow)

    # access input metadata, values, files, and secrets (passwords)
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(context.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(
        context.get_input('infile.txt', 'infile.txt').get()))

    # run some useful code, e.g. ML training or data prep

    # log scalar result values (job result metrics)
    context.log_result('accuracy', p1 * 2)
    context.log_result('loss', p1 * 3)
    context.set_label('framework', 'sklearn')

    # log various types of artifacts (file, web page, table);
    # they are versioned and visible in the UI
    context.log_artifact('model', body=b'abc is 123', local_path='model.txt',
                         labels={'framework': 'xgboost'})
    context.log_artifact('html_result', body=b'<b> Some HTML </b>',
                         local_path='result.html')
    context.log_artifact(TableArtifact('dataset', '1,2,3\n4,5,6\n',
                                       visible=True, header=['A', 'B', 'C']),
                         local_path='dataset.csv')

    # create a chart output (will show in the pipelines UI)
    chart = ChartArtifact('chart')
    chart.labels = {'type': 'roc'}
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for i in range(1, 8):
        chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20])
    context.log_artifact(chart)

    # log a pandas DataFrame as a dataset artifact (with computed stats)
    raw_data = {
        'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
        'age': [42, 52, 36, 24, 73],
        'testScore': [25, 94, 57, 62, 70],
    }
    df = pd.DataFrame(raw_data,
                      columns=['first_name', 'last_name', 'age', 'testScore'])
    context.log_dataset('mydf', df=df, stats=True)


# if __name__ == "__main__":
#     context = get_or_create_ctx('train')
#     p1 = context.get_param('p1', 1)
#     p2 = context.get_param('p2', 'a-string')
#     my_job(context, p1, p2)
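# --- CLI sketch (not part of the original; assumes this file is saved as
# 'my_job.py'). The same handler can also be launched from the MLRun CLI;
# the flag spelling follows the 'mlrun run' command and the parameter and
# input values here are illustrative:
#
#   mlrun run --name train --handler my_job \
#       -p p1=5 -p p2=a-string \
#       -i infile.txt=./infile.txt \
#       my_job.py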
def my_job(context, p1=1, p2="x"): # load MLRUN runtime context (will be set by the runtime framework e.g. KubeFlow) # get parameters from the runtime context (or use defaults) # access input metadata, values, files, and secrets (passwords) print(f"Run: {context.name} (uid={context.uid})") print(f"Params: p1={p1}, p2={p2}") access_key = context.get_secret("ACCESS_KEY") print(f"Access key = {access_key}") input_file = context.get_input("infile.txt", "infile.txt").get() print(f"File\n{input_file}\n") # Run some useful code e.g. ML training, data prep, etc. # log scalar result values (job result metrics) context.log_result("accuracy", p1 * 2) context.log_result("loss", p1 * 3) context.set_label("framework", "sklearn") # log various types of artifacts (file, web page, table), will be versioned and visible in the UI context.log_artifact( "model", body=b"abc is 123", local_path="model.txt", labels={"framework": "xgboost"}, ) context.log_artifact("html_result", body=b"<b> Some HTML <b>", local_path="result.html") # create a chart output (will show in the pipelines UI) chart = ChartArtifact("chart") chart.labels = {"type": "roc"} chart.header = ["Epoch", "Accuracy", "Loss"] for i in range(1, 8): chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20]) context.log_artifact(chart) raw_data = { "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"], "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"], "age": [42, 52, 36, 24, 73], "testScore": [25, 94, 57, 62, 70], } df = pd.DataFrame(raw_data, columns=["first_name", "last_name", "age", "testScore"]) context.log_dataset("mydf", df=df, stats=True)
import pandas as pd

from mlrun.artifacts import ChartArtifact, TableArtifact


def my_job(context, p1=1, p2='a-string'):
    # access input metadata, values, files, and secrets (passwords)
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(context.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(context.get_input('infile.txt').get()))

    # run some useful code, e.g. ML training or data prep

    # log scalar result values (job result metrics)
    context.log_result('accuracy', p1 * 2)
    context.log_result('loss', p1 * 3)

    # log various types of artifacts (file, web page, table);
    # they are versioned and visible in the UI
    context.log_artifact('model', body=b'abc is 123', local_path='model.txt')
    context.log_artifact('results', local_path='results.html',
                         body=b'<b> Some HTML </b>')
    context.log_artifact(
        TableArtifact(
            'dataset',
            '1,2,3\n4,5,6\n',
            format='csv',
            viewer='table',
            header=['A', 'B', 'C'],
        ))

    # create a chart output (will show in the pipelines UI)
    chart = ChartArtifact('chart')
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for i in range(1, 8):
        chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20])
    context.log_artifact(chart)

    # log a pandas DataFrame as a dataset artifact
    raw_data = {
        'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
        'age': [42, 52, 36, 24, 73],
        'postTestScore': [25, 94, 57, 62, 70],
    }
    df = pd.DataFrame(
        raw_data, columns=['first_name', 'last_name', 'age', 'postTestScore'])
    context.log_dataset('mydf', df=df)
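# --- retrieval sketch (not part of the original snippet; assumes MLRun is
# installed and './infile.txt' exists). After a run, a logged dataset can be
# read back as a DataFrame through the run object:
import mlrun

run = mlrun.new_function().run(handler=my_job,
                               params={'p1': 2},
                               inputs={'infile.txt': './infile.txt'})
df_back = run.artifact('mydf').as_df()
print(df_back.head())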
def my_job(context, p1=1, p2="a-string"): # access input metadata, values, files, and secrets (passwords) print(f"Run: {context.name} (uid={context.uid})") print(f"Params: p1={p1}, p2={p2}") print("accesskey = {}".format(context.get_secret("ACCESS_KEY"))) print("file\n{}\n".format( context.get_input(str(tests_dir) + "/assets/test_kfp_input_file.txt").get())) # RUN some useful code e.g. ML training, data prep, etc. # log scalar result values (job result metrics) context.log_result("accuracy", p1 * 2) context.log_result("loss", p1 * 3) # log various types of artifacts (file, web page, table), will be # versioned and visible in the UI context.log_artifact("model", body=model_body, local_path="model.txt") context.log_artifact("results", local_path="results.html", body=results_body) # create a chart output (will show in the pipelines UI) chart = ChartArtifact("chart") chart.header = ["Epoch", "Accuracy", "Loss"] for i in range(1, 8): chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20]) context.log_artifact(chart) raw_data = { "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"], "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"], "age": [42, 52, 36, 24, 73], "postTestScore": [25, 94, 57, 62, 70], } df = pd.DataFrame( raw_data, columns=["first_name", "last_name", "age", "postTestScore"]) context.log_dataset("mydf", df=df)
)

# save the model only on worker 0 to prevent failures ("cannot lock file")
if hvd.rank() == 0:
    # os.makedirs(MODEL_DIR, exist_ok=True)
    model_artifacts = os.path.join(mlctx.artifact_path, MODEL_DIR)

    # log the epoch advancement
    mlctx.logger.info("history:", history.history)
    print("MA:", model_artifacts)

    # Save the model file
    model.save("model.h5")

    # Produce training chart artifact
    chart = ChartArtifact("summary.html")
    chart.header = ["epoch", "accuracy", "val_accuracy", "loss", "val_loss"]
    for i in range(EPOCHS):
        chart.add_row([
            i + 1,
            history.history["accuracy"][i],
            history.history["val_accuracy"][i],
            history.history["loss"][i],
            history.history["val_loss"][i],
        ])
    summary = mlctx.log_artifact(chart,
                                 local_path="training-summary.html",
                                 artifact_path=model_artifacts)

    # Save weights
    model.save_weights("model-weights.h5")
    weights = mlctx.log_artifact(
    validation_steps=total_validate // BATCH_SIZE)

# save the model only on worker 0 to prevent failures ("cannot lock file")
if hvd.rank() == 0:
    # os.makedirs(MODEL_DIR, exist_ok=True)
    model_artifacts = os.path.join(mlctx.artifact_path, MODEL_DIR)

    # log the epoch advancement
    mlctx.logger.info('history:', history.history)
    print('MA:', model_artifacts)

    # Save the model file
    model.save('model.h5')

    # Produce training chart artifact
    chart = ChartArtifact('summary.html')
    chart.header = ['epoch', 'accuracy', 'val_accuracy', 'loss', 'val_loss']
    for i in range(EPOCHS):
        chart.add_row([
            i + 1,
            history.history['accuracy'][i],
            history.history['val_accuracy'][i],
            history.history['loss'][i],
            history.history['val_loss'][i],
        ])
    summary = mlctx.log_artifact(chart,
                                 local_path='training-summary.html',
                                 artifact_path=model_artifacts)

    # Save weights
    model.save_weights('model-weights.h5')
    weights = mlctx.log_artifact('model-weights',
                                 local_path='model-weights.h5',
                                 artifact_path=model_artifacts,
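# --- context sketch (assumption): the two fragments above are the tail of a
# Horovod training script; the names they rely on (hvd, mlctx, model,
# history, EPOCHS, MODEL_DIR, BATCH_SIZE, total_validate) would be set up
# earlier, roughly like this:
#
#   import os
#   import horovod.tensorflow.keras as hvd
#   from mlrun import get_or_create_ctx
#   from mlrun.artifacts import ChartArtifact
#
#   hvd.init()                          # one process per worker
#   mlctx = get_or_create_ctx('horovod-trainer')
#   MODEL_DIR = 'models'
#   EPOCHS = mlctx.get_param('epochs', 1)
#
# Only rank 0 saves and logs artifacts, so parallel workers never write the
# same files concurrently.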