Beispiel #1
0
def my_job(context, p1=1, p2='a-string'):
    """Demo job handler: echo the run inputs, then log results and artifacts.

    Args:
        context: run context object; this function reads ``name`` and
            ``uid`` from it and calls ``get_secret``, ``get_input``,
            ``log_result`` and ``log_artifact`` on it.
        p1: numeric parameter; both logged results are multiples of it.
        p2: parameter that is only echoed to stdout.
    """
    # Echo the run identity, the parameters, the secret and the input file.
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    access_key = context.get_secret('ACCESS_KEY')
    print(f'accesskey = {access_key}')
    infile_body = context.get_input('infile.txt').get()
    print(f'file\n{infile_body}\n')

    # (placeholder for the real work: ML training, data prep, etc.)

    # Scalar results: accuracy = 2 * p1, loss = 3 * p1.
    for result_key, factor in (('accuracy', 2), ('loss', 3)):
        context.log_result(result_key, p1 * factor)

    # Artifacts: a raw file, an HTML page, and a small CSV table.
    context.log_artifact('model.txt', body=b'abc is 123')
    context.log_artifact(
        'results.html', body=b'<b> Some HTML <b>', viewer='web-app')
    dataset = TableArtifact(
        'dataset.csv',
        '1,2,3\n4,5,6\n',
        viewer='table',
        header=['A', 'B', 'C'],
    )
    context.log_artifact(dataset)

    # Chart artifact with one row per epoch (1..7).
    chart = ChartArtifact('chart.html')
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for epoch in range(1, 8):
        accuracy = epoch / 20 + 0.75
        loss = 0.30 - epoch / 20
        chart.add_row([epoch, accuracy, loss])
    context.log_artifact(chart)
Beispiel #2
0
def my_job():
    """Demo job: obtain the 'train' context, echo inputs, log outputs.

    Takes no arguments: the context comes from ``get_or_create_ctx('train')``
    and the parameters ``p1`` / ``p2`` are read from that context, with
    defaults ``1`` and ``'a-string'``.
    """
    context = get_or_create_ctx('train')

    # Parameters come from the context, falling back to the defaults.
    p1 = context.get_param('p1', 1)
    p2 = context.get_param('p2', 'a-string')

    # Echo the run identity, the parameters, the secret and the input file.
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    access_key = context.get_secret('ACCESS_KEY')
    print(f'accesskey = {access_key}')
    infile_body = context.get_input('infile.txt', 'infile.txt').get()
    print(f'file\n{infile_body}\n')

    # (placeholder for the real work: ML training, data prep, etc.)

    # Scalar results derived from p1, plus a label on the run.
    for result_key, factor in (('accuracy', 2), ('loss', 3)):
        context.log_result(result_key, p1 * factor)
    context.set_label('framework', 'sklearn')

    # Artifacts: a raw file, an HTML page, and a small CSV table.
    context.log_artifact(
        'model',
        body=b'abc is 123',
        target_path='model.txt',
        labels={'framework': 'xgboost'},
    )
    context.log_artifact(
        'html_result',
        body=b'<b> Some HTML <b>',
        target_path='result.html',
        viewer='web-app',
    )
    dataset = TableArtifact(
        'dataset', '1,2,3\n4,5,6\n', viewer='table', header=['A', 'B', 'C'])
    context.log_artifact(dataset, target_path='dataset.csv')

    # Chart artifact with one row per epoch (1..7).
    chart = ChartArtifact('chart.html')
    chart.labels = {'type': 'roc'}
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for epoch in range(1, 8):
        accuracy = epoch / 20 + 0.75
        loss = 0.30 - epoch / 20
        chart.add_row([epoch, accuracy, loss])
    context.log_artifact(chart)
Beispiel #3
0
def my_func(ctx):
    """Demo handler: read params from ``ctx``, echo inputs, log outputs.

    Args:
        ctx: run context object; this function calls ``get_param``,
            ``get_secret``, ``get_object``, ``log_result`` and
            ``log_artifact`` on it and reads its ``name`` and ``uid``.
    """
    # Parameters come from the context, falling back to the defaults.
    p1 = ctx.get_param('p1', 1)
    p2 = ctx.get_param('p2', 'a-string')

    # Echo the run identity, the parameters, the secret and the input object.
    print(f'Run: {ctx.name} (uid={ctx.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    access_key = ctx.get_secret('ACCESS_KEY')
    print(f'accesskey = {access_key}')
    infile_body = ctx.get_object('infile.txt').get()
    print(f'file\n{infile_body}\n')

    # Scalar results: accuracy = 2 * p1, latency = 3 * p1.
    for result_key, factor in (('accuracy', 2), ('latency', 3)):
        ctx.log_result(result_key, p1 * factor)

    # Plain-text and HTML artifacts (the latter with a viewer hint).
    ctx.log_artifact('test.txt', body=b'abc is 123')
    ctx.log_artifact('test.html', body=b'<b> Some HTML <b>', viewer='web-app')

    # Table artifact built from inline CSV text.
    ctx.log_artifact(
        TableArtifact(
            'tbl.csv',
            '1,2,3\n4,5,6\n',
            viewer='table',
            header=['A', 'B', 'C'],
        ))

    # Chart artifact with three rows (hours 1..3); the 'One' column is the
    # constant expression 1 + 2, kept exactly as in the original.
    chart = ChartArtifact('chart.html')
    chart.header = ['Hour', 'One', 'Two']
    for hour in range(1, 4):
        chart.add_row([hour, 1 + 2, 2 * hour])
    ctx.log_artifact(chart)
def my_job(context, p1=1, p2='x'):
    """Demo job handler: echo inputs, log results, artifacts and a dataset.

    Args:
        context: run context object supplied by the caller; this function
            reads ``name`` and ``uid`` from it and calls ``get_secret``,
            ``get_input``, ``log_result``, ``set_label``, ``log_artifact``
            and ``log_dataset`` on it.
        p1: numeric parameter; both logged results are multiples of it.
        p2: parameter that is only echoed to stdout.
    """
    # Echo the run identity, the parameters, the secret and the input file.
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    access_key = context.get_secret('ACCESS_KEY')
    print(f'accesskey = {access_key}')
    infile_body = context.get_input('infile.txt', 'infile.txt').get()
    print(f'file\n{infile_body}\n')

    # (placeholder for the real work: ML training, data prep, etc.)

    # Scalar results derived from p1, plus a label on the run.
    for result_key, factor in (('accuracy', 2), ('loss', 3)):
        context.log_result(result_key, p1 * factor)
    context.set_label('framework', 'sklearn')

    # Artifacts: a raw file, an HTML page, and a small CSV table.
    context.log_artifact(
        'model',
        body=b'abc is 123',
        local_path='model.txt',
        labels={'framework': 'xgboost'},
    )
    context.log_artifact(
        'html_result', body=b'<b> Some HTML <b>', local_path='result.html')
    dataset = TableArtifact(
        'dataset', '1,2,3\n4,5,6\n', visible=True, header=['A', 'B', 'C'])
    context.log_artifact(dataset, local_path='dataset.csv')

    # Chart artifact with one row per epoch (1..7).
    chart = ChartArtifact('chart')
    chart.labels = {'type': 'roc'}
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for epoch in range(1, 8):
        accuracy = epoch / 20 + 0.75
        loss = 0.30 - epoch / 20
        chart.add_row([epoch, accuracy, loss])
    context.log_artifact(chart)

    # Small sample DataFrame logged as a dataset with stats=True.
    column_names = ['first_name', 'last_name', 'age', 'testScore']
    records = {
        'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
        'age': [42, 52, 36, 24, 73],
        'testScore': [25, 94, 57, 62, 70],
    }
    frame = pd.DataFrame(records, columns=column_names)
    context.log_dataset('mydf', df=frame, stats=True)


#if __name__ == "__main__":
#    context = get_or_create_ctx('train')
#    p1 = context.get_param('p1', 1)
#    p2 = context.get_param('p2', 'a-string')
#    my_job(context, p1, p2)
Beispiel #5
0
def my_job(context, p1=1, p2="x"):
    """Demo job handler: echo inputs, log results, artifacts and a dataset.

    Args:
        context: run context object supplied by the caller; this function
            reads ``name`` and ``uid`` from it and calls ``get_secret``,
            ``get_input``, ``log_result``, ``set_label``, ``log_artifact``
            and ``log_dataset`` on it.
        p1: numeric parameter; both logged results are multiples of it.
        p2: parameter that is only echoed to stdout.
    """
    # Echo the run identity, the parameters, the secret and the input file.
    print(f"Run: {context.name} (uid={context.uid})")
    print(f"Params: p1={p1}, p2={p2}")
    secret_value = context.get_secret("ACCESS_KEY")
    print(f"Access key = {secret_value}")
    infile_body = context.get_input("infile.txt", "infile.txt").get()
    print(f"File\n{infile_body}\n")

    # (placeholder for the real work: ML training, data prep, etc.)

    # Scalar results derived from p1, plus a label on the run.
    for result_key, factor in (("accuracy", 2), ("loss", 3)):
        context.log_result(result_key, p1 * factor)
    context.set_label("framework", "sklearn")

    # Artifacts: a raw file and an HTML page.
    model_labels = {"framework": "xgboost"}
    context.log_artifact(
        "model", body=b"abc is 123", local_path="model.txt",
        labels=model_labels)
    context.log_artifact(
        "html_result", body=b"<b> Some HTML <b>", local_path="result.html")

    # Chart artifact with one row per epoch (1..7).
    chart = ChartArtifact("chart")
    chart.labels = {"type": "roc"}
    chart.header = ["Epoch", "Accuracy", "Loss"]
    for epoch in range(1, 8):
        accuracy = epoch / 20 + 0.75
        loss = 0.30 - epoch / 20
        chart.add_row([epoch, accuracy, loss])
    context.log_artifact(chart)

    # Small sample DataFrame logged as a dataset with stats=True.
    column_names = ["first_name", "last_name", "age", "testScore"]
    records = {
        "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"],
        "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"],
        "age": [42, 52, 36, 24, 73],
        "testScore": [25, 94, 57, 62, 70],
    }
    frame = pd.DataFrame(records, columns=column_names)
    context.log_dataset("mydf", df=frame, stats=True)
Beispiel #6
0
def my_job(context, p1=1, p2='a-string'):
    """Demo job handler: echo inputs, log results, artifacts and a dataset.

    Args:
        context: run context object supplied by the caller; this function
            reads ``name`` and ``uid`` from it and calls ``get_secret``,
            ``get_input``, ``log_result``, ``log_artifact`` and
            ``log_dataset`` on it.
        p1: numeric parameter; both logged results are multiples of it.
        p2: parameter that is only echoed to stdout.
    """
    # Echo the run identity, the parameters, the secret and the input file.
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    access_key = context.get_secret('ACCESS_KEY')
    print(f'accesskey = {access_key}')
    infile_body = context.get_input('infile.txt').get()
    print(f'file\n{infile_body}\n')

    # (placeholder for the real work: ML training, data prep, etc.)

    # Scalar results: accuracy = 2 * p1, loss = 3 * p1.
    for result_key, factor in (('accuracy', 2), ('loss', 3)):
        context.log_result(result_key, p1 * factor)

    # Artifacts: a raw file, an HTML page, and a small CSV table.
    context.log_artifact('model', body=b'abc is 123', local_path='model.txt')
    context.log_artifact(
        'results', local_path='results.html', body=b'<b> Some HTML <b>')
    dataset = TableArtifact(
        'dataset',
        '1,2,3\n4,5,6\n',
        format='csv',
        viewer='table',
        header=['A', 'B', 'C'],
    )
    context.log_artifact(dataset)

    # Chart artifact with one row per epoch (1..7).
    chart = ChartArtifact('chart')
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for epoch in range(1, 8):
        accuracy = epoch / 20 + 0.75
        loss = 0.30 - epoch / 20
        chart.add_row([epoch, accuracy, loss])
    context.log_artifact(chart)

    # Small sample DataFrame logged as a dataset.
    column_names = ['first_name', 'last_name', 'age', 'postTestScore']
    records = {
        'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
        'age': [42, 52, 36, 24, 73],
        'postTestScore': [25, 94, 57, 62, 70],
    }
    frame = pd.DataFrame(records, columns=column_names)
    context.log_dataset('mydf', df=frame)
Beispiel #7
0
def my_job(context, p1=1, p2="a-string"):
    """Demo job handler: echo inputs, log results, artifacts and a dataset.

    Relies on names defined outside this function: ``tests_dir`` (base of
    the input file path) and ``model_body`` / ``results_body`` (artifact
    bodies).

    Args:
        context: run context object supplied by the caller; this function
            reads ``name`` and ``uid`` from it and calls ``get_secret``,
            ``get_input``, ``log_result``, ``log_artifact`` and
            ``log_dataset`` on it.
        p1: numeric parameter; both logged results are multiples of it.
        p2: parameter that is only echoed to stdout.
    """
    # Echo the run identity, the parameters, the secret and the input file.
    print(f"Run: {context.name} (uid={context.uid})")
    print(f"Params: p1={p1}, p2={p2}")
    access_key = context.get_secret("ACCESS_KEY")
    print(f"accesskey = {access_key}")
    input_path = str(tests_dir) + "/assets/test_kfp_input_file.txt"
    infile_body = context.get_input(input_path).get()
    print(f"file\n{infile_body}\n")

    # (placeholder for the real work: ML training, data prep, etc.)

    # Scalar results: accuracy = 2 * p1, loss = 3 * p1.
    for result_key, factor in (("accuracy", 2), ("loss", 3)):
        context.log_result(result_key, p1 * factor)

    # Artifacts whose bodies are supplied by the enclosing module.
    context.log_artifact("model", body=model_body, local_path="model.txt")
    context.log_artifact(
        "results", local_path="results.html", body=results_body)

    # Chart artifact with one row per epoch (1..7).
    chart = ChartArtifact("chart")
    chart.header = ["Epoch", "Accuracy", "Loss"]
    for epoch in range(1, 8):
        accuracy = epoch / 20 + 0.75
        loss = 0.30 - epoch / 20
        chart.add_row([epoch, accuracy, loss])
    context.log_artifact(chart)

    # Small sample DataFrame logged as a dataset.
    column_names = ["first_name", "last_name", "age", "postTestScore"]
    records = {
        "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"],
        "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"],
        "age": [42, 52, 36, 24, 73],
        "postTestScore": [25, 94, 57, 62, 70],
    }
    frame = pd.DataFrame(records, columns=column_names)
    context.log_dataset("mydf", df=frame)
Beispiel #8
0
)

# save the model only on worker 0 to prevent failures ("cannot lock file")
if hvd.rank() == 0:
    # os.makedirs(MODEL_DIR, exist_ok=True)
    model_artifacts = os.path.join(mlctx.artifact_path, MODEL_DIR)

    # log the epoch advancement
    mlctx.logger.info("history:", history.history)
    print("MA:", model_artifacts)

    # Save the model file
    model.save("model.h5")
    # Produce training chart artifact
    chart = ChartArtifact("summary.html")
    chart.header = ["epoch", "accuracy", "val_accuracy", "loss", "val_loss"]
    for i in range(EPOCHS):
        chart.add_row([
            i + 1,
            history.history["accuracy"][i],
            history.history["val_accuracy"][i],
            history.history["loss"][i],
            history.history["val_loss"][i],
        ])
    summary = mlctx.log_artifact(chart,
                                 local_path="training-summary.html",
                                 artifact_path=model_artifacts)

    # Save weights
    model.save_weights("model-weights.h5")
    weights = mlctx.log_artifact(
Beispiel #9
0
                    validation_steps=total_validate // BATCH_SIZE)

# save the model only on worker 0 to prevent failures ("cannot lock file")
if hvd.rank() == 0:
    #os.makedirs(MODEL_DIR, exist_ok=True)
    model_artifacts = os.path.join(mlctx.artifact_path, MODEL_DIR)

    # log the epoch advancement
    mlctx.logger.info('history:', history.history)
    print('MA:', model_artifacts)

    # Save the model file
    model.save('model.h5')
    # Produce training chart artifact
    chart = ChartArtifact('summary.html')
    chart.header = ['epoch', 'accuracy', 'val_accuracy', 'loss', 'val_loss']
    for i in range(EPOCHS):
        chart.add_row([
            i + 1, history.history['accuracy'][i],
            history.history['val_accuracy'][i], history.history['loss'][i],
            history.history['val_loss'][i]
        ])
    summary = mlctx.log_artifact(chart,
                                 local_path='training-summary.html',
                                 artifact_path=model_artifacts)

    # Save weights
    model.save_weights('model-weights.h5')
    weights = mlctx.log_artifact('model-weights',
                                 local_path='model-weights.h5',
                                 artifact_path=model_artifacts,