Example #1
def my_job(context, p1=1, p2='a-string'):

    # access input metadata, values, files, and secrets (passwords)
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(context.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(context.get_input('infile.txt').get()))

    # RUN some useful code e.g. ML training, data prep, etc.

    # log scalar result values (job result metrics)
    context.log_result('accuracy', p1 * 2)
    context.log_result('loss', p1 * 3)

    # log various types of artifacts (file, web page, table); they will be versioned and visible in the UI
    context.log_artifact('model.txt', body=b'abc is 123')
    context.log_artifact('results.html',
                         body=b'<b> Some HTML </b>',
                         viewer='web-app')
    context.log_artifact(
        TableArtifact('dataset.csv',
                      '1,2,3\n4,5,6\n',
                      viewer='table',
                      header=['A', 'B', 'C']))

    # create a chart output (will show in the pipelines UI)
    chart = ChartArtifact('chart.html')
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for i in range(1, 8):
        chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20])
    context.log_artifact(chart)
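
A note on imports: these snippets omit them, but the examples above and below generally assume something like the following (module paths per the mlrun package):

from mlrun import get_or_create_ctx
from mlrun.artifacts import ChartArtifact, TableArtifact
import pandas as pd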
Example #2
def my_job():
    # load the MLRun runtime context (set by the runtime framework, e.g., Kubeflow)
    context = get_or_create_ctx('train')
    
    # get parameters from the runtime context (or use defaults)
    p1 = context.get_param('p1', 1)
    p2 = context.get_param('p2', 'a-string')

    # access input metadata, values, files, and secrets (passwords)
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(context.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(context.get_input('infile.txt', 'infile.txt').get()))
    
    # Run some useful code e.g. ML training, data prep, etc.

    # log scalar result values (job result metrics)
    context.log_result('accuracy', p1 * 2)
    context.log_result('loss', p1 * 3)
    context.set_label('framework', 'sklearn')

    # log various types of artifacts (file, web page, table); they will be versioned and visible in the UI
    context.log_artifact('model', body=b'abc is 123', target_path='model.txt', labels={'framework': 'xgboost'})
    context.log_artifact('html_result', body=b'<b> Some HTML </b>', target_path='result.html', viewer='web-app')
    context.log_artifact(TableArtifact('dataset', '1,2,3\n4,5,6\n',
                                       viewer='table', header=['A', 'B', 'C']),
                         target_path='dataset.csv')

    # create a chart output (will show in the pipelines UI)
    chart = ChartArtifact('chart.html')
    chart.labels = {'type': 'roc'}
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for i in range(1, 8):
        chart.add_row([i, i/20+0.75, 0.30-i/20])
    context.log_artifact(chart)
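
A hedged sketch of how a handler like this is typically executed; the file name 'job.py' and the parameter values are assumptions, not part of the example:

import mlrun

# wrap the file holding my_job in an MLRun function object (filename assumed)
fn = mlrun.code_to_function(name='my-job', filename='job.py',
                            kind='job', handler='my_job')

# run locally; params and inputs reach the handler through the runtime context
run = fn.run(params={'p1': 4, 'p2': 'a-string'},
             inputs={'infile.txt': './infile.txt'},
             local=True)
print(run.outputs)  # includes the logged 'accuracy' and 'loss' results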
Example #3
def my_func(ctx):
    # get parameters from context (or default)
    p1 = ctx.get_param('p1', 1)
    p2 = ctx.get_param('p2', 'a-string')

    # access input metadata, values, and inputs
    print(f'Run: {ctx.name} (uid={ctx.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(ctx.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(ctx.get_object('infile.txt').get()))

    # log scalar values (KFP metrics)
    ctx.log_result('accuracy', p1 * 2)
    ctx.log_result('latency', p1 * 3)

    # log various types of artifacts (and set UI viewers)
    ctx.log_artifact('test.txt', body=b'abc is 123')
    ctx.log_artifact('test.html', body=b'<b> Some HTML </b>', viewer='web-app')

    table = TableArtifact('tbl.csv',
                          '1,2,3\n4,5,6\n',
                          viewer='table',
                          header=['A', 'B', 'C'])
    ctx.log_artifact(table)

    chart = ChartArtifact('chart.html')
    chart.header = ['Hour', 'One', 'Two']
    for i in range(1, 4):
        chart.add_row([i, 1 + 2, 2 * i])
    ctx.log_artifact(chart)
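
TableArtifact can also be built straight from a pandas DataFrame rather than a CSV string; a small sketch reusing the ctx object from above (the df= keyword is part of the mlrun.artifacts API, the data here is illustrative):

import pandas as pd
from mlrun.artifacts import TableArtifact

df = pd.DataFrame({'A': [1, 4], 'B': [2, 5], 'C': [3, 6]})
ctx.log_artifact(TableArtifact('tbl', df=df, viewer='table'))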
Example #4
def my_job(context, p1=1, p2='x'):
    # the MLRun context and the parameters (p1, p2) are injected by the
    # runtime framework (e.g., Kubeflow Pipelines) via the function signature

    # access input metadata, values, files, and secrets (passwords)
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(context.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(
        context.get_input('infile.txt', 'infile.txt').get()))

    # Run some useful code e.g. ML training, data prep, etc.

    # log scalar result values (job result metrics)
    context.log_result('accuracy', p1 * 2)
    context.log_result('loss', p1 * 3)
    context.set_label('framework', 'sklearn')

    # log various types of artifacts (file, web page, table); they will be versioned and visible in the UI
    context.log_artifact('model',
                         body=b'abc is 123',
                         local_path='model.txt',
                         labels={'framework': 'xgboost'})
    context.log_artifact('html_result',
                         body=b'<b> Some HTML </b>',
                         local_path='result.html')
    context.log_artifact(TableArtifact('dataset',
                                       '1,2,3\n4,5,6\n',
                                       visible=True,
                                       header=['A', 'B', 'C']),
                         local_path='dataset.csv')

    # create a chart output (will show in the pipelines UI)
    chart = ChartArtifact('chart')
    chart.labels = {'type': 'roc'}
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for i in range(1, 8):
        chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20])
    context.log_artifact(chart)

    raw_data = {
        'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
        'age': [42, 52, 36, 24, 73],
        'testScore': [25, 94, 57, 62, 70]
    }
    df = pd.DataFrame(raw_data,
                      columns=['first_name', 'last_name', 'age', 'testScore'])
    context.log_dataset('mydf', df=df, stats=True)


#if __name__ == "__main__":
#    context = get_or_create_ctx('train')
#    p1 = context.get_param('p1', 1)
#    p2 = context.get_param('p2', 'a-string')
#    my_job(context, p1, p2)
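
Once such a run completes, a logged dataset can be read back as a DataFrame from the returned run object; a hedged sketch, reusing the fn object from the sketch after Example #2:

run = fn.run(handler='my_job', params={'p1': 2}, local=True)
mydf = run.artifact('mydf').as_df()  # DataItem -> pandas DataFrame
print(mydf.head())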
Example #5
def my_job(context, p1=1, p2="x"):
    # the MLRun context and the parameters (p1, p2) are injected by the
    # runtime framework (e.g., Kubeflow Pipelines) via the function signature

    # access input metadata, values, files, and secrets (passwords)
    print(f"Run: {context.name} (uid={context.uid})")
    print(f"Params: p1={p1}, p2={p2}")
    access_key = context.get_secret("ACCESS_KEY")
    print(f"Access key = {access_key}")
    input_file = context.get_input("infile.txt", "infile.txt").get()
    print(f"File\n{input_file}\n")

    # Run some useful code e.g. ML training, data prep, etc.

    # log scalar result values (job result metrics)
    context.log_result("accuracy", p1 * 2)
    context.log_result("loss", p1 * 3)
    context.set_label("framework", "sklearn")

    # log various types of artifacts (file, web page, table); they will be versioned and visible in the UI
    context.log_artifact(
        "model",
        body=b"abc is 123",
        local_path="model.txt",
        labels={"framework": "xgboost"},
    )
    context.log_artifact("html_result",
                         body=b"<b> Some HTML <b>",
                         local_path="result.html")

    # create a chart output (will show in the pipelines UI)
    chart = ChartArtifact("chart")
    chart.labels = {"type": "roc"}
    chart.header = ["Epoch", "Accuracy", "Loss"]
    for i in range(1, 8):
        chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20])
    context.log_artifact(chart)

    raw_data = {
        "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"],
        "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"],
        "age": [42, 52, 36, 24, 73],
        "testScore": [25, 94, 57, 62, 70],
    }
    df = pd.DataFrame(raw_data,
                      columns=["first_name", "last_name", "age", "testScore"])
    context.log_dataset("mydf", df=df, stats=True)
Example #6
def my_job(context, p1=1, p2='a-string'):

    # access input metadata, values, files, and secrets (passwords)
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    print('accesskey = {}'.format(context.get_secret('ACCESS_KEY')))
    print('file\n{}\n'.format(context.get_input('infile.txt').get()))

    # RUN some useful code e.g. ML training, data prep, etc.

    # log scalar result values (job result metrics)
    context.log_result('accuracy', p1 * 2)
    context.log_result('loss', p1 * 3)

    # log various types of artifacts (file, web page, table); they will be
    # versioned and visible in the UI
    context.log_artifact('model', body=b'abc is 123', local_path='model.txt')
    context.log_artifact('results',
                         local_path='results.html',
                         body=b'<b> Some HTML </b>')
    context.log_artifact(
        TableArtifact(
            'dataset',
            '1,2,3\n4,5,6\n',
            format='csv',
            viewer='table',
            header=['A', 'B', 'C'],
        ))

    # create a chart output (will show in the pipelines UI)
    chart = ChartArtifact('chart')
    chart.header = ['Epoch', 'Accuracy', 'Loss']
    for i in range(1, 8):
        chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20])
    context.log_artifact(chart)

    raw_data = {
        'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
        'age': [42, 52, 36, 24, 73],
        'postTestScore': [25, 94, 57, 62, 70],
    }
    df = pd.DataFrame(
        raw_data, columns=['first_name', 'last_name', 'age', 'postTestScore'])
    context.log_dataset('mydf', df=df)
Example #7
def my_job(context, p1=1, p2="a-string"):

    # access input metadata, values, files, and secrets (passwords)
    print(f"Run: {context.name} (uid={context.uid})")
    print(f"Params: p1={p1}, p2={p2}")
    print("accesskey = {}".format(context.get_secret("ACCESS_KEY")))
    print("file\n{}\n".format(
        context.get_input(str(tests_dir) +
                          "/assets/test_kfp_input_file.txt").get()))

    # RUN some useful code e.g. ML training, data prep, etc.

    # log scalar result values (job result metrics)
    context.log_result("accuracy", p1 * 2)
    context.log_result("loss", p1 * 3)

    # log various types of artifacts (file, web page, table); they will be
    # versioned and visible in the UI
    context.log_artifact("model", body=model_body, local_path="model.txt")
    context.log_artifact("results",
                         local_path="results.html",
                         body=results_body)

    # create a chart output (will show in the pipelines UI)
    chart = ChartArtifact("chart")
    chart.header = ["Epoch", "Accuracy", "Loss"]
    for i in range(1, 8):
        chart.add_row([i, i / 20 + 0.75, 0.30 - i / 20])
    context.log_artifact(chart)

    raw_data = {
        "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"],
        "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"],
        "age": [42, 52, 36, 24, 73],
        "postTestScore": [25, 94, 57, 62, 70],
    }
    df = pd.DataFrame(
        raw_data, columns=["first_name", "last_name", "age", "postTestScore"])
    context.log_dataset("mydf", df=df)
Example #8
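    # excerpt from a Keras training handler: mlctx, model, history, EPOCHS,
    # and MODEL_DIR are defined earlier in the original script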
    model_artifacts = os.path.join(mlctx.artifact_path, MODEL_DIR)

    # log the epoch advancement
    mlctx.logger.info("history:", history.history)
    print("MA:", model_artifacts)

    # Save the model file
    model.save("model.h5")
    # Produce training chart artifact
    chart = ChartArtifact("summary.html")
    chart.header = ["epoch", "accuracy", "val_accuracy", "loss", "val_loss"]
    for i in range(EPOCHS):
        chart.add_row([
            i + 1,
            history.history["accuracy"][i],
            history.history["val_accuracy"][i],
            history.history["loss"][i],
            history.history["val_loss"][i],
        ])
    summary = mlctx.log_artifact(chart,
                                 local_path="training-summary.html",
                                 artifact_path=model_artifacts)

    # Save weights
    model.save_weights("model-weights.h5")
    weights = mlctx.log_artifact(
        "model-weights",
        local_path="model-weights.h5",
        artifact_path=model_artifacts,
        db_key=False,
    )
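
Newer MLRun versions can also bundle the model file and its metrics into a single model artifact; a hedged sketch (keyword names per the context.log_model API, worth double-checking against your MLRun version):

mlctx.log_model("model",
                model_file="model.h5",
                metrics={"loss": float(history.history["loss"][-1]),
                         "accuracy": float(history.history["accuracy"][-1])},
                artifact_path=model_artifacts)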
Example #9
def model_server_tester(context,
                        table: DataItem,
                        addr: str,
                        label_column: str = "label",
                        model: str = '',
                        match_err: bool = False,
                        rows: int = 20):
    """ Test a model server 
    
    :param table:         csv/parquet table with test data
    :param addr:          function address/url
    :param label_column:  name of the label column in table
    :param model:         tested model name 
    :param match_err:     raise error on validation (require proper test set)
    :param rows:          number of rows to use from test set
    """

    table = table.as_df()

    y_list = table.pop(label_column).values.tolist()
    context.logger.info(f'testing with dataset against {addr}, model: {model}')
    if rows and rows < table.shape[0]:
        table = table.sample(rows)

    count = err_count = match = 0
    times = []
    for x, y in zip(table.values, y_list):
        count += 1
        event_data = json.dumps({"inputs": [x.tolist()]})
        had_err = False
        try:
            start = datetime.now()
            # event_data is already JSON-serialized, so send it as the raw body
            resp = requests.put(f'{addr}/v2/models/{model}/infer',
                                data=event_data)
            if not resp.ok:
                context.logger.error(f'bad function response:\n{resp.text}')
                err_count += 1
                continue
            # total_seconds() keeps latencies over one second from being truncated
            times.append(int((datetime.now() - start).total_seconds() * 1e6))

        except OSError as err:
            context.logger.error(
                f'error in request, data:{event_data}, error: {err}')
            err_count += 1
            continue

        resp_data = resp.json()
        print(resp_data)
        y_resp = resp_data['outputs'][0]
        if y == y_resp:
            match += 1

    context.log_result('total_tests', count)
    context.log_result('errors', err_count)
    context.log_result('match', match)
    if count - err_count > 0:
        times_arr = np.array(times)
        context.log_result('avg_latency', int(np.mean(times_arr)))
        context.log_result('min_latency', int(np.amin(times_arr)))
        context.log_result('max_latency', int(np.amax(times_arr)))

        chart = ChartArtifact('latency', header=['Test', 'Latency (microsec)'])
        for i in range(len(times)):
            chart.add_row([i + 1, int(times[i])])
        context.log_artifact(chart)

    context.logger.info(
        f'ran {count} tests: {err_count} errors, {match} matched the expected value'
    )

    if err_count:
        raise ValueError(f'failed on {err_count} of {count} tests')

    if match_err and match != count:
        raise ValueError(f'only {match} of {count} results match')
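
For reference, the tester speaks the v2 model-serving protocol; a minimal single-request sketch (the address, model name, and feature values are placeholders):

import json
import requests

addr, model = 'http://localhost:8080', 'my-model'
body = json.dumps({'inputs': [[5.1, 3.5, 1.4, 0.2]]})
resp = requests.put(f'{addr}/v2/models/{model}/infer', data=body)
print(resp.json()['outputs'][0])  # prediction for the single input row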
Example #10
    #os.makedirs(MODEL_DIR, exist_ok=True)
    model_artifacts = os.path.join(mlctx.artifact_path, MODEL_DIR)

    # log the epoch advancement
    mlctx.logger.info(f'history: {history.history}')
    print('MA:', model_artifacts)

    # Save the model file
    model.save('model.h5')
    # Produce training chart artifact
    chart = ChartArtifact('summary.html')
    chart.header = ['epoch', 'accuracy', 'val_accuracy', 'loss', 'val_loss']
    for i in range(EPOCHS):
        chart.add_row([
            i + 1, history.history['accuracy'][i],
            history.history['val_accuracy'][i], history.history['loss'][i],
            history.history['val_loss'][i]
        ])
    summary = mlctx.log_artifact(chart,
                                 local_path='training-summary.html',
                                 artifact_path=model_artifacts)

    # Save weights
    model.save_weights('model-weights.h5')
    weights = mlctx.log_artifact('model-weights',
                                 local_path='model-weights.h5',
                                 artifact_path=model_artifacts,
                                 db_key=False)

    # Log results
    mlctx.log_result('loss', float(history.history['loss'][EPOCHS - 1]))
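
The same pattern extends to the remaining history keys; a small sketch logging every final-epoch metric (key names taken from the chart header above):

for metric in ('accuracy', 'val_accuracy', 'val_loss'):
    mlctx.log_result(metric, float(history.history[metric][EPOCHS - 1]))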