Example 1
def infer_pipeline(info,
                   pipeline_,
                   datasource,
                   orchestration_backend,
                   orchestration_args,
                   processing_backend,
                   processing_args,
                   force):
    """Initiate a batch inference run of a selected pipeline"""
    # Resolve the latest training run; its run ID is reused for inference
    _, run_id = utils.resolve_pipeline_runs(
        info,
        pipeline_,
        run_type=PipelineRunTypes.training.name)

    if datasource is None:
        utils.check_datasource_commit(info)

    """Initiate an infer run of a selected pipeline"""
    utils.resolve_pipeline_creation(info=info,
                                    pipeline_type=PipelineRunTypes.infer.name,
                                    pipeline_=pipeline_,
                                    datasource=datasource,
                                    orchestration_backend=orchestration_backend,
                                    orchestration_args=orchestration_args,
                                    processing_backend=processing_backend,
                                    processing_args=processing_args,
                                    force=force,
                                    additional_args={'run_id': run_id})
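
These functions read like the bodies of click commands: each takes an `info` context object plus CLI options. As a rough sketch only, assuming standard click decorators and option names that are not confirmed by the source, `infer_pipeline` could be wired up like this:

import click

@click.command('infer')
@click.argument('pipeline_')
@click.option('--datasource', default=None)
@click.option('--orchestration-backend', default=None)
@click.option('--orchestration-args', default=None)
@click.option('--processing-backend', default=None)
@click.option('--processing-args', default=None)
@click.option('--force', is_flag=True, default=False)
@click.pass_obj  # `info` comes from the click context object
def infer_cmd(info, pipeline_, datasource, orchestration_backend,
              orchestration_args, processing_backend,
              processing_args, force):
    """Initiate a batch inference run of a selected pipeline."""
    # Delegate to the function defined above.
    infer_pipeline(info, pipeline_, datasource, orchestration_backend,
                   orchestration_args, processing_backend,
                   processing_args, force)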
Example 2
def evaluate(info, pipeline_):
    """Tool for the in-depth evaluation of a pipeline run"""
    p_uuid, r_uuid = resolve_pipeline_runs(info, pipeline_)

    notice('Downloading evaluation metrics and TensorBoard logs for '
           'pipeline ID {} and run ID {}. This might take some time if the '
           'model artifacts are large.\nYour patience is much '
           'appreciated!'.format(format_uuid(p_uuid),
                                 format_uuid(r_uuid)))

    log_dir = get_log_dir(p_uuid, r_uuid, info)
    eval_dir = get_eval_dir(p_uuid, r_uuid, info)

    # generate notebook
    nb = nbf.v4.new_notebook()
    nb['cells'] = [
        nbf.v4.new_code_cell(get_model_block(log_dir)),
        nbf.v4.new_code_cell(get_eval_block(eval_dir)),
    ]

    # write notebook
    final_out_path = (Path(click.get_app_dir(constants.APP_NAME)) /
                      constants.EVALUATION_NOTEBOOK)

    s = nbf.writes(nb)
    if isinstance(s, bytes):
        s = s.decode('utf8')

    # write the serialized notebook to disk
    with open(final_out_path, 'w') as f:
        f.write(s)

    os.system('jupyter notebook "{}"'.format(final_out_path))
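
The nbformat calls used above (`nbf.v4.new_notebook`, `nbf.v4.new_code_cell`, `nbf.writes`) are the standard API for building a notebook programmatically; a minimal self-contained sketch:

import nbformat as nbf
from pathlib import Path

# Build a two-cell notebook and serialize it to disk.
nb = nbf.v4.new_notebook()
nb['cells'] = [
    nbf.v4.new_markdown_cell('# Evaluation'),
    nbf.v4.new_code_cell('print("hello")'),
]
Path('evaluation.ipynb').write_text(nbf.writes(nb), encoding='utf8')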
Example 3
def logs_pipeline(info, source_id):
    """Get link to the logs of a pipeline"""

    p_uuid, r_uuid = utils.resolve_pipeline_runs(info, source_id)
    utils.notice(
        'Generating the logs URL for pipeline run ID {}. Please visit the '
        'URL to view all of your logs.'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    logs_url = utils.api_call(
        api.get_pipeline_logs_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_logs_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid
    )

    click.echo(logs_url)
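
The long method name is typical of a swagger/OpenAPI-generated client, and `utils.api_call` presumably wraps such calls with error handling. A minimal sketch of what a wrapper like that might look like (the exception handling and message format are assumptions, not the project's actual implementation):

def api_call(func, *args, **kwargs):
    # Call a generated client method and turn transport errors into a
    # readable message instead of a raw traceback. The ApiException
    # import assumes a standard swagger-codegen client layout.
    from ce_api.rest import ApiException
    try:
        return func(*args, **kwargs)
    except ApiException as e:
        raise RuntimeError(
            'API call failed with status {}: {}'.format(e.status, e.reason))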
Example 4
def statistics_pipeline(info, pipeline_):
    """Serve the statistics of a pipeline run"""

    p_uuid, r_uuid = utils.resolve_pipeline_runs(info,
                                                 pipeline_,
                                                 run_type=PipelineRunTypes.training.name)

    utils.notice('Generating statistics for the pipeline run ID {}. If your '
                 'browser opens up to a blank window, please refresh '
                 'the page once.'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    stat_artifact = utils.api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.SplitStatistics.name)

    ws_id = info[info[constants.ACTIVE_USER]][constants.ACTIVE_WORKSPACE]
    path = Path(click.get_app_dir(constants.APP_NAME),
                'statistics',
                str(ws_id),
                p_uuid,
                r_uuid)
    utils.download_artifact(artifact_json=stat_artifact[0].to_dict(),
                            path=path)

    # defer heavyweight imports until this command actually runs
    import tensorflow as tf
    from tensorflow_metadata.proto.v0 import statistics_pb2
    import panel as pn

    result = {}
    for split in os.listdir(path):
        stats_path = os.path.join(path, split, 'stats_tfrecord')
        serialized_stats = next(tf.compat.v1.io.tf_record_iterator(stats_path))
        stats = statistics_pb2.DatasetFeatureStatisticsList()
        stats.ParseFromString(serialized_stats)
        dataset_list = statistics_pb2.DatasetFeatureStatisticsList()
        for d in stats.datasets:
            d.name = split
            dataset_list.datasets.append(d)
        result[split] = dataset_list
    h = utils.get_statistics_html(result)

    pn.serve(panels=pn.pane.HTML(h, width=1200), show=True)
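
The per-split loop boils down to reading one serialized `DatasetFeatureStatisticsList` proto per split from a TFRecord file; the same logic as a small standalone helper (the helper name is mine):

import tensorflow as tf
from tensorflow_metadata.proto.v0 import statistics_pb2

def load_statistics(stats_path):
    # Read the first record of a TFRecord file and parse it as a
    # DatasetFeatureStatisticsList proto.
    record = next(tf.compat.v1.io.tf_record_iterator(stats_path))
    stats = statistics_pb2.DatasetFeatureStatisticsList()
    stats.ParseFromString(record)
    return stats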
Example 5
def model_pipeline(info, pipeline_, output_path):
    """Download the trained model to a specified location"""
    if os.path.exists(output_path):
        if not os.path.isdir(output_path):
            utils.error("Output path must be an empty directory!")
        if any(not f.startswith('.') for f in os.listdir(output_path)):
            utils.error("Output path must be an empty directory!")
    else:
        # create the output directory if it does not exist yet
        utils.declare("Creating directory {}..".format(output_path))
        os.makedirs(output_path)

    p_uuid, r_uuid = utils.resolve_pipeline_runs(info, pipeline_)

    utils.notice('Downloading the trained model from pipeline run '
                 'ID {}. This might take some time if the model '
                 'artifacts are large.\nYour patience is much '
                 'appreciated!'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    artifact = utils.api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.Deployer.name)

    spin = utils.Spinner()
    spin.start()
    if len(artifact) == 1:
        utils.download_artifact(artifact_json=artifact[0].to_dict(),
                                path=output_path)
        spin.stop()
    else:
        # stop the spinner before surfacing the error
        spin.stop()
        utils.error('Something unexpected happened! Please contact '
                    '[email protected] to get further information.')

    utils.declare('Model downloaded to: {}'.format(output_path))
    # TODO: [LOW] Make the Tensorflow version more dynamic
    utils.declare('Please note that the model is saved as a TensorFlow '
                  'SavedModel artifact, trained on TensorFlow 2.1.0.')
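
The empty-directory validation at the top of this function can be expressed as a small predicate; a sketch (the helper name is mine) that treats a directory containing only hidden files as empty, mirroring the check above:

import os

def is_empty_dir(path):
    # A directory counts as empty if it only contains hidden files.
    return os.path.isdir(path) and all(
        f.startswith('.') for f in os.listdir(path))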