Example 1
def compare(info):
    """Tool to compare pipeline runs based on defined metrics"""
    notice('Downloading evaluation metrics and tensorboard logs for all '
           'pipeline runs in the workspace. This might take some time if '
           'there are many pipelines.\nYour patience is much appreciated!')

    # Reduce the info mapping to the active user's entry before embedding
    # it in the generated notebook.
    temp_info = {
        constants.ACTIVE_USER: info[constants.ACTIVE_USER],
        info[constants.ACTIVE_USER]: info[info[constants.ACTIVE_USER]]
    }

    # generate notebook
    nb = nbf.v4.new_notebook()
    nb['cells'] = [
        nbf.v4.new_code_cell(import_block()),
        nbf.v4.new_code_cell(info_block(temp_info)),
        nbf.v4.new_code_cell(application_block()),
        nbf.v4.new_code_cell(interface_block()),
    ]

    # write notebook
    final_out_path = os.path.join(click.get_app_dir(constants.APP_NAME),
                                  constants.COMPARISON_NOTEBOOK)
    s = nbf.writes(nb)
    if isinstance(s, bytes):
        s = s.decode('utf8')

    with open(final_out_path, 'w') as f:
        f.write(s)

    # serve notebook
    os.system('panel serve "{}" --show'.format(final_out_path))
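The cell-generator helpers referenced above (import_block, info_block, application_block, interface_block) are defined elsewhere in the module; each one simply returns the source string for one notebook cell. A minimal sketch of that pattern with the nbformat API, using a hypothetical stand-in for import_block:

import nbformat as nbf

def import_block():
    # hypothetical stand-in: the real helper returns the import
    # statements the comparison notebook needs
    return 'import panel as pn'

nb = nbf.v4.new_notebook()
nb['cells'] = [nbf.v4.new_code_cell(import_block())]
print(nbf.writes(nb))  # the notebook's JSON source, ready to write to disk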
Example 2
def evaluate(info, pipeline_):
    """Tool for the in-depth evaluation of a pipeline run"""
    p_uuid, r_uuid = resolve_pipeline_runs(info, pipeline_)

    notice('Downloading evaluation metrics and tensorboard logs for '
           'pipeline ID {} and run ID {}. This might take some time if the '
           'model resources are significantly large in size.\nYour patience '
           'is much appreciated!'.format(format_uuid(p_uuid),
                                         format_uuid(r_uuid)))

    log_dir = get_log_dir(p_uuid, r_uuid, info)
    eval_dir = get_eval_dir(p_uuid, r_uuid, info)

    # generate notebook
    nb = nbf.v4.new_notebook()
    nb['cells'] = [
        nbf.v4.new_code_cell(get_model_block(log_dir)),
        nbf.v4.new_code_cell(get_eval_block(eval_dir)),
    ]

    # write notebook
    final_out_path = (Path(click.get_app_dir(constants.APP_NAME)) /
                      constants.EVALUATION_NOTEBOOK)

    s = nbf.writes(nb)
    if isinstance(s, bytes):
        s = s.decode('utf8')

    with open(final_out_path, 'w') as f:
        f.write(s)

    os.system('jupyter notebook "{}"'.format(final_out_path))
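os.system hands the command to a shell, so a final_out_path containing spaces or quotes depends entirely on the manual quoting above. A sketch of a safer alternative using the standard-library subprocess module, assuming jupyter is on the PATH:

import subprocess

# the argument-list form bypasses the shell, so no quoting is needed
subprocess.run(['jupyter', 'notebook', str(final_out_path)], check=True)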
Example 3
def list_pipelines(info, pipeline_id, ignore_empty):
    """List of registered pipelines"""
    utils.notice('Fetching pipeline(s). This might take a few seconds... \n')
    active_user = info[constants.ACTIVE_USER]
    ws = info[active_user][constants.ACTIVE_WORKSPACE]
    ws_api = ce_api.WorkspacesApi(utils.api_client(info))
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    d_api = ce_api.DatasourcesApi(utils.api_client(info))

    pipelines = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws)

    if pipeline_id is not None:
        pipeline_id = utils.find_closest_uuid(pipeline_id, pipelines)

    pipelines.sort(key=lambda x: x.created_at)
    for p in pipelines:
        write_check = (len(p.pipeline_runs) > 0 or not ignore_empty) and \
                      (pipeline_id is None or pipeline_id == p.id)

        if write_check:
            # Print a header with the pipeline name and ID before
            # tabulating its runs.
            title = 'PIPELINE NAME: {} PIPELINE ID: {}'.format(
                p.name, utils.format_uuid(p.id))
            utils.declare(title)
            utils.declare('-' * len(title))
            if len(p.pipeline_runs) == 0:
                click.echo('No runs for this pipeline yet!')
            else:
                table = []
                for r in p.pipeline_runs:
                    author = utils.api_call(
                        p_api.get_pipeline_run_user_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_user_get,
                        p.id,
                        r.id)

                    # Resolve datasource
                    ds_commit = utils.api_call(
                        d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
                        r.datasource_commit_id)
                    ds = utils.api_call(
                        d_api.get_datasource_api_v1_datasources_ds_id_get,
                        ds_commit.datasource_id)

                    table.append({
                        'RUN ID': utils.format_uuid(r.id),
                        'TYPE': r.pipeline_run_type,
                        'CPUs PER WORKER': r.cpus_per_worker,
                        'WORKERS': r.workers,
                        'DATASOURCE': '{}_{}'.format(
                            ds.name,
                            utils.format_uuid(r.datasource_commit_id)),
                        'AUTHOR': author.email,
                        'CREATED AT': utils.format_date(r.start_time),
                    })
                click.echo(tabulate(table, headers='keys', tablefmt='plain'))
            click.echo('\n')
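utils.find_closest_uuid, used here and in the examples below, lets the caller pass a shortened UUID instead of the full one. Its implementation is not shown; a plausible sketch, assuming each API object exposes an .id string, is plain prefix matching:

def find_closest_uuid(prefix, entities):
    # hypothetical sketch: resolve a possibly partial UUID against a
    # list of API objects that carry an .id attribute
    matches = [e.id for e in entities if e.id.startswith(prefix)]
    if len(matches) != 1:
        raise ValueError('Expected exactly one match for {!r}, '
                         'got {}'.format(prefix, len(matches)))
    return matches[0]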
Example 4
def logs_pipeline(info, source_id):
    """Get link to the logs of a pipeline"""

    p_uuid, r_uuid = utils.resolve_pipeline_runs(info, source_id)
    utils.notice(
        'Generating logs URL for pipeline run ID {}. Please visit the '
        'URL to view all of your logs.'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    logs_url = utils.api_call(
        api.get_pipeline_logs_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_logs_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid
    )

    click.echo(logs_url)
Example 5
def statistics_pipeline(info, pipeline_):
    """Serve the statistics of a pipeline run"""

    p_uuid, r_uuid = utils.resolve_pipeline_runs(info,
                                                 pipeline_,
                                                 run_type=PipelineRunTypes.training.name)

    utils.notice('Generating statistics for the pipeline run ID {}. If your '
                 'browser opens up to a blank window, please refresh '
                 'the page once.'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    stat_artifact = utils.api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.SplitStatistics.name)

    ws_id = info[info[constants.ACTIVE_USER]][constants.ACTIVE_WORKSPACE]
    path = Path(click.get_app_dir(constants.APP_NAME),
                'statistics',
                str(ws_id),
                p_uuid,
                r_uuid)
    utils.download_artifact(artifact_json=stat_artifact[0].to_dict(),
                            path=path)

    import tensorflow as tf
    from tensorflow_metadata.proto.v0 import statistics_pb2
    import panel as pn

    # Each split directory holds a serialized DatasetFeatureStatisticsList
    # in a single TFRecord; read it back and relabel each dataset with the
    # split name so the splits can be told apart in the combined view.
    result = {}
    for split in os.listdir(path):
        stats_path = os.path.join(path, split, 'stats_tfrecord')
        serialized_stats = next(tf.compat.v1.io.tf_record_iterator(stats_path))
        stats = statistics_pb2.DatasetFeatureStatisticsList()
        stats.ParseFromString(serialized_stats)
        dataset_list = statistics_pb2.DatasetFeatureStatisticsList()
        for d in stats.datasets:
            d.name = split
            dataset_list.datasets.append(d)
        result[split] = dataset_list
    h = utils.get_statistics_html(result)

    pn.serve(panels=pn.pane.HTML(h, width=1200), show=True)
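tf.compat.v1.io.tf_record_iterator is a TensorFlow 1.x compatibility shim. Under TensorFlow 2 the same single record can be read eagerly with tf.data.TFRecordDataset, for example:

import tensorflow as tf

# take the first (and only) serialized record from the stats file
serialized_stats = next(iter(tf.data.TFRecordDataset(stats_path))).numpy()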
Example 6
def model_pipeline(info, pipeline_, output_path):
    """Download the trained model to a specified location"""
    # The output path must be an empty directory (or not exist yet).
    if os.path.exists(output_path) and os.path.isdir(output_path):
        if [f for f in os.listdir(output_path) if not f.startswith('.')]:
            utils.error("Output path must be an empty directory!")
    if os.path.exists(output_path) and not os.path.isdir(output_path):
        utils.error("Output path must be an empty directory!")
    if not os.path.exists(output_path):
        utils.declare("Creating directory {}..".format(output_path))
        os.makedirs(output_path)

    p_uuid, r_uuid = utils.resolve_pipeline_runs(info, pipeline_)

    utils.notice('Downloading the trained model from pipeline run '
                 'ID {}. This might take some time if the model '
                 'resources are significantly large in size.\nYour patience '
                 'is much appreciated!'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    artifact = utils.api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.Deployer.name)

    spin = utils.Spinner()
    spin.start()
    if len(artifact) == 1:
        utils.download_artifact(artifact_json=artifact[0].to_dict(),
                                path=output_path)
        spin.stop()
    else:
        spin.stop()  # stop the spinner before bailing out
        utils.error('Something unexpected happened! Please contact '
                    '[email protected] to get further information.')

    utils.declare('Model downloaded to: {}'.format(output_path))
    # TODO: [LOW] Make the Tensorflow version more dynamic
    utils.declare('Please note that the model is saved as a SavedModel '
                  'TensorFlow artifact, trained on TensorFlow 2.1.0.')
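Once downloaded, the SavedModel directory can be loaded back with the standard TensorFlow 2 API for a quick local sanity check:

import tensorflow as tf

model = tf.saved_model.load(output_path)  # the directory downloaded above
print(list(model.signatures))  # inspect the available serving signatures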
Example 7
def pull_function_version(info, function_id, version_id, output_path):
    """Download a version of a given custom function"""
    api = ce_api.FunctionsApi(api_client(info))

    # Infer the function uuid and name
    f_list = api_call(api.get_functions_api_v1_functions_get)
    f_uuid = find_closest_uuid(function_id, f_list)
    f_name = [f.name for f in f_list if f.id == f_uuid][0]

    # Infer the version uuid
    v_list = api_call(
        api.get_function_versions_api_v1_functions_function_id_versions_get,
        f_uuid)
    v_uuid = find_closest_uuid(version_id, v_list)

    notice('Downloading the function with the following parameters: \n'
           'Name: {f_name}\n'
           'function_id: {f_id}\n'
           'version_id: {v_id}\n'.format(f_name=f_name,
                                         f_id=format_uuid(f_uuid),
                                         v_id=format_uuid(v_uuid)))

    # Fetch the (base64-encoded) file contents
    encoded_file = api_call(
        api.get_function_version_api_v1_functions_function_id_versions_version_id_get,
        f_uuid,
        v_uuid)

    # Derive a default output path if none was given
    if output_path is None:
        output_path = os.path.join(os.getcwd(), '{}@{}.py'.format(f_name,
                                                                  v_uuid))

    with open(output_path, 'wb') as f:
        f.write(base64.b64decode(encoded_file.file_contents))

    declare('File downloaded to {}'.format(output_path))
Example 8
def get_pipeline_status(info, pipeline_id):
    """Get status of started pipelines"""
    utils.notice('Fetching pipeline(s). This might take a few seconds...\n')
    active_user = info[constants.ACTIVE_USER]
    ws = info[active_user][constants.ACTIVE_WORKSPACE]

    ws_api = ce_api.WorkspacesApi(utils.api_client(info))
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    d_api = ce_api.DatasourcesApi(utils.api_client(info))

    pipelines = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws)

    if pipeline_id is not None:
        pipeline_id = utils.find_closest_uuid(pipeline_id, pipelines)

    pipelines.sort(key=lambda x: x.created_at)
    for p in pipelines:
        write_check = (len(p.pipeline_runs) > 0) and \
                      (pipeline_id is None or pipeline_id == p.id)

        if write_check:
            title = 'PIPELINE NAME: {} PIPELINE ID: {}'.format(
                p.name, utils.format_uuid(p.id))
            utils.declare(title)
            utils.declare('-' * len(title))

            table = []
            for r in p.pipeline_runs:
                run = utils.api_call(
                    p_api.get_pipeline_run_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_get,
                    p.id,
                    r.id)

                # Resolve datasource
                ds_commit = utils.api_call(
                    d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
                    r.datasource_commit_id)
                ds = utils.api_call(
                    d_api.get_datasource_api_v1_datasources_ds_id_get,
                    ds_commit.datasource_id)

                # Running pipelines have no end_time yet; fall back to the
                # elapsed wall-clock time in that case.
                if run.end_time:
                    td = run.end_time - run.start_time
                else:
                    td = datetime.now(timezone.utc) - run.start_time

                table.append({
                    'RUN ID': utils.format_uuid(run.id),
                    'TYPE': run.pipeline_run_type,
                    'STATUS': run.status,
                    'DATASOURCE': '{}_{}'.format(
                        ds.name, utils.format_uuid(run.datasource_commit_id)),
                    'DATAPOINTS': '{}'.format(ds_commit.n_datapoints),
                    'START TIME': utils.format_date(run.start_time),
                    'DURATION': utils.format_timedelta(td),
                })

            click.echo(tabulate(table, headers='keys', tablefmt='plain'))
            click.echo('\n')
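utils.format_timedelta is not shown in these examples. A minimal sketch, assuming the DURATION column wants a compact H:MM:SS rendering of a datetime.timedelta:

def format_timedelta(td):
    # hypothetical sketch: render a datetime.timedelta as H:MM:SS
    total_seconds = int(td.total_seconds())
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return '{}:{:02d}:{:02d}'.format(hours, minutes, seconds)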