コード例 #1
0
def create_datasource(info,
                      name,
                      ds_type,
                      source,
                      provider_id,
                      args):
    """Create a datasource"""
    click.echo('Registering datasource {}...'.format(name))

    # Turn the free-form CLI key=value arguments into a dict.
    extra_args = parse_unknown_options(args)

    ds_api = ce_api.DatasourcesApi(api_client(info))
    providers_api = ce_api.ProvidersApi(api_client(info))

    # Resolve the (possibly partial) provider id to a full UUID.
    providers = api_call(providers_api.get_loggedin_provider_api_v1_providers_get)
    provider_uuid = find_closest_uuid(provider_id, providers)

    payload = DatasourceCreate(
        name=name,
        type=ds_type,
        source=source,
        provider_id=provider_uuid,
        args=extra_args,
    )
    ds = api_call(ds_api.create_datasource_api_v1_datasources_post, payload)

    declare('Datasource registered with ID: {}'.format(
        format_uuid(ds.id)))
コード例 #2
0
def pull_pipeline(info, pipeline_id, output_path, no_docs):
    """Copy the configuration of a registered pipeline"""
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    ws_api = ce_api.WorkspacesApi(utils.api_client(info))

    active_user = info[constants.ACTIVE_USER]
    ws_id = info[active_user][constants.ACTIVE_WORKSPACE]

    # Resolve the (possibly partial) pipeline id within the active workspace.
    all_ps = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws_id)
    p_uuid = utils.find_closest_uuid(pipeline_id, all_ps)

    utils.declare('Pulling pipeline: {}'.format(utils.format_uuid(p_uuid)))

    pp = utils.api_call(p_api.get_pipeline_api_v1_pipelines_pipeline_id_get,
                        pipeline_id=p_uuid)

    # Short term fix for these getting into the exp_config
    c = pp.pipeline_config
    # pop(key, None) removes the key if present and is a no-op otherwise,
    # replacing the check-then-pop double lookup.
    c.pop('bq_args', None)
    c.pop('ai_platform_training_args', None)

    utils.save_config(c, output_path, no_docs)
コード例 #3
0
def list_pipelines(info, pipeline_id, ignore_empty):
    """List of registered pipelines"""
    utils.notice('Fetching pipeline(s). This might take a few seconds... \n')
    active_user = info[constants.ACTIVE_USER]
    ws = info[active_user][constants.ACTIVE_WORKSPACE]
    ws_api = ce_api.WorkspacesApi(utils.api_client(info))
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    d_api = ce_api.DatasourcesApi(utils.api_client(info))

    # All pipelines of the active workspace.
    pipelines = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws)

    # Resolve a (possibly partial) id filter to a full UUID so the
    # equality check in write_check below can match.
    if pipeline_id is not None:
        pipeline_id = utils.find_closest_uuid(pipeline_id, pipelines)

    pipelines.sort(key=lambda x: x.created_at)
    for p in pipelines:
        # Show the pipeline when it has runs (or --ignore-empty is off)
        # AND it matches the optional id filter.
        write_check = (len(p.pipeline_runs) > 0 or not ignore_empty) and \
                      (pipeline_id is None or pipeline_id == p.id)

        if write_check:
            # THIS WHOLE THING IS HERE FOR A REASON!!!!!!
            title = 'PIPELINE NAME: {} PIPELINE ID: {}'.format(
                p.name, utils.format_uuid(p.id))
            utils.declare(title)
            utils.declare('-' * len(title))
            if len(p.pipeline_runs) == 0:
                click.echo('No runs for this pipeline yet!')
            else:
                table = []
                for r in p.pipeline_runs:
                    # NOTE(review): three API round-trips per run (author,
                    # commit, datasource) — O(n) network calls; fine for small
                    # workspaces but slow on large ones.
                    author = utils.api_call(
                        p_api.get_pipeline_run_user_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_user_get,
                        p.id,
                        r.id)

                    # Resolve datasource
                    ds_commit = utils.api_call(
                        d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
                        r.datasource_commit_id)
                    ds = utils.api_call(
                        d_api.get_datasource_api_v1_datasources_ds_id_get,
                        ds_commit.datasource_id)

                    table.append({
                        'RUN ID': utils.format_uuid(r.id),
                        'TYPE': r.pipeline_run_type,
                        'CPUs PER WORKER': r.cpus_per_worker,
                        'WORKERS': r.workers,
                        'DATASOURCE': '{}_{}'.format(
                            ds.name,
                            utils.format_uuid(r.datasource_commit_id)),
                        'AUTHOR': author.email,
                        'CREATED AT': utils.format_date(r.start_time),
                    })
                click.echo(tabulate(table, headers='keys', tablefmt='plain'))
            click.echo('\n')
コード例 #4
0
def commit_datasource(ctx,
                      info,
                      datasource_id,
                      message,
                      schema,
                      orchestration_backend,
                      orchestration_args,
                      processing_backend,
                      processing_args,
                      force):
    """Creates a commit for a datasource"""
    api = ce_api.DatasourcesApi(api_client(info))

    if not force:
        confirmation('Committing will trigger a pipeline that will create a '
                     'snapshot of your datasources current state. '
                     'This might take a while. '
                     'Are you sure you wish to continue?', abort=True)

    # find closest, this a heavy call for now
    all_ds = api_call(api.get_datasources_api_v1_datasources_get)
    ds_uuid = find_closest_uuid(datasource_id, all_ds)

    schema_dict = dict()
    if schema:
        try:
            with open(schema, 'rt', encoding='utf8') as f:
                # BUGFIX: yaml.safe_load instead of yaml.load — plain load
                # without a Loader is deprecated and can construct arbitrary
                # Python objects from the file.
                schema_dict = yaml.safe_load(f)
        except (OSError, yaml.YAMLError):
            # BUGFIX: was a bare `except:` which also swallowed SystemExit
            # and KeyboardInterrupt; only file/parse errors are expected here.
            error('Badly formatted YAML!')
            schema_dict = dict()

    commit = api_call(
        api.create_datasource_commit_api_v1_datasources_ds_id_commits_post,
        DatasourceCommitCreate(
            message=message,
            used_schema=schema_dict,
            orchestration_backend=orchestration_backend,
            orchestration_args=orchestration_args,
            processing_backend=processing_backend,
            processing_args=processing_args,
        ),
        ds_id=ds_uuid,
    )
    declare('Commit successful: {}'.format(format_uuid(commit.id)))

    # Persist "<ds_id>:<commit_id>" as the user's active datasource commit.
    active_commit = '{datasource_id}:{commit_id}'.format(datasource_id=ds_uuid,
                                                         commit_id=commit.id)

    user = info[constants.ACTIVE_USER]
    info[user][constants.ACTIVE_DATASOURCE_COMMIT] = active_commit
    info.save()
    declare('Active datasource commit set to: {}'.format(
        format_uuid(active_commit)))
コード例 #5
0
def reset_password(info):
    """Send reset password link to registered email address"""
    confirmation('Are you sure you want to reset your password? This will '
                 'trigger an email for resetting your password and '
                 'clear cookies.', abort=True)
    check_login_status(info)

    # Look up the logged-in user's email address first.
    users_api = ce_api.UsersApi(api_client(info))
    current_user = api_call(users_api.get_loggedin_user_api_v1_users_me_get)

    # Trigger the reset email, then log the user out locally.
    login_api = ce_api.LoginApi(api_client(info))
    api_call(login_api.send_reset_pass_email_api_v1_login_email_resetpassword_post,
             AuthEmail(email=current_user.email))

    info[constants.ACTIVE_USER] = None
    info.save()
    declare("Reset password email sent to {}".format(current_user.email))
コード例 #6
0
def set_workspace(info, workspace_id):
    """Set workspace to be active"""
    active_user = info[constants.ACTIVE_USER]

    ws_api = ce_api.WorkspacesApi(api_client(info))
    workspaces = api_call(ws_api.get_loggedin_workspaces_api_v1_workspaces_get)
    ws_uuid = find_closest_uuid(workspace_id, workspaces)

    # Fetch the workspace once so an invalid id fails before saving.
    api_call(ws_api.get_workspace_api_v1_workspaces_workspace_id_get,
             ws_uuid)

    info[active_user][constants.ACTIVE_WORKSPACE] = ws_uuid
    info.save()
    declare('Active workspace set to id: {id}'.format(id=format_uuid(
        ws_uuid)))
コード例 #7
0
def create_workspace(ctx, info, provider_id, name):
    """Create a workspace and set it to be active."""
    click.echo('Registering the workspace "{}"...'.format(name))

    workspaces_api = ce_api.WorkspacesApi(api_client(info))
    providers_api = ce_api.ProvidersApi(api_client(info))

    # Resolve the (possibly partial) provider id to a full UUID first.
    providers = api_call(providers_api.get_loggedin_provider_api_v1_providers_get)
    provider_uuid = find_closest_uuid(provider_id, providers)

    ws = api_call(workspaces_api.create_workspace_api_v1_workspaces_post,
                  WorkspaceCreate(name=name,
                                  provider_id=provider_uuid))

    declare('Workspace registered.')
    # Immediately make the freshly created workspace the active one.
    ctx.invoke(set_workspace, workspace_id=ws.id)
コード例 #8
0
def get_eval_dir(p_uuid, r_uuid, info, d_path=None):
    # Local cache directory for the evaluator artifacts of this run.
    ws_id = info[info[constants.ACTIVE_USER]][constants.ACTIVE_WORKSPACE]

    if d_path is None:
        d_path = os.path.join(click.get_app_dir(constants.APP_NAME),
                              'eval_evaluator', str(ws_id), str(p_uuid),
                              str(r_uuid))

    config_path = os.path.join(d_path, 'eval_config.json')
    # Already downloaded and rewritten: serve from the cache.
    if os.path.exists(config_path):
        return d_path

    api = ce_api.PipelinesApi(api_client(info))
    artifact = api_call(
        api.
        get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.Evaluator.name)

    # TODO: [LOW] artifact[1] hard-coded because of upgrade to 0.21.4
    download_artifact(artifact[0].to_dict(), path=d_path)

    # replace google path with local path
    with open(config_path, 'r') as f:
        eval_config = json.load(f)

    # now override the google path to local path
    eval_config['modelLocations'][''] = d_path

    with open(config_path, 'w') as f:
        json.dump(eval_config, f)

    return d_path
コード例 #9
0
def template_pipeline(info, datasource, output_path, no_docs, no_datasource):
    """Copy the configuration of a registered pipeline"""
    # TODO: with the info we can do datasource specific templates later on
    from ce_cli.pretty_yaml import TEMPLATE_CONFIG

    if not no_datasource:
        active_user = info[constants.ACTIVE_USER]
        # Decide which datasource commit drives the template's feature list:
        # an explicit argument wins, otherwise fall back to the active commit.
        if datasource is not None:
            from ce_cli.utils import resolve_datasource_commits
            ds_id, c_id = resolve_datasource_commits(info, datasource)
        elif constants.ACTIVE_DATASOURCE_COMMIT in info[active_user]:
            ds_id, c_id = info[active_user][
                constants.ACTIVE_DATASOURCE_COMMIT].split(':')
        else:
            raise AssertionError('Please either select an active datasource '
                                 'commit to work on or explicitly define it.')

        api = ce_api.DatasourcesApi(utils.api_client(info))
        schema = utils.api_call(
            api.get_datasource_commit_schema_api_v1_datasources_ds_id_commits_commit_id_schema_get,
            ds_id=ds_id,
            commit_id=c_id)

        from ce_standards.standard_experiment import GlobalKeys
        # One empty config entry per feature found in the schema.
        TEMPLATE_CONFIG[GlobalKeys.FEATURES] = {feature: {}
                                                for feature in schema}

    utils.save_config(TEMPLATE_CONFIG, output_path, no_docs)
コード例 #10
0
def list_backends(info, backend_class):
    """Lists all created backends"""
    backends_api = ce_api.BackendsApi(api_client(info))
    backends = api_call(backends_api.get_loggedin_backend_api_v1_backends_get)

    # Optionally narrow the listing down to a single backend class.
    if backend_class:
        backends = [b for b in backends if b.backend_class == backend_class]

    declare('You have {count} different {class_}backend(s) so '
            'far. \n'.format(count=len(backends),
                             class_=backend_class +
                             ' ' if backend_class else ''))

    if backends:
        ordered = sorted(backends, key=lambda b: b.backend_class)
        rows = [{
            'ID': format_uuid(b.id),
            'Name': b.name,
            'Backend Class': b.backend_class,
            'Backend Type': b.type,
            'Created At': b.created_at
        } for b in ordered]
        click.echo(tabulate(rows, headers='keys', tablefmt='presto'))
        click.echo()
コード例 #11
0
def login(info):
    """Login with your username and password"""
    username = click.prompt('Please enter your email', type=str)
    password = click.prompt('Please enter your password', type=str,
                            hide_input=True)

    # Build an unauthenticated client: this call is what obtains the token.
    config = ce_api.Configuration()
    config.host = constants.API_HOST
    login_api = ce_api.LoginApi(ce_api.ApiClient(config))

    output = api_call(
        func=login_api.login_access_token_api_v1_login_access_token_post,
        username=username,
        password=password
    )

    info[constants.ACTIVE_USER] = username
    declare('Login successful!')

    # Store the token, creating the per-user section on first login.
    if username in info:
        info[username][constants.TOKEN] = output.access_token
    else:
        info[username] = {constants.TOKEN: output.access_token}

    info.save()
コード例 #12
0
def create_function(info, local_path, name, func_type, udf_name, message):
    """Register a custom function to use with the Core Engine"""
    click.echo('Registering the function {}.'.format(udf_name))

    # Ship the file base64-encoded so it can travel inside a JSON payload.
    with open(local_path, 'rb') as fp:
        encoded_file = base64.b64encode(fp.read()).decode()

    api = ce_api.FunctionsApi(api_client(info))
    payload = FunctionCreate(name=name,
                             function_type=func_type,
                             udf_path=udf_name,
                             message=message,
                             file_contents=encoded_file)
    api_call(api.create_function_api_v1_functions_post, payload)

    declare('Function registered.')
コード例 #13
0
def organization(info):
    """Info about the account which is currently logged in"""
    check_login_status(info)
    org_api = ce_api.OrganizationsApi(api_client(info))
    billing_api = ce_api.BillingApi(api_client(info))

    org = api_call(org_api.get_loggedin_organization_api_v1_organizations_get)
    bill = api_call(
        billing_api.get_organization_billing_api_v1_billing_organization_get)

    # One summary row: organization identity plus its billing counters.
    row = {
        'Organization Name': org.name,
        'Processed Datapoints total': bill.total_processed_datapoints,
        'Cost Total': bill.cost_total,
        'Processed Datapoints this Month':
            bill.processed_datapoints_this_month,
        'Cost This Month': bill.cost_this_month,
    }
    click.echo(tabulate([row], headers='keys', tablefmt='presto'))
コード例 #14
0
def create_provider(info, name, type, args):
    """Create a provider with a unique name"""
    parsed_args = parse_unknown_options(args)

    # Values pointing at local .json/.yaml files are inlined as parsed data.
    # (Reassigning existing keys while iterating items() is safe: the set of
    # keys never changes.)
    for k, v in parsed_args.items():
        if v.endswith('.json') and os.path.isfile(v):
            # BUGFIX: use a context manager — the bare open() leaked the
            # file handle.
            with open(v) as f:
                parsed_args[k] = json.load(f)
        elif v.endswith('.yaml') and os.path.isfile(v):
            # BUGFIX: safe_load — yaml.load without a Loader is deprecated
            # and can construct arbitrary Python objects.
            with open(v) as f:
                parsed_args[k] = yaml.safe_load(f)

    click.echo('Registering the provider.')

    api = ce_api.ProvidersApi(api_client(info))
    api_call(api.create_provider_api_v1_providers_post,
             ProviderCreate(name=name,
                            type=type,
                            args=parsed_args))
コード例 #15
0
def set_provider(info, provider_id):
    # Resolve and persist the active provider for the logged-in user.
    active_user = info[constants.ACTIVE_USER]

    providers_api = ce_api.ProvidersApi(api_client(info))
    providers = api_call(providers_api.get_loggedin_provider_api_v1_providers_get)
    provider_uuid = find_closest_uuid(provider_id, providers)

    info[active_user][constants.ACTIVE_PROVIDER] = provider_uuid
    info.save()
    declare('Active provider set to id: {id}'.format(
        id=format_uuid(provider_uuid)))
コード例 #16
0
def whoami(info):
    """Info about the account which is currently logged in"""
    check_login_status(info)
    users_api = ce_api.UsersApi(api_client(info))
    billing_api = ce_api.BillingApi(api_client(info))

    user = api_call(users_api.get_loggedin_user_api_v1_users_me_get)
    bill = api_call(
        billing_api.get_user_billing_api_v1_billing_users_user_id_get,
        user_id=user.id)

    # One summary row: identity plus usage/billing counters.
    row = {
        'Email': info[constants.ACTIVE_USER],
        'Full Name': user.full_name if user.full_name else '',
        'Pipelines Run': user.n_pipelines_executed,
        'Processed Datapoints total': bill.total_processed_datapoints,
        'Cost Total': bill.cost_total,
        'Processed Datapoints this Month':
            bill.processed_datapoints_this_month,
        'Cost This Month': bill.cost_this_month,
    }
    click.echo(tabulate([row], headers='keys', tablefmt='presto'))
コード例 #17
0
def create_backend(info, name, backend_class, backend_type, args):
    """Create backend for orchestration, processing, training, serving"""
    parsed_args = parse_unknown_options(args)

    # Values pointing at local .json/.yaml files are inlined as parsed data.
    for k, v in parsed_args.items():
        if v.endswith('.json') and os.path.isfile(v):
            # BUGFIX: context manager closes the handle (bare open() leaked).
            with open(v) as f:
                parsed_args[k] = json.load(f)
        elif v.endswith('.yaml') and os.path.isfile(v):
            # BUGFIX: safe_load — yaml.load without a Loader is deprecated
            # and can construct arbitrary Python objects.
            with open(v) as f:
                parsed_args[k] = yaml.safe_load(f)

    click.echo('Registering the backend.')

    api = ce_api.BackendsApi(api_client(info))
    api_call(
        api.create_backend_api_v1_backends_post,
        BackendCreate(backend_class=backend_class,
                      name=name,
                      type=backend_type,
                      args=parsed_args))
コード例 #18
0
def list_datasource_commits(info, datasource_id):
    """List of all the available datasources"""
    api = ce_api.DatasourcesApi(api_client(info))

    # find closest, this a heavy call for now
    all_ds = api_call(api.get_datasources_api_v1_datasources_get)
    ds_uuid = find_closest_uuid(datasource_id, all_ds)

    ds = api_call(
        api.get_datasource_api_v1_datasources_ds_id_get,
        ds_id=ds_uuid)

    declare('There are {count} different commits for datasource {name}'
            '.\n'.format(count=len(ds.datasource_commits), name=ds.name))

    # The active commit (if any) is stored as "<ds_id>:<commit_id>".
    user = info[constants.ACTIVE_USER]
    active_commit_id = None
    if constants.ACTIVE_DATASOURCE_COMMIT in info[user]:
        _, active_commit_id = \
            info[user][constants.ACTIVE_DATASOURCE_COMMIT].split(':')

    if ds.datasource_commits:
        rows = []
        for commit in ds.datasource_commits:
            # Each status lookup is its own API round-trip.
            status = api_call(
                api.get_datasource_commit_status_api_v1_datasources_ds_id_commits_commit_id_status_get,
                ds.id,
                commit.id,
            )
            rows.append({
                'Selection': '*' if commit.id == active_commit_id else '',
                'ID': format_uuid(commit.id),
                'Created At': format_date(commit.created_at),
                'Status': status,
                'Message': commit.message,
                'Bytes': commit.n_bytes,
                '# Datapoints': commit.n_datapoints,
                '# Features': commit.n_features
            })
        click.echo(tabulate(rows, headers='keys', tablefmt='presto'))
        click.echo()
コード例 #19
0
def list_versions(info, function_id):
    """List of versions for a selected custom function"""
    api = ce_api.FunctionsApi(api_client(info))
    functions = api_call(api.get_functions_api_v1_functions_get)
    f_uuid = find_closest_uuid(function_id, functions)

    versions = api_call(
        api.get_function_versions_api_v1_functions_function_id_versions_get,
        f_uuid)

    declare('Function with {id} has {count} '
            'versions.\n'.format(id=format_uuid(function_id),
                                 count=len(versions)))

    if versions:
        rows = [{'ID': format_uuid(v.id),
                 'Created At': v.created_at,
                 'Description': v.message} for v in versions]
        click.echo(tabulate(rows, headers='keys', tablefmt='presto'))
        click.echo()
コード例 #20
0
def update_function(info, function_id, local_path, udf_name, message):
    """Add a new version to a function and update it"""
    click.echo('Updating the function {}.'.format(
        format_uuid(function_id)))

    api = ce_api.FunctionsApi(api_client(info))

    # Resolve the (possibly partial) function id to a full UUID.
    functions = api_call(api.get_functions_api_v1_functions_get)
    f_uuid = find_closest_uuid(function_id, functions)

    # The file travels base64-encoded inside the JSON payload.
    with open(local_path, 'rb') as fp:
        encoded_file = base64.b64encode(fp.read()).decode()

    api_call(
        api.create_function_version_api_v1_functions_function_id_versions_post,
        FunctionVersionCreate(udf_path=udf_name,
                              message=message,
                              file_contents=encoded_file),
        f_uuid)

    declare('Function updated!')
コード例 #21
0
def pull_function_version(info, function_id, version_id, output_path):
    """Download a version of a given custom function"""
    api = ce_api.FunctionsApi(api_client(info))

    # Infer the function uuid and name
    functions = api_call(api.get_functions_api_v1_functions_get)
    f_uuid = find_closest_uuid(function_id, functions)
    f_name = [f.name for f in functions if f.id == f_uuid][0]

    # Infer the version uuid
    versions = api_call(
        api.get_function_versions_api_v1_functions_function_id_versions_get,
        f_uuid)
    v_uuid = find_closest_uuid(version_id, versions)

    notice('Downloading the function with the following parameters: \n'
           'Name: {f_name}\n'
           'function_id: {f_id}\n'
           'version_id: {v_id}\n'.format(f_name=f_name,
                                         f_id=format_uuid(f_uuid),
                                         v_id=format_uuid(v_uuid)))

    # Get the file and write it to the output path
    encoded_file = api_call(
        api.get_function_version_api_v1_functions_function_id_versions_version_id_get,
        f_uuid,
        v_uuid)

    # Derive the output path and download
    if output_path is None:
        # Default file name encodes both the function name and version.
        output_path = os.path.join(os.getcwd(), '{}@{}.py'.format(f_name,
                                                                  v_uuid))

    with open(output_path, 'wb') as out:
        out.write(base64.b64decode(encoded_file.file_contents))

    declare('File downloaded to {}'.format(output_path))
コード例 #22
0
def logs_pipeline(info, source_id):
    """Get link to the logs of a pipeline"""

    # Resolve the user-supplied reference into pipeline + run UUIDs.
    p_uuid, r_uuid = utils.resolve_pipeline_runs(info, source_id)
    utils.notice(
        'Generating logs url for the pipeline run ID {}. Please visit the '
        'url for all your logs.'.format(utils.format_uuid(r_uuid)))

    pipelines_api = ce_api.PipelinesApi(utils.api_client(info))
    url = utils.api_call(
        pipelines_api.get_pipeline_logs_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_logs_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid
    )

    click.echo(url)
コード例 #23
0
def list_functions(info):
    """List the given custom functions"""
    api = ce_api.FunctionsApi(api_client(info))
    functions = api_call(api.get_functions_api_v1_functions_get)
    declare('You have declared {count} different '
            'function(s) so far. \n'.format(count=len(functions)))

    if functions:
        rows = [{'ID': format_uuid(f.id),
                 'Name': f.name,
                 'Type': f.function_type,
                 'Created At': f.created_at} for f in functions]
        click.echo(tabulate(rows, headers='keys', tablefmt='presto'))
        click.echo()
コード例 #24
0
def statistics_pipeline(info, pipeline_):
    """Serve the statistics of a pipeline run"""

    # Resolve the user-supplied reference to a concrete (pipeline, run)
    # UUID pair, restricted to training runs.
    p_uuid, r_uuid = utils.resolve_pipeline_runs(info,
                                                 pipeline_,
                                                 run_type=PipelineRunTypes.training.name)

    utils.notice('Generating statistics for the pipeline run ID {}. If your '
                 'browser opens up to a blank window, please refresh '
                 'the page once.'.format(utils.format_uuid(r_uuid)))

    # Fetch the SplitStatistics artifact of this run.
    api = ce_api.PipelinesApi(utils.api_client(info))
    stat_artifact = utils.api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.SplitStatistics.name)

    # Cache the artifact under the app dir, namespaced by workspace and run.
    ws_id = info[info[constants.ACTIVE_USER]][constants.ACTIVE_WORKSPACE]
    path = Path(click.get_app_dir(constants.APP_NAME),
                'statistics',
                str(ws_id),
                p_uuid,
                r_uuid)
    utils.download_artifact(artifact_json=stat_artifact[0].to_dict(),
                            path=path)

    # Heavy imports are deferred so the CLI stays fast when this command
    # is not invoked.
    import tensorflow as tf
    from tensorflow_metadata.proto.v0 import statistics_pb2
    import panel as pn

    # Build one DatasetFeatureStatisticsList per split directory; each
    # dataset is renamed to its split so the rendered HTML labels it.
    result = {}
    for split in os.listdir(path):
        stats_path = os.path.join(path, split, 'stats_tfrecord')
        # Each split's stats file holds a single serialized proto record.
        serialized_stats = next(tf.compat.v1.io.tf_record_iterator(stats_path))
        stats = statistics_pb2.DatasetFeatureStatisticsList()
        stats.ParseFromString(serialized_stats)
        dataset_list = statistics_pb2.DatasetFeatureStatisticsList()
        for i, d in enumerate(stats.datasets):
            d.name = split
            dataset_list.datasets.append(d)
        result[split] = dataset_list
    h = utils.get_statistics_html(result)

    # Serve the generated HTML locally and open the browser.
    pn.serve(panels=pn.pane.HTML(h, width=1200), show=True)
コード例 #25
0
def list_datasources(info):
    """List of all the available datasources"""
    user = info[constants.ACTIVE_USER]
    # The active datasource commit (if any) is stored as "<ds_id>:<commit_id>";
    # only the commit half is needed for the selection marker below.
    if constants.ACTIVE_DATASOURCE_COMMIT in info[user]:
        active_dc = info[user][constants.ACTIVE_DATASOURCE_COMMIT]
        active_dc = active_dc.split(':')[1]
    else:
        active_dc = None
    api = ce_api.DatasourcesApi(api_client(info))
    ds_list = api_call(api.get_datasources_api_v1_datasources_get)

    declare('You have created {count} different '
            'datasource(s).\n'.format(count=len(ds_list)))
    declare("Use 'cengine datasource commits DATASOURCE_ID' see commits of  "
            "any datasource.\n")

    if ds_list:
        table = []
        for ds in ds_list:
            dcs = [x.id for x in ds.datasource_commits]
            # NOTE(review): 'status' is never updated past this placeholder —
            # presumably a per-commit status lookup was intended; confirm.
            status = 'No Commit'
            latest_created_at = 'No Commit'
            # BUGFIX: 'latest' was previously left undefined (first
            # iteration) or stale from the previous datasource when a
            # datasource has no commits; initialize it per iteration.
            latest = None
            if len(dcs) != 0:
                # BUGFIX: use max(), not min() — the most recent commit is
                # the one with the greatest created_at.
                latest = max(ds.datasource_commits,
                             key=attrgetter('created_at'))
                latest_created_at = format_date(latest.created_at)

            latest_n_bytes = latest.n_bytes if latest else ''
            latest_n_datapoints = latest.n_datapoints if latest else ''
            latest_n_features = latest.n_features if latest else ''

            table.append({'Selection': '*' if active_dc in dcs else '',
                          'ID': format_uuid(ds.id),
                          'Name': ds.name,
                          'Type': ds.type,
                          '# Commits': len(ds.datasource_commits),
                          'Latest Commit Status': status,
                          'Latest Commit Date': latest_created_at,
                          'Latest Commit Bytes': latest_n_bytes,
                          'Latest Commit # Datapoints': latest_n_datapoints,
                          'Latest Commit # Features': latest_n_features
                          })
        click.echo(tabulate(table, headers='keys', tablefmt='presto'))
        click.echo()
コード例 #26
0
def peek_datasource(info, source_id, sample_size):
    """Randomly sample datasource and print to console."""
    api = ce_api.DatasourcesApi(api_client(info))

    # Resolve the reference into concrete datasource and commit ids.
    ds_id, c_id = resolve_datasource_commits(info, source_id)

    declare('Randomly generating {} samples from datasource {}:{}'.format(
        sample_size,
        format_uuid(ds_id),
        format_uuid(c_id)
    ))

    sample = api_call(
        api.get_datasource_commit_data_sample_api_v1_datasources_ds_id_commits_commit_id_data_get,
        ds_id=ds_id,
        commit_id=c_id,
        sample_size=sample_size)

    click.echo(tabulate(sample, headers='keys', tablefmt='plain'))
コード例 #27
0
def get_log_dir(p_uuid, r_uuid, info):
    # TODO: how do i kow that the pipeline is in this workspace, maybe i
    #   changed the ws
    ws_id = info[info[constants.ACTIVE_USER]][constants.ACTIVE_WORKSPACE]
    d_path = os.path.join(click.get_app_dir(constants.APP_NAME),
                          'eval_trainer', str(ws_id), str(p_uuid), str(r_uuid))

    # Serve from the local cache when the trainer output is already there.
    if os.path.exists(os.path.join(d_path, 'eval_model_dir')):
        return d_path

    # Otherwise fetch the Trainer component's artifact and cache it locally.
    api = ce_api.PipelinesApi(api_client(info))
    artifacts = api_call(
        api.
        get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.Trainer.name)
    download_artifact(artifacts[0].to_dict(), path=d_path)
    return d_path
コード例 #28
0
def model_pipeline(info, pipeline_, output_path):
    """Download the trained model to a specified location"""
    # Validate the output location: it must be an empty (or missing) dir.
    if os.path.exists(output_path) and os.path.isdir(output_path):
        if not [f for f in os.listdir(output_path) if
                not f.startswith('.')] == []:
            utils.error("Output path must be an empty directory!")
    if os.path.exists(output_path) and not os.path.isdir(output_path):
        utils.error("Output path must be an empty directory!")
    if not os.path.exists(output_path):
        # BUGFIX: this message was a bare string expression (a no-op) and
        # the directory was never created.
        click.echo("Creating directory {}..".format(output_path))
        os.makedirs(output_path)

    p_uuid, r_uuid = utils.resolve_pipeline_runs(info, pipeline_)

    utils.notice('Downloading the trained model from pipeline run '
                 'ID {}. This might take some time if the model '
                 'resources are significantly large in size.\nYour patience '
                 'is much appreciated!'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    artifact = utils.api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.Deployer.name)

    spin = utils.Spinner()
    spin.start()
    try:
        if len(artifact) == 1:
            utils.download_artifact(artifact_json=artifact[0].to_dict(),
                                    path=output_path)
        else:
            utils.error('Something unexpected happened! Please contact '
                        '[email protected] to get further information.')
    finally:
        # BUGFIX: the spinner previously kept running on the error path.
        spin.stop()

    utils.declare('Model downloaded to: {}'.format(output_path))
    # TODO: [LOW] Make the Tensorflow version more dynamic
    # BUGFIX: fixed the "Tensoflow" typo in the user-facing message.
    utils.declare('Please note that the model is saved as a SavedModel '
                  'Tensorflow artifact, trained on Tensorflow 2.1.0.')
コード例 #29
0
def push_pipeline(info, config_path, pipeline_name):
    """Register a pipeline with the selected configuration"""
    active_user = info[constants.ACTIVE_USER]
    ws_id = info[active_user][constants.ACTIVE_WORKSPACE]

    config = None
    try:
        with open(config_path, 'rt', encoding='utf8') as f:
            # BUGFIX: safe_load — yaml.load without a Loader is deprecated
            # and can construct arbitrary Python objects.
            config = yaml.safe_load(f)
    except yaml.YAMLError:
        # BUGFIX: was a bare `except:` that also swallowed missing-file
        # errors and KeyboardInterrupt; only parse errors map to this
        # message.
        utils.error('Badly formatted YAML!')

    api = ce_api.PipelinesApi(utils.api_client(info))
    p = utils.api_call(api.create_pipeline_api_v1_pipelines_post,
                       PipelineCreate(name=pipeline_name,
                                      pipeline_config=config,
                                      workspace_id=ws_id))

    # BUGFIX: the success message had no '{id}' placeholder, so the formatted
    # pipeline id was silently dropped.
    utils.declare('Pipeline pushed successfully with ID: {id}!'.format(
        id=utils.format_uuid(p.id)))

    utils.declare(
        "Use `cengine pipeline train {} --datasource DS_COMMIT` "
        "to launch a training pipeline!".format(utils.format_uuid(p.id)))
コード例 #30
0
def list_workspaces(info):
    """List of all workspaces available to the user"""
    user = info[constants.ACTIVE_USER]

    api = ce_api.WorkspacesApi(api_client(info))
    workspaces = api_call(api.get_loggedin_workspaces_api_v1_workspaces_get)

    # Mark the currently active workspace, if one has been selected.
    active_w = None
    if constants.ACTIVE_WORKSPACE in info[user]:
        active_w = info[user][constants.ACTIVE_WORKSPACE]

    declare('You have created {count} different '
            'workspace(s). \n'.format(count=len(workspaces)))
    if workspaces:
        rows = [{'Selection': '*' if w.id == active_w else '',
                 'ID': format_uuid(w.id),
                 'Name': w.name,
                 'Provider': format_uuid(w.provider_id)} for w in workspaces]
        click.echo(tabulate(rows, headers='keys', tablefmt='presto'))
        click.echo()