Example 1
def pull_pipeline(info, pipeline_id, output_path, no_docs):
    """Copy the configuration of a registered pipeline"""
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    ws_api = ce_api.WorkspacesApi(utils.api_client(info))

    active_user = info[constants.ACTIVE_USER]
    ws_id = info[active_user][constants.ACTIVE_WORKSPACE]

    all_ps = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws_id)
    p_uuid = utils.find_closest_uuid(pipeline_id, all_ps)

    utils.declare('Pulling pipeline: {}'.format(utils.format_uuid(p_uuid)))

    pp = utils.api_call(p_api.get_pipeline_api_v1_pipelines_pipeline_id_get,
                        pipeline_id=p_uuid)

    # Short-term fix: these backend arguments should not end up in the exp_config
    c = pp.pipeline_config
    c.pop('bq_args', None)
    c.pop('ai_platform_training_args', None)

    utils.save_config(c, output_path, no_docs)
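
The find_closest_uuid helper used throughout these examples is not shown here. A minimal sketch of the prefix-matching behaviour it appears to provide, assuming every API object exposes a string id attribute (this is an illustration, not the CLI's actual implementation):

def find_closest_uuid(partial_id, items):
    """Resolve a possibly shortened id against a list of API objects (sketch).

    Returns the full id of the single item whose id starts with the given
    prefix; raises if the prefix is ambiguous or matches nothing.
    """
    matches = [item.id for item in items if item.id.startswith(partial_id)]
    if len(matches) != 1:
        raise ValueError('{} matches for id prefix {!r}'.format(
            len(matches), partial_id))
    return matches[0]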
Example 2
def create_datasource(info,
                      name,
                      ds_type,
                      source,
                      provider_id,
                      args):
    """Create a datasource"""
    click.echo('Registering datasource {}...'.format(name))

    parsed_args = parse_unknown_options(args)

    api = ce_api.DatasourcesApi(api_client(info))
    p_api = ce_api.ProvidersApi(api_client(info))

    p_list = api_call(p_api.get_loggedin_provider_api_v1_providers_get)
    p_uuid = find_closest_uuid(provider_id, p_list)

    ds = api_call(
        api.create_datasource_api_v1_datasources_post,
        DatasourceCreate(
            name=name,
            type=ds_type,
            source=source,
            provider_id=p_uuid,
            args=parsed_args,
        ))

    declare('Datasource registered with ID: {}'.format(
        format_uuid(ds.id)))
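
parse_unknown_options is likewise not defined in these snippets. A plausible sketch, assuming the extra datasource arguments arrive as KEY=VALUE strings on the command line (the exact format is an assumption):

def parse_unknown_options(args):
    """Turn trailing KEY=VALUE arguments into a dict (hypothetical sketch)."""
    parsed = {}
    for arg in args:
        if '=' not in arg:
            raise ValueError('Expected KEY=VALUE, got {!r}'.format(arg))
        key, value = arg.split('=', 1)
        parsed[key.lstrip('-')] = value
    return parsed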
Example 3
def list_pipelines(info, pipeline_id, ignore_empty):
    """List of registered pipelines"""
    utils.notice('Fetching pipeline(s). This might take a few seconds... \n')
    active_user = info[constants.ACTIVE_USER]
    ws = info[active_user][constants.ACTIVE_WORKSPACE]
    ws_api = ce_api.WorkspacesApi(utils.api_client(info))
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    d_api = ce_api.DatasourcesApi(utils.api_client(info))

    pipelines = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws)

    if pipeline_id is not None:
        pipeline_id = utils.find_closest_uuid(pipeline_id, pipelines)

    pipelines.sort(key=lambda x: x.created_at)
    for p in pipelines:
        write_check = (len(p.pipeline_runs) > 0 or not ignore_empty) and \
                      (pipeline_id is None or pipeline_id == p.id)

        if write_check:
            # NOTE: keep this block; it prints the pipeline title, a separator
            # and the table of its runs.
            title = 'PIPELINE NAME: {} PIPELINE ID: {}'.format(
                p.name, utils.format_uuid(p.id))
            utils.declare(title)
            utils.declare('-' * len(title))
            if len(p.pipeline_runs) == 0:
                click.echo('No runs for this pipeline yet!')
            else:
                table = []
                for r in p.pipeline_runs:
                    author = utils.api_call(
                        p_api.get_pipeline_run_user_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_user_get,
                        p.id,
                        r.id)

                    # Resolve datasource
                    ds_commit = utils.api_call(
                        d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
                        r.datasource_commit_id)
                    ds = utils.api_call(
                        d_api.get_datasource_api_v1_datasources_ds_id_get,
                        ds_commit.datasource_id)

                    table.append({
                        'RUN ID': utils.format_uuid(r.id),
                        'TYPE': r.pipeline_run_type,
                        'CPUs PER WORKER': r.cpus_per_worker,
                        'WORKERS': r.workers,
                        'DATASOURCE': '{}_{}'.format(
                            ds.name,
                            utils.format_uuid(r.datasource_commit_id)),
                        'AUTHOR': author.email,
                        'CREATED AT': utils.format_date(r.start_time),
                    })
                click.echo(tabulate(table, headers='keys', tablefmt='plain'))
            click.echo('\n')
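
The run loop above issues one user lookup and two datasource lookups per run, so a pipeline with many runs against the same datasource commit repeats identical calls. A small memoising helper would cut that down; the cache and function name below are illustrative, only the two API calls are taken from the loop above, and the same utils module and d_api client are assumed:

_ds_name_cache = {}  # commit_id -> datasource name (illustrative cache)

def datasource_name_for_commit(d_api, commit_id):
    """Resolve and memoise the datasource name behind a commit (sketch)."""
    if commit_id not in _ds_name_cache:
        ds_commit = utils.api_call(
            d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
            commit_id)
        ds = utils.api_call(
            d_api.get_datasource_api_v1_datasources_ds_id_get,
            ds_commit.datasource_id)
        _ds_name_cache[commit_id] = ds.name
    return _ds_name_cache[commit_id]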
Example 4
def set_provider(info, provider_id):
    user = info[constants.ACTIVE_USER]

    api = ce_api.ProvidersApi(api_client(info))
    p_list = api_call(api.get_loggedin_provider_api_v1_providers_get)
    p_id = find_closest_uuid(provider_id, p_list)

    info[user][constants.ACTIVE_PROVIDER] = p_id
    info.save()
    declare('Active provider set to id: {id}'.format(id=format_uuid(p_id)))
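
set_provider, set_workspace and commit_datasource all persist state through the info object, which behaves like a nested dict with a save() method. A minimal stand-in for local experimentation (purely illustrative; the real CLI keeps this state in its own config store):

import json

class Info(dict):
    """Illustrative stand-in for the CLI's persistent config object."""

    def __init__(self, path):
        super().__init__()
        self.path = path
        try:
            with open(path, 'rt', encoding='utf8') as f:
                self.update(json.load(f))
        except FileNotFoundError:
            pass

    def save(self):
        with open(self.path, 'wt', encoding='utf8') as f:
            json.dump(self, f, indent=2)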
Example 5
def commit_datasource(ctx,
                      info,
                      datasource_id,
                      message,
                      schema,
                      orchestration_backend,
                      orchestration_args,
                      processing_backend,
                      processing_args,
                      force):
    """Creates a commit for a datasource"""
    api = ce_api.DatasourcesApi(api_client(info))

    if not force:
        confirmation('Committing will trigger a pipeline that will create a '
                     'snapshot of your datasource\'s current state. '
                     'This might take a while. '
                     'Are you sure you wish to continue?', abort=True)

    # find closest; this is a heavy call for now
    all_ds = api_call(api.get_datasources_api_v1_datasources_get)
    ds_uuid = find_closest_uuid(datasource_id, all_ds)

    if schema:
        try:
            with open(schema, 'rt', encoding='utf8') as f:
                schema_dict = yaml.safe_load(f)
        except yaml.YAMLError:
            error('Badly formatted YAML!')
            schema_dict = dict()
    else:
        schema_dict = dict()

    commit = api_call(
        api.create_datasource_commit_api_v1_datasources_ds_id_commits_post,
        DatasourceCommitCreate(
            message=message,
            used_schema=schema_dict,
            orchestration_backend=orchestration_backend,
            orchestration_args=orchestration_args,
            processing_backend=processing_backend,
            processing_args=processing_args,
        ),
        ds_id=ds_uuid,
    )
    declare('Commit successful: {}'.format(format_uuid(commit.id)))

    active_commit = '{datasource_id}:{commit_id}'.format(datasource_id=ds_uuid,
                                                         commit_id=commit.id)

    user = info[constants.ACTIVE_USER]
    info[user][constants.ACTIVE_DATASOURCE_COMMIT] = active_commit
    info.save()
    declare('Active datasource commit set to: {}'.format(
        format_uuid(active_commit)))
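
The active commit is stored as a single 'datasource_id:commit_id' string, which list_datasource_commits (Example 9) splits back apart. The round trip is plain string formatting, assuming neither id contains a colon:

active_commit = '{}:{}'.format(ds_uuid, commit.id)
stored_ds_uuid, stored_commit_id = active_commit.split(':')
assert stored_ds_uuid == ds_uuid
assert stored_commit_id == commit.id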
Example 6
def pull_function_version(info, function_id, version_id, output_path):
    """Download a version of a given custom function"""
    api = ce_api.FunctionsApi(api_client(info))

    # Infer the function uuid and name
    f_list = api_call(api.get_functions_api_v1_functions_get)
    f_uuid = find_closest_uuid(function_id, f_list)
    f_name = [f.name for f in f_list if f.id == f_uuid][0]

    # Infer the version uuid
    v_list = api_call(
        api.get_function_versions_api_v1_functions_function_id_versions_get,
        f_uuid)
    v_uuid = find_closest_uuid(version_id, v_list)

    notice('Downloading the function with the following parameters: \n'
           'Name: {f_name}\n'
           'function_id: {f_id}\n'
           'version_id: {v_id}\n'.format(f_name=f_name,
                                         f_id=format_uuid(f_uuid),
                                         v_id=format_uuid(v_uuid)))

    # Fetch the base64-encoded file contents
    encoded_file = api_call(
        api.get_function_version_api_v1_functions_function_id_versions_version_id_get,
        f_uuid,
        v_uuid)

    # Derive a default output path if none was given
    if output_path is None:
        output_path = os.path.join(os.getcwd(), '{}@{}.py'.format(f_name,
                                                                  v_uuid))

    with open(output_path, 'wb') as f:
        f.write(base64.b64decode(encoded_file.file_contents))

    declare('File downloaded to {}'.format(output_path))
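
update_function (Example 10) base64-encodes the local file before upload, and this command decodes it again on download; the round trip is plain standard-library base64:

import base64

original = b'def my_udf(x):\n    return x\n'
encoded = base64.b64encode(original).decode()   # what update_function sends
decoded = base64.b64decode(encoded)             # what pull_function_version writes
assert decoded == original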
Example 7
def set_workspace(info, workspace_id):
    """Set workspace to be active"""
    user = info[constants.ACTIVE_USER]

    api = ce_api.WorkspacesApi(api_client(info))
    all_ws = api_call(api.get_loggedin_workspaces_api_v1_workspaces_get)
    ws_uuid = find_closest_uuid(workspace_id, all_ws)

    # Fetch the workspace to verify that the id resolves before persisting it
    api_call(api.get_workspace_api_v1_workspaces_workspace_id_get,
             ws_uuid)

    info[user][constants.ACTIVE_WORKSPACE] = ws_uuid
    info.save()
    declare('Active workspace set to id: {id}'.format(id=format_uuid(
        ws_uuid)))
Example 8
def create_workspace(ctx, info, provider_id, name):
    """Create a workspace and set it to be active."""
    click.echo('Registering the workspace "{}"...'.format(name))

    w_api = ce_api.WorkspacesApi(api_client(info))
    p_api = ce_api.ProvidersApi(api_client(info))

    p_list = api_call(p_api.get_loggedin_provider_api_v1_providers_get)
    p_uuid = find_closest_uuid(provider_id, p_list)

    ws = api_call(w_api.create_workspace_api_v1_workspaces_post,
                  WorkspaceCreate(name=name,
                                  provider_id=p_uuid))

    declare('Workspace registered.')
    ctx.invoke(set_workspace, workspace_id=ws.id)
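
create_workspace chains straight into set_workspace through Click's Context.invoke, so the freshly created workspace immediately becomes the active one. A self-contained illustration of that pattern (the command and option names here are made up):

import click

@click.group()
def cli():
    pass

@cli.command()
@click.option('--item-id')
def set_item(item_id):
    click.echo('Active item set to {}'.format(item_id))

@cli.command()
@click.pass_context
def create_item(ctx):
    new_id = 'item-1234'  # in the real command this comes from the API response
    ctx.invoke(set_item, item_id=new_id)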
Example 9
def list_datasource_commits(info, datasource_id):
    """List of all the available datasources"""
    api = ce_api.DatasourcesApi(api_client(info))

    # find closest; this is a heavy call for now
    all_ds = api_call(api.get_datasources_api_v1_datasources_get)
    ds_uuid = find_closest_uuid(datasource_id, all_ds)

    ds = api_call(
        api.get_datasource_api_v1_datasources_ds_id_get,
        ds_id=ds_uuid)

    declare('There are {count} different commits for datasource {name}'
            '.\n'.format(count=len(ds.datasource_commits), name=ds.name))

    user = info[constants.ACTIVE_USER]
    if constants.ACTIVE_DATASOURCE_COMMIT in info[user]:
        _, c_id = info[user][constants.ACTIVE_DATASOURCE_COMMIT].split(':')
    else:
        c_id = None

    if ds.datasource_commits:
        table = []
        for commit in ds.datasource_commits:
            status = api_call(
                api.get_datasource_commit_status_api_v1_datasources_ds_id_commits_commit_id_status_get,
                ds.id,
                commit.id,
            )
            table.append({
                'Selection': '*' if commit.id == c_id else '',
                'ID': format_uuid(commit.id),
                'Created At': format_date(commit.created_at),
                'Status': status,
                'Message': commit.message,
                'Bytes': commit.n_bytes,
                '# Datapoints': commit.n_datapoints,
                '# Features': commit.n_features
            })
        click.echo(tabulate(table, headers='keys', tablefmt='presto'))
        click.echo()
Example 10
def update_function(info, function_id, local_path, udf_name, message):
    """Add a new version to a function and update it"""
    click.echo('Updating the function {}.'.format(
        format_uuid(function_id)))

    api = ce_api.FunctionsApi(api_client(info))

    f_list = api_call(api.get_functions_api_v1_functions_get)
    f_uuid = find_closest_uuid(function_id, f_list)

    with open(local_path, 'rb') as file:
        data = file.read()
    encoded_file = base64.b64encode(data).decode()

    api_call(
        api.create_function_version_api_v1_functions_function_id_versions_post,
        FunctionVersionCreate(udf_path=udf_name,
                              message=message,
                              file_contents=encoded_file),
        f_uuid)

    declare('Function updated!')
Example 11
def list_versions(info, function_id):
    """List of versions for a selected custom function"""
    api = ce_api.FunctionsApi(api_client(info))
    f_list = api_call(api.get_functions_api_v1_functions_get)
    f_uuid = find_closest_uuid(function_id, f_list)

    v_list = api_call(
        api.get_function_versions_api_v1_functions_function_id_versions_get,
        f_uuid)

    declare('Function with ID {id} has {count} '
            'versions.\n'.format(id=format_uuid(function_id),
                                 count=len(v_list)))

    if v_list:
        table = []
        for v in v_list:
            table.append({'ID': format_uuid(v.id),
                          'Created At': v.created_at,
                          'Description': v.message})
        click.echo(tabulate(table, headers='keys', tablefmt='presto'))
        click.echo()
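
All of the listing commands build a list of dicts and hand it to tabulate with headers='keys', which turns the dict keys into column headers; a standalone example of that call with made-up rows:

from tabulate import tabulate

rows = [
    {'ID': 'a1b2c3', 'Created At': '2021-01-01', 'Description': 'initial version'},
    {'ID': 'd4e5f6', 'Created At': '2021-01-02', 'Description': 'bugfix'},
]
print(tabulate(rows, headers='keys', tablefmt='presto'))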
Example 12
def get_pipeline_status(info, pipeline_id):
    """Get status of started pipelines"""
    utils.notice('Fetching pipeline(s). This might take a few seconds... \n')
    active_user = info[constants.ACTIVE_USER]
    ws = info[active_user][constants.ACTIVE_WORKSPACE]

    ws_api = ce_api.WorkspacesApi(utils.api_client(info))
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    d_api = ce_api.DatasourcesApi(utils.api_client(info))

    pipelines = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws)

    if pipeline_id is not None:
        pipeline_id = utils.find_closest_uuid(pipeline_id, pipelines)

    pipelines.sort(key=lambda x: x.created_at)
    for p in pipelines:
        write_check = (len(p.pipeline_runs) > 0) and \
                      (pipeline_id is None or pipeline_id == p.id)

        if write_check:
            title = 'PIPELINE NAME: {} PIPELINE ID: {}'.format(
                p.name, utils.format_uuid(p.id))
            utils.declare(title)
            utils.declare('-' * len(title))

            table = []
            for r in p.pipeline_runs:
                run = utils.api_call(
                    p_api.get_pipeline_run_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_get,
                    p.id,
                    r.id)

                # Resolve datasource
                ds_commit = utils.api_call(
                    d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
                    r.datasource_commit_id)
                ds = utils.api_call(
                    d_api.get_datasource_api_v1_datasources_ds_id_get,
                    ds_commit.datasource_id)

                if run.end_time:
                    td = run.end_time - run.start_time
                else:
                    td = datetime.now(timezone.utc) - run.start_time

                # # Resolve component status
                # stage = utils.get_run_stage(run.pipeline_components)

                table.append({
                    'RUN ID': utils.format_uuid(run.id),
                    'TYPE': run.pipeline_run_type,
                    'STATUS': run.status,
                    # 'STAGE': stage,
                    'DATASOURCE': '{}_{}'.format(
                        ds.name, utils.format_uuid(run.datasource_commit_id)),
                    'DATAPOINTS': '{}'.format(ds_commit.n_datapoints),
                    # 'RUNNING STAGE': stage,
                    'START TIME': utils.format_date(run.start_time),
                    'DURATION': utils.format_timedelta(td),
                })

            click.echo(tabulate(table, headers='keys', tablefmt='plain'))
            click.echo('\n')
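
The duration arithmetic relies on run.start_time and run.end_time being timezone-aware datetimes, since an open-ended run is measured against datetime.now(timezone.utc). format_timedelta itself is not shown; a plausible sketch that renders the duration as HH:MM:SS (the output format is an assumption):

def format_timedelta(td):
    """Render a datetime.timedelta as HH:MM:SS (illustrative sketch)."""
    total_seconds = int(td.total_seconds())
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)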