Example #1
def deploy_cli(api_client, spec_arg, spec, allow_duplicate_names,
               no_update_spec):
    """
    Deploys a delta pipeline according to the pipeline specification. The pipeline spec is a
    JSON document that defines how to run a Delta Pipeline on Databricks. All local libraries
    referenced in the spec are uploaded to DBFS.

    If the pipeline spec contains an "id" field, attempts to update an existing pipeline with
    that ID. If it does not, creates a new pipeline and edits the spec file to add the ID of the
    created pipeline. The spec file will not be updated if the --no-update-spec option is added.

    The deploy command will not create a new pipeline if a pipeline with the same name already
    exists. This check can be disabled by adding the --allow-duplicate-names option.

    Usage:

    databricks pipelines deploy example.json

    OR

    databricks pipelines deploy --spec example.json
    """
    if bool(spec_arg) == bool(spec):
        raise RuntimeError(
            'The spec must be provided either as an argument '
            'or with the --spec option.')
    src = spec_arg if bool(spec_arg) else spec
    spec_obj = _read_spec(src)
    if 'id' not in spec_obj:
        try:
            response = PipelinesApi(api_client).create(spec_obj,
                                                       allow_duplicate_names)
        except requests.exceptions.HTTPError as e:
            _handle_duplicate_name_exception(spec_obj, e)

        new_pipeline_id = response['pipeline_id']
        click.echo("Successfully created pipeline: {}".format(
            _get_pipeline_url(api_client, new_pipeline_id)))

        if not no_update_spec:
            spec_obj['id'] = new_pipeline_id
            _write_spec(src, spec_obj)
            click.echo("Updated spec at {} with ID {}".format(
                src, new_pipeline_id))
        else:
            click.echo(
                "Pipeline has been assigned ID {}".format(new_pipeline_id))
    else:
        _validate_pipeline_id(spec_obj['id'])
        try:
            PipelinesApi(api_client).deploy(spec_obj, allow_duplicate_names)
        except requests.exceptions.HTTPError as e:
            _handle_duplicate_name_exception(spec_obj, e)
        click.echo("Successfully deployed pipeline: {}".format(
            _get_pipeline_url(api_client, spec_obj['id'])))
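
This and several later examples call module-level helpers that the listing omits. A minimal sketch of what _read_spec and _write_spec could look like, assuming the spec is a plain JSON file on disk (the example.json usage above suggests this; the real helpers in databricks-cli may differ):

import json

def _read_spec(src):
    # Load the pipeline spec from a JSON file on disk.
    with open(src, 'r') as f:
        return json.load(f)

def _write_spec(src, spec_obj):
    # Write the spec back, e.g. after an "id" field has been added.
    with open(src, 'w') as f:
        json.dump(spec_obj, f, indent=2)
        f.write('\n')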
Example #2
def deploy_cli(api_client, spec_arg, spec):
    """
    Deploys a delta pipeline according to the pipeline specification. The pipeline spec is a
    JSON document that defines how to run a Delta Pipeline on Databricks. All local libraries
    referenced in the spec are uploaded to DBFS.

    Usage:

    databricks pipelines deploy example.json

    OR

    databricks pipelines deploy --spec example.json
    """
    if bool(spec_arg) == bool(spec):
        raise RuntimeError(
            'The spec should be provided either by an option or argument')
    src = spec_arg if bool(spec_arg) else spec
    spec_obj = _read_spec(src)
    if 'id' not in spec_obj:
        pipeline_id = str(uuid.uuid4())
        click.echo("Updating spec at {} with id: {}".format(src, pipeline_id))
        spec_obj['id'] = pipeline_id
        _write_spec(src, spec_obj)
    _validate_pipeline_id(spec_obj['id'])
    PipelinesApi(api_client).deploy(spec_obj)

    pipeline_id = spec_obj['id']
    base_url = "{0.scheme}://{0.netloc}/".format(urlparse(api_client.url))
    pipeline_url = urljoin(base_url,
                           "#joblist/pipelines/{}".format(pipeline_id))
    click.echo("Pipeline successfully deployed: {}".format(pipeline_url))
Example #3
def list_cli(api_client):
    """
    Lists all pipelines and their statuses.

    Usage:

    databricks pipelines list
    """
    click.echo(pretty_format(PipelinesApi(api_client).list()))
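
pretty_format is another omitted helper. Given that it renders API responses for the terminal, a plausible minimal version is indented JSON (an assumption, not the library's actual code):

import json

def pretty_format(json_obj):
    # Render an API response as indented JSON for terminal output.
    return json.dumps(json_obj, indent=2)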
Example #4
def edit_cli(api_client, settings_arg, settings, pipeline_id,
             allow_duplicate_names):
    # pylint: disable=line-too-long
    """
    Edits a pipeline specified by the pipeline settings. The pipeline settings are a
    JSON document that defines a Delta Live Tables pipeline on Databricks. To use a
    file containing the pipeline settings, pass the file path to the command as an
    argument or with the --settings option.

    Specification for the pipeline settings JSON can be found at
    https://docs.databricks.com/data-engineering/delta-live-tables/delta-live-tables-configuration.html

    If another pipeline with the same name exists, pipeline settings will not be edited.
    This check can be disabled by adding the --allow-duplicate-names option.

    Note that if an ID is both specified in the settings and passed with the --pipeline-id
    argument, the two IDs must be the same, or the command will fail.

    Usage:

    databricks pipelines edit example.json

    OR

    databricks pipelines edit --settings example.json
    """
    # pylint: enable=line-too-long
    if bool(settings_arg) == bool(settings):
        raise ValueError(
            'Settings should be provided either as an argument ' +
            '(databricks pipelines edit example.json) or as ' +
            'an option (databricks pipelines edit --settings example.json).')

    src = settings_arg if bool(settings_arg) else settings
    settings_obj = _read_settings(src)
    settings_dir = os.path.dirname(src)

    if (pipeline_id
            and 'id' in settings_obj) and pipeline_id != settings_obj["id"]:
        raise ValueError(
            "The ID provided in --pipeline_id '{}' is different from the ID provided "
            "in the settings '{}'. Resolve the conflict and try the command again. "
            .format(pipeline_id, settings_obj["id"]))

    settings_obj['id'] = pipeline_id or settings_obj.get('id', None)
    _validate_pipeline_id(settings_obj['id'])

    try:
        PipelinesApi(api_client).edit(settings_obj, settings_dir,
                                      allow_duplicate_names)
    except requests.exceptions.HTTPError as e:
        _handle_duplicate_name_exception(settings_obj,
                                         e,
                                         is_create_pipeline=False)
    click.echo("Successfully edited pipeline settings: {}.".format(
        _get_pipeline_url(api_client, settings_obj['id'])))
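
_validate_pipeline_id is also not shown in the listing. A minimal sketch that rejects missing or blank IDs before any API call is made; the real validation rules are an assumption:

def _validate_pipeline_id(pipeline_id):
    # Fail fast on a missing or blank ID rather than sending a bad
    # request to the API. The actual checks may be stricter.
    if pipeline_id is None or not str(pipeline_id).strip():
        raise ValueError('Pipeline ID must be provided and non-empty.')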
Example #5
def get_cli(api_client, pipeline_id):
    """
    Gets a delta pipeline's current spec and status.

    Usage:

    databricks pipelines get --pipeline-id 1234
    """
    _validate_pipeline_id(pipeline_id)
    click.echo(pretty_format(PipelinesApi(api_client).get(pipeline_id)))
Example #6
def start_cli(api_client, pipeline_id, full_refresh):
    """
    Starts a pipeline update.

    Usage:

    databricks pipelines start --pipeline-id 1234 --full-refresh=true
    """
    _validate_pipeline_id(pipeline_id)
    resp = PipelinesApi(api_client).start_update(pipeline_id, full_refresh=full_refresh)
    click.echo(_gen_start_update_msg(resp, pipeline_id, full_refresh))
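
_gen_start_update_msg formats the confirmation printed once an update has started. Assuming the start_update response carries an 'update_id' field (an assumption about the API payload), a sketch:

def _gen_start_update_msg(resp, pipeline_id, full_refresh):
    # Build a human-readable confirmation message. The 'update_id'
    # key is an assumption about the start_update response.
    kind = 'full refresh' if full_refresh else 'update'
    return "Started {} {} for pipeline {}.".format(
        kind, resp.get('update_id', ''), pipeline_id)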
Example #7
def delete_cli(api_client, pipeline_id):
    """
    Deletes the pipeline and cancels any active updates.

    Usage:

    databricks pipelines delete --pipeline-id 1234
    """
    _validate_pipeline_id(pipeline_id)
    PipelinesApi(api_client).delete(pipeline_id)
    click.echo("Pipeline {} deleted".format(pipeline_id))
Example #8
def run_cli(api_client, pipeline_id):
    """
    Starts the execution of a delta pipeline run by starting the cluster and processing data.

    Usage:

    databricks pipelines run --pipeline-id 1234
    """
    _validate_pipeline_id(pipeline_id)
    PipelinesApi(api_client).run(pipeline_id)
    click.echo("Run triggered for pipeline {}".format(pipeline_id))
Example #9
def stop_cli(api_client, pipeline_id):
    """
    Stops the pipeline by cancelling any active update.

    Usage:

    databricks pipelines stop --pipeline-id 1234
    """
    _validate_pipeline_id(pipeline_id)
    PipelinesApi(api_client).stop(pipeline_id)
    click.echo("Stopped pipeline {}.".format(pipeline_id))
Example #10
def stop_cli(api_client, pipeline_id):
    """
    Stops the execution of a delta pipeline run by terminating the cluster. Processing of data can
    be resumed by calling `run`.

    Usage:

    databricks pipelines stop --pipeline-id 1234
    """
    _validate_pipeline_id(pipeline_id)
    PipelinesApi(api_client).stop(pipeline_id)
    click.echo("Stopped pipeline {}".format(pipeline_id))
Example #11
def reset_cli(api_client, pipeline_id):
    """
    Resets a delta pipeline by truncating tables and creating new checkpoint folders so data is
    reprocessed from scratch.

    Usage:

    databricks pipelines reset --pipeline-id 1234
    """
    _validate_pipeline_id(pipeline_id)
    PipelinesApi(api_client).reset(pipeline_id)
    click.echo("Reset triggered for pipeline {}".format(pipeline_id))
Example #12
def delete_cli(api_client, pipeline_id):
    """
    Stops a delta pipeline and deletes its associated Databricks resources. The pipeline can be
    resumed by deploying it again.

    Usage:

    databricks pipelines delete --pipeline-id 1234
    """
    _validate_pipeline_id(pipeline_id)
    PipelinesApi(api_client).delete(pipeline_id)
    click.echo("Pipeline {} deleted".format(pipeline_id))
Example #13
def create_cli(api_client, settings_arg, settings, allow_duplicate_names):
    # pylint: disable=line-too-long
    """
    Creates a pipeline specified by the pipeline settings. The pipeline settings are a
    JSON document that defines a Delta Live Tables pipeline on Databricks.

    To use a file containing the pipeline settings, pass the file path to the command as
    an argument or with the --settings option. If the pipeline creation is successful, logs
    the URL and the ID of the new pipeline to STDOUT.

    Specification for the pipeline settings JSON can be found at
    https://docs.databricks.com/data-engineering/delta-live-tables/delta-live-tables-configuration.html

    If a pipeline with the same name already exists, the pipeline will not be created.
    This check can be disabled by adding the --allow-duplicate-names option.

    If the pipeline settings contain an "id" field, this command will fail.

    Usage:

    databricks pipelines create example.json

    OR

    databricks pipelines create --settings example.json
    """
    # pylint: enable=line-too-long
    if bool(settings_arg) == bool(settings):
        raise ValueError(
            'Settings should be provided either as an argument ' +
            '(databricks pipelines create example.json) or as ' +
            'an option (databricks pipelines create --settings example.json).')

    src = settings_arg if bool(settings_arg) else settings
    settings_obj = _read_settings(src)
    settings_dir = os.path.dirname(src)

    if 'id' in settings_obj:
        raise ValueError("Pipeline settings shouldn't contain \"id\" "
                         "when creating a pipeline.")

    try:
        response = PipelinesApi(api_client).create(settings_obj, settings_dir,
                                                   allow_duplicate_names)
    except requests.exceptions.HTTPError as e:
        _handle_duplicate_name_exception(settings_obj,
                                         e,
                                         is_create_pipeline=True)

    new_pipeline_id = response['pipeline_id']
    click.echo("Successfully created pipeline: {} with ID: {}.".format(
        _get_pipeline_url(api_client, new_pipeline_id), new_pipeline_id))
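
_handle_duplicate_name_exception shows up in most create/edit/deploy examples. From the way it is called, it presumably distinguishes a duplicate-name rejection from other HTTP failures and re-raises anything else; a hedged sketch (how the duplicate-name case is detected in the response is an assumption):

def _handle_duplicate_name_exception(settings_obj, e, is_create_pipeline=False):
    # If the server rejected the request because a pipeline with the
    # same name already exists, point at --allow-duplicate-names;
    # otherwise propagate the original HTTP error. The RESOURCE_CONFLICT
    # marker below is an assumption about the error payload.
    if 'RESOURCE_CONFLICT' in e.response.text:
        command = 'create' if is_create_pipeline else 'edit'
        raise ValueError(
            "Pipeline with name '{}' already exists. Rerun `databricks "
            "pipelines {}` with --allow-duplicate-names to proceed anyway."
            .format(settings_obj.get('name'), command))
    raise e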
Example #14
def run_cli(api_client, pipeline_id):
    """
    [Deprecated] Use the "start" command instead.

    Starts a pipeline update.

    Usage:

    databricks pipelines run --pipeline-id 1234
    """
    click.echo("Deprecation warning: the \"run\" command is deprecated." +
               " Use the \"start\" command instead.")
    _validate_pipeline_id(pipeline_id)
    resp = PipelinesApi(api_client).start_update(pipeline_id, full_refresh=False)
    click.echo(_gen_start_update_msg(resp, pipeline_id, False))
Example #15
def reset_cli(api_client, pipeline_id):
    """
    [Deprecated] Use the "start --full-refresh" command instead.

    Resets a pipeline by truncating tables and creating new checkpoint folders so that data is
    reprocessed from the beginning.

    Usage:

    databricks pipelines reset --pipeline-id 1234
    """
    click.echo("DeprecationWarning: the \"reset\" command is deprecated, " +
               "use the \"start --full-refresh\" command instead.")
    _validate_pipeline_id(pipeline_id)
    resp = PipelinesApi(api_client).start_update(pipeline_id, full_refresh=True)
    click.echo(_gen_start_update_msg(resp, pipeline_id, True))
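
Since reset now just delegates to start_update with full_refresh=True, the deprecated invocation above is equivalent to:

databricks pipelines start --pipeline-id 1234 --full-refresh=true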
Example #16
def get_cli(api_client, spec_arg, spec, pipeline_id):
    """
    Gets a delta pipeline's current spec and status.

    Usage:

    databricks pipelines get example.json

    OR

    databricks pipelines get --spec example.json

    OR

    databricks pipelines get --pipeline-id 1234
    """
    pipeline_id = _get_pipeline_id(spec_arg=spec_arg,
                                   spec=spec,
                                   pipeline_id=pipeline_id)
    click.echo(pretty_format(PipelinesApi(api_client).get(pipeline_id)))
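
Examples #16 through #18 resolve the pipeline ID from any of three inputs via a shared _get_pipeline_id helper. A sketch of that resolution, assuming it prefers an explicit --pipeline-id and otherwise reads the "id" field out of the given spec file (an assumption about the real helper; _read_spec is the hypothetical JSON loader sketched after Example #1):

def _get_pipeline_id(spec_arg=None, spec=None, pipeline_id=None):
    # Prefer an explicitly supplied --pipeline-id; otherwise fall back
    # to the "id" field of the spec given as an argument or via --spec.
    if pipeline_id:
        return pipeline_id
    src = spec_arg if spec_arg else spec
    if not src:
        raise ValueError('Provide a spec file or a --pipeline-id.')
    spec_obj = _read_spec(src)
    if 'id' not in spec_obj:
        raise ValueError(
            "The spec at {} does not contain an 'id' field.".format(src))
    return spec_obj['id']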
Example #17
def delete_cli(api_client, spec_arg, spec, pipeline_id):
    """
    Stops a delta pipeline and deletes its associated Databricks resources. The pipeline can be
    resumed by deploying it again.

    Usage:

    databricks pipelines delete example.json

    OR

    databricks pipelines delete --spec example.json

    OR

    databricks pipelines delete --pipeline-id 1234
    """
    pipeline_id = _get_pipeline_id(spec_arg=spec_arg,
                                   spec=spec,
                                   pipeline_id=pipeline_id)
    PipelinesApi(api_client).delete(pipeline_id)
    click.echo("Pipeline {} deleted".format(pipeline_id))
Example #18
def reset_cli(api_client, spec_arg, spec, pipeline_id):
    """
    Resets a delta pipeline by truncating tables and creating new checkpoint folders so data is
    reprocessed from scratch.

    Usage:

    databricks pipelines reset example.json

    OR

    databricks pipelines reset --spec example.json

    OR

    databricks pipelines reset --pipeline-id 1234
    """
    pipeline_id = _get_pipeline_id(spec_arg=spec_arg,
                                   spec=spec,
                                   pipeline_id=pipeline_id)
    PipelinesApi(api_client).reset(pipeline_id)
    click.echo("Reset triggered for pipeline {}".format(pipeline_id))
Example #19
def deploy_cli(api_client, spec_arg, spec, allow_duplicate_names, pipeline_id):
    """
    Deploys a delta pipeline according to the pipeline specification. The pipeline spec is a
    JSON document that defines how to run a Delta Pipeline on Databricks. All local libraries
    referenced in the spec are uploaded to DBFS.

    If the pipeline spec contains an "id" field, or if a pipeline ID is specified directly
    (using the --pipeline-id argument), attempts to update an existing pipeline
    with that ID. If it does not, creates a new pipeline and logs the ID of the new pipeline
    to STDOUT. Note that if an ID is both specified in the spec and passed via --pipeline-id,
    the two IDs must be the same, or the command will fail.

    The deploy command will not create a new pipeline if a pipeline with the same name already
    exists. This check can be disabled by adding the --allow-duplicate-names option.

    Usage:

    databricks pipelines deploy example.json

    OR

    databricks pipelines deploy --spec example.json

    OR

    databricks pipelines deploy --pipeline-id 1234 --spec example.json
    """
    if bool(spec_arg) == bool(spec):
        raise ValueError(
            'The spec must be provided either as an argument '
            'or with the --spec option.')
    src = spec_arg if bool(spec_arg) else spec
    spec_obj = _read_spec(src)
    if not pipeline_id and 'id' not in spec_obj:
        try:
            response = PipelinesApi(api_client).create(spec_obj,
                                                       allow_duplicate_names)
        except requests.exceptions.HTTPError as e:
            _handle_duplicate_name_exception(spec_obj, e)

        new_pipeline_id = response['pipeline_id']
        click.echo("Pipeline has been assigned ID {}".format(new_pipeline_id))
        click.echo("Successfully created pipeline: {}".format(
            _get_pipeline_url(api_client, new_pipeline_id)))
        click.echo(new_pipeline_id, err=True)
    else:
        if (pipeline_id
                and 'id' in spec_obj) and pipeline_id != spec_obj["id"]:
            raise ValueError(
                "The ID provided in --pipeline_id '{}' is different from the id provided "
                "in the spec '{}'. Please resolve the conflict and try the command again. "
                "Because pipeline IDs are no longer persisted after being deleted, we "
                "recommend removing the ID field from your spec.".format(
                    pipeline_id, spec_obj["id"]))

        spec_obj['id'] = pipeline_id or spec_obj.get('id', None)
        _validate_pipeline_id(spec_obj['id'])

        try:
            PipelinesApi(api_client).deploy(spec_obj, allow_duplicate_names)
        except requests.exceptions.HTTPError as e:
            _handle_duplicate_name_exception(spec_obj, e)
        click.echo("Successfully deployed pipeline: {}".format(
            _get_pipeline_url(api_client, spec_obj['id'])))
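
Note the click.echo(new_pipeline_id, err=True) in the create branch of this example: the new ID is echoed a second time on stderr, presumably so scripts can capture it separately from the informational messages on stdout.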
Example #20
def list_cli(api_client):
    click.echo(pretty_format(PipelinesApi(api_client).list()))
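
Example #20 is list_cli stripped down to its one-line body; note that every example in this listing has also had its decorators removed. The click.echo calls suggest these functions are registered as click commands; a minimal sketch of such wiring (the group and command names here are illustrative, not the library's actual definitions):

import click

@click.group()
def pipelines():
    """Container for the `databricks pipelines` subcommands."""

@pipelines.command(name='list')
def list_command():
    # In the real CLI an ApiClient is injected by shared decorators
    # (omitted here); this stub only demonstrates the registration.
    click.echo('pipelines list invoked')

if __name__ == '__main__':
    pipelines()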
Example #21
def deploy_cli(api_client, settings_arg, settings, spec, allow_duplicate_names,
               pipeline_id):
    # pylint: disable=line-too-long
    """
    [Deprecated] This command is deprecated, use create and edit commands instead.

    Creates or edits a pipeline specified by the pipeline settings. The pipeline settings
    are a JSON document that defines a Delta Live Tables pipeline on Databricks. To use a
    file containing the pipeline settings, pass the file path to the command as an
    argument or with the --settings option.

    Specification for the pipeline settings JSON can be found at
    https://docs.databricks.com/data-engineering/delta-live-tables/delta-live-tables-configuration.html

    If the pipeline settings contain an "id" field, or if a pipeline ID is specified directly
    (using the --pipeline-id argument), attempts to update an existing pipeline
    with that ID. If it does not, creates a new pipeline and logs the URL and the ID of the
    new pipeline to STDOUT. Note that if an ID is specified in both the settings and passed
    with the --pipeline-id argument, the two IDs must be the same, or the command will fail.

    The deploy command will not create a new pipeline if a pipeline with the same name already
    exists. This check can be disabled by adding the --allow-duplicate-names option.

    Usage:

    databricks pipelines deploy example.json

    OR

    databricks pipelines deploy --settings example.json

    OR

    databricks pipelines deploy --pipeline-id 1234 --settings example.json
    """
    # pylint: enable=line-too-long
    click.echo(
        "DeprecationWarning: the \"deploy\" command is deprecated, " +
        "use \"create\" command to create a new pipeline or \"edit\" command "
        + "to modify an existing pipeline.\n")

    settings_error_msg = 'Settings should be provided either as an argument ' \
                         '(databricks pipelines deploy example.json) or as ' \
                         'an option (databricks pipelines deploy --settings example.json).'
    if bool(spec):
        if bool(spec) == bool(settings):
            raise ValueError(settings_error_msg)
        settings = spec

    if bool(settings_arg) == bool(settings):
        raise ValueError(settings_error_msg)

    src = settings_arg if bool(settings_arg) else settings
    settings_obj = _read_settings(src)
    settings_dir = os.path.dirname(src)
    if not pipeline_id and 'id' not in settings_obj:
        try:
            response = PipelinesApi(api_client).create(settings_obj,
                                                       settings_dir,
                                                       allow_duplicate_names)
        except requests.exceptions.HTTPError as e:
            _handle_duplicate_name_exception(settings_obj,
                                             e,
                                             is_create_pipeline=True)

        new_pipeline_id = response['pipeline_id']
        click.echo("Successfully created pipeline: {} with ID: {}".format(
            _get_pipeline_url(api_client, new_pipeline_id), new_pipeline_id))
    else:
        if (pipeline_id and 'id'
                in settings_obj) and pipeline_id != settings_obj["id"]:
            raise ValueError(
                "The ID provided in --pipeline_id '{}' is different from the ID provided "
                "in the settings '{}'. Resolve the conflict and try the command again."
                .format(pipeline_id, settings_obj["id"]))

        settings_obj['id'] = pipeline_id or settings_obj.get('id', None)
        _validate_pipeline_id(settings_obj['id'])
        try:
            PipelinesApi(api_client).edit(settings_obj, settings_dir,
                                          allow_duplicate_names)
        except requests.exceptions.HTTPError as e:
            _handle_duplicate_name_exception(settings_obj,
                                             e,
                                             is_create_pipeline=False)
        click.echo("Successfully deployed pipeline: {}.".format(
            _get_pipeline_url(api_client, settings_obj['id'])))
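
Taken together, Examples #13, #4, and #21 show the migration path off the deprecated deploy command: it dispatches to the same create/edit logic, so

databricks pipelines deploy --settings example.json

becomes

databricks pipelines create --settings example.json

for a new pipeline, or

databricks pipelines edit --settings example.json

once the settings carry an "id".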