Example #1
def __init__(self, env: ApplicationVersion, config: dict):
    super().__init__(env, config)
    self.vault_name, self.vault_client = KeyVaultClient.vault_and_client(
        self.config, self.env)
    self.databricks_client = Databricks(
        self.vault_name, self.vault_client).api_client(self.config)
    self.jobs_api = JobsApi(self.databricks_client)
    self.runs_api = RunsApi(self.databricks_client)
Example #2
def _create_job(api_client: ApiClient, job: Dict[str, Any]) -> str:
    dbx_echo(f'Creating a new job with name {job["name"]}')
    try:
        jobs_api = JobsApi(api_client)
        job_id = jobs_api.create_job(job)["job_id"]
    except HTTPError as e:
        dbx_echo("Failed to create job with definition:")
        dbx_echo(json.dumps(job, indent=4))
        raise e
    return job_id
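
A minimal sketch of how _create_job might be invoked, assuming an already-configured ApiClient; the host, token, cluster spec and notebook path below are placeholders, not values from the original project:

from databricks_cli.sdk.api_client import ApiClient

# Hypothetical client and job definition, for illustration only.
client = ApiClient(host="https://example.cloud.databricks.com", token="dapi-...")
job_definition = {
    "name": "nightly-etl",
    "new_cluster": {
        "spark_version": "7.3.x-scala2.12",
        "node_type_id": "i3.xlarge",
        "num_workers": 2,
    },
    "notebook_task": {"notebook_path": "/Repos/etl/main"},
}
job_id = _create_job(client, job_definition)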
Example #3
def test_run_now():
    with mock.patch('databricks_cli.sdk.ApiClient') as api_client_mock:
        api = JobsApi(api_client_mock)
        api.run_now('1', ['bla'], None, None, None, None)
        api_client_mock.perform_query.assert_called_with('POST',
                                                         '/jobs/run-now',
                                                         data={
                                                             'job_id': '1',
                                                             'jar_params':
                                                             ['bla']
                                                         },
                                                         headers=None,
                                                         version=None)

        api.run_now('1', None, None, None, None, None, 'idempotent-token')
        api_client_mock.perform_query.assert_called_with(
            'POST',
            '/jobs/run-now',
            data={
                'job_id': '1',
                'idempotency_token': 'idempotent-token'
            },
            headers=None,
            version=None)

        api.run_now('1', ['bla'], None, None, None, None, version='3.0')
        api_client_mock.perform_query.assert_called_with('POST',
                                                         '/jobs/run-now',
                                                         data={
                                                             'job_id': '1',
                                                             'jar_params':
                                                             ['bla']
                                                         },
                                                         headers=None,
                                                         version='3.0')
Example #4
def get_cli(api_client, job_id, version):
    """
    Describes the metadata for a job.
    """
    check_version(api_client, version)
    click.echo(pretty_format(
        JobsApi(api_client).get_job(job_id, version=version)))
Example #5
def create_cli(api_client, json_file, json):
    """
    Creates a job.

    The specification for the json option can be found at
    https://docs.databricks.com/api/latest/jobs.html#create
    """
    json_cli_base(json_file, json, lambda json: JobsApi(api_client).create_job(json))
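
json_cli_base is a databricks-cli helper that validates the --json-file/--json pair, parses the payload and hands it to the callback. A rough, hypothetical inline equivalent, mirroring the reset_cli examples further down (the function name is invented for illustration):

def create_job_inline(api_client, json_file, json):
    # Exactly one of --json-file / --json must be supplied.
    if not bool(json_file) ^ bool(json):
        raise RuntimeError('Either --json-file or --json should be provided')
    if json_file:
        with open(json_file, 'r') as f:
            json = f.read()
    res = JobsApi(api_client).create_job(json_loads(json))
    click.echo(pretty_format(res))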
Example #6
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    block_key_map = {
        "new_cluster": handle_block,
        "notebook_task": handle_block,
        "aws_attributes": handle_block,
        "spark_env_vars": handle_block,
        "autoscale": handle_block,
        "spark_submit_task": handle_block,
        "libraries": handle_libraries,
        "email_notifications": handle_map,
        "custom_tags": handle_map
    }
    ignore_attribute_key = {
        "created_time", "creator_user_name", "job_id"
    }
    required_attributes_key = {
        "max_concurrent_runs", "name"
    }

    if hcl:
        job_api = JobsApi(api_client)

        jobs = job_api.list_jobs()["jobs"]
        log.info(jobs)

        with GitExportHandler(git_ssh_url, "jobs", delete_not_found=delete, dry_run=dry_run, tag=tag) as gh:
            for job in jobs:
                if not pattern_matches(job["settings"]["name"]):
                    log.debug(f"{job['settings']['name']} did not match pattern function {pattern_matches}")
                    continue
                log.debug(f"{job['settings']['name']} matched the pattern function {pattern_matches}")
                job_resource_data = prep_json(block_key_map, ignore_attribute_key, job['settings'], required_attributes_key)

                base_name = normalize_identifier(job['settings']['name'])
                name = "databricks_job"
                identifier = f"databricks_job-{base_name}"

                # Need to escape quotes in the name.
                job_resource_data['name'] = job_resource_data['name'].replace('"','\\"')

                instance_job_hcl = create_resource_from_dict(name, identifier, job_resource_data, False)
                file_name_identifier = f"{identifier}.tf"
                gh.add_file(file_name_identifier, instance_job_hcl)
                log.debug(instance_job_hcl)
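
For context, each entry consumed by the loop above (from job_api.list_jobs()["jobs"]) roughly has the following shape; fields such as created_time, creator_user_name and job_id are dropped via ignore_attribute_key, and all values shown here are placeholders:

job = {
    "job_id": 123,
    "created_time": 1601510400000,
    "creator_user_name": "someone@example.com",
    "settings": {
        "name": "nightly-etl",
        "max_concurrent_runs": 1,
        "new_cluster": {"spark_version": "7.3.x-scala2.12", "num_workers": 2},
        "email_notifications": {"on_failure": ["someone@example.com"]},
    },
}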
Example #7
def list_cli(api_client, output):
    """
    Lists the jobs in the Databricks Job Service.

    By default, the output format will be a human-readable table with the following fields:

      - Job ID

      - Job name

    A JSON-formatted output can also be requested by setting the --output parameter to "JSON".

    In table mode, the jobs are sorted by their name.
    """
    jobs_api = JobsApi(api_client)
    jobs_json = jobs_api.list_jobs()
    if OutputClickType.is_json(output):
        click.echo(pretty_format(jobs_json))
    else:
        click.echo(tabulate(_jobs_to_table(jobs_json), tablefmt='plain', disable_numparse=True))
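
The _jobs_to_table helper is not shown here; a plausible sketch, assuming each entry exposes job_id and settings['name'] as in the other examples, and that table mode sorts by name as the docstring states:

def _jobs_to_table(jobs_json):
    rows = [(job['job_id'], job['settings']['name'])
            for job in jobs_json.get('jobs', [])]
    # Sort by job name for the human-readable table.
    return sorted(rows, key=lambda row: row[1])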
Example #8
def list_cli(api_client, output, job_type, version, expand_tasks, offset, limit, _all):
    """
    Lists the jobs in the Databricks Job Service.

    By default, the output format will be a human-readable table with the following fields:

      - Job ID

      - Job name

    A JSON-formatted output can also be requested by setting the --output parameter to "JSON".

    In table mode, the jobs are sorted by their name.
    """
    check_version(api_client, version)
    api_version = version or api_client.jobs_api_version
    if api_version != '2.1' and (expand_tasks or offset or limit or _all):
        click.echo(click.style('ERROR', fg='red') + ': the options --expand-tasks, ' +
                   '--offset, --limit, and --all are only available in API 2.1', err=True)
        return
    jobs_api = JobsApi(api_client)
    has_more = True
    jobs = []
    if _all:
        offset = 0
        limit = 20
    while has_more:
        jobs_json = jobs_api.list_jobs(job_type=job_type, expand_tasks=expand_tasks,
                                       offset=offset, limit=limit, version=version)
        jobs += jobs_json['jobs'] if 'jobs' in jobs_json else []
        has_more = jobs_json.get('has_more', False) and _all
        if has_more:
            offset = offset + \
                (len(jobs_json['jobs']) if 'jobs' in jobs_json else 20)

    out = {'jobs': jobs}
    if OutputClickType.is_json(output):
        click.echo(pretty_format(out))
    else:
        click.echo(tabulate(_jobs_to_table(out),
                   tablefmt='plain', disable_numparse=True))
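
Outside the CLI layer, the same --all pagination can be driven directly against JobsApi.list_jobs; a condensed sketch of the loop above, assuming API 2.1, the same page size of 20, and an api_client configured elsewhere:

jobs_api = JobsApi(api_client)
jobs, offset, limit = [], 0, 20
while True:
    page = jobs_api.list_jobs(offset=offset, limit=limit, version='2.1')
    jobs += page.get('jobs', [])
    if not page.get('has_more', False):
        break
    # Advance by the number of jobs actually returned in this page.
    offset += len(page.get('jobs', []))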
Example #9
class SdkClient():
    def __init__(self, profile=None):
        client = utils.get_api_client(profile)
        self.cluster_client = ClusterApi(client)
        self.jobs_client = JobsApi(client)

    def list_clusters(self):
        return self.cluster_client.list_clusters()

    def get_cluster(self, cluster_id):
        return self.cluster_client.get_cluster(cluster_id)

    def list_jobs(self):
        return self.jobs_client.list_jobs()
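
Typical usage of the wrapper, assuming utils.get_api_client resolves credentials from a named profile (the "DEFAULT" profile here is illustrative):

sdk = SdkClient(profile="DEFAULT")
for job in sdk.list_jobs().get("jobs", []):
    print(job["job_id"], job["settings"]["name"])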
Example #10
def run_now_cli(api_client, job_id, jar_params, notebook_params, python_params,
                spark_submit_params):
    """
    Runs a job with optional per-run parameters.

    Parameter options are specified in json and the format is documented at
    https://docs.databricks.com/api/latest/jobs.html#jobsrunnow.
    """
    jar_params_json = json_loads(jar_params) if jar_params else None
    notebook_params_json = json_loads(notebook_params) if notebook_params else None
    python_params = json_loads(python_params) if python_params else None
    spark_submit_params = json_loads(spark_submit_params) if spark_submit_params else None
    res = JobsApi(api_client).run_now(
        job_id, jar_params_json, notebook_params_json, python_params, spark_submit_params)
    click.echo(pretty_format(res))
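
The same call can be made without the CLI layer; a sketch that triggers a run with notebook parameters only, assuming an already-configured api_client (keyword names follow the run_now call in Example #22):

res = JobsApi(api_client).run_now(
    job_id=123,
    jar_params=None,
    notebook_params={"run_date": "2021-01-01"},
    python_params=None,
    spark_submit_params=None,
)
print(res["run_id"])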
Example #11
def reset_cli(api_client, json_file, json, job_id):
    """
    Resets (edits) the definition of a job.

    The specification for the json option can be found at
    https://docs.databricks.com/api/latest/jobs.html#jobsjobsettings
    """
    if not bool(json_file) ^ bool(json):
        raise RuntimeError('Either --json-file or --json should be provided')
    if json_file:
        with open(json_file, 'r') as f:
            json = f.read()
    deser_json = json_loads(json)
    request_body = {'job_id': job_id, 'new_settings': deser_json}
    JobsApi(api_client).reset_job(request_body)
Example #12
def reset_cli(api_client, json_file, json, job_id):
    """
    Resets (edits) the definition of a job.

    The specification for the json option can be found at
    https://docs.databricks.com/api/latest/jobs.html#jobsjobsettings

    NOTE: The json parameter described above is not the same as what is normally POSTed
    in the request body to the reset endpoint. Instead, it is the object
    defined in the top-level "new_settings" field. The job ID is provided
    by the --job-id option.
    """
    if not bool(json_file) ^ bool(json):
        raise RuntimeError('Either --json-file or --json should be provided')
    if json_file:
        with open(json_file, 'r') as f:
            json = f.read()
    deser_json = json_loads(json)
    request_body = {'job_id': job_id, 'new_settings': deser_json}
    JobsApi(api_client).reset_job(request_body)
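
To make the NOTE concrete: the --json payload is only the new_settings object, and the command wraps it with the job ID before calling reset_job. A small illustration with placeholder values, assuming an already-configured api_client:

# What the user passes via --json / --json-file: just the settings.
new_settings = {"name": "nightly-etl", "max_concurrent_runs": 1}

# What reset_cli actually sends to the /jobs/reset endpoint:
request_body = {"job_id": 42, "new_settings": new_settings}
JobsApi(api_client).reset_job(request_body)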
Example #13
def test_delete_job():
    with mock.patch('databricks_cli.sdk.ApiClient') as api_client_mock:
        api = JobsApi(api_client_mock)
        api.delete_job('1')
        api_client_mock.perform_query.assert_called_with('POST',
                                                         '/jobs/delete',
                                                         data={'job_id': '1'},
                                                         headers=None,
                                                         version=None)
        api.delete_job('1', version='3.0')
        api_client_mock.perform_query.assert_called_with('POST',
                                                         '/jobs/delete',
                                                         data={'job_id': '1'},
                                                         headers=None,
                                                         version='3.0')
Example #14
def test_list_jobs():
    with mock.patch('databricks_cli.sdk.ApiClient') as api_client_mock:
        api = JobsApi(api_client_mock)
        api.list_jobs()
        api_client_mock.perform_query.assert_called_with('GET',
                                                         '/jobs/list',
                                                         data={},
                                                         headers=None,
                                                         version=None)

        api.list_jobs(version='3.0')
        api_client_mock.perform_query.assert_called_with('GET',
                                                         '/jobs/list',
                                                         data={},
                                                         headers=None,
                                                         version='3.0')
Example #15
def test_reset_job():
    with mock.patch('databricks_cli.sdk.ApiClient') as api_client_mock:
        api = JobsApi(api_client_mock)
        api.reset_job({'job_id': '1', 'name': 'new_name'})
        api_client_mock.perform_query.assert_called_with('POST',
                                                         '/jobs/reset',
                                                         data={
                                                             'job_id': '1',
                                                             'name': 'new_name'
                                                         },
                                                         headers=None,
                                                         version=None)

        api.reset_job({'job_id': '1', 'name': 'new_name'}, version='3.0')
        api_client_mock.perform_query.assert_called_with('POST',
                                                         '/jobs/reset',
                                                         data={
                                                             'job_id': '1',
                                                             'name': 'new_name'
                                                         },
                                                         headers=None,
                                                         version='3.0')
Example #16
class StackApi(object):
    def __init__(self, api_client):
        self.jobs_client = JobsApi(api_client)
        self.workspace_client = WorkspaceApi(api_client)
        self.dbfs_client = DbfsApi(api_client)

    def deploy(self, stack_config, stack_status=None, headers=None, **kwargs):
        """
        Deploys a stack given a dict of the stack JSON configuration.

        After going through each of the resources and deploying them, stores status JSON
        of deployment with deploy status of each resource deployment.
        For each resource deployment, stack_status is used to get the associated resource status
        of a resource from the last deployment.

        :param stack_config: Must have the fields of
        'name', the name of the stack and 'resources', a list of stack resources.
        :param stack_status: Must have the fields of 'name', the name of the stack, 'resources',
        a list of stack resources, and 'deployed', a list of resource statuses from a previous
        deployment.
        :return: new_stack_status: The new stack status generated from the deployment of
        the given stack_config.
        """
        click.echo('#' * 80)
        self._validate_config(stack_config)
        if stack_status:
            click.echo('#' * 80)
            self._validate_status(stack_status)
            resource_id_to_status = self._get_resource_to_status_map(
                stack_status)
        else:
            resource_id_to_status = {}

        stack_name = stack_config.get(STACK_NAME)
        click.echo('#' * 80)
        click.echo('Deploying stack {}'.format(stack_name))

        # List of statuses, One for each resource in stack_config[STACK_RESOURCES]
        resource_statuses = []
        click.echo('#' * 80)
        for resource_config in stack_config.get(STACK_RESOURCES):
            # Retrieve resource deployment info from the last deployment.
            resource_map_key = (resource_config.get(RESOURCE_ID),
                                resource_config.get(RESOURCE_SERVICE))
            resource_status = resource_id_to_status.get(resource_map_key) \
                if resource_map_key in resource_id_to_status else None
            # Deploy resource, get resource_status
            new_resource_status = self._deploy_resource(resource_config,
                                                        resource_status,
                                                        headers=headers,
                                                        **kwargs)

            if resource_config.get(RESOURCE_WRITE_STATUS, True):
                resource_statuses.append(new_resource_status)
            click.echo('#' * 80)

        new_stack_status = {
            STACK_NAME: stack_name,
            CLI_VERSION_KEY: CLI_VERSION,
            STACK_DEPLOYED: resource_statuses
        }

        # Validate that the status has been created correctly
        self._validate_status(new_stack_status)
        click.echo('#' * 80)

        return new_stack_status

    def download(self, stack_config, headers=None, **kwargs):
        """
        Downloads a stack given a dict of the stack configuration.
        :param stack_config: dict of stack configuration. Must contain 'name' and 'resources' field.
        :return: None.
        """
        self._validate_config(stack_config)
        stack_name = stack_config.get(STACK_NAME)
        click.echo('Downloading stack {}'.format(stack_name))

        click.echo('#' * 80)
        for resource_config in stack_config.get(STACK_RESOURCES):
            # Deploy resource, get resource_status
            self._download_resource(resource_config, headers=headers, **kwargs)
            click.echo('#' * 80)

    def _deploy_resource(self,
                         resource_config,
                         resource_status=None,
                         headers=None,
                         **kwargs):
        """
        Deploys a resource given a resource information extracted from the stack JSON configuration
        template.

        :param resource_config: A dict of the resource with fields of 'id', 'service'
        and 'properties'.
        ex. {'id': 'example-resource', 'service': 'jobs', 'properties': {...}}
        :param resource_status: A dict of the resource's deployment info from the last
        deployment. Will be None if this is the first deployment.
        ex. {'id': 'example-resource', 'service': 'jobs', 'databricks_id': {...}}
        :return: dict resource_status- A dictionary of deployment information of the
        resource to be stored at deploy time. It includes the resource id of the resource along
        with the databricks id and deploy output of the resource.
        ex. {'id': 'example-resource', 'service': 'jobs', 'databricks_id': {'job_id': 123}}
        """
        resource_id = resource_config.get(RESOURCE_ID)
        resource_service = resource_config.get(RESOURCE_SERVICE)
        resource_properties = resource_config.get(RESOURCE_PROPERTIES)
        databricks_id = resource_status.get(
            RESOURCE_DATABRICKS_ID) if resource_status else None

        if resource_service == JOBS_SERVICE:
            click.echo('Deploying job "{}" with properties: \n{}'.format(
                resource_id,
                json.dumps(resource_properties,
                           indent=2,
                           separators=(',', ': '))))
            new_databricks_id = self._deploy_job(resource_properties,
                                                 databricks_id,
                                                 headers=headers)
        elif resource_service == WORKSPACE_SERVICE:
            click.echo(
                'Deploying workspace asset "{}" with properties \n{}'.format(
                    resource_id,
                    json.dumps(resource_properties,
                               indent=2,
                               separators=(',', ': '))))
            overwrite = kwargs.get('overwrite', False)
            new_databricks_id = self._deploy_workspace(resource_properties,
                                                       databricks_id,
                                                       overwrite,
                                                       headers=headers)
        elif resource_service == DBFS_SERVICE:
            click.echo('Deploying DBFS asset "{}" with properties \n{}'.format(
                resource_id,
                json.dumps(resource_properties,
                           indent=2,
                           separators=(',', ': '))))
            overwrite = kwargs.get('overwrite', False)
            new_databricks_id = self._deploy_dbfs(resource_properties,
                                                  databricks_id,
                                                  overwrite,
                                                  headers=headers)
        else:
            raise StackError(
                'Resource service "{}" not supported'.format(resource_service))

        new_resource_status = {
            RESOURCE_ID: resource_id,
            RESOURCE_SERVICE: resource_service,
            RESOURCE_DATABRICKS_ID: new_databricks_id
        }
        return new_resource_status

    def _download_resource(self, resource_config, headers=None, **kwargs):
        """
        Downloads a resource given a resource information extracted from the stack JSON
        configuration template.

        :param resource_config: A dict of the resource with fields of 'id', 'service' and
        'properties'.
        ex. {'id': 'example-resource', 'service': 'jobs', 'properties': {...}}
        """
        resource_id = resource_config.get(RESOURCE_ID)
        resource_service = resource_config.get(RESOURCE_SERVICE)
        resource_properties = resource_config.get(RESOURCE_PROPERTIES)

        if resource_service == WORKSPACE_SERVICE:
            click.echo(
                'Downloading workspace asset "{}" with properties \n{}'.format(
                    resource_id,
                    json.dumps(resource_properties,
                               indent=2,
                               separators=(',', ': '))))
            overwrite = kwargs.get('overwrite', False)
            self._download_workspace(resource_properties,
                                     overwrite,
                                     headers=headers)
        else:
            click.echo('Resource service "{}" not supported for download. '
                       'skipping.'.format(resource_service))

    def _deploy_job(self,
                    resource_properties,
                    databricks_id=None,
                    headers=None):
        """
        Deploys a job resource: creates a new job if no databricks_id tracks it yet, or updates
        the existing job otherwise. The job is created or updated using the settings specified
        in the inputted job_settings.

        :param resource_properties: A dict of the Databricks JobSettings data structure
        :param databricks_id: A dict object containing 'job_id' field of job identifier in
        Databricks server

        :return: databricks_id: dict containing  a 'job_id' field of the physical job_id of the
        job on databricks.
        """
        job_settings = resource_properties  # resource_properties of jobs are solely job settings.

        if databricks_id:
            job_id = databricks_id.get(JOBS_RESOURCE_JOB_ID)
            self._update_job(job_settings, job_id, headers=headers)
        else:
            job_id = self._put_job(job_settings, headers=headers)
        click.echo("Job deployed on Databricks with Job ID {}".format(job_id))
        databricks_id = {JOBS_RESOURCE_JOB_ID: job_id}
        return databricks_id

    def _put_job(self, job_settings, headers=None):
        """
        Given settings of the job in job_settings, create a new job. For purposes of idempotency
        and to reduce leaked resources in alpha versions of stack deployment, if a job exists
        with the same name, that job will be updated. If multiple jobs are found with the same name,
        the deployment will abort.

        :param job_settings:
        :return: job_id, Physical ID of job on Databricks server.
        """
        job_name = job_settings.get(JOBS_RESOURCE_NAME)
        jobs_same_name = self.jobs_client._list_jobs_by_name(job_name,
                                                             headers=headers)
        if len(jobs_same_name) > 1:
            raise StackError(
                'Multiple jobs with the same name "{}" already exist, aborting'
                ' stack deployment'.format(job_name))
        if len(jobs_same_name) == 1:
            existing_job = jobs_same_name[0]
            creator_name = existing_job.get('creator_user_name')
            timestamp = existing_job.get(
                'created_time') / MS_SEC  # Convert to readable date.
            date_created = datetime.fromtimestamp(timestamp).strftime(
                '%Y-%m-%d %H:%M:%S')
            click.echo(
                'Warning: Job exists with same name "{}" created by {} on {}. Job will '
                'be overwritten'.format(job_name, creator_name, date_created))
            # Calling jobs_client.reset_job directly so as to not call same level function.
            self.jobs_client.reset_job(
                {
                    'job_id': existing_job.get('job_id'),
                    'new_settings': job_settings
                },
                headers=headers)
            return existing_job.get('job_id')
        else:
            job_id = self.jobs_client.create_job(job_settings,
                                                 headers=headers).get('job_id')
            return job_id

    def _update_job(self, job_settings, job_id, headers=None):
        """
        Given job settings and an existing job_id of a job, update the job settings on databricks.

        :param job_settings: job settings to update the job with.
        :param job_id: physical job_id of job in databricks server.
        """
        try:
            self.jobs_client.reset_job(
                {
                    'job_id': job_id,
                    'new_settings': job_settings
                },
                headers=headers)
        except HTTPError:
            raise StackError(
                'Job ID {} in stack status could not be found in the workspace. '
                'Please remove or make necessary changes to the current stack status '
                'to resolve this inconsistency before proceeding. Aborting '
                'stack deployment ...'.format(job_id))

    def _deploy_workspace(self,
                          resource_properties,
                          databricks_id,
                          overwrite,
                          headers=None):
        """
        Deploy workspace asset.

        :param resource_properties: dict of properties for the workspace asset. Must contain the
        'source_path', 'path' and 'object_type' fields.
        :param databricks_id: dict containing physical identifier of workspace asset on databricks.
        Should contain the field 'path'.
        :param overwrite: Whether or not to overwrite the contents of workspace notebooks.
        :return: databricks_id: dict containing the physical ID for the stack status, i.e. the
        workspace path of the notebook or directory on databricks.
        """
        local_path = resource_properties.get(WORKSPACE_RESOURCE_SOURCE_PATH)
        workspace_path = resource_properties.get(WORKSPACE_RESOURCE_PATH)
        object_type = resource_properties.get(WORKSPACE_RESOURCE_OBJECT_TYPE)

        actual_object_type = DIRECTORY if os.path.isdir(
            local_path) else NOTEBOOK
        if object_type != actual_object_type:
            raise StackError('Field "{}" ({}) not consistent '
                             'with actual object type ({})'.format(
                                 WORKSPACE_RESOURCE_OBJECT_TYPE, object_type,
                                 actual_object_type))

        click.echo('Uploading {} from {} to Databricks workspace at {}'.format(
            object_type, local_path, workspace_path))
        if object_type == NOTEBOOK:
            # Inference of notebook language and format
            language_fmt = WorkspaceLanguage.to_language_and_format(local_path)
            if language_fmt is None:
                raise StackError(
                    "Workspace notebook language and format cannot be inferred. "
                    "Please check file extension of notebook file.")
            language, fmt = language_fmt
            # Create needed directories in workspace.
            self.workspace_client.mkdirs(os.path.dirname(workspace_path),
                                         headers=headers)
            self.workspace_client.import_workspace(local_path,
                                                   workspace_path,
                                                   language,
                                                   fmt,
                                                   overwrite,
                                                   headers=headers)
        elif object_type == DIRECTORY:
            self.workspace_client.import_workspace_dir(
                local_path,
                workspace_path,
                overwrite,
                exclude_hidden_files=True,
                headers=headers)
        else:
            # Shouldn't reach here because of verification of object_type above.
            assert False

        if databricks_id and databricks_id[
                WORKSPACE_RESOURCE_PATH] != workspace_path:
            # databricks_id['path'] is the workspace path from the last deployment. Alert when
            # changed
            click.echo("Workspace asset had path changed from {} to {}".format(
                databricks_id[WORKSPACE_RESOURCE_PATH], workspace_path))
        new_databricks_id = {WORKSPACE_RESOURCE_PATH: workspace_path}

        return new_databricks_id

    def _download_workspace(self,
                            resource_properties,
                            overwrite,
                            headers=None):
        """
        Download workspace asset.

        :param resource_properties: dict of properties for the workspace asset. Must contain the
        'source_path', 'path' and 'object_type' fields.
        :param overwrite: Whether or not to overwrite the contents of workspace notebooks.
        """
        local_path = resource_properties.get(WORKSPACE_RESOURCE_SOURCE_PATH)
        workspace_path = resource_properties.get(WORKSPACE_RESOURCE_PATH)
        object_type = resource_properties.get(WORKSPACE_RESOURCE_OBJECT_TYPE)
        click.echo('Downloading {} from Databricks path {} to {}'.format(
            object_type, workspace_path, local_path))
        if object_type == NOTEBOOK:
            # Inference of notebook language and format. A tuple of (language, fmt) or None.
            language_fmt = WorkspaceLanguage.to_language_and_format(local_path)
            if language_fmt is None:
                raise StackError(
                    "Workspace Notebook language and format cannot be inferred. "
                    "Please check file extension of notebook 'source_path'.")
            (_, fmt) = language_fmt
            local_dir = os.path.dirname(os.path.abspath(local_path))
            if not os.path.exists(local_dir):
                os.makedirs(local_dir)
            self.workspace_client.export_workspace(workspace_path,
                                                   local_path,
                                                   fmt,
                                                   overwrite,
                                                   headers=headers)
        elif object_type == DIRECTORY:
            self.workspace_client.export_workspace_dir(workspace_path,
                                                       local_path,
                                                       overwrite,
                                                       headers=headers)
        else:
            raise StackError('Invalid value for "{}" field: {}'.format(
                WORKSPACE_RESOURCE_OBJECT_TYPE, object_type))

    def _deploy_dbfs(self,
                     resource_properties,
                     databricks_id,
                     overwrite,
                     headers=None):
        """
        Deploy dbfs asset.

        :param resource_properties: dict of properties for the dbfs asset. Must contain the
        'source_path', 'path' and 'is_dir' fields.
        :param databricks_id: dict containing physical identifier of dbfs asset on Databricks.
        Should contain the field 'path'.
        :param overwrite: Whether or not to overwrite the contents of dbfs files.
        :return: databricks_id:  a dict that contains the dbfs path of the file on Databricks.
        ex.{"path":"dbfs:/path/in/dbfs"}
        """

        local_path = resource_properties.get(DBFS_RESOURCE_SOURCE_PATH)
        dbfs_path = resource_properties.get(DBFS_RESOURCE_PATH)
        is_dir = resource_properties.get(DBFS_RESOURCE_IS_DIR)

        if is_dir != os.path.isdir(local_path):
            dir_or_file = 'directory' if os.path.isdir(local_path) else 'file'
            raise StackError(
                'local source_path "{}" is found to be a {}, but is not specified'
                ' as one with is_dir: {}.'.format(local_path, dir_or_file,
                                                  str(is_dir).lower()))
        if is_dir:
            click.echo('Uploading directory from {} to DBFS at {}'.format(
                local_path, dbfs_path))
            self.dbfs_client.cp(recursive=True,
                                overwrite=overwrite,
                                src=local_path,
                                dst=dbfs_path,
                                headers=headers)
        else:
            click.echo('Uploading file from {} to DBFS at {}'.format(
                local_path, dbfs_path))
            self.dbfs_client.cp(recursive=False,
                                overwrite=overwrite,
                                src=local_path,
                                dst=dbfs_path,
                                headers=headers)

        if databricks_id and databricks_id[DBFS_RESOURCE_PATH] != dbfs_path:
            # databricks_id['path'] is the dbfs path from the last deployment. Alert when changed
            click.echo("Dbfs asset had path changed from {} to {}".format(
                databricks_id[DBFS_RESOURCE_PATH], dbfs_path))
        new_databricks_id = {DBFS_RESOURCE_PATH: dbfs_path}

        return new_databricks_id

    def _validate_config(self, stack_config):
        """
        Validate fields within a stack configuration. This ensures that an inputted configuration
        has the necessary fields for stack deployment to function well.

        :param stack_config: dict- stack config that is inputted by the user.
        :return: None. Raises errors to stop deployment if there is a problem.
        """
        click.echo('Validating fields in stack configuration...')
        self._assert_fields_in_dict([STACK_NAME, STACK_RESOURCES],
                                    stack_config)

        seen_resource_ids = set(
        )  # Store seen resources to restrict duplicates.
        for resource in stack_config.get(STACK_RESOURCES):
            # Validate that the resource ID exists, then get it.
            self._assert_fields_in_dict([RESOURCE_ID], resource)
            resource_id = resource.get(RESOURCE_ID)

            click.echo('Validating fields in resource with ID "{}"'.format(
                resource_id))
            self._assert_fields_in_dict(
                [RESOURCE_SERVICE, RESOURCE_PROPERTIES], resource)

            resource_service = resource.get(RESOURCE_SERVICE)
            resource_properties = resource.get(RESOURCE_PROPERTIES)

            # Error on duplicate resource ID's
            if resource_id in seen_resource_ids:
                raise StackError(
                    'Duplicate resource ID "{}" found, please resolve.'.format(
                        resource_id))
            seen_resource_ids.add(resource_id)

            # Resource service-specific validations
            click.echo('Validating fields in "{}" of {} resource.'.format(
                RESOURCE_PROPERTIES, resource_service))
            if resource_service == JOBS_SERVICE:
                self._assert_fields_in_dict([JOBS_RESOURCE_NAME],
                                            resource_properties)
            elif resource_service == WORKSPACE_SERVICE:
                self._assert_fields_in_dict([
                    WORKSPACE_RESOURCE_PATH, WORKSPACE_RESOURCE_SOURCE_PATH,
                    WORKSPACE_RESOURCE_OBJECT_TYPE
                ], resource_properties)
            elif resource_service == DBFS_SERVICE:
                self._assert_fields_in_dict([
                    DBFS_RESOURCE_PATH, DBFS_RESOURCE_SOURCE_PATH,
                    DBFS_RESOURCE_IS_DIR
                ], resource_properties)
            else:
                raise StackError('Resource service "{}" not supported'.format(
                    resource_service))

    def _validate_status(self, stack_status):
        """
        Validate fields within a stack status. This ensures that a stack status has the
        necessary fields for stack deployment to function well.

        If there is an error here, then it is either an implementation error that must be fixed by
        a developer or the User edited the stack status file created by the program.

        :param stack_status: dict- stack status that is created by the program.
        :return: None. Raises errors to stop deployment if there is a problem.
        """
        click.echo('Validating fields in stack status...')
        self._assert_fields_in_dict([STACK_NAME, STACK_DEPLOYED], stack_status)

        for resource_status in stack_status.get(STACK_DEPLOYED):
            self._assert_fields_in_dict([RESOURCE_ID], resource_status)
            resource_id = resource_status.get(RESOURCE_ID)
            click.echo(
                'Validating fields in resource status of resource with ID "{}"'
                .format(resource_id))
            self._assert_fields_in_dict(
                [RESOURCE_SERVICE, RESOURCE_DATABRICKS_ID], resource_status)

            resource_service = resource_status.get(RESOURCE_SERVICE)
            resource_databricks_id = resource_status.get(
                RESOURCE_DATABRICKS_ID)

            click.echo(
                'Validating fields in "{}" of {} resource status'.format(
                    RESOURCE_DATABRICKS_ID, resource_service))
            if resource_service == JOBS_SERVICE:
                self._assert_fields_in_dict([JOBS_RESOURCE_JOB_ID],
                                            resource_databricks_id)
            elif resource_service == WORKSPACE_SERVICE:
                self._assert_fields_in_dict([WORKSPACE_RESOURCE_PATH],
                                            resource_databricks_id)
            elif resource_service == DBFS_SERVICE:
                self._assert_fields_in_dict([DBFS_RESOURCE_PATH],
                                            resource_databricks_id)
            else:
                raise StackError(
                    "{} not a valid resource status service".format(
                        resource_service))

    def _assert_fields_in_dict(self, fields, dictionary):
        for field in fields:
            if field not in dictionary:
                raise StackError('Required field "{}" not found'.format(field))

    def _get_resource_to_status_map(self, stack_status):
        """
        Returns a dictionary that maps a resource's (id, service) to the resource's status
        from the last deployment

        The key for this dictionary is the resource's (id, service) so that we don't load
        persisted resources with the wrong resource service.
        """
        return {(resource_status.get(RESOURCE_ID),
                 resource_status.get(RESOURCE_SERVICE)): resource_status
                for resource_status in stack_status.get(STACK_DEPLOYED)}
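
A minimal stack configuration that passes _validate_config above might look like the following; the resource IDs, paths and job settings are illustrative only, and api_client is assumed to be configured elsewhere:

stack_config = {
    "name": "example-stack",
    "resources": [
        {
            "id": "example-job",
            "service": "jobs",
            "properties": {"name": "example-job", "max_concurrent_runs": 1},
        },
        {
            "id": "example-notebook",
            "service": "workspace",
            "properties": {
                "source_path": "notebooks/etl.py",
                "path": "/Users/someone@example.com/etl",
                "object_type": "NOTEBOOK",
            },
        },
    ],
}
new_stack_status = StackApi(api_client).deploy(stack_config)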
Example #17
def get_cli(api_client, job_id):
    """
    Describes the metadata for a job.
    """
    click.echo(pretty_format(JobsApi(api_client).get_job(job_id)))
Example #18
def delete_cli(api_client, job_id):
    """
    Deletes the specified job.
    """
    JobsApi(api_client).delete_job(job_id)
Example #19
def delete_cli(api_client, job_id, version):
    """
    Deletes the specified job.
    """
    check_version(api_client, version)
    JobsApi(api_client).delete_job(job_id, version=version)
Example #20
def jobs_api():
    with mock.patch('databricks_cli.sdk.JobsService') as jobs_service_mock:
        jobs_service_mock.return_value = mock.MagicMock()
        jobs_api_mock = JobsApi(None)
        yield jobs_api_mock
Example #21
def __init__(self, profile=None):
    client = utils.get_api_client(profile)
    self.cluster_client = ClusterApi(client)
    self.jobs_client = JobsApi(client)
Example #22
class DeployToDatabricks(Step):
    def __init__(self, env: ApplicationVersion, config: dict):
        super().__init__(env, config)
        self.vault_name, self.vault_client = KeyVaultClient.vault_and_client(self.config, self.env)
        self.databricks_client = Databricks(self.vault_name, self.vault_client).api_client(self.config)
        self.jobs_api = JobsApi(self.databricks_client)
        self.runs_api = RunsApi(self.databricks_client)

    def schema(self) -> vol.Schema:
        return SCHEMA

    def run(self):
        self.deploy_to_databricks()

    @staticmethod
    def _job_is_streaming(job_config: dict):
        """
        If there is no schedule, the job will not run periodically, so we assume it is a
        streaming job.
        :param job_config: the configuration of the Databricks job
        :return: (bool) whether it is a streaming job
        """
        return "schedule" not in job_config.keys()

    def deploy_to_databricks(self):
        """
        The application parameters (cosmos and eventhub) will be removed from this file as they
        will eventually be set as databricks secrets.
        If the job is a streaming job, this will directly start a new job_run with the new
        configuration. If the job is a batch job, this will not start it manually, assuming the
        schedule has been set correctly.
        """
        for job in self.config["jobs"]:
            app_name = self._construct_name(job["name"])
            job_name = f"{app_name}-{self.env.artifact_tag}"
            job_config = self.create_config(job_name, job)
            is_streaming = self._job_is_streaming(job_config)

            logger.info("Removing old job")
            self.remove_job(self.env.artifact_tag, is_streaming=is_streaming)

            logger.info("Submitting new job with configuration:")
            logger.info(pprint.pformat(job_config))
            self._submit_job(job_config, is_streaming)

    def create_config(self, job_name: str, job_config: dict):
        common_arguments = dict(
            config_file=job_config["config_file"],
            application_name=job_name,
            log_destination=job_name,
            parameters=self._construct_arguments(job_config["arguments"]),
            schedule=self._get_schedule(job_config),
            environment=self.env.environment_formatted,
        )

        root_library_folder = self.config["common"]["databricks_fs_libraries_mount_path"]
        storage_base_path = f"{root_library_folder}/{self.application_name}"
        artifact_path = f"{storage_base_path}/{self.application_name}-{self.env.artifact_tag}"

        if job_config["lang"] == "python":
            wheel_name = get_whl_name(self.application_name, self.env.artifact_tag, ".whl")
            py_main_name = get_main_py_name(self.application_name, self.env.artifact_tag, ".py")
            run_config = DeployToDatabricks._construct_job_config(
                **common_arguments,
                whl_file=f"{root_library_folder}/{wheel_name}",
                python_file=f"{root_library_folder}/{py_main_name}",
            )
        else:  # java/scala jobs
            run_config = DeployToDatabricks._construct_job_config(
                **common_arguments, class_name=job_config["main_name"], jar_file=f"{artifact_path}.jar"
            )
        return run_config

    def _get_schedule(self, job_config: dict) -> Optional[dict]:
        schedule = job_config.get("schedule", None)
        if schedule:
            if "quartz_cron_expression" in schedule:
                return schedule
            else:
                return schedule.get(self.env.environment.lower(), None)

        return schedule

    def _construct_name(self, name: str) -> str:
        postfix = f"-{name}" if name else ""
        return f"{self.application_name}{postfix}"

    @staticmethod
    def _construct_arguments(args: List[dict]) -> list:
        params = []
        for named_arguments_pair in args:
            for k, v in named_arguments_pair.items():
                params.extend([f"--{k}", v])

        return params

    @staticmethod
    def _construct_job_config(config_file: str, **kwargs) -> dict:
        return util.render_file_with_jinja(config_file, kwargs, json.loads)

    def remove_job(self, branch: str, is_streaming: bool):
        """
        Removes the existing job and cancels any running job_run if the application is streaming.
        If the application is batch, it'll let the batch job finish but it will remove the job,
        making sure no other job_runs can start for that old job.
        """

        job_configs = [
            JobConfig(_["settings"]["name"], _["job_id"]) for _ in self.jobs_api.list_jobs()["jobs"]
        ]
        job_ids = self._application_job_id(self.application_name, branch, job_configs)

        if not job_ids:
            logger.info(f"Could not find jobs in list of {pprint.pformat(job_configs)}")

        for job_id in job_ids:
            logger.info(f"Found Job with ID {job_id}")
            if is_streaming:
                self._kill_it_with_fire(job_id)
            logger.info(f"Deleting Job with ID {job_id}")
            self.jobs_api.delete_job(job_id)

    @staticmethod
    def _application_job_id(application_name: str, branch: str, jobs: List[JobConfig]) -> List[int]:
        snapshot = "SNAPSHOT"
        tag = "\d+\.\d+\.\d+"
        pattern = re.compile(rf"^({application_name})-({snapshot}|{tag}|{branch})$")

        return [_.job_id for _ in jobs if has_prefix_match(_.name, application_name, pattern)]

    def _kill_it_with_fire(self, job_id):
        logger.info(f"Finding runs for job_id {job_id}")
        runs = self.runs_api.list_runs(job_id, active_only=True, completed_only=None, offset=None, limit=None)
        # If the response contains no runs, there is nothing to cancel.
        # TODO: if the has_more flag is true, we need to page through the remaining runs.
        if "runs" in runs:
            active_run_ids = [_["run_id"] for _ in runs["runs"]]
            logger.info(f"Canceling active runs {active_run_ids}")
            [self.runs_api.cancel_run(_) for _ in active_run_ids]

    def _submit_job(self, job_config: dict, is_streaming: bool):
        job_resp = self.jobs_api.create_job(job_config)
        logger.info(f"Created Job with ID {job_resp['job_id']}")

        if is_streaming:
            resp = self.jobs_api.run_now(
                job_id=job_resp["job_id"],
                jar_params=None,
                notebook_params=None,
                python_params=None,
                spark_submit_params=None,
            )
            logger.info(f"Created run with ID {resp['run_id']}")
Example #23
class StackApi(object):
    def __init__(self, api_client):
        self.jobs_client = JobsApi(api_client)

    def deploy(self, config_path):  # overwrite to be added
        """
        Deploys a stack given a stack JSON configuration template at path config_path.

        Loads the JSON template as well as status JSON if stack has been deployed before.
        Changes working directory to the same directory as where the config file is, then
        calls on deploy_config to do the stack deployment. Finally stores the new status
        file from the deployment.

        The working directory is changed to that where the JSON template is contained
        so that paths within the stack configuration are relative to the directory of the
        JSON template instead of the directory where this function is called.

        :param config_path: Path to stack JSON configuration template. Must have the fields of
        'name', the name of the stack and 'resources', a list of stack resources.
        :return: None.
        """
        stack_config = self._load_json(config_path)
        status_path = self._generate_stack_status_path(config_path)
        stack_status = self._load_json(status_path)
        config_dir = os.path.dirname(os.path.abspath(config_path))
        cli_dir = os.getcwd()
        os.chdir(config_dir)  # Switch current working directory to where json config is stored
        new_stack_status = self.deploy_config(stack_config, stack_status)
        os.chdir(cli_dir)
        click.echo("Saving stack status to {}".format(status_path))
        self._save_json(status_path, new_stack_status)

    def deploy_config(self, stack_config, stack_status=None):
        """
        Deploys a stack given a parsed stack configuration dict (stack_config).

        After going through each of the resources and deploying them, stores status JSON
        of deployment with deploy status of each resource deployment.
        For each resource deployment, stack_status is used to get the associated resource status
        of a resource from the last deployment.

        :param stack_config: Must have the fields of
        'name', the name of the stack and 'resources', a list of stack resources.
        :param stack_status: Must have the fields of 'name', 'resources', and 'deployed', a list
        of resource statuses from a previous deployment.
        :return: new_stack_status: The new stack status generated from the deployment.
        """
        self._validate_config(stack_config)
        if stack_status:
            self._validate_status(stack_status)
            resource_id_to_status = self._get_resource_to_status_map(stack_status)
        else:
            resource_id_to_status = {}

        stack_name = stack_config.get(STACK_NAME)
        click.echo('Deploying stack {}'.format(stack_name))

        # List of statuses, One for each resource in stack_config[STACK_RESOURCES]
        resource_statuses = []
        click.echo('#' * 80)
        for resource_config in stack_config.get(STACK_RESOURCES):
            # Retrieve resource deployment info from the last deployment.
            resource_map_key = (resource_config.get(RESOURCE_ID),
                                resource_config.get(RESOURCE_SERVICE))
            resource_status = resource_id_to_status.get(resource_map_key) \
                if resource_map_key in resource_id_to_status else None
            # Deploy resource, get resource_status
            new_resource_status = self._deploy_resource(resource_config, resource_status)
            resource_statuses.append(new_resource_status)
            click.echo('#' * 80)

        # stack deploy status is original config with deployed resource statuses added
        new_stack_status = copy.deepcopy(stack_config)
        new_stack_status.update({STACK_DEPLOYED: resource_statuses})
        new_stack_status.update({CLI_VERSION_KEY: CLI_VERSION})

        # Validate that the status has been created correctly
        self._validate_status(new_stack_status)

        return new_stack_status

    def _deploy_resource(self, resource_config, resource_status=None):  # overwrite to be added
        """
        Deploys a resource given a resource information extracted from the stack JSON configuration
        template.

        :param resource_config: A dict of the resource with fields of RESOURCE_ID, RESOURCE_SERVICE
        and RESOURCE_PROPERTIES.
        ex. {'id': 'example-resource', 'service': 'jobs', 'properties': {...}}
        :param resource_status: A dict of the resource's deployment info from the last
        deployment. Will be None if this is the first deployment.
        ex. {'id': 'example-resource', 'service': 'jobs', 'physical_id': {...}}
        :return: dict resource_status- A dictionary of deployment information of the
        resource to be stored at deploy time. It includes the resource id of the resource along
        with the physical id and deploy output of the resource.
        ex. {'id': 'example-resource', 'service': 'jobs', 'physical_id': {'job_id': 123},
        'timestamp': 123456789, 'deploy_output': {..}}
        """
        resource_id = resource_config.get(RESOURCE_ID)
        resource_service = resource_config.get(RESOURCE_SERVICE)
        resource_properties = resource_config.get(RESOURCE_PROPERTIES)
        physical_id = resource_status.get(RESOURCE_PHYSICAL_ID) if resource_status else None

        if resource_service == JOBS_SERVICE:
            click.echo("Deploying job '{}' with properties: \n{}".format(resource_id, json.dumps(
                resource_properties, indent=2, separators=(',', ': '))))
            new_physical_id, deploy_output = self._deploy_job(resource_properties,
                                                              physical_id)
        else:
            raise StackError("Resource service '{}' not supported".format(resource_service))

        new_resource_status = {RESOURCE_ID: resource_id,
                               RESOURCE_SERVICE: resource_service,
                               RESOURCE_DEPLOY_TIMESTAMP:
                                   # Milliseconds since epoch.
                                   int(time.mktime(datetime.now().timetuple()) * MS_SEC),
                               RESOURCE_PHYSICAL_ID: new_physical_id,
                               RESOURCE_DEPLOY_OUTPUT: deploy_output}
        return new_resource_status

    def _deploy_job(self, resource_properties, physical_id=None):
        """
        Deploys a job resource: creates a new job if no physical_id tracks it yet, or updates the
        existing job otherwise. The job is created or updated using the settings specified in the
        inputted job_settings.

        :param resource_properties: A dict of the Databricks JobSettings data structure
        :param physical_id: A dict object containing 'job_id' field of job identifier in Databricks
        server

        :return: tuple of (physical_id, deploy_output), where physical_id contains a 'job_id' field
        of the physical job_id of the job on databricks. deploy_output is the output of the job
        from databricks when a GET request is called for it.
        """
        job_settings = resource_properties  # resource_properties of jobs are solely job settings.

        if physical_id:
            job_id = physical_id.get('job_id')
            self._update_job(job_settings, job_id)
        else:
            job_id = self._put_job(job_settings)
        click.echo("Job deployed on Databricks with Job ID {}".format(job_id))
        physical_id = {'job_id': job_id}
        deploy_output = self.jobs_client.get_job(job_id)
        return physical_id, deploy_output

    def _put_job(self, job_settings):
        """
        Given settings of the job in job_settings, create a new job. For purposes of idempotency
        and to reduce leaked resources in alpha versions of stack deployment, if a job exists
        with the same name, that job will be updated. If multiple jobs are found with the same name,
        the deployment will abort.

        :param job_settings:
        :return: job_id, Physical ID of job on Databricks server.
        """
        if 'name' not in job_settings:
            raise StackError("Please supply 'name' in job resource 'properties'")
        job_name = job_settings.get('name')
        jobs_same_name = self.jobs_client._list_jobs_by_name(job_name)
        if len(jobs_same_name) > 1:
            raise StackError("Multiple jobs with the same name '{}' already exist, aborting"
                             " stack deployment".format(job_name))
        elif len(jobs_same_name) == 1:
            existing_job = jobs_same_name[0]
            creator_name = existing_job.get('creator_user_name')
            timestamp = existing_job.get('created_time') / MS_SEC  # Convert to readable date.
            date_created = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
            click.echo("Warning: Job exists with same name '{}' created by {} on {}. Job will "
                       "be overwritten".format(job_name, creator_name, date_created))
            # Calling jobs_client.reset_job directly so as to not call same level function.
            self.jobs_client.reset_job({'job_id': existing_job.get('job_id'),
                                        'new_settings': job_settings})
            return existing_job.get('job_id')
        else:
            job_id = self.jobs_client.create_job(job_settings).get('job_id')
            return job_id
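
# Illustrative standalone sketch (not part of the original listing) of the same
# create-or-reset-by-name flow, driven directly through a JobsApi instance
# (e.g. jobs_client = JobsApi(api_client)); the helper name and the RuntimeError
# are assumptions made for this example.
def put_job_idempotent(jobs_client, job_settings):
    """Create the job, or reset an existing job that already has the same 'name'."""
    matches = jobs_client._list_jobs_by_name(job_settings['name'])
    if len(matches) > 1:
        raise RuntimeError("Multiple jobs named {!r}, aborting".format(job_settings['name']))
    if matches:
        job_id = matches[0]['job_id']
        jobs_client.reset_job({'job_id': job_id, 'new_settings': job_settings})
        return job_id
    return jobs_client.create_job(job_settings)['job_id']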

    def _update_job(self, job_settings, job_id):
        """
        Given job settings and the job_id of an existing job, update the job settings on Databricks.

        :param job_settings: job settings to update the job with.
        :param job_id: physical job_id of the job on the Databricks server.
        """
        self.jobs_client.reset_job({'job_id': job_id, 'new_settings': job_settings})

    def _validate_config(self, stack_config):
        """
        Validate fields within a stack configuration. This ensures that an inputted configuration
        has the necessary fields for stack deployment to function well.

        :param stack_config: dict- stack config that is inputted by the user.
        :return: None. Raises errors to stop deployment if there is a problem.
        """
        if STACK_NAME not in stack_config:
            raise StackError("'{}' not in configuration".format(STACK_NAME))
        if STACK_RESOURCES not in stack_config:
            raise StackError("'{}' not in configuration".format(STACK_RESOURCES))
        seen_resource_ids = set()  # Store seen resources to restrict duplicates.
        for resource in stack_config.get(STACK_RESOURCES):
            if RESOURCE_ID not in resource:
                raise StackError("{} doesn't exist in resource config".format(RESOURCE_ID))
            if RESOURCE_SERVICE not in resource:
                raise StackError("{} doesn't exist in resource config".format(RESOURCE_SERVICE))
            if RESOURCE_PROPERTIES not in resource:
                raise StackError("{} doesn't exist in resource config".format(RESOURCE_PROPERTIES))
            # Error on duplicate resource IDs
            resource_id = resource.get(RESOURCE_ID)
            if resource_id in seen_resource_ids:
                raise StackError("Duplicate resource ID '{}' found, please resolve.".format(
                    resource_id))
            seen_resource_ids.add(resource_id)

    def _validate_status(self, stack_status):
        """
        Validate fields within a stack status. This ensures that a stack status has the
        necessary fields for stack deployment to function well.

        If there is an error here, then it is either an implementation error that must be fixed by
        a developer, or the user edited the stack status file created by the program.

        :param stack_status: dict- stack status that is created by the program.
        :return: None. Raises errors to stop deployment if there is a problem.
        """
        if STACK_NAME not in stack_status:
            raise StackError("'{}' not in status.".format(STACK_NAME))
        if STACK_RESOURCES not in stack_status:
            raise StackError("'{}' not in status".format(STACK_RESOURCES))
        if STACK_DEPLOYED not in stack_status:
            raise StackError("'{}' not in status".format(STACK_DEPLOYED))
        for deployed_resource in stack_status.get(STACK_DEPLOYED):
            if RESOURCE_ID not in deployed_resource:
                raise StackError("{} doesn't exist in deployed resource status".format(
                    RESOURCE_ID))
            if RESOURCE_SERVICE not in deployed_resource:
                raise StackError("{} doesn't exist in deployed resource status".format(
                    RESOURCE_SERVICE))
            if RESOURCE_PHYSICAL_ID not in deployed_resource:
                raise StackError("{} doesn't exist in deployed resource status".format(
                    RESOURCE_PHYSICAL_ID))

    def _get_resource_to_status_map(self, stack_status):
        """
        Returns a dictionary that maps a resource's (id, service) to the resource's status
        from the last deployment.

        The key for this dictionary is the resource's (id, service) so that we don't load
        persisted resources with the wrong resource service.
        """
        return {
            (resource_status.get(RESOURCE_ID), resource_status.get(RESOURCE_SERVICE)):
                resource_status
            for resource_status in stack_status.get(STACK_DEPLOYED)
        }
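
# Illustrative shape of the lookup map built above (placeholder values): keyed by
# (id, service) so a 'jobs' status is never reused for, say, a 'workspace'
# resource that happens to share the same id.
example_resource_to_status = {
    ('example-job', 'jobs'): {
        'id': 'example-job',
        'service': 'jobs',
        'physical_id': {'job_id': 123},
    },
}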

    def _generate_stack_status_path(self, stack_path):
        """
        Given a path to the stack configuration template JSON file, generates a path to where the
        deployment status JSON will be stored after successful deployment of the stack.

        :param stack_path: Path to the stack config template JSON file
        :return: The path to the stack status file.

        >>> self._generate_stack_status_path('./stack.json')
        './stack.deployed.json'
        """
        stack_status_insert = 'deployed'
        stack_path_split = stack_path.split('.')
        stack_path_split.insert(-1, stack_status_insert)
        return '.'.join(stack_path_split)
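
# Additional worked example of the derivation above (pure path manipulation, not
# part of the original listing); a config path with several dots keeps them all:
parts = 'conf/stack.prod.json'.split('.')   # ['conf/stack', 'prod', 'json']
parts.insert(-1, 'deployed')                # ['conf/stack', 'prod', 'deployed', 'json']
assert '.'.join(parts) == 'conf/stack.prod.deployed.json'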

    def _load_json(self, path):
        """
        Parse a JSON file into a dict.
        Returns an empty dictionary if the path doesn't exist.

        :param path: Path of the JSON file to load.
        :return: dict of the parsed JSON contents.
        """
        stack_conf = {}
        if os.path.exists(path):
            with open(path, 'r') as f:
                stack_conf = json.load(f)
        return stack_conf

    def _save_json(self, path, data):
        """
        Writes data to a JSON file.

        :param path: Path of JSON file.
        :param data: dict of data to be written to the JSON file.
        :return: None
        """
        with open(path, 'w') as f:
            json.dump(data, f, indent=2, sort_keys=True)
Ejemplo n.º 24
0
 def __init__(self, api_client):
     self.jobs_client = JobsApi(api_client)
     self.workspace_client = WorkspaceApi(api_client)
     self.dbfs_client = DbfsApi(api_client)
Ejemplo n.º 25
0
class StackApi(object):
    def __init__(self, api_client):
        self.jobs_client = JobsApi(api_client)
        self.workspace_client = WorkspaceApi(api_client)
        self.dbfs_client = DbfsApi(api_client)

    def deploy(self, config_path, **kwargs):
        """
        Deploys a stack given stack JSON configuration template at path config_path.

        Loads the JSON template as well as status JSON if stack has been deployed before.

        The working directory is changed to that where the JSON template is contained
        so that paths within the stack configuration are relative to the directory of the
        JSON template instead of the directory where this function is called.

        :param config_path: Path to stack JSON configuration template. Must have the fields of
        'name', the name of the stack and 'resources', a list of stack resources.
        :return: None.
        """
        stack_config = self._load_json(config_path)
        status_path = self._generate_stack_status_path(config_path)
        stack_status = self._load_json(status_path)
        config_dir = os.path.dirname(os.path.abspath(config_path))
        cli_dir = os.getcwd()
        os.chdir(config_dir)  # Switch current working directory to where json config is stored
        new_stack_status = self.deploy_config(stack_config, stack_status, **kwargs)
        os.chdir(cli_dir)
        click.echo("Saving stack status to {}".format(status_path))
        self._save_json(status_path, new_stack_status)
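
# Illustrative usage sketch (not part of the original listing, shown outside the
# class): deploying a stack from a config file. The host, token, and config path
# below are placeholders.
from databricks_cli.sdk import ApiClient

client = ApiClient(host='https://<workspace-url>', token='<personal-access-token>')
stack_api = StackApi(client)
stack_api.deploy('./stack.json', overwrite=True)
# On success a ./stack.deployed.json status file is written next to the config.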

    def download(self, config_path, **kwargs):
        """
        Downloads a stack given stack JSON configuration template at path config_path.

        The working directory is changed to that where the JSON template is contained
        so that paths within the stack configuration are relative to the directory of the
        JSON template instead of the directory where this function is called.

        :param config_path: Path to stack JSON configuration template. Must have the fields of
        'name', the name of the stack and 'resources', a list of stack resources.
        :return: None.
        """
        stack_config = self._load_json(config_path)
        config_dir = os.path.dirname(os.path.abspath(config_path))
        cli_dir = os.getcwd()
        os.chdir(config_dir)  # Switch current working directory to where json config is stored
        self.download_from_config(stack_config, **kwargs)
        os.chdir(cli_dir)
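
# Illustrative sketch (placeholders, as in the deploy example above): downloading
# the stack's workspace assets back to their local 'source_path' locations.
from databricks_cli.sdk import ApiClient

stack_api = StackApi(ApiClient(host='https://<workspace-url>', token='<personal-access-token>'))
stack_api.download('./stack.json', overwrite=True)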

    def deploy_config(self, stack_config, stack_status=None, **kwargs):
        """
        Deploys a stack given a dict of the stack configuration template.

        After going through each of the resources and deploying them, stores status JSON
        of deployment with deploy status of each resource deployment.
        For each resource deployment, stack_status is used to get the associated resource status
        of a resource from the last deployment.

        :param stack_config: dict of the stack configuration template. Must have the fields
        'name', the name of the stack, and 'resources', a list of stack resources.
        :param stack_status: dict of the stack status from the last deployment, or None if the
        stack has not been deployed before.
        :return: dict of the new stack status, containing a deployed-resource status for each
        resource in the configuration.
        """
        click.echo('#' * 80)
        self._validate_config(stack_config)
        if stack_status:
            click.echo('#' * 80)
            self._validate_status(stack_status)
            resource_id_to_status = self._get_resource_to_status_map(stack_status)
        else:
            resource_id_to_status = {}

        stack_name = stack_config.get(STACK_NAME)
        click.echo('#' * 80)
        click.echo('Deploying stack {}'.format(stack_name))

        # List of statuses, one for each resource in stack_config[STACK_RESOURCES]
        resource_statuses = []
        click.echo('#' * 80)
        for resource_config in stack_config.get(STACK_RESOURCES):
            # Retrieve resource deployment info from the last deployment.
            resource_map_key = (resource_config.get(RESOURCE_ID),
                                resource_config.get(RESOURCE_SERVICE))
            resource_status = resource_id_to_status.get(resource_map_key)
            # Deploy resource, get resource_status
            new_resource_status = self._deploy_resource(resource_config, resource_status, **kwargs)
            resource_statuses.append(new_resource_status)
            click.echo('#' * 80)
        # stack deploy status is original config with deployed resource statuses added
        new_stack_status = copy.deepcopy(stack_config)
        new_stack_status.update({STACK_DEPLOYED: resource_statuses})
        new_stack_status.update({CLI_VERSION_KEY: CLI_VERSION})

        # Validate that the status has been created correctly
        self._validate_status(new_stack_status)
        click.echo('#' * 80)

        return new_stack_status
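
# Illustrative minimal stack config (placeholders) of the shape deploy_config()
# validates and deploys; the literal keys assume the usual string values of the
# constants referenced above ('name', 'resources', 'id', 'service', 'properties').
example_stack_config = {
    'name': 'example-stack',
    'resources': [
        {
            'id': 'example-job',
            'service': 'jobs',
            'properties': {
                'name': 'example job',
                'notebook_task': {'notebook_path': '/Users/someone@example.com/etl'},
            },
        },
    ],
}
# new_status = StackApi(api_client).deploy_config(example_stack_config)  # api_client: placeholder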

    def download_from_config(self, stack_config, **kwargs):
        """
        Downloads a stack given a dict of the stack configuration.
        :param stack_config: dict of stack configuration. Must contain the 'name' and 'resources' fields.
        :return: None.
        """
        self._validate_config(stack_config)
        stack_name = stack_config.get(STACK_NAME)
        click.echo('Downloading stack {}'.format(stack_name))

        click.echo('#' * 80)
        for resource_config in stack_config.get(STACK_RESOURCES):
            # Deploy resource, get resource_status
            self._download_resource(resource_config, **kwargs)
            click.echo('#' * 80)

    def _deploy_resource(self, resource_config, resource_status=None, **kwargs):
        """
        Deploys a resource given a resource information extracted from the stack JSON configuration
        template.

        :param resource_config: A dict of the resource with fields of 'id', 'service'
        and 'properties'.
        ex. {'id': 'example-resource', 'service': 'jobs', 'properties': {...}}
        :param resource_status: A dict of the resource's deployment info from the last
        deployment. Will be None if this is the first deployment.
        ex. {'id': 'example-resource', 'service': 'jobs', 'physical_id': {...}}
        :return: dict resource_status: deployment information for the resource, stored at
        deploy time. It includes the resource's id along with its physical id and deploy
        output.
        ex. {'id': 'example-resource', 'service': 'jobs', 'physical_id': {'job_id': 123},
        'timestamp': 123456789, 'deploy_output': {..}}
        """
        resource_id = resource_config.get(RESOURCE_ID)
        resource_service = resource_config.get(RESOURCE_SERVICE)
        resource_properties = resource_config.get(RESOURCE_PROPERTIES)
        physical_id = resource_status.get(RESOURCE_PHYSICAL_ID) if resource_status else None

        if resource_service == JOBS_SERVICE:
            click.echo("Deploying job '{}' with properties: \n{}".format(resource_id, json.dumps(
                resource_properties, indent=2, separators=(',', ': '))))
            new_physical_id, deploy_output = self._deploy_job(resource_properties,
                                                              physical_id)
        elif resource_service == WORKSPACE_SERVICE:
            click.echo(
                "Deploying workspace asset '{}' with properties \n{}"
                .format(
                    resource_id, json.dumps(resource_properties, indent=2, separators=(',', ': '))
                )
            )
            overwrite = kwargs.get('overwrite', False)
            new_physical_id, deploy_output = self._deploy_workspace(resource_properties,
                                                                    physical_id,
                                                                    overwrite)
        elif resource_service == DBFS_SERVICE:
            click.echo(
                "Deploying DBFS asset '{}' with properties \n{}".format(
                    resource_id, json.dumps(resource_properties, indent=2, separators=(',', ': '))
                )
            )
            overwrite = kwargs.get('overwrite', False)
            new_physical_id, deploy_output = self._deploy_dbfs(resource_properties,
                                                               physical_id,
                                                               overwrite)
        else:
            raise StackError("Resource service '{}' not supported".format(resource_service))

        new_resource_status = {RESOURCE_ID: resource_id,
                               RESOURCE_SERVICE: resource_service,
                               RESOURCE_DEPLOY_TIMESTAMP:
                                   # Milliseconds since epoch.
                                   int(time.mktime(datetime.now().timetuple()) * MS_SEC),
                               RESOURCE_PHYSICAL_ID: new_physical_id,
                               RESOURCE_DEPLOY_OUTPUT: deploy_output}
        return new_resource_status

    def _download_resource(self, resource_config, **kwargs):
        """
        Downloads a resource given a resource information extracted from the stack JSON
        configuration template.

        :param resource_config: A dict of the resource with fields of 'id', 'service' and
        'properties'.
        ex. {'id': 'example-resource', 'service': 'jobs', 'properties': {...}}
        """
        resource_id = resource_config.get(RESOURCE_ID)
        resource_service = resource_config.get(RESOURCE_SERVICE)
        resource_properties = resource_config.get(RESOURCE_PROPERTIES)

        if resource_service == WORKSPACE_SERVICE:
            click.echo(
                "Downloading workspace asset '{}' with properties \n{}"
                .format(
                    resource_id, json.dumps(resource_properties, indent=2, separators=(',', ': '))
                )
            )
            overwrite = kwargs.get('overwrite', False)
            self._download_workspace(resource_properties, overwrite)
        else:
            click.echo("Resource service '{}' not supported for download. "
                       "skipping.".format(resource_service))

    def _deploy_job(self, resource_properties, physical_id=None):
        """
        Deploys a job resource by creating a new job when no physical_id is tracked for it,
        or by updating the existing job otherwise. The job is created or updated using the
        settings specified in job_settings.

        :param resource_properties: A dict of the Databricks JobSettings data structure.
        :param physical_id: A dict containing the 'job_id' field of the job identifier on the
        Databricks server, or None if the job has not been deployed before.

        :return: tuple of (physical_id, deploy_output), where physical_id contains the 'job_id'
        of the job on Databricks and deploy_output is the job's metadata returned by a GET
        request for it.
        """
        job_settings = resource_properties  # resource_properties of jobs are solely job settings.

        if physical_id:
            job_id = physical_id.get(JOBS_RESOURCE_JOB_ID)
            self._update_job(job_settings, job_id)
        else:
            job_id = self._put_job(job_settings)
        click.echo("Job deployed on Databricks with Job ID {}".format(job_id))
        physical_id = {JOBS_RESOURCE_JOB_ID: job_id}
        deploy_output = self.jobs_client.get_job(job_id)
        return physical_id, deploy_output

    def _put_job(self, job_settings):
        """
        Given settings of the job in job_settings, create a new job. For purposes of idempotency
        and to reduce leaked resources in alpha versions of stack deployment, if a job exists
        with the same name, that job will be updated. If multiple jobs are found with the same name,
        the deployment will abort.

        :param job_settings: dict of Databricks job settings used to create or update the job.
        :return: job_id, Physical ID of job on Databricks server.
        """
        job_name = job_settings.get(JOBS_RESOURCE_NAME)
        jobs_same_name = self.jobs_client._list_jobs_by_name(job_name)
        if len(jobs_same_name) > 1:
            raise StackError("Multiple jobs with the same name '{}' already exist, aborting"
                             " stack deployment".format(job_name))
        elif len(jobs_same_name) == 1:
            existing_job = jobs_same_name[0]
            creator_name = existing_job.get('creator_user_name')
            timestamp = existing_job.get('created_time') / MS_SEC  # Convert milliseconds to seconds.
            date_created = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
            click.echo("Warning: Job exists with same name '{}' created by {} on {}. Job will "
                       "be overwritten".format(job_name, creator_name, date_created))
            # Calling jobs_client.reset_job directly so as to not call same level function.
            self.jobs_client.reset_job({'job_id': existing_job.get('job_id'),
                                        'new_settings': job_settings})
            return existing_job.get('job_id')
        else:
            job_id = self.jobs_client.create_job(job_settings).get('job_id')
            return job_id

    def _update_job(self, job_settings, job_id):
        """
        Given job settings and the job_id of an existing job, update the job settings on Databricks.

        :param job_settings: job settings to update the job with.
        :param job_id: physical job_id of the job on the Databricks server.
        """
        self.jobs_client.reset_job({'job_id': job_id, 'new_settings': job_settings})

    def _deploy_workspace(self, resource_properties, physical_id, overwrite):
        """
        Deploy workspace asset.

        :param resource_properties: dict of properties for the workspace asset. Must contain the
        'source_path', 'path' and 'object_type' fields.
        :param physical_id: dict containing the physical identifier of the workspace asset on
        Databricks. Should contain the field 'path'.
        :param overwrite: Whether or not to overwrite the contents of workspace notebooks.
        :return: (dict, dict) of (physical_id, deploy_output). physical_id is the physical ID for
        the stack status and contains the workspace path of the notebook or directory on
        Databricks. deploy_output is the initial information about the asset on Databricks at
        deploy time returned by the REST API.
        """
        local_path = resource_properties.get(WORKSPACE_RESOURCE_SOURCE_PATH)
        workspace_path = resource_properties.get(WORKSPACE_RESOURCE_PATH)
        object_type = resource_properties.get(WORKSPACE_RESOURCE_OBJECT_TYPE)

        actual_object_type = DIRECTORY if os.path.isdir(local_path) else NOTEBOOK
        if object_type != actual_object_type:
            raise StackError("Field '{}' ({}) not consistent"
                             "with actual object type ({})".format(WORKSPACE_RESOURCE_OBJECT_TYPE,
                                                                   object_type,
                                                                   actual_object_type))

        click.echo('Uploading {} from {} to Databricks workspace at {}'.format(object_type,
                                                                               local_path,
                                                                               workspace_path))
        if object_type == NOTEBOOK:
            # Inference of notebook language and format
            language_fmt = WorkspaceLanguage.to_language_and_format(local_path)
            if language_fmt is None:
                raise StackError("Workspace notebook language and format cannot be inferred"
                                 "Please check file extension of notebook file.")
            language, fmt = language_fmt
            # Create needed directories in workspace.
            self.workspace_client.mkdirs(os.path.dirname(workspace_path))
            self.workspace_client.import_workspace(local_path, workspace_path, language, fmt,
                                                   overwrite)
        elif object_type == DIRECTORY:
            self.workspace_client.import_workspace_dir(local_path, workspace_path, overwrite,
                                                       exclude_hidden_files=True)
        else:
            # Shouldn't reach here because of verification of object_type above.
            assert False

        if physical_id and physical_id[WORKSPACE_RESOURCE_PATH] != workspace_path:
            # physical_id['path'] is the workspace path from the last deployment. Alert when changed
            click.echo("Workspace asset had path changed from {} to {}"
                       .format(physical_id[WORKSPACE_RESOURCE_PATH], workspace_path))
        new_physical_id = {WORKSPACE_RESOURCE_PATH: workspace_path}
        deploy_output = self.workspace_client.client.get_status(workspace_path)

        return new_physical_id, deploy_output
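
# Illustrative 'workspace' resource properties (placeholders) of the shape
# _deploy_workspace() expects; the literal keys and object-type values assume
# the usual string constants referenced above.
example_workspace_properties = {
    'source_path': 'notebooks/etl.py',          # local notebook or directory
    'path': '/Users/someone@example.com/etl',   # target workspace path
    'object_type': 'NOTEBOOK',                  # or 'DIRECTORY' for a folder
}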

    def _download_workspace(self, resource_properties, overwrite):
        """
        Download workspace asset.

        :param resource_properties: dict of properties for the workspace asset. Must contain the
        'source_path', 'path' and 'object_type' fields.
        :param overwrite: Whether or not to overwrite the contents of workspace notebooks.
        """
        local_path = resource_properties.get(WORKSPACE_RESOURCE_SOURCE_PATH)
        workspace_path = resource_properties.get(WORKSPACE_RESOURCE_PATH)
        object_type = resource_properties.get(WORKSPACE_RESOURCE_OBJECT_TYPE)
        click.echo('Downloading {} from Databricks path {} to {}'.format(object_type,
                                                                         workspace_path,
                                                                         local_path))
        if object_type == NOTEBOOK:
            # Inference of notebook language and format. Returns a tuple of (language, fmt) or None.
            language_fmt = WorkspaceLanguage.to_language_and_format(local_path)
            if language_fmt is None:
                raise StackError("Workspace Notebook language and format cannot be inferred."
                                 "Please check file extension of notebook 'source_path'.")
            (_, fmt) = language_fmt
            local_dir = os.path.dirname(os.path.abspath(local_path))
            if not os.path.exists(local_dir):
                os.makedirs(local_dir)
            self.workspace_client.export_workspace(workspace_path, local_path, fmt, overwrite)
        elif object_type == DIRECTORY:
            self.workspace_client.export_workspace_dir(workspace_path, local_path, overwrite)
        else:
            raise StackError("Invalid value for '{}' field: {}"
                             .format(WORKSPACE_RESOURCE_OBJECT_TYPE, object_type))

    def _deploy_dbfs(self, resource_properties, physical_id, overwrite):
        """
        Deploy dbfs asset.

        :param resource_properties: dict of properties for the dbfs asset. Must contain the
        'source_path', 'path' and 'is_dir' fields.
        :param physical_id: dict containing physical identifier of dbfs asset on Databricks.
        Should contain the field 'path'.
        :param overwrite: Whether or not to overwrite the contents of dbfs files.
        :return: (dict, dict) of (physical_id, deploy_output). physical_id is a dict that
        contains the dbfs path of the file on Databricks.
        ex.{"path":"dbfs:/path/in/dbfs"}
        deploy_output is the initial information about the dbfs asset at deploy time
        returned by the REST API.
        """

        local_path = resource_properties.get(DBFS_RESOURCE_SOURCE_PATH)
        dbfs_path = resource_properties.get(DBFS_RESOURCE_PATH)
        is_dir = resource_properties.get(DBFS_RESOURCE_IS_DIR)

        if is_dir != os.path.isdir(local_path):
            dir_or_file = 'directory' if os.path.isdir(local_path) else 'file'
            raise StackError("local source_path '{}' is found to be a {}, but is not specified"
                             " as one with is_dir: {}."
                             .format(local_path, dir_or_file, str(is_dir).lower()))
        if is_dir:
            click.echo('Uploading directory from {} to DBFS at {}'.format(local_path, dbfs_path))
            self.dbfs_client.cp(recursive=True, overwrite=overwrite, src=local_path, dst=dbfs_path)
        else:
            click.echo('Uploading file from {} to DBFS at {}'.format(local_path, dbfs_path))
            self.dbfs_client.cp(recursive=False, overwrite=overwrite, src=local_path, dst=dbfs_path)

        if physical_id and physical_id[DBFS_RESOURCE_PATH] != dbfs_path:
            # physical_id['path'] is the dbfs path from the last deployment. Alert when changed
            click.echo("Dbfs asset had path changed from {} to {}"
                       .format(physical_id[DBFS_RESOURCE_PATH], dbfs_path))
        new_physical_id = {DBFS_RESOURCE_PATH: dbfs_path}
        deploy_output = self.dbfs_client.client.get_status(dbfs_path)

        return new_physical_id, deploy_output
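
# Illustrative 'dbfs' resource properties (placeholders) of the shape
# _deploy_dbfs() expects; the literal keys assume the usual string constants above.
example_dbfs_properties = {
    'source_path': 'dist/my_library.whl',            # local file or directory
    'path': 'dbfs:/stacks/example/my_library.whl',   # target DBFS path
    'is_dir': False,                                 # must match os.path.isdir(source_path)
}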

    def _validate_config(self, stack_config):
        """
        Validate fields within a stack configuration. This ensures that an inputted configuration
        has the necessary fields for stack deployment to function well.

        :param stack_config: dict- stack config that is inputted by the user.
        :return: None. Raises errors to stop deployment if there is a problem.
        """
        click.echo('Validating fields in stack configuration...')
        self._assert_fields_in_dict([STACK_NAME, STACK_RESOURCES], stack_config)

        seen_resource_ids = set()  # Store seen resources to restrict duplicates.
        for resource in stack_config.get(STACK_RESOURCES):
            # Validate that the resource ID exists, then get it.
            self._assert_fields_in_dict([RESOURCE_ID], resource)
            resource_id = resource.get(RESOURCE_ID)

            click.echo('Validating fields in resource with ID "{}"'.format(resource_id))
            self._assert_fields_in_dict([RESOURCE_SERVICE, RESOURCE_PROPERTIES], resource)

            resource_service = resource.get(RESOURCE_SERVICE)
            resource_properties = resource.get(RESOURCE_PROPERTIES)

            # Error on duplicate resource IDs
            if resource_id in seen_resource_ids:
                raise StackError('Duplicate resource ID "{}" found, please resolve.'.format(
                    resource_id))
            seen_resource_ids.add(resource_id)

            # Resource service-specific validations
            click.echo('Validating fields in "{}" of {} resource.'
                       .format(RESOURCE_PROPERTIES, resource_service))
            if resource_service == JOBS_SERVICE:
                self._assert_fields_in_dict([JOBS_RESOURCE_NAME], resource_properties)
            elif resource_service == WORKSPACE_SERVICE:
                self._assert_fields_in_dict(
                    [WORKSPACE_RESOURCE_PATH, WORKSPACE_RESOURCE_SOURCE_PATH,
                     WORKSPACE_RESOURCE_OBJECT_TYPE], resource_properties)
            elif resource_service == DBFS_SERVICE:
                self._assert_fields_in_dict(
                    [DBFS_RESOURCE_PATH, DBFS_RESOURCE_SOURCE_PATH,
                     DBFS_RESOURCE_IS_DIR], resource_properties)
            else:
                raise StackError("Resource service '{}' not supported".format(resource_service))

    def _validate_status(self, stack_status):
        """
        Validate fields within a stack status. This ensures that a stack status has the
        necessary fields for stack deployment to function well.

        If there is an error here, then it is either an implementation error that must be fixed by
        a developer, or the user edited the stack status file created by the program.

        :param stack_status: dict- stack status that is created by the program.
        :return: None. Raises errors to stop deployment if there is a problem.
        """
        click.echo('Validating fields in stack status...')
        self._assert_fields_in_dict([STACK_NAME, STACK_RESOURCES, STACK_DEPLOYED], stack_status)

        for resource_status in stack_status.get(STACK_DEPLOYED):
            self._assert_fields_in_dict([RESOURCE_ID], resource_status)
            resource_id = resource_status.get(RESOURCE_ID)
            click.echo('Validating fields in resource status of resource with ID "{}"'
                       .format(resource_id))
            self._assert_fields_in_dict([RESOURCE_SERVICE, RESOURCE_PHYSICAL_ID,
                                         RESOURCE_DEPLOY_OUTPUT], resource_status)

            resource_service = resource_status.get(RESOURCE_SERVICE)
            resource_physical_id = resource_status.get(RESOURCE_PHYSICAL_ID)

            click.echo('Validating fields in "{}" of {} resource status'
                       .format(RESOURCE_PHYSICAL_ID, resource_service))
            if resource_service == JOBS_SERVICE:
                self._assert_fields_in_dict([JOBS_RESOURCE_JOB_ID], resource_physical_id)
            elif resource_service == WORKSPACE_SERVICE:
                self._assert_fields_in_dict([WORKSPACE_RESOURCE_PATH], resource_physical_id)
            elif resource_service == DBFS_SERVICE:
                self._assert_fields_in_dict([DBFS_RESOURCE_PATH], resource_physical_id)
            else:
                raise StackError("{} not a valid resource status service".format(resource_service))

    def _assert_fields_in_dict(self, fields, dictionary):
        for field in fields:
            if field not in dictionary:
                raise StackError('Required field "{}" not found'.format(field))

    def _get_resource_to_status_map(self, stack_status):
        """
        Returns a dictionary that maps a resource's (id, service) to the resource's status
        from the last deployment.

        The key for this dictionary is the resource's (id, service) so that we don't load
        persisted resources with the wrong resource service.
        """
        return {
            (resource_status.get(RESOURCE_ID), resource_status.get(RESOURCE_SERVICE)):
                resource_status
            for resource_status in stack_status.get(STACK_DEPLOYED)
        }

    def _generate_stack_status_path(self, stack_path):
        """
        Given a path to the stack configuration template JSON file, generates a path to where the
        deployment status JSON will be stored after successful deployment of the stack.

        :param stack_path: Path to the stack config template JSON file
        :return: The path to the stack status file.

        >>> self._generate_stack_status_path('./stack.json')
        './stack.deployed.json'
        """
        stack_status_insert = 'deployed'
        stack_path_split = stack_path.split('.')
        stack_path_split.insert(-1, stack_status_insert)
        return '.'.join(stack_path_split)

    def _load_json(self, path):
        """
        Parse a JSON file into a dict.
        Returns an empty dictionary if the path doesn't exist.

        :param path: Path of the JSON file to load.
        :return: dict of the parsed JSON contents.
        """
        stack_conf = {}
        if os.path.exists(path):
            with open(path, 'r') as f:
                stack_conf = json.load(f)
        return stack_conf

    def _save_json(self, path, data):
        """
        Writes data to a JSON file.

        :param path: Path of JSON file.
        :param data: dict of data to be written to the JSON file.
        :return: None
        """
        with open(path, 'w') as f:
            json.dump(data, f, indent=2, sort_keys=True)
Ejemplo n.º 26
0
 def __init__(self, api_client):
     self.jobs_client = JobsApi(api_client)