Code example #1
File: cli.py  Project: amineds/databricks-terraformer
def handle_libraries(resource_data, object, map):
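    # Flatten the first entry of the list stored under `map` into
    # library_<type> sub-blocks of the form {"path": <value>} and attach
    # them to resource_data under the same key.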
    map_resource_data = {}
    log.debug(object[map])
    for key in object[map][0]:
        map_resource_data['library_' + key] = {'path': object[map][0][key], }

    resource_data[f"{map}"] = map_resource_data
Code example #2
File: cli.py  Project: amineds/databricks-terraformer
def export_cli(tag, dry_run, notebook_path, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    if hcl:
        log.debug("this if debug")
        service = WorkspaceService(api_client)
        files = get_workspace_notebooks_recursive(service, notebook_path)
        with GitExportHandler(git_ssh_url, "notebooks", delete_not_found=delete, dry_run=dry_run, tag=tag) as gh:
            for file in files:
                identifier = normalize_identifier(f"databricks_notebook-{file.path}")
                content = get_content(service, file.path)
                if content is None:
                    continue
                notebook_resource_data = {
                    "@expr:content": f'filebase64("{identifier}")',
                    "path": file.path,
                    "overwrite": True,
                    "mkdirs": True,
                    "language": file.language,
                    "format": "SOURCE",
                }
                name = "databricks_notebook"
                notebook_file_hcl = create_resource_from_dict(name, identifier, notebook_resource_data, False)
                processed_hcl_file = create_hcl_file(file.path, api_client.url, notebook_resource_data,
                                                     notebook_file_hcl)
                gh.add_file(f"{identifier}.tf", processed_hcl_file)
                gh.add_file(f"files/{identifier}", content)
                hcl_errors = validate_hcl(notebook_file_hcl)
                if len(hcl_errors) > 0:
                    log.error(f"Identified error in the following HCL Config: {notebook_file_hcl}")
                    log.error(hcl_errors)
Code example #3
def _create_or_update_change_log(self):
    previous_changes = get_previous_changes(self.resource_path)
    ch_log = create_change_log(self.directory,
                               self._tag_value,
                               self._tag_now,
                               added_files=self._git_added,
                               modified_files=self._git_modified,
                               base_path=self.resource_path,
                               removed_files=self._git_removed,
                               previous=previous_changes)
    log.debug(f"Generated changelog: \n{ch_log}")
Code example #4
def export_cli(tag, dry_run, dbfs_path, delete, git_ssh_url,
               api_client: ApiClient, hcl, pattern_matches):
    if hcl:
        log.debug("this if debug")
        service = DbfsService(api_client)

        files = get_dbfs_files_recursive(service, dbfs_path)
        log.info(files)

        with GitExportHandler(git_ssh_url,
                              "dbfs",
                              delete_not_found=delete,
                              dry_run=dry_run,
                              tag=tag) as gh:
            for file in files:
                assert "path" in file
                assert "is_dir" in file
                assert "file_size" in file
                if file["is_dir"]:
                    continue
                base_name = file["path"]

                identifier = normalize_identifier(
                    f"databricks_dbfs_file-{base_name}")
                dbfs_resource_data = {
                    "@expr:source": f'pathexpand("{identifier}")',
                    "@expr:content_b64_md5":
                    f'md5(filebase64(pathexpand("{identifier}")))',
                    "path": file["path"],
                    "overwrite": True,
                    "mkdirs": True,
                    "validate_remote_file": True,
                }

                name = "databricks_dbfs_file"

                dbfs_file_hcl = create_resource_from_dict(
                    name, identifier, dbfs_resource_data, False)

                processed_hcl_file = create_hcl_file(file['path'],
                                                     api_client.url,
                                                     dbfs_resource_data,
                                                     dbfs_file_hcl)

                gh.add_file(f"{identifier}.tf", processed_hcl_file)
                gh.add_file(f"files/{identifier}",
                            get_file_contents(service, file["path"]))
                hcl_errors = validate_hcl(dbfs_file_hcl)
                if len(hcl_errors) > 0:
                    log.error(
                        f"Identified error in the following HCL Config: {dbfs_file_hcl}"
                    )
                    log.error(hcl_errors)
Code example #5
File: cli.py  Project: amineds/databricks-terraformer
def export_cli(tag, dry_run, delete, git_ssh_url, api_client: ApiClient, hcl,
               pattern_matches):
    if hcl is True:
        service = PolicyService(api_client)
        created_policy_list = []
        with GitExportHandler(git_ssh_url,
                              "cluster_policies",
                              delete_not_found=delete,
                              dry_run=dry_run,
                              tag=tag) as gh:
            for policy in service.list_policies()["policies"]:
                assert "definition" in policy
                assert "name" in policy
                assert "policy_id" in policy
                if not pattern_matches(policy["name"]):
                    log.debug(
                        f"{policy['name']} did not match pattern function {pattern_matches}"
                    )
                    continue
                log.debug(
                    f"{policy['name']} matched the pattern function {pattern_matches}"
                )
                cluster_policy_tf_dict = {
                    "@raw:definition": policy["definition"],
                    "name": policy["name"]
                }
                name = "databricks_cluster_policy"
                identifier = normalize_identifier(
                    f"databricks_cluster_policy-{policy['name']}-{policy['policy_id']}"
                )
                created_policy_list.append(identifier)
                policy_hcl = create_resource_from_dict(name, identifier,
                                                       cluster_policy_tf_dict,
                                                       False)
                file_name_identifier = f"{identifier}.tf"

                processed_hcl_file = create_hcl_file(policy['policy_id'],
                                                     api_client.url,
                                                     cluster_policy_tf_dict,
                                                     policy_hcl)

                gh.add_file(file_name_identifier, processed_hcl_file)
                hcl_errors = validate_hcl(policy_hcl)
                if len(hcl_errors) > 0:
                    log.error(
                        f"Identified error in the following HCL Config: {policy_hcl}"
                    )
                    log.error(hcl_errors)
Code example #6
def prep_json(block_key_map, ignore_attribute_key, resource,
              required_attributes_key):
    for req_key in required_attributes_key:
        assert req_key in resource
    pool_resource_data = {}
    for att in resource:
        if att in ignore_attribute_key:
            log.debug(f"{att} is in ignore list")
            continue

        log.debug(att)
        if att in block_key_map:
            block_key_map[att](pool_resource_data, resource, att)
        else:
            assert type(att) is not dict, f"map/block {att} is not defined"
            pool_resource_data[att] = resource[att]
    return pool_resource_data
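
To make the dispatch mechanism concrete, here is a hedged usage sketch, assuming prep_json and handle_libraries from the snippets above (and their log object) are in scope; the input dict and its values are hypothetical, loosely modelled on the job settings handled in example #10. prep_json copies plain attributes through, drops ignored keys, and routes mapped keys to their handler.

# Hypothetical input dict for illustration only.
job_settings = {
    "name": "nightly-etl",
    "max_concurrent_runs": 1,
    "created_time": 1598486400000,                 # dropped via ignore_attribute_key
    "libraries": [{"whl": "dbfs:/libs/etl.whl"}],  # routed to handle_libraries
}

resource = prep_json(
    block_key_map={"libraries": handle_libraries},
    ignore_attribute_key={"created_time"},
    resource=job_settings,
    required_attributes_key={"name", "max_concurrent_runs"},
)
# resource == {
#     "name": "nightly-etl",
#     "max_concurrent_runs": 1,
#     "libraries": {"library_whl": {"path": "dbfs:/libs/etl.whl"}},
# }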
Code example #7
File: cli.py  Project: amineds/databricks-terraformer
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl,
               pattern_matches):
    block_key_map = {}
    ignore_attribute_key = {}
    required_attributes_key = {"principal", "permission"}

    if hcl:
        secret_api = SecretApi(api_client)

        scopes = secret_api.list_scopes()["scopes"]
        log.info(scopes)

        with GitExportHandler(git_ssh_url,
                              "secret_acls",
                              delete_not_found=delete,
                              dry_run=dry_run,
                              tag=tag) as gh:
            for scope in scopes:
                acls = secret_api.list_acls(scope["name"])["items"]
                log.info(acls)

                for acl in acls:
                    acl_resource_data = prep_json(block_key_map,
                                                  ignore_attribute_key, acl,
                                                  required_attributes_key)

                    base_name = normalize_identifier(acl["principal"])
                    name = "databricks_secret_acl"
                    identifier = f"databricks_secret_acl-{base_name}"

                    acl_resource_data["scope"] = scope["name"]

                    acl_hcl = create_resource_from_dict(
                        name, identifier, acl_resource_data, False)

                    file_name_identifier = f"{identifier}.tf"
                    gh.add_file(file_name_identifier, acl_hcl)
                    log.debug(acl_hcl)
Code example #8
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl,
               pattern_matches):
    block_key_map = {}
    ignore_attribute_key = {"last_updated_timestamp"}
    required_attributes_key = {"key"}

    if hcl:
        secret_api = SecretApi(api_client)

        scopes = secret_api.list_scopes()["scopes"]
        log.info(scopes)

        with GitExportHandler(git_ssh_url,
                              "secrets",
                              delete_not_found=delete,
                              dry_run=dry_run,
                              tag=tag) as gh:
            for scope in scopes:
                secrets = secret_api.list_secrets(scope["name"])["secrets"]
                log.info(secrets)

                for secret in secrets:
                    if not pattern_matches(secret["key"]):
                        log.debug(
                            f"{secret['key']} did not match pattern function {pattern_matches}"
                        )
                        continue
                    log.debug(
                        f"{secret['key']} matched the pattern function {pattern_matches}"
                    )
                    secret_resource_data = prep_json(block_key_map,
                                                     ignore_attribute_key,
                                                     secret,
                                                     required_attributes_key)

                    base_name = normalize_identifier(secret["key"])
                    name = "databricks_secret"
                    identifier = f"databricks_secret-{base_name}"

                    secret_resource_data["scope"] = scope["name"]

                    secret_hcl = create_resource_from_dict(
                        name, identifier, secret_resource_data, False)

                    file_name_identifier = f"{identifier}.tf"
                    gh.add_file(file_name_identifier, secret_hcl)
                    log.debug(secret_hcl)
Code example #9
File: cli.py  Project: amineds/databricks-terraformer
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl,
               pattern_matches):
    block_key_map = {}
    ignore_attribute_key = {}
    required_attributes_key = {"instance_profile_arn"}

    if hcl:
        _data = {}
        headers = None
        profiles = api_client.perform_query(
            'GET', '/instance-profiles/list', data=_data,
            headers=headers)["instance_profiles"]
        log.info(profiles)

        with GitExportHandler(git_ssh_url,
                              "instance_profiles",
                              delete_not_found=delete,
                              dry_run=dry_run,
                              tag=tag) as gh:
            for profile in profiles:
                if not pattern_matches(profile["instance_profile_arn"]):
                    log.debug(
                        f"{profile['instance_profile_arn']} did not match pattern function {pattern_matches}"
                    )
                    continue
                log.debug(
                    f"{profile['instance_profile_arn']} matched the pattern function {pattern_matches}"
                )
                profile_resource_data = prep_json(block_key_map,
                                                  ignore_attribute_key,
                                                  profile,
                                                  required_attributes_key)

                base_name = normalize_identifier(
                    profile["instance_profile_arn"])
                name = "databricks_instance_profile"
                identifier = f"databricks_instance_profile-{base_name}"

                #Force validation. If we import it, we might as well be able to use it
                profile_resource_data["skip_validation"] = False
                instance_profile_hcl = create_resource_from_dict(
                    name, identifier, profile_resource_data, False)

                file_name_identifier = f"{identifier}.tf"
                gh.add_file(file_name_identifier, instance_profile_hcl)
                log.debug(instance_profile_hcl)
Code example #10
File: cli.py  Project: amineds/databricks-terraformer
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    block_key_map = {
        "new_cluster": handle_block,
        "notebook_task": handle_block,
        "aws_attributes": handle_block,
        "spark_env_vars": handle_block,
        "autoscale": handle_block,
        "spark_submit_task": handle_block,
        "libraries": handle_libraries,
        "email_notifications": handle_map,
        "custom_tags": handle_map
    }
    ignore_attribute_key = {
        "created_time", "creator_user_name", "job_id"
    }
    required_attributes_key = {
        "max_concurrent_runs", "name"
    }

    if hcl:
        job_api = JobsApi(api_client)

        jobs = job_api.list_jobs()["jobs"]
        log.info(jobs)

        with GitExportHandler(git_ssh_url, "jobs", delete_not_found=delete, dry_run=dry_run, tag=tag) as gh:
            for job in jobs:
                if not pattern_matches(job["settings"]["name"]):
                    log.debug(f"{job['settings']['name']} did not match pattern function {pattern_matches}")
                    continue
                log.debug(f"{job['settings']['name']} matched the pattern function {pattern_matches}")
                job_resource_data = prep_json(block_key_map, ignore_attribute_key, job['settings'], required_attributes_key)

                base_name = normalize_identifier(job['settings']['name'])
                name = "databricks_job"
                identifier = f"databricks_job-{base_name}"

                #need to escape quotes in the name.
                job_resource_data['name'] = job_resource_data['name'].replace('"','\\"')

                instance_job_hcl = create_resource_from_dict(name, identifier, job_resource_data, False)
                file_name_identifier = f"{identifier}.tf"
                gh.add_file(file_name_identifier, instance_job_hcl)
                log.debug(instance_job_hcl)
Code example #11
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    block_key_map = {
        "aws_attributes": handle_block,
        "disk_spec": handle_block,
        "custom_tags": handle_map
    }
    ignore_attribute_key = {
        "stats", "state", "status", "default_tags", "instance_pool_id"
    }
    required_attributes_key = {
        "instance_pool_name", "min_idle_instances", "idle_instance_autotermination_minutes", "node_type_id"
    }

    if hcl:
        pool_api = InstancePoolsApi(api_client)

        pools = pool_api.list_instance_pools()["instance_pools"]
        log.info(pools)

        with GitExportHandler(git_ssh_url, "instance_pools", delete_not_found=delete, dry_run=dry_run, tag=tag) as gh:
            for pool in pools:
                if not pattern_matches(pool["instance_pool_name"]):
                    log.debug(f"{pool['instance_pool_name']} did not match pattern function {pattern_matches}")
                    continue
                log.debug(f"{pool['instance_pool_name']} matched the pattern function {pattern_matches}")
                pool_resource_data = prep_json(block_key_map, ignore_attribute_key, pool, required_attributes_key)

                base_name = normalize_identifier(pool["instance_pool_name"])
                name = "databricks_instance_pool"
                identifier = f"databricks_instance_pool-{base_name}"

                instance_pool_hcl = create_resource_from_dict(name, identifier, pool_resource_data, False)

                file_name_identifier = f"{identifier}.tf"
                gh.add_file(file_name_identifier, instance_pool_hcl)
                log.debug(instance_pool_hcl)
Code example #12
    def cmd(self, cmds, *args, **kwargs):
        """
        run a terraform command, if success, will try to read state file
        :param cmd: command and sub-command of terraform, seperated with space
                    refer to https://www.terraform.io/docs/commands/index.html
        :param args: arguments of a command
        :param kwargs:  any option flag with key value without prefixed dash character
                if there's a dash in the option name, use under line instead of dash,
                    ex. -no-color --> no_color
                if it's a simple flag with no value, value should be IsFlagged
                    ex. cmd('taint', allow_missing=IsFlagged)
                if it's a boolean value flag, assign True or false
                if it's a flag could be used multiple times, assign list to it's value
                if it's a "var" variable flag, assign dictionary to it
                if a value is None, will skip this option
                if the option 'capture_output' is passed (with any value other than
                    True), terraform output will be printed to stdout/stderr and
                    "None" will be returned as out and err.
                if the option 'raise_on_error' is passed (with any value that evaluates to True),
                    and the terraform command returns a nonzerop return code, then
                    a TerraformCommandError exception will be raised. The exception object will
                    have the following properties:
                      returncode: The command's return code
                      out: The captured stdout, or None if not captured
                      err: The captured stderr, or None if not captured
        :return: ret_code, out, err
        """
        capture_output = kwargs.pop('capture_output', True)
        raise_on_error = kwargs.pop('raise_on_error', False)
        if capture_output is True:
            stderr = subprocess.PIPE
            stdout = subprocess.PIPE
        else:
            stderr = sys.stderr
            stdout = sys.stdout

        # cmds = self.generate_cmd_string(cmd, *args, **kwargs)
        log.info('command: {c}'.format(c=' '.join(cmds)))

        working_folder = self.working_dir if self.working_dir else None

        environ_vars = {}
        if self.is_env_vars_included:
            environ_vars = os.environ.copy()

        p = subprocess.Popen(cmds, stdout=stdout, stderr=stderr,
                             cwd=working_folder, env=environ_vars)

        synchronous = kwargs.pop('synchronous', True)
        if not synchronous:
            return p, None, None

        out, err = p.communicate()
        ret_code = p.returncode
        log.debug('output: {o}'.format(o=out))

        if capture_output is True:
            out = out.decode('utf-8')
            err = err.decode('utf-8')
        else:
            out = None
            err = None

        if ret_code != 0 and raise_on_error:
            raise TerraformCommandError(
                ret_code, ' '.join(cmds), out=out, err=err)

        return ret_code, out, err
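
For illustration only, here is a hedged usage sketch of the options documented in the docstring above; `tf` stands for a hypothetical instance of the class this method belongs to, and the full argument list is passed as-is, since generate_cmd_string is commented out in this snippet.

# Capture output and raise TerraformCommandError on a nonzero exit code.
ret_code, out, err = tf.cmd(["terraform", "plan", "-no-color"],
                            capture_output=True, raise_on_error=True)

# Stream output directly to stdout/stderr instead of capturing it;
# out and err are returned as None.
ret_code, out, err = tf.cmd(["terraform", "apply", "-auto-approve"],
                            capture_output=False)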