def handle_libraries(resource_data, object, map):
    """Flatten a job ``libraries`` block into the resource dict.

    Builds ``library_<kind> -> {"path": ...}`` entries from the first
    element of ``object[map]`` and stores them under ``resource_data[map]``.

    NOTE(review): only ``object[map][0]`` is read — additional library
    entries appear to be ignored. Confirm whether that is intentional.
    """
    log.debug(object[map])
    flattened = {}
    for kind, location in object[map][0].items():
        flattened[f"library_{kind}"] = {"path": location}
    resource_data[f"{map}"] = flattened
def export_cli(tag, dry_run, notebook_path, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    """Export workspace notebooks as Terraform HCL files into a git repository.

    For each notebook found under ``notebook_path``, emits a
    ``databricks_notebook`` resource ``.tf`` file plus the raw notebook
    content under ``files/``, then validates the generated HCL.
    """
    if not hcl:
        return
    log.debug("this if debug")
    service = WorkspaceService(api_client)
    notebooks = get_workspace_notebooks_recursive(service, notebook_path)
    with GitExportHandler(git_ssh_url, "notebooks", delete_not_found=delete, dry_run=dry_run, tag=tag) as handler:
        for notebook in notebooks:
            resource_id = normalize_identifier(f"databricks_notebook-{notebook.path}")
            source = get_content(service, notebook.path)
            # Skip notebooks whose content could not be fetched.
            if source is None:
                continue
            resource_data = {
                "@expr:content": f'filebase64("{resource_id}")',
                "path": notebook.path,
                "overwrite": True,
                "mkdirs": True,
                "language": notebook.language,
                "format": "SOURCE",
            }
            resource_type = "databricks_notebook"
            notebook_hcl = create_resource_from_dict(resource_type, resource_id, resource_data, False)
            hcl_file = create_hcl_file(notebook.path, api_client.url, resource_data, notebook_hcl)
            handler.add_file(f"{resource_id}.tf", hcl_file)
            handler.add_file(f"files/{resource_id}", source)
            errors = validate_hcl(notebook_hcl)
            if len(errors) > 0:
                log.error(f"Identified error in the following HCL Config: {notebook_hcl}")
                log.error(errors)
def _create_or_update_change_log(self):
    """Build a changelog from the tracked git add/modify/remove sets and log it."""
    prior_changes = get_previous_changes(self.resource_path)
    changelog = create_change_log(
        self.directory,
        self._tag_value,
        self._tag_now,
        added_files=self._git_added,
        modified_files=self._git_modified,
        base_path=self.resource_path,
        removed_files=self._git_removed,
        previous=prior_changes,
    )
    log.debug(f"Generated changelog: \n{changelog}")
def export_cli(tag, dry_run, dbfs_path, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    """Export DBFS files as Terraform HCL files into a git repository.

    For each non-directory DBFS entry, emits a ``databricks_dbfs_file``
    resource ``.tf`` file plus the raw file content under ``files/``,
    then validates the generated HCL.
    """
    if not hcl:
        return
    log.debug("this if debug")
    service = DbfsService(api_client)
    entries = get_dbfs_files_recursive(service, dbfs_path)
    log.info(entries)
    with GitExportHandler(git_ssh_url, "dbfs", delete_not_found=delete, dry_run=dry_run, tag=tag) as handler:
        for entry in entries:
            assert "path" in entry
            assert "is_dir" in entry
            assert "file_size" in entry
            # Directories carry no exportable content.
            if entry["is_dir"]:
                continue
            resource_id = normalize_identifier(f"databricks_dbfs_file-{entry['path']}")
            resource_data = {
                "@expr:source": f'pathexpand("{resource_id}")',
                "@expr:content_b64_md5": f'md5(filebase64(pathexpand("{resource_id}")))',
                "path": entry["path"],
                "overwrite": True,
                "mkdirs": True,
                "validate_remote_file": True,
            }
            resource_type = "databricks_dbfs_file"
            dbfs_hcl = create_resource_from_dict(resource_type, resource_id, resource_data, False)
            hcl_file = create_hcl_file(entry['path'], api_client.url, resource_data, dbfs_hcl)
            handler.add_file(f"{resource_id}.tf", hcl_file)
            handler.add_file(f"files/{resource_id}", get_file_contents(service, entry["path"]))
            errors = validate_hcl(dbfs_hcl)
            if len(errors) > 0:
                log.error(f"Identified error in the following HCL Config: {dbfs_hcl}")
                log.error(errors)
def export_cli(tag, dry_run, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    """Export cluster policies as Terraform HCL files into a git repository.

    Policies whose name fails ``pattern_matches`` are skipped. Each exported
    policy becomes a ``databricks_cluster_policy`` resource ``.tf`` file;
    generated HCL is validated and errors are logged.
    """
    # Fixes vs original: use the same truthiness guard as every sibling
    # exporter (was `if hcl is True:`), and drop `created_policy_list`,
    # a local that was appended to but never read.
    if hcl:
        service = PolicyService(api_client)
        with GitExportHandler(git_ssh_url, "cluster_policies", delete_not_found=delete, dry_run=dry_run,
                              tag=tag) as gh:
            for policy in service.list_policies()["policies"]:
                assert "definition" in policy
                assert "name" in policy
                assert "policy_id" in policy
                if not pattern_matches(policy["name"]):
                    log.debug(
                        f"{policy['name']} did not match pattern function {pattern_matches}"
                    )
                    continue
                log.debug(
                    f"{policy['name']} matched the pattern function {pattern_matches}"
                )
                cluster_policy_tf_dict = {
                    "@raw:definition": policy["definition"],
                    "name": policy["name"]
                }
                name = "databricks_cluster_policy"
                identifier = normalize_identifier(
                    f"databricks_cluster_policy-{policy['name']}-{policy['policy_id']}"
                )
                policy_hcl = create_resource_from_dict(name, identifier, cluster_policy_tf_dict, False)
                file_name_identifier = f"{identifier}.tf"
                processed_hcl_file = create_hcl_file(policy['policy_id'], api_client.url,
                                                     cluster_policy_tf_dict, policy_hcl)
                gh.add_file(file_name_identifier, processed_hcl_file)
                hcl_errors = validate_hcl(policy_hcl)
                if len(hcl_errors) > 0:
                    log.error(
                        f"Identified error in the following HCL Config: {policy_hcl}"
                    )
                    log.error(hcl_errors)
def prep_json(block_key_map, ignore_attribute_key, resource, required_attributes_key):
    """Convert a raw API resource dict into a flat dict for HCL generation.

    :param block_key_map: attribute name -> handler invoked for nested
        block/map attributes; the handler mutates the output dict itself
    :param ignore_attribute_key: attribute names to drop from the output
    :param resource: raw API response dict
    :param required_attributes_key: attribute names that must be present
    :return: flattened resource dict
    :raises AssertionError: if a required attribute is missing or an
        unmapped attribute holds a nested dict
    """
    for req_key in required_attributes_key:
        assert req_key in resource
    resource_data = {}
    for att in resource:
        if att in ignore_attribute_key:
            log.debug(f"{att} is in ignore list")
            continue
        log.debug(att)
        if att in block_key_map:
            block_key_map[att](resource_data, resource, att)
        else:
            # BUG FIX: the original asserted on the key (`type(att)`), which is
            # always a str for JSON API responses, so the check never fired and
            # unmapped nested blocks slipped through silently. Per its own
            # message, the intent is to reject unmapped dict *values*.
            assert type(resource[att]) is not dict, f"map/block {att} is not defined"
            resource_data[att] = resource[att]
    return resource_data
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    """Export secret ACLs as Terraform HCL files into a git repository.

    Iterates every secret scope and emits a ``databricks_secret_acl``
    resource ``.tf`` file per ACL whose principal matches
    ``pattern_matches``.
    """
    block_key_map = {}
    ignore_attribute_key = {}
    required_attributes_key = {"principal", "permission"}
    if hcl:
        secret_api = SecretApi(api_client)
        scopes = secret_api.list_scopes()["scopes"]
        log.info(scopes)
        with GitExportHandler(git_ssh_url, "secret_acls", delete_not_found=delete, dry_run=dry_run, tag=tag) as gh:
            for scope in scopes:
                acls = secret_api.list_acls(scope["name"])["items"]
                log.info(acls)
                for acl in acls:
                    # BUG FIX: `pattern_matches` was accepted but never
                    # applied; every sibling exporter filters on it.
                    if not pattern_matches(acl["principal"]):
                        log.debug(
                            f"{acl['principal']} did not match pattern function {pattern_matches}"
                        )
                        continue
                    log.debug(
                        f"{acl['principal']} matched the pattern function {pattern_matches}"
                    )
                    acl_resource_data = prep_json(block_key_map, ignore_attribute_key, acl,
                                                  required_attributes_key)
                    base_name = normalize_identifier(acl["principal"])
                    name = "databricks_secret_acl"
                    identifier = f"databricks_secret_acl-{base_name}"
                    acl_resource_data["scope"] = scope["name"]
                    acl_hcl = create_resource_from_dict(
                        name, identifier, acl_resource_data, False)
                    file_name_identifier = f"{identifier}.tf"
                    gh.add_file(file_name_identifier, acl_hcl)
                    log.debug(acl_hcl)
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    """Export secrets as Terraform HCL files into a git repository.

    Iterates every secret scope and emits a ``databricks_secret`` resource
    ``.tf`` file per secret whose key matches ``pattern_matches``.
    """
    block_key_map = {}
    ignore_attribute_key = {"last_updated_timestamp"}
    required_attributes_key = {"key"}
    if not hcl:
        return
    secret_api = SecretApi(api_client)
    scopes = secret_api.list_scopes()["scopes"]
    log.info(scopes)
    with GitExportHandler(git_ssh_url, "secrets", delete_not_found=delete, dry_run=dry_run, tag=tag) as handler:
        for scope in scopes:
            scope_secrets = secret_api.list_secrets(scope["name"])["secrets"]
            log.info(scope_secrets)
            for secret in scope_secrets:
                if not pattern_matches(secret["key"]):
                    log.debug(f"{secret['key']} did not match pattern function {pattern_matches}")
                    continue
                log.debug(f"{secret['key']} matched the pattern function {pattern_matches}")
                resource_data = prep_json(block_key_map, ignore_attribute_key, secret, required_attributes_key)
                base_name = normalize_identifier(secret["key"])
                resource_type = "databricks_secret"
                resource_id = f"databricks_secret-{base_name}"
                resource_data["scope"] = scope["name"]
                secret_hcl = create_resource_from_dict(resource_type, resource_id, resource_data, False)
                handler.add_file(f"{resource_id}.tf", secret_hcl)
                log.debug(secret_hcl)
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    """Export AWS instance profiles as Terraform HCL files into a git repository.

    Emits a ``databricks_instance_profile`` resource ``.tf`` file per
    profile whose ARN matches ``pattern_matches``.
    """
    block_key_map = {}
    ignore_attribute_key = {}
    required_attributes_key = {"instance_profile_arn"}
    if not hcl:
        return
    profiles = api_client.perform_query(
        'GET', '/instance-profiles/list', data={}, headers=None)["instance_profiles"]
    log.info(profiles)
    with GitExportHandler(git_ssh_url, "instance_profiles", delete_not_found=delete, dry_run=dry_run,
                          tag=tag) as handler:
        for profile in profiles:
            arn = profile["instance_profile_arn"]
            if not pattern_matches(arn):
                log.debug(f"{arn} did not match pattern function {pattern_matches}")
                continue
            log.debug(f"{arn} matched the pattern function {pattern_matches}")
            resource_data = prep_json(block_key_map, ignore_attribute_key, profile, required_attributes_key)
            base_name = normalize_identifier(arn)
            resource_type = "databricks_instance_profile"
            resource_id = f"databricks_instance_profile-{base_name}"
            # Force validation. If we import it, we might as well be able to use it.
            resource_data["skip_validation"] = False
            profile_hcl = create_resource_from_dict(resource_type, resource_id, resource_data, False)
            handler.add_file(f"{resource_id}.tf", profile_hcl)
            log.debug(profile_hcl)
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    """Export jobs as Terraform HCL files into a git repository.

    Emits a ``databricks_job`` resource ``.tf`` file per job whose name
    matches ``pattern_matches``. Nested job-settings blocks are flattened
    via the handlers in ``block_key_map``.
    """
    block_key_map = {
        "new_cluster": handle_block,
        "notebook_task": handle_block,
        "aws_attributes": handle_block,
        "spark_env_vars": handle_block,
        "autoscale": handle_block,
        "spark_submit_task": handle_block,
        "libraries": handle_libraries,
        "email_notifications": handle_map,
        "custom_tags": handle_map,
    }
    ignore_attribute_key = {"created_time", "creator_user_name", "job_id"}
    required_attributes_key = {"max_concurrent_runs", "name"}
    if not hcl:
        return
    job_api = JobsApi(api_client)
    jobs = job_api.list_jobs()["jobs"]
    log.info(jobs)
    with GitExportHandler(git_ssh_url, "jobs", delete_not_found=delete, dry_run=dry_run, tag=tag) as handler:
        for job in jobs:
            job_name = job["settings"]["name"]
            if not pattern_matches(job_name):
                log.debug(f"{job_name} did not match pattern function {pattern_matches}")
                continue
            log.debug(f"{job_name} matched the pattern function {pattern_matches}")
            resource_data = prep_json(block_key_map, ignore_attribute_key, job['settings'],
                                      required_attributes_key)
            base_name = normalize_identifier(job['settings']['name'])
            resource_type = "databricks_job"
            resource_id = f"databricks_job-{base_name}"
            # Need to escape quotes in the name so the generated HCL stays valid.
            resource_data['name'] = resource_data['name'].replace('"', '\\"')
            job_hcl = create_resource_from_dict(resource_type, resource_id, resource_data, False)
            handler.add_file(f"{resource_id}.tf", job_hcl)
            log.debug(job_hcl)
def export_cli(dry_run, tag, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    """Export instance pools as Terraform HCL files into a git repository.

    Emits a ``databricks_instance_pool`` resource ``.tf`` file per pool
    whose name matches ``pattern_matches``.
    """
    block_key_map = {
        "aws_attributes": handle_block,
        "disk_spec": handle_block,
        "custom_tags": handle_map,
    }
    ignore_attribute_key = {"stats", "state", "status", "default_tags", "instance_pool_id"}
    required_attributes_key = {
        "instance_pool_name",
        "min_idle_instances",
        "idle_instance_autotermination_minutes",
        "node_type_id",
    }
    if not hcl:
        return
    pool_api = InstancePoolsApi(api_client)
    pools = pool_api.list_instance_pools()["instance_pools"]
    log.info(pools)
    with GitExportHandler(git_ssh_url, "instance_pools", delete_not_found=delete, dry_run=dry_run,
                          tag=tag) as handler:
        for pool in pools:
            pool_name = pool["instance_pool_name"]
            if not pattern_matches(pool_name):
                log.debug(f"{pool_name} did not match pattern function {pattern_matches}")
                continue
            log.debug(f"{pool_name} matched the pattern function {pattern_matches}")
            resource_data = prep_json(block_key_map, ignore_attribute_key, pool, required_attributes_key)
            base_name = normalize_identifier(pool["instance_pool_name"])
            resource_type = "databricks_instance_pool"
            resource_id = f"databricks_instance_pool-{base_name}"
            pool_hcl = create_resource_from_dict(resource_type, resource_id, resource_data, False)
            handler.add_file(f"{resource_id}.tf", pool_hcl)
            log.debug(pool_hcl)
def cmd(self, cmds, *args, **kwargs):
    """Run a terraform command; on success, will try to read the state file.

    :param cmds: command and sub-command of terraform, separated with space;
        refer to https://www.terraform.io/docs/commands/index.html
    :param args: arguments of a command
    :param kwargs: any option flag with key value without prefixed dash
        character. If there's a dash in the option name, use an underscore
        instead, e.g. ``-no-color`` --> ``no_color``.
        A simple flag with no value takes ``IsFlagged``,
        e.g. ``cmd('taint', allow_missing=IsFlagged)``.
        A boolean value flag takes True or False; a flag usable multiple
        times takes a list; a "var" variable flag takes a dictionary.
        A value of None skips that option.
        If 'capture_output' is passed (with any value other than True),
        terraform output is printed to stdout/stderr and None is returned
        for both out and err.
        If 'raise_on_error' is passed (with any truthy value) and the
        terraform command returns a nonzero return code, a
        TerraformCommandError is raised, carrying:
        ``returncode`` (the command's return code), ``out`` (captured
        stdout or None), and ``err`` (captured stderr or None).
    :return: ret_code, out, err
    """
    capture_output = kwargs.pop('capture_output', True)
    raise_on_error = kwargs.pop('raise_on_error', False)
    if capture_output is True:
        stdout, stderr = subprocess.PIPE, subprocess.PIPE
    else:
        # Stream straight through to the caller's terminal.
        stdout, stderr = sys.stdout, sys.stderr
    log.info('command: {c}'.format(c=' '.join(cmds)))
    working_folder = self.working_dir if self.working_dir else None
    environ_vars = {}
    if self.is_env_vars_included:
        environ_vars = os.environ.copy()
    process = subprocess.Popen(cmds, stdout=stdout, stderr=stderr,
                               cwd=working_folder, env=environ_vars)
    # Asynchronous callers get the live process handle back immediately.
    if not kwargs.pop('synchronous', True):
        return process, None, None
    out, err = process.communicate()
    ret_code = process.returncode
    log.debug('output: {o}'.format(o=out))
    if capture_output is True:
        out = out.decode('utf-8')
        err = err.decode('utf-8')
    else:
        out, err = None, None
    if ret_code != 0 and raise_on_error:
        raise TerraformCommandError(
            ret_code, ' '.join(cmds), out=out, err=err)
    return ret_code, out, err