def import_current_workspace_items(self, artifact_dir='artifacts/'):
    """Upload all previously exported notebook files back into the workspace.

    Walks the local export tree under `artifact_dir`, recreates the matching
    workspace directory structure via the mkdirs API, then imports each
    notebook file with the workspace import API. Failures from either call
    are recorded through the import error logger rather than raised.

    :param artifact_dir: subdirectory of the export dir holding the notebooks
    """
    src_dir = self.get_export_dir() + artifact_dir
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.WORKSPACE_NOTEBOOK_OBJECT,
        self.get_export_dir())
    for root, subdirs, files in self.walk(src_dir):
        # replace the local directory with empty string to get the notebook workspace directory
        nb_dir = '/' + root.replace(src_dir, '')
        upload_dir = nb_dir
        # non-root dirs need a trailing slash so filenames concatenate cleanly below
        if not nb_dir == '/':
            upload_dir = nb_dir + '/'
        if not self.does_path_exist(upload_dir):
            resp_mkdirs = self.post(WS_MKDIRS, {'path': upload_dir})
            if 'error_code' in resp_mkdirs:
                logging_utils.log_reponse_error(error_logger, resp_mkdirs)
        for f in files:
            logging.info("Uploading: {0}".format(f))
            # create the local file path to load the DBC file
            local_file_path = os.path.join(root, f)
            # create the ws full file path including filename
            ws_file_path = upload_dir + f
            # generate json args with binary data for notebook to upload to the workspace path
            nb_input_args = self.get_user_import_args(
                local_file_path, ws_file_path)
            # call import to the workspace
            if self.is_verbose():
                logging.info("Path: {0}".format(nb_input_args['path']))
            resp_upload = self.post(WS_IMPORT, nb_input_args)
            if 'error_code' in resp_upload:
                # attach the path so the error log entry identifies the notebook
                resp_upload['path'] = nb_input_args['path']
                logging_utils.log_reponse_error(error_logger, resp_upload)
def import_instance_profiles(self, log_file='instance_profiles.log'):
    """Import AWS instance profiles recorded in `log_file` into the workspace.

    Profiles already registered in the target workspace are skipped.
    This is currently an AWS-only operation.

    :param log_file: export logfile with one JSON profile entry per line
    :return: number of profiles imported, or None if no logfile exists
    """
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.INSTANCE_PROFILE_OBJECT,
        self.get_export_dir())
    ip_log = self.get_export_dir() + log_file
    if not os.path.exists(ip_log):
        logging.info("No instance profiles to import.")
        return
    # check current profiles and skip if the profile already exists
    ip_list = self.get('/instance-profiles/list').get('instance_profiles', None)
    if ip_list:
        list_of_profiles = [x['instance_profile_arn'] for x in ip_list]
    else:
        list_of_profiles = []
    import_profiles_count = 0
    with open(ip_log, "r") as fp:
        for line in fp:
            ip_arn = json.loads(line).get('instance_profile_arn', None)
            if ip_arn not in list_of_profiles:
                # fix: use logging (not print) for consistency with the rest of the module
                logging.info("Importing arn: {0}".format(ip_arn))
                resp = self.post('/instance-profiles/add',
                                 {'instance_profile_arn': ip_arn})
                if not logging_utils.log_reponse_error(error_logger, resp):
                    import_profiles_count += 1
            else:
                logging.info(
                    "Skipping since profile already exists: {0}".format(ip_arn))
    return import_profiles_count
def log_all_secrets(self, cluster_name=None, log_dir='secret_scopes/'):
    """Export every secret scope's key/value pairs, one logfile per scope.

    Secret values are not directly readable over the REST API, so a cluster
    is used (either `cluster_name` if given, or a freshly launched one) and
    each value is fetched through an execution context on that cluster.

    :param cluster_name: existing cluster to run against; launches one if None
    :param log_dir: subdirectory of the export dir for per-scope logfiles
    """
    scopes_dir = self.get_export_dir() + log_dir
    scopes_list = self.get_secret_scopes_list()
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_EXPORT, wmconstants.SECRET_OBJECT, self.get_export_dir())
    os.makedirs(scopes_dir, exist_ok=True)
    # NOTE(review): `start` is never read afterwards in this method — confirm
    # whether a timing log line was intended here.
    start = timer()
    cid = self.start_cluster_by_name(
        cluster_name) if cluster_name else self.launch_cluster()
    # brief pause before creating an execution context on the cluster
    time.sleep(5)
    ec_id = self.get_execution_context(cid)
    for scope_json in scopes_list:
        scope_name = scope_json.get('name')
        secrets_list = self.get_secrets(scope_name)
        # skip this scope entirely if listing its secrets failed
        if logging_utils.log_reponse_error(error_logger, secrets_list):
            continue
        scopes_logfile = scopes_dir + scope_name
        try:
            with open(scopes_logfile, 'w') as fp:
                for secret_json in secrets_list:
                    secret_name = secret_json.get('key')
                    b64_value = self.get_secret_value(
                        scope_name, secret_name, cid, ec_id, error_logger)
                    s_json = {'name': secret_name, 'value': b64_value}
                    fp.write(json.dumps(s_json) + '\n')
        except ValueError as error:
            # scope names containing NUL bytes cannot be used as filenames
            if "embedded null byte" in str(error):
                error_msg = f"{scopes_logfile} has bad name and hence cannot open: {str(error)} Skipping.."
                logging.error(error_msg)
                error_logger.error(error_msg)
            else:
                raise error
def import_mlflow_experiments_acls(
        self,
        acl_log='mlflow_experiments_acls.log',
        experiment_id_map_log='mlflow_experiments_id_map.log',
        num_parallel=4):
    """Restore previously exported MLflow experiment permissions.

    Each ACL entry from acl_log is applied against the new experiment id,
    resolved via the id map produced by the experiment import step. While the
    permissions themselves are persisted, the original creator ("Created By")
    is not: the caller of this script becomes the creator.
    """
    id_map = self._load_experiment_id_map(
        self.export_dir + experiment_id_map_log)
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT,
        wmconstants.MLFLOW_EXPERIMENT_PERMISSION_OBJECT,
        self.get_export_dir())
    checkpointer = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_IMPORT,
        wmconstants.MLFLOW_EXPERIMENT_PERMISSION_OBJECT)
    start = timer()
    # fan one task out per ACL line; stop scheduling on the first failure
    with open(self.get_export_dir() + acl_log, 'r') as acl_fp, \
            ThreadPoolExecutor(max_workers=num_parallel) as executor:
        pending = [
            executor.submit(self._put_mlflow_experiment_acl, acl_line,
                            id_map, checkpointer, error_logger)
            for acl_line in acl_fp
        ]
        concurrent.futures.wait(pending, return_when="FIRST_EXCEPTION")
        propagate_exceptions(pending)
    end = timer()
    logging.info("Complete MLflow Experiments Permissions Import Time: " +
                 str(timedelta(seconds=end - start)))
def import_workspace_acls(self, workspace_log_file='acl_notebooks.log',
                          dir_log_file='acl_directories.log', num_parallel=1):
    """
    import the notebook and directory acls by looping over notebook and dir logfiles
    """
    def run_acl_pass(acl_log_path, object_type):
        # one error logger + checkpoint set per object type, then fan out per line
        pass_error_logger = logging_utils.get_error_logger(
            wmconstants.WM_IMPORT, object_type, self.get_export_dir())
        pass_checkpoint_set = self._checkpoint_service.get_checkpoint_key_set(
            wmconstants.WM_IMPORT, object_type)
        with open(acl_log_path) as acl_fp, \
                ThreadPoolExecutor(max_workers=num_parallel) as executor:
            pending = [
                executor.submit(self.apply_acl_on_object, acl_entry,
                                pass_error_logger, pass_checkpoint_set)
                for acl_entry in acl_fp
            ]
            concurrent.futures.wait(pending, return_when="FIRST_EXCEPTION")
            propagate_exceptions(pending)

    # notebooks first, then directories — same order as the export
    run_acl_pass(self.get_export_dir() + workspace_log_file,
                 wmconstants.WORKSPACE_NOTEBOOK_ACL_OBJECT)
    run_acl_pass(self.get_export_dir() + dir_log_file,
                 wmconstants.WORKSPACE_DIRECTORY_ACL_OBJECT)
    print("Completed import ACLs of Notebooks and Directories")
def log_all_workspace_acls(self, workspace_log_file='user_workspace.log', dir_log_file='user_dirs.log', num_parallel=4): """ loop through all notebooks and directories to store their associated ACLs :param workspace_log_file: input file for user notebook listing :param dir_log_file: input file for user directory listing """ # define log file names for notebooks, folders, and libraries logging.info("Exporting the notebook permissions") start = timer() acl_notebooks_error_logger = logging_utils.get_error_logger( wmconstants.WM_EXPORT, wmconstants.WORKSPACE_NOTEBOOK_ACL_OBJECT, self.get_export_dir()) acl_notebooks_writer = ThreadSafeWriter( self.get_export_dir() + "acl_notebooks.log", "w") try: self.log_acl_to_file('notebooks', workspace_log_file, acl_notebooks_writer, acl_notebooks_error_logger, num_parallel) finally: acl_notebooks_writer.close() end = timer() logging.info("Complete Notebook ACLs Export Time: " + str(timedelta(seconds=end - start))) logging.info("Exporting the directories permissions") start = timer() acl_directory_error_logger = logging_utils.get_error_logger( wmconstants.WM_EXPORT, wmconstants.WORKSPACE_DIRECTORY_ACL_OBJECT, self.get_export_dir()) acl_directory_writer = ThreadSafeWriter( self.get_export_dir() + "acl_directories.log", "w") try: self.log_acl_to_file('directories', dir_log_file, acl_directory_writer, acl_directory_error_logger, num_parallel) finally: acl_directory_writer.close() end = timer() logging.info("Complete Directories ACLs Export Time: " + str(timedelta(seconds=end - start)))
def import_cluster_policies(self, log_file='cluster_policies.log',
                            acl_log_file='acl_cluster_policies.log'):
    """Recreate cluster policies and their permissions from export logfiles.

    Policies whose ids are already checkpointed are skipped; a successful
    create (or successful ACL put) writes the old id into the checkpoint so
    re-runs resume where they left off.

    :param log_file: logfile of policy configs, one JSON entry per line
    :param acl_log_file: logfile of policy ACL entries, one JSON entry per line
    """
    policies_log = self.get_export_dir() + log_file
    acl_policies_log = self.get_export_dir() + acl_log_file
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.CLUSTER_OBJECT, self.get_export_dir())
    checkpoint_cluster_policies_set = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_IMPORT, wmconstants.CLUSTER_OBJECT)
    # create the policies
    if os.path.exists(policies_log):
        with open(policies_log, 'r') as policy_fp:
            for p in policy_fp:
                policy_conf = json.loads(p)
                # skip policies already imported in a previous run
                if 'policy_id' in policy_conf and checkpoint_cluster_policies_set.contains(
                        policy_conf['policy_id']):
                    continue
                # when creating the policy, we only need `name` and `definition` fields
                create_args = {
                    'name': policy_conf['name'],
                    'definition': policy_conf['definition']
                }
                resp = self.post('/policies/clusters/create', create_args)
                ignore_error_list = ['INVALID_PARAMETER_VALUE']
                if not logging_utils.log_reponse_error(
                        error_logger, resp, ignore_error_list=ignore_error_list):
                    if 'policy_id' in policy_conf:
                        checkpoint_cluster_policies_set.write(
                            policy_conf['policy_id'])
        # ACLs are created by using the `access_control_list` key
        with open(acl_policies_log, 'r') as acl_fp:
            # map policy names to the ids assigned in the target workspace
            id_map = self.get_policy_id_by_name_dict()
            for x in acl_fp:
                p_acl = json.loads(x)
                if 'object_id' in p_acl and checkpoint_cluster_policies_set.contains(
                        p_acl['object_id']):
                    continue
                acl_create_args = {
                    'access_control_list':
                        self.build_acl_args(p_acl['access_control_list'])
                }
                policy_id = id_map[p_acl['name']]
                api = f'/permissions/cluster-policies/{policy_id}'
                resp = self.put(api, acl_create_args)
                if not logging_utils.log_reponse_error(error_logger, resp):
                    if 'object_id' in p_acl:
                        checkpoint_cluster_policies_set.write(p_acl['object_id'])
    else:
        logging.info('Skipping cluster policies as no log file exists')
def export_database(self, db_name, cluster_name=None, iam_role=None, metastore_dir='metastore/', success_log='success_metastore.log', has_unicode=False, db_log='database_details.log'): """ :param db_name: database name :param cluster_name: cluster to run against if provided :param iam_role: iam role to launch the cluster with :param metastore_dir: directory to store all the metadata :param has_unicode: whether the metadata has unicode characters to export :param db_log: specific database properties logfile :return: """ # check if instance profile exists, ask users to use --users first or enter yes to proceed. start = timer() if cluster_name: cid = self.start_cluster_by_name(cluster_name) current_iam = self.get_iam_role_by_cid(cid) else: current_iam = iam_role cid = self.launch_cluster(current_iam) end = timer() logging.info("Cluster creation time: " + str(timedelta(seconds=end - start))) time.sleep(5) ec_id = self.get_execution_context(cid) checkpoint_metastore_set = self._checkpoint_service.get_checkpoint_key_set( wmconstants.WM_EXPORT, wmconstants.METASTORE_TABLES) # if metastore failed log path exists, cleanup before re-running error_logger = logging_utils.get_error_logger( wmconstants.WM_EXPORT, wmconstants.METASTORE_TABLES, self.get_export_dir()) success_metastore_log_path = self.get_export_dir() + success_log if os.path.exists(success_metastore_log_path): os.remove(success_metastore_log_path) database_logfile = self.get_export_dir() + db_log resp = self.set_desc_database_helper(cid, ec_id) if self.is_verbose(): logging.info(resp) with open(database_logfile, 'w') as fp: db_json = self.get_desc_database_details(db_name, cid, ec_id) fp.write(json.dumps(db_json) + '\n') os.makedirs(self.get_export_dir() + metastore_dir + db_name, exist_ok=True) self.log_all_tables(db_name, cid, ec_id, metastore_dir, error_logger, success_metastore_log_path, current_iam, checkpoint_metastore_set, has_unicode)
def import_all_groups(self, group_log_dir='groups/'):
    """Import exported groups, then restore their role and entitlement assignments.

    :param group_log_dir: subdirectory of the export dir holding group logfiles
    """
    group_dir = self.get_export_dir() + group_log_dir
    group_error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.GROUP_OBJECT, self.get_export_dir())
    user_id_map = self.get_user_id_mapping()
    self.import_groups(group_dir, user_id_map, group_error_logger)
    if self.is_aws():
        # assign the users to IAM roles if on AWS
        logging.info("Update group role assignments")
        self.assign_group_roles(group_dir, group_error_logger)
    # entitlements are handled separately from role assignment to support Azure
    logging.info("Updating groups entitlements")
    self.assign_group_entitlements(group_dir, group_error_logger)
def log_all_secrets_acls(self, log_name='secret_scopes_acls.log'):
    """Export the ACLs of every secret scope, one JSON entry per line.

    Fix: a failed ACL listing for one scope previously `return`ed, aborting
    the export of all remaining scopes and leaving the file partially
    written. It now skips just the failing scope, matching the per-scope
    error handling in log_all_secrets.

    :param log_name: output logfile name under the export dir
    """
    acls_file = self.get_export_dir() + log_name
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_EXPORT, wmconstants.SECRET_OBJECT, self.get_export_dir())
    scopes_list = self.get_secret_scopes_list()
    with open(acls_file, 'w') as fp:
        for scope_json in scopes_list:
            scope_name = scope_json.get('name', None)
            resp = self.get('/secrets/acls/list', {'scope': scope_name})
            # log and skip this scope on error instead of aborting the export
            if logging_utils.log_reponse_error(error_logger, resp):
                continue
            resp['scope_name'] = scope_name
            fp.write(json.dumps(resp) + '\n')
def import_instance_pools(self, log_file='instance_pools.log'):
    """Recreate instance pools from the export logfile (one JSON config per line).

    :param log_file: export logfile name under the export dir
    """
    pool_log = self.get_export_dir() + log_file
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.INSTANCE_POOL_OBJECT,
        self.get_export_dir())
    if not os.path.exists(pool_log):
        logging.info("No instance pools to import.")
        return
    with open(pool_log, 'r') as fp:
        for raw_line in fp:
            create_resp = self.post('/instance-pools/create', json.loads(raw_line))
            # invalid parameters are logged by the helper but treated as non-fatal
            logging_utils.log_reponse_error(
                error_logger, create_resp,
                ignore_error_list=['INVALID_PARAMETER_VALUE'])
def import_all_users(self, user_log_file='users.log', num_parallel=4):
    """Import all users, then restore their IAM roles (AWS only) and entitlements.

    :param user_log_file: export logfile of users
    :param num_parallel: thread count for the user import
    """
    user_log = self.get_export_dir() + user_log_file
    user_error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.USER_OBJECT, self.get_export_dir())
    checkpoint_users_set = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_IMPORT, wmconstants.USER_OBJECT)
    self.import_users(user_log, user_error_logger, checkpoint_users_set,
                      num_parallel)
    # compare target-workspace users against the log to surface failures
    user_id_map = self.get_user_id_mapping()
    self.log_failed_users(user_id_map, user_log, user_error_logger)
    if self.is_aws():
        # assign the users to IAM roles if on AWS
        logging.info("Update user role assignments")
        self.assign_user_roles(user_id_map, user_error_logger, user_log_file)
    # entitlements are assigned separately from roles to support Azure
    logging.info("Updating users entitlements")
    self.assign_user_entitlements(user_id_map, user_error_logger, user_log_file)
def export_mlflow_runs(self, start_date, log_sql_file='mlflow_runs.db',
                       experiment_log='mlflow_experiments.log', num_parallel=4):
    """
    Exports the Mlflow run objects. This can be run only after export_mlflow_experiments is complete.
    Unlike other objects, we save the data into sqlite tables, given the possible scale of runs objects.

    :param start_date: only runs starting on/after this datetime are exported;
        defaults to 30 days ago when falsy
    :param log_sql_file: sqlite database file (under the export dir) for the runs
    :param experiment_log: logfile of exported experiments, one JSON per line
    :param num_parallel: thread count, one task per experiment
    """
    experiments_logfile = self.export_dir + experiment_log
    mlflow_runs_checkpointer = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_EXPORT, wmconstants.MLFLOW_RUNS)
    start = timer()
    # create the runs table up front; worker tasks write into it later
    con = sqlite3.connect(self.export_dir + log_sql_file)
    with con:
        con.execute('''
        CREATE TABLE IF NOT EXISTS runs (id TEXT UNIQUE, start_time INT, run_obj TEXT)
        ''')
    con.close()
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_EXPORT, wmconstants.MLFLOW_RUN_OBJECT, self.export_dir)
    # default export window: the last 30 days
    start_date = start_date if start_date else datetime.now() - timedelta(days=30)
    start_time_epoch_ms = start_date.timestamp() * 1000
    with open(experiments_logfile, 'r') as fp:
        with ThreadPoolExecutor(max_workers=num_parallel) as executor:
            futures = [
                executor.submit(self._export_runs_in_an_experiment,
                                start_time_epoch_ms, log_sql_file,
                                experiment_str, mlflow_runs_checkpointer,
                                error_logger)
                for experiment_str in fp
            ]
            results = concurrent.futures.wait(futures,
                                              return_when="FIRST_EXCEPTION")
            # re-raise the first worker exception, if any
            for result in results.done:
                if result.exception() is not None:
                    raise result.exception()
    end = timer()
    logging.info("Complete MLflow Runs Export Time: " +
                 str(timedelta(seconds=end - start)))
def log_job_configs(self, users_list=None, log_file='jobs.log',
                    acl_file='acl_jobs.log'):
    """
    log all job configs and the ACLs for each job
    :param users_list: a list of users / emails to filter the results upon (optional for group exports)
    :param log_file: log file to store job configs as json entries per line
    :param acl_file: log file to store job ACLs
    :return:
    """
    # fix: default was a mutable list ([]); None is backward compatible since
    # both are falsy in the filter check below
    jobs_log = self.get_export_dir() + log_file
    acl_jobs_log = self.get_export_dir() + acl_file
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_EXPORT, wmconstants.JOB_OBJECT, self.get_export_dir())
    # pinned by cluster_user is a flag per cluster
    jl_full = self.get_jobs_list(False)
    if users_list:
        # filter the jobs list to only contain users that exist within this list
        jl = list(
            filter(lambda x: x.get('creator_user_name', '') in users_list,
                   jl_full))
    else:
        jl = jl_full
    with open(jobs_log, "w") as log_fp, open(acl_jobs_log, 'w') as acl_fp:
        for x in jl:
            job_id = x['job_id']
            # suffix the job id so imported names stay unique
            new_job_name = x['settings']['name'] + ':::' + str(job_id)
            # grab the settings obj
            job_settings = x['settings']
            # update the job name
            job_settings['name'] = new_job_name
            # reset the original struct with the new settings
            x['settings'] = job_settings
            log_fp.write(json.dumps(x) + '\n')
            job_perms = self.get(f'/preview/permissions/jobs/{job_id}')
            if not logging_utils.log_reponse_error(error_logger, job_perms):
                job_perms['job_name'] = new_job_name
                acl_fp.write(json.dumps(job_perms) + '\n')
def export_mlflow_experiments_acls(
        self,
        experiment_log='mlflow_experiments.log',
        acl_log_file='mlflow_experiments_acls.log',
        num_parallel=4):
    """
    Export all experiments' permissions of already exported experiment objects logged in experiment_log file.
    :return: writes the result to acl_log_file
    """
    acl_writer = ThreadSafeWriter(self.export_dir + acl_log_file, 'a')
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_EXPORT,
        wmconstants.MLFLOW_EXPERIMENT_PERMISSION_OBJECT,
        self.get_export_dir())
    checkpoint_set = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_EXPORT,
        wmconstants.MLFLOW_EXPERIMENT_PERMISSION_OBJECT)
    start = timer()
    try:
        # one task per exported experiment line; stop on the first failure
        with open(self.export_dir + experiment_log, 'r') as experiments_fp, \
                ThreadPoolExecutor(max_workers=num_parallel) as executor:
            pending = [
                executor.submit(self._get_mlflow_experiment_acls, acl_writer,
                                experiment_entry, checkpoint_set, error_logger)
                for experiment_entry in experiments_fp
            ]
            concurrent.futures.wait(pending, return_when="FIRST_EXCEPTION")
            propagate_exceptions(pending)
    finally:
        acl_writer.close()
    end = timer()
    logging.info("Complete MLflow Experiments Permissions Export Time: " +
                 str(timedelta(seconds=end - start)))
def import_mlflow_experiments(self, log_file='mlflow_experiments.log',
                              id_map_file='mlflow_experiments_id_map.log',
                              log_dir=None, num_parallel=4):
    """Recreate exported MLflow experiments and record the old->new id map.

    :param log_file: exported experiments logfile, one JSON entry per line
    :param id_map_file: output map of old experiment id -> new experiment id
    :param log_dir: directory holding the logfiles; defaults to the export dir
    :param num_parallel: thread count, one task per experiment
    """
    base_dir = log_dir if log_dir else self.export_dir
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.MLFLOW_EXPERIMENT_OBJECT,
        self.export_dir)
    checkpointer = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_IMPORT, wmconstants.MLFLOW_EXPERIMENT_OBJECT)
    start = timer()
    id_map_writer = ThreadSafeWriter(base_dir + id_map_file, 'a')
    try:
        with open(base_dir + log_file, 'r') as fp, \
                ThreadPoolExecutor(max_workers=num_parallel) as executor:
            pending = [
                executor.submit(self._create_experiment, experiment_entry,
                                id_map_writer, checkpointer, error_logger)
                for experiment_entry in fp
            ]
            concurrent.futures.wait(pending, return_when="FIRST_EXCEPTION")
            propagate_exceptions(pending)
    finally:
        id_map_writer.close()
    end = timer()
    logging.info("Complete MLflow Experiments Import Time: " +
                 str(timedelta(seconds=end - start)))
def download_notebooks(self, ws_log_file='user_workspace.log',
                       ws_dir='artifacts/', num_parallel=4):
    """
    Loop through all notebook paths in the logfile and download individual notebooks
    :param ws_log_file: logfile for all notebook paths in the workspace
    :param ws_dir: export directory to store all notebooks
    :param num_parallel: number of parallel download threads
    :return: number of notebooks successfully downloaded
    """
    checkpoint_notebook_set = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_EXPORT, wmconstants.WORKSPACE_NOTEBOOK_OBJECT)
    ws_log = self.get_export_dir() + ws_log_file
    notebook_error_logger = logging_utils.get_error_logger(
        wmconstants.WM_EXPORT, wmconstants.WORKSPACE_NOTEBOOK_OBJECT,
        self.get_export_dir())
    num_notebooks = 0
    if not os.path.exists(ws_log):
        raise Exception(
            "Run --workspace first to download full log of all notebooks.")
    with open(ws_log, "r") as fp:
        # notebook log metadata file now contains object_id to help w/ ACL exports
        # pull the path from the data to download the individual notebook contents
        with ThreadPoolExecutor(max_workers=num_parallel) as executor:
            futures = [
                executor.submit(self.download_notebook_helper, notebook_data,
                                checkpoint_notebook_set, notebook_error_logger,
                                self.get_export_dir() + ws_dir)
                for notebook_data in fp
            ]
            # count only the downloads whose response carries no 'error' key
            for future in concurrent.futures.as_completed(futures):
                dl_resp = future.result()
                if 'error' not in dl_resp:
                    num_notebooks += 1
    return num_notebooks
def log_cluster_configs(self, log_file='clusters.log',
                        acl_log_file='acl_clusters.log', filter_user=None):
    """
    Log the current cluster configs in json file
    :param log_file: log the cluster configs
    :param acl_log_file: log the ACL definitions
    :param filter_user: user name to filter and log the cluster config
    :return:
    """
    cluster_log = self.get_export_dir() + log_file
    acl_cluster_log = self.get_export_dir() + acl_log_file
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_EXPORT, wmconstants.CLUSTER_OBJECT, self.get_export_dir())
    # pinned by cluster_user is a flag per cluster
    cl_raw = self.get_cluster_list(False)
    cluster_list = self.remove_automated_clusters(cl_raw)
    ip_list = self.get('/instance-profiles/list').get('instance_profiles', [])
    nonempty_ip_list = []
    if ip_list:
        # filter none if we hit a profile w/ a none object
        # generate list of registered instance profiles to check cluster configs against
        nonempty_ip_list = list(
            filter(None,
                   [x.get('instance_profile_arn', None) for x in ip_list]))
    # filter on these items as MVP of the cluster configs
    # https://docs.databricks.com/api/latest/clusters.html#request-structure
    with open(cluster_log, 'w') as log_fp, \
            open(acl_cluster_log, 'w') as acl_log_fp:
        for cluster_json in cluster_list:
            # drop runtime-only properties not accepted by the create API
            run_properties = set(list(cluster_json.keys())) - self.create_configs
            for p in run_properties:
                del cluster_json[p]
            if 'aws_attributes' in cluster_json:
                aws_conf = cluster_json.pop('aws_attributes')
                iam_role = aws_conf.get('instance_profile_arn', None)
                if iam_role and ip_list:
                    if iam_role not in nonempty_ip_list:
                        logging.info("Skipping log of default IAM role: " + iam_role)
                        del aws_conf['instance_profile_arn']
                # fix: this re-attach was previously duplicated (assigned both
                # inside the IAM-role branch and here); once is sufficient
                cluster_json['aws_attributes'] = aws_conf
            cluster_perms = self.get_cluster_acls(cluster_json['cluster_id'],
                                                  cluster_json['cluster_name'])
            if cluster_perms['http_status_code'] == 200:
                acl_log_fp.write(json.dumps(cluster_perms) + '\n')
            else:
                # fix: error message previously contained a stray embedded newline
                error_logger.error(f'Failed to get cluster ACL: {cluster_perms}')
            if filter_user:
                if cluster_json['creator_user_name'] == filter_user:
                    log_fp.write(json.dumps(cluster_json) + '\n')
            else:
                log_fp.write(json.dumps(cluster_json) + '\n')
def export_user_home(self, username, local_export_dir, num_parallel=4):
    """
    Export the provided user's home directory
    :param username: user's home directory to export
    :param local_export_dir: folder location to do single user exports
    :param num_parallel: number of parallel threads for the ACL exports
    :return: None
    """
    original_export_dir = self.get_export_dir()
    user_export_dir = self.get_export_dir() + local_export_dir
    user_root = '/Users/' + username.rstrip().lstrip()
    # all subsequent log/artifact paths resolve under the per-user directory
    self.set_export_dir(user_export_dir + '/{0}/'.format(username))
    print("Export path: {0}".format(self.get_export_dir()))
    os.makedirs(self.get_export_dir(), exist_ok=True)
    workspace_log_writer = ThreadSafeWriter(
        self.get_export_dir() + 'user_workspace.log', "a")
    libs_log_writer = ThreadSafeWriter(
        self.get_export_dir() + 'libraries.log', "a")
    dir_log_writer = ThreadSafeWriter(
        self.get_export_dir() + 'user_dirs.log', "a")
    checkpoint_item_log_set = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_EXPORT, wmconstants.WORKSPACE_ITEM_LOG_OBJECT)
    try:
        num_of_nbs = self.log_all_workspace_items(user_root,
                                                  workspace_log_writer,
                                                  libs_log_writer,
                                                  dir_log_writer,
                                                  checkpoint_item_log_set)
    finally:
        # always release the writers, even on failure
        workspace_log_writer.close()
        libs_log_writer.close()
        dir_log_writer.close()
    if num_of_nbs == 0:
        raise ValueError(
            'User does not have any notebooks in this path. Please verify the case of the email'
        )
    num_of_nbs_dl = self.download_notebooks(ws_dir='user_artifacts/')
    print(f"Total notebooks logged: {num_of_nbs}")
    print(f"Total notebooks downloaded: {num_of_nbs_dl}")
    if num_of_nbs != num_of_nbs_dl:
        print(f"Notebooks logged != downloaded. \
Check the failed download file at: {user_export_dir}")
    print(f"Exporting the notebook permissions for {username}")
    # NOTE(review): these two ACL writers use bare relative filenames, unlike
    # every other writer in this method which prefixes self.get_export_dir() —
    # confirm whether writing to the process CWD is intended here.
    acl_notebooks_writer = ThreadSafeWriter("acl_notebooks.log", "w")
    acl_notebooks_error_logger = logging_utils.get_error_logger(
        wmconstants.WM_EXPORT, wmconstants.WORKSPACE_NOTEBOOK_ACL_OBJECT,
        self.get_export_dir())
    try:
        self.log_acl_to_file('notebooks', 'user_workspace.log',
                             acl_notebooks_writer,
                             acl_notebooks_error_logger, num_parallel)
    finally:
        acl_notebooks_writer.close()
    print(f"Exporting the directories permissions for {username}")
    acl_directories_writer = ThreadSafeWriter("acl_directories.log", "w")
    acl_directories_error_logger = logging_utils.get_error_logger(
        wmconstants.WM_EXPORT, wmconstants.WORKSPACE_DIRECTORY_ACL_OBJECT,
        self.get_export_dir())
    try:
        self.log_acl_to_file('directories', 'user_dirs.log',
                             acl_directories_writer,
                             acl_directories_error_logger, num_parallel)
    finally:
        acl_directories_writer.close()
    # reset the original export dir for other calls to this method using the same client
    self.set_export_dir(original_export_dir)
def import_all_secrets(self, log_dir='secret_scopes/'):
    """Recreate secret scopes, their ACLs, and their key/value pairs.

    Walks the exported per-scope logfiles: for each scope it (1) creates the
    scope (granting `users` MANAGE at creation when the export showed that
    permission), (2) applies all other exported ACLs, then (3) puts each
    base64-decoded secret value back into the scope.

    :param log_dir: subdirectory of the export dir with one file per scope
    """
    scopes_dir = self.get_export_dir() + log_dir
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.SECRET_OBJECT, self.get_export_dir())
    scopes_acl_dict = self.load_acl_dict()
    for root, subdirs, files in self.walk(scopes_dir):
        for scope_name in files:
            file_path = root + scope_name
            # print('Log file: ', file_path)
            # check if scopes acls are empty, then skip
            if scopes_acl_dict.get(scope_name, None) is None:
                print(
                    "Scope is empty with no manage permissions. Skipping..."
                )
                continue
            # check if users has can manage perms then we can add during creation time
            has_user_manage = self.has_users_can_manage_permission(
                scope_name, scopes_acl_dict)
            create_scope_args = {'scope': scope_name}
            if has_user_manage:
                create_scope_args['initial_manage_principal'] = 'users'
            other_permissions = self.get_all_other_permissions(
                scope_name, scopes_acl_dict)
            create_resp = self.post('/secrets/scopes/create', create_scope_args)
            # an already-existing scope is not an error for a re-run
            logging_utils.log_reponse_error(
                error_logger, create_resp,
                ignore_error_list=['RESOURCE_ALREADY_EXISTS'])
            if other_permissions:
                # use this dict minus the `users:MANAGE` permissions and apply the other permissions to the scope
                for perm, principal_list in other_permissions.items():
                    put_acl_args = {
                        "scope": scope_name,
                        "permission": perm
                    }
                    for x in principal_list:
                        put_acl_args["principal"] = x
                        logging.info(put_acl_args)
                        put_resp = self.post('/secrets/acls/put', put_acl_args)
                        logging_utils.log_reponse_error(error_logger, put_resp)
            # loop through the scope and create the k/v pairs
            with open(file_path, 'r') as fp:
                for s in fp:
                    s_dict = json.loads(s)
                    k = s_dict.get('name')
                    v = s_dict.get('value')
                    # values too large to export were replaced by a warning marker
                    if 'WARNING: skipped' in v:
                        error_logger.error(
                            f"Skipping scope {scope_name} as value is corrupted due to being too large \n"
                        )
                        continue
                    try:
                        put_secret_args = {
                            'scope': scope_name,
                            'key': k,
                            'string_value': base64.b64decode(
                                v.encode('ascii')).decode('ascii')
                        }
                        put_resp = self.post('/secrets/put', put_secret_args)
                        logging_utils.log_reponse_error(error_logger, put_resp)
                    except Exception as error:
                        # tolerate undecodable values; anything else is re-raised
                        # NOTE(review): "invalid invalid" below is a typo in the
                        # runtime message, preserved as-is.
                        if "Invalid base64-encoded string" in str(
                                error) or 'decode' in str(
                                    error) or "padding" in str(error):
                            error_msg = f"secret_scope: {scope_name} has invalid invalid data characters: {str(error)} skipping.. and logging to error file."
                            logging.error(error_msg)
                            error_logger.error(error_msg)
                        else:
                            raise error
def import_mlflow_runs(
        self, src_client_config, log_sql_file='mlflow_runs.db',
        experiment_id_map_log='mlflow_experiments_id_map.log',
        run_id_map_log='mlflow_runs_id_map.log',
        ml_run_artifacts_dir='ml_run_artifacts/', num_parallel=4):
    """
    Imports the Mlflow run objects. This can be run only after import_mlflow_experiments is complete.
    Input files are mlflow_runs.db, mlflow_experiments_id_map.log
    Outputs mlflow_runs_id_map.log which has the map of old_run_id -> new_run_id after imports.
    """
    src_client = MlflowClient(f"databricks://{src_client_config['profile']}")
    experiment_id_map = self._load_experiment_id_map(
        self.export_dir + experiment_id_map_log)
    mlflow_runs_file = self.export_dir + log_sql_file
    os.makedirs(self.export_dir + ml_run_artifacts_dir, exist_ok=True)
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.MLFLOW_RUN_OBJECT, self.export_dir)
    # checkpoint is required since the checkpoint file is copied into mlflow_runs_id_map.log at the end of the step.
    assert self._checkpoint_service.checkpoint_enabled, \
        "import_mlflow_runs requires --use-checkpoint to be enabled. If " \
        " you need to actually rerun, remove the corresponding " \
        "checkpoint file. e.g. logs/checkpoint/import_mlflow_runs.log"
    mlflow_runs_checkpointer = self._checkpoint_service.get_checkpoint_key_map(
        wmconstants.WM_IMPORT, wmconstants.MLFLOW_RUN_OBJECT)
    # This checkpointer is used to checkpoint individual steps for more optimal checkpointing.
    # e.g. checkpoint run_creation, log_batch, and artifact download_upload separately
    mlflow_runs_steps_checkpointer = self._checkpoint_service.get_checkpoint_key_map(
        wmconstants.WM_IMPORT, wmconstants.MLFLOW_RUN_OBJECT + "_steps")
    start = timer()
    con = sqlite3.connect(mlflow_runs_file)
    cur = con.execute("SELECT * FROM runs")
    # TODO(kevin): make this configurable later
    runs = cur.fetchmany(10000)
    while len(runs) > 0:
        with ThreadPoolExecutor(max_workers=num_parallel) as executor:
            # run[0] = run_id, run[1] = start_time, run[2] = run_obj (JSON text)
            futures = [
                executor.submit(self._create_run_and_log, src_client,
                                mlflow_runs_file, run[0], run[1],
                                json.loads(run[2]), experiment_id_map,
                                self.export_dir + ml_run_artifacts_dir,
                                error_logger, mlflow_runs_checkpointer,
                                mlflow_runs_steps_checkpointer)
                for run in runs
            ]
            concurrent.futures.wait(futures, return_when="FIRST_EXCEPTION")
            propagate_exceptions(futures)
        runs = cur.fetchmany(10000)
    # the run-id checkpoint file doubles as the old->new run id map output
    shutil.copy(mlflow_runs_checkpointer.get_file_path(),
                self.export_dir + run_id_map_log)
    con.close()
    end = timer()
    # fix: timedelta's first positional argument is DAYS; the elapsed seconds
    # must be passed as seconds=..., matching every other timing log here
    logging.info("Complete MLflow Runs Import Time: " +
                 str(timedelta(seconds=end - start)))
def import_job_configs(self, log_file='jobs.log', acl_file='acl_jobs.log'):
    """
    Import job configurations and their ACLs into the new workspace.

    Reads one JSON job config per line from `log_file`, rewrites cluster
    references (existing cluster ids, policy ids, instance pools) to the new
    environment, creates the jobs as PAUSED, then re-applies job ACLs from
    `acl_file`. Progress is checkpointed so reruns skip completed entries.

    :param log_file: exported job configs, one JSON object per line
    :param acl_file: exported job ACLs, one JSON object per line
    """
    jobs_log = self.get_export_dir() + log_file
    acl_jobs_log = self.get_export_dir() + acl_file
    error_logger = logging_utils.get_error_logger(wmconstants.WM_IMPORT,
                                                  wmconstants.JOB_OBJECT,
                                                  self.get_export_dir())
    if not os.path.exists(jobs_log):
        logging.info("No job configurations to import.")
        return
    # get an old cluster id to new cluster id mapping object
    cluster_mapping = self.get_cluster_id_mapping()
    old_2_new_policy_ids = self.get_new_policy_id_dict()  # dict { old_policy_id : new_policy_id }
    checkpoint_job_configs_set = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_IMPORT, wmconstants.JOB_OBJECT)

    def adjust_ids_for_cluster(settings):  # job_settings or task_settings
        # Rewrites the cluster reference inside `settings` in place.
        # NOTE(review): this closure reads `job_creator` via late binding —
        # it is assigned in the loop below before each call, but the
        # dependency is implicit; confirm before reordering the loop body.
        if 'existing_cluster_id' in settings:
            old_cid = settings['existing_cluster_id']
            # set new cluster id for existing cluster attribute
            new_cid = cluster_mapping.get(old_cid, None)
            if not new_cid:
                logging.info(
                    "Existing cluster has been removed. Resetting job to use new cluster."
                )
                settings.pop('existing_cluster_id')
                settings['new_cluster'] = self.get_jobs_default_cluster_conf()
            else:
                settings['existing_cluster_id'] = new_cid
        else:  # new cluster config
            cluster_conf = settings['new_cluster']
            if 'policy_id' in cluster_conf:
                # NOTE(review): raises KeyError if the old policy id is not in
                # the mapping — presumably policies are imported first; verify.
                old_policy_id = cluster_conf['policy_id']
                cluster_conf['policy_id'] = old_2_new_policy_ids[old_policy_id]
            # check for instance pools and modify cluster attributes
            if 'instance_pool_id' in cluster_conf:
                new_cluster_conf = self.cleanup_cluster_pool_configs(
                    cluster_conf, job_creator, True)
            else:
                new_cluster_conf = cluster_conf
            settings['new_cluster'] = new_cluster_conf

    with open(jobs_log, 'r') as fp:
        for line in fp:
            job_conf = json.loads(line)
            # need to do str(...), otherwise the job_id is recognized as integer which becomes
            # str vs int which never matches.
            # (in which case, the checkpoint never recognizes that the job_id is already checkpointed)
            if 'job_id' in job_conf and checkpoint_job_configs_set.contains(
                    str(job_conf['job_id'])):
                continue
            job_creator = job_conf.get('creator_user_name', '')
            job_settings = job_conf['settings']
            job_schedule = job_settings.get('schedule', None)
            if job_schedule:
                # set all imported jobs as paused
                job_schedule['pause_status'] = 'PAUSED'
                job_settings['schedule'] = job_schedule
            # single-task jobs keep cluster settings at the top level;
            # multi-task jobs carry them per task
            if 'format' not in job_settings or job_settings.get(
                    'format') == 'SINGLE_TASK':
                adjust_ids_for_cluster(job_settings)
            else:
                for task_settings in job_settings.get('tasks', []):
                    adjust_ids_for_cluster(task_settings)
            logging.info("Current Job Name: {0}".format(
                job_conf['settings']['name']))
            # creator can be none if the user is no longer in the org. see our docs page
            create_resp = self.post('/jobs/create', job_settings)
            if logging_utils.check_error(create_resp):
                # first attempt failed; retry once with a default cluster conf
                logging.info(
                    "Resetting job to use default cluster configs due to expired configurations."
                )
                job_settings['new_cluster'] = self.get_jobs_default_cluster_conf()
                create_resp_retry = self.post('/jobs/create', job_settings)
                if not logging_utils.log_reponse_error(error_logger,
                                                       create_resp_retry):
                    if 'job_id' in job_conf:
                        # NOTE(review): written without str(...) while contains()
                        # above uses str(...) — presumably the checkpoint store
                        # stringifies keys on write; confirm.
                        checkpoint_job_configs_set.write(job_conf["job_id"])
                else:
                    raise RuntimeError(
                        "Import job has failed. Refer to the previous log messages to investigate."
                    )
            else:
                if 'job_id' in job_conf:
                    checkpoint_job_configs_set.write(job_conf["job_id"])

    # update the jobs with their ACLs
    with open(acl_jobs_log, 'r') as acl_fp:
        job_id_by_name = self.get_job_id_by_name()
        for line in acl_fp:
            acl_conf = json.loads(line)
            if 'object_id' in acl_conf and checkpoint_job_configs_set.contains(
                    acl_conf['object_id']):
                continue
            # map the exported job name to its freshly created job id
            current_job_id = job_id_by_name[acl_conf['job_name']]
            job_path = f'jobs/{current_job_id}'  # contains `/jobs/{job_id}` path
            api = f'/preview/permissions/{job_path}'
            # get acl permissions for jobs
            acl_perms = self.build_acl_args(acl_conf['access_control_list'],
                                            True)
            acl_create_args = {'access_control_list': acl_perms}
            acl_resp = self.patch(api, acl_create_args)
            if not logging_utils.log_reponse_error(
                    error_logger, acl_resp) and 'object_id' in acl_conf:
                checkpoint_job_configs_set.write(acl_conf['object_id'])
            else:
                raise RuntimeError(
                    "Import job has failed. Refer to the previous log messages to investigate."
                )
    # update the imported job names
    self.update_imported_job_names(error_logger, checkpoint_job_configs_set)
def import_hive_metastore(self, cluster_name=None, metastore_dir='metastore/',
                          views_dir='metastore_views/', has_unicode=False,
                          should_repair_table=False):
    """
    Import the exported Hive metastore (databases, tables, then views) by
    replaying the saved DDL on a cluster.

    Tables are applied first; view DDL files are moved aside during the table
    pass and re-applied afterwards so their underlying tables exist. Progress
    is checkpointed per table/view so reruns skip completed entries.

    :param cluster_name: cluster to run DDL on (launched if needed)
    :param metastore_dir: local dir with one subdir per database of table DDL
    :param views_dir: local scratch dir where view DDL is staged
    :param has_unicode: whether DDL files may contain unicode content
    :param should_repair_table: run legacy-table repair after import
    """
    metastore_local_dir = self.get_export_dir() + metastore_dir
    metastore_view_dir = self.get_export_dir() + views_dir
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.METASTORE_TABLES,
        self.get_export_dir())
    checkpoint_metastore_set = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_IMPORT, wmconstants.METASTORE_TABLES)
    os.makedirs(metastore_view_dir, exist_ok=True)
    (cid, ec_id) = self.get_or_launch_cluster(cluster_name)
    # get local databases
    db_list = self.listdir(metastore_local_dir)
    # make directory in DBFS root bucket path for tmp data
    self.post('/dbfs/mkdirs', {'path': '/tmp/migration/'})
    # iterate over the databases saved locally
    all_db_details_json = self.get_database_detail_dict()
    for db_name in db_list:
        # create a dir to host the view ddl if we find them
        os.makedirs(metastore_view_dir + db_name, exist_ok=True)
        # get the local database path to list tables
        local_db_path = metastore_local_dir + db_name
        # get a dict of the database attributes
        database_attributes = all_db_details_json.get(db_name, {})
        if not database_attributes:
            logging.info(all_db_details_json)
            raise ValueError(
                'Missing Database Attributes Log. Re-run metastore export')
        create_db_resp = self.create_database_db(db_name, ec_id, cid,
                                                 database_attributes)
        if logging_utils.log_reponse_error(error_logger, create_db_resp):
            logging.error(
                f"Failed to create database {db_name} during metastore import. Exiting Import."
            )
            return
        db_path = database_attributes.get('Location')
        if os.path.isdir(local_db_path):
            # all databases should be directories, no files at this level
            # list all the tables in the database local dir
            tables = self.listdir(local_db_path)
            for tbl_name in tables:
                # build the path for the table where the ddl is stored
                full_table_name = f"{db_name}.{tbl_name}"
                if not checkpoint_metastore_set.contains(full_table_name):
                    logging.info(f"Importing table {full_table_name}")
                    local_table_ddl = metastore_local_dir + db_name + '/' + tbl_name
                    if not self.move_table_view(db_name, tbl_name,
                                                local_table_ddl):
                        # we hit a table ddl here, so we apply the ddl
                        resp = self.apply_table_ddl(local_table_ddl, ec_id,
                                                    cid, db_path, has_unicode)
                        if not logging_utils.log_reponse_error(
                                error_logger, resp):
                            checkpoint_metastore_set.write(full_table_name)
                    else:
                        logging.info(
                            f'Moving view ddl to re-apply later: {db_name}.{tbl_name}'
                        )
        else:
            logging.error(
                "Error: Only databases should exist at this level: {0}".format(
                    db_name))
        self.delete_dir_if_empty(metastore_view_dir + db_name)
    # second pass: re-apply the view DDL that was staged during the table pass
    views_db_list = self.listdir(metastore_view_dir)
    for db_name in views_db_list:
        local_view_db_path = metastore_view_dir + db_name
        # BUG FIX: default to {} (was '') so the .get('Location') call below
        # cannot raise AttributeError on a missing database entry; matches
        # the tables loop above.
        database_attributes = all_db_details_json.get(db_name, {})
        db_path = database_attributes.get('Location')
        if os.path.isdir(local_view_db_path):
            views = self.listdir(local_view_db_path)
            for view_name in views:
                full_view_name = f'{db_name}.{view_name}'
                if not checkpoint_metastore_set.contains(full_view_name):
                    logging.info(f"Importing view {full_view_name}")
                    local_view_ddl = metastore_view_dir + db_name + '/' + view_name
                    resp = self.apply_table_ddl(local_view_ddl, ec_id, cid,
                                                db_path, has_unicode)
                    # BUG FIX: checkpoint only on success. The condition was
                    # inverted (checkpointing when log_reponse_error reported
                    # an error), which would permanently skip failed views on
                    # rerun; the tables loop above uses the `if not` form.
                    if not logging_utils.log_reponse_error(error_logger, resp):
                        checkpoint_metastore_set.write(full_view_name)
                    logging.info(resp)
    # repair legacy tables
    if should_repair_table:
        self.report_legacy_tables_to_fix()
        self.repair_legacy_tables(cluster_name)
def export_hive_metastore(self, cluster_name=None, metastore_dir='metastore/',
                          db_log='database_details.log',
                          success_log='success_metastore.log',
                          has_unicode=False):
    """
    Export the Hive metastore: database details plus every table's DDL.

    Launches (or reuses) a cluster, iterates all databases, writes one JSON
    line of database attributes per db to `db_log`, and logs each table via
    log_all_tables. On AWS, failed tables are retried with the remaining
    registered IAM instance profiles unless --skip-failed is set.

    :param cluster_name: existing cluster to use; otherwise one is launched
    :param metastore_dir: local dir receiving one subdir per database
    :param db_log: output log of database attribute JSON lines
    :param success_log: log of successfully exported tables
    :param has_unicode: whether table DDL may contain unicode content
    """
    start = timer()
    checkpoint_metastore_set = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_EXPORT, wmconstants.METASTORE_TABLES)
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_EXPORT, wmconstants.METASTORE_TABLES,
        self.get_export_dir())
    instance_profiles = self.get_instance_profiles_list()
    # pick the cluster + IAM role to export with
    if cluster_name:
        cid = self.start_cluster_by_name(cluster_name)
        current_iam_role = self.get_iam_role_by_cid(cid)
    elif instance_profiles:
        # if any instance profile exists, lets start w/ this on the first cluster to launch and export
        current_iam_role = instance_profiles[0]
        cid = self.launch_cluster(iam_role=current_iam_role)
    else:
        current_iam_role = None
        cid = self.launch_cluster()
    end = timer()
    logging.info("Cluster creation time: " +
                 str(timedelta(seconds=end - start)))
    time.sleep(5)
    ec_id = self.get_execution_context(cid)
    # if metastore failed log path exists, cleanup before re-running
    success_metastore_log_path = self.get_export_dir() + success_log
    database_logfile = self.get_export_dir() + db_log
    if os.path.exists(success_metastore_log_path):
        os.remove(success_metastore_log_path)
    all_dbs = self.get_all_databases(error_logger, cid, ec_id)
    # install the DESC DATABASE helper on the cluster before querying details
    resp = self.set_desc_database_helper(cid, ec_id)
    if self.is_verbose():
        logging.info(resp)
    with open(database_logfile, 'w') as fp:
        for db_name in all_dbs:
            logging.info(f"Fetching details from database: {db_name}")
            os.makedirs(self.get_export_dir() + metastore_dir + db_name,
                        exist_ok=True)
            db_json = self.get_desc_database_details(db_name, cid, ec_id)
            fp.write(json.dumps(db_json) + '\n')
            self.log_all_tables(db_name, cid, ec_id, metastore_dir,
                                error_logger, success_metastore_log_path,
                                current_iam_role, checkpoint_metastore_set,
                                has_unicode)
    failed_log_file = logging_utils.get_error_log_file(
        wmconstants.WM_EXPORT, wmconstants.METASTORE_TABLES,
        self.get_export_dir())
    total_failed_entries = self.get_num_of_lines(failed_log_file)
    # AWS only: retry failures under the other registered IAM roles, since a
    # table export may fail purely due to the current role's permissions
    if (not self.is_skip_failed()
            ) and self.is_aws() and total_failed_entries > 0:
        logging.info(
            "Retrying failed metastore export with registered IAM roles")
        # skip the role already used for the first pass
        remaining_iam_roles = instance_profiles[1:]
        self.retry_failed_metastore_export(cid, failed_log_file, error_logger,
                                           remaining_iam_roles,
                                           success_metastore_log_path,
                                           has_unicode,
                                           checkpoint_metastore_set)
        logging.info("Failed count before retry: " +
                     str(total_failed_entries))
        logging.info("Total Databases attempted export: " + str(len(all_dbs)))
    else:
        logging.error("Failed count: " + str(total_failed_entries))
        logging.info("Total Databases attempted export: " + str(len(all_dbs)))
def import_user_home(self, username, local_export_dir):
    """
    Import the provided user's home directory.

    logs/user_exports/{{USERNAME}}/ stores the log files to understand what was exported
    logs/user_exports/{{USERNAME}}/user_artifacts/ stores the notebook contents
    :param username: user's home directory to export
    :param local_export_dir: the log directory for this users workspace items
    :return: None
    """
    saved_export_dir = self.get_export_dir()
    user_import_dir = saved_export_dir + local_export_dir
    if self.does_user_exist(username):
        print("Yes, we can upload since the user exists")
    else:
        print(
            "User must exist before we upload the notebook contents. Please add the user to the platform first"
        )
    # NOTE(review): execution continues even when the user is missing —
    # presumably the intent is to abort here; confirm before relying on it.
    user_root = '/Users/' + username.strip()
    # temporarily point the export dir at this user's log directory
    self.set_export_dir(user_import_dir + f'/{username}/')
    print("Import local path: {0}".format(self.get_export_dir()))
    notebook_dir = self.get_export_dir() + 'user_artifacts/'
    for base, _subdirs, filenames in self.walk(notebook_dir):
        target_dir = '/' + base.replace(notebook_dir, '')
        # skip the 2 root directories
        if target_dir in ('/', '/Users'):
            continue
        if not self.is_user_ws_root(target_dir):
            # if it is not the /Users/[email protected]/ root path, don't create the folder
            mkdir_resp = self.post(WS_MKDIRS, {'path': target_dir})
            print(mkdir_resp)
        for filename in filenames:
            # local file holding the notebook contents
            source_path = os.path.join(base, filename)
            # workspace destination path, including the filename
            dest_path = target_dir + '/' + filename
            # json args with binary notebook data for the workspace import
            import_args = self.get_user_import_args(source_path, dest_path)
            if self.is_verbose():
                print("Path: {0}".format(import_args['path']))
            upload_resp = self.post(WS_IMPORT, import_args)
            if self.is_verbose():
                print(upload_resp)
    # re-apply the user's notebook ACLs
    acl_notebooks_error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.WORKSPACE_NOTEBOOK_ACL_OBJECT,
        self.get_export_dir())
    notebook_acl_logs = user_import_dir + f'/{username}/acl_notebooks.log'
    if os.path.exists(notebook_acl_logs):
        print(f"Importing the notebook acls for {username}")
        with open(notebook_acl_logs) as nb_acls_fp:
            for nb_acl_str in nb_acls_fp:
                self.apply_acl_on_object(nb_acl_str,
                                         acl_notebooks_error_logger)
    # re-apply the user's directory ACLs
    acl_dir_error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.WORKSPACE_DIRECTORY_ACL_OBJECT,
        self.get_export_dir())
    dir_acl_logs = user_import_dir + f'/{username}/acl_directories.log'
    if os.path.exists(dir_acl_logs):
        print(f"Importing the directory acls for {username}")
        with open(dir_acl_logs) as dir_acls_fp:
            for dir_acl_str in dir_acls_fp:
                self.apply_acl_on_object(dir_acl_str, acl_dir_error_logger)
    # restore the original export dir
    self.set_export_dir(saved_export_dir)
def import_cluster_configs(self, log_file='clusters.log',
                           acl_log_file='acl_clusters.log', filter_user=None):
    """
    Import cluster configs and update appropriate properties / tags in the new env.

    Reads one JSON cluster config per line from `log_file`, rewrites policy /
    instance-pool references for the new environment, tags each cluster with
    its original creator, creates it (then immediately stops it), re-pins
    previously pinned clusters, and finally re-applies cluster ACLs from
    `acl_log_file`. Progress is checkpointed by cluster_id / object_id.

    :param log_file: exported cluster configs, one JSON object per line
    :param acl_log_file: exported cluster ACLs, one JSON object per line
    :param filter_user: unused here — TODO confirm whether it can be removed
    :return: None
    """
    cluster_log = self.get_export_dir() + log_file
    acl_cluster_log = self.get_export_dir() + acl_log_file
    if not os.path.exists(cluster_log):
        logging.info("No clusters to import.")
        return
    current_cluster_names = set([
        x.get('cluster_name', None) for x in self.get_cluster_list(False)
    ])
    old_2_new_policy_ids = self.get_new_policy_id_dict()  # dict of {old_id : new_id}
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.CLUSTER_OBJECT,
        self.get_export_dir())
    checkpoint_cluster_configs_set = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_IMPORT, wmconstants.CLUSTER_OBJECT)
    # get instance pool id mappings
    with open(cluster_log, 'r') as fp:
        for line in fp:
            cluster_conf = json.loads(line)
            if 'cluster_id' in cluster_conf and checkpoint_cluster_configs_set.contains(
                    cluster_conf['cluster_id']):
                continue
            cluster_name = cluster_conf['cluster_name']
            if cluster_name in current_cluster_names:
                logging.info(
                    "Cluster already exists, skipping: {0}".format(
                        cluster_name))
                continue
            cluster_creator = cluster_conf.pop('creator_user_name')
            if 'policy_id' in cluster_conf:
                # NOTE(review): raises KeyError if the old policy id is missing
                # from the mapping — presumably policies are imported first.
                old_policy_id = cluster_conf['policy_id']
                cluster_conf['policy_id'] = old_2_new_policy_ids[old_policy_id]
            # check for instance pools and modify cluster attributes
            if 'instance_pool_id' in cluster_conf:
                new_cluster_conf = self.cleanup_cluster_pool_configs(
                    cluster_conf, cluster_creator)
            else:
                # update cluster configs for non-pool clusters
                # add original creator tag to help with DBU tracking
                if 'custom_tags' in cluster_conf:
                    tags = cluster_conf['custom_tags']
                    tags['OriginalCreator'] = cluster_creator
                    cluster_conf['custom_tags'] = tags
                else:
                    cluster_conf['custom_tags'] = {
                        'OriginalCreator': cluster_creator
                    }
                new_cluster_conf = cluster_conf
            print("Creating cluster: {0}".format(
                new_cluster_conf['cluster_name']))
            cluster_resp = self.post('/clusters/create', new_cluster_conf)
            if cluster_resp['http_status_code'] == 200:
                # creation starts the cluster; stop it right away since we only
                # need the config registered, not a running cluster
                stop_resp = self.post(
                    '/clusters/delete',
                    {'cluster_id': cluster_resp['cluster_id']})
                if 'pinned_by_user_name' in cluster_conf:
                    # restore the pinned state from the old workspace
                    pin_resp = self.post(
                        '/clusters/pin',
                        {'cluster_id': cluster_resp['cluster_id']})
                if 'cluster_id' in cluster_conf:
                    checkpoint_cluster_configs_set.write(
                        cluster_conf['cluster_id'])
            else:
                logging_utils.log_reponse_error(error_logger, cluster_resp)
                print(cluster_resp)
    # TODO: May be put it into a separate step to make it more rerunnable.
    self._log_cluster_ids_and_original_creators(log_file)
    # add cluster ACLs
    # loop through and reapply cluster ACLs
    with open(acl_cluster_log, 'r') as acl_fp:
        for x in acl_fp:
            data = json.loads(x)
            if 'object_id' in data and checkpoint_cluster_configs_set.contains(
                    data['object_id']):
                continue
            cluster_name = data['cluster_name']
            print(f'Applying acl for {cluster_name}')
            acl_args = {
                'access_control_list':
                self.build_acl_args(data['access_control_list'])
            }
            cid = self.get_cluster_id_by_name(cluster_name)
            if cid is None:
                error_message = f'Cluster id must exist in new env for cluster_name: {cluster_name}. ' \
                                f'Re-import cluster configs.'
                raise ValueError(error_message)
            api = f'/preview/permissions/clusters/{cid}'
            resp = self.put(api, acl_args)
            if not logging_utils.log_reponse_error(error_logger, resp):
                if 'object_id' in data:
                    checkpoint_cluster_configs_set.write(data['object_id'])
            print(resp)
def import_all_workspace_items(self, artifact_dir='artifacts/',
                               archive_missing=False, num_parallel=4):
    """
    Import all notebooks into a new workspace.

    Walks the entire artifacts/ directory in parallel, and also uploads all
    the files in each of the directories in parallel.

    WARNING: Because it parallelizes both on directory walking and file
    uploading, it can spawn as many threads as num_parallel * num_parallel.

    :param artifact_dir: notebook download directory
    :param archive_missing: whether to put missing users into a /Archive/
        top level directory
    :param num_parallel: worker count for each of the two thread pools
    :raises ValueError: if no users exist in the target environment, or if
        the target has fewer users than were exported and archive_missing
        was not requested
    """
    src_dir = self.get_export_dir() + artifact_dir
    error_logger = logging_utils.get_error_logger(
        wmconstants.WM_IMPORT, wmconstants.WORKSPACE_NOTEBOOK_OBJECT,
        self.get_export_dir())
    checkpoint_notebook_set = self._checkpoint_service.get_checkpoint_key_set(
        wmconstants.WM_IMPORT, wmconstants.WORKSPACE_NOTEBOOK_OBJECT)
    num_exported_users = self.get_num_of_saved_users(src_dir)
    num_current_users = self.get_current_users()
    if num_current_users == 0:
        logging.info(
            "No registered users in existing environment. Please import users / groups first."
        )
        raise ValueError("No registered users in the current environment")
    if (num_current_users < num_exported_users) and (not archive_missing):
        logging.info("Exported number of user workspaces: {0}".format(
            num_exported_users))
        logging.info("Current number of user workspaces: {0}".format(
            num_current_users))
        logging.info(
            "Re-run with the `--archive-missing` flag to load missing users into a separate directory"
        )
        raise ValueError(
            "Current number of users is less than number of user workspaces to import."
        )
    # cache of users already determined to be missing from the target env.
    # NOTE(review): this set is mutated from multiple _upload_all_files
    # threads without a lock — presumably benign (worst case a duplicate
    # log line), but confirm before relying on it.
    archive_users = set()

    def _upload_all_files(root, subdirs, files):
        '''
        Upload all files in parallel in root (current) directory.
        '''
        # replace the local directory with empty string to get the notebook workspace directory
        nb_dir = '/' + root.replace(src_dir, '')
        upload_dir = nb_dir
        if not nb_dir == '/':
            upload_dir = nb_dir + '/'
        if self.is_user_ws_item(upload_dir):
            ws_user = self.get_user(upload_dir)
            if archive_missing:
                if ws_user in archive_users:
                    # user already known to be missing; redirect to /Archive/
                    upload_dir = upload_dir.replace('Users', 'Archive', 1)
                elif not self.does_user_exist(ws_user):
                    # add the user to the cache / set of missing users
                    logging.info(
                        "User workspace does not exist, adding to archive cache: {0}"
                        .format(ws_user))
                    archive_users.add(ws_user)
                    # append the archive path to the upload directory
                    upload_dir = upload_dir.replace('Users', 'Archive', 1)
                else:
                    logging.info(
                        "User workspace exists: {0}".format(ws_user))
            elif not self.does_user_exist(ws_user):
                # without archive_missing we simply skip this user's files
                logging.info(
                    "User {0} is missing. "
                    "Please re-run with --archive-missing flag "
                    "or first verify all users exist in the new workspace".
                    format(ws_user))
                return
            else:
                logging.info("Uploading for user: {0}".format(ws_user))
        # make the top level folder before uploading files within the loop
        if not self.is_user_ws_root(upload_dir):
            # if it is not the /Users/[email protected]/ root path, don't create the folder
            resp_mkdirs = self.post(WS_MKDIRS, {'path': upload_dir})
            if 'error_code' in resp_mkdirs:
                resp_mkdirs['path'] = upload_dir
                logging_utils.log_reponse_error(error_logger, resp_mkdirs)

        def _file_upload_helper(f):
            # Upload a single notebook file; skipped if already checkpointed.
            logging.info("Uploading: {0}".format(f))
            # create the local file path to load the DBC file
            local_file_path = os.path.join(root, f)
            # create the ws full file path including filename
            ws_file_path = upload_dir + f
            if checkpoint_notebook_set.contains(ws_file_path):
                return
            # generate json args with binary data for notebook to upload to the workspace path
            nb_input_args = self.get_user_import_args(
                local_file_path, ws_file_path)
            # call import to the workspace
            if self.is_verbose():
                logging.info("Path: {0}".format(nb_input_args['path']))
            resp_upload = self.post(WS_IMPORT, nb_input_args)
            if 'error_code' in resp_upload:
                resp_upload['path'] = ws_file_path
                logging.info(f'Error uploading file: {ws_file_path}')
                logging_utils.log_reponse_error(error_logger, resp_upload)
            else:
                checkpoint_notebook_set.write(ws_file_path)

        # inner pool: upload this directory's files in parallel
        with ThreadPoolExecutor(max_workers=num_parallel) as executor:
            futures = [
                executor.submit(_file_upload_helper, file) for file in files
            ]
            concurrent.futures.wait(futures, return_when="FIRST_EXCEPTION")
            propagate_exceptions(futures)

    # outer pool: process each walked directory in parallel
    with ThreadPoolExecutor(max_workers=num_parallel) as executor:
        futures = [
            executor.submit(_upload_all_files, walk[0], walk[1], walk[2])
            for walk in self.walk(src_dir)
        ]
        concurrent.futures.wait(futures, return_when="FIRST_EXCEPTION")
        propagate_exceptions(futures)