def import_workspace_dir(self, source_path, target_path, overwrite, exclude_hidden_files, headers=None): # pylint: disable=too-many-locals
    """Recursively upload a local directory tree into the Databricks workspace.

    :param source_path: local directory to read from.
    :param target_path: workspace directory to create/populate.
    :param overwrite: passed through to ``import_workspace`` for each notebook.
    :param exclude_hidden_files: when True, skip entries whose name starts with '.'.
    :param headers: optional HTTP headers forwarded to the API calls.
    """
    entries = os.listdir(source_path)
    if exclude_hidden_files:
        # for now, just exclude hidden files or directories based on starting '.'
        entries = [name for name in entries if not name.startswith('.')]
    try:
        self.mkdirs(target_path, headers=headers)
    except HTTPError as e:
        # Best-effort: report the API error body and stop descending into this dir.
        click.echo(e.response.json())
        return
    for name in entries:
        local_child = os.path.join(source_path, name)
        # don't use os.path.join here since it will set \ on Windows
        remote_child = target_path.rstrip('/') + '/' + name
        if os.path.isdir(local_child):
            self.import_workspace_dir(local_child, remote_child, overwrite,
                                      exclude_hidden_files, headers=headers)
        elif os.path.isfile(local_child):
            ext = WorkspaceLanguage.get_extension(local_child)
            if ext == '':
                # Unknown extension: cannot infer notebook language, so skip it.
                extensions = ', '.join(WorkspaceLanguage.EXTENSIONS)
                click.echo(('{} does not have a valid extension of {}. Skip this file and ' +
                            'continue.').format(local_child, extensions))
            else:
                # Strip the extension so the workspace object keeps the bare name.
                remote_child = remote_child[:-len(ext)]
                language, file_format = WorkspaceLanguage.to_language_and_format(local_child)
                self.import_workspace(local_child, remote_child, language, file_format,
                                      overwrite, headers=headers)
                click.echo('{} -> {}'.format(local_child, remote_child))
def _import_dir_helper(source_path, target_path, overwrite, exclude_hidden_files):
    """Recursively upload a local directory tree to the workspace (CLI helper).

    :param source_path: local directory to read from.
    :param target_path: workspace directory to create/populate.
    :param overwrite: passed through to ``import_workspace`` for each notebook.
    :param exclude_hidden_files: when True, skip entries whose name starts with '.'.
    """
    # Try doing the os.listdir before creating the dir in Databricks.
    entries = os.listdir(source_path)
    if exclude_hidden_files:
        # for now, just exclude hidden files or directories based on starting '.'
        entries = [name for name in entries if not name.startswith('.')]
    try:
        mkdirs(target_path)
    except HTTPError as e:
        # Report the API error body and skip this subtree.
        click.echo(e.response.json())
        return
    for name in entries:
        local_child = os.path.join(source_path, name)
        # don't use os.path.join here since it will set \ on Windows
        remote_child = target_path.rstrip('/') + '/' + name
        if os.path.isdir(local_child):
            _import_dir_helper(local_child, remote_child, overwrite, exclude_hidden_files)
        elif os.path.isfile(local_child):
            ext = WorkspaceLanguage.get_extension(local_child)
            if ext == '':
                # No recognized extension: language inference is impossible, so skip.
                extensions = ', '.join(WorkspaceLanguage.EXTENSIONS)
                click.echo(('{} does not have a valid extension of {}. Skip this file and ' +
                            'continue.').format(local_child, extensions))
            else:
                # Strip the extension so the workspace object keeps the bare name.
                remote_child = remote_child[:-len(ext)]
                language, file_format = WorkspaceLanguage.to_language_and_format(local_child)
                import_workspace(local_child, remote_child, language, file_format, overwrite)
                click.echo('{} -> {}'.format(local_child, remote_child))
def _download_workspace(self, resource_properties, overwrite):
    """
    Download workspace asset.

    :param resource_properties: dict of properties for the workspace asset. Must contain the
        'source_path', 'path' and 'object_type' fields.
    :param overwrite: Whether or not to overwrite the contents of workspace notebooks.
    :raises StackError: if the notebook language/format cannot be inferred from the local
        file extension, or if 'object_type' is neither NOTEBOOK nor DIRECTORY.
    """
    local_path = resource_properties.get(WORKSPACE_RESOURCE_SOURCE_PATH)
    workspace_path = resource_properties.get(WORKSPACE_RESOURCE_PATH)
    object_type = resource_properties.get(WORKSPACE_RESOURCE_OBJECT_TYPE)
    click.echo('Downloading {} from Databricks path {} to {}'.format(
        object_type, workspace_path, local_path))
    if object_type == NOTEBOOK:
        # Inference of notebook language and format. A tuple of (language, fmt) or Nonetype.
        language_fmt = WorkspaceLanguage.to_language_and_format(local_path)
        if language_fmt is None:
            # FIX: the two adjacent literals previously concatenated without a separating
            # space, producing "...inferred.Please check...".
            raise StackError("Workspace Notebook language and format cannot be inferred. "
                             "Please check file extension of notebook 'source_path'.")
        (_, fmt) = language_fmt
        # Ensure the local parent directory exists before exporting into it.
        local_dir = os.path.dirname(os.path.abspath(local_path))
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)
        self.workspace_client.export_workspace(workspace_path, local_path, fmt, overwrite)
    elif object_type == DIRECTORY:
        self.workspace_client.export_workspace_dir(workspace_path, local_path, overwrite)
    else:
        raise StackError("Invalid value for '{}' field: {}"
                         .format(WORKSPACE_RESOURCE_OBJECT_TYPE, object_type))
def _deploy_workspace(self, resource_properties, databricks_id, overwrite):
    """
    Deploy workspace asset.

    :param resource_properties: dict of properties for the workspace asset. Must contain the
        'source_path', 'path' and 'object_type' fields.
    :param databricks_id: dict containing physical identifier of workspace asset on
        databricks. Should contain the field 'path'.
    :param overwrite: Whether or not to overwrite the contents of workspace notebooks.
    :return: (dict, dict) of (databricks_id, deploy_output). databricks_id is the physical ID
        for the stack status that contains the workspace path of the notebook or directory on
        datbricks. deploy_output is the initial information about the asset on databricks at
        deploy time returned by the REST API.
    :raises StackError: if 'object_type' disagrees with the local filesystem entry, or if the
        notebook language/format cannot be inferred from the file extension.
    """
    local_path = resource_properties.get(WORKSPACE_RESOURCE_SOURCE_PATH)
    workspace_path = resource_properties.get(WORKSPACE_RESOURCE_PATH)
    object_type = resource_properties.get(WORKSPACE_RESOURCE_OBJECT_TYPE)
    # A local directory must be declared DIRECTORY; anything else must be NOTEBOOK.
    actual_object_type = DIRECTORY if os.path.isdir(local_path) else NOTEBOOK
    if object_type != actual_object_type:
        raise StackError('Field "{}" ({}) not consistent with actual object type ({})'.format(
            WORKSPACE_RESOURCE_OBJECT_TYPE, object_type, actual_object_type))
    click.echo('Uploading {} from {} to Databricks workspace at {}'.format(
        object_type, local_path, workspace_path))
    if object_type == NOTEBOOK:
        # Inference of notebook language and format
        language_fmt = WorkspaceLanguage.to_language_and_format(local_path)
        if language_fmt is None:
            raise StackError("Workspace notebook language and format cannot be inferred. "
                             "Please check file extension of notebook file.")
        language, fmt = language_fmt
        # Create needed directories in workspace.
        self.workspace_client.mkdirs(os.path.dirname(workspace_path))
        self.workspace_client.import_workspace(local_path, workspace_path, language, fmt,
                                               overwrite)
    elif object_type == DIRECTORY:
        self.workspace_client.import_workspace_dir(local_path, workspace_path, overwrite,
                                                   exclude_hidden_files=True)
    else:
        # Shouldn't reach here because of verification of object_type above.
        assert False
    if databricks_id and databricks_id[WORKSPACE_RESOURCE_PATH] != workspace_path:
        # databricks_id['path'] is the workspace path from the last deployment. Alert when
        # changed
        click.echo("Workspace asset had path changed from {} to {}".format(
            databricks_id[WORKSPACE_RESOURCE_PATH], workspace_path))
    deploy_output = self.workspace_client.client.get_status(workspace_path)
    return {WORKSPACE_RESOURCE_PATH: workspace_path}, deploy_output
def _deploy_workspace(self, resource_properties, physical_id, overwrite):
    """
    Deploy workspace asset.

    :param resource_properties: dict of properties for the workspace asset. Must contain the
        'source_path' and 'path' fields. The other fields will be inferred if not provided.
    :param physical_id: dict containing physical identifier of workspace asset on databricks.
        Should contain the field 'path'.
    :param overwrite: Whether or not to overwrite the contents of workspace notebooks.
    :return: (dict, dict) of (physical_id, deploy_output). physical_id is the physical ID for
        the stack status that contains the workspace path of the notebook or directory on
        datbricks. deploy_output is the initial information about the asset on databricks at
        deploy time returned by the REST API.
    :raises StackError: if 'object_type' disagrees with the local filesystem entry, or if the
        notebook language/format cannot be inferred from the file extension.
    """
    # Required fields. TODO(alinxie) put in _validate_config
    local_path = resource_properties.get('source_path')
    workspace_path = resource_properties.get('path')
    object_type = resource_properties.get('object_type')
    actual_object_type = 'DIRECTORY' if os.path.isdir(local_path) else 'NOTEBOOK'
    if object_type != actual_object_type:
        # FIX: adjacent literals previously joined without a space ("consistentwith").
        raise StackError("Field 'object_type' ({}) not consistent "
                         "with actual object type ({})".format(object_type,
                                                               actual_object_type))
    click.echo('Uploading {} from {} to Databricks workspace at {}'.format(
        object_type, local_path, workspace_path))
    if object_type == 'NOTEBOOK':
        # Inference of notebook language and format
        language_fmt = WorkspaceLanguage.to_language_and_format(local_path)
        if language_fmt is None:
            # FIX: adjacent literals previously joined without punctuation
            # ("inferredPlease check...").
            raise StackError("Workspace notebook language and format cannot be inferred. "
                             "Please check file extension of notebook file.")
        language, fmt = language_fmt
        # Create needed directories in workspace.
        self.workspace_client.mkdirs(os.path.dirname(workspace_path))
        self.workspace_client.import_workspace(local_path, workspace_path, language, fmt,
                                               overwrite)
    elif object_type == 'DIRECTORY':
        self.workspace_client.import_workspace_dir(local_path, workspace_path, overwrite,
                                                   exclude_hidden_files=True)
    else:
        # Shouldn't reach here because of verification of object_type above.
        assert False
    if physical_id and physical_id['path'] != workspace_path:
        # physical_id['path'] is the workspace path from the last deployment. Alert when changed
        click.echo("Workspace asset had path changed from {} to {}".format(
            physical_id['path'], workspace_path))
    new_physical_id = {'path': workspace_path}
    deploy_output = self.workspace_client.client.get_status(workspace_path)
    return new_physical_id, deploy_output