def delete_file(self, path):
    """Delete the file or directory at ``path``.

    Directories are removed recursively, whatever they contain.

    Parameters
    ----------
    path : string
        API path (``/`` separated); leading/trailing slashes are stripped.

    Raises
    ------
    web.HTTPError (404)
        If neither a directory nor a file exists at the resolved path.
    HTTPError (403)
        If the underlying ``self.hdfs.delete`` call fails.
    """
    path = path.strip('/')
    hdfs_path = to_os_path(path, self.root_dir)

    # Look the directory status up once and reuse it below.
    is_dir = self._hdfs_dir_exists(hdfs_path)
    if not is_dir and not self._hdfs_file_exists(hdfs_path):
        raise web.HTTPError(404, u'File does not exist: %s' % hdfs_path)

    # NOTE: the former "refuse to delete non-empty directories" check was
    # disabled, so directories are always deleted recursively.
    if is_dir:
        self.log.debug("Removing directory %s", hdfs_path)
    else:
        self.log.debug("Removing file %s", hdfs_path)

    try:
        self.hdfs.delete(hdfs_path, recursive=bool(is_dir))
    except Exception as e:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate; keep the real cause in the log since
        # the HTTP error only ever reports a permission problem.
        self.log.error(u'Error while deleting %s: %s', hdfs_path, e,
                       exc_info=True)
        raise HTTPError(403, u'Permission denied: %s' % path)
def _base_model(self, path):
    """Build the common base of a hdfscontents model.

    The returned dict carries ``name``, ``path``, ``last_modified``,
    ``created``, ``content``, ``format``, ``mimetype`` and ``writable``.
    ``content``, ``format`` and ``mimetype`` are left as ``None`` for the
    caller to fill in.
    """
    hdfs_path = to_os_path(path, self.root_dir)
    info = self.hdfs.get_path_info(hdfs_path)

    last_modified = tz.utcfromtimestamp(info.get(u'last_mod'))
    # TODO: don't have time created! now storing last accessed instead
    created = tz.utcfromtimestamp(info.get(u'last_access'))

    # Create the base model.
    model = {
        'name': path.rsplit('/', 1)[-1],
        'path': path,
        'last_modified': last_modified,
        'created': created,
        'content': None,
        'format': None,
        'mimetype': None,
    }

    # TODO: Now just checking if user have write permission in HDFS.
    # Need to cover all cases and check the user & group?
    try:
        # 0o200 is tested against the permissions value reported for the path.
        model['writable'] = (info.get(u'permissions') & 0o0200) > 0
    except (OSError, TypeError):
        # TypeError covers a missing/non-integer permissions entry
        # (``None & int``), which the bitwise test would otherwise raise.
        self.log.error("Failed to check write permissions on %s", hdfs_path)
        model['writable'] = False
    return model
def _dir_model(self, path, content=True):
    """Build a model for a directory.

    When ``content`` is requested, the model's ``content`` is a list of
    content-less models (one per listed, non-hidden entry) and its
    ``format`` is ``'json'``.

    Raises a 404 ``web.HTTPError`` when the directory does not exist or
    is hidden.
    """
    hdfs_path = to_os_path(path, self.root_dir)
    not_found_msg = u'directory does not exist: %r' % path

    if not self.dir_exists(path):
        raise web.HTTPError(404, not_found_msg)
    if self.is_hidden(path):
        self.log.info(
            "Refusing to serve hidden directory %r, via 404 Error", hdfs_path)
        raise web.HTTPError(404, not_found_msg)

    model = self._base_model(path)
    model['type'] = 'directory'
    if not content:
        return model

    entries = []
    model['content'] = entries
    for child in self._hdfs_ls(hdfs_path):
        # Last path component of the listed entry.
        child_name = child.strip('/').rsplit('/', 1)[-1]
        if not self.should_list(child_name) or self._hdfs_is_hidden(child):
            continue
        entries.append(
            self.get(path='%s/%s' % (path, child_name), content=False))

    model['format'] = 'json'
    return model
def _file_model(self, path, content=True, format=None):
    """Build a model for a file.

    If content is requested, the file contents are read and included.

    format:
      If 'text', the contents will be decoded as UTF-8.
      If 'base64', the raw bytes contents will be encoded as base64.
      If not specified, try to decode as UTF-8, and fall back to base64
    """
    model = self._base_model(path)
    model['type'] = 'file'

    hdfs_path = to_os_path(path, self.root_dir)
    guessed_type, _ = mimetypes.guess_type(hdfs_path)
    model['mimetype'] = guessed_type

    if not content:
        return model

    content, format = self._read_file(hdfs_path, format)
    if model['mimetype'] is None:
        # No guess from the file name: fall back on the format that was read.
        fallback_by_format = {
            'text': 'text/plain',
            'base64': 'application/octet-stream'
        }
        model['mimetype'] = fallback_by_format[format]

    model.update(content=content, format=format)
    return model
def _get_git_notebooks(self, base_arg):
    """Return ``(base_nb, remote_nb)`` for the notebook at ``base_arg``.

    The pair comes from the first entry of
    ``changed_notebooks('HEAD', None, ...)``; when there is no entry the
    file on disk is read and used for both sides.

    Raises a 422 ``HTTPError`` when the repository or ref lookup fails.
    """
    # Sometimes the root dir of the files is not cwd
    nb_root = getattr(self.contents_manager, 'root_dir', None)
    # Resolve base argument to a file system path
    fs_path = os.path.realpath(to_os_path(base_arg, nb_root))

    # Ensure path/root_dir that can be sent to git:
    try:
        git_root = find_repo_root(fs_path)
    except InvalidGitRepositoryError as e:
        self.log.exception(e)
        raise HTTPError(422, 'Invalid notebook: %s' % fs_path)
    rel_path = os.path.relpath(fs_path, git_root)

    # Get the base/remote notebooks:
    no_change = object()
    try:
        # There should only ever be one set of files, so take the first.
        first = next(
            iter(changed_notebooks('HEAD', None, rel_path, git_root)),
            no_change)
        if first is no_change:
            # The filename was either invalid or the file is unchanged
            # Assume unchanged, and let read_notebook handle error
            # reporting if invalid
            base_nb = self.read_notebook(os.path.join(git_root, rel_path))
            remote_nb = base_nb
        else:
            fbase, fremote = first
            base_nb = read_notebook(fbase, on_null='minimal')
            remote_nb = read_notebook(fremote, on_null='minimal')
    except (InvalidGitRepositoryError, BadName) as e:
        self.log.exception(e)
        raise HTTPError(422, 'Invalid notebook: %s' % base_arg)
    return base_nb, remote_nb
def save(self, model, path=''):
    """Save a file, notebook or directory model to ``path``.

    Runs ``self.run_pre_save_hook(model=model, path=path)`` before any
    data is written, and returns the saved model re-read without content.

    Raises ``web.HTTPError``: 400 for a missing type/content or an
    unhandled type, 500 for any unexpected error while saving.
    """
    path = path.strip('/')

    if 'type' not in model:
        raise web.HTTPError(400, u'No file type provided')
    if model['type'] != 'directory' and 'content' not in model:
        raise web.HTTPError(400, u'No file content provided')

    hdfs_path = to_os_path(path, self.root_dir)
    self.log.debug("Saving %s", hdfs_path)

    self.run_pre_save_hook(model=model, path=path)

    try:
        kind = model['type']
        if kind == 'notebook':
            nb = nbformat.from_dict(model['content'])
            self.check_and_sign(nb, path)
            self._save_notebook(hdfs_path, nb)
            # One checkpoint should always exist for notebooks.
            if not self.checkpoints.list_checkpoints(path):
                self.create_checkpoint(path)
        elif kind == 'file':
            # Missing format will be handled internally by _save_file.
            self._save_file(hdfs_path, model['content'], model.get('format'))
        elif kind == 'directory':
            self._save_directory(hdfs_path, model, path)
        else:
            raise web.HTTPError(
                400, "Unhandled hdfscontents type: %s" % model['type'])
    except web.HTTPError:
        # Already a proper HTTP error: pass it through untouched.
        raise
    except Exception as e:
        self.log.error(u'Error while saving file: %s %s', path, e,
                       exc_info=True)
        raise web.HTTPError(
            500, u'Unexpected error while saving file: %s %s' % (path, e))

    validation_message = None
    if model['type'] == 'notebook':
        self.validate_notebook_model(model)
        validation_message = model.get('message', None)

    saved = self.get(path, content=False)
    if validation_message:
        saved['message'] = validation_message

    # self.run_post_save_hook(model=model, os_path=hdfs_path)
    return saved
def cwd_for_path(self, path):
    """Turn API path into absolute OS path.

    Starting from the OS path for ``path``, walk up through parent
    directories until an existing directory or ``self.root_dir`` is
    reached, and return that path.
    """
    os_path = to_os_path(path, self.root_dir)
    # in the case of notebooks and kernels not being on the same filesystem,
    # walk up to root_dir if the paths don't exist
    while not os.path.isdir(os_path) and os_path != self.root_dir:
        parent = os.path.dirname(os_path)
        if parent == os_path:
            # dirname() reached a fixed point (e.g. '' or a missing drive
            # root) without hitting root_dir: stop instead of looping
            # forever.
            break
        os_path = parent
    return os_path
def start_kernel(self, kernel_id=None, path=None, **kwargs):
    """Start a kernel through the parent class's ``start_kernel``.

    When ``path`` is truthy, a copy of the ``env`` keyword (``os.environ``
    when absent) is passed on with ``virtual_path`` set to
    ``to_os_path(path, "/")``.

    Written as a generator: the parent's result is yielded and the kernel
    id is handed back with ``gen.Return``.
    """
    if path:
        base_env = kwargs.pop('env', os.environ)
        kernel_env = base_env.copy()
        kernel_env["virtual_path"] = to_os_path(path, "/")
        kwargs["env"] = kernel_env

    parent = super(PGKernelManager, self)
    started_id = yield parent.start_kernel(
        kernel_id=kernel_id, path=path, **kwargs)
    # py2-compat
    raise gen.Return(started_id)
def rename_file(self, old_path, new_path):
    """Rename (move) the file at ``old_path`` to ``new_path``.

    Does nothing when both API paths are equal once slashes are stripped.

    Raises ``web.HTTPError``: 409 if the destination already exists, 500
    if the move itself fails.
    """
    src = old_path.strip('/')
    dst = new_path.strip('/')
    if src == dst:
        return

    dst_hdfs = to_os_path(dst, self.root_dir)
    src_hdfs = to_os_path(src, self.root_dir)

    # Should we proceed with the move?
    if self._hdfs_exists(dst_hdfs):
        raise web.HTTPError(409, u'File already exists: %s' % dst)

    # Move the file
    try:
        self._hdfs_move_file(src_hdfs, dst_hdfs)
    except Exception as e:
        raise web.HTTPError(
            500, u'Unknown error renaming file: %s %s' % (src, e))
def cwd_for_path(self, path):
    """Turn API path into absolute OS path.

    Starting from the OS path for ``path``, walk up through parent
    directories until an existing directory or ``self.root_dir`` is
    reached. On iOS, fall back to ``~/Documents`` when the resulting
    directory is not both executable and writable.
    """
    os_path = to_os_path(path, self.root_dir)
    # in the case of notebooks and kernels not being on the same filesystem,
    # walk up to root_dir if the paths don't exist
    while not os.path.isdir(os_path) and os_path != self.root_dir:
        parent = os.path.dirname(os_path)
        if parent == os_path:
            # dirname() reached a fixed point (e.g. '' or a missing drive
            # root) without hitting root_dir: stop instead of looping
            # forever.
            break
        os_path = parent

    # iOS: make sure we can access the directory. Otherwise, start from
    # ~/Documents:
    import sys
    if sys.platform == "darwin" and os.uname().machine.startswith("iP"):
        if not os.access(os_path, os.X_OK | os.W_OK):
            os_path = os.path.join(os.path.expanduser('~'), 'Documents')
    return os_path
def _notebook_model(self, path, content=True):
    """Build a notebook model.

    When ``content`` is requested, the notebook is read (as version 4),
    its trusted cells are marked, and it is stored in the model as a JSON
    structure (not double-serialized) before the model is validated.
    """
    model = self._base_model(path)
    model['type'] = 'notebook'
    if not content:
        return model

    hdfs_path = to_os_path(path, self.root_dir)
    notebook = self._read_notebook(hdfs_path, as_version=4)
    self.mark_trusted_cells(notebook, path)
    model['content'] = notebook
    model['format'] = 'json'
    self.validate_notebook_model(model)
    return model
def is_hidden(self, path):
    """Is path a hidden directory or file?

    Parameters
    ----------
    path : string
        The path to check. This is an API path (`/` separated,
        relative to root dir).

    Returns
    -------
    hidden : bool
        Whether the path is hidden.
    """
    api_path = path.strip('/')
    return self._hdfs_is_hidden(to_os_path(api_path, self.root_dir))
def get_git_notebooks(self, file_path_arg, ref_base='HEAD', ref_remote=None):
    """
    Gets the content of the before and after state of the notebook based
    on the given Git refs.

    :param file_path_arg: The path to the file being diffed
    :param ref_base: the Git ref for the "local" or the "previous" state
    :param ref_remote: the Git ref for the "remote" or the "current" state
    :return: (base_nb, remote_nb)
    """
    # Sometimes the root dir of the files is not cwd
    nb_root = getattr(self.contents_manager, 'root_dir', None)
    # Resolve base argument to a file system path
    fs_path = os.path.realpath(to_os_path(file_path_arg, nb_root))

    # Ensure path/root_dir that can be sent to git:
    try:
        git_root = find_repo_root(fs_path)
    except InvalidGitRepositoryError as e:
        self.log.exception(e)
        raise HTTPError(422, 'Invalid notebook: %s' % fs_path)
    rel_path = os.path.relpath(fs_path, git_root)

    # Get the base/remote notebooks:
    no_change = object()
    try:
        # There should only ever be one set of files, so take the first.
        first = next(
            iter(changed_notebooks(ref_base, ref_remote, rel_path, git_root)),
            no_change)
        if first is no_change:
            # The filename was either invalid or the file is unchanged
            # Assume unchanged, and let read_notebook handle error
            # reporting if invalid
            base_nb = self.read_notebook(os.path.join(git_root, rel_path))
            remote_nb = base_nb
        else:
            fbase, fremote = first
            base_nb = read_notebook(fbase, on_null='minimal')
            remote_nb = read_notebook(fremote, on_null='minimal')
    except (InvalidGitRepositoryError, BadName) as e:
        self.log.exception(e)
        raise HTTPError(422, 'Invalid notebook: %s' % file_path_arg)
    except GitCommandNotFound as e:
        self.log.exception(e)
        raise HTTPError(
            500, 'Could not find git executable. '
                 'Please ensure git is available to the server process.')
    return base_nb, remote_nb
def dir_exists(self, path):
    """Does a directory exist at the given path?

    Like os.path.isdir

    Parameters
    ----------
    path : string
        The relative API style path to check

    Returns
    -------
    exists : bool
        Whether the path does indeed exist.
    """
    api_path = path.strip('/')
    return self._hdfs_dir_exists(to_os_path(api_path, self.root_dir))
def file_exists(self, path=''):
    """Does a file exist at the given path?

    Like os.path.isfile

    Parameters
    ----------
    path : string
        The API path of a file to check for.

    Returns
    -------
    exists : bool
        Whether the file exists.
    """
    api_path = path.strip('/')
    return self._hdfs_file_exists(to_os_path(api_path, self.root_dir))
def exists(self, path):
    """Does a file or directory exist at the given path?

    Like os.path.exists

    Parameters
    ----------
    path : string
        The API path of a file or directory to check for.

    Returns
    -------
    exists : bool
        Whether the target exists.
    """
    api_path = path.strip('/')
    return self._hdfs_exists(to_os_path(api_path, self.root_dir))
def _get_os_path(self, path):
    """Given an API path, return its file system path.

    Parameters
    ----------
    path : string
        The relative API path to the named file.

    Returns
    -------
    path : string
        Native, absolute OS path to for a file.

    Raises
    ------
    404: if path is outside root
    """
    root = os.path.abspath(self.root_dir)
    os_path = to_os_path(path, root)

    # Compare against the root *including* a trailing separator. A bare
    # prefix test would also accept sibling directories that merely share
    # the root's name as a prefix (root "/srv/nb" vs "/srv/nb-secret").
    sep = os.path.sep
    root_prefix = root if root.endswith(sep) else root + sep
    if not (os.path.abspath(os_path) + sep).startswith(root_prefix):
        raise HTTPError(404, "%s is outside root contents directory" % path)
    return os_path
def _get_git_notebooks(self, base_arg):
    """Return ``(base_nb, remote_nb)`` for the notebook at ``base_arg``.

    The pair comes from the first entry of
    ``changed_notebooks('HEAD', None, ...)``; when there is no entry the
    file on disk is read and used for both sides.

    Raises ``HTTPError``: 422 when the repository or ref lookup fails,
    500 when the git executable cannot be found.
    """
    # Sometimes the root dir of the files is not cwd
    nb_root = getattr(self.contents_manager, 'root_dir', None)
    # Resolve base argument to a file system path
    fs_path = os.path.realpath(to_os_path(base_arg, nb_root))

    # Ensure path/root_dir that can be sent to git:
    try:
        git_root = find_repo_root(fs_path)
    except InvalidGitRepositoryError as e:
        self.log.exception(e)
        raise HTTPError(422, 'Invalid notebook: %s' % fs_path)
    rel_path = os.path.relpath(fs_path, git_root)

    # Get the base/remote notebooks:
    no_change = object()
    try:
        # There should only ever be one set of files, so take the first.
        first = next(
            iter(changed_notebooks('HEAD', None, rel_path, git_root)),
            no_change)
        if first is no_change:
            # The filename was either invalid or the file is unchanged
            # Assume unchanged, and let read_notebook handle error
            # reporting if invalid
            base_nb = self.read_notebook(os.path.join(git_root, rel_path))
            remote_nb = base_nb
        else:
            fbase, fremote = first
            base_nb = read_notebook(fbase, on_null='minimal')
            remote_nb = read_notebook(fremote, on_null='minimal')
    except (InvalidGitRepositoryError, BadName) as e:
        self.log.exception(e)
        raise HTTPError(422, 'Invalid notebook: %s' % base_arg)
    except GitCommandNotFound as e:
        self.log.exception(e)
        raise HTTPError(
            500, 'Could not find git executable. '
                 'Please ensure git is available to the server process.')
    return base_nb, remote_nb
def to_os_path(self, api_path):
    """Resolve ``api_path`` with ``to_os_path``, rooted at ``self.notebook_dir``."""
    root_dir = self.notebook_dir
    return to_os_path(api_path, root=root_dir)
def save(self, model, path=''):
    """Save a file, notebook or directory model to ``path``.

    Runs ``self.run_pre_save_hook(model=model, path=path)`` before any
    data is written, and returns the saved model re-read without content.

    Files may arrive in chunks: ``model['chunk']`` is 1 for the first
    chunk, and chunks numbered greater than 1 are appended to the file.
    A model without a ``chunk`` key is written in one go.

    Raises an HTTP error: 400 for a missing type/content or an unhandled
    type, 500 for a compressed file sent as text or any unexpected error
    while saving.
    """
    path = path.strip('/')

    if 'type' not in model:
        raise web.HTTPError(400, u'No file type provided')
    if 'content' not in model and model['type'] != 'directory':
        raise web.HTTPError(400, u'No file content provided')

    hdfs_path = to_os_path(path, self.root_dir)

    # Content may be absent or None (e.g. directories), so the size is
    # computed defensively and every field is logged with %s.
    try:
        size = len(model['content'])
    except (KeyError, TypeError):
        size = None
    self.log.info(
        "Saving %s size=%s type=%s format=%s chunk=%s writable=%s",
        hdfs_path, size, model['type'], model.get('format'),
        model.get('chunk', 0), model.get('writable'))

    self.run_pre_save_hook(model=model, path=path)

    try:
        if model['type'] == 'notebook':
            nb = nbformat.from_dict(model['content'])
            self.check_and_sign(nb, path)
            self._save_notebook(hdfs_path, nb)
            # One checkpoint should always exist for notebooks.
            if not self.checkpoints.list_checkpoints(path):
                self.create_checkpoint(path)
        elif model['type'] == 'file':
            # Missing format will be handled internally by _save_file, so
            # it is read with .get() here as well.
            if (self.is_compressed_file(hdfs_path)
                    and model.get('format') == 'text'):
                raise HTTPError(
                    500,
                    u'Detected compressed file format - text is not editable')
            # Only chunks after the first one are appended.
            append = 'chunk' in model and model['chunk'] != 1
            self._save_file(hdfs_path, model['content'],
                            model.get('format'), append)
        elif model['type'] == 'directory':
            self._save_directory(hdfs_path, model, path)
        else:
            raise HTTPError(
                400, "Unhandled hdfscontents type: %s" % model['type'])
    except HTTPError:
        # Already a proper HTTP error: pass it through untouched.
        raise
    except Exception as e:
        self.log.error(u'Error while saving file: %s %s', path, e,
                       exc_info=True)
        raise HTTPError(
            500, u'Unexpected error while saving file: %s %s' % (path, e))

    validation_message = None
    if model['type'] == 'notebook':
        self.validate_notebook_model(model)
        validation_message = model.get('message', None)

    model = self.get(path, content=False)
    if validation_message:
        model['message'] = validation_message

    # self.run_post_save_hook(model=model, os_path=hdfs_path)
    return model
def to_os_path(self, api_path):
    """Resolve ``api_path`` with ``to_os_path``, rooted at ``self.td.name``."""
    root_dir = self.td.name
    return to_os_path(api_path, root=root_dir)
def _get_hdfs_path(self, path):
    """Resolve ``path`` with ``to_os_path``, rooted at ``self.root_dir``."""
    root_dir = self.root_dir
    return to_os_path(path, root_dir)