def _get_plus_paths(self, path: DocPath) -> List[DocPath]:
    """Expand a plus-path into one full DocPath per '+' segment.

    For a path like /x/y#a+b+c the segments after each plus are prefixed
    with the base path up to and including its hashmark (a hashmark is
    appended when the base has none). A path with no plus yields [].
    """
    segments = path.split(self._plus)
    if not segments:
        # str.split never returns an empty list; defensive guard only
        return segments
    base, extras = segments[0], segments[1:]
    hash_at = base.find(self._hashmark)
    # keep the base through its hashmark, adding one if it is missing
    base = base[:hash_at + 1] if hash_at > -1 else base + self._hashmark
    return [DocPath(base + extra) for extra in extras]
def filter_root_names_for_path(self, roots: List[str], path: DocPath) -> List[str]:
    """Return the subset of *roots* accepted for *path*'s filetype.

    A hashmark in the path marks an internal cdocs doc; otherwise the
    filetype comes from self.get_filetype. Roots not listed in
    self.metadata.accepted_by for that filetype are dropped.
    """
    logging.info(
        f"Context.filter_root_names_for_path: starting roots: {roots}, path: {path}"
    )
    hashmark = self.metadata.config.get("filenames", "hashmark", "#")
    logging.info(f"Context.filter_root_names_for_path: hashmark: {hashmark}")
    # hashmark present => the path addresses a cdocs doc
    filetype = "cdocs" if path.find(hashmark) > -1 else self.get_filetype(path)
    logging.info(f"Context.filter_root_names_for_path: filetype: {filetype}")
    ab = self.metadata.accepted_by
    logging.info(f"Context.filter_root_names_for_path: ab: {ab}")
    aroots = ab.get(filetype)
    if aroots is None:
        aroots = []
    logging.info(
        f"Context.filter_root_names_for_path: found {aroots} for filetype. filtering roots using that list."
    )
    filtered = [name for name in roots if name in aroots]
    if roots != filtered:
        logging.info(
            f"Context.filter_root_names_for_path: filtered (by accepted) {roots} to {filtered}"
        )
    return filtered
def _get_concat_paths(self, path: DocPath) -> Optional[List[DocPath]]:
    """Read the concat instruction file at *path*; return one DocPath per line.

    :param path: docpath of the .concat instruction file
    :return: the DocPaths listed in the file, or None if the file is missing
    """
    filepath = self._pather.get_full_file_path(path)
    try:
        content = self._read_doc(filepath)
        # each line of the instruction file is itself a docpath
        return [DocPath(line) for line in content.split('\n')]
    except DocNotFoundException:
        # fix: logging.warn is a deprecated alias of logging.warning
        logging.warning(f"Cdocs._get_concat_paths: No such doc {path}. returning None.")
        return None
def _add_labels_to_tokens(self, path: DocPath, tokens: JsonDict, recurse: Optional[bool] = True) -> JsonDict:
    """Merge label values for *path* into *tokens*.

    Hash and plus suffixes are stripped from the path before labels are
    looked up. Each label key is prefixed with "label__"; values already
    in *tokens* win over label values on key collisions.
    """
    base = path
    hash_at = base.find(self._hashmark)
    if hash_at > -1:
        base = base[:hash_at]
    plus_at = base.find(self._plus)
    if plus_at > -1:
        base = base[:plus_at]
    labels = self.get_labels(base, recurse)
    merged = {"label__" + key: value for key, value in labels.items()}
    # the caller's tokens take precedence over label-derived ones
    merged.update(tokens)
    return JsonDict(merged)
def get_concat_doc(self, path: DocPath) -> Doc:
    """Return the doc built by concatenating the paths listed in a .concat file.

    :raises DocNotFoundException: if path is None or no instruction file exists
    :raises BadDocPath: if the path lacks a .concat extension
    """
    if path is None:
        raise DocNotFoundException("path can not be None")
    if path.find('.concat') == -1:
        raise BadDocPath("path must have a .concat file extension")
    paths = self._get_concat_paths(path)
    if paths is None:
        raise DocNotFoundException(f'No concat instruction file at {path}')
    return Doc(self.concatter.concat(paths))
def get_compose_doc(self, path: DocPath) -> Doc:
    """Read a compose file and return it transformed with its directory's tokens.

    :param path: docpath of the compose file; must not be None
    :return: the transformed Doc
    :raises DocNotFoundException: if path is None
    :raises ComposeDocException: if reading or transforming fails
    """
    if path is None:
        raise DocNotFoundException("path can not be None")
    filepath: FilePath = self._pather.get_full_file_path(path)
    try:
        # fix: was a stray debug print(); use logging like the rest of the class
        logging.info(f"Cdocs.get_compose_doc: fp: {filepath}")
        content = self._read_doc(filepath)
        # tokens are collected for the parent directory of the compose doc
        tokens: dict = self.get_tokens(path[0:path.rindex('/')])
        content = self.transformer.transform(content, path, tokens, True)
        return Doc(content)
    except Exception as e:
        logging.error(f"Cdocs.get_compose_doc: cannot compose {path}: {e}")
        # fix: chain the cause so the original traceback is not lost
        raise ComposeDocException(f'{path} failed to compose') from e
def _get_doc_for_root(self, path: DocPath, pluspaths: List[DocPath], root: FilePath) -> Doc:
    """Read, transform and (when plus paths exist) concatenate the doc for *path* under *root*."""
    logging.info(f"Cdocs._get_doc_for_root: path: {path}. plus paths: {pluspaths}. root: {root}")
    has_plus = len(pluspaths) > 0
    if has_plus:
        logging.info(f"Cdocs._get_doc_for_root: stripping down base path from plus path(s)")
        # everything from the first plus on belongs to the plus paths
        path = path[:path.find(self._plus)]
        logging.info(f"Cdocs._get_doc_for_root: base path is now {path}")
    logging.info(f"Cdocs._get_doc_for_root: checking pather: {self._pather} for path: {path}")
    filepath = self._pather.get_full_file_path_for_root(path, root)
    logging.info(f"Cdocs._get_doc_for_root: filepath from pather: {filepath}")
    content = self._read_doc(filepath)
    logging.info(f"Cdocs._get_doc_for_root: content from {filepath} is {len(content) if content is not None else 0} chars. transforming with: {self.transformer}.")
    content = self.transformer.transform(content, path, None, True)
    if has_plus:
        # append the concatenated plus docs to the transformed base doc
        content = self.concatter.join(content, self.concatter.concat(pluspaths))
    return Doc(content)
def _get_doc(self, path: DocPath, notfound) -> Optional[Doc]:
    """Fetch the doc for *path* from this instance's doc root.

    notfound: when truthy, a missing doc yields the 404 doc instead of
        None. A None value is coerced to False (with a nag in the log).
    :raises DocNotFoundException: if path is None
    :raises BadDocPath: if the path contains a dot but is a cdocs filetype
    """
    logging.info(f"Cdocs._get_doc: looking for path: {path} in root: {self.rootname}. notfound: {notfound}")
    if path is None:
        raise DocNotFoundException("path can not be None")
    # dots are only legal when the path resolves to a non-cdocs filetype
    if path.find('.') > -1 and self.filer.get_filetype(path) == 'cdocs':
        raise BadDocPath("dots are not allowed in cdoc paths")
    if notfound is None:
        logging.info("Cdocs._get_doc: notfound is None. you should fix this unless you want None returns.")
        notfound = False
    logging.info(f"Cdocs._get_doc: path: {path}")
    pluspaths = self._get_plus_paths(path)
    logging.info(f"Cdocs._get_doc: pluspaths to concationate: {pluspaths}")
    root = self.get_doc_root()
    logging.info(f"Cdocs._get_doc: root {root}")
    result = self._get_doc_for_root(path, pluspaths, root)
    if result is None and notfound:
        result = self.get_404()
    return result
def list_docs(self, path: DocPath) -> List[Doc]:
    """List the doc file names grouped under the concept identified by *path*.

    :param path: a docpath identifying a concept
    :return: file names (not full paths) in the concept's parent directory;
        [] when the directory does not exist
    """
    logging.info(f"SimpleLister.list_docs: path: {path}")
    #
    # a docpath points to a directory which may have a sibling file named
    # the same as the directory, but with a file extension.
    #
    # listing the docpath means listing the parent directory's files
    # so docpath:
    #    /x/y/z
    # would list:
    #    /x/y
    # and might return:
    #    /x/y/z.xml
    #    /x/y/fish.xml
    #    /x/y/bats.xml
    # it would not return anything like:
    #    /x/y/z/z.xml
    #    /x/y/z/fish.xml
    #    /x/y/z/bats.xml
    #
    # why does it work that way? because /x/y/z is an identifier
    # in concept-space, not a filesystem path. as a concept, it includes
    # materials that explain the space. the materials may require
    # multiple files that are all grouped by the concept /x/y/z. we
    # address the fish.xml and bats.xml files as:
    #    /x/y/z#fish.xml
    #    /x/y/z#bats.xml
    #
    # one way to think about it is that z.xml is the important thing and
    # 'z' the directory is just an indication that the concept described
    # by z.xml contains more things.
    #
    # if /x/y/z/a.xml doesn't have a sibling 'a' directory, is /x/y/z/a
    # still good docpath? yes. that just means that the concept
    # /x/y/z/a doesn't contain any further concepts.
    #
    # all this nets out that z.xml, fish.xml and bats.xml just
    # describe /x/y/z.
    #
    apath = path
    # drop the last path segment so we list the concept's parent directory
    if path[0:1] == '/' and path.count('/') >= 2 or path[0:1] != '/' and path.count('/') >= 1:
        apath = path[0:apath.rindex('/')]
    logging.info(f"SimpleLister.list_docs: apath: {apath}")
    root_path = self._cdocs.get_doc_root()
    logging.info(f"SimpleLister.list_docs: root: {root_path}")
    if apath[0:1] == '/':
        apath = apath[1:]
    the_path = os.path.join(root_path, apath)
    logging.info(f"SimpleLister.list_docs: the_path: {the_path}")
    if os.path.exists(the_path):
        logging.info(f"SimpleLister.list_docs: path exists")
        if not os.path.isdir(the_path):
            # fix: this branch handles a plain file; the old message said
            # "is a directory", which inverted the actual condition
            logging.info(
                f"SimpleLister.list_docs: the_path: {the_path} is not a directory"
            )
            name = the_path[the_path.rindex("/") + 1:]
            logging.info(f"SimpleLister.list_docs: returning name: {name}")
            return [name]
        else:
            files = os.listdir(the_path)
            logging.info(f"SimpleLister.list_docs: files: {files}")
            # keep only plain, non-hidden files
            files = [
                f for f in files
                if f[0:1] != '.' and os.path.isfile(os.path.join(the_path, f))
            ]
            logging.info(f"SimpleLister.list_docs: returning: {files}")
            return files
    else:
        logging.info(
            f"SimpleLister.list_docs: directory at {the_path} doesn't exist. no files found. returning []."
        )
        return []
def get_doc_from_roots(self,
                       rootnames: List[str],
                       path: DocPath,
                       notfound: Optional[bool] = True,
                       splitplus: Optional[bool] = True) -> Optional[Doc]:
    """Return the first doc found for *path* searching the named roots in order.

    rootnames: a list of named roots to search
    path: the docpath. may have hash and plusses
    notfound: if true, return a default notfound if no results
    splitplus: if true, plus concats can be on different roots.
        i.e. for /x/y/z+a+b /x/y/z, /x/y/z/a, /x/y/z/b can all be on
        different roots.
    """
    logging.info(
        f"Context.get_doc_from_roots: first match wins. rootnames: {rootnames}, path: {path}, notfound: {notfound}, splitplus: {splitplus}"
    )
    # the configured plus separator character(s)
    plusmark = self._metadata.config.get("filenames", "plus")
    plus = path.find(plusmark)
    if plus > -1 and splitplus:
        # lazily load the roots excluded from split-plus handling
        if self._nosplitplus is None:
            nsp = self._metadata.config.get("defaults", "nosplitplus", "")
            self._nosplitplus = nsp.split(',')
        if len(self._nosplitplus) > 0:
            rootnames = [
                name for name in rootnames if not name in self._nosplitplus
            ]
            logging.info(
                f"Context.get_doc_from_roots: nsp filtered rootnames: {rootnames}"
            )
        # split into paths and call get_doc_from_roots on each, then concat
        #
        # /r/o/o/t.html#fish
        #   needs to become /r/o/o
        #
        # /r/o/o/t#fish
        #   needs to become /r/o/o/t
        #
        # /r/o/o/t
        #   needs to become /r/o/o/t
        #
        logging.info(f"Context.get_doc_from_roots: path: {path}")
        logging.info(f"Context.get_doc_from_roots: rootnames: {rootnames}")
        logging.info(f"Context.get_doc_from_roots: notfound: {notfound}")
        logging.info(f"Context.get_doc_from_roots: splitplus: {splitplus}")
        paths = path.split(plusmark)
        logging.info(f"Context.get_doc_from_roots: paths: {paths}")
        rootpath = paths[0]
        logging.info(f"Context.get_doc_from_roots: rootpath: {rootpath}")
        hashmark = self._metadata.config.get("filenames", "hashmark")
        logging.info(f"Context.get_doc_from_roots: hashmark: {hashmark}")
        # the base path is the first segment with any hash suffix removed
        rootpath = rootpath.split(hashmark)[0]
        logging.info(f"Context.get_doc_from_roots: rootpath: {rootpath}")
        # turn each bare plus segment into a full docpath under the base path
        paths = [
            p if p.find(rootpath) > -1 else rootpath + "/" + p for p in paths
        ]
        logging.info(f"Context.get_doc_from_roots: paths: {paths}")
        result = []
        # NOTE(review): the loop variable shadows the *path* parameter;
        # nothing below the loop reads the parameter, so this is safe today.
        for path in paths:
            logging.info(
                f"Context.get_doc_from_roots: .... next path: {path}")
            # recurse per expanded path; each one no longer contains a plus,
            # so the recursion takes the non-split branch below
            r = self.get_doc_from_roots(rootnames, path, notfound)
            if r is not None:
                result.append(r)
        if len(result) == 0 and notfound:
            return self._get_default_not_found()
        # NOTE(review): joins the collected docs into a plain str -- this
        # assumes Doc is str-compatible at runtime; confirm against the
        # Doc type's declaration.
        return "".join(result)
    else:
        # no split-plus: narrow the roots by filetype, then first doc wins
        rootnames = self.filter_root_names_for_path(rootnames, path)
        logging.info(
            f"Context.get_doc_from_roots: rootnames: {rootnames} - not spliting pluses, first root locks in the pluses"
        )
        for _ in rootnames:
            cdocs = self.keyed_cdocs[_]
            logging.info(
                f"Context.get_doc_from_roots: cdocs: {_} -> {cdocs.get_doc_root()}"
            )
            # notfound=False here so a miss in one root falls through
            # to the next root rather than returning a 404 doc
            doc = cdocs.get_doc(path, False)
            logging.info(f"found doc: {type(doc)}")
            if doc is not None:
                return doc
        # nothing found in any root; implicit None when notfound is falsy
        if notfound:
            return self._get_default_not_found()
def get_full_file_path_for_root(self, path: DocPath, root: FilePath) -> FilePath:
    """Resolve *path* to a concrete file path under *root*.

    An empty (stripped) path resolves to the root itself. When
    self.get_filename finds a filename (i.e. the path carries a hashmark
    component), the hash suffix is cut off the directory part and the
    filename is appended; otherwise the path may be probed for an
    extension via self._find_path.

    NOTE(review): when self._find_path returns None this still returns
    FilePath(None) after logging -- confirm callers tolerate that.
    """
    logging.info(
        f"SimplePather.get_full_file_path_for_root: path: {path}, root: {root}"
    )
    # normalize: no leading/trailing separators of either flavor
    path = path.strip('/\\')
    if path == '':
        logging.info(
            f"SimplePather.get_full_file_path_for_root: path points to root. returning root."
        )
        return root
    logging.info(
        f"SimplePather.get_full_file_path_for_root: getting filename for {path}"
    )
    # filename is non-None when the path has a hashmark-addressed file part
    filename = self.get_filename(path)
    logging.info(
        f"SimplePather.get_full_file_path_for_root: path: {path}, filename: (unknown), root: {root}"
    )
    if filename is None:
        pass
    else:
        # keep only the directory part, dropping the hash suffix
        path = path[0:path.find(self._hashmark)]
    #
    # why do we change out root here? seems to do no harm, but...
    #
    #logging.info(f"SimplePather.get_full_file_path_for_root: root: {root}, _docs_path: {self._docs_path}")
    #root = self._docs_path
    #
    # if path were None we'd never get here!
    #
    #if path is None:
    #    logging.error(f"SimplePather.get_full_file_path_for_root: path is None")
    path = os.path.join(root, path)
    apath = path
    logging.info(
        f"SimplePather.get_full_file_path_for_root: joined root: {root} with path to get: {apath}"
    )
    # a '.' at an index beyond the root means the path itself carries an
    # extension; a '.' inside the root prefix (or none at all, -1) does not
    i = path.find(".")
    logging.info(
        f"SimplePather.get_full_file_path_for_root: index of '.': {i}")
    if filename is None and i <= len(root):
        logging.info(
            f"SimplePather.get_full_file_path_for_root: filename is None and no '.'"
        )
        # no filename and no extension: probe for the actual file
        apath = self._find_path(apath)
    elif filename is None:
        logging.info(
            f"SimplePather.get_full_file_path_for_root: filename is None and there is a '.'"
        )
        # path already names a file with an extension; use it as-is
        pass
    else:
        logging.info(
            f"SimplePather.get_full_file_path_for_root: apath: {apath}, last char: {apath[-1:]}, filename: (unknown)"
        )
        # append the hash-addressed filename, avoiding a doubled separator
        apath = apath + ('' if apath[-1:] == '/' else os.path.sep) + filename
        apath = self._find_path(apath)
    if apath is None:
        logging.info(
            f"SimplePather.get_full_file_path_for_root: apath is None! from: {self._rootname}->{apath}"
        )
    return FilePath(apath)