Esempio n. 1
0
 def _get_plus_paths(self, path: DocPath) -> List[DocPath]:
     """Expand the plus-joined parts of a docpath into full docpaths.

     The path is split on the plus marker. The first segment supplies the
     base: everything up to and including its first hashmark, or the whole
     segment with a hashmark appended when it has none. Every remaining
     segment is appended to that base and wrapped in a DocPath.

     Returns an empty list when the path holds no plus marker.
     """
     segments = path.split(self._plus)
     if not segments:
         return segments
     base, extras = segments[0], segments[1:]
     hash_at = base.find(self._hashmark)
     # keep the base through its hashmark, or add one when it is missing
     base = base[:hash_at + 1] if hash_at > -1 else base + self._hashmark
     return [DocPath(base + extra) for extra in extras]
Esempio n. 2
0
 def filter_root_names_for_path(self, roots: List[str],
                                path: DocPath) -> List[str]:
     """Keep only the roots that accept the filetype of the given path.

     A path containing the configured hashmark is treated as filetype
     "cdocs"; any other path gets its filetype from self.get_filetype.
     The roots accepting that filetype are looked up in
     self.metadata.accepted_by, and the incoming roots are returned in
     their original order, restricted to the accepted ones.
     """
     prefix = "Context.filter_root_names_for_path:"
     logging.info(f"{prefix} starting roots: {roots}, path: {path}")
     hashmark = self.metadata.config.get("filenames", "hashmark", "#")
     logging.info(f"{prefix} hashmark: {hashmark}")
     has_hashmark = path.find(hashmark) > -1
     filetype = "cdocs" if has_hashmark else self.get_filetype(path)
     logging.info(f"{prefix} filetype: {filetype}")
     accepted_by = self.metadata.accepted_by
     logging.info(f"{prefix} ab: {accepted_by}")
     accepting = accepted_by.get(filetype)
     if accepting is None:
         # no root is registered for this filetype
         accepting = []
     logging.info(
         f"{prefix} found {accepting} for filetype. filtering roots using that list."
     )
     kept = [root for root in roots if root in accepting]
     if roots != kept:
         logging.info(f"{prefix} filtered (by accepted) {roots} to {kept}")
     return kept
Esempio n. 3
0
 def _get_concat_paths(self, path:DocPath) -> Optional[List[DocPath]]:
     """Read a concat instruction file and return the docpaths it lists.

     The file is located through the pather and read with self._read_doc;
     each newline-separated line of its content becomes one DocPath.

     Returns:
         The list of docpaths, or None (after logging a warning) when the
         instruction file cannot be found.
     """
     filepath = self._pather.get_full_file_path(path)
     try:
         # only the read is expected to raise DocNotFoundException
         content = self._read_doc(filepath)
     except DocNotFoundException:
         # logging.warn is a deprecated alias; logging.warning is the supported call
         logging.warning(f"Cdocs._get_concat_paths: No such doc {path}. returning None.")
         return None
     return [DocPath(line) for line in content.split('\n')]
Esempio n. 4
0
 def _add_labels_to_tokens(self, path:DocPath, tokens:JsonDict, recurse:Optional[bool]=True) -> JsonDict:
     """Merge the labels for a docpath into a token dict.

     The path is cut at its first hashmark and then at its first plus
     marker before labels are fetched with self.get_labels. Each label key
     is prefixed with "label__"; entries already in tokens win over labels
     with the same key.
     """
     base = path
     # drop the hash part first, then any plus part left in the remainder
     for marker in (self._hashmark, self._plus):
         cut = base.find(marker)
         if cut > -1:
             base = base[:cut]
     labels = self.get_labels(base, recurse)
     prefixed = {"label__" + key: value for key, value in labels.items()}
     merged = {**prefixed, **tokens}
     return JsonDict(merged)
Esempio n. 5
0
 def get_concat_doc(self, path:DocPath) -> Doc:
     """Build a Doc by concatenating the docs listed in a .concat file.

     Raises:
         DocNotFoundException: if path is None or no instruction file is
             found for it.
         BadDocPath: if path does not contain '.concat'.
     """
     if path is None:
         raise DocNotFoundException("path can not be None")
     if '.concat' not in path:
         raise BadDocPath("path must have a .concat file extension")
     listed = self._get_concat_paths(path)
     if listed is None:
         raise DocNotFoundException(f'No concat instruction file at {path}')
     return Doc(self.concatter.concat(listed))
Esempio n. 6
0
 def get_compose_doc(self, path:DocPath) -> Doc:
     """Render the compose template at path into a Doc.

     The template is read from the file the pather resolves for path,
     tokens are fetched for the part of path before its last '/', and the
     transformer renders the content with those tokens.

     Raises:
         DocNotFoundException: if path is None.
         ComposeDocException: if reading, token lookup or transforming
             fails for any reason; the original error is chained as the
             cause.
     """
     if path is None:
         raise DocNotFoundException("path can not be None")
     filepath:FilePath = self._pather.get_full_file_path(path)
     try:
         # logged rather than printed, like every other diagnostic here
         logging.info(f"Cdocs.get_compose_doc: fp: {filepath}")
         content = self._read_doc(filepath)
         tokens:dict = self.get_tokens(path[0:path.rindex('/')])
         content = self.transformer.transform(content, path, tokens, True)
         return Doc(content)
     except Exception as e:
         logging.error(f"Cdocs.get_compose_doc: cannot compose {path}: {e}")
         # chain explicitly so the root cause stays attached to the new error
         raise ComposeDocException(f'{path} failed to compose') from e
Esempio n. 7
0
 def _get_doc_for_root(self, path:DocPath, pluspaths:List[DocPath], root:FilePath) -> Doc:
     """Read and transform the doc at path under root, then append plus docs.

     When pluspaths is non-empty the path is first cut at its plus marker
     to get the base path. The base doc is read from the file the pather
     resolves for that root and run through the transformer; the
     concatenated plus docs, if any, are joined onto the result.
     """
     tag = "Cdocs._get_doc_for_root:"
     logging.info(f"{tag} path: {path}. plus paths: {pluspaths}. root: {root}")
     has_plus = len(pluspaths) > 0
     if has_plus:
         logging.info("Cdocs._get_doc_for_root: stripping down base path from plus path(s)")
         path = path[:path.find(self._plus)]
         logging.info(f"{tag} base path is now {path}")
     logging.info(f"{tag} checking pather: {self._pather} for path: {path}")
     filepath = self._pather.get_full_file_path_for_root(path, root)
     logging.info(f"{tag} filepath from pather: {filepath}")
     content = self._read_doc(filepath)
     size = 0 if content is None else len(content)
     logging.info(f"{tag} content from {filepath} is {size} chars. transforming with: {self.transformer}.")
     content = self.transformer.transform(content, path, None, True)
     if has_plus:
         extra = self.concatter.concat(pluspaths)
         content = self.concatter.join(content, extra)
     return Doc(content)
Esempio n. 8
0
 def _get_doc(self, path:DocPath, notfound) -> Optional[Doc]:
     """Fetch the doc at path from this root, optionally falling back to the 404 doc.

     A notfound of None is treated as False. When the doc comes back as
     None and notfound is truthy, the result of self.get_404() is returned.

     Raises:
         DocNotFoundException: if path is None.
         BadDocPath: if path contains a '.' and the filer reports its
             filetype as 'cdocs'.
     """
     logging.info(f"Cdocs._get_doc: looking for path: {path} in root: {self.rootname}. notfound: {notfound}")
     if path is None:
         raise DocNotFoundException("path can not be None")
     if path.find('.') > -1 and self.filer.get_filetype(path) == 'cdocs':
         raise BadDocPath("dots are not allowed in cdoc paths")
     if notfound is None:
         logging.info("Cdocs._get_doc: notfound is None. you should fix this unless you want None returns.")
         notfound = False
     logging.info(f"Cdocs._get_doc: path: {path}")
     plus_paths = self._get_plus_paths(path)
     logging.info(f"Cdocs._get_doc: pluspaths to concationate: {plus_paths}")
     doc_root = self.get_doc_root()
     logging.info(f"Cdocs._get_doc: root {doc_root}")
     found = self._get_doc_for_root(path, plus_paths, doc_root)
     if found is not None or not notfound:
         return found
     return self.get_404()
Esempio n. 9
0
 def list_docs(self, path: DocPath) -> List[Doc]:
     """List the names of the files that sit beside the given docpath.

     The last segment of the docpath is dropped when it has a parent, and
     the remainder is resolved under the doc root reported by self._cdocs.

     Returns:
         The names of the non-hidden regular files in the resolved
         directory; a one-element list holding the file's name when the
         resolved path is not a directory; [] when nothing exists there.
         The values are plain names as reported by the filesystem.
     """
     logging.info(f"SimpleLister.list_docs: path: {path}")
     #
     # a docpath points to a directory which may have a sibling file named
     # the same as the directory, but with a file extension.
     #
     # listing the docpath means listing the parent directory's files
     # so docpath:
     #    /x/y/z
     # would list:
     #    /x/y
     # and might return:
     #    /x/y/z.xml
     #    /x/y/fish.xml
     #    /x/y/bats.xml
     # it would not return anything like:
     #    /x/y/z/z.xml
     #    /x/y/z/fish.xml
     #    /x/y/z/bats.xml
     #
     # why does it work that way? because /x/y/z is an identifier
     # in concept-space, not a filesystem path. as a concept, it includes
     # materials that explain the space. the materials may require
     # multiple files that are all grouped by the concept /x/y/z. we
     # address the fish.xml and bats.xml files as:
     #    /x/y/z#fish.xml
     #    /x/y/z#bats.xml
     #
     # one way to think about it is that z.xml is the important thing and
     # 'z' the directory is just an indication that the concept described
     # by z.xml contains more things.
     #
     # if /x/y/z/a.xml doesn't have a sibling 'a' directory, is /x/y/z/a
     # still a good docpath? yes. that just means that the concept
     # /x/y/z/a doesn't contain any further concepts.
     #
     # all this nets out that z.xml, fish.xml and bats.xml just
     # describe /x/y/z.
     #
     # is that a good way to do it? sure. it works, it makes sense
     # logically and you can get used to it pretty easily.
     #
     apath = path
     # a path with a leading slash needs two slashes to have a parent
     # segment; a path without one needs a single slash
     min_slashes = 2 if path[0:1] == '/' else 1
     if path.count('/') >= min_slashes:
         apath = path[0:apath.rindex('/')]
     logging.info(f"SimpleLister.list_docs: apath: {apath}")
     root_path = self._cdocs.get_doc_root()
     logging.info(f"SimpleLister.list_docs: root: {root_path}")
     if apath[0:1] == '/':
         # os.path.join would discard root_path if apath stayed absolute
         apath = apath[1:]
     the_path = os.path.join(root_path, apath)
     logging.info(f"SimpleLister.list_docs: the_path: {the_path}")
     if not os.path.exists(the_path):
         logging.info(
             f"SimpleLister.list_docs: directory at {the_path} doesn't exist. no files found. returning []."
         )
         return []
     logging.info("SimpleLister.list_docs: path exists")
     if not os.path.isdir(the_path):
         logging.info(
             f"SimpleLister.list_docs: the_path: {the_path} is not a directory"
         )
         # basename copes with whatever separator os.path.join used,
         # where searching for a literal "/" could raise ValueError
         name = os.path.basename(the_path)
         logging.info(f"SimpleLister.list_docs: returning name: {name}")
         return [name]
     files = os.listdir(the_path)
     logging.info(f"SimpleLister.list_docs: files: {files}")
     # keep regular files only, skipping dot-files
     files = [
         f for f in files
         if not f.startswith('.')
         and os.path.isfile(os.path.join(the_path, f))
     ]
     logging.info(f"SimpleLister.list_docs: returning: {files}")
     return files
Esempio n. 10
0
 def get_doc_from_roots(self,
                        rootnames: List[str],
                        path: DocPath,
                        notfound: Optional[bool] = True,
                        splitplus: Optional[bool] = True) -> Optional[Doc]:
     """
         Find a doc by searching the named roots; the first match wins.

         rootnames: a list of named roots to search
         path: the docpath. may have hash and plusses
         notfound: if true, return a default notfound if no results
         splitplus: if true, plus concats can be on different roots.
                    i.e. for /x/y/z+a+b /x/y/z, /x/y/z/a, /x/y/z/b
                    can all be on different roots.
     """
     tag = "Context.get_doc_from_roots:"
     logging.info(
         f"{tag} first match wins. rootnames: {rootnames}, path: {path}, notfound: {notfound}, splitplus: {splitplus}"
     )
     plusmark = self._metadata.config.get("filenames", "plus")
     plus = path.find(plusmark)

     if plus < 0 or not splitplus:
         # no splitting: the first root that has the doc supplies everything
         rootnames = self.filter_root_names_for_path(rootnames, path)
         logging.info(
             f"{tag} rootnames: {rootnames} - not spliting pluses, first root locks in the pluses"
         )
         for rootname in rootnames:
             cdocs = self.keyed_cdocs[rootname]
             logging.info(f"{tag} cdocs: {rootname} -> {cdocs.get_doc_root()}")
             doc = cdocs.get_doc(path, False)
             logging.info(f"found doc: {type(doc)}")
             if doc is not None:
                 return doc
         return self._get_default_not_found() if notfound else None

     # the roots that must not take part in split lookups are read once
     if self._nosplitplus is None:
         configured = self._metadata.config.get("defaults", "nosplitplus", "")
         self._nosplitplus = configured.split(',')
     if self._nosplitplus:
         rootnames = [
             name for name in rootnames if name not in self._nosplitplus
         ]
         logging.info(f"{tag} nsp filtered rootnames: {rootnames}")
     # split into paths and call get_doc_from_roots on each, then concat
     #
     #  /r/o/o/t.html#fish
     # needs to become /r/o/o
     #
     #  /r/o/o/t#fish
     # needs to become /r/o/o/t
     #
     #  /r/o/o/t
     # needs to become /r/o/o/t
     #
     logging.info(f"{tag} path: {path}")
     logging.info(f"{tag} rootnames: {rootnames}")
     logging.info(f"{tag} notfound: {notfound}")
     logging.info(f"{tag} splitplus: {splitplus}")
     parts = path.split(plusmark)
     logging.info(f"{tag} paths: {parts}")
     rootpath = parts[0]
     logging.info(f"{tag} rootpath: {rootpath}")
     hashmark = self._metadata.config.get("filenames", "hashmark")
     logging.info(f"{tag} hashmark: {hashmark}")
     rootpath = rootpath.split(hashmark)[0]
     logging.info(f"{tag} rootpath: {rootpath}")
     # parts that do not already contain the root path are placed under it
     expanded = [
         part if part.find(rootpath) > -1 else rootpath + "/" + part
         for part in parts
     ]
     logging.info(f"{tag} paths: {expanded}")
     found = []
     for subpath in expanded:
         logging.info(f"{tag} .... next path: {subpath}")
         piece = self.get_doc_from_roots(rootnames, subpath, notfound)
         if piece is not None:
             found.append(piece)
     if not found and notfound:
         return self._get_default_not_found()
     return "".join(found)
Esempio n. 11
0
 def get_full_file_path_for_root(self, path: DocPath,
                                 root: FilePath) -> FilePath:
     """Build the filesystem path under root for a docpath.

     Leading and trailing '/' and '\\' characters are stripped from path.
     An empty result means the root itself, which is returned as given.
     Otherwise self.get_filename(path) is consulted:

     * no filename and the first '.' of the joined path is at or before
       len(root): the joined path is passed through self._find_path
     * no filename and a '.' further in: the joined path is used as is
     * a filename: path is cut at the hashmark, joined to root, the
       filename is appended, and the result goes through self._find_path

     Returns FilePath(apath); a message is logged when apath is None.
     """
     logging.info(
         f"SimplePather.get_full_file_path_for_root: path: {path}, root: {root}"
     )
     path = path.strip('/\\')
     if path == '':
         logging.info(
             f"SimplePather.get_full_file_path_for_root: path points to root. returning root."
         )
         return root
     logging.info(
         f"SimplePather.get_full_file_path_for_root: getting filename for {path}"
     )
     filename = self.get_filename(path)
     logging.info(
         f"SimplePather.get_full_file_path_for_root: path: {path}, filename: {filename}, root: {root}"
     )
     if filename is None:
         pass
     else:
         # keep only the part before the hashmark.
         # NOTE(review): presumably get_filename returns non-None only when
         # the hashmark is present; if it were absent find() would give -1
         # and this slice would drop the last character -- confirm.
         path = path[0:path.find(self._hashmark)]
     #
     # why do we change out root here?  seems to do no harm, but...
     #
     #logging.info(f"SimplePather.get_full_file_path_for_root: root: {root}, _docs_path: {self._docs_path}")
     #root = self._docs_path
     #
     # if path were None we'd never get here!
     #
     #if path is None:
     #    logging.error(f"SimplePather.get_full_file_path_for_root: path is None")
     path = os.path.join(root, path)
     apath = path
     logging.info(
         f"SimplePather.get_full_file_path_for_root: joined root: {root} with path to get: {apath}"
     )
     # index of the first '.' in the joined path (-1 when there is none)
     i = path.find(".")
     logging.info(
         f"SimplePather.get_full_file_path_for_root: index of '.': {i}")
     # NOTE(review): i <= len(root) is also true when the first '.' lies
     # inside root itself, so a root containing '.' appears to always take
     # this branch -- confirm that is intended.
     if filename is None and i <= len(root):
         logging.info(
             f"SimplePather.get_full_file_path_for_root: filename is None and no '.'"
         )
         apath = self._find_path(apath)
     elif filename is None:
         logging.info(
             f"SimplePather.get_full_file_path_for_root: filename is None and there is a '.'"
         )
         pass
     else:
         logging.info(
             f"SimplePather.get_full_file_path_for_root: apath: {apath}, last char: {apath[-1:]}, filename: {filename}"
         )
         # add a separator only when apath does not already end in '/'
         apath = apath + (''
                          if apath[-1:] == '/' else os.path.sep) + filename
         apath = self._find_path(apath)
     if apath is None:
         # only logged; None is still handed to FilePath below
         logging.info(
             f"SimplePather.get_full_file_path_for_root: apath is None! from: {self._rootname}->{apath}"
         )
     return FilePath(apath)