def _process_doc(self, pages, path, root, block, doc):
    # Add Houdini-specific fields to each document
    path = paths.basepath(path)
    attrs = block.get("attrs", {})
    doctype = attrs.get("type", "").strip()
    context = attrs.get("context", "").strip().replace(",", "") or None

    if doctype == "node":
        # context may be None if the attribute is missing
        if context in ("pop", "part") or (context or "").endswith("_state"):
            return
        internal = attrs.get("internal")
        if internal:
            doc["grams"] += " %s" % internal

    if doc.get("category") == "_":
        if path.startswith("/shelf/"):
            doc["category"] = "tool"
        elif path.startswith("/ref/util/"):
            doc["category"] = "utility"
        elif path.startswith("/gallery/shop/"):
            doc["category"] = "gallery/shop"
        elif doctype == "node":
            doc["category"] = "%s/%s" % (doctype, context)
            doc["grams"] += " %s" % context
        elif doctype in ("hscript", "expression", "example", "homclass",
                         "hommodule", "vex"):
            doc["category"] = doctype

    # Combine the replaces attribute with any links in a "replaces" section
    replaces = attrs.get("replaces")
    rsection = functions.subblock_by_id(block, "replaces")
    if rsection:
        rlist = " ".join(link.get("fullpath", "")
                         for link in functions.find_links(rsection))
        if replaces:
            replaces = replaces + " " + rlist
        else:
            replaces = rlist

    doc.update({
        "context": context,
        "bestbet": attrs.get("bestbet"),
        "helpid": attrs.get("helpid"),
        "superclass": attrs.get("superclass"),
        "version": attrs.get("version"),
        "replaces": replaces or None,
        "examplefor": root.get("examplefor"),
        "examplefile": root.get("examplefile"),
        "group": attrs.get("group"),
    })

    # Add example file info
    if root is block and "examplefile" in root:
        otlpath = root["examplefile"]
        # Usages file should be in the same location with .usages ext
        usagespath = paths.basepath(otlpath) + ".usages"
        if pages.exists(usagespath):
            usagescontent = pages.content(usagespath)
            usages = ws_exp.split(usagescontent)
            doc["uses"] = " ".join(usages)
def _render_json(self, path, stylesname, templatename, jsondata, extras,
                 searcher):
    # Render the page template
    kwargs = {
        "path": path,
        "basepath": paths.basepath(path),
        "is_index_page": self.is_index_page(path),
        "rel": util.make_rel_fn(path, self.index_page_name),
        "searcher": searcher,
    }
    if extras:
        kwargs.update(extras)

    # Create a function to render JSON to HTML
    stylesname = stylesname or self.page_style
    styleobj = self.style(stylesname)
    stylectx, render = styleobj.context_and_function(path, jsondata, kwargs)

    # Create a function to apply the stylesheet to a given object
    def render_styles(obj):
        return render(stylectx, obj)

    # Get the page template
    template = self._template_for_page(templatename, jsondata)
    html = template.render(docroot=jsondata, render_styles=render_styles,
                           **kwargs)
    return html
def directory_page(pages, dirpath):
    """
    Renders a simple template to show the files in a directory.
    """

    store = pages.store
    names = store.list_dir(dirpath)
    files = []
    for name in names:
        path = paths.join(dirpath, name)
        link = path
        if pages.is_wiki(link):
            link = paths.basepath(link)
        isdir = store.is_dir(path)
        if isdir:
            size = -1
            mod = -1
        else:
            size = store.size(path)
            mod = store.last_modified(path)
        files.append({
            "path": path,
            "link": link,
            "name": name,
            "ext": paths.extension(name),
            "isdir": isdir,
            "size": size,
            "modified": mod,
        })
    return flask.render_template("/templates/dir.jinja2", path=dirpath,
                                 files=files)
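# A minimal sketch of wiring directory_page into a Flask view, assuming an
# existing app object and the get_wikipages() accessor used by the views
# further down; the route rule itself is hypothetical.
@app.route("/dir/<path:dirpath>")
def show_dir(dirpath):
    return directory_page(get_wikipages(), "/" + dirpath)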
def _make_doc(self, pages, path, root, block, text):
    attrs = block.get("attrs", {})
    blocktype = block.get("type")
    body = block.get("body")
    is_root = blocktype == "root"

    # Look for a title block (or use the block text), falling back to the
    # file name
    title = self._get_title(block) or paths.basename(path)
    container = False
    path = paths.basepath(path)
    if is_root:
        # Store a boolean if this page has subtopics
        subtopics = functions.subblock_by_id(block, "subtopics")
        container = subtopics and bool(subtopics.get("body"))
    else:
        blockid = functions.block_id(block)
        path = "%s#%s" % (path, blockid)

    # Look for a summary block
    summary = self._get_block_text(body, "summary")

    # Look for tags in the page attributes; treat commas as separators
    tags = attrs.get("tags", "").strip().replace(",", " ")

    # Find outgoing links
    outgoing = []
    for link in functions.find_links(block):
        val = link.get("value")
        if val:
            outgoing.append(pages.full_path(path, val))
    outgoing = " ".join(outgoing)

    doctype = attrs.get("type")
    d = {
        "path": path,
        "status": attrs.get("status"),
        "category": "_",
        "content": functions.string(text),
        "title": title,
        "sortkey": attrs.get("sortkey") or title.lower(),
        "summary": summary,
        "grams": title,
        "type": doctype,
        "tags": tags,
        "icon": attrs.get("icon"),
        "links": outgoing,
        "container": container,
        "parent": self._get_path_attr(block, path, attrs, "parent"),
        "bestbet": attrs.get("bestbet"),
    }
    return d
def apply(self, block, context, basepath=None):
    if block.get("type") == "root":
        self._apply_to_subtopics(block, context)

    basepath = basepath or paths.basepath(context.get("path"))
    if "text" in block:
        self.text(context, block["text"], basepath)

    # Recurse
    for subblock in block.get("body", ()):
        self.apply(subblock, context, basepath)
def debug_wiki_structure(path):
    pages = get_wikipages()
    indexer = get_indexer()
    searcher = indexer.searcher()
    path = paths.normalize("/" + path)
    path = pages.source_path(path)
    jsondata = pages.json(paths.basepath(path), conditional=False,
                          extra_context=flask.request.args,
                          searcher=searcher)
    return flask.render_template("/templates/debug_wiki.jinja2", path=path,
                                 root=jsondata, searcher=searcher)
def _find_files(self, pages, prefix, reader, clean):
    # NOTE: prefix is accepted for symmetry with update() but is not
    # currently used to filter the results
    store = pages.store
    existing = set(paths.basepath(p) for p in store.list_all()
                   if pages.is_wiki(p))

    if clean:
        new = existing
        changed = set()
        deleted = set()
    else:
        # Read all the stored field dicts from the index and build a
        # dictionary mapping paths to their last indexed mod time
        modtimes = {}
        for fs in reader.all_stored_fields():
            p = fs["path"]
            if "#" in p:
                continue
            modtime = fs["modified"]
            modtimes[p] = modtime

        indexedpaths = set(modtimes)
        new = existing - indexedpaths
        deleted = indexedpaths - existing
        both = existing - new - deleted

        changed = set()
        for path in both:
            ix_mod = modtimes[path]
            store_mod = store.last_modified(pages.source_path(path))
            if store_mod > ix_mod:
                self.logger.debug("%s changed: %s > %s", path, store_mod,
                                  ix_mod)
                changed.add(path)

    return new, changed, deleted
def generate(dirpath, prefix="/", vars=None, longest=10, cache=True,
             nocache=False):
    pages = flaskapp.get_wikipages(manager.app)
    logger = manager.app.logger
    dirpath = _exp(dirpath)
    indexer = flaskapp.get_indexer(manager.app)
    searcher = indexer.searcher()

    if nocache:
        empty_cache(pages)

    count = 0
    largest = []
    if vars:
        vars = _parse_vars(vars)
        manager.app.config.setdefault("VARS", {}).update(vars)

    t = util.perf_counter()
    for path in get_prefixed_paths(pages, prefix):
        if not pages.is_wiki_source(path):
            continue
        logger.debug("Generating %s", path)
        count += 1

        tt = util.perf_counter()
        html = pages.html(path, save_to_cache=cache, searcher=searcher)
        tt = util.perf_counter() - tt

        htmlpath = paths.basepath(path) + ".html"
        filepath = os.path.join(dirpath, htmlpath[1:])

        # Make sure the destination directory exists, then create the file.
        # Open in binary mode since we write encoded bytes.
        parentdirpath = os.path.dirname(filepath)
        if not os.path.exists(parentdirpath):
            os.makedirs(parentdirpath)
        with open(filepath, "wb") as f:
            f.write(html.encode("utf8"))

        # Keep track of slowest pages
        if len(largest) < longest or tt > largest[0][0]:
            if len(largest) >= longest:
                largest.pop(0)
            bisect.insort(largest, (tt, path))

    totaltime = util.perf_counter() - t
    logger.info("Generated %s files in %s secs", count, totaltime)
    if count:
        logger.info("Average %s sec per page", totaltime / count)
    logger.info("Top %s longest times:", longest)
    for gentime, path in largest:
        logger.info("%s | %03.04f secs", path, gentime)
def _process_example_page(self, block, context, is_node_eg, is_panel_eg):
    path = context["path"]
    attrs = block.get("attrs", {})

    # Example authors are very lax about giving the example documents
    # titles; if the document doesn't have a title, make one up from the
    # file name
    title = functions.first_subblock_of_type(block, "title")
    if not title:
        name = text_type(paths.barename(path))
        body = block.setdefault("body", [])
        body.insert(0, {
            "type": "title",
            "indent": 0,
            "text": [name],
        })

    # Check for an explicit exampleFor property, otherwise guess it
    # from the example's directory tree
    if is_node_eg:
        block.setdefault("attrs", {})["type"] = "example"
        if "exampleFor" in attrs:
            egfor = attrs["exampleFor"]
        elif "examplefor" in attrs:
            egfor = attrs["examplefor"]
        else:
            egfor = self._node_path_from_example_path(path)
        # Attach the list of nodes to the root
        block["examplefor"] = egfor

    egpath = None
    # Check for an explicit exampleFile property, otherwise guess it
    # by looking for the example name with an extension
    if "exampleFile" in attrs:
        egpath = attrs["exampleFile"]
    elif "examplefile" in attrs:
        egpath = attrs["examplefile"]
    elif is_node_eg:
        base = paths.basepath(path)
        for ext in (".hda", ".otl"):
            egpath = base + ext
            if context.pages.exists(egpath):
                break
    elif is_panel_eg:
        egpath = self._file_path_from_panel_path(path)

    if egpath:
        egpath = paths.join(path, egpath)
        if context.pages.exists(egpath):
            block["examplefile"] = egpath
def _get_include_content(self, path, root, context, icache, ref):
    incpath, name, value, unwrap = self._parse_include_path(ref)
    if incpath and incpath != paths.basepath(path):
        # The include is in another page
        incpath = paths.join(path, incpath)
        _, ext = paths.split_extension(incpath)
        if not ext:
            incpath = context.pages.source_path(incpath)
        return self._load_include(incpath, root, context, icache, name,
                                  value, unwrap)
    elif name and value:
        # If no path was given, or it was this page's path, grab the
        # target from this page
        return self._target(root, name, value, unwrap)
def apply(self, block, context):
    basepath = paths.basepath(context.get("path"))

    # Find the subtopics section
    subtopics = functions.subblock_by_id(block, "subtopics")
    if not subtopics:
        return

    attrs = subtopics.get("attrs", {})
    maxdepth = int(attrs.get("maxdepth", "0"))
    if not maxdepth:
        return

    topics = functions.find_items(subtopics, "subtopics_item")
    for item in topics:
        self._apply(context, item, basepath, 1, maxdepth)
def _index_usages(pages, logger, prefix="/examples/nodes/"):
    from houdinihelp.hsearch import usages_for_otl

    # Find all .otl files under the given prefix
    changed = False
    store = pages.store
    for path in store.list_all(prefix):
        if not pages.is_wiki_source(path):
            continue

        # Look for an hda or otl file with the same name as this wiki file
        bp = paths.basepath(path)
        exts = (".hda", ".otl")
        for ext in exts:
            p = bp + ext
            if store.exists(p):
                otlpath = p
                break
        else:
            continue

        # Check if there's a usages file already and if it's newer than
        # the otl
        usagespath = bp + ".usages"
        if store.exists(usagespath):
            otlmod = store.last_modified(otlpath)
            usagesmod = store.last_modified(usagespath)
            if otlmod <= usagesmod:
                continue

        # Get the real file path corresponding to the OTL's virtual path
        filepath = pages.file_path(otlpath)
        if filepath:
            logger.info("Generating usages for %s", filepath)
            # Find all node usages in the OTL
            usages = usages_for_otl(filepath)

            # Write the usages to a file alongside the otl file
            basename = paths.basename(usagespath)
            parentdir = os.path.dirname(filepath)
            usagesfile = os.path.join(parentdir, basename)
            with open(usagesfile, "wb") as outfile:
                output = "\n".join(usages) + "\n"
                outfile.write(output.encode("utf8"))
            changed = True

    return changed
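# A small sketch of the .usages round trip, assuming ws_exp is a compiled
# whitespace regex (e.g. re.compile(r"\s+")); the node paths are made up.
# _index_usages writes one node path per line next to the .otl/.hda file,
# and _process_doc above splits that content back into the "uses" field
# that _process_node_page later queries with searcher.documents(uses=...).
import re

ws_exp = re.compile(r"\s+")
content = "/nodes/sop/copy\n/nodes/sop/scatter\n"
uses = " ".join(ws_exp.split(content.strip()))
assert uses == "/nodes/sop/copy /nodes/sop/scatter"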
def _parent_info(json, path):
    # Find the subtopics section
    subtopics = functions.subblock_by_id(json, "subtopics")
    if subtopics:
        stbody = subtopics.get("body")
        if stbody:
            # Remove column layout wrappers from the subtopics body
            body = functions.collapse(stbody, ("col_group", "col"))
            subtopics["body"] = body

    return {
        "path": path,
        "basepath": paths.basepath(path),
        "title": json.get("title", ()),
        "summary": json.get("summary", ()),
        "attrs": json.get("attrs", {}),
        "subtopics": subtopics,
    }
def apply(self, block, context, root=None):
    searcher = context.searcher
    if not searcher:
        return

    basepath = paths.basepath(context.get("path"))
    root = root or block

    for parent in block.get("parents", ()):
        psubs = parent.get("subtopics")
        if psubs:
            self.apply(psubs, context, root)

    if block.get("type") in self.itemtypes:
        icache = {}
        self._run_search(context, basepath, block, icache)
    else:
        for subblock in block.get("body", ()):
            self.apply(subblock, context, root)
def apply(self, block, context, root=None, in_replaces=False):
    attrs = block.get("attrs", {})
    if root is None:
        root = block

    # Find any documents that replace this one
    searcher = context.searcher
    if searcher:
        path = paths.basepath(context["path"])
        # Only run the search for pages that start with one of the
        # prefixes listed in the class's prefixes attribute
        for prefix in self.prefixes:
            if path.startswith(prefix):
                repls = []
                for doc in searcher.documents(replaces=path):
                    d = {}
                    for f in self.fields:
                        if f in doc:
                            d[f] = doc[f]
                    repls.append(d)
                if repls:
                    block["replacedby"] = repls
                break

    # Look for "replaces" property on block
    if "replaces" in attrs:
        self._do(block)

    # Look for a "replaces" section
    if block.get("role") == "section" and block.get("id") == "replaces":
        in_replaces = True
    elif in_replaces:
        for span in block.get("text", ()):
            if isinstance(span, dict) and span.get("type") == "link":
                rpath = span.get("fullpath")
                if rpath:
                    rpath = parse_shortcut(rpath)
                    if "replaces" in root:
                        root["replaces"] += " " + rpath
                    else:
                        root["replaces"] = rpath

    for subblock in block.get("body", ()):
        self.apply(subblock, context, root, in_replaces)
def apply(self, block, context):
    path = paths.basepath(context["path"])
    if not path.startswith("/nodes/"):
        return

    # Assume if it doesn't have a parameters section it's not a node
    body = block.get("body", ())
    parms = functions.first_subblock_of_type(body, "parameters_section")
    if not parms:
        return

    from houdinihelp import path_to_components
    from houdinihelp import path_to_nodetype
    from houdinihelp import table_to_dir

    nodeinfo = path_to_components(path)
    if nodeinfo is None:
        return

    # Fill in missing properties from information in the path
    attrs = block.setdefault("attrs", {})
    if "type" not in attrs:
        attrs["type"] = "node"
    if "context" not in attrs:
        attrs["context"] = table_to_dir[nodeinfo.table]
    if "internal" not in attrs:
        attrs["internal"] = nodeinfo.corename
    if "version" not in attrs:
        attrs["version"] = nodeinfo.version
    if "namespace" not in attrs:
        attrs["namespace"] = nodeinfo.namespace

    body = block.get("body", ())
    title = functions.first_subblock_of_type(body, "title")
    if title is None:
        # Get the node label from HOM
        nodetype = path_to_nodetype(path)
        if nodetype:
            title = nodetype.description()
        if title:
            # Create a fake title block and add it to the beginning of
            # the document body
            tblock = {"type": "title", "text": [title]}
            body.insert(0, tblock)
def _process_node_page(self, block, context):
    path = context["path"]
    pages = context.pages
    searcher = context.searcher
    if not searcher:
        return

    # Look for an examples section on this page
    body = block.setdefault("body", [])
    egblock = functions.first_subblock_of_type(body, "examples_section")
    if egblock:
        found = True
    else:
        # This page doesn't have an examples section, so we have to
        # make one
        found = False
        egblock = {
            "type": "examples_section",
            "role": "section",
            "id": "examples",
            "level": 1,
            "text": "Examples",
        }

    has_egs = False

    # Find direct examples
    vpath = paths.basepath(path)
    egdocs = searcher.documents(examplefor=vpath)
    if egdocs:
        # Put them in an attribute on the examples section
        egblock["examples"] = self._example_items(pages, egdocs, context,
                                                  include=True)
        has_egs = True

    # Find usages
    usagedocs = searcher.documents(uses=vpath)
    if usagedocs:
        # Put them in an attribute on the examples section
        egblock["usages"] = self._example_items(pages, usagedocs, context)
        has_egs = True

    # If we have examples and the page didn't have its own examples
    # section, append the one we made to the body
    if has_egs and not found:
        body.append(egblock)
def apply(self, block, context):
    # Only operate on HOM class documents
    attrs = block.get("attrs", {})
    if attrs.get("type") != "homclass":
        return

    path = paths.basepath(context["path"])
    pages = context.pages
    searcher = context.searcher

    # Find the subclasses using the full-text index
    if searcher:
        self._annotate_subclasses(searcher, path, block)

    # Get a list of methods on this class, so we can check if one of
    # the super methods is overridden
    methodnames = self._get_method_names(block)

    # Recursively load the docs for superclasses
    supers = list(self._superclasses(pages, methodnames, context, block))
    block["superclasses"] = supers
def _file_path_from_panel_path(path):
    return paths.basepath(path) + ".pypanel"
def json(self, path, wcontext=None, ext=".json", conditional=True,
         postprocess=True, save_to_cache=True, extra_context=None,
         searcher=None, allow_redirect=False):
    store = self.store
    path = self._check_source(path)
    jsonpath = paths.basepath(path) + ext

    if wcontext is None:
        wcontext = self.wiki_context(
            path, conditional=conditional, save_to_cache=save_to_cache,
            searcher=searcher
        )
        if extra_context:
            wcontext.update(extra_context)
    else:
        old_context = wcontext
        wcontext = wcontext.push({"path": path})
        wcontext.searcher = searcher or old_context.searcher
        wcontext.pages = self

    # Set up holders for cached and parse times in the context; these may
    # be useful for debugging
    if "parse_time" not in wcontext:
        wcontext["parse_time"] = {}
    if "cached" not in wcontext:
        wcontext["cached"] = set()
    times = wcontext["parse_time"]
    cached = wcontext["cached"]

    jsondata = None
    # Try to get the JSON data from the cache
    if wcontext.get("conditional") and self.caching:
        jsondata = self.get_cached_json(path, jsonpath)
        if jsondata is not None:
            # Add the file to the context's debug list of cached files
            cached.add(path)

    if jsondata is None:
        # It wasn't in the cache, so we'll have to load and parse it
        t = compat.perf_counter()
        # Load the content of the file
        source = store.content(path, "utf8")
        # Parse the wiki markup
        jsondata = parse_string(source)
        # Run preprocessors
        self.pre_pipe().apply(jsondata, wcontext)
        # Store the parsing time in the context for debugging
        times[path] = compat.perf_counter() - t

        # If we're caching, save the parsed JSON to a file in the cache.
        # Note that we don't add the JSON data to the mem cache here; we
        # only do that when a cached JSON file is loaded. This has the
        # effect of only caching a document in memory if it's been
        # accessed at least *twice*.
        if self.caching and save_to_cache:
            jsonified = json.dumps(jsondata)
            self.put_cache_file(jsonpath, jsonified.encode("utf8"))

    if postprocess:
        # Run postprocessors
        self.post_pipe().apply(jsondata, wcontext)

    attrs = jsondata.get("attrs")
    if allow_redirect and attrs and "redirect" in attrs:
        fullpath = self.full_path(path, attrs["redirect"])
        raise Redirect(fullpath)

    return jsondata
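# A minimal sketch of calling json() from view code, in the style of the
# debug_wiki_structure view above; the page path here is hypothetical.
# With allow_redirect=True the call may raise Redirect instead of
# returning parsed data.
pages = get_wikipages()
searcher = get_indexer().searcher()
jsondata = pages.json("/nodes/sop/copy", conditional=False,
                      searcher=searcher, allow_redirect=True)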
def update(self, pages, prefix="", clean=False):
    if clean:
        schema = self.searchables.schema()
        self.index = index.create_in(self.indexdir, schema=schema)
    idx = self.index

    doccount = 0
    pagecount = 0
    t = compat.perf_counter()
    try:
        w = idx.writer()
    except index.LockError:
        raise LockError

    new, changed, deleted = self._find_files(pages, prefix, w.reader(),
                                             clean)

    didsomething = False
    if new or changed or deleted:
        if deleted:
            didsomething = True

        for delpath in sorted(changed | deleted):
            delpath = paths.basepath(delpath)
            self.logger.info("Deleting %s from index", delpath)
            w.delete_by_query(query.Term("path", delpath))
            w.delete_by_query(query.Prefix("path", delpath + "#"))

        for addpath in sorted(new | changed):
            addpath = paths.basepath(addpath)
            added = False
            if addpath in changed:
                self.logger.info("Removing %s from index", addpath)
                w.delete_by_query(query.Term("path", addpath))
                w.delete_by_query(query.Prefix("path", addpath + "#"))
                didsomething = True

            for doc in self.documents(pages, addpath):
                self._sanitize_doc(doc)
                self.logger.debug("Indexing %s", doc["path"])
                try:
                    if clean or "#" in doc["path"]:
                        w.add_document(**doc)
                    else:
                        w.update_document(**doc)
                except ValueError:
                    self.logger.error("Error indexing %r", doc)
                    raise
                added = True
                doccount += 1

            if added:
                pagecount += 1
                didsomething = True

    if didsomething:
        self.logger.info("Committing index changes")
        w.commit()
        self.logger.info("Indexed %d docs from %d pages in %.06f seconds",
                         doccount, pagecount, compat.perf_counter() - t)
    else:
        # No changes to commit
        w.cancel()

    return didsomething
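# A minimal sketch of driving the indexer with update(), assuming the
# get_wikipages()/get_indexer() accessors used by the views above.
pages = get_wikipages()
indexer = get_indexer()
indexer.update(pages)              # index only new/changed/deleted pages
indexer.update(pages, clean=True)  # recreate the index and reindex everything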