class Plugin(Plugin_Base):
    """Markdown parser plugin for teedoc.

    Parses ``.md`` source files into HTML bodies plus per-page metadata
    (title, keywords, tags, desc, date, author), rewrites relative
    ``.md``/``.ipynb`` links to ``.html``, and optionally injects MathJax
    support into every generated page header.
    """

    name = "teedoc-plugin-markdown-parser"
    desc = "markdown parser plugin for teedoc"
    # NOTE: the attribute name keeps its historical typo ("defautl")
    # because external code may already reference Plugin.defautl_config.
    defautl_config = {
        "parse_files": ["md"],
        "mathjax": {
            "enable": True,
            # MathJax component name, see
            # http://docs.mathjax.org/en/latest/web/components/index.html
            "file_name": "tex-mml-chtml",
            "config": {
                "loader": {
                    "load": ['output/svg']
                },
                "tex": {
                    "inlineMath": [['$', '$'], ['\\(', '\\)']]
                },
                "svg": {
                    "fontCache": 'global'
                }
            }
        }
    }

    def on_init(self, config, doc_src_path, site_config, logger=None,
                multiprocess=True, **kw_args):
        '''
            @config a dict object
            @logger teedoc.logger.Logger object
        '''
        import copy  # local import so the module import list stays unchanged

        self.multiprocess = multiprocess
        self.logger = Fake_Logger() if not logger else logger
        self.doc_src_path = doc_src_path
        self.site_config = site_config
        # BUGFIX: deep-copy the class-level defaults. Binding the class
        # attribute directly would let the updates below mutate the dict
        # (and its nested mathjax dicts) shared by every Plugin instance.
        self.config = copy.deepcopy(Plugin.defautl_config)
        # merge the user's mathjax settings one level deep so nested dicts
        # (loader / tex / svg) are updated rather than replaced wholesale
        mathjax_config = self.config["mathjax"]
        if "mathjax" in config:
            for k, v in config["mathjax"].items():
                if not isinstance(v, dict):
                    mathjax_config[k] = v
                else:
                    mathjax_config[k].update(v)
        self.config.update(config)
        self.config["mathjax"] = mathjax_config
        self.logger.i("-- plugin <{}> init".format(self.name))
        self.logger.i("-- plugin <{}> config: {}".format(self.name, self.config))
        if not self.multiprocess:
            # single-process mode: parsers are created per call in
            # on_parse_files, so only keep the factories here
            from .renderer import create_markdown_parser
            from .parse_metadata import Meta_Parser
            self.create_markdown_parser = create_markdown_parser
            self.Meta_Parser = Meta_Parser

    def on_new_process_init(self):
        '''
            For multiprocessing: invoked in every newly created worker
            process so each process builds its own parser instances.
        '''
        from .renderer import create_markdown_parser
        from .parse_metadata import Meta_Parser
        self.md_parser = create_markdown_parser()
        self.meta_parser = Meta_Parser()

    def on_new_process_del(self):
        '''
            For multiprocessing: invoked whenever a worker process exits;
            releases the per-process parser instances.
        '''
        del self.md_parser
        del self.meta_parser

    def on_parse_files(self, files):
        '''
            Parse markdown files into html plus metadata.

            @files list of file paths
            @return dict (format fixed by the teedoc plugin contract):
                    {"ok": bool, "msg": str, "htmls": OrderedDict} where
                    htmls maps file path -> page dict, or None for files
                    this plugin does not handle
        '''
        # result, format must be this
        result = {"ok": False, "msg": "", "htmls": OrderedDict()}
        # function parse md file is disabled
        if "md" not in self.config["parse_files"]:
            result["msg"] = "disabled markdown parse, but only support markdown"
            return result
        self.logger.d("-- plugin <{}> parse {} files".format(self.name, len(files)))
        for file in files:
            ext = os.path.splitext(file)[1].lower()
            if not ext.endswith("md"):
                # not a markdown file: mark as unhandled
                result["htmls"][file] = None
                continue
            with open(file, encoding="utf-8") as f:
                content = f.read().strip()
            content = self._update_link(content)
            try:
                if not self.multiprocess:
                    md_parser = self.create_markdown_parser()
                    meta_parser = self.Meta_Parser()
                else:
                    md_parser = self.md_parser
                    meta_parser = self.meta_parser
                metadata, content_no_meta = meta_parser.parse_meta(content)
                html = md_parser(content_no_meta)
            except Exception:
                import traceback
                traceback.print_exc()
                self.logger.w(
                    "parse markdown file {} fail, please check markdown content format"
                    .format(file))
                continue
            result["htmls"][file] = self._make_page(file, metadata, html, content)
        result['ok'] = True
        return result

    def _make_page(self, file, metadata, html, content):
        '''
            Build one page's result dict from its parsed metadata.

            @file source file path (its mtime is the fallback timestamp)
            @metadata dict parsed from the file's meta block
            @html rendered html body
            @content the raw (link-rewritten) markdown source
            @return page dict consumed by teedoc core
        '''
        title = metadata.get("title", "")
        desc = metadata.get("desc", "")
        author = metadata.get("author", "")
        keywords_str = metadata.get("keywords", "")
        keywords = keywords_str.split(",") if keywords_str.strip() else []
        tags_str = metadata.get("tags", "")
        tags = tags_str.split(",") if tags_str.strip() else []
        # default timestamp is the file's mtime; a valid YYYY-MM-DD "date"
        # metadata entry overrides it
        date = None
        ts = int(os.stat(file).st_mtime)
        if "date" in metadata:
            date = metadata["date"].strip().lower()
        # set date to false/none to disable date display
        if date in ("false", "none"):
            date = ""
        elif date:
            try:
                ts = int(datetime.strptime(date, '%Y-%m-%d').timestamp())
            except ValueError:
                pass  # keep mtime when the date string is unparsable
        return {
            "title": title,
            "desc": desc,
            "keywords": keywords,
            "tags": tags,
            "body": html,
            "date": date,
            "ts": ts,
            "author": author,
            # just empty, toc generated by js but not python
            "toc": "",
            "metadata": metadata,
            "raw": content
        }

    def on_parse_pages(self, files):
        '''Pages go through the same markdown pipeline as doc files.'''
        return self.on_parse_files(files)

    def on_add_html_header_items(self, type_name):
        '''
            @return list of html snippets to insert into every page <head>
        '''
        items = [
            '<meta name="markdown-generator" content="teedoc-plugin-markdown-parser">'
        ]
        if self.config["mathjax"]["enable"]:
            items.append('''<script>
    MathJax = {};
</script>'''.format(json.dumps(self.config["mathjax"]["config"])))
            items.append(
                '<script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>'
            )
            items.append(
                '<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/{}.js"></script>'
                .format(self.config["mathjax"]["file_name"]))
        return items

    def _update_link(self, content):
        '''
            Rewrite local (relative or absolute-path) .md/.ipynb link
            targets in markdown link syntax to their generated .html
            equivalents; README files map to index.html.
            e.g. <a href="./syntax_markdown.md"> -> ./syntax_markdown.html
        '''
        def make_link_fixer(readme_pattern, ext_pattern):
            # build a re.sub callback that fixes one matched markdown link
            def fixer(match):
                text = match.group(0)
                links = re.findall(r'\((.*?)\)', text)
                for link in links:
                    # only rewrite local links, never external URLs
                    if link.startswith(".") or os.path.isabs(link):
                        ret = re.sub(readme_pattern, "index.html", text,
                                     flags=re.I)
                        # BUGFIX: the original passed re.I as the positional
                        # `count` argument of re.sub (at most 2 replacements,
                        # case-sensitive) and left the extension dot
                        # unescaped; use flags= and an escaped pattern.
                        ret = re.sub(ext_pattern, ".html", ret, flags=re.I)
                        return ret
                return text
            return fixer

        content = re.sub(r'\[.*?\]\(.*?\.md\)',
                         make_link_fixer(r"README\.md", r"\.md"),
                         content, flags=re.I)
        content = re.sub(r'\[.*?\]\(.*?\.ipynb\)',
                         make_link_fixer(r"README\.ipynb", r"\.ipynb"),
                         content, flags=re.I)
        return content
class Plugin(Plugin_Base):
    """Search plugin for teedoc.

    Adds a search button to the navbar, ships the client-side search
    assets (css / js / icons), and generates per-doc JSON search indexes
    that the browser fetches on demand.
    """

    name = "teedoc-plugin-search"
    desc = "search support for teedoc"
    # NOTE: the attribute name keeps its historical typo ("defautl")
    # because external code may already reference Plugin.defautl_config.
    defautl_config = {
        "content_type": "raw",  # must be one of supported_content_type
        "search_hint": "Search",
        "input_hint": "Keywords separated by space",
        "loading_hint": "Loading, wait please ...",
        "download_err_hint": "Download error, please check network and refresh again",
        "other_docs_result_hint": "Result from other docs",
        "curr_doc_result_hint": "Result from current doc",
        "env": {
            "main_color": "#4caf7d",
            "main_color_dark": "#1b4c33",
            "hint_shadow_color": "rgba(76, 175, 125, 0.38)"
        }
    }
    supported_content_type = ["raw", "html"]

    def on_init(self, config, doc_src_path, site_config, logger=None,
                multiprocess=True, **kw_args):
        '''
            @config a dict object
            @logger teedoc.logger.Logger object
        '''
        self.logger = Fake_Logger() if not logger else logger
        self.doc_src_path = doc_src_path
        self.site_config = site_config
        # BUGFIX: deep-copy the class-level defaults. Binding the class
        # attribute directly would let the updates below (including the
        # site_root_url written into the nested "env" dict) mutate state
        # shared by every Plugin instance.
        self.config = copy.deepcopy(Plugin.defautl_config)
        self.config.update(config)
        self.logger.i("-- plugin <{}> init".format(self.name))
        self.logger.i("-- plugin <{}> config: {}".format(self.name, self.config))
        if self.config["content_type"] not in self.supported_content_type:
            self.logger.e(
                "-- plugin <{}> config content_type error: {}, should be in {}"
                .format(self.name, self.config["content_type"],
                        self.supported_content_type))
        # which html field feeds the index: raw markdown or rendered body
        if self.config["content_type"] == "raw":
            self.content_from = "raw"
        else:
            self.content_from = "body"
        self.module_path = os.path.abspath(
            os.path.dirname(os.path.abspath(__file__)))
        self.assets_abs_path = os.path.join(self.module_path, "assets")
        # per-build scratch dir for variable-substituted assets and indexes
        self.temp_dir = os.path.join(tempfile.gettempdir(),
                                     "teedoc_plugin_search")
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
        os.makedirs(self.temp_dir)
        self.css = {
            "/static/css/search/style.css":
                os.path.join(self.assets_abs_path, "style.css"),
        }
        self.footer_js = {
            "/static/js/search/search_main.js":
                os.path.join(self.assets_abs_path, "search_main.js")
        }
        self.images = {
            "/static/image/search/close.svg":
                os.path.join(self.assets_abs_path, "close.svg"),
            "/static/image/search/search.svg":
                os.path.join(self.assets_abs_path, "search.svg"),
        }
        # set site_root_url env value so asset templates can reference it
        if "env" not in self.config:
            self.config['env'] = {}
        self.config['env']["site_root_url"] = self.site_config["site_root_url"]
        # replace ${variable} placeholders in css / js with env values
        vars = self.config["env"]
        self.css = self._update_file_var(self.css, vars, self.temp_dir)
        self.footer_js = self._update_file_var(self.footer_js, vars,
                                               self.temp_dir)
        # files to copy into the generated site
        self.html_header_items = self._generate_html_header_items()
        self.files_to_copy = {}
        self.files_to_copy.update(self.css)
        self.files_to_copy.update(self.footer_js)
        self.files_to_copy.update(self.images)
        self.html_js_items = self._generate_html_js_items()
        self.content = {"articles": {}, "pages": {}}
        self.docs_name = {}

    def on_del(self):
        '''Best-effort cleanup of the scratch directory.'''
        if os.path.exists(self.temp_dir):
            try:
                shutil.rmtree(self.temp_dir)
            except Exception:
                pass  # deliberately best-effort; dir is recreated on init

    def _generate_html_header_items(self):
        '''@return <link> tags for the plugin's css files'''
        return [
            '<link rel="stylesheet" href="{}" type="text/css"/>'.format(url)
            for url in self.css
        ]

    def _generate_html_js_items(self):
        '''@return <script> tags for the plugin's footer js files'''
        return [
            '<script src="{}"></script>'.format(url) for url in self.footer_js
        ]

    def _update_file_var(self, files, vars, temp_dir):
        '''
            Copy each file into temp_dir with ${name} placeholders replaced
            by the values in vars, and repoint the mapping to the copies.

            @files dict url -> source path; mutated in place and returned
            @vars dict of placeholder name -> replacement string
            @return the same dict, values now pointing into temp_dir
        '''
        for url, path in files.items():
            with open(path, encoding='utf-8') as f:
                content = f.read()
            for k, v in vars.items():
                content = content.replace("${%s}" % k.strip(), v)
            temp_path = os.path.join(temp_dir, os.path.basename(path))
            with open(temp_path, "w", encoding='utf-8') as fw:
                fw.write(content)
            files[url] = temp_path
        return files

    def _get_conf(self, config, new_config, conf_name):
        '''
            @return (True, value) when new_config overrides conf_name,
                    otherwise (False, default value from config)
        '''
        if conf_name in new_config:
            return True, new_config[conf_name]
        return False, config[conf_name]

    def on_parse_start(self, type_name, url, dirs, doc_config, new_config):
        '''Record the current doc's display name, locale and config.'''
        if "name" not in doc_config:
            self.logger.w(f'doc dir "{dirs[0]}"\'s config no "name" keyword')
            self.docs_name[url] = url  # fall back to the url as display name
        else:
            self.docs_name[url] = doc_config["name"]
        self.doc_locale = doc_config[
            "locale"] if "locale" in doc_config else None
        self.new_config = new_config

    def on_add_html_header_items(self, type_name):
        return self.html_header_items

    def on_add_html_footer_js_items(self, type_name):
        return self.html_js_items

    def on_add_navbar_items(self):
        '''
            Build the navbar search button html.

            Hint strings come from custom settings when present; otherwise
            they are translated via gettext for the current doc locale
            (i18n), falling back to the configured defaults.
        '''
        names = [
            "search_hint", "input_hint", "loading_hint", "download_err_hint",
            "other_docs_result_hint", "curr_doc_result_hint"
        ]
        flags = [False] * len(names)  # True where a custom setting exists
        configs = [""] * len(names)
        for i, conf in enumerate(names):
            flags[i], configs[i] = self._get_conf(self.config,
                                                  self.new_config, conf)
        # get translation only for hints without a custom setting
        if False in flags and self.doc_locale:
            import gettext
            try:
                lang = gettext.translation(
                    'messages',
                    localedir=os.path.join(self.module_path, 'locales'),
                    languages=[self.doc_locale])
                lang.install()
                _ = lang.gettext
                msg_ids = [
                    "search_hint", "search_input_hint", "search_loading_hint",
                    "search_download_err_hint",
                    "search_other_docs_result_hint",
                    "search_curr_doc_result_hint"
                ]
                for i, msg_id in enumerate(msg_ids):
                    if not flags[i]:
                        configs[i] = _(msg_id)
            except Exception:
                pass  # missing translation files: keep defaults
        search_btn = '''<a id="search"><span class="icon"></span><span class="placeholder">%s</span>
                <div id="search_hints">
                    <span id="search_input_hint">%s</span>
                    <span id="search_loading_hint">%s</span>
                    <span id="search_download_err_hint">%s</span>
                    <span id="search_other_docs_result_hint">%s</span>
                    <span id="search_curr_doc_result_hint">%s</span>
                </div></a>''' % tuple(configs)
        return [search_btn]

    def on_copy_files(self):
        '''Return pending files to copy, clearing the list so each file is
        copied at most once per batch.'''
        res = self.files_to_copy
        self.files_to_copy = {}
        return res

    def on_htmls(self, htmls_files, htmls_pages, htmls_blog=None):
        '''
            Generate the search index json files from (possibly partial)
            parsed html results.

            htmls_files: {
                "/get_started/zh": {
                    "url": {
                        "title": "",
                        "desc": "",
                        "keywords": [],
                        "body": html,
                        "url": "",
                        "raw": ""
                    }
                }
            }
        '''
        self.logger.i("generate search index")
        if htmls_blog:
            # blog entries are indexed together with pages; deep-copy so the
            # caller's blog dict is not aliased into htmls_pages
            htmls_pages.update(copy.deepcopy(htmls_blog))
        docs_url = list(htmls_files.keys())
        pages_url = list(htmls_pages.keys())
        index_content = {}
        sub_index_path = []
        # one sub-index per doc, then one per page collection, numbered
        # consecutively so the client can fetch them lazily
        for i, url in enumerate(docs_url):
            index_content[url] = [
                self.docs_name[url],
                "{}static/search_index/index_{}.json".format(
                    self.site_config["site_root_url"], i)
            ]
            sub_index_path.append(self._dump_sub_index(htmls_files[url], i))
        for i, url in enumerate(pages_url, len(docs_url)):
            index_content[url] = [
                self.docs_name[url],
                "{}static/search_index/index_{}.json".format(
                    self.site_config["site_root_url"], i)
            ]
            sub_index_path.append(self._dump_sub_index(htmls_pages[url], i))
        # master index file mapping each doc url to (name, sub-index url)
        index_path = os.path.join(self.temp_dir, "index.json")
        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(index_content, f, ensure_ascii=False)
        # add generated index files to the copy list
        generated_index_json = {"/static/search_index/index.json": index_path}
        for i, path in enumerate(sub_index_path):
            generated_index_json["/static/search_index/index_{}.json".format(
                i)] = path
        self.files_to_copy.update(generated_index_json)
        return True

    def _dump_sub_index(self, pages, i):
        '''
            Write one sub-index json file (page url -> {title, content})
            into the scratch dir and return its path.

            @pages dict page url -> page dict with at least "title" and
                   the self.content_from field
            @i numeric suffix for the index file name
        '''
        content = {
            page_url: {
                "title": page["title"],
                "content": page[self.content_from]
            }
            for page_url, page in pages.items()
        }
        path = os.path.join(self.temp_dir, "index_{}.json".format(i))
        with open(path, "w", encoding="utf-8") as f:
            json.dump(content, f, ensure_ascii=False)
        return path