Ejemplo n.º 1
0
class Plugin(Plugin_Base):
    name = "teedoc-plugin-markdown-parser"
    desc = "markdown parser plugin for teedoc"
    defautl_config = {
        "parse_files": ["md"],
        "mathjax": {
            "enable": True,
            "file_name":
            "tex-mml-chtml",  # http://docs.mathjax.org/en/latest/web/components/index.html
            "config": {
                "loader": {
                    "load": ['output/svg']
                },
                "tex": {
                    "inlineMath": [['$', '$'], ['\\(', '\\)']]
                },
                "svg": {
                    "fontCache": 'global'
                }
            }
        }
    }

    def on_init(self,
                config,
                doc_src_path,
                site_config,
                logger=None,
                multiprocess=True,
                **kw_args):
        '''
            @config a dict object
            @logger teedoc.logger.Logger object
        '''
        self.multiprocess = multiprocess
        self.logger = Fake_Logger() if not logger else logger
        self.doc_src_path = doc_src_path
        self.site_config = site_config
        self.config = Plugin.defautl_config
        mathjax_config = self.config["mathjax"]
        if "mathjax" in config:
            for k, v in config["mathjax"].items():
                if type(v) != dict:
                    mathjax_config[k] = v
                else:
                    mathjax_config[k].update(v)
        self.config.update(config)
        self.config["mathjax"] = mathjax_config
        self.logger.i("-- plugin <{}> init".format(self.name))
        self.logger.i("-- plugin <{}> config: {}".format(
            self.name, self.config))
        if not self.multiprocess:
            from .renderer import create_markdown_parser
            from .parse_metadata import Meta_Parser
            self.create_markdown_parser = create_markdown_parser
            self.Meta_Parser = Meta_Parser

    def on_new_process_init(self):
        '''
            for multiple processing, for below func, will be called in new process,
            every time create a new process, this func will be invoke
        '''
        from .renderer import create_markdown_parser
        from .parse_metadata import Meta_Parser
        self.md_parser = create_markdown_parser()
        self.meta_parser = Meta_Parser()

    def on_new_process_del(self):
        '''
            for multiple processing, for below func, will be called in new process,
            every time exit a new process, this func will be invoke
        '''
        del self.md_parser
        del self.meta_parser

    def on_parse_files(self, files):
        # result, format must be this
        result = {"ok": False, "msg": "", "htmls": OrderedDict()}
        # function parse md file is disabled
        if not "md" in self.config["parse_files"]:
            result[
                "msg"] = "disabled markdown parse, but only support markdown"
            return result
        self.logger.d("-- plugin <{}> parse {} files".format(
            self.name, len(files)))
        # self.logger.d("files: {}".format(files))

        for file in files:
            ext = os.path.splitext(file)[1].lower()
            if ext.endswith("md"):
                with open(file, encoding="utf-8") as f:
                    content = f.read().strip()
                    content = self._update_link(content)
                    try:
                        if not self.multiprocess:
                            md_parser = self.create_markdown_parser()
                            meta_parser = self.Meta_Parser()
                        else:
                            md_parser = self.md_parser
                            meta_parser = self.meta_parser
                        metadata, content_no_meta = meta_parser.parse_meta(
                            content)
                        html = md_parser(content_no_meta)
                    except Exception as e:
                        import io, traceback
                        traceback.print_exc()
                        self.logger.w(
                            "parse markdown file {} fail, please check markdown content format"
                            .format(file))
                        continue
                    if "title" in metadata:
                        title = metadata["title"]
                    else:
                        title = ""
                    if "keywords" in metadata and not metadata[
                            "keywords"].strip() == "":
                        keywords = metadata["keywords"].split(",")
                    else:
                        keywords = []
                    if "tags" in metadata and not metadata["tags"].strip(
                    ) == "":
                        tags = metadata["tags"].split(",")
                    else:
                        tags = []
                    if "desc" in metadata:
                        desc = metadata["desc"]
                    else:
                        desc = ""
                    date = None
                    ts = int(os.stat(file).st_mtime)
                    if "date" in metadata:
                        date = metadata["date"].strip().lower()
                        # set date to false to disable date display
                        if date and (date == "false" or date == "none"):
                            date = ""
                        else:
                            GMT_FORMAT = '%Y-%m-%d'
                            try:
                                date_obj = datetime.strptime(date, GMT_FORMAT)
                                ts = int(date_obj.timestamp())
                            except Exception as e:
                                pass
                    if "author" in metadata:
                        author = metadata["author"]
                    else:
                        author = ""
                    result["htmls"][file] = {
                        "title": title,
                        "desc": desc,
                        "keywords": keywords,
                        "tags": tags,
                        "body": html,
                        "date": date,
                        "ts": ts,
                        "author": author,
                        # "toc": html.toc_html if html.toc_html else "",
                        "toc":
                        "",  # just empty, toc generated by js but not python
                        "metadata": metadata,
                        "raw": content
                    }
            else:
                result["htmls"][file] = None
        result['ok'] = True
        return result

    def on_parse_pages(self, files):
        result = self.on_parse_files(files)
        return result

    def on_add_html_header_items(self, type_name):
        items = []
        items.append(
            '<meta name="markdown-generator" content="teedoc-plugin-markdown-parser">'
        )
        if self.config["mathjax"]["enable"]:
            items.append('''<script>
MathJax = {};
</script>'''.format(json.dumps(self.config["mathjax"]["config"])))
            items.append(
                '<script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>'
            )
            items.append(
                '<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/{}.js"></script>'
                .format(self.config["mathjax"]["file_name"]))
        return items

    def _update_link(self, content):
        def re_del(c):
            ret = c[0]
            links = re.findall('\((.*?)\)', c[0])
            if len(links) > 0:
                for link in links:
                    if link.startswith(".") or os.path.isabs(link):
                        ret = re.sub("README.md",
                                     "index.html",
                                     c[0],
                                     flags=re.I)
                        ret = re.sub(r".md", ".html", ret, re.I)
                        return ret
            return ret

        def re_del_ipynb(c):
            ret = c[0]
            links = re.findall('\((.*?)\)', c[0])
            if len(links) > 0:
                for link in links:
                    if link.startswith(".") or os.path.isabs(link):
                        ret = re.sub("README.ipynb",
                                     "index.html",
                                     c[0],
                                     flags=re.I)
                        ret = re.sub(r".ipynb", ".html", ret, re.I)
                        return ret
            return ret

        # <a class="anchor-link" href="#&#38142;&#25509;"> </a></h2><p><a href="./syntax_markdown.md">markdown 语法</a>
        content = re.sub(r'\[.*?\]\(.*?\.md\)', re_del, content, flags=re.I)
        content = re.sub(r'\[.*?\]\(.*?\.ipynb\)',
                         re_del_ipynb,
                         content,
                         flags=re.I)
        return content
Ejemplo n.º 2
0
class Plugin(Plugin_Base):
    name = "teedoc-plugin-markdown-parser"
    desc = "markdown parser plugin for teedoc"
    defautl_config = {"parse_files": ["md"]}

    def __init__(self, config, doc_src_path, site_config, logger=None):
        '''
            @config a dict object
            @logger teedoc.logger.Logger object
        '''
        self.logger = Fake_Logger() if not logger else logger
        self.doc_src_path = doc_src_path
        self.site_config = site_config
        self.config = Plugin.defautl_config
        self.config.update(config)
        self.logger.i("-- plugin <{}> init".format(self.name))
        self.logger.i("-- plugin <{}> config: {}".format(
            self.name, self.config))
        self._extention = {
            "toc": {
                "depth": config["toc_depth"] if "toc_depth" in config else 3
            },
            "metadata": None,
            "fenced-code-blocks": None,
            "highlightjs-lang": None,
            "break-on-newline": None,
            "code-friendly": None,
            "cuddled-lists": None,
            "footnotes": None,
            "strike": None,
            "spoiler": None,
            "tables": None,
            "task_list": None
        }

    def on_parse_files(self, files):
        # result, format must be this
        result = {"ok": False, "msg": "", "htmls": OrderedDict()}
        # function parse md file is disabled
        if not "md" in self.config["parse_files"]:
            result[
                "msg"] = "disabled markdown parse, but only support markdown"
            return result
        self.logger.d("-- plugin <{}> parse {} files".format(
            self.name, len(files)))
        # self.logger.d("files: {}".format(files))

        for file in files:
            ext = os.path.splitext(file)[1].lower()
            if ext.endswith("md"):
                with open(file, encoding="utf-8") as f:
                    content = f.read().strip()
                    content = self._update_link(content)
                    parser = markdown2.Markdown(extras=self._extention)
                    parser._toc_html = ""
                    html = parser.convert(content)
                    if "title" in html.metadata:
                        title = html.metadata["title"]
                    else:
                        title = ""
                    if "keywords" in html.metadata:
                        keywords = html.metadata["keywords"].split(",")
                    else:
                        keywords = []
                    if "tags" in html.metadata:
                        tags = html.metadata["tags"].split(",")
                    else:
                        tags = []
                    if "desc" in html.metadata:
                        desc = html.metadata["desc"]
                    else:
                        desc = []
                    result["htmls"][file] = {
                        "title": title,
                        "desc": desc,
                        "keywords": keywords,
                        "tags": tags,
                        "body": html,
                        "toc": html.toc_html if html.toc_html else "",
                        "metadata": html.metadata,
                        "raw": content
                    }
            else:
                result["htmls"][file] = None
        result['ok'] = True
        return result

    def on_parse_pages(self, files):
        result = self.on_parse_files(files)
        return result

    def on_add_html_header_items(self):
        items = []
        items.append(
            '<meta name="markdown-generator" content="teedoc-plugin-markdown-parser">'
        )
        return items

    def _update_link(self, content):
        def re_del(c):
            ret = c[0].replace(".md", ".html")
            ret = re.sub("README.md", "index.html", c[0], flags=re.I)
            ret = re.sub(r".md", ".html", ret, re.I)
            return ret

        content = re.sub(r'\[.*?\]\(.*?\.md\)', re_del, content, flags=re.I)
        return content
Ejemplo n.º 3
0
class Plugin(Plugin_Base):
    name = "teedoc-plugin-jupyter-notebook-parser"
    desc = "jupyter notebook parser plugin for teedoc"
    defautl_config = {
        "parse_files": ["ipynb"]
    }

    def on_init(self, config, doc_src_path, site_config, logger = None, multiprocess = True, **kw_args):
        '''
            @config a dict object
            @logger teedoc.logger.Logger object
        '''
        self.logger = Fake_Logger() if not logger else logger
        self.doc_src_path = doc_src_path
        self.site_config = site_config
        self.config = Plugin.defautl_config
        self.config.update(config)
        self.logger.i("-- plugin <{}> init".format(self.name))
        self.logger.i("-- plugin <{}> config: {}".format(self.name, self.config))

        

    def on_parse_files(self, files):
        # result, format must be this
        result = {
            "ok": False,
            "msg": "",
            "htmls": OrderedDict()
        }
        # function parse md file is disabled
        if not "ipynb" in self.config["parse_files"]:
            result["msg"] = "disabled notebook parse, but only support notebook"
            return result
        self.logger.d("-- plugin <{}> parse {} files".format(self.name, len(files)))
        # self.logger.d("files: {}".format(files))
        
        for file in files:
            name = os.path.basename(file)
            # ignore temp file
            if name.startswith(".~"):
                result["htmls"][file] = None
                continue
            ext = os.path.splitext(file)[1].lower()
            if ext.endswith("ipynb"):
                html = convert_ipynb_to_html(file)
                html.body = self._update_link_html(html.body)
                metadata = html.metadata
                date = None
                ts = int(os.stat(file).st_mtime)
                if "date" in metadata:
                    date = metadata["date"].strip().lower()
                    # set date to false to disable date display
                    if date and (date == "false" or date == "none"):
                        date = ""
                    else:
                        GMT_FORMAT = '%Y-%m-%d'
                        try:
                            date_obj = datetime.strptime(date, GMT_FORMAT)
                            ts = int(date_obj.timestamp())
                        except Exception as e:
                            pass
                if "author" in metadata:
                    author = metadata["author"]
                else:
                    author = ""
                result["htmls"][file] = {
                    "title": html.title,
                    "desc": html.desc,
                    "keywords": html.keywords,
                    "tags": html.tags,
                    "body": html.body,
                    "author": author,
                    "date": date,
                    "ts": ts,
                    "toc": html.toc,
                    "metadata": metadata,
                    "raw": html.raw
                }
            else:
                result["htmls"][file] = None
        result['ok'] = True
        return result
    
    def on_parse_pages(self, files):
        result = self.on_parse_files(files)
        return result

    
    def on_add_html_header_items(self, type_name):
        items = []
        items.append('<meta name="html-generator" content="teedoc-plugin-jupyter-notebook-parser">')
        return items
    
    def _update_link_html(self, content):
        def re_del(c):
            ret = c[0]
            links = re.findall('href="(.*?)"', c[0])
            if len(links) > 0:
                for link in links:
                    if link.startswith(".") or os.path.isabs(link):
                        ret = re.sub("README.md", "index.html", c[0], flags=re.I)
                        ret = re.sub(r".md", ".html", ret, re.I)
                        return ret
            return ret
        def re_del_ipynb(c):
            ret = c[0]
            links = re.findall('href="(.*?)"', c[0])
            if len(links) > 0:
                for link in links:
                    if link.startswith(".") or os.path.isabs(link):
                        ret = re.sub("README.ipynb", "index.html", c[0], flags=re.I)
                        ret = re.sub(r".ipynb", ".html", ret, re.I)
                        return ret
            return ret
        # <a class="anchor-link" href="#&#38142;&#25509;"> </a></h2><p><a href="./syntax_markdown.md">markdown 语法</a>
        content = re.sub(r'\<a.*?href=.*?\.md.*?\</a\>', re_del, content, flags=re.I)
        content = re.sub(r'\<a.*?href=.*?\.ipynb.*?\</a\>', re_del_ipynb, content, flags=re.I)
        return content
Ejemplo n.º 4
0
class Plugin(Plugin_Base):
    name = "teedoc-plugin-blog"
    desc = "markdown parser plugin for teedoc"
    defautl_config = {"parse_files": ["md"]}

    def on_init(self,
                config,
                doc_src_path,
                site_config,
                logger=None,
                multiprocess=True,
                **kw_args):
        '''
            @config a dict object
            @logger teedoc.logger.Logger object
        '''
        self.multiprocess = multiprocess
        self.logger = Fake_Logger() if not logger else logger
        self.doc_src_path = doc_src_path
        self.site_config = site_config
        self.config = Plugin.defautl_config
        self.config.update(config)
        self.logger.i("-- plugin <{}> init".format(self.name))
        self.logger.i("-- plugin <{}> config: {}".format(
            self.name, self.config))
        self.temp_dir = os.path.join(tempfile.gettempdir(),
                                     "teedoc_plugin_blog")
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
        os.makedirs(self.temp_dir)
        self.assets_abs_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "assets")
        self.assets = {
            "/static/js/plugin_blog/main.js":
            os.path.join(self.assets_abs_path, "main.js"),
        }
        vars = {"site_root_url": self.site_config["site_root_url"]}
        self.assets = self._update_file_var(self.assets, vars, self.temp_dir)
        self.files_to_copy = self.assets.copy()  # must use copy
        blog_url = list(self.site_config["route"]["blog"].keys())
        if len(blog_url) > 1:
            self.logger.e("only support one blog url path")
            raise Exception("only support one blog url path")
        self.blog_url = blog_url[0]
        self.blog_dir = os.path.join(
            self.doc_src_path,
            self.site_config["route"]["blog"][self.blog_url]).replace(
                "\\", "/")
        self.index_content = {"items": {}}

    def on_new_process_init(self):
        '''
            for multiple processing, for below func, will be called in new process,
            every time create a new process, this func will be invoke
        '''
        self.md_parser = create_markdown_parser()
        self.meta_parser = Meta_Parser()

    def on_new_process_del(self):
        '''
            for multiple processing, for below func, will be called in new process,
            every time exit a new process, this func will be invoke
        '''
        del self.md_parser
        del self.meta_parser

    def on_parse_blog(self, files):
        # result, format must be this
        result = {"ok": False, "msg": "", "htmls": OrderedDict()}
        # function parse md file is disabled
        if not "md" in self.config["parse_files"]:
            result[
                "msg"] = "disabled markdown parse, but only support markdown"
            return result
        self.logger.d("-- plugin <{}> parse {} files".format(
            self.name, len(files)))
        # self.logger.d("files: {}".format(files))

        for file in files:
            ext = os.path.splitext(file)[1].lower()
            if ext.endswith("md"):
                with open(file, encoding="utf-8") as f:
                    content = f.read().strip()
                    content = self._update_link(content)
                    blog_index_file_path = os.path.join(
                        self.blog_dir, "readme.md").replace("\\", "/").lower()
                    is_blog_index = file.lower() == blog_index_file_path
                    if is_blog_index:
                        content += '\n<div id="blog_list"></div>'
                    if not self.multiprocess:
                        md_parser = create_markdown_parser()
                        meta_parser = Meta_Parser()
                    else:
                        md_parser = self.md_parser
                        meta_parser = self.meta_parser
                    metadata, content_no_meta = meta_parser.parse_meta(content)
                    html = md_parser(content_no_meta)
                    if "<!-- more -->" in html:
                        brief = html[:html.find("<!-- more -->")].strip()
                    else:
                        brief = html[:500].strip()
                    if "title" in metadata:
                        title = metadata["title"]
                    else:
                        title = ""
                    if "keywords" in metadata and not metadata[
                            "keywords"].strip() == "":
                        keywords = metadata["keywords"].split(",")
                    else:
                        keywords = []
                    if "tags" in metadata and not metadata["tags"].strip(
                    ) == "":
                        tags = metadata["tags"].split(",")
                    else:
                        tags = []
                    if "desc" in metadata:
                        desc = metadata["desc"]
                    else:
                        desc = ""
                    html_str = '<span id="blog_start"></span>' + html
                    # date default last edit time
                    ts = int(os.stat(file).st_mtime)
                    date_file_edit = time.strftime("%Y-%m-%d",
                                                   time.localtime(ts))
                    if "date" in metadata:
                        date = metadata["date"].strip().lower()
                        # set date to false to disable date display
                        if date and (date == "false" or date == "none"):
                            date = ""
                        else:
                            GMT_FORMAT = '%Y-%m-%d'
                            try:
                                date_obj = datetime.strptime(date, GMT_FORMAT)
                                ts = int(date_obj.timestamp())
                            except Exception as e:
                                date = date_file_edit
                    else:
                        date = date_file_edit
                    if "author" in metadata:
                        author = metadata["author"]
                    else:
                        author = ""
                    result["htmls"][file] = {
                        "title": title,
                        "desc": desc,
                        "keywords": keywords,
                        "tags": tags,
                        "body": html_str,
                        # "toc": html.toc_html if html.toc_html else "",
                        "toc":
                        "",  # just empty, toc generated by js but not python
                        "metadata": metadata,
                        "raw": content,
                        "date": date,
                        "ts": ts,
                        "author": author,
                        "brief": brief
                    }
            else:
                result["htmls"][file] = None
        result['ok'] = True
        return result

    def on_add_html_header_items(self, type_name):
        items = []
        items.append(
            '<meta name="blog-generator" content="teedoc-plugin-blog">')
        return items

    def _update_link(self, content):
        def re_del(c):
            ret = c[0]
            links = re.findall('\((.*?)\)', c[0])
            if len(links) > 0:
                for link in links:
                    if link.startswith(".") or os.path.isabs(link):
                        ret = re.sub("README.md",
                                     "index.html",
                                     c[0],
                                     flags=re.I)
                        ret = re.sub(r".md", ".html", ret, re.I)
                        return ret
            return ret

        def re_del_ipynb(c):
            ret = c[0]
            links = re.findall('\((.*?)\)', c[0])
            if len(links) > 0:
                for link in links:
                    if link.startswith(".") or os.path.isabs(link):
                        ret = re.sub("README.ipynb",
                                     "index.html",
                                     c[0],
                                     flags=re.I)
                        ret = re.sub(r".ipynb", ".html", ret, re.I)
                        return ret
            return ret

        # <a class="anchor-link" href="#&#38142;&#25509;"> </a></h2><p><a href="./syntax_markdown.md">markdown 语法</a>
        content = re.sub(r'\[.*?\]\(.*?\.md\)', re_del, content, flags=re.I)
        content = re.sub(r'\[.*?\]\(.*?\.ipynb\)',
                         re_del_ipynb,
                         content,
                         flags=re.I)
        return content

    def on_htmls(self, htmls_files, htmls_pages, htmls_blog=None):
        '''
            update htmls, may not all html, just partially
            htmls_blog: {
                "/blog/":{
                    "url":{
                                "title": "",
                                "desc": "",
                                "keywords": [],
                                "body": html,
                                "tags": [],
                                "url": "",
                                "raw": "",
                                "date": date,
                                "ts": 12344566,
                                "author": author,
                                "brief": "",
                                "metadata": {}
                          }
                }
            }
        '''
        if not htmls_blog:
            return True
        blog_url = list(htmls_blog.keys())
        if len(blog_url) == 0:
            return True
        index_url = ""
        blog_url = blog_url[0]
        index_url = "{}static/blog_index/index.json".format(
            self.site_config["site_root_url"])
        index_path = os.path.join(self.temp_dir, "index.json")
        for url, item in htmls_blog[blog_url].items():
            # except blog index.html
            if url == os.path.join(blog_url, "index.html"):
                continue
            new_item = {
                "title": item["title"],
                "desc": item["desc"],
                "keywords": item["keywords"],
                "tags": item["tags"],
                "url": url,
                "date": item["date"],
                "ts": item["ts"],
                "author": item["author"],
                "brief": item["brief"],
            }
            self.index_content["items"][url] = new_item
        # sort by date
        self.index_content["items"] = OrderedDict(
            sorted(self.index_content["items"].items(),
                   key=lambda v: v[1]["ts"],
                   reverse=True))
        #   write content to sub index file
        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(self.index_content, f, ensure_ascii=False)

        # add to copy file list
        generated_index_json = {"/static/blog_index/index.json": index_path}
        self.files_to_copy.update(generated_index_json)
        return True

    def on_copy_files(self):
        res = self.files_to_copy
        self.files_to_copy = {}
        return res

    def on_add_html_footer_js_items(self, type_name):
        for url in self.assets:
            html_js_items = ['<script src="{}"></script>'.format(url)]
        return html_js_items

    def _update_file_var(self, files, vars, temp_dir):
        for url, path in files.items():
            with open(path, encoding='utf-8') as f:
                content = f.read()
                for k, v in vars.items():
                    content = content.replace(
                        "${}{}{}".format("{", k.strip(), "}"), v)
                temp_path = os.path.join(temp_dir, os.path.basename(path))
                with open(temp_path, "w", encoding='utf-8') as fw:
                    fw.write(content)
                files[url] = temp_path
        return files