def read(self, filename):
    """Render a reveal.js Markdown source file into a standalone HTML deck.

    Requires pypandoc (https://github.com/bebraw/pypandoc) and pandoc
    (https://pandoc.org/) to work correctly.

    Args:
        filename: Path of the Markdown file to convert.

    Returns:
        A ``(revealjs_html, metadata)`` tuple. ``metadata`` is whatever
        Pelican's ``MarkdownReader`` extracted, with ``"template"`` forced
        to ``"blank"``.
    """
    # TODO: use markdown reader to parse the reveal.js markdown
    # https://github.com/danielfrg/pelican-ipynb/blob/master/markup.py#L62
    # The Pelican reader is run only for its metadata; the HTML it renders
    # is discarded because pandoc performs the real conversion below.
    _rendered_html, metadata = MarkdownReader(self.settings).read(filename)
    metadata["template"] = "blank"

    # TODO: using the markdown reader converts the file contents to HTML,
    # but we just want plain text because pandoc should be converting it
    # instead. The trouble is, we also want to get the metadata
    preprocessor = Markdown(**self.settings["MARKDOWN"])
    preprocessor.convertFile(filename, output=os.devnull)
    # presumably ``lines`` holds the source lines left after the Markdown
    # preprocessors ran (i.e. without the metadata header) -- TODO confirm
    markdown_source = "\n".join(preprocessor.lines)

    pandoc_args = ["-s", "-V", "revealjs-url=https://revealjs.com"]
    deck_html = pypandoc.convert_text(
        markdown_source,
        to="revealjs",
        format="md",
        extra_args=pandoc_args,
    )
    return (deck_html, metadata)
def read(self, filepath):
    """Read a notebook file and return ``(content, metadata)`` for Pelican.

    Metadata comes from a side-car ``<name>.ipynb-meta`` file when one exists
    next to the notebook, otherwise from the notebook's own ``metadata``
    JSON object (only title, date, category, tags, slug and author are kept).

    Args:
        filepath: Path of the ``.ipynb`` file.

    Returns:
        A ``(content, metadata)`` tuple; ``content`` is the HTML returned by
        ``get_html_from_filepath`` after ``fix_css``.

    Raises:
        Exception: If ``title``, ``date`` and ``slug`` are not all present in
            the collected metadata. The message differs depending on whether
            a sibling ``.md`` file exists.
    """
    metadata = {'ipython': True}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    if os.path.exists(metadata_filepath):
        # Metadata is on a external file, process using Pelican MD Reader.
        # NOTE(review): this rebinds ``metadata``, so the 'ipython' flag set
        # above is not carried over in this branch -- confirm that is intended.
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from ipython notebook file; the context manager makes
        # sure the handle is closed (it used to be leaked).
        with open(filepath) as ipynb_file:
            notebook_metadata = json.load(ipynb_file)['metadata']

        # Change to standard pelican metadata
        for key, value in notebook_metadata.items():
            key = key.lower()
            if key in ("title", "date", "category", "tags", "slug", "author"):
                metadata[key] = self.process_metadata(key, value)

    keys = [k.lower() for k in metadata.keys()]
    if not set(['title', 'date', 'slug']).issubset(set(keys)):
        # Probably using ipynb.liquid mode
        md_filename = filename.split('.')[0] + '.md'
        md_filepath = os.path.join(filedir, md_filename)
        if not os.path.exists(md_filepath):
            raise Exception(
                "Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file."
            )
        raise Exception(
            "Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
            "assuming that this notebook is for liquid tag usage if true ignore this error"
        )

    content, info = get_html_from_filepath(filepath)

    # Generate Summary: Do it before cleaning CSS
    if 'summary' not in [key.lower() for key in self.settings.keys()]:
        content = '<body>{0}</body>'.format(content)  # So Pelican HTMLReader works
        parser = MyHTMLParser(self.settings, filename)
        # Python 3 str don't have decode
        try:
            decoded = content.decode("utf-8")
        except AttributeError:
            decoded = content
        parser.feed(decoded)
        parser.close()
        content = parser.body
        metadata['summary'] = parser.summary

    content = fix_css(content, info)
    return content, metadata
def add_static_comments(gen, content):
    """Load the on-disk comments of *content*, write their feed and attach them.

    Comment files are looked up in
    ``<PELICAN_COMMENT_SYSTEM_DIR>/<content.slug>``. Files whose metadata has
    a ``replyto`` entry are attached as replies to the comment (or reply)
    whose ``id`` equals that value.

    Args:
        gen: Generator exposing ``settings``, ``context`` and ``output_path``.
        content: Content object; ``comments`` and ``comments_count`` are set
            on it.

    Returns:
        None. Nothing is touched when ``PELICAN_COMMENT_SYSTEM`` is not equal
        to ``True``.
    """
    # Equality (not truthiness) is kept on purpose so that only True/1
    # enables the plugin, exactly as before.
    if gen.settings['PELICAN_COMMENT_SYSTEM'] != True:  # noqa: E712
        return

    content.comments_count = 0
    content.comments = []

    # Modify the local context, so we get proper values for the feed
    context = copy.copy(gen.context)
    context['SITEURL'] += "/" + content.url
    context['SITENAME'] = "Comments for: " + content.title
    context['SITESUBTITLE'] = ""
    path = gen.settings['PELICAN_COMMENT_SYSTEM_FEED'] % content.slug

    writer = Writer(gen.output_path, settings=gen.settings)

    folder = os.path.join(gen.settings['PELICAN_COMMENT_SYSTEM_DIR'], content.slug)

    if not os.path.isdir(folder):
        logger.debug("No comments found for: " + content.slug)
        # An empty feed is still written for content without comments.
        writer.write_feed([], context, path)
        return

    reader = MarkdownReader(gen.settings)
    comments = []
    replies = []

    for entry in os.listdir(folder):
        name, extension = os.path.splitext(entry)
        if extension[1:].lower() in reader.file_extensions:
            com_content, meta = reader.read(os.path.join(folder, entry))
            avatar_path = avatars.getAvatarPath(name, meta)
            com = Comment(entry, avatar_path, com_content, meta,
                          gen.settings, entry, context)

            if 'replyto' in meta:
                replies.append(com)
            else:
                comments.append(com)

    writer.write_feed(comments + replies, context, path)

    # Index every comment and reply by id once, so each reply finds its
    # parent(s) with a lookup instead of rescanning the whole list.
    # Lists are used as values so duplicate ids behave as they did before.
    # assumes ids and 'replyto' values are hashable (strings) -- TODO confirm
    by_id = {}
    for candidate in chain(comments, replies):
        by_id.setdefault(candidate.id, []).append(candidate)

    for reply in replies:
        for parent in by_id.get(reply.metadata['replyto'], ()):
            parent.addReply(reply)

    count = 0
    for comment in comments:
        comment.sortReplies()
        count += comment.countReplies()

    comments = sorted(comments)

    content.comments_count = len(comments) + count
    content.comments = comments
def read(self, filepath):
    """Convert a notebook to HTML and return ``(body, metadata)`` for Pelican.

    Metadata comes from a side-car ``<name>.ipynb-meta`` file when present,
    otherwise from the notebook's ``metadata`` JSON object, whose keys are
    lower-cased and whose values go through ``self.process_metadata``.

    Args:
        filepath: Path of the ``.ipynb`` file.

    Returns:
        A ``(body, metadata)`` tuple. ``body`` is the filtered notebook CSS,
        ``CUSTOM_CSS`` and the parsed HTML body concatenated; ``metadata``
        always carries ``ipython`` and ``summary``.
    """
    metadata = {}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    # Load metadata
    if os.path.exists(metadata_filepath):
        # Metadata is on a external file, process using Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from ipython notebook file (handle closed on exit)
        with open(filepath) as ipynb_file:
            notebook_metadata = json.load(ipynb_file)['metadata']

        # Fix metadata to pelican standards. A new dict is built because
        # deleting/inserting keys in the dict being iterated raises
        # RuntimeError on Python 3.
        metadata = {}
        for key, value in notebook_metadata.items():
            key = key.lower()
            metadata[key] = self.process_metadata(key, value)
    metadata['ipython'] = True

    # Convert ipython notebook to html
    config = Config({'CSSHTMLHeaderTransformer': {'enabled': True,
                                                  'highlight_class': '.highlight-ipynb'}})
    exporter = HTMLExporter(config=config, template_file='basic',
                            filters={'highlight2html': custom_highlighter})
    content, info = exporter.from_filename(filepath)

    # Process using Pelican HTMLReader
    content = '<body>{0}</body>'.format(content)  # So Pelican HTMLReader works
    parser = MyHTMLParser(self.settings, filename)
    parser.feed(content)
    parser.close()
    body = parser.body
    metadata['summary'] = parser.summary

    # Remove some CSS styles, so it doesn't break the themes.
    def filter_tags(style_text):
        """Drop CSS lines that start with one of the excluded selectors."""
        exclude = ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'a', 'ul', 'ol', 'li',
                   '.rendered_html', '@media', '.navbar', 'nav.navbar', '.navbar-text',
                   'code', 'pre', 'div.text_cell_render')
        kept = [line for line in style_text.split('\n')
                if not line.startswith(exclude)]
        return '<style type=\"text/css\">{0}</style>'.format('\n'.join(kept))

    css = '\n'.join(filter_tags(css) for css in info['inlining']['css'])
    css = css + CUSTOM_CSS
    body = css + body

    return body, metadata
def read(self, filepath):
    """Read a notebook file and return ``(content, metadata)`` for Pelican.

    Metadata comes from a side-car ``<name>.ipynb-meta`` file when one exists
    next to the notebook, otherwise from the notebook's own ``metadata``
    JSON object (only title, date, category, tags, slug and author are kept).

    Args:
        filepath: Path of the ``.ipynb`` file.

    Returns:
        A ``(content, metadata)`` tuple; ``content`` has been passed through
        ``fix_css``.

    Raises:
        Exception: If ``title`` and ``date`` are not both present in the
            collected metadata.
    """
    metadata = {'ipython': True}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = os.path.splitext(filename)[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    if os.path.exists(metadata_filepath):
        # Metadata is on a external file,
        # process using Pelican MD Reader.
        # NOTE(review): this rebinds ``metadata``, so the 'ipython' flag set
        # above is not carried over in this branch -- confirm that is intended.
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from ipython notebook file; the context manager makes
        # sure the handle is closed (it used to be leaked).
        with open(filepath) as ipynb_file:
            notebook_metadata = json.load(ipynb_file)['metadata']

        # Change to standard pelican metadata
        for key, value in notebook_metadata.items():
            key = key.lower()
            if key in ("title", "date", "category", "tags", "slug", "author"):
                metadata[key] = self.process_metadata(key, value)

    keys = [k.lower() for k in metadata.keys()]
    if not set(['title', 'date']).issubset(set(keys)):
        # Probably using ipynb.liquid mode
        md_filename = filename.split('.')[0] + '.md'
        md_filepath = os.path.join(filedir, md_filename)
        if not os.path.exists(md_filepath):
            raise Exception("Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file.")
        raise Exception("Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
                        "assuming that this notebook is for liquid tag usage if true ignore this error")

    content, info = get_html_from_filepath(filepath)

    # Generate Summary: Do it before cleaning CSS
    if 'summary' not in [key.lower() for key in self.settings.keys()]:
        parser = MyHTMLParser(self.settings, filename)
        if isinstance(content, six.binary_type):
            # PY2 (str) or PY3 (bytes) to PY2 (unicode) or PY3 (str)
            # unicode_literals makes format() try to decode as ASCII.
            # Enforce decoding as UTF-8.
            content = '<body>{0}</body>'.format(content.decode("utf-8"))
        else:
            # Content already decoded
            content = '<body>{0}</body>'.format(content)
        parser.feed(content)
        parser.close()
        content = parser.body
        # The parsed summary is stored unless IPYNB_USE_META_SUMMARY is set
        # to something that does not compare equal to False.
        if self.settings.get('IPYNB_USE_META_SUMMARY', False) == False:  # noqa: E712
            metadata['summary'] = parser.summary

    # NOTE(review): only the presence of the key is tested, so
    # IPYNB_IGNORE_CSS = False still enables it -- confirm that is intended.
    ignore_css = 'IPYNB_IGNORE_CSS' in self.settings.keys()
    content = fix_css(content, info, ignore_css=ignore_css)
    return content, metadata
def read(self, filepath):
    """Convert a notebook to HTML and return ``(body, metadata)`` for Pelican.

    Metadata comes from a side-car ``<name>.ipynb-meta`` file when present,
    otherwise from the notebook's ``metadata`` JSON object, whose keys are
    lower-cased and whose values go through ``self.process_metadata``.

    Args:
        filepath: Path of the ``.ipynb`` file.

    Returns:
        A ``(body, metadata)`` tuple. ``body`` is the filtered notebook CSS,
        ``CUSTOM_CSS`` and the parsed HTML body concatenated; ``metadata``
        always carries ``ipython`` and ``summary``.
    """
    metadata = {}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    # Load metadata
    if os.path.exists(metadata_filepath):
        # Metadata is on a external file, process using Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from ipython notebook file (handle closed on exit)
        with open(filepath) as ipynb_file:
            raw_metadata = json.load(ipynb_file)['metadata']

        # Fix metadata to pelican standards. The normalised entries go into
        # a fresh dict: changing the keys of a dict while iterating over it
        # raises RuntimeError on Python 3.
        metadata = {}
        for key, value in raw_metadata.items():
            key = key.lower()
            metadata[key] = self.process_metadata(key, value)
    metadata['ipython'] = True

    # Convert ipython notebook to html
    config = Config({'CSSHTMLHeaderTransformer': {'enabled': True,
                                                  'highlight_class': '.highlight-ipynb'}})
    exporter = HTMLExporter(config=config, template_file='basic',
                            filters={'highlight2html': custom_highlighter})
    content, info = exporter.from_filename(filepath)

    # Process using Pelican HTMLReader
    content = '<body>{0}</body>'.format(content)  # So Pelican HTMLReader works
    parser = MyHTMLParser(self.settings, filename)
    parser.feed(content)
    parser.close()
    body = parser.body
    metadata['summary'] = parser.summary

    # Remove some CSS styles, so it doesn't break the themes.
    def filter_tags(style_text):
        """Drop CSS lines that start with one of the excluded selectors."""
        exclude = ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'a', 'ul', 'ol', 'li',
                   '.rendered_html', '@media', '.navbar', 'nav.navbar', '.navbar-text',
                   'code', 'pre', 'div.text_cell_render')
        kept = [line for line in style_text.split('\n')
                if not line.startswith(exclude)]
        return '<style type=\"text/css\">{0}</style>'.format('\n'.join(kept))

    css = '\n'.join(filter_tags(css) for css in info['inlining']['css'])
    css = css + CUSTOM_CSS
    body = css + body

    return body, metadata
def read(self, filepath):
    """Read a notebook file and return ``(content, metadata)`` for Pelican.

    Metadata comes from a side-car ``<name>.ipynb-meta`` file when one exists
    next to the notebook, otherwise from the notebook's own ``metadata``
    JSON object (only title, date, category, tags, slug and author are kept).

    Args:
        filepath: Path of the ``.ipynb`` file.

    Returns:
        A ``(content, metadata)`` tuple; ``content`` has been passed through
        ``fix_css``.

    Raises:
        Exception: If ``title`` and ``date`` are not both present in the
            collected metadata.
    """
    metadata = {'ipython': True}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    if os.path.exists(metadata_filepath):
        # Metadata is on a external file,
        # process using Pelican MD Reader.
        # NOTE(review): this rebinds ``metadata``, so the 'ipython' flag set
        # above is not carried over in this branch -- confirm that is intended.
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from ipython notebook file; ``with`` closes the
        # handle, which the previous bare ``open`` never did.
        with open(filepath) as ipynb_file:
            notebook_metadata = json.load(ipynb_file)['metadata']

        # Change to standard pelican metadata
        for key, value in notebook_metadata.items():
            key = key.lower()
            if key in ("title", "date", "category", "tags", "slug", "author"):
                metadata[key] = self.process_metadata(key, value)

    keys = [k.lower() for k in metadata.keys()]
    if not set(['title', 'date']).issubset(set(keys)):
        # Probably using ipynb.liquid mode
        md_filename = filename.split('.')[0] + '.md'
        md_filepath = os.path.join(filedir, md_filename)
        if not os.path.exists(md_filepath):
            raise Exception("Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file.")
        raise Exception("Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
                        "assuming that this notebook is for liquid tag usage if true ignore this error")

    content, info = get_html_from_filepath(filepath)

    # Generate Summary: Do it before cleaning CSS
    if 'summary' not in [key.lower() for key in self.settings.keys()]:
        parser = MyHTMLParser(self.settings, filename)
        if isinstance(content, six.binary_type):
            # PY2 (str) or PY3 (bytes) to PY2 (unicode) or PY3 (str)
            # unicode_literals makes format() try to decode as ASCII.
            # Enforce decoding as UTF-8.
            content = '<body>{0}</body>'.format(content.decode("utf-8"))
        else:
            # Content already decoded
            content = '<body>{0}</body>'.format(content)
        parser.feed(content)
        parser.close()
        content = parser.body
        # The parsed summary is stored unless IPYNB_USE_META_SUMMARY is set
        # to something that does not compare equal to False.
        if self.settings.get('IPYNB_USE_META_SUMMARY', False) == False:  # noqa: E712
            metadata['summary'] = parser.summary

    # NOTE(review): only the presence of the key is tested, so
    # IPYNB_IGNORE_CSS = False still enables it -- confirm that is intended.
    ignore_css = 'IPYNB_IGNORE_CSS' in self.settings.keys()
    content = fix_css(content, info, ignore_css=ignore_css)
    return content, metadata
def add_static_comments(gen, content):
    """Load the on-disk comments of *content*, write their feed and attach them.

    Comment files are looked up in
    ``<PELICAN_COMMENT_SYSTEM_DIR>/<content.slug>``. Files whose metadata has
    a ``replyto`` entry are attached as replies to the comment (or reply)
    whose ``id`` equals that value.

    Args:
        gen: Generator exposing ``settings``, ``context`` and ``output_path``.
        content: Content object; ``comments`` and ``comments_count`` are set
            on it.

    Returns:
        None. Nothing is touched when ``PELICAN_COMMENT_SYSTEM`` is not equal
        to ``True``.
    """
    # Equality (not truthiness) is kept on purpose so that only True/1
    # enables the plugin, exactly as before.
    if gen.settings['PELICAN_COMMENT_SYSTEM'] != True:  # noqa: E712
        return

    content.comments_count = 0
    content.comments = []

    # Modify the local context, so we get proper values for the feed
    context = copy.copy(gen.context)
    context['SITEURL'] += "/" + content.url
    context['SITENAME'] = "Comments for: " + content.title
    context['SITESUBTITLE'] = ""
    path = gen.settings['PELICAN_COMMENT_SYSTEM_FEED'] % content.slug

    writer = Writer(gen.output_path, settings=gen.settings)

    folder = os.path.join(gen.settings['PELICAN_COMMENT_SYSTEM_DIR'], content.slug)

    if not os.path.isdir(folder):
        logger.debug("No comments found for: " + content.slug)
        # An empty feed is still written for content without comments.
        writer.write_feed([], context, path)
        return

    reader = MarkdownReader(gen.settings)
    comments = []
    replies = []

    for entry in os.listdir(folder):
        name, extension = os.path.splitext(entry)
        if extension[1:].lower() in reader.file_extensions:
            com_content, meta = reader.read(os.path.join(folder, entry))
            avatar_path = avatars.getAvatarPath(name, meta)
            com = Comment(entry, avatar_path, com_content, meta,
                          gen.settings, entry, context)

            if 'replyto' in meta:
                replies.append(com)
            else:
                comments.append(com)

    writer.write_feed(comments + replies, context, path)

    # One pass builds an id -> [comment, ...] index; each reply then finds
    # its parent(s) by lookup rather than by scanning every comment again.
    # Values are lists so that duplicate ids keep their former behaviour.
    # assumes ids and 'replyto' values are hashable (strings) -- TODO confirm
    by_id = {}
    for candidate in chain(comments, replies):
        by_id.setdefault(candidate.id, []).append(candidate)

    for reply in replies:
        for parent in by_id.get(reply.metadata['replyto'], ()):
            parent.addReply(reply)

    count = 0
    for comment in comments:
        comment.sortReplies()
        count += comment.countReplies()

    comments = sorted(comments)

    content.comments_count = len(comments) + count
    content.comments = comments
def read(self, filepath):
    """Read a notebook file and return ``(content, metadata)`` for Pelican.

    Metadata comes from a side-car ``<name>.ipynb-meta`` file when one exists
    next to the notebook, otherwise from the notebook's own ``metadata``
    JSON object (only title, date, category, tags, slug and author are kept).

    Args:
        filepath: Path of the ``.ipynb`` file.

    Returns:
        A ``(content, metadata)`` tuple; ``content`` is the HTML returned by
        ``get_html_from_filepath``, unmodified.

    Raises:
        Exception: If ``title`` and ``date`` are not both present in the
            collected metadata.
    """
    metadata = {'ipython': True}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    if os.path.exists(metadata_filepath):
        # Metadata is on a external file, process using Pelican MD Reader.
        # NOTE(review): this rebinds ``metadata``, so the 'ipython' flag set
        # above is not carried over in this branch -- confirm that is intended.
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from ipython notebook file; the context manager makes
        # sure the handle is closed (it used to be leaked).
        with open(filepath) as ipynb_file:
            notebook_metadata = json.load(ipynb_file)['metadata']

        # Change to standard pelican metadata
        for key, value in notebook_metadata.items():
            key = key.lower()
            if key in ("title", "date", "category", "tags", "slug", "author"):
                metadata[key] = self.process_metadata(key, value)

    keys = [k.lower() for k in metadata.keys()]
    if not {'title', 'date'}.issubset(set(keys)):
        # Probably using ipynb.liquid mode
        md_filename = filename.split('.')[0] + '.md'
        md_filepath = os.path.join(filedir, md_filename)
        if not os.path.exists(md_filepath):
            raise Exception(
                "Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file."
            )
        raise Exception(
            "Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
            "assuming that this notebook is for liquid tag usage if true ignore this error"
        )

    content, info = get_html_from_filepath(filepath)

    # Generate Summary: Do it before cleaning CSS
    if 'summary' not in [key.lower() for key in self.settings.keys()]:
        parser = MyHTMLParser(self.settings, filename)
        # The <body> wrapper is fed separately so ``content`` itself is
        # returned untouched.
        parser.feed('<body>')
        parser.feed(content)
        parser.feed('</body>')
        parser.close()
        # The parsed summary is stored unless IPYNB_USE_META_SUMMARY is set
        # to something that does not compare equal to False.
        if self.settings.get('IPYNB_USE_META_SUMMARY', False) == False:  # noqa: E712
            metadata['summary'] = parser.summary

    return content, metadata
def read(self, filepath):
    """Read a notebook file and return ``(content, metadata)`` for Pelican.

    Metadata comes from a side-car ``<name>.ipynb-meta`` file when one exists
    next to the notebook, otherwise from the notebook's own ``metadata``
    JSON object (only title, date, category, tags, slug and author are kept).

    Args:
        filepath: Path of the ``.ipynb`` file.

    Returns:
        A ``(content, metadata)`` tuple; ``content`` has been passed through
        ``fix_css``.

    Raises:
        Exception: If ``title`` and ``date`` are not both present in the
            collected metadata.
    """
    metadata = {'ipython': True}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    if os.path.exists(metadata_filepath):
        # Metadata is on a external file, process using Pelican MD Reader.
        # NOTE(review): this rebinds ``metadata``, so the 'ipython' flag set
        # above is not carried over in this branch -- confirm that is intended.
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from ipython notebook file; the context manager makes
        # sure the handle is closed (it used to be leaked).
        with open(filepath) as ipynb_file:
            notebook_metadata = json.load(ipynb_file)['metadata']

        # Change to standard pelican metadata
        for key, value in notebook_metadata.items():
            key = key.lower()
            if key in ("title", "date", "category", "tags", "slug", "author"):
                metadata[key] = self.process_metadata(key, value)

    keys = [k.lower() for k in metadata.keys()]
    if not set(['title', 'date']).issubset(set(keys)):
        # Probably using ipynb.liquid mode
        md_filename = filename.split('.')[0] + '.md'
        md_filepath = os.path.join(filedir, md_filename)
        if not os.path.exists(md_filepath):
            raise Exception("Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file.")
        raise Exception("Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
                        "assuming that this notebook is for liquid tag usage if true ignore this error")

    content, info = get_html_from_filepath(filepath)

    # Generate Summary: Do it before cleaning CSS
    if 'summary' not in [key.lower() for key in self.settings.keys()]:
        parser = MyHTMLParser(self.settings, filename)
        if hasattr(content, 'decode'):  # PY2
            content = '<body>%s</body>' % content.encode('utf-8')
            content = content.decode("utf-8")
        else:
            content = '<body>%s</body>' % content
        parser.feed(content)
        parser.close()
        content = parser.body
        # The parsed summary is stored when IPYNB_USE_META_SUMMARY is absent
        # or is literally False.
        if self.settings.get('IPYNB_USE_META_SUMMARY', False) is False:
            metadata['summary'] = parser.summary

    content = fix_css(content, info)
    return content, metadata
def _read_articles(self):
    """Parse every ``.md`` file in the content directory into ``self.articles``.

    The directory is ``PROJECT_DIR`` joined with ``PATH``. Each parsed file
    is appended as a ``{'content': ..., 'metadata': ...}`` dict, in the
    order ``os.listdir`` yields the entries.
    """
    articles_dir = os.path.join(PROJECT_DIR, PATH)
    reader_settings = {
        'MD_EXTENSIONS': ['codehilite(css_class=highlight)', 'extra']
    }
    markdown_reader = MarkdownReader(reader_settings)

    for entry in os.listdir(articles_dir):
        # Anything that is not a Markdown source is skipped.
        if not entry.endswith('.md'):
            continue
        parsed = markdown_reader.read(os.path.join(articles_dir, entry))
        body, meta = parsed
        self.articles.append({'content': body, 'metadata': meta})
def add_static_comments(gen, metadata):
    """Load the on-disk comments of an article into its *metadata* dict.

    Comment files are looked up in
    ``<PELICAN_COMMENT_SYSTEM_DIR>/<metadata['slug']>``. Files whose own
    metadata has a ``replyto`` entry are attached as replies to the comment
    (or reply) whose ``id`` equals that value.

    Args:
        gen: Generator exposing ``settings``.
        metadata: Article metadata dict; ``comments`` and ``comments_count``
            are written into it.

    Returns:
        None. ``metadata`` is left untouched when ``PELICAN_COMMENT_SYSTEM``
        is not equal to ``True``.
    """
    # Equality (not truthiness) is kept on purpose so that only True/1
    # enables the plugin, exactly as before.
    if gen.settings['PELICAN_COMMENT_SYSTEM'] != True:  # noqa: E712
        return

    metadata['comments_count'] = 0
    metadata['comments'] = []

    if 'slug' not in metadata:
        logger.warning(
            "pelican_comment_system: can't locate comments files without slug tag in the article"
        )
        return

    reader = MarkdownReader(gen.settings)
    comments = []
    replies = []
    folder = os.path.join(gen.settings['PELICAN_COMMENT_SYSTEM_DIR'], metadata['slug'])

    if not os.path.isdir(folder):
        logger.debug("No comments found for: " + metadata['slug'])
        return

    for entry in os.listdir(folder):
        name, extension = os.path.splitext(entry)
        if extension[1:].lower() in reader.file_extensions:
            content, meta = reader.read(os.path.join(folder, entry))
            meta['locale_date'] = strftime(meta['date'], gen.settings['DEFAULT_DATE_FORMAT'])
            com = Comment(name, meta, content)
            if 'replyto' in meta:
                replies.append(com)
            else:
                comments.append(com)

    # Index every comment and reply by id once, so each reply finds its
    # parent(s) with a lookup instead of rescanning the whole list.
    # Lists are used as values so duplicate ids behave as they did before.
    # assumes ids and 'replyto' values are hashable (strings) -- TODO confirm
    by_id = {}
    for candidate in chain(comments, replies):
        by_id.setdefault(candidate.id, []).append(candidate)

    for reply in replies:
        for parent in by_id.get(reply.metadata['replyto'], ()):
            parent.addReply(reply)

    count = 0
    for comment in comments:
        comment.sortReplies()
        count += comment.countReplies()

    comments = sorted(comments)

    metadata['comments_count'] = len(comments) + count
    metadata['comments'] = comments
def read(self, filename):
    """Convert a revealjs markdown file to html and return it

    Requires pandoc (https://pandoc.org/) to be available on the PATH.

    Args:
        filename: Path of the Markdown file to convert.

    Returns:
        A ``(html, metadata)`` tuple. ``metadata`` is what Pelican's
        ``MarkdownReader`` extracted, with ``"template"`` set to
        ``"revealmd"``.

    Raises:
        RuntimeError: If talking to the pandoc process fails or pandoc exits
            with a non-zero status.
    """
    # TODO: use markdown reader to parse the reveal.js markdown
    # github.com/danielfrg/pelican-ipynb/blob/master/markup.py#L62
    reader = MarkdownReader(self.settings)
    # TODO: using the markdown reader converts the file contents to HTML,
    # but we just want plain text because pandoc should be converting it
    # instead. The trouble is, we also want to get the metadata
    _md_content, metadata = reader.read(filename)

    # The command is built as an argument list, not a string that is later
    # split on whitespace, so paths and option values containing spaces
    # reach pandoc intact.
    command = ["pandoc", "--to", "revealjs", "-f", "markdown"]
    if "theme" in metadata:
        command += ["--variable", "theme=%s" % metadata["theme"]]
    if "revealoptions" in metadata:
        if "transition" in metadata["revealoptions"]:
            command += [
                "--variable",
                "transition=%s" % metadata["revealoptions"]["transition"],
            ]
    command.append(filename)

    # Define template for Pelican
    metadata["template"] = "revealmd"

    p = subprocess.Popen(command,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    try:
        # No stdin pipe is opened, so nothing is sent to pandoc.
        stdout, stderr = p.communicate()
    except OSError:
        raise RuntimeError(
            'Pandoc died with exitcode "%s" during conversion.' %
            p.returncode)
    if p.returncode != 0:
        # A failed conversion used to be returned silently as (possibly
        # empty) content; surface it together with pandoc's own diagnostics.
        raise RuntimeError(
            'Pandoc died with exitcode "%s" during conversion. %s' %
            (p.returncode, stderr.decode("utf8", "replace").strip()))

    revealjs_content = stdout.decode("utf8")

    # Patch revealjs_content to convert 'back' "{" and "}"
    returntext = revealjs_content.replace("%7B", "{").replace("%7D", "}")

    return returntext, metadata
def _create_pdf(self, obj, output_path):
    """Write ``<obj.slug>.pdf`` into *output_path* from the object's source.

    ``.rst`` sources are passed through as text. Markdown sources are
    rendered with ``MarkdownReader`` and embedded in a reST ``raw:: html``
    block, preceded by a title and the metadata fields listed in
    ``self.supported_md_fields``. Any other extension is skipped with a
    warning.

    Args:
        obj: Content object providing ``slug``, ``source_path``,
            ``get_siteurl()`` and ``_link_replacer``.
        output_path: Directory the PDF is written to.
    """
    filename = obj.slug + ".pdf"
    output_pdf = os.path.join(output_path, filename)
    mdreader = MarkdownReader(self.settings)

    _, ext = os.path.splitext(obj.source_path)
    if ext == ".rst":
        with open(obj.source_path, encoding="utf-8") as f:
            text = f.read()
        header = ""
    elif ext[1:] in mdreader.file_extensions and mdreader.enabled:
        text, meta = mdreader.read(obj.source_path)
        header = ""

        if "title" in meta:
            title = meta["title"]
            header = title + "\n" + "#" * len(title) + "\n\n"
            del meta["title"]

        # Iterate over a snapshot of the keys because entries are deleted
        # inside the loop.
        for k in list(meta):
            # We can't support all fields, so we strip the ones that won't
            # look good
            if k not in self.supported_md_fields:
                del meta[k]

        header += "\n".join([":{}: {}".format(k, meta[k]) for k in meta])
        header += "\n\n.. raw:: html\n\n\t"
        text = text.replace("\n", "\n\t")

        # rst2pdf casts the text to str and will break if it finds
        # non-escaped characters. Here we nicely escape them to XML/HTML
        # entities before proceeding
        text = text.encode("ascii", "xmlcharrefreplace").decode()
    else:
        # We don't support this format.
        # ``Logger.warn`` is a deprecated alias that newer Python versions
        # no longer provide; ``warning`` is the supported spelling.
        logger.warning("Ignoring unsupported file " + obj.source_path)
        return

    # Find intra-site links and replace placeholder with actual path / url
    hrefs = self._get_intrasite_link_regex()
    text = hrefs.sub(lambda m: obj._link_replacer(obj.get_siteurl(), m), text)

    logger.info(" [ok] writing %s" % output_pdf)

    self.pdfcreator.createPdf(text=(header + text), output=output_pdf)
def read(self, filepath):
    """Convert a notebook to HTML and return ``(body, metadata)`` for Pelican.

    Metadata comes from a side-car ``<name>.ipynb-meta`` file when present,
    otherwise from the notebook's ``metadata`` JSON object, whose keys are
    lower-cased and whose values go through ``self.process_metadata``.

    Args:
        filepath: Path of the ``.ipynb`` file.

    Returns:
        A ``(body, metadata)`` tuple; ``body`` is the parsed HTML body and
        ``metadata`` always carries ``ipython`` and ``summary``.
    """
    metadata = {}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    # Load metadata
    if os.path.exists(metadata_filepath):
        # Metadata is on a external file, process using Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from ipython notebook file (handle closed on exit)
        with open(filepath) as ipynb_file:
            notebook_metadata = json.load(ipynb_file)['metadata']

        # Fix metadata to pelican standards. A new dict is built because
        # deleting/inserting keys in the dict being iterated raises
        # RuntimeError on Python 3.
        metadata = {}
        for key, value in notebook_metadata.items():
            key = key.lower()
            metadata[key] = self.process_metadata(key, value)
    metadata['ipython'] = True

    # Convert ipython notebook to html
    config = Config({'CSSHTMLHeaderTransformer': {'enabled': True,
                                                  'highlight_class': '.highlight-ipynb'}})
    exporter = HTMLExporter(
        config=config,
        template_file='plugins/ipynb/templates/dsbytes_full',
        filters={'highlight2html': custom_highlighter},
    )

    content, info = exporter.from_filename(filepath)
    content = '<html><body>' + content + '</body></html>'

    # Process using Pelican HTMLReader
    parser = MyHTMLParser(self.settings, filename)
    parser.feed(content)
    parser.close()
    body = parser.body
    metadata['summary'] = parser.summary

    return body, metadata
def add_static_comments(gen, metadata):
    """Load the on-disk comments of an article into its *metadata* dict.

    Comment files are looked up in
    ``<PELICAN_COMMENT_SYSTEM_DIR>/<metadata['slug']>``. Files whose own
    metadata has a ``replyto`` entry are attached as replies to the comment
    (or reply) whose ``id`` equals that value.

    Args:
        gen: Generator exposing ``settings``.
        metadata: Article metadata dict; ``comments`` and ``comments_count``
            are written into it.

    Returns:
        None. ``metadata`` is left untouched when ``PELICAN_COMMENT_SYSTEM``
        is not equal to ``True``.
    """
    # Equality (not truthiness) is kept on purpose so that only True/1
    # enables the plugin, exactly as before.
    if gen.settings['PELICAN_COMMENT_SYSTEM'] != True:  # noqa: E712
        return

    metadata['comments_count'] = 0
    metadata['comments'] = []

    if 'slug' not in metadata:
        logger.warning("pelican_comment_system: can't locate comments files without slug tag in the article")
        return

    reader = MarkdownReader(gen.settings)
    comments = []
    replies = []
    folder = os.path.join(gen.settings['PELICAN_COMMENT_SYSTEM_DIR'], metadata['slug'])

    if not os.path.isdir(folder):
        logger.debug("No comments found for: " + metadata['slug'])
        return

    for entry in os.listdir(folder):
        name, extension = os.path.splitext(entry)
        if extension[1:].lower() in reader.file_extensions:
            content, meta = reader.read(os.path.join(folder, entry))
            meta['locale_date'] = strftime(meta['date'], gen.settings['DEFAULT_DATE_FORMAT'])
            com = Comment(name, meta, content)
            if 'replyto' in meta:
                replies.append(com)
            else:
                comments.append(com)

    # One pass builds an id -> [comment, ...] index; each reply then finds
    # its parent(s) by lookup rather than by scanning every comment again.
    # Values are lists so that duplicate ids keep their former behaviour.
    # assumes ids and 'replyto' values are hashable (strings) -- TODO confirm
    by_id = {}
    for candidate in chain(comments, replies):
        by_id.setdefault(candidate.id, []).append(candidate)

    for reply in replies:
        for parent in by_id.get(reply.metadata['replyto'], ()):
            parent.addReply(reply)

    count = 0
    for comment in comments:
        comment.sortReplies()
        count += comment.countReplies()

    comments = sorted(comments)

    metadata['comments_count'] = len(comments) + count
    metadata['comments'] = comments
def _create_pdf(self, obj, output_path):
    """Write ``<obj.slug>.pdf`` into *output_path* from the object's source.

    ``.rst`` sources are passed through as text. Markdown sources are
    rendered with ``MarkdownReader`` and embedded in a reST ``raw:: html``
    block, preceded by a title and the metadata fields listed in
    ``self.supported_md_fields``. Any other extension is skipped with a
    warning.

    Args:
        obj: Content object providing ``slug`` and ``source_path``.
        output_path: Directory the PDF is written to.
    """
    filename = obj.slug + '.pdf'
    output_pdf = os.path.join(output_path, filename)
    mdreader = MarkdownReader(self.settings)

    _, ext = os.path.splitext(obj.source_path)
    if ext == '.rst':
        with open(obj.source_path, encoding='utf-8') as f:
            text = f.read()
        header = ''
    elif ext[1:] in mdreader.file_extensions and mdreader.enabled:
        text, meta = mdreader.read(obj.source_path)
        header = ''

        if 'title' in meta:
            title = meta['title']
            header = title + '\n' + '#' * len(title) + '\n\n'
            del meta['title']

        # Iterate over a snapshot of the keys: deleting from the dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for k in list(meta):
            # We can't support all fields, so we strip the ones that won't
            # look good
            if k not in self.supported_md_fields:
                del meta[k]

        header += '\n'.join([':%s: %s' % (k, meta[k]) for k in meta])
        header += '\n\n.. raw:: html\n\n\t'
        text = text.replace('\n', '\n\t')

        # rst2pdf casts the text to str and will break if it finds
        # non-escaped characters. Here we nicely escape them to XML/HTML
        # entities before proceeding. The result is decoded back to text so
        # it can be concatenated with ``header`` below (bytes + str fails).
        text = text.encode('ascii', 'xmlcharrefreplace').decode('ascii')
    else:
        # We don't support this format.
        # ``Logger.warn`` is a deprecated alias that newer Python versions
        # no longer provide; ``warning`` is the supported spelling.
        logger.warning('Ignoring unsupported file ' + obj.source_path)
        return

    logger.info(' [ok] writing %s' % output_pdf)
    self.pdfcreator.createPdf(text=(header + text), output=output_pdf)
class TestToCGeneration(unittest.TestCase):
    """Compare generated tables of contents with stored HTML fixtures."""

    def setUp(self):
        """Initialise the toc plugin config and a Markdown reader."""
        toc.init_default_config(None)
        settings = get_settings()
        self.settings = settings
        self.md_reader = MarkdownReader(settings)

    def _handle_article_generation(self, path):
        """Read the Markdown file at *path* and wrap it in an ``Article``."""
        body, meta = self.md_reader.read(path)
        return Article(content=body, metadata=meta)

    def _generate_toc(self, article_path, expected_path):
        """Return the toc-processed article and the expected fixture text."""
        article = self._handle_article_generation(article_path)
        toc.generate_toc(article)
        with open(expected_path, 'r') as fixture:
            wanted = fixture.read()
        return article, wanted

    def test_toc_generation(self):
        """Plain ASCII headers produce the stored ToC."""
        article, wanted = self._generate_toc(
            "test_data/article_with_headers.md",
            "test_data/article_with_headers_toc.html",
        )
        self.assertEqual(article.toc, wanted)

    def test_toc_generation_nonascii(self):
        """Headers with non-ASCII text produce the stored ToC."""
        article, wanted = self._generate_toc(
            "test_data/article_with_headers_nonascii.md",
            "test_data/article_with_headers_toc_nonascii.html",
        )
        self.assertEqual(article.toc, wanted)

    def test_toc_generation_exclude_small_headers(self):
        """``EXCLUDE_SMALL_HEADERS`` is switched on before generating."""
        self.settings['TOC']['EXCLUDE_SMALL_HEADERS'] = True
        article, wanted = self._generate_toc(
            "test_data/article_with_headers_exclude_small_headers.md",
            "test_data/article_with_headers_toc_exclude_small_headers.html",
        )
        self.assertEqual(article.toc, wanted)

    def test_no_toc_generation(self):
        """An article without headers gets no ``toc`` attribute at all."""
        source = "test_data/article_without_headers.md"
        article = self._handle_article_generation(source)
        toc.generate_toc(article)
        # Reading ``.toc`` is what is expected to raise AttributeError.
        with self.assertRaises(AttributeError):
            self.assertIsNone(article.toc)
def read(self, filepath):
    """Convert a notebook to HTML and return ``(body, metadata)`` for Pelican.

    Metadata comes from a side-car ``<name>.ipynb-meta`` file when present,
    otherwise from the notebook's ``metadata`` JSON object, whose keys are
    lower-cased and whose values go through ``self.process_metadata``.
    When BeautifulSoup is available, input cells containing the text
    ``#ignore`` are removed from the exported HTML.

    Args:
        filepath: Path of the ``.ipynb`` file.

    Returns:
        A ``(body, metadata)`` tuple. ``body`` is the filtered notebook CSS,
        the parsed HTML body and ``LATEX_CUSTOM_SCRIPT`` concatenated.
    """
    metadata = {}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    # Load metadata
    if os.path.exists(metadata_filepath):
        # Metadata is on a external file, process using Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from ipython notebook file (handle closed on exit)
        with open(filepath) as ipynb_file:
            notebook_metadata = json.load(ipynb_file)['metadata']

        # Fix metadata to pelican standards. A new dict is built because
        # deleting/inserting keys in the dict being iterated raises
        # RuntimeError on Python 3.
        metadata = {}
        for key, value in notebook_metadata.items():
            key = key.lower()
            metadata[key] = self.process_metadata(key, value)
    metadata['ipython'] = True

    # Convert ipython notebook to html
    config = Config({
        'CSSHTMLHeaderTransformer': {
            'enabled': True,
            'highlight_class': '.highlight-ipynb'
        }
    })
    exporter = HTMLExporter(config=config,
                            template_file='basic',
                            filters={'highlight2html': custom_highlighter})

    content, info = exporter.from_filename(filepath)

    if BeautifulSoup:
        soup = BeautifulSoup(content)
        for i in soup.findAll("div", {"class": "input"}):
            if i.findChildren()[1].find(text='#ignore') is not None:
                i.extract()
    else:
        soup = content

    # Process using Pelican HTMLReader
    content = '<body>{0}</body>'.format(soup)  # So Pelican HTMLReader works
    parser = MyHTMLParser(self.settings, filename)
    parser.feed(content)
    parser.close()
    body = parser.body
    # The parsed summary is stored unless IPYNB_USE_META_SUMMARY is set to
    # something that does not compare equal to False.
    if self.settings.get('IPYNB_USE_META_SUMMARY', False) == False:  # noqa: E712
        metadata['summary'] = parser.summary

    def filter_css(style_text):
        '''
        HACK: IPython returns a lot of CSS including its own bootstrap.
        Get only the IPython Notebook CSS styles.
        '''
        index = style_text.find('/*!\n*\n* IPython notebook\n*\n*/')
        if index > 0:
            style_text = style_text[index:]
        index = style_text.find('/*!\n*\n* IPython notebook webapp\n*\n*/')
        if index > 0:
            style_text = style_text[:index]

        style_text = re.sub(r'color\:\#0+(;)?', '', style_text)
        style_text = re.sub(
            r'\.rendered_html[a-z0-9,._ ]*\{[a-z0-9:;%.#\-\s\n]+\}', '',
            style_text)

        return '<style type=\"text/css\">{0}</style>'.format(style_text)

    ipython_css = '\n'.join(
        filter_css(css_style) for css_style in info['inlining']['css'])
    body = ipython_css + body + LATEX_CUSTOM_SCRIPT

    return body, metadata
'css_class': 'highlight' }, 'markdown.extensions.extra': {}, 'markdown.extensions.meta': {}, }, 'output_format': 'html5', } THEME = "./Flex" # does nothing? # THEME_COLOR = 'light' # defines order of page titles in the header PAGE_ORDER_BY = 'page-order' # prevent Pelican from reading files matching the following patterns IGNORE_FILES = ['.#*', 'includes', 'templates', 'README.md'] # place files replacing theme templates in ./content/templates THEME_TEMPLATES_OVERRIDES = ['./content/templates'] DIRECT_TEMPLATES = (('index', )) # copied to /output without modification STATIC_PATHS = ['images', 'css'] # render markdown contents from files in /content/includes and make # accesible from INCLUDES variable in html templates INCLUDES = {} for fname in glob.glob('./content/includes/*.md'): pth = Path(fname) INCLUDES[pth.stem], _ = MarkdownReader(config).read(fname)
def setUp(self):
    """Prepare the toc plugin config, the settings and a Markdown reader."""
    # The toc defaults are initialised before the settings are fetched.
    toc.init_default_config(None)
    settings = get_settings()
    self.settings = settings
    self.md_reader = MarkdownReader(settings)
def read(self, filepath):
    """Convert a Jupyter notebook to HTML and gather its Pelican metadata.

    Metadata is read from a sibling ``<name>.nbdata`` file when it exists,
    or from the first notebook cell when the ``IPYNB_MARKUP_USE_FIRST_CELL``
    setting is truthy.

    Args:
        filepath: Path of the notebook file to convert.

    Returns:
        A ``(content, metadata)`` tuple.

    Raises:
        Exception: If neither metadata source is available.
    """
    # NOTE: this initial dict (including the "jupyter_notebook" flag) is
    # replaced by whatever MarkdownReader returns below.
    metadata = {}
    metadata["jupyter_notebook"] = True
    start = 0
    end = None

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = os.path.splitext(filename)[0] + ".nbdata"
    metadata_filepath = os.path.join(filedir, metadata_filename)

    if os.path.exists(metadata_filepath):
        # Found a .nbdata file: process it using Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    elif self.settings.get("IPYNB_MARKUP_USE_FIRST_CELL"):
        # No external metadata file:
        # load metadata from the first cell of the notebook file
        with open(filepath) as ipynb_file:
            nb_json = json.load(ipynb_file)

        metacell = "\n".join(nb_json["cells"][0]["source"])
        # Convert Markdown title and listings to standard metadata items
        metacell = re.sub(r"^#+\s+", "title: ", metacell, flags=re.MULTILINE)
        metacell = re.sub(r"^\s*[*+-]\s+", "", metacell, flags=re.MULTILINE)

        # Unfortunately we can not pass MarkdownReader an in-memory
        # string, so we have to work with a temporary file
        with tempfile.NamedTemporaryFile(
                "w+", encoding="utf-8") as metadata_file:
            md_reader = MarkdownReader(self.settings)
            metadata_file.write(metacell)
            metadata_file.flush()
            _content, metadata = md_reader.read(metadata_file.name)

        # Skip metacell
        start = 1
    else:
        # The f prefix used to sit inside the literal ("f{filepath}"), so
        # the path was never interpolated; the sentences also lacked a
        # separating space.
        raise Exception(
            f"Error processing {filepath}: "
            "Could not find metadata in: .nbdata file or in the first cell of the notebook. "
            "If this notebook is used with liquid tags then you can safely ignore this error."
        )

    if "subcells" in metadata:
        start, end = ast.literal_eval(metadata["subcells"])

    preprocessors = self.settings.get("IPYNB_PREPROCESSORS", [])
    template = self.settings.get("IPYNB_EXPORT_TEMPLATE", None)
    content, info = get_html_from_filepath(
        filepath,
        start=start,
        end=end,
        preprocessors=preprocessors,
        template=template,
        colorscheme=self.settings.get("IPYNB_COLORSCHEME"),
    )

    # Generate summary: Do it before cleaning CSS
    keys = [k.lower() for k in metadata.keys()]
    use_meta_summary = self.settings.get("IPYNB_GENERATE_SUMMARY", True)
    if "summary" not in keys and use_meta_summary:
        parser = MyHTMLParser(self.settings, filename)
        content = "<body>{0}</body>".format(content)
        parser.feed(content)
        parser.close()
        metadata["summary"] = parser.summary

    # Write/fix content
    fix_css = self.settings.get("IPYNB_FIX_CSS", True)
    ignore_css = self.settings.get("IPYNB_SKIP_CSS", False)
    content = parse_css(content, info, fix_css=fix_css, ignore_css=ignore_css)

    nb_save_as = self.settings.get("IPYNB_NB_SAVE_AS")
    if nb_save_as:
        # Copy the source notebook into the output tree.
        output_path = self.settings.get("OUTPUT_PATH")
        nb_output_fullpath = nb_save_as.format(**metadata)
        nb_output_dir = os.path.join(output_path,
                                     os.path.dirname(nb_output_fullpath))
        # exist_ok=True already covers the "directory exists" case.
        os.makedirs(nb_output_dir, exist_ok=True)
        copyfile(filepath, os.path.join(output_path, nb_output_fullpath))
        metadata["nb_path"] = nb_output_fullpath

    return content, metadata
def read(self, filepath):
    """Convert a Jupyter notebook to HTML and gather its Pelican metadata.

    Metadata sources, in order of precedence:

    1. A sibling ``<name>.nbdata`` file parsed with ``MarkdownReader``.
    2. The first notebook cell, when ``IPYNB_USE_METACELL`` is truthy.
    3. The notebook's ``metadata`` entry (only the title, date, category,
       tags, slug and author keys are kept).

    Args:
        filepath: Path of the notebook file to convert.

    Returns:
        A ``(content, metadata)`` tuple.

    Raises:
        Exception: If the collected metadata lacks ``title`` or ``date``.
    """
    # NOTE: this initial dict (including the 'jupyter_notebook' flag) is
    # replaced when MarkdownReader supplies the metadata below.
    metadata = {}
    metadata['jupyter_notebook'] = True
    start = 0
    end = None

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    # One stem for every companion file, so names containing extra dots
    # resolve consistently (the `.md` lookup used to use split('.')[0]).
    stem = os.path.splitext(filename)[0]
    metadata_filepath = os.path.join(filedir, stem + '.nbdata')

    if os.path.exists(metadata_filepath):
        # Metadata is in an external file: use Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # No external metadata file: load it from the notebook file
        with open(filepath) as ipynb_file:
            doc = json.load(ipynb_file)

        if self.settings.get('IPYNB_USE_METACELL'):
            # Option 2: Use metadata on the first notebook cell
            metacell = "\n".join(doc['cells'][0]['source'])
            # Convert Markdown title and listings to standard metadata items
            metacell = re.sub(r'^#+\s+', 'title: ', metacell, flags=re.MULTILINE)
            metacell = re.sub(r'^\s*[*+-]\s+', '', metacell, flags=re.MULTILINE)

            # Unfortunately we can not pass MarkdownReader an in-memory
            # string, so we have to work with a temporary file
            with tempfile.NamedTemporaryFile('w+', encoding='utf-8') as metadata_file:
                md_reader = MarkdownReader(self.settings)
                metadata_file.write(metacell)
                metadata_file.flush()
                _content, metadata = md_reader.read(metadata_file.name)

            # Skip metacell
            start = 1
        else:
            # Option 3: Read metadata from inside the notebook
            notebook_metadata = doc['metadata']

            # Change to standard pelican metadata
            for key, value in notebook_metadata.items():
                key = key.lower()
                if key in ("title", "date", "category", "tags", "slug", "author"):
                    metadata[key] = self.process_metadata(key, value)

    keys = [k.lower() for k in metadata.keys()]
    if not {'title', 'date'}.issubset(keys):
        # Probably using ipynb.liquid mode
        md_filepath = os.path.join(filedir, stem + '.md')
        if not os.path.exists(md_filepath):
            raise Exception("Could not find metadata in `.nbdata` file or inside `.ipynb`")
        else:
            raise Exception(
                "Could not find metadata in `.nbdata` file or inside `.ipynb` but found `.md` file, "
                "assuming that this notebook is for liquid tag usage if true ignore this error")

    if 'subcells' in metadata:
        start, end = ast.literal_eval(metadata['subcells'])

    preprocessors = self.settings.get('IPYNB_PREPROCESSORS', [])
    template = self.settings.get('IPYNB_EXPORT_TEMPLATE', None)
    content, info = get_html_from_filepath(
        filepath,
        start=start,
        end=end,
        preprocessors=preprocessors,
        template=template,
    )

    # Generate summary: Do it before cleaning CSS
    use_meta_summary = self.settings.get('IPYNB_GENERATE_SUMMARY', True)
    if 'summary' not in keys and use_meta_summary:
        parser = MyHTMLParser(self.settings, filename)
        if isinstance(content, six.binary_type):
            # unicode_literals makes format() try to decode as ASCII.
            # Enforce decoding as UTF-8.
            content = '<body>{0}</body>'.format(content.decode("utf-8"))
        else:
            # Content already decoded
            content = '<body>{0}</body>'.format(content)
        parser.feed(content)
        parser.close()
        metadata['summary'] = parser.summary

    # Write/fix content
    fix_css = self.settings.get('IPYNB_FIX_CSS', True)
    ignore_css = self.settings.get('IPYNB_SKIP_CSS', False)
    content = parse_css(content, info, fix_css=fix_css, ignore_css=ignore_css)

    nb_save_as = self.settings.get('IPYNB_NB_SAVE_AS')
    if nb_save_as:
        # Copy the source notebook into the output tree.
        output_path = self.settings.get('OUTPUT_PATH')
        nb_output_fullpath = nb_save_as.format(**metadata)
        nb_output_dir = os.path.join(output_path, os.path.dirname(nb_output_fullpath))
        # exist_ok=True already covers the "directory exists" case.
        os.makedirs(nb_output_dir, exist_ok=True)
        copyfile(filepath, os.path.join(output_path, nb_output_fullpath))
        metadata['nb_path'] = nb_output_fullpath

    return content, metadata
"""Try to parse every file under ARTICLE_DIR with Pelican's MarkdownReader."""
import codecs
import logging
import os

from pelican.readers import MarkdownReader
from pelican.settings import DEFAULT_CONFIG

logger = logging.getLogger(__name__)

ARTICLE_DIR = 'content/articles'

# Iterate over all files in ARTICLE_DIR (recursively with walk)
# and try parsing them with Pelican's MarkdownReader
md_reader = MarkdownReader(DEFAULT_CONFIG)
for dirpath, _subdirs, files in os.walk(ARTICLE_DIR):
    for name in files:
        # Logger.warn is a deprecated alias of Logger.warning; passing the
        # name as an argument defers formatting to the logging framework.
        logger.warning('[MD] Trying to parse %s...', name)
        path = os.path.join(dirpath, name)
        md_reader.read(path)
def setUpClass(cls):
    """Create the class-level settings object and Markdown reader.

    Calls ``toc.init_default_config(None)`` first, then stores the result of
    ``get_settings()`` and a ``MarkdownReader`` built from it on the class.
    """
    toc.init_default_config(None)

    shared_settings = get_settings()
    cls.settings = shared_settings
    cls.md_reader = MarkdownReader(shared_settings)
def read(self, filepath):
    """Convert a Jupyter notebook to HTML and gather its Pelican metadata.

    Metadata sources, in order of precedence:

    1. A sibling ``<name>.nbdata`` file parsed with ``MarkdownReader``.
    2. The first notebook cell, when ``IPYNB_USE_METACELL`` is truthy.
    3. The notebook's ``metadata`` entry (only the title, date, category,
       tags, slug and author keys are kept).

    Args:
        filepath: Path of the notebook file to convert.

    Returns:
        A ``(content, metadata)`` tuple.

    Raises:
        Exception: If the collected metadata lacks ``title`` or ``date``.
    """
    # NOTE: this initial dict (including the 'jupyter_notebook' flag) is
    # replaced when MarkdownReader supplies the metadata below.
    metadata = {}
    metadata['jupyter_notebook'] = True
    start = 0
    end = None

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    # Derive the stem once with splitext so the `.nbdata` and `.md`
    # companions agree for names containing extra dots (the `.md` lookup
    # used to use split('.')[0]).
    notebook_stem = os.path.splitext(filename)[0]
    metadata_filepath = os.path.join(filedir, notebook_stem + '.nbdata')

    if os.path.exists(metadata_filepath):
        # Metadata is in an external file: use Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # No external metadata file: load it from the notebook file
        with open(filepath) as ipynb_file:
            doc = json.load(ipynb_file)

        if self.settings.get('IPYNB_USE_METACELL'):
            # Option 2: Use metadata on the first notebook cell
            metacell = "\n".join(doc['cells'][0]['source'])
            # Convert Markdown title and listings to standard metadata items
            metacell = re.sub(r'^#+\s+',
                              'title: ',
                              metacell,
                              flags=re.MULTILINE)
            metacell = re.sub(r'^\s*[*+-]\s+',
                              '',
                              metacell,
                              flags=re.MULTILINE)

            # Unfortunately we can not pass MarkdownReader an in-memory
            # string, so we have to work with a temporary file
            with tempfile.NamedTemporaryFile(
                    'w+', encoding='utf-8') as metadata_file:
                md_reader = MarkdownReader(self.settings)
                metadata_file.write(metacell)
                metadata_file.flush()
                _content, metadata = md_reader.read(metadata_file.name)

            # Skip metacell
            start = 1
        else:
            # Option 3: Read metadata from inside the notebook
            notebook_metadata = doc['metadata']

            # Change to standard pelican metadata
            for key, value in notebook_metadata.items():
                key = key.lower()
                if key in ("title", "date", "category", "tags", "slug",
                           "author"):
                    metadata[key] = self.process_metadata(key, value)

    keys = [k.lower() for k in metadata.keys()]
    if not {'title', 'date'}.issubset(keys):
        # Probably using ipynb.liquid mode
        md_filepath = os.path.join(filedir, notebook_stem + '.md')
        if not os.path.exists(md_filepath):
            raise Exception(
                "Could not find metadata in `.nbdata` file or inside `.ipynb`"
            )
        else:
            raise Exception(
                "Could not find metadata in `.nbdata` file or inside `.ipynb` but found `.md` file, "
                "assuming that this notebook is for liquid tag usage if true ignore this error"
            )

    if 'subcells' in metadata:
        start, end = ast.literal_eval(metadata['subcells'])

    preprocessors = self.settings.get('IPYNB_PREPROCESSORS', [])
    template = self.settings.get('IPYNB_EXPORT_TEMPLATE', None)
    content, info = get_html_from_filepath(
        filepath,
        start=start,
        end=end,
        preprocessors=preprocessors,
        template=template,
        colorscheme=self.settings.get('IPYNB_COLORSCHEME'),
    )

    # Generate summary: Do it before cleaning CSS
    use_meta_summary = self.settings.get('IPYNB_GENERATE_SUMMARY', True)
    if 'summary' not in keys and use_meta_summary:
        parser = MyHTMLParser(self.settings, filename)
        if isinstance(content, six.binary_type):
            # unicode_literals makes format() try to decode as ASCII.
            # Enforce decoding as UTF-8.
            content = '<body>{0}</body>'.format(content.decode("utf-8"))
        else:
            # Content already decoded
            content = '<body>{0}</body>'.format(content)
        parser.feed(content)
        parser.close()
        metadata['summary'] = parser.summary

    # Write/fix content
    fix_css = self.settings.get('IPYNB_FIX_CSS', True)
    ignore_css = self.settings.get('IPYNB_SKIP_CSS', False)
    content = parse_css(content,
                        info,
                        fix_css=fix_css,
                        ignore_css=ignore_css)

    nb_save_as = self.settings.get('IPYNB_NB_SAVE_AS')
    if nb_save_as:
        # Copy the source notebook into the output tree.
        output_path = self.settings.get('OUTPUT_PATH')
        nb_output_fullpath = nb_save_as.format(**metadata)
        nb_output_dir = os.path.join(output_path,
                                     os.path.dirname(nb_output_fullpath))
        # exist_ok=True already covers the "directory exists" case.
        os.makedirs(nb_output_dir, exist_ok=True)
        copyfile(filepath, os.path.join(output_path, nb_output_fullpath))
        metadata['nb_path'] = nb_output_fullpath

    return content, metadata
def read(self, filepath):
    """Convert an IPython notebook to HTML with toggleable input cells.

    Metadata is taken from a sibling ``<name>.ipynb-meta`` file when it
    exists, otherwise from the notebook's ``metadata`` entry. Input cells
    containing ``#HIDE`` are removed; the prompt of every other input cell
    is turned into a ``javascript:toggle(...)`` link targeting that cell's
    ``input_area`` div.

    Args:
        filepath: Path of the notebook file to convert.

    Returns:
        A ``(body, metadata)`` tuple; ``body`` is ``CUSTOM_CSS``, the parsed
        HTML body and ``js`` concatenated.
    """
    metadata = {}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    # Load metadata
    if os.path.exists(metadata_filepath):
        # Metadata is in an external file, process using Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from the notebook file; the context manager closes
        # the handle that was previously leaked.
        with open(filepath) as ipynb_file:
            notebook_metadata = json.load(ipynb_file)['metadata']

        # Fix metadata to pelican standards. A fresh dict is filled rather
        # than deleting/re-inserting keys of the dict being iterated.
        for key, value in notebook_metadata.items():
            key = key.lower()
            metadata[key] = self.process_metadata(key, value)
    metadata['ipython'] = True

    # Convert ipython notebook to html
    config = Config({'CSSHTMLHeaderTransformer': {'enabled': True, 'highlight_class': '.highlight-ipynb'}})
    exporter = HTMLExporter(config=config, template_file='basic', filters={'highlight2html': custom_highlighter})
    content, info = exporter.from_filename(filepath)
    soup = BeautifulSoup(content)

    # Find all the inputs with #HIDE and strip them out; number the rest.
    inputs = 0
    for i in soup.findAll("div", {"class": "input"}):
        if i.findChildren()[1].findChild().findChild().findChild().find(text='#HIDE') is not None:
            i.extract()
        else:
            inputs = inputs + 1
            tag = soup.new_tag('a', href="javascript:toggle('input%s');" % inputs, target='_self')
            prompt = i.findChildren()[0]
            tag.string = prompt.text.strip()
            prompt.clear()
            prompt.append(tag)
            input_area = i.find("div", {"class": "input_area"})
            if input_area is not None:
                input_area['id'] = 'input%s' % inputs
            else:
                # No input_area div in this cell: dump the cell for
                # inspection (replaces a bare ``except:`` around the
                # assignment).
                print(i)

    # Process using Pelican HTMLReader
    # NOTE: ``unicode`` is the Python 2 builtin, kept as in the original.
    content = '<body>{0}</body>'.format(unicode(soup))  # So Pelican HTMLReader works
    parser = MyHTMLParser(self.settings, filename)
    parser.feed(content)
    parser.close()
    body = parser.body
    summary = parser.summary

    metadata['summary'] = summary

    # Remove some CSS styles, so it doesn't break the themes.
    def filter_tags(style_text):
        """Drop CSS lines starting with an excluded selector; wrap in <style>."""
        exclude = ('body', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'a', 'ul', 'ol', 'li',
                   '.rendered_html', '@media', '.navbar', 'nav.navbar', '.navbar-text',
                   'code', 'pre', 'div.text_cell_render')
        style_list = [line for line in style_text.split('\n') if not line.startswith(exclude)]
        ans = '\n'.join(style_list)
        return '<style type=\"text/css\">{0}</style>'.format(ans)

    css = '\n'.join(filter_tags(style) for style in info['inlining']['css'])
    # NOTE(review): the filtered notebook CSS computed above is discarded
    # here, exactly as in the original; confirm whether
    # ``css + CUSTOM_CSS`` was intended.
    css = CUSTOM_CSS
    body = css + body
    body = body + js

    return body, metadata
def read(self, filepath):
    """Convert an IPython notebook to HTML and gather its Pelican metadata.

    Metadata sources, in order of precedence:

    1. A sibling ``<name>.ipynb-meta`` file parsed with ``MarkdownReader``.
    2. The first notebook cell, when ``IPYNB_USE_METACELL`` is truthy.
    3. The notebook's ``metadata`` entry (only the title, date, category,
       tags, slug and author keys are kept).

    Args:
        filepath: Path of the notebook file to convert.

    Returns:
        A ``(content, metadata)`` tuple.

    Raises:
        Exception: If the collected metadata lacks ``title`` or ``date``.
    """
    # NOTE: this initial dict (including the 'ipython' flag) is replaced
    # when MarkdownReader supplies the metadata below.
    metadata = {}
    metadata['ipython'] = True
    start = 0
    end = None

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    # One stem for every companion file, so names containing extra dots
    # resolve consistently (the `.md` lookup used to use split('.')[0]).
    stem = os.path.splitext(filename)[0]
    metadata_filepath = os.path.join(filedir, stem + '.ipynb-meta')

    # When metadata is in an external file, process using Pelican MD Reader
    md_reader = MarkdownReader(self.settings)

    if os.path.exists(metadata_filepath):
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from ipython notebook file
        with open(filepath) as ipynb_file:
            doc = json.load(ipynb_file)

        if self.settings.get('IPYNB_USE_METACELL'):
            metacell = "\n".join(doc['cells'][0]['source'])
            # Convert Markdown title and listings to standard metadata items
            metacell = re.sub(r'^#+\s+',
                              'title: ',
                              metacell,
                              flags=re.MULTILINE)
            metacell = re.sub(r'^\s*[*+-]\s+',
                              '',
                              metacell,
                              flags=re.MULTILINE)

            # Unfortunately we can not pass MarkdownReader an in-memory
            # string, so we have to work with a temporary file
            with tempfile.NamedTemporaryFile(
                    'w+', encoding='utf-8') as metadata_file:
                metadata_file.write(metacell)
                metadata_file.flush()
                _content, metadata = md_reader.read(metadata_file.name)

            # Skip metacell
            start = 1
        else:
            notebook_metadata = doc['metadata']

            # Change to standard pelican metadata
            for key, value in notebook_metadata.items():
                key = key.lower()
                if key in ("title", "date", "category", "tags", "slug",
                           "author"):
                    metadata[key] = self.process_metadata(key, value)

    keys = [k.lower() for k in metadata.keys()]
    if not {'title', 'date'}.issubset(keys):
        # Probably using ipynb.liquid mode
        md_filepath = os.path.join(filedir, stem + '.md')
        if not os.path.exists(md_filepath):
            raise Exception(
                "Could not find metadata in `.ipynb-meta`, inside `.ipynb` or external `.md` file."
            )
        else:
            raise Exception(
                "Could not find metadata in `.ipynb-meta` or inside `.ipynb` but found `.md` file, "
                "assuming that this notebook is for liquid tag usage if true ignore this error"
            )

    if 'subcells' in metadata:
        start, end = ast.literal_eval(metadata['subcells'])

    content, info = get_html_from_filepath(
        filepath,
        preprocessors=self.settings.get('IPYNB_PREPROCESSORS', []),
        start=start,
        end=end,
        template=self.settings.get('IPYNB_EXPORT_TEMPLATE'))

    # Generate Summary: Do it before cleaning CSS
    if 'summary' not in keys:
        parser = MyHTMLParser(self.settings, filename)
        if isinstance(content, six.binary_type):
            # PY2 (str) or PY3 (bytes) to PY2 (unicode) or PY3 (str).
            # unicode_literals makes format() try to decode as ASCII.
            # Enforce decoding as UTF-8.
            content = '<body>{0}</body>'.format(content.decode("utf-8"))
        else:
            # Content already decoded
            content = '<body>{0}</body>'.format(content)
        parser.feed(content)
        parser.close()
        content = parser.body
        # The parser summary is used when IPYNB_USE_META_SUMMARY is unset
        # or falsy.
        if not self.settings.get('IPYNB_USE_META_SUMMARY', False):
            metadata['summary'] = parser.summary

    # Honour the setting's value: previously the mere presence of the key
    # (even ``IPYNB_IGNORE_CSS = False``) caused the CSS to be ignored.
    ignore_css = bool(self.settings.get('IPYNB_IGNORE_CSS', False))
    content = fix_css(content, info, ignore_css=ignore_css)
    return content, metadata
def read(self, filepath):
    """Convert an IPython notebook to HTML and gather its Pelican metadata.

    Metadata is taken from a sibling ``<name>.ipynb-meta`` file (parsed with
    ``MarkdownReader``) when it exists; otherwise from the notebook's
    ``metadata`` entry, with keys lower-cased and values passed through
    ``self.process_metadata``.

    Args:
        filepath: Path of the notebook file to convert.

    Returns:
        A ``(body, metadata)`` tuple; ``body`` is the filtered notebook CSS,
        ``CUSTOM_CSS`` and the parsed HTML body concatenated.
    """
    metadata = {}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split(".")[0] + ".ipynb-meta"
    metadata_filepath = os.path.join(filedir, metadata_filename)

    # Load metadata
    if os.path.exists(metadata_filepath):
        # Metadata is in an external file, process using Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from the notebook file; ``with`` closes the handle
        # that was previously left open.
        with open(filepath) as ipynb_file:
            raw_metadata = json.load(ipynb_file)["metadata"]

        # Fix metadata to pelican standards. Building a new dict avoids
        # deleting/inserting keys in the dict being iterated.
        metadata = {}
        for key, value in raw_metadata.items():
            key = key.lower()
            metadata[key] = self.process_metadata(key, value)
    metadata["ipython"] = True

    # Convert ipython notebook to html
    config = Config({"CSSHTMLHeaderTransformer": {"enabled": True, "highlight_class": ".highlight-ipynb"}})
    exporter = HTMLExporter(config=config, template_file="basic", filters={"highlight2html": custom_highlighter})
    content, info = exporter.from_filename(filepath)

    # Process using Pelican HTMLReader
    content = "<body>{0}</body>".format(content)  # So Pelican HTMLReader works
    parser = MyHTMLParser(self.settings, filename)
    parser.feed(content)
    parser.close()
    body = parser.body
    metadata["summary"] = parser.summary

    # Remove some CSS styles, so it doesn't break the themes.
    def filter_tags(style_text):
        """Drop CSS lines starting with an excluded selector; wrap in <style>."""
        exclude = (
            "p",
            "h1",
            "h2",
            "h3",
            "h4",
            "h5",
            "h6",
            "a",
            "ul",
            "ol",
            "li",
            ".rendered_html",
            "@media",
            ".navbar",
            "nav.navbar",
            ".navbar-text",
            "code",
            "pre",
            "div.text_cell_render",
        )
        # str.startswith accepts a tuple of prefixes.
        kept = [line for line in style_text.split("\n") if not line.startswith(exclude)]
        ans = "\n".join(kept)
        return '<style type="text/css">{0}</style>'.format(ans)

    css = "\n".join(filter_tags(style) for style in info["inlining"]["css"])
    css = css + CUSTOM_CSS
    body = css + body

    return body, metadata
def read(self, filepath):
    """Convert an IPython notebook to HTML and gather its Pelican metadata.

    Metadata is taken from a sibling ``<name>.ipynb-meta`` file (parsed with
    ``MarkdownReader``) when it exists; otherwise from the notebook's
    ``metadata`` entry, with keys lower-cased and values passed through
    ``self.process_metadata``. The parser's summary is always stored under
    ``metadata['summary']``.

    Args:
        filepath: Path of the notebook file to convert.

    Returns:
        A ``(body, metadata)`` tuple; ``body`` is the filtered notebook CSS,
        the parsed HTML body and ``LATEX_CUSTOM_SCRIPT`` concatenated.
    """
    metadata = {}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    # Load metadata
    if os.path.exists(metadata_filepath):
        # Metadata is in an external file, process using Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, metadata = md_reader.read(metadata_filepath)
    else:
        # Load metadata from the notebook file and close the handle.
        with open(filepath) as ipynb_file:
            notebook_metadata = json.load(ipynb_file)['metadata']

        # Fix metadata to pelican standards without mutating the dict
        # that is being iterated.
        for key, value in notebook_metadata.items():
            lowered = key.lower()
            metadata[lowered] = self.process_metadata(lowered, value)
    metadata['ipython'] = True

    # Convert ipython notebook to html
    config = Config({'CSSHTMLHeaderTransformer': {'enabled': True, 'highlight_class': '.highlight-ipynb'}})
    exporter = HTMLExporter(config=config, template_file='basic', filters={'highlight2html': custom_highlighter})

    content, info = exporter.from_filename(filepath)

    if BeautifulSoup:
        # Drop every input cell whose second descendant contains '#ignore'.
        soup = BeautifulSoup(content)
        for i in soup.findAll("div", {"class": "input"}):
            if i.findChildren()[1].find(text='#ignore') is not None:
                i.extract()
    else:
        soup = content

    # Process using Pelican HTMLReader
    content = '<body>{0}</body>'.format(soup)  # So Pelican HTMLReader works
    parser = MyHTMLParser(self.settings, filename)
    parser.feed(content)
    parser.close()
    body = parser.body
    metadata['summary'] = parser.summary

    def filter_css(style_text):
        """Keep only the IPython Notebook section of the exported CSS.

        HACK: IPython returns a lot of CSS including its own bootstrap.
        """
        index = style_text.find('/*!\n*\n* IPython notebook\n*\n*/')
        if index > 0:
            style_text = style_text[index:]
        index = style_text.find('/*!\n*\n* IPython notebook webapp\n*\n*/')
        if index > 0:
            style_text = style_text[:index]

        style_text = re.sub(r'color\:\#0+(;)?', '', style_text)
        style_text = re.sub(r'\.rendered_html[a-z0-9 ]*\{[a-z0-9:;%.#\-\s\n]+\}', '', style_text)

        return '<style type=\"text/css\">{0}</style>'.format(style_text)

    ipython_css = '\n'.join(filter_css(css_style) for css_style in info['inlining']['css'])
    body = ipython_css + body + LATEX_CUSTOM_SCRIPT

    return body, metadata
def read(self, filepath):
    """Convert an IPython notebook to HTML and gather its Pelican metadata.

    Metadata is merged with ``join_metadata`` from, in this order: a
    ``status: draft`` entry when the file name starts with ``draft``, the
    notebook metadata, the ``pelican`` entry of the first cell's metadata,
    and a sibling ``<name>.ipynb-meta`` file. Keys are then lower-cased and
    values passed through ``self.process_metadata``.

    Args:
        filepath: Path of the notebook file to convert.

    Returns:
        A ``(body, metadata)`` tuple; ``body`` is the filtered notebook CSS,
        ``CUSTOM_CSS`` and the parsed HTML body concatenated.
    """
    metadata = {}

    # Files
    filedir = os.path.dirname(filepath)
    filename = os.path.basename(filepath)
    metadata_filename = filename.split('.')[0] + '.ipynb-meta'
    metadata_filepath = os.path.join(filedir, metadata_filename)

    # If filename starts with draft, set the status accordingly
    if filename.lower().startswith('draft'):
        metadata['status'] = 'draft'

    with open(filepath) as f:
        nb = nbformat.read(f, 'ipynb')  # read in ipynb content

    first_cell = nb.worksheets[0].cells[0]

    # Read in metadata
    metadata = join_metadata(metadata, nb.metadata)

    if 'pelican' in first_cell.metadata:
        m = first_cell.metadata['pelican']
        metadata = join_metadata(metadata, m)

    if os.path.exists(metadata_filepath):
        # Metadata is in an external file, process using Pelican MD Reader
        md_reader = MarkdownReader(self.settings)
        _content, m = md_reader.read(metadata_filepath)
        metadata = join_metadata(metadata, m)

    # Reformat metadata into pelican acceptable format. Iterate over a
    # snapshot: deleting/inserting keys while iterating the live dict view
    # is unsafe on Python 3.
    for k, v in list(metadata.items()):
        del metadata[k]
        k = k.lower()
        metadata[k] = self.process_metadata(k, v)

    metadata['ipython'] = True

    # Use first cell as the title if flag is set
    field = 'IPYNB_FIRST_CELL_HEADING_AS_TITLE'
    if self.settings.get(field, False) and first_cell.cell_type == 'heading':
        metadata['title'] = first_cell.source
        # Truncate the first cell from notebook
        nb.worksheets[0].cells = nb.worksheets[0].cells[1:]

    # Convert ipython notebook to html
    config = Config({'CSSHTMLHeaderPreprocessor': {'enabled': True, 'highlight_class': '.highlight-ipynb'}})
    exporter = HTMLExporter(config=config, template_file='basic', filters={'highlight2html': custom_highlighter})
    content, info = exporter.from_notebook_node(nb)

    if BeautifulSoup:
        # Drop every input cell whose second descendant contains '#ignore'.
        soup = BeautifulSoup(content)
        for i in soup.findAll("div", {"class": "input"}):
            if i.findChildren()[1].find(text='#ignore') is not None:
                i.extract()
    else:
        soup = content

    content = '<body>{0}</body>'.format(soup)  # So Pelican HTMLReader works
    parser = MyHTMLParser(self.settings, filename)
    parser.feed(content)
    parser.close()
    body = parser.body
    summary = parser.summary

    field = 'IPYNB_FIRST_CONTENT_AS_SUMMARY'
    # The cell list may be empty here if the notebook held only the heading
    # cell removed above; fall back to the parser summary in that case
    # instead of raising IndexError.
    cells = nb.worksheets[0].cells
    first_cell = cells[0] if cells else None
    if (self.settings.get(field, False) and first_cell is not None
            and first_cell.cell_type == 'markdown'):
        raw = first_cell.source
        md = markdown.Markdown()
        metadata['summary'] = md.convert(raw)
    else:
        metadata['summary'] = summary

    # Remove some CSS styles, so it doesn't break the theme.
    def filter_tags(style_text):
        """Drop CSS lines starting with an excluded selector; wrap in <style>."""
        exclude = ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'a', 'ul', 'ol', 'li',
                   '.rendered_html', '@media', '.navbar', 'nav.navbar', '.navbar-text',
                   'code', 'pre', 'div.text_cell_render')
        # str.startswith accepts a tuple of prefixes.
        style_list = [line for line in style_text.split('\n') if not line.startswith(exclude)]
        ans = '\n'.join(style_list)
        return '<style type=\"text/css\">{0}</style>'.format(ans)

    css = '\n'.join(filter_tags(style) for style in info['inlining']['css'])
    css = css + CUSTOM_CSS
    body = css + body
    return body, metadata