def sanitize_comments_html(html):
    """Round-trip comment HTML through plain text and Markdown.

    The markup is flattened with ``html2text``, rendered again by a
    ``Markdown`` instance constructed with ``safe_mode="remove"``, and every
    run of newline characters is deleted from the rendered output.

    :param html: The comment markup to process.
    :return: The re-rendered markup with all newlines removed.
    """
    from calibre.ebooks.markdown import Markdown

    plain_text = html2text(html)
    renderer = Markdown(safe_mode="remove")
    rendered = renderer.convert(plain_text)
    # Collapse the output onto a single line.
    return re.sub("\n+", "", rendered)
def sanitize_comments_html(html):
    """Rebuild comment HTML via Markdown and pass it through ``bleach.clean``.

    The markup is converted to text with ``html2text``, rendered by a default
    ``Markdown`` instance, cleaned with ``bleach.clean`` and finally stripped
    of every run of newline characters.

    :param html: The comment markup to process.
    :return: The cleaned markup with all newlines removed.
    """
    from calibre.ebooks.markdown import Markdown
    import bleach

    plain_text = html2text(html)
    renderer = Markdown()
    rendered = renderer.convert(plain_text)
    scrubbed = bleach.clean(rendered)
    return re.sub(u'\n+', u'', scrubbed)
def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')):
    """Render Markdown text and wrap the result in ``HTML_TEMPLATE``.

    Requested extension names are lower-cased and kept only when they appear
    in ``MD_EXTENSIONS``; the surviving names are handed to ``Markdown``
    together with ``safe_mode=False``.

    :param txt: Markdown source text.
    :param title: Value substituted into the first template slot.
    :param extensions: Iterable of extension names to enable.
    :return: ``HTML_TEMPLATE`` filled with the title and the rendered body.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown

    enabled = []
    for requested in extensions:
        lowered = requested.lower()
        if lowered in MD_EXTENSIONS:
            enabled.append(lowered)

    renderer = Markdown(enabled, safe_mode=False)
    body = renderer.convert(txt)
    return HTML_TEMPLATE % (title, body)
def convert_markdown(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Render Markdown text and wrap the result in ``HTML_TEMPLATE``.

    Each requested extension name is lower-cased; names found in
    ``MD_EXTENSIONS`` are prefixed with the
    ``calibre.ebooks.markdown.extensions.`` package path and passed to
    ``Markdown``.

    :param txt: Markdown source text.
    :param title: Value substituted into the first template slot.
    :param extensions: Iterable of short extension names to enable.
    :return: ``HTML_TEMPLATE`` filled with the title and the rendered body.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown

    package_prefix = 'calibre.ebooks.markdown.extensions.'
    qualified = []
    for requested in extensions:
        lowered = requested.lower()
        if lowered in MD_EXTENSIONS:
            qualified.append(package_prefix + lowered)

    renderer = Markdown(extensions=qualified)
    body = renderer.convert(txt)
    return HTML_TEMPLATE % (title, body)
def test_markdown():
    """Smoke-test that Markdown and the comment sanitizer can be used.

    Constructs a ``Markdown`` instance with the ``extra`` extension, runs
    ``sanitize_html`` over a small byte string containing a script tag and an
    image, then reports success through ``fprint``. Results are discarded;
    the check only verifies that nothing raises.
    """
    from calibre.ebooks.markdown import Markdown
    Markdown(extensions=['extra'])

    from calibre.library.comments import sanitize_html
    sample = b'''<script>moo</script>xxx<img src="http://moo.com/x.jpg">'''
    sanitize_html(sample)

    fprint('Markdown OK!')
def markdown(val):
    """Convert *val* with a ``Markdown`` instance cached on this function.

    The converter is stored as the ``Markdown`` attribute of the function
    object itself; it is imported and created only when that attribute does
    not exist yet, and reused on later calls.

    :param val: Text handed to the converter's ``convert`` method.
    :return: Whatever ``convert`` returns for *val*.
    """
    if not hasattr(markdown, 'Markdown'):
        # First use: build the converter and remember it on the function.
        from calibre.ebooks.markdown import Markdown
        markdown.Markdown = Markdown()
    converter = markdown.Markdown
    return converter.convert(val)
def get_images_from_polyglot_text(txt: str, base_dir: str = '', file_ext: str = 'txt') -> set:
    """Collect image paths referenced by Textile and/or Markdown text.

    A referenced path is kept only when it is non-empty, not absolute, has a
    guessed MIME type listed in ``OEB_IMAGES`` and exists on disk relative to
    *base_dir*.

    :param txt: The text to scan.
    :param base_dir: Directory that relative paths are resolved against; the
        current working directory is used when this is empty.
    :param file_ext: Selects the syntaxes to scan. ``txt``/``text`` scan both,
        ``textile`` only Textile, ``md``/``markdown`` only Markdown.
    :return: Set of the accepted paths, exactly as written in *txt*.
    """
    from calibre.ebooks.oeb.base import OEB_IMAGES
    from calibre import guess_type

    root_dir = base_dir or os.getcwd()
    found = set()

    def check_path(path: str) -> None:
        # Reject missing and absolute references first.
        if not path or os.path.isabs(path):
            return
        if guess_type(path)[0] not in OEB_IMAGES:
            return
        if os.path.exists(os.path.join(root_dir, path)):
            found.add(path)

    if file_ext in ('txt', 'text', 'textile'):
        # Textile image syntax: the path is captured by the named group.
        textile_image = r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'
        for match in re.finditer(textile_image, txt):
            check_path(match.group('path'))

    if file_ext in ('txt', 'text', 'md', 'markdown'):
        # Markdown: render to HTML, then read the src of every <img>.
        from markdown import Markdown
        rendered = Markdown().convert(txt)
        document = HTML_TEMPLATE % ('', rendered)
        from html5_parser import parse
        tree = parse(document)
        for image in tree.iterdescendants('img'):
            check_path(image.get('src'))

    return found
def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Render Markdown text and extract book metadata from its meta block.

    The ``meta`` extension is always enabled in addition to the requested
    extensions that appear in ``MD_EXTENSIONS``. After conversion the values
    in ``md.Meta`` are copied onto a new ``Metadata`` object.

    :param txt: Markdown source text.
    :param title: Initial title for the ``Metadata`` object; ``_('Unknown')``
        is used when this is empty.
    :param extensions: Iterable of short extension names to enable.
    :return: Tuple ``(mi, html)`` where ``mi`` is the populated ``Metadata``
        and ``html`` is ``HTML_TEMPLATE`` filled with ``mi.title`` and the
        rendered body.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values

    extensions = [
        'calibre.ebooks.markdown.extensions.' + x.lower()
        for x in extensions if x.lower() in MD_EXTENSIONS
    ]
    meta_ext = 'calibre.ebooks.markdown.extensions.meta'
    if meta_ext not in extensions:
        extensions.append(meta_ext)

    md = Markdown(extensions=extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    m = md.Meta

    # Accept 'date' and 'summary' as aliases unless the canonical key is
    # already present. dict.items() is used because dict.iteritems() does not
    # exist on Python 3; items() works on both major versions.
    for k, v in {'date': 'pubdate', 'summary': 'comments'}.items():
        if v not in m and k in m:
            m[v] = m.pop(k)

    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if not val:
            continue
        mf = mi.metadata_for_field(k)
        if not mf.get('is_multiple'):
            # Single-valued fields take the first entry of the meta value.
            val = val[0]
        if k == 'series':
            val, si = get_series_values(val)
            mi.series_index = 1 if si is None else si
        if k == 'rating':
            try:
                # Clamp the rating into the range 0..10.
                val = max(0, min(int(float(val)), 10))
            except Exception:
                # Best effort: an unparseable rating is skipped.
                continue
        if mf.get('datatype') == 'datetime':
            try:
                val = parse_only_date(val, assume_utc=False)
            except Exception:
                # Best effort: an unparseable date is skipped.
                continue
        setattr(mi, k, val)

    return mi, HTML_TEMPLATE % (mi.title, html)
def build_extension(self, ext_name, configs):
    """Instantiate a Markdown extension from its name.

    Names containing ``.`` or ``:`` are delegated to
    ``Markdown.build_extension``. Any other name is looked up as a module
    under ``markdown.extensions``; the module's ``makeExtension`` factory is
    used when present, otherwise the first plain class in the module that
    subclasses ``Extension`` (other than ``Extension`` itself).

    :param ext_name: Extension name, short or fully qualified.
    :param configs: Mapping of keyword arguments for the extension.
    :return: The constructed extension object.
    :raises ImportError: If the module has neither a factory nor a suitable
        ``Extension`` subclass.
    """
    is_qualified = '.' in ext_name or ':' in ext_name
    if is_qualified:
        return Markdown.build_extension(self, ext_name, configs)

    ext_name = 'markdown.extensions.' + ext_name
    module = importlib.import_module(ext_name)

    if hasattr(module, 'makeExtension'):
        return module.makeExtension(**configs)

    for candidate in vars(module).values():
        # Only objects whose type is exactly `type` are considered.
        if type(candidate) is not type:
            continue
        if issubclass(candidate, Extension) and candidate is not Extension:
            return candidate(**configs)

    raise ImportError('No extension class in {}'.format(ext_name))
def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Convert Markdown to HTML and build a ``Metadata`` object from its meta data.

    Requested extensions found in ``MD_EXTENSIONS`` are enabled, and the
    ``meta`` extension is added if it is not already among them. The values
    collected in ``md.Meta`` during conversion are then transferred to a
    freshly created ``Metadata`` instance.

    :param txt: Markdown source text.
    :param title: Initial title; falls back to ``_('Unknown')`` when empty.
    :param extensions: Iterable of short extension names to enable.
    :return: ``(mi, html)`` - the ``Metadata`` object and ``HTML_TEMPLATE``
        filled with ``mi.title`` and the rendered body.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values

    extensions = ['calibre.ebooks.markdown.extensions.' + x.lower()
                  for x in extensions if x.lower() in MD_EXTENSIONS]
    meta_ext = 'calibre.ebooks.markdown.extensions.meta'
    if meta_ext not in extensions:
        extensions.append(meta_ext)
    md = Markdown(extensions=extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    m = md.Meta
    # Map alias keys onto their canonical names. items() replaces the
    # Python-2-only iteritems(), which raises AttributeError on Python 3.
    for k, v in {'date': 'pubdate', 'summary': 'comments'}.items():
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            if not mf.get('is_multiple'):
                # Fields that are not multi-valued use only the first entry.
                val = val[0]
            if k == 'series':
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                try:
                    # Keep the rating within 0..10.
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    # Deliberately best effort: ignore a bad rating value.
                    continue
            if mf.get('datatype') == 'datetime':
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    # Deliberately best effort: ignore a bad date value.
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
def sanitize_comments_html(html):
    """Re-render comment HTML by way of plain text and Markdown.

    The markup is converted to text with ``html2text`` and the text is
    rendered by a default ``Markdown`` instance. No further processing is
    applied to the rendered output.

    :param html: The comment markup to process.
    :return: The output of ``Markdown.convert`` for the extracted text.
    """
    from calibre.ebooks.markdown import Markdown

    plain_text = html2text(html)
    return Markdown().convert(plain_text)
def sanitize_comments_html(html):
    """Normalise comment HTML through an html2text / Markdown round trip.

    ``html2text`` produces the text, a ``Markdown`` instance created with
    ``safe_mode='remove'`` renders it, and all runs of newlines are then
    removed from the result.

    :param html: The comment markup to process.
    :return: Single-line markup produced by the round trip.
    """
    from calibre.ebooks.markdown import Markdown

    as_text = html2text(html)
    converter = Markdown(safe_mode='remove')
    as_html = converter.convert(as_text)
    return re.sub('\n+', '', as_html)
def convert_markdown(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Convert Markdown text to HTML wrapped in ``HTML_TEMPLATE``.

    Only requested extensions whose lower-cased name is in ``MD_EXTENSIONS``
    are enabled; each is referenced by its full module path under
    ``calibre.ebooks.markdown.extensions``.

    :param txt: Markdown source text.
    :param title: Value substituted into the first template slot.
    :param extensions: Iterable of short extension names to enable.
    :return: ``HTML_TEMPLATE`` filled with the title and the rendered body.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown

    lowered_names = (name.lower() for name in extensions)
    module_paths = [
        'calibre.ebooks.markdown.extensions.' + name
        for name in lowered_names
        if name in MD_EXTENSIONS
    ]
    converter = Markdown(extensions=module_paths)
    rendered = converter.convert(txt)
    return HTML_TEMPLATE % (title, rendered)
def sanitize_comments_html(html):
    """Round-trip comment HTML through ``html2text`` and ``Markdown``.

    Uses the ``Markdown`` name already in scope, constructed with
    ``safe_mode='remove'``, and strips every run of newline characters from
    the converted output.

    :param html: The comment markup to process.
    :return: The converted markup with all newlines removed.
    """
    extracted = html2text(html)
    engine = Markdown(safe_mode='remove')
    converted = engine.convert(extracted)
    # Drop all line breaks from the converted markup.
    return re.sub('\n+', '', converted)
def test_markdown():
    """Smoke-test that Markdown and bleach can be imported and called.

    Builds a ``Markdown`` instance with the ``extra`` extension, runs
    ``bleach.clean`` on a short string containing a script tag, and prints a
    success message. Results are discarded; the check only verifies that
    nothing raises.
    """
    from calibre.ebooks.markdown import Markdown
    Markdown(extensions=['extra'])

    import bleach
    sample = u'xxx<script>a</script>'
    bleach.clean(sample)

    print('Markdown OK!')