Ejemplo n.º 1
0
def sanitize_comments_html(html):
    """Re-render *html* through html2text and Markdown, dropping newlines.

    The input is turned into text with ``html2text`` and converted back by
    a ``Markdown`` instance created with ``safe_mode="remove"``.  Every run
    of newline characters in the converted output is then deleted.
    """
    from calibre.ebooks.markdown import Markdown

    plain = html2text(html)
    rendered = Markdown(safe_mode="remove").convert(plain)
    return re.sub("\n+", "", rendered)
Ejemplo n.º 2
0
def sanitize_comments_html(html):
    """Re-render *html* via html2text and Markdown, then clean it with bleach.

    The Markdown output is passed to ``bleach.clean`` and every run of
    newline characters is removed from the cleaned result before returning.
    """
    from calibre.ebooks.markdown import Markdown
    import bleach
    plain = html2text(html)
    rendered = Markdown().convert(plain)
    return re.sub(u'\n+', u'', bleach.clean(rendered))
Ejemplo n.º 3
0
def sanitize_comments_html(html):
    """Return a bleach-cleaned, newline-free re-rendering of *html*.

    Steps: ``html2text`` on the input, ``Markdown().convert`` on that text,
    ``bleach.clean`` on the converted markup, then removal of every run of
    newline characters.
    """
    from calibre.ebooks.markdown import Markdown
    import bleach
    as_text = html2text(html)
    converter = Markdown()
    markup = converter.convert(as_text)
    scrubbed = bleach.clean(markup)
    return re.sub(u'\n+', u'', scrubbed)
Ejemplo n.º 4
0
def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')):
    """Convert Markdown *txt* and wrap the result in ``HTML_TEMPLATE``.

    Each name in *extensions* is lower-cased and kept only when it appears
    in ``MD_EXTENSIONS``.  The surviving names are handed to ``Markdown``
    (with ``safe_mode=False``), and the converted markup is substituted into
    ``HTML_TEMPLATE`` together with *title*.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    enabled = []
    for name in extensions:
        lowered = name.lower()
        if lowered in MD_EXTENSIONS:
            enabled.append(lowered)
    converter = Markdown(enabled, safe_mode=False)
    return HTML_TEMPLATE % (title, converter.convert(txt))
Ejemplo n.º 5
0
def convert_markdown(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Convert Markdown *txt* and wrap the result in ``HTML_TEMPLATE``.

    Names in *extensions* are lower-cased, filtered against
    ``MD_EXTENSIONS`` and prefixed with the calibre extensions package path
    before being passed to ``Markdown``.  Returns ``HTML_TEMPLATE`` filled
    with *title* and the converted markup.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    package = 'calibre.ebooks.markdown.extensions.'
    qualified = []
    for name in extensions:
        lowered = name.lower()
        if lowered in MD_EXTENSIONS:
            qualified.append(package + lowered)
    converter = Markdown(extensions=qualified)
    return HTML_TEMPLATE % (title, converter.convert(txt))
Ejemplo n.º 6
0
def test_markdown():
    """Smoke-test Markdown construction and the comments HTML sanitizer.

    Builds a ``Markdown`` instance with the ``extra`` extension, runs
    ``sanitize_html`` on a small byte string, and reports success through
    ``fprint``.  Any failure surfaces as the exception raised by those calls.
    """
    from calibre.ebooks.markdown import Markdown
    Markdown(extensions=['extra'])
    from calibre.library.comments import sanitize_html
    sample = b'''<script>moo</script>xxx<img src="http://moo.com/x.jpg">'''
    sanitize_html(sample)
    fprint('Markdown OK!')
Ejemplo n.º 7
0
def markdown(val):
    """Convert *val* with a lazily created, cached ``Markdown`` instance.

    The converter is stored on this function object as the attribute
    ``markdown.Markdown``; it is created (and calibre's Markdown imported)
    only when that attribute does not exist yet.
    """
    if not hasattr(markdown, 'Markdown'):
        from calibre.ebooks.markdown import Markdown
        markdown.Markdown = Markdown()
    converter = markdown.Markdown
    return converter.convert(val)
Ejemplo n.º 8
0
def get_images_from_polyglot_text(txt: str,
                                  base_dir: str = '',
                                  file_ext: str = 'txt') -> set:
    """Collect image paths referenced by Textile and/or Markdown text.

    Args:
        txt: Source text to scan.
        base_dir: Directory that relative image paths are resolved against;
            the current working directory is used when it is empty.
        file_ext: Selects the scanners.  ``txt``/``text`` run both,
            ``textile`` only the Textile one, ``md``/``markdown`` only the
            Markdown one.

    Returns:
        The set of referenced paths that are non-empty, not absolute, have
        a guessed type listed in ``OEB_IMAGES``, and exist under the base
        directory.
    """
    from calibre.ebooks.oeb.base import OEB_IMAGES
    from calibre import guess_type
    root_dir = base_dir if base_dir else os.getcwd()
    found = set()

    def consider(candidate: str) -> None:
        # Guard clauses evaluated in the same order as a single `and` chain.
        if not candidate or os.path.isabs(candidate):
            return
        if guess_type(candidate)[0] not in OEB_IMAGES:
            return
        if os.path.exists(os.path.join(root_dir, candidate)):
            found.add(candidate)

    if file_ext in ('txt', 'text', 'textile'):
        # Textile image syntax; the named group `path` holds the image path.
        textile_image = re.compile(
            r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))')
        for match in textile_image.finditer(txt):
            consider(match.group('path'))

    if file_ext in ('txt', 'text', 'md', 'markdown'):
        # Markdown: render to HTML, parse it, and read every <img src>.
        from markdown import Markdown
        html = HTML_TEMPLATE % ('', Markdown().convert(txt))
        from html5_parser import parse
        for img in parse(html).iterdescendants('img'):
            consider(img.get('src'))
    return found
Ejemplo n.º 9
0
def convert_markdown_with_metadata(txt,
                                   title='',
                                   extensions=DEFAULT_MD_EXTENSIONS):
    """Convert Markdown *txt* to HTML and extract its metadata header.

    Args:
        txt: Markdown source text.
        title: Title for the ``Metadata`` object; ``_('Unknown')`` is used
            when it is empty.
        extensions: Extension short names; each is lower-cased, filtered
            against ``MD_EXTENSIONS`` and prefixed with the calibre
            extensions package path.  The ``meta`` extension is always
            added.

    Returns:
        A ``(mi, html)`` tuple: the populated ``Metadata`` object and
        ``HTML_TEMPLATE`` filled with ``mi.title`` and the converted markup.

    Fields whose rating or date value cannot be parsed are skipped rather
    than raising.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values
    extensions = [
        'calibre.ebooks.markdown.extensions.' + x.lower() for x in extensions
        if x.lower() in MD_EXTENSIONS
    ]
    meta_ext = 'calibre.ebooks.markdown.extensions.meta'
    if meta_ext not in extensions:
        extensions.append(meta_ext)
    md = Markdown(extensions=extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    # Presumably filled in by the meta extension during convert() - confirm.
    m = md.Meta
    # Map alternate header names onto the field names used below, without
    # overwriting a field that is already present.  A tuple of pairs is used
    # because dict.iteritems() does not exist on Python 3.
    for k, v in (('date', 'pubdate'), ('summary', 'comments')):
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split(
    ):
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            if not mf.get('is_multiple'):
                # Single-valued field: keep only the first header entry.
                val = val[0]
            if k == 'series':
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                try:
                    # Clamp the rating to the range 0..10.
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    continue
            if mf.get('datatype') == 'datetime':
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
Ejemplo n.º 10
0
 def build_extension(self, ext_name, configs):
     """Build a Markdown extension from *ext_name*, resolving short names.

     Names containing ``.`` or ``:`` are delegated unchanged to
     ``Markdown.build_extension``.  Any other name is prefixed with
     ``markdown.extensions.`` and imported; the module's ``makeExtension``
     is called when present, otherwise the first module attribute that is
     a plain class derived from (but not equal to) ``Extension`` is
     instantiated.  *configs* is passed as keyword arguments either way.

     Raises ImportError when the module provides neither.
     """
     is_qualified = '.' in ext_name or ':' in ext_name
     if is_qualified:
         return Markdown.build_extension(self, ext_name, configs)
     dotted = 'markdown.extensions.' + ext_name
     mod = importlib.import_module(dotted)
     if hasattr(mod, 'makeExtension'):
         return mod.makeExtension(**configs)
     candidates = (
         obj for obj in vars(mod).values()
         if type(obj) is type and issubclass(obj, Extension)
         and obj is not Extension
     )
     ext_class = next(candidates, None)
     if ext_class is not None:
         return ext_class(**configs)
     raise ImportError('No extension class in {}'.format(dotted))
Ejemplo n.º 11
0
 def build_extension(self, ext_name, configs):
     """Resolve *ext_name* to an extension instance configured by *configs*.

     A name containing ``.`` or ``:`` is handed straight to
     ``Markdown.build_extension``.  Otherwise the module
     ``markdown.extensions.<ext_name>`` is imported and used: its
     ``makeExtension`` callable if it has one, else the first attribute
     that is a plain class subclassing ``Extension`` (excluding
     ``Extension`` itself).  *configs* is expanded as keyword arguments.

     Raises ImportError if the module has no such factory or class.
     """
     if '.' in ext_name or ':' in ext_name:
         return Markdown.build_extension(self, ext_name, configs)

     module_path = 'markdown.extensions.' + ext_name
     loaded = importlib.import_module(module_path)

     if hasattr(loaded, 'makeExtension'):
         return loaded.makeExtension(**configs)

     for member in vars(loaded).values():
         if type(member) is not type:
             continue
         if issubclass(member, Extension) and member is not Extension:
             return member(**configs)

     raise ImportError('No extension class in {}'.format(module_path))
Ejemplo n.º 12
0
def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Convert Markdown *txt* to HTML and extract its metadata header.

    Args:
        txt: Markdown source text.
        title: Title for the ``Metadata`` object; ``_('Unknown')`` is used
            when it is empty.
        extensions: Extension short names; each is lower-cased, filtered
            against ``MD_EXTENSIONS`` and prefixed with the calibre
            extensions package path.  The ``meta`` extension is always
            added.

    Returns:
        A ``(mi, html)`` tuple: the populated ``Metadata`` object and
        ``HTML_TEMPLATE`` filled with ``mi.title`` and the converted markup.

    Fields whose rating or date value cannot be parsed are skipped rather
    than raising.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values
    extensions = ['calibre.ebooks.markdown.extensions.' + x.lower() for x in extensions if x.lower() in MD_EXTENSIONS]
    meta_ext = 'calibre.ebooks.markdown.extensions.meta'
    if meta_ext not in extensions:
        extensions.append(meta_ext)
    md = Markdown(extensions=extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    # Presumably filled in by the meta extension during convert() - confirm.
    m = md.Meta
    # Map alternate header names onto the field names used below, without
    # overwriting a field that is already present.  A tuple of pairs is used
    # because dict.iteritems() does not exist on Python 3.
    for k, v in (('date', 'pubdate'), ('summary', 'comments')):
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            if not mf.get('is_multiple'):
                # Single-valued field: keep only the first header entry.
                val = val[0]
            if k == 'series':
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                try:
                    # Clamp the rating to the range 0..10.
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    continue
            if mf.get('datatype') == 'datetime':
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
Ejemplo n.º 13
0
def sanitize_comments_html(html):
    """Re-render *html* by round-tripping it through html2text and Markdown.

    The input is turned into text with ``html2text`` and that text is
    converted by a default ``Markdown`` instance; the converted markup is
    returned as-is.
    """
    from calibre.ebooks.markdown import Markdown
    plain = html2text(html)
    return Markdown().convert(plain)
Ejemplo n.º 14
0
def sanitize_comments_html(html):
    """Re-render *html* through html2text and Markdown, dropping newlines.

    Conversion uses a ``Markdown`` instance created with
    ``safe_mode='remove'``; every run of newline characters is removed from
    its output before returning.
    """
    from calibre.ebooks.markdown import Markdown
    plain = html2text(html)
    rendered = Markdown(safe_mode='remove').convert(plain)
    return re.sub('\n+', '', rendered)
Ejemplo n.º 15
0
def sanitize_comments_html(html):
    """Return *html* rebuilt from its html2text form by Markdown.

    ``html2text`` produces the intermediate text and a default ``Markdown``
    instance converts it back; no further post-processing is applied.
    """
    from calibre.ebooks.markdown import Markdown
    as_text = html2text(html)
    converter = Markdown()
    return converter.convert(as_text)
Ejemplo n.º 16
0
def convert_markdown(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Render Markdown *txt* into ``HTML_TEMPLATE`` under *title*.

    Only extension names that, once lower-cased, appear in
    ``MD_EXTENSIONS`` are enabled; each is given the calibre extensions
    package prefix before being passed to ``Markdown``.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    package = 'calibre.ebooks.markdown.extensions.'
    lowered_names = (name.lower() for name in extensions)
    enabled = [package + name for name in lowered_names if name in MD_EXTENSIONS]
    converter = Markdown(extensions=enabled)
    return HTML_TEMPLATE % (title, converter.convert(txt))
Ejemplo n.º 17
0
def sanitize_comments_html(html):
    """Re-render *html* through html2text and Markdown, dropping newlines.

    Conversion uses a ``Markdown`` instance created with
    ``safe_mode='remove'``; every run of newline characters is removed from
    its output before returning.
    """
    plain = html2text(html)
    rendered = Markdown(safe_mode='remove').convert(plain)
    return re.sub('\n+', '', rendered)
Ejemplo n.º 18
0
def test_markdown():
    """Smoke-test that Markdown and bleach can be imported and exercised.

    Builds a ``Markdown`` instance with the ``extra`` extension, runs
    ``bleach.clean`` on a short sample string, and prints a success message.
    """
    from calibre.ebooks.markdown import Markdown
    Markdown(extensions=['extra'])
    import bleach
    sample = u'xxx<script>a</script>'
    bleach.clean(sample)
    print('Markdown OK!')
Ejemplo n.º 19
0
def sanitize_comments_html(html):
    """Return a newline-free Markdown re-rendering of *html*.

    ``html2text`` produces the intermediate text, a ``Markdown`` instance
    created with ``safe_mode='remove'`` converts it, and every run of
    newline characters is deleted from the converted output.
    """
    as_text = html2text(html)
    converter = Markdown(safe_mode='remove')
    markup = converter.convert(as_text)
    return re.sub('\n+', '', markup)