Example #1
0
def sanitize_comments_html(html):
    """Rebuild comment HTML by round-tripping it through text and Markdown.

    The markup is flattened with ``html2text``, rendered back to HTML by a
    ``Markdown`` instance created with ``safe_mode="remove"``, and every run
    of newline characters is then deleted from the rendered output.

    :param html: comment markup to clean
    :return: the re-rendered HTML with all newlines removed
    """
    from calibre.ebooks.markdown import Markdown

    plain = html2text(html)
    # NOTE(review): safe_mode="remove" presumably makes Markdown drop raw
    # HTML found in the text -- confirm against the bundled Markdown version.
    renderer = Markdown(safe_mode="remove")
    rendered = renderer.convert(plain)
    return re.sub("\n+", "", rendered)
Example #2
0
def sanitize_comments_html(html):
    """Clean comment HTML via a text/Markdown round trip plus ``bleach``.

    The markup is flattened with ``html2text``, rendered back to HTML with a
    default ``Markdown`` instance, passed through ``bleach.clean`` and finally
    stripped of every run of newline characters.

    :param html: comment markup to clean
    :return: the cleaned HTML with all newlines removed
    """
    from calibre.ebooks.markdown import Markdown
    import bleach

    as_text = html2text(html)
    rendered = Markdown().convert(as_text)
    scrubbed = bleach.clean(rendered)
    return re.sub(u'\n+', u'', scrubbed)
Example #3
0
def sanitize_comments_html(html):
    """Return a cleaned, single-line version of the given comment HTML.

    Steps, in order: ``html2text`` turns the markup into text, a default
    ``Markdown`` instance renders that text to HTML, ``bleach.clean`` is
    applied to the rendering, and all runs of newlines are deleted.

    :param html: comment markup to clean
    :return: the cleaned HTML with all newlines removed
    """
    from calibre.ebooks.markdown import Markdown
    import bleach

    text_form = html2text(html)
    markdown_html = Markdown().convert(text_form)
    cleaned_html = bleach.clean(markdown_html)
    # Deleting newline runs leaves the result on a single line.
    return re.sub(u'\n+', u'', cleaned_html)
Example #4
0
def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')):
    """Render Markdown text into a complete HTML document string.

    :param txt: Markdown source text
    :param title: value substituted into the first slot of ``HTML_TEMPLATE``
    :param extensions: extension names; each is lower-cased and kept only
        if it appears in ``MD_EXTENSIONS``
    :return: ``HTML_TEMPLATE`` filled with the title and the rendered body
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown

    # Keep only the extensions that are listed in MD_EXTENSIONS.
    enabled = []
    for name in extensions:
        lowered = name.lower()
        if lowered in MD_EXTENSIONS:
            enabled.append(lowered)

    # The extension list is passed positionally, with safe mode switched off.
    renderer = Markdown(enabled, safe_mode=False)
    body = renderer.convert(txt)
    return HTML_TEMPLATE % (title, body)
Example #5
0
def convert_markdown(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Render Markdown text into a complete HTML document string.

    :param txt: Markdown source text
    :param title: value substituted into the first slot of ``HTML_TEMPLATE``
    :param extensions: short extension names; each is lower-cased, kept only
        if it appears in ``MD_EXTENSIONS`` and expanded to its module path
        under ``calibre.ebooks.markdown.extensions``
    :return: ``HTML_TEMPLATE`` filled with the title and the rendered body
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown

    module_paths = []
    for name in extensions:
        short_name = name.lower()
        if short_name in MD_EXTENSIONS:
            module_paths.append(
                'calibre.ebooks.markdown.extensions.' + short_name)

    renderer = Markdown(extensions=module_paths)
    body = renderer.convert(txt)
    return HTML_TEMPLATE % (title, body)
Example #6
0
def convert_markdown_with_metadata(txt,
                                   title='',
                                   extensions=DEFAULT_MD_EXTENSIONS):
    """Render Markdown text to HTML and collect the metadata it declares.

    The ``meta`` Markdown extension is always enabled in addition to the
    requested ones, and the values it exposes on ``md.Meta`` are copied onto
    a new ``Metadata`` object.

    :param txt: Markdown source text
    :param title: initial title for the ``Metadata`` object; when empty the
        translated string ``'Unknown'`` is used
    :param extensions: short extension names; each is lower-cased, kept only
        if it appears in ``MD_EXTENSIONS`` and expanded to its module path
        under ``calibre.ebooks.markdown.extensions``
    :return: a ``(mi, html)`` tuple where ``mi`` is the populated
        ``Metadata`` object and ``html`` is ``HTML_TEMPLATE`` filled with
        ``mi.title`` and the rendered body
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values
    extensions = [
        'calibre.ebooks.markdown.extensions.' + x.lower() for x in extensions
        if x.lower() in MD_EXTENSIONS
    ]
    # md.Meta is read below, so the meta extension must always be loaded
    # (presumably it is what populates that attribute -- confirm).
    meta_ext = 'calibre.ebooks.markdown.extensions.meta'
    if meta_ext not in extensions:
        extensions.append(meta_ext)
    md = Markdown(extensions=extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    m = md.Meta
    # Accept 'date' and 'summary' as aliases, unless the canonical key is
    # already present. dict.items() is used instead of the Python 2-only
    # iteritems() so this loop runs on both Python 2 and Python 3.
    for k, v in {'date': 'pubdate', 'summary': 'comments'}.items():
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split(
    ):
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            # Single-valued fields take only the first entry of the value
            # (presumably a list of lines from the meta extension).
            if not mf.get('is_multiple'):
                val = val[0]
            if k == 'series':
                # Split the series name from its index; default the index to 1.
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                # Clamp to the 0..10 range; an unparsable rating is skipped.
                try:
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    continue
            if mf.get('datatype') == 'datetime':
                # A date that cannot be parsed is skipped (best effort).
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
Example #7
0
def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Render Markdown text to HTML and collect the metadata it declares.

    The ``meta`` Markdown extension is always enabled in addition to the
    requested ones, and the values it exposes on ``md.Meta`` are copied onto
    a new ``Metadata`` object.

    :param txt: Markdown source text
    :param title: initial title for the ``Metadata`` object; when empty the
        translated string ``'Unknown'`` is used
    :param extensions: short extension names; each is lower-cased, kept only
        if it appears in ``MD_EXTENSIONS`` and expanded to its module path
        under ``calibre.ebooks.markdown.extensions``
    :return: a ``(mi, html)`` tuple where ``mi`` is the populated
        ``Metadata`` object and ``html`` is ``HTML_TEMPLATE`` filled with
        ``mi.title`` and the rendered body
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values
    extensions = ['calibre.ebooks.markdown.extensions.' + x.lower() for x in extensions if x.lower() in MD_EXTENSIONS]
    # md.Meta is read below, so the meta extension must always be loaded
    # (presumably it is what populates that attribute -- confirm).
    meta_ext = 'calibre.ebooks.markdown.extensions.meta'
    if meta_ext not in extensions:
        extensions.append(meta_ext)
    md = Markdown(extensions=extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    m = md.Meta
    # Accept 'date' and 'summary' as aliases, unless the canonical key is
    # already present. dict.items() is used instead of the Python 2-only
    # iteritems() so this loop runs on both Python 2 and Python 3.
    for k, v in {'date':'pubdate', 'summary':'comments'}.items():
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            # Single-valued fields take only the first entry of the value
            # (presumably a list of lines from the meta extension).
            if not mf.get('is_multiple'):
                val = val[0]
            if k == 'series':
                # Split the series name from its index; default the index to 1.
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                # Clamp to the 0..10 range; an unparsable rating is skipped.
                try:
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    continue
            if mf.get('datatype') == 'datetime':
                # A date that cannot be parsed is skipped (best effort).
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
Example #8
0
def sanitize_comments_html(html):
    """Rebuild comment HTML by round-tripping it through text and Markdown.

    The markup is flattened with ``html2text`` and the resulting text is
    rendered back to HTML by a default ``Markdown`` instance.

    :param html: comment markup to clean
    :return: the HTML produced by the Markdown renderer
    """
    from calibre.ebooks.markdown import Markdown

    plain = html2text(html)
    return Markdown().convert(plain)
Example #9
0
def sanitize_comments_html(html):
    """Return a single-line, re-rendered version of the given comment HTML.

    ``html2text`` turns the markup into text, a ``Markdown`` instance created
    with ``safe_mode='remove'`` renders it back to HTML, and all runs of
    newline characters are deleted from that rendering.

    :param html: comment markup to clean
    :return: the re-rendered HTML with all newlines removed
    """
    from calibre.ebooks.markdown import Markdown

    as_text = html2text(html)
    # NOTE(review): safe_mode='remove' presumably makes Markdown drop raw
    # HTML found in the text -- confirm against the bundled Markdown version.
    converter = Markdown(safe_mode='remove')
    rendered = converter.convert(as_text)
    return re.sub('\n+', '', rendered)
Example #10
0
def sanitize_comments_html(html):
    """Re-render comment HTML from its text form.

    ``html2text`` converts the markup to text, which a default ``Markdown``
    instance then converts back into HTML.

    :param html: comment markup to clean
    :return: the HTML produced by the Markdown renderer
    """
    from calibre.ebooks.markdown import Markdown

    text_form = html2text(html)
    renderer = Markdown()
    return renderer.convert(text_form)
Example #11
0
def sanitize_comments_html(html):
    """Rebuild comment HTML by round-tripping it through text and Markdown.

    The markup is flattened with ``html2text``, rendered back to HTML by a
    ``Markdown`` instance created with ``safe_mode='remove'``, and every run
    of newline characters is then deleted from the rendered output.

    :param html: comment markup to clean
    :return: the re-rendered HTML with all newlines removed
    """
    plain = html2text(html)
    # NOTE(review): safe_mode='remove' presumably makes Markdown drop raw
    # HTML found in the text -- confirm against the Markdown version in use.
    renderer = Markdown(safe_mode='remove')
    rendered = renderer.convert(plain)
    return re.sub('\n+', '', rendered)
Example #12
0
def sanitize_comments_html(html):
    """Return a single-line, re-rendered version of the given comment HTML.

    ``html2text`` turns the markup into text, a ``Markdown`` instance created
    with ``safe_mode='remove'`` renders it back to HTML, and all runs of
    newline characters are deleted from that rendering.

    :param html: comment markup to clean
    :return: the re-rendered HTML with all newlines removed
    """
    as_text = html2text(html)
    # NOTE(review): safe_mode='remove' presumably strips raw HTML from the
    # rendering -- confirm against the Markdown version in use.
    converter = Markdown(safe_mode='remove')
    markdown_html = converter.convert(as_text)
    # Deleting newline runs leaves the result on a single line.
    return re.sub('\n+', '', markdown_html)
Example #13
0
def convert_markdown(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Render Markdown text into a complete HTML document string.

    :param txt: Markdown source text
    :param title: value substituted into the first slot of ``HTML_TEMPLATE``
    :param extensions: short extension names; each is lower-cased, kept only
        if it appears in ``MD_EXTENSIONS`` and expanded to its module path
        under ``calibre.ebooks.markdown.extensions``
    :return: ``HTML_TEMPLATE`` filled with the title and the rendered body
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown

    # Expand each recognised short name to its extension module path.
    qualified = []
    for ext_name in extensions:
        lowered = ext_name.lower()
        if lowered in MD_EXTENSIONS:
            qualified.append('calibre.ebooks.markdown.extensions.' + lowered)

    renderer = Markdown(extensions=qualified)
    rendered_body = renderer.convert(txt)
    return HTML_TEMPLATE % (title, rendered_body)