Example #1
0
def beautify_text(raw, syntax):
    """Return a pretty-printed version of ``raw``.

    ``syntax`` selects the code path:

    * ``'xml'`` -- encoding declarations are stripped, the text is parsed
      with ``etree.fromstring`` and the tree is handed to
      ``pretty_xml_tree``.
    * ``'css'`` -- the text is parsed with a cssutils ``CSSParser``
      (validation off) and serialized through calibre's ``serialize``
      with the ``'text/css'`` media type.
    * anything else -- the text is parsed with calibre's ``parse`` and
      the tree is handed to ``pretty_html_tree``.

    The XML and HTML paths return ``etree.tostring(tree, encoding=unicode)``;
    the CSS path returns whatever ``serialize`` returns.
    """
    from lxml import etree
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
    from calibre.ebooks.chardet import strip_encoding_declarations

    if syntax == 'xml':
        tree = etree.fromstring(strip_encoding_declarations(raw))
        pretty_xml_tree(tree)
        return etree.tostring(tree, encoding=unicode)

    if syntax == 'css':
        import logging
        from calibre.ebooks.oeb.base import serialize, _css_logger
        from calibre.ebooks.oeb.polish.utils import setup_cssutils_serialization
        from cssutils import CSSParser, log

        # The serializer setup receives the editor's tab stop width preference.
        setup_cssutils_serialization(tprefs['editor_tab_stop_width'])
        log.setLevel(logging.WARN)
        log.raiseExceptions = False
        css_parser = CSSParser(
            loglevel=logging.WARNING,
            # We don't care about @import rules, so the fetcher never
            # returns any content.
            fetcher=lambda x: (None, None),
            log=_css_logger,
        )
        sheet = css_parser.parseString(raw, href='<string>', validate=False)
        return serialize(sheet, 'text/css')

    # Every other syntax is treated as HTML.
    tree = parse(raw, line_numbers=False)
    pretty_html_tree(None, tree)
    return etree.tostring(tree, encoding=unicode)
Example #2
0
File: utils.py Project: kba/calibre
def parse_css(data, fname='<string>', is_declaration=False, decode=None,
              log_level=None, css_preprocessor=None):
    """Parse CSS text with cssutils and return the parsed object.

    :param data: CSS source. ``bytes`` input is decoded first; anything
        else is passed on as-is.
    :param fname: Passed as ``href`` to ``CSSParser.parseString``.
    :param is_declaration: When true, ``data`` is parsed with
        ``parseStyle`` instead of ``parseString``.
    :param decode: Optional callable used to decode ``bytes`` input. When
        ``None``, the bytes are decoded as UTF-8.
    :param log_level: Level applied to the cssutils log and given to the
        parser. Defaults to ``logging.WARNING``.
    :param css_preprocessor: Optional callable applied to the (decoded)
        text before parsing.
    :return: The result of ``parseStyle`` or ``parseString``; validation
        is disabled in both cases.
    """
    if log_level is None:
        import logging
        log_level = logging.WARNING
    from cssutils import CSSParser, log
    from calibre.ebooks.oeb.base import _css_logger

    log.setLevel(log_level)
    log.raiseExceptions = False

    if isinstance(data, bytes):
        if decode is None:
            data = data.decode('utf-8')
        else:
            data = decode(data)
    if css_preprocessor is not None:
        data = css_preprocessor(data)

    css_parser = CSSParser(
        loglevel=log_level,
        # We don't care about @import rules, so the fetcher never returns
        # any content.
        fetcher=lambda x: (None, None),
        log=_css_logger,
    )
    if is_declaration:
        return css_parser.parseStyle(data, validate=False)
    return css_parser.parseString(data, href=fname, validate=False)
Example #3
0
def beautify_text(raw, syntax):
    """Pretty-print ``raw``, interpreting it as XML, CSS or HTML.

    ``syntax`` equal to ``'xml'`` or ``'css'`` picks the matching branch;
    any other value goes through the HTML branch. The XML and HTML
    branches build a tree, prettify it in place and return
    ``etree.tostring(tree, encoding=unicode)``. The CSS branch returns the
    output of calibre's ``serialize`` for the parsed sheet.
    """
    from lxml import etree
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
    from calibre.ebooks.chardet import strip_encoding_declarations

    if syntax == 'xml':
        # Encoding declarations are removed before lxml sees the text.
        cleaned = strip_encoding_declarations(raw)
        document = etree.fromstring(cleaned)
        pretty_xml_tree(document)
    elif syntax == 'css':
        import logging
        from calibre.ebooks.oeb.base import serialize, _css_logger
        from calibre.ebooks.oeb.polish.utils import setup_cssutils_serialization
        from cssutils import CSSParser, log

        def ignore_imports(x):
            # We don't care about @import rules: hand back no content.
            return (None, None)

        tab_width = tprefs['editor_tab_stop_width']
        setup_cssutils_serialization(tab_width)
        log.setLevel(logging.WARN)
        log.raiseExceptions = False
        css_parser = CSSParser(loglevel=logging.WARNING,
                               fetcher=ignore_imports, log=_css_logger)
        stylesheet = css_parser.parseString(
            raw, href='<string>', validate=False)
        # CSS is serialized by calibre, not by lxml, so return right here.
        return serialize(stylesheet, 'text/css')
    else:
        document = parse(raw, line_numbers=False)
        pretty_html_tree(None, document)
    return etree.tostring(document, encoding=unicode)
Example #4
0
 def parse_css(self, data, fname):
     """Decode, preprocess and parse a stylesheet with cssutils.

     ``data`` is run through ``self.decode`` and then
     ``self.css_preprocessor`` before being parsed. ``fname`` is passed
     to the parser as ``href``. Returns the result of
     ``CSSParser.parseString`` (validation disabled).
     """
     from cssutils import CSSParser, log

     log.raiseExceptions = False
     log.setLevel(logging.WARN)

     css_text = self.css_preprocessor(self.decode(data))
     css_parser = CSSParser(
         loglevel=logging.WARNING,
         # We don't care about @import rules, so the fetcher never
         # returns any content.
         fetcher=lambda x: (None, None),
         log=_css_logger,
     )
     return css_parser.parseString(css_text, href=fname, validate=False)
Example #5
0
 def parse_css(self, data, fname='<string>'):
     """Parse a stylesheet with cssutils and return the parsed sheet.

     ``data`` is always passed through ``self.decode``. The result is
     additionally passed through ``self.css_preprocessor`` unless
     ``self.tweak_mode`` is true. ``fname`` becomes the ``href`` given to
     ``CSSParser.parseString``; validation is disabled.
     """
     from cssutils import CSSParser, log

     log.raiseExceptions = False
     log.setLevel(logging.WARN)

     css_text = self.decode(data)
     if not self.tweak_mode:
         css_text = self.css_preprocessor(css_text)

     css_parser = CSSParser(
         loglevel=logging.WARNING,
         # We don't care about @import rules, so the fetcher never
         # returns any content.
         fetcher=lambda x: (None, None),
         log=_css_logger,
     )
     return css_parser.parseString(css_text, href=fname, validate=False)
Example #6
0
 def parse_css(self, data, fname='<string>', is_declaration=False):
     """Parse a stylesheet or a bare declaration block with cssutils.

     ``bytes`` input is decoded with ``self.decode``; other input is used
     as-is. Unless ``self.tweak_mode`` is true the text is then passed
     through ``self.css_preprocessor``.

     When ``is_declaration`` is true the text is parsed with
     ``parseStyle``; otherwise with ``parseString`` using ``fname`` as
     ``href``. Validation is disabled in both cases and the parser's
     result is returned.
     """
     from cssutils import CSSParser, log

     log.raiseExceptions = False
     log.setLevel(logging.WARN)

     css_text = self.decode(data) if isinstance(data, bytes) else data
     if not self.tweak_mode:
         css_text = self.css_preprocessor(css_text)

     css_parser = CSSParser(
         loglevel=logging.WARNING,
         # We don't care about @import rules, so the fetcher never
         # returns any content.
         fetcher=lambda x: (None, None),
         log=_css_logger,
     )
     if is_declaration:
         return css_parser.parseStyle(css_text, validate=False)
     return css_parser.parseString(css_text, href=fname, validate=False)
Example #7
0
 def __init__(self, *args, **kw):
     """Initialise parser state; arguments are forwarded to HTMLParser.

     Besides running ``HTMLParser.__init__`` with ``*args`` and ``**kw``,
     this raises the cssutils log threshold to ``FATAL``, stores the
     cssutils ``parseString`` function and ``css.ColorValue`` on the
     instance, and resets the per-instance parsing state.
     """
     # Delay expensive import as long as possible
     from cssutils import parseString, log, css

     # change cssutils default logging
     log.setLevel(FATAL)

     # References to the lazily imported cssutils objects are kept on the
     # instance (presumably so other methods can reach them without a
     # module-level import -- confirm against the rest of the class).
     self.parseString, self.ColorValue = parseString, css.ColorValue

     HTMLParser.__init__(self, *args, **kw)

     self.sami = self.line = self.last_element = ''
     self.styles = {}
     self.langs = set()
     self.queue = deque()

     # Private copy of the entity table, extended with 'apos' (U+0027).
     entity_table = name2codepoint.copy()
     entity_table['apos'] = 0x0027
     self.name2codepoint = entity_table

     # Assigned after the base initialiser has run, so this is the value
     # left on the instance.
     self.convert_charrefs = False
Example #8
0
def parse_css(data, fname='<string>', is_declaration=False, decode=None, log_level=None, css_preprocessor=None):
    """Parse CSS with cssutils, returning the parser's result.

    ``data`` may be text or ``bytes``. Bytes are decoded with ``decode``
    when one is given, otherwise as UTF-8. If ``css_preprocessor`` is
    given it is applied to the text before parsing. ``log_level``
    (default ``logging.WARNING``) is applied to the cssutils log and
    passed to the parser.

    With ``is_declaration`` true the text is parsed via ``parseStyle``;
    otherwise via ``parseString`` with ``fname`` as ``href``. Validation
    is disabled for both.
    """
    if log_level is None:
        import logging
        log_level = logging.WARNING
    from cssutils import CSSParser, log
    from calibre.ebooks.oeb.base import _css_logger

    def no_fetch(x):
        # We don't care about @import rules: hand back no content.
        return (None, None)

    log.setLevel(log_level)
    log.raiseExceptions = False

    if isinstance(data, bytes):
        data = decode(data) if decode is not None else data.decode('utf-8')
    if css_preprocessor is not None:
        data = css_preprocessor(data)

    css_parser = CSSParser(loglevel=log_level, fetcher=no_fetch, log=_css_logger)
    if not is_declaration:
        return css_parser.parseString(data, href=fname, validate=False)
    return css_parser.parseStyle(data, validate=False)
Example #9
0
        cssproperties)
try:
    from cssutils.css import PropertyValue
except ImportError:
    raise RuntimeError('You need cssutils >= 0.9.9 for calibre')
from cssutils import (profile as cssprofiles, parseString, parseStyle, log as
        cssutils_log, CSSParser, profiles, replaceUrls)
from lxml import etree
from cssselect import HTMLTranslator

from calibre import force_unicode
from calibre.ebooks import unit_convert
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize

# Set the level of the cssutils log object to WARN at import time.
cssutils_log.setLevel(logging.WARN)

# Cache used by html_css_stylesheet(): stays None until the first call
# parses html.css.
_html_css_stylesheet = None
# Bound css_to_xpath method of a single HTMLTranslator instance created at
# import time.
css_to_xpath = HTMLTranslator().css_to_xpath

def html_css_stylesheet():
    """Return the parsed ``html.css`` stylesheet, loading it on first use.

    On the first call the file ``html.css`` located next to this module is
    read as bytes, parsed with ``parseString`` (validation disabled) and
    given the namespace prefix ``h`` bound to ``XHTML_NS``. The result is
    stored in the module-level ``_html_css_stylesheet`` and returned
    unchanged by every later call.
    """
    global _html_css_stylesheet
    if _html_css_stylesheet is None:
        css_path = os.path.join(os.path.dirname(__file__), 'html.css')
        # Use a context manager so the file handle is closed right away
        # instead of being left for the garbage collector.
        with open(css_path, 'rb') as css_file:
            html_css = css_file.read()
        sheet = parseString(html_css, validate=False)
        sheet.namespaces['h'] = XHTML_NS
        # Publish to the cache only once the sheet is fully set up, so a
        # failure above cannot leave a half-initialised sheet cached.
        _html_css_stylesheet = sheet
    return _html_css_stylesheet

XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS

INHERITED = set(['azimuth', 'border-collapse', 'border-spacing',
Example #10
0
                          cssproperties)
try:
    from cssutils.css import PropertyValue
except ImportError:
    raise RuntimeError('You need cssutils >= 0.9.9 for calibre')
from cssutils import (profile as cssprofiles, parseString, parseStyle, log as
                      cssutils_log, CSSParser, profiles, replaceUrls)
from lxml import etree
from cssselect import HTMLTranslator

from calibre import force_unicode
from calibre.ebooks import unit_convert
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize

# Set the level of the cssutils log object to WARN at import time.
cssutils_log.setLevel(logging.WARN)

# Cache used by html_css_stylesheet(): stays None until the first call
# parses html.css.
_html_css_stylesheet = None
# Bound css_to_xpath method of a single HTMLTranslator instance created at
# import time.
css_to_xpath = HTMLTranslator().css_to_xpath


def html_css_stylesheet():
    """Return the cached stylesheet parsed from ``html.css``.

    The file ``html.css`` in this module's directory is read and parsed
    with ``parseString`` (validation disabled) the first time this is
    called; the namespace prefix ``h`` is bound to ``XHTML_NS`` on the
    parsed sheet. Later calls return the same cached object from the
    module-level ``_html_css_stylesheet``.
    """
    global _html_css_stylesheet
    if _html_css_stylesheet is None:
        css_path = os.path.join(os.path.dirname(__file__), 'html.css')
        # The with-block closes the file deterministically; the previous
        # open(...).read() left the handle open until garbage collection.
        with open(css_path, 'rb') as css_file:
            html_css = css_file.read()
        sheet = parseString(html_css, validate=False)
        sheet.namespaces['h'] = XHTML_NS
        # Cache only the fully configured sheet.
        _html_css_stylesheet = sheet
    return _html_css_stylesheet

Example #11
0
    from html.parser import HTMLParser

from logging import FATAL
from xml.sax.saxutils import escape
from copy import deepcopy, copy

from cssutils import parseString, log, css as cssutils_css
from bs4 import BeautifulSoup, NavigableString

from .base import BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode, DEFAULT_LANGUAGE_CODE
from .exceptions import CaptionReadNoCaptions, CaptionReadSyntaxError, InvalidInputError
from .geometry import Layout, Alignment, Padding, Size


# Change cssutils' default logging: set its log level to FATAL at import
# time.
log.setLevel(FATAL)


# Skeleton SAMI document: a <head> holding an empty text/css <style>
# element and an empty <body>.
# NOTE(review): presumably the starting template that generated output is
# filled into -- confirm against the code that uses this constant.
SAMI_BASE_MARKUP = u"""
<sami>
    <head>
        <style type="text/css"/>
    </head>
    <body/>
</sami>"""


class SAMIReader(BaseReader):
    def __init__(self, *args, **kw):
        super(SAMIReader, self).__init__(*args, **kw)
        self.line = []
Example #12
0
    BaseWriter,
    CaptionSet,
    CaptionList,
    Caption,
    CaptionNode,
    DEFAULT_LANGUAGE_CODE,
)
from .exceptions import (
    CaptionReadNoCaptions,
    CaptionReadSyntaxError,
    InvalidInputError,
)
from .geometry import Layout, Alignment, Padding, Size

# Change cssutils' default logging: set its log level to FATAL at import
# time.
log.setLevel(FATAL)

# Skeleton SAMI document: a <head> holding an empty text/css <style>
# element and an empty <body>.
# NOTE(review): presumably the starting template that generated output is
# filled into -- confirm against the code that uses this constant.
SAMI_BASE_MARKUP = '''
<sami>
    <head>
        <style type="text/css"/>
    </head>
    <body/>
</sami>'''


class SAMIReader(BaseReader):
    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)
        self.line = []
        self.first_alignment = None