Example #1
0
def smarten_punctuation(container, report):
    """Smarten punctuation in every spine item of ``container``.

    Each spine item is decoded, passed through calibre's punctuation
    smartener and, only if the text changed, rewritten in place as UTF-8
    with a BOM.

    :param container: object providing ``spine_items``, ``abspath_to_name``,
        ``open``, ``decode``, ``parsed``, ``dirty`` and ``log``.
    :param report: callable invoked with a human-readable status message.
    :return: ``True`` if at least one spine item was modified, else ``False``.
    """
    # Aliased so the imported helper does not shadow this function's name.
    from calibre.ebooks.conversion.preprocess import (
        smarten_punctuation as smarten)

    any_modified = False
    for spine_path in container.spine_items:
        name = container.abspath_to_name(spine_path)
        with container.open(name, 'r+b') as stream:
            original = container.decode(stream.read())
            updated = smarten(original, container.log)
            modified = updated != original
            if modified:
                report(_('Smartened punctuation in: %s') % name)
                updated = strip_encoding_declarations(updated)
                # Overwrite the file contents in place.
                stream.seek(0)
                stream.truncate()
                stream.write(codecs.BOM_UTF8 + updated.encode('utf-8'))
        if not modified:
            continue
        # Remove existing <meta http-equiv> elements; per the original note,
        # an encoding declaration is added automatically when the document
        # is serialized.
        tree = container.parsed(name)
        for meta in tree.xpath(
                'descendant::*[local-name()="meta" and @http-equiv]'):
            meta.getparent().remove(meta)
        container.dirty(name)
        any_modified = True

    if not any_modified:
        report(_('No punctuation that could be smartened found'))
    return any_modified
Example #2
0
def smarten_punctuation(container, report):
    """Run the punctuation smartener over all spine items of ``container``.

    A spine item whose text is altered by the smartener is written back
    in place (UTF-8 with BOM), has its ``<meta http-equiv>`` elements
    removed from the parsed tree, and is marked dirty on the container.

    :param container: object exposing ``spine_items``, ``abspath_to_name``,
        ``open``, ``decode``, ``parsed``, ``dirty`` and ``log``.
    :param report: callable that receives progress messages.
    :return: ``True`` when any spine item was changed, ``False`` otherwise.
    """
    # Local alias keeps the helper distinct from this function's own name.
    from calibre.ebooks.conversion.preprocess import (
        smarten_punctuation as apply_smartening)

    meta_query = 'descendant::*[local-name()="meta" and @http-equiv]'
    touched_any = False
    for item_path in container.spine_items:
        name = container.abspath_to_name(item_path)
        rewritten = False
        with container.open(name, 'r+b') as fobj:
            before = container.decode(fobj.read())
            after = apply_smartening(before, container.log)
            if after != before:
                rewritten = True
                report(_('Smartened punctuation in: %s') % name)
                payload = strip_encoding_declarations(after).encode('utf-8')
                # Replace the whole file content with the new markup.
                fobj.seek(0)
                fobj.truncate()
                fobj.write(codecs.BOM_UTF8 + payload)
        if rewritten:
            # Drop stale <meta http-equiv> elements; the original note says
            # an encoding declaration is added automatically on
            # serialization.
            parsed_root = container.parsed(name)
            for stale_meta in parsed_root.xpath(meta_query):
                stale_meta.getparent().remove(stale_meta)
            container.dirty(name)
            touched_any = True

    if not touched_any:
        report(_('No punctuation that could be smartened found'))
    return touched_any
Example #3
0
 def postprocess_book(self, oeb, opts, log):
     """Clean up each spine item of ``oeb`` after conversion.

     For every spine item whose data supports ``xpath``:

     * all ``metadata`` and ``guide`` elements in the XHTML namespace are
       removed from the tree;
     * if the first ``body`` has exactly one child and that child is a
       ``pre`` element, its text is re-processed into markup and the
       ``pre`` is turned into a ``div`` holding the result.

     :param oeb: book object exposing a ``spine`` of items with ``data``.
     :param opts: options object; ``opts.smarten_punctuation`` is read.
     :param log: not used by this method (``self.log`` is used instead).
     """
     from calibre.ebooks.oeb.base import XHTML_NS, XPath, XHTML
     for item in oeb.spine:
         tree = item.data
         if not hasattr(tree, 'xpath'):
             continue

         for unwanted in ('metadata', 'guide'):
             for node in XPath('//h:' + unwanted)(tree):
                 node.getparent().remove(node)

         bodies = XPath('//h:body')(tree)
         if not bodies:
             continue
         first_body = bodies[0]
         if len(first_body) != 1 or first_body[0].tag != XHTML('pre'):
             continue

         pre = first_body[0]
         # Imported lazily: only needed for the single-<pre> case.
         from calibre.ebooks.txt.processor import convert_basic, \
             separate_paragraphs_single_line
         from calibre.ebooks.chardet import xml_to_unicode
         from lxml import etree
         import copy
         self.log('LIT file with all text in singe <pre> tag detected')

         markup = convert_basic(separate_paragraphs_single_line(pre.text))
         markup = markup.replace('<html>', '<html xmlns="%s">' % XHTML_NS)
         markup = xml_to_unicode(
             markup, strip_encoding_pats=True, resolve_entities=True)[0]
         if opts.smarten_punctuation:
             # SmartyPants skips text inside <pre> tags
             from calibre.ebooks.conversion.preprocess import smarten_punctuation
             markup = smarten_punctuation(markup, self.log)

         # Parse before mutating ``pre`` so a parse failure leaves it intact.
         new_bodies = XPath('//h:body')(etree.fromstring(markup))
         pre.tag = XHTML('div')
         pre.text = ''
         for elem in new_bodies:
             pre.append(copy.deepcopy(elem))
Example #4
0
 def postprocess_book(self, oeb, opts, log):
     """Post-process the spine items of ``oeb``.

     Removes XHTML-namespace ``metadata`` and ``guide`` elements from each
     spine item that supports ``xpath``. When the first ``body`` of an item
     holds a single ``pre`` child, the ``pre`` text is converted to markup
     (optionally with smartened punctuation), the ``pre`` is retagged as a
     ``div``, and copies of the converted ``body`` nodes are appended to it.

     :param oeb: book object whose ``spine`` items carry parsed ``data``.
     :param opts: options object; only ``smarten_punctuation`` is read.
     :param log: unused here; logging goes through ``self.log``.
     """
     from calibre.ebooks.oeb.base import XHTML_NS, XPath, XHTML
     find_body = XPath('//h:body')
     for spine_item in oeb.spine:
         doc = spine_item.data
         if not hasattr(doc, 'xpath'):
             continue

         for bad in ('metadata', 'guide'):
             for match in XPath('//h:' + bad)(doc):
                 match.getparent().remove(match)

         found = find_body(doc)
         if not found:
             continue
         body = found[0]
         only_pre = len(body) == 1 and body[0].tag == XHTML('pre')
         if not only_pre:
             continue

         pre = body[0]
         # Deferred imports: used solely when a lone <pre> is present.
         from calibre.ebooks.txt.processor import convert_basic, \
             separate_paragraphs_single_line
         from calibre.ebooks.chardet import xml_to_unicode
         from lxml import etree
         import copy
         self.log('LIT file with all text in singe <pre> tag detected')

         text = separate_paragraphs_single_line(pre.text)
         namespaced_open = '<html xmlns="%s">' % XHTML_NS
         text = convert_basic(text).replace('<html>', namespaced_open)
         text = xml_to_unicode(text, strip_encoding_pats=True,
                               resolve_entities=True)[0]
         if opts.smarten_punctuation:
             # SmartyPants skips text inside <pre> tags
             from calibre.ebooks.conversion.preprocess import smarten_punctuation
             text = smarten_punctuation(text, self.log)

         # Parse first; ``pre`` is only modified once parsing succeeded.
         converted = find_body(etree.fromstring(text))
         pre.tag = XHTML('div')
         pre.text = ''
         for node in converted:
             pre.append(copy.deepcopy(node))
Example #5
0
 def smarten_punctuation(self):
     """Smarten punctuation in ``self.html``.

     ``self.html`` is reassigned only when the smartener actually changed
     the text.
     """
     # Aliased so the helper does not shadow this method's name locally.
     from calibre.ebooks.conversion.preprocess import (
         smarten_punctuation as smarten)
     current = self.html
     smartened = smarten(current)
     if current != smartened:
         self.html = smartened