Esempio n. 1
0
def primary_find(pat, src):
    """Find ``pat`` in ``src``, ignoring case and accents on letters.

    When ``_icu_not_ok`` is truthy both strings are converted with
    ``ascii_text`` and searched with ``py_find``; otherwise the search is
    delegated to ``primary_icu_find``.
    """
    if not _icu_not_ok:
        return primary_icu_find(pat, src)

    # Imported lazily: only needed on the fallback path.
    from calibre.utils.filenames import ascii_text

    plain_pat = ascii_text(pat)
    plain_src = ascii_text(src)
    return py_find(plain_pat, plain_src)
Esempio n. 2
0
def primary_startswith(a, b):
    '''
    Return whether ``a`` starts with ``b``.

    If ``_icu_not_ok`` is truthy, both strings are converted with
    ``ascii_text`` and lower-cased before a plain ``str.startswith`` test.
    Otherwise ``icu_startswith`` is called with the primary collator.
    '''
    if _icu_not_ok:
        from calibre.utils.filenames import ascii_text
        haystack = ascii_text(a).lower()
        prefix = ascii_text(b).lower()
        return haystack.startswith(prefix)
    try:
        outcome = icu_startswith(_primary_collator, a, b)
    except AttributeError:
        # Presumably the cached collator has not been created yet (TODO
        # confirm); obtain one via primary_collator() and retry.
        outcome = icu_startswith(primary_collator(), a, b)
    return outcome
Esempio n. 3
0
def primary_startswith(a, b):
    '''
    Test whether ``b`` is a prefix of ``a``.

    Uses ``icu_startswith`` with the primary collator unless ``_icu_not_ok``
    is truthy, in which case the comparison is done on the lower-cased
    ``ascii_text`` forms of both strings.
    '''
    if not _icu_not_ok:
        try:
            return icu_startswith(_primary_collator, a, b)
        except AttributeError:
            # Retry with the collator returned by primary_collator().
            return icu_startswith(primary_collator(), a, b)

    from calibre.utils.filenames import ascii_text
    folded_a = ascii_text(a).lower()
    folded_b = ascii_text(b).lower()
    return folded_a.startswith(folded_b)
Esempio n. 4
0
def primary_find(pat, src):
    '''
    Find ``pat`` in ``src``, ignoring case and accents on letters.

    With ``_icu_not_ok`` truthy the search runs through ``py_find`` on the
    ``ascii_text`` forms of both strings; otherwise ``icu_find`` is used
    with the primary collator.
    '''
    if not _icu_not_ok:
        try:
            return icu_find(_primary_collator, pat, src)
        except AttributeError:
            # Retry with the collator returned by primary_collator().
            return icu_find(primary_collator(), pat, src)

    from calibre.utils.filenames import ascii_text
    plain_pat = ascii_text(pat)
    plain_src = ascii_text(src)
    return py_find(plain_pat, plain_src)
Esempio n. 5
0
def primary_strcmp(a, b):
    '''
    strcmp that ignores case and accents on letters.

    Falls back to ``py_strcmp`` on the ``ascii_text`` forms of ``a`` and
    ``b`` when ``_icu_not_ok`` is truthy; otherwise the primary collator's
    ``strcmp`` is used.
    '''
    if not _icu_not_ok:
        try:
            return _primary_collator.strcmp(a, b)
        except AttributeError:
            # Retry with the collator returned by primary_collator().
            return primary_collator().strcmp(a, b)

    from calibre.utils.filenames import ascii_text
    plain_a = ascii_text(a)
    plain_b = ascii_text(b)
    return py_strcmp(plain_a, plain_b)
Esempio n. 6
0
def primary_strcmp(a, b):
    '''
    Compare ``a`` and ``b`` ignoring case and accents on letters.

    The comparison is done by the primary collator's ``strcmp``, or by
    ``py_strcmp`` on the ``ascii_text`` forms when ``_icu_not_ok`` is truthy.
    '''
    if _icu_not_ok:
        from calibre.utils.filenames import ascii_text
        left = ascii_text(a)
        right = ascii_text(b)
        return py_strcmp(left, right)
    try:
        verdict = _primary_collator.strcmp(a, b)
    except AttributeError:
        # Presumably the cached collator is not available yet (TODO
        # confirm); fetch one through primary_collator() and retry.
        verdict = primary_collator().strcmp(a, b)
    return verdict
Esempio n. 7
0
def primary_find(pat, src):
    '''
    find that ignores case and accents on letters.

    ``icu_find`` with the primary collator does the work, except when
    ``_icu_not_ok`` is truthy: then ``py_find`` is run on the ``ascii_text``
    forms of ``pat`` and ``src``.
    '''
    if _icu_not_ok:
        from calibre.utils.filenames import ascii_text
        needle = ascii_text(pat)
        haystack = ascii_text(src)
        return py_find(needle, haystack)
    try:
        found = icu_find(_primary_collator, pat, src)
    except AttributeError:
        # Retry with the collator returned by primary_collator().
        found = icu_find(primary_collator(), pat, src)
    return found
Esempio n. 8
0
 def write_unicode_text(self, text, ignore_errors=False):
     '''
     Windows only method that writes unicode strings correctly to the
     windows console using the Win32 API.

     Nothing is written unless ``self.is_console`` is true. The text is
     handed to ``self.write_console`` in chunks; the first chunk is the
     whole string.

     :param text: the unicode string to write
     :param ignore_errors: if true, a failure that is not handled by one of
         the fallbacks below is ignored instead of being raised
     :raises: the exception built by ``ctypes.WinError`` for such failures
         when ``ignore_errors`` is false
     '''
     if self.is_console:
         from ctypes import wintypes, byref, c_wchar_p
         written = wintypes.DWORD(0)
         chunk = len(text)
         while text:
             t, text = text[:chunk], text[chunk:]
             wt = c_wchar_p(t)
             if not self.write_console(self.file_handle, wt, self.wcslen(wt), byref(written), None):
                 # Older versions of windows can fail to write large strings
                 # to console with WriteConsoleW (seen it happen on Win XP)
                 import ctypes, winerror
                 err = ctypes.get_last_error()
                 if err == winerror.ERROR_NOT_ENOUGH_MEMORY and chunk >= 128:
                     # Retry with a smaller chunk size (give up if chunk < 128)
                     chunk = chunk // 2
                     text = t + text
                     continue
                 if err == winerror.ERROR_GEN_FAILURE:
                     # On newer windows, this happens when trying to write
                     # non-ascii chars to the console and the console is set
                     # to use raster fonts (the default). In this case
                     # rather than failing, write an informative error
                     # message and the asciized version of the text.
                     print('Non-ASCII text detected. You must set your Console\'s font to'
                           ' Lucida Console or Consolas or some other TrueType font to see this text', file=self.stream, end=' -- ')
                     from calibre.utils.filenames import ascii_text
                     print(ascii_text(t + text), file=self.stream, end='')
                     # All of the remaining text (t + text) has just been
                     # emitted in asciized form, so stop here. Continuing
                     # the loop would write the tail a second time whenever
                     # chunk is smaller than the remaining text.
                     break
                 if not ignore_errors:
                     raise ctypes.WinError(err)
Esempio n. 9
0
def create_service(desc, type, port, properties, add_hostname, use_ip_address=None):
    '''
    Build the ``ServiceInfo`` object describing a service to advertise.

    :param desc: service description; it is also the first component of the
        service name (``desc + '.' + type + '.local.'``)
    :param type: service type without the trailing ``.local.`` suffix
    :param port: port of the service, coerced with ``int()``
    :param properties: passed unchanged to ``ServiceInfo``
    :param add_hostname: if true, `` (on <hostname> port <port>)`` is
        appended to ``desc``
    :param use_ip_address: address to advertise; when falsy the result of
        ``get_external_ip()`` is used instead
    :return: a ``calibre.utils.Zeroconf.ServiceInfo`` instance
    :raises ValueError: if no local IP address could be determined
    '''
    port = int(port)
    try:
        hostname = ascii_text(force_unicode(socket.gethostname())).partition('.')[0]
    except Exception:
        # Best effort: use a placeholder if the host name cannot be
        # obtained or converted. KeyboardInterrupt/SystemExit propagate.
        hostname = 'Unknown'

    if add_hostname:
        try:
            desc += ' (on %s port %d)'%(hostname, port)
        except Exception:
            try:
                desc += ' (on %s)'%hostname
            except Exception:
                # Deliberately leave desc unchanged if it cannot be decorated
                pass

    local_ip = use_ip_address or get_external_ip()
    if not local_ip:
        # Fail with a clear message instead of an obscure error from inet_aton
        raise ValueError('Failed to determine local IP address to advertise via BonJour')
    service_type = type+'.local.'
    from calibre.utils.Zeroconf import ServiceInfo
    return ServiceInfo(service_type, desc+'.'+service_type,
                          address=socket.inet_aton(local_ip),
                          port=port,
                          properties=properties,
                          server=hostname+'.local.')
Esempio n. 10
0
 def write_unicode_text(self, text, ignore_errors=False):
     '''
     Windows only method that writes unicode strings correctly to the
     windows console using the Win32 API.

     Does nothing when ``self.is_console`` is false. Otherwise ``text`` is
     passed to ``self.write_console`` chunk by chunk, starting with a single
     chunk that covers the whole string.

     :param text: the unicode string to write
     :param ignore_errors: if true, failures not covered by the fallbacks
         below are ignored rather than raised
     :raises: the exception built by ``ctypes.WinError`` for such failures
         when ``ignore_errors`` is false
     '''
     if self.is_console:
         from ctypes import wintypes, byref, c_wchar_p
         written = wintypes.DWORD(0)
         chunk = len(text)
         while text:
             t, text = text[:chunk], text[chunk:]
             wt = c_wchar_p(t)
             if not self.write_console(self.file_handle, wt, self.wcslen(wt), byref(written), None):
                 # Older versions of windows can fail to write large strings
                 # to console with WriteConsoleW (seen it happen on Win XP)
                 import ctypes, winerror
                 err = ctypes.get_last_error()
                 if err == winerror.ERROR_NOT_ENOUGH_MEMORY and chunk >= 128:
                     # Retry with a smaller chunk size (give up if chunk < 128)
                     chunk = chunk // 2
                     text = t + text
                     continue
                 if err == winerror.ERROR_GEN_FAILURE:
                     # On newer windows, this happens when trying to write
                     # non-ascii chars to the console and the console is set
                     # to use raster fonts (the default). In this case
                     # rather than failing, write an informative error
                     # message and the asciized version of the text.
                     print('Non-ASCII text detected. You must set your Console\'s font to'
                           ' Lucida Console or Consolas or some other TrueType font to see this text', file=self.stream, end=' -- ')
                     from calibre.utils.filenames import ascii_text
                     print(ascii_text(t + text), file=self.stream, end='')
                     # The whole remainder (t + text) is already on the
                     # stream, so leave the loop; looping again would repeat
                     # the unwritten tail when chunking is in effect.
                     break
                 if not ignore_errors:
                     raise ctypes.WinError(err)
Esempio n. 11
0
def create_service(desc,
                   type,
                   port,
                   properties,
                   add_hostname,
                   use_ip_address=None):
    '''
    Build the ``ServiceInfo`` object describing a service to advertise.

    :param desc: service description; also the first component of the
        service name (``desc + '.' + type + '.local.'``)
    :param type: service type without the trailing ``.local.`` suffix
    :param port: port of the service, coerced with ``int()``
    :param properties: passed unchanged to ``ServiceInfo``
    :param add_hostname: if true, `` (on <hostname> port <port>)`` is
        appended to ``desc``
    :param use_ip_address: address to advertise; when falsy the result of
        ``get_external_ip()`` is used instead
    :return: a ``calibre.utils.Zeroconf.ServiceInfo`` instance
    :raises ValueError: if no local IP address could be determined
    '''
    port = int(port)
    try:
        hostname = ascii_text(force_unicode(
            socket.gethostname())).partition('.')[0]
    except Exception:
        # Best effort: fall back to a placeholder host name, but let
        # KeyboardInterrupt/SystemExit through.
        hostname = 'Unknown'

    if add_hostname:
        try:
            desc += ' (on %s port %d)' % (hostname, port)
        except Exception:
            try:
                desc += ' (on %s)' % hostname
            except Exception:
                # Deliberately leave desc unchanged if it cannot be decorated
                pass

    local_ip = use_ip_address or get_external_ip()
    if not local_ip:
        # Fail with a clear message instead of an obscure error from inet_aton
        raise ValueError(
            'Failed to determine local IP address to advertise via BonJour')
    service_type = type + '.local.'
    from calibre.utils.Zeroconf import ServiceInfo
    return ServiceInfo(service_type,
                       desc + '.' + service_type,
                       address=socket.inet_aton(local_ip),
                       port=port,
                       properties=properties,
                       server=hostname + '.local.')
Esempio n. 12
0
def generate_anchor(name, existing):
    '''
    Return an anchor id derived from ``name`` that is not in ``existing``.

    The id is ``id_`` followed by the ``ascii_text`` form of ``name`` with
    everything except ASCII letters, digits and underscores removed and
    leading underscores stripped. On a clash ``_1``, ``_2``, ... is appended
    until the result is unused. ``existing`` is only read, never updated.
    '''
    cleaned = re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name))
    base = 'id_' + cleaned.lstrip('_')
    candidate, suffix = base, 1
    while candidate in existing:
        candidate = '%s_%d' % (base, suffix)
        suffix += 1
    return candidate
Esempio n. 13
0
def generate_anchor(name, existing):
    '''
    Build an anchor id for ``name`` that does not collide with ``existing``.

    ``name`` is converted with ``ascii_text``, reduced to ASCII letters,
    digits and underscores, stripped of leading underscores and prefixed
    with ``id_``. If that id is already in ``existing``, a numeric suffix
    (``_1``, ``_2``, ...) is tried until a free id is found.
    '''
    stem = 'id_' + re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name)).lstrip('_')
    anchor = stem
    counter = 1
    while True:
        if anchor not in existing:
            return anchor
        anchor = '%s_%d' % (stem, counter)
        counter += 1
Esempio n. 14
0
def first_char(item):
    '''
    Return the first alphanumeric character of the item's sort text.

    The text is ``item.sort`` when that attribute exists, else ``item.name``
    (``item.name`` is always evaluated). It is converted with ``ascii_text``
    before scanning. ``'A'`` is returned when the text is empty or contains
    no alphanumeric character.
    '''
    text = getattr(item, 'sort', item.name) or 'A'
    return next((ch for ch in ascii_text(text) if ch.isalnum()), 'A')
Esempio n. 15
0
def primary_sort_key(val):
    '''
    A sort key that ignores case and diacritics.

    The key comes from the primary collator's ``sort_key``; when
    ``_icu_not_ok`` is truthy it is the lower-cased ``ascii_text`` form of
    ``val`` instead.
    '''
    if not _icu_not_ok:
        try:
            return _primary_collator.sort_key(val)
        except AttributeError:
            # Retry with the collator returned by primary_collator().
            return primary_collator().sort_key(val)

    from calibre.utils.filenames import ascii_text
    plain = ascii_text(val)
    return plain.lower()
Esempio n. 16
0
def primary_sort_key(val):
    '''
    Return a sort key for ``val`` that ignores case and diacritics.

    With ``_icu_not_ok`` truthy the key is ``ascii_text(val).lower()``;
    otherwise it is produced by the primary collator's ``sort_key``.
    '''
    if _icu_not_ok:
        from calibre.utils.filenames import ascii_text
        return ascii_text(val).lower()
    try:
        key = _primary_collator.sort_key(val)
    except AttributeError:
        # Presumably the cached collator is not available yet (TODO
        # confirm); fetch one through primary_collator() and retry.
        key = primary_collator().sort_key(val)
    return key
Esempio n. 17
0
            def tpl_replace(objtplname):
                '''
                Return the replacement text for one template match.

                The braces are stripped from the matched text to get a field
                name. Names outside ``TEMPLATE_ALLOWED_FIELDS`` yield an
                empty string; otherwise the value is read from ``entry``,
                converted per field kind and passed through ``ascii_text``.
                '''
                field = re.sub(u'[\\{\\}]', u'', objtplname.group())
                if field not in TEMPLATE_ALLOWED_FIELDS:
                    return u''

                if field in ('pubdate', 'timestamp'):
                    # Keep only the part of the ISO string before 'T'
                    value = isoformat(entry[field]).partition('T')[0]
                elif field in ('tags', 'authors'):
                    # Only the first item is used
                    value = entry[field][0]
                elif field in ('id', 'series_index'):
                    value = str(entry[field])
                else:
                    value = entry[field]
                return ascii_text(value)
Esempio n. 18
0
            def tpl_replace(objtplname):
                '''
                Return the replacement text for one template match.

                The matched text, minus its braces, names a field. Fields
                not listed in ``TEMPLATE_ALLOWED_FIELDS`` are replaced by an
                empty string; for the others the value from ``entry`` is
                converted per field kind and run through ``ascii_text``.
                '''
                field = re.sub(r'[\{\}]', '', objtplname.group())
                if field not in TEMPLATE_ALLOWED_FIELDS:
                    return ''

                if field in ('pubdate', 'timestamp'):
                    # Keep only the part of the ISO string before 'T'
                    value = isoformat(entry[field]).partition('T')[0]
                elif field in ('tags', 'authors'):
                    # Only the first item is used
                    value = entry[field][0]
                elif field in ('id', 'series_index'):
                    value = unicode_type(entry[field])
                else:
                    value = entry[field]
                return ascii_text(value)
Esempio n. 19
0
def create_service(desc, service_type, port, properties, add_hostname, use_ip_address=None):
    '''
    Build the ``ServiceInfo`` object describing a service to advertise.

    :param desc: service description; also the first component of the
        service name (``desc + '.' + service_type + '.local.'``)
    :param service_type: service type without the trailing ``.local.``
    :param port: port of the service, coerced with ``int()``
    :param properties: service properties; when ``ispy3`` is false, unicode
        keys and values are encoded to ASCII bytes before use
    :param add_hostname: if true, `` (on <hostname> port <port>)`` is
        appended to ``desc``
    :param use_ip_address: address to advertise; when falsy the result of
        ``get_external_ip()`` is used instead
    :return: a ``ServiceInfo`` from ``zeroconf`` if ``ispy3`` is true, else
        from ``calibre.utils.Zeroconf``
    :raises ValueError: if no local IP address could be determined
    '''
    port = int(port)
    try:
        hostname = ascii_text(force_unicode(socket.gethostname())).partition('.')[0]
    except Exception:
        # Best effort: fall back to a placeholder host name, but let
        # KeyboardInterrupt/SystemExit through.
        hostname = 'Unknown'

    if add_hostname:
        try:
            desc += ' (on %s port %d)'%(hostname, port)
        except Exception:
            try:
                desc += ' (on %s)'%hostname
            except Exception:
                # Deliberately leave desc unchanged if it cannot be decorated
                pass

    if use_ip_address:
        local_ip = use_ip_address
    else:
        local_ip = get_external_ip()
    if not local_ip:
        raise ValueError('Failed to determine local IP address to advertise via BonJour')
    service_type = service_type+'.local.'
    service_name = desc + '.' + service_type
    server_name = hostname+'.local.'
    if ispy3:
        from zeroconf import ServiceInfo
    else:
        from calibre.utils.Zeroconf import ServiceInfo

        def enc(x):
            # Encode unicode strings to ASCII bytes; other values pass through
            if isinstance(x, unicode_type):
                x = x.encode('ascii')
            return x

        service_type = enc(service_type)
        service_name = enc(service_name)
        server_name = enc(server_name)
        if properties:
            properties = {enc(k): enc(v) for k, v in iteritems(properties)}

    return ServiceInfo(
        service_type, service_name,
        address=socket.inet_aton(local_ip),
        port=port,
        properties=properties,
        server=server_name)
Esempio n. 20
0
def create_service(desc,
                   service_type,
                   port,
                   properties,
                   add_hostname,
                   use_ip_address=None):
    '''
    Build the ``zeroconf.ServiceInfo`` describing a service to advertise.

    :param desc: service description; also the first component of the
        service name (``desc + '.' + service_type + '.local.'``)
    :param service_type: service type without the trailing ``.local.``
    :param port: port of the service, coerced with ``int()``
    :param properties: passed unchanged to ``ServiceInfo``
    :param add_hostname: if true, `` (on <hostname> port <port>)`` is
        appended to ``desc``
    :param use_ip_address: address to advertise; when falsy the result of
        ``get_external_ip()`` is used instead
    :return: a ``zeroconf.ServiceInfo`` instance
    :raises ValueError: if no local IP address could be determined
    '''
    port = int(port)
    try:
        hostname = ascii_text(force_unicode(
            socket.gethostname())).partition('.')[0]
    except Exception:
        # Best effort: fall back to a placeholder host name, but let
        # KeyboardInterrupt/SystemExit through.
        hostname = 'Unknown'

    if add_hostname:
        try:
            desc += ' (on %s port %d)' % (hostname, port)
        except Exception:
            try:
                desc += ' (on %s)' % hostname
            except Exception:
                # Deliberately leave desc unchanged if it cannot be decorated
                pass

    if use_ip_address:
        local_ip = use_ip_address
    else:
        local_ip = get_external_ip()
    if not local_ip:
        raise ValueError(
            'Failed to determine local IP address to advertise via BonJour')
    service_type = service_type + '.local.'
    service_name = desc + '.' + service_type
    server_name = hostname + '.local.'
    from zeroconf import ServiceInfo

    return ServiceInfo(service_type,
                       service_name,
                       addresses=[
                           socket.inet_aton(local_ip),
                       ],
                       port=port,
                       properties=properties,
                       server=server_name)
Esempio n. 21
0
def safe_localhost():
    '''
    Return the local host name to use in the EHLO/HELO verb.

    If ``socket.getfqdn()`` yields a dotted name, its ``ascii_text`` form is
    returned (``'localhost.localdomain'`` if that conversion fails).
    Otherwise a domain literal ``[A.B.C.D]`` is returned, built from the
    address the host name resolves to, or from ``127.0.0.1`` when
    resolution fails with ``socket.gaierror``.
    '''
    # RFC 2821 says we should use the fqdn in the EHLO/HELO verb, and
    # if that can't be calculated, that we should use a domain literal
    # instead (essentially an encoded IP address like [A.B.C.D]).
    fqdn = socket.getfqdn()
    if '.' in fqdn:
        # Some mail servers have problems with non-ascii local hostnames, see
        # https://bugs.launchpad.net/bugs/1256549
        try:
            local_hostname = ascii_text(fqdn)
        except Exception:
            # Best effort fallback; KeyboardInterrupt/SystemExit propagate
            local_hostname = 'localhost.localdomain'
    else:
        # We can't find an fqdn hostname, so use a domain literal
        addr = '127.0.0.1'
        try:
            addr = socket.gethostbyname(socket.gethostname())
        except socket.gaierror:
            pass
        local_hostname = '[%s]' % addr
    return local_hostname
Esempio n. 22
0
    def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
        """
        Recursively convert the styling of ``node`` and its children to classes.

        The computed CSS of the node is adjusted (presentational attributes
        such as align/size/face/color/bgcolor are folded into it and then
        removed, font sizes are rescaled, minimum line height and paragraph
        spacing options are applied) and serialized to CSS text. That text is
        looked up in ``styles`` to reuse an existing class name or register a
        new one, which becomes the node's ``class`` attribute. The node's
        ``style`` attribute is removed.

        :param node: the element to process; nodes whose tag is not a string
            or is outside the XHTML namespace are skipped
        :param stylizer: object whose ``style(node)`` gives the node's style
        :param names: per-class-name counters, indexed by class name stem and
            incremented here (presumably a defaultdict(int) -- TODO confirm)
        :param styles: mapping of CSS text to class name, updated in place
        :param pseudo_styles: mapping of pseudo selector to a mapping of CSS
            text to class name, updated in place
        :param psize: font size of the parent, used as the divisor when
            expressing this node's font size in em
        :param item_id: id of the item being processed; compared against
            ``calibre_jacket`` when resetting paragraph spacing
        """
        if not isinstance(node.tag, basestring) or namespace(node.tag) != XHTML_NS:
            return
        tag = barename(node.tag)
        style = stylizer.style(node)
        cssdict = style.cssdict()
        try:
            font_size = style["font-size"]
        except:
            font_size = self.sbase if self.sbase is not None else self.context.source.fbase
        # Fold the presentational align attribute into CSS and drop it
        if "align" in node.attrib:
            if tag != "img":
                cssdict["text-align"] = node.attrib["align"]
            else:
                val = node.attrib["align"]
                if val in ("middle", "bottom", "top"):
                    cssdict["vertical-align"] = val
                elif val in ("left", "right"):
                    cssdict["float"] = val
            del node.attrib["align"]
        # <font> becomes a div when it has block level descendants, else a
        # span; its size and face attributes are converted to CSS
        if node.tag == XHTML("font"):
            tags = [
                "descendant::h:%s" % x
                for x in ("p", "div", "table", "h1", "h2", "h3", "h4", "h5", "h6", "ol", "ul", "dl", "blockquote")
            ]
            tag = "div" if XPath("|".join(tags))(node) else "span"
            node.tag = XHTML(tag)
            if "size" in node.attrib:

                def force_int(raw):
                    return int(re.search(r"([0-9+-]+)", raw).group(1))

                size = node.attrib["size"].strip()
                if size:
                    fnums = self.context.source.fnums
                    if size[0] in ("+", "-"):
                        # Oh, the warcrimes
                        # Relative size: offset from 3, clamped to 1..7
                        try:
                            esize = 3 + force_int(size)
                        except:
                            esize = 3
                        if esize < 1:
                            esize = 1
                        if esize > 7:
                            esize = 7
                        font_size = fnums[esize]
                    else:
                        try:
                            font_size = fnums[force_int(size)]
                        except:
                            font_size = fnums[3]
                    cssdict["font-size"] = "%.1fpt" % font_size
                del node.attrib["size"]
            if "face" in node.attrib:
                cssdict["font-family"] = node.attrib["face"]
                del node.attrib["face"]
        if "color" in node.attrib:
            try:
                cssdict["color"] = Property("color", node.attrib["color"]).value
            except (ValueError, SyntaxErr):
                pass
            del node.attrib["color"]
        if "bgcolor" in node.attrib:
            try:
                cssdict["background-color"] = Property("background-color", node.attrib["bgcolor"]).value
            except (ValueError, SyntaxErr):
                pass
            del node.attrib["bgcolor"]
        if cssdict.get("font-weight", "").lower() == "medium":
            cssdict["font-weight"] = "normal"  # ADE chokes on font-weight medium

        fsize = font_size
        # A left floated, childless node with a font size and exactly one
        # character of text is treated as a drop cap and is not rescaled
        is_drop_cap = (
            cssdict.get("float", None) == "left"
            and "font-size" in cssdict
            and len(node) == 0
            and node.text
            and len(node.text) == 1
        )
        is_drop_cap = is_drop_cap or (
            # The docx input plugin generates drop caps that look like this
            len(node) == 1
            and not node.text
            and len(node[0]) == 0
            and node[0].text
            and not node[0].tail
            and len(node[0].text) == 1
            and "line-height" in cssdict
            and "font-size" in cssdict
        )
        # Rescale the font size via self.fmap and express it in em relative
        # to the parent size; psize then becomes this node's size
        if not self.context.disable_font_rescaling and not is_drop_cap:
            _sbase = self.sbase if self.sbase is not None else self.context.source.fbase
            dyn_rescale = dynamic_rescale_factor(node)
            if dyn_rescale is not None:
                fsize = self.fmap[_sbase]
                fsize *= dyn_rescale
                cssdict["font-size"] = "%0.5fem" % (fsize / psize)
                psize = fsize
            elif "font-size" in cssdict or tag == "body":
                fsize = self.fmap[font_size]
                try:
                    cssdict["font-size"] = "%0.5fem" % (fsize / psize)
                except ZeroDivisionError:
                    cssdict["font-size"] = "%.1fpt" % fsize
                psize = fsize

        try:
            minlh = self.context.minimum_line_height / 100.0
            if not is_drop_cap and style["line-height"] < minlh * fsize:
                cssdict["line-height"] = str(minlh)
        except:
            self.oeb.logger.exception("Failed to set minimum line-height")

        # Remove the properties listed in self.filter_css
        if cssdict:
            for x in self.filter_css:
                cssdict.pop(x, None)

        if cssdict:
            if self.lineh and self.fbase and tag != "body":
                self.clean_edges(cssdict, style, psize)
            if "display" in cssdict and cssdict["display"] == "in-line":
                cssdict["display"] = "inline"
            if self.unfloat and "float" in cssdict and cssdict.get("display", "none") != "none":
                del cssdict["display"]
            if self.untable and "display" in cssdict and cssdict["display"].startswith("table"):
                display = cssdict["display"]
                if display == "table-cell":
                    cssdict["display"] = "inline"
                else:
                    cssdict["display"] = "block"
            if "vertical-align" in cssdict and cssdict["vertical-align"] == "sup":
                cssdict["vertical-align"] = "super"
        if self.lineh and "line-height" not in cssdict:
            lineh = self.lineh / psize
            cssdict["line-height"] = "%0.5fem" % lineh

        if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ("p", "div"):
            if item_id != "calibre_jacket" or self.context.output_profile.name == "Kindle":
                for prop in ("margin", "padding", "border"):
                    for edge in ("top", "bottom"):
                        cssdict["%s-%s" % (prop, edge)] = "0pt"
            if self.context.insert_blank_line:
                cssdict["margin-top"] = cssdict["margin-bottom"] = "%fem" % self.context.insert_blank_line_size
            indent_size = self.context.remove_paragraph_spacing_indent_size
            # A negative indent size means existing indents are left alone
            keep_indents = indent_size < 0.0
            if (
                self.context.remove_paragraph_spacing
                and not keep_indents
                and cssdict.get("text-align", None) not in ("center", "right")
            ):
                cssdict["text-indent"] = "%1.1fem" % indent_size

        pseudo_classes = style.pseudo_classes(self.filter_css)
        if cssdict or pseudo_classes:
            keep_classes = set()

            if cssdict:
                # Serialize the CSS in sorted order so that identical styles
                # produce identical text and share one class
                items = sorted(cssdict.items())
                css = u";\n".join(u"%s: %s" % (key, val) for key, val in items)
                classes = node.get("class", "").strip() or "calibre"
                klass = ascii_text(STRIPNUM.sub("", classes.split()[0].replace("_", "")))
                if css in styles:
                    match = styles[css]
                else:
                    match = klass + str(names[klass] or "")
                    styles[css] = match
                    names[klass] += 1
                node.attrib["class"] = match
                keep_classes.add(match)

            # NOTE: this loop rebinds cssdict to each pseudo class dict
            for psel, cssdict in pseudo_classes.iteritems():
                items = sorted(cssdict.iteritems())
                css = u";\n".join(u"%s: %s" % (key, val) for key, val in items)
                pstyles = pseudo_styles[psel]
                if css in pstyles:
                    match = pstyles[css]
                else:
                    # We have to use a different class for each psel as
                    # otherwise you can have incorrect styles for a situation
                    # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green }
                    # If the pcalibre class for a:hover and a:link is the same,
                    # then the class attribute for a.x tags will contain both
                    # that class and the class for a.x:hover, which is wrong.
                    klass = "pcalibre"
                    match = klass + str(names[klass] or "")
                    pstyles[css] = match
                    names[klass] += 1
                keep_classes.add(match)
                node.attrib["class"] = " ".join(keep_classes)

        elif "class" in node.attrib:
            del node.attrib["class"]
        if "style" in node.attrib:
            del node.attrib["style"]
        # Children are processed with this node's font size as their psize
        for child in node:
            self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
Esempio n. 23
0
 def strip_accents(self, s):
     ''' Return ``s`` converted by ``ascii_text`` '''
     unaccented = ascii_text(s)
     return unaccented
Esempio n. 24
0
    def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize,
                     item_id):
        '''
        Recursively convert the styling of ``node`` and its children to classes.

        The computed CSS of the node is adjusted (presentational attributes
        such as align/size/face/color/bgcolor are folded into it and then
        removed, font sizes are rescaled, minimum line height and paragraph
        spacing options are applied) and serialized to CSS text. That text is
        looked up in ``styles`` to reuse an existing class name or register a
        new one, which becomes the node's ``class`` attribute. The node's
        ``style`` attribute is removed.

        :param node: the element to process; nodes whose tag is not a string
            or is outside the XHTML namespace are skipped
        :param stylizer: object whose ``style(node)`` gives the node's style
        :param names: per-class-name counters, indexed by class name stem and
            incremented here (presumably a defaultdict(int) -- TODO confirm)
        :param styles: mapping of CSS text to class name, updated in place
        :param pseudo_styles: mapping of pseudo selector to a mapping of CSS
            text to class name, updated in place
        :param psize: font size of the parent, used as the divisor when
            expressing this node's font size in em
        :param item_id: id of the item being processed; compared against
            ``calibre_jacket`` when resetting paragraph spacing
        '''
        if not isinstance(node.tag, basestring) \
           or namespace(node.tag) != XHTML_NS:
            return
        tag = barename(node.tag)
        style = stylizer.style(node)
        cssdict = style.cssdict()
        try:
            font_size = style['font-size']
        except:
            font_size = self.sbase if self.sbase is not None else \
                self.context.source.fbase
        # Fold the presentational align attribute into CSS and drop it
        if 'align' in node.attrib:
            if tag != 'img':
                cssdict['text-align'] = node.attrib['align']
            else:
                val = node.attrib['align']
                if val in ('middle', 'bottom', 'top'):
                    cssdict['vertical-align'] = val
                elif val in ('left', 'right'):
                    cssdict['float'] = val
            del node.attrib['align']
        # <font> becomes a div when it has block level descendants, else a
        # span; its size and face attributes are converted to CSS
        if node.tag == XHTML('font'):
            tags = [
                'descendant::h:%s' % x
                for x in ('p', 'div', 'table', 'h1', 'h2', 'h3', 'h4', 'h5',
                          'h6', 'ol', 'ul', 'dl', 'blockquote')
            ]
            tag = 'div' if XPath('|'.join(tags))(node) else 'span'
            node.tag = XHTML(tag)
            if 'size' in node.attrib:

                def force_int(raw):
                    return int(re.search(r'([0-9+-]+)', raw).group(1))

                size = node.attrib['size'].strip()
                if size:
                    fnums = self.context.source.fnums
                    if size[0] in ('+', '-'):
                        # Oh, the warcrimes
                        # Relative size: offset from 3, clamped to 1..7
                        try:
                            esize = 3 + force_int(size)
                        except:
                            esize = 3
                        if esize < 1:
                            esize = 1
                        if esize > 7:
                            esize = 7
                        font_size = fnums[esize]
                    else:
                        try:
                            font_size = fnums[force_int(size)]
                        except:
                            font_size = fnums[3]
                    cssdict['font-size'] = '%.1fpt' % font_size
                del node.attrib['size']
            if 'face' in node.attrib:
                cssdict['font-family'] = node.attrib['face']
                del node.attrib['face']
        if 'color' in node.attrib:
            try:
                cssdict['color'] = Property('color',
                                            node.attrib['color']).value
            except (ValueError, SyntaxErr):
                pass
            del node.attrib['color']
        if 'bgcolor' in node.attrib:
            try:
                cssdict['background-color'] = Property(
                    'background-color', node.attrib['bgcolor']).value
            except (ValueError, SyntaxErr):
                pass
            del node.attrib['bgcolor']
        if cssdict.get('font-weight', '').lower() == 'medium':
            cssdict[
                'font-weight'] = 'normal'  # ADE chokes on font-weight medium

        fsize = font_size
        # A left floated, childless node with a font size and exactly one
        # character of text is treated as a drop cap and is not rescaled
        is_drop_cap = (cssdict.get('float', None) == 'left'
                       and 'font-size' in cssdict and len(node) == 0
                       and node.text and len(node.text) == 1)
        # Rescale the font size via self.fmap and express it in em relative
        # to the parent size; psize then becomes this node's size
        if not self.context.disable_font_rescaling and not is_drop_cap:
            _sbase = self.sbase if self.sbase is not None else \
                self.context.source.fbase
            dyn_rescale = dynamic_rescale_factor(node)
            if dyn_rescale is not None:
                fsize = self.fmap[_sbase]
                fsize *= dyn_rescale
                cssdict['font-size'] = '%0.5fem' % (fsize / psize)
                psize = fsize
            elif 'font-size' in cssdict or tag == 'body':
                fsize = self.fmap[font_size]
                try:
                    cssdict['font-size'] = "%0.5fem" % (fsize / psize)
                except ZeroDivisionError:
                    cssdict['font-size'] = '%.1fpt' % fsize
                psize = fsize

        try:
            minlh = self.context.minimum_line_height / 100.
            if not is_drop_cap and style['line-height'] < minlh * fsize:
                cssdict['line-height'] = str(minlh)
        except:
            self.oeb.logger.exception('Failed to set minimum line-height')

        # Remove the properties listed in self.filter_css
        if cssdict:
            for x in self.filter_css:
                cssdict.pop(x, None)

        if cssdict:
            if self.lineh and self.fbase and tag != 'body':
                self.clean_edges(cssdict, style, psize)
            if 'display' in cssdict and cssdict['display'] == 'in-line':
                cssdict['display'] = 'inline'
            if self.unfloat and 'float' in cssdict \
               and cssdict.get('display', 'none') != 'none':
                del cssdict['display']
            if self.untable and 'display' in cssdict \
               and cssdict['display'].startswith('table'):
                display = cssdict['display']
                if display == 'table-cell':
                    cssdict['display'] = 'inline'
                else:
                    cssdict['display'] = 'block'
            if 'vertical-align' in cssdict \
               and cssdict['vertical-align'] == 'sup':
                cssdict['vertical-align'] = 'super'
        if self.lineh and 'line-height' not in cssdict:
            lineh = self.lineh / psize
            cssdict['line-height'] = "%0.5fem" % lineh

        if (self.context.remove_paragraph_spacing
                or self.context.insert_blank_line) and tag in ('p', 'div'):
            if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle':
                for prop in ('margin', 'padding', 'border'):
                    for edge in ('top', 'bottom'):
                        cssdict['%s-%s' % (prop, edge)] = '0pt'
            if self.context.insert_blank_line:
                cssdict['margin-top'] = cssdict['margin-bottom'] = \
                    '%fem'%self.context.insert_blank_line_size
            indent_size = self.context.remove_paragraph_spacing_indent_size
            # A negative indent size means existing indents are left alone
            keep_indents = indent_size < 0.0
            if (self.context.remove_paragraph_spacing and not keep_indents
                    and cssdict.get('text-align',
                                    None) not in ('center', 'right')):
                cssdict['text-indent'] = "%1.1fem" % indent_size

        pseudo_classes = style.pseudo_classes(self.filter_css)
        if cssdict or pseudo_classes:
            keep_classes = set()

            if cssdict:
                # Serialize the CSS in sorted order so that identical styles
                # produce identical text and share one class
                items = cssdict.items()
                items.sort()
                css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
                classes = node.get('class', '').strip() or 'calibre'
                klass = ascii_text(
                    STRIPNUM.sub('',
                                 classes.split()[0].replace('_', '')))
                if css in styles:
                    match = styles[css]
                else:
                    match = klass + str(names[klass] or '')
                    styles[css] = match
                    names[klass] += 1
                node.attrib['class'] = match
                keep_classes.add(match)

            # NOTE: this loop rebinds cssdict to each pseudo class dict
            for psel, cssdict in pseudo_classes.iteritems():
                items = sorted(cssdict.iteritems())
                css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
                pstyles = pseudo_styles[psel]
                if css in pstyles:
                    match = pstyles[css]
                else:
                    # We have to use a different class for each psel as
                    # otherwise you can have incorrect styles for a situation
                    # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green }
                    # If the pcalibre class for a:hover and a:link is the same,
                    # then the class attribute for a.x tags will contain both
                    # that class and the class for a.x:hover, which is wrong.
                    klass = 'pcalibre'
                    match = klass + str(names[klass] or '')
                    pstyles[css] = match
                    names[klass] += 1
                keep_classes.add(match)
                node.attrib['class'] = ' '.join(keep_classes)

        elif 'class' in node.attrib:
            del node.attrib['class']
        if 'style' in node.attrib:
            del node.attrib['style']
        # Children are processed with this node's font size as their psize
        for child in node:
            self.flatten_node(child, stylizer, names, styles, pseudo_styles,
                              psize, item_id)
Esempio n. 25
0
def primary_find(pat, src):
    """Find ``pat`` in ``src``, ignoring case and accents on letters.

    The ICU-backed ``primary_icu_find`` is used unless ``_icu_not_ok`` is set;
    in that case both strings are passed through ``ascii_text`` and searched
    with ``py_find`` instead.
    """
    if not _icu_not_ok:
        return primary_icu_find(pat, src)
    # Fallback path: ascii_text is only imported when ICU cannot be used.
    from calibre.utils.filenames import ascii_text
    needle, haystack = ascii_text(pat), ascii_text(src)
    return py_find(needle, haystack)
 def strip_accents(self, s):
     """Return ``s`` after conversion by ``ascii_text``."""
     converted = ascii_text(s)
     return converted
Esempio n. 27
0
def sanitize_bookmark_name(base):
    """Derive a Word-compatible bookmark name from ``base``.

    ``base`` is converted with ``ascii_text`` and every character outside
    ``0-9a-zA-Z`` is replaced by an underscore. The result is then cut to 32
    characters and any trailing underscores are removed.

    :param base: text the bookmark name is derived from.
    :return: the sanitized name, at most 32 characters long.
    """
    # Word appears to allow at most 40 characters in a bookmark name; stopping
    # at 32 leaves room for a suffix that makes the name unique.
    cleaned = re.sub(r'[^0-9a-zA-Z]', '_', ascii_text(base))
    return cleaned[:32].rstrip('_')
Esempio n. 28
0
def sanitize_bookmark_name(base):
    """Build a bookmark name from ``base`` that Word will accept.

    After ``ascii_text`` conversion, anything that is not an ASCII letter or
    digit becomes ``_``. The name is limited to 32 characters and does not end
    with an underscore.

    :param base: source text for the bookmark name.
    :return: sanitized bookmark name (32 characters or fewer).
    """
    # Word seems to reject bookmark names longer than 40 characters. Capping
    # at 32 keeps some space free for a uniquifying suffix.
    name = re.sub(r'[^0-9a-zA-Z]', '_', ascii_text(base))[:32]
    return name.rstrip('_')
def sanitize_bookmark_name(base):
    """Return a bookmark name derived from ``base``.

    Characters other than ASCII letters and digits (after ``ascii_text``
    conversion) are replaced with ``_``; the name is cut to 32 characters and
    trailing underscores are stripped.
    """
    ascii_name = ascii_text(base)
    replaced = re.sub(r'[^0-9a-zA-Z]', '_', ascii_name)
    # Word's limit appears to be 40 characters. Only 32 are used so that there
    # is space left for making the name unique.
    shortened = replaced[:32]
    return shortened.rstrip('_')
Esempio n. 30
0
    def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
        """Fold the styling of ``node`` and its descendants into generated CSS classes.

        For an XHTML element this moves presentational attributes (``align``,
        ``size``, ``face``, ``color``, ``bgcolor``) into the element's CSS
        dictionary, rescales the font size, applies the conversion options read
        from ``self`` and ``self.context``, and then replaces the ``class`` and
        ``style`` attributes with a generated class name. The method then
        recurses into the children. Nodes whose tag is not a string, or is not
        in the XHTML namespace, are skipped together with their subtree.

        :param node: element to process; it is modified in place.
        :param stylizer: object whose ``style(node)`` returns the style of an element.
        :param names: counter keyed by class base name, used to number generated
            class names. It is read for keys before they are assigned, so it is
            presumably a ``defaultdict(int)`` -- confirm at the caller.
        :param styles: mapping of serialized CSS text to the class name generated
            for it; updated in place.
        :param pseudo_styles: mapping of pseudo selector to a ``{css text: class
            name}`` mapping; updated in place (presumably a defaultdict -- confirm).
        :param psize: font size in effect for the parent, used as the base when
            writing ``em`` values.
        :param item_id: id of the item being processed; only compared with
            ``'calibre_jacket'``.
        """
        # Only string-tagged elements in the XHTML namespace are processed.
        if not isinstance(node.tag, basestring) \
           or namespace(node.tag) != XHTML_NS:
               return
        tag = barename(node.tag)
        style = stylizer.style(node)
        cssdict = style.cssdict()
        # Use the base font size when the style cannot provide a font-size.
        try:
            font_size = style['font-size']
        except:
            font_size = self.sbase if self.sbase is not None else \
                self.context.source.fbase
        # Convert the ``align`` attribute to CSS and remove the attribute.
        if 'align' in node.attrib:
            if tag != 'img':
                cssdict['text-align'] = node.attrib['align']
            else:
                # For images, ``align`` maps to vertical-align or float.
                val = node.attrib['align']
                if val in ('middle', 'bottom', 'top'):
                    cssdict['vertical-align'] = val
                elif val in ('left', 'right'):
                    cssdict['float'] = val
            del node.attrib['align']
        # <font> becomes <div> when it has any of the listed descendants,
        # otherwise <span>; its ``size`` and ``face`` attributes become CSS.
        if node.tag == XHTML('font'):
            tags = ['descendant::h:%s'%x for x in ('p', 'div', 'table', 'h1',
                'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'dl', 'blockquote')]
            tag = 'div' if XPath('|'.join(tags))(node) else 'span'
            node.tag = XHTML(tag)
            if 'size' in node.attrib:
                # Parse the first run of digits/sign characters in ``raw`` as an int.
                def force_int(raw):
                    return int(re.search(r'([0-9+-]+)', raw).group(1))
                size = node.attrib['size'].strip()
                if size:
                    fnums = self.context.source.fnums
                    if size[0] in ('+', '-'):
                        # Signed sizes are offsets from 3, clamped to 1..7.
                        # Oh, the warcrimes
                        try:
                            esize = 3 + force_int(size)
                        except:
                            esize = 3
                        if esize < 1:
                            esize = 1
                        if esize > 7:
                            esize = 7
                        font_size = fnums[esize]
                    else:
                        # Unsigned sizes index fnums directly; anything that
                        # fails falls back to entry 3.
                        try:
                            font_size = fnums[force_int(size)]
                        except:
                            font_size = fnums[3]
                    cssdict['font-size'] = '%.1fpt'%font_size
                del node.attrib['size']
            if 'face' in node.attrib:
                cssdict['font-family'] = node.attrib['face']
                del node.attrib['face']
        # ``color``/``bgcolor`` attributes become CSS; values that fail to
        # parse are dropped, and the attribute is removed either way.
        if 'color' in node.attrib:
            try:
                cssdict['color'] = Property('color', node.attrib['color']).value
            except (ValueError, SyntaxErr):
                pass
            del node.attrib['color']
        if 'bgcolor' in node.attrib:
            try:
                cssdict['background-color'] = Property('background-color', node.attrib['bgcolor']).value
            except (ValueError, SyntaxErr):
                pass
            del node.attrib['bgcolor']
        if cssdict.get('font-weight', '').lower() == 'medium':
            cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium

        # Font rescaling: look the size up in self.fmap and write it as an em
        # value relative to psize. psize is then updated so that the children
        # are sized relative to this element.
        fsize = font_size
        if not self.context.disable_font_rescaling:
            _sbase = self.sbase if self.sbase is not None else \
                self.context.source.fbase
            dyn_rescale = dynamic_rescale_factor(node)
            if dyn_rescale is not None:
                # NOTE(review): unlike the branch below, a zero psize is not
                # guarded against here.
                fsize = self.fmap[_sbase]
                fsize *= dyn_rescale
                cssdict['font-size'] = '%0.5fem'%(fsize/psize)
                psize = fsize
            elif 'font-size' in cssdict or tag == 'body':
                fsize = self.fmap[font_size]
                try:
                    cssdict['font-size'] = "%0.5fem" % (fsize / psize)
                except ZeroDivisionError:
                    # No usable parent size: emit an absolute size instead.
                    cssdict['font-size'] = '%.1fpt'%fsize
                psize = fsize

        # Enforce the minimum line height; the option is divided by 100 and
        # compared against the style's line-height scaled by the font size.
        # Any failure is logged and otherwise ignored.
        try:
            minlh = self.context.minimum_line_height / 100.
            if style['line-height'] < minlh * fsize:
                cssdict['line-height'] = str(minlh)
        except:
            self.oeb.logger.exception('Failed to set minimum line-height')

        # Drop every property listed in self.filter_css.
        if cssdict:
            for x in self.filter_css:
                cssdict.pop(x, None)

        # Normalize the remaining properties.
        if cssdict:
            if self.lineh and self.fbase and tag != 'body':
                self.clean_edges(cssdict, style, psize)
            if 'display' in cssdict and cssdict['display'] == 'in-line':
                cssdict['display'] = 'inline'
            # NOTE(review): the condition tests for 'float' but the statement
            # removes 'display' -- confirm this is intended.
            if self.unfloat and 'float' in cssdict \
               and cssdict.get('display', 'none') != 'none':
                del cssdict['display']
            # With untable set, table-cell becomes inline and every other
            # table* display value becomes block.
            if self.untable and 'display' in cssdict \
               and cssdict['display'].startswith('table'):
                display = cssdict['display']
                if display == 'table-cell':
                    cssdict['display'] = 'inline'
                else:
                    cssdict['display'] = 'block'
            if 'vertical-align' in cssdict \
               and cssdict['vertical-align'] == 'sup':
                cssdict['vertical-align'] = 'super'
        # Apply the configured line height (relative to psize) when none is set.
        if self.lineh and 'line-height' not in cssdict:
            lineh = self.lineh / psize
            cssdict['line-height'] = "%0.5fem" % lineh

        # Paragraph spacing options, applied to <p> and <div> only.
        if (self.context.remove_paragraph_spacing or
                self.context.insert_blank_line) and tag in ('p', 'div'):
            # Zero the vertical margin/padding/border, except inside the
            # calibre_jacket item unless the output profile is Kindle.
            if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle':
                for prop in ('margin', 'padding', 'border'):
                    for edge in ('top', 'bottom'):
                        cssdict['%s-%s'%(prop, edge)] = '0pt'
            if self.context.insert_blank_line:
                cssdict['margin-top'] = cssdict['margin-bottom'] = \
                    '%fem'%self.context.insert_blank_line_size
            # A negative indent size means existing indents are left alone.
            indent_size = self.context.remove_paragraph_spacing_indent_size
            keep_indents = indent_size < 0.0
            if (self.context.remove_paragraph_spacing and not keep_indents and
                cssdict.get('text-align', None) not in ('center', 'right')):
                cssdict['text-indent'] =  "%1.1fem" % indent_size

        pseudo_classes = style.pseudo_classes(self.filter_css)
        if cssdict or pseudo_classes:
            keep_classes = set()

            if cssdict:
                # Serialize the sorted properties; identical CSS text reuses
                # the class name already generated for it.
                items = cssdict.items()
                items.sort()
                css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
                classes = node.get('class', '').strip() or 'calibre'
                # The base name comes from the element's first class with
                # underscores removed and STRIPNUM matches deleted.
                klass = ascii_text(STRIPNUM.sub('', classes.split()[0].replace('_', '')))
                if css in styles:
                    match = styles[css]
                else:
                    # A zero count yields the bare base name, later ones get
                    # the count appended.
                    match = klass + str(names[klass] or '')
                    styles[css] = match
                    names[klass] += 1
                node.attrib['class'] = match
                keep_classes.add(match)

            # Note that this loop rebinds ``cssdict`` to each pseudo-class dict.
            for psel, cssdict in pseudo_classes.iteritems():
                items = sorted(cssdict.iteritems())
                css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
                pstyles = pseudo_styles[psel]
                if css in pstyles:
                    match = pstyles[css]
                else:
                    # We have to use a different class for each psel as
                    # otherwise you can have incorrect styles for a situation
                    # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green }
                    # If the pcalibre class for a:hover and a:link is the same,
                    # then the class attribute for a.x tags will contain both
                    # that class and the class for a.x:hover, which is wrong.
                    klass = 'pcalibre'
                    match = klass + str(names[klass] or '')
                    pstyles[css] = match
                    names[klass] += 1
                keep_classes.add(match)
                node.attrib['class'] = ' '.join(keep_classes)

        # No CSS at all: the element keeps no class attribute.
        elif 'class' in node.attrib:
            del node.attrib['class']
        # Inline styles are always removed.
        if 'style' in node.attrib:
            del node.attrib['style']
        # Recurse with the (possibly updated) psize as the children's parent size.
        for child in node:
            self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
Esempio n. 31
0
    def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
        """Fold the styling of ``node`` and its descendants into generated CSS classes.

        For an XHTML element this moves presentational attributes (``align``,
        ``valign``, ``size``, ``face``, ``color``, ``bgcolor``) into the
        element's CSS dictionary, rescales the font size unless the element
        looks like a drop cap, applies the conversion options read from
        ``self`` and ``self.context``, and then replaces the ``class`` and
        ``style`` attributes with a generated class name. The method then
        recurses into the children. Nodes whose tag is not a string/bytes
        value, or is not in the XHTML namespace, are skipped together with
        their subtree.

        :param node: element to process; it is modified in place.
        :param stylizer: object whose ``style(node)`` returns the style of an
            element; its ``body_font_size`` attribute is set when the body has
            a numeric font size.
        :param names: counter keyed by class base name, used to number generated
            class names. It is read for keys before they are assigned, so it is
            presumably a ``defaultdict(int)`` -- confirm at the caller.
        :param styles: mapping of serialized CSS text to the class name generated
            for it; updated in place.
        :param pseudo_styles: mapping of pseudo selector to a ``{css text: class
            name}`` mapping; updated in place (presumably a defaultdict -- confirm).
        :param psize: font size in effect for the parent, used as the base when
            writing ``em`` values.
        :param item_id: id of the item being processed; only compared with
            ``'calibre_jacket'``.
        """
        # Only string-tagged elements in the XHTML namespace are processed.
        if not isinstance(node.tag, string_or_bytes) \
           or namespace(node.tag) != XHTML_NS:
            return
        tag = barename(node.tag)
        style = stylizer.style(node)
        cssdict = style.cssdict()
        # Use the base font size when the style cannot provide a font-size.
        try:
            font_size = style['font-size']
        except:
            font_size = self.sbase if self.sbase is not None else \
                self.context.source.fbase
        # Record the body's numeric font size on the stylizer.
        if tag == 'body' and isinstance(font_size, numbers.Number):
            stylizer.body_font_size = font_size
        # Convert the ``align`` attribute to CSS and remove the attribute.
        if 'align' in node.attrib:
            if tag != 'img':
                cssdict['text-align'] = node.attrib['align']
                if cssdict['text-align'] == 'center':
                    # align=center causes tables to be center aligned,
                    # which text-align does not. And the ever trustworthy Word
                    # uses this construct in its HTML output. See
                    # https://bugs.launchpad.net/bugs/1569583
                    if tag == 'table':
                        if 'margin-left' not in cssdict and 'margin-right' not in cssdict:
                            cssdict['margin-left'] = cssdict['margin-right'] = 'auto'
                    else:
                        # Centre direct child tables that have no horizontal
                        # margins of their own.
                        for table in node.iterchildren(XHTML("table")):
                            ts = stylizer.style(table)
                            if ts.get('margin-left') is None and ts.get('margin-right') is None:
                                ts.set('margin-left', 'auto')
                                ts.set('margin-right', 'auto')
            else:
                # For images, ``align`` maps to vertical-align or float.
                val = node.attrib['align']
                if val in ('middle', 'bottom', 'top'):
                    cssdict['vertical-align'] = val
                elif val in ('left', 'right'):
                    cssdict['float'] = val
            del node.attrib['align']
        # ``valign`` on <td> is used only when the CSS vertical-align is
        # 'inherit'; the attribute is removed either way.
        if 'valign' in node.attrib and tag == 'td':
            if cssdict.get('vertical-align') == 'inherit':
                cssdict['vertical-align'] = node.attrib['valign']
            del node.attrib['valign']
        # <font> becomes <div> when it has any of the listed descendants,
        # otherwise <span>; its ``size`` and ``face`` attributes become CSS.
        if node.tag == XHTML('font'):
            tags = ['descendant::h:%s'%x for x in ('p', 'div', 'table', 'h1',
                'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'dl', 'blockquote')]
            tag = 'div' if XPath('|'.join(tags))(node) else 'span'
            node.tag = XHTML(tag)
            if 'size' in node.attrib:
                # Parse the first run of digits/sign characters in ``raw`` as an int.
                def force_int(raw):
                    return int(re.search(r'([0-9+-]+)', raw).group(1))
                size = node.attrib['size'].strip()
                if size:
                    fnums = self.context.source.fnums
                    if size[0] in ('+', '-'):
                        # Signed sizes are offsets from 3, clamped to 1..7.
                        # Oh, the warcrimes
                        try:
                            esize = 3 + force_int(size)
                        except:
                            esize = 3
                        if esize < 1:
                            esize = 1
                        if esize > 7:
                            esize = 7
                        font_size = fnums[esize]
                    else:
                        # Unsigned sizes index fnums directly; anything that
                        # fails falls back to entry 3.
                        try:
                            font_size = fnums[force_int(size)]
                        except:
                            font_size = fnums[3]
                    cssdict['font-size'] = '%.1fpt'%font_size
                del node.attrib['size']
            if 'face' in node.attrib:
                cssdict['font-family'] = node.attrib['face']
                del node.attrib['face']
        # ``color``/``bgcolor`` attributes become CSS; values that fail to
        # parse are dropped, and the attribute is removed either way.
        if 'color' in node.attrib:
            try:
                cssdict['color'] = Property('color', node.attrib['color']).value
            except (ValueError, SyntaxErr):
                pass
            del node.attrib['color']
        if 'bgcolor' in node.attrib:
            try:
                cssdict['background-color'] = Property('background-color', node.attrib['bgcolor']).value
            except (ValueError, SyntaxErr):
                pass
            del node.attrib['bgcolor']
        # The ``type`` attribute of <ol> is discarded.
        if tag == 'ol' and 'type' in node.attrib:
            del node.attrib['type']
        if cssdict.get('font-weight', '').lower() == 'medium':
            cssdict['font-weight'] = 'normal'  # ADE chokes on font-weight medium

        fsize = font_size
        # A drop cap here is a childless, left-floated element with an explicit
        # font-size whose text is one character, or two characters when the
        # first lies in U+2000..U+206F.
        is_drop_cap = (cssdict.get('float', None) == 'left' and 'font-size' in cssdict and len(node) == 0 and node.text and (
            len(node.text) == 1 or (len(node.text) == 2 and 0x2000 <= ord(node.text[0]) <= 0x206f)))
        # Detect drop caps generated by the docx input plugin
        if node.tag and node.tag.endswith('}p') and len(node) == 0 and node.text and len(node.text.strip()) == 1 and \
                not node.tail and 'line-height' in cssdict and 'font-size' in cssdict:
            dp = node.getparent()
            if dp.tag and dp.tag.endswith('}div') and len(dp) == 1 and not dp.text:
                if stylizer.style(dp).cssdict().get('float', None) == 'left':
                    is_drop_cap = True
        # Font rescaling (skipped for drop caps): look the size up in self.fmap
        # and write it as an em value relative to psize. psize is then updated
        # so that the children are sized relative to this element.
        if not self.context.disable_font_rescaling and not is_drop_cap:
            _sbase = self.sbase if self.sbase is not None else \
                self.context.source.fbase
            dyn_rescale = dynamic_rescale_factor(node)
            if dyn_rescale is not None:
                # NOTE(review): unlike the branch below, a zero psize is not
                # guarded against here.
                fsize = self.fmap[_sbase]
                fsize *= dyn_rescale
                cssdict['font-size'] = '%0.5fem'%(fsize/psize)
                psize = fsize
            elif 'font-size' in cssdict or tag == 'body':
                fsize = self.fmap[font_size]
                try:
                    cssdict['font-size'] = "%0.5fem" % (fsize / psize)
                except ZeroDivisionError:
                    # No usable parent size: emit an absolute size instead.
                    cssdict['font-size'] = '%.1fpt'%fsize
                psize = fsize

        # Enforce the minimum line height (not for drop caps); the option is
        # divided by 100 and compared against the style's line-height scaled by
        # the font size. Any failure is logged and otherwise ignored.
        try:
            minlh = self.context.minimum_line_height / 100.
            if not is_drop_cap and style['line-height'] < minlh * fsize:
                cssdict['line-height'] = str(minlh)
        except:
            self.oeb.logger.exception('Failed to set minimum line-height')

        # Drop every property listed in self.filter_css. A filtered font-family
        # is put back when its first entry, minus its first and last character
        # (presumably the quotes), equals that of self.body_font_family.
        if cssdict:
            for x in self.filter_css:
                popval = cssdict.pop(x, None)
                if self.body_font_family and popval and x == 'font-family' \
                    and popval.partition(',')[0][1:-1] == self.body_font_family.partition(',')[0][1:-1]:
                    cssdict[x] = popval

        # Normalize the remaining properties.
        if cssdict:
            if self.lineh and self.fbase and tag != 'body':
                self.clean_edges(cssdict, style, psize)
            if 'display' in cssdict and cssdict['display'] == 'in-line':
                cssdict['display'] = 'inline'
            # NOTE(review): the condition tests for 'float' but the statement
            # removes 'display' -- confirm this is intended.
            if self.unfloat and 'float' in cssdict \
               and cssdict.get('display', 'none') != 'none':
                del cssdict['display']
            # With untable set, table-cell becomes inline and every other
            # table* display value becomes block.
            if self.untable and 'display' in cssdict \
               and cssdict['display'].startswith('table'):
                display = cssdict['display']
                if display == 'table-cell':
                    cssdict['display'] = 'inline'
                else:
                    cssdict['display'] = 'block'
            if 'vertical-align' in cssdict \
               and cssdict['vertical-align'] == 'sup':
                cssdict['vertical-align'] = 'super'
        # Apply the configured line height (relative to psize) when none is set.
        if self.lineh and 'line-height' not in cssdict:
            lineh = self.lineh / psize
            cssdict['line-height'] = "%0.5fem" % lineh

        # Paragraph spacing options, applied to <p> and <div> only.
        if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'):
            # Zero the vertical margin/padding/border, except inside the
            # calibre_jacket item unless the output profile is Kindle.
            if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle':
                for prop in ('margin', 'padding', 'border'):
                    for edge in ('top', 'bottom'):
                        cssdict['%s-%s'%(prop, edge)] = '0pt'
            if self.context.insert_blank_line:
                cssdict['margin-top'] = cssdict['margin-bottom'] = \
                    '%fem'%self.context.insert_blank_line_size
            # A negative indent size means existing indents are left alone.
            indent_size = self.context.remove_paragraph_spacing_indent_size
            keep_indents = indent_size < 0.0
            if (self.context.remove_paragraph_spacing and not keep_indents and cssdict.get('text-align', None) not in ('center', 'right')):
                cssdict['text-indent'] =  "%1.1fem" % indent_size

        pseudo_classes = style.pseudo_classes(self.filter_css)
        if cssdict or pseudo_classes:
            keep_classes = set()

            if cssdict:
                # Serialize the sorted properties; identical CSS text reuses
                # the class name already generated for it.
                items = sorted(iteritems(cssdict))
                css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
                classes = node.get('class', '').strip() or 'calibre'
                # lower() because otherwise if the document uses the same class
                # name with different case, both cases will apply, leading
                # to incorrect results.
                klass = ascii_text(STRIPNUM.sub('', classes.split()[0])).lower().strip().replace(' ', '_')
                if css in styles:
                    match = styles[css]
                else:
                    # A zero count yields the bare base name, later ones get
                    # the count appended.
                    match = klass + str(names[klass] or '')
                    styles[css] = match
                    names[klass] += 1
                node.attrib['class'] = match
                keep_classes.add(match)

            # Note that this loop rebinds ``cssdict`` to each pseudo-class dict.
            for psel, cssdict in iteritems(pseudo_classes):
                items = sorted(iteritems(cssdict))
                css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
                pstyles = pseudo_styles[psel]
                if css in pstyles:
                    match = pstyles[css]
                else:
                    # We have to use a different class for each psel as
                    # otherwise you can have incorrect styles for a situation
                    # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green }
                    # If the pcalibre class for a:hover and a:link is the same,
                    # then the class attribute for a.x tags will contain both
                    # that class and the class for a.x:hover, which is wrong.
                    klass = 'pcalibre'
                    match = klass + str(names[klass] or '')
                    pstyles[css] = match
                    names[klass] += 1
                keep_classes.add(match)
                node.attrib['class'] = ' '.join(keep_classes)

        # No CSS at all: the element keeps no class attribute.
        elif 'class' in node.attrib:
            del node.attrib['class']
        # Inline styles are always removed.
        if 'style' in node.attrib:
            del node.attrib['style']
        # Recurse with the (possibly updated) psize as the children's parent size.
        for child in node:
            self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
Esempio n. 32
0
    def initialise_new_file(self, pathtoebook):
        """Reset the dialog state and load ``pathtoebook`` as the new source ebook.

        Per-book state (metadata, errors, rename map, placeholder images) is
        cleared, the run button is enabled and the save button disabled. If
        the path is an existing file that ``get_container`` can open, a clone
        of the original container is made in a temporary directory, the
        output file name is derived and the source/save fields, metadata and
        check results are filled in. Otherwise a note is appended to the log;
        when opening failed, an error dialog is shown as well. The log view is
        refreshed in every case.

        :param pathtoebook: path of the ebook file to load.
        """
        self.meta, self.errors = {}, {}
        self.rename_file_map = {}
        self.is_scrambled = False
        self.dummyimg = None
        self.dummysvg = ''
        self.runButton.setEnabled(True)
        self.buttonBox.button(QDialogButtonBox.Save).setEnabled(False)

        fileok = os.path.isfile(pathtoebook)
        if fileok:
            try:
                self.ebook = get_container(pathtoebook)
            except Exception:
                # Any failure to open the book is reported with the same
                # message. KeyboardInterrupt/SystemExit are deliberately not
                # swallowed here.
                fileok = False
                msg = "Source ebook must be de-DRM'd and in one of these formats:" \
                    "\n- azw3\n- epub\n- kepub\n- kepub.epub.\n\nPlease select another."
                error_dialog(self,
                             CAPTION,
                             msg,
                             show=True,
                             show_copy_button=True)

        if not fileok:
            self.log.append('No ebook selected yet')
        else:
            # Both the opened container's root and the clone directory are
            # registered in self.cleanup_dirs.
            self.cleanup_dirs.append(self.ebook.root)
            tdir = PersistentTemporaryDirectory('_scramble_clone_orig')
            self.cleanup_dirs.append(tdir)
            self.eborig = clone_container(self.ebook, tdir)

            dirn, fname, ext, is_kepub_epub = get_fileparts(
                self.ebook.path_to_ebook)
            ext = ext.lower()
            format = 'kepub' if is_kepub_epub else ext

            if self.book_id is not None:
                # calibre library book
                self.cleanup_files.append(self.ebook.path_to_ebook)
            sourcepath = self.ebook.path_to_ebook

            # Placeholder image resources are selected by format name.
            self.dummyimg = get_resources('images/' + format + '.png')
            self.dummysvg = get_resources('images/' + format + '.svg')

            if self.from_calibre:
                # calibre plugin
                self.dirout = ''
            else:
                # standalone version
                self.dirout = dirn
                self.log.append('\n--- New ebook: %s' % sourcepath)

            # Output name: <name>_scrambled.<ext>, keeping the double
            # "kepub.<ext>" suffix for kepub.epub sources.
            fn = fname + '_scrambled.'
            fn += 'kepub.' + ext if is_kepub_epub else ext
            self.fname_scrambled_ebook = ascii_text(fn)
            self.sourcefile.setText(sourcepath)
            self.savefile.setText(self.fname_scrambled_ebook)
            self.meta['orig'] = get_metadata(self.ebook)
            self.errors['orig'] = get_run_check_error(self.ebook)

        self.viewlog()