Python urlnormalize Examples, calibre.ebooks.oeb.base.urlnormalize Python Examples

Example #1

0

Show file

File: reader.py Project: aimylios/calibre

    def _toc_from_navpoint(self, item, toc, navpoint):
        '''
        Recursively convert the ``ncx:navPoint`` children of `navpoint` into
        entries of `toc`.

        :param item: Object whose ``abshref()`` resolves each entry's ``src``.
        :param toc: Node that receives the new entries through ``add()``.
        :param navpoint: Element whose direct ``ncx:navPoint`` children are read.
        '''
        for child in xpath(navpoint, 'ncx:navPoint'):
            title = ''.join(xpath(child, 'ncx:navLabel/ncx:text/text()'))
            title = COLLAPSE_RE.sub(' ', title.strip())
            href = xpath(child, 'ncx:content/@src')
            if not title:
                # An untitled node is skipped, but its children are attached
                # to the current level instead.
                self._toc_from_navpoint(item, toc, child)
                continue
            if (not href or not href[0]) and not xpath(child, 'ncx:navPoint'):
                # This node is useless
                continue
            href = item.abshref(urlnormalize(href[0])) if href and href[0] else ''
            path, _ = urldefrag(href)
            if path and path not in self.oeb.manifest.hrefs:
                # Retry the manifest lookup with the normalized path.
                path = urlnormalize(path)
            if href and path not in self.oeb.manifest.hrefs:
                self.logger.warn('TOC reference %r not found' % href)
                if not xpath(child, 'ncx:navPoint'):
                    # This node is useless
                    continue
            node_id = child.get('id')
            klass = child.get('class', 'chapter')

            try:
                po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
            except (TypeError, ValueError):
                # A non-numeric playOrder attribute falls back to the next
                # free play order.
                po = self.oeb.toc.next_play_order()

            author_elems = xpath(child,
                    'descendant::calibre:meta[@name = "author"]')
            author = author_elems[0].text if author_elems else None

            description = None
            description_elems = xpath(child,
                    'descendant::calibre:meta[@name = "description"]')
            if description_elems:
                # An empty description is stored as None.
                description = etree.tostring(description_elems[0],
                    method='text', encoding=unicode).strip() or None

            index_image = xpath(child,
                    'descendant::calibre:meta[@name = "toc_thumbnail"]')
            toc_thumbnail = (index_image[0].text if index_image else None)
            if not toc_thumbnail or not toc_thumbnail.strip():
                toc_thumbnail = None

            node = toc.add(title, href, id=node_id, klass=klass,
                    play_order=po, description=description, author=author,
                    toc_thumbnail=toc_thumbnail)

            self._toc_from_navpoint(item, node, child)

Example #2

0

Show file

File: serializer.py Project: Eksmo/calibre

 def serialize_elem(self, elem, item, nsrmap=NSRMAP):
     '''
     Write `elem` and, recursively, its children to ``self.buf`` as markup.

     :param elem: Element to serialize. Its ``id`` attribute is removed
         from ``elem.attrib`` as a side effect.
     :param item: Object providing ``href`` and ``abshref()`` for resolving
         the element's ids and ``src`` attributes.
     :param nsrmap: Namespace map; elements and attributes whose namespace
         is not in it are skipped.
     '''
     buf = self.buf
     # Skip nodes whose tag is not a string or whose namespace is unknown.
     if not isinstance(elem.tag, basestring) \
         or namespace(elem.tag) not in nsrmap:
             return
     tag = prefixname(elem.tag, nsrmap)
     # Previous layers take care of @name
     id_ = elem.attrib.pop('id', None)
     if id_:
         href = '#'.join((item.href, id_))
         # NOTE(review): because of the `or`, an anchor_offset of 0 is
         # treated the same as None here.
         offset = self.anchor_offset or buf.tell()
         key = urlnormalize(href)
         # Only set this id_offset if it wasn't previously seen
         self.id_offsets[key] = self.id_offsets.get(key, offset)
     # While an anchor offset is pending, an <a> with no remaining
     # attributes, children or text is dropped without writing anything.
     if self.anchor_offset is not None and \
         tag == 'a' and not elem.attrib and \
         not len(elem) and not elem.text:
             return
     self.anchor_offset = buf.tell()
     buf.write(b'<')
     buf.write(tag.encode('utf-8'))
     if elem.attrib:
         for attr, val in elem.attrib.items():
             if namespace(attr) not in nsrmap:
                 continue
             attr = prefixname(attr, nsrmap)
             buf.write(b' ')
             if attr == 'href':
                 # serialize_href() writes its own replacement when it
                 # returns True; otherwise the attribute is written as is.
                 if self.serialize_href(val, item):
                     continue
             elif attr == 'src':
                 href = urlnormalize(item.abshref(val))
                 if href in self.images:
                     # Known images are referenced by index instead of URL.
                     index = self.images[href]
                     self.used_images.add(href)
                     buf.write(b'recindex="%05d"' % index)
                     continue
             buf.write(attr.encode('utf-8'))
             buf.write(b'="')
             self.serialize_text(val, quot=True)
             buf.write(b'"')
     buf.write(b'>')
     if elem.text or len(elem) > 0:
         if elem.text:
             # Any text written clears the pending anchor offset.
             self.anchor_offset = None
             self.serialize_text(elem.text)
         for child in elem:
             # NOTE(review): nsrmap is not forwarded, so children are
             # serialized with the default NSRMAP.
             self.serialize_elem(child, item)
             if child.tail:
                 self.anchor_offset = None
                 self.serialize_text(child.tail)
     buf.write(b'</%s>' % tag.encode('utf-8'))

Example #3

0

Show file

File: trimmanifest.py Project: 089git/calibre

 def __call__(self, oeb, context):
     '''
     Remove every manifest item that nothing in the book refers to.

     An item counts as used when a metadata value names it (by href or by
     id), a guide reference points at it, it is in the spine, or it is
     linked, directly or transitively, from another used item.

     :param oeb: Book whose manifest is trimmed in place.
     :param context: Stored on ``self.opts``.
     '''
     import cssutils
     oeb.logger.info('Trimming unused files from manifest...')
     self.opts = context
     used = set()
     for term in oeb.metadata:
         for item in oeb.metadata[term]:
             if item.value in oeb.manifest.hrefs:
                 used.add(oeb.manifest.hrefs[item.value])
             elif item.value in oeb.manifest.ids:
                 used.add(oeb.manifest.ids[item.value])
     for ref in oeb.guide.values():
         path, _ = urldefrag(ref.href)
         if path in oeb.manifest.hrefs:
             used.add(oeb.manifest.hrefs[path])
     # TOC items are required to be in the spine
     for item in oeb.spine:
         used.add(item)
     # Follow links from the items found so far until a pass discovers
     # nothing new.
     unchecked = used
     while unchecked:
         new = set()
         for item in unchecked:
             if (item.media_type in OEB_DOCS or
                 item.media_type[-4:] in ('/xml', '+xml')) and \
                item.data is not None:
                 hrefs = [r[2] for r in iterlinks(item.data)]
                 for href in hrefs:
                     if isinstance(href, bytes):
                         href = href.decode('utf-8')
                     try:
                         href = item.abshref(urlnormalize(href))
                     except Exception:
                         # Best effort: a link that cannot be resolved
                         # cannot name a manifest item.
                         continue
                     if href in oeb.manifest.hrefs:
                         found = oeb.manifest.hrefs[href]
                         if found not in used:
                             new.add(found)
             elif item.media_type == CSS_MIME:
                 for href in cssutils.getUrls(item.data):
                     try:
                         href = item.abshref(urlnormalize(href))
                     except Exception:
                         # Same best-effort handling as for documents, so a
                         # single bad url() does not abort the whole pass.
                         continue
                     if href in oeb.manifest.hrefs:
                         found = oeb.manifest.hrefs[href]
                         if found not in used:
                             new.add(found)
         used.update(new)
         unchecked = new
     # Iterate over a snapshot because remove() changes the manifest.
     for item in list(oeb.manifest.values()):
         if item not in used:
             oeb.logger.info('Trimming %r from manifest' % item.href)
             oeb.manifest.remove(item)

Example #4

0

Show file

    def handle_embedded_fonts(self):
        '''
        Make sure all fonts are embeddable.

        For every @font-face rule in the book's stylesheets, the referenced
        font is passed through remove_embed_restriction(); when that changes
        the data, the new data is stored on the manifest item and written to
        the container. Rules and fonts that cannot be processed are skipped.
        '''
        from calibre.ebooks.oeb.base import urlnormalize
        from calibre.utils.fonts.utils import remove_embed_restriction

        processed = set()
        for item in list(self.oeb.manifest):
            if not hasattr(item.data, 'cssRules'):
                continue
            for rule in item.data.cssRules:
                if rule.type != rule.FONT_FACE_RULE:
                    continue
                try:
                    src = rule.style.getProperty('src').propertyValue[0].uri
                except Exception:
                    # No usable src in this rule.
                    continue
                path = item.abshref(src)
                ff = self.oeb.manifest.hrefs.get(urlnormalize(path), None)
                if ff is None or path in processed:
                    continue
                # Each font path is handled once, even if several rules
                # point at it.
                processed.add(path)

                raw = ff.data
                try:
                    nraw = remove_embed_restriction(raw)
                except Exception:
                    # Best effort: leave this font untouched.
                    continue
                if nraw != raw:
                    ff.data = nraw
                    self.oeb.container.write(path, nraw)

Example #5

0

Show file

File: reader.py Project: mihailim/calibre

 def _spine_add_extra(self):
     '''
     Add to the spine, as non-linear items, the documents that are reachable
     through <a href> links from spine documents but are not in the spine.
     '''
     manifest = self.oeb.manifest
     spine = self.oeb.spine
     link_selector = XPath('h:body//h:a/@href')
     extras = set()
     pending = set(spine)
     # Each pass follows the links of the documents found by the previous one.
     while pending:
         discovered = set()
         for item in pending:
             if item.media_type not in OEB_DOCS:
                 # TODO: handle fallback chains
                 continue
             for link in link_selector(item.data):
                 target = urldefrag(link)[0]
                 if not target:
                     continue
                 try:
                     target = item.abshref(urlnormalize(target))
                 except ValueError:  # Malformed URL
                     continue
                 if target not in manifest.hrefs:
                     continue
                 found = manifest.hrefs[target]
                 if found.media_type in OEB_DOCS and \
                    found not in spine and found not in extras:
                     discovered.add(found)
         extras.update(discovered)
         pending = discovered
     warn_missing = int(self.oeb.version[0]) >= 2
     for item in sorted(extras):
         if warn_missing:
             self.logger.warn(
                 'Spine-referenced file %r not in spine' % item.href)
         spine.add(item, linear=False)

Example #6

0

Show file

File: toc.py Project: JapaChin/calibre

def find_previous_calibre_inline_toc(oeb):
    '''
    Return the manifest item the guide's 'toc' entry points at when its
    document body carries the id "calibre_generated_inline_toc"; otherwise
    return None.
    '''
    if "toc" not in oeb.guide:
        return None
    # Manifest hrefs are looked up without the fragment.
    toc_path = oeb.guide["toc"].href.partition("#")[0]
    href = urlnormalize(toc_path)
    if href not in oeb.manifest.hrefs:
        return None
    item = oeb.manifest.hrefs[href]
    if not hasattr(item.data, "xpath"):
        return None
    if XPath('//h:body[@id="calibre_generated_inline_toc"]')(item.data):
        return item
    return None

Example #7

0

Show file

File: reader.py Project: mihailim/calibre

 def _toc_from_html(self, opf):
     '''
     Build the TOC from the links of the HTML document that the guide's
     'toc' reference points at.

     Returns False when the guide has no 'toc' entry, True otherwise.
     '''
     guide = self.oeb.guide
     if 'toc' not in guide:
         return False
     self.log.debug('Reading TOC from HTML...')
     itempath, frag = urldefrag(guide['toc'].href)
     item = self.oeb.manifest.hrefs[itempath]
     root = item.data
     if frag:
         # Find the element named by the fragment (by id, then by name) and
         # climb until an ancestor that contains links is reached.
         matches = (xpath(root, './/*[@id="%s"]' % frag) or
                    xpath(root, './/*[@name="%s"]' % frag))
         scope = matches[0] if matches else root
         while scope != root and not xpath(scope, './/h:a[@href]'):
             scope = scope.getparent()
         root = scope
     titles = {}
     order = []
     for anchor in xpath(root, './/h:a[@href]'):
         href = item.abshref(urlnormalize(anchor.attrib['href']))
         path = urldefrag(href)[0]
         if path not in self.oeb.manifest.hrefs:
             continue
         title = COLLAPSE_RE.sub(' ', xml2text(anchor).strip())
         if href not in titles:
             # Remember first-seen order; repeated links share one entry.
             titles[href] = []
             order.append(href)
         titles[href].append(title)
     toc = self.oeb.toc
     for href in order:
         toc.add(' '.join(titles[href]), href)
     return True

Example #8

0

Show file

 def serialize_href(self, href, base=None):
     '''
     Serialize the href attribute of an <a> or <reference> tag. It is
     serialized as filepos=0000000000 and a pointer to its location is
     stored in self.href_offsets so that the correct value can be filled in
     at the end.

     :param href: The link to serialize.
     :param base: Optional object whose ``abshref()`` resolves relative
         paths and whose ``href`` is the target of fragment-only links.
     :return: True if the placeholder was written. False if the URL cannot
         be parsed, its path is not in the manifest, the target has no
         spine position, or a fragment-only link was given without `base`.
     '''
     hrefs = self.oeb.manifest.hrefs
     try:
         path, frag = urldefrag(urlnormalize(href))
     except ValueError:
         # Unparseable URL
         return False
     if path and base:
         path = base.abshref(path)
     if path and path not in hrefs:
         return False
     item = hrefs[path] if path else None
     if item and item.spine_position is None:
         return False
     if item:
         path = item.href
     elif base is None:
         # Nothing to resolve a fragment-only link against.
         return False
     else:
         path = base.href
     href = '#'.join((path, frag)) if frag else path
     buf = self.buf
     buf.write(b'filepos=')
     # Remember where the placeholder starts so it can be patched later.
     self.href_offsets[href].append(buf.tell())
     buf.write(b'0000000000')
     return True

Example #9

0

Show file

File: filenames.py Project: pombreda/calibre-1

    def __call__(self, oeb, opts):
        '''
        Rewrite the links in every manifest item, the guide references and
        the TOC using self.url_replacer and self.rename_map.
        '''
        import cssutils
        self.log, self.opts, self.oeb = oeb.logger, opts, oeb

        for item in oeb.manifest.items:
            self.current_item = item
            if etree.iselement(item.data):
                rewrite_links(self.current_item.data, self.url_replacer)
                continue
            if hasattr(item.data, 'cssText'):
                cssutils.replaceUrls(item.data, self.url_replacer)

        if self.oeb.guide:
            for ref in self.oeb.guide.values():
                path, frag = urldefrag(urlnormalize(ref.href))
                renamed = self.rename_map.get(path, None)
                if renamed is None:
                    continue
                # Keep the original fragment on the renamed target.
                ref.href = (renamed + '#' + frag) if frag else renamed

        if self.oeb.toc:
            self.fix_toc_entry(self.oeb.toc)

Example #10

0

Show file

File: serializer.py Project: Eksmo/calibre

 def serialize_href(self, href, base=None):
     '''
     Serialize the href attribute of an <a> or <reference> tag. It is
     serialized as filepos=0000000000 and a pointer to its location is
     stored in self.href_offsets so that the correct value can be filled in
     at the end.

     :param href: The link to serialize.
     :param base: Optional object whose ``abshref()`` resolves relative
         paths and whose ``href`` is the target of fragment-only links.
     :return: True if the placeholder was written. False if the URL cannot
         be parsed, its path is not in the manifest, the target has no
         spine position, or a fragment-only link was given without `base`.
     '''
     hrefs = self.oeb.manifest.hrefs
     try:
         path, frag = urldefrag(urlnormalize(href))
     except ValueError:
         # Unparseable URL
         return False
     if path and base:
         path = base.abshref(path)
     if path and path not in hrefs:
         return False
     item = hrefs[path] if path else None
     if item and item.spine_position is None:
         return False
     if item:
         path = item.href
     elif base is None:
         # Nothing to resolve a fragment-only link against.
         return False
     else:
         path = base.href
     href = '#'.join((path, frag)) if frag else path
     buf = self.buf
     buf.write(b'filepos=')
     # Remember where the placeholder starts so it can be patched later.
     self.href_offsets[href].append(buf.tell())
     buf.write(b'0000000000')
     return True

Example #11

0

Show file

File: filenames.py Project: AEliu/calibre

    def __call__(self, oeb, opts):
        '''
        Apply self.url_replacer to the links of every manifest item, then
        update guide references found in self.rename_map and fix the TOC.
        '''
        import cssutils
        self.log, self.opts, self.oeb = oeb.logger, opts, oeb

        for item in oeb.manifest.items:
            self.current_item = item
            if etree.iselement(item.data):
                rewrite_links(self.current_item.data, self.url_replacer)
                continue
            if hasattr(item.data, 'cssText'):
                cssutils.replaceUrls(item.data, self.url_replacer)

        if self.oeb.guide:
            for ref in self.oeb.guide.values():
                path, frag = urldefrag(urlnormalize(ref.href))
                renamed = self.rename_map.get(path, None)
                if renamed is None:
                    continue
                # Keep the original fragment on the renamed target.
                ref.href = (renamed + '#' + frag) if frag else renamed

        if self.oeb.toc:
            self.fix_toc_entry(self.oeb.toc)

Example #12

0

Show file

File: toc.py Project: MarioJC/calibre

def find_previous_calibre_inline_toc(oeb):
    '''
    Return the manifest item the guide's 'toc' entry points at when its
    document body carries the id "calibre_generated_inline_toc"; otherwise
    return None.
    '''
    if 'toc' not in oeb.guide:
        return None
    # Manifest hrefs are looked up without the fragment.
    toc_path = oeb.guide['toc'].href.partition('#')[0]
    href = urlnormalize(toc_path)
    if href not in oeb.manifest.hrefs:
        return None
    item = oeb.manifest.hrefs[href]
    if not hasattr(item.data, 'xpath'):
        return None
    if XPath('//h:body[@id="calibre_generated_inline_toc"]')(item.data):
        return item
    return None

Example #13

0

Show file

 def _toc_from_html(self, opf):
     '''
     Build the TOC from the links of the HTML document that the guide's
     'toc' reference points at.

     Returns False when the guide has no 'toc' entry, True otherwise.
     '''
     guide = self.oeb.guide
     if 'toc' not in guide:
         return False
     self.log.debug('Reading TOC from HTML...')
     itempath, frag = urldefrag(guide['toc'].href)
     item = self.oeb.manifest.hrefs[itempath]
     root = item.data
     if frag:
         # Find the element named by the fragment (by id, then by name) and
         # climb until an ancestor that contains links is reached.
         matches = (xpath(root, './/*[@id="%s"]' % frag) or
                    xpath(root, './/*[@name="%s"]' % frag))
         scope = matches[0] if matches else root
         while scope != root and not xpath(scope, './/h:a[@href]'):
             scope = scope.getparent()
         root = scope
     titles = {}
     order = []
     for anchor in xpath(root, './/h:a[@href]'):
         href = item.abshref(urlnormalize(anchor.attrib['href']))
         path = urldefrag(href)[0]
         if path not in self.oeb.manifest.hrefs:
             continue
         title = COLLAPSE_RE.sub(' ', xml2text(anchor).strip())
         if href not in titles:
             # Remember first-seen order; repeated links share one entry.
             titles[href] = []
             order.append(href)
         titles[href].append(title)
     toc = self.oeb.toc
     for href in order:
         toc.add(' '.join(titles[href]), href)
     return True

Example #14

0

Show file

File: pdf_output.py Project: smdx023/calibre

    def process_fonts(self):
        '''
        Make sure all fonts are embeddable.

        For every @font-face rule in the book's stylesheets, the referenced
        font is passed through remove_embed_restriction(); when that changes
        the data, the new data is stored on the manifest item and written to
        the container. Rules and fonts that cannot be processed are skipped.
        '''
        from calibre.ebooks.oeb.base import urlnormalize
        from calibre.utils.fonts.utils import remove_embed_restriction

        processed = set()
        for item in list(self.oeb.manifest):
            if not hasattr(item.data, 'cssRules'):
                continue
            for rule in item.data.cssRules:
                if rule.type != rule.FONT_FACE_RULE:
                    continue
                try:
                    src = rule.style.getProperty('src').propertyValue[0].uri
                except Exception:
                    # No usable src in this rule.
                    continue
                path = item.abshref(src)
                ff = self.oeb.manifest.hrefs.get(urlnormalize(path), None)
                if ff is None or path in processed:
                    continue
                # Each font path is handled once, even if several rules
                # point at it.
                processed.add(path)

                raw = ff.data
                try:
                    nraw = remove_embed_restriction(raw)
                except Exception:
                    # Best effort: leave this font untouched.
                    continue
                if nraw != raw:
                    ff.data = nraw
                    self.oeb.container.write(path, nraw)

Example #15

0

Show file

File: subset.py Project: Farb/calibre

def find_font_face_rules(sheet, oeb):
    '''
    Collect the @font-face rules of `sheet` whose font is in the manifest.

    `sheet` may be a ManifestItem or a CSSStyleSheet. Returns a list with one
    properties dict per rule, extended with the keys 'item', 'weight',
    'rule' and 'chars'.
    '''
    try:
        rules = sheet.data.cssRules
    except AttributeError:
        rules = sheet.cssRules

    found = []
    for rule in rules:
        if rule.type != rule.FONT_FACE_RULE:
            continue
        props = get_font_properties(rule, default='normal')
        if not (props['font-family'] and props['src']):
            continue

        try:
            path = sheet.abshref(props['src'])
        except AttributeError:
            path = props['src']
        font_item = oeb.manifest.hrefs.get(urlnormalize(path), None)
        if not font_item:
            continue
        props['item'] = font_item
        weight = props['font-weight']
        if weight in {'bolder', 'lighter'}:
            # Relative weights are replaced by '400'.
            weight = props['font-weight'] = '400'
        props['weight'] = int(weight)
        props['rule'] = rule
        props['chars'] = set()
        found.append(props)

    return found

Example #16

0

Show file

File: subset.py Project: bjhemens/calibre

    def find_embedded_fonts(self):
        """
        Fill self.embedded_fonts with one properties dict for every
        @font-face rule whose font is present in the manifest.
        """
        fonts = self.embedded_fonts = []
        for item in self.oeb.manifest:
            if not hasattr(item.data, "cssRules"):
                continue
            for rule in item.data.cssRules:
                if rule.type != rule.FONT_FACE_RULE:
                    continue
                props = self.get_font_properties(rule, default="normal")
                if not (props["font-family"] and props["src"]):
                    continue

                font_path = item.abshref(props["src"])
                font_item = self.oeb.manifest.hrefs.get(
                    urlnormalize(font_path), None)
                if not font_item:
                    continue
                props["item"] = font_item
                weight = props["font-weight"]
                if weight in {"bolder", "lighter"}:
                    # Relative weights are replaced by "400".
                    weight = props["font-weight"] = "400"
                props["weight"] = int(weight)
                props["chars"] = set()
                props["rule"] = rule
                fonts.append(props)

Example #17

0

Show file

File: subset.py Project: john-peterson/calibre

    def find_embedded_fonts(self):
        '''
        Fill self.embedded_fonts with one properties dict for every
        @font-face rule whose font is present in the manifest.
        '''
        fonts = self.embedded_fonts = []
        for item in self.oeb.manifest:
            if not hasattr(item.data, 'cssRules'):
                continue
            for rule in item.data.cssRules:
                if rule.type != rule.FONT_FACE_RULE:
                    continue
                props = self.get_font_properties(rule, default='normal')
                if not (props['font-family'] and props['src']):
                    continue

                font_path = item.abshref(props['src'])
                font_item = self.oeb.manifest.hrefs.get(
                    urlnormalize(font_path), None)
                if not font_item:
                    continue
                props['item'] = font_item
                weight = props['font-weight']
                if weight in {'bolder', 'lighter'}:
                    # Relative weights are replaced by '400'.
                    weight = props['font-weight'] = '400'
                props['weight'] = int(weight)
                props['chars'] = set()
                props['rule'] = rule
                fonts.append(props)

Example #18

0

Show file

File: subset.py Project: kobolabs/calibre

    def find_embedded_fonts(self):
        '''
        Fill self.embedded_fonts with one properties dict for every
        @font-face rule whose font is present in the manifest.
        '''
        fonts = self.embedded_fonts = []
        for item in self.oeb.manifest:
            if not hasattr(item.data, 'cssRules'):
                continue
            for rule in item.data.cssRules:
                if rule.type != rule.FONT_FACE_RULE:
                    continue
                props = self.get_font_properties(rule, default='normal')
                if not (props['font-family'] and props['src']):
                    continue

                font_path = item.abshref(props['src'])
                font_item = self.oeb.manifest.hrefs.get(
                    urlnormalize(font_path), None)
                if not font_item:
                    continue
                props['item'] = font_item
                weight = props['font-weight']
                if weight in {'bolder', 'lighter'}:
                    # Relative weights are replaced by '400'.
                    weight = props['font-weight'] = '400'
                props['weight'] = int(weight)
                props['chars'] = set()
                props['rule'] = rule
                fonts.append(props)

Example #19

0

Show file

 def _spine_add_extra(self):
     '''
     Add to the spine, as non-linear items, the documents that are reachable
     through <a href> links from spine documents but are not in the spine.
     '''
     manifest = self.oeb.manifest
     spine = self.oeb.spine
     link_selector = XPath('h:body//h:a/@href')
     extras = set()
     pending = set(spine)
     # Each pass follows the links of the documents found by the previous one.
     while pending:
         discovered = set()
         for item in pending:
             if item.media_type not in OEB_DOCS:
                 # TODO: handle fallback chains
                 continue
             for link in link_selector(item.data):
                 target = urldefrag(link)[0]
                 if not target:
                     continue
                 try:
                     target = item.abshref(urlnormalize(target))
                 except ValueError:  # Malformed URL
                     continue
                 if target not in manifest.hrefs:
                     continue
                 found = manifest.hrefs[target]
                 if found.media_type in OEB_DOCS and \
                    found not in spine and found not in extras:
                     discovered.add(found)
         extras.update(discovered)
         pending = discovered
     warn_missing = int(self.oeb.version[0]) >= 2
     for item in sorted(extras):
         if warn_missing:
             self.logger.warn('Spine-referenced file %r not in spine' %
                              item.href)
         spine.add(item, linear=False)

Example #20

0

Show file

File: subset.py Project: WilliamRJohns/glacier.io

def find_font_face_rules(sheet, oeb):
    '''
    Collect the @font-face rules of `sheet` whose font is in the manifest.

    `sheet` may be a ManifestItem or a CSSStyleSheet. Returns a list with one
    properties dict per rule, extended with the keys 'item', 'weight',
    'rule' and 'chars'.
    '''
    try:
        rules = sheet.data.cssRules
    except AttributeError:
        rules = sheet.cssRules

    found = []
    for rule in rules:
        if rule.type != rule.FONT_FACE_RULE:
            continue
        props = get_font_properties(rule, default='normal')
        if not (props['font-family'] and props['src']):
            continue

        try:
            path = sheet.abshref(props['src'])
        except AttributeError:
            path = props['src']
        font_item = oeb.manifest.hrefs.get(urlnormalize(path), None)
        if not font_item:
            continue
        props['item'] = font_item
        weight = props['font-weight']
        if weight in {'bolder', 'lighter'}:
            # Relative weights are replaced by '400'.
            weight = props['font-weight'] = '400'
        props['weight'] = int(weight)
        props['rule'] = rule
        props['chars'] = set()
        found.append(props)

    return found

Example #21

0

Show file

File: toc.py Project: youngshook/KindleEar

    def __init__(self, oeb, opts):
        '''
        Validate the guide's existing 'toc' entry and, when none remains and
        the book has a TOC, generate an in-line TOC document.

        :param oeb: The book; its guide, spine and manifest may be modified.
        :param opts: Options; ``toc_title``, ``mobi_toc_at_start``,
            ``no_inline_toc``, ``extra_css`` and, if present,
            ``mobi_passthrough`` are read.
        '''
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        self.title = opts.toc_title or DEFAULT_TITLE
        self.at_start = opts.mobi_toc_at_start
        self.generated_item = None
        self.added_toc_guide_entry = False
        # NOTE(review): when oeb.toc is falsy this stores oeb.toc itself
        # rather than False.
        self.has_toc = oeb.toc and oeb.toc.count() > 1

        if 'toc' in oeb.guide:
            # Remove spurious toc entry from guide if it is not in spine or it
            # does not have any hyperlinks
            href = urlnormalize(oeb.guide['toc'].href.partition('#')[0])
            if href in oeb.manifest.hrefs:
                item = oeb.manifest.hrefs[href]
                if (hasattr(item.data, 'xpath')
                        and XPath('//h:a[@href]')(item.data)):
                    # A usable TOC document exists: make sure it is in the
                    # spine and generate nothing.
                    if oeb.spine.index(item) < 0:
                        oeb.spine.add(item, linear=False)
                    return
                elif self.has_toc:
                    oeb.guide.remove('toc')
            else:
                oeb.guide.remove('toc')

        # Nothing to generate without a TOC, when a guide entry survived the
        # checks above, or when generation is disabled by the options.
        if (not self.has_toc or 'toc' in oeb.guide or opts.no_inline_toc
                or getattr(opts, 'mobi_passthrough', False)):
            return

        self.log.info('\tGenerating in-line ToC')

        embed_css = ''
        s = getattr(oeb, 'store_embed_font_rules', None)
        if getattr(s, 'body_font_family', None):
            # Carry the stored font rules and body font family into the
            # generated document's CSS.
            css = [x.cssText for x in s.rules
                   ] + ['body { font-family: %s }' % s.body_font_family]
            embed_css = '\n\n'.join(css)

        root = etree.fromstring(
            TEMPLATE.format(xhtmlns=XHTML_NS,
                            title=self.title,
                            embed_css=embed_css,
                            extra_css=(opts.extra_css or '')))
        # The first <ul> of the template receives the TOC entries.
        parent = XPath('//h:ul')(root)[0]
        parent.text = '\n\t'
        for child in self.oeb.toc:
            self.process_toc_node(child, parent)

        id, href = oeb.manifest.generate('contents', 'contents.xhtml')
        item = self.generated_item = oeb.manifest.add(id,
                                                      href,
                                                      XHTML_MIME,
                                                      data=root)
        # Placed first as a linear item, or appended as a non-linear one.
        if self.at_start:
            oeb.spine.insert(0, item, linear=True)
        else:
            oeb.spine.add(item, linear=False)

        oeb.guide.add('toc', 'Table of Contents', href)

Example #22

0

Show file

File: htmltoc.py Project: zyhong/calibre

    def __call__(self, oeb, context):
        '''
        Make sure the book has an in-line HTML TOC.

        If the guide already points at a manifest document that contains
        links, that document is put into the spine (if missing) and nothing
        is generated. Otherwise a stale guide entry is removed and, when the
        book has TOC nodes, a new TOC document and stylesheet are generated
        and registered in the manifest, spine and guide.

        :param oeb: The book; its guide, spine and manifest may be modified.
        :param context: Not used by this method.
        '''
        has_toc = getattr(getattr(oeb, 'toc', False), 'nodes', False)

        if 'toc' in oeb.guide:
            # Ensure toc pointed to in <guide> is in spine
            from calibre.ebooks.oeb.base import urlnormalize
            # Drop the fragment before the manifest lookup; otherwise a
            # reference such as "toc.html#start" is never found and the
            # guide entry is removed.
            href = urlnormalize(oeb.guide['toc'].href.partition('#')[0])
            if href in oeb.manifest.hrefs:
                item = oeb.manifest.hrefs[href]
                if (hasattr(item.data, 'xpath')
                        and XPath('//h:a[@href]')(item.data)):
                    if oeb.spine.index(item) < 0:
                        if self.position == 'end':
                            oeb.spine.add(item, linear=False)
                        else:
                            oeb.spine.insert(0, item, linear=True)
                    return
                elif has_toc:
                    oeb.guide.remove('toc')
            else:
                oeb.guide.remove('toc')
        if not has_toc:
            return
        oeb.logger.info('Generating in-line TOC...')
        title = self.title or oeb.translate(DEFAULT_TITLE)
        style = self.style
        if style not in STYLE_CSS:
            # Unknown styles fall back to 'nested'.
            oeb.logger.error('Unknown TOC style %r' % style)
            style = 'nested'
        css_id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css')
        oeb.manifest.add(css_id, css_href, CSS_MIME, data=STYLE_CSS[style])
        language = unicode_type(oeb.metadata.language[0])
        contents = element(None,
                           XHTML('html'),
                           nsmap={None: XHTML_NS},
                           attrib={XML('lang'): language})
        head = element(contents, XHTML('head'))
        htitle = element(head, XHTML('title'))
        htitle.text = title
        element(head,
                XHTML('link'),
                rel='stylesheet',
                type=CSS_MIME,
                href=css_href)
        body = element(contents,
                       XHTML('body'),
                       attrib={'class': 'calibre_toc'})
        h1 = element(body, XHTML('h2'), attrib={'class': 'calibre_toc_header'})
        h1.text = title
        self.add_toc_level(body, oeb.toc)
        toc_id, href = oeb.manifest.generate('contents', 'contents.xhtml')
        item = oeb.manifest.add(toc_id, href, XHTML_MIME, data=contents)
        # Appended as a non-linear item, or placed first as a linear one.
        if self.position == 'end':
            oeb.spine.add(item, linear=False)
        else:
            oeb.spine.insert(0, item, linear=True)
        oeb.guide.add('toc', 'Table of Contents', href)

Example #23

0

Show file

def find_previous_calibre_inline_toc(oeb):
    """Return the manifest item holding a previously generated inline ToC.

    The guide's ``toc`` reference is resolved (ignoring any ``#fragment``)
    against the manifest.  The item is returned only when its data exposes
    an ``xpath`` attribute and contains a body element whose id is
    ``calibre_generated_inline_toc``.  In every other case the function
    returns ``None``.
    """
    if 'toc' not in oeb.guide:
        return None
    toc_path = oeb.guide['toc'].href.partition('#')[0]
    href = urlnormalize(toc_path)
    if href not in oeb.manifest.hrefs:
        return None
    candidate = oeb.manifest.hrefs[href]
    # Only parsed (tree-like) data can be queried with XPath.
    if not hasattr(candidate.data, 'xpath'):
        return None
    marker = XPath('//h:body[@id="calibre_generated_inline_toc"]')
    if marker(candidate.data):
        return candidate
    return None

Example #24

0

Show file

File: oeb2html.py Project: j-howell/calibre

 def rewrite_link(self, url, page=None):
     """Map *url*, as referenced from *page*, to its output location.

     Without a page the URL is returned untouched.  Otherwise the URL is
     normalized and made absolute relative to *page*; a hit in
     ``self.images`` yields ``'images/<name>'``, a hit in ``self.links``
     yields the stored link, and anything else falls back to *url*.
     """
     if page:
         target = page.abshref(urlnormalize(url))
         if target in self.images:
             return 'images/%s' % self.images[target]
         if target in self.links:
             return self.links[target]
     return url

Example #25

0

Show file

File: rasterize.py Project: zwlistu/calibre

 def rasterize_item(self, item):
     """Rasterize every SVG used by the (X)HTML document *item*.

     ``<img src>`` elements are handled first, then ``<object data>``
     elements of SVG type; each one whose target resolves to a manifest
     entry with the SVG media type is passed to ``rasterize_external``.
     Finally every inline ``<svg>`` element is passed to
     ``rasterize_inline``.
     """
     html = item.data
     hrefs = self.oeb.manifest.hrefs
     # (XPath query, attribute holding the reference), in processing order.
     external_refs = (
         ('//h:img[@src]', 'src'),
         ('//h:object[@type="%s" and @data]' % SVG_MIME, 'data'),
     )
     for query, attr in external_refs:
         for elem in xpath(html, query):
             target = urlnormalize(elem.attrib[attr])
             image = hrefs.get(item.abshref(target), None)
             if image and image.media_type == SVG_MIME:
                 style = self.stylizer(item).style(elem)
                 self.rasterize_external(elem, style, item, image)
     for svg_elem in xpath(html, '//svg:svg'):
         svg_style = self.stylizer(item).style(svg_elem)
         self.rasterize_inline(svg_elem, svg_style, item)

Example #26

0

Show file

File: rasterize.py Project: 089git/calibre

 def rasterize_item(self, item):
     """Replace the SVG content of *item* with rasterized equivalents.

     Processes, in this order: ``<img>`` elements with a ``src``,
     ``<object>`` elements of SVG type with a ``data`` attribute, and
     inline ``<svg>`` elements.  External references are only handled when
     they resolve to a manifest entry whose media type is SVG.
     """
     html = item.data
     hrefs = self.oeb.manifest.hrefs

     def rasterize_if_svg(elem, attr_name):
         # Resolve the reference held in ``attr_name`` and rasterize the
         # element when it points at an SVG manifest entry.
         ref = urlnormalize(elem.attrib[attr_name])
         image = hrefs.get(item.abshref(ref), None)
         if image and image.media_type == SVG_MIME:
             style = self.stylizer(item).style(elem)
             self.rasterize_external(elem, style, item, image)

     for img in xpath(html, '//h:img[@src]'):
         rasterize_if_svg(img, 'src')
     for obj in xpath(html, '//h:object[@type="%s" and @data]' % SVG_MIME):
         rasterize_if_svg(obj, 'data')
     for svg in xpath(html, '//svg:svg'):
         inline_style = self.stylizer(item).style(svg)
         self.rasterize_inline(svg, inline_style, item)

Example #27

0

Show file

 def rewrite_link(self, url, page=None):
     """Return the rewritten form of *url* as seen from *page*.

     When *page* is falsy, or the resolved URL is known neither to
     ``self.images`` nor to ``self.links``, *url* is returned unchanged.
     Images take precedence over links.
     """
     if not page:
         return url
     resolved = page.abshref(urlnormalize(url))
     if resolved in self.images:
         image_name = self.images[resolved]
         return 'images/%s' % image_name
     return self.links[resolved] if resolved in self.links else url

Example #28

0

Show file

File: reader.py Project: yws/calibre

    def _toc_from_navpoint(self, item, toc, navpoint):
        """Recursively add the NCX ``navPoint`` children of *navpoint* to *toc*.

        *item* is used to resolve ``content/@src`` references via
        ``item.abshref``.  *toc* is the node that receives new entries via
        ``toc.add``.  Untitled navPoints are skipped, but their children are
        still processed and attached to *toc*.
        """
        children = xpath(navpoint, 'ncx:navPoint')
        for child in children:
            # Concatenate all label text nodes; COLLAPSE_RE matches are
            # replaced by a single space.
            title = ''.join(xpath(child, 'ncx:navLabel/ncx:text/text()'))
            title = COLLAPSE_RE.sub(' ', title.strip())
            href = xpath(child, 'ncx:content/@src')
            if not title:
                # No usable label: descend, attaching grandchildren to the
                # current parent instead of creating a node for this one.
                self._toc_from_navpoint(item, toc, child)
                continue
            if (not href or not href[0]) and not xpath(child, 'ncx:navPoint'):
                # This node is useless
                continue
            # Absolute, normalized target; empty string when there is no src.
            href = item.abshref(urlnormalize(href[0])) if href and href[0] else ''
            path, _ = urldefrag(href)
            if href and path not in self.oeb.manifest.hrefs:
                self.logger.warn('TOC reference %r not found' % href)
                gc = xpath(child, 'ncx:navPoint')
                if not gc:
                    # This node is useless
                    continue
            id = child.get('id')
            klass = child.get('class', 'chapter')

            # NOTE: the default argument is evaluated eagerly, so
            # next_play_order() is called even when playOrder is present.
            try:
                po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
            except:
                # Fallback for a playOrder value that int() rejects (the bare
                # except also catches everything else raised above).
                po = self.oeb.toc.next_play_order()

            authorElement = xpath(child,
                    'descendant::calibre:meta[@name = "author"]')
            if authorElement:
                author = authorElement[0].text
            else:
                author = None

            descriptionElement = xpath(child,
                    'descendant::calibre:meta[@name = "description"]')
            if descriptionElement:
                # Text-only serialization of the description element.
                description = etree.tostring(descriptionElement[0],
                method='text', encoding=unicode).strip()
                if not description:
                    description = None
            else:
                description = None

            index_image = xpath(child,
                    'descendant::calibre:meta[@name = "toc_thumbnail"]')
            toc_thumbnail = (index_image[0].text if index_image else None)
            # Treat a missing or whitespace-only thumbnail as absent.
            if not toc_thumbnail or not toc_thumbnail.strip():
                toc_thumbnail = None

            node = toc.add(title, href, id=id, klass=klass,
                    play_order=po, description=description, author=author,
                           toc_thumbnail=toc_thumbnail)

            # Children of this navPoint hang off the node just created.
            self._toc_from_navpoint(item, node, child)

Example #29

0

Show file

File: cover.py Project: syn-gowthamsrungarapu/calibre

 def inspect_cover(self, href):
     """Identify the manifest item whose href matches *href*.

     Returns the first two values produced by ``identify_data`` for the
     first matching manifest item whose data can be identified.  If no
     item matches, or identification fails for every match, returns
     ``(None, None)``.  Failures are logged, not raised.
     """
     from calibre.ebooks.oeb.base import urlnormalize
     # The normalized href does not change between items; compute it once.
     wanted = urlnormalize(href)
     for x in self.oeb.manifest:
         if x.href != wanted:
             continue
         try:
             raw = x.data
             return identify_data(raw)[:2]
         except Exception:
             # Best-effort: log and keep looking.  ``Exception`` (rather
             # than a bare except) lets KeyboardInterrupt/SystemExit
             # propagate.
             self.log.exception('Failed to read image dimensions')
     return None, None

Example #30

0

Show file

File: cover.py Project: 089git/calibre

 def inspect_cover(self, href):
     """Look up the cover image *href* in the manifest and identify it.

     Returns the first two values of ``identify_data`` for the matching
     manifest item, or ``(None, None)`` when nothing matches or the data
     cannot be identified.  Identification errors are logged, not raised.
     """
     from calibre.ebooks.oeb.base import urlnormalize
     # Loop-invariant: normalize the requested href a single time.
     normalized = urlnormalize(href)
     for x in self.oeb.manifest:
         if x.href == normalized:
             try:
                 return identify_data(x.data)[:2]
             except Exception:
                 # Do not use a bare except here: it would also swallow
                 # KeyboardInterrupt and SystemExit.
                 self.log.exception('Failed to read image dimensions')
     return None, None

Example #31

0

Show file

File: toc.py Project: BobPyron/calibre

    def __init__(self, oeb, opts):
        """Validate the guide's ToC reference and, if needed, generate an inline ToC.

        Reads ``toc_title``, ``mobi_toc_at_start``, ``no_inline_toc``,
        ``extra_css`` and (optionally) ``mobi_passthrough`` from *opts*.
        When a new ToC page is generated it is added to the manifest, the
        spine and the guide of *oeb*, and stored in ``self.generated_item``.
        """
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        self.title = opts.toc_title or DEFAULT_TITLE
        self.at_start = opts.mobi_toc_at_start
        self.generated_item = None
        self.added_toc_guide_entry = False
        # A ToC only counts when oeb.toc.count() is greater than one.
        self.has_toc = oeb.toc and oeb.toc.count() > 1

        if 'toc' in oeb.guide:
            # Remove spurious toc entry from guide if it is not in spine or it
            # does not have any hyperlinks
            # The fragment is dropped before the lookup in manifest.hrefs.
            href = urlnormalize(oeb.guide['toc'].href.partition('#')[0])
            if href in oeb.manifest.hrefs:
                item = oeb.manifest.hrefs[href]
                if (hasattr(item.data, 'xpath') and
                    XPath('//h:a[@href]')(item.data)):
                    # Existing ToC page is usable; make sure it is in the
                    # spine and stop, nothing needs to be generated.
                    if oeb.spine.index(item) < 0:
                        oeb.spine.add(item, linear=False)
                    return
                elif self.has_toc:
                    # The referenced page has no links, but a ToC can be
                    # generated below to replace it.
                    oeb.guide.remove('toc')
            else:
                oeb.guide.remove('toc')

        if (not self.has_toc or 'toc' in oeb.guide or opts.no_inline_toc or
            getattr(opts, 'mobi_passthrough', False)):
            return

        self.log('\tGenerating in-line ToC')

        # Embedded font rules are only carried over when the stored rules
        # object has a body font family.
        embed_css = ''
        s = getattr(oeb, 'store_embed_font_rules', None)
        if getattr(s, 'body_font_family', None):
            css = [x.cssText for x in s.rules] + [
                    'body { font-family: %s }'%s.body_font_family]
            embed_css = '\n\n'.join(css)

        root = etree.fromstring(TEMPLATE.format(xhtmlns=XHTML_NS,
            title=self.title, embed_css=embed_css,
            extra_css=(opts.extra_css or '')))
        # The first <ul> of the template receives the top-level entries.
        parent = XPath('//h:ul')(root)[0]
        parent.text = '\n\t'
        for child in self.oeb.toc:
            self.process_toc_node(child, parent)

        id, href = oeb.manifest.generate('contents', 'contents.xhtml')
        item = self.generated_item = oeb.manifest.add(id, href, XHTML_MIME,
                data=root)
        # Linear at the very start of the spine, or non-linear at its end.
        if self.at_start:
            oeb.spine.insert(0, item, linear=True)
        else:
            oeb.spine.add(item, linear=False)

        oeb.guide.add('toc', 'Table of Contents', href)

Example #32

0

Show file

File: pdf_output.py Project: wynick27/calibre

    def handle_embedded_fonts(self):
        ''' On windows, Qt uses GDI which does not support OpenType
        (CFF) fonts, so we need to nuke references to OpenType
        fonts. Qt's directwrite text backend is not mature.
        Also make sure all fonts are embeddable. '''
        from calibre.ebooks.oeb.base import urlnormalize
        from calibre.utils.fonts.utils import remove_embed_restriction
        from PyQt5.Qt import QByteArray, QRawFont

        font_warnings = set()  # paths already warned about
        processed = set()  # paths already passed to remove_embed_restriction
        is_cff = {}  # path -> cached result of the font check below
        # Iterate over a snapshot of the manifest.
        for item in list(self.oeb.manifest):
            # Only items whose data exposes cssRules are inspected.
            if not hasattr(item.data, 'cssRules'):
                continue
            remove = set()  # indices of @font-face rules to drop from this sheet
            for i, rule in enumerate(item.data.cssRules):
                if rule.type == rule.FONT_FACE_RULE:
                    try:
                        s = rule.style
                        src = s.getProperty('src').propertyValue[0].uri
                    except:
                        # No usable src URI on this rule; skip it.
                        continue
                    path = item.abshref(src)
                    ff = self.oeb.manifest.hrefs.get(urlnormalize(path), None)
                    if ff is None:
                        continue

                    raw = nraw = ff.data
                    if path not in processed:
                        # Marked as processed before the attempt, so a
                        # failing font is not retried for later rules.
                        processed.add(path)
                        try:
                            nraw = remove_embed_restriction(raw)
                        except:
                            continue
                        if nraw != raw:
                            # Store the modified font data on the manifest
                            # item and write it to the container.
                            ff.data = nraw
                            self.oeb.container.write(path, nraw)

                    if iswindows:
                        if path not in is_cff:
                            f = QRawFont(QByteArray(nraw), 12)
                            # Flagged when the font is valid but yields an
                            # empty 'head' table.
                            is_cff[path] = f.isValid() and len(
                                f.fontTable('head')) == 0
                        if is_cff[path]:
                            if path not in font_warnings:
                                font_warnings.add(path)
                                self.log.warn(
                                    'CFF OpenType fonts are not supported on windows, ignoring: %s'
                                    % path)
                            remove.add(i)
            # Pop from the highest index down so earlier indices stay valid.
            for i in sorted(remove, reverse=True):
                item.data.cssRules.pop(i)

Example #33

0

Show file

File: htmltoc.py Project: 089git/calibre

    def __call__(self, oeb, context):
        """Make sure *oeb* has an inline HTML table of contents.

        If the guide already references a ToC page that is in the manifest
        and contains hyperlinks, that page is added to the spine (if it is
        not there yet) and nothing is generated.  Otherwise a stale guide
        entry is removed and, when ``oeb.toc`` has nodes, a new
        ``contents.xhtml`` page plus its stylesheet is generated and added
        to the manifest, spine and guide.  *context* is not used here.
        """
        has_toc = getattr(getattr(oeb, 'toc', False), 'nodes', False)

        if 'toc' in oeb.guide:
            # Ensure toc pointed to in <guide> is in spine
            from calibre.ebooks.oeb.base import urlnormalize
            # The guide href may carry a '#fragment', while manifest hrefs
            # are looked up by document path.  Strip the fragment so a valid
            # reference such as 'toc.html#start' is not treated as missing.
            href = urlnormalize(oeb.guide['toc'].href.partition('#')[0])
            if href in oeb.manifest.hrefs:
                item = oeb.manifest.hrefs[href]
                if (hasattr(item.data, 'xpath') and
                    XPath('//h:a[@href]')(item.data)):
                    if oeb.spine.index(item) < 0:
                        if self.position == 'end':
                            oeb.spine.add(item, linear=False)
                        else:
                            oeb.spine.insert(0, item, linear=True)
                    return
                elif has_toc:
                    # Referenced page has no links; it is replaced below.
                    oeb.guide.remove('toc')
            else:
                oeb.guide.remove('toc')
        if not has_toc:
            return
        oeb.logger.info('Generating in-line TOC...')
        title = self.title or oeb.translate(DEFAULT_TITLE)
        style = self.style
        if style not in STYLE_CSS:
            oeb.logger.error('Unknown TOC style %r' % style)
            style = 'nested'
        id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css')
        oeb.manifest.add(id, css_href, CSS_MIME, data=STYLE_CSS[style])
        language = str(oeb.metadata.language[0])
        contents = element(None, XHTML('html'), nsmap={None: XHTML_NS},
                           attrib={XML('lang'): language})
        head = element(contents, XHTML('head'))
        htitle = element(head, XHTML('title'))
        htitle.text = title
        element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME,
                href=css_href)
        body = element(contents, XHTML('body'),
                       attrib={'class': 'calibre_toc'})
        h1 = element(body, XHTML('h2'),
                     attrib={'class': 'calibre_toc_header'})
        h1.text = title
        self.add_toc_level(body, oeb.toc)
        id, href = oeb.manifest.generate('contents', 'contents.xhtml')
        item = oeb.manifest.add(id, href, XHTML_MIME, data=contents)
        # Non-linear at the end of the spine, or linear at its start.
        if self.position == 'end':
            oeb.spine.add(item, linear=False)
        else:
            oeb.spine.insert(0, item, linear=True)
        oeb.guide.add('toc', 'Table of Contents', href)

Example #34

0

Show file

File: boss.py Project: CyberTech/calibre

 def rename_requested(self, oldname, newname):
     """Rename *oldname* to *newname* after asking for any needed confirmation.

     The user is asked to confirm when the rename changes the guessed file
     type, and again when *newname* is altered by URL normalization.
     Declining either prompt aborts the rename.  Otherwise a savepoint is
     added and the rename runs as a blocking job.
     """
     self.commit_all_editors_to_container()

     if guess_type(oldname) != guess_type(newname):
         name_parts = os.path.splitext(oldname) + os.path.splitext(newname)
         type_warning = _(
             "You are changing the file type of {0}<b>{1}</b> to {2}<b>{3}</b>."
             " Doing so can cause problems, are you sure?"
         ).format(*name_parts)
         accepted = confirm(
             type_warning,
             "confirm-filetype-change",
             parent=self.gui,
             title=_("Are you sure?"),
             config_set=tprefs,
         )
         if not accepted:
             return

     normalized = urlnormalize(newname)
     if normalized != newname:
         unsafe_warning = _(
             "The name you have chosen {0} contains special characters, internally"
             " it will look like: {1}Try to use only the English alphabet [a-z], numbers [0-9],"
             " hyphens and underscores for file names. Other characters can cause problems for "
             " different ebook viewers. Are you sure you want to proceed?"
         ).format("<pre>%s</pre>" % newname, "<pre>%s</pre>" % normalized)
         accepted = confirm(
             unsafe_warning,
             "confirm-urlunsafe-change",
             parent=self.gui,
             title=_("Are you sure?"),
             config_set=tprefs,
         )
         if not accepted:
             return

     self.add_savepoint(_("Rename %s") % oldname)
     name_map = {oldname: newname}
     on_done = partial(self.rename_done, name_map)
     self.gui.blocking_job(
         "rename_file",
         _("Renaming and updating links..."),
         on_done,
         rename_files,
         current_container(),
         name_map,
     )

Example #35

0

Show file

File: main.py Project: 2014gwang/KindleEar

 def pointer(item, oref):
     """Translate the reference *oref* found in *item* into a ``kindle:embed`` URL.

     References that are not present in ``self.resources.item_map`` are
     returned unchanged.  Resources whose record does not start with
     ``b'FONT'`` are treated as images: they are recorded in
     ``self.used_images`` and get a ``?mime=`` suffix.
     """
     ref = urlnormalize(item.abshref(oref))
     idx = self.resources.item_map.get(ref, None)
     if idx is None:
         return oref
     # The record lookup uses idx-1.
     is_font = self.resources.records[idx-1][:4] in {b'FONT'}
     embed = to_ref(idx)
     if is_font:
         return 'kindle:embed:%s'%embed
     self.used_images.add(ref)
     return 'kindle:embed:%s?mime=%s'%(embed,
             self.resources.mime_map[ref])

Example #36

0

Show file

 def pointer(item, oref):
     """Return the ``kindle:embed`` URL for *oref*, or *oref* itself if unknown.

     *oref* is resolved relative to *item* and normalized before being
     looked up in ``self.resources.item_map``.  Non-font resources are
     added to ``self.used_images`` and carry their MIME type in the URL.
     """
     resolved = urlnormalize(item.abshref(oref))
     index = self.resources.item_map.get(resolved, None)
     if index is None:
         return oref
     record_tag = self.resources.records[index - 1][:4]
     is_image = record_tag not in {b'FONT'}
     encoded = to_ref(index)
     if not is_image:
         return 'kindle:embed:%s' % encoded
     self.used_images.add(resolved)
     mime = self.resources.mime_map[resolved]
     return 'kindle:embed:%s?mime=%s' % (encoded, mime)

Example #37

0

Show file

    def process_fonts(self):
        ''' Make sure all fonts are embeddable. Also remove some fonts that cause problems. '''
        from calibre.ebooks.oeb.base import urlnormalize
        from calibre.utils.fonts.utils import remove_embed_restriction

        processed = set()  # font paths already passed to remove_embed_restriction
        # Iterate over a snapshot of the manifest.
        for item in list(self.oeb.manifest):
            # Only items whose data exposes cssRules are inspected.
            if not hasattr(item.data, 'cssRules'):
                continue
            for i, rule in enumerate(item.data.cssRules):
                if rule.type == rule.FONT_FACE_RULE:
                    try:
                        s = rule.style
                        src = s.getProperty('src').propertyValue[0].uri
                    except:
                        # No usable src URI on this rule; skip it.
                        continue
                    path = item.abshref(src)
                    ff = self.oeb.manifest.hrefs.get(urlnormalize(path), None)
                    if ff is None:
                        continue

                    raw = nraw = ff.data
                    if path not in processed:
                        # Marked before the attempt, so a failing font is
                        # not retried for later rules.
                        processed.add(path)
                        try:
                            nraw = remove_embed_restriction(raw)
                        except:
                            continue
                        if nraw != raw:
                            # Store the modified font data on the manifest
                            # item and write it to the container.
                            ff.data = nraw
                            self.oeb.container.write(path, nraw)
                elif iswindows and rule.type == rule.STYLE_RULE:
                    from tinycss.fonts3 import parse_font_family, serialize_font_family
                    s = rule.style
                    f = s.getProperty(u'font-family')
                    if f is not None:
                        font_families = parse_font_family(
                            f.propertyValue.cssText)
                        # Family list without any case-insensitive 'courier'.
                        ff = [
                            x for x in font_families if x.lower() != u'courier'
                        ]
                        if len(ff) != len(font_families):
                            # Warn only once per instance.
                            if 'courier' not in self.filtered_font_warnings:
                                # See https://bugs.launchpad.net/bugs/1665835
                                self.filtered_font_warnings.add(u'courier')
                                self.log.warn(
                                    u'Removing courier font family as it does not render on windows'
                                )
                            # Fall back to 'monospace' if nothing is left.
                            f.propertyValue.cssText = serialize_font_family(
                                ff or [u'monospace'])

Example #38

0

Show file

File: filenames.py Project: AEliu/calibre

    def fix_toc_entry(self, toc):
        """Apply ``self.rename_map`` to *toc* and, recursively, to its children.

        The entry's href is normalized and split into path and fragment.
        If the path has a replacement in the rename map, the href is
        rewritten to the replacement, keeping the fragment if there is one.
        """
        if toc.href:
            path, fragment = urldefrag(urlnormalize(toc.href))
            renamed = self.rename_map.get(path, None)
            if renamed is not None:
                toc.href = '#'.join((renamed, fragment)) if fragment else renamed

        for child in toc:
            self.fix_toc_entry(child)

Example #39

0

Show file

    def fix_toc_entry(self, toc):
        """Rewrite the href of *toc* (and of every descendant) after a rename.

        Entries without an href, and entries whose normalized path has no
        entry in ``self.rename_map``, are left unchanged.  A fragment on
        the original href is carried over to the new one.
        """
        original = toc.href
        if original:
            normalized = urlnormalize(original)
            base, anchor = urldefrag(normalized)
            target = self.rename_map.get(base, None)
            if target is not None:
                if anchor:
                    target = '#'.join((target, anchor))
                toc.href = target

        for entry in toc:
            self.fix_toc_entry(entry)

Example #40

0

Show file

File: pdf_output.py Project: botmtl/calibre

    def handle_embedded_fonts(self):
        """ On windows, Qt uses GDI which does not support OpenType
        (CFF) fonts, so we need to nuke references to OpenType
        fonts. Qt's directwrite text backend is not mature.
        Also make sure all fonts are embeddable. """
        from calibre.ebooks.oeb.base import urlnormalize
        from calibre.utils.fonts.utils import remove_embed_restriction
        from PyQt5.Qt import QByteArray, QRawFont

        font_warnings = set()  # paths already warned about
        processed = set()  # paths already passed to remove_embed_restriction
        is_cff = {}  # path -> cached result of the font check below
        # Iterate over a snapshot of the manifest.
        for item in list(self.oeb.manifest):
            # Only items whose data exposes cssRules are inspected.
            if not hasattr(item.data, "cssRules"):
                continue
            remove = set()  # indices of @font-face rules to drop from this sheet
            for i, rule in enumerate(item.data.cssRules):
                if rule.type == rule.FONT_FACE_RULE:
                    try:
                        s = rule.style
                        src = s.getProperty("src").propertyValue[0].uri
                    except:
                        # No usable src URI on this rule; skip it.
                        continue
                    path = item.abshref(src)
                    ff = self.oeb.manifest.hrefs.get(urlnormalize(path), None)
                    if ff is None:
                        continue

                    raw = nraw = ff.data
                    if path not in processed:
                        # Marked before the attempt, so a failing font is
                        # not retried for later rules.
                        processed.add(path)
                        try:
                            nraw = remove_embed_restriction(raw)
                        except:
                            continue
                        if nraw != raw:
                            # Store the modified font data on the manifest
                            # item and write it to the container.
                            ff.data = nraw
                            self.oeb.container.write(path, nraw)

                    if iswindows:
                        if path not in is_cff:
                            f = QRawFont(QByteArray(nraw), 12)
                            # Flagged when the font is valid but yields an
                            # empty 'head' table.
                            is_cff[path] = f.isValid() and len(f.fontTable("head")) == 0
                        if is_cff[path]:
                            if path not in font_warnings:
                                font_warnings.add(path)
                                self.log.warn("CFF OpenType fonts are not supported on windows, ignoring: %s" % path)
                            remove.add(i)
            # Pop from the highest index down so earlier indices stay valid.
            for i in sorted(remove, reverse=True):
                item.data.cssRules.pop(i)

Example #41

0

Show file

File: boss.py Project: mayankgautam/calibre

 def rename_requested(self, oldname, newname):
     """Handle a request to rename *oldname* to *newname*.

     Asks for confirmation when the guessed file type changes and when
     *newname* differs from its URL-normalized form.  If the user declines
     either prompt nothing happens.  Otherwise a savepoint is added and
     the rename is started as a blocking job.
     """
     self.commit_all_editors_to_container()

     type_changed = guess_type(oldname) != guess_type(newname)
     if type_changed:
         args = os.path.splitext(oldname) + os.path.splitext(newname)
         question = _('You are changing the file type of {0}<b>{1}</b> to {2}<b>{3}</b>.'
                      ' Doing so can cause problems, are you sure?').format(*args)
         if not confirm(question, 'confirm-filetype-change', parent=self.gui,
                        title=_('Are you sure?'), config_set=tprefs):
             return

     safe_name = urlnormalize(newname)
     if safe_name != newname:
         question = _('The name you have chosen {0} contains special characters, internally'
                      ' it will look like: {1}Try to use only the English alphabet [a-z], numbers [0-9],'
                      ' hyphens and underscores for file names. Other characters can cause problems for '
                      ' different ebook viewers. Are you sure you want to proceed?').format(
                          '<pre>%s</pre>'%newname, '<pre>%s</pre>' % safe_name)
         if not confirm(question, 'confirm-urlunsafe-change', parent=self.gui,
                        title=_('Are you sure?'), config_set=tprefs):
             return

     self.add_savepoint(_('Rename %s') % oldname)
     job_title = _('Renaming and updating links...')
     on_done = partial(self.rename_done, oldname, newname)
     self.gui.blocking_job(
         'rename_file', job_title, on_done,
         rename_files, current_container(), {oldname: newname})

Example #42

0

Show file

File: htmltoc.py Project: Chansie/KindleEar

    def __call__(self, oeb, context):
        """Make sure *oeb* has an inline HTML table of contents.

        A guide ``toc`` reference that resolves to a manifest page with
        hyperlinks is kept and that page is added to the spine if missing.
        A reference that cannot be resolved is removed from the guide.  When
        ``oeb.toc`` has nodes and no usable page exists, a new
        ``contents.xhtml`` page and its stylesheet are generated and added
        to the manifest, spine and guide.  *context* is not used here.
        """
        has_toc = getattr(getattr(oeb, "toc", False), "nodes", False)

        if "toc" in oeb.guide:
            # Ensure toc pointed to in <guide> is in spine
            from calibre.ebooks.oeb.base import urlnormalize

            # The guide href may carry a '#fragment', while manifest hrefs
            # are looked up by document path.  Strip the fragment so a valid
            # reference such as 'toc.html#start' is not treated as missing.
            href = urlnormalize(oeb.guide["toc"].href.partition("#")[0])
            if href in oeb.manifest.hrefs:
                item = oeb.manifest.hrefs[href]
                if hasattr(item.data, "xpath") and XPath("//h:a[@href]")(item.data):
                    if oeb.spine.index(item) < 0:
                        if self.position == "end":
                            oeb.spine.add(item, linear=False)
                        else:
                            oeb.spine.insert(0, item, linear=True)
                    return
                elif has_toc:
                    # Referenced page has no links; it is replaced below.
                    oeb.guide.remove("toc")
            else:
                oeb.guide.remove("toc")
        if not has_toc:
            return
        oeb.logger.info("Generating in-line TOC...")
        title = self.title or oeb.translate(DEFAULT_TITLE)
        style = self.style
        if style not in STYLE_CSS:
            oeb.logger.error("Unknown TOC style %r" % style)
            style = "nested"
        id, css_href = oeb.manifest.generate("tocstyle", "tocstyle.css")
        oeb.manifest.add(id, css_href, CSS_MIME, data=STYLE_CSS[style])
        language = str(oeb.metadata.language[0])
        contents = element(None, XHTML("html"), nsmap={None: XHTML_NS}, attrib={XML("lang"): language})
        head = element(contents, XHTML("head"))
        htitle = element(head, XHTML("title"))
        htitle.text = title
        element(head, XHTML("link"), rel="stylesheet", type=CSS_MIME, href=css_href)
        body = element(contents, XHTML("body"), attrib={"class": "calibre_toc"})
        h1 = element(body, XHTML("h1"), attrib={"class": "calibre_toc_header"})
        h1.text = title
        self.add_toc_level(body, oeb.toc)
        id, href = oeb.manifest.generate("contents", "contents.xhtml")
        item = oeb.manifest.add(id, href, XHTML_MIME, data=contents)
        # Non-linear at the end of the spine, or linear at its start.
        if self.position == "end":
            oeb.spine.add(item, linear=False)
        else:
            oeb.spine.insert(0, item, linear=True)
        oeb.guide.add("toc", "Table of Contents", href)

Example #43

0

Show file

    def replace_internal_links_with_placeholders(self):
        """Swap links between spine items for ``kindle:pos`` placeholders.

        Every ``<a href>`` in every spine item is numbered.  Links whose
        normalized target (without fragment) is a spine item get a
        placeholder href, and ``self.link_map`` records
        ``placeholder -> (target href, fragment)``.
        """
        self.link_map = {}
        spine_hrefs = {entry.href for entry in self.oeb.spine}
        link_number = 0
        for item in self.oeb.spine:
            root = self.data(item)

            for anchor in XPath('//h:a[@href]')(root):
                # Numbering covers all links, not only the rewritten ones.
                link_number += 1
                target = item.abshref(anchor.get('href'))
                path, _sep, frag = target.partition('#')
                path = urlnormalize(path)
                if path not in spine_hrefs:
                    continue
                placeholder = 'kindle:pos:fid:0000:off:%s'%to_href(link_number)
                self.link_map[placeholder] = (path, frag)
                anchor.set('href', placeholder)

Example #44

0

Show file

File: main.py Project: 2014gwang/KindleEar

    def replace_internal_links_with_placeholders(self):
        """Replace intra-book links with placeholders and remember their targets.

        ``self.link_map`` is reset and then filled with one entry per
        rewritten link: the placeholder href maps to a
        ``(spine href, fragment)`` tuple.  Links that do not point at a
        spine item are left untouched, but still consume a link number.
        """
        self.link_map = {}
        known = {spine_item.href for spine_item in self.oeb.spine}
        seen = 0
        for item in self.oeb.spine:
            tree = self.data(item)

            for link in XPath('//h:a[@href]')(tree):
                seen += 1
                parts = item.abshref(link.get('href')).partition('#')
                href = urlnormalize(parts[0])
                frag = parts[2]
                if href in known:
                    placeholder = 'kindle:pos:fid:0000:off:%s'%to_href(seen)
                    self.link_map[placeholder] = (href, frag)
                    link.set('href', placeholder)

Example #45

0

Show file

File: toc.py Project: syn-gowthamsrungarapu/calibre

    def __init__(self, oeb, opts):
        """Validate the guide's ToC reference and, if needed, generate an inline ToC.

        Reads ``toc_title``, ``mobi_toc_at_start`` and ``no_inline_toc``
        from *opts*.  An existing guide ``toc`` reference is kept when it
        resolves to a manifest page containing hyperlinks (the page is
        added to the spine if missing).  Otherwise the stale reference is
        removed and, when the book has a ToC and inline generation is not
        disabled, a new ``contents.xhtml`` page is generated, stored in
        ``self.generated_item`` and added to the manifest, spine and guide.
        """
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        self.title = opts.toc_title or DEFAULT_TITLE
        self.at_start = opts.mobi_toc_at_start
        self.generated_item = None
        self.added_toc_guide_entry = False
        # A ToC only counts when oeb.toc.count() is greater than one.
        self.has_toc = oeb.toc and oeb.toc.count() > 1

        if 'toc' in oeb.guide:
            # Remove spurious toc entry from guide if it is not in spine or it
            # does not have any hyperlinks
            # The guide href may carry a '#fragment', while manifest hrefs
            # are looked up by document path.  Strip the fragment so a valid
            # reference such as 'toc.html#start' is not treated as missing.
            href = urlnormalize(oeb.guide['toc'].href.partition('#')[0])
            if href in oeb.manifest.hrefs:
                item = oeb.manifest.hrefs[href]
                if (hasattr(item.data, 'xpath')
                        and XPath('//h:a[@href]')(item.data)):
                    if oeb.spine.index(item) < 0:
                        oeb.spine.add(item, linear=False)
                    return
                elif self.has_toc:
                    # Referenced page has no links; it is replaced below.
                    oeb.guide.remove('toc')
            else:
                oeb.guide.remove('toc')

        if not self.has_toc or 'toc' in oeb.guide or opts.no_inline_toc:
            return

        self.log('\tGenerating in-line ToC')

        root = etree.fromstring(
            TEMPLATE.format(xhtmlns=XHTML_NS, title=self.title))
        # The first <ul> of the template receives the top-level entries.
        parent = XPath('//h:ul')(root)[0]
        parent.text = '\n\t'
        for child in self.oeb.toc:
            self.process_toc_node(child, parent)

        id, href = oeb.manifest.generate('contents', 'contents.xhtml')
        item = self.generated_item = oeb.manifest.add(id,
                                                      href,
                                                      XHTML_MIME,
                                                      data=root)
        # Linear at the very start of the spine, or non-linear at its end.
        if self.at_start:
            oeb.spine.insert(0, item, linear=True)
        else:
            oeb.spine.add(item, linear=False)

        oeb.guide.add('toc', 'Table of Contents', href)

Example #46

0

Show file

    def rewrite_links(self, url):
        """Redirect *url* to the new location recorded in ``self.map``.

        URLs that ``abshref`` cannot resolve (``ValueError``) and URLs whose
        normalized target is not in ``self.map`` are returned unchanged.
        Otherwise the target registered for the URL's fragment (or for
        ``None`` when there is no fragment) is made relative to the current
        item, and the fragment is appended again.
        """
        href, frag = urldefrag(url)
        try:
            href = self.current_item.abshref(href)
        except ValueError:
            # Unparseable URL
            return url
        href = urlnormalize(href)
        if href not in self.map:
            return url
        target = self.map[href][frag or None]
        target = self.current_item.relhref(target)
        if frag:
            target = '#'.join((urlunquote(target), frag))
        return target

Example #47

0

Show file

File: rasterize.py Project: kobolabs/calibre

 def dataize_svg(self, item, svg=None):
     """Inline manifest resources referenced via ``xlink:href`` as ``data:`` URIs.

     Works on *svg*, or on ``item.data`` when *svg* is ``None``, and
     returns that same tree.  References without a path part, and
     references that do not resolve to a manifest entry, are left alone.
     """
     if svg is None:
         svg = item.data
     hrefs = self.oeb.manifest.hrefs
     for elem in xpath(svg, '//svg:*[@xl:href]'):
         attr = XLINK('href')
         path = urldefrag(urlnormalize(elem.attrib[attr]))[0]
         if not path:
             continue
         abshref = item.abshref(path)
         if abshref in hrefs:
             linkee = hrefs[abshref]
             payload = base64.encodestring(str(linkee))
             elem.attrib[attr] = "data:%s;base64,%s" % (linkee.media_type, payload)
     return svg

Example #48

0

Show file

File: jacket.py Project: jimman2003/calibre

 def remove_images(self, item, limit=1):
     """Remove up to *limit* manifest images referenced by ``<img>`` tags in *item*.

     Each image source is looked up in the manifest first as-is and then
     in URL-normalized form.  A found image is removed from the manifest
     and the guide, and its ``<img>`` element is detached from the
     document.  Returns the number of images removed.
     """
     find_images = XPath('//h:img[@src]')
     removed = 0
     for img in find_images(item.data):
         if removed >= limit:
             break
         href = item.abshref(img.get('src'))
         image = self.oeb.manifest.hrefs.get(href)
         if image is None:
             # Retry with the normalized form of the href.
             href = urlnormalize(href)
             image = self.oeb.manifest.hrefs.get(href)
         if image is None:
             continue
         self.oeb.manifest.remove(image)
         self.oeb.guide.remove_by_href(href)
         img.getparent().remove(img)
         removed += 1
     return removed

Example #49

0

Show file

File: jacket.py Project: JimmXinu/calibre

 def remove_images(self, item, limit=1):
     """Strip at most *limit* images from *item* and return how many were removed.

     Only ``<img>`` elements whose source resolves to a manifest entry
     (directly, or after URL normalization) are affected.  The image is
     removed from the manifest and the guide, and the element is removed
     from its parent.
     """
     count = 0
     for node in XPath('//h:img[@src]')(item.data):
         if count >= limit:
             break
         href = item.abshref(node.get('src'))
         entry = self.oeb.manifest.hrefs.get(href)
         if entry is None:
             href = urlnormalize(href)
             entry = self.oeb.manifest.hrefs.get(href)
         if entry is not None:
             self.oeb.manifest.remove(entry)
             self.oeb.guide.remove_by_href(href)
             parent = node.getparent()
             parent.remove(node)
             count += 1
     return count

Example #50

0

Show file

File: filenames.py Project: AEliu/calibre

    def url_replacer(self, orig_url):
        """Return ``orig_url`` rewritten to account for renamed items.

        URLs that carry a scheme are returned unchanged. Otherwise the
        path is resolved against the item the current item was renamed
        from (when ``self.renamed_items_map`` has one), mapped through
        ``self.rename_map``, made relative to the current item, and the
        original fragment is appended again.
        """
        normalized = urlnormalize(orig_url)
        if urlparse(normalized).scheme:
            # Only local URLs are rewritten.
            return orig_url
        local_path, fragment = urldefrag(normalized)

        source_item = self.current_item
        if self.renamed_items_map:
            source_item = self.renamed_items_map.get(
                self.current_item.href, self.current_item)

        absolute = source_item.abshref(local_path)
        new_target = self.rename_map.get(absolute, absolute)
        rewritten = self.current_item.relhref(new_target)
        if not fragment:
            return rewritten
        return rewritten + ('#' + fragment)

Example #51

0

Show file

File: split.py Project: iwannafly/calibre

    def rewrite_links(self, url):
        """Map ``url`` to its replacement as recorded in ``self.map``.

        The fragment is split off, the rest is resolved against the current
        item and normalized. If that href is a key of ``self.map``, the
        entry for the fragment (or for ``None`` when there is no fragment)
        is made relative to the current item and returned, with the
        fragment re-attached. In every other case ``url`` is returned
        unchanged.
        """
        base, fragment = urldefrag(url)
        try:
            absolute = self.current_item.abshref(base)
        except ValueError:
            # The URL could not be parsed; pass it through untouched.
            return url

        key = urlnormalize(absolute)
        if key not in self.map:
            return url

        anchors = self.map[key]
        target = anchors[fragment or None]
        relative = self.current_item.relhref(target)
        if not fragment:
            return relative
        return '#'.join((urlunquote(relative), fragment))

Example #52

0

Show file

File: rasterize.py Project: BobPyron/calibre

 def dataize_svg(self, item, svg=None):
     """Embed manifest resources linked from an SVG as base64 ``data:`` URIs.

     For each ``svg:*`` element with an ``xlink:href`` attribute, the
     normalized, fragment-less path is resolved with ``item.abshref``. If
     the result is a key of ``self.oeb.manifest.hrefs``, the attribute is
     overwritten with a ``data:<media type>;base64,<payload>`` URI.

     :param item: object providing ``abshref()`` and, when ``svg`` is
         omitted, the tree itself as ``item.data``.
     :param svg: tree to process; ``item.data`` is used when None.
     :return: the tree that was processed, modified in place.
     """
     if svg is None:
         svg = item.data
     hrefs = self.oeb.manifest.hrefs
     for elem in xpath(svg, '//svg:*[@xl:href]'):
         href = urlnormalize(elem.attrib[XLINK('href')])
         path = urldefrag(href)[0]
         if not path:
             # Fragment-only reference; there is no file to embed.
             continue
         abshref = item.abshref(path)
         if abshref not in hrefs:
             # Not a manifest resource; keep the original reference.
             continue
         linkee = hrefs[abshref]
         # NOTE(review): str(linkee) presumably serializes the manifest
         # entry's content -- confirm against the manifest item class.
         #
         # Use b64encode() rather than encodestring(): the latter breaks
         # its output into 76-character lines with a trailing newline, so
         # the URI ended up containing line breaks, and the function no
         # longer exists as of Python 3.9.
         payload = base64.b64encode(str(linkee))
         data = "data:%s;base64,%s" % (linkee.media_type, payload)
         elem.attrib[XLINK('href')] = data
     return svg

Example #53

0

Show file

File: toc.py Project: Eksmo/calibre

    def __init__(self, oeb, opts):
        """Check the guide's 'toc' entry and, if needed, build an inline ToC.

        An existing guide 'toc' entry is kept only when it points at a
        manifest item whose data contains hyperlinks; in that case the item
        is added to the spine if missing and nothing is generated. A new
        ToC page is generated only when ``oeb.toc`` has more than one
        entry, no 'toc' guide entry remains and ``opts.no_inline_toc`` is
        not set.
        """
        self.oeb = oeb
        self.opts = opts
        self.log = oeb.log
        self.title = opts.toc_title or DEFAULT_TITLE
        self.at_start = opts.mobi_toc_at_start
        self.generated_item = None
        self.added_toc_guide_entry = False
        self.has_toc = oeb.toc and oeb.toc.count() > 1

        if 'toc' in oeb.guide:
            # A guide entry is spurious when its target is not in the
            # manifest, or when it has no hyperlinks and a real ToC exists.
            guide_href = urlnormalize(oeb.guide['toc'].href)
            if guide_href not in oeb.manifest.hrefs:
                oeb.guide.remove('toc')
            else:
                target = oeb.manifest.hrefs[guide_href]
                if (hasattr(target.data, 'xpath') and
                        XPath('//h:a[@href]')(target.data)):
                    # Usable ToC page: make sure it is in the spine, done.
                    if oeb.spine.index(target) < 0:
                        oeb.spine.add(target, linear=False)
                    return
                if self.has_toc:
                    oeb.guide.remove('toc')

        should_generate = (self.has_toc and 'toc' not in oeb.guide and
                           not opts.no_inline_toc)
        if not should_generate:
            return

        self.log('\tGenerating in-line ToC')

        markup = TEMPLATE.format(xhtmlns=XHTML_NS, title=self.title)
        root = etree.fromstring(markup)
        listing = XPath('//h:ul')(root)[0]
        listing.text = '\n\t'
        for node in self.oeb.toc:
            self.process_toc_node(node, listing)

        item_id, item_href = oeb.manifest.generate('contents',
                                                   'contents.xhtml')
        generated = oeb.manifest.add(item_id, item_href, XHTML_MIME,
                                     data=root)
        self.generated_item = generated
        if self.at_start:
            oeb.spine.insert(0, generated, linear=True)
        else:
            oeb.spine.add(generated, linear=False)

        oeb.guide.add('toc', 'Table of Contents', item_href)

Example #54

0

Show file

File: reader.py Project: yws/calibre

 def _guide_from_opf(self, opf):
     """Populate ``self.oeb.guide`` from the OPF ``<guide>`` references.

     A reference whose path is not in the manifest is matched again
     case-insensitively; if that also fails a warning is logged and the
     reference is skipped. Only the first reference of each type is added.
     """
     guide = self.oeb.guide
     manifest = self.oeb.manifest
     for ref in xpath(opf, '/o2:package/o2:guide/o2:reference'):
         target = ref.get('href')
         path = urlnormalize(urldefrag(target)[0])
         if path not in manifest.hrefs:
             # Fall back to the first manifest href equal ignoring case.
             match = next(
                 (known for known in manifest.hrefs
                  if known.lower() == path.lower()),
                 None)
             if match is None:
                 self.logger.warn(u'Guide reference %r not found' % target)
                 continue
             target = match
         kind = ref.get('type')
         if kind in guide:
             continue
         guide.add(kind, ref.get('title'), target)

Example #55

0

Show file

    def url_replacer(self, orig_url):
        """Rewrite a local URL so that it points at the renamed target.

        A URL with a scheme is returned as given. For other URLs the path
        is resolved relative to the pre-rename item (looked up in
        ``self.renamed_items_map`` when that map is non-empty), translated
        through ``self.rename_map`` and expressed relative to the current
        item; any fragment is kept.
        """
        cleaned = urlnormalize(orig_url)
        scheme = urlparse(cleaned).scheme
        if scheme:
            # Only rewrite local URLs
            return orig_url

        path_part, frag_part = urldefrag(cleaned)
        if not self.renamed_items_map:
            base_item = self.current_item
        else:
            base_item = self.renamed_items_map.get(self.current_item.href,
                                                   self.current_item)

        old_href = base_item.abshref(path_part)
        new_href = self.rename_map.get(old_href, old_href)
        result = self.current_item.relhref(new_href)
        if frag_part:
            result = result + ('#' + frag_part)
        return result

Example #56

0

Show file

File: reader.py Project: yws/calibre

 def _pages_from_page_map(self, opf):
     """Fill ``self.oeb.pages`` from the page map located via ``opf``.

     Entries without an ``href`` are ignored. Each page is classified as
     'special' when its name is empty, 'front' when the name consists only
     of the letters i, v, x, l, c, d, m (any case), and 'normal' otherwise.

     :return: False when no page map item is found, True otherwise.
     """
     map_item = self._find_page_map(opf)
     if map_item is None:
         return False
     page_map = map_item.data
     pages = self.oeb.pages
     for entry in xpath(page_map, 'o2:page'):
         label = entry.get('name', '')
         target = entry.get('href')
         if not target:
             continue
         label = COLLAPSE_RE.sub(' ', label.strip())
         target = map_item.abshref(urlnormalize(target))
         if not label:
             page_type = 'special'
         elif not label.lower().strip('ivxlcdm'):
             # Nothing is left after stripping roman-numeral letters.
             page_type = 'front'
         else:
             page_type = 'normal'
         pages.add(label, target, type=page_type)
     return True

Example #57

0

Show file

File: rasterize.py Project: zwlistu/calibre

 def dataize_svg(self, item, svg=None):
     """Point manifest-local ``xlink:href`` references at temporary files.

     For every ``svg:*`` element with an ``xlink:href`` that resolves to a
     manifest entry, the entry's string form is written to a persistent
     temporary file (suffix taken from ``what()``, falling back to 'jpg'),
     the file name is recorded in ``self.temp_files`` and the attribute is
     set to that file name.

     :param svg: tree to process; ``item.data`` is used when None.
     :return: the processed tree.
     """
     tree = item.data if svg is None else svg
     manifest_hrefs = self.oeb.manifest.hrefs
     for node in xpath(tree, '//svg:*[@xl:href]'):
         normalized = urlnormalize(node.attrib[XLINK('href')])
         local_path = urldefrag(normalized)[0]
         if not local_path:
             continue
         resolved = item.abshref(local_path)
         if resolved not in manifest_hrefs:
             continue
         payload = str(manifest_hrefs[resolved])
         suffix = '.' + (what(None, payload) or 'jpg')
         with PersistentTemporaryFile(suffix=suffix) as tmp:
             tmp.write(payload)
             self.temp_files.append(tmp.name)
         # The name is still needed after the file object has been closed.
         node.attrib[XLINK('href')] = tmp.name
     return tree

Example #58

0

Show file

    def replace_internal_links_with_placeholders(self):
        """Swap links to spine items for ``kindle:pos:fid`` placeholders.

        ``self.link_map`` is reset and then filled with
        ``placeholder -> (href, fragment)`` for every ``<a href>`` whose
        target is the href of a spine item. The placeholder number is the
        running count of all anchors seen, matched or not.
        """
        self.link_map = {}
        seen = 0
        spine_hrefs = set(entry.href for entry in self.oeb.spine)
        for item in self.oeb.spine:
            tree = self.data(item)

            for anchor in XPath('//h:a[@href]')(tree):
                seen += 1
                absolute = item.abshref(anchor.get('href'))
                base, _, fragment = absolute.partition('#')
                try:
                    target = urlnormalize(base)
                except ValueError:
                    # a non utf-8 quoted url? It cannot be interpreted, so
                    # it is used as it stands.
                    target = base
                if target not in spine_hrefs:
                    continue
                placeholder = 'kindle:pos:fid:0000:off:%s' % to_href(seen)
                self.link_map[placeholder] = (target, fragment)
                anchor.set('href', placeholder)

Example #59

0

Show file

        def serialize_toc_level(tocref, href=None):
            """Write one ToC level (heading plus list of links) to ``buf``.

            When ``href`` is given (e.g. feed_0/index.html) a page break is
            written first and the current buffer offset is stored in
            ``self.id_offsets`` under the normalized href. For every child
            node the offset of its ten-digit filepos filler is appended to
            ``self.href_offsets``.
            """
            def as_bytes(text):
                # unicode titles are written to the buffer as utf-8
                if isinstance(text, unicode):
                    return text.encode('utf-8')
                return text

            if href is not None:
                # resolve the section url in id_offsets
                buf.write('<mbp:pagebreak />')
                offset = buf.tell()
                self.id_offsets[urlnormalize(href)] = offset

            if tocref.klass == "periodical":
                buf.write('<div> <div height="1em"></div>')
            else:
                heading = as_bytes(tocref.title)
                buf.write(
                    '<div></div> <div> <h2 height="1em"><font size="+2"><b>')
                buf.write(heading)
                buf.write('</b></font></h2> <div height="1em"></div>')

            buf.write('<ul>')

            for node in tocref.nodes:
                buf.write('<li><a filepos=')
                target = node.href
                if tocref.klass == 'periodical':
                    # This is a section node.
                    # For periodical tocs, the section urls are like
                    # r'feed_\d+/index.html'. The link should not point to
                    # the start of the first article, so the article part
                    # is dropped from the href.
                    target = re.sub(r'article_\d+/', '', target)
                self.href_offsets[target].append(buf.tell())
                buf.write('0000000000')
                buf.write(' ><font size="+1"><b><u>')
                buf.write(as_bytes(node.title))
                buf.write('</u></b></font></a></li>')

            buf.write('</ul><div height="1em"></div></div><mbp:pagebreak />')

Example #60

0

Show file

File: reader.py Project: yws/calibre

 def _toc_from_tour(self, opf):
     """Build ``self.oeb.toc`` from the first ``<tour>`` element of the OPF.

     The tour's title becomes the ToC title. Each ``<site>`` with both a
     title and an href is added, provided its fragment-less, normalized
     path is in the manifest; otherwise a warning is logged.

     :return: False when the OPF has no tour, True otherwise.
     """
     tours = xpath(opf, 'o2:tours/o2:tour')
     if not tours:
         return False
     self.log.debug('Reading TOC from tour...')
     first_tour = tours[0]
     toc = self.oeb.toc
     toc.title = first_tour.get('title')
     for site in xpath(first_tour, 'o2:site'):
         title = site.get('title')
         href = site.get('href')
         if not (title and href):
             continue
         target = urldefrag(urlnormalize(href))[0]
         if target in self.oeb.manifest.hrefs:
             toc.add(title, href, id=site.get('id'))
         else:
             self.logger.warn('TOC reference %r not found' % href)
     return True