Python xpathの例、ebook_converter.ebooks.oeb.base.xpath Pythonの例

コード例 #1

0

ファイルを表示

 def _clean_opf(self, opf):
     """Rebuild ``opf`` as a normalised OPF 2 ``package`` and return its root.

     Elements that are un-namespaced or in the OPF 1 namespace are retagged
     into the ``opf`` namespace.  Every descendant of the metadata element
     (except the ``dc-metadata`` / ``x-metadata`` wrappers) is moved under a
     single new ``metadata`` element, with Dublin Core tags rewritten to the
     DC 1.1 namespace and lower-cased.  Finally the manifest, spine, tours
     and guide sections are moved under the new root.
     """
     # Gather every namespace prefix declared anywhere in the document so
     # the new metadata element can declare all of them.
     collected_ns = {}
     for node in opf.iter(tag=etree.Element):
         collected_ns.update(node.nsmap)

     # Promote plain / OPF 1 elements into the OPF namespace.
     for node in opf.iter(tag=etree.Element):
         if parse_utils.namespace(node.tag) not in ('', const.OPF1_NS):
             continue
         if ':' in parse_utils.barename(node.tag):
             continue
         node.tag = base.tag('opf', parse_utils.barename(node.tag))

     collected_ns.update(const.OPF2_NSMAP)
     new_root = etree.Element(base.tag('opf', 'package'),
                              nsmap={None: const.OPF2_NS},
                              attrib=dict(opf.attrib))
     new_meta = etree.SubElement(new_root,
                                 base.tag('opf', 'metadata'),
                                 nsmap=collected_ns)

     wrappers = tuple(base.tag('opf', name)
                      for name in ('dc-metadata', 'x-metadata'))
     for node in base.xpath(opf, 'o2:metadata//*'):
         if node.tag in wrappers:
             continue
         if parse_utils.namespace(node.tag) in const.DC_NSES:
             local = parse_utils.barename(node.tag).lower()
             node.tag = '{%s}%s' % (const.DC11_NS, local)
         if node.tag.startswith('dc:'):
             local = node.tag.partition(':')[-1].lower()
             node.tag = '{%s}%s' % (const.DC11_NS, local)
         new_meta.append(node)

     new_meta.extend(base.xpath(opf, 'o2:metadata//o2:meta'))
     for section in ('o2:manifest', 'o2:spine', 'o2:tours', 'o2:guide'):
         new_root.extend(base.xpath(opf, section))
     return new_root

コード例 #2

0

ファイルを表示

 def _toc_from_spine(self, opf):
     """Generate a default TOC with one entry per linear spine item.

     Each linear item is labelled with its ``<title>`` text.  If the titles
     that were found contain duplicates, every item is labelled with its
     first non-empty heading (h1..h5, then strong) instead, falling back to
     the placeholder ``'(unlabled)'``.  An item without a title always uses
     its heading label.

     Labels are paired with the linear items they were computed from, so
     non-linear spine items and untitled documents can no longer shift
     labels onto the wrong item or truncate the TOC.

     :param opf: unused; kept for a uniform signature with the other TOC
                 builders.
     :return: always ``True``.
     """
     self.log.warn('Generating default TOC from spine...')
     toc = self.oeb.toc
     linear_items = [item for item in self.oeb.spine if item.linear]
     titles = []
     headers = []
     for item in linear_items:
         html = item.data
         title = ''.join(base.xpath(html, '/h:html/h:head/h:title/text()'))
         # Keep an (possibly empty) entry per item so indexes stay aligned.
         titles.append(base.COLLAPSE_RE.sub(' ', title.strip()))
         header = '(unlabled)'
         for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
             expr = '/h:html/h:body//h:%s[position()=1]/text()'
             text = ''.join(base.xpath(html, expr % tag))
             text = base.COLLAPSE_RE.sub(' ', text.strip())
             if text:
                 header = text
                 break
         headers.append(header)

     # Same rule as before: duplicate titles make titles useless as labels.
     found_titles = [title for title in titles if title]
     use_headers = len(found_titles) > len(set(found_titles))
     for item, title, header in zip(linear_items, titles, headers):
         label = header if (use_headers or not title) else title
         toc.add(label, item.href)
     return True

コード例 #3

0

ファイルを表示

 def _spine_from_opf(self, opf):
     """Populate the book spine from the ``itemref`` entries of ``opf``.

     References to unknown manifest ids are skipped with a warning.  An item
     is added when it is an OEB document whose parsed data is not an NCX, or
     when its parsed root is an ``html`` element (in which case its media
     type is forced to XHTML).  Anything else is reported and left out.

     Raises ``base.OEBError`` if the spine ends up empty.  Afterwards extra
     spine items are added via ``_spine_add_extra`` and a valid
     ``page-progression-direction`` is copied onto the spine.
     """
     spine = self.oeb.spine
     manifest = self.oeb.manifest
     for itemref in base.xpath(opf, '/o2:package/o2:spine/o2:itemref'):
         idref = itemref.get('idref')
         if idref not in manifest.ids:
             self.logger.warn('Spine item %r not found' % idref)
             continue
         item = manifest.ids[idref]
         is_document = (
             item.media_type.lower() in base.OEB_DOCS
             and hasattr(item.data, 'xpath')
             and not getattr(item.data, 'tag', '').endswith('}ncx'))
         if not is_document:
             rooted_in_html = (hasattr(item.data, 'tag') and item.data.tag
                               and item.data.tag.endswith('}html'))
             if not rooted_in_html:
                 self.oeb.log.warn('The item %s is not a XML document.'
                                   ' Removing it from spine.' % item.href)
                 continue
             # Mislabelled HTML: correct the media type before adding it.
             item.media_type = base.XHTML_MIME
         spine.add(item, itemref.get('linear'))

     if len(spine) == 0:
         raise base.OEBError("Spine is empty")
     self._spine_add_extra()

     directions = base.xpath(
         opf, '/o2:package/o2:spine/@page-progression-direction')
     for direction in directions:
         if direction in {'ltr', 'rtl'}:
             spine.page_progression_direction = direction

コード例 #4

0

ファイルを表示

 def _toc_from_html(self, opf):
     """Build the TOC from the links of the guide's ``toc`` HTML document.

     Returns ``False`` when the guide has no ``toc`` reference.  Otherwise
     the referenced manifest item is searched for ``<a href>`` elements.  If
     the reference carries a fragment, the search starts at the element with
     that ``id`` (or ``name``) and widens to its ancestors until one
     contains links.  Links to documents outside the manifest are ignored.
     Link texts that share the same target are joined with spaces into one
     TOC entry, in order of first appearance.  Returns ``True``.
     """
     if 'toc' not in self.oeb.guide:
         return False
     self.log.debug('Reading TOC from HTML...')
     itempath, frag = urllib.parse.urldefrag(self.oeb.guide['toc'].href)
     item = self.oeb.manifest.hrefs[itempath]
     scope = item.data
     if frag:
         anchors = (base.xpath(scope, './/*[@id="%s"]' % frag)
                    or base.xpath(scope, './/*[@name="%s"]' % frag))
         node = anchors[0] if anchors else scope
         # Climb until the subtree actually contains links.
         while node != scope and not base.xpath(node, './/h:a[@href]'):
             node = node.getparent()
         scope = node

     # Plain dicts keep insertion order, which gives first-seen ordering.
     labels_by_href = {}
     for link in base.xpath(scope, './/h:a[@href]'):
         target = item.abshref(base.urlnormalize(link.attrib['href']))
         doc_path = urllib.parse.urldefrag(target)[0]
         if doc_path not in self.oeb.manifest.hrefs:
             continue
         label = base.COLLAPSE_RE.sub(' ', base.xml2text(link).strip())
         labels_by_href.setdefault(target, []).append(label)

     toc = self.oeb.toc
     for target, labels in labels_by_href.items():
         toc.add(' '.join(labels), target)
     return True

コード例 #5

0

ファイルを表示

 def _toc_from_ncx(self, item):
     """Read the TOC from the NCX manifest ``item``.

     Returns ``False`` when there is no item or it has no data.  The TOC
     title comes from the NCX ``docTitle``; when that is empty the first
     metadata title is used.  Every ``navMap`` is then walked with
     ``_toc_from_navpoint``.  Returns ``True``.
     """
     if item is None or item.data is None:
         return False
     self.log.debug('Reading TOC from NCX...')
     ncx = item.data
     doc_title = ''.join(base.xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
     doc_title = base.COLLAPSE_RE.sub(' ', doc_title.strip())
     if not doc_title:
         doc_title = str(self.oeb.metadata.title[0])
     toc = self.oeb.toc
     toc.title = doc_title
     for nav_map in base.xpath(ncx, 'ncx:navMap'):
         self._toc_from_navpoint(item, toc, nav_map)
     return True

コード例 #6

0

ファイルを表示

 def _manifest_from_opf(self, opf):
     """Populate the book manifest from the ``item`` entries of ``opf``.

     For each item the media type is read from ``media-type`` (or the
     misspelt ``mediatype``).  When it is missing or ``text/xml`` it is
     guessed from the href, falling back to the declared value and then to
     ``base.BINARY_MIME``.  Items are skipped with a warning when they have
     no href, duplicate an already registered href, or do not exist in the
     container.  A duplicate id gets a freshly generated id/href pair.

     Afterwards invalid entries are pruned and missing ones added through
     ``_manifest_prune_invalid`` / ``_manifest_add_missing``.
     """
     manifest = self.oeb.manifest
     for elem in base.xpath(opf, '/o2:package/o2:manifest/o2:item'):
         item_id = elem.get('id')
         href = elem.get('href')
         if not href:
             # Without an href there is nothing to guess a type from or to
             # look up in the container; skip instead of failing later.
             self.logger.warn('Manifest item %r has no href' % item_id)
             continue
         media_type = elem.get('media-type', None)
         if media_type is None:
             media_type = elem.get('mediatype', None)
         if not media_type or media_type == 'text/xml':
             guessed = mimetypes.guess_type(href)[0]
             media_type = guessed or media_type or base.BINARY_MIME
         if hasattr(media_type, 'lower'):
             media_type = media_type.lower()
         fallback = elem.get('fallback')
         if href in manifest.hrefs:
             self.logger.warn('Duplicate manifest entry for %r' % href)
             continue
         if not self.oeb.container.exists(href):
             self.logger.warn('Manifest item %r not found' % href)
             continue
         if item_id in manifest.ids:
             self.logger.warn('Duplicate manifest id %r' % item_id)
             item_id, href = manifest.generate(item_id, href)
         manifest.add(item_id, href, media_type, fallback)
     invalid = self._manifest_prune_invalid()
     self._manifest_add_missing(invalid)

コード例 #7

0

ファイルを表示

ファイル: rasterize.py プロジェクト: gryf/ebook-converter

 def rasterize_item(self, item):
     """Rasterize every SVG used by the HTML document ``item``.

     External SVGs referenced from ``<img src>`` and from
     ``<object type=SVG data>`` are handed to ``rasterize_external`` when
     the target is a manifest item with the SVG media type.  Inline
     ``<svg>`` elements are handed to ``rasterize_inline``.
     """
     html = item.data
     hrefs = self.oeb.manifest.hrefs
     # (query, attribute holding the reference), processed in this order.
     external_refs = (
         ('//h:img[@src]', 'src'),
         ('//h:object[@type="%s" and @data]' % SVG_MIME, 'data'),
     )
     for query, attr in external_refs:
         for elem in xpath(html, query):
             ref = urlnormalize(elem.attrib[attr])
             image = hrefs.get(item.abshref(ref), None)
             if image and image.media_type == SVG_MIME:
                 style = self.stylizer(item).style(elem)
                 self.rasterize_external(elem, style, item, image)
     for svg_elem in xpath(html, '//svg:svg'):
         self.rasterize_inline(svg_elem,
                               self.stylizer(item).style(svg_elem),
                               item)

コード例 #8

0

ファイルを表示

 def _toc_from_tour(self, opf):
     """Build the TOC from the first ``tour`` element of ``opf``.

     Returns ``False`` when the package has no tour.  Otherwise the TOC
     title is taken from the tour and one entry is added per ``site`` that
     has both a title and an href.  Sites whose document is not in the
     manifest are skipped with a warning.  Returns ``True``.
     """
     tours = base.xpath(opf, 'o2:tours/o2:tour')
     if not tours:
         return False
     self.log.debug('Reading TOC from tour...')
     first_tour = tours[0]
     toc = self.oeb.toc
     toc.title = first_tour.get('title')
     for site in base.xpath(first_tour, 'o2:site'):
         title, href = site.get('title'), site.get('href')
         if not (title and href):
             continue
         doc_path = urllib.parse.urldefrag(base.urlnormalize(href))[0]
         if doc_path not in self.oeb.manifest.hrefs:
             self.logger.warn('TOC reference %r not found' % href)
             continue
         toc.add(title, href, id=site.get('id'))
     return True

コード例 #9

0

ファイルを表示

 def _pages_from_ncx(self, opf, item):
     """Read the page list from the NCX manifest ``item``.

     Returns ``False`` when there is no item, no parsed data, or no
     ``pageTarget`` entries.  Otherwise one page is registered per target
     that has a content ``src``, using its label, absolute href, ``type``
     (default ``'normal'``), ``id`` and ``class``.  Returns ``True``.
     """
     ncx = None if item is None else item.data
     if ncx is None:
         return False
     targets = base.xpath(ncx, 'ncx:pageList/ncx:pageTarget')
     if not targets:
         return False
     pages = self.oeb.pages
     for target in targets:
         label = ''.join(base.xpath(target, 'ncx:navLabel/ncx:text/text()'))
         label = base.COLLAPSE_RE.sub(' ', label.strip())
         sources = base.xpath(target, 'ncx:content/@src')
         if not sources:
             continue
         page_href = item.abshref(base.urlnormalize(sources[0]))
         pages.add(label,
                   page_href,
                   type=target.get('type', 'normal'),
                   id=target.get('id'),
                   klass=target.get('class'))
     return True

コード例 #10

0

ファイルを表示

 def _find_page_map(self, opf):
     """Locate the page-map item, remove it from the manifest, return it.

     If the spine declares a ``page-map`` attribute, only that id is
     considered: the matching manifest item is removed and returned, or
     ``None`` is returned when the id is unknown.  Without the attribute,
     the first manifest item with the page-map media type is removed and
     returned.  Returns ``None`` when nothing is found.
     """
     manifest = self.oeb.manifest
     declared = base.xpath(opf, '/o2:package/o2:spine/@page-map')
     if declared:
         map_id = declared[0]
         if map_id not in manifest.ids:
             return None
         found = manifest.ids[map_id]
     else:
         found = next((candidate for candidate in manifest.values()
                       if candidate.media_type == base.PAGE_MAP_MIME),
                      None)
         if found is None:
             return None
     manifest.remove(found)
     return found

コード例 #11

0

ファイルを表示

 def _guide_from_opf(self, opf):
     """Populate the book guide from the ``reference`` entries of ``opf``.

     A reference whose document is not in the manifest is retried with a
     case-insensitive match against the manifest hrefs.  If that fails too,
     or the reference has no ``href`` attribute, it is skipped with a
     warning.  When the href is case-corrected, the ``#fragment`` of the
     original reference is kept (it used to be dropped).  Only the first
     reference of each ``type`` is added.
     """
     guide = self.oeb.guide
     manifest = self.oeb.manifest
     for elem in base.xpath(opf, '/o2:package/o2:guide/o2:reference'):
         ref_href = elem.get('href')
         if ref_href is None:
             self.logger.warn('Guide reference %r not found' % ref_href)
             continue
         raw_path, frag = urllib.parse.urldefrag(ref_href)
         path = base.urlnormalize(raw_path)
         if path not in manifest.hrefs:
             wanted = path.lower()
             corrected_href = next(
                 (href for href in manifest.hrefs if href.lower() == wanted),
                 None)
             if corrected_href is None:
                 self.logger.warn('Guide reference %r not found' % ref_href)
                 continue
             # Re-attach the fragment so the reference still points at the
             # same anchor inside the corrected document.
             ref_href = corrected_href
             if frag:
                 ref_href = '%s#%s' % (corrected_href, frag)
         typ = elem.get('type')
         if typ not in guide:
             guide.add(typ, elem.get('title'), ref_href)

コード例 #12

0

ファイルを表示

ファイル: rasterize.py プロジェクト: gryf/ebook-converter

 def dataize_svg(self, item, svg=None):
     """Point the xlink references in ``svg`` at temporary file copies.

     ``svg`` defaults to ``item.data``.  For every SVG element with an
     ``xlink:href`` that resolves to a manifest item, the item's bytes are
     written to a persistent temporary file (suffix taken from the detected
     image type, ``jpg`` if unknown), the file name is recorded in
     ``self.temp_files`` and the attribute is rewritten to that file name.
     References without a path or outside the manifest are left untouched.
     Returns ``svg``.
     """
     if svg is None:
         svg = item.data
     hrefs = self.oeb.manifest.hrefs
     href_attr = base.tag('xlink', 'href')
     for elem in xpath(svg, '//svg:*[@xl:href]'):
         target = urlnormalize(elem.attrib[href_attr])
         path, _ = urllib.parse.urldefrag(target)
         if not path:
             continue
         abshref = item.abshref(path)
         if abshref not in hrefs:
             continue
         data = hrefs[abshref].bytes_representation
         suffix = '.' + (what(None, data) or 'jpg')
         with PersistentTemporaryFile(suffix=suffix) as pt:
             pt.write(data)
             self.temp_files.append(pt.name)
         elem.attrib[href_attr] = pt.name
     return svg

コード例 #13

0

ファイルを表示

 def _pages_from_page_map(self, opf):
     """Read the page list from the book's page-map document.

     Returns ``False`` when ``_find_page_map`` finds no page map.
     Otherwise one page is registered per ``page`` element that has an
     href.  The page type is ``'special'`` for an empty name, ``'front'``
     for a name made only of the letters ``ivxlcdm`` (case-insensitive),
     and ``'normal'`` otherwise.  Returns ``True``.
     """
     item = self._find_page_map(opf)
     if item is None:
         return False
     pmap = item.data
     pages = self.oeb.pages
     for page in base.xpath(pmap, 'o2:page'):
         raw_name = page.get('name', '')
         raw_href = page.get('href')
         if not raw_href:
             continue
         name = base.COLLAPSE_RE.sub(' ', raw_name.strip())
         href = item.abshref(base.urlnormalize(raw_href))
         if not name:
             kind = 'special'
         elif not name.lower().strip('ivxlcdm'):
             kind = 'front'
         else:
             kind = 'normal'
         pages.add(name, href, type=kind)
     return True

コード例 #14

0

ファイルを表示

ファイル: html_input.py プロジェクト: keshavbhatt/ebook-converter

    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        """Build and return an OEB book from an HTML file and its linked files.

        Steps, in order:

        1. Create an unpopulated OEB book and store it on ``self.oeb``.
        2. Copy ``mi`` into the book metadata, then add defaults for a
           missing language, creator and title, plus a fresh UUID identifier.
        3. Add every non-binary file returned by ``get_filelist`` to the
           manifest (as ``text/html``) and to the spine.
        4. Rewrite links in each HTML file, and URLs in each stylesheet,
           through ``self.resource_adder``.
        5. Generate a TOC from document titles, or from first headings when
           titles are duplicated.
        6. Reset the container to the current working directory.

        :param htmlpath: path compared against each file's path to decide
            whether the stored input href must be URL-quoted.
        :param basedir: passed through to ``get_filelist``.
        :param opts: conversion options; ``input_encoding`` is read here,
            ``language`` and ``authors`` are read if present.
        :param log: log object; passed to helpers and also called directly.
        :param mi: metadata object passed to ``meta_info_to_oeb_metadata``.
        :return: the populated OEB book.
        """
        # Imports are deferred to call time.
        # NOTE(review): os, re, functools, urllib, get_lang and
        # sanitize_file_name are used below but not imported here --
        # presumably module-level imports; confirm.
        import uuid
        from ebook_converter.ebooks.conversion.plumber import create_oebbook
        from ebook_converter.ebooks.oeb.base import (DirContainer,
            rewrite_links, urlnormalize, BINARY_MIME, OEB_STYLES,
            xpath, urlquote)
        from ebook_converter.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        from ebook_converter.ebooks.html.input import get_filelist
        from ebook_converter.ebooks.metadata import string_to_authors
        from ebook_converter.utils.localization import canonicalize_lang
        import css_parser, logging
        css_parser.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        # Metadata: start from ``mi`` and fill in whatever is still missing.
        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            l = canonicalize_lang(getattr(opts, 'language', None))
            if not l:
                oeb.logger.warn('Language not specified')
                l = get_lang().replace('_', '-')
            metadata.add('language', l)
        if not metadata.creator:
            a = getattr(opts, 'authors', None)
            if a:
                a = string_to_authors(a)
            if not a:
                oeb.logger.warn('Creator not specified')
                a = [self.oeb.translate('Unknown')]
            for aut in a:
                metadata.add('creator', aut)
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate('Unknown'))
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        # NOTE(review): the loop looks for an identifier carrying an ``id``
        # attribute but then assigns ``metadata.identifier[0]`` rather than
        # the matching ``ident`` -- confirm this is intended.
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        # Manifest and spine: one entry per non-binary file. The container
        # is re-pointed at each file's directory before the item is added.
        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path), log,
                    ignore_opf=True)
            bname = os.path.basename(path)
            id, href = oeb.manifest.generate(id='html', href=sanitize_file_name(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(id, href, 'text/html')
            if path == htmlpath and '%' in path:
                bname = urlquote(bname)
            item.html_input_href = bname
            oeb.spine.add(item, True)

        # State stored on ``self`` before the link rewriting below runs
        # (presumably read by ``resource_adder`` -- confirm).
        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
            self.added_resources[path] = href
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urllib.parse.urldefrag
        self.BINARY_MIME = BINARY_MIME

        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            href = htmlfile_map[path]
            # The manifest may hold the item under the normalized href.
            try:
                item = oeb.manifest.hrefs[href]
            except KeyError:
                item = oeb.manifest.hrefs[urlnormalize(href)]
            rewrite_links(item.data,
                          functools.partial(self.resource_adder, base=dpath))

        # Rewrite URLs inside stylesheets, resolved against the directory of
        # the source file recorded in ``added_resources``. ``dpath`` stays
        # ``None`` when no recorded href matches the item.
        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                css_parser.replaceUrls(item.data,
                        functools.partial(self.resource_adder, base=dpath))

        # Auto-generated TOC: document titles, or first headings when the
        # collected titles contain duplicates.
        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear:
                continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        # NOTE(review): labels are zipped against the whole spine, while
        # ``titles`` skips untitled items and both lists skip non-linear
        # items, so labels can end up on a different item -- confirm.
        for title, item in zip(use, self.oeb.spine):
            if not item.linear:
                continue
            toc.add(title, item.href)

        oeb.container = DirContainer(os.getcwd(), oeb.log, ignore_opf=True)
        return oeb

コード例 #15

0

ファイルを表示

ファイル: stylizer.py プロジェクト: gryf/ebook-converter

    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css='', base_css=''):
        """Collect the stylesheets that apply to ``tree`` and style its elements.

        The stylesheet list is built in this order: the built-in HTML
        stylesheet, ``base_css``, every ``<style>`` / ``<link>`` element
        found in ``tree`` (including manifest stylesheets pulled in by
        ``@import``), then ``extra_css`` and ``user_css``.  The resulting
        rules are cached on ``oeb.stylizer_rules`` and reused while the
        options, profile and stylesheets stay the same.  Each rule is then
        applied to the elements its selector matches, followed by inline
        ``style`` attributes and ``width`` / ``height`` attributes of
        images.

        :param tree: parsed document to style.
        :param path: href of the document; used as a key into
            ``oeb.manifest.hrefs``.
        :param oeb: the OEB book (manifest, logger, CSS preprocessor).
        :param opts: conversion options; ``output_profile`` is read.
        :param profile: output profile; when ``None`` the profile whose
            ``short_name`` is ``'default'`` is used, else
            ``opts.output_profile``.
        :param extra_css: additional CSS text, parsed after document styles.
        :param user_css: user CSS text, parsed after document styles.
        :param base_css: CSS text parsed right after the built-in sheet.
        """
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from ebook_converter.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        # Name reported as the href of inline / extra stylesheets.
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        if base_css:
            stylesheets.append(parseString(base_css, validate=False))
        # Matched by local name, so elements in any namespace are returned;
        # the XHTML namespace is checked per element in the loop below.
        style_tags = base.xpath(tree, '//*[local-name()="style" or local-name()="link"]')

        # Add css_parser parsing profiles from output_profile
        # NOTE: this loop rebinds the ``profile`` parameter; ``self.profile``
        # was already assigned above and is not affected.
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'],
                                        profile['props'],
                                        profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        for elem in style_tags:
            if (elem.tag == base.tag('xhtml', 'style') and elem.get('type', base.CSS_MIME) in base.OEB_STYLES and media_ok(elem.get('media'))):
                # Inline <style>: gather its own text plus the text and tail
                # of every child node.
                text = elem.text if elem.text else ''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += '\n\n' + uenc.force_unicode(t, 'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += '\n\n' + uenc.force_unicode(t, 'utf-8')
                if text:
                    text = oeb.css_preprocessor(text)
                    # We handle @import rules separately
                    # The fetcher is swapped for one returning empty content
                    # while parsing, then restored.
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if not media_ok(rule.media.mediaText):
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warning('Ignoring missing '
                                                    'stylesheet in @import '
                                                    'rule: %s', rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in base.OEB_STYLES:
                                self.logger.warning('CSS @import of non-CSS '
                                                    'file %r', rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif (elem.tag == base.tag('xhtml', 'link') and elem.get('href') and elem.get(
                    'rel', 'stylesheet').lower() == 'stylesheet' and elem.get(
                    'type', base.CSS_MIME).lower() in base.OEB_STYLES and media_ok(elem.get('media'))
                ):
                # External <link rel="stylesheet">: reuse the stylesheet
                # stored on the manifest item.
                # NOTE: ``path`` (the parameter) is rebound here.
                href = base.urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warning('Stylesheet %r referenced by file %r '
                                        'not in manifest', path, item.href)
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warning('Stylesheet %r referenced by file %r '
                                        'is not CSS', path, item.href)
                    continue
                stylesheets.append(sitem.data)
        # Extra and user CSS are parsed best-effort: a parse failure is
        # logged and that stylesheet is skipped.
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheets.append(stylesheet)
                except Exception:
                    self.logger.exception('Failed to parse %s, ignoring.', w)
                    self.logger.debug('Bad css: %s', x)

        # using oeb to store the rules, page rule and font face rules
        # and generating them again if opts, profile or stylesheets are different
        if (not hasattr(self.oeb, 'stylizer_rules')) \
            or not self.oeb.stylizer_rules.same_rules(self.opts, self.profile, stylesheets):
            self.oeb.stylizer_rules = StylizerRules(self.opts, self.profile, stylesheets)
        self.rules = self.oeb.stylizer_rules.rules
        self.page_rule = self.oeb.stylizer_rules.page_rule
        self.font_face_rules = self.oeb.stylizer_rules.font_face_rules
        self.flatten_style = self.oeb.stylizer_rules.flatten_style

        self._styles = {}
        # Finds the first of the listed pseudo classes/elements in a
        # selector's text.
        pseudo_pat = re.compile(':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
        select = Select(tree, ignore_inappropriate_pseudo_classes=True)

        # Each rule is a 5-tuple; only the declaration dict and the selector
        # text are used here.
        for _, _, cssdict, text, _ in self.rules:
            fl = pseudo_pat.search(text)
            try:
                matches = tuple(select(text))
            except SelectorError as err:
                self.logger.error('Ignoring CSS rule with invalid selector: '
                                  '%r (%s)', text, err)
                continue

            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                        'plumber_output_format', '').lower() in {'mobi', 'docx'}:
                    # Fake first-letter
                    # The leading punctuation/separator characters plus the
                    # first following character of the first text-bearing
                    # element are wrapped in a marker <span>, which then
                    # receives the rule's declarations.
                    # NOTE: ``text`` (the selector text) is rebound below;
                    # the selector was already evaluated above.
                    for elem in matches:
                        for x in elem.iter('*'):
                            if x.text:
                                punctuation_chars = []
                                text = str(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = ''.join(punctuation_chars) + \
                                        (text[0] if text else '')
                                span = x.makeelement('{%s}span' %
                                                     const.XHTML_NS)
                                span.text = special_text
                                span.set('data-fake-first-letter', '1')
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        # Inline style attributes are applied after the stylesheet rules.
        for elem in base.xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        # Matches values that end in a bare number, i.e. have no unit.
        num_pat = re.compile(r'[0-9.]+$')
        for elem in base.xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    # NOTE(review): bare ``except`` hides every error, not
                    # just a missing attribute -- consider narrowing it.
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
コード例 #16

0

ファイルを表示

    def _toc_from_navpoint(self, item, toc, navpoint):
        """Recursively add the ``navPoint`` children of ``navpoint`` to ``toc``.

        An untitled nav point is transparent: its children are added to the
        current ``toc`` node.  A nav point with neither a target nor
        children is dropped, as is one whose target is not in the manifest
        and that has no children.  For every other nav point a TOC node is
        created (with id, class, play order and the calibre author /
        description / thumbnail metadata) and its children are added below
        that node.
        """
        meta_query = 'descendant::calibre:meta[@name = "%s"]'
        for point in base.xpath(navpoint, 'ncx:navPoint'):
            label = ''.join(base.xpath(point, 'ncx:navLabel/ncx:text/text()'))
            label = base.COLLAPSE_RE.sub(' ', label.strip())
            sources = base.xpath(point, 'ncx:content/@src')
            if not label:
                self._toc_from_navpoint(item, toc, point)
                continue

            has_target = bool(sources and sources[0])
            if not has_target and not base.xpath(point, 'ncx:navPoint'):
                # This node is useless
                continue

            href = ''
            if has_target:
                href = item.abshref(base.urlnormalize(sources[0]))
            doc_path = urllib.parse.urldefrag(href)[0]
            if doc_path and doc_path not in self.oeb.manifest.hrefs:
                doc_path = base.urlnormalize(doc_path)
            if href and doc_path not in self.oeb.manifest.hrefs:
                self.logger.warn('TOC reference %r not found' % href)
                if not base.xpath(point, 'ncx:navPoint'):
                    # This node is useless
                    continue

            node_id = point.get('id')
            node_class = point.get('class', 'chapter')

            try:
                # The default is computed up front, whether or not the
                # attribute is present.
                default_order = self.oeb.toc.next_play_order()
                play_order = int(point.get('playOrder', default_order))
            except Exception:
                play_order = self.oeb.toc.next_play_order()

            author_nodes = base.xpath(point, meta_query % 'author')
            author = author_nodes[0].text if author_nodes else None

            description = None
            description_nodes = base.xpath(point, meta_query % 'description')
            if description_nodes:
                description = etree.tostring(description_nodes[0],
                                             method='text',
                                             encoding='unicode').strip()
                description = description or None

            thumb_nodes = base.xpath(point, meta_query % 'toc_thumbnail')
            thumbnail = thumb_nodes[0].text if thumb_nodes else None
            if not (thumbnail and thumbnail.strip()):
                thumbnail = None

            node = toc.add(label,
                           href,
                           id=node_id,
                           klass=node_class,
                           play_order=play_order,
                           description=description,
                           author=author,
                           toc_thumbnail=thumbnail)

            self._toc_from_navpoint(item, node, point)