Example #1
0
    def flatten_head(self, item, href, global_href):
        """Point the document of *item* at the flattened stylesheet(s).

        All ``<link>`` and ``<style>`` elements whose type is listed in
        ``base.OEB_STYLES`` are removed from the document.  A stylesheet
        ``<link>`` to *href* is then appended to ``<head>``, followed by one
        to *global_href* when that is truthy.  Both targets are made relative
        to *item* via ``item.relhref``.
        """
        html = item.data
        head = html.find(base.tag('xhtml', 'head'))
        link_tag = base.tag('xhtml', 'link')
        style_tag = base.tag('xhtml', 'style')

        def lowered(value):
            # Best effort: values that cannot be lower-cased pass through.
            try:
                return value.lower()
            except Exception:
                return value

        def is_css_reference(node):
            if node.tag == link_tag:
                rel = lowered(node.get('rel', 'stylesheet'))
                mime = lowered(node.get('type', base.CSS_MIME))
                return rel == 'stylesheet' and mime in base.OEB_STYLES
            if node.tag == style_tag:
                # Unlike <link>, the type of <style> is compared verbatim.
                return node.get('type', base.CSS_MIME) in base.OEB_STYLES
            return False

        candidates = html.xpath(
            '//*[local-name()="style" or local-name()="link"]')
        for node in candidates:
            if is_css_reference(node):
                node.getparent().remove(node)

        targets = [href]
        if global_href:
            targets.append(global_href)
        for target in targets:
            link = etree.SubElement(head,
                                    link_tag,
                                    rel='stylesheet',
                                    type=base.CSS_MIME,
                                    href=item.relhref(target))
            link.tail = '\n'
Example #2
0
 def _clean_opf(self, opf):
     """Return a new OPF 2 ``package`` element rebuilt from *opf*.

     Elements with no namespace or in the OPF 1 namespace are renamed into
     the ``opf`` namespace, Dublin Core metadata is renamed into
     ``const.DC11_NS`` with lower-cased local names, and the metadata,
     manifest, spine, tours and guide content is appended to the new root.
     """
     def dc_tag(local_name):
         return '{%s}%s' % (const.DC11_NS, local_name.lower())

     # Collect every namespace mapping used anywhere in the document; the
     # rebuilt <metadata> element is created with all of them.
     declared = {}
     for node in opf.iter(tag=etree.Element):
         declared.update(node.nsmap)

     legacy_namespaces = ('', const.OPF1_NS)
     for node in opf.iter(tag=etree.Element):
         local_name = parse_utils.barename(node.tag)
         if ':' in local_name:
             continue
         if parse_utils.namespace(node.tag) in legacy_namespaces:
             node.tag = base.tag('opf', local_name)
     declared.update(const.OPF2_NSMAP)

     package = etree.Element(base.tag('opf', 'package'),
                             nsmap={None: const.OPF2_NS},
                             attrib=dict(opf.attrib))
     metadata = etree.SubElement(package,
                                 base.tag('opf', 'metadata'),
                                 nsmap=declared)

     # OPF 1 wrapper elements: skipped themselves, their descendants are
     # still visited by the xpath below.
     skipped = (base.tag('opf', 'dc-metadata'),
                base.tag('opf', 'x-metadata'))
     for node in base.xpath(opf, 'o2:metadata//*'):
         if node.tag in skipped:
             continue
         if parse_utils.namespace(node.tag) in const.DC_NSES:
             node.tag = dc_tag(parse_utils.barename(node.tag))
         if node.tag.startswith('dc:'):
             node.tag = dc_tag(node.tag.partition(':')[-1])
         metadata.append(node)
     for node in base.xpath(opf, 'o2:metadata//o2:meta'):
         metadata.append(node)

     for expr in ('o2:manifest', 'o2:spine', 'o2:tours', 'o2:guide'):
         for node in base.xpath(opf, expr):
             package.append(node)
     return package
Example #3
0
 def process_node(html_parent, toc, level=1, indent='  ', style_level=2):
     """Append an ``<li>`` for *toc* to *html_parent*, recursing into children.

     The entry links to ``toc.dest`` (plus ``#toc.frag`` when set) or to
     ``'#'`` when there is no destination.  Children are placed in a nested
     ``<ul class="levelN">``; text/tail whitespace is set for pretty
     printing based on *level* and *indent*.
     """
     item = html_parent.makeelement(base.tag('xhtml', 'li'))
     item.tail = '\n' + indent * level
     html_parent.append(item)

     dest, fragment = toc.dest, toc.frag
     if dest:
         target = container.name_to_href(dest, toc_name)
         if fragment:
             target += '#' + fragment
     else:
         target = '#'

     anchor = item.makeelement(base.tag('xhtml', 'a'), href=target)
     anchor.text = toc.title
     item.append(anchor)

     if len(toc) == 0:
         return

     sublist = item.makeelement(base.tag('xhtml', 'ul'))
     sublist.set('class', 'level%d' % (style_level))
     item.append(sublist)
     anchor.tail = '\n\n' + indent * (level + 2)
     sublist.text = '\n' + indent * (level + 3)
     sublist.tail = '\n\n' + indent * (level + 1)
     for entry in toc:
         # *indent* is intentionally not forwarded: nested levels use the
         # default, exactly as the recursion always has.
         process_node(sublist,
                      entry,
                      level + 3,
                      style_level=style_level + 1)
     # Pull the closing </ul> back to the indentation of its opening tag.
     sublist[-1].tail = '\n' + indent * (level + 2)
Example #4
0
def get_nav_landmarks(container):
    """Yield the landmark entries of the navigation document, if any.

    The nav document is located with ``find_existing_nav_toc``.  For every
    ``<li>`` inside a ``<nav epub:type="landmarks">`` the first descendant
    ``<a>`` carrying an ``href`` is considered; when that href resolves to a
    name present in *container* a dict with the keys ``dest``, ``frag``,
    ``title`` and ``type`` is yielded.
    """
    nav = find_existing_nav_toc(container)
    if not nav or not container.has_name(nav):
        return
    root = container.parsed(nav)
    # Build the qualified attribute name with ``base.tag`` like the rest of
    # the code does; ``base`` itself was being called here.
    et = base.tag('epub', 'type')
    for elem in root.iterdescendants(base.tag('xhtml', 'nav')):
        if elem.get(et) != 'landmarks':
            continue
        for li in elem.iterdescendants(base.tag('xhtml', 'li')):
            for a in li.iterdescendants(base.tag('xhtml', 'a')):
                href, rtype = a.get('href'), a.get(et)
                if not href:
                    continue
                title = etree.tostring(a,
                                       method='text',
                                       encoding='unicode',
                                       with_tail=False).strip()
                # Split "path#fragment" into its two halves.
                href, frag = href.partition('#')[::2]
                name = container.href_to_name(href, nav)
                if container.has_name(name):
                    yield {
                        'dest': name,
                        'frag': frag,
                        'title': title or '',
                        'type': rtype or ''
                    }
                # Only the first linked <a> of each <li> counts.
                break
Example #5
0
def set_authors(root, prefixes, refines, authors):
    """Replace the author ``dc:creator`` entries of *root* with *authors*.

    Existing creators are removed unless their ``opf:role`` attribute or
    refined ``role`` property marks them as something other than an author.
    For each entry of *authors* with a name, a new ``dc:creator`` plus a
    refining ``role`` meta (``aut``) is appended, and a ``file-as`` meta
    when the author has a sort value.
    """
    ensure_prefix(root, prefixes, 'marc')
    role_attr = oeb_base.tag('opf', 'role')
    for creator in XPath('./opf:metadata/dc:creator')(root):
        props = properties_for_id_with_scheme(creator.get('id'), prefixes,
                                              refines)
        legacy_role = creator.get(role_attr)
        non_author = ((legacy_role and legacy_role.lower() != 'aut')
                      or (props.get('role')
                          and not is_relators_role(props, 'aut')))
        if not non_author:
            remove_element(creator, refines)

    metadata = XPath('./opf:metadata')(root)[0]
    meta_tag = oeb_base.tag('opf', 'meta')
    for author in authors:
        if not author.name:
            continue
        creator = metadata.makeelement(oeb_base.tag('dc', 'creator'))
        target = '#' + ensure_id(creator)
        creator.text = author.name
        metadata.append(creator)

        role = metadata.makeelement(meta_tag,
                                    attrib={
                                        'refines': target,
                                        'property': 'role',
                                        'scheme': 'marc:relators'
                                    })
        role.text = 'aut'
        metadata.append(role)

        if author.sort:
            file_as = metadata.makeelement(meta_tag,
                                           attrib={
                                               'refines': target,
                                               'property': 'file-as'
                                           })
            file_as.text = author.sort
            metadata.append(file_as)
Example #6
0
def set_guide_item(container, item_type, title, name, frag=None):
    """Create, update or remove the guide reference of type *item_type*.

    When *name* is truthy the reference (created if missing, together with
    the ``<guide>`` itself) gets *title*, the href of *name* (plus
    ``#frag``) and *item_type*.  When *name* is falsy every matching
    reference is removed.  The OPF is marked dirty in either case.
    """
    reference_tag = base.tag('opf', 'reference')
    if name:
        href = container.name_to_href(name, container.opf_name)
        if frag:
            href += '#' + frag
    else:
        href = None

    guides = container.opf_xpath('//opf:guide')
    if href and not guides:
        new_guide = container.opf.makeelement(base.tag('opf', 'guide'),
                                              nsmap={'opf': const.OPF2_NS})
        container.insert_into_xml(container.opf, new_guide)
        guides = [new_guide]

    for guide in guides:
        # The type comparison is case-insensitive.
        matches = [
            child for child in guide.iterchildren(etree.Element)
            if child.tag == reference_tag
            and child.get('type', '').lower() == item_type.lower()
        ]
        if href and not matches:
            created = guide.makeelement(reference_tag,
                                        type=item_type,
                                        nsmap={'opf': const.OPF2_NS})
            container.insert_into_xml(guide, created)
            matches.append(created)
        for ref in matches:
            if not href:
                container.remove_from_xml(ref)
                continue
            ref.set('title', title)
            ref.set('href', href)
            ref.set('type', item_type)
    container.dirty(container.opf_name)
Example #7
0
 def map_resources(self, oeb_book):
     """Register the images and link targets found in *oeb_book*.

     Every manifest item whose media type is in ``base.OEB_IMAGES`` gets a
     sequential, zero-padded file name recorded in ``self.images``.  For
     every item that is also in the spine, a link id is requested for the
     item itself and for each link attribute below ``<body>`` whose
     defragmented absolute target is in ``self.base_hrefs``.
     """
     for item in oeb_book.manifest:
         if (item.media_type in base.OEB_IMAGES
                 and item.href not in self.images):
             ext = os.path.splitext(item.href)[1]
             fname = '%s%s' % (len(self.images), ext)
             self.images[item.href] = fname.zfill(10)
         if item not in oeb_book.spine:
             continue
         self.get_link_id(item.href)
         body = item.data.find(base.tag('xhtml', 'body'))
         if body is None:
             # A document without <body> has no links to scan; previously
             # this raised AttributeError on ``None.iter()``.
             continue
         link_attrs = set(html.defs.link_attrs)
         link_attrs.add(base.tag('xlink', 'href'))
         for el in body.iter():
             attribs = el.attrib
             try:
                 has_plain_tag = isinstance(el.tag, (str, bytes))
             except Exception:
                 # Narrowed from a bare ``except:`` so that
                 # KeyboardInterrupt/SystemExit are no longer swallowed.
                 continue
             if not has_plain_tag:
                 # Skip nodes whose tag is not a string.
                 continue
             for attr in attribs:
                 if attr not in link_attrs:
                     continue
                 target, fragment = urllib.parse.urldefrag(
                     item.abshref(attribs[attr]))
                 if target in self.base_hrefs:
                     self.get_link_id(target, fragment)
    def upshift_markup(self):  # {{{
        """Upgrade markup to comply with XHTML 1.1 where possible.

        For each spine document: copy ``lang`` to ``xml:lang`` when the
        latter is missing, rename ``<u>`` elements to ``span`` and drop
        repeated ``id``/``name`` attribute values, keeping the first
        occurrence of each.  Documents without a ``<body>`` are skipped
        after the language fix-up.
        """
        xml_lang = base.tag('xml', 'lang')
        for item in self.oeb.spine:
            root = item.data
            lang = root.get('lang')
            if lang and not root.get(xml_lang):
                root.set(xml_lang, lang)

            bodies = base.XPath('//h:body')(root)
            body = bodies[0] if bodies else bodies
            if not hasattr(body, 'xpath'):
                continue

            for underline in base.XPath('//h:u')(root):
                underline.tag = 'span'

            # One "already seen" set per attribute, checked id first.
            seen = {'id': set(), 'name': set()}
            for elem in base.XPath('//*[@id or @name]')(root):
                for attr, known in seen.items():
                    value = elem.get(attr, None)
                    if not value:
                        continue
                    if value in known:
                        del elem.attrib[attr]
                    else:
                        known.add(value)
Example #9
0
def linearize_jacket(oeb):
    """Turn the table markup of the jacket page into divs and spans.

    Only the first four spine items are inspected; the first one matching
    ``JACKET_XPATH`` has its ``table``/``tr``/``th`` elements renamed to
    ``div`` and its ``td`` elements renamed to ``span``.
    """
    replacements = (
        ('//h:table|//h:tr|//h:th', 'div'),
        ('//h:td', 'span'),
    )
    for item in oeb.spine[:4]:
        if not XPath(JACKET_XPATH)(item.data):
            continue
        for expr, new_name in replacements:
            for node in XPath(expr)(item.data):
                node.tag = base.tag('xhtml', new_name)
        break
Example #10
0
 def add_toc_level(self, elem, toc):
     """Recursively add one block per entry of *toc* below *elem*.

     Each entry becomes a ``div.calibre_toc_block`` holding an
     ``a.calibre_toc_line`` that links to the entry's href; the entry's
     own children are added inside that block.
     """
     for entry in toc:
         wrapper = base.element(elem,
                                base.tag('xhtml', 'div'),
                                attrib={'class': 'calibre_toc_block'})
         link_attrs = {'href': entry.href, 'class': 'calibre_toc_line'}
         link = base.element(wrapper,
                             base.tag('xhtml', 'a'),
                             attrib=link_attrs)
         link.text = entry.title
         self.add_toc_level(wrapper, entry)
Example #11
0
 def create_li(ol, entry):
     """Append ``<li><a href=...></a></li>`` to *ol* and return the ``<a>``.

     The href is derived from ``entry['dest']`` relative to the TOC
     document, with ``#entry['frag']`` appended when a fragment is set.
     """
     item = ol.makeelement(base.tag('xhtml', 'li'))
     ol.append(item)
     anchor = item.makeelement(base.tag('xhtml', 'a'))
     item.append(anchor)

     target = container.name_to_href(entry['dest'], tocname)
     fragment = entry['frag']
     anchor.set('href', (target + '#' + fragment) if fragment else target)
     return anchor
Example #12
0
 def mobimlize_spine(self):
     """Iterate over the spine and convert it to MOBIML.

     Each spine item's ``<body>`` is converted into a freshly created
     ``<html><body>`` tree, which then replaces ``item.data``.
     """
     html_tag = base.tag('xhtml', 'html')
     body_tag = base.tag('xhtml', 'body')
     for item in self.oeb.spine:
         stylizer = Stylizer(item.data, item.href, self.oeb, self.opts,
                             self.profile)
         source_body = item.data.find(body_tag)
         new_root = etree.Element(html_tag, nsmap=MOBI_NSMAP)
         new_body = etree.SubElement(new_root, body_tag)
         self.current_spine_item = item
         self.mobimlize_elem(source_body, stylizer, BlockState(new_body),
                             [FormatState()])
         item.data = new_root
Example #13
0
    def detect_chapters(self):
        """Find chapter starts in the spine and insert chapter marks.

        ``self.detected_chapters`` is reset and, when ``self.opts.chapter``
        is set, filled with ``(item, element)`` pairs for every match of the
        chapter expression.  Depending on ``self.opts.chapter_mark`` a rule
        and/or page-break element is then inserted before each match.
        """
        self.detected_chapters = []
        self.chapter_title_attribute = None

        def find_matches(expr, doc):
            try:
                found = XPath(expr)(doc)
                # Results without a length are treated as invalid.
                len(found)
            except Exception:
                self.log.warn('Invalid chapter expression, ignoring: %s' %
                              expr)
                return []
            return found

        if self.opts.chapter:
            chapter_path, title_attribute = (
                self.get_toc_parts_for_xpath(self.opts.chapter))
            self.chapter_title_attribute = title_attribute
            for item in self.oeb.spine:
                self.detected_chapters.extend(
                    (item, match)
                    for match in find_matches(chapter_path, item.data))

            chapter_mark = self.opts.chapter_mark
            break_before = 'display: block; page-break-before: always'
            break_after = 'display: block; page-break-after: always'
            matches_per_item = collections.Counter()
            for item, elem in self.detected_chapters:
                matches_per_item[item] += 1
                title = re.sub(r'\s+', ' ', base.xml2text(elem).strip())
                self.log('\tDetected chapter:', title[:50])
                if chapter_mark == 'none':
                    continue
                if chapter_mark == 'rule':
                    name, attrs = 'hr', {}
                elif chapter_mark == 'pagebreak':
                    if matches_per_item[item] < 3 and at_start(elem):
                        # For the first two elements in this item, check if
                        # they are at the start of the file, in which case
                        # inserting a page break is unnecessary and can lead
                        # to extra blank pages in the PDF Output plugin. We
                        # need to use two as feedbooks epubs match both a
                        # heading tag and its containing div with the default
                        # chapter expression.
                        continue
                    name, attrs = 'div', {'style': break_after}
                else:  # chapter_mark == 'both'
                    name, attrs = 'hr', {'style': break_before}
                mark = elem.makeelement(base.tag('xhtml', name), **attrs)
                try:
                    elem.addprevious(mark)
                except TypeError:
                    self.log.exception('Failed to mark chapter')
Example #14
0
def create_manifest_item(root, href_template, id_template, media_type=None):
    """Append a new ``<item>`` to the manifest of *root* and return it.

    The href and id are made unique (via ``ensure_unique``) against every
    ``href``/``id`` attribute value in the document.  The media type
    defaults to ``guess_type(href_template)``.  Returns ``None`` when
    *root* has no manifest.
    """
    known_ids = frozenset(root.xpath('//*/@id'))
    known_hrefs = frozenset(root.xpath('//*/@href'))
    href = ensure_unique(href_template, known_hrefs)
    item_id = ensure_unique(id_template, known_ids)

    manifest = root.find(base.tag('opf', 'manifest'))
    if manifest is None:
        return None

    entry = manifest.makeelement(base.tag('opf', 'item'))
    entry.set('href', href)
    entry.set('id', item_id)
    entry.set('media-type', media_type or guess_type(href_template))
    manifest.append(entry)
    return entry
Example #15
0
 def mangle_spine(self):
     """Add the case-mangling stylesheet and mangle every spine document.

     A ``manglecase.css`` manifest item holding ``CASE_MANGLER_CSS`` is
     created, linked from the ``<head>`` of each spine document, and
     ``self.mangle_elem`` is run on each document's ``<body>``.
     """
     css_id, css_href = self.oeb.manifest.generate('manglecase',
                                                   'manglecase.css')
     self.oeb.manifest.add(css_id, css_href, base.CSS_MIME,
                           data=CASE_MANGLER_CSS)
     for item in self.oeb.spine:
         doc = item.data
         link_target = item.relhref(css_href)
         head = doc.find(base.tag('xhtml', 'head'))
         etree.SubElement(head,
                          base.tag('xhtml', 'link'),
                          rel='stylesheet',
                          href=link_target,
                          type=base.CSS_MIME)
         stylizer = Stylizer(doc, item.href, self.oeb, self.opts,
                             self.profile)
         body = doc.find(base.tag('xhtml', 'body'))
         self.mangle_elem(body, stylizer)
Example #16
0
def add_from_li(container, li, parent, nav_name):
    """Add a TOC entry to *parent* from the first ``<a>``/``<span>`` of *li*.

    The entry text is the element's text content, falling back to the
    joined ``title`` attributes of the element and its descendants.  A
    link starting with ``#`` points into *nav_name* itself; any other href
    is resolved relative to *nav_name*.  Missing values are passed to
    ``parent.add`` as ``None``.
    """
    dest = frag = text = None
    label = next(
        li.iterchildren(base.tag('xhtml', 'a'), base.tag('xhtml', 'span')),
        None)
    if label is not None:
        text = etree.tostring(label,
                              method='text',
                              encoding='unicode',
                              with_tail=False).strip()
        if not text:
            titles = label.xpath('descendant-or-self::*/@title')
            text = ' '.join(titles).strip()
        href = label.get('href')
        if href:
            if href.startswith('#'):
                dest = nav_name
            else:
                dest = container.href_to_name(href, base=nav_name)
            frag = urllib.parse.urlparse(href).fragment or None
    return parent.add(text or None, dest or None, frag or None)
Example #17
0
def set_identifiers(root,
                    prefixes,
                    refines,
                    new_identifiers,
                    force_identifiers=False):
    """Write *new_identifiers* as ``dc:identifier`` elements into *root*.

    The identifier referenced by the package's ``unique-identifier``
    attribute is always kept.  Other existing identifiers are removed when
    they are empty, cannot be parsed into scheme and value, use a scheme
    present in *new_identifiers*, or when *force_identifiers* is true.
    Each ``scheme: value`` pair of *new_identifiers* is then added as
    ``scheme:value`` text, placed before the package identifier if there
    is one and appended to the metadata otherwise.
    """
    uid = root.get('unique-identifier')
    package_identifier = None
    for ident in XPath('./opf:metadata/dc:identifier')(root):
        if uid is not None and uid == ident.get('id'):
            package_identifier = ident
            continue
        val = (ident.text or '').strip()
        if not val:
            ident.getparent().remove(ident)
            continue
        scheme, val = parse_identifier(ident, val, refines)
        if (not scheme or not val or force_identifiers
                or scheme in new_identifiers):
            remove_element(ident, refines)
    metadata = XPath('./opf:metadata')(root)[0]
    for scheme, val in new_identifiers.items():
        # The element must be dc:identifier (not "ident"): that is the name
        # this function, and the XPath above, look for when reading back.
        ident = metadata.makeelement(oeb_base.tag('dc', 'identifier'))
        ident.text = '%s:%s' % (scheme, val)
        if package_identifier is None:
            metadata.append(ident)
        else:
            p = package_identifier.getparent()
            p.insert(p.index(package_identifier), ident)
Example #18
0
def create_rating(root, prefixes, val):
    """Append a ``calibre:rating`` meta element with text *val*.

    The ``calibre`` prefix is ensured on *root* first; the element is added
    to the first ``opf:metadata`` element.
    """
    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
    metadata = XPath('./opf:metadata')(root)[0]
    rating = metadata.makeelement(oeb_base.tag('opf', 'meta'),
                                  attrib={'property': 'calibre:rating'})
    rating.text = val
    metadata.append(rating)
Example #19
0
def read_authors(root, prefixes, refines):
    """Return the de-duplicated authors declared by ``dc:creator`` elements.

    Creators with an explicit author role (refined ``role`` property or
    legacy ``opf:role`` attribute equal to ``aut``) take precedence;
    creators without any role are used only when no roled author exists.
    Creators with an empty text or a non-author role are ignored.
    """
    roled_authors, unroled_authors = [], []

    def author(item, props, val):
        # Prefer the refined "file-as" property; fall back to the legacy
        # attribute, which is spelled ``opf:file-as`` (hyphen) just like
        # the property, not ``file_as``.
        file_as = props.get('file-as')
        if file_as:
            aus = file_as[0][-1]
        else:
            aus = item.get(oeb_base.tag('opf', 'file-as')) or None
        return Author(normalize_whitespace(val), normalize_whitespace(aus))

    for item in XPath('./opf:metadata/dc:creator')(root):
        val = (item.text or '').strip()
        if not val:
            continue
        props = properties_for_id_with_scheme(item.get('id'), prefixes,
                                              refines)
        role = props.get('role')
        opf_role = item.get(oeb_base.tag('opf', 'role'))
        if role:
            if is_relators_role(props, 'aut'):
                roled_authors.append(author(item, props, val))
        elif opf_role:
            if opf_role.lower() == 'aut':
                roled_authors.append(author(item, props, val))
        else:
            unroled_authors.append(author(item, props, val))

    return uniq(roled_authors or unroled_authors)
Example #20
0
 def author(item, props, val):
     """Build an ``Author`` from the creator *item* with display name *val*.

     The sort value comes from the refined ``file-as`` property in *props*
     when present, otherwise from the legacy ``opf:file-as`` attribute of
     *item* (``None`` when neither is set).
     """
     file_as = props.get('file-as')
     if file_as:
         aus = file_as[0][-1]
     else:
         # The attribute name uses a hyphen, matching the property above;
         # it was previously looked up as ``file_as``.
         aus = item.get(oeb_base.tag('opf', 'file-as')) or None
     return Author(normalize_whitespace(val), normalize_whitespace(aus))
Example #21
0
def create_series(root, refines, series, series_index):
    """Append a ``belongs-to-collection`` meta element for *series*.

    The new element is refined with ``collection-type`` set to ``series``
    and ``group-position`` set to *series_index*.
    """
    metadata = XPath('./opf:metadata')(root)[0]
    collection = metadata.makeelement(
        oeb_base.tag('opf', 'meta'),
        attrib={'property': 'belongs-to-collection'})
    collection.text = series
    metadata.append(collection)
    set_refines(collection,
                refines,
                refdef('collection-type', 'series'),
                refdef('group-position', series_index))
Example #22
0
def set_pubdate(root, prefixes, refines, val):
    """Replace all ``dc:date`` elements of *root* with one for *val*.

    Existing dates are always removed; a new element holding
    ``isoformat(val)`` is added only when *val* is a defined date.
    """
    for existing in XPath('./opf:metadata/dc:date')(root):
        remove_element(existing, refines)
    if is_date_undefined(val):
        return
    text = isoformat(val)
    metadata = XPath('./opf:metadata')(root)[0]
    date = metadata.makeelement(oeb_base.tag('dc', 'date'))
    date.text = text
    metadata.append(date)
Example #23
0
 def process_node(xml_parent, toc_parent):
     """Recursively emit an NCX ``navPoint`` for each child of *toc_parent*.

     Every nav point gets the next value of the shared play-order counter
     as its id and ``playOrder``, a ``navLabel/text`` holding the title
     (with ``spat`` matches replaced by a space) and, when the entry has a
     destination, a ``content`` element pointing at it.
     """
     for entry in toc_parent:
         play_order['c'] += 1
         position = play_order['c']
         point = etree.SubElement(xml_parent,
                                  base.tag('ncx', 'navPoint'),
                                  id='num_%d' % position,
                                  playOrder=str(position))
         label = etree.SubElement(point, base.tag('ncx', 'navLabel'))
         title = entry.title
         text = etree.SubElement(label, base.tag('ncx', 'text'))
         text.text = spat.sub(' ', title) if title else title
         if entry.dest:
             src = to_href(entry.dest)
             if entry.frag:
                 src += '#' + entry.frag
             etree.SubElement(point, base.tag('ncx', 'content'), src=src)
         process_node(point, entry)
Example #24
0
def set_comments(root, prefixes, refines, val):
    """Replace all ``dc:description`` elements of *root* with *val*.

    Existing descriptions are always removed.  A new element is added only
    when *val* is non-empty after stripping surrounding whitespace.
    """
    for dc in XPath('./opf:metadata/dc:description')(root):
        remove_element(dc, refines)
    m = XPath('./opf:metadata')(root)[0]
    val = val.strip() if val else val
    if val:
        # Must be dc:description (not "desc"): that is the element removed
        # above, so anything else would pile up on repeated calls.
        c = m.makeelement(oeb_base.tag('dc', 'description'))
        c.text = val
        m.append(c)
    def convert_metadata(self, oeb):
        """Populate ``self.mi`` with the book metadata of *oeb*.

        The metadata is written into a temporary OPF 2 ``package`` element,
        serialised as UTF-8 and read back through ``opf_meta.OPF`` without
        spine population or cover guessing.
        """
        package = etree.Element(base.tag('opf', 'package'),
                                attrib={'version': '2.0'},
                                nsmap={None: const.OPF2_NS})
        oeb.metadata.to_opf2(package)
        serialized = etree.tostring(package, encoding='utf-8')
        reader = opf_meta.OPF(io.BytesIO(serialized),
                              populate_spine=False,
                              try_to_guess_cover=False)
        self.mi = reader.to_book_metadata()
Example #26
0
def set_publisher(root, prefixes, refines, val):
    """Replace all ``dc:publisher`` elements of *root* with *val*.

    Existing publishers are always removed.  A new element holding the
    whitespace-normalised value is added only when *val* is non-empty
    after stripping.
    """
    for existing in XPath('./opf:metadata/dc:publisher')(root):
        remove_element(existing, refines)
    metadata = XPath('./opf:metadata')(root)[0]
    if not val:
        return
    val = val.strip()
    if not val:
        return
    publisher = metadata.makeelement(oeb_base.tag('dc', 'publisher'))
    publisher.text = normalize_whitespace(val)
    metadata.append(publisher)
Example #27
0
 def process_node(xml_parent, toc_parent):
     """Recursively emit an ``<li>`` for each child of *toc_parent*.

     Entries with a destination become ``<a href=...>``, entries without
     one become ``<span>``.  Entries that have children get a nested
     ``<ol>`` filled by recursion.
     """
     for entry in toc_parent:
         item = xml_parent.makeelement(base.tag('xhtml', 'li'))
         xml_parent.append(item)

         text = spat.sub(' ', entry.title or '').strip()
         dest = entry.dest
         label = item.makeelement(
             base.tag('xhtml', 'a' if dest else 'span'))
         label.text = text
         item.append(label)

         if dest:
             target = to_href(dest)
             if entry.frag:
                 target += '#' + entry.frag
             label.set('href', target)

         if len(entry):
             sublist = item.makeelement(base.tag('xhtml', 'ol'))
             item.append(sublist)
             process_node(sublist, entry)
Example #28
0
def create_timestamp(root, prefixes, m, val):
    """Append a ``calibre:timestamp`` meta element for *val* to *m*.

    Nothing happens when *val* is an undefined date.  Otherwise the
    ``calibre`` and ``dcterms`` prefixes are ensured on *root* and the
    value is stored as ``w3cdtf(val)``.
    """
    if is_date_undefined(val):
        return
    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
    ensure_prefix(root, prefixes, 'dcterms')
    text = w3cdtf(val)
    attributes = {
        'property': 'calibre:timestamp',
        'scheme': 'dcterms:W3CDTF',
    }
    stamp = m.makeelement(oeb_base.tag('opf', 'meta'), attrib=attributes)
    stamp.text = text
    m.append(stamp)
Example #29
0
def ensure_single_nav_of_type(root, ntype='toc'):
    """Return the one ``<nav>`` of type *ntype* in *root*, emptied.

    Extra navs of the same type are passed to ``extract``.  The first one
    is cleared of its content while keeping its attributes and tail.  When
    no such nav exists a new one is appended to ``<body>``.  In both cases
    the ``epub:type`` attribute is set to *ntype*.
    """
    # Build the qualified attribute name with ``base.tag`` like the rest of
    # the code does; ``base`` itself was being called here.
    et = base.tag('epub', 'type')
    navs = [
        n for n in root.iterdescendants(base.tag('xhtml', 'nav'))
        if n.get(et) == ntype
    ]
    for x in navs[1:]:
        extract(x)
    if navs:
        nav = navs[0]
        # clear() also wipes attributes and tail, so save and restore them.
        tail = nav.tail
        attrib = dict(nav.attrib)
        nav.clear()
        nav.attrib.update(attrib)
        nav.tail = tail
    else:
        nav = root.makeelement(base.tag('xhtml', 'nav'))
        first_child(root, base.tag('xhtml', 'body')).append(nav)
    nav.set(et, ntype)
    return nav
Example #30
0
 def dataize_svg(self, item, svg=None):
     """Point ``xlink:href`` attributes in *svg* at temporary file copies.

     For every SVG element whose link resolves (relative to *item*) to a
     manifest entry, the entry's bytes are written to a persistent
     temporary file, the file name is recorded in ``self.temp_files`` and
     the attribute is rewritten to that name.  *svg* defaults to
     ``item.data`` and is returned.
     """
     if svg is None:
         svg = item.data
     manifest_hrefs = self.oeb.manifest.hrefs
     xlink_href = base.tag('xlink', 'href')
     for elem in xpath(svg, '//svg:*[@xl:href]'):
         link = urlnormalize(elem.attrib[xlink_href])
         path, _fragment = urllib.parse.urldefrag(link)
         if not path:
             continue
         target = item.abshref(path)
         if target not in manifest_hrefs:
             continue
         payload = manifest_hrefs[target].bytes_representation
         # Extension from the detected image type, "jpg" when unknown.
         suffix = '.' + (what(None, payload) or 'jpg')
         with PersistentTemporaryFile(suffix=suffix) as tmp:
             tmp.write(payload)
             self.temp_files.append(tmp.name)
         elem.attrib[xlink_href] = tmp.name
     return svg