def check_opf(container):
    """Check the OPF of ``container`` for structural problems.

    The checks run in this order: root element and ``version`` attribute;
    presence of the metadata, manifest and spine sections; empty ``id``
    attributes and ``idref`` attributes that point at no ``id``; non-linear
    spine items; manifest items whose ``href`` is absent, does not resolve to
    an existing file, or is duplicated; duplicated spine entries; the spine
    ``toc`` attribute and the NCX it should reference; the navigation document
    when the OPF major version is greater than 2; ``<meta name="cover">``
    entries; the package ``unique-identifier``; empty ``dc:identifier``
    elements; and spine items whose media type is not ``XHTML_MIME``.

    :param container: The book container whose OPF is inspected.
    :return: A list of error objects, one per problem found (empty when no
        problem is found).
    """
    errors = []
    opf_version = container.opf_version_parsed
    opf_name = container.opf_name

    def elements_with_id(expr, wanted):
        # The id is compared in Python instead of being spliced into the XPath
        # expression: repr() is not XPath quoting, so an id containing both
        # quote characters made the expression invalid, and a backslash in
        # the id was doubled and never matched.
        return [el for el in container.opf_xpath(expr) if el.get('id') == wanted]

    # Root element and version attribute.
    if container.opf.tag != OPF('package'):
        err = BaseError(_('The OPF does not have the correct root element'), opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The opf must have the root element <package> in namespace {0}, like this: <package xmlns="{0}">')).format(OPF2_NS)
        errors.append(err)
    elif container.opf.get('version') is None and container.book_type == 'epub':
        err = BaseError(_('The OPF does not have a version'), opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The <package> tag in the OPF must have a version attribute. This is usually version="2.0" for EPUB2 and AZW3 and version="3.0" for EPUB3'))
        errors.append(err)

    # Mandatory sections.
    for tag in ('metadata', 'manifest', 'spine'):
        if not container.opf_xpath('/opf:package/opf:' + tag):
            errors.append(MissingSection(opf_name, tag))

    # Empty ids are reported once per element and then ignored, so that an
    # empty idref is still reported as pointing nowhere.
    all_ids = set(container.opf_xpath('//*/@id'))
    if '' in all_ids:
        errors.extend(
            EmptyID(opf_name, empty_id_tag.sourceline)
            for empty_id_tag in container.opf_xpath('//*[@id=""]'))
        all_ids.discard('')
    for elem in container.opf_xpath('//*[@idref]'):
        idref = elem.get('idref')
        if idref not in all_ids:
            errors.append(IncorrectIdref(opf_name, idref, elem.sourceline))

    nl_items = [elem.sourceline for elem in container.opf_xpath('//opf:spine/opf:itemref[@linear="no"]')]
    if nl_items:
        errors.append(NonLinearItems(opf_name, nl_items))

    # Manifest hrefs: absent, unresolvable or duplicated. ``seen`` maps each
    # href to the line of its first occurrence; ``dups`` collects the lines of
    # every occurrence of an href that appears more than once.
    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:manifest/opf:item'):
        href = item.get('href', None)
        if href is None:
            errors.append(NoHref(opf_name, item.get('id', None), item.sourceline))
            continue
        hname = container.href_to_name(href, opf_name)
        if not hname or not container.exists(hname):
            errors.append(MissingHref(opf_name, href, item.sourceline))
        if href in seen:
            dups.setdefault(href, [seen[href]]).append(item.sourceline)
        else:
            seen[href] = item.sourceline
    errors.extend(DuplicateHref(opf_name, eid, locs) for eid, locs in iteritems(dups))

    # Spine entries that reference the same manifest id more than once.
    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:spine/opf:itemref[@idref]'):
        ref = item.get('idref')
        if ref in seen:
            dups.setdefault(ref, [seen[ref]]).append(item.sourceline)
        else:
            seen[ref] = item.sourceline
    errors.extend(DuplicateHref(opf_name, eid, locs, for_spine=True) for eid, locs in iteritems(dups))

    # Spine toc attribute: it must name a manifest item with the NCX media
    # type; if it is absent while the manifest contains an NCX, report that.
    spine = container.opf_xpath('/opf:package/opf:spine[@toc]')
    if spine:
        spine = spine[0]
        mitems = elements_with_id('/opf:package/opf:manifest/opf:item[@id]', spine.get('toc'))
        if mitems:
            mitem = mitems[0]
            if mitem.get('media-type', '') != guess_type('a.ncx'):
                errors.append(IncorrectToc(opf_name, mitem.sourceline, bad_mimetype=mitem.get('media-type')))
        else:
            errors.append(IncorrectToc(opf_name, spine.sourceline, bad_idref=spine.get('toc')))
    else:
        spine = container.opf_xpath('/opf:package/opf:spine')
        if spine:
            spine = spine[0]
            ncx = container.manifest_type_map.get(guess_type('a.ncx'))
            if ncx:
                ncx_name = ncx[0]
                # Reverse the id -> name map to find the id of the NCX file.
                rmap = {v: k for k, v in iteritems(container.manifest_id_map)}
                ncx_id = rmap.get(ncx_name)
                if ncx_id:
                    errors.append(MissingNCXRef(opf_name, spine.sourceline, ncx_id))

    # Navigation document, only checked for OPF major versions above 2.
    if opf_version.major > 2:
        existing_nav = find_existing_nav_toc(container)
        if existing_nav is None:
            errors.append(MissingNav(opf_name, 0))
        else:
            toc = parse_nav(container, existing_nav)
            if len(toc) == 0:
                errors.append(EmptyNav(existing_nav, 0))

    # Cover declarations.
    covers = container.opf_xpath('/opf:package/opf:metadata/opf:meta[@name="cover"]')
    if covers:
        if len(covers) > 1:
            errors.append(MultipleCovers(opf_name, [c.sourceline for c in covers]))
        manifest_ids = set(container.opf_xpath('/opf:package/opf:manifest/opf:item/@id'))
        for cover in covers:
            if cover.get('content', None) not in manifest_ids:
                errors.append(IncorrectCover(opf_name, cover.sourceline, cover.get('content', '')))
            # Attribute order is only visible in the serialized form: report
            # the element when name="..." is written after content="...".
            raw = etree.tostring(cover)
            n, c = raw.find(b'name="'), raw.find(b'content="')
            if n > -1 and c > -1 and n > c:
                errors.append(NookCover(opf_name, cover.sourceline))

    # Package unique identifier and empty identifiers.
    uid = container.opf.get('unique-identifier', None)
    if uid is None or not elements_with_id('/opf:package/opf:metadata/dc:identifier[@id]', uid):
        errors.append(NoUID(opf_name))
    for elem in container.opf_xpath('/opf:package/opf:metadata/dc:identifier'):
        if not elem.text or not elem.text.strip():
            errors.append(EmptyIdentifier(opf_name, elem.sourceline))

    # Spine items must have the XHTML media type.
    for item, name, linear in container.spine_iter:
        mt = container.mime_map[name]
        if mt != XHTML_MIME:
            iid = item.get('idref', None)
            lnum = None
            if iid:
                mitem = elements_with_id('/opf:package/opf:manifest/opf:item[@id]', iid)
                if mitem:
                    lnum = mitem[0].sourceline
                else:
                    # No manifest item carries this id, so do not report it.
                    iid = None
            errors.append(BadSpineMime(name, iid, mt, lnum, opf_name))

    return errors
def check_opf(container):
    """Check the OPF of ``container`` for structural problems.

    The checks run in this order: root element and ``version`` attribute;
    presence of the metadata, manifest and spine sections; ``idref``
    attributes that point at no ``id``; non-linear spine items; manifest
    items whose ``href`` is absent, does not resolve to an existing file, or
    is duplicated; duplicated spine entries; the spine ``toc`` attribute and
    the NCX it should reference; the navigation document when the OPF major
    version is greater than 2; ``<meta name="cover">`` entries; the package
    ``unique-identifier``; empty ``dc:identifier`` elements; and spine items
    whose media type is not ``XHTML_MIME``.

    :param container: The book container whose OPF is inspected.
    :return: A list of error objects, one per problem found (empty when no
        problem is found).
    """
    errors = []
    opf_version = container.opf_version_parsed
    opf_name = container.opf_name

    def elements_with_id(expr, wanted):
        # The id is compared in Python instead of being spliced into the XPath
        # expression: repr() is not XPath quoting, so a unicode id on Python 2
        # (u'...') or an id containing both quote characters made the
        # expression invalid, and a backslash in the id never matched.
        return [el for el in container.opf_xpath(expr) if el.get('id') == wanted]

    # Root element and version attribute.
    if container.opf.tag != OPF('package'):
        err = BaseError(_('The OPF does not have the correct root element'), opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The opf must have the root element <package> in namespace {0}, like this: <package xmlns="{0}">')).format(OPF2_NS)
        errors.append(err)
    elif container.opf.get('version') is None and container.book_type == 'epub':
        err = BaseError(_('The OPF does not have a version'), opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The <package> tag in the OPF must have a version attribute. This is usually version="2.0" for EPUB2 and AZW3 and version="3.0" for EPUB3'))
        errors.append(err)

    # Mandatory sections.
    for tag in ('metadata', 'manifest', 'spine'):
        if not container.opf_xpath('/opf:package/opf:' + tag):
            errors.append(MissingSection(opf_name, tag))

    # Every idref must point at an id that exists somewhere in the OPF.
    all_ids = set(container.opf_xpath('//*/@id'))
    for elem in container.opf_xpath('//*[@idref]'):
        idref = elem.get('idref')
        if idref not in all_ids:
            errors.append(IncorrectIdref(opf_name, idref, elem.sourceline))

    nl_items = [elem.sourceline for elem in container.opf_xpath('//opf:spine/opf:itemref[@linear="no"]')]
    if nl_items:
        errors.append(NonLinearItems(opf_name, nl_items))

    # Manifest hrefs: absent, unresolvable or duplicated. ``seen`` maps each
    # href to the line of its first occurrence; ``dups`` collects the lines of
    # every occurrence of an href that appears more than once.
    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:manifest/opf:item'):
        href = item.get('href', None)
        if href is None:
            errors.append(NoHref(opf_name, item.get('id', None), item.sourceline))
            continue
        hname = container.href_to_name(href, opf_name)
        if not hname or not container.exists(hname):
            errors.append(MissingHref(opf_name, href, item.sourceline))
        if href in seen:
            dups.setdefault(href, [seen[href]]).append(item.sourceline)
        else:
            seen[href] = item.sourceline
    # .items() rather than .iteritems(): the latter exists only on Python 2.
    errors.extend(DuplicateHref(opf_name, eid, locs) for eid, locs in dups.items())

    # Spine entries that reference the same manifest id more than once.
    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:spine/opf:itemref[@idref]'):
        ref = item.get('idref')
        if ref in seen:
            dups.setdefault(ref, [seen[ref]]).append(item.sourceline)
        else:
            seen[ref] = item.sourceline
    errors.extend(DuplicateHref(opf_name, eid, locs, for_spine=True) for eid, locs in dups.items())

    # Spine toc attribute: it must name a manifest item with the NCX media
    # type; if it is absent while the manifest contains an NCX, report that.
    spine = container.opf_xpath('/opf:package/opf:spine[@toc]')
    if spine:
        spine = spine[0]
        mitems = elements_with_id('/opf:package/opf:manifest/opf:item[@id]', spine.get('toc'))
        if mitems:
            mitem = mitems[0]
            if mitem.get('media-type', '') != guess_type('a.ncx'):
                errors.append(IncorrectToc(opf_name, mitem.sourceline, bad_mimetype=mitem.get('media-type')))
        else:
            errors.append(IncorrectToc(opf_name, spine.sourceline, bad_idref=spine.get('toc')))
    else:
        spine = container.opf_xpath('/opf:package/opf:spine')
        if spine:
            spine = spine[0]
            ncx = container.manifest_type_map.get(guess_type('a.ncx'))
            if ncx:
                ncx_name = ncx[0]
                # Reverse the id -> name map to find the id of the NCX file.
                rmap = {v: k for k, v in container.manifest_id_map.items()}
                ncx_id = rmap.get(ncx_name)
                if ncx_id:
                    errors.append(MissingNCXRef(opf_name, spine.sourceline, ncx_id))

    # Navigation document, only checked for OPF major versions above 2.
    if opf_version.major > 2:
        existing_nav = find_existing_nav_toc(container)
        if existing_nav is None:
            errors.append(MissingNav(opf_name, 0))
        else:
            toc = parse_nav(container, existing_nav)
            if len(toc) == 0:
                errors.append(EmptyNav(existing_nav, 0))

    # Cover declarations.
    covers = container.opf_xpath('/opf:package/opf:metadata/opf:meta[@name="cover"]')
    if covers:
        if len(covers) > 1:
            errors.append(MultipleCovers(opf_name, [c.sourceline for c in covers]))
        manifest_ids = set(container.opf_xpath('/opf:package/opf:manifest/opf:item/@id'))
        for cover in covers:
            if cover.get('content', None) not in manifest_ids:
                errors.append(IncorrectCover(opf_name, cover.sourceline, cover.get('content', '')))
            # Attribute order is only visible in the serialized form: report
            # the element when name="..." is written after content="...".
            # etree.tostring() returns bytes, so the needles must be bytes too
            # (a str needle raises TypeError on Python 3).
            raw = etree.tostring(cover)
            n, c = raw.find(b'name="'), raw.find(b'content="')
            if n > -1 and c > -1 and n > c:
                errors.append(NookCover(opf_name, cover.sourceline))

    # Package unique identifier and empty identifiers.
    uid = container.opf.get('unique-identifier', None)
    if uid is None or not elements_with_id('/opf:package/opf:metadata/dc:identifier[@id]', uid):
        errors.append(NoUID(opf_name))
    for elem in container.opf_xpath('/opf:package/opf:metadata/dc:identifier'):
        if not elem.text or not elem.text.strip():
            errors.append(EmptyIdentifier(opf_name, elem.sourceline))

    # Spine items must have the XHTML media type.
    for item, name, linear in container.spine_iter:
        mt = container.mime_map[name]
        if mt != XHTML_MIME:
            iid = item.get('idref', None)
            lnum = None
            if iid:
                mitem = elements_with_id('/opf:package/opf:manifest/opf:item[@id]', iid)
                if mitem:
                    lnum = mitem[0].sourceline
                else:
                    # No manifest item carries this id, so do not report it.
                    iid = None
            errors.append(BadSpineMime(name, iid, mt, lnum, opf_name))

    return errors