Example #1
0
def run_checks(container):
    """Run the book checks against *container* and return the errors found.

    Every file listed in ``container.mime_map`` is read once and bucketed by
    MIME type (XML, HTML, stylesheet or raster image). The HTML size, XML
    parsing and raster image checkers run first. If any of them reports an
    error whose ``level`` is above ``WARN``, the errors collected so far are
    returned immediately and the remaining checks are skipped. Otherwise the
    CSS, encoding declaration and container-wide checks run as well.

    :param container: The book container to check. It must provide
        ``mime_map``, ``open()`` and ``parsed()`` as used below.
    :return: A list of the error objects produced by the checkers.
    """
    errors = []

    # Check parsing: read every file once and bucket it by MIME type.
    xml_items, html_items, raster_images, stylesheets = [], [], [], []
    # .items() rather than the Python 2 only .iteritems(); assumes mime_map
    # is a dict-like mapping of file name -> MIME type.
    for name, mt in container.mime_map.items():
        if mt in XML_TYPES:
            items = xml_items
        elif mt in OEB_DOCS:
            items = html_items
        elif mt in OEB_STYLES:
            items = stylesheets
        elif is_raster_image(mt):
            items = raster_images
        else:
            # Not a type any of the content checkers below look at
            continue
        # Close the handle deterministically instead of leaking it
        with container.open(name, "rb") as f:
            items.append((name, mt, f.read()))
    errors.extend(run_checkers(check_html_size, html_items))
    errors.extend(run_checkers(check_xml_parsing, xml_items))
    errors.extend(run_checkers(check_xml_parsing, html_items))
    errors.extend(run_checkers(check_raster_images, raster_images))

    # Anything worse than a warning at this stage ends the run early
    if any(err.level > WARN for err in errors):
        return errors

    # cssutils is not thread safe, so CSS is checked inline here rather than
    # through run_checkers (presumably because run_checkers is concurrent --
    # TODO confirm)
    for name, mt, raw in stylesheets:
        if not raw:
            errors.append(EmptyFile(name))
            continue
        errors.extend(check_css_parsing(name, raw))

    for name, _mt, _raw in html_items + xml_items:
        errors.extend(check_encoding_declarations(name, container))

    for name, mt, raw in html_items:
        if not raw:
            continue
        root = container.parsed(name)
        # Embedded <style> elements; a missing type attribute counts as CSS
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get("type", "text/css") == "text/css" and style.text:
                errors.extend(check_css_parsing(name, style.text, line_offset=style.sourceline - 1))
        # Inline style="..." attributes are parsed as bare declarations
        for elem in root.xpath("//*[@style]"):
            declaration = elem.get("style")
            if declaration:
                errors.extend(
                    check_css_parsing(name, declaration, line_offset=elem.sourceline - 1, is_declaration=True)
                )

    errors += check_mimetypes(container)
    errors += check_links(container) + check_link_destinations(container)
    errors += check_fonts(container)
    errors += check_filenames(container)
    errors += check_ids(container)
    errors += check_markup(container)
    errors += check_opf(container)

    return errors
Example #2
0
def run_checks(container):
    """Check the book in *container* and return a list of the errors found.

    All files in ``container.mime_map`` are read and grouped by MIME type
    into XML, HTML, stylesheet and raster image buckets. The HTML size, XML
    parsing and raster image checkers run on those buckets first. Should any
    resulting error have a ``level`` greater than ``WARN``, the function
    returns at that point with only the errors gathered so far. Otherwise it
    continues with the CSS, encoding declaration and whole-container checks.

    :param container: The book container to check. It must provide
        ``mime_map``, ``open()`` and ``parsed()`` as used below.
    :return: A list of the error objects produced by the checkers.
    """
    errors = []

    # Check parsing
    xml_items, html_items, raster_images, stylesheets = [], [], [], []
    # dict.iteritems() exists only on Python 2; items() is available on both
    # (assumes mime_map is a dict-like mapping of file name -> MIME type)
    for name, mt in container.mime_map.items():
        if mt in XML_TYPES:
            items = xml_items
        elif mt in OEB_DOCS:
            items = html_items
        elif mt in OEB_STYLES:
            items = stylesheets
        elif is_raster_image(mt):
            items = raster_images
        else:
            # File type is not covered by the content checkers
            continue
        # Use a context manager so the file handle is always closed
        with container.open(name, 'rb') as f:
            items.append((name, mt, f.read()))
    errors.extend(run_checkers(check_html_size, html_items))
    errors.extend(run_checkers(check_xml_parsing, xml_items))
    errors.extend(run_checkers(check_xml_parsing, html_items))
    errors.extend(run_checkers(check_raster_images, raster_images))

    # Bail out early if anything more severe than a warning was reported
    if any(err.level > WARN for err in errors):
        return errors

    # cssutils is not thread safe, so stylesheets are checked one by one here
    for name, mt, raw in stylesheets:
        if not raw:
            errors.append(EmptyFile(name))
            continue
        errors.extend(check_css_parsing(name, raw))

    for name, _mt, _raw in html_items + xml_items:
        errors.extend(check_encoding_declarations(name, container))

    for name, mt, raw in html_items:
        if not raw:
            continue
        root = container.parsed(name)
        # <style> elements without a type attribute are treated as text/css
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                errors.extend(check_css_parsing(name, style.text, line_offset=style.sourceline - 1))
        # style="..." attributes hold declarations only, not full rules
        for elem in root.xpath('//*[@style]'):
            inline_css = elem.get('style')
            if inline_css:
                errors.extend(check_css_parsing(name, inline_css, line_offset=elem.sourceline - 1, is_declaration=True))

    errors += check_mimetypes(container)
    errors += check_links(container) + check_link_destinations(container)
    errors += check_fonts(container)
    errors += check_ids(container)
    errors += check_filenames(container)
    errors += check_markup(container)
    errors += check_opf(container)

    return errors
Example #3
0
    def fix_opf(self, container):
        """Clean up the OPF of the book held in *container*, in place.

        In order, this: appends every OEB document missing from the spine,
        removes repeated spine entries, removes ``<guide>`` elements, marks
        the manifest item for ``'_static/' + self.config.epub_cover[0]`` as
        the cover image, tags ``epub-cover.xhtml`` as the title page, appends
        the calibre prefix declaration to the package, removes the
        ``<meta name="cover">`` tag, removes resources that ``check_links``
        reports as ``UnreferencedResource``, and finally pretty prints the
        OPF and marks it dirty.

        :param container: The book container whose OPF is modified.
        """
        in_spine = set()
        for fname, _linear in container.spine_names:
            in_spine.add(fname)
        spine = container.opf_xpath('//opf:spine')[0]
        # Reverse of manifest_id_map: file name -> manifest id
        name_to_id = dict(
            (fname, item_id)
            for item_id, fname in iteritems(container.manifest_id_map))

        # Add unreferenced text files to the spine
        for fname, mime in iteritems(container.mime_map):
            if mime not in OEB_DOCS or fname in in_spine:
                continue
            in_spine.add(fname)
            itemref = spine.makeelement(
                OPF('itemref'), idref=name_to_id[fname])
            container.insert_into_xml(spine, itemref)

        # Remove duplicate entries from spine, keeping the first occurrence
        already_listed = set()
        for itemref, fname, _linear in container.spine_iter:
            if fname not in already_listed:
                already_listed.add(fname)
            else:
                container.remove_from_xml(itemref)

        # Remove the <guide> which is not needed in EPUB 3
        for guide in container.opf_xpath('//*[local-name()="guide"]'):
            parent = guide.getparent()
            parent.remove(guide)

        # Ensure that the cover-image property is set
        cover_id = name_to_id['_static/' + self.config.epub_cover[0]]
        cover_query = '//opf:item[@id="{}"]'.format(cover_id)
        for cover_item in container.opf_xpath(cover_query):
            cover_item.set('properties', 'cover-image')
        title_page_query = '//opf:item[@href="epub-cover.xhtml"]'
        for title_page in container.opf_xpath(title_page_query):
            title_page.set('properties', 'svg calibre:title-page')
        for package in container.opf_xpath('//opf:package'):
            # Append to an existing prefix attribute rather than replace it
            declaration = 'calibre: https://calibre-ebook.com'
            existing = package.get('prefix')
            if existing:
                declaration = existing + ' ' + declaration
            package.set('prefix', declaration)

        # Remove any <meta cover> tag as it is not needed in epub 3
        for cover_meta in container.opf_xpath('//opf:meta[@name="cover"]'):
            meta_parent = cover_meta.getparent()
            meta_parent.remove(cover_meta)

        # Remove unreferenced files
        for problem in check_links(container):
            if problem.__class__ is not UnreferencedResource:
                continue
            container.remove_item(problem.name)

        # Pretty print the OPF
        opf_root = container.parsed(container.opf_name)
        pretty_opf(opf_root)
        container.dirty(container.opf_name)
Example #4
0
def run_checks(container):
    """Run the parsing, CSS, link and font checks on *container*.

    Every file in ``container.mime_map`` is read and grouped by MIME type.
    XML and HTML files go through the XML parsing checker and raster images
    through the image checker. Stylesheets, embedded ``<style>`` elements and
    inline ``style`` attributes are then checked for CSS errors, followed by
    the link and font checks.

    :param container: The book container to check. It must provide
        ``mime_map``, ``open()`` and ``parsed()`` as used below.
    :return: A list of the error objects produced by the checkers.
    """
    errors = []

    # Check parsing
    xml_items, html_items, raster_images, stylesheets = [], [], [], []
    # items() works on Python 2 and 3, unlike the Python 2 only iteritems()
    # (assumes mime_map is a dict-like mapping of file name -> MIME type)
    for name, mt in container.mime_map.items():
        if mt in XML_TYPES:
            items = xml_items
        elif mt in OEB_DOCS:
            items = html_items
        elif mt in OEB_STYLES:
            items = stylesheets
        elif is_raster_image(mt):
            items = raster_images
        else:
            # Nothing below checks this kind of file
            continue
        # Read inside a with block so the handle is not left open
        with container.open(name, 'rb') as f:
            items.append((name, mt, f.read()))
    errors.extend(run_checkers(check_xml_parsing, xml_items))
    errors.extend(run_checkers(check_xml_parsing, html_items))
    errors.extend(run_checkers(check_raster_images, raster_images))

    # cssutils is not thread safe
    for name, mt, raw in stylesheets:
        errors.extend(check_css_parsing(name, raw))
    for name, mt, raw in html_items:
        root = container.parsed(name)
        for style in root.xpath('//*[local-name()="style"]'):
            # An empty <style> element has no text (None), so there is
            # nothing to hand to the CSS parser
            if style.get('type', 'text/css') == 'text/css' and style.text:
                errors.extend(
                    check_css_parsing(name,
                                      style.text,
                                      line_offset=style.sourceline - 1))
        for elem in root.xpath('//*[@style]'):
            inline_css = elem.get('style')
            if inline_css:
                errors.extend(
                    check_css_parsing(name,
                                      inline_css,
                                      line_offset=elem.sourceline - 1,
                                      is_declaration=True))

    errors += check_links(container)
    errors += check_fonts(container)

    return errors
Example #5
0
def run_checks(container):
    """Run all book checks on *container* and return the collected errors.

    Files from ``container.mime_map`` are read and sorted into XML, HTML,
    stylesheet and raster image buckets. The HTML size, XML parsing and
    raster image checkers run on the buckets, then all CSS (stylesheets,
    ``<style>`` elements and inline ``style`` attributes) is checked, and
    finally the container-wide checks (MIME types, links, fonts, file names,
    ids and the OPF) are run.

    :param container: The book container to check. It must provide
        ``mime_map``, ``open()`` and ``parsed()`` as used below.
    :return: A list of the error objects produced by the checkers.
    """
    errors = []

    # Check parsing
    xml_items, html_items, raster_images, stylesheets = [], [], [], []
    # Use items(): iteritems() does not exist on Python 3 dicts
    # (assumes mime_map is a dict-like mapping of file name -> MIME type)
    for name, mt in container.mime_map.items():
        if mt in XML_TYPES:
            items = xml_items
        elif mt in OEB_DOCS:
            items = html_items
        elif mt in OEB_STYLES:
            items = stylesheets
        elif is_raster_image(mt):
            items = raster_images
        else:
            # Skip files that none of the content checkers handle
            continue
        # The with block guarantees the file is closed after reading
        with container.open(name, 'rb') as f:
            items.append((name, mt, f.read()))
    errors.extend(run_checkers(check_html_size, html_items))
    errors.extend(run_checkers(check_xml_parsing, xml_items))
    errors.extend(run_checkers(check_xml_parsing, html_items))
    errors.extend(run_checkers(check_raster_images, raster_images))

    # cssutils is not thread safe
    for name, mt, raw in stylesheets:
        errors.extend(check_css_parsing(name, raw))
    for name, mt, raw in html_items:
        root = container.parsed(name)
        # Only non-empty <style> elements whose type is (or defaults to) CSS
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                errors.extend(check_css_parsing(name, style.text, line_offset=style.sourceline - 1))
        for elem in root.xpath('//*[@style]'):
            inline_css = elem.get('style')
            if inline_css:
                errors.extend(check_css_parsing(name, inline_css, line_offset=elem.sourceline - 1, is_declaration=True))

    errors += check_mimetypes(container)
    errors += check_links(container) + check_link_destinations(container)
    errors += check_fonts(container)
    errors += check_filenames(container)
    errors += check_ids(container)
    errors += check_opf(container)

    return errors
Example #6
0
    def fix_opf(self, container):
        """Clean up the OPF of the book held in *container*, in place.

        In order, this: appends every OEB document missing from the spine,
        removes repeated spine entries, points ``<meta name="cover">`` at the
        manifest id of ``'_static/' + self.config.epub_cover[0]``, adds a
        ``DC('description')`` element, removes ``search.html``, removes the
        resources that ``check_links`` reports as ``UnreferencedResource``,
        and finally pretty prints the OPF and marks it dirty.

        :param container: The book container whose OPF is modified.
        :raises KeyError: If a document to be added to the spine, or the
            cover file, has no entry in ``container.manifest_id_map``.
        """
        spine_names = {n for n, l in container.spine_names}
        spine = container.opf_xpath('//opf:spine')[0]
        # Reverse lookup: file name -> manifest id. items() is used because
        # iteritems() exists only on Python 2 dicts (assumes a dict-like map).
        rmap = {v: k for k, v in container.manifest_id_map.items()}
        # Add unreferenced text files to the spine
        for name, mt in container.mime_map.items():
            if mt in OEB_DOCS and name not in spine_names:
                spine_names.add(name)
                container.insert_into_xml(
                    spine, spine.makeelement(OPF('itemref'), idref=rmap[name]))

        # Remove duplicate entries from spine, keeping the first occurrence
        seen = set()
        for item, name, linear in container.spine_iter:
            if name in seen:
                container.remove_from_xml(item)
            seen.add(name)

        # Ensure that the meta cover tag is correct
        cover_id = rmap['_static/' + self.config.epub_cover[0]]
        for meta in container.opf_xpath('//opf:meta[@name="cover"]'):
            meta.set('content', cover_id)

        # Add description metadata
        metadata = container.opf_xpath('//opf:metadata')[0]
        container.insert_into_xml(metadata,
                                  metadata.makeelement(DC('description')))
        # NOTE(review): relies on insert_into_xml having placed the new
        # element last in <metadata> -- confirm
        metadata[-1].text = 'Comprehensive documentation for calibre'

        # Remove search.html since it is useless in EPUB
        container.remove_item('search.html')

        # Remove unreferenced files
        for error in check_links(container):
            if error.__class__ is UnreferencedResource:
                container.remove_item(error.name)

        # Pretty print the OPF
        pretty_opf(container.parsed(container.opf_name))
        container.dirty(container.opf_name)
Example #7
0
    def fix_opf(self, container):
        """Repair and tidy the OPF of the book in *container*, in place.

        The steps run in this order: OEB documents that are not in the spine
        are appended to it; repeated spine entries are removed; the
        ``<meta name="cover">`` tag is set to the manifest id of
        ``'_static/' + self.config.epub_cover[0]``; a ``DC('description')``
        element is added to the metadata; ``search.html`` is removed; every
        resource that ``check_links`` reports as ``UnreferencedResource`` is
        removed; the OPF is pretty printed and marked dirty.

        :param container: The book container whose OPF is modified.
        :raises KeyError: If a document to be added to the spine, or the
            cover file, has no entry in ``container.manifest_id_map``.
        """
        spine_names = {n for n, l in container.spine_names}
        spine = container.opf_xpath('//opf:spine')[0]
        # File name -> manifest id. dict.iteritems() is Python 2 only, so
        # items() is used instead (assumes both maps are dict-like).
        rmap = {v:k for k, v in container.manifest_id_map.items()}
        # Add unreferenced text files to the spine
        for name, mt in container.mime_map.items():
            if mt in OEB_DOCS and name not in spine_names:
                spine_names.add(name)
                container.insert_into_xml(spine, spine.makeelement(OPF('itemref'), idref=rmap[name]))

        # Remove duplicate entries from spine; the first one seen is kept
        seen = set()
        for item, name, linear in container.spine_iter:
            if name in seen:
                container.remove_from_xml(item)
            seen.add(name)

        # Ensure that the meta cover tag is correct
        cover_id = rmap['_static/' + self.config.epub_cover[0]]
        for meta in container.opf_xpath('//opf:meta[@name="cover"]'):
            meta.set('content', cover_id)

        # Add description metadata
        metadata = container.opf_xpath('//opf:metadata')[0]
        container.insert_into_xml(metadata, metadata.makeelement(DC('description')))
        # NOTE(review): assumes the element just inserted is the last child
        # of <metadata> -- confirm against insert_into_xml
        metadata[-1].text = 'Comprehensive documentation for calibre'

        # Remove search.html since it is useless in EPUB
        container.remove_item('search.html')

        # Remove unreferenced files
        for error in check_links(container):
            if error.__class__ is UnreferencedResource:
                container.remove_item(error.name)

        # Pretty print the OPF
        pretty_opf(container.parsed(container.opf_name))
        container.dirty(container.opf_name)
Example #8
0
    def fix_opf(self, container):
        """Tidy the OPF of the book in *container*, modifying it in place.

        The steps run in this order: OEB documents missing from the spine are
        appended to it; repeated spine entries are dropped; ``<guide>``
        elements are removed; the manifest item for
        ``'_static/' + self.config.epub_cover[0]`` gets the ``cover-image``
        property; the ``<meta name="cover">`` tag is removed; resources that
        ``check_links`` reports as ``UnreferencedResource`` are removed; the
        OPF is pretty printed and marked dirty.

        :param container: The book container whose OPF is modified.
        """
        listed = set()
        for fname, _linear in container.spine_names:
            listed.add(fname)
        spine = container.opf_xpath('//opf:spine')[0]
        # Invert manifest_id_map so manifest ids can be looked up by name
        id_for_name = dict(
            (fname, item_id)
            for item_id, fname in iteritems(container.manifest_id_map))

        # Add unreferenced text files to the spine
        for fname, mime in iteritems(container.mime_map):
            if mime not in OEB_DOCS or fname in listed:
                continue
            listed.add(fname)
            new_ref = spine.makeelement(OPF('itemref'), idref=id_for_name[fname])
            container.insert_into_xml(spine, new_ref)

        # Remove duplicate entries from spine (first occurrence wins)
        visited = set()
        for itemref, fname, _linear in container.spine_iter:
            if fname not in visited:
                visited.add(fname)
            else:
                container.remove_from_xml(itemref)

        # Remove the <guide> which is not needed in EPUB 3
        for guide in container.opf_xpath('//*[local-name()="guide"]'):
            guide_parent = guide.getparent()
            guide_parent.remove(guide)

        # Ensure that the cover-image property is set
        cover_id = id_for_name['_static/' + self.config.epub_cover[0]]
        cover_query = '//opf:item[@id="{}"]'.format(cover_id)
        for cover_item in container.opf_xpath(cover_query):
            cover_item.set('properties', 'cover-image')

        # Remove any <meta cover> tag as it is not needed in epub 3
        for cover_meta in container.opf_xpath('//opf:meta[@name="cover"]'):
            meta_parent = cover_meta.getparent()
            meta_parent.remove(cover_meta)

        # Remove unreferenced files
        for problem in check_links(container):
            if problem.__class__ is not UnreferencedResource:
                continue
            container.remove_item(problem.name)

        # Pretty print the OPF
        opf_root = container.parsed(container.opf_name)
        pretty_opf(opf_root)
        container.dirty(container.opf_name)