Exemple #1
0
    def __call__(self, oeb, opts):
        """Apply the rename map to links in the manifest, the guide and the ToC.

        Stores ``oeb.logger``, ``opts`` and ``oeb`` on the instance, runs
        ``self.url_replacer`` over every manifest item whose data is either an
        element tree or exposes ``cssText``, remaps guide hrefs through
        ``self.rename_map`` (any fragment is carried over), and finally passes
        the ToC to ``self.fix_toc_entry``.
        """
        import cssutils
        self.log, self.opts, self.oeb = oeb.logger, opts, oeb

        for entry in oeb.manifest.items:
            # url_replacer is called without the item, so publish it here.
            self.current_item = entry
            payload = entry.data
            if etree.iselement(payload):
                rewrite_links(payload, self.url_replacer)
            elif hasattr(payload, 'cssText'):
                cssutils.replaceUrls(payload, self.url_replacer)

        if self.oeb.guide:
            for ref in self.oeb.guide.values():
                base, frag = urldefrag(urlnormalize(ref.href))
                target = self.rename_map.get(base, None)
                if target is None:
                    continue
                # Re-attach the fragment that was split off for the lookup.
                ref.href = (target + '#' + frag) if frag else target

        if self.oeb.toc:
            self.fix_toc_entry(self.oeb.toc)
Exemple #2
0
    def test_replaceUrls(self):
        "cssutils.replaceUrls()"
        cssutils.ser.prefs.keepAllProperties = True

        css='''
        @import "im1";
        @import url(im2);
        a {
            background-image: url(c) !important;
            background-\image: url(b);
            background: url(a) no-repeat !important;
            }'''
        s = cssutils.parseString(css)
        cssutils.replaceUrls(s, lambda old: "NEW" + old)
        self.assertEqual(u'@import "NEWim1";', s.cssRules[0].cssText)
        self.assertEqual(u'NEWim2', s.cssRules[1].href)
        self.assertEqual(u'''background-image: url(NEWc) !important;
background-\\image: url(NEWb);
background: url(NEWa) no-repeat !important''', s.cssRules[2].style.cssText)

        cssutils.ser.prefs.keepAllProperties = False

        # CSSStyleDeclaration
        style = cssutils.parseStyle(u'''color: red;
                                        background-image:
                                            url(1.png),
                                            url('2.png')''')
        cssutils.replaceUrls(style, lambda url: 'prefix/'+url)
        self.assertEqual(style.cssText, u'''color: red;
background-image: url(prefix/1.png), url(prefix/2.png)''')
Exemple #3
0
    def virtualize_resources(self):
        """Rewrite internal links in stylesheets, documents and SVG images.

        Relative links are replaced by ``<link_uid>|<encoded name+frag>|``
        placeholders (or ``missing:<name>`` when the target name is unknown
        to ``self.has_name``). In documents, ``<a>`` elements pointing at a
        placeholder get a ``javascript:void(0)`` href plus a
        ``data-<link_uid>`` JSON attribute; all other anchors are opened in a
        new window. Every name that was modified is passed to ``self.dirty``.
        """

        changed = set()
        link_uid = self.book_render_data['link_uid']
        resource_template = link_uid + '|{}|'
        xlink_xpath = XPath('//*[@xl:href]')
        link_xpath = XPath('//h:a[@href]')

        def link_replacer(base, url):
            """Return the virtualized form of ``url`` found inside ``base``."""
            if url.startswith('#'):
                frag = urlunquote(url[1:])
                if not frag:
                    return url
                changed.add(base)
                return resource_template.format(encode_url(base, frag))
            purl = urlparse(url)
            # Anything with a host, a query, a non-file scheme or an
            # absolute/empty path is returned untouched.
            if purl.netloc or purl.query:
                return url
            if purl.scheme and purl.scheme != 'file':
                return url
            if not purl.path or purl.path.startswith('/'):
                return url
            url, frag = purl.path, purl.fragment
            name = self.href_to_name(url, base)
            if name:
                if self.has_name(name):
                    frag = urlunquote(frag)
                    url = resource_template.format(encode_url(name, frag))
                else:
                    url = 'missing:' + quote(name)
                changed.add(base)
            return url

        for name, mt in self.mime_map.iteritems():
            mt = mt.lower()
            if mt in OEB_STYLES:
                replaceUrls(self.parsed(name), partial(link_replacer, name))
                self.virtualized_names.add(name)
            elif mt in OEB_DOCS:
                self.virtualized_names.add(name)
                root = self.parsed(name)
                rewrite_links(root, partial(link_replacer, name))
                for a in link_xpath(root):
                    href = a.get('href')
                    if href.startswith(link_uid):
                        a.set('href', 'javascript:void(0)')
                        parts = decode_url(href.split('|')[1])
                        a.set('data-' + link_uid, json.dumps({'name':parts[0], 'frag':parts[1]}, ensure_ascii=False))
                    else:
                        a.set('target', '_blank')
                        a.set('rel', 'noopener noreferrer')
                    changed.add(name)
            elif mt == 'image/svg+xml':
                self.virtualized_names.add(name)
                # ``changed`` must stay a set: rebinding it to ``False`` here
                # broke ``changed.add`` in link_replacer and the final loop.
                # link_replacer already records ``name`` when it rewrites.
                xlink = XLINK('href')
                for elem in xlink_xpath(self.parsed(name)):
                    elem.set(xlink, link_replacer(name, elem.get(xlink)))

        for dirty_name in changed:
            self.dirty(dirty_name)
Exemple #4
0
    def __call__(self, oeb, context):
        """Run the CSS flattening pass over ``oeb``.

        ``context`` is stored as both ``self.context`` and ``self.opts``.
        The comma separated ``filter_css`` option is parsed into
        ``self.filter_css``, stylesheet URLs are made absolute, the embed
        font information is computed and stored on ``oeb``, and then the
        spine is stylized, baselined (only when ``self.fbase`` is set) and
        flattened.
        """
        oeb.logger.info('Flattening CSS and remapping font sizes...')
        self.context = self.opts =context
        self.oeb = oeb

        self.filter_css = frozenset()
        if self.opts.filter_css:
            try:
                self.filter_css = {x.strip().lower() for x in
                    self.opts.filter_css.split(',')}
            except Exception:
                # Best effort: a malformed option must not abort the
                # conversion, but KeyboardInterrupt/SystemExit still propagate.
                self.oeb.log.warning('Failed to parse filter_css, ignoring')
            else:
                from calibre.ebooks.oeb.normalize_css import normalize_filter_css
                self.filter_css = frozenset(normalize_filter_css(self.filter_css))
                self.oeb.log.debug('Filtering CSS properties: %s'%
                    ', '.join(self.filter_css))

        for item in oeb.manifest.values():
            # Make all links to resources absolute, as these sheets will be
            # consolidated into a single stylesheet at the root of the document
            if item.media_type in OEB_STYLES:
                cssutils.replaceUrls(item.data, item.abshref,
                        ignoreImportRules=True)

        self.body_font_family, self.embed_font_rules = self.get_embed_font_info(
                self.opts.embed_font_family)
        # Store for use in output plugins/transforms that generate content,
        # like the AZW3 output inline ToC.
        self.oeb.store_embed_font_rules = EmbedFontsCSSRules(self.body_font_family,
                self.embed_font_rules)
        self.stylize_spine()
        self.sbase = self.baseline_spine() if self.fbase else None
        self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
        self.flatten_spine()
    def update_internal_css(self, original_path, content):
        """Rewrite the urls inside a stylesheet so they refer to local files.

        ``content`` is parsed with cssutils and every url is passed through
        ``self.get_original_file``; the serialized sheet is returned. When
        cssutils is not available the content is returned as is.
        """
        if not cssutils:
            return content

        def localize(ref):
            """Callback for cssutils.replaceUrls, called once per url."""
            origin = urlparse.urlparse(original_path)
            if ref.startswith('..'):
                # Resolve the parent-relative reference against the directory
                # of the stylesheet, then rebuild a full url (scheme + domain)
                # so the resource can be downloaded.
                resolved = os.path.abspath(
                    os.path.join(os.path.dirname(origin.path), ref))
                ref = '{}://{}{}'.format(origin.scheme, origin.netloc, resolved)
            # Generate the local file for the url
            return self.get_original_file(ref)

        parsed_sheet = cssutils.parseString(content)
        cssutils.replaceUrls(parsed_sheet, localize)
        return parsed_sheet.cssText
    def __call__(self, oeb, context):
        """Run the CSS flattening pass over ``oeb``.

        ``context`` is stored as both ``self.context`` and ``self.opts``.
        The comma separated ``filter_css`` option is parsed into
        ``self.filter_css``, stylesheet URLs are made absolute, and then the
        spine is stylized, baselined (only when ``self.fbase`` is set) and
        flattened.
        """
        oeb.logger.info('Flattening CSS and remapping font sizes...')
        self.context = self.opts =context
        self.oeb = oeb

        self.filter_css = frozenset()
        if self.opts.filter_css:
            try:
                self.filter_css = frozenset([x.strip().lower() for x in
                    self.opts.filter_css.split(',')])
            except Exception:
                # Best effort: a malformed option must not abort the
                # conversion, but KeyboardInterrupt/SystemExit still propagate.
                self.oeb.log.warning('Failed to parse filter_css, ignoring')
            else:
                self.oeb.log.debug('Filtering CSS properties: %s'%
                    ', '.join(self.filter_css))

        for item in oeb.manifest.values():
            # Make all links to resources absolute, as these sheets will be
            # consolidated into a single stylesheet at the root of the document
            if item.media_type in OEB_STYLES:
                cssutils.replaceUrls(item.data, item.abshref,
                        ignoreImportRules=True)

        self.stylize_spine()
        self.sbase = self.baseline_spine() if self.fbase else None
        self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
        self.flatten_spine()
    def run(self):
        """Rewrite the URLs of the input CSS file to CDN URLs.

        Returns ``self.input_file`` unchanged when the stylesheet contains no
        URLs, otherwise writes the updated CSS and returns
        ``self.output_file``.

        Raises DocumentRootAndBasePathRequiredException when
        ``document_root`` or ``base_path`` is unset, and
        RequestToRequeueException when an existing referenced file has no
        row in the ``synced_files`` table yet.
        """
        # Step 0: ensure that the document_root and base_path variables are
        # set. If the file that's being processed was inside a source that has
        # either one or both not set, then this processor can't run.
        if self.document_root is None or self.base_path is None:
            raise DocumentRootAndBasePathRequiredException

        # We don't rename the file, so we can use the default output file.

        parser = CSSParser(log=None, loglevel=logging.CRITICAL)
        sheet = parser.parseFile(self.input_file)

        # Step 1: ensure the file has URLs. If it doesn't, we can stop the
        # processing. any() stops at the first URL, like the former
        # count-and-break loop.
        if not any(True for _ in getUrls(sheet)):
            return self.input_file

        # Step 2: resolve the relative URLs to absolute paths.
        replaceUrls(sheet, self.resolveToAbsolutePath)

        # Step 3: verify that each of these files has been synced.
        # NOTE(review): the connection/cursor are stored on self and are not
        # closed here; presumably resolveToCDNURL uses them -- confirm before
        # adding cleanup.
        synced_files_db = urljoin(sys.path[0] + os.sep, SYNCED_FILES_DB)
        self.dbcon = sqlite3.connect(synced_files_db)
        self.dbcon.text_factory = unicode  # This is the default, but we set it explicitly, just to be sure.
        self.dbcur = self.dbcon.cursor()
        for urlstring in getUrls(sheet):
            # Skip absolute URLs.
            if urlstring.startswith(("http://", "https://")):
                continue

            # Skip broken references in the CSS file. This would otherwise
            # prevent this CSS file from ever passing through this processor.
            if not os.path.exists(urlstring):
                continue

            # Look up the CDN URL for the given absolute path; only its
            # existence matters at this point.
            self.dbcur.execute("SELECT url FROM synced_files WHERE input_file=?", (urlstring,))
            if self.dbcur.fetchone() is None:
                raise RequestToRequeueException(
                    "The file '%s' has not yet been synced to the server '%s'" % (urlstring, self.process_for_server)
                )

        # Step 4: resolve the absolute paths to CDN URLs.
        replaceUrls(sheet, self.resolveToCDNURL)

        # Step 5: write the updated CSS to the output file. The context
        # manager closes the handle even if serializing or writing fails.
        with open(self.output_file, "w") as f:
            f.write(sheet.cssText)

        return self.output_file
Exemple #8
0
    def __call__(self, oeb, opts):
        """Apply the rename map to links in the manifest, the guide and the ToC.

        Stores ``oeb.logger``, ``opts`` and ``oeb`` on the instance, runs
        ``self.url_replacer`` over every manifest item whose data is either an
        element tree or exposes ``cssText``, remaps guide hrefs through
        ``self.rename_map`` (any fragment is carried over), and finally passes
        the ToC to ``self.fix_toc_entry``.
        """
        import cssutils
        self.log, self.opts, self.oeb = oeb.logger, opts, oeb

        for entry in oeb.manifest.items:
            # url_replacer is called without the item, so publish it here.
            self.current_item = entry
            payload = entry.data
            if etree.iselement(payload):
                rewrite_links(payload, self.url_replacer)
            elif hasattr(payload, 'cssText'):
                cssutils.replaceUrls(payload, self.url_replacer)

        if self.oeb.guide:
            # Iterate over a snapshot of the guide references.
            for ref in list(self.oeb.guide.values()):
                base, frag = urldefrag(urlnormalize(ref.href))
                target = self.rename_map.get(base, None)
                if target is None:
                    continue
                # Re-attach the fragment that was split off for the lookup.
                ref.href = (target + '#' + frag) if frag else target

        if self.oeb.toc:
            self.fix_toc_entry(self.oeb.toc)
Exemple #9
0
def replace_urls(routes, filepath):
    """Parse the CSS file at ``filepath`` and return its text with every URL
    passed through ``routed_url(filepath, routes, url)``."""
    sheet = cssutils.parseFile(filepath)
    url_mapper = ft.partial(routed_url, filepath, routes)
    cssutils.replaceUrls(sheet, url_mapper)
    return sheet.cssText
Exemple #10
0
    def __call__(self, oeb, context):
        """Run the CSS flattening pass over ``oeb``.

        ``context`` is stored as both ``self.context`` and ``self.opts``.
        The comma separated ``filter_css`` option is parsed into
        ``self.filter_css``, stylesheet URLs are made absolute, the embed
        font information is computed and stored on ``oeb``, and then the
        spine is stylized, baselined (only when ``self.fbase`` is set) and
        flattened.
        """
        oeb.logger.info('Flattening CSS and remapping font sizes...')
        self.context = self.opts =context
        self.oeb = oeb

        self.filter_css = frozenset()
        if self.opts.filter_css:
            try:
                self.filter_css = {x.strip().lower() for x in
                    self.opts.filter_css.split(',')}
            except Exception:
                # Best effort: a malformed option must not abort the
                # conversion, but KeyboardInterrupt/SystemExit still propagate.
                self.oeb.log.warning('Failed to parse filter_css, ignoring')
            else:
                from calibre.ebooks.oeb.normalize_css import normalize_filter_css
                self.filter_css = frozenset(normalize_filter_css(self.filter_css))
                self.oeb.log.debug('Filtering CSS properties: %s'%
                    ', '.join(self.filter_css))

        for item in oeb.manifest.values():
            # Make all links to resources absolute, as these sheets will be
            # consolidated into a single stylesheet at the root of the document
            if item.media_type in OEB_STYLES:
                cssutils.replaceUrls(item.data, item.abshref,
                        ignoreImportRules=True)

        self.body_font_family, self.embed_font_rules = self.get_embed_font_info(
                self.opts.embed_font_family)
        # Store for use in output plugins/transforms that generate content,
        # like the AZW3 output inline ToC.
        self.oeb.store_embed_font_rules = EmbedFontsCSSRules(self.body_font_family,
                self.embed_font_rules)
        self.stylize_spine()
        self.sbase = self.baseline_spine() if self.fbase else None
        self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
        self.flatten_spine()
Exemple #11
0
    def test_replaceUrls(self):
        "cssutils.replaceUrls()"
        cssutils.ser.prefs.keepAllProperties = True

        css = '''
        @import "im1";
        @import url(im2);
        a {
            background-image: url(c) !important;
            background-\image: url(b);
            background: url(a) no-repeat !important;
            }'''
        s = cssutils.parseString(css)
        cssutils.replaceUrls(s, lambda old: "NEW" + old)
        self.assertEqual(u'@import "NEWim1";', s.cssRules[0].cssText)
        self.assertEqual(u'NEWim2', s.cssRules[1].href)
        self.assertEqual(
            u'''background-image: url(NEWc) !important;
background-\\image: url(NEWb);
background: url(NEWa) no-repeat !important''', s.cssRules[2].style.cssText)

        cssutils.ser.prefs.keepAllProperties = False

        # CSSStyleDeclaration
        style = cssutils.parseStyle(u'''color: red;
                                        background-image:
                                            url(1.png),
                                            url('2.png')''')
        cssutils.replaceUrls(style, lambda url: 'prefix/' + url)
        self.assertEqual(
            style.cssText, u'''color: red;
background-image: url(prefix/1.png), url(prefix/2.png)''')
Exemple #12
0
    def copy_css(self):
        """Copy the stylesheet (and the files it references) into the
        temporary content directory.

        When ``self.parse_css`` is set, the stylesheet is parsed, every
        referenced file is copied to ``fonts/`` (.ttf/.otf) or ``images/``
        (everything else, prefixed with ``css_``), the URLs are rewritten to
        those locations and the result is written to ``stylesheet.css``.
        Font URLs are collected in ``self.font_list``. Otherwise the
        stylesheet is copied verbatim.
        """
        base_dir = os.path.abspath(os.path.dirname(self.css_file))
        self.font_list = []

        def replace_url(url):
            """Copy the file behind ``url`` and return its new relative URL."""
            source_file = os.path.abspath(os.path.join(base_dir, url))

            if os.path.splitext(url)[1].lower() in ('.ttf', '.otf'):
                dest_file = os.path.abspath(os.path.join(self.temp_content_dir, 'fonts', os.path.basename(source_file)))
                new_url = 'fonts/' + os.path.basename(url)
                self.font_list.append(new_url)
            else:
                dest_file = os.path.abspath(
                    os.path.join(self.temp_content_dir, 'images', 'css_' + os.path.basename(source_file)))
                new_url = 'images/css_' + os.path.basename(url)

            try:
                copy_file(source_file, dest_file)
            except Exception:
                # Best effort: a missing resource is logged and the URL is
                # still rewritten. KeyboardInterrupt/SystemExit are no longer
                # swallowed.
                self.log.error('File {0}, referred by css, not found.'.format(url))

            return new_url

        if self.parse_css:
            cssutils.profile.addProfile('CSS extentions',
                                        {'-webkit-hyphens': 'none',
                                         'adobe-hyphenate': 'none',
                                         '-moz-hyphens': 'none',
                                         '-ms-hyphens': 'none',
                                         'hyphens': 'none|manual|auto'})
            stylesheet = cssutils.parseFile(self.css_file)
            cssutils.replaceUrls(stylesheet, replace_url)
            write_file(str(stylesheet.cssText, 'utf-8'), os.path.join(self.temp_content_dir, 'stylesheet.css'))
        else:
            copy_file(self.css_file, os.path.join(self.temp_content_dir, 'stylesheet.css'))
Exemple #13
0
def replace_urls_epub(epub_zip, routes, root_dir, filepath):
    """Read the stylesheet ``filepath`` (relative to ``root_dir``) from the
    archive ``epub_zip`` and return its text with every URL passed through
    ``routed_url(filepath, routes, url)``."""
    # Zip member names always use '/' as separator, so the member name must
    # not be built with the OS-specific os.path.join (breaks on Windows).
    import posixpath

    style_string = epub_zip.read(posixpath.join(root_dir, filepath))
    stylesheet = cssutils.parseString(style_string)
    cssutils.replaceUrls(
        stylesheet,
        ft.partial(routed_url, filepath, routes)
    )
    return stylesheet.cssText
def replaceurls(stylesheet, replacer):
    """
    Rewrite every URL found in the :class:`CSSStyleSheet` :obj:`stylesheet`.
    :obj:`replacer` is called with each URL wrapped in :class:`url.URL`; the
    string form of its return value replaces the original URL.
    """
    cssutils.replaceUrls(
        stylesheet,
        lambda raw: str(replacer(url.URL(raw))),
    )
Exemple #15
0
    def virtualize_resources(self):
        """Rewrite internal links in stylesheets, documents and SVG images.

        Relative links that resolve to a name are replaced by
        ``<link_uid>|<encoded name+frag>|`` placeholders. In documents,
        ``<a>`` elements pointing at a placeholder get a
        ``javascript:void(0)`` href plus a ``data-<link_uid>`` JSON
        attribute; all other anchors get ``target="_blank"``. Every name that
        was modified is passed to ``self.dirty``.
        """

        changed = set()
        link_uid = self.book_render_data['link_uid']
        resource_template = link_uid + '|{}|'
        xlink_xpath = XPath('//*[@xl:href]')
        link_xpath = XPath('//h:a[@href]')

        def link_replacer(base, url):
            """Return the virtualized form of ``url`` found inside ``base``."""
            if url.startswith('#'):
                frag = urlunquote(url[1:])
                if not frag:
                    return url
                changed.add(base)
                return resource_template.format(encode_url(base, frag))
            purl = urlparse(url)
            # Anything with a host, a query, a non-file scheme or an
            # absolute/empty path is returned untouched.
            if purl.netloc or purl.query:
                return url
            if purl.scheme and purl.scheme != 'file':
                return url
            if not purl.path or purl.path.startswith('/'):
                return url
            url, frag = purl.path, purl.fragment
            name = self.href_to_name(url, base)
            if name:
                frag = urlunquote(frag)
                url = resource_template.format(encode_url(name, frag))
                changed.add(base)
            return url

        for name, mt in self.mime_map.iteritems():
            mt = mt.lower()
            if mt in OEB_STYLES:
                replaceUrls(self.parsed(name), partial(link_replacer, name))
                self.virtualized_names.add(name)
            elif mt in OEB_DOCS:
                self.virtualized_names.add(name)
                root = self.parsed(name)
                rewrite_links(root, partial(link_replacer, name))
                for a in link_xpath(root):
                    href = a.get('href')
                    if href.startswith(link_uid):
                        a.set('href', 'javascript:void(0)')
                        parts = decode_url(href.split('|')[1])
                        a.set('data-' + link_uid, json.dumps({'name':parts[0], 'frag':parts[1]}, ensure_ascii=False))
                    else:
                        a.set('target', '_blank')
                    changed.add(name)
            elif mt == 'image/svg+xml':
                self.virtualized_names.add(name)
                # ``changed`` must stay a set: rebinding it to ``False`` here
                # broke ``changed.add`` in link_replacer and the final loop.
                # link_replacer already records ``name`` when it rewrites.
                xlink = XLINK('href')
                for elem in xlink_xpath(self.parsed(name)):
                    elem.set(xlink, link_replacer(name, elem.get(xlink)))

        for dirty_name in changed:
            self.dirty(dirty_name)
def replaceurls(stylesheet, replacer):
    """
    Rewrite every URL found in the :class:`CSSStyleSheet` :obj:`stylesheet`.
    :obj:`replacer` is called with each URL wrapped in :class:`url.URL`; the
    string form of its return value replaces the original URL.
    """
    cssutils.replaceUrls(
        stylesheet,
        lambda raw: str(replacer(url.URL(raw))),
    )
Exemple #17
0
 def finish(self):
     """
     Join the buffered response body, parse it as CSS, run every URL through
     ``self._replace``, write the serialized sheet to the wrapped request and
     return the result of finishing that request.
     """
     body = ''.join(self._buffer)
     sheet = CSSParser().parseString(body)
     replaceUrls(sheet, self._replace)
     self.request.write(sheet.cssText)
     return self.request.finish()
Exemple #18
0
def download_css_imports(soup, url_parts, root_url, css_file, dst_folder, index_path):
    """
    Parse the css file ``dst_folder/css_file``, download the stylesheets it
    imports into dst_folder, and rewrite the file in place with localized
    urls.

    Does nothing when the file does not exist or cannot be parsed.
    ``soup`` and ``index_path`` are not used by this function.
    """
    css_path = dst_folder + "/" + css_file
    if not os.path.exists(css_path):
        return

    logging.debug("processing css " + css_file + "...")

    # The context manager guarantees the handle is closed on every exit
    # path, including the early return on a parse failure.
    with open(css_path, "r+") as css_text_file:
        input_file = css_text_file.read()

        encoding = get_encoding(input_file)
        if "UTF-8" not in encoding:
            input_file = input_file.decode(encoding).encode("UTF-8")

        try:
            sheet = cssutils.parseString(input_file)
        except Exception:
            logging.error("Unable to parse " + css_file)
            return

        css_download_url_refs(root_url, url_parts, sheet, dst_folder)

        # One "../" per directory level of css_file, to climb back to the
        # download root.
        i_path = "../" * css_file.count("/")

        org_file_names = []
        new_file_names = []
        for rule in sheet:
            if rule.type == rule.IMPORT_RULE:
                file_name = rule.href.split("/")[-1]
                file_name = sanitize_file_name(file_name)
                tmp_url_parts = list(urlparse.urlparse(rule.href))
                new_src = create_directories(dst_folder, tmp_url_parts[1] + tmp_url_parts[2])
                full_path = os.path.join(dst_folder, new_src)
                outpath = os.path.join(full_path, file_name)

                tmp_url_parts[2] = rule.href

                if rule.href.lower().startswith("http"):
                    download_file(rule.href, outpath)
                else:
                    download_file(urlparse.urlunparse(tmp_url_parts), outpath)

                rule.href = rule.href.replace("http://", "")
                rule.href = rule.href.replace("https://", "")
                rule.href = i_path + rule.href.replace("../", "")

        cssutils.replaceUrls(sheet, lambda url: css_url_replacer(url, new_file_names, org_file_names, i_path))
        css_text_file.seek(0)
        css_text_file.write(sheet.cssText)
        # Drop leftover bytes of the old content when the rewritten css is
        # shorter than the original.
        css_text_file.truncate()
Exemple #19
0
    def css_fix(self, css, inline=False):
        """Return ``css`` with every url passed through ``self.url_fix``.

        With ``inline`` set the text is parsed as a style declaration,
        otherwise as a stylesheet (style tag or external stylesheet).
        """
        # Silence cssutils warnings and errors for imperfect css source
        cssutils.log.setLevel(logging.CRITICAL)

        parse = cssutils.parseStyle if inline else cssutils.parseString
        parsed = parse(css)
        cssutils.replaceUrls(parsed, self.url_fix)
        return parsed.cssText
Exemple #20
0
def _pack_css(css_path, css, root_dir):
    """Return the stylesheet ``css`` with encodable resource URLs replaced
    by data URIs.

    URLs for which ``_can_encode`` is false are left untouched. The file
    behind every other URL is located relative to ``css_path``/``root_dir``
    and embedded via ``make_data_uri``.
    """
    def replacer(resource_url):
        if not _can_encode(resource_url):
            return resource_url

        fullpath = _determine_fullpath(css_path, resource_url, root_dir)
        tag_mime, tag_data = _get_resource(fullpath)
        return make_data_uri(tag_mime, tag_data)

    stylesheet = cssutils.parseString(css)
    cssutils.replaceUrls(stylesheet, replacer)

    # Return the rewritten sheet. The previous ``str.encode(css)`` returned
    # the untouched input and threw the replacements away.
    # NOTE(review): cssutils documents CSSStyleSheet.cssText as a byte
    # string, which keeps the bytes return type -- confirm for the cssutils
    # version in use.
    return stylesheet.cssText
Exemple #21
0
    def replace(self, css):
        """Return ``css`` with every url replaced by the reversed ``viewer``
        url of ``self.get_access_uri(url)``.

        The text is parsed as a stylesheet; if that raises, it is parsed as
        a bare style declaration instead.
        """
        cssutils.log.setLevel(logging.CRITICAL)
        # Let the ``name`` token start with an optional ``*`` (presumably to
        # tolerate the IE star hack -- confirm). Same value as the former
        # ``ur''`` literal, which is a syntax error on Python 3.
        cssutils.cssproductions.MACROS[u'name'] = u'[\\*]?{nmchar}+'

        try:
            sheet = cssutils.parseString(css)
        except Exception:
            # Fallback kept as best effort; KeyboardInterrupt/SystemExit are
            # no longer swallowed.
            sheet = cssutils.css.CSSStyleDeclaration(cssText=css)

        def replacer(url):
            return reverse(u'viewer',
                           args=(self.get_access_uri(url), ))

        cssutils.replaceUrls(sheet, replacer)

        return sheet.cssText
Exemple #22
0
 def _apply_style_attr(self, url_replacer=None):
     """Merge the element's ``style`` attribute into ``self._style``.

     Empty declarations and declarations matching ``self.MS_PAT`` are
     dropped before parsing. Returns without changes when there is no
     ``style`` attribute or the remaining text raises CSSSyntaxError. When
     ``url_replacer`` is given, it is applied to the urls of the parsed
     style first.
     """
     attrib = self._element.attrib
     if 'style' not in attrib:
         return
     pieces = (piece.strip() for piece in attrib['style'].split(';'))
     kept = [piece for piece in pieces
             if piece and self.MS_PAT.match(piece) is None]
     try:
         style = parseStyle('; '.join(kept), validate=False)
     except CSSSyntaxError:
         return
     if url_replacer is not None:
         replaceUrls(style, url_replacer, ignoreImportRules=True)
     self._style.update(self._stylizer.flatten_style(style))
Exemple #23
0
 def _apply_style_attr(self, url_replacer=None):
     """Merge the element's ``style`` attribute into ``self._style``.

     Empty declarations and declarations matching ``self.MS_PAT`` are
     dropped before parsing. Returns without changes when there is no
     ``style`` attribute or the remaining text raises CSSSyntaxError. When
     ``url_replacer`` is given, it is applied to the urls of the parsed
     style first.
     """
     attrib = self._element.attrib
     if 'style' not in attrib:
         return
     pieces = (piece.strip() for piece in attrib['style'].split(';'))
     kept = [piece for piece in pieces
             if piece and self.MS_PAT.match(piece) is None]
     try:
         style = parseStyle('; '.join(kept), validate=False)
     except CSSSyntaxError:
         return
     if url_replacer is not None:
         replaceUrls(style, url_replacer, ignoreImportRules=True)
     self._style.update(self._stylizer.flatten_style(style))
Exemple #24
0
def replace_links(container, link_map, frag_map=lambda name, frag:frag):
    """Run a LinkReplacer over every document, stylesheet and NCX file in
    ``container`` and mark each file whose replacer reports a replacement
    as dirty."""
    ncx_type = guess_type('toc.ncx')
    for name, media_type in container.mime_map.iteritems():
        repl = LinkReplacer(name, container, link_map, frag_map)
        kind = media_type.lower()
        if kind in OEB_DOCS:
            rewrite_links(container.parsed(name), repl)
        elif kind in OEB_STYLES:
            replaceUrls(container.parsed(name), repl)
        elif kind == ncx_type:
            # The NCX keeps its links in ``src`` attributes.
            for elem in container.parsed(name).xpath('//*[@src]'):
                old_src = elem.get('src')
                new_src = repl(old_src)
                if old_src != new_src:
                    elem.set('src', new_src)

        if repl.replaced:
            container.dirty(name)
Exemple #25
0
def minify(source, output, **options):
    '''Read CSS from ``source``, resolve its imports into a single sheet,
    rewrite static asset references and write the minified result to
    ``output``, which is rewound to position 0 afterwards.
    :type source: :class:`django.core.files.File`
    :type output: :class:`django.core.files.File`
    '''
    ser.prefs.useMinified()
    base_path = getattr(source, 'path', source.name)
    raw_css = source.read()
    sheet_href = 'file://%s' % pathname2url(base_path)
    stylesheet = parseString(raw_css, href=sheet_href)
    # The return value is not used here; the call itself is kept.
    get_recursive_imports(stylesheet, base=base_path)
    # Concatenate the stylesheets, then swap static asset references for the
    # "live" URLs.
    combined = resolveImports(stylesheet)
    replaceUrls(combined, replace_static_refs, ignoreImportRules=True)
    output.write(combined.cssText)
    output.seek(0)
Exemple #26
0
def minify(source, output, **options):
    '''Write a minified version of the CSS in ``source`` to ``output``.

    @import rules are resolved into a single sheet and url() references
    (import rules excluded) are passed through ``replace_static_refs``.
    ``output`` is rewound to position 0 after writing.

    :type source: :class:`django.core.files.File`
    :type output: :class:`django.core.files.File`
    '''
    ser.prefs.useMinified()
    source_path = getattr(source, 'path', source.name)
    source_href = 'file://%s' % pathname2url(source_path)
    sheet = parseString(source.read(), href=source_href)
    # The returned list is not used below; the call is kept as in the
    # original in case it has side effects.
    css_files = get_recursive_imports(sheet, base=source_path)
    # fold every imported sheet into one
    combined = resolveImports(sheet)
    # point static asset references at their "live" URLs
    replaceUrls(combined, replace_static_refs, ignoreImportRules=True)
    # serialize using the minified preferences set above
    output.write(combined.cssText)
    output.seek(0)
Exemple #27
0
    def replace_resource_links(self):
        ''' Point every link to a resource (raster image or font) at the
        MOBI record that holds the resource. Pointers look like
        kindle:embed:XXXX?mime=image/* where the ?mime= part is apparently
        optional and is left out for fonts. '''
        def pointer(item, oref):
            # Translate one reference; references without a resource record
            # are returned unchanged.
            ref = urlnormalize(item.abshref(oref))
            record_no = self.resources.item_map.get(ref, None)
            if record_no is None:
                return oref
            is_font = self.resources.records[record_no - 1][:4] in {b'FONT'}
            encoded = to_ref(record_no)
            if is_font:
                return 'kindle:embed:%s' % encoded
            self.used_images.add(ref)
            return 'kindle:embed:%s?mime=%s' % (
                encoded, self.resources.mime_map[ref])

        for item in self.oeb.manifest:

            if item.media_type not in XML_DOCS:
                if item.media_type in OEB_STYLES:
                    # Stand-alone stylesheet: rewrite its url()s in place
                    stylesheet = self.data(item)
                    rewrite = partial(pointer, item)
                    cssutils.replaceUrls(stylesheet, rewrite,
                                         ignoreImportRules=True)
                continue

            root = self.data(item)
            for tag in XPath('//h:img|//svg:image')(root):
                for attr, ref in tag.attrib.iteritems():
                    # Drop any {namespace} prefix before comparing
                    local_name = attr.rpartition('}')[2].lower()
                    if local_name in {'src', 'href'}:
                        tag.attrib[attr] = pointer(item, ref)

            for tag in XPath('//h:style')(root):
                if not tag.text:
                    continue
                stylesheet = cssutils.parseString(tag.text, validate=False)
                rewrite = partial(pointer, item)
                cssutils.replaceUrls(stylesheet, rewrite,
                                     ignoreImportRules=True)
                css_text = stylesheet.cssText
                if isbytestring(css_text):
                    css_text = css_text.decode('utf-8')
                tag.text = '\n' + css_text + '\n'
Exemple #28
0
    def replace_resource_links(self):
        ''' Point every link to a resource (raster image or font) at the
        MOBI record that holds the resource. Pointers look like
        kindle:embed:XXXX?mime=image/* where the ?mime= part is apparently
        optional and is left out for fonts. '''

        def pointer(item, oref):
            # Translate one reference; references without a resource record
            # are returned unchanged.
            ref = urlnormalize(item.abshref(oref))
            record_no = self.resources.item_map.get(ref, None)
            if record_no is None:
                return oref
            is_font = self.resources.records[record_no-1][:4] in {b'FONT'}
            encoded = to_ref(record_no)
            if is_font:
                return 'kindle:embed:%s'%encoded
            self.used_images.add(ref)
            return 'kindle:embed:%s?mime=%s'%(encoded,
                    self.resources.mime_map[ref])

        for item in self.oeb.manifest:

            if item.media_type not in XML_DOCS:
                if item.media_type in OEB_STYLES:
                    # Stand-alone stylesheet: rewrite its url()s in place
                    stylesheet = self.data(item)
                    rewrite = partial(pointer, item)
                    cssutils.replaceUrls(stylesheet, rewrite,
                            ignoreImportRules=True)
                continue

            root = self.data(item)
            for tag in XPath('//h:img|//svg:image')(root):
                for attr, ref in tag.attrib.iteritems():
                    # Drop any {namespace} prefix before comparing
                    local_name = attr.rpartition('}')[2].lower()
                    if local_name in {'src', 'href'}:
                        tag.attrib[attr] = pointer(item, ref)

            for tag in XPath('//h:style')(root):
                if not tag.text:
                    continue
                stylesheet = cssutils.parseString(tag.text, validate=False)
                rewrite = partial(pointer, item)
                cssutils.replaceUrls(stylesheet, rewrite,
                        ignoreImportRules=True)
                css_text = stylesheet.cssText
                if isbytestring(css_text):
                    css_text = css_text.decode('utf-8')
                tag.text = '\n'+ css_text + '\n'
Exemple #29
0
    def replace_links(self, name, replace_func):
        ''' Pass every link found in the file called name through
        replace_func, a callable that takes a URL and returns its
        replacement. replace_func must also expose a 'replaced' attribute
        that is True once it has actually changed something; the
        :class:`LinkReplacer` and :class:`LinkRebaser` classes are
        convenient ready-made callables. The file is marked dirty when a
        replacement happened and replace_func.replaced is returned. '''
        media_type = self.mime_map.get(name, guess_type(name))
        if name == self.opf_name:
            # The OPF is handled via its href attributes, whatever its type
            for node in self.opf_xpath('//*[@href]'):
                node.set('href', replace_func(node.get('href')))
        else:
            kind = media_type.lower()
            if kind in OEB_DOCS:
                rewrite_links(self.parsed(name), replace_func)
            elif kind in OEB_STYLES:
                replaceUrls(self.parsed(name), replace_func)
            elif kind == guess_type('toc.ncx'):
                for node in self.parsed(name).xpath('//*[@src]'):
                    node.set('src', replace_func(node.get('src')))

        if replace_func.replaced:
            self.dirty(name)
        return replace_func.replaced
    def test_replaceUrls(self):
        "cssutils.replaceUrls()"
        cssutils.ser.prefs.keepAllProperties = True

        css='''
        @import "im1";
        @import url(im2);
        a {
            background-image: url(c) !important;
            background-\image: url(b);
            background: url(a) no-repeat !important;
            }'''
        s = cssutils.parseString(css)
        cssutils.replaceUrls(s, lambda old: "NEW" + old)
        self.assertEqual(u'@import "NEWim1";', s.cssRules[0].cssText)
        self.assertEqual(u'NEWim2', s.cssRules[1].href)
        self.assertEqual(u'''background-image: url(NEWc) !important;
background-\\image: url(NEWb);
background: url(NEWa) no-repeat !important''', s.cssRules[2].style.cssText)

        cssutils.ser.prefs.keepAllProperties = False
Exemple #31
0
    def replace_links(self, name, replace_func):
        ''' Pass every link found in the file called name through
        replace_func, a callable that takes a URL and returns its
        replacement. replace_func must also expose a 'replaced' attribute
        that is True once it has actually changed something; the
        :class:`LinkReplacer` and :class:`LinkRebaser` classes are
        convenient ready-made callables. The file is marked dirty when a
        replacement happened and replace_func.replaced is returned. '''
        media_type = self.mime_map.get(name, guess_type(name))
        if name == self.opf_name:
            # The OPF is handled via its href attributes, whatever its type
            for node in self.opf_xpath('//*[@href]'):
                node.set('href', replace_func(node.get('href')))
        else:
            kind = media_type.lower()
            if kind in OEB_DOCS:
                rewrite_links(self.parsed(name), replace_func)
            elif kind in OEB_STYLES:
                replaceUrls(self.parsed(name), replace_func)
            elif kind == guess_type('toc.ncx'):
                for node in self.parsed(name).xpath('//*[@src]'):
                    node.set('src', replace_func(node.get('src')))

        if replace_func.replaced:
            self.dirty(name)
        return replace_func.replaced
Exemple #32
0
    def test_replaceUrls(self):
        "cssutils.replaceUrls()"
        cssutils.ser.prefs.keepAllProperties = True

        css = '''
        @import "im1";
        @import url(im2);
        a {
            background-image: url(c) !important;
            background-\image: url(b);
            background: url(a) no-repeat !important;
            }'''
        s = cssutils.parseString(css)
        cssutils.replaceUrls(s, lambda old: "NEW" + old)
        self.assertEqual(u'@import "NEWim1";', s.cssRules[0].cssText)
        self.assertEqual(u'NEWim2', s.cssRules[1].href)
        self.assertEqual(
            u'''background-image: url(NEWc) !important;
background-\\image: url(NEWb);
background: url(NEWa) no-repeat !important''', s.cssRules[2].style.cssText)

        cssutils.ser.prefs.keepAllProperties = False
Exemple #33
0
	def cache_style_content(self, content, inline=False):
		"""
		Caches all required URI's and Imports.

		content: CSS source text.
		inline: True when content is a bare declaration block (parsed with
		parseStyle); False when it is a full stylesheet (parsed with
		parseString using self.url as its href).

		For a full stylesheet every @import target is passed to
		_recursive_cache_resource and the rule's href is set to the result.
		Every other url() reference, except data: URIs, is joined with
		self.url, cached the same way, and replaced by the returned value.

		Returns,
		- updated css content
		"""
		if inline:
			sheet = cssutils.parseStyle(content)
		else:
			sheet = cssutils.parseString(content, href=self.url)
			# Only a full stylesheet can carry @import rules
			for rule in sheet.cssRules:
				if rule.type == rule.IMPORT_RULE:
					rule.href = self._recursive_cache_resource(rule.styleSheet.href)

		def replacer(url):
			# Skip embedded data: URIs only. Testing for the bare prefix
			# 'data' would also skip ordinary relative URLs such as
			# 'data/img.png'. URI schemes are case-insensitive.
			if url.lower().startswith('data:'):
				return url
			# urljoin() lets a url that carries its own scheme and host
			# win over self.url, so absolute URLs need no special casing.
			return self._recursive_cache_resource(urljoin(self.url, url))

		cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)
		return sheet.cssText
Exemple #34
0
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        '''
        Build an OEB book from the HTML files returned by
        get_filelist(htmlpath, basedir, opts, log) and return it.

        Metadata is seeded from mi and completed with defaults, each
        non-binary file becomes a text/html manifest item in the spine,
        links in the HTML and CSS items are passed through
        self.resource_adder, and a table of contents is generated from
        document titles or headings.
        '''
        import uuid
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (DirContainer, rewrite_links,
                                             urlnormalize, urldefrag,
                                             BINARY_MIME, OEB_STYLES, xpath)
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        from calibre.ebooks.html.input import get_filelist
        from calibre.ebooks.metadata import string_to_authors
        from calibre.utils.localization import canonicalize_lang
        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log,
                             None,
                             opts,
                             self,
                             encoding=opts.input_encoding,
                             populate=False)
        self.oeb = oeb

        # Copy mi into the book metadata, then fill in language, creator and
        # title when they are still missing.
        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            l = canonicalize_lang(getattr(opts, 'language', None))
            if not l:
                oeb.logger.warn(u'Language not specified')
                l = get_lang().replace('_', '-')
            metadata.add('language', l)
        if not metadata.creator:
            a = getattr(opts, 'authors', None)
            if a:
                a = string_to_authors(a)
            if not a:
                oeb.logger.warn('Creator not specified')
                a = [self.oeb.translate(__('Unknown'))]
            for aut in a:
                metadata.add('creator', aut)
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
        # A freshly generated UUID is always added as an identifier
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        # NOTE(review): the loop looks for an identifier carrying an 'id'
        # attribute but then assigns identifier[0] rather than ident --
        # confirm this is intended.
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        # Register every non-binary file as a text/html manifest item and
        # append it to the spine; htmlfile_map remembers path -> href.
        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path),
                                         log,
                                         ignore_opf=True)
            bname = os.path.basename(path)
            id, href = oeb.manifest.generate(id='html',
                                             href=ascii_filename(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(id, href, 'text/html')
            item.html_input_href = bname
            oeb.spine.add(item, True)

        # added_resources maps source path -> manifest href; the path is
        # lower-cased when self.is_case_sensitive(path) is false.
        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        # Expose the locally imported helpers as attributes on self
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            # The container is re-pointed at each file's own directory
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            item = oeb.manifest.hrefs[htmlfile_map[path]]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        # For each stylesheet item, look up the directory of the source file
        # it was added from (dpath stays None when there is no match) and
        # rewrite its URLs with that directory as base.
        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                cssutils.replaceUrls(item.data,
                                     partial(self.resource_adder, base=dpath))

        # Generate the TOC: collect the <title> text and the first
        # heading/strong text of every linear spine item.
        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear:
                continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        # Fall back to the headers when the titles contain duplicates
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        # NOTE(review): titles/headers were collected for linear items only
        # (titles only when non-empty) but are zipped against the full
        # spine, so labels may not line up with their items -- confirm.
        for title, item in izip(use, self.oeb.spine):
            if not item.linear:
                continue
            toc.add(title, item.href)

        # Leave the book with a container rooted at the current directory
        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
        return oeb
Exemple #35
0
    def virtualize_resources(self):
        ''' Rewrite the links in every stylesheet, HTML document and SVG
        image of the book into the form link_uid|<encoded name/frag>|,
        record in book_render_data['link_to_map'] which files link to which
        targets, and mark every changed file dirty. '''

        changed = set()
        link_uid = self.book_render_data['link_uid']
        resource_template = link_uid + '|{}|'
        xlink_xpath = XPath('//*[@xl:href]')
        link_xpath = XPath('//h:a[@href]')
        res_link_xpath = XPath('//h:link[@href]')

        def link_replacer(base, url):
            # base is the name of the file that contains url
            if url.startswith('#'):
                # Fragment-only link: points into base itself
                frag = urlunquote(url[1:])
                if not frag:
                    return url
                changed.add(base)
                return resource_template.format(encode_url(base, frag))
            purl = urlparse(url)
            # Links with a network location, a query, a non-file scheme, or
            # an empty/absolute path are returned untouched.
            if purl.netloc or purl.query:
                return url
            if purl.scheme and purl.scheme != 'file':
                return url
            if not purl.path or purl.path.startswith('/'):
                return url
            url, frag = purl.path, purl.fragment
            name = self.href_to_name(url, base)
            if name:
                if self.has_name(name):
                    frag = urlunquote(frag)
                    url = resource_template.format(encode_url(name, frag))
                else:
                    # The target is not part of the book
                    if isinstance(name, unicode):
                        name = name.encode('utf-8')
                    url = 'missing:' + force_unicode(quote(name), 'utf-8')
                changed.add(base)
            return url

        ltm = self.book_render_data['link_to_map']

        for name, mt in self.mime_map.iteritems():
            mt = mt.lower()
            if mt in OEB_STYLES:
                replaceUrls(self.parsed(name), partial(link_replacer, name))
                self.virtualized_names.add(name)
            elif mt in OEB_DOCS:
                self.virtualized_names.add(name)
                root = self.parsed(name)
                for link in res_link_xpath(root):
                    ltype = (link.get('type') or 'text/css').lower()
                    rel = (link.get('rel') or 'stylesheet').lower()
                    if ltype != 'text/css' or rel != 'stylesheet':
                        # This link will not be loaded by the browser anyway
                        # and will cause the resource load check to hang
                        link.attrib.clear()
                        changed.add(name)
                rewrite_links(root, partial(link_replacer, name))
                for a in link_xpath(root):
                    href = a.get('href')
                    if href.startswith(link_uid):
                        # Virtualized link: neutralize the href and carry
                        # the target name/fragment as JSON in a data
                        # attribute; also record the reverse mapping.
                        a.set('href', 'javascript:void(0)')
                        parts = decode_url(href.split('|')[1])
                        lname, lfrag = parts[0], parts[1]
                        ltm.setdefault(lname, {}).setdefault(lfrag or '', set()).add(name)
                        a.set('data-' + link_uid, json.dumps({'name':lname, 'frag':lfrag}, ensure_ascii=False))
                    else:
                        # Any other link gets target=_blank plus noopener
                        a.set('target', '_blank')
                        a.set('rel', 'noopener noreferrer')
                    changed.add(name)
            elif mt == 'image/svg+xml':
                self.virtualized_names.add(name)
                changed.add(name)
                xlink = XLINK('href')
                for elem in xlink_xpath(self.parsed(name)):
                    elem.set(xlink, link_replacer(name, elem.get(xlink)))

        for name, amap in ltm.iteritems():
            for k, v in tuple(amap.iteritems()):
                amap[k] = tuple(v)  # needed for JSON serialization

        # Call self.dirty on every changed name; tuple() consumes the map
        tuple(map(self.dirty, changed))
Exemple #36
0
    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css='', base_css=''):
        '''
        Gather all CSS that applies to tree (the document stored under the
        manifest href path), flatten it into a sorted rule list and apply
        the rules to the elements they select.

        Stylesheets are collected in this order: html_css_stylesheet(),
        base_css, the <style>/<link> elements of the document (including
        sheets pulled in through @import), then extra_css and user_css.
        Inline style attributes and <img> width/height attributes are
        applied last.
        '''
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        # Name used as href for sheets parsed from <style> tags and extra CSS
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        if base_css:
            stylesheets.append(parseString(base_css, validate=False))
        style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'],
                                        profile['props'],
                                        profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in style_tags:
            if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
                # Concatenate the element's own text with the text and tail
                # of each of its children.
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text)
                    # We handle @import rules separately, so parse with a
                    # fetcher that returns empty content and restore the
                    # real fetcher afterwards.
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if rule.media.mediaText == 'amzn-mobi':
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    # Drop @page rules; iterate over a tuple copy because
                    # the rule list is modified inside the loop.
                    for rule in tuple(stylesheet.cssRules.rulesOfType(CSSRule.PAGE_RULE)):
                        stylesheet.cssRules.remove(rule)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheets.append(stylesheet)
                # NOTE(review): bare except -- any failure here is logged
                # and the CSS in question is ignored.
                except:
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        # Flatten every rule of every collected sheet into one list. index
        # counts the rules in the order they are encountered.
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for sheet_index, stylesheet in enumerate(stylesheets):
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    # Only @media blocks that name all, screen or amzn-kf8
                    # contribute rules.
                    media = {rule.media.item(i) for i in
                             xrange(rule.media.length)}
                    if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                        continue
                    for subrule in rule.cssRules:
                        # sheet_index 0 is the html_css_stylesheet() sheet
                        rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
                    index = index + 1
        # presumably the tuples built by flatten_rule sort into cascade
        # order -- verify against flatten_rule
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
        select = Select(tree, ignore_inappropriate_pseudo_classes=True)

        # Each rule is a 5-tuple; only the declaration dict and the
        # selector text are needed here.
        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            try:
                matches = tuple(select(text))
            except SelectorError as err:
                self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err)))
                continue

            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                        'plumber_output_format', '').lower() in {u'mobi', u'docx'}:
                    # Fake first-letter
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        # Use the first descendant (or elem itself) that
                        # has text; the inner break stops after it.
                        for x in elem.iter('*'):
                            if x.text:
                                # Peel off leading characters whose Unicode
                                # category starts with P or Z; they go into
                                # the span together with the next character.
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                span = E.span(special_text)
                                span.set('data-fake-first-letter', '1')
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        # Inline style attributes are applied after the stylesheet rules
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        # match() anchors at the start and the pattern ends with $, so only
        # values made up entirely of digits and dots match.
        num_pat = re.compile(r'[0-9.]+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    # The attribute is removed whether or not it had a value
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        # Purely numeric values get a px unit
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Exemple #37
0
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        '''
        Build an OEB book from the HTML files returned by
        get_filelist(htmlpath, basedir, opts, log) and return it.

        Metadata is seeded from mi and completed with defaults, each
        non-binary file becomes a text/html manifest item in the spine,
        links in the HTML and CSS items are passed through
        self.resource_adder, and a table of contents is generated from
        document titles or headings.
        '''
        import uuid
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (DirContainer,
            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
            xpath)
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        from calibre.ebooks.html.input import get_filelist
        from calibre.ebooks.metadata import string_to_authors
        from calibre.utils.localization import canonicalize_lang
        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        # Copy mi into the book metadata, then fill in language, creator and
        # title when they are still missing.
        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            l = canonicalize_lang(getattr(opts, 'language', None))
            if not l:
                oeb.logger.warn(u'Language not specified')
                l = get_lang().replace('_', '-')
            metadata.add('language', l)
        if not metadata.creator:
            a = getattr(opts, 'authors', None)
            if a:
                a = string_to_authors(a)
            if not a:
                oeb.logger.warn('Creator not specified')
                a = [self.oeb.translate(__('Unknown'))]
            for aut in a:
                metadata.add('creator', aut)
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
        # A freshly generated UUID is always added as an identifier
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        # NOTE(review): the loop looks for an identifier carrying an 'id'
        # attribute but then assigns identifier[0] rather than ident --
        # confirm this is intended.
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        # Register every non-binary file as a text/html manifest item and
        # append it to the spine; htmlfile_map remembers path -> href.
        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path), log,
                    ignore_opf=True)
            bname = os.path.basename(path)
            id, href = oeb.manifest.generate(id='html',
                    href=ascii_filename(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(id, href, 'text/html')
            item.html_input_href = bname
            oeb.spine.add(item, True)

        # added_resources maps source path -> manifest href; the path is
        # lower-cased when self.is_case_sensitive(path) is false.
        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        # Expose the locally imported helpers as attributes on self
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            # The container is re-pointed at each file's own directory
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            item = oeb.manifest.hrefs[htmlfile_map[path]]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        # For each stylesheet item, look up the directory of the source file
        # it was added from (dpath stays None when there is no match) and
        # rewrite its URLs with that directory as base.
        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                cssutils.replaceUrls(item.data,
                        partial(self.resource_adder, base=dpath))

        # Generate the TOC: collect the <title> text and the first
        # heading/strong text of every linear spine item.
        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear:
                continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        # Fall back to the headers when the titles contain duplicates
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        # NOTE(review): titles/headers were collected for linear items only
        # (titles only when non-empty) but are zipped against the full
        # spine, so labels may not line up with their items -- confirm.
        for title, item in izip(use, self.oeb.spine):
            if not item.linear:
                continue
            toc.add(title, item.href)

        # Leave the book with a container rooted at the current directory
        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
        return oeb
Exemple #38
0
    for x in t.tokenize(css):
        print x
#
#    v = cssutils.css.URIValue(u'url(/**/1)')
#    print v.cssText
#    v.uri = 'uri'
#    print v.cssText
#    v.value = 'value'
#    print v.cssText

    sys.exit(1)

if 1:
    # request by Walter: run replaceUrls on a style declaration whose single
    # property holds two url() values and show the serialized result.
    style = cssutils.parseStyle("background-image: url(1.png), url('2.png')")
    cssutils.replaceUrls(style, lambda url: 'prefix/'+url)
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(style.cssText)

    # Stop here so the experiments further down the script are not run.
    sys.exit(1)

if 0:
    # ISSUE 35 (disabled experiment): parse an IE ``expression(...)`` value.
    # Two spellings of the test input are kept; the second assignment
    # overwrites the first, so only the second one is actually parsed.
    css = """div.one {color: expression((function(ele){ele.style.behavior="none";})(this));}   """
    css = """div.one {color: expression(function(ele){ele.style.behavior="none";})(this);}   """
    sheet = cssutils.parseString(css)
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(sheet.cssText)

    sys.exit(1)


Exemple #39
0
    def build(self):
        """Build the site from ``self.srcdir`` into a fresh ``self.builddir``.

        Steps, in order:

        1. Remove any existing build directory and recreate it by copying the
           favicons directory.
        2. Compile the SCSS entry point to ``css/main.css`` and copy
           ``scripts/main.js``.
        3. Rewrite every ``url(...)`` in the compiled CSS through
           ``self.file_hash`` (``@import`` rules are left alone).
        4. Feed every ``index.html`` found under ``self.srcdir`` (outside
           ``snippets``/``projects`` directories) through ``PageBuilder``.
        5. Build one page per blog post listed in ``blog/data.json``.
        6. Copy all files recorded in ``self.hashed_files`` into the build
           directory, delete the unhashed CSS/JS and run ``postprocess.sh``.

        Raises:
            IndexError: if no hashed ``.css`` or ``.js`` file was recorded,
                since the post-processing step needs one of each.
        """
        # Local import: only the final post-processing step needs it.
        import subprocess

        if os.path.exists(self.builddir):  # make sure it doesn't exist
            shutil.rmtree(self.builddir)

        # favicons
        print("* Copying Favicons")
        shutil.copytree(
            f"{self.srcdir}/favicons",
            f"{self.builddir}/")  # copy over favicons, create builddir

        # sass
        print("* Compiling SCSS")
        os.makedirs(os.path.join(self.builddir, "css"))
        ocsspath = os.path.join(self.builddir, "css", "main.css")
        with open(ocsspath, "w+") as css:
            css.write(sass.compile(filename="src/scss/styles.scss"))

        # copy over js
        print("* Moving JS")
        os.makedirs(os.path.join(self.builddir, "scripts"))
        shutil.copyfile(f"{self.srcdir}/scripts/main.js",
                        f"{self.builddir}/scripts/main.js")

        # replace images: each url() is resolved relative to the scss source
        # directory, passed to file_hash, and re-rooted one level above css/.
        cssFile = cssutils.parseFile(ocsspath)
        cssutils.replaceUrls(
            cssFile,
            lambda x: os.path.join(
                "..",
                self.file_hash(
                    os.path.join("src", "scss", os.path.normpath(x)), "images"
                ),
            ).replace("\\", "/"),
            ignoreImportRules=True,
        )
        with open(ocsspath, "wb") as css:
            css.write(cssFile.cssText)

        # html
        print("* Gathering HTML files to build")
        html_to_build = []
        for dirpath, _, filenames in os.walk(self.srcdir):
            if "snippets" in dirpath or "projects" in dirpath:
                continue
            for filename in filenames:
                if filename == "index.html":
                    html_to_build.append((dirpath, filename))

        print("* Building HTML files")
        for path, filename in html_to_build:
            src_dir = path
            build_file = filename
            build_base = pathlib.Path(path)
            print(f"  - src: {path} ({build_base.parts}) : {build_file}")
            # Drop the leading source-directory component so the remaining
            # relative path can be re-rooted under the build directory.
            build_base = pathlib.Path(*build_base.parts[1:])
            build_base = os.path.join(self.builddir, build_base)
            main_page = os.path.samefile(self.srcdir, path)
            print(f"  - build_base: {build_base}")
            print(f"  - main_page: {main_page}\n")
            with PageBuilder(
                    src_dir,
                    build_base,
                    build_file,
                    self.file_hash,
                    "" if main_page else "..",
                    {},
            ) as pb:
                with open(os.path.join(path, filename)) as f:
                    data = f.read()
                pb.feed(data)

        # blog/projects
        print("* Building Blog")
        for page, subpage, prefix in [("blog", "posts", "../../..")]:
            pagedir = os.path.join(self.srcdir, page)
            if not os.path.exists(pagedir):
                # No source directory for this section: there are no posts to
                # build (previously this fell through and hit an unbound
                # ``posts`` name).
                continue

            with open(os.path.join(pagedir, "data.json"),
                      "r",
                      encoding="utf-8") as f:
                posts = json.load(f)
            for num, post in enumerate(posts, start=1):
                # convert dates to [date] object, then to display text
                for key in ["post_date", "update_date"]:
                    if key in post:
                        post[key] = date(
                            *([int(x) for x in post[key].split("-")]))
                        post[key] = post[key].strftime("%B %d, %Y")
                    else:
                        post[key] = None
                # create slug
                if "url" not in post:
                    post["url"] = slugify(post["title"])
                # author
                post["author"] = "Rahul Yesantharao"
                post["num"] = num

            postdest = os.path.join(self.builddir, page, subpage)
            # Also creates the section directory if no page built it earlier.
            os.makedirs(postdest, exist_ok=True)
            for post in posts:
                postdir = os.path.join(postdest, post["url"])
                os.mkdir(postdir)
                print(f"  - {post['url']} -> {pagedir}")
                with PageBuilder(pagedir, postdir, "index.html",
                                 self.file_hash, prefix, post) as pb:
                    with open(os.path.join(pagedir, "post.html"),
                              "r",
                              encoding="utf-8") as f:
                        data = f.read()
                        pb.feed(data)

        # images and css (hashed files)
        cssfiles = []
        jsfiles = []
        print("* Moving hashed files")
        os.makedirs(os.path.join(self.builddir, "images"))
        for src, hashed_name in self.hashed_files.items():
            dst = os.path.join(self.builddir, hashed_name)
            if dst.endswith(".js"):
                jsfiles.append(dst)
            if dst.endswith(".css"):
                cssfiles.append(dst)
            print(f"  - {src} -> {dst}")
            if not os.path.exists(dst):
                shutil.copyfile(src, dst)

        # delete unhashed css/js files
        os.remove(os.path.join(self.builddir, "css", "main.css"))
        os.remove(os.path.join(self.builddir, "scripts", "main.js"))

        # postprocess
        print("* Postprocessing!")
        print(cssfiles)
        print(jsfiles)
        # Argument list instead of a shell string so paths containing spaces
        # or shell metacharacters reach the script intact. As before, the
        # script's exit status is not checked.
        subprocess.run(
            ["bash", "postprocess.sh", cssfiles[0], jsfiles[0]], check=False)
Exemple #40
0
    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css='', base_css=''):
        """Collect every stylesheet that applies to one document and resolve
        the resulting rules onto the elements of ``tree``.

        The stylesheets are gathered in this order: the sheet returned by
        ``html_css_stylesheet()``, ``base_css`` (if non-empty), the
        ``<style>``/``<link>`` elements found in ``tree`` (including manifest
        stylesheets pulled in by ``@import`` rules of inline styles), and
        finally ``extra_css`` / ``user_css``. All rules are flattened with
        ``self.flatten_rule``, sorted, and applied to the elements their
        selectors match. Afterwards ``style`` attributes are applied and
        ``width``/``height`` attributes of unstyled ``<img>`` elements are
        moved into their computed style.

        :param tree: parsed document; XPath queries and selectors run on it.
        :param path: href of the document, used as key into
            ``oeb.manifest.hrefs``.
        :param oeb: the book object; its ``manifest``, ``logger`` and
            ``css_preprocessor`` are used.
        :param opts: conversion options; ``opts.output_profile`` is read.
        :param profile: profile to use; when ``None`` the output profile
            whose ``short_name`` is ``'default'`` is looked up, falling back
            to ``opts.output_profile``.
        :param extra_css: CSS text parsed as an additional stylesheet.
        :param user_css: CSS text parsed as an additional stylesheet.
        :param base_css: CSS text parsed right after the first stylesheet.
        """
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        # Name given (as href) to stylesheets parsed from inline CSS text.
        cssname = os.path.splitext(basename)[0] + '.css'
        # The first entry is treated as the user agent sheet further down
        # (is_user_agent_sheet is true only for sheet_index == 0).
        stylesheets = [html_css_stylesheet()]
        if base_css:
            stylesheets.append(parseString(base_css, validate=False))
        style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'],
                                        profile['props'],
                                        profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in style_tags:
            if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES and media_ok(elem.get('media'))):
                # Concatenate the element's own text with the text and tail
                # of each of its children.
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text)
                    # We handle @import rules separately
                    # (a fetcher returning empty content is installed for the
                    # duration of this parse and the real one restored after)
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if not media_ok(rule.media.mediaText):
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                                continue
                            # The imported sheet goes in before the sheet
                            # that imports it.
                            stylesheets.append(sitem.data)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif (elem.tag == XHTML('link') and elem.get('href') and elem.get(
                    'rel', 'stylesheet').lower() == 'stylesheet' and elem.get(
                    'type', CSS_MIME).lower() in OEB_STYLES and media_ok(elem.get('media'))
                ):
                href = urlnormalize(elem.attrib['href'])
                # NOTE: this rebinds the ``path`` parameter to the linked
                # stylesheet's href for the remainder of the method.
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheets.append(stylesheet)
                except:
                    # Any failure while parsing is logged and the sheet is
                    # skipped.
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        # Running counter handed to flatten_rule; incremented once per rule.
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for sheet_index, stylesheet in enumerate(stylesheets):
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    # Only the contents of @media blocks accepted by
                    # media_ok() are used; other @media blocks are dropped.
                    if media_ok(rule.media.mediaText):
                        for subrule in rule.cssRules:
                            rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
                            index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
                    index = index + 1
        # Ordering is determined by the tuples flatten_rule() produced.
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
        select = Select(tree, ignore_inappropriate_pseudo_classes=True)

        for _, _, cssdict, text, _ in rules:
            # ``text`` is the selector text of the rule.
            fl = pseudo_pat.search(text)
            try:
                matches = tuple(select(text))
            except SelectorError as err:
                self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err)))
                continue

            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                        'plumber_output_format', '').lower() in {u'mobi', u'docx'}:
                    # Fake first-letter
                    # For each match, the first descendant (or the element
                    # itself) that has text gets its leading punctuation and
                    # space characters plus the next character wrapped in a
                    # new <span>, and the rule's declarations go on that span.
                    for elem in matches:
                        for x in elem.iter('*'):
                            if x.text:
                                punctuation_chars = []
                                # NOTE: ``text`` is reused here and no longer
                                # holds the selector text.
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                span = x.makeelement('{%s}span' % XHTML_NS)
                                span.text = special_text
                                span.set('data-fake-first-letter', '1')
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        # Matches values starting with digits/dots only, e.g. "120" or "1.5".
        num_pat = re.compile(r'[0-9.]+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    # The attribute is removed whether or not it was present.
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        # Bare numbers are given a px unit.
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
    def __init__(self,
                 tree,
                 path,
                 oeb,
                 opts,
                 profile=None,
                 extra_css='',
                 user_css=''):
        """Collect the stylesheets that apply to one document and resolve
        their rules onto the elements of ``tree``.

        Stylesheets are gathered from ``html_css_stylesheet()``, from the
        ``<style>`` and ``<link>`` children of the document's ``<head>``, and
        from ``extra_css`` / ``user_css``. Every rule is flattened with
        ``self.flatten_rule``, the result is sorted, and each rule's
        declarations are applied to the elements its selector matches.
        Afterwards ``style`` attributes are applied and ``width``/``height``
        attributes of unstyled ``<img>`` elements are moved into their style.

        :param tree: parsed document; XPath queries and selectors run on it.
        :param path: href of the document, used as key into
            ``oeb.manifest.hrefs``.
        :param oeb: the book object; its ``manifest``, ``logger`` and
            ``css_preprocessor`` are used.
        :param opts: conversion options; ``opts.output_profile`` is read.
        :param profile: profile to use; when ``None`` the output profile
            whose ``short_name`` is ``'default'`` is looked up, falling back
            to ``opts.output_profile``.
        :param extra_css: CSS text parsed as an additional stylesheet.
        :param user_css: CSS text parsed as an additional stylesheet.
        """
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        # Name given (as href) to stylesheets parsed from inline CSS text.
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            # No <head>: iterate over nothing below.
            head = []

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'], profile['props'],
                                   profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                           log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style')
                    and elem.get('type', CSS_MIME) in OEB_STYLES):
                # Concatenate the element's own text with the text and tail
                # of each of its children.
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = XHTML_CSS_NAMESPACE + text
                    text = oeb.css_preprocessor(text)
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet,
                                item.abshref,
                                ignoreImportRules=True)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                # NOTE: this rebinds the ``path`` parameter to the linked
                # stylesheet's href for the remainder of the method.
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r is not CSS' %
                        (path, item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css': extra_css, 'user_css': user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    # Any failure while parsing is logged and the sheet is
                    # skipped.
                    self.logger.exception('Failed to parse %s, ignoring.' % w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        # Running counter handed to flatten_rule; incremented once per rule.
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                rules.extend(self.flatten_rule(rule, href, index))
                index = index + 1
        # Ordering is determined by the tuples flatten_rule() produced.
        rules.sort()
        self.rules = rules
        self._styles = {}
        for _, _, cssdict, text, _ in rules:
            # ``text`` is the selector text of the rule. ':first-letter' is
            # stripped before matching and emulated below.
            fl = ':first-letter' in text
            if fl:
                text = text.replace(':first-letter', '')
            selector = get_css_selector(text)
            matches = selector(tree, self.logger)
            if fl:
                from lxml.builder import ElementMaker
                E = ElementMaker(namespace=XHTML_NS)
                # For each match, the first node from elem.iter() that has
                # text gets its leading punctuation plus the next character
                # wrapped in a new <span>, which receives the declarations.
                for elem in matches:
                    for x in elem.iter():
                        if x.text:
                            punctuation_chars = []
                            # NOTE: ``text`` is reused here and no longer
                            # holds the selector text.
                            text = unicode(x.text)
                            while text:
                                if not unicodedata.category(
                                        text[0]).startswith('P'):
                                    break
                                punctuation_chars.append(text[0])
                                text = text[1:]

                            special_text = u''.join(punctuation_chars) + \
                                    (text[0] if text else u'')
                            span = E.span(special_text)
                            span.tail = text[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            break
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        # Matches values that start with digits only, e.g. "120".
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    # The attribute is removed whether or not it was present.
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        # Bare integers are given a px unit.
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Exemple #42
0
def getSoupView(soup, css, url=''):
    """Compute the style each element of ``soup`` gets from ``css``.

    soup
        a BeautifulSoup 4 object
    css
        a CSS StyleSheet string
    url
        base URL of the stylesheet; relative ``url(...)`` references are
        resolved against it

    Rules of stylesheets pulled in through ``@import`` are applied before
    the rules of the sheet itself. Selectors containing ``:`` (pseudo
    classes/elements) are skipped. ``@media`` rules are not considered.

    returns style view
        a dict mapping ``id(element)`` to a tuple
        ``(element, cssutils.css.CSSStyleDeclaration)``
    """
    sheet = cssutils.parseString(css, href=url)

    cssutils.replaceUrls(sheet, lambda u: urlparse.urljoin(url, u), ignoreImportRules=True)
    view = {}
    # Per element and property name: specificity of the selector whose
    # declaration currently wins. Needed only while building the view.
    specificities = {}

    # TODO: filter rules simpler?, add @media
    gens = []
    for i_rule in sheet:
        if i_rule.type == i_rule.IMPORT_RULE:
            # The lambda is invoked during this call only, so reading the
            # loop variable inside it is safe.
            cssutils.replaceUrls(i_rule.styleSheet, lambda u: urlparse.urljoin(i_rule.href, u), ignoreImportRules=True)
            rules = (rule for rule in i_rule.styleSheet if rule.type == rule.STYLE_RULE)
            gens.append(rules)

    rules = (rule for rule in sheet if rule.type == rule.STYLE_RULE)
    if gens:
        import itertools
        gens.append(rules)
        rules = itertools.chain(*gens)

    for rule in rules:
        for selector in rule.selectorList:
            # TODO: make this a callback to be able to use other stuff than lxml
            if ':' in selector.selectorText:
                # Ignore pseudo:classes because we can't use them, plus they
                # match when we don't want them to on bs4
                continue
            matching = soup.select(selector.selectorText)
            for element in matching:
                ID = id(element)
                if ID not in view:
                    # add initial empty style declaration
                    view[ID] = (element, cssutils.css.CSSStyleDeclaration())  # @UndefinedVariable
                    specificities[ID] = {}
                declaration = view[ID][1]
                winners = specificities[ID]

                for p in rule.style:
                    # update style declaration
                    if p not in declaration:
                        # setProperty needs a new Property object and
                        # MUST NOT reuse the existing Property
                        # which would be the same for all elements!
                        # see Issue #23
                        declaration.setProperty(p.name, p.value, p.priority)
                        winners[p.name] = selector.specificity
                    else:
                        sameprio = (p.priority ==
                                    declaration.getPropertyPriority(p.name))
                        if not sameprio and bool(p.priority) or (
                           sameprio and selector.specificity >=
                                        winners[p.name]):
                            # later, more specific or higher prio
                            declaration.setProperty(p.name, p.value, p.priority)
                            # Remember the winning selector's specificity;
                            # otherwise a later, less specific rule would be
                            # compared against a stale value and could
                            # wrongly override this declaration.
                            winners[p.name] = selector.specificity

    return view
Exemple #43
0
    def run(self):
        """Rewrite the URLs in the CSS file ``self.input_file`` to CDN URLs.

        Relative URLs are first resolved with ``self.resolveToAbsolutePath``;
        every resulting local file that exists must have an entry in the
        synced files database before the URLs are rewritten with
        ``self.resolveToCDNURL`` and the stylesheet is written to
        ``self.output_file``.

        Returns:
            ``self.input_file`` unchanged when the stylesheet contains no
            URLs, otherwise ``self.output_file``.

        Raises:
            DocumentRootAndBasePathRequiredException: if ``document_root`` or
                ``base_path`` is not set.
            RequestToRequeueException: if a referenced file that exists on
                disk has no entry in the synced files database yet.
        """
        # Step 0: ensure that the document_root and base_path variables are
        # set. If the file that's being processed was inside a source that has
        # either one or both not set, then this processor can't run.
        if self.document_root is None or self.base_path is None:
            raise DocumentRootAndBasePathRequiredException

        # We don't rename the file, so we can use the default output file.

        # logging.CRITICAL is the level constant; logging.critical is the
        # logging *function* and is not a valid log level.
        parser = CSSParser(log=None, loglevel=logging.CRITICAL)
        sheet = parser.parseFile(self.input_file)

        # Step 1: ensure the file has URLs. If it doesn't, we can stop the
        # processing. any() stops at the first URL found.
        if not any(True for _ in getUrls(sheet)):
            return self.input_file

        # Step 2: resolve the relative URLs to absolute paths.
        replaceUrls(sheet, self.resolveToAbsolutePath)

        # Step 3: verify that each of these files has been synced.
        # The connection and cursor are kept on the instance (not closed
        # here); NOTE(review): presumably resolveToCDNURL uses them in step 4.
        synced_files_db = urljoin(sys.path[0] + os.sep, SYNCED_FILES_DB)
        self.dbcon = sqlite3.connect(synced_files_db)
        self.dbcur = self.dbcon.cursor()
        for urlstring in getUrls(sheet):
            # Skip absolute URLs.
            if urlstring.startswith(("http://", "https://")):
                continue

            # Skip broken references in the CSS file. This would otherwise
            # prevent this CSS file from ever passing through this processor.
            if not os.path.exists(urlstring):
                continue

            # Check that a CDN URL is known for the given absolute path.
            self.dbcur.execute(
                "SELECT url FROM synced_files WHERE input_file=?",
                (urlstring, ))
            if self.dbcur.fetchone() is None:
                raise RequestToRequeueException(
                    "The file '%s' has not yet been synced to the server '%s'"
                    % (urlstring, self.process_for_server))

        # Step 4: resolve the absolute paths to CDN URLs.
        replaceUrls(sheet, self.resolveToCDNURL)

        # Step 5: write the updated CSS to the output file. The context
        # manager closes the file even if serializing or writing fails.
        with open(self.output_file, 'w') as f:
            f.write(sheet.cssText)

        return self.output_file
Exemple #44
0
    for x in t.tokenize(css):
        print(x)
#
#    v = cssutils.css.URIValue(u'url(/**/1)')
#    print v.cssText
#    v.uri = 'uri'
#    print v.cssText
#    v.value = 'value'
#    print v.cssText

    sys.exit(1)

if 1:
    # request by Walter: run replaceUrls on a style declaration whose single
    # property holds two url() values and show the serialized result.
    def _with_prefix(url):
        """Return *url* with 'prefix/' put in front of it."""
        return 'prefix/' + url

    style = cssutils.parseStyle(
        "background-image: url(1.png), url('2.png')"
    )
    cssutils.replaceUrls(style, _with_prefix)
    print(style.cssText)

    # Stop here so the experiments further down the script are not run.
    sys.exit(1)

if 0:
    # ISSUE 35 (disabled experiment): parse an IE ``expression(...)`` value.
    # Two spellings of the input are kept side by side; as before, only the
    # last one ends up in ``css`` and gets parsed.
    variants = (
        """div.one {color: expression((function(ele){ele.style.behavior="none";})(this));}   """,
        """div.one {color: expression(function(ele){ele.style.behavior="none";})(this);}   """,
    )
    css = variants[-1]
    sheet = cssutils.parseString(css)
    serialized = sheet.cssText
    print(serialized)

    sys.exit(1)

if 1:
    css = """
Exemple #45
0
    def __init__(self,
                 tree,
                 path,
                 oeb,
                 opts,
                 profile=None,
                 extra_css='',
                 user_css=''):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            self.profile = opts.output_profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            head = []

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'], profile['props'],
                                   profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                           log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style')
                    and elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text, add_namespace=True)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    stylesheet.namespaces['h'] = XHTML_NS
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if rule.media.mediaText == 'amzn-mobi':
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn(
                                    'Ignoring missing stylesheet in @import rule:',
                                    rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn(
                                    'CSS @import of non-CSS file %r' %
                                    rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet,
                                item.abshref,
                                ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r is not CSS' %
                        (path, item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css': extra_css, 'user_css': user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.' % w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    media = {
                        rule.media.item(i)
                        for i in xrange(rule.media.length)
                    }
                    if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index))
                    index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(
            ur':(first-letter|first-line|link|hover|visited|active|focus|before|after)',
            re.I)
        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            if fl is not None:
                text = text.replace(fl.group(), '')
            selector = get_css_selector(text, self.oeb.log)
            matches = selector(tree, self.logger)
            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                                                    'plumber_output_format',
                                                    '').lower() == u'mobi':
                    # Fake first-letter
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter():
                            if x.text:
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                span = E.span(special_text)
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Exemple #46
0
    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css=''):
        """
        Gather every stylesheet that applies to ``tree`` and apply the
        resulting rules to the elements they select.

        :param tree: Parsed XHTML tree of the document being styled.
        :param path: Key of the document in ``oeb.manifest.hrefs``.
        :param oeb: Book container; supplies ``logger``, ``manifest`` and
            ``css_preprocessor``.
        :param opts: Conversion options; ``opts.output_profile`` supplies the
            extra cssutils profiles and the last-resort profile.
        :param profile: Stored on ``self.profile``. When ``None`` the output
            profile whose ``short_name`` is ``'default'`` is used, falling
            back to ``opts.output_profile``.
        :param extra_css: CSS text parsed and added after the stylesheets
            found in the document head. Parse failures are logged and
            ignored.
        :param user_css: CSS text, handled exactly like ``extra_css``.
        """
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        # href given to stylesheets parsed from inline <style> / extra CSS
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            head = []

        # Add cssutils parsing profiles from output_profile
        # (loop variable deliberately does not reuse the ``profile`` argument)
        for css_profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(css_profile['name'],
                                        css_profile['props'],
                                        css_profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
                # Concatenate the element text with the text and tail of any
                # child nodes so nothing inside <style> is lost.
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = XHTML_CSS_NAMESPACE + text
                    text = oeb.css_preprocessor(text)
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except Exception:
                    # Best effort: bad extra CSS must not abort the
                    # conversion, but KeyboardInterrupt/SystemExit should
                    # still propagate.
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                rules.extend(self.flatten_rule(rule, href, index))
                index = index + 1
        # The flattened entries are 5-tuples; sorting them fixes the order in
        # which declarations are applied below (ordering key is defined by
        # flatten_rule -- presumably specificity, then source index).
        rules.sort()
        self.rules = rules
        self._styles = {}
        for _, _, cssdict, text, _ in rules:
            fl = ':first-letter' in text
            if fl:
                text = text.replace(':first-letter', '')
            selector = get_css_selector(text)
            matches = selector(tree, self.logger)
            if fl:
                # Fake :first-letter by wrapping the leading punctuation plus
                # the first following character in a <span> and styling that.
                from lxml.builder import ElementMaker
                E = ElementMaker(namespace=XHTML_NS)
                for elem in matches:
                    for x in elem.iter():
                        if x.text:
                            punctuation_chars = []
                            remaining = unicode(x.text)
                            while remaining:
                                if not unicodedata.category(remaining[0]).startswith('P'):
                                    break
                                punctuation_chars.append(remaining[0])
                                remaining = remaining[1:]

                            special_text = u''.join(punctuation_chars) + \
                                    (remaining[0] if remaining else u'')
                            span = E.span(special_text)
                            span.tail = remaining[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            # Only the first node with text gets the span
                            break
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except KeyError:
                        # Attribute was not present on this <img>
                        pass
                    if val:
                        # A bare number is a pixel count
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Exemple #47
0
    log = logging.getLogger('csscache')
    handler = logging.StreamHandler(sys.stderr)
    handler.setLevel(logging.ERROR)
    log.addHandler(handler)

    if options.minified:
        cssutils.ser.prefs.useMinified()

    # Create the parser
    parser = cssutils.CSSParser(log=log,
                                raiseExceptions=True,
                                parseComments=not options.minified,
                                validate=False)
    try:
        # Parse the original file
        sheet = parser.parseFile(args[0])
    except Exception, e:
        sys.stderr.write('Error: %s %s\n' % (css_path, e.args[0]))
        sys.exit(1)

    # Replace all the urls
    replacer = partial(cache_bust_replacer, options, css_path, img_rel_path)
    cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)

    # print the new css
    sys.stdout.write(sheet.cssText)


if __name__ == "__main__":
    main()
 def rewrite_links (self, f):
     """
     Hand the callable f to cssutils.replaceUrls for this object's sheet,
     so that f is used to rewrite the links found in self.sheet.
     """
     replace_urls = cssutils.replaceUrls
     replace_urls (self.sheet, f)
Exemple #49
0
    def virtualize_resources(self):
        """
        Rewrite the links of every stylesheet, document and SVG file listed
        in ``self.mime_map``.

        Links that resolve to a name in the book are replaced by
        ``<link_uid>|<encoded name and fragment>|``; links whose target name
        is unknown become ``missing:<quoted name>``. Anchors in documents are
        additionally recorded in ``book_render_data['link_to_map']``. Every
        file name collected as changed is finally passed to ``self.dirty``.
        """
        touched = set()
        link_uid = self.book_render_data['link_uid']
        template = link_uid + '|{}|'
        find_xlinks = XPath('//*[@xl:href]')
        find_anchors = XPath('//h:a[@href]')
        find_head_links = XPath('//h:link[@href]')

        def replace_link(base, url):
            # Pure fragment: a reference into the file ``base`` itself
            if url.startswith('#'):
                frag = urlunquote(url[1:])
                if frag:
                    touched.add(base)
                    return template.format(encode_url(base, frag))
                return url

            purl = urlparse(url)
            leave_untouched = (
                purl.netloc or purl.query or
                (purl.scheme and purl.scheme != 'file') or
                not purl.path or purl.path.startswith('/')
            )
            if leave_untouched:
                return url

            target = self.href_to_name(purl.path, base)
            if not target:
                # Unresolvable: only the path component is handed back
                return purl.path
            if self.has_name(target):
                replaced = template.format(
                    encode_url(target, urlunquote(purl.fragment)))
            else:
                if isinstance(target, unicode):
                    target = target.encode('utf-8')
                replaced = 'missing:' + force_unicode(quote(target), 'utf-8')
            touched.add(base)
            return replaced

        link_to_map = self.book_render_data['link_to_map']

        def process_document(name):
            self.virtualized_names.add(name)
            root = self.parsed(name)
            for link in find_head_links(root):
                ltype = (link.get('type') or 'text/css').lower()
                rel = (link.get('rel') or 'stylesheet').lower()
                if not (ltype == 'text/css' and rel == 'stylesheet'):
                    # The browser will not load this link anyway, and it
                    # causes the resource load check to hang
                    link.attrib.clear()
                    touched.add(name)
            rewrite_links(root, partial(replace_link, name))
            for anchor in find_anchors(root):
                href = anchor.get('href')
                if not href.startswith(link_uid):
                    anchor.set('target', '_blank')
                    anchor.set('rel', 'noopener noreferrer')
                else:
                    anchor.set('href', 'javascript:void(0)')
                    decoded = decode_url(href.split('|')[1])
                    lname = decoded[0]
                    lfrag = decoded[1]
                    by_frag = link_to_map.setdefault(lname, {})
                    by_frag.setdefault(lfrag or '', set()).add(name)
                    payload = json.dumps({
                        'name': lname,
                        'frag': lfrag
                    }, ensure_ascii=False)
                    anchor.set('data-' + link_uid, payload)
                touched.add(name)

        def process_svg(name):
            self.virtualized_names.add(name)
            touched.add(name)
            xlink = XLINK('href')
            for elem in find_xlinks(self.parsed(name)):
                elem.set(xlink, replace_link(name, elem.get(xlink)))

        for name, mime in self.mime_map.iteritems():
            mime = mime.lower()
            if mime in OEB_STYLES:
                replaceUrls(self.parsed(name), partial(replace_link, name))
                self.virtualized_names.add(name)
            elif mime in OEB_DOCS:
                process_document(name)
            elif mime == 'image/svg+xml':
                process_svg(name)

        # Sets are turned into tuples; needed for JSON serialization
        for amap in link_to_map.itervalues():
            for frag in tuple(amap):
                amap[frag] = tuple(amap[frag])

        for changed_name in touched:
            self.dirty(changed_name)
Exemple #50
0
    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css=''):
        """
        Gather every stylesheet that applies to ``tree`` and apply the
        resulting rules to the elements they select.

        :param tree: Parsed XHTML tree of the document being styled.
        :param path: Key of the document in ``oeb.manifest.hrefs``.
        :param oeb: Book container; supplies ``logger``, ``log``,
            ``manifest`` and ``css_preprocessor``.
        :param opts: Conversion options; ``opts.output_profile`` supplies the
            extra cssutils profiles and the default for ``profile``.
        :param profile: Stored on ``self.profile``; ``opts.output_profile``
            is used when ``None``.
        :param extra_css: CSS text parsed and added after the stylesheets
            found in the document head. Parse failures are logged and
            ignored.
        :param user_css: CSS text, handled exactly like ``extra_css``.
        """
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            self.profile = opts.output_profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        # href given to stylesheets parsed from inline <style> / extra CSS
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            head = []

        # Add cssutils parsing profiles from output_profile
        # (loop variable deliberately does not reuse the ``profile`` argument)
        for css_profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(css_profile['name'],
                                        css_profile['props'],
                                        css_profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
                # Concatenate the element text with the text and tail of any
                # child nodes so nothing inside <style> is lost.
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text, add_namespace=True)
                    # We handle @import rules separately, so parse with a
                    # fetcher that returns empty content and restore the real
                    # one afterwards.
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    stylesheet.namespaces['h'] = XHTML_NS
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if rule.media.mediaText == 'amzn-mobi':
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                                continue
                            # Imported sheets are queued ahead of the sheet
                            # that imports them
                            stylesheets.append(sitem.data)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except Exception:
                    # Best effort: bad extra CSS must not abort the
                    # conversion, but KeyboardInterrupt/SystemExit should
                    # still propagate.
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    # Only media blocks that list all, screen or amzn-kf8
                    # contribute rules
                    media = {rule.media.item(i) for i in
                             xrange(rule.media.length)}
                    if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index))
                    index = index + 1
        # The flattened entries are 5-tuples; sorting them fixes the order in
        # which declarations are applied below (ordering key is defined by
        # flatten_rule -- presumably specificity, then source index).
        rules.sort()
        self.rules = rules
        self._styles = {}
        # No backslashes in the pattern, so a plain unicode literal is the
        # same string as the former ur'' literal.
        pseudo_pat = re.compile(u':(first-letter|first-line|link|hover|visited|active|focus|before|after)', re.I)
        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            if fl is not None:
                # Strip the pseudo selector so the rest can be matched
                # against the tree
                text = text.replace(fl.group(), '')
            selector = get_css_selector(text, self.oeb.log)
            matches = selector(tree, self.logger)
            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                        'plumber_output_format', '').lower() == u'mobi':
                    # Fake first-letter: wrap leading punctuation/separator
                    # characters plus the first following character in a
                    # <span> and style that instead.
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter():
                            if x.text:
                                punctuation_chars = []
                                remaining = unicode(x.text)
                                while remaining:
                                    category = unicodedata.category(remaining[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(remaining[0])
                                    remaining = remaining[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (remaining[0] if remaining else u'')
                                span = E.span(special_text)
                                span.tail = remaining[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                # Only the first node with text gets the span
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except KeyError:
                        # Attribute was not present on this <img>
                        pass
                    if val:
                        # A bare number is a pixel count
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Exemple #51
0
    def __init__(self, tree, path, oeb, opts, profile=None, extra_css="", user_css=""):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles

            for x in output_profiles():
                if x.short_name == "default":
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + ".css"
        stylesheets = [html_css_stylesheet()]
        style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile["name"], profile["props"], profile["macros"])

        parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger("calibre.css"))
        self.font_face_rules = []
        for elem in style_tags:
            if elem.tag == XHTML("style") and elem.get("type", CSS_MIME) in OEB_STYLES:
                text = elem.text if elem.text else u""
                for x in elem:
                    t = getattr(x, "text", None)
                    if t:
                        text += u"\n\n" + force_unicode(t, u"utf-8")
                    t = getattr(x, "tail", None)
                    if t:
                        text += u"\n\n" + force_unicode(t, u"utf-8")
                if text:
                    text = oeb.css_preprocessor(text, add_namespace=True)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ("utf-8", b""))
                    stylesheet = parser.parseString(text, href=cssname, validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    stylesheet.namespaces["h"] = XHTML_NS
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if rule.media.mediaText == "amzn-mobi":
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn("Ignoring missing stylesheet in @import rule:", rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn("CSS @import of non-CSS file %r" % rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    for rule in tuple(stylesheet.cssRules.rulesOfType(CSSRule.PAGE_RULE)):
                        stylesheet.cssRules.remove(rule)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref, ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif (
                elem.tag == XHTML("link")
                and elem.get("href")
                and elem.get("rel", "stylesheet").lower() == "stylesheet"
                and elem.get("type", CSS_MIME).lower() in OEB_STYLES
            ):
                href = urlnormalize(elem.attrib["href"])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn("Stylesheet %r referenced by file %r not in manifest" % (path, item.href))
                    continue
                if not hasattr(sitem.data, "cssRules"):
                    self.logger.warn("Stylesheet %r referenced by file %r is not CSS" % (path, item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {"extra_css": extra_css, "user_css": user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text, href=cssname, validate=False)
                    stylesheet.namespaces["h"] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception("Failed to parse %s, ignoring." % w)
                    self.logger.debug("Bad css: ")
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for sheet_index, stylesheet in enumerate(stylesheets):
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    media = {rule.media.item(i) for i in xrange(rule.media.length)}
                    if not media.intersection({"all", "screen", "amzn-kf8"}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index == 0))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index == 0))
                    index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(ur":(first-letter|first-line|link|hover|visited|active|focus|before|after)", re.I)
        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            if fl is not None:
                text = text.replace(fl.group(), "")
            selector = get_css_selector(text, self.oeb.log)
            matches = selector(tree, self.logger)
            if fl is not None:
                fl = fl.group(1)
                if fl == "first-letter" and getattr(self.oeb, "plumber_output_format", "").lower() == u"mobi":
                    # Fake first-letter
                    from lxml.builder import ElementMaker

                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter():
                            if x.text:
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {"P", "Z"}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u"".join(punctuation_chars) + (text[0] if text else u"")
                                span = E.span(special_text)
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, "//h:*[@style]"):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r"[0-9.]+$")
        for elem in xpath(tree, "//h:img[@width or @height]"):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get("width", "auto") != "auto" or style._style.get("height", "auto") != "auto"
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ("width", "height"):
                    val = elem.get(prop, "").strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += "px"
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Exemple #52
0
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        """Build an OEB book from an HTML file and the files it links to.

        The method creates an empty book, fills in fallback metadata, adds
        every non-binary file returned by ``get_filelist`` to the manifest
        and spine, rewrites links in the HTML and CSS through
        ``self.resource_adder`` and finally generates a flat table of
        contents with one entry per linear spine item.

        :param htmlpath: path of the root HTML file, passed to ``get_filelist``.
        :param basedir: base directory, passed to ``get_filelist``.
        :param opts: conversion options; ``opts.input_encoding`` is read here.
        :param log: logger, also callable for plain messages.
        :param mi: metadata object copied into the book's metadata.
        :return: the populated OEB book (also stored as ``self.oeb``).
        """
        import uuid
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (
            DirContainer,
            rewrite_links,
            urlnormalize,
            urldefrag,
            BINARY_MIME,
            OEB_STYLES,
            xpath,
        )
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        from calibre.ebooks.html.input import get_filelist
        import cssutils
        import logging

        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self, encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        # Copy the supplied metadata, then fill in anything mandatory that
        # is still missing.
        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            oeb.logger.warn("Language not specified")
            metadata.add("language", get_lang().replace("_", "-"))
        if not metadata.creator:
            oeb.logger.warn("Creator not specified")
            metadata.add("creator", self.oeb.translate(__("Unknown")))
        if not metadata.title:
            oeb.logger.warn("Title not specified")
            metadata.add("title", self.oeb.translate(__("Unknown")))
        bookid = str(uuid.uuid4())
        metadata.add("identifier", bookid, id="uuid_id", scheme="uuid")
        # NOTE(review): the uid is set to the *first* identifier as soon as
        # any identifier carries an "id" attribute, not to the matching one.
        # Kept as-is; confirm whether that is intended.
        for ident in metadata.identifier:
            if "id" in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        # Register every non-binary file as an HTML manifest/spine item.
        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path), log, ignore_opf=True)
            bname = os.path.basename(path)
            item_id, href = oeb.manifest.generate(id="html", href=ascii_filename(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(item_id, href, "text/html")
            item.html_input_href = bname
            oeb.spine.add(item, True)

        self.added_resources = {}
        self.log = log
        self.log("Normalizing filename cases")
        for path, href in htmlfile_map.items():
            # Paths are lower-cased when is_case_sensitive() reports False,
            # so later lookups ignore case differences.
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        # Stash the lazily imported helpers on self; presumably
        # resource_adder needs them - verify against that method.
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        self.log("Rewriting HTML links")
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            item = oeb.manifest.hrefs[htmlfile_map[path]]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                # Resolve URLs inside a stylesheet relative to the directory
                # it was added from (None when no source path is recorded).
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                cssutils.replaceUrls(item.data, partial(self.resource_adder, base=dpath))

        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        # One (item, title, header) record per linear spine item. Keeping the
        # labels attached to their item prevents them from drifting onto the
        # wrong file when an item has no <title> or is non-linear, which is
        # what happened when separate label lists were zipped with the spine.
        labelled = []
        for item in self.oeb.spine:
            if not item.linear:
                continue
            html = item.data
            title = "".join(xpath(html, "/h:html/h:head/h:title/text()"))
            title = re.sub(r"\s+", " ", title.strip())
            header = "(unlabled)"
            for tag in ("h1", "h2", "h3", "h4", "h5", "strong"):
                expr = "/h:html/h:body//h:%s[position()=1]/text()"
                candidate = "".join(xpath(html, expr % tag))
                candidate = re.sub(r"\s+", " ", candidate.strip())
                if candidate:
                    header = candidate
                    break
            labelled.append((item, title, header))

        # Prefer document titles; fall back to headings when titles repeat.
        titles = [title for _, title, _ in labelled if title]
        use_headers = len(titles) > len(set(titles))
        for item, title, header in labelled:
            label = header if use_headers else title
            # An item without a title gets no entry (rather than stealing
            # the title of the next item).
            if label:
                toc.add(label, item.href)

        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
        return oeb
Exemple #53
0
    def create_oebbook(self, htmlpath, mi, encoding='utf-8', pretty_print=False):
        """Build an OEB book from an HTML file and the files it links to.

        Every non-binary file returned by ``get_filelist`` becomes an HTML
        manifest/spine item, links in HTML and CSS are rewritten through
        ``self.resource_adder``, and each header found by ``find_headers``
        in a linear spine item gets an id and a table-of-contents entry.

        :param htmlpath: path of the root HTML file; its directory is the
            base directory handed to ``get_filelist``.
        :param mi: metadata object copied into the book's metadata.
        :param encoding: input encoding given to ``OEBBook``; must be truthy.
        :param pretty_print: forwarded to ``OEBBook``.
        :return: the populated OEB book (also stored as ``self.oeb``).
        """
        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        basedir = os.path.dirname(htmlpath)
        html_preprocessor = HTMLPreProcessor()
        assert encoding
        oeb = OEBBook(html_preprocessor, pretty_print=pretty_print, input_encoding=encoding)
        self.oeb = oeb

        # Copy the supplied metadata, then fill in anything mandatory that
        # is still missing. logging.warning replaces the deprecated
        # logging.warn alias.
        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata)
        if not metadata.language:
            logging.warning('Language not specified')
            metadata.add('language', 'en')
        if not metadata.creator:
            logging.warning('Creator not specified')
            metadata.add('creator', 'Unknown')
        if not metadata.title:
            logging.warning('Title not specified')
            metadata.add('title', 'Unknown')
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        # NOTE(review): the uid is set to the *first* identifier as soon as
        # any identifier carries an 'id' attribute, not to the matching one.
        # Kept as-is; confirm whether that is intended.
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        # Register every non-binary file as an HTML manifest/spine item.
        filelist = get_filelist(htmlpath, basedir)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path), ignore_opf=True)
            bname = os.path.basename(path)
            item_id, href = oeb.manifest.generate(id='html', href=bname)
            htmlfile_map[path] = href
            item = oeb.manifest.add(item_id, href, 'text/html')
            item.html_input_href = bname
            oeb.spine.add(item, True)

        self.added_resources = {}
        for path, href in htmlfile_map.items():
            # Paths are lower-cased when is_case_sensitive() reports False,
            # so later lookups ignore case differences.
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        # Stash the helpers on self; presumably resource_adder needs them -
        # verify against that method.
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, ignore_opf=True)
            item = oeb.manifest.hrefs[htmlfile_map[path]]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        # Iterate over a snapshot: resource_adder presumably adds manifest
        # items while URLs are being replaced - TODO confirm.
        for item in list(oeb.manifest.values()):
            if item.media_type in self.OEB_STYLES:
                # Resolve URLs inside a stylesheet relative to the directory
                # it was added from (None when no source path is recorded).
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                cssutils.replaceUrls(item.data, partial(self.resource_adder, base=dpath))

        toc = self.oeb.toc
        headers = []
        for item in self.oeb.spine:
            if not item.linear:
                continue
            html = item.data
            for header in find_headers(html):
                headers.append((item, header))
        # Only linear items reach `headers`, so no second linearity check is
        # needed here.
        # NOTE(review): any id already present on a header is overwritten.
        for i, (item, header) in enumerate(headers):
            tocid = 'tocid{}'.format(i)
            header.attrib['id'] = tocid
            link = '{}#{}'.format(item.href, tocid)
            toc.add(header.text, link)
        oeb.container = DirContainer(os.getcwd(), ignore_opf=True)
        return oeb
Exemple #54
0
    # Configure the logger
    log = logging.getLogger('csscache')
    handler = logging.StreamHandler(sys.stderr)
    handler.setLevel(logging.ERROR)
    log.addHandler(handler)

    if options.minified:
        cssutils.ser.prefs.useMinified()

    # Create the parser
    parser = cssutils.CSSParser(log=log,
                                raiseExceptions=True,
                                parseComments=not options.minified,
                                validate=False)
    try:
        # Parse the original file
        sheet = parser.parseFile(args[0])
    except Exception, e:
        sys.stderr.write('Error: %s %s\n' % (css_path, e.args[0]))
        sys.exit(1)

    # Replace all the urls
    replacer = partial(cache_bust_replacer, options, css_path, img_rel_path)
    cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)

    # print the new css
    sys.stdout.write(sheet.cssText)

if __name__ == "__main__":
    main()
Exemple #55
0
def getSoupView(soup, css, url=''):
    """
    Compute the cascaded style declaration of every element matched by css.

    soup
        a BeautifulSoup 4 object (only its ``select`` method is used)
    css
        a CSS StyleSheet string
    url
        href given to the parsed sheet and base against which the URLs
        inside it are made absolute

    returns style view
        a dict mapping ``id(element)`` to an
        ``(element, CSSStyleDeclaration)`` tuple

    Selectors containing ``:`` are ignored, and only top-level style rules
    of the sheet and of its directly imported sheets are considered.
    """
    import itertools

    sheet = cssutils.parseString(css, href=url)

    cssutils.replaceUrls(sheet,
                         lambda u: urlparse.urljoin(url, u),
                         ignoreImportRules=True)
    view = {}
    specificities = {}  # needed temporarily

    # TODO: filter rules simpler?, add @media
    # Imported rules come first so that rules of the importing sheet are
    # applied later and win ties in the cascade.
    gens = []
    for i_rule in sheet:
        if i_rule.type != i_rule.IMPORT_RULE:
            continue
        imported = i_rule.styleSheet
        if imported is None:
            # NOTE(review): presumably None when the import could not be
            # loaded; nothing to resolve or match in that case.
            continue
        cssutils.replaceUrls(imported,
                             lambda u, base=i_rule.href: urlparse.urljoin(base, u),
                             ignoreImportRules=True)
        gens.append(rule for rule in imported
                    if rule.type == rule.STYLE_RULE)
    gens.append(rule for rule in sheet if rule.type == rule.STYLE_RULE)

    for rule in itertools.chain(*gens):
        for selector in rule.selectorList:
            # TODO: make this a callback to be able to use other stuff than bs4
            if ':' in selector.selectorText:
                # Ignore pseudo-classes and pseudo-elements: they cannot be
                # evaluated here, and bs4 would match them when we do not
                # want it to.
                continue
            matching = soup.select(selector.selectorText)
            for element in matching:
                ID = id(element)
                if ID not in view:
                    # add initial empty style declaration
                    view[ID] = (element, cssutils.css.CSSStyleDeclaration()
                                )  # @UndefinedVariable
                    specificities[ID] = {}
                style = view[ID][1]
                known = specificities[ID]

                for p in rule.style:
                    # update style declaration
                    if p not in style:
                        # setProperty needs a new Property object and
                        # MUST NOT reuse the existing Property
                        # which would be the same for all elements!
                        # see Issue #23
                        style.setProperty(p.name, p.value, p.priority)
                        known[p.name] = selector.specificity
                        continue

                    sameprio = (
                        p.priority == style.getPropertyPriority(p.name))
                    higher_prio = not sameprio and bool(p.priority)
                    as_specific = (sameprio and
                                   selector.specificity >= known[p.name])
                    if higher_prio or as_specific:
                        # later, more specific or higher prio
                        style.setProperty(p.name, p.value, p.priority)
                        # Remember the specificity of the winning selector;
                        # otherwise a later, less specific selector would be
                        # compared against a stale value and override it.
                        known[p.name] = selector.specificity

    return view
Exemple #56
0
 def rewrite_links(self, f):
     """Apply ``f`` to the URLs of ``self.sheet`` via ``cssutils.replaceUrls``."""
     sheet = self.sheet
     cssutils.replaceUrls(sheet, f)