def __init__(self, args, pseudo_element_name='{http://www.w3.org/1999/xhtml}span'):
    """Initialise the per-document bookkeeping attributes on ``self``.

    args -- parsed options object, or None; when given, its ``verbose``
            attribute decides whether diagnostics are enabled.
    pseudo_element_name -- tag name stored for later use when pseudo
            elements are created (defaults to an XHTML ``span``).
    """
    # Shared lookup table; the very same dict object is handed to the evaluator.
    self.node_at = {}
    self.evaluator = ContentEvaluator(self.node_at)

    # Nodes with content: target-counter(....) together with the counter
    # values current at that point for the node, e.g.
    # (etree.Element, {'name', 4})
    self.reprocess = []

    self.args = args
    # Without an options object diagnostics stay switched off.
    self.verbose = args.verbose if args is not None else False
    self.pseudo_element_name = pseudo_element_name
class AddNumbering(object):
    """Evaluate CSS generated-content rules directly on an HTML tree.

    ``transform`` runs several passes over the document: pseudo elements are
    materialised, counters are run, target-counter()/target-text() lookups
    are resolved, and ``move-to`` nodes are relocated.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source; the nesting of a few statements was inferred from the syntax and
    should be confirmed against the original file.
    """

    # ``class`` attribute values that mark the elements created by expand_pseudo().
    _PSEUDO_CLASSES = ('pseudo-before', 'pseudo-after')
    # Content functions that cannot be evaluated until every node has been visited.
    _TARGET_FUNCTIONS = ('target-counter', 'target-text')

    def __init__(self, args, pseudo_element_name='{http://www.w3.org/1999/xhtml}span'):
        """Set up the lookup tables shared by all passes.

        args -- parsed options object or None; ``verbose`` and the ``no_*``
                switches are read from it.
        pseudo_element_name -- tag used for generated :before/:after elements.
        """
        self.node_at = {}
        self.evaluator = ContentEvaluator(self.node_at)
        # nodes with content: target-counter(....) and the current counter
        # values at that point for the node: (etree.Element, {'name', 4})
        self.reprocess = []
        self.args = args
        self.verbose = args.verbose if args is not None else False
        self.pseudo_element_name = pseudo_element_name

    def _log(self, fmt, *args):
        """Write one diagnostic line to stderr when verbose mode is on.

        Formatting is deferred until we know the message is wanted, so a
        value that does not fit the format can never break a quiet run.
        """
        if self.verbose:
            sys.stderr.write((fmt % args if args else fmt) + '\n')

    @staticmethod
    def _detach(node):
        """Remove ``node`` from its parent while keeping the text that follows it.

        lxml stores the text after an element as that element's ``tail``, so a
        plain ``parent.remove(node)`` would silently drop it. The tail is
        appended to the previous sibling's tail, or to the parent's text when
        there is no previous sibling.
        """
        parent = node.getparent()
        if node.tail is not None:
            previous = node.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or '') + node.tail
            else:
                parent.text = (parent.text or '') + node.tail
            node.tail = None
        parent.remove(node)

    def _supported_features(self):
        """Return ``(properties, content_values)`` dicts mapping name -> enabled flag.

        Everything listed in FEATURES starts enabled; the ``no_*`` switches on
        ``self.args`` turn whole feature groups off again.
        """
        supported_properties = {}
        supported_content = {}
        for (props, contents) in FEATURES.values():
            for x in props:
                supported_properties[x] = True
            for x in contents:
                supported_content[x] = True

        def del_features(feature):
            (props, contents) = FEATURES[feature]
            for x in props:
                supported_properties[x] = False
            for x in contents:
                supported_content[x] = False

        if self.args is not None:
            if self.args.no_counter:
                del_features('counter')
            if self.args.no_target:
                del_features('target')
            if self.args.no_string:
                del_features('string')
            if self.args.no_move:
                del_features('move')
        return supported_properties, supported_content

    def transform(self, html, explicit_styles=None, pretty_print=True):
        """Run every pass over ``html`` and return the parsed, transformed tree.

        html -- input handed unchanged to ``premailer.Premailer``.
        explicit_styles -- extra styles forwarded to Premailer; defaults to a
                fresh empty list on each call.
        pretty_print -- forwarded to ``Premailer.transform``.

        Returns the tree produced by ``etree.parse`` after all passes ran.

        Passes:
         - find all the targets we'll need to look up
         - expand all pseudo nodes and remove all hidden ones
         - calculate all the counters and save counters that will need to be
           looked up (target-counter)
         - recalculate all the remaining content (that has target-counter) by
           looking up the nodes
         - move nodes (move-to / pending)
         - remove the styling attribute
        """
        if explicit_styles is None:
            explicit_styles = []
        find_all = etree.XPath('//*')

        supported_properties, supported_content = self._supported_features()
        self._log('LOG: Supported properties: %s', supported_properties)
        self._log('LOG: Supported content values: %s', supported_content)

        inliner = premailer.Premailer(
            html,
            supported_properties=supported_properties,
            supported_content=supported_content,
            explicit_styles=explicit_styles,
            remove_classes=False,
            custom_style_attrib=STYLE_ATTRIBUTE,
            verbose=self.verbose)
        html = inliner.transform(pretty_print=pretty_print)
        html = etree.parse(StringIO(html))
        nodes = find_all(html)

        self._log("-------- Finding target nodes ( CSS target-counter() or target-text() ) : %d", len(nodes))
        self._find_targets(nodes)

        self._log("-------- Creating pseudo elements ( CSS :before and :after ) : %d", len(nodes))
        for node in nodes:
            self.expand_pseudo(node, node.attrib.get(STYLE_ATTRIBUTE, ''))

        # We may have added pseudo nodes, so query the tree again.
        nodes = find_all(html)
        self._log("-------- Running counters and generating simple content : %d", len(nodes))
        # This has to be done in a separate pass so we can look up target-counter
        for node in nodes:
            self.mutate_node(node)

        self._log("-------- Resolving link counters ( CSS3 target-counter ) : %d", len(self.reprocess))
        self._resolve_targets()

        # We may have removed nodes, so query again *before* reporting the count.
        nodes = find_all(html)
        self._log("-------- Moving nodes ( CSS3 http://www.w3.org/TR/css3-content/#moving ) : %d", len(nodes))
        self._move_nodes(nodes)

        # Clean up the HTML.
        if STYLE_ATTRIBUTE != 'style':
            for node in nodes:
                if STYLE_ATTRIBUTE in node.attrib:
                    del node.attrib[STYLE_ATTRIBUTE]
        return html

    def _find_targets(self, nodes):
        """Register in ``self.node_at`` every id referenced by target-text/target-counter."""
        for node in nodes:
            style = PropertyParser().parse(node.attrib.get(STYLE_ATTRIBUTE, ''))
            if 'content' not in style:
                continue
            for (name, value) in style['content']:
                attr = None
                if 'target-text' == name:
                    (attr, _) = value
                if 'target-counter' == name:
                    (attr, _, _) = value
                if not attr:
                    continue
                target_id = node.attrib.get(attr)
                if not target_id:
                    # The referenced attribute is missing or empty: nothing to look up.
                    continue
                if target_id[0] == '#':
                    target_id = target_id[1:]
                # NOTE(review): ids without a leading '#' are registered too,
                # matching the reconstructed original; expand_pseudo() ignores them.
                self.node_at[target_id] = None

    def _resolve_targets(self):
        """Re-evaluate content that was deferred because it referenced a target."""
        for (node, saved) in self.reprocess:
            # Restore the counters that were current when the node was first visited.
            state = self.evaluator.state
            state.countersAt = saved
            state.counters = state.countersAt
            d = PropertyParser().parse(node.attrib.get(STYLE_ATTRIBUTE, ''))
            if 'content' in d:
                self._replace_content(node, d['content'])
                # also remove non-pseudo elements; iterate over a snapshot
                # because removing during iteration skips siblings.
                for child in list(node):
                    if not self.is_pseudo(child):
                        node.remove(child)

    def _move_nodes(self, nodes):
        """Detach ``move-to`` nodes and re-attach them where ``pending(name)`` appears."""
        move_to_destinations = {}  # name -> list of nodes waiting to be dumped
        for node in nodes:
            style = PropertyParser().parse(node.attrib.get(STYLE_ATTRIBUTE, ''))
            if 'move-to' in style:
                dest = style['move-to']
                if dest != 'here':  # Ignore if it's 'here'
                    move_to_destinations.setdefault(dest, []).append(node)
                    # Can't just remove the node, because of tails...
                    self._detach(node)
            if 'content' in style:
                for (name, pending_name) in style['content']:
                    if 'pending' == name:
                        # TODO remove all children and text
                        pending_name = str(pending_name)
                        if pending_name in move_to_destinations:
                            for waiting in move_to_destinations[pending_name]:
                                node.append(waiting)
                            move_to_destinations[pending_name] = []

    def is_pseudo(self, node):
        """Return True when ``node`` carries one of the generated pseudo-element classes."""
        return node.attrib.get('class', '') in self._PSEUDO_CLASSES

    def _replace_content(self, node, content):
        """Evaluate ``content`` and store the resulting text on ``node``.

        Because of lxml's use of text tails, if we have:
            <node><pseudo-before>...</pseudo-before>...</node>
        then just setting node.text='foo' would give:
            <node>foo<pseudo-before>...</pseudo-before>...</node>
        instead of the expected:
            <node><pseudo-before>...</pseudo-before>foo</node>
        so the text goes on the tail of a leading pseudo element when the
        first child is one.
        """
        text = self.evaluator.eval_content(node, content)
        if len(node) > 0 and self.is_pseudo(node[0]):
            node[0].tail = text
        else:
            node.text = text

    def update_counters(self, node, d):
        """Apply ``counter-reset`` then ``counter-increment`` from the parsed style ``d``.

        node -- unused here; kept for interface compatibility.
        d -- mapping of property name to a list of ``(counter_name, value)`` pairs.
        """
        counters = self.evaluator.state.counters
        if 'counter-reset' in d:
            for (name, v) in d['counter-reset']:
                if name == 'none':
                    continue
                self._log("Resetting %s to %d", name, v)
                counters[name] = v
        if 'counter-increment' in d:
            for (name, v) in d['counter-increment']:
                self._log("Incrementing %s by %s", name, str(v))
                # A counter that was never reset starts from zero.
                counters[name] = counters.get(name, 0) + v

    def mutate_node(self, node):
        """Run counters for ``node`` and generate any content that needs no target lookup.

        Content or string-set values that reference target-counter/target-text
        are queued on ``self.reprocess`` along with a snapshot of the state.
        """
        d = PropertyParser().parse(node.attrib.get(STYLE_ATTRIBUTE, ''))
        if d:
            self.update_counters(node, d)

        # if there's a target-counter pointing to this node, squirrel the
        # counter (TODO: Should this be done _before_ incrementing?)
        node_id = node.attrib.get('id', None)
        if node_id and node_id in self.node_at:
            self.node_at[node_id] = (node, State(self.evaluator.state))

        if not d:
            return

        # We'll have to look up the id later to find the counter
        if 'content' in d:
            has_target = any(key in self._TARGET_FUNCTIONS for (key, _) in d['content'])
            if has_target:
                self.reprocess.append((node, State(self.evaluator.state)))
            else:
                self._replace_content(node, d['content'])

        # http://www.w3.org/TR/css3-gcpm/#setting-named-strings-the-string-set-pro
        if 'string-set' in d:
            has_target = any(
                operation in self._TARGET_FUNCTIONS
                for (_, string_value) in d['string-set']
                for (operation, _) in string_value)
            if has_target:
                self.reprocess.append((node, State(self.evaluator.state)))
            else:
                # Note: The 1st "value" is actually the string name
                for (string_name, string_value) in d['string-set']:
                    string_computed = self.evaluator.eval_content(node, string_value)
                    # Diagnostics go to stderr and only in verbose mode, like
                    # every other message in this class.
                    self._log("Setting string %s to [%s]", string_name, string_computed)
                    self.evaluator.state.strings[string_name] = string_computed

    def expand_pseudo(self, node, style, class_=''):
        """Normalise ``node``'s style and create its :before/:after pseudo elements.

        node -- element to process.
        style -- raw style text taken from the node's style attribute.
        class_ -- '' for a real element, ':before' or ':after' when ``node``
                is itself a generated pseudo element.

        Nodes whose computed ``display`` is ``none`` are removed from the tree.
        """
        d = parse_style(style, class_)
        if 'display' in d and 'none' == d['display']:
            # Keep the text that follows the hidden element; for a :before
            # pseudo element that tail is the parent's own leading text.
            self._detach(node)
            return
        node.attrib[STYLE_ATTRIBUTE] = _style_to_string(d)

        # Also, if there's a target-counter then add it to the list
        if 'content' in d:
            content = ContentPropertyParser().parse(d['content'])
            if content is not None:
                for (function, args) in content:
                    attr = None
                    if function == 'target-counter':
                        (attr, _, _) = args
                    if function == 'target-text':
                        (attr, _) = args
                    if not attr:
                        continue
                    # If it's a pseudo element use the parent's attribute
                    owner = node.getparent() if class_ != '' else node
                    target_id = owner.attrib.get(attr, '')
                    if not target_id:
                        continue
                    if target_id[0] == '#':
                        # omit the hash tag
                        self.node_at[target_id[1:]] = None
                    else:
                        self._log("WARNING: Ignoring lookup to a non-internal id: '%s' on a %s",
                                  target_id, owner.tag)

        if not class_ and ':before' in style:
            pseudo = etree.Element(self.pseudo_element_name)
            pseudo.attrib['class'] = 'pseudo-before'
            node.insert(0, pseudo)
            if node.text:
                # The node's leading text must stay after the new first child.
                pseudo.tail = node.text
                node.text = ''
            self.expand_pseudo(pseudo, style, ':before')
        if not class_ and ':after' in style:
            pseudo = etree.Element(self.pseudo_element_name)
            pseudo.attrib['class'] = 'pseudo-after'
            node.append(pseudo)
            self.expand_pseudo(pseudo, style, ':after')