def comment(self, content):
    """Attach a comment node to the node on top of the parent stack.

    Nothing is recorded when ``self._incl_comm`` is None, or when the
    comment text is empty (after the optional whitespace normalisation
    enabled by a non-None ``self._norm_sp``).
    """
    if self._incl_comm is None:
        return
    if self._norm_sp is not None:
        # collapse each run of whitespace to one space and trim both ends
        content = ' '.join(content.split())
    if len(content) == 0:
        return
    # the element counter is bumped before encoding, as a failing encode
    # must leave the counter in the same state it always did
    self._n_elmt += 1
    encoded = content.encode(self.encoding)
    comment_path = '%s/comment()' % self._xpath
    _inc_xpath(self._h, comment_path)
    # build the comment node and hang it under the current parent
    comment_node = [NT_COMM, 'comment()', encoded, [], None, 0,
                    self._h[comment_path]]
    link_node(self._p_stack[-1], comment_node)
def characters(self, ch):
    """Record a chunk of character data under the current parent node.

    When ``self._norm_sp`` is not None, whitespace runs in ``ch`` are
    collapsed to single spaces and the ends are trimmed first. Chunks
    that are empty, a lone newline or a lone space are dropped. If the
    parent's last child is already a text node, the chunk is appended to
    that node's value; otherwise a new text node is created and linked.
    """
    if self._norm_sp is not None:
        ch = ' '.join(ch.split())
    if len(ch) == 0 or ch == "\n" or ch == ' ':
        return
    text = ch.encode(self.encoding)
    parent = self._p_stack[-1]
    siblings = parent[N_CHILDS]
    if siblings and siblings[-1][N_TYPE] == NT_TEXT:
        # adjacent text chunks are merged into the existing text node
        previous = siblings[-1]
        previous[N_VALUE] = previous[N_VALUE] + text
        return
    self._n_elmt += 1
    text_path = '%s/text()' % self._xpath
    _inc_xpath(self._h, text_path)
    # build the text node and hang it under the current parent
    text_node = [NT_TEXT, 'text()', text, [], None, 0, self._h[text_path]]
    link_node(parent, text_node)
def startElement(self, name, attrs):
    """Open an element: build its node, its attribute nodes, and push it.

    The element name is encoded with ``self.encoding`` and appended to
    the running ``self._xpath``; ``_inc_xpath`` updates the counter kept
    in ``self._h`` for that path, and the resulting count is appended to
    the path as a ``[n]`` suffix. One name node and one value node are
    created per attribute, in sorted attribute-name order. The element
    node is linked under the node on top of ``self._p_stack`` and then
    pushed onto that stack.

    :param name: element name as delivered by the parser
    :param attrs: mapping-like object exposing ``keys()`` and ``get()``
    """
    name = name.encode(self.encoding)
    # process xpath
    self._xpath = '%s/%s' % (self._xpath, name)
    _inc_xpath(self._h, self._xpath)
    occurrence = self._h[self._xpath]
    # nodes construction for element
    node = [NT_NODE, name, name, [], None, self._n_elmt + 1, occurrence]
    self._n_elmt += 1
    self._xpath = '%s[%s]' % (self._xpath, occurrence)
    # nodes construction for element's attributes;
    # sort attributes to avoid further moves
    for raw_key in sorted(attrs.keys()):
        # Fetch the value with the ORIGINAL key: the encoded key is a
        # different string for non-ASCII names and would miss the
        # entry, silently yielding an empty attribute value.
        value = attrs.get(raw_key, '').encode(self.encoding)
        key = raw_key.encode(self.encoding)
        self._n_elmt += 2
        attr_node = [NT_ATTN, '@%sName' % key, key, [], None, 1, 0]
        link_node(node, attr_node)
        link_node(attr_node, [NT_ATTV, '@%s' % key, value, [], None, 0, 0])
    link_node(self._p_stack[-1], node)
    # set current element on the top of the father stack
    self._p_stack.append(node)