def tokens(cls, selector):
    """Split a selector into a List of Op and Tag items.

    Every (op, tag) pair yielded by ElementPath.xpath_tokenizer becomes an
    Op when op is non-empty and a Tag otherwise.  A token that directly
    follows an item comparing equal to "=" is first passed through
    cls.unquote_attribute.
    """
    result = List()
    last = None
    for op, tag in ElementPath.xpath_tokenizer(selector):
        # Pick the wrapper and the raw text together so the unquoting
        # logic only has to be written once.
        wrap, text = (Op, op) if op else (Tag, tag)
        if last == "=":
            text = cls.unquote_attribute(text)
        last = wrap(text)
        result.append(last)
    return result
def tokens(cls, selector):
    """Tokenize selector and return the tokens as a List of Op / Tag items.

    Operator tokens are wrapped in Op, everything else in Tag.  The token
    that comes right after an item equal to '=' is run through
    cls.unquote_attribute before it is wrapped.
    """
    collected = List()
    previous = None
    for operator, name in ElementPath.xpath_tokenizer(selector):
        after_equals = previous == '='
        if operator:
            previous = Op(cls.unquote_attribute(operator) if after_equals else operator)
        else:
            previous = Tag(cls.unquote_attribute(name) if after_equals else name)
        collected.append(previous)
    return collected
def xpath_tokenizer(p):
    """
    Test the XPath tokenizer.

    Flattens the (op, tag) pairs produced by ElementPath.xpath_tokenizer
    into a plain list holding whichever element of each pair is non-empty.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    return [op or tag for op, tag in ElementPath.xpath_tokenizer(p)]
def _structure_parse_root_path(self, _root_path):
    """Extract the names and paths needed to specify the destination and create new nodes.

    The tokens of ``self.rows_xpath`` are inspected to work out:

    * the root node name (the name token right after the leading "/"),
    * the row node name (the token following the ("/", "") separator that
      find_previous_match reports when started at the last token),
    * the row parent node path (all tokens before that separator, joined).

    If the expression holds several XPaths separated by "|", only the tokens
    of the first one are considered.

    :param _root_path: The root path. Must not be empty; otherwise it is only used in error messages.
    :return: A tuple of (root node name, row node name, row node parent XPath).
    :raises Exception: If the root path is empty, the XPath does not start with an absolute
        top node, or the row node cannot be located or is the root node.
    """

    if _root_path == "":
        raise Exception("_parse_root_path: Root path cannot be empty")

    # Parse XPath tokens
    # NOTE(review): the tokens come from self.rows_xpath rather than _root_path;
    # presumably callers pass the same value for both - confirm.
    _root_xpath_tokens = list(_elementpath.xpath_tokenizer(self.rows_xpath))

    # Use only the first path for the destination. The tokens after the first "|"
    # are dropped so that the row node and its parent path below are taken from
    # the first XPath and not from the whole union expression.
    try:
        _first_splitter = _root_xpath_tokens.index(('', '|'))
    except ValueError:
        # Single XPath, nothing to cut away.
        pass
    else:
        print("_parse_root_path: Multiple XPaths, using first.")
        _root_xpath_tokens = _root_xpath_tokens[:_first_splitter]

    # Get the root node name. At least two tokens ("/" and the name) are needed,
    # checking the length first avoids a bare IndexError on too short paths.
    if len(_root_xpath_tokens) > 1 and _root_xpath_tokens[0][0] == "/":
        _root_node_name = _root_xpath_tokens[1][1]
    else:
        raise Exception(
            "_parse_root_path: It is necessary to have an absolute (\"/node\") top node name in the XPath")

    print("_root_node_name=" + str(_root_node_name))

    # Get the row node name
    _row_node_name_idx = find_previous_match(_root_xpath_tokens, len(_root_xpath_tokens) - 1, ("/", ""))
    if _row_node_name_idx == -1:
        raise Exception("_parse_root_path: Cannot find start of condition.\nXPath = " + _root_path)

    _row_node_name = _root_xpath_tokens[_row_node_name_idx + 1][1]
    if _row_node_name_idx == 0:
        raise Exception("_parse_root_path: The row node cannot be the root node.\nXPath = " + _root_path)

    print("_structure_row_node_name=" + str(_row_node_name))

    # Move on backward, what's left is the path to the parent of the row node, we need it to be able
    # to create rows, as the path may not return a node to find a parent from
    _row_node_parent_xpath = "".join(_a[0] + _a[1] for _a in _root_xpath_tokens[:_row_node_name_idx])

    print("_row_node_parent_xpath=" + str(_row_node_parent_xpath))
    return _root_node_name, _row_node_name, _row_node_parent_xpath
def tokens(cls, selector):
    """
    Tokenize a selector into a List of Op and Tag items.

    The token that follows an item equal to '=' must be wrapped in matching
    single or double quotes; the quotes are stripped, and NodeParsingError is
    raised when they are missing.

    >>> xpath_tokenizer('resource-lists')
    ['resource-lists']

    >>> xpath_tokenizer('list[@name="friends"]')
    ['list', '[', '@', 'name', '=', 'friends', ']']

    We cannot properly tokenize an URI like this :(
    >>> uri_ugly = 'external[@anchor="http://xcap.example.org/resource-lists/users/sip:[email protected]/index/~~/resource-lists/list[@name="mkting"]"]'
    >>> len(xpath_tokenizer(uri_ugly)) # expected 7
    10

    To feed such URI to this function, replace quote \" with &quot;
    >>> uri_nice = 'external[@anchor="http://xcap.example.org/resource-lists/users/sip:[email protected]/index/~~/resource-lists/list[@name=&quot;mkting&quot;]"]'
    >>> len(xpath_tokenizer(uri_nice)) # expected 7
    7
    """

    def strip_quotes(value):
        # XXX currently equivalent but differently encoded URIs won't be considered equal (&quot;, etc.)
        is_quoted = len(value) > 1 and value[0] == value[-1] and value[0] in '"\''
        if not is_quoted:
            raise NodeParsingError
        return value[1:-1]

    result = List()
    last = None
    for op, tag in ElementPath.xpath_tokenizer(selector):
        # Operators become Op items, everything else becomes a Tag.
        wrap, text = (Op, op) if op else (Tag, tag)
        if last == '=':
            text = strip_quotes(text)
        last = wrap(text)
        result.append(last)
    return result
def tokens(cls, selector):
    """
    Break a selector up into Op and Tag items, collected in a List.

    A value that comes right after an item equal to '=' has its surrounding
    quotes removed; NodeParsingError is raised if it is not quoted.

    >>> xpath_tokenizer('resource-lists')
    ['resource-lists']

    >>> xpath_tokenizer('list[@name="friends"]')
    ['list', '[', '@', 'name', '=', 'friends', ']']

    We cannot properly tokenize an URI like this :(
    >>> uri_ugly = 'external[@anchor="http://xcap.example.org/resource-lists/users/sip:[email protected]/index/~~/resource-lists/list[@name="mkting"]"]'
    >>> len(xpath_tokenizer(uri_ugly)) # expected 7
    10

    To feed such URI to this function, replace quote \" with &quot;
    >>> uri_nice = 'external[@anchor="http://xcap.example.org/resource-lists/users/sip:[email protected]/index/~~/resource-lists/list[@name=&quot;mkting&quot;]"]'
    >>> len(xpath_tokenizer(uri_nice)) # expected 7
    7
    """

    def keep(text):
        return text

    def unquote(text):
        # XXX currently equivalent but differently encoded URIs won't be considered equal (&quot;, etc.)
        if len(text) > 1 and text[0] == text[-1] and text[0] in '"\'':
            return text[1:-1]
        raise NodeParsingError

    collected = List()
    previous = None
    for operator, name in ElementPath.xpath_tokenizer(selector):
        # Only the token directly after '=' is an attribute value to unquote.
        convert = unquote if previous == '=' else keep
        previous = Op(convert(operator)) if operator else Tag(convert(name))
        collected.append(previous)
    return collected
def _structure_create_xpath_nodes(self, _node, _xpath):
    """Use an XPath to create nodes that match the path and its conditions (names, attributes and so forth).

    The XPath is tokenized and walked one "/" level at a time, starting at _node.
    For each level, the node returned by the lookup is reused when exactly one node
    is found; otherwise a new child is created with SubElement. When a created level
    has a condition of the form [@attribute=value], that attribute is set on the
    new node so that it satisfies the condition.

    :param _node: The node to start from; its .tag and .xpath() are used
        (presumably an lxml element - confirm against callers).
    :param _xpath: The XPath describing the nodes that should exist.
    :return: The node, found or created, that the walk ended on.
    :raises Exception: If the closing "]" of a condition cannot be found, or if the
        current node is a string instead of a node.
    """

    print("_create_xpath_nodes: " + str(_xpath))
    _curr_node = _node
    # Break up the string in its tokens
    _tokens = list(_elementpath.xpath_tokenizer(_xpath))
    print(str(_tokens))
    # Iterate through tokens, until we have gone through the entire XPath.
    _token_idx = 0
    # But first, move past any root reference and conditions.
    # TODO: This is a bit ugly, perhaps.
    if len(_tokens) > 1 and (_tokens[0][0] == "/") and (_tokens[1][1] == _node.tag):
        print("_create_xpath_nodes: Ignoring root node path and condition.")
        _token_idx += 1
        # Skip forward to the next level separator (or the end of the tokens)
        while _token_idx < len(_tokens) and _tokens[_token_idx][0] != "/":
            _token_idx += 1

    while _token_idx < len(_tokens):
        print("_tokens[" + str(_token_idx) + "][0]:" + str(_tokens[_token_idx][0]))
        # Is this a new level?
        if _tokens[_token_idx][0] == "/":
            # Then the next is the name of the node
            _token_idx += 1
            if _tokens[_token_idx][0] == "":
                _next_name = _tokens[_token_idx][1]
                # Is the next token a condition?
                if _token_idx + 1 < len(_tokens) and _tokens[_token_idx + 1][0] == "[":
                    # It was, move on
                    _token_idx += 1
                    # Find ending of condition
                    _end_cond_idx = self.find_next_match(_tokens, _token_idx, ("]", ""))
                    if _end_cond_idx == -1:
                        raise Exception("_create_xpath_nodes: Cannot find end of condition.\nXPath = " + _xpath)

                    # Create relative path: the node name followed by its whole condition
                    _check_path = "".join(_a[0] + _a[1] for _a in _tokens[_token_idx - 1:_end_cond_idx + 1])
                    # Then check if it exists
                    print("_check_path:" + str(_check_path))
                    _found_nodes = _curr_node.xpath(_check_path)
                    # Node found, move on
                    if _found_nodes and len(_found_nodes) == 1:
                        _token_idx += 1
                        _curr_node = _found_nodes[0]
                    else:
                        # If not found create node
                        print("add node: " + str(_next_name))
                        _curr_node = SubElement(_curr_node, _next_name)
                        # If they can be discerned(@id=value), add attributes.
                        # _token_idx is at "[", so "@", name, "=", value are the next four tokens.
                        # The length check keeps a short condition (like [1]) at the end of the
                        # XPath from indexing past the token list.
                        if (_token_idx + 4 < len(_tokens)
                                and _tokens[_token_idx + 3][0] == "="
                                and _tokens[_token_idx + 1][0] == "@"):
                            print("set attribute to satisfy this path: " + str(_check_path))
                            _curr_node.set(_tokens[_token_idx + 2][1],
                                           str(_tokens[_token_idx + 4][0]).strip("\"'"))
                            _token_idx += 5
                else:
                    if isinstance(_curr_node, str):
                        raise Exception(
                            "_structure_create_xpath_nodes - Internal error: Node is a string, missing node ref, "
                            "was _add_node_ref used? \nData :" + _curr_node + " | " + _next_name + " | " +
                            _xpath + " | " + str(_tokens))
                    _found_nodes = _curr_node.xpath(_next_name)
                    # Node found, move on
                    if len(_found_nodes) == 1:
                        _curr_node = _found_nodes[0]
                    else:
                        print("Create new node: " + str(_next_name))
                        _curr_node = SubElement(_curr_node, _next_name)

        # Tokens that do not start a new level (e.g. the rest of a condition) are stepped over
        _token_idx += 1

    return _curr_node