def seq2tree_repr_to_ast_tree_helper(tree_repr, offset):
    """Parse one node of a seq2tree string, starting at ``offset``.

    The node token runs from ``offset`` up to the next space (or the end of
    the string) and is matched against ``node_re`` to obtain its ``type``,
    ``label`` and ``value`` groups. If the character following that space is
    ``'('``, children are parsed recursively until a ``')'`` (or the end of
    the string) is found where the next child would begin.

    Args:
        tree_repr: the full seq2tree string.
        offset: index at which the node token starts.

    Returns:
        A ``(node, offset)`` pair: the parsed ``ASTNode`` and the index at
        which the caller should continue reading.
    """
    repr_len = len(tree_repr)

    # Scan forward to the end of the node token.
    token_end = offset
    while token_end < repr_len and tree_repr[token_end] != ' ':
        token_end += 1

    match = node_re.match(tree_repr[offset:token_end])
    node_type = type_str_to_type(match.group('type'))
    label = match.group('label')
    value = match.group('value')

    # Builtin-typed nodes carry a value that is cast from its string form.
    # NOTE(review): bool() of any non-empty string is True -- confirm that
    # this is what the serializer expects for boolean values.
    if node_type in {int, float, str, bool}:
        value = node_type(value)

    # Empty strings stand for "no label" / "no value".
    if label == '':
        label = None
    if value == '':
        value = None

    node = ASTNode(node_type, label=label, value=value)

    cursor = token_end
    if cursor == repr_len:
        # The node token was the last thing in the string.
        return node, cursor

    cursor += 1  # step over the space that ended the token
    if tree_repr[cursor] != '(':
        return node, cursor

    # Step over the '(' and the single character that follows it.
    cursor += 2
    while True:
        child, cursor = seq2tree_repr_to_ast_tree_helper(
            tree_repr, offset=cursor)
        node.add_child(child)

        if cursor >= repr_len or tree_repr[cursor] == ')':
            # Step over the ')' and the single character that follows it.
            cursor += 2
            break

    return node, cursor
def unary_link_to_closure(unary_link):
    """Collapse a unary chain into a two-node closure tree.

    The result is a root with the type of ``unary_link`` and a single child
    with the type of the first leaf returned by ``unary_link.get_leaves()``.
    The child's label encodes the chain's productions: each production is
    rendered with ``str``, its spaces are replaced by ``'$'``, and the pieces
    are joined with ``'@'``.

    Args:
        unary_link: tree exposing ``type``, ``get_leaves()`` and
            ``get_productions()``.

    Returns:
        The newly built closure root node.
    """
    closure_root = ASTNode(unary_link.type)

    first_leaf = unary_link.get_leaves()[0]
    leaf_copy = ASTNode(first_leaf.type)

    # Only the first element of the returned pair is used.
    productions, _ = unary_link.get_productions()
    encoded_rules = [str(rule).replace(' ', '$') for rule in productions]
    leaf_copy.label = '@'.join(encoded_rules)

    closure_root.add_child(leaf_copy)
    return closure_root
def ifttt_ast_to_parse_tree_helper(s, offset):
    """Parse one parenthesised node of an IFTTT AST string.

    Adapted from the ifttt codebase.

    A node has the form ``(NAME child child ...)``. ``NAME`` is either a bare
    token ending at the next space or ``')'``, or a double-quoted string in
    which a backslash escapes the following character. Children are nodes of
    the same form, each preceded by a single space.

    Args:
        s: the serialized tree.
        offset: index of the ``'('`` that opens the node to parse.

    Returns:
        A ``(node, offset)`` pair: the parsed ``ASTNode`` and the index just
        past the node's closing ``')'``.

    Raises:
        RuntimeError: if the node does not start with ``'('``, or if the
            name or a child is followed by something other than a space or
            ``')'``.
        IndexError: if ``s`` ends before the node is closed.
    """
    if s[offset] != '(':
        # The offset is formatted into the message; concatenating the int
        # directly would raise TypeError instead of this error.
        raise RuntimeError(
            'malformed string: node did not start with open paren at position %d'
            % offset)

    offset += 1

    # extract node name (type)
    name_chars = []
    if s[offset] == '\"':
        offset += 1
        while s[offset] != '\"':
            if s[offset] == '\\':
                # Skip the backslash and take the escaped character verbatim.
                offset += 1
            name_chars.append(s[offset])
            offset += 1
        offset += 1  # step over the closing quote
    else:
        while s[offset] != ' ' and s[offset] != ')':
            name_chars.append(s[offset])
            offset += 1

    node = ASTNode(''.join(name_chars))

    while True:
        if s[offset] == ')':
            offset += 1
            return node, offset

        if s[offset] != ' ':
            raise RuntimeError(
                'malformed string: node should have either had a '
                'close paren or a space at position %d' % offset)

        offset += 1  # step over the space separating children
        child_node, offset = ifttt_ast_to_parse_tree_helper(s, offset)
        node.add_child(child_node)
def extract_unary_closure_helper(parse_tree, unary_link, last_node):
    """Recursively collect the unary chains found in ``parse_tree``.

    While walking down through single-child nodes, a copy of each child
    (type and label) is appended under ``last_node``, growing the chain
    rooted at ``unary_link``. The chain is reported once a leaf or a node
    with several children is reached, provided ``unary_link.size`` is
    greater than 2. Every child of a multi-child node starts a fresh chain.

    Args:
        parse_tree: the subtree currently being visited.
        unary_link: root of the chain being built for the current path.
        last_node: deepest node of that chain, where the next copy goes.

    Returns:
        A list of the chain roots that satisfied the size condition.
    """
    if parse_tree.is_leaf:
        return [unary_link] if (unary_link and unary_link.size > 2) else []

    if len(parse_tree.children) > 1:
        # The current chain ends here; report it before branching out.
        collected = [unary_link] if (unary_link and unary_link.size > 2) else []
        for subtree in parse_tree.children:
            fresh_root = ASTNode(subtree.type)
            collected += extract_unary_closure_helper(
                subtree, fresh_root, fresh_root)
        return collected

    # Single child: extend the current chain by one node and keep descending.
    only_child = parse_tree.children[0]
    link_node = ASTNode(only_child.type, label=only_child.label)
    last_node.add_child(link_node)
    return extract_unary_closure_helper(only_child, unary_link, link_node)
def break_value_nodes(tree, hs=False):
    """Split string-valued nodes into one child per token, in place.

    A node whose type is ``str`` and whose value is not ``None`` must be a
    leaf. Its value is split on single spaces, the node's value is replaced
    by ``'NT'``, and one child of the same type is added per token, carrying
    the escaped token as its value. All other nodes are left untouched and
    their children are visited recursively.

    Args:
        tree: root of the (sub)tree to rewrite.
        hs: when true, ``' #MERGE# '`` is inserted between a lowercase letter
            and an immediately following uppercase letter before splitting.
    """
    if not (tree.type == str and tree.value is not None):
        for subtree in tree.children:
            break_value_nodes(subtree, hs=hs)
        return

    assert tree.is_leaf

    text = tree.value
    if hs:
        # Mark lowercase->uppercase boundaries so they become token breaks.
        text = re.sub(r'([a-z])([A-Z])', r'\1 #MERGE# \2', text)
    pieces = text.split(' ')

    tree.value = 'NT'
    for piece in pieces:
        assert piece is not None
        tree.add_child(ASTNode(tree.type, value=escape(piece)))
def compressed_ast_to_normal(parse_tree):
    """Expand compressed nodes of ``parse_tree`` back into node chains, in place.

    A node counts as compressed when its label is non-empty and contains
    both ``'@'`` and ``'$'``. Such a label is decoded by turning ``'$'`` back
    into spaces and splitting on ``'@'``; each piece is matched against
    ``rule_regex`` and yields one node, built from the ``child`` and
    ``clabel`` groups and linked under the previous one. The last node of the
    chain receives the compressed node's value and children, and the chain
    then replaces the compressed node under its parent, which must have
    exactly one child.

    Nodes that are not compressed are left as they are and their children are
    processed recursively.

    Args:
        parse_tree: the (sub)tree to expand.
    """
    packed_label = parse_tree.label
    is_compressed = packed_label and '@' in packed_label and '$' in packed_label

    if not is_compressed:
        # Iterate over a copy: expanding a child rewrites this child list.
        for subtree in parse_tree.children[:]:
            compressed_ast_to_normal(subtree)
        return

    chain_head = chain_tail = None
    for rule_text in packed_label.replace('$', ' ').split('@'):
        match = rule_regex.match(rule_text)
        parent_name = match.group('parent')
        child_name = match.group('child')
        child_label = match.group('clabel')

        # The parent type is resolved but its result is not used.
        type_str_to_type(parent_name)
        link = ASTNode(type_str_to_type(child_name), label=child_label)

        if chain_tail:
            chain_tail.add_child(link)
        if not chain_head:
            chain_head = link
        chain_tail = link

    # The end of the chain takes over the payload of the compressed node.
    chain_tail.value = parse_tree.value
    for subtree in parse_tree.children:
        chain_tail.add_child(subtree)
        compressed_ast_to_normal(subtree)

    # Swap the compressed node for the head of the expanded chain.
    holder = parse_tree.parent
    assert len(holder.children) == 1
    del holder.children[0]
    holder.add_child(chain_head)
def __getitem__(self, lhs):
    """Return the ``rule_index`` entry for the type of ``lhs``.

    Args:
        lhs: node whose ``type`` selects the entry; its other attributes
            are ignored.

    Returns:
        The value stored in ``self.rule_index`` for that type.

    Raises:
        KeyError: if nothing is indexed under that type.
    """
    key_node = ASTNode(lhs.type, None)  # Rules are indexed by types only
    if key_node in self.rule_index:
        return self.rule_index[key_node]

    # The exception must actually be raised; merely constructing it would
    # make a missing key silently return None.
    raise KeyError('key=%s' % key_node)
def add_root(tree):
    """Wrap ``tree`` under a new node of type ``'root'``.

    Args:
        tree: the tree to attach as the only child of the new root.

    Returns:
        The newly created root node.
    """
    wrapper = ASTNode('root')
    wrapper.add_child(tree)
    return wrapper
def python_ast_to_parse_tree(node):
    """Convert a Python ``ast`` node into an ``ASTNode`` parse tree.

    The returned tree is rooted at a node typed by the class of ``node``.
    Each non-empty, non-blacklisted field becomes one child labelled with the
    field name:

    * a field holding an AST node becomes a child of the declared field type
      wrapping the converted sub-node;
    * a field holding a plain ``str``/``int``/``float``/``object``/``bool``
      becomes a child typed by the value's own type, carrying the value;
    * a list field becomes a child typed ``<typename>*`` with one entry per
      element.

    Args:
        node: an instance of ``ast.AST``.

    Returns:
        The root ``ASTNode`` of the converted tree.

    Raises:
        RuntimeError: if a field value matches none of the cases above.
    """
    assert isinstance(node, ast.AST)

    node_type = type(node)
    tree = ASTNode(node_type)

    # A node class without fields (e.g. Add, Break) is a plain leaf.
    if len(node._fields) == 0:
        return tree

    # A compositional node whose fields are all empty gets an epsilon child.
    if is_compositional_leaf(node):
        tree.add_child(ASTNode('epsilon'))
        return tree

    fields_info = PY_AST_NODE_FIELDS[node_type.__name__]

    # Exact types (no subclasses) that are stored directly as node values.
    primitive_types = (str, int, float, object, bool)
    # List element types that are attached without an intermediate node.
    direct_element_types = {ast.comprehension, ast.excepthandler,
                            ast.arguments, ast.keyword, ast.alias}

    for field_name, field_value in ast.iter_fields(node):
        # Skip blacklisted fields (ctx and the like).
        if field_name in NODE_FIELD_BLACK_LIST:
            continue

        # Skip empty fields, including empty lists.
        if field_value is None:
            continue
        if isinstance(field_value, list) and not field_value:
            continue

        field_type = fields_info[field_name]['type']
        is_list_field = fields_info[field_name]['is_list']
        value_type = type(field_value)

        if isinstance(field_value, ast.AST):
            child = ASTNode(field_type, field_name)
            child.add_child(python_ast_to_parse_tree(field_value))
        elif any(value_type is prim for prim in primitive_types):
            child = ASTNode(value_type, field_name, value=field_value)
        elif is_list_field:
            child = ASTNode(typename(field_type) + '*', field_name)
            for element in field_value:
                if field_type in direct_element_types:
                    child.add_child(python_ast_to_parse_tree(element))
                    continue

                # Other elements are wrapped in a node of the field type.
                wrapper = ASTNode(field_type)
                if field_type is str:
                    wrapper.value = element
                else:
                    wrapper.add_child(python_ast_to_parse_tree(element))
                child.add_child(wrapper)
        else:
            raise RuntimeError('unknown AST node field!')

        tree.add_child(child)

    return tree
def extract_unary_closure(parse_tree):
    """Return the unary chains found in ``parse_tree``.

    Seeds the recursive helper with a fresh copy of the root (type only),
    which serves both as the chain root and as its current end.

    Args:
        parse_tree: root of the tree to scan.

    Returns:
        The list produced by ``extract_unary_closure_helper``.
    """
    chain_seed = ASTNode(parse_tree.type)
    return extract_unary_closure_helper(parse_tree, chain_seed, chain_seed)