def build_node(self, node, srcnode):
    """Fill ``node`` with a cleaned-up copy of the subtree rooted at ``srcnode``.

    Labels that are not wrapped in dashes are cut at their first '-' and
    escaped; dash-wrapped labels (e.g. '-NONE-') are copied unchanged.
    Children for which the recursive call returns False are left out.

    Returns True when ``node`` was populated and should be kept, False when
    it must be discarded: either a '-NONE-' node while ``self.m_bEmptyKeep``
    is off, or a constituent that ends up with no children.
    """
    if not self.m_bEmptyKeep and srcnode.name == '-NONE-':
        return False

    node.type = srcnode.type

    # startswith/endswith instead of name[0]/name[-1] so that an empty label
    # does not raise IndexError.
    label = srcnode.name
    if label.startswith('-') and label.endswith('-'):
        # dash-wrapped label: keep it exactly as it is
        node.name = label
    else:
        # keep only the part before the first '-'
        node.name = self.escape(label.split("-")[0])

    node.start_index = srcnode.start_index
    node.end_index = srcnode.end_index

    if node.type == 'token':
        node.token = self.escape(srcnode.token)

    if srcnode.type == "constituent":
        node.children = []
        for srcchildnode in srcnode.children:
            childnode = fidtree.CTreeNode()
            if self.build_node(childnode, srcchildnode):
                node.children.append(childnode)
        # every child was dropped, so this constituent is dropped too
        if not node.children:
            return False
        # unary constituent: overwrite this node with its only child
        if self.m_bRemoveUnary and len(node.children) == 1:
            node.copy(node.children[0])

    return True
def process_noroot(self, sSentence, wfile):
    """Parse one bracketed sentence and write the rebuilt tree(s) to ``wfile``.

    A tree whose top node is named "ROOT" is replaced by that node's first
    child before conversion.  One line is written per tree, using the
    binarized builder when ``self.m_bBinarize`` is set and the plain builder
    otherwise.  Blank input is ignored.
    """
    # don't process empty sentences
    if sSentence.strip() == "":
        return

    # find the cfg node(s); the parser result is treated as either a single
    # tree or a list of trees, so normalise to a list *before* touching
    # any node attribute (a list has no .name).
    parsed = fidtree.parse_object(sSentence)
    lHead = parsed if isinstance(parsed, list) else [parsed]

    for head in lHead:
        # drop the ROOT wrapper, keeping only its first child
        if head.name == "ROOT":
            head = head.children[0]
        if self.m_bBinarize:
            outh = CBinarizedTreeNode()
            self.build_binarized_node(outh, head)
        else:
            outh = fidtree.CTreeNode()
            self.build_node(outh, head)
        wfile.write(outh.utf8print()+"\n")
def cmpbasenp(src, tgt):
    """Copy the tree ``src`` into ``tgt``, collapsing innermost NPs into tokens.

    An NP for which no child subtree reported an NP is rewritten as a single
    token node named 'NP'; its text is the collected tokens joined with '_'
    and wrapped in '__' on both sides.

    Returns a pair ``(found_np, tokens)``: whether this subtree is or
    contains an NP, and the tokens of the subtree that were not absorbed by
    a collapsed NP.
    """
    # Leaves are copied verbatim and contribute their own token.
    if src.type == 'token':
        tgt.type = 'token'
        tgt.token = src.token
        tgt.name = src.name
        return False, [src.token]

    found_np = False
    copies = []
    words = []
    for child in src.children:
        child_copy = fidtree.CTreeNode()
        child_has_np, child_words = cmpbasenp(child, child_copy)
        found_np |= child_has_np
        words.extend(child_words)
        copies.append(child_copy)

    # An NP with no NP underneath becomes one merged token; its words are
    # consumed, so nothing is handed up to the parent.
    if src.name == 'NP' and not found_np:
        tgt.type = 'token'
        tgt.name = 'NP'
        tgt.token = '__' + '_'.join(words) + '__'
        return True, []

    tgt.type = 'constituent'
    tgt.children = copies
    tgt.name = src.name
    # An NP that reaches this point has a nested NP, so found_np is already True.
    return found_np, words
def build_node(self, srcnode):
    """Rebuild the subtree under ``srcnode`` as a list of plain tree nodes.

    A token yields a one-element list holding its escaped copy.  A
    constituent flagged ``temporary`` produces no node of its own: the nodes
    built from its children are returned directly, so they end up attached
    to the nearest non-temporary ancestor.  Any other constituent yields a
    one-element list with a new node that owns the rebuilt children.

    The right child is skipped when ``head_child`` is "s" (presumably a
    single-child node -- confirm against the binarized node class).
    """
    if srcnode.type == "token":
        leaf = fidtree.CTreeNode()
        leaf.name = self.escape(srcnode.name)
        leaf.type = srcnode.type
        leaf.token = self.escape(srcnode.token)
        return [leaf]

    assert srcnode.type == "constituent"

    if srcnode.temporary:
        # splice: pass the children's nodes straight up to the caller
        spliced = []
        spliced.extend(self.build_node(srcnode.left_child))
        if srcnode.head_child != "s":
            spliced.extend(self.build_node(srcnode.right_child))
        return spliced

    parent = fidtree.CTreeNode()
    parent.name = self.escape(srcnode.name)
    parent.type = srcnode.type
    parent.children = []
    parent.children.extend(self.build_node(srcnode.left_child))
    if srcnode.head_child != "s":
        parent.children.extend(self.build_node(srcnode.right_child))
    return [parent]
def process(self, sSentence):
    """Parse one bracketed sentence and print the rebuilt tree(s) to stdout.

    One line is printed per tree, using the binarized builder when
    ``self.m_bBinarize`` is set and the plain builder otherwise.  Blank
    input is ignored.
    """
    # don't process empty sentences
    if sSentence.strip() == "":
        return

    # find the cfg node(s); the parser result is treated as either a single
    # tree or a list of trees
    parsed = fidtree.parse_object(sSentence)
    lHead = parsed if isinstance(parsed, list) else [parsed]

    for head in lHead:
        if self.m_bBinarize:
            outh = CBinarizedTreeNode()
            self.build_binarized_node(outh, head)
        else:
            outh = fidtree.CTreeNode()
            self.build_node(outh, head)
        # single-argument print(...) gives the same output under the
        # Python 2 print statement and the Python 3 print function
        print(outh)