コード例 #1
0
   def build_node(self, node, srcnode):
      """Copy ``srcnode`` into ``node``, filtering and normalising on the way.

      The type and the start/end indices are copied unchanged.  A name that
      both starts and ends with ``-`` (for example ``-NONE-``) is kept
      verbatim; any other name is cut at its first ``-`` and passed through
      ``self.escape``.  Token text is passed through ``self.escape`` too.
      Constituents are rebuilt recursively from the children that survive
      this same filtering.

      Args:
         node: Output node; it is modified in place.
         srcnode: Source node providing ``type``, ``name``, ``start_index``
            and ``end_index``, plus ``token`` for tokens or ``children``
            for constituents.

      Returns:
         False when the caller should drop the node: either it is named
         ``-NONE-`` and ``self.m_bEmptyKeep`` is false, or it is a
         constituent none of whose children survived.  True otherwise.
         ``node`` may already be partially filled in when False is returned.
      """
      if not self.m_bEmptyKeep and srcnode.name == '-NONE-':
         return False
      node.type = srcnode.type
      name = srcnode.name
      # startswith/endswith rather than name[0]/name[-1], so that an empty
      # label is handled instead of raising IndexError.
      if name.startswith('-') and name.endswith('-'):
         node.name = name
      else:
         # Keep only the part of the label before its first '-'.
         node.name = self.escape(name.split("-")[0])
      node.start_index = srcnode.start_index
      node.end_index = srcnode.end_index
      if node.type == 'token':
         node.token = self.escape(srcnode.token)

      if srcnode.type == "constituent":
         node.children = []
         for srcchildnode in srcnode.children:
            childnode = fidtree.CTreeNode()
            # Children that report False are filtered out of the result.
            if self.build_node(childnode, srcchildnode):
               node.children.append(childnode)
         if not node.children:
            return False
         if self.m_bRemoveUnary and len(node.children) == 1:
            # Unary chain: hand the only surviving child to node.copy()
            # (presumably overwriting this node with it -- confirm in
            # fidtree.CTreeNode.copy).
            node.copy(node.children[0])
      return True
コード例 #2
0
 def process_noroot(self, sSentence, wfile):
     """Parse one bracketed sentence and write each converted tree to ``wfile``.

     The sentence is parsed with ``fidtree.parse_object``.  Every resulting
     tree whose top node is named ``ROOT`` is replaced by that node's first
     child.  Each tree is then converted with ``build_binarized_node`` (when
     ``self.m_bBinarize`` is set) or ``build_node``, and the converted
     tree's ``utf8print()`` text is written to ``wfile`` followed by a
     newline.  The return value of the build call is ignored.

     Args:
         sSentence: Text of one sentence; blank input is skipped.
         wfile: Object with a ``write`` method receiving the output lines.
     """
     # don't process empty sentences
     if sSentence.strip() == "":
         return
     # find the cfg node
     parsed = fidtree.parse_object(sSentence)
     # Normalise to a list before touching ``.name``: a plain list has no
     # such attribute, so checking for the ROOT wrapper first raised
     # AttributeError whenever the parser returned a list of trees.
     lHead = parsed if isinstance(parsed, list) else [parsed]
     # output one line per tree
     for head in lHead:
         if head.name == "ROOT":
             head = head.children[0]
         if self.m_bBinarize:
             outh = CBinarizedTreeNode()
             self.build_binarized_node(outh, head)
         else:
             outh = fidtree.CTreeNode()
             self.build_node(outh, head)
         wfile.write(outh.utf8print()+"\n")
コード例 #3
0
ファイル: unbinarize.py プロジェクト: svn2github/zpar-mirror
def cmpbasenp(src, tgt):
    """Copy ``src`` into ``tgt``, collapsing each innermost NP into one token.

    A token is copied as-is.  A constituent named ``NP`` with no ``NP``
    anywhere below it becomes a single token named ``NP`` whose text is the
    tokens underneath joined with ``_`` and wrapped in double underscores.
    Every other constituent is rebuilt with converted children.

    Args:
        src: Source node (``type``, ``name`` and ``token`` or ``children``).
        tgt: Output node; it is modified in place.

    Returns:
        A pair ``(hasnp, tokens)``: ``hasnp`` is True when ``src`` is an NP
        or contains one; ``tokens`` lists the token texts collected below
        ``src`` (empty when ``src`` was collapsed into a single token).
    """
    if src.type == 'token':
        tgt.type = 'token'
        tgt.token = src.token
        tgt.name = src.name
        return False, [src.token]

    contains_np = False
    words = []
    converted = []
    for child in src.children:
        new_child = fidtree.CTreeNode()
        child_has_np, child_words = cmpbasenp(child, new_child)
        if child_has_np:
            contains_np = True
        words += child_words
        converted.append(new_child)

    is_np = src.name == 'NP'
    if is_np and not contains_np:
        # Innermost NP: flatten it into one synthetic token.
        tgt.type = 'token'
        tgt.name = 'NP'
        tgt.token = '__' + '_'.join(words) + '__'
        return True, []

    tgt.type = 'constituent'
    tgt.children = converted
    tgt.name = src.name
    return (contains_np or is_np), words
コード例 #4
0
ファイル: unbinarize.py プロジェクト: svn2github/zpar-mirror
 def build_node(self, srcnode):
     """Convert a binarized node into a list of ``fidtree.CTreeNode``.

     A token yields a one-element list holding a new node with escaped name
     and token text.  A constituent flagged ``temporary`` yields the
     converted nodes of its children directly, without a node of its own.
     Any other constituent yields a one-element list holding a new node
     whose children are the converted nodes of its children.

     In both constituent cases ``right_child`` is skipped when
     ``head_child`` equals ``"s"`` (presumably marking a single-child
     node -- confirm against the binarized node class).

     Args:
         srcnode: Binarized source node.

     Returns:
         List of converted nodes.
     """
     if srcnode.type == "token":
         leaf = fidtree.CTreeNode()
         leaf.name = self.escape(srcnode.name)
         leaf.type = srcnode.type
         leaf.token = self.escape(srcnode.token)
         return [leaf]

     assert srcnode.type == "constituent"

     # Temporary nodes get no node of their own in the output.
     if srcnode.temporary:
         parent = None
     else:
         parent = fidtree.CTreeNode()
         parent.name = self.escape(srcnode.name)
         parent.type = srcnode.type

     subtrees = list(self.build_node(srcnode.left_child))
     if srcnode.head_child != "s":
         subtrees += self.build_node(srcnode.right_child)

     if parent is None:
         return subtrees
     parent.children = subtrees
     return [parent]
コード例 #5
0
 def process(self, sSentence):
     """Parse one bracketed sentence and print each converted tree.

     The sentence is parsed with ``fidtree.parse_object``; the result is
     treated as a list of trees (a single tree is wrapped in a list).  Each
     tree is converted with ``build_binarized_node`` (when
     ``self.m_bBinarize`` is set) or ``build_node``, and the converted tree
     is printed to standard output.  The return value of the build call is
     ignored.

     Args:
         sSentence: Text of one sentence; blank input is skipped.
     """
     # don't process empty sentences
     if sSentence.strip() == "":
         return
     # find the cfg node
     parsed = fidtree.parse_object(sSentence)
     lHead = parsed if isinstance(parsed, list) else [parsed]
     # output one line per tree
     for head in lHead:
         if self.m_bBinarize:
             outh = CBinarizedTreeNode()
             self.build_binarized_node(outh, head)
         else:
             outh = fidtree.CTreeNode()
             self.build_node(outh, head)
         # Parenthesised single-argument form: valid as a print statement
         # on Python 2 and as a function call on Python 3, same output.
         print(outh)