コード例 #1
0
ファイル: head_finder.py プロジェクト: jkkummerfeld/nlp-util
def pennconverter_find_heads(tree, head_map=None):
  if head_map is None:
    head_map = {}
    tree = treebanks.remove_coindexation(tree, False)
  for subtree in tree.subtrees:
    pennconverter_find_heads(subtree, head_map)

  if log: print "Head for", tree.span, tree.label

  # A word is it's own head
  if tree.word is not None:
    head = (tree.span, tree.word, tree.label)
    add_head(head_map, tree, head)
    return head_map

  # First handle conjunctions
  coord = pennconverter_is_coord(tree)
  if coord:
    if not add_if_match(tree, {'CC', 'CONJP'}, head_map, True):
      if not add_if_match(tree, {',', ':'}, head_map, True):
        add_head(head_map, tree, get_head(head_map, tree.subtrees[-1]))
    return head_map
  
  # If the label for this node is not in the table we are either at the bottom,
  # at an NP, or have an error
  base_label = treebanks.split_label_type_and_function(tree.label)[0]
  if base_label not in pennconverter_mapping_table:
    if base_label in ['NP', 'NML']:
      collins_NP(tree, head_map)
    elif base_label in ['PP', 'WHPP']:
      pennconverter_PP(tree, head_map)
    else:
      add_head(head_map, tree, get_head(head_map, tree.subtrees[-1]))
    return head_map
  
  # Look through and take the first/last occurrence that matches
  info = pennconverter_mapping_table[base_label]
  for label in info[1]:
    for i in xrange(len(tree.subtrees)):
      if info[0] == 'right':
        i = len(tree.subtrees) - i - 1
      subtree = tree.subtrees[i]
      if isinstance(label, str):
        if subtree.label == label:
          add_head(head_map, tree, get_head(head_map, subtree))
          return head_map
      else:
        if re.match(label, subtree.label) is not None:
          add_head(head_map, tree, get_head(head_map, subtree))
          return head_map

  # Final fallback
  if info[0] == 'left':
    add_head(head_map, tree, get_head(head_map, tree.subtrees[0]))
  else:
    add_head(head_map, tree, get_head(head_map, tree.subtrees[-1]))

  return head_map
コード例 #2
0
ファイル: head_finder.py プロジェクト: jkkummerfeld/nlp-util
def get_signature(head_map, tree):
  """Return the key under which tree is stored in head_map, or None.

  Candidate keys are built lazily and tried in this order; the first one
  present in head_map is returned:
    1. (tree.span, tree.label)
    2. (tree.wordspan, label of treebanks.remove_coindexation(tree, False))
    3. (tree.wordspan, first item of
        treebanks.split_label_type_and_function(tree.label))
  """
  # Each builder runs only if every earlier key was absent from head_map
  key_builders = (
    lambda: (tree.span, tree.label),
    lambda: (tree.wordspan,
             treebanks.remove_coindexation(tree, False).label),
    lambda: (tree.wordspan,
             treebanks.split_label_type_and_function(tree.label)[0]),
  )
  for build_key in key_builders:
    key = build_key()
    if key in head_map:
      return key
  return None
コード例 #3
0
ファイル: head_finder.py プロジェクト: jkkummerfeld/nlp-util
def get_head(head_map, tree, amend_for_trace=False):
  """Return the head_map entry stored for tree, or None if there is none.

  Candidate keys are built lazily and tried in this order; the value of the
  first one present in head_map is returned:
    1. (tree.span, tree.label) - skipped when amend_for_trace is true
    2. (tree.wordspan, label of treebanks.remove_coindexation(tree, False))
    3. (tree.wordspan, first item of
        treebanks.split_label_type_and_function(tree.label))
  """
  key_builders = []
  if not amend_for_trace:
    # The exact span/label key is only consulted in the default mode
    key_builders.append(lambda: (tree.span, tree.label))
  key_builders.append(
    lambda: (tree.wordspan,
             treebanks.remove_coindexation(tree, False).label))
  key_builders.append(
    lambda: (tree.wordspan,
             treebanks.split_label_type_and_function(tree.label)[0]))

  # Each builder runs only if every earlier key was absent from head_map
  for build_key in key_builders:
    key = build_key()
    if key in head_map:
      return head_map[key]
  return None
コード例 #4
0
def without_func(label):
  """Return the first item of treebanks.split_label_type_and_function(label).

  NOTE(review): presumably this is the label with its function tags
  removed - confirm against treebanks.split_label_type_and_function.
  """
  parts = treebanks.split_label_type_and_function(label)
  return parts[0]