Esempio n. 1
0
 def GetRelevantRules(self, src_tree, src_path_state,
                      trg_tree=None, trg_path=None, rhs_child_pos=None):
   """
   Returns the rules that match src_tree at the position given by
   src_path_state, which is a pair (src_path, state).
   If trg_tree is given, the subtree of trg_tree at trg_path is also passed
   to the candidate lookup and to the matching filter, so that the returned
   rules match the src_tree AND the trg_tree.
   Results are memoized in self.relevant_rules_cache.
   """
   # The cache key only carries trg_path when a target tree was supplied.
   if trg_tree is None:
     cache_key = (src_path_state, rhs_child_pos)
   else:
     cache_key = (src_path_state, trg_path, rhs_child_pos)
   if cache_key in self.relevant_rules_cache:
     return self.relevant_rules_cache[cache_key]

   src_path, state = src_path_state
   src_subtree = tree_index(src_tree, src_path)
   src_pos, src_num_branches = LabelAndRank(src_subtree)
   # Target-side information stays None unless a target tree is available.
   trg_subtree = trg_pos = trg_num_branches = None
   if trg_tree is not None:
     trg_subtree = tree_index(trg_tree, trg_path)
     trg_pos, trg_num_branches = LabelAndRank(trg_subtree)

   candidate_indices = self.GetCandidateRuleIndices(
     state, src_pos, src_num_branches, trg_pos, trg_num_branches)
   candidates = [self.rules[index] for index in candidate_indices]
   matching = FilterMatchingRules(
     candidates, src_subtree, trg_subtree, rhs_child_pos)

   # When none of our rules match and back-offs are enabled, other resources
   # (e.g. WordNet) are searched for previously unseen, yet valid, rules for
   # this tree pattern.
   if not matching:
     if self.rule_backoffs:
       matching = ProduceUnseenRelevantRules(
         state, src_subtree, self.similarity_scorer)
   # The filter is applied once more, whether or not back-off rules were
   # produced.
   matching = FilterMatchingRules(
     matching, src_subtree, trg_subtree, rhs_child_pos)

   self.relevant_rules_cache[cache_key] = matching
   return matching
Esempio n. 2
0
    def apply(self, tree, statemap):
        """Returns a pair (new tree, new statemap) by applying this rule.

        The rule is applied at the smallest path found in statemap (its
        left-most entry when paths are sorted). The subtree at that path is
        rewritten with replace(subtree, self.lhs, self.rhs). In the returned
        statemap the entry for that path is removed and the entries of
        self.newstates are added, with their paths prefixed by the path
        where the rule was applied. Neither tree nor statemap is modified
        by this method itself; copies are returned.

        Returns (None, None) if statemap is empty or None.
        """
        if not statemap:
            return (None, None)
        # Paths are unique keys of the statemap, so the minimum path is the
        # entry that a full sort would place first; no need to sort them all.
        path, _ = min(statemap.items(), key=itemgetter(0))
        newsubtree = replace(tree_index(tree, path), self.lhs, self.rhs)

        # Paths in self.newstates are relative to the rewritten subtree, so
        # they are re-rooted at the path where the rule was applied.
        prefix = list(path)
        newstates_with_prepend = {
            tuple(prefix + list(rulepath)): rulestate
            for (rulepath, rulestate) in self.newstates.items()
        }
        newstatemap = deepcopy(statemap)
        del newstatemap[path]
        newstatemap.update(newstates_with_prepend)

        # Rewriting at the root: the new subtree is the whole new tree.
        if path == ():
            return (newsubtree, newstatemap)
        newtree = deepcopy(tree)
        newtree[path] = newsubtree
        return (newtree, newstatemap)
Esempio n. 3
0
 def GetRelevantRules(self, tree, path_state):
   """
   Returns the rules stored in self.index for the subtree of tree found at
   the path in path_state, where path_state is a pair (path, state).
   The index key is (state, top label, labels of the tree branches joined
   by spaces). If the subtree is not an NLTKTree, the subtree itself is used
   for both the top label and the branches.
   """
   path, state = path_state
   node = tree_index(tree, path)
   if isinstance(node, NLTKTree):
     # Only branches that are trees themselves contribute a label.
     branch_labels = []
     for branch in node:
       if isinstance(branch, NLTKTree):
         branch_labels.append(get_top(branch))
     branches_key = ' '.join(branch_labels)
     top_key = get_top(node)
   else:
     branches_key = top_key = node
   return [self.rules[i] for i in self.index[(state, top_key, branches_key)]]
Esempio n. 4
0
def ObtainTreePattern(tree, path, subpaths):
    """
    Builds a tree pattern from the subtree of tree located at path.

    Every position listed in subpaths is replaced by a variable '?x<i>|',
    where <i> is the index of that position within subpaths. When the
    replaced node is an NLTKTree, its top label (as given by get_top) is
    appended to the variable.

    Returns:
      - a deep copy of the subtree, if subpaths is empty;
      - the single variable '?x0|' (plus the top label for trees), if the
        first subpath is () or is equal to path;
      - otherwise, a deep copy of the subtree in which the nodes at subpaths
        have been replaced by variables.

    Raises:
      ValueError: if the subtree is not an NLTKTree (e.g. a string) but
        subpaths refer to positions other than the subtree itself.
    """
    subtree = tree_index(tree, path)
    if not subpaths:
        return deepcopy(subtree)
    is_tree = isinstance(subtree, NLTKTree)
    # The whole subtree is turned into a single variable.
    if subpaths[0] == () or path == subpaths[0]:
        if is_tree:
            return '?x0|' + get_top(subtree)
        return '?x0|'
    if not is_tree:
        # Raise a proper exception instance. Raising the tuple
        # (ValueError, message) is itself a TypeError on Python 3.
        raise ValueError(
            'String {0} cannot be indexed by {1}'.format(subtree, subpaths))
    depth_subtree = len(path)
    tree_pattern = deepcopy(subtree)
    for i, subpath in enumerate(subpaths):
        branch = tree_index(tree, subpath)
        variable = '?x' + str(i) + '|'
        if isinstance(branch, NLTKTree):
            variable += get_top(branch)
        # subpaths are given from the root of tree; strip the prefix that
        # leads to the subtree to index into the copied pattern.
        tree_pattern[subpath[depth_subtree:]] = variable
    return tree_pattern
Esempio n. 5
0
def GetDisjointPaths(tree,
                     path,
                     min_paths,
                     max_paths,
                     max_depth=4,
                     candidate_paths=None):
    """
    This function returns a list of tuples of disjoint paths. E.g.
    [((path1, path2)), ((path1, path3)), ...]. A list such as
    [()] represents a single tuple of disjoint paths, which contains
    no paths. That element "()" is important to signal tree patterns
    to have no subpaths, converting them into simple subtrees with no
    variables.

    candidate_paths is a variable containing a list of tuples, where
    each tuple is a valid path in the tree. If the candidate_paths
    variable is set, the disjoint paths will be generated from the
    list of candidate paths. Otherwise, the disjoint paths will be
    generated from the subpaths of the parameter "path" whose relative
    depth is greater than 0 and lower than max_depth.

    Only combinations of at least min_paths and at most max_paths paths
    that satisfy AreDisjointPaths are returned.
    """
    subtree = tree_index(tree, path)
    # Case where the subtree is a string.
    # The disjoint paths [()] means that there is only one set of disjoint
    # paths, which is (), that is the empty set of disjoint paths.
    if not isinstance(subtree, NLTKTree):
        disjoint_paths = []
        # Add the tuple containing zero number of paths.
        if min_paths == 0:
            disjoint_paths.append(())
        # NOTE(review): the single-path tuple is added whenever max_paths > 0,
        # even if min_paths is greater than 1 -- confirm this is intended.
        if max_paths > 0:
            disjoint_paths.append((path, ))
        return disjoint_paths
    # Case where the subtree is a tree.
    if candidate_paths is None:
        paths = [path + subpath for subpath in subtree.treepositions()
                 if 0 < len(subpath) < max_depth]
    else:
        paths = candidate_paths
    # A list (rather than a generator) is returned, for caching and faster
    # computation.
    return [combined_paths
            for k in range(min_paths, max_paths + 1)
            for combined_paths in itertools.combinations(paths, k)
            if AreDisjointPaths(combined_paths)]
Esempio n. 6
0
def GetDisjointPathsWithPermutations(tree,
                                     path,
                                     min_paths,
                                     max_paths,
                                     max_depth=4):
    """
    Produces every ordering of every tuple of disjoint paths below "path".

    If the subtree of tree at path is an NLTKTree, the candidate paths are
    path + subpath for every tree position of the subtree whose length is
    lower than max_depth. For each combination of min_paths to max_paths
    candidates that satisfies AreDisjointPaths, all of its permutations are
    produced, lazily, through an iterator.

    If the subtree is not an NLTKTree (e.g. a string), a list is returned
    instead. It contains () when min_paths is 0, and ((), ) when min_paths
    is lower than 2 and max_paths is greater than 0.
    """
    subtree = tree_index(tree, path)
    if isinstance(subtree, NLTKTree):
        # Case where the subtree is a tree (return a lazy iterator).
        positions = []
        for subpath in subtree.treepositions():
            if len(subpath) < max_depth:
                positions.append(path + subpath)
        disjoint_groups = (
            group
            for size in range(min_paths, max_paths + 1)
            for group in itertools.combinations(positions, size)
            if AreDisjointPaths(group))
        return itertools.chain.from_iterable(
            itertools.permutations(group) for group in disjoint_groups)
    # Case where the subtree is a string.
    string_case_paths = []
    if min_paths == 0:
        string_case_paths.append(())
    if min_paths < 2 and max_paths > 0:
        string_case_paths.append(((), ))
    return string_case_paths