def GetRelevantRules(self, src_tree, src_path_state,
                     trg_tree=None, trg_path=None, rhs_child_pos=None):
    """
    Return the rules that match src_tree at the given source path and state.

    If trg_tree and trg_path are given, only rules that match the src_tree
    AND the trg_tree (at src_path and trg_path) are returned.

    Results are memoized in self.relevant_rules_cache. The cache key is built
    from src_path_state, rhs_child_pos and (when a target tree is given)
    trg_path; the trees themselves are not part of the key.
    """
    has_target = trg_tree is not None

    # Serve the answer from the cache when it was computed before.
    if has_target:
        cache_key = (src_path_state, trg_path, rhs_child_pos)
    else:
        cache_key = (src_path_state, rhs_child_pos)
    if cache_key in self.relevant_rules_cache:
        return self.relevant_rules_cache[cache_key]

    # Describe the source side (and the target side, when present) by the
    # label and rank of the subtree found at the requested path.
    src_path, state = src_path_state
    src_subtree = tree_index(src_tree, src_path)
    src_pos, src_num_branches = LabelAndRank(src_subtree)

    trg_subtree = trg_pos = trg_num_branches = None
    if has_target:
        trg_subtree = tree_index(trg_tree, trg_path)
        trg_pos, trg_num_branches = LabelAndRank(trg_subtree)

    candidate_indices = self.GetCandidateRuleIndices(
        state, src_pos, src_num_branches, trg_pos, trg_num_branches)
    candidates = [self.rules[index] for index in candidate_indices]
    matched = FilterMatchingRules(
        candidates, src_subtree, trg_subtree, rhs_child_pos)

    # If there are no relevant rules in our set, we search in other resources
    # (e.g. WordNet) for previously unseen (yet valid) rules for this tree
    # pattern, and filter those the same way.
    if not matched and self.rule_backoffs:
        unseen = ProduceUnseenRelevantRules(
            state, src_subtree, self.similarity_scorer)
        matched = FilterMatchingRules(
            unseen, src_subtree, trg_subtree, rhs_child_pos)

    # Remember the result for subsequent identical queries.
    self.relevant_rules_cache[cache_key] = matched
    return matched
def apply(self, tree, statemap):
    """Returns a pair (new tree, new statemap) by applying this rule.

    The rule is applied at the smallest path present in statemap, so the
    left-most branch is always chosen. Returns (None, None) when statemap
    is empty. Neither tree nor statemap is modified; copies are returned.
    """
    if not statemap:
        return (None, None)

    # Paths are unique dictionary keys, so the minimum is exactly the first
    # element of the path-sorted list: the left-most branch.
    path = min(statemap)
    rewritten = replace(tree_index(tree, path), self.lhs, self.rhs)

    # The consumed state is dropped, and the states introduced by the rule
    # are registered under paths prefixed with the application path.
    updated_states = deepcopy(statemap)
    del updated_states[path]
    prefix = list(path)
    for rule_path, rule_state in self.newstates.items():
        updated_states[tuple(prefix + list(rule_path))] = rule_state

    # Applying at the root replaces the whole tree.
    if path == ():
        return (rewritten, updated_states)

    result_tree = deepcopy(tree)
    result_tree[path] = rewritten
    return (result_tree, updated_states)
def GetRelevantRules(self, tree, path_state):
    """
    Return the rules indexed under the state and shape of a subtree.

    path_state is a (path, state) pair. The subtree found at path is
    summarized by its top label and by the space-joined top labels of its
    tree-valued children; a non-tree subtree stands for both values itself.
    The triple (state, label, children labels) is looked up in self.index,
    and the rule indices found there are resolved through self.rules.
    """
    path, state = path_state
    subtree = tree_index(tree, path)
    if isinstance(subtree, NLTKTree):
        # Only children that are trees themselves contribute a label.
        children_labels = ' '.join(
            get_top(child) for child in subtree if isinstance(child, NLTKTree))
        top_label = get_top(subtree)
    else:
        children_labels = subtree
        top_label = subtree
    lookup_key = (state, top_label, children_labels)
    return [self.rules[index] for index in self.index[lookup_key]]
def ObtainTreePattern(tree, path, subpaths):
    """
    Build a tree pattern from the subtree at path, with variables at subpaths.

    Args:
      tree: the tree that path and subpaths index into.
      path: path of the subtree that becomes the pattern.
      subpaths: sequence of paths (indexed from the root of tree, like path)
        whose branches are replaced by variables '?x<i>|', followed by the
        branch's top label when the branch is a tree.

    Returns:
      A deep copy of the subtree when subpaths is empty; the string
      '?x0|' (plus the subtree's top label when it is a tree) when the first
      subpath is () or equal to path; otherwise a deep copy of the subtree
      with every subpath branch replaced by its variable.

    Raises:
      ValueError: if the subtree is not a tree but subpaths asks to index
        into it.
    """
    subtree = tree_index(tree, path)
    if not subpaths:
        return deepcopy(subtree)

    is_tree = isinstance(subtree, NLTKTree)
    first_subpath = subpaths[0]

    # The whole subtree is itself the (single) variable.
    if first_subpath == () or path == first_subpath:
        if is_tree:
            return '?x0|' + get_top(subtree)
        return '?x0|'

    # A plain string has no branches to be replaced by variables.
    if not is_tree:
        raise ValueError(
            'String {0} cannot be indexed by {1}'.format(subtree, subpaths))

    depth_subtree = len(path)
    tree_pattern = deepcopy(subtree)
    for i, subpath in enumerate(subpaths):
        # Subpaths are indexed from the root of tree; strip the prefix that
        # leads to the subtree to address the same node inside the copy.
        subpath_relative = subpath[depth_subtree:]
        branch = tree_index(tree, subpath)
        variable = '?x' + str(i) + '|'
        if isinstance(branch, NLTKTree):
            variable += get_top(branch)
        tree_pattern[subpath_relative] = variable
    return tree_pattern
def GetDisjointPaths(tree, path, min_paths, max_paths, max_depth=4,
                     candidate_paths=None):
    """
    This function returns a list of tuples of disjoint paths.
    E.g. [((path1, path2)), ((path1, path3)), ...].
    A list such as [()] represents a single tuple of disjoint paths,
    which contains no paths. That element "()" is important to signal
    tree patterns to have no subpaths, converting them into simple subtrees
    with no variables.

    candidate_paths is a variable containing a list of tuples, where each tuple
    is a valid path in the tree. If the candidate_paths variable is set,
    the disjoint paths will be generated from the list of candidate paths.
    Otherwise, the disjoint paths will be generated from the subpaths of
    the parameter "path" that are non-empty and shorter than max_depth.

    Every combination of between min_paths and max_paths (inclusive) paths
    that satisfies AreDisjointPaths is returned, smaller combinations first.
    """
    subtree = tree_index(tree, path)

    # Case where the subtree is a string.
    # The disjoint paths [()] means that there is only one set of disjoint
    # paths, which is (), that is the empty set of disjoint paths.
    if not isinstance(subtree, NLTKTree):
        disjoint_paths = []
        # Add the tuple containing zero number of paths.
        if min_paths == 0:
            disjoint_paths.append(())
        # NOTE(review): the single-path tuple is added whenever max_paths > 0,
        # even if min_paths > 1 -- confirm with callers that this is intended.
        if max_paths > 0:
            disjoint_paths.append((path, ))
        return disjoint_paths

    # Case where the subtree is a tree.
    # Identity test: an equality test against None misbehaves for arguments
    # that overload "==" (e.g. array-like candidate lists).
    if candidate_paths is None:
        paths = [path + subpath for subpath in subtree.treepositions()
                 if 0 < len(subpath) < max_depth]
    else:
        paths = candidate_paths

    # A list (rather than a generator) is returned, for caching and faster
    # computation.
    return [combined_paths
            for k in range(min_paths, max_paths + 1)
            for combined_paths in itertools.combinations(paths, k)
            if AreDisjointPaths(combined_paths)]
def GetDisjointPathsWithPermutations(tree, path, min_paths, max_paths,
                                     max_depth=4):
    """
    Enumerate every ordering of every tuple of disjoint paths under path.

    When the subtree at path is not a tree, a list is returned: it holds the
    empty tuple () if min_paths is 0, and the one-path tuple ((), ) if
    min_paths is below 2 and max_paths is positive.

    Otherwise a lazy iterator is returned. Candidate paths are path extended
    by each tree position of the subtree shorter than max_depth. For every
    size between min_paths and max_paths (inclusive), each combination that
    satisfies AreDisjointPaths is yielded in all of its permutations.
    """
    subtree = tree_index(tree, path)

    # Case where the subtree is a string.
    if not isinstance(subtree, NLTKTree):
        leaf_result = [()] if min_paths == 0 else []
        if min_paths < 2 and max_paths > 0:
            leaf_result.append(((), ))
        return leaf_result

    # Case where the subtree is a tree (return a lazy iterator).
    candidates = [path + position
                  for position in subtree.treepositions()
                  if len(position) < max_depth]
    orderings_per_combination = (
        itertools.permutations(combination)
        for size in range(min_paths, max_paths + 1)
        for combination in itertools.combinations(candidates, size)
        if AreDisjointPaths(combination))
    return itertools.chain.from_iterable(orderings_per_combination)