def attach_quotes(self, deriv, span_begin, span_end, quote_type, higher, quotes):
    '''Inserts quotes around the part of _deriv_ delimited by two leaf indices.

    The leaf at _span_begin_ is looked up in the "forwards" direction and the
    leaf at _span_end_ in the "backwards" direction; an index of None is
    treated as 0. If an end leaf is found it is first passed through
    self.punct_class.process_punct. When lca() yields a node for the two
    leaves, self.insert_quotes is applied at that node with _higher_.

    _quote_type_ and _quotes_ are accepted but not used by this method.

    Returns a pair (deriv, quote_indices): the possibly replaced derivation
    and a two-element list holding the index recorded for the last 'LQU' leaf
    and for the last 'RQU' leaf (None where no such leaf exists).'''
    first_index = 0 if (span_begin is None) else span_begin
    last_index = 0 if (span_end is None) else span_end

    begin_node = get_leaf(deriv, first_index, "forwards")
    end_node = get_leaf(deriv, last_index, "backwards")

    if end_node:
        # NOTE(review): the raw span_end (which may be None) is handed on here,
        # not the None-safe last_index -- confirm this is intended.
        end_node = self.punct_class.process_punct(deriv, end_node, span_end)

    lca_node = lca(begin_node, end_node)
    if lca_node:
        deriv = self.insert_quotes(deriv, lca_node, higher)

    # Scan the (possibly re-rooted) derivation for the quote leaves.
    quote_indices = [None, None]
    for index, leaf in enumerate(leaves(deriv)):
        category = str(leaf.cat)
        if category == 'LQU':
            quote_indices[0] = index
        elif category == 'RQU':
            # NOTE(review): the closing quote is recorded at index - 2 while
            # the opening quote keeps its own index -- confirm against callers.
            quote_indices[1] = index - 2

    return deriv, quote_indices
def test_wsj0087_8(self):
    # Expected result of applications() for the leaf "to", which is looked up
    # at index 6 in the "backwards" direction.
    expected_rules = [
        "fwd_appl",
        "conj_absorb",
        "conjoin",
        "bwd_appl",
        "fwd_appl",
        "bwd_appl",
        "fwd_appl",
        "fwd_appl",
        "r_punct_absorb",
    ]

    deriv = load_ccgbank_tree("munge/tests/wsj_0087.auto", 7)
    target = get_leaf(deriv, 6, "backwards")

    # Make sure the right leaf was picked before comparing the rule sequence.
    self.assertEqual("to", target.lex)
    self.assertEqual(expected_rules, list(applications(target)))
def test_wsj0003_1(self):
    # Expected result of applications() for the leaf "filters", which is
    # looked up at index 10 in the "forwards" direction.
    expected_rules = [
        "fwd_appl",
        "fwd_appl",
        "np_typechange",
        "fwd_appl",
        "fwd_appl",
        "fwd_appl",
        "fwd_appl",
        "appositive_typechange",
        "bwd_appl",
        "bwd_appl",
        "bwd_appl",
        "r_punct_absorb",
    ]

    deriv = load_ccgbank_tree("munge/tests/wsj_0003.auto", 0)
    target = get_leaf(deriv, 10, "forwards")

    # Make sure the right leaf was picked before comparing the rule sequence.
    self.assertEqual("filters", target.lex)
    self.assertEqual(expected_rules, list(applications(target)))
def ImmediatelyPrecedes(candidate, node, context):
    '''Returns True when _node_ is a leaf and the leaf directly after it
    satisfies _candidate_.

    The leaf inspected is the one at (index of _node_) + 1 under the root of
    _node_. Returns False when _node_ is not a leaf, when no such following
    leaf is found, or when _candidate_ is not satisfied by it in _context_.'''
    if node.is_leaf():
        tree_root = get_root(node)
        following = get_leaf(tree_root, get_index_of_leaf(tree_root, node) + 1)
        if following:
            # Collapse whatever is_satisfied_by returns to a strict True/False.
            return bool(candidate.is_satisfied_by(following, context))
    return False
def insert_quote(self, deriv, tokens, at, quote, quote_type):
    '''Performs the actual quote insertion.

    _at_ is the index of the leaf next to which the quote goes; it is looked
    up "forwards" for an opening quote (quote == "begin") and "backwards" for
    a closing quote (quote == "end"). For a closing quote the leaf is first
    passed through self.punct_class.process_punct, if a punct_class is set.
    Starting from that leaf, the attachment point is the highest ancestor
    whose text is still a sublist of _tokens_. A new binary node carrying the
    attachment point's category is created with the quote leaf on the left
    (begin) or right (end), and takes the attachment point's place under its
    former parent.

    _quote_type_ selects a double quote when it equals "``".

    Returns the root of the newly quoted derivation (which may differ from
    the root of the input derivation). The input root is returned unchanged
    when _at_ is None, when no leaf is found, or when the leaf's text is not
    a sublist of _tokens_.

    Raises ValueError if _quote_ is neither "begin" nor "end".'''
    if quote == "begin":
        direction = "forwards"
    elif quote == "end":
        direction = "backwards"
    else:
        # Previously this fell through and failed later on an unbound local.
        raise ValueError("quote must be 'begin' or 'end', got %r" % (quote,))

    # Without a position there is nothing to insert, so skip the leaf lookup.
    if at is None:
        return deriv

    node = get_leaf(deriv, at, direction)
    if not node:
        return deriv

    if quote == "end" and self.punct_class:
        # Process absorbed punctuation
        node = self.punct_class.process_punct(deriv, node, at)

    if not (node and is_sublist(smaller=text(node), larger=tokens)):
        return deriv

    # Climb while the parent's text is still contained in the quoted tokens.
    attachment_node = node
    while (attachment_node.parent and
           is_sublist(smaller=text(attachment_node.parent), larger=tokens)):
        attachment_node = attachment_node.parent

    # Record the attachment point's position before the new node is built.
    prev_parent = attachment_node.parent
    was_left_child = prev_parent and (prev_parent.lch is attachment_node)

    double = (quote_type == "``")
    if quote == "begin":
        new_node = Node(attachment_node.cat, 0, 2, parent=None,
                        lch=make_open_quote_leaf(None, double),
                        rch=attachment_node)
    else:
        new_node = Node(attachment_node.cat, 0, 2, parent=None,
                        lch=attachment_node,
                        rch=make_closed_quote_leaf(None, double))

    if not prev_parent:
        return new_node  # Replace the old root

    if was_left_child:
        prev_parent.lch = new_node
    else:
        prev_parent.rch = new_node
    return deriv
def process_punct(deriv, node, at):
    '''Returns the leaf of _deriv_ found by get_leaf at index _at_ + 1 in the
    "backwards" direction.

    _node_ is accepted but not used.'''
    shifted_index = at + 1
    return get_leaf(deriv, shifted_index, "backwards")