Esempio n. 1
0
    def attach_quotes(self, deriv, span_begin, span_end, quote_type, higher,
                      quotes):
        """Insert quotes around a leaf span and report where they ended up.

        The span endpoints are resolved to leaves with ``get_leaf``: the
        start counting "forwards", the end counting "backwards". The end
        leaf is then passed through ``self.punct_class.process_punct``,
        and ``self.insert_quotes`` is invoked on the lowest common ancestor
        of the two leaves (when one is found).

        ``quote_type`` and ``quotes`` are accepted for interface
        compatibility but are not read by this method.

        Returns a pair ``(deriv, quote_indices)`` where ``deriv`` is the
        (possibly replaced) derivation and ``quote_indices`` is a
        two-element list: the leaf index of the last leaf whose category
        prints as ``LQU``, and the leaf index of the last ``RQU`` leaf
        minus 2. Either entry stays ``None`` if no such leaf exists.
        """
        # A missing endpoint falls back to index 0 in its own direction.
        first_index = 0 if span_begin is None else span_begin
        last_index = 0 if span_end is None else span_end

        begin_node = get_leaf(deriv, first_index, "forwards")
        end_node = get_leaf(deriv, last_index, "backwards")

        if end_node:
            # NOTE(review): the raw span_end (which may be None) is forwarded
            # here, not the defaulted last_index -- confirm that
            # process_punct copes with None.
            end_node = self.punct_class.process_punct(deriv, end_node,
                                                      span_end)

        lca_node = lca(begin_node, end_node)
        if lca_node:
            deriv = self.insert_quotes(deriv, lca_node, higher)

        # Scan the resulting leaves for the quote categories; when several
        # leaves match, the last one wins.
        quote_indices = [None, None]
        for index, leaf in enumerate(leaves(deriv)):
            category = str(leaf.cat)
            if category == 'LQU':
                quote_indices[0] = index
            elif category == 'RQU':
                # presumably discounts the two inserted quote leaves --
                # TODO confirm against callers
                quote_indices[1] = index - 2

        return deriv, quote_indices
Esempio n. 2
0
 def test_wsj0087_8(self):
     # Load the entry selected by argument 7 from the wsj_0087 fixture and
     # pick the leaf at position 6, counted "backwards".
     derivation = load_ccgbank_tree("munge/tests/wsj_0087.auto", 7)
     target = get_leaf(derivation, 6, "backwards")
     self.assertEqual("to", target.lex)

     # The full sequence produced by applications() for that leaf.
     expected_rules = [
         "fwd_appl", "conj_absorb", "conjoin", "bwd_appl", "fwd_appl",
         "bwd_appl", "fwd_appl", "fwd_appl", "r_punct_absorb",
     ]
     observed_rules = list(applications(target))
     self.assertEqual(expected_rules, observed_rules)
Esempio n. 3
0
 def test_wsj0003_1(self):
     # Load the entry selected by argument 0 from the wsj_0003 fixture and
     # pick the leaf at position 10, counted "forwards".
     derivation = load_ccgbank_tree("munge/tests/wsj_0003.auto", 0)
     target = get_leaf(derivation, 10, "forwards")
     self.assertEqual("filters", target.lex)

     # The full sequence produced by applications() for that leaf.
     expected_rules = [
         "fwd_appl", "fwd_appl", "np_typechange", "fwd_appl", "fwd_appl",
         "fwd_appl", "fwd_appl", "appositive_typechange", "bwd_appl",
         "bwd_appl", "bwd_appl", "r_punct_absorb",
     ]
     observed_rules = list(applications(target))
     self.assertEqual(expected_rules, observed_rules)
Esempio n. 4
0
def ImmediatelyPrecedes(candidate, node, context):
    """Test whether the leaf right after ``node`` satisfies ``candidate``.

    ``node`` must be a leaf; for any other node the result is False. The
    leaf at the position one past ``node`` (within the tree rooted at
    ``get_root(node)``) is looked up, and the result is True only when that
    leaf exists and ``candidate.is_satisfied_by(leaf, context)`` holds.
    """
    if node.is_leaf():
        tree_root = get_root(node)
        position = get_index_of_leaf(tree_root, node)

        # The leaf one position further on, i.e. the one node precedes.
        following = get_leaf(tree_root, position + 1)
        if following and candidate.is_satisfied_by(following, context):
            return True

    return False
Esempio n. 5
0
 def attach_quotes(self, deriv, span_begin, span_end, quote_type, higher, quotes):
     """Wrap a span of leaves in quotes and locate the inserted quote leaves.

     The start of the span is resolved with ``get_leaf`` counting
     "forwards" and the end counting "backwards"; the end leaf is then run
     through ``self.punct_class.process_punct``. If the two leaves have a
     lowest common ancestor, ``self.insert_quotes`` is applied to it.

     ``quote_type`` and ``quotes`` are not read here; they are kept so the
     signature stays unchanged.

     Returns ``(deriv, quote_indices)``. ``quote_indices`` is a list of two
     entries: the index of the last leaf whose category prints as ``LQU``,
     and the index of the last ``RQU`` leaf minus 2. An entry is ``None``
     when no matching leaf is present.
     """
     # An absent endpoint is treated as index 0 in its own direction.
     first_index = span_begin if span_begin is not None else 0
     last_index = span_end if span_end is not None else 0

     begin_node = get_leaf(deriv, first_index, "forwards")
     end_node = get_leaf(deriv, last_index, "backwards")

     if end_node:
         # NOTE(review): span_end itself (possibly None) is passed on, not
         # last_index -- confirm process_punct tolerates None.
         end_node = self.punct_class.process_punct(deriv, end_node, span_end)

     lca_node = lca(begin_node, end_node)
     if lca_node:
         deriv = self.insert_quotes(deriv, lca_node, higher)

     # Later matches overwrite earlier ones, so the last quote leaf of each
     # kind determines the reported index.
     quote_indices = [None, None]
     for index, leaf in enumerate(leaves(deriv)):
         category = str(leaf.cat)
         if category == 'LQU':
             quote_indices[0] = index
         elif category == 'RQU':
             # presumably compensates for the two inserted quote leaves --
             # TODO confirm
             quote_indices[1] = index - 2

     return deriv, quote_indices
Esempio n. 6
0
    def insert_quote(self, deriv, tokens, at, quote, quote_type):
        '''Performs the actual quote insertion.

        Looks up the leaf at position ``at`` (counting "forwards" for an
        opening quote, "backwards" for a closing one), climbs from it while
        the parent's text is still accepted by ``is_sublist`` against
        ``tokens``, and wraps the node reached in a new binary node whose
        other child is the quote leaf.

        ``quote`` must be "begin" or "end". ``quote_type`` selects a double
        quote when it equals "``"; any other value gives ``double=False``.

        Returns the root of the newly quoted derivation (which may differ
        from the root of the input derivation). The input ``deriv`` is
        returned unchanged as the root when ``at`` is None, when no leaf is
        found, or when the leaf's text is not a sublist of ``tokens``.

        Raises ValueError if ``quote`` is neither "begin" nor "end".'''

        if quote == "begin":
            direction = "forwards"
        elif quote == "end":
            direction = "backwards"
        else:
            # Previously this fell through and failed later with an opaque
            # UnboundLocalError on `direction`.
            raise ValueError("quote must be 'begin' or 'end', got %r" % (quote,))

        double = (quote_type == "``")

        node = get_leaf(deriv, at, direction)
        if at is None or not node:
            return deriv

        if quote == "end" and self.punct_class:
            # Process absorbed punctuation
            node = self.punct_class.process_punct(deriv, node, at)

        if not (node and is_sublist(smaller=text(node), larger=tokens)):
            return deriv

        # Climb to the highest ancestor whose text is_sublist still accepts.
        attachment_node = node
        while (attachment_node.parent and
               is_sublist(smaller=text(attachment_node.parent), larger=tokens)):
            attachment_node = attachment_node.parent

        # Record the attachment point's position before building the new
        # node, so the re-linking below does not depend on anything the
        # Node constructor may do to its children.
        prev_parent = attachment_node.parent
        was_left_child = (attachment_node.parent) and (attachment_node.parent.lch is attachment_node)

        if quote == "begin":
            new_node = Node(attachment_node.cat, 0, 2,
                            parent=None, lch=make_open_quote_leaf(None, double),
                            rch=attachment_node)
        else:  # quote == "end"
            new_node = Node(attachment_node.cat, 0, 2,
                            parent=None, lch=attachment_node,
                            rch=make_closed_quote_leaf(None, double))

        if not prev_parent:
            return new_node  # Replace the old root

        # Splice the new node into the slot the attachment node occupied.
        if was_left_child:
            prev_parent.lch = new_node
        else:
            prev_parent.rch = new_node

        return deriv
Esempio n. 7
0
 def process_punct(deriv, node, at):
     """Return the leaf of ``deriv`` at position ``at + 1``, counted "backwards".

     ``node`` is accepted but not used.
     """
     shifted_index = at + 1
     return get_leaf(deriv, shifted_index, "backwards")
Esempio n. 8
0
 def process_punct(deriv, node, at):
     """Look up the leaf one position past ``at`` in ``deriv``.

     The lookup uses ``get_leaf`` in the "backwards" direction. The
     ``node`` argument is ignored.
     """
     position = at + 1
     replacement = get_leaf(deriv, position, "backwards")
     return replacement