def numberSpans(tree, start, sentence):
    """Annotate a constituency tree with token spans and crossing dependency edges.

    Walks ``tree`` depth-first. Every ``nltk.tree.Tree`` node receives four
    attributes:

    * ``start`` / ``end`` -- half-open range ``[start, end)`` of positions in
      ``sentence`` covered by the node;
    * ``incoming`` -- ``(head, dependent)`` pairs whose dependent lies inside
      the span and whose head lies outside it;
    * ``outgoing`` -- ``(head, dependent)`` pairs whose head lies inside the
      span and whose dependent lies outside it.

    Args:
        tree: An ``nltk.tree.Tree`` node, or a leaf token string.
        start: Position in ``sentence`` of the first token under ``tree``.
        sentence: Sequence indexed by token position. Each entry must provide
            a ``"head"`` item and may provide ``"children"`` via ``.get``.
            NOTE(review): ``head`` appears to be 1-based (it is shifted by -1
            here, so a head of 0 becomes -1) while ``children`` appear to be
            0-based positions -- confirm against the code that builds
            ``sentence``.

    Returns:
        A tuple ``(next_start, incoming, outgoing)``: the position following
        the last token consumed, plus the edge lists described above. For a
        leaf token the lists are that token's own head edge and child edges,
        unfiltered.
    """
    # isinstance (rather than an exact type comparison) so that Tree
    # subclasses are traversed as nodes instead of being mistaken for leaves.
    if not isinstance(tree, nltk.tree.Tree):
        # Leaves starting with "*" or equal to "0" consume no position in
        # ``sentence`` (presumably trace / empty elements -- TODO confirm).
        if tree.startswith("*") or tree == "0":
            return start, [], []
        token = sentence[start]
        outgoing = [(start, child) for child in token.get("children", [])]
        incoming = [(token["head"] - 1, start)]
        return start + 1, incoming, outgoing

    tree.start = start
    incoming = []
    outgoing = []
    for child in tree:
        start, child_incoming, child_outgoing = numberSpans(child, start, sentence)
        incoming.extend(child_incoming)
        outgoing.extend(child_outgoing)
    tree.end = start

    # Keep only the edges that cross this node's span boundary; edges whose
    # two endpoints are both inside the span are internal and are dropped.
    incoming = [
        (hd, dep) for hd, dep in incoming
        if hd < tree.start or hd >= tree.end
    ]
    outgoing = [
        (hd, dep) for hd, dep in outgoing
        if dep < tree.start or dep >= tree.end
    ]
    tree.incoming = incoming
    tree.outgoing = outgoing
    return start, incoming, outgoing
def numberSpans(tree, start, sentence):
    """Number the token span of every tree node and collect boundary-crossing edges.

    Recursively visits ``tree``. Each ``nltk.tree.Tree`` node is given:

    * ``start`` and ``end``: the node covers positions ``start <= i < end``
      of ``sentence``;
    * ``incoming``: list of ``(head, dependent)`` pairs where the dependent is
      inside the span and the head is outside;
    * ``outgoing``: list of ``(head, dependent)`` pairs where the head is
      inside the span and the dependent is outside.

    Args:
        tree: Either an ``nltk.tree.Tree`` node or a leaf token string.
        start: Position in ``sentence`` at which this subtree begins.
        sentence: Indexable collection of per-token mappings. ``"head"`` is
            required; ``"children"`` is optional and defaults to no children.
            NOTE(review): ``head`` is decremented by one before use, which
            suggests 1-based heads with 0 marking the root (yielding -1);
            ``children`` look like 0-based positions -- verify with the
            producer of ``sentence``.

    Returns:
        ``(next_start, incoming, outgoing)`` where ``next_start`` is the
        position after the last token consumed by this subtree. For a leaf
        token the two lists hold that token's head edge and its child edges
        without any filtering.
    """
    # Use isinstance so subclasses of Tree are handled as nodes; an exact
    # type comparison would send them down the leaf path.
    if not isinstance(tree, nltk.tree.Tree):
        # Tokens beginning with "*" or equal to "0" do not advance the
        # position counter (presumably empty/trace elements -- TODO confirm).
        if tree.startswith("*") or tree == "0":
            return start, [], []
        entry = sentence[start]
        outgoing = [(start, dependent) for dependent in entry.get("children", [])]
        incoming = [(entry["head"] - 1, start)]
        return start + 1, incoming, outgoing

    tree.start = start
    incoming = []
    outgoing = []
    for subtree in tree:
        start, sub_incoming, sub_outgoing = numberSpans(subtree, start, sentence)
        incoming.extend(sub_incoming)
        outgoing.extend(sub_outgoing)
    tree.end = start

    # Discard edges that are fully contained in [tree.start, tree.end);
    # what remains are the edges crossing this node's boundary.
    incoming = [
        (hd, dep) for hd, dep in incoming
        if hd < tree.start or hd >= tree.end
    ]
    outgoing = [
        (hd, dep) for hd, dep in outgoing
        if dep < tree.start or dep >= tree.end
    ]
    tree.incoming = incoming
    tree.outgoing = outgoing
    return start, incoming, outgoing