def split_sents(passage):
    """
    Split the paragraph and the DAG into sentences and print one tree
    per top-level node.

    The DAG is also transformed into a tree representation.

    Steps, as performed below:
      1. Take the outgoing edges of the first node in layer '1' and wrap
         each edge's ``child`` and ``tag`` in a ``node.Internal``.
      2. Join the ``text`` of every item in layer '0' with single spaces
         and run ``sent_tokenize`` on the result.
      3. Hand the sentences and the wrapped nodes to ``correct_split``,
         which yields ``(sentence, head_nodes)`` pairs.
      4. For every head node build a ``Tree``, call ``fill_tree()`` on
         it and print the value of ``print_tree_penn()``.

    :param passage: object exposing ``layer(name)``; each layer exposes
        ``.all``.  NOTE(review): presumably a UCCA passage -- confirm
        against the caller.
    :return: None.  The trees are written to standard output.
    """
    # Node 1.1 is always a root FN node
    outgoing_edges = passage.layer('1').all[0].outgoing

    # root_nodes: H, U, L nodes at the top.
    # Built as a real list so correct_split receives a sequence on both
    # Python 2 and Python 3 (map() is lazy on Python 3).
    # NOTE(review): the meaning of the third argument (0) to
    # node.Internal is not visible from here -- confirm.
    root_nodes = [node.Internal(edge.child, edge.tag, 0)
                  for edge in outgoing_edges]

    words = passage.layer('0').all
    par = ' '.join(word.text for word in words)

    tok_par_nodes = correct_split(sent_tokenize(par), root_nodes)

    # The sentence text itself is not needed here, only its head nodes.
    for _sent, head_nodes in tok_par_nodes:
        for head_node in head_nodes:
            tree = Tree(head_node)
            tree.fill_tree()
            # Single-argument parenthesised form prints the same text
            # under the Python 2 print statement and the Python 3
            # print function.
            print(tree.print_tree_penn())