def align_yields(p1, p2):
    """Find the best alignment of words between two passages.

    The passages are split at their common sentence boundaries (as reported
    by ``break2common_sentences``) and every pair of corresponding sentences
    is aligned on its own with ``align``; the per-sentence indices are then
    shifted back to passage-wide terminal positions.

    Note: this function is symmetrical; consider using ``reverse_mapping``
    instead of calling it twice.

    Args:
        p1: the first passage.
        p2: the second passage.

    Returns:
        A set of ``(i, j)`` tuples mapping position ``i`` among the terminals
        of ``p1`` to the aligned position ``j`` among the terminals of ``p2``.
        An index of ``-1`` produced by ``align`` is passed through unshifted
        (presumably it marks a word without a counterpart -- confirm against
        ``align``).  If the two passages were not split into the same number
        of sentences, an error is printed and ``None`` is returned.
    """
    positions1, positions2 = break2common_sentences(p1, p2)
    terminals1 = extract_terminals(p1)
    terminals2 = extract_terminals(p2)

    if len(positions1) != len(positions2):
        print(
            "Error: number of sentences acquired from "
            "break2common_sentences does not match"
        )
        return None

    # map the words in each sentence to each other
    mapping = set()
    sentence_start1 = 0
    sentence_start2 = 0
    for sentence_end1, sentence_end2 in zip(positions1, positions2):
        sentence1 = terminals1[sentence_start1:sentence_end1]
        sentence2 = terminals2[sentence_start2:sentence_end2]
        for j, k in align(sentence1, sentence2, False)[1]:
            # Only real indices are moved from sentence-local to
            # passage-wide positions; -1 is kept as is.
            if j != -1:
                j += sentence_start1
            if k != -1:
                k += sentence_start2
            mapping.add((j, k))
        # The next sentence begins where this one ended.
        sentence_start1 = sentence_end1
        sentence_start2 = sentence_end2
    return mapping
def align_yields(p1, p2):
    """Find the best alignment of words between two passages.

    The passages are split at their common sentence boundaries (as reported
    by ``break2common_sentences``) and every pair of corresponding sentences
    is aligned on its own with ``align``; the per-sentence indices are then
    shifted back to passage-wide terminal positions.

    Note: this function is symmetrical; consider using ``reverse_mapping``
    instead of calling it twice.

    Args:
        p1: the first passage.
        p2: the second passage.

    Returns:
        A set of ``(i, j)`` tuples mapping position ``i`` among the terminals
        of ``p1`` to the aligned position ``j`` among the terminals of ``p2``.
        An index of ``-1`` produced by ``align`` is passed through unshifted
        (presumably it marks a word without a counterpart -- confirm against
        ``align``).  If the two passages were not split into the same number
        of sentences, an error is printed and ``None`` is returned.
    """
    positions1, positions2 = break2common_sentences(p1, p2)
    terminals1 = extract_terminals(p1)
    terminals2 = extract_terminals(p2)

    if len(positions1) != len(positions2):
        print(
            "Error: number of sentences acquired from "
            "break2common_sentences does not match"
        )
        return None

    # map the words in each sentence to each other
    mapping = set()
    sentence_start1 = 0
    sentence_start2 = 0
    for sentence_end1, sentence_end2 in zip(positions1, positions2):
        sentence1 = terminals1[sentence_start1:sentence_end1]
        sentence2 = terminals2[sentence_start2:sentence_end2]
        for j, k in align(sentence1, sentence2, False)[1]:
            # Only real indices are moved from sentence-local to
            # passage-wide positions; -1 is kept as is.
            if j != -1:
                j += sentence_start1
            if k != -1:
                k += sentence_start2
            mapping.add((j, k))
        # The next sentence begins where this one ended.
        sentence_start1 = sentence_end1
        sentence_start2 = sentence_end2
    return mapping
def get_lowest_fn(p):
    """Collect the foundational parents of the terminals of ``p``.

    For every terminal returned by ``extract_terminals(p)``, the parent of
    each incoming edge is kept when ``is_foundational`` accepts it.

    Returns:
        A set of the accepted parent nodes (the "lowest FNs", i.e. the ones
        that have terminals as children).
    """
    lowest = set()
    for terminal in extract_terminals(p):
        lowest |= {
            edge.parent
            for edge in terminal.incoming
            if is_foundational(edge.parent)
        }
    return lowest
def get_lowest_FN(p):
    """Return the set of FNs that have terminals of ``p`` as children.

    Walks the terminals given by ``extract_terminals(p)`` and, for each
    incoming edge, records the edge's parent if ``is_foundational`` is true
    for it.
    """
    found = set()
    for terminal in extract_terminals(p):
        for edge in terminal.incoming:
            parent = edge.parent
            # Parents rejected by is_foundational are skipped.
            if not is_foundational(parent):
                continue
            found.add(parent)
    return found