예제 #1
0
파일: align.py 프로젝트: huji-nlp/ucca
def align_yields(p1, p2):
	"""Align the terminals of two passages, one common sentence at a time.

	The passages are cut at the boundaries reported by
	break2common_sentences; each pair of corresponding slices of the
	terminal lists is handed to align(), and the slice-local indices it
	yields are shifted back to indices into the full terminal lists.

	Note (kept from the original author): the alignment is symmetrical, so
	consider using reverse_mapping instead of calling this function twice.

	Returns:
		A set of (i, j) tuples, i indexing the terminals of p1 and j the
		terminals of p2. An index equal to -1 is passed through unshifted
		(presumably it marks "no counterpart" -- confirm against align()).
		If the two boundary lists differ in length, an error is printed
		and None is returned.
	"""
	ends1, ends2 = break2common_sentences(p1, p2)
	words1 = extract_terminals(p1)
	words2 = extract_terminals(p2)

	# Without the same number of boundaries the sentences cannot be paired.
	if len(ends1) != len(ends2):
		print("Error number of sentences aqquired from break2common_sentences dow not match")
		return None

	pairs = set()
	start1 = start2 = 0
	for end1, end2 in zip(ends1, ends2):
		local_pairs = align(words1[start1:end1], words2[start2:end2], False)[1]
		for a, b in local_pairs:
			# Only real indices are moved to passage-wide offsets.
			shifted_a = a if a == -1 else a + start1
			shifted_b = b if b == -1 else b + start2
			pairs.add((shifted_a, shifted_b))
		# The next sentence begins where this one ended.
		start1, start2 = end1, end2
	return pairs
예제 #2
0
def align_yields(p1, p2):
    """Compute a word alignment between two passages.

    Sentence boundaries shared by both passages come from
    break2common_sentences. Every pair of matching sentence slices is
    aligned with align(), and the resulting per-sentence index pairs are
    translated into indices over the whole terminal lists.

    Note (kept from the original author): the result is symmetrical, so
    prefer reverse_mapping over calling this function a second time with
    the arguments swapped.

    Returns:
        set of (i, j) tuples where i indexes the terminals of p1 and j the
        terminals of p2; a value of -1 is kept as-is rather than offset.
        When the boundary lists have different lengths an error message is
        printed and the function returns None.
    """
    boundaries1, boundaries2 = break2common_sentences(p1, p2)
    tokens1 = extract_terminals(p1)
    tokens2 = extract_terminals(p2)

    if len(boundaries1) == len(boundaries2):

        def offset(index, base):
            # -1 is preserved untouched; everything else becomes absolute.
            return index if index == -1 else index + base

        aligned = set()
        prev1, prev2 = 0, 0
        for stop1, stop2 in zip(boundaries1, boundaries2):
            chunk1 = tokens1[prev1:stop1]
            chunk2 = tokens2[prev2:stop2]
            aligned.update(
                (offset(j, prev1), offset(k, prev2))
                for (j, k) in align(chunk1, chunk2, False)[1]
            )
            # Advance both cursors to the start of the next sentence.
            prev1, prev2 = stop1, stop2
        return aligned

    print(
        "Error number of sentences aqquired from break2common_sentences dow not match"
    )
예제 #3
0
def get_lowest_fn(p):
    """Collect the foundational nodes that directly parent a terminal.

    Walks every terminal returned by extract_terminals(p), looks at the
    parent of each of its incoming edges, and keeps those parents for
    which is_foundational() is true.

    Returns:
        A set of the matching parent nodes (each node appears once).
    """
    parents = set()
    for terminal in extract_terminals(p):
        for edge in terminal.incoming:
            node = edge.parent
            if is_foundational(node):
                parents.add(node)
    return parents
예제 #4
0
파일: align.py 프로젝트: huji-nlp/ucca
def get_lowest_FN(p):
	"""Return the set of foundational nodes that have a terminal as a child.

	For each terminal from extract_terminals(p), the parent of every
	incoming edge is considered; a parent is included when
	is_foundational() accepts it.
	"""
	return {
		edge.parent
		for terminal in extract_terminals(p)
		for edge in terminal.incoming
		if is_foundational(edge.parent)
	}