def analyse_operations_sentence(orig_sent, sys_sent, ref_sents, orig_parse, sys_parse, ref_parses):
    """Score the operation labels of a system output against each reference.

    The original sentence is aligned with the system output and labelled
    with ``ORIG_OPS_LABELS`` (the "automatic" labels).  The same procedure is
    repeated with every reference sentence (the "silver" labels), and a
    per-label F1 is computed between silver and automatic labels.

    Args:
        orig_sent: original sentence as a whitespace-tokenisable string.
        sys_sent: system output sentence as a whitespace-tokenisable string.
        ref_sents: iterable of reference sentences (strings).
        orig_parse: parse of ``orig_sent`` in the form ``align`` expects.
        sys_parse: parse of ``sys_sent`` in the form ``align`` expects.
        ref_parses: iterable of parses, paired with ``ref_sents`` by ``zip``.

    Returns:
        The element-wise maximum, over all references, of the per-label
        score arrays (``np.amax(..., axis=0)``).

    NOTE(review): ``f1_score`` is presumably scikit-learn's; with
    ``average=None`` it is expected to return one score per entry of
    ``ORIG_OPS_LABELS`` -- confirm against the file's imports.
    NOTE(review): with no references the score list is empty and
    ``np.amax`` is expected to raise -- confirm callers always pass at
    least one reference.
    """

    def _label_original_against(other_sent, other_parse):
        # Only the first element of align()'s result is used here; it is fed
        # to annotate_sentence as the word alignments.
        word_aligns = align(orig_parse, other_parse)[0]
        # orig_sent is split afresh on every call so that each annotation
        # pass receives its own token list.
        orig_side_annots, _other_side_annots = annotate_sentence(
            orig_sent.split(), other_sent.split(), word_aligns, orig_parse, other_parse
        )
        return _from_annots_to_labels(orig_side_annots, ORIG_OPS_LABELS, 'C')

    auto_labels = _label_original_against(sys_sent, sys_parse)

    scores_per_reference = [
        f1_score(
            _label_original_against(ref_sent, ref_parse),
            auto_labels,
            labels=ORIG_OPS_LABELS,
            average=None,
        )
        for ref_sent, ref_parse in zip(ref_sents, ref_parses)
    ]

    # Keep, for each label, the best score obtained against any reference.
    return np.amax(scores_per_reference, axis=0)
def test_align_str():
    """Check ``aligner.align`` on two raw strings.

    The first element of the result is compared with index pairs (output
    indexes start at 1) and the second with the corresponding word pairs.
    """
    source_text = "Four men died in an accident."
    target_text = "4 people are dead from a collision."

    expected_index_pairs = [[7, 8], [2, 2], [3, 4], [1, 1], [6, 7], [5, 6]]
    expected_word_pairs = [
        ['.', '.'],
        ['men', 'people'],
        ['died', 'dead'],
        ['Four', '4'],
        ['accident', 'collision'],
        ['an', 'a'],
    ]

    result = aligner.align(source_text, target_text)

    assert result[0] == expected_index_pairs
    assert result[1] == expected_word_pairs
def test_align_tokens():
    """Check ``aligner.align`` on two pre-tokenised sentences.

    The first element of the result is compared with index pairs (output
    indexes start at 1) and the second with the corresponding word pairs.
    """
    source_tokens = ['Four', 'men', 'died', 'in', 'an', 'accident', '.']
    target_tokens = ['4', 'people', 'are', 'dead', 'from', 'a', 'collision', '.']

    expected_index_pairs = [[7, 8], [2, 2], [3, 4], [1, 1], [6, 7], [5, 6]]
    expected_word_pairs = [
        ['.', '.'],
        ['men', 'people'],
        ['died', 'dead'],
        ['Four', '4'],
        ['accident', 'collision'],
        ['an', 'a'],
    ]

    result = aligner.align(source_tokens, target_tokens)

    assert result[0] == expected_index_pairs
    assert result[1] == expected_word_pairs
def align_scenes_sentences(scenes, synt_parse_sentences):
    """Align every scene with every sentence.

    Args:
        scenes: scene texts; they are parsed here with
            ``syntactic_parse_texts``.
        synt_parse_sentences: already-parsed sentences, in the form
            ``align`` expects.

    Returns:
        A nested list indexed as ``result[scene][sentence]``.  Each entry is
        the second element of ``align(scene_parse, sentence_parse)``, which
        per the original comment has the shape
        ``[[word1_scene, word1_sentence], [word2_scene, word3_sentence], ...]``.
    """
    parsed_scenes = syntactic_parse_texts(scenes)

    return [
        [align(scene_parse, sent_parse)[1] for sent_parse in synt_parse_sentences]
        for scene_parse in parsed_scenes
    ]
def align_scenes_sentences(synt_scenes, synt_sents, allow_mutiple_matches):
    """Pick, for each scene, the sentence alignment with the most entries.

    Args:
        synt_scenes: parsed scenes, in the form ``align`` expects.
        synt_sents: parsed sentences, in the form ``align`` expects.
        allow_mutiple_matches: when falsy, a sentence that was already chosen
            as the best match of an earlier scene is skipped for later
            scenes; when truthy, every sentence is considered for every
            scene.

    Returns:
        One entry per scene, in scene order: the second element of
        ``align(scene, sentence)`` for the winning sentence, or an empty
        list when no candidate produced a non-empty alignment.  Ties keep
        the earliest sentence, because a later candidate must be strictly
        longer to replace the current best.
    """
    best_aligns_per_scene = []
    used_sentence_indexes = []

    for scene_parse in synt_scenes:
        best_aligns = []
        for sent_index, sent_parse in enumerate(synt_sents):
            if allow_mutiple_matches or sent_index not in used_sentence_indexes:
                candidate_aligns = align(scene_parse, sent_parse)[1]
                if len(candidate_aligns) > len(best_aligns):
                    best_aligns, best_sent_index = candidate_aligns, sent_index

        best_aligns_per_scene.append(best_aligns)

        if len(best_aligns) == 0:
            # Nothing matched this scene, so no sentence gets reserved.
            continue
        # A non-empty best alignment implies best_sent_index was assigned
        # during this scene's inner loop.
        used_sentence_indexes.append(best_sent_index)

    return best_aligns_per_scene