def Search(lst, c, k, suffix_errors, what):
    """Scan the mentions of coref class *c* for the surface form *what*.

    Every matching mention is printed as a tab-separated row and a label
    for the class is appended to *lst*: 'C<k>' for real chains,
    '<chapter>:<verse>-Sing' for singleton classes.

    Parameters
    ----------
    lst : list collecting labels of matching classes (mutated in place).
    c : coreference class exposing .id and .terms (its mentions).
    k : integer id of the class, used in the printed 'C<k>' label.
    suffix_errors : unused here; kept for signature parity with the
        sibling Search*/Pattern helpers.
    what : surface string to search for.

    Returns the (mutated) *lst*.
    """
    for m in c.terms:
        # Skip early: section and gloss lookups are only needed for
        # matches (the original recomputed them twice per class mention).
        if m.surface != what:
            continue
        book, chapter, verse = T.sectionFromNode(m.start)
        gloss = F.gloss.v(L.u(m.start, 'lex')[0])
        # Only verbal / personal-pronoun mentions carry a PGN feature.
        det = converse_pgn(F, m.start) if m.typ in {'VP', 'PPrP'} else ''
        if c.id != 'Singletons':
            lst.append(f'C{k}')
            print(f'{chapter}:{verse}', f'C{k}', m.typ, det, m.surface,
                  '', gloss, m.note, sep='\t', end='\n')
        else:
            lst.append(f'{chapter}:{verse}-Sing')
            print(f'{chapter}:{verse}', 'Sing', m.typ, det, m.surface,
                  '', gloss, m.note, sep='\t', end='\n')
    return lst
def GetGraphData(corefs):
    """Aggregate part-of-speech and pronoun (PGN) statistics over *corefs*.

    corefs maps an integer class id to a coref class with .first() and
    .terms; key 0 holds the singleton mentions, every other key is a real
    coreference chain.

    Returns a 4-tuple of nested defaultdict counters:
      pos_dict          -- mention-type counts under 'first in chain',
                           'in class', 'singleton' and 'total'
      pronoun_dict      -- PGN counts under 'in class' / 'singleton'
      pronoun_pos_class_dict -- PGN counts per mention type, chains only
      pronoun_pos_sing_dict  -- PGN counts per mention type, singletons only

    NOTE(review): annotation errors (suffixes missing from suffix_dict)
    are collected locally but never returned; preserved as-is because
    callers unpack exactly four values.
    """
    annotation_errors = []
    pos_dict = defaultdict(lambda: defaultdict(int))
    pronoun_dict = defaultdict(lambda: defaultdict(int))
    pronoun_pos_class_dict = defaultdict(lambda: defaultdict(int))
    pronoun_pos_sing_dict = defaultdict(lambda: defaultdict(int))

    def _tally_pronoun(m, k, pa_typ, bucket, pronoun_pos):
        # Shared tail of the chain/singleton branches: record the PGN of a
        # pronoun-bearing mention under *bucket* and its per-type counter.
        if pa_typ in {'VP', 'PPrP'}:
            pgn = converse_pgn(F, m.start)  # hoisted: was called 3x per hit
            if pgn != '':
                pronoun_dict[bucket][pgn] += 1
                pronoun_pos[pa_typ][pgn] += 1
        elif m.isSuffix:
            if m.surface not in suffix_dict:
                annotation_errors.append((k, m.start, m.surface, m.isSuffix))
            else:
                pgn = suffix_dict[m.surface][0]
                pronoun_dict[bucket][pgn] += 1
                pronoun_pos[pa_typ][pgn] += 1

    for k, c in corefs.items():
        if k != 0:
            pos_dict['first in chain'][c.first().typ] += 1
        for m in c.terms:
            # Suffixes are counted as their own mention type.
            pa_typ = 'Sffx' if m.isSuffix else m.typ
            if k != 0:
                pos_dict['in class'][pa_typ] += 1
                _tally_pronoun(m, k, pa_typ, 'in class', pronoun_pos_class_dict)
            else:  # class 0 holds the singletons
                pos_dict['singleton'][pa_typ] += 1
                _tally_pronoun(m, k, pa_typ, 'singleton', pronoun_pos_sing_dict)
            pos_dict['total'][pa_typ] += 1
    return pos_dict, pronoun_dict, pronoun_pos_class_dict, pronoun_pos_sing_dict
def PrintCorefPattern(c, k, suffix_errors):
    """Print a compact, space-separated summary of coref class *c*.

    Each mention is shown as 'surface -type [pgn]' followed by its note.
    Suffix mentions whose surface appears in *suffix_errors* are flagged
    as corrupt annotations instead of resolved via suffix_dict.

    NOTE(review): relies on a module-level `divide` separator string —
    confirm it is defined before this is called.
    """
    print(f'C{k} Who/what: {c.id} /', end=' ')
    if c.id != 'Singletons':
        # Real chains additionally show their first mention and its type.
        print(f'first: {c.first().surface}, type: {c.first().typ}', end='\n')
    print(divide)
    corrupt_surfaces = [err[2] for err in suffix_errors]
    for mention in c.terms:
        if mention.isSuffix:
            if mention.surface in corrupt_surfaces:
                tag = '!CORRUPT ANN'
            else:
                tag = suffix_dict[mention.surface][0]
            label = f'{mention.surface} -{mention.typ} {tag}'
        elif mention.typ in {'VP', 'PPrP'}:
            label = f'{mention.surface} -{mention.typ} {converse_pgn(F, mention.start)}'
        else:
            label = f'{mention.surface} -{mention.typ}'
        print(label, f'{mention.note} ', end=' ')
    print('\n')
def SearchClassMention(i, results_set, c, k, suffix_errors, what):
    """If any mention of chain *c* has surface *what*, print the whole
    chain as a tab-separated table plus its mention-type pattern, and add
    'C<k>' to *results_set*.

    Singleton classes are ignored.  The table is re-printed once per
    matching mention, with *i* (a running match counter) shown in the
    heading; the incremented counter itself is not returned.

    Returns *results_set* (also mutated in place).
    """
    corrupt = [err[2] for err in suffix_errors]
    if c.id == 'Singletons':
        return results_set
    for hit in c.terms:
        if hit.surface != what:
            continue
        i += 1
        results_set.add(f'C{k}')
        shape = []
        print(f'C{k} Who: {c.id}, first: {c.first().surface}, '
              f'type: {c.first().typ}, corpus class: {i}', end='\n\n')
        print('verse', 'type', 'pgn', 'ann', '', 'gloss', 'note',
              sep='\t', end='\n')
        for t in c.terms:
            book, chapter, verse = T.sectionFromNode(t.start)
            gloss = F.gloss.v(L.u(t.start, 'lex')[0])
            if not t.isSuffix and t.typ in {'VP', 'PPrP'}:
                pgn = converse_pgn(F, t.start)
                shape.append(f'{t.typ} {pgn}')
                print(verse, t.typ, pgn, f'{t.surface} ', f'{gloss}',
                      t.note, sep='\t', end='\n')
            elif t.isSuffix and t.surface in corrupt:
                print(verse, t.typ, ' ', f'{t.surface} ', '', f'{gloss}',
                      '!CORRUPT ANN', t.note, sep='\t', end='\n')
                shape.append('Sfx')
            elif t.isSuffix:
                sfx_pgn = suffix_dict[t.surface][0]
                print(verse, t.typ, sfx_pgn, f'{t.surface} ', '', ' ',
                      t.note, sep='\t', end='\n')
                shape.append(f'Sfx {sfx_pgn}')
            else:
                print(verse, t.typ, '', f'{t.surface} ', f'{gloss}',
                      t.note, sep='\t', end='\n')
                shape.append(t.typ)
        print('Pattern: ', shape)
        print('\n')
    return results_set
def Pattern(i, lst, c, k, suffix_errors, mention_type, pgn_form):
    """Print the full mention pattern of class *c* when its FIRST mention
    matches the requested filter, and record 'C<k>' in *lst*.

    Two filter modes select a class:
      * exact: first mention has typ == mention_type AND its surface is
        in pgn_form;
      * fallback: typ matches with pgn_form == [], or the surface matches
        with mention_type == ''.

    Singleton classes are skipped.  *lst* is mutated in place and the
    function returns None.

    NOTE(review): the heading shows the *pre*-increment value of the
    match counter *i* (it was built before `i += 1` in the original),
    and the incremented counter was never read or returned — preserved.
    """
    bad_words = [e[2] for e in suffix_errors]
    heading = (f'C{k} Who: {c.id}, first: {c.first().surface}, '
               f'type: {c.first().typ}, corpus class: {i}')
    if c.id == 'Singletons':
        return
    first = c.first()
    if first.typ == mention_type and first.surface in pgn_form:
        _print_pattern_table(heading, lst, c, k, bad_words, suffix_gap='')
    elif (first.typ == mention_type and pgn_form == []) or \
            (first.surface in pgn_form and mention_type == ''):
        _print_pattern_table(heading, lst, c, k, bad_words, suffix_gap=' ')


def _print_pattern_table(heading, lst, c, k, bad_words, suffix_gap):
    """Shared body of Pattern's two filter branches (was copy-pasted).

    Emits the heading, a column header and one tab-separated row per
    mention, then the collected mention-type pattern.  *suffix_gap*
    preserves a historical quirk of the original: plain-suffix rows
    print '' in the gloss column for the exact filter and ' ' for the
    fallback filter.
    """
    print(heading, end='\n\n')
    print('verse', 'type', 'pgn', 'ann', '', 'gloss', 'note',
          sep='\t', end='\n')
    lst.append(f'C{k}')
    pattern_list = []
    for m in c.terms:
        book, chapter, verse = T.sectionFromNode(m.start)
        gloss = F.gloss.v(L.u(m.start, 'lex')[0])
        if m.isSuffix and m.surface in bad_words:
            print(verse, m.typ, ' ', f'{m.surface} ', '', f'{gloss}',
                  '!CORRUPT ANN', m.note, sep='\t', end='\n')
            pattern_list.append('Sfx')
        elif m.isSuffix:
            pgn_suffix = suffix_dict[m.surface][0]
            print(verse, m.typ, pgn_suffix, f'{m.surface} ', '', suffix_gap,
                  m.note, sep='\t', end='\n')
            pattern_list.append(f'Sfx {pgn_suffix}')
        else:
            # Only VP/PPrP mentions carry a PGN feature.
            det = converse_pgn(F, m.start) if m.typ in {'VP', 'PPrP'} else ''
            pattern_list.append(f'{m.typ} {det}')
            print(verse, m.typ, det, f'{m.surface} ', f'{gloss}',
                  m.note, sep='\t', end='\n')
    print('Pattern: ', pattern_list)
    print('\n')
def Get(c, k, suffix_errors):
    """Print a tab-separated table with one row per mention of class *c*.

    Each row shows verse, mention id/name, type, PGN, surface, gloss and
    note; suffix mentions listed in *suffix_errors* are flagged as
    corrupt annotations instead of resolved via suffix_dict.
    """
    separator = '-' * 70
    corrupt = [err[2] for err in suffix_errors]
    print(f'C{k} Who/what: {c.id} /', end=' ')
    if c.id != 'Singletons':
        # Real chains additionally show their first mention and its type.
        print(f'first: {c.first().surface}, type: {c.first().typ}', end='\n')
    print(separator)
    print('verse', 'id', 'type', 'pgn', 'ann', '', 'gloss', 'note',
          sep='\t', end='\n\n')
    for m in c.terms:
        gloss = F.gloss.v(L.u(m.start, 'lex')[0])
        book, chapter, verse = T.sectionFromNode(m.start)
        if m.isSuffix:
            if m.surface in corrupt:
                row = (verse, m.name, m.typ, ' ', f'{m.surface} ', '',
                       f'{gloss}', '!CORRUPT ANN', m.note)
            else:
                row = (verse, m.name, m.typ, suffix_dict[m.surface][0],
                       f'{m.surface} ', '', ' ', m.note)
        elif m.typ in {'VP', 'PPrP'}:
            row = (verse, m.name, m.typ, converse_pgn(F, m.start),
                   f'{m.surface} ', f'{gloss}', m.note)
        else:
            row = (verse, m.name, m.typ, '', f'{m.surface} ',
                   f'{gloss}', m.note)
        print(*row, sep='\t', end='\n')
    print('\n')