def Search(lst, c, k, suffix_errors, what):
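    """Scan one coreference chain for mentions whose surface form equals `what`.

    For every match, print a tab-separated row (chapter:verse, chain label,
    mention type, pgn, surface, gloss, note) and record the chain in `lst`:
    'C{k}' for a real chain, '{chapter}:{verse}-Sing' for a singleton.
    `suffix_errors` is accepted but not used in this function.
    F, L and T are presumably module-level Text-Fabric API handles, and
    converse_pgn presumably returns a person-gender-number label.
    """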

    for m in c.terms:
        book, chapter, verse = T.sectionFromNode(m.start)
        gloss = F.gloss.v(L.u(m.start, 'lex')[0])
        if c.id != 'Singletons':
            if m.surface == what:
                det = (converse_pgn(F, m.start)
                       if m.typ in {'VP', 'PPrP'} else '')
                lst.append(f'C{k}')
                print(f'{chapter}:{verse}',
                      f'C{k}',
                      m.typ,
                      det,
                      m.surface,
                      '',
                      gloss,
                      m.note,
                      sep='\t',
                      end='\n')
        else:
            if m.surface == what:
                det = (converse_pgn(F, m.start)
                       if m.typ in {'VP', 'PPrP'} else '')
                lst.append(f'{chapter}:{verse}-Sing')
                print(f'{chapter}:{verse}',
                      'Sing',
                      m.typ,
                      det,
                      m.surface,
                      '',
                      gloss,
                      m.note,
                      sep='\t',
                      end='\n')
    return lst
def GetGraphData(corefs):
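    """Tally mention types and pronoun (pgn) forms over all coreference chains.

    Key 0 of `corefs` is treated as the singletons bucket. Returns four
    dictionaries: pos_dict (mention-type counts for 'first in chain',
    'in class', 'singleton' and 'total'), pronoun_dict (pgn counts for
    'in class' and 'singleton'), and pgn counts per mention type for chains
    (pronoun_pos_class_dict) and for singletons (pronoun_pos_sing_dict).
    Suffix mentions whose surface form is missing from suffix_dict are
    collected in annotation_errors, which is not returned as written.
    defaultdict comes from collections (imported at module level, not shown).
    """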
    annotation_errors = []
    pos_dict = defaultdict(lambda: defaultdict(int))
    pronoun_dict = defaultdict(lambda: defaultdict(int))
    pronoun_pos_class_dict = defaultdict(lambda: defaultdict(int))
    pronoun_pos_sing_dict = defaultdict(lambda: defaultdict(int))

    for k, c in corefs.items():
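        # chain key 0 holds the singleton mentions; every other key is a real chain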
        if k != 0:
            pos_dict['first in chain'][c.first().typ] += 1
        for m in c.terms:
            # No longer strictly necessary, but kept to make explicit that a
            # suffix is treated as a separate mention type
            pa_typ = 'Sffx' if m.isSuffix else m.typ
            if k != 0:
                pos_dict['in class'][pa_typ] += 1
                if pa_typ in {'VP', 'PPrP'}:
                    pgn = converse_pgn(F, m.start)
                    if pgn != '':
                        pronoun_dict['in class'][pgn] += 1
                        pronoun_pos_class_dict[pa_typ][pgn] += 1
                elif m.isSuffix:
                    if m.surface not in suffix_dict:
                        annotation_errors.append(
                            (k, m.start, m.surface, m.isSuffix))
                    else:
                        pgn_suffix = suffix_dict[m.surface][0]
                        pronoun_dict['in class'][pgn_suffix] += 1
                        pronoun_pos_class_dict[pa_typ][pgn_suffix] += 1
            else:  # Singletons
                pos_dict['singleton'][pa_typ] += 1
                if pa_typ in {'VP', 'PPrP'}:
                    pgn = converse_pgn(F, m.start)
                    if pgn != '':
                        pronoun_dict['singleton'][pgn] += 1
                        pronoun_pos_sing_dict[pa_typ][pgn] += 1
                elif m.isSuffix:
                    if m.surface not in suffix_dict:
                        annotation_errors.append(
                            (k, m.start, m.surface, m.isSuffix))
                    else:
                        pgn_suffix = suffix_dict[m.surface][0]
                        pronoun_dict['singleton'][pgn_suffix] += 1
                        pronoun_pos_sing_dict[pa_typ][pgn_suffix] += 1

            pos_dict['total'][pa_typ] += 1

    return pos_dict, pronoun_dict, pronoun_pos_class_dict, pronoun_pos_sing_dict
def PrintCorefPattern(c, k, suffix_errors):
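    """Print chain `c` in compact form: a one-line header, a divider, then every
    mention with its type, pgn and note on a single running line.

    The pgn comes from converse_pgn for VP/PPrP mentions and from suffix_dict
    for suffix mentions; suffixes listed in `suffix_errors` are flagged as
    '!CORRUPT ANN'. Relies on a module-level `divide` separator string.
    """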
    print(f'C{k} Who/what: {c.id} /', end=' ')
    if c.id != 'Singletons':
        print(f'first: {c.first().surface}, type: {c.first().typ}', end='\n')
    print(divide)
    bad_words = [e[2] for e in suffix_errors]
    for m in c.terms:
        if m.typ in {'VP', 'PPrP'} and not m.isSuffix:
            pgn = converse_pgn(F, m.start)
            print(f'{m.surface} -{m.typ} {pgn}', f'{m.note}  ', end=' ')

        elif m.isSuffix and m.surface in bad_words:
            print(f'{m.surface} -{m.typ} !CORRUPT ANN', f'{m.note}  ', end=' ')

        elif m.isSuffix:
            pgn_suffix = suffix_dict[m.surface][0]
            print(f'{m.surface} -{m.typ} {pgn_suffix}', f'{m.note}  ', end=' ')
        else:
            print(f'{m.surface} -{m.typ}', f'{m.note}  ', end=' ')
    print('\n')
def SearchClassMention(i, results_set, c, k, suffix_errors, what):
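    """For each mention in non-singleton chain `c` whose surface form equals
    `what`, add 'C{k}' to `results_set` and print the whole chain as a
    tab-separated table together with its mention-type/pgn pattern.

    Note that `i` is incremented only locally and is not returned, so the
    caller's counter does not advance. Returns `results_set`.
    """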
    bad_words = [e[2] for e in suffix_errors]
    if c.id != 'Singletons':
        for mention in c.terms:
            if what == mention.surface:
                i += 1
                results_set.add(f'C{k}')
                pattern_list = []

                print(
                    f'C{k} Who: {c.id}, first: {c.first().surface}, type: {c.first().typ}, corpus class: {i}',
                    end='\n\n')
                print('verse',
                      'type',
                      'pgn',
                      'ann',
                      '',
                      'gloss',
                      'note',
                      sep='\t',
                      end='\n')

                for m in c.terms:

                    book, chapter, verse = T.sectionFromNode(m.start)
                    gloss = F.gloss.v(L.u(m.start, 'lex')[0])

                    if m.typ in {'VP', 'PPrP'} and not m.isSuffix:
                        pgn = converse_pgn(F, m.start)
                        pattern_list.append(f'{m.typ} {pgn}')
                        print(verse,
                              m.typ,
                              pgn,
                              f'{m.surface}     ',
                              f'{gloss}',
                              m.note,
                              sep='\t',
                              end='\n')

                    elif m.isSuffix and m.surface in bad_words:
                        print(verse,
                              m.typ,
                              ' ',
                              f'{m.surface}    ',
                              '',
                              f'{gloss}',
                              '!CORRUPT ANN',
                              m.note,
                              sep='\t',
                              end='\n')
                        pattern_list.append('Sfx')

                    elif m.isSuffix:
                        pgn_suffix = suffix_dict[m.surface][0]
                        print(verse,
                              m.typ,
                              pgn_suffix,
                              f'{m.surface}    ',
                              '',
                              '    ',
                              m.note,
                              sep='\t',
                              end='\n')
                        pattern_list.append(f'Sfx {pgn_suffix}')

                    else:
                        print(verse,
                              m.typ,
                              '',
                              f'{m.surface}     ',
                              f'{gloss}',
                              m.note,
                              sep='\t',
                              end='\n')
                        pattern_list.append(m.typ)
                print('Pattern: ', pattern_list)
                print('\n')

    return results_set
def Pattern(i, lst, c, k, suffix_errors, mention_type, pgn_form):
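    """Print every non-singleton chain whose first mention matches the filter.

    The first branch requires both a mention type and a surface form from
    `pgn_form`; the second accepts either filter on its own (empty `pgn_form`
    list or empty `mention_type` string). For each matching chain, print a
    header, a tab-separated table of all mentions and the resulting pattern
    list, and append 'C{k}' to `lst`. As in SearchClassMention, `i` is
    incremented only locally and is not returned.
    """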
    bad_words = [e[2] for e in suffix_errors]

    heading = f'C{k} Who: {c.id}, first: {c.first().surface}, type: {c.first().typ}, corpus class: {i}'

    if c.id != 'Singletons':
        if c.first().typ == mention_type and c.first().surface in pgn_form:
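            # both filters given: the first mention must have the requested type
            # and its surface form must be one of the given pgn forms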
            i += 1
            print(heading, end='\n\n')
            print('verse',
                  'type',
                  'pgn',
                  'ann',
                  '',
                  'gloss',
                  'note',
                  sep='\t',
                  end='\n')
            lst.append(f'C{k}')
            pattern_list = []
            for m in c.terms:
                gloss = F.gloss.v(L.u(m.start, 'lex')[0])
                book, chapter, verse = T.sectionFromNode(m.start)

                if m.isSuffix and m.surface in bad_words:
                    print(verse,
                          m.typ,
                          ' ',
                          f'{m.surface}    ',
                          '',
                          f'{gloss}',
                          '!CORRUPT ANN',
                          m.note,
                          sep='\t',
                          end='\n')
                    pattern_list.append('Sfx')

                elif m.isSuffix:
                    pgn_suffix = suffix_dict[m.surface][0]
                    print(verse,
                          m.typ,
                          pgn_suffix,
                          f'{m.surface}    ',
                          '',
                          '',
                          m.note,
                          sep='\t',
                          end='\n')
                    pattern_list.append(f'Sfx {pgn_suffix}')

                elif not m.isSuffix:
                    det = (converse_pgn(F, m.start)
                           if m.typ in {'VP', 'PPrP'} else '')
                    pattern_list.append(f'{m.typ} {det}')
                    print(verse,
                          m.typ,
                          det,
                          f'{m.surface}     ',
                          f'{gloss}',
                          m.note,
                          sep='\t',
                          end='\n')
            print('Pattern: ', pattern_list)
            print('\n')

        elif (c.first().typ == mention_type and pgn_form == []) or \
            (c.first().surface in pgn_form and mention_type == ''):
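            # only one filter given: mention type alone (pgn_form empty) or
            # surface/pgn forms alone (mention_type empty)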
            i += 1
            print(heading, end='\n\n')
            print('verse',
                  'type',
                  'pgn',
                  'ann',
                  '',
                  'gloss',
                  'note',
                  sep='\t',
                  end='\n')
            lst.append(f'C{k}')
            pattern_list = []
            for m in c.terms:
                book, chapter, verse = T.sectionFromNode(m.start)
                gloss = F.gloss.v(L.u(m.start, 'lex')[0])

                if m.isSuffix and m.surface in bad_words:
                    print(verse,
                          m.typ,
                          ' ',
                          f'{m.surface}    ',
                          '',
                          f'{gloss}',
                          '!CORRUPT ANN',
                          m.note,
                          sep='\t',
                          end='\n')
                    pattern_list.append('Sfx')

                elif m.isSuffix:
                    pgn_suffix = suffix_dict[m.surface][0]
                    print(verse,
                          m.typ,
                          pgn_suffix,
                          f'{m.surface}    ',
                          '',
                          '    ',
                          m.note,
                          sep='\t',
                          end='\n')
                    pattern_list.append(f'Sfx {pgn_suffix}')

                elif not m.isSuffix:
                    det = (converse_pgn(F, m.start)
                           if m.typ in {'VP', 'PPrP'} else '')
                    pattern_list.append(f'{m.typ} {det}')
                    print(verse,
                          m.typ,
                          det,
                          f'{m.surface}     ',
                          f'{gloss}',
                          m.note,
                          sep='\t',
                          end='\n')
            print('Pattern: ', pattern_list)
            print('\n')
def Get(c, k, suffix_errors):
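    """Print chain `c` as a tab-separated table: one row per mention with its
    verse, id (m.name), type, pgn, surface, gloss and note.

    The pgn comes from converse_pgn for VP/PPrP mentions and from suffix_dict
    for suffix mentions; suffix surfaces listed in `suffix_errors` are flagged
    as '!CORRUPT ANN'.
    """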
    divide = '-' * 70
    bad_words = [e[2] for e in suffix_errors]
    print(f'C{k} Who/what: {c.id} /', end=' ')

    if c.id != 'Singletons':
        print(f'first: {c.first().surface}, type: {c.first().typ}', end='\n')
    print(divide)
    print('verse',
          'id',
          'type',
          'pgn',
          'ann',
          '',
          'gloss',
          'note',
          sep='\t',
          end='\n\n')

    for m in c.terms:
        gloss = F.gloss.v(L.u(m.start, 'lex')[0])
        book, chapter, verse = T.sectionFromNode(m.start)

        if m.typ in {'VP', 'PPrP'} and not m.isSuffix:
            pgn = converse_pgn(F, m.start)
            print(verse,
                  m.name,
                  m.typ,
                  pgn,
                  f'{m.surface}     ',
                  f'{gloss}',
                  m.note,
                  sep='\t',
                  end='\n')

        elif m.isSuffix and m.surface in bad_words:
            print(verse,
                  m.name,
                  m.typ,
                  ' ',
                  f'{m.surface}    ',
                  '',
                  f'{gloss}',
                  '!CORRUPT ANN',
                  m.note,
                  sep='\t',
                  end='\n')

        elif m.isSuffix:
            pgn_suffix = suffix_dict[m.surface][0]
            print(verse,
                  m.name,
                  m.typ,
                  pgn_suffix,
                  f'{m.surface}    ',
                  '',
                  '    ',
                  m.note,
                  sep='\t',
                  end='\n')

        else:
            print(verse,
                  m.name,
                  m.typ,
                  '',
                  f'{m.surface}     ',
                  f'{gloss}',
                  m.note,
                  sep='\t',
                  end='\n')
    print('\n')