コード例 #1
0
def test_ilg_basic(ilg_test_dir):
    """Load ``test_basic.txt`` with ``load_discourse_ilg`` and check 'a'.

    The discourse is loaded under the name 'test' using a 'spelling'
    annotation type and a 'transcription' annotation type; the lexicon
    entry found for 'a' is expected to have a frequency of 2.
    """
    source_file = os.path.join(ilg_test_dir, 'test_basic.txt')

    # The attribute and the transcription annotation type both use '.'
    # as their delimiter.
    transcription_attribute = Attribute('transcription', 'tier')
    transcription_attribute.delimiter = '.'

    spelling_type = AnnotationType(
        'spelling', 'transcription', None,
        token=False, anchor=True,
    )
    transcription_type = AnnotationType(
        'transcription', None, None,
        token=False, base=True,
        attribute=transcription_attribute,
    )
    transcription_type.trans_delimiter = '.'
    annotation_types = [spelling_type, transcription_type]

    corpus = load_discourse_ilg('test', source_file, annotation_types)

    # Diagnostic output kept from the original test.
    print(corpus.words)
    print(corpus.lexicon.words)

    entry = corpus.lexicon.find('a')
    assert entry.frequency == 2
コード例 #2
0
def test_ilg_data(ilg_test_dir):
    """Parse ``test_basic.txt`` with ``ilg_to_data`` and compare both tiers.

    The 'spelling' entry of the parsed data must equal three annotations
    ('a', 'a', 'b') that each reference 'transcription' with begin/end
    pairs (0, 2), (2, 4) and (4, 6). The 'transcription' entry must equal
    base annotations for 'a', 'b', 'a', 'b', 'c', 'd'.
    """
    source_file = os.path.join(ilg_test_dir, 'test_basic.txt')

    transcription_attribute = Attribute('transcription', 'tier')
    transcription_attribute.delimiter = '.'

    spelling_type = AnnotationType(
        'spelling', 'transcription', None,
        token=False, anchor=True,
    )
    transcription_type = AnnotationType(
        'transcription', None, None,
        token=False, base=True,
        attribute=transcription_attribute,
    )
    transcription_type.trans_delimiter = '.'

    data = ilg_to_data(source_file, [spelling_type, transcription_type])

    # (label, begin, end) for every expected spelling annotation.
    word_specs = (
        ('a', 0, 2),
        ('a', 2, 4),
        ('b', 4, 6),
    )
    expected_words = []
    for label, begin, end in word_specs:
        word = Annotation(label)
        word.references.append('transcription')
        word.begins.append(begin)
        word.ends.append(end)
        expected_words.append(word)

    expected_segments = [
        BaseAnnotation(symbol)
        for symbol in ('a', 'b', 'a', 'b', 'c', 'd')
    ]

    assert data['spelling']._list == expected_words
    assert data['transcription']._list == expected_segments
コード例 #3
0
def test_ilg_basic(ilg_test_dir):
    """Check the frequency of 'a' in the lexicon built from ``test_basic.txt``.

    The file is loaded through ``load_discourse_ilg`` under the name 'test'
    with one 'spelling' and one 'transcription' annotation type; the test
    passes when the lexicon entry found for 'a' reports a frequency of 2.
    """
    fixture = os.path.join(ilg_test_dir, 'test_basic.txt')

    tier = Attribute('transcription', 'tier')
    tier.delimiter = '.'

    annotation_types = [
        AnnotationType('spelling', 'transcription', None,
                       token=False, anchor=True),
        AnnotationType('transcription', None, None,
                       token=False, base=True, attribute=tier),
    ]
    # The second entry (created with base=True) also gets '.' as its
    # transcription delimiter.
    base_type = annotation_types[1]
    base_type.trans_delimiter = '.'

    corpus = load_discourse_ilg('test', fixture, annotation_types)

    # Diagnostic output kept from the original test.
    print(corpus.words)
    print(corpus.lexicon.words)

    frequency_of_a = corpus.lexicon.find('a').frequency
    assert frequency_of_a == 2
コード例 #4
0
def test_ilg_data(ilg_test_dir):
    """Verify the annotation data ``ilg_to_data`` yields for ``test_basic.txt``.

    Expected 'spelling' content: annotations 'a', 'a', 'b', each referencing
    'transcription' with begin/end pairs (0, 2), (2, 4) and (4, 6).
    Expected 'transcription' content: base annotations 'a', 'b', 'a', 'b',
    'c', 'd'.
    """

    def make_word(label, begin, end):
        # One expected spelling annotation referencing 'transcription'.
        word = Annotation(label)
        word.references.append('transcription')
        word.begins.append(begin)
        word.ends.append(end)
        return word

    fixture = os.path.join(ilg_test_dir, 'test_basic.txt')

    tier = Attribute('transcription', 'tier')
    tier.delimiter = '.'

    annotation_types = [
        AnnotationType('spelling', 'transcription', None,
                       token=False, anchor=True),
        AnnotationType('transcription', None, None,
                       token=False, base=True, attribute=tier),
    ]
    annotation_types[1].trans_delimiter = '.'

    data = ilg_to_data(fixture, annotation_types)

    expected_words = [
        make_word('a', 0, 2),
        make_word('a', 2, 4),
        make_word('b', 4, 6),
    ]
    expected_segments = [BaseAnnotation(symbol) for symbol in 'ababcd']

    assert data['spelling']._list == expected_words
    assert data['transcription']._list == expected_segments