def test_basic(textgrid_test_dir):
    """Check the word and phone tiers parsed from ``phone_word.TextGrid``.

    The 'word' tier is expected to hold three annotations ('', 'a', '')
    whose 'phone' references span (0, 1), (1, 3) and (3, 4).  The 'phone'
    tier is expected to hold four timed base annotations.

    Parameters
    ----------
    textgrid_test_dir : str
        Directory containing the TextGrid test files.
    """
    path = os.path.join(textgrid_test_dir, 'phone_word.TextGrid')
    annotation_types = [
        AnnotationType('word', 'phone', None, anchor=True),
        AnnotationType('phone', None, None, base=True),
    ]
    data = textgrid_to_data(path, annotation_types)

    # (label, begin, end) of the 'phone' reference for each expected word.
    expected_words = []
    for label, begin, end in (('', 0, 1), ('a', 1, 3), ('', 3, 4)):
        word = Annotation(label)
        word.references.append('phone')
        word.begins.append(begin)
        word.ends.append(end)
        expected_words.append(word)
    assert data['word']._list == expected_words

    expected_phones = [
        BaseAnnotation('#', 0, 0.25),
        BaseAnnotation('a', 0.25, 0.5),
        BaseAnnotation('b', 0.5, 0.75),
        BaseAnnotation('#', 0.75, 1),
    ]
    assert data['phone']._list == expected_phones
Example #2
0
def test_ilg_data(ilg_test_dir):
    """Check the raw data produced by ``ilg_to_data`` for ``test_basic.txt``.

    The 'spelling' tier is expected to hold the words 'a', 'a', 'b', each
    referencing a two-segment span of the 'transcription' tier, and the
    'transcription' tier is expected to hold the segments a, b, a, b, c, d.

    Parameters
    ----------
    ilg_test_dir : str
        Directory containing the interlinear gloss test files.
    """
    basic_path = os.path.join(ilg_test_dir, 'test_basic.txt')

    transcription_attribute = Attribute('transcription', 'tier')
    transcription_attribute.delimiter = '.'

    spelling_type = AnnotationType(
        'spelling', 'transcription', None, token=False, anchor=True)
    transcription_type = AnnotationType(
        'transcription', None, None,
        token=False, base=True, attribute=transcription_attribute)
    transcription_type.trans_delimiter = '.'

    data = ilg_to_data(basic_path, [spelling_type, transcription_type])

    # (label, begin, end) of the 'transcription' reference for each word.
    expected_words = []
    for label, begin, end in (('a', 0, 2), ('a', 2, 4), ('b', 4, 6)):
        word = Annotation(label)
        word.references.append('transcription')
        word.begins.append(begin)
        word.ends.append(end)
        expected_words.append(word)

    expected_segments = [
        BaseAnnotation(label) for label in ('a', 'b', 'a', 'b', 'c', 'd')
    ]

    assert data['spelling']._list == expected_words
    assert data['transcription']._list == expected_segments
Example #3
0
def test_ilg_mismatched(ilg_test_dir):
    """Check that loading ``test_mismatched.txt`` raises ILGWordMismatchError.

    Parameters
    ----------
    ilg_test_dir : str
        Directory containing the interlinear gloss test files.
    """
    mismatched_path = os.path.join(ilg_test_dir, 'test_mismatched.txt')

    ats = [
        AnnotationType('spelling',
                       'transcription',
                       None,
                       token=False,
                       anchor=True),
        AnnotationType('transcription',
                       None,
                       None,
                       token=False,
                       base=True,
                       attribute=Attribute('transcription', 'tier'))
    ]
    ats[1].trans_delimiter = '.'
    with pytest.raises(ILGWordMismatchError):
        # The call is expected to raise, so its result is not bound.
        load_discourse_ilg('test', mismatched_path, ats)
Example #4
0
def test_ilg_basic(ilg_test_dir):
    """Load ``test_basic.txt`` as a discourse and check a lexicon frequency.

    After loading, the lexicon entry found for 'a' is expected to have a
    frequency of 2.

    Parameters
    ----------
    ilg_test_dir : str
        Directory containing the interlinear gloss test files.
    """
    basic_path = os.path.join(ilg_test_dir, 'test_basic.txt')

    transcription_attribute = Attribute('transcription', 'tier')
    transcription_attribute.delimiter = '.'

    spelling_type = AnnotationType(
        'spelling', 'transcription', None, token=False, anchor=True)
    transcription_type = AnnotationType(
        'transcription', None, None,
        token=False, base=True, attribute=transcription_attribute)
    transcription_type.trans_delimiter = '.'

    corpus = load_discourse_ilg(
        'test', basic_path, [spelling_type, transcription_type])

    # Diagnostic output, shown by pytest when the test fails.
    print(corpus.words)
    print(corpus.lexicon.words)

    entry = corpus.lexicon.find('a')
    assert entry.frequency == 2
def test_two_speakers(textgrid_test_dir):
    """Check the collapsed tiers parsed from ``2speakers.TextGrid``.

    Four annotation types (a word and a phone tier for each of
    'Speaker 1' and 'Speaker 2') are loaded, ``collapse_speakers`` is
    called on the result, and the 'word' and 'phone' entries are compared
    against the expected dictionaries.

    Parameters
    ----------
    textgrid_test_dir : str
        Directory containing the TextGrid test files.
    """
    path = os.path.join(textgrid_test_dir, '2speakers.TextGrid')
    annotation_types = [
        AnnotationType('Speaker 1 - word', 'Speaker 1 - phone', None,
                       anchor=True, speaker='Speaker 1'),
        AnnotationType('Speaker 1 - phone', None, None,
                       base=True, speaker='Speaker 1'),
        AnnotationType('Speaker 2 - word', 'Speaker 2 - phone', None,
                       anchor=True, speaker='Speaker 2'),
        AnnotationType('Speaker 2 - phone', None, None,
                       base=True, speaker='Speaker 2'),
    ]
    data = textgrid_to_data(path, annotation_types)
    data.collapse_speakers()
    print(data['word']._list)

    # (label, phone span) for each expected word entry.
    word_rows = (
        ('', (0, 1)),
        ('a', (1, 3)),
        ('b', (3, 5)),
        ('a', (5, 7)),
        ('c', (7, 9)),
        ('', (9, 10)),
    )
    expected_words = [
        {'label': label, 'token': {}, 'phone': span}
        for label, span in word_rows
    ]
    assert data['word']._list == expected_words

    # (label, begin, end) for each expected phone entry.
    phone_rows = (
        ('', 0, 0.1),
        ('a', 0.1, 0.2),
        ('b', 0.2, 0.3),
        ('c', 0.3, 0.4),
        ('d', 0.4, 0.5),
        ('a', 0.5, 0.6),
        ('b', 0.6, 0.7),
        ('d', 0.7, 0.8),
        ('e', 0.8, 0.9),
        ('', 0.9, 1),
    )
    expected_phones = [
        {'label': label, 'begin': begin, 'end': end}
        for label, begin, end in phone_rows
    ]
    assert data['phone']._list == expected_phones
Example #6
0
def numeric_annotation_type():
    """Return an AnnotationType named 'test' with a 'numeric' Attribute."""
    annotation_type = AnnotationType('test', None, None)
    numeric_attribute = Attribute('test', 'numeric')
    annotation_type.attribute = numeric_attribute
    return annotation_type
Example #7
0
def transcription_annotation_type():
    """Return an AnnotationType named 'test' with a 'tier' Attribute.

    The returned object has ``trans_delimiter`` set to '.'.
    """
    annotation_type = AnnotationType('test', None, None)
    annotation_type.trans_delimiter = '.'
    tier_attribute = Attribute('test', 'tier')
    annotation_type.attribute = tier_attribute
    return annotation_type
Example #8
0
def spelling_annotation_type():
    """Return an AnnotationType named 'test' with a 'spelling' Attribute."""
    annotation_type = AnnotationType('test', None, None)
    spelling_attribute = Attribute('test', 'spelling')
    annotation_type.attribute = spelling_attribute
    return annotation_type
Example #9
0
def numeric_annotation_type():
    """Build a 'test' AnnotationType and attach a 'numeric' Attribute to it."""
    result = AnnotationType('test', None, None)
    result.attribute = Attribute('test', 'numeric')
    return result
Example #10
0
def transcription_annotation_type():
    """Build a 'test' AnnotationType with a '.' delimiter and 'tier' Attribute.

    ``trans_delimiter`` is assigned before the attribute is attached.
    """
    result = AnnotationType('test', None, None)
    result.trans_delimiter = '.'
    result.attribute = Attribute('test', 'tier')
    return result
Example #11
0
def spelling_annotation_type():
    """Build a 'test' AnnotationType and attach a 'spelling' Attribute to it."""
    result = AnnotationType('test', None, None)
    result.attribute = Attribute('test', 'spelling')
    return result