Beispiel #1
0
def corpus_data_syllable_morpheme_srur():
    """Build and parse a small discourse with syllable and morpheme tiers.

    Six tiers are created, in this order: a labelled ``SegmentTier`` named
    'sr' (phone level), a ``TranscriptionTier`` named 'ur' (word level), a
    ``GroupingTier`` for syllables, a ``MorphemeTier``, an ``OrthographyTier``
    and a ``GroupingTier`` holding one line. Each tier then receives its
    annotations through ``add``.

    Labelled entries are 3-tuples and grouping entries are 2-tuples;
    presumably ``(label, begin, end)`` and ``(begin, end)`` -- confirm against
    the tier classes.

    Returns:
        Whatever ``BaseParser.parse_discourse('test_syllable_morpheme')``
        yields for the hierarchy phone -> syllable -> word -> line.
    """
    # Annotation data, grouped one word per row where that fits.
    surface_phones = [
        ('b', 0, 0.1), ('aa', 0.1, 0.2), ('k', 0.2, 0.3),
        ('s', 0.3, 0.4), ('ah', 0.4, 0.5), ('s', 0.5, 0.6),
        ('er', 0.7, 0.8),
        ('f', 0.9, 1.0), ('er', 1.0, 1.1),
        ('p', 1.2, 1.3), ('ae', 1.3, 1.4), ('k', 1.4, 1.5),
        ('eng', 1.5, 1.6),
    ]
    underlying_forms = [
        ('b.aa.k.s-ah.z', 0, 0.6),
        ('aa.r', 0.7, 0.8),
        ('f.ao.r', 0.9, 1.1),
        ('p.ae.k-ih.ng', 1.2, 1.6),
    ]
    syllable_spans = [
        (0, 0.3), (0.3, 0.6),
        (0.7, 0.8),
        (0.9, 1.1),
        (1.2, 1.5), (1.5, 1.6),
    ]
    morpheme_glosses = [
        ('box-PL', 0, 0.6),
        ('are', 0.7, 0.8),
        ('for', 0.9, 1.1),
        ('pack-PROG', 1.2, 1.6),
    ]
    orthography = [
        ('boxes', 0, 0.6),
        ('are', 0.7, 0.8),
        ('for', 0.9, 1.1),
        ('packing', 1.2, 1.6),
    ]
    line_spans = [(0, 1.6)]

    # Tiers are constructed up front, in the order the parser will see them.
    sr_tier = SegmentTier('sr', 'phone', label=True)
    ur_tier = TranscriptionTier('ur', 'word')
    syllable_tier = GroupingTier('syllable', 'syllable')
    morpheme_tier = MorphemeTier('morphemes', 'word')
    word_tier = OrthographyTier('word', 'word')
    line_tier = GroupingTier('line', 'line')
    levels = [sr_tier, ur_tier, syllable_tier,
              morpheme_tier, word_tier, line_tier]

    # Data sequence is positionally aligned with ``levels``.
    tier_data = (surface_phones, underlying_forms, syllable_spans,
                 morpheme_glosses, orthography, line_spans)
    for tier, annotations in zip(levels, tier_data):
        tier.add(annotations)

    containment = {
        'phone': 'syllable',
        'syllable': 'word',
        'word': 'line',
        'line': None,
    }
    parser = BaseParser(levels, Hierarchy(containment))
    return parser.parse_discourse('test_syllable_morpheme')
Beispiel #2
0
def corpus_data_untimed():
    """Build and parse a small discourse whose annotations carry no times.

    Four tiers are created, in this order: a ``TextTranscriptionTier``
    ('transcription'), a ``TextOrthographyTier`` ('spelling'), a
    ``TextMorphemeTier`` ('morpheme') -- all at word level -- and a
    ``GroupingTier`` for lines. Each tier then receives its annotations
    through ``add``.

    Word-level entries are ``(label, n)`` pairs with n running 0..7, and line
    entries are pairs of such integers; presumably word positions and
    inclusive first/last word positions -- confirm against the tier classes.

    Returns:
        Whatever ``BaseParser.parse_discourse('test_untimed')`` yields for
        the hierarchy word -> line.
    """
    # One row per line of the discourse.
    transcription_data = [
        ('k.ae.t-s', 0), ('aa.r', 1), ('k.y.uw.t', 2),
        ('d.aa.g-z', 3), ('aa.r', 4), ('t.uw', 5),
        ('ay', 6), ('g.eh.s', 7),
    ]
    spelling_data = [
        ('cats', 0), ('are', 1), ('cute', 2),
        ('dogs', 3), ('are', 4), ('too', 5),
        ('i', 6), ('guess', 7),
    ]
    morpheme_data = [
        ('cat-PL', 0), ('are', 1), ('cute', 2),
        ('dog-PL', 3), ('are', 4), ('too', 5),
        ('i', 6), ('guess', 7),
    ]
    line_data = [(0, 2), (3, 5), (6, 7)]

    transcription_tier = TextTranscriptionTier('transcription', 'word')
    spelling_tier = TextOrthographyTier('spelling', 'word')
    morpheme_tier = TextMorphemeTier('morpheme', 'word')
    line_tier = GroupingTier('line', 'line')
    levels = [transcription_tier, spelling_tier, morpheme_tier, line_tier]

    # Data sequence is positionally aligned with ``levels``.
    tier_data = (transcription_data, spelling_data, morpheme_data, line_data)
    for tier, annotations in zip(levels, tier_data):
        tier.add(annotations)

    containment = {'word': 'line', 'line': None}
    parser = BaseParser(levels, Hierarchy(containment))
    return parser.parse_discourse('test_untimed')
Beispiel #3
0
def corpus_data_timed():
    """Build and parse a small discourse of phones, words and lines.

    Three tiers are created, in this order: a ``SegmentTier`` at phone level,
    an ``OrthographyTier`` at word level (both named 'label') and a
    ``GroupingTier`` for lines. Each tier then receives its annotations
    through ``add``.

    Labelled entries are 3-tuples and line entries are 2-tuples; presumably
    ``(label, begin, end)`` and ``(begin, end)`` -- confirm against the tier
    classes.

    Returns:
        Whatever ``BaseParser.parse_discourse('test_timed')`` yields for the
        hierarchy phone -> word -> line.
    """
    # Phones, one word per row.
    phone_data = [
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('t', 0.2, 0.3), ('s', 0.3, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('uw', 0.9, 1.0), ('t', 1.0, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ]
    # Words, one line of the discourse per row.
    word_data = [
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ]
    line_data = [(0.0, 1.1), (2.0, 2.8), (3.0, 3.6)]

    phone_tier = SegmentTier('label', 'phone')
    word_tier = OrthographyTier('label', 'word')
    line_tier = GroupingTier('line', 'line')
    levels = [phone_tier, word_tier, line_tier]

    # Data sequence is positionally aligned with ``levels``.
    for tier, annotations in zip(levels, (phone_data, word_data, line_data)):
        tier.add(annotations)

    containment = {'phone': 'word', 'word': 'line', 'line': None}
    parser = BaseParser(levels, Hierarchy(containment))
    return parser.parse_discourse('test_timed')