def corpus_data_syllable_morpheme_srur():
    """Build and parse a small discourse with syllable and morpheme tiers.

    Six tiers are populated with hand-written annotations for the words
    "boxes are for packing": an 'sr' segment tier, a 'ur' transcription
    tier, a syllable grouping tier, a morpheme tier, an orthography tier
    and a single-line grouping tier. The tiers are handed to ``BaseParser``
    together with a phone -> syllable -> word -> line hierarchy.

    Returns:
        Whatever ``BaseParser.parse_discourse`` produces for the discourse
        named ``'test_syllable_morpheme'``.
    """
    # Annotation tuples carry a label (where the tier has one) followed by
    # two numbers -- presumably begin/end times; confirm against the tier API.
    surface_segments = [
        ('b', 0, 0.1), ('aa', 0.1, 0.2), ('k', 0.2, 0.3),
        ('s', 0.3, 0.4), ('ah', 0.4, 0.5), ('s', 0.5, 0.6),
        ('er', 0.7, 0.8),
        ('f', 0.9, 1.0), ('er', 1.0, 1.1),
        ('p', 1.2, 1.3), ('ae', 1.3, 1.4), ('k', 1.4, 1.5),
        ('eng', 1.5, 1.6),
    ]
    underlying_forms = [
        ('b.aa.k.s-ah.z', 0, 0.6),
        ('aa.r', 0.7, 0.8),
        ('f.ao.r', 0.9, 1.1),
        ('p.ae.k-ih.ng', 1.2, 1.6),
    ]
    syllable_spans = [
        (0, 0.3), (0.3, 0.6),
        (0.7, 0.8),
        (0.9, 1.1),
        (1.2, 1.5), (1.5, 1.6),
    ]
    morpheme_glosses = [
        ('box-PL', 0, 0.6),
        ('are', 0.7, 0.8),
        ('for', 0.9, 1.1),
        ('pack-PROG', 1.2, 1.6),
    ]
    orthography = [
        ('boxes', 0, 0.6),
        ('are', 0.7, 0.8),
        ('for', 0.9, 1.1),
        ('packing', 1.2, 1.6),
    ]
    line_spans = [(0, 1.6)]

    tiers = [
        SegmentTier('sr', 'phone', label=True),
        TranscriptionTier('ur', 'word'),
        GroupingTier('syllable', 'syllable'),
        MorphemeTier('morphemes', 'word'),
        OrthographyTier('word', 'word'),
        GroupingTier('line', 'line'),
    ]
    # Data sets are listed in the same order as the tiers they fill.
    tier_contents = (
        surface_segments,
        underlying_forms,
        syllable_spans,
        morpheme_glosses,
        orthography,
        line_spans,
    )
    for tier, annotations in zip(tiers, tier_contents):
        tier.add(annotations)

    type_hierarchy = Hierarchy({
        'phone': 'syllable',
        'syllable': 'word',
        'word': 'line',
        'line': None,
    })
    return BaseParser(tiers, type_hierarchy).parse_discourse(
        'test_syllable_morpheme')
def corpus_data_untimed():
    """Build and parse a small discourse whose annotations are index-based.

    Four tiers are populated for the eight words "cats are cute / dogs are
    too / i guess": a text transcription tier, a text orthography tier
    ('spelling'), a text morpheme tier and a line grouping tier. Word-level
    annotations are ``(label, index)`` pairs and lines are pairs of indices.
    The tiers are handed to ``BaseParser`` with a word -> line hierarchy.

    Returns:
        Whatever ``BaseParser.parse_discourse`` produces for the discourse
        named ``'test_untimed'``.
    """
    phonetic_forms = [
        ('k.ae.t-s', 0), ('aa.r', 1), ('k.y.uw.t', 2),
        ('d.aa.g-z', 3), ('aa.r', 4), ('t.uw', 5),
        ('ay', 6), ('g.eh.s', 7),
    ]
    morpheme_glosses = [
        ('cat-PL', 0), ('are', 1), ('cute', 2),
        ('dog-PL', 3), ('are', 4), ('too', 5),
        ('i', 6), ('guess', 7),
    ]
    spellings = [
        ('cats', 0), ('are', 1), ('cute', 2),
        ('dogs', 3), ('are', 4), ('too', 5),
        ('i', 6), ('guess', 7),
    ]
    # Each pair looks like the first and last word index of a line
    # (inclusive) -- confirm against GroupingTier.
    line_spans = [(0, 2), (3, 5), (6, 7)]

    tiers = [
        TextTranscriptionTier('transcription', 'word'),
        TextOrthographyTier('spelling', 'word'),
        TextMorphemeTier('morpheme', 'word'),
        GroupingTier('line', 'line'),
    ]
    # Data sets are listed in the same order as the tiers they fill.
    tier_contents = (phonetic_forms, spellings, morpheme_glosses, line_spans)
    for tier, annotations in zip(tiers, tier_contents):
        tier.add(annotations)

    type_hierarchy = Hierarchy({'word': 'line', 'line': None})
    return BaseParser(tiers, type_hierarchy).parse_discourse('test_untimed')
def corpus_data_timed():
    """Build and parse a small discourse with phone, word and line tiers.

    Three tiers are populated for the eight words "cats are cute / dogs are
    too / i guess": a segment tier of phones, an orthography tier of words
    and a line grouping tier. The tiers are handed to ``BaseParser`` with a
    phone -> word -> line hierarchy.

    Returns:
        Whatever ``BaseParser.parse_discourse`` produces for the discourse
        named ``'test_timed'``.
    """
    # Annotation tuples carry a label (where the tier has one) followed by
    # two numbers -- presumably begin/end times; confirm against the tier API.
    phone_intervals = [
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('t', 0.2, 0.3), ('s', 0.3, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('uw', 0.9, 1.0), ('t', 1.0, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ]
    word_intervals = [
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ]
    line_spans = [(0.0, 1.1), (2.0, 2.8), (3.0, 3.6)]

    tiers = [
        SegmentTier('label', 'phone'),
        OrthographyTier('label', 'word'),
        GroupingTier('line', 'line'),
    ]
    # Data sets are listed in the same order as the tiers they fill.
    tier_contents = (phone_intervals, word_intervals, line_spans)
    for tier, annotations in zip(tiers, tier_contents):
        tier.add(annotations)

    type_hierarchy = Hierarchy({'phone': 'word', 'word': 'line', 'line': None})
    return BaseParser(tiers, type_hierarchy).parse_discourse('test_timed')