def subannotation_data():
    """Build and parse the 'test_sub' discourse.

    Three tiers are created: a ``SegmentTier`` for the 'phone' type, an
    ``OrthographyTier`` for the 'word' type, and a second
    ``OrthographyTier`` named 'stop_information' attached to 'phone' whose
    ``subannotation`` attribute is set to ``True``.  Each tier is filled
    with ``(label, begin, end)`` tuples and the tiers are parsed with a
    ``BaseParser`` using a phone -> word hierarchy.

    Returns the result of ``parser.parse_discourse('test_sub')``.
    """
    phone_tier = SegmentTier('label', 'phone')
    word_tier = OrthographyTier('label', 'word')
    stop_info_tier = OrthographyTier('stop_information', 'phone')
    # Flag the third tier as a subannotation tier.
    stop_info_tier.subannotation = True

    phone_intervals = [
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('t', 0.2, 0.3), ('s', 0.3, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('u', 0.9, 1.0), ('t', 1.0, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ]
    word_intervals = [
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ]
    stop_intervals = [
        ('burst', 0, 0.05), ('vot', 0.05, 0.1),
        ('closure', 0.2, 0.25), ('burst', 0.25, 0.26), ('vot', 0.26, 0.3),
        ('closure', 2.2, 2.25), ('burst', 2.25, 2.26), ('vot', 2.26, 2.3),
        ('voicing_during_closure', 2.2, 2.23),
        ('voicing_during_closure', 2.24, 2.25),
    ]

    phone_tier.add(phone_intervals)
    word_tier.add(word_intervals)
    stop_info_tier.add(stop_intervals)

    tiers = [phone_tier, word_tier, stop_info_tier]
    type_hierarchy = Hierarchy({'phone': 'word', 'word': None})
    return BaseParser(tiers, type_hierarchy).parse_discourse('test_sub')
def corpus_data_syllable_morpheme_srur():
    """Build and parse the 'test_syllable_morpheme' discourse.

    Six tiers are created (segment 'sr', transcription 'ur', syllable
    grouping, morphemes, orthographic word, and line grouping), each is
    filled with its interval tuples, and the set is parsed with a
    ``BaseParser`` using a phone -> syllable -> word -> line hierarchy.

    Returns the result of
    ``parser.parse_discourse('test_syllable_morpheme')``.
    """
    # Each entry pairs a tier with the intervals that are added to it.
    # Labelled tiers take (label, begin, end); grouping tiers take
    # (begin, end) only.
    tier_contents = [
        (SegmentTier('sr', 'phone', label=True), [
            ('b', 0, 0.1), ('aa', 0.1, 0.2), ('k', 0.2, 0.3),
            ('s', 0.3, 0.4), ('ah', 0.4, 0.5), ('s', 0.5, 0.6),
            ('er', 0.7, 0.8),
            ('f', 0.9, 1.0), ('er', 1.0, 1.1),
            ('p', 1.2, 1.3), ('ae', 1.3, 1.4), ('k', 1.4, 1.5),
            ('eng', 1.5, 1.6),
        ]),
        (TranscriptionTier('ur', 'word'), [
            ('b.aa.k.s-ah.z', 0, 0.6), ('aa.r', 0.7, 0.8),
            ('f.ao.r', 0.9, 1.1), ('p.ae.k-ih.ng', 1.2, 1.6),
        ]),
        (GroupingTier('syllable', 'syllable'), [
            (0, 0.3), (0.3, 0.6), (0.7, 0.8),
            (0.9, 1.1), (1.2, 1.5), (1.5, 1.6),
        ]),
        (MorphemeTier('morphemes', 'word'), [
            ('box-PL', 0, 0.6), ('are', 0.7, 0.8),
            ('for', 0.9, 1.1), ('pack-PROG', 1.2, 1.6),
        ]),
        (OrthographyTier('word', 'word'), [
            ('boxes', 0, 0.6), ('are', 0.7, 0.8),
            ('for', 0.9, 1.1), ('packing', 1.2, 1.6),
        ]),
        (GroupingTier('line', 'line'), [
            (0, 1.6),
        ]),
    ]

    tiers = []
    for tier, intervals in tier_contents:
        tier.add(intervals)
        tiers.append(tier)

    type_hierarchy = Hierarchy({
        'phone': 'syllable',
        'syllable': 'word',
        'word': 'line',
        'line': None,
    })
    return BaseParser(tiers, type_hierarchy).parse_discourse(
        'test_syllable_morpheme')
def corpus_data_ur_sr():
    """Build and parse the 'test_ursr' discourse.

    Three tiers are created: a ``SegmentTier`` 'sr' for the 'phone' type,
    an ``OrthographyTier`` 'word' for the 'word' type, and a
    ``TranscriptionTier`` 'ur' that is also attached to 'word'.  The tiers
    are filled with ``(label, begin, end)`` tuples and parsed with a
    ``BaseParser`` using a phone -> word hierarchy.

    Returns the result of ``parser.parse_discourse('test_ursr')``.
    """
    levels = [
        SegmentTier('sr', 'phone'),
        OrthographyTier('word', 'word'),
        TranscriptionTier('ur', 'word'),
    ]

    srs = [
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('s', 0.2, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('u', 0.9, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.25),
        ('ah', 2.25, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ]
    words = [
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ]
    # Every 'ur' interval shares its begin/end with the entry at the same
    # position in `words`; 't.uw' therefore starts at 2.6, like 'too'.
    urs = [
        ('k.ae.t.s', 0.0, 0.4), ('aa.r', 0.5, 0.7), ('k.y.uw.t', 0.8, 1.1),
        ('d.aa.g.z', 2.0, 2.4), ('aa.r', 2.4, 2.6), ('t.uw', 2.6, 2.8),
        ('ay', 3.0, 3.1), ('g.eh.s', 3.3, 3.6),
    ]

    levels[0].add(srs)
    levels[1].add(words)
    levels[2].add(urs)

    hierarchy = Hierarchy({'phone': 'word', 'word': None})
    parser = BaseParser(levels, hierarchy)
    data = parser.parse_discourse('test_ursr')
    return data
def corpus_data_timed():
    """Build and parse the 'test_timed' discourse.

    Three tiers are created: a ``SegmentTier`` for the 'phone' type, an
    ``OrthographyTier`` for the 'word' type, and a ``GroupingTier`` for the
    'line' type.  Phones and words are given as ``(label, begin, end)``
    tuples and lines as ``(begin, end)`` tuples.  The tiers are parsed with
    a ``BaseParser`` using a phone -> word -> line hierarchy.

    Returns the result of ``parser.parse_discourse('test_timed')``.
    """
    phone_tier = SegmentTier('label', 'phone')
    word_tier = OrthographyTier('label', 'word')
    line_tier = GroupingTier('line', 'line')
    tiers = [phone_tier, word_tier, line_tier]

    phone_intervals = [
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('t', 0.2, 0.3), ('s', 0.3, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('uw', 0.9, 1.0), ('t', 1.0, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ]
    word_intervals = [
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ]
    line_intervals = [(0.0, 1.1), (2.0, 2.8), (3.0, 3.6)]

    # Fill the tiers in the same order they were created.
    contents = (phone_intervals, word_intervals, line_intervals)
    for tier, intervals in zip(tiers, contents):
        tier.add(intervals)

    type_hierarchy = Hierarchy({'phone': 'word', 'word': 'line', 'line': None})
    return BaseParser(tiers, type_hierarchy).parse_discourse('test_timed')