def corpus_data_ur_sr():
    """Build and parse the in-memory 'test_ursr' discourse.

    Three tiers are populated: a phone-level ``SegmentTier`` named 'sr', a
    word-level ``OrthographyTier`` named 'word' and a word-level
    ``TranscriptionTier`` named 'ur'.  Each 'ur' entry carries the same
    start/end times as the word it belongs to.

    Returns:
        The result of ``BaseParser.parse_discourse('test_ursr')`` for these
        tiers under a phone -> word hierarchy.
    """
    levels = [
        SegmentTier('sr', 'phone'),
        OrthographyTier('word', 'word'),
        TranscriptionTier('ur', 'word'),
    ]

    # (label, start, end) triples for the 'sr' phone tier.
    srs = [
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('s', 0.2, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('u', 0.9, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.25),
        ('ah', 2.25, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ]
    words = [
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ]
    urs = [
        ('k.ae.t.s', 0.0, 0.4), ('aa.r', 0.5, 0.7), ('k.y.uw.t', 0.8, 1.1),
        ('d.aa.g.z', 2.0, 2.4), ('aa.r', 2.4, 2.6),
        # Starts at 2.6 so it lines up with the word 'too' (2.6-2.8); a
        # start of .6 would not coincide with that word's boundary.
        ('t.uw', 2.6, 2.8),
        ('ay', 3.0, 3.1), ('g.eh.s', 3.3, 3.6),
    ]

    levels[0].add(srs)
    levels[1].add(words)
    levels[2].add(urs)

    hierarchy = Hierarchy({'phone': 'word', 'word': None})
    parser = BaseParser(levels, hierarchy)
    return parser.parse_discourse('test_ursr')
def corpus_data_syllable_morpheme_srur():
    """Build and parse the 'test_syllable_morpheme' discourse.

    Six tiers are filled in: phone-level 'sr' segments (created with
    ``label=True``), word-level 'ur' transcriptions, 'syllable' groupings,
    word-level 'morphemes', word-level 'word' orthography and a single
    'line' grouping.  The hierarchy handed to the parser is
    phone -> syllable -> word -> line.

    Returns:
        The result of ``BaseParser.parse_discourse('test_syllable_morpheme')``.
    """
    sr_tier = SegmentTier('sr', 'phone', label=True)
    ur_tier = TranscriptionTier('ur', 'word')
    syllable_tier = GroupingTier('syllable', 'syllable')
    morpheme_tier = MorphemeTier('morphemes', 'word')
    word_tier = OrthographyTier('word', 'word')
    line_tier = GroupingTier('line', 'line')

    sr_tier.add([
        ('b', 0, 0.1), ('aa', 0.1, 0.2), ('k', 0.2, 0.3),
        ('s', 0.3, 0.4), ('ah', 0.4, 0.5), ('s', 0.5, 0.6),
        ('er', 0.7, 0.8),
        ('f', 0.9, 1.0), ('er', 1.0, 1.1),
        ('p', 1.2, 1.3), ('ae', 1.3, 1.4), ('k', 1.4, 1.5),
        ('eng', 1.5, 1.6),
    ])
    ur_tier.add([
        ('b.aa.k.s-ah.z', 0, 0.6),
        ('aa.r', 0.7, 0.8),
        ('f.ao.r', 0.9, 1.1),
        ('p.ae.k-ih.ng', 1.2, 1.6),
    ])
    # Grouping tiers take bare (start, end) pairs with no label.
    syllable_tier.add([
        (0, 0.3), (0.3, 0.6), (0.7, 0.8), (0.9, 1.1), (1.2, 1.5), (1.5, 1.6),
    ])
    morpheme_tier.add([
        ('box-PL', 0, 0.6),
        ('are', 0.7, 0.8),
        ('for', 0.9, 1.1),
        ('pack-PROG', 1.2, 1.6),
    ])
    word_tier.add([
        ('boxes', 0, 0.6),
        ('are', 0.7, 0.8),
        ('for', 0.9, 1.1),
        ('packing', 1.2, 1.6),
    ])
    line_tier.add([(0, 1.6)])

    tiers = [sr_tier, ur_tier, syllable_tier, morpheme_tier, word_tier, line_tier]
    nesting = Hierarchy({
        'phone': 'syllable',
        'syllable': 'word',
        'word': 'line',
        'line': None,
    })
    return BaseParser(tiers, nesting).parse_discourse('test_syllable_morpheme')
def subannotation_data():
    """Build and parse the 'test_sub' discourse, including a subannotation tier.

    Besides the phone-level 'label' segments and word-level 'label'
    orthography, a third phone-level ``OrthographyTier`` named
    'stop_information' is created and has its ``subannotation`` attribute
    set to ``True`` before any intervals are added.

    Returns:
        The result of ``BaseParser.parse_discourse('test_sub')`` under a
        phone -> word hierarchy.
    """
    phone_tier = SegmentTier('label', 'phone')
    word_tier = OrthographyTier('label', 'word')
    stop_tier = OrthographyTier('stop_information', 'phone')
    stop_tier.subannotation = True

    phone_tier.add([
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('t', 0.2, 0.3), ('s', 0.3, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('u', 0.9, 1.0), ('t', 1.0, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ])
    word_tier.add([
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ])
    # Sub-intervals that fall inside individual phones of the phone tier.
    stop_tier.add([
        ('burst', 0, 0.05), ('vot', 0.05, 0.1),
        ('closure', 0.2, 0.25), ('burst', 0.25, 0.26), ('vot', 0.26, 0.3),
        ('closure', 2.2, 2.25), ('burst', 2.25, 2.26), ('vot', 2.26, 2.3),
        ('voicing_during_closure', 2.2, 2.23),
        ('voicing_during_closure', 2.24, 2.25),
    ])

    nesting = Hierarchy({'phone': 'word', 'word': None})
    reader = BaseParser([phone_tier, word_tier, stop_tier], nesting)
    return reader.parse_discourse('test_sub')
def corpus_data_untimed():
    """Build and parse the 'test_untimed' discourse from index-based tiers.

    Word-level entries are ``(label, index)`` pairs with indices 0..7; the
    'line' grouping tier receives ``(first, last)`` pairs over those same
    indices.  The hierarchy handed to the parser is word -> line.

    Returns:
        The result of ``BaseParser.parse_discourse('test_untimed')``.
    """
    transcription_labels = ['k.ae.t-s', 'aa.r', 'k.y.uw.t', 'd.aa.g-z',
                            'aa.r', 't.uw', 'ay', 'g.eh.s']
    morpheme_labels = ['cat-PL', 'are', 'cute', 'dog-PL',
                       'are', 'too', 'i', 'guess']
    word_labels = ['cats', 'are', 'cute', 'dogs',
                   'are', 'too', 'i', 'guess']

    def _indexed(labels):
        # Pair each label with its position: [(label, 0), (label, 1), ...]
        return [(label, position) for position, label in enumerate(labels)]

    transcription_tier = TextTranscriptionTier('transcription', 'word')
    spelling_tier = TextOrthographyTier('spelling', 'word')
    morpheme_tier = TextMorphemeTier('morpheme', 'word')
    line_tier = GroupingTier('line', 'line')

    transcription_tier.add(_indexed(transcription_labels))
    spelling_tier.add(_indexed(word_labels))
    morpheme_tier.add(_indexed(morpheme_labels))
    line_tier.add([(0, 2), (3, 5), (6, 7)])

    tiers = [transcription_tier, spelling_tier, morpheme_tier, line_tier]
    nesting = Hierarchy({'word': 'line', 'line': None})
    return BaseParser(tiers, nesting).parse_discourse('test_untimed')
def corpus_data_syllable_morpheme_srur():
    """Assemble the 'test_syllable_morpheme' discourse and parse it.

    Tiers, in the order given to the parser: 'sr' segments on phones
    (``label=True``), 'ur' transcriptions on words, 'syllable' groupings,
    'morphemes' on words, 'word' orthography and a 'line' grouping.  The
    hierarchy is phone -> syllable -> word -> line.

    Returns:
        The result of ``BaseParser.parse_discourse('test_syllable_morpheme')``.
    """
    tiers = [
        SegmentTier('sr', 'phone', label=True),
        TranscriptionTier('ur', 'word'),
        GroupingTier('syllable', 'syllable'),
        MorphemeTier('morphemes', 'word'),
        OrthographyTier('word', 'word'),
        GroupingTier('line', 'line'),
    ]

    # One payload per tier, in the same order as ``tiers``.
    payloads = (
        # sr
        [('b', 0, 0.1), ('aa', 0.1, 0.2), ('k', 0.2, 0.3), ('s', 0.3, 0.4),
         ('ah', 0.4, 0.5), ('s', 0.5, 0.6), ('er', 0.7, 0.8), ('f', 0.9, 1.0),
         ('er', 1.0, 1.1), ('p', 1.2, 1.3), ('ae', 1.3, 1.4), ('k', 1.4, 1.5),
         ('eng', 1.5, 1.6)],
        # ur
        [('b.aa.k.s-ah.z', 0, 0.6), ('aa.r', 0.7, 0.8), ('f.ao.r', 0.9, 1.1),
         ('p.ae.k-ih.ng', 1.2, 1.6)],
        # syllable
        [(0, 0.3), (0.3, 0.6), (0.7, 0.8), (0.9, 1.1), (1.2, 1.5), (1.5, 1.6)],
        # morphemes
        [('box-PL', 0, 0.6), ('are', 0.7, 0.8), ('for', 0.9, 1.1),
         ('pack-PROG', 1.2, 1.6)],
        # word
        [('boxes', 0, 0.6), ('are', 0.7, 0.8), ('for', 0.9, 1.1),
         ('packing', 1.2, 1.6)],
        # line
        [(0, 1.6)],
    )
    for tier, payload in zip(tiers, payloads):
        tier.add(payload)

    nesting = Hierarchy({'phone': 'syllable', 'syllable': 'word',
                         'word': 'line', 'line': None})
    reader = BaseParser(tiers, nesting)
    return reader.parse_discourse('test_syllable_morpheme')
def corpus_data_untimed():
    """Assemble the 'test_untimed' discourse and parse it.

    The three word-level tiers ('transcription', 'spelling', 'morpheme')
    each receive eight ``(label, index)`` pairs; the 'line' grouping tier
    receives three ``(first, last)`` index pairs.  The hierarchy is
    word -> line.

    Returns:
        The result of ``BaseParser.parse_discourse('test_untimed')``.
    """
    tiers = [
        TextTranscriptionTier('transcription', 'word'),
        TextOrthographyTier('spelling', 'word'),
        TextMorphemeTier('morpheme', 'word'),
        GroupingTier('line', 'line'),
    ]

    # Payloads are listed in tier order: transcription, spelling, morpheme, line.
    payloads = (
        [('k.ae.t-s', 0), ('aa.r', 1), ('k.y.uw.t', 2), ('d.aa.g-z', 3),
         ('aa.r', 4), ('t.uw', 5), ('ay', 6), ('g.eh.s', 7)],
        [('cats', 0), ('are', 1), ('cute', 2), ('dogs', 3),
         ('are', 4), ('too', 5), ('i', 6), ('guess', 7)],
        [('cat-PL', 0), ('are', 1), ('cute', 2), ('dog-PL', 3),
         ('are', 4), ('too', 5), ('i', 6), ('guess', 7)],
        [(0, 2), (3, 5), (6, 7)],
    )
    for tier, payload in zip(tiers, payloads):
        tier.add(payload)

    reader = BaseParser(tiers, Hierarchy({'word': 'line', 'line': None}))
    return reader.parse_discourse('test_untimed')
def corpus_data_ur_sr():
    """Assemble the 'test_ursr' discourse and parse it.

    Tiers: 'sr' segments on phones, 'word' orthography on words and 'ur'
    transcriptions on words.  Every 'ur' entry shares its start/end times
    with the corresponding word entry.

    Returns:
        The result of ``BaseParser.parse_discourse('test_ursr')`` under a
        phone -> word hierarchy.
    """
    sr_tier = SegmentTier('sr', 'phone')
    word_tier = OrthographyTier('word', 'word')
    ur_tier = TranscriptionTier('ur', 'word')

    sr_tier.add([
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('s', 0.2, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('u', 0.9, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.25),
        ('ah', 2.25, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ])
    word_tier.add([
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ])
    ur_tier.add([
        ('k.ae.t.s', 0.0, 0.4), ('aa.r', 0.5, 0.7), ('k.y.uw.t', 0.8, 1.1),
        ('d.aa.g.z', 2.0, 2.4), ('aa.r', 2.4, 2.6),
        # 2.6, not .6: the transcription has to share the boundaries of
        # the word 'too' (2.6-2.8) like every other entry in this tier.
        ('t.uw', 2.6, 2.8),
        ('ay', 3.0, 3.1), ('g.eh.s', 3.3, 3.6),
    ])

    hierarchy = Hierarchy({'phone': 'word', 'word': None})
    parser = BaseParser([sr_tier, word_tier, ur_tier], hierarchy)
    return parser.parse_discourse('test_ursr')
def corpus_data_timed():
    """Build and parse the 'test_timed' discourse from interval tiers.

    Tiers: phone-level ``SegmentTier`` 'label', word-level
    ``OrthographyTier`` 'label' and a ``GroupingTier`` 'line'.  The
    hierarchy handed to the parser is phone -> word -> line.

    Returns:
        The result of ``BaseParser.parse_discourse('test_timed')``.
    """
    phone_tier = SegmentTier('label', 'phone')
    word_tier = OrthographyTier('label', 'word')
    line_tier = GroupingTier('line', 'line')

    phone_tier.add([
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('t', 0.2, 0.3), ('s', 0.3, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('uw', 0.9, 1.0), ('t', 1.0, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ])
    word_tier.add([
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ])
    # Each line spans from its first word's start to its last word's end.
    line_tier.add([(0.0, 1.1), (2.0, 2.8), (3.0, 3.6)])

    nesting = Hierarchy({'phone': 'word', 'word': 'line', 'line': None})
    reader = BaseParser([phone_tier, word_tier, line_tier], nesting)
    return reader.parse_discourse('test_timed')
def subannotation_data():
    """Assemble the 'test_sub' discourse and parse it.

    The third tier, a phone-level ``OrthographyTier`` named
    'stop_information', is flagged with ``subannotation = True`` before any
    data is attached.  The other two tiers are the phone-level 'label'
    segments and the word-level 'label' orthography.

    Returns:
        The result of ``BaseParser.parse_discourse('test_sub')`` under a
        phone -> word hierarchy.
    """
    tiers = [
        SegmentTier('label', 'phone'),
        OrthographyTier('label', 'word'),
        OrthographyTier('stop_information', 'phone'),
    ]
    tiers[-1].subannotation = True

    phone_rows = [
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('t', 0.2, 0.3), ('s', 0.3, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('u', 0.9, 1.0), ('t', 1.0, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ]
    word_rows = [
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ]
    stop_rows = [
        ('burst', 0, 0.05), ('vot', 0.05, 0.1),
        ('closure', 0.2, 0.25), ('burst', 0.25, 0.26), ('vot', 0.26, 0.3),
        ('closure', 2.2, 2.25), ('burst', 2.25, 2.26), ('vot', 2.26, 2.3),
        ('voicing_during_closure', 2.2, 2.23),
        ('voicing_during_closure', 2.24, 2.25),
    ]

    # Attach the rows tier by tier, in list order.
    for tier, rows in zip(tiers, (phone_rows, word_rows, stop_rows)):
        tier.add(rows)

    reader = BaseParser(tiers, Hierarchy({'phone': 'word', 'word': None}))
    return reader.parse_discourse('test_sub')
def corpus_data_untimed():
    """Create the "test_untimed" discourse data from index-based tiers.

    Populates word-level "transcription", "spelling" and "morpheme" tiers
    with ``(label, index)`` pairs and a "line" grouping tier with
    ``(first, last)`` index pairs, then parses them under a word -> line
    hierarchy.

    Returns:
        The result of ``BaseParser.parse_discourse("test_untimed")``.
    """
    transcription_tier = TextTranscriptionTier("transcription", "word")
    spelling_tier = TextOrthographyTier("spelling", "word")
    morpheme_tier = TextMorphemeTier("morpheme", "word")
    line_tier = GroupingTier("line", "line")

    transcription_tier.add(
        [
            ("k.ae.t-s", 0),
            ("aa.r", 1),
            ("k.y.uw.t", 2),
            ("d.aa.g-z", 3),
            ("aa.r", 4),
            ("t.uw", 5),
            ("ay", 6),
            ("g.eh.s", 7),
        ]
    )
    spelling_tier.add(
        [
            ("cats", 0),
            ("are", 1),
            ("cute", 2),
            ("dogs", 3),
            ("are", 4),
            ("too", 5),
            ("i", 6),
            ("guess", 7),
        ]
    )
    morpheme_tier.add(
        [
            ("cat-PL", 0),
            ("are", 1),
            ("cute", 2),
            ("dog-PL", 3),
            ("are", 4),
            ("too", 5),
            ("i", 6),
            ("guess", 7),
        ]
    )
    line_tier.add([(0, 2), (3, 5), (6, 7)])

    nesting = Hierarchy({"word": "line", "line": None})
    tiers = [transcription_tier, spelling_tier, morpheme_tier, line_tier]
    return BaseParser(tiers, nesting).parse_discourse("test_untimed")
def subannotation_data():
    """Create the "test_sub" discourse data with a subannotation tier.

    A phone-level "label" segment tier and a word-level "label" orthography
    tier are joined by a phone-level "stop_information" tier whose
    ``subannotation`` attribute is switched on before it is filled.

    Returns:
        The result of ``BaseParser.parse_discourse("test_sub")`` under a
        phone -> word hierarchy.
    """
    segment_tier = SegmentTier("label", "phone")
    orthography_tier = OrthographyTier("label", "word")
    stop_info_tier = OrthographyTier("stop_information", "phone")
    stop_info_tier.subannotation = True

    phone_intervals = [
        ("k", 0.0, 0.1), ("ae", 0.1, 0.2), ("t", 0.2, 0.3), ("s", 0.3, 0.4),
        ("aa", 0.5, 0.6), ("r", 0.6, 0.7),
        ("k", 0.8, 0.9), ("u", 0.9, 1.0), ("t", 1.0, 1.1),
        ("d", 2.0, 2.1), ("aa", 2.1, 2.2), ("g", 2.2, 2.3), ("z", 2.3, 2.4),
        ("aa", 2.4, 2.5), ("r", 2.5, 2.6),
        ("t", 2.6, 2.7), ("uw", 2.7, 2.8),
        ("ay", 3.0, 3.1),
        ("g", 3.3, 3.4), ("eh", 3.4, 3.5), ("s", 3.5, 3.6),
    ]
    word_intervals = [
        ("cats", 0.0, 0.4), ("are", 0.5, 0.7), ("cute", 0.8, 1.1),
        ("dogs", 2.0, 2.4), ("are", 2.4, 2.6), ("too", 2.6, 2.8),
        ("i", 3.0, 3.1), ("guess", 3.3, 3.6),
    ]
    stop_intervals = [
        ("burst", 0, 0.05), ("vot", 0.05, 0.1),
        ("closure", 0.2, 0.25), ("burst", 0.25, 0.26), ("vot", 0.26, 0.3),
        ("closure", 2.2, 2.25), ("burst", 2.25, 2.26), ("vot", 2.26, 2.3),
        ("voicing_during_closure", 2.2, 2.23),
        ("voicing_during_closure", 2.24, 2.25),
    ]

    segment_tier.add(phone_intervals)
    orthography_tier.add(word_intervals)
    stop_info_tier.add(stop_intervals)

    nesting = Hierarchy({"phone": "word", "word": None})
    reader = BaseParser([segment_tier, orthography_tier, stop_info_tier], nesting)
    return reader.parse_discourse("test_sub")
def corpus_data_ur_sr():
    """Create the "test_ursr" discourse data.

    Populates a phone-level "sr" segment tier, a word-level "word"
    orthography tier and a word-level "ur" transcription tier, then parses
    them under a phone -> word hierarchy.  Each "ur" entry uses the same
    start/end times as its word.

    Returns:
        The result of ``BaseParser.parse_discourse("test_ursr")``.
    """
    levels = [SegmentTier("sr", "phone"), OrthographyTier("word", "word"), TranscriptionTier("ur", "word")]
    srs = [
        ("k", 0.0, 0.1),
        ("ae", 0.1, 0.2),
        ("s", 0.2, 0.4),
        ("aa", 0.5, 0.6),
        ("r", 0.6, 0.7),
        ("k", 0.8, 0.9),
        ("u", 0.9, 1.1),
        ("d", 2.0, 2.1),
        ("aa", 2.1, 2.2),
        ("g", 2.2, 2.25),
        ("ah", 2.25, 2.3),
        ("z", 2.3, 2.4),
        ("aa", 2.4, 2.5),
        ("r", 2.5, 2.6),
        ("t", 2.6, 2.7),
        ("uw", 2.7, 2.8),
        ("ay", 3.0, 3.1),
        ("g", 3.3, 3.4),
        ("eh", 3.4, 3.5),
        ("s", 3.5, 3.6),
    ]
    words = [
        ("cats", 0.0, 0.4),
        ("are", 0.5, 0.7),
        ("cute", 0.8, 1.1),
        ("dogs", 2.0, 2.4),
        ("are", 2.4, 2.6),
        ("too", 2.6, 2.8),
        ("i", 3.0, 3.1),
        ("guess", 3.3, 3.6),
    ]
    urs = [
        ("k.ae.t.s", 0.0, 0.4),
        ("aa.r", 0.5, 0.7),
        ("k.y.uw.t", 0.8, 1.1),
        ("d.aa.g.z", 2.0, 2.4),
        ("aa.r", 2.4, 2.6),
        # Start is 2.6 (not 0.6) so the entry matches the word "too",
        # which spans 2.6-2.8 in ``words``.
        ("t.uw", 2.6, 2.8),
        ("ay", 3.0, 3.1),
        ("g.eh.s", 3.3, 3.6),
    ]
    levels[0].add(srs)
    levels[1].add(words)
    levels[2].add(urs)

    hierarchy = Hierarchy({"phone": "word", "word": None})
    parser = BaseParser(levels, hierarchy)
    return parser.parse_discourse("test_ursr")
def corpus_data_timed():
    """Create the "test_timed" discourse data from interval tiers.

    Fills a phone-level "label" segment tier, a word-level "label"
    orthography tier and a "line" grouping tier, then parses them under a
    phone -> word -> line hierarchy.

    Returns:
        The result of ``BaseParser.parse_discourse("test_timed")``.
    """
    tiers = [
        SegmentTier("label", "phone"),
        OrthographyTier("label", "word"),
        GroupingTier("line", "line"),
    ]

    phone_rows = [
        ("k", 0.0, 0.1), ("ae", 0.1, 0.2), ("t", 0.2, 0.3), ("s", 0.3, 0.4),
        ("aa", 0.5, 0.6), ("r", 0.6, 0.7),
        ("k", 0.8, 0.9), ("u", 0.9, 1.0), ("t", 1.0, 1.1),
        ("d", 2.0, 2.1), ("aa", 2.1, 2.2), ("g", 2.2, 2.3), ("z", 2.3, 2.4),
        ("aa", 2.4, 2.5), ("r", 2.5, 2.6),
        ("t", 2.6, 2.7), ("uw", 2.7, 2.8),
        ("ay", 3.0, 3.1),
        ("g", 3.3, 3.4), ("eh", 3.4, 3.5), ("s", 3.5, 3.6),
    ]
    word_rows = [
        ("cats", 0.0, 0.4), ("are", 0.5, 0.7), ("cute", 0.8, 1.1),
        ("dogs", 2.0, 2.4), ("are", 2.4, 2.6), ("too", 2.6, 2.8),
        ("i", 3.0, 3.1), ("guess", 3.3, 3.6),
    ]
    line_rows = [(0.0, 1.1), (2.0, 2.8), (3.0, 3.6)]

    # Rows are attached in the same order the tiers were declared.
    for tier, rows in zip(tiers, (phone_rows, word_rows, line_rows)):
        tier.add(rows)

    nesting = Hierarchy({"phone": "word", "word": "line", "line": None})
    return BaseParser(tiers, nesting).parse_discourse("test_timed")
def corpus_data_syllable_morpheme_srur():
    """Create the "test_syllable_morpheme" discourse data.

    Six tiers are filled: "sr" segments on phones, "ur" transcriptions on
    words, "syllable" groupings, "morphemes" on words, "word" orthography
    and one "line" grouping.  They are parsed under a
    phone -> syllable -> word -> line hierarchy.

    Returns:
        The result of ``BaseParser.parse_discourse("test_syllable_morpheme")``.
    """
    sr_tier = SegmentTier("sr", "phone")
    ur_tier = TranscriptionTier("ur", "word")
    syllable_tier = GroupingTier("syllable", "syllable")
    morpheme_tier = MorphemeTier("morphemes", "word")
    word_tier = OrthographyTier("word", "word")
    line_tier = GroupingTier("line", "line")

    sr_rows = [
        ("b", 0, 0.1), ("aa", 0.1, 0.2), ("k", 0.2, 0.3),
        ("s", 0.3, 0.4), ("ah", 0.4, 0.5), ("s", 0.5, 0.6),
        ("er", 0.7, 0.8),
        ("f", 0.9, 1.0), ("er", 1.0, 1.1),
        ("p", 1.2, 1.3), ("ae", 1.3, 1.4), ("k", 1.4, 1.5),
        ("eng", 1.5, 1.6),
    ]
    ur_rows = [
        ("b.aa.k.s-ah.z", 0, 0.6),
        ("aa.r", 0.7, 0.8),
        ("f.ao.r", 0.9, 1.1),
        ("p.ae.k-ih.ng", 1.2, 1.6),
    ]
    syllable_rows = [(0, 0.3), (0.3, 0.6), (0.7, 0.8), (0.9, 1.1), (1.2, 1.5), (1.5, 1.6)]
    morpheme_rows = [
        ("box-PL", 0, 0.6),
        ("are", 0.7, 0.8),
        ("for", 0.9, 1.1),
        ("pack-PROG", 1.2, 1.6),
    ]
    word_rows = [
        ("boxes", 0, 0.6),
        ("are", 0.7, 0.8),
        ("for", 0.9, 1.1),
        ("packing", 1.2, 1.6),
    ]
    line_rows = [(0, 1.6)]

    sr_tier.add(sr_rows)
    ur_tier.add(ur_rows)
    syllable_tier.add(syllable_rows)
    morpheme_tier.add(morpheme_rows)
    word_tier.add(word_rows)
    line_tier.add(line_rows)

    nesting = Hierarchy({"phone": "syllable", "syllable": "word", "word": "line", "line": None})
    tiers = [sr_tier, ur_tier, syllable_tier, morpheme_tier, word_tier, line_tier]
    return BaseParser(tiers, nesting).parse_discourse("test_syllable_morpheme")
def corpus_data_timed():
    """Assemble the 'test_timed' discourse and parse it.

    Phones, words and lines are supplied as hand-written intervals on a
    'label' segment tier, a 'label' orthography tier and a 'line' grouping
    tier.  The hierarchy is phone -> word -> line.

    Returns:
        The result of ``BaseParser.parse_discourse('test_timed')``.
    """
    tiers = [
        SegmentTier('label', 'phone'),
        OrthographyTier('label', 'word'),
        GroupingTier('line', 'line'),
    ]
    segment_tier, orthography_tier, grouping_tier = tiers

    segment_tier.add([
        ('k', 0.0, 0.1), ('ae', 0.1, 0.2), ('t', 0.2, 0.3), ('s', 0.3, 0.4),
        ('aa', 0.5, 0.6), ('r', 0.6, 0.7),
        ('k', 0.8, 0.9), ('uw', 0.9, 1.0), ('t', 1.0, 1.1),
        ('d', 2.0, 2.1), ('aa', 2.1, 2.2), ('g', 2.2, 2.3), ('z', 2.3, 2.4),
        ('aa', 2.4, 2.5), ('r', 2.5, 2.6),
        ('t', 2.6, 2.7), ('uw', 2.7, 2.8),
        ('ay', 3.0, 3.1),
        ('g', 3.3, 3.4), ('eh', 3.4, 3.5), ('s', 3.5, 3.6),
    ])
    orthography_tier.add([
        ('cats', 0.0, 0.4), ('are', 0.5, 0.7), ('cute', 0.8, 1.1),
        ('dogs', 2.0, 2.4), ('are', 2.4, 2.6), ('too', 2.6, 2.8),
        ('i', 3.0, 3.1), ('guess', 3.3, 3.6),
    ])
    # Unlabelled (start, end) pairs, one per line of words.
    grouping_tier.add([(0.0, 1.1), (2.0, 2.8), (3.0, 3.6)])

    reader = BaseParser(
        tiers,
        Hierarchy({'phone': 'word', 'word': 'line', 'line': None}),
    )
    return reader.parse_discourse('test_timed')