def test_language_region(self):
    # Language tags carrying a region code should parse into exactly a
    # language part and a region part.
    # Columns: code, language subtag, language name, region subtag, region name.
    accepted = [
        ('en-us', 'en', 'English', 'us', 'United States'),
        ('en-gb', 'en', 'English', 'gb', 'United Kingdom'),
        ('es-419', 'es', 'Spanish', '419', 'Latin America and the Caribbean'),
    ]
    for code, lang, lang_name, region, region_name in accepted:
        parsed = parse_code(code)
        self.assertTagDesc(parsed['language'], lang, lang_name)
        self.assertTagDesc(parsed['region'], region, region_name)
        self.assertNil(parsed, ['extlang', 'script', 'variants',
                                'extensions', 'grandfathered'])

    rejected = [
        # A region on its own, with no language in front of it.
        '419',
        'gb',
        # A valid region does not rescue an invalid language.
        'cheese-gb',
        # An invalid region after a valid language.
        'en-murica',
    ]
    for code in rejected:
        self.assertMalformed(code)
def test_normalization_grandfathered(self):
    # Each of these tags is expected to be reported under a different
    # language subtag, with every other part (including 'grandfathered')
    # passing assertNil.
    # Columns: input code, expected language subtag, expected description.
    expectations = [
        ('i-navajo', 'nv', 'Navajo'),
        ('art-lojban', 'jbo', 'Lojban'),
    ]
    for code, subtag, description in expectations:
        parsed = parse_code(code)
        self.assertTagDesc(parsed['language'], subtag, description)
        self.assertNil(parsed, ['extlang', 'script', 'region', 'variants',
                                'extensions', 'grandfathered'])
def test_normalization_extlangs(self):
    # For these two-part codes the second subtag is expected to be
    # reported as the language itself, and the 'extlang' part is expected
    # to pass assertNil along with everything else.
    # Columns: input code, expected language subtag, expected description.
    expectations = [
        ('sgn-ase', 'ase', 'American Sign Language'),
        ('ar-abh', 'abh', 'Tajiki Arabic'),
    ]
    for code, subtag, description in expectations:
        parsed = parse_code(code)
        self.assertTagDesc(parsed['language'], subtag, description)
        self.assertNil(parsed, ['extlang', 'script', 'region', 'variants',
                                'extensions', 'grandfathered'])
def test_normalization_languages(self):
    # 'ji' is expected to come back as 'yi'; the script part is kept.
    yiddish = parse_code('ji-Latn')
    self.assertTagDesc(yiddish['language'], 'yi', 'Yiddish')
    self.assertTagDesc(yiddish['script'], 'latn', 'Latin')
    self.assertNil(yiddish, ['extlang', 'region', 'variants',
                             'extensions', 'grandfathered'])

    # 'iw' is expected to come back as 'he'; the 'u' extension is kept.
    hebrew = parse_code('iw-u-foo-bar')
    expected_extensions = {'u': ['foo', 'bar']}
    self.assertTagDesc(hebrew['language'], 'he', 'Hebrew')
    self.assertEqual(hebrew['extensions'], expected_extensions)
    self.assertNil(hebrew, ['extlang', 'script', 'region', 'variants',
                            'grandfathered'])
def test_normalization_redundant(self):
    # According to the original author's note, these codes are not special
    # cases; they merely appear in the subtag registry as examples.
    gan = parse_code('zh-gan')
    self.assertTagDesc(gan['language'], 'gan', 'Gan Chinese')
    self.assertNil(gan, ['extlang', 'script', 'region', 'variants',
                         'extensions', 'grandfathered'])

    mandarin = parse_code('zh-cmn-Hans')
    self.assertTagDesc(mandarin['language'], 'cmn', 'Mandarin Chinese')
    self.assertTagDesc(mandarin['script'], 'hans',
                       'Han (Simplified variant)')
    self.assertNil(mandarin, ['extlang', 'region', 'variants',
                              'extensions', 'grandfathered'])
def test_normalization_suppress_script(self):
    # For English, an explicit 'Latn' script is expected to be dropped.
    english = parse_code('en-Latn')
    self.assertTagDesc(english['language'], 'en', 'English')
    self.assertNil(english, ['extlang', 'script', 'region', 'variants',
                             'extensions', 'grandfathered'])

    # Same for Icelandic, while the region and private-use extension stay.
    icelandic = parse_code('is-Latn-IS-x-puffins')
    expected_extensions = {'x': ['puffins']}
    self.assertTagDesc(icelandic['language'], 'is', 'Icelandic')
    self.assertTagDesc(icelandic['region'], 'is', 'Iceland')
    self.assertEqual(icelandic['extensions'], expected_extensions)
    self.assertNil(icelandic, ['extlang', 'script', 'variants',
                               'grandfathered'])

    # For Japanese, 'Latn' is expected to be kept as the script part.
    japanese = parse_code('ja-Latn')
    self.assertTagDesc(japanese['language'], 'ja', 'Japanese')
    self.assertTagDesc(japanese['script'], 'latn', 'Latin')
    self.assertNil(japanese, ['extlang', 'region', 'variants',
                              'extensions', 'grandfathered'])
def __getitem__(self, k):
    """Return the lookup result for the language code ``k``.

    ``k`` is parsed with ``parse_code``. The language, region and script
    parts are each reduced to their lower-cased ``'subtag'`` value, or to
    ``None`` when the parsed part is falsy, and then handed to
    ``self._lookup_dict.lookup`` in that order.
    """
    parsed = parse_code(k)

    # Collect the three subtags in the same order the lookup expects them.
    subtags = []
    for part_name in ('language', 'region', 'script'):
        if parsed[part_name]:
            subtags.append(parsed[part_name]['subtag'].lower())
        else:
            subtags.append(None)

    language, region, script = subtags
    return self._lookup_dict.lookup(language, region, script)
def test_language_script_region(self):
    # Language + script + region tags should yield exactly those three parts.
    # Each case is: code, then (subtag, description) for the language,
    # the script and the region.
    accepted = [
        ('en-Arab-us',
         ('en', 'English'), ('arab', 'Arabic'), ('us', 'United States')),
        ('sr-Cyrl-RS',
         ('sr', 'Serbian'), ('cyrl', 'Cyrillic'), ('rs', 'Serbia')),
    ]
    for code, language, script, region in accepted:
        parsed = parse_code(code)
        self.assertTagDesc(parsed['language'], language[0], language[1])
        self.assertTagDesc(parsed['script'], script[0], script[1])
        self.assertTagDesc(parsed['region'], region[0], region[1])
        self.assertNil(parsed, ['extlang', 'variants', 'extensions',
                                'grandfathered'])

    rejected = [
        # Script and region with no language in front.
        'Latn-us',
        # One invalid part each: language, script, region.
        'minecraft-Latn-us',
        'en-cursive-us',
        'en-Latn-murica',
    ]
    for code in rejected:
        self.assertMalformed(code)
def test_language_script_region_variants(self):
    # A tag with language, script, region and one variant should expose
    # all four parts.
    armenian = parse_code('hy-Latn-IT-arevela')
    self.assertTagDesc(armenian['language'], 'hy', 'Armenian')
    self.assertTagDesc(armenian['script'], 'latn', 'Latin')
    self.assertTagDesc(armenian['region'], 'it', 'Italy')
    first_variant = armenian['variants'][0]
    self.assertTagDesc(first_variant, 'arevela', 'Eastern Armenian')
    self.assertNil(armenian, ['extlang', 'extensions', 'grandfathered'])

    # Dropping the language, or corrupting any single later part, must be
    # reported as malformed.
    rejected = [
        'Latn-IT-arevela',
        'hy-invalid-IT-arevela',
        'hy-Latn-invalid-arevela',
        'hy-Latn-IT-invalid',
    ]
    for code in rejected:
        self.assertMalformed(code)
def test_extensions(self):
    # Codes consisting only of a private-use ('x') extension.
    private_only = [
        ('x-cheese', {'x': ['cheese']}),
        ('x-cheese-and-crackers', {'x': ['cheese', 'and', 'crackers']}),
    ]
    for code, expected_extensions in private_only:
        parsed = parse_code(code)
        self.assertEqual(parsed['extensions'], expected_extensions)
        self.assertNil(parsed, ['language', 'extlang', 'script', 'region',
                                'variants', 'grandfathered'])

    # A language followed by a single multi-part extension.
    french = parse_code('fr-u-ham-and-swiss')
    self.assertTagDesc(french['language'], 'fr', 'French')
    self.assertEqual(french['extensions'], {'u': ['ham', 'and', 'swiss']})
    self.assertNil(french, ['extlang', 'script', 'region', 'variants',
                            'grandfathered'])

    # A fully-loaded tag followed by several extensions.
    armenian = parse_code('hy-Latn-IT-arevela-x-phonebook-a-foo-b-bar-baz')
    self.assertTagDesc(armenian['language'], 'hy', 'Armenian')
    self.assertTagDesc(armenian['script'], 'latn', 'Latin')
    self.assertTagDesc(armenian['region'], 'it', 'Italy')
    self.assertTagDesc(armenian['variants'][0], 'arevela',
                       'Eastern Armenian')
    self.assertEqual(armenian['extensions'],
                     {'x': ['phonebook'], 'a': ['foo'], 'b': ['bar', 'baz']})
    self.assertNil(armenian, ['extlang', 'grandfathered'])

    # An extension must carry data.
    empty_or_broken = [
        'x-',
        'x-x-foo',
        'x-a-foo',
        'x-foo-a',
        'x--eggs',
        'x-egg--dog',
        'x-egg-dog-',
    ]
    for code in empty_or_broken:
        self.assertMalformed(code)

    # Only the private-use extension may appear without a language.
    for code in ['a-foo', 'u-bar', 'u-bar-x-baz']:
        self.assertMalformed(code)
def test_language_region_variants(self):
    # Language + region + variant tags should yield exactly those parts.
    # Each case is: code, then (subtag, description) for the language,
    # the region and the first variant.
    accepted = [
        ('de-CH-1901',
         ('de', 'German'),
         ('ch', 'Switzerland'),
         ('1901', 'Traditional German orthography')),
        ('sl-it-nedis',
         ('sl', 'Slovenian'),
         ('it', 'Italy'),
         ('nedis', 'Natisone dialect')),
        ('fr-419-1694acad',
         ('fr', 'French'),
         ('419', 'Latin America and the Caribbean'),
         ('1694acad', 'Early Modern French')),
    ]
    for code, language, region, variant in accepted:
        parsed = parse_code(code)
        self.assertTagDesc(parsed['language'], language[0], language[1])
        self.assertTagDesc(parsed['region'], region[0], region[1])
        self.assertTagDesc(parsed['variants'][0], variant[0], variant[1])
        self.assertNil(parsed, ['extlang', 'script', 'extensions',
                                'grandfathered'])

    for code in ['419-1694acad', 'fr-2345-nedis', 'fr-ca-01010101']:
        self.assertMalformed(code)
def test_language_script(self):
    # A language followed by a script should parse; the second case writes
    # the script in upper case and still expects a lower-case subtag.
    # Columns: code, expected script subtag, expected script description.
    accepted = [
        ('zh-Hans', 'hans', 'Han (Simplified variant)'),
        ('zh-HANT', 'hant', 'Han (Traditional variant)'),
    ]
    for code, script, script_name in accepted:
        parsed = parse_code(code)
        self.assertTagDesc(parsed['language'], 'zh', 'Chinese')
        self.assertTagDesc(parsed['script'], script, script_name)
        self.assertNil(parsed, ['extlang', 'region', 'variants',
                                'extensions', 'grandfathered'])

    rejected = [
        # A script with no language in front of it.
        'Cyrl',
        'Hant',
        # A valid script does not rescue an invalid language.
        'kitties-Hant',
        # An invalid script after a valid language.
        'zh-Hannt',
    ]
    for code in rejected:
        self.assertMalformed(code)
def test_youtube(self):
    """Test a bunch of language codes YouTube uses.

    Every line of ``youtube_languages.txt``, which is read from the
    directory containing this file, must parse to a non-None result.
    """
    fixture_path = join(dirname(__file__), 'youtube_languages.txt')
    with open(fixture_path) as fixture:
        for raw_line in fixture:
            # Some of YouTube's codes use underscores as separators.
            # They are replaced with hyphens here; library users are
            # expected to do this kind of clean-up themselves.
            normalized = raw_line.strip().replace('_', '-')
            self.assertIsNotNone(parse_code(normalized))
def test_language_variants(self):
    # NOTE(review): the original also had a single-variant case for
    # 'sl-rozaj' (language 'sl' / 'Slovenian', variant 'rozaj' / 'Resian'),
    # but it was commented out. The reason is not visible here, so it
    # stays disabled.

    # A language followed by three variants, reported in the given order.
    parsed = parse_code('sl-rozaj-biske-1994')
    self.assertTagDesc(parsed['language'], 'sl', 'Slovenian')
    variant_expectations = [
        ('rozaj', 'Resian'),
        ('biske', 'The San Giorgio dialect of Resian'),
        ('1994', 'Standardized Resian orthography'),
    ]
    for position, (subtag, description) in enumerate(variant_expectations):
        self.assertTagDesc(parsed['variants'][position], subtag, description)
    self.assertNil(parsed, ['extlang', 'script', 'region', 'extensions',
                            'grandfathered'])

    rejected = [
        # Variants with no language in front of them.
        'rozaj',
        'rozaj-biske',
        # An invalid variant after a valid language.
        'sl-rozajbad',
    ]
    for code in rejected:
        self.assertMalformed(code)
def test_bare_language(self):
    # A plain language code on its own should parse into a language part
    # and nothing else.
    for code, description in [('en', 'English'), ('de', 'German')]:
        parsed = parse_code(code)
        self.assertTagDesc(parsed['language'], code, description)
        self.assertNil(parsed, ['extlang', 'script', 'region', 'variants',
                                'extensions', 'grandfathered'])

    # Upper- and mixed-case spellings must parse to the same result.
    for spelling in ('EN', 'eN'):
        self.assertEqual(parse_code('en'), parse_code(spelling))

    # Codes that are not valid languages must be rejected.
    for code in ('cheese', 'dogs'):
        self.assertMalformed(code)
def test_variant_prefixes_validation(self):
    for code in ('en-ase', 'is-acy-ar', 'jax-jax'):
        self.assertInvalid(code)

    # The variant 'solba' has a single registered prefix:
    #   Prefix: sl-rozaj
    # so it is invalid alone but accepted together with 'rozaj', in
    # either order.
    self.assertInvalid('sl-solba')
    for code in ('sl-solba-rozaj', 'sl-rozaj-solba'):
        self.assertIsNotNone(parse_code(code))

    # The variant '1994' has these registered prefixes:
    #   Prefix: sl-rozaj
    #   Prefix: sl-rozaj-biske
    #   Prefix: sl-rozaj-njiva
    #   Prefix: sl-rozaj-osojs
    #   Prefix: sl-rozaj-solba
    # so it is invalid whenever 'rozaj' is missing, and accepted in any
    # position once 'rozaj' is present.
    for code in ('sl-1994', 'sl-1994-solba'):
        self.assertInvalid(code)

    accepted_with_rozaj = (
        'sl-1994-rozaj',
        'sl-1994-solba-rozaj',
        'sl-solba-1994-rozaj',
        'sl-solba-rozaj-1994',
        'sl-rozaj-solba-1994',
    )
    for code in accepted_with_rozaj:
        self.assertIsNotNone(parse_code(code))
def assertMalformed(self, code):
    """Assert that ``parse_code(code)`` raises MalformedLanguageCodeException.

    Returns whatever ``assertRaises`` returns when given a callable.
    """
    expected_error = MalformedLanguageCodeException
    # assertRaises invokes the callable with the trailing arguments itself.
    return self.assertRaises(expected_error, parse_code, code)
def assertInvalid(self, code):
    """Assert that ``parse_code(code)`` raises InvalidLanguageCodeException.

    Returns whatever ``assertRaises`` returns when given a callable.
    """
    expected_error = InvalidLanguageCodeException
    # assertRaises invokes the callable with the trailing arguments itself.
    return self.assertRaises(expected_error, parse_code, code)