Example #1
0
    def __init__(self):
        """Collect every code this converter accepts and index the synonyms.

        After construction:

        * ``codes`` holds ``LANGUAGES``, the codes of the babelfish ``alpha3b``,
          ``alpha2`` and ``name`` converters, and every synonym found in ``SYN``;
        * ``guessit_exceptions`` maps each lower-cased synonym to an
          ``(alpha3, country, None)`` tuple.
        """
        self.codes = set()
        self.guessit_exceptions = {}

        self.alpha3b = babelfish.get_language_converter('alpha3b')
        self.alpha2 = babelfish.get_language_converter('alpha2')
        self.name = babelfish.get_language_converter('name')

        builtin_codes = LANGUAGES | self.alpha3b.codes | self.alpha2.codes | self.name.codes
        self.codes |= builtin_codes

        for (alpha3, country), synonyms in SYN.items():
            target = (alpha3, country, None)
            for synonym in synonyms:
                # The lookup key is lower-cased; ``codes`` keeps the synonym as written in SYN.
                self.guessit_exceptions[synonym.lower()] = target
                self.codes.add(synonym)
Example #2
0
    def test_converter_opensubtitles(self):
        """Check the ``opensubtitles`` converter in both directions.

        Covers a few hand-picked codes, the error paths, the total number of
        codes, and a round trip over the language list bundled with babelfish.
        """
        self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
        self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
        self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
        self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
        self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
        # Montenegrin is not recognized as an ISO language (yet?) but for now it is
        # unofficially accepted as Serbian from Montenegro
        self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
        self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
        with self.assertRaises(LanguageReverseError):
            Language.fromopensubtitles('zzz')
        with self.assertRaises(LanguageConvertError):
            Language('aaa').opensubtitles
        self.assertEqual(len(get_language_converter('opensubtitles').codes), 606)

        # test with all the languages from the opensubtitles api
        # downloaded from: http://www.opensubtitles.org/addons/export_languages.php
        f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
        # try/finally so the stream is released even when an assertion below fails
        try:
            f.readline()  # discard the first line (presumably the column header)
            for line in f:
                idlang, alpha2, _, upload_enabled, web_enabled = line.decode('utf-8').strip().split('\t')
                if not int(upload_enabled) and not int(web_enabled):
                    # do not test languages that are too esoteric / not widely available
                    continue
                self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
                if alpha2:
                    self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
        finally:
            f.close()
Example #3
0
 def test_converter_name(self):
     """Check the ``name`` converter: forward, reverse, generic lookup, error and size."""
     english = Language('eng')
     self.assertEqual(english.name, 'English')
     self.assertEqual(Language.fromname('English'), english)
     self.assertEqual(Language.fromcode('English', 'name'), english)

     # an unknown name must be rejected on the reverse path
     with self.assertRaises(LanguageReverseError):
         Language.fromname('Zzzzzzzzz')

     name_codes = get_language_converter('name').codes
     self.assertEqual(len(name_codes), 7874)
Example #4
0
 def test_converter_alpha3t(self):
     """Check the ``alpha3t`` converter: forward, reverse, generic lookup, errors and size."""
     french = Language('fra')
     self.assertEqual(french.alpha3t, 'fra')
     self.assertEqual(Language.fromalpha3t('fra'), french)
     self.assertEqual(Language.fromcode('fra', 'alpha3t'), french)

     # unknown code on the reverse path
     with self.assertRaises(LanguageReverseError):
         Language.fromalpha3t('zzz')
     # language without an alpha3t representation on the forward path
     with self.assertRaises(LanguageConvertError):
         Language('aaa').alpha3t

     alpha3t_codes = get_language_converter('alpha3t').codes
     self.assertEqual(len(alpha3t_codes), 418)
Example #5
0
 def test_converter_alpha2(self):
     """Check the ``alpha2`` converter: forward, reverse, generic lookup, errors and size."""
     english = Language('eng')
     self.assertEqual(english.alpha2, 'en')
     self.assertEqual(Language.fromalpha2('en'), english)
     self.assertEqual(Language.fromcode('en', 'alpha2'), english)

     # unknown code on the reverse path
     with self.assertRaises(LanguageReverseError):
         Language.fromalpha2('zz')
     # language without an alpha2 representation on the forward path
     with self.assertRaises(LanguageConvertError):
         Language('aaa').alpha2

     alpha2_codes = get_language_converter('alpha2').codes
     self.assertEqual(len(alpha2_codes), 184)
Example #6
0
 def __init__(self):
     """Set up the label/language tables on top of the ``name`` converter.

     ``from_addic7ed`` maps a label to a language tuple and ``to_addic7ed`` maps
     a language tuple back to a label.  The tuples look like
     ``(alpha3[, country[, script]])`` -- NOTE(review): confirm against the
     methods that consume them.  ``codes`` is the union of the ``name``
     converter's codes and the custom labels.
     """
     self.name_converter = get_language_converter('name')
     # 'Català' was previously stored as mojibake (UTF-8 read with a single-byte
     # code page), so the entry could never match the correctly encoded label.
     self.from_addic7ed = {
         'Català': ('cat',),
         'Chinese (Simplified)': ('zho',),
         'Chinese (Traditional)': ('zho',),
         'Euskera': ('eus',),
         'Galego': ('glg',),
         'Greek': ('ell',),
         'Malay': ('msa',),
         'Portuguese (Brazilian)': ('por', 'BR'),
         'Serbian (Cyrillic)': ('srp', None, 'Cyrl'),
         'Serbian (Latin)': ('srp',),
         'Spanish (Latin America)': ('spa',),
         'Spanish (Spain)': ('spa',),
     }
     # Deliberately not the inverse of from_addic7ed: several labels share one
     # language tuple, so only one label per tuple is listed here.
     self.to_addic7ed = {
         ('cat',): 'Català',
         ('zho',): 'Chinese (Simplified)',
         ('eus',): 'Euskera',
         ('glg',): 'Galego',
         ('ell',): 'Greek',
         ('msa',): 'Malay',
         ('por', 'BR'): 'Portuguese (Brazilian)',
         ('srp', None, 'Cyrl'): 'Serbian (Cyrillic)',
     }
     self.codes = self.name_converter.codes | set(self.from_addic7ed.keys())
Example #7
0
def scan_subtitle_languages(path):
    """Search for subtitles with alpha2 extension from a video `path` and return their language

    A directory entry counts as a subtitle when its name starts with the video's
    name (extension removed) and ends with one of ``SUBTITLE_EXTENSIONS``.  If the
    part before the subtitle extension ends with ``.<alpha2 code>`` the matching
    language is reported, otherwise the undetermined language (``und``) is.

    When `path` has no directory component the current directory is scanned.

    :param string path: path to the video
    :return: found subtitle languages
    :rtype: set

    """
    language_extensions = tuple('.' + c for c in babelfish.get_language_converter('alpha2').codes)
    dirpath, filename = os.path.split(path)
    if not dirpath:
        # A bare file name gives an empty directory, which os.listdir() rejects.
        # Keep the string type of the input so listdir returns names of that same type.
        dirpath = b'.' if isinstance(dirpath, bytes) else u'.'
    video_root = os.path.splitext(filename)[0]  # loop invariant
    subtitles = set()
    for p in os.listdir(dirpath):
        # byte-string entries are skipped, as before
        if isinstance(p, bytes) or not p.startswith(video_root) or not p.endswith(SUBTITLE_EXTENSIONS):
            continue
        subtitle_root = os.path.splitext(p)[0]
        if subtitle_root.endswith(language_extensions):
            subtitles.add(babelfish.Language.fromalpha2(subtitle_root[-2:]))
        else:
            subtitles.add(babelfish.Language('und'))
    logger.debug('Found subtitles %r', subtitles)
    return subtitles
Example #8
0
 def test_converter_type(self):
     """Check the ``type`` converter's code set and two sample conversions."""
     expected_codes = {'A', 'C', 'E', 'H', 'L', 'S'}
     self.assertEqual(get_language_converter('type').codes, expected_codes)
     for alpha3, expected_type in (('eng', 'living'), ('und', 'special')):
         self.assertEqual(Language(alpha3).type, expected_type)
Example #9
0
 def test_converter_scope(self):
     """Check the ``scope`` converter's code set and two sample conversions."""
     expected_codes = {'I', 'S', 'M'}
     self.assertEqual(get_language_converter('scope').codes, expected_codes)
     for alpha3, expected_scope in (('eng', 'individual'), ('und', 'special')):
         self.assertEqual(Language(alpha3).scope, expected_scope)
Example #10
0
 def __init__(self):
     """Set up the custom code tables on top of the ``alpha2`` converter.

     ``from_tvsubtitles`` maps a custom two-letter code to a language tuple,
     ``to_tvsubtitles`` is its exact inverse, and ``codes`` is the union of the
     ``alpha2`` converter's codes and the custom ones.
     """
     self.alpha2_converter = get_language_converter('alpha2')
     self.from_tvsubtitles = {'br': ('por', 'BR'), 'ua': ('ukr',), 'gr': ('ell',), 'cn': ('zho',), 'jp': ('jpn',),
                              'cz': ('ces',)}
     # Iterate .items(): looping over the dict itself yields only the keys, and
     # unpacking a two-letter key gives its two characters, not (code, language).
     self.to_tvsubtitles = {v: k for k, v in self.from_tvsubtitles.items()}
     self.codes = self.alpha2_converter.codes | set(self.from_tvsubtitles.keys())