def test_filename(self):
    """Call ``__main__.main`` once per argument list, in a fixed order.

    There are no assertions: the test succeeds as long as none of the
    invocations raises.
    """
    # Argument lists made only of literals: bare filenames, single-letter
    # flags, and explicit "-t" values.
    literal_argument_lists = (
        ["A.Movie.2014.avi"],
        ["A.Movie.2014.avi", "A.2nd.Movie.2014.avi"],
        ["-y", "A.Movie.2014.avi"],
        ["-a", "A.Movie.2014.avi"],
        ["-v", "A.Movie.2014.avi"],
        ["-t", "movie", "A.Movie.2014.avi"],
        ["-t", "episode", "A.Serie.S02E06.avi"],
    )
    for argument_list in literal_argument_lists:
        __main__.main(argument_list, False)

    # "-i" runs against the "1MB" file located next to this test module; the
    # path is resolved right before each call, one call per hash name.
    for hash_name in ("hash_mpc", "hash_md5"):
        __main__.main(["-i", hash_name, file_in_same_dir(__file__, "1MB")], False)
from guessit import fileutils
import os.path
import re
import logging

log = logging.getLogger('guessit.language')

# downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
#
# Description of the fields:
# "An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given),
# an alpha-2 code (when given), an English name, and a French name of a language
# are all separated by pipe (|) characters."
#
# The file is opened in binary mode and every line is decoded explicitly:
# bytes.decode() exists on both Python 2 and Python 3, whereas decoding a
# text-mode line fails on Python 3. The ``with`` block guarantees the file
# handle is closed once the table has been built.
with open(fileutils.file_in_same_dir(__file__, 'ISO-639-2_utf-8.txt'),
          'rb') as _iso639_file:
    language_matrix = [line.strip().decode('utf-8').split('|')
                       for line in _iso639_file]

# One frozenset per column of the table; empty fields are filtered out.
lng3 = frozenset(filter(bool, (row[0] for row in language_matrix)))
lng3term = frozenset(filter(bool, (row[1] for row in language_matrix)))
lng2 = frozenset(filter(bool, (row[2] for row in language_matrix)))

# A name field may hold several names separated by '; '; each one is stored
# individually, lower-cased.
lng_en_name = frozenset(filter(bool, (name
                                      for row in language_matrix
                                      for name in row[3].lower().split('; '))))
lng_fr_name = frozenset(filter(bool, (name
                                      for row in language_matrix
                                      for name in row[4].lower().split('; '))))

# Every code and name known for any language.
lng_all_names = lng3 | lng3term | lng2 | lng_en_name | lng_fr_name

# Conversion tables between code columns; rows lacking the optional code
# (terminologic alpha-3, respectively alpha-2) are left out.
lng3_to_lng3term = dict((row[0], row[1]) for row in language_matrix if row[1])
lng3term_to_lng3 = dict((row[1], row[0]) for row in language_matrix if row[1])

lng3_to_lng2 = dict((row[0], row[2]) for row in language_matrix if row[2])
lng2_to_lng3 = dict((row[2], row[0]) for row in language_matrix if row[2])

# we only return the first given english name, hoping it is the most used one