def main(args=None, setup_logging=True):
    """Command-line entry point.

    Parses the options, runs the requested informational actions
    (properties/values listing, transformers listing, demo, version banner)
    and then guesses (or submits as a bug) every filename given on the
    command line and/or listed in the ``input_file`` option. Help is printed
    when no action at all was performed.

    :param args: list of argument strings to parse. When falsy (``None`` or
        empty), ``get_opts().parse_args()`` is called without arguments.
    :param setup_logging: when true, ``guessit.slogging.setup_logging()`` is
        called before anything else.
    """
    if setup_logging:
        from guessit import slogging
        slogging.setup_logging()

    if PY2:  # pragma: no cover
        import codecs
        import locale
        import sys

        # see http://bugs.python.org/issue2128
        if os.name == 'nt':
            for i, a in enumerate(sys.argv):
                sys.argv[i] = a.decode(locale.getpreferredencoding())

        # see https://github.com/wackou/guessit/issues/43
        # and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
        # Wrap sys.stdout into a StreamWriter to allow writing unicode.
        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)

    # Needed for guessit.plugins.transformers.reload() to be called.
    from guessit.plugins import transformers

    if args:
        options = get_opts().parse_args(args)
    else:  # pragma: no cover
        options = get_opts().parse_args()
    if options.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Stays True until at least one action has been performed.
    help_required = True
    if options.properties or options.values:
        display_properties(options)
        help_required = False
    elif options.transformers:
        display_transformers()
        help_required = False
    if options.demo:
        run_demo(episodes=True, movies=True, options=vars(options))
        help_required = False
    if options.version:
        print('+-------------------------------------------------------+')
        print('+ GuessIt ' + __version__ + (28 - len(__version__)) * ' ' + '+')
        print('+-------------------------------------------------------+')
        print('| Please report any bug or feature request at |')
        print('| https://github.com/wackou/guessit/issues. |')
        print('+-------------------------------------------------------+')
        help_required = False

    if options.yaml:
        try:
            import yaml
            import babelfish

            def default_representer(dumper, data):
                return dumper.represent_str(str(data))
            yaml.SafeDumper.add_representer(babelfish.Language, default_representer)
            yaml.SafeDumper.add_representer(babelfish.Country, default_representer)
        except ImportError:  # pragma: no cover
            print('PyYAML not found. Using default output.')

    filenames = []
    if options.filename:
        filenames.extend(options.filename)
    if options.input_file:
        # The context manager closes the file even if reading fails.
        with open(options.input_file, 'r') as input_file:
            filenames.extend(line.strip() for line in input_file)

    # Drop empty entries. A real list is built (rather than a lazy ``filter``
    # object, which is always truthy on Python 3) so that the emptiness test
    # below is meaningful.
    filenames = [f for f in filenames if f]

    if filenames:
        help_required = False
        if options.submit_bug:
            for filename in filenames:
                submit_bug(filename, options)
        else:
            for filename in filenames:
                guess_file(filename,
                           info=options.info.split(','),
                           options=vars(options))

    if help_required:  # pragma: no cover
        get_opts().print_help()
def guess_file_info(filename, info=None, options=None, **kwargs):
    """Gather the requested kinds of information about ``filename``.

    info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    :param filename: the file to examine; converted with ``u()`` before use.
    :param info: a single info type or a list of them; defaults to
        ``'filename'``. Recognised values are ``'filename'``, ``'hash_mpc'``,
        ``'hash_ed2k'``, ``'hash_<name>'`` (where ``<name>`` is a constructor
        of the ``hashlib`` module) and ``'video'``. Anything else only logs a
        warning.
    :param options: a dict of options, or a command-line-like string parsed
        with ``get_opts()``. When the module-level ``default_options`` is
        set, these options are merged on top of it.
    :param kwargs: passed through to ``_guess_filename``.
    :return: the result of ``smart_merge`` over all collected guesses.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
    """
    info = info or 'filename'
    options = options or {}

    if isinstance(options, base_text_type):
        args = shlex.split(options)
        options = vars(get_opts().parse_args(args))
    if default_options:
        if isinstance(default_options, base_text_type):
            default_args = shlex.split(default_options)
            merged_options = vars(get_opts().parse_args(default_args))
        else:
            merged_options = deepcopy(default_options)
        # Explicit options take precedence over the defaults.
        merged_options.update(options)
        options = merged_options

    result = []
    # (infotype, hasher) pairs, at most one per infotype, in request order.
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            import hashlib
            # A hash requested twice must not get a second hasher object:
            # only one object per infotype is fed with data below, so the
            # extra one would report the digest of empty input.
            if any(name == infotype for name, _ in hashers):
                continue
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
                hashers.append((infotype, hasher))
            except (AttributeError, TypeError):
                # AttributeError: no such name in hashlib.
                # TypeError: the name exists but is not a no-argument
                # constructor (e.g. 'new', 'algorithms_available').
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)

        elif infotype == 'video':
            g = guess_video_metadata(filename)
            if g:
                result.append(g)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192
            hasherobjs = [hasher for _, hasher in hashers]

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for hasher in hasherobjs:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = smart_merge(result)

    return result
def guess_file_info(filename, info=None, options=None, **kwargs):
    """Gather the requested kinds of information about ``filename``.

    info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    :param filename: the file to examine; converted with ``u()`` before use.
    :param info: a single info type or a list of them; defaults to
        ``'filename'``. Recognised values are ``'filename'``, ``'hash_mpc'``,
        ``'hash_ed2k'``, ``'hash_<name>'`` (where ``<name>`` is a constructor
        of the ``hashlib`` module) and ``'video'``. Anything else only logs a
        warning.
    :param options: a dict of options, or a command-line-like string parsed
        with ``get_opts()``. When the module-level ``default_options`` is
        set, these options are merged on top of it.
    :param kwargs: passed through to ``_guess_filename``.
    :return: the result of ``smart_merge`` over all collected guesses.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
    """
    info = info or 'filename'
    options = options or {}

    if isinstance(options, base_text_type):
        args = shlex.split(options)
        options = vars(get_opts().parse_args(args))
    if default_options:
        if isinstance(default_options, base_text_type):
            default_args = shlex.split(default_options)
            merged_options = vars(get_opts().parse_args(default_args))
        else:
            merged_options = deepcopy(default_options)
        # Explicit options take precedence over the defaults.
        merged_options.update(options)
        options = merged_options

    result = []
    # (infotype, hasher) pairs, at most one per infotype, in request order.
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            import hashlib
            # A hash requested twice must not get a second hasher object:
            # only one object per infotype is fed with data below, so the
            # extra one would report the digest of empty input.
            if any(name == infotype for name, _ in hashers):
                continue
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
                hashers.append((infotype, hasher))
            except (AttributeError, TypeError):
                # AttributeError: no such name in hashlib.
                # TypeError: the name exists but is not a no-argument
                # constructor (e.g. 'new', 'algorithms_available').
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)

        elif infotype == 'video':
            g = guess_video_metadata(filename)
            if g:
                result.append(g)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192
            hasherobjs = [hasher for _, hasher in hashers]

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for hasher in hasherobjs:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = smart_merge(result)

    return result
def main(args=None, setup_logging=True):
    """Command-line entry point.

    Parses the options, runs the requested informational actions
    (properties/values listing, transformers listing, demo, version banner)
    and then guesses (or submits as a bug) every filename given on the
    command line and/or listed in the ``input_file`` option. Help is printed
    when no action at all was performed.

    :param args: list of argument strings to parse. When falsy (``None`` or
        empty), ``get_opts().parse_args()`` is called without arguments.
    :param setup_logging: when true, ``guessit.slogging.setup_logging()`` is
        called before anything else.
    """
    if setup_logging:
        from guessit import slogging
        slogging.setup_logging()

    if PY2:  # pragma: no cover
        import codecs
        import locale
        import sys

        # see http://bugs.python.org/issue2128
        if os.name == 'nt':
            for i, a in enumerate(sys.argv):
                sys.argv[i] = a.decode(locale.getpreferredencoding())

        # see https://github.com/wackou/guessit/issues/43
        # and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
        # Wrap sys.stdout into a StreamWriter to allow writing unicode.
        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)

    # Needed for guessit.plugins.transformers.reload() to be called.
    from guessit.plugins import transformers

    if args:
        options = get_opts().parse_args(args)
    else:  # pragma: no cover
        options = get_opts().parse_args()
    if options.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Stays True until at least one action has been performed.
    help_required = True
    if options.properties or options.values:
        display_properties(options)
        help_required = False
    elif options.transformers:
        display_transformers()
        help_required = False
    if options.demo:
        run_demo(episodes=True, movies=True, options=vars(options))
        help_required = False
    if options.version:
        print('+-------------------------------------------------------+')
        print('+ GuessIt ' + __version__ + (28-len(__version__)) * ' ' + '+')
        print('+-------------------------------------------------------+')
        print('| Please report any bug or feature request at |')
        print('| https://github.com/wackou/guessit/issues. |')
        print('+-------------------------------------------------------+')
        help_required = False

    if options.yaml:
        try:
            import yaml
            import babelfish

            def default_representer(dumper, data):
                return dumper.represent_str(str(data))
            yaml.SafeDumper.add_representer(babelfish.Language, default_representer)
            yaml.SafeDumper.add_representer(babelfish.Country, default_representer)
        except ImportError:  # pragma: no cover
            print('PyYAML not found. Using default output.')

    filenames = []
    if options.filename:
        filenames.extend(options.filename)
    if options.input_file:
        # The context manager closes the file even if reading fails.
        with open(options.input_file, 'r') as input_file:
            filenames.extend(line.strip() for line in input_file)

    # Drop empty entries. A real list is built (rather than a lazy ``filter``
    # object, which is always truthy on Python 3) so that the emptiness test
    # below is meaningful.
    filenames = [f for f in filenames if f]

    if filenames:
        help_required = False
        if options.submit_bug:
            for filename in filenames:
                submit_bug(filename, options)
        else:
            for filename in filenames:
                guess_file(filename,
                           info=options.info.split(','),
                           options=vars(options))

    if help_required:  # pragma: no cover
        get_opts().print_help()
def checkFields(self, groundTruth, guess_func, remove_type=True,
                exclude_files=None):
    """Check the guesses produced by ``guess_func`` against ``groundTruth``.

    For every file, each expected property must be present in the guess and
    match the expected value (case-insensitively for strings and lists of
    strings, through ``fromguessit`` for babelfish languages/countries,
    with ``!=`` otherwise). Properties found but not expected are only
    reported as warnings. The test fails if any file has at least one
    failure, including an exception raised by ``guess_func``.

    :param groundTruth: mapping of filename -> mapping of expected
        properties. An optional ``'options'`` entry holds a command-line-like
        string that is parsed with ``get_opts()`` and passed to
        ``guess_func``. The mapping is not modified.
    :param guess_func: callable invoked as ``guess_func(filename, options)``.
    :param remove_type: when true, the ``'type'`` property is removed from
        the guess before comparison.
    :param exclude_files: filenames to skip entirely.
    """
    total = 0
    exclude_files = exclude_files or []

    fails = defaultdict(list)
    additionals = defaultdict(list)

    def record_mismatch(tag, filename, prop, expected, received):
        # Log and register a value mismatch for one property of one file.
        log.debug("Wrong prop value [%s] for '%s': expected = '%s' - received = '%s'"
                  % (tag, prop, u(expected), u(received)))
        fails[filename].append("'%s': expected = '%s' - received = '%s'"
                               % (prop, u(expected), u(received)))

    for filename, required_fields in groundTruth.items():
        filename = u(filename)
        if filename in exclude_files:
            continue

        log.debug('\n' + '-' * 120)
        log.info('Guessing information for file: %s' % filename)

        # Work on a shallow copy so that extracting 'options' does not
        # mutate the caller's ground-truth mapping.
        required_fields = dict(required_fields)
        options = required_fields.pop('options', None)

        if options:
            args = shlex.split(options)
            options = vars(get_opts().parse_args(args))

        # Count every attempted file, including those whose guess raises:
        # they are recorded in ``fails`` and ``correct`` is computed as
        # ``total - len(fails)``.
        total += 1
        try:
            found = guess_func(filename, options)
        except Exception as e:
            fails[filename].append("An exception has occured in %s: %s" % (filename, e))
            log.exception("An exception has occured in %s: %s" % (filename, e))
            continue

        # no need for these in the unittests
        if remove_type:
            try:
                del found['type']
            except KeyError:
                pass
        for prop in ('container', 'mimetype', 'unidentified'):
            if prop in found:
                del found[prop]

        # props which are list of just 1 elem should be opened for easier writing of the tests
        for prop in ('language', 'subtitleLanguage', 'other', 'episodeDetails', 'unidentified'):
            value = found.get(prop, None)
            if isinstance(value, list) and len(value) == 1:
                found[prop] = value[0]

        # look for missing properties
        for prop, value in required_fields.items():
            if prop not in found:
                log.debug("Prop '%s' not found in: %s" % (prop, filename))
                fails[filename].append("'%s' not found in: %s" % (prop, filename))
                continue

            received = found[prop]

            # if both properties are strings, do a case-insensitive comparison
            if (isinstance(value, base_text_type) and
                    isinstance(received, base_text_type)):
                if value.lower() != received.lower():
                    record_mismatch('str', filename, prop, value, received)

            elif isinstance(value, list) and isinstance(received, list):
                if received and isinstance(received[0], babelfish.Language):
                    # list of languages
                    s1 = set(babelfish.Language.fromguessit(s) for s in value)
                    s2 = set(received)
                else:
                    # by default we assume list of strings and do a case-insensitive
                    # comparison on their elements
                    s1 = set(u(s).lower() for s in value)
                    s2 = set(u(s).lower() for s in received)

                if s1 != s2:
                    record_mismatch('list', filename, prop, value, received)

            elif isinstance(received, babelfish.Language):
                # A failed conversion of the expected value counts as a
                # mismatch, same as a differing language.
                try:
                    matches = babelfish.Language.fromguessit(value) == received
                except Exception:
                    matches = False
                if not matches:
                    record_mismatch('Language', filename, prop, value, received)

            elif isinstance(received, babelfish.Country):
                try:
                    matches = babelfish.Country.fromguessit(value) == received
                except Exception:
                    matches = False
                if not matches:
                    record_mismatch('Country', filename, prop, value, received)

            # otherwise, just compare their values directly
            else:
                if received != value:
                    log.debug("Wrong prop value for '%s': expected = '%s' [%s] - received = '%s' [%s]"
                              % (prop, u(value), type(value), u(received), type(received)))
                    fails[filename].append("'%s': expected = '%s' [%s] - received = '%s' [%s]"
                                           % (prop, u(value), type(value), u(received), type(received)))

        # look for additional properties
        for prop, value in found.items():
            if prop not in required_fields:
                log.debug("Found additional info for prop = '%s': '%s'" % (prop, u(value)))
                additionals[filename].append("'%s': '%s'" % (prop, u(value)))

    correct = total - len(fails)
    log.info('SUMMARY: Guessed correctly %d out of %d filenames' % (correct, total))

    for failed_entry, failed_properties in fails.items():
        log.error('---- ' + failed_entry + ' ----')
        for failed_property in failed_properties:
            log.error("FAILED: " + failed_property)

    for additional_entry, additional_properties in additionals.items():
        log.warning('---- ' + additional_entry + ' ----')
        for additional_property in additional_properties:
            log.warning("ADDITIONAL: " + additional_property)

    self.assertTrue(correct == total,
                    msg='Correct: %d < Total: %d' % (correct, total))
def checkFields(self, groundTruth, guess_func, remove_type=True,
                exclude_files=None):
    """Check the guesses produced by ``guess_func`` against ``groundTruth``.

    For every file, each expected property must be present in the guess and
    match the expected value (case-insensitively for strings and lists of
    strings, through ``fromguessit`` for babelfish languages/countries,
    with ``!=`` otherwise). Properties found but not expected are only
    reported as warnings. The test fails if any file has at least one
    failure, including an exception raised by ``guess_func``.

    :param groundTruth: mapping of filename -> mapping of expected
        properties. An optional ``'options'`` entry holds a command-line-like
        string that is parsed with ``get_opts()`` and passed to
        ``guess_func``. The mapping is not modified.
    :param guess_func: callable invoked as ``guess_func(filename, options)``.
    :param remove_type: when true, the ``'type'`` property is removed from
        the guess before comparison.
    :param exclude_files: filenames to skip entirely.
    """
    total = 0
    exclude_files = exclude_files or []

    fails = defaultdict(list)
    additionals = defaultdict(list)

    def record_mismatch(tag, filename, prop, expected, received):
        # Log and register a value mismatch for one property of one file.
        log.debug("Wrong prop value [%s] for '%s': expected = '%s' - received = '%s'"
                  % (tag, prop, u(expected), u(received)))
        fails[filename].append("'%s': expected = '%s' - received = '%s'"
                               % (prop, u(expected), u(received)))

    for filename, required_fields in groundTruth.items():
        filename = u(filename)
        if filename in exclude_files:
            continue

        log.debug('\n' + '-' * 120)
        log.info('Guessing information for file: %s' % filename)

        # Work on a shallow copy so that extracting 'options' does not
        # mutate the caller's ground-truth mapping.
        required_fields = dict(required_fields)
        options = required_fields.pop('options', None)

        if options:
            args = shlex.split(options)
            options = vars(get_opts().parse_args(args))

        # Count every attempted file, including those whose guess raises:
        # they are recorded in ``fails`` and ``correct`` is computed as
        # ``total - len(fails)``.
        total += 1
        try:
            found = guess_func(filename, options)
        except Exception as e:
            fails[filename].append("An exception has occured in %s: %s" % (filename, e))
            log.exception("An exception has occured in %s: %s" % (filename, e))
            continue

        # no need for these in the unittests
        if remove_type:
            try:
                del found['type']
            except KeyError:
                pass
        for prop in ('container', 'mimetype', 'unidentified'):
            if prop in found:
                del found[prop]

        # props which are list of just 1 elem should be opened for easier writing of the tests
        for prop in ('language', 'subtitleLanguage', 'other', 'episodeDetails', 'unidentified'):
            value = found.get(prop, None)
            if isinstance(value, list) and len(value) == 1:
                found[prop] = value[0]

        # look for missing properties
        for prop, value in required_fields.items():
            if prop not in found:
                log.debug("Prop '%s' not found in: %s" % (prop, filename))
                fails[filename].append("'%s' not found in: %s" % (prop, filename))
                continue

            received = found[prop]

            # if both properties are strings, do a case-insensitive comparison
            if (isinstance(value, base_text_type) and
                    isinstance(received, base_text_type)):
                if value.lower() != received.lower():
                    record_mismatch('str', filename, prop, value, received)

            elif isinstance(value, list) and isinstance(received, list):
                if received and isinstance(received[0], babelfish.Language):
                    # list of languages
                    s1 = set(babelfish.Language.fromguessit(s) for s in value)
                    s2 = set(received)
                else:
                    # by default we assume list of strings and do a case-insensitive
                    # comparison on their elements
                    s1 = set(u(s).lower() for s in value)
                    s2 = set(u(s).lower() for s in received)

                if s1 != s2:
                    record_mismatch('list', filename, prop, value, received)

            elif isinstance(received, babelfish.Language):
                # A failed conversion of the expected value counts as a
                # mismatch, same as a differing language.
                try:
                    matches = babelfish.Language.fromguessit(value) == received
                except Exception:
                    matches = False
                if not matches:
                    record_mismatch('Language', filename, prop, value, received)

            elif isinstance(received, babelfish.Country):
                try:
                    matches = babelfish.Country.fromguessit(value) == received
                except Exception:
                    matches = False
                if not matches:
                    record_mismatch('Country', filename, prop, value, received)

            # otherwise, just compare their values directly
            else:
                if received != value:
                    log.debug("Wrong prop value for '%s': expected = '%s' [%s] - received = '%s' [%s]"
                              % (prop, u(value), type(value), u(received), type(received)))
                    fails[filename].append("'%s': expected = '%s' [%s] - received = '%s' [%s]"
                                           % (prop, u(value), type(value), u(received), type(received)))

        # look for additional properties
        for prop, value in found.items():
            if prop not in required_fields:
                log.debug("Found additional info for prop = '%s': '%s'" % (prop, u(value)))
                additionals[filename].append("'%s': '%s'" % (prop, u(value)))

    correct = total - len(fails)
    log.info('SUMMARY: Guessed correctly %d out of %d filenames' % (correct, total))

    for failed_entry, failed_properties in fails.items():
        log.error('---- ' + failed_entry + ' ----')
        for failed_property in failed_properties:
            log.error("FAILED: " + failed_property)

    for additional_entry, additional_properties in additionals.items():
        log.warning('---- ' + additional_entry + ' ----')
        for additional_property in additional_properties:
            log.warning("ADDITIONAL: " + additional_property)

    self.assertTrue(correct == total,
                    msg='Correct: %d < Total: %d' % (correct, total))