def guess_file_info(filename, filetype, info=None):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> guess_file_info('test/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}

    :param filename: path handed to IterativeMatcher and to the hash_file
        helpers.
    :param filetype: forwarded unchanged to IterativeMatcher as ``filetype``.
    :param info: a single infotype name or a list of them; defaults to
        ``['filename']`` when None.

    The part of this function visible here handles the 'filename', 'hash_mpc'
    and 'hash_ed2k' infotypes and collects one guess per infotype in
    ``result``. A failing hash computation is logged as a warning and
    skipped instead of being propagated to the caller.
    """
    result = []
    # Not used by the branches visible here.
    hashers = []

    if info is None:
        info = ['filename']

    # Accept a bare string as shorthand for a one-element list.
    if isinstance(info, basestring):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            m = IterativeMatcher(filename, filetype=filetype)
            result.append(m.matched())

        elif infotype == 'hash_mpc':
            # Imported lazily so the hash module is only loaded on demand.
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({'hash_mpc': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({'hash_ed2k': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

    # NOTE(review): the visible excerpt of this revision ends here; no
    # merging or return of `result` is shown -- confirm against the full file.
def guess_file_info(filename, filetype, info=None):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> guess_file_info('test/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}

    :param filename: path handed to IterativeMatcher and to the hash_file
        helpers.
    :param filetype: forwarded unchanged to IterativeMatcher as ``filetype``.
    :param info: a single infotype name or a list of them; defaults to
        ``['filename']`` when None.

    The part of this function visible here handles the 'filename', 'hash_mpc'
    and 'hash_ed2k' infotypes and collects one guess per infotype in
    ``result``. Hash failures are reported through ``log.warning`` and do not
    abort the remaining infotypes.
    """
    result = []
    # Not used by the branches visible here.
    hashers = []

    if info is None:
        info = ['filename']

    # A plain string means "just this one infotype".
    if isinstance(info, basestring):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            m = IterativeMatcher(filename, filetype=filetype)
            result.append(m.matched())

        elif infotype == 'hash_mpc':
            # Local import: only pay for the hash module when it is requested.
            from guessit.hash_mpc import hash_file
            try:
                result.append(
                    Guess({'hash_mpc': hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(
                    Guess({'hash_ed2k': hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

    # NOTE(review): the visible excerpt of this revision ends here; no
    # merging or return of `result` is shown -- confirm against the full file.
def guess_file_info(filename, filetype, info=None):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}

    :param filename: path of the file; converted with ``u()`` before use and
        opened in binary mode when generic ``hash_*`` infotypes are requested.
    :param filetype: forwarded unchanged to ``_guess_filename``.
    :param info: a single infotype name or a list of them; defaults to
        ``['filename']`` when None. Recognised values are 'filename',
        'hash_mpc', 'hash_ed2k' and 'hash_<name>' where ``<name>`` is an
        algorithm accepted by ``hashlib.new``. Anything else is logged as an
        invalid infotype.
    :return: the object produced by ``merge_all`` over the collected guesses.
        When it contains both 'series' and 'country', the upper-cased
        ``alpha2`` code of the country is appended to the series name.

    Failures while hashing are logged as warnings and the corresponding
    entries are left out of the result.
    """
    result = []
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if info is None:
        info = ['filename']

    # Accept a bare string as shorthand for a one-element list.
    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, filetype))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({'hash_mpc': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({'hash_ed2k': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            import hashlib

            # Register each hash only once: a second hasher object for the
            # same infotype would otherwise never be fed any data and would
            # report the digest of an empty input.
            if any(name == infotype for name, _ in hashers):
                continue

            hashname = infotype[5:]
            try:
                # hashlib.new() rejects anything that is not a digest
                # algorithm with ValueError, whereas getattr(hashlib, name)()
                # would blow up with TypeError on names such as 'new'.
                hasher = hashlib.new(hashname)
                hashers.append((infotype, hasher))
            except ValueError:
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = merge_all(result)

    # last minute adjustments

    # if country is in the guessed properties, make it part of the filename
    if 'series' in result and 'country' in result:
        result['series'] += ' (%s)' % result['country'].alpha2.upper()

    return result
def guess_file_info(filename, info=None, options=None, **kwargs):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')

    :param filename: path of the file; converted with ``u()`` before use and
        opened in binary mode when generic ``hash_*`` infotypes are requested.
    :param info: a single infotype name or a list of them; a falsy value
        means 'filename'. Recognised values are 'filename', 'hash_mpc',
        'hash_ed2k', 'hash_<name>' (``<name>`` being an algorithm accepted by
        ``hashlib.new``) and 'video'. Anything else is logged as invalid.
    :param options: either a mapping of options or a command-line style
        string, which is split with ``shlex`` and parsed by the parser
        returned from ``get_opts()``. When the module-level
        ``default_options`` is truthy, these options are layered on top of it.
    :param kwargs: forwarded unchanged to ``_guess_filename``.
    :return: the object produced by ``smart_merge`` over the collected
        guesses.

    Failures while hashing are logged as warnings and the corresponding
    entries are left out of the result.
    """
    info = info or 'filename'
    options = options or {}

    if isinstance(options, base_text_type):
        args = shlex.split(options)
        options = vars(get_opts().parse_args(args))
    if default_options:
        if isinstance(default_options, base_text_type):
            default_args = shlex.split(default_options)
            merged_options = vars(get_opts().parse_args(default_args))
        else:
            # Copy so that the shared defaults are never mutated by a call.
            merged_options = deepcopy(default_options)
        # Per-call options take precedence over the defaults.
        merged_options.update(options)
        options = merged_options

    result = []
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    # Accept a bare string as shorthand for a one-element list.
    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(
                    Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(
                    Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            import hashlib

            # Register each hash only once: a second hasher object for the
            # same infotype would otherwise never be fed any data and would
            # report the digest of an empty input.
            if any(name == infotype for name, _ in hashers):
                continue

            hashname = infotype[5:]
            try:
                # hashlib.new() rejects anything that is not a digest
                # algorithm with ValueError, whereas getattr(hashlib, name)()
                # would blow up with TypeError on names such as 'new'.
                hasher = hashlib.new(hashname)
                hashers.append((infotype, hasher))
            except ValueError:
                log.warning(
                    'Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)

        elif infotype == 'video':
            g = guess_video_metadata(filename)
            if g:
                result.append(g)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(
                    Guess({infotype: hasher.hexdigest()}, confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = smart_merge(result)

    return result
def guess_file_info(filename, info=None, options=None, **kwargs):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')

    :param filename: path of the file; converted with ``u()`` before use and
        opened in binary mode when generic ``hash_*`` infotypes are requested.
    :param info: a single infotype name or a list of them; a falsy value
        means 'filename'. Recognised values are 'filename', 'hash_mpc',
        'hash_ed2k', 'hash_<name>' (``<name>`` being an algorithm accepted by
        ``hashlib.new``) and 'video'. Anything else is logged as invalid.
    :param options: either a mapping of options or a command-line style
        string, which is split with ``shlex`` and parsed by the parser
        returned from ``get_opts()``. When the module-level
        ``default_options`` is truthy, these options are layered on top of it.
    :param kwargs: forwarded unchanged to ``_guess_filename``.
    :return: the object produced by ``smart_merge`` over the collected
        guesses.

    Hashing problems are reported through ``log.warning`` and never raised to
    the caller; the affected entries are simply absent from the result.
    """
    info = info or 'filename'
    options = options or {}

    if isinstance(options, base_text_type):
        args = shlex.split(options)
        options = vars(get_opts().parse_args(args))
    if default_options:
        if isinstance(default_options, base_text_type):
            default_args = shlex.split(default_options)
            merged_options = vars(get_opts().parse_args(default_args))
        else:
            # Work on a copy: the module-level defaults must stay untouched.
            merged_options = deepcopy(default_options)
        # Explicit per-call options win over the defaults.
        merged_options.update(options)
        options = merged_options

    result = []
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    # A plain string means "just this one infotype".
    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            import hashlib

            # A repeated request for the same hash must not create a second
            # hasher: it would never receive data and would yield the digest
            # of an empty input.
            if any(name == infotype for name, _ in hashers):
                continue

            hashname = infotype[5:]
            try:
                # hashlib.new() signals an unknown algorithm with ValueError;
                # getattr(hashlib, name)() instead raised TypeError for
                # non-constructor attributes such as 'new'.
                hasher = hashlib.new(hashname)
                hashers.append((infotype, hasher))
            except ValueError:
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)

        elif infotype == 'video':
            g = guess_video_metadata(filename)
            if g:
                result.append(g)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = smart_merge(result)

    return result