def matched(self):
    """Return a single guess that contains all the info found in the nodes
    of this tree, trying to merge properties as well as possible.

    The guesses attached to the nodes yielded by ``self.nodes()`` are
    shallow-copied and passed to ``smart_merge``. The merged result is
    stored on the instance as ``_matched_result`` and returned directly on
    subsequent calls.
    """
    merged = getattr(self, '_matched_result', None)

    # Compare against None instead of relying on truthiness: a merged result
    # that holds no property is presumably falsy (Guess looks dict-like --
    # TODO confirm) and would otherwise be recomputed on every call.
    if merged is None:
        # we need to make a copy here, as the merge functions work in place
        # and calling them on the match tree would modify it
        parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]

        merged = smart_merge(parts)

        log.debug('Final result: ' + merged.nice_string())
        self._matched_result = merged

    return merged
def matched(self):
    """Return a single guess that contains all the info found in the nodes
    of this tree, trying to merge properties as well as possible.

    The guesses attached to the nodes yielded by ``self.nodes()`` are
    shallow-copied and passed to ``smart_merge``. The ``clean_value`` of
    every leaf returned by ``self.unidentified_leaves()`` is then collected
    under the ``'unidentified'`` key of the merged result.

    The final result is stored on the instance as ``_matched_result`` and
    returned directly on subsequent calls.
    """
    merged = getattr(self, '_matched_result', None)

    # Compare against None instead of relying on truthiness: a merged result
    # that holds no property is presumably falsy (Guess looks dict-like --
    # TODO confirm) and would otherwise be recomputed on every call.
    if merged is None:
        # we need to make a copy here, as the merge functions work in place
        # and calling them on the match tree would modify it
        parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]

        merged = smart_merge(parts)

        log.debug('Final result: ' + merged.nice_string())

        # Collect the unidentified leaves as part of the one-time
        # computation, so that calling matched() again never appends the
        # same values a second time to the cached result.
        for leaf in self.unidentified_leaves():
            if 'unidentified' not in merged:
                merged['unidentified'] = []
            merged['unidentified'].append(leaf.clean_value)

        # Only cache once the result is complete.
        self._matched_result = merged

    return merged
def guess_file_info(filename, info=None, options=None, **kwargs):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')

    Parameters:
      filename -- path of the file to inspect; converted with ``u()`` first.
      info     -- one infotype name or a list of them. Recognised values are
                  'filename', 'hash_mpc', 'hash_ed2k', 'hash_<algorithm>'
                  (any algorithm ``hashlib.new`` accepts) and 'video'.
                  Defaults to 'filename'. Unknown values are logged and
                  skipped.
      options  -- either a dict of options or a command-line style string,
                  which is split with ``shlex`` and parsed by ``get_opts()``.
                  The module-level ``default_options`` (when set) are used as
                  a base that these options override.
      kwargs   -- forwarded unchanged to ``_guess_filename``.

    Returns the result of ``smart_merge`` applied to all collected guesses.
    Failures while hashing are logged as warnings, not raised.
    """
    info = info or 'filename'
    options = options or {}

    if isinstance(options, base_text_type):
        args = shlex.split(options)
        options = vars(get_opts().parse_args(args))
    if default_options:
        if isinstance(default_options, base_text_type):
            default_args = shlex.split(default_options)
            merged_options = vars(get_opts().parse_args(default_args))
        else:
            # work on a copy so the module-level defaults are never mutated
            merged_options = deepcopy(default_options)
        merged_options.update(options)
        options = merged_options

    result = []
    # (infotype, hasher) pairs in request order
    hashers = []
    # one hasher per distinct infotype; a repeated request reuses it, so
    # every hasher that is later asked for a digest has really been fed
    hasher_by_type = {}

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            import hashlib
            hashname = infotype[5:]
            hasher = hasher_by_type.get(infotype)
            if hasher is None:
                try:
                    # hashlib.new() only accepts real algorithm names, so
                    # things like 'hash_new' or 'hash_algorithms_guaranteed'
                    # end up in the warning below instead of raising an
                    # uncaught TypeError.
                    hasher = hashlib.new(hashname)
                except (ValueError, TypeError):
                    log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)
                    continue
                hasher_by_type[infotype] = hasher
            hashers.append((infotype, hasher))

        elif infotype == 'video':
            g = guess_video_metadata(filename)
            if g:
                result.append(g)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192
            hasherobjs = list(hasher_by_type.values())

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for hasher in hasherobjs:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()}, confidence=1.0))
        except Exception as e:
            # best effort: a hashing failure must not prevent returning the
            # other guesses
            log.warning('Could not compute hash because: %s' % e)

    result = smart_merge(result)

    return result
def guess_file_info(filename, info=None, options=None, **kwargs):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')

    Parameters:
      filename -- path of the file to inspect; converted with ``u()`` first.
      info     -- one infotype name or a list of them. Recognised values are
                  'filename', 'hash_mpc', 'hash_ed2k', 'hash_<algorithm>'
                  (any algorithm ``hashlib.new`` accepts) and 'video'.
                  Defaults to 'filename'. Unknown values are logged and
                  skipped.
      options  -- either a dict of options or a command-line style string;
                  merged on top of the module-level ``default_options``.
      kwargs   -- forwarded unchanged to ``_guess_filename``.

    Returns the result of ``smart_merge`` applied to all collected guesses.
    Failures while hashing are logged as warnings, not raised.
    """
    info = info or 'filename'
    options = _merge_default_options(options or {})

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    result = []
    # (infotype, hasher) pairs in request order; a repeated infotype shares
    # the hasher created for its first occurrence
    hashers = []
    shared_hashers = {}

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            import hashlib
            hashname = infotype[5:]
            if infotype not in shared_hashers:
                try:
                    # hashlib.new() rejects anything that is not an
                    # algorithm name with ValueError, whereas looking the
                    # name up as a module attribute and calling it could
                    # raise an uncaught TypeError (e.g. for 'hash_new').
                    shared_hashers[infotype] = hashlib.new(hashname)
                except (ValueError, TypeError):
                    log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)
                    continue
            hashers.append((infotype, shared_hashers[infotype]))

        elif infotype == 'video':
            g = guess_video_metadata(filename)
            if g:
                result.append(g)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    result.extend(_hash_file_single_pass(filename, hashers))

    return smart_merge(result)


def _merge_default_options(options):
    """Return ``options`` as a dict, layered over the module ``default_options``."""
    if isinstance(options, base_text_type):
        options = vars(get_opts().parse_args(shlex.split(options)))

    if not default_options:
        return options

    if isinstance(default_options, base_text_type):
        merged = vars(get_opts().parse_args(shlex.split(default_options)))
    else:
        # work on a copy so the module-level defaults are never mutated
        merged = deepcopy(default_options)
    merged.update(options)
    return merged


def _hash_file_single_pass(filename, hashers, blocksize=8192):
    """Read ``filename`` once, feed every hasher, and return one Guess per pair."""
    guesses = []
    if not hashers:
        return guesses

    try:
        # Pairs with the same infotype share one hasher object, so this
        # yields every distinct hasher exactly once.
        distinct = list(dict(hashers).values())

        with open(filename, 'rb') as f:
            chunk = f.read(blocksize)
            while chunk:
                for hasher in distinct:
                    hasher.update(chunk)
                chunk = f.read(blocksize)

        for infotype, hasher in hashers:
            guesses.append(Guess({infotype: hasher.hexdigest()}, confidence=1.0))
    except Exception as e:
        # best effort: keep whatever was produced before the failure
        log.warning('Could not compute hash because: %s' % e)

    return guesses