def matched(self):
    """Return the merged guess for this tree, computing it when needed.

    The merged result is stored in ``self._matched_result``; as long as that
    stored value is truthy, later calls return it without merging again.
    """
    if self._matched_result:
        return self._matched_result

    # The merge helpers work in place, so they are given shallow copies of
    # the node guesses instead of the guesses held by the match tree.
    guesses = []
    for node in self.nodes():
        if node.guess:
            guesses.append(copy.copy(node.guess))

    # 1- try to merge similar information together and give it a higher
    #    confidence
    integer_props = ('year', 'season', 'episodeNumber')
    text_props = ('title', 'series', 'container', 'format', 'releaseGroup',
                  'website', 'audioCodec', 'videoCodec', 'screenSize',
                  'episodeFormat', 'audioChannels', 'idNumber')
    for prop in integer_props:
        merge_similar_guesses(guesses, prop, choose_int)
    for prop in text_props:
        merge_similar_guesses(guesses, prop, choose_string)

    # 2- merge the rest, potentially discarding information not properly
    #    merged before
    merged = merge_all(guesses,
                       append=['language', 'subtitleLanguage', 'other'])
    log.debug('Final result: ' + merged.nice_string())

    self._matched_result = merged
    return self._matched_result
def matched(self):
    """Merge the guesses of all nodes of ``self.match_tree`` into one result.

    Returns whatever ``merge_all`` produces for the (copied) node guesses.
    """
    # Collect every non-empty node guess, then deep-copy the lot: the merge
    # helpers work in place and must not modify the match tree itself.
    node_guesses = []
    for node in self.match_tree.nodes():
        if node.guess:
            node_guesses.append(node.guess)
    guesses = copy.deepcopy(node_guesses)

    # 1- try to merge similar information together and give it a higher
    #    confidence
    merge_plan = (
        (choose_int, ('year', 'season', 'episodeNumber')),
        (choose_string, ('title', 'series', 'container', 'format',
                         'releaseGroup', 'website', 'audioCodec',
                         'videoCodec', 'screenSize', 'episodeFormat',
                         'audioChannels')),
    )
    for chooser, props in merge_plan:
        for prop in props:
            merge_similar_guesses(guesses, prop, chooser)

    # 2- merge the rest, potentially discarding information not properly
    #    merged before
    merged = merge_all(guesses,
                       append=['language', 'subtitleLanguage', 'other'])

    log.debug('Final result: ' + merged.nice_string())
    return merged
def matched(self):
    """Build a single merged guess from the guesses attached to the nodes.

    Returns whatever ``merge_all`` produces for the (copied) node guesses.
    """
    # The merge helpers work in place; deep-copying the collected guesses
    # keeps them from modifying the match tree.
    collected = []
    for node in self.nodes():
        if node.guess:
            collected.append(node.guess)
    working = copy.deepcopy(collected)

    # 1- try to merge similar information together and give it a higher
    #    confidence
    numeric_keys = ("year", "season", "episodeNumber")
    string_keys = (
        "title",
        "series",
        "container",
        "format",
        "releaseGroup",
        "website",
        "audioCodec",
        "videoCodec",
        "screenSize",
        "episodeFormat",
        "audioChannels",
        "idNumber",
    )
    for key in numeric_keys:
        merge_similar_guesses(working, key, choose_int)
    for key in string_keys:
        merge_similar_guesses(working, key, choose_string)

    # 2- merge the rest, potentially discarding information not properly
    #    merged before
    appended_keys = ["language", "subtitleLanguage", "other"]
    final = merge_all(working, append=appended_keys)

    log.debug("Final result: " + final.nice_string())
    return final
def matched(self):
    """Merge ``self.parts`` into one guess, with pre- and post-processing.

    Before merging, a series title ending in ",the" / ", the" gets that
    suffix moved to the front as "The ". After merging, an episode whose
    ``episodeFormat`` is "Minisode" and that has no season gets season 0.
    """
    # The merge helpers work in place, so everything below operates on a
    # deep copy of the parts rather than on the originals.
    guesses = copy.deepcopy(self.parts)

    # 1- start by doing some common preprocessing tasks

    # 1.1- ", the" at the end of a series title should be prepended to it
    for guess in guesses:
        if "series" not in guess:
            continue

        series = guess["series"]
        lowered = series.lower()
        for suffix in (",the", ", the"):
            if lowered.endswith(suffix):
                guess["series"] = "The " + series[: -len(suffix)]

    # 2- try to merge similar information together and give it a higher
    #    confidence
    numeric_keys = ("year", "season", "episodeNumber")
    string_keys = (
        "title",
        "series",
        "container",
        "format",
        "releaseGroup",
        "website",
        "audioCodec",
        "videoCodec",
        "screenSize",
        "episodeFormat",
    )
    for key in numeric_keys:
        merge_similar_guesses(guesses, key, choose_int)
    for key in string_keys:
        merge_similar_guesses(guesses, key, choose_string)

    merged = merge_all(guesses, append=["language", "subtitleLanguage", "other"])

    # 3- some last minute post-processing
    minisode_without_season = (
        merged["type"] == "episode"
        and "season" not in merged
        and merged.get("episodeFormat", "") == "Minisode"
    )
    if minisode_without_season:
        merged["season"] = 0

    log.debug("Final result: " + merged.nice_string())
    return merged
def matched(self):
    """Return one merged guess built from a deep copy of ``self.parts``.

    A trailing ",the" / ", the" on a series title is turned into a leading
    "The " before merging; a merged "Minisode" episode without a season is
    given season 0 afterwards.
    """
    # we need to make a copy here, as the merge functions work in place
    working = copy.deepcopy(self.parts)

    # 1- start by doing some common preprocessing tasks

    # 1.1- ", the" at the end of a series title should be prepended to it
    for part in working:
        if 'series' in part:
            title = part['series']
            title_lc = title.lower()

            if title_lc.endswith(',the'):
                part['series'] = 'The ' + title[:-4]

            if title_lc.endswith(', the'):
                part['series'] = 'The ' + title[:-5]

    # 2- try to merge similar information together and give it a higher
    #    confidence
    merge_plan = (
        (choose_int, ('year', 'season', 'episodeNumber')),
        (choose_string, ('title', 'series', 'container', 'format',
                         'releaseGroup', 'website', 'audioCodec',
                         'videoCodec', 'screenSize', 'episodeFormat')),
    )
    for chooser, props in merge_plan:
        for prop in props:
            merge_similar_guesses(working, prop, chooser)

    merged = merge_all(working,
                       append=['language', 'subtitleLanguage', 'other'])

    # 3- some last minute post-processing
    if merged['type'] == 'episode':
        if 'season' not in merged:
            if merged.get('episodeFormat', '') == 'Minisode':
                merged['season'] = 0

    log.debug('Final result: ' + merged.nice_string())
    return merged
def matched(self):
    """Merge the node guesses of ``self.match_tree`` and return the result."""
    # The merge helpers work in place; a deep copy of the non-empty node
    # guesses keeps the match tree itself unmodified.
    tree_guesses = []
    for node in self.match_tree.nodes():
        if node.guess:
            tree_guesses.append(node.guess)
    guesses = copy.deepcopy(tree_guesses)

    # 1- try to merge similar information together and give it a higher
    #    confidence
    integer_props = ('year', 'season', 'episodeNumber')
    text_props = ('title', 'series', 'container', 'format', 'releaseGroup',
                  'website', 'audioCodec', 'videoCodec', 'screenSize',
                  'episodeFormat')
    for prop in integer_props:
        merge_similar_guesses(guesses, prop, choose_int)
    for prop in text_props:
        merge_similar_guesses(guesses, prop, choose_string)

    merged = merge_all(guesses,
                       append=['language', 'subtitleLanguage', 'other'])

    log.debug('Final result: ' + merged.nice_string())
    return merged
def guess_file_info(filename, filetype, info=None):
    """Gather the requested pieces of information about a file in one guess.

    info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.
    A single name may be given as a plain string; when info is None only
    'filename' is used. Hash names other than 'hash_mpc' and 'hash_ed2k' are
    looked up in hashlib by the part after 'hash_'.

    Problems while hashing (unknown algorithm, unreadable file, ...) are
    logged as warnings and the corresponding entry is left out of the result.

    Returns the value of merge_all() applied to the collected guesses.

    >>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
    """
    result = []
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if info is None:
        info = ['filename']

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, filetype))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({'hash_mpc': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({'hash_ed2k': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            # A hash requested more than once must not get a second hasher:
            # an extra hasher that is never fed would report the digest of
            # empty input for that name.
            if any(infotype == known for known, _ in hashers):
                continue

            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
                hashers.append((infotype, hasher))
            except AttributeError:
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    # every registered hasher sees every chunk
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = merge_all(result)

    # last minute adjustments

    # if country is in the guessed properties, make it part of the series name
    if 'series' in result and 'country' in result:
        result['series'] += ' (%s)' % result['country'].alpha2.upper()

    return result
# do all the hashes now, but on a single pass if hashers: try: blocksize = 8192 hasherobjs = dict(hashers).values() with open(filename, 'rb') as f: for chunk in iter(lambda: f.read(blocksize), ''): for hasher in hasherobjs: hasher.update(chunk) for infotype, hasher in hashers: result.append(Guess({infotype: hasher.hexdigest()}, confidence=1.0)) except Exception, e: log.warning('Could not compute hash because: %s' % e) return merge_all(result) def guess_video_info(filename, info=None): return guess_file_info(filename, 'autodetect', info) def guess_movie_info(filename, info=None): return guess_file_info(filename, 'movie', info) def guess_episode_info(filename, info=None): return guess_file_info(filename, 'episode', info)
def guess_file_info(filename, info=None, options=None, **kwargs):
    """Gather the requested pieces of information about a file in one guess.

    info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.
    A single name may be given as a plain string; a falsy info means
    'filename'. options (default: an empty dict) and any extra keyword
    arguments are handed to _guess_filename(). Hash names other than
    'hash_mpc' and 'hash_ed2k' are looked up in hashlib by the part after
    'hash_'.

    Problems while hashing (unknown algorithm, unreadable file, ...) are
    logged as warnings and the corresponding entry is left out of the result.

    Returns the value of merge_all() applied to the collected guesses.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
    """
    info = info or 'filename'
    options = options or {}

    result = []
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            # A hash requested more than once must not get a second hasher:
            # an extra hasher that is never fed would report the digest of
            # empty input for that name.
            if any(infotype == known for known, _ in hashers):
                continue

            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
                hashers.append((infotype, hasher))
            except AttributeError:
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    # every registered hasher sees every chunk
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = merge_all(result)

    return result
def guess_file_info(filename, info=None, options=None, **kwargs):
    """Gather the requested pieces of information about a file in one guess.

    info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.
    A single name may be given as a plain string; a falsy info means
    'filename'. options (default: an empty dict) and any extra keyword
    arguments are handed to _guess_filename(). Hash names other than
    'hash_mpc' and 'hash_ed2k' are looked up in hashlib by the part after
    'hash_'.

    Problems while hashing (unknown algorithm, unreadable file, ...) are
    logged as warnings and the corresponding entry is left out of the result.

    Returns the value of merge_all() applied to the collected guesses.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
    """
    info = info or 'filename'
    options = options or {}

    result = []
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(
                    Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(
                    Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            # Register at most one hasher per requested name. Only one hasher
            # per name would be updated with file data, so a duplicate entry
            # would otherwise yield the digest of empty input.
            already_requested = any(infotype == known for known, _ in hashers)
            if already_requested:
                continue

            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
                hashers.append((infotype, hasher))
            except AttributeError:
                log.warning(
                    'Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    # every registered hasher sees every chunk
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(
                    Guess({infotype: hasher.hexdigest()}, confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = merge_all(result)

    return result
# do all the hashes now, but on a single pass if hashers: try: blocksize = 8192 hasherobjs = dict(hashers).values() with open(filename, 'rb') as f: for chunk in iter(lambda: f.read(blocksize), ''): for hasher in hasherobjs: hasher.update(chunk) for infotype, hasher in hashers: result.append( Guess({infotype: hasher.hexdigest()}, confidence=1.0)) except Exception, e: log.warning('Could not compute hash because: %s' % e) return merge_all(result) def guess_video_info(filename, info=None): return guess_file_info(filename, 'autodetect', info) def guess_movie_info(filename, info=None): return guess_file_info(filename, 'movie', info) def guess_episode_info(filename, info=None): return guess_file_info(filename, 'episode', info)