def get_torrent_type():
    """Classify the downloaded target as a guessit video type, 'music' or 'unknown'.

    Reads the module-level ``g_dl_dir``, ``g_target``, ``g_video_mimes`` and
    ``g_music_mimes``.  When the target is a directory, the working directory
    is changed to it and the first file with a recognised extension decides
    the result; otherwise the target itself is inspected.

    :return: ``guess['type']`` from guessit for a video file, ``'music'`` for
        a music file, ``'unknown'`` when nothing matched.
    """
    target_path = g_dl_dir + g_target

    if os.path.isdir(target_path):
        # Move to completed target dir (the chdir is a visible side effect
        # and is kept on purpose).
        os.chdir(target_path)
        for root, _dirs, files in os.walk(os.getcwd()):
            for fn in files:
                ext = os.path.splitext(fn)[1]
                if ext in g_video_mimes:
                    guess = guessit.guess_file_info(os.path.join(root, fn))
                    return guess['type']
                if ext in g_music_mimes:
                    return 'music'
    else:
        ext = os.path.splitext(g_target)[1]
        if ext in g_video_mimes:
            guess = guessit.guess_file_info(target_path)
            return guess['type']
        if ext in g_music_mimes:
            return 'music'

    return 'unknown'
def parse(self, data, type_=None, name=None, **kwargs):
    """Run guessit over ``data`` and wrap the outcome with ``self.build_parsed``.

    :param data: string handed to ``guessit.guess_file_info``.
    :param type_: key looked up in ``self._type_map``; the mapped value is
        what guessit and ``build_parsed`` receive.
    :param name: optional expected name; it is not used as a hint, nor passed
        on, when it equals ``data``.
    :param kwargs: forwarded to ``self._guessit_options`` and
        ``self.build_parsed``; a truthy ``metainfo`` entry makes guessit run
        without a type hint.
    :return: whatever ``self.build_parsed`` returns.
    """
    type_ = self._type_map.get(type_)
    guessit_options = self._guessit_options(data, type_, name, **kwargs)

    # Only hint the series name when it adds information and strict naming
    # is not requested.
    if name and name != data and not guessit_options.get('strict_name'):
        guessit_options['expected_series'] = [name]

    hinted_type = None if kwargs.get('metainfo') else type_
    guess_result = guessit.guess_file_info(data, options=guessit_options, type=hinted_type)

    parsed_name = name if name != data else None
    return self.build_parsed(guess_result, data, type_=type_, name=parsed_name, **kwargs)
def Video(torrent, args):
    """Route a video torrent to the series handler, falling back to the movie handler.

    Appends ``"Video"`` to ``args['chain']``.  When the torrent name itself is
    not guessed as a video, each entry of ``torrent.files()`` is checked and
    the function returns ``None`` at the first entry that is not a video.

    :return: ``SeriesFunc(torrent, args) or MovieFunc(torrent, args)``, or
        ``None`` when the torrent is rejected.
    """
    global settings

    def _looks_like_video(info):
        # True when guessit reported a mimetype starting with "video".
        return 'mimetype' in info.keys() and info['mimetype'].startswith('video')

    name_info = guess_file_info(torrent.name)
    args['chain'].append("Video")

    if not _looks_like_video(name_info):
        entries = torrent.files()
        for key in entries:
            entry_info = guess_file_info(entries[key]['name'])
            if not _looks_like_video(entry_info):
                return None

    return SeriesFunc(torrent, args) or MovieFunc(torrent, args)
def deep_scan_nfo(filename, ratio=deep_scan_ratio):
    """Scan an .nfo file word by word for the release name closest to the NZB name.

    Every whitespace-separated word is passed to guessit (with ``.nfo``
    appended).  Words for which guessit reports a ``title`` are compared with
    the module-level ``nzb_name`` using ``difflib.SequenceMatcher``.

    :param filename: path of the .nfo file to read.
    :param ratio: minimum similarity a word must reach to be accepted.
    :return: the guessit result for the best matching word, or ``None`` when
        no word reached ``ratio`` or the file could not be read.
    """
    if verbose:
        print('Deep scanning satellite: %s (ratio=%.2f)' % (filename, ratio))
    best_guess = None
    best_ratio = 0.00
    try:
        # The context manager closes the file even when the scan raises.
        with open(filename) as nfo:
            words = nfo.read().split()
        for word in words:
            try:
                guess = guessit.guess_file_info(word + '.nfo', info=['filename'])
                # The previous test iterated over the characters of the
                # string 'title'; a plain key check is what was meant.
                if 'title' in guess:
                    # Compare word against NZB name
                    similarity = difflib.SequenceMatcher(None, word, nzb_name).ratio()
                    if verbose:
                        print('Tested: %s (ratio=%.2f)' % (word, similarity))
                    # Evaluate ratio against threshold and previous matches
                    if similarity >= ratio and similarity > best_ratio:
                        if verbose:
                            print('Possible match found: %s (ratio=%.2f)' % (word, similarity))
                        best_guess = guess
                        best_ratio = similarity
            except UnicodeDecodeError:
                # Ignore non-unicode words (common in nfo "artwork")
                pass
    except IOError as e:
        print('[ERROR] %s' % str(e))
    return best_guess
def list(self, filepath, languages):
    """List subtitles for the episode at ``filepath`` in the supported languages.

    :param filepath: path of the video, parsed by guessit in ``'autodetect'`` mode.
    :param languages: requested languages, filtered through ``self.possible_languages``.
    :return: the result of ``self.query``, or ``[]`` when no language is
        usable, the file is not an episode, or too little was guessed.
    :raises Exception: when ``self.config_dict['cache_dir']`` is not set.
    """
    if not self.config_dict['cache_dir']:
        raise Exception('Cache directory is required for this plugin')

    possible_languages = self.possible_languages(languages)
    if not possible_languages:
        return []

    guess = guessit.guess_file_info(filepath, 'autodetect')
    if guess['type'] != 'episode':
        self.logger.debug(u'Not an episode')
        return []

    # add multiple things to the release group set: the release group when
    # known, otherwise the title and the screen size
    if 'releaseGroup' in guess:
        source_keys = ('releaseGroup',)
    else:
        source_keys = ('title', 'screenSize')
    release_group = set(guess[key].lower() for key in source_keys if key in guess)

    if not ('series' in guess and release_group):
        self.logger.debug(u'Not enough information to proceed')
        return []

    self.release_group = release_group  # used to sort results
    return self.query(guess['series'], guess['season'], guess['episodeNumber'],
                      release_group, filepath, possible_languages)
def obtain_guessit_query(movie_filename, language):
    """Build a subtitle search query dict from a guessit parse of the file name.

    :param movie_filename: path of the video; only its base name is parsed.
    :param language: value stored under ``'sublanguageid'``.
    :return: dict with ``'query'`` and ``'sublanguageid'``, plus ``'season'``
        and ``'episode'`` for episodes when guessit found them.
    """
    guess = guessit.guess_file_info(os.path.basename(movie_filename), info=['filename'])

    def extract_query(guess, parts):
        # Quote every non-empty guessed value and join them with spaces.
        return ' '.join('"%s"' % guess.get(part) for part in parts if guess.get(part))

    kind = guess.get('type')
    query = {}
    if kind == 'episode':
        query['query'] = extract_query(guess, ['series', 'title', 'releaseGroup'])
        for guess_key, query_key in (('season', 'season'), ('episodeNumber', 'episode')):
            if guess_key in guess:
                query[query_key] = guess[guess_key]
    elif kind == 'movie':
        query['query'] = extract_query(guess, ['title', 'year'])
    else:  # pragma: no cover
        assert False, 'internal error: guessit guess: {0}'.format(guess)

    query['sublanguageid'] = language
    return query
def guess_file(filename, info='filename', options=None, **kwargs):
    """Guess information for ``filename`` and print it, as YAML if requested.

    :param filename: name to analyse; converted with ``u()`` first.
    :param info: forwarded to ``guess_file_info``.
    :param options: option mapping; the ``'yaml'`` and ``'advanced'`` keys are
        read here and the mapping is forwarded to ``guess_file_info``.
    :param kwargs: forwarded to ``guess_file_info``.
    """
    options = options or {}
    filename = u(filename)

    print('For:', filename)
    guess = guess_file_info(filename, info, options, **kwargs)

    if options.get('yaml'):
        try:
            import yaml
            # Unwrap single-element lists so they dump as scalars.
            for key, value in guess.items():
                if isinstance(value, list) and len(value) == 1:
                    guess[key] = value[0]
            dumped = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False)
            for index, yline in enumerate(dumped.splitlines()):
                if index == 0:
                    print("? " + yline[:-1])
                elif index == 1:
                    print(":" + yline[1:])
                else:
                    print(yline)
            return
        except ImportError:  # pragma: no cover
            print('PyYAML not found. Using default output.')

    print('GuessIt found:', guess.nice_string(options.get('advanced')))
def guess_file(filename, info='filename', options=None, **kwargs):
    """Guess information for ``filename`` and print it in the requested form.

    Output is, in order of precedence: the single property named by
    ``options['show_property']``, a YAML document when ``options['yaml']`` is
    set, or the result of ``guess.nice_string``.

    :param filename: name to analyse; converted with ``u()`` first.
    :param info: forwarded to ``guess_file_info``.
    :param options: option mapping, also forwarded to ``guess_file_info``.
    :param kwargs: forwarded to ``guess_file_info``.
    """
    options = options or {}
    filename = u(filename)

    # The "For:" header is skipped for the machine-oriented outputs.
    if not (options.get('yaml') or options.get('show_property')):
        print('For:', filename)

    guess = guess_file_info(filename, info, options, **kwargs)

    if not options.get('unidentified'):
        try:
            del guess['unidentified']
        except KeyError:
            pass

    if options.get('show_property'):
        print(guess.get(options.get('show_property'), ''))
        return

    if options.get('yaml'):
        import yaml
        # Unwrap single-element lists so they dump as scalars.
        for key, value in guess.items():
            if isinstance(value, list) and len(value) == 1:
                guess[key] = value[0]
        dumped = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False,
                                allow_unicode=True)
        for index, yline in enumerate(dumped.splitlines()):
            if index == 0:
                print("? " + yline[:-1])
            elif index == 1:
                print(":" + yline[1:])
            else:
                print(yline)
        return

    print('GuessIt found:', guess.nice_string(options.get('advanced')))
def from_path(cls, path):
    """Create a :class:`Video` subclass guessing all information from the given path

    :param string path: path
    :return: video object
    :rtype: :class:`Episode` or :class:`Movie` or :class:`UnknownVideo`
    :raises ValueError: if the guessed video is not an instance of ``cls``

    """
    guess = guessit.guess_file_info(path, "autodetect")
    kind = guess["type"]

    result = None
    if kind == "episode" and all(key in guess for key in ("series", "season", "episodeNumber")):
        result = Episode(path, guess["series"], guess["season"], guess["episodeNumber"],
                         guess.get("title"), guess)
    if kind == "movie" and "title" in guess:
        result = Movie(path, guess["title"], guess.get("year"), guess)
    if not result:
        result = UnknownVideo(path, guess)

    if not isinstance(result, cls):
        raise ValueError("Video is not of requested type")
    return result
def from_path(cls, path):
    """Create a :class:`Video` subclass guessing all information from the given path

    :param string path: path
    :return: video object
    :rtype: :class:`Episode` or :class:`Movie` or :class:`UnknownVideo`
    :raises ValueError: if the guessed video is not an instance of ``cls``

    """
    guess = guessit.guess_file_info(path, 'autodetect')
    video = None

    if guess['type'] == 'episode':
        # An episode needs at least a series, a season and an episode number.
        if 'series' in guess and 'season' in guess and 'episodeNumber' in guess:
            episode_title = guess['title'] if 'title' in guess else None
            video = Episode(path, guess['series'], guess['season'],
                            guess['episodeNumber'], episode_title, guess)

    if guess['type'] == 'movie':
        if 'title' in guess:
            movie_year = guess['year'] if 'year' in guess else None
            video = Movie(path, guess['title'], movie_year, guess)

    if not video:
        video = UnknownVideo(path, guess)

    if isinstance(video, cls):
        return video
    raise ValueError('Video is not of requested type')
def parse_series(self, data, **kwargs):
    """Parse ``data`` as a series episode and wrap the guess in ``GuessitParsedSerie``.

    :param data: string handed to ``guessit.guess_file_info``.
    :param kwargs: parser options; ``name`` is used as the expected series
        (unless ``strict_name`` is set in the guessit options) and a truthy
        ``quality`` is stored on the result as ``_old_quality``.
    :return: the ``GuessitParsedSerie`` instance.
    """
    guessit_options = self._guessit_options(kwargs)

    series_name = kwargs.get('name')
    if series_name and not guessit_options.get('strict_name'):
        guessit_options['expected_series'] = [series_name]

    # The parse type is always 'episode'.  Leaving it unset when no series
    # name is provided (to stop guessit from treating too general matches as
    # series) is currently disabled, as is returning an invalid parser when
    # the guessed type is not 'episode'.
    guess_result = guessit.guess_file_info(data, options=guessit_options, type='episode')

    parsed = GuessitParsedSerie(data, kwargs.pop('name', None), guess_result, **kwargs)

    # Passed in quality overrides parsed one
    quality = kwargs.get('quality')
    if quality:
        parsed._old_quality = quality
    return parsed
def from_path(cls, path):
    """Create a :class:`Video` subclass guessing all information from the given path

    :param string path: path
    :return: video object
    :rtype: :class:`Episode` or :class:`Movie` or :class:`UnknownVideo`
    :raises ValueError: if the guessed video is not an instance of ``cls``

    """
    guess = guessit.guess_file_info(path, 'autodetect')

    def _optional(key):
        # Value guessed for ``key``, or None when guessit did not find it.
        return guess[key] if key in guess else None

    episode_keys = ('series', 'season', 'episodeNumber')
    result = None
    if guess['type'] == 'episode' and not [k for k in episode_keys if k not in guess]:
        series, season, number = (guess[k] for k in episode_keys)
        result = Episode(path, series, season, number, _optional('title'), guess)
    if guess['type'] == 'movie' and 'title' in guess:
        result = Movie(path, guess['title'], _optional('year'), guess)
    if not result:
        result = UnknownVideo(path, guess)

    if not isinstance(result, cls):
        raise ValueError('Video is not of requested type')
    return result
def query(self, filepath, languages, title, season=None, episode=None, year=None, keywords=None):
    """Search ``/ppodnapisi/search`` and build one ``ResultSubtitle`` per usable hit.

    :param filepath: video path used to derive each subtitle path.
    :param languages: languages to search; converted with ``self.get_code``.
    :param title: title to search for.
    :param season: optional season filter.
    :param episode: optional episode filter.
    :param year: optional year filter.
    :param keywords: optional release keywords filter.
    :return: list of ``ResultSubtitle``; empty when the request does not
        return status 200.
    """
    params = {'sXML': 1, 'sK': title,
              'sJ': ','.join([str(self.get_code(l)) for l in languages])}
    optional_params = (('sTS', season), ('sTE', episode), ('sY', year), ('sR', keywords))
    for key, value in optional_params:
        if value is not None:
            params[key] = value

    r = self.session.get(self.server_url + '/ppodnapisi/search', params=params)
    if r.status_code != 200:
        logger.error(u'Request %s returned status code %d' % (r.url, r.status_code))
        return []

    subtitles = []
    soup = BeautifulSoup(r.content, self.required_features)
    for sub in soup('subtitle'):
        if 'n' in sub.flags:
            logger.debug(u'Skipping hearing impaired')
            continue
        # Take the language from the result itself.  The previous code read
        # ``l``, the list-comprehension variable above: undefined on
        # Python 3, and always the last requested language on Python 2.
        # NOTE(review): assumes this provider exposes ``get_language`` (the
        # inverse of ``get_code``) and that results carry ``languageId`` --
        # confirm against the provider class.
        language = self.get_language(sub.languageId.text)
        confidence = float(sub.rating.text) / 5.0
        sub_keywords = set()
        for release in sub.release.text.split():
            sub_keywords |= get_keywords(guessit.guess_file_info(release + '.srt', 'autodetect'))
        sub_path = get_subtitle_path(filepath, language, self.config.multi)
        subtitles.append(ResultSubtitle(sub_path, language, self.__class__.__name__.lower(),
                                        sub.url.text, confidence=confidence,
                                        keywords=sub_keywords))
    return subtitles
def GetVidFileData(dir, file, configData):
    """Create the data object matching the guessed type of a video file.

    :param dir: directory containing the file.
    :param file: file name, analysed with ``guess_file_info``.
    :param configData: configuration passed through to the created object.
    :return: an ``EpisodeData`` or ``MovieData`` instance, or ``None`` when
        guessing returned ``None`` or the type is neither episode nor movie.
    """
    # 3rd party function to guess according to the file name if movie or tv show
    fileInfo = guess_file_info(file)
    if fileInfo is None:
        return None

    # According to the guessed type we create the object
    vidType = fileInfo.get('type', '')

    if vidType == 'episode':
        series = fileInfo.get('series', '')
        season = fileInfo.get('season', '')
        episodeNumber = fileInfo.get('episodeNumber', '')

        # The episode suffix starts one character before the format tag (so
        # the separator is kept).  str.find returns -1 when the tag is
        # absent; guard against that, and against a tag at position 0,
        # instead of letting the slice wrap around to the end of the name.
        release_format = fileInfo.get('format', '')
        basename = os.path.splitext(file)[0]
        suffix = ''
        if release_format:
            position = basename.lower().find(release_format.lower())
            if position >= 0:
                suffix = basename[max(position - 1, 0):]

        return EpisodeData(configData, series, season, episodeNumber, dir, file, suffix)

    if vidType == 'movie':
        title = fileInfo.get('title', '')
        year = fileInfo.get('year', '')
        return MovieData(configData, title, year, dir, file)

    return None
def handle_file(self, parent_folder, item):
    """Classify one file and record it in the module-level storages.

    Movies are appended to ``file_object_storage``.  Episodes are appended
    there too, and their file name is added to the list kept per directory in
    ``file_location_storage``.  Files for which guessit reports no
    ``container`` are ignored.

    :param parent_folder: folder containing the file.
    :param item: file name; normalised with ``self.format_file_name`` first.
    """
    item = self.format_file_name(item)
    file_details = guessit.guess_file_info(item)

    # Force type for strange filenames (Mini-series without seasons, etc.)
    if "episode" in str(item).lower():
        file_details['type'] = 'episode'

    if 'container' not in file_details:
        return

    location = {'file_dir': "%s/" % parent_folder, 'file_name': item}

    if file_details['type'] == 'movie':
        details = guessit.guess_movie_info(parent_folder + item)
        details.update(location)
        file_object_storage.append(Movie(**details))
    elif file_details['type'] == 'episode':
        details = guessit.guess_episode_info(parent_folder + item)
        details.update(location)
        # Accumulate file names per directory.  The previous loop compared
        # directory strings with ``is`` and reset the list on every
        # non-matching key, so earlier files of a directory were dropped.
        file_location_storage.setdefault(details['file_dir'], []).append(details['file_name'])
        file_object_storage.append(Episode(**details))
def guess_file(filename, info='filename', options=None, **kwargs):
    """Guess information for ``filename`` and print it in the requested form.

    Output is, in order of precedence: the single property named by
    ``options['show_property']``, a YAML document when ``options['yaml']`` is
    set and PyYAML is importable, or the result of ``guess.nice_string``.

    :param filename: name to analyse; converted with ``u()`` first.
    :param info: forwarded to ``guess_file_info``.
    :param options: option mapping, also forwarded to ``guess_file_info``.
    :param kwargs: forwarded to ``guess_file_info``.
    """
    options = options or {}
    filename = u(filename)
    guess = guess_file_info(filename, info, options, **kwargs)

    show_property = options.get('show_property')
    if show_property:
        # Print an empty line rather than raising KeyError when the
        # requested property was not guessed.
        print(guess.get(show_property, ''))
        return

    print('For:', filename)

    if options.get('yaml'):
        try:
            import yaml
            # Unwrap single-element lists so they dump as scalars.
            for key, value in guess.items():
                if isinstance(value, list) and len(value) == 1:
                    guess[key] = value[0]
            dumped = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False)
            for index, yline in enumerate(dumped.splitlines()):
                if index == 0:
                    print("? " + yline[:-1])
                elif index == 1:
                    print(":" + yline[1:])
                else:
                    print(yline)
            return
        except ImportError:  # pragma: no cover
            print('PyYAML not found. Using default output.')

    print('GuessIt found:', guess.nice_string(options.get('advanced')))
def deep_scan_nfo(filename, ratio=deep_scan_ratio):
    """Scan an .nfo file word by word for the release name closest to the NZB name.

    The file is opened relative to the module-level ``root``.  Every
    whitespace-separated word is passed to guessit (with ``.nfo`` appended).
    Words for which guessit reports a ``series`` or ``title`` are compared
    with the module-level ``nzb_name`` using ``difflib.SequenceMatcher``.

    :param filename: name of the .nfo file inside ``root``.
    :param ratio: minimum similarity a word must reach to be accepted.
    :return: the guessit result for the best matching word, or ``None`` when
        no word reached ``ratio`` or the file could not be read.
    """
    if verbose:
        print('Deep scanning satellite: %s (ratio=%.2f)' % (filename, ratio))
    best_guess = None
    best_ratio = 0.00
    try:
        # The context manager closes the file even when the scan raises.
        with open(os.path.join(root, filename)) as nfo:
            words = nfo.read().split()
        for word in words:
            try:
                guess = guessit.guess_file_info(word + '.nfo', info=['filename'])
                # Series = TV, Title = Movie
                if any(item in guess for item in ('series', 'title')):
                    # Compare word against NZB name
                    similarity = difflib.SequenceMatcher(None, word, nzb_name).ratio()
                    if verbose:
                        print('Tested: %s (ratio=%.2f)' % (word, similarity))
                    # Evaluate ratio against threshold and previous matches
                    if similarity >= ratio and similarity > best_ratio:
                        if verbose:
                            print('Possible match found: %s (ratio=%.2f)' % (word, similarity))
                        best_guess = guess
                        best_ratio = similarity
            except UnicodeDecodeError:
                # Ignore non-unicode words (common in nfo "artwork")
                pass
    except IOError as e:
        print('[ERROR] %s' % str(e))
    return best_guess
def sort_file(filepath, move):
    """Move or copy an episode video into ``<tvroot>/<show>/Season NN``.

    Files that guessit does not classify as an episode (or episode subtitle)
    with a video mimetype are ignored.

    :param filepath: path of the file to sort.
    :param move: move the file when true, copy it otherwise.
    :raises ValueError: in copy mode, when a file with the same name already
        exists in the destination.
    """
    filename = os.path.basename(filepath)
    guess = guessit.guess_file_info(filename)

    is_episode = guess.get("type", "") in ["episode", "episodesubtitle"]
    if not is_episode or not guess.get("mimetype", "").startswith("video"):
        return

    season = str(guess["season"]).zfill(2)
    name = format_show_name(guess["series"]).strip()
    dst_path = os.path.join(tvroot, name, "Season " + season)

    try:
        os.makedirs(dst_path)
    except OSError as ex:
        # errno 17: the destination already exists, which is fine.
        if ex.errno != 17:
            raise

    if not move:
        # short-circuit if file is found
        if filename in os.listdir(dst_path):
            raise ValueError("File exists (not overwriting): " + filename)
        logging.info("cp: {0} -> {1}".format(filepath, dst_path))
        shutil.copy2(filepath, dst_path)
        return

    logging.info("mv: {0} -> {1}".format(filepath, dst_path))
    shutil.move(filepath, dst_path)
def recursive(path: Path):
    """Walk ``path`` recursively and queue a rename for every video file found.

    For each entry whose suffix is listed in ``ALL_TYPES['video']`` a
    ``(source, destination)`` tuple is appended to ``operations``:

    * movies: ``<movies>/<title>[ - <year>]<suffix>``
    * episodes without a season: ``<tv>/<series>/<series> - EE<suffix>``
    * episodes with a season:
      ``<tv>/<series>/Season SS/<series> SssEee<suffix>``

    ``self``, ``operations``, ``ALL_TYPES`` and ``guess_file_info`` come from
    the enclosing scope.

    :param path: directory to scan.
    """
    for element in path.iterdir():
        if element.is_dir():
            recursive(element)
        if element.suffix not in ALL_TYPES['video']:
            continue

        metadata = guess_file_info(filename=element.name)
        suffix = element.suffix  # already includes the leading dot

        if metadata['type'] == 'movie':
            if metadata.get('year'):
                new_name = "{} - {}{}".format(metadata['title'], metadata.get('year'), suffix)
            else:
                # No extra "." here: the suffix brings its own dot.
                new_name = "{}{}".format(metadata['title'], suffix)
            operations.append((element, self.destination_folders['movies'] / Path(new_name)))
        elif metadata['type'] == 'episode':
            series = metadata['series']
            series_dir = self.destination_folders['tv'] / Path(series)
            if not metadata.get('season'):
                # Assume it's an anime
                new_name = "{} - {:02d}{}".format(series, metadata['episodeNumber'], suffix)
                destination = series_dir / Path(new_name)
            else:
                # Assume it's a TV Show: season goes after "S", episode
                # number after "E".
                season = metadata['season']
                new_name = "{} S{:02d}E{:02d}{}".format(
                    series, season, metadata['episodeNumber'], suffix)
                destination = series_dir / Path('Season {:02d}'.format(season)) / Path(new_name)
            operations.append((element, destination))
        # TODO: More file cases for moving and renaming
def matching_confidence(video, subtitle):
    """Compute the confidence that the subtitle matches the video.

    The matched fields are written as bits of a binary number (identity
    fields first, then the count of shared keywords) and divided by the same
    number built for a perfect match.

    Returns a float between 0 and 1, 1 being the perfect match; 0 when
    ``video`` is neither an episode nor a movie.
    """
    guess = guessit.guess_file_info(subtitle.release, 'autodetect')
    video_keywords = utils.get_keywords(video.guess)
    subtitle_keywords = utils.get_keywords(guess) | subtitle.keywords
    scores = {'keywords': len(video_keywords & subtitle_keywords)}

    if isinstance(video, videos.Episode):
        scores.update({'series': 0, 'season': 0, 'episode': 0})
        matching_format = '{series:b}{season:b}{episode:b}{keywords:03b}'
        best = matching_format.format(series=1, season=1, episode=1,
                                      keywords=len(video_keywords))
        if guess['type'] in ('episode', 'episodesubtitle'):
            if 'series' in guess and guess['series'].lower() == video.series.lower():
                scores['series'] = 1
            if 'season' in guess and guess['season'] == video.season:
                scores['season'] = 1
            if 'episodeNumber' in guess and guess['episodeNumber'] == video.episode:
                scores['episode'] = 1
    elif isinstance(video, videos.Movie):
        scores.update({'title': 0, 'year': 0})
        matching_format = '{title:b}{year:b}{keywords:03b}'
        best = matching_format.format(title=1, year=1, keywords=len(video_keywords))
        if guess['type'] in ('movie', 'moviesubtitle'):
            if 'title' in guess and guess['title'].lower() == video.title.lower():
                scores['title'] = 1
            if 'year' in guess and guess['year'] == video.year:
                scores['year'] = 1
    else:
        return 0

    achieved = int(matching_format.format(**scores), 2)
    return float(achieved) / float(int(best, 2))
def query(self, filepath, languages, title, season=None, episode=None, year=None, keywords=None):
    """Search ``/ppodnapisi/search`` and build one ``ResultSubtitle`` per usable hit.

    Results flagged ``n`` are skipped.  On Windows the error and skip log
    lines are replaced by a fixed debug message.

    :return: list of ``ResultSubtitle``; empty when the request does not
        return status 200.
    """
    on_windows = sys.platform == 'win32'

    params = {'sXML': 1, 'sK': title,
              'sJ': ','.join([str(self.get_code(l)) for l in languages])}
    for key, value in (('sTS', season), ('sTE', episode), ('sY', year), ('sR', keywords)):
        if value is not None:
            params[key] = value

    r = self.session.get(self.server_url + '/ppodnapisi/search', params=params)
    if r.status_code != 200:
        if on_windows:
            logger.debug('Log line suppressed on windows')
        else:
            logger.error(u'Request %s returned status code %d' % (r.url, r.status_code))
        return []

    subtitles = []
    soup = BeautifulSoup(r.content, self.required_features)
    for sub in soup('subtitle'):
        if 'n' in sub.flags:
            if on_windows:
                logger.debug('Log line suppressed on windows')
            else:
                logger.debug(u'Skipping hearing impaired')
            continue
        language = self.get_language(sub.languageId.text)
        confidence = float(sub.rating.text) / 5.0
        sub_keywords = set()
        for release in sub.release.text.split():
            release_guess = guessit.guess_file_info(release + '.srt', 'autodetect')
            sub_keywords |= get_keywords(release_guess)
        sub_path = get_subtitle_path(filepath, language, self.config.multi)
        subtitles.append(ResultSubtitle(sub_path, language, self.__class__.__name__.lower(),
                                        sub.url.text, confidence=confidence,
                                        keywords=sub_keywords))
    return subtitles
def fromname(cls, name):
    """Shortcut for :meth:`fromguess` with a `guess` guessed from the `name`.

    :param str name: name of the video.
    :return: whatever :meth:`fromguess` returns for the guessed information.

    """
    guess = guess_file_info(name)
    return cls.fromguess(name, guess)
def matching_confidence(video, subtitle):
    """Compute the confidence that the subtitle matches the video.

    Matched identity fields and the number of shared keywords are encoded as
    a binary string and compared with the string of a perfect match.

    Returns a float between 0 and 1, 1 being the perfect match; 0 when
    ``video`` is neither an episode nor a movie.
    """
    guess = guessit.guess_file_info(subtitle.release, "autodetect")
    video_keywords = utils.get_keywords(video.guess)
    subtitle_keywords = utils.get_keywords(guess) | subtitle.keywords
    replacement = {"keywords": len(video_keywords & subtitle_keywords)}
    keyword_total = len(video_keywords)

    if isinstance(video, videos.Episode):
        replacement.update({"series": 0, "season": 0, "episode": 0})
        matching_format = "{series:b}{season:b}{episode:b}{keywords:03b}"
        best = matching_format.format(series=1, season=1, episode=1, keywords=keyword_total)
        subtitle_is_episode = guess["type"] in ["episode", "episodesubtitle"]
        if subtitle_is_episode and "series" in guess:
            if guess["series"].lower() == video.series.lower():
                replacement["series"] = 1
        if subtitle_is_episode and "season" in guess:
            if guess["season"] == video.season:
                replacement["season"] = 1
        if subtitle_is_episode and "episodeNumber" in guess:
            if guess["episodeNumber"] == video.episode:
                replacement["episode"] = 1
    elif isinstance(video, videos.Movie):
        replacement.update({"title": 0, "year": 0})
        matching_format = "{title:b}{year:b}{keywords:03b}"
        best = matching_format.format(title=1, year=1, keywords=keyword_total)
        subtitle_is_movie = guess["type"] in ["movie", "moviesubtitle"]
        if subtitle_is_movie and "title" in guess:
            if guess["title"].lower() == video.title.lower():
                replacement["title"] = 1
        if subtitle_is_movie and "year" in guess:
            if guess["year"] == video.year:
                replacement["year"] = 1
    else:
        return 0

    numerator = float(int(matching_format.format(**replacement), 2))
    denominator = float(int(best, 2))
    return numerator / denominator
def get_metadata(self, __episodes):
    """Fill each episode object with the metadata guessit extracts from its key.

    :param __episodes: mapping whose keys are parsed with guessit and whose
        values receive ``name``, ``season_nr``, ``episode_nr`` and
        ``episode_title`` (empty string when a field was not guessed).
    :return: the same mapping, updated in place.
    """
    for key, episode in __episodes.items():
        guessed = guessit.guess_file_info(key)
        episode.name = guessed.get('series', '')
        episode.season_nr = guessed.get('season', '')
        episode.episode_nr = guessed.get('episodeNumber', '')
        episode.episode_title = guessed.get('title', '')
    return __episodes
def guess_filename(filename):
    """Guess file information with guessit, falling back to the bare file name.

    :param filename: file name to analyse.
    :return: the guessit result, or ``{'title': <name without extension>}``
        when importing or running guessit raised; the error is passed to
        ``log_error``.
    """
    try:
        import guessit
        return guessit.guess_file_info(filename)
    except Exception as error:
        stem = os.path.splitext(filename)[0]
        log_error(error)
        return {'title': stem}
def guess_filename(filename):
    """Return guessit's information for ``filename``, or a title-only fallback.

    Any exception raised while importing or calling guessit is handed to
    ``log_error`` and the file name without its extension is used as title.

    :param filename: file name to analyse.
    :return: the guessit result or ``{'title': name}``.
    """
    try:
        import guessit
        guessed = guessit.guess_file_info(filename)
    except Exception as exc:
        base_name, _extension = os.path.splitext(filename)
        log_error(exc)
        guessed = {'title': base_name}
    return guessed
def parse(bot, trigger):
    """Parse the release name given in the trigger and say the guessed attributes.

    :param bot: object whose ``say`` method receives the rendered output.
    :param trigger: match object; group 2 holds the release name.
    """
    release_name = trigger.group(2).strip()
    attrs = guess_file_info(release_name.lower(), options={'name_only': True})

    # A header group first, then one group per guessed attribute.
    items = [EntityGroup([Entity("RlsParse")])]
    items.extend(EntityGroup([Entity(key, value)]) for key, value in attrs.items())

    bot.say("{}".format(render(items=items)))
def query(self, filepath, languages, title, season=None, episode=None, year=None, keywords=None):
    """Search ``/ppodnapisi/search`` after applying local scene numbering and names.

    The show is looked up in the local database by ``title``.  When found,
    scene season/episode numbers and the first custom scene name (from
    ``cache.db``) replace the given values before the search.

    :param filepath: video path used to derive each subtitle path.
    :param languages: languages to search; converted with ``self.get_code``.
    :param title: show title to search for.
    :param season: optional season filter.
    :param episode: optional episode filter.
    :param year: optional year filter.
    :param keywords: optional release keywords filter.
    :return: list of ``ResultSubtitle``; empty when the request does not
        return status 200.
    """
    myDB = db.DBConnection()
    myDBcache = db.DBConnection("cache.db")

    sql_show_id = myDB.select("SELECT tvdb_id, show_name FROM tv_shows WHERE show_name LIKE ?",
                              ["%" + title + "%"])
    # Guard against an empty result set before indexing the first row.
    if sql_show_id and sql_show_id[0][0]:
        show_id, real_name = sql_show_id[0][0], sql_show_id[0][1]
        sql_scene = myDB.select(
            "SELECT scene_season, scene_episode FROM tv_episodes WHERE showid = ? and season = ? and episode = ?",
            [show_id, season, episode],
        )
        if sql_scene and sql_scene[0][0]:
            season = sql_scene[0][0]
            episode = sql_scene[0][1]
        sql_custom_names = myDBcache.select(
            "SELECT show_name FROM scene_exceptions WHERE tvdb_id = ? and show_name<> ? ORDER BY exception_id asc",
            [show_id, real_name],
        )
        if sql_custom_names:
            title = sql_custom_names[0][0]

    glog.log(
        u"Searching Subtitles on Podnapisiweb with title : %s season : %s episode : %s" % (title, season, episode)
    )

    params = {"sXML": 1, "sK": title,
              "sJ": ",".join([str(self.get_code(l)) for l in languages])}
    for key, value in (("sTS", season), ("sTE", episode), ("sY", year), ("sR", keywords)):
        if value is not None:
            params[key] = value

    r = self.session.get(self.server_url + "/ppodnapisi/search", params=params)
    if r.status_code != 200:
        logger.error(u"Request %s returned status code %d" % (r.url, r.status_code))
        return []

    subtitles = []
    soup = BeautifulSoup(r.content, self.required_features)
    for sub in soup("subtitle"):
        if "n" in sub.flags:
            logger.debug(u"Skipping hearing impaired")
            continue
        # Take the language from the result itself.  The previous code read
        # ``l``, the list-comprehension variable above: undefined on
        # Python 3, and always the last requested language on Python 2.
        # NOTE(review): assumes this provider exposes ``get_language`` (the
        # inverse of ``get_code``) and that results carry ``languageId`` --
        # confirm against the provider class.
        language = self.get_language(sub.languageId.text)
        confidence = float(sub.rating.text) / 5.0
        sub_keywords = set()
        for release in sub.release.text.split():
            sub_keywords |= get_keywords(guessit.guess_file_info(release + ".srt", "autodetect"))
        sub_path = get_subtitle_path(filepath, language, self.config.multi)
        subtitle = ResultSubtitle(
            sub_path,
            language,
            self.__class__.__name__.lower(),
            sub.url.text,
            confidence=confidence,
            keywords=sub_keywords,
        )
        subtitles.append(subtitle)
    return subtitles
def guess(filename):
    """Return guessit's information about ``filename`` as a JSON response.

    The ``country`` entry is removed before serialising: it is not unicode
    and makes ``jsonify`` crash, and it is not needed anyway.
    """
    info = guess_file_info(filename)
    info.pop('country', None)
    return jsonify(info)
def hash(self):
    """Compute the MPC hash of a movie.

    The value is obtained from guessit's ``'hash_mpc'`` info for
    ``self.path`` and cached in ``self._hash`` after the first call.
    """
    if self._hash is not None:
        return self._hash
    info = guessit.guess_file_info(self.path, 'hash_mpc')
    self._hash = info['hash_mpc']
    return self._hash
def factory(cls, release):
    # TODO: Work with lowercase
    """Create a Subtitle object guessing all information from the given subtitle release filename"""
    guess = guessit.guess_file_info(release, 'autodetect')
    keyword_sources = ('releaseGroup', 'screenSize', 'videoCodec', 'format', 'container')
    keywords = set()
    for key in keyword_sources:
        if key not in guess:
            continue
        keywords |= splitKeyword(guess[key])
    return Subtitle(release=release, keywords=keywords)
def fromFileName(cls, fileName_, tvdb_=None):
    """Build an info object from a file name that refers to an episode.

    :param fileName_: file name analysed with guessit.
    :param tvdb_: optional object forwarded to ``EpisodeInfo.fromFileName``.
    :return: the result of ``EpisodeInfo.fromFileName`` for an episode;
        ``None`` for a movie.
    :raises ValueError: when the name refers to neither an episode nor a movie.
    """
    guess = guessit.guess_file_info(fileName_)
    kind = guess['type']
    if kind == 'episode':
        return EpisodeInfo.fromFileName(guess, fileName_, tvdb_)
    if kind != 'movie':
        raise ValueError('The file name must refer to an episode or a movie')
    return None
def parse_movie(self, data, **kwargs):
    """Parse ``data`` as a movie and wrap the guess in ``GuessitParsedMovie``.

    The time spent parsing is written to the debug log.

    :param data: string handed to ``guessit.guess_file_info``.
    :param kwargs: parser options; ``name`` is removed and passed to the
        result, the rest is forwarded to it unchanged.
    :return: the ``GuessitParsedMovie`` instance.
    """
    log.debug('Parsing movie: `%s` [options: %s]', data, kwargs)
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start = timer()
    guessit_options = self._guessit_options(kwargs)
    guess_result = guessit.guess_file_info(data, options=guessit_options, type='movie')
    parsed = GuessitParsedMovie(data, kwargs.pop('name', None), guess_result, **kwargs)
    end = timer()
    log.debug('Parsing result: %s (in %s ms)', parsed, (end - start) * 1000)
    return parsed
def get_metadata(self, __episodes):
    """Copy guessit's series, season, episode number and title onto each episode.

    :param __episodes: mapping from a guessit-parsable key to an episode
        object; fields that were not guessed are set to ``''``.
    :return: the same mapping, updated in place.
    """
    # attribute on the episode object -> key in the guessit result
    field_map = (
        ('name', 'series'),
        ('season_nr', 'season'),
        ('episode_nr', 'episodeNumber'),
        ('episode_title', 'title'),
    )
    for key in __episodes:
        guessit_result = guessit.guess_file_info(key)
        target = __episodes[key]
        for attribute, guess_key in field_map:
            setattr(target, attribute, guessit_result.get(guess_key, ''))
    return __episodes
def processFile(path):
    """Dispatch a video file to the movie or series processor.

    Files whose extension is not listed in
    ``settings['types']['video']['extensions']`` are ignored.  The part of
    the path after ``root_folder`` is what guessit analyses.

    :param path: full path of the file.
    """
    extension = os.path.splitext(path)[1]
    if extension[1:] not in settings['types']['video']['extensions']:
        return

    relative_path = path[len(root_folder):]
    info = guessit.guess_file_info(relative_path)

    kind = info['type']
    if kind == 'movie':
        processMovie(path, info)
    elif kind == 'episode':
        processSeries(path, info)
def get_movie_info(name):
    """Find movie information

    :param name: name analysed with ``guess_file_info``.
    :return: the result of ``omdb(title, year)`` (``year`` is ``None`` when
        it was not guessed); ``None`` when the name is not a movie, in which
        case it is appended to ``not_a_movie``.
    """
    movie_info = guess_file_info(name)
    if movie_info['type'] != "movie":
        not_a_movie.append(name)
        return None
    year = movie_info['year'] if 'year' in movie_info else None
    return omdb(movie_info['title'], year)
def guess_details(self, path):
    """Guess show or movie details for ``path`` and validate them online.

    Episodes are checked against TheTVDB (the episode must have a name),
    movies against TMDb (a search result must share the release year).

    :param path: file path analysed with ``guess_file_info``.
    :return: a dict of details on success; ``[]`` when validation fails or
        the type is unsupported; for movies, a one-element list holding the
        traceback text when an error occurred.
    """
    show_mapper = {"Scandal (US)": "Scandal (2012)"}
    result = {}
    guess = guess_file_info(path, info='filename')

    if guess['type'] == "episode":
        try:
            show = guess["series"]
            result["type"] = "tv"
            result["show"] = show_mapper.get(show, show)
            result["season"] = guess["season"]
            # ``in`` instead of has_key(), which no longer exists on Python 3.
            if "year" in guess:
                result["year"] = guess["year"]
            if "episodeList" in guess:
                result["double_episode"] = 1
                result["episode"] = guess["episodeList"][0]
            else:
                result["double_episode"] = 0
                result["episode"] = guess["episodeNumber"]
            # Single-argument print() behaves the same on Python 2 and 3.
            print(result)
            t = tvdb_api.Tvdb()
            if t[result["show"]][result["season"]][result["episode"]]["episodename"] is None:
                return []
            return result
        except Exception:
            # Best effort: any lookup failure means "no usable details".
            return []
    elif guess['type'] == "movie":
        try:
            result["type"] = "movie"
            result["name"] = guess["title"]
            result["year"] = guess["year"]
            search = tmdb.Search()
            search.movie(query=result["name"])
            for s in search.results:
                if 'release_date' in s and int(s['release_date'][0:4]) == int(result["year"]):
                    return result
            return []
        except Exception:
            error_text = traceback.format_exc()
            print(error_text)
            return [error_text]
    return []
def subliminal():
    """Command-line entry point: parse arguments, then download the best subtitles.

    Existing paths are scanned for videos; paths that do not exist are
    treated as release names and guessed with guessit.  Exits with status 1
    when no subtitle was downloaded.
    """
    parser = subliminal_parser()
    args = parser.parse_args()

    # parse paths (only byte strings need decoding; text passes through)
    try:
        args.paths = [p.decode('utf-8') if isinstance(p, bytes) else p for p in args.paths]
    except UnicodeDecodeError:
        parser.error('argument paths: encodings is not utf-8: %r' % args.paths)

    # parse languages
    try:
        args.languages = {babelfish.Language.fromalpha2(l) for l in args.languages}
    except babelfish.Error:
        parser.error('argument -l/--languages: codes are not ISO-639-1: %r' % args.languages)

    # parse age
    if args.age is not None:
        match = re.match(r'^(?:(?P<weeks>\d+?)w)?(?:(?P<days>\d+?)d)?(?:(?P<hours>\d+?)h)?$', args.age)
        if not match:
            parser.error('argument -a/--age: invalid age: %r' % args.age)
        # timedelta needs numbers: unmatched groups default to 0 and matched
        # ones are converted from the captured digit strings.
        args.age = datetime.timedelta(**{unit: int(amount)
                                         for unit, amount in match.groupdict(0).items()})

    # setup verbosity
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif not args.quiet:
        logging.basicConfig(level=logging.WARN)

    # configure cache
    cache_region.configure('dogpile.cache.dbm',
                           arguments={'filename': os.path.expanduser(args.cache_file)})

    # scan videos
    videos = scan_videos([p for p in args.paths if os.path.exists(p)],
                         subtitles=not args.force, age=args.age)

    # guess videos
    videos.extend([Video.fromguess(os.path.split(p)[1], guessit.guess_file_info(p, 'autodetect'))
                   for p in args.paths if not os.path.exists(p)])

    # download best subtitles
    subtitles = download_best_subtitles(videos, args.languages, providers=args.providers,
                                        provider_configs=None, single=args.single,
                                        min_score=args.min_score,
                                        hearing_impaired=args.hearing_impaired)

    # output result
    if not subtitles:
        if not args.quiet:
            sys.stderr.write('No subtitles downloaded\n')
        sys.exit(1)
    if not args.quiet:
        subtitles_count = sum([len(s) for s in subtitles.values()])
        if subtitles_count == 1:
            print('%d subtitle downloaded' % subtitles_count)
        else:
            print('%d subtitles downloaded' % subtitles_count)
def query(self, name, season, episode, release_group, filepath, languages):
    """Fetch the subtitles available for one episode in the given languages.

    The show id is taken from ``self.exceptions``, then from the
    ``self.showids`` cache, and otherwise retrieved from the web service and
    written to the cache file.

    :param name: show name.
    :param season: season number.
    :param episode: episode number.
    :param release_group: release group set of the video (not read here).
    :param filepath: path of the video the subtitles are for.
    :param languages: languages to query, one request each.
    :return: list of ``Subtitle`` sorted with ``self._cmpReleaseGroup``;
        empty when the show id cannot be found.
    """
    sublinks = []

    # get the show id
    show_name = name.lower()
    if show_name in self.exceptions:  # get it from exceptions
        show_id = self.exceptions[show_name]
    elif show_name in self.showids:  # get it from cache
        show_id = self.showids[show_name]
    else:  # retrieve it
        show_name_encoded = show_name
        if isinstance(show_name_encoded, unicode):
            show_name_encoded = show_name_encoded.encode('utf-8')
        show_id_url = '%sGetShowByName/%s' % (self.server_url, urllib2.quote(show_name_encoded))
        self.logger.debug(u'Retrieving show id from web at %s' % show_id_url)
        page = urllib2.urlopen(show_id_url)
        try:
            dom = minidom.parse(page)
            if not dom or len(dom.getElementsByTagName('showid')) == 0:  # no proper result
                return []
            show_id = dom.getElementsByTagName('showid')[0].firstChild.data
            self.showids[show_name] = show_id
            with self.lock:
                # ``with`` closes the cache file even if pickling fails.
                with open(self.showid_cache, 'w') as f:
                    self.logger.debug(u'Writing showid %s to cache file' % show_id)
                    pickle.dump(self.showids, f)
        finally:
            # Always release the HTTP response, including on parse errors.
            page.close()

    # get the subs for the show id we have
    for language in languages:
        subs_url = '%sGetAllSubsFor/%s/%s/%s/%s' % (self.server_url, show_id, season, episode, language)
        self.logger.debug(u'Getting subtitles at %s' % subs_url)
        page = urllib2.urlopen(subs_url)
        try:
            dom = minidom.parse(page)
        finally:
            page.close()
        for sub in dom.getElementsByTagName('result'):
            sub_release = sub.getElementsByTagName('filename')[0].firstChild.data
            if sub_release.endswith('.srt'):
                sub_release = sub_release[:-4]
            # put a random extension for guessit not to fail guessing that file
            sub_release = sub_release + '.avi'
            # guess information from subtitle
            sub_guess = guessit.guess_file_info(sub_release, 'episode')
            sub_release_group = set()
            if 'releaseGroup' in sub_guess:
                sub_release_group.add(sub_guess['releaseGroup'].lower())
            else:
                if 'title' in sub_guess:
                    sub_release_group.add(sub_guess['title'].lower())
                if 'screenSize' in sub_guess:
                    sub_release_group.add(sub_guess['screenSize'].lower())
            sub_link = sub.getElementsByTagName('downloadlink')[0].firstChild.data
            result = Subtitle(filepath, self.getSubtitlePath(filepath, language),
                              self.__class__.__name__, language, sub_link, sub_release,
                              sub_release_group)
            sublinks.append(result)
    sublinks.sort(self._cmpReleaseGroup)
    return sublinks
def matching_confidence(video, subtitle):
    """Compute the probability (confidence) that the subtitle matches the video

    Matched identity fields and the number of shared keywords are encoded as
    a binary string and divided by the value of a perfect match.

    :param video: video to match
    :type video: :class:`~subliminal.videos.Video`
    :param subtitle: subtitle to match
    :type subtitle: :class:`~subliminal.subtitles.Subtitle`
    :return: the matching probability
    :rtype: float

    """
    guess = guessit.guess_file_info(subtitle.release, 'autodetect')
    video_keywords = get_keywords(video.guess)
    subtitle_keywords = get_keywords(guess) | subtitle.keywords
    logger.debug(u'Video keywords %r - Subtitle keywords %r' % (video_keywords, subtitle_keywords))

    found = {'keywords': len(video_keywords & subtitle_keywords)}
    if isinstance(video, Episode):
        found.update({'series': 0, 'season': 0, 'episode': 0})
        matching_format = '{series:b}{season:b}{episode:b}{keywords:03b}'
        best = matching_format.format(series=1, season=1, episode=1,
                                      keywords=len(video_keywords))
        if guess['type'] in ('episode', 'episodesubtitle'):
            if 'series' in guess and guess['series'].lower() == video.series.lower():
                found['series'] = 1
            if 'season' in guess and guess['season'] == video.season:
                found['season'] = 1
            if 'episodeNumber' in guess and guess['episodeNumber'] == video.episode:
                found['episode'] = 1
    elif isinstance(video, Movie):
        found.update({'title': 0, 'year': 0})
        matching_format = '{title:b}{year:b}{keywords:03b}'
        best = matching_format.format(title=1, year=1, keywords=len(video_keywords))
        if guess['type'] in ('movie', 'moviesubtitle'):
            if 'title' in guess and guess['title'].lower() == video.title.lower():
                found['title'] = 1
            if 'year' in guess and guess['year'] == video.year:
                found['year'] = 1
    else:
        logger.debug(u'Not able to compute confidence for %r' % video)
        return 0.0

    logger.debug(u'Found %r' % found)
    achieved = int(matching_format.format(**found), 2)
    confidence = float(achieved) / float(int(best, 2))
    logger.info(u'Computed confidence %.4f for %r and %r' % (confidence, video, subtitle))
    return confidence
def get_matches(self, video, hearing_impaired=False):
    """Return the set of properties on which this subtitle matches ``video``.

    Starts from the matches computed by the parent class, adds whatever
    ``guess_matches`` finds between the video and a guess of this subtitle,
    and adds ``'imdb_id'`` for movies whose IMDb ids are equal.
    """
    matches = super(LegendasTvSubtitle, self).get_matches(video, hearing_impaired=hearing_impaired)

    # The subtitle name is the best information available about it, so it is
    # parsed with guessit unless a guess is already stored on the instance.
    guess = self.guess
    if not guess:
        guess = guess_file_info(self.name + '.mkv', type=self.type)
    matches |= guess_matches(video, guess)

    # imdb_id match used only for movies
    same_movie = self.type == 'movie' and video.imdb_id and self.imdb_id == video.imdb_id
    if same_movie:
        matches.add('imdb_id')

    return matches
def factory(cls, release):
    """Build a Subtitle from a release filename.

    The release name is parsed with guessit in ``'autodetect'`` mode and the
    values of a fixed list of guessed properties are split into keywords.
    """
    # TODO: Work with lowercase
    guess = guessit.guess_file_info(release, 'autodetect')
    keyword_properties = ('releaseGroup', 'screenSize', 'videoCodec', 'format', 'container')
    keywords = set()
    for prop in keyword_properties:
        if prop not in guess:
            continue
        keywords |= splitKeyword(guess[prop])
    return Subtitle(release=release, keywords=keywords)
def _movie_link_name(guess):
    """Return the library path "<g_movie_dir><title>[ (<year>)]" for a guess."""
    mv_title = guess['title']
    # The year is optional in a guess and is not guaranteed to be a string,
    # so look it up defensively and convert before concatenating.
    mv_year = guess.get('year')
    if mv_year:
        return g_movie_dir + mv_title + " (" + str(mv_year) + ")"
    return g_movie_dir + mv_title


def lights_camera_action():
    """Publish the downloaded movie ``g_target`` under ``g_movie_dir``.

    The movie title and year are guessed with guessit.

    * When the target is a directory, the first file in it whose extension is
      in ``g_video_mimes`` is used for the guess and a symlink to the
      directory is created. Nothing is done if the directory holds no such
      file.
    * Otherwise the target itself is used for the guess, a directory is
      created and the file is copied into it.
    """
    # assumes there is at most one directory
    target_full_path = g_dl_dir + "/" + g_target

    # is a directory
    if os.path.isdir(target_full_path):
        guess = None
        # List the target itself rather than relying on the current directory
        for fn in os.listdir(target_full_path):
            ext = os.path.splitext(fn)[1]
            if ext in g_video_mimes:
                # Same string as before is handed to guessit so the guess is unchanged
                guess = guessit.guess_file_info(g_dl_dir + "/" + fn)
                break
        if guess is None:
            # No video file found: there is nothing to name the link after
            return
        os.symlink(target_full_path, _movie_link_name(guess))
    # was originally just a single video file
    else:
        guess = guessit.guess_file_info(target_full_path)
        mv_link_name = _movie_link_name(guess)
        os.mkdir(mv_link_name)
        shutil.copy(target_full_path, mv_link_name)
def guessInfo(fileName, tvdbid=None):
    """Guess what ``fileName`` is and look up the matching metadata.

    Only the base name is guessed unless ``settings.fullpathguess`` is set.
    Movies are resolved through ``tmdbInfo`` and episodes through
    ``tvdbInfo``; any other type, or any error during the lookup, yields
    ``None`` (errors are printed).
    """
    target = fileName if settings.fullpathguess else os.path.basename(fileName)
    guess = guessit.guess_file_info(target)
    try:
        kind = guess['type']
        if kind == 'movie':
            return tmdbInfo(guess)
        if kind == 'episode':
            return tvdbInfo(guess, tvdbid)
    except Exception as err:
        print(err)
    return None
def get_path_video(filename):
    """Return the library path for ``filename`` based on its guessed type.

    Episodes are formatted with ``config.TV_PATH`` (series, season) and movies
    with ``config.MOVIE_PATH`` (title, year). Missing fields default to an
    empty string. Any other guessed type yields ``None``.
    """
    guess = guessit.guess_file_info(filename)
    kind = guess[u'type']

    if kind == u'episode':
        return config.TV_PATH.format(
            series=guess.get(u'series', u'').title(),
            season=guess.get(u'season', u''),
        )

    if kind == u'movie':
        return config.MOVIE_PATH.format(
            title=guess.get(u'title', u'').title(),
            year=guess.get(u'year', u''),
        )

    return None
def guess_file_info_get():
    """
    @api {get} /guess Detect properties for a given filename
    @apiName GuessFileInfoGet
    @apiGroup Guess

    @apiParam {String} filename Filename out of which to guess information.
    @apiParam {String} * Other fields you pass will be forwarded as options to the guesser.

    @apiSuccess {Object}   Object containing all detected fields. For a list of detected properties see <a href="https://guessit.readthedocs.org/en/latest/#features">here</a>

    @apiExample Example usage:
    curl "http://guessit.io/guess?filename=House.of.Cards.2013.S02E03.1080p.NF.WEBRip.DD5.1.x264-NTb.mkv"

    @apiSuccessExample Success-Response:
    HTTP/1.1 200 OK
    {
        "audioChannels": "5.1",
        "audioCodec": "DolbyDigital",
        "container": "mkv",
        "episodeNumber": 3,
        "format": "WEBRip",
        "mimetype": "video/x-matroska",
        "releaseGroup": "NTb",
        "screenSize": "1080p",
        "season": 2,
        "series": "House of Cards",
        "title": "NF",
        "type": "episode",
        "videoCodec": "h264",
        "year": 2013
    }
    """
    # Everything left in the query string after removing `filename` and `type`
    # is forwarded to guessit as options.
    options = parse_options_dict(request.args)
    filename = options.pop('filename')
    filetype = options.pop('type', None)

    log.info('[GET] Guess request: %s -- options: %s' % (filename, options))

    # TODO: store request in DB
    # TODO: if exception, store in list of bugs
    guessed = guessit.guess_file_info(filename, type=filetype, options=options)

    return jsonify(guessed)
def get_movie_info(path):
    """Look up movie details for the file at `path`.

    The title and year are guessed from the base name with guessit and then
    searched through `omdb`, first with the full title, then with its longest
    word, then with its first word. Returns the data found with the original
    path stored under ``'movie_path'``, the last (falsy) lookup result when
    every search fails, or ``None`` when the path is marked with 'omdb' or no
    title can be guessed.
    """
    # I've added this string to files that don't exist on OMDB
    if 'omdb' in path:
        return None

    # Use the guessit module to find details of a movie from name
    guess = guess_file_info(os.path.basename(path))

    # BUG: Use some heuristics here too?
    if 'title' not in guess or not guess['title']:
        return None

    title = guess['title']
    year = guess.get('year')

    # Use omdb to find ratings, genre etc. from title and year
    data, url = omdb(title, year)

    # Use the longest word as a title
    if not data:
        logger.warning('\033[35m' + "OMDB 404 - %s. Retrying with longest word!" % url + '\033[0m')
        longest_word = max(title.split(), key=len)
        data, url = omdb(longest_word, year)

    # Use the first word as title
    if not data:
        logger.warning('\033[35m' + "OMDB 404 - %s. Retrying with first word!" % url + '\033[0m')
        first_word = title.split()[0]
        data, url = omdb(first_word, year)

    # Still no luck :'(
    if not data:
        logger.warning('\033[35m' + "OMDB 404 - %s." % url + '\033[0m')
        return data

    # BUG: What if we end up fetching data of some other movie?
    if title != data['title']:
        logger.warning('\033[32m' + "Titles don't match: %s - %s" % (title, data['title']) + '\033[0m')

    # Save the path to this movie in the data
    data['movie_path'] = path
    return data
def move_satellites(videofile, dest):
    """Move the satellite files of a video next to its new location.

    Walks the directory of `videofile` and, for every file whose extension is
    in `satellite_extensions` and whose base name equals that of the video
    (case-insensitively), renames it to the base of `dest` plus an optional
    subtitle language part plus the original extension. An .nfo file with a
    different name is also accepted when `deep_scan` is enabled and
    `deep_scan_nfo` returns a match.
    """
    if verbose:
        print('Move satellites for %s' % videofile)

    root = os.path.dirname(videofile)
    destbasenm = os.path.splitext(dest)[0]
    base = os.path.basename(os.path.splitext(videofile)[0])

    for (dirpath, dirnames, filenames) in os.walk(root):
        for filename in filenames:
            fbase, fext = os.path.splitext(filename)
            fextlo = fext.lower()
            # Only subtitles and nfo files are handled
            if fextlo not in satellite_extensions:
                continue

            fpath = os.path.join(dirpath, filename)
            lang_part = ''

            # We support GuessIt supported subtitle extensions
            if fextlo[1:] in guessit.patterns.extension.subtitle_exts:
                guess = guessit.guess_file_info(filename, info=['filename'])
                if guess and 'subtitleLanguage' in guess:
                    # Drop the trailing ".<language>" part from the base name
                    fbase = fbase[:fbase.rfind('.')]
                    # Use alpha2 subtitle language from GuessIt (en, es, de, etc.)
                    lang_part = '.' + guess['subtitleLanguage'][0].alpha2
                if verbose:
                    if lang_part != '':
                        print('Satellite: %s is a subtitle [%s]' % (filename, guess['subtitleLanguage'][0]))
                    else:
                        # English (or undetermined)
                        print('Satellite: %s is a subtitle' % filename)
            elif fextlo == '.nfo' and fbase.lower() != base.lower():
                # Aggressive match attempt; guess details are not important,
                # just that there was a match
                if deep_scan and deep_scan_nfo(fpath) is not None:
                    fbase = base

            if fbase.lower() != base.lower():
                continue

            new = destbasenm + lang_part + fext
            if verbose:
                print('Satellite: %s' % os.path.basename(new))
            rename(fpath, new)
def download(self, subtitle):
    """Download the zip behind ``subtitle.link`` and extract one subtitle.

    When the archive holds a single entry that entry is used. Otherwise the
    season and episode numbers are guessed from ``subtitle.path`` and the
    first entry whose name contains ``sXXeYY`` (case-insensitive) is used.
    The chosen entry is written to ``subtitle.path`` if its extension is in
    ``EXTENSIONS``.

    :param subtitle: subtitle to download; its ``link`` and ``path`` are read
    :raise DownloadFailedError: on any failure; a partially written
        ``subtitle.path`` is removed first
    """
    if sys.platform != 'win32':
        logger.info(u'Downloading %s in %s' % (subtitle.link, subtitle.path))
    else:
        logger.debug('Log line suppressed on windows')

    try:
        r = self.session.get(subtitle.link, headers={
            'Referer': self.server_url,
            'User-Agent': self.user_agent
        })
        zipsub = zipfile.ZipFile(StringIO.StringIO(r.content))
        # Close the archive on every path, including failures while guessing
        # the episode or writing the file.
        try:
            names = zipsub.namelist()
            subfile = ''
            if len(names) == 1:
                subfile = names[0]
            else:
                # Season zip: retrieve season and episode numbers from path
                guess = guessit.guess_file_info(subtitle.path, 'episode')
                ep_string = "s%(seasonnumber)02de%(episodenumber)02d" % {
                    'seasonnumber': guess['season'],
                    'episodenumber': guess['episodeNumber']
                }
                for name in names:
                    if re.search(ep_string, name, re.I):
                        subfile = name
                        break

            if os.path.splitext(subfile)[1] not in EXTENSIONS:
                raise DownloadFailedError('No subtitles found in zip file')
            with open(subtitle.path, 'wb') as f:
                f.write(zipsub.open(subfile).read())
        finally:
            zipsub.close()
    except Exception as e:
        if os.path.exists(subtitle.path):
            os.remove(subtitle.path)
        raise DownloadFailedError(str(e))

    if sys.platform != 'win32':
        logger.debug(u'Download finished')
    else:
        logger.debug('Log line suppressed on windows')
def process_file(dirname, filename):
    """
    Process a file with guessit and construct the wanted_item dict.

    Items used in wanted_item for type = 'episode':
    - 'type'
    - 'title'
    - 'year'
    - 'season'
    - 'episode'
    - 'source'
    - 'quality'
    - 'codec'
    - 'releasegrp'

    Items used in wanted_item for type = 'movie':
    - 'type'
    - 'title'
    - 'year'
    - 'source'
    - 'quality'
    - 'codec'
    - 'releasegrp'
    """
    # NOTE(review): the code visible here stops after the guess step and
    # returns None on every path; the wanted_item construction described in
    # the docstring is not present in this block - confirm.
    log.info("Processing file: %s" % filename)
    file_path = os.path.join(dirname, filename)

    # Check minimal video file size if needed
    if autosubliminal.MINVIDEOFILESIZE:
        file_size = os.path.getsize(file_path)
        # MINVIDEOFILESIZE is size in MB
        if file_size < autosubliminal.MINVIDEOFILESIZE * 1024 * 1024:
            log.warning("File size (%s) is lower than %sMB, skipping" % (utils.humanize_bytes(file_size), autosubliminal.MINVIDEOFILESIZE))
            return

    # Guess
    try:
        log.debug("Guessing file info")
        guess = guessit.guess_file_info(file_path)
        log.debug("Guess: %r" % guess)
    except Exception as e:
        # "as" form is valid on Python 2.6+ and 3.x, unlike "except X, e"
        log.error("Could not guess file info for: %s" % file_path)
        log.error(e)
        return
def classify():
    """Classify a release name with guessit and return the guess as JSON.

    The form is built from the JSON body when the request content type is
    "application/json", otherwise from the submitted form fields. The
    ``media_type`` field selects the guesser: "unknown" uses the generic one,
    "tv" the episode one, anything else the movie one.

    Returns the serialized guess, ``{"err": ...}`` if serialization fails, or
    ``{"err": <validation errors>}`` with status 400 if the form is invalid.
    """
    if request.content_type == "application/json":
        form = ClassifyForm(data=request.get_json(force=True))
    else:
        form = ClassifyForm(request.form)

    if not form.validate():
        # Previously this path fell through and returned None, which is not
        # a valid view response; report the validation errors instead.
        return json.dumps({"err": form.errors}, default=json_serial), 400

    release_name = form.release_name.data
    options = {'name_only': True}
    media_type = form.media_type.data
    if media_type == "unknown":
        data = guessit.guess_file_info(release_name, options=options)
    elif media_type == "tv":
        data = guessit.guess_episode_info(release_name, options=options)
    else:
        data = guessit.guess_movie_info(release_name, options=options)

    try:
        # NOTE(review): the result of this call is discarded; kept as in the
        # original because its purpose is not visible from here - confirm.
        jsonify()
        return json.dumps(data, default=json_serial)
    except Exception as err:
        return json.dumps({"err": str(err)}, default=json_serial)
def parse_series(self, data, **kwargs):
    """Parse ``data`` as a series episode using guessit.

    :param data: the string to parse
    :param kwargs: parsing options; ``name`` (the expected series name) is
        read here, the rest is handed to ``_guessit_options`` and
        ``GuessitParsedSerie``
    :return: a ``GuessitParsedSerie``, or an object whose ``valid`` attribute
        is ``False`` when guessit does not detect an episode
    """
    log.debug('Parsing series: `%s` [options: %s]', data, kwargs)
    guessit_options = self._guessit_options(kwargs)
    if kwargs.get('name') and not guessit_options.get('strict_name'):
        guessit_options['expected_series'] = [kwargs['name']]

    # time.clock() no longer exists on Python 3.8+; prefer perf_counter and
    # only fall back to clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start = timer()

    # If no series name is provided, we don't tell guessit what kind of match we are looking for
    # This prevents guessit from determining that too general of matches are series
    parse_type = 'episode' if kwargs.get('name') else None
    guess_result = guessit.guess_file_info(data, options=guessit_options, type=parse_type)
    if guess_result.get('type') != 'episode':
        # TODO: All other failures return an invalid parser. This is just a hack to match. Maybe exception instead?
        class InvalidParser(object):
            valid = False

        return InvalidParser()

    parsed = GuessitParsedSerie(data, kwargs.pop('name', None), guess_result, **kwargs)
    end = timer()
    log.debug('Parsing result: %s (in %s ms)', parsed, (end - start) * 1000)
    return parsed