def main(video):
    """Download subtitles for ``video`` from OpenSubtitles using its file hash.

    Logs in with the module-level credentials, hashes the file at
    ``video_path + video``, searches OpenSubtitles by that hash and downloads
    the first search result as ``<video>.srt`` into ``video_path``. If that
    download raises, the second search result (when there is one) is tried.

    Args:
        video: File name of the video, appended to the module-level
            ``video_path``.

    Returns:
        None. Progress and failures are reported through ``log``.
    """
    log("info", "Loaded module subtitles")

    # Logging in to OST
    log("info", "Logging in to OpenSubtitles")
    ost = OpenSubtitles()
    token = ost.login(opensubtitles_username, opensubtitles_password)
    if not isinstance(token, str):
        log("critical", "Invalid username / password entered")
        return
    log("success", "Logged in to OpenSubtitles")

    # Opening File
    file_path = video_path + video
    if not path.exists(file_path):
        log("critical", "Could not find specified video file '" + file_path + "'")
        return

    # Hashing file
    log("info", "Generating video hash...")
    video_hash = File(file_path).get_hash()
    log("success", "File hash generated: " + video_hash)

    # Searching OST
    log("info", "Querying OpenSubtitles for subtitles...")
    data = ost.search_subtitles([{'sublanguageid': 'all', 'moviehash': video_hash}])
    # A failed query yields None rather than an empty list, so test
    # truthiness instead of calling len() on it.
    if not data:
        log("warning", "No results found.")
        # TODO: Implement series and episode-based downloading
        return
    log("success", f"Found {len(data)} results.")

    # Try the first result and fall back to the second one only if the
    # download raises. Slicing keeps this safe when there is one result.
    candidates = data[:2]
    for attempt, entry in enumerate(candidates):
        subtitle_id = entry["IDSubtitleFile"]
        log("info", "Attempting download of subtitles with ID " + str(subtitle_id))
        try:
            result = ost.download_subtitles(
                [subtitle_id],
                override_filenames={subtitle_id: video + '.srt'},
                output_directory=video_path,
                extension='srt',
            )
        except Exception:
            if attempt + 1 < len(candidates):
                log("error", "Something went wrong. Trying second option in list(?)")
                continue
            log("critical", "giving up after too many tries")
            return

        if isinstance(result, dict):
            log("success", "Subtitles successfully downloaded. Enjoy your video!")
        else:
            log("critical", "Subtitle download failed.")
        return
class OpenSubtitlesModel(object):
    """Small wrapper around an ``OpenSubtitles`` client for name-based lookups.

    Every public call logs in again first (see ``_refresh_auth``).
    """

    def __init__(self):
        self.ost = OpenSubtitles('en')

    def _refresh_auth(self):
        """Log in with the ``doctest`` account and return the login result."""
        return self.ost.login('doctest', 'doctest')

    def search_by_name_get_first_n(self, name, n):
        """Return up to ``n`` English search results for ``name``.

        Args:
            name: Free-text query passed to the subtitle search.
            n: Maximum number of results to return.

        Returns:
            A list with at most ``n`` result entries, or ``None`` when the
            search did not return a non-empty list.
        """
        self._refresh_auth()
        search_result = self.ost.search_subtitles([{
            'sublanguageid': 'eng',
            'query': name,
        }])
        if isinstance(search_result, list) and search_result:
            return search_result[:n]
        return None

    def search_by_name_get_most_fit(self, name):
        """Return the first search result for ``name``, or ``None`` if none.

        Previously this indexed the result unconditionally and raised
        ``TypeError`` whenever the search came back empty.
        """
        matches = self.search_by_name_get_first_n(name, 1)
        return matches[0] if matches else None

    def download_by_file_id(self, id_subtitle_file):
        """Download one subtitle file and return the value the client reports.

        Args:
            id_subtitle_file: The ``IDSubtitleFile`` of the subtitle to fetch.

        Returns:
            The single value of the mapping returned by the client's
            ``download_subtitles`` (the original code treats it as the path
            of the written file), or ``None`` when the download returned
            nothing.
        """
        self._refresh_auth()
        # No output directory is passed, so the client's default is used.
        paths_by_id = self.ost.download_subtitles([id_subtitle_file])
        if not paths_by_id:
            return None
        # Single-entry dict: take its only value.
        return next(iter(paths_by_id.values()))
def main():
    """Download English subtitles for every video found by the helpers.

    Prompts for credentials until the OpenSubtitles login succeeds, makes
    sure the ``Subs`` directory exists, then for each video searches by
    name/season/episode and downloads the subtitle file chosen by
    ``find_most_downloaded`` into ``.\\Subs\\`` as ``<file_name>-eng.srt``.
    Logs out when finished.
    """
    # Run the program in current directory
    credentials = get_login_credentials()

    # The OpenSubtitles object
    client = OpenSubtitles()

    # Keep asking for credentials until the API accepts them.
    print("Logging in...")
    while True:
        username, password = credentials
        if client.login(username, password) is not None:
            break
        print("Login failed, try again.")
        credentials = get_login_credentials()

    # Creates the Subs directory if it doesn't exist
    create_subs_dir()

    # Video file names in the directory (.mp4 or .mkv), wrapped as objects
    videos = to_video_object_list(get_video_filenames())

    print("Downloading all subs...")
    for video in videos:
        banner_parts = [
            "Downloading ", video.search_name,
            " S", video.season,
            " E", video.episode,
            "Subs",
        ]
        print("".join(banner_parts))

        # All subtitles matching this episode
        query = {
            'sublanguageid': 'eng',
            'query': video.search_name.lower(),
            'season': video.season,
            'episode': video.episode,
        }
        search_data = client.search_subtitles([query])

        # Subtitle file ID of the most downloaded subtitles file
        chosen_id = find_most_downloaded(search_data)
        target_names = {str(chosen_id): video.file_name + '-eng.srt'}
        client.download_subtitles(
            [chosen_id],
            output_directory='.\\Subs\\',
            override_filenames=target_names,
            extension='srt',
        )

    print("Done!")
    print("Logging out...")
    client.logout()
def download_all_subtitles(filepath):
    """Download every subtitle that matches the hash of the video at ``filepath``.

    Files are written next to the video as
    ``<video name without extension>.<SubLanguageID>.srt``. Results are
    requested in chunks of 20 IDs per download call.

    Args:
        filepath: Path to the video file.

    Returns:
        None. Returns early without downloading when the search yields
        nothing.
    """
    dirname = os.path.dirname(filepath)
    basename_without_ext = os.path.splitext(os.path.basename(filepath))[0]

    ost = OpenSubtitles()
    ost.login(None, None)

    video_hash = File(filepath).get_hash()
    results = ost.search_subtitles([{"sublanguageid": "all", "moviehash": video_hash}])
    # A failed or empty search must not be handed to _chunks.
    if not results:
        return

    for chunk in _chunks(results, 20):
        # Map each subtitle file ID to its target file name.
        # NOTE(review): two results with the same SubLanguageID map to the
        # same file name, so the later download overwrites the earlier one.
        sub_ids = {
            r["IDSubtitleFile"]: f'{basename_without_ext}.{r["SubLanguageID"]}.srt'
            for r in chunk
        }
        ost.download_subtitles(
            list(sub_ids),
            override_filenames=sub_ids,
            output_directory=dirname,
            extension="srt",
        )
def downloadSubs(Parameters):
    """Download the first matching English subtitle for a video and save it.

    The subtitle is looked up by file hash and size, fetched as base64
    encoded gzip data, written to a temporary ``.gz`` file next to the
    target, extracted to ``Parameters.subtitle`` and the ``.gz`` file is
    removed again (also when extraction fails).

    Args:
        Parameters: Object providing ``username``, ``password``, ``path``,
            ``video`` and ``subtitle`` attributes.

    Exits the process via ``sys.exit()`` when the login fails.
    """
    # Single-argument print(...) calls behave the same as the print
    # statement on Python 2 and also parse on Python 3.

    # Assign/allocate object and get token after logging in with
    # credentials from the Parameters object
    opensubs = OpenSubtitles()
    token = opensubs.login(Parameters.username, Parameters.password)
    if token is None:
        print('\n*** Login failed! ***\n')
        sys.exit()

    # Get hash and size of file from Parameters object
    f = File(os.path.join(Parameters.path, Parameters.video))
    print('\tPath: %s' % Parameters.path)
    print('\tFile: %s' % Parameters.video)
    video_hash = f.get_hash()
    size = f.get_size()

    # Search subtitles DB using file hash and size. Looks like the first
    # result is the best matching result
    data = opensubs.search_subtitles([{
        'sublanguageid': 'eng',
        'moviehash': video_hash,
        'moviebytesize': size,
    }])
    if not data:
        print('*** No match found for file! ***')
        return

    # Download first result, decode it from BASE64, add gz extension,
    # save file
    download = opensubs.download_subtitles([data[0]['IDSubtitleFile']])
    if not download:
        print('*** Download failed! ***')
        return
    # base64 payloads are plain ASCII, so no unicode() conversion is needed.
    data_decoded = base64.b64decode(download[0]['data'])

    srt_file_name = os.path.join(Parameters.path, Parameters.subtitle)
    gz_file = srt_file_name + '.gz'
    print('\nCreating gz file: %s' % gz_file)
    # Binary mode: the decoded payload is gzip data, and text mode would
    # corrupt it on platforms that translate line endings.
    with open(gz_file, 'wb') as download_file:
        download_file.write(data_decoded)
    print('Created gz file: %s' % gz_file)

    try:
        # Extract SRT file from gz file and place it in the same folder
        print('Opening gz file: %s' % gz_file)
        print('Creating SRT file: %s' % srt_file_name)
        with gzip.open(gz_file, 'rb') as srt_file_buffer:
            with open(srt_file_name, 'wb') as srt_file:
                srt_file.write(srt_file_buffer.read())
        print('Created SRT file: %s' % srt_file_name)
    finally:
        # Delete .gz file, even if extraction failed
        print('Deleting %s' % gz_file)
        os.remove(gz_file)
def main():
    """Download the first search hit for every entry returned by ``get_list``.

    Entries whose target file name (``get_filename``) already exists in the
    current directory, or for which the search returns nothing, are
    skipped. Each remaining subtitle is downloaded with its own request and
    saved under the computed file name.
    """
    ost = OpenSubtitles()
    ost.login(LOGIN, PASSWORD)

    # Read the directory once. Nothing is downloaded until the search loop
    # is finished, so the listing cannot change in between.
    existing = set(os.listdir())

    # Subtitle file ID (as returned by the search) -> target file name.
    filenames = {}
    for sub in get_list():
        sub_name = get_filename(sub)
        # subtitle already in directory
        if sub_name in existing:
            continue
        data = ost.search_subtitles([sub])
        # no sub found (a failed search yields None, not an empty list)
        if not data:
            continue
        filenames[data[0]["IDSubtitleFile"]] = sub_name

    for sub_id in filenames:
        ost.download_subtitles([int(sub_id)], override_filenames=filenames)
def descargar_sub(movieid, path, ost=None):
    """Download the first English, Spanish and French subtitle for a movie.

    Files go into ``<path>/<movieid>/`` (created when missing) with the
    extension ``<lang>.srt``.

    Args:
        movieid: IMDb id used for the search; also the directory name.
        path: Parent directory for the per-movie directory.
        ost: Optional client to reuse. When omitted, a new ``OpenSubtitles``
            client is created and logged in with the ``doctest`` account.
    """
    if ost is None:
        ost = OpenSubtitles()
        ost.login('doctest', 'doctest')

    directory = os.path.join(path, movieid)
    if not os.path.isdir(directory):
        os.makedirs(directory)

    for lang in ('eng', 'spa', 'fre'):
        query = {'sublanguageid': lang, 'imdbid': movieid}
        found = ost.search_subtitles([query])
        # Skip languages with no (or a failed) search result.
        if not found:
            continue
        subid = found[0].get('IDSubtitleFile')
        ost.download_subtitles(
            [subid],
            output_directory=directory,
            extension=lang + '.srt',
        )
data = ost.search_subtitles([{ 'sublanguageid': 'eng', 'imdbid': movie_id[2:], 'moviehash': f.get_hash(), 'moviebytesize': f.size }]) id_subtitle_file = data[0].get('IDSubtitleFile') movie_prep_folder = movies_prep_path / f"{movie_id}" Path(movie_prep_folder).mkdir(parents=True, exist_ok=True) movie_subtitle_folder = movie_prep_folder / 'subtitle/' Path(movie_subtitle_folder).mkdir(parents=True, exist_ok=True) ost.download_subtitles([id_subtitle_file], output_directory=movie_subtitle_folder, extension='srt') time.sleep(1) except: print('\tErro') # # Convert Subtitle movies_with_subtitle = [ f for f in os.listdir(movies_prep_path) if not f.startswith('.') if os.path.exists(movies_prep_path / f'{f}/subtitle/') ] # + all_subs = []
class SubtitleFinder:
    """Find, download and optionally re-sync subtitles for a video file."""

    def __init__(self, skip_subsync, ost_username, ost_password, tmdb_key):
        """Log in to OpenSubtitles and prepare the optional helpers.

        Args:
            skip_subsync: When true, downloaded subtitles are returned
                without running ffsubsync.
            ost_username: OpenSubtitles user name.
            ost_password: OpenSubtitles password.
            tmdb_key: Key for ``MediaSearcher``; when falsy no media
                searcher is created.

        Raises:
            Exception: Whatever the login call raised, after logging it.
        """
        self.skip_subsync = skip_subsync
        self.ost = OpenSubtitles()
        try:
            self.ost.login(ost_username, ost_password)
        except Exception:
            logging.error("Failed to log into opensubtitles.org.")
            raise
        self.ost_language = 'eng'
        # Always define the attribute: find_and_download reads it even when
        # no TMDB key was supplied.
        self.media_searcher = MediaSearcher(tmdb_key) if tmdb_key else None
        if not self.skip_subsync:
            self.subsync_parser = ffsubsync.make_parser()

    def find_and_download(self, source, imdb_id):
        """Return subtitles for ``source``, or ``None`` when none are found.

        Lookup order: file hash, then IMDb id (given or resolved through
        the media searcher), then title (movie) or title/season/episode
        (series) parsed from the file name.
        """
        # subtitles matching on hash are already synced
        subs_data = self.find_subtitles_by_hash(source)
        if subs_data:
            return self.download_subtitles(subs_data)

        parsed_media = filename_parser.parse(source)
        if imdb_id is None and self.media_searcher is not None:
            imdb_id = self.media_searcher.search(parsed_media)

        if imdb_id is not None:
            subs_data = self.find_subtitles_by_id(imdb_id)
        elif isinstance(parsed_media, ParsedMovie):
            subs_data = self.find_subtitles_for_movie(parsed_media.title)
        elif isinstance(parsed_media, ParsedSeries):
            subs_data = self.find_subtitles_for_episode(
                parsed_media.title, parsed_media.season, parsed_media.episode)

        # An empty result list must be treated like "nothing found";
        # indexing it would raise IndexError.
        if not subs_data:
            return None

        subs = self.download_subtitles(subs_data)
        # sync subs unless explicitly asked not to
        if subs is None or self.skip_subsync:
            return subs
        return self.sync_subtitles(source, subs)

    def find_subtitles_by_hash(self, source):
        """Search by the hash and size of the file at ``source``."""
        f = File(source)
        return self.find_subtitles(moviehash=f.get_hash(), moviebytesize=f.size)

    def find_subtitles_by_id(self, imdb_id):
        """Search by IMDb id (with or without the ``tt`` prefix)."""
        return self.find_subtitles(imdbid=imdb_id)

    def find_subtitles_for_movie(self, title):
        """Search by movie title."""
        return self.find_subtitles(query=title)

    def find_subtitles_for_episode(self, title, season, episode):
        """Search by series title, season and episode."""
        return self.find_subtitles(query=title, season=season, episode=episode)

    def find_subtitles(self, **request):
        """Run one search with ``request`` plus the configured language."""
        request.update(sublanguageid=self.ost_language)
        # The request carries the IMDb id without its "tt" prefix.
        if 'imdbid' in request and request['imdbid'][:2] == 'tt':
            request.update(imdbid=request['imdbid'][2:])
        return self.ost.search_subtitles([request])

    def download_subtitles(self, subs_data):
        """Download the first entry of ``subs_data`` and parse it.

        Returns:
            A ``pysubs2.SSAFile``, or ``None`` when the download produced
            no data for the requested subtitle file.
        """
        id_subtitle_file = subs_data[0].get('IDSubtitleFile')
        subs_dict = self.ost.download_subtitles([id_subtitle_file],
                                                return_decoded_data=True)
        raw_subs = subs_dict.get(id_subtitle_file) if subs_dict else None
        if raw_subs is None:
            logging.error("Failed to download subtitle file %s.", id_subtitle_file)
            return None
        return pysubs2.SSAFile.from_string(raw_subs)

    def sync_subtitles(self, video_filename, subtitles):
        """Run ffsubsync on ``subtitles`` against the video and reload them."""
        import os  # local import: only needed for temp-file cleanup

        with tempfile.NamedTemporaryFile(delete=False, suffix='.srt') as tmp_unsynced:
            tmp_unsynced.write(subtitles.to_string('srt').encode())
        # Reserve a second path for the synced output.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.srt') as tmp_synced:
            pass
        try:
            self.run_subsync(video_filename, tmp_unsynced.name, tmp_synced.name)
            return pysubs2.load(tmp_synced.name)
        finally:
            # Neither file is deleted automatically (delete=False).
            for tmp_path in (tmp_unsynced.name, tmp_synced.name):
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass

    def run_subsync(self, reference, srtin, srtout):
        """Invoke ffsubsync with ``reference`` video, input and output paths."""
        ffsubsync.run(self.subsync_parser.parse_args([
            reference, '-i', srtin, '-o', srtout
        ]))
class OpenSubtitlesManager:
    """Download subtitle files from OpenSubtitles and extract their text lines.

    Files are stored as ``<subtitle id>.<format>`` under ``dist``; parsed
    sentences are cached next to each file as ``<file>.pkl``.
    """

    # Credentials are read from the environment once, at class creation.
    USERNAME = os.getenv('OST_USERNAME')
    PASSWORD = os.getenv('OST_PASSWORD')

    def __init__(self, lang=OST_LANG.en, dist='.data'):
        """Store the search language and the download directory.

        Args:
            lang: Language passed (as ``str(lang)``) to the search.
            dist: Download directory, relative to ``BASE_DIR``.
        """
        self.lang = lang
        self.dist = os.path.join(BASE_DIR, dist)
        self.client = OpenSubtitles()

    def login(self):
        """Log in and return the session token.

        Raises:
            AssertionError: If the client did not return a string token.
        """
        token = self.client.login(self.USERNAME, self.PASSWORD)
        # Raised explicitly so the check also runs under ``python -O``.
        if not isinstance(token, str):
            raise AssertionError('OpenSubtitles login did not return a token')
        return token

    def search(self, query='matrix'):
        """Return ``(IDSubtitleFile, SubFormat)`` pairs for ``query``.

        Returns an empty list when the search returns ``None``.
        """
        data = self.client.search_subtitles(
            [dict(sublanguageid=str(self.lang), query=query)])
        if data is None:
            return []
        return [(d['IDSubtitleFile'], d['SubFormat']) for d in data]

    def ids_filter(self, subtitle_ids, chunk):
        """Group subtitle ids by format and split each group into chunks.

        Args:
            subtitle_ids: Iterable of ``(id, format)`` pairs.
            chunk: Maximum number of ids per chunk.

        Returns:
            ``dict.items()`` view of ``format -> list of id lists``.
        """
        by_ext = {}
        for sid, ext in subtitle_ids:
            by_ext.setdefault(ext, []).append(sid)
        chunked = {
            ext: [ids[start:start + chunk] for start in range(0, len(ids), chunk)]
            for ext, ids in by_ext.items()
        }
        return chunked.items()

    def check_exists(self, subtitle_ids, ext):
        """Return the ids whose ``<id>.<ext>`` file is not yet in ``dist``."""
        return [
            sid for sid in subtitle_ids
            if not os.path.isfile(
                os.path.join(self.dist, '{}.{}'.format(sid, ext)))
        ]

    def download_subtitles(self, chunk=5):
        """Search with the default query and download all missing files.

        Args:
            chunk: Number of subtitle ids requested per download call.
        """
        if not os.path.exists(self.dist):
            os.makedirs(self.dist)
        self.login()
        subtitle_ids = self.search()
        groups = self.ids_filter(subtitle_ids, chunk)
        for ext, chunked in tqdm(groups):
            for ids in chunked:
                missing = self.check_exists(ids, ext)
                if missing:
                    self.client.download_subtitles(missing,
                                                   output_directory=self.dist,
                                                   extension=ext)

    def reglob(self, path, exp, invert=False):
        """List files in ``path`` whose name matches (or not) regex ``exp``.

        Returns the matching names joined with ``path``. With ``invert``
        anything other than ``False`` the non-matching names are returned.
        """
        m = re.compile(exp)
        if invert is False:
            names = [f for f in os.listdir(path) if m.search(f)]
        else:
            names = [f for f in os.listdir(path) if not m.search(f)]
        return [os.path.join(path, f) for f in names]

    def readfile(self, fpath):
        """Return ``(lines, extension)`` for the text file at ``fpath``."""
        ext = fpath.split('.')[-1]
        with open(fpath, 'r') as f:
            content = f.readlines()
        return content, ext

    def should_ignore(self, line, patterns, invert=False):
        """Tell whether ``line`` should be dropped.

        A line is ignored when it is ``''`` or ``' '``. Without ``invert``
        it is also ignored as soon as one pattern matches; with ``invert``
        it is ignored as soon as one pattern does not match.
        """
        if line == '' or line == ' ':
            return True
        for pattern in patterns:
            matched = re.search(pattern, line) is not None
            if invert and not matched:
                return True
            if not invert and matched:
                return True
        return False

    def remove_strings(self, line, patterns):
        """Replace ``line`` by group 1 of every pattern that matches it."""
        result = line
        for pattern in patterns:
            m = re.search(pattern, result)
            if m is not None:
                result = m.group(1)
        return result

    def parse_srt(self, content):
        """Extract text lines from SubRip content (list of raw lines)."""
        # Timing lines and bare cue numbers carry no text.
        ignores = [r'^[0-9:,]+ --> [0-9:,]+$', r'^[0-9]+$']
        # Unwrap a single surrounding tag such as <i>...</i>.
        removes = [r'\<[a-z]+\>(.*)\<\/[a-z]+\>']
        data = [
            self.remove_strings(line.replace('\n', ''), removes)
            for line in content if not self.should_ignore(line, ignores)
        ]
        return [line for line in data if line != '']

    def parse_sub(self, content):
        """Extract text lines from ``{start}{end}text`` (.sub) content."""
        removes = [r'{[0-9]+}{[0-9]+}(.*)']
        # before_remove is a method; calling it as a bare name raised
        # NameError for every .sub file.
        data = [
            self.remove_strings(self.before_remove(line, ['\n', '</i>']), removes)
            for line in content
        ]
        return [line for line in data if line != '']

    # SSA parsing was left out on purpose because it is complicated;
    # get_sentenses only collects srt, sub, smi and txt files.

    def before_remove(self, text, l):
        """Return ``text`` with every substring in ``l`` removed."""
        result = text
        for i in l:
            result = result.replace(i, '')
        return result

    def parse_smi(self, content):
        """Extract text lines from SAMI (.smi) content."""
        ignores = [r'\<SYNC Start\=[0-9]+\>\<P Class\=[A-Z]+\>(.*)']
        removes = [r'\<SYNC Start\=[0-9]+\>\<P Class\=[A-Z]+\>(.*)']
        # NOTE(review): the third token below is reproduced as it appears
        # in the source; it may originally have been '&nbsp;' - confirm.
        data = [
            self.remove_strings(
                self.before_remove(line, ['\n', '<br>', ' ']), removes)
            for line in content
            if not self.should_ignore(line, ignores, invert=True)
        ]
        return [line for line in data if line != '']

    def parse_txt(self, content):
        """Extract text lines from plain-text subtitle content."""
        ignores = [r'^\[[A-Z ]+\]', r'[0-9:,]+']
        removes = ['[br]', '\n']
        data = [
            self.before_remove(line, removes) for line in content
            if not self.should_ignore(line, ignores)
        ]
        return [line for line in data if line != '']

    def get_sentenses(self):
        """Return the parsed lines of every downloaded subtitle file.

        Returns:
            A list with one list of text lines per file in ``dist``.
        """
        files = self.reglob(self.dist, r'\d+\.(srt|sub|smi|txt)$')
        data = []
        for file in files:
            # If a pickle cache exists for this file, load it.
            # NOTE: pickle.load executes arbitrary code from the file; only
            # safe while the cache directory is written by this class alone.
            ppath = '{}.pkl'.format(file)
            if os.path.exists(ppath):
                with open(ppath, 'rb') as f:
                    sentenses = pickle.load(f)
            else:
                content, ext = self.readfile(file)
                # Dispatch to parse_srt / parse_sub / parse_smi / parse_txt.
                method = getattr(self, 'parse_{}'.format(ext))
                sentenses = method(content)
                with open(ppath, 'wb+') as f:
                    pickle.dump(sentenses, f)
            data.append(sentenses)
        return data
def download(path, dir_mode=False):
    """Find and download English subtitles for the video at ``path``.

    Searches by file hash and size, picks the result whose ``MovieName`` is
    most similar to the file name (only the first results are compared),
    and saves it next to the video as ``<stem>.srt``. If that file already
    exists the user is asked whether to overwrite, keep the existing one,
    or keep both (the new file then gets a ``.SubsPY`` suffix).

    Args:
        path: Path to the video file.
        dir_mode: When true, the separator lines around the output are
            not printed.
    """
    file_name = os.path.basename(path)
    folder = os.path.dirname(path)
    stem = Path(file_name).stem

    def print_rule():
        print(
            colored(
                '=============================================================================',
                'yellow'))

    def print_not_found():
        print(colored("Subtitles could not be found for " + file_name, 'red'))

    if not dir_mode:
        print("")
        print_rule()

    ost = OpenSubtitles()
    ost.login('subspy', 'subspy')
    video = File(path)
    data = ost.search_subtitles([{
        'sublanguageid': 'eng',
        'moviehash': video.get_hash(),
        'moviebytesize': video.size
    }])
    if data is None or len(data) == 0:
        print_not_found()
        return

    # Pick the candidate whose title is closest to the file name.
    best_index, best_ratio = 0, 0
    for position, candidate in enumerate(data):
        similarity = SequenceMatcher(
            None, candidate.get('MovieName'),
            file_name.replace(".", " ")).ratio()
        if similarity > best_ratio:
            best_index, best_ratio = position, similarity
        if position > 10:
            break

    if best_ratio < 0.5:
        print_not_found()
        return

    winner = data[best_index]
    print("[ TITLE ] " + colored(
        winner.get('MovieName') + " (" + winner.get('MovieYear') + ")",
        'cyan'))
    print("[ RATING ] " + colored(
        winner.get('MovieImdbRating') + "/10 on IMDb", 'cyan'))
    print("Matched with " + str(round(best_ratio * 100, 1)) + '% confidence')

    id_subtitle_file = winner.get('IDSubtitleFile')
    existing_subtitle = os.path.join(folder, stem + ".srt")

    skip_download = False
    suffix = ""
    if os.path.isfile(existing_subtitle):
        suffix = ".SubsPY"
        print(colored("\nSubtitles already exist for this file.", 'red'))
        if globals()['keep_all']:
            print(
                colored("Keeping both subtitles (added suffix) for ALL FILES.",
                        "cyan"))
        else:
            answer = input(
                colored(
                    'Overwrite [o], keep existing [k], keep both [b] or keep both for all conflicts [a]? : ',
                    'magenta')).lower()
            if answer == "o":
                suffix = ""
                print(colored("Overwriting existing Subtitles", "cyan"))
            elif answer == "k":
                print(colored("Skipping this download.", "cyan"))
                skip_download = True
            elif answer == "a":
                globals()['keep_all'] = True
                print(
                    colored(
                        "Keeping both subtitles (added suffix) for ALL FILES.",
                        "cyan"))
            else:
                print(
                    colored(
                        "Keeping both subtitles (added suffix) for this file.",
                        "cyan"))

    if not skip_download:
        status = ost.download_subtitles(
            [id_subtitle_file],
            override_filenames={id_subtitle_file: stem + suffix + ".srt"},
            output_directory=folder,
            extension='srt')
        if status is None:
            # input() is used so the failure message waits for the user.
            input(
                colored(
                    "\nSubtitles could not be downloaded for " + file_name,
                    'red'))
            return
        print(colored("\nSubtitles downloaded successfully!", "green"))

    if not dir_mode:
        print_rule()
if subtitles is None or len(subtitles) == 0: # not found. Log and Skip... log_unavailable(imdb_id) time.sleep(SLEEP_TIME) continue subtitles = sorted(subtitles, key=lambda i: int(i['SubDownloadsCnt']), reverse=True) id_subtitle_file = subtitles[0].get('IDSubtitleFile') override_filenames[id_subtitle_file] = "{}.srt".format(imdb_id) log("Top subtitles file ID {} : {} downloads".format( id_subtitle_file, subtitles[0].get('SubDownloadsCnt'))) try: ost.download_subtitles([id_subtitle_file], output_directory='./subtitles', extension='srt', override_filenames=override_filenames) except: log("Error downloading subtitles: {}".format(sys.exc_info()[0])) time.sleep(SLEEP_TIME) continue if not os.path.exists("./subtitles/{}.srt".format(imdb_id)): print( "Something went wrong... The subtitles file was not saved successfully." ) break log_owned(imdb_id) # we have downloaded it successfully. Let's log it. download_count += 1 time.sleep(SLEEP_TIME)
class TestOpenSubtitles(unittest.TestCase):
    """Exercise the ``OpenSubtitles`` client against a mocked XML-RPC proxy.

    Each test installs a canned response for one remote method on the mock
    and checks what the client makes of it.
    """

    def setUp(self):
        self.mock = MockServerProxy()
        self.ost = OpenSubtitles()
        self.ost.xmlrpc = self.mock

    def test_login(self):
        self.mock.LogIn = lambda *_: {
            'status': '403',
        }
        self.assertIsNone(self.ost.login('*****@*****.**', 'goodpassword'))

        self.mock.LogIn = lambda *_: {
            'status': '200 OK',
            'token': 'token',
        }
        self.assertEqual(self.ost.login('*****@*****.**', 'goodpassword'), 'token')

    def test_search_subtitles(self):
        self.mock.SearchSubtitles = lambda *_: fixture('search_subtitles')

        found = self.ost.search_subtitles([])

        self.assertEqual(len(found), 1)
        first = found[0]
        self.assertEqual(first.get('IDSubtitle'), '7783633')
        self.assertEqual(first.get('IDSubtitleFile'), '1956355942')
        self.assertEqual(first.get('IDSubMovieFile'), '19353776')

    def test_search_imdb(self):
        self.mock.SearchMoviesOnIMDB = lambda *_: {
            'status': '200 OK',
            'data': [{
                'IDMovieImdb': 'id',
            }]
        }

        # TODO: not sure if these are the right params. I am just keeping the test because it was on the README
        found = self.ost.search_movies_on_imdb([])
        self.assertEqual(found[0].get('IDMovieImdb'), 'id')

    def test_no_operation(self):
        self.mock.NoOperation = lambda *_: {'status': '200 OK'}
        self.assertTrue(self.ost.no_operation())

    def test_logout(self):
        self.mock.LogOut = lambda *_: {'status': '403'}
        self.assertFalse(self.ost.logout())

        self.mock.LogOut = lambda *_: {'status': '200 OK'}
        self.assertTrue(self.ost.logout())

    def test_auto_update(self):
        self.mock.AutoUpdate = lambda *_: {
            'status': '200 OK',
            'version': 'something',
        }
        reply = self.ost.auto_update('SubDownloader')
        self.assertIn('version', reply.keys())

    def test_already_exists(self):
        self.mock.TryUploadSubtitles = lambda *_: {
            'status': '403',
        }
        # TODO: The error here is unauthorized and not that the subtitle wasn't found,
        # however, for not breaking compatibility we will keep it this way for now.
        self.assertFalse(self.ost.try_upload_subtitles([]))

        self.mock.TryUploadSubtitles = lambda *_: {
            'status': '200 OK',
            'alreadyindb': 1,
        }
        self.assertTrue(self.ost.try_upload_subtitles([]))

    def test_upload_subtitles(self):
        self.mock.UploadSubtitles = lambda *_: {
            'status': '200 OK',
            'data': {
                'url': 'http://example.com',
            },
        }
        reply = self.ost.upload_subtitles([])
        self.assertIn('url', reply.keys())

    def test_check_subtitle_hash(self):
        self.mock.CheckSubHash = lambda *_: {
            'status': '200 OK',
            'data': {},
        }
        self.assertEqual(self.ost.check_subtitle_hash([]), {})

    def test_check_movie_hash(self):
        self.mock.CheckMovieHash = lambda *_: {
            'status': '200 OK',
            'data': {},
        }
        self.assertEqual(self.ost.check_movie_hash([]), {})

    def test_insert_movie_hash(self):
        self.mock.InsertMovieHash = lambda *_: {
            'status': '200 OK',
            'data': {},
        }
        self.assertEqual(self.ost.insert_movie_hash([]), {})

    def test_report_wrong_movie_hash(self):
        self.mock.ReportWrongMovieHash = lambda *_: {
            'status': '419',
        }
        self.assertFalse(self.ost.report_wrong_movie_hash([]))

        self.mock.ReportWrongMovieHash = lambda *_: {
            'status': '200 OK',
        }
        self.assertTrue(self.ost.report_wrong_movie_hash([]))

    def test_report_wrong_movie_hash_404(self):
        self.mock.ReportWrongMovieHash = lambda *_: {
            'status': '404',
        }
        self.assertFalse(self.ost.report_wrong_movie_hash('hash'))

        self.mock.ReportWrongMovieHash = lambda *_: {
            'status': '200 OK',
        }
        self.assertTrue(self.ost.report_wrong_movie_hash('hash'))

    def test_get_subtitle_languages(self):
        self.mock.GetSubLanguages = lambda *_: {
            'status': '200 OK',
            'data': {},
        }
        self.assertEqual(self.ost.get_subtitle_languages(), {})

    def test_get_available_translations(self):
        self.mock.GetAvailableTranslations = lambda *_: {
            'status': '200 OK',
            'data': {},
        }
        self.assertEqual(self.ost.get_available_translations('SubDownloader'), {})

    def test_subtitles_votes(self):
        self.mock.SubtitlesVote = lambda *_: {
            'status': '200 OK',
            'data': {},
        }
        self.assertEqual(self.ost.subtitles_votes({}), {})

    def test_get_comments(self):
        self.mock.GetComments = lambda *_: {
            'status': '200 OK',
            'data': {},
        }
        self.assertEqual(self.ost.get_comments([]), {})

    def test_add_comment(self):
        self.mock.AddComment = lambda *_: {
            'status': '403',
        }
        self.assertFalse(self.ost.add_comment({}))

        self.mock.AddComment = lambda *_: {
            'status': '200 OK',
        }
        self.assertTrue(self.ost.add_comment({}))

    def test_add_request(self):
        self.mock.AddRequest = lambda *_: {
            'status': '200 OK',
            'data': {},
        }
        self.assertEqual(self.ost.add_request({}), {})

    def test_download_subtitles(self):
        self.mock.DownloadSubtitles = lambda *_: fixture('download_subtitles')
        with tempfile.TemporaryDirectory() as tmpdirname:
            result = self.ost.download_subtitles(['id'], output_directory=tmpdirname)
            self.assertTrue(result, result)

    @mock.patch('pythonopensubtitles.opensubtitles.decompress',
                return_value='test_decoded_data')
    def test_download_subtitles_force_encoding(self, mock_decompress):
        self.mock.DownloadSubtitles = lambda *_: fixture('download_subtitles')
        with tempfile.TemporaryDirectory() as tmpdirname:
            result = self.ost.download_subtitles(['id'],
                                                 output_directory=tmpdirname,
                                                 encoding='test_encoding')
            # The encoding must be forwarded to decompress() unchanged.
            encoded_data = self.ost._get_from_data_or_none('data')
            mock_decompress.assert_called_with(encoded_data[0]['data'],
                                               encoding='test_encoding')
            self.assertTrue(result, result)
# Example: download the first subtitle that matches a video file's hash.
# The library used here lives at
# https://github.com/agonzalezro/python-opensubtitles
from pythonopensubtitles.opensubtitles import OpenSubtitles
from pythonopensubtitles.utils import File

ost = OpenSubtitles()

# The username and password of the OpenSubtitles account.
# login() returns None when the credentials are rejected.
if ost.login("username", "password") is None:
    raise SystemExit("OpenSubtitles login failed: check the username and password")

# The movie or episode path
f = File(r'path\to\file')

# The available subtitle languages can be listed with
# ost.get_subtitle_languages(); set 'sublanguageid' to the wanted
# language id ('all' searches every language).
data = ost.search_subtitles([{
    'sublanguageid': 'all',
    'moviehash': f.get_hash(),
    'moviebytesize': f.size
}])

# The search yields None (failed request) or an empty list (no match);
# indexing either would crash.
if not data:
    raise SystemExit("No subtitles found for this file")

id_subtitle_file = data[0].get('IDSubtitleFile')

# The output directory is not set to any location, so the subtitles are
# downloaded into the current working directory.
ost.download_subtitles([id_subtitle_file],
                       output_directory='',
                       extension='srt')
class SubDownloader(object):
    """Find a movie or TV show on IMDb and download its subtitles as SRT.

    Several OpenSubtitles accounts can be registered with ``add_login`` so
    that a different account is used once one of them stops returning data.
    """

    # Number of subtitle IDs requested per download call.
    _BATCH_SIZE = 19

    def __init__(self, search_term = None, data_path = ".", verbose = 2):
        """Initialize the SubDownloader object.

        Args:
            search_term: Default IMDb search term.
            data_path: Directory where files are saved.
            verbose: 0 prints nothing through ``ObjPrint``, 1 prints only
                important messages, 2 or more prints everything.
        """
        self.ost = OpenSubtitles()
        self.ia = IMDb()
        self.password_array = []
        self.used_accounts = []
        self.data_path = data_path
        self.verbose = verbose
        self.search_term = search_term
        self.current_account = None

    def add_login(self, username, password):
        """Add an OpenSubtitles account to the list of possible accounts.

        Multiple accounts can be added to deal with hitting rate limits.

        Returns:
            -1 when the account was already registered, otherwise None.
        """
        if (username, password) in self.password_array:
            self.ObjPrint("User already added", important = True)
            return -1
        self.password_array.append((username, password))

    def login(self, username = None, password = None):
        """Log in to OpenSubtitles and return the API token.

        If a username and password are passed they are used (and
        remembered). Otherwise the first registered account that has not
        been used in this session is used.

        Returns:
            The token, or -1 when every registered account had already
            been used (the first account is logged in again in that case).

        Raises:
            Exception: If a login attempt returns no token, or no account
                is available at all.
        """
        # If a manual user is passed to function
        if username is not None and password is not None:
            token = self.ost.login(username, password)
            if token is None:
                raise Exception("Failed to collect token from manual pass")
            # Save current account
            self.current_account = username
            # Add account to list of accounts
            if (username, password) not in self.password_array:
                self.password_array.append((username, password))
            # Add to list of used accounts
            if username not in self.used_accounts:
                self.used_accounts.append(username)
            return token

        # Automatically login using a user that has not be used before.
        if not self.password_array:
            raise Exception("Can't log in with a username and password")

        for usr, pwd in self.password_array:
            if usr in self.used_accounts:
                continue
            token = self.ost.login(usr, pwd)
            if token is None:
                raise Exception("Failed to login using a previously unused account")
            # Save current account
            self.current_account = usr
            self.used_accounts.append(usr)
            return token

        self.ObjPrint(["Reached end of loop through accounts.",
                       "This suggests all accounts have been used.",
                       "Try running rate_limit_clean to address issue.",
                       "Meanwhile, first user has been used."],
                      important = True)
        usr, pwd = self.password_array[0]
        self.ost.login(usr, pwd)
        # Save current account
        self.current_account = usr
        self.used_accounts.append(usr)
        return -1

    def get_current_login(self):
        """Return the username of the account currently logged in."""
        return self.current_account

    def rate_limit_clean(self):
        """Forget which accounts were used so all of them are tried again.

        Use if 24 hours have passed as the rate limits will reset.
        """
        self.used_accounts = []

    def rate_limit_naughty_fix(self):
        """Work around the rate limit by logging in with a different account."""
        return self.login()

    def remove_usr(self, username):
        """Remove every registered account with this username.

        The previous implementation unpacked each ``(username, password)``
        tuple as ``(index, tuple)`` and could never remove anything.
        """
        self.password_array = [
            account for account in self.password_array
            if account[0] != username
        ]

    def set_data_path(self, path):
        """Set the path that data will be saved at."""
        self.data_path = path

    def set_search_term(self, term):
        """Set the search term that will be used to find subtitles."""
        self.search_term = term

    def find(self, search_term = None, force_series = False):
        """Search IMDb for media that matches the search term.

        The first result that is a movie (unless ``force_series``) or a TV
        series is returned; for a series the episode data is loaded first.

        Args:
            search_term: Overrides and replaces the stored search term.
            force_series: Only accept TV series results.

        Raises:
            Exception: If no search term is set or nothing matches.
            IMDbError: If the IMDb search or episode update fails.
        """
        # Dealing with search term
        if search_term is not None:
            self.set_search_term(search_term)
        elif self.search_term is None:
            raise Exception("Can't search for nothing")

        # Performing Search
        try:
            search_results = self.ia.search_movie(self.search_term)
        except IMDbError:
            print("Something went wrong searching IMDB, process aborted.")
            raise

        # Viewing Results
        for result in search_results:
            kind = result.data['kind']
            if kind == "movie" and not force_series:
                self.ObjPrint(["Found a movie called", result['title'],
                               "\n If this not correct, try a different search term.",
                               "\n If you were looking for a series try ",
                               "force_series = True"])
                return result
            if kind == 'tv series':
                self.ObjPrint(["The series found was "+result['title'],
                               "\n If this is not the correct series then try using a different search term."])
                # Update to get the episodes
                try:
                    self.ia.update(result, 'episodes')
                except IMDbError:
                    print("Something went wrong getting the episodes, process aborted.")
                    raise
                return result

        # If the end is reached without finding anything
        raise Exception('Could not find any shows that matched the parameters.')

    def find_from_id_tv_show(self, imdb_id):
        """Return the IMDb object for ``imdb_id`` with its episodes loaded."""
        try:
            result = self.ia.get_movie(imdb_id)
            self.ia.update(result, 'episodes')
        except IMDbError:
            print("Something went wrong getting the episodes, process aborted.")
            raise
        return result

    def download_opensubtitles(self, imdb_ids, save = False, new_data_path = None):
        """Download the first English subtitle found for each IMDb id.

        Subtitles are requested in batches to limit the number of API
        calls. When a download returns nothing, another account is tried
        via ``rate_limit_naughty_fix``.

        Args:
            imdb_ids: Iterable of IMDb ids (typically all episode ids).
            save: Also write each subtitle to ``<data_path>/<imdb_id>.srt``.
            new_data_path: Replaces ``self.data_path`` before saving.

        Returns:
            Dict mapping IMDb id to the decoded subtitle text.

        Raises:
            Exception: If no further account is available after an empty
                download.
        """
        id_subtitles = []
        id_refrence = {}

        # Get the subtitles of all of the episodes in the imdb_ids list
        self.ObjPrint("Search for subtitles of all episodes.")
        for imdb_id in imdb_ids:
            databased_search = self.ost.search_subtitles(
                [{'imdbid': imdb_id, 'sublanguageid': 'eng'}])
            # A failed search yields None, an unsuccessful one an empty list.
            if not databased_search:
                print("Couldn't find any search results for this episode, ",
                      imdb_id, " ~ Will not be downloaded.")
                continue
            id_subtitle = databased_search[0].get('IDSubtitleFile')
            id_subtitles.append(id_subtitle)
            id_refrence[id_subtitle] = imdb_id

        # Split the IDs into batches. Slicing keeps every ID; the previous
        # loop dropped the ID that arrived when a batch was full.
        self.ObjPrint("Starting subtitle downloads.")
        batchs = [
            id_subtitles[start:start + self._BATCH_SIZE]
            for start in range(0, len(id_subtitles), self._BATCH_SIZE)
        ]

        all_subtitles = {}
        for mini_list in batchs:
            srt_dict = self.ost.download_subtitles(mini_list, return_decoded_data=True)
            # Check that the download worked
            while srt_dict is None:
                print("OpenSubtitles returned nothing, possibly due to rate limit",
                      "Attempting to login via a new user")
                if self.rate_limit_naughty_fix() == -1:
                    raise Exception("Account Access Failed")
                srt_dict = self.ost.download_subtitles(mini_list, return_decoded_data=True)
            for id_ in mini_list:
                if id_ in srt_dict:
                    all_subtitles[id_] = srt_dict[id_]
                else:
                    print("No subtitle data returned for file ID", id_)
            self.ObjPrint(["Downloaded SRT for all", mini_list])

        self.ObjPrint("Finished Downloading")

        # Match the resulted subtitle id to imdb ids for returning
        returnable_dict = {
            id_refrence[sub_id]: subtitles
            for sub_id, subtitles in all_subtitles.items()
        }

        if save:
            self.ObjPrint("Saving Files")
            if new_data_path is not None:
                self.data_path = new_data_path
            try:
                if not os.path.exists(self.data_path):
                    os.makedirs(self.data_path)
                for imdb_id, subtitle in returnable_dict.items():
                    # Join path components; plain concatenation turned the
                    # default data_path "." into a file named ".<id>.srt".
                    target = os.path.join(self.data_path, imdb_id + ".srt")
                    with open(target, "w+") as f:
                        f.write(subtitle)
                self.ObjPrint("Saved all to file")
            except Exception as err:
                # Saving stays best-effort, but the reason is reported.
                self.ObjPrint("Somethign went wrong during saving: {}".format(err),
                              important = True)

        return returnable_dict

    def ObjPrint(self, obj, important = False):
        """Print ``obj`` according to the verbosity level.

        ``verbose >= 2`` prints everything, ``verbose == 1`` prints only
        messages flagged ``important``. The old ``> 2`` test printed
        nothing at the default level of 2.
        """
        if self.verbose >= 2:
            print(obj)
        elif self.verbose == 1 and important:
            print(obj)

    def save_meta_data(self, meta_data_obj, new_data_path = None):
        """Save the episode metadata object as ``meta_object.pickle``.

        Args:
            meta_data_obj: Object to pickle.
            new_data_path: Replaces ``self.data_path`` before saving.
        """
        if new_data_path is not None:
            self.data_path = new_data_path
        try:
            if not os.path.exists(self.data_path):
                os.makedirs(self.data_path)
            target = os.path.join(self.data_path, "meta_object.pickle")
            with open(target, "wb+") as f:
                pickle.dump(meta_data_obj, f)
            print("File has been saved")
        except Exception as err:
            # Saving stays best-effort, but the reason is reported.
            self.ObjPrint("Somethign went wrong during saving: {}".format(err),
                          important = True)
open(Path.home() / "Dropbox" / "scripts" / "opensubtitles.json")) ost.login(auth_info["user"], auth_info["password"]) file_path = Path(sys.argv[1]) file_name = file_path.name file_dir = file_path.parent output_dir = file_dir / 'downloaded_subtitles' output_dir.mkdir(exist_ok=True, parents=True) f = File(file_path) data = ost.search_subtitles([{ 'sublanguageid': 'eng', 'moviehash': f.get_hash(), 'moviebytesize': f.size }]) for i, sub in enumerate(data): id_subtitle_file = sub.get('IDSubtitleFile') sub_file = file_path.stem + "_" + str(i) + ".srt" ost.download_subtitles( [id_subtitle_file], override_filenames={ id_subtitle_file: sub_file, }, output_directory=output_dir, ) print(output_dir / sub_file)