def __init__(self, download_path, tmp_path, captcha_cb, delay_cb, message_cb,
             settings=None, settings_provider_cls=None,
             settings_provider_args=None, debug=False, providers=None):
    """Instantiate one seeker object per provider class.

    :param download_path: default directory for downloaded subtitles.
    :param tmp_path: temporary directory, also forwarded to every seeker.
    :param captcha_cb: callback forwarded unchanged to every seeker.
    :param delay_cb: callback forwarded unchanged to every seeker.
    :param message_cb: callback forwarded unchanged to every seeker.
    :param settings: optional dict keyed by provider id; only consulted
        when ``settings_provider_cls`` is None.
    :param settings_provider_cls: optional factory called as
        ``cls(provider_id, default_settings, settings_provider_args)``.
        When given, ``settings`` is ignored.
    :param settings_provider_args: opaque argument for the factory above.
    :param debug: truthy selects ``SimpleLogger.LOG_DEBUG``, otherwise
        ``SimpleLogger.LOG_INFO``.
    :param providers: iterable of provider classes; defaults to
        ``SUBTITLES_SEEKERS``.
    """
    # A conditional expression is used instead of ``a and b or c`` so that a
    # falsy LOG_DEBUG constant cannot silently select LOG_INFO.
    log_level = SimpleLogger.LOG_DEBUG if debug else SimpleLogger.LOG_INFO
    self.log = SimpleLogger(self.__class__.__name__, log_level=log_level)
    self.download_path = toString(download_path)
    self.tmp_path = toString(tmp_path)
    self.seekers = []
    providers = providers or SUBTITLES_SEEKERS
    for seeker in providers:
        provider_id = seeker.id
        # NOTE: this mutates the provider's own ``default_settings`` mapping.
        default_settings = seeker.default_settings
        default_settings['enabled'] = {
            'type': 'yesno', 'default': True, 'label': 'Enabled', 'pos': -1}
        has_error = getattr(seeker, 'error', None) is not None

        # Per-provider values are kept in locals.  The previous code
        # re-assigned the ``settings`` argument to None inside the loop, so
        # one provider without an entry wiped the settings of all later ones.
        settings_provider = None
        seeker_settings = None
        error_settings = None
        if settings_provider_cls is not None:
            settings_provider = settings_provider_cls(
                provider_id, default_settings, settings_provider_args)
            if has_error:
                settings_provider.setSetting('enabled', False)
        elif settings is not None and provider_id in settings:
            seeker_settings = settings[provider_id]
            # NOTE(review): ErrorSeeker has always received the whole
            # settings dict here rather than settings[provider_id] - kept
            # as-is; confirm against ErrorSeeker.
            error_settings = settings

        if has_error:
            self.seekers.append(ErrorSeeker(
                seeker, tmp_path, download_path, error_settings,
                settings_provider, captcha_cb, delay_cb, message_cb))
        else:
            self.seekers.append(seeker(
                tmp_path, download_path, seeker_settings,
                settings_provider, captcha_cb, delay_cb, message_cb))
class SubsSeeker(object):
    """Front end that searches and downloads subtitles through providers.

    ``__init__`` builds one seeker object per provider class; the other
    methods fan searches out to those seekers, post-process the result
    lists and store a selected subtitle on disk.
    """

    # Extensions recognised as subtitle files.
    SUBTILES_EXTENSIONS = ['.srt', '.sub']

    def __init__(self, download_path, tmp_path, captcha_cb, delay_cb,
                 message_cb, settings=None, settings_provider_cls=None,
                 settings_provider_args=None, debug=False, providers=None):
        """Instantiate one seeker object per provider class.

        :param download_path: default directory for downloaded subtitles.
        :param tmp_path: directory used for unpacking; also forwarded to
            every seeker.
        :param captcha_cb: callback forwarded unchanged to every seeker.
        :param delay_cb: callback forwarded unchanged to every seeker.
        :param message_cb: callback forwarded unchanged to every seeker.
        :param settings: optional dict keyed by provider id; only consulted
            when ``settings_provider_cls`` is None.
        :param settings_provider_cls: optional factory called as
            ``cls(provider_id, default_settings, settings_provider_args)``.
            When given, ``settings`` is ignored.
        :param settings_provider_args: opaque argument for the factory.
        :param debug: truthy selects ``SimpleLogger.LOG_DEBUG``, otherwise
            ``SimpleLogger.LOG_INFO``.
        :param providers: iterable of provider classes; defaults to
            ``SUBTITLES_SEEKERS``.
        """
        # Conditional expression instead of ``a and b or c`` so a falsy
        # LOG_DEBUG constant cannot silently select LOG_INFO.
        log_level = SimpleLogger.LOG_DEBUG if debug else SimpleLogger.LOG_INFO
        self.log = SimpleLogger(self.__class__.__name__, log_level=log_level)
        self.download_path = toString(download_path)
        self.tmp_path = toString(tmp_path)
        self.seekers = []
        providers = providers or SUBTITLES_SEEKERS
        for seeker in providers:
            provider_id = seeker.id
            # NOTE: this mutates the provider's own ``default_settings``.
            default_settings = seeker.default_settings
            default_settings['enabled'] = {
                'type': 'yesno',
                'default': True,
                'label': 'Enabled',
                'pos': -1
            }
            if provider_id == 'opensubtitles':
                # This provider gets a fixed set of defaults instead of the
                # ones declared on the provider class.
                default_settings = {
                    'user_agent': {
                        'default': 'subssupportuseragent',
                        'type': 'text',
                        'pos': 0,
                        'label': 'User_agent'
                    },
                    'enabled': {
                        'default': True,
                        'type': 'yesno',
                        'pos': -1,
                        'label': 'Enabled'
                    }
                }
            has_error = getattr(seeker, 'error', None) is not None

            # Per-provider values live in locals.  The previous code
            # re-assigned the ``settings`` argument to None inside the loop,
            # so one provider without an entry wiped the settings of all
            # providers processed after it.
            settings_provider = None
            seeker_settings = None
            error_settings = None
            if settings_provider_cls is not None:
                settings_provider = settings_provider_cls(
                    provider_id, default_settings, settings_provider_args)
                if has_error:
                    settings_provider.setSetting('enabled', False)
            elif settings is not None and provider_id in settings:
                seeker_settings = settings[provider_id]
                # NOTE(review): ErrorSeeker has always received the whole
                # settings dict here rather than settings[provider_id] -
                # kept as-is; confirm against ErrorSeeker.
                error_settings = settings

            if has_error:
                self.seekers.append(ErrorSeeker(
                    seeker, tmp_path, download_path, error_settings,
                    settings_provider, captcha_cb, delay_cb, message_cb))
            else:
                self.seekers.append(seeker(
                    tmp_path, download_path, seeker_settings,
                    settings_provider, captcha_cb, delay_cb, message_cb))

    def getSubtitlesSimple(self, updateCB=None, title=None, filepath=None,
                           langs=None):
        """Search with parameters detected from ``title`` (or ``filepath``).

        Uses ``detectSearchParams`` to derive title/year/tvshow/season/
        episode and queries every provider returned by ``getProviders``.
        """
        title, year, tvshow, season, episode = detectSearchParams(
            title or filepath)
        seekers = self.getProviders(langs)
        return self.getSubtitles(seekers, updateCB, title, filepath, langs,
                                 year, tvshow, season, episode)

    def getSubtitles(self, providers, updateCB=None, title=None,
                     filepath=None, langs=None, year=None, tvshow=None,
                     season=None, episode=None, timeout=10):
        """Query ``providers`` and return a dict keyed by provider id.

        :param providers: seeker objects or provider ids (strings).
        :param updateCB: optional callback, see ``_searchSubtitles``.
        :param timeout: default socket timeout (seconds) applied while the
            search is running.
        :return: dict ``{provider_id: result}`` filled by
            ``_searchSubtitles``; providers whose ``error`` attribute is set
            are skipped.
        """
        self.log.info(
            'getting subtitles list - title: %s, filepath: %s, year: %s, tvshow: %s, season: %s, episode: %s'
            % (toString(title), toString(filepath), toString(year),
               toString(tvshow), toString(season), toString(episode)))
        subtitlesDict = {}
        lock = threading.Lock()
        # Remember the previous default so it can really be restored; the
        # old ``setdefaulttimeout(getdefaulttimeout())`` was a no-op and the
        # early return skipped it entirely.
        # NOTE(review): code that ran after a search used to inherit
        # ``timeout`` as process-wide default; it no longer does.
        previous_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(timeout)
        try:
            if len(providers) == 1:
                # Single provider: search synchronously in this thread.
                provider = providers[0]
                if isinstance(provider, basestring):
                    provider = self.getProvider(provider)
                if provider.error is not None:
                    self.log.debug(
                        "provider '%s' has 'error' flag set, skipping...",
                        provider)
                    return subtitlesDict
                self._searchSubtitles(lock, subtitlesDict, updateCB,
                                      provider, title, filepath, langs,
                                      season, episode, tvshow, year)
            else:
                threads = []
                for provider in providers:
                    if isinstance(provider, basestring):
                        provider = self.getProvider(provider)
                    if provider.error is not None:
                        self.log.debug(
                            "provider '%s' has 'error' flag set, skipping...",
                            provider)
                        continue
                    threads.append(threading.Thread(
                        target=self._searchSubtitles,
                        args=(lock, subtitlesDict, updateCB, provider,
                              title, filepath, langs, season, episode,
                              tvshow, year)))
                for worker in threads:
                    worker.daemon = True
                    worker.start()
                # Poll instead of join() so the waiting thread wakes up
                # regularly.
                while any(worker.is_alive() for worker in threads):
                    time.sleep(0.5)
        finally:
            socket.setdefaulttimeout(previous_timeout)
        return subtitlesDict

    def getSubtitlesList(self, subtitles_dict, provider=None, langs=None,
                         synced=False, nonsynced=False):
        """Flatten search results into one list and optionally filter it.

        Missing ``provider`` and ``country`` keys are filled in on every
        subtitle entry (in place).

        :param subtitles_dict: result of ``getSubtitles``.
        :param provider: restrict to this provider id when it is present in
            ``subtitles_dict``; otherwise all providers are merged.
        :param langs: keep only entries whose ``language_name`` is among
            ``languageTranslate(lang, 0, 2)`` for the given langs.
        :param synced: keep only entries with a truthy ``sync`` value.
        :param nonsynced: keep only entries with a falsy ``sync`` value
            (ignored when ``synced`` is set).
        :return: list of subtitle dicts.
        """
        def fill_defaults(entries, provider_id):
            for sub in entries:
                if 'provider' not in sub:
                    sub['provider'] = provider_id
                if 'country' not in sub:
                    sub['country'] = langToCountry(
                        languageTranslate(sub['language_name'], 0, 2))

        subtitles_list = []
        if provider and provider in subtitles_dict:
            subtitles_list = subtitles_dict[provider]['list']
            fill_defaults(subtitles_list, provider)
        else:
            for provider_id in subtitles_dict:
                provider_list = subtitles_dict[provider_id]['list']
                subtitles_list += provider_list
                fill_defaults(provider_list, provider_id)

        if synced:
            subtitles_list = [s for s in subtitles_list if s['sync']]
        elif nonsynced:
            subtitles_list = [s for s in subtitles_list if not s['sync']]
        if langs:
            # The original filter() call had no iterable argument and raised
            # TypeError whenever ``langs`` was given.
            # NOTE(review): sortSubtitlesList translates ``language_name``
            # rather than ``langs``; confirm which side should be translated.
            wanted = [languageTranslate(lang, 0, 2) for lang in langs]
            subtitles_list = [s for s in subtitles_list
                              if s['language_name'] in wanted]
        return subtitles_list

    def sortSubtitlesList(self, subtitles_list, langs=None, sort_langs=False,
                          sort_rank=False, sort_sync=False,
                          sort_provider=False):
        """Return ``subtitles_list`` sorted by the first matching criterion.

        Criteria are checked in this order: languages (position in
        ``langs``), provider, rank (currently returns the list unchanged),
        sync (synced first).  Without any criterion the input list is
        returned as-is.
        """
        def sortLangs(x):
            # Position of the subtitle language in ``langs``; unknown
            # languages sort last.
            for idx, lang in enumerate(langs):
                if languageTranslate(x['language_name'], 0, 2) == lang:
                    return idx
            return len(langs)

        if langs and sort_langs:
            return sorted(subtitles_list, key=sortLangs)
        if sort_provider:
            return sorted(subtitles_list, key=lambda x: x['provider'])
        if sort_rank:
            return subtitles_list
        if sort_sync:
            return sorted(subtitles_list, key=lambda x: x['sync'],
                          reverse=True)
        return subtitles_list

    def downloadSubtitle(self, selected_subtitle, subtitles_dict,
                         choosefile_cb, path=None, fname=None,
                         overwrite_cb=None, settings=None):
        """Download ``selected_subtitle`` and move it to its final location.

        :param selected_subtitle: entry taken from ``subtitles_dict``.
        :param subtitles_dict: result of ``getSubtitles``.
        :param choosefile_cb: called with a list of paths when the download
            yields several subtitle files; returns the chosen one or None.
        :param path: target directory; defaults to ``self.download_path``.
        :param fname: target file name without extension; when None the
            name is derived from ``settings`` (``save_as``,
            ``lang_to_filename``).
        :param overwrite_cb: called with the target path when it already
            exists; a falsy/None result keeps the existing file.
        :param settings: optional dict of naming options.
        :return: final path, the temporary path when the file was not
            moved, or None when ``choosefile_cb`` returned None.
        :raises SubtitlesDownloadError: when no provider owns the subtitle
            or nothing was downloaded.
        """
        self.log.info(
            'downloading subtitle "%s" with settings "%s"' %
            (selected_subtitle['filename'], toString(settings) or {}))
        if settings is None:
            settings = {}

        seeker = None
        provider_id = None
        for candidate_id in subtitles_dict:
            if selected_subtitle in subtitles_dict[candidate_id]['list']:
                provider_id = candidate_id
                seeker = self.getProvider(candidate_id)
                break
        if seeker is None:
            self.log.error('provider for "%s" subtitle was not found',
                           selected_subtitle['filename'])
            # Previously execution continued and failed with an
            # AttributeError on ``None.download``.
            raise SubtitlesDownloadError(
                msg="[error] provider for subtitle was not found")

        lang, filepath = seeker.download(subtitles_dict[provider_id],
                                         selected_subtitle)[1:3]
        if getCompressedFileType(filepath):
            subfiles = self._unpack_subtitles(filepath, self.tmp_path)
        else:
            subfiles = [filepath]
        subfiles = [toString(s) for s in subfiles]

        if not subfiles:
            self.log.error("no subtitles were downloaded!")
            raise SubtitlesDownloadError(
                msg="[error] no subtitles were downloaded")
        elif len(subfiles) == 1:
            self.log.debug('found one subtitle: "%s"', str(subfiles))
            subfile = subfiles[0]
        else:
            self.log.debug('found more subtitles: "%s"', str(subfiles))
            subfile = choosefile_cb(subfiles)
            if subfile is None:
                self.log.debug('no subtitles file choosed!')
                return
            self.log.debug('selected subtitle: "%s"', subfile)

        # Pick the extension: downloaded file, then advertised file name,
        # then '.srt' as the last resort.
        ext = os.path.splitext(subfile)[1]
        if ext not in self.SUBTILES_EXTENSIONS:
            ext = os.path.splitext(
                toString(selected_subtitle['filename']))[1]
            if ext not in self.SUBTILES_EXTENSIONS:
                ext = '.srt'

        if fname is None:
            filename = os.path.basename(subfile)
            save_as = settings.get('save_as', 'default')
            if save_as == 'version':
                self.log.debug('filename creating by "version" setting')
                filename = toString(selected_subtitle['filename'])
                if os.path.splitext(
                        filename)[1] not in self.SUBTILES_EXTENSIONS:
                    filename = os.path.splitext(filename)[0] + ext
            elif save_as == 'video':
                self.log.debug('filename creating by "video" setting')
                videopath = toString(
                    subtitles_dict[seeker.id]['params'].get('filepath'))
                filename = os.path.splitext(
                    os.path.basename(videopath))[0] + ext
            if settings.get('lang_to_filename', False):
                lang_iso639_1_2 = toString(languageTranslate(lang, 0, 2))
                self.log.debug('appending language "%s" to filename',
                               lang_iso639_1_2)
                filename, ext = os.path.splitext(filename)
                filename = "%s.%s%s" % (filename, lang_iso639_1_2, ext)
        else:
            self.log.debug('using provided filename')
            filename = toString(fname) + ext
        self.log.debug('filename: "%s"', filename)

        download_path = os.path.join(toString(self.download_path), filename)
        if path is not None:
            self.log.debug('using custom download path: "%s"', path)
            download_path = os.path.join(toString(path), filename)
        self.log.debug('download path: "%s"', download_path)

        if os.path.isfile(download_path) and overwrite_cb is not None:
            ret = overwrite_cb(download_path)
            if ret is None:
                self.log.debug('overwrite cancelled, returning temp path')
                return subfile
            if not ret:
                self.log.debug('not overwriting, returning temp path')
                return subfile
            self.log.debug('overwriting')

        try:
            shutil.move(subfile, download_path)
        except Exception as e:
            # Three placeholders, three arguments: the previous calls
            # supplied only two and failed inside the error handler.
            self.log.error('moving "%s" to "%s" - %s',
                           os.path.split(subfile)[-2:],
                           os.path.split(download_path)[-2:], str(e))
            return subfile
        return download_path

    def getProvider(self, provider_id):
        """Return the seeker whose ``id`` equals ``provider_id`` or None."""
        for s in self.seekers:
            if s.id == provider_id:
                return s
        return None

    def getProviders(self, langs=None, movie=True, tvshow=True):
        """Return enabled seekers matching the language and search type.

        :param langs: when non-empty, a seeker must list at least one of
            these in ``supported_langs``.
        :param movie: accept seekers with a truthy ``movie_search``.
        :param tvshow: accept seekers with a truthy ``tvshow_search``.
        :return: list of seekers, in the order they appear in
            ``self.seekers`` (previously arbitrary set order).
        """
        def check_langs(provider):
            return any(lang in langs for lang in provider.supported_langs)

        selected = []
        for provider in self.seekers:
            if not provider.settings_provider.getSetting('enabled'):
                continue
            if langs and not check_langs(provider):
                continue
            wanted = ((movie and provider.movie_search)
                      or (tvshow and provider.tvshow_search))
            if wanted and provider not in selected:
                selected.append(provider)
        return selected

    def _searchSubtitles(self, lock, subtitlesDict, updateCB, seeker, title,
                         filepath, langs, season, episode, tvshow, year):
        """Run one seeker's search and store the outcome in ``subtitlesDict``.

        On success the seeker's result dict gets ``status = True``; on any
        exception an entry with ``status = False``, the message and an empty
        list is stored instead.  ``updateCB`` (if given) is called with
        ``(seeker.id, success, result_or_exception)``.  Writes to
        ``subtitlesDict`` are guarded by ``lock``.
        """
        try:
            subtitles = seeker.search(title, filepath, langs, season,
                                      episode, tvshow, year)
        except Exception as e:
            traceback.print_exc()
            with lock:
                subtitlesDict[seeker.id] = {
                    'message': str(e),
                    'status': False,
                    'list': []
                }
                if updateCB is not None:
                    updateCB(seeker.id, False, e)
        else:
            with lock:
                subtitles['status'] = True
                subtitlesDict[seeker.id] = subtitles
                if updateCB is not None:
                    updateCB(seeker.id, True, subtitles)

    def _unpack_subtitles(self, filepath, dest_dir, max_recursion=3):
        """Unpack a zip/rar archive and return the subtitle files found.

        Archives nested inside the archive are unpacked too, up to
        ``max_recursion`` levels.

        :return: list of extracted paths with a subtitle extension.
        :raises Exception: for unsupported archive types.
        """
        compressed = getCompressedFileType(filepath)
        if compressed == 'zip':
            self.log.debug('found "zip" archive, unpacking...')
            extracted = self._unpack_zipsub(filepath, dest_dir)
        elif compressed == 'rar':
            self.log.debug('found "rar" archive, unpacking...')
            extracted = self._unpack_rarsub(filepath, dest_dir)
        else:
            self.log.error('unsupported archive - %s', compressed)
            # NOTE(review): ``_`` is called with two arguments here; confirm
            # the translation helper accepts that.
            raise Exception(_("unsupported archive %s", compressed))

        # Collect into a separate list instead of extending the list that
        # is being iterated.
        collected = list(extracted)
        if max_recursion > 0:
            for member in extracted:
                if os.path.splitext(member)[1] in ('.rar', '.zip'):
                    collected.extend(self._unpack_subtitles(
                        member, dest_dir, max_recursion - 1))
        return [member for member in collected
                if os.path.splitext(member)[1] in self.SUBTILES_EXTENSIONS]

    def _extract_members(self, archive, dest_dir):
        """Write subtitle/archive members of ``archive`` flat into ``dest_dir``.

        ``archive`` only needs ``namelist()`` and ``read(name)``.  Members
        are stored under their base name, so directory components inside the
        archive are dropped.
        """
        wanted_exts = self.SUBTILES_EXTENSIONS + ['.rar', '.zip']
        written = []
        for member in archive.namelist():
            if os.path.splitext(member)[1] not in wanted_exts:
                continue
            target = os.path.join(dest_dir, os.path.basename(member))
            # ``with`` closes the output file even when read()/write() fails.
            with open(target, 'wb') as outfile:
                outfile.write(archive.read(member))
            written.append(target)
        return written

    def _unpack_zipsub(self, zip_path, dest_dir):
        """Extract subtitle and nested-archive members of a zip file."""
        with zipfile.ZipFile(zip_path) as zf:
            return self._extract_members(zf, dest_dir)

    def _unpack_rarsub(self, rar_path, dest_dir):
        """Extract subtitle and nested-archive members of a rar file.

        :raises ImportError: when the optional ``rarfile`` module is missing.
        """
        try:
            import rarfile
        except ImportError:
            self.log.error('rarfile lib not available - pip install rarfile')
            raise
        rf = rarfile.RarFile(rar_path)
        return self._extract_members(rf, dest_dir)
import json
import os
import random
import threading
import time

import bs4
import requests
from bs4 import BeautifulSoup, SoupStrainer
from discord_webhook import DiscordEmbed, DiscordWebhook

from utils import SimpleLogger

# Clear the console before anything is printed ("cls" is the Windows command).
os.system("cls")
logger = SimpleLogger()


class Monitor:
    """Holds the HTTP session, polling delay and profile data for the monitor.

    Constructing an instance immediately calls ``login()``.
    """

    def __init__(self):
        """Set up session/configuration attributes and start the login."""
        # Placeholder values - replace with real data before running.
        placeholder_profile = {
            "firstAndMiddleName": "YOUR_FIRST_AND_MIDDLE_NAME",
            "lastName": "YOUR_LAST_NAME",
            "birthDay": "YOUR_BIRTH_DAY",
            "birthMonth": "YOUR_BIRTH_MONTH",
            "birthYear": "YOUR_BIRTH_YEAR",
            "licenseNumber": "YOUR_LICENSE_NUMBER",
        }
        browser_user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/87.0.4280.88 Safari/537.36"
        )

        self.session = requests.Session()
        self.delay = 3.5
        self.userAgent = browser_user_agent
        self.discordWebhook = "YOUR_DISCORD_WEBHOOK"
        self.profile = placeholder_profile

        logger.yellow("Starting Monitor...")
        self.login()

    def login(self):
        """Announce the login attempt (only the log call is visible here)."""
        logger.yellow("Logging into DVLA...")
class SubsSeeker(object):
    """Front end that searches and downloads subtitles through providers.

    ``__init__`` builds one seeker object per provider class; the other
    methods fan searches out to those seekers, post-process the result
    lists and store a selected subtitle on disk.
    """

    # Extensions recognised as subtitle files.
    SUBTILES_EXTENSIONS = ['.srt', '.sub']

    def __init__(self, download_path, tmp_path, captcha_cb, delay_cb,
                 message_cb, settings=None, settings_provider_cls=None,
                 settings_provider_args=None, debug=False, providers=None):
        """Instantiate one seeker object per provider class.

        :param download_path: default directory for downloaded subtitles.
        :param tmp_path: directory used for unpacking; also forwarded to
            every seeker.
        :param captcha_cb: callback forwarded unchanged to every seeker.
        :param delay_cb: callback forwarded unchanged to every seeker.
        :param message_cb: callback forwarded unchanged to every seeker.
        :param settings: optional dict keyed by provider id; only consulted
            when ``settings_provider_cls`` is None.
        :param settings_provider_cls: optional factory called as
            ``cls(provider_id, default_settings, settings_provider_args)``.
            When given, ``settings`` is ignored.
        :param settings_provider_args: opaque argument for the factory.
        :param debug: truthy selects ``SimpleLogger.LOG_DEBUG``, otherwise
            ``SimpleLogger.LOG_INFO``.
        :param providers: iterable of provider classes; defaults to
            ``SUBTITLES_SEEKERS``.
        """
        # Conditional expression instead of ``a and b or c`` so a falsy
        # LOG_DEBUG constant cannot silently select LOG_INFO.
        log_level = SimpleLogger.LOG_DEBUG if debug else SimpleLogger.LOG_INFO
        self.log = SimpleLogger(self.__class__.__name__, log_level=log_level)
        self.download_path = toString(download_path)
        self.tmp_path = toString(tmp_path)
        self.seekers = []
        providers = providers or SUBTITLES_SEEKERS
        for seeker in providers:
            provider_id = seeker.id
            # NOTE: this mutates the provider's own ``default_settings``.
            default_settings = seeker.default_settings
            default_settings['enabled'] = {
                'type': 'yesno', 'default': True, 'label': 'Enabled',
                'pos': -1}
            has_error = getattr(seeker, 'error', None) is not None

            # Per-provider values live in locals.  The previous code
            # re-assigned the ``settings`` argument to None inside the loop,
            # so one provider without an entry wiped the settings of all
            # providers processed after it.
            settings_provider = None
            seeker_settings = None
            error_settings = None
            if settings_provider_cls is not None:
                settings_provider = settings_provider_cls(
                    provider_id, default_settings, settings_provider_args)
                if has_error:
                    settings_provider.setSetting('enabled', False)
            elif settings is not None and provider_id in settings:
                seeker_settings = settings[provider_id]
                # NOTE(review): ErrorSeeker has always received the whole
                # settings dict here rather than settings[provider_id] -
                # kept as-is; confirm against ErrorSeeker.
                error_settings = settings

            if has_error:
                self.seekers.append(ErrorSeeker(
                    seeker, tmp_path, download_path, error_settings,
                    settings_provider, captcha_cb, delay_cb, message_cb))
            else:
                self.seekers.append(seeker(
                    tmp_path, download_path, seeker_settings,
                    settings_provider, captcha_cb, delay_cb, message_cb))

    def getSubtitlesSimple(self, updateCB=None, title=None, filepath=None,
                           langs=None):
        """Search with parameters detected from ``title`` (or ``filepath``).

        Uses ``detectSearchParams`` to derive title/year/tvshow/season/
        episode and queries every provider returned by ``getProviders``.
        """
        title, year, tvshow, season, episode = detectSearchParams(
            title or filepath)
        seekers = self.getProviders(langs)
        return self.getSubtitles(seekers, updateCB, title, filepath, langs,
                                 year, tvshow, season, episode)

    def getSubtitles(self, providers, updateCB=None, title=None,
                     filepath=None, langs=None, year=None, tvshow=None,
                     season=None, episode=None, timeout=10):
        """Query ``providers`` and return a dict keyed by provider id.

        :param providers: seeker objects or provider ids (strings).
        :param updateCB: optional callback, see ``_searchSubtitles``.
        :param timeout: default socket timeout (seconds) applied while the
            search is running.
        :return: dict ``{provider_id: result}`` filled by
            ``_searchSubtitles``; providers whose ``error`` attribute is set
            are skipped.
        """
        self.log.info(
            'getting subtitles list - title: %s, filepath: %s, year: %s, tvshow: %s, season: %s, episode: %s'
            % (toString(title), toString(filepath), toString(year),
               toString(tvshow), toString(season), toString(episode)))
        subtitlesDict = {}
        lock = threading.Lock()
        # Remember the previous default so it can really be restored; the
        # old ``setdefaulttimeout(getdefaulttimeout())`` was a no-op and the
        # early return skipped it entirely.
        # NOTE(review): code that ran after a search used to inherit
        # ``timeout`` as process-wide default; it no longer does.
        previous_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(timeout)
        try:
            if len(providers) == 1:
                # Single provider: search synchronously in this thread.
                provider = providers[0]
                if isinstance(provider, basestring):
                    provider = self.getProvider(provider)
                if provider.error is not None:
                    self.log.debug(
                        "provider '%s' has 'error' flag set, skipping...",
                        provider)
                    return subtitlesDict
                self._searchSubtitles(lock, subtitlesDict, updateCB,
                                      provider, title, filepath, langs,
                                      season, episode, tvshow, year)
            else:
                threads = []
                for provider in providers:
                    if isinstance(provider, basestring):
                        provider = self.getProvider(provider)
                    if provider.error is not None:
                        self.log.debug(
                            "provider '%s' has 'error' flag set, skipping...",
                            provider)
                        continue
                    threads.append(threading.Thread(
                        target=self._searchSubtitles,
                        args=(lock, subtitlesDict, updateCB, provider,
                              title, filepath, langs, season, episode,
                              tvshow, year)))
                for worker in threads:
                    worker.daemon = True
                    worker.start()
                # Poll instead of join() so the waiting thread wakes up
                # regularly.
                while any(worker.is_alive() for worker in threads):
                    time.sleep(0.5)
        finally:
            socket.setdefaulttimeout(previous_timeout)
        return subtitlesDict

    def getSubtitlesList(self, subtitles_dict, provider=None, langs=None,
                         synced=False, nonsynced=False):
        """Flatten search results into one list and optionally filter it.

        Missing ``provider`` and ``country`` keys are filled in on every
        subtitle entry (in place).

        :param subtitles_dict: result of ``getSubtitles``.
        :param provider: restrict to this provider id when it is present in
            ``subtitles_dict``; otherwise all providers are merged.
        :param langs: keep only entries whose ``language_name`` is among
            ``languageTranslate(lang, 0, 2)`` for the given langs.
        :param synced: keep only entries with a truthy ``sync`` value.
        :param nonsynced: keep only entries with a falsy ``sync`` value
            (ignored when ``synced`` is set).
        :return: list of subtitle dicts.
        """
        def fill_defaults(entries, provider_id):
            for sub in entries:
                if 'provider' not in sub:
                    sub['provider'] = provider_id
                if 'country' not in sub:
                    sub['country'] = langToCountry(
                        languageTranslate(sub['language_name'], 0, 2))

        subtitles_list = []
        if provider and provider in subtitles_dict:
            subtitles_list = subtitles_dict[provider]['list']
            fill_defaults(subtitles_list, provider)
        else:
            for provider_id in subtitles_dict:
                provider_list = subtitles_dict[provider_id]['list']
                subtitles_list += provider_list
                fill_defaults(provider_list, provider_id)

        if synced:
            subtitles_list = [s for s in subtitles_list if s['sync']]
        elif nonsynced:
            subtitles_list = [s for s in subtitles_list if not s['sync']]
        if langs:
            # The original filter() call had no iterable argument and raised
            # TypeError whenever ``langs`` was given.
            # NOTE(review): sortSubtitlesList translates ``language_name``
            # rather than ``langs``; confirm which side should be translated.
            wanted = [languageTranslate(lang, 0, 2) for lang in langs]
            subtitles_list = [s for s in subtitles_list
                              if s['language_name'] in wanted]
        return subtitles_list

    def sortSubtitlesList(self, subtitles_list, langs=None, sort_langs=False,
                          sort_rank=False, sort_sync=False,
                          sort_provider=False):
        """Return ``subtitles_list`` sorted by the first matching criterion.

        Criteria are checked in this order: languages (position in
        ``langs``), provider, rank (currently returns the list unchanged),
        sync (synced first).  Without any criterion the input list is
        returned as-is.
        """
        def sortLangs(x):
            # Position of the subtitle language in ``langs``; unknown
            # languages sort last.
            for idx, lang in enumerate(langs):
                if languageTranslate(x['language_name'], 0, 2) == lang:
                    return idx
            return len(langs)

        if langs and sort_langs:
            return sorted(subtitles_list, key=sortLangs)
        if sort_provider:
            return sorted(subtitles_list, key=lambda x: x['provider'])
        if sort_rank:
            return subtitles_list
        if sort_sync:
            return sorted(subtitles_list, key=lambda x: x['sync'],
                          reverse=True)
        return subtitles_list

    def downloadSubtitle(self, selected_subtitle, subtitles_dict,
                         choosefile_cb, path=None, fname=None,
                         overwrite_cb=None, settings=None):
        """Download ``selected_subtitle`` and move it to its final location.

        :param selected_subtitle: entry taken from ``subtitles_dict``.
        :param subtitles_dict: result of ``getSubtitles``.
        :param choosefile_cb: called with a list of paths when the download
            yields several subtitle files; returns the chosen one or None.
        :param path: target directory; defaults to ``self.download_path``.
        :param fname: target file name without extension; when None the
            name is derived from ``settings`` (``save_as``,
            ``lang_to_filename``).
        :param overwrite_cb: called with the target path when it already
            exists; a falsy/None result keeps the existing file.
        :param settings: optional dict of naming options.
        :return: final path, the temporary path when the file was not
            moved, or None when ``choosefile_cb`` returned None.
        :raises SubtitlesDownloadError: when no provider owns the subtitle
            or nothing was downloaded.
        """
        self.log.info(
            'downloading subtitle "%s" with settings "%s"' %
            (selected_subtitle['filename'], toString(settings) or {}))
        if settings is None:
            settings = {}

        seeker = None
        provider_id = None
        for candidate_id in subtitles_dict:
            if selected_subtitle in subtitles_dict[candidate_id]['list']:
                provider_id = candidate_id
                seeker = self.getProvider(candidate_id)
                break
        if seeker is None:
            self.log.error('provider for "%s" subtitle was not found',
                           selected_subtitle['filename'])
            # Previously execution continued and failed with an
            # AttributeError on ``None.download``.
            raise SubtitlesDownloadError(
                msg="[error] provider for subtitle was not found")

        lang, filepath = seeker.download(subtitles_dict[provider_id],
                                         selected_subtitle)[1:3]
        if getCompressedFileType(filepath):
            subfiles = self._unpack_subtitles(filepath, self.tmp_path)
        else:
            subfiles = [filepath]
        subfiles = [toString(s) for s in subfiles]

        if not subfiles:
            self.log.error("no subtitles were downloaded!")
            raise SubtitlesDownloadError(
                msg="[error] no subtitles were downloaded")
        elif len(subfiles) == 1:
            self.log.debug('found one subtitle: "%s"', str(subfiles))
            subfile = subfiles[0]
        else:
            self.log.debug('found more subtitles: "%s"', str(subfiles))
            subfile = choosefile_cb(subfiles)
            if subfile is None:
                self.log.debug('no subtitles file choosed!')
                return
            self.log.debug('selected subtitle: "%s"', subfile)

        # Pick the extension: downloaded file, then advertised file name,
        # then '.srt' as the last resort.
        ext = os.path.splitext(subfile)[1]
        if ext not in self.SUBTILES_EXTENSIONS:
            ext = os.path.splitext(
                toString(selected_subtitle['filename']))[1]
            if ext not in self.SUBTILES_EXTENSIONS:
                ext = '.srt'

        if fname is None:
            filename = os.path.basename(subfile)
            save_as = settings.get('save_as', 'default')
            if save_as == 'version':
                self.log.debug('filename creating by "version" setting')
                filename = toString(selected_subtitle['filename'])
                if os.path.splitext(
                        filename)[1] not in self.SUBTILES_EXTENSIONS:
                    filename = os.path.splitext(filename)[0] + ext
            elif save_as == 'video':
                self.log.debug('filename creating by "video" setting')
                videopath = toString(
                    subtitles_dict[seeker.id]['params'].get('filepath'))
                filename = os.path.splitext(
                    os.path.basename(videopath))[0] + ext
            if settings.get('lang_to_filename', False):
                lang_iso639_1_2 = toString(languageTranslate(lang, 0, 2))
                self.log.debug('appending language "%s" to filename',
                               lang_iso639_1_2)
                filename, ext = os.path.splitext(filename)
                filename = "%s.%s%s" % (filename, lang_iso639_1_2, ext)
        else:
            self.log.debug('using provided filename')
            filename = toString(fname) + ext
        self.log.debug('filename: "%s"', filename)

        download_path = os.path.join(toString(self.download_path), filename)
        if path is not None:
            self.log.debug('using custom download path: "%s"', path)
            download_path = os.path.join(toString(path), filename)
        self.log.debug('download path: "%s"', download_path)

        if os.path.isfile(download_path) and overwrite_cb is not None:
            ret = overwrite_cb(download_path)
            if ret is None:
                self.log.debug('overwrite cancelled, returning temp path')
                return subfile
            if not ret:
                self.log.debug('not overwriting, returning temp path')
                return subfile
            self.log.debug('overwriting')

        try:
            shutil.move(subfile, download_path)
        except Exception as e:
            # Three placeholders, three arguments: the previous calls
            # supplied only two and failed inside the error handler.
            self.log.error('moving "%s" to "%s" - %s',
                           os.path.split(subfile)[-2:],
                           os.path.split(download_path)[-2:], str(e))
            return subfile
        return download_path

    def getProvider(self, provider_id):
        """Return the seeker whose ``id`` equals ``provider_id`` or None."""
        for s in self.seekers:
            if s.id == provider_id:
                return s
        return None

    def getProviders(self, langs=None, movie=True, tvshow=True):
        """Return enabled seekers matching the language and search type.

        :param langs: when non-empty, a seeker must list at least one of
            these in ``supported_langs``.
        :param movie: accept seekers with a truthy ``movie_search``.
        :param tvshow: accept seekers with a truthy ``tvshow_search``.
        :return: list of seekers, in the order they appear in
            ``self.seekers`` (previously arbitrary set order).
        """
        def check_langs(provider):
            return any(lang in langs for lang in provider.supported_langs)

        selected = []
        for provider in self.seekers:
            if not provider.settings_provider.getSetting('enabled'):
                continue
            if langs and not check_langs(provider):
                continue
            wanted = ((movie and provider.movie_search)
                      or (tvshow and provider.tvshow_search))
            if wanted and provider not in selected:
                selected.append(provider)
        return selected

    def _searchSubtitles(self, lock, subtitlesDict, updateCB, seeker, title,
                         filepath, langs, season, episode, tvshow, year):
        """Run one seeker's search and store the outcome in ``subtitlesDict``.

        On success the seeker's result dict gets ``status = True``; on any
        exception an entry with ``status = False``, the message and an empty
        list is stored instead.  ``updateCB`` (if given) is called with
        ``(seeker.id, success, result_or_exception)``.  Writes to
        ``subtitlesDict`` are guarded by ``lock``.
        """
        try:
            subtitles = seeker.search(title, filepath, langs, season,
                                      episode, tvshow, year)
        except Exception as e:
            traceback.print_exc()
            with lock:
                subtitlesDict[seeker.id] = {
                    'message': str(e), 'status': False, 'list': []}
                if updateCB is not None:
                    updateCB(seeker.id, False, e)
        else:
            with lock:
                subtitles['status'] = True
                subtitlesDict[seeker.id] = subtitles
                if updateCB is not None:
                    updateCB(seeker.id, True, subtitles)

    def _unpack_subtitles(self, filepath, dest_dir, max_recursion=3):
        """Unpack a zip/rar archive and return the subtitle files found.

        Archives nested inside the archive are unpacked too, up to
        ``max_recursion`` levels.

        :return: list of extracted paths with a subtitle extension.
        :raises Exception: for unsupported archive types.
        """
        compressed = getCompressedFileType(filepath)
        if compressed == 'zip':
            self.log.debug('found "zip" archive, unpacking...')
            extracted = self._unpack_zipsub(filepath, dest_dir)
        elif compressed == 'rar':
            self.log.debug('found "rar" archive, unpacking...')
            extracted = self._unpack_rarsub(filepath, dest_dir)
        else:
            self.log.error('unsupported archive - %s', compressed)
            # NOTE(review): ``_`` is called with two arguments here; confirm
            # the translation helper accepts that.
            raise Exception(_("unsupported archive %s", compressed))

        # Collect into a separate list instead of extending the list that
        # is being iterated.
        collected = list(extracted)
        if max_recursion > 0:
            for member in extracted:
                if os.path.splitext(member)[1] in ('.rar', '.zip'):
                    collected.extend(self._unpack_subtitles(
                        member, dest_dir, max_recursion - 1))
        return [member for member in collected
                if os.path.splitext(member)[1] in self.SUBTILES_EXTENSIONS]

    def _extract_members(self, archive, dest_dir):
        """Write subtitle/archive members of ``archive`` flat into ``dest_dir``.

        ``archive`` only needs ``namelist()`` and ``read(name)``.  Members
        are stored under their base name, so directory components inside the
        archive are dropped.
        """
        wanted_exts = self.SUBTILES_EXTENSIONS + ['.rar', '.zip']
        written = []
        for member in archive.namelist():
            if os.path.splitext(member)[1] not in wanted_exts:
                continue
            target = os.path.join(dest_dir, os.path.basename(member))
            # ``with`` closes the output file even when read()/write() fails.
            with open(target, 'wb') as outfile:
                outfile.write(archive.read(member))
            written.append(target)
        return written

    def _unpack_zipsub(self, zip_path, dest_dir):
        """Extract subtitle and nested-archive members of a zip file."""
        with zipfile.ZipFile(zip_path) as zf:
            return self._extract_members(zf, dest_dir)

    def _unpack_rarsub(self, rar_path, dest_dir):
        """Extract subtitle and nested-archive members of a rar file.

        :raises ImportError: when the optional ``rarfile`` module is missing.
        """
        try:
            import rarfile
        except ImportError:
            self.log.error('rarfile lib not available - pip install rarfile')
            raise
        rf = rarfile.RarFile(rar_path)
        return self._extract_members(rf, dest_dir)
def _run_training_pass(model, loader, device, optimizer, criterion, use_conf,
                       loss_meter, acc_meter, desc, sample_count=None):
    """Run one optimisation pass over ``loader`` and update the meters.

    Each batch is indexed as ``batch[0]`` (input), ``batch[1]`` (label fed
    to the loss) and ``batch[2]`` (second label used only for the accuracy
    count).

    :param use_conf: when true the per-sample loss is combined with the
        weights from ``model.compute_entropy_weight``.
    :param loss_meter: meter updated with the batch loss.
    :param acc_meter: meter updated with the batch accuracy.
    :param desc: progress-bar caption.
    :param sample_count: value used as batch size for normalisation and
        meter weighting; defaults to the actual size of each batch.
    """
    for batch in tqdm(loader, total=len(loader), ncols=80, desc=desc):
        data = batch[0].to(device)
        gt_lbls = batch[1].to(device)
        gt_gt_lbls = batch[2].to(device)
        count = data.shape[0] if sample_count is None else sample_count

        optimizer.zero_grad()

        ## forward pass
        out = model(data)
        preds = torch.max(out, dim=-1)[1]
        weights = model.compute_entropy_weight(out)

        ## loss: criterion uses reduction='none', so reduce explicitly here
        class_loss = criterion(out, gt_lbls)
        if use_conf:
            loss = (class_loss * (weights**2) + (1 - weights)**2).mean()
        else:
            loss = class_loss.mean()

        ## record: a sample counts as correct when the prediction matches
        ## batch[1] or when its batch[2] label is greater than 2
        positive = ((gt_lbls == preds) + (gt_gt_lbls > 2)).sum()
        batch_acc = positive.to(torch.float) / count
        loss_meter.update(loss.item(), count)
        acc_meter.update(batch_acc.item(), count)

        ## backward pass and parameter update
        loss.backward()
        optimizer.step()


def main(config, resume):
    """Train ``MyNet`` according to ``config`` and save checkpoints.

    :param config: dict read for ``batch_size`` (default 32),
        ``epoch.start``, ``epoch.max``, ``lr`` (default 0.0005),
        ``use_conf`` (default False), ``save_path`` and ``n_gpu``.
    :param resume: accepted for interface compatibility; not used here.

    Outputs go to ``<save_path>/<timestamp>/{result,model}`` and a copy of
    ``train.py`` is stored next to them.
    """
    # parameters
    batch_size = config.get('batch_size', 32)
    start_epoch = config['epoch']['start']
    max_epoch = config['epoch']['max']
    lr = config.get('lr', 0.0005)
    use_conf = config.get('use_conf', False)

    ## path
    timestamp = datetime.now().strftime(r"%Y-%m-%d_%H-%M-%S")
    save_path = os.path.join(config['save_path'], timestamp)
    result_path = os.path.join(save_path, 'result')
    model_path = os.path.join(save_path, 'model')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(result_path, exist_ok=True)
    os.makedirs(model_path, exist_ok=True)
    dest = shutil.copy('train.py', save_path)
    print("save to: ", dest)

    ## cuda or cpu
    if config['n_gpu'] == 0 or not torch.cuda.is_available():
        device = torch.device("cpu")
        print("using CPU")
    else:
        device = torch.device("cuda:0")

    ## dataloader
    dataset = Dataset(phase='train', do_augmentations=False)
    data_loader = DataLoader(
        dataset,
        batch_size=int(batch_size),
        num_workers=1,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
    )
    val_dataset = Dataset(phase='val', do_augmentations=False)
    val_data_loader = DataLoader(
        val_dataset,
        batch_size=int(batch_size),
        num_workers=1,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
    )

    ## few shot
    do_few_shot = True
    fs_data_loader = None
    if do_few_shot:
        fs_dataset = Dataset(
            phase='train',
            do_augmentations=False,
            metafile_path='metadata/detection_train_images.json')
        fs_data_loader = DataLoader(
            fs_dataset,
            batch_size=int(128),
            num_workers=1,
            shuffle=True,
            pin_memory=True,
        )

    ## CNN model
    output_dim = 3
    model = MyNet(output_dim)
    model = model.to(device)
    model.train()
    print(model)

    ## loss (per-sample, reduced inside the training pass)
    criterion = nn.CrossEntropyLoss(reduction='none')

    ## optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optim_params = {
        'lr': lr,
        'weight_decay': 0,
        'amsgrad': False,
    }
    optimizer = torch.optim.Adam(params, **optim_params)
    lr_params = {
        'milestones': [10],
        'gamma': 0.1,
    }
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        **lr_params)

    loss_avg = AverageMeter()
    acc_avg = AverageMeter()
    fs_loss_avg = AverageMeter()
    fs_acc_avg = AverageMeter()
    logger = SimpleLogger(['train_loss', 'train_acc', 'val_loss', 'val_acc'])

    ## loop
    last_epoch = None
    for epoch in range(start_epoch, max_epoch):
        last_epoch = epoch
        # Reset every meter, not only the loss one; otherwise the values
        # logged "per epoch" are running averages over all epochs so far.
        for meter in (loss_avg, acc_avg, fs_loss_avg, fs_acc_avg):
            meter.reset()

        _run_training_pass(
            model, data_loader, device, optimizer, criterion, use_conf,
            loss_avg, acc_avg, f'training epoch {epoch}',
            sample_count=batch_size)

        ## each epoch
        logger.update(loss_avg.avg, 'train_loss')
        logger.update(acc_avg.avg, 'train_acc')
        print("train loss: ", loss_avg.avg)
        print("train acc: ", acc_avg.avg)

        if do_few_shot and fs_data_loader is not None:
            _run_training_pass(
                model, fs_data_loader, device, optimizer, criterion,
                use_conf, fs_loss_avg, fs_acc_avg,
                f'training epoch {epoch}')
            ## each epoch
            print("fs train loss: ", fs_loss_avg.avg)
            print("fs train acc: ", fs_acc_avg.avg)

        if val_data_loader is not None:
            log = evaluate(model.eval(), val_data_loader, device,
                           use_conf=use_conf)
            model.train()

            logger.update(log['loss'], 'val_loss')
            logger.update(log['acc'], 'val_acc')
            print("val loss: ", log['loss'])
            print("val acc: ", log['acc'])

            # NOTE(review): this comparison presumably assumes get_best
            # returns an epoch number; if it returns a 0-based position it
            # never matches when start_epoch > 0 - confirm.
            best_idx = logger.get_best('val_acc', best='max')
            if best_idx == epoch:
                print('save ckpt')
                ## save ckpt
                _save_checkpoint(model_path, epoch, model)

        lr_scheduler.step()
        print()

    ## save final model (skipped when the epoch range was empty; this used
    ## to raise NameError because ``epoch`` was never bound)
    if last_epoch is not None:
        _save_checkpoint(model_path, last_epoch, model)