class Puppet:
    """Small convenience wrapper around a Marionette-controlled Firefox.

    The constructor only starts a session when both ``binary`` and
    ``profile`` exist on disk; otherwise the instance is left inert
    (``has_marionette`` is ``False`` and ``marionette`` is ``None``).
    """

    def __init__(self, binary: str, profile: str):
        """Start a Marionette session for ``binary`` using ``profile``.

        Args:
            binary: Path to the Firefox executable.
            profile: Path to an existing Firefox profile directory.
        """
        self.__has_marionette = False
        self.__auto_download = False
        self.__download_dir = ""
        # Always define the attribute so that methods such as quit() can
        # detect an instance whose start-up was skipped.
        self.marionette = None

        if not Path(binary).is_file():
            return
        if not Path(profile).is_dir():
            return

        # Suppress geckodriver log file output.
        NO_LOG = "-"
        self.marionette = Marionette(bin=binary, gecko_log=NO_LOG, profile=profile)
        # quit() is not possible unless a session has been started.
        self.marionette.start_session()
        self.__has_marionette = True

    @property
    def has_marionette(self):
        """Whether a Marionette session was started by the constructor."""
        return self.__has_marionette

    @property
    def auto_download(self):
        """Whether automatic downloading has been activated."""
        return self.__auto_download

    def __activate_auto_download(self):
        """Configure the browser to save downloads without prompting.

        Once enabled, this cannot be disabled within the same session.
        """
        # Firefox 52 keeps the per-MIME-type download behaviour (e.g. "open
        # text/plain with a program") in mimeTypes.rdf, Firefox 60 keeps it
        # in handlers.json.  Existing settings are deleted so that files are
        # downloaded automatically.
        mime_type_handlers = ["mimeTypes.rdf", "handlers.json"]
        profile_dir = Path(self.marionette.profile_path)
        for name in mime_type_handlers:
            handler_file = profile_dir.joinpath(name)
            if handler_file.is_file():
                handler_file.unlink()

        self.marionette.set_pref("browser.download.useDownloadDir", True)
        self.marionette.set_pref(
            "browser.helperApps.neverAsk.saveToDisk", ",".join(MIME_TYPES))
        USER_DEFINED = 2
        self.marionette.set_pref("browser.download.folderList", USER_DEFINED)

    @property
    def download_dir(self):
        """Absolute path of the active download directory.

        Raises:
            Exception: If auto download has not been activated yet.
        """
        if not self.__auto_download:
            raise Exception("auto download has not been activated")
        return self.__download_dir

    @download_dir.setter
    def download_dir(self, dir: str):
        """Set the download directory, activating auto download on first use.

        A ``dir`` that is not an existing directory is silently ignored.
        """
        target = Path(dir)
        if not target.is_dir():
            return
        full_path = str(target.resolve())
        if not self.__auto_download:
            self.__activate_auto_download()
            self.__auto_download = True
        self.marionette.set_pref("browser.download.dir", full_path)
        self.__download_dir = full_path

    def set_download(self, dir: str):
        """Function-style alias for assigning ``download_dir``."""
        self.download_dir = dir

    def query_selector(self, selectors: str) -> "HTMLElement":
        """Return the first element matching the CSS ``selectors``."""
        METHOD_CSS_SELECTOR = "css selector"
        return self.marionette.find_element(METHOD_CSS_SELECTOR, selectors)

    def query_selectors(self, selectors: str) -> "List[HTMLElement]":
        """Return all elements matching the CSS ``selectors``."""
        METHOD_CSS_SELECTOR = "css selector"
        return self.marionette.find_elements(METHOD_CSS_SELECTOR, selectors)

    def wait(self, seconds: int):
        """Pause for ``seconds`` using a Marionette action chain."""
        actions = Actions(self.marionette)
        actions.wait(seconds).perform()

    def quit(self):
        """Quit the browser; a no-op when no Marionette client was created."""
        if self.marionette is None:
            return
        self.marionette.quit()

    def exec(self, script: str):
        """Execute ``script`` as Python code with short helper aliases.

        SECURITY: ``script`` is run with the built-in ``exec`` and therefore
        has full access to this process.  Only pass trusted scripts.
        """
        # Aliases that keep the scripts concise; they are picked up by the
        # exec() call below through this function's local namespace.
        mrnt = self.marionette
        set_download = self.set_download
        wait = self.wait
        quit = self.quit
        query_selector = self.query_selector
        query_selectors = self.query_selectors
        exec(script)
class Puppet:
    """Small convenience wrapper around a Marionette-controlled Firefox.

    The constructor only starts a session when both ``binary`` and
    ``profile`` exist on disk; otherwise a message is printed and the
    instance is left inert (``has_session`` is ``False`` and
    ``marionette`` is ``None``).  Instances can be used as context
    managers; leaving the ``with`` block quits an active session.
    """

    # MIME types that are saved to disk without asking once auto download
    # is activated.  Each entry must be exactly one type: the list is joined
    # with "," to build the preference value.
    MIME_TYPES = [
        "application/epub+zip",
        "application/gzip",
        "application/java-archive",
        "application/json",
        "application/ld+json",
        "application/msword",
        "application/octet-stream",
        "application/ogg",
        "application/pdf",
        "application/rtf",
        "application/vnd.amazon.ebook",
        "application/vnd.apple.installer+xml",
        "application/vnd.mozilla.xul+xml",
        "application/vnd.ms-excel",
        "application/vnd.ms-fontobject",
        "application/vnd.ms-powerpoint",
        "application/vnd.oasis.opendocument.presentation",
        "application/vnd.oasis.opendocument.spreadsheet",
        "application/vnd.oasis.opendocument.text",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.visio",
        "application/x-7z-compressed",
        "application/x-abiword",
        "application/x-bzip",
        "application/x-bzip2",
        "application/x-csh",
        "application/x-freearc",
        "application/xhtml+xml",
        "application/xml",
        "application/x-rar-compressed",
        "application/x-sh",
        "application/x-shockwave-flash",
        "application/x-tar",
        "application/zip",
        # Was misspelled "appliction/php"; the x-httpd-php form is the
        # commonly documented one, so both spellings are covered.
        "application/php",
        "application/x-httpd-php",
        "audio/aac",
        # Previously a single "audio/midi audio/x-midi" string, which is
        # not a valid MIME type.
        "audio/midi",
        "audio/x-midi",
        "audio/mpeg",
        "audio/ogg",
        "audio/wav",
        "audio/webm",
        "font/otf",
        "font/ttf",
        "font/woff",
        "font/woff2",
        "image/bmp",
        "image/gif",
        "image/jpeg",
        "image/png",
        "image/svg+xml",
        "image/tiff",
        "image/vnd.microsoft.icon",
        "image/webp",
        "text/calendar",
        "text/css",
        "text/csv",
        "text/html",
        "text/javascript",
        "text/plain",
        "text/xml",
        "video/3gpp",
        "video/3gpp2",
        "video/mp2t",
        "video/mpeg",
        "video/ogg",
        "video/webm",
        "video/x-msvideo",
    ]
    METHOD_CSS_SELECTOR = "css selector"
    # Value passed as gecko_log to suppress geckodriver log file output.
    NO_LOG = "-"
    # Profile files holding per-MIME-type handling settings.
    DELETE_TARGET_FILES = ["mimeTypes.rdf", "handlers.json"]
    # Value written to browser.download.folderList.
    USER_DEFINED = 2
    GECKO_LOG = Path(__file__).parent.resolve()

    def __init__(self, binary: str, profile: str):
        """Start a Marionette session for ``binary`` using ``profile``.

        Args:
            binary: Path to the Firefox executable.
            profile: Path to an existing Firefox profile directory.
        """
        self.__has_session = False
        self.__auto_download = False
        self.__download_dir = ""
        # Always define the attribute so that quit() can be called safely
        # on an instance whose start-up was skipped.
        self.marionette = None

        if not Path(binary).is_file():
            print(f"Binary {binary} Not Found")
            return
        if not Path(profile).is_dir():
            print(f"Profile {profile} Not Found")
            return

        self.marionette = Marionette(
            bin=binary, gecko_log=self.NO_LOG, profile=profile)
        # The handler files have to be removed before start_session,
        # otherwise auto download does not work later on.
        self.__delete_download_profile()
        # quit() is not possible unless a session has been started.
        self.marionette.start_session()
        self.__has_session = True

    def __enter__(self):
        return self

    def __exit__(self, ex_type, ex_value, trace):
        if self.has_session:
            self.quit()

    @property
    def has_session(self):
        """Whether a Marionette session is currently considered active."""
        return self.__has_session

    @property
    def auto_download(self):
        """Whether automatic downloading has been activated."""
        return self.__auto_download

    def __delete_download_profile(self):
        """Delete stored download-handling settings from the profile.

        mimeTypes.rdf and handlers.json store what to do when a file is
        loaded (e.g. "save text/plain to a file").  They are removed so
        that files are downloaded automatically.
        """
        profile_dir = Path(self.marionette.profile_path)
        for name in self.DELETE_TARGET_FILES:
            handler_file = profile_dir.joinpath(name)
            if handler_file.is_file():
                handler_file.unlink()

    def __activate_auto_download(self):
        """Configure the browser to save downloads without prompting.

        Once enabled, this cannot be disabled within the same session.
        """
        self.marionette.set_pref("browser.download.useDownloadDir", True)
        self.marionette.set_pref(
            "browser.helperApps.neverAsk.saveToDisk", ",".join(self.MIME_TYPES))
        self.marionette.set_pref(
            "browser.download.folderList", self.USER_DEFINED)
        self.marionette.set_pref("browser.download.lastDir", None)
        self.__auto_download = True

    @property
    def download_dir(self):
        """Absolute path of the active download directory.

        Raises:
            Exception: If auto download has not been activated yet.
        """
        if not self.__auto_download:
            raise Exception("auto download not activated")
        return self.__download_dir

    @download_dir.setter
    def download_dir(self, dir: str):
        """Set the download directory, activating auto download on first use.

        A ``dir`` that is not an existing directory is reported on stdout
        and otherwise ignored.
        """
        target = Path(dir)
        if not target.is_dir():
            print(f"Download Dir {dir} Not Found")
            return
        full_path = str(target.resolve())
        if not self.__auto_download:
            self.__activate_auto_download()
        self.marionette.set_pref("browser.download.dir", full_path)
        self.marionette.set_pref("browser.download.downloadDir", full_path)
        self.__download_dir = full_path

    def set_download(self, dir: str):
        """Function-style alias for assigning ``download_dir``."""
        self.download_dir = dir

    def query_selector(self, selectors: str) -> "HTMLElement":
        """Return the first element matching the CSS ``selectors``."""
        return self.marionette.find_element(self.METHOD_CSS_SELECTOR, selectors)

    def query_selectors(self, selectors: str) -> "List[HTMLElement]":
        """Return all elements matching the CSS ``selectors``."""
        return self.marionette.find_elements(self.METHOD_CSS_SELECTOR, selectors)

    def wait(self, seconds: int):
        """Pause for ``seconds`` using a Marionette action chain."""
        actions = Actions(self.marionette)
        actions.wait(seconds).perform()

    def quit(self):
        """Quit the browser session; a no-op when no session is active."""
        if not self.__has_session:
            return
        self.marionette.quit(clean=True)
        self.__has_session = False

    def exec(self, script: str) -> Optional[str]:
        """Execute ``script`` as Python code with short helper aliases.

        SECURITY: ``script`` is run with the built-in ``exec`` and therefore
        has full access to this process.  Only pass trusted scripts.

        Returns:
            ``None`` on success, otherwise a description of the exception:
            its first argument as a string, or the exception's ``repr``
            when it carries no arguments.
        """
        # Aliases that keep the scripts concise; they are picked up by the
        # exec() call below through this function's local namespace.
        mrnt = self.marionette
        set_download = self.set_download
        wait = self.wait
        quit = self.quit
        query_selector = self.query_selector
        query_selectors = self.query_selectors
        try:
            exec(script)
            return None
        except Exception as err:
            # An exception raised without arguments has an empty ``args``
            # tuple; fall back to repr() so the result is never empty.
            if err.args:
                return str(err.args[0])
            return repr(err)

    @classmethod
    def __forced_rmdir(cls, p: Path):
        """Recursively delete directory ``p`` and everything below it."""
        if p.is_dir():
            for entry in p.iterdir():
                if entry.is_file():
                    entry.unlink()
                elif entry.is_dir():
                    cls.__forced_rmdir(entry)
            p.rmdir()
class FirefoxMarionetteBase(object):
    """
    Wrap Marionette/Firefox into convenient interface.

    - https://marionette-client.readthedocs.io/
    - https://marionette-client.readthedocs.io/en/master/reference.html
    - https://marionette-client.readthedocs.io/en/master/interactive.html
    """

    def __init__(self):

        logger.info('Starting Marionette Gecko wrapper')

        # Configuration
        self.firefox_bin = self.find_firefox()
        self.firefox_host = 'localhost'
        self.firefox_port = 2828

        # TODO: Make configurable
        self.firefox_verbosity = 1
        #self.firefox_verbosity = 2

        # Timeout configuration
        self.startup_timeout = 20.0
        self.socket_timeout = 32.0
        self.page_timeout = 30.0
        self.script_timeout = 20.0
        self.shutdown_timeout = 10.0

        # Instance state defaults
        self.marionette = None
        self.firefox_run_headless = True
        self.firefox_do_shutdown = False
        self.firefox_already_started = False

        # Whether the atexit shutdown hook has been installed already.
        self._atexit_registered = False

    def enable_headless(self, run_headless=True):
        """Select whether Firefox should be run in headless mode."""
        self.firefox_run_headless = run_headless

    def enable_shutdown(self, do_shutdown=True):
        """Select whether ``shutdown()`` is allowed to quit Firefox."""
        self.firefox_do_shutdown = do_shutdown

    def boot_firefox(self, headless=True):
        """Connect to a running Marionette/Firefox or launch a new one.

        If something is already listening on the configured host and port,
        that instance is reused; otherwise Firefox is launched from
        ``self.firefox_bin``.  Afterwards a session is started and
        configured.
        """

        # Indicate whether to run in headless mode
        self.enable_headless(headless)

        # Optionally shut down Marionette/Firefox after performing work.
        # This will just be called if Python exits normally.
        # Register the hook only once, even if this method runs repeatedly
        # (e.g. through ``ensure_session``).  ``getattr`` keeps subclasses
        # working that override ``__init__`` without calling it.
        if not getattr(self, '_atexit_registered', False):
            atexit.register(self.shutdown)
            self._atexit_registered = True

        # Check whether Firefox is already running
        logger.info(
            'Check for running instance of Marionette/Firefox at %s:%s',
            self.firefox_host, self.firefox_port)

        # Decide per boot which binary to pass on, without overwriting the
        # configured path: a later boot may have to launch Firefox itself.
        firefox_bin = self.firefox_bin
        if check_socket(self.firefox_host, self.firefox_port):
            logger.info('Will reuse running Marionette/Firefox')
            firefox_bin = None
            self.firefox_already_started = True
        else:
            logger.info('Will launch new Marionette/Firefox instance')
            self.firefox_already_started = False

        # Connect to / start Marionette Gecko engine
        self.marionette = Marionette(
            host=self.firefox_host,
            port=self.firefox_port,
            bin=firefox_bin,
            socket_timeout=self.socket_timeout,
            startup_timeout=self.startup_timeout,
            headless=self.firefox_run_headless,
            verbose=self.firefox_verbosity)

        self.marionette.DEFAULT_SHUTDOWN_TIMEOUT = self.shutdown_timeout

        # Start a session with Marionette Gecko engine
        self.marionette.start_session()

        # Configure Marionette
        self.configure_marionette()

    def configure_marionette(self):
        """Apply timeouts and network preferences to the active session."""

        # This specifies the time to wait for the page loading to complete.
        self.marionette.timeout.page_load = self.page_timeout

        # This specifies the time to wait for injected scripts to finish
        # before interrupting them.
        self.marionette.timeout.script = self.script_timeout

        # Set the proxy type.
        # NOTE(review): 0 appears to be Firefox's "no proxy / direct
        # connection" value, so this disables proxying rather than
        # configuring a proxy server - confirm.
        self.marionette.set_pref('network.proxy.type', 0, default_branch=True)

    @classmethod
    def find_firefox(cls):
        """Return the Firefox program chosen from the known candidates."""
        # Copy before extending so the list handed out by ``where.where``
        # is not modified in place.
        candidates = list(where.where('firefox'))
        candidates.append('/Applications/Firefox.app/Contents/MacOS/firefox-bin')
        firefox = find_program_candidate(candidates)
        logger.info('Found "firefox" program at %s', firefox)
        return firefox

    def get_status(self):
        """Return an ordered mapping of session attributes of the client."""
        attributes = ['session', 'session_id']
        data = OrderedDict()
        for attribute in attributes:
            data[attribute] = getattr(self.marionette, attribute)
        return data

    def log_status(self):
        """Log the result of ``get_status()`` as indented JSON."""
        logger.info('Marionette report: %s',
                    json.dumps(self.get_status(), indent=4))

    def has_active_session(self):
        """Return True if a client exists and it has a session id."""
        is_initialized = self.marionette is not None and self.marionette.session_id is not None
        return is_initialized

    def ensure_session(self):
        """Boot Firefox unless there is an active session already."""
        #self.log_status()
        if not self.has_active_session():
            self.boot_firefox()
            logger.info('No session with Marionette, started new session %s',
                        self.marionette.session_id)

    def shutdown(self):
        """Quit Firefox if shutdown is enabled and this program started it.

        Returns:
            True if ``quit()`` was called on the Marionette client, False
            in every other case (shutdown disabled, Firefox was already
            running beforehand, or no client exists).
        """
        if not self.firefox_do_shutdown:
            return False

        logger.info('Aiming at shutdown')

        if self.firefox_already_started:
            logger.warning(
                'Can not shutdown Firefox as it was already running before starting this program')
            return False

        logger.info('Shutting down Marionette/Firefox')
        if self.marionette is None:
            return False

        self.marionette.quit()
        return True

    def find_tag(self, tagname):
        """Return the first element with tag ``tagname``, or None."""
        try:
            return self.marionette.find_element("tag name", tagname)
        except NoSuchElementException:
            return None

    def wait_for_element_tag(self, tagname, timeout=20.0, interval=0.1):
        """
        Wait for element to appear.

        ``timeout`` and ``interval`` are passed on to ``Wait``; the
        defaults are the values that used to be hard-coded here.
        """
        waiter = Wait(self.marionette, timeout=timeout, interval=interval)
        element = waiter.until(lambda m: self.find_tag(tagname))
        return element

    def render_image(self, element=None):
        """
        Return screenshot from element.
        """
        image = self.marionette.screenshot(element=element, format='binary')
        return image

    def set_window_size(self, width, height):
        """Resize the browser window to ``width`` x ``height``."""
        self.marionette.set_window_rect(width=width, height=height)

    def get_window_rect(self):
        """Return the window rectangle reported by Marionette."""
        return self.marionette.window_rect