class Puppet:
    """Thin convenience wrapper around a Marionette-driven Firefox.

    A session is only started when both ``binary`` (a file) and ``profile``
    (a directory) exist.  Otherwise the object is left without a session:
    ``has_marionette`` is ``False`` and ``marionette`` is ``None``.
    """

    def __init__(self, binary: str, profile: str):
        """Start a Marionette session for ``binary`` using ``profile``.

        Args:
            binary: Path to the Firefox executable.
            profile: Path to the Firefox profile directory.
        """
        self.__has_marionette = False
        self.__auto_download = False
        self.__download_dir = ""
        # Defined up front so the attribute exists even when validation
        # below fails and no session is started.
        self.marionette = None

        if not Path(binary).is_file():
            return

        if not Path(profile).is_dir():
            return

        # Suppress geckodriver's log file output.
        NO_LOG = "-"
        self.marionette = Marionette(bin=binary,
                                     gecko_log=NO_LOG,
                                     profile=profile)
        # quit() is not possible unless start_session() has been called.
        self.marionette.start_session()
        self.__has_marionette = True

    @property
    def has_marionette(self):
        """``True`` while a Marionette session started by this object is open."""
        return self.__has_marionette

    @property
    def auto_download(self):
        """``True`` once automatic downloading has been activated."""
        return self.__auto_download

    def __activate_auto_download(self):
        """Set the preferences that make Firefox save downloads unprompted.

        Once enabled, this cannot be disabled within the same session.
        """
        # Firefox 52 keeps the per-type download behaviour (e.g. "open
        # text/plain with a program") in mimeTypes.rdf, Firefox 60 in
        # handlers.json.  Existing settings are deleted so that automatic
        # downloading takes effect.
        handler_files = ["mimeTypes.rdf", "handlers.json"]
        profile_dir = Path(self.marionette.profile_path)
        for name in handler_files:
            p = profile_dir.joinpath(name)
            if p.is_file():
                p.unlink()

        self.marionette.set_pref("browser.download.useDownloadDir", True)
        # NOTE(review): MIME_TYPES is expected to be a module-level sequence
        # of MIME type strings defined elsewhere in this file - confirm.
        self.marionette.set_pref("browser.helperApps.neverAsk.saveToDisk",
                                 ",".join(MIME_TYPES))
        USER_DEFINED = 2
        self.marionette.set_pref("browser.download.folderList", USER_DEFINED)

    @property
    def download_dir(self):
        """Absolute download directory currently configured.

        Raises:
            Exception: If automatic downloading has not been activated yet.
        """
        if not self.__auto_download:
            raise Exception("auto download has not been activated")
        return self.__download_dir

    @download_dir.setter
    def download_dir(self, dir: str):
        """Point Firefox's download directory at ``dir``.

        Activates automatic downloading on first use.  The assignment is
        silently ignored when ``dir`` is not an existing directory.
        """
        p = Path(dir)
        if not p.is_dir():
            return

        full_path = str(p.resolve())
        if not self.__auto_download:
            self.__activate_auto_download()
            self.__auto_download = True

        self.marionette.set_pref("browser.download.dir", full_path)
        self.__download_dir = full_path

    def set_download(self, dir: str):
        """Function-style alias for assigning ``download_dir``."""
        self.download_dir = dir

    def query_selector(self, selectors: str) -> HTMLElement:
        """Return the first element matching the CSS ``selectors``."""
        METHOD_CSS_SELECTOR = "css selector"
        return self.marionette.find_element(METHOD_CSS_SELECTOR, selectors)

    def query_selectors(self, selectors: str) -> List[HTMLElement]:
        """Return all elements matching the CSS ``selectors``."""
        METHOD_CSS_SELECTOR = "css selector"
        return self.marionette.find_elements(METHOD_CSS_SELECTOR, selectors)

    def wait(self, seconds: int):
        """Perform a Marionette ``Actions`` wait of ``seconds``."""
        actions = Actions(self.marionette)
        actions.wait(seconds).perform()

    def quit(self):
        """Quit the Marionette session; does nothing if none is open."""
        if not self.__has_marionette:
            return
        self.marionette.quit()
        self.__has_marionette = False

    def exec(self, script: str):
        """Execute ``script`` as Python code with short helper aliases.

        The script can use ``mrnt``, ``set_download``, ``wait``, ``quit``,
        ``query_selector`` and ``query_selectors``.

        NOTE: this runs arbitrary Python code with full access to this
        object; only pass scripts from a trusted source.
        """
        # The locals below look unused but are deliberately bound: they are
        # visible to the executed script to keep its code short.
        mrnt = self.marionette
        set_download = self.set_download
        wait = self.wait
        quit = self.quit
        query_selector = self.query_selector
        query_selectors = self.query_selectors

        exec(script)
Exemple #2
0
class Puppet:
    """Convenience wrapper around a Marionette-controlled Firefox session.

    Usable as a context manager: on exit the session is quit if one was
    started.  When ``binary`` or ``profile`` does not exist, construction
    prints a message and leaves the object without a session
    (``has_session`` is ``False`` and ``marionette`` is ``None``).
    """

    # MIME types to be saved to disk without prompting.  The list is joined
    # with commas into ``browser.helperApps.neverAsk.saveToDisk``, so every
    # entry must be exactly one MIME type.
    MIME_TYPES = [
        "application/epub+zip",
        "application/gzip",
        "application/java-archive",
        "application/json",
        "application/ld+json",
        "application/msword",
        "application/octet-stream",
        "application/ogg",
        "application/pdf",
        "application/rtf",
        "application/vnd.amazon.ebook",
        "application/vnd.apple.installer+xml",
        "application/vnd.mozilla.xul+xml",
        "application/vnd.ms-excel",
        "application/vnd.ms-fontobject",
        "application/vnd.ms-powerpoint",
        "application/vnd.oasis.opendocument.presentation",
        "application/vnd.oasis.opendocument.spreadsheet",
        "application/vnd.oasis.opendocument.text",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.visio",
        "application/x-7z-compressed",
        "application/x-abiword",
        "application/x-bzip",
        "application/x-bzip2",
        "application/x-csh",
        "application/x-freearc",
        "application/xhtml+xml",
        "application/xml",
        "application/x-rar-compressed",
        "application/x-sh",
        "application/x-shockwave-flash",
        "application/x-tar",
        "application/zip",
        "application/x-httpd-php",
        "audio/aac",
        "audio/midi",
        "audio/x-midi",
        "audio/mpeg",
        "audio/ogg",
        "audio/wav",
        "audio/webm",
        "font/otf",
        "font/ttf",
        "font/woff",
        "font/woff2",
        "image/bmp",
        "image/gif",
        "image/jpeg",
        "image/png",
        "image/svg+xml",
        "image/tiff",
        "image/vnd.microsoft.icon",
        "image/webp",
        "text/calendar",
        "text/css",
        "text/csv",
        "text/html",
        "text/javascript",
        "text/plain",
        "text/xml",
        "video/3gpp",
        "video/3gpp2",
        "video/mp2t",
        "video/mpeg",
        "video/ogg",
        "video/webm",
        "video/x-msvideo",
    ]
    METHOD_CSS_SELECTOR = "css selector"
    NO_LOG = "-"
    # Profile files holding per-type download handling; removed on startup.
    DELETE_TARGET_FILES = ["mimeTypes.rdf", "handlers.json"]
    # Value written to ``browser.download.folderList``.
    USER_DEFINED = 2
    # Directory containing this module (not used by the methods below).
    GECKO_LOG = Path(__file__).parent.resolve()

    def __init__(self, binary: str, profile: str):
        """Start a Marionette session for ``binary`` using ``profile``.

        Args:
            binary: Path to the Firefox executable.
            profile: Path to the Firefox profile directory.
        """
        self.__has_session = False
        self.__auto_download = False
        self.__download_dir = ""
        # Defined up front so the attribute exists even when validation
        # below fails and no session is started.
        self.marionette = None

        if not Path(binary).is_file():
            print(f"Binary {binary} Not Found")
            return

        if not Path(profile).is_dir():
            print(f"Profile {profile} Not Found")
            return

        # Suppress geckodriver's log file output.
        self.marionette = Marionette(
            bin=binary, gecko_log=self.NO_LOG, profile=profile)

        # The handler files must be deleted before start_session(),
        # otherwise automatic downloading will not work later.
        self.__delete_download_profile()

        # quit() is not possible unless start_session() has been called.
        self.marionette.start_session()
        self.__has_session = True

    def __enter__(self):
        return self

    def __exit__(self, ex_type, ex_value, trace):
        # Only quit when a session was actually started.
        if self.has_session:
            self.quit()

    @property
    def has_session(self):
        """``True`` while a Marionette session started by this object is open."""
        return self.__has_session

    @property
    def auto_download(self):
        """``True`` once automatic downloading has been activated."""
        return self.__auto_download

    def __delete_download_profile(self):
        """Remove the profile files that store download handling settings."""
        # mimeTypes.rdf and handlers.json store how each content type is
        # handled (e.g. "save text/plain to a file").  Existing settings are
        # deleted so that automatic downloading takes effect.
        profile_dir = Path(self.marionette.profile_path)
        for name in self.DELETE_TARGET_FILES:
            p = profile_dir.joinpath(name)
            if p.is_file():
                p.unlink()

    def __activate_auto_download(self):
        """Set the preferences that make Firefox save downloads unprompted.

        Once enabled, this cannot be disabled within the same session.
        """
        self.marionette.set_pref("browser.download.useDownloadDir", True)
        self.marionette.set_pref("browser.helperApps.neverAsk.saveToDisk",
                                 ",".join(self.MIME_TYPES))
        self.marionette.set_pref(
            "browser.download.folderList", self.USER_DEFINED)
        self.marionette.set_pref("browser.download.lastDir", None)
        self.__auto_download = True

    @property
    def download_dir(self):
        """Absolute download directory currently configured.

        Raises:
            Exception: If automatic downloading has not been activated yet.
        """
        if not self.__auto_download:
            raise Exception("auto download not activated")
        return self.__download_dir

    @download_dir.setter
    def download_dir(self, dir: str):
        """Point Firefox's download directory at ``dir``.

        Activates automatic downloading on first use.  When ``dir`` is not
        an existing directory a message is printed and nothing changes.
        """
        p = Path(dir)
        if not p.is_dir():
            print(f"Download Dir {dir} Not Found")
            return

        full_path = str(p.resolve())
        if not self.__auto_download:
            # Also flips the auto_download flag.
            self.__activate_auto_download()

        self.marionette.set_pref("browser.download.dir", full_path)
        self.marionette.set_pref("browser.download.downloadDir", full_path)
        self.__download_dir = full_path

    def set_download(self, dir: str):
        """Function-style alias for assigning ``download_dir``."""
        self.download_dir = dir

    def query_selector(self, selectors: str) -> HTMLElement:
        """Return the first element matching the CSS ``selectors``."""
        return self.marionette.find_element(self.METHOD_CSS_SELECTOR, selectors)

    def query_selectors(self, selectors: str) -> List[HTMLElement]:
        """Return all elements matching the CSS ``selectors``."""
        return self.marionette.find_elements(self.METHOD_CSS_SELECTOR, selectors)

    def wait(self, seconds: int):
        """Perform a Marionette ``Actions`` wait of ``seconds``."""
        actions = Actions(self.marionette)
        actions.wait(seconds).perform()

    def quit(self):
        """Quit the Marionette session; does nothing if none is open."""
        if not self.__has_session:
            return
        self.marionette.quit(clean=True)
        self.__has_session = False

    def exec(self, script: str) -> Optional[str]:
        """Execute ``script`` as Python code with short helper aliases.

        The script can use ``mrnt``, ``set_download``, ``wait``, ``quit``,
        ``query_selector`` and ``query_selectors``.

        NOTE: this runs arbitrary Python code with full access to this
        object; only pass scripts from a trusted source.

        Returns:
            ``None`` on success.  If the script raises an ``Exception``, the
            string form of its first argument, or the exception class name
            when it carries no arguments.
        """
        # The locals below look unused but are deliberately bound: they are
        # visible to the executed script to keep its code short.
        mrnt = self.marionette
        set_download = self.set_download
        wait = self.wait
        quit = self.quit
        query_selector = self.query_selector
        query_selectors = self.query_selectors

        try:
            exec(script)
            return None
        except Exception as err:
            # An exception raised without arguments has empty ``args``;
            # report its type name instead of failing with IndexError.
            if err.args:
                return str(err.args[0])
            return type(err).__name__

    @classmethod
    def __forced_rmdir(cls, p: Path):
        """Recursively delete directory ``p`` and everything below it.

        Does nothing when ``p`` is not a directory.
        """
        if not p.is_dir():
            return
        for child in p.iterdir():
            if child.is_dir() and not child.is_symlink():
                cls.__forced_rmdir(child)
            else:
                # Files, symlinks and any other entries are removed directly
                # so that the final rmdir() does not fail on leftovers.
                child.unlink()
        p.rmdir()
Exemple #3
0
class FirefoxMarionetteBase(object):
    """
    Wrap Marionette/Firefox into convenient interface.

    Connects to an already running Marionette/Firefox on the configured
    host/port when one is reachable, otherwise launches a new instance.

    - https://marionette-client.readthedocs.io/
    - https://marionette-client.readthedocs.io/en/master/reference.html
    - https://marionette-client.readthedocs.io/en/master/interactive.html
    """
    def __init__(self):
        logger.info('Starting Marionette Gecko wrapper')

        # Configuration
        self.firefox_bin = self.find_firefox()
        self.firefox_host = 'localhost'
        self.firefox_port = 2828
        # TODO: Make configurable
        self.firefox_verbosity = 1

        # Timeout configuration
        self.startup_timeout = 20.0
        self.socket_timeout = 32.0
        self.page_timeout = 30.0
        self.script_timeout = 20.0
        self.shutdown_timeout = 10.0

        # Instance state defaults
        self.marionette = None
        self.firefox_run_headless = True
        self.firefox_do_shutdown = False
        self.firefox_already_started = False

    def enable_headless(self, run_headless=True):
        """Select whether Firefox is started in headless mode."""
        self.firefox_run_headless = run_headless

    def enable_shutdown(self, do_shutdown=True):
        """Select whether ``shutdown()`` is allowed to quit Firefox."""
        self.firefox_do_shutdown = do_shutdown

    def boot_firefox(self, headless=True):
        """
        Connect to a running Marionette/Firefox or launch a new instance,
        start a session and apply the timeout/proxy configuration.

        Note that ``headless`` overwrites the value previously set through
        ``enable_headless()``.
        """

        # Indicate whether to run in headless mode
        self.enable_headless(headless)

        # Optionally shut down Marionette/Firefox after performing work
        # This will just be called if Python exits normally
        atexit.register(self.shutdown)

        # Check whether Firefox is already running
        logger.info(
            'Check for running instance of Marionette/Firefox at {}:{}'.format(
                self.firefox_host, self.firefox_port))

        if check_socket(self.firefox_host, self.firefox_port):
            logger.info('Will reuse running Marionette/Firefox')
            # Without a binary, Marionette only connects and does not launch
            self.firefox_bin = None
            self.firefox_already_started = True
        else:
            logger.info('Will launch new Marionette/Firefox instance')

        # Connect to / start Marionette Gecko engine
        self.marionette = Marionette(host=self.firefox_host,
                                     port=self.firefox_port,
                                     bin=self.firefox_bin,
                                     socket_timeout=self.socket_timeout,
                                     startup_timeout=self.startup_timeout,
                                     headless=self.firefox_run_headless,
                                     verbose=self.firefox_verbosity)

        self.marionette.DEFAULT_SHUTDOWN_TIMEOUT = self.shutdown_timeout

        # Start a session with Marionette Gecko engine
        self.marionette.start_session()

        # Configure Marionette
        self.configure_marionette()

    def configure_marionette(self):
        """Apply page-load/script timeouts and the proxy preference."""

        # This specifies the time to wait for the page loading to complete.
        self.marionette.timeout.page_load = self.page_timeout

        # This specifies the time to wait for injected scripts to finish
        # before interrupting them.
        self.marionette.timeout.script = self.script_timeout

        # Proxy configuration: type 0 selects a direct connection, i.e. no
        # proxy server is used.
        self.marionette.set_pref('network.proxy.type', 0, default_branch=True)

    @classmethod
    def find_firefox(cls):
        """
        Return the Firefox program chosen by ``find_program_candidate``
        from the ``firefox`` entries found by ``where`` plus the default
        macOS application path.
        """
        candidates = where.where('firefox')
        candidates += [
            '/Applications/Firefox.app/Contents/MacOS/firefox-bin',
        ]
        firefox = find_program_candidate(candidates)
        logger.info('Found "firefox" program at {}'.format(firefox))
        return firefox

    def get_status(self):
        """
        Return an ordered mapping with the Marionette ``session`` and
        ``session_id``; both are ``None`` while Marionette is not set up.
        """
        attributes = ['session', 'session_id']
        data = OrderedDict()
        for attribute in attributes:
            # The default keeps this usable before boot_firefox() has run,
            # when self.marionette is still None.
            data[attribute] = getattr(self.marionette, attribute, None)
        return data

    def log_status(self):
        """Log the result of ``get_status()`` as indented JSON."""
        logger.info('Marionette report: {}'.format(
            json.dumps(self.get_status(), indent=4)))

    def has_active_session(self):
        """Return whether Marionette is set up and has a session id."""
        return self.marionette is not None and self.marionette.session_id is not None

    def ensure_session(self):
        """Boot Firefox and start a session unless one is already active."""
        if not self.has_active_session():
            # Pass the configured mode explicitly; the default argument of
            # boot_firefox() would otherwise undo enable_headless(False).
            self.boot_firefox(headless=self.firefox_run_headless)
            logger.info(
                'No session with Marionette, started new session {}'.format(
                    self.marionette.session_id))

    def shutdown(self):
        """
        Quit Marionette/Firefox if shutdown is enabled and the instance was
        launched by this program.

        Returns ``True`` when quit was issued, ``False`` otherwise.
        """
        if not self.firefox_do_shutdown:
            return False

        logger.info('Aiming at shutdown')

        if self.firefox_already_started:
            logger.warning(
                'Can not shutdown Firefox as it was already running before starting this program'
            )
            return False

        logger.info('Shutting down Marionette/Firefox')
        if self.marionette is None:
            return False

        self.marionette.quit()
        return True

    def find_tag(self, tagname):
        """
        Return the first element with the given tag name,
        or ``None`` if there is no such element.
        """
        try:
            return self.marionette.find_element("tag name", tagname)
        except NoSuchElementException:
            return None

    def wait_for_element_tag(self, tagname, timeout=20.0, interval=0.1):
        """
        Wait for element to appear.

        Polls ``find_tag`` every ``interval`` seconds for up to ``timeout``
        seconds and returns the element once it is found.
        """
        waiter = Wait(self.marionette, timeout=timeout, interval=interval)
        element = waiter.until(lambda m: self.find_tag(tagname))
        return element

    def render_image(self, element=None):
        """
        Return screenshot from element.
        """
        image = self.marionette.screenshot(element=element, format='binary')
        return image

    def set_window_size(self, width, height):
        """Resize the browser window to ``width`` x ``height``."""
        self.marionette.set_window_rect(width=width, height=height)

    def get_window_rect(self):
        """Return Marionette's ``window_rect`` for the current window."""
        return self.marionette.window_rect