예제 #1
0
def initialize_browser(for_scenario_2=False):
    """Create the Firefox WebDriver used by the test scenarios.

    :param for_scenario_2: when true, Firefox is started with a custom
        profile that saves ``text/plain`` and ``application/json`` downloads
        to ``settings.BROWSER_DOWNLOAD_FOLDER`` without prompting (Selenium
        cannot drive the download dialog).
    :returns: a Firefox driver whose implicit wait is
        ``settings.WAIT_TIME_BETWEEN_EACH_STEP`` seconds.
    """
    driver_args = ()
    driver_kwargs = {}

    if for_scenario_2:
        # Test Scenario 2 requires users to download things from their browser.
        profile = webdriver.FirefoxProfile()
        # folderList: 0 = desktop, 1 = Downloads folder, 2 = the directory
        # stored in browser.download.dir (set just below).
        profile.set_preference('browser.download.folderList', 2)
        profile.set_preference('browser.download.manager.showWhenStarting', False)
        profile.set_preference('browser.download.dir', settings.BROWSER_DOWNLOAD_FOLDER)
        mime_types_that_should_be_downloaded = ['text/plain', 'application/json']
        # The preference is a comma-separated list; a ';' separator would make
        # Firefox see one unknown MIME type and keep prompting.
        profile.set_preference('browser.helperApps.neverAsk.saveToDisk',
                               ','.join(mime_types_that_should_be_downloaded))
        driver_args = (profile,)

    if settings.USE_HEADLESS_BROWSER:
        from selenium.webdriver.firefox.options import Options
        options = Options()
        options.add_argument("--headless")
        options.log.level = "trace"
        driver_kwargs['options'] = options
    # NOTE: browser.maximize_window() is intentionally not called in the
    # non-headless case. FIXME: when enabled, some clicks are not triggered anymore.

    browser = webdriver.Firefox(*driver_args, **driver_kwargs)
    browser.implicitly_wait(settings.WAIT_TIME_BETWEEN_EACH_STEP)  # In seconds
    return browser
예제 #2
0
    def get_driver(self, force=False):
        """Return the Selenium driver, creating a headless Firefox on first use.

        :param force: when true, ``self.close()`` is called before the driver
            lookup (presumably so a fresh driver gets created; that depends on
            ``close()`` clearing ``self.driver`` -- verify against ``close``).
        :returns: ``self.driver``
        """
        if force:
            # Forced creation: close whatever is currently open first.
            self.close()

        if not self.driver:
            # Headless Firefox with a spoofed user agent.
            profile = webdriver.FirefoxProfile()
            profile.set_preference("general.useragent.override", self.user_agent)
            options = Options()
            options.add_argument("--headless")
            self.driver = webdriver.Firefox(profile, firefox_options=options)

            # The former PhantomJS set-up was dead code (its capability dict
            # was built but never passed to a driver) and has been removed.

            # Implicit wait applied to every element lookup.
            self.driver.implicitly_wait(15)  # seconds

        return self.driver
예제 #3
0
    def _setup_firefox(self, capabilities):
        """Build a local Firefox webdriver from the wrapper configuration.

        :param capabilities: capabilities object
        :returns: a new local Firefox driver
        """
        # The geckodriver path is only read when marionette is requested.
        gecko_driver = None
        if capabilities.get("marionette"):
            gecko_driver = self.config.get('Driver', 'gecko_driver_path')
            self.logger.debug("Gecko driver path given in properties: %s", gecko_driver)

        # Optional custom Firefox binary
        binary_path = self.config.get_optional('Firefox', 'binary')

        options = Options()
        if self.config.getboolean_optional('Driver', 'headless'):
            self.logger.debug("Running Firefox in headless mode")
            options.add_argument('-headless')
        self._add_firefox_arguments(options)
        if binary_path:
            options.binary = binary_path

        # Keyword arguments shared by both constructor signatures.
        shared_kwargs = {
            'capabilities': capabilities,
            'executable_path': gecko_driver,
            'firefox_options': options,
        }
        gecko_log = os.path.join(DriverWrappersPool.output_directory, 'geckodriver.log')
        try:
            # Selenium 3: the constructor accepts log_path
            return webdriver.Firefox(firefox_profile=self._create_firefox_profile(),
                                     log_path=gecko_log, **shared_kwargs)
        except TypeError:
            # Selenium 2: retry without log_path
            return webdriver.Firefox(firefox_profile=self._create_firefox_profile(),
                                     **shared_kwargs)
예제 #4
0
파일: local.py 프로젝트: salsita/shishito
    def start_driver(self, browser_type, capabilities, config_section=None):
        """ Create the local selenium webdriver for the requested browser.

        :param browser_type: type of browser for which prepare driver
        :param capabilities: capabilities used for webdriver initialization
        :param config_section: passed through to ``get_browser_profile`` and
            ``get_browser_arguments``
        :returns: the started webdriver
        :raises ValueError: if ``browser_type`` is not one of the handled names
        """
        # The profile is resolved up front, whatever the browser type.
        profile = self.get_browser_profile(browser_type, capabilities, config_section)

        if browser_type == "firefox":
            from selenium.webdriver.firefox.options import Options
            ff_options = Options()
            for argument in self.get_browser_arguments(config_section):
                ff_options.add_argument(argument)
            return webdriver.Firefox(profile, desired_capabilities=capabilities,
                                     firefox_options=ff_options)

        if browser_type == "chrome":
            # For Chrome the "profile" object is handed over as chrome_options.
            return webdriver.Chrome(desired_capabilities=capabilities, chrome_options=profile)

        if browser_type == "ie":
            return webdriver.Ie(capabilities=capabilities)

        if browser_type == "phantomjs":
            return webdriver.PhantomJS(desired_capabilities=capabilities)

        if browser_type == "opera":
            return webdriver.Opera(desired_capabilities=capabilities)

        # Safari is not handled here (the original note says SafariDriver
        # bindings for Python were not yet implemented).
        raise ValueError('Unknown type of browser.')
예제 #5
0
    def test_to_capabilities(self):
        """Check that each Options setting shows up under ``moz:firefoxOptions``."""
        key = "moz:firefoxOptions"
        opts = Options()
        # Untouched options serialise to an empty capabilities dict.
        assert opts.to_capabilities() == {}

        # Profile: serialised as its encoded string form.
        profile = FirefoxProfile()
        opts.profile = profile
        caps = opts.to_capabilities()
        assert key in caps
        moz = caps[key]
        assert "profile" in moz
        assert isinstance(moz["profile"], basestring)
        assert moz["profile"] == profile.encoded

        # Command-line arguments
        opts.add_argument("--foo")
        caps = opts.to_capabilities()
        assert key in caps
        moz = caps[key]
        assert "args" in moz
        assert moz["args"] == ["--foo"]

        # Binary: serialised as the binary's start command.
        binary = FirefoxBinary()
        opts.binary = binary
        caps = opts.to_capabilities()
        assert key in caps
        moz = caps[key]
        assert "binary" in moz
        assert isinstance(moz["binary"], basestring)
        assert moz["binary"] == binary._start_cmd

        # Preferences
        opts.set_preference("spam", "ham")
        caps = opts.to_capabilities()
        assert key in caps
        moz = caps[key]
        assert "prefs" in moz
        assert isinstance(moz["prefs"], dict)
        assert moz["prefs"]["spam"] == "ham"
예제 #6
0
def getCDMStatusPage(tid_crm):
    """Fetch the CDM monitor search result page for a terminal id.

    Starts a headless Firefox, logs in by selecting the ``GUEST`` radio
    button, opens the ``search_tid`` module, submits ``tid_crm`` in the
    ``_termid_`` field and returns the resulting page source.

    :param tid_crm: value typed into the ``_termid_`` form field
    :returns: HTML source of the page shown after the search is submitted
    """
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options

    options = Options()
    options.add_argument("--headless")  # run Firefox without a visible window
    browser = webdriver.Firefox(firefox_options=options)

    # try/finally guarantees the Firefox process is shut down even when a
    # navigation or element lookup fails part-way through.
    try:
        link = 'http://172.18.65.42/monitorcdm/'
        browser.get(link)

        # Log in as guest.
        browser.find_elements_by_css_selector("input[type='radio'][value='GUEST']")[0].click()
        browser.find_element_by_class_name('tbutton').click()
        browser.get(link)

        # Open the terminal-id search and submit the query.
        browser.get('http://172.18.65.42/monitorcdm/?_module_=search_tid')
        form_textfield = browser.find_element_by_name('_termid_')
        form_textfield.send_keys(tid_crm)
        browser.find_element_by_class_name('tbutton').click()

        return browser.page_source
    finally:
        browser.quit()
예제 #7
0
파일: srv_driver.py 프로젝트: t00m/basico
 def open(self):
     '''
     Start a headless Firefox driver using the user's Firefox profile.

     In order to have selenium working with Firefox and be able to
     get SAP Notes from launchpad.support.sap.com you must:
     1. Use a browser certificate (SAP Passport) in order to avoid
        renewed logons.
        You can apply for it at:
        https://support.sap.com/support-programs-services/about/getting-started/passport.html
     2. Get certificate and import it into Firefox.
        Open menu -> Preferences -> Advanced -> View Certificates
        -> Your Certificates -> Import
     3. Trust this certificate (auto select)
     4. Check it. Visit some SAP Note url in Launchpad.
        No credentials will be asked.
        Launchpad must load target page successfully.

     Returns the driver, or None when it could not be created (the
     error is logged, not raised).
     '''
     driver = None
     utils = self.get_service('Utils')
     options = Options()
     options.add_argument('--headless')
     profile_dir = utils.get_firefox_profile_dir()
     profile = webdriver.FirefoxProfile(profile_dir)
     try:
         driver = webdriver.Firefox(firefox_profile=profile, firefox_options=options)
     except Exception as error:
         # Best effort: callers receive None. A common cause is a missing
         # geckodriver; download it from:
         # https://github.com/mozilla/geckodriver/releases/latest
         self.log.error(error)
     else:
         # Only report success when a driver really exists.
         self.log.debug("Webdriver initialized")
     return driver
def load_driver():
    """
    Start Firefox with the ``--headless`` argument and return the driver.
    """
    headless_opts = Options()
    headless_opts.add_argument("--headless")
    return webdriver.Firefox(firefox_options=headless_opts)
예제 #9
0
 def setUp(self):
     """Create a superuser, load the fixture row and start the web driver."""
     # The returned user object is not needed afterwards.
     User.objects.create_superuser(self.username, '*****@*****.**', self.password)
     self.existing = TestModel.objects.get(pk=1)

     # Instantiating the WebDriver will load your browser
     firefox_opts = Options()
     if settings.HEADLESS_TESTING:
         firefox_opts.add_argument("--headless")
     self.webdriver = CustomWebDriver(firefox_options=firefox_opts)
예제 #10
0
 def setUp(self):
     """Pick the driver for this run and give it a 10 second implicit wait.

     On CI the driver comes from ``self.sauce_chrome_webdriver()``; otherwise,
     when ``settings.SELENIUM`` is exactly ``True``, a headless Firefox is
     started locally.
     """
     if _CI:
         self.driver = self.sauce_chrome_webdriver()
     elif settings.SELENIUM is True:
         headless_opts = FirefoxOptions()
         headless_opts.add_argument('-headless')
         self.driver = Firefox(firefox_options=headless_opts)
     # NOTE(review): when neither branch runs, nothing in this method assigns
     # self.driver, so the next line relies on it being set elsewhere -- confirm.
     self.driver.implicitly_wait(10)
예제 #11
0
 def setUp(self):
     """Start one headless Firefox and one headless Chrome driver."""
     # Firefox
     ff_opts = OptionsFF()
     ff_opts.add_argument('-headless')
     self.firefox_driver = webdriver.Firefox(firefox_options=ff_opts)

     # Chrome
     chrome_opts = OptionsChrom()
     chrome_opts.add_argument('-headless')
     self.chrome_driver = webdriver.Chrome(chrome_options=chrome_opts)
예제 #12
0
    def test_arguments(self):
        """Arguments accumulate via add_argument() and via the list itself."""
        options = Options()
        # A fresh Options object starts with no arguments.
        assert len(options.arguments) == 0

        options.add_argument("--foo")
        assert len(options.arguments) == 1

        # Appending to the exposed list is reflected in the options as well.
        options.arguments.append("--bar")
        assert len(options.arguments) == 2
        assert options.arguments == ["--foo", "--bar"]
예제 #13
0
def test_rendering_utf8_iframe():
    """Render an IFrame containing non-ASCII text and check it in Firefox."""
    iframe = elem.IFrame(html=u'<p>Cerrahpaşa Tıp Fakültesi</p>')

    options = Options()
    options.add_argument('-headless')
    driver = Firefox(options=options)

    # Always shut the browser down, including when the assertion fails;
    # otherwise every run leaves a Firefox process behind.
    try:
        driver.get('data:text/html,' + iframe.render())
        driver.switch_to.frame(0)
        assert u'Cerrahpaşa Tıp Fakültesi' in driver.page_source
    finally:
        driver.quit()
예제 #14
0
 def setUp(self):
     """Open the REDbot page in headless Firefox and submit ``self.test_uri``."""
     headless_opts = Options()
     headless_opts.add_argument('-headless')
     self.browser = webdriver.Firefox(options=headless_opts)
     self.browser.get(redbot_uri)

     # Type the URI under test into the "uri" field and submit its form.
     self.uri = self.browser.find_element_by_id("uri")
     self.uri.send_keys(self.test_uri)
     self.uri.submit()

     # Fixed pause before checking for completion.
     time.sleep(2.0)
     self.check_complete()
예제 #15
0
 def new_instance(self):
     """ Start a new Firefox web driver (headless when ``self.headless`` is set),
         size its window to 1024x768, set a 5 second script timeout and return
         the browser object for further processing """
     ff_options = Options()
     if self.headless:
         print_debug(self.debug, 'actiating headless mode')
         ff_options.add_argument('-headless')

     browser = webdriver.Firefox(firefox_options=ff_options)
     browser.set_window_size(1024, 768)
     browser.set_script_timeout(5)
     return browser
예제 #16
0
def before_all(context):
    """Prepare the shared context: target location and browser.

    Reads ``TARGET`` (file path or base URL) from the environment and exits
    with status 1 when it is missing. Starts PhantomJS when ``BROWSER`` is
    ``phantomjs``; otherwise starts headless Firefox.
    """
    print("context", context)

    # Determine the target path. Can either be file path or base URL.
    target = os.environ.get('TARGET')
    if target is None:
        print("Please specify the Phenogrid file path or base URL with 'TARGET=' format")
        sys.exit(1)
    context.target = target

    # Check to see which browser to use, default to use Firefox
    if os.environ.get('BROWSER') == 'phantomjs':
        context.browser = webdriver.PhantomJS()
        print("# Using PhantomJS")
    else:
        headless_opts = Options()
        headless_opts.add_argument('-headless')
        context.browser = Firefox(firefox_options=headless_opts)
        # An earlier, disabled variant started Firefox through
        # DesiredCapabilities (marionette plus verbose loggingPrefs) and a
        # FirefoxProfile that switched on the devtools/console logging filters.

    # Set a 30 second implicit wait - http://selenium-python.readthedocs.org/en/latest/waits.html#implicit-waits
    # Once set, the implicit wait is set for the life of the WebDriver object instance.
    context.browser.set_window_size(1440, 900)
    context.browser.implicitly_wait(30)  # seconds
  def reset_browser(self):
    """Give the test a clean browser stored in ``globals.browser``.

    On first use a Firefox instance is created with the shared profile and
    the page-load timeout ``self.DEFAULT_TIMEOUT``. On later calls the
    existing browser is closed, a new session is started on it with the
    shared capabilities and profile, and all cookies are deleted.
    """
    # Identity check: the test is for "no browser yet", not for equality.
    if globals.browser is None:
      options = Options()
      options.add_argument(self.__get_arg())
      globals.browser = Firefox(AbstractBrowserBasedTest._firefox_profile,
                                firefox_options=options,
                                log_path=naming.GECKODRIVER_LOG_FILE_PATH)
      globals.browser.set_page_load_timeout(self.DEFAULT_TIMEOUT)
    else:
      globals.browser.close()
      globals.browser.start_session(capabilities=AbstractBrowserBasedTest._firefox_capabilities,
                                    browser_profile=AbstractBrowserBasedTest._firefox_profile)
      globals.browser.delete_all_cookies()  # Belt and Braces.
예제 #18
0
 def setUp(self):
     """Save the four model fixtures, create a superuser and start the driver."""
     # One empty and one populated instance of each model.
     self.single_empty = TestModelSingle()
     self.single_empty.save()
     self.single = TestModelSingle(selection='octopus')
     self.single.save()

     self.advanced_empty = TestModelAdvanced()
     self.advanced_empty.save()
     self.advanced = TestModelAdvanced(set='set1')
     self.advanced.save()

     self.superuser = create_superuser()

     # Instantiating the WebDriver will load your browser
     firefox_opts = Options()
     if settings.HEADLESS_TESTING:
         firefox_opts.add_argument("--headless")
     self.webdriver = CustomWebDriver(firefox_options=firefox_opts)
예제 #19
0
def setup_package():
    """Set up the Selenium driver once for all tests."""
    # Just skipping *setup_package* and *teardown_package* generates an
    # uncaught exception under Python 2.6, so the function always exists and
    # simply does nothing when the tests are not run.
    if not tests_are_run:
        return

    if not SHOW_BROWSER:
        # Perform all graphical operations in memory.
        virtual_display = Xvfb(width=1280, height=720)
        SeleniumTestCase.vdisplay = virtual_display
        virtual_display.start()

    # Create a Selenium browser instance.
    headless_opts = Options()
    headless_opts.add_argument('-headless')
    browser = Firefox(firefox_options=headless_opts)
    SeleniumTestCase.selenium = browser
    browser.maximize_window()

    # Shared explicit wait (10) and implicit wait (3) for the test cases.
    SeleniumTestCase.wait = ui.WebDriverWait(browser, 10)
    SeleniumTestCase.selenium.implicitly_wait(3)
예제 #20
0
def create_browser(request, driver_wait_time, tries=0):
    """Start a headless Firefox driver, retrying on failure.

    This sometimes fails to start firefox on CI, so we retry...

    :param request: object whose ``node._driver`` attribute receives the driver
    :param driver_wait_time: implicit wait passed to the driver
    :param tries: number of attempts already made; retries continue while
        this is below 5
    :returns: the configured driver
    :raises Exception: the error of the last attempt once retries are exhausted
    """
    max_tries = 5
    options = Options()
    options.add_argument('-headless')

    while True:
        driver = None
        try:
            driver = webdriver.Firefox(firefox_options=options)
            driver.implicitly_wait(driver_wait_time)
            driver.set_window_size(1200, 1200)

            request.node._driver = driver
            return driver
        except Exception:
            # A driver that started but failed during configuration would
            # otherwise be leaked on every retry.
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    # Best-effort cleanup; the original failure is what matters.
                    pass
            if tries >= max_tries:
                raise
            tries += 1
예제 #21
0
def browserEngine(response):
    """Load ``response`` in headless Firefox and report whether an alert pops up.

    External script sources and ``href`` targets are replaced with ``#``
    before the markup is written to ``test.html`` and opened. A small mouse
    move is then performed; an ``UnexpectedAlertPresentException`` raised by
    it is taken as proof that the page opened an alert.

    :param response: HTML text to test
    :returns: True if an alert was present, False otherwise
    """
    options = Options()
    options.add_argument('--headless')
    browser = webdriver.Firefox(options=options)

    # quit() in finally: the browser is shut down on every path, including
    # unexpected errors (close() alone leaves the driver session running).
    try:
        # flags= must be passed by keyword: the 4th positional parameter of
        # re.sub is `count`, so a positional re.I silently meant count=2.
        response = re.sub(r'<script.*?src=.*?>', '<script src=#>', response, flags=re.I)
        response = re.sub(r'href=.*?>', 'href=#>', response, flags=re.I)
        writer(response, 'test.html')
        browser.get('file://' + sys.path[0] + '/test.html')
        os.remove('test.html')

        actions = webdriver.ActionChains(browser)
        try:
            actions.move_by_offset(2, 2)
            actions.perform()
        except UnexpectedAlertPresentException:
            return True
        return False
    finally:
        browser.quit()
예제 #22
0
def firefox_options(request, firefox_path, firefox_profile):
    """Assemble Firefox ``Options`` from the given values and test markers.

    :param request: object whose ``node.get_marker`` is queried for the
        ``firefox_arguments`` and ``firefox_preferences`` markers
    :param firefox_path: Firefox binary path, or None to leave it unset
    :param firefox_profile: profile to attach, or None to leave it unset
    :returns: the populated ``Options`` object
    """
    opts = Options()

    if firefox_profile is not None:
        opts.profile = firefox_profile

    if firefox_path is not None:
        opts.binary = FirefoxBinary(firefox_path)

    # Each positional marker argument becomes one command-line argument.
    argument_marker = request.node.get_marker('firefox_arguments')
    if argument_marker is not None:
        for argument in argument_marker.args:
            opts.add_argument(argument)

    # The first positional marker argument is a mapping of preferences.
    preference_marker = request.node.get_marker('firefox_preferences')
    if preference_marker is not None:
        for pref_name, pref_value in preference_marker.args[0].items():
            opts.set_preference(pref_name, pref_value)

    return opts
예제 #23
0
    def _capture(self):
        """Save snapshot image of webpage, and set captured datetime.

        Opens ``self.url`` in headless Firefox at the configured capture
        resolution, grows the window to the document's scroll height, takes
        a PNG screenshot and stores it in ``self.image``.

        :returns: True once the image has been saved
        """
        from selenium import webdriver
        from selenium.webdriver.firefox.options import Options

        options = Options()
        options.add_argument('--headless')
        browser = webdriver.Firefox(options=options)

        # The browser is shut down even when loading the page or taking the
        # screenshot fails, so no Firefox process is left behind.
        try:
            # browser.set_page_load_timeout(10)  # TODO
            capture_resolution = self._get_capture_resolution()
            browser.set_window_size(*capture_resolution)
            browser.get(self.url)
            viewport_height = browser.execute_script(
                'return document.body.scrollHeight;')
            # Resize to the scroll height so the screenshot covers the page.
            browser.set_window_size(capture_resolution[0], viewport_height)  # TODO
            self.captured_at = timezone.now()
            png = browser.get_screenshot_as_png()
        finally:
            browser.quit()

        self.image.save(self._generate_image_filename(), ContentFile(png))
        return True
예제 #24
0
    def _get_Firefox(self):
        """Start Firefox and store the driver in ``self.webdriver``.

        Binary and geckodriver paths come from ``self.config``; the user
        agent, headless mode and proxy come from the instance attributes.

        :returns: True when the driver was created, False when a
            ``WebDriverException`` occurred (it is logged)
        :raises ValueError: if the proxy protocol is neither SOCKS nor ``http``
        """
        try:
            binary = FirefoxBinary(self.config.get('firefox_binary_path'))
            geckodriver_path = self.config.get('geckodriver_path')
            options = FirefoxOptions()
            profile = webdriver.FirefoxProfile()

            options.add_argument('user-agent={}'.format(self.user_agent))

            if self.browser_mode == 'headless':
                options.set_headless(headless=True)
                # options.add_argument('window-size=1200x600')  # optional

            proxy = self.proxy
            if proxy:
                # this means that the proxy is user set, regardless of the type
                profile.set_preference("network.proxy.type", 1)
                protocol = proxy.proto
                if protocol.lower().startswith('socks'):
                    # SOCKS version 5 when the protocol name ends in '5', else 4
                    socks_version = 5 if protocol[-1] == '5' else 4
                    profile.set_preference("network.proxy.socks", proxy.host)
                    profile.set_preference("network.proxy.socks_port", proxy.port)
                    profile.set_preference("network.proxy.socks_version", socks_version)
                    profile.update_preferences()
                elif protocol == 'http':
                    profile.set_preference("network.proxy.http", proxy.host)
                    profile.set_preference("network.proxy.http_port", proxy.port)
                else:
                    raise ValueError('Invalid protocol given in proxyfile.')

                profile.update_preferences()

            self.webdriver = webdriver.Firefox(
                firefox_binary=binary,
                firefox_options=options,
                executable_path=geckodriver_path,
                firefox_profile=profile,
            )
        except WebDriverException as e:
            # reaching here is bad, since we have no available webdriver instance.
            logger.error(e)
            return False

        return True
예제 #25
0
def main():
  """Parse the command line, start the chosen browser and run the test case.

  ``--url`` is the entry point URL (default ``http://localhost:8080/``) and
  ``--browser`` is ``chrome`` (default) or ``firefox``; Firefox is started
  headless. The driver is always quit after the run.

  Raises ValueError for any other ``--browser`` value.
  """
  parser = argparse.ArgumentParser(description='Process some integers.')
  parser.add_argument('--url', help='the entry point URL',
                      default='http://localhost:8080/')
  parser.add_argument('--browser', help='the browser to use: chrome|firefox',
                      default='chrome')
  args = parser.parse_args()
  print(args)

  if args.browser == 'chrome':
    driver = webdriver.Chrome()
  elif args.browser == 'firefox':
    options = Options()
    options.add_argument('-headless')
    driver = webdriver.Firefox(firefox_options=options)
  else:
    # Raising a plain string is itself a TypeError in Python 3; raise a real
    # exception so the message reaches the user.
    raise ValueError('Must specify which browser to use')

  try:
    test_case = OmegaUpTest(driver, args.url)
    test_case.run()
  finally:
    driver.quit()
예제 #26
0
def set_selenium_local_session(
    proxy_address,
    proxy_port,
    proxy_username,
    proxy_password,
    headless_browser,
    browser_profile_path,
    disable_image_load,
    page_delay,
    geckodriver_path,
    logger,
):
    """Start a local Firefox session (the default case scenario).

    The browser presents a mobile user agent, prefers English, optionally
    skips image loading and routes HTTP/SSL traffic through the given proxy.

    :returns: tuple ``(browser, err_msg)``; ``err_msg`` is always the empty
        string in this function
    """
    # set Firefox Agent to mobile agent
    mobile_agent = (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_1 like Mac OS X) AppleWebKit/605.1.15 "
        "(KHTML, like Gecko) FxiOS/18.1 Mobile/16B92 Safari/605.1.15"
    )
    # keep user_agent
    Settings.user_agent = mobile_agent

    options = Firefox_Options()
    if headless_browser:
        options.add_argument("-headless")

    # Reuse the caller's profile directory when one is given.
    profile = (
        webdriver.FirefoxProfile()
        if browser_profile_path is None
        else webdriver.FirefoxProfile(browser_profile_path)
    )

    # set English language
    profile.set_preference("intl.accept_languages", "en-US")
    profile.set_preference("general.useragent.override", mobile_agent)

    if disable_image_load:
        # permissions.default.image = 2: Disable images load,
        # this setting can improve pageload & save bandwidth
        profile.set_preference("permissions.default.image", 2)

    if proxy_address and proxy_port:
        proxy_prefs = (
            ("network.proxy.type", 1),
            ("network.proxy.http", proxy_address),
            ("network.proxy.http_port", proxy_port),
            ("network.proxy.ssl", proxy_address),
            ("network.proxy.ssl_port", proxy_port),
        )
        for pref_name, pref_value in proxy_prefs:
            profile.set_preference(pref_name, pref_value)

    # mute audio while watching stories
    profile.set_preference("media.volume_scale", "0.0")

    browser = webdriver.Firefox(
        firefox_profile=profile,
        # prefer user path before downloaded one
        executable_path=geckodriver_path or get_geckodriver(),
        options=options,
    )

    # add extenions to hide selenium
    browser.install_addon(create_firefox_extension(), temporary=True)

    # converts to custom browser
    # browser = convert_selenium_browser(browser)

    # authenticate with popup alert window
    if proxy_username and proxy_password:
        proxy_authentication(browser, logger, proxy_username, proxy_password)

    browser.implicitly_wait(page_delay)

    # set mobile viewport (iPhone X)
    browser.set_window_size(375, 812)

    highlight_print("browser", "Session started!", "initialization", "info", logger)

    return browser, ""
예제 #27
0
def download_gisaid_EpiCoV(
        uname,  # username
        upass,  # password
        normal,  # normal mode (quite)
        wd,  # output dir
        loc,  # location
        host,  # host
        cs,  # collection start date
        ce,  # collection end date
        ss,  # submission start date
        se,  # submission end date
        cg,  # complete genome only
        hc,  # high coverage only
        le,  # low coverage excluding
        to,  # timeout in sec
        rt,  # num of retry
        iv,  # interval in sec
        meta_dl  # also download meta
):
    """Download sequences and metadata from EpiCoV GISAID"""

    # output directory
    if not os.path.exists(wd):
        os.makedirs(wd, exist_ok=True)

    wd = os.path.abspath(wd)
    # GISAID_FASTA = f'{wd}/sequences.fasta.bz2'
    # GISAID_TABLE = f'{wd}/gisaid_cov2020_acknowledgement_table.xls'
    GISAID_DTL_JASON = f'{wd}/gisaid_detail_metadata.json'
    # GISAID_TSV   = f'{wd}/metadata.tsv.bz2'
    metadata = []

    # MIME types
    mime_types = "application/octet-stream"
    mime_types += ",application/excel,application/vnd.ms-excel"
    mime_types += ",application/pdf,application/x-pdf"
    mime_types += ",application/x-bzip2"
    mime_types += ",application/x-gzip,application/gzip"

    # start fresh
    try:
        os.remove(GISAID_DTL_JASON)
    except OSError:
        pass

    print("Opening browser...")
    profile = webdriver.FirefoxProfile()
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.manager.showWhenStarting", False)
    profile.set_preference("browser.download.dir", wd)
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                           mime_types)
    profile.set_preference("plugin.disable_full_page_plugin_for_types",
                           mime_types)
    profile.set_preference("pdfjs.disabled", True)

    options = Options()
    if not normal:
        options.add_argument("--headless")
    driver = webdriver.Firefox(firefox_profile=profile, options=options)

    # driverwait
    driver.implicitly_wait(20)
    wait = WebDriverWait(driver, to)

    # open GISAID
    print("Opening website GISAID...")
    driver.get('https://platform.gisaid.org/epi3/frontend')
    waiting_sys_timer(wait)
    print(driver.title)
    assert 'GISAID' in driver.title

    # login
    print("Logining to GISAID...")
    username = driver.find_element_by_name('login')
    username.send_keys(uname)
    password = driver.find_element_by_name('password')
    password.send_keys(upass)
    driver.execute_script("return doLogin();")

    waiting_sys_timer(wait)

    # navigate to EpiFlu
    print("Navigating to EpiCoV...")
    epicov_tab = driver.find_element_by_xpath("//div[@id='main_nav']//li[3]/a")
    epicov_tab.click()

    waiting_sys_timer(wait)

    # when user doesn't enter time/location, download nextstrain sequences and metadata
    if not (cs or ce or ss or se or loc):
        # download from downloads section
        print("Clicking downloads...")
        pd_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, "//div[@class='sys-actionbar-bar']//div[3]")))
        pd_button.click()
        waiting_sys_timer(wait)

        # have to click the first row twice to start the iframe
        iframe = waiting_for_iframe(wait, driver, rt, iv)
        driver.switch_to.frame(iframe)
        waiting_sys_timer(wait)

        print("Downloading Nextstrain sequences...")
        dl_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, '//div[contains(text(), "nextfasta")]')))
        dl_button.click()
        waiting_sys_timer(wait)

        fn = wait_downloaded_filename(wait, driver, 3600)
        print(f"Downloaded to {fn}.                     ")

        waiting_sys_timer(wait)

        print("Downloading Nextstrain metadata...")
        dl_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, '//div[contains(text(), "nextmeta")]')))
        dl_button.click()

        fn = wait_downloaded_filename(wait, driver, 1800)
        print(f"Downloaded to {fn}.                     ")

        waiting_sys_timer(wait)

        # go back to main frame
        back_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, '//button[contains(text(), "Back")]')))
        back_button.click()

        driver.switch_to.default_content()
        waiting_sys_timer(wait)

    # have to reduce the range of genomes
    if cs or ce or ss or se or loc:
        print("Browsing EpiCoV...")
        browse_tab = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, '//*[contains(text(), "Browse")]')))
        browse_tab.click()
        waiting_sys_timer(wait)
        waiting_table_to_get_ready(wait)

        # set location
        if loc:
            print("Setting location...")
            loc_input = driver.find_element_by_xpath(
                "//td/div[contains(text(), 'Location')]/../following-sibling::td/div/div/input"
            )
            loc_input.send_keys(loc)
            waiting_sys_timer(wait, 7)

        # set host
        if host:
            print("Setting host...")
            host_input = driver.find_element_by_xpath(
                "//td/div[contains(text(), 'Host')]/../following-sibling::td/div/div/input"
            )
            host_input.send_keys(host)
            waiting_sys_timer(wait, 7)

        # set dates
        date_inputs = driver.find_elements_by_css_selector(
            "div.sys-form-fi-date input")
        dates = (cs, ce, ss, se)
        for dinput, date in zip(date_inputs, dates):
            if date:
                print("Setting date...")
                dinput.send_keys(date)

        ActionChains(driver).send_keys(Keys.ESCAPE).perform()
        waiting_sys_timer(wait, 7)

        # complete genome only
        if cg:
            print("complete genome only...")
            checkbox = driver.find_element_by_xpath(
                '//input[@value="complete"]')
            checkbox.click()
            waiting_sys_timer(wait)

        # high coverage only
        if hc:
            print("high coverage only...")
            checkbox = driver.find_element_by_xpath('//input[@value="highq"]')
            checkbox.click()
            waiting_sys_timer(wait)

        # excluding low coverage
        if le:
            print("low coverage excluding...")
            checkbox = driver.find_element_by_xpath('//input[@value="lowco"]')
            checkbox.click()
            waiting_sys_timer(wait)

        # check if any genomes pass filters
        warning_message = None
        try:
            warning_message = driver.find_element_by_xpath(
                "//div[contains(text(), 'No data found.')]")
        except:
            pass
        if warning_message:
            print("No data found.")
            sys.exit(1)

        # select all genomes
        print("Selecting all genomes...")
        button_sa = driver.find_element_by_css_selector(
            "span.yui-dt-label input")
        button_sa.click()
        waiting_sys_timer(wait)

        # downloading sequence
        retry = 0
        while retry <= rt:
            try:
                print("Downloading sequences for selected genomes...")
                button = driver.find_element_by_xpath(
                    "//td[@class='sys-datatable-info']/button[contains(text(), 'Download')]"
                )
                button.click()
                waiting_sys_timer(wait)

                # switch to iframe
                iframe = waiting_for_iframe(wait, driver, rt, iv)
                driver.switch_to.frame(iframe)
                waiting_sys_timer(wait)

                button = driver.find_element_by_xpath(
                    "//button[contains(text(), 'Download')]")
                button.click()
                waiting_sys_timer(wait)
                driver.switch_to.default_content()

                fn = wait_downloaded_filename(wait, driver, 1800)
                print(f"Downloaded to {fn}.")

                break
            except:
                print(f"retrying...#{retry} in {iv} sec(s)")
                if retry == rt:
                    print("Unexpected error:", sys.exc_info())
                    sys.exit(1)
                else:
                    time.sleep(iv)
                    retry += 1

        # downloading metadata
        retry = 0
        while retry <= rt:
            try:
                print(
                    "Downloading acknowledgement table for selected genomes..."
                )
                button = driver.find_element_by_xpath(
                    "//td[@class='sys-datatable-info']/button[contains(text(), 'Download')]"
                )
                button.click()
                waiting_sys_timer(wait)

                # switch to iframe
                iframe = waiting_for_iframe(wait, driver, rt, iv)
                driver.switch_to.frame(iframe)
                waiting_sys_timer(wait)

                label = driver.find_element_by_xpath(
                    "//label[contains(text(), 'Acknowledgement Table')]")
                label.click()

                button = driver.find_element_by_xpath(
                    "//button[contains(text(), 'Download')]")
                button.click()

                waiting_sys_timer(wait)
                driver.switch_to.default_content()

                fn = wait_downloaded_filename(wait, driver, 180)
                print(f"Downloaded to {fn}.")

                break
            except:
                print(f"retrying...#{retry} in {iv} sec(s)")
                if retry == rt:
                    print("Unexpected error:", sys.exc_info())
                    sys.exit(1)
                else:
                    time.sleep(iv)
                    retry += 1

        # iterate each pages
        if meta_dl:
            page_num = 1
            print("Retrieving metadata...")
            while True:
                print(f"Starting processing page# {page_num}...")
                # retrieve tables
                tbody = wait.until(
                    EC.presence_of_element_located(
                        (By.XPATH, "//tbody[@class='yui-dt-data']")))

                waiting_table_to_get_ready(wait)

                # interate each row
                for tr in tbody.find_elements_by_tag_name("tr"):
                    td = tr.find_element_by_tag_name("td")
                    driver.execute_script("arguments[0].scrollIntoView();", td)

                    # have to click the first row twice to start the iframe
                    iframe = None
                    record_elem = None
                    retry = 1
                    while retry <= rt:
                        try:
                            td.click()
                            waiting_sys_timer(wait)
                            iframe = driver.find_element_by_xpath("//iframe")
                            if iframe:
                                break
                            else:
                                raise
                        except:
                            print(f"retrying...#{retry} in {iv} sec(s)")
                            if retry == rt:
                                print("Failed")
                                sys.exit(1)
                            else:
                                time.sleep(iv)
                                retry += 1

                    driver.switch_to.frame(iframe)

                    # detect error: "An internal server error occurred."
                    # and "error-token: DYX47"
                    error_token = driver.find_element_by_xpath("//b")
                    if error_token:
                        error_token_text = error_token.text
                        if "error-token" in error_token.text:
                            print(
                                "[FATAL ERROR] A website internal server error occurred."
                            )
                            print(error_token_text)
                            sys.exit(1)

                    # get the element of table with metadata
                    record_elem = wait.until(
                        EC.presence_of_element_located(
                            (By.XPATH, "//div[@class='packer']")))

                    # parse metadata
                    m = getMetadata(record_elem)
                    metadata.append(m)
                    print(f"{m['Accession ID']}\t{m['Virus name']}")

                    # get back
                    ActionChains(driver).send_keys(Keys.ESCAPE).perform()
                    time.sleep(1)
                    driver.switch_to.default_content()

                print(f"Compeleted page# {page_num}.")
                page_num += 1

                # go to the next page
                retry = 1
                button_next_page = None
                try:
                    button_next_page = driver.find_element_by_xpath(
                        f'//a[@page="{page_num}"]')
                except:
                    break

                if button_next_page:
                    print(f"Entering page# {page_num}...")
                    while retry <= rt:
                        try:
                            button_next_page.click()
                            time.sleep(10)
                            current_page = driver.find_element_by_xpath(
                                '//span[@class="yui-pg-current-page yui-pg-page"]'
                            ).text
                            if current_page != str(page_num):
                                raise
                            else:
                                break
                        except:
                            print(f"retrying...#{retry} in {iv} sec(s)")
                            if retry == rt:
                                print("Failed")
                                sys.exit(1)
                            else:
                                time.sleep(iv)
                                retry += 1

            # writing metadata to JSON file
            print("Writing detail metadata...")
            with open(GISAID_DTL_JASON, 'w') as outfile:
                json.dump(metadata, outfile)

    # close driver
    driver.quit()
예제 #28
0
def set_selenium_local_session(
    proxy_address,
    proxy_port,
    proxy_username,
    proxy_password,
    headless_browser,
    browser_profile_path,
    disable_image_load,
    page_delay,
    geckodriver_path,
    browser_executable_path,
    logfolder,
    logger,
    geckodriver_log_level,
):
    """Build a local Firefox WebDriver session configured from the arguments.

    A Firefox profile (loaded from ``browser_profile_path`` when one is
    given) receives the language, user-agent, image-loading, proxy, audio
    and automation-hiding preferences.  Firefox is then started through
    geckodriver, the hiding extension is installed, proxy authentication is
    performed when both credentials are supplied, and the window is resized
    to 414x896.

    Returns a ``(browser, err_msg)`` tuple.  ``err_msg`` is ``""`` on
    success.  When resizing raises ``UnexpectedAlertPresentException`` the
    browser is handed to ``close_browser`` and returned together with the
    message ``"Unexpected alert on browser resize"``.
    """
    options = Firefox_Options()
    if headless_browser:
        options.add_argument("-headless")

    # start from the user's profile directory when one was supplied
    profile_args = () if browser_profile_path is None else (browser_profile_path,)
    profile = webdriver.FirefoxProfile(*profile_args)

    if browser_executable_path is not None:
        options.binary = browser_executable_path

    # "info" is the usual level; "trace" is intended for development debugging
    options.log.level = geckodriver_log_level

    # English content language and the project-wide user agent
    profile.set_preference("intl.accept_languages", "en-US")
    profile.set_preference("general.useragent.override", Settings.user_agent)

    if disable_image_load:
        # permissions.default.image = 2 turns image loading off, which
        # speeds up page loads and saves bandwidth
        profile.set_preference("permissions.default.image", 2)

    if proxy_address and proxy_port:
        port = int(proxy_port)
        for pref, value in (
            ("network.proxy.type", 1),
            ("network.proxy.http", proxy_address),
            ("network.proxy.http_port", port),
            ("network.proxy.ssl", proxy_address),
            ("network.proxy.ssl_port", port),
        ):
            profile.set_preference(pref, value)

    # first entry mutes audio while watching stories; the remaining ones
    # prevent the "Hide Selenium Extension" error
    for pref, value in (
        ("media.volume_scale", "0.0"),
        ("dom.webdriver.enabled", False),
        ("useAutomationExtension", False),
        ("general.platform.override", "iPhone"),
    ):
        profile.set_preference(pref, value)
    profile.update_preferences()

    browser = webdriver.Firefox(
        firefox_profile=profile,
        # a user-supplied geckodriver wins over the downloaded one
        executable_path=geckodriver_path or get_geckodriver(),
        # geckodriver writes its log inside the user's log folder
        log_path="{}geckodriver.log".format(logfolder),
        options=options,
    )

    # extension that hides selenium from the page
    browser.install_addon(create_firefox_extension(), temporary=True)

    # the proxy asks for credentials through a popup alert window
    if proxy_username and proxy_password:
        proxy_authentication(browser, logger, proxy_username, proxy_password)

    browser.implicitly_wait(page_delay)

    # Window sizes for reference:
    #   Apple iPhone X:      375, 812
    #   Apple iPhone XS Max: 414, 896
    try:
        browser.set_window_size(414, 896)
    except UnexpectedAlertPresentException as exc:
        alert_text = str(exc).encode("utf-8")
        logger.exception(
            "Unexpected alert on resizing web browser!\n\t{}".format(alert_text)
        )
        close_browser(browser, False, logger)
        return browser, "Unexpected alert on browser resize"

    highlight_print("browser", "Session started!", "initialization", "info", logger)
    return browser, ""
예제 #29
0
def search():
    """Scrape Google "related question" boxes for pending hotwords into MongoDB.

    Reads up to 1000 documents with ``question_state == 0`` from
    ``dailypops.hotword``, signs a headless Chrome session in to Google, runs
    one search per hotword, upserts every related question/answer pair into
    ``dailypops.question`` and finally sets the hotword's ``question_state``
    to 1.

    The Chrome session is always shut down when the function exits, including
    when any step raises.
    """
    from urllib.parse import quote_plus

    client = MongoDB(environment=environment, db_name=db_name).client
    zds = client.dailypops.hotword.find({"question_state": 0}).limit(1000)
    # NOTE(review): machine-specific chromedriver location.
    path = r'C:\Users\EDZ\Documents\WeChat Files\wodexinwolai\FileStorage\File\2019-05/chromedriver'
    # Loop-invariant search URL; "{}" receives the URL-encoded query.
    url_template = "https://www.google.com/search?biw=1536&bih=890&ei=ZomJXceaOtCbmAWyi7egCg&q={}&oq=commp&gs_l=psy-ab.3.1.0i67l3j0i10l7.8182.9766..12758...0.0..0.135.565.0j5......0....1..gws-wiz.......0.KfvdJE90Egw"

    chrome_options = Options()
    chrome_options.add_argument('--headless')
    driver = webdriver.Chrome(executable_path=path,
                              chrome_options=chrome_options)
    try:
        # Sign in to Google first.
        # SECURITY NOTE(review): the account name and password are hard-coded
        # below; they should come from configuration or the environment.
        begin_url = "https://www.google.com/"
        driver.get(begin_url)
        driver.find_element_by_id("gb_70").click()
        driver.find_element_by_id("identifierId").send_keys(
            "*****@*****.**")
        driver.find_element_by_id("identifierNext").click()
        time.sleep(3)
        driver.find_element_by_xpath("//input[@name='password']").send_keys(
            "jiexin88")
        driver.find_element_by_id("passwordNext").click()
        time.sleep(5)

        for k in zds:
            print('参数', k)
            hotword_id = k.get("hotword_id", "")
            event_id = k.get("event_id", "")
            hotword_words = k.get("hotword", "").split(" ")
            print('hotword_list', hotword_words)
            # Percent-encode every word so characters such as "&" or "#"
            # cannot break out of the q= parameter; words stay "+"-joined.
            parms = '+'.join(quote_plus(word) for word in hotword_words)
            print('parms', parms)
            url = url_template.format(parms)

            print('url_', url)
            driver.get(url)
            response = driver.page_source
            html = etree.HTML(response)
            titles = html.xpath(
                '//div[@class="related-question-pair"]//div[@class="match-mod-horizontal-padding hide-focus-ring cbphWd"]//text()'
            )
            print('titles', titles)
            contents = Selector(text=response).xpath(
                '//div[@class="related-question-pair"]//div[@class="gy6Qzb kno-ahide"]'
            ).extract()
            # Titles and answer blocks are paired by position.
            for title, content in zip(titles, contents):
                con = Selector(text=content).xpath(
                    '//div[contains(@class,"mod")]//text()').extract()
                con = ' '.join(con)
                items = {}
                items['question_id'] = md5_(title + hotword_id)
                items['event_id'] = event_id
                items['hotword_id'] = hotword_id
                items['question'] = title
                items['answer'] = con
                items['source'] = ''
                items['release_time'] = '2019-09-25'
                items['time_stamp'] = int(time.time())
                items['entity'] = []
                items['label'] = []
                items['static_page'] = 0
                items['nlp_state'] = 0
                print(items)
                # Third positional argument True = upsert.
                client.dailypops.question.update(
                    {'question_id': items['question_id']}, items, True)
            # Mark the hotword as processed.
            s1 = {'hotword_id': hotword_id}
            s2 = {'$set': {'question_state': 1}}
            client.dailypops.hotword.update(s1, s2)
            time.sleep(3)
    finally:
        # Always end the browser session so no chromedriver/Chrome process
        # is left running.
        driver.quit()
예제 #30
0
import selenium
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import time

# Shared Firefox options, used as the default `browser_options` argument of
# searchLine(): run the browser headless.
browser_options = Options()
browser_options.headless = True
# NOTE(review): these two switches look like Chromium flags; presumably they
# have no effect on Firefox - confirm.
browser_options.add_argument('--no-sandbox')
browser_options.add_argument('--disable-dev-shm-usage')


def searchLine(myline,
               url="http://cti.voa.gov.uk/cti/inits.asp",
               browser_options=browser_options):
    myline = myline.replace('"', '').split(',')
    if len(myline) != 16:
        print('[FATAL] Wrong format of input data, cannot perform research.')
        return 'fatalErr'
    postcode = myline[3]
    address = ' '.join(myline[7:10])
    try:
        driver = webdriver.Firefox(
            options=browser_options,
            firefox_binary="/kaggle/working/firefox/firefox/firefox")
        driver.get(url)
        txtPC = driver.find_element_by_name("txtPostCode")
        driver.execute_script('arguments[0].value = arguments[1]', txtPC,
                              postcode)
        driver.find_element_by_id('frmInitSForm').submit()
        time.sleep(1.5)
        scl_complex = driver.find_element_by_class_name('scl_complex')
예제 #31
0
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.keys import Keys
import requests, json
# Headless Firefox configuration for the scrape below.
firefox_options = Options()
firefox_options.add_argument("--headless")

# HTTP headers (browser-like user agent, JSON content type). Not used in the
# lines below - presumably meant for a later `requests` call; TODO confirm.
headers = {
    "user-agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    'content-type': 'application/json'
}
driver = webdriver.Firefox(firefox_options=firefox_options)
driver.get('https://www.baidu.com')
elem = driver.find_element_by_name("wd")  # locate the search input box
elem.clear()  # clear whatever text is in the input box
elem.send_keys(u"天气深圳")  # type the query "天气深圳" (Shenzhen weather)
elem.send_keys(Keys.RETURN)  # press Enter to submit the search
driver.implicitly_wait(10)  # implicit wait applied to the element lookups below
# Read the pieces of today's forecast widget from the results page.
tqtoday = driver.find_element_by_css_selector('.op_weather4_twoicon_today')
tqelemtitle = driver.find_element_by_css_selector('.c-gap-bottom-small a').text
# NOTE: `time` here is the forecast date text (a string), not the stdlib module.
time = tqtoday.find_element_by_css_selector('.op_weather4_twoicon_date').text
wd = tqtoday.find_element_by_css_selector('.op_weather4_twoicon_temp').text
weath = tqtoday.find_element_by_css_selector('.op_weather4_twoicon_weath').text
wind = tqtoday.find_element_by_css_selector('.op_weather4_twoicon_wind').text

data = {
    "msgtype": "text",
    "text": {
        "content":
        tqelemtitle + '\n' + time + '\n' + wd + '\n' + weath + '\n' + wind
예제 #32
0
# coding: utf-8
import codecs
#import cookielib
import datetime
import os
import re
import requests
import urllib
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
# Firefox options shared by this module: run headless.
options = Options()
options.add_argument("--headless")
# Directory that contains this source file.
LOG_DIR = os.path.dirname(os.path.abspath(__file__))

from threading import Thread, Lock

# Python 2 / Python 3 compatibility aliases for Queue, Empty, unicode and
# urlencode.
try:
	# Python 2 module layout.
	from Queue import Queue, Empty
	unicode = unicode
	from urllib import urlencode
except ImportError:
	# Python 3: the modules were renamed and `str` is the text type.
	# Only ImportError selects this branch, so unrelated failures (for
	# example KeyboardInterrupt) are no longer swallowed.
	from queue import Queue, Empty
	unicode = str
	from urllib.parse import urlencode

def worker_get_bookinfo_detail(function, urls, result, mutex, session):
	while True:
		try:
			if mutex.acquire():
				item = urls.get(block=False)
예제 #33
0
파일: fint.py 프로젝트: xuan2261/ffff
def main():
    """Collect the users who commented on or reacted to a target's posts.

    Parses the command line, starts Firefox (headless when requested), logs
    in, resolves the target's id and username, then walks the target's
    photos and stories gathering commenters and reactions until the limits
    are reached or the user presses Ctrl+C.  The unique users are written
    with ``store_pivots`` (and ``store_csv`` when ``--csv_output`` is given)
    and the browser window is closed.
    """
    print(BANNER)
    args = parse_args()

    options = Options()
    if args.headless:
        options.add_argument("--headless")
    driver = webdriver.Firefox(executable_path=args.driver_path,
                               options=options)

    do_login(driver, args.user, args.password)
    check_login(driver)

    # The target may be given either as a numeric id or as a username.
    if args.target.isdigit():
        target_id = args.target
        target_username = get_username(driver, target_id)
    else:
        target_id = get_user_id(driver, args.target)
        target_username = args.target

    print('[*] Selected target: %s (%s)' % (target_username, target_id))

    commenters = []
    reactions = []
    # Bound before the loop so the interrupt handler below can report a count
    # even when Ctrl+C arrives before the first URL has been processed.
    users = []

    print('[*] Getting photos links... ', end=" ")
    photos = get_all_photos(driver, target_username,
                            args.limit_photos)[:args.limit_photos]
    print('%d photos found' % len(photos))
    print('[*] Getting stories links... ', end=" ")
    stories = get_all_stories(driver, target_id,
                              args.limit_stories)[:args.limit_stories]
    print('%d stories found' % len(stories))

    print(
        '[*] Retreiving users who have interacted... press Ctrl+C when you have enough'
    )

    msg = ''
    try:
        for url in photos + stories:

            commenters += parse_commenters(driver.page_source)
            if len(commenters) < args.limit_comments:
                commenters += get_all_comments(driver,
                                               url,
                                               limit=args.limit_comments)

            if len(reactions) < args.limit_reactions:
                reactions += get_all_reactions(driver,
                                               url,
                                               limit=args.limit_reactions)

            users = list(set(reactions).union(set(commenters)))
            # Progress line, rewritten in place after every URL.
            msg = '%sUnique users: %d        Comments: %d        Reactions: %d' % (
                '\r' * len(msg), len(users), len(commenters), len(reactions))
            print(msg, end='\r')

    except (KeyboardInterrupt, SystemExit):
        # Stop collecting and continue with whatever was gathered so far.
        print('[!] KeyboardInterrupt received. %d users retrieved' %
              len(users))

    # Enforce the limits, then recompute the unique users from the trimmed
    # lists.
    reactions = reactions[:args.limit_reactions]
    commenters = commenters[:args.limit_comments]
    users = list(set(reactions).union(set(commenters)))
    print_statistics(commenters, reactions)
    users = fill_user_ids(driver, users)

    if args.output:
        store_pivots(users, args.output)
    else:
        store_pivots(users, '%s-pivots.txt' % target_id)

    if args.csv_output:
        store_csv(users, args.csv_output)

    print('[*] Found %d comments and %d reactions from %d unique users ' %
          (len(commenters), len(reactions), len(users)))

    driver.close()
# firefox driver: geckodriver.exe
import requests
from selenium import webdriver
import time
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup

# Firefox options for this script: headless, plus '--disable-gpu'.
# NOTE(review): '--disable-gpu' looks like a Chrome switch; presumably it has
# no effect on Firefox - confirm.
firefox_options = Options()
firefox_options.add_argument('--headless')
firefox_options.add_argument('--disable-gpu')


def save_pdf(href):
    """Download *href* into ``.//download//``, named after the URL's last segment.

    Best-effort: request failures (including HTTP error statuses) and file
    system errors are reported on stdout and ``""`` is returned.  On success
    the function prints a confirmation and returns ``None``.
    """
    root = './/download//'
    kv = {'user-agent': 'Mozilla/5.0'}
    print(href)
    try:
        r = requests.get(href, headers=kv)
        # Do not store an HTTP error page under a PDF file name.
        r.raise_for_status()
        path = root + href.split('/')[-1]
        # The context manager closes the file; no explicit close() is needed.
        with open(path, 'wb') as f:
            f.write(r.content)
    except (requests.RequestException, OSError) as err:
        # Only expected download/write failures are absorbed; programming
        # errors and KeyboardInterrupt now propagate.
        print("save_pdf failed for %s: %s" % (href, err))
        return ""
    print("一份pdf")


# firefox_options.binary_location = r'C:\ProgramData\Anaconda3\Scripts\geckodriver.exe'
# chrome_options.binary_location = '/opt/google/chrome/chrome'

driver = webdriver.Firefox(
예제 #35
0
import sys
import io
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import time

# Re-wrap the standard streams so that printed text is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding='utf-8')

firefox_option = Options()
firefox_option.add_argument("--headless")  # CLI mode: no visible browser window

# Start Firefox through the geckodriver binary at the given local path.
driver = webdriver.Firefox(
    firefox_options=firefox_option,
    executable_path=r'D:/atom_python/section3/webdriver/firefox/geckodriver.exe'
)
# Load the page and save a screenshot of it to disk.
driver.get("https://google.com")
driver.save_screenshot(
    "D:/atom_python/section3/webdriver/firefox/website2.png")
예제 #36
0
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC # available since 2.26.0
from selenium.webdriver.firefox.options import Options

# Script setup: the input file name comes from the first command-line
# argument and the output file is named "output_" + that name.
inputfile = sys.argv[1] # presumably a list of input values to enter into the website to generate the output data
outputfile = "output_"+inputfile
datafile = open(inputfile)
outfile = open(outputfile,'w+')
# Read the first input line.
eachline = datafile.readline()
# A single headless Firefox session is created here.
options = Options()
options.add_argument("--headless")
driver = webdriver.Firefox(firefox_options=options)

while eachline != '':
    inputdata = eachline.rstrip()
    #print postalcode
    #options = Options()
    #options.add_argument("--headless")
    #driver = webdriver.Firefox(firefox_options=options)

# go to the google home page
    driver.get("website url you want to scrap")
    driver.find_element_by_id('txtSearch').send_keys("markup key to the data you want") # output data format
    driver.find_element_by_css_selector('input[type=\"button\"]').click() # virtually "clicking" button

    try:
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, 'tabResults')))
예제 #37
0
def main(user, password):
    """Scrape the ZuluTrade traders list into a '|'-separated CSV file.

    Logs in with *user* / *password* in a headless Firefox session, opens the
    traders page, clicks the "load more" button up to 59 times and then, for
    every table body found, collects the row data, the badge flags and the
    data from the trader's own page, triggers the Excel export when that
    button exists, and appends one line to ``zulutrade_<date>.csv``.

    Raises:
        LoginException: the 'user-top-container' element did not appear
            within 30 seconds after submitting the login form.
        Exception: one of the later page waits timed out (the driver is quit
            first).
    """
    urlRoot = "https://es.zulutrade.com"  # not referenced again in this function
    urlLogin = "******"
    urlToScrap = "https://es.zulutrade.com/traders"

    columnsFile = "ubicationColumns.json"
    firefoxDirectory = r'D:\Navegadores\Mozilla Firefox\firefox.exe'

    # Today's date (YYYY_MM_DD) names both the download folder and the CSV.
    today = datetime.datetime.strftime(datetime.datetime.now(), '%Y_%m_%d')
    createTodayDirectory(today)

    outputFile = "zulutrade_" + today + ".csv"

    columnsJson = getColumns(columnsFile)

    writeHeaderFile(outputFile, columnsJson["Columns"])

    options = Options()
    options.add_argument("--headless")

    profile = webdriver.FirefoxProfile()
    profile.set_preference("dom.disable_beforeunload", True)

    profile.set_preference("browser.tabs.remote.autostart", False)
    profile.set_preference("browser.tabs.remote.autostart.1", False)
    profile.set_preference("browser.tabs.remote.autostart.2", False)
    profile.set_preference("browser.tabs.remote.force-enable", False)

    # Download preferences: save into <cwd>\<today> without prompting for the
    # MIME types listed below.
    profile.set_preference('browser.download.folderList', 2)  # custom location
    profile.set_preference('browser.download.manager.showWhenStarting', False)
    profile.set_preference('browser.download.dir', os.getcwd() + '\\' + today)
    profile.set_preference(
        'browser.helperApps.neverAsk.saveToDisk',
        "application/xml,text/xml,application/csv,application/excel,application/vnd.msexcel,application/vnd.ms-excel,text/anytext,text/comma-separated-values,text/csv,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/octet-stream"
    )
    profile.set_preference(
        "browser.helperApps.neverAsk.openFile",
        "application/xml,text/xml,application/csv,application/excel,application/vnd.msexcel,application/vnd.ms-excel,text/anytext,text/comma-separated-values,text/csv,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/octet-stream"
    )
    profile.set_preference("browser.helperApps.alwaysAsk.force", False)
    profile.set_preference("browser.download.manager.useWindow", False)
    profile.set_preference("browser.download.manager.focusWhenStarting", False)
    profile.set_preference("browser.download.manager.alertOnEXEOpen", False)
    profile.set_preference("browser.download.manager.showAlertOnComplete",
                           False)
    profile.set_preference("browser.download.manager.closeWhenDone", True)

    binary = FirefoxBinary(firefoxDirectory)

    driver = webdriver.Firefox(firefox_options=options,
                               firefox_profile=profile,
                               firefox_binary=binary)
    #driver = webdriver.Firefox(firefox_profile = profile,firefox_binary=binary)
    #driver = webdriver.Firefox(firefox_profile = profile)

    # --- log in ---
    driver.get(urlLogin)

    userElement = driver.find_element_by_id("main_tbUsername")
    passwordElement = driver.find_element_by_id("main_tbPassword")

    userElement.send_keys(user)
    passwordElement.send_keys(password)

    driver.find_element_by_id("main_btnLogin").click()

    delayLogin = 30  #seconds
    delay = 90  #seconds

    # The login counts as successful once 'user-top-container' is present.
    try:
        element = WebDriverWait(driver, delayLogin).until(
            EC.presence_of_element_located((By.ID, 'user-top-container')))
    except TimeoutException:
        print("Se excedió el tiempo de espera")
        driver.quit()
        raise LoginException()

    # --- open the traders list ---
    driver.get(urlToScrap)

    try:
        element = WebDriverWait(driver, delay).until(
            EC.presence_of_element_located(
                (By.XPATH, '//zl-load-more/button')))
    except TimeoutException:
        print("Se excedió el tiempo de espera")
        driver.quit()
        raise Exception()

    moreDetailElement = driver.find_elements_by_xpath(
        "//zl-performance/div/div/div/div/button")
    print(len(moreDetailElement))

    moreDetailElement[0].click()

    # Click "load more" at most 59 times; stop early when the button does not
    # show up in time or is no longer on the page.
    for i in range(59):
        print("Page: " + str(i))
        try:
            element = WebDriverWait(driver, delayLogin).until(
                EC.presence_of_element_located(
                    (By.XPATH, '//zl-load-more/button')))
        except TimeoutException:
            print("Se excedió el tiempo de espera del boton de Cargar mas")
            break

        if len(driver.find_elements_by_xpath("//zl-load-more/button")) > 0:
            downloadMoreElement = driver.find_element_by_xpath(
                "//zl-load-more/button")
            downloadMoreElement.click()
        else:
            break

        #sleep(4.5)

    # One <tbody> per row in the list; presumably one per trader - confirm.
    rowsElements = driver.find_elements_by_xpath(
        "//zl-performance-forex-list/div/table/tbody")
    print(len(rowsElements))

    #badgesElements = driver.find_elements_by_xpath("//zl-trader-badge")
    #print(len(badgesElements))

    for iRowElement in range(len(rowsElements)):
        print(iRowElement)
        rowData = getDataPerTrader(rowsElements[iRowElement],
                                   columnsJson["UbicationsGrid"])
        # The string literal below holds disabled code (hover over the badge
        # icon and parse the popover); it is a no-op expression statement.
        '''
		numElements = len(badgesElements[iRowElement].find_elements_by_xpath(".//ngl-icon[@ng-reflect-set-icon='icon-badge-partially-verified' or @ng-reflect-set-icon='icon-badge-fully-verified']"))
		print(numElements)

		if numElements > 0:
			print("Si hay elemento Check")
			checkIconElement = badgesElements[iRowElement].find_elements_by_xpath(".//ngl-icon[@ng-reflect-set-icon='icon-badge-partially-verified' or @ng-reflect-set-icon='icon-badge-fully-verified']")[numElements - 1]
			
			driver.execute_script("arguments[0].scrollIntoView();", rowsElements[iRowElement])
			
			hover = ActionChains(driver).move_to_element(checkIconElement)
			hover.perform()

			sleep(2)

			soup = BeautifulSoup(driver.page_source, 'lxml')
			popUpElement = soup.find("zl-trader-verification-popover")
			#print(popUpElement)

			#To get lost of Focus of the little windows to iterate the next row
			hover = ActionChains(driver).move_to_element(badgesElements[iRowElement])
			hover.perform()
			sleep(1)
		'''

        # A badge column is True when its icon name occurs in the badge HTML.
        badgesElementsHTML = rowsElements[iRowElement].find_element_by_xpath(
            ".//zl-trader-badge").get_attribute('innerHTML')

        for badge, item in columnsJson["UbicationsBadges"].items():
            rowData[badge] = item["ICON"] in badgesElementsHTML

        #open tab
        # NOTE(review): relies on the Ctrl+T keystroke opening a second
        # window handle; verify that the driver in use honors it.
        driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 't')

        print(driver.window_handles)

        driver.switch_to.window(driver.window_handles[1])

        # Load the trader's own page in the second tab.
        driver.get(rowData["Url"])

        try:
            element = WebDriverWait(driver, delay).until(
                EC.presence_of_element_located(
                    (By.XPATH, '//zl-timeframes/ngl-picklist/div/button')))
        except TimeoutException:
            print("Se excedió el tiempo de espera")
            driver.quit()
            raise Exception()

        rowData = getDataInsidePagePerTrader(rowData, driver,
                                             columnsJson["UbicationsInside"])

        # Open the time-frame pick list and choose its last entry.
        graphicTimeElement = driver.find_element_by_xpath(
            "//zl-timeframes/ngl-picklist/div/button")
        graphicTimeElement.click()

        graphicTotalTimeElements = driver.find_elements_by_xpath(
            "//zl-timeframes/ngl-picklist/div/div/ul/li")
        graphicTotalTimeElements[len(graphicTotalTimeElements) - 1].click()

        # Default value used when the page has no Excel export button.
        excelFilename = "No hay archivo Excel disponible"

        if len(
                driver.find_elements_by_xpath(
                    "//zl-trading-history-excel-export/span/button")) > 0:
            exportExcelElement = driver.find_element_by_xpath(
                "//zl-trading-history-excel-export/span/button")
            exportExcelElement.click()

            # Click the first entry of the export menu.
            exportExcel2007Elements = driver.find_elements_by_xpath(
                "//zl-trading-history-excel-export/span/div/ul/li")
            exportExcel2007Elements[0].click()

            # Fixed pause, presumably to let the download finish - confirm.
            sleep(3)

            excelFilename = getLastFilename(os.getcwd() + '\\' + today)

        rowData["Excel"] = excelFilename

        print(rowData)

        # Append this trader as a single header-less line to the output CSV.
        dfTraders = pd.DataFrame(rowData,
                                 columns=columnsJson["Columns"],
                                 index=[0])
        # NOTE(review): the file is opened in text mode with the default
        # encoding; confirm the `encoding` argument below takes effect when
        # a file handle is passed.
        with open(outputFile, "a") as f:
            dfTraders.to_csv(f,
                             header=None,
                             index=False,
                             encoding='ISO-8859-1',
                             sep='|')

        # close the tab
        driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'w')

        # Return to the traders list for the next row.
        driver.switch_to.window(driver.window_handles[0])

    driver.quit()
예제 #38
0
def dictwebpage(request):
    """Resolve a dictionary lookup requested by AJAX and return what to display.

    The first character of the final URL selects the strategy:

    * ``^`` - fetch the page server-side, clean it and return the escaped HTML
      as JSON (for sites that cannot be embedded through ``<iframe src>``).
    * ``!`` - fetch the page server-side and render it through one of the
      custom API templates (Google Translate, pons, dict.cc, ...).
    * ``#`` - load the page with headless Firefox (Selenium) and render the
      result through the translation API template.
    * anything else - return the URL itself as JSON so the client opens it.

    :param request: Django request; reads ``json_obj`` or ``word``, ``wbl``
        and optionally ``issentence`` from ``request.GET``.
    :return: an ``HttpResponse`` (rendered template or JSON string).
    """
    # case 1:  (OUTDATED since Glosbe doesn't work anymore) It needs to fetch an API.
    # AJAX has fetched the JSON on the www, then the JSON obj is sent to this
    # view, which processes it and sends back html.
    if 'json_obj' in request.GET:
        parsed_json_obj = json.loads(request.GET['json_obj'])
        return render(request, 'lwt/_glosbe_api.html',
                      {'result': parsed_json_obj})

    # case 2:  AJAX sends the link to process to the view dictwebpage,
    # and the view sends back a JSON containing the string URL. <iframe> displays it then.
    word = request.GET['word']
    word_escaped = urllibparsequote(word)
    wbl = request.GET['wbl']

    # case where it's a lookup sentence
    # (no key "issentence" is sent if its value is empty in AJAX)
    issentence = request.GET.get('issentence', '')
    if issentence != '':
        wo_id = int(issentence)
        word = Sentences.objects.values_list(
            'sentencetext', flat=True).get(sentence_having_this_word=wo_id)
        word_escaped = urllibparsequote(word)
    finalurl = wbl.replace('<WORD>', word_escaped)

    # Slicing instead of indexing: an empty URL must not raise IndexError,
    # it simply falls through to the "open in a new window" response.
    prefix = finalurl[:1]

    # case where we can't put the url in an iframe src. we must request the
    # entire html webpage and will display it in the iframe srcdoc
    if prefix in ('^', '!'):
        headers = {
            "User-Agent":
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)",
            "Accept":
            "text/html,application/xhtml+xml,application/xml; q=0.9,image/webp,*/*;q=0.8"
        }
        reqest = Request(finalurl[1:], headers=headers)
        try:
            content = urlopen(reqest)
        # catch the redirect from Wiktionary
        except urllib.error.HTTPError as httpError:
            error = httpError.read().decode()
            # wiktionary has a special way to redirect to similar word if nothing found
            if 'wiktionary' in finalurl:
                redirect_url = _wiki_API_redirect(error, finalurl[1:],
                                                  word_escaped)
                reqest = Request(redirect_url, headers=headers)
                try:
                    content = urlopen(reqest)
                except Exception:
                    # redirect doesn't work either, so display the error
                    content = error
            else:
                content = error

    if prefix == '^':
        try:
            soup = BeautifulSoup(content, 'html.parser')
            html = _clean_soup(soup, finalurl)
        except Exception:
            html = render_to_string('lwt/dictwebpage_not_working.html')
        result_str = escape(html)
        return HttpResponse(json.dumps(result_str))

    if prefix == '#':
        # case where we use Selenium (tricky website where scraping is blocked)
        is_Mac = platform.system().lower() not in ('windows', 'linux')

        from selenium.webdriver.firefox.webdriver import WebDriver
        from functional_tests.selenium_base import Base
        from selenium.webdriver.common.by import By
        from selenium.webdriver.firefox.options import Options as FirefoxOptions
        options = FirefoxOptions()
        options.add_argument("--headless")
        selenium = WebDriver(options=options)
        try:
            selenium.get('{}'.format(finalurl[1:]))
            base = Base()
            base.selenium = selenium
            if 'naver' in finalurl:
                base.wait_until_appear(By.ID, 'searchPage_entry')
            content = selenium.execute_script(
                "return document.documentElement.outerHTML;")
        finally:
            # Without this every request leaves a headless Firefox running.
            selenium.quit()

        # Start from an empty context so a non-naver '#' URL renders an empty
        # result instead of failing with NameError.
        context = {}
        if 'naver' in finalurl:
            translation_result = _naver_API(content, finalurl)
            context = {
                'translation_result': translation_result,
                'API_name': 'naver'
            }
        context['is_Mac'] = is_Mac

        return render(request, 'lwt/_translation_api.html', context)

    if prefix == '!':
        # this dictionary uses my custom APIs (for ex. Google translate).
        # ('#' URLs have already been answered by the Selenium branch above.)
        context = {}
        is_Mac = platform.system().lower() not in ('windows', 'linux')

        if 'https://translate.google.com' in finalurl:
            translation_result = _google_API(content)
            context = {
                'url': finalurl[1:],
                'url_name': 'Google Translate',
                'trans_item_nb': len(translation_result),
                'translation_result': translation_result,
                'word_OR_sentence_origin': word,
                'is_Mac': is_Mac
            }
            return render(request, 'lwt/_google_api.html', context)
        # Independent checks (not elif): when several markers occur in the
        # URL the last matching API wins, as before.
        if 'pons.com/translate' in finalurl:
            translation_result = _pons_API(content, finalurl)
            context = {
                'translation_result': translation_result,
                'API_name': 'pons'
            }
        if 'dict.cc' in finalurl:
            translation_result = _dictcc_API(content, finalurl)
            context = {
                'translation_result': translation_result,
                'API_name': 'dictcc'
            }
        if 'wordref' in finalurl:
            translation_result = _wordref_API(content, finalurl)
            context = {
                'translation_result': translation_result,
                'API_name': 'wordref'
            }
        if 'wiktionary' in finalurl:
            translation_result = _wiki_API(content, finalurl)
            context = {
                'translation_result': translation_result,
                'API_name': 'wiki'
            }
        if 'youdao' in finalurl:
            translation_result = _youdao_API(content, finalurl)
            context = {
                'translation_result': translation_result,
                'API_name': 'youdao'
            }
        context['is_Mac'] = is_Mac
        return render(request, 'lwt/_translation_api.html', context)

    # case where we open into a new window
    return HttpResponse(json.dumps(finalurl))
예제 #39
0
파일: solvefailurl1.py 프로젝트: yc999/-
# Secondary log file, written through messagelesslog().
messageless_log_path = "../../newwebdata/messagelog1.txt"

# Create the output directory (and any missing parents) without the
# check-then-create race of isdir() + mkdir().
os.makedirs(savepath, exist_ok=True)

# Main log file, opened in append mode and kept open for the whole run.
logfile = open(logpath, 'a+')
def makelog(logmessage):
    """Append ``logmessage`` followed by a newline to the module-level ``logfile``."""
    line = logmessage + '\n'
    logfile.write(line)

# Handle for the secondary log, opened in append mode and kept open.
messagelogfile = open(messageless_log_path, 'a+')
def messagelesslog(logmessage):
    """Append ``logmessage`` followed by a newline to ``messagelogfile``."""
    line = logmessage + '\n'
    messagelogfile.write(line)

# option = webdriver.ChromeOptions()

option = Options()
for _browser_flag in (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--headless',  # run silently, without a visible window
        'log-level=3',
        # Disable GPU acceleration: it may cause a black screen in Chrome
        # and CPU usage above 80%.
        '--disable-gpu',
):
    option.add_argument(_browser_flag)
browser = webdriver.Firefox(options=option)
# browser = webdriver.Chrome(options=option)
# The same limit bounds both element lookups and full page loads.
browser.implicitly_wait(time_limit)
browser.set_page_load_timeout(time_limit)

# 查询网址,爬取内容
# def requesturl(url, savefilepath):
def requesturl(url):
    """Query ``url`` and collect its content (only the setup is visible here)."""
    print(url)
    # Data that is saved at the end.
    webinfo = dict()
    # Text content of the home page.
    webtext = list()
예제 #40
0
def init_browser():
    """Start a headless Firefox and store it in the module-level ``browser``."""
    global browser
    headless_options = Options()
    headless_options.add_argument('--headless')
    browser = webdriver.Firefox(options=headless_options)
예제 #41
0
    def parse_page(self, response):
        """Log in to the warehouse site with Selenium and sync stock quantities.

        Opens ``response.url`` in headless Firefox inside a virtual display,
        submits the login form, walks every page of ``#ViewManyListTable`` and
        inserts or updates one ``mmc_stocks`` row per SKU for the ``Hanover``
        warehouse. The outcome is recorded in ``mmc_spider_status``: status 3
        once the scrape finishes, status 2 plus the error text if it raises.

        :param response: object exposing the ``url`` of the page to load.
        """
        # Bound up front so the cleanup below never hits an unbound name when
        # the display or the browser fails to start.
        display = None
        driver = None
        try:
            from pyvirtualdisplay import Display
            display = Display(visible=0, size=(800, 800))
            display.start()
            firefox_options = Options()
            firefox_options.add_argument('-headless')
            firefox_options.add_argument('--disable-gpu')
            driver = webdriver.Firefox(firefox_options=firefox_options, executable_path=settings.FIREFOX_PATH)
            driver.get(response.url)
            driver.implicitly_wait(100)
            elem_code = driver.find_elements_by_id('WarehouseCode')
            elem_acode = driver.find_elements_by_id('AccountCode')
            elem_name = driver.find_elements_by_id('UserName')
            elem_pass = driver.find_elements_by_id('Password')
            btn_login = driver.find_elements_by_css_selector('input[name="Login"]')

            # NOTE(review): credentials are hard-coded in source; consider
            # moving them to settings.
            if elem_code:
                elem_code[0].send_keys('03')
            if elem_acode:
                elem_acode[0].send_keys('001862')
            if elem_name:
                elem_name[0].send_keys('MAXLEAD')
            if elem_pass:
                elem_pass[0].send_keys('1202HXML')
            btn_login[0].click()
            driver.implicitly_wait(100)
            time.sleep(5)
            total_page = driver.find_elements_by_css_selector('#navigationTR nobr')[0].text
            # The page count is the last space-separated token of the label.
            total_page = int(total_page.split(' ')[-1])

            warehouse = 'Hanover'
            for i in range(total_page):
                try:
                    res = driver.find_elements_by_css_selector('#ViewManyListTable tr')
                    elem = driver.find_element_by_id('MetaData')
                    elem.click()
                    res.pop(0)  # drop the first row of the table
                    for val in res:
                        td_re = val.find_elements_by_tag_name('td')
                        if not td_re:
                            continue
                        sku = td_re[0].text
                        raw_qty = td_re[3].text
                        if raw_qty and not raw_qty == ' ':
                            qty = raw_qty.replace(',', '')
                        else:
                            qty = 0

                        # Parameterized: the SKU is scraped text and must not
                        # be spliced into the SQL string.
                        qty_sql = ("select id from mmc_stocks "
                                   "where commodity_repertory_sku=%s and warehouse=%s")
                        self.db_cur.execute(qty_sql, (sku, warehouse))
                        # Fetch the rows instead of relying on rowcount, which
                        # is not dependable for SELECT on every cursor type.
                        already_stored = bool(self.db_cur.fetchall())
                        values = (qty, sku, warehouse)
                        if already_stored:
                            sql = "update mmc_stocks set qty=%s where commodity_repertory_sku=%s and warehouse=%s"
                        else:
                            sql = "insert into mmc_stocks (qty, commodity_repertory_sku, warehouse) values (%s, %s, %s)"
                        self.db_cur.execute(sql, values)
                    # No "Next" lookup after the last page: with the implicit
                    # wait it would only stall on a missing element.
                    if i < total_page - 1:
                        elem_next_page = driver.find_elements_by_id('Next')
                        if elem_next_page:
                            elem_next_page[0].click()
                            driver.implicitly_wait(100)
                except Exception:
                    # Best effort: a broken page must not abort the whole run.
                    continue
            self.conn.commit()
            sql = "update mmc_spider_status set status=3, description='' where warehouse='Hanover'"
            self.db_cur.execute(sql)
            self.conn.commit()
        except Exception as e:
            values = (str(e),)
            sql = "update mmc_spider_status set status=2, description=%s where warehouse='Hanover'"
            self.db_cur.execute(sql, values)
            self.conn.commit()
        finally:
            try:
                if driver is not None:
                    try:
                        driver.refresh()
                        driver.switch_to.alert.accept()
                        driver.implicitly_wait(100)
                    except Exception:
                        # Best effort: there may be no alert to accept.
                        pass
                    driver.quit()
            finally:
                if display is not None:
                    display.stop()
예제 #42
0
class mask():
    '''
    Mask object

    Builds Firefox webdrivers routed through a local TOR SOCKS proxy and
    offers helpers to start TOR, pick a user agent and swap TOR identity.
    '''
    # Upper bound, in seconds, on how long _start_tor waits for the TOR SOCKS
    # port to start listening before giving up.
    _TOR_START_TIMEOUT = 120

    def __init__(self, path, g_prefix, b_prefix):
        '''
        @param path directory prefix of the start-tor-browser launcher
        @param g_prefix prefix printed before informational messages
        @param b_prefix prefix printed before error messages
        '''
        self.g_prefix = g_prefix
        self.b_prefix = b_prefix
        self.path = path
        self.opts = Options()
        self.opts.add_argument('-private')

    def _check_tor(self):
        '''
        Checks for running TOR browser

        Only works on Linux based systems

        @param none
        @return boolean status
        '''
        CMD = "netstat -ano | grep LISTEN | grep 9150 > /dev/null 2>&1"
        # The pipeline's exit status is positive when grep finds no listener
        # on port 9150.
        return os.system(CMD) <= 0

    def _start_tor(self):
        '''
        Start TOR browser

        Launches the browser, then polls every 2 seconds until the SOCKS port
        is listening or _TOR_START_TIMEOUT seconds have elapsed.

        @param none
        @return boolean status
        '''
        CMD = "start-tor-browser"
        try:
            subprocess.Popen(self.path + CMD)
        except Exception:
            # Launcher missing or not executable, bad path, ...
            return False
        deadline = time.monotonic() + self._TOR_START_TIMEOUT
        while not self._check_tor():  # Give TOR browser time to open
            if time.monotonic() >= deadline:
                # Never came up; report failure instead of hanging forever.
                return False
            time.sleep(2)
        return True

    def _get_ua(self):
        '''
        Get random user agent string

        @param none
        @return String
        '''
        ua = [
            "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1",
            "Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; SCH-I535 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
            "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1",
            "Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-G570Y Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 5.0; SAMSUNG SM-N900 Build/LRX21V) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.1 Chrome/34.0.1847.76 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-N910F Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; U; Android-4.0.3; en-us; Galaxy Nexus Build/IML74K) AppleWebKit/535.7 (KHTML, like Gecko) CrMo/16.0.912.75 Mobile Safari/535.7",
            "Mozilla/5.0 (Linux; Android 7.0; HTC 10 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.36",
        ]
        return ua[randint(0, len(ua) - 1)]

    def _get_tor_browser_profile(self):
        '''
        Get TOR browser profile

        Builds a Firefox profile that sends traffic and DNS lookups through
        the SOCKS proxy at 127.0.0.1:9150 and starts in private browsing.

        @param none
        @return selenium FirefoxProfile
        '''
        profile = webdriver.FirefoxProfile()
        proxyIP = "127.0.0.1"
        proxyPort = 9150
        profile.set_preference("network.proxy.type", 1)
        profile.set_preference("network.proxy.socks", proxyIP)
        profile.set_preference("network.proxy.socks_port", int(proxyPort))
        profile.set_preference("network.proxy.socks_remote_dns", True)
        profile.set_preference("browser.privatebrowsing.autostart", True)

        return profile

    def _get_proxy_list(self):
        ##############################################################################
        ####################### INCOMPLETE ##########################################
        #############################################################################
        '''
        Get up-to-date list of free proxy server IP:PORT, return it as a list
        to cycle through.

        OPTIONS:  Return the list of proxies, or return a webdriver using each 
        proxy....

        Not sure yet
        '''
        # Placeholder: nothing is fetched yet and the method returns None.
        URI = "https://free-proxy-list.net/"

    def get_tor_browser(self):
        '''
        Get firefox browser using TOR proxy

        Starts TOR first when it is not already listening.

        @param none
        @return selenium webdriver, or None when TOR could not be started
        '''
        if not self._check_tor():
            print(self.g_prefix + "TOR not running, starting TOR")
            time.sleep(2)
            if not self._start_tor():
                print(self.b_prefix + "Could not start TOR browser")
                return None
            print(self.g_prefix + "TOR started successfully")
        return webdriver.Firefox(self._get_tor_browser_profile(),
                                 firefox_options=self.opts)

    def swap_ident(self):
        '''
        Swap TOR browser identity

        Sends NEWNYM through the control port 9151 when TOR is running.

        @param none
        @return boolean status
        '''
        if not self._check_tor():
            return False
        with Controller.from_port(port=9151) as controller:
            controller.authenticate()
            controller.signal(Signal.NEWNYM)
            time.sleep(1)  # Give the identity time to reset
        return True
예제 #43
0
 def generate_driver(self):
     """Return a headless Firefox driver started from ``self.execute_path``."""
     headless_options = FirefoxOptions()
     headless_options.add_argument("-headless")
     driver = webdriver.Firefox(executable_path=self.execute_path,
                                options=headless_options)
     return driver
예제 #44
0
"""
Functional Tests for Superlists
"""
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options

firefox_options = Options()
firefox_options.add_argument('-headless')
BROWSER = Firefox(firefox_options=firefox_options)
try:
    BROWSER.get('http://localhost:8000')

    assert 'Django' in BROWSER.title
finally:
    # Always shut the headless browser down; otherwise a failing assertion
    # leaves an orphaned Firefox process behind.
    BROWSER.quit()
예제 #45
0
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import os
import config
import utils


# Working directory at start-up; the geckodriver binary is looked up here.
dirpath = os.getcwd()

# Headless Firefox with a fixed 1920x1080 window.
options = Options()
for _cli_flag in ("--headless", "--width=1920", "--height=1080"):
    options.add_argument(_cli_flag)

 
def type_in_origin(driver, origin):
    """Type ``origin`` into the form field whose id is ``config.origin_field``."""
    driver.find_element_by_id(config.origin_field).send_keys(origin)

def type_in_destination(driver, destination):
    """Type ``destination`` into the field whose id is ``config.destination_field``."""
    driver.find_element_by_id(config.destination_field).send_keys(destination)

def navigate_to_result_page(journey):
    """Open the search page, submit ``journey`` and expand the result details.

    ``journey`` is read through ``.get('origin')`` and ``.get('destination')``.
    """
    gecko_path = dirpath + '/geckodriver'
    driver = webdriver.Firefox(executable_path=gecko_path, options=options)
    driver.get(config.url)

    # Fill in both ends of the journey before submitting the form.
    type_in_origin(driver, journey.get('origin'))
    type_in_destination(driver, journey.get('destination'))

    driver.find_element_by_class_name(config.submit_button_class).click()
    driver.find_element_by_id(config.details_button_id).click()
예제 #46
0
import time, scrapy, hashlib
from selenium import webdriver
from lxml import etree
from scrapy.selector import Selector
from database.mongodb import MongoDB

from selenium.webdriver.firefox.options import Options
# Firefox options shared at module level: run the browser headless.
options = Options()
options.add_argument('--headless')

from items import eventItem, articleItem, hotwordItem, questionItem
from selenium.common.exceptions import TimeoutException
from scrapy.crawler import CrawlerProcess
# driver = webdriver.Firefox()
# NOTE(review): presumably selects which deployment settings are used - confirm.
environment = 'local'
# NOTE(review): presumably the MongoDB database name - verify against MongoDB usage.
db_name = 'dailypops'
# class Question(scrapy.Spider):
#     name = 'question'
#     allowed_domain=[]
#     custom_settings = {
#         'LOG_LEVEL': 'ERROR',
#         'CONCURRENT_REQUESTS': 1,
#         'DOWNLOAD_DELAY': 0.2,
#         'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
#         'ITEM_PIPELINES': {'pipeline.pipeline.MongodbPipeline': 300},
#         'DOWNLOADER_MIDDLEWARES': {'middleware.middlewares.GoogleMiddleware': 400}
#     }
#
#     def start_requests():
#         # while True:
#         #     time.sleep(6)
예제 #47
0
class HistoryDragonTigerList():
    """
    Scraper for the historical Dragon-Tiger list.

    Drives a headless Firefox over the eastmoney trade-detail page, parses
    each result page with BeautifulSoup and appends the rows to a file.
    """
    def __init__(self):
        # List holding the table header titles.
        self.title_list = []
        # Two-dimensional list holding the stock data.
        self.row_list = []  # rows
        self.column_list = []  # columns of the row being built

        # self.driver = webdriver.Firefox()
        # self.driver.set_window_position(0, 0)
        # self.driver.set_window_size(1400, 900)
        # self.driver.maximize_window()  # maximize the window

        # The following three lines run the browser without a visible window.
        self.options = Options()
        self.options.add_argument('--headless')
        self.options.add_argument('--disable-gpu')
        # With geckodriver on PATH the first argument can be omitted;
        # otherwise pass its absolute path.
        self.driver = webdriver.Firefox(
            executable_path='geckodriver',
            options=self.options)

        self.current_url = "http://data.eastmoney.com/stock/tradedetail.html"
        self.WAIT = WebDriverWait(self.driver, 15)

    def load_page_by_xpath(self, web_driver_wait, xpath_elem):
        """
        Wait until the element located by an XPath is present on the page.

        :param web_driver_wait: the WebDriverWait used for waiting
        :param xpath_elem: XPath expression of the element
        :return: the located element
        """
        load_elem = web_driver_wait.until(
            EC.presence_of_element_located((By.XPATH, xpath_elem)))
        return load_elem

    def update_date_range(self, start_date, end_date):
        """
        Write the date range into the two date inputs and run the query.

        :param start_date: value written into the first ``date-input`` field
        :param end_date: value written into the second ``date-input`` field
        :return: None
        """
        # The inputs are read-only, so the attribute is removed via JavaScript
        # before the value is set.
        remove_start_date = 'document.getElementsByClassName("date-input")[0].removeAttribute("readonly");'
        self.driver.execute_script(remove_start_date)
        add_start_date = 'document.getElementsByClassName("date-input")[0].value="' + start_date + '"'
        self.driver.execute_script(add_start_date)

        remove_end_date = 'document.getElementsByClassName("date-input")[1].removeAttribute("readonly");'
        self.driver.execute_script(remove_end_date)
        add_end_date = 'document.getElementsByClassName("date-input")[1].value="' + end_date + '"'
        self.driver.execute_script(add_end_date)

        query_elem = '//*[@id="divSjri"]/div[2]/div[2]'
        self.load_page_by_xpath(self.WAIT, query_elem).click()
        time.sleep(10)
        print("时间修改完毕" + start_date + "\t" + end_date)

    def analysis_page_source(self, html, filename):
        """
        Parse one result page and append its rows to ``filename``.

        :param html: page source to parse
        :param filename: file the rows are appended to
        :return: None
        """
        soup = BeautifulSoup(html, 'lxml')
        date_elem = soup.select_one("#search_date_start")
        year = str(date_elem.attrs["value"]).replace('-', '')[0:4]
        # Collect the header titles.
        title_items = soup.select_one('#tab-1 > thead').find("tr").find_all(
            "th")
        for item in title_items:
            if item.text != '相关':
                self.title_list.append(str(item.text).strip())
        self.title_list.append("星期")
        Utils.Utils.print_title(self.title_list)
        self.title_list.clear()

        # Collect the stock data.
        tr_elems = soup.select("#tab-1 > tbody > tr")
        for tr_elem in tr_elems:
            td_items = tr_elem.select("td")
            # Reset per row: a row without a date cell must not reuse the
            # previous row's weekday (or raise NameError on the first row).
            weekday = ""
            for i, td_item in enumerate(td_items):
                if i == 17:
                    # This cell's text is read from the span's title attribute.
                    self.column_list.append(
                        str(td_item.select_one('span').attrs['title']).strip())
                elif i == 4:
                    # Cell 4 holds month and day; prefix the year read above.
                    month_day = str(td_item.text).strip()
                    dt = year + month_day[0:2] + month_day[3:5]
                    self.column_list.append(dt)
                    weekday = Utils.Utils.date2weekday(dt)
                elif i != 3:
                    # Cell 3 is skipped.
                    self.column_list.append(str(td_item.text).strip())

            self.column_list.append(weekday)
            self.row_list.append(self.column_list)
            self.column_list = []
        Utils.Utils.save_file(filename, self.row_list, 'a')
        self.row_list.clear()

    def get_current_window(self):
        """
        Switch the driver to a window other than the current one, if any.

        :return: None
        """
        time.sleep(10)
        # Handle of the current window.
        current_window = self.driver.current_window_handle
        # Handles of all windows.
        all_Handles = self.driver.window_handles
        # Switch to every handle that is not the current one.
        for new_window in all_Handles:
            if new_window != current_window:
                self.driver.switch_to.window(new_window)
        # Implicit wait: JavaScript needs time to run. Too short and the data
        # is not there yet; too long wastes time.
        self.driver.implicitly_wait(15)

    def get_stock_data(self, path, start_date, end_date):
        """
        Download the list for a date range into ``path``.

        Returns immediately when the output file already exists.

        :param path: directory the file is saved in
        :param start_date: yyyyMMdd
        :param end_date: yyyyMMdd
        :return: None
        """
        if not os.path.exists(path):
            os.makedirs(path)
        file_name = path + '/' + start_date + '-' + end_date
        if os.path.exists(file_name):
            print(file_name + " 文件已存在...\t退出")
            return
        self.driver.get(self.current_url)
        show_date_window_xpath = '//*[@id="divSjri"]/div[1]'
        self.load_page_by_xpath(self.WAIT, show_date_window_xpath).click()

        # Reformat yyyyMMdd as yyyy-MM-dd for the date inputs.
        start_date = start_date[0:4] + "-" + start_date[
            4:6] + "-" + start_date[6:8]
        end_date = end_date[0:4] + "-" + end_date[4:6] + "-" + end_date[6:8]
        self.update_date_range(start_date, end_date)
        self.get_current_window()

        # Find the highest page number.
        max_page_elem_xpath = "//*[@id='PageCont']/a[last()-2]"
        max_page_elem = self.load_page_by_xpath(self.WAIT, max_page_elem_xpath)

        max_page = str(max_page_elem.text).strip()
        print("最大页面", max_page)
        if max_page == "...":
            # The pager is collapsed: expand it, read the real last page
            # number, then return to the first page.
            max_page_elem.click()
            self.get_current_window()
            max_page_elem_xpath = "//*[@id='PageCont']/child::node()[last()-4]"
            max_page_elem = self.load_page_by_xpath(self.WAIT,
                                                    max_page_elem_xpath)
            max_page_elem.click()
            max_page = str(max_page_elem.text).strip()
            print("最大页面", max_page)
            first_page_elem_xpath = "//*[@id='PageCont']/a[2]"
            self.load_page_by_xpath(self.WAIT, first_page_elem_xpath).click()

        max_page_num = int(max_page)

        for i in range(max_page_num):
            print("第", i + 1, "页")
            self.get_current_window()
            html = self.driver.page_source
            self.analysis_page_source(html, file_name)
            # Go to the next page; nothing to click after the last one.
            if i + 1 < max_page_num:
                next_page = self.driver.find_element_by_xpath(
                    "//*[@id='PageCont']/a[last()-1]")
                next_page.click()

    def get_history_dragon_tiger_list(self, start_date, end_date):
        """
        Public entry point: download the list and always quit the driver.

        :param start_date: yyyyMMdd
        :param end_date: yyyyMMdd
        :return: None
        """
        try:
            path = Utils.Utils.get_stock_data_path(
            ) + '/history_dragon_tiger_list'
            self.get_stock_data(path, start_date, end_date)
        finally:
            self.driver.quit()
예제 #48
0
def start():
    """Visit the target URL(s) once through every usable proxy, forever.

    Loops endlessly over the proxies returned by
    ``redisClient.getProxyData()``. For each proxy accepted by
    ``common.isUseIp`` and not yet recorded for a URL, it opens that URL in a
    headless Firefox configured to use the proxy and waits for the
    ``praisebg`` element. Proxies rejected by ``common.isUseIp`` are deleted.
    Errors are printed and the loop carries on.
    """
    while True:
        # Proxies
        try:
            ips = redisClient.getProxyData()
            for ip, _status in ips.items():
                ip = str(ip, encoding="utf-8")
                if common.isUseIp(ip):
                    urls = [
                        "https://cn.iac-worldwide.com/api.php/Home/Taskdetail/index/if_id/826/sharefrom/8198"
                    ]
                    for url in urls:

                        flag = redisClient.isExistsStartIP(url, ip)
                        # Kept as an equality test: only an explicit False
                        # (not None) triggers a visit.
                        if flag == False:  # noqa: E712
                            # Reset per attempt so the cleanup below never
                            # touches a browser left over from an earlier one.
                            browser = None
                            try:
                                redisClient.setUseStart(url, ip)

                                options = Options()
                                options.add_argument('-headless')
                                host_and_port = ip.split(":")
                                ip_ip = host_and_port[0]
                                ip_port = int(host_and_port[1])
                                # Default is 0 (direct connection);
                                # 1 means manual proxy configuration.
                                options.set_preference(
                                    'network.proxy.type', 1)
                                options.set_preference('network.proxy.http',
                                                       ip_ip)
                                options.set_preference(
                                    'network.proxy.http_port', ip_port)
                                options.set_preference('network.proxy.ssl',
                                                       ip_ip)
                                options.set_preference(
                                    'network.proxy.ssl_port', ip_port)

                                options.set_preference(
                                    "network.http.use-cache", False)
                                options.set_preference(
                                    "browser.cache.memory.enable", False)
                                options.set_preference(
                                    "browser.cache.disk.enable", False)
                                options.set_preference(
                                    "browser.sessionhistory.max_total_viewers",
                                    3)

                                options.set_preference(
                                    'permissions.default.image', 2)
                                # Disable Flash.
                                # NOTE(review): the value is the string
                                # 'false', not the boolean False - confirm
                                # this is what Firefox expects.
                                options.set_preference(
                                    'dom.ipc.plugins.enabled.libflashplayer.so',
                                    'false')

                                # Firefox browser
                                browser = webdriver.Firefox(
                                    executable_path='geckodriver',
                                    firefox_options=options)
                                browser.set_page_load_timeout(20)
                                browser.get(url)

                                print(url)
                                # Wait for the element that shows the page loaded.
                                WebDriverWait(browser, 10).until(
                                    EC.presence_of_element_located(
                                        (By.ID, "praisebg")))
                                redisClient.setUseIP(ip)
                            except Exception as e:

                                print(e)
                                print("浏览url出错")
                                break
                            finally:
                                if browser is not None:
                                    # quit() also ends the geckodriver
                                    # session; close() only closes the window.
                                    browser.quit()
                else:
                    redisClient.deleteProxyData(ip)
        except Exception as e:
            print(e)
            continue
예제 #49
0
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.common.keys import Keys
import sys

# macos_firefox.py
# /Applications/Firefox.app/Contents/MacOS/firefox  --private-window https://www.uol.com.br

# Options
firefox_options = Options()
firefox_options.log.level = 'debug'
firefox_options.add_argument('-private')
# NOTE(review): the next two attributes look like FirefoxProfile settings
# rather than Options ones - confirm they have any effect here.
firefox_options.accept_untrusted_certs = True
firefox_options.assume_untrusted_cert_issuer = True
firefox_options.binary_location = '/Applications/Firefox.app/Contents/MacOS/firefox'
# firefox_options.headless = True

# FirefoxProfile: preferences are applied in the order listed below.
firefox_profile = FirefoxProfile()
_profile_preferences = (
    ('browser.privatebrowsing.autostart', True),
    ('pdfjs.disabled', True),
    ('browser.download.folderList', 2),
    ('browser.download.panel.shown', False),
    ('browser.tabs.warnOnClose', False),
    ('browser.tabs.animate', False),
    ('browser.fullscreen.animateUp', 0),
    ('geo.enabled', False),
    ('browser.urlbar.suggest.searches', False),
    ('browser.tabs.warnOnCloseOtherTabs', False),
)
for _pref_name, _pref_value in _profile_preferences:
    firefox_profile.set_preference(_pref_name, _pref_value)
firefox_profile.update_preferences()
accuracy is very low..


'''



from selenium.webdriver.firefox.options import Options
from selenium import webdriver
import os
import sys
from time import strftime,sleep
# Start a headless Firefox session and open WhatsApp Web.
options = Options()
options.headless = True
options.add_argument("user-data-dir=" + os.path.dirname(sys.argv[0]))
driver = webdriver.Firefox(options=options)
driver.get("http://web.whatsapp.com")

# Write a PNG screenshot of the image element at the XPath below to qr.png.
with open('qr.png', 'wb') as qr_png:
    qr_element = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div/div[2]/div[1]/div/div[2]/div/img')
    qr_png.write(qr_element.screenshot_as_png)

# Ask for the two names used as keys of ``ot``.
name1 = input("Please Enter First Person Name : ")
name2 = input("Please Enter Second Person Name : ")

# Initial per-person values; a repeated name keeps the second value.
ot = {}
ot[name1] = "60:00"
ot[name2] = "50:30"

check = False



def track(name):
예제 #51
0
def scrape(curr_url, hash, soup, results):
    """Extract the article and its Disqus comments from an elcorreodeespana.com page.

    Every extracted item is serialised with ``json.dumps`` (non-ASCII kept
    as-is), appended to ``results`` and printed.

    Args:
        curr_url: URL of the page. Stored as each item's ``source`` and
            loaded in a headless Firefox when a Disqus thread is present.
        hash: Identifier stored, as ``str``, in each item's ``id`` field.
        soup: Parsed page exposing a BeautifulSoup-style ``find_all``.
        results: List that receives the JSON strings (mutated in place).

    Returns:
        None.
    """
    print('Found elcorreodeespana.com...')

    # article
    for article in soup.find_all('article', id='article'):
        print('Getting wordpress article...')

        # Key order is kept stable because it determines the JSON output.
        meta = {
            "id": str(hash),
            "type": 'article',
            "source": curr_url,
            "meta": ''.join(
                utils.clean_soup(c) + ' '
                for c in article.find_all('div', class_='post-headbar')),
            "title": ''.join(
                utils.clean_soup(c) + ' '
                for c in article.find_all('h1', class_='post-title')),
        }
        record = {
            "meta": meta,
            "text": ''.join(
                utils.clean_soup(c) + ' '
                for c in article.find_all('div', class_='post-content')),
        }

        result = json.dumps(record, ensure_ascii=False)
        results.append(result)
        print(result)

    # comments
    if not soup.find_all('div', id='disqus_thread'):
        return

    print('Getting disqus comments...')

    options = FirefoxOptions()
    options.add_argument("--headless")
    driver = webdriver.Firefox(options=options)
    content = None  # stays None when no Disqus embed could be loaded
    try:
        driver.implicitly_wait(5)
        driver.get(curr_url)
        driver.execute_script("document.getElementById('disqus_thread').scrollIntoView();setTimeout(function(){},2000);")
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'disqus_thread')))
        for frame in driver.find_elements_by_tag_name('iframe'):
            # An iframe without a src attribute yields None; skip it instead
            # of aborting the scan of the remaining frames.
            src = frame.get_attribute('src') or ''
            if 'disqus.com/embed' in src:
                driver.get(src)
                WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CLASS_NAME, 'post-message')))
                content = driver.page_source
                break
    except Exception:
        # Best effort: a failed or timed-out comment load must not stop the
        # caller; the article (if any) has already been recorded.
        print('webdriver timeout... ')
    finally:
        # quit() ends the whole session; close() would only close the window.
        driver.quit()

    if content is None:
        print('webdriver empty...')
        return

    try:
        for message in BeautifulSoup(content, "html.parser").find_all('div', class_='post-message'):
            record = {
                "meta": {
                    "id": str(hash),
                    "type": 'comment',
                    "source": curr_url,
                },
                "text": utils.clean_soup(message),
            }

            result = json.dumps(record, ensure_ascii=False)
            results.append(result)
            print(result)
    except Exception:
        print('webdriver empty...')
예제 #52
0
from selenium.webdriver.firefox.options import Options

# Directory holding this module, and that directory's parent.
CWD = os.path.dirname(os.path.abspath(__file__))
MS_WD = os.path.dirname(CWD)

# get the flask web app
from multiscanner.web.app import app as flask_app

# Proxy mapping with both schemes explicitly set to None.
proxies = dict(http=None, https=None)

# Try to start a headless Firefox at import time; if that fails for any
# reason, set the module-level pytest mark so the tests are skipped.
try:
    opts = Options()
    opts.add_argument('-headless')
    driver = webdriver.Firefox(firefox_options=opts)
except Exception:
    pytestmark = pytest.mark.skip

# Fixture values describing the test submitter and submission.
test_submitter_name = 'John Doe'
test_submitter_email = '*****@*****.**'
test_submitter_org = 'Testers'
test_submitter_phone = '123-456-7890'
test_submission_desc = 'A test document submission'


class TestBase(LiveServerTestCase):
    """Live-server test case whose class setup records a server port."""

    @classmethod
    def setup_class(cls):
        """Store the value returned by ``get_free_server_port()`` on the class."""
        port = get_free_server_port()
        cls.mock_server_port = port
def initBrowser():
    """Start a headless Firefox and bind it to the module-level ``browser``."""
    global browser
    headless_opts = Options()
    headless_opts.add_argument('-headless')
    browser = webdriver.Firefox(firefox_options=headless_opts)
예제 #54
0
def scrape():
    """Run the configured property search in Firefox and collect result pages.

    Browser size, target URL and XPaths come from ``shared.PARAMS``; search
    values, timeouts and optional profile preferences come from
    ``shared.CONFIG``. The driver is always quit, even when a step raises.

    Returns:
        list: ``driver.page_source`` of the first results page followed by
        the source of every further page reached through ``get_next``.
    """
    # Window / headless settings.
    options = Options()
    options.headless = shared.HEADLESS
    scrape_params = shared.PARAMS['scrape']
    options.add_argument("--width=" + scrape_params['width'])
    options.add_argument("--height=" + scrape_params['height'])

    # Profile: every entry of EXTENSIONS_DIR is added as an extension, plus
    # optional integer preferences from the 'firefox_profile_ure' section.
    firefox_profile = webdriver.FirefoxProfile()
    for ext_name in os.listdir(shared.EXTENSIONS_DIR):
        ext_path = os.path.join(shared.EXTENSIONS_DIR, ext_name)
        firefox_profile.add_extension(extension=ext_path)
    if 'firefox_profile_ure' in shared.CONFIG:
        profile_prefs = shared.CONFIG['firefox_profile_ure']
        for pref_name, pref_value in profile_prefs.items():
            firefox_profile.set_preference(pref_name, int(pref_value))

    driver = webdriver.Firefox(
        firefox_profile=firefox_profile,
        options=options,
        executable_path=shared.DRIVER_PATH,
        service_log_path=shared.CONFIG['constant']['driver_log_file'])

    # Script used wherever an element is clicked through JavaScript.
    js_click = "arguments[0].click();"
    try:
        search_cfg = shared.CONFIG['search']
        driver.set_page_load_timeout(int(search_cfg['driver_timeout']))
        driver.get(scrape_params['ure'])
        xpaths = shared.PARAMS['xpath']

        # Location box: type the configured value, then send RETURN.
        shared.wait_for_element_visible(driver, xpaths['geolocation'])
        geo_box = driver.find_element_by_xpath(xpaths['geolocation'])
        geo_box.send_keys(search_cfg['geolocation'])
        shared.wait_for_element_visible(driver, xpaths['geolocation'])
        geo_box = driver.find_element_by_xpath(xpaths['geolocation'])
        geo_box.send_keys(Keys.RETURN)

        # Click the cookie-banner close element when one is found.
        banner_buttons = driver.find_elements_by_xpath(
            xpaths['cookie_close_banner'])
        if banner_buttons:
            driver.execute_script(js_click, banner_buttons[0])

        # Click the filter element.
        shared.wait_for_element_visible(driver, xpaths['filter'])
        filter_button = driver.find_element_by_xpath(xpaths['filter'])
        driver.execute_script(js_click, filter_button)

        # Price range.
        shared.wait_for_element_visible(driver, xpaths['min_price'])
        min_price_box = driver.find_element_by_xpath(xpaths['min_price'])
        min_price_box.send_keys(search_cfg['min_price'])
        shared.wait_for_element_visible(driver, xpaths['max_price'])
        max_price_box = driver.find_element_by_xpath(xpaths['max_price'])
        max_price_box.send_keys(search_cfg['max_price'])

        # Bedroom / bathroom dropdowns.
        shared.wait_for_element_visible(driver, xpaths['bedrooms_dropdown'])
        bedrooms = Select(
            driver.find_element_by_xpath(xpaths['bedrooms_dropdown']))
        bedrooms.select_by_visible_text(search_cfg['bedrooms_dropdown'])
        shared.wait_for_element_visible(driver, xpaths['bathrooms_dropdown'])
        bathrooms = Select(
            driver.find_element_by_xpath(xpaths['bathrooms_dropdown']))
        bathrooms.select_by_visible_text(search_cfg['bathrooms_dropdown'])

        # "Under contract" checkbox: waited for with wait_for_element (not
        # the *_visible variant) and clicked through JavaScript.
        shared.wait_for_element(driver, xpaths['under_contract_checkbox'])
        under_contract_box = driver.find_element_by_xpath(
            xpaths['under_contract_checkbox'])
        driver.execute_script(js_click, under_contract_box)

        # Square-feet dropdown: scrolled into view before selecting.
        shared.wait_for_element_visible(driver, xpaths['square_feet_dropdown'])
        sqft_dropdown = driver.find_element_by_xpath(
            xpaths['square_feet_dropdown'])
        driver.execute_script(
            "arguments[0].scrollIntoView(true);", sqft_dropdown)
        Select(sqft_dropdown).select_by_visible_text(
            search_cfg['square_feet_dropdown'])

        # Acres dropdown.
        shared.wait_for_element_visible(driver, xpaths['acres_dropdown'])
        acres = Select(driver.find_element_by_xpath(xpaths['acres_dropdown']))
        acres.select_by_visible_text(search_cfg['acres_dropdown'])

        # Submit the search and wait for the listings to settle.
        shared.wait_for_element_visible(driver, xpaths['update_search'])
        update_button = driver.find_element_by_xpath(xpaths['update_search'])
        driver.execute_script(js_click, update_button)
        shared.wait_for_element_visible(driver, xpaths['results_listings'])
        shared.wait_for_invisible(driver, xpaths['results_spin_wrap'])

        # Capture the first page, then follow get_next() until it is falsy.
        page_sources = [driver.page_source]
        while True:
            next_button = get_next(driver)
            if not next_button:
                break
            driver.execute_script(js_click, next_button)
            shared.wait_for_element_visible(driver, xpaths['results_listings'])
            page_sources.append(driver.page_source)
    finally:
        driver.quit()
    return page_sources
예제 #55
0
 def headless_firefox_driver(self, driverpath):
     """Create a headless Firefox driver and keep it in ``self.__browser``.

     Args:
         driverpath: Passed to ``wd.Firefox`` as ``executable_path``.
     """
     headless_opts = FirefoxOptions()
     headless_opts.add_argument('--headless')
     browser = wd.Firefox(executable_path=driverpath, options=headless_opts)
     self.__browser = browser
예제 #56
0
 def get_options_headlesschrome(self, download_dir):
     """Build Chrome options for a headless session that downloads to a folder.

     Args:
         download_dir: Directory written into the download preferences.

     Returns:
         The configured ``selenium.webdriver.chrome.options.Options``.
     """
     from selenium import webdriver
     from selenium.webdriver.chrome.options import Options

     chrome_options = Options()

     # Download-related preferences passed through the "prefs" option.
     download_prefs = {
         "download.default_directory": download_dir,
         "download.directory_upgrade": True,
         "download.prompt_for_download": False,
         "safebrowsing.enabled": False,
         "safebrowsing.disable_download_protection": True,
         "page.setDownloadBehavior": {
             'behavior': 'allow',
             'downloadPath': download_dir,
         },
     }
     chrome_options.add_experimental_option("prefs", download_prefs)

     # Command-line switches, added in this order.
     # (A "window-size=1920x1080" switch is intentionally not set.)
     for switch in (
             "--test-type",
             "--headless",
             "--no-sandbox",
             "--disable-gpu",
             "--incognito",
             "--disable-extensions",
     ):
         chrome_options.add_argument(switch)
     return chrome_options
예제 #57
0
# Logging: module logger at DEBUG, plus one debug record on the root logger.
_LOGGER.setLevel(logging.DEBUG)
logging.debug("test")

# Parser name and attribution text.
HTML_PARSER = 'html.parser'
ATTRIBUTION = 'Information provided by Aesop'

# Login endpoint and timeout (presumably seconds -- confirm against usage).
LOGIN_URL = 'https://sub.aesoponline.com/Substitute/Home'
LOGIN_TIMEOUT = 10

# On-disk locations, relative to the working directory.
COOKIE_PATH = './aesop_cookies.pickle'
CACHE_PATH = './aesop_cache'

# Browser identity and Chrome command-line switches.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
CHROME_WEBDRIVER_ARGS = [
    '--headless',
    '--user-agent=' + USER_AGENT,
    '--disable-extensions',
    '--disable-gpu',
    '--no-sandbox',
]
CHROMEDRIVER_PATH = 'C:/Users/asaboo/Downloads/chromedriver_76/chromedriver'

# Shared Firefox options with the headless switch set.
FIREFOXOPTIONS = Options()
FIREFOXOPTIONS.add_argument("--headless")


class AESOPError(Exception):
    """Exception type for AESOP-related failures."""


def _save_cookies(requests_cookiejar, filename):
    """Pickle ``requests_cookiejar`` into ``filename``.

    The file is opened in binary write mode, so existing content is
    replaced.
    """
    with open(filename, 'wb') as cookie_file:
        pickle.dump(requests_cookiejar, cookie_file)


def _load_cookies(filename):
예제 #58
0
 def setUp(self):
     """Start a headless Firefox and open the URL from ``get_server_url()``."""
     headless_opts = Options()
     headless_opts.add_argument('-headless')
     browser = webdriver.Firefox(firefox_options=headless_opts)
     # Stored on the instance before navigating.
     self.driver = browser
     browser.get(self.get_server_url())
예제 #59
0
 def __init__(self):
     """Launch a headless Firefox and apply page-load timeout and window size."""
     headless_opts = Options()
     headless_opts.add_argument('--headless')
     firefox = webdriver.Firefox(options=headless_opts)
     self.browser = firefox
     firefox.set_page_load_timeout(30)
     firefox.set_window_size(500, 500)  # set the browser window size
예제 #60
0
def main():
    """Load one website in headless Firefox under a chosen DNS setup and emit its HAR.

    Command-line arguments (parsed with ``argparse``): ``website``,
    ``dns_type``, ``trr_resolver_ip``, ``trr_resolver_uri``, ``model`` and
    the optional ``--timeout`` (default 45).

    Side effects: copies ``/etc/resolv.conf`` aside and replaces it with the
    local ``resolv.conf`` (via ``sudo``), optionally starts dnscrypt-proxy in
    the background, and writes the HAR file's bytes to stdout when the
    ``.ready`` marker appears within the timeout. The browser is always
    quit, even when the page load raises.
    """
    # Parse the command line arguments
    models = [
        'hash', 'rr', 'random', 'cloudflare', 'google', 'quad9', 'nextdns'
    ]
    parser = argparse.ArgumentParser()
    parser.add_argument('website')
    parser.add_argument('dns_type',
                        choices=['dns', 'doh', 'dot', 'dnscrypt-proxy_doh'])
    parser.add_argument('trr_resolver_ip')
    parser.add_argument('trr_resolver_uri')
    parser.add_argument('model', choices=models)
    parser.add_argument('--timeout', type=int, default=45)
    args = parser.parse_args()

    dnscrypt_config_file = '/dnscrypt-proxy/dnscrypt-proxy/dnscrypt-proxy-{0}.toml'.format(
        args.model)

    # Enable devtools in Firefox
    options = Options()
    options.headless = True
    options.add_argument('-devtools')

    # Enable the netmonitor toolbox in devtools so we can save HARs
    profile = webdriver.FirefoxProfile()
    profile.set_preference('devtools.toolbox.selectedTool', 'netmonitor')

    # Set up DNS configuration
    subprocess.run(
        ["sudo", "cp", "/etc/resolv.conf", "/etc/resolv.upstream.conf"])
    subprocess.run(["sudo", "cp", "resolv.conf", "/etc/resolv.conf"])
    if args.dns_type == 'dnscrypt-proxy_doh':
        # shell=True is needed for the redirection and backgrounding; the
        # only interpolated value derives from args.model, which argparse
        # restricts to the fixed ``models`` list.
        # "> /dev/null 2>&1" is used instead of "&>" because "&>" is not
        # POSIX sh: a non-bash /bin/sh would leave the proxy's output on
        # stdout, which is the channel that carries the HAR.
        subprocess.run(
            "sudo /dnscrypt-proxy/dnscrypt-proxy/dnscrypt-proxy -config {0} > /dev/null 2>&1 &"
            .format(dnscrypt_config_file),
            shell=True)
        subprocess.run(["sudo", "sleep", "5s"])

    # Configure the DNS settings in Firefox
    if args.dns_type in ('dns', 'dot', 'dnscrypt-proxy_doh'):
        options.set_preference('network.trr.mode', 0)
    elif args.dns_type == 'doh':
        options.set_preference('network.trr.mode', 3)
        options.set_preference('network.trr.request-timeout', 1500)
        options.set_preference('network.trr.max-fails', 5)
        trr_resolver_ip = args.trr_resolver_ip
        trr_resolver_uri = args.trr_resolver_uri
        if trr_resolver_ip:
            options.set_preference('network.trr.bootstrapAddress',
                                   trr_resolver_ip)
        if trr_resolver_uri:
            options.set_preference('network.trr.uri', trr_resolver_uri)

    # Once the HAR is on disk in the container, write it to stdout so the host machine can get it
    har_file = "/home/seluser/measure/har.json"

    def har_file_ready():
        """Return True once the ``<har_file>.ready`` marker exists."""
        return os.path.exists(har_file + ".ready")

    # Launch Firefox and install our extension for getting HARs
    driver = webdriver.Firefox(options=options,
                               firefox_profile=profile,
                               firefox_binary="/opt/firefox/firefox-bin")
    try:
        driver.install_addon("/home/seluser/measure/harexporttrigger-0.6.2-fx.xpi")
        driver.set_page_load_timeout(args.timeout)

        # Make a page load
        started = datetime.now()
        driver.get(args.website)

        # Poll once per second until the marker shows up or the timeout,
        # measured from just before the page load, has elapsed.
        while (datetime.now() - started).total_seconds() < args.timeout \
                and not har_file_ready():
            time.sleep(1)

        if har_file_ready():
            with open(har_file, 'rb') as f:
                sys.stdout.buffer.write(f.read())
    finally:
        # Always end the session so a failed page load or interrupted wait
        # does not leave Firefox and its driver process running.
        driver.quit()