Ejemplo n.º 1
0
 def getBrowser(self):
     """Create and return a Firefox WebDriver using the bundled geckodriver.

     The geckodriver binary is resolved under ``<cwd>/geckodriver/<dir>/``
     where ``<dir>`` is ``windows`` or ``linux``; every other platform
     falls back to ``mac``.  Headless mode follows ``self.__headless`` and
     the download-related preferences are driven by ``self.__mimeTypes``.

     Returns:
         The ``Firefox`` driver instance.
     """
     system = platform.system()
     if system == 'Windows':
         gecko_parts = ("windows", "geckodriver.exe")
     elif system == 'Linux':
         gecko_parts = ("linux", "geckodriver")
     else:
         gecko_parts = ("mac", "geckodriver")
     geckopath = os.path.join(os.getcwd(), "geckodriver", *gecko_parts)

     # One options object is enough; only the headless flag differs.
     options = FirefoxOptions()
     options.headless = True if self.__headless else False

     firefox_profile = FirefoxProfile()
     # Value 2 presumably means "block" for these permissions -- TODO confirm.
     firefox_profile.set_preference('permissions.default.stylesheet', 2)
     firefox_profile.set_preference('permissions.default.image', 2)
     # NOTE(review): the value below is the string 'false', not the boolean
     # False -- confirm that this is what the preference expects.
     firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', 'false')
     # Download handling: custom folder (<cwd>/tmp) and the configured MIME
     # types are passed to the "never ask" / plugin-disable preferences.
     firefox_profile.set_preference("browser.download.folderList", 2)
     firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
     firefox_profile.set_preference("browser.download.dir", os.path.join(os.getcwd(), "tmp"))
     firefox_profile.set_preference("browser.helperApps.neverAsk.saveToDisk", self.__mimeTypes)
     firefox_profile.set_preference("plugin.disable_full_page_plugin_for_types", self.__mimeTypes)
     firefox_profile.set_preference("pdfjs.disabled", True)
     return Firefox(firefox_profile=firefox_profile, executable_path=geckopath, options=options)
Ejemplo n.º 2
0
def driver(request):
    """Yield a WebDriver for the browser selected with ``--browser``.

    Supported values are ``firefox``, ``chrome`` and ``ie``.  Any other
    value prints a message and terminates the process with exit code 1.
    Firefox is maximized; Chrome and IE are switched to full screen.
    Headless mode is not enabled.  The driver is quit after the ``yield``.
    """
    browser = request.config.getoption("--browser")
    if browser not in ('firefox', 'chrome', 'ie'):
        print('Unsupported browser!')
        sys.exit(1)

    if browser == 'firefox':
        caps = FirefoxOptions().to_capabilities()
        wd = webdriver.Firefox(desired_capabilities=caps)
        wd.maximize_window()
    elif browser == 'chrome':
        caps = ChromeOptions().to_capabilities()
        wd = webdriver.Chrome(desired_capabilities=caps)
        wd.fullscreen_window()
    else:
        caps = IeOptions().to_capabilities()
        wd = webdriver.Ie(desired_capabilities=caps)
        wd.fullscreen_window()

    yield wd
    wd.quit()
Ejemplo n.º 3
0
def test4():
    """Load Baidu in headless Firefox and print the URL the browser ends up on."""
    options = FirefoxOptions()
    options.add_argument('--headless')
    # NOTE(review): ``firefox_options`` is the legacy keyword; newer Selenium
    # releases expect ``options=`` -- confirm against the installed version.
    dr = webdriver.Firefox(firefox_options=options)
    try:
        dr.get("https://www.baidu.com")
        print(dr.current_url)
    finally:
        # quit() ends the whole session, even when the page load fails.
        dr.quit()
Ejemplo n.º 4
0
def get_user_page(url: str, profile_dir: str, outdir='.') -> None:
    """Scroll through ``url`` in Firefox, handing each matched tile to ``get_picture``.

    Args:
        url: Page to open.
        profile_dir: Directory of the Firefox profile to load.
        outdir: Passed through to ``get_picture`` for every element.

    The page is scrolled to the bottom every two seconds.  Scrolling stops
    once ``document.body.scrollHeight`` has reported the same value three
    times in a row.  Errors raised by ``get_picture`` are printed and do not
    stop the run.
    """
    opt = FirefoxOptions()  # headless mode is not enabled

    p = FirefoxProfile(profile_dir)
    p.set_preference('extensions.lastAppBuildId', '-1')
    p.set_preference('network.proxy.socks', '')

    with Firefox(p, options=opt) as ff:
        ff.get(url)

        heights = []

        while True:
            for elem in ff.find_elements_by_css_selector('.v1Nh3'):
                try:
                    get_picture(ff, elem, outdir)
                except Exception as e:
                    print(e)

            heights.append(
                ff.execute_script('return document.body.scrollHeight;'))
            # Only the three most recent samples matter for the stop check.
            del heights[:-3]
            # A plain equality test also terminates when the height is 0,
            # which a truthiness-based comparison would never accept.
            if len(heights) == 3 and heights[0] == heights[1] == heights[2]:
                break

            ff.execute_script(
                'window.scrollTo(0, document.body.scrollHeight);')
            time.sleep(2)
Ejemplo n.º 5
0
    def __init__(
        self,
        firefox_options: FirefoxOptions = None,
        desired_capabilities: dict = None,
        token: str = None,
        project_name: str = None,
        job_name: str = None,
        disable_reports: bool = False,
        report_type: ReportType = ReportType.CLOUD_AND_LOCAL,
    ):
        """Resolve the capabilities to use and delegate to the parent initializer.

        Capability resolution order:
          1. ``firefox_options`` when given (it wins over ``desired_capabilities``),
          2. ``desired_capabilities`` when given,
          3. the capabilities of a default ``FirefoxOptions()``.

        All remaining arguments are forwarded unchanged to ``super().__init__``.
        """
        if firefox_options is not None:
            caps = firefox_options.to_capabilities()
        elif desired_capabilities is not None:
            caps = desired_capabilities
        else:
            caps = FirefoxOptions().to_capabilities()

        super().__init__(
            capabilities=caps,
            token=token,
            project_name=project_name,
            job_name=job_name,
            disable_reports=disable_reports,
            report_type=report_type,
        )
Ejemplo n.º 6
0
def browser(request, driver_path):
    """Yield a WebDriver for the ``--browser`` option (``Chrome``, ``Firefox`` or ``Ie``).

    ``driver_path`` is used as a string prefix for the chromedriver and
    geckodriver executables.  An unknown browser name prints a message and
    ends the generator without yielding anything.  The driver is quit after
    the ``yield``.
    """
    # setup
    name = request.config.getoption("--browser")
    if name not in ("Chrome", "Firefox", "Ie"):
        print(f"Неизвестный тип браузера \"{name}\"")
        return None

    options_by_name = {
        "Chrome": ChromeOptions,
        "Firefox": FirefoxOptions,
        "Ie": IeOptions,
    }
    options = options_by_name[name]()
    # NOTE(review): the same two arguments are passed to every browser;
    # confirm that Firefox and IE actually honour them in this form.
    for flag in ("headless", "start-maximized"):
        options.add_argument(flag)

    if name == "Chrome":
        wd = Chrome(executable_path=driver_path + "chromedriver", options=options)
    elif name == "Firefox":
        wd = Firefox(executable_path=driver_path + "geckodriver", options=options)
    else:
        wd = Ie(options=options)

    yield wd

    # teardown
    wd.quit()
Ejemplo n.º 7
0
    def __LoadDrivers(self):
        """Build the ``__EasyBrowser`` wrapper for ``self.browser_type``.

        The absolute path of this file is searched for a ``MySelenium``
        directory component (backslash-separated); the drivers are expected
        in its ``DRIVERS`` sub-folder.  Supported browser types are
        ``CHROME``, ``FIREFOX`` and ``EDGE``.

        ``DriverFolderNotFound`` (no ``MySelenium`` component in the path)
        and ``DriverMissing`` (unsupported browser type) are caught here and
        turned into ``sys.exit`` with the exception's docstring as message.

        Returns:
            The object returned by ``self.__EasyBrowser(...)``.
        """
        located = re.search(r'(.*\\MySelenium\\)(.*)', abspath(__file__))
        try:
            if not located:
                raise DriverFolderNotFound
            drivers_dir = str(located.group(1)) + 'DRIVERS\\'

            if self.browser_type == 'CHROME':
                return self.__EasyBrowser(drivers_dir + 'chromedriver.exe',
                                          ChromeOptions(), self.browser_type)
            if self.browser_type == 'FIREFOX':
                # NOTE(review): only the folder is passed here, not a driver
                # file name -- confirm that __EasyBrowser expects that.
                return self.__EasyBrowser(drivers_dir, FirefoxOptions(),
                                          self.browser_type)
            if self.browser_type == 'EDGE':
                return self.__EasyBrowser(drivers_dir + 'msedgedriver.exe',
                                          EdgeOptions(), self.browser_type)
            raise DriverMissing
        except (DriverFolderNotFound, DriverMissing) as error:
            sys.exit(error.__doc__)
Ejemplo n.º 8
0
 def __init__(self, firefox_config: dict, tor_port: int):
     """Store the Firefox settings; no browser is started here.

     @param firefox_config mapping with the keys ``retries``, ``wait_tag``,
            ``load_images``, ``clean_frequency`` and a nested ``timeout``
            mapping holding ``page`` and ``element``
     @param tor_port value stored as ``self.tor_port``
     """
     self.logger = logging.getLogger()

     # Headless options are prepared up front and kept for later use.
     headless_options = FirefoxOptions()
     headless_options.set_headless(True)
     self.options = headless_options

     # Numeric settings are coerced with int(); wait_tag is stored as given.
     self.retries = int(firefox_config["retries"])
     self.wait_tag = firefox_config["wait_tag"]
     self.load_images = int(firefox_config["load_images"])
     self.clean_frequency = int(firefox_config["clean_frequency"])
     timeouts = firefox_config["timeout"]
     self.page_timeout = int(timeouts["page"])
     self.element_timeout = int(timeouts["element"])

     self.tor_port = tor_port

     # Placeholders that start out as None.
     self.driver = None
     self.mode = None
     self.profile = None
Ejemplo n.º 9
0
def browser(request):
    """Return a WebDriver for the ``--browser`` option (``chrome`` or ``firefox``).

    The matching driver manager downloads and installs the driver binary
    first.  Both browsers are started with ``--kiosk``; Chrome additionally
    ignores certificate errors and Firefox gets a 5 second implicit wait.
    ``browser.quit`` is registered as a finalizer on ``request``.

    Raises:
        Exception: if the requested browser is neither ``chrome`` nor
            ``firefox``.
    """
    browser_type = request.config.getoption('--browser')
    if browser_type == 'chrome':
        cdm = ChromeDriverManager()
        cdm.download_and_install()
        option = ChromeOptions()
        option.add_argument('--kiosk')
        option.add_argument('--ignore-certificate-errors')
        browser = webdriver.Chrome(options=option)
        request.addfinalizer(browser.quit)
        return browser
    elif browser_type == 'firefox':
        gdm = GeckoDriverManager()
        gdm.download_and_install()
        option = FirefoxOptions()
        option.add_argument('--kiosk')
        browser = webdriver.Firefox(options=option)
        browser.implicitly_wait(5)
        request.addfinalizer(browser.quit)
        return browser
    else:
        # Report the value that was actually inspected; ``request.param``
        # is not available on a fixture that is not parametrized.
        raise Exception(f'{browser_type} is not supported!')
Ejemplo n.º 10
0
def init_driver(binary_path, binary_type, stop_compression, proxy_add,
                proxy_port):
    """
    Initialize a headless Selenium Firefox driver.

    Args:
        binary_path(str): the path to the 'firefox' executable
        binary_type(str): only 'FirefoxBinary' is handled; any other value
            makes the function return None.
        stop_compression: when truthy, both accept-encoding preferences are
            set to an empty string and 'devtools.cache.disabled' to True.
        proxy_add: proxy host applied to the ftp, http, socks and ssl proxy
            preferences (only when proxy_port is truthy as well).
        proxy_port: proxy port applied to the matching '*_port' preferences.
    Returns:
        driver(WebDriver): the initialized driver, or None for an
        unsupported binary_type.
    """
    if binary_type != 'FirefoxBinary':
        return None

    binary = FirefoxBinary(binary_path)
    options = FirefoxOptions()
    options.add_argument('--headless')

    if stop_compression:
        for pref, value in (('network.http.accept-encoding', ''),
                            ('network.http.accept-encoding.secure', ''),
                            ('devtools.cache.disabled', True)):
            options.set_preference(pref, value)

    if proxy_add and proxy_port:
        for scheme in ('ftp', 'http', 'socks', 'ssl'):
            options.set_preference('network.proxy.' + scheme, proxy_add)
            options.set_preference('network.proxy.' + scheme + '_port',
                                   proxy_port)
        options.set_preference('network.proxy.type', 1)

    return webdriver.Firefox(firefox_binary=binary, options=options)
Ejemplo n.º 11
0
    def write_tf(self,filesize, threadnum, num_tfrecords=10):
        '''Write ``num_tfrecords`` GZIP-compressed tfrecord files.

        Input parameters are: filesize (number of images in one tfrecord),
        threadnum (thread id, used in the file name and in progress output)
        and num_tfrecords (how many files to attempt).

        A file is only written when ``generate_tables`` returns exactly
        ``filesize`` entries; otherwise that index is skipped silently.
        The headless Firefox driver is always shut down, also on errors.
        '''
        options = tf.io.TFRecordOptions(tf.io.TFRecordCompressionType.GZIP)
        opts = FirefoxOptions()
        opts.headless = True
        driver = Firefox(executable_path='./geckodriver', options=opts)

        try:
            for idx in range(num_tfrecords):
                starttime = time.time()
                output_file_name = '{:02d}_{:08d}.tfrecord'.format(threadnum, idx)
                print('+Thread {} [{}/{}] generating ...'.format(threadnum, idx+1, num_tfrecords))

                data_arr, _ = self.generate_tables(driver, filesize, output_file_name)
                if data_arr is None or len(data_arr) != filesize:
                    continue

                with tf.io.TFRecordWriter(os.path.join(self.outtfpath,output_file_name),options=options) as writer:
                    for imgindex,subarr in enumerate(data_arr):
                        arr = subarr[0]

                        # Only the first channel of the image is kept.
                        img = np.asarray(subarr[1][0],np.int64)[:,:,0]
                        colmatrix = np.array(arr[1],dtype=np.int64)
                        cellmatrix = np.array(arr[2],dtype=np.int64)
                        rowmatrix = np.array(arr[0],dtype=np.int64)
                        bboxes = np.array(arr[3])
                        tablecategory = arr[4][0]
                        seq_ex = self.generate_tf_record(img, cellmatrix, rowmatrix, colmatrix, bboxes,tablecategory,imgindex,output_file_name)
                        writer.write(seq_ex.SerializeToString())
                    print('--- thread {} [{}/{}] completed in {:.3f}'.format(threadnum, idx+1, num_tfrecords, time.time() - starttime))
        finally:
            # Release the browser even when generation or writing fails.
            driver.stop_client()
            driver.quit()
Ejemplo n.º 12
0
 def _open_browser_with_headless(self):
     """
     Open Chrome or Firefox in headless mode.

     The browser name comes from ``self._reader.get_str('browser', 'browser')``
     (lower-cased).  Any other name, or a ``WebDriverException`` during
     start-up, is logged as an error and ends the process via ``exit()``.

     :return: the driver object
     """
     driver = None
     browser = self._reader.get_str('browser', 'browser').lower()
     headless_flags = ('--headless', '--disable-gpu')
     try:
         if browser == 'chrome':
             chrome_options = ChromeOptions()
             for flag in headless_flags:
                 chrome_options.add_argument(flag)
             driver = webdriver.Chrome(options=chrome_options,
                                       executable_path=constant.chrome_path)
         elif browser == 'firefox':
             firefox_options = FirefoxOptions()
             for flag in headless_flags:
                 firefox_options.add_argument(flag)
             driver = webdriver.Firefox(
                 options=firefox_options,
                 executable_path=constant.firefox_path,
                 service_log_path=devnull)
         else:
             self._log.error(f'{browser}配置有误,或{browser}不支持无头模式,请确认!!')
             exit()

         # The version is reported under 'browserVersion' or, failing that,
         # under 'version'.
         caps = driver.capabilities
         if 'browserVersion' in caps:
             version = caps['browserVersion']
         else:
             version = caps['version']
         self._log.info(f'{browser}启动成功,版本号:{version}')
         sleep(1)
         return driver
     except WebDriverException as e:
         self._log.error(f'{browser}无头模式启动失败:{e}')
         exit()
def prepare_browsers(headless: bool, driver_path: str,
                     twitter_profile_path: str) -> Browsers:
    """
    Start the three Firefox instances used for the account search.

    :param headless bool: Should search be performed in headless mode
    :param driver_path: Path to geckodriver; falls back to "geckodriver"
        when empty
    :param twitter_profile_path: Path to twitter profile folder
    :return: ``Browsers`` holding the LinkedIn, Xing and Twitter drivers
        (in that order)
    """
    logging.info("Running Twitter scraper from profile in %s",
                 twitter_profile_path)
    driver_path = driver_path or "geckodriver"

    profile = FirefoxProfile()
    twitter_profile = FirefoxProfile(twitter_profile_path)
    # Written straight into the profile's frozen default preferences.
    frozen_prefs = twitter_profile.DEFAULT_PREFERENCES["frozen"]
    frozen_prefs["extensions.autoDisableScopes"] = 0
    twitter_profile.set_preference("extensions.enabledScopes", 15)

    logins = social_media_logins(driver_path, profile)

    driver_options = FirefoxOptions()
    driver_options.headless = headless

    def launch(firefox_profile):
        # All three browsers share the same options and geckodriver path.
        return Firefox(options=driver_options,
                       firefox_profile=firefox_profile,
                       executable_path=driver_path)

    linked_in_driver = launch(profile)
    xing_driver = launch(profile)
    twitter_driver = launch(twitter_profile)

    linked_in_login, xing_login = logins[0], logins[1]
    set_login_data(linked_in_driver, linked_in_login)
    set_login_data(xing_driver, xing_login)
    retoggleAllTheAddons(twitter_driver)
    return Browsers(linked_in_driver, xing_driver, twitter_driver)
Ejemplo n.º 14
0
 def __init__(self, headless=True, options=None, path='myengine\\geckodriver'):
     """Start Firefox with the given command-line arguments.

     Args:
         headless: Value assigned to the options' ``headless`` flag.
         options: Iterable of argument strings added one by one to the
             Firefox options; ``None`` means no extra arguments.
         path: Location of the geckodriver executable.
     """
     browser_options = FirefoxOptions()
     # ``None`` replaces a shared mutable list as the default value.
     for argument in (options if options is not None else ()):
         browser_options.add_argument(argument)
     browser_options.headless = headless
     Firefox.__init__(self, options=browser_options, executable_path=path)
     Browser.__init__(self)
Ejemplo n.º 15
0
    def get_browser_capabilities(self, browser_name, headless=False):
        """
        Build the capabilities dict for one browser, including its options.

        :param browser_name: browser name in lowercase
        :type browser_name: str
        :param headless: run browser without gui (only applied to firefox
            and chrome, the two browsers that get an options object here)
        :type headless: bool
        :return: capabilities for the browser, or None when the platform is
            appium based
        :rtype: dict
        """
        if self.is_appium_based:
            return

        if browser_name == 'firefox':
            options = FirefoxOptions()
        elif browser_name == 'chrome':
            options = ChromeOptions()
            options.add_argument('disable-infobars')
        else:
            options = None
        if options and headless:
            options.headless = True

        # hack: start from the options' capabilities and overwrite afterwards,
        # so the 'platform' value is not replaced by the chrome options
        browser_caps = options.to_capabilities() if options else {}
        matching = [entry for entry in self.browsers
                    if browser_name.lower() == entry[0].lower()]
        browser_name, browser_version = matching[0]
        browser_caps.update(browserName=browser_name,
                            version=browser_version,
                            platform=self.full_name)
        if isinstance(self.extra, dict):
            browser_caps.update(self.extra)
        return browser_caps
Ejemplo n.º 16
0
def main():
    """Open huya.com in Firefox and keep posting canned chat messages.

    After the navigation title is clicked, the function waits (up to 2000
    seconds) for the chat input to appear.  It then loops forever: the
    input value is set through JavaScript, a randomly chosen canned message
    is typed, and ENTER is sent after a pause of 5 to 10 seconds.

    NOTE(review): the loop never ends, so the ``raw_input`` prompt and
    ``driver.close()`` below it are not reached in practice.
    """
    opts = FirefoxOptions()
    # Headless mode is left disabled so the page stays visible.
    # opts.add_argument("--headless")
    option_profile = webdriver.FirefoxProfile()
    option_profile.set_preference("plugin.state.flash", 2)
    driver = webdriver.Firefox(firefox_profile=option_profile, options=opts)
    driver.maximize_window()
    time.sleep(1)
    driver.get("https://www.huya.com")
    WebDriverWait(driver, 20, 0.5).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'hy-nav-title.J_hdLg')))
    driver.find_element_by_class_name('hy-nav-title.J_hdLg').click()
    #WebDriverWait(driver,20,0.5).until(EC.presence_of_element_located((By.ID,'nav-login')))
    #https://www.huya.com/685233
    wandMsg = [u'有人一起开黑的吗', u'鲁班的攻击距离长,更适合远程消耗敌人', u'主播那么厉害']
    WebDriverWait(driver, 2000, 0.5).until(
        EC.presence_of_element_located((By.ID, 'pub_msg_input')))
    sendBtn = driver.find_element_by_id('msg_send_bt')
    msg = driver.find_element_by_id('pub_msg_input')
    if msg is not None and sendBtn is not None:
        print('msg btn found..')
        js = 'document.getElementById("pub_msg_input").value="%s";'
        text = "[害羞]"
        while True:
            driver.execute_script(js % text)
            # random.choice picks every message with equal probability.
            msg.send_keys(random.choice(wandMsg))
            time.sleep(random.randint(5, 10))
            msg.send_keys(Keys.ENTER)
    raw_input('enter to end')
    driver.close()
Ejemplo n.º 17
0
def driver_factory(browser):
    """Create a headless WebDriver for ``browser`` (``"chrome"`` or ``"firefox"``).

    Chrome is wrapped in an ``EventFiringWebDriver`` with ``MyListener`` and
    started with performance/browser logging preferences and the ``w3c``
    experimental option switched off.  Firefox is returned as a plain
    driver with a profile that accepts untrusted certificates.

    Raises:
        Exception: for any other browser name.
    """
    if browser == "chrome":
        logger = logging.getLogger('chrome_fixture')
        logger.setLevel(LOG_LEVEL)
        options = ChromeOptions()
        options.headless = True
        options.add_argument('--ignore-ssl-errors=yes')
        options.add_argument('--ignore-certificate-errors')
        logger.info("Подготовка среды для запуска тестов...")
        # Work on a copy: DesiredCapabilities.CHROME is a class-level dict
        # shared by every user of the class.
        caps = DesiredCapabilities.CHROME.copy()
        caps['loggingPrefs'] = {'performance': 'ALL', 'browser': 'ALL'}
        options.add_experimental_option('w3c', False)
        driver = EventFiringWebDriver(
            webdriver.Chrome(desired_capabilities=caps, options=options),
            MyListener())
        logger.debug(
            "Браузер Chrome запущен со следующими desired_capabilities:{}".
            format(driver.desired_capabilities))
    elif browser == "firefox":
        profile = FirefoxProfile()
        profile.accept_untrusted_certs = True
        options = FirefoxOptions()
        options.headless = True
        driver = webdriver.Firefox(options=options, firefox_profile=profile)
    else:
        raise Exception("Driver not supported")
    return driver
Ejemplo n.º 18
0
 def __init__(self, browser='Chrome'):
     """Prepare headless browser options and capabilities; no driver is started.

     ``browser == 'Firefox'`` selects ``FirefoxOptions``; every other value
     uses ``ChromeOptions``.  The Selenoid host and port are read from the
     ``SELENOID_HOST`` / ``SELENOID_PORT`` environment variables (the port
     defaults to "4444").
     """
     if browser == 'Firefox':
         self.opts = FirefoxOptions()
     else:
         self.opts = ChromeOptions()
     self.opts.headless = True
     for argument in ('--no-sandbox',
                      '--disable-extensions',
                      '--ignore-certificate-errors',
                      '--disable-gpu',
                      "--start-maximized",
                      "--enable-logging",
                      "--enable-automation"):
         self.opts.add_argument(argument)

     # Extend a copy so the options' own capabilities stay untouched.
     capabilities = self.opts.capabilities.copy()
     capabilities['acceptSslCerts'] = True
     capabilities['acceptInsecureCerts'] = True
     capabilities['goog:loggingPrefs'] = {
         'browser': 'ALL',
         'performance': 'ALL'
     }
     self.capabilities = capabilities

     selenoid_host = os.environ.get("SELENOID_HOST")
     selenoid_port = os.environ.get("SELENOID_PORT", "4444")
     self.selenoid = {'host': selenoid_host, 'port': selenoid_port}

     self.driver = None
     self.ui = None
     self.adcm = None
     self._client = None
 def __init__(self, config: dict):
     """
     Prepare headless Firefox options and a profile; no browser is started.

     @param config mapping providing ``config["log"]["name"]``,
            ``config["mode"]`` and, under ``config["firefox"]``, the keys
            ``retries`` and ``page_timeout``
     """
     self.logger = logging.getLogger(config["log"]["name"])

     headless_options = FirefoxOptions()
     headless_options.set_headless(True)

     firefox_profile = FirefoxProfile()
     if config["mode"] == "tor":
         # Route traffic through a SOCKS proxy on 127.0.0.1:9050.
         for pref, value in (("network.proxy.type", 1),
                             ("network.proxy.socks", "127.0.0.1"),
                             ("network.proxy.socks_port", 9050),
                             ("network.proxy.socks_remote_dns", True)):
             firefox_profile.set_preference(pref, value)

     firefox_section = config["firefox"]
     self.retries = firefox_section["retries"]
     self.page_timeout = config["firefox"]["page_timeout"]
     self.options = headless_options
     self.profile = firefox_profile
     # The driver is created later.
     self.driver = None
Ejemplo n.º 20
0
def driver(request):
    """Return a WebDriver for the ``--browser`` option.

    ``chrome`` and ``firefox`` are matched explicitly; every other value
    starts Internet Explorer.  ``wd.quit`` is registered as a finalizer on
    ``request`` in all three cases, so no further teardown code is needed
    after the returns.
    """
    browser = request.config.getoption('--browser')
    if browser == 'chrome':
        print('\n Chrome browser')
        options = ChromeOptions()
        options.add_argument('--start-maximized')
        wd = webdriver.Chrome(options=options)
        request.addfinalizer(wd.quit)
        return wd
    elif browser == 'firefox':
        print('\n FF browser')
        options = FirefoxOptions()
        options.add_argument('--start-maximized')
        options.add_argument('--headless')
        wd = webdriver.Firefox(options=options)
        wd.maximize_window()
        request.addfinalizer(wd.quit)
        return wd
    else:
        print('\n IE browser')
        options = IeOptions()
        wd = webdriver.Ie(options=options)
        wd.fullscreen_window()
        request.addfinalizer(wd.quit)
        return wd
Ejemplo n.º 21
0
def driver_open(url, the_encoding="utf-8", timeout=3):
    """Load ``url`` in headless Firefox and return the rendered page parsed by ``BS``.

    Args:
        url: Address to open.
        the_encoding: Encoding applied to the page HTML before parsing.
        timeout: Page-load timeout handed to the driver.

    A failing page load (for instance a timeout) is tolerated: the function
    waits five seconds and then reads whatever HTML is present.

    Returns:
        The object produced by ``BS`` for the document's inner HTML.
    """
    from selenium.webdriver import FirefoxOptions
    from selenium import webdriver
    import time
    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = webdriver.Firefox(firefox_options=opts)
    try:
        driver.set_page_load_timeout(timeout)
        try:
            # May raise on timeout even though the scripts have just
            # finished loading; the inner HTML is still read afterwards.
            driver.get(url)
        except Exception:
            time.sleep(5)
        print("++++++++++++++++++++++++++++++++++++++++")
        print("++++++++++++ run finnaly +++++++++++++++")
        print("++++++++++++++++++++++++++++++++++++++++")
        html2 = driver.execute_script(
            "return document.documentElement.innerHTML;")
        return BS(html2.encode(the_encoding))
    finally:
        # Always end the browser session, also when reading or parsing fails.
        driver.quit()
Ejemplo n.º 22
0
 def __enter__(self):
     """Start a headless Firefox with a fresh profile and store it on ``self.browser``.

     Returns:
         ``self``, so the object can be bound by a ``with`` statement.
     """
     headless_options = FirefoxOptions()
     headless_options.set_headless(True)
     self.browser = Firefox(
         firefox_options=headless_options,
         firefox_profile=FirefoxProfile(),
     )
     return self
Ejemplo n.º 23
0
 def launch_browser(self, browser_name, url):
     """Start ``browser_name`` (``chrome``, ``firefox`` or ``ie``) and open ``url``.

     The driver is stored in the module-level global ``driver``.  After a
     successful start a 10 second implicit wait is set and ``url`` is
     loaded.  An unknown browser name or a ``WebDriverException`` during
     start-up is logged and the method returns without navigating, so a
     missing or stale global driver is never used.
     """
     global driver
     try:
         if browser_name == "chrome":
             chromeoptions = ChromeOptions()
             chromeoptions.add_argument("start-maximized")
             chromeoptions.add_argument("disable-notifications")
             chromeoptions.add_argument("--ignore-certificate-errors")
             chromeoptions.add_argument("--disable-infobars")
             chromeoptions.add_argument("--disable-extensions")
             driver = webdriver.Chrome(
                 executable_path="./drivers/chromedriver.exe",
                 options=chromeoptions)
             log.info("chrome browser launch successfully")
         elif browser_name == "firefox":
             firefoxoptions = FirefoxOptions()
             # NOTE(review): "start-maximize" differs from the
             # "start-maximized" used for Chrome -- confirm it is intended.
             firefoxoptions.add_argument("start-maximize")
             driver = webdriver.Firefox(
                 executable_path="./drivers/geckodriver.exe",
                 options=firefoxoptions)
             log.info("firefox browser launch successfully")
         elif browser_name == "ie":
             ieoptions = IeOptions()
             ieoptions.add_argument("start-maximize")
             driver = webdriver.Ie(
                 executable_path="./drivers/IEDriverServer.exe",
                 options=ieoptions)
             log.info("ie browser launch successfully")
         else:
             log.error("invalid browser name")
             return
     except WebDriverException as e:
         # Put the exception text into the message itself instead of
         # passing it as a stray positional argument.
         log.error("exception %s" % e)
         return
     driver.implicitly_wait(10)
     driver.get(url)
Ejemplo n.º 24
0
def login1():
    """Log in through headless Firefox and return the session cookie.

    The login form is filled with fixed credentials, the function waits
    five seconds and then reads the browser cookies.

    Returns:
        ``{'JSESSIONID': <value>}`` taken from the last cookie named
        ``JSESSIONID``.

    Raises:
        RuntimeError: if no ``JSESSIONID`` cookie is present afterwards.
    """
    opt = FirefoxOptions()            # create the Firefox options object
    opt.headless = True               # run without a visible window
    driver = Firefox(options=opt)     # start the headless browser
    try:
        # Selenium login against the test environment
        driver.get("http://192.168.6.27:6030/passports/login?service=http%3A%2F%2F192.168.6.27%3A6030%2Fportals%2Fcas&tenantCode=cqsh&trial=false")

        driver.find_element(By.ID, "username").send_keys("test")
        driver.find_element(By.ID, "pwd1").send_keys("1")
        driver.find_element(By.CSS_SELECTOR, ".justUse").click()

        time.sleep(5)
        # Fetch the JSESSIONID; the last matching cookie wins.
        session_id = None
        for cookie in driver.get_cookies():
            if cookie['name'] == 'JSESSIONID':
                session_id = cookie['value']
    finally:
        # End the session even when a lookup or the login itself fails.
        driver.quit()

    if session_id is None:
        raise RuntimeError("JSESSIONID cookie not found after login")
    return {'JSESSIONID': session_id}
Ejemplo n.º 25
0
    def __init__(self, position, lit, time):
        """Set up the search state, the result CSV file and a headless browser.

        Args:
            position: First search keyword; the original comments give a
                job title such as "accounting" as an example.
            lit: Second search keyword; examples given are education levels
                (junior college, bachelor, master).
            time: Third search keyword; examples given are experience
                levels (fresh graduate, 3-5 years).

        The three keywords are also used to build the CSV file name.  The
        file is created (or truncated) and receives its header row once.
        """
        # Home search page
        self.start_url = 'https://search.51job.com/list/000000,000000,0000,00,9,99,+,2,1.html'
        # Search keywords [position, education requirement, work experience]
        self.key_words = [position, lit, time]
        self.df = pd.DataFrame(columns=['职位', '日期', '地点', '网址'])
        # Start the detail file with its header row.
        with open('职位详情{0}_{1}_{2}.csv'.format(*self.key_words),
                  'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['公司简介', '职位名称', '职位信息'])

        # Headless Firefox plus a 10 second explicit wait helper
        options = FirefoxOptions()
        options.add_argument('-headless')
        self.browser = Firefox(options=options)
        self.wait = WebDriverWait(self.browser, 10)
Ejemplo n.º 26
0
def driver_factory(browser, executor):
    """Create a headless WebDriver for ``browser`` (``"chrome"`` or ``"firefox"``).

    Chrome runs remotely against ``http://<executor>:4444/wd/hub`` and is
    wrapped in an ``EventFiringWebDriver`` with ``MyListener``.  Firefox is
    started locally with a profile that accepts untrusted certificates;
    ``executor`` is not used for it.

    Raises:
        Exception: for any other browser name.
    """
    if browser == "firefox":
        ff_profile = FirefoxProfile()
        ff_profile.accept_untrusted_certs = True
        ff_options = FirefoxOptions()
        ff_options.headless = True
        return webdriver.Firefox(options=ff_options, firefox_profile=ff_profile)

    if browser != "chrome":
        raise Exception("Driver not supported")

    logger = logging.getLogger('chrome_fixture')
    logger.setLevel(LOG_LEVEL)
    chrome_options = ChromeOptions()
    chrome_options.headless = True
    for argument in ('--ignore-ssl-errors=yes', '--ignore-certificate-errors'):
        chrome_options.add_argument(argument)
    logger.info("Подготовка среды для запуска тестов...")
    chrome_options.add_experimental_option('w3c', False)

    hub_url = f"http://{executor}:4444/wd/hub"
    requested_caps = {
        "browserName": browser,
        "platform": "WIN10",
        "platformName": "WIN10"
    }
    remote = webdriver.Remote(command_executor=hub_url,
                              desired_capabilities=requested_caps,
                              options=chrome_options)
    driver = EventFiringWebDriver(remote, MyListener())
    logger.debug(
        "Браузер Chrome запущен со следующими desired_capabilities:{}".
        format(driver.desired_capabilities))
    return driver
Ejemplo n.º 27
0
    def test_passing_firefox_options(self):
        """Options passed to ``get_webdriver_for`` must show up in the driver capabilities."""
        headless_options = FirefoxOptions()
        headless_options.add_argument("--headless")

        self.driver = get_webdriver_for("firefox", options=headless_options)

        moz_headless = self.driver.capabilities["moz:headless"]
        self.assertTrue(moz_headless)
Ejemplo n.º 28
0
 def __init__(self,
              browser="Chrome",
              downloads: Optional[Union[os.PathLike, str]] = None):
     """Prepare headless browser options and capabilities; no driver is started.

     ``browser == "Firefox"`` selects ``FirefoxOptions``; every other value
     uses ``ChromeOptions``.  A 1366x768 window is requested through
     ``--window-size`` for ``"Chrome"`` and through ``--width`` /
     ``--height`` otherwise.  ``downloads`` is handed to
     ``self._configure_downloads`` together with ``browser``.
     """
     if browser == "Firefox":
         self.opts = FirefoxOptions()
     else:
         self.opts = ChromeOptions()
     self.opts.headless = True

     arguments = [
         "--no-sandbox",
         "--disable-extensions",
         "--ignore-certificate-errors",
         "--disable-gpu",
         "--start-maximized",
         "--enable-logging",
         "--enable-automation",
     ]
     if browser == "Chrome":
         arguments.append("--window-size=1366,768")
     else:
         arguments.extend(["--width=1366", "--height=768"])
     for argument in arguments:
         self.opts.add_argument(argument)

     # Extend a copy so the options' own capabilities stay untouched.
     capabilities = self.opts.capabilities.copy()
     capabilities["acceptSslCerts"] = True
     capabilities["acceptInsecureCerts"] = True
     capabilities["goog:loggingPrefs"] = {
         "browser": "ALL",
         "performance": "ALL"
     }
     self.capabilities = capabilities

     selenoid_host = os.environ.get("SELENOID_HOST")
     selenoid_port = os.environ.get("SELENOID_PORT", "4444")
     self.selenoid = {"host": selenoid_host, "port": selenoid_port}

     self._configure_downloads(browser, downloads)
     self.driver = None
     self.adcm = None
Ejemplo n.º 29
0
def scrape_video_no_protection(url: str) -> str:
    """
    Gets video url directly from page.

    The page is opened in headless Firefox, the play button is clicked and
    the ``src`` attribute of the video element is returned.  The browser is
    shut down in every case, including when no player is found.

    Raises:
        NoVideoAvailableException: if the play button or the video element
            cannot be found.
    """
    options = FirefoxOptions()
    options.add_argument("-headless")

    driver = webdriver.Firefox(options=options)
    try:
        driver.get(url)
        try:
            # clicks play to start video and load video url in the page
            play_button = driver.find_element_by_xpath("//div[@class = '{}']".format(PLAY_BUTTON_CLASS))
            play_button.click()

            # gets video url from page once is loaded
            video_player_element = driver.find_element_by_xpath("//video[@class = '{}']".format(VIDEO_ELEMENT_CLASS))
            return video_player_element.get_attribute('src')
        except NoSuchElementException:
            raise NoVideoAvailableException(NO_VIDEO_MESSAGE)
    finally:
        # Runs on success and on failure, so the browser never lingers.
        driver.quit()
Ejemplo n.º 30
0
def bake_chapters(start, stop):
    """
    Use Selenium to get the live javascript rendered webpage and then save it
    requires a geckodriver to be somewhere in the PATH

    For every chapter the inner HTML of the element with class ``container``
    is written to ``<ESTORIA_LOCATION>/edition/critical/<chapter>.html``.
    The headless browser is shut down when the loop ends or fails.

    :param start: start with this chapter
    :param stop: stop at this chapter (inclusive)
    """
    task_id = current_task.request.id
    logger.info('{}: bake_chapters task started'.format(task_id))
    logger.debug('{}: Baking chapters: {} to {}'.format(task_id, start, stop))

    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = webdriver.Firefox(firefox_options=opts)

    try:
        for i in range(start, stop + 1):
            logger.debug('{}: Bake chapter: {}'.format(task_id, i))
            url = settings.BAKING_WEBPAGES_BASEURL + 'chapter/?chapter={}'.format(
                i)
            driver.get(url)
            container = driver.find_element_by_class_name(
                'container').get_attribute('innerHTML')
            with open(os.path.join(settings.ESTORIA_LOCATION, 'edition/critical',
                                   str(i) + '.html'),
                      'w',
                      encoding='utf-8') as f:
                f.write(container)
    finally:
        # Without this every task run would leave a Firefox process behind.
        driver.quit()

    logger.info('{}: complete'.format(task_id))