Ejemplo n.º 1
0
def scrap(to_sql=False):
    """Scrape NASDAQ's most-active (share volume) table and render it as HTML.

    Opens the most-active page in a headless Firefox, reads the first five
    heading cells of every table row (symbol, name, last, change, volume)
    and renders the collected rows through ``scraper/index.html``.

    :param to_sql: when true, every scraped row is also added to the
        database session as a ``MostActive`` record and committed.
    :return: the rendered ``scraper/index.html`` template.
    """
    cell_selector = "td.most-active__cell.most-active__cell--heading"
    columns = ('symbol', 'name', 'last', 'change', 'volume')

    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = webdriver.Firefox(options=opts)
    most_active_list = []
    try:
        driver.get("https://www.nasdaq.com/market-activity/most-active")
        rows = driver.find_element_by_css_selector(
            'div.most-active__data-container--share-volume'
        ).find_elements_by_css_selector("tr.most-active__row")
        for row in rows:
            # One lookup per row instead of one identical lookup per column.
            cells = row.find_elements_by_css_selector(cell_selector)
            most_active = {
                column: cells[position].text
                for position, column in enumerate(columns)
            }
            if to_sql:
                db.session.add(MostActive(
                    most_active["symbol"],
                    most_active["name"],
                    # [1:] drops the first character of the price text
                    # (presumably a currency sign) before parsing.
                    float(most_active["last"][1:]),
                    float(most_active["change"]),
                    float(most_active["volume"].replace(",", "")),
                ))
                db.session.commit()
            most_active_list.append(most_active)
    finally:
        # Always shut the browser down, even when scraping fails half-way.
        driver.quit()

    df = pd.DataFrame(most_active_list)
    return render_template('scraper/index.html', tables=[df.to_html(classes='data')], titles=df.columns.values)
Ejemplo n.º 2
0
 def __init__(self, browser, user_data=''):
     """
     Start the WebDriver session selected by ``browser``.

     Accepted values are "ff", "ff_headless", "chrome", "chrome_headless",
     "chrome_user_data", "internet explorer" / "ie", "opera" and "edge".
     ``user_data`` is only read for "chrome_user_data", where it is handed
     to Chrome as a command-line argument. Any other ``browser`` value
     raises NameError.
     """
     if browser in ("chrome_headless", "chrome_user_data"):
         # For "chrome_user_data" the Chrome details can be looked up on
         # chrome://version/.
         chrome_options = ChromeOptions()
         extra_argument = '--headless' if browser == "chrome_headless" else user_data
         chrome_options.add_argument(extra_argument)
         driver = webdriver.Chrome(chrome_options=chrome_options)
     elif browser == "ff_headless":
         ff_options = FirefoxOptions()
         ff_options.set_headless()
         driver = webdriver.Firefox(firefox_options=ff_options)
     elif browser == "ff":
         driver = webdriver.Firefox()
     elif browser == "chrome":
         driver = webdriver.Chrome()
     elif browser in ("internet explorer", "ie"):
         driver = webdriver.Ie()
     elif browser == "opera":
         driver = webdriver.Opera()
     elif browser == "edge":
         driver = webdriver.Edge()
     else:
         raise NameError(
             "Not found %s browser,You can enter 'ie', 'ff', 'opera', 'edge', 'chrome' or 'chrome_headless'."
             % browser)
     self.driver = driver
Ejemplo n.º 3
0
def login1():
    """Log in through a headless Firefox and return the session cookie.

    Opens the login page, submits the hard-coded test credentials, waits
    five seconds and then reads the ``JSESSIONID`` cookie from the browser.

    :return: ``{'JSESSIONID': <cookie value>}``.
    :raises RuntimeError: if the browser holds no ``JSESSIONID`` cookie
        after the login attempt.
    """
    opt = FirefoxOptions()          # Firefox options object
    opt.headless = True             # run Firefox without a visible window
    driver = Firefox(options=opt)   # headless Firefox session
    try:
        driver.get("http://192.168.6.27:6030/passports/login?service=http%3A%2F%2F192.168.6.27%3A6030%2Fportals%2Fcas&tenantCode=cqsh&trial=false")

        driver.find_element(By.ID, "username").send_keys("test")
        driver.find_element(By.ID, "pwd1").send_keys("1")
        driver.find_element(By.CSS_SELECTOR, ".justUse").click()

        # Fixed wait after submitting the form; presumably gives the login
        # redirect time to set the cookie.
        time.sleep(5)

        session_cookies = [
            cookie for cookie in driver.get_cookies()
            if cookie['name'] == 'JSESSIONID'
        ]
    finally:
        # quit() ends the whole session, so no separate close() is needed,
        # and the browser no longer leaks when a step above fails.
        driver.quit()

    if not session_cookies:
        raise RuntimeError("JSESSIONID cookie not found after login")
    # If several cookies match, the last one wins (as before).
    return {'JSESSIONID': session_cookies[-1]['value']}
Ejemplo n.º 4
0
def scrape_video_no_protection(url: str) -> str:
    """
    Get the video url directly from the page.

    Opens ``url`` in a headless Firefox, clicks the play button so that the
    player loads its source, and reads the ``src`` attribute of the video
    element. The browser is shut down in every case, including when no
    player is found.

    :param url: address of the page that embeds the video.
    :return: value of the video element's ``src`` attribute.
    :raises NoVideoAvailableException: if the play button or the video
        element is not present on the page.
    """
    options = FirefoxOptions()
    options.add_argument("-headless")

    driver = webdriver.Firefox(options=options)
    try:
        driver.get(url)

        # Click play to start the video and load the video url in the page.
        play_button = driver.find_element_by_xpath("//div[@class = '{}']".format(PLAY_BUTTON_CLASS))
        play_button.click()

        # Read the video url from the page once it is loaded.
        video_player_element = driver.find_element_by_xpath("//video[@class = '{}']".format(VIDEO_ELEMENT_CLASS))
        return video_player_element.get_attribute('src')
    except NoSuchElementException as err:
        raise NoVideoAvailableException(NO_VIDEO_MESSAGE) from err
    finally:
        # quit() (not close()) so the driver process is ended as well, and
        # in a finally so the error path does not leak the browser.
        driver.quit()
Ejemplo n.º 5
0
 def __enter__(self):
     """Start a headless Firefox, store it on ``self.browser`` and return ``self``."""
     headless_options = FirefoxOptions()
     headless_options.set_headless(True)
     # The profile is a default-constructed FirefoxProfile.
     self.browser = Firefox(
         firefox_options=headless_options,
         firefox_profile=FirefoxProfile(),
     )
     return self
Ejemplo n.º 6
0
    def __init__(
        self,
        firefox_options: FirefoxOptions = None,
        desired_capabilities: dict = None,
        token: str = None,
        project_name: str = None,
        job_name: str = None,
        disable_reports: bool = False,
        report_type: ReportType = ReportType.CLOUD_AND_LOCAL,
    ):
        """Resolve the session capabilities and forward everything to the base class.

        Capability precedence:
          1. ``firefox_options`` (converted with ``to_capabilities()``),
          2. ``desired_capabilities`` (used as given),
          3. a default ``FirefoxOptions()`` when neither is supplied.

        The remaining arguments are passed to the parent initializer
        unchanged.
        """
        if firefox_options is not None:
            caps = firefox_options.to_capabilities()
        elif desired_capabilities is not None:
            caps = desired_capabilities
        else:
            caps = FirefoxOptions().to_capabilities()

        super().__init__(
            capabilities=caps,
            token=token,
            project_name=project_name,
            job_name=job_name,
            disable_reports=disable_reports,
            report_type=report_type,
        )
Ejemplo n.º 7
0
    def write_tf(self,filesize, threadnum, num_tfrecords=10):
        '''Write GZIP-compressed tfrecord files using one headless Firefox session.

        For each of the ``num_tfrecords`` files, tables are generated with
        ``self.generate_tables`` and, only when exactly ``filesize`` entries
        come back, serialized into ``self.outtfpath``. Batches that are
        ``None`` or have a different length are skipped silently.

        Input parameters are:
            filesize: number of images in one tfrecord
            threadnum: thread id, used in the file name and progress output
            num_tfrecords: how many tfrecord files to attempt
        '''
        options = tf.io.TFRecordOptions(tf.io.TFRecordCompressionType.GZIP)
        opts = FirefoxOptions()
        opts.headless = True
        driver = Firefox(executable_path='./geckodriver', options=opts)

        try:
            for idx in range(num_tfrecords):
                starttime = time.time()
                output_file_name = '{:02d}_{:08d}.tfrecord'.format(threadnum, idx)
                print('+Thread {} [{}/{}] generating ...'.format(threadnum, idx+1, num_tfrecords))

                # The second return value (table categories) is not used here.
                data_arr, _ = self.generate_tables(driver, filesize, output_file_name)
                if data_arr is None or len(data_arr) != filesize:
                    continue

                with tf.io.TFRecordWriter(os.path.join(self.outtfpath,output_file_name),options=options) as writer:
                    for imgindex, subarr in enumerate(data_arr):
                        arr = subarr[0]

                        # Only channel 0 of the image is kept.
                        img = np.asarray(subarr[1][0],np.int64)[:,:,0]
                        colmatrix = np.array(arr[1],dtype=np.int64)
                        cellmatrix = np.array(arr[2],dtype=np.int64)
                        rowmatrix = np.array(arr[0],dtype=np.int64)
                        bboxes = np.array(arr[3])
                        tablecategory = arr[4][0]
                        seq_ex = self.generate_tf_record(img, cellmatrix, rowmatrix, colmatrix, bboxes,tablecategory,imgindex,output_file_name)
                        writer.write(seq_ex.SerializeToString())
                    print('--- thread {} [{}/{}] completed in {:.3f}'.format(threadnum, idx+1, num_tfrecords, time.time() - starttime))
        finally:
            # Shut the browser down even when generation or writing fails.
            driver.stop_client()
            driver.quit()
Ejemplo n.º 8
0
 def __init__(self, browser='Chrome'):
     """Prepare headless browser options, capabilities and Selenoid settings.

     ``FirefoxOptions`` is used when ``browser`` is 'Firefox'; every other
     value gets ``ChromeOptions``. No driver is started here: ``driver``,
     ``ui``, ``adcm`` and ``_client`` are initialised to ``None``.
     """
     if browser == 'Firefox':
         self.opts = FirefoxOptions()
     else:
         self.opts = ChromeOptions()
     self.opts.headless = True
     for flag in (
             '--no-sandbox',
             '--disable-extensions',
             '--ignore-certificate-errors',
             '--disable-gpu',
             "--start-maximized",
             "--enable-logging",
             "--enable-automation",
     ):
         self.opts.add_argument(flag)

     # Work on a copy so the options object keeps its own capabilities.
     self.capabilities = self.opts.capabilities.copy()
     self.capabilities.update({
         'acceptSslCerts': True,
         'acceptInsecureCerts': True,
         'goog:loggingPrefs': {
             'browser': 'ALL',
             'performance': 'ALL'
         },
     })

     # Selenoid endpoint taken from the environment; the port defaults to "4444".
     self.selenoid = {
         'host': os.environ.get("SELENOID_HOST"),
         'port': os.environ.get("SELENOID_PORT", "4444")
     }
     self.driver = None
     self.ui = None
     self.adcm = None
     self._client = None
Ejemplo n.º 9
0
 def __init__(self, headless=True, options=None, path='myengine\\geckodriver'):
     """Start Firefox with the given arguments, then initialise ``Browser``.

     :param headless: value assigned to the options' ``headless`` flag.
     :param options: iterable of command-line arguments for Firefox;
         ``None`` (the default) means no extra arguments.
     :param path: location of the geckodriver executable. The backslash is
         written escaped; the resulting string is unchanged, but the
         literal no longer relies on the invalid ``\\g`` escape sequence.
     """
     browser_options = FirefoxOptions()
     # ``None`` replaces the former shared mutable ``[]`` default.
     for argument in options or ():
         browser_options.add_argument(argument)
     browser_options.headless = headless
     Firefox.__init__(self, options=browser_options, executable_path=path)
     Browser.__init__(self)
def prepare_browsers(headless: bool, driver_path: str,
                     twitter_profile_path: str) -> Browsers:
    """
    Start the three Firefox sessions used to search accounts.

    :param headless: whether the browsers run in headless mode
    :param driver_path: path to geckodriver; a falsy value falls back to
        "geckodriver"
    :param twitter_profile_path: path to the Firefox profile folder used
        for the Twitter browser
    :return: ``Browsers`` holding the LinkedIn, Xing and Twitter drivers,
        in that order
    """
    logging.info("Running Twitter scraper from profile in %s",
                 twitter_profile_path)
    driver_path = driver_path or "geckodriver"

    profile = FirefoxProfile()
    twitter_profile = FirefoxProfile(twitter_profile_path)
    # Adjust the add-on scope preferences on the Twitter profile.
    frozen_preferences = twitter_profile.DEFAULT_PREFERENCES["frozen"]
    frozen_preferences["extensions.autoDisableScopes"] = 0
    twitter_profile.set_preference("extensions.enabledScopes", 15)

    logins = social_media_logins(driver_path, profile)

    driver_options = FirefoxOptions()
    driver_options.headless = headless

    def start_firefox(firefox_profile):
        # All three browsers share the same options and driver binary.
        return Firefox(options=driver_options,
                       firefox_profile=firefox_profile,
                       executable_path=driver_path)

    linked_in_driver = start_firefox(profile)
    xing_driver = start_firefox(profile)
    twitter_driver = start_firefox(twitter_profile)

    set_login_data(linked_in_driver, logins[0])
    set_login_data(xing_driver, logins[1])
    retoggleAllTheAddons(twitter_driver)
    return Browsers(linked_in_driver, xing_driver, twitter_driver)
Ejemplo n.º 11
0
def bake_chapters(start, stop):
    """
    Use Selenium to get the live javascript rendered webpage and then save it
    requires a geckodriver to be somewhere in the PATH

    For every chapter the ``innerHTML`` of the element with class
    ``container`` is written to ``<ESTORIA_LOCATION>/edition/critical/<n>.html``.
    The browser is shut down when the task ends, whether or not it succeeded.

    :param start: start with this chapter
    :param stop: stop at this chapter (inclusive)
    """
    logger.info('{}: bake_chapters task started'.format(
        current_task.request.id))
    logger.debug('{}: Baking chapters: {} to {}'.format(
        current_task.request.id, start, stop))

    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = webdriver.Firefox(firefox_options=opts)

    try:
        for i in range(start, stop + 1):
            logger.debug('{}: Bake chapter: {}'.format(
                current_task.request.id, i))
            url = settings.BAKING_WEBPAGES_BASEURL + 'chapter/?chapter={}'.format(
                i)
            driver.get(url)
            container = driver.find_element_by_class_name(
                'container').get_attribute('innerHTML')
            with open(os.path.join(settings.ESTORIA_LOCATION,
                                   'edition/critical',
                                   str(i) + '.html'),
                      'w',
                      encoding='utf-8') as f:
                f.write(container)
    finally:
        # Previously the browser was never closed, leaving one Firefox
        # process behind per task run.
        driver.quit()

    logger.info('{}: complete'.format(current_task.request.id))
Ejemplo n.º 12
0
 def launch_application(browser_name, app_url):
     """Start the requested browser, store it in the global ``driver`` and open ``app_url``.

     Supported ``browser_name`` values are "chrome", "firefox" and "ie".
     After a successful launch a 5-second implicit wait is set and
     ``app_url`` is loaded.

     :raises ValueError: if ``browser_name`` is not supported.
     :raises WebDriverException: re-raised after logging when the browser
         cannot be started.
     """
     global driver
     log.info("in init method of selenium base")
     try:
         if browser_name == "chrome":
             option = ChromeOptions()
             option.add_argument("start-maximized")
             option.add_argument("--ignore-certificate-errors")
             option.add_argument("--disable-extensions")
             option.add_argument("--disable-infobars")
             option.add_argument("disable-notifications")
             driver = Chrome(executable_path="./drivers/chromedriver.exe",
                             options=option)
             log.info("chrome browser is launch successfully")
         elif browser_name == "firefox":
             # NOTE: a FirefoxProfile and FirefoxOptions used to be built
             # here but were never passed to Firefox(), so they had no
             # effect. They are dropped rather than wired in, because
             # passing them would change how the browser starts.
             driver = Firefox(executable_path="./drivers/geckodriver.exe")
             log.info("firefox browser is launch successfully")
         elif browser_name == "ie":
             driver = Ie(executable_path="./drivers/IEDriverServer.exe")
         else:
             # The name needs a placeholder; without one the logging module
             # reports a formatting error instead of the message.
             log.error("browser name is incorrect: %s", browser_name)
             raise ValueError(
                 "browser name is incorrect: {}".format(browser_name))
     except WebDriverException as err:
         # Log the actual exception instance, then propagate: continuing
         # would use an undefined or stale global ``driver`` below.
         log.critical("exception: %s", err)
         raise
     driver.implicitly_wait(5)
     driver.get(app_url)
Ejemplo n.º 13
0
def test4():
    """Open baidu.com in a headless Firefox and print the resulting URL."""
    options = FirefoxOptions()
    options.add_argument('--headless')
    dr = webdriver.Firefox(firefox_options=options)
    try:
        dr.get("https://www.baidu.com")
        print(dr.current_url)
    finally:
        # quit() ends the session and the driver process even if the page
        # load fails; close() alone only closes the window.
        dr.quit()
Ejemplo n.º 14
0
    def __init__(self):
        """Start a headless Firefox on ``self.driver``, then call ``self.initiate()``."""
        headless_options = FirefoxOptions()
        headless_options.add_argument("--headless")
        browser = webdriver.Firefox(firefox_options=headless_options)
        self.driver = browser

        self.initiate()
Ejemplo n.º 15
0
    def test_passing_firefox_options(self):
        """Options passed to ``get_webdriver_for`` must reach the driver.

        A "--headless" argument is supplied through ``FirefoxOptions`` and
        the started driver is expected to report ``moz:headless`` as truthy.
        """
        headless_options = FirefoxOptions()
        headless_options.add_argument("--headless")

        self.driver = get_webdriver_for("firefox", options=headless_options)

        reported_capabilities = self.driver.capabilities
        self.assertTrue(reported_capabilities["moz:headless"])
Ejemplo n.º 16
0
 def launch_browser(self, browser_name, url):
     """Start the requested browser, store it in the global ``driver`` and open ``url``.

     Supported ``browser_name`` values are "chrome", "firefox" and "ie".
     After a successful launch a 10-second implicit wait is set and ``url``
     is loaded.

     :raises ValueError: if ``browser_name`` is not supported.
     :raises WebDriverException: re-raised after logging when the browser
         cannot be started.
     """
     global driver
     try:
         if browser_name == "chrome":
             chromeoptions = ChromeOptions()
             chromeoptions.add_argument("start-maximized")
             chromeoptions.add_argument("disable-notifications")
             chromeoptions.add_argument("--ignore-certificate-errors")
             chromeoptions.add_argument("--disable-infobars")
             chromeoptions.add_argument("--disable-extensions")
             driver = webdriver.Chrome(
                 executable_path="./drivers/chromedriver.exe",
                 options=chromeoptions)
             log.info("chrome browser launch successfully")
         elif browser_name == "firefox":
             firefoxoptions = FirefoxOptions()
             # NOTE(review): "start-maximize" is kept as-is; confirm that
             # Firefox actually understands this argument.
             firefoxoptions.add_argument("start-maximize")
             driver = webdriver.Firefox(
                 executable_path="./drivers/geckodriver.exe",
                 options=firefoxoptions)
             log.info("firefox browser launch successfully")
         elif browser_name == "ie":
             ieoptions = IeOptions()
             # NOTE(review): same unverified argument as for Firefox.
             ieoptions.add_argument("start-maximize")
             driver = webdriver.Ie(
                 executable_path="./drivers/IEDriverServer.exe",
                 options=ieoptions)
             log.info("ie browser launch successfully")
         else:
             log.error("invalid browser name")
             raise ValueError(
                 "invalid browser name: {}".format(browser_name))
     except WebDriverException as e:
         # The exception needs a placeholder; without one the logging
         # module reports a formatting error instead of the message.
         log.error("exception %s", e)
         # Propagate: continuing would use an undefined or stale global
         # ``driver`` below.
         raise
     driver.implicitly_wait(10)
     driver.get(url)
Ejemplo n.º 17
0
 def __init__(self,
              browser="Chrome",
              downloads: Optional[Union[os.PathLike, str]] = None):
     """Prepare headless browser options, capabilities and Selenoid settings.

     ``FirefoxOptions`` is used when ``browser`` is "Firefox"; every other
     value gets ``ChromeOptions``. The window size is requested as
     1366x768, using ``--window-size`` when ``browser`` is "Chrome" and
     ``--width``/``--height`` otherwise. ``downloads`` is handed to
     ``self._configure_downloads`` together with ``browser``. No driver is
     started here.
     """
     if browser == "Firefox":
         self.opts = FirefoxOptions()
     else:
         self.opts = ChromeOptions()
     self.opts.headless = True

     flags = [
         "--no-sandbox",
         "--disable-extensions",
         "--ignore-certificate-errors",
         "--disable-gpu",
         "--start-maximized",
         "--enable-logging",
         "--enable-automation",
     ]
     if browser == "Chrome":
         flags.append("--window-size=1366,768")
     else:
         flags.extend(("--width=1366", "--height=768"))
     for flag in flags:
         self.opts.add_argument(flag)

     # Work on a copy so the options object keeps its own capabilities.
     self.capabilities = self.opts.capabilities.copy()
     self.capabilities.update({
         "acceptSslCerts": True,
         "acceptInsecureCerts": True,
         "goog:loggingPrefs": {
             "browser": "ALL",
             "performance": "ALL"
         },
     })

     # Selenoid endpoint taken from the environment; the port defaults to "4444".
     self.selenoid = {
         "host": os.environ.get("SELENOID_HOST"),
         "port": os.environ.get("SELENOID_PORT", "4444"),
     }
     self._configure_downloads(browser, downloads)
     self.driver = None
     self.adcm = None
 def __init__(self, config: dict):
     """
     Constructor

     Reads ``config["log"]["name"]`` for the logger, ``config["mode"]`` to
     decide whether a local SOCKS proxy is configured, and
     ``config["firefox"]`` for ``retries`` and ``page_timeout``. The driver
     itself is not started here.
     @param config the configuration to load options from
     """
     self.logger = logging.getLogger(config["log"]["name"])

     # Headless mode avoids graphical overhead.
     headless_options = FirefoxOptions()
     headless_options.set_headless(True)

     firefox_profile = FirefoxProfile()
     if config["mode"] == "tor":
         # Send traffic, including DNS lookups, through the SOCKS proxy on
         # 127.0.0.1:9050.
         proxy_preferences = (
             ("network.proxy.type", 1),
             ("network.proxy.socks", "127.0.0.1"),
             ("network.proxy.socks_port", 9050),
             ("network.proxy.socks_remote_dns", True),
         )
         for preference, value in proxy_preferences:
             firefox_profile.set_preference(preference, value)

     firefox_config = config["firefox"]
     self.retries = firefox_config["retries"]
     self.page_timeout = firefox_config["page_timeout"]
     self.options = headless_options
     self.profile = firefox_profile
     # No driver yet.
     self.driver = None
Ejemplo n.º 19
0
def driver_open(url, the_encoding="utf-8", timeout=3):
    """Load ``url`` in a headless Firefox and return the parsed page.

    The page load is limited to ``timeout`` seconds. If loading raises
    (typically because the limit was hit while scripts were still
    running), the function waits five seconds and then reads whatever
    ``document.documentElement.innerHTML`` holds at that point.

    :param url: address to open.
    :param the_encoding: encoding applied to the HTML before it is handed
        to ``BS``.
    :param timeout: page-load timeout in seconds.
    :return: the ``BS`` object built from the page's inner HTML.
    """
    from selenium.webdriver import FirefoxOptions
    from selenium import webdriver
    import time
    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = webdriver.Firefox(firefox_options=opts)
    try:
        driver.set_page_load_timeout(timeout)
        try:
            driver.get(url)
        except Exception:
            # Best effort: the load may fail or time out while the page's
            # scripts are still finishing, so give them a moment and read
            # the DOM anyway. (No longer a bare ``except``, so
            # KeyboardInterrupt/SystemExit are not swallowed.)
            time.sleep(5)
        print("++++++++++++++++++++++++++++++++++++++++")
        print("++++++++++++ run finnaly +++++++++++++++")
        print("++++++++++++++++++++++++++++++++++++++++")
        html2 = driver.execute_script(
            "return document.documentElement.innerHTML;")
        return BS(html2.encode(the_encoding))
    finally:
        # Always end the session, including when reading the DOM fails.
        driver.quit()
Ejemplo n.º 20
0
 def __init__(self, firefox_config: dict, tor_port: int):
     """
     Constructor

     Stores headless Firefox options and the numeric settings read from
     ``firefox_config``; the driver itself is not started here.
     @param firefox_config the configuration to load options from; the
            keys read are "retries", "wait_tag", "load_images",
            "clean_frequency" and "timeout" (with "page" and "element")
     @param tor_port stored as ``self.tor_port``
     """
     self.logger = logging.getLogger()

     # Headless mode avoids graphical overhead.
     headless_options = FirefoxOptions()
     headless_options.set_headless(True)
     self.options = headless_options

     # Settings from the config; everything except wait_tag is cast to int.
     self.retries = int(firefox_config["retries"])
     self.wait_tag = firefox_config["wait_tag"]
     self.load_images = int(firefox_config["load_images"])
     self.clean_frequency = int(firefox_config["clean_frequency"])
     timeouts = firefox_config["timeout"]
     self.page_timeout = int(timeouts["page"])
     self.element_timeout = int(timeouts["element"])

     self.tor_port = tor_port

     # Not available yet; these are filled in later.
     self.driver = None
     self.mode = None
     self.profile = None
Ejemplo n.º 21
0
    def __init__(self, position, lit, time):
        """Set up the search state, the details CSV file and a headless browser.

        :param position: job title keyword (for example: accounting).
        :param lit: education requirement keyword (for example: junior
            college, bachelor, master).
        :param time: work experience keyword (for example: fresh graduate,
            3-5 years).

        A CSV file named after the three keywords is (re)created in the
        current directory with only its header row, and a headless Firefox
        with a 10-second ``WebDriverWait`` is started.
        """
        # Search landing page.
        self.start_url = 'https://search.51job.com/list/000000,000000,0000,00,9,99,+,2,1.html'
        # Search keywords: [position, education requirement, work experience].
        self.key_words = [position, lit, time]
        self.df = pd.DataFrame(columns=['职位', '日期', '地点', '网址'])

        # Write the header row once. The file used to be truncated and
        # rewritten with the same header a second time after the browser
        # started. newline='' is what the csv module expects for files it
        # writes to.
        details_path = '职位详情{0}_{1}_{2}.csv'.format(self.key_words[0],
                                                      self.key_words[1],
                                                      self.key_words[2])
        with open(details_path, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['公司简介', '职位名称', '职位信息'])

        # Headless Firefox driven through webdriver.
        options = FirefoxOptions()
        options.add_argument('-headless')
        self.browser = Firefox(options=options)
        self.wait = WebDriverWait(self.browser, 10)
Ejemplo n.º 22
0
def driver_factory(browser, executor_url, test_name):
    """Create the WebDriver for ``browser``.

    * "firefox": a local headless Firefox whose profile accepts untrusted
      certificates.
    * "chrome": a remote session at ``executor_url + "/wd/hub"``, wrapped
      in an ``EventFiringWebDriver`` with ``MyListener``; ``test_name`` is
      sent as the session's "name" capability.

    Any other value raises ``Exception("Driver not supported")``.
    """
    if browser == "firefox":
        firefox_profile = FirefoxProfile()
        firefox_profile.accept_untrusted_certs = True
        firefox_options = FirefoxOptions()
        firefox_options.headless = True
        return webdriver.Firefox(options=firefox_options,
                                 firefox_profile=firefox_profile)

    if browser != "chrome":
        raise Exception("Driver not supported")

    logger = logging.getLogger('chrome_fixture')
    logger.setLevel(LOG_LEVEL)
    capabilities = dict([
        ("browserName", browser),
        ("version", "83.0"),
        ("enableVnc", True),
        ("enableVideo", True),
        ("enableLog", True),
        ("screenResolution", "1280x720"),
        ("name", test_name),
    ])
    remote = webdriver.Remote(command_executor=executor_url + "/wd/hub",
                              desired_capabilities=capabilities)
    driver = EventFiringWebDriver(remote, MyListener())
    logger.info(f"Start session {driver.session_id}")
    return driver
Ejemplo n.º 23
0
    def setUp(self):
        """Create the ``DBCreatorTester`` helper and a headless Firefox with a 5-second implicit wait."""
        self.db = DBCreatorTester()

        headless_options = FirefoxOptions()
        headless_options.add_argument('-headless')
        firefox = Firefox(options=headless_options)
        self.selenium = firefox

        self.selenium.implicitly_wait(5)
Ejemplo n.º 24
0
 def __init__(self, folder, profile, username, password):
     """Store the upload settings and start a headless Firefox.

     ``folder``, ``username`` and ``password`` are kept as attributes.
     ``profile`` is the name of a directory below
     ``/home/aniquetahir/.mozilla/firefox/`` that is loaded as the Firefox
     profile. The geckodriver and Firefox binary locations are hard-coded.
     """
     self.folder = folder
     self.username = username
     self.password = password

     firefox_options = FirefoxOptions()
     profile_dir = '/home/aniquetahir/.mozilla/firefox/' + profile
     firefox_profile = FirefoxProfile(profile_dir)
     firefox_options.add_argument('-headless')

     self.webdriver = Firefox(
         firefox_profile,
         executable_path='/home/aniquetahir/youtube-upload-folder/geckodriver',
         firefox_options=firefox_options,
         firefox_binary='/home/aniquetahir/firefox/firefox',
     )
Ejemplo n.º 25
0
    def setUp(self):
        """Start a headless Firefox and honour the ``STAGING_SERVER`` override.

        When the ``STAGING_SERVER`` environment variable is set to a
        non-empty value, ``self.live_server_url`` is replaced with
        ``http://<STAGING_SERVER>``.
        """
        headless_options = FirefoxOptions()
        headless_options.add_argument("--headless")
        self.browser = webdriver.Firefox(firefox_options=headless_options)

        staging_server = os.environ.get("STAGING_SERVER")
        if not staging_server:
            return
        self.live_server_url = f'http://{staging_server}'
def browser():
    """Yield a headless Firefox driver and shut it down afterwards.

    The geckodriver location is hard-coded. Teardown sits in a ``finally``
    so it also runs when the consumer of the generator fails or the
    generator is closed early.
    """
    options = FirefoxOptions()
    options.headless = True
    driver = Firefox(
        executable_path="/Users/amir/makmal/geckodriver/geckodriver",
        options=options,
    )
    try:
        yield driver
    finally:
        # quit() ends the session and the driver process; close() only
        # closed the current window.
        driver.quit()
Ejemplo n.º 27
0
 def get_mozilla_browser(self):
     """Start and return a headless Firefox driven by a managed geckodriver.

     The geckodriver binary is obtained through
     ``GeckoDriverManager().install()``.
     """
     options = FirefoxOptions()
     options.add_argument("no-sandbox")
     # ``accept_untrusted_certs`` / ``assume_untrusted_cert_issuer`` are
     # FirefoxProfile attributes; assigning them on the options object only
     # created unused attributes. Request insecure-certificate acceptance
     # through the capability instead.
     # NOTE(review): there is no options-level counterpart for
     # ``assume_untrusted_cert_issuer``; use a FirefoxProfile if it is needed.
     options.set_capability("acceptInsecureCerts", True)
     # NOTE(review): "no-sandbox" and "--disable-infobars" look like Chrome
     # switches; they are kept as-is, confirm Firefox accepts them.
     options.add_argument("--disable-infobars")
     options.add_argument("--headless")
     driver_ = webdriver.Firefox(executable_path=GeckoDriverManager().install(), options=options)
     return driver_
Ejemplo n.º 28
0
 def __init__(self, testConf):
     """Load the JSON test configuration and start a headless Firefox.

     :param testConf: path of the JSON configuration file; the parsed
         content is stored on ``self.testConf``.

     The configuration is read before the browser is launched, so a
     missing or malformed file no longer leaves an orphaned Firefox behind.
     """
     print(testConf)
     with open(testConf) as testConfFp:
         self.testConf = json.load(testConfFp)

     opts = FirefoxOptions()
     opts.add_argument("--headless")
     self.driver = webdriver.Firefox(
         executable_path=GeckoDriverManager().install(),
         firefox_options=opts)
Ejemplo n.º 29
0
 def test_empty_reset(self):
     """Pressing "reset" after typing a blank must leave the input empty."""
     opts = FirefoxOptions()
     opts.add_argument("--headless")
     driver = webdriver.Firefox(firefox_options=opts)
     try:
         driver.get("http://127.0.0.1:8000/verificacion/")
         element = driver.find_element_by_id("id_input")
         element.send_keys(" ")
         button = driver.find_element_by_id("reset")
         button.click()
         # The former assertIn("", ...) could never fail, because the empty
         # string is contained in every string. Compare the field's current
         # value instead.
         # NOTE(review): assumes "id_input" is a form field (input/textarea),
         # whose content is exposed through the "value" attribute.
         self.assertEqual(
             "", driver.find_element_by_id("id_input").get_attribute("value"))
     finally:
         # The browser was never closed before.
         driver.quit()
Ejemplo n.º 30
0
def test_example():
    """Open otus.ru in a kiosk-mode Firefox and check the page title."""
    gdd = GeckoDriverManager()
    gdd.download_and_install()
    option = FirefoxOptions()
    option.add_argument("--kiosk")
    wd = webdriver.Firefox(options=option)
    try:
        wd.get("https://otus.ru/")
        assert wd.title == 'Онлайн‑курсы для профессионалов, дистанционное обучение современным профессиям'
    finally:
        # Close the browser even when the title assertion fails.
        wd.quit()