def _get_chrome_driver(self, download_location, headless, driver_path):
    """Start a Chrome WebDriver using an explicitly supplied driver binary.

    Args:
        download_location: Value for Chrome's ``download.default_directory``
            preference. When falsy, no download preferences are set.
        headless: When truthy, Chrome is started with ``--headless`` and
            ``self.enable_download_in_headless_chrome`` is called on the
            new driver.
        driver_path: Path to the chromedriver executable. ``".exe"`` is
            appended when running on Windows.

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    options = chrome_webdriver.Options()

    if download_location:
        options.add_experimental_option('prefs', {
            'download.default_directory': download_location,
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'safebrowsing.enabled': False,
            'safebrowsing.disable_download_protection': True,
        })

    if headless:
        options.add_argument("--headless")

    # The Windows driver binary carries an ".exe" suffix.
    on_windows = sys.platform.startswith("win")
    executable = driver_path + ".exe" if on_windows else driver_path

    browser = Chrome(executable_path=executable, options=options)
    if headless:
        self.enable_download_in_headless_chrome(browser, download_location)
    return browser
def _get_chrome_driver(self, download_location, headless, chrome_option_prefs):
    """Start a Chrome WebDriver with merged download and caller preferences.

    Args:
        download_location: Download directory; its absolute path is stored
            in the ``download.default_directory`` preference. When falsy,
            no download preferences are added.
        headless: When truthy, Chrome is started with ``--headless`` and
            ``self._enable_download_in_headless_chrome`` is called on the
            new driver.
        chrome_option_prefs: Optional mapping of extra Chrome preferences.
            It is applied after the download preferences, so its keys win
            on conflict.

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    options = chrome_webdriver.Options()

    merged_prefs = {}
    if download_location:
        merged_prefs.update({
            'download.default_directory': os.path.abspath(download_location),
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'safebrowsing.enabled': False,
            'safebrowsing.disable_download_protection': True,
        })
    if chrome_option_prefs:
        merged_prefs.update(chrome_option_prefs)
    options.add_experimental_option('prefs', merged_prefs)

    # Flags used when running from a Docker container.
    for flag in ('--no-sandbox', '--disable-dev-shm-usage'):
        options.add_argument(flag)
    if headless:
        options.add_argument("--headless")

    browser = Chrome(options=options)
    if headless:
        self._enable_download_in_headless_chrome(browser, download_location)
    return browser
def _get_chrome_driver(self, download_location, headless):
    """Start Chrome with the ``chromedriver`` file located beside this module.

    Args:
        download_location: Value for Chrome's ``download.default_directory``
            preference. When falsy, no download preferences are set.
        headless: When truthy, Chrome is started with ``--headless`` and
            ``self.enable_download_in_headless_chrome`` is called on the
            new driver.

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    options = chrome_webdriver.Options()

    if download_location:
        download_prefs = dict([
            ('download.default_directory', download_location),
            ('download.prompt_for_download', False),
            ('download.directory_upgrade', True),
            ('safebrowsing.enabled', False),
            ('safebrowsing.disable_download_protection', True),
        ])
        options.add_experimental_option('prefs', download_prefs)

    if headless:
        options.add_argument("--headless")

    # The driver binary is expected in the same directory as this file.
    module_dir = os.path.dirname(os.path.realpath(__file__))
    browser = Chrome(os.path.join(module_dir, 'chromedriver'),
                     chrome_options=options)

    if headless:
        self.enable_download_in_headless_chrome(browser, download_location)
    return browser
def _get_chrome_driver(self, download_location, headless):
    """Start Chrome using the driver at ``/usr/local/bin/chromedriver``.

    Args:
        download_location: Value for Chrome's ``download.default_directory``
            preference. When falsy, no download preferences are set.
        headless: When truthy, Chrome is started with ``--headless`` and
            ``self.enable_download_in_headless_chrome`` is called on the
            new driver.

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    opts = chrome_webdriver.Options()

    if download_location:
        opts.add_experimental_option(
            "prefs",
            {
                "download.default_directory": download_location,
                "download.prompt_for_download": False,
                "download.directory_upgrade": True,
                "safebrowsing.enabled": False,
                "safebrowsing.disable_download_protection": True,
            },
        )

    if headless:
        opts.add_argument("--headless")

    browser = Chrome(
        executable_path="/usr/local/bin/chromedriver",
        chrome_options=opts,
    )
    if headless:
        self.enable_download_in_headless_chrome(browser, download_location)
    return browser
def _get_chrome_driver(self, download_location, headless):
    """Start Chrome with settings chosen by which machine the code runs on.

    If the directory ``/home/nhall/selpyvenv/`` exists, a ``Display`` is
    started, ``--no-sandbox`` is added and the driver at
    ``/usr/bin/chromedriver`` is used; ``--headless`` is never added on
    that path. Otherwise the driver at
    ``/Users/nigel/AuthnetTests/chromedriver 4`` is used and ``--headless``
    is added when ``headless`` is truthy.

    Args:
        download_location: Value for Chrome's ``download.default_directory``
            preference. When falsy, no download preferences are set.
        headless: When truthy, ``self.enable_download_in_headless_chrome``
            is called on the new driver (on both machine paths).

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    options = chrome_webdriver.Options()

    if download_location:
        options.add_experimental_option('prefs', {
            'download.default_directory': download_location,
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'safebrowsing.enabled': False,
            'safebrowsing.disable_download_protection': True,
        })

    if os.path.isdir("/home/nhall/selpyvenv/"):
        # A Display with visible=0 is started here instead of passing
        # --headless to Chrome.
        virtual_display = Display(visible=0, size=(800, 600))
        virtual_display.start()
        options.add_argument("--no-sandbox")
        browser = Chrome(chrome_options=options,
                         executable_path="/usr/bin/chromedriver")
    else:
        if headless:
            options.add_argument("--headless")
        browser = Chrome(
            chrome_options=options,
            executable_path="/Users/nigel/AuthnetTests/chromedriver 4")

    if headless:
        self.enable_download_in_headless_chrome(browser, download_location)
    return browser
def _get_chrome_driver(self, download_location, headless):
    """Start a Chrome WebDriver without an explicit driver path.

    No ``executable_path`` is passed to ``Chrome``, so locating the
    chromedriver binary is left to Selenium.

    Args:
        download_location: Value for Chrome's ``download.default_directory``
            preference. When falsy, no download preferences are set.
        headless: When truthy, Chrome is started with ``--no-sandbox``,
            ``--disable-gpu`` and ``--headless``, and
            ``self.enable_download_in_headless_chrome`` is called on the
            new driver.

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    chrome_options = chrome_webdriver.Options()
    if download_location:
        prefs = {
            'download.default_directory': download_location,
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'safebrowsing.enabled': False,
            'safebrowsing.disable_download_protection': True
        }
        chrome_options.add_experimental_option('prefs', prefs)

    if headless:
        # NOTE(review): the source formatting was lost; all three flags are
        # assumed to belong to the headless branch -- confirm.
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--headless")

    # No driver path is built here: nothing consumes one, and appending
    # ".exe" to a path that is never assigned fails on Windows.
    driver = Chrome(options=chrome_options)
    if headless:
        self.enable_download_in_headless_chrome(driver, download_location)
    return driver
def _get_chrome_driver(self, download_location, headless):
    """Start Chrome with the driver bundled under ``drivers/`` beside this module.

    Args:
        download_location: Value for Chrome's ``download.default_directory``
            preference. When falsy, no download preferences are set.
        headless: When truthy, Chrome is started with ``--headless`` and
            ``self.enable_download_in_headless_chrome`` is called on the
            new driver.

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    options = chrome_webdriver.Options()

    if download_location:
        options.add_experimental_option('prefs', {
            'download.default_directory': download_location,
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'safebrowsing.enabled': False,
            'safebrowsing.disable_download_protection': True,
        })

    if headless:
        options.add_argument("--headless")

    # Bundled driver lives in <this module's dir>/drivers/chromedriver,
    # with an ".exe" suffix on Windows.
    module_dir = os.path.dirname(os.path.realpath(__file__))
    suffix = ".exe" if sys.platform.startswith("win") else ""
    driver_binary = os.path.join(module_dir, "drivers/chromedriver") + suffix

    browser = Chrome(executable_path=driver_binary, chrome_options=options)
    if headless:
        self.enable_download_in_headless_chrome(browser, download_location)
    return browser
def get_chrome_driver(cls) -> ChromeWebDriver:
    """Return the driver stored on ``cls.chrome_driver``, creating it on first use.

    When ``cls.chrome_driver`` is ``None``, a new Chrome WebDriver is built
    with ``applicationCacheEnabled`` set to ``False`` in its desired
    capabilities, stored on the class, and given a 10-second implicit wait.

    Returns:
        The driver held in ``cls.chrome_driver``.
    """
    if cls.chrome_driver is not None:
        return cls.chrome_driver

    # Headless mode is not enabled here.
    options = chromedriver.Options()
    capabilities = {'applicationCacheEnabled': False}

    cls.chrome_driver = chromedriver.WebDriver(
        options=options,
        desired_capabilities=capabilities,
    )
    # NOTE(review): the implicit wait is assumed to be set only when the
    # driver is first created (source indentation was lost) -- confirm.
    cls.chrome_driver.implicitly_wait(10)  # seconds
    return cls.chrome_driver
def _get_chrome_driver(self, headless):
    """Start Chrome with the driver bundled under ``drivers/`` beside this module.

    Args:
        headless: When truthy, Chrome is started with ``--headless``.

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    options = chrome_webdriver.Options()
    if headless:
        options.add_argument("--headless")

    here = os.path.dirname(os.path.realpath(__file__))
    driver_binary = os.path.join(here, "drivers/chromedriver")
    if sys.platform.startswith("win"):
        # The Windows driver binary carries an ".exe" suffix.
        driver_binary = driver_binary + ".exe"

    return Chrome(executable_path=driver_binary, options=options)
def _get_chrome_driver(self, download_location, headless):
    """Start Chrome via a ``chromedriver`` binary referenced by bare name.

    The driver is launched with ``executable_path='chromedriver'``, so the
    binary must be resolvable by that name (typically via ``PATH``).

    Args:
        download_location: Value for Chrome's ``download.default_directory``
            preference. When falsy, no download preferences are set.
        headless: When truthy, Chrome is started with ``--headless`` plus
            the additional flags listed below, and
            ``self.enable_download_in_headless_chrome`` is called on the
            new driver.

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    chrome_options = chrome_webdriver.Options()
    if download_location:
        prefs = {
            'download.default_directory': download_location,
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'safebrowsing.enabled': False,
            'safebrowsing.disable_download_protection': True
        }
        chrome_options.add_experimental_option('prefs', prefs)

    if headless:
        # NOTE(review): the source formatting was lost; every flag below is
        # assumed to belong to the headless branch -- confirm.
        headless_flags = (
            '--headless',
            '--no-sandbox',
            '--disable-gpu',
            '--window-size=1280x1696',
            '--hide-scrollbars',
            '--enable-logging',
            '--log-level=0',
            '--v=99',
            '--single-process',
            '--ignore-certificate-errors',
            '--disk-cache-dir=/tmp/cache-dir',
            'user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
        )
        for flag in headless_flags:
            chrome_options.add_argument(flag)

    # The driver is referenced by name only, so no bundled-driver path is
    # computed here.
    driver = Chrome(executable_path='chromedriver',
                    chrome_options=chrome_options)
    if headless:
        self.enable_download_in_headless_chrome(driver, download_location)
    return driver
def _get_chrome_driver(self, download_location, headless):
    """Start a Chrome WebDriver without an explicit driver path.

    No ``executable_path`` is passed to ``Chrome``, so locating the
    chromedriver binary is left to Selenium.

    Args:
        download_location: Value for Chrome's ``download.default_directory``
            preference. When truthy, the download preferences are set and
            the ``plugins.plugins_list`` preference marks
            "Chrome PDF Viewer" as not enabled. When falsy, no preferences
            are set.
        headless: When truthy, Chrome is started with ``--headless`` and
            ``self.enable_download_in_headless_chrome`` is called on the
            new driver.

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    chrome_options = chrome_webdriver.Options()
    if download_location:
        prefs = {
            'download.default_directory': download_location,
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'safebrowsing.enabled': False,
            'safebrowsing.disable_download_protection': True,
            "plugins.plugins_list": [{
                "enabled": False,
                "name": "Chrome PDF Viewer"
            }]
        }
        chrome_options.add_experimental_option('prefs', prefs)

    # NOTE(review): the source formatting was lost; the log-level flag is
    # assumed to apply regardless of download_location -- confirm.
    chrome_options.add_argument('log-level=3')
    if headless:
        chrome_options.add_argument("--headless")

    # No driver path is computed: Chrome() is called without one.
    driver = Chrome(chrome_options=chrome_options)
    if headless:
        self.enable_download_in_headless_chrome(driver, download_location)
    return driver
def get_chrome_driver(self, download_location, headless,
                      chromedriver_logging_on):
    """Start Chrome with the driver returned by ``self.get_chromedriver_path()``.

    Args:
        download_location: Value for Chrome's ``download.default_directory``
            preference. When falsy, no download preferences are set.
        headless: When truthy, Chrome is started with ``--headless`` and
            ``--no-sandbox``, and
            ``self.enable_download_in_headless_chrome`` is called on the
            new driver.
        chromedriver_logging_on: When truthy, chromedriver is started with
            ``--verbose`` and ``--log-path=./chromedriver.log``; otherwise
            no service arguments are passed.

    Returns:
        The ``Chrome`` driver instance that was created.
    """
    options = chrome_webdriver.Options()

    if download_location:
        options.add_experimental_option('prefs', {
            'download.default_directory': download_location,
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'safebrowsing.enabled': False,
            'safebrowsing.disable_download_protection': True,
        })

    if headless:
        # NOTE(review): --no-sandbox is assumed to sit in the headless
        # branch (source indentation was lost); the original comment says
        # it is "required to run in docker (?)" -- confirm.
        for flag in ("--headless", "--no-sandbox"):
            options.add_argument(flag)

    driver_path = self.get_chromedriver_path()

    if chromedriver_logging_on:
        print("Loading ChromeDriver with logging on")
        extra_service_args = ["--verbose", "--log-path=./chromedriver.log"]
    else:
        extra_service_args = None

    # NOTE(review): this message is assumed to be printed on both branches
    # (source indentation was lost) -- confirm.
    print("Loading ChromeDriver from path:" + driver_path)

    browser = Chrome(executable_path=driver_path,
                     chrome_options=options,
                     service_args=extra_service_args)
    if headless:
        self.enable_download_in_headless_chrome(browser, download_location)
    return browser
import os
import sys

from selenium.webdriver import Chrome
from selenium.webdriver.chrome import webdriver as chrome_webdriver

# Script: open the Globo Esporte Brasileirao Serie A page in Chrome and print
# the text of the element with class 'classificacao__pontos-corridos'.

# The chromedriver binary is expected under "drivers/" next to this script,
# with an ".exe" suffix on Windows.
dir_path = os.path.dirname(os.path.realpath(__file__))
driver_path = os.path.join(dir_path, "drivers/chromedriver")
if sys.platform.startswith("win"):
    driver_path += ".exe"

options = chrome_webdriver.Options()
# To run without showing the Chrome window, uncomment the line below.
# options.add_argument("--headless")

driver = Chrome(executable_path=driver_path, chrome_options=options)
try:
    driver.get("https://globoesporte.globo.com/futebol/brasileirao-serie-a/")
    tabela_brasileirao = driver.find_element_by_class_name(
        'classificacao__pontos-corridos')
    print(tabela_brasileirao.text)
finally:
    # Always end the browser session, even when the page load or the
    # element lookup fails, so no Chrome/chromedriver process is left behind.
    driver.quit()
def crawling_stock_info():
    """Return the paths of the per-market investor CSV files.

    When ``DEBUG`` is truthy, no browser is started and four fixed paths
    under ``./tests/ssad_info_2020_01_31_00_18_49`` are returned.

    Otherwise Chrome is driven against the KRX market-data page: the search
    date is set to today, and for each market in ``(KOSPI, KOSDAQ)`` the CSV
    export is downloaded twice, once with investor value ``'7050'``
    (institution total) and once with ``'9000'`` (foreign). Each download
    arrives as ``data.csv`` in a timestamped directory under ``/tmp`` and is
    renamed to ``<market[1]>_<INSTITUTION_FILE_NAME>`` or
    ``<market[1]>_<FOREIGNER_FILE_NAME>``.

    Returns:
        list[str]: File paths; for each market, the institution file
        followed by the foreigner file.
    """
    if DEBUG:
        # Use previously downloaded files instead of crawling.
        download_path = './tests/ssad_info_2020_01_31_00_18_49'
        return [
            f'{download_path}/kospi_institution.csv',
            f'{download_path}/kospi_foreigner.csv',
            f'{download_path}/kosdaq_institution.csv',
            f'{download_path}/kosdaq_foreigner.csv',
        ]

    options = chrome_webdriver.Options()
    download_path = '/tmp/ssad_info_{}'.format(
        datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))
    options.add_experimental_option(
        'prefs', {'download.default_directory': download_path})
    chrome_driver = webdriver.Chrome('../etc/chromedriver', options=options)

    stock_info_file_path_list = []
    try:
        chrome_driver.get(
            'http://marketdata.krx.co.kr/contents/MKD/04/0404/04040400/MKD04040400.jsp'
        )

        # Change the selected date to today.
        schdate = chrome_driver.find_element_by_name('schdate')
        schdate.clear()
        schdate.send_keys(datetime.now().strftime('%Y%m%d'))

        for market in (KOSPI, KOSDAQ):
            # Select the market, KOSPI or KOSDAQ.
            chrome_driver.find_element_by_css_selector(
                f'.design-fieldset > form > dl > dd > input:nth-child({market[0]})'
            ).click()
            select_element_id = chrome_driver.find_element_by_name(
                'var_invr_cd').get_attribute("id")

            # Institution total.
            invester = Select(
                chrome_driver.find_element_by_id(select_element_id))
            invester.select_by_value('7050')
            chrome_driver.find_element_by_class_name(
                'btn-board.btn-board-search').click()
            excel_button = chrome_driver.find_element_by_xpath(
                "//*[contains(text(), 'CSV')]")
            excel_button.click()
            # Fixed pause to let the browser finish writing data.csv.
            time.sleep(6)
            institution_file_path = f'{download_path}/{market[1]}_{INSTITUTION_FILE_NAME}'
            os.rename(f'{download_path}/data.csv', institution_file_path)
            stock_info_file_path_list.append(institution_file_path)

            # Foreign.
            invester.select_by_value('9000')
            chrome_driver.find_element_by_class_name(
                'btn-board.btn-board-search').click()
            excel_button.click()
            time.sleep(6)
            foreigner_file_path = f'{download_path}/{market[1]}_{FOREIGNER_FILE_NAME}'
            os.rename(f'{download_path}/data.csv', foreigner_file_path)
            stock_info_file_path_list.append(foreigner_file_path)
    finally:
        # End the browser session on every exit path; quit() also stops the
        # chromedriver process, which close() alone leaves running.
        chrome_driver.quit()

    return stock_info_file_path_list