コード例 #1
0
def test_load_dividends_table():
    """Check that loading the dividends table from the Akron page lands on its dividends URL."""
    start_url = 'https://www.conomy.ru/emitent/akron'
    expected_url = 'https://www.conomy.ru/emitent/akron/akrn-div'

    firefox_opts = options.Options()
    firefox_opts.headless = True

    with webdriver.Firefox(options=firefox_opts) as browser:
        browser.get(start_url)
        conomy_ru.load_dividends_table(browser)
        assert browser.current_url == expected_url
コード例 #2
0
ファイル: login.py プロジェクト: emmhaha/XJcrawler
 def __init__(self):
     """Store the login settings and start a headless Firefox driver."""
     self.LOGIN_URL = "http://example.python-scraping.com/user/login"
     self.LOGIN_NAME = "*****@*****.**"
     self.LOGIN_PASSWORD = "******"

     option = options.Options()
     option.add_argument('-headless')
     # Hand the configured options to the driver. ``options=None`` was passed
     # before, which silently discarded the ``-headless`` argument set above.
     self.driver = webdriver.Firefox(options=option)
コード例 #3
0
def test_load_ticker_page():
    """Check that loading ticker 'AKRN' ends up on the Akron issuer page."""
    expected_url = 'https://www.conomy.ru/emitent/akron'

    firefox_opts = options.Options()
    firefox_opts.headless = True

    with webdriver.Firefox(options=firefox_opts) as browser:
        conomy_ru.load_ticker_page(browser, 'AKRN')
        # xpath_await is called on the dividends menu before the URL check
        # (presumably it blocks until that element is present).
        conomy_ru.xpath_await(browser, conomy_ru.DIVIDENDS_MENU)
        assert browser.current_url == expected_url
コード例 #4
0
def setup_driver() -> webdriver:
    """Create and return a headless Firefox driver.

    The driver service is built from the value returned by
    ``GeckoDriverManager().install()``.
    """
    headless_opts = options.Options()
    headless_opts.headless = True

    gecko_service = Service(GeckoDriverManager().install())
    return webdriver.Firefox(options=headless_opts, service=gecko_service)
コード例 #5
0
ファイル: executor.py プロジェクト: quickstrom/pyquickstrom
 def new_driver(self):
     """Create a WebDriver for the browser named by ``self.browser``.

     ``'chrome'`` and ``'firefox'`` are supported. The driver executable is
     located with ``which`` and headless mode follows ``self.headless``.

     Raises:
         Exception: if the driver executable is not found in PATH, or if
             ``self.browser`` is neither ``'chrome'`` nor ``'firefox'``.
     """
     if self.browser == 'chrome':
         chrome_opts = chrome_options.Options()
         chrome_opts.headless = self.headless
         # Either binary name is accepted; "chrome" is tried first.
         chrome_binary = which("chrome") or which("chromium")
         chrome_opts.binary_location = chrome_binary    # type: ignore
         driver_path = which('chromedriver')
         if not driver_path:
             raise Exception("chromedriver not found in PATH")
         return webdriver.Chrome(options=chrome_opts,
                                 executable_path=driver_path)

     if self.browser == 'firefox':
         firefox_opts = firefox_options.Options()
         firefox_opts.headless = self.headless
         # The binary is passed to the driver directly rather than being set
         # on the options object.
         firefox_binary = FirefoxBinary(which("firefox"))
         driver_path = which('geckodriver')
         if not driver_path:
             raise Exception("geckodriver not found in PATH")
         log_path = self.driver_log_file or "geckodriver.log"
         return webdriver.Firefox(options=firefox_opts,
                                  firefox_binary=firefox_binary,
                                  executable_path=driver_path,
                                  service_log_path=log_path)

     raise Exception(f"Unsupported browser: {self.browser}")
コード例 #6
0
def test_xpath_await():
    """Check that xpath_await returns the search field, identified by its placeholder."""
    expected_placeholder = 'Поиск эмитентов по названию или тикеру'

    firefox_opts = options.Options()
    firefox_opts.headless = True

    with webdriver.Firefox(options=firefox_opts) as browser:
        browser.get(conomy_ru.SEARCH_URL)
        search_field = conomy_ru.xpath_await(browser, conomy_ru.SEARCH_FIELD)
        assert search_field.get_attribute('placeholder') == expected_placeholder
コード例 #7
0
def test_xpath_await_no_element():
    """Check that xpath_await raises TimeoutException for ``SEARCH_FIELD + '/div'``.

    The third argument ``1`` is presumably the timeout in seconds.
    """
    firefox_opts = options.Options()
    firefox_opts.headless = True

    with webdriver.Firefox(options=firefox_opts) as browser:
        with pytest.raises(TimeoutException) as error:
            browser.get(conomy_ru.SEARCH_URL)
            conomy_ru.xpath_await(browser, conomy_ru.SEARCH_FIELD + '/div', 1)
    assert error.type == TimeoutException
コード例 #8
0
def get_html(ticker: str):
    """Return the HTML source of the page with dividend data."""
    firefox_opts = options.Options()
    firefox_opts.headless = True

    with webdriver.Firefox(options=firefox_opts) as browser:
        # NOTE(review): confirm load_ticker_page's parameter order is
        # (ticker, driver) as used here.
        load_ticker_page(ticker, browser)
        load_dividends_table(browser)
        page_html = browser.page_source
        return page_html
コード例 #9
0
    def init_firefox(self):
        """Start a headless Firefox driver and store it in ``self.drivers['firefox']``.

        The driver's page-load timeout is set to 60 seconds.
        """
        options = firefox.Options()
        # ``Options.set_headless()`` is deprecated and absent from Selenium 4.
        # Adding the ``-headless`` argument has the same effect and works on
        # both old and new Selenium releases.
        options.add_argument('-headless')

        driver = webdriver.Firefox(options=options)
        driver.set_page_load_timeout(60)

        self.drivers['firefox'] = driver
コード例 #10
0
    def __init__(self, cache=RedisCache(), delay=2):
        """Set up the cache, throttle, headless Firefox driver and CSV writer.

        Args:
            cache: object stored on ``self.cache``.
            delay: value passed to ``Throttle``.
        """
        # NOTE(review): the ``RedisCache()`` default is evaluated once, when
        # the function is defined, so instances that omit ``cache`` share it.
        headless_opts = options.Options()
        headless_opts.add_argument('-headless')

        self.cache = cache
        self.throttle = Throttle(delay)

        self.driver = webdriver.Firefox(options=headless_opts)
        self.driver.set_page_load_timeout(20)

        # NOTE(review): the file object is not kept on the instance, so this
        # method gives callers no handle to close it explicitly.
        csv_file = open("./data/data.csv", "w", newline="")
        self.writer = csv.writer(csv_file)
コード例 #11
0
ファイル: login.py プロジェクト: stivenroytman/twittercli
def driverInit(headLess=False):
    """Create the webdriver object.

    Args:
        headLess: when truthy, Firefox is started with the headless option set.

    Returns:
        The new Firefox webdriver.
    """
    if not headLess:
        return webdriver.Firefox()

    headless_opts = options.Options()
    headless_opts.headless = True
    return webdriver.Firefox(options=headless_opts)
コード例 #12
0
def get_firefox():
    """Create a headless Firefox driver that uses a fixed local profile.

    The profile directory is hard-coded and the driver executable path is
    taken from ``PATH_FIREFOX_DRIVER``.
    """
    profile_dir = 'C:/Users/Tu Le/AppData/Roaming/Mozilla/Firefox/Profiles/ejmvt067.default-1554669393168'

    headless_opts = firefox_options.Options()
    headless_opts.headless = True
    profile = webdriver.FirefoxProfile(profile_dir)

    return webdriver.Firefox(
        options=headless_opts,
        firefox_profile=profile,
        executable_path=PATH_FIREFOX_DRIVER,
    )
コード例 #13
0
def main():
    """Call ``run_routes`` forever, sleeping ``SLEEP_BASE`` plus -60..60 seconds between runs."""
    random.seed()
    firefox_profile = webdriver.FirefoxProfile('/home/jerry/.mozilla/firefox/huqgok2n.default')
    headless_opts = options.Options()
    headless_opts.headless = True

    # This loop never exits on its own.
    while True:
        run_routes(firefox_profile, headless_opts)
        pause = SLEEP_BASE + random.randint(-60, 60)
        print('Sleeping %d s.' % pause)
        time.sleep(pause)
コード例 #14
0
    def __init__(self, headless_mode: bool = False) -> None:
        """
        Build the Firefox options and start the browser.

        :param headless_mode: controls whether the browser is rendered.
        By default (headless_mode = False) the browser is rendered.

        :return:
            None
        """
        browser_options = options.Options()
        browser_options.headless = headless_mode

        self._options: options.Options = browser_options
        self._browser: WebDriver = webdriver.Firefox(options=browser_options)
コード例 #15
0
 def __init__(self, selenium_webdriver=None, headless=False):  # noqa: D107
     """Wrap a supplied webdriver, or create a Firefox one.

     Args:
         selenium_webdriver: webdriver to use; when falsy, a Firefox
             webdriver is created instead.
         headless: headless setting for the Firefox webdriver created here
             (not applied to a supplied webdriver).
     """
     self.element_class = Element

     # If a webdriver is supplied, use it as-is.
     if selenium_webdriver:
         self.selenium_webdriver = selenium_webdriver
         return

     # No webdriver supplied: configure and use a Firefox webdriver.
     firefox_opts = firefox_options.Options()
     firefox_opts.headless = headless
     self.selenium_webdriver = selenium.webdriver.Firefox(
         executable_path="geckodriver",
         options=firefox_opts,
     )
コード例 #16
0
ファイル: reuseBrowser.py プロジェクト: FengZiQ/tool
    def start_session(self, capabilities, browser_profile=None):
        """
        Override the start_session method.

        Validates ``capabilities``, merges the encoded ``browser_profile``
        into it when one is given, then sets ``self.capabilities`` from a
        default ``f_option.Options()``, ``self.session_id`` from
        ``self.r_session_id`` and ``self.w3c`` to False. No other calls are
        made here.

        Raises:
            InvalidArgumentException: if ``capabilities`` is not a dict.
        """
        if not isinstance(capabilities, dict):
            raise InvalidArgumentException("Capabilities must be a dictionary")

        if browser_profile:
            if "moz:firefoxOptions" not in capabilities:
                capabilities.update({'firefox_profile': browser_profile.encoded})
            else:
                capabilities["moz:firefoxOptions"]["profile"] = browser_profile.encoded

        # NOTE(review): self.capabilities comes from fresh default options,
        # not from the ``capabilities`` argument updated above.
        default_options = f_option.Options()
        self.capabilities = default_options.to_capabilities()
        self.session_id = self.r_session_id
        self.w3c = False
コード例 #17
0
def test_nondefault_browser():
    """Check that Browser works with an explicitly created Firefox webdriver."""
    # Create and configure the browser.
    headless_opts = firefox_options.Options()
    headless_opts.headless = True
    firefox_driver = selenium.webdriver.Firefox(
        executable_path="geckodriver",
        options=headless_opts,
    )
    browser = Browser(firefox_driver)

    # Test the browser.
    browser.visit(utilities.build_url("test_actions.html"))
    assert browser.title == "Actions test page"

    # Stop the browser.
    browser.quit()
コード例 #18
0
    def setUp(self):
        """Create fixtures, start headless Firefox and copy the login session cookie into it."""
        self.world = baker.make(models.Place, site_type=enums.SiteTypes.WORLD)
        self.url = reverse(self.resolver)
        self.url = f'{self.live_server_url}{self.url}'

        headless_opts = firefox_options.Options()
        self.options = headless_opts
        self.options.headless = True
        self.browser = webdriver.Firefox(options=self.options)

        # Log the user in through the test client and keep its session cookie.
        self.user = baker.make(get_user_model())
        self.client.force_login(self.user)
        self.session_cookie = self.client.cookies['sessionid']

        # The page is opened first, then the cookie is added and the page
        # refreshed (presumably so the refreshed request carries the session).
        self.browser.get(self.url)
        session_cookie = {
            'name': 'sessionid',
            'value': self.session_cookie.value,
            'secure': False,
            'path': '/',
        }
        self.browser.add_cookie(session_cookie)
        self.browser.refresh()
コード例 #19
0
ファイル: hhs_robot.py プロジェクト: 7astro7/hhsBreaches
 def __init__(
     self,
     headless_browser: bool = True,
     download_dir: str = "data",
 ):
     """Configure Firefox to save CSV downloads into ``download_dir`` and start it.

     Args:
         headless_browser: when true, the browser is started headless.
         download_dir: directory that receives downloads; it is created
             (including missing parent directories) if it does not exist.
     """
     self.options = options.Options()
     self.options.set_preference("browser.helperApps.neverAsk.saveToDisk",
                                 "text/csv")
     self.headless_browser = headless_browser
     if self.headless_browser:
         self.options.headless = True
     # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
     # check-then-create race and also handles nested paths such as "a/b".
     os.makedirs(download_dir, exist_ok=True)
     self.download_dir = os.path.abspath(download_dir)
     # don't use default Downloads directory
     self.options.set_preference("browser.download.folderList", 2)
     # set download directory
     self.options.set_preference("browser.download.dir", self.download_dir)
     self.driver = webdriver.Firefox(options=self.options)
     if self.driver is not None:
         self.driver.implicitly_wait(12)
         self.driver.set_page_load_timeout(30)
コード例 #20
0
    def __init__(self, timeout: int = 0, headless_mode: bool = False) -> None:
        """
        Class responsible for extracting chapter information for the manga
        hosted at "https://mangalivre.net", where the pages are generated
        with JavaScript.


        :param timeout: time in seconds the browser will wait for the
        chapter information to appear.

        :param headless_mode: controls whether or not the browser is
        rendered. By default, the browser is rendered.

        :return:
            None
        """
        browser_options = options.Options()
        browser_options.headless = headless_mode

        self._options: options.Options = browser_options
        self._browser: WebDriver = webdriver.Firefox(options=browser_options)
        self._web_driver_wait = WebDriverWait(driver=self._browser, timeout=timeout)
コード例 #21
0
class GetCreditCardRewards(unittest.TestCase):
    """Scrape current reward categories for Discover, Citi and Chase and store them.

    NOTE(review): every statement below sits directly in the class body, so it
    runs once when the class is defined, not inside a test method. All names
    assigned here become class attributes. The driver is not quit in this
    block.
    """
    # Start a headless Firefox webdriver.
    Options = options.Options()
    Options.add_argument("--headless")
    driver = webdriver.Firefox(firefox_options=Options)
    # Discover: read the offer name from the cashback calendar page.
    driver.get(
        "https://www.discover.com/credit-cards/cashback-bonus/cashback-calendar.html"
    )
    discoverItRewards = driver.find_element_by_css_selector(
        ".offer-enroll .offer-name").text
    # Citi: collect every description element and join their texts.
    driver.get(
        "https://www.citi.com/credit-cards/credit-cards-citi/citi.action?ID=dividend-quarterly-offer"
    )
    citiRewardsList = driver.find_elements_by_css_selector(
        ".selectedProductRow .descriptions")
    citi = ""
    for citiRewards in citiRewardsList:
        # While ``citi`` is still empty the text replaces it; afterwards each
        # further text is appended with " and " as the separator.
        if citi == "":
            citi = citiRewards.text
        else:
            citi = citi + " and " + citiRewards.text
    # Chase is harder because the categories are all images, so the value is
    # taken from the alt text of the image in the current quarter's box.
    currentMonth = datetime.now().month
    # Zero-based quarter index: months 1-3 -> 0, 4-6 -> 1, 7-9 -> 2, 10-12 -> 3.
    currentQuarter = (currentMonth - 1) // 3
    driver.get("https://creditcards.chase.com/freedom/calendar")
    chaseRewardsList = driver.find_elements_by_css_selector(
        ".quarter-box-wrapper .quarter-box")
    chase = chaseRewardsList[currentQuarter].find_element_by_css_selector(
        "img")
    chase = chase.get_attribute("alt")

    # Store the results (presumably in SQLite, via updateCategories).
    updateCategories('Discover', discoverItRewards)
    updateCategories('Citi', citi)
    updateCategories('Chase', chase)
コード例 #22
0
ファイル: main.py プロジェクト: abhiraj2/khdownloader
import re

browser = None

# Prefer Chrome. Any failure while starting it is treated as "not available".
try:
    chrome_options = options.Options()
    chrome_options.add_argument("--headless")
    browser = webdriver.Chrome(options=chrome_options)
    print("Chrome Found.")
    browser.maximize_window()
except Exception:
    print("Chrome not Found. Moving to Firefox.")

# Fall back to Firefox only when no Chrome driver was created. Running this
# unconditionally would start a second browser and overwrite (and leak) the
# Chrome driver created above.
if browser is None:
    try:
        firefox_options = fir_options.Options()
        firefox_options.add_argument("--headless")
        browser = webdriver.Firefox(options=firefox_options)
        print("Firefox Found.")
        browser.maximize_window()
    except Exception:
        print("Firefox not found. Get one of these")

# Stop with a clear message instead of failing later on ``browser.get``.
if browser is None:
    raise SystemExit("No usable browser: neither Chrome nor Firefox could be started.")

# url for album to download.
url = input("Enter the URL for the album to download: ")

# Every occurrence of 'FLAC' in the raw page text fetched with requests.
flac_check = re.findall('FLAC', requests.get(url).text)
browser.get(url)

time.sleep(2)
コード例 #23
0
def _open_headless_browser():
    """Return a headless Chrome driver, else a headless Firefox driver, else None."""
    browser = None
    try:
        chrome_options = options.Options()
        chrome_options.add_argument("--headless")
        browser = webdriver.Chrome(options=chrome_options)
        print("Chrome Found.")
        browser.maximize_window()
    except Exception:
        print("Chrome not Found. Moving to Firefox.")
    if not browser:
        try:
            firefox_options = fir_options.Options()
            firefox_options.add_argument("--headless")
            browser = webdriver.Firefox(options=firefox_options)
            print("Firefox Found.")
            browser.maximize_window()
        except Exception:
            print("Firefox not found. Get one of these")
    return browser


def _collect_file_links(browser):
    """Return the href of each ``a[@class='file']`` and ``a[@class='file view']`` link on the current page."""
    links = browser.find_elements_by_xpath("//a[@class='file']")
    links = links + browser.find_elements_by_xpath("//a[@class='file view']")
    return [link.get_attribute('href') for link in links]


def _write_lines(path, items):
    """Write ``str(item)`` for each of *items* to *path*, one per line, replacing the file."""
    with open(path, 'w+') as out:
        for item in items:
            out.write(str(item) + '\n')


def scraping_files_and_folders():
    """Scrape file and folder links starting from a URL entered by the user.

    Folder links are written to ``tempFolder.txt`` and ``debug_file.txt``,
    file links to ``tempFile.txt``. The output files are written only after
    scraping finishes, and the browser is closed even if scraping fails.

    Raises:
        RuntimeError: if neither Chrome nor Firefox could be started.
    """
    url = input("Enter the URL for the Finisher to download: ")

    browser = _open_headless_browser()
    if browser is None:
        raise RuntimeError("Neither Chrome nor Firefox could be started.")

    try:
        # Ensure that the page is loaded correctly
        ensure_page_loaded(browser, url)
        file_url_list = _collect_file_links(browser)

        # NOTE(review): the first folder link is recorded at index 0 but never
        # visited, because ``index`` starts at 1 - confirm this is intended.
        # Indexing [0] raises IndexError when the page has no folder link.
        folder_url_list = [
            browser.find_elements_by_xpath("//a[@class='folder']")[0].get_attribute('href')
        ]
        index = 1
        while True:
            time.sleep(3)
            # Only "folder" elements that contain a <div> are recorded.
            for folder in browser.find_elements_by_class_name("folder"):
                if folder.find_elements_by_tag_name("div"):
                    folder_url_list.append(folder.get_attribute('href'))

            if index >= len(folder_url_list):
                break

            # Ensure that the page is loaded correctly
            print('Scraping Folder...')
            ensure_page_loaded(browser, folder_url_list[index])
            file_url_list.extend(_collect_file_links(browser))
            index = index + 1

        _write_lines('tempFolder.txt', folder_url_list)
        _write_lines('tempFile.txt', file_url_list)
        _write_lines('debug_file.txt', folder_url_list)

        time.sleep(3)
    finally:
        browser.close()
コード例 #24
0
ファイル: browsers.py プロジェクト: melhamin/STARS
def firefox():
    """Return a Firefox driver created with the ``--start-maximized`` argument."""
    firefox_opts = FirefoxOptions.Options()
    firefox_opts.add_argument('--start-maximized')
    return webdriver.Firefox(options=firefox_opts)
コード例 #25
0
def PDBReader(file, timeout=600):
    """
    Accesses http://charmm-gui.org and uses the PDB Reader.

    Parameters
    ----------
    file : str
        Path to the input PDB file.
    timeout : int
        Timeout in seconds.

    Returns
    -------
    filename_output : str
        The absolute path to the output TGZ archive.

    Raises
    ------
    SystemError
        If neither a Chrome nor a Firefox driver can be started.
    ConnectionError
        If no download link becomes visible within ``timeout`` seconds.
    """
    def autoClicker(element_id, attempts):
        # deals with some rare cases of an unclickable element
        for _ in range(attempts):
            try:
                driver.find_element_by_id(element_id).click()
                return
            except _exceptions.WebDriverException:
                _time.sleep(1)
        # Last attempt: any exception now propagates to the caller.
        driver.find_element_by_id(element_id).click()

    file = _os.path.abspath(file)
    options = _options.Options()
    options.headless = True

    try:
        driver = _seleniumrequests.Chrome(options=options)
    except _exceptions.WebDriverException:
        try:
            driver = _seleniumrequests.Firefox(options=options)
        except _exceptions.WebDriverException:
            raise SystemError("Need either Chrome or Firefox for CHARMM-GUI "
                              "functionality.")

    # try/finally makes sure the browser is shut down on failures as well.
    try:
        _logging.info("Accessing http://www.charmm-gui.org ...")
        driver.get("http://www.charmm-gui.org/?doc=input/pdbreader")

        pdb_element = driver.find_element_by_name("file")
        pdb_element.send_keys(file)

        pdb_radio = driver.find_element_by_xpath(
            "//input[@name='pdb_format' and "
            "@value='PDB']")
        pdb_radio.click()

        autoClicker("nextBtn", 60)

        # could add some support for options. For now, we just go with the
        # defaults.
        wait = _wait.WebDriverWait(driver, timeout)
        wait.until(_EC.element_to_be_clickable((_by.By.ID, "nextBtn")))
        autoClicker("nextBtn", 60)

        wait.until(_EC.element_to_be_clickable((_by.By.ID, "nextBtn")))
        autoClicker("nextBtn", 60)

        try:
            _logging.info("Retrieving files...")
            wait.until(
                _EC.visibility_of_any_elements_located(
                    (_by.By.CLASS_NAME, "download")))
        except (_exceptions.TimeoutException, TimeoutError):
            # WebDriverWait.until signals expiry with Selenium's
            # TimeoutException, not the builtin TimeoutError, so the builtin
            # alone never matched. (Assumes ``_exceptions`` is
            # selenium.common.exceptions, as its WebDriverException use
            # above suggests.)
            raise ConnectionError("Could not retrieve any files. Please increase "
                                  "the maximum timeout or try again later.")

        _logging.info("Downloading TGZ archive...")
        filebase = _os.path.splitext(file)[0]
        output_path = filebase + "_CHARMM.tgz"
        tgz_file = driver.find_elements_by_partial_link_text(".tgz")[0]
        response = driver.request('POST',
                                  tgz_file.get_attribute("href"),
                                  verify=False,
                                  stream=True)
        with open(output_path, "wb") as archive:
            archive.write(response.raw.read())
    finally:
        driver.quit()

    return output_path
コード例 #26
0
def ligandReader(file, timeout=60, find_similar_residues=False):
    """
    Accesses http://charmm-gui.org and uses the Ligand Reader.

    Parameters
    ----------
    file : str
        Path to the input ligand file.
    timeout : int
        Timeout in seconds.
    find_similar_residues : bool
        Whether to tick the "Find similar residues" checkbox before searching.

    Returns
    -------
    filename_output : str
        The absolute path to the output TGZ archive.

    Raises
    ------
    SystemError
        If neither a Chrome nor a Firefox driver can be started.
    ConnectionError
        If no download link becomes visible within ``timeout`` seconds.
    """
    file = _os.path.abspath(file)
    options = _options.Options()
    options.headless = True

    try:
        driver = _seleniumrequests.Chrome(options=options)
    except _exceptions.WebDriverException:
        try:
            driver = _seleniumrequests.Firefox(options=options)
        except _exceptions.WebDriverException:
            raise SystemError("Need either Chrome or Firefox for CHARMM-GUI "
                              "functionality.")

    # try/finally makes sure the browser is shut down on failures as well.
    try:
        _logging.info("Accessing http://www.charmm-gui.org ...")
        driver.get("http://www.charmm-gui.org/?doc=input/ligandrm")

        pdb_element = driver.find_element_by_name("file2")
        pdb_element.send_keys(file)

        upload_button = driver.find_element_by_xpath(
            "//input[@type='button' and @value='Upload MOL/MOL2/SDF']")
        upload_button.click()

        driver.switch_to.alert.accept()

        # Fixed pause after accepting the alert (presumably to let the upload
        # finish before continuing).
        _time.sleep(5)

        if find_similar_residues:
            checkbox = driver.find_element_by_name("simi")
            checkbox.click()

        driver.find_element_by_id("nextBtn").click()

        # could add some support for options. For now, we just go with the
        # defaults.
        driver.find_element_by_id("nextBtn").click()

        try:
            _logging.info("Retrieving files...")
            wait = _wait.WebDriverWait(driver, timeout)
            wait.until(
                _EC.visibility_of_any_elements_located(
                    (_by.By.CLASS_NAME, "download")))
        except (_exceptions.TimeoutException, TimeoutError):
            # WebDriverWait.until signals expiry with Selenium's
            # TimeoutException, not the builtin TimeoutError, so the builtin
            # alone never matched. (Assumes ``_exceptions`` is
            # selenium.common.exceptions, as its WebDriverException use
            # above suggests.)
            raise ConnectionError("Could not retrieve any files. Please increase "
                                  "the maximum timeout or try again later.")

        _logging.info("Downloading TGZ archive...")
        filebase = _os.path.splitext(file)[0]
        output_path = filebase + "_CHARMM.tgz"
        tgz_file = driver.find_elements_by_partial_link_text(".tgz")[0]
        response = driver.request('POST',
                                  tgz_file.get_attribute("href"),
                                  verify=False,
                                  stream=True)
        with open(output_path, "wb") as archive:
            archive.write(response.raw.read())
    finally:
        driver.quit()

    return output_path
コード例 #27
0
        sys.exit(4)
    elif code == 5:
        # The User-Settings file is removed if the event/giveaway informations are invalid
        # So it can be created again from scratch
        os.remove("User-Settings.txt")
        input("ERROR (5): User-Settings.txt Sanity check failed, please try again later.\n    You may close this console or press any key\n")
        browser.close()
        sys.exit(5)

if __name__ == "__main__":
    print("Starting Tasks\n    DO NOT CLOSE THIS CONSOLE")
    # Setting up the selenium environment - Using Geckodriver
    # geckodriver.exe is expected to be found at the same directory as the program
    GECKODRIVER_PATH = os.path.join(os.getcwd(), "geckodriver.exe")
    WINDOW_SIZE = "1920,1080"
    firefox_options = option.Options()
    firefox_options.add_argument("--headless")
    firefox_options.add_argument("--window-size=%s" % WINDOW_SIZE)
    firefox_options.add_argument("disable-gpu")
    firefox_options.add_argument("--disable-notifications")

    caps = DC.DesiredCapabilities().FIREFOX.copy()
    caps["pageLoadStrategy"] = "eager"

    try: 
        browser = Webdriver.Firefox(executable_path = GECKODRIVER_PATH, 
                                   options = firefox_options, desired_capabilities = caps)
    except:
        #Exit code 4 is when geckodriver.exe isn't found in the directory (usually)
        exitScript(4)
    #Setting up a few explicit wait classes, for standard and extended loading respectively
コード例 #28
0
ファイル: article.py プロジェクト: codingfinance/Webscraping
import scrapy
from selenium import webdriver
from selenium.webdriver.firefox import options

# Module-level Firefox options with headless mode enabled; ArticleSpider
# passes them to webdriver.Firefox.
opt = options.Options()
opt.headless = True


class ArticleSpider(scrapy.Spider):
    """Spider for the SSGA fund finder page that also opens each response URL in Firefox."""

    name = 'article'

    start_urls = ['https://www.ssga.com/us/en/individual/etfs/fund-finder']

    def __init__(self, *args, **kwargs):
        """Initialise the Scrapy spider and start the Firefox driver.

        Positional and keyword arguments are forwarded to ``scrapy.Spider``,
        so spider arguments passed by Scrapy no longer raise a TypeError.
        """
        super().__init__(*args, **kwargs)
        self.driver = webdriver.Firefox(options=opt)

    def closed(self, reason):
        """Quit the Firefox driver when the spider closes.

        Scrapy calls ``closed(reason)`` on the spider when it finishes, so
        the browser process is not left running.
        """
        self.driver.quit()

    def parse(self, response):
        """Open the response URL in the driver and print the response's ``td`` elements."""
        self.driver.get(response.url)

        # NOTE(review): the printed cells come from the Scrapy response, not
        # from the page loaded in the Selenium driver - confirm this is
        # intended.
        print(response.css('td').extract())
コード例 #29
0
 def setUpClass(cls):
     """Run the parent class setup, then create the class-wide WebDriver with headless mode off."""
     super().setUpClass()
     visible_browser_options = options.Options()
     visible_browser_options.headless = False
     cls.driver = WebDriver(options=visible_browser_options)