Example #1
 def setUpClass(cls):
     options = ChromeOptions()
     options.headless = True
     options.add_argument('--no-sandbox')
     options.add_argument('--disable-gpu')
     options.add_argument('--privileged')
     options.add_argument('--window-size=1920,1080')
     if 'TEST_CHROMEDRIVER' in os.environ:
         chromedriver_executable = os.environ['TEST_CHROMEDRIVER']
     else:
         chromedriver_executable = '/usr/lib/chromium-browser/chromedriver'
     cls.selenium = webdriver.Chrome(
         executable_path=chromedriver_executable, chrome_options=options)
     cls.selenium.implicitly_wait(10)
     super(SeleniumTest, cls).setUpClass()
     cls.server_url = cls.live_server_url
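
Most snippets in this collection use the Selenium 3 style: options.headless, chrome_options=, and executable_path= are all deprecated in Selenium 4. For reference, a minimal sketch of the modern equivalent (assuming Selenium >= 4.6, whose built-in Selenium Manager fetches a matching driver, and a recent Chrome that supports --headless=new):

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

options = Options()
options.add_argument('--headless=new')  # replaces the deprecated options.headless attribute
options.add_argument('--no-sandbox')
options.add_argument('--window-size=1920,1080')
# An explicit driver path is still supported through Service if needed:
# driver = webdriver.Chrome(service=Service('/path/to/chromedriver'), options=options)
driver = webdriver.Chrome(options=options)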
Example #2
    def setUpClass(cls):
        # Try Chrome first, fall back to Firefox

        try:
            driver_options = ChromeOptions()
            # Headless on Appveyor/Travis
            if "CI" in os.environ:
                driver_options.add_argument("--headless")
                driver_options.add_argument("--no-sandbox")
            cls.driver = webdriver.Chrome(chrome_options=driver_options)
        except WebDriverException:
            driver_options = FirefoxOptions()
            # Headless on Appveyor/Travis
            if "CI" in os.environ:
                driver_options.headless = True
            cls.driver = webdriver.Firefox(firefox_options=driver_options)

        # Get the newsserver-info, if available
        if "SAB_NEWSSERVER_HOST" in os.environ:
            cls.newsserver_host = os.environ['SAB_NEWSSERVER_HOST']
            cls.newsserver_user = os.environ['SAB_NEWSSERVER_USER']
            cls.newsserver_password = os.environ['SAB_NEWSSERVER_PASSWORD']
Example #3
 def _prepare_webdriver():
     chrome_options = Options()
     chrome_options.headless = True
     return webdriver.Chrome(ChromeDriverManager().install(),
                             chrome_options=chrome_options)
Example #4
    def post(self):
        args = Scraper.parser.parse_args()

        options = Options()
        options.headless = True
        try:
            # the chromedriver executable should be installed in the src folder
            driver = webdriver.Chrome('./chromedriver', chrome_options=options)
        except Exception:
            return {"message": "Error getting chrome driver, is it installed?"}
        try:
            driver.get(args['url'])
        except Exception:
            return {"message": "Invalid URL"}



        time.sleep(5)

        html = driver.page_source

        soup = BeautifulSoup(html, 'html.parser')

        #try nhl standings
        tags = []
        try:
            # The original repeated one near-identical block per stat column;
            # mapping each stat name to its data-col index lets one loop build the tags.
            stat_columns = [
                ('games_played', '1'), ('wins', '2'), ('losses', '3'),
                ('OT_losses', '4'), ('points', '5'), ('points_percentage', '6'),
                ('regulation_wins', '7'), ('regulation_and_OT_wins', '8'),
                ('goals_for', '9'), ('goals_against', '10'),
                ('goal_differential', '11'), ('home', '12'), ('away', '13'),
                ('shoot_out', '14'), ('last10', '15'), ('streak', '16'),
            ]
            for i in soup.findAll('div', {'class': 'responsive-datatable__pinned'}):
                nhl_table = i.find("tbody")
                for t in nhl_table.findAll("tr"):
                    team_name = t.find('span', {'class': 'team--name'}).text
                    for stat_name, col in stat_columns:
                        tag = {}
                        tag[stat_name] = t.find('td', attrs={'data-col': col}).find('span').text
                        tag['tag_key'] = team_name
                        tags.append(tag)

            if tags:  # if teams is not empty, else try a different site
                return tags
        except Exception:
            pass
        
        tags = []
        try:
            head = soup.find('thead')
            labels = []
            for c in head.findAll('th'):
                labels.append(c.text.strip())
            body = soup.find('tbody')
            for i in body.findAll('tr'):
                colIndex = 0
                name = ''
                
                for j in i.findAll('td'):
                    tag = {}
                    if colIndex == 0:
                        name_tag = j.find('div',{'class': 'd3-o-club-fullname'})
                        if not name_tag:
                            name_tag = j.find('a',{'class': 'd3-o-player-fullname nfl-o-cta--link'})
                        name = name_tag.text.strip()
                        colIndex += 1
                        continue
                    tag[labels[colIndex]] = j.text.strip()
                    tag['tag_key'] = name
                    tags.append(tag)
                    colIndex += 1
                

            if tags:  # if teams is not empty, else try a different site
                return tags
        except Exception:
            pass



        return{"message":"Could not scrape data from URL"}

        divs = soup.find_all("div")

        tables = soup.find_all("table")

        return {"div_tags:" : [str(d) for d in divs], "table_tags:": [str(t) for t in tables]}
Example #5
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import time
import os
import requests
import sys

plat = sys.argv[1:]

options = Options()
options.headless = True

try:
	if 'win' in plat[0]: 
		browser = webdriver.Chrome('chromedriver.exe', chrome_options=options)
	elif 'mac' in plat[0]:
		browser = webdriver.Chrome('mac', chrome_options=options)
	elif 'linux' in plat[0]:
		browser = webdriver.Chrome('linux', chrome_options=options)
	else:
		print('no OS argument provided. --win/--mac/--linux')
except Exception:
	print('No argument provided or webdriver error')

apks_folder = "apks/"
code_folder = 'source_code/'

for file in os.listdir(apks_folder):
Example #6
File: dynamic.py Project: beingsantosh/NLP
### This code helps to scrape the headlines from the Economic Times website.
###

from selenium import webdriver
from bs4 import BeautifulSoup
import dynamic2, dynamic3
from selenium.webdriver.chrome.options import Options
from datetime import datetime
import pandas as pd

# initiating the timer
t1 = datetime.now()

chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.headless = True

# the options must be passed to Chrome, otherwise headless mode never applies
driver = webdriver.Chrome(
    "C:/Users/Mein Pc/Downloads/chromedriver_win32/chromedriver",
    options=chrome_options)

base_url = 'https://economictimes.indiatimes.com/archive.cms'

driver.get(base_url)

data = driver.page_source

soup = BeautifulSoup(data, 'html.parser')

span = soup.find_all('span', class_='normtxt')

all_month_link_dict = {}
Example #7
 def __init__(self):
     opts = Options()
     if OPEN_BROWSER == 0:
         opts.headless = True
     self.browser = webdriver.Chrome(CHROMEDRIVER, options=opts)
Example #8
 def searching_information(self):
     columns_order = pd.read_csv(self._dir_path + '/Columns_order.txt',
                                 header=None)
     columns_order = columns_order[0].tolist()
     options = Options()
     options.headless = True
     options.add_argument('--disable-notifications')
     options.add_argument('--no-sandbox')
     options.add_argument('--verbose')
     options.add_argument('--disable-gpu')
     options.add_argument('--disable-software-rasterizer')
     options.add_argument("--log-level=3")
     options.add_argument('--hide-scrollbars')
     self._browser = webdriver.Chrome(ChromeDriverManager().install(),
                                      options=options)
     df = pd.DataFrame(columns=columns_order)
     for chemical in self.chemicals:
         try:
             headers, Name, Molecular_Weight = self._searching_headers(
                 chemical)
             Properties = {
                 'Name': Name,
                 'Molecular Mass': Molecular_Weight,
                 'Consulted Date': self._now,
                 'Source': self._url,
                 'TRI_CHEM_ID': chemical
             }
             if len(headers) == 0:
                 df_aux = pd.DataFrame(
                     {key: [value]
                      for key, value in Properties.items()})
                 df = pd.concat([df, df_aux],
                                ignore_index=True,
                                sort=True,
                                axis=0)
                 self._browser.back()
             else:
                 for header in headers:
                     Results = self._searching_properties(header, chemical)
                     for key, val in Results.items():
                         Properties.update({
                             key: val[0],
                             key + ' - Units': val[1]
                         })
                     self._browser.back()
                     df_aux = pd.DataFrame({
                         key: [value]
                         for key, value in Properties.items()
                     })
                     df = pd.concat([df, df_aux],
                                    ignore_index=True,
                                    sort=True,
                                    axis=0)
         except NoSuchElementException:
             continue
     df = df[columns_order]
     if self._existing:
         df.to_csv(self.file_save,
                   index=False,
                   mode='a',
                   sep=',',
                   header=False)
     else:
         df.to_csv(self.file_save, index=False, sep=',')
     self._browser.close()
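
Calling pd.concat once per row, as above, rebuilds the DataFrame on every iteration. A common alternative (a sketch of the pattern, not the author's code) is to accumulate plain dicts and construct the DataFrame once at the end:

import pandas as pd

rows = []
for i in range(3):  # stand-in for the chemicals loop above
    rows.append({'Name': f'chem{i}', 'Molecular Mass': i * 10.0})
df = pd.DataFrame(rows)  # one construction instead of repeated pd.concat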
Example #9
 def openSiap(self):
     options = Options()
     options.headless = True
     self.driver = webdriver.Chrome(options=options)
     self.driver.get(self.urlsiap)
Example #10
            def Flipkart():
                headers = {
                    'Connection': 'keep-alive',
                    'Cache-Control': 'max-age=0',
                    'Upgrade-Insecure-Requests': '1',
                    'User-Agent':
                    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
                    'Sec-Fetch-User': '******',
                    'Accept':
                    'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                    'Sec-Fetch-Site': 'same-origin',
                    'Sec-Fetch-Mode': 'navigate',
                    'referer': 'https://www.flipkart.com/',
                    'Accept-Encoding': 'gzip, deflate, br',
                    'Accept-Language': 'en-GB,en;q=0.9,en-US;q=0.8,nl;q=0.7',
                }

                URL = "https://www.flipkart.com/search?q=" + str(
                    Product_search
                ) + "&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"

                try:
                    received = requests.get(URL, headers=headers)
                    received = received.text

                except Exception:
                    options = Options_Chrome()
                    options.headless = True
                    browser = webdriver.Chrome(options=options)
                    browser.get(URL)
                    time.sleep(60)
                    received = browser.page_source
                    browser.close()

                soup = BeautifulSoup(received, 'lxml')

                def initial_viability_test():
                    test_count = 0
                    The_Whole_Page = soup.prettify()
                    while test_count <= 100:
                        print(The_Whole_Page)
                        test_count += 1

                def name_scrape():
                    time.sleep(time_variable)
                    outlines = soup.findAll("div", {"class": "_1UoZlX"})

                    for x in range(len(outlines)):
                        outline = outlines[x]
                        identify = outline.find("div", {"class": "_3wU53n"})
                        name = identify.text
                        Flipkart_Names.append(name)

                    if len(Flipkart_Names) == 0:
                        outlines_2 = soup.findAll("div", {"class": "_3liAhj"})

                        for y in range(len(outlines_2)):
                            outline_2 = outlines_2[y]
                            identify_2 = outline_2.find(
                                "a", {"class": "_2cLu-l"})
                            name_2 = identify_2.text
                            Flipkart_Names.append(name_2)
                    else:
                        pass
                    # print(Flipkart_Names)

                def price_scrape():
                    time.sleep(time_variable)
                    outlines = soup.findAll("div", {"class": "_1UoZlX"})

                    for x in range(len(outlines)):
                        outline = outlines[x]
                        identify = outline.find("div",
                                                {"class": "_1vC4OE _2rQ-NK"})
                        price = identify.text
                        Flipkart_Prices.append(price)

                    if len(Flipkart_Prices) == 0:
                        outlines_2 = soup.findAll("div", {"class": "_3liAhj"})

                        for y in range(len(outlines_2)):
                            outline_2 = outlines_2[y].find(
                                "div", {"class": "_1vC4OE"})
                            price_2 = outline_2.text
                            Flipkart_Prices.append(price_2)

                    else:
                        pass
                    # print(Flipkart_Prices)

                def image_scrape():
                    time.sleep(time_variable)
                    outlines = soup.findAll("div", {"class": "_1UoZlX"})

                    for x in range(len(outlines)):
                        outline = outlines[x]
                        identify = outline.find("div", {"class": "_3BTv9X"})
                        image = identify.find("img")
                        image_link = image['src']
                        Flipkart_ImageLinks.append(image_link)

                    if len(Flipkart_ImageLinks) == 0:
                        outlines_2 = soup.findAll("div", {"class": "_3liAhj"})

                        for y in range(len(outlines_2)):
                            outline_2 = outlines_2[y]
                            identify_2 = outline_2.find(
                                "div", {"class": "_3BTv9X"})
                            image_2 = identify_2.find("img")
                            image_link_2 = image_2['src']
                            Flipkart_ImageLinks.append(image_link_2)

                    else:
                        pass
                    # print(Flipkart_ImageLinks)

                name_scrape()
                price_scrape()
                image_scrape()
Example #11
from selenium import webdriver
from selenium.webdriver.common.keys import Keys 
from selenium.webdriver.chrome.options import Options
import getpass
from fblib.fbfunct import facebook_login

option = Options()

option.add_argument("--disable-infobars")
option.add_argument("start-maximized")
option.add_argument("--disable-extensions")
option.headless = True
# Pass the argument 1 to allow and 2 to block
option.add_experimental_option("prefs", { 
    "profile.default_content_setting_values.notifications": 1 
})

p = getpass.getpass()
driver = webdriver.Chrome(options=option)
facebook_login(driver,'*****@*****.**',p)
checking_list = ["evening.tkc", "chou.wang.39", "wang.leox"]
i = 0
while i < len(checking_list):
    driver.implicitly_wait(5)
    print('https://www.facebook.com/'+checking_list[i])
    driver.get('https://www.facebook.com/'+checking_list[i])
    i = i + 1
    for _ in range(2):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
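
The fixed two-pass scroll above can miss content on pages that lazy-load as you scroll. A common sketch (assuming driver is an open Selenium session) is to keep scrolling until the document height stops growing:

import time

last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # give lazy-loaded content a chance to render
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height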
Example #12
def twitterSearch(query, maximum=10):
    options = Options()
    options.headless = True
    options.add_argument("--window-size=1920,1200")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-javascript")
    options.add_argument("--no-sandbox")

    options.add_experimental_option(
        "prefs", {'profile.managed_default_content_settings.javascript': 2})

    print('Options set...')

    if os.environ.get("ENV") == "development":
        driver = webdriver.Chrome(
            ChromeDriverManager().install(), options=options)
    else:
        options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
        driver = webdriver.Chrome(executable_path=os.environ.get(
            "CHROMEDRIVER_PATH"), chrome_options=options)
        print('Driver mounted...')

    url = 'https://mobile.twitter.com/search?q='+query

    finalComments = []

    try:
        driver.get(url)
        time.sleep(5)

        print('Driver opened...')
        # button = driver.find_element_by_tag_name("body")
        # a = button.get_attribute("innerHTML")
        # q = BeautifulSoup(a, 'html.parser')
        # print(q)
        # time.sleep(60)
        # button.click()

        print('Twitter accessed...')

        while True:
            javaScript = "window.scrollBy(0, document.body.scrollHeight);"
            driver.execute_script(javaScript)
            element = driver.find_element_by_tag_name("body")
            commentsDiv = element.get_attribute("innerHTML")

            soup = BeautifulSoup(commentsDiv, 'html.parser')

            print(soup)

            comments = soup.find_all('div', attrs={
                                     'class': 'css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0'})

            for i in comments:
                if i in finalComments:
                    continue
                else:
                    finalComments.append(html.escape(i.text))

            # moreButton = driver.find_element_by_class_name("w-button-more")
            # moreButton.click()
            time.sleep(2)

            print(len(finalComments))
            if len(finalComments) >= maximum:
                break

        print('Twitter crawling done!')
        driver.quit()
        return finalComments

    except Exception as x:
        print(x)
        print("Error on loading comments")
        driver.quit()
        return -1
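
The fixed time.sleep(5) after driver.get can be replaced by an explicit wait, as the currency example later in this collection does with WebDriverWait. A sketch (the CSS selector is a placeholder, not real Twitter markup):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# block until at least one matching element is present, up to 10 seconds
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "div[data-testid]")))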
Example #13
 def setUp(self):
     options = Options()
     options.headless = True
     self.driver = webdriver.Chrome(options=options)
     self.driver.get(url="https://semantic-ui.com/examples/login.html")
Example #14
import platform

from selenium import webdriver
from bs4 import BeautifulSoup as bs
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

sistema = platform.system()

if sistema == 'Windows':
    driver = webdriver.Chrome(executable_path=r"chromedriver.exe")
else:
    chromeOptions = Options()
    chromeOptions.headless = True
    driver = webdriver.Chrome(executable_path="./chromedriver",
                              options=chromeOptions)

import funciones
import apiTwitch
import creacionBot

linkBase = 'https://www.twitch.tv/popout/'
linkExtra = '/chat?popout='

diccionarioStreamer = apiTwitch.diccionarioStreamer

cantidadCanales = len(diccionarioStreamer)
inactivos = list()
streamersActivos = list()
Example #15
from selenium import webdriver  #Import the webdriver to launch Chrome driver here
from selenium.webdriver.chrome.options import Options  #Import Options from Selenium to configure our driver
from githubConnexion import githubConnect

options = Options()  #Declare options
options.headless = True  #True = the browser window won't be shown; False = it will be shown
options.add_argument("--window-size=1920,1080")  #Size of driver's window
options.add_argument("--incognito")  #If you add this argument, the driver will launch a private session
DRIVER_PATH = "your_driver_path"  #Replace by the path of your driver's executable
driver = webdriver.Chrome(
    options=options,
    executable_path=DRIVER_PATH)  #Create and init your driver with options

githubConnect(driver, "your_github_username", "your_github_password", True)

driver.quit()  #Close the driver at the end of the program
Example #16
File: umdauth.py Project: tybug/umd-auth
    def authenticate(self):
        """
        Authenticates with umd using https://identity.umd.edu/mfaprofile as
        the base request. Authenticating with identity.umd.edu gives us the most
        access to other sites (except notably
        https://app.testudo.umd.edu/main/profile), which is why it was chosen.

        Notes
        -----
        Interestingly, websites under umd control seem to have a hierarchy of
        some kind. My best current approximation of this hierarchy is a
        partially ordered set. In the notation below, (a, b) means that "a
        grants access to b". So, the "higher up" in the hierarchy a website is,
        the more sites it grants access to.

        By "grants access" I mean that if you use CAS to log into website a,
        then you can freely access website b without needing to re-authenticate.
        Note that the opposite is not necessarily true.

        1 = https://identity.umd.edu/mfaprofile
        2 = https://app.testudo.umd.edu/main/profile
        3 = http://umd.instructure.com/
        4 = https://return.umd.edu/covid/returnstatus
        5 = https://dsonline2.umd.edu/dpms/cas.do
        6 = https://www.myuhc.umd.edu/home.aspx

        (1, 3)
        (1, 4)
        (1, 5)
        (1, 6)
        (2, 3)
        (2, 4)
        (2, 5)
        (2, 6)
        (3, 5)
        (3, 6)
        (4, 5)
        (4, 6)
        (5, 4)
        (5, 6)

        Sites 1 and 2 are equally high up in the hierarchy, but neither grants
        access to the other. So we have to pick one to authenticate with. I
        chose 1. In the future we may authenticate with both (or only as
        necessary for 2) for maximum coverage.

        Warnings
        --------
        We're making more than a few requests in this method, so this could take
        multiple seconds to complete (around 5-6 seconds for me).
        """
        generate_codes_after = False
        if len(self.codes) == 0:
            raise ValueError(
                "Need at least one authentication code to log in.")
        if len(self.codes) == 1:
            # we're down to our last code - authenticate and then generate
            # another set.
            print("down to our last code, generating more after this "
                  "authentication")
            generate_codes_after = True

        # use up the first code available (starting from the front of the list)
        code = self.codes.pop(0)
        print(f"authenticating with code {code}")

        # A useful reference: "Detailed Trace of a Shibboleth Login".
        # https://docs.shib.ncsu.edu/docs/shiblogindetails.html

        r = requests.get("https://identity.umd.edu/mfaprofile")
        jsession_id = r.history[2].cookies["JSESSIONID"]

        cookies = {"JSESSIONID": jsession_id}
        data = {
            "j_username": self.username,
            "j_password": self.password,
            "_eventId_proceed": ""
        }
        r = requests.post(
            "https://shib.idm.umd.edu/shibboleth-idp/profile/cas"
            "/login?execution=e1s1",
            data=data,
            cookies=cookies)
        # sanity check to ensure our request / jsession id was accepted
        assert ("Please complete your multi-factor authentication "
                "using Duo.") in r.text

        umd_shib_url = r.url

        # There's an iframe on this page (the duo mobile 2fa element) which
        # makes some requests for us. We need to get the source code of that
        # iframe in order to replicate the requests by hand. Duo has a js
        # library that sets the iframe source based on some parameters in the
        # source code of this page, so we replicate that js code here to create
        # the iframe url and retrieve its source.
        #
        # The duo js code is minified on the umd page, but an unminified version
        # (that seems to be accurate as far as I can tell) can be found here:
        # http://shibboleth.net/pipermail/commits/2017-September/031081.html.
        soup = BeautifulSoup(r.text, features="lxml")
        duo_iframe = soup.find(id="duo_iframe")
        duo_host = duo_iframe.get("data-host")
        duo_sig_request = duo_iframe.get("data-sig-request")

        duo_sig = duo_sig_request.split(":")[0]
        app_sig = duo_sig_request.split(":")[1]

        # Apparently javascript's encodeURIComponent function (which we are
        # replicating here) replaces "/" as well, so we pass `safe=""` to
        # emulate this.
        current_url_encoded = urllib.parse.quote(umd_shib_url, safe="")

        duo_iframe_source_url = (
            f"https://{duo_host}/frame/web/v1/auth?tx="
            f"{duo_sig}&parent={current_url_encoded}&v=2.6")

        options = Options()
        options.headless = True
        driver = webdriver.Chrome(Path(__file__).parent / "chromedriver",
                                  options=options)
        driver.get(duo_iframe_source_url)
        # TODO this errors with "list index out of range" randomly - race
        # condition somewhere? I just retry whenever I hit that error currently.
        sid = driver.current_url.split("sid=")[1]
        sid = urllib.parse.unquote(sid)

        data = {
            "sid": sid,
            "device": "phone1",
            "factor": "Passcode",
            "passcode": f"{code}",
            "out_of_date": "False",
            "days_out_of_date": "0",
            "days_to_block": "None"
        }
        r = requests.post(f"https://{duo_host}/frame/prompt", data=data)
        txid = json.loads(r.content)["response"]["txid"]

        data = {"sid": sid, "txid": txid}
        r = requests.post(f"https://{duo_host}/frame/status", data=data)

        data = {"sid": sid}
        r = requests.post(f"https://{duo_host}/frame/status/{txid}", data=data)
        auth_sig = json.loads(r.content)["response"]["cookie"]

        sig_response = f"{auth_sig}:{app_sig}"
        data = {"_eventId": "proceed", "sig_response": sig_response}

        session = requests.Session()
        add_dict_to_cookiejar(session.cookies, cookies)

        r = session.post(umd_shib_url, data=data, cookies=cookies)

        # ``len(history)`` used to be 2, but umd recently introduced a screen
        # which would show if you haven't completed the daily symptom survey.
        # The ``if`` branch deals with this scenario. The ``else`` branch deals
        # with the 'normal' scenario of having your daily symptom survey
        # completed (which the login process will presumably go back to by
        # default when the survey is no longer a thing).
        assert len(r.history) != 0
        print(f"len(r.history): {len(r.history)}")
        if len(r.history) == 1:
            shib_idp_session = (r.history[0].headers["set-cookie"].split(
                "shib_idp_session=")[1].split(";")[0])

            umd_shib_url_ = umd_shib_url[:-1] + "3&_eventId_proceed=1"
            r = session.get(umd_shib_url_)

            cookie = r.history[1].headers["set-cookie"]
            cookie = cookie.split("JSESSIONID=")[1]
            cookie = cookie.split(";")[0]

            self.identity_jsession_id = cookie
        else:
            shib_idp_session = (r.history[0].headers["set-cookie"].split(
                "shib_idp_session=")[1].split(";")[0])

            # we're actually issued a *new* JSESSIONID just for the identity.umd.edu
            # site. If we attempt to make requests with our first (and still valid)
            # JSESSSIONID, they will be rejected, so store this new JSESSIONID for
            # later use (if we want to make requests to identity.umd.edu later).
            #
            # As far as I can tell, this doesn't occur for other websites. They will
            # still accept the original JSESSIONID and don't issue a new one to us.
            identity_jsession_id = (r.history[1].headers["set-cookie"].split(
                "JSESSIONID=")[1].split(";")[0])
            self.identity_jsession_id = identity_jsession_id

        # With these two cookies, we are basically a god. We can make a request
        # to any umd website with full authentication permissions.
        cookies = {
            "JSESSIONID": jsession_id,
            "shib_idp_session": shib_idp_session
        }
        self.auth_cookies = cookies

        print("Authenticated. Creds: ", self.auth_cookies,
              self.identity_jsession_id)

        # we popped a code off our codes list at the beginning of this method,
        # so we need to remove it from our codes file as well.
        self._write_codes()

        if generate_codes_after:
            self.generate_new_codes()
Example #17
def set_up_driver(chromedriver_path):
    options = Options()
    options.headless = False
    driver = webdriver.Chrome(chromedriver_path,options=options)
    return driver
Example #18
# -*- coding: utf-8 -*-
# Harvest theses from Western Australia
# JH: 2022-03-19

from time import sleep
from selenium import webdriver
from datetime import datetime
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import os
import codecs
import ejlmod2
import re

driver_options = Options()
driver_options.headless = True
driver = webdriver.Chrome(options=driver_options)

xmldir = '/afs/desy.de/user/l/library/inspire/ejl'
retfiles_path = "/afs/desy.de/user/l/library/proc/retinspire/retfiles"  #+'_special'

pages = 1

now = datetime.now()
stamp_of_today = '%4d-%02d-%02d' % (now.year, now.month, now.day)

publisher = 'Western Australia U.'
jnlfilename = 'THESES-WESTERN-AUSTRALIA-%s' % stamp_of_today

recs = []
Example #19
File: Pylenium.py Project: bnonni/Pylenium
H = input(
    'Welcome to GTPylenium!\nWould you like this test to be headless (Y/n)? ')
uname = os.system('uname')
envs = mac_envs if uname == 'Darwin' else win_envs
for path in envs:
    #Set path to driver
    engine_path = path
    print("PATH set: ", path)
    if browser[i] == 'chrome':
        from selenium.webdriver.chrome.options import Options
    else:
        from selenium.webdriver.firefox.options import Options

    #Enable headless option (i.e. no browser window will open) - to disable this feature, change options.headless = True to options.headless = False
    options = Options()
    if H == 'Y':
        options.headless = True
    else:
        options.headless = False

    print('Driver Status: Building Webdriver')
    if browser[i] == 'chrome':
        driver = webdriver.Chrome(path, options=options)
    else:
        driver = webdriver.Firefox(options=options, executable_path=path)
        path = os.path.join(path, 'firefox')

    #Visit website
    driver.get(website)
    print('Driver Status: Visiting website -', website)

    #Find and click the Schedule of Classes link
Example #20
import time
import random
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
'''
SET YOUR DETAILS HERE
'''
NRIC = 'S1234567Z'  # TODO KEY YOUR IC HERE
EMAIL = '*****@*****.**'  # TODO KEY YOUR EMAIL HERE
WORKDAY = [0, 1, 3,
           4]  # TODO Key your workdays, Mon = 0, Tues = 1 and so on... sun = 6
'''
Start Selenium settings
'''

options = Options()
options.headless = True  # TODO you can set this to False, to see the outcome before submission

cwd = os.getcwd()
driverpath = os.path.join(cwd, "chromedriver.exe")

driver = webdriver.Chrome(driverpath, options=options, port=8080)
driver.get("https://form.gov.sg/#!/5e37870c73a1e90011942e50")


# create a function to key value in the element by id
def key_value_into_element_by_id(id, value):
    elem = driver.find_element_by_id(id)
    elem.send_keys(value)
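
# Hypothetical usage (the element id 'nric' is a placeholder, not taken from
# the real form):
# key_value_into_element_by_id('nric', NRIC)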


#scroll
Example #21
def igbombing():
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.chrome.options import Options
    import time
    import os
    import sys

    ig_username = input('    |-$ Your Username > ')
    ig_password = input('    |-$ Your Password > ')
    ig_victim = input("    |-$ Victim's Username > ")
    mode = input('''    |
    |-PRESS------------------|
    | 1] Repetitive Mode     |
    | 2] Script/Lyrical Mode |
    | Facing Problem ?       |
    |    Check out README.MD |
    |------------------------|
    |-> ''')

    if mode.lower() == '1' or mode.lower() == 'repetitive mode':
        reptxt = input(
            '    |-$ Word/Sentence that you want to send Multiple Times > ')
        repcount = int(input('    |-$ How many times ? > '))

    elif mode.lower() == '2' or mode.lower() == 'script/lyrical mode':
        lyrics = open("lyrics.txt", "r+")
        splitedlyrics = (lyrics.read().split())

    else:
        print('    |-} invalid input !')
        return

    print('    |-} Logging in...')

    options = Options()
    options.headless = True
    options.add_argument("--log-level=3")
    browser = webdriver.Chrome("chromedriver.exe", chrome_options=options)
    os.system('cls')
    print(f'''

    All Bombs away Sir
                 \      Goodbye Dullsville!
                __|__  /
       .'(\      .-.     /)'.
    +-====(*)===: " :===(o)=====-+
            \).  '-'  .(/
                 +=
                 +=
                 +=           █▄▄ █▀█ █▀▄▀█ █▄▄ █▀▀ █▀█ ▀█▀ █ █ █▀█ █▄ █
                              █▄█ █▄█ █ ▀ █ █▄█ ██▄ █▀▄  █  █▀█ █▄█ █ ▀█
                 +=
                 +=          ###########################################
                 +=          # Version >>> v1.0 (Beta)                 #
                             # Last Update >>> 11th July 2020          #
                 +=          # Coded by b3!ngD3v (Pramurta Sinha)      #
                 +=          # Named by Ritik Gupta                    #
                 +=          # GitHub >>> https://github.com/b31ngd3v  #
                 +=          ###########################################


    |-PRESS---------------|
    | 1] Call Bomber      |
    | 2] SMS Bomber       |
    | 3] Instagram Bomber |
    | 4] WhatsApp Bomber  |
    | 5] About            |
    | 6] Exit Script      |
    |---------------------|
    |-> 3
    |-$ Your Username > {ig_username}
    |-$ Your Password > {ig_password}
    |-$ Victim's Username > {ig_victim}
    |
    |-PRESS------------------|
    | 1] Repetitive Mode     |
    | 2] Script/Lyrical Mode |
    | Facing Problem ?       |
    |    Check out README.MD |
    |------------------------|
    |-> {mode}''')

    if mode.lower() == '1' or mode.lower() == 'repetitive mode':
        print(
            f'''    |-$ Word/Sentence that you want to send Multiple Times > {reptxt}
    |-$ How many times ? > {repcount}''')

    print('    |-} Logging in...')

    browser.get('https://www.instagram.com/accounts/login')
    time.sleep(2)
    username_bar = browser.find_element_by_name('username')
    username_bar.send_keys(ig_username)
    password_bar = browser.find_element_by_name('password')
    password_bar.send_keys(ig_password + Keys.ENTER)
    time.sleep(7)

    if browser.current_url == 'https://www.instagram.com/':
        pass
    else:
        try:
            confirm = WebDriverWait(browser, 20).until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, "coreSpriteKeyhole")))
        except:
            print('    |-} Log in Failed !')
            return
        finally:
            pass

    print('    |-} Logged in Successfully !')

    browser.get('https://www.instagram.com/direct/new/')
    '''try:
        confirm = WebDriverWait(browser, 20).until(EC.presence_of_element_located(
            (By.CLASS_NAME, "mt3GC")))
    finally:
        pass

    browser.find_element_by_class_name('mt3GC').click()'''

    try:
        confirm = WebDriverWait(browser, 20).until(
            EC.presence_of_element_located((By.NAME, "queryBox")))
    finally:
        pass

    browser.find_element_by_name('queryBox').send_keys(ig_victim)

    try:
        confirm = WebDriverWait(browser, 20).until(
            EC.presence_of_element_located((By.CLASS_NAME, "dCJp8")))
    finally:
        pass

    browser.find_element_by_class_name('dCJp8').click()

    time.sleep(1)

    browser.find_element_by_class_name('rIacr').click()

    try:
        confirm = WebDriverWait(browser, 20).until(
            EC.presence_of_element_located((By.XPATH, "//textarea")))
    finally:
        pass

    if ig_victim == browser.find_element_by_class_name(
            '_7UhW9.vy6Bb.qyrsm.KV-D4.fDxYl').text:
        pass
    else:
        print('    |-} no such user named ' + ig_victim)
        return

    if mode.lower() == '1' or mode.lower() == 'repetitive mode':
        for i in range(repcount):
            browser.find_element_by_xpath("//textarea").send_keys(reptxt +
                                                                  Keys.ENTER)

    elif mode.lower() == '2' or mode.lower() == 'script/lyrical mode':
        for words in splitedlyrics:
            browser.find_element_by_xpath("//textarea").send_keys(words +
                                                                  Keys.ENTER)

    print('''    |-} Done !
    |-----------------------------------------------------------''')
    browser.quit()
Example #22
 def searching_information(self):
     columns_order = pd.read_csv(self._dir_path + '/Columns_order.txt',
                                 header=None)
     columns_order = columns_order[0].tolist()
     options = Options()
     options.headless = True
     options.add_argument('--disable-notifications')
     options.add_argument('--no-sandbox')
     options.add_argument('--verbose')
     options.add_argument('--disable-gpu')
     options.add_argument('--disable-software-rasterizer')
     options.add_argument("--log-level=3")
     options.add_argument('--hide-scrollbars')
     self._browser = webdriver.Chrome(ChromeDriverManager().install(),
                                      options=options)
     self.chemicals = pd.DataFrame({'CAS NUMBER': self.chemicals})
     self._opening_dsstox_identifiers_and_casrn()
     df = pd.DataFrame(columns=columns_order)
     n_searches = 0
     self.chemicals = self.chemicals.where(pd.notnull(self.chemicals), None)
     n_rows = self.chemicals.shape[0]
     for idx, row in self.chemicals.iterrows():
         dsstox_substance_id = row['DSSTOX ID']
         cas = row['CAS NUMBER']
         preferred_name = row['PREFERRED NAME']
         n_searches = n_searches + 1
         try:
             if not dsstox_substance_id:
                 df_aux = pd.DataFrame({
                     'CAS NUMBER': [cas],
                     'Consulted Date': [self._now]
                 })
             else:
                 Properties = {
                     'CAS NUMBER': [cas],
                     'Data Source': [
                         '{}/dsstoxdb/results?search={}'.format(
                             self._url, dsstox_substance_id)
                     ],
                     'Consulted Date': [self._now],
                     'PREFERRED NAME': [preferred_name],
                     'DSSTOX ID': [dsstox_substance_id]
                 }
                 list_tabs = ['properties', 'env-fate-transport', 'details']
                 for tab in list_tabs:
                     self._visit(dsstox_substance_id, tab)
                     self._dynamic_wait(self._queries['dialog_window'],
                                        action='dialog')
                     if tab == 'details':
                         Properties.update(self._searching_details())
                     else:
                         Properties.update(self._searching_properties())
                     self._browser.back()
                     time.sleep(2)
                 df_aux = pd.DataFrame(Properties)
             df = pd.concat([df, df_aux],
                            ignore_index=True,
                            sort=True,
                            axis=0)
             if (n_searches % 20 == 0) or (n_searches == n_rows):
                 df = df[columns_order]
                 if self._existing:
                     df.to_csv(self.file_save,
                               index=False,
                               mode='a',
                               sep=',',
                               header=False)
                 else:
                     df.to_csv(self.file_save, index=False, sep=',')
                     self._existing = True
                 df = pd.DataFrame(columns=columns_order)
         except TimeoutException:
             continue
     self._browser.close()
Example #23
def get_driver(headless=True):
    options = Options()
    options.headless = headless
    driver = webdriver.Chrome(path.join(ROOT_DIR, "chromedriver"),
                              options=options)
    return driver
Example #24
def make_driver(driver_path: Path):
    options = Options()
    options.headless = True
    # options.add_argument("--window-size=1920,1200")
    driver = webdriver.Chrome(executable_path=driver_path, options=options)
    return driver
Example #25
import time
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
import json
import os
import pandas as pd

cwd = os.getcwd()
# Allows easy option to display / hide the browser window
display_browser_window = False
if not display_browser_window:
    opts = Options()
    opts.headless = True
    assert opts.headless
    browser = Chrome(f'{cwd}/chromedriver', options=opts)
else:
    browser = Chrome(f'{cwd}/chromedriver')

URL = 'https://www.pearsonham.com/'
# Navigates to the URL
browser.get(URL)
time.sleep(0.5)
# Finds and clicks hamburger menu to open menu
browser.find_element_by_id('hamburger').click()
time.sleep(0.1)
# Finds and clicks the team link via the xpath
browser.find_element_by_xpath('//*[@id="menu-item-52"]/a').click()

# Instantiate empty employee dictionary
employee_dict = {}
Example #26
def get_currencies(currencies, start, end, export_csv=False):
    frames = []  # store data for each currency

    # Get the historic data between USD and other currencies
    for currency in currencies:
        while True:
            try:
                # Open the URL and maximize the window
                my_url = f"https://investing.com/currencies/usd-{currency.lower()}-historical-data"
                option = Options()
                option.add_experimental_option("excludeSwitches",
                                               ["enable-logging"])
                option.headless = False  # Make the actions visible
                driver = webdriver.Chrome(options=option)
                driver.get(my_url)
                print("Got the URL.")
                driver.maximize_window()
                print("Maximized window.")
                sleep(5)

                # Accept the cookies, otherwise the prompt does not go away...
                cookies_button = WebDriverWait(driver, 20).until(
                    EC.element_to_be_clickable(
                        (By.ID, "onetrust-accept-btn-handler")))
                cookies_button.click()
                print("Accepted the cookies.")
                sleep(5)

                # Click on the date button to change the range
                date_button = WebDriverWait(driver, 20).until(
                    EC.element_to_be_clickable(
                        (By.ID, "flatDatePickerCanvasHol")))
                date_button.click()
                print("Clicked the date button.")
                sleep(5)

                # Select Start and End Date, clear their contents and input our own
                start_bar = WebDriverWait(driver, 20).until(
                    EC.element_to_be_clickable(
                        (By.XPATH, "/html/body/div[7]/div[1]/input[1]")))
                start_bar.clear()
                start_bar.send_keys(start)
                print("Entered the start date.")
                sleep(5)

                end_bar = WebDriverWait(driver, 20).until(
                    EC.element_to_be_clickable(
                        (By.XPATH, "/html/body/div[7]/div[1]/input[2]")))
                end_bar.clear()
                end_bar.send_keys(end)
                print("Entered the end date.")
                sleep(5)

                # Click the apply button and wait a bit
                apply_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable(
                        (By.XPATH, "/html/body/div[7]/div[5]/a")))
                apply_button.click()
                print("Clicked 'Apply'.")
                sleep(5)

                # Get the source code of the page that appeared with pandas
                # Use the default read_html flavor parameter (lxml)
                dataframes = pd.read_html(driver.page_source)
                # From the webpage source code we collected, keep only the table containing the historical data
                for dataframe in dataframes:
                    if dataframe.columns.tolist() == [
                            "Date",
                            "Price",
                            "Open",
                            "High",
                            "Low",
                            "Change %",
                    ]:
                        df = dataframe
                        frames.append(df)  # append once; the original appended the same frame twice
                        break

                # Export to csv if asked by function argument
                if export_csv:
                    df.to_csv(f"{currency}.csv", index=False)
                    print(f"{currency}.csv exported")
                driver.quit()
                print(f"{currency} scraped.")
                break
            except Exception:
                driver.quit()
                print(
                    f"Failed to scrape {currency}. Trying again in 10 seconds."
                )
                sleep(10)
                continue
    return frames
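
The while True loop above retries failed currencies forever. A bounded-retry sketch (an assumption about the desired behavior, not the original author's choice) stops after a fixed number of attempts:

from time import sleep

def with_retries(action, max_attempts=3, delay=10):
    for attempt in range(1, max_attempts + 1):
        try:
            return action()
        except Exception as exc:
            print(f"Attempt {attempt}/{max_attempts} failed: {exc}")
            if attempt == max_attempts:
                raise
            sleep(delay)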
Example #27
File: browser.py Project: edx/bok-choy
def _local_browser_class(browser_name):
    """
    Returns class, kwargs, and args needed to instantiate the local browser.
    """

    # Log name of local browser
    LOGGER.info(u"Using local browser: %s [Default is firefox]", browser_name)

    # Get class of local browser based on name
    browser_class = BROWSERS.get(browser_name)
    headless = os.environ.get('BOKCHOY_HEADLESS', 'false').lower() == 'true'
    if browser_class is None:
        raise BrowserConfigError(
            u"Invalid browser name {name}.  Options are: {options}".format(
                name=browser_name, options=", ".join(list(BROWSERS.keys()))))
    else:
        if browser_name == 'firefox':
            # Remove geckodriver log data from previous test cases
            log_path = os.path.join(os.getcwd(), 'geckodriver.log')
            if os.path.exists(log_path):
                os.remove(log_path)

            firefox_options = FirefoxOptions()
            firefox_options.log.level = 'trace'
            if headless:
                firefox_options.headless = True
            browser_args = []
            browser_kwargs = {
                'firefox_profile': _firefox_profile(),
                'options': firefox_options,
            }

            firefox_path = os.environ.get('SELENIUM_FIREFOX_PATH')
            firefox_log = os.environ.get('SELENIUM_FIREFOX_LOG')
            if firefox_path and firefox_log:
                browser_kwargs.update({
                    'firefox_binary': FirefoxBinary(
                        firefox_path=firefox_path, log_file=firefox_log)
                })
            elif firefox_path:
                browser_kwargs.update({
                    'firefox_binary': FirefoxBinary(firefox_path=firefox_path)
                })
            elif firefox_log:
                browser_kwargs.update({
                    'firefox_binary': FirefoxBinary(log_file=firefox_log)
                })

        elif browser_name == 'chrome':
            chrome_options = ChromeOptions()
            if headless:
                chrome_options.headless = True

            # Emulate webcam and microphone for testing purposes
            chrome_options.add_argument('--use-fake-device-for-media-stream')

            # Bypasses the security prompt displayed by the browser when it attempts to
            # access a media device (e.g., a webcam)
            chrome_options.add_argument('--use-fake-ui-for-media-stream')

            browser_args = []
            browser_kwargs = {
                'options': chrome_options,
            }
        else:
            browser_args, browser_kwargs = [], {}

        return browser_class, browser_args, browser_kwargs
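
# Hypothetical usage sketch (BROWSERS maps browser names to webdriver classes
# in the real module; the names below mirror how the return value is consumed):
# browser_class, browser_args, browser_kwargs = _local_browser_class('chrome')
# browser = browser_class(*browser_args, **browser_kwargs)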

Example #28
def link_assembler(file_path, name):
    fieldnames = ['Grade']

    with open(file_path, "ab") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # writer.writeheader()
        writer.writerow({
            "Grade": name,
        })



options = Options()
options.headless = False
#PROXY = "36.67.23.117:8888" # IP:PORT or HOST:PORT
#options.add_argument('--proxy-server=%s' % PROXY)
driver = selenium.webdriver.Chrome(chrome_options=options)
driver.set_page_load_timeout(10000)
e = 100
d = 7900
for _ in range(80):  # '_' avoids shadowing the inner loop variable i
    d += e
    driver.get("https://aws.amazon.com/partners/find/results/?size=100&start={}&sort=Relevance&view=Grid".format(d))
    sleep(20)
    zpath = driver.find_element_by_xpath("//*[@id='psf-search-results-da-wrapper']/div[2]/div[3]/div[1]")
    child_elements = zpath.find_elements_by_class_name("psf-partner-name")
    for i in child_elements:
        b = i.find_element_by_tag_name("a")
        c = b.get_attribute("href")
Example #29
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
import requests

# false to watch it work
HEADLESS = True

options = Options()
options.headless = HEADLESS
options.add_argument("--window-size=1920,1200")

driver = webdriver.Chrome(options=options)
driver.get("https://www.gutenberg.org/")
search_box = driver.find_element_by_class_name("searchInput")
search_box.send_keys("Heart of Darkness")
search_box.send_keys(Keys.RETURN)

book_results = driver.find_elements_by_class_name("booklink")
print(f"Found {len(book_results)} results for 'Heart of Darkness':")
result_array = []
for result in book_results:
    book_element = result.find_element_by_class_name("content")
    book_title = book_element.find_element_by_class_name("title").text
    book_author = book_element.find_element_by_class_name("subtitle").text
    book_downloads = book_element.find_element_by_class_name("extra").text
    print(f"{book_title}, {book_author}, downloads: {book_downloads}")
    result_array.append({
        "link element": result,
        "title": book_title,
Example #30
def init_webdriver(proxy_dict=None, account_credentials=None):
    """
    Here, we are configure and intitialize webdriver

    :param proxy_dict: proxy dictionary
    :param account_credentials: account credentials
    :return:
    """
    """
    Webdriver's Load Strategy
    
    In majority of cases, you might want to make the load strategy to "none" to greatly improve
    the scrapping. Doing so will not run any JS/other script, so selenium will be much faster.
    
    In apkpure.com's case though, or in other case when you need JS/Script to load, it should be set to "normal".
    
    If no mode is set (i.e. all commented out) then the default will be "normal"
    
    More about loading strategy: https://stackoverflow.com/a/44771628/6558550
    """

    # webdriver.DesiredCapabilities.CHROME['pageLoadStrategy'] = "none"
    # webdriver.DesiredCapabilities.CHROME['pageLoadStrategy'] = "normal"

    if config.UseProxies:
        """
        Here we set the proxy
        """
        webdriver.DesiredCapabilities.CHROME['proxy'] = {
            "httpProxy": proxy_dict.get('httpProxy', ''),
            "ftpProxy": proxy_dict.get('ftpProxy', ''),
            "sslProxy": proxy_dict.get('sslProxy', ''),
            "noProxy": None,
            "proxyType": "MANUAL",
            "class": "org.openqa.selenium.Proxy",
            "autodetect": False
        }

    # declare an options object
    chrome_options = Options()

    # setting the browser visibility mode
    chrome_options.headless = not config.ShowBrowserWindows
    """
    Setting Webdriver's Preferences

    All of the codes below are experimental preference that worked for my system.
    It should be good for you to run. But you can turn it off by commenting it should you need any adjustment.

    
    - Setting the default download directory
        'download.default_directory': 'your/download/path',
        Note that if same filename already in directory, browser will automatically append with counter (like "files (1).zip" )

    - Disable Chrome Harm file detector
        'safebrowsing.enabled': True,
        Disable chrome popup asking that file downloaded might be harm. The pop up is preventing the download to start.

    - disable image load 
        'profile.managed_default_content_settings.images': 2,
        This will greatly increase the bot speed, but the images will not be loaded

    - Force webdriver to use diskcache. 
        'disk-cache-size': 4096
        This will force webdriver to save browser's cache on disk. So we did not loading everytime we start the browser.
        4096 is for 4gb and , you can configure yours. The bigger the better, but as high as 8gb would be wasteful I think.
        But we do need as high as 8gb if say you want to scrap it to download entire night.
        
    - Allow/prevent downloading multiple files
        'profile.default_content_setting_values.automatic_downloads': 1,
        'download.prompt_for_download': False
        Two lines above will automatically download without prompting a new window for saving the file.
        For now, I did not require these codes so I commented it and it worked on apkpure.com.
        You might need to turn it on should you find a problem regarding downloading

    - Disable download protection
        chrome_options.add_argument('--safebrowsing-disable-download-protection')
        This code is neraly have same functionality with "Disable Chrome Harm file detector" above.

    """

    preferences = {
        'download.default_directory':
            config.SaveDirectory if config.SaveDirectory else config.DefaultSaveDirectory,
        'safebrowsing.enabled': True,
        'profile.managed_default_content_settings.images': 2,
        'disk-cache-size': 4096,

        # For now, these are not needed:
        # 'profile.default_content_setting_values.automatic_downloads': 1,
        # 'download.prompt_for_download': False,
    }

    chrome_options.add_argument('--safebrowsing-disable-download-protection')

    # and then we register the preferences on the options object
    chrome_options.add_experimental_option('prefs', preferences)

    # create the webdriver from the configured path and options;
    # the browser window then appears (unless running headless)
    driver = webdriver.Chrome(config.WebDriverPath,
                              chrome_options=chrome_options)

    # for Firefox (geckodriver) you would run this instead:
    # driver = webdriver.Firefox(config.WebDriverPath)

    if config.TestMode:
        # for testing: download a Notepad++ installer, then exit
        driver.get(
            'https://notepad-plus-plus.org/repository/7.x/7.6.5/npp.7.6.5.bin.minimalist.7z'
        )
        input("Test download started; press Enter to exit ...")
        exit()
    """
    Clearing Cookies
    
    We seem did not need to clear the cookies, because everytime we close a driver
    and re-instantiating it, it is a brand new clean web driver with no default configuration.
    
    But should that requirements really necessary, we can always uncomment the code below
    """
    # driver.delete_all_cookies()
    """
    If Login is required, the code will perform a login action
    """
    if config.RequireLogin:
        # get the credentials
        if ':' not in account_credentials:
            print("Credentials is not valid")
            exit()

        username, password = account_credentials.split(':')

        # load the login page
        print('Loading Login Page ...')
        driver.get(config.LoginPage)
        """
        Form filling
        
        Clicking On Username/Email Input -> Filling Next Username/Email ...
        Clicking On Password Input -> Filling Next Password ...
        """
        try:
            driver.find_element_by_xpath(
                config.XPathFormUserOrEmail).send_keys(username)
            driver.find_element_by_xpath(
                config.XPathFormPassword).send_keys(password)
        except NoSuchElementException as e:
            print(
                'Either the username or the password input field was not found '
                'by XPath. Please check the XPaths for the username and '
                'password fields in the configuration.')
            if config.DebugMode:
                print(e)
            exit()
        """
        ReCaptcha 
    
        If reCaptcha found -> Solving reCaptcha ...
        """

        # find_elements (plural) returns an empty list instead of raising
        # NoSuchElementException when the captcha is absent
        recaptcha_elements = driver.find_elements_by_xpath(config.XPathRecaptcha)
        if recaptcha_elements:
            """
            For now we have 3 options, according to config_02.py/ReCaptchaOption:
            1. Wait a fixed number of seconds for the user to solve the reCaptcha.
            2. The CLI asks for input (it waits forever), so the user can take
               their time solving the captcha, then press Enter in the CLI to
               continue.
            3. Call an anti-captcha service (not available yet, because we need
               the documentation for the service's API. Feel free to inform me
               about this.)
            """

            if config.ReCaptchaOption == 1:
                # option 1: wait 1 minute (or any configured time) for the
                # user to solve the captcha
                time.sleep(60)
            elif config.ReCaptchaOption == 2:
                # option 2
                input(
                    "Captcha found. Please solve it in the browser, then press "
                    "Enter here to continue...")
            elif config.ReCaptchaOption == 3:
                # todo: call an anti-captcha service (e.g. anti-captcha.com) here
                # requirements: which anti-captcha services exist, and the
                # documentation for their API
                pass
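                # A purely hypothetical sketch of what option 3 could look
                # like; the endpoint, fields, and token flow below are all
                # assumptions, not a real documented API:
                # import requests
                # site_key = recaptcha_elements[0].get_attribute('data-sitekey')
                # resp = requests.post(
                #     'https://api.example-anticaptcha.test/solve',
                #     json={'sitekey': site_key, 'url': config.LoginPage}).json()
                # driver.execute_script(
                #     'document.getElementById("g-recaptcha-response")'
                #     '.value = arguments[0];', resp.get('token', ''))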

        print('Clicking on Login Button ...')
        try:
            driver.find_element_by_xpath(config.XPathLoginButton).click()
        except NoSuchElementException as e:
            print(
                'Login button not found by XPath. Please check the XPath '
                'for the login button in the configuration.', e)
            exit()
        """
        Then the code will check if a certain XPath that will be always appear after successful login, appears.
        If appears, it is guaranteed that the login process is success.
        If not, it may be also running well, but are suspected for error in long term 
        (e.g. browser will keep access something when the login process is failed).
        
        If login is done via requests method, we can easily see the status_code to determine if login was successful.
        But because login process must be done via HTML Form, this is considered the best practice to check if login is succesful or not.
        """
        is_successfully_login = False
        print('Waiting until login succeeds ...')
        try:
            login_timeout = config.LoginTimeout
            WebDriverWait(driver, login_timeout).until(
                EC.presence_of_element_located(
                    (By.XPATH, config.LoggedInXPath)))
            is_successfully_login = True
        except TimeoutException:
            print(
                "Login took too long; the code cannot tell whether the browser "
                "is logged in or not. This is a warning rather than an error, "
                "but in some cases it may lead to one, since the code does not "
                "know whether the login actually succeeded.")

        if not is_successfully_login:
            # todo: need variety of error case to be coded more specifically
            print('Login/session retrieval seems to have failed')
            exit()

    return driver
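
# A minimal usage sketch (hedged; assumes these config values are set in your
# config module):
# driver = init_webdriver(proxy_dict={'httpProxy': 'host:port'},
#                         account_credentials='username:password')
# driver.get('https://apkpure.com')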
예제 #31
0
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.common.by import By
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

# from selenium.common.exceptions import StaleElementReferenceException
import time

# C:\Users\spi59\Documents\Drivers

opts = Options()
opts.headless = False

# sanity check that the (now deprecated) set_headless API is present
assert opts.set_headless
browser = Chrome(
    executable_path='C:/Users/spi59/Documents/Drivers/chromedriver.exe',
    options=opts)

browser.get('https://www.ecosia.org')
input_field = browser.find_element_by_tag_name('input')
input_field.send_keys('ads')
input_field.submit()
time.sleep(3)
ads = browser.find_elements_by_class_name('result-title-ad')
if ads:  # guard against IndexError when no ad results are present
    ads[0].click()
    time.sleep(3)


browser.quit()
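
# A hedged alternative to the fixed time.sleep(3) calls above, using the
# explicit-wait imports commented out at the top of this example:
# WebDriverWait(browser, 10).until(
#     EC.presence_of_element_located((By.CLASS_NAME, 'result-title-ad')))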
예제 #32
0
# missing imports needed to run this snippet
import pyttsx3
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# recognized trigger phrases (Portuguese: "Hi", "Hello", "Hi Siri",
# "Hello Siri", "Fine, and you?", "I'm fine, thank you", "Siri",
# "What's your name", "How old are you", "What are you", "Fine", etc.)
l = [
    'Oi', 'Ola', 'Oi Siri', 'Ola Siri', 'Tudo e com você', 'Tudo e com voce',
    'Estou bem obrigado', 'Siri', 'Qual seu nome', 'Quantos anos voce tem',
    'Oque e voce', 'Tudo', 'Olá', 'Qual o seu nome'
]
dialogo = ''

path = './WebDriver/chromedriver.exe'
option = Options()

option.add_experimental_option("prefs", {\
    "profile.default_content_setting_values.media_stream_mic": 1
    })

option.headless = False
driver = webdriver.Chrome(path, 0, option)
driver.set_window_position(0, 0)
driver.set_window_size(200, 200)


def falar(dialogo):  # "falar" = "speak": reads the dialog aloud via SAPI5
    speak = pyttsx3.init('sapi5')
    speak.say(dialogo)
    speak.runAndWait()


def respostanula(res):  # "respostanula" = handler for an unrecognized response
    if res != l[0]:
        falar('Não entendi')  # "I didn't understand"
    elif res != l[1]:
예제 #33
0
 def test_on_linux(self):
     exe_path = os.path.join(os.getenv("CHROMEWEBDRIVER"), "chromedriver")
     opts = ChromeOptions()
     opts.headless = True
     driver = webdriver.Chrome(executable_path=exe_path, options=opts)
     run_for_driver(driver, self)
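
 # Hedged note: CHROMEWEBDRIVER is pre-set on GitHub Actions' hosted Linux
 # runners (pointing at the directory that contains chromedriver); when
 # running locally, export it first, e.g.:
 # CHROMEWEBDRIVER=/usr/lib/chromium-browser pytest -k test_on_linux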