Code Example #1
browser.get(URL)
for location in df:
    browser.find_element_by_id('address').click()
    time.sleep(2)
    keyboard.press_and_release('ctrl+a, delete')
    keyboard.write(location + " Singapore")
    keyboard.press('down')
    time.sleep(0.3)
    keyboard.release('down')
    keyboard.press('down')
    time.sleep(0.3)
    keyboard.release('down')
    keyboard.press('enter')
    time.sleep(0.3)
    keyboard.release('enter')
    browser.find_element_by_class_name('btn-primary').click()

    mydict[location] = browser.find_element_by_id('info_window').get_attribute(
        "innerHTML")
    print(location)

with open('places.p', 'wb') as f:
    pickle.dump(mydict, f)

# keyboard.press('down')

# keyboard.press_and_release('down, enter')
# keyboard.press('down')

# for _ in range(20):
#     browser.find_element_by_id('address').send_keys(Keys.DELETE)
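Example #1 drives the suggestion list through the OS-level keyboard module, which only works while the browser window has focus. A minimal sketch of the same flow done entirely inside Selenium — the 'address' and 'info_window' IDs and the 'btn-primary' class come from the snippet above, while URL and the df list of locations are placeholders for values the original script defines elsewhere:

import pickle
import time

from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

URL = 'https://example.com/geocoder'   # placeholder: the original script defines URL
df = ['Orchard Road', 'Marina Bay']    # placeholder: the original iterates a DataFrame

browser = Chrome()
browser.get(URL)

mydict = {}
for location in df:
    field = browser.find_element(By.ID, 'address')
    field.click()
    field.clear()                       # replaces ctrl+a / delete
    field.send_keys(location + ' Singapore')
    time.sleep(2)                       # let the suggestion list render
    field.send_keys(Keys.ARROW_DOWN, Keys.ARROW_DOWN, Keys.ENTER)
    browser.find_element(By.CLASS_NAME, 'btn-primary').click()
    mydict[location] = browser.find_element(
        By.ID, 'info_window').get_attribute('innerHTML')
    print(location)

with open('places.p', 'wb') as f:
    pickle.dump(mydict, f)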
Code Example #2
import time
from selenium.webdriver import Chrome

chromeDriver = 'C:\\temp\\chromedriver.exe'

driver = Chrome(chromeDriver)

driver.get('https://login.11st.co.kr/auth/front/login.tmall')

time.sleep(3)

input_login = driver.find_element_by_id("loginName")
input_login.send_keys("wownskl")

input_pw = driver.find_element_by_id("passWord")
input_pw.send_keys("ahrl8383")

btn = driver.find_element_by_class_name("btn_Atype")

time.sleep(3)

btn.click()

time.sleep(3)

driver.get('http://buy.11st.co.kr/order/OrderList.tmall')

time.sleep(5)

driver.quit()
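Selenium 4 deprecates (and newer releases remove) the positional driver path and the find_element_by_* helpers used above; a hedged sketch of the same login flow in the newer style, with the same element IDs, class name, and driver path as the snippet:

import time

from selenium.webdriver import Chrome
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

driver = Chrome(service=Service('C:\\temp\\chromedriver.exe'))
driver.get('https://login.11st.co.kr/auth/front/login.tmall')
time.sleep(3)

driver.find_element(By.ID, 'loginName').send_keys('wownskl')
driver.find_element(By.ID, 'passWord').send_keys('ahrl8383')
driver.find_element(By.CLASS_NAME, 'btn_Atype').click()

time.sleep(3)
driver.get('http://buy.11st.co.kr/order/OrderList.tmall')
time.sleep(5)
driver.quit()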
Code Example #3
File: scrape.py Project: MichaelWerner/Python
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
opts = Options()
opts.set_headless()
assert opts.headless  # Operating in headless mode
browser = Chrome(options=opts)
browser.get('https://duckduckgo.com')

search_form = browser.find_element_by_class_name('js-search-input.search__input--adv')
search_form.send_keys('real python')
search_form.submit()

results = browser.find_elements_by_class_name('result')
print(results[0].text)
print(len(results))

for i in range(len(results)):
  print(results[i].text)

browser.close()
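Reading the results immediately after submit() can race the page render; an explicit wait is usually more reliable. A sketch assuming the same 'result' class — the search-box class is shortened to a single class name here, since a compound class name cannot be passed to a class-name locator:

from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

opts = Options()
opts.add_argument('--headless')
browser = Chrome(options=opts)
browser.get('https://duckduckgo.com')

search_form = browser.find_element(By.CLASS_NAME, 'js-search-input')
search_form.send_keys('real python')
search_form.submit()

# Wait up to 10 seconds for at least one result element to appear.
results = WebDriverWait(browser, 10).until(
    EC.presence_of_all_elements_located((By.CLASS_NAME, 'result')))
for result in results:
    print(result.text)

browser.quit()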
Code Example #4
class Scraping():
    def __init__(self, index_get):
        chrome_path = 'D:/Soft/chromedriver_win32/chromedriver.exe'
        # keep the Selenium browser open until the code finishes
        # opts = ChromeOptions()
        # opts.add_experimental_option("detach", True)
        #self.driver = Chrome(executable_path = chrome_path, chrome_options= opts)
        options = Options()
        options.add_argument("--use-fake-ui-for-media-stream")
        #options.add_argument('--headless')
        self.driver = Chrome(executable_path=chrome_path,
                             chrome_options=options)
        if index_get == 0:
            self.driver.get(
                'https://translate.google.com/#view=home&op=translate&sl=en&tl=vi&text='
            )

    def google_translate(self, sentence, mode):
        mode = 'en&tl=vi' if mode == 'e2v' else 'vi&tl=en'
        self.driver.get(
            'https://translate.google.com/#view=home&op=translate&sl=%s&text=%s'
            % (mode, sentence))
        time.sleep(0.5)
        translated = ''
        spelling = None
        word_type = None
        hint = None
        contents_of_word_type = None
        try:
            spelling = self.driver.find_element_by_xpath(
                '/html/body/div[2]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div/div/div[3]/div[1]'
            ).text
            word_type = self.driver.find_elements_by_class_name(
                'gt-baf-pos-head')
            word_type = [x.text.replace('\nFrequency', '') for x in word_type]
            contents_of_word_type = self.driver.find_element_by_xpath(
                '/html/body/div[2]/div[1]/div[2]/div[2]/div[3]/div[2]/div[1]/div[1]/div'
            ).text.split('\n')
            del (contents_of_word_type[0])
            del (contents_of_word_type[1])
        except:
            pass

        try:
            hint = self.driver.find_element_by_class_name(
                'gt-related-suggest-message').text.split('\n')
        except:
            try:
                hint = self.driver.find_element_by_class_name(
                    'gt-spell-correct-message').text.split('\n')
            except:
                pass
        translated = self.driver.find_element_by_xpath(
            '/html/body/div[2]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[3]/div[1]/div[2]/div/span[1]'
        ).text

        display_wordtype = self.driver.find_element_by_class_name(
            'gt-cd-baf').get_attribute('style')
        display_hint = self.driver.find_element_by_id(
            'spelling-correction').get_attribute('style')
        if display_wordtype == 'display: none;':
            word_type = None
        if display_hint == 'display: none;':
            hint = None
        return [translated, spelling, (word_type, contents_of_word_type), hint]

    def speech_to_text(self, ui):
        speech_button = wait(self.driver, timeout=10).until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@id="gt-speech"]/span')))

        def status_s2t():
            status = self.driver.find_element_by_xpath('//*[@id="gt-speech"]')
            return status.get_attribute('data-tooltip').split(' ')[1]

        # remove_input = self.driver.find_element_by_xpath('/html/body/div[2]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div/div/div[2]/div/div/div')
        # remove_input.click()
        if status_s2t() == 'on':
            speech_button.click()
        ui.st2_waiting = True
        time_waiting = len(ui.parent.GetLang('ENG').split(' ')) * 1000
        time_waiting = 3000 if time_waiting <= 2000 else int(time_waiting *
                                                             0.8)

        def waiting(speech_button):
            result = self.driver.find_element_by_xpath(
                '//*[@id="input-wrap"]/div[2]').get_attribute('textContent')
            ui.lineEdit.setReadOnly(False)
            ui.lineEdit.setText(result)
            ui.lineEdit.setReadOnly(True)
            ui.st2_waiting = False
            ui.lineEdit.setPlaceholderText('Listening stopped !')
            if status_s2t() == 'off':
                speech_button.click()

        QtCore.QTimer.singleShot(time_waiting, lambda: waiting(speech_button))
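The bare except/pass pairs in google_translate() silently swallow every error, not just a missing element. A small helper — a sketch, not part of the original class — keeps the "return None when absent" behaviour while letting other failures surface:

from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

def find_text_or_none(driver, by, locator):
    """Return the element's text, or None if the element does not exist."""
    try:
        return driver.find_element(by, locator).text
    except NoSuchElementException:
        return None

# hypothetical usage inside google_translate():
# spelling = find_text_or_none(self.driver, By.XPATH, '<spelling xpath>')
# hint = find_text_or_none(self.driver, By.CLASS_NAME, 'gt-related-suggest-message')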
Code Example #5
def fech_data(*args, **kwargs):
    """
    Scheduled task to scrape the data from
    https://www.worldometers.info/coronavirus/
    """
    webdriver = ChromeDriverManager().install()
    opt = Options()
    opt.add_argument('--headless')
    opt.add_argument('--window-size=1920,1080')

    driver = Chrome(webdriver, options=opt)

    print("<<<<<<<<<<<<scraping started >>>>>>>>>>>>>>>>>>>>> \n")
    url = 'https://www.worldometers.info/coronavirus/'
    driver.get(url)
    data = driver.find_element_by_class_name("content-inner")

    print("\n<<<<<<<<<<<Fetching general stats >>>>>>>>>>>>>>>>>>>>> \n")
    last_update = data.find_elements_by_xpath(
        "//*[contains(text(), 'Last updated:')]")[0].text
    total_cases = data.find_elements_by_css_selector(
        '[id*="maincounter-wrap"]')[0].find_element_by_tag_name('span').text
    death_cases = data.find_elements_by_css_selector(
        '[id*="maincounter-wrap"]')[1].find_element_by_tag_name('span').text
    recovery_cases = data.find_elements_by_css_selector(
        '[id*="maincounter-wrap"]')[2].find_element_by_tag_name('span').text
    active_cases = data.find_elements_by_css_selector(
        '[class*="panel panel-default"]')[0]
    closed_cases = data.find_elements_by_css_selector(
        '[class*="panel panel-default"]')[1]
    currently_infected = active_cases.find_element_by_class_name(
        "number-table-main").text
    cases_with_outcome = closed_cases.find_element_by_class_name(
        "number-table-main").text
    mild_condition_active_cases = active_cases.find_elements_by_css_selector(
        '[class*="number-table"]')[1].text
    critical_condition_active_cases = active_cases.find_elements_by_css_selector(
        '[class*="number-table"]')[2].text
    recovered_closed_cases = closed_cases.find_elements_by_css_selector(
        '[class*="number-table"]')[1].text
    death_closed_cases = closed_cases.find_elements_by_css_selector(
        '[class*="number-table"]')[2].text
    general_stats = {
        'total_cases':
        ''.join(total_cases.split(',')),
        'death_cases':
        ''.join(death_cases.split(',')),
        'recovery_cases':
        ''.join(recovery_cases.split(',')),
        'currently_infected':
        ''.join(currently_infected.split(',')),
        'cases_with_outcome':
        ''.join(cases_with_outcome.split(',')),
        'mild_condition_active_cases':
        ''.join(mild_condition_active_cases.split(',')),
        'critical_condition_active_cases':
        ''.join(critical_condition_active_cases.split(',')),
        'recovered_closed_cases':
        ''.join(recovered_closed_cases.split(',')),
        'death_closed_cases':
        ''.join(death_closed_cases.split(',')),
        'last_update':
        parse(last_update.split('Last updated:')[1])
    }

    gen_serializer = GeneralStatsSerializer(data=general_stats)
    gen_serializer.is_valid(raise_exception=True)
    gen_serializer.save()

    print("<<<<<<<<<<<Fetching country stats >>>>>>>>>>>>>>>>>>>>> \n")

    country_data = driver.find_element_by_id('main_table_countries_today')
    country_data_list = country_data.find_element_by_tag_name(
        'tbody').find_elements_by_tag_name("tr")

    attributes = [
        'country', 'total_cases', 'new_cases', 'total_deaths', 'new_deaths',
        'total_recovered', 'active_cases', 'serious_critical',
        'cases_per_mill_pop'
    ]

    engine = create_engine(
        f"postgresql://{settings.DB_USER}:{settings.DB_PASSWORD}@{settings.DB_HOST}:5432/{settings.DB_NAME}"
    )
    print("<<<<<<<<<<<Persist to DB:START >>>>>>>>>>>>>>>>>>>>> \n")
    for country in country_data_list:
        country_stats = []
        for i in country.find_elements_by_tag_name("td"):
            country_stats.append(''.join(i.text.split(',')))

        detail = dict(zip(attributes, country_stats))
        country_ = detail['country']
        flag = "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/International_Flag_of_Planet_Earth.svg/800px-International_Flag_of_Planet_Earth.svg.png"
        with open(settings.FLAGS_FILE, "r") as read_file:
            data = json.load(read_file)
            try:
                flag = data[country_]
            except:
                pass

        total_cases = detail['total_cases']
        new_cases = detail['new_cases'].split('+')[::-1][0]
        total_deaths = detail['total_deaths']
        new_deaths = detail['new_deaths'].split('+')[::-1][0]
        total_recovered = detail['total_recovered']
        active_cases = detail['active_cases']
        serious_critical = detail['serious_critical']
        cases_per_mill_pop = detail['cases_per_mill_pop']
        last_update = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
        update_sql = f"""
        INSERT INTO cases_countrycases
        values (
            '{country_}', '{total_cases if total_cases else 0}',
            '{new_cases if new_cases else 0}', '{total_deaths if total_deaths else 0}',
            '{new_deaths if new_deaths else 0}', '{total_recovered if total_recovered else 0}',
            '{active_cases if active_cases else 0}', '{serious_critical if serious_critical else 0}',
            '{cases_per_mill_pop if cases_per_mill_pop else 0}', '{flag}', '{last_update}')
        ON CONFLICT (country) DO UPDATE SET
        total_cases = '{total_cases if total_cases else 0}',
        new_cases = '{new_cases if new_cases else 0}',
        total_deaths = '{total_deaths if total_deaths else 0}',
        new_deaths = '{new_deaths if new_deaths else 0}',
        total_recovered = '{total_recovered if total_recovered else 0}',
        active_cases = '{active_cases if active_cases else 0}',
        serious_critical = '{serious_critical if serious_critical else 0}',
        cases_per_mill_pop = '{cases_per_mill_pop if cases_per_mill_pop else 0}',
        flag = '{flag}',
        last_update = '{last_update}'
        """
        with engine.begin() as conn:  # TRANSACTION
            conn.execute(text(update_sql))
    print("<<<<<<<<<<<Persist to DB:END >>>>>>>>>>>>>>>>>>>>> \n")

    driver.quit()
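Building update_sql with f-strings interpolates scraped text straight into the SQL, so a single quote in a country name breaks the statement, and it is an injection risk. A hedged sketch of the same upsert using bound parameters — it reuses the engine and the loop variables from the snippet above rather than being a standalone script:

from sqlalchemy import text

upsert = text("""
    INSERT INTO cases_countrycases
    VALUES (:country, :total_cases, :new_cases, :total_deaths, :new_deaths,
            :total_recovered, :active_cases, :serious_critical,
            :cases_per_mill_pop, :flag, :last_update)
    ON CONFLICT (country) DO UPDATE SET
        total_cases = :total_cases, new_cases = :new_cases,
        total_deaths = :total_deaths, new_deaths = :new_deaths,
        total_recovered = :total_recovered, active_cases = :active_cases,
        serious_critical = :serious_critical,
        cases_per_mill_pop = :cases_per_mill_pop,
        flag = :flag, last_update = :last_update
""")

params = {
    'country': country_,
    'total_cases': total_cases or 0,
    'new_cases': new_cases or 0,
    'total_deaths': total_deaths or 0,
    'new_deaths': new_deaths or 0,
    'total_recovered': total_recovered or 0,
    'active_cases': active_cases or 0,
    'serious_critical': serious_critical or 0,
    'cases_per_mill_pop': cases_per_mill_pop or 0,
    'flag': flag,
    'last_update': last_update,
}
with engine.begin() as conn:  # still one transaction per row
    conn.execute(upsert, params)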
Code Example #6
File: test_views.py Project: LucasArthur94/tccapp
class NewWorkgroupTestCase(StaticLiveServerTestCase):
    def setUp(self):
        self.browser = Chrome()
        self.browser.implicitly_wait(10)

        user = User.objects.create_user(username='******', name="Administrador Teste", email='*****@*****.**', password='******', is_staff=True, is_superuser=True)
        coordinator = Coordinator.objects.create(usp_number='1234567', user=user)
        teacher = Teacher.objects.create(usp_number='1234567', user=user)
        student_user = User.objects.create(username='******', name="Aluno Teste", email='*****@*****.**', password='******')
        student = Student.objects.create(usp_number='7983121', user=student_user)
        guest_user = User.objects.create(username='******', name="Convidado Teste", email='*****@*****.**', password='******')
        guest = Guest.objects.create(organization_name='Empresa', user=guest_user)

        self.browser.get('%s%s' % (self.live_server_url, reverse_lazy("login")))
        self.browser.find_element_by_id('id_username').send_keys('*****@*****.**')
        self.browser.find_element_by_id('id_password').send_keys('tccpoliusp')
        self.browser.find_element_by_id('login').click()

    def tearDown(self):
        self.browser.quit()

    def test_teacher_sign_up_fire(self):
        self.browser.find_element_by_id('coordinator-workgroups').click()
        self.browser.find_element_by_id('new-workgroup').click()

        self.browser.find_element_by_id('id_title').send_keys('Projeto de Teste')

        self.browser.find_element_by_class_name("select2-search__field").send_keys('alunogrupo')
        self.browser.find_element_by_class_name("select2-results__option").click()

        self.browser.find_element_by_class_name("select2-search__field").send_keys('admin')
        self.browser.find_element_by_class_name("select2-results__option").click()

        self.browser.find_element_by_class_name("select2-search__field").send_keys('convidadogrupo')
        self.browser.find_element_by_class_name("select2-results__option").click()

        self.browser.find_element_by_id('submit').click()

        self.assertIn(('%s%s' % (self.live_server_url, reverse_lazy("disciplines_list"))), self.browser.current_url)
        self.assertTrue(self.browser.find_element_by_xpath("//table[@class='table']/tbody/tr/th[1][contains(.,'Projeto de Teste')]"))
        self.assertTrue(self.browser.find_element_by_xpath("//table[@class='table']/tbody/tr/th[2][contains(.,'1')]"))
        self.assertTrue(self.browser.find_element_by_xpath("//table[@class='table']/tbody/tr/th[3][contains(.,'Administrador Teste')]"))
        self.assertTrue(self.browser.find_element_by_xpath("//table[@class='table']/tbody/tr/th[4][contains(.,'Convidado Teste')]"))
Code Example #7
from selenium.webdriver import Chrome
import time
from datetime import datetime

url = 'https://www.google.com'

navegador = Chrome()
navegador.get(url)

botao_teclado = navegador.find_element_by_class_name('hOoLGe')
botao_teclado.click()

time.sleep(2)

botao_login = navegador.find_elements_by_tag_name('a')[0]
botao_login.click()

time.sleep(2)

navegador.quit()
Code Example #8
from selenium.webdriver import Chrome
from selenium.webdriver.common.keys import Keys

navegador = Chrome()
navegador.get('https://consultacnpj.com/cnpj/')
navegador.maximize_window()

cnpjs = ["45997418000153", "18328118000109", "45543915000181"]

for cnpj in cnpjs:
    input = navegador.find_element_by_css_selector(
        '#__layout > div > div:nth-child(2) > div > div > div > div > div > div.cnpj--wrapper > div > div > input'
    )
    input.clear()
    input.send_keys(cnpj)
    texto = navegador.find_element_by_class_name('company-data--card').text
    with open(f'{str(cnpj)}.csv', 'w', encoding='UTF-8') as csv:
        csv.write(texto)

navegador.quit()
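The loop above types each CNPJ but never confirms the query before reading the card, so it may capture a stale result. A hedged variant that presses ENTER and waits for the card — the assumption that ENTER submits the lookup, and the shortened CSS selector, are mine, not taken from the site:

from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

navegador = Chrome()
navegador.get('https://consultacnpj.com/cnpj/')
navegador.maximize_window()

cnpjs = ["45997418000153", "18328118000109", "45543915000181"]

for cnpj in cnpjs:
    campo = navegador.find_element(By.CSS_SELECTOR, 'div.cnpj--wrapper input')
    campo.clear()
    campo.send_keys(cnpj, Keys.ENTER)  # assumption: ENTER triggers the query
    texto = WebDriverWait(navegador, 10).until(
        EC.visibility_of_element_located(
            (By.CLASS_NAME, 'company-data--card'))).text
    with open(f'{cnpj}.csv', 'w', encoding='UTF-8') as arquivo:
        arquivo.write(texto)

navegador.quit()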
Code Example #9
from selenium.webdriver import Chrome
import time

driver = Chrome('./chromedriver')

url = 'https://accounts.google.com/signin/v2/identifier?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Dzh-TW%26next%3D%252F&hl=zh-TW&ec=65620&flowName=GlifWebSignIn&flowEntry=ServiceLogin'

driver.get(url)

driver.find_element_by_id('identifierId').send_keys('*****@*****.**')
driver.find_element_by_id('identifierNext').click()
time.sleep(5)
driver.find_element_by_class_name('whsOnd').send_keys('123123123')
driver.find_element_by_id('passwordNext').click()
Code Example #10
import time
from selenium.webdriver import Chrome, ChromeOptions

# The original idea was to pull chromedriver straight from this GitHub repo:
#driver = webdriver.Chrome(executable_path="https://github.com/kkakkoottaaaniiiia/abunator/tree/master/test/chromedriver.exe")

# This works instead (at least in my own environment)
options = ChromeOptions()
# Create the Chrome WebDriver object.
driver = Chrome(options=options)

# Launch abunator
driver.get("https://abunatorroute.azurewebsites.net/")

# Wait 3 seconds
time.sleep(3)

# start.click moves to the question screen (質問画面)
start = driver.find_element_by_class_name("start")
start.click()

# On the question screen only [はい] (yes) is chosen
# Locating by name might let us select and click something other than はい?
#no = driver.find_element_by_name('いいえ')

# Using XPath:
#no = driver.find_element_by_xpath("//input[@value='はい']").click()

# Press either [はい] or [いいえ] until the title becomes "解答画面" (answer screen)
#while driver.title == '質問画面':
#    no = driver.find_element_by_xpath("//input[@value='はい']").click()

time.sleep(3)
no = driver.find_element_by_class_name("button")
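The commented-out loop above can be completed; a minimal sketch that keeps clicking the はい button while the title is still 質問画面, reusing the driver created earlier (the title strings and the value attribute come from the comments and are not verified against the live page):

# Keep answering はい (yes) until the quiz leaves the question screen (質問画面).
while driver.title == '質問画面':
    driver.find_element_by_xpath("//input[@value='はい']").click()
    time.sleep(1)  # give the next question time to load

print(driver.title)  # expected to read 解答画面 (answer screen) once the loop exits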
Code Example #11
File: funcs.py Project: hatsunem/ig-autolikes
class InstaOperator:
    def __init__(self, username, password, tag):
        self.username = username
        self.password = password
        self.tag = tag

        options = Options()
        # options.add_argument('--headless')
        # options.binary_location = '/app/.apt/usr/bin/google-chrome'
        self.driver = Chrome(chrome_options=options)
        self.driver.implicitly_wait(5)

    def login(self):
        self.driver.get(INSTA_URL)
        email_form = self.driver.find_element_by_name("username")
        password_form = self.driver.find_element_by_name("password")
        email_form.send_keys(self.username)
        password_form.send_keys(self.password)

        login_button = self.driver.find_element_by_class_name("_5f5mN")
        login_button.submit()

        try:
            self.driver.find_element_by_class_name("XTCLo")
        except NoSuchElementException:
            print("Account verification")
            send_button = self.driver.find_element_by_class_name("_5f5mN")
            send_button.submit()
            code_form = self.driver.find_element_by_name("security_code")
            code = gmailreceiver.get_code()
            code_form.send_keys(code)
            code_button = self.driver.find_element_by_class_name("_5f5mN")
            code_button.submit()
            try:
                self.driver.find_element_by_class_name("XTCLo")
            except NoSuchElementException as e:
                print("Login failed")
                print(type(e))
                print(str(e))
            else:
                print("Login complete")
        else:
            print("Login complete")

    def open_article(self):
        self.driver.get("https://www.instagram.com/explore/tags/" + self.tag +
                        "/")
        articles = self.driver.find_elements_by_class_name("Nnq7C")
        article = articles[3].find_elements_by_class_name("v1Nh3")
        actions = ActionChains(self.driver)
        actions.move_to_element(articles[4])
        actions.perform()
        article[0].click()

    def get_users(self):
        users = []
        while len(users) < 10:
            try:
                address = self.driver.find_element_by_class_name(
                    "nJAzx").get_attribute('href')
                if address not in users:
                    users.append(address)
                self.driver.find_element_by_class_name(
                    "coreSpriteRightPaginationArrow").click()
            except (NoSuchElementException, StaleElementReferenceException):
                self.open_article()

        return users

    def likes_users(self, users):
        likes = 0
        for user in users:
            self.driver.get(user)
            try:
                self.driver.find_element_by_class_name("v1Nh3").click()
                for num in range(3):
                    self.driver.find_element_by_class_name(
                        "coreSpriteHeartOpen").click()
                    likes += 1
                    sleep(1)
                    self.driver.find_element_by_class_name(
                        "coreSpriteRightPaginationArrow").click()
            except NoSuchElementException:
                pass

        return likes
Code Example #12
File: jobAutoamtion.py Project: AI-Pree/WebScraper
class Automate():
    def __init__(self):
        chrome_opt = Options()
        chrome_opt.add_argument('--headless')
        self.driver = Chrome(options=chrome_opt)
        self.logger = logging.getLogger(__name__)
        self.datas = parsedata.ParseJson('seekdata.json')

    # logging in to the site
    def login(self):
        self.logger.info("Opened the site successfully")
        WebDriverWait(self.driver, 2).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR,
                 "a[title = 'Sign in']")))  # waiting for sign in to be loaded
        self.driver.find_element_by_link_text("Sign in").click()
        WebDriverWait(self.driver, 4).until(
            EC.presence_of_element_located(
                (By.TAG_NAME, "form")))  # waiting for the form to be loaded
        self.logger.info("signing in....")
        # can add config file for logging in infos
        WebDriverWait(self.driver,
                      3).until(EC.presence_of_element_located(
                          (By.ID, "email")))
        #email address to log in
        self.driver.find_element_by_id("email").send_keys("*****@*****.**")
        #password to log in
        self.driver.find_element_by_id("password").send_keys("password")
        self.driver.find_element_by_xpath("//button[@type = 'submit']").click()
        self.logger.info("logged in successfully")
        self.logger.info("working!!")

    def applyJob(self):
        WebDriverWait(self.driver, 5).until(
            EC.presence_of_element_located((By.LINK_TEXT, "Apply")))
        self.driver.find_element_by_link_text('Apply').click()
        delay = 2  #waiting time for the element to load
        try:
            WebDriverWait(self.driver, delay).until(
                EC.presence_of_element_located((By.TAG_NAME, "fieldset"))
            )  # waits up to 'delay' seconds for the fieldset element to load
        except TimeoutException:
            self.logger.exception("Timed out waiting for the fieldset element")
        finally:
            self.driver.find_element_by_id("uploadresume").click()

            if os.path.exists(os.getcwd() + "/resume.txt"):
                self.driver.find_element_by_xpath(
                    "//input[@id='resumeFile'][@type = 'file']").send_keys(
                        os.getcwd() + "/resume.txt")
            else:
                self.driver.find_element_by_id("dontIncluderesume").click()

            self.driver.find_element_by_id("dontIncludecoverLetter").click()
            self.driver.find_element_by_xpath(
                "//button[@type='button'][@data-testid='continue-button-desktop']"
            ).click()

            WebDriverWait(self.driver, 3).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//a[@data-testid = 'back-button-desktop']")))
            self.driver.implicitly_wait(2)
            self.driver.find_element_by_xpath(
                "//button[@type='button'][@data-testid='continue-button-desktop']"
            ).click()

            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//button[@type = 'submit']")))
            self.driver.find_element_by_xpath(
                "//button[@type = 'submit']").click()
            WebDriverWait(self.driver, 3).until(
                EC.presence_of_element_located((By.CLASS_NAME, "_2tfj6Sf")))
            if self.driver.find_element_by_class_name("_2QIfI_2"):
                self.logger.info("Successfully applied for the job")

    # Apply for the jobs that has been scraped in the json file
    def apply(self, url):

        for i, data in enumerate(self.datas.get_data()):
            job_url = url + data['jobURL']
            self.logger.info("opening page {id}...".format(id=i))
            self.driver.get(job_url)

            if len(self.driver.find_elements_by_link_text("Sign in")) > 0:
                self.login()

            self.logger.info("page loaded successfully")
            self.applyJob()
            self.logger.info("closing page {id}...".format(id=i))

        self.logger.info("Applying for all the jobs complete")
Code Example #13
#Step 3 is to maximize browser
driver.maximize_window()

#Fetching Title
print("Title of page is " + driver.title)

#Fetch Url of page
print("Page url is " + driver.current_url)

#Fetch complete page html source code
print("***********************************")
print(driver.page_source)

#Fetch element/locator text
print(driver.find_element_by_class_name(
    "displayPopup").text)  #using text property

#Fetching attribute(s)value of element by locating it
print("Value of button is " + driver.find_element_by_xpath(
    "//input[@type='submit']").get_attribute("value"))

#Fetch all available values from dropdown
# to work on drop-down, list (only by index, value & visible text)
obj = Select(driver.find_element_by_name("sex"))
obj.select_by_visible_text("Male")

#Fetch selected option
print(obj.first_selected_option.text)

#Fetch all available options in the drop down
print(obj.options)  #it will not work as it will return only object
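As the last comment notes, obj.options returns WebElement objects rather than strings; reading .text on each one gives the visible labels — a short sketch reusing the Select wrapper created above:

# obj is the Select wrapper created above; print each option's visible label.
for option in obj.options:
    print(option.text)

# or collect them in one list:
print([option.text for option in obj.options])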
Code Example #14
class SeleniumDownloader(object):
    """
    Custom Selenium downloader middleware:
    page requests are fetched by Selenium instead of going through Scrapy's Downloader.
    """
    def __init__(self):
        # Create the Selenium browser object
        # Windows -> d:/drivers/chromedriver.exe
        # Linux   -> /home/xxxx/drivers/chromedriver
        # Run Chrome headless -> no browser window is opened

        options = Options()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')

        # chrome_options or options, depending on the Selenium version
        self.browser = Chrome(r'D:\driver\chromedriver.exe',
                              options=options)
        self.close_ok = False  # a notice pops up on first load; click 『我知道了』 ("Got it") once to close it

    def process_request(self, request, spider):
        # The request is fetched by Selenium's Chrome browser
        self.browser.get(request.url)
        # self.browser.save_screenshot('zhaopin.png')

        if not self.close_ok:
            # Close the first-visit notice dialog
            ok_btn = self.browser.find_element_by_class_name(
                'risk-warning__content').find_element_by_tag_name('button')
            ok_btn.click()

            self.close_ok = True

        # Wait until the 'soupager' element on the page is visible
        ui.WebDriverWait(self.browser, 60).until(
            ec.visibility_of_all_elements_located((By.CLASS_NAME, 'soupager')))

        # Get the vertical position of the pager element
        soupager = self.browser.find_element_by_class_name('soupager')

        # .location -> {'x':, 'y': }
        # .rect  -> {'x': 0, 'y': 0,  'width': 990, 'height': 10001 }
        soupager_height = soupager.location['y']

        time.sleep(1)

        # Scroll down
        # Scroll the page to the bottom in steps
        for i in range(20):
            current_height = (i + 1) * 1000
            if current_height >= soupager_height:
                break

            self.browser.execute_script(
                'var q = document.documentElement.scrollTop=%s' %
                current_height)
            time.sleep(0.5)

        # Grab the rendered page HTML
        html = self.browser.page_source

        return HtmlResponse(url=request.url,
                            body=html.encode(encoding='utf-8'),
                            encoding='utf-8')
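For Scrapy to actually route requests through this middleware, it has to be enabled in the project settings; a hedged sketch, where the module path 'myproject.middlewares.SeleniumDownloader' and the priority 543 are placeholders to adapt to the real project layout:

# settings.py
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.SeleniumDownloader': 543,  # placeholder module path
}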
Code Example #15
def findGoogle(search_param):
    # Ignore certificate errors:
    options = webdriver.ChromeOptions()
    options.add_argument('ignore-certificate-errors')
    options.add_argument('--ignore-ssl-errors')
    options.headless = True
    articlesData = []

    # Chrome driver
    driver = Chrome(chrome_options=options)

    # Navigate to Google Scholar
    driver.get(
        'https://scholar.google.com/citations?view_op=search_authors&mauthors=&hl=en&oi=ao'
    )

    # Wait up to 10 seconds for the search box
    search = WebDriverWait(
        driver,
        timeout=10).until(lambda d: d.find_element_by_class_name('gs_in_txt'))

    # Search for a result
    search.send_keys(search_param)
    search.send_keys(Keys.RETURN)

    # Check whether any results exist (up to 10 seconds)
    try:
        WebDriverWait(driver, timeout=10).until(
            lambda d: d.find_elements_by_class_name("gsc_1usr"))
        print("results found")
    except:
        return {"articles": [], "count": 0}

    # Open the author's articles page
    driver.find_element_by_class_name('gs_ai_pho').click()

    # Wait for the page to load (up to 10 seconds)
    try:
        WebDriverWait(
            driver,
            timeout=10).until(lambda d: d.find_element_by_id('gsc_a_b'))

        # Load all the articles
        driver.find_element_by_id('gsc_bpf_more').click()
        time.sleep(1)  # TODO: improve this line
    except:
        pass

    # Wait up to 10 seconds for the articles to load
    articles = WebDriverWait(
        driver,
        timeout=10).until(lambda d: d.find_elements_by_class_name('gsc_a_tr'))
    print(len(articles))

    # Loop over the articles
    for article in articles:

        # Extract the data
        title = article.find_element_by_class_name('gsc_a_at').text
        autors = article.find_element_by_class_name('gs_gray').text
        year = article.find_element_by_class_name('gsc_a_y').text

        # TODO: handle the exception when no year exists

        # Article record
        data = {"title": title, "autors": autors, "year": year}

        # Append to the list
        articlesData.append(data)

    return {"articles": articlesData, "count": len(articlesData)}
Code Example #16
def start_callback():

    """
    Main loop of the scrape.
    """
    profile_username = E_username.get() # The Instagram username of the profile from which we
    # are downloading. Must be supplied.
    output_directory = E_path.get() # Will be initialized with the optional argument or a
    # default later.
    update_mode = True
    serialize = True
    latest_image = ''

    # The latest downloaded images will be the first in the directory.
    files = os.listdir(output_directory)
    if files:
        latest_image = files[0]

    # Start the browser
    driver = Chrome(executable_path='../bin/chromedriver')
    driver.get(insta_url + profile_username)

    # Find the number of posts on this Instagram profile
    post_count_tag_xpath = ('//*[@id="react-root"]/section/main/'
                            + 'article/header/div[2]/ul/li[1]/span/span')
    post_count_tag = driver.find_element_by_xpath(post_count_tag_xpath)
    post_count = int(post_count_tag.text.replace(',', ''))

    # If the target profile is private, then redirect to the login page
    login_tag_xpath = '//*[@id="react-root"]/section/main/article/div/p/a'
    try:
        login_tag = driver.find_element_by_xpath(login_tag_xpath)
        login_page_url = login_tag.get_attribute('href')
        driver.get(login_page_url)

        # Wait for the user to login
        while driver.current_url == login_page_url:
            sleep(1)

        # Return to the target profile from the homepage
        driver.get(insta_url + profile_username)
    except:
        pass

    # Click the 'Load More' element
    driver.find_element_by_class_name('_oidfu').click()

    # Load all the posts into the browser
    processed = 0
    while processed < post_count:
        # Load more content by scrolling to the bottom of the page
        driver.execute_script("window.scrollTo(0, 0);")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Download 4 rows of items (4 rows are loaded upon each scroll) and
        # remove them from view
        for _ in itertools.repeat(None, 4):
            urls = fetch_row_links(driver)
            delete_row(driver)
            for url in urls:

                # Exit if we've reached the latest image that was in the
                # directory before downloading. This means the directory has
                # everything beyond this point.
                if update_mode:
                    fname = file_name.search(url).group(0)
                    if fname in latest_image:
                        exit(0)

                download_from_url(url, output_directory,
                                  serialize, post_count-processed)
                processed += 1

    driver.close()
Code Example #17
File: main_6_2.py Project: sore9988/crawler-study
url = 'https://www.google.com.tw/maps'
driver.get(url)
# find -> find_element
# find_all -> find_elements
time.sleep(5)  # wait 5 seconds
keyword = '大腸麵線'

print('Typing the search keyword into the input box')
mylocation = driver.find_elements_by_id('widget-mylocation')
print(len(mylocation))
while len(mylocation) == 0:
    print('mylocation')
    mylocation = driver.find_elements_by_id('widget-mylocation')
mylocation[0].click()
time.sleep(4)
input = driver.find_element_by_class_name('tactile-searchbox-input')
input.send_keys(keyword)

print('Clicking the search button')
driver.find_element_by_id('searchbox-searchbutton').click()
time.sleep(7)
bfinal = False
page = 1
list_dem = []
list_demstar = []
list_add = []
list_ben = []
list_star = []
list_date = []
list_pim = []
n = 0
Code Example #18
from selenium.webdriver import ChromeOptions, Chrome
path = "../driver/chromedriver"
# on Windows you must point this at chromedriver.exe
# webdriver.Firefox(executable_path=path)
ops = ChromeOptions()
ops.add_argument("--disable-notifications")

driver = Chrome(executable_path=path, options=ops)
baseUrl = "https://www.icicibank.com/"
driver.get(baseUrl)
print("Home page title ", driver.title)
driver.find_element_by_class_name("pl-login-ornage-box").click()
print("Login page title ", driver.title)

driver.close()
Code Example #19
for lp in logins:
    if br:
        br.close()
    br = Browser()
    matches = LOGIN_PASSWORD_FORMAT.match(lp)
    login, password = matches.group('login'), matches.group('password')
    br.get(LOGIN_URL)
    while True:
        try:
            tag = br.find_element_by_id("email")
            if tag.is_displayed():
                break
        except:
            br.get(LOGIN_URL)
        time.sleep(0.1)
    if (tg := br.find_element_by_class_name("captcha-container")).is_displayed():
        continue
    tag.send_keys(login)
    completed = False
    while not completed:  # There may be a case where the login comes first and then a Continue button before the password
        if (tag := br.find_element_by_id("password")).is_displayed():
            tag.send_keys(password)
            try:
                br.find_element_by_id("btnLogin").click()
                completed = True
            except:
                ...
        elif (tag := br.find_element_by_id("btnNext")).is_displayed():
            try:
                tag.click()
            except:
                ...
Code Example #20
class Teams:
    def __init__(self):

        self.opts = ChromeOptions()
        self.opts.add_experimental_option("detach", True)
        self.opts.add_argument('--ignore-certificate-errors')
        self.opts.add_argument('--ignore-ssl-errors')
        self.opts.add_argument("--use-fake-ui-for-media-stream")

        self.browser = Chrome(executable_path='Chrome-Driver/V83/chromedriver',
                              chrome_options=self.opts)

        self.link = 'https://www.microsoft.com/en-in/microsoft-365/microsoft-teams/group-chat-software'
        self.x = 1600
        self.y = 1600

        self.sign_in = 'mectrl_main_trigger'
        self.login_id = 'i0116'
        self.password_id = 'i0118'
        self.btn_class = 'inline-block'
        self.popup_id = 'use-app-lnk'
        self.team_name_id = 'team-name-text'
        self.meeting_class = 'ts-sym ts-btn ts-btn-primary inset-border icons-call-jump-in ts-calling-join-button app-title-bar-button app-icons-fill-hover call-jump-in'

    def start_window(self):

        self.browser.set_window_size(self.x, self.y)
        self.browser.get(self.link)

        login_href = WebDriverWait(self.browser, 10).until(
            EC.presence_of_element_located((By.ID, self.sign_in)))

        login_href.click()

    def add_credentials(self):
        with open('assets/credentials.json') as json_data:
            credentials = json.load(json_data)

        email_field = WebDriverWait(self.browser, 10).until(
            EC.presence_of_element_located((By.ID, self.login_id)))
        email_field.send_keys(credentials['email'], Keys.ENTER)

        password_field = WebDriverWait(self.browser, 10).until(
            EC.presence_of_element_located((By.ID, self.password_id)))
        password_field.send_keys(credentials['password'])

        #### TODO: find a better way to wait for the DOM

        time.sleep(5)  # DOM to load
        submit_btn = self.browser.find_element_by_class_name(
            self.btn_class).click()

    def popup_login(self):

        submit_btn = self.browser.find_element_by_class_name(self.btn_class)
        submit_btn.click()

    def popup_ad(self):

        web_app_btn = self.browser.find_element_by_class_name(self.popup_id)
        web_app_btn.click()

    def join_group(self, class_name):

        all_user_groups = WebDriverWait(self.browser, 20).until(
            EC.presence_of_element_located((By.CLASS_NAME, self.team_name_id)))
        all_user_groups = self.browser.find_elements_by_class_name(
            self.team_name_id)

        for group in range(0, len(all_user_groups)):
            if all_user_groups[group].text.lower() == class_name.lower():
                all_user_groups[group].click()
                break

    def mute_audio(self):

        audio_btn = self.browser.find_element_by_css_selector(
            "toggle-button[data-tid='toggle-mute']>div>button")
        audio_is_on = audio_btn.get_attribute("aria-pressed")
        if audio_is_on == "true":
            audio_btn.click()

    def close_video(self):

        video_btn = self.browser.find_element_by_css_selector(
            "toggle-button[data-tid='toggle-video']>div>button")
        video_is_on = video_btn.get_attribute("aria-pressed")
        if video_is_on == "true":
            video_btn.click()

    def join_meeting(self):

        time.sleep(20)

        try:
            meeting_button = WebDriverWait(self.browser, 30).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "button[ng-click='ctrl.joinCall()']")))
        except selenium.common.exceptions.TimeoutException:
            print(
                "Couldn't load the link, or the meeting may not have started yet."
            )

        else:
            time.sleep(20)

            meeting_button.click()

            time.sleep(20)

            self.mute_audio()
            self.close_video()

            join_button = WebDriverWait(self.browser, 30).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR,
                     "div.flex-fill.input-section > div > div > button")))

            join_button.click()

    def show_chat(self):

        background = self.browser.find_element_by_css_selector(
            'div > div.video-stream-container')
        hover = ActionChains(self.browser).move_to_element(background)
        hover.perform()
        chat_btn = self.browser.find_element_by_css_selector(
            '#callingButtons-showMoreBtn > ng-include > svg')
        chat_btn.click()

    def getchats(self):
        pass

    def hang_call(self):

        hangup_btn = WebDriverWait(self.browser, 30).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "button[data-tid='call-hangup']")))
        time.sleep(50)  # NOTE: waits 50 seconds; a 50-minute class would need 50 * 60
        print(hangup_btn)
        hangup_btn.click()
Code Example #21
class NewAllocationTestCase(StaticLiveServerTestCase):
    def setUp(self):
        self.browser = Chrome()
        self.browser.implicitly_wait(10)

        user = User.objects.create_user(username='******',
                                        name="Administrador Teste",
                                        email='*****@*****.**',
                                        password='******',
                                        is_staff=True,
                                        is_superuser=True)
        coordinator = Coordinator.objects.create(usp_number='1234567',
                                                 user=user)
        teacher = Teacher.objects.create(usp_number='1234567', user=user)
        student_user = User.objects.create(username='******',
                                           name="Aluno Teste",
                                           email='*****@*****.**',
                                           password='******')
        student = Student.objects.create(usp_number='7983121',
                                         user=student_user)
        quarter_discipline = Discipline.objects.create(
            modality='QDR',
            start_date=date.today() - timedelta(8),
            end_date=date.today() - timedelta(1))
        quarter_discipline.users.add(student_user)
        semester_discipline = Discipline.objects.create(
            modality='SMS',
            start_date=date.today() - timedelta(8),
            end_date=date.today() - timedelta(1))
        semester_discipline.users.add(student_user)
        event = Event.objects.create(type='THR',
                                     quarter_discipline=quarter_discipline,
                                     semester_discipline=semester_discipline,
                                     selected_date=date.today() + timedelta(1),
                                     start_time=time(0, 0, 0),
                                     end_time=time(23, 59, 59))
        workgroup = Workgroup.objects.create(modality='QDR',
                                             identifier='1',
                                             title='Grupo de Teste',
                                             advisor=user)
        workgroup.students.add(student_user)
        room = Room.objects.create(block='A', floor='T', identifier='ST')

        self.browser.get('%s%s' %
                         (self.live_server_url, reverse_lazy("login")))
        self.browser.find_element_by_id('id_username').send_keys(
            '*****@*****.**')
        self.browser.find_element_by_id('id_password').send_keys('1234567')
        self.browser.find_element_by_id('login').click()

    def tearDown(self):
        self.browser.quit()

    def test_allocation_sign_up_fire(self):
        self.browser.find_element_by_id('coordinator-events').click()
        self.browser.find_element_by_id('manage-allocations-1').click()
        self.browser.find_element_by_id('new-allocation').click()

        self.browser.find_element_by_id('id_start_time').send_keys(
            time(0, 0).strftime('%H:%M'))
        self.browser.find_element_by_id('id_end_time').send_keys(
            time(23, 59).strftime('%H:%M'))

        self.browser.find_element_by_class_name(
            "select2-search__field").send_keys('administrador')
        self.browser.find_element_by_class_name(
            "select2-results__option").click()

        self.browser.find_element_by_id(
            "select2-id_workgroup-container").send_keys('C1')
        self.browser.find_element_by_class_name(
            "select2-results__option").click()

        self.browser.find_element_by_id(
            "select2-id_selected_room-container").send_keys('A')
        self.browser.find_element_by_class_name(
            "select2-results__option").click()

        self.browser.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")

        self.browser.find_element_by_id('submit').click()

        self.assertIn(
            ('%s%s' %
             (self.live_server_url,
              reverse_lazy("allocations_list", kwargs={'event_id': 1}))),
            self.browser.current_url)
        self.assertTrue(
            self.browser.find_element_by_xpath(
                "//div[@class='card']/div[@class='card-header info-color lighten-1 white-text'][contains(.,'Grupo C1 - Sala AT-ST')]"
            ))
Code Example #22
File: rice_is_nice2.py Project: simma1/coder-course
def multiplier(a, b):
    return a * b


opts = Options()
opts.headless = False

assert not opts.headless  # running with a visible browser window
browser = Chrome(
    executable_path='C:/Users/spi59/Documents/Drivers/chromedriver.exe',
    options=opts)

browser.get('https://freerice.com/categories')
time.sleep(0.5)
cookie_monster = browser.find_element_by_class_name('as-js-optin')
cookie_monster.click()
time.sleep(0.5)
categories = browser.find_elements_by_class_name('category-item')
categories[21].click()
time.sleep(2)

for rice_donator in range(1, 1000):
    try:
        element = WebDriverWait(browser, 50).until(
            EC.presence_of_element_located(
                (By.CLASS_NAME,
                 'card-button.fade-appear-done.fade-enter-done')))

    except:
        print(rice_donator * 10)
Code Example #23
File: ex01.py Project: seongMinS2/day0404
import time
from selenium.webdriver import Chrome

# Automatic login using Selenium

chromeDriver = 'c:\\temp\\chromedriver.exe'

driver = Chrome(chromeDriver)

driver.get('https://login.coupang.com/login/login.pang')

time.sleep(3)

input_login = driver.find_element_by_id('login-email-input')
input_login.send_keys('*****@*****.**')

time.sleep(3)

input_pw = driver.find_element_by_id("login-password-input")
input_pw.send_keys('1111')

time.sleep(3)

btn = driver.find_element_by_class_name('login__button')

btn.click()

time.sleep(3)

driver.quit()
Code Example #24
class XiechengSpider(object):
    def __init__(self):
        options = Options()
        options.add_argument('--headless')
        # Use Selenium with Chrome to simulate the browser operations
        self.chrome = Chrome(executable_path='/usr/local/bin/chromedriver', options=options)
        self.chrome.get(
            'https://huodong.ctrip.com/things-to-do/list?pagetype=city&citytype=dt&keyword=%E6%8F%AD%E9%98%B3&pshowcode=Ticket2')
        # time.sleep(3)
        self.page = 1
        self.headers = {
            'cookie': 'Session=SmartLinkCode=U155952&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=&SmartLinkLanguage=zh; _RSG=KqK3qETfa143fOqQl4rFXB; _RDG=282f24100640c82731283334fcc3364464; _RGUID=4064a5d3-b40d-4d14-b84f-d44bdad18a43; Union=OUID=index&AllianceID=4897&SID=155952&SourceID=&createtime=1600831032&Expires=1601435831593; MKT_OrderClick=ASID=4897155952&AID=4897&CSID=155952&OUID=index&CT=1600831031597&CURL=https%3A%2F%2Fwww.ctrip.com%2F%3Fsid%3D155952%26allianceid%3D4897%26ouid%3Dindex&VAL={"pc_vid":"1600831028743.427gzc"}; MKT_CKID=1600831031634.5olt5.f6pj; MKT_CKID_LMT=1600831031635; _ga=GA1.2.248639397.1600831032; _gid=GA1.2.1954297618.1600831032; MKT_Pagesource=PC; GUID=09031031210931119554; nfes_isSupportWebP=1; appFloatCnt=1; nfes_isSupportWebP=1; ASP.NET_SessionSvc=MTAuNjAuMzUuMTQ2fDkwOTB8amlucWlhb3xkZWZhdWx0fDE1ODkwMDMyMjQ5NDI; U_TICKET_SELECTED_DISTRICT_CITY=%7B%22value%22%3A%7B%22districtid%22%3A%22835%22%2C%22districtname%22%3A%22%E6%8F%AD%E9%98%B3%22%2C%22isOversea%22%3Anull%7D%2C%22createTime%22%3A1600847243848%2C%22updateDate%22%3A1600847243848%7D; _RF1=113.118.204.141; _gat=1; _pd=%7B%22r%22%3A1%2C%22d%22%3A614%2C%22_d%22%3A613%2C%22p%22%3A634%2C%22_p%22%3A20%2C%22o%22%3A655%2C%22_o%22%3A21%2C%22s%22%3A668%2C%22_s%22%3A13%7D; _bfa=1.1600831028743.427gzc.1.1600843833503.1600847244099.5.49.10650038368; _bfs=1.30; _bfi=p1%3D290510%26p2%3D290510%26v1%3D49%26v2%3D48; _jzqco=%7C%7C%7C%7C1600831031803%7C1.1555887407.1600831031625.1600849509140.1600849530503.1600849509140.1600849530503.0.0.0.19.19; __zpspc=9.4.1600846262.1600849530.14%232%7Cwww.baidu.com%7C%7C%7C%25E6%2590%25BA%25E7%25A8%258B%7C%23',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
        }
        # Stores the city list
        self.cityList = {}
        # Stores the attraction ticket data from Ctrip
        self.spotsInfo = {}
        self.name = '携程旅游'
        # Flag marking whether the query has finished
        self.done = False



    def get_url(self,keyword):
        '''
        Get the attraction URLs for the given keyword.
        :param keyword:
        :return:
        '''
        while True:
            content = self.chrome.find_element_by_class_name('right-content-list').get_attribute('innerHTML')
            cons = re.findall(r'href="(.*?)" title="(.*?)"', content)
            # print(content)
            for con in cons:
                self.detail_url = 'https:' + con[0]
                self.title = con[1]
                result = fuzz.token_sort_ratio(self.title, keyword)
                if result <= 20:
                    # print(self.title,result)
                    continue
                # print(self.detail_url, self.title)
                self.get_detail()
            return
            # print(self.spotsInfo)
            pagenums = self.chrome.find_elements_by_class_name('pagination_div_content')
            i = self.chrome.find_elements_by_class_name('item_wrap_font')
            # print(len(i))
            totalpage = -1;
            # print(len(pagenums))
            for pagenum in pagenums:
                totalpage = pagenum.find_element_by_class_name('item_wrap_font').text
                # print(totalpage)
            totalpagenum = int(totalpage)
            self.page = self.page + 1
            # print(totalpage)

            # icon = self.chrome.find_element_by_css_selector('.u_icon_ttd.undefined.u_icon_enArrowforward')
            # icon = self.chrome.find_element_by_class_name('u_icon_ttd')
            # print(type(icon))
            # icon.click()
            if totalpagenum<0:
                self.page = self.page-1
                continue
            if self.page > totalpagenum:
                break
            self.chrome.find_element_by_class_name('pagination_div_jump_input').click()
            self.chrome.find_element_by_class_name('pagination_div_jump_input').send_keys(str(self.page))
            self.chrome.find_element_by_class_name('pagination_div_button').click()
            # time.sleep(1)


    def search_spots(self, keyword, city):
        '''
        Core method: fetch the attraction data.
        :param keyword:
        :param city:
        :return:
        '''
        self.spotsInfo = {}
        try:
            id = self.getCityID(city)
            Ncity = city.replace('市', '').replace('县', '').replace('省', '')
            #url = 'https://huodong.ctrip.com/things-to-do/list?pagetype=city&citytype=dt&keyword=%E6%8F%AD%E9%98%B3&pshowcode=Ticket2'
            url = 'https://huodong.ctrip.com/things-to-do/list?pagetype=city&citytype=dt&keyword=' + Ncity+keyword + '&id=' + str(self.getCityID(city)) + '&pshowcode=Ticket2'
            self.chrome.get(url)
            self.get_url(keyword)

        except:
            pass
        self.done = True



    def getCityID(self,city):
        '''
        Get the city ID.
        :param city:
        :return:
        '''
        if len(self.cityList) == 0:
            self.getCityList()
        result = process.extractBests(city, self.cityList.keys(), score_cutoff=80, limit=1)
        return self.cityList[result[0][0]]



    def getCityList(self):
        '''
        Crawl the full city list.
        :return:
        '''
        html = self.getHtml('https://piao.ctrip.com/ticket/?districtid=1')
        soup = BS(html, "html.parser")
        script = soup.find_all('script')
        i = 0
        for s in script:
            if i == 4:
                #cities = json.loads(s.text)
                #break
                text = s.text.replace(' ','').replace('\n','')
                text = text[text.find('window.__INITIAL_STATE__')+25:text.find('window.__APP_SETTINGS__=')]
                cities = json.loads(text)
                city = cities['citiesData']['domesticcity']['cities']
                for c in city:
                    for ci in c['cities']:
                        self.cityList[ci['name']] = ci['id']
                break
            i = i+1



    def to_unicode(self, string):
        '''
        Unicode-escape the string.
        :param string:
        :return:
        '''

        ret = ''
        for v in string:
            ret = ret + hex(ord(v)).upper().replace('0X', '\\u')

        return ret



    def get_detail(self):
        '''
        Open the attraction detail page and crawl the details.
        :return:
        '''
        # detail_con = requests.get(self.detail_url, verify=False, headers=self.headers).text
        # # time.sleep(2)
        # '''Extract the info with regex'''
        # self.rank = ''.join(re.findall(r'rankText">(.*?)<', detail_con, re.DOTALL))
        # self.address = ''.join(re.findall(r'景点地址</p><p class="baseInfoText">(.*?)<', detail_con, re.DOTALL))
        # self.mobile = ''.join(re.findall(r'官方电话</p><p class="baseInfoText">(.*?)<', detail_con, re.DOTALL))
        # print(self.rank, self.address, self.mobile)
        # '''Extract the info with XPath'''
        # ret = etree.HTML(detail_con)
        # desc_cons = ret.xpath('//div[@class="detailModule normalModule"]//div[@class="moduleContent"]')
        # desc_titles = ret.xpath('//div[@class="detailModule normalModule"]//div[@class="moduleTitle"]')
        # desc_list = []
        # desc_title_list = []
        # for d in desc_cons:
        #     des = ''.join(d.xpath('.//text()'))
        #     desc_list.append(des)
        # for d in desc_titles:
        #     des = ''.join(d.xpath('.//text()'))
        #     desc_title_list.append(des)
        # desc_dict = dict(zip(desc_title_list, desc_list))
        # print(desc_dict)
        # '''Collect the image links'''
        # img_list = []
        # imgs = re.findall(r'background-image:url\((.*?)\)', detail_con, re.DOTALL)
        # for img in imgs:
        #     '''The same image is matched in two sizes; we only want the large one, so keep the 521_391 size'''
        #     image = re.search(r'521_391', img)
        #     if image:
        #         img_list.append(img)
        # print(img_list)
        self.get_ticket()



    def get_ticket(self):
        '''
        Get the attraction ticket information.
        :return:
        '''
        id = self.detail_url.split('/')[-1]
        # print(id)
        ticket_url = f'https://piao.ctrip.com/ticket/dest/{id}?onlyContent=true&onlyShelf=true'
        # print(ticket_url)
        ticket_res = requests.get(ticket_url, verify=False, headers=self.headers).text
        # time.sleep(1)
        ticket_res = ticket_res.replace('\n','').replace(' ','')
        ticket_res = ticket_res[ticket_res.find('window.__INITIAL_STATE__')+25:ticket_res.find('window.__APP_SETTINGS__')]
        info = json.loads(ticket_res)
        ticketinfos = info['detailInfo']['ressHash']
        slist = {}
        for ticketinfo in ticketinfos.values():
            '''Parse the ticket dict'''
            title = ticketinfo['name']
            price = ticketinfo['price']
            type = ticketinfo['saleunitinfo']['propleproperty']
            fromw = '携程旅游 '+ticketinfo['brandname']
            '''Merge the data'''
            slist.setdefault(type, [])
            slist[type].append(
                {'name': title, 'type': type, 'price': price, 'url': self.detail_url,
                 'buy': '', 'from': fromw, 'isReturnable': '',
                 'bookTime': '', 'outTime': '', 'useTime': '',
                 'discription': ''})
        self.spotsInfo[self.title] = slist


        # ticket_ret = etree.HTML(ticket_res)
        # ticket = ticket_ret.xpath('//table[@class="ticket-table"]//div[@class="ttd-fs-18"]/text()')
        # price = ticket_ret.xpath(
        #     '//table[@class="ticket-table"]//td[@class="td-price"]//strong[@class="ttd-fs-24"]/text()')
        # print(ticket)
        # print(price)
        # '''The lists may contain an unknown number of blank entries, so strip them all in a while True loop to keep ticket types and prices aligned'''
        # while True:
        #     try:
        #         ticket.remove(' ')
        #     except:
        #         break
        # while True:
        #     try:
        #         price.remove(' ')
        #     except:
        #         break
        # '''
        #     The extra if check is there because some detail pages still fail to line up even after
        #     stripping blanks (the page markup changed), so a new matching rule is needed in that case,
        #     otherwise the data comes out wrong (no error is raised, but the mapping is incorrect).
        # '''
        # if len(ticket) != len(price):
        #     ticket = ticket_ret.xpath(
        #         '//table[@class="ticket-table"]/tbody[@class="tkt-bg-gray"]//a[@class="ticket-title "]/text()')
        #     price = ticket_ret.xpath('//table[@class="ticket-table"]//strong[@class="ttd-fs-24"]/text()')
        #     while True:
        #         try:
        #             ticket.remove(' ')
        #         except:
        #             break
        #     while True:
        #         try:
        #             price.remove(' ')
        #         except:
        #             break
        #     print(ticket)
        #     print(price)
        # ticket_dict = dict(zip(ticket, price))
        # print(ticket_dict)



    def getHtml(self, url):
        '''
        Get the text content of a page
        :param url:
        :return:
        '''
        try:
            self.headers['User-Agent'] = mfu.UserAgent().random()
            self.headers['Cookie'] = '_abtest_userid=128990d6-ec49-40cb-b25d-fc8452c3d8a1; _ga=GA1.2.179469688.1614864484; MKT_CKID=1614864484805.yk39i.z4vz; _RSG=r2q6zDxpRN1sq9uB0iKSXA; _RGUID=15dbcfb3-7d1b-40b5-a85c-52c00be09d36; _RDG=287a9b7a6689de2a903820b27712075311; ibulanguage=CN; ibulocale=zh_cn; cookiePricesDisplayed=CNY; _gid=GA1.2.435881939.1618908478; Union=AllianceID=5376&SID=130860&OUID=&createtime=1618908478&Expires=1619513277722; Session=smartlinkcode=U130860&smartlinklanguage=zh&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=; MKT_CKID_LMT=1618908477918; MKT_Pagesource=PC; GUID=09031023413294183609; __utma=1.179469688.1614864484.1618917016.1618917016.1; __utmc=1; __utmz=1.1618917016.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _RF1=223.99.16.76; _jzqco=%7C%7C%7C%7C1618908478117%7C1.94961373.1614864484796.1618917699648.1618917762688.1618917699648.1618917762688.undefined.0.0.7.7; __zpspc=9.3.1618917018.1618917762.3%233%7Cwww.so.com%7C%7C%7C%7C%23; _bfa=1.1614864482896.3146qi.1.1618908639843.1618917016691.3.15.10650038368; _bfs=1.5; _bfi=p1%3D10650034475%26p2%3D10650034475%26v1%3D15%26v2%3D14; appFloatCnt=4; U_TICKET_SELECTED_DISTRICT_CITY={%22value%22:{%22districtid%22:1%2C%22districtname%22:%22%E5%8C%97%E4%BA%AC%22%2C%22isoversea%22:false%2C%22stage%22:%22selectedCity%22}%2C%22updateDate%22:1618919559657%2C%22createTime%22:1618919434692}'
            #self.headers['Host'] = 'www.tuniu.com'
            self.headers['Referer'] = 'https://piao.ctrip.com/ticket'
            resp = requests.get(url, headers=self.headers)
            resp.raise_for_status()
            resp.encoding = 'utf-8'
            return resp.text
        except:
            return ""
コード例 #25
0
ファイル: buscaavancada.py プロジェクト: jesiqueira/selenium
from selenium.webdriver import Chrome
from selenium.webdriver.support.ui import Select
import time

chrome = Chrome()

url = 'https://statusinvest.com.br/acoes/busca-avancada'

chrome.get(url)

buscar = chrome.find_element_by_class_name('find')

buscar.click()

time.sleep(2)

# chrome.find_element_by_css_selector('dropdown-content.select-dropdown li::nth-of-type(3)').click()
select = chrome.find_element_by_class_name('select-dropdown')


select.send_keys('TODOS')
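
The snippet imports Select but never uses it. As a hedged aside, this is how Select is normally applied to a native <select> element; the id "category" here is purely hypothetical and not taken from the statusinvest page.

from selenium.webdriver.support.ui import Select


def choose_option(driver, select_id, visible_text):
    # Wrap a native <select> element and pick an option by its visible label.
    dropdown = Select(driver.find_element_by_id(select_id))
    dropdown.select_by_visible_text(visible_text)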
コード例 #26
0
import sys
import time

from selenium.webdriver import Chrome
from selenium.common.exceptions import WebDriverException


class Question_Parser:
    def __init__(self):
        self.driver = Chrome()

        # self.driver.implicitly_wait(20)

        # Login
        self.login_option = False
        self.login_url = 'https://leetcode.com/accounts/login/'
        self.login_account_file = ""

        # Problem
        self.problem_url = sys.argv[1] if len(sys.argv) > 1 else ""
        if not self.problem_url:
            self.problem_url = input()

        self.driver.get(self.problem_url)

        # Stupid Leetcode CN
        time.sleep(3)
        self.driver.get(
            self.driver.current_url.replace("leetcode-cn", "leetcode"))
        while "leetcode-cn" in self.driver.current_url:
            time.sleep(2)

        # title is obtained by the get_title function
        self.title = ""
        # problem is obtained by the get_question_description function
        self.problem = ""
        # starter_code is obtained by the get_starter_code function
        self.starter_code = ""

    def login(self):

        with open(self.login_account_file) as f:
            USERNAME = f.readline()
            PASSWORD = f.readline()

        if USERNAME and PASSWORD:
            self.driver.get(self.login_url)

            username_field = self.driver.find_elements_by_class_name(
                "input__2W4f")[0]
            password_field = self.driver.find_elements_by_class_name(
                "input__2W4f ")[1]
            username_field.send_keys(USERNAME)
            password_field.send_keys(PASSWORD)

            signin = self.driver.find_element_by_class_name(
                "btn-content__lOBM")
            flag = True

            while (flag):
                try:
                    time.sleep(0.5)
                    signin.click()
                    flag = False
                except WebDriverException:
                    time.sleep(0.5)
            time.sleep(1)
            if (self.driver.current_url != self.login_url):
                print('Login succeeded')
                return True
            else:
                print('Login failed - check username and password')
                return False
        print('Need to provide a username and password!')
        return False

    def get_title(self):

        success = False
        try_times = 0

        while not success:
            try_times += 1
            try:
                self.title = self.driver.find_element_by_class_name(
                    "css-v3d350").text
                success = True
            except WebDriverException:
                time.sleep(0.5)

            if try_times == 5:
                self.driver.get(self.problem_url)

                # Stupid Leetcode CN
                time.sleep(3)
                self.driver.get(
                    self.driver.current_url.replace("leetcode-cn", "leetcode"))
                while "leetcode-cn" in self.driver.current_url:
                    time.sleep(2)
            elif try_times > 10:
                print("Parsing title fail")
                return False

        return success

    def get_question_description(self):

        success = False
        try_times = 0
        doc_mark = '"""'
        new_line_mark = "\n"

        while not success:
            try_times += 1
            try:
                content = self.driver.find_element_by_class_name(
                    "question-content__JfgR").text
                success = True
            except WebDriverException:
                time.sleep(0.5)

            if try_times > 10:
                print("Parsing question content fail")
                return False

        self.problem = doc_mark + new_line_mark + content + new_line_mark + doc_mark + new_line_mark
        return success

    def get_starter_code(self):

        success = False
        try_times = 0
        doc_mark = '"""'
        new_line_mark = "\n"

        while not success:
            try_times += 1
            try:
                # Obtain starter code
                language_dropdown = '//*[@id="app"]/div/div[3]/div/div/div[3]/div/div[1]/div/div[1]/div[1]/div'
                time.sleep(1)
                python3_option = '/html/body/div[7]/div/div/div/ul/li[4]'
                self.driver.find_element_by_xpath(language_dropdown).click()
                self.driver.find_element_by_xpath(python3_option).click()
                time.sleep(1)
                success = True
            except WebDriverException:
                time.sleep(0.5)

            if try_times > 10:
                print("Parsing starter code  fail")
                return False

        self.starter_code = ""
        for line in self.driver.find_elements_by_class_name("CodeMirror-line"):
            self.starter_code += line.text + "\n"

        return success

    def create_file(self):
        f = None
        try:
            if self.get_title():
                f_name = self.title
                f_name = f_name.replace('.', '')
                f_name = f_name.replace(' ', '_')
                f_name += ".py"
                f = open(f_name, "w")

                if self.get_question_description() and self.get_starter_code():
                    f.write(self.problem + "\n" + self.starter_code)

                print(f_name, " Created")
        except IOError as e:
            print(e)
        finally:
            # Only close the file if it was actually opened
            if f:
                f.close()
            self.driver.close()

    def run(self):
        self.create_file()
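
A hedged usage sketch, not part of the original: the constructor reads the problem URL from sys.argv[1], so the class would typically be driven from the command line like this (the script name is only an example).

if __name__ == "__main__":
    # e.g. python question_parser.py https://leetcode.com/problems/two-sum/
    parser = Question_Parser()
    parser.run()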
コード例 #27
0
def start_selenium_for_fssp(last_name, first_name, middle_name, birthday):
    browser = Chrome(path_webdriver)
    # URL of the site the information will be taken from
    browser.get('https://fssprus.ru')
    # A popup opened right away and covered the main site, so close it here
    browser.find_element_by_class_name('tingle-modal__close').click()
    # Click the advanced-search button so the input fields appear
    browser.find_element_by_class_name('main-form__toggle-open').click()
    # Enter the data
    browser.find_element_by_name('is[last_name]').send_keys(last_name)
    browser.find_element_by_name('is[first_name]').send_keys(first_name)
    browser.find_element_by_name('is[patronymic]').send_keys(middle_name)
    browser.find_element_by_name('is[date]').send_keys(birthday)
    # Click search
    browser.find_element_by_class_name(
        'main-form__btns').find_element_by_class_name('btn-primary').click()
    # Pause so the site has time to load
    time.sleep(3)

    # Function that sends the captcha image to a third-party recognition service and enters the result
    def capcha():
        time.sleep(3)
        # Find the captcha image
        x = browser.find_element_by_id('capchaVisual')
        # Get the image's base64 encoding
        image_code_base64 = x.get_attribute('src')
        image_code_base64 = image_code_base64.replace(
            'data:image/jpeg;base64,', '')
        # Decode the image
        dec = base64.b64decode(image_code_base64)
        # Save the image
        filename = 'some_image.jpg'
        with open(filename, 'wb') as f:
            f.write(dec)

        # Loop that keeps sending the request until the service confirms it received the image
        get_task_id = False
        while not get_task_id:

            r = requests.post('https://rucaptcha.com/in.php',
                              data={
                                  'key': API_KEY,
                                  'method': 'post',
                                  'lang': 'ru',
                                  'json': 1
                              },
                              files={'file': open(filename, 'rb')})
            # If we got confirmation, leave the loop
            if r.json()['status'] == 1:
                get_task_id = True
            # If the service is overloaded, wait 5 seconds and try again
            else:
                time.sleep(5)

        capcha_ready = False
        # Loop that polls for the answer
        while not capcha_ready:
            r1 = requests.get(
                'https://rucaptcha.com/res.php?key={}&action={}&id={}&json=1'.
                format(API_KEY, 'get',
                       r.json()['request']))
            # If the captcha is not solved yet, wait 3 seconds and try again
            if r1.json()['request'] == 'CAPCHA_NOT_READY':
                time.sleep(3)
            # If it is solved, leave the loop
            else:
                capcha_ready = True

        # Get the captcha text
        text_for_capcha = r1.json()['request']
        # Type the text in
        browser.find_element_by_id("captcha-popup-code").send_keys(
            text_for_capcha)
        # Click to submit the captcha
        browser.find_element_by_id("ncapcha-submit").click()

        time.sleep(3)

    capcha_passed = False
    # Loop that runs the captcha function and enters the answer
    while not capcha_passed:
        # If the captcha was not passed for some reason, the function runs again
        try:
            if browser.find_element_by_class_name('popup-wrapper'):
                capcha()
        # If the captcha was passed successfully, the loop ends
        except:
            capcha_passed = True

    # Build a list that starts with the column headers for enforcement proceedings;
    # any enforcement proceedings that are found are appended to this same list
    list_all_enforcements_proceeding = [[
        'Должник (физ. лицо: ФИО, дата и место рождения; юр. лицо: наименование, юр. адрес)',
        'Исполнительное производство (номер, дата возбуждения)',
        'Реквизиты исполнительного документа (вид, дата принятия органом, номер, наименование органа, '
        'выдавшего исполнительный документ)',
        'Дата, причина окончания или прекращения ИП (статья, часть, пункт основания)',
        'Сервис', 'Предмет исполнения, сумма непогашенной задолженности',
        'Отдел судебных приставов (наименование, адрес)',
        'Судебный пристав-исполнитель'
    ]]

    # If there are no enforcement proceedings, finish checking this person
    try:
        browser.find_element_by_class_name('b-search-message__text').text
        # Close the browser
        browser.close()
        return True
    # If there are enforcement proceedings, write them into the list and return it
    except:
        # Process the table and collect the rows with enforcement proceedings
        table = browser.find_element_by_class_name('iss')
        all_rows = table.find_elements_by_tag_name('tr')
        for row in all_rows:
            list_enforcement_proceeding = []
            for block in row.find_elements_by_tag_name('td'):
                list_enforcement_proceeding.append(block.text)
            list_all_enforcements_proceeding.append(
                list_enforcement_proceeding)
        browser.close()
        return list_all_enforcements_proceeding
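
The captcha block above mixes the rucaptcha calls into the Selenium flow. Below is a minimal, hedged sketch of the same in.php / res.php polling as a standalone helper with an explicit timeout; the function and its defaults are my own refactor, not part of the original project.

import time

import requests


def solve_captcha(api_key, image_path, timeout=120):
    # Upload the captcha image and obtain a task id.
    with open(image_path, 'rb') as f:
        resp = requests.post('https://rucaptcha.com/in.php',
                             data={'key': api_key, 'method': 'post', 'json': 1},
                             files={'file': f}).json()
    if resp.get('status') != 1:
        raise RuntimeError('Upload failed: %s' % resp)
    task_id = resp['request']
    # Poll res.php until the captcha is solved or the timeout expires.
    deadline = time.time() + timeout
    while time.time() < deadline:
        answer = requests.get('https://rucaptcha.com/res.php',
                              params={'key': api_key, 'action': 'get',
                                      'id': task_id, 'json': 1}).json()
        if answer.get('request') != 'CAPCHA_NOT_READY':
            return answer['request']
        time.sleep(3)
    raise TimeoutError('Captcha was not solved in time')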
コード例 #28
0
login_form.send_keys(login)

print('Entering password...')
pass_form = browser.find_element_by_id('index_pass')
pass_form.send_keys(password)

print('Logging in...')
browser.find_element_by_id('index_login_button').click()

sleep(5)
print('Opening messages...')
browser.find_element_by_id('l_msg').click()

sleep(5)
print('Selecting the IQ Bot dialog...')
browser.find_element_by_class_name('_im_dialog_-181604561').click()

now = 0
while now < makeCount:
	sleep(5)
	print('Clicking the Start button...')
	browser.find_element_by_class_name('Button--positive').click()

	sleep(5)
	print('Clicking the "With bot" button...')
	browser.find_element_by_class_name('Button--secondary').click()

	sleep(5)
	print('Following the VK Coin link...')
	links = browser.find_elements_by_partial_link_text("vk.com/coin")
	linkscount = len(links)
コード例 #29
0
ファイル: main.py プロジェクト: alexey-kott/cargox_parser
def get_last_page_number(driver: Chrome) -> int:
    navi_panel = driver.find_element_by_class_name('ul_navi')
    last_page_link = navi_panel.find_elements_by_tag_name('a')[-2]

    return int(last_page_link.text)
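
A hedged usage sketch: once the last page number is known, iterate over every listing page. The ?page= query parameter below is an assumption for illustration only, not taken from the original project.

from selenium.webdriver import Chrome


def iterate_pages(driver: Chrome, base_url: str) -> None:
    driver.get(base_url)
    last_page = get_last_page_number(driver)
    for page in range(1, last_page + 1):
        # Assumed pagination scheme; adjust to the site's real URL pattern.
        driver.get(f'{base_url}?page={page}')
        # ... parse the listing page here ...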
コード例 #30
0
ファイル: facebook.py プロジェクト: puffyliu/iii
from selenium.webdriver import Chrome
import time
from 練習 import secret

driver = Chrome("./chromedriver")  # 加./是為了讓程式知道driver是在專案底下,沒加的話就會去找環境變數

driver.get("https://www.facebook.com")

# BeautifulSoup: find, find_all
# selenium: find_element, find_elements
driver.find_element_by_id("email").send_keys(secret.username)
driver.find_element_by_id("pass").send_keys(secret.password)
driver.find_element_by_id("loginbutton").click()

# Some accounts will be asked for a verification code
# s = input("請輸入安全碼")
# driver.find_element_by_id("approvals_code").send_keys(s)
# driver.find_element_by_id("checkpointSubmitButton").click()
#
# time.sleep(1)
# driver.find_element_by_id("checkpointSubmitButton").click()

time.sleep(5)
post = driver.find_element_by_class_name("userContent")
# Post text:
print(post.text)

time.sleep(3)
driver.close()
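
A hedged alternative (not in the original) to the fixed time.sleep(5): wait explicitly until the first post appears. The 'userContent' class name simply mirrors the snippet above and may change on Facebook's side.

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def wait_for_first_post(driver, timeout=10):
    # Block until at least one element with the userContent class is present.
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'userContent')))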
コード例 #31
0
import datetime

from bs4 import BeautifulSoup
from selenium.webdriver import Chrome

driver = Chrome()
driver.get('https://web.whatsapp.com/')

#Code to get the list of the contacts in the group :)

driver.find_element_by_xpath('//*[@title="Leaf SongDiscoveryHindi 3"]').click()

sourcer=(driver.page_source).encode('utf-8')
soup=BeautifulSoup(sourcer,features='html.parser')

#CODE FOR GETTING ALL THE CONTACTS IN THE GROUP

p=driver.find_element_by_class_name('_2y17h')
q=p.find_element_by_xpath('div[2]/div[2]/span')

l=q.text
l=l.split(',')
for i in range(len(l)):
    l[i]=l[i].replace(" ","")
    l[i]=l[i][3:]
    
popper=[]
for k,v in names.items():
    if k not in l:
        popper.append(k)
    else:
        pass
for item in popper:
コード例 #32
0
ファイル: main.py プロジェクト: gsugar87/autoVenmo
browser = Chrome(CHROME_DRIVER_PATH)
browser.get(VENMO_URL)

if os.path.isfile('cookies.pkl'):
    # there is a cookie file

    cookies = pickle.load(open("cookies.pkl", "rb"))
    for cookie in cookies:
        browser.add_cookie(cookie)

    # click on the sign in link
    signin_link = browser.find_element_by_link_text("Sign in")
    signin_link.click()

    # enter the email and password and send it
    username_box = browser.find_element_by_class_name("email-username-phone")
    username_box.send_keys(venmoInfo.my_u)
    password_box = browser.find_element_by_class_name("password")
    password_box.send_keys(venmoInfo.my_p)
    send_button = browser.find_element_by_class_name("login")
    send_button.click()

    # enter the person's name you want to pay
    time.sleep(5)
    name_box = browser.find_element_by_class_name("onebox_prefill")
    name_box.click()
    name_text_box = browser.find_element_by_class_name("paddingUnifier")
    name_text_box.send_keys(venmoInfo.payee_name)
    name_text_box.send_keys(Keys.ENTER)
    payment_box = browser.find_element_by_class_name("mainTextBox")
    time.sleep(1)
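
The script above only loads cookies.pkl. As a hedged sketch, this is how the cookie file could have been produced in an earlier, already-signed-in session; the helper name is mine.

import pickle


def save_cookies(browser, path='cookies.pkl'):
    # Persist the current session's cookies so a later run can restore them.
    with open(path, 'wb') as f:
        pickle.dump(browser.get_cookies(), f)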
コード例 #33
0
    'tutsplus/%s' % sys.argv[3]
)

if not os.path.exists(download_path):
    os.makedirs(download_path)

username = sys.argv[1]
password = getpass()

browser = Chrome()
try:
    browser.maximize_window()

    browser.get('http://tutsplus.com/sign_in')

    login_form = browser.find_element_by_class_name('sign-in')

    login_form.find_element_by_name('session[login]').send_keys(username)
    login_form.find_element_by_name('session[password]').send_keys(password)

    login_form.find_element_by_tag_name('button').submit()

    if browser.current_url == 'https://tutsplus.com/sessions':
        browser.quit()
        print('Incorrect email_address or password.')
        sys.exit()

    def video_links(page_link):
        while True:
            browser.get(page_link)