without running the whole bot. """
# NOTE(review): the module docstring opens before this chunk.
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from os import path, makedirs
from time import sleep

# Disable W3C mode so driver.get_log('browser') keeps working on the
# chromedriver build this project targets.
opt = ChromeOptions()
opt.add_experimental_option('w3c', False)
driver = Chrome(chrome_options=opt)

# Smoke test: load a page and dump the browser console log.
driver.get('https://www.apple.com')
sleep(2)
print("and:")
print(driver.get_log('browser'))

# Credentials file format: a single line "username,password".
# NOTE(review): the file handle is never closed — consider a `with` block.
username, password = open('ig.credentials.txt').read().split(',')
user = {'username': username, 'password': password}


def open_instagram_and_login():
    """ Opens instagram.com in Chrome and logs you in using given credentials """
    login_route = "https://www.instagram.com/accounts/login/?source=auth_switcher"
    # Open the Instagram login page.
    driver.get(login_route)
import json
import pandas as pd
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException
import selenium.common.exceptions as exception
from selenium.common.exceptions import TimeoutException

# Read company websites from an Excel sheet and search each on Google.
# NOTE(review): `Chrome` is not imported in this chunk — presumably imported
# earlier in the file; verify.
# Fix: raw string — the original 'C:\\Users\karim\...' relied on \k, \p and
# \s not being recognized escape sequences, which is deprecated behaviour.
webdriver = r'C:\Users\karim\projects\selenium\chromedriver.exe'
driver = Chrome(webdriver)
driver.implicitly_wait(2)

# Input sheet columns: entity name and its website URL.
column_names = ['Entity_Name', 'Website']
df = pd.read_excel('test.xlsx', names=column_names)
names = df.Website.to_list()

url = 'https://google.com'
driver.maximize_window()
driver.get(url)

# Accumulators filled while crawling search results (used further below).
count = 0
Crunch_links_list = []
names_list = []
    # Build a stable md5 identifier from address + status + phone so the same
    # listing always hashes to the same key.
    # NOTE(review): indentation reconstructed — these statements belong to a
    # function whose `def` is outside this chunk.
    string_id = property_address + status_text + phone_number
    m = hashlib.md5()
    m.update(string_id.encode('utf8'))
    identifier = m.hexdigest()
    print("hash-------------------->", identifier)
    create_time = str(datetime.datetime.now())
    update_time = ""
    insertdb = InsertDB()
    data_base.append((property_address, street_add, city, state, zipcode,
                      status_text, phone_number, identifier, create_time,
                      update_time))
    insertdb.insert_document(data_base, table_name)
    # return


if __name__ == "__main__":
    print("-----------------start---------------")
    # Launch Chrome, open Zillow and hand the rendered page to the scraper.
    path = "driver\\chromedriver.exe"
    driver = Chrome(executable_path=path)
    driver.get("https://www.zillow.com/")
    time.sleep(2)
    driver.maximize_window()
    main(driver.page_source, driver)
    def setUp(self) -> None:
        """Create a fresh Chrome driver before each test."""
        super().setUp()
        # NOTE(review): hard-coded local chromedriver path — breaks on any
        # other machine; consider a config value or webdriver-manager.
        self.driver = Chrome(r'C:\Users\Yuri\Desktop\chromedriver.exe')
        # NOTE(review): the matching `try:` opens before this chunk;
        # indentation reconstructed.
        insertdb.insert_document(data_base, table_name)
    except:
        # NOTE(review): bare except silently swallows every error
        # (including KeyboardInterrupt) — narrow it if possible.
        print("Continue")


if __name__ == "__main__":
    # Alternative category URLs that can be scraped instead of "bags":
    # url = "https://directory.goodonyou.eco/categories/activewear"
    # url = "https://directory.goodonyou.eco/categories/tops"
    # url = "https://directory.goodonyou.eco/categories/denim"
    # url = "https://directory.goodonyou.eco/categories/dresses"
    # url = "https://directory.goodonyou.eco/categories/knitwear"
    # url = "https://directory.goodonyou.eco/categories/suits"
    # url = "https://directory.goodonyou.eco/categories/basics"
    # url = "https://directory.goodonyou.eco/categories/swimwear"
    # url = "https://directory.goodonyou.eco/categories/maternity"
    # url = "https://directory.goodonyou.eco/categories/plus-size"
    url = "https://directory.goodonyou.eco/categories/bags"
    options = Options()
    options.binary_location = "C:\Program Files\Google\Chrome\Application\chrome.exe"
    path = "driver\\chromedriver.exe"
    # Two drivers: one for the category listing, one for per-brand pages.
    driver = Chrome(executable_path=path, chrome_options=options)
    driver1 = Chrome(executable_path=path, chrome_options=options)
    time.sleep(2)
    driver.maximize_window()
    driver1.maximize_window()
    driver.get(url)
    scraping(driver.page_source, driver, driver1)
def tear_down(web_driver, runner):
    """Close the browser and collect Ultrafast Grid results from Applitools."""
    # Close the browser
    print("Closing the browser.")
    web_driver.close()
    print("Running UFG and creating Batch in Dashboard")
    # we pass false to this method to suppress the exception that is thrown if we
    # find visual differences
    all_test_results = runner.get_all_test_results(False)
    print(all_test_results)


# Create a new chrome web driver
web_driver = Chrome(ChromeDriverManager().install())
# Create a runner with concurrency of 1
runner = VisualGridRunner(concurrency)
# Create Eyes object with the runner, meaning it'll be a Visual Grid eyes.
eyes = Eyes(runner)
set_up(eyes)
try:
    # ⭐️ Note to see visual bugs, run the test using the above URL for the 1st run.
    # but then change the above URL to https://demo.applitools.com/index_v2.html
    # (for the 2nd run)
    ultra_fast_test(web_driver, eyes)
finally:
def switch_to_left_frame(d):
    """Focus the left (navigation) frame of the portal."""
    d.switch_to.frame(d.find_element_by_xpath(LEFT_FRAME_XPATH))


def switch_to_right_frame(d):
    """Focus the right (content) frame of the portal."""
    d.switch_to.frame(d.find_element_by_xpath(RIGHT_FRAME_XPATH))


def create_file_name(course_, year_, semester_):
    """Build the screenshot path for a failed attempt of a course/term."""
    return './failed_attempts/' + course_ + 'year' + str(
        year_) + 'semester' + str(semester_) + '.png'


# Log in with roll number + password, then navigate to grading statistics.
driver = Chrome()
driver.maximize_window()
driver.get(SITE_URL)
driver.switch_to.frame(driver.find_element_by_xpath(RIGHT_FRAME_XPATH))
roll_no = input("Enter your roll no.: ")
username_element = driver.find_element_by_xpath(USER_NAME_ELEMENT_XPATH)
username_element.send_keys(roll_no)
password_element = driver.find_element_by_xpath(PASSWORD_ELEMENT_XPATH)
# getpass() keeps the password out of terminal echo; ENTER submits the form.
password_element.send_keys(getpass(), Keys.ENTER)
switch_to_left_frame(driver)
driver.find_element_by_xpath(ACADEMIC_ELEMENT_XPATH).click()
driver.find_element_by_xpath(ALL_ABOUT_COURSES_ELEMENT_XPATH).click()
driver.find_element_by_xpath(GRADING_STASTISTICS_ELEMENT_XPATH).click()
def browser_chrome(context):
    """Fixture-style generator: yield a Chrome driver, quit it on teardown."""
    context.browser = Chrome()
    yield context.browser
    # Runs when the generator is resumed after the test finishes.
    context.browser.quit()
# -*- coding: utf-8 -*-
__author__ = 'suruomo'
__date__ = '2020/6/29 13:44'
from selenium.webdriver import Chrome, ChromeOptions
import json

# Scrape approval-project data from the Shaanxi government service platform.
# The page is rendered by JS, so we read browser.page_source after Selenium
# drives the browser; pagination is handled in JS as well.
base_url = "http://tzxm.shaanxi.gov.cn/tzxmweb/pages/home/approvalResult/queryPublicResultNew.jsp"
option = ChromeOptions()
option.add_argument("--headless")  # hide the browser window
# Fix: the original passed "--no--sandbox" (double-dash typo), which Chrome
# silently ignores; the intended switch is "--no-sandbox".
option.add_argument("--no-sandbox")
browser = Chrome(options=option, executable_path="D:\\Notebook\\chromedriver.exe")
data = []  # collected rows
browser.get(base_url)
html = browser.page_source
# Crawl pages 1..10.
for page in range(1, 11):
    print("第" + str(page) + "页爬虫\n")
    # Elements must be re-located after every page change.
    content = browser.find_elements_by_xpath('//*[@id="tablist"]/tr')
    # Data rows start at index 2 and recur every third element.
    for c in content[2::3]:
        name = c.find_element_by_xpath("./td[1]").text
        department = c.find_element_by_xpath("./td[2]").text
        status = c.find_element_by_xpath("./td[3]").text
        date = c.find_element_by_xpath("./td[last()]").text
def get_jobs(start, end, webdriver_location, location='Pune', query=''):
    '''
    Scrape job postings from Indeed.com and dump them to an Excel file.

    start: The starting page of search to retrieve data from.
    end: The ending page of search to retrieve data from.
    webdriver_location: Path to the chromedriver executable.
    location: Which particular place, city or country to retrieve data of.
    query: Search query for the job title/keywords.
    '''

    def _posted_date(text):
        # Translate Indeed's relative posting date into an ISO date string.
        # Replaces the original 30-branch elif chain with one parse:
        #   'Just posted' / 'Today'        -> today
        #   'N day(s) ago' for N in 1..30  -> today minus N days
        #   anything else                  -> random 31..181 days back
        if text in ('Just posted', 'Today'):
            return str(date.today())
        parts = text.split()
        if (len(parts) == 3 and parts[0].isdigit()
                and parts[1] in ('day', 'days') and parts[2] == 'ago'):
            days = int(parts[0])
            if 1 <= days <= 30:
                return str(date.today() - timedelta(days=days))
        return str(date.today() - timedelta(days=randint(31, 181)))

    warnings.filterwarnings('ignore')
    opts = ChromeOptions()
    opts.headless = True
    driver = Chrome(webdriver_location, options=opts)

    columns = ['Title', 'Location', 'Company', 'Salary', 'Sponsored',
               'Description', 'Time']
    rows = []  # one dict per job; turned into a DataFrame in `finally`
    # (replaces the deprecated per-row df.append of the original)
    title = loc = company = salary = sponsored = time = ''
    job_desc = ''
    try:
        for i in range(start, end):
            # Per-page try/except: one broken page must not abort the run.
            try:
                driver.get('https://www.indeed.co.in/jobs?q=' + query + '&l='
                           + location + '&start=' + str(i))
                for job in driver.find_elements_by_class_name('result'):
                    soup = BeautifulSoup(job.get_attribute('innerHTML'),
                                         'html.parser')
                    # Each field is scraped best-effort; a missing tag falls
                    # back to an empty/default value instead of aborting.
                    try:
                        title = soup.find('a', class_='jobtitle').text.replace(
                            '\n', '').strip()
                    except:
                        title = ''
                    try:
                        loc = soup.find(class_='location').text
                    except:
                        loc = ''
                    try:
                        company = soup.find(class_='company').text.replace(
                            '\n', '').strip()
                    except:
                        company = ''
                    try:
                        salary = soup.find(class_='salary').text.replace(
                            '\n', '').strip()
                    except:
                        salary = ''
                    try:
                        sponsored = soup.find(class_='sponsoredGray').text
                        sponsored = 'Sponsored'
                    except:
                        sponsored = 'Organic'
                    # Open the job card; a popover may block the first click.
                    sum_div = job.find_element_by_xpath('./div[3]')
                    try:
                        sum_div.click()
                    except:
                        close_button = driver.find_elements_by_class_name(
                            'popover-x-button-close')[0]
                        close_button.click()
                        sum_div.click()
                    try:
                        time = _posted_date(soup.find(class_='date').text)
                    except:
                        # No date element at all: same random fallback.
                        time = str(date.today() -
                                   timedelta(days=randint(31, 181)))
                    driver.implicitly_wait(10)
                    try:
                        job_desc = driver.find_element_by_id('vjs-desc').text
                    except:
                        job_desc = None
                    rows.append({'Title': title, 'Location': loc,
                                 'Company': company, 'Salary': salary,
                                 'Sponsored': sponsored,
                                 'Description': job_desc, 'Time': time})
            except Exception as e:
                print(e)
    finally:
        # Fix: save/close exactly once, after all pages — the original
        # attached this `finally` to the per-page try, closing the driver
        # mid-run.
        try:
            if not os.path.isdir(download_directory):
                os.mkdir(download_directory)
            # Random-ish filename from the md5 of the current timestamp.
            # (The original round-tripped the hexdigest through bytes and
            # stripped the b'' wrapper — the result is just the hexdigest.)
            n = download_directory + hashlib.md5(
                str(datetime.now()).encode()).hexdigest() + '.xlsx'
            pd.DataFrame(rows, columns=columns).to_excel(n, index=False)
        except Exception as e:
            print(e)
        driver.close()
import cssselect
import pickle
from bs4 import BeautifulSoup as bsoup
from lxml import html
from selenium.webdriver import Chrome, ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

# Headless Chrome used to render the notebook pages before scraping.
chrome_options = Options()
chrome_options.add_argument('log-level=3')
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
webdriver = 'chromedriver_win32/chromedriver.exe'
driver = Chrome(webdriver, options=chrome_options)

meta_df = pd.read_csv('covid-kernels.csv')
start = 0
# Accumulated per-notebook stats; persisted to notebooks.p between runs.
notebooks = {'name': [], 'views': [], 'char_length': [], 'packages': []}
# Resume support: continue right after the last notebook already pickled.
# NOTE(review): pickle.load on an open() without close leaks the handle.
if os.path.isfile("notebooks.p"):
    notebooks = pickle.load(open("notebooks.p", "rb"))
    start = meta_df.index[meta_df.notebook_name == notebooks['name']
                          [-1]][0] + 1
for i in range(start, len(meta_df['notebook_link'])):
    notebooks['name'].append(meta_df['notebook_name'][i])
    link = meta_df['notebook_link'][i]
    print(f'{i}/{len(meta_df)}')
    print(link)
read_excel = objconfig.get("Basics", "read_excel") ## Writing data to excel file objwbook = openpyxl.Workbook() wsheet = objwbook.create_sheet("FirstProject") ##Reading data from excel sheet objwbook2 = openpyxl.load_workbook(read_excel) wsheet2 = objwbook2["data"] name = wsheet2.cell(1, 1).value email = wsheet2.cell(1, 2).value password = wsheet2.cell(1, 3).value try: ## created an object "driver" for chrome class driver = Chrome(executable_path=browser_exe_path) driver.get(url) driver.maximize_window() #print(driver.title) # #print(driver.current_url) # #print(driver.page_source) # ## fetching text from ui and asserting its value. text = driver.find_element_by_xpath("//label[@for='tab1']").text driver.find_element_by_xpath( "//form[@name='register']/input[2]").send_keys(name) driver.find_element_by_xpath( "//form[@name='register']/input[3]").send_keys(email) driver.find_element_by_xpath( "//input[@name='fld_email']/following-sibling::input[1]").send_keys( password) #time.sleep(60)
from selenium.webdriver.common.action_chains import ActionChains
import utils

# Load runtime configuration (driver path, etc.) from configs.json.
with open('configs.json', "r") as in_file:
    configs = json.load(in_file)
print(json.dumps(configs, indent=4, sort_keys=True))

# url = "https://www.vebongdaonline.vn"
url = "https://www.vebongdaonline.vn/bookTicket"
# Local HTML fixtures used during development:
# url = "file:///home/xuananh/2-chon-thong-tin-ve-khong-dang-nhap/HE%CC%A3%CC%82%20THO%CC%82%CC%81NG%20%C4%90A%CC%A3%CC%86T%20VE%CC%81%20ONLINE.html"
# url = "file:///home/xuananh/2-chon-thong-tin-ve-khong-dang-nhap(chinh-sua)/HE%CC%A3%CC%82%20THO%CC%82%CC%81NG%20%C4%90A%CC%A3%CC%86T%20VE%CC%81%20ONLINE.html"
# url = "file:///home/xuananh/2-chon-thong-tin-ve-he-thong-dang-ban/H%E1%BB%86%20TH%E1%BB%90NG%20%C4%90%E1%BA%B6T%20V%C3%89%20ONLINE.html"
chrome_option = ChromeOptions()
driver = Chrome(executable_path=configs['duong_dan_den_file_chrome'],
                options=chrome_option)
# firefox_option = FirefoxOptions()
# driver = Firefox(executable_path="/home/xuananh/data/Downloads/geckodriver-v0.23.0-linux64/geckodriver")
driver.maximize_window()
driver.implicitly_wait(1)
driver.get(url)
# driver.find_element_by_xpath('//input[@class="btn_muave"]').click()

# Poll until the "system busy" banner disappears from the page.
thong_bao_ban_xu_ly = "dang ban"
while thong_bao_ban_xu_ly:
    try:
        # thong_bao_ban_xu_ly = driver.find_element_by_link_text("Hệ thống đang bận xử lý. Quý khách vui lòng quay trở lại sau ít phút. Rất xin lỗi Quý khách về sự bất tiện này.")
        thong_bao_ban_xu_ly = driver.find_element_by_xpath(
# @Site : # @File : wevdirverUsage.py # @Software: PyCharm from selenium.webdriver import Chrome from selenium.webdriver.common.keys import Keys import time import requests def board_crawl(url, web): pass web = Chrome() headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36"} test_url_board = "https://huaban.com/pins/3899008225/" init_url = "https://huaban.com/discovery/beauty/" web.get(test_url_board) time.sleep(1) # xpath can only help us get html tag but can not get attribute pic_li = web.find_element_by_xpath("//*[@id='baidu_image_holder']/img")
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.remote import webelement
from selenium.webdriver.common.action_chains import ActionChains
from urllib.request import urlopen
from bs4 import BeautifulSoup
import os
import io
import time

# Download a novel chapter-by-chapter from shuquge.com into ./x/.
diver = Chrome("F:/asir/asi/chromedriver.exe")
diver.get("http://www.shuquge.com/txt/108555/index.html")
# Chapter list entries and the book title element.
mids = diver.find_elements_by_xpath("/html/body/div[5]/dl/dd")
book = diver.find_element_by_xpath("/html/body/div[4]/div[2]/h2")
x = len(mids)
m = str(book.text) + "totally have " + str(x) + "章"
print(m)
# Fetch the book's cover image URL.
cover = diver.find_element_by_xpath("/html/body/div[4]/div[2]/div[1]/img")
link = cover.get_attribute("src")
# NOTE(review): file handles opened here (and in the loop below) are never
# closed — consider `with open(...)`.
filed = open("x/readme.txt", "w+")
filed.write(m + '\n')
filed.write('封面为' + str(link))
# One output file per chapter (first 99 chapters).
for i in range(1, 100):
    sc = "/html/body/div[5]/dl/dd" + '[' + str(i) + ']'
    mid = diver.find_element_by_xpath(sc)
    name = 'x/' + mid.text + '.txt'
    file = open(name, "w+")
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
import time
import json
import csv
from selenium.webdriver.chrome.options import Options

# Headless browser session for the Amity LMS ("amigo") portal.
options = Options()
options.headless = True
browser = Chrome("./chromedriver", chrome_options=options)

amigo_url = "https://amigo.amityonline.com/login/index.php"
username = '******'
password = "******"
sleep_time = 0.01
data_links = []  # (title, link) rows collected after login
heads = ["Title", "Link"]

# NOTE(review): the except for this try lies outside this chunk.
try:
    # Log in with the credentials above.
    browser.get(amigo_url)
    browser.maximize_window()
    element = browser.find_element(By.ID, "username")
    # time.sleep(sleep_time)
    element.send_keys(username)
    element = browser.find_element(By.ID, "password")
    element.send_keys(password)
    # time.sleep(sleep_time)
"""
Taking a screenshot of the window (even if it's running in headless mode.
"""
from selenium.webdriver import Chrome

browser = Chrome()
browser.get('https://duckduckgo.com')

# Capture the current window to path/file/name.
# save_screenshot() is selenium's documented alias for
# get_screenshot_as_file(), used by the original.
browser.save_screenshot('myscreenshot.png')

browser.quit()
from selenium.webdriver import Chrome

# Fix: raw string — 'D:\chromedriver.exe' relied on \c not being an escape
# sequence, which is deprecated Python behaviour; the value is unchanged.
driver = Chrome(r'D:\chromedriver.exe')
driver.maximize_window()
driver.get('https://www.google.com/')

# Open Gmail from the Google landing page.
element = driver.find_element_by_link_text('Gmail')
element.click()
def init_browser():
    """Spin up Chrome reusing the persistent 'selenium' profile directory."""
    chrome_opts = Options()
    chrome_opts.add_argument("user-data-dir=selenium")
    return Chrome(chrome_options=chrome_opts)
    }
    # NOTE(review): indentation reconstructed; the dict closed above and the
    # enclosing scope open before this chunk.
    # Create a demo object that the documentation screenshots below use.
    object = sampledb.logic.objects.create_object(
        action_id=instrument_action.id,
        data=data,
        user_id=user.id,
        previous_object_id=None,
        schema=schema)
    os.makedirs('docs/static/img/generated', exist_ok=True)
    options = Options()
    # disable Chrome sandbox for root in GitLab CI
    if 'CI' in os.environ and getpass.getuser() == 'root':
        options.add_argument('--no-sandbox')
    # Run a live Flask server and drive it with Chrome; contextlib.closing
    # guarantees the driver quits even if a screenshot helper raises.
    with contextlib.contextmanager(
            tests.test_utils.create_flask_server)(app) as flask_server:
        with contextlib.closing(Chrome(options=options)) as driver:
            time.sleep(5)
            # One helper call per documented feature / screenshot.
            object_permissions(flask_server.base_url, driver)
            default_permissions(flask_server.base_url, driver)
            guest_invitation(flask_server.base_url, driver)
            action(flask_server.base_url, driver, instrument_action)
            hazards_input(flask_server.base_url, driver, instrument_action)
            tags_input(flask_server.base_url, driver, object)
            comments(flask_server.base_url, driver, object)
            activity_log(flask_server.base_url, driver, object)
            files(flask_server.base_url, driver, object)
            file_information(flask_server.base_url, driver, object)
            labels(flask_server.base_url, driver, object)
            advanced_search_by_property(flask_server.base_url, driver, object)
            advanced_search_visualization(flask_server.base_url, driver)
from selenium.webdriver import Chrome

driver = Chrome("chromedriver.exe")

pages = 2
# Fix: the original `range(1, pages)` stopped one short and only ever
# visited page 1 even though `pages = 2`; include the final page.
for page in range(1, pages + 1):
    url = "http://quotes.toscrape.com/js/page/" + str(page) + "/"
    driver.get(url)
    # Count the quote blocks rendered on this JS-driven page.
    items = len(driver.find_elements_by_class_name("quote"))
    print(items)
from selenium.webdriver import Chrome

driver = Chrome("D:/chromedriver_win32/chromedriver.exe")
driver.get("https://www.facebook.com")
# Compare the x coordinates of three signup fields: if any two share the
# same x, the form is considered vertically stacked.
e = driver.find_element_by_name("firstname")
epos = e.location
f = driver.find_element_by_name("reg_email__")
fpos = f.location
g = driver.find_element_by_name("reg_passwd__")
gpos = g.location
if epos["x"] == fpos["x"] or epos["x"] == gpos["x"]:
    print("vertical")
else:
    print("not vertical")
from selenium import webdriver
from selenium.webdriver import Chrome
from selenium.webdriver.common.keys import Keys
import time

url = "https://www3.hilton.com/en/index.html"
chromedriver = '/Users/mclaren/Downloads/CodingFor/chromedriver'
driver = Chrome(chromedriver)
driver.get(url)

# Fill the hotel search form: check-in/out dates, then the destination.
form = driver.find_element_by_id("hotelSearchOneBox")
print(form)
checkin = driver.find_element_by_id("checkin")
checkout = driver.find_element_by_id("checkout")
checkin.clear()
checkout.clear()
checkin.send_keys("12 Mar 2019")
checkout.send_keys("20 Mar 2019")
checkin.send_keys(Keys.ENTER)
checkout.send_keys(Keys.ENTER)
form.send_keys("Miami")
form.send_keys(Keys.ENTER)
form.submit()
# # Wait for 1 second
# time.sleep(1)

# Collect result descriptions and prices from the results page.
hotels = driver.find_elements_by_class_name('hotelDescription')
hotels = [[hotel.text] for hotel in hotels]
print(hotels)
# hotels = hotels.find_elements_by_tag_name("span")
prices = driver.find_elements_by_class_name('statusPrice')
def parse(url, lang=RU):
    """Scrape a YouTube video's metadata and full comment thread to a .txt file.

    NOTE(review): indentation of nested blocks reconstructed from a
    whitespace-mangled source — verify against the original file.
    """
    opt = Options()
    opt.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                     'Chrome/80.0.3987.116 Safari/537.36')
    opt.add_argument("--disable-notifications")
    # Don't load images: faster page loads.
    pref = {"profile.managed_default_content_settings.images": 2}
    opt.add_experimental_option('prefs', pref)
    browser = Chrome(executable_path='chromedriver.exe', options=opt)
    browser.maximize_window()

    def bad(driver):
        # Dismiss the YouTube promo overlay if it is currently displayed.
        bd = driver.find_elements_by_css_selector('div.ytd-mealbar-promo-renderer')
        if len(bd) > 0 and bd[0].is_displayed():
            bd[0].find_element_by_tag_name('a').send_keys(Keys.ESCAPE)

    browser.get(url)
    browser.implicitly_wait(10)
    # Turn autoplay off so the next video doesn't start while scrolling.
    browser.find_element_by_css_selector('paper-toggle-button#toggle').click()
    print('...Autoplay off...')
    # Output file is named after the first word of the video title.
    output = browser.title.split()[0] + '.txt'
    with open(output, 'w', encoding='utf8') as f:
        title = browser.find_element_by_css_selector('h1.title').text.strip()
        views = browser.find_element_by_css_selector('span.yt-view-count-renderer').text.strip()
        views = int(''.join([i for i in views if i != ' ' and not i.isalpha()]))
        date_p = browser.find_element_by_css_selector('div#date').text.strip()
        date_p = date_p.replace('•', '')
        tmp = browser.find_element_by_css_selector('div#top-level-buttons').text.strip().split('\n')
        likes = num_to_str(tmp[0], lang=RU)
        dis = num_to_str(tmp[1], lang=RU)
        chanel = browser.find_element_by_css_selector('div#text-container').text.strip()
        subscribers = browser.find_element_by_css_selector('yt-formatted-string#owner-sub-count').text.strip().split()
        try:
            subscribers = subscr(subscribers, lang=RU)
        except Exception:
            subscribers = '-'
        # Expand the description if a "more" button is present.
        desc_presence = browser.find_elements_by_css_selector('yt-formatted-string.more-button')
        if len(desc_presence) > 0:
            desc_presence[0].click()
        desc = browser.find_element_by_css_selector('yt-formatted-string.content').text.strip().replace('\n', '')
        # Header section of the output file (labels come from `lang`).
        print(lang[2], chanel, file=f)
        print(lang[3], subscribers, file=f)
        print(lang[4], title, file=f)
        print(lang[5], url, file=f)
        print(lang[6], date_p, file=f)
        print(lang[7], views, file=f)
        print(lang[8], likes, file=f)
        print(lang[9], dis, file=f)
        print(lang[10], desc, file=f)
        print(lang[11], file=f)
    divs1, divs2 = 0, []
    browser.find_element_by_tag_name('body').send_keys(Keys.PAGE_DOWN)
    browser.find_element_by_tag_name('body').send_keys(Keys.PAGE_DOWN)
    cnt = 0
    # Scroll until the comment count stops growing for 5 consecutive checks.
    for i in range(1, 777777777777777777):
        browser.find_element_by_tag_name('body').send_keys(Keys.PAGE_DOWN)
        sleep(1)
        if i == 10:
            bad(driver=browser)
            browser.find_element_by_tag_name('body').send_keys(Keys.SPACE)
            print('....Video paused....')
        divs2 = browser.find_elements_by_css_selector('ytd-comment-thread-renderer.style-scope')
        if len(divs2) == divs1:
            cnt += 1
            if cnt == 5:
                break
        else:
            cnt = 0
        divs1 = len(divs2)
    bad(driver=browser)
    for k in divs2:
        print('....... Comment ', divs2.index(k) + 1, '/', len(divs2), '..........')
        author = k.find_element_by_css_selector('a#author-text').text.strip()
        if len(author) == 0:
            author = k.find_element_by_css_selector('yt-formatted-string#text').text.strip()
        when = k.find_element_by_css_selector('yt-formatted-string.published-time-text').text.strip()
        text = k.find_element_by_css_selector('yt-formatted-string#content-text').text.strip().replace('\n', '')
        with open(output, 'a', encoding='utf8') as f:
            print(author, ' (', when, ')', file=f, sep='')
            print(' ', text, file=f, sep='')
            # Expand and dump replies, if this thread has any.
            repl_p = k.find_elements_by_css_selector('div#replies')
            if repl_p[0].get_attribute('hidden') != 'true':
                q = repl_p[0].find_elements_by_css_selector('a.yt-simple-endpoint')[0]
                bad(driver=browser)
                q.send_keys(Keys.ARROW_UP)
                sleep(1.5)
                if q.is_displayed():
                    q.click()
                    sleep(1)
                # Keep clicking the "show more replies" continuation until
                # no such button is left.
                while True:
                    nex = k.find_elements_by_css_selector('yt-formatted-string.yt-next-continuation')
                    nex = [i for i in nex if i.text.strip() == lang[12]]
                    txt = [i.text.strip() for i in nex]
                    if lang[12] not in txt:
                        break
                    for _ in nex:
                        t = 0
                        while t < 5:
                            try:
                                if _.is_displayed():
                                    browser.execute_script("arguments[0].scrollIntoView(false);", _)
                                    _.click()
                                sleep(1)
                                # NOTE(review): resetting t to 0 on success
                                # means this loop only exits after 5
                                # consecutive exceptions (e.g. the element
                                # going stale) — confirm that is intended.
                                t = 0
                            except Exception:
                                t += 1
                replies = k.find_element_by_css_selector('div#loaded-replies')
                replies = replies.find_elements_by_css_selector('ytd-comment-renderer')
                for s in replies:
                    print('\tAnswer ', replies.index(s) + 1, ' / ', len(replies))
                    rep_auth = s.find_element_by_css_selector('a#author-text').text.strip()
                    if len(rep_auth) == 0:
                        rep_auth = s.find_element_by_css_selector('yt-formatted-string#text').text.strip()
                    rep_data = s.find_element_by_css_selector('yt-formatted-string.published-time-text').text.strip()
                    rep_text = s.find_element_by_css_selector('yt-formatted-string#content-text'). \
                        text.strip().replace('\n', '')
                    print('\t', rep_auth, ' (', rep_data, ')', file=f, sep='')
                    print('\t\t', rep_text, file=f)
    browser.close()
from selenium.webdriver import Chrome
import requests

# Click through PTT's confirmation page with Selenium, then reuse the
# resulting session cookies for plain `requests` downloads.
driver = Chrome('./chromedriver')
url = 'https://www.ptt.cc/bbs/index.html'
driver.get(url)
driver.find_element_by_class_name('board-name').click()
driver.find_element_by_class_name('btn-big').click()
cookies = driver.get_cookies()
# Convert selenium's cookie dicts into a simple name -> value mapping.
newCookies = dict()
for c in cookies:
    print(c)
    newCookies[c['name']] = c['value']
res = requests.get('https://www.ptt.cc/bbs/Gossiping/index.html',
                   cookies=newCookies)
print(res.text)
        return button

    def get_actual_loginname(self):
        '''Get the displayed name of the logged-in user.'''
        name = self.wait_ele(login_locator.login_name, 30)
        return name.text

    def get_alter_error(self):
        '''Get the error text shown in the alert dialog.'''
        alter_error = self.wait_ele(login_locator.alter_error, 30)
        return alter_error.text

    def get_error_info(self):
        '''Get the inline error hint text.'''
        error_info = self.wait_ele(login_locator.error_info, 30)
        return error_info.text

    def clear_user_name(self):
        '''Clear the username input field.'''
        ele = self.get_user_name().clear()
        return ele

    def clear_pass_word(self):
        '''Clear the password input field.'''
        ele = self.get_passwork().clear()
        return ele


if __name__ == '__main__':
    # Manual smoke test of the register flow.
    LoginPage(Chrome()).register('18684720553', 'python')
# BUCLE QUE REVISA SI HAY CAMBIOS PARA TWITEAR while True: ahora = datetime.now() print("---------------------------------------------------------------------------------") print("Ciclo iniciado = ", ahora) sendtelegramessage("Corriendo... ") if limpiadorpantalla == 20: os.system('clear') print("pantalla limpia") limpiadorpantalla = 0 driver = Chrome(executable_path=ubicacionchromedriver, chrome_options=chrome_options) driver.get("https://e.infogram.com/_/fx5xud0FhM7Z9NS6qpxs?parent_url=https%3A%2F%2Fcovid19.gob.sv%2F&src=embed#async_embed") recuperados = driver.find_elements_by_tag_name('span') try: fintotales = recuperados[12] finrecuperados = recuperados[14] finmuerte = recuperados[16] cofirmados = recuperados[19] except: if ejecucion == 0: print("------ES LA PRIMERA EJECUCION NO HAY DATOS PARA SALTAR EL ERROR DEBE EJECUTARLO NUEVAMENTE-----") print("DETENIENDO") driver.quit() sendtelegramessage("ERROR EXTRAYENDO DATOS :(") exit()
    def scrape(self):
        """Scrape the leaderboard page into a {player_name: stats} dict.

        Returns score_dict, or None when the outer try fails.
        NOTE(review): indentation reconstructed from a whitespace-mangled
        source — verify nesting against the original file.
        """
        start = datetime.now()
        score_dict = {}
        options = ChromeOptions()
        user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
        options.add_argument("--headless")
        options.add_argument("--disable-gpu")
        options.add_argument(f'user-agent={user_agent}')
        driver = Chrome(options=options)
        print('driver pre url: ', datetime.now() - start)
        driver.get(self.url)
        print('driver after url: ', datetime.now() - start)
        #soup = BeautifulSoup(driver.page_source, 'html.parser')
        try:
            #print (driver.page_source)
            # Wait until the leaderboard table has rendered.
            lb = WebDriverWait(driver, 60).until(
                EC.presence_of_element_located(
                    (By.ID, "leaderBoardPlayersTraditionalContent")))
            print('a')
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            print('b')
            table = (soup.find("div", {'id': 'leaderBoardPlayersTraditionalContent'}))
            print('c')
            leaderboard = soup.find(
                'div', {'id': 'leaderBoardPlayersTraditionalContent'})
            player_rows = soup.find_all('div', {'class': 'playerRow'})
            score_dict = {}
            for row in player_rows:
                masters_name = row.find('div', {
                    'class': 'playerName'
                }).find('div', {
                    'class': 'data'
                }).text
                # The player number is encoded in a CSS class like "pr1234".
                for c in row['class']:
                    if c[:2] == 'pr':
                        player_num = c[2:]
                    else:
                        pass
                try:
                    try:
                        golfer = Golfer.objects.get(golfer_pga_num=player_num)
                        field = Field.objects.get(tournament=self.tournament,
                                                  golfer=golfer)
                        player_name = field.playerName
                    except Exception:
                        # Fallback: match by the first word of the name.
                        if Field.objects.filter(
                                tournament=self.tournament,
                                playerName__contains=masters_name.split(',')
                                [0].split(' ')[0].capitalize()).exists():
                            o = Field.objects.get(
                                tournament=self.tournament,
                                playerName__contains=masters_name.split(
                                    ',')[0].split(' ')[0].capitalize())
                            player_name = o.playerName
                        else:
                            # NOTE(review): `masters_player` is undefined —
                            # this raises NameError (swallowed by the outer
                            # handler below); probably `masters_name` was
                            # intended.
                            print('cant find player', masters_player)
                    stats = row.find('div', {'class': 'playerStatContainer'})
                    pos = row.find('div', {
                        'class': 'pos'
                    }).find('div', {
                        'class': 'data'
                    }).text
                    # Withdrawn players ("WD") have no totals, only rounds.
                    if pos != "WD":
                        total = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'attr': 'topar'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        today = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'attr': 'today3'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        thru = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'class': 'thru'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        r1 = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'class': 'r1'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        if r1 == '':
                            r1 = '--'
                        r2 = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'class': 'r2'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        if r2 == '':
                            r2 = '--'
                        r3 = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'class': 'r3'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        if r3 == '':
                            r3 = '--'
                        r4 = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'class': 'r4'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        if r4 == '':
                            r4 = '--'
                    else:
                        total = ''
                        today = ''
                        thru = ''
                        r1 = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'class': 'r1'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        if r1 == '':
                            r1 = '--'
                        r2 = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'class': 'r2'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        if r2 == '':
                            r2 = '--'
                        r3 = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'class': 'r3'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        if r3 == '':
                            r3 = '--'
                        r4 = row.find('div', {
                            'class': 'playerStatContainer'
                        }).find('div', {
                            'class': 'r4'
                        }).find('div', {
                            'class': 'data'
                        }).text
                        if r4 == '':
                            r4 = '--'
                    score_dict[player_name] = {
                        'rank': pos, 'change': 'n/a', \
                        'thru': thru, 'round_score': today, 'total_score': total
                        , 'r1': r1, 'r2': r2, 'r3': r3, 'r4': r4
                    }
                except Exception as e:
                    print('row execptino', e)
            cut_num = len([
                x for x in score_dict.values()
                if int(utils.formatRank(x['rank'])) <= 50 and x['rank'] not in
                self.tournament.not_playing_list()
            ]) + 1
            # NOTE(review): list + 1 raises TypeError here, and `cut` below
            # is undefined (NameError) — both are swallowed by the outer
            # except, so cut_score/tournament are never saved. Likely
            # intended: derive the score at position `cut_num` and use
            # `cut_num` in the message.
            cut_score = [
                x for x in score_dict.values()
                if int(utils.formatRank(x['rank'])) <= 50 and x['rank'] not in
                self.tournament.not_playing_list()
            ] + 1
            self.tournament.cut_score = 'Cut Number ' + str(cut)
            self.tournament.save()
            return (score_dict)
        except Exception as e:
            print('scrape issues', e)
def cookies_erneuern(self, terminbuchung=False):
    """
    Renew the session cookies once they have expired.

    Includes a backup process for the appointment booking, used when the
    booking fails inside the bot: the browser window is opened and the
    booking is completed there.

    :param terminbuchung: start the backup booking process in the browser
    :return: True if the browser cookies were taken over into the session,
        False otherwise
    """
    self.log.info("Browser-Cookies generieren")

    # Select the chromedriver binary matching the detected OS/architecture.
    chromedriver = None
    if 'linux' in self.operating_system:
        # NOTE(review): platform.architecture() returns a tuple such as
        # ('64bit', ''), so '"64" in ...' is a tuple-membership test that
        # never matches; 64-bit detection effectively relies on the
        # sys.maxsize comparison alone.
        if "64" in platform.architecture() or sys.maxsize > 2**32:
            chromedriver = os.path.join(
                PATH, "tools/chromedriver/chromedriver-linux-64")
        else:
            chromedriver = os.path.join(
                PATH, "tools/chromedriver/chromedriver-linux-32")
    elif 'windows' in self.operating_system:
        chromedriver = os.path.join(
            PATH, "tools/chromedriver/chromedriver-windows.exe")
    elif 'darwin' in self.operating_system:
        # Apple-Silicon Macs need a different driver binary than Intel Macs.
        if "arm" in platform.processor().lower():
            chromedriver = os.path.join(
                PATH, "tools/chromedriver/chromedriver-mac-m1")
        else:
            chromedriver = os.path.join(
                PATH, "tools/chromedriver/chromedriver-mac-intel")

    # Build the service path with a randomly chosen PLZ (postal code)
    # from the configured vaccination centers.
    path = "impftermine/service?plz={}".format(choice(
        self.plz_impfzentren))

    # Disable Selenium logging.
    chrome_options = Options()
    chrome_options.add_experimental_option('excludeSwitches',
                                           ['enable-logging'])

    with Chrome(chromedriver, options=chrome_options) as driver:
        driver.get(self.domain + path)

        # Queue bypass: if the waiting-room cookie is present, re-add it
        # under the "allowed" cookie name and reload the page.
        queue_cookie = driver.get_cookie("akavpwr_User_allowed")
        if queue_cookie:
            self.log.info("Im Warteraum, Seite neuladen")
            queue_cookie["name"] = "akavpau_User_allowed"
            driver.add_cookie(queue_cookie)
            # Reload the page.
            driver.get(self.domain + path)
            driver.refresh()

        # Click "Auswahl bestätigen" (confirm selection) in the cookie banner.
        # Waiting-room support: timeout of one hour.
        button_xpath = ".//html/body/app-root/div/div/div/div[2]/div[2]/div/div[1]/a"
        button = WebDriverWait(driver, 60 * 60).until(
            EC.element_to_be_clickable((By.XPATH, button_xpath)))
        action = ActionChains(driver)
        action.move_to_element(button).click().perform()

        # Click "Vermittlungscode bereits vorhanden" (code already available).
        button_xpath = "/html/body/app-root/div/app-page-its-login/div/div/div[2]/app-its-login-user/" \
                       "div/div/app-corona-vaccination/div[2]/div/div/label[1]/span"
        button = WebDriverWait(driver, 1).until(
            EC.element_to_be_clickable((By.XPATH, button_xpath)))
        action = ActionChains(driver)
        action.move_to_element(button).click().perform()

        # Select the first code input field.
        input_xpath = "/html/body/app-root/div/app-page-its-login/div/div/div[2]/app-its-login-user/" \
                      "div/div/app-corona-vaccination/div[3]/div/div/div/div[1]/app-corona-vaccination-yes/" \
                      "form[1]/div[1]/label/app-ets-input-code/div/div[1]/label/input"
        input_field = WebDriverWait(driver, 1).until(
            EC.element_to_be_clickable((By.XPATH, input_xpath)))
        action = ActionChains(driver)
        action.move_to_element(input_field).click().perform()

        # Enter the code.
        input_field.send_keys(self.code)
        time.sleep(.1)

        # Click "Termin suchen" (search appointment).
        button_xpath = "/html/body/app-root/div/app-page-its-login/div/div/div[2]/app-its-login-user/" \
                       "div/div/app-corona-vaccination/div[3]/div/div/div/div[1]/app-corona-vaccination-yes/" \
                       "form[1]/div[2]/button"
        button = WebDriverWait(driver, 1).until(
            EC.element_to_be_clickable((By.XPATH, button_xpath)))
        action = ActionChains(driver)
        action.move_to_element(button).click().perform()

        # Add a mouse movement (not visible).
        action.move_by_offset(10, 20).perform()

        # Backup process: if the booking does not work with the bot, the
        # browser window is opened and the booking is finished there.
        if terminbuchung:
            # Click "Termin suchen" (search appointment).
            button_xpath = "/html/body/app-root/div/app-page-its-search/div/div/div[2]/div/div/div[5]/div/div[1]/div[2]/div[2]/button"
            button = WebDriverWait(driver, 1).until(
                EC.element_to_be_clickable((By.XPATH, button_xpath)))
            action = ActionChains(driver)
            action.move_to_element(button).click().perform()
            time.sleep(.5)

            # Select an appointment.
            button_xpath = '//*[@id="itsSearchAppointmentsModal"]/div/div/div[2]/div/div/form/div[1]/div[2]/label/div[2]/div'
            button = WebDriverWait(driver, 1).until(
                EC.element_to_be_clickable((By.XPATH, button_xpath)))
            action = ActionChains(driver)
            action.move_to_element(button).click().perform()
            time.sleep(.5)

            # Click the "AUSWÄHLEN" (select) button.
            button_xpath = '//*[@id="itsSearchAppointmentsModal"]/div/div/div[2]/div/div/form/div[2]/button[1]'
            button = WebDriverWait(driver, 1).until(
                EC.element_to_be_clickable((By.XPATH, button_xpath)))
            action = ActionChains(driver)
            action.move_to_element(button).click().perform()
            time.sleep(.5)

            # Click "Daten erfassen" (enter contact data).
            button_xpath = '/html/body/app-root/div/app-page-its-search/div/div/div[2]/div/div/div[5]/div/div[2]/div[2]/div[2]/button'
            button = WebDriverWait(driver, 1).until(
                EC.element_to_be_clickable((By.XPATH, button_xpath)))
            action = ActionChains(driver)
            action.move_to_element(button).click().perform()
            time.sleep(.5)

            # Click the salutation ("Anrede").
            button_xpath = '//*[@id="itsSearchContactModal"]/div/div/div[2]/div/form/div[1]/app-booking-contact-form/div[1]/div/div/div[1]/label[2]/span'
            button = WebDriverWait(driver, 1).until(
                EC.element_to_be_clickable((By.XPATH, button_xpath)))
            action = ActionChains(driver)
            action.move_to_element(button).click().perform()

            # Input first name.
            input_xpath = '/html/body/app-root/div/app-page-its-search/app-its-search-contact-modal/div/div/div/div[2]/div/form/div[1]/app-booking-contact-form/div[2]/div[1]/div/label/input'
            input_field = WebDriverWait(driver, 1).until(
                EC.element_to_be_clickable((By.XPATH, input_xpath)))
            action.move_to_element(input_field).click().perform()
            input_field.send_keys(self.kontakt['vorname'])

            # Input last name.
            input_field = driver.find_element_by_xpath(
                '//*[@id="itsSearchContactModal"]/div/div/div[2]/div/form/div[1]/app-booking-contact-form/div[2]/div[2]/div/label/input'
            )
            input_field.send_keys(self.kontakt['nachname'])

            # Input postal code (PLZ).
            input_field = driver.find_element_by_xpath(
                '//*[@id="itsSearchContactModal"]/div/div/div[2]/div/form/div[1]/app-booking-contact-form/div[3]/div[1]/div/label/input'
            )
            input_field.send_keys(self.kontakt['plz'])

            # Input city.
            input_field = driver.find_element_by_xpath(
                '//*[@id="itsSearchContactModal"]/div/div/div[2]/div/form/div[1]/app-booking-contact-form/div[3]/div[2]/div/label/input'
            )
            input_field.send_keys(self.kontakt['ort'])

            # Input street.
            input_field = driver.find_element_by_xpath(
                '//*[@id="itsSearchContactModal"]/div/div/div[2]/div/form/div[1]/app-booking-contact-form/div[4]/div[1]/div/label/input'
            )
            input_field.send_keys(self.kontakt['strasse'])

            # Input house number.
            input_field = driver.find_element_by_xpath(
                '//*[@id="itsSearchContactModal"]/div/div/div[2]/div/form/div[1]/app-booking-contact-form/div[4]/div[2]/div/label/input'
            )
            input_field.send_keys(self.kontakt['hausnummer'])

            # Input phone number; the "+49" country prefix is stripped
            # before typing.
            input_field = driver.find_element_by_xpath(
                '//*[@id="itsSearchContactModal"]/div/div/div[2]/div/form/div[1]/app-booking-contact-form/div[4]/div[3]/div/label/div/input'
            )
            input_field.send_keys(self.kontakt['phone'].replace("+49", ""))

            # Input mail address.
            input_field = driver.find_element_by_xpath(
                '//*[@id="itsSearchContactModal"]/div/div/div[2]/div/form/div[1]/app-booking-contact-form/div[5]/div/div/label/input'
            )
            input_field.send_keys(self.kontakt['notificationReceiver'])

            # Click the "ÜBERNEHMEN" (apply) button.
            button_xpath = '//*[@id="itsSearchContactModal"]/div/div/div[2]/div/form/div[2]/button[1]'
            button = WebDriverWait(driver, 1).until(
                EC.element_to_be_clickable((By.XPATH, button_xpath)))
            action = ActionChains(driver)
            action.move_to_element(button).click().perform()
            time.sleep(.7)

            # Book the appointment.
            button_xpath = '/html/body/app-root/div/app-page-its-search/div/div/div[2]/div/div/div[5]/div/div[3]/div[2]/div[2]/button'
            button = WebDriverWait(driver, 1).until(
                EC.element_to_be_clickable((By.XPATH, button_xpath)))
            action = ActionChains(driver)
            action.move_to_element(button).click().perform()
            time.sleep(3)

        # Check whether cookies were set and take them over into the
        # requests session.
        try:
            cookie = driver.get_cookie("bm_sz")
            if cookie:
                self.s.cookies.clear()
                self.s.cookies.update(
                    {c['name']: c['value'] for c in driver.get_cookies()})
                self.log.info("Browser-Cookie generiert: *{}".format(
                    cookie.get("value")[-6:]))
                return True
            else:
                self.log.error("Cookies können nicht erstellt werden!")
                return False
        except:
            # NOTE(review): bare except silently swallows every error
            # (including cookie.get("value") being None) and reports failure.
            return False
import datetime as dt # for return flights def date_range(start_date, end_date): for n in range(int((end_date - start_date).days)): yield start_date + dt.timedelta(n) start_date = dt.date.today() + dt.timedelta(days=1) end_date = dt.date.today() + dt.timedelta(days=301) chrome_options = Options() chrome_options.add_argument("--headless") for i in date_range(start_date, end_date): return_date = i driver = Chrome("./chromedriver", chrome_options=chrome_options) (driver.get("https://www.google.com/flights?hl=en#flt=/m/07dfk./m/0ftkx." + return_date.strftime("%Y-%m-%d") + ";c:TWD;e:1;sd:1;t:f;tt:o")) print('enter homepage for TKO to TPE') time.sleep(5) (driver.find_element_by_xpath( '//*[@class="gws-flights-results__dominated-toggle ' 'flt-subhead2 gws-flights-results__collapsed"]').click()) print('expand button pushed') time.sleep(5) print('scraping etd...') dep_arr_time = driver.find_elements_by_class_name( "gws-flights-results__times") dep_etd_list = [ return_date.strftime("%Y-%m-%d") + ' ' +