# For every location in the dataframe: type "<location> Singapore" into the
# address box, pick the second autocomplete suggestion, and scrape the info
# window's HTML. Results are checkpointed to places.p after each lookup.
browser.get(URL)
for location in df:
    browser.find_element_by_id('address').click()
    time.sleep(2)
    # Wipe the previous query and type the new one.
    keyboard.press_and_release('ctrl+a, delete')
    keyboard.write(location + " Singapore")
    # Walk two entries down the autocomplete dropdown, then confirm.
    keyboard.press('down')
    time.sleep(0.3)
    keyboard.release('down')
    keyboard.press('down')
    time.sleep(0.3)
    keyboard.release('down')
    keyboard.press('enter')
    time.sleep(0.3)
    keyboard.release('enter')
    # Run the search and capture the rendered info window.
    browser.find_element_by_class_name('btn-primary').click()
    mydict[location] = browser.find_element_by_id(
        'info_window').get_attribute("innerHTML")
    print(location)
    # Checkpoint after every location so a crash loses no progress.
    with open('places.p', 'wb') as f:
        pickle.dump(mydict, f)
"""Log in to 11st with hard-coded credentials, open the order list, and quit."""
import time
from selenium.webdriver import Chrome

CHROMEDRIVER_PATH = 'C:\\temp\\chromedriver.exe'

driver = Chrome(CHROMEDRIVER_PATH)
driver.get('https://login.11st.co.kr/auth/front/login.tmall')
time.sleep(3)

# Fill in the login form.
username_field = driver.find_element_by_id("loginName")
username_field.send_keys("wownskl")
password_field = driver.find_element_by_id("passWord")
password_field.send_keys("ahrl8383")

# Submit and give the redirect time to complete.
login_button = driver.find_element_by_class_name("btn_Atype")
time.sleep(3)
login_button.click()
time.sleep(3)

# Visit the order-history page before shutting the browser down.
driver.get('http://buy.11st.co.kr/order/OrderList.tmall')
time.sleep(5)
driver.quit()
"""Headless DuckDuckGo search demo: submit a query and print all results."""
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

opts = Options()
opts.set_headless()
assert opts.headless  # Operating in headless mode

browser = Chrome(options=opts)
browser.get('https://duckduckgo.com')

# Fill in the advanced-search input and submit the form.
search_form = browser.find_element_by_class_name('js-search-input.search__input--adv')
search_form.send_keys('real python')
search_form.submit()

# Print the first result, the result count, then every result snippet.
results = browser.find_elements_by_class_name('result')
print(results[0].text)
print(len(results))
for result in results:
    print(result.text)

# BUG FIX: the original read `browser.close` (a bare attribute access that
# does nothing), leaving the browser session open. Actually call it.
browser.close()
class Scraping():
    """Drives Google Translate in a Chrome window: text translation scraping
    and microphone speech-to-text capture."""

    def __init__(self, index_get):
        """Start Chrome (auto-granting the microphone prompt) and, when
        index_get == 0, pre-load the English->Vietnamese translate page."""
        chrome_path = 'D:/Soft/chromedriver_win32/chromedriver.exe'
        options = Options()
        # Auto-accept the media-stream (microphone) permission dialog.
        options.add_argument("--use-fake-ui-for-media-stream")
        #options.add_argument('--headless')
        self.driver = Chrome(executable_path=chrome_path, chrome_options=options)
        if index_get == 0:
            self.driver.get(
                'https://translate.google.com/#view=home&op=translate&sl=en&tl=vi&text='
            )

    def google_translate(self, sentence, mode):
        """Translate *sentence* ('e2v' = English->Vietnamese, anything else =
        Vietnamese->English).

        Returns [translated, spelling, (word_type, contents_of_word_type), hint];
        any field that could not be scraped from the page is None.
        """
        mode = 'en&tl=vi' if mode == 'e2v' else 'vi&tl=en'
        self.driver.get(
            'https://translate.google.com/#view=home&op=translate&sl=%s&text=%s'
            % (mode, sentence))
        time.sleep(0.5)
        translated = ''
        spelling = None
        word_type = None
        hint = None
        contents_of_word_type = None
        try:
            spelling = self.driver.find_element_by_xpath(
                '/html/body/div[2]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div/div/div[3]/div[1]'
            ).text
            word_type = self.driver.find_elements_by_class_name(
                'gt-baf-pos-head')
            word_type = [x.text.replace('\nFrequency', '') for x in word_type]
            contents_of_word_type = self.driver.find_element_by_xpath(
                '/html/body/div[2]/div[1]/div[2]/div[2]/div[3]/div[2]/div[1]/div[1]/div'
            ).text.split('\n')
            # Removes the ORIGINAL items 0 and 2: the second `del` operates on
            # the already-shifted list. Preserved as-is from the original.
            del contents_of_word_type[0]
            del contents_of_word_type[1]
        except Exception:  # was a bare except; keep the best-effort behaviour
            pass
        try:
            hint = self.driver.find_element_by_class_name(
                'gt-related-suggest-message').text.split('\n')
        except Exception:
            try:
                hint = self.driver.find_element_by_class_name(
                    'gt-spell-correct-message').text.split('\n')
            except Exception:
                pass
        translated = self.driver.find_element_by_xpath(
            '/html/body/div[2]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[3]/div[1]/div[2]/div/span[1]'
        ).text
        # Sections Google hides with inline display:none carry no data.
        display_wordtype = self.driver.find_element_by_class_name(
            'gt-cd-baf').get_attribute('style')
        display_hint = self.driver.find_element_by_id(
            'spelling-correction').get_attribute('style')
        if display_wordtype == 'display: none;':
            word_type = None
        if display_hint == 'display: none;':
            hint = None
        return [translated, spelling, (word_type, contents_of_word_type), hint]

    def speech_to_text(self, ui):
        """Toggle the translate page's microphone and, after a delay sized by
        the current English text length, copy the recognised text into
        ui.lineEdit via a Qt single-shot timer."""
        speech_button = wait(self.driver, timeout=10).until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@id="gt-speech"]/span')))

        def status_s2t():
            # The tooltip reads "Turn on ..."/"Turn off ..." -> 'on'/'off'.
            status = self.driver.find_element_by_xpath('//*[@id="gt-speech"]')
            return status.get_attribute('data-tooltip').split(' ')[1]

        if status_s2t() == 'on':
            speech_button.click()
            ui.st2_waiting = True
            # Rough listening window: ~1s per word, minimum 3s, scaled by 0.8.
            time_waiting = len(ui.parent.GetLang('ENG').split(' ')) * 1000
            time_waiting = 3000 if time_waiting <= 2000 else int(time_waiting * 0.8)

            def waiting(speech_button):
                # Pull whatever was recognised and push it into the UI field.
                result = self.driver.find_element_by_xpath(
                    '//*[@id="input-wrap"]/div[2]').get_attribute('textContent')
                ui.lineEdit.setReadOnly(False)
                ui.lineEdit.setText(result)
                ui.lineEdit.setReadOnly(True)
                ui.st2_waiting = False
                ui.lineEdit.setPlaceholderText('Listening stopped !')
                if status_s2t() == 'off':
                    speech_button.click()

            QtCore.QTimer.singleShot(time_waiting, lambda: waiting(speech_button))
def fech_data(*args, **kwargs):
    """
    Scheduled task that scrapes https://www.worldometers.info/coronavirus/
    and persists the global statistics (via GeneralStatsSerializer) and the
    per-country table (upserted into cases_countrycases).
    """
    webdriver = ChromeDriverManager().install()
    opt = Options()
    opt.add_argument('--headless')
    opt.add_argument('--window-size=1920,1080')
    driver = Chrome(webdriver, options=opt)
    print("<<<<<<<<<<<<scrapping started >>>>>>>>>>>>>>>>>>>>> \n")
    url = 'https://www.worldometers.info/coronavirus/'
    try:
        driver.get(url)
        data = driver.find_element_by_class_name("content-inner")
        print("\n<<<<<<<<<<<Fetching general stats >>>>>>>>>>>>>>>>>>>>> \n")
        last_update = data.find_elements_by_xpath(
            "//*[contains(text(), 'Last updated:')]")[0].text
        # The three big counters: total / deaths / recovered.
        counters = data.find_elements_by_css_selector('[id*="maincounter-wrap"]')
        total_cases = counters[0].find_element_by_tag_name('span').text
        death_cases = counters[1].find_element_by_tag_name('span').text
        recovery_cases = counters[2].find_element_by_tag_name('span').text
        # Two panels: active cases and closed cases.
        panels = data.find_elements_by_css_selector(
            '[class*="panel panel-default"]')
        active_cases, closed_cases = panels[0], panels[1]
        currently_infected = active_cases.find_element_by_class_name(
            "number-table-main").text
        cases_with_outcome = closed_cases.find_element_by_class_name(
            "number-table-main").text
        mild_condition_active_cases = active_cases.find_elements_by_css_selector(
            '[class*="number-table"]')[1].text
        critical_condition_active_cases = active_cases.find_elements_by_css_selector(
            '[class*="number-table"]')[2].text
        recovered_closed_cases = closed_cases.find_elements_by_css_selector(
            '[class*="number-table"]')[1].text
        death_closed_cases = closed_cases.find_elements_by_css_selector(
            '[class*="number-table"]')[2].text
        general_stats = {
            # Strip thousands separators before persisting.
            'total_cases': total_cases.replace(',', ''),
            'death_cases': death_cases.replace(',', ''),
            'recovery_cases': recovery_cases.replace(',', ''),
            'currently_infected': currently_infected.replace(',', ''),
            'cases_with_outcome': cases_with_outcome.replace(',', ''),
            'mild_condition_active_cases':
                mild_condition_active_cases.replace(',', ''),
            'critical_condition_active_cases':
                critical_condition_active_cases.replace(',', ''),
            'recovered_closed_cases': recovered_closed_cases.replace(',', ''),
            'death_closed_cases': death_closed_cases.replace(',', ''),
            'last_update': parse(last_update.split('Last updated:')[1])
        }
        gen_serializer = GeneralStatsSerializer(data=general_stats)
        gen_serializer.is_valid(raise_exception=True)
        gen_serializer.save()

        print("<<<<<<<<<<<Fetching country stats >>>>>>>>>>>>>>>>>>>>> \n")
        country_data = driver.find_element_by_id('main_table_countries_today')
        country_data_list = country_data.find_element_by_tag_name(
            'tbody').find_elements_by_tag_name("tr")
        attributes = [
            'country', 'total_cases', 'new_cases', 'total_deaths', 'new_deaths',
            'total_recovered', 'active_cases', 'serious_critical',
            'cases_per_mill_pop'
        ]
        engine = create_engine(
            f"postgresql://{settings.DB_USER}:{settings.DB_PASSWORD}@{settings.DB_HOST}:5432/{settings.DB_NAME}"
        )
        # Load the country->flag map ONCE (the original re-read the JSON file
        # on every table row).
        with open(settings.FLAGS_FILE, "r") as read_file:
            flags = json.load(read_file)
        default_flag = "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/International_Flag_of_Planet_Earth.svg/800px-International_Flag_of_Planet_Earth.svg.png"

        def _nz(value):
            # The site leaves unknown cells blank; store '0' like the original.
            return value if value else '0'

        print("<<<<<<<<<<<Persist to DB:START >>>>>>>>>>>>>>>>>>>>> \n")
        # Upsert with bound parameters instead of f-string interpolation so
        # values containing quotes (e.g. country names) cannot break the SQL.
        update_sql = text("""
            INSERT INTO cases_countrycases values (
                :country, :total_cases, :new_cases, :total_deaths, :new_deaths,
                :total_recovered, :active_cases, :serious_critical,
                :cases_per_mill_pop, :flag, :last_update)
            ON CONFLICT (country) DO UPDATE SET
                total_cases = :total_cases,
                new_cases = :new_cases,
                total_deaths = :total_deaths,
                new_deaths = :new_deaths,
                total_recovered = :total_recovered,
                active_cases = :active_cases,
                serious_critical = :serious_critical,
                cases_per_mill_pop = :cases_per_mill_pop,
                flag = :flag,
                last_update = :last_update
        """)
        for country in country_data_list:
            country_stats = [
                ''.join(cell.text.split(','))
                for cell in country.find_elements_by_tag_name("td")
            ]
            detail = dict(zip(attributes, country_stats))
            country_ = detail['country']
            # Missing country -> generic Earth flag (was a bare except).
            flag = flags.get(country_, default_flag)
            last_update = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
            params = {
                'country': country_,
                'total_cases': _nz(detail['total_cases']),
                # split('+')[::-1][0] strips an optional leading '+'.
                'new_cases': _nz(detail['new_cases'].split('+')[::-1][0]),
                'total_deaths': _nz(detail['total_deaths']),
                'new_deaths': _nz(detail['new_deaths'].split('+')[::-1][0]),
                'total_recovered': _nz(detail['total_recovered']),
                'active_cases': _nz(detail['active_cases']),
                'serious_critical': _nz(detail['serious_critical']),
                'cases_per_mill_pop': _nz(detail['cases_per_mill_pop']),
                'flag': flag,
                'last_update': last_update,
            }
            with engine.begin() as conn:  # TRANSACTION
                conn.execute(update_sql, params)
        print("<<<<<<<<<<<Persist to DB:END >>>>>>>>>>>>>>>>>>>>> \n")
    finally:
        # Always release the browser, even if scraping/persisting raised.
        driver.quit()
class NewWorkgroupTestCase(StaticLiveServerTestCase):
    """Browser (Selenium) test: a coordinator creates a workgroup via the UI."""

    def setUp(self):
        """Create fixture users/roles and log in as the admin coordinator."""
        self.browser = Chrome()
        self.browser.implicitly_wait(10)
        # Admin user doubling as coordinator and teacher. The role objects are
        # created only for their database side effects, so they are not bound
        # to names (the original kept unused locals).
        user = User.objects.create_user(username='******',
                                        name="Administrador Teste",
                                        email='*****@*****.**',
                                        password='******',
                                        is_staff=True,
                                        is_superuser=True)
        Coordinator.objects.create(usp_number='1234567', user=user)
        Teacher.objects.create(usp_number='1234567', user=user)
        # Student and guest accounts that will be added to the workgroup.
        student_user = User.objects.create(username='******',
                                           name="Aluno Teste",
                                           email='*****@*****.**',
                                           password='******')
        Student.objects.create(usp_number='7983121', user=student_user)
        guest_user = User.objects.create(username='******',
                                         name="Convidado Teste",
                                         email='*****@*****.**',
                                         password='******')
        Guest.objects.create(organization_name='Empresa', user=guest_user)
        # Log in through the real login page.
        self.browser.get('%s%s' % (self.live_server_url, reverse_lazy("login")))
        self.browser.find_element_by_id('id_username').send_keys('*****@*****.**')
        self.browser.find_element_by_id('id_password').send_keys('tccpoliusp')
        self.browser.find_element_by_id('login').click()

    def tearDown(self):
        self.browser.quit()

    def test_teacher_sign_up_fire(self):
        """Fill the new-workgroup form and verify the resulting listing row."""
        self.browser.find_element_by_id('coordinator-workgroups').click()
        self.browser.find_element_by_id('new-workgroup').click()
        self.browser.find_element_by_id('id_title').send_keys('Projeto de Teste')
        # Pick student, advisor and guest through the select2 widgets.
        self.browser.find_element_by_class_name(
            "select2-search__field").send_keys('alunogrupo')
        self.browser.find_element_by_class_name(
            "select2-results__option").click()
        self.browser.find_element_by_class_name(
            "select2-search__field").send_keys('admin')
        self.browser.find_element_by_class_name(
            "select2-results__option").click()
        self.browser.find_element_by_class_name(
            "select2-search__field").send_keys('convidadogrupo')
        self.browser.find_element_by_class_name(
            "select2-results__option").click()
        self.browser.find_element_by_id('submit').click()
        # We are redirected to the disciplines list, whose table now shows
        # the new group with its members.
        self.assertIn(
            ('%s%s' % (self.live_server_url, reverse_lazy("disciplines_list"))),
            self.browser.current_url)
        self.assertTrue(self.browser.find_element_by_xpath(
            "//table[@class='table']/tbody/tr/th[1][contains(.,'Projeto de Teste')]"))
        self.assertTrue(self.browser.find_element_by_xpath(
            "//table[@class='table']/tbody/tr/th[2][contains(.,'1')]"))
        self.assertTrue(self.browser.find_element_by_xpath(
            "//table[@class='table']/tbody/tr/th[3][contains(.,'Administrador Teste')]"))
        self.assertTrue(self.browser.find_element_by_xpath(
            "//table[@class='table']/tbody/tr/th[4][contains(.,'Convidado Teste')]"))
"""Open Google, click the on-screen-keyboard toggle, then the first link."""
from selenium.webdriver import Chrome
import time
from datetime import datetime

URL = 'https://www.google.com'

navegador = Chrome()
navegador.get(URL)

# Toggle button of Google's virtual keyboard.
botao_teclado = navegador.find_element_by_class_name('hOoLGe')
botao_teclado.click()
time.sleep(2)

# First anchor on the page (the sign-in link).
botao_login = navegador.find_elements_by_tag_name('a')[0]
botao_login.click()
time.sleep(2)

navegador.quit()
"""Look up a list of CNPJs on consultacnpj.com and dump each result to CSV."""
from selenium.webdriver import Chrome
from selenium.webdriver.common.keys import Keys

navegador = Chrome()
navegador.get('https://consultacnpj.com/cnpj/')
navegador.maximize_window()

cnpjs = ["45997418000153", "18328118000109", "45543915000181"]
for cnpj in cnpjs:
    # Search box buried in the page layout (renamed from `input`, which
    # shadowed the builtin).
    campo_busca = navegador.find_element_by_css_selector(
        '#__layout > div > div:nth-child(2) > div > div > div > div > div > div.cnpj--wrapper > div > div > input'
    )
    campo_busca.clear()
    campo_busca.send_keys(cnpj)
    # Grab the company-data card's text and persist it per CNPJ.
    texto = navegador.find_element_by_class_name('company-data--card').text
    with open(f'{cnpj}.csv', 'w', encoding='UTF-8') as arquivo:
        arquivo.write(texto)

navegador.quit()
"""Drive Google's two-step sign-in flow for YouTube with fixed credentials."""
from selenium.webdriver import Chrome
import time

LOGIN_URL = 'https://accounts.google.com/signin/v2/identifier?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Dzh-TW%26next%3D%252F&hl=zh-TW&ec=65620&flowName=GlifWebSignIn&flowEntry=ServiceLogin'

driver = Chrome('./chromedriver')
driver.get(LOGIN_URL)

# Step 1: the account e-mail.
driver.find_element_by_id('identifierId').send_keys('*****@*****.**')
driver.find_element_by_id('identifierNext').click()
time.sleep(5)  # give the password page time to appear

# Step 2: the password.
driver.find_element_by_class_name('whsOnd').send_keys('123123123')
driver.find_element_by_id('passwordNext').click()
# NOTE: loading chromedriver straight from the GitHub URL does not work;
# creating the driver locally (below) does (at least in my environment).
options = ChromeOptions()
# Build the Chrome WebDriver object.
driver = Chrome(options=options)

# Launch abunator and give it time to load.
driver.get("https://abunatorroute.azurewebsites.net/")
time.sleep(3)

# Click "start" to move to the question screen.
start = driver.find_element_by_class_name("start")
start.click()

# On the question screen only the first button is grabbed here.
# TODO(review): selecting by value, e.g.
#   driver.find_element_by_xpath("//input[@value='はい']").click()
# would allow choosing "はい"/"いいえ" explicitly, and looping
#   while driver.title == '質問画面': ...
# would keep answering until the answer screen ("解答画面") appears.
time.sleep(3)
no = driver.find_element_by_class_name("button")
class InstaOperator:
    """Automates Instagram: log in, open a hashtag feed, collect profile
    URLs and like their posts. Relies on Instagram's (brittle) obfuscated
    class names, so any selector may break when the site updates."""

    def __init__(self, username, password, tag):
        # Credentials and the hashtag this operator works on.
        self.username = username
        self.password = password
        self.tag = tag
        options = Options()
        # options.add_argument('--headless')
        # options.binary_location = '/app/.apt/usr/bin/google-chrome'
        self.driver = Chrome(chrome_options=options)
        self.driver.implicitly_wait(5)

    def login(self):
        # Submit the login form; INSTA_URL is defined elsewhere in the module.
        self.driver.get(INSTA_URL)
        email_form = self.driver.find_element_by_name("username")
        password_form = self.driver.find_element_by_name("password")
        email_form.send_keys(self.username)
        password_form.send_keys(self.password)
        login_button = self.driver.find_element_by_class_name("_5f5mN")
        login_button.submit()
        # "XTCLo" appears to mark the post-login state; its absence triggers
        # the security-code (account verification) flow below.
        try:
            self.driver.find_element_by_class_name("XTCLo")
        except NoSuchElementException:
            # Account verification: request a code, read it from Gmail, submit.
            print("アカウント認証")
            send_button = self.driver.find_element_by_class_name("_5f5mN")
            send_button.submit()
            code_form = self.driver.find_element_by_name("security_code")
            code = gmailreceiver.get_code()
            code_form.send_keys(code)
            code_button = self.driver.find_element_by_class_name("_5f5mN")
            code_button.submit()
            try:
                self.driver.find_element_by_class_name("XTCLo")
            except NoSuchElementException as e:
                # Login failed even after the security code.
                print("ログイン失敗")
                print(type(e))
                print(str(e))
            else:
                # Login completed.
                print("ログイン完了")
        else:
            # Login completed without a verification challenge.
            print("ログイン完了")

    def open_article(self):
        # Open the hashtag page, hover near the 5th row so thumbnails render,
        # then click the first thumbnail of the 4th row.
        self.driver.get("https://www.instagram.com/explore/tags/" + self.tag + "/")
        articles = self.driver.find_elements_by_class_name("Nnq7C")
        article = articles[3].find_elements_by_class_name("v1Nh3")
        actions = ActionChains(self.driver)
        actions.move_to_element(articles[4])
        actions.perform()
        article[0].click()

    def get_users(self):
        # Collect up to 10 distinct profile URLs by paging through posts with
        # the right-arrow; on a stale/missing element, reopen the feed.
        users = []
        while len(users) < 10:
            try:
                address = self.driver.find_element_by_class_name(
                    "nJAzx").get_attribute('href')
                if address not in users:
                    users.append(address)
                self.driver.find_element_by_class_name(
                    "coreSpriteRightPaginationArrow").click()
            except (NoSuchElementException, StaleElementReferenceException):
                self.open_article()
        return users

    def likes_users(self, users):
        # Visit each profile URL and like up to 3 of its most recent posts.
        # Returns the number of likes actually performed.
        likes = 0
        for user in users:
            self.driver.get(user)
            try:
                self.driver.find_element_by_class_name("v1Nh3").click()
                for num in range(3):
                    self.driver.find_element_by_class_name(
                        "coreSpriteHeartOpen").click()
                    likes += 1
                    sleep(1)
                    self.driver.find_element_by_class_name(
                        "coreSpriteRightPaginationArrow").click()
            except NoSuchElementException:
                # Missing like/arrow element: skip the rest of this user.
                pass
        return likes
class Automate():
    """Headless-Chrome bot that logs in to a job site and applies to every
    job recorded in seekdata.json."""

    def __init__(self):
        chrome_opt = Options()
        chrome_opt.add_argument('--headless')
        self.driver = Chrome(options=chrome_opt)
        self.logger = logging.getLogger(__name__)
        # Previously-scraped job listings.
        self.datas = parsedata.ParseJson('seekdata.json')

    # logging in to the site
    def login(self):
        """Click 'Sign in' and submit the (hard-coded) credentials form."""
        self.logger.info("Opened the site successfully")
        WebDriverWait(self.driver, 2).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "a[title = 'Sign in']"))
        )  # waiting for sign in to be loaded
        self.driver.find_element_by_link_text("Sign in").click()
        WebDriverWait(self.driver, 4).until(
            EC.presence_of_element_located(
                (By.TAG_NAME, "form")))  # waiting for the form to be loaded
        self.logger.info("signing in....")
        # can add config file for logging in infos
        WebDriverWait(self.driver, 3).until(EC.presence_of_element_located(
            (By.ID, "email")))
        # email address to log in
        self.driver.find_element_by_id("email").send_keys("*****@*****.**")
        # password to log in
        self.driver.find_element_by_id("password").send_keys("password")
        self.driver.find_element_by_xpath("//button[@type = 'submit']").click()
        self.logger.info("logged in successfully")
        self.logger.info("working!!")

    def applyJob(self):
        """Walk one job's multi-step application wizard, attaching resume.txt
        when it exists and opting out of documents otherwise."""
        WebDriverWait(self.driver, 5).until(
            EC.presence_of_element_located((By.LINK_TEXT, "Apply")))
        self.driver.find_element_by_link_text('Apply').click()
        delay = 2  # waiting time for the element to load
        try:
            WebDriverWait(self.driver, delay).until(
                EC.presence_of_element_located((By.TAG_NAME, "fieldset"))
            )  # waits for the document fieldset to be loaded
        except TimeoutException:
            self.logger.exception("Extended amount time error")
        finally:
            self.driver.find_element_by_id("uploadresume").click()
            if os.path.exists(os.getcwd() + "/resume.txt"):
                # BUG FIX: the original sent ".../resume.text" (wrong
                # extension) even though it checked for ".../resume.txt",
                # so the upload always failed.
                self.driver.find_element_by_xpath(
                    "//input[@id='resumeFile'][@type = 'file']").send_keys(
                        os.getcwd() + "/resume.txt")
            else:
                self.driver.find_element_by_id("dontIncluderesume").click()
            self.driver.find_element_by_id("dontIncludecoverLetter").click()
        # Advance through the wizard: continue, continue, then final submit.
        self.driver.find_element_by_xpath(
            "//button[@type='button'][@data-testid='continue-button-desktop']"
        ).click()
        WebDriverWait(self.driver, 3).until(
            EC.presence_of_element_located(
                (By.XPATH, "//a[@data-testid = 'back-button-desktop']")))
        self.driver.implicitly_wait(2)
        self.driver.find_element_by_xpath(
            "//button[@type='button'][@data-testid='continue-button-desktop']"
        ).click()
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(
                (By.XPATH, "//button[@type = 'submit']")))
        self.driver.find_element_by_xpath(
            "//button[@type = 'submit']").click()
        WebDriverWait(self.driver, 3).until(
            EC.presence_of_element_located((By.CLASS_NAME, "_2tfj6Sf")))
        if self.driver.find_element_by_class_name("_2QIfI_2"):
            self.logger.info("Successfully applied for the job")

    # Apply for the jobs that have been scraped in the json file
    def apply(self, url):
        """Visit every scraped job URL (prefixed by *url*), logging in first
        if the page still shows a 'Sign in' link, and apply to each."""
        for i, data in enumerate(self.datas.get_data()):
            job_url = url + data['jobURL']
            self.logger.info("opening page {id}...".format(id=i))
            self.driver.get(job_url)
            if len(self.driver.find_elements_by_link_text("Sign in")) > 0:
                self.login()
            self.logger.info("page loaded successfully")
            self.applyJob()
            self.logger.info("closing page {id}...".format(id=i))
        self.logger.info("Applying for all the jobs complete")
# Step 3: maximize the browser window.
driver.maximize_window()

# Basic page inspection: title, URL and full HTML source.
print(f"Title of page is {driver.title}")
print(f"Page url is {driver.current_url}")
print("***********************************")
print(driver.page_source)

# Read an element's text via the .text property.
print(driver.find_element_by_class_name("displayPopup").text)

# Read an attribute value after locating the element.
submit_value = driver.find_element_by_xpath(
    "//input[@type='submit']").get_attribute("value")
print("Value of button is " + submit_value)

# Drop-down handling: Select supports index, value and visible text.
gender_dropdown = Select(driver.find_element_by_name("sex"))
gender_dropdown.select_by_visible_text("Male")
# Currently selected option.
print(gender_dropdown.first_selected_option.text)
# All options — note this prints WebElement objects, not their texts.
print(gender_dropdown.options)
class SeleniumDownloader(object):
    """
    Custom Selenium download middleware: pages are fetched and rendered by a
    headless Chrome instance instead of Scrapy's own Downloader.
    """

    def __init__(self):
        # Driver locations:
        #   windows -> d:/drivers/chromedriver.exe
        #   linux   -> /home/xxxx/drivers/chromedriver
        # Run Chrome headless: no browser window is opened.
        options = Options()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        # Either `chrome_options` or `options` works here.
        self.browser = Chrome('D:\driver\chromedriver.exe', options=options)
        # The site shows a one-time risk-warning dialog that must be
        # dismissed ("I understand") on the first request.
        self.close_ok = False

    def process_request(self, request, spider):
        """Render request.url in Chrome and return the resulting HTML."""
        self.browser.get(request.url)
        # self.browser.save_screenshot('zhaopin.png')
        if not self.close_ok:
            # Dismiss the one-time warning dialog.
            confirm_btn = self.browser.find_element_by_class_name(
                'risk-warning__content').find_element_by_tag_name('button')
            confirm_btn.click()
            self.close_ok = True
        # Wait until the pager widget ("soupager") is visible on the page.
        ui.WebDriverWait(self.browser, 60).until(
            ec.visibility_of_all_elements_located((By.CLASS_NAME, 'soupager')))
        # The pager's vertical position marks the bottom of the content.
        # .location -> {'x':, 'y': }
        # .rect -> {'x': 0, 'y': 0, 'width': 990, 'height': 10001 }
        pager = self.browser.find_element_by_class_name('soupager')
        pager_top = pager.location['y']
        time.sleep(1)
        # Scroll down in 1000px steps (max 20) until the pager is reached so
        # lazily-loaded content gets rendered.
        for step in range(20):
            offset = (step + 1) * 1000
            if offset >= pager_top:
                break
            self.browser.execute_script(
                'var q = document.documentElement.scrollTop=%s' % offset)
            time.sleep(0.5)
        # Hand the fully rendered page back to Scrapy.
        rendered = self.browser.page_source
        return HtmlResponse(url=request.url,
                            body=rendered.encode(encoding='utf-8'),
                            encoding='utf-8')
def findGoogle(search_param):
    """Search Google Scholar authors for *search_param* and scrape the first
    matching author's article list.

    Returns {"articles": [...], "count": int}; each article is a dict with
    "title", "autors" and "year". Returns an empty result set when the
    search yields nothing.
    """
    # Ignore certificate problems and run headless.
    options = webdriver.ChromeOptions()
    options.add_argument('ignore-certificate-errors')
    options.add_argument('--ignore-ssl-errors')
    options.headless = True
    articlesData = []
    driver = Chrome(chrome_options=options)
    try:
        # Open Google Scholar's author search.
        driver.get(
            'https://scholar.google.com/citations?view_op=search_authors&mauthors=&hl=en&oi=ao'
        )
        # Wait up to 10 s for the search box, then run the query.
        search = WebDriverWait(
            driver,
            timeout=10).until(lambda d: d.find_element_by_class_name('gs_in_txt'))
        search.send_keys(search_param)
        search.send_keys(Keys.RETURN)
        # Check whether any author results appeared.
        try:
            WebDriverWait(driver, timeout=10).until(
                lambda d: d.find_elements_by_class_name("gsc_1usr"))
            print("se encontraton resultado")
        except Exception:  # was a bare except: any failure means "no results"
            return {"articles": [], "count": 0}
        # Open the first author's profile.
        driver.find_element_by_class_name('gs_ai_pho').click()
        # Wait for the article table, then load the full list.
        try:
            WebDriverWait(
                driver,
                timeout=10).until(lambda d: d.find_element_by_id('gsc_a_b'))
            driver.find_element_by_id('gsc_bpf_more').click()
            time.sleep(1)  # TODO: replace with an explicit wait
        except Exception:  # best-effort: continue with whatever is loaded
            pass
        # Collect every article row.
        articles = WebDriverWait(
            driver,
            timeout=10).until(lambda d: d.find_elements_by_class_name('gsc_a_tr'))
        print(len(articles))
        for article in articles:
            title = article.find_element_by_class_name('gsc_a_at').text
            autors = article.find_element_by_class_name('gs_gray').text
            year = article.find_element_by_class_name('gsc_a_y').text
            # TODO: handle the case where no date exists.
            articlesData.append({"title": title, "autors": autors, "year": year})
        return {"articles": articlesData, "count": len(articlesData)}
    finally:
        # BUG FIX: the original never quit the driver, leaking a Chrome
        # process on every call (including the early "no results" return).
        driver.quit()
def start_callback():
    """ Main loop of the scrape. """
    # The Instagram username of the profile from which we are downloading.
    # Must be supplied.
    profile_username = E_username.get()
    # Will be initialized with the optional argument or a default later.
    output_directory = E_path.get()
    update_mode = True
    serialize = True
    latest_image = ''
    # The latest downloaded images will be the first in the directory.
    # NOTE(review): this assumes os.listdir() returns newest-first, which is
    # not guaranteed by the OS — confirm.
    files = os.listdir(output_directory)
    if files:
        latest_image = files[0]
    # Start the browser
    driver = Chrome(executable_path='../bin/chromedriver')
    driver.get(insta_url + profile_username)
    # Find the number of posts on this Instagram profile
    post_count_tag_xpath = ('//*[@id="react-root"]/section/main/' +
                            'article/header/div[2]/ul/li[1]/span/span')
    post_count_tag = driver.find_element_by_xpath(post_count_tag_xpath)
    post_count = int(post_count_tag.text.replace(',', ''))
    # If the target profile is private, then redirect to the login page
    login_tag_xpath = '//*[@id="react-root"]/section/main/article/div/p/a'
    try:
        login_tag = driver.find_element_by_xpath(login_tag_xpath)
        login_page_url = login_tag.get_attribute('href')
        driver.get(login_page_url)
        # Wait for the user to login (polls until the URL changes)
        while driver.current_url == login_page_url:
            sleep(1)
        # Return to the target profile from the homepage
        driver.get(insta_url + profile_username)
    # NOTE(review): bare except — any error (not just a public profile)
    # silently skips the login flow.
    except:
        pass
    # Click the 'Load More' element
    driver.find_element_by_class_name('_oidfu').click()
    # Load all the posts into the browser
    processed = 0
    while processed < post_count:
        # Load more content by scrolling to the bottom of the page
        driver.execute_script("window.scrollTo(0, 0);")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Download 4 rows of items (4 rows are loaded upon each scroll) and
        # remove them from view
        for _ in itertools.repeat(None, 4):
            urls = fetch_row_links(driver)
            delete_row(driver)
            for url in urls:
                # Exit if we've reached the latest image that was in the
                # directory before downloading. This means the directory has
                # everything beyond this point.
                if update_mode:
                    fname = file_name.search(url).group(0)
                    if fname in latest_image:
                        # NOTE(review): exit(0) terminates the whole process
                        # here, so the browser below is never closed.
                        exit(0)
                download_from_url(url, output_directory, serialize,
                                  post_count-processed)
                processed += 1
    driver.close()
# Open Google Maps and wait for it to finish loading.
# (BeautifulSoup analogy: find -> find_element, find_all -> find_elements)
url = 'https://www.google.com.tw/maps'
driver.get(url)
time.sleep(5)

keyword = '大腸麵線'
print('輸入框輸入欲搜尋關鍵字')

# Poll until the "my location" widget exists, then click it.
# NOTE: busy-waits with no sleep between polls, as in the original.
mylocation = driver.find_elements_by_id('widget-mylocation')
print(len(mylocation))
while not mylocation:
    print('mylocation')
    mylocation = driver.find_elements_by_id('widget-mylocation')
mylocation[0].click()
time.sleep(4)

# Type the keyword into the search box and run the search.
search_box = driver.find_element_by_class_name('tactile-searchbox-input')
search_box.send_keys(keyword)
print('點擊搜尋')
driver.find_element_by_id('searchbox-searchbutton').click()
time.sleep(7)

# Accumulators for the scraping loop that follows this section.
bfinal = False
page = 1
list_dem = []
list_demstar = []
list_add = []
list_ben = []
list_star = []
list_date = []
list_pim = []
n = 0
"""Open the ICICI Bank home page, click through to the login page, and close."""
from selenium.webdriver import ChromeOptions, Chrome

# On Windows the binary is chromedriver.exe.
# (A Firefox equivalent would be webdriver.Firefox(executable_path=path).)
path = "../driver/chromedriver"

chrome_opts = ChromeOptions()
chrome_opts.add_argument("--disable-notifications")

driver = Chrome(executable_path=path, options=chrome_opts)

baseUrl = "https://www.icicibank.com/"
driver.get(baseUrl)
print("Home page title ", driver.title)

# Navigate to the login page via the orange login box.
driver.find_element_by_class_name("pl-login-ornage-box").click()
print("Login page title ", driver.title)

driver.close()
for lp in logins: if br: br.close() br = Browser() matches = LOGIN_PASSWORD_FORMAT.match(lp) login, password = matches.group('login'), matches.group('password') br.get(LOGIN_URL) while True: try: tag = br.find_element_by_id("email") if tag.is_displayed(): break except: br.get(LOGIN_URL) time.sleep(0.1) if (tg := br.find_element_by_class_name("captcha-container")).is_displayed(): continue tag.send_keys(login) completed = False while not completed: # Может быть случай когда сначало логин и потом кнопку продолжить для пароля if (tag := br.find_element_by_id("password")).is_displayed(): tag.send_keys(password) try: br.find_element_by_id("btnLogin").click() completed = True except: ... elif (tag := br.find_element_by_id("btnNext")).is_displayed(): try: tag.click() except:
class Teams:
    """Automates Microsoft Teams in the browser: sign in, open a team,
    join a meeting muted with the camera off, and hang up."""

    def __init__(self):
        # Chrome options: keep the window open after the script exits
        # ("detach"), ignore certificate/SSL errors, and auto-grant the
        # microphone/camera permission prompts.
        self.opts = ChromeOptions()
        self.opts.add_experimental_option("detach", True)
        self.opts.add_argument('--ignore-certificate-errors')
        self.opts.add_argument('--ignore-ssl-errors')
        self.opts.add_argument("--use-fake-ui-for-media-stream")
        self.browser = Chrome(executable_path='Chrome-Driver/V83/chromedriver',
                              chrome_options=self.opts)
        self.link = 'https://www.microsoft.com/en-in/microsoft-365/microsoft-teams/group-chat-software'
        # Browser window dimensions.
        self.x = 1600
        self.y = 1600
        # Element ids / class names used throughout the flow.
        self.sign_in = 'mectrl_main_trigger'
        self.login_id = 'i0116'
        self.password_id = 'i0118'
        self.btn_class = 'inline-block'
        self.popup_id = 'use-app-lnk'
        self.team_name_id = 'team-name-text'
        self.meeting_class = 'ts-sym ts-btn ts-btn-primary inset-border icons-call-jump-in ts-calling-join-button app-title-bar-button app-icons-fill-hover call-jump-in'

    def start_window(self):
        # Open the Teams landing page and click the sign-in trigger.
        self.browser.set_window_size(self.x, self.y)
        self.browser.get(self.link)
        login_href = WebDriverWait(self.browser, 10).until(
            EC.presence_of_element_located((By.ID, self.sign_in)))
        login_href.click()

    def add_credentials(self):
        # Fill e-mail and password from assets/credentials.json and submit.
        with open('assets/credentials.json') as json_data:
            credentials = json.load(json_data)
        email_field = WebDriverWait(self.browser, 10).until(
            EC.presence_of_element_located((By.ID, self.login_id)))
        email_field.send_keys(credentials['email'], Keys.ENTER)
        password_field = WebDriverWait(self.browser, 10).until(
            EC.presence_of_element_located((By.ID, self.password_id)))
        password_field.send_keys(credentials['password'])
        #### To find better method
        time.sleep(5)  # DOM to load
        submit_btn = self.browser.find_element_by_class_name(
            self.btn_class).click()

    def popup_login(self):
        # Click the confirmation button on the post-login popup.
        submit_btn = self.browser.find_element_by_class_name(self.btn_class)
        submit_btn.click()

    def popup_ad(self):
        # Dismiss the "get the app" popup by choosing the web-app link.
        web_app_btn = self.browser.find_element_by_class_name(self.popup_id)
        web_app_btn.click()

    def join_group(self, class_name):
        # Wait for the team tiles, then click the one whose displayed name
        # matches class_name (case-insensitive).
        all_user_groups = WebDriverWait(self.browser, 20).until(
            EC.presence_of_element_located((By.CLASS_NAME,
                                            self.team_name_id)))
        all_user_groups = self.browser.find_elements_by_class_name(
            self.team_name_id)
        for group in range(0, len(all_user_groups)):
            if all_user_groups[group].text.lower() == class_name.lower():
                all_user_groups[group].click()
                break

    def mute_audio(self):
        # Toggle the microphone off if it is currently on (aria-pressed).
        audio_btn = self.browser.find_element_by_css_selector(
            "toggle-button[data-tid='toggle-mute']>div>button")
        audio_is_on = audio_btn.get_attribute("aria-pressed")
        if audio_is_on == "true":
            audio_btn.click()

    def close_video(self):
        # Toggle the camera off if it is currently on (aria-pressed).
        video_btn = self.browser.find_element_by_css_selector(
            "toggle-button[data-tid='toggle-video']>div>button")
        video_is_on = video_btn.get_attribute("aria-pressed")
        if video_is_on == "true":
            video_btn.click()

    def join_meeting(self):
        # Wait for an active meeting's "join" button, then enter it muted
        # with the camera off.
        time.sleep(20)
        try:
            meeting_button = WebDriverWait(self.browser, 30).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "button[ng-click='ctrl.joinCall()']")))
        except selenium.common.exceptions.TimeoutException:
            print(
                "Couldn't load the link or Maybe meeting has not been started yet ."
            )
        else:
            time.sleep(20)
            meeting_button.click()
            time.sleep(20)
            self.mute_audio()
            self.close_video()
            join_button = WebDriverWait(self.browser, 30).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR,
                     "div.flex-fill.input-section > div > div > button")))
            join_button.click()

    def show_chat(self):
        # Hover over the video stream so the controls appear, then open chat.
        background = self.browser.find_element_by_css_selector(
            'div > div.video-stream-container')
        hover = ActionChains(self.browser).move_to_element(background)
        hover.perform()
        chat_btn = self.browser.find_element_by_css_selector(
            '#callingButtons-showMoreBtn > ng-include > svg')
        chat_btn.click()

    def getchats(self):
        # Not implemented yet.
        pass

    def hang_call(self):
        # Wait, then press the hang-up button to leave the call.
        hangup_btn = WebDriverWait(self.browser, 30).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "button[data-tid='call-hangup']")))
        # For a 50 minute class — NOTE(review): this sleeps 50 *seconds*,
        # not minutes; confirm the intended duration.
        time.sleep(50)
        print(hangup_btn)
        hangup_btn.click()
class NewAllocationTestCase(StaticLiveServerTestCase):
    """Selenium live-server test: a coordinator creates a room allocation
    for an event through the UI and is redirected to the allocations list."""

    def setUp(self):
        # Real Chrome session against Django's live test server.
        self.browser = Chrome()
        self.browser.implicitly_wait(10)
        # Admin user who acts as coordinator, teacher and workgroup advisor.
        user = User.objects.create_user(username='******',
                                        name="Administrador Teste",
                                        email='*****@*****.**',
                                        password='******',
                                        is_staff=True,
                                        is_superuser=True)
        coordinator = Coordinator.objects.create(usp_number='1234567',
                                                 user=user)
        teacher = Teacher.objects.create(usp_number='1234567', user=user)
        # Student enrolled in both disciplines below.
        student_user = User.objects.create(username='******',
                                           name="Aluno Teste",
                                           email='*****@*****.**',
                                           password='******')
        student = Student.objects.create(usp_number='7983121',
                                         user=student_user)
        # One quarterly and one semester discipline, both already ended
        # (started 8 days ago, finished yesterday).
        quarter_discipline = Discipline.objects.create(
            modality='QDR',
            start_date=date.today() - timedelta(8),
            end_date=date.today() - timedelta(1))
        quarter_discipline.users.add(student_user)
        semester_discipline = Discipline.objects.create(
            modality='SMS',
            start_date=date.today() - timedelta(8),
            end_date=date.today() - timedelta(1))
        semester_discipline.users.add(student_user)
        # Event scheduled for tomorrow, open the whole day.
        event = Event.objects.create(type='THR',
                                     quarter_discipline=quarter_discipline,
                                     semester_discipline=semester_discipline,
                                     selected_date=date.today() + timedelta(1),
                                     start_time=time(0, 0, 0),
                                     end_time=time(23, 59, 59))
        workgroup = Workgroup.objects.create(modality='QDR',
                                             identifier='1',
                                             title='Grupo de Teste',
                                             advisor=user)
        workgroup.students.add(student_user)
        room = Room.objects.create(block='A', floor='T', identifier='ST')
        # Log in through the real login form before each test.
        self.browser.get('%s%s' %
                         (self.live_server_url, reverse_lazy("login")))
        self.browser.find_element_by_id('id_username').send_keys(
            '*****@*****.**')
        self.browser.find_element_by_id('id_password').send_keys('1234567')
        self.browser.find_element_by_id('login').click()

    def tearDown(self):
        # Always close the browser, even when a test fails.
        self.browser.quit()

    def test_allocation_sign_up_fire(self):
        """Create an allocation via the UI; assert the redirect and card."""
        # Navigate: coordinator events -> manage allocations -> new allocation.
        self.browser.find_element_by_id('coordinator-events').click()
        self.browser.find_element_by_id('manage-allocations-1').click()
        self.browser.find_element_by_id('new-allocation').click()
        # Fill the allocation form with a full-day time window.
        self.browser.find_element_by_id('id_start_time').send_keys(
            time(0, 0).strftime('%H:%M'))
        self.browser.find_element_by_id('id_end_time').send_keys(
            time(23, 59).strftime('%H:%M'))
        # Select2 widgets: type a query, then click the first suggestion.
        self.browser.find_element_by_class_name(
            "select2-search__field").send_keys('administrador')
        self.browser.find_element_by_class_name(
            "select2-results__option").click()
        self.browser.find_element_by_id(
            "select2-id_workgroup-container").send_keys('C1')
        self.browser.find_element_by_class_name(
            "select2-results__option").click()
        self.browser.find_element_by_id(
            "select2-id_selected_room-container").send_keys('A')
        self.browser.find_element_by_class_name(
            "select2-results__option").click()
        # Scroll to the bottom so the submit button is clickable.
        self.browser.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        self.browser.find_element_by_id('submit').click()
        # Expect a redirect to the allocations list of event #1...
        self.assertIn(
            ('%s%s' % (self.live_server_url,
                       reverse_lazy("allocations_list",
                                    kwargs={'event_id': 1}))),
            self.browser.current_url)
        # ...and a card describing the newly created allocation.
        self.assertTrue(
            self.browser.find_element_by_xpath(
                "//div[@class='card']/div[@class='card-header info-color lighten-1 white-text'][contains(.,'Grupo C1 - Sala AT-ST')]"
            ))
def multiplier(a, b): return a * b opts = Options() opts.headless = False assert opts.set_headless browser = Chrome( executable_path='C:/Users/spi59/Documents/Drivers/chromedriver.exe', options=opts) browser.get('https://freerice.com/categories') time.sleep(0.5) cookie_monster = browser.find_element_by_class_name('as-js-optin') cookie_monster.click() time.sleep(0.5) categories = browser.find_elements_by_class_name('category-item') categories[21].click() time.sleep(2) for rice_donator in range(1, 1000): try: element = WebDriverWait(browser, 50).until( EC.presence_of_element_located( (By.CLASS_NAME, 'card-button.fade-appear-done.fade-enter-done'))) except: print(rice_donator * 10)
import time from selenium.webdriver import Chrome #셀레늄을 이용한 자동로그인 chromeDriver = 'c:\\temp\\chromedriver.exe' driver = Chrome(chromeDriver) driver.get('https://login.coupang.com/login/login.pang') time.sleep(3) input_login = driver.find_element_by_id('login-email-input') input_login.send_keys('*****@*****.**') time.sleep(3) input_pw = driver.find_element_by_id("login-password-input") input_pw.send_keys('1111') time.sleep(3) btn = driver.find_element_by_class_name('login__button') btn.click() time.sleep(3) driver.quit()
class XiechengSpider(object):
    """Scrapes attraction ticket data from Ctrip (ctrip.com).

    Uses a headless Chrome to page through the "things to do" search
    results, fuzzy-matches a keyword against result titles, then fetches
    ticket details from Ctrip's JSON-bearing pages with `requests`.
    """

    def __init__(self):
        options = Options()
        options.add_argument('--headless')
        # Drive Chrome through Selenium (headless).
        self.chrome = Chrome(executable_path='/usr/local/bin/chromedriver', options=options)
        self.chrome.get(
            'https://huodong.ctrip.com/things-to-do/list?pagetype=city&citytype=dt&keyword=%E6%8F%AD%E9%98%B3&pshowcode=Ticket2')
        # time.sleep(3)
        # Current results page number (1-based).
        self.page = 1
        # NOTE(review): hard-coded session cookie — will expire; requests
        # made with it may silently degrade.
        self.headers = {
            'cookie': 'Session=SmartLinkCode=U155952&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=&SmartLinkLanguage=zh; _RSG=KqK3qETfa143fOqQl4rFXB; _RDG=282f24100640c82731283334fcc3364464; _RGUID=4064a5d3-b40d-4d14-b84f-d44bdad18a43; Union=OUID=index&AllianceID=4897&SID=155952&SourceID=&createtime=1600831032&Expires=1601435831593; MKT_OrderClick=ASID=4897155952&AID=4897&CSID=155952&OUID=index&CT=1600831031597&CURL=https%3A%2F%2Fwww.ctrip.com%2F%3Fsid%3D155952%26allianceid%3D4897%26ouid%3Dindex&VAL={"pc_vid":"1600831028743.427gzc"}; MKT_CKID=1600831031634.5olt5.f6pj; MKT_CKID_LMT=1600831031635; _ga=GA1.2.248639397.1600831032; _gid=GA1.2.1954297618.1600831032; MKT_Pagesource=PC; GUID=09031031210931119554; nfes_isSupportWebP=1; appFloatCnt=1; nfes_isSupportWebP=1; ASP.NET_SessionSvc=MTAuNjAuMzUuMTQ2fDkwOTB8amlucWlhb3xkZWZhdWx0fDE1ODkwMDMyMjQ5NDI; U_TICKET_SELECTED_DISTRICT_CITY=%7B%22value%22%3A%7B%22districtid%22%3A%22835%22%2C%22districtname%22%3A%22%E6%8F%AD%E9%98%B3%22%2C%22isOversea%22%3Anull%7D%2C%22createTime%22%3A1600847243848%2C%22updateDate%22%3A1600847243848%7D; _RF1=113.118.204.141; _gat=1; _pd=%7B%22r%22%3A1%2C%22d%22%3A614%2C%22_d%22%3A613%2C%22p%22%3A634%2C%22_p%22%3A20%2C%22o%22%3A655%2C%22_o%22%3A21%2C%22s%22%3A668%2C%22_s%22%3A13%7D; _bfa=1.1600831028743.427gzc.1.1600843833503.1600847244099.5.49.10650038368; _bfs=1.30; _bfi=p1%3D290510%26p2%3D290510%26v1%3D49%26v2%3D48; _jzqco=%7C%7C%7C%7C1600831031803%7C1.1555887407.1600831031625.1600849509140.1600849530503.1600849509140.1600849530503.0.0.0.19.19; __zpspc=9.4.1600846262.1600849530.14%232%7Cwww.baidu.com%7C%7C%7C%25E6%2590%25BA%25E7%25A8%258B%7C%23',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
        }
        # Cache of city name -> city id.
        self.cityList = {}
        # Scraped ticket data from Ctrip, keyed by attraction title.
        self.spotsInfo = {}
        self.name = '携程旅游'
        # Flag: has the current query finished?
        self.done = False

    def get_url(self, keyword):
        '''Walk the result pages until an attraction fuzzy-matching
        *keyword* is found, then delegate to get_detail().

        :param keyword: attraction name to look for
        :return: None
        '''
        while True:
            content = self.chrome.find_element_by_class_name('right-content-list').get_attribute('innerHTML')
            # Each result contributes an (href, title) pair.
            cons = re.findall(r'href="(.*?)" title="(.*?)"', content)
            # print(content)
            for con in cons:
                self.detail_url = 'https:' + con[0]
                self.title = con[1]
                # Fuzzy similarity between result title and the keyword;
                # scores <= 20 are treated as non-matches.
                result = fuzz.token_sort_ratio(self.title, keyword)
                if result <= 20:
                    # print(self.title,result)
                    continue
                # print(self.detail_url, self.title)
                self.get_detail()
                return
            # print(self.spotsInfo)
            # No match on this page: read the total page count from the
            # pagination widget and jump to the next page.
            pagenums = self.chrome.find_elements_by_class_name('pagination_div_content')
            i = self.chrome.find_elements_by_class_name('item_wrap_font')
            # print(len(i))
            totalpage = -1;
            # print(len(pagenums))
            for pagenum in pagenums:
                totalpage = pagenum.find_element_by_class_name('item_wrap_font').text
                # print(totalpage)
            totalpagenum = int(totalpage)
            self.page = self.page + 1
            # print(totalpage)
            # icon = self.chrome.find_element_by_css_selector('.u_icon_ttd.undefined.u_icon_enArrowforward')
            # icon = self.chrome.find_element_by_class_name('u_icon_ttd')
            # print(type(icon))
            # icon.click()
            # No pagination widget found: stay on this page and retry.
            if totalpagenum<0:
                self.page = self.page-1
                continue
            if self.page > totalpagenum:
                break
            # Type the target page number into the jump box and go.
            self.chrome.find_element_by_class_name('pagination_div_jump_input').click()
            self.chrome.find_element_by_class_name('pagination_div_jump_input').send_keys(str(self.page))
            self.chrome.find_element_by_class_name('pagination_div_button').click()
            # time.sleep(1)

    def search_spots(self, keyword, city):
        '''Core entry point: fetch attraction/ticket data for *keyword*
        in *city*. Results land in self.spotsInfo; self.done is set True
        when finished (even on failure).

        :param keyword: attraction name
        :param city: city name (may include 市/县/省 suffixes)
        :return: None
        '''
        self.spotsInfo = {}
        try:
            id = self.getCityID(city)
            # Strip administrative suffixes (city/county/province) from the name.
            Ncity = city.replace('市', '').replace('县', '').replace('省', '')
            #url = 'https://huodong.ctrip.com/things-to-do/list?pagetype=city&citytype=dt&keyword=%E6%8F%AD%E9%98%B3&pshowcode=Ticket2'
            url = 'https://huodong.ctrip.com/things-to-do/list?pagetype=city&citytype=dt&keyword=' + Ncity+keyword + '&id=' + str(self.getCityID(city)) + '&pshowcode=Ticket2'
            self.chrome.get(url)
            self.get_url(keyword)
        # NOTE(review): bare except deliberately makes the search best-effort;
        # any scraping failure just leaves spotsInfo empty.
        except:
            pass
        self.done = True

    def getCityID(self, city):
        '''Resolve a city name to Ctrip's numeric city id via fuzzy match.

        :param city: city name
        :return: city id (int)
        '''
        # Lazily populate the city list on first use.
        if len(self.cityList) == 0:
            self.getCityList()
        result = process.extractBests(city, self.cityList.keys(), score_cutoff=80, limit=1)
        return self.cityList[result[0][0]]

    def getCityList(self):
        '''Scrape the full domestic city list (name -> id) into
        self.cityList.

        :return: None
        '''
        html = self.getHtml('https://piao.ctrip.com/ticket/?districtid=1')
        soup = BS(html, "html.parser")
        script = soup.find_all('script')
        i = 0
        for s in script:
            # The 5th <script> tag carries the page's embedded JSON state.
            if i == 4:
                #cities = json.loads(s.text)
                #break
                text = s.text.replace(' ','').replace('\n','')
                # Slice out the JSON between the two window globals
                # (25 == len('window.__INITIAL_STATE__=')).
                text = text[text.find('window.__INITIAL_STATE__')+25:text.find('window.__APP_SETTINGS__=')]
                cities = json.loads(text)
                city = cities['citiesData']['domesticcity']['cities']
                for c in city:
                    for ci in c['cities']:
                        self.cityList[ci['name']] = ci['id']
                break
            i = i+1

    def to_unicode(self, string):
        '''Encode *string* as a run of \\uXXXX escape text.

        :param string: input text
        :return: escaped string
        '''
        ret = ''
        for v in string:
            ret = ret + hex(ord(v)).upper().replace('0X', '\\u')
        return ret

    def get_detail(self):
        '''Open the attraction detail page and scrape detail info.

        Everything except the get_ticket() call is currently disabled.
        :return: None
        '''
        # detail_con = requests.get(self.detail_url, verify=False, headers=self.headers).text
        # # time.sleep(2)
        # '''Extract fields with regex'''
        # self.rank = ''.join(re.findall(r'rankText">(.*?)<', detail_con, re.DOTALL))
        # self.address = ''.join(re.findall(r'景点地址</p><p class="baseInfoText">(.*?)<', detail_con, re.DOTALL))
        # self.mobile = ''.join(re.findall(r'官方电话</p><p class="baseInfoText">(.*?)<', detail_con, re.DOTALL))
        # print(self.rank, self.address, self.mobile)
        # '''Extract fields with xpath'''
        # ret = etree.HTML(detail_con)
        # desc_cons = ret.xpath('//div[@class="detailModule normalModule"]//div[@class="moduleContent"]')
        # desc_titles = ret.xpath('//div[@class="detailModule normalModule"]//div[@class="moduleTitle"]')
        # desc_list = []
        # desc_title_list = []
        # for d in desc_cons:
        #     des = ''.join(d.xpath('.//text()'))
        #     desc_list.append(des)
        # for d in desc_titles:
        #     des = ''.join(d.xpath('.//text()'))
        #     desc_title_list.append(des)
        # desc_dict = dict(zip(desc_title_list, desc_list))
        # print(desc_dict)
        # '''Collect image links'''
        # img_list = []
        # imgs = re.findall(r'background-image:url\((.*?)\)', detail_con, re.DOTALL)
        # for img in imgs:
        #     '''Each image comes in two sizes; keep only the large 521x391 one'''
        #     image = re.search(r'521_391', img)
        #     if image:
        #         img_list.append(img)
        # print(img_list)
        self.get_ticket()

    def get_ticket(self):
        '''Fetch ticket offers for the current attraction and merge them
        into self.spotsInfo keyed by the attraction title.

        :return: None
        '''
        # The numeric attraction id is the last path segment of detail_url.
        id = self.detail_url.split('/')[-1]
        # print(id)
        ticket_url = f'https://piao.ctrip.com/ticket/dest/{id}?onlyContent=true&onlyShelf=true'
        # print(ticket_url)
        ticket_res = requests.get(ticket_url, verify=False, headers=self.headers).text
        # time.sleep(1)
        ticket_res = ticket_res.replace('\n','').replace(' ','')
        # Slice out the embedded JSON between the two window globals.
        ticket_res = ticket_res[ticket_res.find('window.__INITIAL_STATE__')+25:ticket_res.find('window.__APP_SETTINGS__')]
        info = json.loads(ticket_res)
        ticketinfos = info['detailInfo']['ressHash']
        slist = {}
        for ticketinfo in ticketinfos.values():
            '''解析字典数据'''
            title = ticketinfo['name']
            price = ticketinfo['price']
            type = ticketinfo['saleunitinfo']['propleproperty']
            fromw = '携程旅游 '+ticketinfo['brandname']
            '''数据合并'''
            # Group offers by ticket type.
            slist.setdefault(type, [])
            slist[type].append(
                {'name': title, 'type': type, 'price': price, 'url': self.detail_url, 'buy': '', 'from': fromw,
                 'isReturnable': '', 'bookTime': '', 'outTime': '', 'useTime': '', 'discription': ''})
        self.spotsInfo[self.title] = slist
        # ticket_ret = etree.HTML(ticket_res)
        # ticket = ticket_ret.xpath('//table[@class="ticket-table"]//div[@class="ttd-fs-18"]/text()')
        # price = ticket_ret.xpath(
        #     '//table[@class="ticket-table"]//td[@class="td-price"]//strong[@class="ttd-fs-24"]/text()')
        # print(ticket)
        # print(price)
        # '''The scraped lists may contain an unknown number of blanks; strip
        # them all so ticket types line up with prices.'''
        # while True:
        #     try:
        #         ticket.remove(' ')
        #     except:
        #         break
        # while True:
        #     try:
        #         price.remove(' ')
        #     except:
        #         break
        # '''
        # Some detail pages still misalign after stripping blanks because the
        # page markup changed; in that case retry with the alternative xpath
        # rules below, otherwise the pairing silently comes out wrong.
        # '''
        # if len(ticket) != len(price):
        #     ticket = ticket_ret.xpath(
        #         '//table[@class="ticket-table"]/tbody[@class="tkt-bg-gray"]//a[@class="ticket-title "]/text()')
        #     price = ticket_ret.xpath('//table[@class="ticket-table"]//strong[@class="ttd-fs-24"]/text()')
        #     while True:
        #         try:
        #             ticket.remove(' ')
        #         except:
        #             break
        #     while True:
        #         try:
        #             price.remove(' ')
        #         except:
        #             break
        # print(ticket)
        # print(price)
        # ticket_dict = dict(zip(ticket, price))
        # print(ticket_dict)

    def getHtml(self, url):
        '''GET *url* with randomized UA and fixed cookie; return the page
        text, or "" on any failure.

        :param url: page URL
        :return: response text or ""
        '''
        try:
            self.headers['User-Agent'] = mfu.UserAgent().random()
            self.headers['Cookie'] = '_abtest_userid=128990d6-ec49-40cb-b25d-fc8452c3d8a1; _ga=GA1.2.179469688.1614864484; MKT_CKID=1614864484805.yk39i.z4vz; _RSG=r2q6zDxpRN1sq9uB0iKSXA; _RGUID=15dbcfb3-7d1b-40b5-a85c-52c00be09d36; _RDG=287a9b7a6689de2a903820b27712075311; ibulanguage=CN; ibulocale=zh_cn; cookiePricesDisplayed=CNY; _gid=GA1.2.435881939.1618908478; Union=AllianceID=5376&SID=130860&OUID=&createtime=1618908478&Expires=1619513277722; Session=smartlinkcode=U130860&smartlinklanguage=zh&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=; MKT_CKID_LMT=1618908477918; MKT_Pagesource=PC; GUID=09031023413294183609; __utma=1.179469688.1614864484.1618917016.1618917016.1; __utmc=1; __utmz=1.1618917016.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _RF1=223.99.16.76; _jzqco=%7C%7C%7C%7C1618908478117%7C1.94961373.1614864484796.1618917699648.1618917762688.1618917699648.1618917762688.undefined.0.0.7.7; __zpspc=9.3.1618917018.1618917762.3%233%7Cwww.so.com%7C%7C%7C%7C%23; _bfa=1.1614864482896.3146qi.1.1618908639843.1618917016691.3.15.10650038368; _bfs=1.5; _bfi=p1%3D10650034475%26p2%3D10650034475%26v1%3D15%26v2%3D14; appFloatCnt=4; U_TICKET_SELECTED_DISTRICT_CITY={%22value%22:{%22districtid%22:1%2C%22districtname%22:%22%E5%8C%97%E4%BA%AC%22%2C%22isoversea%22:false%2C%22stage%22:%22selectedCity%22}%2C%22updateDate%22:1618919559657%2C%22createTime%22:1618919434692}'
            #self.headers['Host'] = 'www.tuniu.com'
            self.headers['Referer'] = 'https://piao.ctrip.com/ticket'
            resp = requests.get(url, headers=self.headers)
            resp.raise_for_status()
            resp.encoding = 'utf-8'
            return resp.text
        # NOTE(review): bare except — any error (network, HTTP, encoding)
        # is silently collapsed into an empty string.
        except:
            return ""
from selenium.webdriver import Chrome from selenium.webdriver.support.ui import Select import time chrome = Chrome() url = 'https://statusinvest.com.br/acoes/busca-avancada' chrome.get(url) buscar = chrome.find_element_by_class_name('find') buscar.click() time.sleep(2) # chrome.find_element_by_css_selector('dropdown-content.select-dropdown li::nth-of-type(3)').click() select = chrome.find_element_by_class_name('select-dropdown'). select.send_keys('TODOS')
class Question_Parser:
    """Scrapes a LeetCode problem page with Selenium and writes a ``.py``
    file containing the problem statement (as a module docstring) plus the
    Python3 starter code.

    Workflow: construct (opens the page), then call ``run()``.
    """

    def __init__(self):
        self.driver = Chrome()
        # self.driver.implicitly_wait(20)
        # Login settings (login is optional and disabled by default).
        self.login_option = False
        self.login_url = 'https://leetcode.com/accounts/login/'
        self.login_account_file = ""
        # Problem URL: first CLI argument, else prompt on stdin.
        # BUGFIX: the original read sys.argv[1] unconditionally, raising
        # IndexError when no argument was passed instead of falling back
        # to the input() prompt below.
        self.problem_url = sys.argv[1] if len(sys.argv) > 1 else ""
        if not self.problem_url:
            self.problem_url = input()
        self.driver.get(self.problem_url)
        # Workaround: leetcode-cn may hijack the URL; force the
        # international site and wait until the redirect sticks.
        time.sleep(3)
        self.driver.get(
            self.driver.current_url.replace("leetcode-cn", "leetcode"))
        while "leetcode-cn" in self.driver.current_url:
            time.sleep(2)
        # Filled in by get_title().
        self.title = ""
        # Filled in by get_question_description().
        self.problem = ""
        # Filled in by get_starter_code().
        self.starter_code = ""

    def login(self):
        """Log in with the username/password stored one-per-line in
        ``self.login_account_file``. Returns True on success."""
        # BUGFIX: the original opened the bare name `login_account_file`,
        # which is undefined in this scope (NameError); the instance
        # attribute was clearly intended.
        with open(self.login_account_file) as f:
            USERNAME = f.readline()
            PASSWORD = f.readline()
        # NOTE(review): readline() keeps the trailing newline; these values
        # probably need .strip() before being typed into the form.
        if USERNAME and PASSWORD:
            self.driver.get(self.login_url)
            username_field = self.driver.find_elements_by_class_name(
                "input__2W4f")[0]
            password_field = self.driver.find_elements_by_class_name(
                "input__2W4f ")[1]
            username_field.send_keys(USERNAME)
            password_field.send_keys(PASSWORD)
            signin = self.driver.find_element_by_class_name(
                "btn-content__lOBM")
            # Retry the click until the button is actually clickable.
            flag = True
            while (flag):
                try:
                    time.sleep(0.5)
                    signin.click()
                    flag = False
                except WebDriverException:
                    time.sleep(0.5)
            time.sleep(1)
            # Still sitting on the login page means the login failed.
            if (self.driver.current_url != self.login_url):
                print('Login succeeded')
                return True
            else:
                print('Login failed - check username and password')
                return False
        print('Need to provide a username and password!')
        return False

    def get_title(self):
        """Read the problem title into self.title. Retries while the page
        renders; reloads the page once after 5 failures; gives up after 10.
        Returns True on success."""
        success = False
        try_times = 0
        while not success:
            try_times += 1
            try:
                self.title = self.driver.find_element_by_class_name(
                    "css-v3d350").text
                success = True
            except WebDriverException:
                time.sleep(0.5)
                if try_times == 5:
                    # Halfway through the retry budget: reload and redo the
                    # leetcode-cn redirect workaround.
                    self.driver.get(self.problem_url)
                    time.sleep(3)
                    self.driver.get(
                        self.driver.current_url.replace("leetcode-cn",
                                                        "leetcode"))
                    while "leetcode-cn" in self.driver.current_url:
                        time.sleep(2)
                elif try_times > 10:
                    print("Parsing title fail")
                    return False
        return success

    def get_question_description(self):
        """Read the problem statement and store it in self.problem wrapped
        in triple quotes (ready to embed as a docstring). Returns True on
        success."""
        success = False
        try_times = 0
        doc_mark = '"""'
        new_line_mark = "\n"
        while not success:
            try_times += 1
            try:
                content = self.driver.find_element_by_class_name(
                    "question-content__JfgR").text
                success = True
            except WebDriverException:
                time.sleep(0.5)
                if try_times > 10:
                    print("Parsing question content fail")
                    return False
        self.problem = doc_mark + new_line_mark + content + new_line_mark + doc_mark + new_line_mark
        return success

    def get_starter_code(self):
        """Switch the editor language to Python3 and copy the starter code
        into self.starter_code. Returns True on success."""
        success = False
        try_times = 0
        doc_mark = '"""'
        new_line_mark = "\n"
        while not success:
            try_times += 1
            try:
                # Open the language dropdown, then pick the Python3 entry.
                language_dropdown = '//*[@id="app"]/div/div[3]/div/div/div[3]/div/div[1]/div/div[1]/div[1]/div'
                time.sleep(1)
                python3_option = '/html/body/div[7]/div/div/div/ul/li[4]'
                self.driver.find_element_by_xpath(language_dropdown).click()
                self.driver.find_element_by_xpath(python3_option).click()
                time.sleep(1)
                success = True
            except WebDriverException:
                time.sleep(0.5)
                if try_times > 10:
                    print("Parsing starter code fail")
                    return False
        self.starter_code = ""
        for line in self.driver.find_elements_by_class_name("CodeMirror-line"):
            self.starter_code += line.text + "\n"
        return success

    def create_file(self):
        """Write ``<title>.py`` with the problem docstring and starter code.
        Always closes the file (if opened) and the browser."""
        # BUGFIX: guarantee `f` exists for the finally block — in the
        # original it was unbound (NameError) whenever get_title() failed
        # before the file was opened.
        f = None
        try:
            if self.get_title():
                f_name = self.title
                f_name = f_name.replace('.', '')
                f_name = f_name.replace(' ', '_')
                f_name += ".py"
                f = open(f_name, "w")
                if self.get_question_description() and self.get_starter_code():
                    f.write(self.problem + "\n" + self.starter_code)
                print(f_name, " Created")
        except IOError as e:
            print(e)
        finally:
            if f is not None:
                f.close()
            self.driver.close()

    def run(self):
        """Entry point: scrape the page and write the solution stub."""
        self.create_file()
def start_selenium_for_fssp(last_name, first_name, middle_name, birthday):
    """Search fssprus.ru (Russian bailiff service) for enforcement
    proceedings against a person.

    Returns True when no proceedings are found, otherwise a list of rows
    (header row first, then one list of cell texts per proceeding).
    Solves the site captcha via the rucaptcha.com API.
    """
    browser = Chrome(path_webdriver)
    # Target site to scrape.
    browser.get('https://fssprus.ru')
    # A modal opens on load and covers the page — close it first.
    browser.find_element_by_class_name('tingle-modal__close').click()
    # Open the "advanced search" panel so the input fields appear.
    browser.find_element_by_class_name('main-form__toggle-open').click()
    # Fill in the person's details.
    browser.find_element_by_name('is[last_name]').send_keys(last_name)
    browser.find_element_by_name('is[first_name]').send_keys(first_name)
    browser.find_element_by_name('is[patronymic]').send_keys(middle_name)
    browser.find_element_by_name('is[date]').send_keys(birthday)
    # Click search.
    browser.find_element_by_class_name(
        'main-form__btns').find_element_by_class_name('btn-primary').click()
    # Pause so the site has time to load.
    time.sleep(3)

    # Sends the captcha image to the external recognition service and
    # types the answer back into the page.
    def capcha():
        time.sleep(3)
        # Locate the captcha image element.
        x = browser.find_element_by_id('capchaVisual')
        # The image is inlined as a base64 data URI in the src attribute.
        image_code_base64 = x.get_attribute('src')
        image_code_base64 = image_code_base64.replace(
            'data:image/jpeg;base64,', '')
        # Decode and save the image to disk for upload.
        dec = base64.b64decode(image_code_base64)
        filename = 'some_image.jpg'
        with open(filename, 'wb') as f:
            f.write(dec)
        # Keep submitting until the service confirms it accepted the image.
        get_task_id = False
        while not get_task_id:
            r = requests.post('https://rucaptcha.com/in.php',
                              data={
                                  'key': API_KEY,
                                  'method': 'post',
                                  'lang': 'ru',
                                  'json': 1
                              },
                              files={'file': open(filename, 'rb')})
            # Accepted — leave the loop.
            if r.json()['status'] == 1:
                get_task_id = True
            # Service overloaded — wait 5s and retry.
            else:
                time.sleep(5)
        # Poll for the recognition result.
        capcha_ready = False
        while not capcha_ready:
            r1 = requests.get(
                'https://rucaptcha.com/res.php?key={}&action={}&id={}&json=1'.
                format(API_KEY, 'get', r.json()['request']))
            # Not solved yet — wait 3s and poll again.
            if r1.json()['request'] == 'CAPCHA_NOT_READY':
                time.sleep(3)
            # Solved — leave the loop.
            else:
                capcha_ready = True
        # The solved captcha text.
        text_for_capcha = r1.json()['request']
        # Type it into the page and submit.
        browser.find_element_by_id("captcha-popup-code").send_keys(
            text_for_capcha)
        browser.find_element_by_id("ncapcha-submit").click()
        time.sleep(3)

    # Re-run the captcha routine until the captcha popup disappears.
    capcha_passed = False
    while not capcha_passed:
        try:
            # If the popup is still present, the captcha was not accepted —
            # solve it again.
            if browser.find_element_by_class_name('popup-wrapper'):
                capcha()
        # find_element raising means the popup is gone: captcha passed.
        except:
            capcha_passed = True
    # Result accumulator: a header row first, then one row per
    # enforcement proceeding found.
    list_all_enforcements_proceeding = [[
        'Должник (физ. лицо: ФИО, дата и место рождения; юр. лицо: наименование, юр. адрес)',
        'Исполнительное производство (номер, дата возбуждения)',
        'Реквизиты исполнительного документа (вид, дата принятия органом, номер, наименование органа, '
        'выдавшего исполнительный документ)',
        'Дата, причина окончания или прекращения ИП (статья, часть, пункт основания)',
        'Сервис',
        'Предмет исполнения, сумма непогашенной задолженности',
        'Отдел судебных приставов (наименование, адрес)',
        'Судебный пристав-исполнитель'
    ]]
    # A "nothing found" message means there are no proceedings: finish.
    try:
        browser.find_element_by_class_name('b-search-message__text').text
        # Close the browser.
        browser.close()
        return True
    # Otherwise scrape the results table into the list and return it.
    except:
        table = browser.find_element_by_class_name('iss')
        all_rows = table.find_elements_by_tag_name('tr')
        for row in all_rows:
            list_enforcement_proceeding = []
            for block in row.find_elements_by_tag_name('td'):
                list_enforcement_proceeding.append(block.text)
            list_all_enforcements_proceeding.append(
                list_enforcement_proceeding)
        browser.close()
        return list_all_enforcements_proceeding
login_form.send_keys(login) print('Ввожу пароль...') pass_form = browser.find_element_by_id('index_pass') pass_form.send_keys(password) print('Вхожу...') browser.find_element_by_id('index_login_button').click() sleep(5) print('Захожу в сообщения...') browser.find_element_by_id('l_msg').click() sleep(5) print('Выбираю диалог с IQ Bot...') browser.find_element_by_class_name('_im_dialog_-181604561').click() now = 0 while now < makeCount: sleep(5) print('Нажимаю кнопку Начать...') browser.find_element_by_class_name('Button--positive').click() sleep(5) print('Нажимаю кнопку С ботом...') browser.find_element_by_class_name('Button--secondary').click() sleep(5) print('Перехожу по ссылке VK Coin...') links = browser.find_elements_by_partial_link_text("vk.com/coin") linkscount = len(links)
def get_last_page_number(driver: Chrome) -> int: navi_panel = driver.find_element_by_class_name('ul_navi') last_page_link = navi_panel.find_elements_by_tag_name('a')[-2] return int(last_page_link.text)
from selenium.webdriver import Chrome import time from 練習 import secret driver = Chrome("./chromedriver") # 加./是為了讓程式知道driver是在專案底下,沒加的話就會去找環境變數 driver.get("https://www.facebook.com") # BeautifulSoup: find, find_all # selenium: find_element, find_elements driver.find_element_by_id("email").send_keys(secret.username) driver.find_element_by_id("pass").send_keys(secret.password) driver.find_element_by_id("loginbutton").click() # 有些會需要輸入驗證碼 # s = input("請輸入安全碼") # driver.find_element_by_id("approvals_code").send_keys(s) # driver.find_element_by_id("checkpointSubmitButton").click() # # time.sleep(1) # driver.find_element_by_id("checkpointSubmitButton").click() time.sleep(5) post = driver.find_element_by_class_name("userContent") # 紙條: print(post.text) time.sleep(3) driver.close()
import datetime from bs4 import BeautifulSoup driver = Chrome() driver.get('https://web.whatsapp.com/') #Code to get the list of the contacts in the group :) driver.find_element_by_xpath('//*[@title="Leaf SongDiscoveryHindi 3"]').click() sourcer=(driver.page_source).encode('utf-8') soup=BeautifulSoup(sourcer,features='html.parser') #CODE FOR GETTING ALL THE CONTACTS IN THE GROUP p=driver.find_element_by_class_name('_2y17h') q=p.find_element_by_xpath('div[2]/div[2]/span') l=q.text l=l.split(',') for i in range(len(l)): l[i]=l[i].replace(" ","") l[i]=l[i][3:] popper=[] for k,v in names.items(): if k not in l: popper.append(k) else: pass for item in popper:
browser = Chrome(CHROME_DRIVER_PATH) browser.get(VENMO_URL) if os.path.isfile('cookies.pkl'): # there is a cookie file cookies = pickle.load(open("cookies.pkl", "rb")) for cookie in cookies: browser.add_cookie(cookie) # click on the sign in link signin_link = browser.find_element_by_link_text("Sign in") signin_link.click() # enter the email and password and send it username_box = browser.find_element_by_class_name("email-username-phone") username_box.send_keys(venmoInfo.my_u) password_box = browser.find_element_by_class_name("password") password_box.send_keys(venmoInfo.my_p) send_button = browser.find_element_by_class_name("login") send_button.click() # enter the person's name you want to pay time.sleep(5) name_box = browser.find_element_by_class_name("onebox_prefill") name_box.click() name_text_box = browser.find_element_by_class_name("paddingUnifier") name_text_box.send_keys(venmoInfo.payee_name) name_text_box.send_keys(Keys.ENTER) payment_box = browser.find_element_by_class_name("mainTextBox") time.sleep(1)
'tutsplus/%s' % sys.argv[3] ) if not os.path.exists(download_path): os.makedirs(download_path) username = sys.argv[1] password = getpass() browser = Chrome() try: browser.maximize_window() browser.get('http://tutsplus.com/sign_in') login_form = browser.find_element_by_class_name('sign-in') login_form.find_element_by_name('session[login]').send_keys(username) login_form.find_element_by_name('session[password]').send_keys(password) login_form.find_element_by_tag_name('button').submit() if browser.current_url == 'https://tutsplus.com/sessions': browser.quit() print 'Incorrect email_address or password.' sys.exit() def video_links(page_link): while True: browser.get(page_link)