def search_hits(list_words, close_driver=True):
    """Search Google for the words in *list_words* and return the hit count.

    Reuses the module-level ``driver`` when it is still alive, otherwise
    starts a new Chrome instance.

    :param list_words: query terms, joined with spaces into one query
    :param close_driver: quit the browser before returning when True
    :returns: parsed number of hits, or 0 when the count could not be
        parsed (including when Google reports that some words were excluded)
    """
    global driver
    try:
        # If a previous driver is still open, reuse it.
        driver.get('https://www.google.co.jp/')
    except Exception:  # driver undefined or already closed -> start fresh
        # options = ChromeOptions()
        # options.add_argument('--headless')  # enable headless mode
        # driver = Chrome('/Users/Go/chromedriver', options=options)
        driver = Chrome('/Users/Go/chromedriver')  # path to the chromedriver
        driver.get('https://www.google.co.jp/')

    # Join the words into a single space-separated query string
    # (replaces the original manual index-tracking concatenation loop).
    to_req = ' '.join(list_words)

    input_element = driver.find_element_by_name('q')  # search box
    input_element.send_keys(to_req)
    input_element.send_keys(Keys.RETURN)

    # e.g. '約 16,700,000 件 (0.21 秒)'
    result = driver.find_elements_by_css_selector('div#resultStats')[0].text
    # Box shown when some query words were excluded, e.g. '含まれない:りんご'
    no_contain = driver.find_elements_by_css_selector('div.TXwUJf')
    if len(no_contain) > 0:
        # Sentinel: the int parse below fails on -1, yielding num_hits = 0.
        result = -1

    if close_driver:
        driver.quit()  # terminate browser

    try:
        # '約 16,700,000 件 (0.21 秒)' -> 16700000
        num_hits = int(result[2:].split(' ')[0].replace(',', ''))
    except (TypeError, ValueError, IndexError):
        num_hits = 0
    return num_hits
def process_request(self, request, spider):
    """Render the Gaudiy community page with headless Chrome for Scrapy.

    Clicks through the intermediate buttons, scrolls until the lazy-loaded
    list stops growing, then returns the rendered DOM as an HtmlResponse.
    """
    options = ChromeOptions()
    options.headless = True
    driver = Chrome(options=options)
    try:
        driver.implicitly_wait(20)
        driver.get('https://gaudiy.com/community_details/avJEInz3EXlxNXKMSWxR')
        time.sleep(0.3)
        input_element = driver.find_elements_by_css_selector(
            'span:nth-child(5) > button > span > p')[0]
        if input_element:
            input_element.click()
        time.sleep(0.3)
        nft_element = driver.find_elements_by_css_selector(
            'span.MuiTab-wrapper')[0]
        if nft_element:
            nft_element.click()
        source_element = driver.find_element_by_css_selector(
            'label.MuiFormControlLabel-root')
        if source_element:
            # source_element.click()
            time.sleep(1.0)
        # Scroll the second-to-last card into view repeatedly until the
        # list stops growing (i.e. the same element stays second-to-last).
        link = driver.find_elements_by_css_selector(
            'button > div > p:nth-child(1)')[-2]
        driver.execute_script("arguments[0].scrollIntoView(true);", link)
        time.sleep(0.3)
        while link != driver.find_elements_by_css_selector(
                'button > div > p:nth-child(1)')[-2]:
            link = driver.find_elements_by_css_selector(
                'button > div > p:nth-child(1)')[-2]
            driver.execute_script("arguments[0].scrollIntoView(true);", link)
            time.sleep(0.3)
        return HtmlResponse(
            driver.current_url,
            body=driver.page_source,
            encoding='utf-8',
            request=request,
        )
    finally:
        # FIX: the original quit() sat after the return statement and was
        # unreachable, leaking a Chrome process per request.
        driver.quit()
def downloadSongs(searches):
    """For each search in *searches*, find the most-downloaded matching song
    and trigger its download; finally raise to keep the browser open.

    :param searches: iterable of search-term strings
    :raises Exception: always, at the end — deliberately, so the browser
        stays open long enough for the downloads to be collected manually.
    """
    # Chrome Driver setup.
    driver = Chrome()
    driver.implicitly_wait(10)
    # Perform login. (function hidden in tools file)
    login(driver, loginUrl)
    driver.find_element_by_id('nav_menu_19_trigger').click()
    WebDriverWait(driver, 10).until(EC.title_contains('Ignition3'))
    # Close the popup window which appears. (function hidden in tools file)
    performAntiBotCounterMeasures(driver)
    # Click the downloads sorter twice. (function hidden in tools file)
    sortSongsByMostDownloads(driver)
    # Perform our search(es)
    for searchTerms in searches:
        searchForTrack(driver, searchTerms)
        songRows = driver.find_elements_by_css_selector('tr.odd, tr.even')
        # non-empty means "no results" placeholder cell is present
        empty = driver.find_elements_by_css_selector('td.dataTables_empty')
        if not empty:
            # Open the context menu and ctrl+click to download.
            downloadSong(driver, songRows[0])
    # It throws an exception here because then the browser will remain open
    # for some time, allowing you to download the files.
    raise Exception('Finished. You\'ll have to manually download the files now and move them to the appropriate location.')
def get_info(self, link):
    """Scrape a manga overview page for its title and chapter list.

    :param link: URL of the manga overview page
    :returns: dict with keys ``manga_title``, ``manga_chapter_names``
        (sanitized chapter numbers, oldest first) and
        ``manga_chapter_links`` (chapter URLs, oldest first)
    """
    chrome_options = ChromeOptions()
    chrome_options.add_argument("--headless")
    # 'chrome_options=' is deprecated in Selenium; use 'options='.
    chrome = Chrome(options=chrome_options)
    try:
        chrome.get(link)
        # get the links to the chapters (site lists newest first -> reverse)
        links = chrome.find_elements_by_css_selector(
            "[class^=chapter-title] a")
        links.reverse()
        chapter_urls = [a.get_attribute("href") for a in links]
        # get the chapter numbers
        chapter_numbers = chrome.find_elements_by_css_selector(
            "[class^=chapter-title] em")
        chapter_numbers.reverse()
        # keep only digits and '.' so "Chapter 12.5" -> "12.5"
        sanitized_numbers = [
            ''.join(c for c in nr.text if c.isdigit() or c == ".")
            for nr in chapter_numbers
        ]
        title = chrome.find_element_by_css_selector(".widget-title")
        print(chapter_numbers[0].text)
        return {
            "manga_title": title.text,
            "manga_chapter_names": sanitized_numbers,
            "manga_chapter_links": chapter_urls
        }
    finally:
        # FIX: the browser was never closed, leaking a Chrome process.
        chrome.quit()
def test_output(browser: Chrome, enable_percy=False):
    """Output test::

        run template.basic_output()
        template.background_output()  # or: await template.coro_background_output()
        hold()
    """
    time.sleep(1)  # wait for the output to finish rendering
    # get focus
    browser.find_element_by_tag_name('body').click()
    time.sleep(1)
    # scroll to the bottom of the document
    browser.execute_script(
        '$("html, body").scrollTop( $(document).height()+100);')
    time.sleep(0.5)
    # short-circuit: snapshot only when percy is enabled
    enable_percy and percySnapshot(browser=browser, name='begin output')

    # click every button rendered inside table cells
    tab_btns = browser.find_elements_by_css_selector(
        '#pywebio-scope-table_cell_buttons button')
    for btn in tab_btns:
        time.sleep(0.5)
        browser.execute_script("arguments[0].click();", btn)

    # click every put_buttons button
    btns = browser.find_elements_by_css_selector(
        '#pywebio-scope-put_buttons button')
    for btn in btns:
        time.sleep(0.5)
        browser.execute_script("arguments[0].click();", btn)

    # scroll-control buttons (each click scrolls the window)
    btns = browser.find_elements_by_css_selector(
        '#pywebio-scope-scroll_basis_btns button')
    for btn in btns:
        time.sleep(1)
        browser.execute_script("arguments[0].click();", btn)

    time.sleep(1)
    browser.execute_script(
        '$("html, body").scrollTop( $(document).height()+100);')
    time.sleep(0.5)
    enable_percy and percySnapshot(browser=browser, name='basic output')

    # popup: open it, snapshot, then close the modal
    btn = browser.find_element_by_css_selector(
        '#pywebio-scope-popup_btn button')
    browser.execute_script("arguments[0].click();", btn)
    time.sleep(1)
    enable_percy and percySnapshot(browser=browser, name='popup')
    browser.execute_script("$('.modal').modal('hide');")
def get_marks(b: Chrome):
    """Scrape the marks bulletin table into a nested dict.

    Result shape::

        {ue: {"coeff": str,
              "modules": {mod: {"coeff": str,
                                "notes": [[mark, weight], ...]}}}}

    Rows are processed in document order; "ue" and "mod" rows update the
    current insertion point for the mark rows that follow them.
    """
    m_marks = {}
    cur_ue = 0   # key of the UE currently being filled
    cur_mod = 0  # key of the module currently being filled
    entries = b.find_elements_by_css_selector('table.notes_bulletin tbody tr')
    for e in entries:
        # Row kind is encoded in the last underscore-suffix of the CSS class:
        # "ue", "mod", or anything else for a plain mark row.
        e_type = e.get_attribute("class").split("_")[-1]
        if e_type == "ue":
            cur_ue = e.find_element_by_css_selector("td:first-child").text
            m_marks[cur_ue] = {
                "coeff": e.find_element_by_css_selector("td:last-child").text,
                "modules": {}
            }
        elif e_type == "mod":
            cur_mod = e.find_element_by_css_selector("td:nth-child(3)").text
            m_marks[cur_ue]["modules"][cur_mod] = {
                "coeff": e.find_element_by_css_selector("td:last-child").text,
                "notes": []
            }
        else:
            mark = e.find_element_by_css_selector("td.note").text
            if mark == 'ABS':
                mark = "0"  # an absence counts as zero
            # skip excused ('EXC') and not-graded ('NP') rows entirely
            if mark != 'EXC' and mark != 'NP':
                m_marks[cur_ue]["modules"][cur_mod]["notes"].append([
                    mark,
                    # [1:-1] strips the first/last character of the weight
                    # cell — presumably surrounding parentheses; TODO confirm
                    e.find_element_by_css_selector("td:last-child").text[1:-1]
                ])
    return m_marks
def scrape_top(results_num):
    """Scrape the top *results_num* songs from PopVortex and insert the new
    ones into the local ``songs`` table.

    :param results_num: number of chart rows to scrape (int or numeric str)
    """
    url = 'http://www.popvortex.com/music/charts/top-100-songs.php'
    # Raw string: backslashes in Windows paths must not be escape sequences.
    chrome_path = r'D:\Documents\Python\OCRtunes Project\chromedriver79.exe'
    browser = Chrome(chrome_path)
    browser.get(url)
    parent = browser.find_elements_by_css_selector(Locators.song_parent)
    rows_to_add = []
    for song in range(int(results_num)):
        rows_to_add.append((
            parent[song].find_element_by_css_selector(
                Locators.song_artist).text.lower(),
            parent[song].find_element_by_css_selector(
                Locators.song_title).text.lower(),
            parent[song].find_element_by_css_selector(
                Locators.song_genre).text.lower(),
            0))
    with openSQL('songs.db') as sql:
        sql.execute('SELECT name FROM songs')
        # Set membership: O(1) per lookup instead of scanning the whole
        # result list for every scraped row.
        existing_titles = {row[0] for row in sql.fetchall()}
        for artist, title, genre, length in rows_to_add:
            if title not in existing_titles:
                # NOTE(review): the column order (title, artist, length,
                # genre) mirrors the original insert — confirm it matches
                # the songs table schema.
                sql.execute('INSERT INTO songs VALUES(?, ?, ?, ?)',
                            (title, artist, length, genre))
                print("Title:{}\nArtist:{}\nLength:{}\nGenre:{}".format(
                    title, artist, length, genre))
    browser.close()
def gather_thumbs():
    """Scroll an image-results page and collect thumbnail link URLs.

    Relies on module-level ``URL`` (page to open) and ``N`` (maximum number
    of scroll iterations).

    :returns: list of collected hrefs
    """
    opt = Options()
    opt.add_argument(
        'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/74.0.3729.169 Safari/537.36')
    opt.add_argument("--disable-notifications")
    browser = Chrome(executable_path='chromedriver.exe', options=opt)
    browser.maximize_window()
    browser.get(URL)
    for _ in range(N):
        # jump to the bottom of the page to trigger lazy loading
        browser.find_element_by_tag_name('body').send_keys(Keys.END)
        sleep(1)
        try:
            # presumably the "show more results" button — TODO confirm selector
            browser.find_element_by_css_selector('input.mye4qd').click()
            sleep(1)
        except selenium.common.exceptions.ElementNotInteractableException:
            pass
        # non-empty text in this element signals the end of the results
        over = browser.find_element_by_css_selector('div.Yu2Dnd').text
        if over != '':
            break
    thumbs_urls = browser.find_elements_by_css_selector('a.mM5pbd')
    list_urls = []
    for href in thumbs_urls:
        # context-click each thumbnail before reading its href
        action_chains = ActionChains(browser)
        action_chains.context_click(href).perform()
        list_urls.append(href.get_attribute('href'))
        # progress log every 30 links
        if len(list_urls) % 30 == 0:
            print('collecting links ...', len(list_urls))
    browser.close()
    print('------------------------THUMBS collected. Total: ', len(list_urls))
    return list_urls
def download_chapters(self, destination, meta, start, end):
    """Download chapter pages as JPEGs into *destination*/<manga title>/.

    :param destination: existing directory to create the manga folder in
    :param meta: dict from get_info() with manga_title,
        manga_chapter_names and manga_chapter_links
    :param start: first chapter index to download (inclusive)
    :param end: last chapter index to download (inclusive)
    """
    print("downloading chapters")
    print(destination, meta, start, end)
    manga_title = meta["manga_title"]
    manga_chapter_names = meta["manga_chapter_names"]
    if os.path.exists(destination):
        manga_dir = os.path.join(destination, manga_title)
        if not os.path.isfile(destination):
            if not os.path.exists(manga_dir):
                os.mkdir(os.path.join(destination, manga_dir))
            chrome_options = ChromeOptions()
            # chrome_options.add_argument("--headless")
            chrome = Chrome(chrome_options=chrome_options)
            for index, chapter_link in enumerate(
                    meta["manga_chapter_links"]):
                # skip chapters before the requested start index
                if index < start:
                    continue
                print(chapter_link)
                chapter_dir = os.path.join(manga_dir,
                                           manga_chapter_names[index])
                if not os.path.exists(chapter_dir):
                    os.mkdir(chapter_dir)
                chrome.get(chapter_link)
                # number of pages = entries in the page-select dropdown
                page_count = len(
                    chrome.find_elements_by_css_selector(
                        "#page-list option"))
                for ix in range(1, page_count + 1):
                    print(f'getting { chapter_link }/{ ix }')
                    chrome.get(f'{ chapter_link }/{ ix }')
                    download_link = chrome.find_element_by_css_selector(
                        '.scan-page').get_attribute("src")
                    # remote_file = urllib.request.urlopen(download_link)
                    # info = remote_file.info()['Content-Disposition']
                    # value, params = cgi.parse_header(info)
                    # filename = params["filename"]
                    urllib.request.urlretrieve(
                        download_link,
                        os.path.join(chapter_dir, f'{ix:03}.jpg'))
                if index >= end:
                    break
            chrome.close()
        else:
            print("Path is a file!")
    else:
        # NOTE(review): this branch runs when destination does NOT exist,
        # so the message looks inverted — confirm intent.
        print("Path already exists")
def filter_entries_from_ls_url(self):
    """Open ``self.ls_url`` headlessly, select the first status option and
    return the matching result blocks.

    :returns: list of BeautifulSoup ``div.row.status2`` tags
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--incognito')
    # This will not pop up the website window.
    options.add_argument('--headless')
    # You need to install chromedriver by running
    # "sudo apt install chromium-chromedriver" and make sure the executable
    # is included in your ${PATH}.
    # FIX: 'chrome_options=' is deprecated in Selenium; use 'options='.
    wb_driver = Chrome(options=options)
    try:
        wb_driver.get(self.ls_url)
        # Check doc here:
        # https://www.selenium.dev/documentation/en/getting_started_with_webdriver/locating_elements/
        # https://selenium-python.readthedocs.io/locating-elements.html
        select_element = wb_driver.find_elements_by_css_selector(
            "select#cimmotool_status")
        all_options = select_element[0].find_elements_by_tag_name("option")
        all_options[0].click()
        # Allow some time for the page to load after clicking.
        time.sleep(1)
        page_source = wb_driver.page_source
    finally:
        # FIX: the driver was never closed, leaking a headless Chrome
        # process on every call.
        wb_driver.quit()
    soup = BeautifulSoup(page_source, 'lxml')
    html_segments = soup.findAll("div", class_="row status2")
    return html_segments
def process_one_page(driver: webdriver.Chrome, index='', keyword='',
                     contents=None):
    """Parse one detail page, append one row to *contents*, then go back.

    :param driver: webdriver already positioned on the detail page
    :param index: item index used for logging / the output row
    :param keyword: search keyword used for logging / the output row
    :param contents: output list the parsed row is appended to.
        FIX: the original used a mutable default ``[]``, which is shared
        across calls; ``None`` creates a fresh list per call instead.
    """
    if contents is None:
        contents = []
    main_tables = driver.find_elements_by_css_selector('.listmain>table')
    if not main_tables:
        print("没有找到分页,可能是没有数据")
        return
    data_table = main_tables[0]
    # the back button is the table's <img>
    back_btn = data_table.find_element_by_tag_name('img')
    # get all of the rows in the table
    rows = data_table.find_elements_by_tag_name("tr")
    # fixed row positions of the fields on this page
    product_name = get_element_text(rows[5])
    register_code = get_element_text(rows[1])
    register_name = get_element_text(rows[2])
    approve_date = get_element_text(rows[15])
    effective_date = get_element_text(rows[16])
    print("item {}=>{},{},{},{},{}".format(index, keyword, product_name,
                                           register_code, register_name,
                                           approve_date, effective_date))
    if product_name:
        contents.append([
            index, keyword, product_name, register_name, register_code,
            approve_date, effective_date
        ])
    back_btn.click()
def test_searching_in_duckduckgo():
    """Search DuckDuckGo for 'Shiba Inu' and collect the result titles."""
    # Opening Chrome browser. The path to chromedriver is resolved
    # automatically by the webdriver-manager library.
    browser = Chrome(executable_path=ChromeDriverManager().install())
    try:
        # Opening duckduckgo website
        browser.get('https://duckduckgo.com/')
        # Finding the search bar
        search_bar = browser.find_element_by_id('search_form_input_homepage')
        # Finding the search button (icon finder)
        search_button = browser.find_element_by_id('search_button_homepage')
        # Assertions that items are visible to the user
        assert search_bar.is_displayed() is True
        assert search_button.is_displayed() is True
        # Searching for Shiba Inu breed
        search_bar.send_keys('Shiba Inu')
        search_button.click()
        # Checking that any search result has a title "Shiba Inu".
        # FIX: renamed 'list' -> 'results' to stop shadowing the builtin.
        results = browser.find_elements_by_css_selector('.result__title')
        list_of_titles = [result.text for result in results]
    finally:
        # FIX: the browser was never closed, leaking a Chrome process.
        browser.quit()
def get_server_binfile_names(webpage) -> list:
    """From the micropython esp32 download page, get the names of the
    available binary files.

    :param webpage: URL of the webpage to inspect
    :returns: list of available bin filenames on the server
    """
    print(f"Determining binfiles available on {webpage}")
    opts = Options()
    # Operate in headless mode, i.e. do not show a browser window.
    # (FIX: dropped the pointless `assert opts.headless` — it asserted the
    # assignment just made and disappears under `python -O` anyway.)
    opts.headless = True
    browser = Chrome(options=opts)
    try:
        browser.get(webpage)
        references = browser.find_elements_by_css_selector('a')
        # From all the references, keep only the esp32 bin files.
        filenames = [
            ref.text for ref in references
            if ref.text.startswith("esp32-") and ref.text.endswith(".bin")
        ]
    finally:
        # FIX: quit() also terminates the chromedriver process; close()
        # only closes the window and could leave the driver running.
        browser.quit()
    return filenames
def insert_youpin_url(browser: Chrome):
    """Collect the SKU URL of every product on the current page and persist
    each one through YouPinURL.get_or_create()."""
    product_cards = browser.find_elements_by_css_selector('.pro-item.m-tag-a')
    print(f'当前页面共有{len(product_cards)}个商品')
    for card in product_cards:
        # the product's SKU url is stored in the data-src attribute
        sku_url = card.get_attribute('data-src')
        YouPinURL.get_or_create(url=sku_url)
def _service_info_fetch_celebrities_by_sid(self, driver: webdriver.Chrome,
                                           sid: str) -> List:
    """Fetch the cast/crew list of a Douban movie subject.

    :param driver: delegated webdriver (lifetime managed by the caller)
    :param sid: Douban subject id
    :returns: list of dicts ``{"id", "img", "name", "role"}`` keeping only
        directors, voice actors and actors ("导演", "配音", "演员")
    """
    url = f"https://movie.douban.com/subject/{sid}/celebrities"
    try:
        driver.get(url=url)
    except TimeoutException:
        # best effort: keep parsing whatever was loaded before the timeout
        print(f"the page opening is timeout: {url}")
        pass
    elements = driver.find_elements_by_css_selector(
        css_selector="li.celebrity")

    def func_element_wrap(element) -> dict:
        # celebrity id: last numeric path segment of the profile href
        cid = re.search(
            ".*/(\\d+)/$",
            element.find_element_by_css_selector("a").get_attribute(
                "href")).group(1)
        # avatar url embedded in the inline background-image style
        img = re.search(
            ".*url\\(\"(.*)\"\\).*",
            element.find_element_by_css_selector(
                "div.avatar").get_attribute("style")).group(1)
        # first whitespace-separated token of the display name
        name = element.find_element_by_css_selector(
            "span.name").text.split(" ")[0]
        role = ""
        try:
            role = element.find_element_by_css_selector(
                "span.role").text.split(" ")[0]
        except:
            # role is optional on the page; missing role stays ""
            pass
        return {"id": cid, "img": img, "name": name, "role": role}

    result = map(func_element_wrap, elements)
    result = filter(lambda x: x["role"] in ["导演", "配音", "演员"], list(result))
    return list(result)
def scrape_page(driver: webdriver.Chrome) -> dict:
    """Parse detail page and return data as a dictionary.

    The page lays the record out as alternating label texts
    [key1, value1, key2, value2, ...]; consecutive entries are paired
    into dict items.
    """
    texts = [label.text
             for label in driver.find_elements_by_css_selector("label")]
    # zip the iterator against itself to pair consecutive elements
    pairs = iter(texts)
    return dict(zip(pairs, pairs))
def get_er(user_name):
    """Return the engagement-rate string (e.g. '1.23%') from a HypeAuditor
    report page.

    :param user_name: account name whose report page is scraped
    :returns: first sub-title text ending in '%'
    :raises IndexError: when no percentage sub-title is found
    """
    browser = Chrome()
    try:
        # FIX: the original fetched the URL with requests.get() but never
        # navigated the Selenium browser, so page_source / the element
        # queries ran against a blank page. Load the page in the driver.
        browser.get('https://hypeauditor.com/report/%s/' % user_name)
        scores = browser.find_elements_by_css_selector(
            "div.kyb-user-info-v2__sub-title")
        er = [score.text for score in scores if score.text.endswith('%')][0]
    finally:
        browser.quit()
    return er
def test_titles_count():
    """The Wikipedia 'Shiba Inu' article must have exactly 10 headlines."""
    driver = Chrome(executable_path=ChromeDriverManager().install())
    driver.get('https://en.wikipedia.org/wiki/Shiba_Inu')
    headlines = driver.find_elements_by_css_selector('.mw-headline')
    assert len(headlines) == 10
    driver.quit()
def get_links(start_url):
    """Collect article links from every paginated listing page under
    *start_url* and store them, keyed by category, in mfa_links.json.

    :param start_url: listing URL ending just before the page number,
        e.g. '.../news?page=' — presumably; TODO confirm against callers.
    """
    mfa_links = get_json_from_file('mfa_links.json')
    driver = Chrome(executable_path="C://Users//User/chromedriver.exe")
    # open page 1 and count pages
    driver.get(start_url + '1')
    time.sleep(5)
    try:
        # the next-to-last pagination item carries the last page number
        num_pages = int(
            driver.find_elements_by_css_selector('div.paginates > ul > li')
            [-2].text)
    except IndexError:
        # pagination not rendered yet -> retry once with a longer wait
        driver.get(start_url + '1')
        time.sleep(10)
        num_pages = int(
            driver.find_elements_by_css_selector('div.paginates > ul > li')
            [-2].text)
    # generate pages urls list
    pages = [start_url + str(i) for i in range(1, num_pages + 1)]
    # get links to texts from every page
    all_links = []
    n = 0
    for page in pages:
        print('Working with page', n, 'out of', num_pages)
        links = []
        # NOTE(review): reloads until at least one link appears — this loops
        # forever if a page legitimately has no links; consider a retry cap.
        while len(links) == 0:
            driver.get(page)
            time.sleep(3)
            links = [
                link.get_attribute('href')
                for link in driver.find_elements_by_css_selector('a.anons-title')
            ]
        print('Found', len(links), 'links on this page')
        all_links.extend(links)
        n += 1
    # save scraped data to file, keyed by the category parsed from the URL
    category = re.compile('/(\w+)\?').findall(start_url)[0]
    mfa_links[category] = all_links
    update_json(mfa_links, 'mfa_links.json')
    driver.close()
def _service_keyword_partial_search(self, driver: webdriver.Chrome,
                                    keyword: str) -> List:
    """Partial keyword search on Douban: parse the result list without
    opening each detail page, keeping response time low.

    Only the first 3 rows are handled, to drop low-relevance results.

    :Args:
     - driver - delegated webdriver, lifetime managed by the caller
     - keyword - query string

    :Returns:
        list of dicts with keys sid / name / rating / img / year
    """
    # How many rows should be handled
    limits: int = 3
    try:
        driver.get(url=f"https://www.douban.com/search?q={keyword}")
    except TimeoutException:
        # best effort: parse whatever was loaded before the timeout
        print(
            f"the page opening is timeout: https://www.douban.com/search?q={keyword}"
        )
        pass
    element_result_list: List = driver.find_elements_by_css_selector(
        css_selector="div.search-result div:nth-child(3) .result")
    result = filter(self._filter_func_movie_only, element_result_list)

    def func_item_wrap(element: WebElement) -> Dict:
        a: WebElement = element.find_element_by_css_selector(
            css_selector="div.content div h3 a")
        img = element.find_element_by_css_selector(
            css_selector="div.pic a img").get_attribute("src")
        name = a.text
        # subject id embedded in the onclick handler
        sid = re.search(".*sid: (\\d+),.*",
                        a.get_attribute("onclick")).group(1)
        # release year: trailing "/ YYYY" of the cast line
        year = re.search(
            ".*/ (\\d+)$",
            element.find_element_by_css_selector(
                css_selector="div.content div div span.subject-cast").text
        ).group(1)
        rating = "0"
        try:
            # BUG FIX: the rating text was read but never assigned, so
            # every item silently carried the default "0".
            rating = element.find_element_by_css_selector(
                css_selector="div.content div div span.rating_nums").text
        except Exception:
            # rating block absent -> keep the default "0"
            pass
        return {
            "sid": sid,
            "name": name.strip(),
            "rating": rating.strip(),
            "img": img.strip(),
            "year": year
        }

    result = map(func_item_wrap, list(result)[:limits])
    return list(result)
def map_zc_to_rep(start_end_tuple):
    """Map Zip Codes to U.S. Representatives.

    :param start_end_tuple: (start, end, shared_map) — a slice of
        STATE_NAMES plus the shared state->[[zip, city], ...] dict; packed
        into one tuple so the function can be fed to a process pool map.
    """
    print(f"Process {os.getpid()}: ***map_zc_to_rep***")
    start = start_end_tuple[0]
    end = start_end_tuple[1]
    shared_state_zipcode_data_map = start_end_tuple[2]
    sns = STATE_NAMES[start:end]
    browser = Chrome(executable_path=CHROME_DRIVER_PATH)
    print(f"Process {os.getpid()}: States to evaluate: {sns}")
    for state in sns:
        print(f"Process {os.getpid()}: State: {state}")
        if len(shared_state_zipcode_data_map[state]) != 0:
            for index, zip_code_city_pair in enumerate(
                    shared_state_zipcode_data_map[state]):
                zip_code = zip_code_city_pair[0]
                browser.get(REP_URL)
                sleep(2.5)  # let the lookup page finish loading
                find_rep_input_field = browser.find_elements_by_css_selector(
                    '#Find_Rep_by_Zipcode')
                find_rep_input_field[0].send_keys(zip_code)
                find_rep_button = browser.find_elements_by_css_selector(
                    '.btn-success')
                find_rep_button[0].click()
                sleep(2.5)  # let the result page finish loading
                rep_page_anchor_tags = browser.find_elements_by_css_selector(
                    '.rep > a')
                # build a comma-separated list of representative names
                reps = ""
                for anchor_tag in rep_page_anchor_tags:
                    if anchor_tag.text == '':
                        continue
                    print(
                        f"Process {os.getpid()} Representative: {anchor_tag.text}"
                    )
                    reps += anchor_tag.text + ", "
                    # Remove when not debugging
                    # break
                # append the reps string and write the pair back so the
                # update is visible through the shared (proxy) mapping
                zip_code_city_pair.append(reps)
                shared_state_zipcode_data_map[state][
                    index] = zip_code_city_pair
                # Remove when not debugging
                # break
    print("DONE")
    browser.close()
def fintech_urls():
    """Walk the Fintech Times 'fintech' category pages (up to 300) and
    return the list of article URLs, skipping job-board posts."""
    driver = Chrome('/Users/mingyupark/spyder/chromedriver')
    start_page = 'https://thefintechtimes.com/category/fintech/'
    driver.get(start_page)
    collected = []
    for _ in range(300):  # hard cap on the number of pages visited
        anchors = driver.find_elements_by_css_selector('h2.entry-title > a')
        collected.extend(
            anchor.get_attribute('href') for anchor in anchors
            if 'Latest Fintech Jobs' not in anchor.text)
        try:
            # follow the pagination "next" link if one exists
            next_link = driver.find_elements_by_css_selector(
                'a.next.page-numbers')[0]
            driver.get(next_link.get_attribute('href'))
        except IndexError:
            break  # no "next" link -> last page reached
    driver.quit()
    return collected
def download_vip(
    driver: webdriver.Chrome,
    url: str,
):
    """Attempt to download a Baidu Wenku document through a VIP account.

    :param driver: logged-in webdriver
    :param url: document URL
    :returns: dict — {'download_path': ...} on success,
        {'type': 'private'|'public'|'pay'} when this flow cannot handle the
        document kind, or {'error': ...} on failure.
    """
    print("正在前往:" + url)
    driver.get(url)
    try:
        # the VIP badge only renders when an account is logged in
        vip_btn = WebDriverWait(driver, 3).until(
            EC.presence_of_element_located((By.CLASS_NAME, "s-vip-text")))
    except:
        return {'error': '账号未登录'}
    vip_btn = WebDriverWait(driver, 3).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'i.triangle-left + span')))
    # concatenate the badge fragments into the full document-type label
    doc_types = driver.find_elements_by_css_selector('i.triangle-left + span')
    doc_type = ''
    for tt in doc_types:
        doc_type += tt.text
        print(doc_type)
    print(doc_type)
    if doc_type == "VIP专享文档":
        return {'type': 'private'}
    if doc_type == '共享文档':
        return {'type': 'public'}
    if doc_type == '付费文档':
        return {'type': 'pay'}
    if doc_type == "VIP免费文档":
        # VIP-free document: fall through to the download flow below
        pass
    # download
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "btn-download"))
            # EC.element_to_be_clickable((By.CLASS_NAME,'btn-download'))
        )
    except:
        return {'error': 'get download btn error!'}
    btn = driver.find_element_by_class_name('btn-download')
    # click via JS in case the button is not plainly clickable
    driver.execute_script("arguments[0].click();", btn)
    try:
        confirm_ddl = WebDriverWait(driver, 6).until(
            EC.presence_of_element_located(
                (By.CLASS_NAME, "btn-diaolog-downdoc")))
    except:
        return {'error': 'click confirm btn error!'}
    confirm_ddl = driver.find_element_by_class_name("btn-diaolog-downdoc")
    confirm_ddl.click()
    # hand over to the file-handling helper for the actual save
    url_path = wenku_file_handle(url)
    if url_path:
        return {'download_path': url_path}
    else:
        return {'error': 'vip文档下载失败,请重试!'}
def get_now_page_list(driver: webdriver.Chrome) -> list:
    """Return [{'title': ..., 'href': ...}, ...] for every post visible on
    the current listing page."""
    # the first .wrap_list container holds the post table
    container = driver.find_elements_by_css_selector(".wrap_list")[0]
    anchors = container.find_elements_by_css_selector("td.title a")
    return [{'title': anchor.text.strip(),
             'href': anchor.get_attribute('href')}
            for anchor in anchors]
def test_GoogleでSeleniumLibraryを検索する(self):
    """Google-search 'SeleniumLibrary' and verify there are 10 result links."""
    chrome = Chrome()
    chrome.get('https://google.co.jp')
    search_box = chrome.find_element_by_name('q')
    search_box.send_keys('SeleniumLibrary' + Keys.ENTER)
    # print the results and verify their count
    result_links = chrome.find_elements_by_css_selector('h3 > a')
    for result in result_links:
        print(result.text)
    self.assertEqual(len(result_links), 10)
    chrome.quit()
def test_next_results(driver: webdriver.Chrome):
    """After paging to the next results, no id from page 1 may repeat."""
    test_launch_site(driver)
    results_selector = '#mainbar > div.flush-left.js-search-results > div > div'
    first_page_ids = [
        element.get_attribute('id')
        for element in driver.find_elements_by_css_selector(results_selector)
    ]
    with allure.step('Click to Next'):
        next_button = driver.find_element_by_css_selector(
            '#mainbar > div.s-pagination.pager.fl > a:nth-child(8)')
        next_button.click()
    second_page_ids = [
        element.get_attribute('id')
        for element in driver.find_elements_by_css_selector(results_selector)
    ]
    # time.sleep(5)
    # the two result pages must not share any id
    assert not (set(first_page_ids)
                & set(second_page_ids)), 'Запросы повторяются'
def process_request(self, request, spider):
    """Log in to the Gods Unchained marketplace (the login form lives in
    nested shadow roots), open the search listing and return the rendered
    page to Scrapy as an HtmlResponse.
    """
    options = ChromeOptions()
    # options.headless = True
    driver = Chrome(options=options)
    try:
        driver.implicitly_wait(30)
        driver.get('https://godsunchained.com/marketplace')
        driver.find_elements_by_css_selector("gu-login-form")
        # The inputs sit inside nested shadow roots, so they must be
        # reached via JavaScript rather than a plain CSS selector.
        search_button = driver.execute_script(
            'return document.querySelector("gu-login-form").shadowRoot.querySelector('
            '"gu-form").shadowRoot.querySelector("input.inputArea__input")')
        # SECURITY NOTE: credentials are hard-coded here; move them to
        # configuration / environment variables.
        search_button.send_keys("*****@*****.**")
        search_button.send_keys(Keys.TAB, "password")
        login_button = driver.execute_script(
            'return document.querySelector("gu-login-form").shadowRoot.querySelector('
            '"gu-form").shadowRoot.querySelector("gu-primary-hex-button")')
        login_button.click()
        welcome_button = driver.find_element_by_css_selector(".closeButton")
        welcome_button.click()
        # Navigate to the listing page.
        # FIX: the query string contained mojibake ('¤tpage', i.e. the
        # '&curren' HTML entity); restored to '&currentpage=1'.
        driver.get(
            'https://godsunchained.com/marketplace/search?groupby=name&sortby=timestamp&orderby=desc&currentpage=1&perpage=100'
        )
        driver.find_elements_by_css_selector('div.assets__cardItem')
        driver.execute_script('scroll(0, document.body.scrollHeight)')
        time.sleep(0.5)
        return HtmlResponse(
            driver.current_url,
            body=driver.page_source,
            encoding='utf-8',
            request=request,
        )
    finally:
        # FIX: driver.quit() was placed after the return statement and
        # never ran, leaking a Chrome process per request.
        driver.quit()
def process_request(self, request, spider):
    """Open the miime asset listing, click the "load more" button until it
    disappears, then return the fully rendered page to Scrapy.
    """
    options = ChromeOptions()
    # options.headless = True
    driver = Chrome(options=options)
    try:
        driver.implicitly_wait(20)
        driver.get('https://miime.io/assets/2')
        input_element = driver.find_elements_by_css_selector(
            '#__layout > div > main > div.filterButtonBar > div > div:nth-child(5) > a')[0]
        input_element.click()
        all_item = driver.find_elements_by_css_selector(
            '#__layout > div > main > div.filterButtonBar > div > '
            'div.filterButtonBar__filterButton.filterButtonBar__filterButton--saleFilter > '
            'div > div:nth-child(2)')[0]
        all_item.click()
        time.sleep(0.5)
        driver.execute_script('scroll(0, document.body.scrollHeight)')
        # keep clicking "load more" while the button still exists
        more_element = driver.find_element_by_css_selector(
            '#__layout > div > main > div.assetCardList > '
            'div.loadMoreButton__Container > div > button.loadMoreButton')
        while more_element:
            try:
                more_element = driver.find_element_by_css_selector(
                    '#__layout > div > main > div.assetCardList > '
                    'div.loadMoreButton__Container > div > '
                    'button.loadMoreButton')
            except Exception:
                break  # button gone -> everything is loaded
            time.sleep(0.5)
            if more_element:
                try:
                    more_element.click()
                except Exception:
                    break
            else:
                break
        # print('全て表示されているはず。')
        return HtmlResponse(
            driver.current_url,
            body=driver.page_source,
            encoding='utf-8',
            request=request,
        )
    finally:
        # FIX: the original cleanup sat after the return statement and was
        # unreachable, leaking a Chrome process per request.
        driver.quit()
def getTableElements(driver: webdriver.Chrome, tableIndex: int = 0) -> list:
    """
    Find a table with chrome webdriver.

    Tries the 'wsod_dataTableBig' tables first; on an IndexError falls back
    to 'wsod_dataTableBigAlt'. Any other failure is logged.

    :param driver: webdriver positioned on the page to inspect
    :param tableIndex: which matching table to use
    :returns: list of <tr> row elements, or None on unexpected errors
    """
    try:
        selector = driver.find_elements_by_css_selector(
            "table.wsod_dataTableBig")
        table = selector[tableIndex].find_element_by_tag_name('tbody')
    except IndexError:
        # primary table class absent -> fall back to the alternate class
        selector = driver.find_elements_by_css_selector(
            "table.wsod_dataTableBigAlt")
        table = selector[tableIndex].find_element_by_tag_name('tbody')
    except Exception as e:
        # FIX: the original passed e.args (a tuple) to sys.stdout.write(),
        # which requires a string and would itself raise a TypeError.
        sys.stdout.write(str(e))
        return None
    return table.find_elements_by_tag_name('tr')
def test_post_count_on_the_label():
    """Follow the first external link on the 'Shiba Inu' article and check
    the target page has exactly one <h1> element."""
    driver = Chrome(executable_path=ChromeDriverManager().install())
    driver.get('https://en.wikipedia.org/wiki/Shiba_Inu')
    external_link = driver.find_element_by_class_name("external.text")
    external_link.click()
    headings = driver.find_elements_by_css_selector('h1')
    assert len(headings) == 1
    driver.quit()
def order(shop=None, browser=None, lego_set=None, order_list=None, username=None, password=None):
    """
    Fill in LEGO parts to be ordered in LEGO's customer service shop.

    :param shop: country/shop code interpolated into the shop URL
    :param browser: 'chrome' for Chrome, anything else uses Firefox
    :param lego_set: set number to order replacement parts for
    :param order_list: comma-separated "partno:quantity" pairs
    :param username: optional LEGO ID for login
    :param password: optional LEGO ID password
    """
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver import Chrome, Firefox
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.select import Select
    from selenium.webdriver.support.wait import WebDriverWait
    from time import sleep

    order_list = order_list.split(',')
    shop_url = 'https://wwwsecure.us.lego.com/{shop}/service/replacementparts/order'.format(shop=shop)
    browser = Chrome() if browser == 'chrome' else Firefox()
    browser.get(shop_url)

    print("Sometimes they ask you to fill in a survey.")
    try:
        survey_layer = browser.find_element_by_id('ipeL104230')
        survey_layer.send_keys(Keys.ESCAPE)
    except NoSuchElementException:
        print("We're lucky, no survey on the LEGO shop today!")

    print("They want to know how old we are.")
    age_field = browser.find_element_by_name('rpAgeAndCountryAgeField')
    age_field.send_keys('55')
    age_field.send_keys(Keys.RETURN)

    if username and password:
        print("Let's log in with LEGO ID {user}.".format(user=username))
        login_link = browser.find_element_by_css_selector('.legoid .links > a')
        login_link.click()
        # the login form is rendered inside an iframe
        browser.switch_to.frame('legoid-iframe')
        user_input = browser.find_element_by_id('fieldUsername')
        user_input.click()
        user_input.send_keys(username)
        passwd_input = browser.find_element_by_id('fieldPassword')
        passwd_input.click()
        passwd_input.send_keys(password)
        login_button = browser.find_element_by_id('buttonSubmitLogin')
        login_button.click()
        browser.switch_to.default_content()
        sleep(4)  # seconds

    wait = WebDriverWait(browser, 5)
    print("We need to tell them which set we want to buy parts from: {lego_set}".format(lego_set=lego_set))
    # NOTE(review): selector reassembled from a line-broken source literal —
    # confirm '.product-search input[ng-model=productNumber]' is correct.
    setno_field = wait.until(EC.element_to_be_clickable(
        (By.CSS_SELECTOR, '.product-search input[ng-model=productNumber]')))
    setno_field.send_keys(lego_set)
    setno_field.send_keys(Keys.RETURN)
    print("Let's scroll the page down a bit, so we can see things better.")
    browser.execute_script("window.scroll(0, 750);")

    print("That's gonna be crazy: {count} elements to order! Let's rock.".format(count=len(order_list)))
    element_field = wait.until(EC.element_to_be_clickable(
        (By.ID, 'element-filter')))
    print()
    for brick in order_list:
        part_no, quantity = brick.split(':')
        print("- {qty}x #{pn} ".format(qty=quantity, pn=part_no), end='')
        element_field.clear()
        element_field.send_keys(part_no)
        element_field.send_keys(Keys.RETURN)
        sleep(.3)  # seconds
        try:
            add_button = browser.find_element_by_css_selector('.element-details + button')
            add_button.click()
            sleep(.2)  # seconds
        except NoSuchElementException:
            print("OOOPS! No LEGO part with that number found in set #{set}. :-(".format(set=lego_set))
            continue
        try:
            # out-of-stock warning: confirm the "add anyway" button if shown
            warn_msg = browser.find_element_by_css_selector('.alert-warning .sold-out-info')
            if warn_msg.is_displayed():
                print("NOTE: item out of stock. ", end='')
                add_anyway = browser.find_element_by_css_selector('.alert-warning + .clearfix button')
                add_anyway.click()
        except NoSuchElementException:
            pass
        # the newest bag item carries the quantity dropdown for this part
        amount_select = browser.find_elements_by_css_selector('.bag-item select')[-1]
        amount_select.send_keys(quantity)
        amount_select.send_keys(Keys.TAB)
        selected = Select(amount_select).first_selected_option
        if quantity != selected.text:
            print("WARNING: Could not select desired quantity. {} != {}".format(quantity, selected.text))
        else:
            print()
    browser.execute_script("window.scroll(0, 0);")
    print()
    print("We're done. You can finalize your order now. Thanks for watching!")