def click_status(driver: webdriver) -> WebElement:
    """
    :param driver: a webdriver object
    :return: a WebElement of the status component
    """
    if first_time:
        set_first_time_false()
        composer = driver.find_element_by_id("pagelet_composer")
        return composer.find_element_by_tag_name("textarea")
    else:
        return find_element_by_test_id(driver, "status-attachment-mentions-input")
def blockFindByXpath(driver: webdriver, xpath, retry=200) -> WebElement:
    tag = None
    for _ in range(retry):
        try:
            tag = driver.find_element_by_xpath(xpath)
        except Exception:
            pass
        if tag:
            break
        sleep(0.1)
    sleep(0.1)
    return tag
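# A minimal usage sketch for blockFindByXpath, assuming a standard Selenium
# driver; the URL and XPath below are hypothetical placeholders. Selenium's
# WebDriverWait with expected_conditions is the idiomatic built-in
# alternative to this manual polling loop.
def _example_block_find(driver: webdriver) -> None:
    driver.get("https://example.com")
    # Polls up to ~20 seconds (200 retries x 0.1 s) before giving up.
    tag = blockFindByXpath(driver, "//button[@id='submit']")
    if tag is not None:
        tag.click()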
def radio_button_helper(driver: webdriver, common_id: str, unique_id: str) -> None:
    """
    Helper to deal with radio buttons which share NAME, ID attributes.

    :param driver: webdriver object.
    :param common_id: example: "@name='my_radio_button'"
    :param unique_id: example: "@value='01'"
    :return: None.
    """
    # Build an XPath to locate the element on the page, then click it
    # through the JS engine. Who said they were inflexible?
    try:
        driver.execute_script(
            'arguments[0].click()',
            driver.find_element(
                By.XPATH,
                ''.join(['//*[', common_id, ' and ', unique_id, ']'])))
    except NoSuchElementException:
        pass
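# A hedged usage sketch for radio_button_helper; the attribute predicates
# mirror the hypothetical examples from its docstring and do not refer to a
# real page. The JS-engine click avoids ElementClickInterceptedException
# when an overlay covers the native radio input.
def _example_radio_click(driver: webdriver) -> None:
    radio_button_helper(driver,
                        common_id="@name='my_radio_button'",
                        unique_id="@value='01'")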
def fetch_image_urls(query: str, max_links_to_fetch: int, wd: webdriver,
                     sleep_between_interactions: int = 1):
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(sleep_between_interactions)

    # build the google query
    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    # load the page
    wd.get(search_url.format(q=query))
    sleep(0.7)

    image_urls = set()
    image_count = 0
    results_start = 0
    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        # get all image thumbnail results
        thumbnail_results = wd.find_elements_by_css_selector("img.rg_i")
        number_results = len(thumbnail_results)
        print(f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}")

        for img in thumbnail_results[results_start:number_results]:
            # try to click every thumbnail such that we can get the real image behind it
            try:
                img.click()
                sleep(sleep_between_interactions)
            except Exception:
                continue

            # extract image urls
            actual_images = wd.find_elements_by_css_selector('img.rg_i')
            for actual_image in actual_images:
                if actual_image.get_attribute('src'):
                    image_urls.add(actual_image.get_attribute('src'))

            image_count = len(image_urls)
            if len(image_urls) >= max_links_to_fetch:
                print(f"Found: {len(image_urls)} image links, done!")
                break
        else:
            print("Found:", len(image_urls), "image links, looking for more ...")
            sleep(0.8)
            # find_element_by_class_name takes a bare class name, not a CSS selector
            load_more_button = wd.find_element_by_class_name("ksb")
            if load_more_button:
                wd.execute_script("document.querySelector('.ksb').click();")

        # move the result startpoint further down
        results_start = len(thumbnail_results)
    return image_urls
def fetch_image_urls(query: str, max_links_to_fetch: int, wd: webdriver,
                     sleep_between_interactions: int = 1):
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(sleep_between_interactions)

    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"
    wd.get(search_url.format(q=query))

    image_urls = set()
    image_count = 0
    results_start = 0
    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        thumbnail_results = wd.find_elements_by_css_selector("img.Q4LuWd")
        number_results = len(thumbnail_results)
        print(f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}")

        for img in thumbnail_results[results_start:number_results]:
            try:
                img.click()
                time.sleep(sleep_between_interactions)
            except Exception:
                continue

            actual_images = wd.find_elements_by_css_selector('img.n3VNCb')
            for actual_image in actual_images:
                if actual_image.get_attribute('src') and 'http' in actual_image.get_attribute('src'):
                    image_urls.add(actual_image.get_attribute('src'))

            image_count = len(image_urls)
            if len(image_urls) >= max_links_to_fetch:
                print(f"Found: {len(image_urls)} image links, done!")
                break
        else:
            print("Found:", len(image_urls), "image links, looking for more ...")
            time.sleep(30)
            load_more_button = wd.find_element_by_css_selector(".mye4qd")
            if load_more_button:
                wd.execute_script("document.querySelector('.mye4qd').click();")

        results_start = len(thumbnail_results)
    return image_urls
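# A minimal sketch of how the URLs returned by fetch_image_urls might be
# persisted, assuming the requests library is available; the folder path and
# the hash-based filename scheme are illustrative choices, not part of the
# scraper above.
import hashlib
import os
import requests

def persist_images(urls, folder: str = "./downloads") -> None:
    os.makedirs(folder, exist_ok=True)
    for url in urls:
        try:
            content = requests.get(url, timeout=10).content
        except requests.RequestException:
            continue
        # Hash the payload so the same image fetched twice is stored once.
        name = hashlib.sha1(content).hexdigest()[:16] + ".jpg"
        with open(os.path.join(folder, name), "wb") as fh:
            fh.write(content)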
def fetch_image_urls_google(query: str, max_links_to_fetch: int, wd: webdriver,
                            sleep_between_interactions: float = 2.5):
    search_url = f'https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={query}&oq={query}&gs_l=img'
    wd.get(search_url)

    # Scroll until the page height stops growing, clicking the
    # "Show more results" input along the way.
    last_height = wd.execute_script('return document.body.scrollHeight')
    while True:
        wd.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        time.sleep(sleep_between_interactions)
        new_height = wd.execute_script('return document.body.scrollHeight')
        try:
            wd.find_element_by_xpath('//*[@id="islmp"]/div/div/div/div/div[4]/div[2]/input').click()
            time.sleep(sleep_between_interactions)
        except Exception:
            pass
        if new_height == last_height:
            break
        last_height = new_height

    image_urls = set()
    thumbnail_results = wd.find_elements_by_css_selector("img.Q4LuWd")
    number_results = len(thumbnail_results)
    print(f"Found: {number_results} search results. Extracting links.")

    limit = min(max_links_to_fetch, number_results)
    for idx, img in enumerate(thumbnail_results[:limit]):
        try:
            img.click()
            time.sleep(sleep_between_interactions)
            actual_image = wd.find_element_by_xpath(
                '//*[@id="Sva75c"]/div/div/div[3]/div[2]/c-wiz/div/div[1]/div[1]/div/div[2]/a/img')
            if actual_image.get_attribute('src') and 'http' in actual_image.get_attribute('src'):
                url = actual_image.get_attribute('src')
                image_urls.add(url)
                print(f'{idx}/{limit}: {url}')
        except Exception:
            continue
    return image_urls
def chat(dialogue: str, driver: webdriver, count: int) -> List[str]:
    """Input dialogue for the chat bot and return their response."""
    # Locate chat form.
    chat_form = driver.find_element_by_xpath("//div[@id='main-input']/input")

    # Wait for chat window to load.
    WebDriverWait(driver, 10).until(
        ec.visibility_of_all_elements_located(
            (By.XPATH, "(//div[@class='pb-bot-response'])[1]")))
    chat_form.send_keys(dialogue)
    chat_form.send_keys(Keys.RETURN)

    # Wait for response.
    WebDriverWait(driver, 10).until(
        ec.visibility_of_all_elements_located(
            (By.XPATH, "(//div[@class='pb-bot-response'])[" + str(count) + ']')))

    # Get response and extract relevant text.
    messages = driver.find_element_by_xpath(
        "(//div[@class='pb-bot-response'])[" + str(count) + ']'
    ).find_elements_by_class_name('pb-message')
    response = []
    for message in messages:
        if has_image(message):
            children = message.find_elements_by_xpath(
                ".//img[@class='pb-standalone-image pb-fullSizeImage']")
            for child in children:
                response.append(child.get_attribute('src'))
        elif not has_button(message):
            response.append(message.text)
    return response
def filter_posts_list(driver: webdriver, months: list, posts_list: list) -> list:
    """Keep only the posts that are relevant for the analysis."""
    res = []
    for post_url in posts_list:
        driver.get(post_url)
        try:
            sleep(2)
            post_time = driver.find_element_by_class_name('Nzb55').get_attribute('datetime')
            for month in months:
                if post_time[0:7] == month:
                    res.append(post_url)
        except NoSuchElementException:
            print(f'DATE NOT FOUND! POST {post_url}')
            # Retry once before giving up on the date check.
            driver.get(post_url)
            try:
                sleep(2)
                post_time = driver.find_element_by_class_name('Nzb55').get_attribute('datetime')
                for month in months:
                    if post_time[0:7] == month:
                        res.append(post_url)
            except NoSuchElementException:
                print(f'DATE NOT FOUND A SECOND TIME! POST {post_url}')
                res.append(post_url)
    return res
def test_invalid_custom_docker(driver: selenium.webdriver, *args, **kwargs):
    """
    Test invalid custom Docker instructions.

    Args:
        driver
    """
    r = testutils.prep_py3_minimal_base(driver)
    username, project_name = r.username, r.project_name

    # add an invalid custom docker instruction
    logging.info("Adding invalid custom Docker")
    environment = testutils.EnvironmentElements(driver)
    environment.environment_tab_button.click()
    time.sleep(1)
    driver.execute_script("window.scrollBy(0, 600);")
    environment.custom_docker_edit_button.click()
    time.sleep(1)
    environment.custom_docker_text_input.send_keys("RUN /bin/false")
    time.sleep(1)
    driver.execute_script("window.scrollBy(0, 300);")
    environment.custom_docker_save_button.click()

    # wait until container status is stopped
    wait = selenium.webdriver.support.ui.WebDriverWait(driver, 200)
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".flex>.Rebuild")))
    time.sleep(2)

    # assert container status shows Rebuild and the build failure is in the footer
    assert driver.find_element_by_css_selector(".flex>.Rebuild").is_displayed(), \
        "Expected rebuild container status"
    assert "Project failed to build" in driver.find_element_by_css_selector(".Footer__message-title").text, \
        "Expected 'Project failed to build' in footer"
def execSearch(browser: webdriver):
    """
    Access the Kyoto University assignment site and report unsubmitted assignments.
    """
    browser.get('https://cas.ecs.kyoto-u.ac.jp/cas/login?service=https%3A%2F%2Fpanda.ecs.kyoto-u.ac.jp%2Fsakai-login-tool%2Fcontainer')
    sleep(1)

    # Log in
    user_id = browser.find_element_by_name("username")
    user_id.clear()
    user_id.send_keys(settings.USN)
    password = browser.find_element_by_name("password")
    password.clear()
    password.send_keys(settings.PWD)
    login = browser.find_element_by_class_name("btn-submit")
    login.click()
    sleep(1)

    # Navigate to each course page
    base_url = browser.current_url
    links = {
        "弾性体の力学解析": "2020-110-3200-000",
        "流体力学": "2020-110-3165-000",
        "一般力学": "2020-110-3010-100",
        "基礎有機化学I": "2020-888-N347-014",
        "地球環境学のすすめ": "2020-888-Y201-001",
        "社会基盤デザインI": "2020-110-3181-000",
        "工業数学B2": "2020-110-3174-000",
        "確率統計解析及び演習": "2020-110-3003-000",
        "水文学基礎": "2020-110-3030-000",
        "地球工学基礎数理": "2020-110-3005-000",
    }
    for subject, link_id in links.items():
        unkadai(base_url, subject, link_id)
def submit_solution(driver: webdriver, local_file_path: str):
    driver.find_element_by_css_selector(
        CSS_SELECTORS['task_submit_tab']).click()
    driver.find_element_by_css_selector(
        CSS_SELECTORS['task_file_selector']).send_keys(local_file_path)
    driver.find_element_by_css_selector(
        CSS_SELECTORS['task_submit_button']).click()
def __paginate(driver: webdriver, pagination_index: int, timeout=5) -> object:
    next_page_button = driver.execute_script(
        '''
            return document.querySelector(arguments[0]);
        ''', Selectors.paginationNextBtn)

    if next_page_button is None:
        return {'success': False, 'error': 'There are no more pages to visit'}

    try:
        offset = int(get_query_params(driver.current_url)['start'])
    except Exception:
        offset = 0
    offset += 25

    url = override_query_params(driver.current_url, {'start': offset})
    driver.get(url)

    # Wait for new jobs to load
    elapsed = 0
    sleep_time = 0.05
    while elapsed < timeout:
        loaded = driver.execute_script(
            '''
                return document.querySelectorAll(arguments[0]).length > 0;
            ''', Selectors.links)
        if loaded:
            return {'success': True}
        sleep(sleep_time)
        elapsed += sleep_time

    return {'success': False, 'error': 'Timeout on pagination'}
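# A hedged usage sketch for __paginate: the page loop below is a hypothetical
# driver of the function, and the scraping of each page is elided. Selectors,
# get_query_params, and override_query_params are assumed to come from the
# surrounding module, as in __paginate itself.
def _example_paginate_all(driver: webdriver, max_pages: int = 10) -> None:
    for page_index in range(max_pages):
        result = __paginate(driver, page_index)
        if not result['success']:
            # Either there are no more pages or the load timed out.
            print(result.get('error'))
            break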
def fetch_image_urls(query: str, max_links_to_fetch: int, wd: webdriver,
                     sleep_between_interaction: int = 1):
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0,document.body.scrollHeight);")
        time.sleep(sleep_between_interaction)

    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    # load the page
    wd.get(search_url.format(q=query))

    image_urls = set()
    image_count = 0
    result_start = 0
    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        # get all the image thumbnail results
        thumbnails_result = wd.find_elements_by_css_selector("img.Q4LuWd")
        num_results = len(thumbnails_result)
        print(f"Found: {num_results} search results. Extracting links from {result_start}:{num_results}")

        for img in thumbnails_result[result_start:num_results]:
            # try to click every thumbnail to get the image page behind it
            try:
                img.click()
                time.sleep(sleep_between_interaction)
            except Exception:
                continue

            # extract image urls
            actual_images = wd.find_elements_by_css_selector('img.n3VNCb')
            for actual_image in actual_images:
                if actual_image.get_attribute('src') and 'http' in actual_image.get_attribute('src'):
                    image_urls.add(actual_image.get_attribute('src'))

            image_count = len(image_urls)
            if len(image_urls) >= max_links_to_fetch:
                print(f"Found: {len(image_urls)} image links, done!")
                break
        else:
            print(f"Found: {len(image_urls)} image links, looking for more ...")
            time.sleep(30)
            load_more_button = wd.find_element_by_css_selector('.mye4qd')
            if load_more_button:
                wd.execute_script("document.querySelector('.mye4qd').click();")

        # move the result start point further down
        result_start = len(thumbnails_result)
    return image_urls
def borrar_archivo_claro_drive(webdriver_test_ux: webdriver, jsonEval):
    tiempo_step_inicio = Temporizador.obtener_tiempo_timer()
    fecha_inicio = Temporizador.obtener_fecha_tiempo_actual()
    try:
        btn_borrar = webdriver_test_ux.find_element_by_xpath(
            '//input[@class="menuItem svg deleteImage icon-delete icon-32"]')
        btn_borrar.click()
        time.sleep(10)

        btn_cerrar = webdriver_test_ux.find_element_by_xpath(
            '//input[@class="svg exit icon-close icon-32"]')
        time.sleep(4)
        btn_cerrar.click()
        time.sleep(4)

        jsonEval["steps"][3]["output"][0]["status"] = jsonConst.SUCCESS
        jsonEval["steps"][3]["status"] = jsonConst.SUCCESS
        jsonEval["steps"][3]["output"][0]["output"] = 'The file was deleted successfully'
    except (selExcep.NoSuchElementException, selExcep.ElementClickInterceptedException):
        jsonEval["steps"][3]["output"][0]["status"] = jsonConst.FAILED
        jsonEval["steps"][3]["status"] = jsonConst.FAILED
        jsonEval["steps"][3]["output"][0]["output"] = 'The file could not be deleted'

    tiempo_step_final = Temporizador.obtener_tiempo_timer() - tiempo_step_inicio
    fecha_fin = Temporizador.obtener_fecha_tiempo_actual()
    jsonEval["steps"][3]["time"] = FormatUtils.truncar_float_cadena(tiempo_step_final)
    jsonEval["steps"][3]["start"] = fecha_inicio
    jsonEval["steps"][3]["end"] = fecha_fin
    return jsonEval
def cerrar_sesion_claro_drive(webdriver_test_ux: webdriver, jsonEval):
    tiempo_step_inicio = Temporizador.obtener_tiempo_timer()
    fecha_inicio = Temporizador.obtener_fecha_tiempo_actual()
    try:
        boton_ajustes = webdriver_test_ux.find_element_by_id('expand')
        boton_ajustes.click()
        time.sleep(4)

        boton_cerrar_sesion = webdriver_test_ux.find_element_by_xpath(
            '//li[@data-id="logout"]')
        boton_cerrar_sesion.click()
        time.sleep(10)

        jsonEval["steps"][4]["output"][0]["status"] = jsonConst.SUCCESS
        jsonEval["steps"][4]["status"] = jsonConst.SUCCESS
        jsonEval["steps"][4]["output"][0]["output"] = 'Signed out successfully'
    except (selExcep.NoSuchElementException, selExcep.ElementClickInterceptedException):
        jsonEval["steps"][4]["output"][0]["status"] = jsonConst.FAILED
        jsonEval["steps"][4]["status"] = jsonConst.FAILED
        jsonEval["steps"][4]["output"][0]["output"] = 'Could not sign out'

    tiempo_step_final = Temporizador.obtener_tiempo_timer() - tiempo_step_inicio
    fecha_fin = Temporizador.obtener_fecha_tiempo_actual()
    jsonEval["steps"][4]["time"] = FormatUtils.truncar_float_cadena(tiempo_step_final)
    jsonEval["steps"][4]["start"] = fecha_inicio
    jsonEval["steps"][4]["end"] = fecha_fin
    return jsonEval
def testGETXSSDriver(url: str, cookies: Mapping[str, str], driver: webdriver) -> Optional[str]:
    """
    If the given URL pops an alert box when accessed with the given cookies,
    return the contents of the alert box, otherwise return None
    """
    driver.setCookies(url, cookies)
    try:
        driver.get(url)
        WebDriverWait(driver, config.timeout).until(expected_conditions.alert_is_present())
        # Note that despite the name switch_to_alert also handles prompt:
        # - http://selenium-python.readthedocs.io/navigating.html#popup-dialogs
        alert = driver.switch_to_alert()
        text = alert.text
        driver.reset()
        return text
    except (TimeoutException, URLError):
        driver.reset()
        return None
def select_style(driver: webdriver):
    styles = driver.find_elements_by_xpath(
        '//*[@id="container"]/aside/div/table/tbody/tr[1]/td[1]/ul/li')
    max_iter = len(styles)

    for i in range(max_iter):
        # Re-fetch the list each pass to avoid stale element references.
        styles = driver.find_elements_by_xpath(
            '//*[@id="container"]/aside/div/table/tbody/tr[1]/td[1]/ul/li')
        style_name = str(styles[i].text)
        styles[i].find_element_by_tag_name('a').send_keys(Keys.ENTER)
        time.sleep(3)
        driver.find_element_by_xpath(
            '//*[@id="container"]/section/div/header/p[2]/a[1]').click()

        title_list = []
        artist_list = []
        image_list = []
        select_playlist(driver, title_list, artist_list, image_list)
        data = zip(title_list, artist_list, image_list)
        data_to_csv(data, style_name, "style")
def select_song(driver: webdriver, title_list, artist_list, image_list):
    url = driver.current_url
    request = requests.get(url)
    html = request.text
    bs = BeautifulSoup(html, 'html.parser')

    titles = bs.select('p.title')
    artists = bs.select('p.artist')
    images = bs.select('a.thumbnail')

    for i in range(len(titles)):
        # Skip songs marked as having no rights ('[권리없는 곡]').
        if '[권리없는 곡]' in str(titles[i].text):
            continue
        title = str(titles[i].find('a').text)

        anchor_list = artists[i].find_all('a')
        if len(anchor_list) > 1:
            attr = anchor_list[1]['onclick']
            attr = attr.split("'")[1].split("||")
            # Drop numeric tokens, then take every other entry as an artist name.
            attr = [word for word in attr if not word.isdigit()]
            artist = ", ".join(attr[1::2])
        else:
            artist = artists[i].text.strip().split('\n')[0]

        image = images[i].find('img')['src']
        title_list.append(title)
        artist_list.append(artist)
        image_list.append(image)
    driver.back()
def log_in(driver: selenium.webdriver, user_index: int = 0) -> str:
    """
    Log in to Gigantum.

    Args:
        driver
        user_index: an offset into credentials.txt

    Returns:
        Username of user just logged in
    """
    username, password = testutils.load_credentials(user_index=user_index)

    driver.get(f"{os.environ['GIGANTUM_HOST']}/projects/local#")
    auth0_elts = elements.Auth0LoginElements(driver)
    auth0_elts.login_green_button.wait().click()
    auth0_elts.auth0_lock_widget.wait()
    if auth0_elts.auth0_lock_button.selector_exists():
        logging.info("Clicking 'Not your account?'")
        auth0_elts.not_your_account_button.wait().click()
    auth0_elts.do_login(username, password)
    time.sleep(5)

    # Set the ID and ACCESS TOKENS -- Used as headers for GraphQL mutations
    access_token = driver.execute_script(
        "return window.localStorage.getItem('access_token')")
    id_token = driver.execute_script(
        "return window.localStorage.getItem('id_token')")
    active_username = driver.execute_script(
        "return window.localStorage.getItem('username')")
    assert active_username == username, \
        f"Username from credentials.txt ({username}) must match chrome cache ({active_username})"

    os.environ['GIGANTUM_USERNAME'] = active_username
    os.environ['ACCESS_TOKEN'] = access_token
    os.environ['ID_TOKEN'] = id_token
    assert os.environ['ACCESS_TOKEN'], "ACCESS_TOKEN could not be retrieved"
    assert os.environ['ID_TOKEN'], "ID_TOKEN could not be retrieved"

    return username.strip()
def removeKataomoi(browser: webdriver, safe_accounts):
    """
    Log in to kataomoi.net and unfollow one-way follows.

    :param browser: webdriver
    """
    global remain_remove_count

    # Access kataomoi.net
    browser.get('http://kataomoi.net/redirect.php')
    sleep(1)

    url = browser.current_url
    is_confirm = url.startswith("https://api.twitter.com/oauth/authorize?oauth_token")
    if is_confirm:
        submit_btn = browser.find_element_by_id("allow")
        submit_btn.click()
        sleep(1)
    else:
        # Enter login credentials
        username_or_email = browser.find_element_by_xpath("//*[@id='username_or_email']")
        username_or_email.send_keys(USER_NAME)
        password = browser.find_element_by_xpath("//*[@id='password']")
        password.send_keys(PASSWORD)
        # Log in
        password.submit()
        sleep(1)

    browser.get('http://kataomoi.net/find_one_way.php')
    sleep(1)

    trs = browser.find_elements_by_tag_name("tr")
    # Sort from oldest to newest
    reversed_trs = reversed(trs)
    print('▼ Unfollowing... ▼')
    for tr in reversed_trs:
        if remain_remove_count <= 0:
            break
        tds = tr.find_elements_by_tag_name("td")
        if len(tds) > 1 and not (tds[1].find_element_by_tag_name("a").text in safe_accounts):
            print(tds[1].find_element_by_tag_name("a").text)
            tr.find_elements_by_tag_name("span")[0].click()
            remain_remove_count = remain_remove_count - 1
            sleep(0.5)
def get_budget(brow: webdriver) -> float:
    """
    Extract the text from the HTML element and return it as a float.
    """
    money_path = './/span[@class="user-balance ng-binding"]'
    money_el = brow.find_element_by_xpath(money_path)

    money_value = None
    while not money_value:
        money_value = money_el.get_attribute('innerText')

    return float(money_value.replace(',', '.'))
def extract_match_datetime(brow: webdriver, match_obj: WebElement) -> datetime:
    scroll_to_element(brow, match_obj)
    data = match_obj.find_element_by_xpath(
        './/div[@class="event-date ng-binding"]').text

    ddmmyy, hhmm = data.split(' - ')
    day, month, year = ddmmyy.split('/')
    hour, minute = hhmm.split(':')

    match_datetime = f'{year}-{month}-{day} {hour}:{minute}:00'
    return datetime.strptime(match_datetime, '%Y-%m-%d %H:%M:%S')
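# A worked example of the date handling in extract_match_datetime, using an
# illustrative "dd/mm/yyyy - hh:mm" string of the kind the XPath above
# extracts from the page.
def _example_parse_match_datetime() -> datetime:
    data = '27/10/2021 - 20:45'
    ddmmyy, hhmm = data.split(' - ')          # '27/10/2021', '20:45'
    day, month, year = ddmmyy.split('/')
    hour, minute = hhmm.split(':')
    # -> datetime(2021, 10, 27, 20, 45)
    return datetime.strptime(f'{year}-{month}-{day} {hour}:{minute}:00',
                             '%Y-%m-%d %H:%M:%S')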
def fetch_image_urls(query: str, max_links_to_fetch: int, wd: webdriver,
                     sleep_between_interactions: float = 1.0):
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(sleep_between_interactions)

    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    # load the page
    wd.get(search_url.format(q=query))

    image_urls = set()
    image_count = 0
    results_start = 0
    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        # get all image thumbnail results
        thumbnail_results = wd.find_elements_by_css_selector("img.Q4LuWd")
        number_results = len(thumbnail_results)
        logger.info("Found: %i search results. Extracting links from %i:%i"
                    % (number_results, results_start, number_results))

        for img in thumbnail_results[results_start:number_results]:
            # try to click every thumbnail such that we can get the real image behind it
            try:
                img.click()
                time.sleep(sleep_between_interactions)
            except Exception:
                continue

            # extract image urls
            actual_images = wd.find_elements_by_css_selector("img.n3VNCb")
            for actual_image in actual_images:
                if actual_image.get_attribute("src") and "http" in actual_image.get_attribute("src"):
                    image_urls.add(actual_image.get_attribute("src"))

            image_count = len(image_urls)
            if len(image_urls) >= max_links_to_fetch:
                logger.info("Found: %i image links, done!" % len(image_urls))
                break
        else:
            logger.info("Found: %i image links, looking for more ..." % len(image_urls))
            time.sleep(30)
            load_more_button = wd.find_element_by_css_selector(".mye4qd")
            if load_more_button:
                wd.execute_script("document.querySelector('.mye4qd').click();")

        # move the result startpoint further down
        results_start = len(thumbnail_results)
    return image_urls
def scrape_once(driver: webdriver, save_into_dict: bool, tagDict=None):
    # A mutable default argument would be shared across calls; default to None
    # and create a fresh dict per call instead.
    if save_into_dict and tagDict is None:
        tagDict = {}

    tagCount = 0
    postCount = 0
    driver.get('https://www.instagram.com/explore')
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, 'article')))

    all_links = driver.find_elements_by_tag_name('a')
    for link in all_links:
        href = link.get_attribute('href')
        # get_attribute can return None, so guard before endswith.
        if href and href.endswith('?explore=true'):
            media_html = requests.get(href).text
            soup = BeautifulSoup(media_html, 'lxml')
            timestamp: float = time.time()
            hashtags = soup.findAll(attrs={"property": "instapp:hashtags"})
            for tagElement in hashtags:
                tag = tagElement.get('content')
                if save_into_dict:
                    tagDict[tag] = tagDict.get(tag, 0) + 1
                put_tag(tag, timestamp)
                tagCount += 1
            postCount += 1
    print('%d tags processed in %d posts' % (tagCount, postCount))
def test_delete_project(driver: selenium.webdriver, *args, **kwargs):
    """
    Test that deleting a project in Gigantum deletes it from the file system.

    Args:
        driver
    """
    r = testutils.prep_py3_minimal_base(driver)
    username, project_name = r.username, r.project_name

    # Check that project path exists on file system
    logging.info("Checking that the project exists in the file system")
    project_path = os.path.join(os.environ['GIGANTUM_HOME'], username, username,
                                'labbooks', project_name)
    assert os.path.exists(project_path), \
        f"Project {project_name} should exist at {project_path}"

    logging.info("Finding project Docker image")
    dc = docker.from_env()
    project_img = []
    for img in dc.images.list():
        for t in img.tags:
            if 'gmlb-' in t and project_name in t:
                logging.info(f"Found Docker image {t} for {project_name}")
                project_img.append(img)
    assert len(project_img) == 1, f"Must be one docker tag for {project_name}"

    # Navigate to the "Delete Project" button and click it
    logging.info("Navigating to 'Delete Project' and delete the project")
    driver.find_element_by_css_selector(".ActionsMenu__btn").click()
    time.sleep(3)
    driver.find_element_by_css_selector(".ActionsMenu__item--delete").click()
    time.sleep(3)
    driver.find_element_by_css_selector("#deleteInput").send_keys(project_name.lstrip())
    driver.find_element_by_css_selector(".DeleteLabbook .ButtonLoader").click()
    time.sleep(5)

    # Check all post conditions for delete:
    # 1. Does not exist in filesystem, and
    # 2. Docker image no longer exists
    logging.info("Checking that project path and project Docker image no longer exist")
    assert not os.path.exists(project_path), f"Project at {project_path} not deleted"

    project_img = []
    for img in dc.images.list():
        for t in img.tags:
            if 'gmlb-' in t and project_name in t:
                logging.error(f'Docker tag {t} still exists for deleted project {project_name}')
                project_img.append(img)
    assert len(project_img) == 0, \
        f"Docker image for {project_path}: {project_img[0]} still exists"
def get_neighborhood_state_zip(driver: webdriver) -> tuple:
    state_list = []
    zipcode_list = []
    neighborhood_list = []

    addresses_2 = driver.find_elements_by_css_selector('.bfg-gallery-address2')
    for t in addresses_2:
        t_list = t.text.split()
        # Drop the trailing comma from the neighborhood name.
        neighborhood_list.append(t_list[0][:-1])
        state_list.append(t_list[1])
        zipcode_list.append(int(t_list[2]))
    return neighborhood_list, state_list, zipcode_list
def page_up(driver: webdriver):
    """
    Go to the next page.
    """
    while True:
        try:
            tag = driver.find_element_by_css_selector(Order.page_down.value)
            if tag:
                time.sleep(2)
                tag.click()
                break
        except ElementClickInterceptedException:
            move_down(driver)
def fetch_image_urls(search_term: str, n_links: int, web_driver: webdriver,
                     sleep_between_interactions: int = 1):
    def scroll_to_end(web_driver):
        web_driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(sleep_between_interactions)

    search_url = f"https://www.google.com/search?tbm=isch&q={'+'.join(search_term.split())}"
    web_driver.get(search_url)

    image_counter = 0
    res_start = 0
    image_urls = set()
    while image_counter < n_links:
        thumbnail_results = web_driver.find_elements_by_css_selector('img.Q4LuWd')
        number_results = len(thumbnail_results)
        scroll_to_end(web_driver)
        print(f'Found {number_results} search results. Extracting links {res_start} to {number_results}')

        for img in thumbnail_results[res_start:number_results]:
            try:
                img.click()
                time.sleep(sleep_between_interactions)
            except Exception:
                continue

            actual_images = web_driver.find_elements_by_css_selector('img.n3VNCb')
            for actual in actual_images:
                src = actual.get_attribute('src')
                if src and 'http' in src:
                    image_urls.add(src)

            image_counter = len(image_urls)
            if image_counter >= n_links:
                print(f"Found {n_links} image links.")
                break
        else:
            print("Found:", len(image_urls), "image links, looking for more ...")
            time.sleep(3)
            load_more_button = web_driver.find_element_by_css_selector(".mye4qd")
            if load_more_button:
                web_driver.execute_script("document.querySelector('.mye4qd').click();")

        res_start = len(thumbnail_results)
    return image_urls
def find_sizes(self, driver: webdriver) -> List[Size]:  # pylint: disable=missing-function-docstring
    sizes = driver.find_elements_by_css_selector('.size-list .product-size')
    obj_sizes = []
    for size in sizes:
        deque_size_types = collections.deque(
            size.get_attribute('data-name').split(' ('), 2)
        obj_size = Size(deque_size_types.popleft(),
                        self.clean_nubmer_size(deque_size_types.popleft()),
                        "disabled" in size.get_attribute("class"))
        obj_sizes.append(obj_size)
    return obj_sizes
def click_related_images(driver: webdriver, url: str):
    related = []
    pbar = tqdm(desc=f'starting looping on {len(tbnids)} data ids')
    while len(tbnids):
        pbar.update()
        data_id = tbnids.pop()
        pbar.set_description(f'left {len(tbnids)} data ids to process')
        visited.add(data_id)
        driver.get(f'{url}#imgrc={data_id}')
        if '#imgrc' not in driver.current_url:
            continue

        text = driver.page_source.encode('utf-8').decode('utf-8')
        doc = html.fromstring(text)
        links = [f'https://www.google.com/{a.get("href")}'
                 for a in doc.cssselect('a')
                 if 'aria-label' in a.attrib and 'Related images' in a.get('aria-label')]
        if len(links) > 0:
            rel = links[0]
            if rel.startswith('http'):
                related.append(rel)
    return related
def execSearch(browser: webdriver):
    """
    Run a search on Google.

    :param browser: webdriver
    """
    # Get the current date/time for the screenshot filename
    dt = datetime.datetime.today()
    dtstr = dt.strftime("%Y%m%d%H%M%S")

    # Go to Google
    browser.get('https://www.google.co.jp/')
    sleep(1)

    # Enter the keywords
    browser.find_element_by_id('lst-ib').send_keys('docker selenium')

    # Run the search
    browser.find_element_by_name('btnK').submit()
    sleep(1)

    # Take a screenshot
    browser.save_screenshot('images/' + dtstr + '.png')