def browser(config_browser, config_headless_mode, request):
    """Create a configured Selenium driver, yield it, then quit it.

    Written as a generator so it can be used as a setup/teardown fixture:
    everything before ``yield`` is setup, the ``finally`` clause is teardown.

    :param config_browser: browser name; one of ``'chrome'``, ``'firefox'``
        or ``'ie'``.
    :param config_headless_mode: truthy to run the browser headless
        (not supported by IE; a warning is issued and IE starts normally).
    :param request: unused here; kept so the signature stays unchanged.
    :raises ValueError: if ``config_browser`` is not a supported browser.
    """
    import warnings

    if config_browser == 'chrome':
        options = ChromeOptions()
        options.headless = config_headless_mode
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        # Optional mobile emulation, disabled by default:
        # mobile_emulation = {"deviceName": "Nexus 5"}
        # options.add_experimental_option("mobileEmulation", mobile_emulation)
        driver = Chrome(options=options)
    elif config_browser == 'firefox':
        options = FirefoxOptions()
        options.headless = config_headless_mode
        # NOTE(review): these two switches are Chrome flags; confirm that
        # Firefox really needs (or tolerates) them.
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        driver = Firefox(options=options)
    elif config_browser == 'ie':
        if config_headless_mode:
            # The original built a Warning instance and discarded it, so the
            # message was never shown; actually emit it.
            warnings.warn("Headless mode is not supported in IE")
        driver = Ie()
    else:
        # ValueError is a subclass of Exception, so existing handlers still
        # catch it.
        raise ValueError(f'"{config_browser}" is not a supported browser')

    try:
        driver.delete_all_cookies()
        driver.set_window_size(1920, 1080)
        driver.implicitly_wait(wait_time)
        # Hand the ready driver to the consumer.
        yield driver
    finally:
        # Always release the browser, even if setup above failed half-way.
        driver.quit()
class Spider:
    """Crawl a numbered range of pages with a cookie-authenticated Chrome.

    Constructing a Spider immediately starts Chrome and installs the cookies
    from ``xcookie.cookie_list``; call :meth:`crawl` afterwards to collect
    the page sources into ``raw_html``.
    """

    def __init__(self, index_url, target_url, page_range):
        """
        :param index_url: page opened first so cookies can be set on its domain.
        :param target_url: URL prefix; the page number is appended to it.
        :param page_range: number of the last page to crawl (inclusive).
        """
        self.index_url = index_url
        self.target_url = target_url
        # crawl() iterates range(1, self.page_range); +1 makes the last
        # page inclusive.
        self.page_range = page_range + 1
        self.raw_html = []
        self.boot()

    def boot(self):
        """Start the Chrome driver and load the login cookies."""
        self.driver = Chrome()
        self.driver.start_client()
        self.check_cookie()

    def check_cookie(self):
        """Replace the browser's cookies with those from ``xcookie``.

        Exits the process (after shutting the browser down) when no cookies
        are configured.
        """
        from xcookie import cookie_list

        if not cookie_list:
            print('please insert cookie!')
            try:
                # Do not leave an orphaned browser behind when exiting.
                self.driver.quit()
            finally:
                sys.exit()

        # Cookies can only be added for the domain currently loaded.
        self.driver.get(self.index_url)
        time.sleep(8)
        self.driver.delete_all_cookies()
        print('clear')
        for c in cookie_list:
            self.driver.add_cookie(c)
        print('Done')

    def crawl(self):
        """Visit every page in the range and store its HTML in ``raw_html``."""
        for p in range(1, self.page_range):
            full_url = f'{self.target_url}{p}'
            self.driver.get(full_url)
            print(full_url)
            # Fixed pause to let the page finish rendering.
            time.sleep(5)
            self.raw_html.append(self.driver.page_source)
class BasePage:
    """Base page object that owns a Chrome driver for its lifetime."""

    def __init__(self, maximize_window=True, hide_cookie_notice=True):
        """Start Chrome and apply the requested window/cookie-notice setup.

        :param maximize_window: maximize the browser window when truthy.
        :param hide_cookie_notice: suppress the cookie notice when truthy.
        """
        self.hide_cookie_notice = hide_cookie_notice
        self.driver = Chrome(driver_path_resolver.resolve_driver_path())
        if self.hide_cookie_notice:
            self._hide_cookie_notice()
        self.driver.implicitly_wait(DEFAULT_IMPLICITLY_WAIT)
        if maximize_window:
            self.driver.maximize_window()
        self.error = None

    def _hide_cookie_notice(self):
        """Hide the cookie notice by setting the ``hideCookieNotice`` cookie.

        Does nothing beyond the initial log line when the instance was
        created with ``hide_cookie_notice`` falsy.
        """
        LOGGER.info('Started hiding cookie notice')
        if not self.hide_cookie_notice:
            return

        # A page of the site must be loaded before a cookie can be added.
        not_found_path = '404'
        self.driver.get(BASE_URL + not_found_path)
        LOGGER.debug('Opened 404 page')

        self.driver.delete_all_cookies()
        LOGGER.debug('Deleted all cookies')

        notice_cookie = {'name': 'hideCookieNotice', 'value': '1'}
        self.driver.add_cookie(notice_cookie)
        LOGGER.debug('Added cookie {}'.format(notice_cookie))
        LOGGER.info('Cookie notice hiding finished success')

    def get_last_error(self):
        """Return the ``repr`` of the last recorded error (``'None'`` if none)."""
        return repr(self.error)
def main():
    '''Load URLs from the sqlite3 db and use selenium to drive dakboard rotation.

    Runs until interrupted: each cycle re-reads the active board list and
    shows every board for DISPLAY_TIME seconds.  The browser is shut down
    when the loop is left for any reason (exception, Ctrl-C).
    '''
    # Set options for selenium chrome driver
    opt = Options()
    opt.add_argument("--kiosk")
    opt.add_argument("disk-cache-size=0")
    opt.add_experimental_option("useAutomationExtension", False)
    opt.add_experimental_option("excludeSwitches", ["enable-automation"])

    # start the browser
    driver = Chrome(options=opt)
    timeout = 1
    try:
        while True:
            boards = getActiveList()
            if not boards:
                # Nothing to show: wait instead of hammering the database.
                time.sleep(DISPLAY_TIME)
                continue

            # display each url in boards
            for url in boards:
                driver.get(url)
                driver.delete_all_cookies()
                # Test for bad links; adapted from
                # https://selenium-python.readthedocs.io/waits.html
                if driver.title != "MyView":
                    try:
                        elem_present = expected_conditions.presence_of_element_located(
                            (By.ID, "dak-banner"))
                        WebDriverWait(driver, timeout).until(elem_present)
                    except TimeoutException:
                        # The original removed ``url`` from ``boards`` here;
                        # mutating the list while iterating it skipped the
                        # following board, and the list is rebuilt on every
                        # cycle anyway, so the removal is dropped.
                        logger.error("Myview timed out loading %s", url)
                    else:
                        # Only report success when the banner really appeared.
                        logger.debug("MyView loaded: %s", url)
                time.sleep(DISPLAY_TIME)
    finally:
        # tear down the driver
        driver.quit()
class WebDriver:
    """Thin automation wrapper: log in, open a course, post announcements."""

    def __init__(self, headless=True):
        """Start Chrome (headless by default) with a 5 second implicit wait."""
        chrome_opts = Options()
        chrome_opts.headless = headless
        self.driver = Chrome(chrome_options=chrome_opts)
        self.in_course = False
        self.driver.implicitly_wait(5)
        self.logged_in = False
        self.courses = {}

    def login(self, username, password):
        """Submit the login form, then index the listed courses by name."""
        browser = self.driver
        browser.delete_all_cookies()
        browser.get(login_url)

        browser.find_element_by_id('username').send_keys(username)
        password_box = browser.find_element_by_id('password')
        password_box.send_keys(password)
        print('Logging in ... ')
        # Pressing RETURN in the password field submits the form.
        password_box.send_keys(Keys.RETURN)
        self.logged_in = True
        print('Succesfully logged in.')

        course_list = browser.find_element_by_class_name('coursefakeclass')
        for entry in course_list.find_elements_by_xpath('li'):
            anchor = entry.find_element_by_xpath('a')
            label = anchor.text
            href = anchor.get_attribute('href')
            course = Course(label, href)
            self.courses[course.name] = course
        print('Succesfully loaded courses.')

    def enter_course(self, course_name):
        """Navigate to the named course and mark the session as inside it."""
        selected = self.courses[course_name]
        self.driver.get(selected.urls['course'])
        self.in_course = True
        label = selected.department + ' ' + selected.crn + '.' + selected.section
        print('Entered course: ' + label + '.')

    def create_announcement(self, subject, announcement):
        """Post an announcement in the current course, if one is open."""
        if not self.in_course:
            print('You are not in any course. Can`t make the announcement.')
            return

        browser = self.driver
        browser.find_element_by_xpath('//*[@id="nav"]/li/a').click()
        browser.find_element_by_xpath('//*[@id="subject"]').send_keys(subject)

        # The message body is edited inside an iframe.
        editor_frame = browser.find_element_by_xpath('//*[@id="messagetext_ifr"]')
        browser.switch_to.frame(editor_frame)
        browser.find_element_by_xpath('html/body').send_keys(announcement)
        browser.switch_to.default_content()

        browser.find_element_by_class_name('submit').click()
        print('Succesfully created and posted announcement.')

    def quit(self):
        """Shut the browser down."""
        self.driver.quit()
def main_fun():
    """Run ``hh_worker`` in a headless Chrome and always shut the browser down.

    Exceptions raised by ``hh_worker`` are printed (message and traceback)
    and swallowed, as before.  Cleanup is now guaranteed: the browser is
    quit even if window setup or the final cookie purge fails.
    """
    options = Options()
    options.add_argument("headless")
    options.add_argument("disable-gpu")
    options.add_argument("no-sandbox")

    browser = Chrome(options=options)
    try:
        browser.set_window_size(1280, 1024)
        browser.maximize_window()
        browser.delete_all_cookies()
        try:
            hh_worker(browser)
        except Exception as ex:
            print(ex)
            traceback.print_exc()
    finally:
        try:
            # Best-effort: a dead session must not block quit() below.
            browser.delete_all_cookies()
        except Exception:
            traceback.print_exc()
        finally:
            # quit() closes every window and ends the driver process, so the
            # separate close() call the original made first is not needed.
            browser.quit()
def loop_tab_based(driver: webdriver.Chrome, project: Project):
    """Vote for ``project`` repeatedly until enough successes are recorded.

    Each round clears cookies, opens the project page, clicks the confirm
    button and waits up to 10 seconds for the completion marker.  The round
    is logged as a success or a failure, then the recorded success count is
    compared with ``project.times_to_vote``.

    Implemented as a loop rather than recursion so a large vote count or a
    long run of failures cannot exhaust the interpreter's recursion limit.

    :param driver: Chrome driver used for voting.
    :param project: project description; ``url`` and ``times_to_vote`` are
        read here.
    """
    while True:
        driver.delete_all_cookies()
        driver.get(project.url)
        try:
            driver.find_element_by_class_name("confirmVote").click()
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "voteComplete")))
            logs_maker.success(project)
        except Exception:
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still
            # stop the loop instead of being logged as a failed vote.
            logs_maker.fail(project)

        # TODO: count successes on file copy (so write acces wont fail in logs_counter)
        counted_successes = count_check.get_success_num(project.url)
        print(counted_successes, "/", project.times_to_vote)
        if counted_successes >= project.times_to_vote:
            break

    print("Finished!")
class S(object):
    """Headless-Chrome automation of the QQ OAuth login at ``self.url``.

    ``run()`` fills in the account/password form, handles an optional QR-code
    verification and an optional slider captcha, then reports the
    ``gdt_token`` / ``gdt_protect`` cookies through ``dd_notice``.
    """

    def __init__(self):
        """Start headless Chrome with automation hints disabled and set defaults."""
        # Hard-coded chromedriver location.
        self.path = '/root/.wdm/drivers/chromedriver/80.0.3987.106/linux64/chromedriver'
        option = ChromeOptions()
        option.add_argument('--headless')
        # Content-setting value 2 for notifications — presumably "block"; TODO confirm.
        prefs = {
            'profile.default_content_setting_values': {
                'notifications': 2
            }
        }
        option.add_experimental_option('prefs', prefs)
        option.add_argument('--no-sandbox')
        option.add_argument('--disable-dev-shm-usage')
        option.add_argument('--disable-extensions')
        option.add_argument('--disable-gpu')
        option.add_argument("--disable-features=VizDisplayCompositor")
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        self.wd = Chrome(options=option, executable_path=self.path)
        # Remove the webdriver marker: the injected script makes
        # navigator.webdriver read as undefined on every new document.
        self.wd.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument", {
                "source":
                """ Object.defineProperty(navigator, 'webdriver', { get: () => undefined }) """
            })
        self.wd.set_page_load_timeout(20)
        # Despite the name this is a WebDriverWait object (20 s); it is not
        # read anywhere else in this class.
        self.timeout = WebDriverWait(self.wd, 20)
        self.url = 'https://graph.qq.com/oauth2.0/show?which=Login&display=pc&response_type=code&client_id=101477621&redirect_uri=https%3A%2F%2Fsso.e.qq.com%2Fpassport%3Fsso_redirect_uri%3Dhttps%253A%252F%252Fe.qq.com%252Fads%252F%26service_tag%3D1&scope=get_user_info'
        # Placeholder credentials.
        self.users = 'xxx'
        self.passwd = 'xxx'

    def run(self):
        """Perform the login flow and report the resulting cookies.

        Progress and results are sent with ``dd_notice(message, dd_token_url)``
        (helper defined outside this class — presumably a chat/webhook
        notification; TODO confirm).  The browser window is closed at the end
        on every path of the final cookie check.
        """
        self.wd.get(self.url)
        self.wd.implicitly_wait(10)
        self.wd.delete_all_cookies()
        time.sleep(2)
        # The login form lives inside the first iframe of the page.
        iframe = self.wd.find_element_by_xpath('//iframe')
        self.wd.switch_to.frame(iframe)
        # Switch to the account/password login tab.
        self.wd.find_element_by_id('switcher_plogin').click()
        time.sleep(1)
        self.wd.find_element_by_id('u').clear()
        time.sleep(1)
        self.wd.find_element_by_id('u').send_keys(self.users)
        time.sleep(2)
        self.wd.find_element_by_id('p').clear()
        time.sleep(1)
        self.wd.find_element_by_id('p').send_keys(self.passwd)
        time.sleep(2)
        self.wd.find_element_by_id('login_button').click()
        time.sleep(5)

        # Step 1: optional QR-code verification.
        try:
            tips = self.wd.find_element_by_id('qlogin_tips_2').text
            # The matched text says the account is abnormal and phone
            # verification is required.
            if '由于你的帐号存在异常,需要进行手机验证,' in tips:
                while True:
                    # Message: "QR code needs to be scanned..."
                    dd_notice('需要扫描二维码...', dd_token_url)
                    time.sleep(2)
                    self.wd.save_screenshot('qrImg.png')
                    # Re-open and re-save the screenshot under the same name.
                    im = Image.open('qrImg.png')
                    im.save('qrImg.png')
                    # Give the operator 30 seconds to scan the code.
                    time.sleep(30)
                    # NOTE(review): this request is made with `requests`, not
                    # through self.wd, and its response is discarded — confirm
                    # what it is meant to achieve.
                    requests.get(
                        'https://e.qq.com/atlas/8944022/admanage/campaign',
                        verify=False)
                    time.sleep(2)
                    if 'gdt_token' in json.dumps(self.wd.get_cookies()):
                        # Message: "QR code verification succeeded"
                        dd_notice('二维码验证成功!!!', dd_token_url)
                        break
                    else:
                        # Message: "QR code verification failed, retrying..."
                        dd_notice('二维码验证失败!重试中...', dd_token_url)
        except Exception as e:
            # Any failure above (e.g. the tips element is missing) is reported
            # as "QR code verification not needed".
            dd_notice('不需要二维码验证!', dd_token_url)

        # Step 2: optional slider captcha.
        try:
            while True:
                time.sleep(3)
                iframe = self.wd.find_element_by_xpath('//iframe')
                self.wd.switch_to.frame(iframe)
                time.sleep(1)
                flags = self.wd.find_element_by_xpath(
                    '//*[@id="guideText"]').text
                # Guide text meaning "drag the slider below to complete the puzzle".
                if '拖动下方滑块完成拼图' == flags:
                    # Message: "slider required"
                    dd_notice('需要滑块!!!', dd_token_url)
                    # Download the captcha background image for gap detection.
                    src_url = self.wd.find_element_by_xpath(
                        '//*[@id="slideBg"]').get_attribute('src')
                    res = requests.get(url=src_url, verify=False)
                    with open('crack.jpeg', 'wb') as f:
                        f.write(res.content)
                    time.sleep(3)
                    slid_ing = self.wd.find_element_by_id(
                        'tcaptcha_drag_button')
                    ActionChains(
                        self.wd).click_and_hold(on_element=slid_ing).perform()
                    time.sleep(0.2)
                    # qq_mark_detect is defined elsewhere; its result exposes
                    # `.x.values` — presumably a DataFrame of detected gap
                    # positions; TODO confirm.
                    position = qq_mark_detect('crack.jpeg').x.values[0]
                    # NOTE(review): 280/680 and 23 look like an image-to-page
                    # scale factor and a start offset — confirm.
                    real_position = position * (280 / 680) - 23
                    track_list = self.get_track(int(real_position))
                    # Replay the generated track as many small mouse moves.
                    for track in track_list:
                        ActionChains(self.wd).move_by_offset(
                            xoffset=track, yoffset=0).perform()
                        time.sleep(0.002)
                    ActionChains(self.wd).release().perform()
                    time.sleep(2)
                    # NOTE(review): same discarded `requests` call as in the
                    # QR-code step — confirm intent.
                    requests.get(
                        'https://e.qq.com/atlas/8944022/admanage/campaign',
                        verify=False)
                    time.sleep(2)
                    print(self.wd.get_cookies())
                    if 'gdt_token' in json.dumps(self.wd.get_cookies()):
                        # Message: "slider verification succeeded"
                        dd_notice('滑块验证成功!!!', dd_token_url)
                        break
                    else:
                        # Message: "slider verification failed, retrying..."
                        dd_notice('滑块验证验证失败!重试中...', dd_token_url)
                else:
                    # Message: "slider not needed".  There is no break here,
                    # so the loop runs again; it ends when a lookup raises.
                    dd_notice('不需要滑块!!!', dd_token_url)
        except Exception as e:
            # Any failure above is reported as "slider not needed".
            dd_notice('不需要滑块!', dd_token_url)

        # Step 3: extract the two cookies of interest and report them.
        cookies_data = self.wd.get_cookies()
        try:
            if 'gdt_token' in json.dumps(
                    cookies_data) and 'gdt_protect' in json.dumps(
                        cookies_data):
                cookies = {}
                for data in cookies_data:
                    # A cookie dict matches when any of its values equals the name.
                    if 'gdt_protect' in data.values():
                        gdt_protect = data.get('value')
                        if gdt_protect:
                            cookies['gdt_protect'] = gdt_protect
                    if 'gdt_token' in data.values():
                        gdt_token = data.get('value')
                        if gdt_token:
                            cookies['gdt_token'] = gdt_token
                # Message: "cookies obtained: ..."
                dd_notice(f'获取的cookies: {cookies}', dd_token_url)
                time.sleep(2)
                self.close()
            else:
                # Message: "failed to obtain cookies, manual retry needed"
                dd_notice('未成功获取cookies, 需手动重试!!!!!', dd_token_url)
                self.close()
        except Exception as e:
            # Message: "automated login failed, manual retry needed"
            dd_notice('广点通自动化登陆失败!!!需手动重试!!!!!', dd_token_url)
            self.close()

    @staticmethod
    def get_track(distance):
        """
        Simulate a drag trajectory, pretending a human is operating.

        The pointer accelerates until 7/8 of ``distance`` is covered and
        decelerates afterwards, overshooting by 10 pixels; eight small
        negative steps are appended at the end to move back.

        :param distance: distance to drag, in pixels
        :return: list of integer per-step x offsets
        """
        v = 0  # current velocity
        t = 0.2  # duration of one simulated step
        tracks = []
        current = 0  # distance covered so far
        mid = distance * 7 / 8  # switch from accelerating to decelerating here
        distance += 10  # deliberately overshoot the target
        while current < distance:
            if current < mid:
                a = random.randint(2, 4)
            else:
                a = -random.randint(3, 5)
            v0 = v
            # Uniform-acceleration displacement for this step.
            s = v0 * t + 0.5 * a * (t**2)
            current += s
            tracks.append(round(s))
            v = v0 + a * t
        # Two batches of small backward steps after the overshoot.
        for i in range(4):
            tracks.append(-random.randint(2, 3))
        for i in range(4):
            tracks.append(-random.randint(1, 3))
        return tracks

    def close(self):
        """Close the current browser window (calls ``self.wd.close()``)."""
        self.wd.close()
class BoxDriver(object):
    """A simple wrapper around the Selenium WebDriver API.

    Elements are addressed with compact selector strings of the form
    ``"<strategy><by_char><value>"`` (for example ``"i,login"`` or
    ``"x,//*[@id='langs']/button"``).  A selector without the separator is
    treated as an element id.  Most actions pause for ``wait_seconds``
    afterwards.
    """

    # Private state (class-level defaults, overwritten per instance).
    _web_driver = None
    _by_char = None
    _wait_seconds = None

    class DriverType(Enum):
        """Supported browsers."""
        # NOTE: the trailing commas make the first four values one-element
        # tuples, e.g. (1,).  They are kept as-is so ``.value`` does not
        # change for existing callers.
        CHROME = 1,
        FIREFOX = 2,
        IE = 3,
        SAFARI = 4,
        CHROME_HEADLESS = 5

    # ---- construction ----

    def __init__(self,
                 driver_type: DriverType,
                 by_char=_CHARACTER_COMMA,
                 wait_seconds=_WAIT_SECONDS,
                 firefox_profile=None):
        """Create the wrapper and start the requested browser.

        :param driver_type: DriverType member; ``None`` or ``""`` selects Chrome
        :param by_char: separator used in selector strings
        :param wait_seconds: pause (seconds) applied after most actions
        :param firefox_profile: path to a Firefox profile directory (optional)
        """
        self._by_char = by_char
        self._wait_seconds = wait_seconds
        if driver_type is None or driver_type == "":
            driver_type = self.DriverType.CHROME
        self._set_selenium_driver(driver_type, firefox_profile)

    def _set_selenium_driver(self, driver_type, firefox_profile):
        """Instantiate the Selenium driver matching ``driver_type``.

        An unknown type falls back to Chrome and prints a notice.
        """
        if driver_type == self.DriverType.CHROME:
            self._web_driver = Chrome()
        elif driver_type == self.DriverType.FIREFOX:
            if firefox_profile and os.path.exists(firefox_profile):
                profile = FirefoxProfile(firefox_profile)
                self._web_driver = Firefox(firefox_profile=profile)
            else:
                self._web_driver = Firefox()
        elif driver_type == self.DriverType.IE:
            self._web_driver = Ie()
        elif driver_type == self.DriverType.SAFARI:
            self._web_driver = Safari()
        elif driver_type == self.DriverType.CHROME_HEADLESS:
            profile = ChromeOptions()
            profile.add_argument('headless')
            profile.add_experimental_option("excludeSwitches",
                                            ["ignore-certificate-errors"])
            self._web_driver = Chrome(options=profile)
        else:
            self._web_driver = Chrome()
            print("Invalid Driver Type filled: %r" % driver_type)

    # ---- private helpers ----

    def _convert_selector_to_locator(self, selector):
        """Convert a custom selector string into a Selenium locator tuple.

        Only the FIRST separator splits strategy from value, so values that
        contain the separator themselves (typical for XPath such as
        ``"x,//a[contains(@class,'b')]"``) are kept intact.

        :param selector: selector string such as ``"i, xxx"``
        :return: ``(By.<strategy>, value)``
        :raises NameError: if the strategy prefix is not recognised
        """
        if self._by_char not in selector:
            return By.ID, selector

        raw_by, raw_value = selector.split(self._by_char, 1)
        selector_by = raw_by.strip()
        selector_value = raw_value.strip()

        strategies = {
            "i": By.ID,
            "id": By.ID,
            "n": By.NAME,
            "name": By.NAME,
            "c": By.CLASS_NAME,
            "class_name": By.CLASS_NAME,
            "l": By.LINK_TEXT,
            "link_text": By.LINK_TEXT,
            "p": By.PARTIAL_LINK_TEXT,
            "partial_link_text": By.PARTIAL_LINK_TEXT,
            "t": By.TAG_NAME,
            "tag_name": By.TAG_NAME,
            "x": By.XPATH,
            "xpath": By.XPATH,
            "s": By.CSS_SELECTOR,
            "css_selector": By.CSS_SELECTOR,
        }
        if selector_by not in strategies:
            raise NameError(
                "Please enter a valid selector of targeting elements.")
        return (strategies[selector_by], selector_value)

    def _locate_element(self, selector):
        """Locate a single element.

        :param selector: e.g. ``"i,xxx"`` or ``"x,//*[@id='langs']/button"``
        :return: the matching WebElement
        """
        locator = self._convert_selector_to_locator(selector)
        if locator is None:
            raise NameError(
                "Please enter a valid locator of targeting elements.")
        return self._web_driver.find_element(*locator)

    def _locate_elements(self, selector):
        """Locate all elements matching the selector.

        :param selector: e.g. ``"i,xxx"`` or ``"x,//*[@id='langs']/button"``
        :return: list of matching WebElements
        """
        locator = self._convert_selector_to_locator(selector)
        if locator is None:
            raise NameError(
                "Please enter a valid locator of targeting elements.")
        return self._web_driver.find_elements(*locator)

    # ---- cookies ----

    def clear_cookies(self):
        """Clear all cookies after driver init."""
        self._web_driver.delete_all_cookies()

    def add_cookies(self, cookies):
        """Add a cookie given as a dict.

        :param cookies: cookie dict passed straight to the driver
        """
        self._web_driver.add_cookie(cookie_dict=cookies)

    def add_cookie(self, cookie_dict):
        """Add a single cookie, replacing an existing cookie of the same name.

        :param cookie_dict: dict with at least the keys ``name`` and ``value``
        """
        cookie_name = cookie_dict["name"]
        cookie_value = self._web_driver.get_cookie(cookie_name)
        if cookie_value is not None:
            self._web_driver.delete_cookie(cookie_name)
        self._web_driver.add_cookie(cookie_dict)

    def remove_cookie(self, name):
        """Remove the cookie called ``name`` if it exists.

        :param name: cookie name
        """
        old_cookie_value = self._web_driver.get_cookie(name)
        if old_cookie_value is not None:
            self._web_driver.delete_cookie(name)

    # ---- browser ----

    def refresh(self, url=None):
        """Refresh the current page, or load ``url`` when one is given.

        :param url: page to load instead of refreshing (default: None)
        """
        if url is None:
            self._web_driver.refresh()
        else:
            self._web_driver.get(url)
        self.forced_wait(self._wait_seconds)

    def maximize_window(self):
        """Maximize the current browser window."""
        self._web_driver.maximize_window()

    def navigate(self, url):
        """Open ``url``.

        :param url: address to load
        """
        self._web_driver.get(url)
        self.forced_wait(self._wait_seconds)

    def quit(self):
        """Quit the driver."""
        self._web_driver.quit()

    def close_browser(self):
        """Close the current browser window."""
        self._web_driver.close()

    # ---- basic element actions ----

    def type(self, selector, text):
        """Clear an input box and type ``text`` into it.

        Usage: driver.type("i,el","selenium")
        """
        el = self._locate_element(selector)
        el.clear()
        el.send_keys(text)

    def click(self, selector):
        """Click any clickable element (link, check box, radio button, ...).

        Usage: driver.click("i,el")
        """
        el = self._locate_element(selector)
        el.click()
        self.forced_wait(self._wait_seconds)

    def click_by_enter(self, selector):
        """Send the ENTER key to the located element.

        Usage: driver.click_by_enter("i,el")
        """
        el = self._locate_element(selector)
        el.send_keys(Keys.ENTER)
        self.forced_wait(self._wait_seconds)

    def click_by_text(self, text):
        """Click the element whose link text partially matches ``text``.

        Usage: driver.click_by_text("News")
        """
        self._locate_element('p%s' % self._by_char + text).click()
        self.forced_wait(self._wait_seconds)

    def submit(self, selector):
        """Submit the specified form.

        Usage: driver.submit("i,el")
        """
        el = self._locate_element(selector)
        el.submit()
        self.forced_wait(self._wait_seconds)

    def move_to(self, selector):
        """Move the mouse pointer onto the located element.

        :param selector: selector string
        """
        el = self._locate_element(selector)
        ActionChains(self._web_driver).move_to_element(el).perform()
        self.forced_wait(self._wait_seconds)

    def right_click(self, selector):
        """Right-click (context click) the located element.

        :param selector: selector string
        """
        el = self._locate_element(selector)
        ActionChains(self._web_driver).context_click(el).perform()
        self.forced_wait(self._wait_seconds)

    def count_elements(self, selector):
        """Count the elements matching the selector.

        :param selector: selector string
        :return: number of matching elements
        """
        els = self._locate_elements(selector)
        return len(els)

    def drag_element(self, source, target):
        """Drag the ``source`` element onto the ``target`` element.

        :param source: selector of the element to drag
        :param target: selector of the drop target
        """
        el_source = self._locate_element(source)
        el_target = self._locate_element(target)

        if self._web_driver.w3c:
            ActionChains(self._web_driver).drag_and_drop(el_source,
                                                         el_target).perform()
        else:
            # Non-W3C drivers: perform the drag as three separate actions.
            ActionChains(self._web_driver).click_and_hold(el_source).perform()
            ActionChains(self._web_driver).move_to_element(el_target).perform()
            ActionChains(self._web_driver).release(el_target).perform()

        self.forced_wait(self._wait_seconds)

    def lost_focus(self):
        """Make the current element lose focus by pressing TAB."""
        ActionChains(self._web_driver).key_down(Keys.TAB).key_up(
            Keys.TAB).perform()
        self.forced_wait(self._wait_seconds)

    # ---- <select> elements ----

    def select_by_index(self, selector, index):
        """Select the option at ``index`` in a <select> element.

        Usage: driver.select_by_index("i,el", 0)
        """
        el = self._locate_element(selector)
        Select(el).select_by_index(index)
        self.forced_wait(self._wait_seconds)

    def get_selected_text(self, selector):
        """Return the text of the currently selected option of a <select>.

        :param selector: selector string, e.g. ``"i, xxx"``
        :return: str
        """
        el = self._locate_element(selector)
        # first_selected_option is a property on Selenium's Select; calling
        # it (as the original did) raised TypeError.
        selected_opt = Select(el).first_selected_option
        return selected_opt.text

    def select_by_visible_text(self, selector, text):
        """Select the option whose visible text equals ``text``.

        Usage: driver.select_by_visible_text("i,el", "text")
        """
        el = self._locate_element(selector)
        Select(el).select_by_visible_text(text)
        self.forced_wait(self._wait_seconds)

    def select_by_value(self, selector, value):
        """Select the option whose ``value`` attribute equals ``value``.

        Usage: driver.select_by_value("i,el", "value")
        """
        el = self._locate_element(selector)
        Select(el).select_by_value(value)
        self.forced_wait(self._wait_seconds)

    # ---- JavaScript ----

    def execute_js(self, script):
        """Execute a JavaScript snippet.

        Usage: driver.execute_js("window.scrollTo(200,1000);")
        """
        self._web_driver.execute_script(script)
        self.forced_wait(self._wait_seconds)

    # ---- element properties ----

    def get_value(self, selector):
        """Return the element's ``value`` attribute.

        :param selector: selector string
        """
        el = self._locate_element(selector)
        return el.get_attribute("value")

    def get_attribute(self, selector, attribute):
        """Get the value of an element attribute.

        Usage: driver.get_attribute("i,el","type")
        """
        el = self._locate_element(selector)
        return el.get_attribute(attribute)

    def get_text(self, selector):
        """Get the element's text.

        Usage: driver.get_text("i,el")
        """
        el = self._locate_element(selector)
        return el.text

    def get_displayed(self, selector):
        """Return True if the element is displayed, else False.

        Usage: driver.get_displayed("i,el")
        """
        el = self._locate_element(selector)
        return el.is_displayed()

    def get_selected(self, selector):
        """Return the selected state of the element.

        :param selector: selector to locate
        :return: True or False
        """
        el = self._locate_element(selector)
        return el.is_selected()

    def get_text_list(self, selector):
        """Return the text of every element matching the selector.

        :param selector: selector string
        :return: list of str
        """
        return [el.text for el in self._locate_elements(selector)]

    # ---- windows, frames and alerts ----

    def accept_alert(self):
        """Accept the current alert.

        Usage: driver.accept_alert()
        """
        self._web_driver.switch_to.alert.accept()
        self.forced_wait(self._wait_seconds)

    def dismiss_alert(self):
        """Dismiss the current alert.

        Usage: driver.dismiss_alert()
        """
        self._web_driver.switch_to.alert.dismiss()
        self.forced_wait(self._wait_seconds)

    def switch_to_frame(self, selector):
        """Switch to the specified frame.

        Usage: driver.switch_to_frame("i,el")
        """
        el = self._locate_element(selector)
        self._web_driver.switch_to.frame(el)
        self.forced_wait(self._wait_seconds)

    def switch_to_default(self):
        """Switch back to the top-level document (counterpart of switch_to_frame).

        Usage: driver.switch_to_default()
        """
        self._web_driver.switch_to.default_content()
        self.forced_wait(self._wait_seconds)

    def switch_to_parent(self):
        """Switch to the parent frame."""
        self._web_driver.switch_to.parent_frame()
        self.forced_wait(self._wait_seconds)

    def switch_to_window_by_title(self, title):
        """Switch to the first window whose title equals ``title``.

        If no window matches, the last window handle stays selected.
        """
        for handle in self._web_driver.window_handles:
            self._web_driver.switch_to.window(handle)
            if self._web_driver.title == title:
                break

        self._web_driver.switch_to.default_content()
        self.forced_wait(self._wait_seconds)

    def open_new_window(self, selector):
        """Click the element and switch to the newly opened window.

        Usage: driver.open_new_window("i,el")
        """
        original_windows = self._web_driver.current_window_handle
        el = self._locate_element(selector)
        el.click()
        all_handles = self._web_driver.window_handles
        for handle in all_handles:
            if handle != original_windows:
                self._web_driver.switch_to.window(handle)
                break

    def save_window_snapshot(self, file_name):
        """Save a screenshot of the window to a file.

        :param file_name: the image file name and path
        """
        driver = self._web_driver
        driver.save_screenshot(file_name)
        self.forced_wait(self._wait_seconds)

    def save_window_snapshot_by_png(self):
        """Return a screenshot of the window as PNG bytes."""
        return self._web_driver.get_screenshot_as_png()

    def save_element_snapshot_by_png(self, selector):
        """Return a screenshot of the located element as PNG bytes.

        :param selector: selector string
        """
        el = self._locate_element(selector)
        self.forced_wait(self._wait_seconds)
        return el.screenshot_as_png

    def save_window_snapshot_by_io(self):
        """Return a screenshot of the window as a base64 string."""
        return self._web_driver.get_screenshot_as_base64()

    def save_element_snapshot_by_io(self, selector):
        """Return a screenshot of the located element as a base64 string.

        :param selector: selector string
        """
        el = self._locate_element(selector)
        return el.screenshot_as_base64

    # ---- waiting ----

    @staticmethod
    def forced_wait(seconds):
        """Unconditionally sleep.

        :param seconds: time to sleep, in seconds
        """
        time.sleep(seconds)

    def implicitly_wait(self, seconds):
        """Set the driver's implicit wait, applied to all element lookups.

        :param seconds: wait time in seconds

        Usage: driver.implicitly_wait(10)
        """
        self._web_driver.implicitly_wait(seconds)

    def explicitly_wait(self, selector, seconds):
        """Wait until the element is present in the DOM.

        :param selector: selector string
        :param seconds: maximum time to wait, in seconds
        """
        locator = self._convert_selector_to_locator(selector)
        WebDriverWait(self._web_driver, seconds).until(
            expected_conditions.presence_of_element_located(locator))

    def get_explicitly_wait_element_text(self, selector, seconds):
        """Wait for the element, then return its text.

        :param selector: selector string
        :param seconds: maximum time to wait, in seconds
        :return: the element's text, or None if the wait did not yield a
            WebElement
        """
        locator = self._convert_selector_to_locator(selector)
        driver = self._web_driver
        el = WebDriverWait(driver,
                           seconds).until(lambda d: d.find_element(*locator))
        if el and isinstance(el, WebElement):
            return el.text
        return None

    # ---- properties ----

    @property
    def current_title(self):
        """Get the window title.

        Usage: driver.current_title
        """
        return self._web_driver.title

    @property
    def current_url(self):
        """Get the URL of the current page.

        Usage: driver.current_url
        """
        return self._web_driver.current_url
def run(self):
    """Download and unpack the next chromedriver version if it is missing.

    Reads the names ``chrome_browser_version``, ``nextVersion``,
    ``lastVersion`` and ``event`` from outside this method.  Every failure is
    caught and printed; nothing is raised and nothing is returned.
    """
    try:
        import os

        driverName = "\\chromedriver.exe"
        # defining base file directory of chrome drivers
        driver_loc = os.path.dirname(
            os.path.abspath(__file__)) + "\\ChromeDriver\\"
        # defining the file path of your exe file automatically updating based on your browsers current version of chrome.
        currentPath = driver_loc + chrome_browser_version + driverName
        # check if new version of drive exists --> only continue if it doesn't
        Newpath = driver_loc + nextVersion
        # check if we have already downloaded the newest version of the browser
        newfileloc = Newpath + driverName
        newpathexists = os.path.exists(newfileloc)
        if newpathexists == False:
            try:
                # open chrome driver and attempt to download new chrome driver exe file.
                # set the arguments and options
                chromeOptions = Options()
                # Downloads go straight into driver_loc without a prompt;
                # image loading is disabled.
                chromeOptions.add_experimental_option(
                    "prefs",
                    {
                        "download.default_directory": driver_loc,
                        "download.prompt_for_download": False,
                        "download.directory_upgrade": True,
                        "safebrowsing.enabled": True,
                        "profile.managed_default_content_settings.images": 2,
                    },
                )
                chromeOptions.add_experimental_option(
                    "excludeSwitches", ["enable-logging"])
                chromeOptions.add_argument("--headless")
                chromeOptions.add_argument(
                    "--blink-settings=imagesEnabled=false")
                chromeOptions.add_argument("--disable-popup-blocking")
                chromeOptions.add_argument("--ignore-certificate-errors")
                chromeOptions.add_argument("--allow-insecure-localhost")
                chromeOptions.add_argument(
                    "--allow-running-insecure-content")
                # NOTE(review): plain attribute assignments on the options
                # object — confirm Chrome options actually honour them.
                chromeOptions.accept_untrusted_certs = True
                chromeOptions.assume_untrusted_cert_issuer = True
                service_args = ["hide_console"]
                try:
                    print("~~~Calling Update Driver")
                    # The CURRENT driver version is used to fetch the next one.
                    update_driver = Chrome(
                        executable_path=currentPath,
                        options=chromeOptions,
                        service_args=service_args,
                    )
                    print("~~~Update Driver Opened")
                    # opening up url of chromedriver to get new version of chromedriver.
                    chromeDriverURL = (
                        "https://chromedriver.storage.googleapis.com/index.html?path="
                        + nextVersion)
                    update_driver.set_page_load_timeout(10)
                    update_driver.delete_all_cookies()
                    update_driver.get(chromeDriverURL)
                    print("~~~Update Website Got")
                    # time.sleep(5)
                    # `event` is defined elsewhere — presumably a
                    # threading.Event used as an interruptible sleep; TODO confirm.
                    event.wait(5)
                    # find records of table rows
                    table = update_driver.find_elements_by_css_selector(
                        "tr")
                    # check the length of the table
                    Table_len = len(table)
                    # ensure that table length is greater than 4, else fail. -- table length of 4 is default when there are no availble updates
                    if Table_len > 4:
                        # define string value of link
                        # (first 6 characters of the second-to-last row's text)
                        rowText = table[(len(table) - 2)].text[:6]
                        # time.sleep(1)
                        event.wait(1)
                        # select the value of the row
                        update_driver.find_element_by_xpath(
                            "//*[contains(text()," + '"' + str(rowText) +
                            '"' + ")]").click()
                        event.wait(1)
                        # time.sleep(1)
                        # select chromedriver zip for windows
                        update_driver.find_element_by_xpath(
                            "//*[contains(text()," + '"' + "win32" + '"' +
                            ")]").click()
                        print("~~~Download Started")
                        # time.sleep(5)
                        # Fixed 5 second wait for the download to finish.
                        event.wait(5)
                        update_driver.quit()
                        print("~~~Update Driver Exited")
                        try:
                            from zipfile import ZipFile
                            import shutil

                            fileName = os.path.join(
                                os.path.dirname(driver_loc),
                                "chromedriver_win32.zip",
                            )
                            # Create a ZipFile Object and load sample.zip in it
                            with ZipFile(fileName, "r") as zipObj:
                                # Extract all the contents of zip file in different directory
                                zipObj.extractall(Newpath)
                            print("~~~Newer Version Extracted")
                        except Exception as ex:
                            print(
                                "Error in extracting:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                                .format(type(ex).__name__, ex.args))
                        try:
                            # delete downloaded file
                            # (fileName is only bound if the extraction block
                            # above got far enough to assign it)
                            os.remove(fileName)
                            print("Downloaded Zip Deleted")
                        except Exception as ex:
                            print(
                                "~~~Error in deleting zip:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                                .format(type(ex).__name__, ex.args))
                        # defining old chrome driver location
                        oldPath = driver_loc + lastVersion
                        oldpathexists = os.path.exists(oldPath)
                        # this deletes the old folder with the older version of chromedriver in it
                        if oldpathexists == True:
                            try:
                                # `stat` is imported but not used in this method.
                                import stat

                                # `shutil` is only bound if the import in the
                                # extraction block above ran.
                                shutil.rmtree(oldPath, ignore_errors=True)
                                print("Old Version Deleted")
                            except Exception as ex:
                                print(
                                    "~~~Error in deleting previous version:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                                    .format(type(ex).__name__, ex.args))
                    else:
                        # update_driver.quit()
                        print("~~~No new version available")
                except Exception as ex:
                    print(
                        "~~~Error in update driver:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                        .format(type(ex).__name__, ex.args))
                finally:
                    try:
                        # close the driver
                        # NOTE(review): on the download path quit() was already
                        # called above, so this is a second call; if Chrome()
                        # itself failed, update_driver is unbound here.  Either
                        # error is caught and printed below.
                        update_driver.quit()
                        print("~~~Update Driver Exited")
                    except Exception as ex:
                        print(
                            "~~~Error in quitting:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                            .format(type(ex).__name__, ex.args))
            except Exception as ex:
                print(
                    "~~~Error in if:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
                    .format(type(ex).__name__, ex.args))
        else:
            print("~~~ChromeDriver Upto Date~~~")
    except Exception as ex:
        print(
            "~~~Error in update:\t\tAn exception of type {0} occurred. Arguments:\n{1}"
            .format(type(ex).__name__, ex.args))
def run(self):
    """
    Periodically check whether the portal is logged in and sign in if not.

    Every cycle starts a fresh headless Chrome instance, opens the checking
    site and classifies the result:

    * the "blocked page" element is found -> the site is blocked;
    * an element of the real page is found -> the connection is up;
    * neither is found -> the login form is assumed to be displayed and
      the credentials are submitted.

    The loop never ends on its own.  It waits 300 seconds after a normal
    cycle and 60 seconds after a failed one.  The driver created for a
    cycle is always quit before waiting -- including when the login attempt
    fails -- so failed cycles do not pile up browser processes.
    """
    message_tail = ":\t\tAn exception of type {0} occurred. Arguments:\n{1}"
    try:
        # set the arguments and options
        chromeOptions = Options()
        prefs = {"profile.managed_default_content_settings.images": 2}
        chromeOptions.add_experimental_option("prefs", prefs)
        chromeOptions.add_experimental_option("excludeSwitches",
                                              ["enable-logging"])
        chromeOptions.add_argument("--headless")
        chromeOptions.add_argument("--blink-settings=imagesEnabled=false")
        chromeOptions.add_argument("--disable-popup-blocking")
        chromeOptions.add_argument("--ignore-certificate-errors")
        chromeOptions.add_argument("--allow-insecure-localhost")
        chromeOptions.add_argument("--allow-running-insecure-content")
        chromeOptions.accept_untrusted_certs = True
        chromeOptions.assume_untrusted_cert_issuer = True
        service_args = ["hide_console"]
        currentPath = (os.path.dirname(os.path.abspath(__file__)) +
                       "\\ChromeDriver\\" + chrome_browser_version +
                       "\\chromedriver.exe")

        while True:
            driver = None
            wait_seconds = 300
            quit_error_label = "Error in Quitting in loop"
            try:
                print("\nCalling Driver")
                # Creating an instance of chrome
                driver = Chrome(
                    executable_path=currentPath,
                    options=chromeOptions,
                    service_args=service_args,
                )
                print("Driver Called")
                driver.set_page_load_timeout(10)
                driver.delete_all_cookies()
                # open a page
                driver.get("Enter Checking Site Here")
                print("Getting Site")
                try:
                    # remove this try/except if your wifi doesn't block
                    # websites
                    # xpath if the website is blocked
                    element = driver.find_element_by_xpath(
                        "Enter xpath to an element in the blocked page")
                    print("Site Blocked\n")
                except Exception:
                    try:
                        # xpath to anything in the website to make sure
                        # you are connected to the internet
                        element = driver.find_element_by_xpath(
                            "/Enter xpath to an element in the page")
                        print("Site Opening\n")
                    except Exception:
                        try:
                            # if your portal doesn't have auto redirect,
                            # uncomment the following line and type in
                            # your login url
                            # driver.get("Paste Login Webiste URL Here")

                            # change the ids to those in your login website
                            # you can use developer mode to find the id of
                            # fields (use ctrl + shift + i)
                            # change the username and password to the
                            # required one
                            print("Trying To Login")
                            # select username field
                            element = driver.find_element_by_id(
                                "Ending id of user input field")
                            print("User Found")
                            element.send_keys("Enter username")
                            print("User Inputted")
                            # select password field
                            element = driver.find_element_by_id(
                                "Ending id of password input field")
                            print("Passwprd Found")
                            element.send_keys("Enter password")
                            print("Password Inputted")
                            # select submit button
                            element = driver.find_element_by_id(
                                "Enter id of submit button")
                            print("Button Found")
                            element.click()
                            print("Logged In\n")
                        except Exception as ex:
                            print(("Can't login" + message_tail).format(
                                type(ex).__name__, ex.args))
                            # retry sooner; the driver is released in the
                            # finally clause below
                            wait_seconds = 60
            except Exception as ex:
                print(("Error in loop" + message_tail).format(
                    type(ex).__name__, ex.args))
                wait_seconds = 60
                quit_error_label = "Error in Quitting"
            finally:
                # Only quit a driver that was actually created in this cycle.
                if driver is not None:
                    try:
                        driver.quit()
                    except Exception as ex:
                        print((quit_error_label + message_tail).format(
                            type(ex).__name__, ex.args))
            event.wait(wait_seconds)
    except Exception as ex:
        print(("Error outside loop" + message_tail).format(
            type(ex).__name__, ex.args))
    finally:
        # Each cycle releases its own driver, so nothing is left to quit here.
        print("Login Thread Exited")
# Start Chrome for the inventory manager and fill in the login form if the
# saved session is not already logged in.
opts = Options()
# add user data to chrome-data folder
opts.add_argument("--user-data-dir=" + config["activeProfilePath"])
# opts.add_argument("user-data-dir=C:\\Users\\AtechM_03\\AppData\\Local\\Google\\Chrome\\User Data\\Profile 2")
if config["showInterface"] != "true":
    # Options.set_headless() was deprecated and later removed from Selenium;
    # the --headless switch alone is enough to hide the window.
    opts.add_argument('--headless')
# NOTE(review): the original indentation was lost; it is assumed that only
# the headless switch is conditional -- confirm against the original file.
opts.add_argument('--disable-infobars')
opts.add_argument('--disable-dev-shm-usage')
opts.add_argument('--no-sandbox')
# opts.add_argument('--remote-debugging-port=9222')
# print(config["webDataPath"])
browser = Chrome(options=opts)
# browser.implicitly_wait(20)
browser.delete_all_cookies()
browser.get(config["inventoryUrl"])
printLog("Open inventory manager")
# The helper is compared with False explicitly (as before) because its other
# return values are not visible here.
if getElementByXpathUntilTimeout(browser, config["loginDoneId"], 3) == False:  # noqa: E712
    try:
        browser.find_element_by_xpath(config["emailId"]).send_keys(
            config["activeEmail"])
    except Exception:
        # Best effort: the e-mail field may be absent or pre-filled.
        print("skip email!")
    browser.find_element_by_xpath(config["passwordId"]).send_keys(
        config["activePassword"])
    browser.find_element_by_xpath(config["rememberLoginId"]).click()
class Crawler:
    """
    Crawl Google result pages for LinkedIn profile URLs and save the HTML of
    each profile through a profile manager, pausing and restarting the
    browser at random intervals.
    """

    def __init__(self, profile_manager, driver_path, config):
        """
        :param profile_manager: A ProfileManager object with all profiles loaded already
        :param driver_path: The path to the browser driver that is being used for scraping
        :param config: A CrawlerConfig object
        """
        # Parameters
        self.cfg = config
        self.driver_path = driver_path

        # Controls
        self.profile_manager = profile_manager
        self.crawled_urls = []
        self.profiles_since_break = 0
        self.browser = None
        self.current_agent = None

    def run(self):
        """
        Start a browser, crawl every seed URL in ``cfg.websites`` and close
        the browser again.
        """
        self._start_browser()

        # Start crawling each URL
        for url in self.cfg.websites:
            print("Starting Crawling on Seed: ", url)
            self._crawl_page(url)

        self._close_browser()

    def _crawl_page(self, url):
        """
        If the url is a linkedin url, it will check if it is a profile, save it, and add it to crawled URL's.
        If the url is NOT a linkedin url (thus a google search page), it will find linkedin URL's and crawl them.
        It will also find "next search page" arrows on google, and crawl those as well.

        :param url: The link to crawl
        :return: None
        """
        if url in self.crawled_urls:
            print("Tried to crawl the same URL twice in one session", url)
            return
        self.crawled_urls.append(url)

        if self.profiles_since_break > randint(*self.cfg.urls_between_break):
            self._take_break(
                self.cfg.sleep_random_break,
                "Taking break after " + str(self.profiles_since_break) +
                " profiles !")

        # If it is a linkedin profile currently being crawled, save the HTML
        if "linkedin" in url and "/in/" in url and url.count("/") == 4:
            # Check that this profile has not been parsed before
            username = url.split("/in/", 1)[1]
            if username in self.profile_manager.users:
                print("Already crawled: ", username)
                return

            print("Analyzing Profile #", len(self.profile_manager), url,
                  username)

            # Load the page
            html = self._load_page(url)
            self.profiles_since_break += 1

            # If linkedin rate-limited us, continue to the next profile
            if html is None:
                return

            # Save HTML to a file
            # TODO: Fix this
            try:
                self.profile_manager.write_new_html_profile(html)
            except Exception as e:
                print("ERROR: While saving HTML: ", e, " in url: ", url)
        elif "www.google.com" in url:
            # Load the page
            html = self._load_page(url)

            # The load timed out or was rate limited: there is nothing to
            # parse, and handing None to BeautifulSoup would raise.
            if html is None:
                return

            # It is a google search page being crawled to find more linkedin
            # profiles
            linkedin_urls = self._get_results_urls(html)
            shuffle(linkedin_urls)
            for result_url in linkedin_urls:
                self._crawl_page(result_url)

            # Get the "Next" link to go to the next page of results
            soup = BeautifulSoup(html, "lxml")
            next_link = soup.find("a", {"id": "pnnext"})
            if next_link is not None:
                next_link = urljoin("http://www.google.com",
                                    next_link["href"])
                self._crawl_page(next_link)
        else:
            print("ERROR: Tried to crawl bad url: ", url)

    def _get_results_urls(self, html):
        """
        Return the text of every ``<cite class="_Rm">`` element of a google
        search page, i.e. the links to the results.
        """
        soup = BeautifulSoup(html, "lxml")
        return [link.text for link in soup.find_all('cite', class_="_Rm")]

    def _load_page(self, url):
        """
        Load *url* in the browser and return the page source.

        :return: The HTML, or None when the load timed out or the page shows
                 the linkedin rate-limiting banner (a break is taken first
                 in both cases)
        """
        # Selenium signals a page-load timeout with its own TimeoutException,
        # which is unrelated to the builtin TimeoutError.
        from selenium.common.exceptions import TimeoutException

        # Load the page
        try:
            self.browser.get(url)
            html = self.browser.page_source
        except (TimeoutError, TimeoutException):
            self._take_break(self.cfg.sleep_timeout,
                             "Page has timed out. Taking a long break!")
            return None

        # Sleep for a certain amount of time
        if "www.google.com" in url:
            self._sleep(self.cfg.sleep_google_search, "Google Search")
        else:
            self._sleep(self.cfg.sleep_linkedin, "Linkedin Profile")

        # Check that linkedin didn't rate limit us
        if "Join to view full profiles for free" in html:
            self._take_break(
                self.cfg.sleep_rate_limiting,
                "Linkeding is rate-limiting us. Taking a break.")
            return None

        return html

    def _sleep(self, interval, reason):
        """
        A sleep helper function that prints the reason its sleeping, and the random interval of sleep

        :param interval: A tuple (mintime, maxtime)
        :param reason: Why the sleep is happening (for a pretty print)
        """
        rand_interval = uniform(*interval)
        print("Sleeping", rand_interval, "seconds: ", reason)

        # If the browser isn't open, do a normal sleep
        if self.browser is None:
            sleep(rand_interval)
            return

        # If the browser is open, sleep halfway, then do a random action,
        # then continue sleeping
        sleep(rand_interval / 2)

        # Do a random action while sleeping
        actions = [
            self.browser.back,
            lambda: self.browser.set_window_size(randint(700, 1080),
                                                 randint(700, 1080)),
            lambda: self.browser.set_window_position(randint(0, 300),
                                                     randint(0, 300)),
            self.browser.maximize_window,
            self.browser.delete_all_cookies,
        ]
        action = choice(actions)
        print("Performing random action: ", action)
        action()

        sleep(rand_interval / 2)

    def _take_break(self, break_interval, reason):
        """ Shut down browser, sleep, then restart with a new browser """
        self._close_browser()
        self._sleep(break_interval, reason)
        self.profiles_since_break = 0
        self._start_browser()

    def _close_browser(self):
        """ Quit the browser if one is open and forget it """
        if self.browser is not None:
            self.browser.quit()
        self.browser = None

    def _start_browser(self):
        """
        Start a new browser with a randomized set of options, a random user
        agent and a random window geometry, and store it in ``self.browser``.
        """
        assert self.browser is None, "Browser must not exist in order to call _start_browser!"

        # Load a user profile from normal chrome
        # NOTE(review): machine-specific path; consider moving it into the
        # crawler configuration.
        user_profile = "C:\\Users\\Alex Thiel\\AppData\\Local\\Google\\Chrome\\User Data\\Default"

        # Options
        options = Options()
        options.add_argument("user-data-dir={}".format(user_profile))
        options.add_experimental_option("excludeSwitches", [
            "ignore-certificate-errors",
            "safebrowsing-disable-download-protection",
            "safebrowsing-disable-auto-update",
            "disable-client-side-phishing-detection"
        ])
        os.environ["webdriver.chrome.driver"] = self.driver_path

        # Add variation to the browser: each switch is enabled with
        # probability 1/3 (randint(0, 2) yields 0, 1 or 2)
        if randint(0, 2) == 1:
            options.add_argument("--incognito")
            print("Option: Incognito")
        if randint(0, 2) == 1:
            options.add_argument("--disable-extensions")
            print("Option: Disabling Extensions")
        if randint(0, 2) == 1:
            options.add_argument("--disable-plugins-discovery")
            print("Option: Disabling plugins discovery")
        if randint(0, 2) == 1:
            options.add_argument('--no-referrers')
            print("Option: No Referrers")
        if randint(0, 2) == 1:
            options.add_argument('--disable-web-security')
            print("Option: Disabled web security")
        if randint(0, 2) == 1:
            options.add_argument('--allow-running-insecure-content')
            print("Option: Allowing running insecure content")
        if randint(0, 2) == 1:
            options.add_experimental_option(
                'prefs', {
                    'credentials_enable_service': False,
                    'profile': {
                        'password_manager_enabled': False
                    }
                })
            print("Options: Disabled Password Manager")
        # options.add_experimental_option('prefs', {'profile.managed_default_content_settings.images': 2})

        agent = UserAgent().random
        options.add_argument("user-agent=" + agent)
        self.current_agent = agent
        print("Option: Agent:", agent)

        # Open up browser window
        self.browser = Driver(executable_path=self.driver_path,
                              chrome_options=options)
        self.browser.set_page_load_timeout(self.cfg.browser_timeout)
        self.browser.delete_all_cookies()

        if randint(0, 2) == 1:
            print("Option: Start Maximized")
            self.browser.maximize_window()
        else:
            self.browser.set_window_size(randint(700, 1080),
                                         randint(700, 1080))
            self.browser.set_window_position(randint(0, 300),
                                             randint(0, 300))
def post(self):
    """
    Solve the slider captcha on the posted ``url`` with a touch flick in a
    mobile-emulated Chrome and return the resulting ``x5sec`` cookie.

    :return: ``{'x5sec': <value>}`` on success (the value is ``''`` when no
             such cookie was set); ``{'url': url, 'x5sec': ''}`` when the
             url is empty or the browser interaction fails
    """
    req_parser = reqparse.RequestParser()
    req_parser.add_argument('url', type=str, required=True)
    args = req_parser.parse_args()
    url = args['url']
    failure = {
        'url': url,
        'x5sec': '',
    }
    if not url:
        return failure

    option = ChromeOptions()
    # option.add_argument('--headless')
    option.add_argument('--no-sandbox')
    option.add_argument('--disable-dev-shm-usage')
    mobile_emulation = {
        "deviceMetrics": {"width": 375, "height": 667, "pixelRatio": 3},
        "userAgent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372"}
    option.add_experimental_option("mobileEmulation", mobile_emulation)
    option.add_experimental_option('w3c', False)
    option.add_argument('--disable-extensions')
    option.add_argument('--disable-gpu')
    option.add_argument("--disable-features=VizDisplayCompositor")
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    option.add_experimental_option("useAutomationExtension", False)
    option.binary_location = '/root/Downloads/login_taobao/node_modules/puppeteer/.local-chromium/linux-672088/chrome-linux/chrome'

    wd = Chrome(options=option, executable_path='/root/Downloads/slider_servers/chromedriver')
    try:
        # Hide the automation flag from scripts running in the page.
        wd.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                })
            """
        })
        wd.set_page_load_timeout(20)

        try:
            wd.get(url)
            wd.implicitly_wait(10)
            wd.delete_all_cookies()

            # At most 11 attempts, matching the original cnt > 10 cut-off.
            for _ in range(11):
                time.sleep(0.4)
                wd.find_element_by_id("nc_1_n1t").click()
                slid_ing = wd.find_element_by_id("nc_1_n1t")
                time.sleep(0.2)
                try:
                    TouchActions(wd).flick_element(slid_ing, 258, 0, random.randint(200, 300)).perform()
                    time.sleep(0.2)
                except Exception as e:
                    import traceback
                    print(traceback.format_exc())
                    print(e)
                time.sleep(0.4)
                try:
                    # The refresh link is only shown after a failed slide;
                    # when it is missing the loop is done.
                    slide_refresh = wd.find_element_by_xpath('//*[@id="nc_1-stage-3"]/span[1]/span[1]')
                    slide_refresh.click()
                except Exception:
                    break

            cookies = wd.get_cookies()
        except Exception:
            return failure

        x5sec = ''
        for cookie in cookies:
            # Match on the cookie name rather than on any of its values.
            if cookie.get('name') == 'x5sec':
                x5sec = cookie['value']
        return {
            'x5sec': x5sec,
        }
    finally:
        # quit() ends the browser *and* the chromedriver process; close()
        # only closes the window and leaks the driver on every request.
        try:
            wd.quit()
        except Exception as e:
            print(e)
def post(self):
    """
    Solve the slider captcha on the posted ``url`` by dragging the slider
    with the mouse in desktop Chrome and return the resulting ``x5sec``
    cookie.

    :return: ``{'x5sec': <value>}`` on success (the value is ``''`` when no
             such cookie was set); ``{'url': url, 'x5sec': ''}`` when the
             url is empty or the browser interaction fails
    """
    req_parser = reqparse.RequestParser()
    req_parser.add_argument('url', type=str, required=True)
    args = req_parser.parse_args()
    url = args['url']
    failure = {
        'url': url,
        'x5sec': '',
    }
    if not url:
        return failure

    option = ChromeOptions()
    # option.add_argument('--headless')
    option.add_argument('--no-sandbox')
    # A proxy can be set with --proxy-server=...; keep its credentials in
    # configuration, never in the source.
    option.add_argument(
        'user-agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"')
    option.add_argument('--disable-dev-shm-usage')
    option.add_argument('--disable-extensions')
    option.add_argument('--disable-gpu')
    option.add_argument("--disable-features=VizDisplayCompositor")
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    option.add_experimental_option("useAutomationExtension", False)
    # option.binary_location = '/root/Downloads/login_taobao/node_modules/puppeteer/.local-chromium/linux-672088/chrome-linux/chrome'

    wd = Chrome(options=option, executable_path='chromedriver')
    # wd = Chrome(ChromeDriverManager().install(), options=option)
    try:
        # Mask the navigator properties that give away an automated browser.
        wd.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
                Object.defineProperty(navigator, 'language', { get: () => "zh-CN" });
                Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
                Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
                Object.defineProperty(navigator, 'platform', { get: () => 'MacIntel' });
                Object.defineProperty(navigator, 'userAgent', { get: () => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36' });
                Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
            """
        })
        wd.set_page_load_timeout(20)

        try:
            wd.get(url)
            wd.implicitly_wait(10)
            wd.delete_all_cookies()
            # todo: implement multi-page parallelism

            # At most 11 attempts, matching the original cnt > 10 cut-off.
            for _ in range(11):
                time.sleep(0.2)
                wd.find_element_by_id("nc_1_n1z").click()
                slid_ing = wd.find_element_by_id("nc_1_n1z")
                ActionChains(wd).click_and_hold(on_element=slid_ing).perform()
                time.sleep(0.2)
                lgh = 0
                try:
                    # NOTE(review): move_by_offset is relative and receives
                    # the running total, so the pointer travels further than
                    # lgh -- confirm this acceleration is intended.
                    while lgh <= 510:
                        lgh += random.randint(30, 50)
                        ActionChains(wd).move_by_offset(xoffset=lgh, yoffset=0).perform()
                    time.sleep(0.2)
                    ActionChains(wd).release().perform()
                except Exception:
                    # Dragging out of bounds raises; still let go of the
                    # slider.
                    time.sleep(0.2)
                    ActionChains(wd).release().perform()
                try:
                    # The refresh link is only shown after a failed slide;
                    # when it is missing the loop is done.
                    slide_refresh = wd.find_element_by_xpath("//div[@id='nocaptcha']/div/span/a")
                    slide_refresh.click()
                except Exception:
                    break

            cookies = wd.get_cookies()
        except Exception:
            return failure

        x5sec = ''
        for cookie in cookies:
            # Match on the cookie name rather than on any of its values.
            if cookie.get('name') == 'x5sec':
                x5sec = cookie['value']
        return {
            'x5sec': x5sec,
        }
    finally:
        # quit() ends the browser *and* the chromedriver process; close()
        # only closes the window and leaks the driver on every request.
        try:
            wd.quit()
        except Exception as e:
            print(e)
class InstaBot(object):
    """
    Selenium-driven Instagram bot that can log in, follow users and like
    posts found under hashtags.
    """

    base_url = 'https://www.instagram.com'

    def __init__(self, implicit_wait=20, page_load_timeout=30):
        """
        Start a virtual display (when available) and a Chrome driver.

        :param implicit_wait: Implicit wait passed to the driver
        :param page_load_timeout: Page load timeout passed to the driver
        """
        try:
            Xvfb().start()
        except EnvironmentError:
            # No virtual display could be started; use the real one.
            pass

        options = ChromeOptions()
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-setuid-sandbox')
        self.driver = Chrome(settings.CHROMEDRIVER_PATH,
                             chrome_options=options)
        self.driver.implicitly_wait(implicit_wait)
        self.driver.set_page_load_timeout(page_load_timeout)

        self.wait = WebDriverWait(self.driver, settings.WEB_DRIVER_WAIT_SEC)

        # Counters
        self.liked = 0
        self.liked_total_samples = 0
        self.followed = 0

    def close(self):
        """
        Best-effort shutdown: clear cookies, close the browser and kill the
        Xvfb and chromedriver processes.

        Every step is attempted even when an earlier one fails; failures are
        logged and never raised.
        """
        from subprocess import call

        try:
            self.driver.delete_all_cookies()
        except Exception as e:
            logger.info(e)
        try:
            self.driver.close()
        except Exception as e:
            logger.info(e)
        for process_name in ('Xvfb', 'chromedriver'):
            try:
                call(['killall', process_name])
            except Exception as e:
                logger.info(e)

    def login(self, username=None, password=None):
        """
        Log in with the given credentials, falling back to the
        INSTABOT_IG_USERNAME / INSTABOT_IG_PASSWORD environment variables.

        :raises InvalidUsernamePasswordError: when either value is missing
        """
        username = username or os.environ.get('INSTABOT_IG_USERNAME')
        password = password or os.environ.get('INSTABOT_IG_PASSWORD')
        if not username or not password:
            raise InvalidUsernamePasswordError

        logger.info("Logging in as: %s" % username)
        self.driver.get(self.base_url)
        self.wait.until(EC.element_to_be_clickable(
            (By.XPATH, xpath.login))).click()
        self.driver.find_element_by_name('username').send_keys(username)
        self.driver.find_element_by_name('password').send_keys(password)
        self.driver.find_element_by_xpath(xpath.submit_login).click()

    def follow_users(self, usernames=None):
        """
        Follow all the users (don't pass `@')

        Passing None (the default) or an empty sequence does nothing.
        """
        for username in usernames or []:
            time.sleep(settings.FOLLOW_USER_SLEEP_SEC)
            self.driver.get('%s/%s' % (self.base_url, username))
            try:
                elem = self.wait.until(
                    EC.element_to_be_clickable((By.XPATH, xpath.follow)))
                if elem.text.lower() != 'following':
                    elem.click()
                    self.followed += 1
                    logger.info("Started following %s" % username)
                else:
                    logger.info("Already following %s" % username)
            except NoSuchElementException as e:
                logger.info(e)
            except Exception as e:
                logger.error(e)

    def like_tags(self, tags, num=100):
        """
        Like `num' number of posts when exploring hashtag (don't pass `#')

        A random sample of posts will be liked for a given tag

        Return the usernames of the posts liked
        """
        usernames = []
        for tag in tags:
            time.sleep(settings.LIKE_TAG_SLEEP_SEC)
            logger.info("Liking posts with tag: %s" % tag)
            self.driver.get('%s/explore/tags/%s/' % (self.base_url, tag))
            time.sleep(settings.LIKE_TAG_SLEEP_SEC)
            # Floor division keeps the count an int on Python 3 as well;
            # range() in _load_more rejects floats.
            self._load_more(max(1, num // 10))

            # get the actual url's of images to like
            try:
                main = self.driver.find_element_by_tag_name('main')
            except NoSuchElementException as e:
                logger.info(e)
                continue
            links = main.find_elements_by_tag_name('a')
            # Anchors without an href yield None, which cannot be opened.
            urls = [href for href in
                    (link.get_attribute('href') for link in links) if href]
            sample = random.sample(urls, min(num, len(urls)))
            self.liked_total_samples += len(sample)
            logger.info("Like sample size: %d" % len(sample))

            for url in sample:
                time.sleep(settings.LIKE_TAG_SLEEP_SEC)
                try:
                    self.driver.get(url)
                    elem = self.driver.find_element_by_link_text('Like')
                    username = self.driver.find_element_by_xpath(
                        xpath.profile_username).text
                    elem.click()
                    self.liked += 1
                    usernames.append(username)
                except NoSuchElementException as e:
                    logger.info(e)

            logger.info("Liked %d/%d" % (self.liked,
                                         self.liked_total_samples))
        return usernames

    def _load_more(self, n=10):
        """
        Press "end" key `n' times to load more images
        """
        body = self.driver.find_element_by_tag_name('body')
        for _ in range(n):
            body.send_keys(Keys.END)
            time.sleep(settings.LOAD_MORE_SLEEP_SEC)
# WebDriverWait.until() raises Selenium's TimeoutException, which is not
# related to the builtin TimeoutError, so it has to be caught explicitly.
from selenium.common.exceptions import TimeoutException

# Submit the code and the message `ran` times, clearing the cookies after
# each round; stop at the first wait that times out.
driver = Chrome(ChromeDriverManager().install(), options=opts)
try:
    for i in range(ran):
        driver.get(ank)
        try:
            # Enter the access code.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, xent)))
            ent = driver.find_element_by_xpath(xent)
            ent.send_keys(code)
            ent.submit()
            # Enter the message and wait for the confirmation element.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, xent1)))
            ent1 = driver.find_element_by_xpath(xent1)
            ent1.send_keys(mes)
            ent1.submit()
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, xtest)))
        except (TimeoutError, TimeoutException):
            break
        driver.delete_all_cookies()
finally:
    # Release the browser and the chromedriver process on every exit path.
    try:
        driver.quit()
    except Exception:
        # Best effort, as before: the session may already be gone.
        pass
class Target:
    """
    Drives a Chrome session that joins a Mentimeter survey by PIN and
    submits answers for the registered layers.

    A layer is a pair ``{survey_type: answer}`` where ``survey_type`` is the
    name of the method of this class that submits the answer (``"cloud"`` or
    ``"opend"``).
    """

    # Heading whose presence is awaited after an answer was submitted.
    _DONE_XPATH = r"/html/body/div[1]/div/div[2]/div[1]/div[2]/h1"
    # PIN input on the landing page.
    _PIN_XPATH = r"/html/body/div[1]/div[1]/header/div/div/form/input"

    def __init__(self, mode: str, pin: str):
        """
        :param mode: ``"headless"`` starts Chrome without a window; any other
                     value starts it normally
        :param pin: The survey PIN typed on the landing page
        """
        self.options = Options()
        if mode == "headless":
            self.options.add_argument("--headless")
        self.layers = dict()  # dictionary of pairs {survey_type: answer}
        self.driver = Chrome(ChromeDriverManager().install(),
                             options=self.options)
        self.pin = pin
        self.ank = r"https://www.mentimeter.com/"

    def add(self, layer: Dict[str, str]) -> None:
        """Register further layers (pairs ``{survey_type: answer}``)."""
        self.layers.update(layer)  # adding further layers

    def cloud(self, ans: str) -> None:
        """
        Type *ans* into every input of the form, submit it and wait for the
        confirmation heading.  The driver is closed on any failure.
        """
        xinp = r"/html/body/div[1]/div/div[2]/div[1]/form/fieldset/div/div/input"
        try:
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.XPATH, xinp)))
            ent1 = self.driver.find_elements_by_xpath(xinp)
            for ent in ent1:
                ent.send_keys(ans)
            ent.submit()
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.XPATH, self._DONE_XPATH)))
        except Exception:
            self.driver.close()

    def opend(self, ans: str) -> None:
        """
        Type *ans* into the textarea of the form, submit it and wait for the
        confirmation heading.  The driver is closed on any failure.
        """
        xinp = r"/html/body/div[1]/div/div[2]/div[1]/form/fieldset/div/textarea"
        try:
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.XPATH, xinp)))
            ent = self.driver.find_element_by_xpath(xinp)
            ent.send_keys(ans)
            ent.submit()
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.XPATH, self._DONE_XPATH)))
        except Exception:
            self.driver.close()

    def run(self, iterations: int) -> None:
        """
        Answer the first registered layer *iterations* times, clearing the
        cookies after every submission, then remove that layer.

        Does nothing when no layer is registered.  Failures while driving
        the browser are logged and end the run; the layer is removed either
        way.

        :param iterations: How many times the answer is submitted
        """
        if not self.layers:
            return

        k0 = next(iter(self.layers))
        answer = self.layers[k0]
        try:
            # Look the handler up by name instead of eval()-ing generated
            # source: an answer containing a quote would break the generated
            # code or execute arbitrary Python.
            submit = getattr(self, k0)
            for i in range(iterations):
                self.driver.get(self.ank)
                WebDriverWait(self.driver, 10).until(
                    EC.presence_of_element_located(
                        (By.XPATH, self._PIN_XPATH)))
                ent = self.driver.find_element_by_xpath(self._PIN_XPATH)
                ent.send_keys(self.pin)
                ent.submit()
                submit(answer)
                self.driver.delete_all_cookies()
        except Exception:
            import logging
            logging.getLogger(__name__).exception(
                "Submitting layer %r failed", k0)
        finally:
            self.layers.pop(k0, None)