def build_chrome(self):
    """Build a Chrome browser pre-loaded with this object's cookies.

    The browser opens http://pan.baidu.com first, the cookies are added
    while that page is loaded, and the page is refreshed afterwards.

    ``self.cookie`` may be either

    * a ``str`` of ``name=value`` pairs separated by ``;``, or
    * an iterable of Selenium-style cookie dicts.

    When ``self.cookie`` is falsy, ``self.load_cookie()`` is called before
    the cookies are applied.

    :return: the configured Chrome browser
    """
    chrome_options = ChromeOptions()
    chrome = Chrome(chrome_options=chrome_options)
    chrome.get('http://pan.baidu.com')
    if not self.cookie:
        self.load_cookie()
    if isinstance(self.cookie, str):
        for pair in self.cookie.split(';'):
            # Split on the FIRST '=' only: cookie values (e.g. base64) may
            # themselves contain '='. Strip the blank that follows ';'.
            name, sep, value = pair.strip().partition('=')
            if sep and name:
                chrome.add_cookie({'name': name, 'value': value})
    elif self.cookie is not None:
        for stored in self.cookie:
            # Drop 'expiry' on a copy so the stored cookie dicts stay intact.
            usable = {k: v for k, v in stored.items() if k != 'expiry'}
            chrome.add_cookie(usable)
    chrome.refresh()
    return chrome
class BasePage:
    """Base page object that starts and owns a Chrome driver."""

    def __init__(self, maximize_window=True, hide_cookie_notice=True):
        """Start Chrome and apply the optional start-up steps.

        :param maximize_window: maximize the browser window when truthy
        :param hide_cookie_notice: add the ``hideCookieNotice`` cookie when truthy
        """
        self.hide_cookie_notice = hide_cookie_notice
        self.driver = Chrome(driver_path_resolver.resolve_driver_path())
        if self.hide_cookie_notice:
            self._hide_cookie_notice()
        self.driver.implicitly_wait(DEFAULT_IMPLICITLY_WAIT)
        if maximize_window:
            self.driver.maximize_window()
        self.error = None

    def _hide_cookie_notice(self):
        """Hide the cookie notice by adding the ``hideCookieNotice`` cookie.

        A page under BASE_URL is opened first, existing cookies are cleared,
        and then the cookie is added. Nothing but the first log line happens
        when ``self.hide_cookie_notice`` is falsy.
        """
        LOGGER.info('Started hiding cookie notice')
        if not self.hide_cookie_notice:
            return
        wrong_path = '404'
        self.driver.get(BASE_URL + wrong_path)
        LOGGER.debug('Opened 404 page')
        self.driver.delete_all_cookies()
        LOGGER.debug('Deleted all cookies')
        notice_cookie = {'name': 'hideCookieNotice', 'value': '1'}
        self.driver.add_cookie(notice_cookie)
        LOGGER.debug('Added cookie {}'.format(notice_cookie))
        LOGGER.info('Cookie notice hiding finished success')

    def get_last_error(self):
        """Return the ``repr`` of the last stored error (``'None'`` if unset)."""
        last_error = self.error
        return repr(last_error)
def sign_in_with_cookies(driver: webdriver.Chrome) -> bool:
    """Try to sign in by replaying cookies pickled by an earlier session.

    The cookies are read from ``cookie_file_name``, added to *driver* after
    opening ``amazon_fresh_home_url``, and the page is refreshed. The sign-in
    counts as successful when the welcome element's ``href`` does not contain
    ``"signin"``.

    :param driver: Chrome driver to load the cookies into
    :return: ``True`` when the session is signed in, ``False`` otherwise
        (cookie file unreadable, welcome element missing, or still showing
        the sign-in link)
    """
    logger.info("Starting to sign in using pickled cookie")
    try:
        # NOTE: unpickling can execute arbitrary code; this file must only
        # ever be one written locally by this program.
        with open(cookie_file_name, "rb") as cookies_file:
            cookies = pickle.load(cookies_file)
        driver.get(amazon_fresh_home_url)
        for cookie in cookies:
            cookie.pop('expiry', None)
            driver.add_cookie(cookie)
    except IOError:
        logger.error("Unable to use pickled cookie to sign in")
        return False
    driver.refresh()
    try:
        WebDriverWait(driver, 10).until(
            expected_conditions.visibility_of_element_located(
                (By.ID, amazon_fresh_welcome_id)))
        welcome_href = driver.find_element_by_id(
            amazon_fresh_welcome_id).get_attribute("href")
        return "signin" not in welcome_href
    except Exception:
        logger.error("Failed to use pickled cookie to sign in")
        # Always hand back a bool, as the annotation promises.
        return False
class ChromeDriver:
    """Wrapper around a Chrome driver built from :meth:`get_options`.

    Attributes that the wrapper does not define itself are looked up on the
    wrapped ``self.driver``.
    """

    def __init__(self):
        """Create the options, start Chrome and set a 10 second implicit wait."""
        self.options = self.get_options()
        self.driver = Chrome(chrome_options=self.options)
        self.implicitly_wait(10)

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped driver."""
        # __getattr__ only runs when normal lookup fails. If 'driver' itself
        # is missing (partially constructed, copied or unpickled instance),
        # touching self.driver here would re-enter __getattr__ forever.
        if attr == 'driver':
            raise AttributeError(attr)
        return getattr(self.driver, attr)

    def get(self, url, cookies=None, refresh=True):
        """Open *url*, optionally add cookies and refresh the page.

        Overrides the wrapped driver's ``get``.

        :param url: address to open
        :param cookies: optional mapping of cookie name to value, added after
            the page has loaded
        :param refresh: refresh the page once the cookies have been added
        """
        self.driver.get(url)
        if cookies is not None:
            for name, value in cookies.items():
                self.driver.add_cookie({'name': name, 'value': value})
            if refresh:
                self.refresh()

    def get_options(self):
        """Return the Chrome options: headless, no GPU, local proxy, fixed UA."""
        options = chrome.options.Options()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        # options.add_argument('blink-settings=imagesEnabled=false')
        options.add_argument('--proxy-server=http://127.0.0.1:10809')
        options.add_argument(
            'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
        )
        return options
def start_browser(link, cookies):
    """Start Chrome, hide ``navigator.webdriver`` and open *link* with *cookies*.

    The page is opened once so the cookies can be added, then opened again so
    the request is made with the cookies in place.

    :param link: URL to open
    :param cookies: iterable of dicts with ``name``, ``value`` and ``domain`` keys
    :return: the started Chrome driver, so the caller can use and quit it
    """
    # DesiredCapabilities.CHROME is a dict shared at class level; work on a
    # copy so the page-load strategy does not leak into other drivers.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = "eager"

    chrome_options = ChromeOptions()
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option("useAutomationExtension", False)

    driver = Chrome(desired_capabilities=caps,
                    executable_path=driver_path,
                    options=chrome_options)
    # Registered before any navigation so it is evaluated on each new document.
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {
            "source": """
                Object.defineProperty(window, 'navigator', {
                    value: new Proxy(navigator, {
                        has: (target, key) => (key === 'webdriver' ? false : key in target),
                        get: (target, key) =>
                            key === 'webdriver'
                                ? undefined
                                : typeof target[key] === 'function'
                                    ? target[key].bind(target)
                                    : target[key]
                    })
                })
            """
        },
    )
    driver.get(link)
    for cookie in cookies:
        driver.add_cookie({
            "name": cookie["name"],
            "value": cookie["value"],
            "domain": cookie["domain"]
        })
    driver.get(link)
    return driver
class Spider:
    """Collect the HTML of numbered pages through a cookie-loaded Chrome session."""

    def __init__(self, index_url, target_url, page_range):
        """Store the crawl settings and start the browser straight away.

        :param index_url: page opened before the cookies are added
        :param target_url: URL prefix; the page number is appended to it
        :param page_range: number of the last page to crawl (inclusive)
        """
        self.index_url = index_url
        self.target_url = target_url
        # Stored as an exclusive upper bound for range().
        self.page_range = page_range + 1
        self.raw_html = []
        self.boot()

    def boot(self):
        """Start Chrome and load the cookies."""
        self.driver = Chrome()
        self.driver.start_client()
        self.check_cookie()

    def check_cookie(self):
        """Replace the browser's cookies with ``xcookie.cookie_list``.

        Exits the program when the list is empty.
        """
        from xcookie import cookie_list

        if not cookie_list:
            print('please insert cookie!')
            sys.exit()

        self.driver.get(self.index_url)
        time.sleep(8)
        self.driver.delete_all_cookies()
        print('clear')
        for entry in cookie_list:
            self.driver.add_cookie(entry)
        print('Done')

    def crawl(self):
        """Open pages 1..page_range-1 and append each page source to ``raw_html``."""
        for page_no in range(1, self.page_range):
            page_url = '{}{}'.format(self.target_url, page_no)
            self.driver.get(page_url)
            print(page_url)
            time.sleep(5)
            self.raw_html.append(self.driver.page_source)
def browser():
    """Yield a Chrome browser opened on the site under test, then quit it.

    The function drives the browser for the tests in three sections: setup
    (runs before each test), the ``yield`` that hands the browser to the
    test, and teardown (runs after the test whether it passed or not). To
    use it from a test, pass the fixture as an argument of the test function.
    """
    # 1. Setup: what runs before every test.
    browser = Chrome(executable_path=ChromeDriverManager().install())
    try:
        browser.get('https://www.awesome-testing.com/')
        # Cookie values are taken from the site: DevTools -> Application ->
        # Cookies (after clicking OK on the cookie notice) -> refresh.
        cookie = {
            'name': 'displayCookieNotice',
            'value': 'y',
            'domain': 'www.awesome-testing.com'
        }
        browser.add_cookie(cookie)
        # If this proves flaky, a time.sleep(1) here would be a workaround.
        browser.refresh()
        # 2. Hand-over: `yield` (not `return`) gives the tests the browser
        # already opened on the page and marks the border between sections.
        # To hand over more than one value, yield them as a tuple.
        yield browser
    finally:
        # 3. Teardown: always quit, including when setup itself failed, so
        # no browser process is left behind.
        browser.quit()
def load_cookie(browser: Chrome):
    """Add every cookie stored in the ``*.cookie`` files to *browser*.

    Each file under ``COOKIES_PATH`` is read as JSON and is expected to hold
    a list of cookie dicts. A non-empty ``domain`` that does not start with
    ``.`` gets one prepended before the cookie is added.

    :param browser: Chrome driver that receives the cookies
    """
    for cookie_path in glob.glob(os.path.join(COOKIES_PATH, '*.cookie')):
        with codecs.open(filename=cookie_path) as rfile:
            data_cookies = json.load(rfile)
        for data_cookie in data_cookies:
            domain = data_cookie.get('domain')
            # Leave a missing or empty domain alone instead of failing on
            # the lookup or turning it into a bare '.'.
            if domain and not domain.startswith('.'):
                data_cookie['domain'] = '.' + domain
            browser.add_cookie(data_cookie)
def login_by_cookies():
    """Open the Mosoteach web app in Chrome using cookies from ``data/cookies.json``.

    The site is opened once so the cookies can be added, then opened again
    and refreshed.

    :return: the Chrome driver
    """
    with open("data/cookies.json") as cookie_file:
        stored_cookies = json.load(cookie_file)

    home_url = 'https://www.mosoteach.cn/web/'
    web = Chrome()
    web.get(home_url)
    for entry in stored_cookies:
        web.add_cookie(entry)
    web.get(home_url)
    web.refresh()
    return web
def browser():
    """Yield a Chrome browser opened on awesome-testing.com, then quit it.

    The ``displayCookieNotice`` cookie is added and the page refreshed
    before the browser is handed to the caller.
    """
    driver = Chrome(executable_path=ChromeDriverManager().install())
    try:
        driver.get('https://www.awesome-testing.com/')
        cookie = {'name': 'displayCookieNotice', 'value': 'y',
                  'domain': 'www.awesome-testing.com'}
        driver.add_cookie(cookie)
        driver.refresh()
        yield driver
    finally:
        # Quit even when setup fails before the yield, so no browser
        # process is left running.
        driver.quit()
def check_queue(driver: webdriver.Chrome):
    """Check whether we are in the waiting room and try to skip it.

    If the ``akavpwr_User_allowed`` cookie exists, it is re-added under the
    name ``akavpau_User_allowed`` and the page is reloaded.

    Args:
        driver (webdriver.Chrome): webdriver
    """
    # Fetch the waiting-room cookie; nothing to do when it is absent.
    waiting_cookie = driver.get_cookie("akavpwr_User_allowed")
    if not waiting_cookie:
        return

    logging.debug("Warteraum - Try skipping")
    # Re-use the same cookie data under the new name.
    waiting_cookie["name"] = "akavpau_User_allowed"
    driver.add_cookie(waiting_cookie)

    # Reload the page.
    driver.refresh()
def driver_start(bot_name, headless_mode=True):
    """Start Chrome on the Twitter login page with a bot's pickled cookies.

    :param bot_name: name of the bot; cookies are read from
        ``Cookies/<bot_name>.pkl``
    :param headless_mode: run headless with a fixed 1366x768 window when
        truthy, otherwise maximize the window
    :return: the Chrome driver, refreshed after the cookies were added
    """
    options = Options()
    if headless_mode:
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
    options.add_argument("--disable-notifications")
    options.add_argument("--disable-extensions")

    driver = Chrome("chromedriver.exe", options=options)
    # Same truthiness test as above, so both decisions always agree.
    if headless_mode:
        driver.set_window_size(1366, 768)
    else:
        driver.maximize_window()

    driver.get("https://twitter.com/login")
    # NOTE: unpickling can execute arbitrary code; only load cookie files
    # that this program wrote itself.
    with open("Cookies/" + bot_name + ".pkl", "rb") as cookie_file:
        cookies = pickle.load(cookie_file)
    for cookie in cookies:
        cookie.pop('expiry', None)
        driver.add_cookie(cookie)
    driver.refresh()
    return driver
def build_chrome(cookie=None):
    """Build a Chrome browser, optionally pre-loaded with cookies.

    The browser opens http://pan.baidu.com, the cookies (if any) are added
    without their ``expiry`` field, and the page is refreshed.

    :param cookie: Selenium-style list of cookie dicts; ``None`` or an empty
        list means no cookies are added
    :return: the configured Chrome browser
    """
    chrome_options = ChromeOptions()
    chrome = Chrome(chrome_options=chrome_options)
    chrome.get('http://pan.baidu.com')
    # `cookie or ()` treats both None (the default) and [] as "nothing to
    # add"; a guard of the form `x != None or x != []` is always true and
    # would let None reach the loop.
    for stored in cookie or ():
        # Drop 'expiry' on a copy so the caller's dicts are not modified.
        usable = {k: v for k, v in stored.items() if k != 'expiry'}
        chrome.add_cookie(usable)
    chrome.refresh()
    return chrome
def login_cookie(driver: Chrome):
    """Log *driver* into Facebook by adding cookies parsed from a raw cookie header.

    The parsed cookies are printed as JSON, facebook.com is opened, each
    cookie is added for the ``facebook.com`` domain, and www.facebook.com is
    then opened with the window maximized.
    """
    # NOTE(review): a session cookie is hard-coded here; consider loading it
    # from outside the source tree.
    raw_cookie = "sb=sJyfYPRg73_iH7HXwXh8Z7jS; datr=sJyfYG7kFe-wxSuCHWsSgiDv; dpr=1.25; wd=1479x734; c_user=100003617755928; xs=21%3AE0HZCHbl-LJw1Q%3A2%3A1621073168%3A-1%3A6381; fr=1bSzt7fdPoQutylf6.AWXwhkAAla0xRmN2Z1v3maVz5_w.Bgn5yw.O6.AAA.0.0.Bgn50Q.AWXmd3f3OSI; spin=r.1003799421_b.trunk_t.1621073170_s.1_v.2_"
    parsed = parse_dict_cookies(raw_cookie)
    print(json.dumps(parsed, indent=4, sort_keys=True))

    # NOTE: facebook.com must be open before its cookies are added,
    # otherwise an error occurs.
    driver.get("https://facebook.com")
    for cookie_name, cookie_value in parsed.items():
        entry = {
            'name': cookie_name,
            'value': cookie_value,
            'domain': 'facebook.com'
        }
        driver.add_cookie(entry)

    driver.get("https://www.facebook.com")
    driver.maximize_window()
class GameDriver:
    """Open a game URL in Chrome with a given set of cookies."""

    # Class-level defaults; url, cookies and base are set per instance in
    # __init__, driver is set by run().
    url = None
    cookies = None
    # User agent sent by the browser (no stray trailing quote character).
    ua = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"
    driver = None
    active = True
    base = None

    def __init__(self, url, cookies, base=None):
        """Store the settings; the browser is only started by :meth:`run`.

        :param url: page opened once the cookies are set
        :param cookies: iterable of objects exposing ``name``, ``value`` and
            ``domain`` attributes
        :param base: page opened first so cookies can be added; falls back
            to *url* when omitted
        """
        self.url = url
        self.cookies = cookies
        self.base = base

    def run(self):
        """Start Chrome, add the cookies and open ``self.url``."""
        driver = "chromedriver.exe"
        chrome_options = ChromeOptions()
        #chrome_options.add_argument('--headless')
        chrome_options.add_argument('--user-agent="%s"' % self.ua)
        self.driver = Chrome(executable_path=driver, chrome_options=chrome_options)
        # With no base page given, open the target itself rather than
        # passing None to the driver.
        start_url = self.base if self.base is not None else self.url
        self.driver.get(url=start_url)
        time.sleep(1)
        for c in self.cookies:
            self.driver.add_cookie({
                'name': c.name,
                'value': c.value,
                'domain': c.domain
            })
            print("Setting Driver cookie: %s=%s (%s)" % (c.name, c.value, c.domain))
        self.driver.get(url=self.url)

    def close(self):
        """Close the browser window if a driver was started."""
        if self.driver:
            self.driver.close()
class BoxDriver(object):
    """A simple wrapper around the Selenium WebDriver API.

    Elements are addressed with selector strings of the form
    ``"<strategy><by_char><value>"``, for example ``"i,username"`` or
    ``"x,//*[@id='langs']/button"``. A selector without the separator is
    treated as an element id.
    """

    # Private class-level defaults, overwritten per instance in __init__.
    _web_driver = None
    _by_char = None
    _wait_seconds = None

    class DriverType(Enum):
        """Browsers that :class:`BoxDriver` can start."""
        # NOTE: the trailing commas make the first four values one-element
        # tuples; they are kept as-is so existing `.value` users see no change.
        CHROME = 1,
        FIREFOX = 2,
        IE = 3,
        SAFARI = 4,
        CHROME_HEADLESS = 5

    def __init__(self,
                 driver_type: DriverType,
                 by_char=_CHARACTER_COMMA,
                 wait_seconds=_WAIT_SECONDS,
                 firefox_profile=None):
        """Create the underlying Selenium driver.

        :param driver_type: which browser to start; ``None`` or ``""`` means Chrome
        :param by_char: separator between strategy and value in selectors
        :param wait_seconds: pause applied after most actions, in seconds
        :param firefox_profile: path to a Firefox profile directory (optional)
        """
        self._by_char = by_char
        self._wait_seconds = wait_seconds
        if driver_type is None or driver_type == "":
            driver_type = self.DriverType.CHROME
        self._set_selenium_driver(driver_type, firefox_profile)

    def _set_selenium_driver(self, driver_type, firefox_profile):
        """Start the browser for *driver_type*; unknown types fall back to Chrome."""
        if driver_type == self.DriverType.CHROME:
            self._web_driver = Chrome()
        elif driver_type == self.DriverType.FIREFOX:
            if firefox_profile and os.path.exists(firefox_profile):
                profile = FirefoxProfile(firefox_profile)
                self._web_driver = Firefox(firefox_profile=profile)
            else:
                self._web_driver = Firefox()
        elif driver_type == self.DriverType.IE:
            self._web_driver = Ie()
        elif driver_type == self.DriverType.SAFARI:
            self._web_driver = Safari()
        elif driver_type == self.DriverType.CHROME_HEADLESS:
            profile = ChromeOptions()
            profile.add_argument('headless')
            profile.add_experimental_option("excludeSwitches",
                                            ["ignore-certificate-errors"])
            self._web_driver = Chrome(options=profile)
        else:
            self._web_driver = Chrome()
            print("Invalid Driver Type filled: %r" % driver_type)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _convert_selector_to_locator(self, selector):
        """Convert a custom selector string into a Selenium locator tuple.

        :param selector: selector string such as ``"i, xxx"``
        :return: ``(By.<strategy>, value)``
        :raises NameError: if the strategy prefix is not recognised
        """
        if self._by_char not in selector:
            return By.ID, selector

        # Split on the FIRST separator only, so values that contain the
        # separator themselves (e.g. an XPath with commas) stay intact.
        prefix, _, remainder = selector.partition(self._by_char)
        selector_by = prefix.strip()
        selector_value = remainder.strip()

        strategies = {
            "i": By.ID, "id": By.ID,
            "n": By.NAME, "name": By.NAME,
            "c": By.CLASS_NAME, "class_name": By.CLASS_NAME,
            "l": By.LINK_TEXT, "link_text": By.LINK_TEXT,
            "p": By.PARTIAL_LINK_TEXT, "partial_link_text": By.PARTIAL_LINK_TEXT,
            "t": By.TAG_NAME, "tag_name": By.TAG_NAME,
            "x": By.XPATH, "xpath": By.XPATH,
            "s": By.CSS_SELECTOR, "css_selector": By.CSS_SELECTOR,
        }
        if selector_by not in strategies:
            raise NameError(
                "Please enter a valid selector of targeting elements.")
        return strategies[selector_by], selector_value

    def _locate_element(self, selector):
        """Locate a single element by selector.

        :param selector: e.g. ``"i,xxx"`` or ``"x,//*[@id='langs']/button"``
        :return: the matching DOM element
        """
        locator = self._convert_selector_to_locator(selector)
        if locator is not None:
            element = self._web_driver.find_element(*locator)
        else:
            raise NameError(
                "Please enter a valid locator of targeting elements.")
        return element

    def _locate_elements(self, selector):
        """Locate all elements matching a selector.

        :param selector: e.g. ``"i,xxx"`` or ``"x,//*[@id='langs']/button"``
        :return: list of matching DOM elements
        """
        locator = self._convert_selector_to_locator(selector)
        if locator is not None:
            elements = self._web_driver.find_elements(*locator)
        else:
            raise NameError(
                "Please enter a valid locator of targeting elements.")
        return elements

    # ------------------------------------------------------------------
    # Cookies
    # ------------------------------------------------------------------

    def clear_cookies(self):
        """Clear all cookies after driver init."""
        self._web_driver.delete_all_cookies()

    def add_cookies(self, cookies):
        """Add a cookie given as a dict.

        :param cookies: cookie dict passed straight to the driver
        """
        self._web_driver.add_cookie(cookie_dict=cookies)

    def add_cookie(self, cookie_dict):
        """Add a single cookie, replacing an existing cookie of the same name.

        :param cookie_dict: dict with the keys ``name`` and ``value``
        """
        cookie_name = cookie_dict["name"]
        cookie_value = self._web_driver.get_cookie(cookie_name)
        if cookie_value is not None:
            self._web_driver.delete_cookie(cookie_name)
        self._web_driver.add_cookie(cookie_dict)

    def remove_cookie(self, name):
        """Remove the cookie called *name* if it exists."""
        old_cookie_value = self._web_driver.get_cookie(name)
        if old_cookie_value is not None:
            self._web_driver.delete_cookie(name)

    # ------------------------------------------------------------------
    # Browser
    # ------------------------------------------------------------------

    def refresh(self, url=None):
        """Refresh the current page, or open *url* when one is given.

        :param url: page to open instead of refreshing; default ``None``
        """
        if url is None:
            self._web_driver.refresh()
        else:
            self._web_driver.get(url)
        self.forced_wait(self._wait_seconds)

    def maximize_window(self):
        """Maximize the current browser window."""
        self._web_driver.maximize_window()

    def navigate(self, url):
        """Open *url*."""
        self._web_driver.get(url)
        self.forced_wait(self._wait_seconds)

    def quit(self):
        """Quit the driver."""
        self._web_driver.quit()

    def close_browser(self):
        """Close the browser window."""
        self._web_driver.close()

    # ------------------------------------------------------------------
    # Basic element actions
    # ------------------------------------------------------------------

    def type(self, selector, text):
        """Clear an input box and type *text* into it.

        Usage:
        driver.type("i,el","selenium")
        """
        el = self._locate_element(selector)
        el.clear()
        el.send_keys(text)

    def click(self, selector):
        """Click an element (link, image, check box, radio button, drop-down...).

        Usage:
        driver.click("i,el")
        """
        el = self._locate_element(selector)
        el.click()
        self.forced_wait(self._wait_seconds)

    def click_by_enter(self, selector):
        """Send the ENTER key to the located element.

        Usage:
        driver.click_by_enter("i,el")
        """
        el = self._locate_element(selector)
        el.send_keys(Keys.ENTER)
        self.forced_wait(self._wait_seconds)

    def click_by_text(self, text):
        """Click the element whose link text partially matches *text*.

        Usage:
        driver.click_text("新闻")
        """
        self._locate_element('p%s' % self._by_char + text).click()
        self.forced_wait(self._wait_seconds)

    def submit(self, selector):
        """Submit the specified form.

        Usage:
        driver.submit("i,el")
        """
        el = self._locate_element(selector)
        el.submit()
        self.forced_wait(self._wait_seconds)

    def move_to(self, selector):
        """Move the mouse pointer onto the located element."""
        el = self._locate_element(selector)
        ActionChains(self._web_driver).move_to_element(el).perform()
        self.forced_wait(self._wait_seconds)

    def right_click(self, selector):
        """Right-click the located element."""
        el = self._locate_element(selector)
        ActionChains(self._web_driver).context_click(el).perform()
        self.forced_wait(self._wait_seconds)

    def count_elements(self, selector):
        """Return the number of elements matching *selector*."""
        els = self._locate_elements(selector)
        return len(els)

    def drag_element(self, source, target):
        """Drag the *source* element onto the *target* element.

        :param source: selector of the element to drag
        :param target: selector of the drop target
        """
        el_source = self._locate_element(source)
        el_target = self._locate_element(target)

        if self._web_driver.w3c:
            ActionChains(self._web_driver).drag_and_drop(el_source,
                                                         el_target).perform()
        else:
            ActionChains(self._web_driver).click_and_hold(el_source).perform()
            ActionChains(self._web_driver).move_to_element(el_target).perform()
            ActionChains(self._web_driver).release(el_target).perform()

        self.forced_wait(self._wait_seconds)

    def lost_focus(self):
        """Move focus away from the current element by pressing TAB."""
        ActionChains(self._web_driver).key_down(Keys.TAB).key_up(
            Keys.TAB).perform()
        self.forced_wait(self._wait_seconds)

    # ------------------------------------------------------------------
    # <select> elements
    # ------------------------------------------------------------------

    def select_by_index(self, selector, index):
        """Select the option at *index* in a <select> element.

        Usage:
        driver.select_by_index("i,el", 0)
        """
        el = self._locate_element(selector)
        Select(el).select_by_index(index)
        self.forced_wait(self._wait_seconds)

    def get_selected_text(self, selector):
        """Return the text of the selected option of a <select> element.

        :param selector: selector string such as ``"i, xxx"``
        :return: text of the first selected option
        """
        el = self._locate_element(selector)
        # first_selected_option is a property, not a method.
        selected_opt = Select(el).first_selected_option
        return selected_opt.text

    def select_by_visible_text(self, selector, text):
        """Select the option whose visible text is *text*.

        Usage:
        driver.select_by_visible_text("i,el", "text")
        """
        el = self._locate_element(selector)
        Select(el).select_by_visible_text(text)
        self.forced_wait(self._wait_seconds)

    def select_by_value(self, selector, value):
        """Select the option whose ``value`` attribute is *value*.

        Usage:
        driver.select_by_value("i,el", "value")
        """
        el = self._locate_element(selector)
        Select(el).select_by_value(value)
        self.forced_wait(self._wait_seconds)

    # ------------------------------------------------------------------
    # JavaScript
    # ------------------------------------------------------------------

    def execute_js(self, script):
        """Execute a JavaScript snippet.

        Usage:
        driver.js("window.scrollTo(200,1000);")
        """
        self._web_driver.execute_script(script)
        self.forced_wait(self._wait_seconds)

    # ------------------------------------------------------------------
    # Element attributes
    # ------------------------------------------------------------------

    def get_value(self, selector):
        """Return the ``value`` attribute of the located element."""
        el = self._locate_element(selector)
        return el.get_attribute("value")

    def get_attribute(self, selector, attribute):
        """Return the value of an element attribute.

        Usage:
        driver.get_attribute("i,el","type")
        """
        el = self._locate_element(selector)
        return el.get_attribute(attribute)

    def get_text(self, selector):
        """Return the element's text.

        Usage:
        driver.get_text("i,el")
        """
        el = self._locate_element(selector)
        return el.text

    def get_displayed(self, selector):
        """Return whether the element is displayed (True or False).

        Usage:
        driver.get_display("i,el")
        """
        el = self._locate_element(selector)
        return el.is_displayed()

    def get_selected(self, selector):
        """Return whether the located element is selected (True or False)."""
        el = self._locate_element(selector)
        return el.is_selected()

    def get_text_list(self, selector):
        """Return the texts of all elements matching *selector* as a list."""
        el_list = self._locate_elements(selector)
        return [el.text for el in el_list]

    # ------------------------------------------------------------------
    # Windows, frames and alerts
    # ------------------------------------------------------------------

    def accept_alert(self):
        """Accept the alert box.

        Usage:
        driver.accept_alert()
        """
        self._web_driver.switch_to.alert.accept()
        self.forced_wait(self._wait_seconds)

    def dismiss_alert(self):
        """Dismiss the alert box.

        Usage:
        driver.dismissAlert()
        """
        self._web_driver.switch_to.alert.dismiss()
        self.forced_wait(self._wait_seconds)

    def switch_to_frame(self, selector):
        """Switch to the frame located by *selector*.

        Usage:
        driver.switch_to_frame("i,el")
        """
        el = self._locate_element(selector)
        self._web_driver.switch_to.frame(el)
        self.forced_wait(self._wait_seconds)

    def switch_to_default(self):
        """Switch back to the default content; counterpart of switch_to_frame().

        Usage:
        driver.switch_to_default()
        """
        self._web_driver.switch_to.default_content()
        self.forced_wait(self._wait_seconds)

    def switch_to_parent(self):
        """Switch to the parent frame."""
        self._web_driver.switch_to.parent_frame()
        self.forced_wait(self._wait_seconds)

    def switch_to_window_by_title(self, title):
        """Switch to the first window whose title equals *title*.

        If no window matches, the last window visited stays selected.
        """
        for handle in self._web_driver.window_handles:
            self._web_driver.switch_to.window(handle)
            if self._web_driver.title == title:
                break

        self._web_driver.switch_to.default_content()
        self.forced_wait(self._wait_seconds)

    def open_new_window(self, selector):
        """Click an element and switch to the window it opened.

        Usage:
        driver.open_new_window("i,el")
        """
        original_windows = self._web_driver.current_window_handle
        el = self._locate_element(selector)
        el.click()
        all_handles = self._web_driver.window_handles
        for handle in all_handles:
            if handle != original_windows:
                self._web_driver.switch_to.window(handle)
                break

    def save_window_snapshot(self, file_name):
        """Save a screenshot of the window.

        :param file_name: the image file name and path
        """
        driver = self._web_driver
        driver.save_screenshot(file_name)
        self.forced_wait(self._wait_seconds)

    def save_window_snapshot_by_png(self):
        """Return a screenshot of the window as PNG binary data."""
        return self._web_driver.get_screenshot_as_png()

    def save_element_snapshot_by_png(self, selector):
        """Return a screenshot of the located element as PNG binary data."""
        el = self._locate_element(selector)
        self.forced_wait(self._wait_seconds)
        return el.screenshot_as_png

    def save_window_snapshot_by_io(self):
        """Return a screenshot of the window as a base64 string."""
        return self._web_driver.get_screenshot_as_base64()

    def save_element_snapshot_by_io(self, selector):
        """Return a screenshot of the located element as a base64 string."""
        el = self._locate_element(selector)
        return el.screenshot_as_base64

    # ------------------------------------------------------------------
    # Waiting
    # ------------------------------------------------------------------

    @staticmethod
    def forced_wait(seconds):
        """Sleep unconditionally for *seconds* seconds."""
        time.sleep(seconds)

    def implicitly_wait(self, seconds):
        """Set the implicit wait applied to all element lookups.

        :param seconds: wait time in seconds

        Usage:
        driver.implicitly_wait(10)
        """
        self._web_driver.implicitly_wait(seconds)

    def explicitly_wait(self, selector, seconds):
        """Wait until the element is present in the DOM.

        :param selector: selector string
        :param seconds: maximum wait time in seconds
        """
        locator = self._convert_selector_to_locator(selector)

        WebDriverWait(self._web_driver, seconds).until(
            expected_conditions.presence_of_element_located(locator))

    def get_explicitly_wait_element_text(self, selector, seconds):
        """Wait for an element and return its text.

        :param selector: selector string
        :param seconds: maximum wait time in seconds
        :return: ``element.text``, or ``None`` if the result is not a WebElement
        """
        locator = self._convert_selector_to_locator(selector)

        driver = self._web_driver
        el = WebDriverWait(driver,
                           seconds).until(lambda d: d.find_element(*locator))

        if el and isinstance(el, WebElement):
            return el.text

        return None

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def current_title(self):
        """Window title.

        Usage:
        driver.current_title
        """
        return self._web_driver.title

    @property
    def current_url(self):
        """URL of the current page.

        Usage:
        driver.current_url
        """
        return self._web_driver.current_url
# Start Chrome and open the user configuration page.
driver = Chrome(options=chrome_options, executable_path="/usr/local/bin/chromedriver")
driver.get(user_config_url)

# Cookies are kept in a "cookie" directory next to this file; create it on first use.
cookie_path = os.path.dirname(os.path.realpath(__file__)) + '/cookie/'
if not os.path.exists(cookie_path):
    os.mkdir(cookie_path, mode=0o777)
tapdcookies_path = cookie_path + 'tapdcookies.pkl'

if os.path.exists(tapdcookies_path):
    # Re-apply previously pickled cookies (name, value and path only; always marked secure).
    # NOTE(review): the file objects opened here and in the else branch are never closed explicitly.
    cookies = pickle.load(open(tapdcookies_path, "rb"))
    for cookie in cookies:
        driver.add_cookie({
            'name': cookie["name"],
            'value': cookie["value"],
            'path': cookie["path"],
            'secure': True
        })
else:
    # No cookie file yet: pickle the browser's current cookies.
    # NOTE(review): this runs before the login below — confirm that is the intended order.
    pickle.dump(driver.get_cookies(), open(tapdcookies_path, "wb"))

# If the login form is showing, fill in the credentials and submit.
if isElementExist(driver, "username"):
    driver.find_element_by_id("username").send_keys(user_email)
    driver.find_element_by_id("password_input").send_keys(user_password)
    driver.find_element_by_id("tcloud_login_button").click()
time.sleep(1)

# Collect the elements with class "rowNOTdone" and start with empty output buffers.
arr = driver.find_elements_by_class_name("rowNOTdone")
undo_output = ''
doing_output = ''
# -*- coding:utf-8 -*-
"""Open the site with a preset session cookie and print two link texts."""
from time import sleep

from selenium.webdriver import Chrome

driver = Chrome()

# Session cookie to add; the login page is opened first so the cookie is
# set while the site is the current page.
session_cookie = dict(
    domain='120.78.128.25',
    name='fengwoo',
    path='/',
    value='2shlsj8b6qju618hgj9ggjs2e3',
)
driver.get('http://120.78.128.25:8765/Index/login.html')
driver.add_cookie(session_cookie)
driver.get('http://120.78.128.25:8765')

# An exact-text alternative for the first link would be:
#   //a[text()='我的帐户[python10]']
link_xpaths = (
    "//a[contains(text(),'我的帐户')]",
    "//a[contains(@href,'Member')]",
)
# Locate both links before printing either of them.
found_links = [driver.find_element_by_xpath(xpath) for xpath in link_xpaths]
for link in found_links:
    print(link.text)
class BasePage:
    """Base page object with explicit-wait helpers around a Selenium driver."""

    def __init__(self, driver=None):
        """Use the given driver, or start Chrome logged in through cookies.

        :param driver: existing driver to reuse; when ``None`` a new Chrome is
            started, the WeCom admin contacts page is opened and the session
            cookies below are added
        """
        if driver is None:
            self.driver = Chrome()
            self.driver.maximize_window()
            self.driver.get(
                "https://work.weixin.qq.com/wework_admin/frame#contacts")
            # NOTE(review): session cookies are hard-coded here; consider
            # loading them from outside the source tree.
            cookies = {
                "wwrtx.vst":
                "zenYAe4CxGbueq5ASVGKquiAk5PdPagPGzKHdLCVqT2i-M2L68XlyLV_-2tP7InD4kOpcBm"
                "_stcX8b9Y9z6ec1BgEMdhR-FASZD-wSBX7D37_L7OFcsEYXUePdKC8sPqQBza3KieYk7TE9De"
                "2a2AaILp3vEZTlaJMLwFDrOFjOBcFLvhY-k-VmX1gl-BGUklaeVgd8MBeY1ky3t4-2M0yiQlnA"
                "7VWwRByLyJxlGrHgCrxZhOhs_BhvyJzLmJOoFNQvhrVSvzAXXoFdHs51gdxA",
                "wwrtx.d2st": "a4861364",
                "wwrtx.sid":
                "iAu-Z4L3xTLbZ5elezl0oXsd6Y-SXiveFjergOybpzZeb_7vPhAIpt8yVlOv0Ki1",
                "wwrtx.ltype": "1",
                "wxpay.corpid": "1688852500754167",
                "wxpay.vid": "1688852500754167",
            }
            for k, v in cookies.items():
                self.driver.add_cookie({"name": k, "value": v})
            self.driver.refresh()
        else:
            self.driver = driver

    def get_visible_element(self, locator, eqc=20) -> WebElement:
        """Wait for an element to be visible and return it.

        :param locator: locator tuple, e.g. ``('id', 'xxx')``
        :param eqc: timeout in seconds
        :return: the element, or ``None`` (after logging and attaching a
            screenshot) when the wait fails
        """
        try:
            ele = WebDriverWait(self.driver, timeout=eqc).until(
                EC.visibility_of_element_located(locator))
            logger.info('获取{}元素成功'.format(locator))
            return ele
        except Exception:
            logger.error("相对时间内没有定位到{}元素".format(locator))
            allure.attach(self.get_windows_img())

    def get_presence_element(self, locator, eqc=10):
        """Wait for an element to be present in the DOM and return it.

        :param locator: locator tuple
        :param eqc: timeout in seconds
        :return: the element, or ``None`` when the wait fails
        """
        try:
            ele = WebDriverWait(self.driver, timeout=eqc).until(
                EC.presence_of_element_located(locator))
            logger.info('获取{}元素成功'.format(locator))
            return ele
        except Exception:
            logger.error("相对时间内没有定位到{}元素".format(locator))
            allure.attach(self.get_windows_img())

    def get_clickable_element(self, locator, eqc=20):
        """Wait for an element to be clickable and return it.

        :param locator: locator tuple
        :param eqc: timeout in seconds
        :return: the element, or ``None`` when the wait fails
        """
        try:
            ele = WebDriverWait(self.driver, timeout=eqc).until(
                EC.element_to_be_clickable(locator))
            logger.info('获取{}元素成功'.format(locator))
            return ele
        except Exception:
            logger.error("相对时间内没有定位到{}元素".format(locator))
            allure.attach(self.get_windows_img())

    def send_keys(self, locator, text):
        """Clear the element and type *text* into it.

        locator = ('id','xxx')
        element.send_keys(locator,text)
        """
        element = self.get_visible_element(locator)
        element.clear()
        element.send_keys(text)
        logger.info('SendKeys %s in %s success.' % (text, locator))

    def is_text_in_element(self, locator, text, timeout=10):
        """Check that *text* is present in the element.

        Returns ``False`` (after logging) on timeout, otherwise the result of
        the wait.
        result = driver.text_in_element(locator,text)
        """
        try:
            result = WebDriverWait(self.driver, timeout, 1).until(
                EC.text_to_be_present_in_element(locator, text))
        except TimeoutException:
            logger.info('No location to the element.')
            allure.attach(self.get_windows_img())
            return False
        else:
            return result

    def is_text_in_value(self, locator, value, timeout=10):
        """Check that *value* is present in the element's value attribute.

        Returns ``False`` (after logging) on timeout, otherwise the result of
        the wait.
        result = driver.text_to_be_present_in_element_value(locator,text)
        """
        try:
            result = WebDriverWait(self.driver, timeout, 1).until(
                EC.text_to_be_present_in_element_value(locator, value))
        except TimeoutException:
            logger.info('No location to the element.')
            allure.attach(self.get_windows_img())
            return False
        else:
            return result

    def is_title(self, title, timeout=10):
        """Wait until the page title equals *title* exactly."""
        result = WebDriverWait(self.driver, timeout,
                               1).until(EC.title_is(title))
        return result

    def is_title_contains(self, title, timeout=10):
        """Wait until the page title contains *title*."""
        result = WebDriverWait(self.driver, timeout,
                               1).until(EC.title_contains(title))
        return result

    def is_selected(self, locator, timeout=10):
        """Wait until the located element is selected."""
        result = WebDriverWait(self.driver, timeout, 1).until(
            EC.element_located_to_be_selected(locator))
        return result

    def is_selected_be(self, locator, selected=True, timeout=10):
        """Wait until the element's selection state equals *selected*."""
        result = WebDriverWait(self.driver, timeout, 1).until(
            EC.element_located_selection_state_to_be(locator, selected))
        return result

    def is_alert_present(self, timeout=10):
        """Wait for an alert and return the result of the wait."""
        result = WebDriverWait(self.driver, timeout,
                               1).until(EC.alert_is_present())
        return result

    def is_visibility(self, locator, timeout=10):
        """Wait until the element is visible and return the result of the wait."""
        result = WebDriverWait(self.driver, timeout, 1).until(
            EC.visibility_of_element_located(locator))
        return result

    def is_invisibility(self, locator, timeout=10):
        """Wait until the element is invisible or absent."""
        result = WebDriverWait(self.driver, timeout, 1).until(
            EC.invisibility_of_element_located(locator))
        return result

    def is_clickable(self, locator, timeout=10):
        """Wait until the element is clickable and return the result of the wait."""
        result = WebDriverWait(self.driver, timeout,
                               1).until(EC.element_to_be_clickable(locator))
        return result

    def is_located(self, locator, timeout=10):
        """Wait until matching elements are present (not necessarily visible)."""
        result = WebDriverWait(self.driver, timeout, 1).until(
            EC.presence_of_all_elements_located(locator))
        return result

    def move_to_element(self, locator):
        """Hover the mouse over the element.

        locator=('id','xxx')
        driver.move_to_element(locator)
        """
        element = self.get_visible_element(locator)
        ActionChains(self.driver).move_to_element(element).perform()
        # logger.info('ActionChins move to %s' % locator)

    def back(self):
        """Go back one page in the browser history."""
        self.driver.back()
        logger.info('back driver!')

    def forward(self):
        """Go forward one page in the browser history."""
        self.driver.forward()
        logger.info('forward driver!')

    def close(self):
        """Close the current window."""
        self.driver.close()
        logger.info('close driver!')

    def refresh(self):
        """Refresh the current page."""
        return self.driver.refresh()

    def get_title(self):
        """Return the page title."""
        logger.info('git dirver title.')
        # `title` is a property holding the string, not a method.
        return self.driver.title

    def get_text(self, locator):
        """Return the text of the visible element."""
        element = self.get_visible_element(locator)
        # logger.info('get text in %s' % locator)
        text = element.text
        return text

    def get_attribute(self, locator, name):
        """Return attribute *name* of the visible element."""
        element = self.get_visible_element(locator)
        logger.info('get attribute in %s' % locator)
        return element.get_attribute(name)

    def js_execute(self, js):
        """Execute JavaScript; on failure attach a screenshot and return ``None``."""
        try:
            logger.info('Execute js.%s' % js)
            return self.driver.execute_script(js)
        except Exception:
            allure.attach(self.get_windows_img())
            logger.info('failed to excute js')

    def js_focus_element(self, locator):
        """Scroll the element into view."""
        target = self.get_visible_element(locator)
        self.driver.execute_script("arguments[0].scrollIntoView();", target)

    def js_scroll_top(self):
        """Scroll to the top of the page."""
        js = 'window.scrollTo(0,0)'
        self.js_execute(js)
        logger.info('Roll to the top!')

    def js_scroll_end(self):
        """Scroll to the bottom of the page."""
        # The DOM property is `scrollHeight`; a misspelt name evaluates to
        # undefined and the page does not scroll down.
        js = "window.scrollTo(0,document.body.scrollHeight)"
        self.js_execute(js)
        logger.info('Roll to the end!')

    def get_windows_img(self):
        """Save a screenshot and return its file name.

        :return: the screenshot file name, or ``None`` when a ``NameError``
            prevented the screenshot from being taken
        """
        try:
            file_name = contants.screenshot_img
            self.driver.get_screenshot_as_file(file_name)
            logger.info(
                'Had take screenshot and save to folder:output/screenshots')
        except NameError as e:
            # A NameError cannot fix itself on retry, so report it and stop
            # instead of calling this method again without end.
            logger.info('Failed to take the screenshot!%s' % e)
            return None
        return file_name

    def switch_window(self, name=None, fqc=20):
        """Switch to the window called *name*, or to the newest window.

        :param name: window name or handle to switch to; when ``None``, wait
            until there is a window besides the current one and switch to
            the last handle
        :param fqc: timeout in seconds for that wait
        :return: whatever ``driver.switch_to.window`` returns
        """
        if name is None:
            current_handle = self.driver.current_window_handle
            # new_window_is_opened compares against a LIST of known handles;
            # handing it the bare handle string would compare against the
            # string's length instead.
            WebDriverWait(self.driver, fqc).until(
                EC.new_window_is_opened([current_handle]))
            handles = self.driver.window_handles
            return self.driver.switch_to.window(handles[-1])
        return self.driver.switch_to.window(name)
class Mangafreak():
    """Selenium bot that signs in to the site at ``MANGAFREAK_FP`` (reusing
    pickled cookies when available) and walks the job listings, opening the
    application dialog for each position that is not yet applied to.

    NOTE(review): the source arrived with its indentation flattened; the
    nesting below is reconstructed — confirm against the original file.
    """

    def __init__(self):
        """Start Chrome, authenticate, load the note template and start
        applying."""
        # Raw string: "\c" is not a valid escape sequence. The resulting
        # path is unchanged.
        self.browser = Chrome(executable_path=r"E:\chromedriver.exe")
        self.browser.get(MANGAFREAK_FP)
        if os.path.exists('angel.pkl'):
            sleep(1)
            # SECURITY: pickle can execute arbitrary code while loading;
            # only ever load a cookie file this script wrote itself.
            with open('angel.pkl', 'rb') as cookie_file:
                cookies = pickle.load(cookie_file)
            for cookie in cookies:
                if cookie.get('expiry', None) is not None:
                    cookie['expires'] = cookie.pop('expiry')
                self.browser.add_cookie(cookie)
            self.browser.get(MANGAFREAK_FP + "/jobs")
            sleep(5)
            self.browser.get(MANGAFREAK_FP + "/jobs")
        else:
            log_in_btn = self.browser.find_element_by_xpath(
                "/html/body/div/div/header/div/div[2]/a[3]")
            log_in_btn.click()
            sleep(1)
            # NOTE(review): credentials are hard-coded here; they should
            # come from configuration or the environment.
            self.browser.find_element_by_id("user_email").send_keys(
                "*****@*****.**")
            self.browser.find_element_by_id("user_password").send_keys(
                "angelico@job55")
            self.browser.find_element_by_name("commit").click()
            # Persist the session so the next run can skip the login form.
            with open('angel.pkl', 'wb') as cookie_file:
                pickle.dump(self.browser.get_cookies(), cookie_file)
            jobs_portal = self.browser.find_element_by_xpath(
                "/html/body/div/header/div/div[1]/nav/ul/li[3]/a")
            jobs_portal.click()
            sleep(5)
        # Always define the attribute so apply_to_a_single_job_listing does
        # not fail with AttributeError when no template file exists.
        self.template = ""
        if os.path.exists('templateReq.txt'):
            with open('templateReq.txt', 'r') as template_file:
                self.template = template_file.read()
            # print("Template -- ",self.template)
        self.apply_to_jobs()

    def apply_to_jobs(self):
        """Run two passes over the startup results, handling on each pass
        only the entries that were not present in the previous pass."""
        sleep(4)
        starting_index = 0
        for _ in range(2):
            job_search_results = self.browser.find_element_by_xpath(
                '//*[@id="main"]/div/div[5]/div[2]/div')
            startup_results = [
                element
                for element in job_search_results.find_elements_by_tag_name(
                    'div')
                if element.get_attribute('data-test') == 'StartupResult'
            ]
            print("Startup entries", len(startup_results))
            for startup in startup_results[starting_index:]:
                # Property access with a side effect: scrolls the element
                # into view.
                startup.location_once_scrolled_into_view
                company_listings = startup.find_elements_by_class_name(
                    'component_07bb9')
                print("GEtting here", len(company_listings))
                # company_listings = [ e for e in company_listings if e.get_attribute('class') == 'listing_4d13a']
                for position in company_listings:
                    sleep(1)
                    self.apply_to_a_single_job_listing(position)
            starting_index = len(startup_results)
            # print(dir(startup_results[0]))
            input("Ready for next round")

    def display_text(self, elementArray, banner):
        """Print ``banner`` followed by the text of every element."""
        for entry in elementArray:
            print(banner, entry.text)

    def display_attribute(self, elementArray):
        """Print the ``class`` attribute of every element."""
        for entry in elementArray:
            print(entry.get_attribute('class') + "\n")

    def apply_to_a_single_job_listing(self, element):
        """Open the application dialog of one listing, type the greeting
        plus template into the note field, then press Cancel.

        Listings whose button already reads 'Applied' are left untouched.
        """
        company_info_1 = element.find_elements_by_tag_name('a')
        company_info_2 = element.find_elements_by_tag_name('span')
        self.display_text(company_info_1, "Company Info 1 -- ")
        self.display_text(company_info_2[-1:], "Company Info 2 -- ")
        # Get the apply button
        apply_box = element.find_element_by_class_name('box_1bc08')
        apply_button = apply_box.find_element_by_tag_name('button')
        if apply_button.text == 'Applied':
            return
        apply_button.click()
        sleep(3)
        applicationModal = self.browser.find_element_by_class_name(
            "ReactModalPortal")
        sleep(0.2)
        h4tags = applicationModal.find_elements_by_tag_name('h4')
        # self.display_text(h4tags, "Contact Person -- ")
        contact_parts = h4tags[-1].text.split(" is ")
        print(contact_parts)
        # The code takes the text after the first " is " of the last <h4>
        # as the contact's name.
        contact_person = contact_parts[1]
        writeNoteToContact = applicationModal.find_element_by_tag_name(
            'textarea')
        writeNoteToContact.send_keys("Hi {}, {}".format(
            contact_person, self.template))
        for button in applicationModal.find_elements_by_tag_name('button'):
            if button.text == 'Cancel':
                button.click()
                break
'file:///D:/Users/%E4%BD%95%E6%97%BA%E5%BD%A4/PycharmProjects/heyang-/index.html'
)
''' #iframe切换 1、name 2、索引 3、WebElement driver.switch_to.frame('myiframe')#第一种方法 #先拿到WebElement frame = driver.find_element_by_tag_name('iframe') driver.switch_to.frame(frame) #等待新的iframe可以用在进行切换 ec.frame_to_be_available_and_switch_to_it((By.TAG_NAME,'frame')) #怎么切换回去初始的HTML内容 driver.switch_to.default_content() #多个iframe,多个嵌套。切到父级 driver.switch_to.parent_frame() '''
# Switching to an alert
driver.find_element_by_name('click').click()  # locate the element first, then click it
alert = driver.switch_to.alert  # get the alert object
alert.text  # read the alert's text
alert.accept()  # confirm; returns to the original page
# NOTE(review): accept() above presumably already closes the alert, so this
# dismiss() looks like an either/or illustration rather than a sequence — confirm.
alert.dismiss()  # cancel; returns to the original page
# Further reading: @property
# NOTE(review): both calls below are written without arguments; they look
# like API reminders rather than runnable calls — confirm.
driver.add_cookie()  # add a cookie
driver.get_cookie()  # get a cookie
import venmoInfo
import datetime
import os

# Raw string: "\C" and "\c" are not valid escape sequences (newer Python
# versions warn about them). The resulting path is unchanged.
CHROME_DRIVER_PATH = r'C:\ChromeDriver\chromedriver.exe'
VENMO_URL = 'https://venmo.com/'

browser = Chrome(CHROME_DRIVER_PATH)
browser.get(VENMO_URL)

if os.path.isfile('cookies.pkl'):
    # there is a cookie file: restore the saved session cookies
    # SECURITY: pickle can execute arbitrary code while loading; only load
    # a cookie file this script created itself.
    with open("cookies.pkl", "rb") as cookie_file:
        cookies = pickle.load(cookie_file)
    for cookie in cookies:
        browser.add_cookie(cookie)

# click on the sign in link
signin_link = browser.find_element_by_link_text("Sign in")
signin_link.click()

# enter the email and password and send it
username_box = browser.find_element_by_class_name("email-username-phone")
username_box.send_keys(venmoInfo.my_u)
password_box = browser.find_element_by_class_name("password")
password_box.send_keys(venmoInfo.my_p)
send_button = browser.find_element_by_class_name("login")
send_button.click()

# enter the person's name you want to pay
time.sleep(5)
class URLTracker:
    """Follow the browser's current URL and start/stop ``watcher`` while
    the browser is inside a ``.../project/<name>`` page.

    NOTE(review): the source arrived with its indentation flattened; the
    nesting below is reconstructed — confirm against the original file.
    """

    # Seconds between two polls of the browser's current URL.
    _POLL_INTERVAL = 0.1

    def __init__(self, watcher):
        """Open a Chrome session and remember ``watcher``."""
        self.wd = Chrome()
        self.watcher = watcher
        #self.load_cookie()

    def load_cookie(self):
        """Load pickled cookies from the file ``cookies`` into the browser.

        Does nothing when the file does not exist.
        """
        if not exists('cookies'):
            return
        with open('cookies', 'rb') as rf:
            self.cookies = load(rf)
        for cookie in self.cookies:
            self.wd.add_cookie(cookie)
        logger.info('Cookies load successfully.')

    def dump_cookie(self):
        """Store the browser's current cookies in the file ``cookies``."""
        self.cookies = self.wd.get_cookies()
        with open('cookies', 'wb') as wf:
            dump(self.cookies, wf)

    def login(self, email, password):
        """Fill in the login form and submit it."""
        wd = self.wd
        wd.find_element_by_name('email').send_keys(email)
        wd.find_element_by_name('password').send_keys(password)
        wd.find_elements_by_xpath('//button[@type = "submit"]')[0].click()

    def close_window(self):
        """Prepare (but do not start) a thread that keeps clicking the
        ``done()`` button whenever it shows up."""

        def target(wd):
            while True:
                ele = wd.find_elements_by_xpath(
                    '//button[@ng-click = "done()"]')
                if len(ele):
                    ele[0].click()
                sleep(3)

        # The thread is deliberately left unstarted, as in the original
        # (its start() call is commented out).
        th = Thread(target=target, args=(self.wd, ))
        #th.start()

    def _wait_for_url_change(self, url):
        """Block until the browser's current URL differs from ``url``.

        Sleeps between polls; the previous inline loops queried the driver
        in a tight loop, pinning a CPU core and flooding the webdriver.
        """
        while self.wd.current_url == url:
            sleep(self._POLL_INTERVAL)

    def start(self):
        """Run forever, reacting to URL changes.

        While the URL ends in ``/project`` this waits for the user to enter
        a project. While the second-to-last path segment is ``project`` the
        watcher is started and kept running until the URL changes again.
        """
        wd = self.wd
        watcher = self.watcher
        while True:
            url = wd.current_url
            url_parts = url.split('/')
            if url_parts[-1] == 'project':
                #self.dump_cookie()
                logger.info('Waiting for entering a project')
                self._wait_for_url_change(url)
            if len(url_parts) >= 2 and url_parts[-2] == 'project':
                logger.info('A project found, start syncing.')
                self.close_window()
                # start watching until the user leaves the project page
                watcher.start()
                self._wait_for_url_change(url)
                watcher.stop()
            sleep(self._POLL_INTERVAL)
def set_chrome(self, **kwargs):
    """
    Build and configure a Chrome webdriver.

    ----------------------------------------------------------------------------
    Configuration Method:
        1. EXECUTABLE, SERVICE ARGS and PORT
        2. DESIRED CAPABILITIES
        3. CHROME OPTIONS

    Command Line Switches:
        http://peter.sh/experiments/chromium-command-line-switches/
    Capabilities:
        https://sites.google.com/a/chromium.org/chromedriver/capabilities
    ----------------------------------------------------------------------------

    This function should be a class for webdriver.
    For now, just setting up Chrome.

    driver_browsers = ['android', 'chrome', 'firefox', 'htmlunit',
                       'internet explorer', 'iPhone', 'iPad', 'opera',
                       'safari']

    The settings dict ``T`` is assembled from ``kwargs``, ``self.T.kwargs``,
    the attributes of ``self.T.id`` (plus its ``details`` and
    ``cookie.content``) and finally the built-in defaults.

    :param kwargs: settings that seed ``T``; also passed to
        ``self.config_browser``.
    :return: tuple ``(driver, T)``.

    NOTE(review): the source arrived with its indentation flattened; the
    nesting below is reconstructed — confirm against the original file.
    """

    def set_defaults(self):
        """Write the default settings into ``T`` and return it.

        Every default overwrites a same-named key already in ``T``, except
        keys listed in ``self.T.excluded_defaults``, which are removed from
        ``T`` instead.
        """
        default_settings = {
            'bin_path': '/usr/local/bin/chromedriver',
            'port': 15010,
            'log_path': os_environ['BD'] + '/html/logs/chromedriver.log',
            'user-agent': "Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1",  # 1 in 1788 per panopticlick !!
            'no_java': True,
            'no_plugins': True,
            'net-log-capture-mode': 'IncludeCookiesAndCredentials',
            'log-level': 0,
            'cookie_content': {},
            'capabilities': {
                'acceptSslCerts': True,
                'databaseEnabled': False,
                'unexpectedAlertBehaviour': "accept",
                'applicationCacheEnabled': False,
                'webStorageEnabled': False,
                'browserConnectionEnabled': False,
                'locationContextEnabled': True,
            },
            'loggingPrefs': {
                "driver": "ALL",
                "server": "ALL",
                "browser": "ALL"
            },
            'true_opts': [
                'disable-core-animation-plugins',
                'disable-plugins',
                'disable-extensions',
                'disable-plugins-discovery',
                'disable-site-engagement-service',
                'disable-text-input-focus-manager',
                'enable-account-consistency',
                'enable-devtools-experiments',
                'enable-logging',
                'enable-network-information',
                'enable-net-benchmarking',
                'enable-network-portal-notification',
                'enable-strict-site-isolation',
                'incognito',  # if incognito, extensions must be disabled
                'log-net-log',
                'scripts-require-action',
                'system-developer-mode',
                # 'use-mobile-user-agent',
            ],
            'false_opts': [
                'enable-profiling',
            ],
        }
        excluded = []
        if hasattr(self, 'T') and hasattr(self.T, 'excluded_defaults'):
            excluded = self.T.excluded_defaults
        for k, v in default_settings.items():
            if k in excluded:
                T.pop(k, None)
            else:
                T[k] = v
        return T

    def set_desired_capabilities(self):
        """Return Chrome desired capabilities built from ``T``."""
        from selenium.webdriver import DesiredCapabilities
        dc = DesiredCapabilities.CHROME.copy()
        # -PROXY OBJECT
        # from selenium.webdriver import Proxy
        # -READ-WRITE CAPABILITIES
        rw_capabilities = [
            'acceptSslCerts',  # boolean unless specified
            'javascriptEnabled',
            'databaseEnabled',
            'proxy',  # Proxy Object
            'unexpectedAlertBehaviour',  # string {"accept", "dismiss", "ignore"}
            'applicationCacheEnabled',
            'webStorageEnabled',
            'rotatable',
            'browserConnectionEnabled',
            'locationContextEnabled',
            'elementScrollBehavior',  # int (align with the top (0) or bottom (1) of the viewport)
            'nativeEvents'
        ]
        assert 'capabilities' in T
        for it in rw_capabilities:
            if it in T['capabilities']:
                dc[it] = str(T['capabilities'][it])
        # -loggingPrefs OBJECT (dict)
        #   "OFF", "SEVERE", "WARNING",
        #   "INFO", "CONFIG", "FINE",
        #   "FINER","FINEST", "ALL"
        if 'loggingPrefs' in T:
            # Was ``dc[it] = ...``, which overwrote the last capability
            # from the loop above ('nativeEvents') with the logging dict.
            dc['loggingPrefs'] = T['loggingPrefs']
        return dc

    def set_profile(self):
        """Add download/plugin preferences to ``opts`` (currently unused)."""
        profile = {  # "download.default_directory" : "C:\\SeleniumTests\\PDF",
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "plugins.plugins_disabled": ["Chromoting Viewer",
                                         "Chromium PDF Viewer"],
        }
        opts.add_experimental_option("prefs", profile)

    def set_performance_logging(self):
        """Add perfLoggingPrefs to ``opts`` (currently unused)."""
        perfLogging = {
            "enableNetwork": True,
            "enablePage": True,
            "enableTimeline": True,
            # "tracingCategories":<string>,
            "bufferUsageReportingInterval": 1000
        }
        opts.add_experimental_option("perfLoggingPrefs", perfLogging)

    def set_chrome_options(self):
        """Return ChromeOptions carrying the switches configured in ``T``."""
        from selenium.webdriver import ChromeOptions
        opts = ChromeOptions()
        ### Add Boolean Arguments
        for it in T.get('true_opts', []):
            opts.add_argument('%s=1' % it)
        for it in T.get('false_opts', []):
            opts.add_argument('%s=0' % it)
        value_opts = [
            'profile-directory',
            'log-level',  # 0 to 3: INFO = 0, WARNING = 1, LOG_ERROR = 2, LOG_FATAL = 3
            'net-log-capture-mode',  # "Default" "IncludeCookiesAndCredentials" "IncludeSocketBytes"'
            'register-font-files',  # might be windows only
            'remote-debugging-port',
            'user-agent',
            'user-data-dir',  # don't use b/c it negates no-extension options
        ]
        ### Add Value Arguments
        for it in value_opts:
            if it in T:
                opts.add_argument('%s=%s' % (it, T[it]))
        ### OTHER CHROME OPTIONS NOT YET FULLY CONFIGURED
        # -extensions           list str
        # -localState           dict
        # -prefs                dict
        # set_profile()
        # -detach               bool
        # -debuggerAddress      str
        # -excludeSwitches      list str
        # -minidumpPath         str
        # -mobileEmulation      dict
        # -perfLoggingPrefs     OBJECT (dict)
        # set_performance_logging()
        return opts

    from selenium.webdriver import Chrome

    T = {}
    if kwargs:
        T.update(kwargs)
    # ``self.T`` is optional: look it up once, so the later checks do not
    # raise AttributeError on objects that have no ``T``.
    holder = getattr(self, 'T', None)
    if hasattr(holder, 'kwargs'):
        T.update(holder.kwargs)

    # Cycle Through kwargs and Extract Configs
    if hasattr(holder, 'id'):
        T.update(holder.id.__dict__)
        if hasattr(holder.id, 'details'):
            for k, v in holder.id.details.__dict__.items():
                T[k.strip('_')] = v
        if hasattr(holder.id, 'cookie'):
            if hasattr(holder.id.cookie, 'content'):
                T['cookie_content'] = holder.id.cookie.content

    # Set Defaults if not provided
    if 'defaults' not in T:
        T = set_defaults(self)

    # Config Data Storage if Possible
    if 'SAVE_DIR' in T:
        T['user-data-dir'] = T['SAVE_DIR']
        T['profile-directory'] = 'Profile'
        if 'guid' in T:
            T['log_path'] = '%s/%s.log' % (T['SAVE_DIR'], T['guid'])

    # Configure with Special Profiles if Requested
    special_profiles = os_environ['BD'] + '/html/webdrivers/chrome/profiles'
    special_dir = None
    if T.get('no_java'):
        if T.get('no_plugins'):
            special_dir = '/no_java_no_plugins/'
        else:
            special_dir = '/no_java/'
    elif T.get('no_plugins'):
        special_dir = '/no_plugins/'
    if special_dir is not None:
        T['user-data-dir'] = special_profiles + special_dir
        # 'profile-directory' exists only when SAVE_DIR was supplied; a
        # plain ``del`` raised KeyError on the default configuration.
        T.pop('profile-directory', None)

    # SERVICE ARGS
    #   ( somewhat documented in executable help, i.e., chromedriver --help )
    service_args = ["--verbose", "--log-path=%(log_path)s" % T]

    dc = set_desired_capabilities(self)
    opts = set_chrome_options(self)
    d = Chrome(executable_path=T['bin_path'],
               port=T['port'],
               service_args=service_args,
               desired_capabilities=dc,
               chrome_options=opts)
    d.set_window_size(1280, 720)
    # 'cookie_content' may have been removed through excluded_defaults.
    if T.get('cookie_content'):
        d.add_cookie(T['cookie_content'])
    self.config_browser(d, kwargs)
    return d, T
def set_chrome(self, **kwargs):
    """
    Build and configure a Chrome webdriver.

    ----------------------------------------------------------------------------
    Configuration Method:
        1. EXECUTABLE, SERVICE ARGS and PORT
        2. DESIRED CAPABILITIES
        3. CHROME OPTIONS

    Command Line Switches:
        http://peter.sh/experiments/chromium-command-line-switches/
    Capabilities:
        https://sites.google.com/a/chromium.org/chromedriver/capabilities
    ----------------------------------------------------------------------------

    This function should be a class for webdriver.
    For now, just setting up Chrome.

    driver_browsers = ['android', 'chrome', 'firefox', 'htmlunit',
                       'internet explorer', 'iPhone', 'iPad', 'opera',
                       'safari']

    The settings dict ``T`` is assembled from ``kwargs``, ``self.T.kwargs``,
    the attributes of ``self.T.id`` (plus its ``details`` and
    ``cookie.content``) and finally the built-in defaults.

    :param kwargs: settings that seed ``T``; also passed to
        ``self.config_browser``.
    :return: tuple ``(driver, T)``.

    NOTE(review): the source arrived with its indentation flattened; the
    nesting below is reconstructed — confirm against the original file.
    """

    def set_defaults(self):
        """Write the default settings into ``T`` and return it.

        Every default overwrites a same-named key already in ``T``, except
        keys listed in ``self.T.excluded_defaults``, which are removed from
        ``T`` instead.
        """
        default_settings = {
            'bin_path': '/usr/local/bin/chromedriver',
            'port': 15010,
            'log_path': os_environ['BD'] + '/html/logs/chromedriver.log',
            'user-agent': "Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1",  # 1 in 1788 per panopticlick !!
            'no_java': True,
            'no_plugins': True,
            'net-log-capture-mode': 'IncludeCookiesAndCredentials',
            'log-level': 0,
            'cookie_content': {},
            'capabilities': {
                'acceptSslCerts': True,
                'databaseEnabled': False,
                'unexpectedAlertBehaviour': "accept",
                'applicationCacheEnabled': False,
                'webStorageEnabled': False,
                'browserConnectionEnabled': False,
                'locationContextEnabled': True,
            },
            'loggingPrefs': {
                "driver": "ALL",
                "server": "ALL",
                "browser": "ALL"
            },
            'true_opts': [
                'disable-core-animation-plugins',
                'disable-plugins',
                'disable-extensions',
                'disable-plugins-discovery',
                'disable-site-engagement-service',
                'disable-text-input-focus-manager',
                'enable-account-consistency',
                'enable-devtools-experiments',
                'enable-logging',
                'enable-network-information',
                'enable-net-benchmarking',
                'enable-network-portal-notification',
                'enable-strict-site-isolation',
                'incognito',  # if incognito, extensions must be disabled
                'log-net-log',
                'scripts-require-action',
                'system-developer-mode',
                # 'use-mobile-user-agent',
            ],
            'false_opts': [
                'enable-profiling',
            ],
        }
        excluded = []
        if hasattr(self, 'T') and hasattr(self.T, 'excluded_defaults'):
            excluded = self.T.excluded_defaults
        for k, v in default_settings.items():
            if k in excluded:
                T.pop(k, None)
            else:
                T[k] = v
        return T

    def set_desired_capabilities(self):
        """Return Chrome desired capabilities built from ``T``."""
        from selenium.webdriver import DesiredCapabilities
        dc = DesiredCapabilities.CHROME.copy()
        # -PROXY OBJECT
        # from selenium.webdriver import Proxy
        # -READ-WRITE CAPABILITIES
        rw_capabilities = [
            'acceptSslCerts',  # boolean unless specified
            'javascriptEnabled',
            'databaseEnabled',
            'proxy',  # Proxy Object
            'unexpectedAlertBehaviour',  # string {"accept", "dismiss", "ignore"}
            'applicationCacheEnabled',
            'webStorageEnabled',
            'rotatable',
            'browserConnectionEnabled',
            'locationContextEnabled',
            'elementScrollBehavior',  # int (align with the top (0) or bottom (1) of the viewport)
            'nativeEvents'
        ]
        assert 'capabilities' in T
        for it in rw_capabilities:
            if it in T['capabilities']:
                dc[it] = str(T['capabilities'][it])
        # -loggingPrefs OBJECT (dict)
        #   "OFF", "SEVERE", "WARNING",
        #   "INFO", "CONFIG", "FINE",
        #   "FINER","FINEST", "ALL"
        if 'loggingPrefs' in T:
            # Was ``dc[it] = ...``, which overwrote the last capability
            # from the loop above ('nativeEvents') with the logging dict.
            dc['loggingPrefs'] = T['loggingPrefs']
        return dc

    def set_profile(self):
        """Add download/plugin preferences to ``opts`` (currently unused)."""
        profile = {  # "download.default_directory" : "C:\\SeleniumTests\\PDF",
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "plugins.plugins_disabled": ["Chromoting Viewer",
                                         "Chromium PDF Viewer"],
        }
        opts.add_experimental_option("prefs", profile)

    def set_performance_logging(self):
        """Add perfLoggingPrefs to ``opts`` (currently unused)."""
        perfLogging = {
            "enableNetwork": True,
            "enablePage": True,
            "enableTimeline": True,
            # "tracingCategories":<string>,
            "bufferUsageReportingInterval": 1000
        }
        opts.add_experimental_option("perfLoggingPrefs", perfLogging)

    def set_chrome_options(self):
        """Return ChromeOptions carrying the switches configured in ``T``."""
        from selenium.webdriver import ChromeOptions
        opts = ChromeOptions()
        ### Add Boolean Arguments
        for it in T.get('true_opts', []):
            opts.add_argument('%s=1' % it)
        for it in T.get('false_opts', []):
            opts.add_argument('%s=0' % it)
        value_opts = [
            'profile-directory',
            'log-level',  # 0 to 3: INFO = 0, WARNING = 1, LOG_ERROR = 2, LOG_FATAL = 3
            'net-log-capture-mode',  # "Default" "IncludeCookiesAndCredentials" "IncludeSocketBytes"'
            'register-font-files',  # might be windows only
            'remote-debugging-port',
            'user-agent',
            'user-data-dir',  # don't use b/c it negates no-extension options
        ]
        ### Add Value Arguments
        for it in value_opts:
            if it in T:
                opts.add_argument('%s=%s' % (it, T[it]))
        ### OTHER CHROME OPTIONS NOT YET FULLY CONFIGURED
        # -extensions           list str
        # -localState           dict
        # -prefs                dict
        # set_profile()
        # -detach               bool
        # -debuggerAddress      str
        # -excludeSwitches      list str
        # -minidumpPath         str
        # -mobileEmulation      dict
        # -perfLoggingPrefs     OBJECT (dict)
        # set_performance_logging()
        return opts

    from selenium.webdriver import Chrome

    T = {}
    if kwargs:
        T.update(kwargs)
    # ``self.T`` is optional: look it up once, so the later checks do not
    # raise AttributeError on objects that have no ``T``.
    holder = getattr(self, 'T', None)
    if hasattr(holder, 'kwargs'):
        T.update(holder.kwargs)

    # Cycle Through kwargs and Extract Configs
    if hasattr(holder, 'id'):
        T.update(holder.id.__dict__)
        if hasattr(holder.id, 'details'):
            for k, v in holder.id.details.__dict__.items():
                T[k.strip('_')] = v
        if hasattr(holder.id, 'cookie'):
            if hasattr(holder.id.cookie, 'content'):
                T['cookie_content'] = holder.id.cookie.content

    # Set Defaults if not provided
    if 'defaults' not in T:
        T = set_defaults(self)

    # Config Data Storage if Possible
    if 'SAVE_DIR' in T:
        T['user-data-dir'] = T['SAVE_DIR']
        T['profile-directory'] = 'Profile'
        if 'guid' in T:
            T['log_path'] = '%s/%s.log' % (T['SAVE_DIR'], T['guid'])

    # Configure with Special Profiles if Requested
    special_profiles = os_environ['BD'] + '/html/webdrivers/chrome/profiles'
    special_dir = None
    if T.get('no_java'):
        if T.get('no_plugins'):
            special_dir = '/no_java_no_plugins/'
        else:
            special_dir = '/no_java/'
    elif T.get('no_plugins'):
        special_dir = '/no_plugins/'
    if special_dir is not None:
        T['user-data-dir'] = special_profiles + special_dir
        # 'profile-directory' exists only when SAVE_DIR was supplied; a
        # plain ``del`` raised KeyError on the default configuration.
        T.pop('profile-directory', None)

    # SERVICE ARGS
    #   ( somewhat documented in executable help, i.e., chromedriver --help )
    service_args = ["--verbose", "--log-path=%(log_path)s" % T]

    dc = set_desired_capabilities(self)
    opts = set_chrome_options(self)
    d = Chrome(executable_path=T['bin_path'],
               port=T['port'],
               service_args=service_args,
               desired_capabilities=dc,
               chrome_options=opts)
    d.set_window_size(1280, 720)
    # 'cookie_content' may have been removed through excluded_defaults.
    if T.get('cookie_content'):
        d.add_cookie(T['cookie_content'])
    self.config_browser(d, kwargs)
    return d, T
class FaucetCryptoBot:
    """Selenium bot that logs in to faucetcrypto.com and collects the main
    reward, PTC ads and shortlink rewards.

    Settings are read from ``config.cfg`` in the working directory. XPaths
    come from the ``user``, ``main_reward``, ``ptc_ads``, ``shortlinks``
    and ``faucet`` mappings defined elsewhere in the project.

    NOTE(review): the source arrived with its indentation flattened; the
    nesting below is reconstructed — confirm against the original file.
    """

    def __init__(self):
        """Read the configuration and start the Chrome session."""
        # Parse the config file once instead of once per attribute.
        (
            self.browser_mode,
            self.driver_path,
            self.browser_binary_location,
            self.user_mail,
            self.user_pswd,
            self.debug,
            self.proxy,
        ) = self._configParser()

        self.log = Log()
        self.driver = Chrome(options=self._get_opts(),
                             executable_path=self.driver_path)
        self.dash_board_url = "https://faucetcrypto.com/dashboard"
        self.login_url = "https://faucetcrypto.com/login"
        self.banner = draw_banner()

        self.log.write_log(
            "browser", f"starting browser session: {self.driver.session_id}")
        self.main_window = self.driver.current_window_handle

    def _get_opts(self):
        """Return the Chrome options derived from the configuration."""
        opts = webdriver.chrome.options.Options()
        if self.browser_mode == "headless":
            opts.add_argument("--headless")
        if self.proxy != "":
            opts.add_argument("--proxy-server=%s" % self.proxy)
        opts.add_argument("--no-sandbox")
        opts.add_argument("--disable-dev-shm-usage")
        opts.binary_location = self.browser_binary_location
        # Was "--ignore-certificate-erors"; the corrected spelling is the
        # switch name Chromium actually documents.
        opts.add_argument("--ignore-certificate-errors")
        opts.add_argument("window-size=1920,1080")
        opts.add_argument("start-maximized")
        # opts.add_argument("user-data-dir=" + USER_DATA_DIR)
        opts.add_argument("disable-infobars")
        # Experimental options are keyed by name, so two separate
        # "excludeSwitches" calls left only the second list in effect.
        opts.add_experimental_option(
            "excludeSwitches",
            ["disable-popup-blocking", "enable-automation"])
        opts.add_experimental_option("useAutomationExtension", False)
        return opts

    def _configParser(self):
        """Read ``config.cfg`` and return ``(browser_mode, driver_path,
        browser_binary_location, user_mail, user_pswd, debug, proxy)``."""
        from configparser import ConfigParser

        config = ConfigParser()
        # read_file replaces readfp (removed in Python 3.12); the context
        # manager closes the handle, which was previously leaked.
        with open("config.cfg") as config_file:
            config.read_file(config_file)

        browser_mode = config.get("Browser", "browser-mode")
        driver_path = config.get("Browser", "driver-path")
        browser_binary_location = config.get("Browser",
                                             "browser-binary-location")
        user_mail = config.get("User", "mail")
        user_pswd = config.get("User", "password")
        debug = config.getboolean("Misc", "debug")
        proxy = config.get("Misc", "proxy")

        return (
            browser_mode,
            driver_path,
            browser_binary_location,
            user_mail,
            user_pswd,
            debug,
            proxy,
        )

    def quit(self):
        """End the browser session."""
        # quit() closes every window and shuts the driver down; close()
        # only closed the current window and left the session running.
        self.driver.quit()

    def sleep(self, mins):
        """Log and then sleep for ``mins`` minutes."""
        import time

        self.log.write_log("bot", self.log.blue_text(f"Sleeping for {mins}m"))
        time.sleep(60 * int(mins))

    def error_handler(self, msg):
        """Forward ``msg`` to the error log."""
        self.log.error_log(msg)

    def _report_error(self, error):
        """Report ``error`` as a warning in debug mode, otherwise through
        ``error_handler``."""
        if self.debug:
            self.log.write_log("warning", error)
        else:
            self.error_handler(error)

    def _click(self, element, msg="placeholder"):
        """Click the element found by the XPath ``element``; ``msg`` names
        the target in the log."""
        # Every other call site passes (category, message) to write_log.
        self.log.write_log("bot", f"clicking on {msg}")
        self.driver.find_element_by_xpath(element).click()

    def _random_wait(self, t_min, t_max):
        """Sleep a random whole number of seconds in [t_min, t_max)."""
        import time
        import random

        random_time = random.randrange(t_min, t_max)
        self.log.write_log("bot", f"Waiting for {random_time} sec")
        time.sleep(random_time)

    def __switch_tab(self):
        """Close every window except the main one and switch back to it."""
        self._random_wait(2, 4)
        for window in self.driver.window_handles:
            if window != self.main_window:
                self.driver.switch_to.window(window)
                self.driver.close()
                self.driver.switch_to.window(self.main_window)

    def __get_xpath_elem(self, element):
        """Return the element for the XPath ``element``, or None (after
        reporting the error) when the lookup fails."""
        try:
            return self.driver.find_element_by_xpath(element)
        except Exception as e:
            self._report_error(e)
            return None

    def __check_main_reward_availability(self):
        """Return True when the main reward link text contains 'ready'."""
        link_text = self.__get_xpath_elem(
            main_reward["main-reward-dash-link"]).text
        return "ready" in link_text.lower()

    def __captcha_check(self, captcha_block):
        """Return True when the captcha block text contains 'good person'."""
        if "good person" in self.__get_xpath_elem(captcha_block).text.lower():
            self.log.write_log("success", "Havent caught me yet")
            return True
        self.log.write_log("warning", "Oops looks like i'm caught")
        return False

    def _modal_handler(self):
        """Close the modal and the chat popup if they are present."""
        try:
            self._click(user["user-modal-close"], "modal")
            self._click(user["user-chat-close"], "chat")
        except Exception:
            # Best effort: a popup that is not there is not an error.
            pass

    def get_user_balance(self):
        """Log the user's coin and BTC balance from the dashboard."""
        if self.driver.current_url != self.dash_board_url:
            self.driver.get(self.dash_board_url)
        coin_balance = self.__get_xpath_elem(user["user-coin-balance"]).text
        btc_balance = self.__get_xpath_elem(user["user-btc-balance"]).text
        balance_msg = "User balance: " + self.log.yellow_text(coin_balance +
                                                              "/" +
                                                              btc_balance)
        self.log.write_log("bot", balance_msg)

    def get_user_level(self):
        """Log the user's level and level progress."""
        user_level = self.__get_xpath_elem(user["user-level"]).text
        user_level_percent = self.__get_xpath_elem(
            user["user-level-percent"]).text
        level_msg = "User level: " + self.log.blue_text(user_level + "/" +
                                                        user_level_percent)
        self.log.write_log("bot", level_msg)

    def get_current_coin_rate(self):
        """Log the current coin rate."""
        coin_rate = self.__get_xpath_elem(user["user-coin-rate"]).text
        coin_rate_msg = "Coin rate: " + self.log.yellow_text(coin_rate)
        self.log.write_log("bot", coin_rate_msg)

    def login_handler(self, remember=True, cookies=True):
        """Log in, preferring the pickled session cookies.

        :param remember: tick "remember me" when the form login is used.
        :param cookies: after a form login, save the session cookies to
            the file ``cookies`` once the dashboard is reached.
        """
        if self.driver.current_url == self.dash_board_url:
            return
        self.driver.get(self.login_url)
        try:
            # SECURITY: pickle can execute arbitrary code while loading;
            # only load a cookie file this bot wrote itself.
            with open("cookies", "rb") as f:
                # Distinct name: the loaded list used to overwrite the
                # ``cookies`` flag parameter.
                saved_cookies = pickle.load(f)
            for cookie in saved_cookies:
                self.driver.add_cookie(cookie)
            self.driver.refresh()
        except Exception:
            # No usable cookie file: fall back to the login form.
            self.__get_xpath_elem(user["user-email-field"]).send_keys(
                self.user_mail)
            self.__get_xpath_elem(user["user-password-field"]).send_keys(
                self.user_pswd)
            if remember:
                self._click(user["user-remember-me"])
            self._click(user["user-login-btn"])
            self._random_wait(3, 5)
            if cookies and self.driver.current_url == self.dash_board_url:
                with open("cookies", "wb") as f:
                    pickle.dump(self.driver.get_cookies(), f)

    def get_main_reward(self):
        """Collect the main reward when it is available."""
        self.log.write_log("bot", self.log.green_text("MAIN REWARD"))
        if self.driver.current_url != self.dash_board_url:
            self.driver.get(self.dash_board_url)
        self._modal_handler()
        if not os.path.exists("cookies"):
            with open("cookies", "wb") as f:
                pickle.dump(self.driver.get_cookies(), f)
        try:
            if self.__check_main_reward_availability():
                self.log.write_log("success", "Main reward is available")
                self._click(main_reward["main-reward-dash-link"],
                            "main reward dash link")
                self._random_wait(3, 5)
                if self.__captcha_check(
                        main_reward["main-reward-captcha-block"]):
                    self._random_wait(16, 18)
                    self._click(main_reward["main-reward-claim-btn"],
                                "main reward claim button")
                    self.log.write_log("success", "Collected the main reward")
                    self._random_wait(3, 5)
            else:
                self.log.write_log("bot", "Main reward is not available")
        except Exception as e:
            self._report_error(e)

    def get_ptc_ads(self):
        """Log the PTC ad statistics and watch each available ad."""
        self.log.write_log("bot", self.log.green_text("PTC ADS"))
        if self.driver.current_url != self.dash_board_url:
            self.driver.get(self.dash_board_url)
        self._click(ptc_ads["ptc-ads-dash-link"])
        self._random_wait(3, 5)

        total_ads_amount = self.__get_xpath_elem(
            ptc_ads["ptc-ads-total-amount"]).text
        self.log.write_log("bot", f"Total ads amount: {total_ads_amount}")
        completed_ads = self.__get_xpath_elem(
            ptc_ads["ptc-ads-completed-ads"]).text
        self.log.write_log("bot", f"Completed ads: {completed_ads}")
        available_ads = self.__get_xpath_elem(
            ptc_ads["ptc-ads-available-ads"]).text
        self.log.write_log("bot", f"Available ads: {available_ads}")
        earnable_coins = self.__get_xpath_elem(
            ptc_ads["ptc-ads-earnable-coins"]).text
        self.log.write_log("bot", f"Earnable coins: {earnable_coins}")

        if int(available_ads) <= 0:
            return
        for ad_div_block_no in range(0, int(available_ads) + 1):
            try:
                ad_title = self.__get_xpath_elem(ptc_ads["ptc-ads-title"]).text
                self.log.write_log("bot",
                                   f"Ad [{ad_div_block_no}] {ad_title}")
                # The code keeps the first two characters as the seconds.
                ad_comp_time = self.__get_xpath_elem(
                    ptc_ads["ptc-ads-completion-time"]).text[:2]
                self.log.write_log(
                    "bot", f"Ad completion time: {ad_comp_time} sec")
                ad_rew_coin = self.__get_xpath_elem(
                    ptc_ads["ptc-ads-reward-coins"]).text
                self.log.write_log("bot", f"Ad reward: {ad_rew_coin} coins")

                self._click(ptc_ads["ptc-ads-watch-button"])
                self._random_wait(2, 4)
                if self.__captcha_check(ptc_ads["ptc-ads-captcha-block"]):
                    self._random_wait(13, 16)
                    self._click(ptc_ads["ptc-ads-reward-claim-btn"])
                    self._random_wait(
                        int(ad_comp_time) + 5,
                        int(ad_comp_time) + 7)
                    self._click(ptc_ads["ptc-ads-continue-btn"])
                    self.__switch_tab()
                    self.log.write_log(
                        "success", f"Fininshed {ad_title} ad successfully")
                    self._random_wait(2, 4)
            except Exception as e:
                self._report_error(e)

    def get_shortlink_ads(self):
        """Log the shortlink statistics and process every shortlink
        provider that still has views left."""
        self.log.write_log("bot", self.log.green_text("SHORTLINK ADS"))
        if self.driver.current_url != self.dash_board_url:
            self.driver.get(self.dash_board_url)
        self._click(shortlinks["general"]["shortlinks-dash-link"])
        self._random_wait(3, 5)

        general = shortlinks["general"]
        shortlinks_amount = self.__get_xpath_elem(
            general["shortlinks-amount"]).text
        self.log.write_log("bot", f"Total shortlinks: {shortlinks_amount}")
        shortlinks_completed = self.__get_xpath_elem(
            general["shortlinks-completed"]).text
        self.log.write_log("bot",
                           f"Completed shortlinks: {shortlinks_completed}")
        shortlinks_available = self.__get_xpath_elem(
            general["shortlinks-available"]).text
        self.log.write_log("bot",
                           f"Available shortlinks: {shortlinks_available}")
        shortlinks_earnable = self.__get_xpath_elem(
            general["shortlinks-earnable-coins"]).text
        self.log.write_log("bot",
                           f"Total earnable coins: {shortlinks_earnable}")

        def log_link_stats(key):
            """Log the view count and reward of shortlink ``key``."""
            view_count = self.__get_xpath_elem(
                shortlinks[key]["shortlinks-view-count"]).text
            self.log.write_log("bot", f"View count: {key} [{view_count}]")
            reward_coin = self.__get_xpath_elem(
                shortlinks[key]["shortlinks-reward-coin"]).text
            self.log.write_log("bot", f"Reward coins: {key} [{reward_coin}]")

        def report_only(key):
            """Providers that are only reported, never claimed."""
            log_link_stats(key)
            self._random_wait(5, 10)

        def claim_with_steps(key):
            """Claim shortlink ``key`` and walk through its steps."""
            log_link_stats(key)
            self._click(shortlinks[key]["shortlinks-claim-btn"])
            self._random_wait(15, 18)
            orig_url = self.driver.current_url
            self._click(shortlinks["general"]["shortlinks-reward-claim-btn"])
            if self.driver.current_url == orig_url:
                # The first click did not navigate: click once more.
                self._click(
                    shortlinks["general"]["shortlinks-reward-claim-btn"])
            self._random_wait(5, 7)
            try:
                step_count = self.__get_xpath_elem(
                    faucet["faucet-current-step"]).text
                # The third character of the text is used as the total
                # number of steps.
                for i in range(int(step_count[2])):
                    step_count_msg = f"Current step: {i+1}/{step_count[2]}"
                    self.log.write_log(
                        "bot",
                        self.log.yellow_text(
                            f"Current step count {step_count_msg}"),
                    )
                    self._random_wait(5, 7)
                    self.driver.execute_script("goto()")
                    self._random_wait(3, 5)
                self.log.write_log("success",
                                   "Fininshed shortlink successfully")
            except Exception as e:
                self._report_error(e)

        handlers = {
            "exe.io": report_only,
            "fc.lc": report_only,
            "sh.faucetcrypto.com": claim_with_steps,
            "sh.faucet.gold": claim_with_steps,
            "sh.claim4.fun": claim_with_steps,
        }

        def switch(link):
            """Dispatch ``link`` (case-insensitive) to its handler."""
            key = str(link).lower()
            handler = handlers.get(key)
            if handler is None:
                self.log.write_log("warning", "Invalid option")
            else:
                handler(key)

        for links in shortlinks:
            if links.lower() == "general":
                continue
            try:
                # Only the first character of the view-count text is read.
                view_count = self.__get_xpath_elem(
                    shortlinks[links]["shortlinks-view-count"]).text[0]
                if int(view_count) > 0:
                    self.log.write_log("bot",
                                       self.log.green_text(links.upper()))
                    switch(links)
            except Exception as e:
                self._report_error(e)
class YouTube_Crawler:
    """Collect YouTube channel and video metadata and store it in PostgreSQL.

    Channel and playlist data is read from the YouTube Data API v3 with
    ``requests``; rows are written through psycopg2 (imported as ``pg2``).
    A headless Chrome driver can optionally be prepared with
    :meth:`make_driver_ready`.

    Every method that talks to the API or the database returns ``True`` on
    success and ``False`` on any failure (exceptions are swallowed by design,
    matching the original contract).
    """

    api_key = None
    kwonjun_api_key = None
    kyungsu_api_key = None
    is_driver = False

    # Database connection settings. These were redacted placeholders in the
    # original source and MUST be filled in before any DB method is used.
    IP = None  # IP
    database = None  # database
    user = None  # user
    password = None  # password

    def __init__(self, api_key=None):
        """Store ``api_key`` on the instance when one is given."""
        if api_key is not None:
            self.api_key = api_key

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _select_api_key(self, api_set):
        """Return the API key for ``api_set`` (0: default, 1: kwonjun, else kyungsu)."""
        if api_set == 0:
            return self.api_key
        if api_set == 1:
            return self.kwonjun_api_key
        return self.kyungsu_api_key

    def _connect(self):
        """Open a PostgreSQL connection with autocommit disabled."""
        conn = pg2.connect(
            database=self.database,
            user=self.user,
            password=self.password,
            host=self.IP,
            port="5432",
        )
        conn.autocommit = False
        return conn

    @staticmethod
    def _age_in_days(upload_time):
        """Return how many days ago ``upload_time`` (a ``struct_time``) was.

        NOTE(review): the API timestamps are UTC but ``time.mktime`` treats
        them as local time, so the result is off by the local UTC offset.
        Kept as in the original; confirm whether that matters.
        """
        elapsed = time.mktime(time.localtime()) - time.mktime(upload_time)
        return elapsed / (60 * 60 * 24)

    # ------------------------------------------------------------------
    # browser
    # ------------------------------------------------------------------
    def make_driver_ready(self, driver_path=r"/home/ubuntu/Crawler/chromedriver"):
        """Start headless Chrome and force the US/English YouTube locale.

        :param driver_path: path of the chromedriver executable.
        """
        options = ChromeOptions()
        for argument in (
            "--headless",
            "--no-sandbox",
            "--enable-automation",
            "--disable-gpu",
            "--disable-features=VizDisplayCompositor",
        ):
            options.add_argument(argument)

        self.driver = Chrome(executable_path=driver_path, options=options)
        self.driver.set_window_size(1920, 1080)

        # A page of the target domain must be loaded before its cookies can
        # be replaced.
        self.driver.get("https://www.youtube.com/")
        self.driver.implicitly_wait(5)
        self.driver.delete_cookie("PREF")
        self.driver.add_cookie(
            {
                "domain": ".youtube.com",
                "httpOnly": False,
                "name": "PREF",
                "value": "gl=US&hl=en",
                "path": "/",
            }
        )
        # Reload so the new PREF cookie takes effect.
        self.driver.get("https://www.youtube.com/")
        self.driver.implicitly_wait(5)
        self.is_driver = True

    # ------------------------------------------------------------------
    # text helpers
    # ------------------------------------------------------------------
    def pre_process_sql(self, text):
        """Return ``text`` with single quotes doubled (SQL literal escaping).

        Kept for callers that still build SQL strings; the queries in this
        class use bound parameters and do not need it.
        """
        return re.sub("'", "''", text)

    def pre_process_comment(self, text):
        """Return ``text`` with NUL bytes removed and single quotes doubled."""
        # bytes.replace returns a new object; the original discarded it, so
        # NUL bytes were never actually stripped.
        cleaned = text.encode("UTF-8").replace(b"\x00", b"")
        text = cleaned.decode("utf-8", "ignore")
        return re.sub("'", "''", text)

    # ------------------------------------------------------------------
    # crawling
    # ------------------------------------------------------------------
    def update_video_and_comment(self, video_id):
        """Delegate to ``New_YouTube_Crawler_Comment.main`` and return a bool."""
        return bool(New_YouTube_Crawler_Comment.main(video_id))

    def update_channel_info(self, channel_id, api_set=0):
        """Record the current subscriber and view counts of a channel.

        When the API response carries no ``subscriberCount`` the channel is
        flagged ``hidden_subscriber = true`` instead.

        :param channel_id: YouTube channel id.
        :param api_set: which API key to use (see ``_select_api_key``).
        :return: ``True`` on success, ``False`` on any failure.
        """
        api_key = self._select_api_key(api_set)
        conn = None
        try:
            time.sleep(0.2)  # small pause between API calls
            url = f"""https://www.googleapis.com/youtube/v3/channels?part=statistics&maxResults=50&id={channel_id}&key={api_key}"""
            response = requests.get(url)
            if response.status_code != 200:
                return False
            # dict(*items) only succeeds for zero or one returned item.
            item = dict(*response.json()["items"])

            try:
                subscriber_count = item["statistics"]["subscriberCount"]
                hidden = False
            except (KeyError, TypeError):
                subscriber_count = None
                hidden = True

            conn = self._connect()
            cur = conn.cursor()
            if hidden:
                cur.execute(
                    "UPDATE channel SET hidden_subscriber = true "
                    "WHERE channel_id = %s;",
                    (channel_id,),
                )
            else:
                now = time.time()
                cur.execute(
                    """INSERT INTO channel_subscriber (channel_idx, subscriber_num, check_time)
                       VALUES ((SELECT idx from channel where channel.channel_id = %s), %s, to_timestamp(%s));
                       INSERT INTO channel_views (channel_idx, view_count, check_time)
                       VALUES ((SELECT idx from channel where channel.channel_id = %s), %s, to_timestamp(%s));""",
                    (
                        channel_id,
                        subscriber_count,
                        now,
                        channel_id,
                        item["statistics"]["viewCount"],
                        now,
                    ),
                )
            conn.commit()
            return True
        except Exception:
            return False
        finally:
            if conn is not None:
                conn.close()

    def insert_channel_info(self, channel_id):
        """Fill in a channel's descriptive columns and log its subscriber count.

        :param channel_id: YouTube channel id.
        :return: ``True`` on success, ``False`` on any failure.
        """
        conn = None
        try:
            url = f"""https://www.googleapis.com/youtube/v3/channels?part=id,snippet,contentDetails,statistics,topicDetails&maxResults=50&id={channel_id}&key={self.api_key}"""
            response = requests.get(url)
            if response.status_code != 200:
                return False
            item = dict(*response.json()["items"])
            snippet = item["snippet"]
            statistics = item["statistics"]

            # Gather every value first so a missing key aborts before a
            # connection is opened.
            params = (
                snippet["title"],
                snippet["description"],
                snippet["publishedAt"],
                item["contentDetails"]["relatedPlaylists"]["uploads"],
                statistics["hiddenSubscriberCount"],
                snippet["thumbnails"]["default"]["url"],
                channel_id,
                channel_id,
                statistics["subscriberCount"],
                time.time(),
            )

            # The original referenced bare `database`/`user`/`password`
            # names here, which raised NameError on every call.
            conn = self._connect()
            cur = conn.cursor()
            cur.execute(
                """UPDATE channel SET channel_name = %s,
                       channel_description = %s,
                       channel_start_date = to_date(%s, 'YYYY-MM-DD'),
                       upload_id = %s,
                       hidden_subscriber = %s,
                       thumbnail_url = %s
                   WHERE channel_id = %s;
                   INSERT INTO channel_subscriber (channel_idx, subscriber_num, check_time)
                   VALUES ((SELECT idx from channel where channel.channel_id = %s), %s, to_timestamp(%s));""",
                params,
            )
            conn.commit()
            return True
        except Exception:
            return False
        finally:
            if conn is not None:
                conn.close()

    def update_video_info(self, upload_id, interval_day=30, api_set=0):
        """Store the videos of an uploads playlist published recently.

        Pages through the playlist and stops at the first video older than
        ``interval_day`` days, or when ``insert_video`` reports failure.

        :param upload_id: id of the channel's uploads playlist.
        :param interval_day: maximum video age, in days, to record.
        :param api_set: which API key to use (see ``_select_api_key``).
        :return: ``True`` on success, ``False`` on any failure.
        """
        api_key = self._select_api_key(api_set)
        time_format = "%Y-%m-%dT%H:%M:%SZ"  # e.g. 2020-07-31T12:05:06Z
        conn = None
        try:
            conn = self._connect()
            cur = conn.cursor()
            next_page_token = None
            keep_going = True

            while keep_going:
                if next_page_token is None:
                    url = f"""https://www.googleapis.com/youtube/v3/playlistItems?part=id,snippet,contentDetails,status&maxResults=50&playlistId={upload_id}&key={api_key}"""
                else:
                    url = f"""https://www.googleapis.com/youtube/v3/playlistItems?part=id,snippet,contentDetails,status&maxResults=50&pageToken={next_page_token}&playlistId={upload_id}&key={api_key}"""
                response = requests.get(url)
                if response.status_code != 200:
                    # The original fell through and failed later on the
                    # missing "items" key; fail explicitly instead.
                    return False
                result = response.json()

                next_page_token = result.get("nextPageToken")
                if next_page_token is None:
                    keep_going = False  # this is the last page

                for raw_item in result["items"]:
                    item = dict(raw_item)
                    try:
                        upload_time = time.strptime(
                            item["contentDetails"]["videoPublishedAt"],
                            time_format,
                        )
                    except (KeyError, TypeError, ValueError):
                        # No usable videoPublishedAt: record the entry as a
                        # disabled video (status FALSE) using the playlist
                        # timestamp.
                        # NOTE(review): the nesting of this branch was
                        # reconstructed from whitespace-mangled source;
                        # confirm the insert belongs to the fallback path.
                        upload_time = time.strptime(
                            item["snippet"]["publishedAt"], time_format
                        )
                        cur.execute(
                            """INSERT INTO video (channel_idx, video_id, upload_time, status)
                               VALUES ((SELECT idx from channel where upload_id = %s), %s,
                                       to_timestamp(%s, 'YYYY-MM-DDTHH24:MI:SSZ'), FALSE)
                               ON CONFLICT DO NOTHING;""",
                            (
                                upload_id,
                                item["contentDetails"]["videoId"],
                                item["snippet"]["publishedAt"],
                            ),
                        )
                        if self._age_in_days(upload_time) > interval_day:
                            keep_going = False
                            break
                        continue

                    # Only videos uploaded within the last `interval_day`
                    # days are recorded; the first older one ends the crawl.
                    if self._age_in_days(upload_time) > interval_day:
                        keep_going = False
                        break

                    cur.execute(
                        "SELECT insert_video(%s, %s, %s, %s, %s, %s)",
                        (
                            item["snippet"]["title"],
                            item["snippet"]["description"],
                            item["contentDetails"]["videoId"],
                            item["contentDetails"]["videoPublishedAt"],
                            upload_id,
                            item["snippet"]["thumbnails"]["high"]["url"],
                        ),
                    )
                    success = cur.fetchone()[0]
                    if not success:
                        keep_going = False
                        break

            conn.commit()
            return True
        except Exception:
            return False
        finally:
            if conn is not None:
                conn.close()

    def __del__(self):
        """Shut the browser down if :meth:`make_driver_ready` started one."""
        if getattr(self, "is_driver", False):
            try:
                # quit() ends the whole driver session; close() only closes
                # the current window.
                self.driver.quit()
            except Exception:
                # Never raise from a destructor.
                pass
def start_browser_and_fetch(website, args):
    """Start Chrome as configured by ``args`` and load the site's main page.

    :param website: object describing the target site. This function reads
        ``main_page_url``, ``robot_txt_ban`` and ``access_successful`` and
        writes the latter two.
    :param args: parsed command-line options (``headful``, the extension
        switches, ``ignore_robots_txt``, ``bypass_robots_txt``,
        ``add_shared_cookie``, ``no_fetch``).
    :return: the browser with the main page loaded, or ``None`` when access
        is not authorized by robots.txt, the server could not be reached, or
        every load attempt timed out (the browser is quit in those cases).
    """
    opts = Options()
    if not args.headful:
        opts.headless = True
        assert opts.headless  # Operating in headless mode

    # Command-line switch -> extension to load, in the original order.
    extension_switches = (
        ('override_cmp', './extensions/override_cmp.crx'),
        ('cookie_glasses', './extensions/cookie_glasses.crx'),
        ('override_cmp_monitor_postmessages',
         './extensions/override_cmp_monitor_postmessages.crx'),
        ('monitor_postmessages', './extensions/monitor_postmessages.crx'),
        ('watch_requests', './extensions/watch_requests.crx'),
        ('get_euconsent', './extensions/get_euconsent.crx'),
        ('probe_cmp_postmessage', './extensions/probe_cmp_postmessage.crx'),
    )
    for switch, crx_path in extension_switches:
        if getattr(args, switch):
            opts.add_extension(crx_path)

    # Enable browser logging. Work on a copy: DesiredCapabilities.CHROME is
    # a class-level dict shared by the whole process, and the original
    # mutated it in place.
    capabilities = DesiredCapabilities.CHROME.copy()
    capabilities['goog:loggingPrefs'] = {'browser': 'ALL'}
    browser = Chrome(options=opts, desired_capabilities=capabilities)

    # Skip the check when told to ignore robots.txt or when an earlier check
    # already set robot_txt_ban to False.
    if not args.ignore_robots_txt and not website.robot_txt_ban == False:
        print("Checking robots.txt...")
        access_allowed = check_robots_txt_authorization(browser, website)
        if not access_allowed:
            website.robot_txt_ban = True
            if not args.bypass_robots_txt:
                quit_properly(browser)
                return None
        else:
            website.robot_txt_ban = False
        # Server access failed when checking robots.txt.
        # NOTE(review): the nesting of this check was reconstructed from
        # whitespace-mangled source; confirm it belongs inside the
        # robots.txt branch.
        if website.access_successful == False:
            quit_properly(browser)
            return None

    # Most common display size:
    # https://www.w3schools.com/browsers/browsers_display.asp
    browser.set_window_size(1366, 768)

    if args.add_shared_cookie:
        # Loading a site is necessary to be able to set a cookie,
        # see https://github.com/w3c/webdriver/issues/1238
        browser.get('https://perdu.com')
        browser.add_cookie({
            'name': 'euconsent',
            'value': CONSENT_STRING_SENSCRITIQUE,
            'domain': '.consensu.org',
            'path': '/'
        })
        print('cookie added')

    if args.no_fetch:
        time.sleep(3600)

    browser.set_page_load_timeout(TIMEOUT)
    for _ in range(MAX_TRIES_TIMEOUT):
        try:
            browser.get(website.main_page_url)
            return browser
        except TimeoutException:
            print("Website timed out.")

    # Every attempt timed out.
    quit_properly(browser)
    website.access_successful = False
    return None
class LagoucrawlerDownloaderMiddleware(object):
    """Scrapy downloader middleware that drives Chrome to log in and search.

    Not all middleware methods need to be defined; for a method that is not
    defined, scrapy acts as if the middleware does not modify the passed
    objects.
    """

    def __init__(self, username, password, city, job_keywords):
        """Keep the account and search settings and start a maximized Chrome."""
        self.username = username          # account name
        self.password = password          # account password
        self.city = city                  # city to search in
        self.job_keywords = job_keywords  # search keywords
        self.brower = Chrome()
        self.brower.maximize_window()
        # Explicit wait with a 5 second timeout, shared by every lookup.
        self.wait = WebDriverWait(self.brower, 5)

    @classmethod
    def from_crawler(cls, crawler):
        """
        Build the middleware from the USERNAME, PASSWORD, CITY and
        JOB_KEYWORDS settings.
        :param crawler:
        :return: a configured middleware instance
        """
        settings = crawler.settings
        return cls(username=settings.get('USERNAME'),
                   password=settings.get('PASSWORD'),
                   city=settings.get('CITY'),
                   job_keywords=settings.get('JOB_KEYWORDS'))

    def is_logined(self, request, spider):
        """
        Load the request URL, dismiss the city-selection popup shown on the
        first request, then read the top-right toolbar link to decide whether
        the user is logged in.
        :param request: the initial request
        :param spider:
        :return: True when already logged in, otherwise False
        """
        self.brower.get(request.url)
        close_locator = (By.XPATH, '//*[@id="cboxClose"]')
        status_locator = (By.XPATH, '//*[@id="lg_tbar"]/div/ul/li[1]/a')
        try:
            # Close the city-selection popup.
            self.wait.until(
                EC.element_to_be_clickable(close_locator)).click()
            # Read the login state shown at the top right.
            status_link = self.wait.until(
                EC.presence_of_element_located(status_locator))
            # The link text is the "log in" label while logged out.
            return status_link.text != '登录'
        except TimeoutException as e:
            # On a second request the popup does not appear, so the wait
            # times out; this case still needs a better design.
            spider.logger.info('Locate Username Element Failed:%s' % e.msg)
            return False

    def login_lagou(self, spider):
        """
        Go through the login form with selenium and save the cookies
        obtained afterwards to a local file.
        :param spider:
        :return:
        """
        login_link_locator = (
            By.XPATH,
            '//*[@id="loginToolBar"]//a[@class="button bar_login passport_login_pop"]'
        )
        # (input xpath, text to type) for each credential field, in order.
        credential_fields = (
            ('//*[@data-propertyname="username"]/input', self.username),
            ('//*[@data-propertyname="password"]/input', self.password),
        )
        try:
            # Pause first, otherwise the login elements may not be found.
            time.sleep(2)
            # Open the login page.
            self.wait.until(
                EC.presence_of_element_located(login_link_locator)).click()
            # Type the user name, then the password.
            for xpath, text in credential_fields:
                field = self.wait.until(
                    EC.visibility_of_element_located((By.XPATH, xpath)))
                field.send_keys(text)
            # Press the submit button.
            self.wait.until(
                EC.visibility_of_element_located(
                    (By.XPATH, '//*[@data-propertyname="submit"]/input'))
            ).click()
            # Persist the cookies read after submitting.
            self.save_cookies(self.brower.get_cookies())
        except TimeoutException as e:
            spider.logger.info('Locate Login Element Failed: %s' % e.msg)

    @staticmethod
    def save_cookies(cookies):
        """
        Write the cookies as JSON to ./cookies/lagou.txt so that later runs
        can reuse them.
        :param cookies:
        :return:
        """
        cookie_dir = os.getcwd() + '/cookies/'
        if not os.path.exists(cookie_dir):
            os.mkdir(cookie_dir)
        with open(cookie_dir + 'lagou.txt', 'w') as out:
            json.dump(cookies, out)

    def fetch_index_page(self, request, spider):
        """
        Switch city if needed, type the search keywords and press the search
        button with selenium. When the page does not navigate after the
        click, waiting for the result list times out and the request is
        returned so that it is issued again.
        :param request:
        :param spider:
        :return: the HtmlResponse of the result page, or the request itself
        """
        try:
            # Decide whether the city has to be switched.
            current_city = self.wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//*[@id="lg_tnav"]/div/div/div/strong')))
            if current_city.text != self.city:
                time.sleep(1)
                self.wait.until(
                    EC.presence_of_element_located(
                        (By.XPATH, '//*[@id="changeCity_btn"]'))).click()
                # Click the link whose text is the wanted city.
                self.wait.until(
                    EC.presence_of_element_located(
                        (By.LINK_TEXT, self.city))).click()
            time.sleep(1)

            # Type the keywords into the search box.
            search_box = self.wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//*[@id="search_input"]')))
            search_box.send_keys(self.job_keywords)

            # Press the search button. Sometimes the page does not navigate
            # afterwards because the request was redirected.
            search_button = self.wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, '//*[@id="search_button"]')))
            search_button.click()

            # Wait for the result list; after a redirect it never shows up.
            self.wait.until(
                EC.visibility_of_all_elements_located(
                    (By.XPATH, '//*[@id="s_position_list"]')))
            last_page = self.wait.until(
                EC.presence_of_element_located((
                    By.XPATH,
                    '//*[@id="s_position_list"]/div[@class="item_con_pager"]/div/span[@class="pager_next "]/preceding-sibling::span[1]'
                )))

            # Hand the page count, browser and wait object to the parser
            # through the request meta for the later pagination.
            request.meta['pagenumber'] = last_page.text
            request.meta['brower'] = self.brower
            request.meta['wait'] = self.wait

            # Return the first result page for parsing.
            return HtmlResponse(url=self.brower.current_url,
                                body=self.brower.page_source,
                                encoding='utf-8',
                                request=request)
        except TimeoutException:
            spider.logger.info(
                'Locate Index Element Failed And Use Proxy Request Again')
            # The page did not navigate as expected: issue the request again.
            return request

    def load_cookies(self, path):
        """
        Load the local cookie file into the browser to skip the login.
        :param path: path of the local cookie file
        :return:
        """
        with open(path, 'r') as src:
            stored = json.load(src)
        for entry in stored:
            # Only the name and value are passed on to the browser.
            self.brower.add_cookie(
                {'name': entry['name'], 'value': entry['value']})

    def process_request(self, request, spider):
        """
        Core hook of the middleware; every request passes through it.
        Requests flagged with 'index_flag' go through the login / cookie
        handling and get the search result page as response. All other
        requests are left untouched.
        :param request:
        :param spider:
        :return: the index page response, or None for other requests
        """
        if 'index_flag' not in request.meta:
            return None

        # Not logged in: reuse the saved cookies when the file exists,
        # otherwise log in.
        if not self.is_logined(request, spider):
            cookie_file = os.getcwd() + '/cookies/lagou.txt'
            if os.path.exists(cookie_file):
                self.load_cookies(cookie_file)
            else:
                self.login_lagou(spider)

        # Without a login, detail page URLs redirect to the login page.
        return self.fetch_index_page(request, spider)
import time

from selenium.webdriver import Chrome

# Open the Huawei developer console with a session restored from a raw
# "Cookie:" header string.
url = 'https://developer.huawei.com/consumer/cn/service/apcs/app/home.html'

# NOTE(review): this is a hard-coded session token; consider loading it from
# the environment or a local file instead of keeping it in source.
ck = 'apppromote_lang=cn; APCS_AT="CFwH17cENO7L4jUd/y7MlRsHFBzkKRUjo8iCuQtgoNlRvUPthwzltcQTH+4mZ0fCPSFCRFk+s4SwszZ9RoTH5//Upk96HCea9DrxHLarEOc5gYVlXtw="; SITE_ID=1'

# Split every "name=value" pair on the FIRST '=' only. Cookie values may
# contain '=' themselves (APCS_AT ends with base64 padding), and the original
# `split('=')[1]` silently cut such values short.
ck = [{
    'name': name,
    'value': value
} for name, _, value in (pair.partition('=') for pair in ck.split('; '))]

d = Chrome()
try:
    # A page of the target domain must be loaded before cookies can be set.
    d.get(url)
    for c in ck:
        d.add_cookie(c)
    # Reload so the request is sent with the restored cookies.
    d.get(url)
    # Keep the window open for a minute for manual inspection.
    time.sleep(60)
finally:
    # End the driver session so no browser/chromedriver process is left over.
    d.quit()