Exemple #1
0
    def build_chrome(self):
        """Build a Chrome browser with the stored cookies applied to pan.baidu.com.

        ``self.cookie`` may be either a raw ``"name=value; name2=value2"``
        string or an iterable of Selenium cookie dicts. When it is empty,
        ``self.load_cookie()`` is called first (presumably to populate
        ``self.cookie`` -- confirm against its definition).

        :return: the Chrome driver, refreshed after the cookies were added
        """
        chrome_options = ChromeOptions()
        chrome = Chrome(chrome_options=chrome_options)
        # The target site is opened before any cookie is added.
        chrome.get('http://pan.baidu.com')
        if not self.cookie:
            self.load_cookie()
        if isinstance(self.cookie, str):
            for pair in self.cookie.split(';'):
                # Split on the first '=' only: a cookie value may itself
                # contain '=' and must not be truncated.
                name, sep, value = pair.partition('=')
                name = name.strip()
                if sep and name:
                    chrome.add_cookie({'name': name, 'value': value.strip()})
        elif self.cookie is not None:
            for item in self.cookie:
                # Add a copy without 'expiry' so the caller's cookie dicts
                # are left untouched.
                chrome.add_cookie(
                    {key: val for key, val in item.items() if key != 'expiry'})
        chrome.refresh()
        return chrome
class BasePage:
    """Base page object that owns a Chrome driver session."""

    def __init__(self, maximize_window=True, hide_cookie_notice=True):
        """Start Chrome and apply the requested start-up options.

        :param maximize_window: maximize the browser window when true
        :param hide_cookie_notice: add the ``hideCookieNotice`` cookie when true
        """
        self.hide_cookie_notice = hide_cookie_notice
        self.driver = Chrome(driver_path_resolver.resolve_driver_path())
        if self.hide_cookie_notice:
            self._hide_cookie_notice()
        self.driver.implicitly_wait(DEFAULT_IMPLICITLY_WAIT)
        if maximize_window:
            self.driver.maximize_window()
        self.error = None

    def _hide_cookie_notice(self):
        """Hide the cookie notice by adding the ``hideCookieNotice`` cookie."""
        LOGGER.info('Started hiding cookie notice')
        if not self.hide_cookie_notice:
            return

        # A (non-existent) page of the site is opened first, then the cookie
        # jar is reset and the single notice-hiding cookie is added.
        self.driver.get(BASE_URL + '404')
        LOGGER.debug('Opened 404 page')
        self.driver.delete_all_cookies()
        LOGGER.debug('Deleted all cookies')
        notice_cookie = {'name': 'hideCookieNotice', 'value': '1'}
        self.driver.add_cookie(notice_cookie)
        LOGGER.debug('Added cookie {}'.format(notice_cookie))
        LOGGER.info('Cookie notice hiding finished success')

    def get_last_error(self):
        """Return the ``repr`` of the last stored error."""
        last_error = self.error
        return repr(last_error)
Exemple #3
0
def sign_in_with_cookies(driver: webdriver.Chrome) -> bool:
    """Try to restore a signed-in session from the pickled cookie file.

    The cookies are loaded from ``cookie_file_name``, added to *driver* on
    ``amazon_fresh_home_url`` (without their ``expiry`` field), and the page
    is refreshed. Sign-in counts as successful when the welcome element
    becomes visible and its ``href`` does not contain ``"signin"``.

    :param driver: Chrome driver to sign in with
    :return: ``True`` when signed in, ``False`` on every failure path
    """
    logger.info("Starting to sign in using pickled cookie")

    try:
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # cookie files that this program wrote itself.
        with open(cookie_file_name, "rb") as cookies_file:
            cookies = pickle.load(cookies_file)
        driver.get(amazon_fresh_home_url)
        for cookie in cookies:
            cookie.pop('expiry', None)
            driver.add_cookie(cookie)
    except (IOError, pickle.UnpicklingError, EOFError):
        # Missing, unreadable or corrupt cookie file.
        logger.error("Unable to use pickled cookie to sign in")
        return False
    driver.refresh()
    try:
        # The wait returns the located element, so no second lookup is needed.
        welcome = WebDriverWait(driver, 10).until(
            expected_conditions.visibility_of_element_located(
                (By.ID, amazon_fresh_welcome_id)))
        return "signin" not in welcome.get_attribute("href")
    except Exception:
        logger.error("Failed to use pickled cookie to sign in")
        # The signature promises a bool; do not fall through to None.
        return False
Exemple #4
0
class ChromeDriver:
    """Wrapper around a headless Chrome driver.

    Attributes that this class does not define are forwarded to the wrapped
    Selenium driver, so an instance can be used like the driver itself.
    """

    def __init__(self):
        self.options = self.get_options()
        self.driver = Chrome(chrome_options=self.options)
        # Resolved through __getattr__, i.e. self.driver.implicitly_wait.
        self.implicitly_wait(10)

    def __getattr__(self, attr):
        """Forward unknown attribute lookups to the wrapped driver.

        :raises AttributeError: when the wrapped driver is not set yet or
            does not have *attr*
        """
        # __getattr__ only runs when normal lookup fails. If 'driver' itself
        # is missing (construction failed, or the instance was created
        # without __init__), reading self.driver below would re-enter this
        # method and recurse until RecursionError.
        if attr == 'driver':
            raise AttributeError(attr)
        return getattr(self.driver, attr)

    # overwrite
    def get(self, url, cookies=None, refresh=True):
        """Open *url* and optionally apply cookies.

        :param url: page to load
        :param cookies: optional mapping of cookie name to value; each entry
            is added after the page has loaded
        :param refresh: reload the page after the cookies were added
            (ignored when *cookies* is ``None``)
        """
        self.driver.get(url)
        if cookies is not None:
            for name, value in cookies.items():
                self.driver.add_cookie({'name': name, 'value': value})
            if refresh:
                self.refresh()

    def get_options(self):
        """Return the Chrome options used for every driver instance.

        Headless mode, GPU disabled, a local HTTP proxy and a fixed desktop
        user agent.
        """
        options = chrome.options.Options()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        # options.add_argument('blink-settings=imagesEnabled=false')
        options.add_argument('--proxy-server=http://127.0.0.1:10809')
        options.add_argument(
            'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
        )
        return options
Exemple #5
0
def start_browser(link, cookies):
    """Start Chrome with automation markers hidden and open *link* with cookies.

    The page is loaded once so that cookies can be added, then loaded again
    with the cookies in place.

    :param link: URL to open
    :param cookies: iterable of dicts with ``name``, ``value`` and ``domain``
    :return: the started Chrome driver
    """
    # DesiredCapabilities.CHROME is a class-level dict shared by every user
    # of the class; customize a copy instead of the shared object.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = "eager"
    chrome_options = ChromeOptions()
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option("useAutomationExtension", False)
    driver = Chrome(desired_capabilities=caps, executable_path=driver_path, options=chrome_options)
    # Registered for every new document: wraps `navigator` in a Proxy that
    # reports `webdriver` as absent/undefined.
    driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {
                "source": """
        Object.defineProperty(window, 'navigator', {
            value: new Proxy(navigator, {
              has: (target, key) => (key === 'webdriver' ? false : key in target),
              get: (target, key) =>
                key === 'webdriver'
                  ? undefined
                  : typeof target[key] === 'function'
                  ? target[key].bind(target)
                  : target[key]
            })
        })
                  """
            },
    )
    driver.get(link)
    for cookie in cookies:
        driver.add_cookie({
            "name": cookie["name"],
            "value": cookie["value"],
            "domain": cookie["domain"],
        })
    driver.get(link)
    # Hand the driver back; otherwise the caller cannot use or quit the
    # browser that was just started.
    return driver
Exemple #6
0
class Spider:
    """Crawl numbered pages with a Chrome session that carries stored cookies."""

    def __init__(self, index_url, target_url, page_range):
        """Store the crawl settings and start the browser right away.

        :param index_url: page opened before the cookies are applied
        :param target_url: URL prefix; the page number is appended to it
        :param page_range: number of the last page to crawl (inclusive)
        """
        self.index_url = index_url
        self.target_url = target_url
        # Stored as an exclusive upper bound for range().
        self.page_range = page_range + 1
        self.raw_html = []
        self.boot()

    def boot(self):
        """Start Chrome and apply the stored cookies."""
        self.driver = Chrome()
        self.driver.start_client()
        self.check_cookie()

    def check_cookie(self):
        """Replace the browser's cookies with ``xcookie.cookie_list``.

        Exits the program when the list is empty.
        """
        from xcookie import cookie_list

        if not cookie_list:
            print('please insert cookie!')
            sys.exit()

        self.driver.get(self.index_url)
        time.sleep(8)
        self.driver.delete_all_cookies()
        print('clear')
        for stored_cookie in cookie_list:
            self.driver.add_cookie(stored_cookie)
        print('Done')

    def crawl(self):
        """Load each page in turn and append its HTML source to ``raw_html``."""
        for page_no in range(1, self.page_range):
            page_url = f'{self.target_url}{page_no}'
            self.driver.get(page_url)
            print(page_url)
            time.sleep(5)
            html = self.driver.page_source
            self.raw_html.append(html)
def browser():
    """Yield a Chrome browser opened on awesome-testing.com with the cookie notice dismissed.

    Generator fixture with three sections: setup, ``yield`` (hands the
    browser to the test) and teardown. Presumably registered as a test
    fixture by a decorator outside this view; tests receive it by naming
    the fixture as an argument.
    """
    # 1. Setup: runs before every test.
    driver = Chrome(executable_path=ChromeDriverManager().install())
    try:
        driver.get('https://www.awesome-testing.com/')
        # Cookie name and value were taken from the browser dev tools
        # (Application -> Cookies) after accepting the notice on the page
        # and refreshing.
        cookie = {
            'name': 'displayCookieNotice',
            'value': 'y',
            'domain': 'www.awesome-testing.com'
        }
        driver.add_cookie(cookie)
        driver.refresh()

        # If the page is not ready in time, time.sleep(1) here is a
        # possible workaround.

        # 2. `yield` is the boundary between setup and teardown: it hands
        # the opened browser to the test. To provide more than one value,
        # yield them as a tuple.
        yield driver
    finally:
        # 3. Teardown: always quit the browser, also when setup itself
        # failed after Chrome was started.
        driver.quit()
Exemple #8
0
def load_cookie(browser: Chrome):
    """Add every cookie stored in the ``*.cookie`` files under ``COOKIES_PATH``.

    Each file is expected to hold a JSON list of cookie dicts. A cookie's
    ``domain`` is given a leading dot when it has none; cookies without a
    domain are added unchanged.

    :param browser: Chrome driver that receives the cookies
    """
    for path in glob.glob(os.path.join(COOKIES_PATH, '*.cookie')):
        # JSON is read as UTF-8 rather than the locale's default encoding.
        with open(path, encoding='utf-8') as rfile:
            data_cookies = json.load(rfile)
        for data_cookie in data_cookies:
            domain = data_cookie.get('domain')
            # A missing or empty domain used to raise KeyError/IndexError.
            if domain and not domain.startswith('.'):
                data_cookie['domain'] = '.' + domain
            browser.add_cookie(data_cookie)
Exemple #9
0
def login_by_cookies():
    """Open mosoteach in Chrome with the cookies from ``data/cookies.json`` applied.

    :return: the Chrome driver, on the reloaded start page
    """
    home_url = 'https://www.mosoteach.cn/web/'
    with open("data/cookies.json") as cookie_file:
        saved_cookies = json.load(cookie_file)

    session = Chrome()
    # First load puts the browser on the site so the cookies can be added.
    session.get(home_url)
    for entry in saved_cookies:
        session.add_cookie(entry)
    # Second load (plus refresh) shows the page with the cookies in place.
    session.get(home_url)
    session.refresh()
    return session
def browser():
    """Yield a Chrome browser opened on awesome-testing.com with the cookie notice dismissed.

    Generator: everything before ``yield`` is setup, the ``finally`` block
    is teardown.
    """
    driver = Chrome(executable_path=ChromeDriverManager().install())
    try:
        driver.get('https://www.awesome-testing.com/')
        cookie = {'name': 'displayCookieNotice',
                  'value': 'y',
                  'domain': 'www.awesome-testing.com'}
        driver.add_cookie(cookie)
        driver.refresh()

        yield driver
    finally:
        # Quit the browser even when setup failed after Chrome was started.
        driver.quit()
Exemple #11
0
def check_queue(driver: webdriver.Chrome):
    """Check whether the session is in the waiting room and try to skip it.

    Args:
        driver (webdriver.Chrome): webdriver
    """
    # Fetch the waiting-room cookie; nothing to do when it is absent.
    waiting_cookie = driver.get_cookie("akavpwr_User_allowed")
    if not waiting_cookie:
        return

    logging.debug("Warteraum - Try skipping")

    # Re-add the same cookie under the second name, then reload the page.
    waiting_cookie["name"] = "akavpau_User_allowed"
    driver.add_cookie(waiting_cookie)
    driver.refresh()
Exemple #12
0
def driver_start(bot_name, headless_mode = True):
	"""Start Chrome on the Twitter login page with the bot's saved cookies.

	:param bot_name: name of the bot; cookies are read from
		``Cookies/<bot_name>.pkl``
	:param headless_mode: run Chrome headless with a fixed 1366x768 window
		when true, otherwise maximize the window
	:return: the Chrome driver, refreshed after the cookies were added
	"""
	options = Options()
	if headless_mode:
		options.add_argument('--headless')
		options.add_argument('--disable-gpu')
	options.add_argument("--disable-notifications")
	options.add_argument("--disable-extensions")
	driver = Chrome("chromedriver.exe", options = options)
	if headless_mode:
		driver.set_window_size(1366, 768)
	else:
		driver.maximize_window()
	driver.get("https://twitter.com/login")
	# NOTE(security): pickle.load can execute arbitrary code; only load
	# cookie files that this program wrote itself.
	with open("Cookies/" + bot_name + ".pkl", "rb") as cookie_file:
		cookies = pickle.load(cookie_file)
	for cookie in cookies:
		cookie.pop('expiry', None)
		# Add every cookie, not only those that carried an 'expiry' field.
		driver.add_cookie(cookie)
	driver.refresh()
	return driver
Exemple #13
0
def build_chrome(cookie=None):
    """Build a Chrome browser on pan.baidu.com with the given cookies applied.

    :param cookie: iterable of Selenium cookie dicts, or ``None``/empty to
        add no cookies
    :return: the Chrome driver, refreshed after the cookies were added
    """
    chrome_options = ChromeOptions()
    chrome = Chrome(chrome_options=chrome_options)
    chrome.get('http://pan.baidu.com')
    # The previous guard (`cookie != None or cookie != []`) was always true,
    # so the default cookie=None raised TypeError on iteration.
    for item in cookie or ():
        # Add a copy without 'expiry' so the caller's dicts are not mutated.
        chrome.add_cookie(
            {key: val for key, val in item.items() if key != 'expiry'})
    chrome.refresh()
    return chrome
Exemple #14
0
def login_cookie(driver: Chrome):
    """Apply a hard-coded Facebook cookie string to *driver* and open Facebook.

    The raw cookie string is parsed with ``parse_dict_cookies``, printed as
    JSON, and each entry is added for the ``facebook.com`` domain.
    """
    raw_cookie = "sb=sJyfYPRg73_iH7HXwXh8Z7jS; datr=sJyfYG7kFe-wxSuCHWsSgiDv; dpr=1.25; wd=1479x734; c_user=100003617755928; xs=21%3AE0HZCHbl-LJw1Q%3A2%3A1621073168%3A-1%3A6381; fr=1bSzt7fdPoQutylf6.AWXwhkAAla0xRmN2Z1v3maVz5_w.Bgn5yw.O6.AAA.0.0.Bgn50Q.AWXmd3f3OSI; spin=r.1003799421_b.trunk_t.1621073170_s.1_v.2_"
    parsed = parse_dict_cookies(raw_cookie)
    print(json.dumps(parsed, indent=4, sort_keys=True))

    # NOTE: facebook.com must be opened before its cookies are added,
    # otherwise adding them fails with an error.
    driver.get("https://facebook.com")
    for cookie_name, cookie_value in parsed.items():
        entry = {
            'name': cookie_name,
            'value': cookie_value,
            'domain': 'facebook.com'
        }
        driver.add_cookie(entry)

    # An earlier variant restored the cookies from a pickle file instead of
    # the hard-coded string above.

    driver.get("https://www.facebook.com")
    driver.maximize_window()
Exemple #15
0
class GameDriver:
    """Open a game URL in Chrome with cookies taken from an existing session."""

    url = None
    cookies = None
    # Desktop Chrome user agent (a stray trailing apostrophe was removed).
    ua = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"
    driver = None
    active = True
    base = None

    def __init__(self, url, cookies, base=None):
        """
        :param url: page to open once the cookies are in place
        :param cookies: iterable of objects exposing ``name``, ``value``
            and ``domain`` attributes
        :param base: page opened first so the cookies can be added;
            ``run()`` falls back to *url* when it is ``None``
        """
        self.url = url
        self.cookies = cookies
        self.base = base

    def run(self):
        """Start Chrome, add the cookies and open ``self.url``."""
        driver = "chromedriver.exe"
        chrome_options = ChromeOptions()
        #chrome_options.add_argument('--headless')
        # No quotes around the value: the argument is passed to Chrome
        # as-is, so literal quotes would become part of the user agent.
        chrome_options.add_argument('--user-agent=%s' % self.ua)

        self.driver = Chrome(executable_path=driver,
                             chrome_options=chrome_options)
        # A page has to be open before cookies are added; fall back to the
        # target URL when no base page was given.
        landing = self.base if self.base is not None else self.url
        self.driver.get(url=landing)
        time.sleep(1)
        for c in self.cookies:
            self.driver.add_cookie({
                'name': c.name,
                'value': c.value,
                'domain': c.domain
            })
            print("Setting Driver cookie: %s=%s (%s)" %
                  (c.name, c.value, c.domain))
        self.driver.get(url=self.url)

    def close(self):
        """Shut down the browser, if one was started."""
        if self.driver:
            # quit() also ends the chromedriver process; close() only closes
            # the current window and leaves the driver process running.
            self.driver.quit()
Exemple #16
0
class BoxDriver(object):
    """A thin convenience wrapper around a Selenium WebDriver.

    Elements are addressed with "selector" strings of the form
    ``"<kind><sep><value>"``, for example ``"i,login"`` or
    ``"x,//*[@id='langs']/button"``, where ``<sep>`` is the ``by_char``
    given to the constructor. A selector without the separator is treated
    as an element id. Most actions pause for ``wait_seconds`` afterwards
    via :meth:`forced_wait`.
    """

    # Private state; these class-level defaults are replaced per instance.
    _web_driver = None
    _by_char = None
    _wait_seconds = None

    class DriverType(Enum):
        """Browsers this wrapper can start."""
        # NOTE(review): the trailing commas make the first four values
        # one-element tuples such as (1,). Kept unchanged because callers
        # may depend on the existing ``.value``.
        CHROME = 1,
        FIREFOX = 2,
        IE = 3,
        SAFARI = 4,
        CHROME_HEADLESS = 5

    def __init__(self,
                 driver_type: DriverType,
                 by_char=_CHARACTER_COMMA,
                 wait_seconds=_WAIT_SECONDS,
                 firefox_profile=None):
        """Create the wrapper and start the requested browser.

        :param driver_type: DriverType member; ``None`` or ``""`` selects Chrome
        :param by_char: separator between selector kind and value
        :param wait_seconds: pause, in seconds, applied after most actions
        :param firefox_profile: path to a Firefox profile directory
            (only used for ``DriverType.FIREFOX``)
        """
        self._by_char = by_char
        self._wait_seconds = wait_seconds

        if driver_type is None or driver_type == "":
            driver_type = self.DriverType.CHROME

        self._set_selenium_driver(driver_type, firefox_profile)

    def _set_selenium_driver(self, driver_type, firefox_profile):
        """Start the browser for *driver_type* and store it in ``_web_driver``.

        An unknown type falls back to Chrome and prints a notice.
        """
        if driver_type == self.DriverType.CHROME:
            self._web_driver = Chrome()

        elif driver_type == self.DriverType.FIREFOX:
            # Use the profile only when the given path actually exists.
            if firefox_profile and os.path.exists(firefox_profile):
                profile = FirefoxProfile(firefox_profile)
                self._web_driver = Firefox(firefox_profile=profile)
            else:
                self._web_driver = Firefox()

        elif driver_type == self.DriverType.IE:
            self._web_driver = Ie()

        elif driver_type == self.DriverType.SAFARI:
            self._web_driver = Safari()

        elif driver_type == self.DriverType.CHROME_HEADLESS:
            profile = ChromeOptions()
            profile.add_argument('headless')
            profile.add_experimental_option("excludeSwitches",
                                            ["ignore-certificate-errors"])
            self._web_driver = Chrome(options=profile)

        else:
            self._web_driver = Chrome()
            print("Invalid Driver Type filled: %r" % driver_type)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _convert_selector_to_locator(self, selector):
        """Convert a custom selector string into a Selenium locator tuple.

        :param selector: selector string such as ``"i, xxx"``; without the
            separator the whole string is used as an element id
        :return: ``(By.<kind>, value)``
        :raises NameError: when the selector kind is not recognized
        """
        if self._by_char not in selector:
            return By.ID, selector

        # Split on the FIRST separator only, so values that contain the
        # separator themselves (e.g. an XPath with commas) stay intact.
        selector_by, selector_value = selector.split(self._by_char, 1)
        selector_by = selector_by.strip()
        selector_value = selector_value.strip()

        if selector_by in ("i", "id"):
            locator = (By.ID, selector_value)
        elif selector_by in ("n", "name"):
            locator = (By.NAME, selector_value)
        elif selector_by in ("c", "class_name"):
            locator = (By.CLASS_NAME, selector_value)
        elif selector_by in ("l", "link_text"):
            locator = (By.LINK_TEXT, selector_value)
        elif selector_by in ("p", "partial_link_text"):
            locator = (By.PARTIAL_LINK_TEXT, selector_value)
        elif selector_by in ("t", "tag_name"):
            locator = (By.TAG_NAME, selector_value)
        elif selector_by in ("x", "xpath"):
            locator = (By.XPATH, selector_value)
        elif selector_by in ("s", "css_selector"):
            locator = (By.CSS_SELECTOR, selector_value)
        else:
            raise NameError(
                "Please enter a valid selector of targeting elements.")

        return locator

    def _locate_element(self, selector):
        """Return the first element matching *selector*.

        :param selector: e.g. ``"i,xxx"`` or ``"x,//*[@id='langs']/button"``
        :return: the matching WebElement
        """
        locator = self._convert_selector_to_locator(selector)
        return self._web_driver.find_element(*locator)

    def _locate_elements(self, selector):
        """Return all elements matching *selector*.

        :param selector: e.g. ``"i,xxx"`` or ``"x,//*[@id='langs']/button"``
        :return: list of matching WebElements
        """
        locator = self._convert_selector_to_locator(selector)
        return self._web_driver.find_elements(*locator)

    # ------------------------------------------------------------------
    # Cookies
    # ------------------------------------------------------------------

    def clear_cookies(self):
        """Delete all cookies of the current session."""
        self._web_driver.delete_all_cookies()

    def add_cookies(self, cookies):
        """Add one cookie dict, or every cookie dict of an iterable.

        :param cookies: a single cookie dict, or an iterable of cookie dicts
        """
        if isinstance(cookies, dict):
            self._web_driver.add_cookie(cookie_dict=cookies)
        else:
            for cookie in cookies:
                self._web_driver.add_cookie(cookie_dict=cookie)

    def add_cookie(self, cookie_dict):
        """Add a single cookie, replacing an existing one of the same name.

        :param cookie_dict: dict with at least the keys ``name`` and ``value``
        """
        cookie_name = cookie_dict["name"]
        # Delete an existing cookie of that name before adding the new one.
        if self._web_driver.get_cookie(cookie_name) is not None:
            self._web_driver.delete_cookie(cookie_name)

        self._web_driver.add_cookie(cookie_dict)

    def remove_cookie(self, name):
        """Remove the cookie called *name* if it exists.

        :param name: cookie name
        """
        if self._web_driver.get_cookie(name) is not None:
            self._web_driver.delete_cookie(name)

    # ------------------------------------------------------------------
    # Browser
    # ------------------------------------------------------------------

    def refresh(self, url=None):
        """Refresh the current page, or open *url* when one is given.

        :param url: page to open instead of refreshing; default ``None``
        """
        if url is None:
            self._web_driver.refresh()
        else:
            self._web_driver.get(url)

        self.forced_wait(self._wait_seconds)

    def maximize_window(self):
        """Maximize the current browser window."""
        self._web_driver.maximize_window()

    def navigate(self, url):
        """Open *url*.

        :param url: page to open
        """
        self._web_driver.get(url)
        self.forced_wait(self._wait_seconds)

    def quit(self):
        """Quit the driver."""
        self._web_driver.quit()

    def close_browser(self):
        """Close the current browser window."""
        self._web_driver.close()

    # ------------------------------------------------------------------
    # Basic element actions
    # ------------------------------------------------------------------

    def type(self, selector, text):
        """Clear an input element and type *text* into it.

        Usage:
        driver.type("i,el","selenium")
        """
        el = self._locate_element(selector)
        el.clear()
        el.send_keys(text)

    def click(self, selector):
        """Click the element: link, check box, radio button, drop-down, etc.

        Usage:
        driver.click("i,el")
        """
        el = self._locate_element(selector)
        el.click()
        self.forced_wait(self._wait_seconds)

    def click_by_enter(self, selector):
        """Activate the element by sending it the ENTER key.

        Usage:
        driver.click_by_enter("i,el")
        """
        el = self._locate_element(selector)
        el.send_keys(Keys.ENTER)

        self.forced_wait(self._wait_seconds)

    def click_by_text(self, text):
        """Click the link whose text contains *text* (partial link text).

        Usage:
        driver.click_by_text("News")
        """
        self._locate_element('p' + self._by_char + text).click()
        self.forced_wait(self._wait_seconds)

    def submit(self, selector):
        """Submit the specified form.

        Usage:
        driver.submit("i,el")
        """
        el = self._locate_element(selector)
        el.submit()

        self.forced_wait(self._wait_seconds)

    def move_to(self, selector):
        """Move the mouse pointer onto the element.

        :param selector: selector of the target element
        """
        el = self._locate_element(selector)
        ActionChains(self._web_driver).move_to_element(el).perform()
        self.forced_wait(self._wait_seconds)

    def right_click(self, selector):
        """Right-click (context-click) the element.

        :param selector: selector of the target element
        """
        el = self._locate_element(selector)
        ActionChains(self._web_driver).context_click(el).perform()
        self.forced_wait(self._wait_seconds)

    def count_elements(self, selector):
        """Return how many elements match *selector*.

        :param selector: selector string
        :return: number of matching elements
        """
        return len(self._locate_elements(selector))

    def drag_element(self, source, target):
        """Drag the *source* element onto the *target* element.

        :param source: selector of the element to drag
        :param target: selector of the element to drop onto
        """
        el_source = self._locate_element(source)
        el_target = self._locate_element(target)

        if self._web_driver.w3c:
            ActionChains(self._web_driver).drag_and_drop(el_source,
                                                         el_target).perform()
        else:
            # Non-W3C drivers: perform hold, move and release one by one.
            ActionChains(self._web_driver).click_and_hold(el_source).perform()
            ActionChains(self._web_driver).move_to_element(el_target).perform()
            ActionChains(self._web_driver).release(el_target).perform()

        self.forced_wait(self._wait_seconds)

    def lost_focus(self):
        """Move focus away from the current element by pressing TAB."""
        ActionChains(self._web_driver).key_down(Keys.TAB).key_up(
            Keys.TAB).perform()
        self.forced_wait(self._wait_seconds)

    # ------------------------------------------------------------------
    # <select> elements
    # ------------------------------------------------------------------

    def select_by_index(self, selector, index):
        """Select the option at position *index* of a <select> element.

        Usage:
        driver.select_by_index("i,el", 0)
        """
        el = self._locate_element(selector)
        Select(el).select_by_index(index)

        self.forced_wait(self._wait_seconds)

    def get_selected_text(self, selector):
        """Return the text of the first selected option of a <select> element.

        :param selector: selector string such as ``"i, xxx"``
        :return: option text
        """
        el = self._locate_element(selector)
        # first_selected_option is a property, not a method.
        selected_opt = Select(el).first_selected_option
        return selected_opt.text

    def select_by_visible_text(self, selector, text):
        """Select the option of a <select> element by its visible text.

        Usage:
        driver.select_by_visible_text("i,el", "text")
        """
        el = self._locate_element(selector)
        Select(el).select_by_visible_text(text)

        self.forced_wait(self._wait_seconds)

    def select_by_value(self, selector, value):
        """Select the option of a <select> element by its ``value`` attribute.

        Usage:
        driver.select_by_value("i,el", "value")
        """
        el = self._locate_element(selector)
        Select(el).select_by_value(value)

        self.forced_wait(self._wait_seconds)

    # ------------------------------------------------------------------
    # JavaScript
    # ------------------------------------------------------------------

    def execute_js(self, script):
        """Execute a JavaScript snippet in the current page.

        Usage:
        driver.execute_js("window.scrollTo(200,1000);")
        """
        self._web_driver.execute_script(script)

        self.forced_wait(self._wait_seconds)

    # ------------------------------------------------------------------
    # Element attributes
    # ------------------------------------------------------------------

    def get_value(self, selector):
        """Return the ``value`` attribute of the element.

        :param selector: selector string
        """
        el = self._locate_element(selector)
        return el.get_attribute("value")

    def get_attribute(self, selector, attribute):
        """Return the value of an element attribute.

        Usage:
        driver.get_attribute("i,el","type")
        """
        el = self._locate_element(selector)
        return el.get_attribute(attribute)

    def get_text(self, selector):
        """Return the element's text.

        Usage:
        driver.get_text("i,el")
        """
        el = self._locate_element(selector)
        return el.text

    def get_displayed(self, selector):
        """Return whether the element is displayed (True or False).

        Usage:
        driver.get_displayed("i,el")
        """
        el = self._locate_element(selector)
        return el.is_displayed()

    def get_selected(self, selector):
        """Return whether the element is selected.

        :param selector: selector to locate
        :return: True or False
        """
        el = self._locate_element(selector)
        return el.is_selected()

    def get_text_list(self, selector):
        """Return the texts of all elements matching *selector*.

        :param selector: selector string
        :return: list of element texts
        """
        return [el.text for el in self._locate_elements(selector)]

    # ------------------------------------------------------------------
    # Windows, frames and alerts
    # ------------------------------------------------------------------

    def accept_alert(self):
        """Accept the currently open alert.

        Usage:
        driver.accept_alert()
        """
        self._web_driver.switch_to.alert.accept()

        self.forced_wait(self._wait_seconds)

    def dismiss_alert(self):
        """Dismiss the currently open alert.

        Usage:
        driver.dismiss_alert()
        """
        self._web_driver.switch_to.alert.dismiss()

        self.forced_wait(self._wait_seconds)

    def switch_to_frame(self, selector):
        """Switch to the specified frame.

        Usage:
        driver.switch_to_frame("i,el")
        """
        el = self._locate_element(selector)
        self._web_driver.switch_to.frame(el)

        self.forced_wait(self._wait_seconds)

    def switch_to_default(self):
        """Switch back to the default content; counterpart of switch_to_frame().

        Usage:
        driver.switch_to_default()
        """
        self._web_driver.switch_to.default_content()

        self.forced_wait(self._wait_seconds)

    def switch_to_parent(self):
        """Switch to the parent frame."""
        self._web_driver.switch_to.parent_frame()

        self.forced_wait(self._wait_seconds)

    def switch_to_window_by_title(self, title):
        """Switch through the open windows until one has the given title.

        Stops on the first window whose title equals *title*. When none
        matches, the driver stays on the last window handle.
        """
        for handle in self._web_driver.window_handles:
            self._web_driver.switch_to.window(handle)
            if self._web_driver.title == title:
                break

            self._web_driver.switch_to.default_content()
            self.forced_wait(self._wait_seconds)

    def open_new_window(self, selector):
        """Click the element and switch to the first other window handle.

        Usage:
        driver.open_new_window("i,el")
        """
        original_window = self._web_driver.current_window_handle
        el = self._locate_element(selector)
        el.click()
        for handle in self._web_driver.window_handles:
            if handle != original_window:
                self._web_driver.switch_to.window(handle)
                break

    def save_window_snapshot(self, file_name):
        """Save a screenshot of the window to a file.

        :param file_name: the image file name and path
        """
        self._web_driver.save_screenshot(file_name)
        self.forced_wait(self._wait_seconds)

    def save_window_snapshot_by_png(self):
        """Return a screenshot of the window as PNG data."""
        return self._web_driver.get_screenshot_as_png()

    def save_element_snapshot_by_png(self, selector):
        """Return a screenshot of the element as PNG data.

        :param selector: selector of the element
        """
        el = self._locate_element(selector)
        self.forced_wait(self._wait_seconds)
        return el.screenshot_as_png

    def save_window_snapshot_by_io(self):
        """Return a screenshot of the window as a base64 string."""
        return self._web_driver.get_screenshot_as_base64()

    def save_element_snapshot_by_io(self, selector):
        """Return a screenshot of the element as a base64 string.

        :param selector: selector of the element
        """
        el = self._locate_element(selector)
        return el.screenshot_as_base64

    # ------------------------------------------------------------------
    # Waiting
    # ------------------------------------------------------------------

    @staticmethod
    def forced_wait(seconds):
        """Sleep unconditionally.

        :param seconds: time to sleep, in seconds
        """
        time.sleep(seconds)

    def implicitly_wait(self, seconds):
        """Set the driver's implicit wait.

        :param seconds: wait time in seconds

        Usage:
        driver.implicitly_wait(10)
        """
        self._web_driver.implicitly_wait(seconds)

    def explicitly_wait(self, selector, seconds):
        """Wait until the element is present in the page.

        :param selector: selector string
        :param seconds: maximum time to wait, in seconds
        """
        locator = self._convert_selector_to_locator(selector)

        WebDriverWait(self._web_driver, seconds).until(
            expected_conditions.presence_of_element_located(locator))

    def get_explicitly_wait_element_text(self, selector, seconds):
        """Wait for the element and return its text.

        :param selector: selector string
        :param seconds: maximum time to wait, in seconds
        :return: the element's text, or ``None`` when the wait did not
            produce a WebElement
        """
        locator = self._convert_selector_to_locator(selector)
        driver = self._web_driver

        el = WebDriverWait(driver,
                           seconds).until(lambda d: d.find_element(*locator))
        if el and isinstance(el, WebElement):
            return el.text

        return None

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def current_title(self):
        """Title of the current window.

        Usage:
        driver.current_title
        """
        return self._web_driver.title

    @property
    def current_url(self):
        """URL of the current page.

        Usage:
        driver.current_url
        """
        return self._web_driver.current_url
Exemple #17
0
driver = Chrome(options=chrome_options,
                executable_path="/usr/local/bin/chromedriver")
driver.get(user_config_url)

# Cookies are persisted in a "cookie" directory next to this script.
cookie_path = os.path.dirname(os.path.realpath(__file__)) + '/cookie/'

# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(cookie_path, mode=0o777, exist_ok=True)

tapdcookies_path = cookie_path + 'tapdcookies.pkl'
if os.path.exists(tapdcookies_path):
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # cookie files that this script wrote itself.
    with open(tapdcookies_path, "rb") as cookie_file:
        cookies = pickle.load(cookie_file)
    for cookie in cookies:
        driver.add_cookie({
            'name': cookie["name"],
            'value': cookie["value"],
            'path': cookie["path"],
            'secure': True
        })
    # Reload so the restored cookies are sent with the request; without it
    # the login check below still sees the page loaded before the cookies
    # were added.
    driver.get(user_config_url)
else:
    with open(tapdcookies_path, "wb") as cookie_file:
        pickle.dump(driver.get_cookies(), cookie_file)

# Log in with credentials when the login form is (still) shown.
if isElementExist(driver, "username"):
    driver.find_element_by_id("username").send_keys(user_email)
    driver.find_element_by_id("password_input").send_keys(user_password)
    driver.find_element_by_id("tcloud_login_button").click()
    time.sleep(1)

arr = driver.find_elements_by_class_name("rowNOTdone")
undo_output = ''
doing_output = ''
Exemple #18
0
# -*- coding:utf-8 -*-
from time import sleep
from selenium.webdriver import Chrome

# Open a Chrome session; Chrome() is called with no arguments.
driver = Chrome()
# Cookie to install for host 120.78.128.25.
# NOTE(review): presumably a session cookie that bypasses the login form --
# confirm against the target site.
cook = {
    'domain': '120.78.128.25',
    'name': 'fengwoo',
    'path': '/',
    'value': '2shlsj8b6qju618hgj9ggjs2e3'
}

# A page on the cookie's host is loaded before add_cookie is called.
driver.get('http://120.78.128.25:8765/Index/login.html')
driver.add_cookie(cook)
# Load the site root now that the cookie has been added.
driver.get('http://120.78.128.25:8765')
# ele=driver.find_element_by_xpath("//a[text()='我的帐户[python10]']")
# Link whose text contains the given substring.
ele = driver.find_element_by_xpath("//a[contains(text(),'我的帐户')]")
# Link whose href attribute contains 'Member'.
ele2 = driver.find_element_by_xpath("//a[contains(@href,'Member')]")
print(ele.text)
print(ele2.text)
Exemple #19
0
class BasePage:
    """Page-object base class wrapping a Selenium driver.

    Offers explicit-wait element lookups, expected-condition checks,
    navigation shortcuts, JavaScript helpers and screenshot capture, logging
    each step through the module-level ``logger``.
    """

    def __init__(self, driver=None):
        """Wrap *driver*, or start Chrome with a cookie-restored session.

        :param driver: an existing WebDriver to reuse. When ``None`` a new
            Chrome instance is started, maximized, pointed at the
            work.weixin.qq.com admin contacts page, given the cookies below
            and refreshed.
        """
        if driver is None:
            self.driver = Chrome()
            self.driver.maximize_window()
            self.driver.get(
                "https://work.weixin.qq.com/wework_admin/frame#contacts")
            # NOTE(review): session cookies are hard-coded in source; they
            # look like credentials and should presumably come from
            # configuration or a secret store -- confirm and rotate.
            cookies = {
                "wwrtx.vst":
                "zenYAe4CxGbueq5ASVGKquiAk5PdPagPGzKHdLCVqT2i-M2L68XlyLV_-2tP7InD4kOpcBm"
                "_stcX8b9Y9z6ec1BgEMdhR-FASZD-wSBX7D37_L7OFcsEYXUePdKC8sPqQBza3KieYk7TE9De"
                "2a2AaILp3vEZTlaJMLwFDrOFjOBcFLvhY-k-VmX1gl-BGUklaeVgd8MBeY1ky3t4-2M0yiQlnA"
                "7VWwRByLyJxlGrHgCrxZhOhs_BhvyJzLmJOoFNQvhrVSvzAXXoFdHs51gdxA",
                "wwrtx.d2st":
                "a4861364",
                "wwrtx.sid":
                "iAu-Z4L3xTLbZ5elezl0oXsd6Y-SXiveFjergOybpzZeb_7vPhAIpt8yVlOv0Ki1",
                "wwrtx.ltype":
                "1",
                "wxpay.corpid":
                "1688852500754167",
                "wxpay.vid":
                "1688852500754167",
            }

            for k, v in cookies.items():
                self.driver.add_cookie({"name": k, "value": v})
            self.driver.refresh()
        else:
            self.driver = driver

    def _attach_screenshot(self):
        """Take a screenshot and attach it to the allure report if one exists."""
        file_name = self.get_windows_img()
        if file_name is not None:
            allure.attach(file_name)

    def _wait_for_element(self, condition, locator, timeout):
        """Wait for ``condition(locator)``; return the result or ``None``.

        Shared implementation of the ``get_*_element`` helpers. Failures are
        logged and a screenshot is attached. ``Exception`` (rather than a bare
        ``except``) keeps the best-effort behaviour without swallowing
        KeyboardInterrupt/SystemExit.
        """
        try:
            ele = WebDriverWait(self.driver, timeout=timeout).until(
                condition(locator))
        except Exception:
            logger.error("相对时间内没有定位到{}元素".format(locator))
            self._attach_screenshot()
            return None
        logger.info('获取{}元素成功'.format(locator))
        return ele

    def get_visible_element(self, locator, eqc=20) -> WebElement:
        '''
        Wait until the element is visible and return it.

        :param locator: locator tuple, e.g. ``('id', 'xxx')``
        :param eqc: maximum wait in seconds
        :return: the element, or ``None`` when the wait fails
        '''
        return self._wait_for_element(EC.visibility_of_element_located,
                                      locator, eqc)

    def get_presence_element(self, locator, eqc=10):
        """
        Wait until the element is present in the DOM and return it.

        :param locator: locator tuple
        :param eqc: maximum wait in seconds
        :return: the element, or ``None`` when the wait fails
        """
        return self._wait_for_element(EC.presence_of_element_located,
                                      locator, eqc)

    def get_clickable_element(self, locator, eqc=20):
        """
        Wait until the element is clickable and return it.

        :param locator: locator tuple
        :param eqc: maximum wait in seconds
        :return: the element, or ``None`` when the wait fails
        """
        return self._wait_for_element(EC.element_to_be_clickable, locator,
                                      eqc)

    def send_keys(self, locator, text):
        '''
        Clear the element, then type *text* into it.

        locator = ('id','xxx')
        element.send_keys(locator,text)
        '''

        element = self.get_visible_element(locator)
        element.clear()
        element.send_keys(text)
        logger.info('SendKeys %s in %s success.' % (text, locator))

    def is_text_in_element(self, locator, text, timeout=10):
        '''
        Check whether *text* is present in the element's text.

        :return: the condition's result, or ``False`` (after logging and
            attaching a screenshot) when the wait times out
        '''

        try:
            result = WebDriverWait(self.driver, timeout, 1).until(
                EC.text_to_be_present_in_element(locator, text))
        except TimeoutException:
            logger.info('No location to the element.')
            self._attach_screenshot()
            return False
        else:
            return result

    def is_text_in_value(self, locator, value, timeout=10):
        '''
        Check whether *value* is present in the element's value attribute.

        :return: the condition's result, or ``False`` (after logging and
            attaching a screenshot) when the wait times out
        '''

        try:
            result = WebDriverWait(self.driver, timeout, 1).until(
                EC.text_to_be_present_in_element_value(locator, value))
        except TimeoutException:
            logger.info('No location to the element.')
            self._attach_screenshot()
            return False
        else:
            return result

    def is_title(self, title, timeout=10):
        '''
        Wait until the page title equals *title* exactly.

        :raises TimeoutException: when the title does not match in time
        '''

        result = WebDriverWait(self.driver, timeout,
                               1).until(EC.title_is(title))
        return result

    def is_title_contains(self, title, timeout=10):
        '''
        Wait until the page title contains *title*.

        :raises TimeoutException: when the title does not match in time
        '''

        result = WebDriverWait(self.driver, timeout,
                               1).until(EC.title_contains(title))
        return result

    def is_selected(self, locator, timeout=10):
        '''
        Wait until the located element is selected.

        :raises TimeoutException: when it is not selected in time
        '''

        result = WebDriverWait(self.driver, timeout, 1).until(
            EC.element_located_to_be_selected(locator))
        return result

    def is_selected_be(self, locator, selected=True, timeout=10):
        '''
        Wait until the element's selection state equals *selected*.

        :raises TimeoutException: when the state does not match in time
        '''

        result = WebDriverWait(self.driver, timeout, 1).until(
            EC.element_located_selection_state_to_be(locator, selected))
        return result

    def is_alert_present(self, timeout=10):
        '''
        Wait for an alert and return the wait's result.

        :raises TimeoutException: when no alert shows up in time
        '''

        result = WebDriverWait(self.driver, timeout,
                               1).until(EC.alert_is_present())
        return result

    def is_visibility(self, locator, timeout=10):
        '''
        Wait until the element is visible and return the wait's result.

        :raises TimeoutException: when it is not visible in time
        '''

        result = WebDriverWait(self.driver, timeout, 1).until(
            EC.visibility_of_element_located(locator))
        return result

    def is_invisibility(self, locator, timeout=10):
        '''
        Wait until the element is invisible and return the wait's result.

        :raises TimeoutException: when the element stays visible
        '''

        result = WebDriverWait(self.driver, timeout, 1).until(
            EC.invisibility_of_element_located(locator))
        return result

    def is_clickable(self, locator, timeout=10):
        '''
        Wait until the element is clickable and return the wait's result.

        :raises TimeoutException: when it is not clickable in time
        '''

        result = WebDriverWait(self.driver, timeout,
                               1).until(EC.element_to_be_clickable(locator))
        return result

    def is_located(self, locator, timeout=10):
        '''
        Wait until elements matching *locator* are present (not necessarily
        visible) and return the wait's result.

        :raises TimeoutException: when nothing is located in time
        '''

        result = WebDriverWait(self.driver, timeout, 1).until(
            EC.presence_of_all_elements_located(locator))
        return result

    def move_to_element(self, locator):
        '''
        Hover the mouse over the element.

        locator=('id','xxx')
        driver.move_to_element(locator)
        '''

        element = self.get_visible_element(locator)
        ActionChains(self.driver).move_to_element(element).perform()

    def back(self):
        """Navigate one step back in the browser history."""
        self.driver.back()

        logger.info('back driver!')

    def forward(self):
        """Navigate one step forward in the browser history."""
        self.driver.forward()

        logger.info('forward driver!')

    def close(self):
        """Close the current window."""
        self.driver.close()

        logger.info('close driver!')

    def refresh(self):
        """Reload the current page and return the driver call's result."""
        return self.driver.refresh()

    def get_title(self):
        '''
        Return the title of the current page.
        '''

        logger.info('git dirver title.')
        # ``title`` is a property on the driver; calling it would raise
        # TypeError because the value is a plain string.
        return self.driver.title

    def get_text(self, locator):
        '''
        Return the text of the (visible) element.
        '''

        element = self.get_visible_element(locator)
        text = element.text
        return text

    def get_attribute(self, locator, name):
        '''
        Return attribute *name* of the (visible) element.
        '''

        element = self.get_visible_element(locator)
        # Pass the locator as a logging argument: ``'%s' % locator`` raises
        # TypeError when the locator is a two-item tuple.
        logger.info('get attribute in %s', locator)
        return element.get_attribute(name)

    def js_execute(self, js):
        '''
        Execute JavaScript and return its result.

        :return: the script's result, or ``None`` when execution fails (a
            screenshot is attached and the failure is logged)
        '''

        try:
            logger.info('Execute js.%s' % js)
            return self.driver.execute_script(js)
        except Exception:
            self._attach_screenshot()
            logger.info('failed to excute js')
            return None

    def js_focus_element(self, locator):
        '''
        Scroll the element into view.
        '''

        target = self.get_visible_element(locator)
        self.driver.execute_script("arguments[0].scrollIntoView();", target)

    def js_scroll_top(self):
        '''
        Scroll to the top of the page.
        '''

        js = 'window.scrollTo(0,0)'
        self.js_execute(js)
        logger.info('Roll to the top!')

    def js_scroll_end(self):
        '''
        Scroll to the bottom of the page.
        '''

        # The DOM property is ``scrollHeight``; the former ``scrollHight``
        # evaluated to undefined, so the page never scrolled down.
        js = "window.scrollTo(0,document.body.scrollHeight)"
        self.js_execute(js)
        logger.info('Roll to the end!')

    def get_windows_img(self):
        """Save a screenshot to ``contants.screenshot_img``.

        :return: the screenshot path, or ``None`` when it could not be taken
        """
        try:
            file_name = contants.screenshot_img
            saved = self.driver.get_screenshot_as_file(file_name)
        except NameError as e:
            # Retrying here recursed without end; report and give up instead.
            logger.info('Failed to take the screenshot!%s' % e)
            return None
        if saved is False:
            # The driver reports False when the file could not be written.
            logger.info('Failed to take the screenshot!%s' % file_name)
            return None
        logger.info(
            'Had take screenshot and save to folder:output/screenshots')
        return file_name

    def switch_window(self, name=None, fqc=20):
        """
        Switch windows: to *name* when given, otherwise to the newest window.

        :param name: window name/handle to switch to
        :param fqc: maximum seconds to wait for another window to exist
        :return: result of ``switch_to.window``
        """
        if name is None:
            current_handle = self.driver.current_window_handle
            # new_window_is_opened compares handle *counts*, so it needs a
            # list of known handles, not a single handle string.
            WebDriverWait(self.driver, fqc).until(
                EC.new_window_is_opened([current_handle]))
            handles = self.driver.window_handles
            return self.driver.switch_to.window(handles[-1])
        return self.driver.switch_to.window(name)
class Mangafreak():
    """Selenium bot that signs in (or restores cookies) and walks job listings.

    Constructing an instance runs the whole flow: open the site, restore or
    create a session, load the note template and call ``apply_to_jobs``.
    """

    def __init__(self):
        """Start Chrome, establish a session and begin processing listings."""
        # Raw string: the backslash is a path separator, not an escape.
        self.browser = Chrome(executable_path=r"E:\chromedriver.exe")
        self.browser.get(MANGAFREAK_FP)
        if os.path.exists('angel.pkl'):
            sleep(1)
            # NOTE(review): pickle.load executes whatever the file encodes;
            # only load cookie files written by this script.
            with open('angel.pkl', 'rb') as cookie_file:
                cookies = pickle.load(cookie_file)
            for cookie in cookies:
                # Stored cookies carry 'expiry'; it is re-keyed to 'expires'
                # before being handed back to the browser.
                if cookie.get('expiry', None) is not None:
                    cookie['expires'] = cookie.pop('expiry')
                self.browser.add_cookie(cookie)
            self.browser.get(MANGAFREAK_FP + "/jobs")
            sleep(5)
            self.browser.get(MANGAFREAK_FP + "/jobs")
        else:
            log_in_btn = self.browser.find_element_by_xpath(
                "/html/body/div/div/header/div/div[2]/a[3]")
            log_in_btn.click()
            sleep(1)

            # NOTE(review): credentials are hard-coded in source; they should
            # presumably come from configuration -- confirm.
            self.browser.find_element_by_id("user_email").send_keys(
                "*****@*****.**")
            self.browser.find_element_by_id("user_password").send_keys(
                "angelico@job55")
            self.browser.find_element_by_name("commit").click()
            cookies = self.browser.get_cookies()
            with open('angel.pkl', 'wb') as cookie_file:
                pickle.dump(cookies, cookie_file)
            jobs_portal = self.browser.find_element_by_xpath(
                "/html/body/div/header/div/div[1]/nav/ul/li[3]/a")
            jobs_portal.click()

        sleep(5)
        # Default to an empty template so later formatting cannot hit a
        # missing attribute when the template file is absent.
        self.template = ''
        if os.path.exists('templateReq.txt'):
            with open('templateReq.txt', 'r') as template_file:
                self.template = template_file.read()
        self.apply_to_jobs()

    def apply_to_jobs(self):
        """Process the visible startup results in two interactive rounds.

        The second round starts at the index where the first one stopped;
        each round ends by blocking on ``input`` until the user confirms.
        """
        sleep(4)
        starting_index = 0
        for _ in range(2):
            job_search_results = self.browser.find_element_by_xpath(
                '//*[@id="main"]/div/div[5]/div[2]/div')
            startup_results = job_search_results.find_elements_by_tag_name(
                'div')
            startup_results = [
                element for element in startup_results
                if element.get_attribute('data-test') == 'StartupResult'
            ]
            print("Startup entries", len(startup_results))

            for startup in startup_results[starting_index:]:
                # Accessed for its side effect of scrolling the element into
                # view; the returned location is not needed.
                startup.location_once_scrolled_into_view
                company_listings = startup.find_elements_by_class_name(
                    'component_07bb9')
                print("GEtting here", len(company_listings))
                for position in company_listings:
                    sleep(1)
                    self.apply_to_a_single_job_listing(position)

            starting_index = len(startup_results)
            input("Ready for next round")

    def display_text(self, elementArray, banner):
        """Print ``banner`` followed by the text of every element."""
        for entry in elementArray:
            print(banner, entry.text)

    def display_attribute(self, elementArray):
        """Print the ``class`` attribute of every element."""
        for entry in elementArray:
            print(entry.get_attribute('class') + "\n")

    def apply_to_a_single_job_listing(self, element):
        """Open the application dialog of one listing, fill the note, cancel.

        Listings whose button already reads 'Applied' are skipped: no dialog
        opens for them, so the dialog handling below would fail.
        """
        company_info_1 = element.find_elements_by_tag_name('a')
        company_info_2 = element.find_elements_by_tag_name('span')
        self.display_text(company_info_1, "Company Info 1 -- ")
        self.display_text(company_info_2[-1:], "Company Info 2 -- ")

        apply_box = element.find_element_by_class_name('box_1bc08')
        apply_button = apply_box.find_element_by_tag_name('button')
        if apply_button.text == 'Applied':
            return
        apply_button.click()
        sleep(3)
        applicationModal = self.browser.find_element_by_class_name(
            "ReactModalPortal")
        sleep(0.2)
        h4tags = applicationModal.find_elements_by_tag_name('h4')
        # The last heading is split on " is " and the part after it is used
        # as the contact's name.
        heading_parts = h4tags[-1].text.split(" is ")
        print(heading_parts)
        contact_person = heading_parts[1]
        writeNoteToContact = applicationModal.find_element_by_tag_name(
            'textarea')

        writeNoteToContact.send_keys("Hi {}, {}".format(
            contact_person, self.template))
        # The dialog is cancelled rather than submitted.
        bts = applicationModal.find_elements_by_tag_name('button')
        for e in bts:
            if e.text == 'Cancel':
                e.click()
                break
Exemple #21
0
    'file:///D:/Users/%E4%BD%95%E6%97%BA%E5%BD%A4/PycharmProjects/heyang-/index.html'
)
'''
#iframe切换  1、name  2、索引  3、WebElement
driver.switch_to.frame('myiframe')#第一种方法
#先拿到WebElement
frame = driver.find_element_by_tag_name('iframe')
driver.switch_to.frame(frame)

#等待新的iframe可以用在进行切换
ec.frame_to_be_available_and_switch_to_it((By.TAG_NAME,'frame'))

#怎么切换回去初始的HTML内容
driver.switch_to.default_content()

#多个iframe,多个嵌套。切到父级
driver.switch_to.parent_frame()
'''

# Switching to an alert
driver.find_element_by_name('click').click()  # locate the element first, then click it
alert = driver.switch_to.alert  # get the alert object
alert.text  # read the alert's text (the value is discarded here)
alert.accept()  # confirm; returns to the original page
# NOTE(review): the alert was already accepted on the previous line, so this
# dismiss() presumably has no open alert left to act on -- confirm intent.
alert.dismiss()  # cancel; returns to the original page

# Further reading: @property

# NOTE(review): both calls are made without arguments here; Selenium's
# add_cookie expects a cookie dict and get_cookie a cookie name -- verify.
driver.add_cookie()  # add a cookie
driver.get_cookie()  # get a cookie
Exemple #22
0
import venmoInfo
import datetime
import os

# Raw string: the backslashes are path separators, not escape sequences.
CHROME_DRIVER_PATH = r'C:\ChromeDriver\chromedriver.exe'
VENMO_URL = 'https://venmo.com/'

browser = Chrome(CHROME_DRIVER_PATH)
browser.get(VENMO_URL)

if os.path.isfile('cookies.pkl'):
    # A cookie file exists: restore its cookies into the open session.
    # NOTE(review): pickle.load executes whatever the file encodes; only load
    # cookie files written by this script.
    with open("cookies.pkl", "rb") as cookie_file:
        cookies = pickle.load(cookie_file)
    for cookie in cookies:
        browser.add_cookie(cookie)

    # click on the sign in link
    signin_link = browser.find_element_by_link_text("Sign in")
    signin_link.click()

    # enter the email and password and send it
    username_box = browser.find_element_by_class_name("email-username-phone")
    username_box.send_keys(venmoInfo.my_u)
    password_box = browser.find_element_by_class_name("password")
    password_box.send_keys(venmoInfo.my_p)
    send_button = browser.find_element_by_class_name("login")
    send_button.click()

    # enter the person's name you want to pay
    time.sleep(5)
Exemple #23
0
class URLTracker:
    """Follow the browser's URL and start/stop a watcher per project page."""

    def __init__(self, watcher):
        """Start Chrome and remember *watcher* (needs ``start``/``stop``)."""
        self.wd = Chrome()
        self.watcher = watcher
        #self.load_cookie()

    def load_cookie(self):
        """Restore cookies from the ``cookies`` file, if it exists."""
        if not exists('cookies'):
            return
        # NOTE(review): ``load`` is presumably pickle.load, which executes
        # whatever the file encodes -- only load files this class wrote.
        with open('cookies', 'rb') as rf:
            self.cookies = load(rf)
        for cookie in self.cookies:
            self.wd.add_cookie(cookie)
        logger.info('Cookies load successfully.')

    def dump_cookie(self):
        """Store the browser's current cookies in the ``cookies`` file."""
        self.cookies = self.wd.get_cookies()
        with open('cookies', 'wb') as wf:
            dump(self.cookies, wf)

    def login(self, email, password):
        """Fill the email/password fields and click the first submit button."""
        wd = self.wd
        wd.find_element_by_name('email').send_keys(email)
        wd.find_element_by_name('password').send_keys(password)
        wd.find_elements_by_xpath('//button[@type = "submit"]')[0].click()

    def close_window(self):
        """Prepare a thread that keeps clicking the ``done()`` button.

        NOTE(review): the thread is created but never started (the start
        call is commented out), so this currently has no visible effect.
        """
        def target(wd):
            while True:
                ele = wd.find_elements_by_xpath(
                    '//button[@ng-click = "done()"]')
                if len(ele):
                    ele[0].click()
                sleep(3)

        th = Thread(target=target, args=(self.wd, ))
        #th.start()

    def _wait_for_url_change(self, url, poll_interval=0.1):
        """Block until the browser leaves *url*, pausing between polls."""
        # Pausing between polls avoids hammering the driver in a tight loop.
        while self.wd.current_url == url:
            sleep(poll_interval)

    def start(self):
        """Run forever: watch while on a project page, stop when leaving it."""
        watcher = self.watcher
        while True:
            url = self.wd.current_url
            url_parts = url.split('/')

            if url_parts[-1] == 'project':
                #self.dump_cookie()

                logger.info('Waiting for entering a project')
                self._wait_for_url_change(url)
                sleep(0.1)

            if len(url_parts) >= 2 and url_parts[-2] == 'project':
                logger.info('A project found, start syncing.')
                self.close_window()
                watcher.start()

                # Keep watching until the user navigates away.
                self._wait_for_url_change(url)
                watcher.stop()
                sleep(0.1)

            sleep(0.1)
    def set_chrome(self, **kwargs):
        """
        Build a configured Chrome webdriver and return it with its settings.

        ----------------------------------------------------------------------------

        Configuration Method:
            1. EXECUTABLE,SERVICE ARGS and PORT
            2. DESIRED CAPABILITIES
            3. CHROME OPTIONS


        Command Line Switches:
            http://peter.sh/experiments/chromium-command-line-switches/

        Capabilities:
            https://sites.google.com/a/chromium.org/chromedriver/capabilities


        ----------------------------------------------------------------------------
        This function should be a class for webdriver.
        For now, just setting up Chrome.

        driver_browsers                     =   ['android',
                                                 'chrome',
                                                 'firefox',
                                                 'htmlunit',
                                                 'internet explorer',
                                                 'iPhone',
                                                 'iPad',
                                                 'opera',
                                                 'safari']

        Settings are collected into a dict ``T`` from ``kwargs``, then from
        ``self.T`` (``kwargs``, ``id``, ``id.details``, ``id.cookie.content``)
        when present, then from the built-in defaults unless a ``'defaults'``
        key was supplied.

        :param kwargs: setting overrides; also passed to ``config_browser``
        :return: tuple ``(driver, T)``
        """
        def set_defaults(self):
            """Force the built-in defaults into ``T`` and return it.

            Every default overwrites the value already in ``T``; keys listed
            in ``self.T.excluded_defaults`` are removed from ``T`` instead.
            """
            default_settings = {
                'bin_path':
                '/usr/local/bin/chromedriver',
                'port':
                15010,
                'log_path':
                os_environ['BD'] + '/html/logs/chromedriver.log',
                'user-agent':
                "Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1",
                # 1 in 1788 per panopticlick !!
                'no_java':
                True,
                'no_plugins':
                True,
                'net-log-capture-mode':
                'IncludeCookiesAndCredentials',
                'log-level':
                0,
                'cookie_content': {},
                'capabilities': {
                    'acceptSslCerts': True,
                    'databaseEnabled': False,
                    'unexpectedAlertBehaviour': "accept",
                    'applicationCacheEnabled': False,
                    'webStorageEnabled': False,
                    'browserConnectionEnabled': False,
                    'locationContextEnabled': True,
                },
                'loggingPrefs': {
                    "driver": "ALL",
                    "server": "ALL",
                    "browser": "ALL"
                },
                'true_opts': [
                    'disable-core-animation-plugins',
                    'disable-plugins',
                    'disable-extensions',
                    'disable-plugins-discovery',
                    'disable-site-engagement-service',
                    'disable-text-input-focus-manager',
                    'enable-account-consistency',
                    'enable-devtools-experiments',
                    'enable-logging',
                    'enable-network-information',
                    'enable-net-benchmarking',
                    'enable-network-portal-notification',
                    'enable-strict-site-isolation',
                    'incognito',  # if incognito, extensions must be disabled
                    'log-net-log',
                    'scripts-require-action',
                    'system-developer-mode',
                    # 'use-mobile-user-agent',
                ],
                'false_opts': [
                    'enable-profiling',
                ],
            }
            excluded = [] if not (hasattr(self, 'T') and hasattr(
                self.T, 'excluded_defaults')) else self.T.excluded_defaults
            # ``in`` / ``.items()`` behave the same on Python 2 and 3, unlike
            # ``has_key`` / ``iteritems`` which exist only on Python 2.
            for k, v in default_settings.items():
                if k in excluded:
                    if k in T:
                        del T[k]
                else:
                    T.update({k: v})
            return T

        def set_desired_capabilities(self):
            """Return Chrome desired capabilities built from ``T``."""
            from selenium.webdriver import DesiredCapabilities
            dc = DesiredCapabilities.CHROME.copy()
            # Platform names listed by the original author (unused):
            #   'WINDOWS', 'XP', 'VISTA', 'MAC', 'LINUX', 'UNIX', 'ANDROID',
            #   'ANY'

            # -PROXY OBJECT
            # from selenium.webdriver import Proxy

            # -READ-WRITE CAPABILITIES
            rw_capabilities = [
                'acceptSslCerts',  # boolean unless specified
                'javascriptEnabled',
                'databaseEnabled',
                'proxy',  # Proxy Object
                'unexpectedAlertBehaviour',  # string {"accept", "dismiss", "ignore"}
                'applicationCacheEnabled',
                'webStorageEnabled',
                'rotatable',
                'browserConnectionEnabled',
                'locationContextEnabled',
                'elementScrollBehavior',  # int (align with the top (0) or bottom (1) of the viewport)
                'nativeEvents'
            ]

            assert 'capabilities' in T
            for it in rw_capabilities:
                if it in T['capabilities']:
                    dc[it] = str(T['capabilities'][it])

            # -loggingPrefs                         OBJECT (dict)
            #   "OFF",  "SEVERE", "WARNING",
            #   "INFO", "CONFIG", "FINE",
            #   "FINER","FINEST", "ALL"

            if 'loggingPrefs' in T:
                # Store under its own key; the stale loop variable used to
                # overwrite the 'nativeEvents' capability instead.
                dc['loggingPrefs'] = T['loggingPrefs']

            return dc

        def set_profile(self):
            """Add download/plugin preferences to ``opts`` (currently unused)."""
            profile = {
                # "download.default_directory": "C:\\SeleniumTests\\PDF",
                "download.prompt_for_download": False,
                "download.directory_upgrade": True,
                "plugins.plugins_disabled": ["Chromoting Viewer",
                                             "Chromium PDF Viewer"],
            }
            opts.add_experimental_option("prefs", profile)

        def set_performance_logging(self):
            """Add performance-logging prefs to ``opts`` (currently unused)."""
            perfLogging = {
                "enableNetwork": True,
                "enablePage": True,
                "enableTimeline": True,
                #"tracingCategories":<string>,
                "bufferUsageReportingInterval": 1000
            }

            opts.add_experimental_option("perfLoggingPrefs", perfLogging)

        def set_chrome_options(self):
            """Return ``ChromeOptions`` populated from the switches in ``T``."""
            from selenium.webdriver import ChromeOptions
            opts = ChromeOptions()

            ### Add Boolean Arguments
            if 'true_opts' in T:
                for it in T['true_opts']:
                    opts.add_argument('%s=1' % it)
            if 'false_opts' in T:
                for it in T['false_opts']:
                    opts.add_argument('%s=0' % it)

            value_opts = [
                'profile-directory',
                'log-level',  # 0 to 3: INFO = 0, WARNING = 1, LOG_ERROR = 2, LOG_FATAL = 3
                'net-log-capture-mode',  # "Default" "IncludeCookiesAndCredentials" "IncludeSocketBytes"'
                'register-font-files',  # might be windows only
                'remote-debugging-port',
                'user-agent',
                'user-data-dir',  # don't use b/c it negates no-extension options
            ]

            ### Add Value Arguments
            for it in value_opts:
                if it in T:
                    opts.add_argument('%s=%s' % (it, T[it]))

            ### OTHER CHROME OPTIONS NOT YET FULLY CONFIGURED

            # -extensions        list str
            # -localState        dict
            # -prefs             dict
            # set_profile()

            # -detach            bool
            # -debuggerAddress   str
            # -excludeSwitches   list str
            # -minidumpPath      str
            # -mobileEmulation   dict

            # -perfLoggingPrefs             OBJECT (dict)
            # set_performance_logging()

            return opts

        from selenium.webdriver import Chrome

        T = {}
        if kwargs:
            T.update(kwargs)
        has_settings = hasattr(self, 'T')
        if has_settings and hasattr(self.T, 'kwargs'):
            T.update(self.T.kwargs)

        # Cycle Through kwargs and Extract Configs
        # Guarded by has_settings so a missing ``self.T`` cannot raise here.
        if has_settings and hasattr(self.T, 'id'):
            T.update(self.T.id.__dict__)

            if hasattr(self.T.id, 'details'):
                for k, v in self.T.id.details.__dict__.items():
                    T.update({k.strip('_'): v})

            if hasattr(self.T.id, 'cookie'):
                if hasattr(self.T.id.cookie, 'content'):
                    T.update({'cookie_content': self.T.id.cookie.content})

        # Set Defaults if not provided
        if 'defaults' not in T:
            T = set_defaults(self)

        # Config Data Storage if Possible
        if 'SAVE_DIR' in T:
            T['user-data-dir'] = T['SAVE_DIR']
            T['profile-directory'] = 'Profile'
            # The per-guid log lives in SAVE_DIR, so it needs both keys.
            if 'guid' in T:
                T['log_path'] = '%s/%s.log' % (T['SAVE_DIR'], T['guid'])

        # Configure with Special Profiles if Requested
        # pop(..., None) because 'profile-directory' is only set when
        # SAVE_DIR was supplied; a plain ``del`` raised KeyError otherwise.
        special_profiles = os_environ['BD'] + '/html/webdrivers/chrome/profiles'
        if 'no_java' in T and T['no_java']:
            if 'no_plugins' in T and T['no_plugins']:
                T['user-data-dir'] = special_profiles + '/no_java_no_plugins/'
            else:
                T['user-data-dir'] = special_profiles + '/no_java/'
            T.pop('profile-directory', None)
        elif 'no_plugins' in T and T['no_plugins']:
            T['user-data-dir'] = special_profiles + '/no_plugins/'
            T.pop('profile-directory', None)

        # SERVICE ARGS          # ( somewhat documented in executable help, i.e., chromedriver --help )
        service_args = ["--verbose", "--log-path=%(log_path)s" % T]

        dc = set_desired_capabilities(self)
        opts = set_chrome_options(self)

        d = Chrome(executable_path=T['bin_path'],
                   port=T['port'],
                   service_args=service_args,
                   desired_capabilities=dc,
                   chrome_options=opts)
        d.set_window_size(1280, 720)
        # NOTE(review): the cookie is added before any page is loaded --
        # confirm the driver accepts that.
        if T.get('cookie_content'):
            d.add_cookie(T['cookie_content'])

        self.config_browser(d, kwargs)

        return d, T
Exemple #25
0
import venmoInfo
import datetime
import os

# Raw string: the backslashes are Windows path separators, not escape sequences.
CHROME_DRIVER_PATH = r'C:\ChromeDriver\chromedriver.exe'
VENMO_URL = 'https://venmo.com/'
COOKIE_FILE = 'cookies.pkl'

# NOTE(review): Chrome, pickle and time are used below but are not imported in
# the visible part of this snippet -- confirm they are imported elsewhere.
browser = Chrome(CHROME_DRIVER_PATH)
browser.get(VENMO_URL)

if os.path.isfile(COOKIE_FILE):
    # there is a cookie file: restore the cookies it contains into the browser.
    # NOTE: pickle.load can execute arbitrary code; only load a cookie file
    # that this script produced itself.
    with open(COOKIE_FILE, "rb") as cookie_file:
        cookies = pickle.load(cookie_file)
    for cookie in cookies:
        browser.add_cookie(cookie)

    # click on the sign in link
    signin_link = browser.find_element_by_link_text("Sign in")
    signin_link.click()

    # enter the email and password and send it
    username_box = browser.find_element_by_class_name("email-username-phone")
    username_box.send_keys(venmoInfo.my_u)
    password_box = browser.find_element_by_class_name("password")
    password_box.send_keys(venmoInfo.my_p)
    send_button = browser.find_element_by_class_name("login")
    send_button.click()

    # enter the person's name you want to pay
    time.sleep(5)
    def set_chrome(self, **kwargs):
        """
        Collect settings, start a Chrome webdriver and return it.

        ----------------------------------------------------------------------------

        Configuration Method:
            1. EXECUTABLE,SERVICE ARGS and PORT
            2. DESIRED CAPABILITIES
            3. CHROME OPTIONS

        Settings are gathered into a single dict ``T``.  Sources are applied in
        this order, later ones overwriting earlier ones:
            1. ``kwargs``
            2. ``self.T.kwargs``                 (if present)
            3. ``self.T.id.__dict__``            (if present)
            4. ``self.T.id.details.__dict__``    (keys stripped of leading and
                                                  trailing underscores)
            5. ``self.T.id.cookie.content``      (stored as 'cookie_content')
            6. the built-in defaults, unless ``T`` contains a 'defaults' key

        Command Line Switches:
            http://peter.sh/experiments/chromium-command-line-switches/

        Capabilities:
            https://sites.google.com/a/chromium.org/chromedriver/capabilities


        ----------------------------------------------------------------------------
        This function should be a class for webdriver.
        For now, just setting up Chrome.

        driver_browsers                     =   ['android',
                                                 'chrome',
                                                 'firefox',
                                                 'htmlunit',
                                                 'internet explorer',
                                                 'iPhone',
                                                 'iPad',
                                                 'opera',
                                                 'safari']

        :param kwargs: extra settings; also forwarded to ``self.config_browser``.
        :return: tuple ``(d, T)`` -- the started Chrome driver and the settings
                 dict that was used to build it.
        :raises KeyError: if ``os_environ`` has no 'BD' entry, or if a required
                 setting ('bin_path', 'port', 'log_path') is missing because
                 the defaults were skipped or excluded.
        """

        def set_defaults(self):
            """Write the built-in defaults into ``T`` and return ``T``.

            Each default overwrites whatever ``T`` already holds under that key.
            Keys listed in ``self.T.excluded_defaults`` are not applied and are
            removed from ``T`` when present.
            """
            default_settings = {
                'bin_path': '/usr/local/bin/chromedriver',
                'port': 15010,
                'log_path': os_environ['BD'] + '/html/logs/chromedriver.log',
                'user-agent': "Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1",
                # 1 in 1788 per panopticlick !!
                'no_java': True,
                'no_plugins': True,
                'net-log-capture-mode': 'IncludeCookiesAndCredentials',
                'log-level': 0,
                'cookie_content': {},
                'capabilities': {
                    'acceptSslCerts': True,
                    'databaseEnabled': False,
                    'unexpectedAlertBehaviour': "accept",
                    'applicationCacheEnabled': False,
                    'webStorageEnabled': False,
                    'browserConnectionEnabled': False,
                    'locationContextEnabled': True,
                },
                'loggingPrefs': {
                    "driver": "ALL",
                    "server": "ALL",
                    "browser": "ALL",
                },
                'true_opts': [
                    'disable-core-animation-plugins',
                    'disable-plugins',
                    'disable-extensions',
                    'disable-plugins-discovery',
                    'disable-site-engagement-service',
                    'disable-text-input-focus-manager',

                    'enable-account-consistency',
                    'enable-devtools-experiments',
                    'enable-logging',
                    'enable-network-information',
                    'enable-net-benchmarking',
                    'enable-network-portal-notification',

                    'enable-strict-site-isolation',
                    'incognito',  # if incognito, extensions must be disabled
                    'log-net-log',
                    'scripts-require-action',
                    'system-developer-mode',
                    # 'use-mobile-user-agent',
                ],
                'false_opts': [
                    'enable-profiling',
                ],
            }
            excluded = []
            if hasattr(self, 'T') and hasattr(self.T, 'excluded_defaults'):
                excluded = self.T.excluded_defaults
            # .items() / `in` work on both Python 2 and Python 3.
            for k, v in default_settings.items():
                if k in excluded:
                    T.pop(k, None)
                else:
                    T[k] = v
            return T

        def set_desired_capabilities(self):
            """Return a copy of the Chrome capabilities updated from ``T``."""
            from selenium.webdriver import DesiredCapabilities
            dc = DesiredCapabilities.CHROME.copy()
            # platform names, for reference:
            #   'WINDOWS', 'XP', 'VISTA', 'MAC', 'LINUX', 'UNIX', 'ANDROID', 'ANY'

            # -PROXY OBJECT
            # from selenium.webdriver import Proxy

            # -READ-WRITE CAPABILITIES
            rw_capabilities = [
                'acceptSslCerts',              # boolean unless specified
                'javascriptEnabled',
                'databaseEnabled',
                'proxy',                       # Proxy Object
                'unexpectedAlertBehaviour',    # string {"accept", "dismiss", "ignore"}
                'applicationCacheEnabled',
                'webStorageEnabled',
                'rotatable',
                'browserConnectionEnabled',
                'locationContextEnabled',
                'elementScrollBehavior',       # int (align with the top (0) or bottom (1) of the viewport)
                'nativeEvents',
            ]

            assert 'capabilities' in T
            for it in rw_capabilities:
                if it in T['capabilities']:
                    # values are passed on as strings, as before
                    dc[it] = str(T['capabilities'][it])

            # -loggingPrefs                         OBJECT (dict)
            #   "OFF",  "SEVERE", "WARNING",
            #   "INFO", "CONFIG", "FINE",
            #   "FINER","FINEST", "ALL"
            if 'loggingPrefs' in T:
                # Fixed: this used to be stored under the stale loop variable
                # `it` (i.e. 'nativeEvents') instead of 'loggingPrefs'.
                dc['loggingPrefs'] = T['loggingPrefs']

            return dc

        def set_profile(self):
            """Add download/plugin prefs to ``opts``.

            Not called anywhere in this method; relies on an ``opts`` name
            being available from the enclosing scope.
            """
            profile = {
                # "download.default_directory": "C:\\SeleniumTests\\PDF",
                "download.prompt_for_download": False,
                "download.directory_upgrade": True,
                "plugins.plugins_disabled": ["Chromoting Viewer",
                                             "Chromium PDF Viewer"],
            }
            opts.add_experimental_option("prefs", profile)

        def set_performance_logging(self):
            """Add performance-logging prefs to ``opts``.

            Not called anywhere in this method; relies on an ``opts`` name
            being available from the enclosing scope.
            """
            perfLogging = {
                "enableNetwork": True,
                "enablePage": True,
                "enableTimeline": True,
                # "tracingCategories":<string>,
                "bufferUsageReportingInterval": 1000,
            }
            opts.add_experimental_option("perfLoggingPrefs", perfLogging)

        def set_chrome_options(self):
            """Translate the switch-related entries of ``T`` into ChromeOptions."""
            from selenium.webdriver import ChromeOptions
            opts = ChromeOptions()

            ### Add Boolean Arguments
            if 'true_opts' in T:
                for it in T['true_opts']:
                    opts.add_argument('%s=1' % it)
            if 'false_opts' in T:
                for it in T['false_opts']:
                    opts.add_argument('%s=0' % it)

            value_opts = [
                'profile-directory',
                'log-level',                   # 0 to 3: INFO = 0, WARNING = 1, LOG_ERROR = 2, LOG_FATAL = 3
                'net-log-capture-mode',        # "Default" "IncludeCookiesAndCredentials" "IncludeSocketBytes"'
                'register-font-files',         # might be windows only
                'remote-debugging-port',
                'user-agent',
                'user-data-dir',               # don't use b/c it negates no-extension options
            ]

            ### Add Value Arguments
            for it in value_opts:
                if it in T:
                    opts.add_argument('%s=%s' % (it, T[it]))

            ### OTHER CHROME OPTIONS NOT YET FULLY CONFIGURED

            # -extensions        list str
            # -localState        dict
            # -prefs             dict
            # set_profile()

            # -detach            bool
            # -debuggerAddress   str
            # -excludeSwitches   list str
            # -minidumpPath      str
            # -mobileEmulation   dict

            # -perfLoggingPrefs             OBJECT (dict)
            # set_performance_logging()

            return opts

        from selenium.webdriver import Chrome

        T = {}
        if kwargs:
            T.update(kwargs)

        # `self.T` is optional; every access below goes through this guard.
        # (The original guarded only the `kwargs` lookup and would raise
        # AttributeError on the `id` lookup when `self.T` was missing.)
        settings_obj = getattr(self, 'T', None)
        if hasattr(settings_obj, 'kwargs'):
            T.update(settings_obj.kwargs)

        # Cycle Through kwargs and Extract Configs
        if hasattr(settings_obj, 'id'):
            ident = settings_obj.id
            T.update(ident.__dict__)

            if hasattr(ident, 'details'):
                for k, v in ident.details.__dict__.items():
                    T[k.strip('_')] = v

            if hasattr(ident, 'cookie') and hasattr(ident.cookie, 'content'):
                T['cookie_content'] = ident.cookie.content

        # Set Defaults if not provided
        # NOTE(review): the defaults overwrite values collected above (including
        # 'cookie_content') unless the key is in excluded_defaults -- confirm
        # this is intended.
        if 'defaults' not in T:
            T = set_defaults(self)

        # Config Data Storage if Possible
        if 'SAVE_DIR' in T:
            T['user-data-dir'] = T['SAVE_DIR']
            T['profile-directory'] = 'Profile'
            # A per-guid log file needs SAVE_DIR; without it the existing
            # 'log_path' is kept instead of raising KeyError.
            if 'guid' in T:
                T['log_path'] = '%s/%s.log' % (T['SAVE_DIR'], T['guid'])

        # Configure with Special Profiles if Requested
        special_profiles = os_environ['BD'] + '/html/webdrivers/chrome/profiles'
        no_java = T.get('no_java')
        no_plugins = T.get('no_plugins')
        if no_java and no_plugins:
            profile_subdir = '/no_java_no_plugins/'
        elif no_java:
            profile_subdir = '/no_java/'
        elif no_plugins:
            profile_subdir = '/no_plugins/'
        else:
            profile_subdir = None
        if profile_subdir is not None:
            T['user-data-dir'] = special_profiles + profile_subdir
            # 'profile-directory' only exists when SAVE_DIR was given; a plain
            # `del` raised KeyError on the default (no SAVE_DIR) path.
            T.pop('profile-directory', None)

        # SERVICE ARGS          # ( somewhat documented in executable help, i.e., chromedriver --help )
        service_args = ["--verbose",
                        "--log-path=%(log_path)s" % T]

        dc = set_desired_capabilities(self)
        opts = set_chrome_options(self)

        d = Chrome(executable_path=T['bin_path'],
                   port=T['port'],
                   service_args=service_args,
                   desired_capabilities=dc,
                   chrome_options=opts)
        d.set_window_size(1280, 720)
        # .get(): 'cookie_content' may be absent when defaults were skipped or
        # the key was excluded.
        if T.get('cookie_content'):
            d.add_cookie(T['cookie_content'])

        self.config_browser(d, kwargs)

        return d, T
Exemple #27
0
class FaucetCryptoBot:
    def __init__(self):
        """Read the settings from the config file and start a Chrome session.

        Stores the parsed settings on the instance, creates the logger and the
        Chrome driver, records the dashboard/login URLs, draws the banner, logs
        the driver's session id and remembers the handle of the initial window.
        """
        # Parse the config file once; the original re-read it for every
        # attribute (seven times).
        (
            self.browser_mode,
            self.driver_path,
            self.browser_binary_location,
            self.user_mail,
            self.user_pswd,
            self.debug,
            self.proxy,
        ) = self._configParser()

        self.log = Log()
        # _get_opts() reads browser_mode, proxy and browser_binary_location,
        # so it must run after the assignments above.
        self.driver = Chrome(options=self._get_opts(),
                             executable_path=self.driver_path)
        self.dash_board_url = "https://faucetcrypto.com/dashboard"
        self.login_url = "https://faucetcrypto.com/login"
        self.banner = draw_banner()
        self.log.write_log(
            "browser", f"starting browser session: {self.driver.session_id}")
        self.main_window = self.driver.current_window_handle

    def _get_opts(self):
        """Build the Chrome options used to start the driver.

        Reads ``self.browser_mode`` (adds ``--headless`` when it equals
        "headless"), ``self.proxy`` (adds ``--proxy-server`` when non-empty) and
        ``self.browser_binary_location``.

        :return: the configured ``Options`` instance.
        """
        opts = webdriver.chrome.options.Options()

        if self.browser_mode == "headless":
            opts.add_argument("--headless")
        if self.proxy != "":
            opts.add_argument("--proxy-server=%s" % self.proxy)

        opts.add_argument("--no-sandbox")
        opts.add_argument("--disable-dev-shm-usage")
        opts.binary_location = self.browser_binary_location
        # Fixed spelling: the flag was written "--ignore-certificate-erors".
        opts.add_argument("--ignore-certificate-errors")
        opts.add_argument("window-size=1920,1080")
        opts.add_argument("start-maximized")
        # opts.add_argument("user-data-dir=" + USER_DATA_DIR)
        opts.add_argument("disable-infobars")
        # Experimental options are stored per name, so a second
        # "excludeSwitches" call replaces the first; pass both switches in a
        # single list so neither is lost.
        opts.add_experimental_option(
            "excludeSwitches",
            ["disable-popup-blocking", "enable-automation"])
        opts.add_experimental_option("useAutomationExtension", False)

        return opts

    def _configParser(self):

        from configparser import ConfigParser

        config = ConfigParser()
        config.readfp(open(f"config.cfg"))

        browser_mode = config.get("Browser", "browser-mode")
        driver_path = config.get("Browser", "driver-path")
        browser_binary_location = config.get("Browser",
                                             "browser-binary-location")

        user_mail = config.get("User", "mail")
        user_pswd = config.get("User", "password")

        debug = config.getboolean("Misc", "debug")
        proxy = config.get("Misc", "proxy")

        return (
            browser_mode,
            driver_path,
            browser_binary_location,
            user_mail,
            user_pswd,
            debug,
            proxy,
        )

    def quit(self):
        """End the browser session.

        Uses ``driver.quit()`` rather than ``driver.close()``: ``close()`` only
        closes the current window, while ``quit()`` closes every window and
        shuts the driver down.
        """
        self.driver.quit()

    def sleep(self, mins):
        """Log a "Sleeping for <mins>m" notice, then block for ``mins`` minutes.

        :param mins: number of minutes; converted with ``int()``.
        """
        import time

        notice = self.log.blue_text(f"Sleeping for {mins}m")
        self.log.write_log("bot", notice)
        seconds = 60 * int(mins)
        time.sleep(seconds)

    def error_handler(self, msg):
        """Forward ``msg`` to the logger's ``error_log``."""
        logger = self.log
        logger.error_log(msg)

    def _click(self, element, msg="placeholder"):

        self.log.write_log(f"clicking on {msg}")
        self.driver.find_element_by_xpath(element).click()

    def _random_wait(self, t_min, t_max):

        import time
        import random

        random_time = random.randrange(t_min, t_max)
        self.log.write_log("bot", f"Waiting for {random_time} sec")
        time.sleep(random_time)

    def __switch_tab(self):
        """Close every window except the main one and refocus the main window.

        Waits a short random time first, then takes one snapshot of the window
        handles and walks through it.
        """
        self._random_wait(2, 4)
        handles = self.driver.window_handles

        for handle in handles:
            if handle == self.main_window:
                continue
            # Focus the extra window, close it, and go back to the main one.
            self.driver.switch_to.window(handle)
            self.driver.close()
            self.driver.switch_to.window(self.main_window)

    def __get_xpath_elem(self, element):
        """Return the element matching an XPath, or ``None`` if the lookup fails.

        A failed lookup is reported through ``write_log("warning", ...)`` when
        ``self.debug`` is set, otherwise through ``error_handler``.

        :param element: XPath expression to look up.
        """
        try:
            found = self.driver.find_element_by_xpath(element)
        except Exception as err:
            if not self.debug:
                self.error_handler(err)
            else:
                self.log.write_log("warning", err)
            return None
        return found

    def __check_main_reward_availability(self):
        """Return True when the main-reward dashboard link text contains "ready".

        The comparison is case-insensitive.
        """
        dash_link = self.__get_xpath_elem(main_reward["main-reward-dash-link"])
        link_text = dash_link.text.lower()
        return "ready" in link_text

    def __captcha_check(self, captcha_block):
        """Report whether the captcha block text contains "good person".

        Logs a "success" line when it does and a "warning" line when it does
        not; the comparison is case-insensitive.

        :param captcha_block: XPath expression of the captcha block.
        :return: True if the phrase was found, False otherwise.
        """
        block_text = self.__get_xpath_elem(captcha_block).text.lower()
        passed = "good person" in block_text
        if passed:
            level, note = "success", "Havent caught me yet"
        else:
            level, note = "warning", "Oops looks like i'm caught"
        self.log.write_log(level, note)
        return passed

    def _modal_handler(self):
        """Try to close the modal and then the chat widget, ignoring failures.

        Best effort: any exception stops the sequence and is swallowed, so a
        failure on the modal means the chat close is not attempted.
        """
        targets = (
            ("user-modal-close", "modal"),
            ("user-chat-close", "chat"),
        )
        try:
            for key, label in targets:
                self._click(user[key], label)
        except Exception:
            pass

    def get_user_balance(self):
        """Log the user's coin and BTC balances as "coins/btc".

        Navigates to the dashboard first when the browser is on another page.
        """
        dashboard = self.dash_board_url
        if self.driver.current_url != dashboard:
            self.driver.get(dashboard)

        coins = self.__get_xpath_elem(user["user-coin-balance"]).text
        btc = self.__get_xpath_elem(user["user-btc-balance"]).text
        combined = "/".join((coins, btc))
        self.log.write_log(
            "bot", "User balance: " + self.log.yellow_text(combined))

    def get_user_level(self):
        """Log the user's level and level percentage as "level/percent"."""
        level = self.__get_xpath_elem(user["user-level"]).text
        percent = self.__get_xpath_elem(user["user-level-percent"]).text
        combined = "/".join((level, percent))
        self.log.write_log(
            "bot", "User level: " + self.log.blue_text(combined))

    def get_current_coin_rate(self):
        """Log the text of the coin-rate element."""
        rate_element = self.__get_xpath_elem(user["user-coin-rate"])
        highlighted = self.log.yellow_text(rate_element.text)
        self.log.write_log("bot", "Coin rate: " + highlighted)

    def login_handler(self, remember=True, cookies=True):
        """Log in, preferring saved cookies over the login form.

        Does nothing when the browser is already on the dashboard.  Otherwise
        it opens the login page and tries to restore the cookies pickled in the
        file ``cookies``.  If anything in that step fails (for example the file
        does not exist), it fills in and submits the login form instead.

        :param remember: click the "remember me" control before submitting the
                         form.
        :param cookies: after a form login that lands on the dashboard, save
                        the session cookies to the file ``cookies``.
        """
        if self.driver.current_url == self.dash_board_url:
            return

        self.driver.get(self.login_url)
        try:
            # NOTE: pickle.load can execute arbitrary code; the cookie file
            # must only ever be one written by this bot.
            with open("cookies", "rb") as f:
                # Kept in its own name: the original rebound `cookies` here,
                # so the `if cookies` check below tested the loaded list
                # instead of the caller's flag.
                saved_cookies = pickle.load(f)
            for cookie in saved_cookies:
                self.driver.add_cookie(cookie)
            self.driver.refresh()

        except Exception:
            # Cookie restore failed: fall back to the login form.
            self.__get_xpath_elem(
                user["user-email-field"]).send_keys(self.user_mail)
            self.__get_xpath_elem(
                user["user-password-field"]).send_keys(self.user_pswd)

            if remember:
                self._click(user["user-remember-me"])

            self._click(user["user-login-btn"])
            self._random_wait(3, 5)

            # Only persist cookies when the login actually reached the
            # dashboard.
            if cookies and self.driver.current_url == self.dash_board_url:
                with open("cookies", "wb") as f:
                    pickle.dump(self.driver.get_cookies(), f)

    def get_main_reward(self):
        """Claim the main reward from the dashboard when it is available.

        Navigates to the dashboard if needed, dismisses modals, writes the
        session cookies to the file ``cookies`` when that file does not exist
        yet, and then runs the claim sequence.  Exceptions raised during the
        claim sequence are logged (as a warning in debug mode, through
        ``error_handler`` otherwise) and not re-raised.
        """
        self.log.write_log("bot", self.log.green_text("MAIN REWARD"))
        dashboard = self.dash_board_url
        if self.driver.current_url != dashboard:
            self.driver.get(dashboard)

        self._modal_handler()
        cookie_file_missing = not os.path.exists("cookies")
        if cookie_file_missing:
            with open("cookies", "wb") as cookie_file:
                pickle.dump(self.driver.get_cookies(), cookie_file)

        try:
            if not self.__check_main_reward_availability():
                self.log.write_log("bot", "Main reward is not available")
                return

            self.log.write_log("success", "Main reward is available")
            self._click(main_reward["main-reward-dash-link"],
                        "main reward dash link")
            self._random_wait(3, 5)

            # Without a passed captcha check there is nothing more to do.
            if not self.__captcha_check(
                    main_reward["main-reward-captcha-block"]):
                return

            self._random_wait(16, 18)
            self._click(main_reward["main-reward-claim-btn"],
                        "main reward claim button")
            self.log.write_log("success", "Collected the main reward")
            self._random_wait(3, 5)

        except Exception as err:
            if not self.debug:
                self.error_handler(err)
            else:
                self.log.write_log("warning", err)

    def get_ptc_ads(self):
        """Open the PTC ads page, log its counters and work through the ads.

        Logs the total, completed and available ad counts and the earnable
        coins.  For each loop pass it logs the ad's title, completion time
        (first two characters of the element text) and reward, starts the ad
        and, if the captcha check passes, claims the reward and closes the
        extra tab.  Exceptions inside a pass are logged and the loop goes on.
        """
        self.log.write_log("bot", self.log.green_text("PTC ADS"))
        dashboard = self.dash_board_url
        if self.driver.current_url != dashboard:
            self.driver.get(dashboard)

        self._click(ptc_ads["ptc-ads-dash-link"])
        self._random_wait(3, 5)

        total_ads_amount = self.__get_xpath_elem(
            ptc_ads["ptc-ads-total-amount"]).text
        self.log.write_log("bot", f"Total ads amount: {total_ads_amount}")

        completed_ads = self.__get_xpath_elem(
            ptc_ads["ptc-ads-completed-ads"]).text
        self.log.write_log("bot", f"Completed ads: {completed_ads}")

        available_ads = self.__get_xpath_elem(
            ptc_ads["ptc-ads-available-ads"]).text
        self.log.write_log("bot", f"Available ads: {available_ads}")

        earnable_coins = self.__get_xpath_elem(
            ptc_ads["ptc-ads-earnable-coins"]).text
        self.log.write_log("bot", f"Earnable coins: {earnable_coins}")

        ad_count = int(available_ads)
        if ad_count <= 0:
            return

        # NOTE(review): this runs ad_count + 1 passes (0..ad_count inclusive),
        # one more than the number of available ads -- confirm it is intended.
        for ad_div_block_no in range(ad_count + 1):
            try:
                ad_title = self.__get_xpath_elem(
                    ptc_ads["ptc-ads-title"]).text
                self.log.write_log(
                    "bot", f"Ad [{ad_div_block_no}] {ad_title}")

                ad_comp_time = self.__get_xpath_elem(
                    ptc_ads["ptc-ads-completion-time"]).text[:2]
                self.log.write_log(
                    "bot", f"Ad completion time: {ad_comp_time} sec")

                ad_rew_coin = self.__get_xpath_elem(
                    ptc_ads["ptc-ads-reward-coins"]).text
                self.log.write_log(
                    "bot", f"Ad reward: {ad_rew_coin} coins")

                self._click(ptc_ads["ptc-ads-watch-button"])
                self._random_wait(2, 4)

                # Skip the claim steps when the captcha check fails.
                if not self.__captcha_check(ptc_ads["ptc-ads-captcha-block"]):
                    continue

                self._random_wait(13, 16)
                self._click(ptc_ads["ptc-ads-reward-claim-btn"])

                # Wait a little longer than the ad's stated completion time.
                watch_seconds = int(ad_comp_time)
                self._random_wait(watch_seconds + 5, watch_seconds + 7)
                self._click(ptc_ads["ptc-ads-continue-btn"])
                self.__switch_tab()
                self.log.write_log(
                    "success", f"Fininshed {ad_title} ad successfully")
                self._random_wait(2, 4)

            except Exception as err:
                if not self.debug:
                    self.error_handler(err)
                else:
                    self.log.write_log("warning", err)

    def get_shortlink_ads(self):

        self.log.write_log("bot", self.log.green_text("SHORTLINK ADS"))
        if self.driver.current_url != self.dash_board_url:
            self.driver.get(self.dash_board_url)

        self._click(shortlinks["general"]["shortlinks-dash-link"])
        self._random_wait(3, 5)

        shortlinks_amount = self.__get_xpath_elem(
            shortlinks["general"]["shortlinks-amount"]).text
        shortlinks_amount_msg = f"Total shortlinks: {shortlinks_amount}"
        self.log.write_log("bot", shortlinks_amount_msg)

        shortlinks_completed = self.__get_xpath_elem(
            shortlinks["general"]["shortlinks-completed"]).text
        shortlinks_completed_msg = f"Completed shortlinks: {shortlinks_completed}"
        self.log.write_log("bot", shortlinks_completed_msg)
        shortlinks_available = self.__get_xpath_elem(
            shortlinks["general"]["shortlinks-available"]).text
        shortlinks_available_msg = f"Available shortlinks: {shortlinks_available}"
        self.log.write_log("bot", shortlinks_available_msg)

        shortlinks_earnable = self.__get_xpath_elem(
            shortlinks["general"]["shortlinks-earnable-coins"]).text
        shortlinks_earnable_msg = f"Total earnable coins: {shortlinks_earnable}"
        self.log.write_log("bot", shortlinks_earnable_msg)

        def switch(link):
            link = str(link).lower()

            def exe_io():
                view_count = self.__get_xpath_elem(
                    shortlinks["exe.io"]["shortlinks-view-count"]).text
                view_count_msg = f"View count: {link} [{view_count}]"
                self.log.write_log("bot", view_count_msg)

                reward_coin = self.__get_xpath_elem(
                    shortlinks["exe.io"]["shortlinks-reward-coin"]).text
                reward_coin_msg = f"Reward coins: {link} [{reward_coin}]"
                self.log.write_log("bot", reward_coin_msg)
                self._random_wait(5, 10)
                pass

            def fc_lc():
                view_count = self.__get_xpath_elem(
                    shortlinks["fc.lc"]["shortlinks-view-count"]).text
                view_count_msg = f"View count: {link} [{view_count}]"
                self.log.write_log("bot", view_count_msg)

                reward_coin = self.__get_xpath_elem(
                    shortlinks["fc.lc"]["shortlinks-reward-coin"]).text
                reward_coin_msg = f"Reward coins: {link} [{reward_coin}]"
                self.log.write_log("bot", reward_coin_msg)
                self._random_wait(5, 10)
                pass

            def sh_faucetcrypto_com():
                view_count = self.__get_xpath_elem(
                    shortlinks["sh.faucetcrypto.com"]
                    ["shortlinks-view-count"]).text
                view_count_msg = f"View count: {link} [{view_count}]"
                self.log.write_log("bot", view_count_msg)

                reward_coin = self.__get_xpath_elem(
                    shortlinks["sh.faucetcrypto.com"]
                    ["shortlinks-reward-coin"]).text
                reward_coin_msg = f"Reward coins: {link} [{reward_coin}]"
                self.log.write_log("bot", reward_coin_msg)

                self._click(
                    shortlinks["sh.faucetcrypto.com"]["shortlinks-claim-btn"])
                self._random_wait(15, 18)

                orig_url = self.driver.current_url
                self._click(
                    shortlinks["general"]["shortlinks-reward-claim-btn"])
                if self.driver.current_url == orig_url:
                    self._click(
                        shortlinks["general"]["shortlinks-reward-claim-btn"])
                self._random_wait(5, 7)

                try:
                    step_count = self.__get_xpath_elem(
                        faucet["faucet-current-step"]).text

                    for i in range(int(step_count[2])):
                        step_count_msg = f"Current step: {i+1}/{step_count[2]}"
                        self.log.write_log(
                            "bot",
                            self.log.yellow_text(
                                f"Current step count {step_count_msg}"),
                        )

                        self._random_wait(5, 7)
                        source = self.driver.execute_script("goto()")
                        self._random_wait(3, 5)
                    self.log.write_log("success",
                                       f"Fininshed shortlink successfully")

                except Exception as e:

                    if self.debug:
                        self.log.write_log("warning", e)
                    else:
                        self.error_handler(e)
                        pass

            def sh_faucet_gold():
                """Claim the sh.faucet.gold shortlink and walk through its steps.

                Logs the view count and reward shown for ``link``, clicks the
                claim buttons, then calls the page's ``goto()`` script once
                per step. Errors raised while stepping are logged as a
                warning in debug mode and passed to ``self.error_handler``
                otherwise.
                """
                view_count = self.__get_xpath_elem(
                    shortlinks[link]["shortlinks-view-count"]).text
                # write_log takes the log level first, as in every other call.
                self.log.write_log(
                    "bot", f"View count: {link} [{view_count}]")

                reward_coin = self.__get_xpath_elem(
                    shortlinks[link]["shortlinks-reward-coin"]).text
                self.log.write_log(
                    "bot", f"Reward coins: {link} [{reward_coin}]")

                self._click(
                    shortlinks["sh.faucet.gold"]["shortlinks-claim-btn"])
                self._random_wait(15, 18)

                # Click the reward button again if the first click did not
                # navigate away from the current page.
                reward_btn = shortlinks["general"]["shortlinks-reward-claim-btn"]
                orig_url = self.driver.current_url
                self._click(reward_btn)
                if self.driver.current_url == orig_url:
                    self._click(reward_btn)
                self._random_wait(5, 7)

                try:
                    step_count = self.__get_xpath_elem(
                        faucet["faucet-current-step"]).text
                    # Third character of the step text; presumably the total
                    # in an "x/y" style label -- TODO confirm the format.
                    total_steps = step_count[2]

                    for i in range(int(total_steps)):
                        step_count_msg = f"Current step: {i+1}/{total_steps}"
                        self.log.write_log(
                            "bot",
                            self.log.yellow_text(
                                f"Current step count {step_count_msg}"),
                        )

                        self._random_wait(5, 7)
                        self.driver.execute_script("goto()")
                        self._random_wait(3, 5)
                    self.log.write_log("success",
                                       "Fininshed shortlink successfully")

                except Exception as e:
                    if self.debug:
                        self.log.write_log("warning", e)
                    else:
                        self.error_handler(e)

            def sh_claim4_fun():
                """Claim the sh.claim4.fun shortlink and walk through its steps.

                Logs the view count and reward shown for ``link``, clicks the
                claim buttons, then calls the page's ``goto()`` script once
                per step. Errors raised while stepping are logged as a
                warning in debug mode and passed to ``self.error_handler``
                otherwise.
                """
                view_count = self.__get_xpath_elem(
                    shortlinks[link]["shortlinks-view-count"]).text
                # write_log takes the log level first, as in every other call.
                self.log.write_log(
                    "bot", f"View count: {link} [{view_count}]")

                reward_coin = self.__get_xpath_elem(
                    shortlinks[link]["shortlinks-reward-coin"]).text
                self.log.write_log(
                    "bot", f"Reward coins: {link} [{reward_coin}]")

                self._click(
                    shortlinks["sh.claim4.fun"]["shortlinks-claim-btn"])
                self._random_wait(15, 18)

                # Click the reward button again if the first click did not
                # navigate away from the current page.
                reward_btn = shortlinks["general"]["shortlinks-reward-claim-btn"]
                orig_url = self.driver.current_url
                self._click(reward_btn)
                if self.driver.current_url == orig_url:
                    self._click(reward_btn)
                self._random_wait(5, 7)

                try:
                    step_count = self.__get_xpath_elem(
                        faucet["faucet-current-step"]).text
                    # Third character of the step text; presumably the total
                    # in an "x/y" style label -- TODO confirm the format.
                    total_steps = step_count[2]

                    for i in range(int(total_steps)):
                        step_count_msg = f"Current step: {i+1}/{total_steps}"
                        self.log.write_log(
                            "bot",
                            self.log.yellow_text(
                                f"Current step count {step_count_msg}"),
                        )
                        self._random_wait(5, 7)
                        self.driver.execute_script("goto()")
                        self._random_wait(3, 5)
                    self.log.write_log("success",
                                       "Fininshed shortlink successfully")

                except Exception as e:
                    if self.debug:
                        self.log.write_log("warning", e)
                    else:
                        self.error_handler(e)

            def default():
                """Fallback handler: log a warning when no handler matches ``link``."""
                self.log.write_log("warning", "Invalid option")

            dict = {
                "exe.io": exe_io,
                "fc.lc": fc_lc,
                "sh.faucetcrypto.com": sh_faucetcrypto_com,
                "sh.faucet.gold": sh_faucet_gold,
                "sh.claim4.fun": sh_claim4_fun,
            }
            dict.get(link, default)()

        for links in shortlinks:
            if links.lower() == "general":
                continue

            try:
                view_count = self.__get_xpath_elem(
                    shortlinks[links]["shortlinks-view-count"]).text[0]
                if int(view_count) > 0:
                    self.log.write_log("bot",
                                       self.log.green_text(links.upper()))
                    switch(links)

            except Exception as e:

                if self.debug:
                    self.log.write_log("warning", e)
                else:
                    self.error_handler(e)
Exemple #28
0
class YouTube_Crawler:
    """Crawl YouTube channel and video metadata into a PostgreSQL database.

    Metadata is fetched from the YouTube Data API v3 with ``requests`` and
    written through ``pg2`` connections built from the class-level settings
    below. A headless Chrome driver can optionally be started with
    :meth:`make_driver_ready`.
    """

    api_key = None
    kwonjun_api_key = None
    kyungsu_api_key = None
    is_driver = False
    # Database connection settings. The concrete values were elided from the
    # source (bare ``#IP``-style placeholders, which do not parse); assign
    # them before calling any method that talks to the database.
    IP = None  # IP
    database = None  # database
    user = None  # user
    password = None  # password

    _API_BASE = "https://www.googleapis.com/youtube/v3"
    _TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

    def __init__(self, api_key=None):
        """Store ``api_key`` on the instance when one is given."""
        if api_key is not None:
            self.api_key = api_key

    def _api_key_for(self, api_set):
        """Return the API key selected by ``api_set`` (0, 1, or anything else)."""
        if api_set == 0:
            return self.api_key
        if api_set == 1:
            return self.kwonjun_api_key
        return self.kyungsu_api_key

    def _connect(self):
        """Open a database connection with autocommit disabled."""
        conn = pg2.connect(
            database=self.database,
            user=self.user,
            password=self.password,
            host=self.IP,
            port="5432",
        )
        conn.autocommit = False
        return conn

    def _execute_and_commit(self, sql, params):
        """Run ``sql`` with bound ``params``, commit, and always close."""
        conn = self._connect()
        try:
            conn.cursor().execute(sql, params)
            conn.commit()
        finally:
            conn.close()

    @staticmethod
    def _age_in_days(upload_time):
        """Return the days elapsed since ``upload_time`` (a UTC struct_time)."""
        import calendar  # stdlib; imported locally so the class is self-contained

        # The parsed timestamps end in "Z" (UTC), so they must be converted
        # with timegm; mktime would interpret them as local time.
        return (time.time() - calendar.timegm(upload_time)) / (60 * 60 * 24)

    def make_driver_ready(self):
        """Start a headless Chrome driver on youtube.com with a US/English PREF cookie."""
        options = ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--enable-automation")
        options.add_argument("--disable-gpu")
        options.add_argument("--disable-features=VizDisplayCompositor")

        self.driver = Chrome(
            executable_path=r"/home/ubuntu/Crawler/chromedriver",
            options=options,
        )
        self.driver.set_window_size(1920, 1080)
        # A page on the target domain must be loaded before its cookies can
        # be replaced; the page is then reloaded so the new cookie applies.
        self.driver.get("https://www.youtube.com/")
        self.driver.implicitly_wait(5)
        self.driver.delete_cookie("PREF")
        self.driver.add_cookie(
            {
                "domain": ".youtube.com",
                "httpOnly": False,
                "name": "PREF",
                "value": "gl=US&hl=en",
                "path": "/",
            }
        )
        self.driver.get("https://www.youtube.com/")
        self.driver.implicitly_wait(5)
        self.is_driver = True

    def pre_process_sql(self, text):
        """Return ``text`` with every single quote doubled.

        Kept for callers that still build SQL by hand; the methods of this
        class pass values as bound parameters and do not need it.
        """
        return text.replace("'", "''")

    def pre_process_comment(self, text):
        """Return ``text`` with NUL characters removed and single quotes doubled."""
        # The previous implementation called bytearray.replace() and threw
        # the result away, so NUL characters were never actually stripped.
        return text.replace("\x00", "").replace("'", "''")

    def update_video_and_comment(self, video_id):
        """Run the comment crawler for ``video_id``; return True on success."""
        return bool(New_YouTube_Crawler_Comment.main(video_id))

    def update_channel_info(self, channel_id, api_set=0):
        """Record the current subscriber and view counts of a channel.

        :param channel_id: YouTube channel id.
        :param api_set: selects the API key (0, 1, or anything else).
        :return: True when a row was written (or the channel was flagged as
            hiding its subscriber count), False on any failure.
        """
        api_key = self._api_key_for(api_set)

        try:
            time.sleep(0.2)
            response = requests.get(
                f"{self._API_BASE}/channels",
                params={
                    "part": "statistics",
                    "maxResults": 50,
                    "id": channel_id,
                    "key": api_key,
                },
            )
            if response.status_code != 200:
                return False

            items = response.json()["items"]
            if not items:
                # Unknown channel: nothing to record.
                return False
            statistics = items[0].get("statistics") or {}

            if "subscriberCount" not in statistics:
                # No subscriber count in the response: flag the channel.
                self._execute_and_commit(
                    "UPDATE channel SET hidden_subscriber = true"
                    " WHERE channel_id = %s;",
                    (channel_id,),
                )
                return True

            now = time.time()
            self._execute_and_commit(
                """INSERT INTO channel_subscriber (channel_idx, subscriber_num, check_time)
                    VALUES ((SELECT idx from channel where channel.channel_id = %s), %s, to_timestamp(%s));
                    INSERT INTO channel_views (channel_idx, view_count, check_time)
                    VALUES ((SELECT idx from channel where channel.channel_id = %s), %s, to_timestamp(%s));""",
                (
                    channel_id,
                    statistics["subscriberCount"],
                    now,
                    channel_id,
                    statistics["viewCount"],
                    now,
                ),
            )
            return True
        except Exception:
            # Best-effort crawler step: callers only look at the boolean.
            return False

    def insert_channel_info(self, channel_id):
        """Fill in the channel row for ``channel_id`` and record its subscriber count.

        :return: True on success, False on any failure.
        """
        try:
            response = requests.get(
                f"{self._API_BASE}/channels",
                params={
                    "part": "id,snippet,contentDetails,statistics,topicDetails",
                    "maxResults": 50,
                    "id": channel_id,
                    "key": self.api_key,
                },
            )
            if response.status_code != 200:
                return False

            item = response.json()["items"][0]
            snippet = item["snippet"]
            statistics = item["statistics"]

            # Values are bound as parameters, so titles and descriptions
            # need no manual quote escaping.
            self._execute_and_commit(
                """UPDATE channel
                        SET channel_name        = %s,
                            channel_description = %s,
                            channel_start_date  = to_date(%s, 'YYYY-MM-DD'),
                            upload_id = %s,
                            hidden_subscriber = %s,
                            thumbnail_url = %s
                        WHERE channel_id = %s;

                    INSERT INTO channel_subscriber (channel_idx, subscriber_num, check_time)
                    VALUES ((SELECT idx from channel where channel.channel_id = %s), %s, to_timestamp(%s));""",
                (
                    snippet["title"],
                    snippet["description"],
                    snippet["publishedAt"],
                    item["contentDetails"]["relatedPlaylists"]["uploads"],
                    statistics["hiddenSubscriberCount"],
                    snippet["thumbnails"]["default"]["url"],
                    channel_id,
                    channel_id,
                    statistics["subscriberCount"],
                    time.time(),
                ),
            )
            return True
        except Exception:
            # Best-effort crawler step: callers only look at the boolean.
            return False

    def update_video_info(self, upload_id, interval_day=30, api_set=0):
        """Store the videos of an uploads playlist that are at most ``interval_day`` days old.

        Pages through the playlist and stops at the first video older than
        ``interval_day`` days, or when ``insert_video`` reports failure.
        Items without a usable ``videoPublishedAt`` are stored with
        ``status`` FALSE.

        :param upload_id: id of the channel's uploads playlist.
        :param interval_day: maximum video age in days.
        :param api_set: selects the API key (0, 1, or anything else).
        :return: True when the run was committed, False on any failure.
        """
        api_key = self._api_key_for(api_set)

        try:
            conn = self._connect()
            try:
                cur = conn.cursor()
                next_page_token = None
                keep_going = True

                while keep_going:
                    params = {
                        "part": "id,snippet,contentDetails,status",
                        "maxResults": 50,
                        "playlistId": upload_id,
                        "key": api_key,
                    }
                    if next_page_token is not None:
                        params["pageToken"] = next_page_token

                    response = requests.get(
                        f"{self._API_BASE}/playlistItems", params=params
                    )
                    if response.status_code != 200:
                        # Nothing is committed; the connection is closed below.
                        return False
                    result = response.json()

                    next_page_token = result.get("nextPageToken")
                    if next_page_token is None:
                        keep_going = False  # this is the last page

                    for item in result["items"]:
                        try:
                            upload_time = time.strptime(
                                item["contentDetails"]["videoPublishedAt"],
                                self._TIMESTAMP_FORMAT,
                            )
                            available = True
                        except (KeyError, TypeError, ValueError):
                            upload_time = time.strptime(
                                item["snippet"]["publishedAt"],
                                self._TIMESTAMP_FORMAT,
                            )
                            available = False

                        recent = self._age_in_days(upload_time) <= interval_day

                        if not available:
                            cur.execute(
                                """INSERT INTO video (channel_idx, video_id, upload_time, status)
                                VALUES ((SELECT idx from channel where upload_id = %s),
                                        %s, to_timestamp(%s, 'YYYY-MM-DDTHH24:MI:SSZ'), FALSE)
                                ON CONFLICT DO NOTHING;""",
                                (
                                    upload_id,
                                    item["contentDetails"]["videoId"],
                                    item["snippet"]["publishedAt"],
                                ),
                            )
                            if recent:
                                continue
                            keep_going = False
                            break

                        if not recent:
                            keep_going = False
                            break

                        cur.execute(
                            "SELECT insert_video(%s, %s, %s, %s, %s, %s)",
                            (
                                item["snippet"]["title"],
                                item["snippet"]["description"],
                                item["contentDetails"]["videoId"],
                                item["contentDetails"]["videoPublishedAt"],
                                upload_id,
                                item["snippet"]["thumbnails"]["high"]["url"],
                            ),
                        )
                        if not cur.fetchone()[0]:
                            keep_going = False
                            break

                conn.commit()
            finally:
                conn.close()

            return True
        except Exception:
            # Best-effort crawler step: callers only look at the boolean.
            return False

    def __del__(self):
        """Shut the browser down if :meth:`make_driver_ready` started one."""
        if self.is_driver:
            try:
                # quit() ends the whole driver session; close() would only
                # close the current window.
                self.driver.quit()
            except Exception:
                # Finalizers must not raise.
                pass
Exemple #29
0
def start_browser_and_fetch(website, args):
    """Start Chrome with the requested extensions and load the site's main page.

    :param website: object providing ``main_page_url`` plus the
        ``robot_txt_ban`` and ``access_successful`` flags, which are updated
        here.
    :param args: parsed options (``headful``, the extension flags,
        ``ignore_robots_txt``, ``bypass_robots_txt``, ``add_shared_cookie``,
        ``no_fetch``).
    :return: the browser once the main page has loaded, or ``None`` when
        robots.txt forbids access, the server could not be reached, or every
        load attempt timed out.
    """
    opts = Options()
    if not args.headful:
        opts.headless = True
        assert opts.headless  # Operating in headless mode

    # Each flag on ``args`` enables the extension file of the same name.
    extension_flags = (
        'override_cmp',
        'cookie_glasses',
        'override_cmp_monitor_postmessages',
        'monitor_postmessages',
        'watch_requests',
        'get_euconsent',
        'probe_cmp_postmessage',
    )
    for flag in extension_flags:
        if getattr(args, flag):
            opts.add_extension('./extensions/' + flag + '.crx')

    # Enable browser logging. Work on a copy: DesiredCapabilities.CHROME is
    # a shared class-level dict and must not be modified in place.
    capabilities = DesiredCapabilities.CHROME.copy()
    capabilities['goog:loggingPrefs'] = {'browser': 'ALL'}
    browser = Chrome(options=opts, desired_capabilities=capabilities)

    # Skip the check when told to ignore robots.txt or when it already passed.
    if not args.ignore_robots_txt and not (website.robot_txt_ban == False):
        print("Checking robots.txt...")
        access_allowed = check_robots_txt_authorization(browser, website)
        if not access_allowed:
            website.robot_txt_ban = True
            if not args.bypass_robots_txt:
                quit_properly(browser)
                return None
        else:
            website.robot_txt_ban = False
        if website.access_successful == False:
            # server access failed when checking robots.txt
            quit_properly(browser)
            return None

    # most common display https://www.w3schools.com/browsers/browsers_display.asp
    browser.set_window_size(1366, 768)
    if args.add_shared_cookie:
        # loading a site is necessary to be able to set a cookie
        # see https://github.com/w3c/webdriver/issues/1238
        browser.get('https://perdu.com')
        browser.add_cookie({
            'name': 'euconsent',
            'value': CONSENT_STRING_SENSCRITIQUE,
            'domain': '.consensu.org',
            'path': '/'
        })
        print('cookie added')
    if args.no_fetch:
        # Keep the browser open for an hour before fetching.
        time.sleep(3600)

    browser.set_page_load_timeout(TIMEOUT)
    for _ in range(MAX_TRIES_TIMEOUT):
        try:
            browser.get(website.main_page_url)
            return browser
        except TimeoutException:
            print("Website timed out.")
    quit_properly(browser)
    website.access_successful = False
    return None
Exemple #30
0
class LagoucrawlerDownloaderMiddleware(object):
    """Downloader middleware that drives a Chrome browser through Selenium.

    For requests flagged with ``index_flag`` it checks the login state,
    restores saved cookies or logs in, and then returns the search result
    page as the response.
    """
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the downloader middleware does not modify the
    # passed objects.

    def __init__(self, username, password, city, job_keywords):
        """Keep the credentials and search settings and start the browser."""
        # Account credentials
        self.username, self.password = username, password
        # City to search in and the job keywords to search for
        self.city, self.job_keywords = city, job_keywords
        # Start Chrome with a maximized window
        self.brower = Chrome()
        self.brower.maximize_window()
        # Explicit waits on this browser time out after 5 seconds
        self.wait = WebDriverWait(self.brower, 5)

    @classmethod
    def from_crawler(cls, crawler):
        """
        Build the middleware from the crawler settings USERNAME, PASSWORD,
        CITY and JOB_KEYWORDS.
        :param crawler:
        :return: a new middleware instance
        """
        settings = crawler.settings
        return cls(
            username=settings.get('USERNAME'),
            password=settings.get('PASSWORD'),
            city=settings.get('CITY'),
            job_keywords=settings.get('JOB_KEYWORDS'),
        )

    def is_logined(self, request, spider):
        """
        Open the request URL, close the city selection pop-up and read the
        link in the top-right toolbar to decide whether a user is logged in.
        :param request: the initial request
        :param spider:
        :return: True when logged in, otherwise False
        """
        self.brower.get(request.url)
        close_locator = (By.XPATH, '//*[@id="cboxClose"]')
        status_locator = (By.XPATH, '//*[@id="lg_tbar"]/div/ul/li[1]/a')
        try:
            # Dismiss the city selection pop-up
            self.wait.until(EC.element_to_be_clickable(close_locator)).click()
            # The toolbar link shows the login text while nobody is logged in
            status_link = self.wait.until(
                EC.presence_of_element_located(status_locator))
            return status_link.text != '登录'
        except TimeoutException as e:
            # One of the two elements did not show up within the wait time
            spider.logger.info('Locate Username Element Failed:%s' % e.msg)
            return False

    def login_lagou(self, spider):
        """
        Go through the login form with selenium and save the cookies that
        the browser holds after submitting it.
        :param spider:
        :return:
        """
        try:
            # Pause first, otherwise the login elements may not be found
            time.sleep(2)
            # Open the login page
            login_link_locator = (
                By.XPATH,
                '//*[@id="loginToolBar"]//a[@class="button bar_login passport_login_pop"]'
            )
            self.wait.until(
                EC.presence_of_element_located(login_link_locator)).click()
            # Type the user name, then the password
            form_fields = (
                ('//*[@data-propertyname="username"]/input', self.username),
                ('//*[@data-propertyname="password"]/input', self.password),
            )
            for xpath, text in form_fields:
                field = self.wait.until(
                    EC.visibility_of_element_located((By.XPATH, xpath)))
                field.send_keys(text)
            # Press the submit button
            submit_locator = (By.XPATH, '//*[@data-propertyname="submit"]/input')
            self.wait.until(
                EC.visibility_of_element_located(submit_locator)).click()
            # Persist the cookies held after submitting the form
            self.save_cookies(self.brower.get_cookies())
        except TimeoutException as e:
            spider.logger.info('Locate Login Element Failed: %s' % e.msg)

    @staticmethod
    def save_cookies(cookies):
        """
        Write the cookies as JSON to cookies/lagou.txt under the current
        working directory so that later runs can reuse them.
        :param cookies:
        :return:
        """
        directory = os.getcwd() + '/cookies/'
        if not os.path.exists(directory):
            os.mkdir(directory)
        with open(directory + 'lagou.txt', 'w') as cookie_file:
            serialized = json.dumps(cookies)
            cookie_file.write(serialized)

    def fetch_index_page(self, request, spider):
        """
        Switch the city if needed, type the job keywords, click search and
        wait for the result list. The page count, the browser and the wait
        object are stored in ``request.meta`` for later parsing.
        :param request:
        :param spider:
        :return: an HtmlResponse for the result page, or the request itself
                 when an element could not be located in time
        """
        try:
            # Check whether the displayed city differs from the wanted one
            current_city = self.wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//*[@id="lg_tnav"]/div/div/div/strong')))
            if current_city.text != self.city:
                time.sleep(1)
                self.wait.until(
                    EC.presence_of_element_located(
                        (By.XPATH, '//*[@id="changeCity_btn"]'))).click()
                # Pick the wanted city by its link text
                self.wait.until(
                    EC.presence_of_element_located(
                        (By.LINK_TEXT, self.city))).click()
            time.sleep(1)
            # Type the keywords into the search box
            search_box = self.wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//*[@id="search_input"]')))
            search_box.send_keys(self.job_keywords)
            # Click the search button
            search_button = self.wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, '//*[@id="search_button"]')))
            search_button.click()
            # Wait for the result list to become visible
            self.wait.until(
                EC.visibility_of_all_elements_located(
                    (By.XPATH, '//*[@id="s_position_list"]')))
            last_page_locator = (
                By.XPATH,
                '//*[@id="s_position_list"]/div[@class="item_con_pager"]/div/span[@class="pager_next "]/preceding-sibling::span[1]'
            )
            last_page = self.wait.until(
                EC.presence_of_element_located(last_page_locator))
            # Hand the page count, browser and wait object on through meta
            request.meta['pagenumber'] = last_page.text
            request.meta['brower'] = self.brower
            request.meta['wait'] = self.wait
            page_html = self.brower.page_source
            # Wrap the rendered result page in a response
            return HtmlResponse(url=self.brower.current_url,
                                body=page_html,
                                encoding='utf-8',
                                request=request)
        except TimeoutException:
            spider.logger.info(
                'Locate Index Element Failed And Use Proxy Request Again')
            # Hand the request back unchanged
            return request

    def load_cookies(self, path):
        """
        Read the cookie file and add each cookie's name and value to the
        browser.
        :param path: path of the local cookie file
        :return:
        """
        with open(path, 'r') as cookie_file:
            stored = json.load(cookie_file)
            for entry in stored:
                self.brower.add_cookie(
                    {'name': entry['name'], 'value': entry['value']})

    def process_request(self, request, spider):
        """
        Handle requests whose meta contains ``index_flag``: when the browser
        is not logged in, load the saved cookies (or log in when no cookie
        file exists) and return the result of fetch_index_page. Every other
        request yields None.
        :param request:
        :param spider:
        :return:
        """
        # Only the flagged index request is handled here
        if 'index_flag' not in request.meta:
            return None
        # Nothing to do when a user is already logged in
        if self.is_logined(request, spider):
            return None
        cookie_path = os.getcwd() + '/cookies/lagou.txt'
        # Reuse saved cookies when present, otherwise log in
        if os.path.exists(cookie_path):
            self.load_cookies(cookie_path)
        else:
            self.login_lagou(spider)
        # Result of navigating to the search result page
        return self.fetch_index_page(request, spider)
Exemple #31
0
from selenium.webdriver import Chrome
import time

url = 'https://developer.huawei.com/consumer/cn/service/apcs/app/home.html'
ck = 'apppromote_lang=cn; APCS_AT="CFwH17cENO7L4jUd/y7MlRsHFBzkKRUjo8iCuQtgoNlRvUPthwzltcQTH+4mZ0fCPSFCRFk+s4SwszZ9RoTH5//Upk96HCea9DrxHLarEOc5gYVlXtw="; SITE_ID=1'
# Split each "name=value" pair on the FIRST '=' only: a value may itself
# contain '=' (APCS_AT does), and a plain split('=')[1] would truncate it.
ck = [
    {'name': name, 'value': value}
    for name, _, value in (pair.partition('=') for pair in ck.split('; '))
]
d = Chrome()
# A page on the target domain must be open before its cookies can be set.
d.get(url)
for c in ck:
    d.add_cookie(c)

# Reload so the page is requested with the cookies in place, then keep the
# browser open for a minute.
d.get(url)
time.sleep(60)