def create_driver(self):
    """Create a remote Selenium driver for the configured browser type.

    The browser is selected by the module-level ``DEFAULT_BROWSER_TYPE``
    (compared case-insensitively).  An optional ``proxy`` address and
    ``user-agent`` string are read from ``self.headers``; the hub address
    is taken from ``self.browser_settings.selenium_host``.

    Returns:
        The ``webdriver.Remote`` instance.

    Raises:
        NotImplementedError: if ``DEFAULT_BROWSER_TYPE`` is not one of the
            supported ``selenium-*`` values.
    """
    # Browser type -> attribute name on webdriver.DesiredCapabilities.
    # Looked up lazily so only the selected attribute is ever accessed.
    capability_names = {
        "selenium-chrome": "CHROME",
        "selenium-firefox": "FIREFOX",
        "selenium-htmlunit": "HTMLUNIT",
        "selenium-htmlunitwithjs": "HTMLUNITWITHJS",
    }
    browser_type = DEFAULT_BROWSER_TYPE.lower()
    if browser_type not in capability_names:
        raise NotImplementedError()
    capabilities = getattr(webdriver.DesiredCapabilities,
                           capability_names[browser_type])

    proxy = None
    proxy_address = self.headers.get("proxy")
    if proxy_address:
        # Same address for every scheme; nothing bypasses the proxy.
        proxy = Proxy({
            'proxyType': ProxyType.MANUAL,
            'httpProxy': proxy_address,
            'ftpProxy': proxy_address,
            'sslProxy': proxy_address,
            'noProxy': ''
        })

    options = Options()
    for switch in ('--no-sandbox',
                   '--disable-dev-shm-usage',
                   "--start-fullscreen"):
        options.add_argument(switch)
    # Optional arguments, currently disabled:
    #   options.add_argument("--window-position=0,0")
    #   options.add_argument('--headless')
    options.add_experimental_option('useAutomationExtension', False)
    options.add_experimental_option("excludeSwitches", ["enable-automation"])

    user_agent = self.headers.get("user-agent")
    if user_agent:
        options.add_argument("user-agent={}".format(user_agent))

    hub_url = '{}/wd/hub'.format(self.browser_settings.selenium_host)
    return webdriver.Remote(command_executor=hub_url,
                            desired_capabilities=capabilities,
                            proxy=proxy,
                            options=options)
Exemple #2
0
def login_test():
    """
    Test the login flow through a proxied Chrome session.

    Opens ``conf.url``, submits ``conf.username`` / ``conf.password`` and
    verifies that a ``JSESSIONID`` cookie with a 32-character value is set.

    :return: the logged-in ``webdriver.Chrome`` instance on success. On any
        failure the problem is logged through ``conf.logger`` and the
        process exits with status -1.
    """
    try:
        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.http_proxy = "10.211.55.4:808"
        # Work on a copy: DesiredCapabilities.CHROME is a shared class-level
        # dict, and mutating it would leak this proxy into other drivers.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        prox.add_to_capabilities(capabilities)
        chrome_options = webdriver.ChromeOptions()
        #chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument(
            '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
        )
        chrome_options.add_argument('--window-size=1024,768')
        driver = webdriver.Chrome(
            './chromedriver',
            chrome_options=chrome_options,
            service_args=["--verbose", "--log-path=driver.log"],
            desired_capabilities=capabilities)
        driver.get(conf.url)
        name = driver.find_element_by_xpath('//*[@id="username"]')
        name.send_keys(conf.username)
        password = driver.find_element_by_xpath('//*[@id="pwd"]')
        password.send_keys(conf.password)
        login = driver.find_element_by_xpath('//*[@id="loginform"]/button')
        login.click()
        #driver.get_screenshot_as_file("./tmp.png")

        # Fetch the cookies once so every check looks at the same snapshot.
        cookies = driver.get_cookies()
        if not isinstance(cookies, list):
            conf.logger.error("cookie is not list")
            exit(-1)
        # ``dict.has_key`` does not exist on Python 3; ``get`` / ``in`` work
        # on both Python 2 and 3.
        has_right_sid = any(
            isinstance(item, dict)
            and item.get('name') == 'JSESSIONID'
            and 'value' in item
            and len(item['value']) == 32
            for item in cookies)
        if not has_right_sid:
            conf.logger.error("jsessionid is wrong: {}".format(
                json.dumps(cookies)))
            exit(-1)
        conf.logger.info("LOGIN TEST PASS")
        return driver
    except Exception as e:
        # SystemExit raised by exit() above is not an Exception subclass, so
        # it is not swallowed here.
        conf.logger.exception(e)
        exit(-1)
Exemple #3
0
def selenium_getter():
    """Create a headless, private-browsing Firefox driver.

    When the module-level ``use_proxy`` equals 1, a proxy obtained from
    ``getgoodproxy()`` is used for HTTP, FTP and SSL traffic and an implicit
    wait of 5 seconds is set on the driver. Otherwise a plain driver is
    returned.

    Returns:
        The ``webdriver.Firefox`` instance.
    """
    proxy = None
    if use_proxy == 1:
        myproxy = getgoodproxy()
        proxy = Proxy({
            'proxyType': ProxyType.MANUAL,
            'httpProxy': myproxy,
            'ftpProxy': myproxy,
            'sslProxy': myproxy,
            'noProxy': ''
        })
        print(f"{TerminalColors.ENDC}using proxy {myproxy}")

    # Setup shared by the proxied and the direct driver.
    binary = FirefoxBinary("C:\\Program Files\\Mozilla Firefox\\firefox.exe")
    options = Options()
    options.headless = True
    profile = webdriver.FirefoxProfile()
    profile.accept_untrusted_certs = True
    profile.set_preference("browser.privatebrowsing.autostart", True)
    driver_kwargs = {
        'options': options,
        'firefox_profile': profile,
        'firefox_binary': binary,
        'executable_path': "./geckodriver.exe",
    }

    if proxy is None:
        return webdriver.Firefox(**driver_kwargs)

    # Copy before adding the proxy: DesiredCapabilities.FIREFOX is a shared
    # class-level dict, and mutating it would leak this proxy into drivers
    # created later without one.
    capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    proxy.add_to_capabilities(capabilities)
    driver = webdriver.Firefox(desired_capabilities=capabilities,
                               **driver_kwargs)
    driver.implicitly_wait(5)
    return driver
    def __activate_proxy(self, use_proxy):
        """Attach a randomly chosen proxy to ``self.options``.

        Does nothing when ``use_proxy`` is falsy. Otherwise one entry of the
        module-level ``PROXIES`` is stored in ``self.proxy`` and used as the
        HTTP, SSL and SOCKS proxy, and certificate errors are ignored.
        """
        if not use_proxy:
            return

        # Manual proxy with auto-detection switched off.
        manual_proxy = Proxy()
        manual_proxy.proxyType = ProxyType.MANUAL
        manual_proxy.autodetect = False

        # One random proxy serves every scheme.
        self.proxy = choice(PROXIES)
        manual_proxy.httpProxy = self.proxy
        manual_proxy.sslProxy = self.proxy
        manual_proxy.socksProxy = self.proxy

        # NOTE(review): this sets an attribute named "Proxy" on the options
        # object - confirm the options class actually reads it.
        self.options.Proxy = manual_proxy
        self.options.add_argument("ignore-certificate-errors")
Exemple #5
0
    def __init__(self, use_proxy):
        """Initialise the base class and open the Firefox session.

        :param use_proxy: when truthy, Firefox is started with a manual
            proxy whose every entry is ``socks5://<_PROXY>``; otherwise a
            plain Firefox session is opened.
        """
        Scholarly.__init__(self, use_proxy)

        if not use_proxy:
            self._session = webdriver.Firefox()
            return

        socks_url = "socks5://{0}".format(_PROXY)
        settings = {"proxyType": ProxyType.MANUAL}
        for field in ("httpProxy", "httpsProxy", "socksProxy", "sslProxy",
                      "ftpProxy"):
            settings[field] = socks_url
        settings["noProxy"] = ""
        self._session = webdriver.Firefox(proxy=Proxy(settings))
Exemple #6
0
    def __init__(self,
                 geckodriver_path=None,
                 extra_script_file=None,
                 proxy=None):
        """Initialise the middleware with a mandatory proxy address.

        :param geckodriver_path: forwarded to the parent constructor
        :param extra_script_file: forwarded to the parent constructor
        :param proxy: proxy address used for HTTP, FTP and SSL traffic
        :raises Exception: if ``proxy`` is missing or empty
        """
        super(ProxyFirefoxRequestMiddleware,
              self).__init__(geckodriver_path, extra_script_file)
        if not proxy:
            raise Exception('proxy setting is invalid')

        manual_settings = {'proxyType': ProxyType.MANUAL, 'noProxy': ''}
        for scheme_key in ('httpProxy', 'ftpProxy', 'sslProxy'):
            manual_settings[scheme_key] = proxy
        self.proxy = Proxy(manual_settings)
Exemple #7
0
    def proxy(self):
        """
        Build the manual proxy settings for our Firefox profile.

        HTTP, FTP and SSL traffic all go to
        ``<self.proxy_address>:<self.proxy_port>``; ``localhost`` and
        ``127.0.0.1`` are listed under ``noProxy``.

        :return: Proxy
        """
        endpoint = '{0}:{1}'.format(self.proxy_address, self.proxy_port)
        return Proxy(dict(
            proxyType=ProxyType.MANUAL,
            httpProxy=endpoint,
            ftpProxy=endpoint,
            sslProxy=endpoint,
            noProxy='localhost, 127.0.0.1',
        ))
Exemple #8
0
def get_driver_proxy_capabilities(proxy: str):
    """
    Build Chrome capabilities that use a single proxy directly from the
    browser.

    :param proxy: proxy address applied to HTTP, SSL and FTP traffic
        (presumably ``host:port`` - confirm against callers)
    :return: a copy of ``DesiredCapabilities.CHROME`` with the proxy added
    """
    # The Proxy object needs its own name. Previously it was bound to
    # ``proxy``, overwriting the address argument, so the object was assigned
    # to its own http/ssl/ftp fields instead of the address.
    proxy_config = Proxy()
    proxy_config.proxy_type = ProxyType.MANUAL
    proxy_config.http_proxy = proxy
    proxy_config.ssl_proxy = proxy
    proxy_config.ftp_proxy = proxy
    proxy_config.auto_detect = False
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    proxy_config.add_to_capabilities(capabilities)
    return capabilities
Exemple #9
0
    def get_firefox(self, proxy):
        """Start a headless Firefox that sends its traffic through ``proxy``.

        :param proxy: proxy address used for HTTP, FTP and SSL traffic
        :return: the ``webdriver.Firefox`` instance
        """
        firefox_opts = webdriver.FirefoxOptions()
        firefox_opts.add_argument('--headless')

        proxy_settings = Proxy(dict(
            proxyType=ProxyType.MANUAL,
            httpProxy=proxy,
            ftpProxy=proxy,
            sslProxy=proxy,
            noProxy='',
        ))

        return webdriver.Firefox(executable_path='webdrivers/geckodriver',
                                 options=firefox_opts,
                                 proxy=proxy_settings)
Exemple #10
0
    def instantiateBrowser(self):
        """Create ``self.browser`` (mobile-emulating Chrome) and log in.

        Does nothing if a browser already exists. Otherwise Chrome is
        started emulating an "iPhone 7" with the configured session
        directory, optionally headless (``constants.headless``) and
        optionally behind the configured proxy (``use_proxy`` setting), and
        ``AutoLogin`` is run on the new browser.
        """
        if self.browser is not None:
            return

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument("--disable-setuid-sandbox")
        chrome_options.add_experimental_option("mobileEmulation",
                                               {"deviceName": "iPhone 7"})
        chrome_options.add_argument('--disable-extensions')

        path = config().getConstant("session_path")
        chrome_options.add_argument("user-data-dir={}".format(path))

        if constants.headless:
            chrome_options.add_argument('--headless')
            chrome_options.add_argument(
                '--disable-gpu')  # Last I checked this was necessary.

        # Copy: DesiredCapabilities.CHROME is a shared class-level dict, and
        # adding a proxy to it directly would affect every later driver.
        capabilities = DesiredCapabilities.CHROME.copy()
        if config().get("use_proxy"):
            print("using proxy")
            proxy_address = config().get("proxy_address")
            proxy = Proxy()
            proxy.socksPassword = config().get("proxy_password")
            proxy.socksUsername = config().get("proxy_username")
            proxy.ftpProxy = proxy_address
            proxy.httpProxy = proxy_address
            proxy.sslProxy = proxy_address
            # Literal form of the "MANUAL" proxy type.
            proxy.proxy_type = {'ff_value': 1, 'string': 'MANUAL'}
            proxy.add_to_capabilities(capabilities)
        else:
            # Drop any proxy entry that may already be in the capabilities.
            capabilities.pop('proxy', None)

        self.browser = webdriver.Chrome(chrome_options=chrome_options,
                                        desired_capabilities=capabilities)

        # Print the effective capabilities so the proxy setup can be checked.
        print(capabilities)

        AutoLogin(self.browser).login()
Exemple #11
0
def play_around(line):
    """Fetch one dictionary page and return it as a formatted record.

    A headless, proxied Firefox loads the URL in ``line``. The page is
    cleaned up (audio spans and pinyin removed, autolink / word wrappers
    unwrapped, HTML comments stripped) and the headword, its short meaning
    and the full meaning block are combined into one record.

    Args:
        line: the URL to fetch; trailing whitespace is stripped.

    Returns:
        The record string, or ``'NOT FOUND' + line + '\\n</>\\n'`` when the
        expected elements are missing. The record is also printed.
    """
    start = time.time()
    options = Options()
    options.add_argument('-headless')
    options.add_argument(f'user-agent={ua.random}')
    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': ip,
        'ftpProxy': ip,
        'sslProxy': ip,
        })
    driver = webdriver.Firefox(proxy=proxy, executable_path='geckodriver',
                               options=options)
    # Always shut the browser down; otherwise every call leaves a Firefox
    # process running.
    try:
        driver.get(line.rstrip())
        try:
            WebDriverWait(driver, 10).until(EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "div.component_entry")))
            WebDriverWait(driver, 10).until(EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "div.mean_tray")))
        except Exception:
            # Best effort: give the page a little more time and carry on.
            time.sleep(5)
        page_source = driver.page_source
    finally:
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')
    # Remove the audio widgets.
    try:
        for span in soup.find_all("span", class_='unit_listen'):
            span.decompose()
    except Exception:
        print('no audio')
    for auto in soup.find_all('autolink'):
        auto.unwrap()
    for word_dic in soup.find_all('span', class_='u_word_dic'):
        word_dic.unwrap()
    for pinyin in soup.find_all('span', class_='pinyin'):
        pinyin.decompose()

    word_word = soup.find_all("strong", class_='word')  # headword
    word_simple_mean = soup.find_all('p', class_='entry_mean')
    word_mean = soup.find("div", class_='mean_tray')  # full meaning block
    # ``find`` returns None when the block is missing; guard so that case
    # falls through to NOT FOUND instead of raising TypeError.
    if word_mean is not None:
        for stuff in word_mean(text=lambda text: isinstance(text, Comment)):
            stuff.extract()

    if word_word and word_simple_mean and word_mean is not None:
        word_tit = word_word[0].get_text()
        outputline = (word_tit + '\n' + word_tit + str(word_simple_mean[0])
                      + str(word_mean) + '\n</>\n')
        end = time.time()
        print(str(end - start))
    else:
        outputline = 'NOT FOUND' + line + '\n</>\n'
    print(outputline)
    return outputline
Exemple #12
0
    def __init__(self, logger=None, proxy=False, headless=False):
        """Set up logging, bot directives and the Firefox driver.

        :param logger: stored on ``self.logger`` before ``setup_log`` runs
        :param proxy: not read by this constructor; whether a proxy is used
            is decided by ``proxy_conf.LIST`` being set
        :param headless: run Firefox without a visible window
        """
        self.logger = logger
        self.setup_log()

        geckdriver_path = r'/home/ghost/Drivers/geckodriver'

        # Initialize class variables
        self.fb_url = 'https://mbasic.facebook.com/'

        # Bot directive variables
        self.likes_monthly = 0
        self.tags_list = None
        self.groups_list = None
        self.like_vids_monthly = 0
        self.accept_all_requests = False
        self.send_requests = True
        self.post_imgs = 0

        browser_profile = webdriver.FirefoxProfile()
        browser_profile.set_preference('dom.webnotifications.enabled', False)

        options = Options()
        options.headless = headless

        # Options are passed in both cases; the proxy branch used to omit
        # them, which silently ignored ``headless``.
        driver_kwargs = {
            'executable_path': geckdriver_path,
            'firefox_profile': browser_profile,
            'firefox_options': options,
        }

        if proxy_conf.LIST is not None:
            proxy_server = Proxy()
            proxy_server.proxy_type = ProxyType.MANUAL
            proxy_server.http_proxy = proxy_conf.LIST['http']
            proxy_server.socks_proxy = proxy_conf.LIST['sock']
            proxy_server.ssl_proxy = proxy_conf.LIST['ssl']

            # Copy: DesiredCapabilities.FIREFOX is a shared class-level dict.
            capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
            proxy_server.add_to_capabilities(capabilities)
            driver_kwargs['desired_capabilities'] = capabilities

        self.driver = webdriver.Firefox(**driver_kwargs)
        # NOTE(review): the proxy branch used a bare ``bot_logger`` while the
        # other used ``self.bot_logger``; unified on the attribute, presumably
        # created by setup_log() - confirm.
        self.bot_logger.info('Browser driver has been initialized')

        self.bot_utils = BotUtils(self.driver, self.fb_url)
Exemple #13
0
def chromeShot(url, f, p=""):
    """Take a screenshot of ``url`` with headless Chrome.

    The image is written to ``f`` and copied into a ``ScreenShots``
    directory located next to the parent directory of ``f``, named after the
    URL-quoted ``url``. Errors are reported through ``whine`` and not raised.

    :param url: page to capture
    :param f: path of the screenshot file to write
    :param p: optional proxy address used for HTTP and SSL traffic
    """
    whine("Taking Screenshot   : " + url, "debug")

    prox = Proxy()
    prox.proxy_type = ProxyType.MANUAL
    if p:
        prox.http_proxy = p
        prox.ssl_proxy = p

    # Copy: DesiredCapabilities.CHROME is a shared class-level dict, so a
    # proxy added to it directly would stick for every later call.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    prox.add_to_capabilities(capabilities)

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-logging")
    chrome_options.add_argument("--log-level=3")
    # NOTE(review): Chrome documents this switch as "width,height" - confirm
    # the "x" form is honoured.
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_options.add_argument("--no-sandbox")
    # NOTE(review): a space rather than "=" - confirm this switch is applied.
    chrome_options.add_argument("--user-data-dir /tmp")
    chrome_options.add_argument('--ignore-certificate-errors')

    chrome_driver = "/usr/bin/chromedriver"

    # Dedicated screenshot directory. os.path.join keeps the path relative
    # when ``f`` has no grandparent directory; plain string concatenation
    # produced "/ScreenShots/" at the filesystem root in that case.
    sDir = os.path.join(os.path.dirname(os.path.dirname(f)), "ScreenShots")
    os.makedirs(sDir, exist_ok=True)

    sf = os.path.join(sDir, urllib.parse.quote(url, safe='') + ".png")

    try:
        driver = webdriver.Chrome(options=chrome_options,
                                  executable_path=chrome_driver,
                                  desired_capabilities=capabilities)
        # Quit even when loading or capturing fails, so no Chrome process is
        # left behind.
        try:
            driver.set_page_load_timeout(10)
            driver.get(url)
            driver.get_screenshot_as_file(f)
            # Copy to dedicated screenshot directory
            copy2(f, sf)
        finally:
            driver.quit()
    except Exception as e:
        whine("screenshot Error:" + str(e), "debug")
Exemple #14
0
    def __init__(self, host="127.0.0.1", server="./selenium-server.jar"):
        """Start the Selenium server if needed and open a remote Firefox.

        :param host: address used as HTTP, FTP and SSL proxy (and also
            listed under ``noProxy``)
        :param server: path to the Selenium server jar; the server is
            launched only if the file exists and no server is running yet
        :raises SeleniumServerError: if the remote driver cannot be reached
        """
        if server and path.isfile(server) and not _server_started():
            self.selenium = _Selenium(server)

        proxy = Proxy({
            'proxyType': ProxyType.MANUAL,
            'httpProxy': host,
            'ftpProxy': host,
            'sslProxy': host,
            'noProxy': host
        })
        # Copy: DesiredCapabilities.FIREFOX is a shared class-level dict, and
        # writing the proxy into it would affect every later driver.
        caps = webdriver.DesiredCapabilities.FIREFOX.copy()
        proxy.add_to_capabilities(caps)
        try:
            self.driver = webdriver.Remote(desired_capabilities=caps)
        except URLError:
            raise SeleniumServerError
Exemple #15
0
def firefox_crawl(url, use_proxy=False, retries=3):
    """Fetch the rendered page source of ``url`` with Firefox.

    :param url: page to load
    :param use_proxy: when true, the module-level ``g_firefox_proxy_dict``
        is used to build the proxy
    :param retries: maximum number of attempts
    :return: the page source, or ``None`` if every attempt failed
    """
    for _ in range(retries):
        try:
            proxy = Proxy(g_firefox_proxy_dict) if use_proxy else None
            firefox_browser = webdriver.Firefox(proxy=proxy)
            # Quit on success and on failure, so a failed attempt does not
            # leave a browser running before the next retry.
            try:
                firefox_browser.get(url)
                return firefox_browser.page_source
            finally:
                firefox_browser.quit()
        except Exception as expt:
            print("error occurs when firefox_crawl url %s, error=%s" % (url, str(expt)))
    return None
Exemple #16
0
def scribe_detail(url, driver=None, page=1):
    """Print the video link and first comment of each page of ``url``.

    ``url`` is a format string that receives the page number. Starting at
    ``page``, the function loads the page, prints the MP4 source and the
    text of the first ``.comment_text`` element, then recurses to the next
    page until the page count read from ``#seq`` is reached.

    :param url: URL template with one ``{}`` placeholder for the page
    :param driver: existing driver to reuse; when falsy, a headless Firefox
        with a random proxy from ``ip_list`` is created
    :param page: page number to start from
    """
    if not driver:
        firefox_opts = webdriver.FirefoxOptions()
        firefox_opts.set_headless()
        ip_port = random.choice(ip_list)
        print(ip_port)
        proxy = Proxy({'httpProxy': ip_port})
        driver = webdriver.Firefox(
            executable_path='D:/geckodriver/geckodriver.exe',
            options=firefox_opts,
            proxy=proxy)

    def parse_until(selector):
        """Re-parse the page until ``selector`` matches; None after 4 misses."""
        parsed = BeautifulSoup(driver.page_source, 'lxml')
        misses = 0
        while not parsed.select(selector):
            misses += 1
            parsed = BeautifulSoup(driver.page_source, 'lxml')
            if misses > 3:
                return None
        return parsed

    driver.get(url.format(page))

    # Wait for the video element; give up and restart with a fresh driver
    # when it does not show up.
    soup = parse_until('source[type="video/mp4"]')
    if soup is None:
        print('source[type="video/mp4"]')
        return scribe_detail(url, page=page)
    video_href = soup.select('source[type="video/mp4"]')[0]['src']

    # Scroll to the bottom, then wait for the comments the same way.
    driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    soup = parse_until('.comment_text')
    if soup is None:
        print('.comment_text')
        return scribe_detail(url, page=page)
    common = soup.select('.comment_text')[0].text
    print(video_href + '\n' + common + '\n\n')

    # The second number in the "#seq" element is the page count.
    page_size = int(re.findall(r'\d+', soup.select('#seq')[0].text)[1])

    # Move on to the next page.
    print(page)
    if page < page_size:
        scribe_detail(url, driver, page + 1)
Exemple #17
0
    def __init__(self, row):
        """Copy the row's settings and start a proxied Firefox.

        :param row: object providing ``url``, ``click``, ``times_max`` and
            ``times_real``
        """
        self.url = row.url
        self.click = row.click
        self.times_max = row.times_max
        self.times_real = row.times_real

        self.error = False
        self.error_message = ''
        self.proxy = Proxy()
        self.proxy.proxy_type = ProxyType.MANUAL
        self.proxy.http_proxy = '127.0.0.1:9950'
        # Copy: DesiredCapabilities.FIREFOX is a shared class-level dict, and
        # writing the proxy into it would affect every other Firefox driver
        # created in the process.
        self.capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
        self.proxy.add_to_capabilities(self.capabilities)
        self.firefox = webdriver.Firefox(
            desired_capabilities=self.capabilities)
        self.firefox.implicitly_wait(tables.config.browser_implicit_wait)
        self.browser_load_sleep = tables.config.browser_load_sleep
Exemple #18
0
def smartproxy():
    """Return browser capabilities routed through the smartproxy endpoint.

    The browser is selected by the module-level ``DRIVER`` (``'FIREFOX'`` or
    ``'CHROME'``).

    :return: a fresh capabilities dict with the proxy added
    :raises ValueError: if ``DRIVER`` is neither ``'FIREFOX'`` nor
        ``'CHROME'``
    """
    endpoint = '{hostname}:{port}'.format(hostname="ca.smartproxy.com",
                                          port=20000)
    prox = Proxy()
    prox.proxy_type = ProxyType.MANUAL
    prox.http_proxy = endpoint
    prox.ssl_proxy = endpoint

    # Copies are used because the DesiredCapabilities dicts are shared
    # class-level objects; mutating and returning them would leak the proxy
    # into unrelated drivers.
    if DRIVER == 'FIREFOX':
        capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    elif DRIVER == 'CHROME':
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError("Unsupported DRIVER: {!r}".format(DRIVER))
    prox.add_to_capabilities(capabilities)

    return capabilities
Exemple #19
0
def advanced_crawl_js_var(url, var, use_proxy=False, retries=5):
    """Load ``url`` in Firefox and return the value of a JavaScript expression.

    :param url: page to load
    :param var: JavaScript expression evaluated as ``return <var>;``
    :param use_proxy: when true, the module-level ``g_firefox_proxy_dict``
        is used to build the proxy
    :param retries: maximum number of attempts
    :return: the script result, or ``None`` if every attempt failed
    """
    for _ in range(retries):
        try:
            proxy = Proxy(g_firefox_proxy_dict) if use_proxy else None
            browser = webdriver.Firefox(proxy=proxy)
            # Quit on success and on failure, so a failed attempt does not
            # leave a browser running before the next retry.
            try:
                browser.get(url)
                return browser.execute_script("return %s;" % var)
            finally:
                browser.quit()
        except Exception as expt:
            print("error occurs when advanced_crawl_js_var url %s, error=%s" % (url, str(expt)))
    return None
Exemple #20
0
    def testCanAddManualProxyToDesiredCapabilities(self):
        """A manually configured Proxy is serialised into the capabilities.

        Every field of ``self.MANUAL_PROXY`` is set on a ``Proxy`` and the
        resulting ``proxy`` capability must equal those fields plus
        ``proxyType == 'MANUAL'``.
        """
        manual = self.MANUAL_PROXY
        proxy = Proxy()
        # (attribute on Proxy, key in MANUAL_PROXY), assigned in this order.
        field_map = (
            ('http_proxy', 'httpProxy'),
            ('ftp_proxy', 'ftpProxy'),
            ('no_proxy', 'noProxy'),
            ('sslProxy', 'sslProxy'),
            ('socksProxy', 'socksProxy'),
            ('socksUsername', 'socksUsername'),
            ('socksPassword', 'socksPassword'),
        )
        for attribute, key in field_map:
            setattr(proxy, attribute, manual[key])

        actual = {}
        proxy.add_to_capabilities(actual)

        expected_proxy = manual.copy()
        expected_proxy['proxyType'] = 'MANUAL'
        self.assertEqual({'proxy': expected_proxy}, actual)
Exemple #21
0
def get_screenshot(url):
    """Save a screenshot of ``url`` taken with headless, proxied Chrome.

    The image is written to ``/tmp/screenshots/<hostname>.png`` where
    ``<hostname>`` is the network location of ``url``. Traffic goes through
    the proxy at ``http://localhost:8118``.

    :param url: page to capture
    """
    options = webdriver.ChromeOptions()
    options.add_argument('headless')

    prox = Proxy()
    prox.proxy_type = ProxyType.MANUAL
    prox.http_proxy = 'http://localhost:8118'
    prox.ssl_proxy = 'http://localhost:8118'

    # Copy: DesiredCapabilities.CHROME is a shared class-level dict.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    prox.add_to_capabilities(capabilities)

    driver = webdriver.Chrome(options=options,
                              desired_capabilities=capabilities)
    # Always quit, so each call does not leave a Chrome process running.
    try:
        driver.get('{}'.format(url))
        hostname = parse.urlparse(url).netloc
        driver.get_screenshot_as_file(
            '/tmp/screenshots/{}.png'.format(hostname))
    finally:
        driver.quit()
Exemple #22
0
 def __init__(self, headless=True):
     """Prepare Chrome options and proxied capabilities.

     All traffic is pointed at the SOCKS5 endpoint on ``localhost:9050``.
     ``self.switch_ip()`` is called before the options are built, image
     loading is disabled through the Chrome preferences, and the results
     are stored in ``self.chrome_options`` and ``self.capabilities``.

     :param headless: add the ``--headless`` switch when true
     """
     socks_endpoint = "socks5://localhost:9050"
     proxy_settings = {'proxyType': ProxyType.MANUAL, 'noProxy': ''}
     for scheme_key in ('httpProxy', 'ftpProxy', 'sslProxy'):
         proxy_settings[scheme_key] = socks_endpoint
     proxy = Proxy(proxy_settings)

     self.switch_ip()

     chrome_options = webdriver.ChromeOptions()
     if headless:
         chrome_options.add_argument("--headless")
     # A value of 2 for this preference turns image loading off.
     chrome_options.add_experimental_option(
         "prefs", {"profile.managed_default_content_settings.images": 2})
     self.chrome_options = chrome_options

     # dict(...) makes a private copy of the shared capabilities.
     self.capabilities = dict(DesiredCapabilities.CHROME)
     proxy.add_to_capabilities(self.capabilities)
Exemple #23
0
def test_creates_capabilities(options):
    """``to_capabilities`` reflects every setting stored on the options.

    ``options`` is supplied by the caller (presumably a pytest fixture
    yielding a Firefox ``Options`` - confirm).
    """
    firefox_profile = FirefoxProfile()

    # Populate the private fields directly.
    options._arguments = ['foo']
    options._binary = FirefoxBinary('/bar')
    options._preferences = {'foo': 'bar'}
    options._proxy = Proxy({'proxyType': ProxyType.MANUAL})
    options._profile = firefox_profile
    options.log.level = 'debug'

    capabilities = options.to_capabilities()
    firefox_opts = capabilities.get(Options.KEY)

    assert firefox_opts
    assert 'foo' in firefox_opts['args']
    assert firefox_opts['binary'] == '/bar'
    assert firefox_opts['prefs']['foo'] == 'bar'
    assert firefox_opts['profile'] == firefox_profile.encoded
    assert capabilities['proxy']['proxyType'] == ProxyType.MANUAL['string']
    assert firefox_opts['log']['level'] == 'debug'
def download_data(search_term, from_date, to_date, limit=0):
    """
    Download data from Twitter.

    The search page is opened in a proxied Firefox and scrolled until
    enough tweets are loaded (or no new ones appear); the final page source
    is then handed to ``append_results``.

    Args:

    * search_term - The term to search for in Twitter. e.g. '@IvanDuque'.
    * from_date - The date from which to start including results.
    * to_date - The date until which to include results.
    * limit - Limit the number of tweets to take from the results. Defaults to 0, including all.

    Returns:

    The ``results`` dict filled in by ``append_results``.
    """
    results = {}
    proxy_host = '' # Add a proxy host here. You can find available proxy lists online

    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': proxy_host,
        'ftpProxy': proxy_host,
        'sslProxy': proxy_host,
        'noProxy': ''
    })

    driver = webdriver.Firefox(proxy=proxy)
    # Quit the browser even if loading or scrolling fails; otherwise an
    # error leaves a Firefox process running.
    try:
        driver.get(BASE_URL + get_query_string(search_term, from_date, to_date))
        time.sleep(2)

        tweets = driver.find_elements_by_class_name(CLASSES['tweet'])
        if limit > 0:
            while len(tweets) < limit:
                scroll_and_sleep(driver, 5)
                latest = driver.find_elements_by_class_name(CLASSES['tweet'])
                # No new tweets after scrolling: the end has been reached.
                if len(latest) == len(tweets):
                    break
                tweets = latest
        else:
            scroll_and_sleep(driver)
            while len(tweets) < len(driver.find_elements_by_class_name(CLASSES['tweet'])):
                tweets = driver.find_elements_by_class_name(CLASSES['tweet'])
                scroll_and_sleep(driver, 5)

        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        driver.quit()

    append_results(soup, results)
    return results
Exemple #25
0
class Downloader:
    """Callable page downloader backed by one shared headless Firefox.

    The proxy, the Firefox options and the driver are class attributes:
    they are created once, when the class body runs, and shared by every
    instance.
    """
    # class variables
    proxy = proxypool.pop_proxy()
    proxy_obj = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': proxy,
        'httpsProxy': proxy,
        'sslProxy': proxy
    })
    firefox_options = FirefoxOptions()
    firefox_options.set_headless(headless=True)
    driver = webdriver.Firefox(firefox_options=firefox_options,
                               proxy=proxy_obj)

    def __call__(self, url):
        """Download ``url`` and return the header row and event rows.

        args:
            url (str): url to download
        returns:
            ``(header, events)`` where ``header`` is the outer HTML of the
            ``headerTr`` element and ``events`` the outer HTML of every
            ``listTr`` element; ``None`` if a ``WebDriverException`` occurs.

        NOTE(review): the shared class-level driver is quit after a
        successful download - confirm whether instances are meant to be
        called more than once.
        """
        print('Downloading:', url, 'with proxy {}'.format(self.proxy))
        browser = self.driver
        try:
            browser.get(url)
            # TODO: read these wait parameters from conf.
            rows_present = EC.presence_of_all_elements_located(
                (By.CLASS_NAME, 'listTr'))
            WebDriverWait(browser, 10).until(rows_present)
            header_row = browser.find_element_by_id('headerTr')
            header = header_row.get_attribute('outerHTML')
            events = []
            for row in browser.find_elements_by_class_name('listTr'):
                events.append(row.get_attribute('outerHTML'))
            browser.quit()
            return header, events
        except WebDriverException as e:
            print('Downloading Error ->', e)
Exemple #26
0
def proxy_driver():
    """Create a Chrome driver that uses the next proxy from ``ALL_PROXIES``.

    When the proxy pool is empty it is refilled through ``get_proxies()``
    first. The proxy taken from the pool is stored in the module-level
    ``my_ip``. If the pool is still empty after the refill, the driver is
    created without a proxy.

    :return: the ``webdriver.Chrome`` instance
    """
    global ALL_PROXIES, my_ip

    if not ALL_PROXIES:
        print("--- Proxies used up (%s)" % len(ALL_PROXIES))
        ALL_PROXIES = get_proxies()

    # Copy: DesiredCapabilities.CHROME is a shared class-level dict. The
    # copy is passed to the driver explicitly so that this call's proxy is
    # the one that is actually used.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()

    # Use a proxy right after a refill as well; previously a refill skipped
    # both the proxy and all the browser options below.
    if ALL_PROXIES:
        # Take (and remove) the last proxy of the pool.
        pxy = ALL_PROXIES.pop()
        my_ip = pxy
        print('Proxy Actual:', pxy)

        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.autodetect = False
        prox.httpProxy = prox.sslProxy = pxy  #prox.socksProxy = pxy
        prox.add_to_capabilities(capabilities)

    co = Options()
    co.add_argument("ignore-certificate-errors")
    co.add_argument("start-maximized")
    co.add_experimental_option("excludeSwitches", ["enable-automation"])
    co.add_experimental_option('useAutomationExtension', False)
    ua = UserAgent()
    userAgent = ua.random
    co.add_argument(f'user-agent={userAgent}')
    co.add_argument('--disable-notifications')

    # The Buster add-on is loaded to solve captchas.
    co.add_extension('./buster_extension.crx')
    co.add_extension('./vpn.crx')

    driver = webdriver.Chrome(ChromeDriverManager().install(),
                              chrome_options=co,
                              desired_capabilities=capabilities)
    # driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

    return driver
Exemple #27
0
def test_can_add_manual_proxy_to_desired_capabilities():
    """A manually configured Proxy is serialised into the capabilities.

    Every field of the module-level ``MANUAL_PROXY`` is set on a ``Proxy``;
    the resulting ``proxy`` capability must equal those fields plus
    ``proxyType == 'MANUAL'``.
    """
    proxy = Proxy()
    # (attribute on Proxy, key in MANUAL_PROXY), assigned in this order.
    field_map = (
        ('http_proxy', 'httpProxy'),
        ('ftp_proxy', 'ftpProxy'),
        ('no_proxy', 'noProxy'),
        ('sslProxy', 'sslProxy'),
        ('socksProxy', 'socksProxy'),
        ('socksUsername', 'socksUsername'),
        ('socksPassword', 'socksPassword'),
        ('socksVersion', 'socksVersion'),
    )
    for attribute, key in field_map:
        setattr(proxy, attribute, MANUAL_PROXY[key])

    actual = {}
    proxy.add_to_capabilities(actual)

    expected_proxy = MANUAL_PROXY.copy()
    expected_proxy['proxyType'] = 'MANUAL'
    assert {'proxy': expected_proxy} == actual
def test_manual_proxy_is_set_in_profile():
    """Setting a manual proxy on a profile writes the matching preferences.

    The HTTP proxy ``some.url:1234`` must be split into host and port, the
    SSL proxy without a port must produce no port preference, and the unset
    FTP proxy must produce no preference at all.
    """
    proxy = Proxy()
    proxy.no_proxy = 'localhost, foo.localhost'
    proxy.http_proxy = 'some.url:1234'
    proxy.ftp_proxy = None
    proxy.sslProxy = 'some2.url'

    profile = FirefoxProfile()
    profile.set_proxy(proxy)

    # Preferences that must be present, with their expected values.
    expected = (
        ("network.proxy.type", ProxyType.MANUAL['ff_value']),
        ("network.proxy.no_proxies_on", 'localhost, foo.localhost'),
        ("network.proxy.http", 'some.url'),
        ("network.proxy.http_port", 1234),
        ("network.proxy.ssl", 'some2.url'),
    )
    for preference, value in expected:
        assert profile.default_preferences[preference] == value

    # Preferences that must not have been written.
    for preference in ("network.proxy.ssl_port", "network.proxy.ftp"):
        assert preference not in profile.default_preferences
Exemple #29
0
def get_firefox(zap):
    """Start a headless Firefox whose traffic goes through the ZAP proxy.

    :param zap: ZAP client; its ``http`` proxy entry is used for HTTP, FTP
        and SSL traffic and its selenium settings provide the driver path
    :return: the ``webdriver.Firefox`` instance
    """
    driver_bin = zap.selenium.option_firefox_driver_path
    zap_proxy = zap._ZAPv2__proxies['http']

    proxy_settings = {'proxyType': ProxyType.MANUAL, 'noProxy': ''}
    for scheme_key in ('httpProxy', 'ftpProxy', 'sslProxy'):
        proxy_settings[scheme_key] = zap_proxy
    proxy = Proxy(proxy_settings)

    profile = webdriver.FirefoxProfile()
    profile.accept_untrusted_certs = True

    firefox_opts = FirefoxOptions()
    firefox_opts.add_argument("--headless")

    return webdriver.Firefox(proxy=proxy,
                             executable_path=driver_bin,
                             firefox_options=firefox_opts,
                             firefox_profile=profile)
Exemple #30
0
 def set_proxy(driver):
     """Restart the driver's session behind a proxy from the local pool.

     An address is requested from ``http://127.0.0.1:5010/get/`` and used as
     the HTTP proxy of a new PhantomJS session. Any error is printed and the
     driver is returned as it is.

     :param driver: driver whose session is restarted
     :return: the same ``driver`` object
     """
     url = "http://127.0.0.1:5010/get/"
     try:
         # NOTE(review): ``urllib.urlopen`` is the Python 2 API; on Python 3
         # this raises AttributeError, which is printed below - confirm the
         # target interpreter.
         resp = urllib.urlopen(url).read()
         ip_port = str(resp).strip()
         proxy = Proxy({
             'proxyType': ProxyType.MANUAL,
             # Use the fetched address; the literal placeholder 'ip:port'
             # was sent before, so the fetched proxy was never applied.
             'httpProxy': ip_port
         })
         # Fresh copy of the desired capabilities.
         desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
         # Add the proxy to the capabilities.
         proxy.add_to_capabilities(desired_capabilities)
         # Start a new session with these capabilities.
         driver.start_session(desired_capabilities)
     except Exception as e:
         print(e)
     return driver