def __init__(self, without_window=False):
    """Start a Chrome driver and create the HTTP pool used by this object.

    Args:
        without_window: Stored on the instance and copied to the Chrome
            options' ``headless`` flag.
    """
    self.without_window = without_window

    # The headless flag simply mirrors ``without_window``.
    chrome_options = Options()
    chrome_options.headless = self.without_window
    self.opts = chrome_options

    self.browser = webdriver.Chrome('chromedriver', options=self.opts)
    self.webdriver = None
    self.r = poolmanager.PoolManager()
    self.check_sum = 0
def init_poolmanager(self, connections, maxsize, block=False):
    """Create and initialize the urllib3 PoolManager.

    The manager is built around an SSL context whose cipher string is
    ``DEFAULT@SECLEVEL=1`` (presumably to reach servers with legacy TLS
    setups -- confirm against the deployment this adapter targets).

    Args:
        connections: Number of connection pools to cache (``num_pools``).
        maxsize: Maximum number of connections kept per pool.
        block: Whether a pool blocks when no free connection is available.
    """
    ctx = ssl.create_default_context()
    ctx.set_ciphers('DEFAULT@SECLEVEL=1')
    self.poolmanager = poolmanager.PoolManager(
        num_pools=connections,
        # ``maxsize`` was previously accepted but never forwarded, so the
        # caller's pool size was silently ignored.
        maxsize=maxsize,
        block=block,
        ssl_version=ssl.PROTOCOL_TLS,
        ssl_context=ctx)
def start_prop(self):
    """Reset working state, load the visited list, then call ``learn()``.

    If ``read_visited()`` raises ``FileNotFoundError`` the visited list
    starts empty and ``write_visited()`` is called right away.
    """
    self.props = {}
    self.urls = []

    try:
        previously_visited = self.read_visited()
    except FileNotFoundError:
        self.visited = []
        self.write_visited()
    else:
        self.visited = previously_visited

    self.http = poolmanager.PoolManager()
    self.learn()
def init_poolmanager(self, connections, maxsize, block=adapters.DEFAULT_POOLBLOCK, **pool_kwargs):
    """Create the urllib3 PoolManager with a ``SECLEVEL=1`` SSL context.

    Args:
        connections: Number of connection pools to cache (``num_pools``).
        maxsize: Maximum number of connections kept per pool.
        block: Whether a pool blocks when no free connection is available.
        **pool_kwargs: Extra keyword arguments forwarded to ``PoolManager``.
            ``ssl_version`` and ``ssl_context`` are always overridden by the
            values built here.
    """
    ctx = ssl.create_default_context()
    ctx.set_ciphers('DEFAULT@SECLEVEL=1')

    # Previously ``pool_kwargs`` was accepted and then dropped. Forward it,
    # letting this adapter's TLS settings win over any caller-supplied ones.
    pool_kwargs['ssl_version'] = ssl.PROTOCOL_TLS
    pool_kwargs['ssl_context'] = ctx

    self.poolmanager = poolmanager.PoolManager(
        num_pools=connections,
        maxsize=maxsize,
        block=block,
        **pool_kwargs
    )
def init_poolmanager(self, connections, maxsize, block=False):
    """Build the urllib3 PoolManager with a relaxed SSL context.

    The context uses the ``DEFAULT@SECLEVEL=1`` cipher string and has
    hostname checking turned off.

    Args:
        connections: Number of connection pools to cache (``num_pools``).
        maxsize: Maximum number of connections kept per pool.
        block: Whether a pool blocks when no free connection is available.
    """
    tls_context = ssl.create_default_context()
    tls_context.set_ciphers('DEFAULT@SECLEVEL=1')
    # NOTE(review): hostname verification is disabled here on purpose.
    tls_context.check_hostname = False

    pool_options = {
        'num_pools': connections,
        'maxsize': maxsize,
        'block': block,
        'ssl_version': ssl.PROTOCOL_TLS,
        'ssl_context': tls_context,
    }
    self.poolmanager = poolmanager.PoolManager(**pool_options)
def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
    """Build the urllib3 PoolManager around a ``SECLEVEL=1`` SSL context.

    Args:
        connections: Number of connection pools to cache (``num_pools``).
        maxsize: Maximum number of connections kept per pool.
        block: Whether a pool blocks when no free connection is available.
        **pool_kwargs: Extra keyword arguments passed straight through to
            ``PoolManager``.
    """
    tls_context = ssl.create_default_context()
    tls_context.set_ciphers("DEFAULT@SECLEVEL=1")

    manager = poolmanager.PoolManager(
        num_pools=connections,
        maxsize=maxsize,
        block=block,
        ssl_version=ssl.PROTOCOL_TLS,
        ssl_context=tls_context,
        **pool_kwargs
    )
    self.poolmanager = manager
def url_scheme_check(target_url):
    """Probe ``target_url`` with a GET request and classify it.

    A URL without a scheme is given ``https`` before the request is made.

    Args:
        target_url: URL string to check.

    Returns:
        tuple: ``(url, web_type)`` where ``url`` is the (possibly
        scheme-completed) URL. ``web_type`` is the result of
        ``ProcessingMessage.get_web_type`` for the URL's netloc when the
        response status is 200, otherwise ``None``.
    """
    parsed = util.parse_url(target_url)
    if parsed.scheme is None:
        parsed = parsed._replace(scheme='https')

    try:
        response = poolmanager.PoolManager().request(
            'GET', parsed.url, timeout=float(default_timeout))
        if response.status != 200:
            logger.print_log(ErrorMessage.UNVALID_URL)
            return parsed.url, None
        return parsed.url, ProcessingMessage.get_web_type(parsed.netloc)
    except Exception as e_log:
        # Any failure is logged and reported as "no web type".
        logger.print_log(ErrorMessage.URL_TIMEOUT)
        logger.print_log(e_log)
        return parsed.url, None
def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
    """Create and initialize the urllib3 PoolManager.

    The SSL context comes from ``ssl_context_for_client`` using this
    adapter's ``ca_cert``, ``private_cert_path`` and ``private_key_path``.

    Args:
        connections: Number of connection pools to cache (``num_pools``).
        maxsize: Maximum number of connections kept per pool.
        block: Whether a pool blocks when no free connection is available.
        **pool_kwargs: Extra keyword arguments forwarded to ``PoolManager``.
            ``ssl_version`` and ``ssl_context`` are always overridden by the
            values built here.
    """
    ssl_context = ssl_context_for_client(
        self.ca_cert, None, self.private_cert_path, self.private_key_path)

    # Previously ``pool_kwargs`` was accepted and then dropped. Forward it,
    # letting the client TLS configuration win over caller-supplied values.
    pool_kwargs['ssl_version'] = ssl.PROTOCOL_TLS
    pool_kwargs['ssl_context'] = ssl_context

    self.poolmanager = poolmanager.PoolManager(
        num_pools=connections,
        maxsize=maxsize,
        block=block,
        **pool_kwargs)
def __init__(self, base_url=None, proxy_url=None, headers={}, json_encoder=None):
    # type: (Optional[str], Optional[str], Mapping[str, str], Optional[Type[json.JSONEncoder]]) -> None
    """Set up the sender and its blocking urllib3 connection pool.

    Default keep-alive / accept-encoding headers are generated first and
    then updated with ``headers``, so caller-supplied entries win on key
    collisions. A ``ProxyManager`` is used when ``self.proxy_url`` is set
    after the base initializer runs; otherwise a plain ``PoolManager``.
    """
    merged_headers = util.make_headers(keep_alive=True, accept_encoding=True)
    merged_headers.update(headers)

    super(SyncSender, self).__init__(
        base_url=base_url,
        proxy_url=proxy_url,
        headers=merged_headers,
        json_encoder=json_encoder)

    pool_options = {
        'block': True,
        'maxsize': self.max_pool_size,
    }
    if self.proxy_url is None:
        self.pool_manager = poolmanager.PoolManager(**pool_options)  # type: poolmanager.PoolManager
    else:
        self.pool_manager = poolmanager.ProxyManager(self.proxy_url, **pool_options)
def init_poolmanager(self, connections, maxsize, block=False):
    """Build the urllib3 PoolManager with certificate checks disabled.

    The SSL context uses the ``DEFAULT@SECLEVEL=1`` cipher string, skips
    hostname checking and does not verify certificates.

    Args:
        connections: Number of connection pools to cache (``num_pools``).
        maxsize: Maximum number of connections kept per pool.
        block: Whether a pool blocks when no free connection is available.
    """
    insecure_context = ssl.create_default_context()
    insecure_context.set_ciphers('DEFAULT@SECLEVEL=1')

    # FIX #33770129
    # https://stackoverflow.com/questions/33770129/how-do-i-disable-the-ssl-check-in-python-3-x
    # ``check_hostname`` has to be cleared before ``verify_mode`` can be
    # set to CERT_NONE, so keep these two statements in this order.
    insecure_context.check_hostname = False
    insecure_context.verify_mode = ssl.CERT_NONE

    pool_options = {
        'num_pools': connections,
        'maxsize': maxsize,
        'block': block,
        'ssl_version': ssl.PROTOCOL_TLS,
        'ssl_context': insecure_context,
    }
    self.poolmanager = poolmanager.PoolManager(**pool_options)
def open_first_link(self):
    """Run the search scenario and check that the first hit is PEP 318.

    The browser opens ``self.url``, types ``Decorator`` into the search
    field, submits, follows the first result and asserts on its header.
    ``self.succes`` is set to the same value that is returned.

    Returns:
        int: ``1`` when the scenario finishes, ``0`` when it is aborted by
        one of the handled errors (the browser is quit in both cases).

    Raises:
        AssertionError: If the search header is empty or the first result's
            header is not the expected PEP 318 title. These are not caught
            here, so the browser stays open on that path.
    """
    self.logging('Test started for host :', self.host_name)

    # Requesting page
    try:
        self.browser.get(self.url)
        r = poolmanager.PoolManager()
        r.request('GET', self.url)
    except exceptions.MaxRetryError as e:
        AccesPep318.logging('Connection ERROR:\n%s' % (str(e)), 'Connection could not be esteablished')
        self.browser.quit()
        self.succes = 0
        return 0
    AccesPep318.logging("URL CHANGED", self.browser.current_url)

    # Assert landing on main page
    self.cpx('//*[@id="homepage"]', 'Python logo on main page')
    search_box = self.cpx('//*[@id="id-search-field"]', 'Search form on python.org landing page')
    go_button = self.cpx('//*[@id="submit"]', 'GO Button')

    # Treat AttributeErrors on main page
    try:
        for action in (search_box, go_button):
            if action.tag_name == 'input':
                action.send_keys('Decorator')
            else:
                action.click()
    except AttributeError as e:
        # ``action`` is the element being processed when the error occurred;
        # reporting it directly avoids an index lookup that could point at
        # an earlier equal entry.
        AccesPep318.logging(
            'Error: %s' % (e.args),
            'Element %s does not have required attribute' % (str(action)))
        self.browser.quit()
        AccesPep318.logging('Browser quit', 'Error:%s' % (e))
        self.succes = 0
        return 0

    # Search results page and first result; only MaxRetryError is handled.
    try:
        search_page_header = self.cpx('//*[@id="content"]/div/section/h2', 'Search page header')
        assert search_page_header.text
        first_link = self.cpx(
            '//*[@id="content"]/div/section/form/ul/li[1]/h3/a', 'Required link')
        first_link.click()
        AccesPep318.logging("URL CHANGED", self.browser.current_url)
        first_link_header = self.cpx(
            '//*[@id="content"]/div/section/article/header/h1', 'Header of first link')
        assert (first_link_header.text == 'PEP 318 -- Decorators for Functions and Methods')
    except exceptions.MaxRetryError:
        AccesPep318.logging('Error', 'Host refused comunication request')
        # Fixed: this used to be ``self.browser.quit`` without parentheses,
        # which never closed the browser.
        self.browser.quit()
        self.succes = 0
        return 0
    else:
        # Close the browser
        if (self.without_window == False):
            print("Browser will close in 5 seconds")
            time.sleep(5)
        self.browser.quit()
        self.succes = 1
        return 1
def init_poolmanager(self, connections, maxsize, block=False):
    """Build the urllib3 PoolManager using this adapter's ``ssl_version``.

    Args:
        connections: Number of connection pools to cache (``num_pools``).
        maxsize: Maximum number of connections kept per pool.
        block: Whether a pool blocks when no free connection is available.
    """
    pool_options = {
        'num_pools': connections,
        'maxsize': maxsize,
        'block': block,
        'ssl_version': self.ssl_version,
    }
    self.poolmanager = poolmanager.PoolManager(**pool_options)
def __init__():
    """Fetch the download URLs and download the CSV files over one pool.

    A single ``PoolManager`` is shared by ``getDownloadUrls`` and
    ``downloadCsv`` and is always cleared afterwards, even when one of them
    raises.
    """
    connectBuilder = poolmanager.PoolManager()
    try:
        arrDespesa = getDownloadUrls(connectBuilder)
        downloadCsv(connectBuilder, arrDespesa)
    finally:
        # Release pooled connections on failure too; previously an exception
        # in either helper skipped this cleanup.
        connectBuilder.clear()
def __init__(self, crawler_user, system_logger):
    """Store the crawler's collaborators and create its HTTP pool.

    Args:
        crawler_user: Stored as ``self.user``.
        system_logger: Stored as ``self.logger``.
    """
    self.user, self.logger = crawler_user, system_logger
    self.pool_manager = poolmanager.PoolManager()
    # NOTE(review): these look like characters not allowed in file names --
    # confirm against where ``stop_word`` is consumed.
    self.stop_word = [
        '\\',
        '/',
        ':',
        '*',
        '?',
        '"',
        '<',
        '>',
        '|',
    ]
def open_first_link(url, without_window=False):
    """Run the search scenario on ``url`` and check the first hit is PEP 318.

    A Chrome browser opens ``url``, types ``Decorator`` into the search
    field, submits, follows the first result and asserts on its header.

    Args:
        url: Page to open.
        without_window: Copied to the Chrome options' ``headless`` flag.

    Returns:
        int: ``1`` when the scenario finishes, ``0`` when it is aborted by
        one of the handled errors (the browser is quit in both cases).

    Raises:
        AssertionError: If the search header or the first result's header
            does not match the expected text. These are not caught here, so
            the browser stays open on that path.
    """
    cpx = check_presence_by_xpath  # shortcut

    # Browser options
    opt = Options()
    opt.headless = without_window
    browser = webdriver.Chrome('chromedriver', options=opt)

    # Requesting page
    try:
        browser.get(url)
        r = poolmanager.PoolManager()
        r.request('GET', url)
    except exceptions.MaxRetryError as e:
        logging('Connection ERROR:\n%s' % (str(e)), 'Connection could not be esteablished')
        browser.quit()
        logging('Browser quit with a code error', '')
        return 0
    logging("URL CHANGED", browser.current_url)

    # Assert landing on main page
    cpx('//*[@id="homepage"]', 'Python logo on main page', browser)
    search_box = cpx('//*[@id="id-search-field"]', 'Search form on python.org landing page', browser)
    go_button = cpx('//*[@id="submit"]', 'GO Button', browser)

    # Treat AttributeErrors on main page
    try:
        for action in (search_box, go_button):
            if action.tag_name == 'input':
                action.send_keys('Decorator')
            else:
                action.click()
    except AttributeError as e:
        # ``action`` is the element being processed when the error occurred;
        # reporting it directly avoids an index lookup that could point at
        # an earlier equal entry.
        logging('Error: %s' % (e.args), 'Element %s does not have required attribute' % (str(action)))
        browser.quit()
        logging('Browser quit', 'Error:%s' % (e))
        logging('Browser quit with a code error', '')
        return 0

    # Search results page and first result; only MaxRetryError is handled.
    try:
        search_page_header = cpx('//*[@id="content"]/div/section/h2', 'Search page header', browser)
        print(search_page_header.text)
        assert search_page_header.text == 'Search Python.org'
        first_link = cpx('//*[@id="content"]/div/section/form/ul/li[1]/h3/a', 'Required link', browser)
        first_link.click()
        logging("URL CHANGED", browser.current_url)
        first_link_header = cpx('//*[@id="content"]/div/section/article/header/h1', 'Header of first link', browser)
        assert (first_link_header.text == 'PEP 318 -- Decorators for Functions and Methods')
    except exceptions.MaxRetryError:
        logging('Error', 'Host refused comunication request')
        # Fixed: this used to be ``browser.quit`` without parentheses, which
        # never closed the browser.
        browser.quit()
        logging('Browser quit with a code error', '')
        return 0
    else:
        # Close the browser
        if (without_window == False):
            print("Browser will close in 5 seconds")
        logging('Test succeded', 'Browser quit with 0 error')
        browser.quit()
        return 1