Example #1
def get_internet_item(url, html=True):
    """ get html or data from given url

    :param url: target site url string
    :param html: download html or data boolean
    :return: html string
    """

    if PROXY_MODE == "http_proxy":
        http = ProxyManager(proxy_url=PROXY_URL_PORT)

    elif PROXY_MODE == "auth_proxy":
        auth_proxy_headers = make_headers(proxy_basic_auth=PROXY_BASIC_AUTH)
        http = ProxyManager(proxy_url=PROXY_URL_PORT,
                            proxy_headers=auth_proxy_headers,
                            cert_reqs="CERT_REQUIRED",
                            ca_certs=certifi.where())

    else:
        http = PoolManager(cert_reqs="CERT_REQUIRED",
                           ca_certs=certifi.where())

    r = http.request("GET", url)

    if r.status != 200:
        raise ConnectionError("http request failure")

    if html:
        data = r.data.decode()

    else:
        data = r.data

    return data
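Example #1 relies on module-level configuration that the snippet does not show. A minimal sketch of the setup it assumes (the constant names come from the snippet itself; the values and proxy host are purely illustrative):

# Hypothetical module-level setup for Example #1; values are illustrative.
import certifi
from urllib3 import PoolManager, ProxyManager
from urllib3.util import make_headers

PROXY_MODE = "auth_proxy"           # "http_proxy", "auth_proxy", or anything else for a direct connection
PROXY_URL_PORT = "http://proxy.example.com:3128"
PROXY_BASIC_AUTH = "user:password"  # "user:pass" string consumed by make_headers(proxy_basic_auth=...)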
Example #2
def configure_http_pool():

    global gl_http_pool

    if gl_args.mode == 'auto-scan' or gl_args.mode == 'file-scan':
        timeout = Timeout(connect=1.0, read=3.0)
    else:
        timeout = Timeout(connect=gl_args.timeout, read=6.0)

    if gl_args.proxy:
        # When using a proxy, the protocol must be specified explicitly.
        if 'http' not in gl_args.host or 'http' not in gl_args.proxy:
            print_and_flush(RED + " * When using a proxy, you must specify the http or https protocol"
                            " (eg. http://%s).\n\n" % (gl_args.host if 'http' not in gl_args.host else gl_args.proxy) + ENDC)
            logging.critical('Protocol not specified')
            exit(1)

        try:
            if gl_args.proxy_cred:
                headers = make_headers(proxy_basic_auth=gl_args.proxy_cred)
                gl_http_pool = ProxyManager(proxy_url=gl_args.proxy, proxy_headers=headers, timeout=timeout, cert_reqs='CERT_NONE')
            else:
                gl_http_pool = ProxyManager(proxy_url=gl_args.proxy, timeout=timeout, cert_reqs='CERT_NONE')
        except Exception:
            print_and_flush(RED + " * An error occurred while setting the proxy. Please see the log for details.\n\n" + ENDC)
            logging.critical('Error while setting the proxy', exc_info=True)
            exit(1)
    else:
        gl_http_pool = PoolManager(timeout=timeout, cert_reqs='CERT_NONE')
Example #3
 def get_pool(self, req):
     if req['proxy']:
         if req['proxy_auth']:
             proxy_headers = make_headers(
                 proxy_basic_auth=req['proxy_auth'])
         else:
             proxy_headers = None
         proxy_url = '%s://%s' % (req['proxy_type'], req['proxy'])
         pool_key = (req['proxy_type'], req['proxy'], bool(req['verify']))
         if pool_key not in self.pools:
             if req['proxy_type'] == 'socks5':
                 opts = {
                     #num_pools=1000,
                     #maxsize=10,
                 }
                 if req['verify']:
                     pool = SOCKSProxyManager(proxy_url,
                                              cert_reqs='CERT_REQUIRED',
                                              ca_certs=certifi.where(),
                                              **opts)
                 else:
                     pool = SOCKSProxyManager(proxy_url, **opts)
             elif req['proxy_type'] == 'http':
                 opts = {
                     #num_pools=1000,
                     #maxsize=10,
                 }
                 if req['verify']:
                     pool = ProxyManager(
                         proxy_url,
                         proxy_headers=proxy_headers,
                         cert_reqs='CERT_REQUIRED',
                         ca_certs=certifi.where(),
                         **opts,
                     )
                 else:
                     pool = ProxyManager(proxy_url,
                                         proxy_headers=proxy_headers,
                                         **opts)
             else:
                 raise IowebConfigError(
                     'Invalid value of request option `proxy_type`: %s' %
                     req['proxy_type'])
             self.pools[pool_key] = pool
         else:
             pool = self.pools[pool_key]
     else:
         pool = self.pools[(None, None, bool(req['verify']))]
     return pool
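A hedged usage sketch for Example #3: self.pools is assumed to be a dict on the surrounding class, req a plain dict whose keys are taken from the snippet, and the instance name client is hypothetical:

req = {
    'proxy': '127.0.0.1:9050',
    'proxy_type': 'socks5',   # 'socks5' or 'http'
    'proxy_auth': None,       # or 'user:pass'
    'verify': True,
}
pool = client.get_pool(req)   # client: instance of the class owning get_pool
resp = pool.request('GET', 'https://example.com/')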
Example #4
    def __proxy_pool(self):
        """
        Create Proxy connection pool
        :raise ProxyRequestError
        :return: urllib3.HTTPConnectionPool
        """

        try:

            self.__server = self.__cfg.proxy if self.__cfg.is_standalone_proxy is True else self.__get_random_proxy()

            if self.__get_proxy_type(self.__server) == 'socks':

                disable_warnings(InsecureRequestWarning)

                # hasattr(self, '__pm') can never see a name-mangled attribute,
                # so cache the manager class under a single underscore instead.
                if not hasattr(self, '_pm'):

                    package_module = importlib.import_module('urllib3.contrib.socks')
                    self._pm = getattr(package_module, 'SOCKSProxyManager')

                pool = self._pm(self.__server,
                                num_pools=self.__cfg.threads,
                                timeout=Timeout(self.__cfg.timeout, read=self.__cfg.timeout),
                                block=True)
            else:
                pool = ProxyManager(self.__server,
                                    num_pools=self.__cfg.threads,
                                    timeout=Timeout(self.__cfg.timeout, read=self.__cfg.timeout),
                                    block=True)
            return pool
        except (DependencyWarning, ProxySchemeUnknown, ImportError) as error:
            raise ProxyRequestError(error)
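Example #4 imports SOCKSProxyManager lazily via importlib; the direct equivalent, which requires the PySocks extra (pip install 'urllib3[socks]'), would be:

from urllib3.contrib.socks import SOCKSProxyManager  # needs PySocks installed

pool = SOCKSProxyManager("socks5h://127.0.0.1:9050", num_pools=10)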
Example #5
def check_ip(ip_info, port_info, type):
    check_url = "https://bck.hermes.com/product-page?locale=us_en&productsku=H056289CC18"
    ip_url = "%s://%s:%s" % (type, ip_info, port_info)
    manager = ProxyManager(ip_url,
                           timeout=10,
                           cert_reqs='CERT_REQUIRED',
                           ca_certs=certifi.where())
    headers = util.make_headers(
        accept_encoding='gzip, deflate',
        keep_alive=True,
        user_agent=
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0"
    )
    headers['Accept-Language'] = "en-US,en;q=0.5"
    headers['Connection'] = 'keep-alive'
    headers[
        'Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    try:
        response = manager.request('GET',
                                   check_url,
                                   preload_content=False,
                                   headers=headers)
        res = response.data
        print(res)
        json.loads(res)  # validate that the proxy returned real JSON
        return True
    except Exception:
        return False
Example #6
def get_web_page(url: str, proxies: list = None):

    headers = {
        'User-agent':
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582",
    }

    if proxies:
        import numpy as np
        resp = None
        status = 0
        i = 0
        while status != 200 and i < 10:
            try:
                proxy_url = np.random.choice(proxies)
                print(f"Proxy: {proxy_url}")
                # Point the manager at the chosen proxy, not at the target url.
                proxy_url = "https://" + proxy_url.strip()
                http = ProxyManager(proxy_url=proxy_url,
                                    headers=headers,
                                    cert_reqs='CERT_NONE',
                                    assert_hostname=False)
                resp = http.request('GET', url)
                status = resp.status
                print(status)
            except Exception:
                i += 1
        if resp is None:
            raise ConnectionError("all proxy attempts failed")

    else:
        http = PoolManager(headers=headers,
                           cert_reqs='CERT_NONE',
                           assert_hostname=False)
        resp = http.request('GET', url)

    return resp.status, resp.data.decode('utf-8')
Example #7
    def _init_connection(self):
        """Function for initiating connection with remote server"""
        cert_reqs = 'CERT_NONE'
        if self._connection_properties.get('ca_cert_data'):
            LOGGER.info('Using CA cert to confirm identity.')
            cert_reqs = 'CERT_REQUIRED'
            self._connection_properties.update(
                self._connection_properties.pop('ca_cert_data'))

        if self.proxy:
            if self.proxy.startswith('socks'):
                LOGGER.info("Initializing a SOCKS proxy.")
                http = SOCKSProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6,
                                         **self._connection_properties)
            else:
                LOGGER.info("Initializing an HTTP proxy.")
                http = ProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6,
                                    **self._connection_properties)
        else:
            LOGGER.info("Initializing no proxy.")
            try:
                self._connection_properties.pop('ca_cert_data')
            except KeyError:
                pass
            http = PoolManager(cert_reqs=cert_reqs,
                               maxsize=6,
                               **self._connection_properties)

        self._conn = http.request
Example #8
def check_stock_proxy_manager(url, proxy=None, count=0):
    if proxy is None:
        manager = PoolManager(timeout=5,
                              cert_reqs='CERT_REQUIRED',
                              ca_certs=certifi.where())
    else:
        proxy_url = "%s://%s:%s" % (proxy[0], proxy[1], proxy[2])
        manager = ProxyManager(proxy_url,
                               timeout=5,
                               cert_reqs='CERT_REQUIRED',
                               ca_certs=certifi.where())
    headers = util.make_headers(accept_encoding='gzip, deflate',
                                keep_alive=True,
                                user_agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0")
    headers['Accept-Language'] = "en-US,en;q=0.5"
    headers['Connection'] = 'keep-alive'
    headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    try:
        response = manager.request('GET',
                                   url,
                                   preload_content=False,
                                   headers=headers)
        content = json.loads(response.data)
        print("%s - Connect Success!" % count)
        return content['hasStock']
    except Exception as ex:
        print("%s - Connect Error!" % count)
        return False
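A usage sketch for Example #8, assuming proxy is given as a (scheme, host, port) tuple and the target URL (illustrative here) returns JSON with a hasStock field:

in_stock = check_stock_proxy_manager("https://shop.example.com/api/stock",  # hypothetical endpoint
                                     proxy=("http", "10.0.0.5", "3128"),
                                     count=1)
print(in_stock)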
Example #9
 def get_pool(self, req, use_cache=True):
     if req['proxy']:
         if req['proxy_type'] == 'socks5' and req['proxy_auth']:
             proxy_url = '%s://%s@%s' % (req['proxy_type'],
                                         req['proxy_auth'], req['proxy'])
         else:
             proxy_url = '%s://%s' % (req['proxy_type'], req['proxy'])
         pool_key = (req['proxy_type'], req['proxy'], bool(req['verify']))
         if not use_cache or pool_key not in self.pools:
             if req['proxy_type'] == 'socks5':
                 if req['verify']:
                     pool = SOCKSProxyManager(
                         proxy_url,
                         cert_reqs='CERT_REQUIRED',
                         ca_certs=certifi.where(),
                     )
                 else:
                     pool = SOCKSProxyManager(proxy_url)
             elif req['proxy_type'] == 'http':
                 if req['proxy_auth']:
                     proxy_headers = make_headers(
                         proxy_basic_auth=req['proxy_auth'])
                 else:
                     proxy_headers = None
                 if req['verify']:
                     pool = ProxyManager(
                         proxy_url,
                         proxy_headers=proxy_headers,
                         cert_reqs='CERT_REQUIRED',
                         ca_certs=certifi.where(),
                     )
                 else:
                     pool = ProxyManager(
                         proxy_url,
                         proxy_headers=proxy_headers,
                     )
             else:
                 raise error.IowebConfigError(
                     'Invalid value of request option `proxy_type`: %s' %
                     req['proxy_type'])
             if use_cache:
                 self.pools[pool_key] = pool
         else:
             pool = self.pools[pool_key]
     else:
         pool = self.pools[(None, None, bool(req['verify']))]
     return pool
Example #10
 def __init__(self):
     user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
     self.headers = {'User-Agent': user_agent}
     self.ip_url = 'http://icanhazip.com/'
     self.logger = logging.getLogger('gkp')
     retries = Retry(connect=5, read=5, redirect=5)
     self.agent = ProxyManager('http://localhost:8118/',
                               retries=retries,
                               timeout=Timeout(total=60.0))
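A usage sketch for Example #10's agent; 8118 is Privoxy's default port, and icanhazip.com simply echoes the caller's public IP, so this would print the exit IP as seen through the local proxy (the class name Checker is hypothetical):

checker = Checker()  # hypothetical name for the class owning this __init__
resp = checker.agent.request('GET', checker.ip_url, headers=checker.headers)
print(resp.data.decode().strip())  # exit IP as seen through the proxy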
Example #11
    def request(self):
        req = self._request

        if req.proxy:
            if req.proxy_userpwd:
                headers = make_headers(proxy_basic_auth=req.proxy_userpwd)
            else:
                headers = None
            proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
            if req.proxy_type == 'socks5':
                pool = SOCKSProxyManager(proxy_url)  # , proxy_headers=headers)
            else:
                pool = ProxyManager(proxy_url, proxy_headers=headers)
        else:
            pool = self.pool
        try:
            retry = Retry(redirect=False, connect=False, read=False)
            # The read timeout is not total response time timeout
            # It is the timeout on read of next data chunk from the server
            # Total response timeout is handled by Grab
            timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
            #req_headers = dict((make_unicode(x), make_unicode(y))
            #                   for (x, y) in req.headers.items())
            if six.PY3:
                req_url = make_unicode(req.url)
                req_method = make_unicode(req.method)
            else:
                req_url = make_str(req.url)
                req_method = req.method
            req.op_started = time.time()
            res = pool.urlopen(req_method,
                               req_url,
                               body=req.data,
                               timeout=timeout,
                               retries=retry,
                               headers=req.headers,
                               preload_content=False)
        except exceptions.ReadTimeoutError as ex:
            raise error.GrabTimeoutError('ReadTimeoutError', ex)
        except exceptions.ConnectTimeoutError as ex:
            raise error.GrabConnectionError('ConnectTimeoutError', ex)
        except exceptions.ProtocolError as ex:
            # TODO:
            # the code
            # raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])
            # fails
            # with error TypeError: 'OSError' object is not subscriptable
            raise error.GrabConnectionError('ProtocolError', ex)

        # WTF?
        self.request_head = b''
        self.request_body = b''
        self.request_log = b''

        self._response = res
Example #12
    def authenticate(self):
        self.base_url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        self.my_query = "PD-1%20ab%20agonist%5BTitle%2FAbstract%5D)%20AND%20(%222000%2F01%2F01%22%5BDate%20-%20Publication%5D%20%3A%20%223000%22%5BDate%20-%20Publication%5D"
        self.database = "pubmed"
        self.second_url = "esearch.fcgi?db={db}&term={query}&usehistory=y"
        self.final_url = self.base_url + self.second_url.format(
            db=self.database, query=self.my_query)
        self.http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        self.response = self.http.request('GET', self.final_url)
        self.firstResponse = self.http.request('GET', self.final_url)

        self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        self.my_query = "id=29554659"
        self.database = "pubmed"
        self.second_url = "elink.fcgi?dbfrom=gene&db={db}&{query}"
        self.final_url = self.base_url + self.second_url.format(
            db=self.database, query=self.my_query)
        self.secondResponse = self.http.request('GET', self.final_url)
Example #13
    def __init_connection(self, url=None, proxy=False):
        """Function for initiating connection with remote server

        :param url: The URL of the remote system
        :type url: str

        """

        self.__url = url if url else self.__url
        http = (ProxyManager(self.get_proxy(), cert_reqs='CERT_NONE')
                if self.get_proxy() and proxy
                else urllib3.PoolManager(cert_reqs='CERT_NONE'))
        self._conn = http.request
Example #14
 def request(self):
     QtWidgets.qApp.processEvents()
     self.proxi()
     print(self.stroka2)
     self.prm = ProxyManager(str(self.stroka2))
     print(self.stroka2)
     try:
         QtWidgets.qApp.processEvents()
         r = self.prm.request('GET', 'https://www.yandex.ru/')
     except Exception:
         return False
     return True
Example #15
    def request(self):
        req = self._request

        if req.proxy:
            if req.proxy_userpwd:
                headers = make_headers(proxy_basic_auth=req.proxy_userpwd)
            else:
                headers = None
            proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
            try:
                pool = ProxyManager(proxy_url, proxy_headers=headers)
            except ProxySchemeUnknown:
                raise GrabMisuseError('Urllib3 transport does '
                                      'not support %s proxies' %
                                      req.proxy_type)
        else:
            pool = self.pool
        try:
            retry = Retry(redirect=False, connect=False, read=False)
            # The read timeout is not total response time timeout
            # It is the timeout on read of next data chunk from the server
            # Total response timeout is handled by Grab
            timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
            #req_headers = dict((make_unicode(x), make_unicode(y))
            #                   for (x, y) in req.headers.items())
            if six.PY3:
                req_url = make_unicode(req.url)
                req_method = make_unicode(req.method)
            else:
                req_url = make_str(req.url)
                req_method = req.method
            req.op_started = time.time()
            res = pool.urlopen(req_method,
                               req_url,
                               body=req.data,
                               timeout=timeout,
                               retries=retry,
                               headers=req.headers,
                               preload_content=False)
        except exceptions.ReadTimeoutError as ex:
            raise error.GrabTimeoutError('Read timeout')
        except exceptions.ConnectTimeoutError as ex:
            raise error.GrabConnectionError('Could not create connection')
        except exceptions.ProtocolError as ex:
            raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])

        # WTF?
        self.request_head = b''
        self.request_body = b''
        self.request_log = b''

        self._response = res
Example #16
def get_http_connector(conf, options):
    """
    Create the HTTP connector; which type is returned depends on the api_proxy configuration parameter.

    :param conf: configuration object
    :param options: additional options

    :return: ProxyManager if api_proxy is set, otherwise PoolManager object
    """
    if conf.api_proxy:
        return ProxyManager(conf.api_proxy, **options)
    else:
        return PoolManager(**options)
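A usage sketch for Example #16; Conf is a hypothetical stand-in for the configuration object, and the options dict holds ordinary PoolManager/ProxyManager keyword arguments:

class Conf:
    api_proxy = "http://proxy.example.com:8080"  # set to None/'' to get a PoolManager instead

options = {"num_pools": 4, "maxsize": 10}
http = get_http_connector(Conf(), options)
resp = http.request("GET", "https://example.com/")
print(resp.status)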
Example #17
def run_proxy(url):
	global lock
	#print(lock.locked())
	if prox == '':
		print('No proxies available.')
		return run(url)
	print('Proxy: ' + prox)
	http = ProxyManager(prox)
	try:
		data = {'attribute': 'value'}
		encoded_data = json.dumps(data).encode('utf-8')
		req = http.request(
			'POST',
			url,
			timeout=3,
			body=encoded_data,
			headers={'Content-Type': 'application/json'})  # the body is JSON, not 'html/text'
		print(req.status)
		if req.status == 404:
			print('Item Does not exist.')
			#return run(url)
			return
		if req.status == 501:
			print('Proxy at api call limit')
			get_new_proxy()
			return run_proxy(url)
		if req.status == 407:
			print('Authentication required')
			get_new_proxy()
			return run_proxy(url)
		if req.status != 200:
			print('Unknown Status Code')
			print(req.status)
			get_new_proxy()
			return run_proxy(url)
	except Exception:
		print('Request timed out.')
		get_new_proxy()
		return run(url)
	
	
	data = json.loads(req.data)
	req.release_conn()
	
	data = data['item']
	id = str(data['id'])
	print('ID: ' + id)
	with open('ItemIds', 'a') as file:
		file.write(id + '\n')
Example #18
    def get_uids(self, term):

        base_url = "https://www.ncbi.nlm.nih.gov/medgen/?term="
        term = term.replace(" ", "+")
        final_url = base_url + term
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        response = http.request('GET', final_url)
        soup = BeautifulSoup(response.data, 'lxml')

        pattern = "<dd>[0-9]*</dd>"
        p = re.compile(pattern)
        ids = p.findall(str(soup))
        ids = [
            id.replace("<dd>", "").replace("</dd>", "").strip() for id in ids
        ]
        return ids
Example #19
    def urllib3_from_pool(self, http_request):
        """
        Get a u3 pool from url and request
        :param http_request: HttpRequest
        :type http_request: HttpRequest
        :return: urllib3.poolmanager.ProxyManager
        :rtype: urllib3.poolmanager.ProxyManager
        """

        if not http_request.http_proxy_host:
            SolBase.sleep(0)
            return self._u3_basic_pool

        # Compute key
        key = "{0}#{1}#".format(
            http_request.http_proxy_host,
            http_request.http_proxy_port,
        )

        # Check
        if key in self._u3_proxy_pool:
            SolBase.sleep(0)
            return self._u3_proxy_pool[key]

        # Allocate (in lock)
        with self._u3_proxy_locker:
            # Re-check under the lock: another thread may have allocated this key meanwhile
            if key in self._u3_proxy_pool:
                return self._u3_proxy_pool[key]

            # Check maxed
            if len(self._u3_proxy_pool) >= self._u3_proxy_pool_max:
                raise Exception("u3 pool maxed, cur={0}, max={1}".format(
                    len(self._u3_proxy_pool), self._u3_proxy_pool_max
                ))

            # Uri
            proxy_url = "http://{0}:{1}".format(
                http_request.http_proxy_host,
                http_request.http_proxy_port)

            # Ok, allocate
            # Force underlying fifo queue to 1024 via maxsize
            p = ProxyManager(num_pools=1024, maxsize=1024, proxy_url=proxy_url)
            self._u3_proxy_pool[key] = p
            logger.info("Started new pool for key=%s", key)
            SolBase.sleep(0)
            return p
Example #20
    def get_uids(self, term):

        # Base Query and More Proxy Management #
        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        term = self.preprocess(term).replace(" ", "+")
        second_url = "esearch.fcgi?db={db}&term={query}&retmax=100&format=json"
        final_url = base_url + second_url.format(db=self.ontology, query=term)
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        t.sleep(1)

        # Response data #
        response = http.request('GET', final_url)
        json_data = json.loads(response.data)

        # Updates number of search results #
        self.get_counts(int(json_data['esearchresult']['count']))

        # Returns ID List #
        return json_data['esearchresult']['idlist']
Example #21
    def request(self):
        req = self._request

        if req.proxy:
            if req.proxy_userpwd:
                auth = '%s@' % req.proxy_userpwd
            else:
                auth = ''
            proxy_url = '%s://%s%s' % (req.proxy_type, auth, req.proxy)
            pool = ProxyManager(proxy_url)
        else:
            pool = self.pool
        try:
            retry = Retry(redirect=False, connect=False, read=False)
            timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
            #req_headers = dict((make_unicode(x), make_unicode(y))
            #                   for (x, y) in req.headers.items())
            if six.PY3:
                req_url = make_unicode(req.url)
                req_method = make_unicode(req.method)
            else:
                req_url = make_str(req.url)
                req_method = req.method
            res = pool.urlopen(req_method,
                               req_url,
                               body=req.data,
                               timeout=timeout,
                               retries=retry,
                               headers=req.headers,
                               preload_content=False)
        except exceptions.ConnectTimeoutError as ex:
            raise error.GrabConnectionError('Could not create connection')
        except exceptions.ProtocolError as ex:
            raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])

        # WTF?
        self.request_head = ''
        self.request_body = ''
        self.request_log = ''

        self._response = res
Example #22
    def __init_connection(self, url=None, proxy=False):
        """Function for initiating connection with remote server

        :param url: The URL of the remote system
        :type url: str

        """

        self.__url = url if url else self.__url
        if self.get_proxy() and proxy:
            if self.get_proxy().startswith('socks'):
                LOGGER.info("Initializing a SOCKS proxy.")
                http = SOCKSProxyManager(self.get_proxy(), cert_reqs='CERT_NONE')
            else:
                LOGGER.info("Initializing a HTTP proxy.")
                http = ProxyManager(self.get_proxy(), cert_reqs='CERT_NONE')
        else:
            LOGGER.info("Initializing no proxy.")
            http = urllib3.PoolManager(cert_reqs='CERT_NONE')

        self._conn = http.request
Example #23
    def __init__(self, configuration: Configuration, pools_size: int = 4):
        self.configuration = configuration

        self.header_params: dict = {}
        self.user_agent = 'felix-scholz/website-python-client/1.0.1/python'

        addition_pool_args = {}
        if configuration.assert_hostname is not None:
            addition_pool_args[
                'assert_hostname'] = configuration.assert_hostname

        if configuration.retries is not None:
            addition_pool_args['retries'] = configuration.retries

        if configuration.proxy:
            self.pool_manager = ProxyManager(
                num_pools=pools_size,
                maxsize=configuration.connection_pool_maxsize
                if configuration.connection_pool_maxsize is not None else 4,
                cert_reqs=ssl.CERT_REQUIRED
                if configuration.verify_ssl else ssl.CERT_NONE,
                ca_certs=configuration.ssl_ca_cert
                if configuration.ssl_ca_cert is not None else certifi.where(),
                cert_file=configuration.cert_file,
                key_file=configuration.key_file,
                proxy_url=configuration.proxy,
                proxy_headers=configuration.proxy_headers,
                **addition_pool_args)
        else:
            self.pool_manager = PoolManager(
                num_pools=pools_size,
                maxsize=configuration.connection_pool_maxsize
                if configuration.connection_pool_maxsize is not None else 4,
                cert_reqs=ssl.CERT_REQUIRED
                if configuration.verify_ssl else ssl.CERT_NONE,
                ca_certs=configuration.ssl_ca_cert
                if configuration.ssl_ca_cert is not None else certifi.where(),
                cert_file=configuration.cert_file,
                key_file=configuration.key_file,
                **addition_pool_args)
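A hedged sketch of the Configuration fields Example #23 reads (attribute names come from the snippet; the class and values are illustrative):

class Configuration:
    # Hypothetical minimal configuration carrying only what Example #23 reads.
    assert_hostname = None   # optional bool forwarded to the pool
    retries = None           # optional urllib3 Retry or int
    proxy = "http://proxy.example.com:8080"  # falsy value selects the PoolManager branch
    proxy_headers = None
    connection_pool_maxsize = 8
    verify_ssl = True        # False -> ssl.CERT_NONE
    ssl_ca_cert = None       # None -> certifi.where()
    cert_file = None
    key_file = None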
Example #24
    def _init_connection(self):
        """Function for initiating connection with remote server"""
        if self._connection_properties.get('ca_certs', None):
            LOGGER.info('Using CA cert to confirm identity.')
            cert_reqs = 'CERT_REQUIRED'
        else:
            LOGGER.info('Not using CA certificate.')
            cert_reqs = 'CERT_NONE'
        if self.proxy:
            if self.proxy.startswith('socks'):
                LOGGER.info("Initializing a SOCKS proxy.")
                http = SOCKSProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6,
                                         **self._connection_properties)
            else:
                LOGGER.info("Initializing an HTTP proxy.")
                http = ProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6,
                                    **self._connection_properties)
        else:
            LOGGER.info("Initializing no proxy.")
            http = PoolManager(cert_reqs=cert_reqs,
                               maxsize=6,
                               **self._connection_properties)

        self._conn = http.request
Example #25
    def get_terms(self,
                  term,
                  id,
                  id_string,
                  number_of_results,
                  is_match=False):

        # Make API call to get xml data #
        term = self.lemmatize(self.preprocess(term))

        # Proxy Code and Base Query #
        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        second_url = "esummary.fcgi?db=medgen&db=medgen&{query}"
        final_url = base_url + second_url.format(db=self.ontology,
                                                 query="id=" + id_string)
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        t.sleep(1)
        response = http.request('GET', final_url)
        soup = BeautifulSoup(response.data, 'lxml')

        # Get the separate hits in lists #
        hits = soup.find_all('documentsummary')

        # Dictionary to store the results #
        results = []

        # Set threshold, take the min of the threshold requested and the total number of search results #
        threshold = min(self.threshold, number_of_results)

        # For every hit (each hit represents data from ONE UID) #
        for hit in hits:

            # Keeps track of meeting the threshold #
            counter = 0

            # Check if return is a disease #
            check = "Blank" if not len(hit.find("semanticid")) else hit.find(
                "semanticid").text.strip()

            # List of acceptable semantic types #
            semantic_types = [
                'T191', 'T047', 'T048', 'T019', 'T190', 'T033', 'T049', 'T046',
                'T184', "Blank"
            ]

            # If term is a disease, execute the following: #
            if check in semantic_types:

                # Get Concept ID #
                concept_id = "Blank" if not len(hit.find(
                    'conceptid')) else hit.find('conceptid').text.strip()

                # Get Title #
                title = hit.find('title').text.strip()

                # Get name tags for looping #
                name_tags = hit.find_all('name')

                # Get definition/description #
                definition = hit.find('definition').text.strip()
                def_score = self.modified_jaccard_similarity(term, definition)

                # Get SAB, CODE, SCUI, SDUI, and Title #
                processed_term = self.stem(term)
                new_title = self.stem(self.lemmatize(self.preprocess(title)))

                # Keeps track of best scores for each uid #
                scores = []

                # Loop through synonyms #
                for data in name_tags:

                    # Get the max syn_score between a synonym and the title #
                    new_text = self.stem(
                        self.lemmatize(self.preprocess(data.text)))
                    syn_score = max(
                        fuzz.ratio(new_text, processed_term),
                        fuzz.ratio(processed_term, new_title)
                    ) if len(new_text.split()) == 1 and len(
                        new_title.split()) == 1 and len(processed_term.split(
                        )) == 1 else self.jaccard_similarity(
                            new_text, processed_term)

                    # If score is 100 or the term is one word, take the syn_score #
                    score = syn_score if len(
                        term.split()) == 1 or syn_score == 100 else max(
                            syn_score, def_score)

                    # Initialize dictionary to add to results #
                    value = dict()
                    code, sab, scui, sdui = None, None, None, None
                    index = hits.index(hit)

                    # Add Basic Data MetaData to Dictionary #
                    value['Disease_Input'] = term
                    value['Ontology'] = self.ontology
                    value['Synonym'] = data.text
                    value['Description'] = definition
                    value['Semantic_Type'] = check
                    value['UID'] = id[index]
                    value['Ontology_ID'] = concept_id
                    value['Final_Score'] = syn_score + def_score
                    value['Synonym_Score'] = syn_score
                    value['Description_Score'] = def_score
                    value['Title'] = title
                    value['Number_of_Results'] = number_of_results
                    value['Holder'] = score

                    # Add extra metadata that may throw errors and add to dictionary #
                    try:
                        code = data['code']
                        value['CODE'] = code
                    except KeyError:
                        value['CODE'] = np.nan
                    try:
                        sab = data['sab']
                        value['SAB'] = sab
                    except KeyError:
                        value['SAB'] = np.nan
                    try:
                        scui = data['scui']
                        value['SCUI'] = scui
                    except KeyError:
                        value['SCUI'] = np.nan
                    try:
                        sdui = data['sdui']
                        value['SDUI'] = sdui
                    except KeyError:
                        value['SDUI'] = np.nan

                    scores.append(value)

                # This code takes scores (as it has metadata for only ONE uid) and finds the best match #
                # Get the best score, if scores has results (it may be empty) #
                if scores:

                    # Gets the dictionary with the highest score and its corresponding data #
                    best_score_data = max(scores,
                                          key=lambda x: x['Final_Score'])
                    best_score = best_score_data['Holder']
                    results.append(best_score_data)

                    # If best score is greater than or equal to the threshold, increase counter (a step closer to threshold) #
                    if best_score >= self.score_threshold or threshold == 1:
                        counter += 1

                    # If threshold is met, then return results #
                    if counter == threshold:
                        return results

        return results
Example #26
def main():
    parser = argparse.ArgumentParser(
        description="Uploads images/documents to GitHub as issue attachments.\n"
        "See https://github.com/zmwangx/ghuc for detailed documentation.")
    parser.add_argument(
        "-r",
        "--repository-id",
        type=int,
        default=1,
        help="id of repository to upload from (defaults to 1)",
    )
    parser.add_argument("-x", "--proxy", help="HTTP or SOCKS proxy")
    parser.add_argument("-q",
                        "--quiet",
                        action="store_true",
                        help="set logging level to ERROR")
    parser.add_argument("--debug",
                        action="store_true",
                        help="set logging level to DEBUG")
    parser.add_argument(
        "--gui",
        action="store_true",
        help=
        "disable headless mode when running browser sessions through Selenium WebDriver",
    )
    parser.add_argument(
        "--container",
        action="store_true",
        help="add extra browser options to work around problems in containers",
    )
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument("paths", type=pathlib.Path, nargs="+", metavar="PATH")
    args = parser.parse_args()

    if args.debug:
        custom_level = logging.DEBUG
    elif args.quiet:
        custom_level = logging.ERROR
    else:
        custom_level = None
    if custom_level is not None:
        logger.setLevel(custom_level)
        logger.handlers[0].setLevel(custom_level)

    global repository_id
    global proxy
    global headless
    global container

    repository_id = args.repository_id
    proxy = args.proxy or os.getenv("https_proxy")
    if proxy and not re.match(r"^(https?|socks(4a?|5h?))://", proxy):
        proxy = "http://%s" % proxy
    if proxy:
        logger.debug("using proxy %s", proxy)
    headless = not args.gui
    container = args.container

    common_http_options = dict(cert_reqs="CERT_REQUIRED", timeout=3.0)
    if not proxy:
        http_client = PoolManager(**common_http_options)
    elif proxy.startswith("http"):
        http_client = ProxyManager(proxy, **common_http_options)
    elif proxy.startswith("socks"):
        if SOCKSProxyManager:
            http_client = SOCKSProxyManager(proxy, **common_http_options)
        else:
            logger.critical(
                "your urllib3 installation does not support SOCKS proxies")
            sys.exit(1)
    else:
        logger.critical("unrecognized proxy type %s", proxy)
        sys.exit(1)

    try:
        load_cookie_and_token()
        count = len(args.paths)
        num_errors = 0
        for path in args.paths:
            try:
                upload_asset(http_client, path)
            except UploadError:
                num_errors += 1
        if count > 1 and num_errors > 0:
            logger.warning("%d failed uploads", num_errors)
        sys.exit(0 if num_errors == 0 else 1)
    except ExtractionError:
        logger.critical("aborting due to inability to extract credentials")
        sys.exit(1)
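The proxy normalization in Example #26 only prepends http:// when no recognized scheme is present; a quick self-contained check of that regex against a few inputs:

import re

pattern = r"^(https?|socks(4a?|5h?))://"
for p in ["proxy.example.com:8080", "socks5h://127.0.0.1:9050", "https://corp-proxy:3128"]:
    normalized = p if re.match(pattern, p) else "http://%s" % p
    print(p, "->", normalized)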
Example #27
    def get_terms(self, term, id, number_of_results):

        # Make API call to get json_data #
        term = self.lemmatize(self.preprocess(term))

        # It stores a given score result that will be added to scores, then to results #
        json_dict = dict()

        # Base Query and More Proxy Management #
        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        second_url = "esummary.fcgi?db=mesh&db=mesh&{query}&format=json"
        final_url = base_url + second_url.format(db=self.ontology,
                                                 query="id=" + id)
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        t.sleep(1)

        # Response data #
        response = http.request('GET', final_url)
        json_data = json.loads(response.data)
        uids = json_data['result']['uids']

        # Holds a list of dictionaries, will be converted to dataframe #
        results = []

        # Take the minimum of what the threshold is, versus the number of search hits #
        threshold = min(self.threshold, number_of_results)

        # Loop through each uid in the uids list #
        for uid in uids:

            # Keeps track of uids that score at or above the scoring requirement, used for pruning #
            counter = 0

            # This represents json data from the UID that is CURRENTLY being looped through #
            json_section = json_data['result'][uid]

            # Check if ID is a disease #
            check_id = self.filter_by_disease(id, json_section)

            # If the search term is a disease... #
            if check_id:

                # Pure extracted data from json file before processing #
                scope_note = json_section["ds_scopenote"]
                mesh_id = json_section["ds_meshui"]
                mesh_terms = json_section["ds_meshterms"]

                # Initialize score variables #
                score = None
                syn_score = None
                processed_term = self.stem(term)
                def_score = self.modified_jaccard_similarity(term, scope_note)

                # Keeps track of best scores for each uid #
                scores = []

                # If there's only one search result, take it (regardless of score), and return it #
                # Adding it to just the scores list is fine since it's the only output #
                if threshold == 1:
                    processed_mesh_term = self.stem(
                        self.lemmatize(self.preprocess(mesh_terms[0])))
                    syn_score = fuzz.ratio(
                        processed_mesh_term, processed_term
                    ) if len(processed_term.split()) == 1 and len(
                        processed_mesh_term.split()) == 1 else self.jaccard_similarity(
                            processed_mesh_term, processed_term)
                    score = max(syn_score, def_score)
                    json_dict = {
                        'Ontology': self.ontology,
                        'UID': uid,
                        'Ontology_ID': mesh_id,
                        'Disease_Input': term,
                        "Synonym": mesh_terms[0],
                        "Description": scope_note,
                        'Number_of_Results': number_of_results,
                        'Synonym_Score': syn_score,
                        'Description_Score': def_score,
                        'Final_Score': syn_score + def_score,
                        'Holder': score
                    }
                    scores.append(json_dict)
                    return scores

                else:

                    # Loop through each synonym in mesh_terms for scoring #
                    for mesh_term in mesh_terms:

                        # Prepare synonym for Levenshtein distance matching (through the fuzz library) #
                        processed_mesh_term = self.stem(
                            self.lemmatize(self.preprocess(mesh_term)))
                        syn_score = fuzz.ratio(
                            processed_mesh_term, processed_term) if len(
                                processed_term.split()) == 1 and len(
                                    processed_mesh_term.split()
                                ) == 1 else self.jaccard_similarity(
                                    processed_mesh_term, processed_term)

                        # If term is only one word, just take the syn_score as its final score, otherwise take the max #
                        score = syn_score if len(term.split()) == 1 else max(
                            syn_score, def_score)

                        # Build the score record and add it to the scores list #
                        json_dict = {
                            'Ontology': self.ontology,
                            'UID': uid,
                            'Ontology_ID': mesh_id,
                            'Disease_Input': term,
                            "Synonym": mesh_term,
                            "Description": scope_note,
                            'Number_of_Results': number_of_results,
                            'Synonym_Score': syn_score,
                            'Description_Score': def_score,
                            'Final_Score': syn_score + def_score,
                            'Holder': score
                        }
                        scores.append(json_dict)

                # This code takes scores (as it has metadata for only ONE uid) and finds the best match #
                # Get the best score, if scores has results (it may be empty) #
                if scores:

                    # Gets the dictionary with the highest score and its corresponding data #
                    best_score_data = max(scores,
                                          key=lambda x: x['Final_Score'])
                    best_score = best_score_data['Holder']
                    results.append(best_score_data)

                    # If best score is greater than or equal to the threshold, increase counter (a step closer to threshold) #
                    if best_score >= self.score_threshold or threshold == 1:
                        counter += 1

                    # If threshold is met, then return results #
                    if counter == threshold:
                        return results

        return results
Example #28
from os import environ
from pathlib import Path
from tempfile import TemporaryDirectory

from dulwich import porcelain
from dulwich.repo import Repo
from urllib3 import ProxyManager

## Configuration settings:
# Source url of the repo (Note: https here. ssh would work as well, a bit differently).
GITURL = "https://github.com/lomignet/thisdataguy_snippets"
# Github token: https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token
TOKEN = "12345blah"
## /end of configuration.

# If the environment variable https_proxy exists, we need to tell Dulwich to use a proxy.
if environ.get("https_proxy", None):
    pool_manager = ProxyManager(environ["https_proxy"], num_pools=1)
else:
    pool_manager = None

with TemporaryDirectory() as gitrootdir:

    # Gotta love operator overloading!
    gitdir = Path(gitrootdir) / "repo"
    print("Cloning...")
    repo = porcelain.clone(
        GITURL,
        password=TOKEN,
        # Tokens are kinda public keys, no need for a username but it still needs to be provided for Dulwich.
        username="******",
        target=gitdir,
        checkout=True,
        # The snippet is truncated here; presumably the call finishes by handing
        # Dulwich the proxy-aware pool manager built above:
        pool_manager=pool_manager,
    )
Example #29
    def request(self):
        req = self._request

        if req.proxy:
            if req.proxy_userpwd:
                headers = make_headers(proxy_basic_auth=req.proxy_userpwd)
            else:
                headers = None
            proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
            if req.proxy_type == 'socks5':
                pool = SOCKSProxyManager(
                    proxy_url,
                    cert_reqs='CERT_REQUIRED',
                    ca_certs=certifi.where())  # , proxy_headers=headers)
            else:
                pool = ProxyManager(proxy_url,
                                    proxy_headers=headers,
                                    cert_reqs='CERT_REQUIRED',
                                    ca_certs=certifi.where())
        else:
            pool = self.pool
        with self.wrap_transport_error():
            # Retries can be disabled by passing False:
            # http://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#module-urllib3.util.retry
            # Do not use False because of warning:
            # Converted retries value: False -> Retry(total=False,
            # connect=None, read=None, redirect=0, status=None)
            retry = Retry(
                total=False,
                connect=False,
                read=False,
                redirect=0,
                status=None,
            )
            # The read timeout is not total response time timeout
            # It is the timeout on read of next data chunk from the server
            # Total response timeout is handled by Grab
            timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
            #req_headers = dict((make_unicode(x), make_unicode(y))
            #                   for (x, y) in req.headers.items())
            if six.PY3:
                req_url = make_unicode(req.url)
                req_method = make_unicode(req.method)
            else:
                req_url = make_str(req.url)
                req_method = req.method
            req.op_started = time.time()
            try:
                res = pool.urlopen(req_method,
                                   req_url,
                                   body=req.data,
                                   timeout=timeout,
                                   retries=retry,
                                   headers=req.headers,
                                   preload_content=False)
            except UnicodeError as ex:
                raise error.GrabConnectionError('GrabInvalidUrl', ex)
        #except exceptions.ReadTimeoutError as ex:
        #    raise error.GrabTimeoutError('ReadTimeoutError', ex)
        #except exceptions.ConnectTimeoutError as ex:
        #    raise error.GrabConnectionError('ConnectTimeoutError', ex)
        #except exceptions.ProtocolError as ex:
        #    # TODO:
        #    # the code
        #    # raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])
        #    # fails
        #    # with error TypeError: 'OSError' object is not subscriptable
        #    raise error.GrabConnectionError('ProtocolError', ex)
        #except exceptions.SSLError as ex:
        #    raise error.GrabConnectionError('SSLError', ex)

        # WTF?
        self.request_head = b''
        self.request_body = b''
        self.request_log = b''

        self._response = res
Example #30
    def update_db(self, year):
        filename = CVE_FEED_FILENAME.replace('$$$$', year) + '.json'
        file_path = path.join(CACHE_PATH, filename)
        meta_filename = CVE_FEED_FILENAME.replace('$$$$', year) + '.meta'
        meta_file_path = path.join(CACHE_PATH, year + '.meta')

        if environ.get('http_proxy') is not None:
            http = ProxyManager(environ.get('http_proxy'), maxsize=10)
        else:
            http = PoolManager()
        disable_warnings(urllib3_exceptions.InsecureRequestWarning)
        r = None
        meta = None
        try:
            r = http.request('GET',
                             CVE_FEED_URL + meta_filename,
                             preload_content=False)
        except Exception as e:
            print("[!] Error obtaining CVE meta data: " + str(e))

        if path.isfile(meta_file_path):
            with open(meta_file_path, 'r') as myfile:
                meta = myfile.read()
            if r is not None and meta is not None and r.data.decode(
                    'utf-8').replace('\r', '') == meta:
                return

        else:
            if r is not None:
                with open(meta_file_path, 'wb') as out_file:
                    copyfileobj(r, out_file)

        try:
            with http.request('GET',
                              CVE_FEED_URL + filename + '.zip',
                              preload_content=False) as r, open(
                                  file_path + '.zip', 'wb') as out_file:
                copyfileobj(r, out_file)
        except Exception as e:
            print("[!] Error downloading CVE feed: " + str(e))
            return
        try:
            archive = ZipFile(file_path + '.zip', 'r')
            archive.extract(filename, CACHE_PATH)
        except Exception as e:
            print("[!] Error extracting the CVE archive: " + str(e))
            return

        cve_cache = []
        actions = []
        count = 0

        with open(file_path, encoding='utf-8') as data_file:
            data = json.loads(data_file.read())["CVE_Items"]
        for i in data:
            item = cve_item()
            item.id = i["cve"]["CVE_data_meta"]["ID"]
            for j in i['cve']['references']['reference_data']:
                item.references.append(j)
            item.summary = i['cve']['description']['description_data'][0][
                "value"]
            for j in i['configurations']['nodes']:
                if 'cpe' in j:
                    for k in j['cpe']:
                        item.affected.append({
                            "vuln":
                            k['vulnerable'],
                            "cpe22":
                            k['cpe22Uri'],
                            "cpe23":
                            k['cpe23Uri'],
                            "vStartE":
                            k.get('versionStartExcluding', ''),
                            "vStartI":
                            k.get('versionStartIncluding', ''),
                            "vEndE":
                            k.get('versionEndExcluding', ''),
                            "vEndI":
                            k.get('versionEndIncluding', '')
                        })
                elif 'children' in j:
                    for t in j['children']:
                        if 'cpe' in t:
                            for k in t['cpe']:
                                item.affected.append({
                                    "vuln":
                                    k['vulnerable'],
                                    "cpe22":
                                    k['cpe22Uri'],
                                    "cpe23":
                                    k['cpe23Uri'],
                                    "vStartE":
                                    k.get('versionStartExcluding', ''),
                                    "vStartI":
                                    k.get('versionStartIncluding', ''),
                                    "vEndE":
                                    k.get('versionEndExcluding', ''),
                                    "vEndI":
                                    k.get('versionEndIncluding', '')
                                })
            if 'baseMetricV3' in i['impact']:
                item.cvss['vector_string_v3'] = i['impact']['baseMetricV3'][
                    'cvssV3']['vectorString']
                item.cvss['score_v3'] = i['impact']['baseMetricV3']['cvssV3'][
                    'baseScore']
            if 'baseMetricV2' in i['impact']:
                item.cvss['vector_string_v2'] = i['impact']['baseMetricV2'][
                    'cvssV2']['vectorString']
                item.cvss['score_v2'] = i['impact']['baseMetricV2']['cvssV2'][
                    'baseScore']
            item.published = i['publishedDate']
            item.last_modified = i['lastModifiedDate']
            cve_cache.append(item)
            if USE_ELASTIC_SEARCH:
                actions.append({
                    "_index": "cve-" + year,
                    "_type": "vulns",
                    "_source": {
                        'cve_id': item.id,
                        'summary': item.summary,
                        'published': item.published,
                        'last_modified': item.last_modified,
                        'score_v3': item.cvss.get('score_v3', 0),
                        'score_v2': item.cvss.get('score_v2', 0),
                        'vector_string_v2':
                        item.cvss.get('vector_string_v2', 'NA'),
                        'vector_string_v3':
                        item.cvss.get('vector_string_v3', 'NA'),
                        'affected': item.affected,
                        'cache-index': count,
                    }
                })
                count = count + 1

        if USE_ELASTIC_SEARCH is True:
            try:
                if self.es.indices.exists(index="cve-" + year):
                    self.es.indices.delete(index='cve-' + year,
                                           ignore=[400, 404],
                                           request_timeout=60)
                mappings = {
                    "mappings": {
                        "vulns": {
                            "properties": {
                                "cve_id": {
                                    "type": "keyword"
                                },
                                "score_v2": {
                                    "type": "float"
                                },
                                "score_v3": {
                                    "type": "float"
                                },
                                "affected": {
                                    "type": "nested",
                                    "properties": {
                                        "cpe22": {
                                            "type": "keyword"
                                        },
                                        "cpe23": {
                                            "type": "keyword"
                                        },
                                        "vStartE": {
                                            "type": "keyword"
                                        },
                                        "vStartI": {
                                            "type": "keyword"
                                        },
                                        "vEndE": {
                                            "type": "keyword"
                                        },
                                        "vEndI": {
                                            "type": "keyword"
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                self.es.indices.create(index="cve-" + year,
                                       ignore=400,
                                       body=mappings)
                self.helpers.bulk(self.es, actions, request_timeout=60)
            except Exception as e:
                print("[!] Elasticsearch indexing error: " + str(e))

        try:
            dump(cve_cache, open(path.join(CACHE_PATH, year + '.db'), "wb"),
                 HIGHEST_PROTOCOL)
            remove(file_path + '.zip')
            remove(file_path)
        except PickleError as e:
            print("[!] Error while caching CVE data: " + str(e))