def perform_request(self, method, url, params=None, body=None, timeout=None,
                    ignore=(), headers=None):
    """Send a request through ``self.make_request`` and return the raw result.

    ``params``, ``timeout`` and ``ignore`` are accepted for interface
    compatibility but are not used by this implementation.

    :param method: HTTP method name
    :param url: request path, also used as ``auth_path``
    :param body: request payload; an empty string is sent when falsy
    :param headers: optional headers forwarded to ``make_request``
    :return: ``(status, headers_dict, body_text)`` with the body decoded
        as UTF-8
    :raises ConnectionError: when ``make_request`` raises ``BotoClientError``
    :raises Exception: any other failure of ``make_request`` is logged and
        re-raised unchanged
    """
    start = time.time()
    data = body or ''
    try:
        response = self.make_request(method, url, headers=headers,
                                     host=self.host, data=data, auth_path=url)
    except Exception as e:
        self.log_request_fail(method, url, url, body, time.time() - start,
                              exception=e)
        if isinstance(e, BotoClientError):
            raise ConnectionError('N/A', str(e), e)
        # Do not swallow unexpected errors: falling through would hit the
        # return statement below with ``response`` unbound.
        raise
    return (response.status, dict(response.msg.items()),
            response.read().decode('utf-8'))
async def test_sniff_on_fail_triggers_sniffing_on_fail(self):
    """Check the pool contents after a failing request with sniff-on-fail enabled."""
    node_configs = [
        {"exception": ConnectionError("abandon ship")},
        {"data": CLUSTER_NODES},
    ]
    transport = AsyncTransport(
        node_configs,
        connection_class=DummyConnection,
        sniff_on_connection_fail=True,
        max_retries=0,
        randomize_hosts=False,
    )
    await transport._async_call()

    try:
        await transport.perform_request("GET", "/")
    except ConnectionError:
        connection_error = True
    else:
        connection_error = False

    # The sniffing task has to finish before the pool can be inspected.
    await transport.sniffing_task

    assert connection_error
    assert 1 == len(transport.connection_pool.connections)
    assert "http://1.1.1.1:123" == transport.get_connection().host
class CatchConnectionError(ElasticsearchBaseTestCase):
    """
    Tests methods for the catch_connection_error decorator.
    """

    def test_can_connect(self):
        """
        Tests the catch_connection_error decorator when a connection
        is established.
        """
        # Decorate a callable and invoke it, mirroring test_cannot_connect.
        # Passing the *result* of insert() to the decorator would never run
        # the decorated code path.
        @catch_connection_error
        def test_decorator():
            """Test the catch_connection_error decorator."""
            self.engine.insert({'foo': 'bar'})

        with LogCapture() as log_capture:
            test_decorator()
            # No log records are expected on the success path.
            log_capture.check()

    @patch('engines.elasticsearch.engine.ELASTICSEARCH.index',
           side_effect=ConnectionError())
    def test_cannot_connect(self, mock_index):
        """
        Tests the catch_connection_error decorator when a connection
        cannot be established.
        """
        @catch_connection_error
        def test_decorator():
            """Test the catch_connection_error decorator."""
            self.engine.insert({'foo': 'bar'})

        with LogCapture() as log_capture:
            test_decorator()
            expected = 'Cannot connect to Elasticsearch'
            log_capture.check(
                ('engines.elasticsearch.engine', 'ERROR', expected),
            )
def _try_load_queries(self, wait=True, timeout=1.0): added = 0 while len(self.handles) < self.max_active: try: if wait: handler = yield from wait_for(self.waiting_handles.get(), timeout, loop=self.loop) else: handler = self.waiting_handles.get_nowait() # only wait once wait = False # needed to keep reference count self.handles.add(handler) self.multi.add_handle(handler) added += 1 except QueueEmpty: break except TimeoutError: break if added > 0: ret, num_handles = self._perform_loop() if ret > 0: raise ConnectionError("pycurl failed", ret)
def perform_request(self, method, url, params=None, body=None, timeout=None, ignore=()):
    """Issue an HTTP request through ``self.session`` and return the result.

    Generator-based coroutine: it delegates with ``yield from``.

    :param method: HTTP method name
    :param url: path appended to ``self.base_url``
    :param params: optional mapping encoded into the query string
    :param body: request payload, passed as ``data``
    :param timeout: per-request timeout; ``self.timeout`` is used when falsy
    :param ignore: non-2xx status codes that must not raise
    :return: ``(status, headers, raw_data)``
    :raises ConnectionTimeout: on ``asyncio.TimeoutError``
    :raises ConnectionError: on any other exception raised while requesting
    """
    url_path = url
    if params:
        url_path = '%s?%s' % (url, urlencode(params))
    url = self.base_url + url_path

    start = self.loop.time()
    response = None
    try:
        with aiohttp.Timeout(timeout or self.timeout):
            response = yield from self.session.request(method, url, data=body)
            raw_data = yield from response.text()
        duration = self.loop.time() - start
    except asyncio.CancelledError:
        # Cancellation is not a connection failure; where CancelledError
        # derives from Exception the generic handler below would otherwise
        # turn it into ConnectionError.
        raise
    except Exception as e:
        self.log_request_fail(method, url, url_path, body,
                              self.loop.time() - start, exception=e)
        if isinstance(e, asyncio.TimeoutError):
            raise ConnectionTimeout('TIMEOUT', str(e), e)
        raise ConnectionError('N/A', str(e), e)
    finally:
        if response is not None:
            yield from response.release()

    # raise errors based on http status codes, let the client handle those if needed
    if not (200 <= response.status < 300) and response.status not in ignore:
        self.log_request_fail(method, url, url_path, body, duration,
                              status_code=response.status, response=raw_data)
        self._raise_error(response.status, raw_data)

    self.log_request_success(method, url, url_path, body, response.status,
                             raw_data, duration)
    return response.status, response.headers, raw_data
def test_sniff_on_fail_triggers_sniffing_on_fail(self):
    """Check the pool contents after a failing request with sniff-on-fail enabled."""
    t = Transport(
        [{'exception': ConnectionError('abandon ship')}, {"data": CLUSTER_NODES}],
        connection_class=DummyConnection,
        sniff_on_connection_fail=True,
        max_retries=0,
        randomize_hosts=False,
    )

    self.assertRaises(ConnectionError, t.perform_request, 'GET', '/')
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(1, len(t.connection_pool.connections))
    self.assertEqual('http://1.1.1.1:123', t.get_connection().host)
def __init__(self, host, port, db_config):
    """Connect to Elasticsearch and create one index per configured definition.

    For each of the keys ``data_point``, ``experiment``, ``target_system``,
    ``analysis`` and ``stage`` this sets ``self.<key>_definition``,
    ``self.<key>_type_name`` and ``self.<key>_index`` and appends the
    definition to ``self.definitions``.

    :param host: Elasticsearch host
    :param port: Elasticsearch port
    :param db_config: mapping providing ``index_definitions`` and ``settings``
    :raises ConnectionError: when the ping does not succeed
    :raises TransportError: logged via ``error`` and re-raised
    """
    self.es = Elasticsearch([{"host": host, "port": port}])
    try:
        if not self.es.ping():
            raise ConnectionError("Host/port values are not valid")

        logging.getLogger('elasticsearch').setLevel(logging.CRITICAL)
        self.indices_client = IndicesClient(self.es)

        index_definitions = db_config["index_definitions"]
        self.settings = db_config["settings"]

        definition_keys = ("data_point", "experiment", "target_system",
                           "analysis", "stage")
        for key in definition_keys:
            definition = index_definitions[key]
            setattr(self, key + "_definition", definition)
            self.create_index_from_definition(definition, self.settings)
            setattr(self, key + "_type_name", definition["name"])
            setattr(self, key + "_index", definition["index_name"])
            self.definitions.append(definition)
    except TransportError as err1:
        error(
            "TransportError while creating elasticsearch instance for experiments. Check type mappings in experiment_db_config.json."
        )
        raise err1
def perform(self, timeout=0.1):
    """
    Drive the multi handle for as long as ``self.running`` is true.

    Each pass loads waiting queries, waits for activity, runs the perform
    loop and dispatches finished handles to their callbacks. The loop only
    ends once ``self.running`` turns false, so do not wait on it for
    completion of individual queries.

    :param timeout: the timeout used for loading queries and for select
    :return: Nothing
    """
    while self.running:
        if not self.handles:
            # no activity, just sleep, for new queries
            yield from self._try_load_queries(True, timeout)
        else:
            yield from self._try_load_queries(False)
        if not self.handles:
            continue

        # wait for something to happen
        selected = self.multi.select(timeout)
        if selected < 0:
            continue

        # it was not a select time out, something to do
        ret, _ = self._perform_loop()
        if ret > 0:
            raise ConnectionError("pycurl failed", ret)
        if not self.handles:
            continue

        # some handles to process
        _pending, finished, errored = self.multi.info_read()

        for handle in finished:
            self.handles.remove(handle)
            status = handle.getinfo(pycurl.RESPONSE_CODE)
            self.multi.remove_handle(handle)
            content_type, decoded = decode_body(handle)
            if not self.running:
                # is stopped, just swallow content
                continue
            if 200 <= status < 300:
                handle.cb(status, handle.headers, decoded)
            elif status >= 300:
                failure = return_error(
                    status, decoded, content_type,
                    http_message=handle.headers.pop('__STATUS__'),
                    url=handle.getinfo(pycurl.EFFECTIVE_URL))
                handle.f_cb(failure)

        for handle, code, message in errored:
            self.handles.remove(handle)
            self.multi.remove_handle(handle)
            if code != pycurl.E_OPERATION_TIMEDOUT:
                ex = PyCurlException(
                    code, handle.errstr(),
                    handle.getinfo(pycurl.EFFECTIVE_URL))
            else:
                ex = ConnectionTimeout(
                    code, message,
                    handle.getinfo(pycurl.EFFECTIVE_URL),
                    handle.getinfo(pycurl.TOTAL_TIME))
            handle.f_cb(ex)
def check_host_reachable(self):
    """Ping Elasticsearch; log an error and exit with status 1 if it fails.

    A falsy ping result and a ``ConnectionError`` raised by the ping are
    handled the same way.
    """
    try:
        unreachable = not self.es.ping()
    except ConnectionError:
        unreachable = True

    if unreachable:
        logger.error("Elasticsearch server not reachable at {}. Abort.".format(self.es_full_host))
        sys.exit(1)
def ping(self, token):
    """Report whether the cluster is usable and ``token`` grants any access.

    :param token: token checked against ``self.tokens.read`` and
        ``self.tokens.write``
    :return: ``True`` when the token has read or write access, ``False``
        otherwise
    :raises ConnectionError: when the health check returns ``None`` or
        reports status ``'red'``
    """
    health = self._health_check()
    if health is None or health['status'] == 'red':
        raise ConnectionError('ES Cluster Issue')

    # Return an explicit boolean instead of falling off the end with None
    # when the token has no access.
    return bool(self.tokens.read(token) or self.tokens.write(token))
def test_failed_connection_will_be_marked_as_dead(self):
    """After a failing request the pool is expected to hold no connections."""
    t = Transport(
        [{'exception': ConnectionError('abandon ship')}] * 2,
        connection_class=DummyConnection,
    )

    self.assertRaises(ConnectionError, t.perform_request, 'GET', '/')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(0, len(t.connection_pool.connections))
def test_request_will_fail_after_X_retries(self):
    """A permanently failing connection is expected to be called four times."""
    t = Transport(
        [{"exception": ConnectionError("abandon ship")}],
        connection_class=DummyConnection,
    )

    self.assertRaises(ConnectionError, t.perform_request, "GET", "/")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(4, len(t.get_connection().calls))
def test_failed_connection_will_be_marked_as_dead(self):
    """After a failing request the pool is expected to hold no connections."""
    t = Transport(
        [{"exception": ConnectionError("abandon ship")}] * 2,
        connection_class=DummyConnection,
    )

    self.assertRaises(ConnectionError, t.perform_request, "GET", "/")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(0, len(t.connection_pool.connections))
def test_request_will_fail_after_X_retries(self):
    """A permanently failing connection is expected to be called four times."""
    t = Transport(
        [{'exception': ConnectionError('abandon ship')}],
        connection_class=DummyConnection,
    )

    self.assertRaises(ConnectionError, t.perform_request, 'GET', '/')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(4, len(t.get_connection().calls))
def test_opendistro_ping_failed(self):
    """``do_ping`` must raise ``DatabaseError`` when the ES ping fails.

    Only runs for the ``odelasticsearch`` driver; returns early otherwise.
    """
    if self.driver_name != "odelasticsearch":
        return

    from elasticsearch.exceptions import ConnectionError

    failing_ping = patch(
        "elasticsearch.Elasticsearch.ping", side_effect=ConnectionError()
    )
    with failing_ping:
        raw_conn = self.engine.raw_connection()
        with self.assertRaises(DatabaseError):
            self.engine.dialect.do_ping(raw_conn)
def perform_request(self, method, url, params=None, body=None, timeout=None,
                    ignore=(), headers=None):
    """Send a request through ``self.session`` and return the result.

    :param method: HTTP method name
    :param url: path appended to ``self.base_url``
    :param params: optional mapping encoded into the query string
    :param body: request payload
    :param timeout: per-request timeout; ``self.timeout`` is used when falsy
    :param ignore: non-2xx status codes that must not raise
    :param headers: optional request headers
    :return: ``(status_code, headers, raw_data)``
    :raises SSLError: on ``requests.exceptions.SSLError``
    :raises ConnectionTimeout: on ``requests.Timeout``
    :raises ConnectionError: on any other exception raised while requesting
    """
    url = self.base_url + url
    if params:
        url = '%s?%s' % (url, urlencode(params))

    start = time.time()
    prepared_request = self.session.prepare_request(
        requests.Request(method=method, headers=headers, url=url, data=body))
    env_settings = self.session.merge_environment_settings(
        prepared_request.url, {}, None, None, None)
    send_kwargs = {'timeout': timeout or self.timeout}
    send_kwargs.update(env_settings)

    try:
        response = self.session.request(
            prepared_request.method,
            prepared_request.url,
            data=prepared_request.body,
            headers=prepared_request.headers,
            **send_kwargs)
        duration = time.time() - start
        raw_data = response.text
    except Exception as exc:
        self.log_request_fail(method, url, prepared_request.path_url, body,
                              time.time() - start, exception=exc)
        if isinstance(exc, requests.exceptions.SSLError):
            raise SSLError('N/A', str(exc), exc)
        elif isinstance(exc, requests.Timeout):
            raise ConnectionTimeout('TIMEOUT', str(exc), exc)
        raise ConnectionError('N/A', str(exc), exc)

    # raise errors based on http status codes, let the client handle those if needed
    succeeded = 200 <= response.status_code < 300
    if not succeeded and response.status_code not in ignore:
        self.log_request_fail(method, url, response.request.path_url, body,
                              duration, response.status_code, raw_data)
        self._raise_error(response.status_code, raw_data)

    self.log_request_success(method, url, response.request.path_url, body,
                             response.status_code, raw_data, duration)
    return response.status_code, response.headers, raw_data
def init_es(log, es_source, es_target, es_source_index, es_target_index):
    """Create the source and target Elasticsearch clients.

    :param log: logger used to report a failure
    :param es_source: host of the source cluster
    :param es_target: host of the target cluster
    :param es_source_index: name stored under ``source_index``
    :param es_target_index: name stored under ``target_index``
    :return: dict with ``es_source_client``, ``es_target_client``,
        ``source_index`` and ``target_index``
    :raises ConnectionError: wrapping any exception raised while building
        the clients
    """
    client_options = dict(maxsize=25, timeout=60, max_retries=2,
                          retry_on_timeout=True)
    try:
        source_client = Elasticsearch(hosts=[{"host": es_source}], **client_options)
        target_client = Elasticsearch(hosts=[{"host": es_target}], **client_options)
        return {
            "es_source_client": source_client,
            "es_target_client": target_client,
            "source_index": es_source_index,
            "target_index": es_target_index,
        }
    except Exception as err:
        log.error(err)
        raise ConnectionError(err)
def test_v1_status_es_unreachable(mock_es_health, mimir_es):
    """The status endpoint answers 200 and reports ES as unreachable."""
    mock_es_health.side_effect = ConnectionError(
        "N/A", "Mocked connection error", None
    )
    expected_payload = {
        "es": {"reachable": False, "running": False},
        "ready": False,
    }

    response = TestClient(app).get("http://localhost/v1/status")

    assert response.status_code == 200
    assert response.json() == expected_payload
async def test_failed_connection_will_be_marked_as_dead(self):
    """After a failing request the pool is expected to hold no connections."""
    transport = AsyncTransport(
        [{"exception": ConnectionError("abandon ship")}] * 2,
        connection_class=DummyConnection,
    )

    try:
        await transport.perform_request("GET", "/")
    except ConnectionError:
        connection_error = True
    else:
        connection_error = False

    assert connection_error
    assert 0 == len(transport.connection_pool.connections)
def test_sniff_on_fail_triggers_sniffing_on_fail(self):
    """Check the pool contents after a failing request with sniff-on-fail enabled."""
    node_configs = [
        {"exception": ConnectionError("abandon ship")},
        {"data": CLUSTER_NODES},
    ]
    transport = Transport(
        node_configs,
        connection_class=DummyConnection,
        sniff_on_connection_fail=True,
        max_retries=0,
        randomize_hosts=False,
    )

    self.assertRaises(ConnectionError, transport.perform_request, "GET", "/")
    self.assertEqual(1, len(transport.connection_pool.connections))
    self.assertEqual("http://1.1.1.1:123", transport.get_connection().host)
async def test_request_will_fail_after_X_retries(self):
    """A permanently failing connection is expected to be called four times."""
    transport = AsyncTransport(
        [{"exception": ConnectionError("abandon ship")}],
        connection_class=DummyConnection,
    )

    try:
        await transport.perform_request("GET", "/")
    except ConnectionError:
        connection_error = True
    else:
        connection_error = False

    assert connection_error
    assert 4 == len(transport.get_connection().calls)
def perform_request(self, method, url, params=None, body=None, timeout=None,
                    ignore=(), headers=None):
    """Issue an HTTP request through ``self.session`` and return the result.

    Generator-based coroutine: it delegates with ``yield from``.

    :param method: HTTP method name
    :param url: request path
    :param params: optional mapping encoded into the query string
    :param body: request payload, passed as ``data``
    :param timeout: per-request timeout; ``self.timeout.total`` when falsy
    :param ignore: non-2xx status codes that must not raise
    :param headers: optional request headers
    :return: ``(status, headers, raw_data)``
    :raises SSLError: on ``ServerFingerprintMismatch``
    :raises ConnectionTimeout: on ``asyncio.TimeoutError``
    :raises ConnectionError: on any other non-cancellation exception
    """
    url_path = url
    query_string = urlencode(params) if params else ""

    # Provide correct URL object to avoid string parsing in low-level code
    url = yarl.URL.build(
        scheme=self.scheme,
        host=self.hostname,
        port=self.port,
        path=url,
        query_string=query_string,
        encoded=True,
    )

    start = self.loop.time()
    response = None
    try:
        request_timeout = timeout or self.timeout.total
        with async_timeout.timeout(request_timeout, loop=self.loop):
            # override the default session timeout explicitly
            response = yield from self.session.request(
                method, url, data=body, headers=headers,
                timeout=request_timeout)
            raw_data = yield from response.text()
        duration = self.loop.time() - start
    except asyncio.CancelledError:
        raise
    except Exception as exc:
        self.log_request_fail(method, url, url_path, body,
                              self.loop.time() - start, exception=exc)
        if isinstance(exc, ServerFingerprintMismatch):
            raise SSLError('N/A', str(exc), exc)
        elif isinstance(exc, asyncio.TimeoutError):
            raise ConnectionTimeout('TIMEOUT', str(exc), exc)
        raise ConnectionError('N/A', str(exc), exc)
    finally:
        if response is not None:
            yield from response.release()

    # raise errors based on http status codes, let the client handle those if needed
    succeeded = 200 <= response.status < 300
    if not succeeded and response.status not in ignore:
        self.log_request_fail(method, url, url_path, body, duration,
                              status_code=response.status, response=raw_data)
        self._raise_error(response.status, raw_data)

    self.log_request_success(method, url, url_path, body, response.status,
                             raw_data, duration)
    return response.status, response.headers, raw_data
def SQLQuery(self, query):
    """
    Runs a SQL Query by translating it to a DSL query via ``_sql/_explain``.

    Returns the ``_source`` of every hit. When the query selects exactly one
    field, a flat list of that field's values is returned instead.

    :param query: SQL query
    :type query: string
    :raises ConnectionError: when the explain response contains an error
    """
    url_query = six.moves.urllib.parse.quote(query.encode('utf8'))
    uri = "http://%s:%s/_sql/_explain?sql=%s" % (
        self.endpoint["host"], self.endpoint["port"], url_query)
    response = requests.get(uri)
    dsl_query = json.loads(response.text)
    if "error" in dsl_query:
        raise ConnectionError("Error in query: " +
                              str(dsl_query["error"]["root_cause"]))

    # Reshape the explain output into keyword arguments for unlimitedQuery.
    dsl_query["body"] = {"query": dsl_query.pop("query")}
    dsl_query["from_"] = dsl_query.pop("from")
    source_spec = dsl_query.pop("_source")
    dsl_query["_source_include"] = source_spec["includes"]
    dsl_query["_source_exclude"] = source_spec["excludes"]

    match = re.search(r"select.+?from[\s\"\']+([\w,]+)", query,
                      flags=re.IGNORECASE)
    table_name = match.group(1) if match else "papers"
    dsl_query["index"] = index_equivalence[table_name]["index"]
    dsl_query["doc_type"] = index_equivalence[table_name]["type"]

    tmp_max = self.max_results
    dsl_query.pop("size", None)
    try:
        results = self.unlimitedQuery(**dsl_query)
    finally:
        # Restore the limit even when the query fails, so an error cannot
        # leave a changed max_results behind.
        self.max_results = tmp_max

    results = [r["_source"] for r in results]
    included = dsl_query["_source_include"]
    if len(included) == 1:
        only_field = included[0]
        results = [r[only_field] for r in results]
    return results
def __init__(self, host, port, db_config):
    """Connect to Elasticsearch and create the configured index if missing.

    :param host: Elasticsearch host
    :param port: Elasticsearch port
    :param db_config: mapping providing ``index`` and the ``*_type`` entries
    :raises ConnectionError: when the ping does not succeed
    :raises TransportError: logged via ``error`` and re-raised
    """
    self.es = Elasticsearch([{"host": host, "port": port}])
    try:
        if not self.es.ping():
            raise ConnectionError("Host/port values are not valid")

        index = db_config["index"]
        self.index = index["name"]

        stage_type = db_config["stage_type"]
        self.stage_type_name = stage_type["name"]

        analysis_type = db_config["analysis_type"]
        self.analysis_type_name = analysis_type["name"]

        data_point_type = db_config["data_point_type"]
        self.data_point_type_name = data_point_type["name"]

        self.target_system_type_name = db_config["target_system_type"]["name"]
        self.experiment_type_name = db_config["experiment_type"]["name"]

        # user can specify an type without a mapping (dynamic mapping)
        mappings = {}
        typed_configs = (
            (self.stage_type_name, stage_type),
            (self.analysis_type_name, analysis_type),
            (self.data_point_type_name, data_point_type),
        )
        for type_name, type_config in typed_configs:
            if "mapping" in type_config:
                mappings[type_name] = type_config["mapping"]

        body = {}
        if "settings" in index:
            body["settings"] = index["settings"]
        if mappings:
            body["mappings"] = mappings

        self.indices_client = IndicesClient(self.es)
        if not self.indices_client.exists(self.index):
            self.indices_client.create(index=self.index, body=body)
    except TransportError as err1:
        error("Error while creating elasticsearch for experiments. Check type mappings for experiments in experiment_db_config.json.")
        raise err1
def __init__(self, es_ip="localhost", es_port="9200", verbose=False,
             country_threshold=0.6, threads=True, progress=True,
             mod_date="2018-06-05", **kwargs):
    """Load lookup tables and models, then verify the Elasticsearch index.

    :param es_ip: Elasticsearch host passed to ``utilities.setup_es``
    :param es_port: Elasticsearch port passed to ``utilities.setup_es``
    :param verbose: stored on the instance as ``verbose``
    :param country_threshold: stored on the instance as ``country_threshold``
    :param threads: stored on the instance as ``threads``
    :param progress: stored on the instance as ``progress``
    :param mod_date: index date compared against the date found in the index
    :param kwargs: only ``n_threads`` is inspected, to emit a deprecation
        warning
    :raises ConnectionError: when the count request against the index fails
    """
    data_dir = pkg_resources.resource_filename('mordecai', 'data/')
    models_dir = pkg_resources.resource_filename('mordecai', 'models/')

    self._cts = utilities.country_list_maker()
    self._just_cts = utilities.country_list_maker()
    self._inv_cts = utilities.make_inv_cts(self._cts)
    country_state_city = utilities.other_vectors()
    self._cts.update(country_state_city)
    self._ct_nlp = utilities.country_list_nlp(self._cts)
    self._prebuilt_vec = [token.vector for token in self._ct_nlp]
    self._both_codes = utilities.make_country_nationality_list(
        self._cts, data_dir + "nat_df.csv")
    self._admin1_dict = utilities.read_in_admin1(
        data_dir + "admin1CodesASCII.json")

    self.conn = utilities.setup_es(es_ip, es_port)
    self.country_model = keras.models.load_model(models_dir + "country_model.h5")
    self.rank_model = keras.models.load_model(models_dir + "rank_model.h5")
    self._skip_list = utilities.make_skip_list(self._cts)
    self.training_setting = False  # make this true if you want training formatted
    # if the best country guess is below the country threshold, don't return anything at all
    self.country_threshold = country_threshold

    feature_codes = pd.read_csv(data_dir + "feature_codes.txt", sep="\t",
                                header=None)
    # human readable geonames IDs
    self._code_to_text = dict(zip(feature_codes[1], feature_codes[3]))
    self.verbose = verbose  # return the full dictionary or just the good parts?
    self.progress = progress  # display progress bars?
    self.threads = threads
    if 'n_threads' in kwargs:
        warnings.warn("n_threads is deprecated. Use threads=True instead.",
                      DeprecationWarning)

    try:
        # https://www.reddit.com/r/Python/comments/3a2erd/exception_catch_not_catching_everything/
        # with nostderr():
        self.conn.count()
    except:
        raise ConnectionError("""Could not establish contact with Elasticsearch at {0} on port {1}. Are you sure it's running? 
Mordecai needs access to the Geonames/Elasticsearch gazetteer to function. See https://github.com/openeventdata/mordecai#installation-and-requirements for instructions on setting up Geonames/Elasticsearch""".format(es_ip, es_port))

    es_date = utilities.check_geonames_date(self.conn)
    if es_date != mod_date:
        print("""You may be using an outdated Geonames index. Your index is from {0}, while the most recent is {1}. Please see https://github.com/openeventdata/mordecai/ for instructions on updating.""".format(es_date, mod_date))
def test_sniff_on_fail_failing_does_not_prevent_retires(self, sniff_hosts):
    """A failing sniff must not stop the request from being retried."""
    sniff_hosts.side_effect = [TransportError("sniff failed")]
    node_configs = [
        {"exception": ConnectionError("abandon ship")},
        {"data": CLUSTER_NODES},
    ]
    transport = Transport(
        node_configs,
        connection_class=DummyConnection,
        sniff_on_connection_fail=True,
        max_retries=3,
        randomize_hosts=False,
    )

    conn_err, conn_data = transport.connection_pool.connections
    response = transport.perform_request("GET", "/")

    self.assertEqual(json.loads(CLUSTER_NODES), response)
    self.assertEqual(1, sniff_hosts.call_count)
    self.assertEqual(1, len(conn_err.calls))
    self.assertEqual(1, len(conn_data.calls))
async def test_sniff_on_fail_failing_does_not_prevent_retires(self, sniff_hosts):
    """A failing sniff must not stop the request from being retried."""
    sniff_hosts.side_effect = [TransportError("sniff failed")]
    node_configs = [
        {"exception": ConnectionError("abandon ship")},
        {"data": CLUSTER_NODES},
    ]
    transport = AsyncTransport(
        node_configs,
        connection_class=DummyConnection,
        sniff_on_connection_fail=True,
        max_retries=3,
        randomize_hosts=False,
    )
    await transport._async_init()

    conn_err, conn_data = transport.connection_pool.connections
    response = await transport.perform_request("GET", "/")

    assert json.loads(CLUSTER_NODES) == response
    assert 1 == sniff_hosts.call_count
    assert 1 == len(conn_err.calls)
    assert 1 == len(conn_data.calls)
class TestApplication(BaseApplicationTest):
    """Application-level checks: routing, bearer-token auth, caching, ES retries."""

    def test_index(self):
        """The root endpoint answers 200 with a ``links`` entry."""
        resp = self.client.get('/')
        assert 200 == resp.status_code
        payload = json.loads(resp.get_data())
        assert 'links' in payload

    def test_404(self):
        """An unknown route answers 404."""
        resp = self.client.get('/index/type/search')
        assert 404 == resp.status_code

    def test_bearer_token_is_required(self):
        """Without a token the root endpoint answers 401 with a challenge header."""
        self.do_not_provide_access_token()
        resp = self.client.get('/')
        assert 401 == resp.status_code
        assert 'WWW-Authenticate' in resp.headers

    def test_invalid_bearer_token_is_required(self):
        """An invalid token is answered with 403."""
        self.do_not_provide_access_token()
        bad_auth = {'Authorization': 'Bearer invalid-token'}
        resp = self.client.get('/', headers=bad_auth)
        assert 403 == resp.status_code

    def test_ttl_is_not_set(self):
        """The root response carries no cache max-age."""
        resp = self.client.get('/')
        assert resp.cache_control.max_age is None

    @mock.patch(
        'elasticsearch.transport.Urllib3HttpConnection.perform_request',
        side_effect=ConnectionError(500))
    def test_elastic_search_client_performs_retries_on_connection_error(
            self, perform_request):
        """The patched request is attempted once plus ``max_retries`` times."""
        with pytest.raises(ConnectionError):
            self.client.get('/')

        # FlaskElasticsearch attaches the es client to the context in flask_elasticsearch.py
        from flask import _app_ctx_stack

        configured_retries = _app_ctx_stack.top.elasticsearch.transport.max_retries
        assert perform_request.call_count == 1 + configured_retries
        assert perform_request.call_count == 1 + 3
def perform_request(self, method, url, params=None, body=None, timeout=None,
                    ignore=()):
    """Issue an HTTP request through ``self.session`` and return the result.

    Generator-based coroutine: it delegates with ``yield from``.

    :param method: HTTP method name
    :param url: path joined onto ``self.base_url``
    :param params: query parameters attached with ``with_query``
    :param body: request payload, passed as ``data``
    :param timeout: per-request timeout; ``self.timeout`` is used when falsy
    :param ignore: non-2xx status codes that must not raise
    :return: ``(status, headers, raw_data)``
    :raises ConnectionTimeout: on ``asyncio.TimeoutError``
    :raises SSLError: on ``FingerprintMismatch``
    :raises ConnectionError: on ``ClientError``
    """
    url_path = url
    url = (self.base_url / url.lstrip('/')).with_query(params)

    start = self.loop.time()

    def log_failure(exc):
        # Shared failure logging for every translated exception below.
        self.log_request_fail(method, url, url_path, body,
                              self.loop.time() - start, exception=exc)

    response = None
    try:
        with aiohttp.Timeout(timeout or self.timeout, loop=self.loop):
            response = yield from self.session.request(
                method, url, data=body, headers=self.headers, timeout=None)
            raw_data = yield from response.text()
        duration = self.loop.time() - start
    except asyncio.TimeoutError as exc:
        log_failure(exc)
        raise ConnectionTimeout('TIMEOUT', str(exc), exc)
    except FingerprintMismatch as exc:
        log_failure(exc)
        raise SSLError('N/A', str(exc), exc)
    except ClientError as exc:
        log_failure(exc)
        raise ConnectionError('N/A', str(exc), exc)
    finally:
        if response is not None:
            yield from response.release()

    # raise errors based on http status codes, let the client handle those if needed
    succeeded = 200 <= response.status < 300
    if not succeeded and response.status not in ignore:
        self.log_request_fail(method, url, url_path, body, duration,
                              response.status, raw_data)
        self._raise_error(response.status, raw_data)

    self.log_request_success(method, url, url_path, body, response.status,
                             raw_data, duration)
    return response.status, response.headers, raw_data
def perform_request(self, method, url, params=None, body=None, timeout=None, ignore=()):
    """Send a request with ``urlfetch.Fetch`` and return the result.

    :param method: HTTP method name
    :param url: path appended to ``self.base_url``
    :param params: optional mapping encoded into the query string
    :param body: request payload
    :param timeout: passed to ``urlfetch.Fetch`` as ``deadline``
    :param ignore: non-2xx status codes that must not raise
    :return: ``(status_code, headers, raw_data)``
    :raises SSLError: on ``urlfetch_errors.SSLCertificateError``
    :raises ConnectionTimeout: on ``urlfetch_errors.DeadlineExceededError``
    :raises ConnectionError: on any other exception raised while fetching
    """
    url = self.base_url + url
    if params:
        url = '%s?%s' % (url, urlencode(params))

    start = time.time()
    headers = self.headers.copy()
    try:
        response = urlfetch.Fetch(url,
                                  payload=body,
                                  method=method,
                                  headers=headers,
                                  allow_truncated=False,
                                  follow_redirects=True,
                                  deadline=timeout,
                                  validate_certificate=self.verify_certs)
        duration = time.time() - start
        raw_data = response.content
    except Exception as e:
        self.log_request_fail(method, url, url, body, time.time() - start,
                              exception=e)
        if isinstance(e, urlfetch_errors.SSLCertificateError):
            raise SSLError('N/A', str(e), e)
        if isinstance(e, urlfetch_errors.DeadlineExceededError):
            raise ConnectionTimeout('TIMEOUT', str(e), e)
        raise ConnectionError('N/A', str(e), e)

    # raise errors based on http status codes, let the client handle those if needed
    if not (200 <= response.status_code < 300) and response.status_code not in ignore:
        # Include the status code and body so HTTP failures are logged with
        # the same detail as the other connection implementations provide.
        self.log_request_fail(method, url, url, body, duration,
                              status_code=response.status_code,
                              response=raw_data)
        self._raise_error(response.status_code, raw_data)

    self.log_request_success(method, url, url, body, response.status_code,
                             raw_data, duration)
    return response.status_code, response.headers, raw_data