def initialize_elasticsearch_connection(
    client,
    context: Optional[Context] = None,
    http_auth: Optional[Tuple[str, str]] = None,
    scheme: Optional[str] = "https",
):
    """Build an ``Elasticsearch`` client from an esctl context.

    Args:
        client: Not used by this function; kept for interface compatibility.
        context: Source of the cluster server list and of the optional
            ``no_check_certificate``, ``max_retries`` and ``timeout`` settings.
        http_auth: ``(user, password)`` pair forwarded to the client.
        scheme: URL scheme forwarded to the client.

    Returns:
        The configured ``Elasticsearch`` instance.

    NOTE(review): the server list is read from ``context``, so a ``None``
    context fails at the final call - confirm callers always supply one.
    """
    elasticsearch_client_kwargs = {
        "http_auth": http_auth,
        "scheme": scheme,
        "transport_class": EsctlTransport,
    }

    if context is not None:
        settings = context.settings

        if scheme == "https" and settings.get("no_check_certificate"):
            ssl_context = create_ssl_context()
            # check_hostname must be switched off before verify_mode can be
            # lowered to CERT_NONE, otherwise ssl raises ValueError.
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE
            elasticsearch_client_kwargs["ssl_context"] = ssl_context

        # Forward only the tuning options the user actually configured.
        for option in ("max_retries", "timeout"):
            if option in settings:
                elasticsearch_client_kwargs[option] = settings.get(option)

    return Elasticsearch(
        context.cluster.get("servers"), **elasticsearch_client_kwargs
    )
def Connect2ES(ip='127.0.0.1', port='9200', user="", password="", https=False,
               CertPath="", ES_Index='reports', Data=""):
    """Index one document into Elasticsearch, creating the index if missing.

    Args:
        ip: Elasticsearch host.
        port: Elasticsearch port; converted with ``int()``.
        user: User name for HTTP basic auth.
        password: Password for HTTP basic auth.
        https: Connect over HTTPS (with ``CertPath`` as CA file) when true,
            plain HTTP otherwise.
        CertPath: CA file handed to ``create_ssl_context`` for HTTPS.
        ES_Index: Target index name.
        Data: Document body to index.
    """
    # Body sent when the index has to be created: raises the field limit.
    raiseFieldLimit = '''
    {
      "index.mapping.total_fields.limit": 500000
    }'''

    if https:
        context = create_ssl_context(cafile=CertPath)
        es = Elasticsearch(
            [ip],
            http_auth=(user, password),
            scheme="https",
            port=int(port),
            ssl_context=context,
        )
    else:
        http_kwargs = {"scheme": "http", "port": int(port)}
        # Previously the credentials were silently dropped on the HTTP
        # branch; forward them whenever the caller supplied a user name.
        if user:
            http_kwargs["http_auth"] = (user, password)
        es = Elasticsearch([ip], **http_kwargs)

    if not es.indices.exists(index=ES_Index):
        # ignore=400 keeps a failed/raced creation from raising.
        es.indices.create(index=ES_Index, ignore=400, body=raiseFieldLimit)
    es.index(index=ES_Index, doc_type='Report', body=Data)
def main():
    """Query Elasticsearch for the latest per-file metrics and dump them as JSON.

    Command-line arguments come from ``parse_args()``; missing date field,
    field list and output file fall back to built-in defaults. The client is
    created with certificate verification turned off.
    """
    args = parse_args()

    # Fall back to the defaults whenever an option was left empty.
    date_field = args.date_field or "grimoire_creation_date"
    fields = args.fields or [
        "_score",
        "file_path",
        "blanks_per_loc",
        "ccn",
        "comments",
        "comments_per_loc",
        "loc",
        "loc_per_function",
        "num_funs",
        "tokens",
    ]
    output = args.output_file or "data.json"

    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(message)s')

    # Unverified TLS: hostname check has to go first, then verify_mode.
    unverified_ctx = create_ssl_context()
    unverified_ctx.check_hostname = False
    unverified_ctx.verify_mode = ssl.CERT_NONE

    es = Elasticsearch(
        [args.es_url],
        timeout=120,
        max_retries=20,
        ssl_context=unverified_ctx,
        retry_on_timeout=True,
        verify_certs=False,
    )

    files_now = get_files_at_last_commit(es, args.index, date_field,
                                         args.date)
    city_items = get_last_city(es, args.index, files_now, date_field, fields,
                               args.source_code, args.date)

    with open(output, 'w') as out_file:
        json.dump(city_items, out_file)
def create_es_client() -> Elasticsearch:
    """Create the Elasticsearch client, store it in ``CLIENT`` and return it.

    Host and timeout are read from ``settings.ES_HOSTNAME`` and
    ``settings.ES_TIMEOUT``. For ``https://localhost`` and
    ``https://host.docker.internal`` hosts certificate verification is
    disabled so that local development / SSH-tunnelled setups work.

    Returns:
        The new client, or ``None`` when creation failed (the error is
        logged, not raised). ``CLIENT`` is only rebound on success.
    """
    global CLIENT

    if settings.ES_HOSTNAME is None or settings.ES_HOSTNAME == "":
        logger.error(
            "env var 'ES_HOSTNAME' needs to be set for Elasticsearch connection"
        )

    es_config = {
        "hosts": [settings.ES_HOSTNAME],
        "timeout": settings.ES_TIMEOUT,
    }

    client = None
    try:
        # If the connection string is using SSL with localhost, disable verifying
        # the certificates to allow testing in a development environment
        # Also allow host.docker.internal, when SSH-tunneling on localhost to a
        # remote nonprod instance over HTTPS
        if settings.ES_HOSTNAME.startswith(
                ("https://localhost", "https://host.docker.internal")):
            logger.warning(
                "SSL cert verification is disabled. Safe only for local development"
            )
            import urllib3

            urllib3.disable_warnings()
            ssl_context = create_ssl_context()
            ssl_context.check_hostname = False
            ssl_context.verify_mode = CERT_NONE
            es_config["ssl_context"] = ssl_context

        client = Elasticsearch(**es_config)
        CLIENT = client
    except Exception as e:
        # Deliberately best-effort: a missing host name also ends up here.
        logger.error("Error creating the elasticsearch client: {}".format(e))

    # The signature always promised a client; actually hand it back.
    return client
def __init__(self, target: str, username: Optional[str] = 'admin', password: Optional[str] = 'admin',
             start: Union[str, datetime] = 'now-24h', end: Union[str, datetime] = 'now',
             host_index: Optional[str] = None, stdout: Optional[bool] = True,
             verbose: Optional[bool] = False):
    """Quickly derive host information from indexed event and alert information

    Args:
        target: A URL for a DynamiteNSM Elasticsearch node.
        username: An Elasticsearch user with the ability to create new indices
        password: The corresponding password for the Elasticsearch user
        start: An absolute date or relative string (e.g. now-24h)
        end: An absolute date or relative string (e.g. now)
        host_index: The index where the hosts packages will be written;
            defaults to ``hosts-<current UTC date>``
        stdout: Print the output to console
        verbose: Include detailed debug messages
    """
    # TLS context with hostname and certificate verification switched off.
    insecure_ctx = create_ssl_context()
    insecure_ctx.check_hostname = False
    insecure_ctx.verify_mode = ssl.CERT_NONE

    self.stdout = stdout
    self.verbose = verbose
    log_level = logging.DEBUG if verbose else logging.INFO
    self.logger = get_logger('JOB.EVENTS_TO_HOSTS', level=log_level, stdout=stdout)
    self.start = start
    self.end = end
    self.es = es_client.DynamiteElasticsearch(
        hosts=[target],
        http_auth=(username, password),
        ssl_context=insecure_ctx,
    )

    if host_index:
        self.host_index = host_index
    else:
        # No index given: derive a daily one from the current UTC date.
        today = datetime.utcnow()
        self.host_index = f'hosts-{today.year}-{today.month:02d}-{today.day:02d}'
def switch_alias():
    """Repoint the alias at ``index_name`` and drop the index it replaced.

    The alias name is ``index_pattern_prefix + index_name_base``. It is
    removed from every ``index_name_base*`` index, added to ``index_name``,
    and the index that carried it before (if any) is deleted.

    Raises:
        ElasticsearchException: if any alias or index operation fails, other
            than the alias not existing yet.
    """
    alias_name = index_pattern_prefix + index_name_base
    context = create_ssl_context(cafile=None, capath=None, cadata=None)
    es = elasticsearch.Elasticsearch([es_domain_url],
                                     verify_certs=False,
                                     ssl_context=context)

    # Alias and index management live on the ``indices`` namespace of the
    # client; the top-level ``delete`` is the single-document API.
    try:
        alias_info = es.indices.get_alias(name=alias_name)
    except elasticsearch.NotFoundError:
        # First run: the alias does not exist yet, nothing to replace.
        alias_info = {}

    # Works on Python 2 and 3 alike (``keys()[0]`` fails on Python 3).
    current_index_name = next(iter(alias_info), "")

    if current_index_name:
        es.indices.delete_alias(index=index_name_base + "*", name=alias_name)
    es.indices.put_alias(index=index_name, name=alias_name)

    # Never delete the index the alias has just been pointed at.
    if current_index_name and current_index_name != index_name:
        es.indices.delete(index=current_index_name)
def es_connection(host=ES_HOST):
    """Return an HTTPS Elasticsearch client for ``host`` without TLS verification.

    Hostname checking and certificate verification are both disabled and the
    request timeout is 5.
    """
    unverified_ctx = create_ssl_context()
    # Order matters: ssl refuses CERT_NONE while check_hostname is still on.
    unverified_ctx.check_hostname = False
    unverified_ctx.verify_mode = ssl.CERT_NONE

    client = Elasticsearch(
        [host],
        scheme="https",
        ssl_context=unverified_ctx,
        timeout=5,
    )
    return client
def __init__(self, hosts, client_options):
    """Copy ``client_options`` and translate them into client keyword arguments.

    ``use_ssl`` is consumed and replaced by ``scheme``; with SSL on,
    ``ca_certs`` and ``verify_certs`` are consumed to build
    ``self.ssl_context``. ``basic_auth_user`` / ``basic_auth_password`` are
    folded into ``http_auth`` when both are set.
    """
    self.hosts = hosts
    self.client_options = dict(client_options)
    self.ssl_context = None
    self.logger = logging.getLogger(__name__)

    # Log the options from a throw-away copy in which secrets are masked.
    loggable_options = dict(client_options)
    if "basic_auth_password" in loggable_options:
        loggable_options["basic_auth_password"] = "******"
    if "http_auth" in loggable_options:
        auth_user = loggable_options["http_auth"][0]
        loggable_options["http_auth"] = (auth_user, "*****")
    self.logger.info("Creating ES client connected to %s with options [%s]", hosts, loggable_options)

    # we're using an SSL context now and it is not allowed to have use_ssl present in client options anymore
    ssl_requested = self.client_options.pop("use_ssl", False)
    if not ssl_requested:
        self.logger.info("SSL support: off")
        self.client_options["scheme"] = "http"
    else:
        import ssl
        from elasticsearch.connection import create_ssl_context

        self.logger.info("SSL support: on")
        self.client_options["scheme"] = "https"

        ca_bundle = self.client_options.pop("ca_certs", certifi.where())
        self.ssl_context = create_ssl_context(cafile=ca_bundle)

        if self.client_options.pop("verify_certs", True):
            self.logger.info("SSL certificate verification: on")
        else:
            self.logger.info("SSL certificate verification: off")
            self.ssl_context.check_hostname = False
            self.ssl_context.verify_mode = ssl.CERT_NONE

            self.logger.warning(
                "User has enabled SSL but disabled certificate verification. This is dangerous but may be ok for a "
                "benchmark. Disabling urllib warnings now to avoid a logging storm. "
                "See https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings for details.")
            # Silences urllib3's InsecureRequestWarning ("Unverified HTTPS
            # request is being made..."), see
            # https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
            urllib3.disable_warnings()

    credentials_given = (self._is_set(self.client_options, "basic_auth_user")
                         and self._is_set(self.client_options, "basic_auth_password"))
    if not credentials_given:
        self.logger.info("HTTP basic authentication: off")
    else:
        self.logger.info("HTTP basic authentication: on")
        auth_user = self.client_options.pop("basic_auth_user")
        auth_password = self.client_options.pop("basic_auth_password")
        self.client_options["http_auth"] = (auth_user, auth_password)
def get_open_distro_client(self):
    """Return an ``Elasticsearch`` client for ``self.endpoint`` with TLS checks off.

    The unverified SSL context is also stored on ``self.ssl_context``.
    """
    context = create_ssl_context()
    # check_hostname has to be disabled before verify_mode can be CERT_NONE.
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    self.ssl_context = context

    return Elasticsearch(
        [self.endpoint],
        http_auth=self.http_auth,
        verify_certs=False,
        ssl_context=context,
    )
def create_tls_context(ca_path=None,
                       cert_path=None,
                       key_path=None,
                       key_pass=None):
    """Create an SSL context, optionally with a custom CA and client certificate.

    Args:
        ca_path: Passed to ``load_verify_locations`` when given.
        cert_path: Client certificate file; only used together with
            ``key_path``.
        key_path: Private key file for the client certificate.
        key_pass: Password for the private key, if any.

    Returns:
        The context produced by ``create_ssl_context()`` with the requested
        material loaded.
    """
    context = create_ssl_context()

    if ca_path is not None:
        context.load_verify_locations(ca_path)

    # A client certificate is loaded only when both halves are supplied.
    if cert_path is not None and key_path is not None:
        context.load_cert_chain(certfile=cert_path,
                                keyfile=key_path,
                                password=key_pass)

    return context
def get_open_distro_client(self):
    """Return an ``OpenSearch`` client for ``self.endpoint`` with TLS checks off.

    The unverified SSL context is also stored on ``self.ssl_context``; the
    client uses ``RequestsHttpConnection`` as its connection class.
    """
    context = create_ssl_context()
    # check_hostname has to be disabled before verify_mode can be CERT_NONE.
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    self.ssl_context = context

    client_kwargs = {
        "http_auth": self.http_auth,
        "verify_certs": False,
        "ssl_context": context,
        "connection_class": RequestsHttpConnection,
    }
    return OpenSearch([self.endpoint], **client_kwargs)
def get_es_instance():
    """Build an ``Elastic`` client from the ``es`` section of the configuration.

    Basic auth is used only when both user and password are set. The SSL
    context passed along has hostname and certificate checks disabled.
    """
    es_conf = get_config()['es']

    unverified_ctx = create_ssl_context()
    unverified_ctx.check_hostname = False
    unverified_ctx.verify_mode = ssl.CERT_NONE

    user = es_conf['user']
    password = es_conf['password']
    auth = (user, password) if user and password else None

    return Elastic(
        hosts=[es_conf['host']],
        http_auth=auth,
        scheme=es_conf['scheme'],
        port=es_conf['port'],
        ssl_context=unverified_ctx,
    )
def bulk_upload(records):
    """Send ``records`` to Elasticsearch through the bulk helper.

    Documents come from ``doc_generator(records)``. The numbers of successful
    and failed operations are added to the ``succeed`` and ``failed``
    counters. Bulk errors are printed, not raised.

    Args:
        records: Input handed to ``doc_generator``.
    """
    context = create_ssl_context(cafile=None, capath=None, cadata=None)
    es_domain_url = es_domain_url_shared.value
    es = elasticsearch.Elasticsearch([es_domain_url],
                                     verify_certs=False,
                                     ssl_context=context)
    try:
        # With stats_only=True the helper returns (success count, error count).
        succeeded_count, failed_count = helpers.bulk(
            es,
            doc_generator(records),
            stats_only=True,
            raise_on_error=False,
            raise_on_exception=False,
            max_retries=1,
            initial_backoff=1,
            chunk_size=1000,
        )
        succeed.add(succeeded_count)
        failed.add(failed_count)
    except ElasticsearchException as ex:
        # Single-argument print calls behave the same on Python 2 and 3.
        print("bulk API error")
        print(ex)
def __init__(self, url, bulk=False, verify_certs=True):
    """ Initialize the MetaGen class.

    Parameters
    ----------
    url: str
        the server url.
    bulk: bool, default False
        if set, use the bulk API to perform many index operations in a
        single API call. This can greatly increase the indexing speed.
    verify_certs: bool, default True
        verify the server certificate. When false, the client gets an SSL
        context with hostname and certificate checks disabled.
    """
    client_options = {}
    if not verify_certs:
        unverified_ctx = create_ssl_context()
        # Hostname check first; ssl rejects CERT_NONE while it is enabled.
        unverified_ctx.check_hostname = False
        unverified_ctx.verify_mode = ssl.CERT_NONE
        client_options.update(verify_certs=verify_certs,
                              ssl_context=unverified_ctx)

    self.session = Elasticsearch([url], **client_options)
    self.bulk = bulk
def connect(self):
    """
    Initiate the elasticsearch session.

    The timeout is raised from the default value (10 seconds) to
    ES_CONN_TIMEOUT so that requests are waited for even if the cluster is
    overwhelmed and it takes a bit longer to process one bulk.

    :return: the ``Elasticsearch`` connection
    :raises Exception: if the client could not be created
    """
    try:
        # Never write the real password to the log.
        masked_password = "*****" if self.password else self.password
        logger.info("Connect to ES({0},{1},{2},{3})...".format(
            self.esaddress, self.username, masked_password, self.port))

        # No CA file -> no custom SSL context (None rather than '').
        context = create_ssl_context(
            cafile=self.cafile) if self.cafile else None
        auth = (self.username,
                self.password) if self.username and self.password else ()

        es_conn = Elasticsearch(self.esaddress,
                                http_auth=auth,
                                verify_certs=(not self.no_verify),
                                ssl_context=context,
                                port=self.port,
                                timeout=ES_CONN_TIMEOUT)
        return es_conn
    except Exception as e:
        raise Exception("Failed:Connect to ES!\n{0}".format(e))
import configparser
import ssl

from elasticsearch import Elasticsearch
from elasticsearch.connection import create_ssl_context

# Connection and index settings are read from the CLASSIFIER section of
# config.ini at import time.
config = configparser.ConfigParser()
config.read('config.ini')

es_host = config['CLASSIFIER']['ES_HOST']
es_port = config['CLASSIFIER']['ES_PORT']
index = config['CLASSIFIER']['INDEX']
doc_type = config['CLASSIFIER']['TYPE']

# TLS context with hostname and certificate verification disabled.
ssl_context = create_ssl_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE

es = Elasticsearch(
    [{'host': es_host, 'port': es_port}],
    scheme="https",
    # to ensure that it does not use the default value `True`
    verify_certs=False,
    ssl_context=ssl_context,
    http_auth=("admin", "admin"),
)


def update_group_no(group_no, group_name):
    """Return a ``{"doc": {...}}`` body carrying the group number and name.

    NOTE(review): shaped like an Elasticsearch partial-update body;
    presumably passed to an update call elsewhere - confirm against callers.
    """
    group_fields = {
        "group_no": group_no,
        "group_name": group_name,
    }
    return {"doc": group_fields}
def main():
    """Run the stress test against every address in ``args.es_address``.

    For each address: connect, generate documents, create indices, optionally
    wait for a green cluster, and create the worker threads. Then start all
    workers plus the stats thread, wait for the workers to finish (handling
    Ctrl-C), print the final stats and clean up the indices unless
    ``NO_CLEANUP`` is set.
    """
    clients = []
    all_indices = []
    auth = None
    context = None

    # Set the timestamp
    STARTED_TIMESTAMP = int(time.time())

    for esaddress in args.es_address:
        print("")
        print("Starting initialization of {0}".format(esaddress))
        try:
            # Initiate the elasticsearch session
            # We increase the timeout here from the default value (10 seconds)
            # to ensure we wait for requests to finish even if the cluster is overwhelmed
            # and it takes a bit longer to process one bulk.
            if CA_FILE:
                context = create_ssl_context(cafile=CA_FILE)

            if AUTH_USERNAME and AUTH_PASSWORD:
                auth = (AUTH_USERNAME, AUTH_PASSWORD)

            es = Elasticsearch(
                esaddress,
                http_auth=auth,
                verify_certs=VERIFY_CERTS,
                ssl_context=context,
                timeout=60)
        except Exception as e:
            print("Could not connect to elasticsearch!")
            print(e)
            sys.exit(1)

        # Generate docs
        documents_templates = generate_documents()
        fill_documents(documents_templates)

        print("Done!")
        print("Creating indices.. ")

        indices = generate_indices(es)
        all_indices.extend(indices)

        try:
            # wait for cluster to be green if nothing else is set
            if WAIT_FOR_GREEN:
                es.cluster.health(wait_for_status='green', master_timeout='600s', timeout='600s')
        except Exception as e:
            print("Cluster timeout....")
            # NOTE: the trailing comma after print(...) is kept as in the
            # original (Python 2 leftover).
            print("Cleaning up created indices.. "),
            cleanup_indices(es, indices)
            continue

        print("Generating documents and workers.. ")

        # Generate the clients
        clients.extend(generate_clients(es, indices, STARTED_TIMESTAMP))

        print("Done!")

    print("Starting the test. Will print stats every {0} seconds.".format(STATS_FREQUENCY))
    print("The test would run for {0} seconds, but it might take a bit more "
          "because we are waiting for current bulk operation to complete. \n".format(NUMBER_OF_SECONDS))

    # Run the clients!
    for d in clients:
        d.start()

    # Create and start the print stats thread
    stats_thread = Thread(target=print_stats_worker, args=[STARTED_TIMESTAMP])
    stats_thread.daemon = True
    stats_thread.start()

    for c in clients:
        while c.is_alive():
            try:
                c.join(timeout=0.1)
            except KeyboardInterrupt:
                print("")
                print("Ctrl-c received! Sending kill to threads...")
                shutdown_event.set()

                # Wait until every worker has noticed the shutdown event.
                # Only the worker threads are polled: threading.enumerate()
                # always contains the (live) main thread, so waiting on it
                # would never finish. is_alive() replaces isAlive(), which
                # no longer exists on Python 3.9+.
                while any(worker.is_alive() for worker in clients):
                    # sleep 2 secs so that we don't loop too often
                    sleep(2)

                print("Cleaning up created indices.. "),
                cleanup_indices(es, all_indices)

    print("\nTest is done! Final results:")
    print_stats(STARTED_TIMESTAMP)

    # Cleanup, unless we are told not to
    if not NO_CLEANUP:
        print("Cleaning up created indices.. "),
        cleanup_indices(es, all_indices)

        print("Done!")


#
# Main runner
def main():
    """Run the stress test against every address in ``args.es_address``.

    For each address: connect, generate documents, create indices, optionally
    wait for a green cluster, and create the worker threads. Then start all
    workers plus the stats thread, wait for the workers to finish (handling
    Ctrl-C), print the final stats and clean up the indices unless
    ``NO_CLEANUP`` is set.
    """
    clients = []
    all_indices = []
    auth = None
    context = None

    # Set the timestamp
    STARTED_TIMESTAMP = int(time.time())

    for esaddress in args.es_address:
        print("")
        print("Starting initialization of {0}".format(esaddress))
        try:
            # Initiate the elasticsearch session
            # We increase the timeout here from the default value (10 seconds)
            # to ensure we wait for requests to finish even if the cluster is overwhelmed
            # and it takes a bit longer to process one bulk.
            if CA_FILE:
                context = create_ssl_context(cafile=CA_FILE)

            if AUTH_USERNAME and AUTH_PASSWORD:
                auth = (AUTH_USERNAME, AUTH_PASSWORD)

            es = Elasticsearch(esaddress,
                               http_auth=auth,
                               verify_certs=VERIFY_CERTS,
                               ssl_context=context,
                               timeout=60)
        except Exception as e:
            print("Could not connect to elasticsearch!")
            print(e)
            sys.exit(1)

        # Generate docs
        documents_templates = generate_documents()
        fill_documents(documents_templates)

        print("Done!")
        print("Creating indices.. ")

        indices = generate_indices(es)
        all_indices.extend(indices)

        try:
            # wait for cluster to be green if nothing else is set
            if WAIT_FOR_GREEN:
                es.cluster.health(wait_for_status='green',
                                  master_timeout='600s',
                                  timeout='600s')
        except Exception as e:
            print("Cluster timeout....")
            # NOTE: the trailing comma after print(...) is kept as in the
            # original (Python 2 leftover).
            print("Cleaning up created indices.. "),
            cleanup_indices(es, indices)
            continue

        print("Generating documents and workers.. ")

        # Generate the clients
        clients.extend(generate_clients(es, indices, STARTED_TIMESTAMP))

        print("Done!")

    print("Starting the test. Will print stats every {0} seconds.".format(
        STATS_FREQUENCY))
    print("The test would run for {0} seconds, but it might take a bit more "
          "because we are waiting for current bulk operation to complete. \n".
          format(NUMBER_OF_SECONDS))

    # Run the clients!
    for d in clients:
        d.start()

    # Create and start the print stats thread
    stats_thread = Thread(target=print_stats_worker,
                          args=[STARTED_TIMESTAMP])
    stats_thread.daemon = True
    stats_thread.start()

    for c in clients:
        while c.is_alive():
            try:
                c.join(timeout=0.1)
            except KeyboardInterrupt:
                print("")
                print("Ctrl-c received! Sending kill to threads...")
                shutdown_event.set()

                # Wait until every worker has noticed the shutdown event.
                # Only the worker threads are polled: threading.enumerate()
                # always contains the (live) main thread, so waiting on it
                # would never finish. is_alive() replaces isAlive(), which
                # no longer exists on Python 3.9+.
                while any(worker.is_alive() for worker in clients):
                    # sleep 2 secs so that we don't loop too often
                    sleep(2)

                print("Cleaning up created indices.. "),
                cleanup_indices(es, all_indices)

    print("\nTest is done! Final results:")
    print_stats(STARTED_TIMESTAMP)

    # Cleanup, unless we are told not to
    if not NO_CLEANUP:
        print("Cleaning up created indices.. "),
        cleanup_indices(es, all_indices)

        print("Done!")


#
# Main runner