def main():
    """Entry point: parse CLI args, optionally connect to Elasticsearch,
    run the tool's process generator, and log the run duration.

    Reads env vars: ``es`` (host), ``es_port``, ``es_index``. Indexing to
    Elasticsearch is enabled only when ``es`` is present and the connection
    succeeds; otherwise the generator is drained without indexing.
    """
    # collect arguments
    parser = argparse.ArgumentParser(description="run script")
    parser.add_argument(
        '-t', '--tool', action='store', dest='tool',
        help='Provide tool name')
    index_args, unknown = parser.parse_known_args()
    index_args.index_results = False
    index_args.prefix = "snafu-%s" % index_args.tool

    # set up a standard format for time
    FMT = '%Y-%m-%dT%H:%M:%SGMT'

    # instantiate elasticsearch instance and check connection
    es = {}
    if "es" in os.environ:
        es['server'] = os.environ["es"]
        # use .get() so a missing es_port/es_index cannot raise KeyError
        es['port'] = os.environ.get("es_port", "")
        index_args.prefix = os.environ.get("es_index", index_args.prefix)
        index_args.index_results = True
        try:
            _es_connection_string = str(es['server']) + ':' + str(es['port'])
            # note: `es` is rebound from the config dict to the client object
            es = elasticsearch.Elasticsearch([_es_connection_string],
                                             send_get_body_as='POST')
            logger.info("Connected to the elasticsearch cluster with info as follows:" + str(es.info()))
        except Exception as e:
            # logger.warn is deprecated in favor of logger.warning
            logger.warning("Elasticsearch connection caused an exception :" + str(e))
            index_args.index_results = False

    if index_args.index_results:
        # call py es bulk using a process generator to feed it ES documents
        res_beg, res_end, res_suc, res_dup, res_fail, res_retry = streaming_bulk(
            es, process_generator(index_args, parser))
        logger.info("Indexed results - %s success, %s duplicates, %s failures, with %s retries." %
                    (res_suc, res_dup, res_fail, res_retry))
        start_t = time.strftime(FMT, time.gmtime(res_beg))
        end_t = time.strftime(FMT, time.gmtime(res_end))
    else:
        start_t = time.strftime(FMT, time.gmtime())
        # need to loop through generator and pass on all yields
        # this will execute all jobs without elasticsearch
        for i in process_generator(index_args, parser):
            pass
        end_t = time.strftime(FMT, time.gmtime())

    start_t = datetime.datetime.strptime(start_t, FMT)
    end_t = datetime.datetime.strptime(end_t, FMT)

    # get time delta for indexing run
    tdelta = end_t - start_t
    logger.info("Duration of execution - %s" % tdelta)
def main():
    """Entry point: parse CLI args, configure logging, optionally connect
    to Elasticsearch (honoring an opt-out of TLS certificate verification),
    run the tool's process generator, and log duration plus total indexed
    document size.

    Reads env vars: ``es`` (host), ``es_port``, ``es_index``,
    ``es_verify_cert`` ("false" disables TLS verification). Indexing is
    enabled only when both host and port are provided and the connection
    succeeds; otherwise the generator is drained without indexing.
    """
    # collect arguments
    parser = argparse.ArgumentParser(description="run script", add_help=False)
    parser.add_argument(
        '-v', '--verbose', action='store_const', dest='loglevel',
        const=logging.DEBUG, default=logging.INFO,
        help='enables verbose wrapper debugging info')
    parser.add_argument('-t', '--tool', help='Provide tool name',
                        required=True)
    index_args, unknown = parser.parse_known_args()
    index_args.index_results = False
    index_args.prefix = "snafu-%s" % index_args.tool

    setup_loggers("snafu", index_args.loglevel)
    log_level_str = 'DEBUG' if index_args.loglevel == logging.DEBUG else 'INFO'
    logger.info("logging level is %s" % log_level_str)

    # set up a standard format for time
    FMT = '%Y-%m-%dT%H:%M:%SGMT'

    # instantiate elasticsearch instance and check connection
    es = {}
    if "es" in os.environ:
        if os.environ["es"] != "":
            es['server'] = os.environ["es"]
            logger.info("Using elasticsearch server with host:" + es['server'])
        # use .get() so an unset es_port cannot raise KeyError
        if os.environ.get("es_port", "") != "":
            es['port'] = os.environ["es_port"]
            logger.info("Using elasticsearch server with port:" + es['port'])
    es_verify_cert = os.getenv("es_verify_cert", "true")
    # both host and port must be present before indexing is attempted
    if len(es) == 2:
        # use .get() so an unset es_index cannot raise KeyError
        if os.environ.get("es_index", "") != "":
            index_args.prefix = os.environ["es_index"]
            logger.info("Using index prefix for ES:" + index_args.prefix)
        index_args.index_results = True
        try:
            _es_connection_string = str(es['server']) + ':' + str(es['port'])
            if es_verify_cert == "false":
                logger.info("Turning off TLS certificate verification")
                import urllib3
                urllib3.disable_warnings(
                    urllib3.exceptions.InsecureRequestWarning)
                # build an SSL context that skips hostname/cert checks
                ssl_ctx = ssl.create_default_context()
                ssl_ctx.check_hostname = False
                ssl_ctx.verify_mode = ssl.CERT_NONE
                es = elasticsearch.Elasticsearch([_es_connection_string],
                                                 send_get_body_as='POST',
                                                 ssl_context=ssl_ctx,
                                                 use_ssl=True)
            else:
                es = elasticsearch.Elasticsearch([_es_connection_string],
                                                 send_get_body_as='POST')
            logger.info(
                "Connected to the elasticsearch cluster with info as follows:{0}"
                .format(str(es.info())))
        except Exception as e:
            # logger.warn is deprecated in favor of logger.warning
            logger.warning(
                "Elasticsearch connection caused an exception : %s" % e)
            index_args.index_results = False

    # running total of indexed document bytes, updated by process_generator
    index_args.document_size_capacity_bytes = 0

    if index_args.index_results:
        # call py es bulk using a process generator to feed it ES documents
        res_beg, res_end, res_suc, res_dup, res_fail, res_retry = streaming_bulk(
            es, process_generator(index_args, parser))
        logger.info(
            "Indexed results - %s success, %s duplicates, %s failures, with %s retries." %
            (res_suc, res_dup, res_fail, res_retry))
        start_t = time.strftime(FMT, time.gmtime(res_beg))
        end_t = time.strftime(FMT, time.gmtime(res_end))
    else:
        start_t = time.strftime(FMT, time.gmtime())
        # need to loop through generator and pass on all yields
        # this will execute all jobs without elasticsearch
        for i in process_generator(index_args, parser):
            pass
        end_t = time.strftime(FMT, time.gmtime())

    start_t = datetime.datetime.strptime(start_t, FMT)
    end_t = datetime.datetime.strptime(end_t, FMT)

    # get time delta for indexing run
    tdelta = end_t - start_t
    total_capacity_bytes = index_args.document_size_capacity_bytes
    logger.info("Duration of execution - %s, with total size of %s bytes" %
                (tdelta, total_capacity_bytes))