# Example #1
# 0
def main():
    """Run the wrapped benchmark tool and optionally index its results.

    Parses ``-t/--tool`` from the command line, then — if the ``es``
    environment variable is present — connects to Elasticsearch (host from
    ``es``, port from ``es_port``, index prefix from ``es_index``) and
    streams the documents produced by ``process_generator`` via
    ``streaming_bulk``.  When no ES server is configured or the connection
    fails, the generator is still drained so the tool jobs run, just
    without indexing.  Finally logs the wall-clock duration of the run.
    """
    # collect arguments
    parser = argparse.ArgumentParser(description="run script")
    parser.add_argument(
        '-t', '--tool', action='store', dest='tool', help='Provide tool name')
    index_args, unknown = parser.parse_known_args()
    index_args.index_results = False
    index_args.prefix = "snafu-%s" % index_args.tool
    # set up a standard format for time (shared by strftime/strptime below)
    FMT = '%Y-%m-%dT%H:%M:%SGMT'

    # instantiate elasticsearch instance and check connection
    es = {}
    if "es" in os.environ:
        es['server'] = os.environ["es"]
        es['port'] = os.environ["es_port"]
        index_args.prefix = os.environ["es_index"]
        index_args.index_results = True
        try:
            _es_connection_string = str(es['server']) + ':' + str(es['port'])
            # `es` is rebound from the config dict to the client object
            es = elasticsearch.Elasticsearch([_es_connection_string],
                                             send_get_body_as='POST')
            logger.info("Connected to the elasticsearch cluster with info as follows:" + str(es.info()))
        except Exception as e:
            # Logger.warn is deprecated; warning() is the supported spelling.
            # Fall back to running without indexing instead of aborting.
            logger.warning("Elasticsearch connection caused an exception :" + str(e))
            index_args.index_results = False

    if index_args.index_results:
        # call py es bulk using a process generator to feed it ES documents
        res_beg, res_end, res_suc, res_dup, res_fail, res_retry = streaming_bulk(
            es, process_generator(index_args, parser))

        logger.info("Indexed results - %s success, %s duplicates, %s failures, with %s retries." % (res_suc,
                                                                                                    res_dup,
                                                                                                    res_fail,
                                                                                                    res_retry))

        # reuse the shared FMT constant rather than repeating the literal
        start_t = time.strftime(FMT, time.gmtime(res_beg))
        end_t = time.strftime(FMT, time.gmtime(res_end))

    else:
        start_t = time.strftime(FMT, time.gmtime())
        # need to loop through generator and pass on all yields
        # this will execute all jobs without elasticsearch
        for _ in process_generator(index_args, parser):
            pass
        end_t = time.strftime(FMT, time.gmtime())

    start_t = datetime.datetime.strptime(start_t, FMT)
    end_t = datetime.datetime.strptime(end_t, FMT)

    # get time delta for indexing run
    tdelta = end_t - start_t
    logger.info("Duration of execution - %s" % tdelta)
# Example #2
# 0
def main():
    """Run the wrapped benchmark tool and optionally index its results.

    Parses ``-t/--tool`` (required) and ``-v/--verbose`` from the command
    line, configures logging, then — if both ``es`` and ``es_port``
    environment variables are non-empty — connects to Elasticsearch
    (optionally with TLS verification disabled via ``es_verify_cert=false``)
    and streams the documents produced by ``process_generator`` via
    ``streaming_bulk``.  When ES is not configured or the connection fails,
    the generator is still drained so the tool jobs run without indexing.
    Finally logs the wall-clock duration and total indexed document size.
    """
    # collect arguments
    parser = argparse.ArgumentParser(description="run script", add_help=False)
    parser.add_argument('-v',
                        '--verbose',
                        action='store_const',
                        dest='loglevel',
                        const=logging.DEBUG,
                        default=logging.INFO,
                        help='enables verbose wrapper debugging info')
    parser.add_argument('-t',
                        '--tool',
                        help='Provide tool name',
                        required=True)
    index_args, unknown = parser.parse_known_args()
    index_args.index_results = False
    index_args.prefix = "snafu-%s" % index_args.tool

    setup_loggers("snafu", index_args.loglevel)
    log_level_str = 'DEBUG' if index_args.loglevel == logging.DEBUG else 'INFO'
    logger.info("logging level is %s" % log_level_str)

    # set up a standard format for time (shared by strftime/strptime below)
    FMT = '%Y-%m-%dT%H:%M:%SGMT'

    # instantiate elasticsearch instance and check connection
    es = {}
    if "es" in os.environ:
        if os.environ["es"] != "":
            es['server'] = os.environ["es"]
            logger.info("Using elasticsearch server with host:" + es['server'])
        # .get() avoids a KeyError when "es" is set but "es_port" is not
        if os.environ.get("es_port", "") != "":
            es['port'] = os.environ["es_port"]
            logger.info("Using elasticsearch server with port:" + es['port'])
    es_verify_cert = os.getenv("es_verify_cert", "true")
    # indexing requires both a server and a port to have been provided
    if len(es) == 2:
        if os.environ.get("es_index", "") != "":
            index_args.prefix = os.environ["es_index"]
            logger.info("Using index prefix for ES:" + index_args.prefix)
        index_args.index_results = True
        try:
            _es_connection_string = str(es['server']) + ':' + str(es['port'])
            if es_verify_cert == "false":
                logger.info("Turning off TLS certificate verification")
                import urllib3
                urllib3.disable_warnings(
                    urllib3.exceptions.InsecureRequestWarning)
                ssl_ctx = ssl.create_default_context()
                ssl_ctx.check_hostname = False
                ssl_ctx.verify_mode = ssl.CERT_NONE
                # `es` is rebound from the config dict to the client object
                es = elasticsearch.Elasticsearch([_es_connection_string],
                                                 send_get_body_as='POST',
                                                 ssl_context=ssl_ctx,
                                                 use_ssl=True)
            else:
                es = elasticsearch.Elasticsearch([_es_connection_string],
                                                 send_get_body_as='POST')
            logger.info(
                "Connected to the elasticsearch cluster with info as follows:{0}"
                .format(str(es.info())))
        except Exception as e:
            # Logger.warn is deprecated; warning() is the supported spelling.
            # Fall back to running without indexing instead of aborting.
            logger.warning("Elasticsearch connection caused an exception : %s" %
                           e)
            index_args.index_results = False

    # accumulated by process_generator as documents are produced
    index_args.document_size_capacity_bytes = 0
    if index_args.index_results:
        # call py es bulk using a process generator to feed it ES documents
        res_beg, res_end, res_suc, res_dup, res_fail, res_retry = streaming_bulk(
            es, process_generator(index_args, parser))

        logger.info(
            "Indexed results - %s success, %s duplicates, %s failures, with %s retries."
            % (res_suc, res_dup, res_fail, res_retry))

        # reuse the shared FMT constant rather than repeating the literal
        start_t = time.strftime(FMT, time.gmtime(res_beg))
        end_t = time.strftime(FMT, time.gmtime(res_end))

    else:
        start_t = time.strftime(FMT, time.gmtime())
        # need to loop through generator and pass on all yields
        # this will execute all jobs without elasticsearch
        for _ in process_generator(index_args, parser):
            pass
        end_t = time.strftime(FMT, time.gmtime())

    start_t = datetime.datetime.strptime(start_t, FMT)
    end_t = datetime.datetime.strptime(end_t, FMT)

    # get time delta for indexing run
    tdelta = end_t - start_t
    total_capacity_bytes = index_args.document_size_capacity_bytes
    logger.info("Duration of execution - %s, with total size of %s bytes" %
                (tdelta, total_capacity_bytes))