Example #1
def main():
    args = parseArguments()
    host = args.host
    port = args.port

    # Create the REST API client
    engine_client = EngineApiClient(host, BASE_URL, port)

    while True:
        (http_status_code, response) = engine_client.getJobs()
        if http_status_code != 200:
            print(http_status_code, json.dumps(response))
            break

        jobs = response['documents']
        if (len(jobs) == 0):
            print "Deleted all jobs"
            break

        print "Deleting %d jobs" % (len(jobs)),

        for job in jobs:
            (http_status_code, response) = engine_client.delete(job['id'])
            if http_status_code != 200:
                print(http_status_code, json.dumps(response))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        print
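Note: each example on this page shows only the script's main() function. The surrounding boilerplate (imports, constants such as BASE_URL, the setupLogging() and parseArguments() helpers, and the entry-point guard) is not reproduced here. The following is a minimal sketch of that boilerplate; the import path for EngineApiClient, the BASE_URL value, the default port and the exact argument names are assumptions rather than part of these examples, and should be adapted to your installation (later examples also expect extra arguments such as jobid, file, anomalyScore and normalizedProbability).

# A minimal sketch of the boilerplate the main() functions on this page assume.
# The import path, BASE_URL value and defaults below are assumptions, not part
# of the examples; adjust them to match your Prelert Engine API installation.
import argparse
import json      # json, sys and time are imported here because the
import logging   # main() bodies above and below use them directly
import sys
import time

# assumed import path for the REST client used by every example
from prelert.engineApiClient import EngineApiClient

BASE_URL = '/engine/v2'     # assumed API base path
DEFAULT_PORT = 8080         # assumed default Engine API port
POLL_INTERVAL_SECS = 10     # used by the polling examples below


def setupLogging():
    # basic console logging, as used by the examples that call setupLogging()
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')


def parseArguments():
    # only the host/port arguments used by this first example; later examples
    # read further attributes (e.g. jobid, file, anomalyScore) from 'args'
    parser = argparse.ArgumentParser(description='Prelert Engine API example')
    parser.add_argument('--host', default='localhost', help='Engine API host')
    parser.add_argument('--port', type=int, default=DEFAULT_PORT,
                        help='Engine API port')
    return parser.parse_args()


if __name__ == '__main__':
    main()
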
Example #2
def main():
    args = parseArguments()

    # read the config file
    config = ConfigParser.RawConfigParser()
    try:
        # insert a section header into the config so
        # ConfigParser will read it without complaint
        with open(args.config, "r") as config_file:
            ini_str = '[root]\n' + config_file.read()
            ini_fp = StringIO.StringIO(ini_str)
            config.readfp(ini_fp)
    except IOError:
        print "Error opening file " + args.config
        return

    try:
        region = config.get('root', 'region')
        access_id = config.get('root', 'aws_access_key_id')
        secret_key = config.get('root', 'aws_secret_access_key')
    except ConfigParser.NoOptionError as e:
        print e
        return

    # AWS CloudWatch connection
    cloudwatch_conn = boto.ec2.cloudwatch.connect_to_region(
        region, aws_access_key_id=access_id, aws_secret_access_key=secret_key)

    if cloudwatch_conn == None:
        print "Error unknown region " + region
        return

    # The Prelert REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    # If no job ID is supplied create a new job
    job_id = createJob(args.job_id, engine_client)
    if job_id == None:
        return

    # default start date is None meaning run realtime
    start_date = None
    if args.start_date != None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")
        start_date = replaceTimezoneWithUtc(start_date)

    if start_date == None:
        runRealtime(job_id, cloudwatch_conn, engine_client)
    else:
        # historical mode, check for an end date
        end_date = replaceTimezoneWithUtc(datetime.utcnow())
        if args.end_date != None:
            end_date = datetime.strptime(args.end_date, "%Y-%m-%d")
            end_date = replaceTimezoneWithUtc(end_date)

        runHistorical(job_id, start_date, end_date, cloudwatch_conn,
                      engine_client)

    print "Closing job..."
    engine_client.close(job_id)
Example #3
def main():

    setupLogging()

    args = parseArguments()
    job_id = args.jobid

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    logging.info("Subscribing to job '" + job_id + "' for alerts")

    printHeader()

    while True:

        try:
            (http_status_code, response) = engine_client.alerts_longpoll(job_id,
                normalized_probability_threshold=args.normalizedProbability,
                anomaly_score_threshold=args.anomalyScore, timeout=args.timeout)
            if http_status_code != 200:
                print (http_status_code, json.dumps(response))
                break

            if response['timeout'] == False:
                printAlert(response)

        except KeyboardInterrupt:
            print "Exiting script..."
            break
Example #4
def main():
    args = parseArguments()
    host = args.host
    port = args.port
    base_url = BASE_URL

    # Create the REST API client
    engine_client = EngineApiClient(host, base_url, port)

    while True:
        (http_status_code, response) = engine_client.getJobs()
        if http_status_code != 200:
            print (http_status_code, json.dumps(response))
            break
        
        jobs = response['documents']        
        if (len(jobs) == 0):
            print "Deleted all jobs"
            break


        print "Deleting %d jobs" % (len(jobs)),

        for job in jobs:
            (http_status_code, response) = engine_client.delete(job['id'])
            if http_status_code != 200:
                print (http_status_code, json.dumps(response))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        print
Example #5
def main():

    setupLogging()

    args = parseArguments()
    job_id = args.jobid

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    # Get all the buckets up to now
    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id, 
        include_records=False, 
        anomaly_score_filter_value=args.anomalyScore,
        normalized_probability_filter_value=args.normalizedProbability)

    
    if http_status_code != 200:
        print (http_status_code, json.dumps(response))
        return
    
    
    printHeader()
    printBuckets(response)

    if args.continue_poll:

        if len(response) > 0:
            next_bucket_id = int(response[-1]['id']) + 1
        else:
            next_bucket_id = None
        
        while True:
            # Wait POLL_INTERVAL_SECS then query for any new buckets
            time.sleep(POLL_INTERVAL_SECS)

            (http_status_code, response) = engine_client.getBucketsByDate(job_id=job_id, 
                start_date=str(next_bucket_id), end_date=None, 
                include_records=False,         
                anomaly_score_filter_value=args.anomalyScore,
                normalized_probability_filter_value=args.normalizedProbability)

            if http_status_code != 200:
                print (http_status_code, json.dumps(response))
                break

            printBuckets(response)
            
            if len(response) > 0:
                next_bucket_id = int(response[-1]['id']) + 1
Example #6
def main():

    setupLogging()

    args = parseArguments()

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    job_config = '{"analysisConfig" : {\
                        "bucketSpan":3600,\
                        "detectors" :[{"function":"metric","fieldName":"responsetime","byFieldName":"airline"}] },\
                        "dataDescription" : {"fieldDelimiter":",", "timeField":"time", "timeFormat":"yyyy-MM-dd HH:mm:ssX"} }'

    logging.info("Creating job")
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print (http_status_code, json.dumps(response))
        return

    job_id = response['id']

    logging.info("Uploading data to " + job_id)
    file = open(args.file, 'rb')
    (http_status_code, response) = engine_client.upload(job_id, file)
    if http_status_code != 202:
        print (http_status_code, json.dumps(response))
        return


    logging.info("Closing job " + job_id)
    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print (http_status_code, json.dumps(response))
        return

    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        print (http_status_code, json.dumps(response))
    else:
        print "Date,Anomaly Score,Max Normalized Probablility"
        for bucket in response:                                
            print "{0},{1},{2}".format(bucket['timestamp'], bucket['anomalyScore'], 
                        bucket['maxNormalizedProbability'])
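The job configuration in this example is written as one long backslash-continued JSON string. An equivalent way to produce the same payload, sketched here rather than taken from the original script, is to build a Python dict and serialize it with json.dumps:

# Sketch: the same job configuration as above, built as a dict and
# serialized with json.dumps instead of a hand-written JSON string.
import json

job_config = json.dumps({
    "analysisConfig": {
        "bucketSpan": 3600,
        "detectors": [
            {"function": "metric",
             "fieldName": "responsetime",
             "byFieldName": "airline"}
        ]
    },
    "dataDescription": {
        "fieldDelimiter": ",",
        "timeField": "time",
        "timeFormat": "yyyy-MM-dd HH:mm:ssX"
    }
})
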
Example #7
def main():

    setupLogging()

    args = parseArguments()
    host = args.host
    port = args.port
    base_url = BASE_URL
    job_id = args.jobid

    # Create the REST API client
    engine_client = EngineApiClient(host, base_url, port)

    # Get all the buckets up to now
    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        print (http_status_code, json.dumps(response))
        return
    
    
    print "Date,BucketId,AnomalyScore"
    for bucket in response:
        print "{0},{1},{2}".format(bucket['timestamp'], bucket['id'], bucket['anomalyScore'])
    
    if len(response) > 0:
        next_bucket_id = int(response[-1]['id']) + 1
    else:
        next_bucket_id = None

    # Wait POLL_INTERVAL_SECS then query for any new buckets
    while True:
        time.sleep(POLL_INTERVAL_SECS)

        (http_status_code, response) = engine_client.getBucketsByDate(job_id=job_id, 
            start_date=str(next_bucket_id), end_date=None)
        if http_status_code != 200:
            print (http_status_code, json.dumps(response))
            break

        for bucket in response:
            print "{0},{1},{2}".format(bucket['timestamp'], bucket['id'], bucket['anomalyScore'])
        
        if len(response) > 0:
            next_bucket_id = int(response[-1]['id']) + 1
Example #8
def main():

    setupLogging()

    args = parseArguments()
    job_id = args.jobid

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    # Get all the records up to now
    logging.info("Get records for job " + job_id)

    skip = 0
    take = 200
    (http_status_code, response) = engine_client.getRecords(
        job_id,
        skip,
        take,
        normalized_probability_filter_value=args.normalizedProbability,
        anomaly_score_filter_value=args.anomalyScore)
    if http_status_code != 200:
        print(http_status_code, json.dumps(response))
        return

    hit_count = int(response['hitCount'])

    printHeader()
    printRecords(response['documents'])

    while (skip + take) < hit_count:
        skip += take

        (http_status_code, response) = engine_client.getRecords(
            job_id,
            skip,
            take,
            normalized_probability_filter_value=args.normalizedProbability,
            anomaly_score_filter_value=args.anomalyScore)

        if http_status_code != 200:
            print(http_status_code, json.dumps(response))
            return

        printRecords(response['documents'])
Example #9
def main():

    setupLogging()

    args = parseArguments()    
    job_id = args.jobid

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    # Get all the records up to now
    logging.info("Get records for job " + job_id)

    skip = 0
    take = 200
    (http_status_code, response) = engine_client.getRecords(job_id, skip, take,
                            normalized_probability_filter_value=args.normalizedProbability, 
                            anomaly_score_filter_value=args.anomalyScore)        
    if http_status_code != 200:
        print (http_status_code, json.dumps(response))
        return

    hit_count = int(response['hitCount'])

    printHeader()
    printRecords(response['documents'])

    while (skip + take) < hit_count:
        skip += take

        (http_status_code, response) = engine_client.getRecords(job_id, skip, take,
                            normalized_probability_filter_value=args.normalizedProbability, 
                            anomaly_score_filter_value=args.anomalyScore)        

        if http_status_code != 200:
            print (http_status_code, json.dumps(response))
            return

        printRecords(response['documents'])
Example #10
def main():

    setupLogging()

    args = parseArguments()

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    job_config = '{"analysisConfig" : {\
                        "bucketSpan":3600,\
                        "detectors" :[{"function":"metric","fieldName":"responsetime","byFieldName":"airline"}] },\
                        "dataDescription" : {"fieldDelimiter":",", "timeField":"time", "timeFormat":"yyyy-MM-dd HH:mm:ssX"} }'

    logging.info("Creating job")
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print(http_status_code, json.dumps(response))
        return

    job_id = response['id']

    logging.info("Uploading data to " + job_id)
    file = open(args.file, 'rb')
    (http_status_code, response) = engine_client.upload(job_id, file)
    if http_status_code != 202:
        print(http_status_code, json.dumps(response))
        return

    logging.info("Closing job " + job_id)
    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print(http_status_code, json.dumps(response))
        return

    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        print(http_status_code, json.dumps(response))
    else:
        print "Date,Anomaly Score,Max Normalized Probablility"
        for bucket in response:
            print "{0},{1},{2}".format(bucket['timestamp'],
                                       bucket['anomalyScore'],
                                       bucket['maxNormalizedProbability'])
Example #11
def main():

    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print "Error opening file " + args.file
        return

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    job_id = args.job_id
    if job_id == None:
        (http_status,
         response) = engine_client.createJob(json.dumps(config['job_config']))
        job_id = response['id']
        print "Created job with id " + str(job_id)

    print "Using job id " + job_id

    data_type = config['type']
    raw_query = insertDateRangeFilter(config['search'])

    timezone = UTC()
    doc_count = 0
    try:
        query_end_time = datetime.now(timezone) - timedelta(
            seconds=args.update_interval)
        while True:
            query_start_time = query_end_time
            query_end_time = datetime.now(timezone)
            query_str = json.dumps(
                replaceDateArgs(raw_query, query_start_time, query_end_time))
            index_name = logstashIndex(query_start_time, args.update_interval)

            skip = 0
            try:
                # Query the documents from ElasticSearch and write to the Engine
                hits = es_client.search(index=index_name,
                                        doc_type=data_type,
                                        body=query_str,
                                        from_=skip,
                                        size=MAX_DOC_TAKE)
            except elasticsearch.exceptions.NotFoundError:
                print "Error: missing logstash index '" + index_name + "'"
                # no index yet: wait for the next poll rather than
                # referencing 'hits' before it has been assigned
                time.sleep(args.update_interval)
                continue

            # upload to the API
            content = json.dumps(elasticSearchDocsToDicts(
                hits['hits']['hits']))

            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                print "Error uploading log content to the Engine"
                print http_status, json.dumps(response)

            doc_count += len(hits['hits']['hits'])

            # get any other docs
            hitcount = int(hits['hits']['total'])
            while hitcount > (skip + MAX_DOC_TAKE):
                skip += MAX_DOC_TAKE
                hits = es_client.search(index=index_name,
                                        doc_type=data_type,
                                        body=query_str,
                                        from_=skip,
                                        size=MAX_DOC_TAKE)

                content = json.dumps(
                    elasticSearchDocsToDicts(hits['hits']['hits']))

                (http_status, response) = engine_client.upload(job_id, content)
                if http_status != 202:
                    print "Error uploading log content to the Engine"
                    print json.dumps(response)

                doc_count += len(hits['hits']['hits'])

            print "Uploaded {0} records".format(str(doc_count))

            duration = datetime.now(timezone) - query_end_time
            sleep_time = max(args.update_interval - duration.seconds, 0)
            print "sleeping for " + str(sleep_time) + " seconds"

            if sleep_time > 0.0:
                time.sleep(sleep_time)

    except KeyboardInterrupt:
        print "Interrupt caught closing job..."

    engine_client.close(job_id)
Example #12
def main():

    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print "Error opening file " + args.file
        return

    # default start date is None meaning 'all time'
    start_date = None
    if args.start_date != None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")

    # default end date is today
    end_date = datetime.today()
    if args.end_date != None:
        end_date = datetime.strptime(args.end_date, "%Y-%m-%d")

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    data_type = config['type']
    search_body = json.dumps(config['search'])

    # If no start date find the first logstash index containing our docs
    if start_date == None:
        start_date = findDateOfFirstIndex(es_client, data_type, search_body)
        if start_date == None:
            print "No documents found with the query " + search_body
            return

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)
    (http_status,
     response) = engine_client.createJob(json.dumps(config['job_config']))
    if http_status != 201:
        print "Error creatting job"
        print http_status, json.dumps(response)
        return

    job_id = response['id']
    print "Created job with id " + str(job_id)

    doc_count = 0
    for index_name in nextLogStashIndex(start_date, end_date):

        print "Reading from index " + index_name

        skip = 0
        try:
            # Query the documents from ElasticSearch and write to the Engine
            hits = es_client.search(index=index_name,
                                    doc_type=data_type,
                                    body=search_body,
                                    from_=skip,
                                    size=MAX_DOC_TAKE)
        except elasticsearch.exceptions.NotFoundError:
            # Index not found try the next one
            continue

        # upload to the API
        content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))
        (http_status, response) = engine_client.upload(job_id, content)
        if http_status != 202:
            print "Error uploading log content to the Engine"
            print http_status, json.dumps(response)
            continue

        doc_count += len(hits['hits']['hits'])

        # get any other docs
        hitcount = int(hits['hits']['total'])
        while hitcount > (skip + MAX_DOC_TAKE):
            skip += MAX_DOC_TAKE
            hits = es_client.search(index=index_name,
                                    doc_type=data_type,
                                    body=search_body,
                                    from_=skip,
                                    size=MAX_DOC_TAKE)

            content = json.dumps(elasticSearchDocsToDicts(
                hits['hits']['hits']))
            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                print json.dumps(response)
                continue

            doc_count += len(hits['hits']['hits'])

        print "Uploaded {0} records".format(str(doc_count))

    (http_status, response) = engine_client.close(job_id)
    if http_status != 202:
        print "Error closing job"
        print http_status, json.dumps(response)
        return
    print "{0} records successfully written to job {1}".format(
        str(doc_count), job_id)
Example #13
def main():
    args = parseArguments()


    start_date = datetime(2014, 05, 18, 0, 0, 0, 0, UtcOffset())
    # interval between the generated timestamps for the records
    interval = timedelta(seconds=300)


    if args.duration <= 0:
        end_date = datetime.now(UtcOffset())
    else:
        duration = timedelta(hours=args.duration)
        end_date = start_date + duration


    job_config = '{\
        "analysisConfig" : {\
            "bucketSpan":3600,\
            "detectors" :[\
                {"fieldName":"In Discards","byFieldName":"host"},\
                {"fieldName":"In Octets","byFieldName":"host"},\
                {"fieldName":"Out Discards","byFieldName":"host"},\
                {"fieldName":"Out Octets","byFieldName":"host"} \
            ]\
        },\
        "dataDescription" : {\
            "fieldDelimiter":",",\
            "timeField":"time",\
            "timeFormat":"yyyy-MM-dd\'T\'HH:mm:ssXXX"\
        }\
    }'


    engine_client = EngineApiClient(args.host, BASE_URL, args.port)
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print (http_status_code, json.dumps(response))
        return

    job_id = response['id']
    print 'Job created with Id = ' + job_id

    # get the csv header (the first record generated)
    record_generator = generateRecords(args.file, start_date, interval, end_date)
    header = ','.join(next(record_generator))
    header += '\n'

    count = 0
    try:
        # for the results
        next_bucket_id = 1
        print
        print "Date,Anomaly Score,Max Normalized Probablility"

        data = header
        for record in record_generator:
            # format as csv and append new line
            csv = ','.join(record) + '\n'
            data += csv
            # print data

            count += 1
            if count == 100:
                (http_status_code, response) = engine_client.upload(job_id, data)
                if http_status_code != 202:
                    print (http_status_code, json.dumps(response))
                    break

                # get the latest results...
                (http_status_code, response) = engine_client.getBucketsByDate(job_id=job_id,
                    start_date=str(next_bucket_id), end_date=None)
                if http_status_code != 200:
                    print (http_status_code, json.dumps(response))
                    break

                # and print them
                for bucket in response:
                    print "{0},{1},{2},{3}".format(bucket['timestamp'],
                        bucket['anomalyScore'], bucket['maxNormalizedProbability'])

                if len(response) > 0:
                    next_bucket_id = int(response[-1]['id']) + 1

                # must send the header every time
                data = header
                count = 0

            # sleep a little while (optional this can be removed)
            #time.sleep(0.1)

    except KeyboardInterrupt:
        print "Keyboard interrupt closing job..."

    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print (http_status_code, json.dumps(response))
Example #14
def main():
    args = parseArguments()

    # read the config file
    config = ConfigParser.RawConfigParser()
    try:
        # insert a section header into the config so
        # ConfigParser will read it without complaint
        with open(args.config, "r") as config_file:
            ini_str = '[root]\n' + config_file.read()
            ini_fp = StringIO.StringIO(ini_str)
            config.readfp(ini_fp)
    except IOError:
        print "Error opening file " + args.config
        return


    try:
        region = config.get('root', 'region')
        access_id = config.get('root', 'aws_access_key_id')
        secret_key = config.get('root', 'aws_secret_access_key')
    except ConfigParser.NoOptionError as e:
        print e
        return


    # AWS CloudWatch connection
    cloudwatch_conn = boto.ec2.cloudwatch.connect_to_region(region,
                 aws_access_key_id=access_id,
                 aws_secret_access_key=secret_key)

    if cloudwatch_conn == None:
        print "Error unknown region " + region
        return

    # The Prelert REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    # If no job ID is supplied create a new job
    job_id = createJob(args.job_id, engine_client)
    if job_id == None:
        return

    # default start date is None meaning run realtime
    start_date = None
    if args.start_date != None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")
        start_date = replaceTimezoneWithUtc(start_date)

    if start_date == None:
        runRealtime(job_id, cloudwatch_conn, engine_client)
    else:
        # historical mode, check for an end date
        end_date = replaceTimezoneWithUtc(datetime.utcnow())
        if args.end_date != None:
            end_date = datetime.strptime(args.end_date, "%Y-%m-%d")
            end_date = replaceTimezoneWithUtc(end_date)

        runHistorical(job_id, start_date, end_date, cloudwatch_conn, engine_client)


    print "Closing job..."
    engine_client.close(job_id)
Example #15
def main():
    args = parseArguments()

    start_date = datetime(2014, 05, 18, 0, 0, 0, 0, UtcOffset())
    # interval between the generated timestamps for the records
    interval = timedelta(seconds=300)

    if args.duration <= 0:
        end_date = datetime.now(UtcOffset())
    else:
        duration = timedelta(hours=args.duration)
        end_date = start_date + duration

    job_config = '{\
        "analysisConfig" : {\
            "bucketSpan":3600,\
            "detectors" :[\
                {"fieldName":"In Discards","byFieldName":"host"},\
                {"fieldName":"In Octets","byFieldName":"host"},\
                {"fieldName":"Out Discards","byFieldName":"host"},\
                {"fieldName":"Out Octets","byFieldName":"host"} \
            ]\
        },\
        "dataDescription" : {\
            "fieldDelimiter":",",\
            "timeField":"time",\
            "timeFormat":"yyyy-MM-dd\'T\'HH:mm:ssXXX"\
        }\
    }'

    engine_client = EngineApiClient(args.host, BASE_URL, args.port)
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print(http_status_code, json.dumps(response))
        return

    job_id = response['id']
    print 'Job created with Id = ' + job_id

    # get the csv header (the first record generated)
    record_generator = generateRecords(args.file, start_date, interval,
                                       end_date)
    header = ','.join(next(record_generator))
    header += '\n'

    count = 0
    try:
        # for the results
        next_bucket_id = 1
        print
        print "Date,Bucket ID,Anomaly Score,Max Normalized Probablility"

        data = header
        for record in record_generator:
            # format as csv and append new line
            csv = ','.join(record) + '\n'
            data += csv
            # print data

            count += 1
            if count == 100:
                (http_status_code,
                 response) = engine_client.upload(job_id, data)
                if http_status_code != 202:
                    print(http_status_code, json.dumps(response))
                    break

                # get the latest results...
                (http_status_code, response) = engine_client.getBucketsByDate(
                    job_id=job_id,
                    start_date=str(next_bucket_id),
                    end_date=None)
                if http_status_code != 200:
                    print(http_status_code, json.dumps(response))
                    break

                # and print them
                for bucket in response:
                    print "{0},{1},{2},{3}".format(
                        bucket['timestamp'], bucket['id'],
                        bucket['anomalyScore'],
                        bucket['maxNormalizedProbability'])

                if len(response) > 0:
                    next_bucket_id = int(response[-1]['id']) + 1

                # must send the header every time
                data = header
                count = 0

            # sleep a little while (optional this can be removed)
            #time.sleep(0.1)

    except KeyboardInterrupt:
        print "Keyboard interrupt closing job..."

    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print(http_status_code, json.dumps(response))
Example #16
def main():

    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print "Error opening file " + args.file
        return
  

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    job_id = args.job_id
    if job_id == None:
        (http_status, response) = engine_client.createJob(json.dumps(config['job_config']))
        job_id = response['id']  
        print "Created job with id " + str(job_id)

    print "Using job id " + job_id

    data_type = config['type']
    raw_query = insertDateRangeFilter(config['search'])
    

    timezone = UTC()
    doc_count = 0    
    try:
        query_end_time = datetime.now(timezone) - timedelta(seconds=args.update_interval)
        while True:
            query_start_time = query_end_time
            query_end_time = datetime.now(timezone)
            query_str = json.dumps(replaceDateArgs(raw_query, query_start_time, 
                query_end_time)) 
            index_name = logstashIndex(query_start_time)        

            skip = 0
            try:
                # Query the documents from ElasticSearch and write to the Engine
                hits = es_client.search(index=index_name, doc_type=data_type, 
                    body=query_str, from_=skip, size=MAX_DOC_TAKE)
            except elasticsearch.exceptions.NotFoundError:
                print "Error: missing logstash index '" + index_name + "'"
                # no index yet: wait for the next poll rather than
                # referencing 'hits' before it has been assigned
                time.sleep(args.update_interval)
                continue

            # upload to the API
            content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits'])) 
            
            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                print "Error uploading log content to the Engine"
                print http_status, json.dumps(response)
                

            doc_count += len(hits['hits']['hits'])                 

            # get any other docs
            hitcount = int(hits['hits']['total'])
            while hitcount > (skip + MAX_DOC_TAKE):    
                skip += MAX_DOC_TAKE
                hits = es_client.search(index=index_name, doc_type=data_type, 
                    body=query_str, from_=skip, size=MAX_DOC_TAKE)

                content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))

                (http_status, response) = engine_client.upload(job_id, content)
                if http_status != 202:
                    print "Error uploading log content to the Engine"
                    print json.dumps(response)
                    

                doc_count += len(hits['hits']['hits']) 

            print "Uploaded {0} records".format(str(doc_count))

            duration = datetime.now(timezone) - query_end_time
            sleep_time = max(args.update_interval - duration.seconds, 0)
            print "sleeping for " + str(sleep_time) + " seconds"

            if sleep_time > 0.0:                
                time.sleep(sleep_time)

  
    except KeyboardInterrupt:
        print "Interrupt caught closing job..."

    engine_client.close(job_id)
Example #17
def main():

    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print "Error opening file " + args.file
        return


    # default start date is None meaning 'all time'
    start_date = None
    if args.start_date != None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")

    # default end date is today
    end_date = datetime.today()
    if args.end_date != None:
        end_date = datetime.strptime(args.end_date, "%Y-%m-%d")
   

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    data_type = config['type']
    search_body = json.dumps(config['search'])

    # If no start date find the first logstash index containing our docs
    if start_date == None:        
        start_date = findDateOfFirstIndex(es_client, data_type, search_body)
        if start_date == None:
            print "No documents found with the query " + search_body
            return

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)
    (http_status, response) = engine_client.createJob(json.dumps(config['job_config']))
    if http_status != 201:
        print "Error creatting job"
        print http_status, json.dumps(response)
        return


    job_id = response['id']  
    print "Created job with id " + str(job_id)

    doc_count = 0
    for index_name in nextLogStashIndex(start_date, end_date):

        print "Reading from index " + index_name

        skip = 0
        try:
            # Query the documents from ElasticSearch and write to the Engine
            hits = es_client.search(index=index_name, doc_type=data_type, 
                body=search_body, from_=skip, size=MAX_DOC_TAKE)
        except elasticsearch.exceptions.NotFoundError:
            # Index not found try the next one
            continue

        # upload to the API
        content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))        
        (http_status, response) = engine_client.upload(job_id, content)
        if http_status != 202:
            print "Error uploading log content to the Engine"
            print http_status, json.dumps(response)
            continue

        doc_count += len(hits['hits']['hits']) 

        # get any other docs
        hitcount = int(hits['hits']['total'])
        while hitcount > (skip + MAX_DOC_TAKE):    
            skip += MAX_DOC_TAKE
            hits = es_client.search(index=index_name, doc_type=data_type, 
                body=search_body, from_=skip, size=MAX_DOC_TAKE)

            content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))        
            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                print json.dumps(response)
                continue

            doc_count += len(hits['hits']['hits']) 


        print "Uploaded {0} records".format(str(doc_count))
        
    engine_client.close(job_id)
    print "{0} records successfully written to job {1}".format(str(doc_count), job_id)