Example #1
def main():

    setupLogging()

    args = parseArguments()

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    job_config = '{"analysisConfig" : {\
                        "bucketSpan":3600,\
                        "detectors" :[{"function":"metric","fieldName":"responsetime","byFieldName":"airline"}] },\
                        "dataDescription" : {"fieldDelimiter":",", "timeField":"time", "timeFormat":"yyyy-MM-dd HH:mm:ssX"} }'

    logging.info("Creating job")
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print(http_status_code, json.dumps(response))
        return

    job_id = response['id']

    logging.info("Uploading data to " + job_id)
    file = open(args.file, 'rb')
    (http_status_code, response) = engine_client.upload(job_id, file)
    if http_status_code != 202:
        print(http_status_code, json.dumps(response))
        return

    logging.info("Closing job " + job_id)
    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print(http_status_code, json.dumps(response))
        return

    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        print(http_status_code, json.dumps(response))
    else:
        print "Date,Anomaly Score,Max Normalized Probablility"
        for bucket in response:
            print "{0},{1},{2}".format(bucket['timestamp'],
                                       bucket['anomalyScore'],
                                       bucket['maxNormalizedProbability'])
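
The examples call setupLogging() and parseArguments() without defining them. A minimal sketch, assuming they wrap the standard logging and argparse modules (the argument names below are guesses based on how args is used above):

import argparse
import logging

def setupLogging():
    # assumed implementation: plain INFO-level logging to the console
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s %(levelname)s %(message)s")

def parseArguments():
    # assumed implementation: Engine API host/port and the CSV file to upload
    parser = argparse.ArgumentParser(description="Upload a CSV file to an Engine API job")
    parser.add_argument("--host", default="localhost", help="Engine API host")
    parser.add_argument("--port", default=8080, type=int, help="Engine API port")
    parser.add_argument("file", help="CSV data file to upload")
    return parser.parse_args()
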
Example #2
def main():

    setupLogging()

    args = parseArguments()

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    job_config = '{"analysisConfig" : {\
                        "bucketSpan":3600,\
                        "detectors" :[{"function":"metric","fieldName":"responsetime","byFieldName":"airline"}] },\
                        "dataDescription" : {"fieldDelimiter":",", "timeField":"time", "timeFormat":"yyyy-MM-dd HH:mm:ssX"} }'

    logging.info("Creating job")
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print (http_status_code, json.dumps(response))
        return

    job_id = response['id']

    logging.info("Uploading data to " + job_id)
    file = open(args.file, 'rb')
    (http_status_code, response) = engine_client.upload(job_id, file)
    if http_status_code != 202:
        print (http_status_code, json.dumps(response))
        return


    logging.info("Closing job " + job_id)
    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print (http_status_code, json.dumps(response))
        return

    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        print (http_status_code, json.dumps(response))
    else:
        print "Date,Anomaly Score,Max Normalized Probablility"
        for bucket in response:                                
            print "{0},{1},{2}".format(bucket['timestamp'], bucket['anomalyScore'], 
                        bucket['maxNormalizedProbability'])
Example #3
def main():

    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print "Error opening file " + args.file
        return
  

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    job_id = args.job_id
    if job_id == None:
        (http_status, response) = engine_client.createJob(json.dumps(config['job_config']))
        job_id = response['id']  
        print "Created job with id " + str(job_id)

    print "Using job id " + job_id

    data_type = config['type']
    raw_query = insertDateRangeFilter(config['search'])
    

    timezone = UTC()
    doc_count = 0    
    try:
        query_end_time = datetime.now(timezone) - timedelta(seconds=args.update_interval)
        while True:
            query_start_time = query_end_time
            query_end_time = datetime.now(timezone)
            query_str = json.dumps(replaceDateArgs(raw_query, query_start_time, 
                query_end_time)) 
            index_name = logstashIndex(query_start_time)        

            skip = 0
            try:
                # Query the documents from ElasticSearch and write to the Engine
                hits = es_client.search(index=index_name, doc_type=data_type, 
                    body=query_str, from_=skip, size=MAX_DOC_TAKE)
            except elasticsearch.exceptions.NotFoundError:
                print "Error: missing logstash index '" + index_name + "'"
                # hits is not defined if the index is missing, so skip this interval
                continue

            # upload to the API
            content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits'])) 
            
            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                print "Error uploading log content to the Engine"
                print http_status, json.dumps(response)
                

            doc_count += len(hits['hits']['hits'])                 

            # get any other docs
            hitcount = int(hits['hits']['total'])
            while hitcount > (skip + MAX_DOC_TAKE):    
                skip += MAX_DOC_TAKE
                hits = es_client.search(index=index_name, doc_type=data_type, 
                    body=query_str, from_=skip, size=MAX_DOC_TAKE)

                content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))

                (http_status, response) = engine_client.upload(job_id, content)
                if http_status != 202:
                    print "Error uploading log content to the Engine"
                    print json.dumps(response)
                    

                doc_count += len(hits['hits']['hits']) 

            print "Uploaded {0} records".format(str(doc_count))

            duration = datetime.now(timezone) - query_end_time
            sleep_time = max(args.update_interval - duration.seconds, 0)
            print "sleeping for " + str(sleep_time) + " seconds"

            if sleep_time > 0.0:                
                time.sleep(sleep_time)

  
    except KeyboardInterrupt:
        print "Interrupt caught closing job..."

    

    engine_client.close(job_id)
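
UTC() above is passed to datetime.now() as a tzinfo but is not shown; the UtcOffset() used in later examples presumably plays the same role. A minimal sketch, assuming it is a plain fixed-offset tzinfo subclass:

from datetime import timedelta, tzinfo

class UTC(tzinfo):
    # assumed implementation: a tzinfo that always reports a zero offset from UTC
    def utcoffset(self, dt):
        return timedelta(0)
    def tzname(self, dt):
        return "UTC"
    def dst(self, dt):
        return timedelta(0)
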
Example #4
def main():

    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print "Error opening file " + args.file
        return

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    job_id = args.job_id
    if job_id == None:
        (http_status,
         response) = engine_client.createJob(json.dumps(config['job_config']))
        job_id = response['id']
        print "Created job with id " + str(job_id)

    print "Using job id " + job_id

    data_type = config['type']
    raw_query = insertDateRangeFilter(config['search'])

    timezone = UTC()
    doc_count = 0
    try:
        query_end_time = datetime.now(timezone) - timedelta(
            seconds=args.update_interval)
        while True:
            query_start_time = query_end_time
            query_end_time = datetime.now(timezone)
            query_str = json.dumps(
                replaceDateArgs(raw_query, query_start_time, query_end_time))
            index_name = logstashIndex(query_start_time, args.update_interval)

            skip = 0
            try:
                # Query the documents from ElasticSearch and write to the Engine
                hits = es_client.search(index=index_name,
                                        doc_type=data_type,
                                        body=query_str,
                                        from_=skip,
                                        size=MAX_DOC_TAKE)
            except elasticsearch.exceptions.NotFoundError:
                print "Error: missing logstash index '" + index_name + "'"
                # hits is not defined if the index is missing, so skip this interval
                continue

            # upload to the API
            content = json.dumps(elasticSearchDocsToDicts(
                hits['hits']['hits']))

            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                print "Error uploading log content to the Engine"
                print http_status, json.dumps(response)

            doc_count += len(hits['hits']['hits'])

            # get any other docs
            hitcount = int(hits['hits']['total'])
            while hitcount > (skip + MAX_DOC_TAKE):
                skip += MAX_DOC_TAKE
                hits = es_client.search(index=index_name,
                                        doc_type=data_type,
                                        body=query_str,
                                        from_=skip,
                                        size=MAX_DOC_TAKE)

                content = json.dumps(
                    elasticSearchDocsToDicts(hits['hits']['hits']))

                (http_status, response) = engine_client.upload(job_id, content)
                if http_status != 202:
                    print "Error uploading log content to the Engine"
                    print json.dumps(response)

                doc_count += len(hits['hits']['hits'])

            print "Uploaded {0} records".format(str(doc_count))

            duration = datetime.now(timezone) - query_end_time
            sleep_time = max(args.update_interval - duration.seconds, 0)
            print "sleeping for " + str(sleep_time) + " seconds"

            if sleep_time > 0.0:
                time.sleep(sleep_time)

    except KeyboardInterrupt:
        print "Interrupt caught closing job..."

    engine_client.close(job_id)
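
elasticSearchDocsToDicts() is another helper these snippets assume. A plausible minimal version, assuming it only extracts the _source document from each Elasticsearch hit:

def elasticSearchDocsToDicts(hits):
    # assumed implementation: drop the Elasticsearch metadata (_index, _id, ...)
    # and keep just the original '_source' document from every hit
    return [hit['_source'] for hit in hits]
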
Example #5
def main():

    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print "Error opening file " + args.file
        return


    # default start date is None meaning 'all time'
    start_date = None
    if args.start_date != None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")

    # default end date is today
    end_date = datetime.today()
    if args.end_date != None:
        end_date = datetime.strptime(args.end_date, "%Y-%m-%d")
   

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    data_type = config['type']
    search_body = json.dumps(config['search'])

    # If no start date find the first logstash index containing our docs
    if start_date == None:        
        start_date = findDateOfFirstIndex(es_client, data_type, search_body)
        if start_date == None:
            print "No documents found with the query " + search_body
            return

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)
    (http_status, response) = engine_client.createJob(json.dumps(config['job_config']))
    if http_status != 201:
        print "Error creatting job"
        print http_status, json.dumps(response)
        return


    job_id = response['id']  
    print "Created job with id " + str(job_id)

    doc_count = 0
    for index_name in nextLogStashIndex(start_date, end_date):

        print "Reading from index " + index_name

        skip = 0
        try:
            # Query the documents from ElasticSearch and write to the Engine
            hits = es_client.search(index=index_name, doc_type=data_type, 
                body=search_body, from_=skip, size=MAX_DOC_TAKE)
        except elasticsearch.exceptions.NotFoundError:
            # Index not found try the next one
            continue

        # upload to the API
        content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))        
        (http_status, response) = engine_client.upload(job_id, content)
        if http_status != 202:
            print "Error uploading log content to the Engine"
            print http_status, json.dumps(response)
            continue

        doc_count += len(hits['hits']['hits']) 

        # get any other docs
        hitcount = int(hits['hits']['total'])
        while hitcount > (skip + MAX_DOC_TAKE):    
            skip += MAX_DOC_TAKE
            hits = es_client.search(index=index_name, doc_type=data_type, 
                body=search_body, from_=skip, size=MAX_DOC_TAKE)

            content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))        
            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                print json.dumps(response)
                continue

            doc_count += len(hits['hits']['hits']) 


        print "Uploaded {0} records".format(str(doc_count))
        
    engine_client.close(job_id)
    print "{0} records successfully written to job {1}".format(str(doc_count), job_id)
Example #6
def main():
    args = parseArguments()


    start_date = datetime(2014, 05, 18, 0, 0, 0, 0, UtcOffset())
    # interval between the generated timestamps for the records
    interval = timedelta(seconds=300)


    if args.duration <= 0:
        end_date = datetime.now(UtcOffset())
    else:
        duration = timedelta(hours=args.duration)
        end_date = start_date + duration


    job_config = '{\
        "analysisConfig" : {\
            "bucketSpan":3600,\
            "detectors" :[\
                {"fieldName":"In Discards","byFieldName":"host"},\
                {"fieldName":"In Octets","byFieldName":"host"},\
                {"fieldName":"Out Discards","byFieldName":"host"},\
                {"fieldName":"Out Octets","byFieldName":"host"} \
            ]\
        },\
        "dataDescription" : {\
            "fieldDelimiter":",",\
            "timeField":"time",\
            "timeFormat":"yyyy-MM-dd\'T\'HH:mm:ssXXX"\
        }\
    }'


    engine_client = EngineApiClient(args.host, BASE_URL, args.port)
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print (http_status_code, json.dumps(response))
        return

    job_id = response['id']
    print 'Job created with Id = ' + job_id

    # get the csv header (the first record generated)
    record_generator = generateRecords(args.file, start_date, interval, end_date)
    header = ','.join(next(record_generator))
    header += '\n'

    count = 0
    try:
        # for the results
        next_bucket_id = 1
        print
        print "Date,Anomaly Score,Max Normalized Probablility"

        data = header
        for record in record_generator:
            # format as csv and append new line
            csv = ','.join(record) + '\n'
            data += csv
            # print data

            count += 1
            if count == 100:
                (http_status_code, response) = engine_client.upload(job_id, data)
                if http_status_code != 202:
                    print (http_status_code, json.dumps(response))
                    break

                # get the latest results...
                (http_status_code, response) = engine_client.getBucketsByDate(job_id=job_id,
                    start_date=str(next_bucket_id), end_date=None)
                if http_status_code != 200:
                    print (http_status_code, json.dumps(response))
                    break

                # and print them
                for bucket in response:
                    print "{0},{1},{2},{3}".format(bucket['timestamp'],
                        bucket['anomalyScore'], bucket['maxNormalizedProbability'])

                if len(response) > 0:
                    next_bucket_id = int(response[-1]['id']) + 1

                # must send the header every time
                data = header
                count = 0

            # sleep a little while (optional this can be removed)
            #time.sleep(0.1)

    except KeyboardInterrupt:
        print "Keyboard interrupt closing job..."

    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print (http_status_code, json.dumps(response))
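
generateRecords() is only used, never defined, in this example and the next. A rough sketch of one possible implementation, assuming the input is a CSV file whose 'time' column is rewritten with generated timestamps (the first item yielded is the header row, which matches how the caller builds its header):

import csv

def generateRecords(file_name, start_date, interval, end_date):
    # assumed implementation: replay the rows of the CSV file, rewriting the
    # 'time' column with timestamps spaced 'interval' apart; yields the header
    # row first, then one data row per timestamp until end_date is reached
    timestamp = start_date
    with open(file_name, 'rb') as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader)
        yield header
        time_index = header.index('time')
        for row in reader:
            if timestamp >= end_date:
                return
            row[time_index] = timestamp.strftime('%Y-%m-%dT%H:%M:%S+00:00')
            yield row
            timestamp += interval
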
Example #7
def main():
    args = parseArguments()

    start_date = datetime(2014, 05, 18, 0, 0, 0, 0, UtcOffset())
    # interval between the generated timestamps for the records
    interval = timedelta(seconds=300)

    if args.duration <= 0:
        end_date = datetime.now(UtcOffset())
    else:
        duration = timedelta(hours=args.duration)
        end_date = start_date + duration

    job_config = '{\
        "analysisConfig" : {\
            "bucketSpan":3600,\
            "detectors" :[\
                {"fieldName":"In Discards","byFieldName":"host"},\
                {"fieldName":"In Octets","byFieldName":"host"},\
                {"fieldName":"Out Discards","byFieldName":"host"},\
                {"fieldName":"Out Octets","byFieldName":"host"} \
            ]\
        },\
        "dataDescription" : {\
            "fieldDelimiter":",",\
            "timeField":"time",\
            "timeFormat":"yyyy-MM-dd\'T\'HH:mm:ssXXX"\
        }\
    }'

    engine_client = EngineApiClient(args.host, BASE_URL, args.port)
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print(http_status_code, json.dumps(response))
        return

    job_id = response['id']
    print 'Job created with Id = ' + job_id

    # get the csv header (the first record generated)
    record_generator = generateRecords(args.file, start_date, interval,
                                       end_date)
    header = ','.join(next(record_generator))
    header += '\n'

    count = 0
    try:
        # for the results
        next_bucket_id = 1
        print
        print "Date,Bucket ID,Anomaly Score,Max Normalized Probablility"

        data = header
        for record in record_generator:
            # format as csv and append new line
            csv = ','.join(record) + '\n'
            data += csv
            # print data

            count += 1
            if count == 100:
                (http_status_code,
                 response) = engine_client.upload(job_id, data)
                if http_status_code != 202:
                    print(http_status_code, json.dumps(response))
                    break

                # get the latest results...
                (http_status_code, response) = engine_client.getBucketsByDate(
                    job_id=job_id,
                    start_date=str(next_bucket_id),
                    end_date=None)
                if http_status_code != 200:
                    print(http_status_code, json.dumps(response))
                    break

                # and print them
                for bucket in response:
                    print "{0},{1},{2},{3}".format(
                        bucket['timestamp'], bucket['id'],
                        bucket['anomalyScore'],
                        bucket['maxNormalizedProbability'])

                if len(response) > 0:
                    next_bucket_id = int(response[-1]['id']) + 1

                # must send the header every time
                data = header
                count = 0

            # sleep a little while (optional this can be removed)
            #time.sleep(0.1)

    except KeyboardInterrupt:
        print "Keyboard interrupt closing job..."

    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print(http_status_code, json.dumps(response))
Example #8
def main():

    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print "Error opening file " + args.file
        return

    # default start date is None meaning 'all time'
    start_date = None
    if args.start_date != None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")

    # default end date is today
    end_date = datetime.today()
    if args.end_date != None:
        end_date = datetime.strptime(args.end_date, "%Y-%m-%d")

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    data_type = config['type']
    search_body = json.dumps(config['search'])

    # If no start date find the first logstash index containing our docs
    if start_date == None:
        start_date = findDateOfFirstIndex(es_client, data_type, search_body)
        if start_date == None:
            print "No documents found with the query " + search_body
            return

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)
    (http_status,
     response) = engine_client.createJob(json.dumps(config['job_config']))
    if http_status != 201:
        print "Error creatting job"
        print http_status, json.dumps(response)
        return

    job_id = response['id']
    print "Created job with id " + str(job_id)

    doc_count = 0
    for index_name in nextLogStashIndex(start_date, end_date):

        print "Reading from index " + index_name

        skip = 0
        try:
            # Query the documents from ElasticSearch and write to the Engine
            hits = es_client.search(index=index_name,
                                    doc_type=data_type,
                                    body=search_body,
                                    from_=skip,
                                    size=MAX_DOC_TAKE)
        except elasticsearch.exceptions.NotFoundError:
            # Index not found try the next one
            continue

        # upload to the API
        content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))
        (http_status, response) = engine_client.upload(job_id, content)
        if http_status != 202:
            print "Error uploading log content to the Engine"
            print http_status, json.dumps(response)
            continue

        doc_count += len(hits['hits']['hits'])

        # get any other docs
        hitcount = int(hits['hits']['total'])
        while hitcount > (skip + MAX_DOC_TAKE):
            skip += MAX_DOC_TAKE
            hits = es_client.search(index=index_name,
                                    doc_type=data_type,
                                    body=search_body,
                                    from_=skip,
                                    size=MAX_DOC_TAKE)

            content = json.dumps(elasticSearchDocsToDicts(
                hits['hits']['hits']))
            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                print json.dumps(response)
                continue

            doc_count += len(hits['hits']['hits'])

        print "Uploaded {0} records".format(str(doc_count))

    (http_status, response) = engine_client.close(job_id)
    if http_status != 202:
        print "Error closing job"
        print http_status, json.dumps(response)
        return
    print "{0} records successfully written to job {1}".format(
        str(doc_count), job_id)