def main():
    """Create a job, upload a data file, close the job and print its buckets.

    Reads the host, port and data file from the parsed command line
    arguments. Each API call returns a (status code, response) pair; when
    the status is not the expected one (201 for create, 202 for upload and
    close, 200 for the bucket query) the status and the JSON-encoded
    response are printed and the function returns early.

    On success the result buckets are printed as CSV rows of timestamp,
    anomaly score and max normalized probability.
    """
    setupLogging()
    args = parseArguments()

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    # Adjacent literals are concatenated into a single JSON document
    job_config = (
        '{"analysisConfig" : {'
        '"bucketSpan":3600,'
        '"detectors" :[{"function":"metric","fieldName":"responsetime",'
        '"byFieldName":"airline"}] },'
        '"dataDescription" : {"fieldDelimiter":",", "timeField":"time", '
        '"timeFormat":"yyyy-MM-dd HH:mm:ssX"} }'
    )

    logging.info("Creating job")
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        # Single-argument print behaves the same as a statement or a function
        print("{0} {1}".format(http_status_code, json.dumps(response)))
        return

    job_id = response['id']

    logging.info("Uploading data to " + job_id)
    # The context manager closes the data file even if the upload raises
    with open(args.file, 'rb') as data_file:
        (http_status_code, response) = engine_client.upload(job_id, data_file)
    if http_status_code != 202:
        print("{0} {1}".format(http_status_code, json.dumps(response)))
        return

    logging.info("Closing job " + job_id)
    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print("{0} {1}".format(http_status_code, json.dumps(response)))
        return

    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        print("{0} {1}".format(http_status_code, json.dumps(response)))
        return

    print("Date,Anomaly Score,Max Normalized Probablility")
    for bucket in response:
        print("{0},{1},{2}".format(bucket['timestamp'],
                                   bucket['anomalyScore'],
                                   bucket['maxNormalizedProbability']))
def main():
    """Create a job, upload a data file, close the job and print its buckets.

    Host, port and data file come from the parsed command line arguments.
    Every API call yields a (status code, response) pair. If the status is
    not the expected one (201 for create, 202 for upload and close, 200 for
    the bucket query) the status and JSON-encoded response are printed and
    the function stops.

    When everything succeeds, one CSV row per result bucket is printed:
    timestamp, anomaly score, max normalized probability.
    """
    setupLogging()
    args = parseArguments()

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    # Adjacent literals are concatenated into a single JSON document
    job_config = (
        '{"analysisConfig" : {'
        '"bucketSpan":3600,'
        '"detectors" :[{"function":"metric","fieldName":"responsetime",'
        '"byFieldName":"airline"}] },'
        '"dataDescription" : {"fieldDelimiter":",", "timeField":"time", '
        '"timeFormat":"yyyy-MM-dd HH:mm:ssX"} }'
    )

    def report_failure(status, body):
        # One formatted argument so the output is "status json", not a
        # tuple repr, whichever way print is interpreted
        print("{0} {1}".format(status, json.dumps(body)))

    logging.info("Creating job")
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        report_failure(http_status_code, response)
        return

    job_id = response['id']

    logging.info("Uploading data to " + job_id)
    # Close the data file deterministically, including when upload raises
    with open(args.file, 'rb') as data_file:
        (http_status_code, response) = engine_client.upload(job_id, data_file)
    if http_status_code != 202:
        report_failure(http_status_code, response)
        return

    logging.info("Closing job " + job_id)
    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        report_failure(http_status_code, response)
        return

    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        report_failure(http_status_code, response)
        return

    print("Date,Anomaly Score,Max Normalized Probablility")
    for bucket in response:
        print("{0},{1},{2}".format(bucket['timestamp'],
                                   bucket['anomalyScore'],
                                   bucket['maxNormalizedProbability']))
def main():
    """Poll ElasticSearch for new documents and upload them to a job.

    The JSON config file named on the command line supplies the job
    configuration ('job_config'), the document type ('type') and the search
    body ('search'). If no job id is given on the command line a new job is
    created first.

    The loop repeatedly queries the window between the previous query time
    and now, uploads every page of hits (MAX_DOC_TAKE documents per page)
    and then sleeps for whatever is left of the update interval. It runs
    until interrupted from the keyboard, after which the job is closed.
    """
    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print("Error opening file " + args.file)
        return

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    job_id = args.job_id
    if job_id is None:
        (http_status, response) = engine_client.createJob(
            json.dumps(config['job_config']))
        # Without this check a failed create surfaces as a KeyError on 'id'
        if http_status != 201:
            print("Error creating job")
            print("{0} {1}".format(http_status, json.dumps(response)))
            return
        job_id = response['id']
        print("Created job with id " + str(job_id))

    print("Using job id " + job_id)

    data_type = config['type']
    raw_query = insertDateRangeFilter(config['search'])

    timezone = UTC()
    doc_count = 0
    try:
        query_end_time = datetime.now(timezone) - timedelta(
            seconds=args.update_interval)
        while True:
            query_start_time = query_end_time
            query_end_time = datetime.now(timezone)
            query_str = json.dumps(
                replaceDateArgs(raw_query, query_start_time, query_end_time))
            index_name = logstashIndex(query_start_time)

            skip = 0
            try:
                # Query the documents from ElasticSearch and write to the Engine
                hits = es_client.search(index=index_name, doc_type=data_type,
                                        body=query_str, from_=skip,
                                        size=MAX_DOC_TAKE)
            except elasticsearch.exceptions.NotFoundError:
                print("Error: missing logstash index '" + index_name + "'")
                # Nothing to upload this round; never reuse a previous
                # (or undefined) result set
                hits = None

            if hits is not None:
                # Total is read once from the first page and drives paging
                hitcount = int(hits['hits']['total'])
                while True:
                    docs = hits['hits']['hits']

                    # upload to the API
                    content = json.dumps(elasticSearchDocsToDicts(docs))
                    (http_status, response) = engine_client.upload(job_id,
                                                                   content)
                    if http_status != 202:
                        print("Error uploading log content to the Engine")
                        print("{0} {1}".format(http_status,
                                               json.dumps(response)))
                    else:
                        # Only documents the Engine accepted are counted
                        doc_count += len(docs)

                    # get any other docs
                    if hitcount <= (skip + MAX_DOC_TAKE):
                        break
                    skip += MAX_DOC_TAKE
                    hits = es_client.search(index=index_name,
                                            doc_type=data_type,
                                            body=query_str, from_=skip,
                                            size=MAX_DOC_TAKE)

            print("Uploaded {0} records".format(str(doc_count)))

            # Sleep only for the part of the interval not spent working
            duration = datetime.now(timezone) - query_end_time
            sleep_time = max(args.update_interval - duration.seconds, 0)
            print("sleeping for " + str(sleep_time) + " seconds")

            if sleep_time > 0.0:
                time.sleep(sleep_time)

    except KeyboardInterrupt:
        print("Interrupt caught closing job...")

    engine_client.close(job_id)
def main():
    """Poll ElasticSearch for new documents and upload them to a job.

    The JSON config file named on the command line provides 'job_config',
    the document 'type' and the 'search' body. A job is created when no job
    id was passed on the command line.

    Each pass queries the time window since the previous pass, using the
    index that logstashIndex() returns for the window start and the update
    interval. All pages of hits are uploaded (MAX_DOC_TAKE per page) and
    the loop then sleeps for the rest of the update interval. A keyboard
    interrupt ends the loop and the job is closed.
    """
    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print("Error opening file " + args.file)
        return

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    job_id = args.job_id
    if job_id is None:
        (http_status, response) = engine_client.createJob(
            json.dumps(config['job_config']))
        # A failed create would otherwise show up as a KeyError on 'id'
        if http_status != 201:
            print("Error creating job")
            print("{0} {1}".format(http_status, json.dumps(response)))
            return
        job_id = response['id']
        print("Created job with id " + str(job_id))

    print("Using job id " + job_id)

    data_type = config['type']
    raw_query = insertDateRangeFilter(config['search'])

    timezone = UTC()
    doc_count = 0
    try:
        query_end_time = datetime.now(timezone) - timedelta(
            seconds=args.update_interval)
        while True:
            query_start_time = query_end_time
            query_end_time = datetime.now(timezone)
            query_str = json.dumps(
                replaceDateArgs(raw_query, query_start_time, query_end_time))
            index_name = logstashIndex(query_start_time, args.update_interval)

            skip = 0
            try:
                # Query the documents from ElasticSearch and write to the Engine
                first_page = es_client.search(index=index_name,
                                              doc_type=data_type,
                                              body=query_str, from_=skip,
                                              size=MAX_DOC_TAKE)
            except elasticsearch.exceptions.NotFoundError:
                print("Error: missing logstash index '" + index_name + "'")
                # Skip uploading this round instead of touching a stale or
                # undefined result set
                first_page = None

            if first_page is not None:
                # The total from the first page decides how many pages follow
                hitcount = int(first_page['hits']['total'])
                hits = first_page
                while True:
                    docs = hits['hits']['hits']

                    # upload to the API
                    content = json.dumps(elasticSearchDocsToDicts(docs))
                    (http_status, response) = engine_client.upload(job_id,
                                                                   content)
                    if http_status != 202:
                        print("Error uploading log content to the Engine")
                        print("{0} {1}".format(http_status,
                                               json.dumps(response)))
                    else:
                        # Count only what the Engine accepted
                        doc_count += len(docs)

                    # get any other docs
                    if hitcount <= (skip + MAX_DOC_TAKE):
                        break
                    skip += MAX_DOC_TAKE
                    hits = es_client.search(index=index_name,
                                            doc_type=data_type,
                                            body=query_str, from_=skip,
                                            size=MAX_DOC_TAKE)

            print("Uploaded {0} records".format(str(doc_count)))

            # Sleep only for the remainder of the update interval
            duration = datetime.now(timezone) - query_end_time
            sleep_time = max(args.update_interval - duration.seconds, 0)
            print("sleeping for " + str(sleep_time) + " seconds")

            if sleep_time > 0.0:
                time.sleep(sleep_time)

    except KeyboardInterrupt:
        print("Interrupt caught closing job...")

    engine_client.close(job_id)
def main():
    """Upload documents from a range of logstash indexes to a new job.

    The JSON config file named on the command line supplies the job
    configuration ('job_config'), the document type ('type') and the search
    body ('search'). Start and end dates are parsed as YYYY-MM-DD; the end
    date defaults to today and a missing start date is replaced by the date
    findDateOfFirstIndex() returns for the query.

    For every index name produced by nextLogStashIndex() all pages of hits
    (MAX_DOC_TAKE per page) are uploaded to the job; indexes that do not
    exist are skipped. The job is closed at the end and the number of
    uploaded records is printed.
    """
    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print("Error opening file " + args.file)
        return

    # default start date is None meaning 'all time'
    start_date = None
    if args.start_date is not None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")

    # default end date is today
    end_date = datetime.today()
    if args.end_date is not None:
        end_date = datetime.strptime(args.end_date, "%Y-%m-%d")

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    data_type = config['type']
    search_body = json.dumps(config['search'])

    # If no start date find the first logstash index containing our docs
    if start_date is None:
        start_date = findDateOfFirstIndex(es_client, data_type, search_body)
        if start_date is None:
            print("No documents found with the query " + search_body)
            return

    # The REST API client (API_BASE_URL, as used by the other connectors)
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)
    (http_status, response) = engine_client.createJob(
        json.dumps(config['job_config']))
    if http_status != 201:
        print("Error creatting job")
        print("{0} {1}".format(http_status, json.dumps(response)))
        return

    job_id = response['id']
    print("Created job with id " + str(job_id))

    doc_count = 0
    for index_name in nextLogStashIndex(start_date, end_date):

        print("Reading from index " + index_name)

        skip = 0
        try:
            # Query the documents from ElasticSearch and write to the Engine
            hits = es_client.search(index=index_name, doc_type=data_type,
                                    body=search_body, from_=skip,
                                    size=MAX_DOC_TAKE)
        except elasticsearch.exceptions.NotFoundError:
            # Index not found try the next one
            continue

        # upload to the API
        content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))
        (http_status, response) = engine_client.upload(job_id, content)
        if http_status != 202:
            print("Error uploading log content to the Engine")
            print("{0} {1}".format(http_status, json.dumps(response)))
            continue

        doc_count += len(hits['hits']['hits'])

        # get any other docs
        hitcount = int(hits['hits']['total'])
        while hitcount > (skip + MAX_DOC_TAKE):
            skip += MAX_DOC_TAKE
            hits = es_client.search(index=index_name, doc_type=data_type,
                                    body=search_body, from_=skip,
                                    size=MAX_DOC_TAKE)

            content = json.dumps(
                elasticSearchDocsToDicts(hits['hits']['hits']))
            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                # Report like the first page, then move on to the next page
                print("Error uploading log content to the Engine")
                print("{0} {1}".format(http_status, json.dumps(response)))
                continue

            doc_count += len(hits['hits']['hits'])

        print("Uploaded {0} records".format(str(doc_count)))

    # Only claim success if the Engine accepted the close request
    (http_status, response) = engine_client.close(job_id)
    if http_status != 202:
        print("Error closing job")
        print("{0} {1}".format(http_status, json.dumps(response)))
        return

    print("{0} records successfully written to job {1}".format(
        str(doc_count), job_id))
def main():
    """Stream generated records to a new job and print results as they arrive.

    Records come from generateRecords() for the file named on the command
    line, with timestamps starting on 2014-05-18 and spaced 300 seconds
    apart. A non-positive duration argument means "until now"; otherwise
    the duration is taken in hours from the start date.

    Records are uploaded as CSV in batches of 100, each batch prefixed with
    the header row. After every upload the buckets from the next unseen
    bucket id onwards are fetched and printed. The job is closed at the
    end, also after a keyboard interrupt.
    """
    args = parseArguments()

    # Plain decimal month: a leading-zero literal is octal syntax
    start_date = datetime(2014, 5, 18, 0, 0, 0, 0, UtcOffset())
    # interval between the generated timestamps for the records
    interval = timedelta(seconds=300)

    if args.duration <= 0:
        end_date = datetime.now(UtcOffset())
    else:
        duration = timedelta(hours=args.duration)
        end_date = start_date + duration

    # Adjacent literals are concatenated into a single JSON document
    job_config = (
        '{'
        '"analysisConfig" : {'
        '"bucketSpan":3600,'
        '"detectors" :['
        '{"fieldName":"In Discards","byFieldName":"host"},'
        '{"fieldName":"In Octets","byFieldName":"host"},'
        '{"fieldName":"Out Discards","byFieldName":"host"},'
        '{"fieldName":"Out Octets","byFieldName":"host"}'
        ']'
        '},'
        '"dataDescription" : {'
        '"fieldDelimiter":",",'
        '"timeField":"time",'
        '"timeFormat":"yyyy-MM-dd\'T\'HH:mm:ssXXX"'
        '}'
        '}'
    )

    engine_client = EngineApiClient(args.host, BASE_URL, args.port)
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print("{0} {1}".format(http_status_code, json.dumps(response)))
        return

    job_id = response['id']
    print('Job created with Id = ' + job_id)

    # get the csv header (the first record generated)
    record_generator = generateRecords(args.file, start_date, interval,
                                       end_date)
    header = ','.join(next(record_generator))
    header += '\n'

    def upload_and_report(batch, first_bucket_id):
        # Upload one CSV batch, print the buckets from first_bucket_id on and
        # return the next bucket id to ask for, or None if a request failed
        (status, body) = engine_client.upload(job_id, batch)
        if status != 202:
            print("{0} {1}".format(status, json.dumps(body)))
            return None

        # get the latest results...
        (status, body) = engine_client.getBucketsByDate(
            job_id=job_id, start_date=str(first_bucket_id), end_date=None)
        if status != 200:
            print("{0} {1}".format(status, json.dumps(body)))
            return None

        # and print them; three fields to match the three header columns
        for bucket in body:
            print("{0},{1},{2}".format(bucket['timestamp'],
                                       bucket['anomalyScore'],
                                       bucket['maxNormalizedProbability']))

        if len(body) > 0:
            return int(body[-1]['id']) + 1
        return first_bucket_id

    try:
        # for the results
        next_bucket_id = 1
        print("")
        print("Date,Anomaly Score,Max Normalized Probablility")

        rows = []
        for record in record_generator:
            # format as csv and append new line
            rows.append(','.join(record) + '\n')

            if len(rows) == 100:
                # must send the header every time
                next_bucket_id = upload_and_report(header + ''.join(rows),
                                                   next_bucket_id)
                if next_bucket_id is None:
                    break
                rows = []

            # sleep a little while (optional this can be removed)
            #time.sleep(0.1)
        else:
            # Loop finished without an error: send the final partial batch
            # so the trailing records are not silently dropped
            if rows:
                upload_and_report(header + ''.join(rows), next_bucket_id)

    except KeyboardInterrupt:
        print("Keyboard interrupt closing job...")

    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print("{0} {1}".format(http_status_code, json.dumps(response)))
def main():
    """Stream generated records to a new job and print results as they arrive.

    Records are produced by generateRecords() for the file named on the
    command line; timestamps start on 2014-05-18 and advance by 300
    seconds. A non-positive duration argument means "until now", any other
    value is a number of hours from the start date.

    The records are sent as CSV in batches of 100, each prefixed with the
    header row. After each upload the buckets from the next unseen bucket
    id onwards are fetched and printed with their id. The job is closed at
    the end, also after a keyboard interrupt.
    """
    args = parseArguments()

    # Plain decimal month: a leading-zero literal is octal syntax
    start_date = datetime(2014, 5, 18, 0, 0, 0, 0, UtcOffset())
    # interval between the generated timestamps for the records
    interval = timedelta(seconds=300)

    if args.duration <= 0:
        end_date = datetime.now(UtcOffset())
    else:
        duration = timedelta(hours=args.duration)
        end_date = start_date + duration

    # Adjacent literals are concatenated into a single JSON document
    job_config = (
        '{'
        '"analysisConfig" : {'
        '"bucketSpan":3600,'
        '"detectors" :['
        '{"fieldName":"In Discards","byFieldName":"host"},'
        '{"fieldName":"In Octets","byFieldName":"host"},'
        '{"fieldName":"Out Discards","byFieldName":"host"},'
        '{"fieldName":"Out Octets","byFieldName":"host"}'
        ']'
        '},'
        '"dataDescription" : {'
        '"fieldDelimiter":",",'
        '"timeField":"time",'
        '"timeFormat":"yyyy-MM-dd\'T\'HH:mm:ssXXX"'
        '}'
        '}'
    )

    engine_client = EngineApiClient(args.host, BASE_URL, args.port)
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print("{0} {1}".format(http_status_code, json.dumps(response)))
        return

    job_id = response['id']
    print('Job created with Id = ' + job_id)

    # get the csv header (the first record generated)
    record_generator = generateRecords(args.file, start_date, interval,
                                       end_date)
    header = ','.join(next(record_generator))
    header += '\n'

    def send_batch(csv_rows, from_bucket_id):
        # Upload header + rows, print the buckets from from_bucket_id on and
        # return the next bucket id to request, or None if a request failed
        (status, body) = engine_client.upload(job_id,
                                              header + ''.join(csv_rows))
        if status != 202:
            print("{0} {1}".format(status, json.dumps(body)))
            return None

        # get the latest results...
        (status, body) = engine_client.getBucketsByDate(
            job_id=job_id, start_date=str(from_bucket_id), end_date=None)
        if status != 200:
            print("{0} {1}".format(status, json.dumps(body)))
            return None

        # and print them
        for bucket in body:
            print("{0},{1},{2},{3}".format(
                bucket['timestamp'], bucket['id'], bucket['anomalyScore'],
                bucket['maxNormalizedProbability']))

        if len(body) > 0:
            return int(body[-1]['id']) + 1
        return from_bucket_id

    try:
        # for the results
        next_bucket_id = 1
        print("")
        print("Date,Bucket ID,Anomaly Score,Max Normalized Probablility")

        pending = []
        for record in record_generator:
            # format as csv and append new line
            pending.append(','.join(record) + '\n')

            if len(pending) == 100:
                # must send the header every time (send_batch prepends it)
                next_bucket_id = send_batch(pending, next_bucket_id)
                if next_bucket_id is None:
                    break
                pending = []

            # sleep a little while (optional this can be removed)
            #time.sleep(0.1)
        else:
            # No request failed: flush the last partial batch so the final
            # records are not silently dropped
            if pending:
                send_batch(pending, next_bucket_id)

    except KeyboardInterrupt:
        print("Keyboard interrupt closing job...")

    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print("{0} {1}".format(http_status_code, json.dumps(response)))
def main():
    """Upload documents from a range of logstash indexes to a new job.

    The JSON config file named on the command line provides 'job_config',
    the document 'type' and the 'search' body. Dates are parsed as
    YYYY-MM-DD; the end date defaults to today, and when no start date is
    given the one returned by findDateOfFirstIndex() is used.

    Every index name yielded by nextLogStashIndex() is queried page by page
    (MAX_DOC_TAKE documents per page) and each page is uploaded to the job.
    Missing indexes are skipped. Finally the job is closed; if that fails
    an error is printed instead of the success summary.
    """
    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print("Error opening file " + args.file)
        return

    # default start date is None meaning 'all time'
    start_date = None
    if args.start_date is not None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")

    # default end date is today
    end_date = datetime.today()
    if args.end_date is not None:
        end_date = datetime.strptime(args.end_date, "%Y-%m-%d")

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    data_type = config['type']
    search_body = json.dumps(config['search'])

    # If no start date find the first logstash index containing our docs
    if start_date is None:
        start_date = findDateOfFirstIndex(es_client, data_type, search_body)
        if start_date is None:
            print("No documents found with the query " + search_body)
            return

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)
    (http_status, response) = engine_client.createJob(
        json.dumps(config['job_config']))
    if http_status != 201:
        print("Error creatting job")
        print("{0} {1}".format(http_status, json.dumps(response)))
        return

    job_id = response['id']
    print("Created job with id " + str(job_id))

    doc_count = 0
    for index_name in nextLogStashIndex(start_date, end_date):

        print("Reading from index " + index_name)

        skip = 0
        try:
            # Query the documents from ElasticSearch and write to the Engine
            hits = es_client.search(index=index_name, doc_type=data_type,
                                    body=search_body, from_=skip,
                                    size=MAX_DOC_TAKE)
        except elasticsearch.exceptions.NotFoundError:
            # Index not found try the next one
            continue

        # upload to the API
        page_docs = hits['hits']['hits']
        content = json.dumps(elasticSearchDocsToDicts(page_docs))
        (http_status, response) = engine_client.upload(job_id, content)
        if http_status != 202:
            print("Error uploading log content to the Engine")
            print("{0} {1}".format(http_status, json.dumps(response)))
            continue

        doc_count += len(page_docs)

        # get any other docs
        hitcount = int(hits['hits']['total'])
        while hitcount > (skip + MAX_DOC_TAKE):
            skip += MAX_DOC_TAKE
            hits = es_client.search(index=index_name, doc_type=data_type,
                                    body=search_body, from_=skip,
                                    size=MAX_DOC_TAKE)

            page_docs = hits['hits']['hits']
            content = json.dumps(elasticSearchDocsToDicts(page_docs))
            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                # Same report as for the first page, then try the next page
                print("Error uploading log content to the Engine")
                print("{0} {1}".format(http_status, json.dumps(response)))
                continue

            doc_count += len(page_docs)

        print("Uploaded {0} records".format(str(doc_count)))

    (http_status, response) = engine_client.close(job_id)
    if http_status != 202:
        print("Error closing job")
        print("{0} {1}".format(http_status, json.dumps(response)))
        return

    print("{0} records successfully written to job {1}".format(
        str(doc_count), job_id))