def main():
    """Feed AWS CloudWatch data into an Engine API job.

    Reads ``region``, ``aws_access_key_id`` and ``aws_secret_access_key``
    from the config file named by ``args.config``, connects to CloudWatch
    for that region and obtains a job id from ``createJob``. When no
    ``args.start_date`` is given the job is fed by ``runRealtime``; otherwise
    ``runHistorical`` is run from the start date up to ``args.end_date``
    (default: the current UTC time). The job is closed afterwards, also when
    the run raises.

    An unreadable config file, a missing option or an unknown region is
    printed and ends the function early; a ``None`` job id from
    ``createJob`` ends it without a further message here.
    """
    args = parseArguments()

    # read the config file
    config = ConfigParser.RawConfigParser()
    try:
        # insert a section header into the config so
        # ConfigParser will read it without complaint
        with open(args.config, "r") as config_file:
            ini_str = '[root]\n' + config_file.read()
        config.readfp(StringIO.StringIO(ini_str))
    except IOError:
        print("Error opening file " + args.config)
        return

    try:
        region = config.get('root', 'region')
        access_id = config.get('root', 'aws_access_key_id')
        secret_key = config.get('root', 'aws_secret_access_key')
    except ConfigParser.NoOptionError as e:
        print(e)
        return

    # AWS CloudWatch connection
    cloudwatch_conn = boto.ec2.cloudwatch.connect_to_region(
        region,
        aws_access_key_id=access_id,
        aws_secret_access_key=secret_key)
    if cloudwatch_conn is None:
        print("Error unknown region " + region)
        return

    # The Prelert REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    # If no job ID is supplied create a new job
    job_id = createJob(args.job_id, engine_client)
    if job_id is None:
        return

    # From here on the job exists, so always close it on the way out, even
    # if date parsing or the run itself raises.
    try:
        if args.start_date is None:
            # no start date means run realtime
            runRealtime(job_id, cloudwatch_conn, engine_client)
        else:
            start_date = replaceTimezoneWithUtc(
                datetime.strptime(args.start_date, "%Y-%m-%d"))

            # historical mode, check for an end date
            if args.end_date is None:
                end_date = replaceTimezoneWithUtc(datetime.utcnow())
            else:
                end_date = replaceTimezoneWithUtc(
                    datetime.strptime(args.end_date, "%Y-%m-%d"))

            runHistorical(job_id, start_date, end_date, cloudwatch_conn,
                          engine_client)
    finally:
        print("Closing job...")
        engine_client.close(job_id)
def main():
    """Create a job, upload a data file, close the job and print its buckets.

    A job with a fixed configuration is created through the Engine API, the
    file named by ``args.file`` is uploaded to it and the job is closed.
    The timestamp, anomaly score and max normalized probability of every
    result bucket are then printed as CSV. If any API call returns an
    unexpected HTTP status, the status and response are printed and the
    function returns.
    """
    setupLogging()
    args = parseArguments()

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    # The literal is JSON; the whitespace introduced by the line
    # continuations sits between JSON tokens.
    job_config = '{"analysisConfig" : {\
            "bucketSpan":3600,\
            "detectors" :[{"function":"metric","fieldName":"responsetime","byFieldName":"airline"}] },\
            "dataDescription" : {"fieldDelimiter":",", "timeField":"time", "timeFormat":"yyyy-MM-dd HH:mm:ssX"} }'

    logging.info("Creating job")
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        # printed as a (status, body) tuple
        print((http_status_code, json.dumps(response)))
        return

    job_id = response['id']

    logging.info("Uploading data to " + job_id)
    # The context manager closes the file once the upload call has returned.
    with open(args.file, 'rb') as data_file:
        (http_status_code, response) = engine_client.upload(job_id, data_file)
    if http_status_code != 202:
        print((http_status_code, json.dumps(response)))
        return

    logging.info("Closing job " + job_id)
    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print((http_status_code, json.dumps(response)))
        return

    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        print((http_status_code, json.dumps(response)))
        return

    print("Date,Anomaly Score,Max Normalized Probablility")
    for bucket in response:
        print("{0},{1},{2}".format(bucket['timestamp'],
                                   bucket['anomalyScore'],
                                   bucket['maxNormalizedProbability']))
def main():
    """Create a job, upload a data file, close the job and print its buckets.

    A job with a fixed configuration is created through the Engine API, the
    file named by ``args.file`` is uploaded to it and the job is closed.
    The timestamp, anomaly score and max normalized probability of every
    result bucket are then printed as CSV. If any API call returns an
    unexpected HTTP status, the status and response are printed and the
    function returns.
    """
    setupLogging()
    args = parseArguments()

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    # The literal is JSON; the whitespace introduced by the line
    # continuations sits between JSON tokens.
    job_config = '{"analysisConfig" : {\
            "bucketSpan":3600,\
            "detectors" :[{"function":"metric","fieldName":"responsetime","byFieldName":"airline"}] },\
            "dataDescription" : {"fieldDelimiter":",", "timeField":"time", "timeFormat":"yyyy-MM-dd HH:mm:ssX"} }'

    def report_failure(status, body):
        # printed as a (status, body) tuple
        print((status, json.dumps(body)))

    logging.info("Creating job")
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        report_failure(http_status_code, response)
        return

    job_id = response['id']

    logging.info("Uploading data to " + job_id)
    # The context manager closes the file once the upload call has returned.
    with open(args.file, 'rb') as data_file:
        (http_status_code, response) = engine_client.upload(job_id, data_file)
    if http_status_code != 202:
        report_failure(http_status_code, response)
        return

    logging.info("Closing job " + job_id)
    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        report_failure(http_status_code, response)
        return

    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        report_failure(http_status_code, response)
        return

    print("Date,Anomaly Score,Max Normalized Probablility")
    for bucket in response:
        print("{0},{1},{2}".format(bucket['timestamp'],
                                   bucket['anomalyScore'],
                                   bucket['maxNormalizedProbability']))
def main():
    """Continuously copy new logstash documents from ElasticSearch to a job.

    Loads the JSON config named by ``args.file`` and uses ``args.job_id``,
    or creates a job from ``config['job_config']`` when none is given. It
    then loops until interrupted: each pass queries the documents between
    the end of the previous window and now, uploads them in pages of
    ``MAX_DOC_TAKE`` and sleeps for whatever remains of
    ``args.update_interval``. On ``KeyboardInterrupt`` the job is closed.

    An unreadable config file or a failed job creation is printed and ends
    the function early.
    """
    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print("Error opening file " + args.file)
        return

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    job_id = args.job_id
    if job_id is None:
        (http_status, response) = engine_client.createJob(
            json.dumps(config['job_config']))
        # Without this check a failed creation surfaces as a lookup error
        # on response['id'] instead of a readable message.
        if http_status != 201:
            print("Error creating job")
            print("{0} {1}".format(http_status, json.dumps(response)))
            return
        job_id = response['id']
        print("Created job with id " + str(job_id))

    print("Using job id " + job_id)

    data_type = config['type']
    raw_query = insertDateRangeFilter(config['search'])

    timezone = UTC()
    doc_count = 0
    try:
        query_end_time = datetime.now(timezone) - timedelta(
            seconds=args.update_interval)
        while True:
            # each window starts where the previous one ended
            query_start_time = query_end_time
            query_end_time = datetime.now(timezone)
            query_str = json.dumps(
                replaceDateArgs(raw_query, query_start_time, query_end_time))
            index_name = logstashIndex(query_start_time)

            skip = 0
            try:
                # Query the documents from ElasticSearch and write to the Engine
                hits = es_client.search(index=index_name, doc_type=data_type,
                                        body=query_str, from_=skip,
                                        size=MAX_DOC_TAKE)
            except elasticsearch.exceptions.NotFoundError:
                print("Error: missing logstash index '" + index_name + "'")
                # Nothing can be uploaded for this window; do not fall
                # through with an unbound or stale result.
                hits = None

            if hits is not None:
                # upload to the API
                content = json.dumps(
                    elasticSearchDocsToDicts(hits['hits']['hits']))
                (http_status, response) = engine_client.upload(job_id, content)
                if http_status != 202:
                    print("Error uploading log content to the Engine")
                    print("{0} {1}".format(http_status, json.dumps(response)))

                doc_count += len(hits['hits']['hits'])

                # get any other docs
                hitcount = int(hits['hits']['total'])
                while hitcount > (skip + MAX_DOC_TAKE):
                    skip += MAX_DOC_TAKE
                    hits = es_client.search(index=index_name,
                                            doc_type=data_type,
                                            body=query_str, from_=skip,
                                            size=MAX_DOC_TAKE)

                    content = json.dumps(
                        elasticSearchDocsToDicts(hits['hits']['hits']))
                    (http_status, response) = engine_client.upload(job_id,
                                                                   content)
                    if http_status != 202:
                        print("Error uploading log content to the Engine")
                        print(json.dumps(response))

                    doc_count += len(hits['hits']['hits'])

                print("Uploaded {0} records".format(str(doc_count)))

            # sleep for the part of the interval the query/upload did not use
            duration = datetime.now(timezone) - query_end_time
            sleep_time = max(args.update_interval - duration.seconds, 0)
            print("sleeping for " + str(sleep_time) + " seconds")

            if sleep_time > 0.0:
                time.sleep(sleep_time)

    except KeyboardInterrupt:
        print("Interrupt caught closing job...")

    engine_client.close(job_id)
def main():
    """Continuously copy new logstash documents from ElasticSearch to a job.

    Loads the JSON config named by ``args.file`` and uses ``args.job_id``,
    or creates a job from ``config['job_config']`` when none is given. It
    then loops until interrupted: each pass queries the documents between
    the end of the previous window and now, uploads them in pages of
    ``MAX_DOC_TAKE`` and sleeps for whatever remains of
    ``args.update_interval``. On ``KeyboardInterrupt`` the job is closed.

    An unreadable config file or a failed job creation is printed and ends
    the function early.
    """
    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print("Error opening file " + args.file)
        return

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    job_id = args.job_id
    if job_id is None:
        (http_status, response) = engine_client.createJob(
            json.dumps(config['job_config']))
        # Without this check a failed creation surfaces as a lookup error
        # on response['id'] instead of a readable message.
        if http_status != 201:
            print("Error creating job")
            print("{0} {1}".format(http_status, json.dumps(response)))
            return
        job_id = response['id']
        print("Created job with id " + str(job_id))

    print("Using job id " + job_id)

    data_type = config['type']
    raw_query = insertDateRangeFilter(config['search'])

    timezone = UTC()
    doc_count = 0
    try:
        query_end_time = datetime.now(timezone) - timedelta(
            seconds=args.update_interval)
        while True:
            # each window starts where the previous one ended
            query_start_time = query_end_time
            query_end_time = datetime.now(timezone)
            query_str = json.dumps(
                replaceDateArgs(raw_query, query_start_time, query_end_time))
            index_name = logstashIndex(query_start_time, args.update_interval)

            skip = 0
            try:
                # Query the documents from ElasticSearch and write to the Engine
                hits = es_client.search(index=index_name, doc_type=data_type,
                                        body=query_str, from_=skip,
                                        size=MAX_DOC_TAKE)
            except elasticsearch.exceptions.NotFoundError:
                print("Error: missing logstash index '" + index_name + "'")
                # Nothing can be uploaded for this window; do not fall
                # through with an unbound or stale result.
                hits = None

            if hits is not None:
                # upload to the API
                content = json.dumps(
                    elasticSearchDocsToDicts(hits['hits']['hits']))
                (http_status, response) = engine_client.upload(job_id, content)
                if http_status != 202:
                    print("Error uploading log content to the Engine")
                    print("{0} {1}".format(http_status, json.dumps(response)))

                doc_count += len(hits['hits']['hits'])

                # get any other docs
                hitcount = int(hits['hits']['total'])
                while hitcount > (skip + MAX_DOC_TAKE):
                    skip += MAX_DOC_TAKE
                    hits = es_client.search(index=index_name,
                                            doc_type=data_type,
                                            body=query_str, from_=skip,
                                            size=MAX_DOC_TAKE)

                    content = json.dumps(
                        elasticSearchDocsToDicts(hits['hits']['hits']))
                    (http_status, response) = engine_client.upload(job_id,
                                                                   content)
                    if http_status != 202:
                        print("Error uploading log content to the Engine")
                        print(json.dumps(response))

                    doc_count += len(hits['hits']['hits'])

                print("Uploaded {0} records".format(str(doc_count)))

            # sleep for the part of the interval the query/upload did not use
            duration = datetime.now(timezone) - query_end_time
            sleep_time = max(args.update_interval - duration.seconds, 0)
            print("sleeping for " + str(sleep_time) + " seconds")

            if sleep_time > 0.0:
                time.sleep(sleep_time)

    except KeyboardInterrupt:
        print("Interrupt caught closing job...")

    engine_client.close(job_id)
def main():
    """Copy historical logstash documents from ElasticSearch into a new job.

    Loads the JSON config named by ``args.file`` and works out the date
    range: ``args.start_date`` or, when absent, the date returned by
    ``findDateOfFirstIndex``; ``args.end_date`` or today. A job is created
    from ``config['job_config']``, every index produced by
    ``nextLogStashIndex`` for that range is read in pages of
    ``MAX_DOC_TAKE`` and uploaded, and the job is closed.

    An unreadable config file, an empty search result, a failed job creation
    or a failed close is printed and ends the function early. Dates must be
    in ``%Y-%m-%d`` form; ``datetime.strptime`` raises ``ValueError``
    otherwise.
    """
    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print("Error opening file " + args.file)
        return

    # default start date is None meaning 'all time'
    start_date = None
    if args.start_date is not None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")

    # default end date is today
    end_date = datetime.today()
    if args.end_date is not None:
        end_date = datetime.strptime(args.end_date, "%Y-%m-%d")

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    data_type = config['type']
    search_body = json.dumps(config['search'])

    # If no start date find the first logstash index containing our docs
    if start_date is None:
        start_date = findDateOfFirstIndex(es_client, data_type, search_body)
        if start_date is None:
            print("No documents found with the query " + search_body)
            return

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)
    (http_status, response) = engine_client.createJob(
        json.dumps(config['job_config']))
    if http_status != 201:
        print("Error creatting job")
        print("{0} {1}".format(http_status, json.dumps(response)))
        return

    job_id = response['id']
    print("Created job with id " + str(job_id))

    doc_count = 0
    for index_name in nextLogStashIndex(start_date, end_date):

        print("Reading from index " + index_name)

        skip = 0
        try:
            # Query the documents from ElasticSearch and write to the Engine
            hits = es_client.search(index=index_name, doc_type=data_type,
                                    body=search_body, from_=skip,
                                    size=MAX_DOC_TAKE)
        except elasticsearch.exceptions.NotFoundError:
            # Index not found try the next one
            continue

        # upload to the API
        content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))
        (http_status, response) = engine_client.upload(job_id, content)
        if http_status != 202:
            print("Error uploading log content to the Engine")
            print("{0} {1}".format(http_status, json.dumps(response)))
            # give up on this index and move to the next one
            continue

        doc_count += len(hits['hits']['hits'])

        # get any other docs
        hitcount = int(hits['hits']['total'])
        while hitcount > (skip + MAX_DOC_TAKE):
            skip += MAX_DOC_TAKE
            hits = es_client.search(index=index_name, doc_type=data_type,
                                    body=search_body, from_=skip,
                                    size=MAX_DOC_TAKE)

            content = json.dumps(
                elasticSearchDocsToDicts(hits['hits']['hits']))
            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                print(json.dumps(response))
                # this page is not counted; carry on with the next page
                continue

            doc_count += len(hits['hits']['hits'])

        print("Uploaded {0} records".format(str(doc_count)))

    # Report a failed close instead of claiming success.
    (http_status, response) = engine_client.close(job_id)
    if http_status != 202:
        print("Error closing job")
        print("{0} {1}".format(http_status, json.dumps(response)))
        return

    print("{0} records successfully written to job {1}".format(
        str(doc_count), job_id))
def main():
    """Stream generated records to a new job and print results as they come.

    Creates a job with a fixed four-detector configuration, then reads
    records from ``generateRecords(args.file, ...)`` with timestamps starting
    at 2014-05-18 UTC and 300 seconds apart. The end date is the start plus
    ``args.duration`` hours, or the current time when the duration is not
    positive. Records are uploaded as CSV in batches of 100 (each batch
    prefixed with the header row); after every upload the new result buckets
    are fetched and printed as ``timestamp,anomalyScore,
    maxNormalizedProbability``. A final partial batch is uploaded too. The
    job is closed at the end and on ``KeyboardInterrupt``.

    Any API call returning an unexpected HTTP status has its status and
    response printed; streaming then stops.
    """
    args = parseArguments()

    start_date = datetime(2014, 5, 18, 0, 0, 0, 0, UtcOffset())
    # interval between the generated timestamps for the records
    interval = timedelta(seconds=300)

    if args.duration <= 0:
        end_date = datetime.now(UtcOffset())
    else:
        duration = timedelta(hours=args.duration)
        end_date = start_date + duration

    job_config = '{\
        "analysisConfig" : {\
            "bucketSpan":3600,\
            "detectors" :[\
                {"fieldName":"In Discards","byFieldName":"host"},\
                {"fieldName":"In Octets","byFieldName":"host"},\
                {"fieldName":"Out Discards","byFieldName":"host"},\
                {"fieldName":"Out Octets","byFieldName":"host"} \
            ]\
        },\
        "dataDescription" : {\
            "fieldDelimiter":",",\
            "timeField":"time",\
            "timeFormat":"yyyy-MM-dd\'T\'HH:mm:ssXXX"\
        }\
    }'

    engine_client = EngineApiClient(args.host, BASE_URL, args.port)
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        # printed as a (status, body) tuple
        print((http_status_code, json.dumps(response)))
        return

    job_id = response['id']
    print('Job created with Id = ' + job_id)

    # get the csv header (the first record generated)
    record_generator = generateRecords(args.file, start_date, interval,
                                       end_date)
    header = ','.join(next(record_generator)) + '\n'

    def upload_and_print(rows, next_bucket_id):
        """Upload one batch and print the new buckets.

        Returns the bucket id to ask for next, or None when the upload or
        the results query failed (the failure has been printed).
        """
        # must send the header every time
        data = header + ''.join(rows)
        (status, body) = engine_client.upload(job_id, data)
        if status != 202:
            print((status, json.dumps(body)))
            return None

        # get the latest results...
        # NOTE(review): a bucket id is passed as start_date; confirm this
        # is what getBucketsByDate expects.
        (status, body) = engine_client.getBucketsByDate(
            job_id=job_id, start_date=str(next_bucket_id), end_date=None)
        if status != 200:
            print((status, json.dumps(body)))
            return None

        # and print them; three fields to match the three-column header
        for bucket in body:
            print("{0},{1},{2}".format(bucket['timestamp'],
                                       bucket['anomalyScore'],
                                       bucket['maxNormalizedProbability']))

        if len(body) > 0:
            next_bucket_id = int(body[-1]['id']) + 1
        return next_bucket_id

    try:
        # for the results
        next_bucket_id = 1
        print("")
        print("Date,Anomaly Score,Max Normalized Probablility")

        rows = []
        for record in record_generator:
            # format as csv and append new line
            rows.append(','.join(record) + '\n')

            if len(rows) == 100:
                next_bucket_id = upload_and_print(rows, next_bucket_id)
                if next_bucket_id is None:
                    break
                rows = []

                # sleep a little while (optional this can be removed)
                #time.sleep(0.1)
        else:
            # The generator is exhausted: send the last partial batch so the
            # trailing records are not dropped.
            if rows:
                upload_and_print(rows, next_bucket_id)

    except KeyboardInterrupt:
        print("Keyboard interrupt closing job...")

    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print((http_status_code, json.dumps(response)))
def main():
    """Feed AWS CloudWatch data into an Engine API job.

    Reads ``region``, ``aws_access_key_id`` and ``aws_secret_access_key``
    from the config file named by ``args.config``, connects to CloudWatch
    for that region and obtains a job id from ``createJob``. When no
    ``args.start_date`` is given the job is fed by ``runRealtime``; otherwise
    ``runHistorical`` is run from the start date up to ``args.end_date``
    (default: the current UTC time). The job is closed afterwards, also when
    the run raises.

    An unreadable config file, a missing option or an unknown region is
    printed and ends the function early; a ``None`` job id from
    ``createJob`` ends it without a further message here.
    """
    args = parseArguments()

    # read the config file
    config = ConfigParser.RawConfigParser()
    try:
        # insert a section header into the config so
        # ConfigParser will read it without complaint
        with open(args.config, "r") as config_file:
            ini_str = '[root]\n' + config_file.read()
        config.readfp(StringIO.StringIO(ini_str))
    except IOError:
        print("Error opening file " + args.config)
        return

    try:
        region = config.get('root', 'region')
        access_id = config.get('root', 'aws_access_key_id')
        secret_key = config.get('root', 'aws_secret_access_key')
    except ConfigParser.NoOptionError as e:
        print(e)
        return

    # AWS CloudWatch connection
    cloudwatch_conn = boto.ec2.cloudwatch.connect_to_region(
        region,
        aws_access_key_id=access_id,
        aws_secret_access_key=secret_key)
    if cloudwatch_conn is None:
        print("Error unknown region " + region)
        return

    # The Prelert REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    # If no job ID is supplied create a new job
    job_id = createJob(args.job_id, engine_client)
    if job_id is None:
        return

    # From here on the job exists, so always close it on the way out, even
    # if date parsing or the run itself raises.
    try:
        if args.start_date is None:
            # no start date means run realtime
            runRealtime(job_id, cloudwatch_conn, engine_client)
        else:
            start_date = replaceTimezoneWithUtc(
                datetime.strptime(args.start_date, "%Y-%m-%d"))

            # historical mode, check for an end date
            if args.end_date is None:
                end_date = replaceTimezoneWithUtc(datetime.utcnow())
            else:
                end_date = replaceTimezoneWithUtc(
                    datetime.strptime(args.end_date, "%Y-%m-%d"))

            runHistorical(job_id, start_date, end_date, cloudwatch_conn,
                          engine_client)
    finally:
        print("Closing job...")
        engine_client.close(job_id)
def main():
    """Stream generated records to a new job and print results as they come.

    Creates a job with a fixed four-detector configuration, then reads
    records from ``generateRecords(args.file, ...)`` with timestamps starting
    at 2014-05-18 UTC and 300 seconds apart. The end date is the start plus
    ``args.duration`` hours, or the current time when the duration is not
    positive. Records are uploaded as CSV in batches of 100 (each batch
    prefixed with the header row); after every upload the new result buckets
    are fetched and printed as ``timestamp,id,anomalyScore,
    maxNormalizedProbability``. A final partial batch is uploaded too. The
    job is closed at the end and on ``KeyboardInterrupt``.

    Any API call returning an unexpected HTTP status has its status and
    response printed; streaming then stops.
    """
    args = parseArguments()

    start_date = datetime(2014, 5, 18, 0, 0, 0, 0, UtcOffset())
    # interval between the generated timestamps for the records
    interval = timedelta(seconds=300)

    if args.duration <= 0:
        end_date = datetime.now(UtcOffset())
    else:
        duration = timedelta(hours=args.duration)
        end_date = start_date + duration

    job_config = '{\
        "analysisConfig" : {\
            "bucketSpan":3600,\
            "detectors" :[\
                {"fieldName":"In Discards","byFieldName":"host"},\
                {"fieldName":"In Octets","byFieldName":"host"},\
                {"fieldName":"Out Discards","byFieldName":"host"},\
                {"fieldName":"Out Octets","byFieldName":"host"} \
            ]\
        },\
        "dataDescription" : {\
            "fieldDelimiter":",",\
            "timeField":"time",\
            "timeFormat":"yyyy-MM-dd\'T\'HH:mm:ssXXX"\
        }\
    }'

    engine_client = EngineApiClient(args.host, BASE_URL, args.port)
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        # printed as a (status, body) tuple
        print((http_status_code, json.dumps(response)))
        return

    job_id = response['id']
    print('Job created with Id = ' + job_id)

    # get the csv header (the first record generated)
    record_generator = generateRecords(args.file, start_date, interval,
                                       end_date)
    header = ','.join(next(record_generator)) + '\n'

    def upload_and_print(rows, next_bucket_id):
        """Upload one batch and print the new buckets.

        Returns the bucket id to ask for next, or None when the upload or
        the results query failed (the failure has been printed).
        """
        # must send the header every time
        data = header + ''.join(rows)
        (status, body) = engine_client.upload(job_id, data)
        if status != 202:
            print((status, json.dumps(body)))
            return None

        # get the latest results...
        # NOTE(review): a bucket id is passed as start_date; confirm this
        # is what getBucketsByDate expects.
        (status, body) = engine_client.getBucketsByDate(
            job_id=job_id, start_date=str(next_bucket_id), end_date=None)
        if status != 200:
            print((status, json.dumps(body)))
            return None

        # and print them
        for bucket in body:
            print("{0},{1},{2},{3}".format(
                bucket['timestamp'], bucket['id'], bucket['anomalyScore'],
                bucket['maxNormalizedProbability']))

        if len(body) > 0:
            next_bucket_id = int(body[-1]['id']) + 1
        return next_bucket_id

    try:
        # for the results
        next_bucket_id = 1
        print("")
        print("Date,Bucket ID,Anomaly Score,Max Normalized Probablility")

        rows = []
        for record in record_generator:
            # format as csv and append new line
            rows.append(','.join(record) + '\n')

            if len(rows) == 100:
                next_bucket_id = upload_and_print(rows, next_bucket_id)
                if next_bucket_id is None:
                    break
                rows = []

                # sleep a little while (optional this can be removed)
                #time.sleep(0.1)
        else:
            # The generator is exhausted: send the last partial batch so the
            # trailing records are not dropped.
            if rows:
                upload_and_print(rows, next_bucket_id)

    except KeyboardInterrupt:
        print("Keyboard interrupt closing job...")

    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print((http_status_code, json.dumps(response)))
def main():
    """Copy historical logstash documents from ElasticSearch into a new job.

    Loads the JSON config named by ``args.file`` and works out the date
    range: ``args.start_date`` or, when absent, the date returned by
    ``findDateOfFirstIndex``; ``args.end_date`` or today. A job is created
    from ``config['job_config']``, every index produced by
    ``nextLogStashIndex`` for that range is read in pages of
    ``MAX_DOC_TAKE`` and uploaded, and the job is closed.

    An unreadable config file, an empty search result, a failed job creation
    or a failed close is printed and ends the function early. Dates must be
    in ``%Y-%m-%d`` form; ``datetime.strptime`` raises ``ValueError``
    otherwise.
    """
    setupLogging()
    args = parseArguments()

    # read the config file
    try:
        with open(args.file, "r") as config_file:
            config = json.load(config_file)
    except IOError:
        print("Error opening file " + args.file)
        return

    # default start date is None meaning 'all time'
    start_date = None
    if args.start_date is not None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")

    # default end date is today
    end_date = datetime.today()
    if args.end_date is not None:
        end_date = datetime.strptime(args.end_date, "%Y-%m-%d")

    # The ElasticSearch client
    es_client = Elasticsearch(args.es_host + ":" + str(args.es_port))

    data_type = config['type']
    search_body = json.dumps(config['search'])

    # If no start date find the first logstash index containing our docs
    if start_date is None:
        start_date = findDateOfFirstIndex(es_client, data_type, search_body)
        if start_date is None:
            print("No documents found with the query " + search_body)
            return

    # The REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)
    (http_status, response) = engine_client.createJob(
        json.dumps(config['job_config']))
    if http_status != 201:
        print("Error creatting job")
        print("{0} {1}".format(http_status, json.dumps(response)))
        return

    job_id = response['id']
    print("Created job with id " + str(job_id))

    doc_count = 0
    for index_name in nextLogStashIndex(start_date, end_date):

        print("Reading from index " + index_name)

        skip = 0
        try:
            # Query the documents from ElasticSearch and write to the Engine
            hits = es_client.search(index=index_name, doc_type=data_type,
                                    body=search_body, from_=skip,
                                    size=MAX_DOC_TAKE)
        except elasticsearch.exceptions.NotFoundError:
            # Index not found try the next one
            continue

        # upload to the API
        content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits']))
        (http_status, response) = engine_client.upload(job_id, content)
        if http_status != 202:
            print("Error uploading log content to the Engine")
            print("{0} {1}".format(http_status, json.dumps(response)))
            # give up on this index and move to the next one
            continue

        doc_count += len(hits['hits']['hits'])

        # get any other docs
        hitcount = int(hits['hits']['total'])
        while hitcount > (skip + MAX_DOC_TAKE):
            skip += MAX_DOC_TAKE
            hits = es_client.search(index=index_name, doc_type=data_type,
                                    body=search_body, from_=skip,
                                    size=MAX_DOC_TAKE)

            content = json.dumps(
                elasticSearchDocsToDicts(hits['hits']['hits']))
            (http_status, response) = engine_client.upload(job_id, content)
            if http_status != 202:
                print(json.dumps(response))
                # this page is not counted; carry on with the next page
                continue

            doc_count += len(hits['hits']['hits'])

        print("Uploaded {0} records".format(str(doc_count)))

    (http_status, response) = engine_client.close(job_id)
    if http_status != 202:
        print("Error closing job")
        print("{0} {1}".format(http_status, json.dumps(response)))
        return

    print("{0} records successfully written to job {1}".format(
        str(doc_count), job_id))