def panorama_anomaly_details(anomaly_id):
    """
    Gets the details for an anomaly from the database.
    """
    logger.info('panorama_anomaly_details - getting details for anomaly id %s' % str(anomaly_id))
    metric_id = 0
    # Added nosec to exclude from bandit tests
    query = 'select metric_id from anomalies WHERE id=\'%s\'' % str(anomaly_id)  # nosec
    try:
        result = mysql_select(skyline_app, query)
        metric_id = int(result[0][0])
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: panorama_anomaly_details - failed to get metric_id from db')
        return False
    if metric_id > 0:
        logger.info('panorama_anomaly_details - getting metric for metric_id - %s' % str(metric_id))
        # Added nosec to exclude from bandit tests
        query = 'select metric from metrics WHERE id=\'%s\'' % str(metric_id)  # nosec
        try:
            result = mysql_select(skyline_app, query)
            metric = str(result[0][0])
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: panorama_anomaly_details - failed to get metric from db')
            return False
    query = 'select id, metric_id, anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp, anomaly_end_timestamp from anomalies WHERE id=\'%s\'' % str(anomaly_id)  # nosec
    logger.info('panorama_anomaly_details - running query - %s' % str(query))
    try:
        rows = mysql_select(skyline_app, query)
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: panorama_anomaly_details - failed to get anomaly details from db')
        return False
    anomaly_data = None
    for row in rows:
        anomalous_datapoint = float(row[2])
        anomaly_timestamp = int(row[3])
        full_duration = int(row[4])
        created_timestamp = str(row[5])
        try:
            anomaly_end_timestamp = int(row[6])
        except:
            anomaly_end_timestamp = None
        anomaly_data = [int(anomaly_id), str(metric), anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp, anomaly_end_timestamp]
        break
    return anomaly_data
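# A minimal sketch (not part of the original code) of how the string-formatted,
# "# nosec" annotated queries above could instead use the parameterised
# placeholders that mysql.connector supports, so the driver does the escaping;
# `config` is assumed to be the same mysql.connector settings dict used
# elsewhere in this codebase, and the function name is hypothetical.
def example_get_metric_id(anomaly_id):
    import mysql.connector
    cnx = mysql.connector.connect(**config)
    cursor = cnx.cursor()
    # The value is passed separately from the SQL, so no manual quoting is needed
    cursor.execute('SELECT metric_id FROM anomalies WHERE id = %s', (int(anomaly_id),))
    row = cursor.fetchone()
    cursor.close()
    cnx.close()
    return int(row[0]) if row else None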
def get_list(thing):
    """
    Get a list of names for things in a database table.

    :param thing: the thing, e.g. 'algorithm'
    :type thing: str
    :return: array
    :rtype: array, boolean
    """
    table = '%ss' % thing
    query = 'select %s from %s' % (thing, table)
    logger.info('select %s from %s' % (thing, table))
    try:
        results = mysql_select(skyline_app, query)
    except:
        logger.error('error :: failed to get list of %ss from %s' % (thing, table))
        results = None
    logger.info('results: %s' % str(results))
    things = []
    # Guard against results being None if the query failed
    if results:
        for result in results:
            things.append(str(result[0]))
    logger.info('things: %s' % str(things))
    return things
def get_anomaly(request_type):
    """
    Query the database for the anomaly details
    """
    logger = logging.getLogger(skyline_app_logger)
    if isinstance(request_type, int):
        latest = False
    else:
        latest = True
    if latest:
        query = 'SELECT * FROM anomalies ORDER BY id DESC LIMIT 1'
    else:
        query = 'SELECT * FROM anomalies WHERE id=%s' % str(request_type)
    try:
        results = mysql_select(skyline_app, query)
    except:
        logger.error(traceback.format_exc())
        logger.error('MySQL error')
        return (False, False, False, False)
    try:
        anomaly_id = int(results[0][0])
        metric_id = int(results[0][1])
        anomaly_timestamp = int(results[0][5])
        query = 'SELECT metric FROM metrics WHERE id=%s' % str(metric_id)
        results = mysql_select(skyline_app, query)
        base_name = str(results[0][0])
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: MySQL error - %s' % query)
        return (False, False, False, False)
    return (anomaly_id, metric_id, anomaly_timestamp, base_name)
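# A hypothetical usage sketch (not in the original code): pass an int id to
# fetch that anomaly, or any non-int value (e.g. 'latest') to fetch the most
# recent one. On any MySQL error the function returns (False, False, False,
# False), so callers should check before using the values.
def example_show_latest_anomaly():
    anomaly_id, metric_id, anomaly_timestamp, base_name = get_anomaly('latest')
    if not anomaly_id:
        logger.error('no anomaly details returned')
        return
    logger.info('latest anomaly id %s on %s at %s' % (
        str(anomaly_id), base_name, str(anomaly_timestamp)))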
def get_list(thing):
    """
    Get a list of names for things in a database table.

    :param thing: the thing, e.g. 'algorithm'
    :type thing: str
    :return: list
    :rtype: list
    """
    table = '%ss' % thing
    # @modified 20170913 - Task #2160: Test skyline with bandit
    # Added nosec to exclude from bandit tests
    query = 'select %s from %s' % (thing, table)  # nosec
    logger.info('select %s from %s' % (thing, table))  # nosec
    got_results = False
    try:
        results = mysql_select(skyline_app, query)
        got_results = True
    except:
        logger.error('error :: failed to get list of %ss from %s' % (thing, table))
        results = None
    things = []
    results_array_valid = False
    try:
        test_results = results[0]
        results_array_valid = True
    except:
        logger.error(
            'error :: invalid results array for get list of %ss from %s' % (thing, table))
    if results_array_valid:
        logger.info('results: %s' % str(results))
        for result in results:
            things.append(str(result[0]))
        logger.info('things: %s' % str(things))
    return things
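# A hypothetical usage sketch (not in the original code). The convention is
# that the table name is the pluralised thing, so 'algorithm' reads the
# `algorithm` column of the `algorithms` table; an empty list is returned
# when the query fails or yields no rows.
def example_list_things():
    algorithms = get_list('algorithm')
    apps = get_list('app')
    return algorithms, apps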
def panorama_request():
    """
    Gets the details of anomalies from the database, using the URL arguments
    that are passed in by the :obj:`request.args` to build the MySQL select
    query string, queries the database, parses the results, creates an array
    of the anomalies that matched the query, creates the ``panorama.json``
    file and then returns the array.  The Webapp needs both the array and the
    JSONP file to serve to the browser for the client side ``panorama.js``.

    :param None: determined from :obj:`request.args`
    :return: array
    :rtype: array

    .. note:: And creates ``panorama.json`` for the client side javascript

    """
    logger.info('determining request args')

    def get_ids_from_rows(thing, rows):
        found_ids = []
        for row in rows:
            found_id = str(row[0])
            found_ids.append(int(found_id))
        # @modified 20191014 - Task #3270: Deprecate string.replace for py3
        #                      Branch #3262: py3
        # ids_first = string.replace(str(found_ids), '[', '')
        # in_ids = string.replace(str(ids_first), ']', '')
        found_ids_str = str(found_ids)
        ids_first = found_ids_str.replace('[', '')
        in_ids = ids_first.replace(']', '')
        return in_ids

    try:
        request_args_len = len(request.args)
    except:
        request_args_len = False

    latest_anomalies = False
    if request_args_len == 0:
        request_args_len = 'No request arguments passed'
        # return str(request_args_len)
        latest_anomalies = True

    metric = False
    if metric:
        logger.info('Getting db id for %s' % metric)
        # @modified 20170913 - Task #2160: Test skyline with bandit
        # Added nosec to exclude from bandit tests
        query = 'select id from metrics WHERE metric=\'%s\'' % metric  # nosec
        try:
            result = mysql_select(skyline_app, query)
        except:
            logger.error('error :: failed to get id from db: %s' % traceback.format_exc())
            result = 'metric id not found in database'
        return str(result[0][0])

    search_request = True
    count_request = False

    if latest_anomalies:
        logger.info('Getting latest anomalies')
        # @modified 20191108 - Feature #3306: Record the anomaly_end_timestamp
        #                      Branch #3262: py3
        # query = 'select id, metric_id, anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp from anomalies ORDER BY id DESC LIMIT 10'
        query = 'select id, metric_id, anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp, anomaly_end_timestamp from anomalies ORDER BY id DESC LIMIT 10'
        try:
            rows = mysql_select(skyline_app, query)
        except:
            logger.error('error :: failed to get anomalies from db: %s' % traceback.format_exc())
            rows = []

    if not latest_anomalies:
        logger.info('Determining search parameters')
        # @modified 20191108 - Feature #3306: Record the end_timestamp of anomalies
        #                      Branch #3262: py3
        # query_string = 'select id, metric_id, anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp from anomalies'
        query_string = 'select id, metric_id, anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp, anomaly_end_timestamp from anomalies'
        needs_and = False

        # If we have to '' a string we cannot escape the query it seems...
        do_not_escape = False
        if 'metric' in request.args:
            metric = request.args.get('metric', None)
            if metric and metric != 'all':
                # @modified 20170913 - Task #2160: Test skyline with bandit
                # Added nosec to exclude from bandit tests
                query = "select id from metrics WHERE metric='%s'" % (metric)  # nosec
                try:
                    found_id = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get app ids from db: %s' % traceback.format_exc())
                    found_id = None
                if found_id:
                    target_id = str(found_id[0][0])
                    if needs_and:
                        new_query_string = '%s AND metric_id=%s' % (query_string, target_id)
                    else:
                        new_query_string = '%s WHERE metric_id=%s' % (query_string, target_id)
                    query_string = new_query_string
                    needs_and = True

        if 'metric_like' in request.args:
            metric_like = request.args.get('metric_like', None)
            if metric_like and metric_like != 'all':
                # @modified 20170913 - Task #2160: Test skyline with bandit
                # Added nosec to exclude from bandit tests
                query = 'select id from metrics WHERE metric LIKE \'%s\'' % (str(metric_like))  # nosec
                try:
                    rows = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get metric ids from db: %s' % traceback.format_exc())
                    return False
                rows_returned = None
                try:
                    rows_returned = rows[0]
                    if ENABLE_WEBAPP_DEBUG:
                        logger.info('debug :: rows - rows[0] - %s' % str(rows[0]))
                except:
                    rows_returned = False
                    if ENABLE_WEBAPP_DEBUG:
                        logger.info('debug :: no rows returned')
                if rows_returned:
                    ids = get_ids_from_rows('metric', rows)
                    new_query_string = '%s WHERE metric_id IN (%s)' % (query_string, str(ids))
                else:
                    # Get nothing
                    new_query_string = '%s WHERE metric_id IN (0)' % (query_string)
                    if ENABLE_WEBAPP_DEBUG:
                        logger.info('debug :: no rows returned using new_query_string - %s' % new_query_string)
                query_string = new_query_string
                needs_and = True

        if 'count_by_metric' in request.args:
            count_by_metric = request.args.get('count_by_metric', None)
            if count_by_metric and count_by_metric != 'false':
                search_request = False
                count_request = True
                # query_string = 'SELECT metric_id, COUNT(*) FROM anomalies GROUP BY metric_id ORDER BY COUNT(*) DESC'
                query_string = 'SELECT metric_id, COUNT(*) FROM anomalies'
                needs_and = False

        if 'from_timestamp' in request.args:
            from_timestamp = request.args.get('from_timestamp', None)
            if from_timestamp and from_timestamp != 'all':
                if ":" in from_timestamp:
                    import time
                    import datetime
                    new_from_timestamp = time.mktime(datetime.datetime.strptime(from_timestamp, '%Y%m%d %H:%M').timetuple())
                    from_timestamp = str(int(new_from_timestamp))
                if needs_and:
                    new_query_string = '%s AND anomaly_timestamp >= %s' % (query_string, from_timestamp)
                    query_string = new_query_string
                    needs_and = True
                else:
                    new_query_string = '%s WHERE anomaly_timestamp >= %s' % (query_string, from_timestamp)
                    query_string = new_query_string
                    needs_and = True

        if 'until_timestamp' in request.args:
            until_timestamp = request.args.get('until_timestamp', None)
            if until_timestamp and until_timestamp != 'all':
                if ":" in until_timestamp:
                    import time
                    import datetime
                    new_until_timestamp = time.mktime(datetime.datetime.strptime(until_timestamp, '%Y%m%d %H:%M').timetuple())
                    until_timestamp = str(int(new_until_timestamp))
                if needs_and:
                    new_query_string = '%s AND anomaly_timestamp <= %s' % (query_string, until_timestamp)
                    query_string = new_query_string
                    needs_and = True
                else:
                    new_query_string = '%s WHERE anomaly_timestamp <= %s' % (query_string, until_timestamp)
                    query_string = new_query_string
                    needs_and = True

        if 'app' in request.args:
            app = request.args.get('app', None)
            if app and app != 'all':
                # @modified 20170913 - Task #2160: Test skyline with bandit
                # Added nosec to exclude from bandit tests
                query = 'select id from apps WHERE app=\'%s\'' % (str(app))  # nosec
                try:
                    found_id = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get app ids from db: %s' % traceback.format_exc())
                    found_id = None
                if found_id:
                    target_id = str(found_id[0][0])
                    if needs_and:
                        new_query_string = '%s AND app_id=%s' % (query_string, target_id)
                    else:
                        new_query_string = '%s WHERE app_id=%s' % (query_string, target_id)
                    query_string = new_query_string
                    needs_and = True

        if 'source' in request.args:
            source = request.args.get('source', None)
            if source and source != 'all':
                # @modified 20170913 - Task #2160: Test skyline with bandit
                # Added nosec to exclude from bandit tests
                query = 'select id from sources WHERE source=\'%s\'' % (str(source))  # nosec
                try:
                    found_id = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get source id from db: %s' % traceback.format_exc())
                    found_id = None
                if found_id:
                    target_id = str(found_id[0][0])
                    if needs_and:
                        new_query_string = '%s AND source_id=\'%s\'' % (query_string, target_id)
                    else:
                        new_query_string = '%s WHERE source_id=\'%s\'' % (query_string, target_id)
                    query_string = new_query_string
                    needs_and = True

        if 'algorithm' in request.args:
            algorithm = request.args.get('algorithm', None)
            # DISABLED as it is difficult to match algorithm_id in the
            # triggered_algorithms csv list
            algorithm = 'all'
            if algorithm and algorithm != 'all':
                # @modified 20170913 - Task #2160: Test skyline with bandit
                # Added nosec to exclude from bandit tests
                query = 'select id from algorithms WHERE algorithm LIKE \'%s\'' % (str(algorithm))  # nosec
                try:
                    rows = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get algorithm ids from db: %s' % traceback.format_exc())
                    rows = []
                ids = get_ids_from_rows('algorithm', rows)
                if needs_and:
                    new_query_string = '%s AND algorithm_id IN (%s)' % (query_string, str(ids))
                else:
                    new_query_string = '%s WHERE algorithm_id IN (%s)' % (query_string, str(ids))
                query_string = new_query_string
                needs_and = True

        if 'host' in request.args:
            host = request.args.get('host', None)
            if host and host != 'all':
                # @modified 20170913 - Task #2160: Test skyline with bandit
                # Added nosec to exclude from bandit tests
                query = 'select id from hosts WHERE host=\'%s\'' % (str(host))  # nosec
                try:
                    found_id = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get host id from db: %s' % traceback.format_exc())
                    found_id = None
                if found_id:
                    target_id = str(found_id[0][0])
                    if needs_and:
                        new_query_string = '%s AND host_id=\'%s\'' % (query_string, target_id)
                    else:
                        new_query_string = '%s WHERE host_id=\'%s\'' % (query_string, target_id)
                    query_string = new_query_string
                    needs_and = True

        if 'limit' in request.args:
            limit = request.args.get('limit', '10')
        else:
            limit = '10'

        if 'order' in request.args:
            order = request.args.get('order', 'DESC')
        else:
            order = 'DESC'

        search_query = '%s ORDER BY id %s LIMIT %s' % (
            query_string, order, limit)

        if 'count_by_metric' in request.args:
            count_by_metric = request.args.get('count_by_metric', None)
            if count_by_metric and count_by_metric != 'false':
                # query_string = 'SELECT metric_id, COUNT(*) FROM anomalies GROUP BY metric_id ORDER BY COUNT(*) DESC'
                search_query = '%s GROUP BY metric_id ORDER BY COUNT(*) %s LIMIT %s' % (
                    query_string, order, limit)

        try:
            rows = mysql_select(skyline_app, search_query)
        except:
            logger.error('error :: failed to get anomalies from db: %s' % traceback.format_exc())
            rows = []

    anomalies = []
    anomalous_metrics = []

    if search_request:
        # @modified 20191014 - Task #3270: Deprecate string.replace for py3
        #                      Branch #3262: py3
        anomalies_json = path.abspath(path.join(path.dirname(__file__), '..', settings.ANOMALY_DUMP))
        # panorama_json = string.replace(str(anomalies_json), 'anomalies.json', 'panorama.json')
        panorama_json = anomalies_json.replace('anomalies.json', 'panorama.json')
        if ENABLE_WEBAPP_DEBUG:
            logger.info('debug :: panorama_json - %s' % str(panorama_json))

    for row in rows:
        if search_request:
            anomaly_id = str(row[0])
            metric_id = str(row[1])
        if count_request:
            metric_id = str(row[0])
            anomaly_count = str(row[1])
        # @modified 20170913 - Task #2160: Test skyline with bandit
        # Added nosec to exclude from bandit tests
        query = 'select metric from metrics WHERE id=%s' % metric_id  # nosec
        try:
            result = mysql_select(skyline_app, query)
        except:
            logger.error('error :: failed to get id from db: %s' % traceback.format_exc())
            continue
        metric = str(result[0][0])
        if search_request:
            anomalous_datapoint = str(row[2])
            anomaly_timestamp = str(row[3])
            full_duration = str(row[4])
            created_timestamp = str(row[5])
            # @modified 20191108 - Feature #3306: Record the anomaly_end_timestamp
            #                      Branch #3262: py3
            # anomaly_data = (anomaly_id, metric, anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp)
            # anomalies.append([int(anomaly_id), str(metric), anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp])
            anomaly_end_timestamp = str(row[6])
            anomaly_data = (anomaly_id, metric, anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp, anomaly_end_timestamp)
            anomalies.append([int(anomaly_id), str(metric), anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp, anomaly_end_timestamp])
            anomalous_metrics.append(str(metric))
        if count_request:
            limit_argument = anomaly_count
            if int(anomaly_count) > 100:
                limit_argument = 100
            anomaly_data = (int(anomaly_count), metric, str(limit_argument))
            anomalies.append([int(anomaly_count), str(metric), str(limit_argument)])

    anomalies.sort(key=operator.itemgetter(0))

    if search_request:
        # Truncate any existing panorama.json file
        with open(panorama_json, 'w') as fh:
            pass
        # Write anomalous_metrics to static webapp directory
        with open(panorama_json, 'a') as fh:
            # Make it JSONP with a handle_data() function
            fh.write('handle_data(%s)' % anomalies)

    if latest_anomalies:
        return anomalies
    else:
        return search_query, anomalies
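# For illustration only (the values below are made up): the file written above
# is JSONP rather than plain JSON, so the anomalies array is wrapped in a
# handle_data() call that the client side panorama.js consumes, e.g.:
#
#   handle_data([[1828, 'stats.statsd.bad_lines_seen', '7.0', '1526312070',
#                 '86400', '2018-05-14 15:34:31', '1526312100']])
#
# A hypothetical helper to recover the plain list in Python, assuming exactly
# that format:
def example_read_panorama_json(panorama_json):
    from ast import literal_eval
    with open(panorama_json) as fh:
        payload = fh.read()
    return literal_eval(payload.replace('handle_data(', '').rstrip(')'))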
def run(self):
    """
    Called when the process initializes.
    """
    # Log management to prevent overwriting
    # Allow the bin/<skyline_app>.d to manage the log
    if os.path.isfile(skyline_app_logwait):
        try:
            logger.info('removing %s' % skyline_app_logwait)
            os.remove(skyline_app_logwait)
        except OSError:
            logger.error('error :: failed to remove %s, continuing' % skyline_app_logwait)
            pass

    now = time()
    log_wait_for = now + 5
    while now < log_wait_for:
        if os.path.isfile(skyline_app_loglock):
            sleep(.1)
            now = time()
        else:
            now = log_wait_for + 1

    logger.info('starting %s run' % skyline_app)
    if os.path.isfile(skyline_app_loglock):
        logger.error(
            'error :: bin/%s.d log management seems to have failed, continuing' % skyline_app)
        try:
            os.remove(skyline_app_loglock)
            logger.info('log lock file removed')
        except OSError:
            logger.error('error :: failed to remove %s, continuing' % skyline_app_loglock)
            pass
    else:
        logger.info('bin/%s.d log management done' % skyline_app)

    while 1:
        now = time()

        # Make sure Redis is up
        try:
            self.redis_conn.ping()
            if ENABLE_LUMINOSITY_DEBUG:
                logger.info('debug :: connected to Redis')
        except:
            logger.error(
                'error :: cannot connect to redis at socket path %s' % (settings.REDIS_SOCKET_PATH))
            sleep(30)
            # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
            if settings.REDIS_PASSWORD:
                self.redis_conn = StrictRedis(
                    password=settings.REDIS_PASSWORD,
                    unix_socket_path=settings.REDIS_SOCKET_PATH)
            else:
                self.redis_conn = StrictRedis(
                    unix_socket_path=settings.REDIS_SOCKET_PATH)
            continue

        # Report app up
        try:
            self.redis_conn.setex(skyline_app, 120, now)
            logger.info('updated Redis key for %s up' % skyline_app)
        except:
            logger.error('error :: failed to update Redis key for %s up' % skyline_app)

        """
        Determine if any new anomalies have been added
        """
        while True:
            process_anomaly_id = None
            last_processed_anomaly_id = None
            memcache_last_processed_anomaly_id_data = False
            # Check memcached before MySQL
            memcache_key = '%s.last.processed.anomaly.id' % skyline_app
            if settings.MEMCACHE_ENABLED:
                try:
                    last_processed_anomaly_id = self.memcache_client.get(memcache_key)
                    # if memcache does not have the key the response to the
                    # client is None, it does not except
                except:
                    logger.error('error :: failed to get %s from memcache' % memcache_key)
                    try:
                        self.memcache_client.close()
                    except:
                        logger.error('error :: failed to close memcache_client')

            if last_processed_anomaly_id:
                logger.info('last_processed_anomaly_id found in memcache - %s' % str(last_processed_anomaly_id))
                memcache_last_processed_anomaly_id_data = True
            else:
                logger.info('last_processed_anomaly_id key was NOT found in memcache - %s' % str(last_processed_anomaly_id))

            if not last_processed_anomaly_id:
                query = 'SELECT id FROM luminosity WHERE id=(SELECT MAX(id) FROM luminosity) ORDER BY id DESC LIMIT 1'
                results = None
                try:
                    results = mysql_select(skyline_app, query)
                except:
                    logger.error(traceback.format_exc())
                    logger.error('error :: MySQL query failed - %s' % query)
                if results:
                    try:
                        last_processed_anomaly_id = int(results[0][0])
                        logger.info('last_processed_anomaly_id found from DB - %s' % str(last_processed_anomaly_id))
                    except:
                        logger.error(traceback.format_exc())
                if last_processed_anomaly_id and settings.MEMCACHE_ENABLED:
                    if not memcache_last_processed_anomaly_id_data:
                        logger.info('Populating memcache with DB result - %s' % str(last_processed_anomaly_id))
                        try:
                            self.memcache_client.set(memcache_key, int(last_processed_anomaly_id))
                            logger.info('populated memcache key %s with %s' % (memcache_key, str(last_processed_anomaly_id)))
                        except:
                            logger.error('error :: failed to set the memcache key - %s - %s' % (memcache_key, str(last_processed_anomaly_id)))
                        try:
                            self.memcache_client.close()
                        except:
                            logger.error('error :: failed to close memcache_client')

            if not last_processed_anomaly_id:
                # Check MySQL
                now = int(time())
                after = now - 600
                query = 'SELECT * FROM anomalies WHERE anomaly_timestamp > \'%s\'' % str(after)  # nosec
                results = None
                try:
                    results = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: MySQL query failed - %s' % query)
                if results:
                    process_anomaly_id = int(results[0][0])
                    logger.info('found new anomaly id to process from the DB - %s' % str(process_anomaly_id))
                    # Handle the first one
                    last_processed_anomaly_id = process_anomaly_id - 1
                else:
                    logger.info('no new anomalies in the anomalies table')

            query = 'SELECT * FROM anomalies WHERE id > \'%s\'' % str(last_processed_anomaly_id)  # nosec
            results = None
            try:
                results = mysql_select(skyline_app, query)
            except:
                logger.error('error :: MySQL query failed - %s' % query)
            if results:
                try:
                    process_anomaly_id = int(results[0][0])
                    logger.info('found the next new anomaly id to process from the DB - %s' % str(process_anomaly_id))
                except:
                    logger.error(traceback.format_exc())
                    logger.error('error :: from query - %s' % query)
            else:
                logger.info('no new anomalies in the anomalies table')

            if process_anomaly_id and last_processed_anomaly_id:
                if isinstance(last_processed_anomaly_id, int):
                    if isinstance(process_anomaly_id, int):
                        if last_processed_anomaly_id == process_anomaly_id:
                            logger.info('anomaly id already processed - %s' % str(process_anomaly_id))
                            process_anomaly_id = None

            if not process_anomaly_id:
                logger.info('sleeping 20 no anomalies to correlate - last processed anomaly id - %s' % str(last_processed_anomaly_id))
                sleep(20)
                up_now = time()
                # Report app up
                try:
                    self.redis_conn.setex(skyline_app, 120, up_now)
                    logger.info('updated Redis key for %s up' % skyline_app)
                except:
                    logger.error('error :: failed to update Redis key for %s up' % skyline_app)

            cache_key = '%s.sent_graphite_metrics' % skyline_app
            redis_sent_graphite_metrics = False
            try:
                redis_sent_graphite_metrics = self.redis_conn.get(cache_key)
            except Exception as e:
                logger.error('error :: could not query Redis for key %s: %s' % (cache_key, e))

            # Flush metrics to Graphite
            if not redis_sent_graphite_metrics:
                try:
                    total_anomalies = str(len(self.anomalous_metrics))
                except:
                    total_anomalies = '0'
                logger.info('total_anomalies :: %s' % total_anomalies)
                send_metric_name = '%s.total_anomalies' % skyline_app_graphite_namespace
                send_graphite_metric(skyline_app, send_metric_name, total_anomalies)

                try:
                    correlations = str(len(self.correlations))
                except:
                    correlations = '0'
                logger.info('correlations :: %s' % correlations)
                send_metric_name = '%s.correlations' % skyline_app_graphite_namespace
                send_graphite_metric(skyline_app, send_metric_name, correlations)

                sent_graphite_metrics_now = int(time())
                try:
                    self.redis_conn.setex(cache_key, 59, sent_graphite_metrics_now)
                    logger.info('updated Redis key - %s' % cache_key)
                except:
                    logger.error('error :: failed to update Redis key - %s up' % cache_key)

                # Reset lists
                self.anomalous_metrics[:] = []
                self.correlations[:] = []

            if process_anomaly_id:
                break

        # Spawn process
        logger.info('spawning processes to correlate anomaly id %s' % str(process_anomaly_id))
        pids = []
        spawned_pids = []
        pid_count = 0
        now = time()
        for i in range(1, luminosity_processes + 1):
            try:
                p = Process(target=self.spin_process, args=(i, process_anomaly_id))
                pids.append(p)
                pid_count += 1
                logger.info('starting %s of %s spin_process/es' % (str(pid_count), str(luminosity_processes)))
                p.start()
                spawned_pids.append(p.pid)
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: failed to start spin_process')
                continue

        # Self monitor processes and terminate if any spin_process has run
        # for too long
        p_starts = time()
        while time() - p_starts <= 60:
            if any(p.is_alive() for p in pids):
                # Just to avoid hogging the CPU
                sleep(.1)
            else:
                # All the processes are done, break now.
                time_to_run = time() - p_starts
                logger.info('%s spin_process completed in %.2f seconds' % (str(luminosity_processes), time_to_run))
                break
        else:
            # We only enter this if we didn't 'break' above.
            logger.info('timed out, killing all spin_process processes')
            for p in pids:
                try:
                    p.terminate()
                    # p.join()
                    logger.info('killed spin_process process')
                except:
                    logger.error(traceback.format_exc())
                    logger.error('error :: killing all spin_process processes')

        for p in pids:
            if p.is_alive():
                logger.info('stopping spin_process - %s' % (str(p.is_alive())))
                p.join()

        process_runtime = time() - now
        if process_runtime < 10:
            sleep_for = (10 - process_runtime)
            logger.info('sleeping for %.2f seconds due to low run time...' % sleep_for)
            sleep(sleep_for)
            try:
                del sleep_for
            except:
                logger.error('error :: failed to del sleep_for')
        try:
            del process_runtime
        except:
            logger.error('error :: failed to del process_runtime')
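# A self-contained sketch (not Skyline code, names are hypothetical) of the
# spawn-and-supervise pattern used in run() above: start worker processes,
# poll them for up to a timeout, and terminate any still running when the
# timeout expires.
def example_supervise(target, args, processes=1, timeout=60):
    from multiprocessing import Process
    from time import sleep, time as now
    workers = [Process(target=target, args=(n,) + args) for n in range(1, processes + 1)]
    for p in workers:
        p.start()
    started = now()
    while now() - started <= timeout:
        if any(p.is_alive() for p in workers):
            sleep(0.1)  # avoid hogging the CPU while polling
        else:
            return True  # all workers finished within the timeout
    for p in workers:
        # Timed out, kill the stragglers and reap them
        if p.is_alive():
            p.terminate()
        p.join()
    return False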
def spin_process(self, i, anomaly_id):
    """
    Assign an anomalous metric and determine correlated metrics

    :param i: python process id
    :param anomaly_id: the anomaly_id
    :type i: object
    :type anomaly_id: int
    :return: boolean
    :rtype: boolean
    """
    child_process_pid = os.getpid()
    logger.info('child process pid %s - processing anomaly_id - %s' % (str(child_process_pid), str(anomaly_id)))

    try:
        from process_correlations import process_correlations
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: importing process_correlations')

    def get_an_engine():
        try:
            engine, log_msg, trace = get_engine(skyline_app)
            return engine, log_msg, trace
        except:
            logger.error(traceback.format_exc())
            log_msg = 'error :: failed to get MySQL engine in spin_process'
            logger.error('error :: failed to get MySQL engine in spin_process')
            return None, log_msg, trace

    def engine_disposal(engine):
        if engine:
            try:
                engine.dispose()
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: calling engine.dispose()')
        return

    if settings.MEMCACHE_ENABLED:
        try:
            memcache_key = '%s.last.processed.anomaly.id' % skyline_app
            self.memcache_client.set(memcache_key, int(anomaly_id))
            logger.info('processed - set the memcache key - %s - %s' % (memcache_key, str(anomaly_id)))
        except:
            logger.error('error :: failed to set the memcache key - %s - %s' % (memcache_key, str(anomaly_id)))
        try:
            self.memcache_client.close()
        except:
            logger.error('error :: failed to close memcache_client')

    try:
        base_name, anomaly_timestamp, anomalies, correlated_metrics, correlations, sorted_correlations = process_correlations(i, anomaly_id)
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: processing correlations')
        return False

    # @added 20180414 - Bug #2352: Luminosity no metrics MySQL error
    # Do not query with an empty string
    if not correlated_metrics:
        logger.info('no correlations found for %s anomaly id %s' % (base_name, str(anomaly_id)))
        return False

    metrics_str = ''
    for metric_name in correlated_metrics:
        if metrics_str == '':
            new_metrics_str = "'%s'" % metric_name
        else:
            new_metrics_str = "%s,'%s'" % (metrics_str, metric_name)
        metrics_str = new_metrics_str

    query = 'SELECT id,metric FROM metrics WHERE metric in (%s)' % str(metrics_str)
    try:
        results = mysql_select(skyline_app, query)
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: querying MySQL - SELECT id,metric FROM metrics WHERE metric in (%s)' % str(metrics_str))
        return False

    correlated_metrics_list = []
    for metric_id, metric in results:
        correlated_metrics_list.append([int(metric_id), str(metric)])

    luminosity_populated = False
    if sorted_correlations:
        luminosity_correlations = []
        for metric, coefficient, shifted, shifted_coefficient in sorted_correlations:
            for metric_id, metric_name in correlated_metrics_list:
                if metric == metric_name:
                    if shifted < 2:
                        luminosity_correlations.append([anomaly_id, int(metric_id), coefficient, shifted, shifted_coefficient])

        first_value_not_added = True
        values_string = 'INSERT INTO luminosity (id, metric_id, coefficient, shifted, shifted_coefficient) VALUES '
        # @added 20180420 - Branch #2270: luminosity
        # Only try and insert if there are values present
        values_present = False
        for anomaly_id, metric_id, coefficient, shifted, shifted_coefficient in luminosity_correlations:
            if coefficient:
                values_present = True
                ins_values = '(%s,%s,%s,%s,%s)' % (
                    str(anomaly_id), str(metric_id),
                    str(round(coefficient, 5)), str(shifted),
                    str(round(shifted_coefficient, 5)))
                if first_value_not_added:
                    first_value_not_added = False
                    values_string = 'INSERT INTO luminosity (id, metric_id, coefficient, shifted, shifted_coefficient) VALUES %s' % ins_values
                else:
                    new_values_string = '%s,%s' % (values_string, ins_values)
                    values_string = new_values_string
        new_values_string = '%s;' % values_string
        values_string = new_values_string

        # logger.info('debug insert string :: %s' % str(values_string))
        # 'INSERT INTO luminosity (anomaly_id, metric_id, coefficient, shifted, shifted_coefficient) VALUES (68882,619,1.0,0,1.0),...,(68882,489,1.0,0,1.0);'
        # Needs a mysql_insert not SQLAlchemy
        if luminosity_correlations and values_present:
            try:
                self.mysql_insert(values_string)
                luminosity_populated = True
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: MySQL insert - %s' % str(values_string))

    return luminosity_populated
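# For illustration (the values are made up): the loop above builds a single
# multi-row INSERT statement such as:
#
#   INSERT INTO luminosity (id, metric_id, coefficient, shifted,
#   shifted_coefficient) VALUES (68882,619,1.0,0,1.0),(68882,489,0.9,1,0.95);
#
# A hedged alternative sketch using mysql.connector's executemany(), which
# batches the same rows as parameterised tuples; `config` is assumed to be
# the connection settings dict used elsewhere in this codebase, and the
# function name is hypothetical.
def example_insert_correlations(luminosity_correlations):
    import mysql.connector
    cnx = mysql.connector.connect(**config)
    cursor = cnx.cursor()
    cursor.executemany(
        'INSERT INTO luminosity (id, metric_id, coefficient, shifted, '
        'shifted_coefficient) VALUES (%s, %s, %s, %s, %s)',
        [tuple(row) for row in luminosity_correlations])
    cnx.commit()
    cursor.close()
    cnx.close()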
def get_yhat_values(
        metric, from_timestamp, until_timestamp, include_value, include_mean,
        include_yhat_real_lower, include_anomalous_periods):

    timeseries = []
    try:
        logger.info('get_yhat_values :: for %s from %s until %s' % (
            metric, str(from_timestamp), str(until_timestamp)))
        timeseries = get_graphite_metric('webapp', metric, from_timestamp, until_timestamp, 'list', 'object')
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: get_yhat_values :: failed to get timeseries data for %s' % (
            metric))
        return None
    yhat_dict = {}
    logger.info('get_yhat_values :: %s values in timeseries for %s to calculate yhat values from' % (
        str(len(timeseries)), metric))

    # @added 20210126 - Task #3958: Handle secondary algorithms in yhat_values
    anomalous_periods_dict = {}
    # Initialise the rectangle lists here so that they are defined even if
    # the timeseries is empty
    top = []
    bottom = []
    left = []
    right = []

    if timeseries:
        metric_id = 0
        if metric:
            logger.info('get_yhat_values :: getting db id for metric - %s' % metric)
            query = 'select id from metrics WHERE metric=\'%s\'' % metric  # nosec
            try:
                result = mysql_select(skyline_app, query)
                metric_id = int(result[0][0])
            except:
                logger.error('error :: get_yhat_values :: failed to get id from db: %s' % traceback.format_exc())
        anomalies_at = []
        if metric_id:
            logger.info('get_yhat_values :: getting latest anomalies')
            query = 'select anomaly_timestamp, anomalous_datapoint, anomaly_end_timestamp from anomalies WHERE metric_id=%s AND anomaly_timestamp >= %s AND anomaly_timestamp <= %s' % (
                str(metric_id), str(from_timestamp), str(until_timestamp))
            try:
                rows = mysql_select(skyline_app, query)
                for row in rows:
                    a_timestamp = int(row[0])
                    a_value = float(row[1])
                    try:
                        a_end_timestamp = int(row[2])
                    except:
                        a_end_timestamp = 0
                    anomalies_at.append([a_timestamp, a_value, a_end_timestamp])
            except:
                logger.error('error :: get_yhat_values :: failed to get anomalies from db: %s' % traceback.format_exc())
                rows = []

        timeseries_ranges = []
        last_timestamp = None
        for index, item in enumerate(timeseries):
            if last_timestamp:
                t_range = list(range(last_timestamp, int(item[0])))
                timeseries_ranges.append([index, t_range, item])
            last_timestamp = int(item[0])
            t_range = list(range(last_timestamp, (int(item[0]) + 1)))
            timeseries_ranges.append([index, t_range, item])

        anomalies_index = []
        for index, time_range, item in timeseries_ranges:
            for a_timestamp, a_value, a_end_timestamp in anomalies_at:
                if a_timestamp in time_range:
                    anomalies_index.append([index, item])

        anomalous_period_indices = []
        anomalies_indices = [item[0] for item in anomalies_index]
        for index, item in enumerate(timeseries):
            for idx in anomalies_indices:
                anomaly_index_range = list(range((idx - 3), (idx + 5)))
                if index in anomaly_index_range:
                    for i in anomaly_index_range:
                        anomalous_period_indices.append(i)

        anomaly_timestamps_indices = []
        anomalies = []
        for item in anomalies_index:
            anomaly_timestamps_indices.append(item[0])
            anomalies.append(item[1])

    if timeseries:
        try:
            array_amin = np.amin([item[1] for item in timeseries])
            values = []
            # @added 20210126 - Task #3958: Handle secondary algorithms in yhat_values
            # last_value = None
            # start_anomalous_period = None
            # end_anomalous_period = None
            # sigma3_array = []
            # sigma3_values = []
            # extended_values = []
            last_breach = 0
            breach_for = 10
            last_breach_vector = 'positive'
            # last_used_extended = False
            # last_used_extended_value = None
            top = []
            bottom = []
            left = []
            right = []
            # @modified 20210126 - Task #3958: Handle secondary algorithms in yhat_values
            # for ts, value in timeseries:
            #     values.append(value)
            #     va = np.array(values)
            #     va_mean = va.mean()
            #     va_std_3 = 3 * va.std()
            for index, item in enumerate(timeseries):
                ts = item[0]
                value = item[1]
                values.append(value)
                va = np.array(values)
                va_mean = va.mean()
                va_std_3 = 3 * va.std()
                # @added 20210126 - Task #3958: Handle secondary algorithms in yhat_values
                anomalous_period = 0
                three_sigma_lower = va_mean - va_std_3
                three_sigma_upper = va_mean + va_std_3
                # sigma3_array.append([ts, value, va_mean, [three_sigma_lower, three_sigma_upper]])
                # sigma3_values.append([three_sigma_lower, three_sigma_upper])
                use_extended = False
                drop_expected_range = False
                if index not in anomaly_timestamps_indices:
                    use_extended = True
                    # if last_used_extended:
                    #     last_used_extended_value = None
                else:
                    drop_expected_range = True
                for anomaly_index in anomaly_timestamps_indices:
                    if index > anomaly_index:
                        # if index < (anomaly_index + 30):
                        if index < (anomaly_index + breach_for):
                            use_extended = False
                            anomalous_period = 1
                            break
                extended_lower = three_sigma_lower
                extended_upper = three_sigma_upper
                if use_extended:
                    if item[1] > three_sigma_upper:
                        extended_lower = three_sigma_lower
                        extended_upper = (item[1] + ((item[1] / 100) * 5))
                        last_breach = index
                        last_breach_vector = 'positive'
                    elif item[1] < three_sigma_lower:
                        extended_lower = (item[1] - ((item[1] / 100) * 5))
                        extended_upper = three_sigma_upper
                        last_breach = index
                        last_breach_vector = 'negative'
                    elif index < (last_breach + breach_for) and index > last_breach:
                        if last_breach_vector == 'positive':
                            extended_value = (item[1] + ((item[1] / 100) * 5))
                            three_sigma_value = three_sigma_upper
                            if three_sigma_value > extended_value:
                                extended_value = (three_sigma_value + ((three_sigma_value / 100) * 5))
                            extended_lower = three_sigma_lower
                            extended_upper = extended_value
                        else:
                            extended_lower = (item[1] - ((item[1] / 100) * 5))
                            extended_upper = three_sigma_upper
                    else:
                        extended_lower = three_sigma_lower
                        extended_upper = three_sigma_upper
                if drop_expected_range:
                    use_extended = False
                    if last_breach_vector == 'positive':
                        extended_lower = three_sigma_lower - (three_sigma_upper * 0.1)
                        extended_upper = item[1] - (item[1] * 0.1)
                    if last_breach_vector == 'negative':
                        extended_lower = three_sigma_lower - (three_sigma_lower * 0.1)
                        extended_upper = item[1] + (item[1] * 0.1)
                # extended_values.append([extended_lower, extended_upper])
                lower = extended_lower
                upper = extended_upper

                if index in sorted(list(set(anomalous_period_indices))):
                    if index in anomalies_indices:
                        continue
                    for idx in anomaly_timestamps_indices:
                        if (index + 3) == idx:
                            a_top = extended_upper + (extended_upper * 0.1)
                            top.append(a_top)
                            a_bottom = extended_lower - (extended_lower * 0.1)
                            bottom.append(a_bottom)
                            a_left = item[0]
                            left.append(a_left)
                        if (index - 4) == idx:
                            a_right = item[0]
                            right.append(a_right)

                # @modified 20201126 - Feature #3850: webapp - yhat_values API endpoint
                # Change dict key to int not float
                int_ts = int(ts)
                yhat_dict[int_ts] = {}
                if include_value:
                    yhat_dict[int_ts]['value'] = value
                if include_mean:
                    yhat_dict[int_ts]['mean'] = va_mean
                # @modified 20210201 - Task #3958: Handle secondary algorithms in yhat_values
                # yhat_lower = va_mean - va_std_3
                yhat_lower = lower
                yhat_upper = upper
                if include_yhat_real_lower:
                    # @modified 20201202 - Feature #3850: webapp - yhat_values API endpoint
                    # Set the yhat_real_lower correctly
                    # if yhat_lower < array_amin and array_amin == 0:
                    #     yhat_dict[int_ts]['yhat_real_lower'] = array_amin
                    if yhat_lower < 0 and array_amin > -0.0000000001:
                        yhat_dict[int_ts]['yhat_real_lower'] = 0
                    else:
                        yhat_dict[int_ts]['yhat_real_lower'] = yhat_lower
                yhat_dict[int_ts]['yhat_lower'] = yhat_lower
                # @modified 20210201 - Task #3958: Handle secondary algorithms in yhat_values
                # yhat_dict[int_ts]['yhat_upper'] = va_mean + va_std_3
                yhat_dict[int_ts]['yhat_upper'] = upper
                # @added 20210201 - Task #3958: Handle secondary algorithms in yhat_values
                if use_extended:
                    if yhat_lower != three_sigma_lower:
                        yhat_dict[int_ts]['3sigma_lower'] = three_sigma_lower
                    if yhat_upper != three_sigma_upper:
                        yhat_dict[int_ts]['3sigma_upper'] = three_sigma_upper
                if include_anomalous_periods:
                    yhat_dict[int_ts]['anomalous_period'] = anomalous_period
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: get_yhat_values :: failed to create yhat_dict for %s' % (
                metric))
            return None

    logger.info('get_yhat_values :: calculated yhat values for %s data points' % str(len(yhat_dict)))

    if yhat_dict:
        yhat_dict_cache_key = 'webapp.%s.%s.%s.%s.%s.%s' % (
            metric, str(from_timestamp), str(until_timestamp),
            str(include_value), str(include_mean), str(include_yhat_real_lower))
        logger.info('get_yhat_values :: saving yhat_dict to Redis key - %s' % yhat_dict_cache_key)
        try:
            REDIS_CONN.setex(yhat_dict_cache_key, 14400, str(yhat_dict))
            logger.info('get_yhat_values :: created Redis key - %s with 14400 TTL' % yhat_dict_cache_key)
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: get_yhat_values :: failed to setex Redis key - %s' % yhat_dict_cache_key)

    # @added 20210126 - Task #3958: Handle secondary algorithms in yhat_values
    # Add rectangle coordinates that describe anomalous periods
    anomalous_periods_dict['rectangles'] = {}
    anomalous_periods_dict['rectangles']['top'] = top
    anomalous_periods_dict['rectangles']['bottom'] = bottom
    anomalous_periods_dict['rectangles']['left'] = left
    anomalous_periods_dict['rectangles']['right'] = right
    if anomalous_periods_dict:
        yhat_anomalous_periods_dict_cache_key = 'webapp.%s.%s.%s.%s.%s.%s.anomalous_periods' % (
            metric, str(from_timestamp), str(until_timestamp),
            str(include_value), str(include_mean), str(include_yhat_real_lower))
        logger.info('get_yhat_values :: saving anomalous_periods_dict to Redis key - %s' % yhat_anomalous_periods_dict_cache_key)
        try:
            REDIS_CONN.setex(yhat_anomalous_periods_dict_cache_key, 14400, str(anomalous_periods_dict))
            logger.info('get_yhat_values :: created Redis key - %s with 14400 TTL' % yhat_anomalous_periods_dict_cache_key)
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: get_yhat_values :: failed to setex Redis key - %s' % yhat_anomalous_periods_dict_cache_key)

    # @modified 20210201 - Task #3958: Handle secondary algorithms in yhat_values
    # return yhat_dict
    return yhat_dict, anomalous_periods_dict
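# A hypothetical usage sketch (not in the original code); the metric name and
# timestamps are illustrative. yhat_dict is keyed by int timestamp and
# anomalous_periods_dict carries the rectangle coordinates for plotting the
# anomalous periods.
def example_get_yhat():
    yhat_dict, anomalous_periods_dict = get_yhat_values(
        'stats.statsd.example_metric', 1612100000, 1612186400,
        True, True, True, True)
    # e.g. yhat_dict[1612100060] ->
    # {'value': 1.0, 'mean': 0.92, 'yhat_real_lower': 0, 'yhat_lower': 0.4,
    #  'yhat_upper': 1.4, 'anomalous_period': 0}
    return yhat_dict, anomalous_periods_dict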
def run(self):
    """
    Called when the process initializes.
    """
    # Log management to prevent overwriting
    # Allow the bin/<skyline_app>.d to manage the log
    if os.path.isfile(skyline_app_logwait):
        try:
            logger.info('removing %s' % skyline_app_logwait)
            os.remove(skyline_app_logwait)
        except OSError:
            logger.error('error :: failed to remove %s, continuing' % skyline_app_logwait)
            pass

    now = time()
    log_wait_for = now + 5
    while now < log_wait_for:
        if os.path.isfile(skyline_app_loglock):
            sleep(.1)
            now = time()
        else:
            now = log_wait_for + 1

    logger.info('starting %s run' % skyline_app)
    if os.path.isfile(skyline_app_loglock):
        logger.error(
            'error :: bin/%s.d log management seems to have failed, continuing' % skyline_app)
        try:
            os.remove(skyline_app_loglock)
            logger.info('log lock file removed')
        except OSError:
            logger.error('error :: failed to remove %s, continuing' % skyline_app_loglock)
            pass
    else:
        logger.info('bin/%s.d log management done' % skyline_app)

    # @added 20190417 - Feature #2948: LUMINOSITY_ENABLED setting
    # If Luminosity is not enabled, do nothing
    luminosity_enabled = True
    try:
        luminosity_enabled = settings.LUMINOSITY_ENABLED
        logger.info('LUMINOSITY_ENABLED is set to %s' % str(luminosity_enabled))
    except:
        logger.info('warning :: LUMINOSITY_ENABLED is not declared in settings.py, defaults to True')

    # @added 20190417 - Feature #2950: Report defaulted settings to log
    # Added all the globally declared settings to enable reporting in the
    # log the state of each setting.
    try:
        ENABLE_LUMINOSITY_DEBUG = settings.ENABLE_LUMINOSITY_DEBUG
        logger.info('ENABLE_LUMINOSITY_DEBUG is set from settings.py to %s' % str(ENABLE_LUMINOSITY_DEBUG))
    except:
        logger.info('warning :: ENABLE_LUMINOSITY_DEBUG is not declared in settings.py, defaults to False')
        ENABLE_LUMINOSITY_DEBUG = False
    try:
        SERVER_METRIC_PATH = '.%s' % settings.SERVER_METRICS_NAME
        if SERVER_METRIC_PATH == '.':
            SERVER_METRIC_PATH = ''
        logger.info('SERVER_METRIC_PATH is set from settings.py to %s' % str(SERVER_METRIC_PATH))
    except:
        SERVER_METRIC_PATH = ''
        logger.info('warning :: SERVER_METRIC_PATH is not declared in settings.py, defaults to \'\'')
    try:
        LUMINOSITY_PROCESSES = settings.LUMINOSITY_PROCESSES
        logger.info('LUMINOSITY_PROCESSES is set from settings.py to %s' % str(LUMINOSITY_PROCESSES))
    except:
        # @modified 20180110 - Task #2266: Evaluate luminol for the luminosity branch
        # It is fast and lightweight
        # luminosity_processes = 2
        LUMINOSITY_PROCESSES = 1
        logger.info('warning :: cannot determine LUMINOSITY_PROCESSES from settings.py, defaults to %s' % str(LUMINOSITY_PROCESSES))

    while 1:
        now = time()

        # Make sure Redis is up
        try:
            self.redis_conn.ping()
            if ENABLE_LUMINOSITY_DEBUG:
                logger.info('debug :: connected to Redis')
        except:
            logger.error(
                'error :: cannot connect to redis at socket path %s' % (settings.REDIS_SOCKET_PATH))
            sleep(30)
            # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
            # @modified 20191115 - Bug #3266: py3 Redis binary objects not strings
            #                      Branch #3262: py3
            # Use get_redis_conn and get_redis_conn_decoded to use on Redis sets when the bytes
            # types need to be decoded as utf-8 to str
            # if settings.REDIS_PASSWORD:
            #     self.redis_conn = StrictRedis(password=settings.REDIS_PASSWORD, unix_socket_path=settings.REDIS_SOCKET_PATH)
            # else:
            #     self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
            # @added 20191115 - Bug #3266: py3 Redis binary objects not strings
            #                   Branch #3262: py3
            self.redis_conn = get_redis_conn(skyline_app)
            self.redis_conn_decoded = get_redis_conn_decoded(skyline_app)
            continue

        # Report app up
        try:
            self.redis_conn.setex(skyline_app, 120, now)
            logger.info('updated Redis key for %s up' % skyline_app)
        except:
            logger.error('error :: failed to update Redis key for %s up' % skyline_app)

        # @added 20190417 - Feature #2948: LUMINOSITY_ENABLED setting
        # If Luminosity is not enabled, do nothing
        if not luminosity_enabled:
            logger.info('luminosity is not enabled, LUMINOSITY_ENABLED set to %s, sleeping for 20 seconds' % str(settings.LUMINOSITY_ENABLED))
            sleep(20)
            continue

        """
        Determine if any new anomalies have been added
        """
        while True:
            process_anomaly_id = None
            last_processed_anomaly_id = None
            memcache_last_processed_anomaly_id_data = False
            # Check memcached before MySQL
            memcache_key = '%s.last.processed.anomaly.id' % skyline_app
            if settings.MEMCACHE_ENABLED:
                try:
                    # @modified 20191029 - Task #3304: py3 - handle pymemcache bytes not str
                    # last_processed_anomaly_id = self.memcache_client.get(memcache_key)
                    if python_version == 2:
                        last_processed_anomaly_id = self.memcache_client.get(memcache_key)
                    else:
                        last_processed_anomaly_id = self.memcache_client.get(memcache_key).decode('utf-8')
                    # if memcache does not have the key the response to the
                    # client is None, it does not except
                except:
                    # @modified 20200507 - stop reporting this as an error
                    # it can be expected to happen from time to time
                    # logger.error('error :: failed to get %s from memcache' % memcache_key)
                    logger.info('failed to get %s from memcache, will query DB' % memcache_key)
                try:
                    self.memcache_client.close()
                except:
                    logger.error('error :: failed to close memcache_client')

            if last_processed_anomaly_id:
                logger.info('last_processed_anomaly_id found in memcache - %s' % str(last_processed_anomaly_id))
                memcache_last_processed_anomaly_id_data = True
            else:
                # @modified 20190517 - Bug #3016: Handle no anomaly ids in luminosity
                #                      Branch #3002: docker
                # Log appropriate to whether memcache is enabled or not
                if settings.MEMCACHE_ENABLED:
                    logger.info('last_processed_anomaly_id key was NOT found in memcache - %s' % str(last_processed_anomaly_id))
                else:
                    logger.info('memcache not enabled not checking for last_processed_anomaly_id key')

            if not last_processed_anomaly_id:
                query = 'SELECT id FROM luminosity WHERE id=(SELECT MAX(id) FROM luminosity) ORDER BY id DESC LIMIT 1'
                results = None
                try:
                    results = mysql_select(skyline_app, query)
                except:
                    logger.error(traceback.format_exc())
                    logger.error('error :: MySQL query failed - %s' % query)
                if results:
                    try:
                        last_processed_anomaly_id = int(results[0][0])
                        logger.info('last_processed_anomaly_id found from DB - %s' % str(last_processed_anomaly_id))
                    except:
                        logger.error(traceback.format_exc())
                if last_processed_anomaly_id and settings.MEMCACHE_ENABLED:
                    if not memcache_last_processed_anomaly_id_data:
                        logger.info('Populating memcache with DB result - %s' % str(last_processed_anomaly_id))
                        try:
                            self.memcache_client.set(memcache_key, int(last_processed_anomaly_id))
                            logger.info('populated memcache key %s with %s' % (memcache_key, str(last_processed_anomaly_id)))
                        except:
                            logger.error('error :: failed to set the memcache key - %s - %s' % (memcache_key, str(last_processed_anomaly_id)))
                        try:
                            self.memcache_client.close()
                        except:
                            logger.error('error :: failed to close memcache_client')

            if not last_processed_anomaly_id:
                # Check MySQL
                now = int(time())
                after = now - 600
                query = 'SELECT * FROM anomalies WHERE anomaly_timestamp > \'%s\'' % str(after)  # nosec
                results = None
                try:
                    results = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: MySQL query failed - %s' % query)
                if results:
                    process_anomaly_id = int(results[0][0])
                    logger.info('found new anomaly id to process from the DB - %s' % str(process_anomaly_id))
                    # Handle the first one
                    last_processed_anomaly_id = process_anomaly_id - 1
                else:
                    logger.info('no new anomalies in the anomalies table')
                    # @added 20190517 - Bug #3016: Handle no anomaly ids in luminosity
                    #                   Branch #3002: docker
                    # When Skyline is first installed, if luminosity is enabled it
                    # reports errors as there are no anomaly ids
                    if str(last_processed_anomaly_id) == 'None':
                        last_processed_anomaly_id = 0

            query = 'SELECT * FROM anomalies WHERE id > \'%s\'' % str(last_processed_anomaly_id)  # nosec
            results = None
            try:
                results = mysql_select(skyline_app, query)
            except:
                logger.error('error :: MySQL query failed - %s' % query)
            if results:
                try:
                    process_anomaly_id = int(results[0][0])
                    logger.info('found the next new anomaly id to process from the DB - %s' % str(process_anomaly_id))
                except:
                    logger.error(traceback.format_exc())
                    logger.error('error :: from query - %s' % query)
            else:
                logger.info('no new anomalies in the anomalies table')

            if process_anomaly_id and last_processed_anomaly_id:
                if isinstance(last_processed_anomaly_id, int):
                    if isinstance(process_anomaly_id, int):
                        if last_processed_anomaly_id == process_anomaly_id:
                            logger.info('anomaly id already processed - %s' % str(process_anomaly_id))
                            process_anomaly_id = None

            if not process_anomaly_id:
                logger.info('sleeping 20 no anomalies to correlate - last processed anomaly id - %s' % str(last_processed_anomaly_id))
                sleep(20)
                up_now = time()
                # Report app up
                try:
                    self.redis_conn.setex(skyline_app, 120, up_now)
                    logger.info('updated Redis key for %s up' % skyline_app)
                except:
                    logger.error('error :: failed to update Redis key for %s up' % skyline_app)

            cache_key = '%s.sent_graphite_metrics' % skyline_app
            redis_sent_graphite_metrics = False
            try:
                redis_sent_graphite_metrics = self.redis_conn.get(cache_key)
            except Exception as e:
                logger.error('error :: could not query Redis for key %s: %s' % (cache_key, e))

            # Flush metrics to Graphite
            if not redis_sent_graphite_metrics:
                try:
                    # @modified 20190522 - Task #3034: Reduce multiprocessing Manager list usage
                    # correlations = str(len(self.correlations))
                    # @modified 20191030 - Bug #3266: py3 Redis binary objects not strings
                    #                      Branch #3262: py3
                    # correlations = str(len(list(self.redis_conn.smembers('luminosity.correlations'))))
                    correlations = str(len(list(self.redis_conn_decoded.smembers('luminosity.correlations'))))
                except:
                    correlations = '0'
                logger.info('correlations :: %s' % correlations)
                send_metric_name = '%s.correlations' % skyline_app_graphite_namespace
                send_graphite_metric(skyline_app, send_metric_name, correlations)

                # @added 20190522 - Task #3034: Reduce multiprocessing Manager list usage
                try:
                    # @modified 20191030 - Bug #3266: py3 Redis binary objects not strings
                    #                      Branch #3262: py3
                    # runtimes = list(self.redis_conn.smembers('luminosity.runtimes'))
                    # The set members are strings, convert them to floats so
                    # they can be summed
                    runtimes = [float(r) for r in self.redis_conn_decoded.smembers('luminosity.runtimes')]
                except:
                    runtimes = []

                # @added 20180720 - Task #2462: Implement useful metrics for Luminosity
                #                   Branch #2270: luminosity
                # runtime metric to monitor the time it takes to process
                # correlations
                try:
                    # @modified 20190522 - Task #3034: Reduce multiprocessing Manager list usage
                    # if len(self.runtimes) > 1:
                    #     avg_runtime = sum(self.runtimes) / len(self.runtimes)
                    # else:
                    #     avg_runtime = sum(self.runtimes)
                    if len(runtimes) > 1:
                        avg_runtime = sum(runtimes) / len(runtimes)
                    else:
                        avg_runtime = sum(runtimes)
                except:
                    avg_runtime = '0'
                logger.info('avg_runtime :: %s' % str(avg_runtime))
                send_metric_name = '%s.avg_runtime' % skyline_app_graphite_namespace
                send_graphite_metric(skyline_app, send_metric_name, str(avg_runtime))

                try:
                    # @modified 20190522 - Task #3034: Reduce multiprocessing Manager list usage
                    # metrics_checked_for_correlation = str(sum(self.metrics_checked_for_correlation))
                    # @modified 20191030 - Bug #3266: py3 Redis binary objects not strings
                    #                      Branch #3262: py3
                    # metrics_checked_for_correlation = str(len(list(self.redis_conn.smembers('luminosity.metrics_checked_for_correlation'))))
                    metrics_checked_for_correlation = str(len(list(self.redis_conn_decoded.smembers('luminosity.metrics_checked_for_correlation'))))
                except:
                    metrics_checked_for_correlation = '0'
                logger.info('metrics_checked_for_correlation :: %s' % metrics_checked_for_correlation)
                send_metric_name = '%s.metrics_checked_for_correlation' % skyline_app_graphite_namespace
                send_graphite_metric(skyline_app, send_metric_name, metrics_checked_for_correlation)

                sent_graphite_metrics_now = int(time())
                try:
                    self.redis_conn.setex(cache_key, 59, sent_graphite_metrics_now)
                    logger.info('updated Redis key - %s' % cache_key)
                except:
                    logger.error('error :: failed to update Redis key - %s up' % cache_key)

                # Reset lists
                # @modified 20190522 - Task #3034: Reduce multiprocessing Manager list usage
                # self.correlations[:] = []
                # @added 20180720 - Task #2462: Implement useful metrics for Luminosity
                # @modified 20190522 - Task #3034: Reduce multiprocessing Manager list usage
                # self.runtimes[:] = []
                # self.metrics_checked_for_correlation[:] = []
                # @added 20190522 - Task #3034: Reduce multiprocessing Manager list usage
                # Use Redis sets instead of Manager().list()
                delete_redis_sets = [
                    'luminosity.correlations',
                    'luminosity.runtimes',
                    'luminosity.metrics_checked_for_correlation'
                ]
                for i_redis_set in delete_redis_sets:
                    redis_set_to_delete = i_redis_set
                    try:
                        self.redis_conn.delete(redis_set_to_delete)
                        logger.info('deleted Redis set - %s' % redis_set_to_delete)
                    except:
                        logger.error(traceback.format_exc())
                        logger.error('error :: failed to delete Redis set - %s' % redis_set_to_delete)

            # @added 20180720 - Task #2462: Implement useful metrics for Luminosity
            #                   Feature #2464: luminosity_remote_data
            # Added the ability to add a Redis key to override the memcached
            # key luminosity.last.processed.anomaly.id so it does not have
            # to be changed via telnet to memcache.
            if not process_anomaly_id or not redis_sent_graphite_metrics:
                cache_key = '%s.last.processed.anomaly.id' % skyline_app
                redis_last_processed_anomaly_id_redis_key = False
                try:
                    redis_last_processed_anomaly_id_redis_key = self.redis_conn.get(cache_key)
                except Exception as e:
                    logger.error('error :: could not query Redis for key %s: %s' % (cache_key, e))
                if redis_last_processed_anomaly_id_redis_key:
                    logger.info('found Redis %s key to override the memcache key setting process_anomaly_id to %s' % (cache_key, str(redis_last_processed_anomaly_id_redis_key)))
                    try:
                        process_anomaly_id = int(redis_last_processed_anomaly_id_redis_key)
                    except:
                        logger.error(traceback.format_exc())
                        logger.error('error :: failed to set process_anomaly_id from Redis override key value')
                    # And remove the Redis override key as it is only meant
                    # to override once to allow for a replay for debug
                    # purposes only.
                    try:
                        self.redis_conn.setex(cache_key, 1, int(redis_last_processed_anomaly_id_redis_key))
                        logger.info('updated Redis key - %s' % cache_key)
                    except:
                        logger.error('error :: failed to update Redis key - %s up to 1 second expiring to delete it.' % cache_key)

            if process_anomaly_id:
                break

        # Spawn process
        logger.info('spawning processes to correlate anomaly id %s' % str(process_anomaly_id))
        pids = []
        spawned_pids = []
        pid_count = 0
        now = time()
        for i in range(1, LUMINOSITY_PROCESSES + 1):
            try:
                p = Process(target=self.spin_process, args=(i, process_anomaly_id))
                pids.append(p)
                pid_count += 1
                logger.info('starting %s of %s spin_process/es' % (str(pid_count), str(LUMINOSITY_PROCESSES)))
                p.start()
                spawned_pids.append(p.pid)
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: failed to start spin_process')
                continue

        # Self monitor processes and terminate if any spin_process has run
        # for too long
        p_starts = time()
        while time() - p_starts <= 60:
            if any(p.is_alive() for p in pids):
                # Just to avoid hogging the CPU
                sleep(.1)
            else:
                # All the processes are done, break now.
                time_to_run = time() - p_starts
                logger.info('%s spin_process completed in %.2f seconds' % (str(LUMINOSITY_PROCESSES), time_to_run))
                break
        else:
            # We only enter this if we didn't 'break' above.
            logger.info('timed out, killing all spin_process processes')
            for p in pids:
                try:
                    p.terminate()
                    # p.join()
                    logger.info('killed spin_process process')
                except:
                    logger.error(traceback.format_exc())
                    logger.error('error :: killing all spin_process processes')

        for p in pids:
            if p.is_alive():
                logger.info('stopping spin_process - %s' % (str(p.is_alive())))
                p.join()

        process_runtime = time() - now
        if process_runtime < 10:
            sleep_for = (10 - process_runtime)
            logger.info('sleeping for %.2f seconds due to low run time...' % sleep_for)
            sleep(sleep_for)
            try:
                del sleep_for
            except:
                logger.error('error :: failed to del sleep_for')
        try:
            del process_runtime
        except:
            logger.error('error :: failed to del process_runtime')
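# A hedged operational sketch (not Skyline code, the function name is
# hypothetical) of the replay/override mechanism described above: setting the
# luminosity.last.processed.anomaly.id Redis key makes the next loop iteration
# reprocess that anomaly id, and run() then expires the key after one use.
def example_replay_anomaly(redis_conn, anomaly_id):
    # The key name mirrors the memcache key checked in run()
    redis_conn.set('luminosity.last.processed.anomaly.id', int(anomaly_id))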
def spin_process(self, i, anomaly_id): """ Assign an anomalous metric and determine correlated metrics :param i: python process id :param anomaly_id: the anomaly_id :type i: object :type anomaly_id: int :return: boolean :rtype: boolean """ child_process_pid = os.getpid() logger.info('child process pid %s - processing anomaly_id - %s' % (str(child_process_pid), str(anomaly_id))) try: from process_correlations import process_correlations except: logger.error(traceback.format_exc()) logger.error('error :: importing process_correlations') def get_an_engine(): try: engine, log_msg, trace = get_engine(skyline_app) return engine, log_msg, trace except: logger.error(traceback.format_exc()) log_msg = 'error :: failed to get MySQL engine in spin_process' logger.error( 'error :: failed to get MySQL engine in spin_process') return None, log_msg, trace def engine_disposal(engine): if engine: try: engine.dispose() except: logger.error(traceback.format_exc()) logger.error('error :: calling engine.dispose()') return if settings.MEMCACHE_ENABLED: try: memcache_key = '%s.last.processed.anomaly.id' % skyline_app self.memcache_client.set(memcache_key, int(anomaly_id)) logger.info('processed - set the memcache key - %s - %s' % (memcache_key, str(anomaly_id))) except: logger.error( 'error :: failed to set the memcache key - %s - %s' % (memcache_key, str(anomaly_id))) try: self.memcache_client.close() except: logger.error('error :: failed to close memcache_client') try: # @modified 20180720 - Task #2462: Implement useful metrics for Luminosity # Added runtime base_name, anomaly_timestamp, anomalies, correlated_metrics, correlations, sorted_correlations, metrics_checked_for_correlation, runtime = process_correlations( i, anomaly_id) except: logger.error(traceback.format_exc()) logger.error('error :: processing correlations') return False # @added 20180414 - Bug #2352: Luminosity no metrics MySQL error # Do not query with an empty string if not correlated_metrics: logger.info('no correlations found for %s anomaly id %s' % (base_name, str(anomaly_id))) return False else: logger.info( '%s correlations found for %s anomaly id %s' % (str(len(correlated_metrics)), base_name, str(anomaly_id))) # @added 20180720 - Task #2462: Implement useful metrics for Luminosity # @modified 20190522 - Task #3034: Reduce multiprocessing Manager list usage # self.metrics_checked_for_correlation.append(metrics_checked_for_correlation) # self.runtimes.append(runtime) redis_set = 'luminosity.correlations' data = str(metrics_checked_for_correlation) try: self.redis_conn.sadd(redis_set, data) except: logger.info(traceback.format_exc()) logger.error('error :: failed to add %s to Redis set %s' % (str(data), str(redis_set))) redis_set = 'luminosity.runtimes' data = str(runtime) try: self.redis_conn.sadd(redis_set, data) except: logger.info(traceback.format_exc()) logger.error('error :: failed to add %s to Redis set %s' % (str(data), str(redis_set))) metrics_str = '' for metric_name in correlated_metrics: if metrics_str == '': new_metrics_str = "'%s'" % metric_name else: new_metrics_str = "%s,'%s'" % (metrics_str, metric_name) metrics_str = new_metrics_str metrics_str query = 'SELECT id,metric FROM metrics WHERE metric in (%s)' % str( metrics_str) try: results = mysql_select(skyline_app, query) except: logger.error(traceback.format_exc()) logger.error( 'error :: querying MySQL - SELECT id,metric FROM metrics WHERE metric in (%s)' % str(metrics_str)) return False correlated_metrics_list = [] for metric_id, metric in results: 
            correlated_metrics_list.append([int(metric_id), str(metric)])
        logger.info('number of metric ids determined from the metrics tables - %s' % str(len(correlated_metrics_list)))

        correlations_shifted_too_far = 0
        luminosity_populated = False
        if sorted_correlations:
            logger.info('sorted_correlations :: %s' % str(sorted_correlations))
            luminosity_correlations = []
            for metric, coefficient, shifted, shifted_coefficient in sorted_correlations:
                for metric_id, metric_name in correlated_metrics_list:
                    if metric == metric_name:
                        if shifted < 2:
                            luminosity_correlations.append(
                                [anomaly_id, int(metric_id), coefficient, shifted, shifted_coefficient])
                        else:
                            correlations_shifted_too_far += 1
            logger.info('number of correlations shifted too far - %s' % str(correlations_shifted_too_far))

            first_value_not_added = True
            values_string = 'INSERT INTO luminosity (id, metric_id, coefficient, shifted, shifted_coefficient) VALUES '
            # @added 20180420 - Branch #2270: luminosity
            # Only try and insert if there are values present
            values_present = False
            number_of_correlations_in_insert = 0
            for anomaly_id, metric_id, coefficient, shifted, shifted_coefficient in luminosity_correlations:
                if coefficient:
                    values_present = True
                    # @added 20170720 - Task #2462: Implement useful metrics for Luminosity
                    # Populate the self.correlations list to send a count to Graphite
                    # @modified 20190522 - Task #3034: Reduce multiprocessing Manager list usage
                    # self.correlations.append(coefficient)
                    redis_set = 'luminosity.correlations'
                    data = str(coefficient)
                    try:
                        self.redis_conn.sadd(redis_set, data)
                    except:
                        logger.info(traceback.format_exc())
                        logger.error('error :: failed to add %s to Redis set %s' % (
                            str(data), str(redis_set)))
                    number_of_correlations_in_insert += 1
                ins_values = '(%s,%s,%s,%s,%s)' % (
                    str(anomaly_id), str(metric_id),
                    str(round(coefficient, 5)), str(shifted),
                    str(round(shifted_coefficient, 5)))
                if first_value_not_added:
                    first_value_not_added = False
                    values_string = 'INSERT INTO luminosity (id, metric_id, coefficient, shifted, shifted_coefficient) VALUES %s' % ins_values
                else:
                    new_values_string = '%s,%s' % (values_string, ins_values)
                    values_string = new_values_string
            new_values_string = '%s;' % values_string
            values_string = new_values_string
            # logger.info('debug insert string :: %s' % str(values_string))
            # 'INSERT INTO luminosity (anomaly_id, metric_id, coefficient, shifted, shifted_coefficient) VALUES (68882,619,1.0,0,1.0),...,(68882,489,1.0,0,1.0);'
            # Needs a mysql_insert not SQLAlchemy
            if luminosity_correlations and values_present:
                try:
                    self.mysql_insert(values_string)
                    luminosity_populated = True
                except:
                    logger.error(traceback.format_exc())
                    logger.error('error :: MySQL insert - %s' % str(values_string))

        if luminosity_populated:
            logger.info('%s correlations added to database for %s anomaly id %s' % (
                str(number_of_correlations_in_insert), base_name, str(anomaly_id)))
            logger.info('values_string :: %s' % str(values_string))
        return luminosity_populated
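# As the comment above notes, the multi-row INSERT needs a mysql_insert
# rather than SQLAlchemy.  For reference, a hedged sketch (an illustration,
# not Skyline's implementation) of the same insert done with a
# parameterized executemany(), which avoids building the VALUES string by
# hand.  The `config` dict is assumed to be the same mysql.connector
# connection config used elsewhere; `correlations` is a hypothetical list
# of (anomaly_id, metric_id, coefficient, shifted, shifted_coefficient)
# tuples.
def _example_bulk_insert_correlations(correlations):
    import mysql.connector
    cnx = mysql.connector.connect(**config)
    try:
        cursor = cnx.cursor()
        cursor.executemany(
            'INSERT INTO luminosity '
            '(id, metric_id, coefficient, shifted, shifted_coefficient) '
            'VALUES (%s, %s, %s, %s, %s)',
            [(a, m, round(c, 5), s, round(sc, 5))
             for a, m, c, s, sc in correlations])
        cnx.commit()  # make sure the rows are committed
        return cursor.rowcount
    finally:
        cnx.close()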
def classify_anomalies(i, classify_anomalies_set, start_timestamp, classify_for):

    logger = logging.getLogger(skyline_app_logger)
    debug_algorithms = False
    logger.info('classify_anomalies :: with start_timestamp - %s' % str(start_timestamp))
    start_classify_anomalies = timer()

    def mysql_insert(insert):
        """
        Insert data into mysql table

        :param insert: the insert string
        :type insert: str
        :return: int
        :rtype: int or boolean

        - **Example usage**::

            query = 'insert into host (host) VALUES (\'this_host\')'
            result = mysql_insert(query)

        .. note::
            - If the MySQL query fails a boolean will be returned not a tuple
                * ``False``
                * ``None``

        """
        try:
            cnx = mysql.connector.connect(**config)
        except mysql.connector.Error as err:
            logger.error('error :: classify_anomalies :: mysql error - %s' % str(err))
            logger.error('error :: classify_anomalies :: failed to connect to mysql')
            raise
        if cnx:
            try:
                cursor = cnx.cursor()
                cursor.execute(insert)
                inserted_id = cursor.rowcount
                # Make sure data is committed to the database
                cnx.commit()
                cursor.close()
                cnx.close()
                return inserted_id
            except mysql.connector.Error as err:
                logger.error('error :: classify_anomalies :: failed to insert record - mysql error - %s' % str(err))
                cnx.close()
                raise
        else:
            cnx.close()
            return False
        return False

    # Handle luminosity running with multiple processes
    def manage_processing_key(current_pid, base_name, timestamp, classify_for, action):
        result = False
        processing_key = 'luminosity.classify_anomalies.processing.%s.%s' % (
            str(timestamp), str(base_name))
        if action == 'add':
            key_exists = None
            try:
                key_exists = redis_conn_decoded.get(processing_key)
                if key_exists:
                    result = False
                    return result
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: failed to query Redis for %s' % (processing_key))
            try:
                data = {'pid': current_pid, 'timestamp': int(time())}
                redis_conn.setex(processing_key, classify_for, str(data))
                result = True
                logger.info('classify_anomalies :: managing %s added %s with %s' % (
                    str(base_name), processing_key, str(data)))
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: failed to create key %s' % (processing_key))
        if action == 'remove':
            try:
                redis_conn.delete(processing_key)
                result = True
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: failed to remove key %s' % (processing_key))
        return result

    classify_anomalies_list = []
    for classify_anomaly in classify_anomalies_set:
        classify_anomalies_list.append(literal_eval(classify_anomaly))
    if classify_anomalies_list:
        classify_anomalies_list = sorted(classify_anomalies_list, key=lambda x: x[2], reverse=False)

    current_pid = getpid()
    anomalies_processed = 0
    for classify_anomaly in classify_anomalies_list:
        anomaly_data_dict = classify_anomaly[3]
        base_name = anomaly_data_dict['metric']
        timestamp = anomaly_data_dict['timestamp']
        # logger.debug('debug :: classify_anomalies :: %s' % str(classify_anomaly))
        anomalies_processed += 1

        # Handle luminosity running with multiple processes
        manage_metric = False
        try:
            manage_metric = manage_processing_key(current_pid, base_name, timestamp, classify_for, 'add')
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: classify_anomalies :: failed to run manage_processing_key')
        if not manage_metric:
            logger.info('classify_anomalies :: skipping as processing key exists for %s' % base_name)
            continue

        # Remove the anomaly if it has not been classified in 1800 seconds
        if (int(time()) - 1800) > int(anomaly_data_dict['added_at']):
            logger.info(
                'classify_anomalies :: anomaly not classified in 1800 seconds, removing from luminosity.classify_anomalies')
            try:
                redis_conn.srem('luminosity.classify_anomalies', str(classify_anomaly))
                logger.info(
                    'classify_anomalies :: removed %s, %s, %s item from luminosity.classify_anomalies Redis set' % (
                        base_name, str(timestamp), anomaly_data_dict['app']))
            except:
                logger.error(traceback.format_exc())
                logger.error(
                    'error :: classify_anomalies :: failed to remove %s, %s, %s item from luminosity.classify_anomalies Redis set' % (
                        base_name, str(timestamp), anomaly_data_dict['app']))
            try:
                manage_processing_key(current_pid, base_name, timestamp, classify_for, 'remove')
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: failed to run manage_processing_key - %s' % base_name)
            continue

        metric_timeseries_dir = base_name.replace('.', '/')
        metric_training_data_dir = '%s/%s/%s' % (
            settings.IONOSPHERE_DATA_FOLDER, timestamp, metric_timeseries_dir)
        anomaly_json = '%s/%s.json' % (metric_training_data_dir, base_name)
        timeseries = []

        # Try to load the training data
        if os.path.isfile(anomaly_json):
            logger.info('classify_anomalies :: anomaly_json found - %s' % anomaly_json)
            try:
                with open((anomaly_json), 'r') as f:
                    raw_timeseries = f.read()
                timeseries_array_str = str(raw_timeseries).replace('(', '[').replace(')', ']')
                del raw_timeseries
                timeseries = literal_eval(timeseries_array_str)
                del timeseries_array_str
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: could not create timeseries from anomaly json %s' % anomaly_json)
            logger.info('classify_anomalies :: timeseries from anomaly_json has %s datapoints' % str(len(timeseries)))
        else:
            logger.info('classify_anomalies :: anomaly_json not found, removing %s from luminosity.classify_anomalies Redis set' % (base_name))
            try:
                redis_conn.srem('luminosity.classify_anomalies', str(classify_anomaly))
                logger.info(
                    'classify_anomalies :: removed %s, %s, %s item from luminosity.classify_anomalies Redis set' % (
                        base_name, str(timestamp), anomaly_data_dict['app']))
            except:
                logger.error(traceback.format_exc())
                logger.error(
                    'error :: classify_anomalies :: failed to remove %s, %s, %s item from luminosity.classify_anomalies Redis set' % (
                        base_name, str(timestamp), anomaly_data_dict['app']))

        if not timeseries:
            try:
                manage_processing_key(current_pid, base_name, timestamp, classify_for, 'remove')
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: failed to run manage_processing_key - %s' % base_name)
            continue

        # Classify the anomaly or continue classifying the metric
        window = 5
        window_timestamps = [ts for ts, value in timeseries[-window:]]
        algorithms_to_process = len(LUMINOSITY_CLASSIFY_ANOMALY_ALGORITHMS)
        algorithms_processed = 0
        algorithm_results = {}
        algorithms_processed_key = 'luminosity.classify_anomalies.algorithms_processed.%s.%s' % (
            str(timestamp), str(base_name))
        try:
            algorithm_results = redis_conn_decoded.get(algorithms_processed_key)
            if not algorithm_results:
                algorithm_results = {}
            else:
                algorithm_results = literal_eval(algorithm_results)
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: classify_anomalies :: failed to query Redis for %s' % (algorithms_processed_key))
        if not algorithm_results:
            for algorithm in LUMINOSITY_CLASSIFY_ANOMALY_ALGORITHMS:
                algorithm_results[algorithm] = {}
                algorithm_results[algorithm]['processed'] = False
                algorithm_results[algorithm]['result'] = None
            try:
                redis_conn.setex(algorithms_processed_key, 300, str(algorithm_results))
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: failed to set Redis key %s' % (algorithms_processed_key))

        for algorithm in LUMINOSITY_CLASSIFY_ANOMALY_ALGORITHMS:
            if algorithm_results[algorithm]['processed']:
                algorithms_processed += 1
                logger.info(
                    'classify_anomalies :: %s at %s already processed with %s with result %s' % (
                        str(base_name), str(timestamp), algorithm,
                        str(algorithm_results[algorithm]['result'])))
                continue
            custom_algorithm = algorithm
            custom_algorithm_dict = {}
            custom_algorithm_dict['debug_logging'] = False
            debug_algorithm_logging = False
            if debug_algorithms:
                custom_algorithm_dict['debug_logging'] = True
                debug_algorithm_logging = True
            algorithm_source = '/opt/skyline/github/skyline/skyline/custom_algorithms/%s.py' % algorithm
            custom_algorithm_dict['algorithm_source'] = algorithm_source
            if LUMINOSITY_CLASSIFY_ANOMALIES_SAVE_PLOTS:
                custom_algorithm_dict['algorithm_parameters'] = {
                    'window': window, 'c': 6.0, 'return_anomalies': True,
                    'realtime_analysis': False,
                    'save_plots_to': metric_training_data_dir,
                    'save_plots_to_absolute_dir': True,
                    'filename_prefix': 'luminosity.classify_anomaly',
                    'debug_logging': debug_algorithm_logging,
                }
                custom_algorithm_dict['max_execution_time'] = 10.0
            else:
                custom_algorithm_dict['algorithm_parameters'] = {
                    'window': window, 'c': 6.0, 'return_anomalies': True,
                    'realtime_analysis': False,
                    'debug_logging': debug_algorithm_logging,
                }
                custom_algorithm_dict['max_execution_time'] = 5.0
            result = None
            anomalyScore = None
            anomalies = []
            try:
                result, anomalyScore, anomalies = run_custom_algorithm_on_timeseries(
                    skyline_app, current_pid, base_name, timeseries,
                    custom_algorithm, custom_algorithm_dict, debug_algorithms)
                logger.info(
                    'classify_anomalies :: run_custom_algorithm_on_timeseries run %s on %s with result - %s, anomalyScore - %s' % (
                        custom_algorithm, base_name, str(result), str(anomalyScore)))
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: failed to run custom_algorithm %s on %s' % (
                    custom_algorithm, base_name))
            triggered = False
            if anomalies:
                anomalies.reverse()
                for ts, value in anomalies:
                    if ts in window_timestamps:
                        triggered = True
                        break
                    if ts < window_timestamps[0]:
                        break
                if triggered:
                    logger.info('classify_anomalies :: %s triggered on %s within the window at %s' % (
                        custom_algorithm, base_name, str(ts)))
                else:
                    logger.info('classify_anomalies :: %s did not trigger on %s within the window' % (
                        custom_algorithm, base_name))
            algorithm_results[algorithm]['processed'] = True
            algorithm_results[algorithm]['result'] = triggered
            try:
                redis_conn.setex(algorithms_processed_key, 300, str(algorithm_results))
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: failed to set Redis key %s' % (algorithms_processed_key))
            algorithms_processed += 1
            time_now = time()
            runtime = time_now - start_timestamp
            if runtime >= (classify_for - 0.3):
                logger.info('classify_anomalies :: stopping before timeout is reached')
                break
        time_now = time()
        runtime = time_now - start_timestamp
        if runtime >= (classify_for - 0.3):
            logger.info('classify_anomalies :: stopping before timeout is reached')
            break

        anomaly_types = []
        results_recorded = False
        if algorithms_processed == algorithms_to_process:
            for algorithm in LUMINOSITY_CLASSIFY_ANOMALY_ALGORITHMS:
                if algorithm_results[algorithm]['result']:
                    anomaly_types.append(algorithm)
            if not anomaly_types:
                results_recorded = True
            else:
                logger.info('classify_anomalies :: anomaly_types identified for %s - %s' % (
                    base_name,
                    str(anomaly_types)))

        anomaly_id = 0
        if anomaly_types:
            try:
                anomaly_id = get_anomaly_id(skyline_app, base_name, timestamp)
            except:
                logger.error('error :: classify_anomalies :: get_anomaly_id failed to determine id')
                anomaly_id = 0
            logger.info('classify_anomalies :: anomaly_id: %s' % (str(anomaly_id)))

        type_data = []
        if anomaly_id:
            query = 'SELECT id,algorithm,type FROM anomaly_types'
            try:
                results = mysql_select(skyline_app, query)
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: querying MySQL - SELECT id,algorithm,type FROM anomaly_types')
            db_anomaly_types = {}
            for id, associated_algorithm, anomaly_type in results:
                db_anomaly_types[associated_algorithm] = {}
                db_anomaly_types[associated_algorithm]['id'] = id
                db_anomaly_types[associated_algorithm]['type'] = anomaly_type
            metric_id = 0
            query = 'SELECT id FROM metrics WHERE metric=\'%s\'' % base_name
            try:
                results = mysql_select(skyline_app, query)
                for item in results:
                    metric_id = item[0]
                    break
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: querying MySQL - SELECT id FROM metrics WHERE metric=\'%s\'' % base_name)
            type_data = []
            for anomaly_type in anomaly_types:
                type_data.append(int(db_anomaly_types[anomaly_type]['id']))
            logger.info('classify_anomalies :: type_data: %s' % (str(type_data)))

        classification_exists = None
        if type_data and anomaly_id:
            query = 'SELECT metric_id FROM anomalies_type WHERE id=%s' % anomaly_id
            try:
                results = mysql_select(skyline_app, query)
                for item in results:
                    classification_exists = item[0]
                    break
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: classify_anomalies :: querying MySQL - SELECT metric_id FROM anomalies_type WHERE id=%s' % anomaly_id)
            if classification_exists:
                try:
                    redis_conn.srem('luminosity.classify_anomalies', str(classify_anomaly))
                    logger.info(
                        'classify_anomalies :: results already recorded for metric_id %s so removed %s, %s, %s item from luminosity.classify_anomalies Redis set' % (
                            str(classification_exists), base_name,
                            str(timestamp), anomaly_data_dict['app']))
                except:
                    logger.error(traceback.format_exc())
                    logger.error(
                        'error :: classify_anomalies :: after results recorded failed to remove %s, %s, %s item from luminosity.classify_anomalies Redis set' % (
                            base_name, str(timestamp), anomaly_data_dict['app']))
                type_data = None

        if type_data:
            type_data_str = ''
            for id in type_data:
                if type_data_str == '':
                    type_data_str = '%s' % str(id)
                else:
                    type_data_str = '%s,%s' % (type_data_str, str(id))
            ins_values = '(%s,%s,\'%s\')' % (
                str(anomaly_id), str(metric_id), type_data_str)
            values_string = 'INSERT INTO anomalies_type (id, metric_id, type) VALUES %s' % ins_values
            try:
                results_recorded = mysql_insert(values_string)
                logger.debug('debug :: classify_anomalies :: INSERT: %s' % (str(values_string)))
                logger.debug('debug :: classify_anomalies :: results_recorded: %s' % (str(results_recorded)))
            except Exception as e:
                # Handle a process updating on SystemExit
                if 'Duplicate entry' in str(e):
                    results_recorded = True
                    logger.info(
                        'classify_anomalies :: an entry already exists in anomalies_type for anomaly id %s on %s, OK' % (
                            str(anomaly_id), str(base_name)))
                else:
                    logger.error(traceback.format_exc())
                    logger.error('error :: MySQL insert - %s' % str(values_string))
                    results_recorded = 0
            if results_recorded:
                logger.info(
                    'classify_anomalies :: added %s row to anomalies_type for anomaly id %s on %s - %s' % (
                        str(results_recorded), str(anomaly_id), base_name,
                        str(type_data)))

        if results_recorded:
            try:
                redis_conn.srem('luminosity.classify_anomalies',
                                str(classify_anomaly))
                logger.info(
                    'classify_anomalies :: results recorded so removed %s, %s, %s item from luminosity.classify_anomalies Redis set' % (
                        base_name, str(timestamp), anomaly_data_dict['app']))
            except:
                logger.error(traceback.format_exc())
                logger.error(
                    'error :: classify_anomalies :: after results recorded failed to remove %s, %s, %s item from luminosity.classify_anomalies Redis set' % (
                        base_name, str(timestamp), anomaly_data_dict['app']))

        try:
            manage_processing_key(current_pid, base_name, timestamp, classify_for, 'remove')
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: classify_anomalies :: failed to run manage_processing_key - %s' % base_name)

    end_classify_anomalies = timer()
    logger.info(
        'classify_anomalies :: %s anomalies were processed, took %.6f seconds' % (
            str(anomalies_processed),
            (end_classify_anomalies - start_classify_anomalies)))
    return
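# manage_processing_key() above implements a short-lived Redis lock with a
# get followed by a setex.  For reference, a minimal sketch of an atomic
# variant (an illustration, not what classify_anomalies uses): SET with
# nx=True and ex=<ttl> only succeeds when the key does not already exist,
# so acquire-and-expire happens in a single Redis command.  `redis_conn`
# is assumed to be the same Redis connection used in this module.
def _example_acquire_processing_lock(redis_conn, base_name, timestamp, ttl_seconds):
    processing_key = 'luminosity.classify_anomalies.processing.%s.%s' % (
        str(timestamp), str(base_name))
    # Returns True only for the one caller that created the key
    acquired = redis_conn.set(processing_key, 'locked', nx=True, ex=ttl_seconds)
    return bool(acquired)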
def panorama_request():
    """
    Gets the details of anomalies from the database, using the URL arguments
    that are passed in by the :obj:`request.args` to build the MySQL select
    query string, queries the database, parses the results, creates an array
    of the anomalies that matched the query and creates the
    ``panorama.json`` file, then returns the array.  The Webapp needs both
    the array and the JSONP file to serve to the browser for the client side
    ``panorama.js``.

    :param None: determined from :obj:`request.args`
    :return: array
    :rtype: array

    .. note:: And creates ``panorama.js`` for client side javascript

    """
    logger.info('determining request args')

    def get_ids_from_rows(thing, rows):
        found_ids = []
        for row in rows:
            found_id = str(row[0])
            found_ids.append(int(found_id))
        ids_first = str(found_ids).replace('[', '')
        in_ids = ids_first.replace(']', '')
        return in_ids

    try:
        request_args_len = len(request.args)
    except:
        request_args_len = False

    latest_anomalies = False
    if request_args_len == 0:
        request_args_len = 'No request arguments passed'
        # return str(request_args_len)
        latest_anomalies = True

    metric = False
    if metric:
        logger.info('Getting db id for %s' % metric)
        query = 'select id from metrics WHERE metric=\'%s\'' % metric
        try:
            result = mysql_select(skyline_app, query)
        except:
            logger.error('error :: failed to get id from db: %s' % traceback.format_exc())
            result = 'metric id not found in database'
        return str(result[0][0])

    search_request = True
    count_request = False

    if latest_anomalies:
        logger.info('Getting latest anomalies')
        query = 'select id, metric_id, anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp from anomalies ORDER BY id DESC LIMIT 10'
        try:
            rows = mysql_select(skyline_app, query)
        except:
            logger.error('error :: failed to get anomalies from db: %s' % traceback.format_exc())
            rows = []

    if not latest_anomalies:
        logger.info('Determining search parameters')
        query_string = 'select id, metric_id, anomalous_datapoint, anomaly_timestamp, full_duration, created_timestamp from anomalies'
        needs_and = False

        # If we have to '' a string we cannot escape the query it seems...
        do_not_escape = False
        if 'metric' in request.args:
            metric = request.args.get('metric', None)
            if metric and metric != 'all':
                query = "select id from metrics WHERE metric='%s'" % (metric)
                try:
                    found_id = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get metric id from db: %s' % traceback.format_exc())
                    found_id = None
                if found_id:
                    target_id = str(found_id[0][0])
                    if needs_and:
                        new_query_string = '%s AND metric_id=%s' % (query_string, target_id)
                    else:
                        new_query_string = '%s WHERE metric_id=%s' % (query_string, target_id)
                    query_string = new_query_string
                    needs_and = True

        if 'metric_like' in request.args:
            metric_like = request.args.get('metric_like', None)
            if metric_like and metric_like != 'all':
                query = 'select id from metrics WHERE metric LIKE \'%s\'' % (str(metric_like))
                try:
                    rows = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get metric ids from db: %s' % traceback.format_exc())
                    return False
                ids = get_ids_from_rows('metric', rows)
                new_query_string = '%s WHERE metric_id IN (%s)' % (query_string, str(ids))
                query_string = new_query_string
                needs_and = True

        if 'count_by_metric' in request.args:
            count_by_metric = request.args.get('count_by_metric', None)
            if count_by_metric and count_by_metric != 'false':
                search_request = False
                count_request = True
                # query_string = 'SELECT metric_id, COUNT(*) FROM anomalies GROUP BY metric_id ORDER BY COUNT(*) DESC'
                query_string = 'SELECT metric_id, COUNT(*) FROM anomalies'
                needs_and = False

        if 'from_timestamp' in request.args:
            from_timestamp = request.args.get('from_timestamp', None)
            if from_timestamp and from_timestamp != 'all':
                if ":" in from_timestamp:
                    import time
                    import datetime
                    new_from_timestamp = time.mktime(datetime.datetime.strptime(from_timestamp, '%Y%m%d %H:%M').timetuple())
                    from_timestamp = str(int(new_from_timestamp))
                if needs_and:
                    new_query_string = '%s AND anomaly_timestamp >= %s' % (query_string, from_timestamp)
                    query_string = new_query_string
                    needs_and = True
                else:
                    new_query_string = '%s WHERE anomaly_timestamp >= %s' % (query_string, from_timestamp)
                    query_string = new_query_string
                    needs_and = True

        if 'until_timestamp' in request.args:
            until_timestamp = request.args.get('until_timestamp', None)
            if until_timestamp and until_timestamp != 'all':
                if ":" in until_timestamp:
                    import time
                    import datetime
                    new_until_timestamp = time.mktime(datetime.datetime.strptime(until_timestamp, '%Y%m%d %H:%M').timetuple())
                    until_timestamp = str(int(new_until_timestamp))
                if needs_and:
                    new_query_string = '%s AND anomaly_timestamp <= %s' % (query_string, until_timestamp)
                    query_string = new_query_string
                    needs_and = True
                else:
                    new_query_string = '%s WHERE anomaly_timestamp <= %s' % (query_string, until_timestamp)
                    query_string = new_query_string
                    needs_and = True

        if 'app' in request.args:
            app = request.args.get('app', None)
            if app and app != 'all':
                query = 'select id from apps WHERE app=\'%s\'' % (str(app))
                try:
                    found_id = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get app ids from db: %s' % traceback.format_exc())
                    found_id = None
                if found_id:
                    target_id = str(found_id[0][0])
                    if needs_and:
                        new_query_string = '%s AND app_id=%s' % (query_string, target_id)
                    else:
                        new_query_string = '%s WHERE app_id=%s' % (query_string, target_id)
                    query_string = new_query_string
                    needs_and = True

        if 'source' in request.args:
            source = request.args.get('source', None)
            if source and source != 'all':
                query = 'select id from sources WHERE source=\'%s\'' % (str(source))
                try:
                    found_id = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get source id from db: %s' % traceback.format_exc())
                    found_id = None
                if found_id:
                    target_id = str(found_id[0][0])
                    if needs_and:
                        new_query_string = '%s AND source_id=\'%s\'' % (query_string, target_id)
                    else:
                        new_query_string = '%s WHERE source_id=\'%s\'' % (query_string, target_id)
                    query_string = new_query_string
                    needs_and = True

        if 'algorithm' in request.args:
            algorithm = request.args.get('algorithm', None)
            # DISABLED as it is difficult to match algorithm_id in the
            # triggered_algorithms csv list
            algorithm = 'all'
            if algorithm and algorithm != 'all':
                query = 'select id from algorithms WHERE algorithm LIKE \'%s\'' % (str(algorithm))
                try:
                    rows = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get algorithm ids from db: %s' % traceback.format_exc())
                    rows = []
                ids = get_ids_from_rows('algorithm', rows)
                if needs_and:
                    new_query_string = '%s AND algorithm_id IN (%s)' % (query_string, str(ids))
                else:
                    new_query_string = '%s WHERE algorithm_id IN (%s)' % (query_string, str(ids))
                query_string = new_query_string
                needs_and = True

        if 'host' in request.args:
            host = request.args.get('host', None)
            if host and host != 'all':
                query = 'select id from hosts WHERE host=\'%s\'' % (str(host))
                try:
                    found_id = mysql_select(skyline_app, query)
                except:
                    logger.error('error :: failed to get host id from db: %s' % traceback.format_exc())
                    found_id = None
                if found_id:
                    target_id = str(found_id[0][0])
                    if needs_and:
                        new_query_string = '%s AND host_id=\'%s\'' % (query_string, target_id)
                    else:
                        new_query_string = '%s WHERE host_id=\'%s\'' % (query_string, target_id)
                    query_string = new_query_string
                    needs_and = True

        if 'limit' in request.args:
            limit = request.args.get('limit', '10')
        else:
            limit = '10'

        if 'order' in request.args:
            order = request.args.get('order', 'DESC')
        else:
            order = 'DESC'

        search_query = '%s ORDER BY id %s LIMIT %s' % (
            query_string, order, limit)

        if 'count_by_metric' in request.args:
            count_by_metric = request.args.get('count_by_metric', None)
            if count_by_metric and count_by_metric != 'false':
                # query_string = 'SELECT metric_id, COUNT(*) FROM anomalies GROUP BY metric_id ORDER BY COUNT(*) DESC'
                search_query = '%s GROUP BY metric_id ORDER BY COUNT(*) %s LIMIT %s' % (
                    query_string, order, limit)

        try:
            rows = mysql_select(skyline_app, search_query)
        except:
            logger.error('error :: failed to get anomalies from db: %s' % traceback.format_exc())
            rows = []

    anomalies = []
    anomalous_metrics = []

    if search_request:
        anomalies_json = path.abspath(path.join(path.dirname(__file__), '..', settings.ANOMALY_DUMP))
        panorama_json = str(anomalies_json).replace('anomalies.json', 'panorama.json')
        if ENABLE_WEBAPP_DEBUG:
            logger.info('debug :: panorama_json - %s' % str(panorama_json))

    for row in rows:
        if search_request:
            anomaly_id = str(row[0])
            metric_id = str(row[1])
        if count_request:
            metric_id = str(row[0])
            anomaly_count = str(row[1])
        query = 'select metric from metrics WHERE id=%s' % metric_id
        try:
            result = mysql_select(skyline_app, query)
        except:
            logger.error('error :: failed to get metric from db: %s' % traceback.format_exc())
            continue
        metric = str(result[0][0])
        if search_request:
            anomalous_datapoint = str(row[2])
            anomaly_timestamp = str(row[3])
            full_duration = str(row[4])
            created_timestamp = str(row[5])
            anomaly_data = (anomaly_id, metric, anomalous_datapoint,
                            anomaly_timestamp, full_duration,
                            created_timestamp)
            anomalies.append([int(anomaly_id), str(metric),
                              anomalous_datapoint, anomaly_timestamp,
                              full_duration, created_timestamp])
            anomalous_metrics.append(str(metric))
        if count_request:
            limit_argument = anomaly_count
            if int(anomaly_count) > 200:
                limit_argument = 200
            anomaly_data = (int(anomaly_count), metric, str(limit_argument))
            anomalies.append([int(anomaly_count), str(metric), str(limit_argument)])

    anomalies.sort(key=operator.itemgetter(0))

    if search_request:
        with open(panorama_json, 'w') as fh:
            pass
        # Write anomalous_metrics to static webapp directory
        with open(panorama_json, 'a') as fh:
            # Make it JSONP with a handle_data() function
            fh.write('handle_data(%s)' % anomalies)

    if latest_anomalies:
        return anomalies
    else:
        return search_query, anomalies
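# panorama_request() above grows its SQL with the needs_and flag so the
# first condition gets WHERE and each subsequent condition gets AND.  A
# small standalone sketch of that pattern (illustration only, with
# hypothetical names):
def _example_build_where_clause(base_query, conditions):
    """
    _example_build_where_clause(
        'select id from anomalies',
        ['anomaly_timestamp >= 1619000000', 'metric_id IN (1,2,3)'])
    returns
    'select id from anomalies WHERE anomaly_timestamp >= 1619000000 AND metric_id IN (1,2,3)'
    """
    query = base_query
    needs_and = False
    for condition in conditions:
        joiner = 'AND' if needs_and else 'WHERE'
        query = '%s %s %s' % (query, joiner, condition)
        needs_and = True
    return query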
def on_demand_motif_analysis(metric, timestamp, similarity, batch_size,
                             top_matches, max_distance, range_padding,
                             max_area_percent_diff):
    """
    Process a motif similarity search on demand
    """
    import numpy as np
    import mass_ts as mts

    logger = logging.getLogger(skyline_app_logger)
    dev_null = None
    function_str = 'on_demand_motif_analysis'
    logger.info(
        '%s :: with parameters :: metric: %s, timestamp: %s, similarity: %s, batch_size: %s, top_matches: %s, max_distance: %s, range_padding: %s, max_area_percent_diff: %s' % (
            function_str, str(metric), str(timestamp), str(similarity),
            str(batch_size), str(top_matches), str(max_distance),
            str(range_padding), str(max_area_percent_diff)))
    trace = 'none'
    fail_msg = 'none'

    start = time.time()
    start_timer = timer()
    metric_vars_dict = {}
    metric_id = 0
    fp_ids = []
    timeseries = []
    not_similar_enough_sample = 0
    not_similar_motifs = 0
    similar_motifs = 0
    exact_motifs = 0
    distance_motifs = 0
    motifs_found = []
    find_exact_matches_run = False
    exact_matches_found = []
    fps_timeseries = {}

    # A motif_analysis dict to add to and return
    motif_analysis = {}
    motif_analysis[metric] = {}
    motif_analysis[metric]['timestamp'] = int(timestamp)
    motif_analysis[metric]['started'] = start
    motif_analysis[metric]['motifs'] = {}
    motif_analysis[metric]['exact_motifs'] = exact_motifs
    motif_analysis[metric]['similar_motifs'] = similar_motifs
    motif_analysis[metric]['not_similar_motifs'] = not_similar_motifs
    motif_analysis[metric]['not_similar_enough_sample'] = not_similar_enough_sample
    # @added 20210417 - Feature #4014: Ionosphere - inference
    # Allow the user to define the batch_size per similarity search
    motif_analysis[metric]['batch_size'] = int(batch_size)
    motif_analysis[metric]['top_matches'] = int(top_matches)
    motif_analysis[metric]['max_distance'] = float(max_distance)
    # @added 20210425 - Feature #4014: Ionosphere - inference
    # Added max_area_percent_diff for computing the area under the curve
    motif_analysis[metric]['max_area_percent_diff'] = float(max_area_percent_diff)

    fps_checked_for_motifs = []

    metric_dir = metric.replace('.', '/')
    metric_timeseries_dir = '%s/%s/%s' % (
        settings.IONOSPHERE_DATA_FOLDER, str(timestamp), metric_dir)

    # @added 20210418 - Feature #4014: Ionosphere - inference
    # Allow for the similarity search on saved_training_data
    if 'saved_training_data' in request.args:
        saved_training_data_str = request.args.get('saved_training_data', 'false')
        if saved_training_data_str == 'true':
            saved_metric_timeseries_dir = '%s_saved/%s/%s' % (
                settings.IONOSPHERE_DATA_FOLDER, str(timestamp), metric_dir)
            if path.exists(saved_metric_timeseries_dir):
                metric_timeseries_dir = saved_metric_timeseries_dir
                logger.info('%s :: using saved training_data dir - %s' % (
                    function_str, saved_metric_timeseries_dir))

    metric_vars_file = '%s/%s.txt' % (metric_timeseries_dir, metric)
    timeseries_json = '%s/%s.json' % (metric_timeseries_dir, metric)
    full_duration_in_hours = int(settings.FULL_DURATION / 60 / 60)
    full_duration_timeseries_json = '%s/%s.mirage.redis.%sh.json' % (
        metric_timeseries_dir, metric, str(full_duration_in_hours))
    try:
        metric_vars_dict = mirage_load_metric_vars(skyline_app, metric_vars_file, True)
    except Exception as e:
        logger.error(
            'error :: inference :: failed to load metric variables from check file - %s - %s' % (
                metric_vars_file, e))
    if not metric_vars_dict:
        motif_analysis[metric]['status'] = 'error'
        motif_analysis[metric]['reason'] = 'could not load training data variables'
        return motif_analysis

    full_duration = metric_vars_dict['metric_vars']['full_duration']
    # Determine the metric details from the database
    metric_id = 0
    metric_db_object = {}
    try:
        metric_db_object = get_metrics_db_object(metric)
    except Exception as e:
        logger.error('error :: %s :: failed to get_metrics_db_object - %s' % (
            function_str, e))
    try:
        metric_id = int(metric_db_object['id'])
    except Exception as e:
        logger.error(
            'error :: %s :: failed to determine metric_id from metric_db_object %s - %s' % (
                function_str, str(metric_db_object), e))
        metric_id = 0
    if not metric_id:
        logger.error('error :: %s :: failed to get metric id for %s from the database' % (
            function_str, str(metric)))
        fail_msg = 'failed to get metric id'
        motif_analysis[metric]['status'] = 'error'
        motif_analysis[metric]['reason'] = 'could not determine metric id'
        return motif_analysis, fail_msg, trace

    # @modified 20210419 - Feature #4014: Ionosphere - inference
    # Create a unique dir for each batch_size and max_distance
    # motif_images_dir = '%s/motifs' % metric_timeseries_dir
    motif_images_dir = '%s/motifs/batch_size.%s/top_matches.%s/max_distance.%s' % (
        metric_timeseries_dir, str(batch_size), str(top_matches),
        str(max_distance))
    if not path.exists(motif_images_dir):
        # provision motifs image resources
        mkdir_p(motif_images_dir)

    full_durations = [full_duration]
    if path.isfile(full_duration_timeseries_json):
        full_durations = [full_duration, settings.FULL_DURATION]
    logger.info('%s :: full_durations - %s' % (function_str, str(full_durations)))

    # Loop through analysis per full_duration
    for full_duration in full_durations:
        start_full_duration = timer()
        fp_ids = []
        try:
            query = 'SELECT id,last_matched from ionosphere WHERE metric_id=%s AND full_duration=%s AND enabled=1 ORDER BY last_matched DESC' % (
                str(metric_id), str(full_duration))
            results = mysql_select(skyline_app, query)
            for row in results:
                fp_ids.append(int(row[0]))
        except Exception as e:
            logger.error('error :: %s :: failed to get fp ids via mysql_select from %s - %s' % (
                function_str, metric, e))
        logger.info('%s :: metric_id: %s, full_duration: %s, fp_ids: %s' % (
            function_str, str(metric_id), str(full_duration), str(fp_ids)))
        if not fp_ids:
            continue

        # Now there are known fps, load the timeseries
        if full_duration == settings.FULL_DURATION:
            timeseries_json_file = full_duration_timeseries_json
        else:
            timeseries_json_file = timeseries_json
        try:
            with open((timeseries_json_file), 'r') as f:
                raw_timeseries = f.read()
            timeseries_array_str = str(raw_timeseries).replace('(', '[').replace(')', ']')
            del raw_timeseries
            timeseries = literal_eval(timeseries_array_str)
            del timeseries_array_str
        except Exception as e:
            logger.error('error :: %s :: failed to load timeseries for %s from %s - %s' % (
                function_str, metric, timeseries_json_file, e))
            continue

        anomalous_timeseries_subsequence = []
        for timestamp_float, value in timeseries[-int(batch_size):]:
            anomalous_timeseries_subsequence.append([int(timestamp_float), value])

        logger.info('%s :: looking for motif in trained fps of full_duration: %s' % (
            function_str, str(full_duration)))
        dataset = [float(item[1]) for item in anomalous_timeseries_subsequence]
        max_y = max(dataset)
        min_y = min(dataset)

        # full_y_range = max_y - min_y
        # range_padding_percent = range_padding
        # This was just a test that did not have the desired results
        # if full_y_range < 10:
        #     range_padding_percent = 35
        # if full_y_range < 5:
        #     range_padding_percent = 75
        # if full_y_range < 2:
        #     range_padding_percent = 100

        use_range_padding = ((max_y - min_y) / 100) * range_padding
        if min_y > 0 and (min_y - use_range_padding) > 0:
            min_y_padded = min_y - use_range_padding
        else:
            min_y_padded = min_y
        max_y_padded = max_y + use_range_padding
        if min_y_padded == max_y_padded:
            min_y_padded = min_y_padded - ((min_y_padded / 100) * range_padding)
            max_y_padded = max_y_padded + ((max_y_padded / 100) * range_padding)

        # anomalous_ts = np.array(dataset)
        anomalous_ts = dataset

        mass2_batch_times = []
        exact_match_times = []
        nan = np.array([np.nan])
        nanj = complex(0.0, float('nan'))
        empty_dists = np.array(nan + nanj)

        # plotted = False
        count = 0

        # fp_ids = [fp_id for index, fp_id in enumerate(fp_ids) if index == 0]
        # motifs_found = []
        # exact_matches_found = []
        # fps_timeseries = {}

        for fp_id in fp_ids:
            if (time.time() - start) >= 20:
                break
            # Attempt to surface the fp timeseries from memcache and/or db
            # @modified 20210424 - Feature #4014: Ionosphere - inference
            # Task #4030: refactoring
            fp_timeseries = None
            try:
                fp_timeseries = get_fp_timeseries(skyline_app, metric_id, fp_id)
            except Exception as e:
                logger.error(
                    'inference :: did not get fp timeseries with get_fp_timeseries(%s, %s, %s) - %s' % (
                        skyline_app, str(metric_id), str(fp_id), e))
            if not fp_timeseries:
                continue

            relate_dataset = [float(item[1]) for item in fp_timeseries]
            fps_timeseries[fp_id] = fp_timeseries

            current_best_indices = []
            current_best_dists = []
            best_indices = None
            best_dists = None
            try:
                logger.info(
                    '%s :: running mts.mass2_batch fp_id: %s, full_duration: %s, batch_size: %s, top_matches: %s, max_distance: %s, motif_size: %s' % (
                        function_str, str(fp_id), str(full_duration),
                        str(batch_size), str(top_matches), str(max_distance),
                        str(len(anomalous_ts))))

                # @added 20210418 - Feature #4014: Ionosphere - inference
                # Handle top_matches being greater than the possible kth that
                # can be found
                # mts.mass2_batch error: kth(=50) out of bounds (16)
                use_top_matches = int(top_matches)
                if (len(fp_timeseries) / int(batch_size)) <= int(top_matches):
                    use_top_matches = round(len(fp_timeseries) / int(batch_size)) - 1
                    if use_top_matches == 2:
                        use_top_matches = 1
                    logger.info(
                        '%s :: adjusting top_matches to %s (the maximum possible top - 1) as kth(=%s) will be out of bounds mts.mass2_batch' % (
                            function_str, str(use_top_matches), str(top_matches)))

                start_mass2_batch = timer()
                # @modified 20210418 - Feature #4014: Ionosphere - inference
                # Handle top_matches being greater than the possible kth that
                # can be found
                # best_indices, best_dists = mts.mass2_batch(relate_dataset, anomalous_ts, batch_size=int(batch_size), top_matches=int(top_matches))
                best_indices, best_dists = mts.mass2_batch(
                    relate_dataset, anomalous_ts, batch_size=int(batch_size),
                    top_matches=int(use_top_matches))
                end_mass2_batch = timer()
                mass2_batch_times.append((end_mass2_batch - start_mass2_batch))
                current_best_indices = best_indices.tolist()
                current_best_dists = best_dists.tolist()

                # @added 20210412 - Feature #4014: Ionosphere - inference
                # Branch #3590: inference
                # Add fp_id to fps_checked_for_motifs to enable ionosphere to
                # update the motif related columns in the ionosphere database
                # table
                fps_checked_for_motifs.append(fp_id)
            except Exception as e:
                logger.error('error :: %s :: %s mts.mass2_batch error: %s' % (
                    function_str, str(fp_id), str(e)))
                continue

            try:
                if str(list(best_dists)) == str(list(empty_dists)):
                    logger.info(
                        '%s :: mts.mass2_batch no similar motif from fp id %s - best_dists: %s' % (
                            function_str, str(fp_id), str(list(best_dists))))
                    continue
            except Exception as e:
                dev_null = e

            if not current_best_indices[0]:
                continue
            # if list(best_indices)[0] != anomalous_index:
            #     continue
            # If the best_dists is > 1 they are not very similar
            # if list(best_dists)[0].real > 1.0:
            #     continue
            # if list(best_indices)[0] and best_dists:
            for index, best_dist in enumerate(current_best_dists):
                try:
                    motif_added = False
                    """
                    Note: mass_ts finds similar motifs NOT the same motif, the
                    same motif will result in the best_dists being a nan+nanj
                    So it is DIYed
                    """
                    try:
                        # @modified 20210414 - Feature #4014: Ionosphere - inference
                        # Branch #3590: inference
                        # Store the not anomalous motifs
                        # motif = [fp_id, current_best_indices[index], best_dist.real]
                        motif = [
                            fp_id, current_best_indices[index], best_dist.real,
                            anomalous_timeseries_subsequence, full_duration]
                    except Exception as e:
                        dev_null = e
                        motif = []

                    # if list(best_indices)[0] and best_dists:
                    # If it is greater than 1.0 it is not similar
                    # if best_dist.real > 1.0:
                    # if best_dist.real > IONOSPHERE_INFERENCE_MASS_TS_MAX_DISTANCE:
                    if best_dist.real > float(max_distance):
                        continue
                    else:
                        if motif:
                            count += 1
                            motifs_found.append(motif)
                            motif_added = True
                    if not motif_added:
                        if best_dist == nanj:
                            count += 1
                            motifs_found.append(motif)
                            motif_added = True
                    if not motif_added:
                        if str(best_dist) == 'nan+nanj':
                            count += 1
                            motifs_found.append([
                                fp_id, current_best_indices[index], 0.0,
                                anomalous_timeseries_subsequence, full_duration])
                            motif_added = True
                    if not motif_added:
                        if best_dist == empty_dists:
                            count += 1
                            motifs_found.append(motif)
                            motif_added = True
                except Exception as e:
                    logger.error(traceback.format_exc())
                    logger.error(
                        'error :: %s :: could not determine if fp id %s timeseries at index %s was a match - %s' % (
                            function_str, str(fp_id),
                            str(current_best_indices[index]), e))
                    continue

            # FIND EXACT MATCHES
            # Seeing as I cannot reproduce finding nan+nanj which represents an
            # exact match with mts.mass2_batch, do it DIY style - iterate the
            # timeseries and create a batch_size subsequence for every index and
            # compare the values to the anomalous_ts for an exact match.
            # This takes ~0.024850 seconds on a timeseries with 10079 datapoints
            # @modified 20210418 - Feature #4014: Ionosphere - inference
            # However finding exact matches can add ~2.5 seconds on a 90 minute
            # batch_size and with a proportionally scaled max_distance of say
            # 15, finding an exact match in a longer sequence is less
            # important - the greater the batch_size, the greater the likely
            # variability and the chance of an exact match decreases.  So save
            # 2.5 seconds.
            # UNLESS
            # At a 5 (to 10) batch_size and max_distance of 1.0 an exact match
            # can be found.  Exact matches are quite frequent and sometimes,
            # with such little variability, similar matches may not be found.
            # Therefore find_exact_matches has its place.
            # A CAVEAT here is that boring metrics that change little and have
            # low variability, even at a larger batch_size, could also benefit
            # and possibly achieve better accuracy from the use of
            # find_exact_matches as their shapelets can resemble a batch_size 5
            # shapelet.
            # It would perhaps be possible to use one or more of the features
            # profile tsfresh values to identify these types of shapelets, if
            # you knew which feature/s were most descriptive of this type of
            # shapelet, e.g. 'value__skewness': 3.079477685394873, etc (maybe)
            # However I predict that this method will perform worst on these
            # types of shapelets.
            # find_exact_matches = False
            # exact matches can be found in batch sizes of 500 and similar not
            # So actually always run it.
            find_exact_matches = True
            find_exact_matches_run = True
            if int(batch_size) < 10:
                find_exact_matches = True
                find_exact_matches_run = True

            if find_exact_matches:
                try:
                    start_exact_match = timer()
                    indexed_relate_dataset = []
                    for index, item in enumerate(relate_dataset):
                        indexed_relate_dataset.append([index, item])
                    last_index = indexed_relate_dataset[-1][0]
                    current_index = 0
                    while current_index < last_index:
                        subsequence = [
                            value for index, value in
                            indexed_relate_dataset[current_index:(current_index + int(batch_size))]]
                        if subsequence == anomalous_ts:
                            exact_matches_found.append([
                                fp_id, current_index, 0.0,
                                anomalous_timeseries_subsequence, full_duration])
                            motifs_found.append([
                                fp_id, current_index, 0.0,
                                anomalous_timeseries_subsequence, full_duration])
                        current_index += 1
                    end_exact_match = timer()
                    exact_match_times.append((end_exact_match - start_exact_match))
                except Exception as e:
                    logger.error(traceback.format_exc())
                    logger.error(
                        'error :: %s :: could not determine if any exact matches could be found in fp id %s timeseries - %s' % (
                            function_str, str(fp_id), e))

        logger.info(
            '%s :: mts.mass2_batch runs on %s fps of full_duration %s in %.6f seconds' % (
                function_str, str(len(mass2_batch_times)),
                str(full_duration), sum(mass2_batch_times)))
        if find_exact_matches_run:
            logger.info(
                '%s :: exact_match runs on %s fps of full_duration %s in %.6f seconds' % (
                    function_str, str(len(exact_match_times)),
                    str(full_duration), sum(exact_match_times)))
        end_full_duration = timer()
        logger.info(
            '%s :: analysed %s fps of full_duration %s in %.6f seconds' % (
                function_str, str(len(fp_ids)), str(full_duration),
                (end_full_duration - start_full_duration)))

    # Patterns are sorted by distance
    sorted_motifs = []
    motifs_found_in_fps = []
    if motifs_found:
        sorted_motifs = sorted(motifs_found, key=lambda x: x[2])
        for item in sorted_motifs:
            motifs_found_in_fps.append(item[0])
    logger.info('%s :: %s motifs found' % (function_str, str(len(sorted_motifs))))

    for motif in sorted_motifs:
        if (time.time() - start) >= 25:
            break
        try:
            add_match = False
            all_in_range = False
            fp_id = motif[0]
            best_index = motif[1]
            best_dist = motif[2]
            # @added 20210414 - Feature #4014: Ionosphere - inference
            # Branch #3590: inference
            # Store the not anomalous motifs
            motif_sequence = motif[3]
            motif_full_duration = motif[4]

            match_type = 'not_similar_enough'

            if motif in exact_matches_found:
                add_match = True
                match_type = 'exact'
                all_in_range = True
                exact_motifs += 1

            full_relate_timeseries = fps_timeseries[fp_id]
            # full_relate_dataset = [float(item[1]) for item in full_relate_timeseries]
            relate_timeseries = [
                item for index, item in enumerate(full_relate_timeseries)
                if index >= best_index and index < (best_index + int(batch_size))]
            relate_dataset = [item[1] for item in relate_timeseries]

            if not add_match:
                all_in_range = True
                for value in relate_dataset:
                    if value < min_y_padded:
                        all_in_range = False
                        break
                    if value > max_y_padded:
                        all_in_range = False
                        break
                if all_in_range:
                    related_max_y = max(relate_dataset)
                    if related_max_y < (max_y - range_padding):
                        all_in_range = False
                    if related_max_y > (max_y + range_padding):
                        all_in_range = False
                    related_min_y = min(relate_dataset)
                    if related_min_y < (min_y - range_padding):
                        all_in_range = False
                    if related_min_y > (min_y + range_padding):
                        all_in_range = False
                if all_in_range:
                    logger.info(
                        '%s :: ALL IN RANGE - all_in_range: %s, motif: %s' % (
                            function_str, str(all_in_range),
                            str(relate_dataset[0:2])))
                    add_match = True
                    match_type = 'all_in_range'
                    similar_motifs += 1

            # @added 20210425 - Feature #4014: Ionosphere - inference
            # Compute the area using the composite trapezoidal rule.
            motif_area = None
            fp_motif_area = None
            percent_different = None
            try:
                batch_size_dataset = [float(item[1]) for item in motif_sequence]
                y_motif = np.array(batch_size_dataset)
                motif_area = np.trapz(y_motif, dx=1)
            except Exception as e:
                logger.error('error :: %s :: failed to get motif_area with np.trapz - %s' % (
                    function_str, e))
            try:
                y_fp_motif = np.array(relate_dataset)
                fp_motif_area = np.trapz(y_fp_motif, dx=1)
            except Exception as e:
                logger.error('error :: %s :: failed to get fp_motif_area with np.trapz - %s' % (
                    function_str, e))
            # Determine the percentage difference (as a positive value) of
            # the areas under the curves.
            if motif_area and fp_motif_area:
                percent_different = get_percent_different(fp_motif_area, motif_area, True)
                if percent_different > max_area_percent_diff:
                    if add_match:
                        logger.info('%s :: AREA TOO DIFFERENT - not adding all_in_range match' % (
                            function_str))
                        add_match = False
                    # BUT ...
                    if best_dist < 3 and not add_match:
                        logger.info(
                            '%s :: DISTANCE VERY SIMILAR - adding match even though area_percent_diff is greater than max_area_percent_diff because best_dist: %s' % (
                                function_str, str(best_dist)))
                        add_match = True
                        match_type = 'distance'
                        distance_motifs += 1

            if similarity == 'all':
                if not add_match:
                    not_similar_motifs += 1
                    if not_similar_enough_sample >= 10:
                        continue
                    not_similar_enough_sample += 1
                    add_match = True
                    match_type = 'not_similar_enough'

            if add_match:
                generation = 0
                fp_id_row = None
                try:
                    fp_id_row = get_ionosphere_fp_db_row(skyline_app, int(fp_id))
                except Exception as e:
                    logger.error('error :: %s :: failed to get_ionosphere_fp_db_row for fp_id %s - %s' % (
                        function_str, str(fp_id), e))
                if fp_id_row:
                    try:
                        generation = fp_id_row['generation']
                    except Exception as e:
                        logger.error('error :: %s :: failed to determine generation from fp_id_row for fp_id %s - %s' % (
                            function_str, str(fp_id), e))
                if generation == 0:
                    generation_str = 'trained'
                else:
                    generation_str = 'LEARNT'
                motif_match_types = motif_match_types_dict()
                type_id = motif_match_types[match_type]
                motif_id = '%s-%s' % (str(fp_id), str(best_index))
                motif_analysis[metric]['motifs'][motif_id] = {}
                motif_analysis[metric]['motifs'][motif_id]['metric_id'] = metric_id
                motif_analysis[metric]['motifs'][motif_id]['fp_id'] = fp_id
                motif_analysis[metric]['motifs'][motif_id]['generation'] = generation
                motif_analysis[metric]['motifs'][motif_id]['index'] = best_index
                motif_analysis[metric]['motifs'][motif_id]['distance'] = best_dist
                motif_analysis[metric]['motifs'][motif_id]['size'] = int(batch_size)
                motif_analysis[metric]['motifs'][motif_id]['max_distance'] = float(max_distance)
                motif_analysis[metric]['motifs'][motif_id]['timestamp'] = timestamp
                motif_analysis[metric]['motifs'][motif_id]['type_id'] = type_id
                motif_analysis[metric]['motifs'][motif_id]['type'] = match_type
                motif_analysis[metric]['motifs'][motif_id]['full_duration'] = motif_full_duration
                # @added 20210414 - Feature #4014: Ionosphere - inference
                # Branch #3590: inference
                # Store the not anomalous motifs
                motif_analysis[metric]['motifs'][motif_id]['motif_timeseries'] = anomalous_timeseries_subsequence
                motif_analysis[metric]['motifs'][motif_id]['motif_sequence'] = motif_sequence
                not_anomalous_timestamp = int(anomalous_timeseries_subsequence[-1][0])
                graph_period_seconds = not_anomalous_timestamp - int(anomalous_timeseries_subsequence[0][0])
                motif_analysis[metric]['motifs'][motif_id]['motif_period_seconds'] = graph_period_seconds
                motif_analysis[metric]['motifs'][motif_id]['motif_period_minutes'] = round(graph_period_seconds / 60)
                motif_analysis[metric]['motifs'][motif_id]['image'] = None
                motif_analysis[metric]['motifs'][motif_id]['motif_area'] = motif_area
                motif_analysis[metric]['motifs'][motif_id]['fp_motif_area'] = fp_motif_area
                motif_analysis[metric]['motifs'][motif_id]['area_percent_diff'] = percent_different
                motif_analysis[metric]['motifs'][motif_id]['max_area_percent_diff'] = max_area_percent_diff

                if (time.time() - start) >= 25:
                    continue

                graph_image_file = '%s/motif.%s.%s.%s.with_max_distance.%s.png' % (
                    motif_images_dir, motif_id, match_type, str(batch_size),
                    str(max_distance))
                plotted_image = False
                on_demand_motif_analysis = True
                if not path.isfile(graph_image_file):
                    plotted_image, plotted_image_file = plot_motif_match(
                        skyline_app, metric, timestamp, fp_id, full_duration,
                        generation_str, motif_id, best_index, int(batch_size),
                        best_dist, type_id, relate_dataset,
                        anomalous_timeseries_subsequence, graph_image_file,
                        on_demand_motif_analysis)
                else:
                    plotted_image = True
                    logger.info('%s :: plot already exists - %s' % (
                        function_str, str(graph_image_file)))
                if plotted_image:
                    motif_analysis[metric]['motifs'][motif_id]['image'] = graph_image_file
                else:
                    logger.error('failed to plot motif match plot')
                    graph_image_file = None
        except Exception as e:
            logger.error(traceback.format_exc())
            logger.error(
                'error :: inference :: with fp id %s processing motif at index: %s - %s' % (
                    str(fp_id), str(motif[0]), str(e)))
            continue

    end_timer = timer()
    motif_analysis[metric]['fps_checked'] = fps_checked_for_motifs
    motif_analysis[metric]['exact_motifs'] = exact_motifs
    motif_analysis[metric]['similar_motifs'] = similar_motifs
    motif_analysis[metric]['distance_motifs'] = distance_motifs
    motif_analysis[metric]['not_similar_motifs'] = not_similar_motifs
    motif_analysis[metric]['not_similar_enough_sample'] = not_similar_enough_sample

    motif_analysis_file = '%s/motif.analysis.similarity_%s.batch_size_%s.top_matches_%s.max_distance_%s.dict' % (
        motif_images_dir, similarity, str(batch_size), str(top_matches),
        str(max_distance))
    try:
        write_data_to_file(skyline_app, motif_analysis_file, 'w', str(motif_analysis))
    except Exception as e:
        trace = traceback.format_exc()
        logger.error('%s' % trace)
        fail_msg = '%s :: error :: failed to write motif_analysis_file - %s' % (
            function_str, motif_analysis_file)
        logger.error('%s' % fail_msg)
        dev_null = e

    motif_ids = list(motif_analysis[metric]['motifs'].keys())
    logger.info(
        '%s :: %s motif matches found, %s fps were checked and motifs plotted in %.6f seconds for %s' % (
            function_str, str(len(motif_ids)), str(len(fps_checked_for_motifs)),
            (end_timer - start_timer), metric))
    if dev_null:
        del dev_null
    return motif_analysis, fail_msg, trace
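# A hedged sketch of the area check used in the motif matching loop above:
# both the anomalous motif and the candidate fp motif are reduced to the
# area under their curve with the composite trapezoidal rule, and the match
# is vetoed when the areas differ by more than max_area_percent_diff
# percent.  get_percent_different() is Skyline's helper; the inline
# percentage calculation here is an assumption of its behaviour, for
# illustration only.
def _example_motif_area_check(motif_values, fp_motif_values, max_area_percent_diff):
    import numpy as np
    motif_area = np.trapz(np.array([float(v) for v in motif_values]), dx=1)
    fp_motif_area = np.trapz(np.array([float(v) for v in fp_motif_values]), dx=1)
    if not fp_motif_area:
        return False  # cannot compare against a zero area
    # Percentage difference of the two areas, expressed as a positive value
    percent_different = abs((motif_area - fp_motif_area) / fp_motif_area) * 100
    return percent_different <= max_area_percent_diff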