def run_selected_algorithm(
    timeseries, metric_name, metric_expiration_time, metric_min_average,
    metric_min_average_seconds, metric_trigger, alert_threshold,
    metric_alerters, autoaggregate, autoaggregate_value, algorithm):
    """
    Filter timeseries and run selected algorithm.

    The time series is rejected (via the custom exceptions) if it is too
    short, stale or boring, optionally auto-aggregated, and then the single
    named ``algorithm`` is looked up in ``globals()`` and run against it.

    :param timeseries: the time series as [[timestamp, value], ...]
    :param metric_name: the metric name
    :param metric_expiration_time: passed through to the algorithm and
        returned in the result tuple
    :param metric_min_average: passed through to the algorithm
    :param metric_min_average_seconds: passed through to the algorithm
    :param metric_trigger: passed through to the algorithm
    :param alert_threshold: returned in the result tuple
    :param metric_alerters: returned in the result tuple
    :param autoaggregate: whether to aggregate the time series first
    :param autoaggregate_value: the aggregation resolution for
        :func:`autoaggregate_ts`
    :param algorithm: the name of the algorithm function to run
    :returns: tuple of (anomalous, ensemble, datapoint, metric_name,
        metric_expiration_time, metric_min_average,
        metric_min_average_seconds, metric_trigger, alert_threshold,
        metric_alerters, algorithm)
    :raises TooShort: if the series is too short (or aggregation left it
        too short)
    :raises Stale: if the last data point is older than STALE_PERIOD
    :raises Boring: if the recent values show no variation
    """
    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug :: assigning in algoritms.py - %s, %s' % (metric_name, algorithm))

    # Get rid of short series
    # @modified 20190312 - Task #2862: Allow Boundary to analyse short time series
    # https://github.com/earthgecko/skyline/issues/88
    # Allow class as TooShort if the algorithm is detect_drop_off_cliff
    if algorithm == 'detect_drop_off_cliff':
        if len(timeseries) < MIN_TOLERABLE_LENGTH:
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug('debug :: TooShort - %s, %s' % (metric_name, algorithm))
            raise TooShort()

    # Get rid of stale series
    if time() - timeseries[-1][0] > STALE_PERIOD:
        if ENABLE_BOUNDARY_DEBUG:
            logger.debug('debug :: Stale - %s, %s' % (metric_name, algorithm))
        raise Stale()

    # Get rid of boring series
    if algorithm == 'detect_drop_off_cliff' or algorithm == 'less_than':
        if len(set(item[1] for item in timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug('debug :: Boring - %s, %s' % (metric_name, algorithm))
            raise Boring()

    if autoaggregate:
        if ENABLE_BOUNDARY_DEBUG:
            logger.debug('debug :: auto aggregating %s for %s' % (metric_name, algorithm))
        try:
            agg_timeseries = autoaggregate_ts(timeseries, autoaggregate_value)
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug(
                    'debug :: aggregated_timeseries returned %s for %s' % (metric_name, algorithm))
        except Exception as e:
            agg_timeseries = []
            if ENABLE_BOUNDARY_DEBUG:
                logger.error('Algorithm error: %s' % traceback.format_exc())
                # @modified - bug fix: '%e' is a float format specifier and
                # raises TypeError when formatting an exception object, which
                # would propagate out of this except handler.  Use %s.
                logger.error('error: %s' % e)
                logger.debug(
                    'debug error - autoaggregate excpection %s for %s' % (metric_name, algorithm))
        # Only use the aggregated series if it is long enough to analyse
        if len(agg_timeseries) > 10:
            timeseries = agg_timeseries
        else:
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug('debug :: TooShort - %s, %s' % (metric_name, algorithm))
            raise TooShort()

    # @modified 20190312 - Task #2862: Allow Boundary to analyse short time series
    # https://github.com/earthgecko/skyline/issues/88
    # if len(timeseries) < 10:
    if len(timeseries) < 1:
        if ENABLE_BOUNDARY_DEBUG:
            logger.debug(
                'debug :: timeseries too short - %s - timeseries length - %s' % (
                    metric_name, str(len(timeseries))))
        raise TooShort()

    try:
        # Dispatch to the named algorithm function defined in this module
        ensemble = [
            globals()[algorithm](
                timeseries, metric_name, metric_expiration_time,
                metric_min_average, metric_min_average_seconds,
                metric_trigger)
        ]
        if ensemble.count(True) == 1:
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug(
                    # @modified 20200624 - Task #3594: Add timestamp to ENABLE_BOUNDARY_DEBUG output
                    # Feature #3532: Sort all time series
                    # Added timestamp to debug output
                    # 'debug :: anomalous datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s' % (
                    #     str(timeseries[-1][1]),
                    'debug :: anomalous at %s with datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s' % (
                        str(timeseries[-1][0]), str(timeseries[-1][1]),
                        str(metric_name), str(metric_expiration_time),
                        str(metric_min_average),
                        str(metric_min_average_seconds),
                        str(metric_trigger), str(alert_threshold),
                        str(metric_alerters), str(algorithm)))
            return True, ensemble, timeseries[-1][1], metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm
        else:
            if ENABLE_BOUNDARY_DEBUG:
                logger.debug(
                    # @modified 20200624 - Task #3594: Add timestamp to ENABLE_BOUNDARY_DEBUG output
                    # Feature #3532: Sort all time series
                    # Added timestamp to debug output
                    # 'debug :: not anomalous datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s' % (
                    #     str(timeseries[-1][1]),
                    'debug :: not anomalous at %s with datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s' % (
                        str(timeseries[-1][0]), str(timeseries[-1][1]),
                        str(metric_name), str(metric_expiration_time),
                        str(metric_min_average),
                        str(metric_min_average_seconds),
                        str(metric_trigger), str(alert_threshold),
                        str(metric_alerters), str(algorithm)))
            return False, ensemble, timeseries[-1][1], metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        return False, [], 1, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm
def run_selected_algorithm(timeseries, metric_name):
    """
    Filter timeseries and run selected algorithm.

    :param timeseries: the time series as [[timestamp, value], ...]
    :param metric_name: the full metric name (used for the Redis stale-metric
        set and second-order analysis)
    :returns: tuple of (anomalous, ensemble, datapoint); on any error in the
        final ensemble evaluation returns (False, [], 1)
    :raises TooShort: if the series is shorter than MIN_TOLERABLE_LENGTH
    :raises Stale: if the last data point is older than STALE_PERIOD
    :raises Boring: if the recent values show no variation
    """
    # @added 20180807 - Feature #2492: alert on stale metrics
    # Determine if a metric has stopped sending data and if so add to the
    # analyzer.alert_on_stale_metrics Redis set
    if ALERT_ON_STALE_METRICS:
        add_to_alert_on_stale_metrics = False
        # @modified 20180816 - Feature #2492: alert on stale metrics
        # Added try and except to prevent some errors that are encounter between
        # 00:14 and 00:17 on some days
        # Traceback (most recent call last):
        # File "/opt/skyline/github/skyline/skyline/analyzer/analyzer.py", line 394, in spin_process
        # anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)
        # File "/opt/skyline/github/skyline/skyline/analyzer/algorithms.py", line 530, in run_selected_algorithm
        # if int(time()) - int(timeseries[-1][0]) >= ALERT_ON_STALE_PERIOD:
        # IndexError: list index out of range
        try:
            # Metric is a candidate for the stale alert if its newest data
            # point is at least ALERT_ON_STALE_PERIOD old
            if int(time()) - int(timeseries[-1][0]) >= ALERT_ON_STALE_PERIOD:
                add_to_alert_on_stale_metrics = True
        except:
            # @modified 20180816 -
            # Feature #2492: alert on stale metrics
            add_to_alert_on_stale_metrics = False
        try:
            # But not if it is so old it is fully stale (STALE_PERIOD) - those
            # are handled by the Stale exception path below
            if int(time()) - int(timeseries[-1][0]) >= STALE_PERIOD:
                add_to_alert_on_stale_metrics = False
        except:
            add_to_alert_on_stale_metrics = False
        if add_to_alert_on_stale_metrics:
            try:
                # NOTE(review): because redis_conn is assigned below, Python
                # treats redis_conn as local to this function, so this ping()
                # appears to raise UnboundLocalError on first use, which is
                # caught by the bare except and triggers the connection
                # creation - confirm this is the intended (re)connect pattern
                redis_conn.ping()
            except:
                from redis import StrictRedis
                if REDIS_PASSWORD:
                    redis_conn = StrictRedis(
                        password=REDIS_PASSWORD,
                        unix_socket_path=REDIS_SOCKET_PATH)
                else:
                    redis_conn = StrictRedis(
                        unix_socket_path=REDIS_SOCKET_PATH)
            try:
                # Best effort - failure to record the stale metric is ignored
                redis_conn.sadd('analyzer.alert_on_stale_metrics', metric_name)
            except:
                pass

    # Get rid of short series
    if len(timeseries) < MIN_TOLERABLE_LENGTH:
        raise TooShort()

    # Get rid of stale series
    if time() - timeseries[-1][0] > STALE_PERIOD:
        raise Stale()

    # Get rid of boring series
    if len(set(item[1] for item in timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
        raise Boring()

    # RUN_OPTIMIZED_WORKFLOW - replaces the original ensemble method:
    # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
    # which runs all timeseries through all ALGORITHMS
    final_ensemble = []
    number_of_algorithms_triggered = 0
    number_of_algorithms_run = 0
    number_of_algorithms = len(ALGORITHMS)
    # Once this many algorithms have voted False, CONSENSUS is unreachable
    maximum_false_count = number_of_algorithms - CONSENSUS + 1
    # logger.info('the maximum_false_count is %s, above which CONSENSUS cannot be achieved' % (str(maximum_false_count)))
    consensus_possible = True
    # DEVELOPMENT: this is for a development version of analyzer only
    if skyline_app == 'analyzer_dev':
        time_all_algorithms = True
    else:
        time_all_algorithms = False
    algorithm_tmp_file_prefix = '%s/%s.' % (SKYLINE_TMP_DIR, skyline_app)
    for algorithm in ALGORITHMS:
        if consensus_possible:
            if send_algorithm_run_metrics:
                algorithm_count_file = '%s%s.count' % (algorithm_tmp_file_prefix, algorithm)
                algorithm_timings_file = '%s%s.timings' % (algorithm_tmp_file_prefix, algorithm)
            run_algorithm = []
            run_algorithm.append(algorithm)
            number_of_algorithms_run += 1
            if send_algorithm_run_metrics:
                start = timer()
            try:
                # Dispatch via globals() to the algorithm function of the
                # same name defined in this module
                algorithm_result = [globals()[test_algorithm](timeseries) for test_algorithm in run_algorithm]
            except:
                # logger.error('%s failed' % (algorithm))
                algorithm_result = [None]
            if send_algorithm_run_metrics:
                end = timer()
                # Append per-algorithm run count and timing for metric graphing
                with open(algorithm_count_file, 'a') as f:
                    f.write('1\n')
                with open(algorithm_timings_file, 'a') as f:
                    f.write('%.6f\n' % (end - start))
        else:
            algorithm_result = [False]
            # logger.info('CONSENSUS NOT ACHIEVABLE - skipping %s' % (str(algorithm)))
        # Reduce the single-element result list to a True/False/None vote
        if algorithm_result.count(True) == 1:
            result = True
            number_of_algorithms_triggered += 1
            # logger.info('algorithm %s triggerred' % (str(algorithm)))
        elif algorithm_result.count(False) == 1:
            result = False
        elif algorithm_result.count(None) == 1:
            result = None
        else:
            result = False
        final_ensemble.append(result)
        # When not optimized (or when timing everything), never short-circuit
        if not RUN_OPTIMIZED_WORKFLOW:
            continue
        if time_all_algorithms:
            continue
        if ENABLE_ALL_ALGORITHMS_RUN_METRICS:
            continue
        # true_count = final_ensemble.count(True)
        # false_count = final_ensemble.count(False)
        # logger.info('current false_count %s' % (str(false_count)))
        if final_ensemble.count(False) >= maximum_false_count:
            consensus_possible = False
            # logger.info('CONSENSUS cannot be reached as %s algorithms have already not been triggered' % (str(false_count)))
            # skip_algorithms_count = number_of_algorithms - number_of_algorithms_run
            # logger.info('skipping %s algorithms' % (str(skip_algorithms_count)))
    # logger.info('final_ensemble: %s' % (str(final_ensemble)))
    try:
        # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
        ensemble = final_ensemble
        threshold = len(ensemble) - CONSENSUS
        if ensemble.count(False) <= threshold:
            if ENABLE_SECOND_ORDER:
                if is_anomalously_anomalous(metric_name, ensemble, timeseries[-1][1]):
                    return True, ensemble, timeseries[-1][1]
            else:
                return True, ensemble, timeseries[-1][1]
        return False, ensemble, timeseries[-1][1]
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        return False, [], 1
def run_selected_batch_algorithm(timeseries, metric_name, run_negatives_present):
    """
    Filter timeseries and run selected algorithm.

    :param timeseries: the time series as [[timestamp, value], ...]
    :param metric_name: the full Redis metric name (FULL_NAMESPACE prefix is
        stripped to derive base_name)
    :param run_negatives_present: whether to check the series for negative
        values when it is found anomalous
    :returns: tuple of (anomalous, ensemble, datapoint, negatives_found,
        algorithms_run, number_of_algorithms); on any error in the final
        ensemble evaluation returns (False, [], 1, negatives_found,
        algorithms_run, 0)
    :raises TooShort: if the series is shorter than MIN_TOLERABLE_LENGTH
    :raises Stale: if the last data point is older than
        BATCH_PROCESSING_STALE_PERIOD
    :raises Boring: if the recent values show no variation
    """
    try:
        from settings import BATCH_PROCESSING_STALE_PERIOD
        # @modified 20200816 - Feature #3678: SNAB - anomalyScore
        # Renamed to avoid confusion
        # STALE_PERIOD = int(BATCH_PROCESSING_STALE_PERIOD)
        BATCH_PROCESSING_STALE_PERIOD = int(BATCH_PROCESSING_STALE_PERIOD)
    except:
        # Fall back to one day if the setting is unavailable
        BATCH_PROCESSING_STALE_PERIOD = 86400

    # Get rid of short series
    if len(timeseries) < MIN_TOLERABLE_LENGTH:
        raise TooShort()

    # Get rid of stale series
    # @modified 20200816 - Feature #3678: SNAB - anomalyScore
    # Renamed to avoid confusion
    # if time() - timeseries[-1][0] > BATCH_PROCESSING_STALE_PERIOD:
    if time() - timeseries[-1][0] > BATCH_PROCESSING_STALE_PERIOD:
        raise Stale()

    # Get rid of boring series
    if len(set(item[1] for item in timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
        raise Boring()

    # RUN_OPTIMIZED_WORKFLOW - replaces the original ensemble method:
    # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
    # which runs all timeseries through all ALGORITHMS
    final_ensemble = []
    number_of_algorithms_triggered = 0
    number_of_algorithms_run = 0
    number_of_algorithms = len(ALGORITHMS)
    # Once this many algorithms have voted False, CONSENSUS is unreachable
    maximum_false_count = number_of_algorithms - CONSENSUS + 1
    # logger.info('the maximum_false_count is %s, above which CONSENSUS cannot be achieved' % (str(maximum_false_count)))
    consensus_possible = True
    time_all_algorithms = False
    algorithm_tmp_file_prefix = '%s/%s.' % (SKYLINE_TMP_DIR, skyline_app)

    # @added 20200607 - Feature #3566: custom_algorithms
    algorithms_run = []
    custom_consensus_override = False
    custom_consensus_values = []
    run_3sigma_algorithms = True
    run_3sigma_algorithms_overridden_by = []
    custom_algorithm = None
    # @modified 20200817 - Bug #3652: Handle multiple metrics in base_name conversion
    # base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
    if metric_name.startswith(FULL_NAMESPACE):
        base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
    else:
        base_name = metric_name
    if CUSTOM_ALGORITHMS:
        custom_algorithms_to_run = {}
        try:
            custom_algorithms_to_run = get_custom_algorithms_to_run(
                skyline_app, base_name, CUSTOM_ALGORITHMS, DEBUG_CUSTOM_ALGORITHMS)
            if DEBUG_CUSTOM_ALGORITHMS:
                if custom_algorithms_to_run:
                    logger.debug(
                        'algorithms :: debug :: custom algorithms ARE RUN on %s' % (str(base_name)))
        except:
            logger.error('error :: get_custom_algorithms_to_run :: %s' % traceback.format_exc())
            custom_algorithms_to_run = {}
        for custom_algorithm in custom_algorithms_to_run:
            if consensus_possible:
                algorithm = custom_algorithm
                debug_logging = False
                try:
                    debug_logging = custom_algorithms_to_run[custom_algorithm]['debug_logging']
                except:
                    debug_logging = False
                if DEBUG_CUSTOM_ALGORITHMS:
                    debug_logging = True
                if send_algorithm_run_metrics:
                    algorithm_count_file = '%s%s.count' % (algorithm_tmp_file_prefix, algorithm)
                    algorithm_timings_file = '%s%s.timings' % (algorithm_tmp_file_prefix, algorithm)
                run_algorithm = []
                run_algorithm.append(algorithm)
                # Custom algorithms add to the total algorithm count used in
                # the anomalyScore calculation
                number_of_algorithms += 1
                number_of_algorithms_run += 1
                if send_algorithm_run_metrics:
                    start = timer()
                if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                    logger.debug(
                        'debug :: algorithms :: running custom algorithm %s on %s' % (
                            str(algorithm), str(base_name)))
                    start_debug_timer = timer()
                run_custom_algorithm_on_timeseries = None
                try:
                    from custom_algorithms import run_custom_algorithm_on_timeseries
                    if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                        logger.debug(
                            'debug :: algorithms :: loaded run_custom_algorithm_on_timeseries')
                except:
                    if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                        logger.error(traceback.format_exc())
                        logger.error(
                            'error :: algorithms :: failed to load run_custom_algorithm_on_timeseries')
                result = None
                anomalyScore = None
                if run_custom_algorithm_on_timeseries:
                    try:
                        result, anomalyScore = run_custom_algorithm_on_timeseries(
                            skyline_app, getpid(), base_name, timeseries,
                            custom_algorithm,
                            custom_algorithms_to_run[custom_algorithm],
                            DEBUG_CUSTOM_ALGORITHMS)
                        algorithm_result = [result]
                        if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                            logger.debug(
                                'debug :: algorithms :: run_custom_algorithm_on_timeseries run with result - %s, anomalyScore - %s' % (
                                    str(result), str(anomalyScore)))
                    except:
                        if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                            logger.error(traceback.format_exc())
                            logger.error(
                                'error :: algorithms :: failed to run custom_algorithm %s on %s' % (
                                    custom_algorithm, base_name))
                        result = None
                        algorithm_result = [None]
                else:
                    if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                        logger.error(
                            'error :: debug :: algorithms :: run_custom_algorithm_on_timeseries was not loaded so was not run')
                if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                    end_debug_timer = timer()
                    logger.debug(
                        'debug :: algorithms :: ran custom algorithm %s on %s with result of (%s, %s) in %.6f seconds' % (
                            str(algorithm), str(base_name),
                            str(result), str(anomalyScore),
                            (end_debug_timer - start_debug_timer)))
                algorithms_run.append(algorithm)
                if send_algorithm_run_metrics:
                    end = timer()
                    with open(algorithm_count_file, 'a') as f:
                        f.write('1\n')
                    with open(algorithm_timings_file, 'a') as f:
                        f.write('%.6f\n' % (end - start))
            else:
                algorithm_result = [None]
                algorithms_run.append(algorithm)
            # Reduce the single-element result list to a True/False/None vote
            if algorithm_result.count(True) == 1:
                result = True
                number_of_algorithms_triggered += 1
            elif algorithm_result.count(False) == 1:
                result = False
            elif algorithm_result.count(None) == 1:
                result = None
            else:
                result = False
            final_ensemble.append(result)
            custom_consensus = None
            algorithms_allowed_in_consensus = []
            # @added 20200605 - Feature #3566: custom_algorithms
            # Allow only single or multiple custom algorithms to run and allow
            # the a custom algorithm to specify not to run 3sigma aglorithms
            custom_run_3sigma_algorithms = True
            try:
                custom_run_3sigma_algorithms = custom_algorithms_to_run[custom_algorithm]['run_3sigma_algorithms']
            except:
                custom_run_3sigma_algorithms = True
            if not custom_run_3sigma_algorithms and result:
                run_3sigma_algorithms = False
                run_3sigma_algorithms_overridden_by.append(custom_algorithm)
                if DEBUG_CUSTOM_ALGORITHMS or debug_logging:
                    logger.debug(
                        'debug :: algorithms :: run_3sigma_algorithms is False on %s for %s' % (
                            custom_algorithm, base_name))
            if result:
                try:
                    custom_consensus = custom_algorithms_to_run[custom_algorithm]['consensus']
                    if custom_consensus == 0:
                        custom_consensus = int(CONSENSUS)
                    else:
                        custom_consensus_values.append(custom_consensus)
                except:
                    custom_consensus = int(CONSENSUS)
                try:
                    algorithms_allowed_in_consensus = custom_algorithms_to_run[custom_algorithm]['algorithms_allowed_in_consensus']
                except:
                    algorithms_allowed_in_consensus = []
                # A custom algorithm with consensus of 1 decides the result on
                # its own - no further algorithms need to run
                if custom_consensus == 1:
                    consensus_possible = False
                    custom_consensus_override = True
                    logger.info(
                        'algorithms :: overidding the CONSENSUS as custom algorithm %s overides on %s' % (
                            str(algorithm), str(base_name)))
            # TODO - figure out how to handle consensus overrides if
            # multiple custom algorithms are used
    if DEBUG_CUSTOM_ALGORITHMS:
        if not run_3sigma_algorithms:
            logger.debug('algorithms :: not running 3 sigma algorithms')
        if len(run_3sigma_algorithms_overridden_by) > 0:
            logger.debug(
                'algorithms :: run_3sigma_algorithms overridden by %s' % (
                    str(run_3sigma_algorithms_overridden_by)))

    # @added 20200425 - Feature #3508: ionosphere.untrainable_metrics
    # Added negatives_found
    negatives_found = False

    # @added 20200817 - Feature #3684: ROOMBA_BATCH_METRICS_CUSTOM_DURATIONS
    # Feature #3650: ROOMBA_DO_NOT_PROCESS_BATCH_METRICS
    # Feature #3480: batch_processing
    # Feature #3678: SNAB - anomalyScore
    # Allow for custom durations on namespaces
    use_full_duration = int(FULL_DURATION) + 0
    if ROOMBA_BATCH_METRICS_CUSTOM_DURATIONS:
        for metric_namespace, custom_full_duration in ROOMBA_BATCH_METRICS_CUSTOM_DURATIONS:
            if metric_namespace in base_name:
                use_full_duration = custom_full_duration
    # NOTE(review): set but not used in the visible body of this function -
    # presumably consumed by code elsewhere or a later revision; verify
    detect_drop_off_cliff_trigger = False
    for algorithm in ALGORITHMS:
        # @modified 20200607 - Feature #3566: custom_algorithms
        # Added run_3sigma_algorithms to allow only single or multiple custom
        # algorithms to run and allow the a custom algorithm to specify not to
        # run 3sigma aglorithms.
        # if consensus_possible:
        if consensus_possible and run_3sigma_algorithms:
            if send_algorithm_run_metrics:
                algorithm_count_file = '%s%s.count' % (algorithm_tmp_file_prefix, algorithm)
                algorithm_timings_file = '%s%s.timings' % (algorithm_tmp_file_prefix, algorithm)
            run_algorithm = []
            run_algorithm.append(algorithm)
            number_of_algorithms_run += 1
            if send_algorithm_run_metrics:
                start = timer()
            try:
                # @added 20200817 - Feature #3684: ROOMBA_BATCH_METRICS_CUSTOM_DURATIONS
                # Feature #3650: ROOMBA_DO_NOT_PROCESS_BATCH_METRICS
                # Feature #3480: batch_processing
                # Feature #3678: SNAB - anomalyScore
                # Allow for custom durations on namespaces
                # algorithm_result = [globals()[test_algorithm](timeseries) for test_algorithm in run_algorithm]
                algorithm_result = [
                    globals()[test_algorithm](timeseries, use_full_duration)
                    for test_algorithm in run_algorithm
                ]
            except:
                # logger.error('%s failed' % (algorithm))
                algorithm_result = [None]
            # @added 20200607 - Feature #3566: custom_algorithms
            algorithms_run.append(algorithm)
            if send_algorithm_run_metrics:
                end = timer()
                with open(algorithm_count_file, 'a') as f:
                    f.write('1\n')
                with open(algorithm_timings_file, 'a') as f:
                    f.write('%.6f\n' % (end - start))
        else:
            algorithm_result = [None]
            algorithms_run.append(algorithm)
        # Reduce the single-element result list to a True/False/None vote
        if algorithm_result.count(True) == 1:
            result = True
            number_of_algorithms_triggered += 1
            # logger.info('algorithm %s triggerred' % (str(algorithm)))
        elif algorithm_result.count(False) == 1:
            result = False
        elif algorithm_result.count(None) == 1:
            result = None
        else:
            result = False
        final_ensemble.append(result)
        if not RUN_OPTIMIZED_WORKFLOW:
            continue
        if time_all_algorithms:
            continue
        if ENABLE_ALL_ALGORITHMS_RUN_METRICS:
            continue
        # true_count = final_ensemble.count(True)
        # false_count = final_ensemble.count(False)
        # logger.info('current false_count %s' % (str(false_count)))
        if final_ensemble.count(False) >= maximum_false_count:
            consensus_possible = False
            # logger.info('CONSENSUS cannot be reached as %s algorithms have already not been triggered' % (str(false_count)))
            # skip_algorithms_count = number_of_algorithms - number_of_algorithms_run
            # logger.info('skipping %s algorithms' % (str(skip_algorithms_count)))
    # logger.info('final_ensemble: %s' % (str(final_ensemble)))
    try:
        # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
        ensemble = final_ensemble
        # @modified 20200607 - Feature #3566: custom_algorithms
        # threshold = len(ensemble) - CONSENSUS
        if custom_consensus_override:
            threshold = len(ensemble) - 1
        else:
            threshold = len(ensemble) - CONSENSUS
        if ensemble.count(False) <= threshold:
            # @added 20200425 - Feature #3508: ionosphere.untrainable_metrics
            # Only run a negatives_present check if it is anomalous, there
            # is no need to check unless it is related to an anomaly
            if run_negatives_present:
                try:
                    # @added 20200817 - Feature #3684: ROOMBA_BATCH_METRICS_CUSTOM_DURATIONS
                    # Feature #3650: ROOMBA_DO_NOT_PROCESS_BATCH_METRICS
                    # Feature #3480: batch_processing
                    # Feature #3678: SNAB - anomalyScore
                    # Allow for custom durations on namespaces
                    # negatives_found = negatives_present(timeseries)
                    negatives_found = negatives_present(timeseries, use_full_duration)
                except:
                    logger.error('Algorithm error: negatives_present :: %s' % traceback.format_exc())
                    negatives_found = False
            # @modified 20200425 - Feature #3508: ionosphere.untrainable_metrics
            # return True, ensemble, timeseries[-1][1]
            # @modified 20200607 - Feature #3566: custom_algorithms
            # Added algorithms_run
            # return True, ensemble, timeseries[-1][1], negatives_found
            # @modified 20200815 - Feature #3678: SNAB - anomalyScore
            # Added the number_of_algorithms to calculate anomalyScore from
            # return True, ensemble, timeseries[-1][1], negatives_found, algorithms_run
            return True, ensemble, timeseries[-1][1], negatives_found, algorithms_run, number_of_algorithms
        # @modified 20200425 - Feature #3508: ionosphere.untrainable_metrics
        # return False, ensemble, timeseries[-1][1]
        # @modified 20200607 - Feature #3566: custom_algorithms
        # Added algorithms_run
        # @modified 20200815 - Feature #3678: SNAB - anomalyScore
        # Added the number_of_algorithms to calculate anomalyScore from
        # return False, ensemble, timeseries[-1][1], negatives_found, algorithms_run
        return False, ensemble, timeseries[-1][1], negatives_found, algorithms_run, number_of_algorithms
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        # @modified 20200425 - Feature #3508: ionosphere.untrainable_metrics
        # return False, [], 1
        # @modified 20200607 - Feature #3566: custom_algorithms
        # Added algorithms_run
        # return False, ensemble, timeseries[-1][1], negatives_found, algorithms_run
        # @modified 20200815 - Feature #3678: SNAB - anomalyScore
        # Added the number_of_algorithms to calculate anomalyScore from
        # return False, [], 1, negatives_found, algorithms_run
        return False, [], 1, negatives_found, algorithms_run, 0
def run_selected_batch_algorithm(timeseries, metric_name, run_negatives_present):
    """
    Validate a batch time series and evaluate it against the configured
    ALGORITHMS, short-circuiting further algorithm runs once CONSENSUS can
    no longer be reached.

    :param timeseries: the time series as [[timestamp, value], ...]
    :param metric_name: the metric name (unused beyond the signature here)
    :param run_negatives_present: whether to check for negative values when
        the series is found anomalous
    :returns: tuple of (anomalous, ensemble, datapoint, negatives_found);
        on an error evaluating the ensemble, (False, [], 1, negatives_found)
    :raises TooShort: if the series is shorter than MIN_TOLERABLE_LENGTH
    :raises Stale: if the newest data point is older than the batch stale period
    :raises Boring: if the recent values show no variation
    """
    # Batch metrics have their own stale period; default to one day when the
    # setting cannot be imported
    try:
        from settings import BATCH_PROCESSING_STALE_PERIOD
        STALE_PERIOD = BATCH_PROCESSING_STALE_PERIOD
    except:
        STALE_PERIOD = 86400

    # Discard series that are too short, stale or boring before any analysis
    if len(timeseries) < MIN_TOLERABLE_LENGTH:
        raise TooShort()
    if time() - timeseries[-1][0] > STALE_PERIOD:
        raise Stale()
    if len(set(item[1] for item in timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
        raise Boring()

    # RUN_OPTIMIZED_WORKFLOW: rather than running every algorithm over the
    # series unconditionally, stop running algorithms once enough False votes
    # have accumulated that CONSENSUS is unreachable.
    votes = []
    triggered_count = 0
    run_count = 0
    algorithm_total = len(ALGORITHMS)
    # One more False than this and CONSENSUS cannot be achieved
    max_false_votes = algorithm_total - CONSENSUS + 1
    consensus_still_possible = True
    time_all_algorithms = False
    tmp_prefix = '%s/%s.' % (SKYLINE_TMP_DIR, skyline_app)

    # @added 20200425 - Feature #3508: ionosphere.untrainable_metrics
    negatives_found = False

    for algorithm in ALGORITHMS:
        if consensus_still_possible:
            if send_algorithm_run_metrics:
                count_file = '%s%s.count' % (tmp_prefix, algorithm)
                timings_file = '%s%s.timings' % (tmp_prefix, algorithm)
            selected = [algorithm]
            run_count += 1
            if send_algorithm_run_metrics:
                t_start = timer()
            try:
                # Dispatch to the algorithm function of the same name in this
                # module's namespace
                outcomes = [globals()[name](timeseries) for name in selected]
            except:
                outcomes = [None]
            if send_algorithm_run_metrics:
                t_end = timer()
                with open(count_file, 'a') as f:
                    f.write('1\n')
                with open(timings_file, 'a') as f:
                    f.write('%.6f\n' % (t_end - t_start))
        else:
            # CONSENSUS is already unreachable - record a non-trigger without
            # running the algorithm
            outcomes = [False]
        # Collapse the single-element outcome list to a True/False/None vote
        if outcomes.count(True) == 1:
            vote = True
            triggered_count += 1
        elif outcomes.count(False) == 1:
            vote = False
        elif outcomes.count(None) == 1:
            vote = None
        else:
            vote = False
        votes.append(vote)
        # Short-circuiting only applies in the optimized workflow and when
        # per-algorithm metrics/timings are not being collected
        if not RUN_OPTIMIZED_WORKFLOW:
            continue
        if time_all_algorithms:
            continue
        if ENABLE_ALL_ALGORITHMS_RUN_METRICS:
            continue
        if votes.count(False) >= max_false_votes:
            consensus_still_possible = False

    try:
        ensemble = votes
        threshold = len(ensemble) - CONSENSUS
        if ensemble.count(False) <= threshold:
            # @added 20200425 - Feature #3508: ionosphere.untrainable_metrics
            # Only check for negative values once the series is known to be
            # anomalous - there is no need otherwise
            if run_negatives_present:
                try:
                    negatives_found = negatives_present(timeseries)
                except:
                    logger.error('Algorithm error: negatives_present :: %s' % traceback.format_exc())
                    negatives_found = False
            return True, ensemble, timeseries[-1][1], negatives_found
        return False, ensemble, timeseries[-1][1], negatives_found
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        return False, [], 1, negatives_found
def run_selected_algorithm(timeseries, metric_name):
    """
    Validate a time series and evaluate it against the configured ALGORITHMS,
    short-circuiting further algorithm runs once CONSENSUS can no longer be
    reached.

    :param timeseries: the time series as [[timestamp, value], ...]
    :param metric_name: the metric name, passed to second-order analysis
        when ENABLE_SECOND_ORDER is set
    :returns: tuple of (anomalous, ensemble, datapoint); on an error
        evaluating the ensemble, (False, [], 1)
    :raises TooShort: if the series is shorter than MIN_TOLERABLE_LENGTH
    :raises Stale: if the newest data point is older than STALE_PERIOD
    :raises Boring: if the recent values show no variation
    """
    # Discard series that are too short, stale or boring before any analysis
    if len(timeseries) < MIN_TOLERABLE_LENGTH:
        raise TooShort()
    if time() - timeseries[-1][0] > STALE_PERIOD:
        raise Stale()
    if len(set(item[1] for item in timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
        raise Boring()

    # RUN_OPTIMIZED_WORKFLOW: rather than running every algorithm over the
    # series unconditionally, stop running algorithms once enough False votes
    # have accumulated that CONSENSUS is unreachable.
    votes = []
    triggered_count = 0
    run_count = 0
    algorithm_total = len(ALGORITHMS)
    # One more False than this and CONSENSUS cannot be achieved
    max_false_votes = algorithm_total - CONSENSUS + 1
    consensus_still_possible = True
    # DEVELOPMENT: the development analyzer times every algorithm
    time_all_algorithms = (skyline_app == 'analyzer_dev')
    tmp_prefix = '%s/%s.' % (SKYLINE_TMP_DIR, skyline_app)

    for algorithm in ALGORITHMS:
        if consensus_still_possible:
            if send_algorithm_run_metrics:
                count_file = '%s%s.count' % (tmp_prefix, algorithm)
                timings_file = '%s%s.timings' % (tmp_prefix, algorithm)
            selected = [algorithm]
            run_count += 1
            if send_algorithm_run_metrics:
                t_start = timer()
            try:
                # Dispatch to the algorithm function of the same name in this
                # module's namespace
                outcomes = [globals()[name](timeseries) for name in selected]
            except:
                outcomes = [None]
            if send_algorithm_run_metrics:
                t_end = timer()
                with open(count_file, 'a') as f:
                    f.write('1\n')
                with open(timings_file, 'a') as f:
                    f.write('%.6f\n' % (t_end - t_start))
        else:
            # CONSENSUS is already unreachable - record a non-trigger without
            # running the algorithm
            outcomes = [False]
        # Collapse the single-element outcome list to a True/False/None vote
        if outcomes.count(True) == 1:
            vote = True
            triggered_count += 1
        elif outcomes.count(False) == 1:
            vote = False
        elif outcomes.count(None) == 1:
            vote = None
        else:
            vote = False
        votes.append(vote)
        # Short-circuiting only applies in the optimized workflow and when
        # per-algorithm metrics/timings are not being collected
        if not RUN_OPTIMIZED_WORKFLOW:
            continue
        if time_all_algorithms:
            continue
        if ENABLE_ALL_ALGORITHMS_RUN_METRICS:
            continue
        if votes.count(False) >= max_false_votes:
            consensus_still_possible = False

    try:
        ensemble = votes
        threshold = len(ensemble) - CONSENSUS
        if ensemble.count(False) <= threshold:
            if ENABLE_SECOND_ORDER:
                if is_anomalously_anomalous(metric_name, ensemble, timeseries[-1][1]):
                    return True, ensemble, timeseries[-1][1]
            else:
                return True, ensemble, timeseries[-1][1]
        return False, ensemble, timeseries[-1][1]
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        return False, [], 1
def run_selected_algorithm(timeseries, metric_name, airgapped_metrics, airgapped_metrics_filled, run_negatives_present, check_for_airgaps_only):
    """
    Filter the time series for exceptions (too short, stale, boring), handle
    stale-metric alerting, airgap / unordered time series identification and
    batch-processing airgap-only checks, then run the configured ALGORITHMS
    over the time series and evaluate against CONSENSUS.

    :param timeseries: list of ``[timestamp, value]`` data points
    :param metric_name: the metric name (with the Redis full namespace prefix)
    :param airgapped_metrics: list of known airgapped metrics
    :param airgapped_metrics_filled: list of airgapped metrics that have been
        filled
    :param run_negatives_present: if True and the time series is anomalous,
        also check whether negative values are present
    :param check_for_airgaps_only: if True the (batch) metric is only to be
        checked for airgaps and must not be analysed
    :return: ``(anomalous, ensemble, datapoint, negatives_found)``
    :raises TooShort: if the time series has too few data points
    :raises Stale: if the time series has stopped updating
    :raises Boring: if the time series values are unchanging
    """
    # @added 20180807 - Feature #2492: alert on stale metrics
    # Determine if a metric has stopped sending data and if so add to the
    # analyzer.alert_on_stale_metrics Redis set
    add_to_alert_on_stale_metrics = False
    if ALERT_ON_STALE_METRICS:
        # @modified 20180816 - Feature #2492: alert on stale metrics
        # Added try and except to prevent IndexError: list index out of range
        # errors that are encountered between 00:14 and 00:17 on some days
        # when an empty timeseries is passed
        try:
            if int(time()) - int(timeseries[-1][0]) >= ALERT_ON_STALE_PERIOD:
                add_to_alert_on_stale_metrics = True
        except:
            add_to_alert_on_stale_metrics = False
        # A metric that is fully stale (past STALE_PERIOD) is not alerted on
        try:
            if int(time()) - int(timeseries[-1][0]) >= STALE_PERIOD:
                add_to_alert_on_stale_metrics = False
        except:
            add_to_alert_on_stale_metrics = False
    if add_to_alert_on_stale_metrics:
        # Best effort - never fail analysis because Redis is unavailable
        try:
            # @added 20200505 - Feature #3504: Handle airgaps in batch metrics
            # Use get_redis_conn
            from skyline_functions import get_redis_conn
            redis_conn = get_redis_conn(skyline_app)
            redis_conn.sadd('analyzer.alert_on_stale_metrics', metric_name)
        except:
            pass

    # @added 20200505 - Feature #3504: Handle airgaps in batch metrics
    # Check to see if this is a batch processing metric that has been sent
    # through Analyzer to check for airgaps only and if so do not check the
    # timeseries for exceptions
    check_for_timeseries_exceptions = True
    check_airgap_only = None
    if BATCH_PROCESSING and check_for_airgaps_only:
        check_airgap_only_key = 'analyzer.check_airgap_only.%s' % metric_name
        try:
            if not add_to_alert_on_stale_metrics:
                # @added 20200505 - Feature #3504: Handle airgaps in batch metrics
                # Use get_redis_conn
                from skyline_functions import get_redis_conn
                redis_conn = get_redis_conn(skyline_app)
            check_airgap_only = redis_conn.get(check_airgap_only_key)
        except:
            check_airgap_only = None
        if check_airgap_only:
            check_for_timeseries_exceptions = False

    # @modified 20200505 - Feature #3504: Handle airgaps in batch metrics
    # Wrapped in check_for_timeseries_exceptions as if it is a
    # check_airgap_only metric then the time series should not be checked for
    # exceptions
    if check_for_timeseries_exceptions:
        # Get rid of short series
        if len(timeseries) < MIN_TOLERABLE_LENGTH:
            raise TooShort()

        # Get rid of stale series
        if time() - timeseries[-1][0] > STALE_PERIOD:
            raise Stale()

        # Get rid of boring series
        if len(set(item[1] for item in timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
            raise Boring()

    # @added 20200423 - Feature #3508: ionosphere.untrainable_metrics
    # Added run_negatives_present
    negatives_found = False

    # @added 20200117 - Feature #3400: Identify air gaps in the metric data
    # @modified 20200214 - Bug #3448: Repeated airgapped_metrics
    if IDENTIFY_AIRGAPS or IDENTIFY_UNORDERED_TIMESERIES:
        process_metric = True
        if IDENTIFY_AIRGAPS:
            if CHECK_AIRGAPS:
                process_metric = False
                # @added 20200423 - Feature #3504: Handle airgaps in batch metrics
                # Feature #3400: Identify air gaps in the metric data
                # Use the skyline_functions is_check_airgap_metric definition
                # to determine if this metric is to be checked for airgaps
                check_metric_for_airgaps = False
                try:
                    check_metric_for_airgaps = is_check_airgap_metric(metric_name)
                except:
                    check_metric_for_airgaps = False
                    try:
                        logger.error(
                            'failed to determine if %s is an airgap metric: %s' % (
                                str(metric_name), traceback.format_exc()))
                    except:
                        logger.error(
                            'failed to failure regarding deleting the check_airgap_only_key Redis key'
                        ) if False else logger.error(
                            'failed to determine if the metric is an airgap metric'
                        )
                if check_metric_for_airgaps:
                    process_metric = True
        else:
            # If IDENTIFY_AIRGAPS is not enabled and
            # IDENTIFY_UNORDERED_TIMESERIES is enabled process the metric
            if IDENTIFY_UNORDERED_TIMESERIES:
                process_metric = True
        airgaps = None
        unordered_timeseries = False
        if process_metric:
            # @modified 20200501 - Feature #3400: Identify air gaps in the metric data
            # Added airgapped_metrics_filled
            airgaps, unordered_timeseries = identify_airgaps(
                metric_name, timeseries, airgapped_metrics,
                airgapped_metrics_filled)
        if airgaps or unordered_timeseries:
            # Ensure there is a usable Redis connection before recording
            try:
                redis_conn.ping()
            except:
                # @added 20200505 - Feature #3504: Handle airgaps in batch metrics
                # Use get_redis_conn
                from skyline_functions import get_redis_conn
                redis_conn = get_redis_conn(skyline_app)
        if airgaps:
            for i in airgaps:
                try:
                    redis_conn.sadd('analyzer.airgapped_metrics', str(i))
                    logger.info('adding airgap %s' % str(i))
                    # TODO: learn_airgapped_metrics
                except:
                    pass
            del airgaps
        # @added 20200214 - Bug #3448: Repeated airgapped_metrics
        # Feature #3400: Identify air gaps in the metric data
        # Also add unordered time series to the analyzer.unordered_timeseries
        # Redis set
        if unordered_timeseries:
            try:
                redis_conn.sadd('analyzer.unordered_timeseries', metric_name)
                # @modified - fixed NameError typo (was `del unorder_timeseries`)
                # which the bare except silently swallowed, so the intended
                # release never happened
                del unordered_timeseries
            except:
                pass

    # @added 20200423 - Feature #3504: Handle airgaps in batch metrics
    # Feature #3480: batch_processing
    # Feature #3486: analyzer_batch
    # Feature #3400: Identify air gaps in the metric data
    # Check to see if this is a batch processing metric that has been sent to
    # analyzer_batch for processing but sent through Analyzer to check for
    # airgaps only and if so return as it should not be run through algorithms
    if BATCH_PROCESSING:
        if check_airgap_only:
            try:
                redis_conn.delete(check_airgap_only_key)
            except:
                try:
                    logger.error(
                        'failed to delete Redis key %s: %s' % (
                            str(check_airgap_only_key),
                            traceback.format_exc()))
                except:
                    logger.error(
                        'failed to failure regarding deleting the check_airgap_only_key Redis key'
                    )
            # @modified 20200430 - Feature #3480: batch_processing
            # Tidy up and reduce logging, only log if debug enabled
            if BATCH_PROCESSING_DEBUG:
                logger.info(
                    'algorithms :: batch processing - batch metric %s checked for airgaps only, not analysing' % (
                        str(metric_name)))
            # TODO: the only worry here is that this metric then gets added to
            # the not_anomalous Redis set? Not sure if that is a problem, I do
            # not think it is. Unless it is in the end of anomaly_end_timestamp
            # context?
            # @modified 20200424 - Feature #3508: ionosphere.untrainable_metrics
            # Added negatives_found
            return False, [], 1, negatives_found

    # RUN_OPTIMIZED_WORKFLOW - replaces the original ensemble method:
    # ensemble = [globals()[algorithm](timeseries) for algorithm in ALGORITHMS]
    # which runs all timeseries through all ALGORITHMS.  Instead run
    # algorithms one at a time and stop early once CONSENSUS is unachievable.
    final_ensemble = []
    number_of_algorithms_triggered = 0
    number_of_algorithms_run = 0
    number_of_algorithms = len(ALGORITHMS)
    # Above this many False results CONSENSUS cannot be achieved
    maximum_false_count = number_of_algorithms - CONSENSUS + 1
    consensus_possible = True
    # DEVELOPMENT: this is for a development version of analyzer only
    if skyline_app == 'analyzer_dev':
        time_all_algorithms = True
    else:
        time_all_algorithms = False
    algorithm_tmp_file_prefix = '%s/%s.' % (SKYLINE_TMP_DIR, skyline_app)
    for algorithm in ALGORITHMS:
        if consensus_possible:
            if send_algorithm_run_metrics:
                algorithm_count_file = '%s%s.count' % (
                    algorithm_tmp_file_prefix, algorithm)
                algorithm_timings_file = '%s%s.timings' % (
                    algorithm_tmp_file_prefix, algorithm)
            run_algorithm = []
            run_algorithm.append(algorithm)
            number_of_algorithms_run += 1
            if send_algorithm_run_metrics:
                start = timer()
            try:
                algorithm_result = [
                    globals()[test_algorithm](timeseries)
                    for test_algorithm in run_algorithm
                ]
            except:
                # A failed algorithm is recorded as None, not False
                algorithm_result = [None]
            if send_algorithm_run_metrics:
                end = timer()
                with open(algorithm_count_file, 'a') as f:
                    f.write('1\n')
                with open(algorithm_timings_file, 'a') as f:
                    f.write('%.6f\n' % (end - start))
        else:
            # CONSENSUS NOT ACHIEVABLE - skip running the algorithm
            algorithm_result = [False]
        if algorithm_result.count(True) == 1:
            result = True
            number_of_algorithms_triggered += 1
        elif algorithm_result.count(False) == 1:
            result = False
        elif algorithm_result.count(None) == 1:
            result = None
        else:
            result = False
        final_ensemble.append(result)
        # When not optimizing, or when timing/metric collection is on, always
        # run every algorithm
        if not RUN_OPTIMIZED_WORKFLOW:
            continue
        if time_all_algorithms:
            continue
        if ENABLE_ALL_ALGORITHMS_RUN_METRICS:
            continue
        if final_ensemble.count(False) >= maximum_false_count:
            consensus_possible = False
    try:
        ensemble = final_ensemble
        threshold = len(ensemble) - CONSENSUS
        if ensemble.count(False) <= threshold:
            # @added 20200425 - Feature #3508: ionosphere.untrainable_metrics
            # Only run a negatives_present check if it is anomalous, there
            # is no need to check unless it is related to an anomaly
            if run_negatives_present:
                try:
                    negatives_found = negatives_present(timeseries)
                except:
                    logger.error('Algorithm error: negatives_present :: %s' % traceback.format_exc())
                    negatives_found = False
            if ENABLE_SECOND_ORDER:
                if is_anomalously_anomalous(metric_name, ensemble, timeseries[-1][1]):
                    # @modified 20200423 - Feature #3508: ionosphere.untrainable_metrics
                    # Added negatives_found
                    return True, ensemble, timeseries[-1][1], negatives_found
            else:
                return True, ensemble, timeseries[-1][1], negatives_found
        # @modified 20200423 - Feature #3508: ionosphere.untrainable_metrics
        # Added negatives_found
        return False, ensemble, timeseries[-1][1], negatives_found
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        # @modified 20200423 - Feature #3508: ionosphere.untrainable_metrics
        # Added negatives_found
        return False, [], 1, negatives_found
def run_selected_algorithm(timeseries, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, autoaggregate, autoaggregate_value, algorithm):
    """
    Filter the time series for exceptions (too short, stale, boring),
    optionally auto-aggregate it, then run the single selected Boundary
    algorithm against it.

    :param timeseries: list of ``[timestamp, value]`` data points
    :param metric_name: the metric name
    :param metric_expiration_time: the metric EXPIRATION_TIME setting
    :param metric_min_average: the metric MIN_AVERAGE setting
    :param metric_min_average_seconds: the metric MIN_AVERAGE_SECONDS setting
    :param metric_trigger: the metric trigger value
    :param alert_threshold: the metric ALERT_THRESHOLD setting
    :param metric_alerters: the metric alerters
    :param autoaggregate: whether to auto-aggregate the time series first
    :param autoaggregate_value: the resolution to aggregate the time series to
    :param algorithm: the name of the algorithm to run
    :return: ``(anomalous, ensemble, datapoint, metric_name,
        metric_expiration_time, metric_min_average, metric_min_average_seconds,
        metric_trigger, alert_threshold, metric_alerters, algorithm)``
    :raises TooShort: if the time series has too few data points
    :raises Stale: if the time series has stopped updating
    :raises Boring: if the time series values are unchanging
    """
    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug :: assigning in algoritms.py - %s, %s' % (metric_name, algorithm))

    # Get rid of short series
    if len(timeseries) < MIN_TOLERABLE_LENGTH:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: TooShort - %s, %s' % (metric_name, algorithm))
        raise TooShort()

    # Get rid of stale series
    if time() - timeseries[-1][0] > STALE_PERIOD:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: Stale - %s, %s' % (metric_name, algorithm))
        raise Stale()

    # Get rid of boring series
    if len(set(item[1] for item in timeseries[-MAX_TOLERABLE_BOREDOM:])) == BOREDOM_SET_SIZE:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: Boring - %s, %s' % (metric_name, algorithm))
        raise Boring()

    if autoaggregate:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: auto aggregating %s for %s' % (metric_name, algorithm))
        try:
            agg_timeseries = autoaggregate_ts(timeseries, autoaggregate_value)
            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug :: aggregated_timeseries returned %s for %s' % (
                        metric_name, algorithm))
        except Exception as e:
            agg_timeseries = []
            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug error - autoaggregate excpection %s for %s' % (
                        metric_name, algorithm))
                logger.error('Algorithm error: %s' % traceback.format_exc())
                # @modified - fixed format specifier: '%e' expects a float and
                # raised TypeError when applied to the exception instance,
                # masking the original autoaggregation error
                logger.error('error: %s' % e)
        # An aggregated series must still be long enough to analyse
        if len(agg_timeseries) > 10:
            timeseries = agg_timeseries
        else:
            if ENABLE_BOUNDARY_DEBUG:
                logger.info('debug :: TooShort - %s, %s' % (metric_name, algorithm))
            raise TooShort()

    if len(timeseries) < 10:
        if ENABLE_BOUNDARY_DEBUG:
            logger.info(
                'debug :: timeseries too short - %s - timeseries length - %s' % (
                    metric_name, str(len(timeseries))))
        raise TooShort()

    try:
        # Run the single selected algorithm, resolved by name from globals
        ensemble = [
            globals()[algorithm](timeseries, metric_name,
                                 metric_expiration_time, metric_min_average,
                                 metric_min_average_seconds, metric_trigger)
        ]
        if ensemble.count(True) == 1:
            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug :: anomalous datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s' % (
                        str(timeseries[-1][1]), str(metric_name),
                        str(metric_expiration_time), str(metric_min_average),
                        str(metric_min_average_seconds), str(metric_trigger),
                        str(alert_threshold), str(metric_alerters),
                        str(algorithm)))
            return True, ensemble, timeseries[-1][
                1], metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm
        else:
            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug :: not anomalous datapoint = %s - %s, %s, %s, %s, %s, %s, %s, %s' % (
                        str(timeseries[-1][1]), str(metric_name),
                        str(metric_expiration_time), str(metric_min_average),
                        str(metric_min_average_seconds), str(metric_trigger),
                        str(alert_threshold), str(metric_alerters),
                        str(algorithm)))
            return False, ensemble, timeseries[-1][
                1], metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm
    except:
        logger.error('Algorithm error: %s' % traceback.format_exc())
        return False, [], 1, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm