def test_run_selected_algorithm(self, timeMock):
    """
    Run the selected algorithms over the fixture series and check the result.

    Asserts that the series is reported as anomalous, that at least
    ``settings.CONSENSUS`` algorithms voted for it, and that the reported
    datapoint is 1000.

    :param timeMock: mock whose ``return_value`` is set from ``self.data``
        (presumably the patched clock - the patch decorator is not visible
        here).
    """
    timeMock.return_value, timeseries = self.data(time())
    result, ensemble, datapoint = algorithms.run_selected_algorithm(
        timeseries, "test.metric")

    self.assertTrue(result)
    # Count the truthy votes explicitly: on Python 3 ``filter`` returns a
    # lazy iterator, so ``len(filter(None, ensemble))`` raises TypeError.
    votes = sum(1 for vote in ensemble if vote)
    self.assertGreaterEqual(votes, settings.CONSENSUS)
    self.assertEqual(datapoint, 1000)
def check_match(self):
    """
    Run the selected algorithms over ``self.data_matches``.

    Updates ``self.anomaly_breakdown`` with one count per algorithm that
    voted anomalous, and ``self.exceptions`` with one count per failure
    category.

    :return: the ``anomalous`` flag returned by
        ``algorithms.run_selected_algorithm``, or ``False`` when there is no
        data or the analysis raised.
    """
    anomalous = False
    # Nothing to analyse.
    if len(self.data_matches) == 0:
        return anomalous

    try:
        anomalous, ensemble = algorithms.run_selected_algorithm(
            self.data_matches)

        # If it's anomalous, add it to list
        if anomalous:
            # Get the anomaly breakdown - who returned True?
            for index, value in enumerate(ensemble):
                if value:
                    algorithm = config.idea['algorithms'][index]
                    self.anomaly_breakdown[algorithm] += 1
    # It could have been deleted by the Roomba
    except TypeError:
        self.exceptions['DeletedByRoomba'] += 1
    except ex.TooShort:
        self.exceptions['TooShort'] += 1
    except ex.Stale:
        self.exceptions['Stale'] += 1
    except ex.Boring:
        self.exceptions['Boring'] += 1
    except Exception:
        # Catch-all for unexpected failures; ``Exception`` (rather than a
        # bare ``except``) lets SystemExit/KeyboardInterrupt propagate.
        self.exceptions['Other'] += 1
        logger.log.info(traceback.format_exc())

    return anomalous
def test_run_selected_algorithm_runs_novel_algorithm(self, timeMock,
                                                     algorithmsListMock,
                                                     consensusMock):
    """
    Verify that a user-supplied algorithm is picked up and executed.

    The algorithm list mock is made to iterate over a single name,
    ``alwaysTrue``, and a ``Mock`` returning ``True`` is injected into the
    ``algorithms`` module namespace under that name for the duration of the
    call.
    """
    algorithmsListMock.__iter__.return_value = ['alwaysTrue']
    # NOTE(review): this only rebinds the local parameter name; it does not
    # configure the injected mock object - confirm this is intended.
    consensusMock = 1

    now, timeseries = self.data(time.time())
    timeMock.return_value = now

    custom_algorithm = Mock(return_value=True)
    injected = {'alwaysTrue': custom_algorithm}
    with patch.dict(algorithms.__dict__, injected):
        outcome = algorithms.run_selected_algorithm(timeseries)
    is_anomalous, votes, average = outcome

    custom_algorithm.assert_called_with(timeseries)
    self.assertTrue(is_anomalous)
    self.assertEqual(votes, [True])
    self.assertEqual(average, 334)
def spin_process(self, i, unique_metrics):
    """
    Assign a bunch of metrics for a process to analyze.

    The metrics are split into ``settings.ANALYZER_PROCESSES`` contiguous
    slices; this worker fetches its slice from Redis with one ``mget``,
    unpacks each series and runs ``run_selected_algorithm`` on it.
    Anomalous metrics are appended to ``self.anomalous_metrics`` and the
    per-process counters are merged into ``self.anomaly_breakdown`` and
    ``self.exceptions`` under ``self.lock``.

    :param i: 1-based worker number (the last worker is
        ``settings.ANALYZER_PROCESSES``).
    :param unique_metrics: indexable sequence of metric keys.
    :return: ``None``.
    """
    # Discover assigned metrics
    total_metrics = len(unique_metrics)
    keys_per_processor = int(
        ceil(float(total_metrics) / float(settings.ANALYZER_PROCESSES)))
    if i == settings.ANALYZER_PROCESSES:
        assigned_max = total_metrics
    else:
        # Clamp to the number of metrics so a non-final worker can never
        # index past the end of the list.
        assigned_max = min(total_metrics, i * keys_per_processor)
    # Derive the lower bound from the worker number, not from assigned_max,
    # so the final worker does not re-analyse the previous worker's metrics.
    assigned_min = (i - 1) * keys_per_processor
    assigned_keys = range(assigned_min, assigned_max)

    # Compile assigned metrics
    assigned_metrics = [unique_metrics[index] for index in assigned_keys]

    # Check if this process is unnecessary
    if len(assigned_metrics) == 0:
        return

    # Multi get series
    raw_assigned = self.redis_conn.mget(assigned_metrics)

    # Make process-specific dicts
    exceptions = defaultdict(int)
    anomaly_breakdown = defaultdict(int)

    # Distill timeseries strings into lists
    for metric_index, metric_name in enumerate(assigned_metrics):
        self.check_if_parent_is_alive()

        try:
            raw_series = raw_assigned[metric_index]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)

            anomalous, ensemble, datapoint = run_selected_algorithm(
                timeseries)

            # If it's anomalous, add it to list
            if anomalous:
                metric = [datapoint, metric_name]
                self.anomalous_metrics.append(metric)

                # Get the anomaly breakdown - who returned True?
                for index, value in enumerate(ensemble):
                    if value:
                        algorithm = settings.ALGORITHMS[index]
                        anomaly_breakdown[algorithm] += 1

        # It could have been deleted by the Roomba
        except AttributeError:
            exceptions['DeletedByRoomba'] += 1
        except TooShort:
            exceptions['TooShort'] += 1
        except Stale:
            exceptions['Stale'] += 1
        except Incomplete:
            exceptions['Incomplete'] += 1
        except Boring:
            exceptions['Boring'] += 1
        except Exception:
            # Catch-all; SystemExit/KeyboardInterrupt are left to propagate.
            exceptions['Other'] += 1
            logger.info(traceback.format_exc())

    # Collate process-specific dicts to main dicts
    with self.lock:
        for key, value in anomaly_breakdown.items():
            if key not in self.anomaly_breakdown:
                self.anomaly_breakdown[key] = value
            else:
                self.anomaly_breakdown[key] += value

        for key, value in exceptions.items():
            if key not in self.exceptions:
                self.exceptions[key] = value
            else:
                self.exceptions[key] += value
def spin_process(self, i, unique_metrics):
    """
    Assign a bunch of metrics for a process to analyze.

    The worker fetches its slice of ``unique_metrics`` from Redis with one
    ``mget`` and, for each metric:

    - unpacks the raw series and runs ``run_selected_algorithm`` on it;
    - if anomalous, appends ``[datapoint, base_name]`` to
      ``self.anomalous_metrics`` and counts the algorithms that triggered;
    - if ``settings.PANORAMA_ENABLED``, writes a Panorama check file;
    - if ``settings.ENABLE_CRUCIBLE`` and
      ``settings.ANALYZER_CRUCIBLE_ENABLED``, writes a Crucible anomaly
      file, the timeseries as a json file and a Crucible check file.

    The per-process counters are finally put on
    ``self.anomaly_breakdown_q`` and ``self.exceptions_q`` as
    ``(key, value)`` tuples.

    :param i: 1-based worker number (the last worker is
        ``settings.ANALYZER_PROCESSES``).
    :param unique_metrics: indexable sequence of metric keys.
    :return: ``None``.
    """
    spin_start = time()
    logger.info('spin_process started')

    # Discover assigned metrics
    keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ANALYZER_PROCESSES)))
    if i == settings.ANALYZER_PROCESSES:
        assigned_max = len(unique_metrics)
    else:
        assigned_max = min(len(unique_metrics), i * keys_per_processor)
    # Fix analyzer worker metric assignment #94
    # https://github.com/etsy/skyline/pull/94 @languitar:worker-fix
    assigned_min = (i - 1) * keys_per_processor
    assigned_keys = range(assigned_min, assigned_max)

    # Compile assigned metrics
    assigned_metrics = [unique_metrics[index] for index in assigned_keys]

    # Check if this process is unnecessary
    if len(assigned_metrics) == 0:
        return

    # Multi get series
    raw_assigned = self.redis_conn.mget(assigned_metrics)

    # Make process-specific dicts
    exceptions = defaultdict(int)
    anomaly_breakdown = defaultdict(int)

    # Directory permissions for the check/data folders.  The octal literal
    # is valid on Python 2.6+ and 3; the previous int('0755') parsed the
    # string as decimal 755 (0o1363) and produced the wrong mode.
    dir_mode = 0o755

    # Distill timeseries strings into lists
    for metric_index, metric_name in enumerate(assigned_metrics):
        self.check_if_parent_is_alive()

        try:
            raw_series = raw_assigned[metric_index]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)

            anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)

            # If it's anomalous, add it to list
            if anomalous:
                base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                metric = [datapoint, base_name]
                self.anomalous_metrics.append(metric)

                # Get the anomaly breakdown - who returned True?
                triggered_algorithms = []
                for index, value in enumerate(ensemble):
                    if value:
                        algorithm = settings.ALGORITHMS[index]
                        anomaly_breakdown[algorithm] += 1
                        triggered_algorithms.append(algorithm)

                # If Crucible or Panorama are enabled determine details
                determine_anomaly_details = False
                if settings.ENABLE_CRUCIBLE and settings.ANALYZER_CRUCIBLE_ENABLED:
                    determine_anomaly_details = True
                if settings.PANORAMA_ENABLED:
                    determine_anomaly_details = True

                if determine_anomaly_details:
                    metric_timestamp = str(int(timeseries[-1][0]))
                    # NOTE(review): uses the second datapoint (index 1), not
                    # the first - confirm this is intended.
                    from_timestamp = str(int(timeseries[1][0]))
                    timeseries_dir = base_name.replace('.', '/')

                # If Panorama is enabled - create a Panorama check
                if settings.PANORAMA_ENABLED:
                    if not os.path.exists(settings.PANORAMA_CHECK_PATH):
                        os.makedirs(settings.PANORAMA_CHECK_PATH, dir_mode)

                    # Note:
                    # The values are enclosed in single quotes intentionally
                    # as the imp.load_source used results in a shift in the
                    # decimal position when double quoted, e.g.
                    # value = "5622.0" gets imported as
                    # 2016-03-02 12:53:26 :: 28569 :: metric variable - value - 562.2
                    # single quoting results in the desired,
                    # 2016-03-02 13:16:17 :: 1515 :: metric variable - value - 5622.0
                    added_at = str(int(time()))
                    source = 'graphite'
                    panaroma_anomaly_data = 'metric = \'%s\'\n' \
                                            'value = \'%s\'\n' \
                                            'from_timestamp = \'%s\'\n' \
                                            'metric_timestamp = \'%s\'\n' \
                                            'algorithms = %s\n' \
                                            'triggered_algorithms = %s\n' \
                                            'app = \'%s\'\n' \
                                            'source = \'%s\'\n' \
                                            'added_by = \'%s\'\n' \
                                            'added_at = \'%s\'\n' \
                        % (base_name, str(datapoint), from_timestamp,
                           metric_timestamp, str(settings.ALGORITHMS),
                           triggered_algorithms, skyline_app, source,
                           this_host, added_at)

                    # Create an anomaly file with details about the anomaly
                    panaroma_anomaly_file = '%s/%s.%s.txt' % (
                        settings.PANORAMA_CHECK_PATH, added_at,
                        base_name)
                    try:
                        write_data_to_file(
                            skyline_app, panaroma_anomaly_file, 'w',
                            panaroma_anomaly_data)
                        logger.info('added panorama anomaly file :: %s' % (panaroma_anomaly_file))
                    except Exception:
                        logger.error('error :: failed to add panorama anomaly file :: %s' % (panaroma_anomaly_file))
                        logger.info(traceback.format_exc())

                # If Crucible is enabled - save timeseries and create a
                # Crucible check
                if settings.ENABLE_CRUCIBLE and settings.ANALYZER_CRUCIBLE_ENABLED:
                    crucible_anomaly_dir = settings.CRUCIBLE_DATA_FOLDER + '/' + timeseries_dir + '/' + metric_timestamp
                    if not os.path.exists(crucible_anomaly_dir):
                        os.makedirs(crucible_anomaly_dir, dir_mode)

                    # Note:
                    # The values are enclosed in single quotes intentionally
                    # as the imp.load_source used in crucible results in a
                    # shift in the decimal position when double quoted, e.g.
                    # value = "5622.0" gets imported as
                    # 2016-03-02 12:53:26 :: 28569 :: metric variable - value - 562.2
                    # single quoting results in the desired,
                    # 2016-03-02 13:16:17 :: 1515 :: metric variable - value - 5622.0
                    crucible_anomaly_data = 'metric = \'%s\'\n' \
                                            'value = \'%s\'\n' \
                                            'from_timestamp = \'%s\'\n' \
                                            'metric_timestamp = \'%s\'\n' \
                                            'algorithms = %s\n' \
                                            'triggered_algorithms = %s\n' \
                                            'anomaly_dir = \'%s\'\n' \
                                            'graphite_metric = True\n' \
                                            'run_crucible_tests = False\n' \
                                            'added_by = \'%s\'\n' \
                                            'added_at = \'%s\'\n' \
                        % (base_name, str(datapoint), from_timestamp,
                           metric_timestamp, str(settings.ALGORITHMS),
                           triggered_algorithms, crucible_anomaly_dir,
                           skyline_app, metric_timestamp)

                    # Create an anomaly file with details about the anomaly
                    crucible_anomaly_file = '%s/%s.txt' % (crucible_anomaly_dir, base_name)
                    try:
                        write_data_to_file(
                            skyline_app, crucible_anomaly_file, 'w',
                            crucible_anomaly_data)
                        logger.info('added crucible anomaly file :: %s' % (crucible_anomaly_file))
                    except Exception:
                        logger.error('error :: failed to add crucible anomaly file :: %s' % (crucible_anomaly_file))
                        logger.info(traceback.format_exc())

                    # Create timeseries json file with the timeseries
                    json_file = '%s/%s.json' % (crucible_anomaly_dir, base_name)
                    timeseries_json = str(timeseries).replace('[', '(').replace(']', ')')
                    try:
                        write_data_to_file(skyline_app, json_file, 'w', timeseries_json)
                        logger.info('added crucible timeseries file :: %s' % (json_file))
                    except Exception:
                        logger.error('error :: failed to add crucible timeseries file :: %s' % (json_file))
                        logger.info(traceback.format_exc())

                    # Create a crucible check file
                    crucible_check_file = '%s/%s.%s.txt' % (settings.CRUCIBLE_CHECK_PATH, metric_timestamp, base_name)
                    try:
                        write_data_to_file(
                            skyline_app, crucible_check_file, 'w',
                            crucible_anomaly_data)
                        logger.info('added crucible check :: %s,%s' % (base_name, metric_timestamp))
                    except Exception:
                        logger.error('error :: failed to add crucible check file :: %s' % (crucible_check_file))
                        logger.info(traceback.format_exc())

        # It could have been deleted by the Roomba
        except TypeError:
            exceptions['DeletedByRoomba'] += 1
        except TooShort:
            exceptions['TooShort'] += 1
        except Stale:
            exceptions['Stale'] += 1
        except Boring:
            exceptions['Boring'] += 1
        except Exception:
            # Catch-all; SystemExit/KeyboardInterrupt are left to propagate.
            exceptions['Other'] += 1
            logger.info(traceback.format_exc())

    # Add values to the queue so the parent process can collate
    for key, value in anomaly_breakdown.items():
        self.anomaly_breakdown_q.put((key, value))

    for key, value in exceptions.items():
        self.exceptions_q.put((key, value))

    spin_end = time() - spin_start
    logger.info('spin_process took %.2f seconds' % spin_end)
def spin_process(self, i, unique_metrics):
    """
    Assign a bunch of metrics for a process to analyze.

    This worker takes one contiguous slice of ``unique_metrics``, fetches
    the raw series from Redis with a single ``mget``, unpacks each one and
    runs ``run_selected_algorithm`` on it.  Anomalous metrics are appended
    to ``self.anomalous_metrics``; the per-process counters are merged into
    ``self.anomaly_breakdown`` and ``self.exceptions`` while holding
    ``self.lock``.

    :param i: 1-based worker number (the last worker is
        ``settings.ANALYZER_PROCESSES``).
    :param unique_metrics: indexable sequence of metric keys.
    :return: ``None``.
    """
    # Discover assigned metrics
    metric_count = len(unique_metrics)
    keys_per_processor = int(ceil(float(metric_count) / float(settings.ANALYZER_PROCESSES)))
    if i == settings.ANALYZER_PROCESSES:
        assigned_max = metric_count
    else:
        # Never run past the end of the list when the metrics do not divide
        # evenly between the workers.
        assigned_max = min(metric_count, i * keys_per_processor)
    # The lower bound follows from the worker number; computing it from
    # assigned_max made the last worker overlap the previous worker's slice.
    assigned_min = (i - 1) * keys_per_processor
    assigned_keys = range(assigned_min, assigned_max)

    # Compile assigned metrics
    assigned_metrics = [unique_metrics[index] for index in assigned_keys]

    # Check if this process is unnecessary
    if len(assigned_metrics) == 0:
        return

    # Multi get series
    raw_assigned = self.redis_conn.mget(assigned_metrics)

    # Make process-specific dicts
    exceptions = defaultdict(int)
    anomaly_breakdown = defaultdict(int)

    # Distill timeseries strings into lists
    for position, metric_name in enumerate(assigned_metrics):
        self.check_if_parent_is_alive()

        try:
            raw_series = raw_assigned[position]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)

            anomalous, ensemble, datapoint = run_selected_algorithm(timeseries)

            # If it's anomalous, add it to list
            if anomalous:
                metric = [datapoint, metric_name]
                self.anomalous_metrics.append(metric)

                # Get the anomaly breakdown - who returned True?
                for index, value in enumerate(ensemble):
                    if value:
                        algorithm = settings.ALGORITHMS[index]
                        anomaly_breakdown[algorithm] += 1

        # It could have been deleted by the Roomba
        except AttributeError:
            exceptions['DeletedByRoomba'] += 1
        except TooShort:
            exceptions['TooShort'] += 1
        except Stale:
            exceptions['Stale'] += 1
        except Incomplete:
            exceptions['Incomplete'] += 1
        except Boring:
            exceptions['Boring'] += 1
        except Exception:
            # Catch-all; SystemExit/KeyboardInterrupt are left to propagate.
            exceptions['Other'] += 1
            logger.info(traceback.format_exc())

    # Collate process-specific dicts to main dicts
    with self.lock:
        for key, value in anomaly_breakdown.items():
            if key not in self.anomaly_breakdown:
                self.anomaly_breakdown[key] = value
            else:
                self.anomaly_breakdown[key] += value

        for key, value in exceptions.items():
            if key not in self.exceptions:
                self.exceptions[key] = value
            else:
                self.exceptions[key] += value
def spin_process(self, i, boundary_metrics):
    """
    Assign a bunch of metrics for a process to analyze.

    Steps, as implemented below:

    1. Take this worker's contiguous slice of ``boundary_metrics`` and
       reduce it to the unique metric names (element ``[0]`` of each entry).
    2. ``mget`` the raw series for those names from Redis.
    3. For every metric, collect the matching ``BOUNDARY_METRICS`` tuple
       settings into a de-duplicated run list.
    4. For every run-list entry, unpack the series, dump it under
       ``/tmp/skyline/boundary/<algorithm>/`` and, when the tuple should be
       run, submit it to ``run_selected_algorithm``.
    5. Append anomalies to ``self.anomalous_metrics`` and put the
       per-process counters on ``self.anomaly_breakdown_q`` and
       ``self.exceptions_q``.

    :param i: 1-based worker number (the last worker is
        ``settings.BOUNDARY_PROCESSES``).
    :param boundary_metrics: indexable sequence whose entries have the
        metric name at index 0.
    :return: ``None``.
    """
    # Determine assigned metrics
    bp = settings.BOUNDARY_PROCESSES
    bm_range = len(boundary_metrics)
    keys_per_processor = int(ceil(float(bm_range) / float(bp)))
    if i == settings.BOUNDARY_PROCESSES:
        assigned_max = len(boundary_metrics)
    else:
        # This is a skyline bug, the original skyline code uses 1 as the
        # beginning position of the index, python indices begin with 0
        # assigned_max = len(boundary_metrics)
        # This closes the etsy/skyline pull request opened by @languitar on 17 Jun 2014
        # https://github.com/etsy/skyline/pull/94 Fix analyzer worker metric assignment
        assigned_max = min(len(boundary_metrics), i * keys_per_processor)
    assigned_min = (i - 1) * keys_per_processor
    assigned_keys = range(assigned_min, assigned_max)

    # Compile assigned metrics
    assigned_metrics_and_algos = [boundary_metrics[index] for index in assigned_keys]
    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug - printing assigned_metrics_and_algos')
        for assigned_metric_and_algo in assigned_metrics_and_algos:
            logger.info('debug - assigned_metric_and_algo - %s' % str(assigned_metric_and_algo))

    # Compile assigned metrics
    assigned_metrics = [entry[0] for entry in assigned_metrics_and_algos]

    # unique unhashed things
    def unique_noHash(seq):
        # Order-preserving de-duplication keyed on str(x), so unhashable
        # items such as lists can be handled.
        seen = set()
        return [x for x in seq if str(x) not in seen and not seen.add(str(x))]

    unique_assigned_metrics = unique_noHash(assigned_metrics)

    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug - unique_assigned_metrics - %s' % str(unique_assigned_metrics))
        logger.info('debug - printing unique_assigned_metrics:')
        for unique_assigned_metric in unique_assigned_metrics:
            logger.info('debug - unique_assigned_metric - %s' % str(unique_assigned_metric))

    # Check if this process is unnecessary
    if len(unique_assigned_metrics) == 0:
        return

    # Multi get series
    try:
        raw_assigned = self.redis_conn.mget(unique_assigned_metrics)
    except Exception:
        logger.error("failed to mget assigned_metrics from redis")
        return

    # Make process-specific dicts
    exceptions = defaultdict(int)
    anomaly_breakdown = defaultdict(int)

    # Reset boundary_algortims
    all_boundary_algorithms = []
    for metric in BOUNDARY_METRICS:
        all_boundary_algorithms.append(metric[1])

    # The unique algorithms that are being used
    boundary_algorithms = unique_noHash(all_boundary_algorithms)
    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug - boundary_algorithms - %s' % str(boundary_algorithms))

    discover_run_metrics = []

    # Distill metrics into a run list
    for raw_index, metric_name in enumerate(unique_assigned_metrics):
        self.check_if_parent_is_alive()

        try:
            if ENABLE_BOUNDARY_DEBUG:
                logger.info('debug - unpacking timeseries for %s - %s' % (metric_name, str(raw_index)))
            raw_series = raw_assigned[raw_index]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except Exception as e:
            exceptions['Other'] += 1
            logger.error("redis data error: " + traceback.format_exc())
            # %s, not %e: %e is the float-exponent conversion and raises
            # TypeError when handed an exception instance.
            logger.error("error: %s" % e)

        base_name = metric_name.replace(FULL_NAMESPACE, '', 1)

        # Determine the metrics BOUNDARY_METRICS metric tuple settings
        for metrick in BOUNDARY_METRICS:
            CHECK_MATCH_PATTERN = metrick[0]
            check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
            pattern_match = check_match_pattern.match(base_name)
            metric_pattern_matched = False
            if pattern_match:
                metric_pattern_matched = True
                algo_pattern_matched = False

                for algo in boundary_algorithms:
                    for metric in BOUNDARY_METRICS:
                        CHECK_MATCH_PATTERN = metric[0]
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info("debug - metric and algo pattern MATCHED - " + metric[0] + " | " + base_name + " | " + str(metric[1]))
                            metric_expiration_time = False
                            metric_min_average = False
                            metric_min_average_seconds = False
                            metric_trigger = False
                            # Initialised like the other settings so a falsy
                            # tuple entry cannot leave them unbound or
                            # carrying a value from a previous tuple.
                            alert_threshold = False
                            metric_alerters = False
                            algorithm = False
                            algo_pattern_matched = True
                            algorithm = metric[1]
                            # Each optional tuple element falls back to a
                            # default when the tuple is too short.
                            try:
                                if metric[2]:
                                    metric_expiration_time = metric[2]
                            except Exception:
                                metric_expiration_time = False
                            try:
                                if metric[3]:
                                    metric_min_average = metric[3]
                            except Exception:
                                metric_min_average = False
                            try:
                                if metric[4]:
                                    metric_min_average_seconds = metric[4]
                            except Exception:
                                metric_min_average_seconds = 1200
                            try:
                                if metric[5]:
                                    metric_trigger = metric[5]
                            except Exception:
                                metric_trigger = False
                            try:
                                if metric[6]:
                                    alert_threshold = metric[6]
                            except Exception:
                                alert_threshold = False
                            try:
                                if metric[7]:
                                    metric_alerters = metric[7]
                            except Exception:
                                metric_alerters = False

                        if metric_pattern_matched and algo_pattern_matched:
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info('debug - added metric - %s, %s, %s, %s, %s, %s, %s, %s, %s' % (str(raw_index), metric_name, str(metric_expiration_time), str(metric_min_average), str(metric_min_average_seconds), str(metric_trigger), str(alert_threshold), metric_alerters, algorithm))
                            discover_run_metrics.append([raw_index, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm])

    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug - printing discover_run_metrics')
        for discover_run_metric in discover_run_metrics:
            logger.info('debug - discover_run_metrics - %s' % str(discover_run_metric))
        logger.info('debug - build unique boundary metrics to analyze')

    # Determine the unique set of metrics to run
    run_metrics = unique_noHash(discover_run_metrics)

    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug - printing run_metrics')
        for run_metric in run_metrics:
            logger.info('debug - run_metrics - %s' % str(run_metric))

    # Distill timeseries strings and submit to run_selected_algorithm
    for metric_and_algo in run_metrics:
        self.check_if_parent_is_alive()

        try:
            raw_assigned_id = metric_and_algo[0]
            metric_name = metric_and_algo[1]
            base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
            metric_expiration_time = metric_and_algo[2]
            metric_min_average = metric_and_algo[3]
            metric_min_average_seconds = metric_and_algo[4]
            metric_trigger = metric_and_algo[5]
            alert_threshold = metric_and_algo[6]
            metric_alerters = metric_and_algo[7]
            algorithm = metric_and_algo[8]

            if ENABLE_BOUNDARY_DEBUG:
                logger.info('debug - unpacking timeseries for %s - %s' % (metric_name, str(raw_assigned_id)))

            raw_series = raw_assigned[metric_and_algo[0]]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)

            if ENABLE_BOUNDARY_DEBUG:
                logger.info('debug - unpacked OK - %s - %s' % (metric_name, str(raw_assigned_id)))

            autoaggregate = False
            autoaggregate_value = 0

            # Determine if the namespace is to be aggregated
            if BOUNDARY_AUTOAGGRERATION:
                # The flags are reset on every pattern, so the outcome is
                # decided by the last entry in the list.
                for autoaggregate_metric in BOUNDARY_AUTOAGGRERATION_METRICS:
                    autoaggregate = False
                    autoaggregate_value = 0
                    CHECK_MATCH_PATTERN = autoaggregate_metric[0]
                    base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                    check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                    pattern_match = check_match_pattern.match(base_name)
                    if pattern_match:
                        autoaggregate = True
                        autoaggregate_value = autoaggregate_metric[1]

            if ENABLE_BOUNDARY_DEBUG:
                logger.info('debug - BOUNDARY_AUTOAGGRERATION passed - %s - %s' % (metric_name, str(autoaggregate)))

            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug - analysing - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s' % (
                        metric_name, str(metric_expiration_time),
                        str(metric_min_average),
                        str(metric_min_average_seconds),
                        str(metric_trigger), str(alert_threshold),
                        metric_alerters, autoaggregate,
                        autoaggregate_value, algorithm)
                )
                # Dump the the timeseries data to a file
            timeseries_dump_dir = "/tmp/skyline/boundary/" + algorithm
            self.mkdir_p(timeseries_dump_dir)
            timeseries_dump_file = timeseries_dump_dir + "/" + metric_name + ".json"
            # The with-block closes the file; no explicit close is needed.
            with open(timeseries_dump_file, 'w+') as f:
                f.write(str(timeseries))

            # Check if a metric has its own unique BOUNDARY_METRICS alert
            # tuple, this allows us to paint an entire metric namespace with
            # the same brush AND paint a unique metric or namespace with a
            # different brush or scapel
            has_unique_tuple = False
            run_tupple = False
            boundary_metric_tuple = (base_name, algorithm, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters)
            wildcard_namespace = True
            for metric_tuple in BOUNDARY_METRICS:
                if not has_unique_tuple:
                    CHECK_MATCH_PATTERN = metric_tuple[0]
                    check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                    pattern_match = check_match_pattern.match(base_name)
                    if pattern_match:
                        if metric_tuple[0] == base_name:
                            wildcard_namespace = False
                if not has_unique_tuple:
                    if boundary_metric_tuple == metric_tuple:
                        has_unique_tuple = True
                        run_tupple = True
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('unique_tuple:')
                            logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))
                            logger.info('metric_tuple: %s' % str(metric_tuple))

            if not has_unique_tuple:
                if wildcard_namespace:
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('wildcard_namespace:')
                        logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))
                    run_tupple = True
                else:
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('wildcard_namespace: BUT WOULD NOT RUN')
                        logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))

            if ENABLE_BOUNDARY_DEBUG:
                logger.info('WOULD RUN run_selected_algorithm = %s' % run_tupple)

            if run_tupple:
                # Submit the timeseries and settings to run_selected_algorithm
                anomalous, ensemble, datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm = run_selected_algorithm(
                    timeseries, metric_name,
                    metric_expiration_time,
                    metric_min_average,
                    metric_min_average_seconds,
                    metric_trigger,
                    alert_threshold,
                    metric_alerters,
                    autoaggregate,
                    autoaggregate_value,
                    algorithm
                )
                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - analysed - %s' % (metric_name))
            else:
                anomalous = False
                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - more unique metric tuple not analysed - %s' % (metric_name))

            # If it's anomalous, add it to list
            if anomalous:
                anomalous_metric = [datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm]
                self.anomalous_metrics.append(anomalous_metric)

                # Get the anomaly breakdown - who returned True?
                for index, value in enumerate(ensemble):
                    if value:
                        anomaly_breakdown[algorithm] += 1

        # It could have been deleted by the Roomba
        except TypeError:
            exceptions['DeletedByRoomba'] += 1
        except TooShort:
            exceptions['TooShort'] += 1
        except Stale:
            exceptions['Stale'] += 1
        except Boring:
            exceptions['Boring'] += 1
        except Exception:
            # Catch-all; SystemExit/KeyboardInterrupt are left to propagate.
            exceptions['Other'] += 1
            logger.info("exceptions['Other'] traceback follows:")
            logger.info(traceback.format_exc())

    # Add values to the queue so the parent process can collate
    for key, value in anomaly_breakdown.items():
        self.anomaly_breakdown_q.put((key, value))

    for key, value in exceptions.items():
        self.exceptions_q.put((key, value))
def test_run_selected_algorithm(self, timeMock):
    """
    Run the selected algorithms over the fixture series and check the result.

    Asserts that the series is reported as anomalous, that at least
    ``settings.CONSENSUS`` algorithms voted for it, and that the reported
    tail average is 334.

    :param timeMock: mock whose ``return_value`` is set from ``self.data``
        (presumably the patched clock - the patch decorator is not visible
        here).
    """
    timeMock.return_value, timeseries = self.data(time.time())
    result, ensemble, tail_avg = algorithms.run_selected_algorithm(timeseries)

    self.assertTrue(result)
    # Count the truthy votes explicitly: on Python 3 ``filter`` returns a
    # lazy iterator, so ``len(filter(None, ensemble))`` raises TypeError.
    votes = sum(1 for vote in ensemble if vote)
    self.assertGreaterEqual(votes, settings.CONSENSUS)
    self.assertEqual(tail_avg, 334)
# Command-line check of a single series stored in a JSON file.
#
# Usage: <script> /path/to/serie.json
# The file must contain an object with a non-null "serie" entry.
# Exit status: 1 when the series is anomalous (or on a usage error),
# 2 when "serie" is missing or null, 0 otherwise - including when the
# series is too short, boring or stale.
import sys
import json
import algorithms
from algorithm_exceptions import *

if len(sys.argv) != 2:
    print(("usage: %s /path/to/serie.json" % sys.argv[0]), file=sys.stderr)
    sys.exit(1)

with open(sys.argv[1]) as data_file:
    data = json.load(data_file)

# A missing "serie" key is treated like a null one instead of raising
# KeyError (or TypeError when the document is not an object).
serie = data.get('serie') if isinstance(data, dict) else None
if serie is None:
    print(("usage: %s /path/to/serie.json" % sys.argv[0]), file=sys.stderr)
    sys.exit(2)

try:
    result = algorithms.run_selected_algorithm(serie)
except TooShort:
    print("Too short")
    sys.exit(0)
except Boring:
    print("Boring")
    sys.exit(0)
except Stale:
    print("Stale")
    sys.exit(0)

# NOTE(review): if run_selected_algorithm returns a tuple whose first item
# is the anomalous flag, the tuple itself is always truthy; unwrap it so
# the exit status reflects the flag.  A plain boolean is used as-is.
# Confirm against the algorithms module.
anomalous = result[0] if isinstance(result, tuple) else result
sys.exit(1 if anomalous else 0)
def spin_process(self, i, unique_metrics):
    """
    Assign a bunch of metrics for a process to analyze.

    This worker takes one contiguous slice of ``unique_metrics``, fetches
    the raw series from Redis with a single ``mget``, unpacks each one and
    runs ``run_selected_algorithm`` on it.  Anomalous metrics are appended
    to ``self.anomalous_metrics`` (with ``settings.FULL_NAMESPACE``
    stripped from the name) and the per-process counters are put on
    ``self.anomaly_breakdown_q`` and ``self.exceptions_q`` as
    ``(key, value)`` tuples.

    :param i: 1-based worker number (the last worker is
        ``settings.ANALYZER_PROCESSES``).
    :param unique_metrics: indexable sequence of metric keys.
    :return: ``None``.
    """
    logger.info("metric index : "+str(i))
    logger.info("metric name : " + str(unique_metrics))

    # Discover assigned metrics
    metric_count = len(unique_metrics)
    keys_per_processor = int(ceil(float(metric_count) / float(settings.ANALYZER_PROCESSES)))
    if i == settings.ANALYZER_PROCESSES:
        assigned_max = metric_count
    else:
        # Never run past the end of the list when the metrics do not divide
        # evenly between the workers.
        assigned_max = min(metric_count, i * keys_per_processor)
    # The lower bound follows from the worker number; computing it from
    # assigned_max made the last worker overlap the previous worker's slice.
    assigned_min = (i - 1) * keys_per_processor
    assigned_keys = range(assigned_min, assigned_max)

    # Compile assigned metrics
    assigned_metrics = [unique_metrics[index] for index in assigned_keys]

    # Check if this process is unnecessary
    if len(assigned_metrics) == 0:
        logger.info("unnecessary process ,return direct")
        return

    # Multi get series
    raw_assigned = self.redis_conn.mget(assigned_metrics)

    # Make process-specific dicts
    exceptions = defaultdict(int)
    anomaly_breakdown = defaultdict(int)

    logger.info("[data length] assigned_metrics len : " + str(len(assigned_metrics)))

    # Distill timeseries strings into lists
    for position, metric_name in enumerate(assigned_metrics):
        self.check_if_parent_is_alive()

        try:
            raw_series = raw_assigned[position]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)

            logger.info("start run selected algorithm ~")
            anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)

            # If it's anomalous, add it to list
            if anomalous:
                base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                metric = [datapoint, base_name]
                self.anomalous_metrics.append(metric)

                # Get the anomaly breakdown - who returned True?
                for index, value in enumerate(ensemble):
                    if value:
                        algorithm = settings.ALGORITHMS[index]
                        anomaly_breakdown[algorithm] += 1

        # It could have been deleted by the Roomba
        except TypeError:
            exceptions['DeletedByRoomba'] += 1
        except TooShort:
            exceptions['TooShort'] += 1
        except Stale:
            exceptions['Stale'] += 1
        except Boring:
            exceptions['Boring'] += 1
        except Exception:
            # Catch-all; SystemExit/KeyboardInterrupt are left to propagate.
            exceptions['Other'] += 1
            logger.info(traceback.format_exc())

    # Add values to the queue so the parent process can collate
    for key, value in anomaly_breakdown.items():
        self.anomaly_breakdown_q.put((key, value))

    for key, value in exceptions.items():
        self.exceptions_q.put((key, value))
def test_run_selected_algorithm(self, timeMock):
    """
    Run the selected algorithms over the fixture series for "test.metric".

    Asserts that the series is reported as anomalous, that at least
    ``settings.CONSENSUS`` algorithms voted for it, and that the reported
    datapoint is 1000.

    :param timeMock: mock whose ``return_value`` is set from ``self.data``
        (presumably the patched clock - the patch decorator is not visible
        here).
    """
    timeMock.return_value, timeseries = self.data(time())
    result, ensemble, datapoint = algorithms.run_selected_algorithm(timeseries, "test.metric")

    self.assertTrue(result)
    # ``filter`` is lazy on Python 3 and has no ``len``; count the truthy
    # votes directly so the check works on both Python 2 and 3.
    agreeing = sum(1 for vote in ensemble if vote)
    self.assertGreaterEqual(agreeing, settings.CONSENSUS)
    self.assertEqual(datapoint, 1000)
def spin_process(self, i, boundary_metrics):
    """
    Assign a bunch of metrics for a process to analyze.

    Args:
        i: Number of this worker process. The slicing arithmetic treats it
            as 1-based.
        boundary_metrics: Indexable collection whose entries carry the
            metric key at position 0.

    The method works in two passes. The first pairs every assigned metric
    with the ``BOUNDARY_METRICS`` tuples whose pattern matches its base name
    and builds a de-duplicated run list. The second unpacks each timeseries,
    dumps it to a file under ``/tmp/skyline/boundary/<algorithm>`` and hands
    it to ``run_selected_algorithm``. Anomalies are appended to
    ``self.anomalous_metrics``; counters go to ``self.anomaly_breakdown_q``
    and ``self.exceptions_q``.
    """
    # Determine assigned metrics
    bp = settings.BOUNDARY_PROCESSES
    bm_range = len(boundary_metrics)
    keys_per_processor = int(ceil(float(bm_range) / float(bp)))
    # This is a skyline bug: the original skyline code uses 1 as the
    # beginning position of the index, while python indices begin with 0.
    # This closes the etsy/skyline pull request opened by @languitar on
    # 17 Jun 2014:
    # https://github.com/etsy/skyline/pull/94 Fix analyzer worker metric assignment
    # For the last worker i * keys_per_processor >= bm_range, so the clamp
    # also covers the "last process takes the remainder" case.
    assigned_max = min(bm_range, i * keys_per_processor)
    assigned_min = (i - 1) * keys_per_processor
    assigned_keys = range(assigned_min, assigned_max)

    # Compile assigned metrics
    assigned_metrics_and_algos = [
        boundary_metrics[index] for index in assigned_keys
    ]
    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug - printing assigned_metrics_and_algos')
        for assigned_metric_and_algo in assigned_metrics_and_algos:
            logger.info('debug - assigned_metric_and_algo - %s' %
                        str(assigned_metric_and_algo))

    # Compile assigned metrics
    assigned_metrics = [entry[0] for entry in assigned_metrics_and_algos]

    # unique unhashed things
    def unique_noHash(seq):
        # Order-preserving de-duplication keyed on str(x), so unhashable
        # items such as lists can be handled.
        seen = set()
        return [
            x for x in seq if str(x) not in seen and not seen.add(str(x))
        ]

    def tuple_setting(metric_tuple, position, missing):
        # Read an optional positional setting: a falsy value becomes False,
        # an absent/unreadable position becomes ``missing``.
        try:
            return metric_tuple[position] or False
        except Exception:
            return missing

    unique_assigned_metrics = unique_noHash(assigned_metrics)

    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug - unique_assigned_metrics - %s' %
                    str(unique_assigned_metrics))
        logger.info('debug - printing unique_assigned_metrics:')
        for unique_assigned_metric in unique_assigned_metrics:
            logger.info('debug - unique_assigned_metric - %s' %
                        str(unique_assigned_metric))

    # Check if this process is unnecessary
    if len(unique_assigned_metrics) == 0:
        return

    # Multi get series
    try:
        raw_assigned = self.redis_conn.mget(unique_assigned_metrics)
    except Exception:
        logger.error("failed to mget assigned_metrics from redis")
        return

    # Make process-specific dicts
    exceptions = defaultdict(int)
    anomaly_breakdown = defaultdict(int)

    # Reset boundary_algorithms
    all_boundary_algorithms = [entry[1] for entry in BOUNDARY_METRICS]

    # The unique algorithms that are being used
    boundary_algorithms = unique_noHash(all_boundary_algorithms)
    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug - boundary_algorithms - %s' %
                    str(boundary_algorithms))

    discover_run_metrics = []

    # Distill metrics into a run list
    for series_index, metric_name in enumerate(unique_assigned_metrics):
        self.check_if_parent_is_alive()

        try:
            if ENABLE_BOUNDARY_DEBUG:
                logger.info('debug - unpacking timeseries for %s - %s' %
                            (metric_name, str(series_index)))
            raw_series = raw_assigned[series_index]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except Exception as e:
            exceptions['Other'] += 1
            logger.error("redis data error: " + traceback.format_exc())
            # '%e' is the float-exponent conversion and raises TypeError for
            # an exception object; '%s' logs the message as intended.
            logger.error("error: %s" % e)

        base_name = metric_name.replace(FULL_NAMESPACE, '', 1)

        # Determine the metrics BOUNDARY_METRICS metric tuple settings.
        # NOTE(review): the nesting below was reconstructed from the
        # statement order; the repeated passes only add duplicates, which
        # unique_noHash removes afterwards - confirm against the original
        # layout.
        for metrick in BOUNDARY_METRICS:
            CHECK_MATCH_PATTERN = metrick[0]
            check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
            pattern_match = check_match_pattern.match(base_name)
            metric_pattern_matched = False
            if pattern_match:
                metric_pattern_matched = True
                algo_pattern_matched = False
                for algo in boundary_algorithms:
                    for metric in BOUNDARY_METRICS:
                        CHECK_MATCH_PATTERN = metric[0]
                        check_match_pattern = re.compile(
                            CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(
                            base_name)
                        if pattern_match:
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info(
                                    "debug - metric and algo pattern MATCHED - "
                                    + metric[0] + " | " + base_name + " | " +
                                    str(metric[1]))
                            algo_pattern_matched = True
                            algorithm = metric[1]
                            # Every optional setting is re-read per tuple so
                            # nothing leaks over from a previous tuple.
                            metric_expiration_time = tuple_setting(
                                metric, 2, False)
                            metric_min_average = tuple_setting(
                                metric, 3, False)
                            metric_min_average_seconds = tuple_setting(
                                metric, 4, 1200)
                            metric_trigger = tuple_setting(metric, 5, False)
                            alert_threshold = tuple_setting(metric, 6, False)
                            metric_alerters = tuple_setting(metric, 7, False)
                        if metric_pattern_matched and algo_pattern_matched:
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info(
                                    'debug - added metric - %s, %s, %s, %s, %s, %s, %s, %s, %s'
                                    % (str(series_index), metric_name,
                                       str(metric_expiration_time),
                                       str(metric_min_average),
                                       str(metric_min_average_seconds),
                                       str(metric_trigger),
                                       str(alert_threshold),
                                       metric_alerters, algorithm))
                            discover_run_metrics.append([
                                series_index, metric_name,
                                metric_expiration_time, metric_min_average,
                                metric_min_average_seconds, metric_trigger,
                                alert_threshold, metric_alerters, algorithm
                            ])

    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug - printing discover_run_metrics')
        for discover_run_metric in discover_run_metrics:
            logger.info('debug - discover_run_metrics - %s' %
                        str(discover_run_metric))
        logger.info('debug - build unique boundary metrics to analyze')

    # Determine the unique set of metrics to run
    run_metrics = unique_noHash(discover_run_metrics)

    if ENABLE_BOUNDARY_DEBUG:
        logger.info('debug - printing run_metrics')
        for run_metric in run_metrics:
            logger.info('debug - run_metrics - %s' % str(run_metric))

    # Distill timeseries strings and submit to run_selected_algorithm
    for metric_and_algo in run_metrics:
        self.check_if_parent_is_alive()

        try:
            raw_assigned_id = metric_and_algo[0]
            metric_name = metric_and_algo[1]
            base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
            metric_expiration_time = metric_and_algo[2]
            metric_min_average = metric_and_algo[3]
            metric_min_average_seconds = metric_and_algo[4]
            metric_trigger = metric_and_algo[5]
            alert_threshold = metric_and_algo[6]
            metric_alerters = metric_and_algo[7]
            algorithm = metric_and_algo[8]

            if ENABLE_BOUNDARY_DEBUG:
                logger.info('debug - unpacking timeseries for %s - %s' %
                            (metric_name, str(raw_assigned_id)))

            raw_series = raw_assigned[raw_assigned_id]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
            if ENABLE_BOUNDARY_DEBUG:
                logger.info('debug - unpacked OK - %s - %s' %
                            (metric_name, str(raw_assigned_id)))

            autoaggregate = False
            autoaggregate_value = 0

            # Determine if the namespace is to be aggregated.  The flag is
            # no longer reset per pattern, so a match is not discarded by a
            # later non-matching pattern; the last matching pattern supplies
            # the value.
            if BOUNDARY_AUTOAGGRERATION:
                for autoaggregate_metric in BOUNDARY_AUTOAGGRERATION_METRICS:
                    CHECK_MATCH_PATTERN = autoaggregate_metric[0]
                    check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                    pattern_match = check_match_pattern.match(base_name)
                    if pattern_match:
                        autoaggregate = True
                        autoaggregate_value = autoaggregate_metric[1]

            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug - BOUNDARY_AUTOAGGRERATION passed - %s - %s' %
                    (metric_name, str(autoaggregate)))

            if ENABLE_BOUNDARY_DEBUG:
                logger.info(
                    'debug - analysing - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s'
                    % (metric_name, str(metric_expiration_time),
                       str(metric_min_average),
                       str(metric_min_average_seconds),
                       str(metric_trigger), str(alert_threshold),
                       metric_alerters, autoaggregate, autoaggregate_value,
                       algorithm))

            # Dump the timeseries data to a file
            timeseries_dump_dir = "/tmp/skyline/boundary/" + algorithm
            self.mkdir_p(timeseries_dump_dir)
            timeseries_dump_file = (
                timeseries_dump_dir + "/" + metric_name + ".json")
            # The with-block closes the file; no explicit close() is needed.
            with open(timeseries_dump_file, 'w+') as f:
                f.write(str(timeseries))

            # Check if a metric has its own unique BOUNDARY_METRICS alert
            # tuple, this allows us to paint an entire metric namespace with
            # the same brush AND paint a unique metric or namespace with a
            # different brush or scalpel
            has_unique_tuple = False
            run_tupple = False
            boundary_metric_tuple = (base_name, algorithm,
                                     metric_expiration_time,
                                     metric_min_average,
                                     metric_min_average_seconds,
                                     metric_trigger, alert_threshold,
                                     metric_alerters)
            wildcard_namespace = True
            for metric_tuple in BOUNDARY_METRICS:
                CHECK_MATCH_PATTERN = metric_tuple[0]
                check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                pattern_match = check_match_pattern.match(base_name)
                if not pattern_match:
                    continue
                if metric_tuple[0] == base_name:
                    wildcard_namespace = False
                if boundary_metric_tuple == metric_tuple:
                    has_unique_tuple = True
                    run_tupple = True
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('unique_tuple:')
                        logger.info('boundary_metric_tuple: %s' %
                                    str(boundary_metric_tuple))
                        logger.info('metric_tuple: %s' % str(metric_tuple))
                    # Nothing further is evaluated once the exact tuple has
                    # been found.
                    break

            if not has_unique_tuple:
                if wildcard_namespace:
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('wildcard_namespace:')
                        logger.info('boundary_metric_tuple: %s' %
                                    str(boundary_metric_tuple))
                    run_tupple = True
                else:
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('wildcard_namespace: BUT WOULD NOT RUN')
                        logger.info('boundary_metric_tuple: %s' %
                                    str(boundary_metric_tuple))

            if ENABLE_BOUNDARY_DEBUG:
                logger.info('WOULD RUN run_selected_algorithm = %s' %
                            run_tupple)

            if run_tupple:
                # Submit the timeseries and settings to run_selected_algorithm
                (anomalous, ensemble, datapoint, metric_name,
                 metric_expiration_time, metric_min_average,
                 metric_min_average_seconds, metric_trigger, alert_threshold,
                 metric_alerters, algorithm) = run_selected_algorithm(
                     timeseries, metric_name, metric_expiration_time,
                     metric_min_average, metric_min_average_seconds,
                     metric_trigger, alert_threshold, metric_alerters,
                     autoaggregate, autoaggregate_value, algorithm)
                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - analysed - %s' % (metric_name))
            else:
                anomalous = False
                if ENABLE_BOUNDARY_DEBUG:
                    logger.info(
                        'debug - more unique metric tuple not analysed - %s'
                        % (metric_name))

            # If it's anomalous, add it to list
            if anomalous:
                anomalous_metric = [
                    datapoint, metric_name, metric_expiration_time,
                    metric_min_average, metric_min_average_seconds,
                    metric_trigger, alert_threshold, metric_alerters,
                    algorithm
                ]
                self.anomalous_metrics.append(anomalous_metric)

                # Get the anomaly breakdown - who returned True?
                for index, value in enumerate(ensemble):
                    if value:
                        anomaly_breakdown[algorithm] += 1

        # It could have been deleted by the Roomba
        except TypeError:
            exceptions['DeletedByRoomba'] += 1
        except TooShort:
            exceptions['TooShort'] += 1
        except Stale:
            exceptions['Stale'] += 1
        except Boring:
            exceptions['Boring'] += 1
        except Exception:
            exceptions['Other'] += 1
            logger.info("exceptions['Other'] traceback follows:")
            logger.info(traceback.format_exc())

    # Add values to the queue so the parent process can collate
    for key, value in anomaly_breakdown.items():
        self.anomaly_breakdown_q.put((key, value))

    for key, value in exceptions.items():
        self.exceptions_q.put((key, value))
def spin_process(self, i, run_timestamp):
    """
    Assign a metric for a process to analyze.

    Picks the first (sorted) check file in ``settings.MIRAGE_CHECK_PATH``,
    loads its variables via ``self.load_metric_vars``, surfaces the metric's
    timeseries from graphite into a json file and runs
    ``run_selected_algorithm`` on it.

    Args:
        i: Worker process number (not used by this method's body).
        run_timestamp: Timestamp of the current run; compared with the
            check's own timestamp to decide whether the check is stale.

    Results are appended to ``self.anomalous_metrics`` or
    ``self.not_anomalous_metrics``; counters are pushed to
    ``self.mirage_anomaly_breakdown_q`` and ``self.mirage_exceptions_q``.
    """
    # Discover metric to analyze
    check_path = settings.MIRAGE_CHECK_PATH
    metric_var_files = [
        f for f in listdir(check_path) if isfile(join(check_path, f))
    ]

    # Check if this process is unnecessary
    if len(metric_var_files) == 0:
        return

    metric_var_files_sorted = sorted(metric_var_files)
    metric_check_file = check_path + "/" + metric_var_files_sorted[0]

    # Load metric variables
    self.load_metric_vars(metric_check_file)

    # Test metric variables: each one must be non-empty, and is recorded in
    # self.metric_variables as soon as it has been validated.
    if len(metric_vars.metric) == 0:
        return
    metric = metric_vars.metric
    self.metric_variables.append(['metric_name', metric_vars.metric])

    if len(metric_vars.value) == 0:
        return
    self.metric_variables.append(['metric_value', metric_vars.value])

    if len(metric_vars.hours_to_resolve) == 0:
        return
    self.metric_variables.append(
        ['hours_to_resolve', metric_vars.hours_to_resolve])

    if len(metric_vars.metric_timestamp) == 0:
        return
    self.metric_variables.append(
        ['metric_timestamp', metric_vars.metric_timestamp])

    # Ignore any metric check with a timestamp older than
    # settings.MIRAGE_STALE_SECONDS
    int_metric_timestamp = int(metric_vars.metric_timestamp)
    int_run_timestamp = int(run_timestamp)
    metric_timestamp_age = int_run_timestamp - int_metric_timestamp
    if metric_timestamp_age > settings.MIRAGE_STALE_SECONDS:
        logger.info(
            'stale check :: %s check request is %s seconds old - discarding'
            % (metric_vars.metric, metric_timestamp_age))
        # Remove metric check file
        if os.path.exists(metric_check_file):
            os.remove(metric_check_file)
            logger.info('removed %s' % (metric_check_file))
        else:
            logger.info('could not remove %s' % (metric_check_file))
        # A discarded check must not be analysed: without this return the
        # stale request was still fetched from graphite and processed.
        return

    # Calculate hours second order resolution to seconds
    second_order_resolution_seconds = int(
        metric_vars.hours_to_resolve) * 3600

    # Calculate graphite from and until parameters from the metric timestamp
    graphite_until = datetime.datetime.fromtimestamp(
        int_metric_timestamp).strftime('%H:%M_%Y%m%d')
    second_resolution_timestamp = (
        int_metric_timestamp - second_order_resolution_seconds)
    graphite_from = datetime.datetime.fromtimestamp(
        second_resolution_timestamp).strftime('%H:%M_%Y%m%d')

    # Remove any old json file related to the metric
    metric_json_file = (settings.MIRAGE_DATA_FOLDER + "/" +
                        metric_vars.metric + "/" + metric_vars.metric +
                        '.json')
    try:
        os.remove(metric_json_file)
    except OSError:
        pass

    # Get data from graphite
    logger.info(
        'retrieve data :: surfacing %s timeseries from graphite for %s seconds'
        % (metric_vars.metric, second_order_resolution_seconds))
    self.surface_graphite_metric_data(metric_vars.metric, graphite_from,
                                      graphite_until)

    # Check there is a json timeseries file to test
    if not os.path.isfile(metric_json_file):
        logger.error(
            'retrieve failed :: failed to surface %s timeseries from graphite'
            % (metric_vars.metric))
        # Remove metric check file
        try:
            os.remove(metric_check_file)
        except OSError:
            pass
        return

    logger.info('retrieved data :: for %s at %s seconds' %
                (metric_vars.metric, second_order_resolution_seconds))

    # Make process-specific dicts
    exceptions = defaultdict(int)
    anomaly_breakdown = defaultdict(int)

    self.check_if_parent_is_alive()

    with open(metric_json_file, 'r') as f:
        timeseries = json.loads(f.read())
    logger.info('data points surfaced :: %s' % (len(timeseries)))

    try:
        logger.info('analyzing :: %s at %s seconds' %
                    (metric_vars.metric, second_order_resolution_seconds))
        anomalous, ensemble, datapoint = run_selected_algorithm(
            timeseries, metric_vars.metric, second_order_resolution_seconds)

        base_name = metric.replace(settings.FULL_NAMESPACE, '', 1)

        # If it's anomalous, add it to list
        if anomalous:
            self.anomalous_metrics.append([datapoint, base_name])
            logger.info('anomaly detected :: %s with %s' %
                        (metric_vars.metric, metric_vars.value))
            # It runs so fast, this allows us to process 30 anomalies/min
            sleep(2)

            # Get the anomaly breakdown - who returned True?
            for index, value in enumerate(ensemble):
                if value:
                    algorithm = settings.MIRAGE_ALGORITHMS[index]
                    anomaly_breakdown[algorithm] += 1
        else:
            self.not_anomalous_metrics.append([datapoint, base_name])
            logger.info('not anomalous :: %s with %s' %
                        (metric_vars.metric, metric_vars.value))

    # It could have been deleted by the Roomba
    except TypeError:
        exceptions['DeletedByRoomba'] += 1
        logger.info('exceptions :: DeletedByRoomba')
    except TooShort:
        exceptions['TooShort'] += 1
        logger.info('exceptions :: TooShort')
    except Stale:
        exceptions['Stale'] += 1
        logger.info('exceptions :: Stale')
    except Boring:
        exceptions['Boring'] += 1
        logger.info('exceptions :: Boring')
    except Exception:
        exceptions['Other'] += 1
        logger.info('exceptions :: Other')
        logger.info(traceback.format_exc())

    # Add values to the queue so the parent process can collate
    for key, value in anomaly_breakdown.items():
        self.mirage_anomaly_breakdown_q.put((key, value))

    for key, value in exceptions.items():
        self.mirage_exceptions_q.put((key, value))

    # Remove metric check file
    try:
        os.remove(metric_check_file)
    except OSError:
        pass
def spin_process(self, i, unique_metrics):
    """
    Assign a bunch of metrics for a process to analyze.

    The worker registers an ``alive`` key and itself in the sorted set
    ``settings.ANALYZER_PROCESS_KEY`` through ``self.ring``, derives its
    1-based position among the registered workers, and analyzes the matching
    slice of ``unique_metrics``.

    Args:
        i: Local worker number; used only to build this worker's key names.
        unique_metrics: Indexable collection of metric keys to split
            between the workers.

    Anomalies are appended to ``self.anomalous_metrics`` and, when any were
    found by this call, packed and stored under this worker's process key.
    Counters are merged into ``self.anomaly_breakdown`` / ``self.exceptions``
    under ``self.lock``; exception counters are also put on
    ``self.exceptions_q``.
    """
    process_key = '.'.join(
        ['skyline', 'analyzer', socket.gethostname(), str(i)])
    alive_key = '.'.join([process_key, 'alive'])
    self.ring.run('set', alive_key, 1)
    self.ring.run('expire', alive_key, 30)

    # Drop registered workers whose alive key no longer has a value.
    processes = list(
        self.ring.run('zrange', settings.ANALYZER_PROCESS_KEY, 0, -1))
    for key in processes:
        value = self.ring.run('get', key)
        if not value:
            # NOTE(review): the extra 0 is passed through unchanged; Redis
            # ZREM takes only members after the key - confirm what ring.run
            # expects here.
            self.ring.run('zrem', settings.ANALYZER_PROCESS_KEY, 0, key)

    # Add current process to index and determine position
    if not self.ring.run('zscore', settings.ANALYZER_PROCESS_KEY, alive_key):
        self.ring.run('zadd', settings.ANALYZER_PROCESS_KEY, time(),
                      alive_key)
    # NOTE(review): the TTL refresh is assumed to run on every call, not
    # only when the worker was just added - confirm against the original
    # indentation.
    self.ring.run('expire', settings.ANALYZER_PROCESS_KEY, 60)
    process_position = self.ring.run(
        'zrank', settings.ANALYZER_PROCESS_KEY, alive_key) + 1
    process_count = self.ring.run('zcard', settings.ANALYZER_PROCESS_KEY)

    # If there are fewer processes than we know are going to be running,
    # assume the others will start
    if process_count < settings.ANALYZER_PROCESSES:
        process_count = settings.ANALYZER_PROCESSES

    # Discover assigned metrics
    total_metrics = len(unique_metrics)
    keys_per_processor = int(
        ceil(float(total_metrics) / float(process_count)))
    # Positions are 1-based, list indices 0-based: position p owns
    # [(p - 1) * keys_per_processor, p * keys_per_processor), clamped to the
    # number of metrics.  Deriving the lower bound from the upper bound made
    # the last worker overlap its predecessor and let other workers index
    # past the end of the list.
    assigned_min = (process_position - 1) * keys_per_processor
    assigned_max = min(total_metrics, process_position * keys_per_processor)

    # Compile assigned metrics
    assigned_metrics = [
        unique_metrics[index] for index in range(assigned_min, assigned_max)
    ]

    # Check if this process is unnecessary
    if len(assigned_metrics) == 0:
        return

    # Multi get series
    raw_assigned = self.ring.run('mget', assigned_metrics)

    # Make process-specific dicts
    exceptions = defaultdict(int)
    anomaly_breakdown = defaultdict(int)
    # Anomalies found by this call.  The bare name ``anomalous_metrics`` was
    # referenced below without ever being assigned in this function.
    found_anomalies = []

    # Distill timeseries strings into lists
    for position, metric_name in enumerate(assigned_metrics):
        self.check_if_parent_is_alive()

        try:
            raw_series = raw_assigned[position]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)

            anomalous, ensemble, datapoint = run_selected_algorithm(
                timeseries, metric_name)

            # If it's anomalous, add it to list
            if anomalous:
                base_name = metric_name.replace(
                    settings.FULL_NAMESPACE, '', 1)
                metric = [datapoint, base_name]
                self.anomalous_metrics.append(metric)
                found_anomalies.append(metric)

                # Get the anomaly breakdown - who returned True?
                for index, value in enumerate(ensemble):
                    if value:
                        algorithm = settings.ALGORITHMS[index]
                        anomaly_breakdown[algorithm] += 1

        # It could have been deleted by the Roomba
        except TypeError:
            exceptions['DeletedByRoomba'] += 1
        except TooShort:
            exceptions['TooShort'] += 1
        except Stale:
            exceptions['Stale'] += 1
        except Incomplete:
            exceptions['Incomplete'] += 1
        except Boring:
            exceptions['Boring'] += 1
        except Exception:
            exceptions['Other'] += 1
            logger.info(traceback.format_exc())

    # If anomalies were detected, pack and write the anomaly data to Redis
    if len(found_anomalies) > 0:
        packed = Packer().pack(found_anomalies)
        self.ring.run('set', process_key, packed)
        # expire the key in 30s so anomalies don't show up for too long
        self.ring.run('expire', process_key, 30)
        self.ring.run('sadd', settings.ANALYZER_ANOMALY_KEY, process_key)
        # expire the key in 60s so anomalies don't show up for too long
        self.ring.run('expire', settings.ANALYZER_ANOMALY_KEY, 60)

    # Collate process-specific dicts to main dicts
    with self.lock:
        for key, value in anomaly_breakdown.items():
            if key not in self.anomaly_breakdown:
                self.anomaly_breakdown[key] = value
            else:
                self.anomaly_breakdown[key] += value

        for key, value in exceptions.items():
            if key not in self.exceptions:
                self.exceptions[key] = value
            else:
                self.exceptions[key] += value

    for key, value in exceptions.items():
        self.exceptions_q.put((key, value))