예제 #1
0
 def test_run_selected_algorithm(self, timeMock):
     timeMock.return_value, timeseries = self.data(time())
     result, ensemble, datapoint = algorithms.run_selected_algorithm(
         timeseries, "test.metric")
     self.assertTrue(result)
     self.assertTrue(len(filter(None, ensemble)) >= settings.CONSENSUS)
     self.assertEqual(datapoint, 1000)
예제 #2
0
파일: matcher.py 프로젝트: zhiaixuexi/hodor
    def check_match(self):
        anomalous = False
        if len(self.data_matches) == 0:
            return anomalous
        try:
            anomalous, ensemble = algorithms.run_selected_algorithm(self.data_matches)
            # If it's anomalous, add it to list
            if anomalous:
                # Get the anomaly breakdown - who returned True?
                for index, value in enumerate(ensemble):
                    if value:
                        algorithm = config.idea['algorithms'][index]
                        self.anomaly_breakdown[algorithm] += 1

        # It could have been deleted by the Roomba
        except TypeError:
            self.exceptions['DeletedByRoomba'] += 1
        except ex.TooShort:
            self.exceptions['TooShort'] += 1
        except ex.Stale:
            self.exceptions['Stale'] += 1
        except ex.Boring:
            self.exceptions['Boring'] += 1
        except:
            self.exceptions['Other'] += 1
            logger.log.info(traceback.format_exc())
        return anomalous
예제 #3
0
    def test_run_selected_algorithm_runs_novel_algorithm(self, timeMock, 
            algorithmsListMock, consensusMock):
        """
        Assert that a user can add their own custom algorithm.

        This mocks out settings.ALGORITHMS and settings.CONSENSUS to use only a
        single custom-defined function (alwaysTrue)
        """
        algorithmsListMock.__iter__.return_value = ['alwaysTrue']
        consensusMock=1
        timeMock.return_value, timeseries = self.data(time.time())
        
        alwaysTrue = Mock(return_value=True)
        with patch.dict(algorithms.__dict__, {'alwaysTrue': alwaysTrue}):
            result, ensemble, tail_avg = algorithms.run_selected_algorithm(timeseries)
        
        alwaysTrue.assert_called_with(timeseries)
        self.assertTrue(result)
        self.assertEqual(ensemble, [True])
        self.assertEqual(tail_avg, 334)
예제 #4
0
파일: analyzer.py 프로젝트: ozzie00/skyline
    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Discover assigned metrics
        keys_per_processor = int(
            ceil(
                float(len(unique_metrics)) /
                float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = i * keys_per_processor
        assigned_min = assigned_max - keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(
                    timeseries)

                # If it's anomalous, add it to list
                if anomalous:
                    metric = [datapoint, metric_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except AttributeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Incomplete:
                exceptions['Incomplete'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Collate process-specific dicts to main dicts
        with self.lock:
            for key, value in anomaly_breakdown.items():
                if key not in self.anomaly_breakdown:
                    self.anomaly_breakdown[key] = value
                else:
                    self.anomaly_breakdown[key] += value

            for key, value in exceptions.items():
                if key not in self.exceptions:
                    self.exceptions[key] = value
                else:
                    self.exceptions[key] += value
예제 #5
0
파일: analyzer.py 프로젝트: blak3r2/skyline
    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.

        Multiple get the assigned_metrics to the process from Redis.

        For each metric:

        - unpack the `raw_timeseries` for the metric.
        - Analyse each timeseries against `ALGORITHMS` to determine if it is
          anomalous.
        - If anomalous add it to the :obj:`self.anomalous_metrics` list
        - Add what algorithms triggered to the :obj:`self.anomaly_breakdown_q`
          queue
        - If :mod:`settings.ENABLE_CRUCIBLE` is ``True``:

          - Add a crucible data file with the details about the timeseries and
            anomaly.
          - Write the timeseries to a json file for crucible.

        Add keys and values to the queue so the parent process can collate for:\n
        * :py:obj:`self.anomaly_breakdown_q`
        * :py:obj:`self.exceptions_q`
        """

        spin_start = time()
        logger.info('spin_process started')

        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = min(len(unique_metrics), i * keys_per_processor)
        # Fix analyzer worker metric assignment #94
        # https://github.com/etsy/skyline/pull/94 @languitar:worker-fix
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    metric = [datapoint, base_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    triggered_algorithms = []
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1
                            triggered_algorithms.append(algorithm)

                    # If Crucible or Panorama are enabled determine details
                    determine_anomaly_details = False
                    if settings.ENABLE_CRUCIBLE and settings.ANALYZER_CRUCIBLE_ENABLED:
                        determine_anomaly_details = True
                    if settings.PANORAMA_ENABLED:
                        determine_anomaly_details = True

                    if determine_anomaly_details:
                        metric_timestamp = str(int(timeseries[-1][0]))
                        from_timestamp = str(int(timeseries[1][0]))
                        timeseries_dir = base_name.replace('.', '/')

                    # If Panorama is enabled - create a Panorama check
                    if settings.PANORAMA_ENABLED:
                        if not os.path.exists(settings.PANORAMA_CHECK_PATH):
                            if python_version == 2:
                                mode_arg = int('0755')
                            if python_version == 3:
                                mode_arg = mode=0o755
                            os.makedirs(settings.PANORAMA_CHECK_PATH, mode_arg)

                        # Note:
                        # The values are enclosed is single quoted intentionally
                        # as the imp.load_source used results in a shift in the
                        # decimal position when double quoted, e.g.
                        # value = "5622.0" gets imported as
                        # 2016-03-02 12:53:26 :: 28569 :: metric variable - value - 562.2
                        # single quoting results in the desired,
                        # 2016-03-02 13:16:17 :: 1515 :: metric variable - value - 5622.0
                        added_at = str(int(time()))
                        source = 'graphite'
                        panaroma_anomaly_data = 'metric = \'%s\'\n' \
                                                'value = \'%s\'\n' \
                                                'from_timestamp = \'%s\'\n' \
                                                'metric_timestamp = \'%s\'\n' \
                                                'algorithms = %s\n' \
                                                'triggered_algorithms = %s\n' \
                                                'app = \'%s\'\n' \
                                                'source = \'%s\'\n' \
                                                'added_by = \'%s\'\n' \
                                                'added_at = \'%s\'\n' \
                            % (base_name, str(datapoint), from_timestamp,
                               metric_timestamp, str(settings.ALGORITHMS),
                               triggered_algorithms, skyline_app, source,
                               this_host, added_at)

                        # Create an anomaly file with details about the anomaly
                        panaroma_anomaly_file = '%s/%s.%s.txt' % (
                            settings.PANORAMA_CHECK_PATH, added_at,
                            base_name)
                        try:
                            write_data_to_file(
                                skyline_app, panaroma_anomaly_file, 'w',
                                panaroma_anomaly_data)
                            logger.info('added panorama anomaly file :: %s' % (panaroma_anomaly_file))
                        except:
                            logger.error('error :: failed to add panorama anomaly file :: %s' % (panaroma_anomaly_file))
                            logger.info(traceback.format_exc())

                    # If Crucible is enabled - save timeseries and create a
                    # Crucible check
                    if settings.ENABLE_CRUCIBLE and settings.ANALYZER_CRUCIBLE_ENABLED:
                        crucible_anomaly_dir = settings.CRUCIBLE_DATA_FOLDER + '/' + timeseries_dir + '/' + metric_timestamp
                        if not os.path.exists(crucible_anomaly_dir):
                            if python_version == 2:
                                mode_arg = int('0755')
                            if python_version == 3:
                                mode_arg = mode=0o755
                            os.makedirs(crucible_anomaly_dir, mode_arg)

                        # Note:
                        # The values are enclosed is single quoted intentionally
                        # as the imp.load_source used in crucible results in a
                        # shift in the decimal position when double quoted, e.g.
                        # value = "5622.0" gets imported as
                        # 2016-03-02 12:53:26 :: 28569 :: metric variable - value - 562.2
                        # single quoting results in the desired,
                        # 2016-03-02 13:16:17 :: 1515 :: metric variable - value - 5622.0

                        crucible_anomaly_data = 'metric = \'%s\'\n' \
                                                'value = \'%s\'\n' \
                                                'from_timestamp = \'%s\'\n' \
                                                'metric_timestamp = \'%s\'\n' \
                                                'algorithms = %s\n' \
                                                'triggered_algorithms = %s\n' \
                                                'anomaly_dir = \'%s\'\n' \
                                                'graphite_metric = True\n' \
                                                'run_crucible_tests = False\n' \
                                                'added_by = \'%s\'\n' \
                                                'added_at = \'%s\'\n' \
                            % (base_name, str(datapoint), from_timestamp,
                               metric_timestamp, str(settings.ALGORITHMS),
                               triggered_algorithms, crucible_anomaly_dir,
                               skyline_app, metric_timestamp)

                        # Create an anomaly file with details about the anomaly
                        crucible_anomaly_file = '%s/%s.txt' % (crucible_anomaly_dir, base_name)
                        try:
                            write_data_to_file(
                                skyline_app, crucible_anomaly_file, 'w',
                                crucible_anomaly_data)
                            logger.info('added crucible anomaly file :: %s' % (crucible_anomaly_file))
                        except:
                            logger.error('error :: failed to add crucible anomaly file :: %s' % (crucible_anomaly_file))
                            logger.info(traceback.format_exc())

                        # Create timeseries json file with the timeseries
                        json_file = '%s/%s.json' % (crucible_anomaly_dir, base_name)
                        timeseries_json = str(timeseries).replace('[', '(').replace(']', ')')
                        try:
                            write_data_to_file(skyline_app, json_file, 'w', timeseries_json)
                            logger.info('added crucible timeseries file :: %s' % (json_file))
                        except:
                            logger.error('error :: failed to add crucible timeseries file :: %s' % (json_file))
                            logger.info(traceback.format_exc())

                        # Create a crucible check file
                        crucible_check_file = '%s/%s.%s.txt' % (settings.CRUCIBLE_CHECK_PATH, metric_timestamp, base_name)
                        try:
                            write_data_to_file(
                                skyline_app, crucible_check_file, 'w',
                                crucible_anomaly_data)
                            logger.info('added crucible check :: %s,%s' % (base_name, metric_timestamp))
                        except:
                            logger.error('error :: failed to add crucible check file :: %s' % (crucible_check_file))
                            logger.info(traceback.format_exc())

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

        spin_end = time() - spin_start
        logger.info('spin_process took %.2f seconds' % spin_end)
예제 #6
0
    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = i * keys_per_processor
        assigned_min = assigned_max - keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(timeseries)

                # If it's anomalous, add it to list
                if anomalous:
                    metric = [datapoint, metric_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except AttributeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Incomplete:
                exceptions['Incomplete'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Collate process-specific dicts to main dicts
        with self.lock:
            for key, value in anomaly_breakdown.items():
                if key not in self.anomaly_breakdown:
                    self.anomaly_breakdown[key] = value
                else:
        	        self.anomaly_breakdown[key] += value

            for key, value in exceptions.items():
                if key not in self.exceptions:
                    self.exceptions[key] = value
                else:
        	        self.exceptions[key] += value
예제 #7
0
파일: boundary.py 프로젝트: gaos1/skyline
    def spin_process(self, i, boundary_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Determine assigned metrics
        bp = settings.BOUNDARY_PROCESSES
        bm_range = len(boundary_metrics)
        keys_per_processor = int(ceil(float(bm_range) / float(bp)))
        if i == settings.BOUNDARY_PROCESSES:
            assigned_max = len(boundary_metrics)
        else:
            # This is a skyine bug, the original skyline code uses 1 as the
            # beginning position of the index, python indices begin with 0
            # assigned_max = len(boundary_metrics)
            # This closes the etsy/skyline pull request opened by @languitar on 17 Jun 2014
            # https://github.com/etsy/skyline/pull/94 Fix analyzer worker metric assignment
            assigned_max = min(len(boundary_metrics), i * keys_per_processor)
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics_and_algos = [boundary_metrics[index] for index in assigned_keys]
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing assigned_metrics_and_algos')
            for assigned_metric_and_algo in assigned_metrics_and_algos:
                logger.info('debug - assigned_metric_and_algo - %s' % str(assigned_metric_and_algo))

        # Compile assigned metrics
        assigned_metrics = []
        for i in assigned_metrics_and_algos:
            assigned_metrics.append(i[0])

        # unique unhashed things
        def unique_noHash(seq):
            seen = set()
            return [x for x in seq if str(x) not in seen and not seen.add(str(x))]

        unique_assigned_metrics = unique_noHash(assigned_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - unique_assigned_metrics - %s' % str(unique_assigned_metrics))
            logger.info('debug - printing unique_assigned_metrics:')
            for unique_assigned_metric in unique_assigned_metrics:
                logger.info('debug - unique_assigned_metric - %s' % str(unique_assigned_metric))

        # Check if this process is unnecessary
        if len(unique_assigned_metrics) == 0:
            return

        # Multi get series
        try:
            raw_assigned = self.redis_conn.mget(unique_assigned_metrics)
        except:
            logger.error("failed to mget assigned_metrics from redis")
            return

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Reset boundary_algortims
        all_boundary_algorithms = []
        for metric in BOUNDARY_METRICS:
            all_boundary_algorithms.append(metric[1])

        # The unique algorithms that are being used
        boundary_algorithms = unique_noHash(all_boundary_algorithms)
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - boundary_algorithms - %s' % str(boundary_algorithms))

        discover_run_metrics = []

        # Distill metrics into a run list
        for i, metric_name, in enumerate(unique_assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacking timeseries for %s - %s' % (metric_name, str(i)))
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
            except Exception as e:
                exceptions['Other'] += 1
                logger.error("redis data error: " + traceback.format_exc())
                logger.error("error: %e" % e)

            base_name = metric_name.replace(FULL_NAMESPACE, '', 1)

            # Determine the metrics BOUNDARY_METRICS metric tuple settings
            for metrick in BOUNDARY_METRICS:
                CHECK_MATCH_PATTERN = metrick[0]
                check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                pattern_match = check_match_pattern.match(base_name)
                metric_pattern_matched = False
                if pattern_match:
                    metric_pattern_matched = True
                    algo_pattern_matched = False
                    for algo in boundary_algorithms:
                        for metric in BOUNDARY_METRICS:
                            CHECK_MATCH_PATTERN = metric[0]
                            check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                            pattern_match = check_match_pattern.match(base_name)
                            if pattern_match:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug - metric and algo pattern MATCHED - " + metric[0] + " | " + base_name + " | " + str(metric[1]))
                                metric_expiration_time = False
                                metric_min_average = False
                                metric_min_average_seconds = False
                                metric_trigger = False
                                algorithm = False
                                algo_pattern_matched = True
                                algorithm = metric[1]
                                try:
                                    if metric[2]:
                                        metric_expiration_time = metric[2]
                                except:
                                    metric_expiration_time = False
                                try:
                                    if metric[3]:
                                        metric_min_average = metric[3]
                                except:
                                    metric_min_average = False
                                try:
                                    if metric[4]:
                                        metric_min_average_seconds = metric[4]
                                except:
                                    metric_min_average_seconds = 1200
                                try:
                                    if metric[5]:
                                        metric_trigger = metric[5]
                                except:
                                    metric_trigger = False
                                try:
                                    if metric[6]:
                                        alert_threshold = metric[6]
                                except:
                                    alert_threshold = False
                                try:
                                    if metric[7]:
                                        metric_alerters = metric[7]
                                except:
                                    metric_alerters = False
                            if metric_pattern_matched and algo_pattern_matched:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info('debug - added metric - %s, %s, %s, %s, %s, %s, %s, %s, %s' % (str(i), metric_name, str(metric_expiration_time), str(metric_min_average), str(metric_min_average_seconds), str(metric_trigger), str(alert_threshold), metric_alerters, algorithm))
                                discover_run_metrics.append([i, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm])

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing discover_run_metrics')
            for discover_run_metric in discover_run_metrics:
                logger.info('debug - discover_run_metrics - %s' % str(discover_run_metric))
            logger.info('debug - build unique boundary metrics to analyze')

        # Determine the unique set of metrics to run
        run_metrics = unique_noHash(discover_run_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing run_metrics')
            for run_metric in run_metrics:
                logger.info('debug - run_metrics - %s' % str(run_metric))

        # Distill timeseries strings and submit to run_selected_algorithm
        for metric_and_algo in run_metrics:
            self.check_if_parent_is_alive()

            try:
                raw_assigned_id = metric_and_algo[0]
                metric_name = metric_and_algo[1]
                base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                metric_expiration_time = metric_and_algo[2]
                metric_min_average = metric_and_algo[3]
                metric_min_average_seconds = metric_and_algo[4]
                metric_trigger = metric_and_algo[5]
                alert_threshold = metric_and_algo[6]
                metric_alerters = metric_and_algo[7]
                algorithm = metric_and_algo[8]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacking timeseries for %s - %s' % (metric_name, str(raw_assigned_id)))

                raw_series = raw_assigned[metric_and_algo[0]]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacked OK - %s - %s' % (metric_name, str(raw_assigned_id)))

                autoaggregate = False
                autoaggregate_value = 0

                # Determine if the namespace is to be aggregated
                if BOUNDARY_AUTOAGGRERATION:
                    for autoaggregate_metric in BOUNDARY_AUTOAGGRERATION_METRICS:
                        autoaggregate = False
                        autoaggregate_value = 0
                        CHECK_MATCH_PATTERN = autoaggregate_metric[0]
                        base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            autoaggregate = True
                            autoaggregate_value = autoaggregate_metric[1]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - BOUNDARY_AUTOAGGRERATION passed - %s - %s' % (metric_name, str(autoaggregate)))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info(
                        'debug - analysing - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s' % (
                            metric_name, str(metric_expiration_time),
                            str(metric_min_average),
                            str(metric_min_average_seconds),
                            str(metric_trigger), str(alert_threshold),
                            metric_alerters, autoaggregate,
                            autoaggregate_value, algorithm)
                    )
                    # Dump the the timeseries data to a file
                    timeseries_dump_dir = "/tmp/skyline/boundary/" + algorithm
                    self.mkdir_p(timeseries_dump_dir)
                    timeseries_dump_file = timeseries_dump_dir + "/" + metric_name + ".json"
                    with open(timeseries_dump_file, 'w+') as f:
                        f.write(str(timeseries))
                        f.close()

                # Check if a metric has its own unique BOUNDARY_METRICS alert
                # tuple, this allows us to paint an entire metric namespace with
                # the same brush AND paint a unique metric or namespace with a
                # different brush or scapel
                has_unique_tuple = False
                run_tupple = False
                boundary_metric_tuple = (base_name, algorithm, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters)
                wildcard_namespace = True
                for metric_tuple in BOUNDARY_METRICS:
                    if not has_unique_tuple:
                        CHECK_MATCH_PATTERN = metric_tuple[0]
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            if metric_tuple[0] == base_name:
                                wildcard_namespace = False
                            if not has_unique_tuple:
                                if boundary_metric_tuple == metric_tuple:
                                    has_unique_tuple = True
                                    run_tupple = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info('unique_tuple:')
                                        logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))
                                        logger.info('metric_tuple: %s' % str(metric_tuple))

                if not has_unique_tuple:
                    if wildcard_namespace:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('wildcard_namespace:')
                            logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))
                        run_tupple = True
                    else:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('wildcard_namespace: BUT WOULD NOT RUN')
                            logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('WOULD RUN run_selected_algorithm = %s' % run_tupple)

                if run_tupple:
                    # Submit the timeseries and settings to run_selected_algorithm
                    anomalous, ensemble, datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm = run_selected_algorithm(
                        timeseries, metric_name,
                        metric_expiration_time,
                        metric_min_average,
                        metric_min_average_seconds,
                        metric_trigger,
                        alert_threshold,
                        metric_alerters,
                        autoaggregate,
                        autoaggregate_value,
                        algorithm
                    )
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('debug - analysed - %s' % (metric_name))
                else:
                    anomalous = False
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('debug - more unique metric tuple not analysed - %s' % (metric_name))

                # If it's anomalous, add it to list
                if anomalous:
                    anomalous_metric = [datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm]
                    self.anomalous_metrics.append(anomalous_metric)
                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info("exceptions['Other'] traceback follows:")
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))
예제 #8
0
 def test_run_selected_algorithm(self, timeMock):
     timeMock.return_value, timeseries = self.data(time.time())
     result, ensemble, tail_avg = algorithms.run_selected_algorithm(timeseries)
     self.assertTrue(result)
     self.assertTrue(len(filter(None, ensemble)) >= settings.CONSENSUS)
     self.assertEqual(tail_avg, 334)
예제 #9
0
파일: __main__.py 프로젝트: redox/skyline
import sys
import json
import algorithms
from algorithm_exceptions import *

if len(sys.argv) != 2:
  print(("usage: %s /path/to/serie.json" % sys.argv[0]), file=sys.stderr)
  sys.exit(1)

with open(sys.argv[1]) as data_file: 
  data = json.load(data_file)

if data['serie'] is None:
  print(("usage: %s /path/to/serie.json" % sys.argv[0]), file=sys.stderr)
  sys.exit(2)

try:
  sys.exit(1 if algorithms.run_selected_algorithm(data['serie']) else 0)
except TooShort:
  print("Too short")
  sys.exit(0)
except Boring:
  print("Boring")
  sys.exit(0)
except Stale:
  print("Stale")
  sys.exit(0)
예제 #10
0
    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        logger.info("metric index : "+str(i))
        logger.info("metric name : " + str(unique_metrics))
        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = i * keys_per_processor
        assigned_min = assigned_max - keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            logger.info("unnecessary process ,return direct")
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)
        logger.info("[data length] assigned_metrics len : " + str(len(assigned_metrics)))

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                logger.info("start run selected algorithm ~")
                anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)
                #logger.info(anomalous,ensemble,datapoint)
                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    metric = [datapoint, base_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))
예제 #11
0
 def test_run_selected_algorithm(self, timeMock):
     timeMock.return_value, timeseries = self.data(time())
     result, ensemble, datapoint = algorithms.run_selected_algorithm(timeseries, "test.metric")
     self.assertTrue(result)
     self.assertTrue(len(filter(None, ensemble)) >= settings.CONSENSUS)
     self.assertEqual(datapoint, 1000)
예제 #12
0
파일: boundary.py 프로젝트: gaos1/skyline
    def spin_process(self, i, boundary_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Determine assigned metrics
        bp = settings.BOUNDARY_PROCESSES
        bm_range = len(boundary_metrics)
        keys_per_processor = int(ceil(float(bm_range) / float(bp)))
        if i == settings.BOUNDARY_PROCESSES:
            assigned_max = len(boundary_metrics)
        else:
            # This is a skyine bug, the original skyline code uses 1 as the
            # beginning position of the index, python indices begin with 0
            # assigned_max = len(boundary_metrics)
            # This closes the etsy/skyline pull request opened by @languitar on 17 Jun 2014
            # https://github.com/etsy/skyline/pull/94 Fix analyzer worker metric assignment
            assigned_max = min(len(boundary_metrics), i * keys_per_processor)
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics_and_algos = [
            boundary_metrics[index] for index in assigned_keys
        ]
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing assigned_metrics_and_algos')
            for assigned_metric_and_algo in assigned_metrics_and_algos:
                logger.info('debug - assigned_metric_and_algo - %s' %
                            str(assigned_metric_and_algo))

        # Compile assigned metrics
        assigned_metrics = []
        for i in assigned_metrics_and_algos:
            assigned_metrics.append(i[0])

        # unique unhashed things
        def unique_noHash(seq):
            seen = set()
            return [
                x for x in seq if str(x) not in seen and not seen.add(str(x))
            ]

        unique_assigned_metrics = unique_noHash(assigned_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - unique_assigned_metrics - %s' %
                        str(unique_assigned_metrics))
            logger.info('debug - printing unique_assigned_metrics:')
            for unique_assigned_metric in unique_assigned_metrics:
                logger.info('debug - unique_assigned_metric - %s' %
                            str(unique_assigned_metric))

        # Check if this process is unnecessary
        if len(unique_assigned_metrics) == 0:
            return

        # Multi get series
        try:
            raw_assigned = self.redis_conn.mget(unique_assigned_metrics)
        except:
            logger.error("failed to mget assigned_metrics from redis")
            return

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Reset boundary_algortims
        all_boundary_algorithms = []
        for metric in BOUNDARY_METRICS:
            all_boundary_algorithms.append(metric[1])

        # The unique algorithms that are being used
        boundary_algorithms = unique_noHash(all_boundary_algorithms)
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - boundary_algorithms - %s' %
                        str(boundary_algorithms))

        discover_run_metrics = []

        # Distill metrics into a run list
        for i, metric_name, in enumerate(unique_assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacking timeseries for %s - %s' %
                                (metric_name, str(i)))
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
            except Exception as e:
                exceptions['Other'] += 1
                logger.error("redis data error: " + traceback.format_exc())
                logger.error("error: %e" % e)

            base_name = metric_name.replace(FULL_NAMESPACE, '', 1)

            # Determine the metrics BOUNDARY_METRICS metric tuple settings
            for metrick in BOUNDARY_METRICS:
                CHECK_MATCH_PATTERN = metrick[0]
                check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                pattern_match = check_match_pattern.match(base_name)
                metric_pattern_matched = False
                if pattern_match:
                    metric_pattern_matched = True
                    algo_pattern_matched = False
                    for algo in boundary_algorithms:
                        for metric in BOUNDARY_METRICS:
                            CHECK_MATCH_PATTERN = metric[0]
                            check_match_pattern = re.compile(
                                CHECK_MATCH_PATTERN)
                            pattern_match = check_match_pattern.match(
                                base_name)
                            if pattern_match:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - metric and algo pattern MATCHED - "
                                        + metric[0] + " | " + base_name +
                                        " | " + str(metric[1]))
                                metric_expiration_time = False
                                metric_min_average = False
                                metric_min_average_seconds = False
                                metric_trigger = False
                                algorithm = False
                                algo_pattern_matched = True
                                algorithm = metric[1]
                                try:
                                    if metric[2]:
                                        metric_expiration_time = metric[2]
                                except:
                                    metric_expiration_time = False
                                try:
                                    if metric[3]:
                                        metric_min_average = metric[3]
                                except:
                                    metric_min_average = False
                                try:
                                    if metric[4]:
                                        metric_min_average_seconds = metric[4]
                                except:
                                    metric_min_average_seconds = 1200
                                try:
                                    if metric[5]:
                                        metric_trigger = metric[5]
                                except:
                                    metric_trigger = False
                                try:
                                    if metric[6]:
                                        alert_threshold = metric[6]
                                except:
                                    alert_threshold = False
                                try:
                                    if metric[7]:
                                        metric_alerters = metric[7]
                                except:
                                    metric_alerters = False
                            if metric_pattern_matched and algo_pattern_matched:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        'debug - added metric - %s, %s, %s, %s, %s, %s, %s, %s, %s'
                                        % (str(i), metric_name,
                                           str(metric_expiration_time),
                                           str(metric_min_average),
                                           str(metric_min_average_seconds),
                                           str(metric_trigger),
                                           str(alert_threshold),
                                           metric_alerters, algorithm))
                                discover_run_metrics.append([
                                    i, metric_name, metric_expiration_time,
                                    metric_min_average,
                                    metric_min_average_seconds, metric_trigger,
                                    alert_threshold, metric_alerters, algorithm
                                ])

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing discover_run_metrics')
            for discover_run_metric in discover_run_metrics:
                logger.info('debug - discover_run_metrics - %s' %
                            str(discover_run_metric))
            logger.info('debug - build unique boundary metrics to analyze')

        # Determine the unique set of metrics to run
        run_metrics = unique_noHash(discover_run_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing run_metrics')
            for run_metric in run_metrics:
                logger.info('debug - run_metrics - %s' % str(run_metric))

        # Distill timeseries strings and submit to run_selected_algorithm
        for metric_and_algo in run_metrics:
            self.check_if_parent_is_alive()

            try:
                raw_assigned_id = metric_and_algo[0]
                metric_name = metric_and_algo[1]
                base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                metric_expiration_time = metric_and_algo[2]
                metric_min_average = metric_and_algo[3]
                metric_min_average_seconds = metric_and_algo[4]
                metric_trigger = metric_and_algo[5]
                alert_threshold = metric_and_algo[6]
                metric_alerters = metric_and_algo[7]
                algorithm = metric_and_algo[8]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacking timeseries for %s - %s' %
                                (metric_name, str(raw_assigned_id)))

                raw_series = raw_assigned[metric_and_algo[0]]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacked OK - %s - %s' %
                                (metric_name, str(raw_assigned_id)))

                autoaggregate = False
                autoaggregate_value = 0

                # Determine if the namespace is to be aggregated
                if BOUNDARY_AUTOAGGRERATION:
                    for autoaggregate_metric in BOUNDARY_AUTOAGGRERATION_METRICS:
                        autoaggregate = False
                        autoaggregate_value = 0
                        CHECK_MATCH_PATTERN = autoaggregate_metric[0]
                        base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            autoaggregate = True
                            autoaggregate_value = autoaggregate_metric[1]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info(
                        'debug - BOUNDARY_AUTOAGGRERATION passed - %s - %s' %
                        (metric_name, str(autoaggregate)))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info(
                        'debug - analysing - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s'
                        %
                        (metric_name, str(metric_expiration_time),
                         str(metric_min_average),
                         str(metric_min_average_seconds), str(metric_trigger),
                         str(alert_threshold), metric_alerters, autoaggregate,
                         autoaggregate_value, algorithm))
                    # Dump the the timeseries data to a file
                    timeseries_dump_dir = "/tmp/skyline/boundary/" + algorithm
                    self.mkdir_p(timeseries_dump_dir)
                    timeseries_dump_file = timeseries_dump_dir + "/" + metric_name + ".json"
                    with open(timeseries_dump_file, 'w+') as f:
                        f.write(str(timeseries))
                        f.close()

                # Check if a metric has its own unique BOUNDARY_METRICS alert
                # tuple, this allows us to paint an entire metric namespace with
                # the same brush AND paint a unique metric or namespace with a
                # different brush or scapel
                has_unique_tuple = False
                run_tupple = False
                boundary_metric_tuple = (base_name, algorithm,
                                         metric_expiration_time,
                                         metric_min_average,
                                         metric_min_average_seconds,
                                         metric_trigger, alert_threshold,
                                         metric_alerters)
                wildcard_namespace = True
                for metric_tuple in BOUNDARY_METRICS:
                    if not has_unique_tuple:
                        CHECK_MATCH_PATTERN = metric_tuple[0]
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            if metric_tuple[0] == base_name:
                                wildcard_namespace = False
                            if not has_unique_tuple:
                                if boundary_metric_tuple == metric_tuple:
                                    has_unique_tuple = True
                                    run_tupple = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info('unique_tuple:')
                                        logger.info(
                                            'boundary_metric_tuple: %s' %
                                            str(boundary_metric_tuple))
                                        logger.info('metric_tuple: %s' %
                                                    str(metric_tuple))

                if not has_unique_tuple:
                    if wildcard_namespace:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('wildcard_namespace:')
                            logger.info('boundary_metric_tuple: %s' %
                                        str(boundary_metric_tuple))
                        run_tupple = True
                    else:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info(
                                'wildcard_namespace: BUT WOULD NOT RUN')
                            logger.info('boundary_metric_tuple: %s' %
                                        str(boundary_metric_tuple))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('WOULD RUN run_selected_algorithm = %s' %
                                run_tupple)

                if run_tupple:
                    # Submit the timeseries and settings to run_selected_algorithm
                    anomalous, ensemble, datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm = run_selected_algorithm(
                        timeseries, metric_name, metric_expiration_time,
                        metric_min_average, metric_min_average_seconds,
                        metric_trigger, alert_threshold, metric_alerters,
                        autoaggregate, autoaggregate_value, algorithm)
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('debug - analysed - %s' % (metric_name))
                else:
                    anomalous = False
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info(
                            'debug - more unique metric tuple not analysed - %s'
                            % (metric_name))

                # If it's anomalous, add it to list
                if anomalous:
                    anomalous_metric = [
                        datapoint, metric_name, metric_expiration_time,
                        metric_min_average, metric_min_average_seconds,
                        metric_trigger, alert_threshold, metric_alerters,
                        algorithm
                    ]
                    self.anomalous_metrics.append(anomalous_metric)
                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info("exceptions['Other'] traceback follows:")
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))
예제 #13
0
파일: mirage.py 프로젝트: gaos1/skyline
    def spin_process(self, i, run_timestamp):
        """
        Assign a metric for a process to analyze.
        """

        # Discover metric to analyze
        metric_var_files = [
            f for f in listdir(settings.MIRAGE_CHECK_PATH)
            if isfile(join(settings.MIRAGE_CHECK_PATH, f))
        ]

        # Check if this process is unnecessary
        if len(metric_var_files) == 0:
            return

        metric_var_files_sorted = sorted(metric_var_files)
        metric_check_file = settings.MIRAGE_CHECK_PATH + "/" + metric_var_files_sorted[
            0]

        # Load metric variables
        self.load_metric_vars(metric_check_file)

        # Test metric variables
        if len(metric_vars.metric) == 0:
            return
        else:
            metric = metric_vars.metric
            metric_name = ['metric_name', metric_vars.metric]
            self.metric_variables.append(metric_name)
        if len(metric_vars.value) == 0:
            return
        else:
            metric_value = ['metric_value', metric_vars.value]
            self.metric_variables.append(metric_value)
        if len(metric_vars.hours_to_resolve) == 0:
            return
        else:
            hours_to_resolve = [
                'hours_to_resolve', metric_vars.hours_to_resolve
            ]
            self.metric_variables.append(hours_to_resolve)
        if len(metric_vars.metric_timestamp) == 0:
            return
        else:
            metric_timestamp = [
                'metric_timestamp', metric_vars.metric_timestamp
            ]
            self.metric_variables.append(metric_timestamp)

        # Ignore any metric check with a timestamp greater than 10 minutes ago
        int_metric_timestamp = int(metric_vars.metric_timestamp)
        int_run_timestamp = int(run_timestamp)
        metric_timestamp_age = int_run_timestamp - int_metric_timestamp
        if metric_timestamp_age > settings.MIRAGE_STALE_SECONDS:
            logger.info(
                'stale check       :: %s check request is %s seconds old - discarding'
                % (metric_vars.metric, metric_timestamp_age))
            # Remove metric check file
            #            try:
            #                os.remove(metric_check_file)
            #            except OSError:
            #                pass
            #            return
            if os.path.exists(metric_check_file):
                os.remove(metric_check_file)
                logger.info('removed %s' % (metric_check_file))
            else:
                logger.info('could not remove %s' % (metric_check_file))

        # Calculate hours second order resolution to seconds
        second_order_resolution_seconds = int(
            metric_vars.hours_to_resolve) * 3600

        # Calculate graphite from and until parameters from the metric timestamp
        graphite_until = datetime.datetime.fromtimestamp(
            int(metric_vars.metric_timestamp)).strftime('%H:%M_%Y%m%d')
        int_second_order_resolution_seconds = int(
            second_order_resolution_seconds)
        second_resolution_timestamp = int_metric_timestamp - int_second_order_resolution_seconds
        graphite_from = datetime.datetime.fromtimestamp(
            int(second_resolution_timestamp)).strftime('%H:%M_%Y%m%d')

        # Remove any old json file related to the metric
        metric_json_file = settings.MIRAGE_DATA_FOLDER + "/" + metric_vars.metric + "/" + metric_vars.metric + '.json'
        try:
            os.remove(metric_json_file)
        except OSError:
            pass

        # Get data from graphite
        logger.info(
            'retrieve data     :: surfacing %s timeseries from graphite for %s seconds'
            % (metric_vars.metric, second_order_resolution_seconds))
        self.surface_graphite_metric_data(metric_vars.metric, graphite_from,
                                          graphite_until)

        # Check there is a json timeseries file to test
        if os.path.isfile(metric_json_file) != True:
            logger.error(
                'retrieve failed   :: failed to surface %s timeseries from graphite'
                % (metric_vars.metric))
            # Remove metric check file
            try:
                os.remove(metric_check_file)
            except OSError:
                pass
            return
        else:
            logger.info('retrieved data    :: for %s at %s seconds' %
                        (metric_vars.metric, second_order_resolution_seconds))

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        self.check_if_parent_is_alive()

        with open((metric_json_file), 'r') as f:
            timeseries = json.loads(f.read())
            logger.info('data points surfaced :: %s' % (len(timeseries)))

        try:
            logger.info('analyzing         :: %s at %s seconds' %
                        (metric_vars.metric, second_order_resolution_seconds))
            anomalous, ensemble, datapoint = run_selected_algorithm(
                timeseries, metric_vars.metric,
                second_order_resolution_seconds)

            # If it's anomalous, add it to list
            if anomalous:
                base_name = metric.replace(settings.FULL_NAMESPACE, '', 1)
                anomalous_metric = [datapoint, base_name]
                self.anomalous_metrics.append(anomalous_metric)
                logger.info('anomaly detected  :: %s with %s' %
                            (metric_vars.metric, metric_vars.value))
                # It runs so fast, this allows us to process 30 anomalies/min
                sleep(2)

                # Get the anomaly breakdown - who returned True?
                for index, value in enumerate(ensemble):
                    if value:
                        algorithm = settings.MIRAGE_ALGORITHMS[index]
                        anomaly_breakdown[algorithm] += 1
            else:
                base_name = metric.replace(settings.FULL_NAMESPACE, '', 1)
                not_anomalous_metric = [datapoint, base_name]
                self.not_anomalous_metrics.append(not_anomalous_metric)
                logger.info('not anomalous     :: %s with %s' %
                            (metric_vars.metric, metric_vars.value))

        # It could have been deleted by the Roomba
        except TypeError:
            exceptions['DeletedByRoomba'] += 1
            logger.info('exceptions        :: DeletedByRoomba')
        except TooShort:
            exceptions['TooShort'] += 1
            logger.info('exceptions        :: TooShort')
        except Stale:
            exceptions['Stale'] += 1
            logger.info('exceptions        :: Stale')
        except Boring:
            exceptions['Boring'] += 1
            logger.info('exceptions        :: Boring')
        except:
            exceptions['Other'] += 1
            logger.info('exceptions        :: Other')
            logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.mirage_anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.mirage_exceptions_q.put((key, value))

        # Remove metric check file
        try:
            os.remove(metric_check_file)
        except OSError:
            pass
예제 #14
0
    def spin_process(self, i, unique_metrics):
        process_key = '.'.join(['skyline','analyzer', socket.gethostname(), str(i)])
        alive_key = '.'.join([process_key, 'alive'])
        self.ring.run('set', alive_key, 1)
        self.ring.run('expire', alive_key, 30)
        """
        Assign a bunch of metrics for a process to analyze.
        """
        processes = list(self.ring.run('zrange', settings.ANALYZER_PROCESS_KEY, 0, -1))
        for key in processes:
            value = self.ring.run('get', key)
            if not value:
                self.ring.run('zrem', settings.ANALYZER_PROCESS_KEY, 0, key)

        # Add current process to index and determine position
        if not self.ring.run('zscore', settings.ANALYZER_PROCESS_KEY, alive_key):
            self.ring.run('zadd', settings.ANALYZER_PROCESS_KEY, time(), alive_key)
        self.ring.run('expire', settings.ANALYZER_PROCESS_KEY, 60)
        process_position = self.ring.run('zrank', settings.ANALYZER_PROCESS_KEY, alive_key) + 1
        process_count = self.ring.run('zcard', settings.ANALYZER_PROCESS_KEY)

        # If there are less processes then we know are going to be running assume
        # the others will start
        if process_count < settings.ANALYZER_PROCESSES:
            process_count = settings.ANALYZER_PROCESSES

        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(process_count)))
        if process_position == process_count:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = process_position * keys_per_processor
        assigned_min = assigned_max - keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.ring.run('mget', assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    metric = [datapoint, base_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Incomplete:
                exceptions['Incomplete'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # if anomalies detected Pack and Write anomoly data to Redis
        if len(anomalous_metrics) > 0:
            packed = Packer().pack(anomalous_metrics)
            self.ring.run('set', process_key, packed)
            # expire the key in 30s so anomalys don't show up for too long
            self.ring.run('expire', process_key, 30)
            self.ring.run('sadd', settings.ANALYZER_ANOMALY_KEY, process_key)
            # expire the key in 60s so anomalys don't show up for too long
            self.ring.run('expire', settings.ANALYZER_ANOMALY_KEY, 60)

        # Collate process-specific dicts to main dicts
        with self.lock:
            for key, value in anomaly_breakdown.items():
                if key not in self.anomaly_breakdown:
                    self.anomaly_breakdown[key] = value
                else:
        	        self.anomaly_breakdown[key] += value

            for key, value in exceptions.items():
                if key not in self.exceptions:
                    self.exceptions[key] = value
                else:
        	        self.exceptions[key] += value

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))