Example #1
def get_correlations(
    base_name, anomaly_timestamp, anomalous_ts, assigned_metrics, raw_assigned,
        remote_assigned, anomalies):

    logger = logging.getLogger(skyline_app_logger)

    # Distill timeseries strings into lists
    start = timer()
    count = 0
    metrics_checked_for_correlation = 0
    # Sample the time series
    # @modified 20180720 - Feature #2464: luminosity_remote_data
    # Added note here - if you modify the value of 600 here, it must be
    # modified in the luminosity_remote_data function in
    # skyline/webapp/backend.py as well
    from_timestamp = anomaly_timestamp - 600
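    # i.e. sample a 10 minute window of data leading up to the anomaly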
    correlated_metrics = []
    correlations = []
    no_data = False
    if not anomalous_ts:
        no_data = True
    if not assigned_metrics:
        no_data = True
    if not raw_assigned:
        no_data = True
    if not anomalies:
        no_data = True
    if no_data:
        logger.error('error :: get_correlations :: no data')
        # Return the same 4 element tuple as the successful path so callers
        # can always unpack 4 values
        return (correlated_metrics, correlations, metrics_checked_for_correlation, '0.000000')

    # @added 20200428 - Feature #3510: Enable Luminosity to handle correlating namespaces only
    #                   Feature #3500: webapp - crucible_process_metrics
    #                   Feature #1448: Crucible web UI
    # Discard the check if the anomaly_timestamp is not in FULL_DURATION as it
    # will have been added via the Crucible or webapp/crucible route
    start_timestamp_of_full_duration_data = int(time() - settings.FULL_DURATION)
    if anomaly_timestamp < (start_timestamp_of_full_duration_data + 2000):
        logger.info('get_correlations :: the anomaly_timestamp is too old, not correlating')
        return (correlated_metrics, correlations, metrics_checked_for_correlation, '0.000000')

    start_local_correlations = timer()

    local_redis_metrics_checked_count = 0
    local_redis_metrics_correlations_count = 0

    logger.info('get_correlations :: the local Redis metric count is %s' % str(len(assigned_metrics)))

    # @added 20200428 - Feature #3510: Enable Luminosity to handle correlating namespaces only
    # Removed here and handled in get_assigned_metrics

    for i, metric_name in enumerate(assigned_metrics):
        count += 1
        # print(metric_name)
        # @modified 20180719 - Branch #2270: luminosity
        # Removed test limiting that was erroneously left in
        # if count > 1000:
        #     break
        correlated = None
        metric_base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
        if str(metric_base_name) == str(base_name):
            continue
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []
        if not timeseries:
            # print('no time series data for %s' % base_name)
            continue

        # @added 20200507 - Feature #3532: Sort all time series
        # To ensure that there are no unordered timestamps in the time
        # series which are artefacts of the collector or carbon-relay, sort
        # all time series by timestamp before analysis.
        original_timeseries = timeseries
        if original_timeseries:
            timeseries = sort_timeseries(original_timeseries)
            del original_timeseries

        # Convert the time series if this is a known_derivative_metric
        known_derivative_metric = is_derivative_metric(skyline_app, metric_base_name)
        if known_derivative_metric:
            try:
                derivative_timeseries = nonNegativeDerivative(timeseries)
                timeseries = derivative_timeseries
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: nonNegativeDerivative')

        correlate_ts = []
        for ts, value in timeseries:
            if int(ts) < from_timestamp:
                continue
            if int(ts) <= anomaly_timestamp:
                correlate_ts.append((int(ts), value))
            # @modified 20180720 - Feature #2464: luminosity_remote_data
            # Added note here - if you modify the value of 61 here, it must be
            # modified in the luminosity_remote_data function in
            # skyline/webapp/backend.py as well
            if int(ts) > (anomaly_timestamp + 61):
                break
        if not correlate_ts:
            continue

        local_redis_metrics_checked_count += 1
        anomaly_ts_dict = dict(anomalous_ts)
        correlate_ts_dict = dict(correlate_ts)

        for a in anomalies:
            try:
                # @modified 20180720 - Feature #2464: luminosity_remote_data
                # Added note here - if you modify the value of 120 here, it must be
                # modified in the luminosity_remote_data function in
                # skyline/webapp/backend.py as well
                if int(a.exact_timestamp) < int(anomaly_timestamp - 120):
                    continue
                if int(a.exact_timestamp) > int(anomaly_timestamp + 120):
                    continue
            except:
                continue
            try:
                time_period = (int(anomaly_timestamp - 120), int(anomaly_timestamp + 120))
                my_correlator = Correlator(anomaly_ts_dict, correlate_ts_dict, time_period)
                # For better correlation use 0.9 instead of 0.8 for the threshold
                # @modified 20180524 - Feature #2360: CORRELATE_ALERTS_ONLY
                #                      Branch #2270: luminosity
                #                      Feature #2378: Add redis auth to Skyline and rebrow
                # Added this to setting.py
                # if my_correlator.is_correlated(threshold=0.9):
                try:
                    cross_correlation_threshold = settings.LUMINOL_CROSS_CORRELATION_THRESHOLD
                except:
                    cross_correlation_threshold = 0.9
                # Count the metric as checked even when the settings lookup
                # falls back to the default threshold
                metrics_checked_for_correlation += 1
                if my_correlator.is_correlated(threshold=cross_correlation_threshold):
                    correlation = my_correlator.get_correlation_result()
                    correlated = True
                    correlations.append([metric_base_name, correlation.coefficient, correlation.shift, correlation.shifted_coefficient])
                    local_redis_metrics_correlations_count += 1
            except:
                pass
        if correlated:
            correlated_metrics.append(metric_base_name)

    # @added 20180720 - Feature #2464: luminosity_remote_data
    # Added the correlation of preprocessed remote data
    end_local_correlations = timer()
    logger.info('get_correlations :: checked - local_redis_metrics_checked_count is %s' % str(local_redis_metrics_checked_count))
    logger.info('get_correlations :: correlated - local_redis_metrics_correlations_count is %s' % str(local_redis_metrics_correlations_count))
    logger.info('get_correlations :: processed %s correlations on local_redis_metrics_checked_count %s local metrics in %.6f seconds' % (
        str(local_redis_metrics_correlations_count),
        str(local_redis_metrics_checked_count),
        (end_local_correlations - start_local_correlations)))

    remote_metrics_count = 0
    remote_correlations_check_count = 0
    remote_correlations_count = 0
    logger.info('get_correlations :: remote_assigned count %s' % str(len(remote_assigned)))
    start_remote_correlations = timer()
    for ts_data in remote_assigned:
        remote_metrics_count += 1
        correlated = None
        metric_name = str(ts_data[0])
        metric_base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
        if str(metric_base_name) == str(base_name):
            continue
        timeseries = []
        try:
            timeseries = ts_data[1]
        except:
            timeseries = []
        if not timeseries:
            continue

        correlate_ts = []
        for ts, value in timeseries:
            if int(ts) < from_timestamp:
                continue
            if int(ts) <= anomaly_timestamp:
                correlate_ts.append((int(ts), value))
            # @modified 20180720 - Feature #2464: luminosity_remote_data
            # Added note here - if you modify the value of 61 here, it must be
            # modified in the luminosity_remote_data function in
            # skyline/webapp/backend.py as well
            if int(ts) > (anomaly_timestamp + 61):
                break
        if not correlate_ts:
            continue

        anomaly_ts_dict = dict(anomalous_ts)
        correlate_ts_dict = dict(correlate_ts)

        for a in anomalies:
            try:
                # @modified 20180720 - Feature #2464: luminosity_remote_data
                # Added note here - if you modify the value of 120 here, it must be
                # modified in the luminosity_remote_data function in
                # skyline/webapp/backend.py as well
                if int(a.exact_timestamp) < int(anomaly_timestamp - 120):
                    continue
                if int(a.exact_timestamp) > int(anomaly_timestamp + 120):
                    continue
            except:
                continue
            try:
                time_period = (int(anomaly_timestamp - 120), int(anomaly_timestamp + 120))
                my_correlator = Correlator(anomaly_ts_dict, correlate_ts_dict, time_period)
                metrics_checked_for_correlation += 1
                remote_correlations_check_count += 1
                try:
                    cross_correlation_threshold = settings.LUMINOL_CROSS_CORRELATION_THRESHOLD
                except:
                    cross_correlation_threshold = 0.9
                if my_correlator.is_correlated(threshold=cross_correlation_threshold):
                    correlation = my_correlator.get_correlation_result()
                    correlated = True
                    correlations.append([metric_base_name, correlation.coefficient, correlation.shift, correlation.shifted_coefficient])
                    remote_correlations_count += 1
            except:
                pass
        if correlated:
            correlated_metrics.append(metric_base_name)

    end_remote_correlations = timer()
    logger.info('get_correlations :: checked - remote_correlations_check_count is %s' % str(remote_correlations_check_count))
    logger.info('get_correlations :: correlated - remote_correlations_count is %s' % str(remote_correlations_count))
    logger.info('get_correlations :: processed %s correlations on remote_metrics_count %s remote metrics in %.6f seconds' % (
        str(remote_correlations_count),
        str(remote_metrics_count),
        (end_remote_correlations - start_remote_correlations)))

    end = timer()
    logger.info('get_correlations :: checked a total of %s metrics and correlated %s metrics to %s anomaly, processed in %.6f seconds' % (
        str(metrics_checked_for_correlation), str(len(correlated_metrics)),
        base_name, (end - start)))
    # @added 20170720 - Task #2462: Implement useful metrics for Luminosity
    # Added runtime to calculate avg_runtime Graphite metric
    runtime = '%.6f' % (end - start)
    return (correlated_metrics, correlations, metrics_checked_for_correlation, runtime)
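As a usage note, a minimal sketch of how this function fits together with
get_anomalous_ts from Example #4 (argument names are taken from the
signatures shown on this page; how the luminosity process actually
assembles assigned_metrics, raw_assigned, remote_assigned and anomalies is
not shown in these examples):

    anomalous_ts = get_anomalous_ts(base_name, anomaly_timestamp)
    if anomalous_ts:
        correlated_metrics, correlations, checked_count, runtime = get_correlations(
            base_name, anomaly_timestamp, anomalous_ts, assigned_metrics,
            raw_assigned, remote_assigned, anomalies)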
Example #2
def luminosity_remote_data(anomaly_timestamp):
    """
    Gets all the unique_metrics from Redis and then mgets Redis data for all
    metrics.  The data is then preprocessed for the remote Skyline luminosity
    instance and only the relevant fragments of the time series are
    returned.  This return is then gzipped by the Flask Webapp response to
    ensure the minimum amount of bandwidth is used.

    :param anomaly_timestamp: the anomaly timestamp
    :type anomaly_timestamp: int
    :return: list
    :rtype: list

    """

    message = 'luminosity_remote_data returned'
    success = False
    luminosity_data = []
    logger.info('luminosity_remote_data :: determining unique_metrics')
    unique_metrics = []
    # If you modify the values of 61 or 600 here, they must be modified in
    # the corresponding functions (get_correlations and get_anomalous_ts) in
    # skyline/luminosity/process_correlations.py as well
    from_timestamp = int(anomaly_timestamp) - 600
    until_timestamp = int(anomaly_timestamp) + 61

    try:
        unique_metrics = list(REDIS_CONN.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))
    except Exception as e:
        logger.error('error :: %s' % str(e))
        logger.error('error :: luminosity_remote_data :: could not determine unique_metrics from Redis set')
    if not unique_metrics:
        message = 'error :: luminosity_remote_data :: could not determine unique_metrics from Redis set'
        return luminosity_data, success, message
    logger.info('luminosity_remote_data :: %s unique_metrics' % str(len(unique_metrics)))

    # assigned metrics
    assigned_min = 0
    assigned_max = len(unique_metrics)
    assigned_keys = range(assigned_min, assigned_max)

    # Compile assigned metrics
    assigned_metrics = [unique_metrics[index] for index in assigned_keys]
    # Check if this process is unnecessary
    if len(assigned_metrics) == 0:
        message = 'error :: luminosity_remote_data :: assigned_metrics length is 0'
        logger.error(message)
        return luminosity_data, success, message

    # Multi get series
    raw_assigned_failed = True
    try:
        raw_assigned = REDIS_CONN.mget(assigned_metrics)
        raw_assigned_failed = False
    except:
        logger.info(traceback.format_exc())
        message = 'error :: luminosity_remote_data :: failed to mget raw_assigned'
        logger.error(message)
        return luminosity_data, success, message
    if raw_assigned_failed:
        message = 'error :: luminosity_remote_data :: failed to mget raw_assigned'
        logger.error(message)
        return luminosity_data, success, message

    # Distill timeseries strings into lists
    for i, metric_name in enumerate(assigned_metrics):
        timeseries = []
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []

        if not timeseries:
            continue

        # @added 20200507 - Feature #3532: Sort all time series
        # To ensure that there are no unordered timestamps in the time
        # series which are artefacts of the collector or carbon-relay, sort
        # all time series by timestamp before analysis.
        original_timeseries = timeseries
        if original_timeseries:
            timeseries = sort_timeseries(original_timeseries)
            del original_timeseries

        # Convert the time series if this is a known_derivative_metric
        base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
        known_derivative_metric = is_derivative_metric('webapp', base_name)
        if known_derivative_metric:
            try:
                derivative_timeseries = nonNegativeDerivative(timeseries)
                timeseries = derivative_timeseries
            except:
                logger.error('error :: nonNegativeDerivative failed')

        correlate_ts = []
        for ts, value in timeseries:
            if int(ts) < from_timestamp:
                continue
            if int(ts) <= anomaly_timestamp:
                correlate_ts.append((int(ts), value))
            # until_timestamp is already anomaly_timestamp + 61, so compare
            # against it directly
            if int(ts) > until_timestamp:
                break
        if not correlate_ts:
            continue
        metric_data = [str(metric_name), correlate_ts]
        luminosity_data.append(metric_data)

    logger.info('luminosity_remote_data :: %s valid metric time series data preprocessed for the remote request' % str(len(luminosity_data)))

    # Flag the preprocessing as successful, otherwise the caller always sees
    # success as False
    success = True
    return luminosity_data, success, message
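As a hedged sketch only, the "gzipped by the Flask Webapp response"
behaviour described in the docstring could look something like the
following at the route level (the route name, app object and manual gzip
handling here are illustrative assumptions, not Skyline's actual webapp
code):

    import gzip
    import json

    from flask import Flask, Response, request

    app = Flask(__name__)

    @app.route('/luminosity_remote_data', methods=['GET'])
    def luminosity_remote_data_route():
        anomaly_timestamp = int(request.args.get('anomaly_timestamp', '0'))
        luminosity_data, success, message = luminosity_remote_data(anomaly_timestamp)
        payload = json.dumps({'results': luminosity_data, 'message': message})
        # Compress the JSON payload so the minimum amount of bandwidth is used
        compressed = gzip.compress(payload.encode('utf-8'))
        return Response(compressed, content_type='application/json',
                        headers={'Content-Encoding': 'gzip'})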
Example #3
def alert_smtp(datapoint, metric_name, expiration_time, metric_trigger,
               algorithm):

    sender = settings.BOUNDARY_SMTP_OPTS['sender']

    matched_namespaces = []
    for namespace in settings.BOUNDARY_SMTP_OPTS['recipients']:
        CHECK_MATCH_PATTERN = namespace
        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
        pattern_match = check_match_pattern.match(metric_name)
        if pattern_match:
            matched_namespaces.append(namespace)
    matched_recipients = []
    for namespace in matched_namespaces:
        for recipients in settings.BOUNDARY_SMTP_OPTS['recipients'][namespace]:
            matched_recipients.append(recipients)

    def unique_noHash(seq):
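        # Order-preserving dedupe: set.add() returns None, so
        # "not seen.add(str(x))" is truthy and is only evaluated for
        # items that are not already in seen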
        seen = set()
        return [x for x in seq if str(x) not in seen and not seen.add(str(x))]

    recipients = unique_noHash(matched_recipients)

    # Backwards compatibility
    if type(recipients) is str:
        recipients = [recipients]

    # @added 20180524 - Task #2384: Change alerters to cc other recipients
    # The alerters did send an individual email to each recipient. This would be
    # more useful if one email was sent with the first smtp recipient being the
    # to recipient and the subsequent recipients were add in cc.
    primary_recipient = False
    cc_recipients = False
    if recipients:
        for i_recipient in recipients:
            if not primary_recipient:
                primary_recipient = str(i_recipient)
            if primary_recipient != i_recipient:
                if not cc_recipients:
                    cc_recipients = str(i_recipient)
                else:
                    new_cc_recipients = '%s,%s' % (str(cc_recipients),
                                                   str(i_recipient))
                    cc_recipients = str(new_cc_recipients)
        logger.info(
            'alert_smtp - will send to primary_recipient :: %s, cc_recipients :: %s'
            % (str(primary_recipient), str(cc_recipients)))

    alert_algo = str(algorithm)
    alert_context = alert_algo.upper()

    # @added 20191008 - Feature #3194: Add CUSTOM_ALERT_OPTS to settings
    try:
        main_alert_title = settings.CUSTOM_ALERT_OPTS['main_alert_title']
    except:
        main_alert_title = 'Skyline'
    try:
        app_alert_context = settings.CUSTOM_ALERT_OPTS[
            'boundary_alert_heading']
    except:
        app_alert_context = 'Boundary'

    # @modified 20191002 - Feature #3194: Add CUSTOM_ALERT_OPTS to settings
    # Use alert_context
    # unencoded_graph_title = 'Skyline Boundary - %s at %s hours - %s - %s' % (
    #     alert_context, graphite_previous_hours, metric_name, datapoint)
    unencoded_graph_title = '%s %s - %s at %s hours - %s - %s' % (
        main_alert_title, app_alert_context, alert_context,
        graphite_previous_hours, metric_name, datapoint)

    # @added 20181126 - Task #2742: Update Boundary
    #                   Feature #2034: analyse_derivatives
    # Added deriative functions to convert the values of metrics strictly
    # increasing monotonically to their deriative products in alert graphs and
    # specify it in the graph_title
    known_derivative_metric = False
    try:
        # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
        # @modified 20191030 - Bug #3266: py3 Redis binary objects not strings
        #                      Branch #3262: py3
        # Use get_redis_conn_decoded
        # if settings.REDIS_PASSWORD:
        #     # @modified 20191022 - Bug #3266: py3 Redis binary objects not strings
        #     #                      Branch #3262: py3
        #     # REDIS_ALERTER_CONN = redis.StrictRedis(password=settings.REDIS_PASSWORD, unix_socket_path=settings.REDIS_SOCKET_PATH)
        #     REDIS_ALERTER_CONN = redis.StrictRedis(password=settings.REDIS_PASSWORD, unix_socket_path=settings.REDIS_SOCKET_PATH, charset='utf-8', decode_responses=True)
        # else:
        #     # REDIS_ALERTER_CONN = redis.StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
        #     REDIS_ALERTER_CONN = redis.StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH, charset='utf-8', decode_responses=True)
        REDIS_ALERTER_CONN = get_redis_conn_decoded(skyline_app)
    except:
        logger.error('error :: alert_smtp - redis connection failed')

    # @modified 20191022 - Bug #3266: py3 Redis binary objects not strings
    #                      Branch #3262: py3
    try:
        derivative_metrics = list(
            REDIS_ALERTER_CONN.smembers('derivative_metrics'))
    except:
        derivative_metrics = []
    redis_metric_name = '%s%s' % (settings.FULL_NAMESPACE, str(metric_name))
    if redis_metric_name in derivative_metrics:
        known_derivative_metric = True
    if known_derivative_metric:
        try:
            non_derivative_monotonic_metrics = settings.NON_DERIVATIVE_MONOTONIC_METRICS
        except:
            non_derivative_monotonic_metrics = []
        skip_derivative = in_list(redis_metric_name,
                                  non_derivative_monotonic_metrics)
        if skip_derivative:
            known_derivative_metric = False

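    # NOTE: this call supersedes the manual derivative_metrics lookup above;
    # its result overwrites known_derivative_metric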
    known_derivative_metric = is_derivative_metric(skyline_app, metric_name)

    if known_derivative_metric:
        # @modified 20191002 - Feature #3194: Add CUSTOM_ALERT_OPTS to settings
        # unencoded_graph_title = 'Skyline Boundary - %s at %s hours - derivative graph - %s - %s' % (
        #     alert_context, graphite_previous_hours, metric_name, datapoint)
        unencoded_graph_title = '%s %s - %s at %s hours - derivative graph - %s - %s' % (
            main_alert_title, app_alert_context, alert_context,
            graphite_previous_hours, metric_name, datapoint)

    graph_title_string = quote(unencoded_graph_title, safe='')
    graph_title = '&title=%s' % graph_title_string

    # @added 20181126 - Bug #2498: Incorrect scale in some graphs
    #                   Task #2742: Update Boundary
    # If -xhours is used the scale is incorrect if x hours > than first
    # retention period, passing from and until renders the graph with the
    # correct scale.
    graphite_port = '80'
    if settings.GRAPHITE_PORT != '':
        graphite_port = str(settings.GRAPHITE_PORT)
    until_timestamp = int(time())
    from_seconds_ago = graphite_previous_hours * 3600
    from_timestamp = until_timestamp - from_seconds_ago
    graphite_from = dt.datetime.fromtimestamp(
        int(from_timestamp)).strftime('%H:%M_%Y%m%d')
    logger.info('graphite_from - %s' % str(graphite_from))
    graphite_until = dt.datetime.fromtimestamp(
        int(until_timestamp)).strftime('%H:%M_%Y%m%d')
    logger.info('graphite_until - %s' % str(graphite_until))
    # @modified 20191022 - Task #3294: py3 - handle system parameter in Graphite cactiStyle
    # graphite_target = 'target=cactiStyle(%s)'
    graphite_target = 'target=cactiStyle(%s,%%27si%%27)' % metric_name
    if known_derivative_metric:
        # @modified 20191022 - Task #3294: py3 - handle system parameter in Graphite cactiStyle
        # graphite_target = 'target=cactiStyle(nonNegativeDerivative(%s))'
        graphite_target = 'target=cactiStyle(nonNegativeDerivative(%s),%%27si%%27)' % metric_name
    # @modified 20190520 - Branch #3002: docker
    # Use GRAPHITE_RENDER_URI
    # link = '%s://%s:%s/render/?from=%s&until=%s&%s%s%s&colorList=%s' % (
    #     settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST, graphite_port,
    #     str(graphite_from), str(graphite_until), graphite_target,
    #     settings.GRAPHITE_GRAPH_SETTINGS, graph_title,
    #     graphite_graph_line_color)
    link = '%s://%s:%s/%s/?from=%s&until=%s&%s%s%s&colorList=%s' % (
        settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
        graphite_port, settings.GRAPHITE_RENDER_URI, str(graphite_from),
        str(graphite_until), graphite_target, settings.GRAPHITE_GRAPH_SETTINGS,
        graph_title, graphite_graph_line_color)

    content_id = metric_name
    image_data = None

    image_file = '%s/%s.%s.%s.alert_smtp.png' % (
        settings.SKYLINE_TMP_DIR, skyline_app, str(until_timestamp),
        metric_name)
    if settings.BOUNDARY_SMTP_OPTS.get('embed-images'):
        image_data = get_graphite_graph_image(skyline_app, link, image_file)

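    # NOTE: the 'embed-images_disabled3290' key appears to be a deliberately
    # unset sentinel, so the legacy urllib block below never runs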
    if settings.BOUNDARY_SMTP_OPTS.get('embed-images_disabled3290'):
        # @modified 20191021 - Task #3290: Handle urllib2 in py3
        #                      Branch #3262: py3
        if python_version == 2:
            try:
                # @modified 20170913 - Task #2160: Test skyline with bandit
                # Added nosec to exclude from bandit tests
                # image_data = urllib2.urlopen(link).read()  # nosec
                image_data = None
            except urllib2.URLError:
                image_data = None
        if python_version == 3:
            try:
                # image_data = urllib.request.urlopen(link).read()  # nosec
                image_data = None
            except:
                logger.error(traceback.format_exc())
                logger.error(
                    'error :: boundary_alerters :: alert_smtp :: failed to urlopen %s'
                    % str(link))
                image_data = None

    # If we failed to get the image or if it was explicitly disabled,
    # use the image URL instead of the content.
    if image_data is None:
        img_tag = '<img src="%s"/>' % link
    else:
        img_tag = '<img src="cid:%s"/>' % content_id

    # @modified 20191002 - Feature #3194: Add CUSTOM_ALERT_OPTS to settings
    # body = '%s :: %s <br> Next alert in: %s seconds <br> skyline Boundary alert - %s <br><a href="%s">%s</a>' % (
    #     datapoint, metric_name, expiration_time, alert_context, link, img_tag)
    body = '%s :: %s <br> Next alert in: %s seconds <br> %s %s alert - %s <br><a href="%s">%s</a>' % (
        datapoint, metric_name, expiration_time, main_alert_title,
        app_alert_context, alert_context, link, img_tag)

    # @modified 20180524 - Task #2384: Change alerters to cc other recipients
    # Do not send to each recipient, send to primary_recipient and cc the other
    # recipients, thereby sending only one email
    # for recipient in recipients:
    if primary_recipient:
        logger.info(
            'alert_smtp - will send to primary_recipient :: %s, cc_recipients :: %s'
            % (str(primary_recipient), str(cc_recipients)))

        msg = MIMEMultipart('alternative')
        # @modified 20191002 - Feature #3194: Add CUSTOM_ALERT_OPTS to settings
        # msg['Subject'] = '[Skyline alert] ' + 'Boundary ALERT - ' + alert_context + ' - ' + datapoint + ' - ' + metric_name
        msg['Subject'] = '[' + main_alert_title + ' alert] ' + app_alert_context + ' ALERT - ' + alert_context + ' - ' + datapoint + ' - ' + metric_name
        msg['From'] = sender
        # @modified 20180524 - Task #2384: Change alerters to cc other recipients
        # msg['To'] = recipient
        msg['To'] = primary_recipient

        # @added 20180524 - Task #2384: Change alerters to cc other recipients
        # Added Cc
        if cc_recipients:
            msg['Cc'] = cc_recipients

        msg.attach(MIMEText(body, 'html'))
        if image_data is not None:

            # msg_attachment = MIMEImage(image_data)
            fp = open(image_file, 'rb')
            msg_attachment = MIMEImage(fp.read())
            fp.close()

            msg_attachment.add_header('Content-ID', '<%s>' % content_id)
            msg.attach(msg_attachment)

        s = SMTP('127.0.0.1')
        # @modified 20180524 - Task #2384: Change alerters to cc other recipients
        # Send to primary_recipient and cc_recipients
        # s.sendmail(sender, recipient, msg.as_string())
        try:
            if cc_recipients:
                # sendmail() treats a comma-joined string as a single
                # address, so pass the cc recipients as a list
                s.sendmail(sender, [primary_recipient] + cc_recipients.split(','),
                           msg.as_string())
            else:
                s.sendmail(sender, primary_recipient, msg.as_string())
        except:
            logger.error(traceback.format_exc())
            logger.error(
                'error :: alert_smtp - could not send email to primary_recipient :: %s, cc_recipients :: %s'
                % (str(primary_recipient), str(cc_recipients)))
        s.quit()
Example #4
def get_anomalous_ts(base_name, anomaly_timestamp):

    logger = logging.getLogger(skyline_app_logger)

    # @added 20180423 - Feature #2360: CORRELATE_ALERTS_ONLY
    #                   Branch #2270: luminosity
    # Only correlate metrics with an alert setting
    if correlate_alerts_only:
        try:
            # @modified 20191030 - Bug #3266: py3 Redis binary objects not strings
            #                      Branch #3262: py3
            # smtp_alerter_metrics = list(redis_conn.smembers('analyzer.smtp_alerter_metrics'))
            # @modified 20200421 - Feature #3306: Record anomaly_end_timestamp
            #                      Branch #2270: luminosity
            #                      Branch #3262: py3
            # Changed to use the aet Redis set, used to determine and record the
            # anomaly_end_timestamp, some transient sets need to copied so that
            # the data always exists, even if it is sourced from a transient set.
            # smtp_alerter_metrics = list(redis_conn_decoded.smembers('analyzer.smtp_alerter_metrics'))
            smtp_alerter_metrics = list(redis_conn_decoded.smembers('aet.analyzer.smtp_alerter_metrics'))
        except:
            smtp_alerter_metrics = []
        if base_name not in smtp_alerter_metrics:
            logger.error('%s has no alerter setting, not correlating' % base_name)
            return []

    if not base_name or not anomaly_timestamp:
        return []

    # from skyline_functions import nonNegativeDerivative
    anomalous_metric = '%s%s' % (settings.FULL_NAMESPACE, base_name)
    unique_metrics = []
    try:
        # @modified 20191030 - Bug #3266: py3 Redis binary objects not strings
        #                      Branch #3262: py3
        # unique_metrics = list(redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))
        unique_metrics = list(redis_conn_decoded.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: get_anomalous_ts :: no unique_metrics')
        return []
    # @added 20180720 - Feature #2464: luminosity_remote_data
    # Ensure that Luminosity only processes its own Redis metrics so that if
    # multiple Skyline instances are running, Luminosity does not process an
    # anomaly_id for a metric that is not local to itself.  This will stop the
    # call to the remote Redis with other_redis_conn below.  With the
    # introduction of the preprocessing luminosity_remote_data API endpoint for
    # remote Skyline instances, there is no further requirement for Skyline
    # instances to have direct access to Redis on another Skyline instance.
    # This is a much better solution as it means all data is preprocessed and
    # encrypted, so nothing needs to be opened in iptables other than 443 (or
    # a custom https port).
    if anomalous_metric in unique_metrics:
        logger.info('%s is a metric in Redis, processing on this Skyline instance' % base_name)
    else:
        logger.info('%s is not a metric in Redis, not processing on this Skyline instance' % base_name)
        return []

    assigned_metrics = [anomalous_metric]
    # @modified 20180419 -
    raw_assigned = []
    try:
        raw_assigned = redis_conn.mget(assigned_metrics)
    except:
        raw_assigned = []
    if raw_assigned == [None]:
        logger.info('%s data not retrieved from local Redis' % (str(base_name)))
        raw_assigned = []

    # @modified 20180721 - Feature #2464: luminosity_remote_data
    # TO BE DEPRECATED settings.OTHER_SKYLINE_REDIS_INSTANCES
    # with the addition of the luminosity_remote_data API call and the above
    if not raw_assigned and settings.OTHER_SKYLINE_REDIS_INSTANCES:
        # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
        # for redis_ip, redis_port in settings.OTHER_SKYLINE_REDIS_INSTANCES:
        for redis_ip, redis_port, redis_password in settings.OTHER_SKYLINE_REDIS_INSTANCES:
            if not raw_assigned:
                try:
                    if redis_password:
                        other_redis_conn = StrictRedis(host=str(redis_ip), port=int(redis_port), password=str(redis_password))
                    else:
                        other_redis_conn = StrictRedis(host=str(redis_ip), port=int(redis_port))
                    raw_assigned = other_redis_conn.mget(assigned_metrics)
                    if raw_assigned == [None]:
                        logger.info('%s data not retrieved from Redis at %s on port %s' % (str(base_name), str(redis_ip), str(redis_port)))
                        raw_assigned = []
                    if raw_assigned:
                        logger.info('%s data retrieved from Redis at %s on port %s' % (str(base_name), str(redis_ip), str(redis_port)))
                except:
                    logger.error(traceback.format_exc())
                    logger.error('error :: failed to connect to Redis at %s on port %s' % (str(redis_ip), str(redis_port)))
                    raw_assigned = []

    if not raw_assigned or raw_assigned == [None]:
        logger.info('%s data not retrieved' % (str(base_name)))
        return []

    for i, metric_name in enumerate(assigned_metrics):
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []

        # @added 20200507 - Feature #3532: Sort all time series
        # To ensure that there are no unordered timestamps in the time
        # series which are artefacts of the collector or carbon-relay, sort
        # all time series by timestamp before analysis.
        original_timeseries = timeseries
        if original_timeseries:
            timeseries = sort_timeseries(original_timeseries)
            del original_timeseries

    # Convert the time series if this is a known_derivative_metric
    known_derivative_metric = is_derivative_metric(skyline_app, base_name)
    if known_derivative_metric:
        derivative_timeseries = nonNegativeDerivative(timeseries)
        timeseries = derivative_timeseries

    # Sample the time series
    # @modified 20180720 - Feature #2464: luminosity_remote_data
    # Added note here - if you modify the value of 600 here, it must be
    # modified in the luminosity_remote_data function in
    # skyline/webapp/backend.py as well
    from_timestamp = anomaly_timestamp - 600
    anomaly_ts = []
    for ts, value in timeseries:
        if int(ts) < from_timestamp:
            continue
        if int(ts) <= anomaly_timestamp:
            anomaly_ts.append((int(ts), value))
        if int(ts) > anomaly_timestamp:
            break

    # @added 20190515 - Bug #3008: luminosity - do not analyse short time series
    # Only return a time series sample if the sample has sufficient data points
    # otherwise get_anomalies() will throw an error
    len_anomaly_ts = len(anomaly_ts)
    if len_anomaly_ts <= 9:
        logger.info('insufficient data retrieved for %s, only %s data points surfaced, not correlating' % (
            str(base_name), str(len_anomaly_ts)))
        return []

    return anomaly_ts
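As a usage note (metric name, timestamp and values hypothetical), the
returned sample is a list of (int timestamp, value) tuples covering at most
the 600 seconds leading up to the anomaly:

    anomaly_ts = get_anomalous_ts('stats.server-1.cpu.user', 1590000600)
    # e.g. [(1590000060, 0.0), (1590000120, 3.0), ..., (1590000600, 42.0)]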
Example #5
def alert_slack(datapoint, metric_name, expiration_time, metric_trigger,
                algorithm):

    if not settings.SLACK_ENABLED:
        return False

    from slackclient import SlackClient
    metric = metric_name
    logger.info('alert_slack - anomalous metric :: metric: %s - %s' %
                (metric, algorithm))
    base_name = metric
    alert_algo = str(algorithm)
    alert_context = alert_algo.upper()

    # The known_derivative_metric state is determined in case we need to surface
    # the png image from Graphite if the Ionosphere image is not available for
    # some reason.  This will result in Skyline at least still sending an alert
    # to slack, even if some gear fails in Ionosphere or slack alerting is used
    # without Ionosphere enabled. Yes not DRY but multiprocessing and spawn
    # safe.
    known_derivative_metric = False

    #    try:
    #        if settings.REDIS_PASSWORD:
    #            # @modified 20191022 - Bug #3266: py3 Redis binary objects not strings
    #            #                      Branch #3262: py3
    #            # REDIS_ALERTER_CONN = redis.StrictRedis(password=settings.REDIS_PASSWORD, unix_socket_path=settings.REDIS_SOCKET_PATH)
    #            REDIS_ALERTER_CONN = redis.StrictRedis(password=settings.REDIS_PASSWORD, unix_socket_path=settings.REDIS_SOCKET_PATH, charset='utf-8', decode_responses=True)
    #        else:
    #            # REDIS_ALERTER_CONN = redis.StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
    #            REDIS_ALERTER_CONN = redis.StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH, charset='utf-8', decode_responses=True)
    #    except:
    #        logger.error('error :: alert_slack - redis connection failed')
    #    try:
    #        derivative_metrics = list(REDIS_ALERTER_CONN.smembers('derivative_metrics'))
    #    except:
    #        derivative_metrics = []
    redis_metric_name = '%s%s' % (settings.FULL_NAMESPACE, str(base_name))
    #    if redis_metric_name in derivative_metrics:
    #        known_derivative_metric = True
    known_derivative_metric = is_derivative_metric(skyline_app, str(base_name))

    # if known_derivative_metric:
    #     try:
    #         non_derivative_monotonic_metrics = settings.NON_DERIVATIVE_MONOTONIC_METRICS
    #     except:
    #         non_derivative_monotonic_metrics = []
    #     skip_derivative = in_list(redis_metric_name, non_derivative_monotonic_metrics)
    #     if skip_derivative:
    #         known_derivative_metric = False

    # @added 20191008 - Feature #3194: Add CUSTOM_ALERT_OPTS to settings
    try:
        main_alert_title = settings.CUSTOM_ALERT_OPTS['main_alert_title']
    except:
        main_alert_title = 'Skyline'
    try:
        app_alert_context = settings.CUSTOM_ALERT_OPTS[
            'boundary_alert_heading']
    except:
        app_alert_context = 'Boundary'

    if known_derivative_metric:
        # @modified 20191008 - Feature #3194: Add CUSTOM_ALERT_OPTS to settings
        # unencoded_graph_title = 'Skyline Boundary - ALERT %s at %s hours - derivative graph - %s' % (
        #     alert_context, str(graphite_previous_hours), metric)
        # slack_title = '*Skyline Boundary - ALERT* %s on %s at %s hours - derivative graph - %s' % (
        #     alert_context, metric, str(graphite_previous_hours), datapoint)
        unencoded_graph_title = '%s %s - ALERT %s at %s hours - derivative graph - %s' % (
            main_alert_title, app_alert_context, alert_context,
            str(graphite_previous_hours), metric)
        slack_title = '*%s %s - ALERT* %s on %s at %s hours - derivative graph - %s' % (
            main_alert_title, app_alert_context, alert_context, metric,
            str(graphite_previous_hours), datapoint)
    else:
        # unencoded_graph_title = 'Skyline Boundary - ALERT %s at %s hours - %s' % (
        #     alert_context, str(graphite_previous_hours), metric)
        # slack_title = '*Skyline Boundary - ALERT* %s on %s at %s hours - %s' % (
        #     alert_context, metric, str(graphite_previous_hours), datapoint)
        unencoded_graph_title = '%s %s - ALERT %s at %s hours - %s' % (
            main_alert_title, app_alert_context, alert_context,
            str(graphite_previous_hours), metric)
        slack_title = '*%s %s - ALERT* %s on %s at %s hours - %s' % (
            main_alert_title, app_alert_context, alert_context, metric,
            str(graphite_previous_hours), datapoint)

    graph_title_string = quote(unencoded_graph_title, safe='')
    graph_title = '&title=%s' % graph_title_string

    until_timestamp = int(time())
    target_seconds = int((graphite_previous_hours * 60) * 60)
    from_timestamp = str(until_timestamp - target_seconds)

    graphite_from = dt.datetime.fromtimestamp(
        int(from_timestamp)).strftime('%H:%M_%Y%m%d')
    logger.info('graphite_from - %s' % str(graphite_from))
    graphite_until = dt.datetime.fromtimestamp(
        int(until_timestamp)).strftime('%H:%M_%Y%m%d')
    logger.info('graphite_until - %s' % str(graphite_until))
    # @added 20181025 - Feature #2618: alert_slack
    # Added date and time info so you do not have to mouseover the slack
    # message to determine the time at which the alert came in
    timezone = strftime("%Z", gmtime())
    # @modified 20181029 - Feature #2618: alert_slack
    # Use the standard UNIX data format
    # human_anomaly_time = dt.datetime.fromtimestamp(int(until_timestamp)).strftime('%Y-%m-%d %H:%M:%S')
    human_anomaly_time = dt.datetime.fromtimestamp(
        int(until_timestamp)).strftime('%c')
    slack_time_string = '%s %s' % (human_anomaly_time, timezone)

    # @added 20191106 - Branch #3262: py3
    #                   Branch #3002: docker
    graphite_port = get_graphite_port(skyline_app)
    graphite_render_uri = get_graphite_render_uri(skyline_app)
    graphite_custom_headers = get_graphite_custom_headers(skyline_app)

    if settings.GRAPHITE_PORT != '':
        if known_derivative_metric:
            # @modified 20190520 - Branch #3002: docker
            # Use GRAPHITE_RENDER_URI
            # link = '%s://%s:%s/render/?from=%s&until=%s&target=cactiStyle(nonNegativeDerivative(%s))%s%s&colorList=orange' % (
            #     settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
            #     settings.GRAPHITE_PORT, str(graphite_from), str(graphite_until),
            #     metric, settings.GRAPHITE_GRAPH_SETTINGS, graph_title)
            # @modified 20191022 - Task #3294: py3 - handle system parameter in Graphite cactiStyle
            # link = '%s://%s:%s/%s/?from=%s&until=%s&target=cactiStyle(nonNegativeDerivative(%s))%s%s&colorList=orange' % (
            link = '%s://%s:%s/%s/?from=%s&until=%s&target=cactiStyle(nonNegativeDerivative(%s),%%27si%%27)%s%s&colorList=orange' % (
                settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
                settings.GRAPHITE_PORT, settings.GRAPHITE_RENDER_URI,
                str(graphite_from), str(graphite_until), metric,
                settings.GRAPHITE_GRAPH_SETTINGS, graph_title)
        else:
            # @modified 20190520 - Branch #3002: docker
            # Use GRAPHITE_RENDER_URI
            # link = '%s://%s:%s/render/?from=%s&until=%s&target=cactiStyle(%s)%s%s&colorList=orange' % (
            #     settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
            #     settings.GRAPHITE_PORT, str(graphite_from), str(graphite_until),
            #     metric, settings.GRAPHITE_GRAPH_SETTINGS, graph_title)
            # @modified 20191022 - Task #3294: py3 - handle system parameter in Graphite cactiStyle
            # link = '%s://%s:%s/%s/?from=%s&until=%s&target=cactiStyle(%s)%s%s&colorList=orange' % (
            link = '%s://%s:%s/%s/?from=%s&until=%s&target=cactiStyle(%s,%%27si%%27)%s%s&colorList=orange' % (
                settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
                settings.GRAPHITE_PORT, settings.GRAPHITE_RENDER_URI,
                str(graphite_from), str(graphite_until), metric,
                settings.GRAPHITE_GRAPH_SETTINGS, graph_title)
    else:
        if known_derivative_metric:
            # @modified 20190520 - Branch #3002: docker
            # Use GRAPHITE_RENDER_URI
            # link = '%s://%s/render/?from=%s&until=%s&target=cactiStyle(nonNegativeDerivative(%s))%s%s&colorList=orange' % (
            #     settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
            #     str(graphite_from), str(graphite_until), metric,
            #     settings.GRAPHITE_GRAPH_SETTINGS, graph_title)
            # @modified 20191022 - Task #3294: py3 - handle system parameter in Graphite cactiStyle
            # link = '%s://%s/%s/?from=%s&until=%s&target=cactiStyle(nonNegativeDerivative(%s))%s%s&colorList=orange' % (
            link = '%s://%s/%s/?from=%s&until=%s&target=cactiStyle(nonNegativeDerivative(%s),%%27si%%27)%s%s&colorList=orange' % (
                settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
                settings.GRAPHITE_RENDER_URI, str(graphite_from),
                str(graphite_until), metric, settings.GRAPHITE_GRAPH_SETTINGS,
                graph_title)
        else:
            # @modified 20190520 - Branch #3002: docker
            # Use GRAPHITE_RENDER_URI
            # link = '%s://%s/render/?from=%s&until=%s&target=cactiStyle(%s)%s%s&colorList=orange' % (
            #     settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
            #     str(graphite_from), str(graphite_until), metric,
            #     settings.GRAPHITE_GRAPH_SETTINGS, graph_title)
            # @modified 20191022 - Task #3294: py3 - handle system parameter in Graphite cactiStyle
            # link = '%s://%s/%s/?from=%s&until=%s&target=cactiStyle(%s)%s%s&colorList=orange' % (
            link = '%s://%s/%s/?from=%s&until=%s&target=cactiStyle(%s,%%27si%%27)%s%s&colorList=orange' % (
                settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
                settings.GRAPHITE_RENDER_URI, str(graphite_from),
                str(graphite_until), metric, settings.GRAPHITE_GRAPH_SETTINGS,
                graph_title)

    # slack does not allow embedded images, nor will it fetch links behind
    # authentication, so Skyline uploads a png graphite image with the message
    image_file = None

    # Fetch the png from Graphite
    # @modified 20191021 - Task #3290: Handle urllib2 in py3
    #                      Branch #3262: py3
    image_file = '%s/%s.%s.graphite.%sh.png' % (
        settings.SKYLINE_TMP_DIR, base_name, skyline_app,
        str(int(graphite_previous_hours)))

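    # NOTE: the two version-gated blocks below appear to be deliberately
    # disabled (python_version is never 22 or 33); the image is fetched
    # unconditionally via get_graphite_graph_image further down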
    if python_version == 22:
        try:
            # image_data = urllib2.urlopen(link).read()  # nosec
            image_data = None
        # except urllib2.URLError:
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: alert_slack - failed to get image graph')
            logger.error('error :: alert_slack - %s' % str(link))
            image_data = None
    if python_version == 33:
        try:
            image_file = '%s/%s.%s.graphite.%sh.png' % (
                settings.SKYLINE_TMP_DIR, base_name, skyline_app,
                str(int(graphite_previous_hours)))
            #            urllib.request.urlretrieve(link, image_file)
            image_data = 'retrieved'
            image_data = None
        except:
            try:
                # @added 20191022 - Task #3294: py3 - handle system parameter in Graphite cactiStyle
                image_data = None
                original_traceback = traceback.format_exc()
                if 'cactiStyle' in link:
                    metric_replace = '%s,%%27si%%27' % metric
                    original_link = link
                    link = link.replace(metric, metric_replace)
                    logger.info(
                        'link replaced with cactiStyle system parameter added - %s'
                        % str(link))
                    urllib.request.urlretrieve(link, image_file)
                    image_data = 'retrieved'
            except:
                new_trackback = traceback.format_exc()
                logger.error(original_traceback)
                logger.error(
                    'error :: boundary_alerters :: alert_slack :: failed to urlopen %s'
                    % str(original_link))
                logger.error(new_trackback)
                logger.error(
                    'error :: boundary_alerters :: alert_slack :: failed to urlopen with system parameter added %s'
                    % str(link))
                image_data = None

    # @added 20191025 -
    image_data = get_graphite_graph_image(skyline_app, link, image_file)

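    # NOTE: this block only runs if image_data equals the sentinel string
    # 'disabled_for_testing', so it too is effectively disabled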
    if image_data == 'disabled_for_testing':
        image_file = '%s/%s.%s.graphite.%sh.png' % (
            settings.SKYLINE_TMP_DIR, base_name, skyline_app,
            str(int(graphite_previous_hours)))
        if image_data != 'retrieved':
            try:
                write_data_to_file(skyline_app, image_file, 'w', image_data)
                logger.info('alert_slack - added Graphite image :: %s' %
                            (image_file))
            except:
                logger.info(traceback.format_exc())
                logger.error(
                    'error :: alert_slack - failed to add %s Graphite image' %
                    (image_file))
                image_file = None
    try:
        filename = os.path.basename(image_file)
    except:
        filename = None

    try:
        bot_user_oauth_access_token = settings.BOUNDARY_SLACK_OPTS[
            'bot_user_oauth_access_token']
    except:
        logger.error(
            'error :: alert_slack - could not determine bot_user_oauth_access_token'
        )
        return False

    # Allow for absolute path metric namespaces but also match wildcard
    # namespaces if there is not an absolute path metric namespace
    channels = 'unknown'
    notify_channels = []
    matched_channels = []
    try:
        channels = settings.BOUNDARY_SLACK_OPTS['channels'][metric_name]
        notify_channels.append(channels)
    except:
        for channel in settings.BOUNDARY_SLACK_OPTS['channels']:
            CHECK_MATCH_PATTERN = channel
            check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
            pattern_match = check_match_pattern.match(metric_name)
            if pattern_match:
                matched_channels.append(channel)

    if matched_channels != []:
        for i_metric_name in matched_channels:
            channels = settings.BOUNDARY_SLACK_OPTS['channels'][i_metric_name]
            notify_channels.append(channels)

    if not notify_channels:
        logger.error('error :: alert_slack - could not determine channel')
        return False
    else:
        channels = notify_channels

    try:
        icon_emoji = settings.BOUNDARY_SLACK_OPTS['icon_emoji']
    except:
        icon_emoji = ':chart_with_upwards_trend:'

    try:
        sc = SlackClient(bot_user_oauth_access_token)
    except:
        logger.info(traceback.format_exc())
        logger.error('error :: alert_slack - could not initiate SlackClient')
        return False

    for channel in channels:
        initial_comment = slack_title + ' :: <' + link + '|graphite image link>\nFor anomaly at ' + slack_time_string
        try:
            # slack does not allow embedded images, nor links behind authentication
            # or color text, so we have jump through all the API hoops to end up
            # having to upload an image with a very basic message.
            # image_file can be None if writing the image failed above
            if image_file and os.path.isfile(image_file):
                slack_file_upload = sc.api_call(
                    'files.upload',
                    filename=filename,
                    channels=channel,
                    initial_comment=initial_comment,
                    file=open(image_file, 'rb'))
                if not slack_file_upload['ok']:
                    logger.error(
                        'error :: alert_slack - failed to send slack message with file upload'
                    )
                    logger.error(
                        'error :: alert_slack - slack_file_upload - %s' %
                        str(slack_file_upload))
                try:
                    os.remove(image_file)
                except OSError:
                    logger.error('error - failed to remove %s, continuing' %
                                 image_file)
                    pass
            else:
                send_text = initial_comment + '  ::  error :: there was no graph image to upload'
                send_message = sc.api_call('chat.postMessage',
                                           channel=channel,
                                           icon_emoji=icon_emoji,
                                           text=send_text)
                if not send_message['ok']:
                    logger.error(
                        'error :: alert_slack - failed to send slack message')
                else:
                    logger.info('alert_slack - sent slack message')
        except:
            logger.info(traceback.format_exc())
            logger.error('error :: alert_slack - could not upload file')
            return False
Example #6
def submit_crucible_job(from_timestamp, until_timestamp, metrics_list,
                        namespaces_list, source, alert_interval, user_id, user,
                        add_to_panorama, pad_timeseries, training_data_json,
                        run_algorithms):
    """
    Get a list of all the metrics passed and generate Crucible check files for
    each

    :param from_timestamp: the timestamp at which to start the time series
    :param until_timestamp: the timestamp at which to end the time series
    :param metrics_list: a list of metric names to analyse
    :param namespaces_list: a list of metric namespaces to analyse
    :param source: the source webapp making the request
    :param alert_interval: the alert_interval at which Crucible should trigger
        anomalies
    :param user_id: the user id of the user making the request
    :param user: the username making the request
    :param add_to_panorama: whether Crucible should add Skyline CONSENSUS
        anomalies to Panorama
    :param pad_timeseries: the amount of data to pad the time series with
    :param training_data_json: the full path to the training_data json file if
        source is training_data
    :param run_algorithms: list of algorithms to run
    :type from_timestamp: int
    :type until_timestamp: int
    :type metrics_list: list
    :type namespaces_list: list
    :type source: str
    :type alert_interval: int
    :type user_id: int
    :type user: str
    :type add_to_panorama: boolean
    :type pad_timeseries: str
    :type training_data_json: str
    :type run_algorithms: list
    :return: (crucible_job_id, metrics_submitted_to_process, fail_msg, trace)
    :rtype: (str, list, str, str)

    Returns (crucible_job_id, metrics_submitted_to_process, fail_msg, trace)

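    Example (hypothetical values):

        crucible_job_id, metrics, fail_msg, trace = submit_crucible_job(
            1590000000, 1590086400, ['stats.server-1.cpu.user'], [],
            'graphite', 30, 1, 'admin', False, 'auto', None, ['default'])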
    """

    fail_msg = None
    trace = None
    crucible_job_id = None
    metrics_submitted_to_process = []

    # Generate a job id based on the YMDHMS.user_id and a job directory
    try:
        jobid_timestamp = int(time())
        jobid_datetimestamp = dt.datetime.fromtimestamp(
            jobid_timestamp).strftime('%Y%m%d%H%M%S')
        crucible_job_id = '%s.%s' % (str(jobid_datetimestamp), str(user_id))
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: failed to determine a crucible_job_id')
        raise  # to webapp to return in the UI

    # Create the job directory for this crucible_job_id
    try:
        crucible_path = os.path.dirname(settings.CRUCIBLE_DATA_FOLDER)
        crucible_job_dir = '%s/jobs/%s' % (crucible_path, crucible_job_id)
        if not path.exists(crucible_job_dir):
            logger.info('creating crucible job directory - %s' %
                        (str(crucible_job_dir)))
            mkdir_p(crucible_job_dir)
    except:
        trace = traceback.format_exc()
        fail_msg = 'error :: failed to create the crucible job directory'
        logger.error(trace)
        logger.error(fail_msg)
        raise  # to webapp to return in the UI

    # TODO: add checks of metric names
    metric_names = []
    if metrics_list:
        logger.info('submit_crucible_job :: %s metrics passed' %
                    str(len(metrics_list)))
        for metric in metrics_list:
            metric_names.append(metric)

    # TODO: add checks of metric namespaces; harder to do, but needed so that
    # the UI errors to the user rather than sending a bad or non-existent
    # metric to Crucible
    if namespaces_list:
        logger.info('submit_crucible_job :: %s namespaces passed' %
                    str(len(namespaces_list)))
        logger.info(
            'submit_crucible_job :: determine metrics for submit_crucible_job between %s and %s'
            % (str(from_timestamp), str(until_timestamp)))
        logger.info('getting MySQL engine')
        try:
            engine, fail_msg, trace = get_an_engine()
            logger.info(fail_msg)
        except:
            trace = traceback.format_exc()
            logger.error(trace)
            logger.error('%s' % fail_msg)
            logger.error(
                'error :: could not get a MySQL engine to get metric names')
            raise  # to webapp to return in the UI

        if not engine:
            trace = 'none'
            fail_msg = 'error :: engine not obtained'
            logger.error(fail_msg)
            # A bare raise here would itself error as there is no active
            # exception, so raise explicitly to the webapp
            raise ValueError(fail_msg)

        try:
            metrics_table, log_msg, trace = metrics_table_meta(
                skyline_app, engine)
            logger.info(log_msg)
            logger.info('metrics_table OK')
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: failed to get metrics_table meta')
            if engine:
                engine_disposal(engine)
            raise  # to webapp to return in the UI

        metrics_like_query = text(
            """SELECT metric FROM metrics WHERE metric LIKE :like_string""")
        for namespace in namespaces_list:
            try:
                connection = engine.connect()
                results = connection.execute(metrics_like_query,
                                             like_string=str(namespace))
                connection.close()
                for row in results:
                    metric_name = str(row[0])
                    metric_names.append(metric_name)
            except:
                trace = traceback.format_exc()
                logger.error(trace)
                logger.error(
                    'error :: could not determine metrics from metrics table')
                if engine:
                    engine_disposal(engine)
                raise
        logger.info(
            'submit_crucible_job :: %s metrics determined from passed namespaces'
            % str(len(metric_names)))

    logger.info('submit_crucible_job :: %s metrics to process' %
                str(len(metric_names)))
    metrics_submitted_to_process = []
    datapoint = 0
    triggered_algorithms = [
        'histogram_bins', 'first_hour_average', 'stddev_from_average',
        'grubbs', 'ks_test', 'mean_subtraction_cumulation',
        'median_absolute_deviation', 'stddev_from_moving_average',
        'least_squares'
    ]
    added_at = int(time())
    for base_name in metric_names:
        sane_metricname = filesafe_metricname(str(base_name))
        derivative_metric = is_derivative_metric(skyline_app, base_name)
        if derivative_metric:
            target = 'nonNegativeDerivative(%s)' % base_name
        else:
            target = base_name
        # Generate a metric job directory
        crucible_anomaly_dir = '%s/%s' % (crucible_job_dir, sane_metricname)
        try:
            if not path.exists(crucible_anomaly_dir):
                logger.info('creating crucible metric job directory - %s' %
                            (str(crucible_anomaly_dir)))
                mkdir_p(crucible_anomaly_dir)
        except:
            trace = traceback.format_exc()
            fail_msg = 'error :: failed to create the crucible metric job directory'
            logger.error(trace)
            logger.error(fail_msg)
            raise  # to webapp to return in the UI
        if source == 'graphite':
            graphite_metric = True
        else:
            graphite_metric = False

        # @added 20200422 - Feature #3500: webapp - crucible_process_metrics
        #                   Feature #1448: Crucible web UI
        # In order for metrics to be analysed in Crucible like the Analyzer or
        # Mirage analysis, the time series data needs to be padded
        # Added pad_timeseries
        graphite_override_uri_parameters = 'from=%s&until=%s&target=%s' % (
            str(from_timestamp), str(until_timestamp), target)
        timeseries_full_duration = int(until_timestamp) - int(from_timestamp)
        pad_timeseries_with = 0
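        # With 'auto' the pad grows with the requested duration, e.g. a
        # 7 day (604800 second) window is padded with a full day of data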
        if pad_timeseries == 'auto':
            if timeseries_full_duration > 3600:
                pad_timeseries_with = 3600
            if timeseries_full_duration > 86400:
                pad_timeseries_with = 86400
        if pad_timeseries == '86400':
            pad_timeseries_with = 86400
        if pad_timeseries == '604800':
            pad_timeseries_with = 604800
        if pad_timeseries == '0':
            pad_timeseries_with = 0
        if pad_timeseries_with:
            try:
                padded_from_timestamp = int(
                    from_timestamp) - pad_timeseries_with
                graphite_override_uri_parameters = 'from=%s&until=%s&target=%s' % (
                    str(padded_from_timestamp), str(until_timestamp), target)
                logger.info('padding time series with %s seconds - %s' %
                            (str(pad_timeseries_with),
                             str(graphite_override_uri_parameters)))
            except:
                logger.error(traceback.format_exc())
                logger.error(
                    'error :: failed to construct graphite_override_uri_parameters with pad_timeseries_with %s'
                    % str(pad_timeseries_with))

        # @added 20200817 - Feature #3682: SNAB - webapp - crucible_process - run_algorithms
        # Allow the user to pass algorithms to run
        algorithms = settings.ALGORITHMS
        if run_algorithms:
            algorithms = run_algorithms

        # @modified 20200421 - Feature #3500: webapp - crucible_process_metrics
        #                      Feature #1448: Crucible web UI
        # Added add_to_panorama
        # @added 20200607 - Feature #3630: webapp - crucible_process_training_data
        # Added training_data_json
        crucible_anomaly_data = 'metric = \'%s\'\n' \
                                'value = \'%s\'\n' \
                                'from_timestamp = \'%s\'\n' \
                                'metric_timestamp = \'%s\'\n' \
                                'algorithms = %s\n' \
                                'triggered_algorithms = %s\n' \
                                'anomaly_dir = \'%s\'\n' \
                                'graphite_metric = %s\n' \
                                'run_crucible_tests = True\n' \
                                'added_by = \'%s\'\n' \
                                'added_at = \'%s\'\n' \
                                'graphite_override_uri_parameters = \'%s\'\n' \
                                'alert_interval = \'%s\'\n' \
                                'add_to_panorama = %s\n' \
                                'training_data_json = %s\n' \
            % (base_name, str(datapoint), str(from_timestamp),
               # @modified 20200817 - Feature #3682: SNAB - webapp - crucible_process - run_algorithms
               # str(until_timestamp), str(settings.ALGORITHMS),
               str(until_timestamp), str(algorithms),
               triggered_algorithms, crucible_anomaly_dir, str(graphite_metric),
               skyline_app, str(added_at), str(graphite_override_uri_parameters),
               str(alert_interval), str(add_to_panorama), str(training_data_json))

        # Create an anomaly file with details about the anomaly
        crucible_anomaly_file = '%s/%s.txt' % (crucible_anomaly_dir,
                                               sane_metricname)
        try:
            write_data_to_file(skyline_app, crucible_anomaly_file, 'w',
                               crucible_anomaly_data)
            logger.info('added crucible anomaly file :: %s' %
                        (crucible_anomaly_file))
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: failed to add crucible anomaly file :: %s' %
                         (crucible_anomaly_file))
        # Create a crucible check file
        crucible_check_file = '%s/%s.%s.txt' % (settings.CRUCIBLE_CHECK_PATH,
                                                str(added_at), sane_metricname)
        try:
            write_data_to_file(skyline_app, crucible_check_file, 'w',
                               crucible_anomaly_data)
            logger.info('added crucible check :: %s,%s' %
                        (base_name, str(added_at)))
            metrics_submitted_to_process.append(base_name)
        except:
            logger.error(traceback.format_exc())
            logger.error('error :: failed to add crucible check file :: %s' %
                         (crucible_check_file))

    return (crucible_job_id, metrics_submitted_to_process, fail_msg, trace)
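
A minimal sketch of how this function might be invoked. The argument values are illustrative assumptions and the positional order is inferred from the docstring, so treat this as a usage outline rather than a verified call:

# Illustrative usage sketch - every value below is an assumption
from time import time

until_timestamp = int(time())
from_timestamp = until_timestamp - 86400  # analyse the last 24 hours
crucible_job_id, metrics_submitted_to_process, fail_msg, trace = \
    submit_crucible_job(
        from_timestamp, until_timestamp,
        ['stats.web01.cpu.user'],  # metrics_list (hypothetical metric)
        [],                        # namespaces_list
        'graphite',                # source
        0,                         # alert_interval
        1,                         # user_id
        'admin',                   # user
        False,                     # add_to_panorama
        'auto',                    # pad_timeseries
        None,                      # training_data_json
        [])                        # run_algorithms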
Example No. 7
def get_anomalous_ts(base_name, anomaly_timestamp):

    logger = logging.getLogger(skyline_app_logger)

    # @added 20180423 - Feature #2360: CORRELATE_ALERTS_ONLY
    #                   Branch #2270: luminosity
    # Only correlate metrics with an alert setting
    if correlate_alerts_only:
        try:
            smtp_alerter_metrics = list(
                redis_conn.smembers('analyzer.smtp_alerter_metrics'))
        except:
            smtp_alerter_metrics = []
        if base_name not in smtp_alerter_metrics:
            logger.error('%s has no alerter setting, not correlating' %
                         base_name)
            return []

    if not base_name or not anomaly_timestamp:
        return []

    # from skyline_functions import nonNegativeDerivative
    anomalous_metric = '%s%s' % (settings.FULL_NAMESPACE, base_name)
    unique_metrics = []
    try:
        unique_metrics = list(
            redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: get_anomalous_ts :: no unique_metrics')
        return []
    # @added 20180720 - Feature #2464: luminosity_remote_data
    # Ensure that Luminosity only processes its own Redis metrics so that if
    # multiple Skyline instances are running, Luminosity does not process an
    # anomaly_id for a metric that is not local to itself.  This will stop the
    # call to the remote Redis with other_redis_conn below.  With the
    # introduction of the preprocessing luminosity_remote_data API endpoint for
    # remote Skyline instances, there is no further requirement for Skyline
    # instances to have direct access to Redis on another Skyline instance.
    # This is a much better solution as it means all data is preprocessed and
    # encrypted, and no iptables rules are needed beyond port 443 (or a custom
    # HTTPS port).
    #
    if anomalous_metric in unique_metrics:
        logger.info(
            '%s is a metric in Redis, processing on this Skyline instance' %
            base_name)
    else:
        logger.info(
            '%s is not a metric in Redis, not processing on this Skyline instance'
            % base_name)
        return []

    assigned_metrics = [anomalous_metric]
    # @modified 20180419 -
    raw_assigned = []
    try:
        raw_assigned = redis_conn.mget(assigned_metrics)
    except:
        raw_assigned = []
    if raw_assigned == [None]:
        logger.info('%s data not retrieved from local Redis' %
                    (str(base_name)))
        raw_assigned = []

    # @modified 20180721 - Feature #2464: luminosity_remote_data
    # TO BE DEPRECATED settings.OTHER_SKYLINE_REDIS_INSTANCES
    # with the addition of the luminosity_remote_data API call and the above
    if not raw_assigned and settings.OTHER_SKYLINE_REDIS_INSTANCES:
        # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
        # for redis_ip, redis_port in settings.OTHER_SKYLINE_REDIS_INSTANCES:
        for redis_ip, redis_port, redis_password in settings.OTHER_SKYLINE_REDIS_INSTANCES:
            if not raw_assigned:
                try:
                    if redis_password:
                        other_redis_conn = StrictRedis(
                            host=str(redis_ip),
                            port=int(redis_port),
                            password=str(redis_password))
                    else:
                        other_redis_conn = StrictRedis(host=str(redis_ip),
                                                       port=int(redis_port))
                    raw_assigned = other_redis_conn.mget(assigned_metrics)
                    if raw_assigned == [None]:
                        logger.info(
                            '%s data not retrieved from Redis at %s on port %s'
                            % (str(base_name), str(redis_ip), str(redis_port)))
                        raw_assigned = []
                    if raw_assigned:
                        logger.info(
                            '%s data retrieved from Redis at %s on port %s' %
                            (str(base_name), str(redis_ip), str(redis_port)))
                except:
                    logger.error(traceback.format_exc())
                    logger.error(
                        'error :: failed to connect to Redis at %s on port %s'
                        % (str(redis_ip), str(redis_port)))
                    raw_assigned = []

    if not raw_assigned or raw_assigned == [None]:
        logger.info('%s data not retrieved' % (str(base_name)))
        return []

    for i, metric_name in enumerate(assigned_metrics):
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []

    # Convert the time series if this is a known_derivative_metric
    known_derivative_metric = is_derivative_metric(skyline_app, base_name)
    if known_derivative_metric:
        derivative_timeseries = nonNegativeDerivative(timeseries)
        timeseries = derivative_timeseries

    # Sample the time series
    # @modified 20180720 - Feature #2464: luminosity_remote_data
    # Added note here - if you modify the value of 600 here, it must be
    # modified in the luminosity_remote_data function in
    # skyline/webapp/backend.py as well
    from_timestamp = anomaly_timestamp - 600
    anomaly_ts = []
    for ts, value in timeseries:
        if int(ts) < from_timestamp:
            continue
        if int(ts) <= anomaly_timestamp:
            anomaly_ts.append((int(ts), value))
        if int(ts) > anomaly_timestamp:
            break
    return anomaly_ts
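
To make the 600 second sampling window concrete, here is a self-contained sketch of the same filter applied to a toy time series; the timestamps and values are fabricated for illustration:

# Toy illustration of the 600 second sampling window used above
anomaly_timestamp = 1600000600
timeseries = [(1600000000 + i * 60, float(i)) for i in range(20)]

from_timestamp = anomaly_timestamp - 600
anomaly_ts = [
    (int(ts), value) for ts, value in timeseries
    if from_timestamp <= int(ts) <= anomaly_timestamp]
# Keeps only the 11 datapoints in the 600 seconds up to the anomaly
print(anomaly_ts[0], anomaly_ts[-1])  # (1600000000, 0.0) (1600000600, 10.0)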
Example No. 8
def get_anomalous_ts(base_name, anomaly_timestamp):

    logger = logging.getLogger(skyline_app_logger)

    # @added 20180423 - Feature #2360: CORRELATE_ALERTS_ONLY
    #                   Branch #2270: luminosity
    # Only correlate metrics with an alert setting
    if correlate_alerts_only:
        try:
            smtp_alerter_metrics = list(
                redis_conn.smembers('analyzer.smtp_alerter_metrics'))
        except:
            smtp_alerter_metrics = []
        if base_name not in smtp_alerter_metrics:
            logger.error('%s has no alerter setting, not correlating' %
                         base_name)
            return False

    if not base_name or not anomaly_timestamp:
        return False

    # from skyline_functions import nonNegativeDerivative
    anomalous_metric = '%s%s' % (settings.FULL_NAMESPACE, base_name)
    assigned_metrics = [anomalous_metric]
    # @modified 20180419 -
    raw_assigned = []
    try:
        raw_assigned = redis_conn.mget(assigned_metrics)
    except:
        raw_assigned = []
    if raw_assigned == [None]:
        logger.info('%s data not retrieved from local Redis' %
                    (str(base_name)))
        raw_assigned = []

    if not raw_assigned and settings.OTHER_SKYLINE_REDIS_INSTANCES:
        # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
        # for redis_ip, redis_port in settings.OTHER_SKYLINE_REDIS_INSTANCES:
        for redis_ip, redis_port, redis_password in settings.OTHER_SKYLINE_REDIS_INSTANCES:
            if not raw_assigned:
                try:
                    if redis_password:
                        other_redis_conn = StrictRedis(
                            host=str(redis_ip),
                            port=int(redis_port),
                            password=str(redis_password))
                    else:
                        other_redis_conn = StrictRedis(host=str(redis_ip),
                                                       port=int(redis_port))
                    raw_assigned = other_redis_conn.mget(assigned_metrics)
                    if raw_assigned == [None]:
                        logger.info(
                            '%s data not retrieved from Redis at %s on port %s'
                            % (str(base_name), str(redis_ip), str(redis_port)))
                        raw_assigned = []
                    if raw_assigned:
                        logger.info(
                            '%s data retrieved from Redis at %s on port %s' %
                            (str(base_name), str(redis_ip), str(redis_port)))
                except:
                    logger.error(traceback.format_exc())
                    logger.error(
                        'error :: failed to connect to Redis at %s on port %s'
                        % (str(redis_ip), str(redis_port)))
                    raw_assigned = []

    if not raw_assigned or raw_assigned == [None]:
        logger.info('%s data not retrieved' % (str(base_name)))
        return False

    for i, metric_name in enumerate(assigned_metrics):
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []

    # Convert the time series if this is a known_derivative_metric
    known_derivative_metric = is_derivative_metric(skyline_app, base_name)
    if known_derivative_metric:
        derivative_timeseries = nonNegativeDerivative(timeseries)
        timeseries = derivative_timeseries

    # Sample the time series
    from_timestamp = anomaly_timestamp - 600
    anomaly_ts = []
    for ts, value in timeseries:
        if int(ts) < from_timestamp:
            continue
        if int(ts) <= anomaly_timestamp:
            anomaly_ts.append((int(ts), value))
        if int(ts) > anomaly_timestamp:
            break
    return anomaly_ts
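
For context on the Unpacker calls in these snippets, a self-contained sketch of the msgpack round-trip they rely on; the payload here is fabricated, but it mirrors the pattern of individually packed (timestamp, value) datapoints concatenated into one Redis string:

# Self-contained msgpack round-trip with a toy payload
from msgpack import Unpacker, packb

# Each datapoint is packed individually and the bytes are concatenated,
# which is the layout an incrementally fed Unpacker expects
raw_series = b''.join(
    packb((1600000000 + i * 60, float(i))) for i in range(3))

unpacker = Unpacker(use_list=False)
unpacker.feed(raw_series)
timeseries = list(unpacker)
print(timeseries)  # [(1600000000, 0.0), (1600000060, 1.0), (1600000120, 2.0)]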
Example No. 9
def get_correlations(base_name, anomaly_timestamp, anomalous_ts,
                     assigned_metrics, raw_assigned, anomalies):

    logger = logging.getLogger(skyline_app_logger)

    # Distill timeseries strings into lists
    start = timer()
    count = 0
    # Sample the time series
    from_timestamp = anomaly_timestamp - 600
    correlated_metrics = []
    correlations = []
    no_data = False
    if not anomalous_ts:
        no_data = True
    if not assigned_metrics:
        no_data = True
    if not raw_assigned:
        no_data = True
    if not anomalies:
        no_data = True
    if no_data:
        logger.error('error :: get_correlations :: no data')
        return (correlated_metrics, correlations)

    for i, metric_name in enumerate(assigned_metrics):
        count += 1
        # print(metric_name)
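        # Note: only the first 1000 assigned metrics are checked here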
        if count > 1000:
            break
        correlated = None
        metric_base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
        if str(metric_base_name) == str(base_name):
            continue
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []
        if not timeseries:
            # print('no time series data for %s' % base_name)
            continue

        # Convert the time series if this is a known_derivative_metric
        known_derivative_metric = is_derivative_metric(skyline_app,
                                                       metric_base_name)
        if known_derivative_metric:
            try:
                derivative_timeseries = nonNegativeDerivative(timeseries)
                timeseries = derivative_timeseries
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: nonNegativeDerivative')

        correlate_ts = []
        for ts, value in timeseries:
            if int(ts) < from_timestamp:
                continue
            if int(ts) <= anomaly_timestamp:
                correlate_ts.append((int(ts), value))
            if int(ts) > (anomaly_timestamp + 61):
                break
        if not correlate_ts:
            continue

        anomaly_ts_dict = dict(anomalous_ts)
        correlate_ts_dict = dict(correlate_ts)

        for a in anomalies:
            try:
                if int(a.exact_timestamp) < int(anomaly_timestamp - 120):
                    continue
                if int(a.exact_timestamp) > int(anomaly_timestamp + 120):
                    continue
            except:
                continue
            try:
                time_period = (int(anomaly_timestamp - 120),
                               int(anomaly_timestamp + 120))
                my_correlator = Correlator(anomaly_ts_dict, correlate_ts_dict,
                                           time_period)
                # For better correlation use 0.9 instead of 0.8 for the threshold
                # @modified 20180524 - Feature #2360: CORRELATE_ALERTS_ONLY
                #                      Branch #2270: luminosity
                #                      Feature #2378: Add redis auth to Skyline and rebrow
                # Added this to settings.py
                # if my_correlator.is_correlated(threshold=0.9):
                try:
                    cross_correlation_threshold = settings.LUMINOL_CROSS_CORRELATION_THRESHOLD
                except:
                    cross_correlation_threshold = 0.9
                if my_correlator.is_correlated(
                        threshold=cross_correlation_threshold):
                    correlation = my_correlator.get_correlation_result()
                    correlated = True
                    correlations.append([
                        metric_base_name, correlation.coefficient,
                        correlation.shift, correlation.shifted_coefficient
                    ])
            except:
                pass
        if correlated:
            correlated_metrics.append(metric_base_name)

    end = timer()
    logger.info(
        'correlated %s metrics with the %s anomaly, processed in %.6f seconds' %
        (str(len(correlated_metrics)), base_name, (end - start)))
    return (correlated_metrics, correlations)
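
A minimal standalone sketch of the luminol Correlator call these snippets build on, assuming luminol is installed; the two toy series and the threshold are illustrative:

# Toy cross-correlation with luminol - data and threshold are illustrative
from luminol.correlator import Correlator

anomaly_ts_dict = {1600000000 + i * 60: float(i % 5) for i in range(10)}
# A scaled copy of the same signal correlates perfectly at shift 0
correlate_ts_dict = {ts: v * 2.0 for ts, v in anomaly_ts_dict.items()}

my_correlator = Correlator(anomaly_ts_dict, correlate_ts_dict)
if my_correlator.is_correlated(threshold=0.9):
    correlation = my_correlator.get_correlation_result()
    print(correlation.coefficient, correlation.shift,
          correlation.shifted_coefficient)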