Example #1
class MsgpackProtocol(asyncio.Protocol):

    def __init__(self, routes):
        self.__routes = routes
        self.packer = Unpacker()

    def connection_made(self, transport):
        peername = transport.get_extra_info('peername')
        print('Connection from {}'.format(peername))
        self.transport = transport
        self.transport.write(packb([2, 'peername', peername]))

    def data_received(self, data):
        self.packer.feed(data)
        for msg in self.packer:
            assert_request(msg)
            self.routing(msg)

    def routing(self, cmd):
        assert cmd[2] in self.__routes
        t = asyncio.ensure_future(response(cmd[1], self.transport,
                                           self.__routes[cmd[2]], cmd[3]))

    def eof_received(self):
        return True
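
The protocol above relies on a routes mapping plus response and assert_request helpers that are not shown. A minimal serving sketch with hypothetical stand-ins for those missing pieces (and assuming a msgpack version that decodes strings to str so the route lookup matches):

import asyncio
from msgpack import packb

async def response(msgid, transport, handler, params):
    # Hypothetical reply shape: [1, msgid, error, result]
    transport.write(packb([1, msgid, None, handler(*params)]))

def assert_request(msg):
    # Expect requests shaped like [type, msgid, method, params]
    assert isinstance(msg, (list, tuple)) and len(msg) == 4

async def main():
    routes = {'echo': lambda *args: list(args)}  # hypothetical route table
    loop = asyncio.get_running_loop()
    server = await loop.create_server(
        lambda: MsgpackProtocol(routes), '127.0.0.1', 8889)
    async with server:
        await server.serve_forever()

# asyncio.run(main())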
Example #2
def test_foobar():
    unpacker = Unpacker(read_size=3)
    unpacker.feed("foobar")
    assert unpacker.unpack() == ord(b"f")
    assert unpacker.unpack() == ord(b"o")
    assert unpacker.unpack() == ord(b"o")
    assert unpacker.unpack() == ord(b"b")
    assert unpacker.unpack() == ord(b"a")
    assert unpacker.unpack() == ord(b"r")
    try:
        o = unpacker.unpack()
        print "Oops!", o
        assert 0
    except StopIteration:
        assert 1
    else:
        assert 0
    unpacker.feed(b"foo")
    unpacker.feed(b"bar")

    k = 0
    for o, e in zip(unpacker, b"foobarbaz"):
        assert o == e
        k += 1
    assert k == len(b"foobar")
Example #3
class SReader():
    """ Define an asyncio msgpack stream decoder. """

    def __init__(self, reader, writer):
        """ Pass ina  stream reader to unmarshall msgpack objects from. """
        self.reader = reader
        self.writer = writer
        self.decoder = make_decoder()
        self.unpacker = Unpacker(ext_hook=self.decoder, encoding="utf8")
        self.obj_buf = []


    @asyncio.coroutine
    def get(self):
        """ The co-routine providing objects. """

        while len(self.obj_buf) == 0:
            buf = yield from self.reader.read(1000)

            self.unpacker.feed(buf)
            for o in self.unpacker:
                self.obj_buf.append(o)

        return self.obj_buf.pop(0)


    def put(self, obj):
        """ Write an object to the channel. """
        self.writer.write(encode(obj))
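
A hypothetical usage sketch for SReader over an asyncio TCP connection, kept in the same pre-async/await style as the class; make_decoder and encode belong to the example's own module and are not shown here:

import asyncio

@asyncio.coroutine
def client(host, port):
    reader, writer = yield from asyncio.open_connection(host, port)
    chan = SReader(reader, writer)
    chan.put({'cmd': 'ping'})       # marshalled by the module's encode()
    reply = yield from chan.get()   # next decoded msgpack object
    print(reply)
    writer.close()

# asyncio.get_event_loop().run_until_complete(client('127.0.0.1', 8889))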
Example #4
def test_foobar():
    unpacker = Unpacker(read_size=3)
    unpacker.feed(b'foobar')
    assert_equal(unpacker.unpack(), ord('f'))
    assert_equal(unpacker.unpack(), ord('o'))
    assert_equal(unpacker.unpack(), ord('o'))
    assert_equal(unpacker.unpack(), ord('b'))
    assert_equal(unpacker.unpack(), ord('a'))
    assert_equal(unpacker.unpack(), ord('r'))
    try:
        o = unpacker.unpack()
        print "Oops!", o
        assert 0
    except StopIteration:
        assert 1
    else:
        assert 0
    unpacker.feed(b'foo')
    unpacker.feed(b'bar')

    k = 0
    for o, e in zip(unpacker, 'foobarbaz'):
        assert o == ord(e)
        k += 1
    assert k == len('foobar')
Example #5
def test3():
    start = 0
    end = 10

    metric = "marion.channel-0"

    raw_series = REDIS_CONN.get(settings.FULL_NAMESPACE + metric)
    if not raw_series:
        resp = json.dumps({'results': 'Error: No metric by that name'})
        return resp, 404
    else:
        unpacker = Unpacker(use_list = False)
        unpacker.feed(raw_series)
        timeseries = []

        if (start is None) and (end is not None):
            for datapoint in unpacker:
                if datapoint[0] < int(end):
                    point = {'x': datapoint[0], 'y': datapoint[1]}
                    timeseries.append(point)
        elif (start is not None) and (end is None):
            for datapoint in unpacker:
                if datapoint[0] > int(start):
                    point = {'x': datapoint[0], 'y': datapoint[1]}
                    timeseries.append(point)
        elif (start is not None) and (end is not None):
            for datapoint in unpacker:
                if (datapoint[0] > int(start)) and (datapoint[0] < int(end)):
                    point = {'x': datapoint[0], 'y': datapoint[1]}
                    timeseries.append(point)
        elif (start is None) and (end is None):
            timeseries = [{'x':datapoint[0],'y':datapoint[1]} for datapoint in unpacker]

        resp = json.dumps({'results': timeseries})
        return resp, 200
Example #6
def anomalies():
    resp = 'handle_data([])'
    try:
        analyzer_key_node = REDIS_BACKENDS.get_node(settings.ANALYZER_ANOMALY_KEY)
        anomaly_keys = RING.run('smembers', settings.ANALYZER_ANOMALY_KEY)
        anomalies = {}
        if not anomaly_keys:
            logger.info("No anomaly key found!")
            return resp, 200
        for key in list(anomaly_keys):
            raw_anomalies = RING.run('get',key)
            if not raw_anomalies:
                logger.info("Can't get anomalies for key %s, removing it from set" % key)
                RING.run('srem', settings.ANALYZER_ANOMALY_KEY, key)
                continue
            unpacker = Unpacker(use_list = False)
            unpacker.feed(raw_anomalies)
            for item in unpacker:
                anomalies.update(item)
        anomaly_list = []
        for anom, value in anomalies.items():
            anomaly_list.append([value, anom])
        if len(anomaly_list) > 0:
            anomaly_list.sort(key=operator.itemgetter(1))
            resp = 'handle_data(%s)' % anomaly_list
    except Exception as e:
        logger.error("Error getting anomalies: %s" % str(e))
    return resp, 200
Example #7
def test_foobar():
    unpacker = Unpacker(read_size=3)
    unpacker.feed(b'foobar')
    assert unpacker.unpack() == ord(b'f')
    assert unpacker.unpack() == ord(b'o')
    assert unpacker.unpack() == ord(b'o')
    assert unpacker.unpack() == ord(b'b')
    assert unpacker.unpack() == ord(b'a')
    assert unpacker.unpack() == ord(b'r')
    try:
        o = unpacker.unpack()
        print(("Oops!", o))
        assert 0
    except StopIteration:
        assert 1
    else:
        assert 0
    unpacker.feed(b'foo')
    unpacker.feed(b'bar')

    k = 0
    for o, e in zip(unpacker, b'foobarbaz'):
        assert o == e
        k += 1
    assert k == len(b'foobar')
Example #8
class ClientProtocol(asyncio.Protocol):

    def __init__(self):
        self._cpt = -1
        self.packer = Unpacker()
        self._responses = dict()

    def connection_made(self, transport):
        print("connected")
        self.transport = transport

    def request(self, name, args, f):
        print("send request")
        self._cpt += 1
        self._responses[self._cpt] = f
        self.transport.write(packb([0, self._cpt, name, args]))


    def data_received(self, data):
        self.packer.feed(data)
        for msg in self.packer:
            if msg[0] == 1:
                self._responses[msg[1]].set_result(msg)

    def connection_lost(self, exc):
        pass
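
A hypothetical client sketch wiring ClientProtocol to a Future-based request/response round trip; the address and the 'echo' method name are illustrative, not part of the example:

import asyncio

async def main():
    loop = asyncio.get_running_loop()
    transport, protocol = await loop.create_connection(
        ClientProtocol, '127.0.0.1', 8889)
    fut = loop.create_future()
    protocol.request('echo', ['hello'], fut)  # resolved in data_received
    msg = await fut                           # e.g. [1, msgid, error, result]
    print(msg)
    transport.close()

# asyncio.run(main())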
Example #9
def data():
    metric = request.args.get('metric', None)
    start = request.args.get('start', None)
    end = request.args.get('end', None)

    if metric is None:
        metrics = ['channel-0', 'channel-1', 'channel-2', 'channel-3', 'channel-4', 'channel-5', 'channel-6', 'channel-7']
    else:
        metrics = [metric]

    try:
        all_channels_data = []
        for metric in metrics:

            single_channel_data = {}

            raw_series = REDIS_CONN.get(settings.FULL_NAMESPACE + metric)
            if not raw_series:
                resp = json.dumps({'results': 'Error: No metric by that name'})
                return resp, 404
            else:
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = []

                if (start is None) and (end is not None):
                    for datapoint in unpacker:
                        if datapoint[0] < int(end):
                            point = {'x' : datapoint[0], 'y':datapoint[1]}
                            timeseries.append(point)
                elif (start is not None) and (end is None):
                    for datapoint in unpacker:
                        if datapoint[0] > int(start):
                            point = {'x' : datapoint[0], 'y':datapoint[1]}
                            timeseries.append(point)
                elif (start is not None) and (end is not None):
                    for datapoint in unpacker:
                        if (datapoint[0] > int(start)) and (datapoint[0] < int(end)):
                            point = {'x' : datapoint[0], 'y':datapoint[1]}
                            timeseries.append(point)
                elif (start is None) and (end is None):
                    timeseries = [{'x' : datapoint[0], 'y':datapoint[1]} for datapoint in unpacker]

                single_channel_data['key'] = metric
                single_channel_data['values'] = timeseries
                all_channels_data.append(single_channel_data)

        resp = json.dumps({'results': all_channels_data})
        return resp, 200

    except Exception as e:
        error = "Error: " + str(e)
        resp = json.dumps({'results': error})
        return resp, 500
Example #10
def mpdecode(iterable):
    unpacker = Unpacker(encoding='utf8')
    for chunk in iterable:
        unpacker.feed(chunk)
        # Each chunk can have none or many objects,
        # so here we dispatch any object ready
        for obj in unpacker:
            yield obj
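
A small usage sketch for mpdecode, streaming objects out of a file read in fixed-size chunks. The file name is hypothetical, and the encoding='utf8' argument above assumes the older msgpack API the example targets:

from msgpack import packb

def file_chunks(path, size=4096):
    with open(path, 'rb') as f:
        while True:
            block = f.read(size)
            if not block:
                return
            yield block

with open('events.msgpack', 'wb') as f:       # build a small sample file
    for i in range(3):
        f.write(packb({'seq': i}))

for obj in mpdecode(file_chunks('events.msgpack')):
    print(obj)                                # {'seq': 0} ... {'seq': 2}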
Example #11
def test_incorrect_type_nested_map():
    unpacker = Unpacker()
    unpacker.feed(packb([{"a": "b"}]))
    try:
        unpacker.read_map_header()
        assert 0, "should raise exception"
    except UnexpectedTypeException:
        assert 1, "okay"
Example #12
def test_correct_type_nested_array():
    unpacker = Unpacker()
    unpacker.feed(packb({"a": ["b", "c", "d"]}))
    try:
        unpacker.read_array_header()
        assert 0, "should raise exception"
    except UnexpectedTypeException:
        assert 1, "okay"
Example #13
def test_correct_type_nested_array():
    unpacker = Unpacker()
    unpacker.feed(packb({'a': ['b', 'c', 'd']}))
    try:
        unpacker.read_array_header()
        assert 0, 'should raise exception'
    except UnexpectedTypeException:
        assert 1, 'okay'
Example #14
def test_incorrect_type_nested_map():
    unpacker = Unpacker()
    unpacker.feed(packb([{'a': 'b'}]))
    try:
        unpacker.read_map_header()
        assert 0, 'should raise exception'
    except UnexpectedTypeException:
        assert 1, 'okay'
Example #15
def test_incorrect_type_array():
    unpacker = Unpacker()
    unpacker.feed(packb(1))
    try:
        unpacker.read_array_header()
        assert 0, 'should raise exception'
    except UnexpectedTypeException:
        assert 1, 'okay'
Example #16
    def setUp(self):
        address = 0xfa1afe1
        device = "LivewareProblem"

        raw_packet = encode_erase_flash_page(address, device)

        unpacker = Unpacker()
        unpacker.feed(raw_packet)
        self.command = list(unpacker)[1:]
Example #17
def unpack_gen(file, size):
    u = Unpacker()
    while True:
        data = file.read(size)
        if not data:
            break
        u.feed(data)
        for o in u:
            yield o
Example #18
def test_auto_max_array_len():
    packed = b'\xde\x00\x06zz'
    with pytest.raises(UnpackValueError):
        unpackb(packed, raw=False)

    unpacker = Unpacker(max_buffer_size=5, raw=False)
    unpacker.feed(packed)
    with pytest.raises(UnpackValueError):
        unpacker.unpack()
Example #19
def test_auto_max_map_len():
    # len(packed) == 6 -> max_map_len == 3
    packed = b'\xde\x00\x04zzz'
    with pytest.raises(UnpackValueError):
        unpackb(packed, raw=False)

    unpacker = Unpacker(max_buffer_size=6, raw=False)
    unpacker.feed(packed)
    with pytest.raises(UnpackValueError):
        unpacker.unpack()
Example #20
    def mpack_handler(self, data, sock):
        unpacker = Unpacker()
        unpacker.feed(data)
        while 1:
            for msg in unpacker:
                self.on_message(msg)
            next_data = sock.recv(1000000)
            if not next_data:
                break
            unpacker.feed(next_data)
Example #21
def test_foobar_skip():
    unpacker = Unpacker(read_size=3, use_list=1)
    unpacker.feed(b'foobar')
    assert unpacker.unpack() == ord(b'f')
    unpacker.skip()
    assert unpacker.unpack() == ord(b'o')
    unpacker.skip()
    assert unpacker.unpack() == ord(b'a')
    unpacker.skip()
    with raises(OutOfData):
        unpacker.unpack()
Example #22
def test_read_map_header():
    unpacker = Unpacker()
    unpacker.feed(packb({'a': 'A'}))
    assert unpacker.read_map_header() == 1
    assert unpacker.unpack() == b'a'
    assert unpacker.unpack() == b'A'
    try:
        unpacker.unpack()
        assert 0, 'should raise exception'
    except OutOfData:
        assert 1, 'okay'
Example #23
class MsgpackStream(object):

    """Two-way msgpack stream that wraps a event loop byte stream.

    This wraps the event loop interface for reading/writing bytes and
    exposes an interface for reading/writing msgpack documents.
    """

    def __init__(self, event_loop):
        """Wrap `event_loop` on a msgpack-aware interface."""
        self.loop = event_loop
        self._packer = Packer(encoding='utf-8',
                              unicode_errors=unicode_errors_default)
        self._unpacker = Unpacker()
        self._message_cb = None

    def threadsafe_call(self, fn):
        """Wrapper around `BaseEventLoop.threadsafe_call`."""
        self.loop.threadsafe_call(fn)

    def send(self, msg):
        """Queue `msg` for sending to Nvim."""
        debug('sent %s', msg)
        self.loop.send(self._packer.pack(msg))

    def run(self, message_cb):
        """Run the event loop to receive messages from Nvim.

        While the event loop is running, `message_cb` will be called whenever
        a message has been successfully parsed from the input stream.
        """
        self._message_cb = message_cb
        self.loop.run(self._on_data)
        self._message_cb = None

    def stop(self):
        """Stop the event loop."""
        self.loop.stop()

    def close(self):
        """Close the event loop."""
        self.loop.close()

    def _on_data(self, data):
        self._unpacker.feed(data)
        while True:
            try:
                debug('waiting for message...')
                msg = next(self._unpacker)
                debug('received message: %s', msg)
                self._message_cb(msg)
            except StopIteration:
                debug('unpacker needs more data...')
                break
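
The heart of _on_data is the feed-then-iterate pattern: bytes are buffered until a complete msgpack document is available, and iteration stops (StopIteration) when the buffer runs dry. A standalone sketch of that behaviour, independent of the Nvim event loop:

from msgpack import Packer, Unpacker

packer = Packer()
unpacker = Unpacker()

payload = packer.pack([0, 1, "method", ["arg"]])

unpacker.feed(payload[: len(payload) // 2])   # first half: incomplete
print(list(unpacker))                         # []

unpacker.feed(payload[len(payload) // 2:])    # second half completes it
print(list(unpacker))                         # [[0, 1, 'method', ['arg']]]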
Example #24
    def setUp(self):
        address = 0xdeadbeef
        data = bytes(range(4))
        device = "dummy"

        raw_packet = encode_write_flash(data, address, device)

        unpacker = Unpacker()
        unpacker.feed(raw_packet)
        # Discards command set version
        self.command = list(unpacker)[1:]
Example #25
    def test_has_correct_protocol_version(self):
        """
        Checks that the command encoding function works correctly.
        """
        raw_packet = encode_command(command_code=10)

        unpacker = Unpacker()
        unpacker.feed(raw_packet)

        version, *_ = list(unpacker)
        self.assertEqual(2, version)
Example #26
def test_foobar_skip():
    unpacker = Unpacker(read_size=3, use_list=1)
    unpacker.feed(b"foobar")
    assert unpacker.unpack() == ord(b"f")
    unpacker.skip()
    assert unpacker.unpack() == ord(b"o")
    unpacker.skip()
    assert unpacker.unpack() == ord(b"a")
    unpacker.skip()
    with raises(OutOfData):
        unpacker.unpack()
Example #27
def test_read_map_header():
    unpacker = Unpacker()
    unpacker.feed(packb({"a": "A"}))
    assert unpacker.read_map_header() == 1
    assert unpacker.unpack() == b"a"
    assert unpacker.unpack() == b"A"
    try:
        unpacker.unpack()
        assert 0, "should raise exception"
    except StopIteration:
        assert 1, "okay"
Example #28
    def mpack_handler(self, data, sock):
        unpacker = Unpacker()
        unpacker.feed(data)
        # default chunk size of memory buffer is 32MB
        RECV_SIZE = 32*1024*1024
        while 1:
            for msg in unpacker:
                self.on_message(msg)
            next_data = sock.recv(RECV_SIZE)
            if not next_data:
                break
            unpacker.feed(next_data)
Example #29
def test_max_ext_len():
    d = ExtType(42, b"abc")
    packed = packb(d)

    unpacker = Unpacker(max_ext_len=3)
    unpacker.feed(packed)
    assert unpacker.unpack() == d

    unpacker = Unpacker(max_ext_len=2)
    with pytest.raises(ValueError):
        unpacker.feed(packed)
        unpacker.unpack()
Example #30
def test_read_array_header():
    unpacker = Unpacker()
    unpacker.feed(packb(['a', 'b', 'c']))
    assert unpacker.read_array_header() == 3
    assert unpacker.unpack() == b'a'
    assert unpacker.unpack() == b'b'
    assert unpacker.unpack() == b'c'
    try:
        unpacker.unpack()
        assert 0, 'should raise exception'
    except OutOfData:
        assert 1, 'okay'
Example #31
def get_redis_metrics_timeseries(current_skyline_app, metrics, log=False):
    """
    Return a dict of metrics timeseries as lists e.g.
    {
        'base_name.1': [[ts, value], [ts, value], ..., [ts, value]],
        'base_name.2': [[ts, value], [ts, value], ..., [ts, value]]
    }

    :param current_skyline_app: the app calling the function
    :param metrics: a list of base_names or full Redis metric names
    :param log: whether to log or not, optional, defaults to False
    :type current_skyline_app: str
    :type metrics: list
    :type log: boolean
    :return: metrics_timeseries
    :rtype: dict

    """

    function_str = 'functions.redis.get_metrics_timeseries'
    if log:
        current_skyline_app_logger = current_skyline_app + 'Log'
        current_logger = logging.getLogger(current_skyline_app_logger)
    else:
        current_logger = None

    metrics_timeseries = {}
    try:
        redis_conn = get_redis_conn(current_skyline_app)
    except Exception as err:
        if not log:
            current_skyline_app_logger = current_skyline_app + 'Log'
            current_logger = logging.getLogger(current_skyline_app_logger)
        current_logger.error(
            'error :: %s :: %s :: get_redis_conn failed - %s' %
            (current_skyline_app, function_str, str(err)))

    try:
        redis_conn_decoded = get_redis_conn_decoded(current_skyline_app)
    except Exception as err:
        if not log:
            current_skyline_app_logger = current_skyline_app + 'Log'
            current_logger = logging.getLogger(current_skyline_app_logger)
        current_logger.error(
            'error :: %s :: %s :: get_redis_conn_decoded failed - %s' %
            (current_skyline_app, function_str, str(err)))

    assigned_metrics = []
    base_names = []
    for metric in metrics:
        if metric.startswith(FULL_NAMESPACE):
            metric_name = str(metric)
            base_name = metric.replace(FULL_NAMESPACE, '')
        else:
            metric_name = '%s%s' % (FULL_NAMESPACE, str(metric))
            base_name = str(metric)
        assigned_metrics.append(metric_name)
        base_names.append(base_name)
        metrics_timeseries[base_name] = {}

    derivative_metrics = []
    try:
        # @modified 20211012 - Feature #4280: aet.metrics_manager.derivative_metrics Redis hash
        # derivative_metrics = list(redis_conn_decoded.smembers('derivative_metrics'))
        derivative_metrics = list(
            redis_conn_decoded.smembers(
                'aet.metrics_manager.derivative_metrics'))
    except Exception as err:
        if not log:
            current_skyline_app_logger = current_skyline_app + 'Log'
            current_logger = logging.getLogger(current_skyline_app_logger)
        current_logger.error(traceback.format_exc())
        current_logger.error(
            'error :: %s :: %s :: failed to get derivative_metrics from Redis - %s'
            % (current_skyline_app, function_str, str(err)))

    raw_assigned = {}
    try:
        raw_assigned = redis_conn.mget(assigned_metrics)
    except Exception as err:
        if not log:
            current_skyline_app_logger = current_skyline_app + 'Log'
            current_logger = logging.getLogger(current_skyline_app_logger)
        current_logger.error(traceback.format_exc())
        current_logger.error(
            'error :: %s :: %s :: failed to get raw_assigned from Redis - %s' %
            (current_skyline_app, function_str, str(err)))

    if raw_assigned:
        for index, metric_name in enumerate(assigned_metrics):
            timeseries = []
            try:
                raw_series = raw_assigned[index]
                if raw_series:
                    unpacker = Unpacker(use_list=False)
                    unpacker.feed(raw_series)
                    timeseries = list(unpacker)
            except Exception as err:
                if not log:
                    current_skyline_app_logger = current_skyline_app + 'Log'
                    current_logger = logging.getLogger(
                        current_skyline_app_logger)
                current_logger.error(
                    'error :: %s :: %s :: failed to unpack %s timeseries - %s'
                    %
                    (current_skyline_app, function_str, metric_name, str(err)))
                timeseries = []
            if timeseries:
                # Convert Redis ts floats to ints
                timeseries = [[int(ts), value] for ts, value in timeseries]
            if timeseries:
                # To ensure that there are no unordered timestamps in the time
                # series which are artefacts of the collector or carbon-relay, sort
                # all time series by timestamp before analysis.
                original_timeseries = timeseries
                if original_timeseries:
                    timeseries = sort_timeseries(original_timeseries)
                    del original_timeseries
                if metric_name in derivative_metrics:
                    if len(timeseries) > 3:
                        try:
                            derivative_timeseries = nonNegativeDerivative(
                                timeseries)
                            timeseries = derivative_timeseries
                        except Exception as err:
                            if not log:
                                current_skyline_app_logger = current_skyline_app + 'Log'
                                current_logger = logging.getLogger(
                                    current_skyline_app_logger)
                            current_logger.error(traceback.format_exc())
                            current_logger.error(
                                'error :: %s :: %s :: nonNegativeDerivative failed on timeseries for %s - %s'
                                % (current_skyline_app, function_str,
                                   metric_name, str(err)))
            if timeseries:
                base_name = base_names[index]
                metrics_timeseries[base_name] = timeseries

    return metrics_timeseries
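
A hypothetical call, assuming a running Skyline app context (settings, Redis connections); the app name and metric base_names below are illustrative only:

metrics = ['server-1.cpu.user', 'server-1.cpu.system']
metrics_timeseries = get_redis_metrics_timeseries('webapp', metrics, log=True)
for base_name, timeseries in metrics_timeseries.items():
    print('%s: %s datapoints' % (base_name, len(timeseries)))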
Example #32
class IControlTask(DeviceOperationMixIn):
    st_id = -3  # Device status ID
    main_e_axis = 0  # E axis control
    cmd_index = 0  # Command counter
    cmd_queue = None  # Command store queue
    udp_sock = None  # UDP socket to send status
    handler = None  # Client TCP connection object
    known_position = None  # Whether the toolhead position is known
    mainboard = None  # Mainboard Controller
    toolhead = None  # Headboard Controller
    head_resp_stack = None  # Toolhead raw response stack

    def __init__(self, stack, handler):
        super(IControlTask, self).__init__(stack, handler)
        self.handler = proxy(handler)
        self.handler.binary_mode = True
        self.cmd_queue = deque()
        self.meta = Metadata.instance()

        self._ready = 0

        def on_mainboard_ready(ctrl):
            self._ready |= 1
            self.mainboard.send_cmd("X8F")
            self.mainboard.send_cmd("T0")
            self.mainboard.send_cmd("G90")
            self.mainboard.send_cmd("G92E0")
            handler.send_text("ok")

        self.mainboard = MainController(
            self._sock_mb.fileno(),
            bufsize=14,
            empty_callback=self.on_mainboard_empty,
            sendable_callback=self.on_mainboard_sendable,
            ctrl_callback=self.on_mainboard_result)
        self.toolhead = HeadController(
            self._sock_th.fileno(),
            msg_callback=self.toolhead_message_callback)

        self.mainboard.bootstrap(on_mainboard_ready)
        self.unpacker = Unpacker()

    def on_toolhead_ready(self, ctrl):
        self._ready |= 2

    @property
    def buflen(self):
        return len(self.cmd_queue) + self.mainboard.buffered_cmd_size

    def on_mainboard_empty(self, caller):
        self.fire()

    def on_mainboard_sendable(self, caller):
        self.fire()

    def toolhead_message_callback(self, sender, data):
        if data and self.head_resp_stack is not None and \
                len(self.head_resp_stack) <= 32:
            self.head_resp_stack.append(data)
            self.send_udp1(sender)

    def on_binary(self, buf, handler):
        self.unpacker.feed(buf)
        for payload in self.unpacker:
            self.process_cmd(handler, *payload)

    def process_cmd(self, handler, index, cmd, *params):
        if index != self.cmd_index:
            logger.debug("Ignore %s 0x%02x %s", index, cmd, params)
            return

        fn = CMD_MATRIX.get(cmd)
        try:
            if cmd < 0xf0:
                fn(self, handler, *params)
            else:
                fn(self, handler, *params)

            self.cmd_index += 1

        except InternalError as e:
            self.handler.send(packb((0xff, self.cmd_index, e[1])))

        except Exception:
            logger.exception("Unknown error during processing command")
            self.handler.send(packb((0xff, self.cmd_index, MSG_UNKNOWN_ERROR)))
            self.on_require_kill(handler)

    def fire(self):
        if self.cmd_queue:
            target, cmd = self.cmd_queue[0]
            if target == TARGET_MAINBOARD:
                if self.mainboard.queue_full:
                    return
                else:
                    self.cmd_queue.popleft()
                    self.mainboard.send_cmd(cmd)
            elif target == TARGET_TOOLHEAD:
                if self.mainboard.buffered_cmd_size == 0:
                    if self.toolhead.sendable():
                        self.cmd_queue.popleft()
                        # TODO
                        self.toolhead.send_cmd(cmd, self)
                else:
                    return

    def on_mainboard_message(self, watcher, revent):
        try:
            self.mainboard.handle_recv()
        except IOError:
            logger.error("Mainboard connection broken")
            self.stack.exit_task(self)
            self.send_udp0()
        except Exception:
            logger.exception("Unhandle Error")

    def on_headboard_message(self, watcher, revent):
        try:
            self.toolhead.handle_recv()
            check_toolhead_errno(self.toolhead, self.th_error_flag)
            self.fire()

        except IOError:
            logger.error("Headboard connection broken")
            self.stack.exit_task(self)

        except (HeadResetError, HeadOfflineError, HeadTypeError):
            self._ready &= ~2

        except HeadError as e:
            logger.info("Head Error: %s", e)

        except Exception:
            logger.exception("Unhandle Error")

    def on_mainboard_result(self, controller, message):
        # Note: message will be...
        #   "DATA HOME 12.3 -23.2 122.3"
        if message.startswith("DATA HOME"):
            position = [float(val) for val in message[10:].split(" ")]
            if float("nan") in position:
                self.handler.send(packb((CMD_G028, 1, None)))
            else:
                self.handler.send(packb((CMD_G028, 0, position)))
                self.known_position = [0, 0, 240]

        #   "DATA READ X:0.124 Y:0.234 Z:0.534 F0:1 F1:0 MB:0"
        if message.startswith("DATA READ "):
            output = {}
            for key, val in ((p.split(":") for p in message[10:].split(" "))):
                if key in ("X", "Y", "Z"):
                    output[key] = float(val)
                elif key in ("F0", "F1"):
                    output[key] = (val == "1")
                elif key == "MB":
                    output[key] = (val == "1")
            self.handler.send(packb((CMD_VALU, output)))
        #   "DATA ZPROBE -0.5"
        if message.startswith("DATA ZPROBE "):
            self.handler.send(packb((CMD_G030, float(message[12:]))))

    def send_udp0(self):
        if self.udp_sock:
            try:
                buf = packb((0, "", self.cmd_index, self.buflen))
                self.udp_sock.send(buf)
            except socket.error:
                pass

    def send_udp1(self, toolhead):
        if self.udp_sock:
            try:
                if self.head_resp_stack is not None:
                    buf = packb((2, "", 0, len(self.head_resp_stack)))
                    self.udp_sock.send(buf)

                if toolhead.ready:
                    buf = packb(
                        (1, "", 0, toolhead.error_code, toolhead.status))
                    self.udp_sock.send(buf)

                # elif toolhead.ready_flag > 0:
                #     buf = packb((1, "", 0, -1, {}))
                #     self.udp_sock.send(buf)

                else:
                    buf = packb((1, "", 0, -2, {}))
                    self.udp_sock.send(buf)

            except socket.error:
                pass

    def send_udps(self, signal):
        if self.udp_sock:
            try:
                self.udp_sock.send(packb((signal, )))
            except socket.error:
                pass

    def on_timer(self, watcher, revent):
        self.meta.update_device_status(self.st_id, 0, "N/A",
                                       self.handler.address)

        self.send_udp0()
        if not self._ready & 2:
            self.send_udp1(self.toolhead)

        try:
            self.mainboard.patrol()
        except RuntimeError as e:
            logger.info("%s", e)

        except Exception:
            logger.exception("Mainboard dead")
            self.handler.send_text(packb((0xff, -1, 0xff, SUBSYSTEM_ERROR)))
            self.on_require_kill(self.handler)
            return

        try:
            self.toolhead.patrol()
        except (HeadOfflineError, HeadResetError) as e:
            logger.debug("Head Offline/Reset: %s", e)

        except RuntimeError as e:
            logger.info("%s", e)

        except socket.error:
            logger.warn("Socket IO Error")
            self.handler.close()

        except Exception:
            logger.exception("Toolhead dead")
            self.handler.send_text(packb((0xff, -1, 0xff, SUBSYSTEM_ERROR)))
            self.on_require_kill(self.handler)
            return

    def clean(self):
        self.mainboard.send_cmd("@HOME_BUTTON_TRIGGER\n")

        if self.toolhead:
            if self.toolhead.ready:
                self.toolhead.shutdown()
            self.toolhead = None

        if self.mainboard:
            self.mainboard.close()
            self.mainboard = None

        self.handler.binary_mode = False

    def append_cmd(self, target, cmd):
        self.cmd_queue.append((target, cmd))
        self.fire()

    def create_movement_command(self,
                                F=None,
                                X=None,
                                Y=None,
                                Z=None,
                                E0=None,
                                E1=None,
                                E2=None):  # noqa
        target = self.known_position
        yield "G1"

        if F:
            yield "F%i" % F

        if X is not None or Y is not None or Z is not None:
            if self.known_position:
                if X is not None:
                    target[0] = X
                    yield "X%.5f" % X
                if Y is not None:
                    target[1] = Y
                    yield "Y%.5f" % Y
                if Z is not None:
                    target[2] = Z
                    yield "Z%.5f" % Z

                if (target[0]**2 + target[1]**2) > 28900:
                    raise InternalError(CMD_G001, MSG_OPERATION_ERROR)
                elif target[2] > 240 or target[2] < 0:
                    raise InternalError(CMD_G001, MSG_OPERATION_ERROR)

            else:
                raise InternalError(CMD_G001, MSG_OPERATION_ERROR)

        eflag = False
        for i, e in ((0, E0), (1, E1), (2, E2)):
            if e is not None:
                if eflag:
                    raise InternalError(CMD_G001, MSG_OPERATION_ERROR)
                else:
                    eflag = True
                    if self.main_e_axis != i:
                        yield "T%i" % i
                        self.main_e_axis = i
                    yield "E%.5f" % e

        self.known_position = target

    def on_move(self, handler, kw):
        try:
            cmd = "".join(self.create_movement_command(**kw))
            self.append_cmd(TARGET_MAINBOARD, cmd)
        except TypeError:
            raise InternalError(CMD_G001, MSG_BAD_PARAMS)

    def on_sleep(self, handler, seconds):
        try:
            cmd = "G4S%.4f" % seconds
            self.append_cmd(TARGET_MAINBOARD, cmd)
        except TypeError:
            raise InternalError(CMD_G004, MSG_BAD_PARAMS)

    def on_scan_lasr(self, handler, flags):
        try:
            cmd = "X1E%i" % flags
            self.append_cmd(TARGET_MAINBOARD, cmd)
        except TypeError:
            raise InternalError(CMD_SLSR, MSG_BAD_PARAMS)

    def on_home(self, handler):
        self.append_cmd(TARGET_MAINBOARD, "X6")
        self.known_position = None

    def on_lock_motors(self, handler):
        self.append_cmd(TARGET_MAINBOARD, "M17")

    def on_release_motors(self, handler):
        self.append_cmd(TARGET_MAINBOARD, "M84")
        self.known_position = None

    def on_z_probe(self, handler, x, y):
        try:
            if self.known_position and x**2 + y**2 <= 7225:
                cmd = "G30X%.5fY%.5f" % (x, y)
                self.append_cmd(TARGET_MAINBOARD, cmd)

            else:
                raise InternalError(CMD_G030, MSG_OPERATION_ERROR)
        except TypeError:
            raise InternalError(CMD_G030, MSG_BAD_PARAMS)

    # def on_adjust(self, handler, kw):
    #     pass

    def on_set_toolhead_temperature(self, handler, index, temperature):
        if index == 0 and temperature >= 0 and temperature <= 220:
            cmd = "H%i%.1f" % (index, temperature)
            self.append_cmd(TARGET_TOOLHEAD, cmd)
        else:
            raise InternalError(CMD_M104, MSG_OPERATION_ERROR)

    def on_set_toolhead_fan_speed(self, handler, index, speed):
        if index == 0 and speed >= 0 and speed <= 1:
            cmd = "F%i%.3f" % (index, speed)
            self.append_cmd(TARGET_TOOLHEAD, cmd)
        else:
            raise InternalError(CMD_M106, MSG_OPERATION_ERROR)

    def on_set_toolhead_pwm(self, handler, pwm):
        if pwm >= 0 and pwm <= 1:
            cmd = "X2O" % (pwm * 255)
            self.append_cmd(TARGET_MAINBOARD, cmd)
        else:
            raise InternalError(CMD_HLSR, MSG_OPERATION_ERROR)

    def on_query_value(self, handler, flags):
        self.append_cmd(TARGET_MAINBOARD, "X87F%i" % flags)

    def on_toolhead_profile(self, handler):
        buf = packb((CMD_THPF, self.toolhead.info()))
        self.handler.send(buf)

    def on_toolhead_raw_command(self, handler, cmd):
        self.append_cmd(TARGET_TOOLHEAD, cmd)

    def on_toolhead_raw_response(self, handler):
        buf = packb((CMD_THRR, self.head_resp_stack))
        self.head_resp_stack = []
        self.handler.send(buf)

    def on_require_sync(self, handler, ipaddr, port, salt):
        endpoint = (ipaddr, port)
        logger.debug("Create sync udp endpoint at %s", repr(endpoint))
        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            s.connect(endpoint)
            s.send(packb((0xff, )))
        except (TypeError, OSError):
            raise InternalError(CMD_SYNC, MSG_OPERATION_ERROR)

        try:
            if self.udp_sock:
                self.udp_sock.close()
        finally:
            self.udp_sock = s

    def on_require_head(self, handler, head_type):
        self.toolhead = HeadController(
            self._sock_th.fileno(),
            required_module=head_type,
            msg_callback=self.toolhead_message_callback)

        self.head_resp_stack = [] if head_type == "USER" else None

    def on_bootstrap_toolhead(self, handler):
        self.toolhead.bootstrap(self.on_toolhead_ready)

    def on_clean_toolhead_error(self, handler):
        self.toolhead.errcode = 0

    def on_require_quit(self, handler):
        if self.buflen:
            raise InternalError(CMD_QUIT, MSG_OPERATION_ERROR)

        self.stack.exit_task(self)
        self.handler.send(packb((CMD_QUIT, 0)))

    def on_require_kill(self, handler):
        try:
            self.send_udps(0xfe)
            self.stack.exit_task(self)
        finally:
            from fluxmonitor.hal.tools import reset_mb
            reset_mb()
            self.handler.send(packb((CMD_QUIT, 0)))
Example #33
def get_correlations(base_name, anomaly_timestamp, anomalous_ts,
                     assigned_metrics, raw_assigned, remote_assigned,
                     anomalies):

    logger = logging.getLogger(skyline_app_logger)

    # Distill timeseries strings into lists
    start = timer()
    count = 0
    metrics_checked_for_correlation = 0

    # @added 20201203 - Feature #3860: luminosity - handle low frequency data
    # Determine data resolution
    resolution = determine_resolution(anomalous_ts)

    # Sample the time series
    # @modified 20180720 - Feature #2464: luminosity_remote_data
    # Added note here - if you modify the value of 600 here, it must be
    # modified in the luminosity_remote_data function in
    # skyline/webapp/backend.py as well
    # @modified 20201203 - Feature #3860: luminosity - handle low frequency data
    # from_timestamp = anomaly_timestamp - 600
    from_timestamp = anomaly_timestamp - (resolution * 10)

    correlated_metrics = []
    correlations = []
    no_data = False
    if not anomalous_ts:
        no_data = True
    if not assigned_metrics:
        no_data = True
    if not raw_assigned:
        no_data = True
    if not anomalies:
        no_data = True
    if no_data:
        logger.error('error :: get_correlations :: no data')
        return (correlated_metrics, correlations)

    # @added 20200428 - Feature #3510: Enable Luminosity to handle correlating namespaces only
    #                   Feature #3500: webapp - crucible_process_metrics
    #                   Feature #1448: Crucible web UI
    # Discard the check if the anomaly_timestamp is not in FULL_DURATION as it
    # will have been added via the Crucible or webapp/crucible route
    start_timestamp_of_full_duration_data = int(time() -
                                                settings.FULL_DURATION)
    if anomaly_timestamp < (start_timestamp_of_full_duration_data + 2000):
        logger.info(
            'get_correlations :: the anomaly_timestamp is too old not correlating'
        )
        return (correlated_metrics, correlations)

    start_local_correlations = timer()

    local_redis_metrics_checked_count = 0
    local_redis_metrics_correlations_count = 0

    logger.info('get_correlations :: the local Redis metric count is %s' %
                str(len(assigned_metrics)))

    # @added 20200428 - Feature #3510: Enable Luminosity to handle correlating namespaces only
    # Removed here and handled in get_assigned_metrics

    for i, metric_name in enumerate(assigned_metrics):
        count += 1
        # print(metric_name)
        # @modified 20180719 - Branch #2270: luminosity
        # Removed test limiting that was erroneously left in
        # if count > 1000:
        #     break
        correlated = None
        # @modified 20200728 - Bug #3652: Handle multiple metrics in base_name conversion
        # metric_base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
        if metric_name.startswith(settings.FULL_NAMESPACE):
            metric_base_name = metric_name.replace(settings.FULL_NAMESPACE, '',
                                                   1)
        else:
            metric_base_name = metric_name

        if str(metric_base_name) == str(base_name):
            continue
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []
        if not timeseries:
            # print('no time series data for %s' % base_name)
            continue

        # @added 20200507 - Feature #3532: Sort all time series
        # To ensure that there are no unordered timestamps in the time
        # series which are artefacts of the collector or carbon-relay, sort
        # all time series by timestamp before analysis.
        original_timeseries = timeseries
        if original_timeseries:
            timeseries = sort_timeseries(original_timeseries)
            del original_timeseries

        # Convert the time series if this is a known_derivative_metric
        known_derivative_metric = is_derivative_metric(skyline_app,
                                                       metric_base_name)
        if known_derivative_metric:
            try:
                derivative_timeseries = nonNegativeDerivative(timeseries)
                timeseries = derivative_timeseries
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: nonNegativeDerivative')

        correlate_ts = []

        # @added 20201203 - Feature #3860: luminosity - handle low frequency data
        # Determine data resolution
        resolution = determine_resolution(timeseries)

        for ts, value in timeseries:
            if int(ts) < from_timestamp:
                continue
            if int(ts) <= anomaly_timestamp:
                correlate_ts.append((int(ts), value))
            # @modified 20180720 - Feature #2464: luminosity_remote_data
            # Added note here - if you modify the value of 61 here, it must be
            # modified in the luminosity_remote_data function in
            # skyline/webapp/backend.py as well
            # @modified 20201203 - Feature #3860: luminosity - handle low frequency data
            # Handle varying metric resolutions
            # if int(ts) > (anomaly_timestamp + 61):
            if int(ts) > (anomaly_timestamp + (resolution + 1)):
                break
        if not correlate_ts:
            continue

        local_redis_metrics_checked_count += 1
        anomaly_ts_dict = dict(anomalous_ts)
        correlate_ts_dict = dict(correlate_ts)

        for a in anomalies:
            try:
                # @modified 20180720 - Feature #2464: luminosity_remote_data
                # Added note here - if you modify the value of 120 here, it must be
                # modified in the luminosity_remote_data function in
                # skyline/webapp/backend.py as well
                # @modified 20201203 - Feature #3860: luminosity - handle low frequency data
                # Handle varying metric resolutions
                # if int(a.exact_timestamp) < int(anomaly_timestamp - 120):
                #     continue
                # if int(a.exact_timestamp) > int(anomaly_timestamp + 120):
                #     continue
                if int(a.exact_timestamp) < int(anomaly_timestamp -
                                                (resolution * 2)):
                    continue
                if int(a.exact_timestamp) > int(anomaly_timestamp +
                                                (resolution * 2)):
                    continue

            except:
                continue
            try:
                # @modified 20201203 - Feature #3860: luminosity - handle low frequency data
                # Handle varying metric resolutions
                # time_period = (int(anomaly_timestamp - 120), int(anomaly_timestamp + 120))
                time_period = (int(anomaly_timestamp - (resolution * 2)),
                               int(anomaly_timestamp + (resolution * 2)))

                my_correlator = Correlator(anomaly_ts_dict, correlate_ts_dict,
                                           time_period)
                # For better correlation use 0.9 instead of 0.8 for the threshold
                # @modified 20180524 - Feature #2360: CORRELATE_ALERTS_ONLY
                #                      Branch #2270: luminosity
                #                      Feature #2378: Add redis auth to Skyline and rebrow
                # Added this to setting.py
                # if my_correlator.is_correlated(threshold=0.9):
                try:
                    cross_correlation_threshold = settings.LUMINOL_CROSS_CORRELATION_THRESHOLD
                    metrics_checked_for_correlation += 1
                except:
                    cross_correlation_threshold = 0.9
                if my_correlator.is_correlated(
                        threshold=cross_correlation_threshold):
                    correlation = my_correlator.get_correlation_result()
                    correlated = True
                    correlations.append([
                        metric_base_name, correlation.coefficient,
                        correlation.shift, correlation.shifted_coefficient
                    ])
                    local_redis_metrics_correlations_count += 1
            except:
                pass
        if correlated:
            correlated_metrics.append(metric_base_name)

    # @added 20180720 - Feature #2464: luminosity_remote_data
    # Added the correlation of preprocessed remote data
    end_local_correlations = timer()
    logger.info(
        'get_correlations :: checked - local_redis_metrics_checked_count is %s'
        % str(local_redis_metrics_checked_count))
    logger.info(
        'get_correlations :: correlated - local_redis_metrics_correlations_count is %s'
        % str(local_redis_metrics_correlations_count))
    logger.info(
        'get_correlations :: processed %s correlations on local_redis_metrics_checked_count %s local metrics in %.6f seconds'
        % (str(local_redis_metrics_correlations_count),
           str(local_redis_metrics_checked_count),
           (end_local_correlations - start_local_correlations)))

    # @added 20201207 - Feature #3858: skyline_functions - correlate_or_relate_with
    do_not_correlate_with = []

    remote_metrics_count = 0
    remote_correlations_check_count = 0
    remote_correlations_count = 0
    logger.info('get_correlations :: remote_assigned count %s' %
                str(len(remote_assigned)))
    start_remote_correlations = timer()
    for ts_data in remote_assigned:
        remote_metrics_count += 1
        correlated = None
        metric_name = str(ts_data[0])
        # @modified 20200728 - Bug #3652: Handle multiple metrics in base_name conversion
        # metric_base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
        if metric_name.startswith(settings.FULL_NAMESPACE):
            metric_base_name = metric_name.replace(settings.FULL_NAMESPACE, '',
                                                   1)
        else:
            metric_base_name = metric_name

        if str(metric_base_name) == str(base_name):
            continue

        # @added 20201207 - Feature #3858: skyline_functions - correlate_or_relate_with
        try:
            correlate_or_relate = correlate_or_relate_with(
                skyline_app, base_name, metric_base_name)
            if not correlate_or_relate:
                do_not_correlate_with.append(metric_base_name)
                continue
        except:
            logger.error(traceback.format_exc())
            logger.error(
                'error :: get_remote_assigned :: failed to evaluate correlate_or_relate_with'
            )

        timeseries = []
        try:
            timeseries = ts_data[1]
        except:
            timeseries = []
        if not timeseries:
            continue

        # @added 20201203 - Feature #3860: luminosity - handle low frequency data
        # Determine data resolution
        resolution = determine_resolution(timeseries)

        correlate_ts = []
        for ts, value in timeseries:
            if int(ts) < from_timestamp:
                continue
            if int(ts) <= anomaly_timestamp:
                correlate_ts.append((int(ts), value))
            # @modified 20180720 - Feature #2464: luminosity_remote_data
            # Added note here - if you modify the value of 61 here, it must be
            # modified in the luminosity_remote_data function in
            # skyline/webapp/backend.py as well
            # @modified 20201203 - Feature #3860: luminosity - handle low frequency data
            # Handle varying metric resolutions
            # if int(ts) > (anomaly_timestamp + 61):
            if int(ts) > (anomaly_timestamp + (resolution + 1)):
                break
        if not correlate_ts:
            continue

        anomaly_ts_dict = dict(anomalous_ts)
        correlate_ts_dict = dict(correlate_ts)

        for a in anomalies:
            try:
                # @modified 20180720 - Feature #2464: luminosity_remote_data
                # Added note here - if you modify the value of 120 here, it must be
                # modified in the luminosity_remote_data function in
                # skyline/webapp/backend.py as well
                # @modified 20201203 - Feature #3860: luminosity - handle low frequency data
                # Handle varying metric resolutions
                # if int(a.exact_timestamp) < int(anomaly_timestamp - 120):
                #     continue
                # if int(a.exact_timestamp) > int(anomaly_timestamp + 120):
                #     continue
                if int(a.exact_timestamp) < int(anomaly_timestamp -
                                                (resolution * 2)):
                    continue
                if int(a.exact_timestamp) > int(anomaly_timestamp +
                                                (resolution * 2)):
                    continue
            except:
                continue
            try:
                # @modified 20201203 - Feature #3860: luminosity - handle low frequency data
                # Handle varying metric resolutions
                # time_period = (int(anomaly_timestamp - 120), int(anomaly_timestamp + 120))
                time_period = (int(anomaly_timestamp - (resolution * 2)),
                               int(anomaly_timestamp + (resolution * 2)))

                my_correlator = Correlator(anomaly_ts_dict, correlate_ts_dict,
                                           time_period)
                metrics_checked_for_correlation += 1
                remote_correlations_check_count += 1
                try:
                    cross_correlation_threshold = settings.LUMINOL_CROSS_CORRELATION_THRESHOLD
                except:
                    cross_correlation_threshold = 0.9
                if my_correlator.is_correlated(
                        threshold=cross_correlation_threshold):
                    correlation = my_correlator.get_correlation_result()
                    correlated = True
                    correlations.append([
                        metric_base_name, correlation.coefficient,
                        correlation.shift, correlation.shifted_coefficient
                    ])
                    remote_correlations_count += 1
            except:
                pass
        if correlated:
            correlated_metrics.append(metric_base_name)

    end_remote_correlations = timer()

    # @added 20201207 - Feature #3858: skyline_functions - correlate_or_relate_with
    if len(do_not_correlate_with) > 0:
        logger.info(
            'get_correlations :: discarded %s remote assigned metrics as not in a correlation group with %s'
            % (str(len(do_not_correlate_with)), base_name))

    logger.info(
        'get_correlations :: checked - remote_correlations_check_count is %s' %
        str(remote_correlations_check_count))
    logger.info(
        'get_correlations :: correlated - remote_correlations_count is %s' %
        str(remote_correlations_count))
    logger.info(
        'get_correlations :: processed remote correlations on remote_metrics_count %s local metric in %.6f seconds'
        % (str(remote_metrics_count),
           (end_remote_correlations - start_remote_correlations)))

    end = timer()
    logger.info(
        'get_correlations :: checked a total of %s metrics and correlated %s metrics to %s anomaly, processed in %.6f seconds'
        % (str(metrics_checked_for_correlation), str(
            len(correlated_metrics)), base_name, (end - start)))
    # @added 20170720 - Task #2462: Implement useful metrics for Luminosity
    # Added runtime to calculate avg_runtime Graphite metric
    runtime = '%.6f' % (end - start)
    return (correlated_metrics, correlations, metrics_checked_for_correlation,
            runtime)
Example #34
    def spin_process(self, i, boundary_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Determine assigned metrics
        bp = settings.BOUNDARY_PROCESSES
        bm_range = len(boundary_metrics)
        keys_per_processor = int(ceil(float(bm_range) / float(bp)))
        if i == settings.BOUNDARY_PROCESSES:
            assigned_max = len(boundary_metrics)
        else:
            # This is a skyline bug, the original skyline code uses 1 as the
            # beginning position of the index, python indices begin with 0
            # assigned_max = len(boundary_metrics)
            # This closes the etsy/skyline pull request opened by @languitar on 17 Jun 2014
            # https://github.com/etsy/skyline/pull/94 Fix analyzer worker metric assignment
            assigned_max = min(len(boundary_metrics), i * keys_per_processor)
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics_and_algos = [
            boundary_metrics[index] for index in assigned_keys
        ]
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing assigned_metrics_and_algos')
            for assigned_metric_and_algo in assigned_metrics_and_algos:
                logger.info('debug - assigned_metric_and_algo - %s' %
                            str(assigned_metric_and_algo))

        # Compile assigned metrics
        assigned_metrics = []
        for i in assigned_metrics_and_algos:
            assigned_metrics.append(i[0])

        # unique unhashed things
        def unique_noHash(seq):
            seen = set()
            return [
                x for x in seq if str(x) not in seen and not seen.add(str(x))
            ]

        unique_assigned_metrics = unique_noHash(assigned_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - unique_assigned_metrics - %s' %
                        str(unique_assigned_metrics))
            logger.info('debug - printing unique_assigned_metrics:')
            for unique_assigned_metric in unique_assigned_metrics:
                logger.info('debug - unique_assigned_metric - %s' %
                            str(unique_assigned_metric))

        # Check if this process is unnecessary
        if len(unique_assigned_metrics) == 0:
            return

        # Multi get series
        try:
            raw_assigned = self.redis_conn.mget(unique_assigned_metrics)
        except:
            logger.error("failed to mget assigned_metrics from redis")
            return

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Reset boundary_algortims
        all_boundary_algorithms = []
        for metric in BOUNDARY_METRICS:
            all_boundary_algorithms.append(metric[1])

        # The unique algorithms that are being used
        boundary_algorithms = unique_noHash(all_boundary_algorithms)
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - boundary_algorithms - %s' %
                        str(boundary_algorithms))

        discover_run_metrics = []

        # Distill metrics into a run list
        for i, metric_name in enumerate(unique_assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacking timeseries for %s - %s' %
                                (metric_name, str(i)))
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
            except Exception as e:
                exceptions['Other'] += 1
                logger.error("redis data error: " + traceback.format_exc())
                logger.error("error: %e" % e)

            base_name = metric_name.replace(FULL_NAMESPACE, '', 1)

            # Determine the metric's BOUNDARY_METRICS tuple settings
            for metrick in BOUNDARY_METRICS:
                CHECK_MATCH_PATTERN = metrick[0]
                check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                pattern_match = check_match_pattern.match(base_name)
                metric_pattern_matched = False
                if pattern_match:
                    metric_pattern_matched = True
                    algo_pattern_matched = False
                    for algo in boundary_algorithms:
                        for metric in BOUNDARY_METRICS:
                            CHECK_MATCH_PATTERN = metric[0]
                            check_match_pattern = re.compile(
                                CHECK_MATCH_PATTERN)
                            pattern_match = check_match_pattern.match(
                                base_name)
                            if pattern_match:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - metric and algo pattern MATCHED - "
                                        + metric[0] + " | " + base_name +
                                        " | " + str(metric[1]))
                                metric_expiration_time = False
                                metric_min_average = False
                                metric_min_average_seconds = False
                                metric_trigger = False
                                algorithm = False
                                algo_pattern_matched = True
                                algorithm = metric[1]
                                try:
                                    if metric[2]:
                                        metric_expiration_time = metric[2]
                                except:
                                    metric_expiration_time = False
                                try:
                                    if metric[3]:
                                        metric_min_average = metric[3]
                                except:
                                    metric_min_average = False
                                try:
                                    if metric[4]:
                                        metric_min_average_seconds = metric[4]
                                except:
                                    metric_min_average_seconds = 1200
                                try:
                                    if metric[5]:
                                        metric_trigger = metric[5]
                                except:
                                    metric_trigger = False
                                try:
                                    if metric[6]:
                                        alert_threshold = metric[6]
                                except:
                                    alert_threshold = False
                                try:
                                    if metric[7]:
                                        metric_alerters = metric[7]
                                except:
                                    metric_alerters = False
                            if metric_pattern_matched and algo_pattern_matched:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        'debug - added metric - %s, %s, %s, %s, %s, %s, %s, %s, %s'
                                        % (str(i), metric_name,
                                           str(metric_expiration_time),
                                           str(metric_min_average),
                                           str(metric_min_average_seconds),
                                           str(metric_trigger),
                                           str(alert_threshold),
                                           metric_alerters, algorithm))
                                discover_run_metrics.append([
                                    i, metric_name, metric_expiration_time,
                                    metric_min_average,
                                    metric_min_average_seconds, metric_trigger,
                                    alert_threshold, metric_alerters, algorithm
                                ])

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing discover_run_metrics')
            for discover_run_metric in discover_run_metrics:
                logger.info('debug - discover_run_metrics - %s' %
                            str(discover_run_metric))
            logger.info('debug - build unique boundary metrics to analyze')

        # Determine the unique set of metrics to run
        run_metrics = unique_noHash(discover_run_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing run_metrics')
            for run_metric in run_metrics:
                logger.info('debug - run_metrics - %s' % str(run_metric))

        # Distill timeseries strings and submit to run_selected_algorithm
        for metric_and_algo in run_metrics:
            self.check_if_parent_is_alive()

            try:
                raw_assigned_id = metric_and_algo[0]
                metric_name = metric_and_algo[1]
                base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                metric_expiration_time = metric_and_algo[2]
                metric_min_average = metric_and_algo[3]
                metric_min_average_seconds = metric_and_algo[4]
                metric_trigger = metric_and_algo[5]
                alert_threshold = metric_and_algo[6]
                metric_alerters = metric_and_algo[7]
                algorithm = metric_and_algo[8]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacking timeseries for %s - %s' %
                                (metric_name, str(raw_assigned_id)))

                raw_series = raw_assigned[metric_and_algo[0]]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacked OK - %s - %s' %
                                (metric_name, str(raw_assigned_id)))

                autoaggregate = False
                autoaggregate_value = 0

                # Determine if the namespace is to be aggregated
                if BOUNDARY_AUTOAGGRERATION:
                    for autoaggregate_metric in BOUNDARY_AUTOAGGRERATION_METRICS:
                        autoaggregate = False
                        autoaggregate_value = 0
                        CHECK_MATCH_PATTERN = autoaggregate_metric[0]
                        base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            autoaggregate = True
                            autoaggregate_value = autoaggregate_metric[1]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info(
                        'debug - BOUNDARY_AUTOAGGRERATION passed - %s - %s' %
                        (metric_name, str(autoaggregate)))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info(
                        'debug - analysing - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s'
                        %
                        (metric_name, str(metric_expiration_time),
                         str(metric_min_average),
                         str(metric_min_average_seconds), str(metric_trigger),
                         str(alert_threshold), metric_alerters, autoaggregate,
                         autoaggregate_value, algorithm))
                    # Dump the timeseries data to a file
                    timeseries_dump_dir = "/tmp/skyline/boundary/" + algorithm
                    self.mkdir_p(timeseries_dump_dir)
                    timeseries_dump_file = timeseries_dump_dir + "/" + metric_name + ".json"
                    with open(timeseries_dump_file, 'w+') as f:
                        f.write(str(timeseries))
                        f.close()

                # Check if a metric has its own unique BOUNDARY_METRICS alert
                # tuple; this allows us to paint an entire metric namespace with
                # the same brush AND paint a unique metric or namespace with a
                # different brush or scalpel
                has_unique_tuple = False
                run_tupple = False
                boundary_metric_tuple = (base_name, algorithm,
                                         metric_expiration_time,
                                         metric_min_average,
                                         metric_min_average_seconds,
                                         metric_trigger, alert_threshold,
                                         metric_alerters)
                wildcard_namespace = True
                for metric_tuple in BOUNDARY_METRICS:
                    if not has_unique_tuple:
                        CHECK_MATCH_PATTERN = metric_tuple[0]
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            if metric_tuple[0] == base_name:
                                wildcard_namespace = False
                            if not has_unique_tuple:
                                if boundary_metric_tuple == metric_tuple:
                                    has_unique_tuple = True
                                    run_tupple = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info('unique_tuple:')
                                        logger.info(
                                            'boundary_metric_tuple: %s' %
                                            str(boundary_metric_tuple))
                                        logger.info('metric_tuple: %s' %
                                                    str(metric_tuple))

                if not has_unique_tuple:
                    if wildcard_namespace:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('wildcard_namespace:')
                            logger.info('boundary_metric_tuple: %s' %
                                        str(boundary_metric_tuple))
                        run_tupple = True
                    else:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info(
                                'wildcard_namespace: BUT WOULD NOT RUN')
                            logger.info('boundary_metric_tuple: %s' %
                                        str(boundary_metric_tuple))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('WOULD RUN run_selected_algorithm = %s' %
                                run_tupple)

                if run_tupple:
                    # Submit the timeseries and settings to run_selected_algorithm
                    anomalous, ensemble, datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm = run_selected_algorithm(
                        timeseries, metric_name, metric_expiration_time,
                        metric_min_average, metric_min_average_seconds,
                        metric_trigger, alert_threshold, metric_alerters,
                        autoaggregate, autoaggregate_value, algorithm)
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('debug - analysed - %s' % (metric_name))
                else:
                    anomalous = False
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info(
                            'debug - more unique metric tuple not analysed - %s'
                            % (metric_name))

                # If it's anomalous, add it to list
                if anomalous:
                    anomalous_metric = [
                        datapoint, metric_name, metric_expiration_time,
                        metric_min_average, metric_min_average_seconds,
                        metric_trigger, alert_threshold, metric_alerters,
                        algorithm
                    ]
                    self.anomalous_metrics.append(anomalous_metric)
                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info("exceptions['Other'] traceback follows:")
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))
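
For reference, the 1-indexed work partitioning used in spin_process above can be reduced to a small helper. This is a minimal sketch under the same ceil-based chunking, with illustrative names that are not part of Skyline's API:

from math import ceil

def assigned_slice(items, i, processes):
    """Illustrative only: the slice of items handled by the 1-indexed process i."""
    keys_per_processor = int(ceil(float(len(items)) / float(processes)))
    if i == processes:
        assigned_max = len(items)
    else:
        assigned_max = min(len(items), i * keys_per_processor)
    assigned_min = (i - 1) * keys_per_processor
    return items[assigned_min:assigned_max]

# 10 metrics across 3 processes are split into slices of 4, 4 and 2
metrics = ['metric.%d' % n for n in range(10)]
print([len(assigned_slice(metrics, i, 3)) for i in (1, 2, 3)])  # [4, 4, 2]
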
Example No. 35
def get_anomalous_ts(base_name, anomaly_timestamp):

    logger = logging.getLogger(skyline_app_logger)

    # @added 20180423 - Feature #2360: CORRELATE_ALERTS_ONLY
    #                   Branch #2270: luminosity
    # Only correlate metrics with an alert setting
    if correlate_alerts_only:
        try:
            # @modified 20191030 - Bug #3266: py3 Redis binary objects not strings
            #                      Branch #3262: py3
            # smtp_alerter_metrics = list(redis_conn.smembers('analyzer.smtp_alerter_metrics'))
            # @modified 20200421 - Feature #3306: Record anomaly_end_timestamp
            #                      Branch #2270: luminosity
            #                      Branch #3262: py3
            # Changed to use the aet Redis set, used to determine and record the
            # anomaly_end_timestamp, some transient sets need to be copied so that
            # the data always exists, even if it is sourced from a transient set.
            # smtp_alerter_metrics = list(redis_conn_decoded.smembers('analyzer.smtp_alerter_metrics'))
            smtp_alerter_metrics = list(
                redis_conn_decoded.smembers(
                    'aet.analyzer.smtp_alerter_metrics'))
        except:
            smtp_alerter_metrics = []

        if base_name not in smtp_alerter_metrics:
            logger.error('%s has no alerter setting, not correlating' %
                         base_name)
            return []

    if not base_name or not anomaly_timestamp:
        return []

    # from skyline_functions import nonNegativeDerivative
    anomalous_metric = '%s%s' % (settings.FULL_NAMESPACE, base_name)
    unique_metrics = []
    try:
        # @modified 20191030 - Bug #3266: py3 Redis binary objects not strings
        #                      Branch #3262: py3
        # unique_metrics = list(redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))
        unique_metrics = list(
            redis_conn_decoded.smembers(settings.FULL_NAMESPACE +
                                        'unique_metrics'))
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: get_assigned_metrics :: no unique_metrics')
        return []
    # @added 20180720 - Feature #2464: luminosity_remote_data
    # Ensure that Luminosity only processes its own Redis metrics so that if
    # multiple Skyline instances are running, Luminosity does not process an
    # anomaly_id for a metric that is not local to itself.  This will stop the
    # call to the remote Redis with other_redis_conn below.  With the
    # introduction of the preprocessing luminosity_remote_data API endpoint for
    # remote Skyline instances, there is no further requirement for Skyline
    # instances to have direct access to Redis on another Skyline instance.
    # This is a much better solution as it means all data is preprocessed and
    # encrypted, and there is no need for iptables rules other than 443 (or a
    # custom https port).
    #
    if anomalous_metric in unique_metrics:
        logger.info(
            '%s is a metric in Redis, processing on this Skyline instance' %
            base_name)
    else:
        logger.info(
            '%s is not a metric in Redis, not processing on this Skyline instance'
            % base_name)
        return []

    assigned_metrics = [anomalous_metric]
    # @modified 20180419 -
    raw_assigned = []
    try:
        raw_assigned = redis_conn.mget(assigned_metrics)
    except:
        raw_assigned = []
    if raw_assigned == [None]:
        logger.info('%s data not retrieved from local Redis' %
                    (str(base_name)))
        raw_assigned = []

    # @modified 20180721 - Feature #2464: luminosity_remote_data
    # TO BE DEPRECATED settings.OTHER_SKYLINE_REDIS_INSTANCES
    # with the addition of the luminosity_remote_data API call and the above
    if not raw_assigned and settings.OTHER_SKYLINE_REDIS_INSTANCES:
        # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
        # for redis_ip, redis_port in settings.OTHER_SKYLINE_REDIS_INSTANCES:
        for redis_ip, redis_port, redis_password in settings.OTHER_SKYLINE_REDIS_INSTANCES:
            if not raw_assigned:
                try:
                    if redis_password:
                        other_redis_conn = StrictRedis(
                            host=str(redis_ip),
                            port=int(redis_port),
                            password=str(redis_password))
                    else:
                        other_redis_conn = StrictRedis(host=str(redis_ip),
                                                       port=int(redis_port))
                    raw_assigned = other_redis_conn.mget(assigned_metrics)
                    if raw_assigned == [None]:
                        logger.info(
                            '%s data not retrieved from Redis at %s on port %s'
                            % (str(base_name), str(redis_ip), str(redis_port)))
                        raw_assigned = []
                    if raw_assigned:
                        logger.info(
                            '%s data retrieved from Redis at %s on port %s' %
                            (str(base_name), str(redis_ip), str(redis_port)))
                except:
                    logger.error(traceback.format_exc())
                    logger.error(
                        'error :: failed to connect to Redis at %s on port %s'
                        % (str(redis_ip), str(redis_port)))
                    raw_assigned = []

    if not raw_assigned or raw_assigned == [None]:
        logger.info('%s data not retrieved' % (str(base_name)))
        return []

    for i, metric_name in enumerate(assigned_metrics):
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []

        # @added 20200507 - Feature #3532: Sort all time series
        # To ensure that there are no unordered timestamps in the time
        # series which are artefacts of the collector or carbon-relay, sort
        # all time series by timestamp before analysis.
        original_timeseries = timeseries
        if original_timeseries:
            timeseries = sort_timeseries(original_timeseries)
            del original_timeseries

    # Convert the time series if this is a known_derivative_metric
    known_derivative_metric = is_derivative_metric(skyline_app, base_name)
    if known_derivative_metric:
        derivative_timeseries = nonNegativeDerivative(timeseries)
        timeseries = derivative_timeseries

    # @added 20201203 - Feature #3860: luminosity - handle low frequency data
    # Determine data resolution
    resolution = determine_resolution(timeseries)

    # Sample the time series
    # @modified 20180720 - Feature #2464: luminosity_remote_data
    # Added note here - if you modify the value of 600 here, it must be
    # modified in the luminosity_remote_data function in
    # skyline/webapp/backend.py as well
    # @modified 20201203 - Feature #3860: luminosity - handle low frequency data
    # from_timestamp = anomaly_timestamp - 600
    from_timestamp = anomaly_timestamp - (resolution * 10)

    anomaly_ts = []
    for ts, value in timeseries:
        if int(ts) < from_timestamp:
            continue
        if int(ts) <= anomaly_timestamp:
            anomaly_ts.append((int(ts), value))
        if int(ts) > anomaly_timestamp:
            break

    # @added 20190515 - Bug #3008: luminosity - do not analyse short time series
    # Only return a time series sample if the sample has sufficient data points
    # otherwise get_anomalies() will throw an error
    len_anomaly_ts = len(anomaly_ts)
    if len_anomaly_ts <= 9:
        logger.info(
            '%s insufficient data not retrieved, only %s data points surfaced, not correlating'
            % (str(base_name), str(len_anomaly_ts)))
        return []

    return anomaly_ts
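
The core msgpack pattern in get_anomalous_ts above is to feed the raw Redis value into a streaming Unpacker and then window the resulting (timestamp, value) tuples around the anomaly timestamp. A minimal, self-contained sketch of that pattern, using a fake payload and illustrative names rather than Skyline's API:

from msgpack import Unpacker, packb

def window_timeseries(raw_series, anomaly_timestamp, resolution=60):
    # Decode the bare stream of packed (timestamp, value) tuples
    unpacker = Unpacker(use_list=False)
    unpacker.feed(raw_series)
    timeseries = sorted(unpacker)
    # Keep the ten-sample window that ends at the anomaly timestamp
    from_timestamp = anomaly_timestamp - (resolution * 10)
    return [(int(ts), value) for ts, value in timeseries
            if from_timestamp <= int(ts) <= anomaly_timestamp]

# Simulated Redis value: concatenated packed (timestamp, value) pairs
raw = b''.join(packb((1600000000 + n * 60, float(n))) for n in range(20))
print(len(window_timeseries(raw, 1600000000 + 19 * 60)))  # 11 data points
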
Example No. 36
    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.

        Multi get the assigned_metrics for the process from Redis.

        For each metric:\n
        * unpack the `raw_timeseries` for the metric.\n
        * Analyse each timeseries against `ALGORITHMS` to determine if it is\n
          anomalous.\n
        * If anomalous add it to the :obj:`self.anomalous_metrics` list\n
        * Add what algorithms triggered to the :obj:`self.anomaly_breakdown_q` queue\n

        Add keys and values to the queue so the parent process can collate for:\n
        * :py:obj:`self.anomaly_breakdown_q`
        * :py:obj:`self.exceptions_q`
        """

        spin_start = time()
        logger.info('spin_process started')

        # Discover assigned metrics
        keys_per_processor = int(
            ceil(
                float(len(unique_metrics)) /
                float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = min(len(unique_metrics), i * keys_per_processor)
        # Fix analyzer worker metric assignment #94
        # https://github.com/etsy/skyline/pull/94 @languitar:worker-fix
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)
        # assigned_keys = range(300, 310)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            # logger.info('analysing %s' % metric_name)

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(
                    timeseries, metric_name)

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE,
                                                    '', 1)
                    metric = [datapoint, base_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    triggered_algorithms = []
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1
                            triggered_algorithms.append(algorithm)

            # It could have been deleted by the Roomba
            except TypeError:
                # logger.error('TypeError analysing %s' % metric_name)
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                # logger.error('TooShort analysing %s' % metric_name)
                exceptions['TooShort'] += 1
            except Stale:
                # logger.error('Stale analysing %s' % metric_name)
                exceptions['Stale'] += 1
            except Boring:
                # logger.error('Boring analysing %s' % metric_name)
                exceptions['Boring'] += 1
            except:
                # logger.error('Other analysing %s' % metric_name)
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

        spin_end = time() - spin_start
        logger.info('spin_process took %.2f seconds' % spin_end)
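
spin_process hands its per-process results back through self.anomaly_breakdown_q and self.exceptions_q as (key, count) pairs. The collation done by the parent process reduces to summing those pairs per key; a hedged sketch with synthetic data (the algorithm names below are examples, not output of this code):

from collections import defaultdict

def collate(pairs):
    """Sum (key, count) pairs, e.g. as drained from anomaly_breakdown_q."""
    totals = defaultdict(int)
    for key, value in pairs:
        totals[key] += value
    return dict(totals)

print(collate([('histogram_bins', 2), ('ks_test', 1), ('histogram_bins', 1)]))
# {'histogram_bins': 3, 'ks_test': 1}
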
Example No. 37
    def vacuum(self, i, namespace, duration):
        """
        Trim metrics that are older than settings.FULL_DURATION and purge old
        metrics.
        """
        begin = time()
        logger.info('%s :: started vacuum' % (skyline_app))

        # Discover assigned metrics
        namespace_unique_metrics = '%sunique_metrics' % str(namespace)
        unique_metrics = list(self.redis_conn.smembers(namespace_unique_metrics))
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ROOMBA_PROCESSES)))
        if i == settings.ROOMBA_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = min(len(unique_metrics), i * keys_per_processor)
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        euthanized = 0
        blocked = 0
        trimmed_keys = 0
        active_keys = 0

        for i in range(len(assigned_metrics)):
            self.check_if_parent_is_alive()

            pipe = self.redis_conn.pipeline()
            now = time()
            key = assigned_metrics[i]

            try:
                # WATCH the key
                pipe.watch(key)

                # Everything below NEEDS to happen before another datapoint
                # comes in. If your data has a very small resolution (<.1s),
                # this technique may not suit you.
                raw_series = pipe.get(key)
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = sorted([unpacked for unpacked in unpacker])

                # Put pipe back in multi mode
                pipe.multi()

                # There's one value. Purge if it's too old
                try:
                    if python_version == 2:
                        if not isinstance(timeseries[0], TupleType):
                            if timeseries[0] < now - duration:
                                pipe.delete(key)
                                pipe.srem(namespace_unique_metrics, key)
                                pipe.execute()
                                euthanized += 1
                            continue
                    if python_version == 3:
                        if not isinstance(timeseries[0], tuple):
                            if timeseries[0] < now - duration:
                                pipe.delete(key)
                                pipe.srem(namespace_unique_metrics, key)
                                pipe.execute()
                                euthanized += 1
                            continue
                except IndexError:
                    continue

                # Check if the last value is too old and purge
                if timeseries[-1][0] < now - duration:
                    pipe.delete(key)
                    pipe.srem(namespace_unique_metrics, key)
                    pipe.execute()
                    euthanized += 1
                    continue

                # Remove old datapoints and duplicates from timeseries
                temp = set()
                temp_add = temp.add
                delta = now - duration
                trimmed = [
                    t for t in timeseries
                    if t[0] > delta and
                    t[0] not in temp and not
                    temp_add(t[0])
                ]

                # Purge if everything was deleted, set key otherwise
                if len(trimmed) > 0:
                    # Serialize and strip the msgpack array header so the key
                    # holds a bare stream of packed datapoints again
                    btrimmed = packb(trimmed)
                    if len(trimmed) <= 15:
                        value = btrimmed[1:]
                    elif len(trimmed) <= 65535:
                        value = btrimmed[3:]
                        trimmed_keys += 1
                    else:
                        value = btrimmed[5:]
                        trimmed_keys += 1
                    pipe.set(key, value)
                    active_keys += 1
                else:
                    pipe.delete(key)
                    pipe.srem(namespace_unique_metrics, key)
                    euthanized += 1

                pipe.execute()

            except WatchError:
                blocked += 1
                assigned_metrics.append(key)
            except Exception as e:
                # If something bad happens, zap the key and hope it goes away
                pipe.delete(key)
                pipe.srem(namespace_unique_metrics, key)
                pipe.execute()
                euthanized += 1
                logger.info(e)
                logger.info('%s :: vacuum Euthanizing %s' % (skyline_app, key))
            finally:
                pipe.reset()

        logger.info(
            '%s :: vacuum operated on %s %d keys in %f seconds' %
            (skyline_app, namespace, len(assigned_metrics), time() - begin))
        logger.info('%s :: vacuum %s keyspace is now %d keys' % (skyline_app, namespace, (len(assigned_metrics) - euthanized)))
        logger.info('%s :: vacuum blocked %d times' % (skyline_app, blocked))
        logger.info('%s :: vacuum euthanized %d geriatric keys' % (skyline_app, euthanized))
        logger.info('%s :: vacuum processed %d active keys' % (skyline_app, active_keys))
        logger.info('%s :: vacuum potentially trimmed %d keys' % (skyline_app, trimmed_keys))
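
The "strip the msgpack array header" step in vacuum above relies on msgpack framing: packb() of a list emits an array header of 1, 3 or 5 bytes (fixarray, array 16, array 32) followed by the packed elements, so dropping that header leaves a bare stream of packed datapoint tuples, which is the append-friendly format the Redis keys hold and which Unpacker.feed() iterates. A small illustrative round-trip sketch:

from msgpack import Unpacker, packb

def strip_array_header(trimmed):
    # Mirrors the branching above: drop only the outer msgpack array header
    btrimmed = packb(trimmed)
    if len(trimmed) <= 15:
        return btrimmed[1:]    # fixarray: 1-byte header
    elif len(trimmed) <= 65535:
        return btrimmed[3:]    # array 16: 3-byte header
    return btrimmed[5:]        # array 32: 5-byte header

trimmed = [(1600000000 + n * 60, float(n)) for n in range(20)]
unpacker = Unpacker(use_list=False)
unpacker.feed(strip_array_header(trimmed))
print(list(unpacker) == trimmed)  # True
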
Example No. 38
def get_correlations(base_name, anomaly_timestamp, anomalous_ts,
                     assigned_metrics, raw_assigned, remote_assigned,
                     anomalies):

    logger = logging.getLogger(skyline_app_logger)

    # Distill timeseries strings into lists
    start = timer()
    count = 0
    metrics_checked_for_correlation = 0
    # Sample the time series
    # @modified 20180720 - Feature #2464: luminosity_remote_data
    # Added note here - if you modify the value of 600 here, it must be
    # modified in the luminosity_remote_data function in
    # skyline/webapp/backend.py as well
    from_timestamp = anomaly_timestamp - 600
    correlated_metrics = []
    correlations = []
    no_data = False
    if not anomalous_ts:
        no_data = True
    if not assigned_metrics:
        no_data = True
    if not raw_assigned:
        no_data = True
    if not anomalies:
        no_data = True
    if no_data:
        logger.error('error :: get_correlations :: no data')
        return (correlated_metrics, correlations)

    start_local_correlations = timer()

    local_redis_metrics_checked_count = 0
    local_redis_metrics_correlations_count = 0

    logger.info('get_correlations :: the local Redis metric count is %s' %
                str(len(assigned_metrics)))
    for i, metric_name in enumerate(assigned_metrics):
        count += 1
        # print(metric_name)
        # @modified 20180719 - Branch #2270: luminosity
        # Removed test limiting that was erroneously left in
        # if count > 1000:
        #     break
        correlated = None
        metric_base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
        if str(metric_base_name) == str(base_name):
            continue
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []
        if not timeseries:
            # print('no time series data for %s' % base_name)
            continue

        # Convert the time series if this is a known_derivative_metric
        known_derivative_metric = is_derivative_metric(skyline_app,
                                                       metric_base_name)
        if known_derivative_metric:
            try:
                derivative_timeseries = nonNegativeDerivative(timeseries)
                timeseries = derivative_timeseries
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: nonNegativeDerivative')

        correlate_ts = []
        for ts, value in timeseries:
            if int(ts) < from_timestamp:
                continue
            if int(ts) <= anomaly_timestamp:
                correlate_ts.append((int(ts), value))
            # @modified 20180720 - Feature #2464: luminosity_remote_data
            # Added note here - if you modify the value of 61 here, it must be
            # modified in the luminosity_remote_data function in
            # skyline/webapp/backend.py as well
            if int(ts) > (anomaly_timestamp + 61):
                break
        if not correlate_ts:
            continue

        local_redis_metrics_checked_count += 1
        anomaly_ts_dict = dict(anomalous_ts)
        correlate_ts_dict = dict(correlate_ts)

        for a in anomalies:
            try:
                # @modified 20180720 - Feature #2464: luminosity_remote_data
                # Added note here - if you modify the value of 120 here, it must be
                # modified in the luminosity_remote_data function in
                # skyline/webapp/backend.py as well
                if int(a.exact_timestamp) < int(anomaly_timestamp - 120):
                    continue
                if int(a.exact_timestamp) > int(anomaly_timestamp + 120):
                    continue
            except:
                continue
            try:
                time_period = (int(anomaly_timestamp - 120),
                               int(anomaly_timestamp + 120))
                my_correlator = Correlator(anomaly_ts_dict, correlate_ts_dict,
                                           time_period)
                # For better correlation use 0.9 instead of 0.8 for the threshold
                # @modified 20180524 - Feature #2360: CORRELATE_ALERTS_ONLY
                #                      Branch #2270: luminosity
                #                      Feature #2378: Add redis auth to Skyline and rebrow
                # Added this to setting.py
                # if my_correlator.is_correlated(threshold=0.9):
                try:
                    cross_correlation_threshold = settings.LUMINOL_CROSS_CORRELATION_THRESHOLD
                    metrics_checked_for_correlation += 1
                except:
                    cross_correlation_threshold = 0.9
                if my_correlator.is_correlated(
                        threshold=cross_correlation_threshold):
                    correlation = my_correlator.get_correlation_result()
                    correlated = True
                    correlations.append([
                        metric_base_name, correlation.coefficient,
                        correlation.shift, correlation.shifted_coefficient
                    ])
                    local_redis_metrics_correlations_count += 1
            except:
                pass
        if correlated:
            correlated_metrics.append(metric_base_name)

    # @added 20180720 - Feature #2464: luminosity_remote_data
    # Added the correlation of preprocessed remote data
    end_local_correlations = timer()
    logger.info(
        'get_correlations :: checked - local_redis_metrics_checked_count is %s'
        % str(local_redis_metrics_checked_count))
    logger.info(
        'get_correlations :: correlated - local_redis_metrics_correlations_count is %s'
        % str(local_redis_metrics_correlations_count))
    logger.info(
        'get_correlations :: processed %s correlations on local_redis_metrics_checked_count %s local metrics in %.6f seconds'
        % (str(local_redis_metrics_correlations_count),
           str(local_redis_metrics_checked_count),
           (end_local_correlations - start_local_correlations)))

    remote_metrics_count = 0
    remote_correlations_check_count = 0
    remote_correlations_count = 0
    logger.info('get_correlations :: remote_assigned count %s' %
                str(len(remote_assigned)))
    start_remote_correlations = timer()
    for ts_data in remote_assigned:
        remote_metrics_count += 1
        correlated = None
        metric_name = str(ts_data[0])
        metric_base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
        if str(metric_base_name) == str(base_name):
            continue
        timeseries = []
        try:
            timeseries = ts_data[1]
        except:
            timeseries = []
        if not timeseries:
            continue

        correlate_ts = []
        for ts, value in timeseries:
            if int(ts) < from_timestamp:
                continue
            if int(ts) <= anomaly_timestamp:
                correlate_ts.append((int(ts), value))
            # @modified 20180720 - Feature #2464: luminosity_remote_data
            # Added note here - if you modify the value of 61 here, it must be
            # modified in the luminosity_remote_data function in
            # skyline/webapp/backend.py as well
            if int(ts) > (anomaly_timestamp + 61):
                break
        if not correlate_ts:
            continue

        anomaly_ts_dict = dict(anomalous_ts)
        correlate_ts_dict = dict(correlate_ts)

        for a in anomalies:
            try:
                # @modified 20180720 - Feature #2464: luminosity_remote_data
                # Added note here - if you modify the value of 120 here, it must be
                # modified in the luminosity_remote_data function in
                # skyline/webapp/backend.py as well
                if int(a.exact_timestamp) < int(anomaly_timestamp - 120):
                    continue
                if int(a.exact_timestamp) > int(anomaly_timestamp + 120):
                    continue
            except:
                continue
            try:
                time_period = (int(anomaly_timestamp - 120),
                               int(anomaly_timestamp + 120))
                my_correlator = Correlator(anomaly_ts_dict, correlate_ts_dict,
                                           time_period)
                metrics_checked_for_correlation += 1
                remote_correlations_check_count += 1
                try:
                    cross_correlation_threshold = settings.LUMINOL_CROSS_CORRELATION_THRESHOLD
                except:
                    cross_correlation_threshold = 0.9
                if my_correlator.is_correlated(
                        threshold=cross_correlation_threshold):
                    correlation = my_correlator.get_correlation_result()
                    correlated = True
                    correlations.append([
                        metric_base_name, correlation.coefficient,
                        correlation.shift, correlation.shifted_coefficient
                    ])
                    remote_correlations_count += 1
            except:
                pass
        if correlated:
            correlated_metrics.append(metric_base_name)

    end_remote_correlations = timer()
    logger.info(
        'get_correlations :: checked - remote_correlations_check_count is %s' %
        str(remote_correlations_check_count))
    logger.info(
        'get_correlations :: correlated - remote_correlations_count is %s' %
        str(remote_correlations_count))
    logger.info(
        'get_correlations :: processed remote correlations on remote_metrics_count %s remote metrics in %.6f seconds'
        % (str(remote_metrics_count),
           (end_remote_correlations - start_remote_correlations)))

    end = timer()
    logger.info(
        'get_correlations :: checked a total of %s metrics and correlated %s metrics to %s anomaly, processed in %.6f seconds'
        % (str(metrics_checked_for_correlation), str(
            len(correlated_metrics)), base_name, (end - start)))
    # @added 20170720 - Task #2462: Implement useful metrics for Luminosity
    # Added runtime to calculate avg_runtime Graphite metric
    runtime = '%.6f' % (end - start)
    return (correlated_metrics, correlations, metrics_checked_for_correlation,
            runtime)
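
get_correlations feeds pairs of dict time series into luminol's Correlator and keeps any metric whose cross-correlation coefficient meets the threshold. A minimal sketch of that call pattern, assuming the luminol package is installed; the input dicts below are synthetic stand-ins for anomaly_ts_dict and correlate_ts_dict:

from luminol.correlator import Correlator

# Synthetic, perfectly correlated series keyed by timestamp (illustrative only)
anomaly_ts_dict = {1600000000 + n * 60: float(n % 5) for n in range(20)}
correlate_ts_dict = {1600000000 + n * 60: 2.0 * (n % 5) for n in range(20)}

my_correlator = Correlator(anomaly_ts_dict, correlate_ts_dict)
if my_correlator.is_correlated(threshold=0.9):
    correlation = my_correlator.get_correlation_result()
    print(correlation.coefficient, correlation.shift,
          correlation.shifted_coefficient)
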
Example No. 39
def alert_smtp(alert, metric):
    """
    Called by :func:`~trigger_alert` and sends an alert via smtp to the
    recipients that are configured for the metric.

    """
    LOCAL_DEBUG = False
    logger = logging.getLogger(skyline_app_logger)
    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
        logger.info('debug :: alert_smtp - sending smtp alert')
        logger.info('debug :: alert_smtp - Memory usage at start: %s (kb)' %
                    resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

    # Convert FULL_DURATION to hours so that analyzer surfaces the relevant
    # timeseries data in the graph
    full_duration_in_hours = int(settings.FULL_DURATION) / 3600

    # For backwards compatibility
    if '@' in alert[1]:
        sender = settings.ALERT_SENDER
        recipient = alert[1]
    else:
        sender = settings.SMTP_OPTS['sender']
        # @modified 20160806 - Added default_recipient
        try:
            recipients = settings.SMTP_OPTS['recipients'][alert[0]]
            use_default_recipient = False
        except:
            use_default_recipient = True
        if use_default_recipient:
            try:
                recipients = settings.SMTP_OPTS['default_recipient']
                logger.info(
                    'alert_smtp - using default_recipient as no recipients are configured for %s'
                    % str(alert[0]))
            except:
                logger.error(
                    'error :: alert_smtp - no known recipient for %s' %
                    str(alert[0]))
                return False

    # Backwards compatibility
    if type(recipients) is str:
        recipients = [recipients]

    unencoded_graph_title = 'Skyline Analyzer - ALERT at %s hours %s - %s' % (
        full_duration_in_hours, metric[1], metric[0])
    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
        logger.info('debug :: alert_smtp - unencoded_graph_title: %s' %
                    unencoded_graph_title)
    graph_title_string = quote(unencoded_graph_title, safe='')
    graph_title = '&title=%s' % graph_title_string

    if settings.GRAPHITE_PORT != '':
        link = '%s://%s:%s/render/?from=-%shours&target=cactiStyle(%s)%s%s&colorList=orange' % (
            settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
            settings.GRAPHITE_PORT, full_duration_in_hours, metric[1],
            settings.GRAPHITE_GRAPH_SETTINGS, graph_title)
    else:
        link = '%s://%s/render/?from=-%shours&target=cactiStyle(%s)%s%s&colorList=orange' % (
            settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
            full_duration_in_hours, metric[1],
            settings.GRAPHITE_GRAPH_SETTINGS, graph_title)

    content_id = metric[1]
    image_data = None
    if settings.SMTP_OPTS.get('embed-images'):
        try:
            image_data = urllib2.urlopen(link).read()
            if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                logger.info('debug :: alert_smtp - image data OK')
        except urllib2.URLError:
            image_data = None
            if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                logger.info('debug :: alert_smtp - image data None')

    if LOCAL_DEBUG:
        logger.info(
            'debug :: alert_smtp - Memory usage after image_data: %s (kb)' %
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

    # If we failed to get the image or if it was explicitly disabled,
    # use the image URL instead of the content.
    if image_data is None:
        img_tag = '<img src="%s"/>' % link
    else:
        img_tag = '<img src="cid:%s"/>' % content_id
        if settings.ENABLE_DEBUG or LOCAL_DEBUG:
            logger.info('debug :: alert_smtp - img_tag: %s' % img_tag)

    redis_image_data = None
    try:
        plot_redis_data = settings.PLOT_REDIS_DATA
    except:
        plot_redis_data = False

    if settings.SMTP_OPTS.get('embed-images') and plot_redis_data:
        # Create graph from Redis data
        try:
            REDIS_ALERTER_CONN = redis.StrictRedis(
                unix_socket_path=settings.REDIS_SOCKET_PATH)
        except:
            logger.error('error :: alert_smtp - redis connection failed')

        redis_metric_key = '%s%s' % (settings.FULL_NAMESPACE, metric[1])
        try:
            raw_series = REDIS_ALERTER_CONN.get(redis_metric_key)
            if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                logger.info('debug :: alert_smtp - raw_series: %s' % 'OK')
        except:
            if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                logger.info('debug :: alert_smtp - raw_series: %s' % 'FAIL')

        try:
            if LOCAL_DEBUG:
                logger.info(
                    'debug :: alert_smtp - Memory usage before get Redis timeseries data: %s (kb)'
                    % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
            unpacker = Unpacker(use_list=True)
            unpacker.feed(raw_series)
            timeseries_x = [float(item[0]) for item in unpacker]
            unpacker = Unpacker(use_list=True)
            unpacker.feed(raw_series)
            timeseries_y = [item[1] for item in unpacker]

            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
            if LOCAL_DEBUG:
                logger.info(
                    'debug :: alert_smtp - Memory usage after get Redis timeseries data: %s (kb)'
                    % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
        except:
            logger.error('error :: alert_smtp - unpack timeseries failed')
            timeseries = None

        pd_series_values = None
        if timeseries:
            try:
                if LOCAL_DEBUG:
                    logger.info(
                        'debug :: alert_smtp - Memory usage before pd.Series: %s (kb)'
                        % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
                values = pd.Series([x[1] for x in timeseries])
                # Because the truth value of a Series is ambiguous
                pd_series_values = True
                if LOCAL_DEBUG:
                    logger.info(
                        'debug :: alert_smtp - Memory usage after pd.Series: %s (kb)'
                        % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
            except:
                logger.error(
                    'error :: alert_smtp - pandas value series on timeseries failed'
                )

        if pd_series_values:
            try:
                array_median = np.median(values)
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - values median: %s' %
                                str(array_median))

                array_amax = np.amax(values)
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - array_amax: %s' %
                                str(array_amax))
                array_amin = np.amin(values)
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - array_amin: %s' %
                                str(array_amin))
                mean = values.mean()
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - mean: %s' % str(mean))
                stdDev = values.std()
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - stdDev: %s' %
                                str(stdDev))

                sigma3 = 3 * stdDev
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - sigma3: %s' %
                                str(sigma3))

                # sigma3_series = [sigma3] * len(values)

                sigma3_upper_bound = mean + sigma3
                try:
                    sigma3_lower_bound = mean - sigma3
                except:
                    sigma3_lower_bound = 0

                sigma3_upper_series = [sigma3_upper_bound] * len(values)
                sigma3_lower_series = [sigma3_lower_bound] * len(values)
                amax_series = [array_amax] * len(values)
                amin_series = [array_amin] * len(values)
                mean_series = [mean] * len(values)
            except:
                logger.error(
                    'error :: alert_smtp - numpy ops on series failed')
                mean_series = None

        if mean_series:
            graph_title = 'Skyline Analyzer - ALERT - at %s hours - Redis data\n%s - anomalous value: %s' % (
                full_duration_in_hours, metric[1], metric[0])
            # @modified 20160814 - Bug #1558: Memory leak in Analyzer
            # I think the buf is causing a memory leak, trying a file
            # if python_version == 3:
            #     buf = io.StringIO()
            # else:
            #     buf = io.BytesIO()
            buf = '%s/%s.%s.%s.png' % (settings.SKYLINE_TMP_DIR, skyline_app,
                                       str(metric[0]), metric[0])

            if LOCAL_DEBUG:
                logger.info(
                    'debug :: alert_smtp - Memory usage before plot Redis data: %s (kb)'
                    % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

            # Too big
            # rcParams['figure.figsize'] = 12, 6
            rcParams['figure.figsize'] = 8, 4
            try:
                # fig = plt.figure()
                fig = plt.figure(frameon=False)
                ax = fig.add_subplot(111)
                ax.set_title(graph_title, fontsize='small')
                ax.set_axis_bgcolor('black')
                try:
                    datetimes = [
                        dt.datetime.utcfromtimestamp(ts) for ts in timeseries_x
                    ]
                    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                        logger.info('debug :: alert_smtp - datetimes: %s' %
                                    'OK')
                except:
                    logger.error('error :: alert_smtp - datetimes: %s' %
                                 'FAIL')

                plt.xticks(rotation=0, horizontalalignment='center')
                xfmt = DateFormatter('%a %H:%M')
                plt.gca().xaxis.set_major_formatter(xfmt)

                ax.xaxis.set_major_formatter(xfmt)

                ax.plot(datetimes,
                        timeseries_y,
                        color='orange',
                        lw=0.6,
                        zorder=3)
                ax.tick_params(axis='both', labelsize='xx-small')

                max_value_label = 'max - %s' % str(array_amax)
                ax.plot(datetimes,
                        amax_series,
                        lw=1,
                        label=max_value_label,
                        color='m',
                        ls='--',
                        zorder=4)
                min_value_label = 'min - %s' % str(array_amin)
                ax.plot(datetimes,
                        amin_series,
                        lw=1,
                        label=min_value_label,
                        color='b',
                        ls='--',
                        zorder=4)
                mean_value_label = 'mean - %s' % str(mean)
                ax.plot(datetimes,
                        mean_series,
                        lw=1.5,
                        label=mean_value_label,
                        color='g',
                        ls='--',
                        zorder=4)

                sigma3_text = (r'3$\sigma$')
                # sigma3_label = '%s - %s' % (str(sigma3_text), str(sigma3))

                sigma3_upper_label = '%s upper - %s' % (
                    str(sigma3_text), str(sigma3_upper_bound))
                ax.plot(datetimes,
                        sigma3_upper_series,
                        lw=1,
                        label=sigma3_upper_label,
                        color='r',
                        ls='solid',
                        zorder=4)

                if sigma3_lower_bound > 0:
                    sigma3_lower_label = '%s lower - %s' % (
                        str(sigma3_text), str(sigma3_lower_bound))
                    ax.plot(datetimes,
                            sigma3_lower_series,
                            lw=1,
                            label=sigma3_lower_label,
                            color='r',
                            ls='solid',
                            zorder=4)

                ax.get_yaxis().get_major_formatter().set_useOffset(False)
                ax.get_yaxis().get_major_formatter().set_scientific(False)

                # Shrink current axis's height by 10% on the bottom
                box = ax.get_position()
                ax.set_position([
                    box.x0, box.y0 + box.height * 0.1, box.width,
                    box.height * 0.9
                ])

                # Put a legend below current axis
                ax.legend(loc='upper center',
                          bbox_to_anchor=(0.5, -0.05),
                          fancybox=True,
                          shadow=True,
                          ncol=4,
                          fontsize='x-small')
                plt.rc('lines', lw=2, color='w')

                plt.grid(True)

                ax.grid(b=True,
                        which='both',
                        axis='both',
                        color='lightgray',
                        linestyle='solid',
                        alpha=0.5,
                        linewidth=0.6)
                ax.set_axis_bgcolor('black')

                rcParams['xtick.direction'] = 'out'
                rcParams['ytick.direction'] = 'out'
                ax.margins(y=.02, x=.03)
                # tight_layout removes the legend box
                # fig.tight_layout()
                try:
                    if LOCAL_DEBUG:
                        logger.info(
                            'debug :: alert_smtp - Memory usage before plt.savefig: %s (kb)'
                            %
                            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
                    plt.savefig(buf, format='png')
                    # @added 20160814 - Bug #1558: Memory leak in Analyzer
                    # As per http://www.mail-archive.com/[email protected]/msg13222.html
                    # savefig in the parent process was causing the memory leak
                    # the below fig.clf() and plt.close() did not resolve this
                    # however spawning a multiprocessing process for alert_smtp
                    # does solve this issue as all memory is freed when the
                    # process terminates.
                    fig.clf()
                    plt.close(fig)
                    redis_graph_content_id = 'redis.%s' % metric[1]
                    redis_image_data = True
                    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                        logger.info('debug :: alert_smtp - savefig: %s' % 'OK')
                        logger.info(
                            'debug :: alert_smtp - Memory usage after plt.savefig: %s (kb)'
                            %
                            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
                except:
                    logger.error('error :: alert_smtp - plt.savefig: %s' %
                                 'FAIL')
            except:
                logger.error('error :: alert_smtp - could not build plot')
                logger.info(traceback.format_exc())

    if LOCAL_DEBUG:
        logger.info(
            'debug :: alert_smtp - Memory usage before email: %s (kb)' %
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

    if redis_image_data:
        redis_img_tag = '<img src="cid:%s"/>' % redis_graph_content_id
        if settings.ENABLE_DEBUG or LOCAL_DEBUG:
            logger.info('debug :: alert_smtp - redis_img_tag: %s' %
                        str(redis_img_tag))
    else:
        redis_img_tag = '<img src="none"/>'

    try:
        body = '<h3><font color="#dd3023">Sky</font><font color="#6698FF">line</font><font color="black"> Analyzer alert</font></h3><br>'
        body += '<font color="black">metric: <b>%s</b></font><br>' % metric[1]
        body += '<font color="black">Anomalous value: %s</font><br>' % str(
            metric[0])
        body += '<font color="black">At hours: %s</font><br>' % str(
            full_duration_in_hours)
        body += '<font color="black">Next alert in: %s seconds</font><br>' % str(
            alert[2])
        if redis_image_data:
            body += '<font color="black">min: %s  | max: %s   | mean: %s <br>' % (
                str(array_amin), str(array_amax), str(mean))
            body += '3-sigma: %s <br>' % str(sigma3)
            body += '3-sigma upper bound: %s   | 3-sigma lower bound: %s <br></font>' % (
                str(sigma3_upper_bound), str(sigma3_lower_bound))
            body += '<h3><font color="black">Redis data at FULL_DURATION</font></h3><br>'
            body += '<div dir="ltr">:%s<br></div>' % redis_img_tag
        if image_data:
            body += '<h3><font color="black">Graphite data at FULL_DURATION (may be aggregated)</font></h3>'
            body += '<div dir="ltr"><a href="%s">%s</a><br></div><br>' % (
                link, img_tag)
            body += '<font color="black">Clicking on the above graph will open to the Graphite graph with current data</font><br>'
        if redis_image_data:
            body += '<font color="black">To disable the Redis data graph view, set PLOT_REDIS_DATA to False in your settings.py if the Graphite graph is sufficient for you,<br>'
            body += 'however do note that this will remove the 3-sigma and mean values too.</font>'
        body += '<br>'
        body += '<div dir="ltr" align="right"><font color="#dd3023">Sky</font><font color="#6698FF">line</font><font color="black"> version :: %s</font></div><br>' % str(
            skyline_version)
    except:
        logger.error('error :: alert_smtp - could not build body')
        logger.info(traceback.format_exc())

    for recipient in recipients:
        try:
            msg = MIMEMultipart('alternative')
            msg['Subject'] = '[Skyline alert] - Analyzer ALERT - ' + metric[1]
            msg['From'] = sender
            msg['To'] = recipient

            msg.attach(MIMEText(body, 'html'))

            if redis_image_data:
                try:
                    # @modified 20160814 - Bug #1558: Memory leak in Analyzer
                    # I think the buf is causing a memory leak, trying a file
                    # buf.seek(0)
                    # msg_plot_attachment = MIMEImage(buf.read())
                    # msg_plot_attachment = MIMEImage(buf.read())
                    try:
                        # Read the plot image file as bytes for the MIME attachment
                        with open(buf, 'rb') as f:
                            plot_image_data = f.read()
                        try:
                            os.remove(buf)
                        except OSError:
                            logger.error(
                                'error :: alert_smtp - failed to remove file - %s'
                                % buf)
                            logger.info(traceback.format_exc())
                            pass
                    except:
                        logger.error('error :: failed to read plot file - %s' %
                                     buf)
                        plot_image_data = None
                    msg_plot_attachment = MIMEImage(plot_image_data)
                    msg_plot_attachment.add_header(
                        'Content-ID', '<%s>' % redis_graph_content_id)
                    msg.attach(msg_plot_attachment)
                    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                        logger.info(
                            'debug :: alert_smtp - msg_plot_attachment - redis data done'
                        )
                except:
                    logger.error('error :: alert_smtp - msg_plot_attachment')
                    logger.info(traceback.format_exc())

            if image_data is not None:
                try:
                    msg_attachment = MIMEImage(image_data)
                    msg_attachment.add_header('Content-ID',
                                              '<%s>' % content_id)
                    msg.attach(msg_attachment)
                    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                        logger.info(
                            'debug :: alert_smtp - msg_attachment - Graphite img source done'
                        )
                except:
                    logger.error('error :: alert_smtp - msg_attachment')
                    logger.info(traceback.format_exc())
        except:
            logger.error('error :: alert_smtp - could not attach')
            logger.info(traceback.format_exc())

        s = SMTP('127.0.0.1')
        try:
            s.sendmail(sender, recipient, msg.as_string())
            if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                logger.info('debug :: alert_smtp - message sent to %s OK' %
                            str(recipient))
        except:
            logger.error('error :: alert_smtp - could not send email to %s' %
                         str(recipient))
            logger.info(traceback.format_exc())

        s.quit()

        if LOCAL_DEBUG:
            logger.info(
                'debug :: alert_smtp - Memory usage after email: %s (kb)' %
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

        if redis_image_data:
            # buf.seek(0)
            # buf.write('none')
            if LOCAL_DEBUG:
                logger.info(
                    'debug :: alert_smtp - Memory usage before del redis_image_data objects: %s (kb)'
                    % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
            del raw_series
            del unpacker
            del timeseries[:]
            del timeseries_x[:]
            del timeseries_y[:]
            del values
            del datetimes[:]
            del msg_plot_attachment
            del redis_image_data
            # We del all variables that are floats as they become unique objects and
            # can result in what appears to be a memory leak, but is not; it is
            # just the way Python handles floats
            del mean
            del array_amin
            del array_amax
            del stdDev
            del sigma3
            if LOCAL_DEBUG:
                logger.info(
                    'debug :: alert_smtp - Memory usage after del redis_image_data objects: %s (kb)'
                    % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
            if LOCAL_DEBUG:
                logger.info(
                    'debug :: alert_smtp - Memory usage before del fig object: %s (kb)'
                    % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
            # @added 20160814 - Bug #1558: Memory leak in Analyzer
            #                   Issue #21 Memory leak in Analyzer - https://github.com/earthgecko/skyline/issues/21
            # As per http://www.mail-archive.com/[email protected]/msg13222.html
            fig.clf()
            plt.close(fig)
            del fig
            if LOCAL_DEBUG:
                logger.info(
                    'debug :: alert_smtp - Memory usage after del fig object: %s (kb)'
                    % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

        if LOCAL_DEBUG:
            logger.info(
                'debug :: alert_smtp - Memory usage before del other objects: %s (kb)'
                % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
        del recipients[:]
        del body
        del msg
        del image_data
        del msg_attachment
        if LOCAL_DEBUG:
            logger.info(
                'debug :: alert_smtp - Memory usage after del other objects: %s (kb)'
                % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
        return
Ejemplo n.º 40
    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Discover assigned metrics
        keys_per_processor = int(
            ceil(
                float(len(unique_metrics)) /
                float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = i * keys_per_processor
        assigned_min = assigned_max - keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(
                    timeseries, metric_name)

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE,
                                                    '', 1)
                    metric = [datapoint, base_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))
Ejemplo n.º 41
    def test(self):
        """
        Called when the process initializes.
        """
        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error(
                    'cloudbrain can\'t connect to redis at socket path %s' %
                    settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(
                    unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Discover unique metrics
            unique_metrics = list(
                self.redis_conn.smembers(settings.FULL_NAMESPACE +
                                         'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info(
                    'no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info(
                        'WARNING: cloudbrain is set for more cores than needed.'
                    )
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                p.start()

            # Send wait signal to zombie processes
            for p in pids:
                p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Send alerts
            if settings.ENABLE_ALERTS:
                for alert in settings.ALERTS:
                    for metric in self.anomalous_metrics:
                        if alert[0] in metric[1]:
                            cache_key = 'last_alert.%s.%s' % (alert[1],
                                                              metric[1])
                            try:
                                last_alert = self.redis_conn.get(cache_key)
                                if not last_alert:
                                    self.redis_conn.setex(
                                        cache_key, alert[2], packb(metric[0]))
                                    trigger_alert(alert, metric)

                            except Exception as e:
                                logger.error("couldn't send alert: %s" % e)

            # Write anomalous_metrics to static webapp directory
            filename = path.abspath(
                path.join(path.dirname(__file__), '..', settings.ANOMALY_DUMP))
            with open(filename, 'w') as fh:
                # Make it JSONP with a handle_data() function
                anomalous_metrics = list(self.anomalous_metrics)
                anomalous_metrics.sort(key=operator.itemgetter(1))
                fh.write('handle_data(%s)' % anomalous_metrics)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(unique_metrics))
            logger.info('total analyzed    :: %d' %
                        (len(unique_metrics) - sum(exceptions.values())))
            logger.info('total anomalies   :: %d' %
                        len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            self.send_graphite_metric('cloudbrain.analyzer.run_time',
                                      '%.2f' % (time() - now))
            self.send_graphite_metric(
                'cloudbrain.analyzer.total_analyzed',
                '%.2f' % (len(unique_metrics) - sum(exceptions.values())))

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE +
                                             settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                self.send_graphite_metric('cloudbrain.analyzer.duration',
                                          '%.2f' % time_human)
                self.send_graphite_metric('cloudbrain.analyzer.projected',
                                          '%.2f' % projected)

            # Reset counters
            self.anomalous_metrics[:] = []

            # Sleep if it went too fast
            if time() - now < 5:
                logger.info('sleeping due to low run time...')
                sleep(10)
Ejemplo n.º 42
def get_correlations(base_name, anomaly_timestamp, anomalous_ts,
                     assigned_metrics, raw_assigned, anomalies):

    logger = logging.getLogger(skyline_app_logger)

    # Distill timeseries strings into lists
    start = timer()
    count = 0
    # Sample the time series
    from_timestamp = anomaly_timestamp - 600
    correlated_metrics = []
    correlations = []
    no_data = False
    if not anomalous_ts:
        no_data = True
    if not assigned_metrics:
        no_data = True
    if not raw_assigned:
        no_data = True
    if not anomalies:
        no_data = True
    if no_data:
        logger.error('error :: get_correlations :: no data')
        return (correlated_metrics, correlations)

    for i, metric_name in enumerate(assigned_metrics):
        count += 1
        # print(metric_name)
        if count > 1000:
            break
        correlated = None
        metric_base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
        if str(metric_base_name) == str(base_name):
            continue
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []
        if not timeseries:
            # print('no time series data for %s' % base_name)
            continue

        # Convert the time series if this is a known_derivative_metric
        known_derivative_metric = is_derivative_metric(skyline_app,
                                                       metric_base_name)
        if known_derivative_metric:
            try:
                derivative_timeseries = nonNegativeDerivative(timeseries)
                timeseries = derivative_timeseries
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: nonNegativeDerivative')

        correlate_ts = []
        for ts, value in timeseries:
            if int(ts) < from_timestamp:
                continue
            if int(ts) <= anomaly_timestamp:
                correlate_ts.append((int(ts), value))
            if int(ts) > (anomaly_timestamp + 61):
                break
        if not correlate_ts:
            continue

        anomaly_ts_dict = dict(anomalous_ts)
        correlate_ts_dict = dict(correlate_ts)

        for a in anomalies:
            try:
                if int(a.exact_timestamp) < int(anomaly_timestamp - 120):
                    continue
                if int(a.exact_timestamp) > int(anomaly_timestamp + 120):
                    continue
            except:
                continue
            try:
                time_period = (int(anomaly_timestamp - 120),
                               int(anomaly_timestamp + 120))
                my_correlator = Correlator(anomaly_ts_dict, correlate_ts_dict,
                                           time_period)
                # For better correlation use 0.9 instead of 0.8 for the threshold
                # @modified 20180524 - Feature #2360: CORRELATE_ALERTS_ONLY
                #                      Branch #2270: luminosity
                #                      Feature #2378: Add redis auth to Skyline and rebrow
                # Added this to setting.py
                # if my_correlator.is_correlated(threshold=0.9):
                try:
                    cross_correlation_threshold = settings.LUMINOL_CROSS_CORRELATION_THRESHOLD
                except:
                    cross_correlation_threshold = 0.9
                if my_correlator.is_correlated(
                        threshold=cross_correlation_threshold):
                    correlation = my_correlator.get_correlation_result()
                    correlated = True
                    correlations.append([
                        metric_base_name, correlation.coefficient,
                        correlation.shift, correlation.shifted_coefficient
                    ])
            except:
                pass
        if correlated:
            correlated_metrics.append(metric_base_name)

    end = timer()
    logger.info(
        'correlated %s metrics to %s anomaly, processed in %.6f seconds' %
        (str(len(correlated_metrics)), base_name, (end - start)))
    return (correlated_metrics, correlations)
Ejemplo n.º 43
    def setUp(self):
        raw_packet = encode_ping()
        unpacker = Unpacker()
        unpacker.feed(raw_packet)
        self.command = list(unpacker)[1:]
Ejemplo n.º 44
    def toJsonBody(self, mpackBody):
        unpacker = Unpacker(object_pairs_hook=OrderedDict)
        unpacker.feed(mpackBody)
        bodyMap = unpacker.unpack()
        newBody = json.dumps(bodyMap, ensure_ascii=False)
        return newBody
Ejemplo n.º 45
    def setUp(self):
        raw_packet = encode_jump_to_main()
        unpacker = Unpacker()
        unpacker.feed(raw_packet)
        self.command = list(unpacker)[1:]
Ejemplo n.º 46
    def run(self):
        """
        Called when the process initializes.
        """
        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error(
                    'skyline can\'t connect to redis at socket path %s' %
                    settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(
                    unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Discover unique metrics
            unique_metrics = list(
                self.redis_conn.smembers(settings.FULL_NAMESPACE +
                                         'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info(
                    'no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Reset boundary_metrics
            boundary_metrics = []

            # Build boundary metrics
            for metric_name in unique_metrics:
                for metric in BOUNDARY_METRICS:
                    CHECK_MATCH_PATTERN = metric[0]
                    check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                    base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                    pattern_match = check_match_pattern.match(base_name)
                    if pattern_match:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info(
                                "debug - boundary metric - pattern MATCHED - "
                                + metric[0] + " | " + base_name)
                        boundary_metrics.append([metric_name, metric[1]])

            if ENABLE_BOUNDARY_DEBUG:
                logger.info("debug - boundary metrics - " +
                            str(boundary_metrics))

            if len(boundary_metrics) == 0:
                logger.info(
                    'no boundary metrics matched in redis. try adding some - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.BOUNDARY_PROCESSES + 1):
                if i > len(boundary_metrics):
                    logger.info(
                        'WARNING: skyline boundary is set for more cores than needed.'
                    )
                    break

                p = Process(target=self.spin_process,
                            args=(i, boundary_metrics))
                pids.append(p)
                p.start()

            # Send wait signal to zombie processes
            for p in pids:
                p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Send alerts
            if settings.BOUNDARY_ENABLE_ALERTS:
                for anomalous_metric in self.anomalous_metrics:
                    datapoint = str(anomalous_metric[0])
                    metric_name = anomalous_metric[1]
                    base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                    expiration_time = str(anomalous_metric[2])
                    metric_trigger = str(anomalous_metric[5])
                    alert_threshold = int(anomalous_metric[6])
                    metric_alerters = anomalous_metric[7]
                    algorithm = anomalous_metric[8]
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info("debug - anomalous_metric - " +
                                    str(anomalous_metric))

                    # Determine how many times the anomaly has been seen if the
                    # ALERT_THRESHOLD is set to > 1 and create a cache key in
                    # redis to keep count so that alert_threshold can be honored
                    if alert_threshold == 0:
                        times_seen = 1
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - alert_threshold - " +
                                        str(alert_threshold))

                    if alert_threshold == 1:
                        times_seen = 1
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - alert_threshold - " +
                                        str(alert_threshold))

                    if alert_threshold > 1:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - alert_threshold - " +
                                        str(alert_threshold))
                        anomaly_cache_key_count_set = False
                        anomaly_cache_key_expiration_time = (
                            int(alert_threshold) + 1) * 60
                        anomaly_cache_key = 'anomaly_seen.%s.%s' % (algorithm,
                                                                    base_name)
                        try:
                            anomaly_cache_key_count = self.redis_conn.get(
                                anomaly_cache_key)
                            if not anomaly_cache_key_count:
                                try:
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info(
                                            "debug - redis no anomaly_cache_key - "
                                            + str(anomaly_cache_key))
                                    times_seen = 1
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info(
                                            "debug - redis setex anomaly_cache_key - "
                                            + str(anomaly_cache_key))
                                    self.redis_conn.setex(
                                        anomaly_cache_key,
                                        anomaly_cache_key_expiration_time,
                                        packb(int(times_seen)))
                                    logger.info(
                                        'set anomaly seen key :: %s seen %s' %
                                        (anomaly_cache_key, str(times_seen)))
                                except Exception as e:
                                    logger.error('redis setex failed :: %s' %
                                                 str(anomaly_cache_key))
                                    logger.error("couldn't set key: %s" % e)
                            else:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - redis anomaly_cache_key retrieved OK - "
                                        + str(anomaly_cache_key))
                                anomaly_cache_key_count_set = True
                        except:
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info(
                                    "debug - redis failed - anomaly_cache_key retrieval failed - "
                                    + str(anomaly_cache_key))
                            anomaly_cache_key_count_set = False

                        if anomaly_cache_key_count_set:
                            unpacker = Unpacker(use_list=False)
                            unpacker.feed(anomaly_cache_key_count)
                            raw_times_seen = list(unpacker)
                            times_seen = int(raw_times_seen[0]) + 1
                            try:
                                self.redis_conn.setex(
                                    anomaly_cache_key,
                                    anomaly_cache_key_expiration_time,
                                    packb(int(times_seen)))
                                logger.info(
                                    'set anomaly seen key :: %s seen %s' %
                                    (anomaly_cache_key, str(times_seen)))
                            except:
                                times_seen = 1
                                logger.error(
                                    'set anomaly seen key failed :: %s seen %s'
                                    % (anomaly_cache_key, str(times_seen)))

                    # Alert the alerters if times_seen >= alert_threshold
                    if times_seen >= alert_threshold:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info(
                                "debug - times_seen %s is greater than or equal to alert_threshold %s"
                                % (str(times_seen), str(alert_threshold)))
                        for alerter in metric_alerters.split("|"):
                            # Determine alerter limits
                            send_alert = False
                            alerts_sent = 0
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info("debug - checking alerter - %s" %
                                            alerter)
                            try:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - determining alerter_expiration_time for settings"
                                    )
                                alerter_expiration_time_setting = settings.BOUNDARY_ALERTER_OPTS[
                                    'alerter_expiration_time'][alerter]
                                alerter_expiration_time = int(
                                    alerter_expiration_time_setting)
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - determined alerter_expiration_time from settings - %s"
                                        % str(alerter_expiration_time))
                            except:
                                # Set an arbitrary expiry time if not set
                                alerter_expiration_time = 160
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - could not determine alerter_expiration_time from settings"
                                    )
                            try:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - determining alerter_limit from settings"
                                    )
                                alerter_limit_setting = settings.BOUNDARY_ALERTER_OPTS[
                                    'alerter_limit'][alerter]
                                alerter_limit = int(alerter_limit_setting)
                                alerter_limit_set = True
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - determined alerter_limit from settings - %s"
                                        % str(alerter_limit))
                            except:
                                alerter_limit_set = False
                                send_alert = True
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - could not determine alerter_limit from settings"
                                    )

                            # If the alerter_limit is set determine how many
                            # alerts the alerter has sent
                            if alerter_limit_set:
                                alerter_sent_count_key = 'alerts_sent.%s' % (
                                    alerter)
                                try:
                                    alerter_sent_count_key_data = self.redis_conn.get(
                                        alerter_sent_count_key)
                                    if not alerter_sent_count_key_data:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info(
                                                "debug - redis no alerter key, no alerts sent for - "
                                                + str(alerter_sent_count_key))
                                        alerts_sent = 0
                                        send_alert = True
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info(
                                                "debug - alerts_sent set to %s"
                                                % str(alerts_sent))
                                            logger.info(
                                                "debug - send_alert set to %s"
                                                % str(send_alert))
                                    else:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info(
                                                "debug - redis alerter key retrieved, unpacking"
                                                + str(alerter_sent_count_key))
                                        unpacker = Unpacker(use_list=False)
                                        unpacker.feed(
                                            alerter_sent_count_key_data)
                                        raw_alerts_sent = list(unpacker)
                                        alerts_sent = int(raw_alerts_sent[0])
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info(
                                                "debug - alerter %s alerts sent %s "
                                                % (str(alerter),
                                                   str(alerts_sent)))
                                except:
                                    logger.info("No key set - %s" %
                                                alerter_sent_count_key)
                                    alerts_sent = 0
                                    send_alert = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info(
                                            "debug - alerts_sent set to %s" %
                                            str(alerts_sent))
                                        logger.info(
                                            "debug - send_alert set to %s" %
                                            str(send_alert))

                                if alerts_sent < alerter_limit:
                                    send_alert = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info(
                                            "debug - alerts_sent %s is less than alerter_limit %s"
                                            % (str(alerts_sent),
                                               str(alerter_limit)))
                                        logger.info(
                                            "debug - send_alert set to %s" %
                                            str(send_alert))

                            # Send alert
                            alerter_alert_sent = False
                            if send_alert:
                                cache_key = 'last_alert.boundary.%s.%s.%s' % (
                                    alerter, base_name, algorithm)
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - checking cache_key - %s" %
                                        cache_key)
                                try:
                                    last_alert = self.redis_conn.get(cache_key)
                                    if not last_alert:
                                        try:
                                            self.redis_conn.setex(
                                                cache_key,
                                                int(anomalous_metric[2]),
                                                packb(int(
                                                    anomalous_metric[0])))
                                            if ENABLE_BOUNDARY_DEBUG:
                                                logger.info(
                                                    'debug - key setex OK - %s'
                                                    % (cache_key))
                                            trigger_alert(
                                                alerter, datapoint, base_name,
                                                expiration_time,
                                                metric_trigger, algorithm)
                                            logger.info(
                                                'alert sent :: %s - %s - via %s - %s'
                                                % (base_name, datapoint,
                                                   alerter, algorithm))
                                            trigger_alert(
                                                "syslog", datapoint, base_name,
                                                expiration_time,
                                                metric_trigger, algorithm)
                                            logger.info(
                                                'alert sent :: %s - %s - via syslog - %s'
                                                % (base_name, datapoint,
                                                   algorithm))
                                            alerter_alert_sent = True
                                        except Exception as e:
                                            logger.error(
                                                'alert failed :: %s - %s - via %s - %s'
                                                % (base_name, datapoint,
                                                   alerter, algorithm))
                                            logger.error(
                                                "couldn't send alert: %s" %
                                                str(e))
                                            trigger_alert(
                                                "syslog", datapoint, base_name,
                                                expiration_time,
                                                metric_trigger, algorithm)
                                    else:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info(
                                                "debug - cache_key exists not alerting via %s for %s is less than alerter_limit %s"
                                                % (alerter, cache_key))
                                        trigger_alert("syslog", datapoint,
                                                      base_name,
                                                      expiration_time,
                                                      metric_trigger,
                                                      algorithm)
                                        logger.info(
                                            'alert sent :: %s - %s - via syslog - %s'
                                            %
                                            (base_name, datapoint, algorithm))
                                except:
                                    trigger_alert("syslog", datapoint,
                                                  base_name, expiration_time,
                                                  metric_trigger, algorithm)
                                    logger.info(
                                        'alert sent :: %s - %s - via syslog - %s'
                                        % (base_name, datapoint, algorithm))
                            else:
                                trigger_alert("syslog", datapoint, base_name,
                                              expiration_time, metric_trigger,
                                              algorithm)
                                logger.info(
                                    'alert sent :: %s - %s - via syslog - %s' %
                                    (base_name, datapoint, algorithm))

                            # Update the alerts sent for the alerter cache key,
                            # to allow for alert limiting
                            if alerter_alert_sent and alerter_limit_set:
                                try:
                                    alerter_sent_count_key = 'alerts_sent.%s' % (
                                        alerter)
                                    new_alerts_sent = int(alerts_sent) + 1
                                    self.redis_conn.setex(
                                        alerter_sent_count_key,
                                        alerter_expiration_time,
                                        packb(int(new_alerts_sent)))
                                    logger.info('set %s - %s' %
                                                (alerter_sent_count_key,
                                                 str(new_alerts_sent)))
                                except:
                                    logger.error('failed to set %s - %s' %
                                                 (alerter_sent_count_key,
                                                  str(new_alerts_sent)))

                    else:
                        # Always alert to syslog, even if alert_threshold is not
                        # breached or if send_alert is not True
                        trigger_alert("syslog", datapoint, base_name,
                                      expiration_time, metric_trigger,
                                      algorithm)
                        logger.info('alert sent :: %s - %s - via syslog - %s' %
                                    (base_name, datapoint, algorithm))

            # Write anomalous_metrics to static webapp directory
            if len(self.anomalous_metrics) > 0:
                filename = path.abspath(
                    path.join(path.dirname(__file__), '..',
                              settings.ANOMALY_DUMP))
                with open(filename, 'w') as fh:
                    # Make it JSONP with a handle_data() function
                    anomalous_metrics = list(self.anomalous_metrics)
                    anomalous_metrics.sort(key=operator.itemgetter(1))
                    fh.write('handle_data(%s)' % anomalous_metrics)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(boundary_metrics))
            logger.info('total analyzed    :: %d' %
                        (len(boundary_metrics) - sum(exceptions.values())))
            logger.info('total anomalies   :: %d' %
                        len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            self.send_graphite_metric(
                'skyline.boundary.' + SERVER_METRIC_PATH + 'run_time',
                '%.2f' % (time() - now))
            self.send_graphite_metric(
                'skyline.boundary.' + SERVER_METRIC_PATH + 'total_analyzed',
                '%.2f' % (len(boundary_metrics) - sum(exceptions.values())))
            self.send_graphite_metric(
                'skyline.boundary.' + SERVER_METRIC_PATH + 'total_anomalies',
                '%d' % len(self.anomalous_metrics))
            self.send_graphite_metric(
                'skyline.boundary.' + SERVER_METRIC_PATH + 'total_metrics',
                '%d' % len(boundary_metrics))
            for key, value in exceptions.items():
                send_metric = 'skyline.boundary.' + SERVER_METRIC_PATH + 'exceptions.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)
            for key, value in anomaly_breakdown.items():
                send_metric = 'skyline.boundary.' + SERVER_METRIC_PATH + 'anomaly_breakdown.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE +
                                             settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                self.send_graphite_metric(
                    'skyline.boundary.' + SERVER_METRIC_PATH + 'duration',
                    '%.2f' % time_human)
                self.send_graphite_metric(
                    'skyline.boundary.' + SERVER_METRIC_PATH + 'projected',
                    '%.2f' % projected)

            # Reset counters
            self.anomalous_metrics[:] = []

            # Only run once per minute
            seconds_to_run = int((time() - now))
            if seconds_to_run < 60:
                sleep_for_seconds = 60 - seconds_to_run
            else:
                sleep_for_seconds = 0
            if sleep_for_seconds > 0:
                logger.info('sleeping for %s seconds' % sleep_for_seconds)
                sleep(sleep_for_seconds)
Ejemplo n.º 47
def get_anomalous_ts(base_name, anomaly_timestamp):

    logger = logging.getLogger(skyline_app_logger)

    # @added 20180423 - Feature #2360: CORRELATE_ALERTS_ONLY
    #                   Branch #2270: luminosity
    # Only correlate metrics with an alert setting
    if correlate_alerts_only:
        try:
            smtp_alerter_metrics = list(
                redis_conn.smembers('analyzer.smtp_alerter_metrics'))
        except:
            smtp_alerter_metrics = []
        if base_name not in smtp_alerter_metrics:
            logger.error('%s has no alerter setting, not correlating' %
                         base_name)
            return []

    if not base_name or not anomaly_timestamp:
        return []

    # from skyline_functions import nonNegativeDerivative
    anomalous_metric = '%s%s' % (settings.FULL_NAMESPACE, base_name)
    unique_metrics = []
    try:
        unique_metrics = list(
            redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))
    except:
        logger.error(traceback.format_exc())
        logger.error('error :: get_assigned_metrics :: no unique_metrics')
        return []
    # @added 20180720 - Feature #2464: luminosity_remote_data
    # Ensure that Luminosity only processes its own Redis metrics so that if
    # multiple Skyline instances are running, Luminosity does not process an
    # anomaly_id for a metric that is not local to itself.  This will stop the
    # call to the remote Redis with other_redis_conn below.  With the
    # introduction of the preprocessing luminosity_remote_data API endpoint for
    # remote Skyline instances, there is no further requirement for Skyline
    # instances to have direct access to Redis on another Skyline instance.
    # This is a much better solution as it means all data is preprocessed and
    # encrypted; there is no need for iptables rules other than 443 (or a custom https port).
    #
    if anomalous_metric in unique_metrics:
        logger.info(
            '%s is a metric in Redis, processing on this Skyline instance' %
            base_name)
    else:
        logger.info(
            '%s is not a metric in Redis, not processing on this Skyline instance'
            % base_name)
        return []

    assigned_metrics = [anomalous_metric]
    # @modified 20180419 -
    raw_assigned = []
    try:
        raw_assigned = redis_conn.mget(assigned_metrics)
    except:
        raw_assigned = []
    if raw_assigned == [None]:
        logger.info('%s data not retrieved from local Redis' %
                    (str(base_name)))
        raw_assigned = []

    # @modified 20180721 - Feature #2464: luminosity_remote_data
    # TO BE DEPRECATED settings.OTHER_SKYLINE_REDIS_INSTANCES
    # with the addition of the luminosity_remote_data API call and the above
    if not raw_assigned and settings.OTHER_SKYLINE_REDIS_INSTANCES:
        # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
        # for redis_ip, redis_port in settings.OTHER_SKYLINE_REDIS_INSTANCES:
        for redis_ip, redis_port, redis_password in settings.OTHER_SKYLINE_REDIS_INSTANCES:
            if not raw_assigned:
                try:
                    if redis_password:
                        other_redis_conn = StrictRedis(
                            host=str(redis_ip),
                            port=int(redis_port),
                            password=str(redis_password))
                    else:
                        other_redis_conn = StrictRedis(host=str(redis_ip),
                                                       port=int(redis_port))
                    raw_assigned = other_redis_conn.mget(assigned_metrics)
                    if raw_assigned == [None]:
                        logger.info(
                            '%s data not retrieved from Redis at %s on port %s'
                            % (str(base_name), str(redis_ip), str(redis_port)))
                        raw_assigned = []
                    if raw_assigned:
                        logger.info(
                            '%s data retrieved from Redis at %s on port %s' %
                            (str(base_name), str(redis_ip), str(redis_port)))
                except:
                    logger.error(traceback.format_exc())
                    logger.error(
                        'error :: failed to connect to Redis at %s on port %s'
                        % (str(redis_ip), str(redis_port)))
                    raw_assigned = []

    if not raw_assigned or raw_assigned == [None]:
        logger.info('%s data not retrieved' % (str(base_name)))
        return []

    for i, metric_name in enumerate(assigned_metrics):
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []

    # Convert the time series if this is a known_derivative_metric
    known_derivative_metric = is_derivative_metric(skyline_app, base_name)
    if known_derivative_metric:
        derivative_timeseries = nonNegativeDerivative(timeseries)
        timeseries = derivative_timeseries

    # Sample the time series
    # @modified 20180720 - Feature #2464: luminosity_remote_data
    # Added note here - if you modify the value of 600 here, it must be
    # modified in the luminosity_remote_data function in
    # skyline/webapp/backend.py as well
    from_timestamp = anomaly_timestamp - 600
    anomaly_ts = []
    for ts, value in timeseries:
        if int(ts) < from_timestamp:
            continue
        if int(ts) <= anomaly_timestamp:
            anomaly_ts.append((int(ts), value))
        if int(ts) > anomaly_timestamp:
            break
    return anomaly_ts
Ejemplo n.º 48
def luminosity_remote_data(anomaly_timestamp):
    """
    Gets all the unique_metrics from Redis and then mgets Redis data for all
    metrics.  The data is then preprocessed for the remote Skyline luminosity
    instance and only the relevant fragments of the time series are
    returned.  This return is then gzipped by the Flask Webapp response to
    ensure the minimum amount of bandwidth is used.

    :param anomaly_timestamp: the anomaly timestamp
    :type anomaly_timestamp: int
    :return: (luminosity_data, success, message)
    :rtype: tuple

    """

    message = 'luminosity_remote_data returned'
    success = False
    luminosity_data = []
    logger.info('luminosity_remote_data :: determining unique_metrics')
    unique_metrics = []
    # If you modify the values of 61 or 600 here, it must be modified in the
    # luminosity_remote_data function in
    # skyline/luminosity/process_correlations.py as well
    from_timestamp = int(anomaly_timestamp) - 600
    until_timestamp = int(anomaly_timestamp) + 61

    try:
        unique_metrics = list(
            REDIS_CONN.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))
    except Exception as e:
        logger.error('error :: %s' % str(e))
        logger.error(
            'error :: luminosity_remote_data :: could not determine unique_metrics from Redis set'
        )
    if not unique_metrics:
        message = 'error :: luminosity_remote_data :: could not determine unique_metrics from Redis set'
        return luminosity_data, success, message
    logger.info('luminosity_remote_data :: %s unique_metrics' %
                str(len(unique_metrics)))

    # assigned metrics
    assigned_min = 0
    assigned_max = len(unique_metrics)
    assigned_keys = range(assigned_min, assigned_max)

    # Compile assigned metrics
    assigned_metrics = [unique_metrics[index] for index in assigned_keys]
    # Check if this process is unnecessary
    if len(assigned_metrics) == 0:
        message = 'error :: luminosity_remote_data :: assigned_metrics length is 0'
        logger.error(message)
        return luminosity_data, success, message

    # Multi get series
    raw_assigned_failed = True
    try:
        raw_assigned = REDIS_CONN.mget(assigned_metrics)
        raw_assigned_failed = False
    except:
        logger.info(traceback.format_exc())
        message = 'error :: luminosity_remote_data :: failed to mget raw_assigned'
        logger.error(message)
        return luminosity_data, success, message
    if raw_assigned_failed:
        message = 'error :: luminosity_remote_data :: failed to mget raw_assigned'
        logger.error(message)
        return luminosity_data, success, message

    # Distill timeseries strings into lists
    for i, metric_name in enumerate(assigned_metrics):
        timeseries = []
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []

        if not timeseries:
            continue

        # Convert the time series if this is a known_derivative_metric
        base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
        known_derivative_metric = is_derivative_metric('webapp', base_name)
        if known_derivative_metric:
            try:
                derivative_timeseries = nonNegativeDerivative(timeseries)
                timeseries = derivative_timeseries
            except:
                logger.error('error :: nonNegativeDerivative failed')

        correlate_ts = []
        for ts, value in timeseries:
            if int(ts) < from_timestamp:
                continue
            if int(ts) <= anomaly_timestamp:
                correlate_ts.append((int(ts), value))
            if int(ts) > until_timestamp:
                break
        if not correlate_ts:
            continue
        metric_data = [str(metric_name), correlate_ts]
        luminosity_data.append(metric_data)

    if luminosity_data:
        success = True

    logger.info(
        'luminosity_remote_data :: %s valid metric time series data preprocessed for the remote request'
        % str(len(luminosity_data)))

    return luminosity_data, success, message
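
For orientation only, a hedged illustration of the shape a successful return takes; the metric names and values below are invented. Each luminosity_data entry pairs a full metric name with its (timestamp, value) fragment around the anomaly.

example_luminosity_data = [
    ['metrics.server1.cpu.user', [(1534000500, 2.0), (1534000560, 3.0)]],
    ['metrics.server1.load.one', [(1534000500, 0.4), (1534000560, 0.5)]],
]
example_return = (example_luminosity_data, True, 'luminosity_remote_data returned')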
Example #49
        def determine_id(table, key, value):
            """
            Get the id of something from Redis or the database and insert a new
            record if one does not exist for the value.

            :param table: table name
            :param key: key name
            :param value: value name
            :type table: str
            :type key: str
            :type value: str
            :return: int or boolean

            """

            query_cache_key = '%s.mysql_ids.%s.%s.%s' % (skyline_app, table, key, value)
            determined_id = None
            redis_determined_id = None
            if settings.ENABLE_PANORAMA_DEBUG:
                logger.info('debug :: query_cache_key - %s' % (query_cache_key))

            try:
                redis_known_id = self.redis_conn.get(query_cache_key)
            except:
                redis_known_id = None

            if redis_known_id:
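                # The cached value is written below with packb(determined_id),
                # so the unpacker yields a single int back.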
                unpacker = Unpacker(use_list=False)
                unpacker.feed(redis_known_id)
                redis_determined_id = list(unpacker)

            if redis_determined_id:
                determined_id = int(redis_determined_id[0])

            if determined_id:
                if determined_id > 0:
                    return determined_id

            # Query MySQL
            query = 'select id FROM %s WHERE %s=\'%s\'' % (table, key, value)
            results = self.mysql_select(query)

            determined_id = 0
            if results:
                determined_id = int(results[0][0])

            if determined_id > 0:
                # Set the key for a week
                if not redis_determined_id:
                    try:
                        self.redis_conn.setex(query_cache_key, 604800, packb(determined_id))
                        logger.info('set redis query_cache_key - %s - id: %s' % (
                            query_cache_key, str(determined_id)))
                    except Exception as e:
                        logger.error(traceback.format_exc())
                        logger.error('error :: failed to set query_cache_key - %s - id: %s' % (
                            query_cache_key, str(determined_id)))
                return int(determined_id)

            # INSERT because no known id
            insert_query = 'insert into %s (%s) VALUES (\'%s\')' % (table, key, value)
            logger.info('inserting %s into %s table' % (value, table))
            try:
                results = self.mysql_insert(insert_query)
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: failed to determine the id of %s from the insert' % (value))
                raise

            determined_id = 0
            if results:
                determined_id = int(results)
            else:
                logger.error('error :: results not set')
                raise ValueError('error :: no insert id returned for %s' % value)

            if determined_id > 0:
                # Set the key for a week
                if not redis_determined_id:
                    try:
                        self.redis_conn.setex(query_cache_key, 604800, packb(determined_id))
                        logger.info('set redis query_cache_key - %s - id: %s' % (
                            query_cache_key, str(determined_id)))
                    except Exception as e:
                        logger.error(traceback.format_exc())
                        logger.error('%s' % str(e))
                        logger.error('error :: failed to set query_cache_key - %s - id: %s' % (
                            query_cache_key, str(determined_id)))
                return determined_id

            logger.error('error :: failed to determine the inserted id for %s' % value)
            return False
Example #50
    def vacuum(self, i, namespace, duration):
        """
        Trim metrics that are older than settings.FULL_DURATION and purge old
        metrics.
        """
        begin = time()
        logger.info('%s :: started vacuum' % (skyline_app))

        # Discover assigned metrics
        namespace_unique_metrics = '%sunique_metrics' % str(namespace)
        # @modified 20191030 - Bug #3266: py3 Redis binary objects not strings
        #                      Branch #3262: py3
        # unique_metrics = list(self.redis_conn.smembers(namespace_unique_metrics))
        unique_metrics = list(
            self.redis_conn_decoded.smembers(namespace_unique_metrics))

        # @added 20200727 - Feature #3650: ROOMBA_DO_NOT_PROCESS_BATCH_METRICS
        #                   Feature #3480: batch_processing
        #                   Feature #3486: analyzer_batch
        if ROOMBA_DO_NOT_PROCESS_BATCH_METRICS and BATCH_PROCESSING and BATCH_PROCESSING_NAMESPACES:
            try:
                batch_metrics = list(
                    self.redis_conn_decoded.smembers(
                        'aet.analyzer.batch_processing_metrics'))
            except:
                logger.error(
                    'error - failed to get Redis set aet.analyzer.batch_processing_metrics'
                )
                batch_metrics = []
            if batch_metrics:
                full_namespace_batch_metrics = []
                for base_name in batch_metrics:
                    metric = ''.join((settings.FULL_NAMESPACE, base_name))
                    full_namespace_batch_metrics.append(metric)
                del batch_metrics
                non_batch_unique_metrics = []
                for metric in unique_metrics:
                    if metric not in full_namespace_batch_metrics:
                        non_batch_unique_metrics.append(metric)
                # @modified 20200815 - Feature #3650: ROOMBA_DO_NOT_PROCESS_BATCH_METRICS
                # del after log
                # UnboundLocalError: local variable 'full_namespace_batch_metrics' referenced before assignment
                # del full_namespace_batch_metrics
                if non_batch_unique_metrics:
                    logger.info(
                        'roomba :: batch_processing :: removing %s batch metrics from unique_metrics'
                        % str(len(full_namespace_batch_metrics)))
                    unique_metrics = non_batch_unique_metrics
                del non_batch_unique_metrics
                # @added 20200815 - Feature #3650: ROOMBA_DO_NOT_PROCESS_BATCH_METRICS
                del full_namespace_batch_metrics

        keys_per_processor = int(
            ceil(
                float(len(unique_metrics)) / float(settings.ROOMBA_PROCESSES)))
        if i == settings.ROOMBA_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = min(len(unique_metrics), i * keys_per_processor)
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        euthanized = 0
        blocked = 0
        trimmed_keys = 0
        active_keys = 0

        # @modified 20191016 - Task #3280: Handle py2 xange and py3 range
        #                      Branch #3262: py3
        # for i in xrange(len(assigned_metrics)):
        range_list = []
        if python_version == 2:
            for i in xrange(len(assigned_metrics)):
                range_list.append(i)
        if python_version == 3:
            for i in range(len(assigned_metrics)):
                range_list.append(i)
        for i in range_list:
            self.check_if_parent_is_alive()

            pipe = self.redis_conn.pipeline()
            now = time()
            key = assigned_metrics[i]

            try:
                # WATCH the key
                pipe.watch(key)

                # Everything below NEEDS to happen before another datapoint
                # comes in. If your data has a very small resolution (<.1s),
                # this technique may not suit you.
                raw_series = pipe.get(key)
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = sorted([unpacked for unpacked in unpacker])

                # Put pipe back in multi mode
                pipe.multi()

                # There's one value. Purge if it's too old
                try:
                    if python_version == 2:
                        if not isinstance(timeseries[0], TupleType):
                            if timeseries[0] < now - duration:
                                pipe.delete(key)
                                pipe.srem(namespace_unique_metrics, key)
                                pipe.execute()
                                euthanized += 1
                            continue
                    if python_version == 3:
                        if not isinstance(timeseries[0], tuple):
                            if timeseries[0] < now - duration:
                                pipe.delete(key)
                                pipe.srem(namespace_unique_metrics, key)
                                pipe.execute()
                                euthanized += 1
                            continue
                except IndexError:
                    continue

                # Check if the last value is too old and purge
                if timeseries[-1][0] < now - duration:
                    pipe.delete(key)
                    pipe.srem(namespace_unique_metrics, key)
                    pipe.execute()
                    euthanized += 1
                    continue

                # Remove old datapoints and duplicates from timeseries
                temp = set()
                temp_add = temp.add
                delta = now - duration
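                # temp_add() returns None (falsy), so "not temp_add(t[0])" is
                # always True while recording the timestamp as seen; together
                # with "t[0] not in temp" this keeps only the first datapoint
                # for each timestamp.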
                trimmed = [
                    t for t in timeseries if t[0] > delta
                    and t[0] not in temp and not temp_add(t[0])
                ]

                # Purge if everything was deleted, set key otherwise
                if len(trimmed) > 0:
                    # Serialize and turn key back into not-an-array
                    btrimmed = packb(trimmed)
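                    # packb(trimmed) yields a single msgpack array; stripping
                    # its header (1 byte for a fixarray of up to 15 elements,
                    # 3 bytes for array16, 5 bytes for array32) leaves a bare
                    # concatenation of packed tuples, so newly arriving
                    # datapoints can simply be appended to the key and the
                    # whole value still decodes via Unpacker.feed().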
                    if len(trimmed) <= 15:
                        value = btrimmed[1:]
                    elif len(trimmed) <= 65535:
                        value = btrimmed[3:]
                        trimmed_keys += 1
                    else:
                        value = btrimmed[5:]
                        trimmed_keys += 1
                    pipe.set(key, value)
                    active_keys += 1
                else:
                    pipe.delete(key)
                    pipe.srem(namespace_unique_metrics, key)
                    euthanized += 1

                pipe.execute()

            except WatchError:
                blocked += 1
                assigned_metrics.append(key)
            except Exception as e:
                # If something bad happens, zap the key and hope it goes away
                pipe.delete(key)
                pipe.srem(namespace_unique_metrics, key)
                pipe.execute()
                euthanized += 1
                logger.info(e)
                logger.info('%s :: vacuum Euthanizing %s' % (skyline_app, key))
            finally:
                pipe.reset()

        logger.info(
            '%s :: vacuum operated on %s %d keys in %f seconds' %
            (skyline_app, namespace, len(assigned_metrics), time() - begin))
        logger.info('%s :: vacuum %s keyspace is now %d keys' %
                    (skyline_app, namespace,
                     (len(assigned_metrics) - euthanized)))
        logger.info('%s :: vacuum blocked %d times' % (skyline_app, blocked))
        logger.info('%s :: vacuum euthanized %d geriatric keys' %
                    (skyline_app, euthanized))
        logger.info('%s :: vacuum processed %d active keys' %
                    (skyline_app, active_keys))
        logger.info('%s :: vacuum potentially trimmed %d keys' %
                    (skyline_app, trimmed_keys))
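
As a standalone check (not Skyline code, only the msgpack-python API) of the header sizes stripped above: msgpack arrays carry a 1-byte fixarray header up to 15 elements, a 3-byte array16 header up to 65535 elements, and a 5-byte array32 header beyond that, which is where the 1/3/5 byte offsets come from.

from msgpack import packb, Unpacker

for element_count, header_bytes in ((10, 1), (100, 3), (70000, 5)):
    packed = packb([(i, float(i)) for i in range(element_count)])
    unpacker = Unpacker(use_list=False)
    # Strip the array header, leaving a bare concatenation of packed tuples.
    unpacker.feed(packed[header_bytes:])
    assert len(list(unpacker)) == element_count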
Example #51
    def run(self):
        """
        Called when the process initializes.
        """
        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error('skyline can\'t connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Discover unique metrics
            unique_metrics = list(self.redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info('no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info('WARNING: skyline is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                p.start()

            # Send wait signal to zombie processes
            for p in pids:
                p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Send alerts
            if settings.ENABLE_ALERTS:
                for alert in settings.ALERTS:
                    for metric in self.anomalous_metrics:
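                        # Tuple layout as used below: alert is
                        # (match pattern, alerter, expiration seconds,
                        # second order resolution hours) and metric is
                        # (datapoint, base_name, timestamp).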
                        ALERT_MATCH_PATTERN = alert[0]
                        METRIC_PATTERN = metric[1]
                        alert_match_pattern = re.compile(ALERT_MATCH_PATTERN)
                        pattern_match = alert_match_pattern.match(METRIC_PATTERN)
                        if pattern_match:
                            cache_key = 'last_alert.%s.%s' % (alert[1], metric[1])
                            try:
                                last_alert = self.redis_conn.get(cache_key)
                                if not last_alert:
                                    try:
                                        SECOND_ORDER_RESOLUTION_FULL_DURATION = alert[3]
                                        logger.info('mirage check      :: %s' % (metric[1]))
                                        # Write anomalous metric to test at second
                                        # order resolution by crucible to the check
                                        # file
                                        metric_timestamp = int(time())
                                        anomaly_check_file = '%s/%s.%s.txt' % (settings.MIRAGE_CHECK_PATH, metric_timestamp, metric[1])
                                        with open(anomaly_check_file, 'w') as fh:
                                            # metric_name, anomalous datapoint, hours to resolve, timestamp
                                            fh.write('metric = "%s"\nvalue = "%s"\nhours_to_resolve = "%s"\nmetric_timestamp = "%s"\n' % (metric[1], metric[0], alert[3], metric_timestamp))
                                            logger.info('added mirage check :: %s,%s,%s' % (metric[1], metric[0], alert[3]))
                                        if settings.ENABLE_FULL_DURATION_ALERTS:
                                            self.redis_conn.setex(cache_key, alert[2], packb(metric[0]))
                                            trigger_alert(alert, metric)
                                    except:
                                        self.redis_conn.setex(cache_key, alert[2], packb(metric[0]))
                                        trigger_alert(alert, metric)
                            except Exception as e:
                                logger.error("couldn't send alert: %s" % e)

            # Write anomalous_metrics to static webapp directory
            if len(self.anomalous_metrics) > 0:
                filename = path.abspath(path.join(path.dirname(__file__), '..', settings.ANOMALY_DUMP))
                with open(filename, 'w') as fh:
                    # Make it JSONP with a handle_data() function
                    anomalous_metrics = list(self.anomalous_metrics)
                    anomalous_metrics.sort(key=operator.itemgetter(1))
                    fh.write('handle_data(%s)' % anomalous_metrics)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(unique_metrics))
            logger.info('total analyzed    :: %d' % (len(unique_metrics) - sum(exceptions.values())))
            logger.info('total anomalies   :: %d' % len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            self.send_graphite_metric('skyline.analyzer.' + SERVER_METRIC_PATH + 'run_time', '%.2f' % (time() - now))
            self.send_graphite_metric('skyline.analyzer.' + SERVER_METRIC_PATH + 'total_analyzed', '%.2f' % (len(unique_metrics) - sum(exceptions.values())))
            self.send_graphite_metric('skyline.analyzer.' + SERVER_METRIC_PATH + 'total_anomalies', '%d' % len(self.anomalous_metrics))
            self.send_graphite_metric('skyline.analyzer.' + SERVER_METRIC_PATH + 'total_metrics', '%d' % len(unique_metrics))
            for key, value in exceptions.items():
                send_metric = 'skyline.analyzer.' + SERVER_METRIC_PATH + 'exceptions.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)
            for key, value in anomaly_breakdown.items():
                send_metric = 'skyline.analyzer.' + SERVER_METRIC_PATH + 'anomaly_breakdown.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE + settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human
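                # time_human is the number of hours the canary time series
                # spans; projected is the estimated number of seconds a full
                # 24 hours of data would take to analyse at this run's rate.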

                logger.info('canary duration   :: %.2f' % time_human)
                self.send_graphite_metric('skyline.analyzer.' + SERVER_METRIC_PATH + 'duration', '%.2f' % time_human)
                self.send_graphite_metric('skyline.analyzer.' + SERVER_METRIC_PATH + 'projected', '%.2f' % projected)

            # Reset counters
            self.anomalous_metrics[:] = []

            # Sleep if it went too fast
            if time() - now < 5:
                logger.info('sleeping due to low run time...')
                sleep(10)
Example #52
    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.

        Multi-get the assigned_metrics for the process from Redis.

        For each metric:

        - unpack the `raw_timeseries` for the metric.
        - Analyse each timeseries against `ALGORITHMS` to determine if it is
          anomalous.
        - If anomalous add it to the :obj:`self.anomalous_metrics` list
        - Add what algorithms triggered to the :obj:`self.anomaly_breakdown_q`
          queue
        - If :mod:`settings.ENABLE_CRUCIBLE` is ``True``:

          - Add a crucible data file with the details about the timeseries and
            anomaly.
          - Write the timeseries to a json file for crucible.

        Add keys and values to the queue so the parent process can collate for:\n
        * :py:obj:`self.anomaly_breakdown_q`
        * :py:obj:`self.exceptions_q`
        """

        spin_start = time()
        logger.info('spin_process started')
        if LOCAL_DEBUG:
            logger.info('debug :: Memory usage spin_process start: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

        # TESTING removal of p.join() from p.terminate()
        # sleep(4)

        # @modified 20160801 - Adding additional exception handling to Analyzer
        # Check the unique_metrics list is valid
        try:
            len(unique_metrics)
        except:
            logger.error('error :: the unique_metrics list is not valid')
            logger.info(traceback.format_exc())
            logger.info('nothing to do, no unique_metrics')
            return

        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = min(len(unique_metrics), i * keys_per_processor)
        # Fix analyzer worker metric assignment #94
        # https://github.com/etsy/skyline/pull/94 @languitar:worker-fix
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)
        # assigned_keys = range(300, 310)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]
        if LOCAL_DEBUG:
            logger.info('debug :: Memory usage spin_process after assigned_metrics: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

        # @added 20190410 - Feature #2916: ANALYZER_ENABLED setting
        if not ANALYZER_ENABLED:
            len_assigned_metrics = len(assigned_metrics)
            logger.info('ANALYZER_ENABLED is set to %s removing the %s assigned_metrics' % (
                str(ANALYZER_ENABLED), str(len_assigned_metrics)))
            assigned_metrics = []
            del unique_metrics

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        # @modified 20160801 - Adding additional exception handling to Analyzer
        raw_assigned_failed = True
        try:
            raw_assigned = self.redis_conn.mget(assigned_metrics)
            raw_assigned_failed = False
            if LOCAL_DEBUG:
                logger.info('debug :: Memory usage spin_process after raw_assigned: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
        except:
            logger.info(traceback.format_exc())
            logger.error('error :: failed to get assigned_metrics from Redis')

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # @added 20160803 - Adding additional exception handling to Analyzer
        if raw_assigned_failed:
            return

        # @added 20161119 - Branch #922: ionosphere
        #                   Task #1718: review.tsfresh
        # Determine the unique Mirage and Ionosphere metrics once, which are
        # used later to determine how Analyzer should handle/route anomalies
        try:
            mirage_unique_metrics = list(self.redis_conn.smembers('mirage.unique_metrics'))
        except:
            mirage_unique_metrics = []

        # @added 20190408 - Feature #2882: Mirage - periodic_check
        # Add Mirage periodic checks so that Mirage is analysing each metric at
        # least once per hour.
        mirage_periodic_check_metric_list = []
        try:
            mirage_periodic_check_enabled = settings.MIRAGE_PERIODIC_CHECK
        except:
            mirage_periodic_check_enabled = False
        try:
            mirage_periodic_check_interval = settings.MIRAGE_PERIODIC_CHECK_INTERVAL
        except:
            mirage_periodic_check_interval = 3600
        mirage_periodic_check_interval_minutes = int(int(mirage_periodic_check_interval) / 60)
        if mirage_unique_metrics and mirage_periodic_check_enabled:
            mirage_unique_metrics_count = len(mirage_unique_metrics)
            # Mirage periodic checks are only done on declared namespaces, as
            # processing all Mirage metrics periodically would probably create
            # a substantial load on Graphite and is probably not required;
            # only key metrics should be analysed by Mirage periodically.
            periodic_check_mirage_metrics = []
            try:
                mirage_periodic_check_namespaces = settings.MIRAGE_PERIODIC_CHECK_NAMESPACES
            except:
                mirage_periodic_check_namespaces = []
            for namespace in mirage_periodic_check_namespaces:
                for metric_name in mirage_unique_metrics:
                    metric_namespace_elements = metric_name.split('.')
                    mirage_periodic_metric = False
                    for periodic_namespace in mirage_periodic_check_namespaces:
                        if namespace not in mirage_periodic_check_namespaces:
                            continue
                        periodic_namespace_namespace_elements = periodic_namespace.split('.')
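                        # A metric qualifies when every dot-separated element
                        # of the declared periodic namespace also appears in
                        # the metric name.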
                        elements_matched = set(metric_namespace_elements) & set(periodic_namespace_namespace_elements)
                        if len(elements_matched) == len(periodic_namespace_namespace_elements):
                            mirage_periodic_metric = True
                            break
                    if mirage_periodic_metric:
                        if metric_name not in periodic_check_mirage_metrics:
                            periodic_check_mirage_metrics.append(metric_name)

            periodic_check_mirage_metrics_count = len(periodic_check_mirage_metrics)
            logger.info(
                'there are %s known Mirage periodic metrics' % (
                    str(periodic_check_mirage_metrics_count)))
            for metric_name in periodic_check_mirage_metrics:
                try:
                    self.redis_conn.sadd('new.mirage.periodic_check.metrics.all', metric_name)
                except Exception as e:
                    logger.error('error :: could not add %s to Redis set new.mirage.periodic_check.metrics.all: %s' % (
                        metric_name, e))
            try:
                self.redis_conn.rename('mirage.periodic_check.metrics.all', 'mirage.periodic_check.metrics.all.old')
            except:
                pass
            try:
                self.redis_conn.rename('new.mirage.periodic_check.metrics.all', 'mirage.periodic_check.metrics.all')
            except:
                pass
            try:
                self.redis_conn.delete('mirage.periodic_check.metrics.all.old')
            except:
                pass

            if periodic_check_mirage_metrics_count > mirage_periodic_check_interval_minutes:
                mirage_periodic_checks_per_minute = periodic_check_mirage_metrics_count / mirage_periodic_check_interval_minutes
            else:
                mirage_periodic_checks_per_minute = 1
            logger.info(
                '%s Mirage periodic checks can be added' % (
                    str(int(mirage_periodic_checks_per_minute))))
            for metric_name in periodic_check_mirage_metrics:
                if len(mirage_periodic_check_metric_list) == int(mirage_periodic_checks_per_minute):
                    break
                base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                mirage_periodic_check_cache_key = 'mirage.periodic_check.%s' % base_name
                mirage_periodic_check_key = False
                try:
                    mirage_periodic_check_key = self.redis_conn.get(mirage_periodic_check_cache_key)
                except Exception as e:
                    logger.error('error :: could not query Redis for cache_key: %s' % e)
                if not mirage_periodic_check_key:
                    try:
                        key_created_at = int(time())
                        self.redis_conn.setex(
                            mirage_periodic_check_cache_key,
                            mirage_periodic_check_interval, key_created_at)
                        logger.info(
                            'created Mirage periodic_check Redis key - %s' % (mirage_periodic_check_cache_key))
                        mirage_periodic_check_metric_list.append(metric_name)
                        try:
                            self.redis_conn.sadd('new.mirage.periodic_check.metrics', metric_name)
                        except Exception as e:
                            logger.error('error :: could not add %s to Redis set new.mirage.periodic_check.metrics: %s' % (
                                metric_name, e))
                    except:
                        logger.error(traceback.format_exc())
                        logger.error(
                            'error :: failed to create Mirage periodic_check Redis key - %s' % (mirage_periodic_check_cache_key))
            try:
                self.redis_conn.rename('mirage.periodic_check.metrics', 'mirage.periodic_check.metrics.old')
            except:
                pass
            try:
                self.redis_conn.rename('new.mirage.periodic_check.metrics', 'mirage.periodic_check.metrics')
            except:
                pass
            try:
                self.redis_conn.delete('mirage.periodic_check.metrics.old')
            except:
                pass
            mirage_periodic_check_metric_list_count = len(mirage_periodic_check_metric_list)
            logger.info(
                '%s Mirage periodic checks were added' % (
                    str(mirage_periodic_check_metric_list_count)))

        try:
            ionosphere_unique_metrics = list(self.redis_conn.smembers('ionosphere.unique_metrics'))
        except:
            ionosphere_unique_metrics = []

        # @added 20170602 - Feature #2034: analyse_derivatives
        # In order to convert monotonic, incrementing metrics to a derivative
        # metric
        try:
            derivative_metrics = list(self.redis_conn.smembers('derivative_metrics'))
        except:
            derivative_metrics = []
        try:
            non_derivative_metrics = list(self.redis_conn.smembers('non_derivative_metrics'))
        except:
            non_derivative_metrics = []
        # This is here to refresh the sets
        try:
            manage_derivative_metrics = self.redis_conn.get('analyzer.derivative_metrics_expiry')
        except Exception as e:
            if LOCAL_DEBUG:
                logger.error('error :: could not query Redis for analyzer.derivative_metrics_expiry key: %s' % str(e))
            manage_derivative_metrics = False

        # @added 20170901 - Bug #2154: Infrequent missing new_ Redis keys
        # If the analyzer.derivative_metrics_expiry is going to expire in the
        # next 60 seconds, just manage the derivative_metrics in the run as
        # there is an overlap some times where the key existed at the start of
        # the run but has expired by the end of the run.
        derivative_metrics_expiry_ttl = False
        if manage_derivative_metrics:
            try:
                derivative_metrics_expiry_ttl = self.redis_conn.ttl('analyzer.derivative_metrics_expiry')
                logger.info('the analyzer.derivative_metrics_expiry key ttl is %s' % str(derivative_metrics_expiry_ttl))
            except Exception as e:
                logger.error('error :: could not query Redis for analyzer.derivative_metrics_expiry key: %s' % str(e))
            if derivative_metrics_expiry_ttl:
                if int(derivative_metrics_expiry_ttl) < 60:
                    logger.info('managing derivative_metrics as the analyzer.derivative_metrics_expiry key ttl is less than 60 with %s' % str(derivative_metrics_expiry_ttl))
                    manage_derivative_metrics = False
                    try:
                        self.redis_conn.delete('analyzer.derivative_metrics_expiry')
                        logger.info('deleted the Redis key analyzer.derivative_metrics_expiry')
                    except:
                        logger.error('error :: failed to delete Redis key :: analyzer.derivative_metrics_expiry')

        try:
            non_derivative_monotonic_metrics = settings.NON_DERIVATIVE_MONOTONIC_METRICS
        except:
            non_derivative_monotonic_metrics = []

        # @added 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
        # Added Redis sets for Boring, TooShort and Stale
        redis_set_errors = 0

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            # logger.info('analysing %s' % metric_name)

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
            except:
                timeseries = []

            # @added 20170602 - Feature #2034: analyse_derivatives
            # In order to convert monotonic, incrementing metrics to a derivative
            # metric
            known_derivative_metric = False
            unknown_deriv_status = True
            if metric_name in non_derivative_metrics:
                unknown_deriv_status = False
            if unknown_deriv_status:
                if metric_name in derivative_metrics:
                    known_derivative_metric = True
                    unknown_deriv_status = False
            # This is here to refresh the sets
            if not manage_derivative_metrics:
                unknown_deriv_status = True

            base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)

            # @added 20170617 - Bug #2050: analyse_derivatives - change in monotonicity
            # First check if it has its own Redis z.derivative_metric key
            # that has not expired
            derivative_metric_key = 'z.derivative_metric.%s' % str(base_name)

            if unknown_deriv_status:
                # @added 20170617 - Bug #2050: analyse_derivatives - change in monotonicity
                last_derivative_metric_key = False
                try:
                    last_derivative_metric_key = self.redis_conn.get(derivative_metric_key)
                except Exception as e:
                    logger.error('error :: could not query Redis for last_derivative_metric_key: %s' % e)

                # Determine if it is a strictly increasing monotonically metric
                # or has been in last FULL_DURATION via its z.derivative_metric
                # key
                if not last_derivative_metric_key:
                    is_strictly_increasing_monotonically = strictly_increasing_monotonicity(timeseries)
                    if is_strictly_increasing_monotonically:
                        try:
                            last_expire_set = int(time())
                            self.redis_conn.setex(
                                derivative_metric_key, settings.FULL_DURATION, last_expire_set)
                        except Exception as e:
                            logger.error('error :: could not set Redis derivative_metric key: %s' % e)
                else:
                    # Until the z.derivative_metric key expires, it is classed
                    # as such
                    is_strictly_increasing_monotonically = True

                skip_derivative = in_list(base_name, non_derivative_monotonic_metrics)
                if skip_derivative:
                    is_strictly_increasing_monotonically = False
                if is_strictly_increasing_monotonically:
                    known_derivative_metric = True
                    try:
                        self.redis_conn.sadd('derivative_metrics', metric_name)
                    except:
                        logger.info(traceback.format_exc())
                        logger.error('error :: failed to add metric to Redis derivative_metrics set')
                    try:
                        self.redis_conn.sadd('new_derivative_metrics', metric_name)
                    except:
                        logger.info(traceback.format_exc())
                        logger.error('error :: failed to add metric to Redis new_derivative_metrics set')
                else:
                    try:
                        self.redis_conn.sadd('non_derivative_metrics', metric_name)
                    except:
                        logger.info(traceback.format_exc())
                        logger.error('error :: failed to add metric to Redis non_derivative_metrics set')
                    try:
                        self.redis_conn.sadd('new_non_derivative_metrics', metric_name)
                    except:
                        logger.info(traceback.format_exc())
                        logger.error('error :: failed to add metric to Redis new_non_derivative_metrics set')
            if known_derivative_metric:
                try:
                    derivative_timeseries = nonNegativeDerivative(timeseries)
                    timeseries = derivative_timeseries
                except:
                    logger.error('error :: nonNegativeDerivative failed')

            # @added 20180903 - Feature #2580: illuminance
            #                   Feature #1986: flux
            try:
                illuminance_datapoint = timeseries[-1][1]
                if '.illuminance' not in metric_name:
                    self.illuminance_datapoints.append(illuminance_datapoint)
            except:
                pass

            try:
                anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)

                # @added 20190408 - Feature #2882: Mirage - periodic_check
                # Add for Mirage periodic - is really anomalous add to
                # real_anomalous_metrics and if in mirage_periodic_check_metric_list
                # add as anomalous
                if anomalous:
                    # @modified 20190412 - Bug #2932: self.real_anomalous_metrics not being populated correctly
                    #                      Feature #2882: Mirage - periodic_check
                    # self.real_anomalous_metrics.append(base_name)
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    metric_timestamp = timeseries[-1][0]
                    metric = [datapoint, base_name, metric_timestamp]
                    self.real_anomalous_metrics.append(metric)
                if metric_name in mirage_periodic_check_metric_list:
                    self.mirage_periodic_check_metrics.append(base_name)
                    anomalous = True

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    metric_timestamp = timeseries[-1][0]
                    metric = [datapoint, base_name, metric_timestamp]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    triggered_algorithms = []
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1
                            triggered_algorithms.append(algorithm)

            # It could have been deleted by the Roomba
            except TypeError:
                # logger.error('TypeError analysing %s' % metric_name)
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                # logger.error('TooShort analysing %s' % metric_name)
                exceptions['TooShort'] += 1
            except Stale:
                # logger.error('Stale analysing %s' % metric_name)
                exceptions['Stale'] += 1
            except Boring:
                # logger.error('Boring analysing %s' % metric_name)
                exceptions['Boring'] += 1
            except:
                # logger.error('Other analysing %s' % metric_name)
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

        spin_end = time() - spin_start
        logger.info('spin_process took %.2f seconds' % spin_end)
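
The assigned_min/assigned_max arithmetic above (and the same pattern in the vacuum method) partitions unique_metrics across the worker processes. A small standalone illustration, assuming 4 processes and 10 invented metric names:

from math import ceil

unique_metrics = ['metrics.host.%d' % n for n in range(10)]
processes = 4

keys_per_processor = int(ceil(float(len(unique_metrics)) / float(processes)))
for i in range(1, processes + 1):
    if i == processes:
        assigned_max = len(unique_metrics)
    else:
        assigned_max = min(len(unique_metrics), i * keys_per_processor)
    assigned_min = (i - 1) * keys_per_processor
    print(i, unique_metrics[assigned_min:assigned_max])
# 1 ['metrics.host.0', 'metrics.host.1', 'metrics.host.2']
# 2 ['metrics.host.3', 'metrics.host.4', 'metrics.host.5']
# 3 ['metrics.host.6', 'metrics.host.7', 'metrics.host.8']
# 4 ['metrics.host.9']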
Example #53
def test_partialdata():
    unpacker = Unpacker()
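    # 0xa5 is the msgpack fixstr header declaring a 5-byte string, so the
    # unpacker cannot yield an object until all five payload bytes arrive.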
    unpacker.feed(b"\xa5")
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b"h")
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b"a")
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b"l")
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b"l")
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b"o")
    assert next(iter(unpacker)) == "hallo"
Example #54
    def run(self):
        """
        Called when the process initializes.

        Determine if Redis is up and discover the number of `unique metrics`.

        Divide the `unique_metrics` between the number of `ANALYZER_PROCESSES`
        and assign each process a set of metrics to analyse for anomalies.

        Wait for the processes to finish.

        Determine whether any anomalous metrics require:\n
        * alerting on (and set `EXPIRATION_TIME` key in Redis for alert).\n
        * feeding to another module e.g. mirage.

        Populate the webapp json with the anomalous_metrics details.

        Log the details about the run to the skyline log.

        Send skyline.analyzer metrics to `GRAPHITE_HOST`.
        """

        # Log management to prevent overwriting
        # Allow the bin/<skyline_app>.d to manage the log
        if os.path.isfile(skyline_app_logwait):
            try:
                os.remove(skyline_app_logwait)
            except OSError:
                logger.error('error - failed to remove %s, continuing' % skyline_app_logwait)
                pass

        now = time()
        log_wait_for = now + 5
        while now < log_wait_for:
            if os.path.isfile(skyline_app_loglock):
                sleep(.1)
                now = time()
            else:
                now = log_wait_for + 1

        logger.info('starting %s run' % skyline_app)
        if os.path.isfile(skyline_app_loglock):
            logger.error('error - bin/%s.d log management seems to have failed, continuing' % skyline_app)
            try:
                os.remove(skyline_app_loglock)
                logger.info('log lock file removed')
            except OSError:
                logger.error('error - failed to remove %s, continuing' % skyline_app_loglock)
                pass
        else:
            logger.info('bin/%s.d log management done' % skyline_app)

        if not os.path.exists(settings.SKYLINE_TMP_DIR):
            if python_version == 2:
                os.makedirs(settings.SKYLINE_TMP_DIR, 0o750)
            if python_version == 3:
                os.makedirs(settings.SKYLINE_TMP_DIR, mode=0o750)

        # Initiate the algorithm timings if Analyzer is configured to send the
        # algorithm_breakdown metrics with ENABLE_ALGORITHM_RUN_METRICS
        algorithm_tmp_file_prefix = settings.SKYLINE_TMP_DIR + '/' + skyline_app + '.'
        algorithms_to_time = []
        if send_algorithm_run_metrics:
            algorithms_to_time = settings.ALGORITHMS

        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error('skyline can\'t connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
                if settings.REDIS_PASSWORD:
                    self.redis_conn = StrictRedis(password=settings.REDIS_PASSWORD, unix_socket_path=settings.REDIS_SOCKET_PATH)
                else:
                    self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Report app up
            self.redis_conn.setex(skyline_app, 120, now)

            # Discover unique metrics
            unique_metrics = list(self.redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info('no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Using count files rather than multiprocessing.Value to enable
            # metrics for algorithm run times, etc
            for algorithm in algorithms_to_time:
                algorithm_count_file = algorithm_tmp_file_prefix + algorithm + '.count'
                algorithm_timings_file = algorithm_tmp_file_prefix + algorithm + '.timings'
                # with open(algorithm_count_file, 'a') as f:
                with open(algorithm_count_file, 'w') as f:
                    pass
                with open(algorithm_timings_file, 'w') as f:
                    pass

            # Spawn processes
            pids = []
            pid_count = 0
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info('WARNING: skyline is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                pid_count += 1
                logger.info('starting %s of %s spin_process/es' % (str(pid_count), str(settings.ANALYZER_PROCESSES)))
                p.start()

            # Send wait signal to zombie processes
            # for p in pids:
            #     p.join()
            # Self monitor processes and terminate if any spin_process has run
            # for longer than 180 seconds
            p_starts = time()
            while time() - p_starts <= 180:
                if any(p.is_alive() for p in pids):
                    # Just to avoid hogging the CPU
                    sleep(.1)
                else:
                    # All the processes are done, break now.
                    time_to_run = time() - p_starts
                    logger.info('%s :: %s spin_process/es completed in %.2f seconds' % (skyline_app, str(settings.ANALYZER_PROCESSES), time_to_run))
                    break
            else:
                # We only enter this if we didn't 'break' above.
                logger.info('%s :: timed out, killing all spin_process processes' % (skyline_app))
                for p in pids:
                    p.terminate()
                    # p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Push to panorama
#            if len(self.panorama_anomalous_metrics) > 0:
#                logger.info('to do - push to panorama')

            # Push to crucible
#            if len(self.crucible_anomalous_metrics) > 0:
#                logger.info('to do - push to crucible')

            # Write anomalous_metrics to static webapp directory

            # Using count files rather than multiprocessing.Value to enable
            # metrics for algorithm run times, etc
            for algorithm in algorithms_to_time:
                algorithm_count_file = algorithm_tmp_file_prefix + algorithm + '.count'
                algorithm_timings_file = algorithm_tmp_file_prefix + algorithm + '.timings'

                try:
                    algorithm_count_array = []
                    with open(algorithm_count_file, 'r') as f:
                        for line in f:
                            value_string = line.replace('\n', '')
                            unquoted_value_string = value_string.replace("'", '')
                            float_value = float(unquoted_value_string)
                            algorithm_count_array.append(float_value)
                except:
                    algorithm_count_array = False

                if not algorithm_count_array:
                    continue

                number_of_times_algorithm_run = len(algorithm_count_array)
                logger.info(
                    'algorithm run count - %s run %s times' % (
                        algorithm, str(number_of_times_algorithm_run)))
                if number_of_times_algorithm_run == 0:
                    continue

                try:
                    algorithm_timings_array = []
                    with open(algorithm_timings_file, 'r') as f:
                        for line in f:
                            value_string = line.replace('\n', '')
                            unquoted_value_string = value_string.replace("'", '')
                            float_value = float(unquoted_value_string)
                            algorithm_timings_array.append(float_value)
                except:
                    algorithm_timings_array = False

                if not algorithm_timings_array:
                    continue

                number_of_algorithm_timings = len(algorithm_timings_array)
                logger.info(
                    'algorithm timings count - %s has %s timings' % (
                        algorithm, str(number_of_algorithm_timings)))

                if number_of_algorithm_timings == 0:
                    continue

                try:
                    _sum_of_algorithm_timings = sum(algorithm_timings_array)
                except:
                    logger.error("sum error: " + traceback.format_exc())
                    _sum_of_algorithm_timings = round(0.0, 6)
                    logger.error('error - sum_of_algorithm_timings - %s' % (algorithm))
                    continue

                sum_of_algorithm_timings = round(_sum_of_algorithm_timings, 6)
                # logger.info('sum_of_algorithm_timings - %s - %.16f seconds' % (algorithm, sum_of_algorithm_timings))

                try:
                    _median_algorithm_timing = determine_median(algorithm_timings_array)
                except:
                    _median_algorithm_timing = round(0.0, 6)
                    logger.error('error - _median_algorithm_timing - %s' % (algorithm))
                    continue
                median_algorithm_timing = round(_median_algorithm_timing, 6)
                # logger.info('median_algorithm_timing - %s - %.16f seconds' % (algorithm, median_algorithm_timing))

                logger.info(
                    'algorithm timing - %s - total: %.6f - median: %.6f' % (
                        algorithm, sum_of_algorithm_timings,
                        median_algorithm_timing))
                send_metric_name = 'algorithm_breakdown.' + algorithm + '.timing.times_run'
                self.send_graphite_metric(send_metric_name, '%d' % number_of_algorithm_timings)
                send_metric_name = 'algorithm_breakdown.' + algorithm + '.timing.total_time'
                self.send_graphite_metric(send_metric_name, '%.6f' % sum_of_algorithm_timings)
                send_metric_name = 'algorithm_breakdown.' + algorithm + '.timing.median_time'
                self.send_graphite_metric(send_metric_name, '%.6f' % median_algorithm_timing)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(unique_metrics))
            logger.info('total analyzed    :: %d' % (len(unique_metrics) - sum(exceptions.values())))
            logger.info('total anomalies   :: %d' % len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            self.send_graphite_metric('run_time', '%.2f' % (time() - now))
            self.send_graphite_metric('total_analyzed', '%.2f' % (len(unique_metrics) - sum(exceptions.values())))
            self.send_graphite_metric('total_anomalies', '%d' % len(self.anomalous_metrics))
            self.send_graphite_metric('total_metrics', '%d' % len(unique_metrics))
            for key, value in exceptions.items():
                send_metric = 'exceptions.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)
            for key, value in anomaly_breakdown.items():
                send_metric = 'anomaly_breakdown.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE + settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                self.send_graphite_metric('duration', '%.2f' % time_human)
                self.send_graphite_metric('projected', '%.2f' % projected)

            # Reset counters
            self.anomalous_metrics[:] = []

            # Sleep if it went too fast
            # if time() - now < 5:
            #    logger.info('sleeping due to low run time...')
            #    sleep(10)
            # @modified 20160504 - @earthgecko - development internal ref #1338, #1340
            # Etsy's original logic only slept if the run took less than 5
            # seconds, which does not make skyline Analyzer very efficient in
            # installations where 100s of 1000s of metrics are being analyzed.
            # It also meant that a smaller deployment could analyse a few 1000
            # metrics in 9 seconds and then do so again and again within a
            # single minute, so Analyzer was always working and analysing the
            # same metrics multiple times a minute.  The
            # ANALYZER_OPTIMUM_RUN_DURATION setting was therefore added to
            # allow Analyzer to self optimise in cases where skyline is NOT
            # deployed to analyze 100s of 1000s of metrics, e.g. smaller and
            # local deployments in the few 1000s of metrics and 60 second
            # resolution range.
            process_runtime = time() - now
            analyzer_optimum_run_duration = settings.ANALYZER_OPTIMUM_RUN_DURATION
            if process_runtime < analyzer_optimum_run_duration:
                sleep_for = (analyzer_optimum_run_duration - process_runtime)
                # sleep_for = 60
                logger.info('sleeping for %.2f seconds due to low run time...' % sleep_for)
                sleep(sleep_for)
Example No. 55
0
def test_partialdata():
    unpacker = Unpacker()
    unpacker.feed(b'\xa5')
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b'h')
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b'a')
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b'l')
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b'l')
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b'o')
    assert next(iter(unpacker)) == b'hallo'
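The test above works because 0xa5 is the msgpack fixstr header for a 5 byte string, so the Unpacker buffers the bytes it is fed and yields nothing until all five payload bytes have arrived. A minimal sketch of the same streaming behaviour, assuming only the msgpack-python Unpacker API used above:

from msgpack import Unpacker

unpacker = Unpacker()
stream = b'\xa5hallo'
for chunk in (stream[:1], stream[1:4], stream[4:]):
    unpacker.feed(chunk)
    # the iterator stays empty until the final chunk completes the object
    print(list(unpacker))
# prints [], [] and then the decoded 5 byte string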
Example No. 56
0
    def run(self):
        """
        Called when the process initializes.
        """
        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error(
                    'skyline can\'t connect to redis at socket path %s' %
                    settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(
                    unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Discover unique metrics
            unique_metrics = list(
                self.redis_conn.smembers(settings.FULL_NAMESPACE +
                                         'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info(
                    'no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info(
                        'WARNING: skyline is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                p.start()

            # Send wait signal to zombie processes
            for p in pids:
                p.join()

            # Send alerts
            #if settings.ENABLE_ALERTS:
            #    for alert in settings.ALERTS:
            #        for metric in self.anomalous_metrics:
            #            if alert[0] in metric[1]:
            #                try:
            #                    last_alert = self.redis_conn.get('last_alert.' + metric[1])
            #                    if not last_alert:
            #                        self.redis_conn.setex('last_alert.' + metric[1], alert[2], packb(metric[0]))
            #                        self.send_mail(alert, metric)
            #                except Exception as e:
            #                    logger.error("couldn't send alert: %s" % e)

            # Write anomalous_metrics to static webapp directory
            filename = path.abspath(
                path.join(path.dirname(__file__), '..', settings.ANOMALY_DUMP))
            with open(filename, 'w') as fh:
                # Make it JSONP with a handle_data() function
                anomalous_metrics = list(self.anomalous_metrics)
                anomalous_metrics.sort(key=operator.itemgetter(1))
                fh.write('handle_data(%s)' % anomalous_metrics)

            # process anomalous metrics
            for metric in self.anomalous_metrics:
                try:
                    last_save_key = 'last_save.%s.%s' % (metric[1], metric[2])
                    last_save = self.redis_conn.get(last_save_key)
                    if not last_save:
                        self.redis_conn.setex(last_save_key,
                                              settings.SKIP_FREQUENCY,
                                              packb(metric[0]))
                        self.storage.save(metric)
                    if settings.ENABLE_ALERTS:
                        last_alert_key = 'last_alert.' + metric[1]
                        last_alert = self.redis_conn.get(last_alert_key)
                        if not last_alert:
                            self.redis_conn.setex(last_alert_key,
                                                  settings.SKIP_FREQUENCY,
                                                  packb(metric[0]))
                            self.alerter.add(metric)
                except Exception as e:
                    logger.error(
                        "Failed processing anomaly, pid: %s, metric: %s, error: %s",
                        getpid(), metric[1], e)

            # send ready alerts
            if settings.ENABLE_ALERTS:
                try:
                    self.alerter.send_alerts()
                except Exception as e:
                    logger.error("Failed sending alerts, error: %s", e)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(unique_metrics))
            logger.info('total analyzed    :: %d' %
                        (len(unique_metrics) - sum(self.exceptions.values())))
            logger.info('total anomalies   :: %d' %
                        len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % self.exceptions)
            logger.info('anomaly breakdown :: %s' % self.anomaly_breakdown)

            # Log to Graphite
            if settings.GRAPHITE_HOST != '':
                host = settings.GRAPHITE_HOST.replace('http://', '')
                system(
                    'echo skyline.analyzer.run_time %.2f %s | nc -w 3 %s 2003'
                    % ((time() - now), now, host))
                system(
                    'echo skyline.analyzer.total_analyzed %d %s | nc -w 3 %s 2003'
                    % ((len(unique_metrics) - sum(self.exceptions.values())),
                       now, host))

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE +
                                             settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                if settings.GRAPHITE_HOST != '':
                    host = settings.GRAPHITE_HOST.replace('http://', '')
                    system(
                        'echo skyline.analyzer.duration %.2f %s | nc -w 3 %s 2003'
                        % (time_human, now, host))
                    system(
                        'echo skyline.analyzer.projected %.2f %s | nc -w 3 %s 2003'
                        % (projected, now, host))

            # Reset counters
            self.anomalous_metrics[:] = []
            self.exceptions = Manager().dict()
            self.anomaly_breakdown = Manager().dict()

            # Sleep if it went too fast
            if time() - now < 5:
                logger.info('sleeping due to low run time...')
                sleep(10)
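The run() above shells out to nc to push metrics over Graphite's plaintext protocol, which is one "name value timestamp" line per metric sent to port 2003. A minimal socket-based sketch of the same line format; the hostname is illustrative and send_graphite_metric here is a standalone helper, not the class method used elsewhere in this document:

import socket
from time import time

def send_graphite_metric(name, value, host='graphite.example.com', port=2003):
    # Graphite plaintext protocol: "<metric path> <value> <timestamp>\n"
    line = '%s %s %d\n' % (name, value, int(time()))
    sock = socket.create_connection((host, port), timeout=3)
    try:
        sock.sendall(line.encode('utf-8'))
    finally:
        sock.close()

send_graphite_metric('skyline.analyzer.run_time', '%.2f' % 1.23)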
Example No. 57
0
from msgpack import Unpacker
import json

unpacked = []
unpacker = Unpacker()
fname = r"test.bin"
with open(fname, 'rb') as f:
    unpacker.feed(f.read())
    for o in unpacker:
        print(json.dumps(o, indent=2))
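For larger files the same decode loop can stream instead of reading the whole file into memory first. A minimal sketch assuming the same msgpack-python API and the same hypothetical test.bin file: passing the file object to Unpacker lets it read and buffer the data itself rather than feed()-ing the full contents in one call.

import json

from msgpack import Unpacker

with open(r"test.bin", 'rb') as f:
    # the Unpacker reads from the file object as it needs more bytes
    for o in Unpacker(f):
        print(json.dumps(o, indent=2))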
Example No. 58
0
def get_anomalous_ts(base_name, anomaly_timestamp):

    logger = logging.getLogger(skyline_app_logger)

    # @added 20180423 - Feature #2360: CORRELATE_ALERTS_ONLY
    #                   Branch #2270: luminosity
    # Only correlate metrics with an alert setting
    if correlate_alerts_only:
        try:
            smtp_alerter_metrics = list(
                redis_conn.smembers('analyzer.smtp_alerter_metrics'))
        except:
            smtp_alerter_metrics = []
        if base_name not in smtp_alerter_metrics:
            logger.error('%s has no alerter setting, not correlating' %
                         base_name)
            return False

    if not base_name or not anomaly_timestamp:
        return False

    # from skyline_functions import nonNegativeDerivative
    anomalous_metric = '%s%s' % (settings.FULL_NAMESPACE, base_name)
    assigned_metrics = [anomalous_metric]
    # @modified 20180419 -
    raw_assigned = []
    try:
        raw_assigned = redis_conn.mget(assigned_metrics)
    except:
        raw_assigned = []
    if raw_assigned == [None]:
        logger.info('%s data not retrieved from local Redis' %
                    (str(base_name)))
        raw_assigned = []

    if not raw_assigned and settings.OTHER_SKYLINE_REDIS_INSTANCES:
        # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
        # for redis_ip, redis_port in settings.OTHER_SKYLINE_REDIS_INSTANCES:
        for redis_ip, redis_port, redis_password in settings.OTHER_SKYLINE_REDIS_INSTANCES:
            if not raw_assigned:
                try:
                    if redis_password:
                        other_redis_conn = StrictRedis(
                            host=str(redis_ip),
                            port=int(redis_port),
                            password=str(redis_password))
                    else:
                        other_redis_conn = StrictRedis(host=str(redis_ip),
                                                       port=int(redis_port))
                    raw_assigned = other_redis_conn.mget(assigned_metrics)
                    if raw_assigned == [None]:
                        logger.info(
                            '%s data not retrieved from Redis at %s on port %s'
                            % (str(base_name), str(redis_ip), str(redis_port)))
                        raw_assigned = []
                    if raw_assigned:
                        logger.info(
                            '%s data retrieved from Redis at %s on port %s' %
                            (str(base_name), str(redis_ip), str(redis_port)))
                except:
                    logger.error(traceback.format_exc())
                    logger.error(
                        'error :: failed to connect to Redis at %s on port %s'
                        % (str(redis_ip), str(redis_port)))
                    raw_assigned = []

    if not raw_assigned or raw_assigned == [None]:
        logger.info('%s data not retrieved' % (str(base_name)))
        return False

    for i, metric_name in enumerate(assigned_metrics):
        try:
            raw_series = raw_assigned[i]
            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            timeseries = []

    # Convert the time series if this is a known_derivative_metric
    known_derivative_metric = is_derivative_metric(skyline_app, base_name)
    if known_derivative_metric:
        derivative_timeseries = nonNegativeDerivative(timeseries)
        timeseries = derivative_timeseries

    # Sample the time series
    from_timestamp = anomaly_timestamp - 600
    anomaly_ts = []
    for ts, value in timeseries:
        if int(ts) < from_timestamp:
            continue
        if int(ts) <= anomaly_timestamp:
            anomaly_ts.append((int(ts), value))
        if int(ts) > anomaly_timestamp:
            break
    return anomaly_ts
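A stripped-down sketch of the unpack-and-window pattern used by get_anomalous_ts above; the Redis socket path, the metric key and the anomaly timestamp are illustrative only, and the key is assumed to hold a msgpack stream of (timestamp, value) tuples as in the other examples:

from time import time

from msgpack import Unpacker
from redis import StrictRedis

redis_conn = StrictRedis(unix_socket_path='/tmp/redis.sock')
raw_series = redis_conn.get('metrics.my.test.metric')

anomaly_timestamp = int(time())
from_timestamp = anomaly_timestamp - 600

anomaly_ts = []
if raw_series:
    unpacker = Unpacker(use_list=False)
    unpacker.feed(raw_series)
    for ts, value in unpacker:
        # keep only the 600 seconds leading up to the anomaly
        if from_timestamp <= int(ts) <= anomaly_timestamp:
            anomaly_ts.append((int(ts), value))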
Example No. 59
0
def alert_smtp(alert, metric, second_order_resolution_seconds, context):
    """
    Called by :func:`~trigger_alert` and sends an alert via smtp to the
    recipients that are configured for the metric.

    """
    LOCAL_DEBUG = False
    logger = logging.getLogger(skyline_app_logger)
    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
        logger.info('debug :: alert_smtp - sending smtp alert')
        logger.info('debug :: alert_smtp - Memory usage at start: %s (kb)' %
                    resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

    # SECOND_ORDER_RESOLUTION_SECONDS to hours so that Mirage surfaces the
    # relevant timeseries data in the graph
    second_order_resolution_in_hours = int(
        second_order_resolution_seconds) / 3600

    # @added 20161229 - Feature #1830: Ionosphere alerts
    # Added Ionosphere variables
    base_name = str(metric[1]).replace(settings.FULL_NAMESPACE, '', 1)
    if settings.IONOSPHERE_ENABLED:
        timeseries_dir = base_name.replace('.', '/')
        training_data_dir = '%s/%s/%s' % (settings.IONOSPHERE_DATA_FOLDER,
                                          str(int(metric[2])), timeseries_dir)
        graphite_image_file = '%s/%s.%s.graphite.%sh.png' % (
            training_data_dir, base_name, skyline_app,
            str(int(second_order_resolution_in_hours)))
        json_file = '%s/%s.%s.redis.%sh.json' % (
            training_data_dir, base_name, skyline_app,
            str(int(full_duration_in_hours)))
        training_data_redis_image = '%s/%s.%s.redis.plot.%sh.png' % (
            training_data_dir, base_name, skyline_app,
            str(int(full_duration_in_hours)))

    # For backwards compatibility
    if '@' in alert[1]:
        sender = settings.ALERT_SENDER
        recipient = alert[1]
        logger.info('alert_smtp - recipient for %s is %s' %
                    (str(alert[0]), str(recipient)))
    else:
        sender = settings.SMTP_OPTS['sender']
        # @modified 20160806 - Added default_recipient
        try:
            recipients = settings.SMTP_OPTS['recipients'][alert[0]]
            use_default_recipient = False
            logger.info('alert_smtp - recipients for %s are %s' %
                        (str(alert[0]), str(recipients)))
            logger.info('alert_smtp - recipients are:')
            try:
                for recip in recipients:
                    logger.info('alert_smtp :: recipient - %s' % str(recip))
            except:
                logger.error(traceback.format_exc())
                logger.error(
                    'alert_smtp :: error :: could not iterate recipients list')
        except:
            use_default_recipient = True
        if use_default_recipient:
            try:
                recipients = settings.SMTP_OPTS['default_recipient']
                logger.info(
                    'alert_smtp - using default_recipient as no recipients are configured for %s'
                    % str(alert[0]))
            except:
                logger.error(
                    'error :: alert_smtp - no known recipient for %s' %
                    str(alert[0]))
                return False

    # Backwards compatibility
    if type(recipients) is str:
        logger.info('alert_smtp :: recipients is a string')
        recipients = [recipients]
    else:
        logger.info('alert_smtp :: recipients is not a string, OK')

    # @added 20180524 - Task #2384: Change alerters to cc other recipients
    # The alerters previously sent an individual email to each recipient.  It
    # is more useful to send one email with the first smtp recipient as the
    # to recipient and the subsequent recipients added in cc.
    if recipients:
        primary_recipient = False
        cc_recipients = False
        for i_recipient in recipients:
            if not primary_recipient:
                primary_recipient = str(i_recipient)
            if primary_recipient != i_recipient:
                if not cc_recipients:
                    cc_recipients = str(i_recipient)
                else:
                    new_cc_recipients = '%s,%s' % (str(cc_recipients),
                                                   str(i_recipient))
                    cc_recipients = str(new_cc_recipients)
        logger.info(
            'alert_smtp - will send to primary_recipient :: %s, cc_recipients :: %s'
            % (str(primary_recipient), str(cc_recipients)))

    # @modified 20161228 - Feature #1830: Ionosphere alerts
    # Ionosphere alerts
    unencoded_graph_title = 'Skyline %s - ALERT at %s hours - %s' % (
        context, str(int(second_order_resolution_in_hours)), str(metric[0]))
    # @modified 20170603 - Feature #2034: analyse_derivatives
    # Added derivative functions to convert the values of metrics strictly
    # increasing monotonically to their derivative products in alert graphs and
    # specify it in the graph_title
    known_derivative_metric = False
    try:
        # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
        if settings.REDIS_PASSWORD:
            REDIS_ALERTER_CONN = redis.StrictRedis(
                password=settings.REDIS_PASSWORD,
                unix_socket_path=settings.REDIS_SOCKET_PATH)
        else:
            REDIS_ALERTER_CONN = redis.StrictRedis(
                unix_socket_path=settings.REDIS_SOCKET_PATH)
    except:
        logger.error('error :: alert_smtp - redis connection failed')
    try:
        derivative_metrics = list(
            REDIS_ALERTER_CONN.smembers('derivative_metrics'))
    except:
        derivative_metrics = []
    redis_metric_name = '%s%s' % (settings.FULL_NAMESPACE, str(base_name))
    if redis_metric_name in derivative_metrics:
        known_derivative_metric = True
    if known_derivative_metric:
        try:
            non_derivative_monotonic_metrics = settings.NON_DERIVATIVE_MONOTONIC_METRICS
        except:
            non_derivative_monotonic_metrics = []
        skip_derivative = in_list(redis_metric_name,
                                  non_derivative_monotonic_metrics)
        if skip_derivative:
            known_derivative_metric = False
    if known_derivative_metric:
        unencoded_graph_title = 'Skyline %s - ALERT at %s hours - derivative graph - %s' % (
            context, str(int(second_order_resolution_in_hours)), str(
                metric[0]))

    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
        logger.info('debug :: alert_smtp - unencoded_graph_title: %s' %
                    unencoded_graph_title)
    graph_title_string = quote(unencoded_graph_title, safe='')
    graph_title = '&title=%s' % graph_title_string

    graphite_port = '80'
    if settings.GRAPHITE_PORT != '':
        graphite_port = str(settings.GRAPHITE_PORT)

    link = '%s://%s:%s/render/?from=-%shours&target=cactiStyle(%s)%s%s&colorList=orange' % (
        settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST, graphite_port,
        str(int(second_order_resolution_in_hours)), metric[1],
        settings.GRAPHITE_GRAPH_SETTINGS, graph_title)
    # @added 20170603 - Feature #2034: analyse_derivatives
    if known_derivative_metric:
        link = '%s://%s:%s/render/?from=-%shours&target=cactiStyle(nonNegativeDerivative(%s))%s%s&colorList=orange' % (
            settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST, graphite_port,
            str(int(second_order_resolution_in_hours)), metric[1],
            settings.GRAPHITE_GRAPH_SETTINGS, graph_title)

    content_id = metric[1]
    image_data = None
    if settings.SMTP_OPTS.get('embed-images'):
        # @added 20161229 - Feature #1830: Ionosphere alerts
        # Use existing data if files exist
        if os.path.isfile(graphite_image_file):
            try:
                with open(graphite_image_file, 'rb') as f:
                    image_data = f.read()
                logger.info('alert_smtp - using existing png - %s' %
                            graphite_image_file)
            except:
                logger.error(traceback.format_exc())
                logger.error(
                    'error :: alert_smtp - failed to read image data from existing png - %s'
                    % graphite_image_file)
                logger.error('error :: alert_smtp - %s' % str(link))
                image_data = None

        if image_data is None:
            try:
                # @modified 20170913 - Task #2160: Test skyline with bandit
                # Added nosec to exclude from bandit tests
                image_data = urllib2.urlopen(link).read()  # nosec
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - image data OK')
            except urllib2.URLError:
                logger.error(traceback.format_exc())
                logger.error('error :: alert_smtp - failed to get image graph')
                logger.error('error :: alert_smtp - %s' % str(link))
                image_data = None
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - image data None')

    # If we failed to get the image or if it was explicitly disabled,
    # use the image URL instead of the content.
    if image_data is None:
        img_tag = '<img src="%s"/>' % link
    else:
        img_tag = '<img src="cid:%s"/>' % content_id
        if settings.ENABLE_DEBUG or LOCAL_DEBUG:
            logger.info('debug :: alert_smtp - img_tag: %s' % img_tag)

        if settings.IONOSPHERE_ENABLED:
            # Create Ionosphere Graphite image
            # @modified 20161229 - Feature #1830: Ionosphere alerts
            # Only write the data to the file if it does not exist
            if not os.path.isfile(graphite_image_file):
                try:
                    write_data_to_file(skyline_app, graphite_image_file, 'w',
                                       image_data)
                    logger.info('added %s Ionosphere Graphite image :: %s' %
                                (skyline_app, graphite_image_file))
                except:
                    logger.info(traceback.format_exc())
                    logger.error(
                        'error :: failed to add %s Ionosphere Graphite image - %s' %
                        (skyline_app, graphite_image_file))
            else:
                logger.info(
                    '%s Ionosphere Graphite image already exists :: %s' %
                    (skyline_app, graphite_image_file))

    redis_image_data = None
    try:
        plot_redis_data = settings.PLOT_REDIS_DATA
    except:
        plot_redis_data = False

    if settings.SMTP_OPTS.get('embed-images') and plot_redis_data:
        # Create graph from Redis data
        redis_metric_key = '%s%s' % (settings.FULL_NAMESPACE, metric[1])
        try:
            raw_series = REDIS_ALERTER_CONN.get(redis_metric_key)
            if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                logger.info('debug :: alert_smtp - raw_series: %s' % 'OK')
        except:
            if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                logger.info('debug :: alert_smtp - raw_series: %s' % 'FAIL')

        try:
            unpacker = Unpacker(use_list=True)
            unpacker.feed(raw_series)
            timeseries_x = [float(item[0]) for item in unpacker]
            unpacker = Unpacker(use_list=True)
            unpacker.feed(raw_series)
            timeseries_y = [item[1] for item in unpacker]

            unpacker = Unpacker(use_list=False)
            unpacker.feed(raw_series)
            timeseries = list(unpacker)
        except:
            logger.error('error :: alert_smtp - unpack timeseries failed')
            timeseries = None

        # @added 20170603 - Feature #2034: analyse_derivatives
        if known_derivative_metric:
            try:
                derivative_timeseries = nonNegativeDerivative(timeseries)
                timeseries = derivative_timeseries
            except:
                logger.error(
                    'error :: alert_smtp - nonNegativeDerivative failed')

        if settings.IONOSPHERE_ENABLED and timeseries:
            '''
            .. todo: this is possibly to be used to allow the user to submit the
                FULL_DURATION duration data set for the features profile to be
                created against IF it is a Mirage metric.  This would allow for
                additional granularity in Mirage metrics, thereby maintaining
                their seasonality, but allow user and Skyline to analyze the
                anomaly at a FULL_DURATION resolution as well.  Not sure how to
                code that in Ionosphere context yet but could just be an additional
                flag in the Ionosphere record.  In the Ionosphere frontend, the
                user would be given an option to either create the features
                profile on the Mirage timeseries or the redis FULL_DURATION
                timeseries.  It is a little complicated, but doable.
                # @modified 20161229 - Feature #1828: ionosphere - mirage Redis data features
                However that ^^ is UNDESIRABLE in the Mirage/Ionosphere context
                at the moment.  Ionosphere must only profile SECOND_ORDER_RESOLUTION_HOURS
                currently so as to not pollute the seasonality aspect of Mirage
            '''
            # Create Ionosphere redis timeseries json if it does not exist
            # @modified 20161229 - Feature #1830: Ionosphere alerts
            # Only write the data to the file if it does not exist and replace
            # the timeseries object if a json file exists

            # @added 20170920 - Bug #2168: Strange Redis derivative graph
            using_original_redis_json = False

            if not os.path.isfile(json_file):
                timeseries_json = str(timeseries).replace('[', '(').replace(
                    ']', ')')
                try:
                    write_data_to_file(skyline_app, json_file, 'w',
                                       timeseries_json)
                    logger.info(
                        'added %s Ionosphere Redis data timeseries json file :: %s'
                        % (skyline_app, json_file))
                except:
                    logger.info(traceback.format_exc())
                    logger.error(
                        'error :: failed to add %s Ionosphere Redis data timeseries json file - %s'
                        % (skyline_app, json_file))
            else:
                # Replace the timeseries object
                logger.info(
                    '%s Ionosphere Redis data timeseries json file already exists, using :: %s'
                    % (skyline_app, json_file))
                anomaly_json = json_file
                try:
                    # Read the timeseries json file
                    with open(anomaly_json, 'r') as f:
                        raw_timeseries = f.read()
                    timeseries_array_str = str(raw_timeseries).replace(
                        '(', '[').replace(')', ']')
                    timeseries = literal_eval(timeseries_array_str)
                    logger.info(
                        '%s Redis timeseries replaced with timeseries from :: %s'
                        % (skyline_app, anomaly_json))
                    timeseries_x = [float(item[0]) for item in timeseries]
                    timeseries_y = [item[1] for item in timeseries]
                    # @added 20170920 - Bug #2168: Strange Redis derivative graph
                    # This already has nonNegativeDerivative applied to it
                    using_original_redis_json = True
                except:
                    logger.error(traceback.format_exc())
                    logger.error(
                        'error :: %s failed to read timeseries data from %s' %
                        (skyline_app, anomaly_json))
                    timeseries = None

        # @added 20170823 - Feature #2034: analyse_derivatives
        # Originally patterned and added to analyzer/alerters.py on 20170603
        if known_derivative_metric:

            # @added 20170920 - Bug #2168: Strange Redis derivative graph
            # If this is the Mirage Redis json it already has
            # nonNegativeDerivative applied to it
            if not using_original_redis_json:
                logger.info('alert_smtp - nonNegativeDerivative being applied')

                try:
                    derivative_timeseries = nonNegativeDerivative(timeseries)
                    timeseries = derivative_timeseries
                    # @added 20170920 - Bug #2168: Strange Redis derivative graph
                    logger.info('alert_smtp - nonNegativeDerivative applied')
                except:
                    logger.error(
                        'error :: alert_smtp - nonNegativeDerivative failed')
            else:
                logger.info(
                    'alert_smtp - nonNegativeDerivative not being applied, as it will have been applied in the original json'
                )

        # @added 20170823 - Bug #2068: Analyzer smtp alert error on Redis plot with derivative metrics
        # Originally patterned and added to analyzer/alerters.py on 20170726
        # If the nonNegativeDerivative has been calculated we need to reset the
        # x and y as nonNegativeDerivative has to discard the first value as it
        # has no delta for it so the timeseries is 1 item less.
        timeseries_x = [float(item[0]) for item in timeseries]
        timeseries_y = [item[1] for item in timeseries]

        pd_series_values = None
        # initialise so the later "if mean_series:" check cannot raise a NameError
        mean_series = None
        original_anomalous_datapoint = metric[0]
        if timeseries:
            try:
                values = pd.Series([x[1] for x in timeseries])
                # Because the truth value of a Series is ambiguous
                pd_series_values = True
            except:
                logger.error(
                    'error :: alert_smtp - pandas value series on timeseries failed'
                )

            # @added 20170307 - Feature #1960: ionosphere_layers
            # To display the original anomalous datapoint value in the Redis plot
            try:
                original_anomalous_datapoint = float(timeseries[-1][1])
            except:
                logger.error(
                    'error :: alert_smtp - failed to determine the original_anomalous_datapoint from the timeseries'
                )

        if pd_series_values:
            try:
                array_median = np.median(values)
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - values median: %s' %
                                str(array_median))

                array_amax = np.amax(values)
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - array_amax: %s' %
                                str(array_amax))
                array_amin = np.amin(values)
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - array_amin: %s' %
                                str(array_amin))
                mean = values.mean()
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - mean: %s' % str(mean))
                stdDev = values.std()
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - stdDev: %s' %
                                str(stdDev))

                sigma3 = 3 * stdDev
                if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                    logger.info('debug :: alert_smtp - sigma3: %s' %
                                str(sigma3))

                # sigma3_series = [sigma3] * len(values)

                sigma3_upper_bound = mean + sigma3
                try:
                    sigma3_lower_bound = mean - sigma3
                except:
                    sigma3_lower_bound = 0

                sigma3_upper_series = [sigma3_upper_bound] * len(values)
                sigma3_lower_series = [sigma3_lower_bound] * len(values)
                amax_series = [array_amax] * len(values)
                amin_series = [array_amin] * len(values)
                mean_series = [mean] * len(values)
            except:
                logger.error(
                    'error :: alert_smtp - numpy ops on series failed')
                mean_series = None

        if mean_series:
            # @modified 20170307 - Feature #1960: ionosphere_layers
            # To display the original anomalous datapoint value in the Redis plot
            # graph_title = 'Skyline %s - ALERT - at %s hours - Redis data\n%s - anomalous value: %s' % (context, str(int(full_duration_in_hours)), metric[1], str(metric[0]))
            graph_title = 'Skyline %s - ALERT - at %s hours - Redis data\n%s - anomalous value: %s' % (
                context, str(int(full_duration_in_hours)), metric[1],
                str(original_anomalous_datapoint))
            # @added 20170603 - Feature #2034: analyse_derivatives
            if known_derivative_metric:
                graph_title = 'Skyline %s - ALERT - at %s hours - Redis data (derivative graph)\n%s - anomalous value: %s' % (
                    context, str(int(full_duration_in_hours)), metric[1],
                    str(original_anomalous_datapoint))

            # plt.savefig with format='png' writes binary data, so BytesIO is
            # needed on both Python 2 and Python 3
            buf = io.BytesIO()

            # Too big
            # rcParams['figure.figsize'] = 12, 6
            rcParams['figure.figsize'] = 8, 4
            try:
                # fig = plt.figure()
                fig = plt.figure(frameon=False)
                ax = fig.add_subplot(111)
                ax.set_title(graph_title, fontsize='small')
                # @modified 20180417 - Bug #2358: set_axis_bgcolor method removed from Matplotlib - Luminosity
                #                      IssueID #49 'AxesSubplot' object has no attribute 'set_axis_bgcolor'
                # ax.set_axis_bgcolor('black')
                if hasattr(ax, 'set_facecolor'):
                    ax.set_facecolor('black')
                else:
                    ax.set_axis_bgcolor('black')

                try:
                    datetimes = [
                        dt.datetime.utcfromtimestamp(ts) for ts in timeseries_x
                    ]
                    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                        logger.info('debug :: alert_smtp - datetimes: %s' %
                                    'OK')
                except:
                    logger.error('error :: alert_smtp - datetimes: %s' %
                                 'FAIL')

                plt.xticks(rotation=0, horizontalalignment='center')
                xfmt = DateFormatter('%a %H:%M')
                plt.gca().xaxis.set_major_formatter(xfmt)

                ax.xaxis.set_major_formatter(xfmt)

                ax.plot(datetimes,
                        timeseries_y,
                        color='orange',
                        lw=0.6,
                        zorder=3)
                ax.tick_params(axis='both', labelsize='xx-small')

                max_value_label = 'max - %s' % str(array_amax)
                ax.plot(datetimes,
                        amax_series,
                        lw=1,
                        label=max_value_label,
                        color='m',
                        ls='--',
                        zorder=4)
                min_value_label = 'min - %s' % str(array_amin)
                ax.plot(datetimes,
                        amin_series,
                        lw=1,
                        label=min_value_label,
                        color='b',
                        ls='--',
                        zorder=4)
                mean_value_label = 'mean - %s' % str(mean)
                ax.plot(datetimes,
                        mean_series,
                        lw=1.5,
                        label=mean_value_label,
                        color='g',
                        ls='--',
                        zorder=4)

                sigma3_text = (r'3$\sigma$')
                # sigma3_label = '%s - %s' % (str(sigma3_text), str(sigma3))

                sigma3_upper_label = '%s upper - %s' % (
                    str(sigma3_text), str(sigma3_upper_bound))
                ax.plot(datetimes,
                        sigma3_upper_series,
                        lw=1,
                        label=sigma3_upper_label,
                        color='r',
                        ls='solid',
                        zorder=4)

                if sigma3_lower_bound > 0:
                    sigma3_lower_label = '%s lower - %s' % (
                        str(sigma3_text), str(sigma3_lower_bound))
                    ax.plot(datetimes,
                            sigma3_lower_series,
                            lw=1,
                            label=sigma3_lower_label,
                            color='r',
                            ls='solid',
                            zorder=4)

                ax.get_yaxis().get_major_formatter().set_useOffset(False)
                ax.get_yaxis().get_major_formatter().set_scientific(False)

                # Shrink current axis's height by 10% on the bottom
                box = ax.get_position()
                ax.set_position([
                    box.x0, box.y0 + box.height * 0.1, box.width,
                    box.height * 0.9
                ])

                # Put a legend below current axis
                ax.legend(loc='upper center',
                          bbox_to_anchor=(0.5, -0.05),
                          fancybox=True,
                          shadow=True,
                          ncol=4,
                          fontsize='x-small')
                plt.rc('lines', lw=2, color='w')

                plt.grid(True)

                ax.grid(b=True,
                        which='both',
                        axis='both',
                        color='lightgray',
                        linestyle='solid',
                        alpha=0.5,
                        linewidth=0.6)
                # @modified 20180417 - Bug #2358: set_axis_bgcolor method removed from Matplotlib - Luminosity
                #                      IssueID #49 'AxesSubplot' object has no attribute 'set_axis_bgcolor'
                # ax.set_axis_bgcolor('black')
                if hasattr(ax, 'set_facecolor'):
                    ax.set_facecolor('black')
                else:
                    ax.set_axis_bgcolor('black')

                rcParams['xtick.direction'] = 'out'
                rcParams['ytick.direction'] = 'out'
                ax.margins(y=.02, x=.03)
                # tight_layout removes the legend box
                # fig.tight_layout()

                if settings.IONOSPHERE_ENABLED:
                    if not os.path.exists(training_data_dir):
                        mkdir_p(training_data_dir)
                        logger.info('created dir - %s' % training_data_dir)

                    if not os.path.isfile(training_data_redis_image):
                        try:
                            plt.savefig(training_data_redis_image,
                                        format='png')
                            logger.info(
                                'alert_smtp - save Redis training data image - %s'
                                % (training_data_redis_image))
                        except:
                            logger.info(traceback.format_exc())
                            logger.error(
                                'error :: alert_smtp - could not save - %s' %
                                (training_data_redis_image))
                    else:
                        logger.info(
                            'alert_smtp - Redis training data image already exists - %s'
                            % (training_data_redis_image))

                try:
                    plt.savefig(buf, format='png')
                    # @added 20160814 - Bug #1558: Memory leak in Analyzer
                    # As per http://www.mail-archive.com/[email protected]/msg13222.html
                    # savefig in the parent process was causing the memory leak
                    # the below fig.clf() and plt.close() did not resolve this
                    # however spawing a multiprocessing process for alert_smtp
                    # does solve this as issue as all memory is freed when the
                    # process terminates.
                    fig.clf()
                    plt.close(fig)
                    redis_graph_content_id = 'redis.%s' % metric[1]
                    redis_image_data = True
                    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                        logger.info('debug :: alert_smtp - savefig: %s' % 'OK')
                except:
                    logger.info(traceback.format_exc())
                    logger.error('error :: alert_smtp - plt.savefig: %s' %
                                 'FAIL')
            except:
                logger.error(traceback.format_exc())
                logger.error('error :: alert_smtp - could not build plot')

    if redis_image_data:
        redis_img_tag = '<img src="cid:%s"/>' % redis_graph_content_id
        if settings.ENABLE_DEBUG or LOCAL_DEBUG:
            logger.info('debug :: alert_smtp - redis_img_tag: %s' %
                        str(redis_img_tag))
    else:
        # @modified 20161229 - Feature #1830: Ionosphere alerts
        # @modified 20170108 - Feature #1852: Ionosphere - features_profile matched graphite graphs
        # Restored the previous redis_img_tag method as some smtp alerts were
        # coming through without a Redis graph, not all but some, for some
        # unknown reason.  In retrospect it was probably done that way during
        # testing simply to try and be cleaner.
        # The redis_img_tag was changed at
        # https://github.com/earthgecko/skyline/commit/31bcacf3f90f0953ebed0d57260cb937e01f887c#diff-520bf2a218f65074ffead4d8184c138dR489
        redis_img_tag = '<img src="%s"/>' % 'none'
        # redis_img_tag = '<img src="none"/>'

    # @added 20170806 - Feature #1830: Ionosphere alerts
    # Show a human date in alerts
    alerted_at = str(dt.datetime.utcfromtimestamp(int(metric[2])))

    try:
        body = '<h3><font color="#dd3023">Sky</font><font color="#6698FF">line</font><font color="black"> %s alert</font></h3><br>' % context
        body += '<font color="black">metric: <b>%s</b></font><br>' % metric[1]
        body += '<font color="black">Anomalous value: %s (Mirage)</font><br>' % str(
            metric[0])
        body += '<font color="black">Original anomalous value: %s (Analyzer)</font><br>' % str(
            original_anomalous_datapoint)
        body += '<font color="black">Anomaly timestamp: %s</font><br>' % str(
            int(metric[2]))
        # @added 20170806 - Feature #1830: Ionosphere alerts
        # Show a human date in alerts
        body += '<font color="black">Anomalous at: %s</font><br>' % alerted_at
        body += '<font color="black">At hours: %s</font><br>' % str(
            int(second_order_resolution_in_hours))
        body += '<font color="black">Next alert in: %s seconds</font><br>' % str(
            alert[2])
        # @added 20170603 - Feature #2034: analyse_derivatives
        if known_derivative_metric:
            body += '<font color="black">Derivative graph: True</font><br>'

        more_body = ''
        if settings.IONOSPHERE_ENABLED:
            # @modified 20170823 - Bug #2142: 7bit SMTP encoding breaking long urls
            # Broke body into body and more_body to work around the 990 character
            # limit per line for SMTP
            more_body += '<h3><font color="#dd3023">Ionosphere :: </font><font color="#6698FF">training data</font><font color="black"></font></h3>'
            ionosphere_link = '%s/ionosphere?timestamp=%s&metric=%s' % (
                settings.SKYLINE_URL, str(int(metric[2])), str(metric[1]))
            more_body += '<font color="black">To use this timeseries to train Skyline that this is not anomalous, manage this training data at:<br>'
            more_body += '<a href="%s">%s</a></font>' % (ionosphere_link,
                                                         ionosphere_link)
        if image_data:
            more_body += '<h3><font color="black">Graphite data at SECOND_ORDER_RESOLUTION_HOURS (aggregated)</font></h3>'
            more_body += '<div dir="ltr"><a href="%s">%s</a><br></div><br>' % (
                link, img_tag)
            more_body += '<font color="black">Clicking on the above graph will open to the Graphite graph with current data</font><br>'
        if redis_image_data:
            more_body += '<font color="black">min: %s  | max: %s   | mean: %s <br>' % (
                str(array_amin), str(array_amax), str(mean))
            more_body += '3-sigma: %s <br>' % str(sigma3)
            more_body += '3-sigma upper bound: %s   | 3-sigma lower bound: %s <br></font>' % (
                str(sigma3_upper_bound), str(sigma3_lower_bound))
            more_body += '<h3><font color="black">Redis data at FULL_DURATION</font></h3><br>'
            more_body += '<div dir="ltr">:%s<br></div>' % redis_img_tag
            more_body += '<font color="black">To disable the Redis data graph view, set PLOT_REDIS_DATA to False in your settings.py, if the Graphite graph is sufficient for you,<br>'
            more_body += 'however do note that this will remove the 3-sigma and mean values too.</font>'
        more_body += '<br>'
        more_body += '<div dir="ltr" align="right"><font color="#dd3023">Sky</font><font color="#6698FF">line</font><font color="black"> version :: %s</font></div><br>' % str(
            skyline_version)
    except:
        logger.error('error :: alert_smtp - could not build body')
        logger.info(traceback.format_exc())

    # @modified 20180524 - Task #2384: Change alerters to cc other recipients
    # Do not send to each recipient, send to primary_recipient and cc the other
    # recipients, thereby sending only one email
    # for recipient in recipients:
    #     logger.info('alert_smtp - sending alert to %s' % (str(recipient)))
    if primary_recipient:
        logger.info(
            'alert_smtp - will send to primary_recipient :: %s, cc_recipients :: %s'
            % (str(primary_recipient), str(cc_recipients)))
        try:
            # @modified 20170823 - Bug #2142: 7bit SMTP encoding breaking long urls
            # Broke body into body and more_body to work around the 990 character
            # limit per line for SMTP, using 'mixed' because 'alternative'
            # indicates that the client should select one of the parts for
            # display and ignore the rest (tripleee - https://stackoverflow.com/a/35115938)
            # msg = MIMEMultipart('alternative')
            msg = MIMEMultipart('mixed')

            # @added 20170812 - Bug #2142: 7bit SMTP encoding breaking long urls
            # set email charset and email encodings
            cs_ = charset.Charset('utf-8')
            cs_.header_encoding = charset.QP
            cs_.body_encoding = charset.QP
            msg.set_charset(cs_)

            msg['Subject'] = '[Skyline alert] - %s ALERT - %s' % (context,
                                                                  metric[1])
            msg['From'] = sender
            # @modified 20180524 - Task #2384: Change alerters to cc other recipients
            # msg['To'] = recipient
            msg['To'] = primary_recipient

            # @added 20180524 - Task #2384: Change alerters to cc other recipients
            # Added Cc
            if cc_recipients:
                msg['Cc'] = cc_recipients

            msg.attach(MIMEText(body, 'html'))
            # @added 20170823 - Bug #2142: 7bit SMTP encoding breaking long urls
            # Broke body into body and more_body to work around the 990 character
            # limit per line for SMTP
            msg.attach(MIMEText(more_body, 'html'))
            msg.replace_header('content-transfer-encoding', 'quoted-printable')

            if image_data is not None:
                try:
                    msg_attachment = MIMEImage(image_data)
                    msg_attachment.add_header('Content-ID',
                                              '<%s>' % content_id)
                    msg.attach(msg_attachment)
                    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                        logger.info(
                            'debug :: alert_smtp - msg_attachment - Graphite img source done'
                        )
                except:
                    logger.error('error :: alert_smtp - msg_attachment')
                    logger.info(traceback.format_exc())
            if redis_image_data:
                try:
                    buf.seek(0)
                    msg_plot_attachment = MIMEImage(buf.read())
                    msg_plot_attachment.add_header(
                        'Content-ID', '<%s>' % redis_graph_content_id)
                    msg.attach(msg_plot_attachment)
                    if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                        logger.info(
                            'debug :: alert_smtp - msg_plot_attachment - redis data done'
                        )
                except:
                    logger.error('error :: alert_smtp - msg_plot_attachment')
                    logger.info(traceback.format_exc())
        except:
            logger.error('error :: alert_smtp - could not attach')
            logger.info(traceback.format_exc())

        s = SMTP('127.0.0.1')
        try:
            # @modified 20180524 - Task #2384: Change alerters to cc other recipients
            # Send to primary_recipient and cc_recipients
            # s.sendmail(sender, recipient, msg.as_string())
            if cc_recipients:
                # sendmail() needs each Cc address as a separate envelope recipient
                s.sendmail(sender,
                           [primary_recipient] + cc_recipients.split(','),
                           msg.as_string())
            else:
                s.sendmail(sender, primary_recipient, msg.as_string())
            if settings.ENABLE_DEBUG or LOCAL_DEBUG:
                # logger.info('debug :: alert_smtp - message sent to %s OK' % str(recipient))
                logger.info(
                    'debug :: alert_smtp - message sent OK to primary_recipient :: %s, cc_recipients :: %s'
                    % (str(primary_recipient), str(cc_recipients)))
        except:
            logger.info(traceback.format_exc())
            # logger.error('error :: alert_smtp - could not send email to %s' % str(recipient))
            logger.error(
                'error :: alert_smtp - could not send email to primary_recipient :: %s, cc_recipients :: %s'
                % (str(primary_recipient), str(cc_recipients)))

        s.quit()

        if LOCAL_DEBUG:
            logger.info(
                'debug :: alert_smtp - Memory usage after email: %s (kb)' %
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

    return
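The one-email-with-cc pattern used by alert_smtp above, reduced to a minimal sketch; the addresses and SMTP host are illustrative. Note that smtplib's sendmail() expects a flat list of individual envelope recipients, while the Cc header only controls what the recipients see:

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from smtplib import SMTP

sender = 'skyline@example.com'
primary_recipient = 'oncall@example.com'
cc_recipients = 'team-a@example.com,team-b@example.com'

msg = MIMEMultipart('mixed')
msg['Subject'] = '[Skyline alert] - example'
msg['From'] = sender
msg['To'] = primary_recipient
msg['Cc'] = cc_recipients
msg.attach(MIMEText('<h3>example body</h3>', 'html'))

s = SMTP('127.0.0.1')
# envelope recipients: the To address plus each Cc address individually
s.sendmail(sender, [primary_recipient] + cc_recipients.split(','), msg.as_string())
s.quit()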
Example No. 60
0
import sys
import time

# this snippet assumes the kyotocabinet Python binding for the .kct files
from kyotocabinet import DB
from msgpack import Unpacker
from redis import StrictRedis

# open the database
db = DB()

redis_conn = StrictRedis(unix_socket_path='/tmp/redis.sock')

full_list = list(redis_conn.smembers('system.unique_metrics'))
if len(full_list) == 0:
    print "No metrics"
    exit()

count = 0
start = time.time()
for metric in full_list:
    count += 1
    if not db.open("/opt/skyline/src/cabinet/" + metric + ".kct",
                   DB.OWRITER | DB.OCREATE):
        print >> sys.stderr, metric + " open error: " + str(db.error())

    raw_metric = redis_conn.mget(metric)
    for i, metric_name in enumerate(raw_metric):
        unpacker = Unpacker(use_list=False)
        unpacker.feed(metric_name)
        timeseries = list(unpacker)
        for value in timeseries:
            if db.check(value[0]) < 0:
                db.set(value[0], value[1])
            #db.set(value[0], value[1])
    db.close()
    if (count % 100) == 0:
        print "%s keys.  Rate: %s" % (count, (100 / (time.time() - start)))
        start = time.time()