Example #1
    def __init__(self, host, port, retry_interval, report_status=None, logging_timeout=None, stream_backend_map=None):
        # set up thrift and scribe objects
        timeout = logging_timeout if logging_timeout is not None else config.scribe_logging_timeout
        self.socket = thriftpy.transport.socket.TSocket(six.text_type(host), int(port))
        if timeout:
            self.socket.set_timeout(timeout)

        self.stream_backend_map = stream_backend_map or {}
        self.transport = TFramedTransportFactory().get_transport(self.socket)
        protocol = TBinaryProtocolFactory(strict_read=False).get_protocol(self.transport)
        self.client = TClient(scribe_thrift.scribe, protocol)

        # our own bookkeeping for connection
        self.connected = False # whether or not we think we're currently connected to the scribe server
        self.last_connect_time = 0 # last time we got disconnected or failed to reconnect

        self.retry_interval = retry_interval
        self.report_status = report_status or get_default_reporter()
        self.__lock = threading.RLock()
        self._birth_pid = os.getpid()

        self.metrics = MetricsReporter(
            sample_rate=config.metrics_sample_rate,
            backend="scribe"
        )
Example #2
    def test_latency_microseconds(self):
        metrics = MetricsReporter(backend="test", sample_rate=1)
        metrics._sample_log_line_latency = mock.Mock(spec=FakeMetric())
        with metrics.sampled_request():
            pass
        assert metrics._sample_log_line_latency.record.call_count == 1
        # time.time() has a resolution of roughly half a microsecond, so a
        # value below 0.1 would mean the latency is being recorded in seconds
        # or milliseconds rather than in microseconds.
        assert metrics._sample_log_line_latency.record.call_args[1]['value'] > 0.1
Example #3
    def test_fake_metric_functionality(self):
        metrics = MetricsReporter(backend="test", sample_rate=1)
        # Monkeypatch in the fake counters
        metrics._sample_log_line_latency = FakeMetric()
        metrics._total_log_line_sent = FakeMetric()

        assert metrics._sample_counter == 0
        # With sample_rate=1 every request is part of the sample, so the
        # counter resets once the block exits
        with metrics.sampled_request():
            assert metrics._sample_counter == 1
        assert metrics._sample_counter == 0
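
The tests above monkeypatch a FakeMetric test double whose definition is not shown; its shape can be inferred from how it is used (a record() method accepting a value keyword, inspected via mock). A minimal stand-in consistent with that usage might look like the following sketch; the exact class body is an assumption, not the library's actual helper:

class FakeMetric(object):
    """Minimal test double inferred from the tests above (an assumption)."""

    def __init__(self):
        # Keep every recorded value so tests can inspect what was reported.
        self.recorded_values = []

    def record(self, value=None):
        # Matches the call shape the tests assert on:
        # metrics._sample_log_line_latency.record(value=...)
        self.recorded_values.append(value)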
Example #4
    def test_metrics_reporter_sampling(self):
        metrics = MetricsReporter(backend="test", sample_rate=3)

        assert metrics._sample_counter == 0
        # First try, not part of sample
        with metrics.sampled_request():
            assert metrics._sample_counter == 1

        # Second try, not part of sample
        with metrics.sampled_request():
            assert metrics._sample_counter == 2

        # Third try, part of the sample, so the counter resets once the
        # context exits
        with metrics.sampled_request():
            assert metrics._sample_counter == 3
        assert metrics._sample_counter == 0
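
Taken together, these tests pin down the sampling contract: sampled_request() increments an internal counter on entry, and once the counter reaches sample_rate the request is sampled (its latency recorded, the counter reset to zero) when the block exits; sample_rate=0 disables sampling. A minimal sketch with that behavior, reusing the FakeMetric stand-in above and assuming latency is reported in microseconds (an illustration, not the library's actual implementation):

import contextlib
import time


class SketchMetricsReporter(object):
    """Counter-based sampler matching the behavior the tests assert."""

    def __init__(self, sample_rate):
        self.sample_rate = sample_rate
        self._sample_counter = 0
        self._sample_log_line_latency = FakeMetric()  # stand-in metric

    @contextlib.contextmanager
    def sampled_request(self):
        # Count this request; every sample_rate-th one gets sampled.
        self._sample_counter += 1
        start = time.time()
        yield
        if self.sample_rate and self._sample_counter >= self.sample_rate:
            # Record elapsed wall time in microseconds, then reset.
            self._sample_log_line_latency.record(
                value=(time.time() - start) * 1e6
            )
            self._sample_counter = 0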
Example #5
    def __init__(self, client_id, host=None, port=None, stream_backend_map=None):
        self.stream_prefix = config.monk_stream_prefix
        self.report_status = get_default_reporter()
        self.metrics = MetricsReporter(
            sample_rate=config.metrics_sample_rate,
            backend="monk"
        )
        self.timeout_backoff_s = config.monk_timeout_backoff_ms / 1000.0
        self.stream_backend_map = stream_backend_map or {}
        self.last_timeout = time.time() - self.timeout_backoff_s
        self.producer = MonkProducer(
            client_id,
            host,
            port,
            timeout_ms=config.monk_timeout_ms,
            collect_metrics=False
        )
Example #6
class MonkLogger(object):
    """Wrapper around MonkProducer"""

    def __init__(self, client_id, host=None, port=None, stream_backend_map=None):
        self.stream_prefix = config.monk_stream_prefix
        self.report_status = get_default_reporter()
        self.metrics = MetricsReporter(
            sample_rate=config.metrics_sample_rate,
            backend="monk"
        )
        self.timeout_backoff_s = config.monk_timeout_backoff_ms / 1000.0
        self.stream_backend_map = stream_backend_map or {}
        self.last_timeout = time.time() - self.timeout_backoff_s
        self.producer = MonkProducer(
            client_id,
            host,
            port,
            timeout_ms=config.monk_timeout_ms,
            collect_metrics=False
        )

    def log_line(self, stream, line):
        backend = self.stream_backend_map.get(stream, config.default_backend)
        if backend not in ('monk', 'dual'):
            return

        if time.time() < self.last_timeout + self.timeout_backoff_s:
            return
        with self.metrics.sampled_request():
            try:
                self.producer.send_messages(
                    self.stream_prefix + stream,
                    [line],
                    None
                )
            except socket.timeout:
                self.report_status(True, 'Monk took too long to respond')
                self.last_timeout = time.time()
                self.metrics.monk_timeout()
            except Exception as e:
                self.report_status(
                    True,
                    'Exception while sending to monk: %s' % str(e)
                )
                self.metrics.monk_exception()
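
A hypothetical usage sketch of the class above; the client id, host, port, and stream names below are placeholders, not values from the library:

# Hypothetical usage; every literal below is a placeholder.
logger = MonkLogger(
    'example-client',
    host='localhost',
    port=1234,
    stream_backend_map={'search_requests': 'monk'},
)
# Routed to monk because of the stream_backend_map entry above.
logger.log_line('search_requests', '{"query": "pizza"}')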
Example #7
    def test_zero_sample_rate(self):
        metrics = MetricsReporter(backend="test", sample_rate=0)
        assert metrics._sample_counter == 0
        # sample_rate=0 disables sampling; the counter still increments inside
        # the block but a sample is never taken
        with metrics.sampled_request():
            assert metrics._sample_counter == 1
Example #8
class ScribeLogger(object):
    """Implementation that logs to a scribe server. If errors are encountered,
    drop lines and retry occasionally.

    :param host: hostname of the scribe server
    :param port: port number of the scribe server
    :param retry_interval: number of seconds to wait between retries
    :param report_status: a function `report_status(is_error, msg)` which is
        called to print out errors and status messages. The first
        argument indicates whether what is being printed is an error or not,
        and the second argument is the actual message.
    :param logging_timeout: milliseconds to time out scribe logging; "0" means
        blocking (no timeout)
    """

    def __init__(self, host, port, retry_interval, report_status=None, logging_timeout=None, stream_backend_map=None):
        # set up thrift and scribe objects
        timeout = logging_timeout if logging_timeout is not None else config.scribe_logging_timeout
        self.socket = thriftpy.transport.socket.TSocket(six.text_type(host), int(port))
        if timeout:
            self.socket.set_timeout(timeout)

        self.stream_backend_map = stream_backend_map or {}
        self.transport = TFramedTransportFactory().get_transport(self.socket)
        protocol = TBinaryProtocolFactory(strict_read=False).get_protocol(self.transport)
        self.client = TClient(scribe_thrift.scribe, protocol)

        # our own bookkeeping for connection
        self.connected = False # whether or not we think we're currently connected to the scribe server
        self.last_connect_time = 0 # last time we got disconnected or failed to reconnect

        self.retry_interval = retry_interval
        self.report_status = report_status or get_default_reporter()
        self.__lock = threading.RLock()
        self._birth_pid = os.getpid()

        self.metrics = MetricsReporter(
            sample_rate=config.metrics_sample_rate,
            backend="scribe"
        )

    def _maybe_reconnect(self):
        """Try (re)connecting to the server if it's been long enough since our
        last attempt.
        """
        assert not self.connected

        # don't retry too often
        now = time.time()
        if (now - self.last_connect_time) > self.retry_interval:
            try:
                self.transport.open()
                self.connected = True
            except TTransportException:
                self.last_connect_time = now
                self.report_status(True, 'yelp_clog failed to connect to scribe server')

    def _log_line_no_size_limit(self, stream, line):
        """Log a single line without size limit. It should not include any newline characters.
           Since this method is called in log_line, the line should be in utf-8 format and
           less than MAX_LINE_SIZE_IN_BYTES already. We don't limit traceback size.
        """
        with self.__lock, self.metrics.sampled_request():
            if os.getpid() != self._birth_pid:
                raise ScribeIsNotForkSafeError
            if not self.connected:
                self._maybe_reconnect()

            if self.connected:
                log_entry = scribe_thrift.LogEntry(category=scribify(stream), message=line + b'\n')
                try:
                    return self.client.Log(messages=[log_entry])
                except Exception as e:
                    try:
                        self.report_status(
                            True,
                            'yelp_clog failed to log to scribe server with '
                            'exception: %s(%s)' % (type(e), six.text_type(e))
                        )
                    finally:
                        self.close()
                        self.last_connect_time = time.time()

                    # Only reached, and thus only reconnects, if report_status
                    # did not raise an exception
                    self._maybe_reconnect()

    def log_line(self, stream, line):
        """Log a single line. It should not include any newline characters.
           If the line size is over 50 MB, an exception is raised and the line
           will be dropped. If the line size is over 5 MB, a message containing
           the origin stream information will be recorded to
           WHO_CLOG_LARGE_LINE_STREAM (in JSON format).
        """
        backend = self.stream_backend_map.get(stream, config.default_backend)
        if backend not in ('scribe', 'dual'):
            return

        # log unicodes as their utf-8 encoded representation
        if isinstance(line, six.text_type):
            line = line.encode('UTF-8')

        # check log line size
        if len(line) <= WARNING_LINE_SIZE_IN_BYTES:
            self._log_line_no_size_limit(stream, line)
        elif len(line) <= MAX_LINE_SIZE_IN_BYTES:
            self._log_line_no_size_limit(stream, line)

            # log the origin of the stream with traceback to WHO_CLOG_LARGE_LINE_STREAM category
            origin_info = {
                'stream': stream,
                'line_size': len(line),
                'traceback': ''.join(traceback.format_stack()),
            }
            log_line = json.dumps(origin_info).encode('UTF-8')
            self._log_line_no_size_limit(WHO_CLOG_LARGE_LINE_STREAM, log_line)
            self.report_status(
                False,
                'The log line size is larger than %r bytes (monitored in \'%s\')'
                % (WARNING_LINE_SIZE_IN_BYTES, WHO_CLOG_LARGE_LINE_STREAM)
            )
        else:
            # raise an exception if too large
            self.report_status(
                True,
                'The log line is dropped (line size larger than %r bytes)'
                % MAX_LINE_SIZE_IN_BYTES
            )
            raise LogLineIsTooLongError('The max log line size allowed is %r bytes'
                % MAX_LINE_SIZE_IN_BYTES)

    def close(self):
        self.transport.close()
        self.connected = False
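
A hypothetical usage sketch of ScribeLogger; the host, port, retry interval, and stream name below are placeholders. Lines up to the 5 MB warning threshold are sent normally, larger ones are mirrored to WHO_CLOG_LARGE_LINE_STREAM, and lines over the 50 MB cap raise LogLineIsTooLongError:

# Hypothetical usage; host, port, and stream values are placeholders,
# and this assumes config.default_backend routes to 'scribe'.
logger = ScribeLogger('scribe.example.com', 1463, retry_interval=10)
try:
    # Unicode input is UTF-8 encoded internally before being sent.
    logger.log_line('example_stream', u'hello world')
finally:
    logger.close()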