Example #1
    def __init__(self, config, logger, socket_file, docker_api_version,
                 host_hostname, data_path, log_path):

        self._config = config
        self._logger = logger

        self.__delay = self._config.get('container_check_interval')
        self.__log_prefix = self._config.get('docker_log_prefix')
        name = self._config.get('container_name')

        self.__socket_file = socket_file
        self.__docker_api_version = docker_api_version
        self.__client = DockerClient(base_url=('unix:/%s' %
                                               self.__socket_file),
                                     version=self.__docker_api_version)

        self.container_id = self.__get_scalyr_container_id(self.__client, name)

        self.__checkpoint_file = os.path.join(data_path,
                                              "docker-checkpoints.json")
        self.__log_path = log_path

        self.__host_hostname = host_hostname

        self.__readback_buffer_size = self._config.get('readback_buffer_size')

        self.containers = {}
        self.__checkpoints = {}

        self.__log_watcher = None
        self.__module = None
        self.__start_time = time.time()
        self.__thread = StoppableThread(target=self.check_containers,
                                        name="Container Checker")
Example #2
    def __init__(self, configuration, monitors):
        """Initializes the manager.

        Note, the log_config variable on the monitors will be updated as a side effect of this call to reflect
        the filling in of defaults and making paths absolute.  TODO:  This is kind of odd; it would be cleaner
        to do this somewhere more closely tied to the monitors themselves.

        @param configuration: The configuration specifying which log files need to be copied.
        @param monitors:  The list of ScalyrMonitor instances that will be run.  This is needed so the manager
            can be sure to copy the log files generated by the monitors. Note, the log_config for the monitors
            will be updated (on the monitor) to reflect the filling in of defaults and making paths absolute.

        @type configuration: configuration.Configuration
        @type monitors: list<ScalyrMonitor>
        """
        StoppableThread.__init__(self, name='log copier thread')
        self.__config = configuration
        # The list of LogMatcher objects that are watching for new files to appear.
        self.__log_matchers = CopyingManager.__create_log_matches(configuration, monitors)

        # The list of LogFileProcessors that are processing the lines from matched log files.
        self.__log_processors = []
        # A dict from file path to the LogFileProcessor that is processing it.
        self.__log_paths_being_processed = {}
        # A lock that protects the status variables and the __log_matchers variable, the only variables that
        # are accessed in generate_status(), which needs to be thread safe.
        self.__lock = threading.Lock()

        # The current pending AddEventsTask.  We will retry the contained AddEventsRequest several times.
        self.__pending_add_events_task = None

        # The next LogFileProcessor that should have log lines read from it for transmission.
        self.__current_processor = 0

        # The client to use for sending the data.  Set in the start_manager call.
        self.__scalyr_client = None
        # The last time we scanned for new files that match the __log_matchers.
        self.__last_new_file_scan_time = 0

        # Status variables that track statistics reported to the status page.
        self.__last_attempt_time = None
        self.__last_success_time = None
        self.__last_attempt_size = None
        self.__last_response = None
        self.__last_response_status = None
        self.__total_bytes_uploaded = 0
        self.__total_errors = 0

        # The positions to use for a given file if there is not already a checkpoint for that file.
        # Set in the start_manager call.
        self.__logs_initial_positions = None

        # A semaphore that we increment when this object has begun copying files (after first scan).
        self.__copying_semaphore = threading.Semaphore()
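
The final comment above describes a one-shot "copying has begun" signal. The start_manager side is not shown here, so the following is only a generic sketch of the handshake that comment implies, not the actual CopyingManager code:

    import threading

    started = threading.Semaphore(0)  # zero permits, so acquire() blocks

    # In the copying thread, once the first scan completes:
    started.release()

    # In a supervising thread, to block until copying has begun:
    started.acquire()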
Example #3
 def __init__(self, local=None, host='localhost', port=2000):
     self.__server_socket = None
     # The open connections.
     self.__connections = []
     # Any local variables to set for the interactive shell.  This is a dict.
     self.__local = local
      # The IP address to serve connections from.
     self.__host = host
     # The port.
     self.__port = port
     StoppableThread.__init__(self, 'debug server thread')
Example #5
    def test_name_prefix(self):
        StoppableThread.set_name_prefix("test_name_prefix: ")
        test_thread = StoppableThread("Testing", self._run_method)
        self.assertEqual(test_thread.getName(), "test_name_prefix: Testing")
        test_thread.start()
        test_thread.stop()

        self.assertTrue(self._run_counter > 0)
Example #6
    def test_name_prefix_with_none(self):
        StoppableThread.set_name_prefix("test_name_prefix: ")
        test_thread = StoppableThread(target=self._run_method)
        self.assertEqual(test_thread.getName(), "test_name_prefix: ")
        test_thread.start()
        test_thread.stop()

        self.assertTrue(self._run_counter > 0)
Example #7
    def test_basic_use(self):
        # Since the ScalyrTestCase sets the name prefix, we need to set it back to None to get an unmolested name.
        StoppableThread.set_name_prefix(None)
        test_thread = StoppableThread("Testing", self._run_method)
        self.assertEqual(test_thread.getName(), "Testing")
        test_thread.start()
        test_thread.stop()

        self.assertTrue(self._run_counter > 0)
Example #8
    def __init__(self, scalyr_client, configuration, logs_initial_positions):
        """Initializes the manager.

        @param scalyr_client: The client to use to send requests to Scalyr.
        @param configuration: The configuration file containing which log files need to be copied.
        @param logs_initial_positions: A dict mapping file paths to the offset within the file at which to begin
            copying if none can be found in the checkpoint files.  This can be used to override the default
            behavior of just reading from the current end of the file if there is no checkpoint for the file.

        @type scalyr_client: scalyr_client.ScalyrClientSession
        @type configuration: configuration.Configuration
        @type logs_initial_positions: dict
        """
        StoppableThread.__init__(self, name='log copier thread')
        self.__config = configuration
        # The list of LogMatcher objects that are watching for new files to appear.
        self.__log_matchers = configuration.logs

        # The list of LogFileProcessors that are processing the lines from matched log files.
        self.__log_processors = []
        # A dict from file path to the LogFileProcessor that is processing it.
        self.__log_paths_being_processed = {}
        # A lock that protects the status variables and the __log_matchers variable, the only variables that
        # are accessed in generate_status(), which needs to be thread safe.
        self.__lock = threading.Lock()

        # The current pending AddEventsTask.  We will retry the contained AddEventsRequest several times.
        self.__pending_add_events_task = None

        # The next LogFileProcessor that should have log lines read from it for transmission.
        self.__current_processor = 0

        # The client to use for sending the data.
        self.__scalyr_client = scalyr_client
        # The last time we scanned for new files that match the __log_matchers.
        self.__last_new_file_scan_time = 0

        # Status variables that track statistics reported to the status page.
        self.__last_attempt_time = None
        self.__last_success_time = None
        self.__last_attempt_size = None
        self.__last_response = None
        self.__last_response_status = None
        self.__total_bytes_uploaded = 0
        self.__total_errors = 0

        # The positions to use for a given file if there is not already a checkpoint for that file.
        self.__logs_initial_positions = logs_initial_positions

        # A semaphore that we increment when this object has begun copying files (after first scan).
        self.__copying_semaphore = threading.Semaphore()
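
For concreteness, logs_initial_positions is a plain dict from file path to byte offset; a hypothetical construction might look like this (paths and offsets are illustrative, and start() comes from StoppableThread):

    positions = {
        '/var/log/app/service.log': 0,     # copy from the beginning
        '/var/log/app/access.log': 4096,   # resume from byte offset 4096
    }
    manager = CopyingManager(scalyr_client, configuration, positions)
    manager.start()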
Example #10
    def __init__(self, monitor_config, logger, sample_interval_secs=30):
        """Constructs an instance of the monitor.

        It is optional for derived classes to override this method.  They can instead
        override _initialize, which is invoked during initialization.
        TODO:  Determine which approach is preferred by developers and recommend that.

        If a derived class overrides __init__, it must invoke this method from the overriding method.

        This method will set default values for all public attributes (log_config, disabled, etc.).  These
        may be overwritten by the derived class.

        The derived classes must raise an Exception (or something derived from Exception)
        in this method if the provided configuration is invalid or if there is any other
        error known at this time preventing the module from running.

        @param monitor_config: A dict containing the configuration information for this module instance from the
            configuration file. The only valid values are strings, ints, longs, floats, and booleans.
        @param logger: The logger to use for output.
        @param sample_interval_secs: The interval in seconds to wait between gathering samples.
        """
        # The logger instance that this monitor should use to report all information and metric values.
        self._logger = logger
        self.monitor_name = monitor_config['module']
        # The MonitorConfig object created from the config for this monitor instance.
        self._config = MonitorConfig(monitor_config,
                                     monitor_module=self.monitor_name)
        log_path = self.monitor_name.split('.')[-1] + '.log'
        self.disabled = False
        # TODO: For now, just leverage the logic in the loggers for naming this monitor.  However,
        # we should have it be more dynamic where the monitor can override it.
        if logger.component.find('monitor:') == 0:
            self.monitor_name = logger.component[8:]
        else:
            self.monitor_name = logger.component
        self.log_config = {
            "path": log_path,
        }
        # This lock protects all variables that can be accessed by other threads: reported_lines,
        # emitted_lines, and errors.  It does not protect _run_state since that already has its own lock.
        self.__lock = Lock()
        self.__reported_lines = 0
        self.__errors = 0

        self._sample_interval_secs = sample_interval_secs
        self.__metric_log_open = False
        self._initialize()

        StoppableThread.__init__(self, name='metric thread')
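
The docstring recommends overriding _initialize rather than __init__; under that reading, a derived monitor might look like the following sketch (the class name, config field, and validation are illustrative):

    class UrlMonitor(ScalyrMonitor):
        def _initialize(self):
            # Invoked from ScalyrMonitor.__init__ after the defaults are
            # set up; raise here if the supplied configuration is invalid.
            self.__target_url = self._config.get('target_url')
            if not self.__target_url:
                raise Exception('target_url is required')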
Example #11
    def __init__(self, monitor_config, logger, sample_interval_secs=30):
        """Constructs an instance of the monitor.

        It is optional for derived classes to override this method.  They can instead
        override _initialize, which is invoked during initialization.
        TODO:  Determine which approach is preferred by developers and recommend that.

        If a derived class overrides __init__, it must invoke this method from the overriding method.

        This method will set default values for all public attributes (log_config, disabled, etc.).  These
        may be overwritten by the derived class.

        The derived classes must raise an Exception (or something derived from Exception)
        in this method if the provided configuration is invalid or if there is any other
        error known at this time preventing the module from running.

        @param monitor_config: A dict containing the configuration information for this module instance from the
            configuration file. The only valid values are strings, ints, longs, floats, and booleans.
        @param logger: The logger to use for output.
        @param sample_interval_secs: The interval in seconds to wait between gathering samples.
        """
        # The MonitorConfig object created from the config for this monitor instance.
        self._config = MonitorConfig(monitor_config)
        # The logger instance that this monitor should use to report all information and metric values.
        self._logger = logger
        self.monitor_name = monitor_config['module']
        log_path = self.monitor_name.split('.')[-1] + '.log'
        self.disabled = False
        # TODO: For now, just leverage the logic in the loggers for naming this monitor.  However,
        # we should have it be more dynamic where the monitor can override it.
        if logger.component.find('monitor:') == 0:
            self.monitor_name = logger.component[8:]
        else:
            self.monitor_name = logger.component
        self.log_config = {
            "path": log_path,
        }
        # This lock protects all variables that can be accessed by other threads: reported_lines,
        # emitted_lines, and errors.  It does not protect _run_state since that already has its own lock.
        self.__lock = Lock()
        self.__reported_lines = 0
        self.__errors = 0

        self._sample_interval_secs = sample_interval_secs
        self.__metric_log_open = False
        self._initialize()

        StoppableThread.__init__(self, name='metric thread')
Example #12
    def test_is_alive(self):
        class TestThread(StoppableThread):
            def __init__(self):
                self.run_counter = 0
                StoppableThread.__init__(self, "Test thread")

            def run_and_propagate(self):
                while self._run_state.is_running():
                    self._run_state.sleep_but_awaken_if_stopped(0.03)

        test_thread_1 = TestThread()
        test_thread_2 = StoppableThread("Testing", self._run_method)

        test_threads = [test_thread_1, test_thread_2]

        for test_thread in test_threads:
            self.assertFalse(test_thread.isAlive())

            if six.PY3:
                self.assertFalse(test_thread.is_alive())

            test_thread.start()

            self.assertTrue(test_thread.isAlive())

            if six.PY3:
                self.assertTrue(test_thread.is_alive())

            test_thread.stop()
            self.assertFalse(test_thread.isAlive())

            if six.PY3:
                self.assertFalse(test_thread.is_alive())
Example #13
    def __init__( self, socket_file, cid, name, stream, log_path, config, last_request=None, max_log_size=20*1024*1024, max_log_rotations=2 ):
        self.__socket_file = socket_file
        self.cid = cid
        self.name = name
        self.stream = stream
        self.log_path = log_path
        self.stream_name = name + "-" + stream

        self.__max_previous_lines = config.get( 'max_previous_lines' )
        self.__log_timestamps = config.get( 'log_timestamps' )

        self.__last_request_lock = threading.Lock()

        self.__last_request = time.time()
        if last_request:
            self.__last_request = last_request

        self.__logger = logging.Logger( cid + '.' + stream )

        self.__log_handler = logging.handlers.RotatingFileHandler( filename = log_path, maxBytes = max_log_size, backupCount = max_log_rotations )
        formatter = logging.Formatter()
        self.__log_handler.setFormatter( formatter )
        self.__logger.addHandler( self.__log_handler )
        self.__logger.setLevel( logging.INFO )

        self.__thread = StoppableThread( target=self.process_request, name="Docker monitor logging thread for %s" % (name + '.' + stream) )
Example #14
    def __init__(self,
                 socket_file,
                 cid,
                 name,
                 stream,
                 log_path,
                 config,
                 last_request=None,
                 max_log_size=20 * 1024 * 1024,
                 max_log_rotations=2):
        self.__socket_file = socket_file
        self.cid = cid
        self.name = name

        #stderr or stdout
        self.stream = stream
        self.log_path = log_path
        self.stream_name = name + "-" + stream

        self.__max_previous_lines = config.get('max_previous_lines')
        self.__log_timestamps = True  # Note: always log timestamps for now.  config.get( 'log_timestamps' )
        self.__docker_api_version = config.get('docker_api_version')

        self.__last_request_lock = threading.Lock()

        self.__last_request = time.time()
        if last_request:
            self.__last_request = last_request

        self.__logger = logging.Logger(cid + '.' + stream)

        self.__log_handler = logging.handlers.RotatingFileHandler(
            filename=log_path,
            maxBytes=max_log_size,
            backupCount=max_log_rotations)
        formatter = logging.Formatter()
        self.__log_handler.setFormatter(formatter)
        self.__logger.addHandler(self.__log_handler)
        self.__logger.setLevel(logging.INFO)

        self.__client = None
        self.__logs = None

        self.__thread = StoppableThread(
            target=self.process_request,
            name="Docker monitor logging thread for %s" %
            (name + '.' + stream))
Example #15
    def __init__(self, local, client_connection, host, port):
        """Initializes the connection.
        @param local: The dict of local variables to populate into the environment the interactive shell is run in.
        @param client_connection:  The network connection
        @param host: the client's IP address
        @param port: the client's port

        @type local: dict
        @type client_connection:
        @type host: str
        @type port: int
        """
        self.__local = local
        self.__client_connection = client_connection
        self.__host = host
        self.__port = port
        StoppableThread.__init__(self, 'Debug connection thread')
Example #17
    def test_exception(self):
        class TestException(Exception):
            pass

        def throw_an_exception(run_state):
            run_state.is_running()
            raise TestException()

        test_thread = StoppableThread('Testing', throw_an_exception)
        test_thread.start()

        caught_it = False
        try:
            test_thread.stop()
        except TestException:
            caught_it = True

        self.assertTrue(caught_it)
Example #18
    def __init__(self, configuration, platform_controller):
        """Initializes the manager.
        @param configuration: The agent configuration that controls what monitors should be run.
        @param platform_controller:  The controller for this server.

        @type configuration: scalyr_agent.Configuration
        @type platform_controller: scalyr_agent.platform_controller.PlatformController
        """
        StoppableThread.__init__(self, name='monitor manager thread')
        if configuration.disable_monitors_creation:
            log.log( scalyr_logging.DEBUG_LEVEL_0, "Creation of Scalyr Monitors disabled.  No monitors created." )
            self.__monitors = []
        else:
            self.__monitors = MonitorsManager.__create_monitors(configuration, platform_controller)

        self.__disable_monitor_threads = configuration.disable_monitor_threads

        self.__running_monitors = []
        self.__user_agent_callback = None
        self._user_agent_refresh_interval = configuration.user_agent_refresh_interval
Example #19
    def test_exception(self):
        class TestException(Exception):
            pass

        def throw_an_exception(run_state):
            run_state.is_running()
            raise TestException()

        test_thread = StoppableThread("Testing", throw_an_exception)
        test_thread.start()

        caught_it = False
        try:
            test_thread.stop()
        except TestException:
            caught_it = True

        self.assertTrue(caught_it)
Example #20
 def __init__(self2):
     StoppableThread.__init__(self2,
                              name='FakeClockAdvancerThread',
                              is_daemon=True)
Example #21
 def __init__(self, capture_interval=10, *args, **kwargs):
     StoppableThread.__init__(self)
Example #22
 def test_stopptable_thread_init_memory_leak(self):
      # There was a bug where the StoppableThread constructor created a reference cycle
     for index in range(0, 50):
         thread = StoppableThread(name="test1")
         self.assertTrue(thread)
         self.assertNoNewGarbage()
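
For context, here is a deliberately simplified illustration of the kind of constructor reference cycle such a test guards against (not the actual bug that was in StoppableThread):

    class Leaky(object):
        def __init__(self):
            # A bound method holds a reference to self, so storing one on
            # self creates a self-referential cycle; such objects survive
            # plain reference counting and must wait for the cycle collector.
            self._callback = self._tick

        def _tick(self):
            pass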
Example #23
class ContainerChecker(StoppableThread):
    """
        Monitors containers to check when they start and stop running.
    """
    def __init__(self, config, logger, socket_file, docker_api_version,
                 host_hostname, data_path, log_path):

        self._config = config
        self._logger = logger

        self.__delay = self._config.get('container_check_interval')
        self.__log_prefix = self._config.get('docker_log_prefix')
        name = self._config.get('container_name')

        self.__socket_file = socket_file
        self.__docker_api_version = docker_api_version
        self.__client = DockerClient(base_url=('unix:/%s' %
                                               self.__socket_file),
                                     version=self.__docker_api_version)

        self.container_id = self.__get_scalyr_container_id(self.__client, name)

        self.__checkpoint_file = os.path.join(data_path,
                                              "docker-checkpoints.json")
        self.__log_path = log_path

        self.__host_hostname = host_hostname

        self.__readback_buffer_size = self._config.get('readback_buffer_size')

        self.containers = {}
        self.__checkpoints = {}

        self.__log_watcher = None
        self.__module = None
        self.__start_time = time.time()
        self.__thread = StoppableThread(target=self.check_containers,
                                        name="Container Checker")

    def start(self):
        self.__load_checkpoints()
        self.containers = self.get_running_containers(self.__client)

        # if querying the docker api fails, set the container list to empty
        if self.containers is None:
            self.containers = {}

        self.docker_logs = self.__get_docker_logs(self.containers)
        self.docker_loggers = []

        #create and start the DockerLoggers
        self.__start_docker_logs(self.docker_logs)
        self._logger.log(
            scalyr_logging.DEBUG_LEVEL_1,
            "Initialization complete.  Starting docker monitor for Scalyr")
        self.__thread.start()

    def stop(self, wait_on_join=True, join_timeout=5):
        self.__thread.stop(wait_on_join=wait_on_join,
                           join_timeout=join_timeout)

        #stop the DockerLoggers
        for logger in self.docker_loggers:
            if self.__log_watcher:
                self.__log_watcher.remove_log_path(self.__module,
                                                   logger.log_path)
            logger.stop(wait_on_join, join_timeout)
            self._logger.log(scalyr_logging.DEBUG_LEVEL_1,
                             "Stopping %s - %s" % (logger.name, logger.stream))

        self.__update_checkpoints()

    def check_containers(self, run_state):

        while run_state.is_running():
            self.__update_checkpoints()

            self._logger.log(scalyr_logging.DEBUG_LEVEL_2,
                             'Attempting to retrieve list of containers')
            running_containers = self.get_running_containers(self.__client)

            # if running_containers is None, that means querying the docker api failed.
            # Rather than resetting the list of running containers to empty,
            # continue using the previous list of containers.
            if running_containers is None:
                self._logger.log(scalyr_logging.DEBUG_LEVEL_2,
                                 'Failed to get list of containers')
                running_containers = self.containers

            self._logger.log(scalyr_logging.DEBUG_LEVEL_2,
                             'Found %d containers' % len(running_containers))
            #get the containers that have started since the last sample
            starting = {}
            for cid, info in running_containers.iteritems():
                if cid not in self.containers:
                    self._logger.log(
                        scalyr_logging.DEBUG_LEVEL_1,
                        "Starting logger for container '%s'" % info['name'])
                    starting[cid] = info

            #get the containers that have stopped
            stopping = {}
            for cid, info in self.containers.iteritems():
                if cid not in running_containers:
                    self._logger.log(
                        scalyr_logging.DEBUG_LEVEL_1,
                        "Stopping logger for container '%s'" % info['name'])
                    stopping[cid] = info

            #stop the old loggers
            self.__stop_loggers(stopping)

            #update the list of running containers
            #do this before starting new ones, as starting up new ones
            #will access self.containers
            self.containers = running_containers

            #start the new ones
            self.__start_loggers(starting)

            run_state.sleep_but_awaken_if_stopped(self.__delay)

    def set_log_watcher(self, log_watcher, module):
        self.__log_watcher = log_watcher
        self.__module = module

    def __get_scalyr_container_id(self, client, name):
        """Gets the container id of the scalyr-agent container
        If the config option container_name is empty, then it is assumed that the scalyr agent is running
        on the host and not in a container and None is returned.
        """
        result = None

        regex = None
        if name is not None:
            regex = re.compile(name)

        # get all the containers
        containers = client.containers()

        for container in containers:

            # see if we are checking on names
            if name is not None:
                # if so, loop over all container names for this container
                # Note: containers should only have one name, but the 'Names' field
                # is a list, so iterate over it just in case
                for cname in container['Names']:
                    cname = cname.lstrip('/')
                    # check if the name regex matches
                    m = regex.match(cname)
                    if m:
                        result = container['Id']
                        break
            # not checking container name, so check the Command instead to see if it's the agent
            else:
                if container['Command'].startswith('/usr/sbin/scalyr-agent-2'):
                    result = container['Id']

            if result:
                break

        if not result:
            # only raise an exception if we were looking for a specific name but couldn't find it
            if name is not None:
                raise Exception(
                    "Unable to find a matching container id for container '%s'.  Please make sure that a "
                    "container matching the regular expression '%s' is running."
                    % (name, name))

        return result

    def __update_checkpoints(self):
        """Update the checkpoints for when each docker logger logged a request, and save the checkpoints
        to file.
        """

        for logger in self.docker_loggers:
            last_request = logger.last_request()
            self.__checkpoints[logger.stream_name] = last_request

        # save to disk
        if self.__checkpoints:
            tmp_file = self.__checkpoint_file + '~'
            scalyr_util.atomic_write_dict_as_json_file(self.__checkpoint_file,
                                                       tmp_file,
                                                       self.__checkpoints)

    def __load_checkpoints(self):
        try:
            checkpoints = scalyr_util.read_file_as_json(self.__checkpoint_file)
        except:
            self._logger.info(
                "No checkpoint file '%s' exists.\n\tAll logs will be read starting from their current end.",
                self.__checkpoint_file)
            checkpoints = {}

        if checkpoints:
            for name, last_request in checkpoints.iteritems():
                self.__checkpoints[name] = last_request

    def get_running_containers(self, client, ignore_self=True):
        """Gets a dict of running containers that maps container id to container name
        """
        result = {}
        try:
            response = client.containers()
            for container in response:
                cid = container['Id']

                if ignore_self and cid == self.container_id:
                    continue

                if len(container['Names']) > 0:
                    name = container['Names'][0].lstrip('/')
                    result[cid] = {'name': name}
                else:
                    result[cid] = cid

        except:  # container querying failed
            global_log.warning("Error querying running containers",
                               limit_once_per_x_secs=300,
                               limit_key='docker-api-running-containers')
            result = None

        return result

    def __stop_loggers(self, stopping):
        """
        Stops any DockerLoggers in the 'stopping' dict
        @param: stopping - a dict of container ids => container names. Any running containers that have
        the same container-id as a key in the dict will be stopped.
        """
        if stopping:
            self._logger.log(scalyr_logging.DEBUG_LEVEL_2,
                             'Stopping all docker loggers')
            for logger in self.docker_loggers:
                if logger.cid in stopping:
                    logger.stop(wait_on_join=True, join_timeout=1)
                    if self.__log_watcher:
                        self.__log_watcher.remove_log_path(
                            self.__module, logger.log_path)

            self.docker_loggers[:] = [
                l for l in self.docker_loggers if l.cid not in stopping
            ]
            self.docker_logs[:] = [
                l for l in self.docker_logs if l['cid'] not in stopping
            ]

    def __start_loggers(self, starting):
        """
        Starts a list of DockerLoggers
        @param: starting - a list of DockerLoggers to start
        """
        if starting:
            self._logger.log(scalyr_logging.DEBUG_LEVEL_2,
                             'Starting all docker loggers')
            docker_logs = self.__get_docker_logs(starting)
            self.__start_docker_logs(docker_logs)
            self.docker_logs.extend(docker_logs)

    def __start_docker_logs(self, docker_logs):
        for log in docker_logs:
            last_request = self.__get_last_request_for_log(
                log['log_config']['path'])
            if self.__log_watcher:
                log['log_config'] = self.__log_watcher.add_log_config(
                    self.__module, log['log_config'])
            self.docker_loggers.append(
                self.__create_docker_logger(log, last_request))

    def __get_last_request_for_log(self, path):
        result = datetime.datetime.fromtimestamp(self.__start_time)

        try:
            full_path = os.path.join(self.__log_path, path)
            fp = open(full_path, 'r', self.__readback_buffer_size)

            # seek readback buffer bytes from the end of the file
            fp.seek(0, os.SEEK_END)
            size = fp.tell()
            if size < self.__readback_buffer_size:
                fp.seek(0, os.SEEK_SET)
            else:
                fp.seek(size - self.__readback_buffer_size, os.SEEK_SET)

            first = True
            for line in fp:
                # ignore the first line because the seek likely landed somewhere in the middle of it
                if first:
                    first = False
                    continue

                dt, _ = _split_datetime_from_line(line)
                if dt:
                    result = dt
            fp.close()
        except Exception, e:
            global_log.info("%s", str(e))

        return scalyr_util.seconds_since_epoch(result)
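
Putting the methods above together, wiring up a ContainerChecker might look like the following sketch (every argument value, plus the log_watcher and monitor_module objects, is illustrative):

    checker = ContainerChecker(config, logger,
                               socket_file='/var/run/docker.sock',
                               docker_api_version='auto',
                               host_hostname='my-host',
                               data_path='/var/lib/scalyr-agent-2',
                               log_path='/var/log/scalyr-agent-2')
    checker.set_log_watcher(log_watcher, monitor_module)
    checker.start()
    # ... on shutdown ...
    checker.stop()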
Example #24
 def __init__(self2):  # pylint: disable=no-self-argument
     StoppableThread.__init__(
         self2, name="FakeClockAdvancerThread", is_daemon=True
     )
Example #25
class DockerLogger(object):
    """Abstraction for logging either stdout or stderr from a given container

    Logging is performed on a separate thread because each log is read from a continuous stream
    over the docker socket.
    """
    def __init__(self,
                 socket_file,
                 cid,
                 name,
                 stream,
                 log_path,
                 config,
                 last_request=None,
                 max_log_size=20 * 1024 * 1024,
                 max_log_rotations=2):
        self.__socket_file = socket_file
        self.cid = cid
        self.name = name

        #stderr or stdout
        self.stream = stream
        self.log_path = log_path
        self.stream_name = name + "-" + stream

        self.__max_previous_lines = config.get('max_previous_lines')
        self.__log_timestamps = True  # Note: always log timestamps for now.  config.get( 'log_timestamps' )
        self.__docker_api_version = config.get('docker_api_version')

        self.__last_request_lock = threading.Lock()

        self.__last_request = time.time()
        if last_request:
            self.__last_request = last_request

        self.__logger = logging.Logger(cid + '.' + stream)

        self.__log_handler = logging.handlers.RotatingFileHandler(
            filename=log_path,
            maxBytes=max_log_size,
            backupCount=max_log_rotations)
        formatter = logging.Formatter()
        self.__log_handler.setFormatter(formatter)
        self.__logger.addHandler(self.__log_handler)
        self.__logger.setLevel(logging.INFO)

        self.__client = None
        self.__logs = None

        self.__thread = StoppableThread(
            target=self.process_request,
            name="Docker monitor logging thread for %s" %
            (name + '.' + stream))

    def start(self):
        self.__thread.start()

    def stop(self, wait_on_join=True, join_timeout=5):
        if self.__client and self.__logs and self.__logs.response:
            sock = self.__client._get_raw_response_socket(self.__logs.response)
            if sock:
                sock.shutdown(socket.SHUT_RDWR)
        self.__thread.stop(wait_on_join=wait_on_join,
                           join_timeout=join_timeout)

    def last_request(self):
        self.__last_request_lock.acquire()
        result = self.__last_request
        self.__last_request_lock.release()
        return result

    def process_request(self, run_state):
        """This function makes a log request on the docker socket for a given container and continues
        to read from the socket until the connection is closed
        """
        try:
            # random delay to prevent all requests from starting at the same time
            delay = random.randint(500, 5000) / 1000
            run_state.sleep_but_awaken_if_stopped(delay)

            self.__logger.log(
                scalyr_logging.DEBUG_LEVEL_3,
                'Starting to retrieve logs for cid=%s' % str(self.cid))
            self.__client = DockerClient(base_url=('unix:/%s' %
                                                   self.__socket_file),
                                         version=self.__docker_api_version)

            epoch = datetime.datetime.utcfromtimestamp(0)
            while run_state.is_running():
                self.__logger.log(
                    scalyr_logging.DEBUG_LEVEL_3,
                    'Attempting to retrieve logs for cid=%s' % str(self.cid))
                sout = False
                serr = False
                if self.stream == 'stdout':
                    sout = True
                else:
                    serr = True

                self.__logs = self.__client.logs(
                    container=self.cid,
                    stdout=sout,
                    stderr=serr,
                    stream=True,
                    timestamps=True,
                    tail=self.__max_previous_lines,
                    follow=True)

                # self.__logs is a generator so don't call len( self.__logs )
                self.__logger.log(
                    scalyr_logging.DEBUG_LEVEL_3,
                    'Found log lines for cid=%s' % (str(self.cid)))
                try:
                    for line in self.__logs:
                        #split the docker timestamp from the rest of the line
                        dt, log_line = _split_datetime_from_line(line)
                        if not dt:
                            global_log.error(
                                'No timestamp found on line: \'%s\'', line)
                        else:
                            timestamp = scalyr_util.seconds_since_epoch(
                                dt, epoch)

                            #see if we log the entire line including timestamps
                            if self.__log_timestamps:
                                log_line = line

                            #check to make sure timestamp is >= to the last request
                            #Note: we can safely read last_request here because we are the only writer
                            if timestamp >= self.__last_request:
                                self.__logger.info(log_line.strip())

                                #but we need to lock for writing
                                self.__last_request_lock.acquire()
                                self.__last_request = timestamp
                                self.__last_request_lock.release()

                        if not run_state.is_running():
                            self.__logger.log(
                                scalyr_logging.DEBUG_LEVEL_3,
                                'Exiting out of container log for cid=%s' %
                                str(self.cid))
                            break
                except ProtocolError, e:
                    if run_state.is_running():
                        global_log.warning(
                            "Stream closed due to protocol error: %s" % str(e))

                if run_state.is_running():
                    global_log.warning(
                        "Log stream has been closed for '%s'.  Check docker.log on the host for possible errors.  Attempting to reconnect, some logs may be lost"
                        % (self.name),
                        limit_once_per_x_secs=300,
                        limit_key='stream-closed-%s' % self.name)
                    delay = random.randint(500, 3000) / 1000
                    run_state.sleep_but_awaken_if_stopped(delay)

            # we are shutting down, so update our last request to be slightly later than its current
            # value to prevent duplicate logs when starting up again.
            self.__last_request_lock.acquire()

            #can't be any smaller than 0.01 because the time value is only saved to 2 decimal places
            #on disk
            self.__last_request += 0.01

            self.__last_request_lock.release()

        except Exception, e:
            global_log.warn(
                'Unhandled exception in DockerLogger.process_request for %s:\n\t%s'
                % (self.name, str(e)))
Example #26
 def __init__(self):
     self.run_counter = 0
     StoppableThread.__init__(self, "Test thread")
Example #27
    def __init__(self, configuration, monitors):
        """Initializes the manager.

        Note, the log_config variable on the monitors will be updated as a side effect of this call to reflect
        the filling in of defaults and making paths absolute.  TODO:  This is kind of odd; it would be cleaner
        to do this somewhere more closely tied to the monitors themselves.

        @param configuration: The configuration specifying which log files need to be copied.
        @param monitors:  The list of ScalyrMonitor instances that will be run.  This is needed so the manager
            can be sure to copy the log files generated by the monitors. Note, the log_config for the monitors
            will be updated (on the monitor) to reflect the filling in of defaults and making paths absolute.

        @type configuration: configuration.Configuration
        @type monitors: list<ScalyrMonitor>
        """
        StoppableThread.__init__(self, name='log copier thread')
        self.__config = configuration
        # Keep track of monitors
        self.__monitors = monitors

        # We keep track of which paths we have configs for so that when we add in the configuration for the monitor
        # log files we don't re-add the same path.  This can easily happen if a monitor is used multiple times
        # with all instances writing to the same log file.
        self.__all_paths = {}

        # The list of LogMatcher objects that are watching for new files to appear.
        self.__log_matchers = self.__create_log_matches(
            configuration, monitors)

        # The list of LogFileProcessors that are processing the lines from matched log files.
        self.__log_processors = []
        # A dict from file path to the LogFileProcessor that is processing it.
        self.__log_paths_being_processed = {}
        # A lock that protects the status variables and the __log_matchers variable, the only variables that
        # are accessed in generate_status(), which needs to be thread safe.
        self.__lock = threading.Lock()

        # The current pending AddEventsTask.  We will retry the contained AddEventsRequest several times.
        self.__pending_add_events_task = None

        # The next LogFileProcessor that should have log lines read from it for transmission.
        self.__current_processor = 0

        # The client to use for sending the data.  Set in the start_manager call.
        self.__scalyr_client = None
        # The last time we scanned for new files that match the __log_matchers.
        self.__last_new_file_scan_time = 0

        # Status variables that track statistics reported to the status page.
        self.__last_attempt_time = None
        self.__last_success_time = None
        self.__last_attempt_size = None
        self.__last_response = None
        self.__last_response_status = None
        self.__total_bytes_uploaded = 0
        self.__total_errors = 0

        # The positions to use for a given file if there is not already a checkpoint for that file.
        # Set in the start_manager call.
        self.__logs_initial_positions = None

        # A semaphore that we increment when this object has begun copying files (after first scan).
        self.__copying_semaphore = threading.Semaphore()

        #set the log watcher variable of all monitors.  Do this last so everything is set up
        #and configured when the monitor receives this call
        for monitor in monitors:
            monitor.set_log_watcher(self)
Example #28
    def test_basic_use(self):
        test_thread = StoppableThread('Testing', self._run_method)
        test_thread.start()
        test_thread.stop()

        self.assertTrue(self._run_counter > 0)
Example #29
class DockerLogger( object ):
    def __init__( self, socket_file, cid, name, stream, log_path, config, last_request=None, max_log_size=20*1024*1024, max_log_rotations=2 ):
        self.__socket_file = socket_file
        self.cid = cid
        self.name = name
        self.stream = stream
        self.log_path = log_path
        self.stream_name = name + "-" + stream

        self.__max_previous_lines = config.get( 'max_previous_lines' )
        self.__log_timestamps = config.get( 'log_timestamps' )

        self.__last_request_lock = threading.Lock()

        self.__last_request = time.time()
        if last_request:
            self.__last_request = last_request

        self.__logger = logging.Logger( cid + '.' + stream )

        self.__log_handler = logging.handlers.RotatingFileHandler( filename = log_path, maxBytes = max_log_size, backupCount = max_log_rotations )
        formatter = logging.Formatter()
        self.__log_handler.setFormatter( formatter )
        self.__logger.addHandler( self.__log_handler )
        self.__logger.setLevel( logging.INFO )

        self.__thread = StoppableThread( target=self.process_request, name="Docker monitor logging thread for %s" % (name + '.' + stream) )

    def start( self ):
        self.__thread.start()

    def stop( self, wait_on_join=True, join_timeout=5 ):
        self.__thread.stop( wait_on_join=wait_on_join, join_timeout=join_timeout )

    def last_request( self ):
        self.__last_request_lock.acquire()
        result = self.__last_request
        self.__last_request_lock.release()
        return result

    def process_request( self, run_state ):
        request = DockerRequest( self.__socket_file )
        request.get( '/containers/%s/logs?%s=1&follow=1&tail=%d&timestamps=1' % (self.cid, self.stream, self.__max_previous_lines) )

        epoch = datetime.datetime.utcfromtimestamp( 0 )

        while run_state.is_running():
            line = request.readline()
            while line:
                line = self.strip_docker_header( line )
                dt, log_line = self.split_datetime_from_line( line )
                if not dt:
                    global_log.error( 'No timestamp found on line: \'%s\'', line )
                else:
                    timestamp = scalyr_util.seconds_since_epoch( dt, epoch )

                    #see if we log the entire line including timestamps
                    if self.__log_timestamps:
                        log_line = line

                    #check to make sure timestamp is >= to the last request
                    #Note: we can safely read last_request here because we are the only writer
                    if timestamp >= self.__last_request:
                        self.__logger.info( log_line.strip() )

                        #but we need to lock for writing
                        self.__last_request_lock.acquire()
                        self.__last_request = timestamp
                        self.__last_request_lock.release()

                line = request.readline()
            run_state.sleep_but_awaken_if_stopped( 0.1 )

        # we are shutting down, so update our last request to be slightly later than its current
        # value to prevent duplicate logs when starting up again.
        self.__last_request_lock.acquire()

        #can't be any smaller than 0.01 because the time value is only saved to 2 decimal places
        #on disk
        self.__last_request += 0.01

        self.__last_request_lock.release()

    def strip_docker_header( self, line ):
        """Docker prepends some lines with an 8 byte header.  The first 4 bytes a byte containg the stream id
        0, 1 or 2 for stdin, stdout and stderr respectively, followed by 3 bytes of padding.

        The next 4 bytes contain the size of the message.

        This function checks for the existence of the 8 byte header and, if the length field matches the remaining
        length of the line, strips the first 8 bytes.

        If the lengths don't match or an expected stream type is not found, the line is left alone.
        """

        # the docker header has a stream id, which is a single byte, followed by 3 bytes of padding
        # and then a 4-byte int in big-endian (network) order
        fmt = '>B3xI'
        size = struct.calcsize( fmt )

        # make sure the line has at least as many bytes as required by the header
        if len( line ) >= size:
            stream, length = struct.unpack( fmt, line[0:size] )

            # We expect a value of 0, 1 or 2 for stream.  Anything else indicates we don't have
            # a docker header
            # We also expect length to be the length of the remaining line
            if stream in [ 0, 1, 2 ] and len( line[size:] ) == length:
                #We have a valid docker header, so strip it
                line = line[size:]

        return line


    def split_datetime_from_line( self, line ):
        """Docker timestamps are in RFC3339 format: 2015-08-03T09:12:43.143757463Z, with everything up to the first space
        being the timestamp.
        """
        log_line = line
        dt = datetime.datetime.utcnow()
        pos = line.find( ' ' )
        if pos > 0:
            dt = scalyr_util.rfc3339_to_datetime( line[0:pos] )
            log_line = line[pos+1:]

        return (dt, log_line)
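
To make the '>B3xI' layout described in strip_docker_header concrete, here is a small worked example of packing and then unpacking such a frame (the payload is made up):

    import struct

    payload = b'hello\n'
    # stream id 1 (stdout), 3 padding bytes, big-endian 4-byte length
    header = struct.pack('>B3xI', 1, len(payload))
    framed = header + payload

    stream_id, length = struct.unpack('>B3xI', framed[:8])
    assert (stream_id, length) == (1, 6)
    assert framed[8:] == payload  # what strip_docker_header would keep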
Example #31
class DockerLogger(object):
    def __init__(self,
                 socket_file,
                 cid,
                 name,
                 stream,
                 log_path,
                 config,
                 last_request=None,
                 max_log_size=20 * 1024 * 1024,
                 max_log_rotations=2):
        self.__socket_file = socket_file
        self.cid = cid
        self.name = name
        self.stream = stream
        self.log_path = log_path
        self.stream_name = name + "-" + stream

        self.__max_previous_lines = config.get('max_previous_lines')
        self.__log_timestamps = config.get('log_timestamps')

        self.__last_request_lock = threading.Lock()

        self.__last_request = time.time()
        if last_request:
            self.__last_request = last_request

        self.__logger = logging.Logger(cid + '.' + stream)

        self.__log_handler = logging.handlers.RotatingFileHandler(
            filename=log_path,
            maxBytes=max_log_size,
            backupCount=max_log_rotations)
        formatter = logging.Formatter()
        self.__log_handler.setFormatter(formatter)
        self.__logger.addHandler(self.__log_handler)
        self.__logger.setLevel(logging.INFO)

        self.__thread = StoppableThread(
            target=self.process_request,
            name="Docker monitor logging thread for %s" %
            (name + '.' + stream))

    def start(self):
        self.__thread.start()

    def stop(self, wait_on_join=True, join_timeout=5):
        self.__thread.stop(wait_on_join=wait_on_join,
                           join_timeout=join_timeout)

    def last_request(self):
        self.__last_request_lock.acquire()
        result = self.__last_request
        self.__last_request_lock.release()
        return result

    def process_request(self, run_state):
        request = DockerRequest(self.__socket_file)
        request.get('/containers/%s/logs?%s=1&follow=1&tail=%d&timestamps=1' %
                    (self.cid, self.stream, self.__max_previous_lines))

        epoch = datetime.datetime.utcfromtimestamp(0)

        while run_state.is_running():
            line = request.readline()
            while line:
                line = self.strip_docker_header(line)
                dt, log_line = self.split_datetime_from_line(line)
                if not dt:
                    global_log.error('No timestamp found on line: \'%s\'',
                                     line)
                else:
                    timestamp = scalyr_util.seconds_since_epoch(dt, epoch)

                    #see if we log the entire line including timestamps
                    if self.__log_timestamps:
                        log_line = line

                    #check to make sure timestamp is >= to the last request
                    #Note: we can safely read last_request here because we are the only writer
                    if timestamp >= self.__last_request:
                        self.__logger.info(log_line.strip())

                        #but we need to lock for writing
                        self.__last_request_lock.acquire()
                        self.__last_request = timestamp
                        self.__last_request_lock.release()

                line = request.readline()
            run_state.sleep_but_awaken_if_stopped(0.1)

        # we are shutting down, so update our last request to be slightly later than its current
        # value to prevent duplicate logs when starting up again.
        self.__last_request_lock.acquire()

        #can't be any smaller than 0.01 because the time value is only saved to 2 decimal places
        #on disk
        self.__last_request += 0.01

        self.__last_request_lock.release()

    def strip_docker_header(self, line):
        """Docker prepends some lines with an 8 byte header.  The first 4 bytes a byte containg the stream id
        0, 1 or 2 for stdin, stdout and stderr respectively, followed by 3 bytes of padding.

        The next 4 bytes contain the size of the message.

        This function checks for the existence of the the 8 byte header and if the length field matches the remaining
        length of the line then it strips the first 8 bytes.

        If the lengths don't match or if an expected stream type is not found then the line is left alone
        """

        # the docker header has a stream id, which is a single byte, followed by 3 bytes of padding
        # and then a 4-byte int in big-endian (network) order
        fmt = '>B3xI'
        size = struct.calcsize(fmt)

        # make sure the line has at least as many bytes as required by the header
        if len(line) >= size:
            stream, length = struct.unpack(fmt, line[0:size])

            # We expect a value of 0, 1 or 2 for stream.  Anything else indicates we don't have
            # a docker header
            # We also expect length to be the length of the remaining line
            if stream in [0, 1, 2] and len(line[size:]) == length:
                # We have a valid docker header, so strip it
                line = line[size:]

        return line

    def split_datetime_from_line(self, line):
        """Docker timestamps are in RFC3339 format: 2015-08-03T09:12:43.143757463Z, with everything up to the first space
        being the timestamp.
        """
        log_line = line
        dt = datetime.datetime.utcnow()
        pos = line.find(' ')
        if pos > 0:
            dt = scalyr_util.rfc3339_to_datetime(line[0:pos])
            log_line = line[pos + 1:]

        return (dt, log_line)
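
For reference, here is a minimal, self-contained sketch of the framing and timestamp
handling that strip_docker_header and split_datetime_from_line implement above.  It is
illustrative only: it builds a fake stdout frame with struct.pack, and it parses the
RFC3339 timestamp with the stdlib (truncating nanoseconds to microseconds) instead of
scalyr_util.rfc3339_to_datetime, which is an assumption made purely for this demo.

import struct
import datetime

# A stdout frame as Docker's logs API would emit it: a one-byte stream id (1 for
# stdout), 3 bytes of padding, a big-endian 4-byte payload length, then the payload.
payload = b'2015-08-03T09:12:43.143757463Z hello world\n'
framed = struct.pack('>B3xI', 1, len(payload)) + payload

# Strip the header, mirroring strip_docker_header.
fmt = '>B3xI'
size = struct.calcsize(fmt)
stream_id, length = struct.unpack(fmt, framed[:size])
if stream_id in (0, 1, 2) and len(framed[size:]) == length:
    framed = framed[size:]

# Split the RFC3339 timestamp from the message, mirroring split_datetime_from_line.
# ts[:26] truncates the fractional seconds to the 6 digits the stdlib can parse.
text = framed.decode('utf-8')
ts, msg = text.split(' ', 1)
dt = datetime.datetime.strptime(ts[:26], '%Y-%m-%dT%H:%M:%S.%f')
print(dt, msg.strip())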
Example #32
    def __init__(self):
        self.run_counter = 0
        StoppableThread.__init__(self, 'Test thread')
Example #33
    def __init__(self, monitor_config, logger, sample_interval_secs=None, global_config=None):
        """Constructs an instance of the monitor.

        It is optional for derived classes to override this method.  They can instead
        override _initialize, which is invoked during initialization.
        TODO:  Determine which approach is preferred by developers and recommend that.

        If a derived class overrides __init__, it must invoke this method in the override.

        This method will set default values for all public attributes (log_config, disabled, etc).  These
        may be overwritten by the derived class.

        The derived classes must raise an Exception (or something derived from Exception)
        in this method if the provided configuration is invalid or if there is any other
        error known at this time preventing the module from running.

        @param monitor_config: A dict containing the configuration information for this module instance from the
            configuration file. The only valid values are strings, ints, longs, floats, and booleans.
        @param logger: The logger to use for output.
        @param sample_interval_secs: The interval in seconds to wait between gathering samples.  If None, it will
            set the value from the ``sample_interval`` field in the monitor_config if present, or the default
            interval time for all monitors in ``DEFAULT_SAMPLE_INTERVAL_SECS``.  Generally, you should probably
            pass None here and allow the value to be taken from the configuration files.
        @param global_config: the global configuration object.  Monitors can use or ignore this as necessary
        """
        # The logger instance that this monitor should use to report all information and metric values.
        self._logger = logger
        self.monitor_name = monitor_config['module']

        # save the global config
        self._global_config = global_config

        # The MonitorConfig object created from the config for this monitor instance.
        self._config = MonitorConfig(monitor_config, monitor_module=self.monitor_name)
        log_path = self.monitor_name.split('.')[-1] + '.log'
        self.disabled = False
        # TODO: For now, just leverage the logic in the loggers for naming this monitor.  However,
        # we should have it be more dynamic where the monitor can override it.
        if logger.component.find('monitor:') == 0:
            self.monitor_name = logger.component[8:]
        else:
            self.monitor_name = logger.component
        self.log_config = {
            "path": log_path,
        }
        # This lock protects all variables that can be accessed by other threads: reported_lines,
        # emitted_lines, and errors.  It does not protect _run_state since that already has its own lock.
        self.__lock = Lock()
        self.__reported_lines = 0
        self.__errors = 0

        # Set the time between samples for this monitor.  We prefer configuration values over the values
        # passed into the constructor.
        if sample_interval_secs is not None:
            self._sample_interval_secs = sample_interval_secs
        else:
            self._sample_interval_secs = self._config.get('sample_interval', convert_to=float,
                                                          default=ScalyrMonitor.DEFAULT_SAMPLE_INTERVAL_SECS)

        self.__metric_log_open = False

        # These variables control the rate limiter on how fast we can write to the metric log.
        # The first one is the average number of bytes that can be written per second.  This is the bucket fill rate
        # in the "leaky bucket" algorithm used to calculate the rate limit.  Derived classes may change this.
        self._log_write_rate = self._config.get('monitor_log_write_rate', convert_to=int, default=2000)
        # This is the maximum size of a write burst to the log.  This is the bucket size in the "leaky bucket" algorithm
        # used to calculate the rate limit.  Derived classes may change this.
        self._log_max_write_burst = self._config.get('monitor_log_max_write_burst', convert_to=int, default=100000)
        # This is the number of seconds to wait between flushes of the metric log (if there are pending bytes that
        # need to be flushed to disk).  If this is greater than zero, then it will reduce the amount of disk
        # flushing, but at the cost of possible loss of data if the agent shuts down unexpectedly.
        self._log_flush_delay = self._config.get('monitor_log_flush_delay', convert_to=float, default=0.0, min_value=0)

        self._initialize()

        StoppableThread.__init__(self, name='metric thread')
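
The "leaky bucket" comments above can be made concrete with a short sketch.  The class
below is an illustrative reimplementation, not the agent's actual rate limiter: tokens
(bytes) refill at fill_rate per second (monitor_log_write_rate) up to bucket_size
(monitor_log_max_write_burst), and a write is only allowed if enough tokens remain.

import time

class LeakyBucketRateLimiter(object):
    def __init__(self, bucket_size, fill_rate):
        self.bucket_size = bucket_size  # maximum burst size, in bytes
        self.fill_rate = fill_rate      # average bytes per second
        self.tokens = bucket_size       # start with a full bucket
        self.last_fill = time.time()

    def charge_if_available(self, num_bytes):
        # Refill the bucket based on elapsed time, capped at bucket_size.
        now = time.time()
        self.tokens = min(self.bucket_size,
                          self.tokens + (now - self.last_fill) * self.fill_rate)
        self.last_fill = now
        # Only allow the write if the whole charge fits in the bucket.
        if self.tokens >= num_bytes:
            self.tokens -= num_bytes
            return True
        return False

# With the defaults above: bursts of up to 100000 bytes, 2000 bytes/sec sustained.
limiter = LeakyBucketRateLimiter(bucket_size=100000, fill_rate=2000)
print(limiter.charge_if_available(500))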
Example #34
    def __init__(self,
                 monitor_config,
                 logger,
                 sample_interval_secs=None,
                 global_config=None):
        """Constructs an instance of the monitor.

        It is optional for derived classes to override this method.  They can instead
        override _initialize, which is invoked during initialization.
        TODO:  Determine which approach is preferred by developers and recommend that.

        If a derived class overrides __init__, it must invoke this method in the override.

        This method will set default values for all public attributes (log_config, disabled, etc).  These
        may be overwritten by the derived class.

        The derived classes must raise an Exception (or something derived from Exception)
        in this method if the provided configuration is invalid or if there is any other
        error known at this time preventing the module from running.

        @param monitor_config: A dict containing the configuration information for this module instance from the
            configuration file. The only valid values are strings, ints, longs, floats, and booleans.
        @param logger: The logger to use for output.
        @param sample_interval_secs: The interval in seconds to wait between gathering samples.  If None, it will
            set the value from the ``sample_interval`` field in the monitor_config if present, or the default
            interval time for all monitors in ``DEFAULT_SAMPLE_INTERVAL_SECS``.  Generally, you should probably
            pass None here and allow the value to be taken from the configuration files.
        @param global_config: the global configuration object.  Monitors can use or ignore this as necessary
        """
        # The logger instance that this monitor should use to report all information and metric values.
        self._logger = logger
        self.monitor_name = monitor_config['module']

        # save the global config
        self._global_config = global_config

        # The MonitorConfig object created from the config for this monitor instance.
        self._config = MonitorConfig(monitor_config,
                                     monitor_module=self.monitor_name)
        log_path = self.monitor_name.split('.')[-1] + '.log'
        self.disabled = False
        # TODO: For now, just leverage the logic in the loggers for naming this monitor.  However,
        # we should have it be more dynamic where the monitor can override it.
        if logger.component.find('monitor:') == 0:
            self.monitor_name = logger.component[8:]
        else:
            self.monitor_name = logger.component
        self.log_config = {
            "path": log_path,
        }
        # This lock protects all variables that can be accessed by other threads: reported_lines,
        # emitted_lines, and errors.  It does not protect _run_state since that already has its own lock.
        self.__lock = Lock()
        self.__reported_lines = 0
        self.__errors = 0

        # Set the time between samples for this monitor.  We prefer configuration values over the values
        # passed into the constructor.
        if sample_interval_secs is not None:
            self._sample_interval_secs = sample_interval_secs
        else:
            self._sample_interval_secs = self._config.get(
                'sample_interval',
                convert_to=float,
                default=ScalyrMonitor.DEFAULT_SAMPLE_INTERVAL_SECS)

        self.__metric_log_open = False

        # These variables control the rate limiter on how fast we can write to the metric log.
        # The first one is the average number of bytes that can be written per second.  This is the bucket fill rate
        # in the "leaky bucket" algorithm used to calculate the rate limit.  Derived classes may change this.
        self._log_write_rate = self._config.get('monitor_log_write_rate',
                                                convert_to=int,
                                                default=2000)
        # This is the maximum size of a write burst to the log.  This is the bucket size in the "leaky bucket" algorithm
        # used to calculate the rate limit.  Derived classes may change this.
        self._log_max_write_burst = self._config.get(
            'monitor_log_max_write_burst', convert_to=int, default=100000)
        # This is the number of seconds to wait between flushes of the metric log (if there are pending bytes that
        # need to be flushed to disk).  If this is greater than zero, then it will reduce the amount of disk
        # flushing, but at the cost of possible loss of data if the agent shuts down unexpectedly.
        self._log_flush_delay = self._config.get('monitor_log_flush_delay',
                                                 convert_to=float,
                                                 default=0.0,
                                                 min_value=0)

        # If true, will adjust the sleep time between gather_sample calls by the time spent in gather_sample, rather
        # than sleeping the full sample_interval_secs time.
        self._adjust_sleep_by_gather_time = False
        self._initialize()

        StoppableThread.__init__(self, name='metric thread')
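
The _adjust_sleep_by_gather_time flag added in this version implies a sampling loop like
the hypothetical sketch below.  The real run() method is not part of this example; the
run_state object is assumed to follow the interface seen in example #31.

import time

def run_loop(monitor, run_state):
    # Hypothetical loop illustrating _adjust_sleep_by_gather_time.
    while run_state.is_running():
        start = time.time()
        monitor.gather_sample()
        sleep_secs = monitor._sample_interval_secs
        if monitor._adjust_sleep_by_gather_time:
            # Subtract the time spent gathering so samples keep a fixed cadence
            # instead of drifting by the cost of each gather_sample call.
            sleep_secs = max(0.0, sleep_secs - (time.time() - start))
        run_state.sleep_but_awaken_if_stopped(sleep_secs)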
Example #35
    def run(self, result=None):
        _start_thread_watcher_if_necessary()
        StoppableThread.set_name_prefix("TestCase %s: " % six.text_type(self))
        return unittest.TestCase.run(self, result=result)