def test_name_prefix_with_none(self):
    StoppableThread.set_name_prefix("test_name_prefix: ")
    test_thread = StoppableThread(target=self._run_method)
    self.assertEqual(test_thread.getName(), "test_name_prefix: ")
    test_thread.start()
    test_thread.stop()
    self.assertTrue(self._run_counter > 0)
def test_name_prefix(self):
    StoppableThread.set_name_prefix("test_name_prefix: ")
    test_thread = StoppableThread("Testing", self._run_method)
    self.assertEqual(test_thread.getName(), "test_name_prefix: Testing")
    test_thread.start()
    test_thread.stop()
    self.assertTrue(self._run_counter > 0)
def test_basic_use(self):
    # Since the ScalyrTestCase sets the name prefix, we need to set it back to None to get an
    # unmolested name.
    StoppableThread.set_name_prefix(None)
    test_thread = StoppableThread("Testing", self._run_method)
    self.assertEqual(test_thread.getName(), "Testing")
    test_thread.start()
    test_thread.stop()
    self.assertTrue(self._run_counter > 0)
def test_exception(self):
    class TestException(Exception):
        pass

    def throw_an_exception(run_state):
        run_state.is_running()
        raise TestException()

    test_thread = StoppableThread("Testing", throw_an_exception)
    test_thread.start()

    caught_it = False
    try:
        test_thread.stop()
    except TestException:
        caught_it = True

    self.assertTrue(caught_it)
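# A minimal sketch, added for illustration, of the _run_method helper the tests above rely on
# (assumed to be defined on the test case, with _run_counter initialized in setUp).  A
# StoppableThread invokes its target with the thread's run_state, and the target is expected
# to poll run_state.is_running() so that stop() can end the loop.
def _run_method(self, run_state):
    while run_state.is_running():
        self._run_counter += 1
        run_state.sleep_but_awaken_if_stopped(0.01)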
class DockerLogger(object):
    def __init__(self, socket_file, cid, name, stream, log_path, config,
                 last_request=None, max_log_size=20 * 1024 * 1024, max_log_rotations=2):
        self.__socket_file = socket_file
        self.cid = cid
        self.name = name
        self.stream = stream
        self.log_path = log_path
        self.stream_name = name + "-" + stream

        self.__max_previous_lines = config.get('max_previous_lines')
        self.__log_timestamps = config.get('log_timestamps')

        self.__last_request_lock = threading.Lock()
        self.__last_request = time.time()
        if last_request:
            self.__last_request = last_request

        self.__logger = logging.Logger(cid + '.' + stream)
        self.__log_handler = logging.handlers.RotatingFileHandler(
            filename=log_path, maxBytes=max_log_size, backupCount=max_log_rotations)
        formatter = logging.Formatter()
        self.__log_handler.setFormatter(formatter)
        self.__logger.addHandler(self.__log_handler)
        self.__logger.setLevel(logging.INFO)

        self.__thread = StoppableThread(
            target=self.process_request,
            name="Docker monitor logging thread for %s" % (name + '.' + stream))

    def start(self):
        self.__thread.start()

    def stop(self, wait_on_join=True, join_timeout=5):
        self.__thread.stop(wait_on_join=wait_on_join, join_timeout=join_timeout)

    def last_request(self):
        self.__last_request_lock.acquire()
        result = self.__last_request
        self.__last_request_lock.release()
        return result

    def process_request(self, run_state):
        request = DockerRequest(self.__socket_file)
        request.get('/containers/%s/logs?%s=1&follow=1&tail=%d&timestamps=1' % (
            self.cid, self.stream, self.__max_previous_lines))

        epoch = datetime.datetime.utcfromtimestamp(0)

        while run_state.is_running():
            line = request.readline()
            while line:
                line = self.strip_docker_header(line)
                dt, log_line = self.split_datetime_from_line(line)
                if not dt:
                    global_log.error('No timestamp found on line: \'%s\'', line)
                else:
                    timestamp = scalyr_util.seconds_since_epoch(dt, epoch)

                    # see if we log the entire line including timestamps
                    if self.__log_timestamps:
                        log_line = line

                    # check to make sure the timestamp is >= the last request
                    # Note: we can safely read last_request here because we are the only writer
                    if timestamp >= self.__last_request:
                        self.__logger.info(log_line.strip())

                        # but we need to lock for writing
                        self.__last_request_lock.acquire()
                        self.__last_request = timestamp
                        self.__last_request_lock.release()

                line = request.readline()
            run_state.sleep_but_awaken_if_stopped(0.1)

        # We are shutting down, so update our last request to be slightly later than its current
        # value to prevent duplicate logs when starting up again.
        self.__last_request_lock.acquire()
        # Can't be any smaller than 0.01 because the time value is only saved to 2 decimal
        # places on disk.
        self.__last_request += 0.01
        self.__last_request_lock.release()

    def strip_docker_header(self, line):
        """Docker prepends some lines with an 8 byte header.  The first of the 8 bytes contains
        the stream id (0, 1 or 2 for stdin, stdout and stderr respectively), followed by 3 bytes
        of padding.  The next 4 bytes contain the size of the message.

        This function checks for the existence of the 8 byte header and, if the length field
        matches the remaining length of the line, strips the first 8 bytes.

        If the lengths don't match or if an expected stream type is not found then the line is
        left alone.
        """
        # The docker header has a stream id, which is a single byte, followed by 3 bytes of
        # padding and then a 4-byte int in big-endian (network) order.
        fmt = '>B3xI'
        size = struct.calcsize(fmt)

        # make sure the line has at least as many bytes as required by the header
        if len(line) >= size:
            stream, length = struct.unpack(fmt, line[0:size])

            # We expect a value of 0, 1 or 2 for stream.  Anything else indicates we don't have
            # a docker header.  We also expect length to be the length of the remaining line.
            if stream in [0, 1, 2] and len(line[size:]) == length:
                # we have a valid docker header, so strip it
                line = line[size:]

        return line

    def split_datetime_from_line(self, line):
        """Docker timestamps are in RFC3339 format: 2015-08-03T09:12:43.143757463Z, with
        everything up to the first space being the timestamp.
        """
        log_line = line
        dt = datetime.datetime.utcnow()
        pos = line.find(' ')
        if pos > 0:
            dt = scalyr_util.rfc3339_to_datetime(line[0:pos])
            log_line = line[pos + 1:]

        return (dt, log_line)
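# A self-contained demo, added for illustration, of the framing that strip_docker_header()
# handles: one stream-id byte, 3 bytes of padding and a big-endian 4-byte payload length.
# The payload text is made up.
import struct

payload = '2015-08-03T09:12:43.143757463Z hello world\n'
framed = struct.pack('>B3xI', 1, len(payload)) + payload  # stream id 1 == stdout

fmt = '>B3xI'
size = struct.calcsize(fmt)
stream, length = struct.unpack(fmt, framed[0:size])
assert stream in [0, 1, 2] and len(framed[size:]) == length
assert framed[size:] == payload  # a valid header was found and stripped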
class DockerLogger(object):
    """Abstraction for logging either stdout or stderr from a given container.

    Logging is performed on a separate thread because each log is read from a continuous stream
    over the docker socket.
    """
    def __init__(self, socket_file, cid, name, stream, log_path, config,
                 last_request=None, max_log_size=20 * 1024 * 1024, max_log_rotations=2):
        self.__socket_file = socket_file
        self.cid = cid
        self.name = name

        # stderr or stdout
        self.stream = stream
        self.log_path = log_path
        self.stream_name = name + "-" + stream

        self.__max_previous_lines = config.get('max_previous_lines')
        self.__log_timestamps = True  # Note: always log timestamps for now, instead of config.get('log_timestamps')
        self.__docker_api_version = config.get('docker_api_version')

        self.__last_request_lock = threading.Lock()
        self.__last_request = time.time()
        if last_request:
            self.__last_request = last_request

        self.__logger = logging.Logger(cid + '.' + stream)
        self.__log_handler = logging.handlers.RotatingFileHandler(
            filename=log_path, maxBytes=max_log_size, backupCount=max_log_rotations)
        formatter = logging.Formatter()
        self.__log_handler.setFormatter(formatter)
        self.__logger.addHandler(self.__log_handler)
        self.__logger.setLevel(logging.INFO)

        self.__client = None
        self.__logs = None

        self.__thread = StoppableThread(
            target=self.process_request,
            name="Docker monitor logging thread for %s" % (name + '.' + stream))

    def start(self):
        self.__thread.start()

    def stop(self, wait_on_join=True, join_timeout=5):
        if self.__client and self.__logs and self.__logs.response:
            sock = self.__client._get_raw_response_socket(self.__logs.response)
            if sock:
                sock.shutdown(socket.SHUT_RDWR)
        self.__thread.stop(wait_on_join=wait_on_join, join_timeout=join_timeout)

    def last_request(self):
        self.__last_request_lock.acquire()
        result = self.__last_request
        self.__last_request_lock.release()
        return result

    def process_request(self, run_state):
        """Makes a log request on the docker socket for a given container and continues to read
        from the socket until the connection is closed.
        """
        try:
            # random delay to prevent all requests from starting at the same time
            delay = random.randint(500, 5000) / 1000
            run_state.sleep_but_awaken_if_stopped(delay)

            self.__logger.log(scalyr_logging.DEBUG_LEVEL_3,
                              'Starting to retrieve logs for cid=%s' % str(self.cid))
            self.__client = DockerClient(base_url=('unix:/%s' % self.__socket_file),
                                         version=self.__docker_api_version)

            epoch = datetime.datetime.utcfromtimestamp(0)

            while run_state.is_running():
                self.__logger.log(scalyr_logging.DEBUG_LEVEL_3,
                                  'Attempting to retrieve logs for cid=%s' % str(self.cid))
                sout = False
                serr = False
                if self.stream == 'stdout':
                    sout = True
                else:
                    serr = True

                self.__logs = self.__client.logs(
                    container=self.cid,
                    stdout=sout,
                    stderr=serr,
                    stream=True,
                    timestamps=True,
                    tail=self.__max_previous_lines,
                    follow=True)

                # self.__logs is a generator, so don't call len(self.__logs)
                self.__logger.log(scalyr_logging.DEBUG_LEVEL_3,
                                  'Found log lines for cid=%s' % str(self.cid))
                try:
                    for line in self.__logs:
                        # split the docker timestamp from the rest of the line
                        dt, log_line = _split_datetime_from_line(line)
                        if not dt:
                            global_log.error('No timestamp found on line: \'%s\'', line)
                        else:
                            timestamp = scalyr_util.seconds_since_epoch(dt, epoch)

                            # see if we log the entire line including timestamps
                            if self.__log_timestamps:
                                log_line = line

                            # check to make sure the timestamp is >= the last request
                            # Note: we can safely read last_request here because we are the only writer
                            if timestamp >= self.__last_request:
                                self.__logger.info(log_line.strip())

                                # but we need to lock for writing
                                self.__last_request_lock.acquire()
                                self.__last_request = timestamp
                                self.__last_request_lock.release()

                        if not run_state.is_running():
                            self.__logger.log(scalyr_logging.DEBUG_LEVEL_3,
                                              'Exiting out of container log for cid=%s' % str(self.cid))
                            break
                except ProtocolError, e:
                    if run_state.is_running():
                        global_log.warning("Stream closed due to protocol error: %s" % str(e))

                if run_state.is_running():
                    global_log.warning(
                        "Log stream has been closed for '%s'.  Check docker.log on the host for "
                        "possible errors.  Attempting to reconnect, some logs may be lost" % self.name,
                        limit_once_per_x_secs=300,
                        limit_key='stream-closed-%s' % self.name)
                    delay = random.randint(500, 3000) / 1000
                    run_state.sleep_but_awaken_if_stopped(delay)

            # We are shutting down, so update our last request to be slightly later than its
            # current value to prevent duplicate logs when starting up again.
            self.__last_request_lock.acquire()
            # Can't be any smaller than 0.01 because the time value is only saved to 2 decimal
            # places on disk.
            self.__last_request += 0.01
            self.__last_request_lock.release()
        except Exception, e:
            global_log.warn('Unhandled exception in DockerLogger.process_request for %s:\n\t%s'
                            % (self.name, str(e)))
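# A hedged usage sketch, added for illustration and not part of the module: wiring up a
# DockerLogger for a single container stream.  The socket path, container id and log path are
# placeholders, and a plain dict stands in for config because __init__ only uses its .get()
# accessor.
monitor_config = {'max_previous_lines': 5000, 'docker_api_version': 'auto'}
docker_logger = DockerLogger(
    socket_file='/var/scalyr/docker.sock',
    cid='abc123def456',
    name='my-container',
    stream='stdout',
    log_path='/var/log/scalyr-agent-2/docker-my-container-stdout.log',
    config=monitor_config,
    last_request=None)
docker_logger.start()
# ...later, during shutdown:
docker_logger.stop(wait_on_join=True, join_timeout=5)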
class ContainerChecker(StoppableThread):
    """Monitors containers to check when they start and stop running.
    """
    def __init__(self, config, logger, socket_file, docker_api_version, host_hostname, data_path, log_path):
        self._config = config
        self._logger = logger

        self.__delay = self._config.get('container_check_interval')
        self.__log_prefix = self._config.get('docker_log_prefix')
        name = self._config.get('container_name')

        self.__socket_file = socket_file
        self.__docker_api_version = docker_api_version
        self.__client = DockerClient(base_url=('unix:/%s' % self.__socket_file),
                                     version=self.__docker_api_version)

        self.container_id = self.__get_scalyr_container_id(self.__client, name)

        self.__checkpoint_file = os.path.join(data_path, "docker-checkpoints.json")
        self.__log_path = log_path

        self.__host_hostname = host_hostname

        self.__readback_buffer_size = self._config.get('readback_buffer_size')

        self.containers = {}
        self.__checkpoints = {}

        self.__log_watcher = None
        self.__module = None

        self.__start_time = time.time()
        self.__thread = StoppableThread(target=self.check_containers, name="Container Checker")

    def start(self):
        self.__load_checkpoints()
        self.containers = self.get_running_containers(self.__client)

        # if querying the docker api fails, set the container list to empty
        if self.containers is None:
            self.containers = {}

        self.docker_logs = self.__get_docker_logs(self.containers)
        self.docker_loggers = []

        # create and start the DockerLoggers
        self.__start_docker_logs(self.docker_logs)
        self._logger.log(scalyr_logging.DEBUG_LEVEL_1,
                         "Initialization complete.  Starting docker monitor for Scalyr")
        self.__thread.start()

    def stop(self, wait_on_join=True, join_timeout=5):
        self.__thread.stop(wait_on_join=wait_on_join, join_timeout=join_timeout)

        # stop the DockerLoggers
        for logger in self.docker_loggers:
            if self.__log_watcher:
                self.__log_watcher.remove_log_path(self.__module, logger.log_path)
            logger.stop(wait_on_join, join_timeout)
            self._logger.log(scalyr_logging.DEBUG_LEVEL_1,
                             "Stopping %s - %s" % (logger.name, logger.stream))

        self.__update_checkpoints()

    def check_containers(self, run_state):
        while run_state.is_running():
            self.__update_checkpoints()

            self._logger.log(scalyr_logging.DEBUG_LEVEL_2, 'Attempting to retrieve list of containers')
            running_containers = self.get_running_containers(self.__client)

            # If running_containers is None, that means querying the docker api failed.
            # Rather than resetting the list of running containers to empty, continue using
            # the previous list of containers.
            if running_containers is None:
                self._logger.log(scalyr_logging.DEBUG_LEVEL_2, 'Failed to get list of containers')
                running_containers = self.containers

            self._logger.log(scalyr_logging.DEBUG_LEVEL_2, 'Found %d containers' % len(running_containers))

            # get the containers that have started since the last sample
            starting = {}
            for cid, info in running_containers.iteritems():
                if cid not in self.containers:
                    self._logger.log(scalyr_logging.DEBUG_LEVEL_1,
                                     "Starting logger for container '%s'" % info['name'])
                    starting[cid] = info

            # get the containers that have stopped
            stopping = {}
            for cid, info in self.containers.iteritems():
                if cid not in running_containers:
                    self._logger.log(scalyr_logging.DEBUG_LEVEL_1,
                                     "Stopping logger for container '%s'" % info['name'])
                    stopping[cid] = info

            # stop the old loggers
            self.__stop_loggers(stopping)

            # Update the list of running containers.  Do this before starting new ones, as
            # starting up new ones will access self.containers.
            self.containers = running_containers

            # start the new ones
            self.__start_loggers(starting)

            run_state.sleep_but_awaken_if_stopped(self.__delay)

    def set_log_watcher(self, log_watcher, module):
        self.__log_watcher = log_watcher
        self.__module = module

    def __get_scalyr_container_id(self, client, name):
        """Gets the container id of the scalyr-agent container.

        If the config option container_name is empty, then it is assumed that the scalyr agent
        is running on the host and not in a container, and None is returned.
        """
        result = None

        regex = None
        if name is not None:
            regex = re.compile(name)

        # get all the containers
        containers = client.containers()

        for container in containers:
            # see if we are checking on names
            if name is not None:
                # If so, loop over all container names for this container.
                # Note: containers should only have one name, but the 'Names' field is a list,
                # so iterate over it just in case.
                for cname in container['Names']:
                    cname = cname.lstrip('/')

                    # check if the name regex matches
                    m = regex.match(cname)
                    if m:
                        result = container['Id']
                        break
            # not checking container name, so check the Command instead to see if it's the agent
            else:
                if container['Command'].startswith('/usr/sbin/scalyr-agent-2'):
                    result = container['Id']

            if result:
                break

        if not result:
            # only raise an exception if we were looking for a specific name but couldn't find it
            if name is not None:
                raise Exception(
                    "Unable to find a matching container id for container '%s'.  Please make sure that a "
                    "container matching the regular expression '%s' is running." % (name, name))

        return result

    def __update_checkpoints(self):
        """Updates the checkpoints for when each docker logger last logged a request, and saves
        the checkpoints to file.
        """
        for logger in self.docker_loggers:
            last_request = logger.last_request()
            self.__checkpoints[logger.stream_name] = last_request

        # save to disk
        if self.__checkpoints:
            tmp_file = self.__checkpoint_file + '~'
            scalyr_util.atomic_write_dict_as_json_file(self.__checkpoint_file, tmp_file, self.__checkpoints)

    def __load_checkpoints(self):
        try:
            checkpoints = scalyr_util.read_file_as_json(self.__checkpoint_file)
        except:
            self._logger.info(
                "No checkpoint file '%s' exists.\n\tAll logs will be read starting from their current end.",
                self.__checkpoint_file)
            checkpoints = {}

        if checkpoints:
            for name, last_request in checkpoints.iteritems():
                self.__checkpoints[name] = last_request

    def get_running_containers(self, client, ignore_self=True):
        """Gets a dict of running containers that maps container id to container name.
        """
        result = {}
        try:
            response = client.containers()
            for container in response:
                cid = container['Id']

                if ignore_self and cid == self.container_id:
                    continue

                if len(container['Names']) > 0:
                    name = container['Names'][0].lstrip('/')
                    result[cid] = {'name': name}
                else:
                    result[cid] = {'name': cid}
        except:
            # container querying failed
            global_log.warning("Error querying running containers",
                               limit_once_per_x_secs=300,
                               limit_key='docker-api-running-containers')
            result = None

        return result

    def __stop_loggers(self, stopping):
        """Stops any DockerLoggers in the 'stopping' dict.

        @param: stopping - a dict of container ids => container names.  Any running containers
            that have the same container-id as a key in the dict will be stopped.
        """
        if stopping:
            self._logger.log(scalyr_logging.DEBUG_LEVEL_2, 'Stopping all docker loggers')
            for logger in self.docker_loggers:
                if logger.cid in stopping:
                    logger.stop(wait_on_join=True, join_timeout=1)
                    if self.__log_watcher:
                        self.__log_watcher.remove_log_path(self.__module, logger.log_path)

            self.docker_loggers[:] = [l for l in self.docker_loggers if l.cid not in stopping]
            self.docker_logs[:] = [l for l in self.docker_logs if l['cid'] not in stopping]

    def __start_loggers(self, starting):
        """Starts a list of DockerLoggers.

        @param: starting - a list of DockerLoggers to start
        """
        if starting:
            self._logger.log(scalyr_logging.DEBUG_LEVEL_2, 'Starting all docker loggers')
            docker_logs = self.__get_docker_logs(starting)
            self.__start_docker_logs(docker_logs)
            self.docker_logs.extend(docker_logs)

    def __start_docker_logs(self, docker_logs):
        for log in docker_logs:
            last_request = self.__get_last_request_for_log(log['log_config']['path'])
            if self.__log_watcher:
                log['log_config'] = self.__log_watcher.add_log_config(self.__module, log['log_config'])

            self.docker_loggers.append(self.__create_docker_logger(log, last_request))

    def __get_last_request_for_log(self, path):
        result = datetime.datetime.fromtimestamp(self.__start_time)

        try:
            full_path = os.path.join(self.__log_path, path)
            fp = open(full_path, 'r', self.__readback_buffer_size)

            # seek readback buffer bytes from the end of the file
            fp.seek(0, os.SEEK_END)
            size = fp.tell()
            if size < self.__readback_buffer_size:
                fp.seek(0, os.SEEK_SET)
            else:
                fp.seek(size - self.__readback_buffer_size, os.SEEK_SET)

            first = True
            for line in fp:
                # ignore the first line because it likely started somewhere randomly in the line
                if first:
                    first = False
                    continue

                dt, _ = _split_datetime_from_line(line)
                if dt:
                    result = dt
            fp.close()
        except Exception, e:
            global_log.info("%s", str(e))

        return scalyr_util.seconds_since_epoch(result)