def encode_object(term):
    """@param term return an object encoded as string."""
    # Scalars (and None) pass through unchanged.
    if term is None or isinstance(term, (bool, int, float)):
        return term
    if isinstance(term, str):
        return 'string:' + term
    if isinstance(term, bytes):
        return 'bytes:' + encode_byte_string_as_string(term)
    # Sequences and sets all collapse to a list of encoded elements.
    if isinstance(term, (list, tuple, set)):
        return [encode_object(element) for element in term]
    if isinstance(term, dict):
        encoded_dict = {}
        for key, value in term.items():
            # Tuple keys get a special prefix instead of recursive encoding.
            if isinstance(key, tuple):
                encoded_key = "tuple:" + str(key)
            else:
                encoded_key = encode_object(key)
            encoded_dict[encoded_key] = encode_object(value)
        return encoded_dict
    msg = 'Unencodeable object %s' % type(term)
    logging.getLogger(DEBUG_LOG_NAME).error(msg)
    raise Exception(msg)
def open(self, reopen_flag=False):
    """
    Open the given resource.
    @param reopen_flag when True, attempt to reopen the same resource and check if it differs from the previously opened one.
    @raise Exception if valid log_stream_fd was already provided, is still open and reopen_flag is False.
    @raise OSError when opening failed with unexpected error.
    @return True if the resource was really opened or False if opening was not yet possible but should be attempted again.
    """
    # Refuse to clobber an already open descriptor unless a reopen was requested.
    if self.log_file_fd != -1 and not reopen_flag:
        msg = 'Cannot reopen stream still open when not instructed to do so'
        logging.getLogger(DEBUG_LOG_NAME).error(msg)
        raise Exception(msg)

    new_fd = -1
    new_stat = None
    try:
        # Skip the 7-byte "file://" prefix to obtain the real path.
        new_fd = SecureOSFunctions.secure_open_file(self.log_resource_name[7:], os.O_RDONLY)
        new_stat = os.fstat(new_fd)
    except OSError as openOsError:
        msg = 'OSError occurred in FileLogDataResource.open(). Error message: %s' % openOsError
        logging.getLogger(DEBUG_LOG_NAME).error(msg)
        if new_fd != -1:
            os.close(new_fd)
        if openOsError.errno == errno.ENOENT:
            # File does not exist (yet); the caller may retry later.
            return False
        raise

    if not stat.S_ISREG(new_stat.st_mode):
        os.close(new_fd)
        msg = 'Attempting to open non-regular file %s as file' % encode_byte_string_as_string(self.log_resource_name)
        print(msg, file=sys.stderr)
        logging.getLogger(DEBUG_LOG_NAME).error(msg)
        raise Exception(msg)

    unchanged = reopen_flag and self.stat_data is not None and \
        new_stat.st_ino == self.stat_data.st_ino and new_stat.st_dev == self.stat_data.st_dev
    if unchanged:
        # Reopening was requested, but we would reopen the file already opened, which is of no use.
        os.close(new_fd)
        return False
    # This is a new file or a successful reopen attempt.
    self.log_file_fd = new_fd
    self.stat_data = new_stat
    return True
def __init__(self, log_resource_name, log_stream_fd, default_buffer_size=1 << 16, repositioning_data=None):
    """
    Create a new file type resource.
    @param log_resource_name the unique name of this source as bytes array, has to start with "file://" before the file path.
    @param log_stream_fd the stream for reading the resource or -1 if not yet opened.
    @param default_buffer_size read chunk size in bytes used while verifying repositioning data (default 64 KiB).
    @param repositioning_data if not None, attempt to position the stream using the given data. Layout implied by the checks
        below: [0] inode number, [1] number of bytes already consumed, [2] base64-encoded MD5 digest of those bytes.
    """
    # Only "file://..." names are acceptable for a file-backed resource.
    if not log_resource_name.startswith(b'file://'):
        msg = 'Attempting to create different type resource as file'
        logging.getLogger(DEBUG_LOG_NAME).error(msg)
        raise Exception(msg)
    self.log_resource_name = log_resource_name
    self.log_file_fd = log_stream_fd
    self.stat_data = None
    if self.log_file_fd >= 0:
        # Descriptor was supplied already open: capture its stat data for later reopen/repositioning comparisons.
        self.stat_data = os.fstat(log_stream_fd)
    self.buffer = b''
    self.default_buffer_size = default_buffer_size
    self.total_consumed_length = 0
    # Create a hash for repositioning. There is no need to be cryptographically secure here: if upstream can manipulate the content,
    # to provoke hash collisions, correct positioning would not matter anyway.
    # skipcq: PTC-W1003
    self.repositioning_digest = hashlib.md5()
    if (log_stream_fd != -1) and (repositioning_data is not None):
        if repositioning_data[0] != self.stat_data.st_ino:
            # Different inode: the file was replaced since the position was recorded.
            msg = 'Not attempting to reposition on %s, inode number mismatch' % encode_byte_string_as_string(
                self.log_resource_name)
            logging.getLogger(DEBUG_LOG_NAME).warning(msg)
            print(msg, file=sys.stderr)
        elif repositioning_data[1] > self.stat_data.st_size:
            # File is shorter than the recorded position: the stored digest cannot match any more.
            msg = 'Not attempting to reposition on %s, file size too small' % encode_byte_string_as_string(
                self.log_resource_name)
            logging.getLogger(DEBUG_LOG_NAME).warning(msg)
            print(msg, file=sys.stderr)
        else:
            # Re-read the first repositioning_data[1] bytes and verify their digest before trusting the position.
            # skipcq: PTC-W1003
            hash_algo = hashlib.md5()
            # length counts the bytes still to read; it also doubles as a status flag afterwards
            # (0 = all bytes read, anything else = repositioning failed).
            length = repositioning_data[1]
            while length != 0:
                block = None
                if length < default_buffer_size:
                    block = os.read(self.log_file_fd, length)
                else:
                    block = os.read(self.log_file_fd, default_buffer_size)
                if not block:
                    # Hit EOF before reading the expected amount: the file shrank while being read.
                    msg = 'Not attempting to reposition on %s, file shrunk while reading' % encode_byte_string_as_string(
                        self.log_resource_name)
                    logging.getLogger(DEBUG_LOG_NAME).warning(msg)
                    print(msg, file=sys.stderr)
                    break
                hash_algo.update(block)
                length -= len(block)
            digest = hash_algo.digest()
            if length == 0:
                if digest == base64.b64decode(repositioning_data[2]):
                    # Repositioning is OK, keep current digest and length data.
                    self.total_consumed_length = repositioning_data[1]
                    self.repositioning_digest = hash_algo
                else:
                    msg = 'Not attempting to reposition on %s, digest changed' % encode_byte_string_as_string(
                        self.log_resource_name)
                    logging.getLogger(DEBUG_LOG_NAME).warning(msg)
                    print(msg, file=sys.stderr)
                    # Force the failure path below so the stream is rewound.
                    length = -1
            if length != 0:
                # Repositioning failed, go back to the beginning of the stream.
                os.lseek(self.log_file_fd, 0, os.SEEK_SET)