class Script(six.with_metaclass(ABCMeta, object)):
    """An abstract base class for implementing modular inputs.

    Subclasses should override ``get_scheme``, ``stream_events``, and
    optionally ``validate_input`` if the modular input uses external
    validation.

    The ``run`` function is used to run modular inputs; it typically
    should not be overridden.
    """

    def __init__(self):
        # Parsed InputDefinition; populated by run_script() in input mode.
        self._input_definition = None
        # Lazily-created splunklib Service; see the ``service`` property.
        self._service = None

    def run(self, args):
        """Runs this modular input.

        :param args: List of command line arguments passed to this script.
        :returns: An integer to be used as the exit value of this program.
        """
        # call the run_script function, which handles the specifics of running
        # a modular input
        return self.run_script(args, EventWriter(), sys.stdin)

    def run_script(self, args, event_writer, input_stream):
        """Handles all the specifics of running a modular input.

        Dispatches on the command line: no extra argument means "stream
        events", ``--scheme`` emits the scheme XML, and
        ``--validate-arguments`` runs external validation.

        :param args: List of command line arguments passed to this script.
        :param event_writer: An ``EventWriter`` object for writing events.
        :param input_stream: An input stream for reading inputs.
        :returns: An integer to be used as the exit value of this program.
        """
        try:
            if len(args) == 1:
                # This script is running as an input. Input definitions will be
                # passed on stdin as XML, and the script will write events on
                # stdout and log entries on stderr.
                self._input_definition = InputDefinition.parse(input_stream)
                self.stream_events(self._input_definition, event_writer)
                event_writer.close()
                return 0

            elif str(args[1]).lower() == "--scheme":
                # Splunk has requested XML specifying the scheme for this
                # modular input. Return it and exit.
                scheme = self.get_scheme()
                if scheme is None:
                    event_writer.log(
                        EventWriter.FATAL,
                        "Modular input script returned a null scheme.")
                    return 1
                else:
                    event_writer.write_xml_document(scheme.to_xml())
                    return 0

            elif args[1].lower() == "--validate-arguments":
                # External validation: parse the proposed input definition
                # from stdin and let validate_input() accept or reject it.
                validation_definition = ValidationDefinition.parse(
                    input_stream)
                try:
                    self.validate_input(validation_definition)
                    return 0
                except Exception as e:
                    # Validation failures are reported back to splunkd as an
                    # <error><message>...</message></error> XML document.
                    root = ET.Element("error")
                    ET.SubElement(root, "message").text = str(e)
                    event_writer.write_xml_document(root)
                    return 1

            else:
                err_string = "ERROR Invalid arguments to modular input script:" + ' '.join(
                    args)
                event_writer._err.write(err_string)
                return 1

        except Exception as e:
            err_string = EventWriter.ERROR + str(e)
            event_writer._err.write(err_string)
            return 1

    @property
    def service(self):
        """Returns a Splunk service object for this script invocation.

        The service object is created from the Splunkd URI and session key
        passed to the command invocation on the modular input stream. It is
        available as soon as the :code:`Script.stream_events` method is
        called.

        :return: :class:`splunklib.client.Service`. A value of None is
            returned, if you call this method before the
            :code:`Script.stream_events` method is called.
        """
        if self._service is not None:
            return self._service

        if self._input_definition is None:
            return None

        splunkd_uri = self._input_definition.metadata["server_uri"]
        session_key = self._input_definition.metadata["session_key"]

        splunkd = urlsplit(splunkd_uri, allow_fragments=False)

        self._service = Service(
            scheme=splunkd.scheme,
            host=splunkd.hostname,
            port=splunkd.port,
            token=session_key,
        )

        return self._service

    @abstractmethod
    def get_scheme(self):
        """The scheme defines the parameters understood by this modular input.

        :return: a ``Scheme`` object representing the parameters for this
            modular input.
        """

    def validate_input(self, definition):
        """Handles external validation for modular input kinds.

        When Splunk calls a modular input script in validation mode, it will
        pass in an XML document giving information about the Splunk instance
        (so you can call back into it if needed) and the name and parameters
        of the proposed input.

        If this function does not throw an exception, the validation is
        assumed to succeed. Otherwise any errors thrown will be turned into a
        string and logged back to Splunk.

        The default implementation always passes.

        :param definition: The parameters for the proposed input passed by
            splunkd.
        """
        pass

    @abstractmethod
    def stream_events(self, inputs, ew):
        """The method called to stream events into Splunk.

        It should do all of its output via the passed ``EventWriter``
        object ``ew`` rather than writing to stdout directly.
        """
class Checkpointer(with_metaclass(ABCMeta, object)):
    '''Abstract interface for checkpoint storage back ends.

    Concrete subclasses persist modular-input progress ("checkpoints"),
    keyed by string, with JSON-serializable state values.
    '''

    @abstractmethod
    def update(self, key, state):
        '''Persist ``state`` under ``key``, replacing any previous value.

        :param key: Checkpoint key.
        :type key: ``string``
        :param state: Checkpoint state.
        :type state: ``json object``

        Usage::

           >>> from solnlib.modular_input import checkpointer
           >>> ck = checkpointer.KVStoreCheckpointer(session_key,
                                                     'Splunk_TA_test')
           >>> ck.update('checkpoint_name1', {'k1': 'v1', 'k2': 'v2'})
           >>> ck.update('checkpoint_name2', 'checkpoint_value2')
        '''

    @abstractmethod
    def batch_update(self, states):
        '''Persist several checkpoints in one call.

        :param states: List of checkpoints. Each entry is a json object
            holding a '_key' (the checkpoint key, a string) and a 'state'
            (the checkpoint value, a json object).
        :type states: ``list``

        Usage::

           >>> from solnlib.modular_input import checkpointer
           >>> ck = checkpointer.KVStoreCheckpointer(session_key,
                                                     'Splunk_TA_test')
           >>> ck.batch_update([{'_key': 'checkpoint_name1',
                                 'state': {'k1': 'v1', 'k2': 'v2'}},
                                {'_key': 'checkpoint_name2',
                                 'state': 'checkpoint_value2'},
                                {...}])
        '''

    @abstractmethod
    def get(self, key):
        '''Look up the checkpoint stored under ``key``.

        :param key: Checkpoint key.
        :type key: ``string``
        :returns: Checkpoint state if it exists, else None.
        :rtype: ``json object``

        Usage::

           >>> from solnlib.modular_input import checkpointer
           >>> ck = checkpointer.KVStoreCheckpointer(session_key,
                                                     'Splunk_TA_test')
           >>> ck.get('checkpoint_name1')
           >>> returns: {'k1': 'v1', 'k2': 'v2'}
        '''

    @abstractmethod
    def delete(self, key):
        '''Remove the checkpoint stored under ``key``.

        :param key: Checkpoint key.
        :type key: ``string``

        Usage::

           >>> from solnlib.modular_input import checkpointer
           >>> ck = checkpointer.KVStoreCheckpointer(session_key,
                                                     'Splunk_TA_test')
           >>> ck.delete('checkpoint_name1')
        '''
class ModularInput(with_metaclass(ABCMeta, object)):
    '''Base class of Splunk modular input.

    It's a base modular input, it should be inherited by sub modular input.
    For sub modular input, properties: 'app', 'name', 'title' and
    'description' must be overriden, also there are some other optional
    properties can be overriden like: 'use_external_validation',
    'use_single_instance', 'use_kvstore_checkpointer' and
    'use_hec_event_writer'.

    Notes: If you set 'use_kvstore_checkpointer' or 'use_hec_event_writer'
    to True, you must override the corresponding
    'kvstore_checkpointer_collection_name' and 'hec_input_name'.

    Usage::

       >>> class TestModularInput(ModularInput):
       >>>     app = 'TestApp'
       >>>     name = 'test_modular_input'
       >>>     title = 'Test modular input'
       >>>     description = 'This is a test modular input'
       >>>     use_external_validation = True
       >>>     use_single_instance = False
       >>>     use_kvstore_checkpointer = True
       >>>     kvstore_checkpointer_collection_name = 'TestCheckpoint'
       >>>     use_hec_event_writer = True
       >>>     hec_input_name = 'TestEventWriter'
       >>>
       >>>     def extra_arguments(self):
       >>>         ... .. .
       >>>
       >>>     def do_validation(self, parameters):
       >>>         ... .. .
       >>>
       >>>     def do_run(self, inputs):
       >>>         ... .. .
       >>>
       >>> if __name__ == '__main__':
       >>>     md = TestModularInput()
       >>>     md.execute()
    '''

    # App name, must be overriden
    app = None
    # Modular input name, must be overriden
    name = None
    # Modular input scheme title, must be overriden
    title = None
    # Modular input scheme description, must be overriden
    description = None
    # Modular input scheme use external validation, default is False
    use_external_validation = False
    # Modular input scheme use single instance mode, default is False
    use_single_instance = False
    # Use kvstore as checkpointer, default is True
    use_kvstore_checkpointer = True
    # Collection name of kvstore checkpointer, must be overriden if
    # use_kvstore_checkpointer is True
    kvstore_checkpointer_collection_name = None
    # Use hec event writer
    use_hec_event_writer = True
    # Input name of Splunk HEC, must be overriden if use_hec_event_writer
    # is True
    hec_input_name = None

    def __init__(self):
        # Validate properties
        self._validate_properties()
        # Modular input state
        self.should_exit = False
        # Metadata; populated from splunkd input by _update_metadata().
        self.server_host_name = None
        self.server_uri = None
        self.server_scheme = None
        self.server_host = None
        self.server_port = None
        self.session_key = None
        # Modular input config name (stanza name, or self.name in
        # single-instance mode); set by execute().
        self.config_name = None
        # Checkpoint dir
        self._checkpoint_dir = None
        # Checkpointer (lazily created, see ``checkpointer`` property)
        self._checkpointer = None
        # Orphan process monitor
        self._orphan_monitor = None
        # Event writer (lazily created, see ``event_writer`` property)
        self._event_writer = None

    def _validate_properties(self):
        '''Raise ModularInputException if required class attributes
        were not overridden by the subclass.'''
        if not all([self.app, self.name, self.title, self.description]):
            raise ModularInputException(
                'Attributes: "app", "name", "title", "description" must '
                'be overriden.')

        if self.use_kvstore_checkpointer:
            if self.kvstore_checkpointer_collection_name is None:
                # FIX: the two literals previously concatenated to
                # "mustbe overriden" (missing space).
                raise ModularInputException(
                    'Attribute: "kvstore_checkpointer_collection_name" must '
                    'be overriden if "use_kvstore_checkpointer" is True".')
            elif self.kvstore_checkpointer_collection_name.strip() == '':
                raise ModularInputException(
                    'Attribute: "kvstore_checkpointer_collection_name" can'
                    ' not be empty.')

        if self.use_hec_event_writer:
            if self.hec_input_name is None:
                raise ModularInputException(
                    'Attribute: "hec_input_name" must be overriden '
                    'if "use_hec_event_writer" is True.')
            elif self.hec_input_name.strip() == '':
                raise ModularInputException(
                    'Attribute: "hec_input_name" can not be empty.')

    @property
    def checkpointer(self):
        '''Get checkpointer object.

        The checkpointer returned depends on use_kvstore_checkpointer flag,
        if use_kvstore_checkpointer is true will return an
        KVStoreCheckpointer object else an FileCheckpointer object.

        :returns: An checkpointer object.
        :rtype: ``Checkpointer object``
        '''
        if self._checkpointer is not None:
            # FIX: previously returned self._checkpoint_dir (the checkpoint
            # directory string) instead of the cached checkpointer object.
            return self._checkpointer

        self._checkpointer = self._create_checkpointer()
        return self._checkpointer

    def _create_checkpointer(self):
        '''Build the concrete checkpointer selected by
        use_kvstore_checkpointer.'''
        if self.use_kvstore_checkpointer:
            checkpointer_name = ':'.join([
                self.app, self.config_name,
                self.kvstore_checkpointer_collection_name])
            try:
                return checkpointer.KVStoreCheckpointer(
                    checkpointer_name, self.session_key, self.app,
                    owner='nobody', scheme=self.server_scheme,
                    host=self.server_host, port=self.server_port)
            except binding.HTTPError:
                logging.error('Failed to init kvstore checkpointer: %s.',
                              traceback.format_exc())
                raise
        else:
            return checkpointer.FileCheckpointer(self._checkpoint_dir)

    @property
    def event_writer(self):
        '''Get event writer object.

        The event writer returned depends on use_hec_event_writer flag,
        if use_hec_event_writer is true will return an HECEventWriter
        object else an ClassicEventWriter object.

        :returns: Event writer object.
        :rtype: ``EventWriter object``
        '''
        if self._event_writer is not None:
            return self._event_writer

        self._event_writer = self._create_event_writer()
        return self._event_writer

    def _create_event_writer(self):
        '''Build the concrete event writer selected by
        use_hec_event_writer.'''
        if self.use_hec_event_writer:
            hec_input_name = ':'.join([self.app, self.hec_input_name])
            try:
                return event_writer.HECEventWriter(
                    hec_input_name, self.session_key,
                    scheme=self.server_scheme, host=self.server_host,
                    port=self.server_port)
            except binding.HTTPError:
                logging.error('Failed to init HECEventWriter: %s.',
                              traceback.format_exc())
                raise
        else:
            return event_writer.ClassicEventWriter()

    def _update_metadata(self, metadata):
        '''Cache splunkd connection info parsed from the input/validation
        definition metadata dict.'''
        self.server_host_name = metadata['server_host']
        splunkd = urlparse.urlsplit(metadata['server_uri'])
        self.server_uri = splunkd.geturl()
        self.server_scheme = splunkd.scheme
        self.server_host = splunkd.hostname
        self.server_port = splunkd.port
        self.session_key = metadata['session_key']
        self._checkpoint_dir = metadata['checkpoint_dir']

    def _do_scheme(self):
        '''Serialize this modular input's Scheme (title, description,
        flags and extra_arguments()) to XML for the --scheme call.'''
        scheme = Scheme(self.title)
        scheme.description = self.description
        scheme.use_external_validation = self.use_external_validation
        scheme.streaming_mode = Scheme.streaming_mode_xml
        scheme.use_single_instance = self.use_single_instance

        for argument in self.extra_arguments():
            name = argument['name']
            title = argument.get('title', None)
            description = argument.get('description', None)
            validation = argument.get('validation', None)
            data_type = argument.get('data_type', Argument.data_type_string)
            required_on_edit = argument.get('required_on_edit', False)
            required_on_create = argument.get('required_on_create', False)

            scheme.add_argument(
                Argument(name, title=title, description=description,
                         validation=validation, data_type=data_type,
                         required_on_edit=required_on_edit,
                         required_on_create=required_on_create))

        return ET.tostring(scheme.to_xml(), encoding=SCHEME_ENCODING)

    def extra_arguments(self):
        '''Extra arguments for modular input.

        Default implementation is returning an empty list.

        :returns: List of arguments like: [{'name': 'arg1',
                                            'title': 'arg1 title',
                                            'description': 'arg1 description',
                                            'validation':
                                            'arg1 validation statement',
                                            'data_type':
                                            Argument.data_type_string,
                                            'required_on_edit': False,
                                            'required_on_create': False},
                                            {...},
                                            {...}]
        :rtype: ``list``
        '''
        return []

    def do_validation(self, parameters):
        '''Handles external validation for modular input kinds.

        When Splunk calls a modular input script in validation mode, it
        will pass in an XML document giving information about the Splunk
        instance (so you can call back into it if needed) and the name and
        parameters of the proposed input. If this function does not throw
        an exception, the validation is assumed to succeed. Otherwise any
        errors thrown will be turned into a string and logged back to
        Splunk.

        :param parameters: The parameters of input passed by splunkd.

        :raises Exception: If validation is failed.
        '''
        pass

    @abstractmethod
    def do_run(self, inputs):
        '''Runs this modular input

        :param inputs: Command line arguments passed to this modular input.
            For single instance mode, inputs like: {
            'stanza_name1': {'arg1': 'arg1_value', 'arg2': 'arg2_value', ...}
            'stanza_name2': {'arg1': 'arg1_value', 'arg2': 'arg2_value', ...}
            'stanza_name3': {'arg1': 'arg1_value', 'arg2': 'arg2_value', ...}
            }.
            For multile instance mode, inputs like: {
            'stanza_name1': {'arg1': 'arg1_value', 'arg2': 'arg2_value', ...}
            }.
        :type inputs: ``dict``
        '''
        pass

    def register_teardown_handler(self, handler, *args):
        '''Register teardown signal handler.

        :param handler: Teardown signal handler.

        Usage::

           >>> mi = ModularInput(...)
           >>> def teardown_handler(arg1, arg2, ...):
           >>>     ...
           >>> mi.register_teardown_handler(teardown_handler, arg1, arg2, ...)
        '''

        def _teardown_handler(signum, frame):
            handler(*args)

        utils.handle_teardown_signals(_teardown_handler)

    def register_orphan_handler(self, handler, *args):
        '''Register orphan process handler.

        :param handler: Orphan process handler.

        Usage::

           >>> mi = ModularInput(...)
           >>> def orphan_handler(arg1, arg2, ...):
           >>>     ...
           >>> mi.register_orphan_handler(orphan_handler, arg1, arg2, ...)
        '''

        def _orphan_handler():
            handler(*args)

        # Only one monitor per modular input instance; it is stopped by
        # execute() when the input exits.
        if self._orphan_monitor is None:
            self._orphan_monitor = OrphanProcessMonitor(_orphan_handler)
            self._orphan_monitor.start()

    def get_validation_definition(self):
        '''Get validation definition.

        This method can be overwritten to get validation definition from
        other input instead `stdin`.

        :returns: A dict object must contains `metadata` and `parameters`,
            example: {
            'metadata': {
            'session_key': 'iCKPS0cvmpyeJk...sdaf',
            'server_host': 'test-test.com',
            'server_uri': 'https://127.0.0.1:8089',
            'checkpoint_dir': '/tmp'
            },
            'parameters': {'args1': value1, 'args2': value2}
            }
        :rtype: ``dict``
        '''
        validation_definition = ValidationDefinition.parse(sys.stdin)
        return {
            'metadata': validation_definition.metadata,
            'parameters': validation_definition.parameters
        }

    def get_input_definition(self):
        '''Get input definition.

        This method can be overwritten to get input definition from
        other input instead `stdin`.

        :returns: A dict object must contains `metadata` and `inputs`,
            example: {
            'metadata': {
            'session_key': 'iCKPS0cvmpyeJk...sdaf',
            'server_host': 'test-test.com',
            'server_uri': 'https://127.0.0.1:8089',
            'checkpoint_dir': '/tmp'
            },
            'inputs': {
            'stanza1': {'arg1': value1, 'arg2': value2},
            'stanza2': {'arg1': value1, 'arg2': value2}
            }
            }
        :rtype: ``dict``
        '''
        input_definition = InputDefinition.parse(sys.stdin)
        return {
            'metadata': input_definition.metadata,
            'inputs': input_definition.inputs
        }

    def execute(self):
        '''Modular input entry.

        Dispatches on sys.argv: no extra argument streams events via
        do_run(), '--scheme' prints the scheme XML, and
        '--validate-arguments' runs do_validation(). Returns 0 on success,
        1 on failure (used as the process exit value).

        Usage::

           >>> class TestModularInput(ModularInput):
           >>>     ... .. .
           >>>
           >>> if __name__ == '__main__':
           >>>     md = TestModularInput()
           >>>     md.execute()
        '''
        if len(sys.argv) == 1:
            try:
                input_definition = self.get_input_definition()
                self._update_metadata(input_definition['metadata'])
                if self.use_single_instance:
                    self.config_name = self.name
                else:
                    # Multi-instance mode: exactly one stanza is passed in,
                    # use its name as the config name.
                    self.config_name = list(
                        input_definition['inputs'].keys())[0]
                self.do_run(input_definition['inputs'])
                logging.info('Modular input: %s exit normally.', self.name)
                return 0
            except Exception:
                logging.error('Modular input: %s exit with exception: %s.',
                              self.name, traceback.format_exc())
                return 1
            finally:
                # Stop orphan monitor if any
                if self._orphan_monitor:
                    self._orphan_monitor.stop()

        elif str(sys.argv[1]).lower() == '--scheme':
            sys.stdout.write(self._do_scheme())
            sys.stdout.flush()
            return 0

        elif sys.argv[1].lower() == '--validate-arguments':
            try:
                validation_definition = self.get_validation_definition()
                self._update_metadata(validation_definition['metadata'])
                self.do_validation(validation_definition['parameters'])
                return 0
            except Exception as e:
                logging.error(
                    'Modular input: %s validate arguments with exception: %s.',
                    self.name, traceback.format_exc())
                # Report the failure back to splunkd as an XML error
                # document on stderr.
                root = ET.Element('error')
                ET.SubElement(root, 'message').text = str(e)
                sys.stderr.write(ET.tostring(root))
                sys.stderr.flush()
                return 1

        else:
            logging.error(
                'Modular input: %s run with invalid arguments: "%s".',
                self.name, ' '.join(sys.argv[1:]))
            return 1
class Logs(with_metaclass(Singleton, object)):
    '''A singleton class that manage all kinds of logger.

    Usage::

       >>> from solnlib.import log
       >>> log.Logs.set_context(directory='/var/log/test',
                                namespace='test')
       >>> logger = log.Logs().get_logger('mymodule')
       >>> logger.set_level(logging.DEBUG)
       >>> logger.debug('a debug log')
    '''

    # Normal logger settings
    _default_directory = None
    _default_namespace = None
    _default_log_format = (
        '%(asctime)s %(levelname)s pid=%(process)d tid=%(threadName)s '
        'file=%(filename)s:%(funcName)s:%(lineno)d | %(message)s')
    _default_log_level = logging.INFO
    _default_max_bytes = 25000000
    _default_backup_count = 5
    # Default root logger settings
    _default_root_logger_log_file = 'solnlib'
    # Handler installed on the root logger by the most recent
    # _reset_root_logger() call; tracked so repeated set_context() calls
    # replace it instead of stacking duplicates.
    _root_logger_file_handler = None

    @classmethod
    def set_context(cls, **context):
        '''set log context.

        :param directory: (optional) Log directory, default is splunk log
            root directory.
        :type directory: ``string``
        :param namespace: (optional) Logger namespace, default is None.
        :type namespace: ``string``
        :param log_format: (optional) Log format, default is:
            '%(asctime)s %(levelname)s pid=%(process)d tid=%(threadName)s
            file=%(filename)s:%(funcName)s:%(lineno)d | %(message)s'.
        :type log_format: ``string``
        :param log_level: (optional) Log level, default is logging.INFO.
        :type log_level: ``integer``
        :param max_bytes: (optional) The maximum log file size before
            rollover, default is 25000000.
        :type max_bytes: ``integer``
        :param backup_count: (optional) The number of log files to retain,
            default is 5.
        :type backup_count: ``integer``
        :param root_logger_log_file: (optional) Root logger log file name,
            default is 'solnlib'.
        :type root_logger_log_file: ``string``
        '''
        if 'directory' in context:
            cls._default_directory = context['directory']
        if 'namespace' in context:
            cls._default_namespace = context['namespace']
        if 'log_format' in context:
            cls._default_log_format = context['log_format']
        if 'log_level' in context:
            cls._default_log_level = context['log_level']
        if 'max_bytes' in context:
            cls._default_max_bytes = context['max_bytes']
        if 'backup_count' in context:
            cls._default_backup_count = context['backup_count']
        if 'root_logger_log_file' in context:
            cls._default_root_logger_log_file = context['root_logger_log_file']
        cls._reset_root_logger()

    @classmethod
    def _reset_root_logger(cls):
        '''(Re)install a rotating file handler on the root logger using
        the current context settings.'''
        logger = logging.getLogger()
        log_file = cls._get_log_file(cls._default_root_logger_log_file)
        file_handler = logging.handlers.RotatingFileHandler(
            log_file, mode='a', maxBytes=cls._default_max_bytes,
            backupCount=cls._default_backup_count)
        file_handler.setFormatter(logging.Formatter(cls._default_log_format))
        # FIX: remove the handler added by a previous set_context() call;
        # otherwise every call stacks another handler on the root logger
        # and records get written multiple times.
        if cls._root_logger_file_handler is not None:
            logger.removeHandler(cls._root_logger_file_handler)
        logger.addHandler(file_handler)
        cls._root_logger_file_handler = file_handler
        logger.setLevel(cls._default_log_level)

    @classmethod
    def _get_log_file(cls, name):
        '''Resolve the absolute log file path for logger `name`, applying
        the optional namespace prefix and configured directory.

        :raises LogException: If no directory is configured and the splunk
            home directory cannot be determined.
        '''
        if cls._default_namespace:
            name = '{}_{}.log'.format(cls._default_namespace, name)
        else:
            name = '{}.log'.format(name)

        if cls._default_directory:
            directory = cls._default_directory
        else:
            try:
                directory = make_splunkhome_path(['var', 'log', 'splunk'])
            except KeyError:
                raise LogException(
                    'Log directory is empty, please set log directory '
                    'by calling Logs.set_context(directory="/var/log/...").')
        log_file = op.sep.join([directory, name])

        return log_file

    def __init__(self):
        # Guards _loggers against concurrent get_logger/set_level calls.
        self._lock = Lock()
        # Cache of created loggers keyed by log file path.
        self._loggers = {}

    def get_logger(self, name):
        '''Get logger with the name of `name`.

        If logger with the name of `name` exists just return else create a
        new logger with the name of `name`.

        :param name: Logger name, it will be used as log file name too.
        :type name: ``string``
        :returns: A named logger.
        :rtype: ``logging.Logger``
        '''
        with self._lock:
            log_file = self._get_log_file(name)
            if log_file in self._loggers:
                return self._loggers[log_file]

            logger = logging.getLogger(log_file)
            # FIX: use getattr — only file-based handlers have
            # `baseFilename`; a plain `h.baseFilename` raises
            # AttributeError for other handler types.
            handler_exists = any(
                getattr(h, 'baseFilename', None) == log_file
                for h in logger.handlers)
            if not handler_exists:
                file_handler = logging.handlers.RotatingFileHandler(
                    log_file, mode='a', maxBytes=self._default_max_bytes,
                    backupCount=self._default_backup_count)
                file_handler.setFormatter(
                    logging.Formatter(self._default_log_format))
                logger.addHandler(file_handler)
                logger.setLevel(self._default_log_level)
                # Records are written by our own handler; don't let them
                # bubble up to the root logger as well.
                logger.propagate = False

            self._loggers[log_file] = logger
            return logger

    def set_level(self, level, name=None):
        '''Set log level of logger.

        Set log level of all logger if `name` is None else of logger with
        the name of `name`.

        :param level: Log level to set.
        :type level: ``integer``
        :param name: (optional) The name of logger, default is None.
        :type name: ``string``
        '''
        with self._lock:
            if name:
                log_file = self._get_log_file(name)
                logger = self._loggers.get(log_file)
                if logger:
                    logger.setLevel(level)
            else:
                self._default_log_level = level
                for logger in list(self._loggers.values()):
                    logger.setLevel(level)
                logging.getLogger().setLevel(level)
class EventWriter(with_metaclass(ABCMeta, object)):
    '''Abstract interface for writing modular-input events to Splunk.

    Subclasses implement event construction (``create_event``) and
    delivery (``write_events``) for a particular transport.
    '''

    description = 'EventWriter'

    @abstractmethod
    def create_event(self, data, time=None,
                     index=None, host=None, source=None, sourcetype=None,
                     fields=None, stanza=None, unbroken=False, done=False):
        '''Build a new event object for later delivery via ``write_events``.

        :param data: Event data.
        :type data: ``json object``
        :param time: (optional) Event timestamp, default is None.
        :type time: ``float``
        :param index: (optional) The index event will be written to, default
           is None.
        :type index: ``string``
        :param host: (optional) Event host, default is None.
        :type host: ``string``
        :param source: (optional) Event source, default is None.
        :type source: ``string``
        :param sourcetype: (optional) Event sourcetype, default is None.
        :type sourcetype: ``string``
        :param fields: (optional) Event fields, default is None.
        :type fields: ``json object``
        :param stanza: (optional) Event stanza name, default is None.
        :type stanza: ``string``
        :param unbroken: (optional) Event unbroken flag, default is False.
            Only meaningful for XMLEvent when using ClassicEventWriter.
        :type unbroken: ``bool``
        :param done: (optional) The last unbroken event, default is False.
            Only meaningful for XMLEvent when using ClassicEventWriter.
        :type done: ``bool``
        :returns: A new event object.
        :rtype: ``(XMLEvent, HECEvent)``

        Usage::

           >>> ew = event_writer.HECEventWriter(...)
           >>> event = ew.create_event(
           >>>     data='This is a test data.',
           >>>     time='%.3f' % 1372274622.493,
           >>>     index='main',
           >>>     host='localhost',
           >>>     source='Splunk',
           >>>     sourcetype='misc',
           >>>     fields='{'accountid': '603514901691', 'Cloud': u'AWS'}'
           >>>     stanza='test_scheme://test',
           >>>     unbroken=True,
           >>>     done=True)
        '''

    @abstractmethod
    def write_events(self, events):
        '''Deliver a batch of previously created events.

        :param events: List of events to write.
        :type events: ``list``

        Usage::

           >>> from solnlib.modular_input import event_writer
           >>> ew = event_writer.EventWriter(...)
           >>> ew.write_events([event1, event2])
        '''