def update_log_checkpoint(log_type, log_offset, child_account_id):
    """
    Save log_offset to the checkpoint file for log_type.

    The file is created in the directory returned by
    Config.get_checkpoint_dir(). Its name is
    "<log_type>_checkpoint_data_<child_account_id>.txt" when child_account_id
    is truthy and "<log_type>_checkpoint_data.txt" otherwise.

    @param log_type         Used to determine which checkpoint file to open
    @param log_offset       Information to save in the checkpoint file; it is
                            serialized with json.dumps
    @param child_account_id Account id appended to the checkpoint filename, or
                            a falsy value when no suffix should be used
    """
    Program.log(
        f"{log_type} consumer: saving latest log offset to a "
        "checkpointing file", logging.INFO)

    if child_account_id:
        checkpoint_name = (
            f"{log_type}_checkpoint_data_" + child_account_id + ".txt")
    else:
        checkpoint_name = f"{log_type}_checkpoint_data.txt"
    checkpoint_filename = os.path.join(
        Config.get_checkpoint_dir(), checkpoint_name)

    # Serialize before opening: opening in 'w' mode truncates the file, so a
    # serialization error must not be able to wipe the previous checkpoint
    checkpoint_data = json.dumps(log_offset) + '\n'

    # The 'with' statement closes (and therefore flushes) the file even if
    # the write raises
    with open(checkpoint_filename, 'w') as checkpoint_file:
        checkpoint_file.write(checkpoint_data)
def main():
    """
    Entry point for DuoLogSync.

    Parses the config path from the command line, installs the Ctrl-C
    handler, loads the config, sets up logging, creates the writers and the
    Producer/Consumer tasks, and runs those tasks on the event loop until
    they all complete.
    """
    parser = argparse.ArgumentParser(prog='duologsync',
                                     description="Path to config file")
    parser.add_argument('ConfigPath', metavar='config-path', type=str,
                        help='Config to start application')
    cli_args = parser.parse_args()

    # Ctrl-C triggers a program shutdown through sigint_handler
    signal.signal(signal.SIGINT, sigint_handler)

    # Build the config Dictionary from the YAML file and register it
    loaded_config = Config.create_config(cli_args.ConfigPath)
    Config.set_config(loaded_config)

    Program.setup_logging(Config.get_log_filepath())

    # server id -> writer, used by the consumer tasks
    writers_by_server = Writer.create_writers(Config.get_servers())

    # Producer/Consumer objects wrapped as asyncio tasks
    pending_tasks = create_tasks(writers_by_server)

    # Drive every Producer and Consumer to completion, then release the loop
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(asyncio.gather(*pending_tasks))
    event_loop.close()

    if not Program.is_logging_set():
        return

    print(f"DuoLogSync: shutdown successfully. Check "
          f"{Config.get_log_filepath()} for program logs")
def create_admin(ikey, skey, host, is_msp=False):
    """
    Build a duo_client API object for the given credentials.

    A duo_client.Accounts object is created when is_msp is truthy, otherwise
    a duo_client.Admin object. Either way the creation is logged at INFO
    level.

    @param ikey   Duo Client ID (Integration Key)
    @param skey   Duo Client Secret for proving identity / access (Secret Key)
    @param host   URI where data / logs will be fetched from
    @param is_msp Indicates whether an MSP account is used for log retrieval

    @return the newly created duo_client object
    """
    client_kwargs = {
        'ikey': ikey,
        'skey': skey,
        'host': host,
        'user_agent': f"Duo Log Sync/{__version__}",
    }

    if not is_msp:
        admin = duo_client.Admin(**client_kwargs)
        Program.log(
            f"duo_client Admin initialized for ikey: {ikey}, host: {host}",
            logging.INFO)
        return admin

    admin = duo_client.Accounts(**client_kwargs)
    Program.log(
        f"duo_client Account_Admin initialized for ikey: {ikey}, host: {host}",
        logging.INFO)
    return admin
def create_config(cls, config_filepath):
    """
    Attempt to read the file at config_filepath and generate a config
    Dictionary object based on a defined JSON schema.

    On success the api timeout is raised to cls.API_TIMEOUT_DEFAULT if it is
    lower, and the api offset (a number of days) is rewritten as the Unix
    timestamp of "now minus offset days".

    On failure (file cannot be opened, invalid YAML, or schema validation
    error) a hint is logged and Program.initiate_shutdown is called with the
    reason.

    @param config_filepath File from which to generate a config object

    @return the validated config Dictionary, or None if the config could not
            be created
    """
    # Function-scope import so this block does not depend on the module's
    # import list already providing 'timezone'
    from datetime import timezone

    shutdown_reason = None

    try:
        with open(config_filepath) as config_file:
            # PyYAML gives better error messages for streams than for files
            config_file_data = config_file.read()

        config = yaml.full_load(config_file_data)

        # Check config against a schema to ensure all the needed fields
        # and values are defined
        config = cls._validate_and_normalize_config(config)

        api_settings = config.get('dls_settings').get('api')
        if api_settings.get('timeout') < cls.API_TIMEOUT_DEFAULT:
            config['dls_settings']['api'][
                'timeout'] = cls.API_TIMEOUT_DEFAULT
            Program.log(
                'DuoLogSync: Setting default api timeout to 120 seconds.'
            )

    # Will occur when given a bad filepath or a bad file
    except OSError as os_error:
        shutdown_reason = f"{os_error}"
        Program.log('DuoLogSync: Failed to open the config file. Check '
                    'that the filename is correct')

    # Will occur if the config file does not contain valid YAML
    except YAMLError as yaml_error:
        shutdown_reason = f"{yaml_error}"
        Program.log('DuoLogSync: Failed to parse the config file. Check '
                    'that the config file has valid YAML.')

    # Validation of the config against a schema failed
    except ValueError:
        shutdown_reason = f"{cls.SCHEMA_VALIDATOR.errors}"
        Program.log('DuoLogSync: Validation of the config file failed. '
                    'Check that required fields have proper values.')

    # No exception raised during the try block, return config
    else:
        # Calculate offset as a timestamp and rewrite its value in config.
        # An aware UTC datetime is required here: timestamp() interprets a
        # naive datetime (such as the one from utcnow()) as local time, which
        # would skew the result by the host's UTC offset.
        offset = config.get('dls_settings').get('api').get('offset')
        offset = datetime.now(timezone.utc) - timedelta(days=offset)
        config['dls_settings']['api']['offset'] = int(offset.timestamp())
        return config

    # At this point, it is guaranteed that an exception was raised, which
    # means that it is shutdown time
    Program.initiate_shutdown(shutdown_reason)
    return None
def sigint_handler(signal_number, stack_frame):
    """
    SIGINT (Ctrl-C) handler: initiate a DuoLogSync shutdown and, when a stack
    frame is supplied, log it at INFO level.

    @param signal_number Number of the signal that was received
    @param stack_frame   Stack frame passed to the handler, may be falsy
    """
    Program.initiate_shutdown(f"received signal {signal_number} (Ctrl-C)")

    if not stack_frame:
        return

    Program.log(f"DuoLogSync: stack frame from Ctrl-C is {stack_frame}",
                logging.INFO)
def create_consumer_producer_pair(endpoint, writer, admin, child_account=None):
    """
    Build the Producer and Consumer for one endpoint and schedule both as
    asyncio tasks.

    When Config.account_is_msp() is true the producer is given
    admin.json_api_call together with the child account id and the endpoint's
    URL path; otherwise it is given the endpoint-specific getter of admin.

    @param endpoint      Log type to create producer/consumer pair for
    @param writer        Object for writing logs to a server
    @param admin         Object from which to get the correct API endpoints
    @param child_account If present, the account id handed to the producer
                         (MSP accounts only) and to the consumer

    @return list of asyncio tasks for running the Producer and Consumer
            objects, or an empty list when endpoint is not recognized
    """
    # Format applied to each log before it is sent
    fmt = Config.get_log_format()
    # Queue shared by the producer and the consumer of this endpoint
    queue = asyncio.Queue()

    if endpoint == Config.AUTH:
        if not Config.account_is_msp():
            producer = AuthlogProducer(admin.get_authentication_log, queue)
        else:
            producer = AuthlogProducer(
                admin.json_api_call, queue,
                child_account_id=child_account,
                url_path="/admin/v2/logs/authentication")
        consumer = AuthlogConsumer(fmt, queue, writer, child_account)

    elif endpoint == Config.TELEPHONY:
        if not Config.account_is_msp():
            producer = TelephonyProducer(admin.get_telephony_log, queue)
        else:
            producer = TelephonyProducer(
                admin.json_api_call, queue,
                child_account_id=child_account,
                url_path='/admin/v1/logs/telephony')
        consumer = TelephonyConsumer(fmt, queue, writer, child_account)

    elif endpoint == Config.ADMIN:
        if not Config.account_is_msp():
            producer = AdminactionProducer(admin.get_administrator_log, queue)
        else:
            producer = AdminactionProducer(
                admin.json_api_call, queue,
                child_account_id=child_account,
                url_path='/admin/v1/logs/administrator')
        consumer = AdminactionConsumer(fmt, queue, writer, child_account)

    else:
        Program.log(f"{endpoint} is not a recognized endpoint",
                    logging.WARNING)
        return []

    producer_task = asyncio.ensure_future(producer.produce())
    consumer_task = asyncio.ensure_future(consumer.consume())
    return [producer_task, consumer_task]
def connection_lost(self, exc):
    """
    Initiate a program shutdown because the UDP connection was closed.

    NOTE(review): presumably the asyncio protocol callback of the same name;
    confirm against the enclosing class.

    @param exc Reason the connection was closed; when falsy, no reason is
               included in the shutdown message
    """
    # The trailing space matters: without it the port number runs straight
    # into "was closed" in the resulting message
    shutdown_reason = (
        f"UDP connection with host-{self.host} and port-{self.port} "
        "was closed")

    if exc:
        shutdown_reason += f" for the following reason [{exc}]"

    Program.initiate_shutdown(shutdown_reason)
def get_log_offset(log_type, recover_log_offset, checkpoint_directory,
                   child_account_id=None):
    """
    Retrieve the offset from which logs of log_type should be fetched, either
    by using the default offset or by using the value saved in a checkpoint
    file.

    The default offset is Config.get_api_offset(), multiplied by 1000 for the
    auth log type. When recover_log_offset is truthy and the checkpoint file
    can be read, its JSON content replaces the default.

    @param log_type             Name of the log for which recovery is
                                occurring
    @param recover_log_offset   Whether checkpoint files should be used to
                                retrieve log offset info
    @param checkpoint_directory Directory containing log offset checkpoint
                                files
    @param child_account_id     If truthy, selects the checkpoint file whose
                                name carries this account id as a suffix

    @return the last offset read for a log type based on checkpointing data,
            or the default offset when no checkpoint was used
    """
    milliseconds_per_second = 1000
    log_offset = Config.get_api_offset()

    # Auth must have timestamp represented in milliseconds, not seconds
    if log_type == Config.AUTH:
        log_offset *= milliseconds_per_second

    if not recover_log_offset:
        return log_offset

    # In this case, look for a checkpoint file from which to read the offset
    if child_account_id:
        checkpoint_name = (
            f"{log_type}_checkpoint_data_" + child_account_id + ".txt")
    else:
        checkpoint_name = f"{log_type}_checkpoint_data.txt"
    checkpoint_file_path = os.path.join(checkpoint_directory, checkpoint_name)

    try:
        # Open the checkpoint file, 'with' statement automatically closes it
        with open(checkpoint_file_path) as checkpoint:
            # Set log_offset equal to the contents of the checkpoint file
            log_offset = json.loads(checkpoint.read())

    # Most likely, the checkpoint file doesn't exist
    except OSError:
        # Both literals need the f prefix, otherwise "{log_offset}" is
        # logged verbatim instead of the fallback offset value
        Program.log(f"Could not read checkpoint file for {log_type} logs, "
                    f"consuming logs from {log_offset} timestamp")

    return log_offset
async def add_logs_to_queue(self, logs):
    """
    Record the offset of logs on this producer and put the logs on the
    producer's log queue.

    @param logs The logs to be added; a dict response is unwrapped through
                its 'authlogs' key before being queued
    """
    # Important for recovery in the event of a crash
    self.log_offset = Producer.get_log_offset(logs)

    # Authlogs v2 endpoint returns dict response
    batch = logs['authlogs'] if isinstance(logs, dict) else logs

    Program.log(f"{self.log_type} producer: adding {len(batch)} "
                "logs to the queue", logging.INFO)

    await self.log_queue.put(batch)

    Program.log(f"{self.log_type} producer: added {len(batch)} "
                "logs to the queue", logging.INFO)
def main():
    """
    Entry point for DuoLogSync.

    Parses the config path from the command line, installs the Ctrl-C
    handler and loads the config. If the 'trustmonitor' endpoint is
    configured together with a non-JSON log format or an MSP account, a
    warning is logged and the function returns early. Otherwise logging is
    set up, writers and Producer/Consumer tasks are created, and the tasks
    are run on the event loop until they all complete.
    """
    parser = argparse.ArgumentParser(prog='duologsync',
                                     description="Path to config file")
    parser.add_argument('ConfigPath', metavar='config-path', type=str,
                        help='Config to start application')
    cli_args = parser.parse_args()

    # Ctrl-C triggers a program shutdown through sigint_handler
    signal.signal(signal.SIGINT, sigint_handler)

    # Build the config Dictionary from the YAML file and register it
    loaded_config = Config.create_config(cli_args.ConfigPath)
    Config.set_config(loaded_config)

    # Extra checks for Trust Monitor support
    uses_trust_monitor = check_for_specific_endpoint('trustmonitor',
                                                     loaded_config)
    log_format = Config.get_log_format()
    is_msp = Config.account_is_msp()

    if uses_trust_monitor:
        if log_format != 'JSON':
            Program.log(f"DuoLogSync: Trust Monitor endpoint only supports JSON",
                        logging.WARNING)
            return

        if is_msp:
            Program.log(
                f"DuoLogSync: Trust Monitor endpoint only supports non-msp",
                logging.WARNING)
            return

    Program.setup_logging(Config.get_log_filepath())

    # server id -> writer, used by the consumer tasks
    writers_by_server = Writer.create_writers(Config.get_servers())

    # Producer/Consumer objects wrapped as asyncio tasks
    pending_tasks = create_tasks(writers_by_server)

    # Drive every Producer and Consumer to completion, then release the loop
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(asyncio.gather(*pending_tasks))
    event_loop.close()

    if not Program.is_logging_set():
        return

    print(f"DuoLogSync: shutdown successfully. Check "
          f"{Config.get_log_filepath()} for program logs")
def create_consumer_producer_pair(endpoint, writer, admin):
    """
    Build the Producer and Consumer for one endpoint and schedule both as
    asyncio tasks.

    @param endpoint Log type to create producer/consumer pair for
    @param writer   Object for writing logs to a server
    @param admin    Object from which to get the correct API endpoints

    @return list of asyncio tasks for running the Producer and Consumer
            objects, or an empty list when endpoint is not recognized
    """
    # Format applied to each log before it is sent
    fmt = Config.get_log_format()
    # Queue shared by the producer and the consumer of this endpoint
    queue = asyncio.Queue()

    if endpoint == Config.AUTH:
        producer = AuthlogProducer(admin.get_authentication_log, queue)
        consumer = AuthlogConsumer(fmt, queue, writer)
    elif endpoint == Config.TELEPHONY:
        producer = TelephonyProducer(admin.get_telephony_log, queue)
        consumer = TelephonyConsumer(fmt, queue, writer)
    elif endpoint == Config.ADMIN:
        producer = AdminactionProducer(admin.get_administrator_log, queue)
        consumer = AdminactionConsumer(fmt, queue, writer)
    else:
        Program.log(f"{endpoint} is not a recognized endpoint",
                    logging.WARNING)
        return []

    producer_task = asyncio.ensure_future(producer.produce())
    consumer_task = asyncio.ensure_future(consumer.consume())
    return [producer_task, consumer_task]
async def create_writer(self, host, port, cert_filepath):
    """
    Open the connection matching self.protocol ('UDP', 'TCP' or 'TCPSSL')
    and return the object used to write over it.

    On failure the program shutdown is initiated with the failure reason and
    a help message is logged at ERROR level.

    @param host          Hostname of the network connection to establish
    @param port          Port of the network connection to establish
    @param cert_filepath Path to file containing SSL certificate; only used
                         for the 'TCPSSL' protocol

    @return a 'writer' object for writing data over the connection made, or
            None if an error occurred (also None when self.protocol matches
            none of the supported values)
    """
    Program.log(f"DuoLogSync: Opening connection to {host}:{port}",
                logging.INFO)

    # Default hint logged if anything below fails
    help_message = (f"DuoLogSync: check that host-{host} and port-{port} "
                    "are correct in the config file")
    writer = None

    try:
        if self.protocol == 'UDP':
            writer = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

        elif self.protocol == 'TCP':
            writer = await Writer.create_tcp_writer(host, port)

        elif self.protocol == 'TCPSSL':
            tls_context = ssl.create_default_context(
                ssl.Purpose.SERVER_AUTH, cafile=cert_filepath)
            writer = await Writer.create_tcp_writer(host, port, tls_context)

    except FileNotFoundError:
        # The certificate file could not be opened
        shutdown_reason = f"{cert_filepath} could not be opened."
        help_message = (
            'DuoLogSync: Make sure the filepath for SSL cert file is '
            'correct.')

    except asyncio.TimeoutError:
        # Connection attempt timed out (60 seconds per the message below;
        # the limit itself is presumably enforced by create_tcp_writer)
        shutdown_reason = 'connection to server timed-out after 60 seconds'

    except (gaierror, OSError) as error:
        # Invalid hostname or port, or the connection simply failed
        shutdown_reason = f"{error}"

    else:
        # No error occurred, hand the writer back to the caller
        return writer

    Program.initiate_shutdown(shutdown_reason)
    Program.log(help_message, logging.ERROR)
    return None
async def restless_sleep(duration):
    """
    Sleep for duration seconds in one-second steps, checking after each step
    that DuoLogSync is still running.

    Sleeping in small steps lets a producer that is in the middle of a long
    poll notice a program shutdown promptly instead of only after the whole
    poll interval has elapsed.

    @param duration The number of seconds to sleep for; a value of zero or
                    less returns immediately

    @raise ProgramShutdownError if the program stops running during the sleep
    """
    remaining = duration

    while remaining > 0:
        await asyncio.sleep(1)

        # Program is done running, raise an exception to be caught
        if not Program.is_running():
            raise ProgramShutdownError

        remaining = remaining - 1
def create_admin(ikey, skey, host, is_msp=False, proxy_server=None, proxy_port=None):
    """
    Build a duo_client API object for the given credentials, optionally
    routed through an HTTP proxy.

    A duo_client.Accounts object is created when is_msp is truthy, otherwise
    a duo_client.Admin object. The proxy is configured only when both
    proxy_server and proxy_port are truthy.

    @param ikey         Duo Client ID (Integration Key)
    @param skey         Duo Client Secret for proving identity / access
                        (Secret Key)
    @param host         URI where data / logs will be fetched from
    @param is_msp       Indicates whether an MSP account is used for log
                        retrieval
    @param proxy_server Host/IP of Http Proxy if in use or None
    @param proxy_port   Port of Http Proxy if in use or None

    @return the newly created duo_client object
    """
    client_kwargs = {
        'ikey': ikey,
        'skey': skey,
        'host': host,
        'user_agent': f"Duo Log Sync/{__version__}",
    }

    if is_msp:
        admin = duo_client.Accounts(**client_kwargs)
        Program.log(f"duo_client Account_Admin initialized for ikey: {ikey}, host: {host}", logging.INFO)
    else:
        admin = duo_client.Admin(**client_kwargs)
        Program.log(f"duo_client Admin initialized for ikey: {ikey}, host: {host}", logging.INFO)

    # Without a complete proxy definition there is nothing left to configure
    if not (proxy_server and proxy_port):
        return admin

    admin.set_proxy(host=proxy_server, port=proxy_port)
    Program.log(f"duo_client Proxy configured: {proxy_server}:{proxy_port}", logging.INFO)
    return admin
def test_is_logging_set(self):
    """
    is_logging_set() is expected to be False at the start of the test and
    True once Program._logging_set has been set to True.
    """
    state_before = Program.is_logging_set()
    self.assertEqual(state_before, False)

    Program._logging_set = True

    state_after = Program.is_logging_set()
    self.assertEqual(state_after, True)
def test_initiate_shutdown(self):
    """
    Program._running is expected to be True at the start of the test and
    False after initiate_shutdown() has been called.
    """
    running_before = Program._running
    self.assertEqual(running_before, True)

    Program.initiate_shutdown('test')

    running_after = Program._running
    self.assertEqual(running_after, False)
async def produce(self):
    """
    Main loop of this class and its subclasses.

    While the program is running: sleep for the configured api timeout, call
    the log API, and queue any logs returned. Host or credential errors
    initiate a program shutdown. When the loop ends, an empty list is put on
    the queue so that a consumer waiting on it is unblocked.
    """
    # Exit when DuoLogSync is shutting down (due to error or Ctrl-C)
    while Program.is_running():
        failure_reason = None

        Program.log(
            f"{self.log_type} producer: begin polling for "
            f"{Config.get_api_timeout()} seconds", logging.INFO)

        try:
            # Sleep for api_timeout amount of time, but check for program
            # shutdown every second
            await restless_sleep(Config.get_api_timeout())

            Program.log(f"{self.log_type} producer: fetching logs",
                        logging.INFO)
            fetched = await self.call_log_api()

            if not fetched:
                Program.log(
                    f"{self.log_type} producer: no new logs available",
                    logging.INFO)
            else:
                await self.add_logs_to_queue(self.get_logs(fetched))

        except (gaierror, OSError) as error:
            # A badly malformed hostname was provided for the duoclient host
            failure_reason = f"{self.log_type} producer: [{error}]"
            Program.log('DuoLogSync: check that the duoclient host '
                        'provided in the config file is correct')

        except RuntimeError as runtime_error:
            # duo_client throws a RuntimeError if the ikey or skey is invalid
            failure_reason = f"{self.log_type} producer: [{runtime_error}]"
            Program.log('DuoLogSync: check that the duoclient ikey and '
                        'skey in the config file are correct')

        except ProgramShutdownError:
            # Shutdown was noticed while sleeping, stop producing
            break

        if failure_reason:
            Program.initiate_shutdown(failure_reason)

    # Unblock the consumer by putting anything in the shared queue
    await self.log_queue.put([])
    Program.log(f"{self.log_type} producer: shutting down", logging.INFO)
async def consume(self):
    """
    Consumer that will consume data from a queue shared with a producer
    object. Data from the queue is then sent over a configured transport
    protocol to respective SIEMs or servers.

    Each batch taken from the queue is written log by log through
    self.writer. After a write attempt the offset of the last log written is
    computed with Producer.get_log_offset and saved with
    update_log_checkpoint. A BrokenPipeError during writing initiates a
    program shutdown. An empty batch is only logged, nothing is written.

    NOTE(review): the nesting of the checkpoint update inside 'finally' was
    reconstructed from a whitespace-mangled source - confirm against the
    original file.
    """
    while Program.is_running():
        Program.log(f"{self.log_type} consumer: waiting for logs",
                    logging.INFO)

        # Call unblocks only when there is an element in the queue to get
        logs = await self.log_queue.get()

        # Time to shutdown; 'continue' re-evaluates the loop condition,
        # which is now false, so the loop ends
        if not Program.is_running():
            continue

        Program.log(
            f"{self.log_type} consumer: received {len(logs)} logs "
            "from producer", logging.INFO)

        # Keep track of the latest log written in the case that a problem
        # occurs in the middle of writing logs
        last_log_written = None
        successful_write = False

        # If we are sending empty [] to unblock consumers, nothing should be written to file
        if logs:
            try:
                Program.log(f"{self.log_type} consumer: writing logs",
                            logging.INFO)
                for log in logs:
                    # Tag each log with the child account id when one is set
                    if self.child_account_id:
                        log['child_account_id'] = self.child_account_id
                    await self.writer.write(self.format_log(log))
                    last_log_written = log

                # All the logs were written successfully
                successful_write = True

            # Specifically watch out for errno 32 - Broken pipe. This means
            # that the connect established by writer was reset or shutdown.
            except BrokenPipeError as broken_pipe_error:
                shutdown_reason = f"{broken_pipe_error}"
                Program.initiate_shutdown(shutdown_reason)
                Program.log("DuoLogSync: connection to server was reset",
                            logging.WARNING)

            finally:
                if successful_write:
                    Program.log(
                        f"{self.log_type} consumer: successfully wrote "
                        "all logs", logging.INFO)
                else:
                    Program.log(
                        f"{self.log_type} consumer: failed to write "
                        "some logs", logging.WARNING)

                # NOTE(review): last_log_written is still None here if the
                # very first write failed - confirm that
                # Producer.get_log_offset handles None
                self.log_offset = Producer.get_log_offset(last_log_written)
                self.update_log_checkpoint(self.log_type, self.log_offset, self.child_account_id)
        else:
            Program.log(f"{self.log_type} consumer: No logs to write",
                        logging.INFO)

    Program.log(f"{self.log_type} consumer: shutting down", logging.INFO)
def test_setup_logging_normal(self):
    """
    After setup_logging() is called with a file path, Program._logging_set
    is expected to be True.
    """
    Program.setup_logging('logs.txt')

    logging_flag = Program._logging_set
    self.assertEqual(logging_flag, True)
def test_is_running(self):
    """
    is_running() is expected to be True at the start of the test and False
    once Program._running has been set to False.
    """
    state_before = Program.is_running()
    self.assertEqual(state_before, True)

    Program._running = False

    state_after = Program.is_running()
    self.assertEqual(state_after, False)
def test_log_without_logging_set(self, mock_print):
    """
    Program.log() is expected to call the mocked object exactly once when it
    is used in this test.

    @param mock_print Mock injected into the test (presumably a patched
                      print, applied by a decorator not visible here)
    """
    message = 'Oh no, logging has not been set!'
    Program.log(message)

    mock_print.assert_called_once()
def test_log_with_logging_set(self, mock_log):
    """
    With Program._logging_set set to True, Program.log() is expected to call
    the mocked object exactly once.

    @param mock_log Mock injected into the test (presumably a patched
                    logging call, applied by a decorator not visible here)
    """
    Program._logging_set = True

    message = 'Everything is A-Ok'
    Program.log(message)

    mock_log.assert_called_once()