def create_admin(ikey, skey, host, is_msp=False):
    """
    Create an Admin object (from the duo_client library) with the given
    values. The Admin object has many functions for using Duo APIs and
    retrieving logs.

    @param ikey     Duo Client ID (Integration Key)
    @param skey     Duo Client Secret for proving identity / access
                    (Secret Key)
    @param host     URI where data / logs will be fetched from
    @param is_msp   Indicates whether we are using an MSP account for logs
                    retrieval

    @return a newly created Admin object
    """

    if is_msp:
        admin = duo_client.Accounts(
            ikey=ikey,
            skey=skey,
            host=host,
            user_agent=f"Duo Log Sync/{__version__}")

        Program.log(
            f"duo_client Account_Admin initialized for ikey: {ikey}, host: {host}",
            logging.INFO)
    else:
        admin = duo_client.Admin(
            ikey=ikey,
            skey=skey,
            host=host,
            user_agent=f"Duo Log Sync/{__version__}")

        Program.log(
            f"duo_client Admin initialized for ikey: {ikey}, host: {host}",
            logging.INFO)

    return admin
def update_log_checkpoint(log_type, log_offset, child_account_id):
    """
    Save log_offset to the checkpoint file for log_type.

    @param log_type         Used to determine which checkpoint file to open
    @param log_offset       Information to save in the checkpoint file
    @param child_account_id If present, the MSP child account id used to
                            select an account-specific checkpoint file
    """

    Program.log(f"{log_type} consumer: saving latest log offset to a "
                "checkpointing file", logging.INFO)

    if child_account_id:
        checkpoint_filename = os.path.join(
            Config.get_checkpoint_dir(),
            f"{log_type}_checkpoint_data_{child_account_id}.txt")
    else:
        checkpoint_filename = os.path.join(
            Config.get_checkpoint_dir(),
            f"{log_type}_checkpoint_data.txt")

    # 'with' closes the file on exit; according to the Python docs, closing
    # a file also flushes it
    with open(checkpoint_filename, 'w') as checkpoint_file:
        checkpoint_file.write(json.dumps(log_offset) + '\n')
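
# A hedged sketch of the checkpoint round trip: writing an offset with
# update_log_checkpoint and reading it back the way get_log_offset does.
# The log type and offset value below are hypothetical placeholders.
def _example_checkpoint_roundtrip():
    # Write a millisecond timestamp offset for the 'auth' log type
    update_log_checkpoint('auth', 1609459200000, None)

    checkpoint_path = os.path.join(Config.get_checkpoint_dir(),
                                   'auth_checkpoint_data.txt')

    # json.loads reverses the json.dumps performed on write
    with open(checkpoint_path) as checkpoint:
        return json.loads(checkpoint.read())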
async def create_writer(self, host, port, cert_filepath):
    """
    Wrapper for functions to create TCP or UDP connections.

    @param host          Hostname of the network connection to establish
    @param port          Port of the network connection to establish
    @param cert_filepath Path to file containing SSL certificate

    @return a 'writer' object for writing data over the connection made
    """

    Program.log(f"DuoLogSync: Opening connection to {host}:{port}",
                logging.INFO)

    # Message to be logged if an error occurs in this function
    help_message = (f"DuoLogSync: check that host-{host} and port-{port} "
                    "are correct in the config file")
    writer = None

    try:
        if self.protocol == 'UDP':
            writer = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        elif self.protocol == 'TCPSSL':
            ssl_context = ssl.create_default_context(
                ssl.Purpose.SERVER_AUTH, cafile=cert_filepath)
            writer = await Writer.create_tcp_writer(host, port, ssl_context)
        elif self.protocol == 'TCP':
            writer = await Writer.create_tcp_writer(host, port)

    # Failed to open the certificate file
    except FileNotFoundError:
        shutdown_reason = f"{cert_filepath} could not be opened."
        help_message = ('DuoLogSync: Make sure the filepath for SSL cert '
                        'file is correct.')

    # Couldn't establish a connection within 60 seconds
    except asyncio.TimeoutError:
        shutdown_reason = 'connection to server timed-out after 60 seconds'

    # An invalid hostname or port number was given, or connecting with the
    # given host and port simply failed
    except (gaierror, OSError) as error:
        shutdown_reason = f"{error}"

    # No error occurred and the writer was successfully created
    else:
        return writer

    Program.initiate_shutdown(shutdown_reason)
    Program.log(help_message, logging.ERROR)
    return None
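
# A hedged usage sketch: opening a TCP-over-SSL writer on a hypothetical
# Writer instance. The hostname, port, and CA file path are placeholders.
async def _example_open_tcpssl_writer(writer_obj):
    # Returns None (after initiating shutdown) if the connection fails
    return await writer_obj.create_writer(
        host='siem.example.com',
        port=6514,
        cert_filepath='/etc/ssl/certs/siem-ca.pem')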
def sigint_handler(signal_number, stack_frame):
    """
    Handler for SIGINT (Ctrl-C) to gracefully shutdown DuoLogSync
    """

    shutdown_reason = f"received signal {signal_number} (Ctrl-C)"
    Program.initiate_shutdown(shutdown_reason)

    if stack_frame:
        Program.log(f"DuoLogSync: stack frame from Ctrl-C is {stack_frame}",
                    logging.INFO)
def main():
    """
    Kicks off DuoLogSync by setting important variables, creating and
    running a Producer-Consumer pair for each log-type defined in a config
    file passed to the program.
    """

    arg_parser = argparse.ArgumentParser(prog='duologsync',
                                         description="Path to config file")
    arg_parser.add_argument('ConfigPath', metavar='config-path', type=str,
                            help='Config to start application')
    args = arg_parser.parse_args()

    # Handle shutting down the program via Ctrl-C
    signal.signal(signal.SIGINT, sigint_handler)

    # Create a config Dictionary from a YAML file located at args.ConfigPath
    config = Config.create_config(args.ConfigPath)
    Config.set_config(config)

    # Do extra checks for Trust Monitor support
    is_dtm_in_config = check_for_specific_endpoint('trustmonitor', config)
    log_format = Config.get_log_format()
    is_msp = Config.account_is_msp()

    if is_dtm_in_config and log_format != 'JSON':
        Program.log("DuoLogSync: Trust Monitor endpoint only supports JSON",
                    logging.WARNING)
        return

    if is_dtm_in_config and is_msp:
        Program.log("DuoLogSync: Trust Monitor endpoint does not support "
                    "MSP accounts", logging.WARNING)
        return

    Program.setup_logging(Config.get_log_filepath())

    # Dict of writers (server id: writer) to be used for consumer tasks
    server_to_writer = Writer.create_writers(Config.get_servers())

    # List of Producer/Consumer objects as asyncio tasks to be run
    tasks = create_tasks(server_to_writer)

    # Run the Producers and Consumers
    asyncio.get_event_loop().run_until_complete(asyncio.gather(*tasks))
    asyncio.get_event_loop().close()

    if Program.is_logging_set():
        print(f"DuoLogSync: shutdown successfully. Check "
              f"{Config.get_log_filepath()} for program logs")
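
# A hedged invocation sketch based on the argparse definition above: the
# program takes a single positional config path. The exact entry-point name
# and config filename are placeholders:
#
#   $ duologsync /path/to/config.yml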
def create_consumer_producer_pair(endpoint, writer, admin, child_account=None):
    """
    Create a pair of Producer-Consumer objects for each endpoint and return
    a list containing the asyncio tasks for running those objects.

    @param endpoint      Log type to create producer/consumer pair for
    @param writer        Object for writing logs to a server
    @param admin         Object from which to get the correct API endpoints
    @param child_account If present, DuoLogSync is being used by an MSP;
                         pass the appropriate account id

    @return list of asyncio tasks for running the Producer and Consumer
            objects
    """

    # The format a log should have before being consumed and sent
    log_format = Config.get_log_format()
    log_queue = asyncio.Queue()
    producer = consumer = None

    # Create the right pair of Producer-Consumer objects based on endpoint
    if endpoint == Config.AUTH:
        if Config.account_is_msp():
            producer = AuthlogProducer(
                admin.json_api_call, log_queue,
                child_account_id=child_account,
                url_path="/admin/v2/logs/authentication")
        else:
            producer = AuthlogProducer(admin.get_authentication_log,
                                       log_queue)
        consumer = AuthlogConsumer(log_format, log_queue, writer,
                                   child_account)
    elif endpoint == Config.TELEPHONY:
        if Config.account_is_msp():
            producer = TelephonyProducer(
                admin.json_api_call, log_queue,
                child_account_id=child_account,
                url_path='/admin/v1/logs/telephony')
        else:
            producer = TelephonyProducer(admin.get_telephony_log, log_queue)
        consumer = TelephonyConsumer(log_format, log_queue, writer,
                                     child_account)
    elif endpoint == Config.ADMIN:
        if Config.account_is_msp():
            producer = AdminactionProducer(
                admin.json_api_call, log_queue,
                child_account_id=child_account,
                url_path='/admin/v1/logs/administrator')
        else:
            producer = AdminactionProducer(admin.get_administrator_log,
                                           log_queue)
        consumer = AdminactionConsumer(log_format, log_queue, writer,
                                       child_account)
    else:
        Program.log(f"{endpoint} is not a recognized endpoint",
                    logging.WARNING)
        del log_queue
        return []

    tasks = [asyncio.ensure_future(producer.produce()),
             asyncio.ensure_future(consumer.consume())]

    return tasks
def create_config(cls, config_filepath):
    """
    Attempt to read the file at config_filepath and generate a config
    Dictionary object based on a defined JSON schema

    @param config_filepath File from which to generate a config object
    """

    shutdown_reason = None

    try:
        with open(config_filepath) as config_file:
            # PyYAML gives better error messages for streams than for files
            config_file_data = config_file.read()
            config = yaml.full_load(config_file_data)

            # Check config against a schema to ensure all the needed fields
            # and values are defined
            config = cls._validate_and_normalize_config(config)
            if config.get('dls_settings').get('api').get(
                    'timeout') < cls.API_TIMEOUT_DEFAULT:
                config['dls_settings']['api'][
                    'timeout'] = cls.API_TIMEOUT_DEFAULT
                Program.log(
                    'DuoLogSync: Setting default api timeout to 120 seconds.'
                )

    # Will occur when given a bad filepath or a bad file
    except OSError as os_error:
        shutdown_reason = f"{os_error}"
        Program.log('DuoLogSync: Failed to open the config file. Check '
                    'that the filename is correct')

    # Will occur if the config file does not contain valid YAML
    except YAMLError as yaml_error:
        shutdown_reason = f"{yaml_error}"
        Program.log('DuoLogSync: Failed to parse the config file. Check '
                    'that the config file has valid YAML.')

    # Validation of the config against a schema failed
    except ValueError:
        shutdown_reason = f"{cls.SCHEMA_VALIDATOR.errors}"
        Program.log('DuoLogSync: Validation of the config file failed. '
                    'Check that required fields have proper values.')

    # No exception raised during the try block, return config
    else:
        # Calculate offset as a timestamp and rewrite its value in config
        offset = config.get('dls_settings').get('api').get('offset')
        offset = datetime.utcnow() - timedelta(days=offset)
        config['dls_settings']['api']['offset'] = int(offset.timestamp())
        return config

    # At this point, it is guaranteed that an exception was raised, which
    # means that it is shutdown time
    Program.initiate_shutdown(shutdown_reason)
    return None
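
# A hedged sketch of a minimal YAML config exercising the fields this
# function reads (dls_settings.api.offset and dls_settings.api.timeout).
# The surrounding keys and values are assumptions for illustration, not
# the authoritative schema:
#
#   dls_settings:
#     api:
#       offset: 180    # days of history to fetch on the first run
#       timeout: 120   # polling interval in seconds; values below
#                      # API_TIMEOUT_DEFAULT are raised to the default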
def get_log_offset(log_type, recover_log_offset, checkpoint_directory,
                   child_account_id=None):
    """
    Retrieve the offset from which logs of log_type should be fetched,
    either by using the default offset or by using a timestamp saved in a
    checkpoint file.

    @param log_type             Name of the log for which recovery is
                                occurring
    @param recover_log_offset   Whether checkpoint files should be used to
                                retrieve log offset info
    @param checkpoint_directory Directory containing log offset checkpoint
                                files
    @param child_account_id     If present, the MSP child account id used
                                to select an account-specific checkpoint
                                file

    @return the last offset read for a log type based on checkpointing data
    """

    milliseconds_per_second = 1000
    log_offset = Config.get_api_offset()

    # Auth must have timestamp represented in milliseconds, not seconds
    if log_type == Config.AUTH:
        log_offset *= milliseconds_per_second

    # In this case, look for a checkpoint file from which to read the log
    # offset
    if recover_log_offset:
        try:
            if child_account_id:
                checkpoint_file_path = os.path.join(
                    checkpoint_directory,
                    f"{log_type}_checkpoint_data_{child_account_id}.txt")
            else:
                checkpoint_file_path = os.path.join(
                    checkpoint_directory,
                    f"{log_type}_checkpoint_data.txt")

            # Open the checkpoint file; the 'with' statement automatically
            # closes it
            with open(checkpoint_file_path) as checkpoint:
                # Set log_offset equal to the contents of the checkpoint
                # file
                log_offset = json.loads(checkpoint.read())

        # Most likely, the checkpoint file doesn't exist
        except OSError:
            Program.log(f"Could not read checkpoint file for {log_type} "
                        f"logs, consuming logs from {log_offset} timestamp")

    return log_offset
async def add_logs_to_queue(self, logs):
    """
    If logs is not None, add them to this Producer's queue

    @param logs The logs to be added
    """

    # Important for recovery in the event of a crash
    self.log_offset = Producer.get_log_offset(logs)

    # Authlogs v2 endpoint returns a dict response
    if isinstance(logs, dict):
        logs = logs['authlogs']

    Program.log(f"{self.log_type} producer: adding {len(logs)} "
                "logs to the queue", logging.INFO)

    await self.log_queue.put(logs)

    Program.log(f"{self.log_type} producer: added {len(logs)} "
                "logs to the queue", logging.INFO)
def create_consumer_producer_pair(endpoint, writer, admin):
    """
    Create a pair of Producer-Consumer objects for each endpoint and return
    a list containing the asyncio tasks for running those objects.

    @param endpoint Log type to create producer/consumer pair for
    @param writer   Object for writing logs to a server
    @param admin    Object from which to get the correct API endpoints

    @return list of asyncio tasks for running the Producer and Consumer
            objects
    """

    # The format a log should have before being consumed and sent
    log_format = Config.get_log_format()
    log_queue = asyncio.Queue()
    producer = consumer = None

    # Create the right pair of Producer-Consumer objects based on endpoint
    if endpoint == Config.AUTH:
        producer = AuthlogProducer(admin.get_authentication_log, log_queue)
        consumer = AuthlogConsumer(log_format, log_queue, writer)
    elif endpoint == Config.TELEPHONY:
        producer = TelephonyProducer(admin.get_telephony_log, log_queue)
        consumer = TelephonyConsumer(log_format, log_queue, writer)
    elif endpoint == Config.ADMIN:
        producer = AdminactionProducer(admin.get_administrator_log,
                                       log_queue)
        consumer = AdminactionConsumer(log_format, log_queue, writer)
    else:
        Program.log(f"{endpoint} is not a recognized endpoint",
                    logging.WARNING)
        del log_queue
        return []

    tasks = [asyncio.ensure_future(producer.produce()),
             asyncio.ensure_future(consumer.consume())]

    return tasks
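
# A hedged sketch of driving one pair returned above, mirroring how main()
# gathers its tasks; the writer and admin objects are assumed to already
# exist and are hypothetical here.
def _example_run_auth_pair(writer, admin):
    tasks = create_consumer_producer_pair(Config.AUTH, writer, admin)
    asyncio.get_event_loop().run_until_complete(asyncio.gather(*tasks))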
def create_admin(ikey, skey, host, is_msp=False, proxy_server=None,
                 proxy_port=None):
    """
    Create an Admin object (from the duo_client library) with the given
    values. The Admin object has many functions for using Duo APIs and
    retrieving logs.

    @param ikey         Duo Client ID (Integration Key)
    @param skey         Duo Client Secret for proving identity / access
                        (Secret Key)
    @param host         URI where data / logs will be fetched from
    @param is_msp       Indicates whether we are using an MSP account for
                        logs retrieval
    @param proxy_server Host/IP of the HTTP proxy if in use, or None
    @param proxy_port   Port of the HTTP proxy if in use, or None

    @return a newly created Admin object
    """

    if is_msp:
        admin = duo_client.Accounts(
            ikey=ikey, skey=skey, host=host,
            user_agent=f"Duo Log Sync/{__version__}"
        )
        Program.log(f"duo_client Account_Admin initialized for "
                    f"ikey: {ikey}, host: {host}", logging.INFO)
    else:
        admin = duo_client.Admin(
            ikey=ikey, skey=skey, host=host,
            user_agent=f"Duo Log Sync/{__version__}"
        )
        Program.log(f"duo_client Admin initialized for ikey: {ikey}, "
                    f"host: {host}", logging.INFO)

    if proxy_server and proxy_port:
        admin.set_proxy(host=proxy_server, port=proxy_port)
        Program.log(f"duo_client Proxy configured: "
                    f"{proxy_server}:{proxy_port}", logging.INFO)

    return admin
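
# A hedged usage sketch for create_admin with an HTTP proxy: every value
# below is a hypothetical placeholder, not a real credential or host.
def _example_create_admin_with_proxy():
    return create_admin(
        ikey='DIXXXXXXXXXXXXXXXXXX',
        skey='<secret key placeholder>',
        host='api-xxxxxxxx.duosecurity.com',
        proxy_server='proxy.example.com',
        proxy_port=3128)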
async def produce(self):
    """
    The main function of this class and subclasses. Runs a loop, sleeping
    for the polling duration then making an API call, consuming the logs
    from that API call and saving the offset of the latest log read.
    """

    # Exit when DuoLogSync is shutting down (due to error or Ctrl-C)
    while Program.is_running():
        shutdown_reason = None

        Program.log(f"{self.log_type} producer: begin polling for "
                    f"{Config.get_api_timeout()} seconds", logging.INFO)

        try:
            # Sleep for api_timeout amount of time, but check for program
            # shutdown every second
            await restless_sleep(Config.get_api_timeout())
            Program.log(f"{self.log_type} producer: fetching logs",
                        logging.INFO)
            api_result = await self.call_log_api()

            if api_result:
                await self.add_logs_to_queue(self.get_logs(api_result))
            else:
                Program.log(f"{self.log_type} producer: no new logs "
                            "available", logging.INFO)

        # Horribly messed up hostname was provided for duoclient host
        except (gaierror, OSError) as error:
            shutdown_reason = f"{self.log_type} producer: [{error}]"
            Program.log('DuoLogSync: check that the duoclient host '
                        'provided in the config file is correct')

        # duo_client throws a RuntimeError if the ikey or skey is invalid
        except RuntimeError as runtime_error:
            shutdown_reason = f"{self.log_type} producer: [{runtime_error}]"
            Program.log('DuoLogSync: check that the duoclient ikey and '
                        'skey in the config file are correct')

        # Shutdown hath been noticed and thus shutdown shall begin
        except ProgramShutdownError:
            break

        if shutdown_reason:
            Program.initiate_shutdown(shutdown_reason)

    # Unblock the consumer by putting anything in the shared queue
    await self.log_queue.put([])
    Program.log(f"{self.log_type} producer: shutting down", logging.INFO)
async def consume(self):
    """
    Consumer that will consume data from a queue shared with a producer
    object. Data from the queue is then sent over a configured transport
    protocol to respective SIEMs or servers.
    """

    while Program.is_running():
        Program.log(f"{self.log_type} consumer: waiting for logs",
                    logging.INFO)

        # Call unblocks only when there is an element in the queue to get
        logs = await self.log_queue.get()

        # Time to shutdown
        if not Program.is_running():
            continue

        Program.log(f"{self.log_type} consumer: received {len(logs)} logs "
                    "from producer", logging.INFO)

        # Keep track of the latest log written in the case that a problem
        # occurs in the middle of writing logs
        last_log_written = None
        successful_write = False

        # If we are sending an empty [] to unblock consumers, nothing
        # should be written to file
        if logs:
            try:
                Program.log(f"{self.log_type} consumer: writing logs",
                            logging.INFO)
                for log in logs:
                    if self.child_account_id:
                        log['child_account_id'] = self.child_account_id
                    await self.writer.write(self.format_log(log))
                    last_log_written = log

                # All the logs were written successfully
                successful_write = True

            # Specifically watch out for errno 32 - Broken pipe. This
            # means that the connection established by the writer was
            # reset or shutdown.
            except BrokenPipeError as broken_pipe_error:
                shutdown_reason = f"{broken_pipe_error}"
                Program.initiate_shutdown(shutdown_reason)
                Program.log("DuoLogSync: connection to server was reset",
                            logging.WARNING)

            finally:
                if successful_write:
                    Program.log(f"{self.log_type} consumer: successfully "
                                "wrote all logs", logging.INFO)
                else:
                    Program.log(f"{self.log_type} consumer: failed to "
                                "write some logs", logging.WARNING)

                self.log_offset = Producer.get_log_offset(last_log_written)
                self.update_log_checkpoint(self.log_type, self.log_offset,
                                           self.child_account_id)
        else:
            Program.log(f"{self.log_type} consumer: No logs to write",
                        logging.INFO)

    Program.log(f"{self.log_type} consumer: shutting down", logging.INFO)
def test_log_with_logging_set(self, mock_log):
    Program._logging_set = True
    Program.log('Everything is A-Ok')

    mock_log.assert_called_once()
def test_log_without_logging_set(self, mock_print):
    Program.log('Oh no, logging has not been set!')

    mock_print.assert_called_once()