Example #1
    def update_log_checkpoint(log_type, log_offset, child_account_id):
        """
        Save log_offset to the checkpoint file for log_type.

        @param log_type     Used to determine which checkpoint file to open
        @param log_offset   Information to save in the checkpoint file
        """

        Program.log(
            f"{log_type} consumer: saving latest log offset to a "
            "checkpointing file", logging.INFO)

        if child_account_id:
            checkpoint_filename = os.path.join(
                Config.get_checkpoint_dir(),
                f"{log_type}_checkpoint_data_{child_account_id}.txt")
        else:
            checkpoint_filename = os.path.join(
                Config.get_checkpoint_dir(),
                f"{log_type}_checkpoint_data.txt")

        # Open the checkpoint file in write mode; the 'with' block closes
        # (and therefore flushes) the file automatically
        with open(checkpoint_filename, 'w') as checkpoint_file:
            checkpoint_file.write(json.dumps(log_offset) + '\n')
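A minimal usage sketch (hypothetical values throughout; consumer stands in for a Consumer instance, and a writable checkpoint directory from Config is assumed):

    # Hypothetical: save an integer offset for 'adminaction' logs.
    # Non-MSP accounts write <checkpoint_dir>/adminaction_checkpoint_data.txt;
    # an MSP child account gets a per-account checkpoint file instead.
    consumer.update_log_checkpoint('adminaction', 1609459200, None)
    consumer.update_log_checkpoint('adminaction', 1609459200, 'D1234567890ABCDEF')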
Example #2
def main():
    """
    Kicks off DuoLogSync by setting important variables, creating and running
    a Producer-Consumer pair for each log-type defined in a config file passed
    to the program.
    """

    arg_parser = argparse.ArgumentParser(
        prog='duologsync',
        description='Sync Duo logs to other servers or SIEMs')
    arg_parser.add_argument('ConfigPath', metavar='config-path', type=str,
                            help='Path to the config file used to start the application')
    args = arg_parser.parse_args()

    # Handle shutting down the program via Ctrl-C
    signal.signal(signal.SIGINT, sigint_handler)

    # Create a config Dictionary from a YAML file located at args.ConfigPath
    config = Config.create_config(args.ConfigPath)
    Config.set_config(config)

    Program.setup_logging(Config.get_log_filepath())

    # Dict of writers (server id: writer) to be used for consumer tasks
    server_to_writer = Writer.create_writers(Config.get_servers())

    # List of Producer/Consumer objects as asyncio tasks to be run
    tasks = create_tasks(server_to_writer)

    # Run the Producers and Consumers
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(asyncio.gather(*tasks))
    event_loop.close()

    if Program.is_logging_set():
        print(f"DuoLogSync: shutdown successfully. Check "
              f"{Config.get_log_filepath()} for program logs")
Example #3
def create_admin(ikey, skey, host, is_msp=False):
    """
    Create an Admin object (from the duo_client library) with the given values.
    The Admin object has many functions for using Duo APIs and retrieving logs.

    @param ikey Duo Client ID (Integration Key)
    @param skey Duo Client Secret for proving identity / access (Secret Key)
    @param host URI where data / logs will be fetched from
    @param is_msp Indicates whether an MSP account is used for log retrieval

    @return a newly created Admin object
    """

    if is_msp:
        admin = duo_client.Accounts(ikey=ikey,
                                    skey=skey,
                                    host=host,
                                    user_agent=f"Duo Log Sync/{__version__}")
        Program.log(
            f"duo_client Account_Admin initialized for ikey: {ikey}, host: {host}",
            logging.INFO)
    else:
        admin = duo_client.Admin(ikey=ikey,
                                 skey=skey,
                                 host=host,
                                 user_agent=f"Duo Log Sync/{__version__}")
        Program.log(
            f"duo_client Admin initialized for ikey: {ikey}, host: {host}",
            logging.INFO)

    return admin
Example #4
    def create_config(cls, config_filepath):
        """
        Attemp to read the file at config_filepath and generate a config
        Dictionary object based on a defined JSON schema

        @param config_filepath  File from which to generate a config object
        """

        shutdown_reason = None

        try:
            with open(config_filepath) as config_file:
                # PyYAML gives better error messages for streams than for files
                config_file_data = config_file.read()
                config = yaml.full_load(config_file_data)

                # Check config against a schema to ensure all the needed fields
                # and values are defined
                config = cls._validate_and_normalize_config(config)
                if config.get('dls_settings').get('api').get(
                        'timeout') < cls.API_TIMEOUT_DEFAULT:
                    config['dls_settings']['api'][
                        'timeout'] = cls.API_TIMEOUT_DEFAULT
                    Program.log(
                        'DuoLogSync: Setting default api timeout to 120 seconds.'
                    )

        # Will occur when given a bad filepath or a bad file
        except OSError as os_error:
            shutdown_reason = f"{os_error}"
            Program.log('DuoLogSync: Failed to open the config file. Check '
                        'that the filename is correct')

        # Will occur if the config file does not contain valid YAML
        except YAMLError as yaml_error:
            shutdown_reason = f"{yaml_error}"
            Program.log('DuoLogSync: Failed to parse the config file. Check '
                        'that the config file has valid YAML.')

        # Validation of the config against a schema failed
        except ValueError:
            shutdown_reason = f"{cls.SCHEMA_VALIDATOR.errors}"
            Program.log('DuoLogSync: Validation of the config file failed. '
                        'Check that required fields have proper values.')

        # No exception raised during the try block, return config
        else:
            # Calculate offset as a timestamp and rewrite its value in config
            offset = config.get('dls_settings').get('api').get('offset')
            offset = datetime.utcnow() - timedelta(days=offset)
            config['dls_settings']['api']['offset'] = int(offset.timestamp())
            return config

        # At this point, it is guaranteed that an exception was raised, which
        # means that it is shutdown time
        Program.initiate_shutdown(shutdown_reason)
        return None
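For reference, a minimal sketch of the settings this method touches, shown as the resulting Python dictionary (values are illustrative only; the real schema defines additional required sections that are omitted here):

    # Illustrative fragment: create_config reads dls_settings.api.timeout and
    # dls_settings.api.offset; other required config sections are not shown.
    config = {
        'dls_settings': {
            'api': {
                'timeout': 120,  # seconds between polls; raised to the default if set lower
                'offset': 180,   # days back to fetch; rewritten as a Unix timestamp at startup
            },
        },
    }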
Example #5
def sigint_handler(signal_number, stack_frame):
    """
    Handler for SIGINT (Ctrl-C) to gracefully shutdown DuoLogSync
    """

    shutdown_reason = f"received signal {signal_number} (Ctrl-C)"
    Program.initiate_shutdown(shutdown_reason)

    if stack_frame:
        Program.log(f"DuoLogSync: stack frame from Ctrl-C is {stack_frame}",
                    logging.INFO)
Example #6
def create_consumer_producer_pair(endpoint, writer, admin, child_account=None):
    """
    Create a pair of Producer-Consumer objects for each endpoint and return a
    list containing the asyncio tasks for running those objects.

    @param endpoint     Log type to create producer/consumer pair for
    @param writer       Object for writing logs to a server
    @param admin        Object from which to get the correct API endpoints
    @param child_account Optional MSP child account ID whose logs should be
                         fetched; None for non-MSP accounts

    @return list of asyncio tasks for running the Producer and Consumer objects
    """

    # The format a log should have before being consumed and sent
    log_format = Config.get_log_format()
    log_queue = asyncio.Queue()
    producer = consumer = None

    # Create the right pair of Producer-Consumer objects based on endpoint
    if endpoint == Config.AUTH:
        if Config.account_is_msp():
            producer = AuthlogProducer(admin.json_api_call, log_queue,
                                       child_account_id=child_account,
                                       url_path="/admin/v2/logs/authentication")
        else:
            producer = AuthlogProducer(admin.get_authentication_log, log_queue)
        consumer = AuthlogConsumer(log_format, log_queue, writer, child_account)
    elif endpoint == Config.TELEPHONY:
        if Config.account_is_msp():
            producer = TelephonyProducer(admin.json_api_call, log_queue,
                                         child_account_id=child_account,
                                         url_path='/admin/v1/logs/telephony')
        else:
            producer = TelephonyProducer(admin.get_telephony_log, log_queue)
        consumer = TelephonyConsumer(log_format, log_queue, writer, child_account)
    elif endpoint == Config.ADMIN:
        if Config.account_is_msp():
            producer = AdminactionProducer(admin.json_api_call, log_queue,
                                           child_account_id=child_account,
                                           url_path='/admin/v1/logs/administrator')
        else:
            producer = AdminactionProducer(admin.get_administrator_log, log_queue)
        consumer = AdminactionConsumer(log_format, log_queue, writer, child_account)
    else:
        Program.log(f"{endpoint} is not a recognized endpoint", logging.WARNING)
        del log_queue
        return []

    tasks = [asyncio.ensure_future(producer.produce()),
             asyncio.ensure_future(consumer.consume())]

    return tasks
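A hedged usage sketch (writer, admin, and the child account ID below are placeholders; Config is assumed to already be set):

    # Hypothetical: build the producer/consumer tasks for authentication logs
    # belonging to a single MSP child account.
    tasks = create_consumer_producer_pair(Config.AUTH, writer, admin,
                                          child_account='D1234567890ABCDEF')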
Example #7
    def connection_lost(self, exc):
        """
        Invoked by asyncio when the UDP transport is closed. Builds a
        shutdown reason based on whether an exception caused the closure
        and initiates program shutdown.
        """
        shutdown_reason = None

        if exc:
            shutdown_reason = (
                f"UDP connection with host-{self.host} and port-{self.port}"
                f"was closed for the following reason [{exc}]")

        else:
            shutdown_reason = (
                f"UDP connection with host-{self.host} and port-{self.port} "
                "was closed")

        Program.initiate_shutdown(shutdown_reason)
Example #8
def get_log_offset(log_type,
                   recover_log_offset,
                   checkpoint_directory,
                   child_account_id=None):
    """
    Retrieve the offset from which logs of log_type should be fetched either by
    using the default offset or by using a timestamp saved in a checkpoint file

    @param log_type             Name of the log for which recovery is occurring
    @param recover_log_offset   Whether checkpoint files should be used to
                                retrieve log offset info
    @param checkpoint_directory Directory containing log offset checkpoint files
    @param child_account_id     Optional MSP child account ID used to select the
                                per-account checkpoint file

    @return the last offset read for a log type based on checkpointing data
    """

    milliseconds_per_second = 1000
    log_offset = Config.get_api_offset()

    # Auth must have timestamp represented in milliseconds, not seconds
    if log_type == Config.AUTH:
        log_offset *= milliseconds_per_second

    # In this case, look for a checkpoint file from which to read the log offset
    if recover_log_offset:
        try:
            if child_account_id:
                checkpoint_file_path = os.path.join(
                    checkpoint_directory,
                    f"{log_type}_checkpoint_data_{child_account_id}.txt")
            else:
                checkpoint_file_path = os.path.join(
                    checkpoint_directory,
                    f"{log_type}_checkpoint_data.txt")

            # Open the checkpoint file, 'with' statement automatically closes it
            with open(checkpoint_file_path) as checkpoint:
                # Set log_offset equal to the contents of the checkpoint file
                log_offset = json.loads(checkpoint.read())

        # Most likely, the checkpoint file doesn't exist
        except OSError:
            Program.log(f"Could not read checkpoint file for {log_type} logs, "
                        "consuming logs from {log_offset} timestamp")

    return log_offset
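A minimal usage sketch (assumes Config has been set; whether to recover from checkpoint files would normally come from the config's checkpointing settings):

    # Hypothetical: recover the admin log offset from a checkpoint file if one
    # exists, otherwise fall back to the configured default offset.
    offset = get_log_offset(Config.ADMIN,
                            recover_log_offset=True,
                            checkpoint_directory=Config.get_checkpoint_dir())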
Example #9
    async def add_logs_to_queue(self, logs):
        """
        If logs is not none, add them to this Writer's queue

        @param logs The logs to be added
        """

        # Important for recovery in the event of a crash
        self.log_offset = Producer.get_log_offset(logs)

        # Authlogs v2 endpoint returns dict response
        if isinstance(logs, dict):
            logs = logs['authlogs']

        Program.log(f"{self.log_type} producer: adding {len(logs)} "
                    "logs to the queue", logging.INFO)

        await self.log_queue.put(logs)
        Program.log(f"{self.log_type} producer: added {len(logs)} "
                    "logs to the queue", logging.INFO)
Example #10
def main():
    """
    Kicks off DuoLogSync by setting important variables, creating and running
    a Producer-Consumer pair for each log-type defined in a config file passed
    to the program.
    """

    arg_parser = argparse.ArgumentParser(
        prog='duologsync',
        description='Sync Duo logs to other servers or SIEMs')
    arg_parser.add_argument('ConfigPath',
                            metavar='config-path',
                            type=str,
                            help='Path to the config file used to start the application')
    args = arg_parser.parse_args()

    # Handle shutting down the program via Ctrl-C
    signal.signal(signal.SIGINT, sigint_handler)

    # Create a config Dictionary from a YAML file located at args.ConfigPath
    config = Config.create_config(args.ConfigPath)
    Config.set_config(config)

    # Do extra checks for Trust Monitor support
    is_dtm_in_config = check_for_specific_endpoint('trustmonitor', config)
    log_format = Config.get_log_format()
    is_msp = Config.account_is_msp()

    if is_dtm_in_config and log_format != 'JSON':
        Program.log("DuoLogSync: Trust Monitor endpoint only supports JSON",
                    logging.WARNING)
        return

    if is_dtm_in_config and is_msp:
        Program.log(
            "DuoLogSync: Trust Monitor endpoint does not support MSP accounts",
            logging.WARNING)
        return

    Program.setup_logging(Config.get_log_filepath())

    # Dict of writers (server id: writer) to be used for consumer tasks
    server_to_writer = Writer.create_writers(Config.get_servers())

    # List of Producer/Consumer objects as asyncio tasks to be run
    tasks = create_tasks(server_to_writer)

    # Run the Producers and Consumers
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(asyncio.gather(*tasks))
    event_loop.close()

    if Program.is_logging_set():
        print(f"DuoLogSync: shutdown successfully. Check "
              f"{Config.get_log_filepath()} for program logs")
Example #11
def create_consumer_producer_pair(endpoint, writer, admin):
    """
    Create a pair of Producer-Consumer objects for each endpoint and return a
    list containing the asyncio tasks for running those objects.

    @param endpoint     Log type to create producer/consumer pair for
    @param writer       Object for writing logs to a server
    @param admin        Object from which to get the correct API endpoints

    @return list of asyncio tasks for running the Producer and Consumer objects
    """

    # The format a log should have before being consumed and sent
    log_format = Config.get_log_format()
    log_queue = asyncio.Queue()
    producer = consumer = None

    # Create the right pair of Producer-Consumer objects based on endpoint
    if endpoint == Config.AUTH:
        producer = AuthlogProducer(admin.get_authentication_log, log_queue)
        consumer = AuthlogConsumer(log_format, log_queue, writer)
    elif endpoint == Config.TELEPHONY:
        producer = TelephonyProducer(admin.get_telephony_log, log_queue)
        consumer = TelephonyConsumer(log_format, log_queue, writer)
    elif endpoint == Config.ADMIN:
        producer = AdminactionProducer(admin.get_administrator_log, log_queue)
        consumer = AdminactionConsumer(log_format, log_queue, writer)
    else:
        Program.log(f"{endpoint} is not a recognized endpoint", logging.WARNING)
        del log_queue
        return []

    tasks = [asyncio.ensure_future(producer.produce()),
             asyncio.ensure_future(consumer.consume())]

    return tasks
Example #12
    async def create_writer(self, host, port, cert_filepath):
        """
        Wrapper for functions to create TCP or UDP connections.

        @param host             Hostname of the network connection to establish
        @param port             Port of the network connection to establish
        @param cert_filepath    Path to file containing SSL certificate

        @return a 'writer' object for writing data over the connection made
        """

        Program.log(f"DuoLogSync: Opening connection to {host}:{port}",
                    logging.INFO)

        # Message to be logged if an error occurs in this function
        help_message = (f"DuoLogSync: check that host-{host} and port-{port} "
                        "are correct in the config file")
        writer = None

        try:
            if self.protocol == 'UDP':
                writer = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

            elif self.protocol == 'TCPSSL':
                ssl_context = ssl.create_default_context(
                    ssl.Purpose.SERVER_AUTH, cafile=cert_filepath)

                writer = await Writer.create_tcp_writer(
                    host, port, ssl_context)

            elif self.protocol == 'TCP':
                writer = await Writer.create_tcp_writer(host, port)

        # Failed to open the certificate file
        except FileNotFoundError:
            shutdown_reason = f"{cert_filepath} could not be opened."
            help_message = (
                'DuoLogSync: Make sure the filepath for SSL cert file is '
                'correct.')

        # Couldn't establish a connection within 60 seconds
        except asyncio.TimeoutError:
            shutdown_reason = 'connection to server timed-out after 60 seconds'

        # If an invalid hostname or port number is given or simply failed to
        # connect using the host and port given
        except (gaierror, OSError) as error:
            shutdown_reason = f"{error}"

        # An error did not occur and the writer was successfully created
        else:
            return writer

        Program.initiate_shutdown(shutdown_reason)
        Program.log(help_message, logging.ERROR)
        return None
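A hedged usage sketch (fragment only; writer_instance, the host, and the port are placeholders, and cert_filepath is only needed when the protocol is 'TCPSSL'):

    # Hypothetical fragment from an async setup routine: writer_instance is a
    # Writer configured with protocol 'TCP'.
    tcp_writer = await writer_instance.create_writer('siem.example.com', 601,
                                                     cert_filepath=None)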
Example #13
async def restless_sleep(duration):
    """
    Wrapper for the asyncio.sleep function to sleep for duration seconds
    but check every second that DuoLogSync is still running. This is
    necessary in the case that the program should be shutting down but
    a producer is in the middle of a 2 minute poll and will not be aware
    of program shutdown until much later.

    @param duration The number of seconds to sleep for
    """

    while duration > 0:
        await asyncio.sleep(1)

        # Poll for program running state
        if Program.is_running():
            duration = duration - 1
            continue

        # Otherwise, program is done running, raise an exception to be caught
        raise ProgramShutdownError
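A short usage sketch (fragment only; assumes it sits inside the while-loop of an async producer such as produce() shown later in this document):

    # Hypothetical: wait out the poll interval, but stop promptly if the
    # program begins shutting down mid-sleep.
    try:
        await restless_sleep(Config.get_api_timeout())
    except ProgramShutdownError:
        break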
Example #14
def create_admin(ikey, skey, host, is_msp=False, proxy_server=None, proxy_port=None):
    """
    Create an Admin object (from the duo_client library) with the given values.
    The Admin object has many functions for using Duo APIs and retrieving logs.

    @param ikey Duo Client ID (Integration Key)
    @param skey Duo Client Secret for proving identity / access (Secret Key)
    @param host URI where data / logs will be fetched from
    @param is_msp Indicates whether an MSP account is used for log retrieval
    @param proxy_server Host/IP of the HTTP proxy if in use, or None
    @param proxy_port Port of the HTTP proxy if in use, or None
    @return a newly created Admin object
    """

    if is_msp:
        admin = duo_client.Accounts(
            ikey=ikey,
            skey=skey,
            host=host,
            user_agent=f"Duo Log Sync/{__version__}"
        )
        Program.log(f"duo_client Account_Admin initialized for ikey: {ikey}, host: {host}",
                    logging.INFO)
    else:
        admin = duo_client.Admin(
            ikey=ikey,
            skey=skey,
            host=host,
            user_agent=f"Duo Log Sync/{__version__}"
        )
        Program.log(f"duo_client Admin initialized for ikey: {ikey}, host: {host}",
                    logging.INFO)

    if proxy_server and proxy_port:
        admin.set_proxy(host=proxy_server, port=proxy_port)
        Program.log(f"duo_client Proxy configured: {proxy_server}:{proxy_port}", logging.INFO)


    return admin
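A hedged usage sketch (the credentials, host, and proxy values below are placeholders):

    # Hypothetical: create an Admin client that routes requests through an
    # HTTP proxy.
    admin = create_admin('DIXXXXXXXXXXXXXXXXXX', 'admin-api-secret-key',
                         'api-xxxxxxxx.duosecurity.com',
                         proxy_server='proxy.example.com', proxy_port=3128)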
Example #15
    def test_is_logging_set(self):
        self.assertEqual(Program.is_logging_set(), False)

        Program._logging_set = True

        self.assertEqual(Program.is_logging_set(), True)
Example #16
    def test_initiate_shutdown(self):
        self.assertEqual(Program._running, True)

        Program.initiate_shutdown('test')

        self.assertEqual(Program._running, False)
Example #17
    async def produce(self):
        """
        The main function of this class and subclasses. Runs a loop, sleeping
        for the polling duration then making an API call, consuming the logs
        from that API call and saving the offset of the latest log read.
        """

        # Exit when DuoLogSync is shutting down (due to error or Ctrl-C)
        while Program.is_running():
            shutdown_reason = None
            Program.log(
                f"{self.log_type} producer: begin polling for "
                f"{Config.get_api_timeout()} seconds", logging.INFO)

            try:
                # Sleep for api_timeout amount of time, but check for program
                # shutdown every second
                await restless_sleep(Config.get_api_timeout())
                Program.log(f"{self.log_type} producer: fetching logs",
                            logging.INFO)
                api_result = await self.call_log_api()
                if api_result:
                    await self.add_logs_to_queue(self.get_logs(api_result))
                else:
                    Program.log(
                        f"{self.log_type} producer: no new logs available",
                        logging.INFO)

            # Horribly messed up hostname was provided for duoclient host
            except (gaierror, OSError) as error:
                shutdown_reason = f"{self.log_type} producer: [{error}]"
                Program.log('DuoLogSync: check that the duoclient host '
                            'provided in the config file is correct')

            # duo_client throws a RuntimeError if the ikey or skey is invalid
            except RuntimeError as runtime_error:
                shutdown_reason = f"{self.log_type} producer: [{runtime_error}]"
                Program.log('DuoLogSync: check that the duoclient ikey and '
                            'skey in the config file are correct')

            # Shutdown hath been noticed and thus shutdown shall begin
            except ProgramShutdownError:
                break

            if shutdown_reason:
                Program.initiate_shutdown(shutdown_reason)

        # Unblock the consumer by putting an empty list in the shared queue
        await self.log_queue.put([])
        Program.log(f"{self.log_type} producer: shutting down", logging.INFO)
Example #18
    async def consume(self):
        """
        Consumer that will consume data from a queue shared with a producer
        object. Data from the queue is then sent over a configured transport
        protocol to respective SIEMs or servers.
        """

        while Program.is_running():
            Program.log(f"{self.log_type} consumer: waiting for logs",
                        logging.INFO)

            # Call unblocks only when there is an element in the queue to get
            logs = await self.log_queue.get()

            # Time to shutdown
            if not Program.is_running():
                continue

            Program.log(
                f"{self.log_type} consumer: received {len(logs)} logs "
                "from producer", logging.INFO)

            # Keep track of the latest log written in the case that a problem
            # occurs in the middle of writing logs
            last_log_written = None
            successful_write = False

            # An empty [] is sent only to unblock the consumer, so nothing
            # should be written in that case
            if logs:
                try:
                    Program.log(f"{self.log_type} consumer: writing logs",
                                logging.INFO)
                    for log in logs:
                        if self.child_account_id:
                            log['child_account_id'] = self.child_account_id
                        await self.writer.write(self.format_log(log))
                        last_log_written = log

                    # All the logs were written successfully
                    successful_write = True

                # Specifically watch out for errno 32 - Broken pipe. This means
                # the connection established by the writer was reset or shut down.
                except BrokenPipeError as broken_pipe_error:
                    shutdown_reason = f"{broken_pipe_error}"
                    Program.initiate_shutdown(shutdown_reason)
                    Program.log("DuoLogSync: connection to server was reset",
                                logging.WARNING)

                finally:
                    if successful_write:
                        Program.log(
                            f"{self.log_type} consumer: successfully wrote "
                            "all logs", logging.INFO)
                    else:
                        Program.log(
                            f"{self.log_type} consumer: failed to write "
                            "some logs", logging.WARNING)

                    self.log_offset = Producer.get_log_offset(last_log_written)
                    self.update_log_checkpoint(self.log_type, self.log_offset,
                                               self.child_account_id)
            else:
                Program.log(f"{self.log_type} consumer: No logs to write",
                            logging.INFO)

        Program.log(f"{self.log_type} consumer: shutting down", logging.INFO)
Example #19
    def test_setup_logging_normal(self):
        filepath = 'logs.txt'

        Program.setup_logging(filepath)

        self.assertEqual(Program._logging_set, True)
Example #20
    def test_is_running(self):
        self.assertEqual(Program.is_running(), True)

        Program._running = False

        self.assertEqual(Program.is_running(), False)
Example #21
    def test_log_without_logging_set(self, mock_print):
        Program.log('Oh no, logging has not been set!')

        mock_print.assert_called_once()
Example #22
    def test_log_with_logging_set(self, mock_log):
        Program._logging_set = True

        Program.log('Everything is A-Ok')

        mock_log.assert_called_once()