Ejemplo n.º 1
0
    def _initialize(self):
        """Prepare the app for a run: verify state and flag it as running.

        Returns:
            bool: True when startup checks pass and state was updated,
                False when the app is already running or in a bad state
        """
        config = self._config

        # Bail out if another invocation is already in flight
        if config.is_running:
            LOGGER.error('App already running for service \'%s\'.',
                         self.type())
            return False

        # A 'partial' config means a chained invocation is in progress; only a
        # successive invocation is allowed to proceed while in that state
        if config.is_partial and not config.is_successive_invocation:
            LOGGER.error('App in partial execution state for service \'%s\'.',
                         self.type())
            return False

        LOGGER.info('App starting for service \'%s\'.', self.type())

        LOGGER.info('App executing as a successive invocation: %s',
                    config.is_successive_invocation)

        # Raises an exception on failure, so there is no return value to check
        self._validate_auth()

        self._last_timestamp = config.last_timestamp

        # Persist the 'running' flag to the parameter store
        config.mark_running()

        return True
Ejemplo n.º 2
0
    def _set_rate_limit_sleep(self):
        """Get the number of seconds we need to sleep until we are clear to continue

        Queries the service's rate limit endpoint and caches the returned reset
        value in ``self._rate_limit_sleep``. Any failure leaves the sleep at 0
        so callers never sleep on bad data.
        """
        # Default to no sleep; only a fully valid response overrides this
        self._rate_limit_sleep = 0

        # Make sure we have authentication headers
        if not self._auth_headers:
            LOGGER.error('No authentication headers for service \'%s\'',
                         self.type())
            return

        result, response = self._make_get_request(self._rate_limit_endpoint(),
                                                  self._auth_headers)

        if not result:
            return

        # Making sure we have a valid response
        if not response:
            LOGGER.error(
                'Response invalid getting rate limit data for service \'%s\'',
                self.type())
            return

        # Guard against a malformed payload: the original direct indexing of
        # response.get('data')['X-RateLimit-Reset'] raised TypeError when the
        # 'data' key was missing and KeyError when the header key was missing
        reset_value = (response.get('data') or {}).get('X-RateLimit-Reset')
        if reset_value is None:
            LOGGER.error(
                'Response invalid getting rate limit data for service \'%s\'',
                self.type())
            return

        self._rate_limit_sleep = reset_value
        LOGGER.info('Rate limit sleep set for \'%s\': %d', self.type(),
                    self._rate_limit_sleep)
Ejemplo n.º 3
0
    def _finalize(self):
        """Wrap up an execution: persist final state and, when necessary,
        invoke a fresh copy of this lambda to keep collecting logs.

        When more logs remain, the config is flagged 'partial' instead of
        'success' and a successive invocation is triggered.
        """
        if not self._last_timestamp:
            LOGGER.error(
                'Ending last timestamp is 0. This should not happen and is likely '
                'due to the subclass not setting this value.')

        if self._last_timestamp == self._config.start_last_timestamp:
            LOGGER.error(
                'Ending last timestamp is the same as the beginning last timestamp. '
                'This could occur if there were no logs collected for this execution.'
            )

        LOGGER.info(
            'App complete for service \'%s\'. Gathered %d logs in %d polls.',
            self.type(), self._gathered_log_count, self._poll_count)

        # Save the final timestamp so the next run picks up where we left off
        self._config.last_timestamp = self._last_timestamp

        if not self._more_to_poll:
            # Nothing left to collect; record a clean finish
            self._config.mark_success()
            return

        # More logs remain: mark the config 'partial' so scheduled invocations
        # do not run alongside the chained one, then kick off the next copy
        self._config.mark_partial()
        self._invoke_successive_gather()
Ejemplo n.º 4
0
    def _get_duo_logs(self, hostname, full_url):
        """Fetch all logs from the Duo endpoint for the current timeframe

        Returns:
            bool or list: False on any failure; otherwise a list of entries:
                {
                    'timestamp': <int:unix timestamp>,
                    'device': <str:device>,
                    'username': <str:username>,
                    'factor': <str:factor>,
                    'result': <str:result>,
                    'ip': <str:ip address>,
                    'new_enrollment': <bool:if event corresponds to enrollment>,
                    'integration': <str:integration>,
                    'location': {
                        'state': '<str:state>',
                        'city': '<str:city>',
                        'country': '<str:country>'
                    }
                }
        """
        # Offset the last seen timestamp by one to avoid re-collecting the
        # newest event; sanity check as int, then pass as a string
        request_params = {'mintime': str(int(self._last_timestamp + 1))}

        # Every Duo request must carry a signed set of headers
        signed_headers = self._generate_auth(hostname, request_params)
        if not signed_headers:
            return False

        try:
            # Make the request to the api, resulting in a bool or dict
            success, response = self._make_get_request(full_url,
                                                       headers=signed_headers,
                                                       params=request_params)
        except requests.exceptions.ConnectionError:
            LOGGER.exception('Received bad response from duo')
            return False

        if not success:
            return False

        # Duo nests the actual log entries under the 'response' key
        logs = response['response']
        if not logs:
            LOGGER.info('No logs in response from duo')
            return False

        # Duo produces events sequentially, so the newest timestamp belongs to
        # the final entry in the list
        self._last_timestamp = logs[-1]['timestamp']

        # A full page implies there may be more to fetch for this timeframe;
        # setting _more_to_poll lets the caller decide whether to poll again
        self._more_to_poll = len(logs) >= self._MAX_RESPONSE_LOGS

        # Hand the logs back so the caller can pass them to the batcher
        return logs
Ejemplo n.º 5
0
    def _determine_last_time(self):
        """Determine the last time this function was executed and fallback on
        evaluating the rate value if there is no last timestamp available

        Returns:
            int or str: The unix timestamp for the starting point to fetch logs
                back to, or a formatted date string when the app declares a
                date format
        """
        if not self.last_timestamp:
            # No saved state: derive a starting point by walking back one
            # schedule interval from the current UTC time
            interval_time = self.evaluate_interval()
            current_time = int(calendar.timegm(time.gmtime()))
            time_delta = current_time - interval_time
            LOGGER.debug(
                'Current timestamp: %s seconds. Calculated delta: %s seconds',
                current_time, time_delta)

            # Request the date format from the app since some services expect different types
            # Using init=False will return the class without instantiating it
            date_format = StreamAlertApp.get_app(self,
                                                 init=False).date_formatter()
            if date_format:
                # Service expects a formatted date string rather than an epoch value
                self.last_timestamp = datetime.utcfromtimestamp(
                    time_delta).strftime(date_format)
            else:
                self.last_timestamp = time_delta

        LOGGER.info('Starting last timestamp set to: %s', self.last_timestamp)

        return self.last_timestamp
Ejemplo n.º 6
0
    def _invoke_successive_gather(self):
        """Invoke a successive app function to handle more logs

        Used when a single execution could not collect everything available.
        Rather than marking the config with 'success' and waiting for the next
        scheduled execution, the lambda is re-invoked with an 'event' flagging
        it as a successive run. Scheduled executions lack such an 'event' and
        exit on the 'self._config.is_running' check, so chained invocations
        never collide with them or duplicate effort.
        """
        function_name = self._config['function_name']
        qualifier = self._config['qualifier']

        try:
            client = boto3.client('lambda',
                                  region_name=self._config['region'])
            payload = json.dumps(
                {'invocation_type': self._config.Events.SUCCESSIVE_INVOKE})
            response = client.invoke(FunctionName=function_name,
                                     InvocationType='Event',
                                     Payload=payload,
                                     Qualifier=qualifier)
        except ClientError as err:
            LOGGER.error(
                'An error occurred while invoking a subsequent app function '
                '(\'%s:%s\'). Error is: %s', function_name, qualifier,
                err.response)
            raise

        LOGGER.info(
            'Invoking successive apps function \'%s\' with Lambda request ID \'%s\'',
            function_name,
            response['ResponseMetadata']['RequestId'])
Ejemplo n.º 7
0
    def _gather_logs(self):
        """Gather the G Suite Admin Report logs for this application type

        Returns:
            bool or list: If the execution fails for some reason, return False.
                Otherwise, return a list of activities for this application type.
        """
        if not self._create_service():
            return False

        # Cache the last event timestamp so it can be used for future requests
        # (only when starting a fresh, non-paginated query)
        if not self._next_page_token:
            self._last_event_timestamp = self._last_timestamp

        LOGGER.debug('Querying activities since %s for %s',
                     self._last_event_timestamp, self.type())
        LOGGER.debug('Using next page token: %s', self._next_page_token)

        # Build the request; execution happens below so API errors can be caught
        activities_list = self._activities_service.list(
            userKey='all',
            applicationName=self._type(),
            startTime=self._last_event_timestamp,
            pageToken=self._next_page_token)

        try:
            results = activities_list.execute()
        except self._GOOGLE_API_EXCEPTIONS:
            LOGGER.exception('Failed to execute activities listing for %s',
                             self.type())
            return False

        if not results:
            LOGGER.error(
                'No results received from the G Suite API request for %s',
                self.type())
            return False

        activities = results.get('items', [])
        if not activities:
            LOGGER.info('No logs in response from G Suite API request for %s',
                        self.type())
            return False

        # The activity api returns logs in reverse chronological order, for some reason, and
        # therefore the newest log will be first in the list. This should only be updated
        # once during the first poll
        if not self._next_page_token:
            self._last_timestamp = activities[0]['id']['time']
            LOGGER.debug('Caching last timestamp: %s', self._last_timestamp)

        # A page token in the response means there are more pages to fetch
        self._next_page_token = results.get('nextPageToken')
        self._more_to_poll = bool(self._next_page_token)

        return activities
Ejemplo n.º 8
0
    def _finalize(self):
        """Perform any final steps, such as saving applicable state"""
        # Record a successful run in the parameter store
        self._config.mark_success()

        final_timestamp = self._last_timestamp

        # A zero/unset value indicates the subclass never updated the timestamp
        if not final_timestamp:
            LOGGER.error('Ending last timestamp is 0. This should not happen and is likely '
                         'due to the subclass not setting this value.')

        # An unchanged value typically means no logs were collected this run
        if final_timestamp == self._config.start_last_timestamp:
            LOGGER.error('Ending last timestamp is the same as the beginning last timestamp')

        LOGGER.info('App complete for service \'%s\'. Gathered %d logs in %d polls.',
                    self.type(), self._gathered_log_count, self._poll_count)

        # Persist the final timestamp so the next run starts from here
        self._config.last_timestamp = final_timestamp
Ejemplo n.º 9
0
    def send_logs(self, source_function, logs):
        """Public entry point for shipping gathered logs to the rule processor

        Args:
            source_function (str): The app function name from which the logs came
            logs (list): List of the logs that have been gathered
        """
        LOGGER.info('Starting batch send of %d logs to the rule processor',
                    len(logs))

        # Attempt a single bulk send first; fall back to segmenting the list
        # into multiple smaller requests when the payload is too large
        if not self._send_logs_to_stream_alert(source_function, logs):
            return self._segment_and_send(source_function, logs)
Ejemplo n.º 10
0
    def _gather(self):
        """Protected entry point for the beginning of polling

        Returns:
            float: Seconds the poll took, inflated by a buffer multiplier so the
                caller can estimate whether another poll fits in the remaining
                lambda execution time
        """

        # Make this request sleep if the API throttles requests
        self._sleep()

        def do_gather():
            """Perform the gather using this scoped method so we can time it"""
            # Increment the poll count
            self._poll_count += 1

            logs = self._gather_logs()

            # Make sure there are logs; this can be False if there was an issue
            # polling or if there are no new logs to be polled
            if not logs:
                self._more_to_poll = False
                LOGGER.error(
                    'Gather process for service \'%s\' was not able to poll any logs '
                    'on poll #%d', self.type(), self._poll_count)
                return

            # Increment the count of logs gathered
            self._gathered_log_count += len(logs)

            # Utilize the batcher to send logs to the rule processor
            self._batcher.send_logs(self._config['function_name'], logs)

            LOGGER.debug('Updating config last timestamp from %s to %s',
                         self._config.last_timestamp, self._last_timestamp)

            # Save the config's last timestamp after each function run
            self._config.last_timestamp = self._last_timestamp

        # Use timeit to track how long one poll takes, and cast to a decimal.
        # Use decimal since these floating point values can be very small and the
        # builtin float uses scientific notation when handling very small values
        exec_time = Decimal(timeit(do_gather, number=1))

        LOGGER.info('Gather process for \'%s\' executed in %f seconds.',
                    self.type(), exec_time)

        # Add a 50% buffer to the time it took to account for some unforeseen delay
        # and to give this function enough time to spawn a new invocation if there
        # are more logs to poll. Cast this back to float so general arithmetic works
        return float(exec_time * Decimal(self._POLL_BUFFER_MULTIPLIER))
Ejemplo n.º 11
0
    def _send_logs_to_stream_alert(self, source_function, logs):
        """Protected method for sending logs to the rule processor lambda
        function for processing. This performs some size checks before sending.

        Args:
            source_function (str): The app function name from which the logs came
            logs (list): List of the logs that have been gathered

        Returns:
            bool: True when the logs were sent in one request, False when the
                payload exceeded the lambda size limit and must be segmented
        """
        # The rule processor expects the originating service name alongside
        # the list of logs collected from it
        records = {
            'Records': [{
                'stream_alert_app': source_function,
                'logs': logs
            }]
        }
        serialized = json.dumps(records, separators=(',', ':'))
        payload_size = len(serialized)

        if payload_size > MAX_LAMBDA_PAYLOAD_SIZE:
            LOGGER.debug(
                'Log payload size for %d logs exceeds limit and will be '
                'segmented (%d > %d max).', len(logs), payload_size,
                MAX_LAMBDA_PAYLOAD_SIZE)
            return False

        LOGGER.debug('Sending %d logs to rule processor with payload size %d',
                     len(logs), payload_size)

        try:
            response = Batcher.LAMBDA_CLIENT.invoke(
                FunctionName=self.rp_function,
                InvocationType='Event',
                Payload=serialized,
                Qualifier='production')

        except ClientError as err:
            LOGGER.error(
                'An error occurred while sending logs to '
                '\'%s:production\'. Error is: %s', self.rp_function,
                err.response)
            raise

        LOGGER.info('Sent %d logs to \'%s\' with Lambda request ID \'%s\'',
                    len(logs), self.rp_function,
                    response['ResponseMetadata']['RequestId'])

        return True
Ejemplo n.º 12
0
    def _initialize(self):
        """Perform startup steps, such as flagging the app state as running

        Returns:
            bool: False when the app is already running, True otherwise
        """
        # Safety check: never allow two concurrent executions of this app
        if self._config.is_running:
            LOGGER.error('App already running for service \'%s\'.', self.type())
            return False

        LOGGER.info('App starting for service \'%s\'.', self.type())

        # Raises an exception on invalid auth, so no return value to inspect
        self._validate_auth()

        self._last_timestamp = self._config.last_timestamp

        # Persist the 'running' flag via the parameter store
        self._config.mark_running()

        return True
Ejemplo n.º 13
0
    def _determine_last_time(self):
        """Determine the last time this function was executed and fallback on
        evaluating the rate value if there is no last timestamp available

        Returns:
            int: The unix timestamp for the starting point to fetch logs back to
        """
        if not self.last_timestamp:
            # No saved state: walk back one schedule interval from now
            interval_time = self.evaluate_interval()
            # Bug fix: the previous time.mktime(time.gmtime()) interpreted the
            # UTC struct_time as *local* time, skewing the epoch value by the
            # host's UTC offset. time.time() is already UTC seconds since the
            # epoch (equivalent to calendar.timegm(time.gmtime())).
            current_time = int(time.time())
            LOGGER.debug('Current timestamp: %s seconds', current_time)

            self.last_timestamp = current_time - interval_time

        LOGGER.info('Starting last timestamp set to: %d seconds',
                    self.last_timestamp)

        return self.last_timestamp
Ejemplo n.º 14
0
    def _gather_logs(self):
        """Gather the Box Admin Events

        The ideal way to do this would be to use the boxsdk.events.Events class and the
        `get_events` method to retrieve these events. However, this method does allow you
        to pass keyword arguments (such as params) which are needed for specifying the
        'created_after' parameter.

        Returns:
            bool or list: If the execution fails for some reason, return False.
                Otherwise, return a list of box admin event entries.
        """
        if not self._create_client():
            LOGGER.error('Could not create box client for %s', self.type())
            return False

        result, response = self._make_request()

        # If the result is False, errors would be previously logged up
        # the stack before this, so just return False
        if not result:
            return False

        if not response:
            LOGGER.error('No results received from the Box API request for %s',
                         self.type())
            return False

        # A full chunk implies there may be more events to fetch.
        # NOTE(review): this flag is set before the empty-'entries' early return
        # below, so _more_to_poll can remain True even when False is returned —
        # confirm the caller handles that combination as intended.
        self._more_to_poll = int(
            response['chunk_size']) >= self._MAX_CHUNK_SIZE

        events = response.get('entries', [])
        if not events:
            LOGGER.info(
                'No events in response from the Box API request for %s',
                self.type())
            return False

        # Cache the stream position so the next request resumes from here
        self._next_stream_position = response['next_stream_position']

        # Events are consumed in order; the last entry carries the newest time
        self._last_timestamp = events[-1]['created_at']

        return events
Ejemplo n.º 15
0
    def gather(self):
        """Public method for actual gathering of logs"""
        # Flag the state as 'running' before doing any work; abort on failure
        if not self._initialize():
            return

        # Keep polling while the projected cost of another poll (plus any rate
        # limit sleep) still fits within the lambda's remaining execution time
        while self._gather() + self._sleep_seconds(
        ) < self._config.remaining_ms() / 1000.0:
            LOGGER.debug('More logs to poll for \'%s\': %s', self.type(),
                         self._more_to_poll)
            LOGGER.info('Lambda remaining seconds: %.2f',
                        self._config.remaining_ms() / 1000.0)
            if not self._more_to_poll:
                break

            # The flag is True here (the guard above would have broken out
            # otherwise); clear it so the subclass must set it to 'True' again
            # within its '_gather_logs' implementation on the next pass
            self._more_to_poll = False

        # Flag the state as 'succeeded' and persist final state
        self._finalize()
Ejemplo n.º 16
0
    def _gather_logs(self):
        """Gather the G Suite Admin Report logs for this application type

        Returns:
            bool or list: If the execution fails for some reason, return False.
                Otherwise, return a list of activities for this application type.
        """
        if not self._create_service():
            return False

        # Build the paginated request; execution happens below so errors can be caught
        activities_list = self._activities_service.list(
            userKey='all',
            applicationName=self._type(),
            startTime=self._last_timestamp,
            pageToken=self._next_page_token if self._next_page_token else None)

        try:
            results = activities_list.execute()
        except errors.HttpError:
            LOGGER.exception('Failed to execute activities listing')
            return False

        if not results:
            LOGGER.error(
                'No results received from the G Suite API request for %s',
                self.type())
            return False

        # A page token in the response means there are more pages to fetch
        self._next_page_token = results.get('nextPageToken')
        self._more_to_poll = bool(self._next_page_token)

        activities = results.get('items', [])
        if not activities:
            LOGGER.info('No logs in response from G Suite API request for %s',
                        self.type())
            return False

        # NOTE(review): this takes the *last* item's timestamp, but the API is
        # believed to return activities in reverse chronological order (newest
        # first) — confirm whether activities[0] was intended here.
        self._last_timestamp = activities[-1]['id']['time']

        return activities
Ejemplo n.º 17
0
    def _get_onelogin_events(self):
        """Get all events from the endpoint for this timeframe

        Returns:
            [
                {
                    'id': <int:id>,
                    'created_at': <str:created_at>,
                    'account_id': <int:account_id>,
                    'user_id': <int:user_id>,
                    'event_type_id': <int:event_type_id>,
                    'notes': <str:notes>,
                    'ipaddr': <str:ipaddr>,
                    'actor_user_id': <int:actor_user_id>,
                    'assuming_acting_user_id': null,
                    'role_id': <int:role_id>,
                    'app_id': <int:app_id>,
                    'group_id': <int:group_id>,
                    'otp_device_id': <int:otp_device_id>,
                    'policy_id': <int:policy_id>,
                    'actor_system': <str:actor_system>,
                    'custom_message': <str:custom_message>,
                    'role_name': <str:role_name>,
                    'app_name': <str:app_name>,
                    'group_name': <str:group_name>,
                    'actor_user_name': <str:actor_user_name>,
                    'user_name': <str:user_name>,
                    'policy_name': <str:policy_name>,
                    'otp_device_name': <str:otp_device_name>,
                    'operation_name': <str:operation_name>,
                    'directory_sync_run_id': <int:directory_sync_run_id>,
                    'directory_id': <int:directory_id>,
                    'resolution': <str:resolution>,
                    'client_id': <int:client_id>,
                    'resource_type_id': <int:resource_type_id>,
                    'error_description': <str:error_description>
                }
            ]
        """
        # Make sure we have authentication headers
        if not self._auth_headers:
            LOGGER.error('No authentication headers for service \'%s\'',
                         self.type())
            return False

        # Are we just getting events or getting paginated events?
        # A cached next-page URL already encodes the query, so no params needed
        if self._next_page_url:
            params = None
            request_url = self._next_page_url
        else:
            params = {'since': self._last_timestamp}
            request_url = self._events_endpoint()

        result, response = self._make_get_request(request_url,
                                                  self._auth_headers, params)

        if not result:
            # If we hit the rate limit, update the sleep time so the next
            # attempt waits until the limit resets
            if response and response.get('status'):
                r_status = response.get('status')
                if r_status['code'] == 400 and r_status[
                        'message'] == 'rate_limit_exceeded':
                    self._set_rate_limit_sleep()

            return False

        # Fail if response is invalid
        if not response:
            LOGGER.error('Response is invalid for service \'%s\'', self.type())
            return False

        # Set pagination link, if there is any; a link means more pages remain
        self._next_page_url = response['pagination']['next_link']
        self._more_to_poll = bool(self._next_page_url)

        # Adjust the last seen event, if the events list is not empty
        if not response['data']:
            LOGGER.info('Empty list of events for service \'%s\'', self.type())
            return False

        # Events are consumed in order; the final entry carries the newest time
        self._last_timestamp = response['data'][-1]['created_at']

        # Return the list of events to the caller so they can be sent to the batcher
        return response['data']
Ejemplo n.º 18
0
 def report_remaining_seconds(self):
     """Emit an info log with the lambda's remaining execution time in seconds"""
     remaining_seconds = self.remaining_ms() / 1000.0
     LOGGER.info('Lambda remaining seconds: %.2f', remaining_seconds)