def _gather(self):
    """Protected entry point to perform the gather that returns the time the process took

    Returns:
        float: time, in seconds, for which the function ran
    """
    # Make this request sleep if the API throttles requests
    self._sleep()

    # Increment the poll count
    self._poll_count += 1

    logs = self._gather_logs()

    # Make sure there are logs, this can be False if there was an issue polling
    # or if there are no new logs to be polled
    if not logs:
        self._more_to_poll = False
        LOGGER.error('[%s] Gather process was not able to poll any logs '
                     'on poll #%d', self, self._poll_count)
        return

    # Increment the count of logs gathered
    self._gathered_log_count += len(logs)

    # Utilize the batcher to send logs to the rule processor
    self._batcher.send_logs(logs)

    LOGGER.debug('Updating config last timestamp from %s to %s',
                 self._config.last_timestamp, self._last_timestamp)

    # Save the config's last timestamp after each function run
    self._config.last_timestamp = self._last_timestamp
def _evaluate_interval(self):
    """Get the interval at which this function is executing. This translates
    an AWS rate schedule expression ('rate(2 hours)') into an interval in seconds
    """
    rate_match = AWS_RATE_RE.match(self._schedule)

    if not rate_match:
        raise AppConfigError('Invalid \'rate\' interval value: '
                             '{}'.format(self._schedule))

    value = rate_match.group(2) or rate_match.group(4)
    unit = rate_match.group(3) or rate_match.group(5).replace('s', '')

    translate_to_seconds = {
        'minute': 60,
        'hour': 60 * 60,
        'day': 60 * 60 * 24
    }

    # Get the total seconds that this rate evaluates to
    interval = int(value) * translate_to_seconds[unit]

    LOGGER.debug('Evaluated rate interval: %d seconds', interval)

    return interval
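# A minimal sketch of a rate-expression regex consistent with the group
# indices used above (groups 2 and 3 capture the singular '1 unit' form,
# groups 4 and 5 the plural 'N units' form). The real AWS_RATE_RE is defined
# elsewhere in the codebase, so this exact pattern is an assumption:
import re

AWS_RATE_RE = re.compile(
    r'^rate\(((1) (minute|hour|day)|([2-9]|[1-9]\d+) (minutes|hours|days))\)$'
)

match = AWS_RATE_RE.match('rate(2 hours)')
assert match.group(4) == '2' and match.group(5) == 'hours'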
def _invoke_successive_app(self):
    """Invoke a successive app function to handle more logs

    This is useful when there were more logs to collect than could be
    accomplished in this execution. Instead of marking the config with
    'success' and waiting for the next scheduled execution, this will
    invoke the lambda again with an 'event' indicating there are more logs
    to collect. Other scheduled executions will not have an 'event' to
    allow for this type of override, and will exit when checking the
    'self._config.is_running' property. This allows for chained invocations
    without the worry of duplicated effort or collisions.
    """
    lambda_client = boto3.client('lambda')
    try:
        response = lambda_client.invoke(
            FunctionName=self._config.function_name,
            InvocationType='Event',
            Payload=self._config.successive_event,
            Qualifier=self._config.function_version
        )
    except ClientError as err:
        LOGGER.error('An error occurred while invoking a subsequent app function '
                     '(\'%s:%s\'). Error is: %s',
                     self._config.function_name,
                     self._config.function_version,
                     err.response)
        raise

    LOGGER.info('Invoking successive app function \'%s\' with Lambda request ID \'%s\'',
                self._config.function_name,
                response['ResponseMetadata']['RequestId'])
def _wrapper(*args, **kwargs):
    """Time the wrapped function call, log the duration, and return it"""
    start = time.time()
    func(*args, **kwargs)
    total = time.time() - start
    LOGGER.info('[%s] Function executed in %.4f seconds.', func.__name__, total)
    return total
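# The _wrapper above is the inner function of a timing decorator. A minimal
# sketch of the enclosing decorator follows; the name '_report_time' is an
# assumption, since only the inner function appears in this section. Note that
# it returns the elapsed time rather than the wrapped function's result, which
# is what lets gather() below compare _gather() against a time budget.
from functools import wraps

def _report_time(func):
    """Assumed decorator that times the wrapped call and returns the duration"""
    @wraps(func)
    def _wrapper(*args, **kwargs):
        start = time.time()
        func(*args, **kwargs)
        total = time.time() - start
        LOGGER.info('[%s] Function executed in %.4f seconds.', func.__name__, total)
        return total
    return _wrapper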
def gather(self):
    """Public method for actual gathering of logs"""
    # Initialize the app, saving state to 'running'
    if not self._initialize():
        return

    try:
        # Add a 50% buffer to the time it took to account for some unforeseen
        # delay and to give this function enough time to spawn a new invocation
        # if there are more logs to poll
        while (((self._gather() * self._POLL_BUFFER_MULTIPLIER) + self._sleep_seconds())
               < self._remaining_seconds):
            LOGGER.debug('[%s] More logs to poll: %s', self, self._more_to_poll)
            self._config.report_remaining_seconds()
            if not self._more_to_poll:
                break

            # Reset the boolean indicating that there is more data to poll. Subclasses
            # should set this to 'True' within their implementation of the
            # '_gather_logs' function
            self._more_to_poll = not self._more_to_poll

        LOGGER.debug('[%s] Gathered all logs possible for this execution. '
                     'More logs to poll: %s', self, self._more_to_poll)

        self._config.report_remaining_seconds()

        # Finalize, saving state to 'succeeded'
        self._finalize()
    finally:
        # Make sure the config is not left marked as running, which could be problematic
        if self._config and self._config.is_running:
            self._config.mark_failure()
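# Worked example of the loop guard above, using hypothetical values: a poll
# that took 10 seconds with _POLL_BUFFER_MULTIPLIER = 1.5 (the 50% buffer)
# and _sleep_seconds() = 2 requires (10 * 1.5) + 2 = 17 seconds of remaining
# Lambda execution time before another iteration is attempted.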
def _determine_last_time(self, date_format):
    """Determine the last time this function was executed and fall back on
    evaluating the rate value if there is no last timestamp available

    Returns:
        int or str: The unix timestamp, or formatted datetime string, for the
            starting point to fetch logs back to
    """
    if not self.last_timestamp:
        interval_time = self._evaluate_interval()
        current_time = int(calendar.timegm(time.gmtime()))
        time_delta = current_time - interval_time
        LOGGER.debug('Current timestamp: %s seconds. Calculated delta: %s seconds',
                     current_time, time_delta)

        # Request the date format from the app since some services expect
        # different types. A date format of None falls back on the raw
        # unix timestamp
        if date_format:
            self.last_timestamp = datetime.utcfromtimestamp(time_delta).strftime(date_format)
        else:
            self.last_timestamp = time_delta

    LOGGER.info('Starting last timestamp set to: %s', self.last_timestamp)

    return self.last_timestamp
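# Illustration of the two branches above (the epoch value is hypothetical):
#   datetime.utcfromtimestamp(1517097600).strftime('%Y-%m-%dT%H:%M:%SZ')
#   -> '2018-01-28T00:00:00Z'
# With date_format=None, the raw unix timestamp 1517097600 is kept instead.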
def _check_http_response(self, response):
    """Method for checking for a valid HTTP response code

    Returns:
        bool: Indicator of whether or not this request was successful
    """
    success = response is not None and (200 <= response.status_code <= 299)
    if not success:
        # Guard against a None response, which has no status_code to log
        if response is None:
            LOGGER.error('[%s] HTTP request failed: no response received', self)
        else:
            LOGGER.error('[%s] HTTP request failed: [%d] %s', self,
                         response.status_code, response.content)
    return success
def last_timestamp(self, timestamp):
    """Set the last timestamp"""
    if self._last_timestamp == timestamp:
        LOGGER.debug('Timestamp is unchanged and will not be saved: %s', timestamp)
        return

    LOGGER.debug('Setting last timestamp to: %s', timestamp)

    self._last_timestamp = timestamp
    self._save_state()
def _sleep(self):
    """Function to sleep the looping"""
    # Do not sleep if this is the first poll
    if self._poll_count == 0:
        LOGGER.debug('Skipping sleep for first poll')
        return

    # Sleep for n seconds so the called API does not return a bad response
    sleep_for_secs = self._sleep_seconds()
    LOGGER.debug('[%s] Sleeping for %d seconds...', self, sleep_for_secs)

    time.sleep(sleep_for_secs)
def _make_get_request(self, full_url, headers, params=None):
    """Method for returning the json loaded response for this GET request

    Returns:
        tuple (bool, dict): False if there was an error performing the request,
            and the dictionary loaded from the json response
    """
    LOGGER.debug('[%s] Making GET request on poll #%d', self, self._poll_count)

    # Perform the request and return the response as a dict
    response = requests.get(full_url, headers=headers,
                            params=params, timeout=self._DEFAULT_REQUEST_TIMEOUT)

    return self._check_http_response(response), response.json()
def _initialize(self):
    """Method for performing any startup steps, like setting state to running"""
    # Perform another safety check to make sure this is not being invoked already
    if self._config.is_running:
        LOGGER.error('[%s] App already running', self)
        return False

    # Check if this is an invocation spawned from a previous partial execution
    # Return if the config is marked as 'partial' but the invocation type is wrong
    if not self._config.is_successive_invocation and self._config.is_partial:
        LOGGER.error('[%s] App in partial execution state, exiting', self)
        return False

    LOGGER.info('[%s] Starting app', self)

    LOGGER.info('App executing as a successive invocation: %s',
                self._config.is_successive_invocation)

    # Validate the auth in the config. This raises an exception upon failure
    self._config.validate_auth(set(self.required_auth_info()))

    self._config.set_starting_timestamp(self.date_formatter())

    self._last_timestamp = self._config.last_timestamp

    # Mark this app as running, which updates the parameter store
    self._config.mark_running()

    return True
def context(self, context):
    """Set an additional context dictionary specific to each app"""
    if self._context == context:
        LOGGER.debug('App context is unchanged and will not be saved: %s', context)
        return

    if not isinstance(context, dict):
        # Format the message directly; exception constructors do not
        # perform printf-style interpolation
        raise AppStateError('Unable to set context: {}. '
                            'Must be a dictionary'.format(context))

    LOGGER.debug('Setting context to: %s', context)

    self._context = context
    self._save_state()
def _send_logs_to_lambda(self, logs):
    """Protected method for sending logs to the rule processor lambda
    function for processing. This performs some size checks before sending.

    Args:
        logs (list): List of the logs that have been gathered

    Returns:
        bool: True if the logs were sent (or dropped), False if the payload
            exceeds the Lambda input limit and must be segmented
    """
    # Create a payload to be sent to the rule processor that contains the
    # service these logs were collected from and the list of logs
    payload = {'Records': [{'stream_alert_app': self._source_function, 'logs': logs}]}
    payload_json = json.dumps(payload, separators=(',', ':'))

    if len(payload_json) > self.MAX_LAMBDA_PAYLOAD_SIZE:
        if len(logs) == 1:
            LOGGER.error('Log payload size for single log exceeds input limit and will be '
                         'dropped (%d > %d max).', len(payload_json),
                         self.MAX_LAMBDA_PAYLOAD_SIZE)
            return True

        LOGGER.debug('Log payload size for %d logs exceeds limit and will be '
                     'segmented (%d > %d max).', len(logs), len(payload_json),
                     self.MAX_LAMBDA_PAYLOAD_SIZE)
        return False

    LOGGER.debug('Sending %d logs to rule processor with payload size %d',
                 len(logs), len(payload_json))

    try:
        response = Batcher.LAMBDA_CLIENT.invoke(
            FunctionName=self._destination_function,
            InvocationType='Event',
            Payload=payload_json,
            Qualifier='production'
        )
    except ClientError as err:
        LOGGER.error('An error occurred while sending logs to '
                     '\'%s:production\'. Error is: %s',
                     self._destination_function,
                     err.response)
        raise

    LOGGER.info('Sent %d logs to \'%s\' with Lambda request ID \'%s\'',
                len(logs), self._destination_function,
                response['ResponseMetadata']['RequestId'])

    return True
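# Shape of the payload serialized above; the app function name and log
# entries are hypothetical:
#   {"Records": [{"stream_alert_app": "example_duo_admin_app",
#                 "logs": [{"event": "login"}, {"event": "logout"}]}]}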
def send_logs(self, logs):
    """Public method to send the logs to the rule processor

    Args:
        logs (list): List of the logs that have been gathered
    """
    LOGGER.info('Starting batch send of %d logs to the rule processor', len(logs))

    # Try to send all of the logs in one fell swoop
    if self._send_logs_to_lambda(logs):
        return

    # Fall back on segmenting the list of logs into multiple requests
    # if they could not be sent at once
    self._segment_and_send(logs)

    LOGGER.info('Finished batch send of %d logs to the rule processor', len(logs))
def load_config(cls, event, context):
    """Load the configuration for this app invocation

    Args:
        event (dict): The AWS Lambda input event, which is JSON serialized to a dictionary
        context (LambdaContext): The AWS LambdaContext object, passed in via the handler.

    Returns:
        AppConfig: Configuration for the running application
    """
    # Patch the remaining_ms method to the AWS timing function
    AppConfig.remaining_ms = context.get_remaining_time_in_millis
    func_name = context.function_name
    func_version = context.function_version

    # Get full parameter names for authentication and state parameters
    auth_param_name = '_'.join([func_name, cls.AUTH_CONFIG_SUFFIX])
    state_param_name = '_'.join([func_name, cls.STATE_CONFIG_SUFFIX])

    # Get the loaded parameters and a list of any invalid ones from parameter store
    params, invalid_params = cls._get_parameters(auth_param_name, state_param_name)

    # Check to see if the authentication param is in the invalid params list
    if auth_param_name in invalid_params:
        raise AppConfigError('Could not load authentication parameter required '
                             'for this app: {}'.format(auth_param_name))

    LOGGER.debug('Retrieved parameters from parameter store: %s',
                 cls._scrub_auth_info(params, auth_param_name))
    LOGGER.debug('Invalid parameters could not be retrieved from parameter store: %s',
                 invalid_params)

    # Load the authentication info. This data can vary from service to service
    auth_config = {
        key: value.encode('utf-8') if isinstance(value, unicode) else value
        for key, value in params[auth_param_name].iteritems()
    }
    state_config = params.get(state_param_name, {})

    return AppConfig(auth_config, state_config, event, func_name, func_version)
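# Example of the parameter names derived above for a function named
# 'example_app_function', assuming hypothetical suffix values of
# 'config_auth' and 'config_state':
#   auth_param_name  = 'example_app_function_config_auth'
#   state_param_name = 'example_app_function_config_state'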
def _make_post_request(self, full_url, headers, data, is_json=True):
    """Method for returning the json loaded response for this POST request

    Returns:
        tuple (bool, dict): False if there was an error performing the request,
            and the dictionary loaded from the json response
    """
    LOGGER.debug('[%s] Making POST request on poll #%d', self, self._poll_count)

    # Perform the request and return the response as a dict
    if is_json:
        response = requests.post(full_url, headers=headers,
                                 json=data, timeout=self._DEFAULT_REQUEST_TIMEOUT)
    else:
        # If the content type is form-encoded, the param is 'data' rather than 'json'
        response = requests.post(full_url, headers=headers,
                                 data=data, timeout=self._DEFAULT_REQUEST_TIMEOUT)

    return self._check_http_response(response), response.json()
def _segment_and_send(self, logs):
    """Protected method for segmenting a list of logs into smaller lists
    so they conform to the input limit of AWS Lambda

    Args:
        logs (list): List of the logs that have been gathered
    """
    log_count = len(logs)
    LOGGER.debug('Segmenting %d logs into subsets', log_count)

    # Halve the list (rounding up) and recurse on any subset that is still too large
    segment_size = int(math.ceil(log_count / 2.0))
    for index in range(0, log_count, segment_size):
        subset = logs[index:segment_size + index]

        # Try to send this current subset to the rule processor
        # and segment again if they are too large to be sent at once
        if not self._send_logs_to_lambda(subset):
            self._segment_and_send(subset)

    return True
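# A standalone illustration of the recursive halving above, using a stand-in
# size check that allows at most two logs per batch (a hypothetical limit in
# place of the real payload-size check):
import math

def _demo_segment(logs, max_size=2, sent=None):
    """Collect the batches the halving strategy above would produce"""
    sent = [] if sent is None else sent
    if len(logs) <= max_size:
        sent.append(logs)
        return sent
    segment_size = int(math.ceil(len(logs) / 2.0))
    for index in range(0, len(logs), segment_size):
        _demo_segment(logs[index:segment_size + index], max_size, sent)
    return sent

assert _demo_segment([1, 2, 3, 4, 5]) == [[1, 2], [3], [4, 5]]
# Halving rather than fixed-size chunking keeps the recursion depth
# logarithmic in the number of logs.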
def _get_parameters(cls, *names):
    """Simple helper function to house the boto3 ssm client get_parameters operations

    Args:
        names (list): A list of parameter names to retrieve from the aws ssm
            parameter store

    Returns:
        tuple (dict, list): Dictionary with the loaded parameter names as keys
            and the actual parameter (as a dictionary) as the value. The secondary
            list that is returned contains any invalid parameters that were not loaded
    """
    # Create the ssm boto3 client that will be cached and used throughout this execution
    # if one does not exist already
    if AppConfig.SSM_CLIENT is None:
        boto_config = client.Config(connect_timeout=cls.BOTO_TIMEOUT,
                                    read_timeout=cls.BOTO_TIMEOUT)
        AppConfig.SSM_CLIENT = boto3.client('ssm', config=boto_config)

    LOGGER.debug('Retrieving values from parameter store with names: %s',
                 ', '.join('\'{}\''.format(name) for name in names))
    try:
        parameters = AppConfig.SSM_CLIENT.get_parameters(
            Names=list(names),
            WithDecryption=True
        )
    except ClientError as err:
        joined_names = ', '.join('\'{}\''.format(name) for name in names)
        raise AppConfigError('Could not get parameter with names {}. Error: '
                             '{}'.format(joined_names, err.response['Error']['Message']))

    decoded_params = {}
    for param in parameters['Parameters']:
        try:
            decoded_params[param['Name']] = json.loads(param['Value'])
        except ValueError:
            raise AppConfigError('Could not load value for parameter with '
                                 'name \'{}\'. The value is not valid json: '
                                 '\'{}\''.format(param['Name'], param['Value']))

    return decoded_params, parameters['InvalidParameters']
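# Hypothetical example of a state parameter value as stored in SSM; the raw
# string must be valid JSON for the json.loads() call above to succeed:
#   '{"last_timestamp": 1517097600, "current_state": "succeeded", "context": {}}'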
def _finalize(self):
    """Method for performing any final steps, like saving applicable state

    This function is also used to invoke a new copy of this lambda in the case
    that there are more logs available to collect.
    """
    if not self._last_timestamp:
        LOGGER.error('Ending last timestamp is 0. This should not happen and '
                     'is likely due to the subclass not setting this value.')

    if self._last_timestamp == self._config.start_last_timestamp:
        LOGGER.info('Ending last timestamp is the same as the beginning last timestamp. '
                    'This could occur if there were no logs collected for this execution.')

    LOGGER.info('[%s] App complete; gathered %d logs in %d polls.',
                self, self._gathered_log_count, self._poll_count)

    self._config.last_timestamp = self._last_timestamp

    # If there are more logs to poll, invoke this app function again and mark
    # the config as 'partial'. Marking the state as 'partial' prevents
    # scheduled function invocations from running alongside chained invocations.
    if self._more_to_poll:
        self._config.mark_partial()
        self._invoke_successive_app()
        return

    self._config.mark_success()
def current_state(self, state):
    """Set the current state of the execution"""
    if not getattr(self.States, str(state).upper(), None):
        LOGGER.error('Current state cannot be saved with value \'%s\'', state)
        return

    if self._current_state == state:
        LOGGER.debug('State is unchanged and will not be saved: %s', state)
        return

    LOGGER.debug('Setting current state to: %s', state)

    self._current_state = state
    self._save_state()
def mark_success(self):
    """Helper method to mark the state as 'succeeded'"""
    LOGGER.debug('Marking current_state as: %s', self.States.SUCCEEDED)
    self.current_state = self.States.SUCCEEDED
def current_state(self):
    """Get the current state of the execution"""
    LOGGER.debug('Getting current_state: %s', self._current_state)
    return self._current_state
def last_timestamp(self):
    """Get the last timestamp"""
    LOGGER.debug('Getting last_timestamp as: %s', self._last_timestamp)
    return self._last_timestamp
def is_successive_invocation(self):
    """Check if this invocation is a successive invoke from a previous execution"""
    is_successive = self._invocation_type == self.Events.SUCCESSIVE_INVOKE
    LOGGER.debug('Is successive invocation: %s', is_successive)
    return is_successive
def mark_partial(self):
    """Helper method to mark the state as 'partial'"""
    LOGGER.debug('Marking current_state as: %s', self.States.PARTIAL)
    self.current_state = self.States.PARTIAL
def report_remaining_seconds(self):
    """Log the remaining seconds"""
    LOGGER.info('Lambda remaining seconds: %.2f', self.remaining_ms() / 1000.0)
def context(self):
    """Get an additional context dictionary specific to each app"""
    LOGGER.debug('Getting context: %s', self._context)
    return self._context
def mark_running(self):
    """Helper method to mark the state as 'running'"""
    LOGGER.debug('Marking current_state as: %s', self.States.RUNNING)
    self.current_state = self.States.RUNNING
def _wrapper(*args, **kwargs):
    """Catch a requests timeout and return the failed (bool, dict) tuple"""
    try:
        return func(*args, **kwargs)
    except requests.exceptions.Timeout:
        LOGGER.exception('[%s] Request timed out', func.__name__)
        return False, None
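# Like the timing wrapper earlier, this _wrapper implies an enclosing
# try/except decorator whose name is not shown in this section. Its
# (False, None) fallback mirrors the (bool, dict) tuple returned by
# _make_get_request and _make_post_request, so a timed-out request reads
# as a failed poll instead of raising out of the gather loop.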
def mark_failure(self):
    """Helper method to mark the state as 'failed'"""
    LOGGER.debug('Marking current_state as: %s', self.States.FAILED)
    self.current_state = self.States.FAILED
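# A sketch of the States container assumed by the getattr() check in the
# current_state setter and by the mark_* helpers above; the concrete string
# values are assumptions based on how the states are logged.
class States(object):
    """Assumed enumeration of the execution states used by this config"""
    PARTIAL = 'partial'
    RUNNING = 'running'
    SUCCEEDED = 'succeeded'
    FAILED = 'failed'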