def _make_get_request():
    # Defined as a closure so the surrounding backoff logic stays
    # patchable and testable.
    try:
        # Log the GET request URL, which is especially useful when
        # debugging query syntax
        LOGGER.debug('URL of GET request is %s', full_url)
        response = requests.get(full_url, headers=headers,
                                params=params, timeout=self._TIMEOUT)

        # Treat any non-200 status code as a failed request
        if not self._validate_status_code(response):
            return False, None

        # Listings of API versions and of log files come back from
        # Salesforce as JSON content
        return True, response.json()
    except requests.exceptions.Timeout:
        LOGGER.exception('Request timed out for when sending get request to %s', full_url)
        return False, None
    except ValueError:
        # Fetching log events returns raw CSV data rather than JSON, which
        # makes the .json() call above raise ValueError
        return True, response.text.encode('utf-8')
def _determine_last_time(self):
    """Determine the last time this function was executed and fallback on
    evaluating the rate value if there is no last timestamp available

    Returns:
        int: The unix timestamp for the starting point to fetch logs back to
    """
    if not self.last_timestamp:
        interval_seconds = self.evaluate_interval()
        now = int(calendar.timegm(time.gmtime()))
        start_time = now - interval_seconds

        LOGGER.debug('Current timestamp: %s seconds. Calculated delta: %s seconds',
                     now, start_time)

        # Request the date format from the app since some services expect different types
        # Using init=False will return the class without instantiating it
        date_format = StreamAlertApp.get_app(self, init=False).date_formatter()
        if date_format:
            self.last_timestamp = datetime.utcfromtimestamp(start_time).strftime(date_format)
        else:
            self.last_timestamp = start_time

    LOGGER.info('Starting last timestamp set to: %s', self.last_timestamp)

    return self.last_timestamp
def evaluate_interval(self):
    """Get the interval at which this function is executing. This translates
    an AWS Rate Schedule Expression ('rate(2 hours)') into a second interval
    """
    if 'interval' not in self:
        raise AppIntegrationConfigError('The \'interval\' value is not defined in the config')

    rate_match = AWS_RATE_RE.match(self['interval'])
    if not rate_match:
        raise AppIntegrationConfigError(
            'Invalid \'rate\' interval value: {}'.format(self['interval']))

    # The rate regex captures singular ('1 hour') and plural ('2 hours') forms
    # in different groups, so take whichever matched and strip any trailing 's'
    quantity = rate_match.group(2) or rate_match.group(4)
    unit = rate_match.group(3) or rate_match.group(5).replace('s', '')

    seconds_per_unit = {'minute': 60, 'hour': 3600, 'day': 86400}

    # Get the total seconds that this rate evaluates to
    interval = int(quantity) * seconds_per_unit[unit]

    LOGGER.debug('Evaluated rate interval: %d seconds', interval)

    return interval
def do_gather():
    """Perform the gather using this scoped method so we can time it"""
    # Track the number of polls performed during this execution
    self._poll_count += 1

    logs = self._gather_logs()

    # A falsy result means either polling hit an error or there were no
    # new logs available to retrieve
    if not logs:
        self._more_to_poll = False
        LOGGER.error('Gather process for service \'%s\' was not able to poll any logs '
                     'on poll #%d', self.type(), self._poll_count)
        return

    # Keep a running total of logs gathered across polls
    self._gathered_log_count += len(logs)

    # Hand the logs off to the batcher, which forwards them to the rule processor
    self._batcher.send_logs(self._config['function_name'], logs)

    LOGGER.debug('Updating config last timestamp from %s to %s',
                 self._config.last_timestamp, self._last_timestamp)

    # Persist the last timestamp after each function run
    self._config.last_timestamp = self._last_timestamp
def _get_parameters(names):
    """Simple helper function to house the boto3 ssm client get_parameters operations

    Args:
        names (list): A list of parameter names to retrieve from the aws ssm
            parameter store

    Returns:
        tuple (dict, list): Dictionary with the load parameter names as keys
            and the actual parameter (as a dictionary) as the value. The secondary
            list that is returned contains any invalid parameters that were not loaded
    """
    def _quote_join(items):
        # Render a comma separated, quoted list for log/error messages
        return ', '.join('\'{}\''.format(item) for item in items)

    LOGGER.debug('Retrieving values from parameter store with names: %s',
                 _quote_join(names))
    try:
        parameters = AppConfig.SSM_CLIENT.get_parameters(
            Names=names, WithDecryption=True)
    except ClientError as err:
        raise AppIntegrationConfigError(
            'Could not get parameter with names {}. Error: '
            '{}'.format(_quote_join(names), err.response['Error']['Message']))

    decoded_params = {}
    for param in parameters['Parameters']:
        name, value = param['Name'], param['Value']
        try:
            # Parameter values are stored as json blobs
            decoded_params[name] = json.loads(value)
        except ValueError:
            raise AppIntegrationConfigError(
                'Could not load value for parameter with '
                'name \'{}\'. The value is not valid json: '
                '\'{}\''.format(name, value))

    return decoded_params, parameters['InvalidParameters']
def gather(self):
    """Public method for actual gathering of logs"""
    # Initialize, saving state to 'running'
    if not self._initialize():
        return

    while True:
        # Stop polling once the time spent gathering (plus the upcoming sleep)
        # would run past the remaining Lambda time, minus a buffer reserved
        # for finalizing
        elapsed = self._gather() + self._sleep_seconds()
        budget = (self._config.remaining_ms() / 1000.0) - self._EOF_SECONDS_BUFFER
        if elapsed >= budget:
            break

        LOGGER.debug('More logs to poll for \'%s\': %s',
                     self.type(), self._more_to_poll)

        self._config.report_remaining_seconds()

        if not self._more_to_poll:
            break

        # Reset the boolean indicating that there is more data to poll. Subclasses
        # should set this to 'True' within their implementation of '_gather_logs'
        self._more_to_poll = not self._more_to_poll

    LOGGER.debug('Gathered all logs possible for this execution. More logs to poll '
                 'for \'%s\': %s', self.type(), self._more_to_poll)

    self._config.report_remaining_seconds()

    # Finalize, saving state to 'succeeded'
    self._finalize()
def is_successive_invocation(self):
    """Check if this invocation is a successive invoke from a previous execution"""
    invocation_type = self._event.get('invocation_type')
    is_successive = invocation_type == self.Events.SUCCESSIVE_INVOKE

    LOGGER.debug('Is successive invocation: %s', is_successive)

    return is_successive
def _create_service(self):
    """GSuite requests must be signed with the keyfile

    Returns:
        bool: True if the Google API discovery service was successfully
            established or False if any errors occurred during the creation
            of the Google discovery service
    """
    # Reuse the cached service when one was built by a previous poll
    if self._activities_service:
        LOGGER.debug('Service already instantiated for %s', self.type())
        return True

    credentials = self._load_credentials(self._config.auth['keyfile'])
    if not credentials:
        return False

    try:
        reports_service = discovery.build('admin', 'reports_v1',
                                          credentials=credentials)
    except errors.Error:
        LOGGER.exception('Failed to build discovery service for %s',
                         self.type())
        return False

    # The google discovery service 'Resource' class that is returned by
    # 'discovery.build' dynamically loads methods/attributes, so pylint will complain
    # about no 'activities' member existing without the below pylint comment
    self._activities_service = reports_service.activities()  # pylint: disable=no-member

    return True
def _sleep(self):
    """Function to sleep the looping"""
    # The very first poll should happen immediately, with no delay
    if not self._poll_count:
        LOGGER.debug('Skipping sleep for first poll')
        return

    # Pause for n seconds so the called API does not return a bad response
    seconds = self._sleep_seconds()
    LOGGER.debug('Sleeping \'%s\' app for %d seconds...',
                 self.type(), seconds)

    time.sleep(seconds)
def _make_request(self): """Make the request using the Box client The inner function of `_perform_request` is used to handle a single retry in the event of a ConnectionError. If this fails twice, the function will return Returns: dict: Response from Box (boxsdk.session.box_session.BoxResponse) that is json loaded into a dictionary. """ # Create the parameters for this request, 100 is the max value for limit params = { 'limit': self._MAX_CHUNK_SIZE, 'stream_type': EnterpriseEventsStreamType.ADMIN_LOGS, } # From Box's docs: Box responds to the created_before and created_after # parameters only if the stream_position parameter is not included. if self._next_stream_position: params['stream_position'] = self._next_stream_position else: params['created_after'] = self._last_timestamp LOGGER.debug('Requesting events for %s', self.type()) def _perform_request(allow_retry=True): try: # Get the events using a make_request call with the box api. This is to # support custom parameters such as 'created_after' and 'created_before' box_response = self._client.make_request( 'GET', self._client.get_url('events'), params=params, timeout=self._DEFAULT_REQUEST_TIMEOUT) except BoxException: LOGGER.exception('Failed to get events for %s', self.type()) return False, None # Return a tuple to conform to return value of safe_timeout except ConnectionError: # In testing, the requests connection seemed to get reset for no # obvious reason, and a simple retry once works fine so catch it # and retry once, but after that return False LOGGER.exception( 'Bad response received from host, will retry once') if allow_retry: return _perform_request(allow_retry=False) return False, None # Return a tuple to conform to return value of safe_timeout # Return a successful status and the JSON from the box response # Return a tuple to conform to return value of safe_timeout return True, box_response.json() return _perform_request()
def _make_post_request(self, full_url, headers, data):
    """Method for returning the json loaded response for this POST request

    Args:
        full_url (str): The full url for this POST request
        headers (dict): HTTP headers to send with the request
        data (dict): Payload to be sent as the request's json body

    Returns:
        tuple (bool, dict): False if the was an error performing the request,
            and the dictionary loaded from the json response
    """
    LOGGER.debug('Making POST request for service \'%s\' on poll #%d',
                 self.type(), self._poll_count)

    # Perform the request and return the response as a dict
    # NOTE(review): no 'timeout' is passed here, so a hung connection could
    # block for the rest of the Lambda execution. Other request helpers in
    # this file pass timeout=self._DEFAULT_REQUEST_TIMEOUT -- confirm this
    # class defines that attribute and consider doing the same here
    response = requests.post(full_url, headers=headers, json=data)

    return self._check_http_response(response), response.json()
def _request_token(self):
    """Request OAuth token from salesforce

    Meanwhile, it will also get instance url which will be used in future
    requests. The instance url identifies the Salesforce instance to which
    API calls should be sent.

    Returns:
        bool: Returns True if update auth headers and instance url successfully.
    """
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    # Required credentials when requesting a token. Salesforce's
    # username-password OAuth flow expects the account password with the
    # security token appended as the 'password' field.
    data = {
        'grant_type': 'password',
        'client_id': self._config.auth['client_id'],
        'client_secret': self._config.auth['client_secret'],
        'username': self._config.auth['username'],
        # Bug fix: the previous format string ('******') contained no
        # placeholders, so the literal mask was sent instead of the
        # actual password + security token concatenation
        'password': '{}{}'.format(self._config.auth['password'],
                                  self._config.auth['security_token']),
        'response_type': 'code',
        'redirect_uri': self._SALESFORCE_TOKEN_URL
    }

    # The token endpoint expects a form-encoded body, hence is_json=False
    success, response = self._make_post_request(
        self._SALESFORCE_TOKEN_URL, headers, data, False)

    if not (success and response):
        return False

    # Both the token and the instance url are required for future requests
    if not (response.get('access_token') and response.get('instance_url')):
        LOGGER.error('Response invalid generating headers for service \'%s\'',
                     self._type())
        return False

    bearer = 'Bearer {}'.format(response.get('access_token'))
    self._auth_headers = {
        'Content-Type': 'application/json',
        'Authorization': bearer
    }
    self._instance_url = response.get('instance_url')

    LOGGER.debug('Successfully obtain OAuth token and instance URL')
    return True
def _make_get_request(self, full_url, headers, params=None):
    """Method for returning the json loaded response for this GET request

    Returns:
        tuple (bool, dict): False if the was an error performing the request,
            and the dictionary loaded from the json response
    """
    LOGGER.debug('Making GET request for service \'%s\' on poll #%d',
                 self.type(), self._poll_count)

    # Issue the GET request, bounded by the default timeout, and hand the
    # json-loaded body back along with the status check result
    response = requests.get(
        full_url,
        headers=headers,
        params=params,
        timeout=self._DEFAULT_REQUEST_TIMEOUT)

    return self._check_http_response(response), response.json()
def _send_logs_to_stream_alert(self, source_function, logs):
    """Protected method for sending logs to the rule processor lambda
    function for processing. This performs some size checks before sending.

    Args:
        source_function (str): The app function name from which the logs came
        logs (list): List of the logs that have been gathered
    """
    # The rule processor expects a record wrapper that identifies the app
    # these logs were collected from alongside the list of logs
    payload = {
        'Records': [{'stream_alert_app': source_function, 'logs': logs}]
    }

    serialized = json.dumps(payload, separators=(',', ':'))
    payload_size = len(serialized)
    log_count = len(logs)

    # Refuse to send anything over the Lambda input limit; the caller
    # segments the logs and retries when this returns False
    if payload_size > MAX_LAMBDA_PAYLOAD_SIZE:
        LOGGER.debug('Log payload size for %d logs exceeds limit and will be '
                     'segmented (%d > %d max).', log_count, payload_size,
                     MAX_LAMBDA_PAYLOAD_SIZE)
        return False

    LOGGER.debug('Sending %d logs to rule processor with payload size %d',
                 log_count, payload_size)

    try:
        response = Batcher.LAMBDA_CLIENT.invoke(
            FunctionName=self.rp_function,
            InvocationType='Event',
            Payload=serialized,
            Qualifier='production')
    except ClientError as err:
        LOGGER.error('An error occurred while sending logs to '
                     '\'%s:production\'. Error is: %s',
                     self.rp_function, err.response)
        raise

    LOGGER.info('Sent %d logs to \'%s\' with Lambda request ID \'%s\'',
                log_count, self.rp_function,
                response['ResponseMetadata']['RequestId'])

    return True
def _determine_last_time(self):
    """Determine the last time this function was executed and fallback on
    evaluating the rate value if there is no last timestamp available

    Returns:
        int: The unix timestamp for the starting point to fetch logs back to
    """
    if not self.last_timestamp:
        interval_time = self.evaluate_interval()

        # Bug fix: time.mktime() interprets a struct_time as *local* time, so
        # time.mktime(time.gmtime()) is skewed by the local UTC offset.
        # time.time() already returns the current UTC epoch timestamp.
        current_time = int(time.time())
        LOGGER.debug('Current timestamp: %s seconds', current_time)

        # Fall back to looking one full execution interval into the past
        self.last_timestamp = current_time - interval_time

    LOGGER.info('Starting last timestamp set to: %d seconds',
                self.last_timestamp)

    return self.last_timestamp
def _create_client(self):
    """Box requests must be signed with a JWT keyfile

    Returns:
        bool: True if the Box client was successfully created or False
            if any errors occurred during the creation of the client
    """
    # Short circuit when a client was already built during a previous poll
    if self._client:
        LOGGER.debug('Client already instantiated for %s', self.type())
        return True

    jwt_auth = self._load_auth(self._config.auth['keyfile'])
    if not jwt_auth:
        return False

    self._client = Client(jwt_auth)

    return bool(self._client)
def _segment_and_send(self, source_function, logs):
    """Protected method for segmenting a list of logs into smaller lists
    so they conform to the input limit of AWS Lambda

    Args:
        source_function (str): The app function name from which the logs came
        logs (list): List of the logs that have been gathered
    """
    log_count = len(logs)

    LOGGER.debug('Segmenting %d logs into subsets', log_count)

    # Split the logs into halves (rounding up) and recurse on any subset
    # that is still too large to be sent at once
    segment_size = int(math.ceil(log_count / 2.0))
    for index in range(0, log_count, segment_size):
        subset = logs[index:segment_size + index]

        # Try to send this current subset to the rule processor
        # and segment again if they are too large to be sent at once
        if not self._send_logs_to_stream_alert(source_function, subset):
            # Bug fix: a single log that still exceeds the payload limit can
            # never be segmented further -- previously this recursed forever.
            # Drop the oversized log with an error instead.
            if len(subset) == 1:
                LOGGER.error('Single log for \'%s\' exceeds the payload size '
                             'limit and will be dropped', source_function)
                continue
            self._segment_and_send(source_function, subset)

    return True
def _make_post_request(self, full_url, headers, data, is_json=True):
    """Method for returning the json loaded response for this POST request

    Returns:
        tuple (bool, dict): False if the was an error performing the request,
            and the dictionary loaded from the json response
    """
    LOGGER.debug('Making POST request for service \'%s\' on poll #%d',
                 self.type(), self._poll_count)

    # Form-encoded content must be sent via the 'data' kwarg, while json
    # payloads go through the 'json' kwarg
    payload_kwarg = {'json': data} if is_json else {'data': data}

    response = requests.post(full_url, headers=headers,
                             timeout=self._DEFAULT_REQUEST_TIMEOUT,
                             **payload_kwarg)

    return self._check_http_response(response), response.json()
def _get_latest_api_version(self):
    """GET request to fetch supported API versions and find the latest API version

    The example of response json body:
        [
            {
                "version": "20.0",
                "label": "Winter '11",
                "url": "/services/data/v20.0"
            },
            {
                "version": "21.0",
                "label": "Spring '11",
                "url": "/services/data/v21.0"
            },
            {
                "version": "26.0",
                "label": "Winter '13",
                "url": "/services/data/v26.0"
            }
        ]

    Returns:
        bool: Return True if get latest api version successfully.
    """
    url = '{}/services/data/'.format(self._instance_url)

    success, response = self._make_get_request(url, self._auth_headers)
    if not (success and response):
        LOGGER.error('Failed to fetch latest api version')
        return False

    # Missing 'version' keys default to 0 so they never win the comparison
    versions = [float(version.get('version', 0)) for version in response]

    # Bug fix: an empty response previously fell through without updating
    # the version and still reported success; treat it as a failure
    if not versions:
        LOGGER.error('Failed to obtain latest API version')
        return False

    latest = max(versions)
    if latest == 0.0:
        LOGGER.error('Failed to obtain latest API version')
        return False

    self._latest_api_version = str(latest)

    LOGGER.debug('Successfully obtain latest API version %s',
                 self._latest_api_version)

    return True
def gather(self):
    """Public method for actual gathering of logs"""
    # Initialize, saving state to 'running'
    if not self._initialize():
        return

    while True:
        # Keep polling only while the time one gather + sleep cycle takes
        # fits inside the remaining Lambda execution time
        elapsed = self._gather() + self._sleep_seconds()
        if elapsed >= self._config.remaining_ms() / 1000.0:
            break

        LOGGER.debug('More logs to poll for \'%s\': %s',
                     self.type(), self._more_to_poll)
        LOGGER.info('Lambda remaining seconds: %.2f',
                    self._config.remaining_ms() / 1000.0)

        if not self._more_to_poll:
            break

        # Reset the boolean indicating that there is more data to poll. Subclasses
        # should set this to 'True' within their implementation of '_gather_logs'
        self._more_to_poll = not self._more_to_poll

    # Finalize, saving state to 'succeeded'
    self._finalize()
def _gather(self): """Protected entry point for the beginning of polling""" # Make this request sleep if the API throttles requests self._sleep() def do_gather(): """Perform the gather using this scoped method so we can time it""" logs = self._gather_logs() # Make sure there are logs, this can be False if there was an issue polling # of if there are no new logs to be polled if not logs: LOGGER.error('Gather process for service \'%s\' was not able to poll any logs', self.type()) return # Increment the count of logs gathered self._gathered_log_count += len(logs) # Utilize the batcher to send logs to the rule processor self._batcher.send_logs(self._config['function_name'], logs) LOGGER.debug('Updating config last timestamp from %d to %d', self._config.last_timestamp, self._last_timestamp) # Save the config's last timestamp after each function run self._config.last_timestamp = self._last_timestamp self._poll_count += 1 # Use timeit to track how long one poll takes, and cast to a decimal. # Use decimal since these floating point values can be very small and the # builtin float uses scientific notation when handling very small values exec_time = Decimal(timeit(do_gather, number=1)) LOGGER.debug('Gather process for \'%s\' executed in %f seconds.', self.type(), exec_time) # Add a 20% buffer to the time it too to account for some unforeseen delay # Cast this back to float so general arithemtic works return float(exec_time * Decimal(self._POLL_BUFFER_MULTIPLIER))
def _gather_logs(self):
    """Gather the G Suite Admin Report logs for this application type

    Returns:
        bool or list: If the execution fails for some reason, return False.
            Otherwise, return a list of activities for this application type.
    """
    if not self._create_service():
        return False

    # Cache the last event timestamp so it can be used for future requests;
    # only do this on the first page so paginated requests share the same
    # starting point
    if not self._next_page_token:
        self._last_event_timestamp = self._last_timestamp

    LOGGER.debug('Querying activities since %s for %s',
                 self._last_event_timestamp, self.type())
    LOGGER.debug('Using next page token: %s', self._next_page_token)

    activities_list = self._activities_service.list(
        userKey='all',
        applicationName=self._type(),
        startTime=self._last_event_timestamp,
        pageToken=self._next_page_token)

    try:
        results = activities_list.execute()
    except self._GOOGLE_API_EXCEPTIONS:
        LOGGER.exception('Failed to execute activities listing for %s',
                         self.type())
        return False

    if not results:
        LOGGER.error('No results received from the G Suite API request for %s',
                     self.type())
        return False

    activities = results.get('items', [])
    if not activities:
        LOGGER.info('No logs in response from G Suite API request for %s',
                    self.type())
        return False

    # The activity api returns logs in reverse chronological order, for some reason, and
    # therefore the newest log will be first in the list. This should only be updated
    # once during the first poll
    if not self._next_page_token:
        self._last_timestamp = activities[0]['id']['time']
        LOGGER.debug('Caching last timestamp: %s', self._last_timestamp)

    # More pages remain whenever the API hands back a next page token
    self._next_page_token = results.get('nextPageToken')
    self._more_to_poll = bool(self._next_page_token)

    return activities
def mark_partial(self):
    """Helper method to mark the state as 'partial'"""
    new_state = self.States.PARTIAL
    LOGGER.debug('Marking current_state as: %s', new_state)
    self[self._STATE_KEY] = new_state
def load_config(cls, context, event):
    """Load the configuration for this app invocation

    Args:
        context (LambdaContext): The AWS LambdaContext object, passed in via
            the handler.
        event (dict): The AWS Lambda invocation event, stored on the config

    Returns:
        AppConfig: Subclassed dictionary with the below structure that contains
            all of the methods for configuration validation, updating, saving, etc:
            {
                'type': <type>,
                'cluster': <cluster>,
                'prefix': <prefix>,
                'app_name': <app_name>,
                'interval': <rate_interval>,
                'region': <aws_region>,
                'account_id': <aws_account_id>,
                'function_name': <function_name>,
                'qualifier': <qualifier>,
                'last_timestamp': <time>,
                'current_state': <running|succeeded|failed>,
                'auth': {
                    'req_auth_item_01': <req_auth_value_01>
                }
            }

    Raises:
        AppIntegrationConfigError: If any required (non-state) parameters
            could not be retrieved from the parameter store
    """
    # Load the base config from the context that will get updated with other info
    base_config = AppConfig._parse_context(context)

    LOGGER.debug('Loaded env config: %s', base_config)

    # Create the ssm boto3 client that will be cached and used throughout this execution
    # if one does not exist already
    if AppConfig.SSM_CLIENT is None:
        AppConfig.SSM_CLIENT = boto3.client(
            'ssm', region_name=base_config['region'])

    # Generate a map of all the suffixes and full parameter names
    param_names = {
        key: '_'.join([base_config['function_name'], key])
        for key in {
            cls.AUTH_CONFIG_SUFFIX, cls.BASE_CONFIG_SUFFIX,
            cls.STATE_CONFIG_SUFFIX
        }
    }

    LOGGER.debug('Parameter suffixes and names: %s', param_names)

    # Get the loaded parameters and a list of any invalid ones from parameter store
    params, invalid_params = AppConfig._get_parameters(
        param_names.values())

    LOGGER.debug(
        'Retrieved parameters from parameter store: %s',
        cls._scrub_auth_info(params, param_names[cls.AUTH_CONFIG_SUFFIX]))
    LOGGER.debug(
        'Invalid parameters could not be retrieved from parameter store: %s',
        invalid_params)

    # Check to see if there are any required parameters in the invalid params list;
    # a missing state parameter is acceptable (first-time deploy), anything else is not
    missing_required_params = [
        param for param in invalid_params
        if param != param_names[cls.STATE_CONFIG_SUFFIX]
    ]

    if missing_required_params:
        joined_params = ', '.join('\'{}\''.format(param) for param in missing_required_params)
        raise AppIntegrationConfigError(
            'Could not load parameters required for this '
            'configuration: {}'.format(joined_params))

    # Update the env config with the base config values
    base_config.update(params[param_names[cls.BASE_CONFIG_SUFFIX]])

    # The state config can be None with first time deploys, so use a lookup and
    # add default empty values if there is no state found
    base_config.update(
        params.get(param_names[cls.STATE_CONFIG_SUFFIX], {
            cls._STATE_KEY: None,
            cls._TIME_KEY: None
        }))

    # Add the auth config info to the 'auth' key since these key/values can vary
    # from service to service. Values are utf-8 encoded here since this is
    # Python 2 and downstream consumers expect byte strings
    base_config[cls.AUTH_CONFIG_SUFFIX] = {
        key: value.encode('utf-8') if isinstance(value, unicode) else value
        for key, value in params[param_names[
            cls.AUTH_CONFIG_SUFFIX]].iteritems()
    }

    return AppConfig(base_config, event)
def current_state(self):
    """Get the current execution state stored on the config
    (e.g. 'running', 'succeeded', 'failed', 'partial')
    """
    LOGGER.debug('Getting current_state: %s', self.get(self._STATE_KEY))
    return self.get(self._STATE_KEY)
def last_timestamp(self):
    """Get the last time from the config"""
    timestamp = self.get(self._TIME_KEY)
    LOGGER.debug('Getting last_timestamp as: %s', timestamp)
    return timestamp
def mark_failure(self):
    """Helper method to mark the state as 'failed'"""
    new_state = self.States.FAILED
    LOGGER.debug('Marking current_state as: %s', new_state)
    self[self._STATE_KEY] = new_state
def mark_success(self):
    """Helper method to mark the state as 'succeeded'"""
    new_state = self.States.SUCCEEDED
    LOGGER.debug('Marking current_state as: %s', new_state)
    self[self._STATE_KEY] = new_state
def mark_running(self):
    """Helper method to mark the state as 'running'"""
    new_state = self.States.RUNNING
    LOGGER.debug('Marking current_state as: %s', new_state)
    self[self._STATE_KEY] = new_state
def last_timestamp(self, timestamp):
    """Set the last time in the config"""
    # Log the value being stored to aid debugging of state persistence
    LOGGER.debug('Setting last_timestamp as: %s', timestamp)
    self[self._TIME_KEY] = timestamp