def evaluate_interval(self):
    """Translate the configured rate expression into a number of seconds.

    The 'interval' config value is expected to be an AWS Rate Schedule
    Expression, for example 'rate(2 hours)'.

    Returns:
        int: The total number of seconds that the configured rate evaluates to

    Raises:
        AppIntegrationConfigError: If 'interval' is not defined in the config
            or its value does not match AWS_RATE_RE
    """
    if 'interval' not in self:
        raise AppIntegrationConfigError(
            'The \'interval\' value is not defined in the config')

    raw_rate = self['interval']
    parsed = AWS_RATE_RE.match(raw_rate)
    if not parsed:
        raise AppIntegrationConfigError(
            'Invalid \'rate\' interval value: {}'.format(raw_rate))

    # Groups 2/3 and 4/5 are alternative value/unit captures of the regex;
    # presumably singular vs. plural forms (TODO confirm against AWS_RATE_RE).
    # The 's' is stripped from group 5 so it can be used as a lookup key below.
    count = parsed.group(2) or parsed.group(4)
    unit = parsed.group(3) or parsed.group(5).replace('s', '')

    seconds_per_unit = {
        'minute': 60,
        'hour': 60 * 60,
        'day': 60 * 60 * 24,
    }

    interval = int(count) * seconds_per_unit[unit]
    LOGGER.debug('Evaluated rate interval: %d seconds', interval)

    return interval
def _gather_logs(self):
    """Gather log events from the Slack endpoint for this app.

    Posts a form-encoded request to the Slack API and, when Slack reports
    success, filters the response entries and records the poll time as the
    new last timestamp.

    Returns:
        list: A list of dictionaries containing log events, or False if the
            request failed or Slack did not report 'ok' in the response.
    """
    url = '{}{}'.format(self._SLACK_API_BASE_URL, self._endpoint())
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Authorization': 'Bearer {}'.format(self._config.auth['auth_token']),
    }

    data = self._get_request_data()

    # The body is form-encoded, so pass False to send it as 'data' instead of 'json'
    success, response = self._make_post_request(url, headers, data, False)

    # No exception is being handled at these points, so use 'error' rather
    # than 'exception' (which would append an empty traceback to the log)
    if not success:
        LOGGER.error('Received bad response from slack')
        return False

    if not response.get('ok'):
        LOGGER.error('Received error or warning from slack')
        return False

    self._more_to_poll = self._check_for_more_to_poll(response)

    results = self._filter_response_entries(response)

    self._last_timestamp = int(time.time())

    return results
def _invoke_successive_gather(self):
    """Asynchronously invoke this app function again to collect remaining logs.

    Used when there were more logs to collect than could be handled in this
    execution. Rather than marking the config with 'success' and waiting for
    the next scheduled execution, the lambda is invoked again with an 'event'
    payload indicating that there are more logs to collect. Scheduled
    executions carry no such 'event', so they exit when checking the
    'self._config.is_running' property, which allows chained invocations
    without duplicated effort or collisions.

    Raises:
        ClientError: Re-raised after logging if the Lambda invoke call fails
    """
    try:
        client = boto3.client('lambda', region_name=self._config['region'])
        function_name = self._config['function_name']
        payload = json.dumps(
            {'invocation_type': self._config.Events.SUCCESSIVE_INVOKE})
        qualifier = self._config['qualifier']
        # 'Event' requests an asynchronous invocation
        response = client.invoke(
            FunctionName=function_name,
            InvocationType='Event',
            Payload=payload,
            Qualifier=qualifier)
    except ClientError as err:
        LOGGER.error(
            'An error occurred while invoking a subsequent app function '
            '(\'%s:%s\'). Error is: %s',
            self._config['function_name'],
            self._config['qualifier'],
            err.response)
        raise

    request_id = response['ResponseMetadata']['RequestId']
    LOGGER.info(
        'Invoking successive apps function \'%s\' with Lambda request ID \'%s\'',
        self._config['function_name'],
        request_id)
def is_successive_invocation(self):
    """Report whether this invocation was spawned by a previous execution.

    Returns:
        bool: True if the event's 'invocation_type' equals
            Events.SUCCESSIVE_INVOKE, False otherwise
    """
    invocation_type = self._event.get('invocation_type')
    successive = invocation_type == self.Events.SUCCESSIVE_INVOKE

    LOGGER.debug('Is successive invocation: %s', successive)

    return successive
def _generate_headers(self):
    """Request a new OAuth token and store it as the auth headers.

    Each request will request a new token to call the resources APIs.

    More details to be found here:
        https://developers.onelogin.com/api-docs/1/oauth20-tokens/generate-tokens-2

    On success the bearer token is stored in 'self._auth_headers'.

    Returns:
        bool: True if the auth headers were generated and stored, False if the
            token request failed or the response contained no access token
    """
    authorization = 'client_id: {}, client_secret: {}'.format(
        self._config['auth']['client_id'], self._config['auth']['client_secret'])

    headers_token = {
        'Authorization': authorization,
        'Content-Type': 'application/json'
    }

    result, response = self._make_post_request(
        self._token_endpoint(), headers_token, {'grant_type': 'client_credentials'})

    if not result:
        return False

    # Reject an empty response or one without a token, instead of storing
    # a useless 'bearer:None' header that would fail on every later request
    if not response or not response.get('access_token'):
        LOGGER.error(
            'Response invalid generating headers for service \'%s\'', self.type())
        return False

    bearer = 'bearer:{}'.format(response['access_token'])
    self._auth_headers = {'Authorization': bearer}

    # Return an explicit truthy value so callers can tell success from failure
    return True
def _make_get_request():
    """Perform the GET request and decode the response body.

    This is a closure (so the backoff logic is patchable and testable) that
    relies on 'self', 'full_url', 'headers' and 'params' from the enclosing
    scope.

    Returns:
        tuple (bool, dict|str): (True, loaded json) when the body is json,
            (True, utf-8 encoded text) when the body is not json, or
            (False, None) if the request timed out or the status code did
            not validate.
    """
    # Log GET request URL for debugging purpose, especially useful when
    # debugging query syntax
    LOGGER.debug('URL of GET request is %s', full_url)

    try:
        resp = requests.get(full_url, headers=headers, params=params,
                            timeout=self._TIMEOUT)
    except requests.exceptions.Timeout:
        LOGGER.exception(
            'Request timed out for when sending get request to %s', full_url)
        return False, None

    # Return false if resp contains non-200 status code.
    if not self._validate_status_code(resp):
        return False, None

    # Only the json decoding is guarded by the ValueError handler. Some
    # requests exceptions (ie: for a malformed URL) also subclass ValueError,
    # and handling those here would reference 'resp' before it is assigned.
    try:
        # When querying list of api versions and log files, Salesforce responses
        # json content.
        return True, resp.json()
    except ValueError:
        # When fetch log events, Salesforce returns raw data in csv format, not json
        return True, resp.text.encode('utf-8')
def _generate_auth(self, hostname, params):
    """Build the signed request headers required for a Duo GET request.

    Duo requests must be signed each time. This has been largely
    borrowed/updated from here:
        https://github.com/duosecurity/duo_client_python/blob/master/duo_client/admin.py

    Args:
        hostname (str): Host that the request is being sent to
        params (dict): Query parameters to include in the signature

    Returns:
        dict: The 'Date', 'Authorization' and 'Host' headers for the request,
            or False if the hmac signature could not be generated
    """
    formatted_date = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S -0000')

    # The signed string is the newline-joined date, method, host, path and query
    signed_parts = (
        formatted_date,
        'GET',
        hostname,
        self._endpoint(),
        urllib.urlencode(params),
    )
    auth_string = '\n'.join(signed_parts)

    try:
        signature = hmac.new(self._config.auth['secret_key'],
                             auth_string,
                             hashlib.sha1)
    except TypeError:
        LOGGER.exception('Could not generate hmac signature')
        return False

    # Basic auth is the integration key paired with the hmac hex digest
    basic_auth = ':'.join((self._config.auth['integration_key'],
                           signature.hexdigest()))

    auth_headers = {'Date': formatted_date}
    auth_headers['Authorization'] = 'Basic {}'.format(b64encode(basic_auth))
    auth_headers['Host'] = hostname

    return auth_headers
def _create_service(self):
    """Establish the Google API discovery service used to fetch activities.

    GSuite requests must be signed with the keyfile.

    Returns:
        bool: True if the activities service already exists or was created
            successfully, False if the credentials could not be loaded or
            the discovery service could not be built
    """
    if self._activities_service:
        LOGGER.debug('Service already instantiated for %s', self.type())
        return True

    credentials = self._load_credentials(self._config.auth['keyfile'])
    if not credentials:
        return False

    try:
        resource = discovery.build('admin', 'reports_v1', credentials=credentials)
    except errors.Error:
        LOGGER.exception('Failed to build discovery service for %s', self.type())
        return False
    else:
        # The 'Resource' class returned by 'discovery.build' loads its
        # methods/attributes dynamically, so pylint cannot see 'activities'
        # and the inline pylint comment below is required
        self._activities_service = resource.activities()  # pylint: disable=no-member

    return True
def _perform_request(allow_retry=True):
    """Fetch events through the Box client, retrying once on a ConnectionError.

    Relies on 'self' and 'params' from the enclosing scope.

    Args:
        allow_retry (bool): Whether a ConnectionError may be retried one time

    Returns:
        tuple (bool, dict): Success status and the json loaded response, or
            (False, None) on failure. A tuple is always returned to conform
            to the return value of safe_timeout.
    """
    retries_left = 1 if allow_retry else 0

    while True:
        try:
            # Use a make_request call with the box api to support custom
            # parameters such as 'created_after' and 'created_before'
            box_response = self._client.make_request(
                'GET',
                self._client.get_url('events'),
                params=params,
                timeout=self._DEFAULT_REQUEST_TIMEOUT)
        except BoxException:
            LOGGER.exception('Failed to get events for %s', self.type())
            return False, None
        except ConnectionError:
            # In testing, the requests connection seemed to get reset for no
            # obvious reason and a single retry works fine, so retry once
            # and give up after that
            LOGGER.exception(
                'Bad response received from host, will retry once')
            if retries_left:
                retries_left -= 1
                continue
            return False, None

        # Successful status and the JSON from the box response
        return True, box_response.json()
def _determine_last_time(self):
    """Resolve the starting point from which logs should be fetched.

    If no last timestamp is stored, one is derived by subtracting the
    evaluated rate interval from the current UTC time and is saved to
    'self.last_timestamp'.

    Returns:
        The stored last timestamp. When newly derived, this is a unix
            timestamp (int), or a string formatted with the app's date
            format if the app supplies one.
    """
    if self.last_timestamp:
        return self.last_timestamp

    interval_time = self.evaluate_interval()
    current_time = int(calendar.timegm(time.gmtime()))
    time_delta = current_time - interval_time
    LOGGER.debug(
        'Current timestamp: %s seconds. Calculated delta: %s seconds',
        current_time,
        time_delta)

    # Some services expect a different timestamp type, so ask the app for its
    # date format. Using init=False returns the class without instantiating it
    app_class = StreamAlertApp.get_app(self, init=False)
    date_format = app_class.date_formatter()

    self.last_timestamp = (
        datetime.utcfromtimestamp(time_delta).strftime(date_format)
        if date_format else time_delta)

    LOGGER.info('Starting last timestamp set to: %s', self.last_timestamp)

    return self.last_timestamp
def do_gather():
    """Run a single poll; scoped as its own function so it can be timed.

    Relies on 'self' from the enclosing scope. Returns nothing.
    """
    self._poll_count += 1

    logs = self._gather_logs()

    if logs:
        # Track how many logs have been gathered so far
        self._gathered_log_count += len(logs)

        # Hand the logs to the batcher to be sent to the rule processor
        self._batcher.send_logs(self._config['function_name'], logs)

        LOGGER.debug('Updating config last timestamp from %s to %s',
                     self._config.last_timestamp, self._last_timestamp)

        # Persist the config's last timestamp after each function run
        self._config.last_timestamp = self._last_timestamp
        return

    # 'logs' can be False if there was an issue polling, or empty if there
    # are no new logs to be polled. Either way, stop polling.
    self._more_to_poll = False
    LOGGER.error(
        'Gather process for service \'%s\' was not able to poll any logs '
        'on poll #%d',
        self.type(),
        self._poll_count)
def gather(self):
    """Public entry point that polls for logs until done or out of time."""
    # Initialize, saving state to 'running'
    if not self._initialize():
        return

    while True:
        # Order matters: the poll itself runs first, and only then is the
        # remaining execution time read for the comparison
        poll_cost = self._gather() + self._sleep_seconds()
        time_budget = (self._config.remaining_ms() / 1000.0) - self._EOF_SECONDS_BUFFER
        if not poll_cost < time_budget:
            break

        LOGGER.debug('More logs to poll for \'%s\': %s',
                     self.type(), self._more_to_poll)
        self._config.report_remaining_seconds()

        if not self._more_to_poll:
            break

        # Reset the flag (it is known to be truthy here). Subclasses should set
        # it to 'True' within their implementation of the '_gather_logs' function
        self._more_to_poll = False

    LOGGER.debug(
        'Gathered all logs possible for this execution. More logs to poll '
        'for \'%s\': %s',
        self.type(),
        self._more_to_poll)

    self._config.report_remaining_seconds()

    # Finalize, saving state to 'succeeded'
    self._finalize()
def _get_parameters(names):
    """Fetch and json-decode parameters from the aws ssm parameter store.

    Args:
        names (list): A list of parameter names to retrieve from the aws ssm
            parameter store

    Returns:
        tuple (dict, list): Dictionary with the loaded parameter names as keys
            and the actual parameter (as a dictionary) as the value. The
            secondary list contains any invalid parameters that were not loaded

    Raises:
        AppIntegrationConfigError: If the parameters could not be retrieved,
            or a parameter value is not valid json
    """
    quoted_names = ', '.join('\'{}\''.format(name) for name in names)
    LOGGER.debug('Retrieving values from parameter store with names: %s',
                 quoted_names)

    try:
        parameters = AppConfig.SSM_CLIENT.get_parameters(
            Names=names, WithDecryption=True)
    except ClientError as err:
        raise AppIntegrationConfigError(
            'Could not get parameter with names {}. Error: '
            '{}'.format(quoted_names, err.response['Error']['Message']))

    decoded_params = {}
    for param in parameters['Parameters']:
        try:
            loaded_value = json.loads(param['Value'])
        except ValueError:
            raise AppIntegrationConfigError(
                'Could not load value for parameter with '
                'name \'{}\'. The value is not valid json: '
                '\'{}\''.format(param['Name'], param['Value']))
        decoded_params[param['Name']] = loaded_value

    return decoded_params, parameters['InvalidParameters']
def _get_duo_logs(self, hostname, full_url):
    """Fetch the logs from the Duo endpoint that are newer than the last timestamp.

    Updates 'self._last_timestamp' and 'self._more_to_poll' when logs are found.

    Args:
        hostname (str): Host used when signing the request
        full_url (str): Full url of the endpoint to request logs from

    Returns:
        list: Log events, or False if the request could not be signed, the
            request failed, or no logs were returned. Each event resembles:
        [
            {
                'timestamp': <int:unix timestamp>,
                'device': <str:device>,
                'username': <str:username>,
                'factor': <str:factor>,
                'result': <str:result>,
                'ip': <str:ip address>,
                'new_enrollment': <bool:if event corresponds to enrollment>,
                'integration': <str:integration>,
                'location': {
                    'state': '<str:state>',
                    'city': '<str:city>',
                    'country': '<str:country>'
                }
            }
        ]
    """
    # Start one second past the last timestamp to avoid duplicates. The int
    # cast sanity checks the value as a unix timestamp before stringifying
    min_time = str(int(self._last_timestamp + 1))
    query = {'mintime': min_time}

    # Every request must be signed, so the headers are rebuilt each time
    signed_headers = self._generate_auth(hostname, query)
    if not signed_headers:
        return False

    try:
        # Results in a success flag and the loaded response
        result, response = self._make_get_request(
            full_url, headers=signed_headers, params=query)
    except requests.exceptions.ConnectionError:
        LOGGER.exception('Received bad response from duo')
        return False

    if not result:
        return False

    # Duo stores the list of logs in the 'response' key of the response
    events = response['response']
    if not events:
        LOGGER.info('No logs in response from duo')
        return False

    # Duo produces events sequentially, so the final item is the latest one
    newest_event = events[-1]
    self._last_timestamp = newest_event['timestamp']

    # A full page suggests there may be more logs for this timeframe, in which
    # case the caller is allowed to poll again; anything less means polling is done
    self._more_to_poll = len(events) >= self._MAX_RESPONSE_LOGS

    # The caller sends these logs on to the batcher
    return events
def _check_http_response(self, response):
    """Check that an HTTP response exists and has a 2xx status code.

    Args:
        response: The response object to check (must expose 'status_code'
            and 'content'), or None if no response was received

    Returns:
        bool: Indicator of whether or not this request was successful
    """
    # Handle a missing response on its own, since there is no status code or
    # content available to include in the error message
    if response is None:
        LOGGER.error('HTTP request failed for service \'%s\': no response received',
                     self.type())
        return False

    success = 200 <= response.status_code <= 299
    if not success:
        LOGGER.error('HTTP request failed for service \'%s\': [%d] %s',
                     self.type(),
                     response.status_code,
                     response.content)

    return success
def __setitem__(self, key, value):
    """Set a config value, validating state values and persisting state keys.

    A value for the state key is rejected (logged and not stored) unless its
    upper-cased string form resolves to a truthy attribute of 'self.States'.
    """
    # Safety check so a malformed state is never saved
    if key == self._STATE_KEY:
        state_name = str(value).upper()
        if not getattr(self.States, state_name, None):
            LOGGER.error('Current state cannot be saved with value \'%s\'', value)
            return

    dict.__setitem__(self, key, value)

    # Keys related to the state config get saved to the parameter store
    if key in self._state_keys():
        self._save_state()
def _sleep(self):
    """Pause between polls, except before the very first poll."""
    if self._poll_count != 0:
        # Wait so the API being called does not return a bad response
        pause_seconds = self._sleep_seconds()
        LOGGER.debug('Sleeping \'%s\' app for %d seconds...',
                     self.type(), pause_seconds)
        time.sleep(pause_seconds)
    else:
        LOGGER.debug('Skipping sleep for first poll')
def _make_request(self):
    """Request admin log events using the Box client.

    The inner `_perform_request` function handles a single retry in the event
    of a ConnectionError. If the request fails twice, a failure is returned.

    Returns:
        tuple (bool, dict): Success status and the response from Box
            (boxsdk.session.box_session.BoxResponse) json loaded into a
            dictionary, or (False, None) on failure
    """
    # Base parameters for this request, 100 is the max value for limit
    params = {
        'limit': self._MAX_CHUNK_SIZE,
        'stream_type': EnterpriseEventsStreamType.ADMIN_LOGS,
    }

    # From Box's docs: Box responds to the created_before and created_after
    # parameters only if the stream_position parameter is not included.
    if self._next_stream_position:
        position_filter = {'stream_position': self._next_stream_position}
    else:
        position_filter = {'created_after': self._last_timestamp}
    params.update(position_filter)

    LOGGER.debug('Requesting events for %s', self.type())

    def _perform_request(allow_retry=True):
        """Issue the request; every return is a tuple to conform to safe_timeout"""
        try:
            # make_request is used with the box api in order to support custom
            # parameters such as 'created_after' and 'created_before'
            box_response = self._client.make_request(
                'GET',
                self._client.get_url('events'),
                params=params,
                timeout=self._DEFAULT_REQUEST_TIMEOUT)
        except BoxException:
            LOGGER.exception('Failed to get events for %s', self.type())
            return False, None
        except ConnectionError:
            # In testing, the requests connection seemed to get reset for no
            # obvious reason and a simple retry works fine, so retry a
            # single time and return a failure after that
            LOGGER.exception(
                'Bad response received from host, will retry once')
            if not allow_retry:
                return False, None
            return _perform_request(allow_retry=False)
        else:
            # Successful status and the JSON from the box response
            return True, box_response.json()

    return _perform_request()
def _make_post_request(self, full_url, headers, data, is_json=True):
    """Method for returning the json loaded response for this POST request

    Args:
        full_url (str): Url to send the POST request to
        headers (dict): Headers to send with the request
        data (dict): Payload to send with the request
        is_json (bool): True (default) to send the payload as a json body, or
            False to send it form-encoded. Some callers pass this positionally.

    Returns:
        tuple (bool, dict): False if the was an error performing the request,
            and the dictionary loaded from the json response
    """
    LOGGER.debug('Making POST request for service \'%s\' on poll #%d',
                 self.type(), self._poll_count)

    # Bound the request time when the class defines a default timeout. The
    # fallback of None is the requests default (no timeout), which was the
    # previous behavior.
    timeout = getattr(self, '_DEFAULT_REQUEST_TIMEOUT', None)

    # Perform the request and return the response as a dict
    if is_json:
        response = requests.post(full_url, headers=headers,
                                 json=data, timeout=timeout)
    else:
        # if content type is form-encoded, the param is 'data' rather than 'json'
        response = requests.post(full_url, headers=headers,
                                 data=data, timeout=timeout)

    return self._check_http_response(response), response.json()
def _initialize(self):
    """Run the startup steps for this app, ending with marking it as running.

    Returns:
        bool: True if the app was initialized and marked as running, False if
            it is already running or is in a partial execution state that
            this invocation should not continue
    """
    # Safety check to make sure this is not being invoked already
    if self._config.is_running:
        LOGGER.error('App already running for service \'%s\'.', self.type())
        return False

    # A config marked as 'partial' may only be continued by an invocation
    # that was spawned from the previous partial execution
    if not self._config.is_successive_invocation:
        if self._config.is_partial:
            LOGGER.error('App in partial execution state for service \'%s\'.',
                         self.type())
            return False

    LOGGER.info('App starting for service \'%s\'.', self.type())

    LOGGER.info('App executing as a successive invocation: %s',
                self._config.is_successive_invocation)

    # Validate the auth in the config. This raises an exception upon failure
    self._validate_auth()

    self._last_timestamp = self._config.last_timestamp

    # Marking the app as running updates the parameter store
    self._config.mark_running()

    return True
def _request_token(self):
    """Request OAuth token from salesforce

    Meanwhile, it will also get instance url which will be used in future
    requests. The instance url identifies the Salesforce instance to which API
    calls should be sent.

    On success, 'self._auth_headers' and 'self._instance_url' are updated.

    Returns:
        bool: Returns True if update auth headers and instance url successfully,
            False if the request failed or the response was missing the
            access token or instance url.
    """
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}

    # required credentials when request for a token.
    data = {
        'grant_type': 'password',
        'client_id': self._config.auth['client_id'],
        'client_secret': self._config.auth['client_secret'],
        'username': self._config.auth['username'],
        # The password is sent with the security token appended to it. The
        # format string must contain both placeholders, otherwise the
        # arguments are silently ignored and a constant value is sent.
        'password': '{}{}'.format(self._config.auth['password'],
                                  self._config.auth['security_token']),
        'response_type': 'code',
        'redirect_uri': self._SALESFORCE_TOKEN_URL
    }

    # The body is form-encoded, so pass False to send it as 'data' instead of 'json'
    success, response = self._make_post_request(self._SALESFORCE_TOKEN_URL,
                                                headers, data, False)

    if not (success and response):
        return False

    if not (response.get('access_token') and response.get('instance_url')):
        LOGGER.error(
            'Response invalid generating headers for service \'%s\'', self._type())
        return False

    bearer = 'Bearer {}'.format(response.get('access_token'))
    self._auth_headers = {
        'Content-Type': 'application/json',
        'Authorization': bearer
    }
    self._instance_url = response.get('instance_url')
    LOGGER.debug('Successfully obtain OAuth token and instance URL')

    return True
def _make_get_request(self, full_url, headers, params=None):
    """Perform a GET request and return its json loaded response.

    Args:
        full_url (str): Url to send the GET request to
        headers (dict): Headers to send with the request
        params (dict): Optional query parameters for the request

    Returns:
        tuple (bool, dict): False if there was an error performing the request,
            and the dictionary loaded from the json response
    """
    LOGGER.debug('Making GET request for service \'%s\' on poll #%d',
                 self.type(), self._poll_count)

    response = requests.get(
        full_url,
        headers=headers,
        params=params,
        timeout=self._DEFAULT_REQUEST_TIMEOUT)

    # The status check happens before the body is json loaded
    succeeded = self._check_http_response(response)
    return succeeded, response.json()
def _gather(self):
    """Protected entry point that performs and times a single poll.

    Returns:
        float: Seconds that the poll took, scaled by the poll buffer multiplier
    """
    # Sleep first in case the API throttles requests
    self._sleep()

    def do_gather():
        """Run a single poll; scoped as its own function so it can be timed"""
        self._poll_count += 1

        logs = self._gather_logs()

        if logs:
            # Track how many logs have been gathered so far
            self._gathered_log_count += len(logs)

            # Hand the logs to the batcher to be sent to the rule processor
            self._batcher.send_logs(self._config['function_name'], logs)

            LOGGER.debug('Updating config last timestamp from %s to %s',
                         self._config.last_timestamp, self._last_timestamp)

            # Persist the config's last timestamp after each function run
            self._config.last_timestamp = self._last_timestamp
            return

        # 'logs' can be False if there was an issue polling, or empty if
        # there are no new logs to be polled. Either way, stop polling.
        self._more_to_poll = False
        LOGGER.error(
            'Gather process for service \'%s\' was not able to poll any logs '
            'on poll #%d',
            self.type(),
            self._poll_count)

    # Time one poll with timeit and cast to a decimal, since these floating
    # point values can be very small and the builtin float uses scientific
    # notation when handling very small values
    elapsed = timeit(do_gather, number=1)
    exec_time = Decimal(elapsed)

    LOGGER.info('Gather process for \'%s\' executed in %f seconds.',
                self.type(), exec_time)

    # Pad the measured time with the buffer multiplier to account for some
    # unforeseen delay and to leave enough time to spawn a new invocation if
    # there are more logs to poll
    buffered_time = exec_time * Decimal(self._POLL_BUFFER_MULTIPLIER)

    # Cast back to float so general arithmetic works for the caller
    return float(buffered_time)
def send_logs(self, source_function, logs):
    """Send the gathered logs to the rule processor, segmenting if needed.

    Args:
        source_function (str): The app function name from which the logs came
        logs (list): List of the logs that have been gathered

    Returns:
        None if all logs were sent in a single request, otherwise the result
            of segmenting the logs and sending them as multiple requests
    """
    LOGGER.info('Starting batch send of %d logs to the rule processor', len(logs))

    # First attempt to send everything in a single request
    sent_at_once = self._send_logs_to_stream_alert(source_function, logs)
    if not sent_at_once:
        # Fall back on splitting the logs across multiple requests
        return self._segment_and_send(source_function, logs)

    return None
def _load_auth(cls, auth_data):
    """Create a JWTAuth instance from Box service account keyfile data.

    Args:
        auth_data (dict): The loaded keyfile data from a Box service account
            JSON file

    Returns:
        boxsdk.JWTAuth: Instance of JWTAuth that allows the client to
            authenticate, or False if there was an issue loading the auth
    """
    try:
        return JWTAuth.from_settings_dictionary(auth_data)
    except (TypeError, ValueError, KeyError):
        LOGGER.exception('Could not load JWT from settings dictionary')

    return False
def _determine_last_time(self):
    """Determine the last time this function was executed and fallback on
    evaluating the rate value if there is no last timestamp available

    When no last timestamp is stored, it is set to the current unix time
    minus the evaluated rate interval.

    Returns:
        int: The unix timestamp for the starting point to fetch logs back to
    """
    if not self.last_timestamp:
        interval_time = self.evaluate_interval()

        # time.time() is already seconds since the epoch in UTC. Converting
        # time.gmtime() through time.mktime() would treat the UTC struct as
        # local time and skew the result by the host's UTC offset.
        current_time = int(time.time())
        LOGGER.debug('Current timestamp: %s seconds', current_time)

        self.last_timestamp = current_time - interval_time
        LOGGER.info('Starting last timestamp set to: %d seconds',
                    self.last_timestamp)

    return self.last_timestamp
def _create_client(self):
    """Create the Box client if one does not already exist.

    Box requests must be signed with a JWT keyfile.

    Returns:
        bool: True if the Box client already exists or was successfully
            created, False if any errors occurred during its creation
    """
    if self._client:
        LOGGER.debug('Client already instantiated for %s', self.type())
        return True

    auth = self._load_auth(self._config.auth['keyfile'])
    if auth:
        self._client = Client(auth)
        return bool(self._client)

    return False
def _initialize(self):
    """Run the startup steps for this app, ending with marking it as running.

    Returns:
        bool: True if the app was initialized and marked as running, False if
            the app is already running
    """
    config = self._config

    # Safety check to make sure this is not being invoked already
    if config.is_running:
        LOGGER.error('App already running for service \'%s\'.', self.type())
        return False

    LOGGER.info('App starting for service \'%s\'.', self.type())

    # Validate the auth in the config. This raises an exception upon failure
    self._validate_auth()

    self._last_timestamp = self._config.last_timestamp

    # Marking the app as running updates the parameter store
    self._config.mark_running()

    return True
def _make_post_request(self, full_url, headers, data, is_json=True):
    """Perform a POST request and return its json loaded response.

    Args:
        full_url (str): Url to send the POST request to
        headers (dict): Headers to send with the request
        data (dict): Payload to send with the request
        is_json (bool): True to send the payload as a json body, or False to
            send it form-encoded

    Returns:
        tuple (bool, dict): False if there was an error performing the request,
            and the dictionary loaded from the json response
    """
    LOGGER.debug('Making POST request for service \'%s\' on poll #%d',
                 self.type(), self._poll_count)

    # If the content type is form-encoded, the payload must be passed with
    # the 'data' keyword rather than 'json'
    payload_keyword = 'json' if is_json else 'data'
    payload = {payload_keyword: data}

    response = requests.post(
        full_url,
        headers=headers,
        timeout=self._DEFAULT_REQUEST_TIMEOUT,
        **payload)

    # The status check happens before the body is json loaded
    succeeded = self._check_http_response(response)
    return succeeded, response.json()
def _segment_and_send(self, source_function, logs):
    """Protected method for segmenting a list of logs into smaller lists
    so they conform to the input limit of AWS Lambda

    The logs are split in half and each half is sent on its own. A half that
    cannot be sent is segmented again, down to a single log. A single log
    that cannot be sent is dropped, since it cannot be segmented any further.

    Args:
        source_function (str): The app function name from which the logs came
        logs (list): List of the logs that have been gathered

    Returns:
        bool: True if every log was sent, False if any log had to be dropped
    """
    log_count = len(logs)
    LOGGER.debug('Segmenting %d logs into subsets', log_count)

    # Nothing to send. This also avoids a zero step in the range call below
    if not log_count:
        return True

    # Base case: a single log cannot be split further, so attempt it one
    # more time and give up on failure rather than recursing without bound
    if log_count == 1:
        if self._send_logs_to_stream_alert(source_function, logs):
            return True

        LOGGER.error('Unable to send a single log to the rule processor; '
                     'the log will be dropped')
        return False

    # Half of the logs, rounded up. With 2+ logs each subset is strictly
    # smaller than the input, so the recursion always terminates.
    segment_size = (log_count + 1) // 2

    all_sent = True
    for index in range(0, log_count, segment_size):
        subset = logs[index:index + segment_size]

        # Try to send this current subset to the rule processor
        # and segment again if they are too large to be sent at once
        if self._send_logs_to_stream_alert(source_function, subset):
            continue

        if not self._segment_and_send(source_function, subset):
            all_sent = False

    return all_sent