def event_filter(chunk_index, result, records_dict, ip_field, noise_events, method):
    """Yield the events of one chunk that match the requested noise status.

    ``result['response']`` holds the API results and ``result['message']``
    tells whether the API call succeeded (``'ok'``).  The records to process
    are read from ``records_dict[0]``.  ``chunk_index`` is accepted but not
    used by this function.

    Records without a usable IP value, and records whose IP is not part of
    the API response, are treated as non-noise: they are only yielded when
    ``noise_events`` is falsy.  When the API call failed, records that do
    carry an IP are treated as noise and only yielded when ``noise_events``
    is truthy.
    """
    api_results = result['response']
    api_failed = result['message'] != 'ok'

    # Lookup table used while yielding events:
    # {<ip-address>: <API response for that IP address>}
    if api_failed:
        ip_lookup = {}
    else:
        ip_lookup = {entry['ip']: entry for entry in api_results}

    for record in records_dict[0]:
        has_ip = ip_field in record and record[ip_field] != ''

        if not has_ip:
            # The record has no IP field or its value is ''; it is sent as it
            # is (after appending all fields) and counts as non-noisy.
            if not noise_events:
                yield event_generator.make_invalid_event(
                    method, {}, True, record)
            continue

        ip_value = record[ip_field]

        if api_failed:
            # The noise status could not be fetched from the API; events that
            # have an IP address are considered noise in that case.
            if noise_events:
                failure_event = {'ip': ip_value, 'error': api_results}
                yield event_generator.make_invalid_event(
                    method, failure_event, True, record)
            continue

        # Only string values are looked up, so that an iterable field value
        # never causes an error while referencing ip_lookup.
        if isinstance(ip_value, six.string_types) and ip_value in ip_lookup:
            api_entry = ip_lookup[ip_value]
            if api_entry['noise'] == noise_events:
                yield event_generator.make_valid_event(
                    method, api_entry, True, record)
        elif not noise_events:
            # The IP is either invalid or was not returned by the API (case
            # of the `multi` method only); invalid IPs count as non-noise.
            invalid_event = {
                'ip': ip_value,
                'error': 'IP address doesn\'t match the valid IP format'
            }
            yield event_generator.make_invalid_event(
                method, invalid_event, True, record)
def response_scroller(api_client, logger, query, result_size):
    """
    Page through the results of a GreyNoise query and yield them as events.

    ``api_client.query`` is called repeatedly with the ``scroll`` value of
    the previous response until ``result_size`` events have been yielded or
    a response carries no ``scroll``.  A response without a truthy ``count``
    produces a single invalid event and then terminates via ``exit(1)``.
    """
    # How many events still have to be sent to Splunk
    events_left = result_size
    next_scroll = None
    finished = False

    while not finished:
        sent_this_page = 0
        page_size = None

        # Avoid the extra call if the expected number of events was already retrieved
        if events_left == 0:
            logger.debug("No GreyNoise query results remaining to be sent, completing the search...")
            break

        # Request only as many results as are still needed when fewer than
        # 10,000 events remain
        if events_left < 10000:
            page_size = events_left
        logger.debug("Size for the GNQL query is configured to {}".format(page_size))

        api_response = api_client.query(query=query, size=page_size, scroll=next_scroll)

        if api_response.get('count', None):
            # On the last page of the API response the scroll is not present
            next_scroll = api_response.get('scroll', None)

            for ip_data in api_response.get('data', []):
                # Only the very first event of the whole result set gets True
                is_first_event = sent_this_page == 0 and events_left == result_size
                yield event_generator.make_valid_event('query', ip_data, is_first_event)

                sent_this_page = sent_this_page + 1
                if sent_this_page == events_left:
                    finished = True
                    break

            events_left = events_left - sent_this_page
            logger.debug("Statistics: Remaining chunk size: {} : Events written:{}".format(events_left, sent_this_page))
        else:
            message = api_response.get('message', '')
            query = api_response.get('query', '')
            logger.info("No results returned for GreyNoise query: {}, message: {}".format(str(query), str(message)))
            event = {
                'message': message,
                'query': query
            }
            yield event_generator.make_invalid_event('query', event, True)
            exit(1)

        # Without a scroll value there is no further page to fetch
        if next_scroll is None:
            logger.debug("Last page of the GreyNoise query results detected, completing the search...")
            finished = True
def do_generate(self, api_key, logger):
    """Fetch aggregate statistics for ``self.query`` and yield them as one event.

    ``self.query`` must be non-empty and ``self.count`` (optional) must
    validate as an integer >= 1; otherwise the error is logged, written with
    ``self.write_error`` and the command terminates via ``exit(1)``.

    When the API response carries a non-negative ``count`` a single result
    dict (``source``, ``sourcetype``, ``_time``, ``_raw``) is yielded.
    Otherwise the ``message`` (or, failing that, ``error``) of the response
    is inspected, and an invalid event is yielded for 'bad count' /
    'bad query' responses.
    """
    query = self.query
    count = self.count

    if query == '':
        logger.error("Parameter query should not be empty.")
        self.write_error("Parameter query should not be empty.")
        exit(1)

    # Strip the spaces from the parameter value if given
    if count:
        count = count.strip()

    # Validating the given parameters
    try:
        count = validator.Integer(option_name='count', minimum=1).validate(count)
    except ValueError as e:
        # Validator will throw ValueError with error message when the parameters are not proper
        logger.error(str(e))
        self.write_error(str(e))
        exit(1)

    logger.info(
        "Fetching aggregate statistics for query: {}, count: {}".format(
            str(query), count))

    # Opting timeout of 240 seconds for the requests
    api_client = GreyNoise(api_key=api_key, timeout=240, integration_name="Splunk")

    # If count is not passed explicitly to the command by the user, then it will have the value None
    stats_data = api_client.stats(query, count)
    logger.info(
        "Successfully retrieved response for the aggregate statistics for query: {}, count: {}"
        .format(str(query), count))

    if int(stats_data.get('count', -1)) >= 0:
        results = {
            'source': 'greynoise',
            'sourcetype': 'greynoise',
            '_time': time.time(),
            '_raw': {'results': stats_data},
        }
        yield results
    else:
        # Remember which key supplied the text so the event is reported
        # under that same key, whatever words the text itself contains.
        message_text = stats_data.get('message', None)
        if message_text:
            response_key, response = 'message', message_text
        else:
            # Default to '' so the membership tests below never run on None
            response_key, response = 'error', stats_data.get('error', None) or ''

        if 'bad count' in response or 'bad query' in response:
            logger.error(
                "Invalid response retrieved from the GreyNoise API for query: {}, response: {}"
                .format(str(query), str(response)))
            event = {response_key: response}
            yield event_generator.make_invalid_event('stats', event, True)
        else:
            # No event is produced for other responses; leave a trace in the
            # log instead of dropping them silently.
            logger.error(
                "Unexpected response retrieved from the GreyNoise API for query: {}, response: {}"
                .format(str(query), str(stats_data)))
def transform(self, records):
    """Method that processes and yield event records to the Splunk events pipeline.

    Exactly one of ``self.ip`` and ``self.ip_field`` must be given:

    * ``self.ip`` (comma separated IP addresses): works as a generating
      command.  The noise and RIOT status is fetched with
      ``api_client.quick`` (through the cache when ``utility.get_caching``
      reports it enabled) and one event is yielded per returned IP; IPs
      that are not returned and fail ``validate_ip`` yield an invalid event.
    * ``self.ip_field``: works as a transforming command on ``records``,
      once the search is complete (``self.search_results_info`` is set and
      this is not a preview).  Events come from
      ``event_generator.get_all_events``.

    Parameter and API key problems are written with ``self.write_error``
    and terminate the command via ``exit(1)``.  API and connection errors in
    the generating path are logged and written as errors.
    """
    ip_addresses = self.ip
    ip_field = self.ip_field
    api_key = ""
    EVENTS_PER_CHUNK = 5000
    THREADS = 3
    USE_CACHE = False
    logger = utility.setup_logger(
        session_key=self._metadata.searchinfo.session_key,
        log_context=self._metadata.searchinfo.command)

    if ip_addresses and ip_field:
        logger.error(
            "Please use parameter ip to work gnquick as generating command or "
            "use parameter ip_field to work gnquick as transforming command."
        )
        self.write_error(
            "Please use parameter ip to work gnquick as generating command or "
            "use parameter ip_field to work gnquick as transforming command"
        )
        exit(1)

    try:
        message = ''
        api_key = utility.get_api_key(
            self._metadata.searchinfo.session_key, logger=logger)
    except APIKeyNotFoundError as e:
        message = str(e)
    except HTTPError as e:
        message = str(e)

    if message:
        self.write_error(message)
        logger.error(
            "Error occured while retrieving API key, Error: {}".format(
                message))
        exit(1)

    if ip_addresses and not ip_field:
        # This piece of code will work as generating command and will not use the Splunk events.
        # Splitting the ip_addresses by commas and stripping spaces from both the sides for each IP address
        ip_addresses = [ip.strip() for ip in ip_addresses.split(',')]

        logger.info("Started retrieving results")
        try:
            logger.debug(
                "Initiating to fetch noise and RIOT status for IP address(es): {}"
                .format(str(ip_addresses)))

            # Opting timeout of 120 seconds for the requests
            api_client = GreyNoise(api_key=api_key,
                                   timeout=120,
                                   integration_name=INTEGRATION_NAME)

            # CACHING START
            cache_enabled, cache_client = utility.get_caching(
                self._metadata.searchinfo.session_key, 'multi', logger)
            if int(cache_enabled) == 1 and cache_client is not None:
                cache_start = time.time()
                ips_not_in_cache, ips_in_cache = utility.get_ips_not_in_cache(
                    cache_client, ip_addresses, logger)
                try:
                    response = []
                    if len(ips_in_cache) >= 1:
                        response = cache_client.query_kv_store(
                            ips_in_cache)

                    if response is None:
                        logger.debug(
                            "KVStore is not ready. Skipping caching mechanism."
                        )
                        noise_status = api_client.quick(ip_addresses)
                    elif response == []:
                        noise_status = utility.fetch_response_from_api(
                            api_client.quick, cache_client, ip_addresses,
                            logger)
                    else:
                        noise_status = utility.fetch_response_from_api(
                            api_client.quick, cache_client,
                            ips_not_in_cache, logger)
                        noise_status.extend(response)
                except Exception:
                    logger.debug(
                        "An exception occurred while fetching response from cache.\n{}"
                        .format(traceback.format_exc()))
                    # The caching path failed, so noise_status may not have
                    # been assigned.  Ask the API directly; an API error
                    # raised here reaches the dedicated handlers below.
                    noise_status = api_client.quick(ip_addresses)
                logger.debug(
                    "Generating command with caching took {} seconds.".
                    format(time.time() - cache_start))
            else:
                noise_status = api_client.quick(ip_addresses)
            logger.info("Retrieved results successfully")
            # CACHING END

            # Process the API response and send the noise and RIOT status information of IP with extractions
            # to the Splunk, Using this flag to handle the field extraction issue in custom commands
            # Only the fields extracted from the first event of generated by custom command
            # will be extracted from all events
            first_record_flag = True

            # Flag to indicate whether erroneous IPs are present
            erroneous_ip_present = False
            for ip in ip_addresses:
                # First API sample reported for this IP, if any
                sample = next(
                    (entry for entry in noise_status if ip == entry['ip']),
                    None)
                if sample is not None:
                    yield event_generator.make_valid_event(
                        'quick', sample, first_record_flag)
                    if first_record_flag:
                        first_record_flag = False
                    logger.debug(
                        "Fetched noise and RIOT status for ip={} from GreyNoise API"
                        .format(str(ip)))
                else:
                    erroneous_ip_present = True
                    try:
                        validate_ip(ip, strict=True)
                    except ValueError as e:
                        error_msg = str(e).split(":")
                        logger.debug(
                            "Generating noise and RIOT status for ip={} manually"
                            .format(str(ip)))
                        event = {'ip': ip, 'error': error_msg[0]}
                        yield event_generator.make_invalid_event(
                            'quick', event, first_record_flag)

                        if first_record_flag:
                            first_record_flag = False

            if erroneous_ip_present:
                logger.warning(
                    "Value of one or more IP address(es) is either invalid or non-routable"
                )
                self.write_warning(
                    "Value of one or more IP address(es) passed to {command_name} "
                    "is either invalid or non-routable".format(
                        command_name=str(
                            self._metadata.searchinfo.command)))

        except RateLimitError:
            logger.error(
                "Rate limit error occured while fetching the context information for ips={}"
                .format(str(ip_addresses)))
            self.write_error(
                "The Rate Limit has been exceeded. Please contact the Administrator"
            )
        except RequestFailure as e:
            response_code, response_message = e.args
            if response_code == 401:
                msg = "Unauthorized. Please check your API key."
            else:
                # Need to handle this, as splunklib is unable to handle the exception with
                # (400, {'error': 'error_reason'}) format
                msg = (
                    "The API call to the GreyNoise platform have been failed "
                    "with status_code: {} and error: {}").format(
                        response_code, response_message['error']
                        if isinstance(response_message, dict) else
                        response_message)

            logger.error("{}".format(str(msg)))
            self.write_error(msg)
        except ConnectionError:
            logger.error(
                "Error while connecting to the Server. Please check your connection and try again."
            )
            self.write_error(
                "Error while connecting to the Server. Please check your connection and try again."
            )
        except RequestException:
            logger.error(
                "There was an ambiguous exception that occurred while handling your Request. Please try again."
            )
            self.write_error(
                "There was an ambiguous exception that occurred while handling your Request. Please try again."
            )
        except Exception:
            logger.error("Exception: {} ".format(
                str(traceback.format_exc())))
            self.write_error(
                "Exception occured while fetching the noise and RIOT status of the IP address(es). "
                "See greynoise_main.log for more details.")

    elif ip_field:
        # Enter the mechanism only when the Search is complete and all the events are available
        if self.search_results_info and not self.metadata.preview:
            try:
                # Strip the spaces from the parameter value if given
                ip_field = ip_field.strip()
                # Validating the given parameter
                try:
                    ip_field = validator.Fieldname(
                        option_name='ip_field').validate(ip_field)
                except ValueError as e:
                    # Validator will throw ValueError with error message when the parameters are not proper
                    logger.error(str(e))
                    self.write_error(str(e))
                    exit(1)

                # API key validation, done only while the flag is not yet set
                if not self.api_validation_flag:
                    api_key_validation, message = utility.validate_api_key(
                        api_key, logger)
                    logger.debug(
                        "API validation status: {}, message: {}".format(
                            api_key_validation, str(message)))
                    self.api_validation_flag = True
                    if not api_key_validation:
                        logger.info(message)
                        self.write_error(message)
                        exit(1)

                # This piece of code will work as transforming command and will use
                # the Splunk ingested events and field which is specified in ip_field.
                chunk_dict = event_generator.batch(records, ip_field,
                                                   EVENTS_PER_CHUNK,
                                                   logger)

                # A single chunk is handled by one thread, with the
                # client's use_cache option switched on
                if len(chunk_dict) == 1:
                    logger.info(
                        "Less then 1000 distinct IPs are present, "
                        "optimizing the IP requests call to GreyNoise API..."
                    )
                    THREADS = 1
                    USE_CACHE = True

                api_client = GreyNoise(api_key=api_key,
                                       timeout=120,
                                       use_cache=USE_CACHE,
                                       integration_name=INTEGRATION_NAME)
                # When no records found, batch will return {0:([],[])}
                tot_time_start = time.time()
                if len(list(chunk_dict.values())[0][0]) >= 1:
                    for event in event_generator.get_all_events(
                            self._metadata.searchinfo.session_key,
                            api_client,
                            'multi',
                            ip_field,
                            chunk_dict,
                            logger,
                            threads=THREADS):
                        yield event
                else:
                    logger.info(
                        "No events found, please increase the search timespan to have more search results."
                    )
                tot_time_end = time.time()
                logger.debug(
                    "Total execution time => {}".format(tot_time_end -
                                                        tot_time_start))
            except Exception:
                # Logged at error level, like the generating path above
                logger.error(
                    "Exception occured while adding the noise and RIOT status to the events, Error: {}"
                    .format(traceback.format_exc()))
                self.write_error(
                    "Exception occured while adding the noise and RIOT status of "
                    "the IP addresses to events. See greynoise_main.log for more details."
                )

    else:
        logger.error(
            "Please specify exactly one parameter from ip and ip_field with some value."
        )
        self.write_error(
            "Please specify exactly one parameter from ip and ip_field with some value."
        )
def transform(self, records):
    """Method that processes and yield event records to the Splunk events pipeline.

    Exactly one of ``self.ip`` and ``self.ip_field`` must be given:

    * ``self.ip`` (a single IP address): works as a generating command.  The
      RIOT information is fetched with
      ``utility.get_response_for_generating`` and yielded as one event; a
      ``ValueError`` raised on the way yields an invalid event instead.
    * ``self.ip_field``: works as a transforming command on ``records``,
      once the search is complete (``self.search_results_info`` is set and
      this is not a preview).  Events come from
      ``event_generator.get_all_events``.

    Parameter and API key problems are written with ``self.write_error``
    and terminate the command via ``exit(1)``.  API and connection errors in
    the generating path are logged and written as errors.
    """
    ip_address = self.ip
    ip_field = self.ip_field
    api_key = ""
    EVENTS_PER_CHUNK = 1
    THREADS = 3
    USE_CACHE = False
    logger = utility.setup_logger(
        session_key=self._metadata.searchinfo.session_key, log_context=self._metadata.searchinfo.command)

    if ip_address and ip_field:
        logger.error("Please use parameter ip to work gnriot as generating command or "
                     "use parameter ip_field to work gnriot as transforming command.")
        self.write_error("Please use parameter ip to work gnriot as generating command or "
                         "use parameter ip_field to work gnriot as transforming command")
        exit(1)

    try:
        message = ''
        api_key = utility.get_api_key(self._metadata.searchinfo.session_key, logger=logger)
    except APIKeyNotFoundError as e:
        message = str(e)
    except HTTPError as e:
        message = str(e)

    if message:
        self.write_error(message)
        logger.error("Error occured while retrieving API key, Error: {}".format(message))
        exit(1)

    if ip_address and not ip_field:
        # This piece of code will work as generating command and will not use the Splunk events.
        # Strip the spaces from the parameter value if given
        ip_address = ip_address.strip()

        logger.info("Started retrieving results")
        try:
            logger.debug("Initiating to fetch RIOT information for IP address: {}".format(str(ip_address)))
            # Opting timeout of 120 seconds for the requests
            api_client = GreyNoise(api_key=api_key, timeout=120, integration_name=INTEGRATION_NAME)
            session_key = self._metadata.searchinfo.session_key
            riot_information = utility.get_response_for_generating(
                session_key, api_client, ip_address, 'greynoise_riot', logger)
            logger.info("Retrieved results successfully")

            # Process the API response and send the riot information of IP with extractions to the Splunk
            yield event_generator.make_valid_event('riot', riot_information, True)
            logger.debug("Fetched RIOT information for ip={} from GreyNoise API".format(str(ip_address)))

        except ValueError as e:
            error_msg = str(e).split(":")
            logger.debug("Generating RIOT information for ip={} manually".format(str(ip_address)))
            event = {
                'ip': ip_address,
                'error': error_msg[0]
            }
            yield event_generator.make_invalid_event('riot', event, True)
            logger.warning(error_msg)
            self.write_warning(
                "Value of IP address passed to {command_name} is either invalid or non-routable".format(
                    command_name=str(self._metadata.searchinfo.command)))
        except RateLimitError:
            logger.error("Rate limit error occured while fetching the context information for ip={}".format(
                str(ip_address)))
            self.write_error("The Rate Limit has been exceeded. Please contact the Administrator")
        except RequestFailure as e:
            response_code, response_message = e.args
            if response_code == 401:
                msg = "Unauthorized. Please check your API key."
            else:
                # Need to handle this, as splunklib is unable to handle the exception with
                # (400, {'error': 'error_reason'}) format
                msg = ("The API call to the GreyNoise platform have been failed "
                       "with status_code: {} and error: {}").format(
                    response_code, response_message['error'] if isinstance(response_message, dict)
                    else response_message)

            logger.error("{}".format(str(msg)))
            self.write_error(msg)
        except ConnectionError:
            logger.error("Error while connecting to the Server. Please check your connection and try again.")
            self.write_error("Error while connecting to the Server. Please check your connection and try again.")
        except RequestException:
            logger.error(
                "There was an ambiguous exception that occurred while handling your Request. Please try again.")
            self.write_error(
                "There was an ambiguous exception that occurred while handling your Request. Please try again.")
        except Exception:
            logger.error("Exception: {} ".format(str(traceback.format_exc())))
            self.write_error("Exception occured while fetching the RIOT information of the IP address. "
                             "See greynoise_main.log for more details.")

    elif ip_field:
        logger.info("Started retrieving RIOT information for the IP addresses present in field: {}".format(
            str(ip_field)))
        # Enter the mechanism only when the Search is complete and all the events are available
        if self.search_results_info and not self.metadata.preview:
            try:
                # Strip the spaces from the parameter value if given
                ip_field = ip_field.strip()
                # Validating the given parameter
                try:
                    ip_field = validator.Fieldname(option_name='ip_field').validate(ip_field)
                except ValueError as e:
                    # Validator will throw ValueError with error message when the parameters are not proper
                    logger.error(str(e))
                    self.write_error(str(e))
                    exit(1)

                # API key validation, done only while the flag is not yet set
                if not self.api_validation_flag:
                    api_key_validation, message = utility.validate_api_key(api_key, logger)
                    logger.debug("API validation status: {}, message: {}".format(api_key_validation, str(message)))
                    self.api_validation_flag = True
                    if not api_key_validation:
                        logger.info(message)
                        self.write_error(message)
                        exit(1)

                # This piece of code will work as transforming command and will use
                # the Splunk ingested events and field which is specified in ip_field.
                # divide the records in the form of dict of tuples having chunk_index as key
                # {<index>: (<records>, <All the ips in records>)}
                chunk_dict = event_generator.batch(
                    records, ip_field, EVENTS_PER_CHUNK, logger, optimize_requests=False)
                logger.debug("Successfully divided events into chunks")

                # A single chunk is handled by one thread, with the
                # client's use_cache option switched on
                if len(chunk_dict) == 1:
                    logger.debug("Less then 1000 distinct IPs are present, "
                                 "optimizing the IP requests call to GreyNoise API...")
                    THREADS = 1
                    USE_CACHE = True

                api_client = GreyNoise(
                    api_key=api_key, timeout=120, use_cache=USE_CACHE, integration_name=INTEGRATION_NAME)

                # When no records found, batch will return {0:([],[])}, so the
                # dict itself is never empty in that case; look at the records
                # of the first chunk to detect "no events".
                first_chunk_records = list(chunk_dict.values())[0][0] if chunk_dict else []
                if len(first_chunk_records) >= 1:
                    for event in event_generator.get_all_events(
                            self._metadata.searchinfo.session_key, api_client, 'greynoise_riot', ip_field,
                            chunk_dict, logger, threads=THREADS):
                        yield event

                    logger.info("Successfully sent all the results to the Splunk")
                else:
                    logger.info("No events found, please increase the search timespan to have more search results.")
            except Exception:
                # Logged at error level, like the generating path above
                logger.error(
                    "Exception occured while adding the RIOT information to the events, Error: {}".format(
                        traceback.format_exc()))
                self.write_error("Exception occured while adding the RIOT information of the IP addresses "
                                 "to events. See greynoise_main.log for more details.")

    else:
        logger.error("Please specify exactly one parameter from ip and ip_field with some value.")
        self.write_error("Please specify exactly one parameter from ip and ip_field with some value.")