Beispiel #1
0
def response_scroller(api_client, logger, query, result_size):
    """
    Use the api_client instance of GreyNoise SDK to fetch the query results and traverse through the results if the result set is too large.
    """
    # This will keep the count of how many events are remaining to be sent to Splunk
    remaining_chunk_size = result_size
    completion_flag = False
    scroll = None

    while not completion_flag:
        event_count = 0
        size = None
        
        # Avoid the extra call if expected number of events are already retrieved
        if remaining_chunk_size == 0:
            logger.debug("No GreyNoise query results remaining to be sent, completing the search...")
            break

        # Do not fetch bunch of results if user does not request so many results
        # Fetch only required numbers of events to keep away if the requested size is less than 10,000
        if remaining_chunk_size < 10000:
            size = remaining_chunk_size
            logger.debug("Size for the GNQL query is configured to {}".format(size))

        api_response = api_client.query(query=query, size=size, scroll=scroll)

        if api_response.get('count', None):
            # If this is the last page of API response, the scroll will not be present
            scroll = api_response.get('scroll', None)
            api_data = api_response.get('data', [])
            
            for ip_data in api_data:
                if event_count == 0 and remaining_chunk_size == result_size:
                    yield event_generator.make_valid_event('query', ip_data, True)
                else:
                    yield event_generator.make_valid_event('query', ip_data, False)

                event_count = event_count + 1

                if event_count == remaining_chunk_size:
                    completion_flag = True
                    break

            remaining_chunk_size = remaining_chunk_size - event_count
            logger.debug("Statistics: Remaining chunk size: {} : Events written:{}".format(remaining_chunk_size, event_count))
        else:
            message = api_response.get('message', '')
            query = api_response.get('query', '')
            logger.info("No results returned for GreyNoise query: {}, message: {}".format(str(query), str(message)))
            event = {
                'message': message,
                'query': query
            }
            yield event_generator.make_invalid_event('query', event, True)
            exit(1)
        
        # If we are on the last page of the results, scroll will not be present.
        if scroll is None:
            logger.debug("Last page of the GreyNoise query results detected, completing the search...")
            completion_flag = True
Beispiel #2
0
def event_filter(chunk_index, result, records_dict, ip_field, noise_events,
                 method):

    api_results = result['response']
    error_flag = True
    # Before yielding events, make the ip lookup dict which will have the following format:
    # {<ip-address>: <API response for that IP address>}
    ip_lookup = {}
    if result['message'] == 'ok':
        error_flag = False
        for event in api_results:
            ip_lookup[event['ip']] = event

    for record in records_dict[0]:

        if error_flag:
            # Exception has occured while fetching the noise statuses from API
            if ip_field in record and record[ip_field] != '':
                # These calls have been failed due to API failure, as this event have IP address value, considering them as noise
                if noise_events:
                    event = {'ip': record[ip_field], 'error': api_results}
                    yield event_generator.make_invalid_event(
                        method, event, True, record)
            else:
                # Either the record is not having IP field or the value of the IP field is ''
                # send the record as it is as it doesn't have any IP address, after appending all fields
                # Considering this event as non-noisy
                if not noise_events:
                    yield event_generator.make_invalid_event(
                        method, {}, True, record)
        else:
            # Successful execution of the API call
            if ip_field in record and record[ip_field] != '':

                # Check if the IP field is not an iterable to avoid any error while referencing ip in ip_lookup
                if isinstance(
                        record[ip_field],
                        six.string_types) and record[ip_field] in ip_lookup:
                    if ip_lookup[record[ip_field]]['noise'] == noise_events:
                        yield event_generator.make_valid_event(
                            method, ip_lookup[record[ip_field]], True, record)
                else:
                    # Meaning ip is either invalid or not returned by the API, which is case of `multi` method only
                    # Invalid IPs are considered as non-noise
                    if not noise_events:
                        event = {
                            'ip':
                            record[ip_field],
                            'error':
                            'IP address doesn\'t match the valid IP format'
                        }
                        yield event_generator.make_invalid_event(
                            method, event, True, record)
            else:
                if not noise_events:
                    # Either the record is not having IP field or the value of the IP field is ''
                    # send the record as it is as it doesn't have any IP address, after appending all fields
                    # Considering this event as non-noisy
                    yield event_generator.make_invalid_event(
                        method, {}, True, record)
Beispiel #3
0
    def do_generate(self, api_key, logger):

        ip_address = self.ip

        try:
            # Strip the spaces from the parameter value if given
            if ip_address:
                ip_address = ip_address.strip()

            logger.info("Initiating to fetch RIOT status for ip: {}".format(
                str(ip_address)))
            # Opting default timout 60 seconds for the request
            api_client = GreyNoise(api_key=api_key,
                                   timeout=60,
                                   integration_name="Splunk")
            context_info = api_client.riot(ip_address)
            logger.info(
                "Successfully retrieved the RIOT status for ip={}".format(
                    str(ip_address)))

            # Process the API response and send the context information of IP with extractions in the Splunk
            results = event_generator.make_valid_event('ip', context_info,
                                                       True)
            yield results

        except ValueError:
            logger.error(
                "IP address: {} doesn\'t match the valid IP format".format(
                    str(ip_address)))
            self.write_error("IP address doesn\'t match the valid IP format")
    def do_generate(self, api_key, logger):
        """
        Method to fetch the api response and process and send the response with extractions in the Splunk.

        :param api_key: GreyNoise API Key.
        :logger: logger object.
        """
        ip_address = self.ip

        try:
            # Strip the spaces from the parameter value if given
            if ip_address:
                ip_address = ip_address.strip()

            logger.info(
                "Initiating to fetch context information for ip: {}".format(
                    str(ip_address)))
            # Opting default timout 60 seconds for the request
            api_client = GreyNoise(api_key=api_key,
                                   timeout=60,
                                   integration_name=INTEGRATION_NAME)
            session_key = self._metadata.searchinfo.session_key
            context_info = get_response_for_generating(session_key, api_client,
                                                       ip_address, 'ip',
                                                       logger)
            logger.info(
                "Successfully retrieved the context information for ip={}".
                format(str(ip_address)))

            # Process the API response and send the context information of IP with extractions in the Splunk
            results = event_generator.make_valid_event('ip', context_info,
                                                       True)
            yield results

        except ValueError as e:
            error_msg = str(e).split(":")
            logger.error(e)
            self.write_error(error_msg[0])
Beispiel #5
0
    def transform(self, records):
        """Method that processes and yield event records to the Splunk events pipeline."""
        ip_addresses = self.ip
        ip_field = self.ip_field
        api_key = ""
        EVENTS_PER_CHUNK = 5000
        THREADS = 3
        USE_CACHE = False
        logger = utility.setup_logger(
            session_key=self._metadata.searchinfo.session_key,
            log_context=self._metadata.searchinfo.command)

        if ip_addresses and ip_field:
            logger.error(
                "Please use parameter ip to work gnquick as generating command or "
                "use parameter ip_field to work gnquick as transforming command."
            )
            self.write_error(
                "Please use parameter ip to work gnquick as generating command or "
                "use parameter ip_field to work gnquick as transforming command"
            )
            exit(1)

        try:
            message = ''
            api_key = utility.get_api_key(
                self._metadata.searchinfo.session_key, logger=logger)
        except APIKeyNotFoundError as e:
            message = str(e)
        except HTTPError as e:
            message = str(e)

        if message:
            self.write_error(message)
            logger.error(
                "Error occured while retrieving API key, Error: {}".format(
                    message))
            exit(1)

        if ip_addresses and not ip_field:
            # This peice of code will work as generating command and will not use the Splunk events.
            # Splitting the ip_addresses by commas and stripping spaces from both the sides for each IP address
            ip_addresses = [ip.strip() for ip in ip_addresses.split(',')]

            logger.info("Started retrieving results")
            try:
                logger.debug(
                    "Initiating to fetch noise and RIOT status for IP address(es): {}"
                    .format(str(ip_addresses)))

                api_client = GreyNoise(api_key=api_key,
                                       timeout=120,
                                       integration_name=INTEGRATION_NAME)

                # CACHING START
                cache_enabled, cache_client = utility.get_caching(
                    self._metadata.searchinfo.session_key, 'multi', logger)
                if int(cache_enabled) == 1 and cache_client is not None:
                    cache_start = time.time()
                    ips_not_in_cache, ips_in_cache = utility.get_ips_not_in_cache(
                        cache_client, ip_addresses, logger)
                    try:
                        response = []
                        if len(ips_in_cache) >= 1:
                            response = cache_client.query_kv_store(
                                ips_in_cache)
                        if response is None:
                            logger.debug(
                                "KVStore is not ready. Skipping caching mechanism."
                            )
                            noise_status = api_client.quick(ip_addresses)
                        elif response == []:
                            noise_status = utility.fetch_response_from_api(
                                api_client.quick, cache_client, ip_addresses,
                                logger)
                        else:
                            noise_status = utility.fetch_response_from_api(
                                api_client.quick, cache_client,
                                ips_not_in_cache, logger)
                            noise_status.extend(response)
                    except Exception:
                        logger.debug(
                            "An exception occurred while fetching response from cache.\n{}"
                            .format(traceback.format_exc()))
                    logger.debug(
                        "Generating command with caching took {} seconds.".
                        format(time.time() - cache_start))
                else:
                    # Opting timout 120 seconds for the requests
                    noise_status = api_client.quick(ip_addresses)
                logger.info("Retrieved results successfully")
                # CACHING END

                # Process the API response and send the noise and RIOT status information of IP with extractions
                # to the Splunk, Using this flag to handle the field extraction issue in custom commands
                # Only the fields extracted from the first event of generated by custom command
                # will be extracted from all events
                first_record_flag = True

                # Flag to indicate whether erroneous IPs are present
                erroneous_ip_present = False
                for ip in ip_addresses:
                    for sample in noise_status:
                        if ip == sample['ip']:
                            yield event_generator.make_valid_event(
                                'quick', sample, first_record_flag)
                            if first_record_flag:
                                first_record_flag = False
                            logger.debug(
                                "Fetched noise and RIOT status for ip={} from GreyNoise API"
                                .format(str(ip)))
                            break
                    else:
                        erroneous_ip_present = True
                        try:
                            validate_ip(ip, strict=True)
                        except ValueError as e:
                            error_msg = str(e).split(":")
                            logger.debug(
                                "Generating noise and RIOT status for ip={} manually"
                                .format(str(ip)))
                            event = {'ip': ip, 'error': error_msg[0]}
                            yield event_generator.make_invalid_event(
                                'quick', event, first_record_flag)

                            if first_record_flag:
                                first_record_flag = False

                if erroneous_ip_present:
                    logger.warn(
                        "Value of one or more IP address(es) is either invalid or non-routable"
                    )
                    self.write_warning(
                        "Value of one or more IP address(es) passed to {command_name} "
                        "is either invalid or non-routable".format(
                            command_name=str(
                                self._metadata.searchinfo.command)))

            except RateLimitError:
                logger.error(
                    "Rate limit error occured while fetching the context information for ips={}"
                    .format(str(ip_addresses)))
                self.write_error(
                    "The Rate Limit has been exceeded. Please contact the Administrator"
                )
            except RequestFailure as e:
                response_code, response_message = e.args
                if response_code == 401:
                    msg = "Unauthorized. Please check your API key."
                else:
                    # Need to handle this, as splunklib is unable to handle the exception with
                    # (400, {'error': 'error_reason'}) format
                    msg = (
                        "The API call to the GreyNoise platform have been failed "
                        "with status_code: {} and error: {}").format(
                            response_code,
                            response_message['error'] if isinstance(
                                response_message, dict) else response_message)

                logger.error("{}".format(str(msg)))
                self.write_error(msg)
            except ConnectionError:
                logger.error(
                    "Error while connecting to the Server. Please check your connection and try again."
                )
                self.write_error(
                    "Error while connecting to the Server. Please check your connection and try again."
                )
            except RequestException:
                logger.error(
                    "There was an ambiguous exception that occurred while handling your Request. Please try again."
                )
                self.write_error(
                    "There was an ambiguous exception that occurred while handling your Request. Please try again."
                )
            except Exception:
                logger.error("Exception: {} ".format(
                    str(traceback.format_exc())))
                self.write_error(
                    "Exception occured while fetching the noise and RIOT status of the IP address(es). "
                    "See greynoise_main.log for more details.")

        elif ip_field:
            # Enter the mechanism only when the Search is complete and all the events are available
            if self.search_results_info and not self.metadata.preview:

                try:
                    # Strip the spaces from the parameter value if given
                    ip_field = ip_field.strip()
                    # Validating the given parameter
                    try:
                        ip_field = validator.Fieldname(
                            option_name='ip_field').validate(ip_field)
                    except ValueError as e:
                        # Validator will throw ValueError with error message when the parameters are not proper
                        logger.error(str(e))
                        self.write_error(str(e))
                        exit(1)

                    # API key validation
                    if not self.api_validation_flag:
                        api_key_validation, message = utility.validate_api_key(
                            api_key, logger)
                        logger.debug(
                            "API validation status: {}, message: {}".format(
                                api_key_validation, str(message)))
                        self.api_validation_flag = True
                        if not api_key_validation:
                            logger.info(message)
                            self.write_error(message)
                            exit(1)

                    # This piece of code will work as transforming command and will use
                    # the Splunk ingested events and field which is specified in ip_field.
                    chunk_dict = event_generator.batch(records, ip_field,
                                                       EVENTS_PER_CHUNK,
                                                       logger)

                    # This means there are only 1000 or below IPs to call in the entire bunch of records
                    # Use one thread with single thread with caching mechanism enabled for the chunk
                    if len(chunk_dict) == 1:
                        logger.info(
                            "Less then 1000 distinct IPs are present, "
                            "optimizing the IP requests call to GreyNoise API..."
                        )
                        THREADS = 1
                        USE_CACHE = True

                    api_client = GreyNoise(api_key=api_key,
                                           timeout=120,
                                           use_cache=USE_CACHE,
                                           integration_name=INTEGRATION_NAME)
                    # When no records found, batch will return {0:([],[])}
                    tot_time_start = time.time()
                    if len(list(chunk_dict.values())[0][0]) >= 1:
                        for event in event_generator.get_all_events(
                                self._metadata.searchinfo.session_key,
                                api_client,
                                'multi',
                                ip_field,
                                chunk_dict,
                                logger,
                                threads=THREADS):
                            yield event
                    else:
                        logger.info(
                            "No events found, please increase the search timespan to have more search results."
                        )
                    tot_time_end = time.time()
                    logger.debug(
                        "Total execution time => {}".format(tot_time_end -
                                                            tot_time_start))
                except Exception:
                    logger.info(
                        "Exception occured while adding the noise and RIOT status to the events, Error: {}"
                        .format(traceback.format_exc()))
                    self.write_error(
                        "Exception occured while adding the noise and RIOT status of "
                        "the IP addresses to events. See greynoise_main.log for more details."
                    )

        else:
            logger.error(
                "Please specify exactly one parameter from ip and ip_field with some value."
            )
            self.write_error(
                "Please specify exactly one parameter from ip and ip_field with some value."
            )
    def transform(self, records):
        """Method that processes and yield event records to the Splunk events pipeline."""
        ip_address = self.ip
        ip_field = self.ip_field
        api_key = ""
        EVENTS_PER_CHUNK = 1
        THREADS = 3
        USE_CACHE = False
        logger = utility.setup_logger(
            session_key=self._metadata.searchinfo.session_key, log_context=self._metadata.searchinfo.command)

        if ip_address and ip_field:
            logger.error("Please use parameter ip to work gnriot as generating command or "
                         "use parameter ip_field to work gnriot as transforming command.")
            self.write_error("Please use parameter ip to work gnriot as generating command or "
                             "use parameter ip_field to work gnriot as transforming command")
            exit(1)

        try:
            message = ''
            api_key = utility.get_api_key(self._metadata.searchinfo.session_key, logger=logger)
        except APIKeyNotFoundError as e:
            message = str(e)
        except HTTPError as e:
            message = str(e)

        if message:
            self.write_error(message)
            logger.error("Error occured while retrieving API key, Error: {}".format(message))
            exit(1)

        if ip_address and not ip_field:
            # This peice of code will work as generating command and will not use the Splunk events.
            # Strip the spaces from the parameter value if given
            ip_address = ip_address.strip()

            logger.info("Started retrieving results")
            try:
                logger.debug("Initiating to fetch RIOT information for IP address: {}".format(str(ip_address)))
                api_client = GreyNoise(api_key=api_key, timeout=120, integration_name=INTEGRATION_NAME)
                # Opting timout 120 seconds for the requests
                session_key = self._metadata.searchinfo.session_key
                riot_information = utility.get_response_for_generating(
                    session_key, api_client, ip_address, 'greynoise_riot', logger)
                logger.info("Retrieved results successfully")

                # Process the API response and send the riot information of IP with extractions to the Splunk
                yield event_generator.make_valid_event('riot', riot_information, True)
                logger.debug("Fetched RIOT information for ip={} from GreyNoise API".format(str(ip_address)))

            except ValueError as e:
                error_msg = str(e).split(":")
                logger.debug("Generating RIOT information for ip={} manually".format(str(ip_address)))
                event = {
                    'ip': ip_address,
                    'error': error_msg[0]
                }
                yield event_generator.make_invalid_event('riot', event, True)
                logger.warn(error_msg)
                self.write_warning(
                    "Value of IP address passed to {command_name} is either invalid or non-routable".format(
                        command_name=str(self._metadata.searchinfo.command)))
            except RateLimitError:
                logger.error("Rate limit error occured while fetching the context information for ip={}".format(
                    str(ip_address)))
                self.write_error("The Rate Limit has been exceeded. Please contact the Administrator")
            except RequestFailure as e:
                response_code, response_message = e.args
                if response_code == 401:
                    msg = "Unauthorized. Please check your API key."
                else:
                    # Need to handle this, as splunklib is unable to handle the exception with
                    # (400, {'error': 'error_reason'}) format
                    msg = ("The API call to the GreyNoise platform have been failed "
                           "with status_code: {} and error: {}").format(
                        response_code, response_message['error'] if isinstance(response_message, dict)
                        else response_message)

                logger.error("{}".format(str(msg)))
                self.write_error(msg)
            except ConnectionError:
                logger.error("Error while connecting to the Server. Please check your connection and try again.")
                self.write_error("Error while connecting to the Server. Please check your connection and try again.")
            except RequestException:
                logger.error(
                    "There was an ambiguous exception that occurred while handling your Request. Please try again.")
                self.write_error(
                    "There was an ambiguous exception that occurred while handling your Request. Please try again.")
            except Exception:
                logger.error("Exception: {} ".format(str(traceback.format_exc())))
                self.write_error("Exception occured while fetching the RIOT information of the IP address. "
                                 "See greynoise_main.log for more details.")

        elif ip_field:

            logger.info("Started retrieving RIOT information for the IP addresses present in field: {}".format(
                str(ip_field)))
            # Enter the mechanism only when the Search is complete and all the events are available
            if self.search_results_info and not self.metadata.preview:
                try:
                    # Strip the spaces from the parameter value if given
                    ip_field = ip_field.strip()
                    # Validating the given parameter
                    try:
                        ip_field = validator.Fieldname(option_name='ip_field').validate(ip_field)
                    except ValueError as e:
                        # Validator will throw ValueError with error message when the parameters are not proper
                        logger.error(str(e))
                        self.write_error(str(e))
                        exit(1)

                    # API key validation
                    if not self.api_validation_flag:
                        api_key_validation, message = utility.validate_api_key(api_key, logger)
                        logger.debug("API validation status: {}, message: {}".format(api_key_validation, str(message)))
                        self.api_validation_flag = True
                        if not api_key_validation:
                            logger.info(message)
                            self.write_error(message)
                            exit(1)

                    # This piece of code will work as transforming command and will use
                    # the Splunk ingested events and field which is specified in ip_field.
                    # divide the records in the form of dict of tuples having chunk_index as key
                    # {<index>: (<records>, <All the ips in records>)}
                    chunk_dict = event_generator.batch(
                        records, ip_field, EVENTS_PER_CHUNK, logger, optimize_requests=False)
                    logger.debug("Successfully divided events into chunks")

                    # This means there are only 1000 or below IPs to call in the entire bunch of records
                    # Use one thread with single thread with caching mechanism enabled for the chunk
                    if len(chunk_dict) == 1:
                        logger.debug("Less then 1000 distinct IPs are present, "
                                     "optimizing the IP requests call to GreyNoise API...")
                        THREADS = 1
                        USE_CACHE = True

                    api_client = GreyNoise(
                        api_key=api_key, timeout=120, use_cache=USE_CACHE, integration_name=INTEGRATION_NAME)

                    # When no records found, batch will return {0:([],[])}
                    if len(chunk_dict) > 0:
                        for event in event_generator.get_all_events(
                                self._metadata.searchinfo.session_key, api_client, 'greynoise_riot', ip_field,
                                chunk_dict, logger, threads=THREADS):
                            yield event

                        logger.info("Successfully sent all the results to the Splunk")
                    else:
                        logger.info("No events found, please increase the search timespan to have more search results.")
                except Exception:
                    logger.info(
                        "Exception occured while adding the RIOT information to the events, Error: {}".format(
                            traceback.format_exc()))
                    self.write_error("Exception occured while adding the RIOT information of the IP addresses "
                                     "to events. See greynoise_main.log for more details.")

        else:
            logger.error("Please specify exactly one parameter from ip and ip_field with some value.")
            self.write_error("Please specify exactly one parameter from ip and ip_field with some value.")