def test_regular_expression(self):
    validator = validators.RegularExpression()
    # duck-type: act like it's a regex and allow failure if it isn't one
    validator.__call__('a').match('a')
    self.assertEqual(validator.__call__(None), None)
    self.assertRaises(ValueError, validator.__call__, '(a')
class StubbedReportingCommand(ReportingCommand):

    boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        require=False, validate=validators.Boolean())

    duration = Option(
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''',
        validate=validators.Duration())

    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''',
        validate=validators.Fieldname())

    file = Option(
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''',
        validate=validators.File(mode='r'))

    integer = Option(
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''',
        validate=validators.Integer())

    optionname = Option(
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''',
        validate=validators.OptionName())

    regularexpression = Option(
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''',
        validate=validators.RegularExpression())

    set = Option(
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** A member of a set''',
        validate=validators.Set("foo", "bar", "test"))

    @Configuration()
    def map(self, records):
        pass

    def reduce(self, records):
        pass
class CountMatchesCommand(StreamingCommand):
    """ Counts the number of non-overlapping matches to a regular expression in a set of fields.

    ##Syntax

    .. code-block::
        countmatches fieldname=<field> pattern=<regular_expression> <field-list>

    ##Description

    A count of the number of non-overlapping matches to the regular expression specified by `pattern` is computed for
    each record processed. The result is stored in the field specified by `fieldname`. If `fieldname` exists, its
    value is replaced. If `fieldname` does not exist, it is created. Event records are otherwise passed through to
    the next pipeline processor unmodified.

    ##Example

    Count the number of words in the `text` of each tweet in tweets.csv and store the result in `word_count`.

    .. code-block::
        | inputcsv tweets.csv | countmatches fieldname=word_count pattern="\\w+" text

    """
    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<fieldname>*
        **Description:** Name of the field that will hold the match count''',
        require=True, validate=validators.Fieldname())

    pattern = Option(
        doc='''
        **Syntax:** **pattern=***<regular-expression>*
        **Description:** Regular expression pattern to match''',
        require=True, validate=validators.RegularExpression())

    def stream(self, records):
        self.logger.debug('CountMatchesCommand: %s' % self)  # logs command line
        for record in records:
            count = 0.0
            for fieldname in self.fieldnames:
                matches = self.pattern.finditer(str(record[fieldname]))
                count += len(list(matches))
            record[self.fieldname] = count
            yield record
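For reference, a minimal standalone sketch of the counting logic used in the stream method above; the sample record, field names, and pattern are illustrative assumptions, not part of the command.

# Illustrative sketch only: the same finditer-based counting that CountMatchesCommand.stream
# performs, exercised outside of Splunk with a made-up record.
import re

pattern = re.compile(r'\w+')               # what validators.RegularExpression() would yield for pattern="\\w+"
record = {'text': 'four score and seven years ago'}

count = 0.0
for fieldname in ['text']:                 # stands in for self.fieldnames
    count += len(list(pattern.finditer(str(record[fieldname]))))
record['word_count'] = count               # stands in for record[self.fieldname]
print(record)                              # {'text': 'four score and seven years ago', 'word_count': 6.0}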
class SearchTableCommand(StreamingCommand):

    pattern = Option(
        doc='''
        **Syntax:** **pattern=***<regular-expression>*
        **Description:** Regular expression pattern to match''',
        require=False, validate=validators.RegularExpression())

    def stream(self, records):
        # pydevd.settrace()
        self.logger.setLevel(logging.DEBUG)
        self.logger.debug('SearchTableCommand: %s' % self)  # logs command line
        for record in records:
            found = False
            for field in record:
                matches = len(list(self.pattern.finditer(str(record[field]))))
                if matches > 0:
                    found = True
            if found:
                yield record
        self.logger.debug('SearchTableCommand: Done')
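A small sketch (not part of the command) of the same "keep the record if any field matches" filter that SearchTableCommand.stream applies; the records and pattern are made-up examples.

# Illustrative sketch only: filter made-up records the way SearchTableCommand.stream does,
# keeping a record when any of its field values matches the pattern.
import re

pattern = re.compile(r'error')
records = [
    {'host': 'web01', 'message': 'connection error'},
    {'host': 'web02', 'message': 'ok'},
]

for record in records:
    if any(pattern.search(str(value)) for value in record.values()):
        print(record)   # only the web01 record is emitted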
class TestSearchCommand(SearchCommand):

    boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        validate=validators.Boolean())

    required_boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        require=True, validate=validators.Boolean())

    aliased_required_boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        name='foo', require=True, validate=validators.Boolean())

    code = Option(
        doc='''
        **Syntax:** **code=***<value>*
        **Description:** A Python expression, if mode == "eval", or statement, if mode == "exec"''',
        validate=validators.Code())

    required_code = Option(
        doc='''
        **Syntax:** **code=***<value>*
        **Description:** A Python expression, if mode == "eval", or statement, if mode == "exec"''',
        require=True, validate=validators.Code())

    duration = Option(
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''',
        validate=validators.Duration())

    required_duration = Option(
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''',
        require=True, validate=validators.Duration())

    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''',
        validate=validators.Fieldname())

    required_fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''',
        require=True, validate=validators.Fieldname())

    file = Option(
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''',
        validate=validators.File())

    required_file = Option(
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''',
        require=True, validate=validators.File())

    integer = Option(
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''',
        validate=validators.Integer())

    required_integer = Option(
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''',
        require=True, validate=validators.Integer())

    map = Option(
        doc='''
        **Syntax:** **map=***<value>*
        **Description:** A mapping from one value to another''',
        validate=validators.Map(foo=1, bar=2, test=3))

    required_map = Option(
        doc='''
        **Syntax:** **map=***<value>*
        **Description:** A mapping from one value to another''',
        require=True, validate=validators.Map(foo=1, bar=2, test=3))

    match = Option(
        doc='''
        **Syntax:** **match=***<value>*
        **Description:** A value that matches a regular expression pattern''',
        validate=validators.Match('social security number', r'\d{3}-\d{2}-\d{4}'))

    required_match = Option(
        doc='''
        **Syntax:** **required_match=***<value>*
        **Description:** A value that matches a regular expression pattern''',
        require=True, validate=validators.Match('social security number', r'\d{3}-\d{2}-\d{4}'))

    optionname = Option(
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''',
        validate=validators.OptionName())

    required_optionname = Option(
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''',
        require=True, validate=validators.OptionName())

    regularexpression = Option(
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''',
        validate=validators.RegularExpression())

    required_regularexpression = Option(
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''',
        require=True, validate=validators.RegularExpression())

    set = Option(
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** A member of a set''',
        validate=validators.Set('foo', 'bar', 'test'))

    required_set = Option(
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** A member of a set''',
        require=True, validate=validators.Set('foo', 'bar', 'test'))

    class ConfigurationSettings(SearchCommand.ConfigurationSettings):

        @classmethod
        def fix_up(cls, command_class):
            pass
def test_regular_expression(self):
    validator = validators.RegularExpression()
    self.assertIsInstance(validator.__call__('a'), re._pattern_type)
    self.assertEqual(validator.__call__(None), None)
    self.assertRaises(ValueError, validator.__call__, '(a')
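A minimal sketch, not part of the original test suite, of the same checks written against the public re.Pattern type, for Python versions where the private re._pattern_type alias used above is no longer available.

# Illustrative sketch only (assumption: running on a Python 3 release that exposes re.Pattern).
import re
from splunklib.searchcommands import validators

validator = validators.RegularExpression()
assert isinstance(validator('a'), re.Pattern)   # a valid pattern compiles to a Pattern object
assert validator(None) is None                  # None passes through unchanged
try:
    validator('(a')                             # an unbalanced group should be rejected
except ValueError:
    pass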
class CountMatchesCommand(StreamingCommand):
    """ Counts the number of non-overlapping matches to a regular expression in a set of fields.

    ##Syntax

    .. code-block::
        countmatches fieldname=<field> pattern=<regular_expression> <field-list>

    ##Description

    A count of the number of non-overlapping matches to the regular expression specified by `pattern` is computed for
    each record processed. The result is stored in the field specified by `fieldname`. If `fieldname` exists, its
    value is replaced. If `fieldname` does not exist, it is created. Event records are otherwise passed through to
    the next pipeline processor unmodified.

    ##Example

    Count the number of words in the `text` of each tweet in tweets.csv and store the result in `word_count`.

    .. code-block::
        | inputlookup tweets | countmatches fieldname=word_count pattern="\\w+" text

    """
    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<fieldname>*
        **Description:** Name of the field that will hold the match count''',
        require=True, validate=validators.Fieldname())

    outname = Option(
        doc='''
        **Syntax:** **outname=***<outname>*
        **Description:** Name of the output field that will hold the index name''',
        require=True, validate=validators.Fieldname())

    pattern = Option(
        doc='''
        **Syntax:** **pattern=***<regular-expression>*
        **Description:** Regular expression pattern to match''',
        require=True, validate=validators.RegularExpression())

    def stream(self, records):
        self.logger.debug('CountMatchesCommand: %s', self)  # logs command line
        pattern = self.pattern
        outname = self.outname
        count = 0
        whitelist = ""
        for record in records:
            for fieldname in self.fieldnames:
                matches = pattern.findall(six.text_type(record[fieldname]))
                count += len(matches)
            record[self.fieldname] = count
            if whitelist != "":
                whitelist = str(whitelist) + "|" + str(record)
            else:
                whitelist = str(record)
        # whitelist is empty
        if count == 0:
            whitelist = "[('" + str(outname) + "', '*')]"
        yield {'_raw': str(whitelist)}
class fuzzylookup(StreamingCommand):
    doc = '''
    **Syntax:**
    | fuzzylookup
        [ prefix=<string> ]
        [ lookupfilter=<kvpairs> ]
        [ mask=<regex> ]
        [ delete=<regex> ]
        <lookup-table-name>
        ( <lookup-field> [AS <event-field>] )
        [ OUTPUT | OUTPUTNEW (<lookup-destfield> [AS <event-destfield>] ) ... ]

    **Description**
    Takes field from search results and compares to a lookup for near-matches
    '''

    prefix = Option(
        doc='''
        **Syntax:** **prefix=***"prefix_text"
        **Description:** Text to prefix all output field names with.
        Helpful if you just want every lookup field without aliasing each one.''',
        require=False)

    addmetrics = Option(
        doc='''
        **Syntax:** **addmetrics=***[True|False]
        **Description:** Add fuzzy match metrics to each result
        (score, matching characters, similarity score, consecutive match length)
        **Default:** False''',
        require=False, validate=validators.Boolean())

    lookupfilter = Option(
        doc='''
        **Syntax:** **lookupfilter=***"LookupField1=\"local admin\" Lookupfield2=\"*@$email_domain$\""*
        (wildcard, variable, or literal string match)
        **Description:** Filter for data in the specified lookup to narrow down comparisons''',
        require=False)

    mask = Option(
        doc='''
        **Syntax:** **mask=***"regular expression"*
        **Description:** Mask pattern for both compared sets of values.
        Masks the regex matched text before comparing.''',
        require=False, validate=validators.RegularExpression())

    delete = Option(
        doc='''
        **Syntax:** **delete=***"regular expression"*
        **Description:** Deletion pattern for both compared sets of values.
        Removes the regex matched text before comparing.''',
        require=False, validate=validators.RegularExpression())

    session_key = ''
    splunkd_uri = ''
    service = ''
    lookup_list = []
    lookup_filters_static = []
    lookup_filters_dynamic = []
    lookup = ''
    lookupfield = ''
    searchfield = ''
    output_aliases = OrderedDict()
    # Default output field overwrite setting is True
    output_overwrite = True

    # Store the data from the lookup for each dynamic filter
    # Use a manager as a proxy to allow for cross-process communication
    manager = Manager()
    prepopulated_filter_lookupdata = manager.dict()
    l = manager.list()

    # Define main function
    def stream(self, events):
        logger = setup_logging('fuzzylookup')

        args = [val for val in self._metadata.searchinfo.args[2:] if '=' not in val]
        logger.debug("Arguments: " + str(self._metadata.searchinfo.args[2:]))
        arg_count = len(args)
        arg_index = 0

        # Parse the arguments to the command
        if arg_count >= 3:
            while arg_index < arg_count:
                # Process the lookup name, lookup field, search field
                if self.lookup == '':
                    self.lookup = args[arg_index]
                    arg_index += 1
                if self.lookupfield == '':
                    self.lookupfield = args[arg_index]
                    if len(args) >= arg_index + 2:
                        if args[arg_index + 1].upper() == 'AS':
                            self.searchfield = args[arg_index + 2]
                            arg_index += 3
                        else:
                            self.searchfield = self.lookupfield
                            arg_index += 1
                    else:
                        self.searchfield = self.lookupfield
                        arg_index += 1
                if arg_index < len(args) and None not in [self.lookup, self.lookupfield, self.searchfield]:
                    if args[arg_index].upper() == 'OUTPUT':
                        self.output_overwrite = True
                    elif args[arg_index].upper() == 'OUTPUTNEW':
                        self.output_overwrite = False
                    else:
                        # Add field to output fields list
                        output_field_name = args[arg_index].strip(',')
                        if len(args) >= arg_index + 2:
                            if args[arg_index + 1].upper() == 'AS':
                                self.output_aliases[output_field_name] = args[arg_index + 2]
                                arg_index += 2
                            else:
                                self.output_aliases[output_field_name] = output_field_name
                        else:
                            self.output_aliases[output_field_name] = output_field_name
                    arg_index += 1
        else:
            logger.critical("Not enough parameters specified to execute fuzzylookup.")
            print("Not enough parameters specified to execute fuzzylookup.")
            exit(1957)

        if None in [self.lookup, self.lookupfield, self.searchfield]:
            logger.critical("Could not parse all arguments for fuzzylookup")
            print("Could not parse all arguments for fuzzylookup")
            exit(1173)

        logger.debug("lookup: " + self.lookup)
        logger.debug("lookupfield: " + self.lookupfield)
        logger.debug("searchfield: " + self.searchfield)
        logger.debug("output_overwrite: " + str(self.output_overwrite))
        logger.debug("output_aliases: " + str(self.output_aliases))

        if self.prefix is None:
            self.prefix = ''
        if self.addmetrics is None:
            self.addmetrics = False
        logger.debug("prefix = %s", self.prefix)
        logger.debug("addmetrics = %s", self.addmetrics)

        # Log beginning of comparison
        logger.info('Comparing %s to %s in %s lookup for fuzzy matches',
                    self.searchfield, self.lookupfield, self.lookup)
        start_time = time.time()

        lookupfilter_str = ''
        # See if we have a lookup filter we can use in the root search
        if self.lookupfilter is not None and len(self.lookupfilter) > 0:
            # Split the filter into multiple key/value filters
            # Break the data into multiple fields, if needed
            # Replace the space delimiter with |, then split by |
            filter_list = re.sub(r'\s+(\w+=)', '|\g<1>', self.lookupfilter).split('|')  # pylint: disable=anomalous-backslash-in-string
            for f in filter_list:
                logger.debug("filter = " + f)
                filter_re = re.compile(r'^(.*?)([<>=]+)(.*)$')
                m = filter_re.match(f)
                if m is not None:
                    filter_obj = {
                        'field': m.group(1),
                        'op': m.group(2),
                        'value': m.group(3).strip('"')
                    }
                    # Find the dynamic filters, referencing $fieldname$ from the event
                    if re.search(r'\$\w+\$', f) is None:
                        self.lookup_filters_static.append(filter_obj)
                    else:
                        # Find the static filters
                        self.lookup_filters_dynamic.append(filter_obj)
                else:
                    # Only handle field/value pair filters. Ignore all others.
                    logger.info("Ignored filter: %s", f)

            # Build the static filter string to go into the SPL search
            for f in self.lookup_filters_static:
                lookupfilter_str += '{0}{1}"{2}" '.format(
                    f['field'].replace('|', ""), f['op'], f['value'].replace('|', ""))
            logger.debug("Static lookup filter: %s", lookupfilter_str)

        if len(lookupfilter_str) > 0:
            lookup_search = '|inputlookup {0} where {1}="*" | search {2} | eval {1}=lower({1}) | dedup {1}'.format(
                self.lookup, self.lookupfield, lookupfilter_str)
        else:
            lookup_search = '|inputlookup {0} where {1}="*" | eval {1}=lower({1}) | dedup {1}'.format(
                self.lookup, self.lookupfield)
        logger.info('Lookup query is: %s' % (lookup_search))

        # Connect via existing session key
        self.session_key = self._metadata.searchinfo.session_key
        self.splunkd_uri = self._metadata.searchinfo.splunkd_uri
        namespace = self._metadata.searchinfo.app

        try:
            self.service = client.connect(token=self.session_key)
            logger.info('Successfully connected to %s', str(self.splunkd_uri))
        except BaseException as e:
            logger.error('Error connecting: %s', repr(e))

        # Bind incoming search results for reading and extraction of search field
        # Execute lookup command and bind results
        logger.info('Attempting to cache lookup of %s', self.lookup)

        # Set the URL of the Splunk endpoint
        search_url = '%s/servicesNS/nobody/%s/search/jobs' % (self.splunkd_uri, namespace)
        # Set the headers for HTTP requests
        headers = {
            'Authorization': 'Splunk %s' % self.session_key,
            'Content-Type': 'application/x-www-form-urlencoded'
        }

        try:
            request_data = {
                "search": lookup_search,
                "exec_mode": 'oneshot',
                "count": '0',
                "rf": self.lookupfield,  # Required fields list
                "namespace": namespace,
                "output_mode": 'json'
            }
            #logger.debug('Request data: %s', str(request_data))
            logger.debug('Search URL: %s', str(search_url))
            #logger.debug('Headers: %s', str(headers))
            payload = str.encode(urllib.parse.urlencode(request_data))
            json_data, result_code = request('POST', search_url, payload, headers)
            # Write the values from the lookup to lookup_list
            self.lookup_list = json.loads(json_data)['results']
            logger.info('Retrieved %d records from lookup %s', len(self.lookup_list), self.lookup)
            logger.debug('Response code: %s', result_code)
            #logger.debug('Response contents: %s', json_data)
        except BaseException as e:
            logger.error('Could not cache lookup %s: %s', self.lookup, repr(e))

        # Make a Pool of workers
        pool = ThreadPool(5)

        try:
            count = 0
            if len(self.lookup_list) > 0:
                logger.debug("Running ThreadPool")
                results = pool.map(self.get_distances, events)
                for result in results:
                    yield result
                    count += 1
            else:
                for event in events:
                    yield event
                    count += 1
        except BaseException as e:
            logger.error("Error: %s" % repr(e))
            results = {}

        duration_secs = round(time.time() - start_time)
        logger.info("Completed fuzzylookup search command for %s results in %s seconds.",
                    str(count), str(duration_secs))

    # Run this thread once for each event
    def get_distances(self, event):
        logger = setup_logging('fuzzylookup')
        start_time = time.time()

        # sf = search field / field from search results
        # Convert to Unicode (py3 compatible)
        event_field_value = str(event[self.searchfield].lower())
        if event_field_value is None or len(event_field_value) == 0:
            return event

        # Iterate through lookupfield results and calculate get_distances
        logger.debug('Calculating distances for %s', event_field_value)

        best_match_string = None
        active_score = 100
        active_charmatch = 0
        best_score = 100
        best_charmatch = 0
        dynamic_matches = 0
        dynamic_match_list = []
        dynamic_filters = {}
        use_cache = True

        try:
            # See if we have a dynamic lookup filter (references event field values)
            if len(self.lookup_filters_dynamic) > 0:
                # For this event, calculate the dynamic lookup filters based on the data in the event
                # Using this feature dramatically speeds up searches by limiting the number of rows compared
                dynamic_filter_keys = []
                for s in self.lookup_filters_dynamic:
                    try:
                        # Look for dynamic variables in the provided filter ($xxxxx$)
                        lookup_filter_value = s['value']
                        match_list = re.findall(r'\$[^\$]+\$', lookup_filter_value)
                        # For each match, replace instances of $xxxxxx$ with the field value from the event
                        # Supports multiple event fields
                        for group in match_list:
                            v = group.strip('$')
                            if v in list(event.keys()):
                                lookup_filter_value = lookup_filter_value.replace(group, event[v])
                        dynamic_filters[s['field']] = lookup_filter_value
                    except BaseException as e:
                        logger.error("Error building dynamic lookup filters: %s", repr(e))
                    # We may have more than one filtered field per row. Account for that here.
                    dynamic_filter_keys.append(s['field'] + "=" + lookup_filter_value)

                # Generate the key string so we can recall the same lookup rows later for more events
                # This is to have a shorter list to compare against
                if len(dynamic_filter_keys) > 0:
                    #logger.debug(str(dynamic_filter_keys))
                    dynamic_filter_keys.sort()
                    dynamic_filters_key = '|'.join(dynamic_filter_keys)
                    #logger.debug("dynamic_filters_key = " + dynamic_filters_key)
                    if dynamic_filters_key in list(self.prepopulated_filter_lookupdata.keys()) and use_cache:
                        logger.debug("Using prepopulated filter lookup data for " + dynamic_filters_key)
                        comparison_list = self.prepopulated_filter_lookupdata[dynamic_filters_key]
                        # Make sure we skip the filter comparison and go straight to Levenshtein
                        dynamic_filters = {}
                    else:
                        comparison_list = self.lookup_list
                        #logger.debug("Cached dynamic filter results: " + str(len(list(self.prepopulated_filter_lookupdata.keys()))))
                else:
                    logger.error("No dynamic filters matched for input: " + str(event))
                    return event
            else:
                # No dynamic filters found. Use the raw lookup list.
                comparison_list = self.lookup_list
                dynamic_filters_key = None

            # Find the shortest distance metric
            comparison_count = 0
            for lookup_record in comparison_list:
                comparison_count += 1
                # We have a dynamic filter so we have to grab the field referenced from the event
                # Ex: Lookupfield2=\"*@$email_domain$\"
                # s['field'] = 'Lookupfield2'
                # s['value'] = "*@$email_domain$"
                filter_matched = True
                for filter_key, filter_value in list(dynamic_filters.items()):
                    try:
                        #dynamic_filter_list.append(lookup_filter_value)
                        if filter_key in list(lookup_record.keys()):
                            # Make sure the dynamic filter field matches the dynamic filter value
                            # Prepare the text field to be compared against
                            lookup_value = lookup_record[filter_key]
                            # Use fnmatch to do a pure wildcard search between the lookup row value
                            # and the dynamic filter text from the event
                            #logger.debug("Comparing %s to %s", lookup_value, lookup_filter_value)
                            if fnmatch.fnmatch(lookup_value, filter_value):
                                pass
                            else:
                                # If the record doesn't match, skip to the next lookup value (see below)
                                filter_matched = False
                        else:
                            logger.debug("Lookup record skipped. Missing field %s: %s",
                                         s['field'], str(lookup_record))
                            filter_matched = False
                    except BaseException as e:
                        logger.error("Error checking dynamic lookup filters: %s", repr(e))

                if filter_matched:
                    dynamic_matches += 1
                    # Use this match for caching lookup entries that match this dynamic filter
                    dynamic_match_list.append(lookup_record)
                else:
                    # Skip comparison
                    continue

                # Produce a list of fields to output if we were not supplied one
                if len(self.output_aliases) == 0:
                    for lookup_field in list(lookup_record.keys()):
                        self.output_aliases[lookup_field] = lookup_field

                # Get the lookup field value
                lookup_value = lookup_record[self.lookupfield]
                # Convert to Unicode (Python 3 compatible version)
                sf_compare = str(event_field_value.lower())
                lf_compare = str(lookup_value.lower())

                try:
                    # Apply the deletions and masking prior to comparisons being made
                    if self.delete is not None:
                        sf_compare = re.sub(self.delete, '', sf_compare)
                        lf_compare = re.sub(self.delete, '', lf_compare)
                    if self.mask is not None:
                        sf_compare = re.sub(self.mask, '*', sf_compare)
                        lf_compare = re.sub(self.mask, '*', lf_compare)

                    #logger.debug("Comparing %s to %s", sf_compare, lf_compare)
                    active_score = jf.levenshtein_distance(sf_compare, lf_compare)
                    active_charmatch = matching_chars(sf_compare, lf_compare)

                    # Get the result with the greatest 1:1 character overlap if the scores are identical
                    if active_score < best_score or (active_score == best_score and active_charmatch > best_charmatch):
                        # New best score
                        best_match_string = [lookup_value]
                        best_match_lookup_record = [lookup_record]
                        best_score = active_score
                        best_charmatch = active_charmatch
                        best_lf_compare = lf_compare
                    elif active_score == best_score and active_charmatch == best_charmatch:
                        # Same best score, different entry. Append to the list.
                        best_match_string.append(lookup_value)
                        best_match_lookup_record.append(lookup_record)
                except TypeError as e:
                    logger.error("Type Error: " + repr(e))
                    raise Exception
                except BaseException as e:
                    logger.error("Error comparing %s to list entry %s: %s",
                                 event_field_value, lookup_value, repr(e))

            if best_score < 100:
                # Calculate a metric for similarity based on fuzzy score and string character overlap count
                fuzzy_weight = 75
                charmatch_weight = 25
                #sequencelen_weight = 25
                max_length = max(len(sf_compare), len(best_lf_compare))
                fuzzy_metric = round((1 - (float(best_score) / max_length)) * fuzzy_weight, 2)  # inverted, best=0
                charmatch_metric = round((float(best_charmatch) / max_length) * charmatch_weight, 2)
                #sequencelen_metric = round((1-(float(best_sequencelen) / max_length)) * sequencelen_weight, 2)

                # Check for the best consecutive character length match in the resulting list
                # This is done in a second step to limit the number of sequence length computations
                #if len(best_match_lookup_record) > 1:
                best_sequencelen = 0
                best_sequence_lookup_record = []
                for lookup_record in best_match_lookup_record:
                    lf_compare = lookup_record[self.lookupfield].lower()
                    # Apply the deletions and masking prior to comparisons being made (again)
                    if self.delete is not None:
                        lf_compare = re.sub(self.delete, '', lf_compare)
                    if self.mask is not None:
                        lf_compare = re.sub(self.mask, '*', lf_compare)
                    # Calculate the length of consecutive character matches
                    active_sequencelen = overlap_length(sf_compare, lf_compare)
                    if active_sequencelen > best_sequencelen:
                        # New best score
                        best_sequencelen = active_sequencelen
                        best_sequence_lookup_record = [lookup_record]
                    elif active_sequencelen == best_sequencelen:
                        # Best score tie
                        best_sequence_lookup_record.append(lookup_record)
                best_match_lookup_record = best_sequence_lookup_record

                if self.addmetrics:
                    # Output the fuzzy metrics
                    event[self.prefix + "fuzzy_matchlen"] = best_sequencelen
                    event[self.prefix + "fuzzy_score"] = best_score
                    event[self.prefix + "fuzzy_charmatch"] = best_charmatch
                    event[self.prefix + "fuzzy_similarity"] = fuzzy_metric + charmatch_metric  # + sequencelen_metric

                # Output the fields from the lookup entry/entries
                if len(self.output_aliases) > 0:
                    logger.debug('output_aliases length: ' + str(len(self.output_aliases)))
                    # Only write selected entries to the event. Aliases and field names are identical if no alias specified.
                    for lookup_field, lookup_field_alias in list(self.output_aliases.items()):
                        #logger.debug(self.output_overwrite)
                        #logger.debug(event[lookup_field])
                        #logger.debug(lookup_record[lookup_field])
                        if (self.output_overwrite or lookup_field not in list(event.keys())) and lookup_field in list(lookup_record.keys()):
                            # Loop through the "best matches" lookup entries
                            lookup_field_entries = []
                            for lookup_record in best_match_lookup_record:
                                lookup_field_entries.append(lookup_record[lookup_field])
                            event[self.prefix + lookup_field_alias] = lookup_field_entries

            # Cache the dynamic lookup list entries in case another event needs the same list
            # This dramatically speeds up processing for dynamic filters that match a large part of the lookup
            if dynamic_filters_key is not None and dynamic_match_list is not None and len(list(dynamic_filters.keys())) > 0:
                self.prepopulated_filter_lookupdata[dynamic_filters_key] = dynamic_match_list
                #logger.debug("prepopulated_filter_lookupdata count (child process) = " + str(len(list(self.prepopulated_filter_lookupdata.keys()))))

            duration_secs = round(time.time() - start_time)
            logger.debug("Done calculating distances for %s in %s seconds. Result: %s",
                         event_field_value, str(duration_secs), best_match_string)
            if dynamic_filters_key is not None:
                logger.debug("Dynamic filter matches for %s: %s", dynamic_filters_key, dynamic_matches)
        except BaseException as e:
            logger.error("get_distances error: " + repr(e))
            tb = traceback.format_exc()
            logger.error(tb)

        return event
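To make the scoring in get_distances easier to follow, here is a small sketch of how the similarity metric combines the Levenshtein distance with the character-overlap count. It uses the jellyfish library as the command does; matching_chars is a helper defined elsewhere in the app, so a simple positional-overlap stand-in is used here and labeled as an assumption.

# Illustrative sketch only: the fuzzy_similarity math from get_distances, with a made-up
# pair of strings. matching_chars_stub approximates the app's matching_chars helper.
import jellyfish as jf

def matching_chars_stub(a, b):
    # Stand-in (assumption): count positions where the two strings have the same character.
    return sum(1 for x, y in zip(a, b) if x == y)

sf_compare = 'jonathon smith'
lf_compare = 'jonathan smith'

best_score = jf.levenshtein_distance(sf_compare, lf_compare)    # 1 edit apart
best_charmatch = matching_chars_stub(sf_compare, lf_compare)    # 13 of 14 positions agree
max_length = max(len(sf_compare), len(lf_compare))

fuzzy_weight, charmatch_weight = 75, 25
fuzzy_metric = round((1 - float(best_score) / max_length) * fuzzy_weight, 2)
charmatch_metric = round((float(best_charmatch) / max_length) * charmatch_weight, 2)
print(fuzzy_metric + charmatch_metric)    # similarity on a 0-100 scale; higher means closer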
class SplunkRerunCommand(GeneratingCommand):

    regex = Option(
        require=True, validate=validators.RegularExpression(),
        doc='''
        **Syntax:** **regex=***<regex pattern>*
        **Description:** regex pattern matching alerts/reports to rerun''')

    trigger = Option(validate=validators.Boolean(), default=False)

    # Todo - Update tz and epoch to be class variables

    # Apply snap to earliest or latest
    # This would be the @<snap> period of the pattern
    def applySnap(self, unit, original):
        tz = tzlocal.get_localzone()
        # Get epoch as Datetime based on timezone
        epoch = datetime.datetime.fromtimestamp(0, tz=tz)
        # Get runtime as Datetime based on timezone
        orig = datetime.datetime.fromtimestamp(int(original), tz=tz)
        # Depending on the snap, replace units with 0 in the Datetime,
        # then convert back to epoch seconds by subtracting the epoch Datetime
        # and returning the difference in seconds
        if unit is None:
            return original
        if unit == "m" or "min" in unit:
            x = (orig.replace(second=0) - epoch).total_seconds()
        elif unit == "h" or "hr" in unit or "hour" in unit:
            x = (orig.replace(minute=0, second=0) - epoch).total_seconds()
        elif unit == "d" or "day" in unit:
            x = (orig.replace(hour=0, minute=0, second=0) - epoch).total_seconds()
        elif "mon" in unit:
            x = (orig.replace(day=1, hour=0, minute=0, second=0) - epoch).total_seconds()
        elif unit == "y" or "yr" in unit or "year" in unit:
            x = (orig.replace(month=1, day=1, hour=0, minute=0, second=0) - epoch).total_seconds()
        elif "w" in unit:
            day = orig.weekday()
            x = ((orig - datetime.timedelta(days=day)) - epoch).total_seconds()
        else:
            raise Exception("Error parsing snap unit; no match for unit")
        return x

    # Apply the offset to earliest and latest
    # This can be in 3 places in the pattern
    # <offset 1><offset 2>@<snap><snap offset>
    # -15m+5s@d+1h
    # The function handles one offset at a time and does not care which offset it is
    def applyOffset(self, offset, unit, original):
        # No need to convert to datetime in this function (exception: month)
        # Just add or subtract the appropriate number of seconds
        if offset is None or unit is None:
            return original
        if unit == "s" or "sec" in unit:
            x = original + int(offset)
        elif unit == "m" or "min" in unit:
            x = original + int(offset) * 60
        elif unit == "h" or "hr" in unit or "hour" in unit:
            x = original + int(offset) * 60 * 60
        elif unit == "d" or "day" in unit:
            x = original + int(offset) * 60 * 60 * 24
        elif "w" in unit:
            x = original + int(offset) * 60 * 60 * 24 * 7
        elif unit == "y" or "yr" in unit or "year" in unit:
            x = original + int(offset) * 60 * 60 * 24 * 365
        # Month is a special case, since it is the only period that does not have a set number of seconds
        # To handle months, convert to a Datetime and use relativedelta to add or subtract the number of months,
        # then subtract the epoch Datetime and get the difference in seconds, similar to applySnap
        elif "mon" in unit:
            try:
                tz = tzlocal.get_localzone()
                epoch = datetime.datetime.fromtimestamp(0, tz=tz)
                x = ((datetime.datetime.fromtimestamp(int(original), tz=tz) + relativedelta(months=int(offset))) - epoch).total_seconds()
            except NameError:
                raise Exception("No Month Functionality; install python-dateutil library")
            except Exception as e:
                raise e
        else:
            raise Exception("Error applying time offset; no match for unit")
        return x

    # Todo - better name for function; test more edge cases for possible earliest and latest patterns
    # getTimeRange will get earliest or latest based on the scheduled run time
    # relTime is the pattern for earliest or latest stored in Splunk
    # This could be as simple as -15m or @d, or as complex as -1mon@y+12d
    def getTimeRange(self, relTime, runTime):
        # Regex to extract each offset and snap
        m = re.match(
            r"((?P<offset1>[+-]?\d+)(?P<unit1>[a-zA-Z]+)(?:(?P<offset2>[+-]\d+)(?P<unit2>[a-zA-Z]+))?)?(?:@(?P<snap>[a-zA-Z]+)(?:(?P<snapOff>[+-]\d+)(?P<snapUnit>[a-zA-Z]+))?)?",
            relTime)
        if relTime.isdigit():
            # If it is a static time
            return relTime
        elif m and relTime != "now":
            # Apply the snap, then the offsets in the following order: snap offset, first offset, second offset
            # The only time the order of offsets should matter is when "mon" is used.
            self.logger.debug("[RERUN CMD]: Original: {0} {1}".format(runTime, relTime))
            runTime = self.applySnap(m.group('snap'), runTime)
            runTime = self.applyOffset(m.group('snapOff'), m.group('snapUnit'), runTime)
            runTime = self.applyOffset(m.group('offset1'), m.group('unit1'), runTime)
            runTime = self.applyOffset(m.group('offset2'), m.group('unit2'), runTime)
            self.logger.debug("[RERUN CMD]: Result: {}".format(runTime))
        return runTime

    def generate(self):
        # Todo - allow host to be set as a parameter
        host = "localhost"
        # Get port info from the uri in case a non-standard mgmt port is used
        splunkd_uri = self._metadata.searchinfo.splunkd_uri
        port = splunkd_uri.split(":")[-1]
        # Owner will be set as whoever ran the search
        owner = self._metadata.searchinfo.owner
        app = self._metadata.searchinfo.app
        # Get token to authenticate to the API to rerun searches
        token = self._metadata.searchinfo.session_key
        # Use the rerun command earliest and latest as the outage period; this way it can be set
        # by the time picker instead of as parameters
        outageStart = self._metadata.searchinfo.earliest_time
        outageEnd = self._metadata.searchinfo.latest_time
        # Get the rerun command search id - Splunk was not killing the python script when the search was cancelled
        # Use this to monitor the status of the search and exit the script if it is no longer "Running"
        rerunSid = self._metadata.searchinfo.sid
        # Compile regex to find searches
        filter = re.compile(self.regex)

        # Try to connect to the Splunk API
        self.logger.info("[RERUN CMD]: Connecting to Splunk API...")
        try:
            #service = client.connect(host=host, port=port, token=token, owner=owner, app=app)
            service = client.connect(host=host, port=port, token=token)
            self.logger.info("[RERUN CMD]: Connected to Splunk API successfully")
        except Exception as e:
            self.logger.error("[RERUN CMD]: {}".format(e.msg))

        # Splunk is not stopping the script, so ping the sid from here and stop the script if cancelled by the user
        # Todo - look into getting specific job info based on sid instead of using a for statement
        rerunJob = None
        for job in service.jobs:
            if job.sid == rerunSid:
                rerunJob = job
                self.logger.debug(job.state)

        # If for some reason the script can't find the search that triggered it
        if not rerunJob:
            self.logger.error("[RERUN CMD]: Rerun Job SID not found, exiting...")
            sys.exit(1)

        # Main loop to find and rerun searches
        for search in service.saved_searches:
            # Does not rerun disabled searches
            if filter.search(search.name) and search.is_scheduled == "1" and search.disabled == "0":
                # Parse the Splunk cron schedule for the found search
                ct = CronTab(search['content']['cron_schedule'])
                # Get earliest and latest pattern for the search
                dispatch_earliest = search['content']['dispatch.earliest_time']
                dispatch_latest = search['content']['dispatch.latest_time']
                # Start with runTime equal to outageStart; crontab is used to advance this to the next time the
                # scheduled search would have run before rerunning
                runTime = outageStart
                while True:
                    # Check to see if the search has been cancelled by the user
                    rerunJob.refresh()
                    if rerunJob.state.content.dispatchState != "RUNNING":
                        sys.exit()
                    # Get next scheduled run time, and break if greater than outageEnd
                    runTime = runTime + ct.next(now=runTime, default_utc=False)
                    if runTime > outageEnd or rerunJob.state.content.dispatchState != "RUNNING":
                        self.logger.error(rerunJob.state.content.dispatchState)
                        break
                    # Get new earliest and latest based on the new search run time
                    earliest = self.getTimeRange(dispatch_earliest, runTime)
                    latest = self.getTimeRange(dispatch_latest, runTime)
                    # Set search parameters and run the search
                    kwargs_block = {
                        'dispatch.earliest_time': earliest,
                        "dispatch.latest_time": latest,
                        "trigger_actions": self.trigger
                    }
                    job = search.dispatch(**kwargs_block)
                    time.sleep(0.25)
                    # Couldn't pass a blocking argument, so sleep until isDone
                    while job['isDone'] != "1":
                        self.logger.debug("[RERUN CMD]: Percent {}".format(job['doneProgress']))
                        time.sleep(1)
                        job.refresh()
                    message = "{} ran successfully for scheduled time {}".format(search.name, runTime)
                    self.logger.info("[RERUN CMD]: {}".format(message))
                    # Return results
                    yield {
                        "_time": time.time(),
                        "Message": message,
                        "Search": search.name,
                        "MissedRunTime": runTime,
                        "MissedEarliest": earliest,
                        "MissedLatest": latest,
                        "TriggerActions": self.trigger,
                        "Finished": job['isDone'],
                        "CompletionPercentage": float(job['doneProgress']) * 100,
                        "ScanCount": job['scanCount'],
                        "EventCount": job['eventCount'],
                        "ResultCount": job['resultCount']
                    }
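A small sketch of how getTimeRange decomposes a Splunk relative-time modifier, using the same regex as above; the example modifier "-1mon@y+12d" is an illustrative assumption.

# Illustrative sketch only: split a relative-time modifier into its offset and snap parts
# with the regex from getTimeRange.
import re

TIME_MODIFIER = re.compile(
    r"((?P<offset1>[+-]?\d+)(?P<unit1>[a-zA-Z]+)"
    r"(?:(?P<offset2>[+-]\d+)(?P<unit2>[a-zA-Z]+))?)?"
    r"(?:@(?P<snap>[a-zA-Z]+)(?:(?P<snapOff>[+-]\d+)(?P<snapUnit>[a-zA-Z]+))?)?")

m = TIME_MODIFIER.match("-1mon@y+12d")
print(m.group('offset1'), m.group('unit1'))      # -1 mon
print(m.group('snap'))                           # y
print(m.group('snapOff'), m.group('snapUnit'))   # +12 d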