def delete_key_from_event(self, delete_event):
    try:
        cfg = cli.getConfStanza('kvstore_tools', 'settings')
    except BaseException as e:
        eprint("Could not read configuration: " + repr(e))

    # Facility info - prepended to log lines
    facility = os.path.basename(__file__)
    facility = os.path.splitext(facility)[0]
    try:
        logger = setup_logger(cfg["log_level"], 'kvstore_tools.log', facility)
    except BaseException as e:
        eprint("Could not create logger: " + repr(e))
        print("Could not create logger: " + repr(e))
        exit(1)

    url_tmpl_delete = '%(server_uri)s/servicesNS/%(owner)s/%(app)s/storage/collections/data/%(collection)s/%(id)s?output_mode=json'

    headers = {
        'Authorization': 'Splunk %s' % self.session_key,
        'Content-Type': 'application/json'
    }

    for key, value in list(delete_event.items()):
        delete_event[key] = value
        if key == '_key' and len(value) > 0:
            logger.debug("Found %s (%s) in event" % (key, value))
            try:
                delete_url = url_tmpl_delete % dict(
                    server_uri=self.splunkd_uri,
                    owner='nobody',
                    app=self.app,
                    collection=self.collection,
                    id=urllib.parse.quote(value, safe=''))
                logger.debug("Delete url: " + delete_url)

                try:
                    response, response_code = request('DELETE', delete_url, '', headers)
                    logger.debug('Server response: %s' % response)
                except BaseException as e:
                    logger.error('ERROR Failed to delete key: %s', repr(e))

                if response_code == 200:
                    logger.debug("Successfully deleted " + key)
                    delete_event['delete_status'] = "success"
                    return delete_event
                else:
                    logger.error("Error %d deleting %s: %s" % (response_code, key, response))
                    delete_event['delete_status'] = "error"
                    return delete_event
            except BaseException as e:
                logger.error("Error deleting %s: %s" % (key, repr(e)))
                delete_event['delete_status'] = "error"
                return delete_event

def get_app_collections(uri, session_key, selected_collection, selected_app, app_list, global_scope):
    url_tmpl_app = '%(server_uri)s/servicesNS/%(owner)s/%(app)s/storage/collections/config?output_mode=json&count=0'

    # Enumerate all collections in the apps list
    collections = []
    for app in app_list:
        eprint("Polling collections in app: %s" % app)
        # Enumerate all of the collections in the app (if an app is selected)
        collections_url = url_tmpl_app % dict(server_uri=uri, owner='nobody', app=app)
        headers = {
            'Authorization': 'Splunk %s' % session_key,
            'Content-Type': 'application/json'
        }

        try:
            response, response_code = request('GET', collections_url, '', headers)
            if response_code == 200:
                response = json.loads(response)
            else:
                # There's a problem connecting. Abort.
                raise Exception("Could not connect to server: Error %s" % response_code)
        except BaseException as e:
            raise Exception(e)

        for entry in response["entry"]:
            entry_app = entry["acl"]["app"]
            entry_collection = entry["name"]
            entry_sharing = entry["acl"]["sharing"]
            eprint(entry_sharing + '/' + entry_app + '/' + entry_collection)

            if ((selected_app == entry_app and selected_collection == entry_collection) or
                    (selected_app is None and selected_collection == entry_collection) or
                    (selected_app == entry_app and selected_collection is None) or
                    (entry_sharing == 'global' and global_scope) or
                    (selected_app is None and selected_collection is None)):
                c = [entry_app, entry_collection]
                if c not in collections:
                    collections.append(c)
                    eprint("Added {0}/{1} to backup list".format(entry_app, entry_collection))

    return collections

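A minimal usage sketch for the enumeration helper above, assuming the caller already has splunkd_uri, session_key, and the companion get_server_apps helper in scope; the collection name and app filter here are illustrative only.

# Hypothetical invocation: enumerate every collection named "assets" across
# all apps, including globally shared collections.
app_list = get_server_apps(splunkd_uri, session_key, None)      # no app filter
collections = get_app_collections(
    uri=splunkd_uri,
    session_key=session_key,
    selected_collection='assets',
    selected_app=None,
    app_list=app_list,
    global_scope=True)
# Each entry is a two-element list: [app_name, collection_name]
for app_name, collection_name in collections:
    eprint("Will process %s/%s" % (app_name, collection_name))
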
def stream(self, events):
    try:
        cfg = cli.getConfStanza('kvstore_tools', 'settings')
    except BaseException as e:
        eprint("Could not read configuration: " + repr(e))

    # Facility info - prepended to log lines
    facility = os.path.basename(__file__)
    facility = os.path.splitext(facility)[0]
    try:
        logger = setup_logger(cfg["log_level"], 'kvstore_tools.log', facility)
    except BaseException as e:
        eprint("Could not create logger: " + repr(e))
        print("Could not create logger: " + repr(e))
        exit(1)

    logger.info('Script started by %s' % self._metadata.searchinfo.username)

    if self.app:
        logger.debug('App: %s' % self.app)
    else:
        self.app = self._metadata.searchinfo.app

    if self.collection:
        logger.debug('Collection: %s' % self.collection)
    else:
        logger.critical("No collection specified. Exiting.")
        print("Error: No collection specified.")
        exit(1)

    if self.outputkeyfield:
        logger.debug('Output Key Field: %s' % self.outputkeyfield)
    else:
        self.outputkeyfield = self.collection + "_key"

    if self.outputvalues:
        logger.debug('Output Values: %s' % self.outputvalues)
    else:
        self.outputvalues = ""

    if self.delimiter:
        logger.debug('Delimiter: %s' % self.delimiter)
    else:
        self.delimiter = ","

    if self.groupby:
        logger.debug('Group by field: %s' % self.groupby)
    else:
        self.groupby = None

    opts = {}
    opts["owner"] = "nobody"
    opts["token"] = self._metadata.searchinfo.session_key
    opts["app"] = self.app

    #epoch_time = int(time.time())
    current_user = self._metadata.searchinfo.username

    lookup_output_kvpairs = []
    # Static output fields are literal values that are given within the search command arguments
    # e.g. "lookup_field1=value1"
    static_output_fields = {}
    # Variable output fields are values taken from the events and pushed into the lookup record
    # as events are processed
    # e.g. "lookup_field2=$sourcetype$"
    variable_output_fields = {}
    resolved_variables = {}

    # Check for lockfile from previous invocations for this search ID
    dispatch = self._metadata.searchinfo.dispatch_dir
    static_kvfields_file = os.path.join(dispatch, "kvfields_static")        # dict
    variable_kvfields_file = os.path.join(dispatch, "kvfields_variable")    # dict
    resolved_variables_file = os.path.join(dispatch, "resolved_variables")  # dict

    try:
        if os.path.isfile(static_kvfields_file):
            with open(static_kvfields_file, 'r') as f:
                # Set static kvfields values
                static_output_fields = json.loads(f.read())  # dict
        if os.path.isfile(variable_kvfields_file):
            with open(variable_kvfields_file, 'r') as f:
                # Set variable kvfields values
                variable_output_fields = json.loads(f.read())  # dict

        # Connect to the kv store
        service = connect(**opts)
        if self.collection in service.kvstore:
            obj_collection = service.kvstore[self.collection]
        else:
            logger.critical("KVStore not found: %s" % self.collection)
            print('KVStore not found: %s' % self.collection)
            exit(1)

        # First invocation - build the lists for static and variable values
        if static_output_fields == {} and variable_output_fields == {}:
            # Split the key-value pairs argument into individual key-value pairs
            # Account for quoted string values and delimiters within the quoted value
            kvpair_split_re = r'([^=]+=(?:"[^"\\]*(?:\\.[^"\\]*)*"|[^{}]+))'.format(self.delimiter)
            x = re.findall(kvpair_split_re, self.outputvalues)
            for i in x:
                i = i.strip(self.delimiter).strip()
                lookup_output_kvpairs.append(i)

            for lof in lookup_output_kvpairs:
                k, v = lof.split("=")
                k = k.strip()
                v = v.strip().strip('"').replace('\\"', '"')
                logger.debug("k = %s, v = %s" % (k, v))
                # Replace special values
                v = v.replace("$kv_current_userid$", current_user)
                v = v.replace("$kv_now$", str(time.time()))
                # Value starts and ends with $ - variable field
                if v[0] + v[-1] == '$$':
                    # Add to the list of variable fields
                    variable_output_fields[k] = v.replace("$", "")
                else:
                    # Add to the list of static fields
                    static_output_fields[k] = v

            logger.info("Unpacked %d static and %d variable fields from arguments" % (
                len(list(static_output_fields.keys())),
                len(list(variable_output_fields.keys()))))

            # Write the static payload to the file
            # File doesn't exist. Open/claim it.
            with open(static_kvfields_file, 'w') as f:
                f.write(json.dumps(static_output_fields, ensure_ascii=False))
            with open(variable_kvfields_file, 'w') as f:
                f.write(json.dumps(variable_output_fields, ensure_ascii=False))
    except BaseException as e:
        logger.critical('Error connecting to collection: %s' % repr(e), exc_info=True)
        print('Error connecting to collection: %s' % repr(e))
        exit(1)

    # Read the events, resolve the variables, store them on a per-groupby-fieldvalue basis
    i = 0
    inserts = 0
    for e in events:
        update = False
        # (Re)read the latest data
        if os.path.isfile(resolved_variables_file):
            with open(resolved_variables_file, 'r') as f:
                # Open in non-blocking mode
                fd = f.fileno()
                flag = fcntl.fcntl(fd, fcntl.F_GETFL)
                fcntl.fcntl(fd, fcntl.F_SETFL, flag | os.O_NONBLOCK)
                # Set static kvfields values
                resolved_variables = json.loads(f.read())  # dict [groupby value][field name]

        if self.groupby is not None:
            groupby_value = e[self.groupby]
        else:
            # Make this value the same for every event (no group-by)
            groupby_value = '____placeholder'

        new_kv_record = {}

        if groupby_value in list(resolved_variables.keys()):
            # Set the previously recorded key value for this group-by value within the event
            kvstore_entry_key = resolved_variables[groupby_value]["_key"]
            # We've already resolved the variables for this groupby, but see if any are not populated
            for lookup_field, event_field in list(variable_output_fields.items()):
                if lookup_field not in list(resolved_variables[groupby_value].keys()):
                    if event_field in list(e.keys()):
                        if e[event_field] is not None and e[event_field] != '':
                            resolved_variables[groupby_value][lookup_field] = e[event_field]
                            new_kv_record[lookup_field] = e[event_field]
                            update = True

            if update:
                # Update the collection
                new_kv_record.update(static_output_fields)
                response = obj_collection.data.update(kvstore_entry_key, json.dumps(new_kv_record))
                # Write the data to disk immediately so other threads can benefit
                with open(resolved_variables_file, 'w') as f:
                    f.write(json.dumps(resolved_variables, ensure_ascii=False))
        else:
            # First time we're seeing this groupby value. Resolve variables and write the KV store record.
            # Define the dictionary
            resolved_variables[groupby_value] = {}
            # Update the static values
            new_kv_record = static_output_fields.copy()
            # Resolve the variables
            for lookup_field, event_field in list(variable_output_fields.items()):
                if event_field in list(e.keys()):
                    if e[event_field] is not None:
                        resolved_variables[groupby_value][lookup_field] = e[event_field]
                        new_kv_record[lookup_field] = e[event_field]

            # Write the new kvstore record and get the ID (_key)
            response = obj_collection.data.insert(json.dumps(new_kv_record))
            kvstore_entry_key = response["_key"]
            resolved_variables[groupby_value]["_key"] = kvstore_entry_key
            # Write the data to disk immediately so other threads can benefit
            with open(resolved_variables_file, 'w') as f:
                f.write(json.dumps(resolved_variables, ensure_ascii=False))
            inserts += 1

        # Write the KV store record's _key value to the event
        e[self.outputkeyfield] = kvstore_entry_key
        yield e
        i += 1

    logger.info("Modified %d events and inserted %s new records into %s" % (i, inserts, self.collection))

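The key-value splitting regex above is easiest to see with a concrete input. This is a standalone sketch of the same pattern with the delimiter assumed to be a comma; the sample outputvalues string is invented for illustration.

import re

delimiter = ","
# Same template as kvpair_split_re above; the {} placeholder is filled with the delimiter.
pattern = r'([^=]+=(?:"[^"\\]*(?:\\.[^"\\]*)*"|[^{}]+))'.format(delimiter)

# One quoted static value containing the delimiter, one event-variable
# reference, and one special token.
outputvalues = 'status="open, pending", owner=$owner$, updated=$kv_now$'

pairs = [p.strip(delimiter).strip() for p in re.findall(pattern, outputvalues)]
print(pairs)
# ['status="open, pending"', 'owner=$owner$', 'updated=$kv_now$']
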
def write_events_to_file(events, fields, local_output, outputformat, compression):
    logger = dhelp.setup_logging('export_everything')

    # Buffer variables
    output_file_buf = []
    buffer_flush_count = 1000
    event_counter = 0
    first_field = None

    if outputformat == 'json':
        output_file_buf.append('['.encode('utf-8'))

    for event in events:
        if first_field is None:
            first_field = list(event.keys())[0]
            #dhelp.eprint('First field = ' + first_field)

        # Get the fields list for the event
        # Filter the fields if fields= is supplied
        if fields is not None:
            event_keys = []
            if type(fields) == str:
                fields = [fields]
            for k in list(event.keys()):
                for f in fields:
                    if k == f or fnmatch.fnmatch(k, f):
                        event_keys.append(k)
        else:
            event_keys = list(event.keys())

        # Pick the output format on the first event if one was not specified
        if event_counter == 0:
            if outputformat is None and '_raw' in event_keys:
                outputformat = 'raw'
            elif outputformat is None:
                outputformat = 'json'

            # Check event format setting and write a header if needed
            if outputformat == "csv" or outputformat == "tsv" or outputformat == "pipe":
                delimiters = {'csv': ',', 'tsv': '\t', 'pipe': '|'}
                delimiter = delimiters[outputformat]
                # Write header
                header = ''
                for field in event_keys:
                    # Quote the string if it has a space
                    if ' ' in field and outputformat == "csv":
                        field = '"' + field + '"'
                    # Concatenate the header field names
                    header += field + delimiter
                # Strip off the last delimiter
                header = header[:-1] + '\n'
                output_file_buf.append(header.encode('utf-8'))

        output_text = ''
        # Build the row of text
        if outputformat == "raw":
            if '_raw' in event_keys:
                output_text = event["_raw"]
            else:
                logger.warning("No raw field when raw output selected.")
        elif outputformat == "csv" or outputformat == "tsv" or outputformat == "pipe":
            for key, value in list(event.items()):
                logger.debug("Key = %s, Value = %s", key, value)
                if key in event_keys:
                    # Convert list to string value
                    if isinstance(value, list):
                        #value = '"' + delimiter.join(value).replace('"', r'\"') + '"'
                        value = '"' + delimiter.join(value).replace('"', r'""') + '"'
                    if outputformat == "csv":
                        # Escape any double-quotes
                        if '"' in value:
                            # String has a quotation mark. Quote it and escape those inside.
                            value = dhelp.escape_quotes_csv(value)
                            value = '"' + value + '"'
                        # Quote the string if it has a space or separator
                        elif ' ' in value or ',' in value:
                            value = '"' + value + '"'
                    output_text += value + delimiter
            output_text = output_text[:-1]
        elif outputformat == "kv":
            for key, value in list(event.items()):
                if key in event_keys:
                    # Escape any double-quotes
                    if '"' in value:
                        # String has a quotation mark. Quote it and escape those inside.
                        value = dhelp.escape_quotes(value)
                        value = '"' + value + '"'
                    # Quote the string if it has a space or separator
                    elif ' ' in value or '=' in value:
                        value = '"' + value + '"'
                    output_text += key + "=" + value + ' '
        elif outputformat == "json":
            if fields is not None:
                json_event = {}
                for key in event_keys:
                    json_event[key] = event[key]
            else:
                json_event = event
            output_text = json.dumps(json_event) + ','

        # Append entry to the lists
        output_file_buf.append((output_text + '\n').encode('utf-8'))
        event_counter += 1
        #event_buf.append(event)

        # Time to flush the buffers
        if len(output_file_buf) == buffer_flush_count:
            if compression:
                flush_buffer_gzip(output_file_buf, local_output)
            else:
                flush_buffer(output_file_buf, local_output)
            output_file_buf = []

        yield (event)

    if outputformat == 'json':
        if isinstance(output_file_buf[-1], str):
            dhelp.eprint(output_file_buf[-1])
            output_file_buf[-1] = output_file_buf[-1].replace(',\n', '\n').encode('utf-8')
        elif isinstance(output_file_buf[-1], bytes):
            dhelp.eprint(output_file_buf[-1])
            output_file_buf[-1] = output_file_buf[-1].decode('utf-8').replace(',\n', '\n').encode('utf-8')
        output_file_buf.append(']'.encode('utf-8'))

    if compression:
        flush_buffer_gzip(output_file_buf, local_output)
    else:
        flush_buffer(output_file_buf, local_output)

    output_file_buf = None
    logger.debug("Wrote temp output file " + local_output)

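The fields= filter above relies on shell-style wildcard matching against each event's keys. A small self-contained sketch of that selection logic, with made-up field names:

import fnmatch

event = {'_time': 1700000000, '_raw': 'GET /index.html 200',
         'status': '200', 'bytes_in': '1024', 'bytes_out': '2048'}
fields = ['status', 'bytes_*']   # as supplied via fields=

event_keys = []
for k in list(event.keys()):
    for f in fields:
        if k == f or fnmatch.fnmatch(k, f):
            event_keys.append(k)

print(event_keys)   # ['status', 'bytes_in', 'bytes_out']
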
def generate(self):
    try:
        cfg = cli.getConfStanza('kvstore_tools', 'settings')
    except BaseException as e:
        eprint("Could not read configuration: " + repr(e))

    # Facility info - prepended to log lines
    facility = os.path.basename(__file__)
    facility = os.path.splitext(facility)[0]
    try:
        logger = setup_logger(cfg["log_level"], 'kvstore_tools.log', facility)
    except BaseException as e:
        eprint("Could not create logger: " + repr(e))
        print("Could not create logger: " + repr(e))
        exit(1)

    logger.info('Script started by %s' % self._metadata.searchinfo.username)

    session_key = self._metadata.searchinfo.session_key
    splunkd_uri = self._metadata.searchinfo.splunkd_uri

    if self.app:
        logger.debug('App: %s' % self.app)
    else:
        self.app = self._metadata.searchinfo.app

    if self.collection:
        logger.debug('Collection: %s' % self.collection)
    else:
        logger.critical("No collection specified. Exiting.")
        print("Error: No collection specified.")
        exit(1)

    if self.key:
        logger.debug('Key ID: %s' % self.key)
    else:
        logger.critical("No key value specified. Exiting.")
        print("Error: No key value specified.")
        exit(1)

    headers = {
        'Authorization': 'Splunk %s' % session_key,
        'Content-Type': 'application/json'
    }

    #url_tmpl_app = '%(server_uri)s/servicesNS/%(owner)s/%(app)s/storage/collections/config?output_mode=json&count=0'

    # Enumerate all apps
    app_list = kv.get_server_apps(splunkd_uri, session_key, self.app)
    collection_list = kv.get_app_collections(splunkd_uri, session_key, self.collection,
                                             self.app, app_list, True)
    logger.debug('Collections present: %s', str(collection_list))

    try:
        # Create an object for the collection
        collection_present = False
        for c in collection_list:
            # Extract the app and collection name from the array
            # c[0] = app, c[1] = collection name
            collection_app = c[0]
            collection_name = c[1]
            if collection_name == self.collection:
                if self.app is None or self.app == collection_app:
                    self.app = collection_app
                    collection_present = True
                elif self.app != collection_app:
                    pass
                logger.debug("Collection found: {0} in app {1}".format(self.collection, self.app))
        if not collection_present:
            logger.critical("KVStore collection %s not found within app %s" % (self.collection, self.app))
            exit(1)
    except BaseException as e:
        logger.critical('Error enumerating collections: ' + str(e))
        exit(1)

    url_tmpl_delete = '%(server_uri)s/servicesNS/%(owner)s/%(app)s/storage/collections/data/%(collection)s/%(id)s?output_mode=json'

    try:
        delete_url = url_tmpl_delete % dict(
            server_uri=splunkd_uri,
            owner='nobody',
            app=self.app,
            collection=self.collection,
            id=urllib.parse.quote(self.key, safe=''))
        logger.debug("Delete url: " + delete_url)

        try:
            response, response_code = request('DELETE', delete_url, '', headers)
            logger.debug('Server response: %s', response)
        except BaseException as e:
            logger.error('Failed to delete key %s from collection %s/%s: %s' % (
                self.key, self.app, self.collection, repr(e)))

        if response_code == 200:
            logger.debug("Successfully deleted key %s from collection %s/%s" % (
                self.key, self.app, self.collection))
            result = "success"
        else:
            logger.error("Error deleting key %s from collection %s/%s: %s" % (
                self.key, self.app, self.collection, response))
            result = "error"
    except BaseException as e:
        logger.error("Error deleting key %s from collection %s/%s: %s" % (
            self.key, self.app, self.collection, repr(e)))
        result = "error"

    # Entry deleted
    yield {
        '_time': time.time(),
        'app': self.app,
        'collection': self.collection,
        'key': self.key,
        'result': result
    }

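The REST delete above depends on the _key being URL-encoded before it is substituted into the endpoint path. A minimal sketch with placeholder values:

import urllib.parse

url_tmpl_delete = '%(server_uri)s/servicesNS/%(owner)s/%(app)s/storage/collections/data/%(collection)s/%(id)s?output_mode=json'

# Hypothetical values for illustration
delete_url = url_tmpl_delete % dict(
    server_uri='https://localhost:8089',
    owner='nobody',
    app='my_app',
    collection='my_collection',
    id=urllib.parse.quote('5f3a9c1e/7b2d', safe=''))   # '/' becomes %2F

print(delete_url)
# https://localhost:8089/servicesNS/nobody/my_app/storage/collections/data/my_collection/5f3a9c1e%2F7b2d?output_mode=json
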
def generate(self):
    try:
        cfg = cli.getConfStanza('kvstore_tools', 'settings')
    except BaseException as e:
        eprint("Could not read configuration: " + repr(e))

    # Facility info - prepended to log lines
    facility = os.path.basename(__file__)
    facility = os.path.splitext(facility)[0]
    try:
        logger = setup_logger(cfg["log_level"], 'kvstore_tools.log', facility)
    except BaseException as e:
        eprint("Could not create logger: " + repr(e))
        exit(1)

    logger.info('Script started by %s' % self._metadata.searchinfo.username)

    batch_size = int(cfg.get('backup_batch_size'))
    logger.debug("Batch size: %d rows" % batch_size)

    session_key = self._metadata.searchinfo.session_key
    splunkd_uri = self._metadata.searchinfo.splunkd_uri

    # Check for permissions to run the command
    content = rest.simpleRequest('/services/authentication/current-context?output_mode=json',
                                 sessionKey=session_key, method='GET')[1]
    content = json.loads(content)
    current_user = self._metadata.searchinfo.username
    current_user_capabilities = content['entry'][0]['content']['capabilities']
    if 'run_kvstore_backup' in current_user_capabilities or 'run_kvst_all' in current_user_capabilities:
        logger.debug("User %s is authorized." % current_user)
    else:
        logger.error("User %s is unauthorized. Has the run_kvstore_backup capability been granted?" % current_user)
        yield ({'Error': 'User %s is unauthorized. Has the run_kvstore_backup capability been granted?' % current_user})
        sys.exit(3)

    # Sanitize input
    if self.app:
        logger.debug('App Context: %s' % self.app)
    else:
        self.app = None

    if self.path:
        pass
    else:
        # Get path from configuration
        try:
            # Break the path out and re-join it so it's OS independent
            default_path = cfg.get('default_path').split('/')
            self.path = os.path.abspath(os.path.join(os.sep, *default_path))
        except:
            logger.critical("Unable to get backup path")
            yield ({'Error': "Path not provided in search arguments and default path is not set."})
            sys.exit(1)

    # Replace environment variables
    self.path = os.path.expandvars(self.path)
    self.path = self.path.replace('//', '/')
    logger.debug('Backup path: %s' % self.path)

    if not os.path.isdir(self.path):
        logger.critical("Path does not exist: {0}".format(self.path))
        yield ({'Error': "Path does not exist: {0}".format(self.path)})
        sys.exit(1)

    if self.collection:
        logger.debug('Collection: %s' % self.collection)
    else:
        self.collection = None

    if self.global_scope:
        logger.debug('Global Scope: %s' % self.global_scope)
    else:
        self.global_scope = False

    if self.compression or self.compression == False:
        logger.debug('Compression: %s' % self.compression)
    else:
        try:
            self.compression = cfg.get('compression')
        except:
            self.compression = False

    app_list = kv.get_server_apps(splunkd_uri, session_key, self.app)
    logger.debug("Apps list: %s" % str(app_list))
    collection_list = kv.get_app_collections(splunkd_uri, session_key, self.collection,
                                             self.app, app_list, self.global_scope)
    logger.info('Collections to backup: %s', str(collection_list))

    for collection in collection_list:
        # Extract the app and collection name from the array
        entry_app = collection[0]
        collection_name = collection[1]
        ts = time.time()
        st = datetime.fromtimestamp(ts).strftime('%Y%m%d_%H%M%S')
        #maxrows = int(limits_cfg.get('max_rows_per_query'))

        # Set the filename and location for the output (expanding environment variables)
        output_filename = entry_app + "#" + collection_name + "#" + st + ".json"
        if self.compression:
            output_filename = output_filename + '.gz'
        output_file = os.path.join(self.path, output_filename)

        # Download the collection to a local file
        result, message, total_record_count = kv.download_collection(
            logger, splunkd_uri, session_key, entry_app, collection_name, output_file, self.compression)
        logger.debug("Retrieved {0} records from {1}".format(total_record_count, collection_name))
        yield {
            '_time': time.time(),
            'app': entry_app,
            'collection': collection_name,
            'result': result,
            'records': total_record_count,
            'message': message,
            'file': output_file
        }

    # Execute retention routine
    max_age = int(cfg.get('retention_days'))
    max_size = int(cfg.get('retention_size')) * 1024 * 1024

    if max_size > 0 or max_age > 0:
        # Check the size of all *.json and *.json.gz files in the directory
        pattern = os.path.join(self.path, "*#*#*.json*")
        # Get a listing of the files and check the file sizes
        backup_file_list = glob.glob(pattern)
        # Sort descending based on file timestamp
        backup_file_list.sort(key=os.path.getmtime, reverse=True)
        # Count the total bytes in all of the files
        totalbytes = 0
        logger.debug("Max age (days): %s / Max size: %s" % (max_age, max_size))
        for f in backup_file_list:
            logger.debug("File %s", f)
            # Get the file size (bytes) and age (days)
            bytes = os.path.getsize(f)
            age_days = old_div((time.time() - os.stat(f)[stat.ST_MTIME]), 86400)
            logger.debug("Age (days): %d", age_days)
            # Increment the total byte count
            totalbytes += bytes
            if totalbytes > max_size and max_size > 0:
                # Delete the file
                logger.debug("Total bytes ({0}) > max_size ({1})".format(totalbytes, max_size))
                os.remove(f)
                logger.info("Deleted file due to size retention policy: %s" % f)
            elif age_days > max_age and max_age > 0:
                logger.debug("Age ({0}) > max_age ({1})".format(age_days, max_age))
                os.remove(f)
                logger.info("Deleted file due to age retention policy: %s" % f)

def generate(self):
    try:
        cfg = cli.getConfStanza('kvstore_tools', 'settings')
    except BaseException as e:
        eprint("Could not read configuration: " + repr(e))

    # Facility info - prepended to log lines
    facility = os.path.basename(__file__)
    facility = os.path.splitext(facility)[0]
    try:
        logger = setup_logger(cfg["log_level"], 'kvstore_tools.log', facility)
    except BaseException as e:
        eprint("Could not create logger: " + repr(e))
        exit(1)

    logger.info('Script started by %s' % self._metadata.searchinfo.username)

    session_key = self._metadata.searchinfo.session_key
    splunkd_uri = self._metadata.searchinfo.splunkd_uri

    # Check for permissions to run the command
    content = rest.simpleRequest('/services/authentication/current-context?output_mode=json',
                                 sessionKey=session_key, method='GET')[1]
    content = json.loads(content)
    current_user = self._metadata.searchinfo.username
    current_user_capabilities = content['entry'][0]['content']['capabilities']
    if 'run_kvstore_restore' in current_user_capabilities or 'run_kvst_all' in current_user_capabilities:
        logger.debug("User %s is authorized." % current_user)
    else:
        logger.error("User %s is unauthorized. Has the run_kvstore_restore capability been granted?" % current_user)
        yield ({'Error': 'User %s is unauthorized. Has the run_kvstore_restore capability been granted?' % current_user})
        sys.exit(3)

    # Sanitize input
    if self.filename:
        logger.debug('Restore filename: %s' % self.filename)
        list_only = False
    else:
        self.filename = "*#*#*.json*"
        list_only = True

    if self.append:
        logger.debug('Appending to existing collection')
    else:
        self.append = False
    logger.debug('Append to existing collection: %s' % str(self.append))

    backup_file_list = []

    # Get the default path from the configuration
    default_path_dirlist = cfg.get('default_path').split('/')
    default_path = os.path.abspath(os.path.join(os.sep, *default_path_dirlist))
    # Replace environment variables
    default_path = os.path.expandvars(default_path)
    default_path = default_path.replace('//', '/')

    if '*' in self.filename:
        # Expand the wildcard to include all matching files from the filesystem
        for name in glob.glob(self.filename):
            backup_file_list.append(name)
        if len(backup_file_list) == 0:
            self.filename = os.path.join(default_path, self.filename)
            for name in glob.glob(self.filename):
                backup_file_list.append(name)
        if len(backup_file_list) == 0:
            logger.critical("No matching files: %s" % self.filename)
            sys.exit(1)
    else:
        logger.debug('No wildcard string found in %s' % self.filename)
        if os.path.isfile(self.filename):
            backup_file_list.append(self.filename)
        elif os.path.isfile(os.path.join(default_path, self.filename)):
            backup_file_list.append(os.path.join(default_path, self.filename))
        else:
            logger.critical("File does not exist: %s" % self.filename)
            sys.exit(1)

    deleted_collections = []

    # backup_file_list is now an array of filenames
    for name in backup_file_list:
        logger.debug('Parsing filename: %s' % name)
        try:
            # Isolate the filename from the path
            matches = re.search(r'(.*)(?:\/|\\)([^\/\\]+)', name)
            #path = matches.group(1)
            file_param = matches.group(2)
            name_split = file_param.split('#')
        except BaseException as e:
            logger.critical('Invalid filename: %s\n\t%s' % (name, repr(e)))
            yield ({'Error': 'Invalid filename: %s' % name})
            sys.exit(1)

        # Open the file if it's a supported format
        if (name.endswith('.json') or name.endswith('.json.gz')) and len(name_split) == 3:
            # Extract the app name and collection name from the file name
            file_app = name_split[0]
            file_collection = name_split[1]

            if list_only:
                yield {
                    'filename': name,
                    'app': file_app,
                    'collection': file_collection,
                    'status': 'ready'
                }
            else:
                if not self.append:
                    # Delete the collection contents using the KV Store REST API
                    try:
                        collection_id = file_app + "/" + file_collection
                        # Make sure we aren't trying to delete the same collection twice
                        if not collection_id in deleted_collections:
                            kv.delete_collection(logger, splunkd_uri, session_key, file_app, file_collection)
                            deleted_collections.append(collection_id)
                    except BaseException as e:
                        logger.critical(repr(e), exc_info=True)
                        yield ({'Error': 'Failed to delete collection %s/%s: %s' % (file_app, file_collection, repr(e))})
                        sys.exit(4)

                # Upload the collection using the KV Store REST API
                try:
                    result, message, record_count = kv.upload_collection(
                        logger, splunkd_uri, session_key, file_app, file_collection, name)
                    yield ({
                        'result': result,
                        'message': message,
                        'records': record_count
                    })
                except BaseException as e:
                    logger.error("Error restoring collection: %s" % repr(e), exc_info=True)
                    yield ({
                        'result': 'error',
                        'message': 'Failed to restore collection: %s' % repr(e),
                        'records': 0
                    })
        elif name.endswith('.tar.gz') or name.endswith('.tgz'):
            logger.info('Skipping filename (unsupported format): %s' % name)
            yield {
                '_time': time.time(),
                'source': name,
                'app': '',
                'collection': '',
                'records': 0,
                'result': 'error'
            }
            continue
        else:
            # Skip this file
            logger.info('Skipping filename (does not meet naming convention): %s' % name)
            yield {
                '_time': time.time(),
                'source': name,
                'app': '',
                'collection': '',
                'records': 0,
                'result': 'error'
            }
            continue

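Both the backup and restore commands rely on the app#collection#timestamp.json[.gz] filename convention. A short sketch of building and then parsing such a name; the path and app/collection names are illustrative only.

import os, re, time
from datetime import datetime

# Building (as in the backup command)
st = datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
output_filename = 'my_app' + '#' + 'my_collection' + '#' + st + '.json'

# Parsing (as in the restore command)
name = os.path.join('/opt/splunk/backups', output_filename)
matches = re.search(r'(.*)(?:\/|\\)([^\/\\]+)', name)
file_param = matches.group(2)           # filename without the path
file_app, file_collection, _ = file_param.split('#')
print(file_app, file_collection)        # my_app my_collection
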
def stream(self, events):
    try:
        cfg = cli.getConfStanza('kvstore_tools', 'settings')
    except BaseException as e:
        eprint("Could not read configuration: " + repr(e))

    # Facility info - prepended to log lines
    facility = os.path.basename(__file__)
    facility = os.path.splitext(facility)[0]
    try:
        logger = setup_logger(cfg["log_level"], 'kvstore_tools.log', facility)
    except BaseException as e:
        eprint("Could not create logger: " + repr(e))
        print("Could not create logger: " + repr(e))
        exit(1)

    logger.info('Script started by %s' % self._metadata.searchinfo.username)

    if self.app:
        logger.debug('App: %s' % self.app)
    else:
        self.app = self._metadata.searchinfo.app

    if self.collection:
        logger.debug('Collection: %s' % self.collection)
    else:
        logger.critical("No collection specified. Exiting.")
        print("Error: No collection specified.")
        exit(1)

    self.session_key = self._metadata.searchinfo.session_key
    self.splunkd_uri = self._metadata.searchinfo.splunkd_uri

    # Enumerate all apps
    app_list = kv.get_server_apps(self.splunkd_uri, self.session_key, self.app)
    collection_list = kv.get_app_collections(self.splunkd_uri, self.session_key,
                                             self.collection, self.app, app_list, True)
    logger.debug('Collections present: %s', str(collection_list))

    try:
        # Create an object for the collection
        collection_present = False
        for c in collection_list:
            # Extract the app and collection name from the array
            # c[0] = app, c[1] = collection name
            collection_app = c[0]
            collection_name = c[1]
            if collection_name == self.collection:
                if self.app is None or self.app == collection_app:
                    self.app = collection_app
                    collection_present = True
                elif self.app != collection_app:
                    pass
                logger.debug("Collection {0} found in app {1}".format(self.collection, self.app))
        if not collection_present:
            logger.critical("KVStore collection %s/%s not found" % (self.app, self.collection))
            exit(1)
    except BaseException as e:
        logger.critical('Error enumerating collections: %s' % repr(e))
        exit(1)

    # Make a pool of workers
    pool = ThreadPool(4)

    try:
        results = pool.map(self.delete_key_from_event, events)
    except BaseException as e:
        logger.error("%s" % repr(e), exc_info=True)
        results = {}

    for result in results:
        yield result

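A standalone sketch of the worker-pool pattern used above, with a stand-in function in place of delete_key_from_event:

from multiprocessing.pool import ThreadPool

def tag_event(event):
    # Stand-in for delete_key_from_event: annotate and return the event
    event['delete_status'] = 'skipped' if not event.get('_key') else 'pending'
    return event

events = [{'_key': 'abc123'}, {'_key': ''}, {'_key': 'def456'}]

pool = ThreadPool(4)
try:
    results = pool.map(tag_event, events)   # blocks until every event is processed
finally:
    pool.close()
    pool.join()

for result in results:
    print(result)
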
def generate(self):
    try:
        cfg = cli.getConfStanza('kvstore_tools', 'settings')
    except BaseException as e:
        eprint("Could not read configuration: " + repr(e))

    # Facility info - prepended to log lines
    facility = os.path.basename(__file__)
    facility = os.path.splitext(facility)[0]
    try:
        logger = setup_logger(cfg["log_level"], 'kvstore_tools.log', facility)
    except BaseException as e:
        eprint("Could not create logger: " + repr(e))
        print("Could not create logger: " + repr(e))
        exit(1)

    logger.info('Script started by %s' % self._metadata.searchinfo.username)

    batch_size = int(cfg.get('backup_batch_size'))
    logger.debug("Batch size: %d rows" % batch_size)

    local_session_key = self._metadata.searchinfo.session_key
    splunkd_uri = self._metadata.searchinfo.splunkd_uri

    # Check for permissions to run the command
    content = rest.simpleRequest('/services/authentication/current-context?output_mode=json',
                                 sessionKey=local_session_key, method='GET')[1]
    content = json.loads(content)
    current_user = self._metadata.searchinfo.username
    current_user_capabilities = content['entry'][0]['content']['capabilities']
    if 'run_kvstore_pull' in current_user_capabilities or 'run_kvst_all' in current_user_capabilities:
        logger.debug("User %s is authorized." % current_user)
    else:
        logger.error("User %s is unauthorized. Has the run_kvstore_pull capability been granted?" % current_user)
        yield ({'Error': 'User %s is unauthorized. Has the run_kvstore_pull capability been granted?' % current_user})
        sys.exit(3)

    # Sanitize input
    if self.app:
        logger.debug('App Context: %s' % self.app)
    else:
        self.app = None

    if self.collection:
        logger.debug('Collection: %s' % self.collection)
    else:
        self.collection = None

    if self.global_scope:
        logger.debug('Global Scope: %s' % self.global_scope)
    else:
        self.global_scope = False

    if self.append:
        logger.debug('Appending to existing collection')
    else:
        self.append = False
    logger.debug('Append to existing collection: %s' % str(self.append))

    if self.targetport:
        logger.debug('Port for remote connect: %s' % self.targetport)
    else:
        self.targetport = '8089'

    # Get credentials
    try:
        # Use the credential where the realm matches the target hostname
        # Otherwise, use the last entry in the list
        credentials = kv.parse_custom_credentials(logger, cfg)
        try:
            credential = credentials[self.target]
        except:
            try:
                hostname = self.target.split('.')[0]
                credential = credentials[hostname]
            except BaseException as e:
                logger.critical("Could not get password for %s: %s" % (self.target, repr(e)))
                print("Could not get password for %s: %s" % (self.target, repr(e)))
                exit(1593)

        remote_user = credential['username']
        remote_password = credential['password']
    except BaseException as e:
        logger.critical('Failed to get credentials for remote Splunk instance: %s' % repr(e), exc_info=True)
        yield ({'Error': 'Failed to get credentials for remote Splunk instance: %s' % repr(e)})
        exit(7372)

    # Login to the remote host and get the session key
    try:
        remote_host = self.target
        remote_port = self.targetport
        remote_uri = 'https://%s:%s' % (self.target, self.targetport)
        remote_service = client.connect(host=remote_host, port=remote_port,
                                        username=remote_user, password=remote_password)
        remote_service.login()
        remote_session_key = remote_service.token.replace('Splunk ', '')
        logger.debug('Remote session key: %s' % remote_session_key)
    except (urllib.error.HTTPError, BaseException) as e:
        logger.exception('Failed to login on remote Splunk instance: %s' % repr(e))
        yield ({'Error': 'Failed to login on remote Splunk instance: %s' % repr(e)})
        sys.exit(4424)

    # Get the list of remote apps and collections
    remote_app_list = kv.get_server_apps(remote_uri, remote_session_key, self.app)
    remote_collection_list = kv.get_app_collections(remote_uri, remote_session_key, self.collection,
                                                    self.app, remote_app_list, self.global_scope)
    logger.debug('Collections to pull: %s' % str(remote_collection_list))

    for remote_collection in remote_collection_list:
        # Extract the app and collection name from the array
        collection_app = remote_collection[0]
        collection_name = remote_collection[1]
        try:
            yield (kv.copy_collection(logger, remote_session_key, remote_uri,
                                      local_session_key, splunkd_uri,
                                      collection_app, collection_name, self.append))
        except BaseException as e:
            logger.critical('Failed to copy collections from %s to local KV store: %s' % (self.target, repr(e)),
                            exc_info=True)
            yield ({'Error': 'Failed to copy collections from %s to local KV store: %s' % (self.target, repr(e))})
            sys.exit(11)

def reduce(self, events):
    try:
        app_config = cli.getConfStanza('ep_general', 'settings')
        cmd_config = cli.getConfStanzas('ep_box')
    except BaseException as e:
        raise Exception("Could not read configuration: " + repr(e))

    # Facility info - prepended to log lines
    facility = os.path.basename(__file__)
    facility = os.path.splitext(facility)[0]
    try:
        logger = setup_logger(app_config["log_level"], 'export_everything.log', facility)
    except BaseException as e:
        raise Exception("Could not create logger: " + repr(e))

    logger.info('Box Export search command initiated')
    logger.debug('search_ep_box command: %s', self)  # logs command line

    # Enumerate proxy settings
    http_proxy = os.environ.get('HTTP_PROXY')
    https_proxy = os.environ.get('HTTPS_PROXY')
    proxy_exceptions = os.environ.get('NO_PROXY')

    if http_proxy is not None:
        logger.debug("HTTP proxy: %s" % http_proxy)
    if https_proxy is not None:
        logger.debug("HTTPS proxy: %s" % https_proxy)
    if proxy_exceptions is not None:
        logger.debug("Proxy Exceptions: %s" % proxy_exceptions)

    # Enumerate settings
    app = self._metadata.searchinfo.app
    user = self._metadata.searchinfo.username
    dispatch = self._metadata.searchinfo.dispatch_dir
    session_key = self._metadata.searchinfo.session_key

    if self.target is None and 'target=' in str(self):
        recover_parameters(self)
    # Replace all tokenized parameter strings
    replace_object_tokens(self)

    try:
        target_config = get_config_from_alias(session_key, cmd_config, self.target)
        if target_config is None:
            exit_error(logger, "Unable to find target configuration (%s)." % self.target, 100937)
        logger.debug("Target configuration: " + str(target_config))
    except BaseException as e:
        exit_error(logger, "Error reading target server configuration: " + repr(e), 124812)

    file_extensions = {
        'raw': '.log',
        'kv': '.log',
        'pipe': '.log',
        'csv': '.csv',
        'tsv': '.tsv',
        'json': '.json'
    }

    if self.outputformat is None:
        self.outputformat = 'csv'

    # Create the default filename
    now = str(int(time.time()))
    default_filename = ('export_' + user + '___now__' + file_extensions[self.outputformat]).strip("'")

    # Split the output into folder and filename
    if self.outputfile is not None:
        folder_list = self.outputfile.split('/')
        if len(folder_list) == 1:
            # No folder specified, use the default
            use_default_folder = True
            filename = folder_list[0]
        elif folder_list[0] == '':
            # Length > 1, outputfile points to the root folder (leading /)
            use_default_folder = False
        else:
            # Length > 1 and outputfile points to a relative path (no leading /)
            use_default_folder = True

        if len(folder_list) > 1 and folder_list[-1] == '':
            # No filename provided, trailing /
            filename = default_filename
            folder_list.pop()
        elif len(folder_list) > 1 and len(folder_list[-1]) > 0:
            filename = folder_list[-1]
            folder_list.pop()
    else:
        use_default_folder = True
        filename = default_filename
        folder_list = []

    if use_default_folder:
        if 'default_folder' in list(target_config.keys()):
            # Use the configured default folder
            folder_list = target_config['default_folder'].strip('/').split('/') + folder_list
        else:
            # Use the root folder
            folder_list = ['']

    # Replace keywords from output filename and folder
    folder = replace_keywords('/'.join(folder_list))
    filename = replace_keywords(filename)

    logger.debug("Folder = " + folder)
    logger.debug("Filename = " + filename)

    if self.compress is not None:
        logger.debug('Compression: %s', self.compress)
    else:
        try:
            self.compress = target_config.get('compress')
        except:
            self.compress = False

    # Use a random number to support running multiple outputs in a single search
    random_number = str(random.randint(10000, 100000))
    staging_filename = 'export_everything_staging_' + random_number + '.txt'
    local_output_file = os.path.join(dispatch, staging_filename)
    if self.compress:
        local_output_file = local_output_file + '.gz'
    logger.debug("Staging file: %s" % local_output_file)

    # Append .gz to the output file if compress=true
    if not self.compress and len(filename) > 3:
        if filename[-3:] == '.gz':
            # We have a .gz extension when compression was not specified. Enable compression.
            self.compress = True
    elif self.compress and len(filename) > 3:
        if filename[-3:] != '.gz':
            filename = filename + '.gz'

    #if auth is not None:
    # Use the credential to connect to Box
    try:
        client = get_box_connection(target_config)
    except BaseException as e:
        exit_error(logger, "Could not connect to Box: " + repr(e))

    subfolders = folder.strip('/').split('/')
    if '' in subfolders:
        subfolders.remove('')
    logger.debug("Folders: %s" % str(subfolders))

    # Prepend the list with the root element
    box_folder_object = client.root_folder().get()
    # Walk the folder path until we find the target directory
    for subfolder_name in subfolders:
        logger.debug("Looking for folder: %s" % subfolder_name)
        # Get the folder ID for the string specified from the list of child subfolders
        # folder object is from the previous iteration
        folder_contents = box_folder_object.get_items()
        folder_found = False
        for item in folder_contents:
            if item.type == 'folder':
                #logger.debug('{0} {1} is named "{2}"'.format(item.type.capitalize(), item.id, item.name))
                if subfolder_name == item.name:
                    logger.debug("Found a target folder ID: %s" % str(item.id))
                    box_folder_object = client.folder(folder_id=item.id)
                    folder_found = True
        if not folder_found:
            # Create the required subfolder
            box_folder_object = box_folder_object.create_subfolder(subfolder_name)

    try:
        event_counter = 0
        # Write the output file to disk in the dispatch folder
        logger.debug("Writing events to dispatch file. file=\"%s\" format=%s compress=%s fields=%s",
                     local_output_file, self.outputformat, self.compress, self.fields)
        for event in event_file.write_events_to_file(events, self.fields, local_output_file,
                                                     self.outputformat, self.compress):
            yield event
            event_counter += 1
    except BoxAPIException as be:
        exit_error(logger, be.message, 833928)
    except BaseException as e:
        exit_error(logger, "Error writing file to upload: " + repr(e), 398372)

    try:
        new_file = box_folder_object.upload(local_output_file, file_name=filename)
        message = "Box Export Status: Success. File name: %s, File ID: %s" % (new_file.name, new_file.id)
        eprint(message)
        logger.info(message)
    except BaseException as e:
        exit_error(logger, "Error uploading file to Box: " + repr(e), 109693)

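The folder walk above can be factored into a small helper. This is a sketch built only from the boxsdk calls already used here (root_folder, get_items, folder, create_subfolder); the helper name is ours, not part of the app.

def ensure_box_folder(client, folder_path):
    # Walk folder_path (e.g. 'a/b/c') from the root, creating any missing
    # subfolders, and return the final Folder object.
    box_folder_object = client.root_folder().get()
    for subfolder_name in folder_path.strip('/').split('/'):
        if not subfolder_name:
            continue
        folder_found = False
        for item in box_folder_object.get_items():
            if item.type == 'folder' and item.name == subfolder_name:
                box_folder_object = client.folder(folder_id=item.id)
                folder_found = True
                break
        if not folder_found:
            box_folder_object = box_folder_object.create_subfolder(subfolder_name)
    return box_folder_object
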
def reduce(self, events):
    try:
        app_config = cli.getConfStanza('ep_general', 'settings')
        cmd_config = cli.getConfStanzas('ep_smb')
    except BaseException as e:
        raise Exception("Could not read configuration: " + repr(e))

    # Facility info - prepended to log lines
    facility = os.path.basename(__file__)
    facility = os.path.splitext(facility)[0]
    try:
        logger = setup_logger(app_config["log_level"], 'export_everything.log', facility)
    except BaseException as e:
        raise Exception("Could not create logger: " + repr(e))

    logger.info('SMB Export search command initiated')
    logger.debug('search_ep_smb command: %s', self)  # logs command line

    # Enumerate proxy settings
    http_proxy = os.environ.get('HTTP_PROXY')
    https_proxy = os.environ.get('HTTPS_PROXY')
    proxy_exceptions = os.environ.get('NO_PROXY')

    if http_proxy is not None:
        logger.debug("HTTP proxy: %s" % http_proxy)
    if https_proxy is not None:
        logger.debug("HTTPS proxy: %s" % https_proxy)
    if proxy_exceptions is not None:
        logger.debug("Proxy Exceptions: %s" % proxy_exceptions)

    # Enumerate settings
    app = self._metadata.searchinfo.app
    user = self._metadata.searchinfo.username
    dispatch = self._metadata.searchinfo.dispatch_dir
    session_key = self._metadata.searchinfo.session_key

    if self.target is None and 'target=' in str(self):
        recover_parameters(self)
    # Replace all tokenized parameter strings
    replace_object_tokens(self)

    # Use a random number to support running multiple outputs in a single search
    random_number = str(random.randint(10000, 100000))

    try:
        target_config = get_config_from_alias(session_key, cmd_config, self.target)
        if target_config is None:
            exit_error(logger, "Unable to find target configuration (%s)." % self.target, 100937)
    except BaseException as e:
        exit_error(logger, "Error reading target server configuration: " + repr(e), 124812)

    # Get the local client hostname
    client_name = socket.gethostname()
    # Delete any domain from the client hostname string
    if '.' in client_name:
        client_name = client_name[0:client_name.index('.')]

    # Check to see if we have credentials
    valid_settings = []
    for l in list(target_config.keys()):
        if len(target_config[l]) > 0:
            valid_settings.append(l)

    if 'host' in valid_settings:
        # A target has been configured. Check for credentials.
        try:
            if 'credential_username' in valid_settings and 'credential_password' in valid_settings and 'share_name' in valid_settings:
                domain = target_config['credential_realm'] if 'credential_realm' in list(target_config.keys()) else target_config['host']
                try:
                    # Try port 445 first
                    conn = SMBConnection(
                        target_config['credential_username'],
                        target_config['credential_password'],
                        client_name,
                        target_config['host'],
                        domain=domain,
                        use_ntlm_v2=True,
                        sign_options=SMBConnection.SIGN_WHEN_SUPPORTED,
                        is_direct_tcp=True)
                    connected = conn.connect(target_config['host'], 445, timeout=5)

                    if target_config['share_name'] not in (s.name for s in conn.listShares(timeout=10)):
                        exit_error(logger, "Unable to find the specified share name on the server", 553952)
                    '''
                    p445_error = repr(e445)
                    try:
                        # Try port 139 if that didn't work
                        conn = SMBConnection(target_config['credential_username'], target_config['credential_password'],
                                             client_name, target_config['host'], domain=domain, use_ntlm_v2=True,
                                             sign_options=SMBConnection.SIGN_WHEN_SUPPORTED)
                        connected = conn.connect(target_config['host'], 139, timeout=5)
                    except BaseException as e139:
                        p139_error = repr(e139)
                        raise Exception("Errors connecting to host: \\nPort 139: %s\\nPort 445: %s" % (p139_error, p445_error))

                    conn = SMBConnection(target_config['credential_username'], target_config['credential_password'],
                                         client_name, target_config['host'], domain=domain, use_ntlm_v2=True,
                                         sign_options=SMBConnection.SIGN_WHEN_SUPPORTED)
                    connected = conn.connect(target_config['host'], 139)

                    shares = share_exists = False
                    for i in range(len(shares)):
                        if shares[i].name == target_config['share_name']:
                            share_exists = True
                            break
                    '''
                except BaseException as e:
                    exit_error(logger, "Unable to setup SMB connection: " + repr(e), 921982)
            else:
                exit_error(logger, "Required settings not found", 101926)
        except BaseException as e:
            exit_error(logger, "Error reading the configuration: " + repr(e), 230494)
    else:
        exit_error(logger, "Could not find required configuration settings", 2823874)

    file_extensions = {
        'raw': '.log',
        'kv': '.log',
        'pipe': '.log',
        'csv': '.csv',
        'tsv': '.tsv',
        'json': '.json'
    }

    if self.outputformat is None:
        self.outputformat = 'csv'

    # Create the default filename
    default_filename = ('export_' + user + '___now__' + file_extensions[self.outputformat]).strip("'")

    folder, filename = event_file.parse_outputfile(self.outputfile, default_filename, target_config)

    if self.compress is not None:
        logger.debug('Compression: %s', self.compress)
    else:
        try:
            self.compress = target_config.get('compress')
        except:
            self.compress = False

    staging_filename = 'export_everything_staging_' + random_number + '.txt'
    local_output_file = os.path.join(dispatch, staging_filename)
    if self.compress:
        local_output_file = local_output_file + '.gz'

    # Append .gz to the output file if compress=true
    if not self.compress and len(filename) > 3:
        if filename[-3:] == '.gz':
            # We have a .gz extension when compression was not specified. Enable compression.
            self.compress = True
    elif self.compress and len(filename) > 3:
        if filename[-3:] != '.gz':
            filename = filename + '.gz'

    if conn is not None:
        # Use the connection to access the SMB share
        try:
            # Check to see if the folder exists
            folder_attrs = conn.getAttributes(target_config['share_name'], folder, timeout=10)
        except BaseException:
            # Remote directory could not be loaded. It must not exist. Create it.
            # Create the folders required to store the file
            subfolders = ['/'] + folder.strip('/').split('/')
            if '' in subfolders:
                subfolders.remove('')
            logger.debug("Folders list for dir creation: %s" % str(subfolders))
            current_folder = ''
            folder_depth = len(subfolders) - 1
            for i, subfolder_name in enumerate(subfolders):
                current_folder = (current_folder + '/' + subfolder_name).replace('//', '/')
                logger.debug("Current folder = " + current_folder)
                try:
                    conn.getAttributes(target_config['share_name'], current_folder, timeout=10)
                except:
                    conn.createDirectory(target_config['share_name'], current_folder, timeout=10)
            try:
                folder_attrs = conn.getAttributes(target_config['share_name'], folder, timeout=10)
            except BaseException as e:
                exit_error(logger, "Could not load or create remote directory: " + repr(e), 377890)

        # This should always be true
        if folder_attrs is not None:
            if folder_attrs.isReadOnly or not folder_attrs.isDirectory:
                exit_error(logger, "Could not access the remote directory", 184772)
            else:
                try:
                    event_counter = 0
                    # Write the output file to disk in the dispatch folder
                    logger.debug("Writing events to dispatch file. file=\"%s\" format=%s compress=%s fields=%s",
                                 local_output_file, self.outputformat, self.compress, self.fields)
                    for event in event_file.write_events_to_file(events, self.fields, local_output_file,
                                                                 self.outputformat, self.compress):
                        yield event
                        event_counter += 1
                except BaseException as e:
                    exit_error(logger, "Error writing file to upload: " + repr(e), 296733)

                # Write the file to the remote location
                try:
                    with open(local_output_file, 'rb', buffering=0) as local_file:
                        bytes_uploaded = conn.storeFile(target_config['share_name'],
                                                        folder + '/' + filename, local_file)
                except BaseException as e:
                    exit_error(logger, "Error uploading file to SMB server: " + repr(e), 109693)

                if bytes_uploaded > 0:
                    message = "SMB Export Status: Success. File name: %s" % (folder + '/' + filename)
                    eprint(message)
                    logger.info(message)
                else:
                    exit_error(logger, "Zero bytes uploaded", 771293)
    else:
        exit_error(logger, "Could not connect to server.", 159528)

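For reference, a trimmed-down sketch of the pysmb calls used above (direct-TCP connection on port 445, then storeFile); the hostname, share name, paths, and credentials are placeholders only.

from smb.SMBConnection import SMBConnection

# Placeholder connection details for illustration only
conn = SMBConnection('svc_export', 'secret', 'splunk-host', 'fileserver',
                     domain='EXAMPLE', use_ntlm_v2=True,
                     sign_options=SMBConnection.SIGN_WHEN_SUPPORTED,
                     is_direct_tcp=True)

if conn.connect('fileserver', 445, timeout=5):
    with open('/tmp/export.csv', 'rb') as local_file:
        bytes_uploaded = conn.storeFile('exports', '/splunk/export.csv', local_file)
    print("Uploaded %d bytes" % bytes_uploaded)
    conn.close()
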