def validate_backup_repo_exists(client_config, repository):
    """[summary]
    Validates backup repository exists in ES/OS

    Args:
        client_config ([dict]): [Client configuration]
        repository ([str]): [Repository to verify exists]

    Raises:
        Exception: [On error, print error and retry]

    Returns:
        [bool]: [Does backup repository exist]
    """
    try:
        elastic_connection = es.build_es_connection(client_config)
        repositories = elastic_connection.cat.repositories(format='json')
        elastic_connection.close()
        for record in repositories:
            if repository == record['id']:
                print(
                    f"Backup repository {repository} exists and is registered")
                return True
    except Exception as e:
        elastic_connection.close()
        print("Operation failed - Validate backup repo exists")
        raise Exception(e)
    # If it makes it this far the repo does not exist, fail
    print(f"Backup repository {repository} not registered")
    return False
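# Usage sketch (kept as a comment so nothing runs on import). Assumes client
# configurations are loaded with load_configs(); "example_client" and the
# repository name "backups" are placeholders, not values from this codebase.
#
#   client_config = load_configs()["example_client"]
#   if validate_backup_repo_exists(client_config, "backups"):
#       print("Safe to schedule snapshots against the 'backups' repository")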
def forcemerge_indices(client_config, index, index_forcemerge_policies):
    """Forcemerges an index if it is older than its forcemerge policy

    Args:
        client_config (dict): Client configuration
        index (str): Index name
        index_forcemerge_policies (dict): Forcemerge policy
    """
    elastic_connection = es.build_es_connection(client_config)
    newest_record = ""
    newest_record = es.get_newest_document_date_in_index(
        client_config, index, elastic_connection)
    # make sure newest record is not empty
    if newest_record != "":
        # Get the index specific forcemerge policy
        policy = es.check_index_forcemerge_policy(index, index_forcemerge_policies)
        # Get policy forcemerge days from specific policy
        policy_days = index_forcemerge_policies[policy]
        # Get current datetime
        current_date = datetime.utcnow()
        # Figure out how many days since current_date vs. newest_record
        days_ago = (current_date - newest_record).days
        # Check if days_ago is greater than or equal to policy days
        # If greater than or equal to policy days, forcemerge the index
        if days_ago >= policy_days:
            status = elastic_connection.indices.forcemerge(index)
            if '_shards' in status:
                if 'total' in status['_shards'] and 'successful' in status[
                        '_shards']:
                    if status['_shards']['total'] == status['_shards'][
                            'successful']:
                        print("Forcemerge for " + index + " successful")
                    else:
                        print("Forcemerge for " + index + " unsuccessful")
            else:
                print("Forcemerge for " + index + " unsuccessful")
    elastic_connection.close()
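# Usage sketch (comment only). The policy dict shape is assumed here to map a
# policy name to an age in days, matching how policy_days is used above;
# "global" and the index name are placeholders.
#
#   client_config = load_configs()["example_client"]
#   forcemerge_indices(client_config, "logstash-example-2021.01.01",
#                      {"global": 30})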
def delete_snapshot_in_repository(client_config, repository, snapshot):
    """[summary]
    Deletes a snapshot from a backup repository

    Args:
        client_config ([dict]): [Client configuration]
        repository ([str]): [Backup repository]
        snapshot ([str]): [Snapshot full name]

    Raises:
        Exception: [On error, print error and retry]

    Returns:
        ([bool]): [Did snapshot get removed]
    """
    elastic_connection = es.build_es_connection(client_config)
    try:
        delete_status = elastic_connection.snapshot.delete(
            repository, snapshot=snapshot)
        elastic_connection.close()
        if 'acknowledged' in delete_status:
            if delete_status['acknowledged']:
                print("Snapshot " + snapshot + " deleted successfully")
                return True
            else:
                print("Snapshot " + snapshot +
                      " failed to delete successfully")
                return False
    except Exception as e:
        elastic_connection.close()
        print("Operation failed - Delete snapshot " + snapshot +
              " from " + repository)
        raise Exception(e)
    return False
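# Usage sketch (comment only). Repository and snapshot names are placeholders;
# a real snapshot name would normally come from get_snapshots_in_repository().
#
#   client_config = load_configs()["example_client"]
#   delete_snapshot_in_repository(client_config, "backups",
#                                 "daily_2021-01-01_00:00:00")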
def get_snapshots_in_repository(client_config, repository):
    """[summary]
    Gets all snapshots from backup repository

    Args:
        client_config ([dict]): [Client configuration]
        repository ([str]): [Backup repository name]

    Raises:
        Exception: [On error, print error and retry]

    Returns:
        [dict]: [Dictionary of all snapshot information]
    """
    elastic_connection = es.build_es_connection(client_config)
    snapshots = {'snapshots': []}
    try:
        snapshots = elastic_connection.snapshot.get(repository, '_all')
        elastic_connection.close()
    except Exception as e:
        elastic_connection.close()
        print("Operation failed - Get snapshots from " + repository)
        raise Exception(e)
    return snapshots
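# Usage sketch (comment only). The 'snapshots'/'snapshot' keys reflect the
# standard ES/OS snapshot GET response shape; the repository name is a
# placeholder.
#
#   client_config = load_configs()["example_client"]
#   snapshots = get_snapshots_in_repository(client_config, "backups")
#   for snap in snapshots.get('snapshots', []):
#       print(snap['snapshot'], snap.get('state'))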
def delete_old_indices(client_config, index, index_retention_policies):
    """Deletes indices past retention policy

    Args:
        client_config (dict): Client configuration
        index (str): Index name
        index_retention_policies (dict): Retention policy
    """
    elastic_connection = es.build_es_connection(client_config)
    newest_record = ""
    newest_record = es.get_newest_document_date_in_index(
        client_config, index, elastic_connection)
    # make sure newest record is not empty
    if newest_record != "":
        # Get the index specific retention policy
        policy = es.check_index_retention_policy(index, index_retention_policies)
        # Get policy retention days from specific policy
        policy_days = index_retention_policies[policy]
        # Get current datetime
        current_date = datetime.utcnow()
        # Figure out how many days since current_date vs. newest_record
        days_ago = (current_date - newest_record).days
        # Check if days_ago is greater than or equal to policy date
        # If greater than or equal to policy date, delete index
        if days_ago >= policy_days:
            # Delete old index
            es.delete_index(client_config, index)
    elastic_connection.close()
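# Usage sketch (comment only). As with forcemerge, the retention policy dict is
# assumed to map a policy name to retention days; names are placeholders.
#
#   client_config = load_configs()["example_client"]
#   delete_old_indices(client_config, "logstash-example-2020.01.01",
#                      {"global": 90})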
def get_index_template(client_config, template_name):
    """Looks up an index template by name

    Args:
        client_config (dict): Client configuration
        template_name (str): Index template name

    Returns:
        dict/str: Index template if found, otherwise "Not found"
    """
    try:
        elastic_connection = es.build_es_connection(client_config)
        index_template = elastic_connection.indices.get_template(template_name)
        elastic_connection.close()
        return index_template
    except Exception:
        return "Not found"
def reingest_data(json_data, esclient):
    """Bulk re-ingests accounting records into the specified client's cluster

    Args:
        json_data (list): Accounting records to re-ingest
        esclient (str): Client name key within the module-level clients dict
    """
    elasticsearch_connection = es.build_es_connection(clients[esclient])
    results = es.get_list_by_chunk_size(json_data, 100)
    for result in results:
        es.bulk_insert_data_to_es(elasticsearch_connection, result,
                                  "accounting", bulk_size=100)
    elasticsearch_connection.close()
def check_for_mapping_conflicts(client_config, indices, compare_mapping):
    """Compares each index's mapping against a reference mapping

    Args:
        client_config (dict): Client configuration
        indices (list): Indices to check
        compare_mapping (dict): Mapping properties to compare against
    """
    elastic_connection = es.build_es_connection(client_config)
    for index in indices:
        mapping = elastic_connection.indices.get_mapping(index)
        index_mapping = mapping[index]['mappings']['properties']
        differences = []
        for diff in list(dictdiffer.diff(index_mapping, compare_mapping)):
            differences.append(diff)
        if len(differences) > 0:
            print("Index template does not match index " + index +
                  ". Changes below")
            print(differences)
    elastic_connection.close()
def allocate_indices(client_config, index, index_allocation_policies):
    """Processes index allocations per index age

    Args:
        client_config (dict): Client configuration
        index (str): Index name
        index_allocation_policies (dict): Allocation policy
    """
    elastic_connection = es.build_es_connection(client_config)
    newest_record = ""
    newest_record = es.get_newest_document_date_in_index(
        client_config, index, elastic_connection)
    # make sure newest record is not empty
    if newest_record != "":
        # Get the index specific allocation policy
        policy = es.check_index_allocation_policy(index, index_allocation_policies)
        # Get policy allocation days from specific policy
        policy_days = index_allocation_policies[policy]
        # Get current datetime
        current_date = datetime.utcnow()
        # Figure out how many days since current_date vs. newest_record
        days_ago = (current_date - newest_record).days
        # Sort the allocation stages by age and keep the oldest stage
        # whose age threshold the index has already reached
        policy_days = dict(
            sorted(policy_days.items(), key=lambda item: item[1]))
        allocation_type = ''
        for key, value in policy_days.items():
            if value <= days_ago:
                allocation_type = key
        if allocation_type != '':
            # Change index allocation per policy
            index_settings = elastic_connection.indices.get_settings(
                index=index)
            index_settings = index_settings[index]['settings']['index']
            box_type = 'hot'
            if 'routing' in index_settings:
                if 'allocation' in index_settings['routing']:
                    if 'require' in index_settings['routing']['allocation']:
                        if 'box_type' in index_settings['routing'][
                                'allocation']['require']:
                            box_type = index_settings['routing']['allocation'][
                                'require']['box_type']
            if allocation_type != box_type:
                print("Changing allocation of index " + str(index) +
                      " to " + str(allocation_type))
                elastic_connection.indices.put_settings(
                    index=index,
                    body={
                        "index.routing.allocation.require.box_type":
                            allocation_type
                    })
    elastic_connection.close()
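# Usage sketch (comment only). Based on how policy_days is iterated above, the
# allocation policy is assumed to map a policy name to a dict of
# box_type -> age-in-days; all names below are placeholders.
#
#   client_config = load_configs()["example_client"]
#   allocate_indices(client_config, "logstash-example-2021.01.01",
#                    {"global": {"hot": 0, "warm": 30}})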
def apply_forcemerge_to_indices(indices, index_forcemerge_policies, client_config):
    """Applies forcemerge policies to indices via a thread pool

    Args:
        indices (array): List of indices
        index_forcemerge_policies (dict): Forcemerge policy
        client_config (dict): Client configuration
    """
    elastic_connection = es.build_es_connection(client_config)
    with ThreadPoolExecutor(max_workers=es.get_lowest_data_node_thread_count(
            client_config)) as executor:
        for index in indices:
            index = str(index['index'])
            # Only proceed if index is not a special index
            if not es.check_special_index(index):
                executor.submit(forcemerge_indices, client_config, index,
                                index_forcemerge_policies)
    elastic_connection.close()
def take_snapshot(client_config, repository, snapshot, body):
    """[summary]
    Creates a backup snapshot

    Args:
        client_config ([dict]): [Client configuration]
        repository ([str]): [ES/OS repository name]
        snapshot ([str]): [Name of snapshot to create]
        body ([dict]): [Details for backup job]

    Raises:
        Exception: [If error, print error and retry]

    Returns:
        [bool]: [Backup job status]
    """
    try:
        if es.check_cluster_health_status(client_config, 'yellow'):
            print("Cluster health check passed")
    except Exception as e:
        raise Exception(e)
    try:
        current_date = datetime.strftime(
            datetime.utcnow(), '%Y-%m-%d_%H:%M:%S')
        snapshot_name = f"{snapshot}_{current_date}"
        if DEBUG_ENABLED == "1":
            print(f"Triggering backup for {snapshot_name}*")
            print("Repository is " + repository + "| snapshot is " +
                  snapshot_name + " | body is:")
            print(json.dumps(body))
        elastic_connection = es.build_es_connection(client_config)
        backup_job = elastic_connection.snapshot.create(
            repository, snapshot_name, body,
            wait_for_completion=False, request_timeout=30)
        elastic_connection.close()
        if 'accepted' in backup_job:
            if backup_job['accepted']:
                return True
            else:
                print("Backup snapshot " + snapshot_name +
                      " failed to create")
                return False
    except Exception as e:
        elastic_connection.close()
        print("Operation failed - Create snapshot " + snapshot +
              " for repo " + repository)
        raise Exception(e)
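# Usage sketch (comment only). The body follows the standard ES/OS snapshot
# create API; repository, snapshot prefix, and index pattern are placeholders.
#
#   client_config = load_configs()["example_client"]
#   take_snapshot(client_config, "backups", "daily",
#                 {"indices": "logstash-example-*",
#                  "include_global_state": False})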
def get_indices_within_limit_age(client_config, indices, limit_age):
    """[summary]
    Takes a list of indices and looks to see if the most recent
    document is within a specified @timestamp age based on limit_age

    Args:
        client_config ([dict]): [Client configuration]
        indices ([list]): [List of indices to look through]
        limit_age ([int]): [Age in terms of within X days ago]

    Raises:
        Exception: [If error, print and retry]

    Returns:
        [list]: [List of indices that were within limit_age]
    """
    limit_age = limit_age * 86400
    current_date = datetime.utcnow()
    indices_within_limit_age = []
    body = '{"aggs": {"indices": {"terms": {"field": "_index","order": {"1": "desc"},"size": 50000},"aggs": {"1": {"max": {"field": "@timestamp"}}}}},"size": 0,"_source": {"excludes": []}}'
    for index in indices:
        elastic_connection = es.build_es_connection(client_config)
        try:
            if DEBUG_ENABLED == "1":
                print("Index is " + index)
                print(f"Limit age is {limit_age}\nBody is\n{body}")
            result = elastic_connection.search(index=index + "*", body=body)
            elastic_connection.close()
            if DEBUG_ENABLED == "1":
                print(result)
            for bucket in result['aggregations']['indices']['buckets']:
                index_name = bucket['key']
                index_date = dateparser.parse(
                    bucket['1']['value_as_string']).replace(tzinfo=None)
                seconds_ago = (current_date - index_date).total_seconds()
                if DEBUG_ENABLED == "1":
                    print(f"Index name is {index_name}")
                    print(f"Policy {limit_age} vs index {seconds_ago}")
                if seconds_ago <= limit_age:
                    indices_within_limit_age.append(index_name)
        except Exception as e:
            elastic_connection.close()
            raise Exception(e)
    return indices_within_limit_age
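# Usage sketch (comment only). Index basenames get a trailing wildcard inside
# the function, so base names are passed here; names are placeholders.
#
#   client_config = load_configs()["example_client"]
#   recent = get_indices_within_limit_age(
#       client_config, ["logstash-example", "winlogbeat-example"], 1)
#   print(recent)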
def apply_retention_to_old_indices(indices, index_retention_policies, client_config):
    """Apply retention to indices older than policy

    Args:
        indices (array): List of indices
        index_retention_policies (dict): Retention policy
        client_config (dict): Client configuration
    """
    elastic_connection = es.build_es_connection(client_config)
    with ThreadPoolExecutor(max_workers=es.get_lowest_data_node_thread_count(
            client_config)) as executor:
        for index in indices:
            index = str(index['index'])
            # Only proceed if index is not a special index
            if not es.check_special_index(index):
                executor.submit(delete_old_indices, client_config, index,
                                index_retention_policies)
    elastic_connection.close()
def create_index_template(client_config, group, last_index):
    """Creates/updates an index template for a group based on its newest index

    Args:
        client_config (dict): Client configuration
        group (str): Index group name (template will match group-*)
        last_index (str): Most current index to copy mappings from

    Returns:
        bool: True if template was created, False on failure
    """
    # Base template settings
    template = {
        "order": 5,
        "version": 60001,
        "settings": {
            "index": {
                "mapping": {
                    "total_fields": {
                        "limit": "15000"
                    }
                },
                "refresh_interval": "30s",
                "number_of_shards": "1",
                "number_of_replicas": "1"
            }
        },
        "mappings": {},
        "aliases": {}
    }
    try:
        elastic_connection = es.build_es_connection(client_config)
        # Get index mappings from most current index
        field_mappings = elastic_connection.indices.get_mapping(last_index)
        # Extract mappings from most current index
        mapping = field_mappings[last_index]['mappings']
        # Update base template to have index mappings
        template['mappings'] = mapping
        # Set index patterns template should match on
        template['index_patterns'] = [group + "-*"]
        # Create the template
        elastic_connection.indices.put_template(group, body=template)
        elastic_connection.close()
        return True
    except Exception:
        return False
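# Usage sketch (comment only). Group and index names are placeholders; the
# template created will match "<group>-*".
#
#   client_config = load_configs()["example_client"]
#   if create_index_template(client_config, "logstash-example",
#                            "logstash-example-2021.01.01"):
#       print("Template logstash-example updated")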
def calculate_accounting(client_config, client_name):
    """Calculates daily accounting (size/cost) records for a client

    Args:
        client_config (dict): Client configuration
        client_name (str): Client name

    Returns:
        bool: True on success or if already calculated today, False on failure
    """
    settings = load_settings()
    # Set today's current datetime
    today = datetime.now()
    date_time = today.strftime("%Y%m%d")
    # Check if client accounting data already calculated today
    if path.exists(settings['accounting']['output_folder'] + '/' +
                   client_name + "_accounting-" + date_time + ".json"):
        print("Accounting already calculated for " + client_name +
              " today: " + str(date_time))
        return True
    else:
        print("Calculating accounting data for " + client_name)
        # Check cluster health - Expect Yellow to continue
        if es.check_cluster_health_status(
                client_config, settings['accounting']['health_check_level']):
            elastic_connection = es.build_es_connection(client_config)
            # Grab the client specific allocation policy (tiering policy)
            index_allocation_policies = get_allocation_policy(client_config)
            # Next, get information on all current indices in client cluster
            indices = es.es_get_indices(client_config)
            print("Client " + client_name + " has " + str(len(indices)) +
                  ' indices')
            accounting_records = []
            special_index_size = 0
            # Loop through each index
            for index in indices:
                if not es.check_special_index(index['index']):
                    # Grab the current index's allocation policy based on index name
                    policy = es.check_index_allocation_policy(
                        index['index'], index_allocation_policies)
                    # Lookup the policy's # of days setting
                    policy_days = index_allocation_policies[policy]
                    # Get current datetime
                    current_date = datetime.now()
                    # Get index datetime
                    index_date = datetime.strptime(
                        index['creation.date.string'], '%Y-%m-%dT%H:%M:%S.%fZ')
                    # Figure out how many days since current_date vs. index_date
                    days_ago = (current_date - index_date).days
                    # Build client specific daily accounting records
                    # Convert index size from bytes to gigabytes
                    index_size_in_gb = round(
                        float(index['storeSize']) / 1024 / 1024 / 1024, 8)
                    # Calculate indices daily cost
                    # If index is older than policy_days, set disk type to sata
                    # and make sure index is set to proper allocation attribute
                    if days_ago >= policy_days:
                        cost = round(float(index_size_in_gb) *
                                     settings['accounting']['sata_cost'], 8)
                        disk_type = 'sata'
                    else:
                        cost = round(float(index_size_in_gb) *
                                     settings['accounting']['ssd_cost'], 8)
                        disk_type = 'ssd'
                    index_group = es.get_index_group(index['index'])
                    accounting_record = {
                        'name': index['index'],
                        'client': client_name,
                        'size': float(index_size_in_gb),
                        'logs': int(index['docsCount']),
                        'disk': disk_type,
                        'cost': float(cost),
                        'index_creation_date': index['creation.date.string'],
                        '@timestamp': str(current_date.isoformat()),
                        'index_group': index_group,
                        'allocation_policy': str(policy),
                        'current_policy_days': int(policy_days)
                    }
                    accounting_records.append(accounting_record)
                else:
                    index_size_in_gb = round(
                        float(index['storeSize']) / 1024 / 1024 / 1024, 8)
                    special_index_size += index_size_in_gb
            # Check TOML for device tracking settings, if exists, calculate
            if 'device_tracking_inclusion' in settings['accounting']:
                device_by_ip = []
                device_by_computer_name = []
                device_by_user = []
                total_devices = 0
                for inclusion in settings['accounting']['device_tracking_inclusion']:
                    index = settings['accounting']['device_tracking_inclusion'][inclusion]['index']
                    tracking_field = settings['accounting']['device_tracking_inclusion'][inclusion]['tracking_field']
                    search = settings['accounting']['device_tracking_inclusion'][inclusion]['search']
                    count_as = settings['accounting']['device_tracking_inclusion'][inclusion]['count_as']
                    response = es.aggregate_search(
                        elastic_connection, index, search, 'value_count',
                        tracking_field, sort='@timestamp',
                        limit_to_fields=[tracking_field])
                    if count_as == "computer":
                        device_by_computer_name += response
                    if count_as == "ip":
                        device_by_ip += response
                    if count_as == "user":
                        device_by_user += response
                if 'device_tracking_exclusion' in settings['accounting']:
                    for exclusion in settings['accounting']['device_tracking_exclusion']:
                        index = settings['accounting']['device_tracking_exclusion'][exclusion]['index']
                        field_to_exclude_against = settings['accounting']['device_tracking_exclusion'][exclusion]['field_to_exclude_against']
                        field_to_match_against = settings['accounting']['device_tracking_exclusion'][exclusion]['field_to_match_against']
                        field_to_match_against_count_as_type = settings['accounting']['device_tracking_exclusion'][exclusion]['field_to_match_against_count_as_type']
                        search = settings['accounting']['device_tracking_exclusion'][exclusion]['search']
                        count_as = settings['accounting']['device_tracking_exclusion'][exclusion]['count_as']
                        response = es.multiple_aggregate_search(
                            elastic_connection, index, search, 'value_count',
                            field_to_match_against, field_to_exclude_against,
                            sort='@timestamp',
                            limit_to_fields=[field_to_exclude_against,
                                             field_to_match_against])
                        if field_to_match_against_count_as_type == "computer":
                            # Look for computers in device_by_computer_name, if found
                            # remove response value from field_to_exclude_against
                            for computer in response.keys():
                                if computer in device_by_computer_name:
                                    print(f"Removing {computer} from {field_to_exclude_against}")
                                    exclusion = response[computer]
                                    if field_to_exclude_against == "ip":
                                        device_by_ip.pop(exclusion)
                                    if field_to_exclude_against == "computer":
                                        device_by_computer_name.pop(exclusion)
                                    if field_to_exclude_against == "user":
                                        device_by_user.pop(exclusion)
                        if field_to_match_against_count_as_type == "ip":
                            # Look for ips in device_by_ip, if found
                            # remove response value from field_to_exclude_against
                            for ip in response.keys():
                                print(ip)
                                if ip in device_by_computer_name:
                                    print(f"Removing {ip} from {field_to_exclude_against}")
                                    exclusion = response[ip]
                                    if field_to_exclude_against == "ip":
                                        device_by_ip.pop(exclusion)
                                    if field_to_exclude_against == "computer":
                                        device_by_computer_name.pop(exclusion)
                                    if field_to_exclude_against == "user":
                                        device_by_user.pop(exclusion)
                        if field_to_match_against_count_as_type == "user":
                            # Look for users in device_by_user, if found
                            # remove response value from field_to_exclude_against
                            for user in response.keys():
                                if user in device_by_computer_name:
                                    print(f"Removing {user} from {field_to_exclude_against}")
                                    exclusion = response[user]
                                    if field_to_exclude_against == "ip":
                                        device_by_ip.pop(exclusion)
                                    if field_to_exclude_against == "computer":
                                        device_by_computer_name.pop(exclusion)
                                    if field_to_exclude_against == "user":
                                        device_by_user.pop(exclusion)
                device_by_user_count = len(set(device_by_user))
                device_by_computer_name_count = len(set(device_by_computer_name))
                device_by_ip_count = len(set(device_by_ip))
                total_devices = device_by_user_count + \
                    device_by_computer_name_count + device_by_ip_count
                accounting_record = {
                    'client': client_name,
                    'device_count': int(total_devices),
                    '@timestamp': str(current_date.isoformat()),
                }
                if os.path.isdir(settings['accounting']['output_folder']):
                    with open(settings['accounting']['output_folder'] + '/' +
                              client_name + "_accounting-device-" + date_time +
                              ".json", 'a') as f:
                        json_content = json.dumps(accounting_record)
                        f.write(json_content)
                        f.write('\n')
                else:
                    print(f"{settings['accounting']['output_folder']} does not exist. Unable to write accounting records to disk")
            # Appends newest record date into accounting_record
            #for accounting_record in accounting_records:
                #accounting_record['newest_document_date'] = str(es.get_newest_document_date_in_index(client_config, index['index'], elastic_connection).isoformat())
            if not settings['settings']['debug'] and len(accounting_records) != 0:
                for accounting_record in accounting_records:
                    # Create a backup copy of each accounting record
                    if os.path.isdir(settings['accounting']['output_folder']):
                        with open(settings['accounting']['output_folder'] + '/' +
                                  client_name + "_accounting-" + date_time +
                                  ".json", 'a') as f:
                            json_content = json.dumps(accounting_record)
                            f.write(json_content)
                            f.write('\n')
                    else:
                        print(f"{settings['accounting']['output_folder']} does not exist. Unable to write accounting records to disk")
            else:
                print("Debug enabled or no data to save. Not creating accounting file")
            elastic_connection.close()
            cluster_stats = es.get_cluster_stats(client_config)
            # Convert cluster size from bytes to gigabytes
            cluster_size = round(
                float(cluster_stats['indices']['store']['size_in_bytes']) /
                1024 / 1024 / 1024, 8)
            print("Total cluster size is: " + str(cluster_size) + " GB")
            if 'device_tracking_inclusion' in settings['accounting']:
                print(f"Total device tracking is {total_devices}")
            if cluster_size > 1:
                if os.path.isdir(settings['accounting']['output_folder']) and \
                        len(accounting_records) != 0 and \
                        not settings['settings']['debug']:
                    with open(settings['accounting']['output_folder'] + '/' +
                              client_name + "_accounting-" + date_time +
                              ".json") as f:
                        accounting_file = f.readlines()
                    total_accounting_size = 0
                    for record in accounting_file:
                        json_object = json.loads(record)
                        total_accounting_size += float(json_object['size'])
                    total_accounting_size = round(total_accounting_size, 8)
                    print("Total accounting record size is: " +
                          str(total_accounting_size) + " GB")
                    special_index_size = round(special_index_size, 2)
                    print("Total special index size is : " +
                          str(special_index_size) + " GB")
                    total_accounting_index_size = special_index_size + total_accounting_size
                    print("Accounting and special index size equals : " +
                          str(total_accounting_index_size) + " GB")
                    difference_size = cluster_size - total_accounting_index_size
                    print("Difference is " + str(difference_size) + " GB")
                    if difference_size >= 20:
                        message = "Accounting verification is off by more than 20.0 GB." \
                            + " Please find out why. This test is performed by comparing the" \
                            + " current cluster size against the records in the accounting JSON" \
                            + " output files.\n\nTotal cluster size is : " + str(cluster_size) \
                            + " GB\n\nTotal accounting record size is: " + str(total_accounting_size) \
                            + " GB\n\nTotal special index size is : " + str(special_index_size) \
                            + " GB\n\nAccounting and special index size equals : " \
                            + str(total_accounting_index_size) + " GB\n\nDifference is " \
                            + str(difference_size) + " GB\n\nThe size difference can be due to" \
                            + " the script taking longer to run and the index sizes growing" \
                            + " during the accounting calculation. However, if the difference" \
                            + " is significant, some other problem likely occurred."
                        send_notification(client_config,
                                          "accounting verification",
                                          "Failed",
                                          message,
                                          teams=settings['accounting']['ms-teams'],
                                          jira=settings['accounting']['jira'])
                else:
                    if not os.path.isdir(settings['accounting']['output_folder']):
                        print(f"{settings['accounting']['output_folder']} does not exist. Unable to write accounting records to disk")
                    if len(accounting_records) == 0:
                        print("No accounting records to write to disk. Empty cluster")
                if len(accounting_records) != 0 and \
                        not settings['settings']['debug'] and \
                        settings['accounting']['output_to_es']:
                    print("Sending accounting records to ES")
                    elasticsearch_connection = es.build_es_connection(client_config)
                    results = es.get_list_by_chunk_size(accounting_records, 100)
                    for result in results:
                        es.bulk_insert_data_to_es(elasticsearch_connection,
                                                  result, "accounting",
                                                  bulk_size=100)
                    elasticsearch_connection.close()
                    clients = load_configs()
                    if client_name != settings['accounting']['send_copy_to_client_name'] and \
                            settings['accounting']['send_copy_to_client_name'] != '':
                        elasticsearch_connection = es.build_es_connection(
                            clients[settings['accounting']['send_copy_to_client_name']])
                        results = es.get_list_by_chunk_size(accounting_records, 100)
                        for result in results:
                            es.bulk_insert_data_to_es(elasticsearch_connection,
                                                      result, "accounting",
                                                      bulk_size=100)
                        elasticsearch_connection.close()
                    return True
                else:
                    if not settings['settings']['debug']:
                        print("No index data found for accounting")
                        return True
                    else:
                        return True
            else:
                return True
        else:
            settings = load_settings()
            print("Accounting operation failed for " + client_name +
                  ". Cluster health does not meet level: " +
                  settings['accounting']['health_check_level'])
            return False
def extrapolate_files(files):
    # Find missing values and extrapolate
    print("Client has " + str(len(files)) + " accounting files")
    files.sort()
    newest_file = files[len(files) - 1]
    oldest_file = files[0]
    newest_file_date = convert_file_date(os.path.basename(newest_file), client)
    oldest_file_date = convert_file_date(os.path.basename(oldest_file), client)
    days = (newest_file_date - oldest_file_date).days
    if int(days) + 1 == len(files):
        print("PASS - Client " + client + " should have " + str(int(days) + 1) +
              " days of accounting data. " + str(len(files)) + " found")
    else:
        print("FAIL - Client " + client + " should have " + str(int(days) + 1) +
              " days of accounting data. " + str(len(files)) + " found")
    json_data = load_json_file(oldest_file)
    sizes = {}
    #growth_rates = {}
    while days > 0:
        oldest_file_date += timedelta(days=1)
        days = days - 1
        file_date = datetime.strftime(oldest_file_date, '%Y%m%d')
        file_to_check = "/cloud/cloud_configs/business_functions/accounting/" + \
            client + "_accounting-" + file_date + ".json"
        if os.path.exists(file_to_check):
            json_data = load_json_file(file_to_check)
            for record in json_data:
                index_group = record['index_group']
                #if index_group not in growth_rates:
                #    growth_rates[index_group] = []
                if index_group not in sizes:
                    sizes[index_group] = []
                if record['size'] > 0:
                    sizes[index_group].append(record['size'])
                #if len(sizes[index_group]) > 1:
                #    percent_difference = (sizes[index_group][-1] - sizes[index_group][-2]) / sizes[index_group][-1]
                #    # Wait for at least 10 values before calculating IQR
                #    if len(growth_rates[index_group]) > 10:
                #        q1 = np.quantile(growth_rates[index_group],0.30)
                #        q3 = np.quantile(growth_rates[index_group],0.70)
                #        # Only save values above the 30% standard deviation but below 70%
                #        # and change is not double in size
                #        if growth_rates[index_group][-1] > q1 and growth_rates[index_group][-1] < q3 and abs(percent_difference) <= 100:
                #            growth_rates[index_group].append(percent_difference)
                #    else:
                #        # Ignore change rates above 100 as that is extreme daily growth
                #        # Often caused during rollout of new data sources or agents
                #        if abs(percent_difference) <= 100:
                #            growth_rates[index_group].append(percent_difference)
        else:
            print("File " + file_to_check + " not found")
            mean = 0
            es_data = []
            for record in json_data:
                index_group = record['index_group']
                if len(sizes[index_group]) != 0:
                    # Only apply moving averages if there are at least 5 stored
                    # growth rates or more
                    if len(sizes[index_group]) >= 5:
                        # Apply Exponential Moving Averages to smooth the data set
                        df = pd.DataFrame({'data': sizes[index_group]})
                        df['ewm_alpha_1'] = df['data'].ewm(
                            span=7, adjust=False).mean()
                        # Store the latest moving average
                        mean = df['ewm_alpha_1'].iloc[-1]
                    else:
                        # Not enough values to safely predict moving average
                        # set mean to 0 for no growth
                        mean = 0
                if debug == 1:
                    print(index_group + " previous size " +
                          str(round(record['size'], 2)) + " and cost " +
                          str(round(record['cost'], 2)) +
                          " new moving average is " + str(mean))
                if mean >= 0:
                    record['size'] = round(mean, 2)
                else:
                    record['size'] = 0
                if record['disk'] == 'ssd':
                    record['cost'] = round(record['size'] * .001, 2)
                if record['disk'] == 'sata':
                    record['cost'] = round(record['size'] * .003, 2)
                record['@timestamp'] = str(oldest_file_date.isoformat())
                es_data.append(record)
                if debug == 1:
                    print("New size " + str(record['size']) + " and cost " +
                          str(record['cost']))
                # Create a backup copy of each accounting record
                if debug == 0:
                    with open(file_to_check, 'a') as f:
                        json_content = json.dumps(record)
                        f.write(json_content)
                        f.write('\n')
                else:
                    pass
            if debug == 0:
                if len(es_data) != 0 and debug == 0:
                    elasticsearch_connection = es.build_es_connection(
                        clients[client])
                    results = es.get_list_by_chunk_size(es_data, 100)
                    for result in results:
                        es.bulk_insert_data_to_es(elasticsearch_connection,
                                                  result, "accounting",
                                                  bulk_size=100)
                    elasticsearch_connection.close()
                    elasticsearch_connection = es.build_es_connection(
                        clients["ha"])
                    results = es.get_list_by_chunk_size(es_data, 100)
                    for result in results:
                        es.bulk_insert_data_to_es(elasticsearch_connection,
                                                  result, "accounting",
                                                  bulk_size=100)
                    elasticsearch_connection.close()
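# Usage sketch (comment only). extrapolate_files() relies on module-level
# globals (client, debug, clients); the glob import and file pattern below are
# illustrative assumptions based on the path used inside the function.
#
#   import glob
#   files = [f for f in glob.glob(
#       "/cloud/cloud_configs/business_functions/accounting/"
#       + client + "_accounting-*.json") if "-device-" not in f]
#   extrapolate_files(files)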
def calculate_audit_trail(client_config, settings):
    """Calculates an audit trail record for MSSP clients

    Args:
        client_config (dict): Client configuration file loaded from json content
        settings (dict): Settings loaded from settings.toml
    """
    es_connection = es.build_es_connection(client_config)
    response = es.run_search(es_connection, 'reflex-organizations',
                             '_exists_:name', sort="created_at",
                             limit_to_fields=['name', 'uuid'], size=10000)
    organizations = es.return_field_mapped_to_value_from_query(
        response, 'name', 'uuid')
    start_time = get_start_time()
    end_time = get_end_time()
    for client_name, value in settings['mssp']['clients'].items():
        tenant_name = settings['mssp']['clients'][client_name]['tenant_name']
        print(
            f"Processing tenant for {client_name} with tenant name of {tenant_name}"
        )
        email = value['email']
        uuid = organizations[tenant_name]
        get_audit_trail(es_connection, start_time, uuid, "severity",
                        end_time=end_time)
        report = f"Audit Trail Report for {tenant_name}\r\n\r\n"
        report = report + "This report represents events reviewed by H and A Security Solutions" \
            + " LLC during daily event review activities. The report includes a breakdown of" \
            + " events that have completed review.\r\n\r\nEvent statuses:\r\n\r\n"
        total_events = 0
        dismissed_events = get_audit_trail(es_connection, start_time, uuid,
                                           "dismiss_reason.keyword",
                                           end_time=end_time)
        for name, _ in dismissed_events.items():
            if valid_uuid(name):
                dismiss_reason = "Other"
            else:
                dismiss_reason = name
            report = report + \
                f"{dismiss_reason} - Number of events {dismissed_events[name]}\r\n"
            total_events = total_events + dismissed_events[name]
        report = report + \
            f"\r\nTotal Events Reviewed: {total_events}\r\n\r\nTop Rules\r\n\r\n"
        total_events = 0
        top_rules = get_audit_trail(es_connection, start_time, uuid, "title",
                                    end_time=end_time)
        for rule, _ in top_rules.items():
            report = report + \
                f"{rule} - Number of events: {top_rules[rule]}\r\n"
        top_sources = get_audit_trail(es_connection, start_time, uuid,
                                      "source.keyword", end_time=end_time)
        report = report + "\r\nTop Sources\r\n\r\n"
        for source, _ in top_sources.items():
            report = report + \
                f"{source} - Number of events {top_sources[source]}\r\n"
        for address in list(email):
            send_email(address, "Audit Trail Report", report)
        print(report)
    es_connection.close()
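# Usage sketch (comment only). Assumes settings.toml defines the [mssp.clients]
# section used above; the client config key is a placeholder.
#
#   settings = load_settings()
#   client_config = load_configs()["example_client"]
#   calculate_audit_trail(client_config, settings)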