def make_constrants_map(self, constraints):
    """Build a key -> values multimap from 'key=value' constraint strings."""
    constraints_map = {}
    for constraint in constraints:
        # Split on the first '=' only, so values may themselves contain '='.
        key, value = constraint.split('=', 1)
        RuleUtils.addMulti(constraints_map, key, value)
    return constraints_map
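# A sketch of the shape this produces, assuming RuleUtils.addMulti(d, k, v)
# adds v to a set stored under d[k], creating the set on first use (the set
# semantics are implied by the intersection/remove/add calls on these values
# elsewhere in this module):
#
#   self.make_constrants_map(['eventName=PutObject',
#                             'eventName=GetObject',
#                             'eventSource=s3.amazonaws.com'])
#   # -> {'eventName': {'PutObject', 'GetObject'},
#   #     'eventSource': {'s3.amazonaws.com'}}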
def generate_queries(self, rule_constraint_names):
    """Build an Elasticsearch msearch body: alternating (header, query)
    pairs for the user, op, and environment 'possible' indices, followed by
    the unique and all log-entry indices."""
    queries = []
    user_constraints = set()
    op_constraints = set()
    env_constraints = set()
    for constraint in rule_constraint_names:
        key = constraint.split('=')[0]
        if key.startswith('sourceIPAddress') or key.startswith(
                'userAgent') or key.startswith('userIdentity_invokedBy'):
            env_constraints.add(constraint)
        elif key in self.generation_param_info['valid_keys_sets'][
                'valid_keys_user']:
            user_constraints.add(constraint)
        elif key in self.generation_param_info['valid_keys_sets'][
                'valid_keys_op']:
            op_constraints.add(constraint)

    invalid_events = set()
    # invalid_types, invalid_events = self.process_type_restrictions(op_constraints, resource_constraints)

    # Users
    req_head = {'index': self.user_possible_index, 'type': 'doc'}
    if user_constraints:
        user_query = RuleUtils.create_terms_filter_from_constraints(
            user_constraints)
    else:
        user_query = {"query": {"match_all": {}}, "size": 0}
    queries.extend([req_head, user_query])

    # Ops
    req_head = {'index': self.op_possible_index, 'type': 'doc'}
    if op_constraints:
        op_query = RuleUtils.op_create_terms_filter_from_constraints(
            op_constraints, invalid_events)
    else:
        op_query = {"query": {"match_all": {}}, "size": 0}
    queries.extend([req_head, op_query])

    # Environments
    req_head = {'index': self.environment_possible_index, 'type': 'doc'}
    if env_constraints or invalid_events:
        env_query = RuleUtils.op_create_terms_filter_from_constraints(
            env_constraints, invalid_events)
    else:
        env_query = {"query": {"match_all": {}}, "size": 0}
    queries.extend([req_head, env_query])

    # Unique log entries
    query = RuleUtils.create_query_convert_resource_to_type(
        rule_constraint_names)
    req_head = {'index': self.unique_logs_index, 'type': 'doc'}
    queries.extend([req_head, query])

    # All log entries (same query body, different index)
    req_head = {'index': self.all_logs_index, 'type': 'doc'}
    queries.extend([req_head, query])

    return queries
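# The returned list is a ready-made msearch body of five (header, query)
# pairs: entries [0..1] target the user index, [2..3] the op index, [4..5]
# the environment index, [6..7] the unique-log index, and [8..9] the all-log
# index. A hedged sketch of executing it, assuming `es` is the same
# Elasticsearch client used elsewhere in this module:
#
#   body = self.generate_queries(rule_constraint_names)
#   resp = es.msearch(body=body, request_timeout=300)
#   hit_counts = [r['hits']['total'] for r in resp['responses']]
#   # hits.total is an integer on the 6.x-era clients this code appears
#   # to target (note the doc_type='doc' usage below).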
def process_event(self, dependent_fields, flat_event,
                  inv_param_dependency_mmap, possible_params):
    # Record every observed value for every field.
    for k, v in flat_event.items():
        RuleUtils.addMulti(possible_params, k, v)
    # For each declared dependency k -> k2, record which values of k
    # co-occur with each value of k2.
    for k, v in dependent_fields.items():
        for k2 in v:
            if k in flat_event and k2 in flat_event:
                inner = inv_param_dependency_mmap.setdefault(
                    k, {}).setdefault(k2, {})
                RuleUtils.addMulti(inner, flat_event[k2], flat_event[k])
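# Example of the inverse dependency structure this builds. Assuming
# dependent_fields maps 'eventName' -> {'eventSource'} and a flat event
# contains {'eventName': 'PutObject', 'eventSource': 's3.amazonaws.com'}:
#
#   inv_param_dependency_mmap['eventName']['eventSource']
#   # -> {'s3.amazonaws.com': {'PutObject'}}
#
# i.e. for each value of the dependent field, the set of values of the
# outer field that have been observed together with it.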
def build_dependent_fields(self, valid_keys):
    """Expand groups of mutually dependent fields into a symmetric
    field -> {dependent fields} multimap, restricted to valid_keys."""
    dependent_field_lists = [
        [
            'eventName', 'eventSource', 'eventName_bin',
            'eventName_crud_bin', 'eventType', 'eventVersion', 'apiVersion'
        ],
        [
            'eventType',
            'userIdentity_sessionContext_attributes_mfaAuthenticated'
        ],
        ['userIdentity_userName', 'userIdentity_accessKeyId'],
        [
            'sourceIPAddressLocation', 'sourceIPAddressInternal',
            'userIdentity_invokedBy'
        ],
        [
            'userAgent', 'userAgent_bin', 'userAgent_general_bin',
            'userIdentity_invokedBy'
        ],
    ]
    # Request-parameter and additional fields depend on the operation fields.
    for valid_key in valid_keys:
        if valid_key.startswith('requestParameters_') or valid_key.startswith(
                'additional'):
            dependent_field_lists.append([
                valid_key, 'eventName', 'eventSource', 'eventName_bin',
                'eventName_crud_bin'
            ])
    # Every field in a group depends on every other field in that group.
    dependent_fields = {}
    for group in dependent_field_lists:
        for field_a in group:
            for field_b in group:
                if field_a == field_b:
                    continue
                RuleUtils.addMulti(dependent_fields, field_a, field_b)
    # Drop outer keys that are not in the valid set.
    for k in [k for k in dependent_fields if k not in valid_keys]:
        dependent_fields.pop(k, None)
    return dependent_fields
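# Hypothetical usage sketch: for valid_keys = {'eventName', 'eventSource'},
# only the outer keys survive the final filter:
#
#   self.build_dependent_fields({'eventName', 'eventSource'})
#   # -> {'eventName': {'eventSource', 'eventName_bin', 'eventType', ...},
#   #     'eventSource': {'eventName', 'eventVersion', ...}}
#
# Note the dependent value sets may still contain fields outside
# valid_keys; only the top-level keys are pruned.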
def universe_period_intersection(event_normalizer, parameter_universe,
                                 dynamic_keys):
    """Coroutine: receives raw events via send(), collects the values seen
    for each dynamic key, and on close() intersects the parameter universe
    with the values actually observed in the period."""
    current_values = {}
    while True:
        try:
            event = yield
            flat_event = event_normalizer.normalized_user_op_resource_from_event(
                event)
            for k in dynamic_keys:
                if k in flat_event:
                    RuleUtils.addMulti(current_values, k, flat_event[k])
        except GeneratorExit:
            for k in dynamic_keys:
                # .get() guards against dynamic keys never observed this period.
                parameter_universe['possible_params'][k] = current_values.get(
                    k, set())
            inv_mmap = parameter_universe['inv_param_dependency_mmap']
            for outer_k in inv_mmap:
                for inner_k in inv_mmap[outer_k]:
                    if outer_k in dynamic_keys:
                        # Restrict each dependent value set to the values
                        # seen this period.
                        for k, v in inv_mmap[outer_k][inner_k].items():
                            inv_mmap[outer_k][inner_k][k] = v.intersection(
                                current_values.get(outer_k, set()))
                    if inner_k in dynamic_keys:
                        # Drop inner values that were not seen this period.
                        keys_to_pop = [
                            k for k in inv_mmap[outer_k][inner_k]
                            if k not in current_values.get(inner_k, set())
                        ]
                        for k in keys_to_pop:
                            inv_mmap[outer_k][inner_k].pop(k)
            return
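# Minimal usage sketch of the coroutine protocol (event_normalizer,
# parameter_universe, and events are hypothetical stand-ins for the
# caller's objects):
#
#   gen = universe_period_intersection(event_normalizer, parameter_universe,
#                                      dynamic_keys={'userIdentity_userName'})
#   next(gen)            # prime the coroutine so it pauses at `yield`
#   for event in events:
#       gen.send(event)  # feed raw events one at a time
#   gen.close()          # raises GeneratorExit inside -> intersection runs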
def calc_overassignments(self, itemsets, mapping, names,
                         coverage_rate_denominator, single_value_columns):
    requests = []
    rules = []
    results = []
    for item, count in itemsets.items():
        rule_constraint_names = RuleUtils.item_to_constraint_set(
            item, mapping, names)
        rule_constraint_names.update(single_value_columns)
        item_queries = self.generate_queries(rule_constraint_names)
        requests.extend(item_queries)
        rule_eval = RuleEval()
        rule_eval.set_constrants(rule_constraint_names)
        rule_eval.all_log_entries = count
        rule_eval.coverage_rate = count / float(coverage_rate_denominator)
        rule_eval.under_assignments = coverage_rate_denominator - count
        # item_queries[7] is the query body for the unique-log-entries index.
        rule_eval.q = item_queries[7]
        rules.append(rule_eval)
        # Flush in batches to keep the msearch body small.
        if len(requests) > 400:
            resp = es.msearch(body=requests, request_timeout=300)
            requests.clear()
            results.extend(
                self.process_separated_buffered_results(
                    rules, resp['responses']))
            rules.clear()
            if len(results) > 2000:
                # For l_dist, smaller is better; otherwise larger is better.
                reverse = self.job_config['abac_params']['metric'][
                    'type'] != 'l_dist'
                results = sorted(results,
                                 key=lambda x: x.sort_metric_value,
                                 reverse=reverse)
                results = results[:500]  # only keep the top 500 to save memory
    # Flush whatever is left after the loop.
    if requests:
        resp = es.msearch(body=requests, request_timeout=300)
        results.extend(
            self.process_separated_buffered_results(rules,
                                                    resp['responses']))
        requests.clear()
    return results
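# The batching above follows the generic msearch flush idiom: accumulate
# (header, body) pairs, flush once the buffer passes a threshold, then
# flush the remainder after the loop. Each itemset contributes 10 list
# entries (5 header/body pairs), so the 400-entry threshold flushes
# roughly every 40 itemsets.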
def build_orange_table_from_es_logs(
        mongo_query,
        valid_keys=None,
        prune_null_resources=True,
        all_logs_index='flat-all-log-entries',
        unique_logs_index='flat-unique-log-entries'):
    """Scan the unique-log index to collect the value domain of each field,
    then build an Orange Table over the all-log index."""
    field_values = {}
    single_value_columns = set()  # TODO return values that are always true, add them to Rules
    records = 0
    key_value_counter = Counter()
    paginator = helpers.scan(es,
                             query={"query": {"match_all": {}}},
                             index=unique_logs_index,
                             doc_type='doc')
    for hit in paginator:
        records += 1
        for key, value in hit['_source'].items():
            if key == '_id' or (valid_keys and key not in valid_keys):
                continue
            RuleUtils.addMulti(field_values, key, value)
            key_value_counter.update(['%s=%s' % (key, value)])
    for k, v in key_value_counter.items():
        if v == records:
            # Ignore fields that carry the same value in every record.
            single_value_columns.add(k)
            field_name = k.split('=')[0]
            field_values.pop(field_name, None)

    orange_columns = []
    for key, value in field_values.items():
        # Coerce all values to strings; removing and adding elements while
        # iterating the set (as the naive loop would) raises RuntimeError.
        value = {str(elem) for elem in value}
        try:
            column = DiscreteVariable(key, values=value)
        except Exception:
            traceback.print_exc()
            print(value)
            continue  # skip the column; appending an unbound name would fail
        orange_columns.append(column)

    # if use_resources:
    #     resource_encoder = OrangeTableResourceColumnGenerator(mongo_query)
    #     resource_columns = resource_encoder.get_table_columns()
    #     orange_columns.extend(resource_columns)
    # else:
    resource_encoder = None

    domain = Domain(orange_columns)
    table = Table(domain)
    paginator = helpers.scan(es,
                             query={"query": {"match_all": {}}},
                             index=all_logs_index,
                             doc_type='doc')
    for hit in paginator:
        instance = createInstance(domain, hit['_source'], resource_encoder,
                                  prune_null_resources)
        table.append(instance)
    return table, single_value_columns
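# Hypothetical call, assuming the two flat-log indices exist and hold
# documents whose _source fields are scalar key/value pairs (mongo_query is
# only consumed by the commented-out resource-encoder path, so None is fine):
#
#   table, constant_columns = build_orange_table_from_es_logs(
#       mongo_query=None, valid_keys={'eventName', 'eventSource'})
#   print(len(table), sorted(constant_columns))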
correlations = [
    'eventName -> eventName_bin',
    'eventName -> eventSource',
    'eventName -> eventType',
    'eventName -> eventVersion',
    'eventSource -> eventType',
    'eventTime_weekday -> eventTime_weekend',
    'sourceIPAddress -> sourceIPAddress_bin',
    'userAgent_bin -> eventType',
    'userAgent_bin -> sourceIPAddress_bin',
    'userAgent_bin -> userAgent_general_bin',
    'userAgent_bin -> userIdentity_invokedBy',
    'userAgent_general_bin -> eventType',
    'userIdentity_invokedBy -> sourceIPAddress_bin',
    'eventName -> requestParameters_encryptionContext_PARAMETER_ARN',
    'eventName -> requestParameters_path',
    'eventName -> requestParameters_pipelineName',
    'eventName -> apiVersion',
    'eventSource -> eventVersion',
    'eventName -> requestParameters_name',
    'eventName -> requestParameters_maxResults',
    'userIdentity_accessKeyId -> userIdentity_userName',
    'apiVersion -> eventType',
    'apiVersion -> eventVersion',
    'userIdentity_invokedBy -> eventType',
    'apiVersion -> eventSource',
    'userIdentity_invokedBy -> sourceIPAddress',
    'userAgent_bin -> userIdentity_sessionContext_attributes_mfaAuthenticated',
    'sourceIPAddress_bin -> eventType',
    'eventType -> eventVersion',
    'eventType -> userIdentity_accessKeyId',
    'eventType -> userIdentity_sessionContext_attributes_mfaAuthenticated'
]

# Collapse the 'a -> b' correlation strings into a key -> {keys} multimap.
dependency_mmap = {}
for corr in correlations:
    k1, k2 = corr.split(' -> ')
    RuleUtils.addMulti(dependency_mmap, k1, k2)
print(dependency_mmap)
def set_constrants(self, constraints):
    """Store the constraint set and index it as a key -> values multimap."""
    self.constraints = constraints
    for constraint in constraints:
        # Split on the first '=' only, so values may themselves contain '='.
        key, value = constraint.split('=', 1)
        RuleUtils.addMulti(self.constraints_map, key, value)
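# Example, assuming self.constraints_map starts out as an empty dict and
# RuleUtils.addMulti behaves as the set-valued multimap described above:
#
#   rule_eval.set_constrants({'eventName=PutObject',
#                             'sourceIPAddressInternal=True'})
#   rule_eval.constraints_map
#   # -> {'eventName': {'PutObject'},
#   #     'sourceIPAddressInternal': {'True'}}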