def combine_analytic_raw_and_by_cmp_fields(
        self, analytic_table, analytic_by_cmp_fields_table,
        analytic_raw_and_by_cmp_fields_table):
    """Join per-source analytic rows with their comparison-group statistics.

    Builds ``analytic_raw_and_by_cmp_fields_table`` via FedSQL as a join of
    ``analytic_table`` (alias ``s``) and ``analytic_by_cmp_fields_table``
    (alias ``c``), keeping every source column plus the group summary columns
    renamed to cmpMin / cmpMean / cmpMax / cmpCount.

    No-op if the output table already exists. The promoted result is left in
    the session's global scope.
    """
    if table_exists(self.conn, analytic_raw_and_by_cmp_fields_table):
        return
    self.conn.loadactionset("fedsql")
    query = F"CREATE TABLE {analytic_raw_and_by_cmp_fields_table} AS "
    tables = [
        F"{analytic_table} as s",
        F"{analytic_by_cmp_fields_table} as c"
    ]
    fields = [
        's.* ',
        'c._Min_ as "cmpMin"',
        'c._Mean_ as "cmpMean"',
        'c._Max_ as "cmpMax"',
        'c._Nobs_ as "cmpCount"'
    ]
    query += " SELECT " + ", ".join(fields)
    query += " FROM " + ", ".join(tables)
    # Equi-join on every comparison field; with no comparison fields the
    # WHERE clause is omitted entirely (cross join against the summary).
    where_clauses = ["s." + f + " = " + "c." + f
                     for f in self.comparison_fields]
    if where_clauses:
        query += " WHERE " + " AND ".join(where_clauses)
    # Collapse whitespace so the query logs on a single readable line.
    self.log.debug("Combine by_src and by_cmp tables:" +
                   " ".join(query.split()))
    self.conn.fedsql.execdirect(query)
    self.conn.promote(name=analytic_raw_and_by_cmp_fields_table)
def compute_evidence_detail(self, input_table, input_record_type,
                            all_event_table, ev_detail_table):
    """Tag raw input records with the anomalous event they belong to.

    Creates ``ev_detail_table`` by joining the raw ``input_table`` (alias
    ``r``) to ``all_event_table`` (alias ``n``) on the source-id and
    comparison fields, restricted to events with anomalousFlag = 1. Each
    output row carries a literal recordType column plus the event id and
    flag. No-op when the output table already exists.
    """
    if table_exists(self.conn, ev_detail_table):
        self.log.debug("Evidence details table already exists")
        return
    self.conn.loadactionset("fedsql")
    query = F"CREATE TABLE {ev_detail_table} AS "
    query += ('SELECT \'' + input_record_type + '\' AS "recordType", r.*, '
              'n.eventId as "eventId", n.anomalousFlag as "anomalousFlag" ')
    source_tables = [F"{all_event_table} AS n", F"{input_table} AS r"]
    query += " FROM " + ", ".join(source_tables)
    # Join keys: all source-id fields followed by all comparison fields,
    # then keep only anomalous events.
    join_keys = self.src_id_fields + self.comparison_fields
    conditions = ["n." + key + " = " + "r." + key for key in join_keys]
    conditions.append("n.anomalousFlag = 1")
    query += " WHERE " + " AND ".join(conditions)
    self.log.debug("Evidence detail table query:" + " ".join(query.split()))
    self.conn.fedsql.execdirect(query)
    self.conn.promote(name=ev_detail_table)
def run_operation(self, operation, input_field, groupby, input_table,
                  output_table):
    """Run a grouped CAS aggregation action and promote the result.

    Args:
        operation: Action name, case-insensitive — 'DISTINCT' or 'SUMMARY'.
        input_field: Column the action aggregates.
        groupby: Group-by column list for the action.
        input_table: Name of the input CAS table.
        output_table: Name of the promoted output CAS table.

    Raises:
        ValueError: if *operation* is neither DISTINCT nor SUMMARY.
          (ValueError subclasses Exception, so existing broad handlers
          still catch it.)

    No-op when the output table already exists.
    """
    if table_exists(self.conn, output_table):
        return
    # Both actions take identical arguments; dispatch instead of
    # duplicating the call in each branch.
    actions = {
        'DISTINCT': self.conn.distinct,
        'SUMMARY': self.conn.summary,
    }
    try:
        action = actions[operation.upper()]
    except KeyError:
        raise ValueError(F"Unknown operation: {operation}") from None
    action(table={"name": input_table, "groupby": groupby},
           inputs=input_field,
           casout={"name": output_table, "promote": True})
def compute_evidence(self, input_table, all_event_table, evidence_table,
                     fields_of_interest=None):
    """Build the evidence table linking raw records to detected events.

    Selects event identifiers (eventTypeId, eventId, anomalousFlag) from
    ``all_event_table`` (alias ``n``) alongside the available columns of
    interest from ``input_table`` (alias ``r``), joined on the source-id
    and comparison fields. When the source aggregation action is DISTINCT,
    a reduced column set is selected with SELECT DISTINCT; otherwise the
    fuller flow-record column set (times, ports, protocol, ...) is used.

    Args:
        input_table: Raw input CAS table name.
        all_event_table: Table of all scored events.
        evidence_table: Name of the promoted output table.
        fields_of_interest: Optional extra columns to carry through; the
            caller's list is never modified.

    No-op when the output table already exists.
    """
    if table_exists(self.conn, evidence_table):
        self.log.debug("Evidence table already exists")
        return
    self.conn.loadactionset("fedsql")
    ev_create_table = F"CREATE TABLE {evidence_table} AS "
    ev_fields = ["n.eventTypeId", "n.eventId", 'n.anomalousFlag']
    distinct_flag = (self.src_aggregation_action.upper() == 'DISTINCT')
    # Copy before extending: the original `+=` extended a caller-supplied
    # list in place, mutating the caller's argument across calls.
    fields_of_interest = list(fields_of_interest) if fields_of_interest else []
    if distinct_flag:
        fields_of_interest += self.src_id_fields + self.comparison_fields + [
            self.input_field
        ]
    else:
        fields_of_interest += (["starttime"] + self.src_id_fields +
                               ["srcPort"] + self.dst_id_fields +
                               ["dstPort"] + self.comparison_fields +
                               ["protocol", self.input_field])
    # Only columns that actually exist in the input table are selected.
    fields_to_select = get_available_fields(self.conn, fields_of_interest,
                                            input_table)
    ev_fields += list(map(lambda x: "r." + x, fields_to_select))
    ev_select = "SELECT "
    if distinct_flag:
        ev_select += " DISTINCT "
    ev_select += ", ".join(ev_fields)
    from_tables = [F"{input_table} AS r", F"{all_event_table} AS n"]
    ev_from = " FROM " + ", ".join(from_tables)
    # Join on whichever id/comparison fields exist in the event table.
    fields_of_interest = self.src_id_fields + self.comparison_fields
    fields_in_where = get_available_fields(self.conn, fields_of_interest,
                                           all_event_table)
    ev_where_clauses = list(
        map(lambda x: "n." + x + " = " + "r." + x, fields_in_where))
    ev_where = " WHERE " + " AND ".join(ev_where_clauses)
    ev_query = ev_create_table + ev_select + ev_from + ev_where
    self.log.debug("Evidence table query:" + " ".join(ev_query.split()))
    self.conn.fedsql.execdirect(ev_query)
    self.conn.promote(name=evidence_table)
def resolve_filter(self, filter):
    """Materialize the output table of *filter*, resolving inputs recursively.

    Looks up the filter spec in ``self.filters``, and if the output table
    is missing: ensures the input table exists (loading the raw input
    table, or recursively resolving the filter that produces it), then
    partitions the input with the filter's WHERE clause into a promoted
    output table, indexed by ``self.index_vars`` when configured.
    """
    self.log.debug(F"Resolving filter: {filter}")
    spec = self.filters[filter]
    output = spec['output'].format(intable=self.intable)
    finput = spec['input'].format(intable=self.intable)
    clause = spec['clause']
    if not table_exists(self.conn, output):
        if not table_exists(self.conn, finput):
            if finput == self.intable:
                # Input is the raw table — load it from disk.
                loaded = load_table(
                    self.conn,
                    self.intable,
                    self.input_file_rel_path,
                    self.input_caslib,
                    import_options=self.input_import_options,
                    index_vars=self.index_vars)
                if not loaded:
                    raise Exception(
                        F"Input table {self.input_file} not found.")
                self.log.debug(F"Input table {self.intable} loaded.")
            else:
                # Input is produced by another filter — resolve it first.
                self.resolve_filter(self.find_filter(finput))
        where = clause.format(table="", not_missing="Is Not Missing")
        casout = {"name": output, "promote": True}
        if self.index_vars is not None:
            casout["indexvars"] = self.index_vars
        self.conn.table.partition(
            table={"name": finput, "where": where}, casout=casout)
    self.log.debug(F"Completed resolving filter: {filter}")
def create_table_to_merge(self, table_name):
    """Collapse an event table to one row per BY-group, ready for merging.

    Generates and runs a SAS DATA step that, for each group of
    src-id + comparison fields, accumulates related event ids into a
    ``<field>_relEvents`` string, computes a time bucket id from the
    event start time, and renames score/anomalousFlag with the analytic
    field prefix. Returns the (promoted) merge table's name; returns
    early if it already exists.
    """
    merge_table_name = F"{table_name}_merge"
    if table_exists(self.conn, merge_table_name):
        self.log.debug("Merge events table already exists")
        return merge_table_name
    # The analytic column for this event table, e.g. a count/metric name.
    analytic_field_name = self.event_tables_cols[table_name]
    time_id_str = ''
    dur = self.event_analytic_duration["value"]
    # Time bucket: ceil the hour (or minute) component to a multiple of
    # the configured duration, then rebuild a SAS datetime with dhms().
    if self.event_analytic_duration["unit"].upper() == 'HOUR':
        time_id_str = F'dhms(datepart(eventStartTime_sas), ceil(hour(eventStartTime_sas)/{dur}) * {dur}, 0, 0)'
    else:
        time_id_str = F'dhms(datepart(eventStartTime_sas), hour(eventStartTime_sas), ceil(minute(eventStartTime_sas)/{dur}) * {dur}, 0)'
    by_cols = self.src_id_fields + self.comparison_fields + [
        analytic_field_name, 'score', 'anomalousFlag'
    ]
    by_cols_str = " ".join(by_cols)
    keep_cols = ["timeId"] + self.src_id_fields + self.comparison_fields
    keep_cols += [
        analytic_field_name, F"{analytic_field_name}_score",
        F"{analytic_field_name}_anomalousFlag",
        F"{analytic_field_name}_relEvents"
    ]
    keep_cols_str = " ".join(keep_cols)
    # NOTE(review): first./last. are hardcoded to srcIpAddress even though
    # the BY columns come from self.src_id_fields — confirm src_id_fields
    # always starts with srcIpAddress, otherwise group boundaries break.
    # NOTE(review): relEvents is capped at $100 — long event-id lists will
    # be truncated by catx; verify this is acceptable.
    code = F'''
    data {merge_table_name} (keep={keep_cols_str});
    retain {keep_cols_str};
    set {table_name};
    by {by_cols_str};
    length {analytic_field_name}_relEvents $ 100;
    if first.srcIpAddress then {analytic_field_name}_relEvents = '';
    {analytic_field_name}_relEvents = catx(', ', trim(eventId), {analytic_field_name}_relEvents);
    eventStartTime_sas = eventStartTime/1000000 + 315619200; /* Convert from unix micros to sas */
    timeId = {time_id_str}; /* Ceil to hour or min based on duration */
    {analytic_field_name}_score = score;
    {analytic_field_name}_anomalousFlag = anomalousFlag;
    if last.srcIpAddress then output;
    '''
    self.log.debug("Merge events table DS code:" + " ".join(code.split()))
    self.conn.datastep.runcode(code)
    self.conn.promote(merge_table_name)
    return merge_table_name
def standardize_analytic(self, input_table, analytic_field, deviation_table):
    """Standardize the analytic field (median/IQR) per comparison group.

    Runs dataPreprocess.transform grouped by the comparison fields,
    writing the result to a promoted ``deviation_table``. If the
    transform fails (e.g. zero IQR / degenerate groups), falls back to a
    DATA step that copies the input and sets the transformed column
    ``_TR1_<analytic_field>`` to 0 for every row.

    No-op when the output table already exists.
    """
    if table_exists(self.conn, deviation_table):
        return
    copy_vars = self.src_id_fields + self.comparison_fields + [analytic_field] + \
        ['cmpMin', 'cmpMean', 'cmpMax', 'cmpCount']
    groupby = self.comparison_fields
    try:
        self.conn.datapreprocess.transform(
            table={"name": input_table, "groupby": groupby},
            copyvars=copy_vars,
            requestpackages=[{
                "inputs": analytic_field,
                "function": {
                    "method": "standardize",
                    "arguments": {"location": "median", "scale": "iqr"}
                }
            }],
            casout={"name": deviation_table, "promote": True})
        self.log.debug("Standardization complete.")
    except SWATCASActionError as e:
        self.log.debug(F"Standardization failed: {e}")
        # Fallback: deviation table with all deviations set to 0.
        # Deduplicate (comparison fields appear in both copy_vars and
        # groupby) while preserving order.
        copy_vars_str = " ".join(dict.fromkeys(copy_vars + groupby))
        # Fixed: the retain statement previously emitted the literal text
        # "copy_vars_str" because the f-string placeholder lacked braces.
        code = F'''
        data {deviation_table};
        retain {copy_vars_str};
        set {input_table};
        _TR1_{analytic_field} = 0;
        '''
        self.conn.datastep.runcode(code)
        self.conn.promote(deviation_table)
def summarize_analytic_by_cmp_fields(self, analytic_table, analytic_field,
                                     analytic_by_cmp_fields_table):
    """Summarize the analytic field grouped by the comparison fields.

    Runs the CAS ``summary`` action over ``analytic_table`` grouped by
    ``self.comparison_fields`` and promotes the result as
    ``analytic_by_cmp_fields_table``. No-op when the output exists.
    """
    if table_exists(self.conn, analytic_by_cmp_fields_table):
        return
    self.conn.summary(
        table={"name": analytic_table, "groupby": self.comparison_fields},
        inputs=analytic_field,
        casout={"name": analytic_by_cmp_fields_table, "promote": True})
def compute_all_events(self, input_table, cdf_table, cdf_field,
                       analytic_field, min_value, all_event_table):
    """Build the all-events table: one scored event per source/group.

    Two stages:
      1. FedSQL: join the raw ``input_table`` (alias ``r``) with the CDF
         score table (alias ``n``) on the source-id and comparison
         fields, aggregating start/end/duration times per group.
      2. DATA step: derive an eventId as the hex MD5 of all columns and
         set anomalousFlag from the score threshold and minimum value.

    No-op when the output table already exists.
    """
    if table_exists(self.conn, all_event_table):
        self.log.debug("All events table already exists")
        return
    self.conn.loadactionset("fedsql")
    ne_create_table = F"CREATE TABLE {all_event_table} AS "
    event_type_id = self.event_type_id
    # The cdf_field column is exposed under the analytic field's name,
    # and the cdf value becomes the event "score".
    ne_fields = [
        F'{event_type_id} as "eventTypeId"',
        'n.' + cdf_field + ' as "' + analytic_field + '"',
        'n.cdf as "score"'
    ]
    ne_fields += list(
        map(lambda x: 'n.' + x, ['cmpMin', 'cmpMean', 'cmpMax', 'cmpCount']))
    # Event time span aggregated over all matching raw records.
    ne_fields += [
        'min(r.starttime) as "eventStartTime"',
        'max(r.starttime) as "eventEndTime"',
        'max(r.starttime) - min(r.starttime) as "eventDuration"'
    ]
    all_fields = ne_fields + list(
        map(lambda x: "r." + x + ' as "' + x + '"',
            self.src_id_fields + self.comparison_fields))
    ne_select = "SELECT " + ", ".join(all_fields)
    from_tables = [F"{input_table} AS r", F"{cdf_table} as n"]
    ne_from = " FROM " + ", ".join(from_tables)
    # Equi-join on source-id fields, then comparison fields.
    where_clauses = list(
        map(lambda x: "n." + x + " = " + "r." + x, self.src_id_fields))
    where_clauses += list(
        map(lambda x: "n." + x + " = " + "r." + x, self.comparison_fields))
    ne_where = " WHERE " + " AND ".join(where_clauses)
    # GROUP BY must list every non-aggregated selected column.
    groupby_fields = ["n." + cdf_field, "n.cdf"]
    groupby_fields += list(
        map(lambda x: 'n.' + x, ['cmpMin', 'cmpMean', 'cmpMax', 'cmpCount']))
    groupby_fields += list(map(lambda x: "r." + x, self.src_id_fields))
    groupby_fields += list(map(lambda x: "r." + x, self.comparison_fields))
    ne_group_by = " GROUP BY " + ", ".join(groupby_fields)
    all_event_query = ne_create_table + ne_select + ne_from + ne_where + ne_group_by
    self.log.debug("All event table query:" +
                   " ".join(all_event_query.split()))
    self.conn.fedsql.execdirect(all_event_query)
    # Stage 2: rewrite the table in place, adding eventId + anomalousFlag.
    ne_code = F"DATA {all_event_table}; SET {all_event_table}; FORMAT eventId $HEX32.; "
    # all_fields = ["eventTypeId", analytic_field, "score",
    #               "eventStartTime", "eventEndTime", "eventDuration"]
    #
    # all_fields += self.src_id_fields + self.comparison_fields
    # eventId hashes every column currently in the table, so it is
    # deterministic for identical rows.
    all_fields = self.conn.columninfo(
        all_event_table)["ColumnInfo"]["Column"].tolist()
    all_fields_str = ", ".join(all_fields)
    event_id = F" eventId = put(md5(cats({all_fields_str})), $hex32.);"
    threshold = self.threshold
    ne_flag = F" IF score > {threshold} AND {analytic_field} > {min_value} THEN anomalousFlag = 1; "
    ne_flag += " ELSE anomalousFlag = 0;"
    ne_code = ne_code + event_id + ne_flag
    self.log.debug("All events DS code:" + " ".join(ne_code.split()))
    # NOTE(review): other methods call self.conn.datastep.runcode; this
    # uses self.conn.runcode — confirm both dispatch to the same action.
    self.conn.runcode(ne_code)
    self.conn.promote(name=all_event_table)
def runAnalytic(self):
    """Merge all per-event-type tables into one promoted, saved table.

    Steps: substitute the input-table name into the configured event
    table names, load each event table from disk, reduce each to a
    mergeable form via create_table_to_merge, MERGE them by
    timeId + src-id + comparison fields in a DATA step, impute missing
    numeric values to 0, promote, and save the result.

    NOTE: mutates ``self.event_tables_cols`` in place (re-keys it with
    the substituted table names).
    """
    qual_all_events_table = self.merged_all_events_table.format(
        intable=self.intable)
    if table_exists(self.conn, qual_all_events_table):
        self.log.debug("Merge events table already exists")
        return
    # Substitute intable in event_table_cols
    event_tables_cols_mod = {}
    for key, value in self.event_tables_cols.items():
        event_tables_cols_mod[key.format(intable=self.intable)] = value
    self.event_tables_cols = event_tables_cols_mod
    # Get list of all event tables
    tables = list(self.event_tables_cols.keys())
    # Load each of the table if it is not already loaded
    rel_paths = []
    for table in tables:
        # Event type id is encoded as the third-from-last underscore
        # segment of the table name — TODO confirm the naming scheme.
        event_type_id = table.split("_")[-3]
        rel_path = self.output_file_rel_path(event_type_id)
        rel_paths.append(rel_path)
    loaded_tables = load_tables(self.conn, tables, rel_paths, self.ae_caslib)
    # Create tables with desired columns
    tables_to_merge = list(map(self.create_table_to_merge, loaded_tables))
    if len(tables_to_merge) == 0:
        self.log.error("No tables to merge")
        return
    tables_to_merge_str = " ".join(tables_to_merge)
    # Run datastep to merge the tables
    groupby = "by " + " ".join(["timeId"] + self.src_id_fields +
                               self.comparison_fields) + "; "
    code = "DATA " + qual_all_events_table + "; MERGE " + \
        tables_to_merge_str + "; " + groupby + " run;"
    self.log.debug(code)
    self.conn.datastep.runcode(code)
    # Impute missing values: set every missing numeric to 0 so the
    # merged rows are dense.
    code = "data {qual_all_events_table}; \
set {qual_all_events_table};\
array change _numeric_;\
do over change;\
if change=. then change=0;\
end;\
run ;"
    code = code.format(qual_all_events_table=qual_all_events_table)
    self.log.debug("Merge events DS code:" + " ".join(code.split()))
    self.conn.datastep.runcode(code)
    self.conn.promote(qual_all_events_table)
    # Save the table
    output_file_rel_path = self.output_file_rel_path()
    save_table(self.conn, qual_all_events_table, output_file_rel_path,
               self.ae_caslib, replace=True)
    self.log.debug("Saved merge events table")