def combine_analytic_raw_and_by_cmp_fields(
            self, analytic_table, analytic_by_cmp_fields_table,
            analytic_raw_and_by_cmp_fields_table):
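        """Join the per-source analytic table with the by-comparison-fields
        summary table via FedSQL, exposing the group statistics as
        cmpMin, cmpMean, cmpMax and cmpCount."""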

        if table_exists(self.conn, analytic_raw_and_by_cmp_fields_table):
            return

        self.conn.loadactionset("fedsql")

        query = F"CREATE TABLE {analytic_raw_and_by_cmp_fields_table} AS "

        tables = [
            F"{analytic_table} as s", F"{analytic_by_cmp_fields_table} as c"
        ]

        fields = [
            's.* ', 'c._Min_ as "cmpMin"', 'c._Mean_ as "cmpMean"',
            'c._Max_ as "cmpMax"', 'c._Nobs_ as "cmpCount"'
        ]

        query += " SELECT " + ", ".join(fields)
        query += " FROM " + ", ".join(tables)

        where_clauses = list(
            map(lambda x: "s." + x + " = " + "c." + x, self.comparison_fields))

        if len(where_clauses) > 0:
            query += " WHERE " + " AND ".join(where_clauses)

        self.log.debug("Combine by_src and by_cmp tables:" +
                       " ".join(query.split()))

        self.conn.fedsql.execdirect(query)
        self.conn.promote(name=analytic_raw_and_by_cmp_fields_table)

    def compute_evidence_detail(self, input_table, input_record_type,
                                all_event_table, ev_detail_table):
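        """Build the evidence-detail table: every raw input record joined to
        the anomalous events (anomalousFlag = 1) it contributed to, tagged
        with the given record type and the matching eventId."""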

        if table_exists(self.conn, ev_detail_table):
            self.log.debug("Evidence details table already exists")
            return

        self.conn.loadactionset("fedsql")

        query = F"CREATE TABLE {ev_detail_table} AS "
        query += (F"SELECT '{input_record_type}' AS \"recordType\", r.*, "
                  'n.eventId as "eventId", n.anomalousFlag as "anomalousFlag" ')

        from_tables = [F"{all_event_table} AS n", F"{input_table} AS r"]
        query += " FROM " + ", ".join(from_tables)

        where_clauses = list(
            map(lambda x: "n." + x + " = " + "r." + x,
                self.src_id_fields + self.comparison_fields))

        where_clauses.append("n.anomalousFlag = 1")

        query += " WHERE " + " AND ".join(where_clauses)

        self.log.debug("Evidence detail table query:" +
                       " ".join(query.split()))
        self.conn.fedsql.execdirect(query)
        self.conn.promote(name=ev_detail_table)

    def run_operation(self, operation, input_field, groupby, input_table,
                      output_table):
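        """Run a simple CAS aggregation (DISTINCT or SUMMARY) of input_field
        over the groupby fields and promote the output table."""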

        if table_exists(self.conn, output_table):
            return

        if operation.upper() == 'DISTINCT':
            out = self.conn.distinct(table={
                "name": input_table,
                "groupby": groupby
            },
                                     inputs=input_field,
                                     casout={
                                         "name": output_table,
                                         "promote": True
                                     })

        elif operation.upper() == 'SUMMARY':
            out = self.conn.summary(table={
                "name": input_table,
                "groupby": groupby
            },
                                    inputs=input_field,
                                    casout={
                                        "name": output_table,
                                        "promote": True
                                    })

        else:
            raise Exception(F"Unknown operation: {operation}")

    def compute_evidence(self,
                         input_table,
                         all_event_table,
                         evidence_table,
                         fields_of_interest=None):
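        """Build the evidence table by joining the raw input records to all
        events on the source-id and comparison fields. If fields_of_interest
        is not given, a default set is chosen based on whether the source
        aggregation action is DISTINCT."""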

        if table_exists(self.conn, evidence_table):
            self.log.debug("Evidence table already exists")
            return

        self.conn.loadactionset("fedsql")

        ev_create_table = F"CREATE TABLE {evidence_table} AS "

        ev_fields = ["n.eventTypeId", "n.eventId", 'n.anomalousFlag']

        distinct_flag = (self.src_aggregation_action.upper() == 'DISTINCT')

        if fields_of_interest is None:
            fields_of_interest = []

            if distinct_flag:
                fields_of_interest += self.src_id_fields + self.comparison_fields + [
                    self.input_field
                ]
            else:
                fields_of_interest += ["starttime"] + self.src_id_fields + ["srcPort"] + self.dst_id_fields + \
                                      ["dstPort"] + self.comparison_fields + ["protocol", self.input_field]

        fields_to_select = get_available_fields(self.conn, fields_of_interest,
                                                input_table)

        ev_fields += list(map(lambda x: "r." + x, fields_to_select))

        ev_select = "SELECT "

        if distinct_flag:
            ev_select += " DISTINCT "

        ev_select += ", ".join(ev_fields)

        from_tables = [F"{input_table} AS r", F"{all_event_table} AS n"]
        ev_from = " FROM " + ", ".join(from_tables)

        join_fields = self.src_id_fields + self.comparison_fields

        fields_in_where = get_available_fields(self.conn, join_fields,
                                               all_event_table)

        ev_where_clauses = list(
            map(lambda x: "n." + x + " = " + "r." + x, fields_in_where))

        ev_where = " WHERE " + " AND ".join(ev_where_clauses)

        ev_query = ev_create_table + ev_select + ev_from + ev_where

        self.log.debug("Evidence table query:" + " ".join(ev_query.split()))
        self.conn.fedsql.execdirect(ev_query)
        self.conn.promote(name=evidence_table)

    def resolve_filter(self, filter):
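        """Materialize a named filter's output table, loading the input table
        or recursively resolving upstream filters as needed, then partitioning
        the filter's input with its WHERE clause."""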

        self.log.debug(F"Resolving filter: {filter}")
        output = self.filters[filter]['output'].format(intable=self.intable)
        finput = self.filters[filter]['input'].format(intable=self.intable)
        clause = self.filters[filter]['clause']

        if not table_exists(self.conn, output):
            if not table_exists(self.conn, finput):

                if finput == self.intable:
                    table_loaded = load_table(
                        self.conn,
                        self.intable,
                        self.input_file_rel_path,
                        self.input_caslib,
                        import_options=self.input_import_options,
                        index_vars=self.index_vars)

                    if not table_loaded:
                        raise Exception(
                            F"Input table {self.input_file} not found.")
                    self.log.debug(F"Input table {self.intable} loaded.")

                else:
                    ifilter = self.find_filter(finput)
                    self.resolve_filter(ifilter)

            where = clause.format(table="", not_missing="Is Not Missing")

            casout = {"name": output, "promote": True}
            if self.index_vars is not None:
                casout["indexvars"] = self.index_vars

            self.conn.table.partition(table={
                "name": finput,
                "where": where
            },
                                      casout=casout)

        self.log.debug(F"Completed resolving filter: {filter}")
    def create_table_to_merge(self, table_name):
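        """Reshape one event table for merging: roll rows up per source and
        comparison group, bucket eventStartTime into a timeId, and prefix
        score, anomalousFlag and the related-events list with the analytic
        field name so columns stay distinct across merged tables."""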

        merge_table_name = F"{table_name}_merge"

        if table_exists(self.conn, merge_table_name):
            self.log.debug("Merge events table already exists")
            return merge_table_name

        analytic_field_name = self.event_tables_cols[table_name]

        dur = self.event_analytic_duration["value"]
        if self.event_analytic_duration["unit"].upper() == 'HOUR':
            time_id_str = F'dhms(datepart(eventStartTime_sas), ceil(hour(eventStartTime_sas)/{dur}) * {dur}, 0, 0)'
        else:
            time_id_str = (F'dhms(datepart(eventStartTime_sas), hour(eventStartTime_sas), '
                           F'ceil(minute(eventStartTime_sas)/{dur}) * {dur}, 0)')

        by_cols = self.src_id_fields + self.comparison_fields + [
            analytic_field_name, 'score', 'anomalousFlag'
        ]
        by_cols_str = " ".join(by_cols)

        keep_cols = ["timeId"] + self.src_id_fields + self.comparison_fields
        keep_cols += [
            analytic_field_name, F"{analytic_field_name}_score",
            F"{analytic_field_name}_anomalousFlag",
            F"{analytic_field_name}_relEvents"
        ]
        keep_cols_str = " ".join(keep_cols)

        code = F'''
            data {merge_table_name} (keep={keep_cols_str});
                retain {keep_cols_str};            
                set {table_name};    
                by {by_cols_str};
                length {analytic_field_name}_relEvents $ 100;
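                /* first./last. grouping assumes srcIpAddress is among the BY variables */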
                if first.srcIpAddress then {analytic_field_name}_relEvents = '';
                {analytic_field_name}_relEvents = catx(', ', trim(eventId), {analytic_field_name}_relEvents);
                eventStartTime_sas = eventStartTime/1000000 + 315619200; /* Convert from unix micros to sas */
                timeId = {time_id_str}; /* Ceil to hour or min based on duration */
                {analytic_field_name}_score = score;
                {analytic_field_name}_anomalousFlag = anomalousFlag;
                if last.srcIpAddress then output;
            run;
        '''

        self.log.debug("Merge events table DS code:" + " ".join(code.split()))

        self.conn.datastep.runcode(code)
        self.conn.promote(merge_table_name)

        return merge_table_name
    def standardize_analytic(self, input_table, analytic_field,
                             deviation_table):
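        """Standardize analytic_field within each comparison group using a
        robust (median/IQR) transform; if the transform action fails, fall
        back to a table with all deviations set to 0."""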

        if table_exists(self.conn, deviation_table):
            return

        copy_vars = self.src_id_fields + self.comparison_fields + [analytic_field] + \
            ['cmpMin', 'cmpMean', 'cmpMax', 'cmpCount']

        groupby = self.comparison_fields

        std_success = True

        try:
            self.conn.datapreprocess.transform(table={
                "name": input_table,
                "groupby": groupby
            },
                                               copyvars=copy_vars,
                                               requestpackages=[{
                                                   "inputs": analytic_field,
                                                   "function": {
                                                       "method": "standardize",
                                                       "arguments": {
                                                           "location":
                                                           "median",
                                                           "scale": "iqr"
                                                       }
                                                   }
                                               }],
                                               casout={
                                                   "name": deviation_table,
                                                   "promote": True
                                               })
            self.log.debug("Standardization complete.")

        except SWATCASActionError as e:
            self.log.debug(F"Standardization failed: {e}")

            # Create deviation table with all deviations set to 0
            copy_vars_str = " ".join(copy_vars + groupby)
            code = F'''
                data {deviation_table};
                    retain {copy_vars_str};
                    set {input_table};
                    _TR1_{analytic_field} = 0;
                run;
            '''
            self.conn.datastep.runcode(code)
            self.conn.promote(deviation_table)

    def summarize_analytic_by_cmp_fields(self, analytic_table, analytic_field,
                                         analytic_by_cmp_fields_table):
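        """Summarize analytic_field grouped by the comparison fields."""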

        if table_exists(self.conn, analytic_by_cmp_fields_table):
            return

        out = self.conn.summary(table={
            "name": analytic_table,
            "groupby": self.comparison_fields
        },
                                inputs=analytic_field,
                                casout={
                                    "name": analytic_by_cmp_fields_table,
                                    "promote": True
                                })

    def compute_all_events(self, input_table, cdf_table, cdf_field,
                           analytic_field, min_value, all_event_table):
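        """Build the all-events table: join the CDF scores back to the raw
        records, aggregate event start/end/duration per group, then add an
        MD5-based eventId and set anomalousFlag from the score threshold and
        minimum analytic value."""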
        if table_exists(self.conn, all_event_table):
            self.log.debug("All events table already exists")
            return

        self.conn.loadactionset("fedsql")

        ne_create_table = F"CREATE TABLE {all_event_table} AS "

        event_type_id = self.event_type_id
        ne_fields = [
            F'{event_type_id} as "eventTypeId"',
            'n.' + cdf_field + ' as "' + analytic_field + '"',
            'n.cdf as "score"'
        ]
        ne_fields += list(
            map(lambda x: 'n.' + x,
                ['cmpMin', 'cmpMean', 'cmpMax', 'cmpCount']))
        ne_fields += [
            'min(r.starttime) as "eventStartTime"',
            'max(r.starttime) as "eventEndTime"',
            'max(r.starttime) - min(r.starttime) as "eventDuration"'
        ]

        all_fields = ne_fields + list(
            map(lambda x: "r." + x + ' as "' + x + '"',
                self.src_id_fields + self.comparison_fields))

        ne_select = "SELECT " + ", ".join(all_fields)

        from_tables = [F"{input_table} AS r", F"{cdf_table} as n"]

        ne_from = " FROM " + ", ".join(from_tables)

        where_clauses = list(
            map(lambda x: "n." + x + " = " + "r." + x, self.src_id_fields))
        where_clauses += list(
            map(lambda x: "n." + x + " = " + "r." + x, self.comparison_fields))

        ne_where = " WHERE " + " AND ".join(where_clauses)

        groupby_fields = ["n." + cdf_field, "n.cdf"]
        groupby_fields += list(
            map(lambda x: 'n.' + x,
                ['cmpMin', 'cmpMean', 'cmpMax', 'cmpCount']))
        groupby_fields += list(map(lambda x: "r." + x, self.src_id_fields))
        groupby_fields += list(map(lambda x: "r." + x, self.comparison_fields))

        ne_group_by = " GROUP BY " + ", ".join(groupby_fields)

        all_event_query = ne_create_table + ne_select + ne_from + ne_where + ne_group_by

        self.log.debug("All event table query:" +
                       " ".join(all_event_query.split()))

        self.conn.fedsql.execdirect(all_event_query)

        ne_code = F"DATA {all_event_table};\
                    SET {all_event_table};\
                    FORMAT eventId $HEX32.; "

        # all_fields = ["eventTypeId", analytic_field, "score",
        #               "eventStartTime", "eventEndTime", "eventDuration"]
        #
        # all_fields += self.src_id_fields + self.comparison_fields

        all_fields = self.conn.columninfo(
            all_event_table)["ColumnInfo"]["Column"].tolist()
        all_fields_str = ", ".join(all_fields)

        event_id = F" eventId = put(md5(cats({all_fields_str})), $hex32.);"

        threshold = self.threshold
        ne_flag = F"  IF score > {threshold} AND {analytic_field} > {min_value} THEN anomalousFlag = 1; "
        ne_flag += " ELSE anomalousFlag = 0;"

        ne_code = ne_code + event_id + ne_flag + " RUN;"

        self.log.debug("All events DS code:" + " ".join(ne_code.split()))
        self.conn.runcode(ne_code)
        self.conn.promote(name=all_event_table)
    def runAnalytic(self):
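        """Merge the per-event-type tables into a single all-events table
        keyed by timeId and the source-id/comparison fields, impute missing
        numeric values to 0, and save the result."""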

        qual_all_events_table = self.merged_all_events_table.format(
            intable=self.intable)

        if table_exists(self.conn, qual_all_events_table):
            self.log.debug("Merge events table already exists")
            return

        # Substitute intable in event_tables_cols
        event_tables_cols_mod = {}

        for key, value in self.event_tables_cols.items():
            event_tables_cols_mod[key.format(intable=self.intable)] = value

        self.event_tables_cols = event_tables_cols_mod

        # Get list of all event tables
        tables = list(self.event_tables_cols.keys())

        # Load each table if it is not already loaded
        rel_paths = []
        for table in tables:
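            # Event type id assumed to be the third-from-last
            # underscore-delimited token of the table name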
            event_type_id = table.split("_")[-3]
            rel_path = self.output_file_rel_path(event_type_id)
            rel_paths.append(rel_path)

        loaded_tables = load_tables(self.conn, tables, rel_paths,
                                    self.ae_caslib)

        # Create tables with desired columns
        tables_to_merge = list(map(self.create_table_to_merge, loaded_tables))

        if len(tables_to_merge) == 0:
            self.log.error("No tables to merge")
            return

        tables_to_merge_str = " ".join(tables_to_merge)

        # Run datastep to merge the tables
        groupby = "by " + " ".join(["timeId"] + self.src_id_fields +
                                   self.comparison_fields) + "; "
        code = "DATA " + qual_all_events_table + "; MERGE " + tables_to_merge_str + "; " + groupby + " run;"

        self.log.debug(code)
        self.conn.datastep.runcode(code)

        # Impute missing values
        code = "data {qual_all_events_table}; \
                    set {qual_all_events_table};\
                    array change _numeric_;\
                    do over change;\
                        if change=. then change=0;\
                    end;\
                run ;"

        code = code.format(qual_all_events_table=qual_all_events_table)

        self.log.debug("Merge events DS code:" + " ".join(code.split()))
        self.conn.datastep.runcode(code)
        self.conn.promote(qual_all_events_table)

        # Save the table
        output_file_rel_path = self.output_file_rel_path()
        save_table(self.conn,
                   qual_all_events_table,
                   output_file_rel_path,
                   self.ae_caslib,
                   replace=True)
        self.log.debug("Saved merge events table")