def __init__(self, id: int, time: int, type: int, subtype: int, pid: int, ppid: int, cmdLine: str,
                 processName: str, morse: Morse = None):
        """Create a process node and derive its initial tags from its parent.

        :param id: node identifier
        :param time: timestamp stored on the node
        :param type: record type stored on the node
        :param subtype: record subtype stored on the node
        :param pid: process id
        :param ppid: parent process id; -1 is treated as "unknown parent" and
                     0 as "generated by root"
        :param cmdLine: command line of the process
        :param processName: name of the process
        :param morse: Morse instance supplying the initial tag values. It is
                      dereferenced whenever ppid is -1 or 0, or when the parent
                      node cannot be found, so it must not be None in those cases.
        """
        self.id = id
        self.time = time
        self.type = type
        self.subtype = subtype
        self.pid = pid
        self.ppid = ppid
        self.cmdLine = cmdLine
        self.processName = processName

        # per-node event history and the gradients recorded alongside it
        self.event_list = []
        self.event_id_list = []
        self.event_type_list = []
        self.state_list = []
        self.morse_grad_list = []
        self.simple_net_grad_list = []
        self.cur_state = np.zeros([2, 3])
        self.seq_len = 0

        # tags start at 0.0 and are overwritten below depending on the parent
        self.sTag: float = 0.0
        self.iTag: float = 0.0
        self.cTag: float = 0.0

        if self.ppid == -1:
            # unknown parent
            self.sTag = morse.get_stag_dangerous()
        elif self.ppid == 0:
            # process generated by root
            self.sTag = morse.get_stag_benign()
        else:
            # imported lazily, as in the original code
            from globals import GlobalVariable as gv
            parent_id = gv.get_processNode_by_pid(self.ppid)
            parent_node = gv.get_processNode(parent_id)
            if not parent_node:
                # parent node does not exist or has been released: this node
                # is not valid, so every tag gets its "dangerous" value
                self.sTag = morse.get_stag_dangerous()
                # fixed: this used to assign the misspelled attribute `iTa`,
                # leaving iTag at its 0.0 default
                self.iTag = morse.get_itag_dangerous()
                self.cTag = morse.get_ctag_dangerous()
            else:
                # inherit all three tags from the parent
                self.sTag = parent_node.sTag
                self.iTag = parent_node.iTag
                self.cTag = parent_node.cTag
def post_train():
    """Replay the stored events of every live process node and build its sequence.

    For each id in ``gv.processNodeSet`` the node is fetched; every stored
    event id that resolves to a ``rec.Record`` is passed to
    ``ep.EventParser.parse`` and then ``generate_sequence_and_grad(5)`` is
    called on the node.  Ids whose node is missing are skipped.
    """
    for node_id in gv.processNodeSet:
        node = gv.get_processNode(node_id)
        if not node:
            # fixed: generate_sequence_and_grad() used to be called even when
            # the lookup returned nothing, raising AttributeError
            continue

        for event_id in node.get_event_id_list():
            event = gv.get_event_by_id(event_id)
            if isinstance(event, rec.Record):
                # NOTE(review): parse is invoked on the class with a single
                # argument; confirm EventParser.parse is static in this module
                ep.EventParser.parse(event)

        state_sequence = node.generate_sequence_and_grad(5)
        # TODO: run the rnn on state_sequence, calculate loss, back propagate
    def file2process_parser(self,
                            record: Record,
                            morse: Morse = None) -> np.ndarray((4, 4)):
        """Build the matrix describing a file -> process event.

        The rows of the returned array are, in order: the event row
        ``[Id, time, subtype, 0]``, the four Morse parameters, the source
        (file) node array and the destination (process) node array.  The
        concatenated benign/suspicious threshold gradients are registered
        with ``gv.add_morse_grad`` under the event id as a side effect.

        :param record: event record; ``srcId`` must name a file node and
                       ``desId`` a process node
        :param morse: Morse instance providing parameters and gradients
        :return: the assembled array, or None when either node is unknown
        """
        event_row = [record.Id, record.time, record.subtype, 0]

        # --- resolve the source file node -------------------------------
        if not gv.exist_fileNode(record.srcId):
            # a source id of -1 is skipped silently; anything else is logged
            if record.srcId != -1:
                logger.error("file to process, can't find srcNode " +
                             str(record.srcId))
            return None
        file_node = gv.get_fileNode(record.srcId)

        # --- resolve the destination process node -----------------------
        if not gv.exist_processNode(record.desId):
            logger.error("file to process, can't find desNode " +
                         str(record.desId))
            return None
        process_node = gv.get_processNode(record.desId)

        if not (file_node and process_node):
            logger.error("file to process, can't find srcNode or destNode " +
                         ' ' + str(record.srcId) + ' ' + str(record.desId))
            return None

        src_row = file_node.get_matrix_array(4)
        des_row = process_node.get_matrix_array(4)

        def as_numpy(value):
            # plain floats pass through; anything else is treated as a tensor
            if isinstance(value, float):
                return value
            return value.cpu().detach().numpy()

        attenuate_benign = as_numpy(morse.get_attenuate_benign())
        attenuate_susp = as_numpy(morse.get_attenuate_susp_env())
        benign_possibility = morse.get_benign_possibility(src_row[1]).cpu().detach().numpy()
        susp_possibility = morse.get_susp_possibility(src_row[1]).cpu().detach().numpy()
        param_row = [
            attenuate_benign,
            attenuate_susp,
            benign_possibility,
            susp_possibility,
        ]

        thresh_grads = [morse.get_benign_thresh_grad(),
                        morse.get_susp_thresh_grad()]
        gv.add_morse_grad(event_row[0], np.concatenate(thresh_grads))
        return np.array([event_row, param_row, src_row, des_row])
def pre_process(record: rec.Record):
    """Store the record under its id and attach it to both endpoint nodes.

    Records with subtype 4-7 (read) or 8-11 (write) are added to the event
    list of the process node named by ``srcId`` and of the file node named by
    ``desId``.  Any other subtype is only stored, not attached.

    :param record: the event record to register
    """
    src_id = record.srcId
    des_id = record.desId
    rec_id = record.Id
    subtype = record.subtype
    gv.set_event_by_id(rec_id, record)

    # 4..7 are reads, 8..11 are writes; both are handled the same way here
    if subtype not in (4, 5, 6, 7, 8, 9, 10, 11):
        return

    # NOTE(review): for subtypes 8-11 EventParser.parse looks up srcId as a
    # file node and desId as a process node, the reverse of what is done
    # here - confirm which direction is intended.
    process_node = gv.get_processNode(src_id)
    file_node = gv.get_fileNode(des_id)
    process_node.add_event(rec_id)
    file_node.add_event(rec_id)
    def file2file_parser(self, record: Record,
                         morse: Morse) -> np.ndarray((4, 4)):
        """Build the matrix describing a file -> file event.

        The rows of the returned array are, in order: the event row
        ``[Id, time, subtype, 0]``, the four Morse parameters, the source
        node array and the destination node array.  The concatenated
        benign/suspicious threshold gradients are registered with
        ``gv.add_morse_grad`` under the event id as a side effect.

        :param record: event record whose ``srcId`` and ``desId`` both name
                       file nodes
        :param morse: Morse instance providing parameters and gradients
        :return: the assembled array, or None when either node is unknown
        """
        id = record.Id
        time = record.time
        subtype = record.subtype

        if not gv.exist_fileNode(record.srcId):
            logger.error("file to file, can't find srcNode" + ' ' +
                         str(record.srcId))
            return None
        srcNode = gv.get_fileNode(record.srcId)

        if not gv.exist_fileNode(record.desId):
            logger.error("file to file, can't find desNode" + ' ' +
                         str(record.desId))
            return None
        destNode = gv.get_fileNode(record.desId)

        if not srcNode or not destNode:
            # fixed: the message used to read "desNode or destNode"
            logger.error("file to file, can't find srcNode or destNode" + ' ' +
                         str(record.srcId) + ' ' + str(record.desId))
            return None

        eventArray = [id, time, subtype, 0]
        srcArray = srcNode.get_matrix_array(4)
        desArray = destNode.get_matrix_array(4)

        # Exactly four parameters, matching the width of the other rows.
        # fixed: the row used to be padded with 4 - len(record.params) extra
        # zeros although it already held four values, which made the matrix
        # ragged whenever the record carried fewer than four params.
        # NOTE(review): file2process_parser converts these values with
        # .cpu().detach().numpy(); confirm whether that is needed here too.
        params = [
            morse.get_attenuate_benign(),
            morse.get_attenuate_susp_env(),
            morse.get_benign_possibility(srcArray[1]),
            morse.get_susp_possibility(srcArray[1]),
        ]
        benign_grad = morse.get_benign_thresh_grad()
        susp_grad = morse.get_susp_thresh_grad()
        gv.add_morse_grad(id, np.concatenate([benign_grad, susp_grad]))
        return np.array([eventArray, params, srcArray, desArray])
# Example #6
# 0
def train_model():
    """Stream the training file, feed events to Morse and train the RNN per batch.

    The file at ``gv.train_data`` is read line by line; every line starting
    with ``"data"`` is followed by a record decoded with ``readObj``.

    * ``record.type == 1`` (event): the event is parsed; after every 100
      events one training pass is run over the tensors from
      ``morse.forward(0.05)``, the Morse and SimpleNet weights are updated
      from the input gradients and the models are dumped.
    * ``record.type == -1`` (node): file nodes (subtype 1-4 or -1) and process
      nodes (subtype 5) are registered in ``gv``; a process record without
      params removes the process node.

    When early stopping is enabled and triggers, the best checkpoint in the
    queue is dumped and the interpreter exits via ``sys.exit(0)``.

    :return: gradient of the loss w.r.t. the last trained input tensor of the
             most recent batch, or None if no tensor was trained in it (or no
             batch was ever reached)
    """
    logger = getLogger("train mode")
    device = gv.device
    loss_function = Comp_Loss
    learning_rate = 0.001
    max_event_per_epoch = 100

    # models initialization
    morse = Morse(batch_size=gv.batch_size, sequence_size=gv.sequence_size, data_loader=gv.processNodeSet)
    event_parser = EventParser(morse)
    rnn = RNNet(input_dim=gv.feature_size, hidden_dim=64, output_dim=3, numOfRNNLayers=1)
    rnn = rnn.to(device)
    rnn_optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

    event_num = 0
    # fixed: defined up front so the final return cannot hit an unbound name
    # when the file holds fewer events than one batch
    rnn_grad = None

    # fixed: the context manager closes the file on exceptions and on the
    # sys.exit() taken by early stopping
    with open(gv.train_data, "r") as f:
        while True:
            # readline() is kept (rather than iterating f) because readObj
            # consumes further lines from the same handle
            line = f.readline()
            if not line:
                break
            if line[:4] != "data":
                continue

            record = readObj(f)
            if record.type == 1:
                # event type
                event_num += 1
                event_parser.parse(record, morse)
                if event_num != max_event_per_epoch:
                    continue

                # ---- process batch-wise --------------------------------
                # fixed: the counter is reset once per batch; it used to be
                # reset only when a non-None tensor was seen, so a batch of
                # all-None tensors stalled training for the rest of the file
                event_num = 0
                rnn_grad = None
                input_tensor_list = morse.forward(0.05)
                for input_tensor in input_tensor_list:
                    if input_tensor is None:
                        continue

                    input_tensor = input_tensor.to(device)
                    # gradients w.r.t. the input are needed for the Morse update
                    input_tensor.requires_grad = True
                    rnn.train()
                    rnn_optimizer.zero_grad()
                    rnn_out, rnn_h = rnn(input_tensor.float())
                    rnn_loss = loss_function(rnn_out)
                    rnn_loss.backward()
                    rnn_grad = input_tensor.grad
                    rnn_optimizer.step()
                    print("loss: ", rnn_loss.item())

                    if gv.early_stopping_on:
                        model_weights = wrap_model(morse)
                        gv.early_stopping_model_queue.append([rnn_loss.item(), model_weights])

                        # early stopping and model saving; popleft() only runs
                        # once the queue has reached the patience length
                        if (len(gv.early_stopping_model_queue) == gv.early_stopping_patience
                                and early_stop_triggered(gv.early_stopping_model_queue.popleft()[0],
                                                         rnn_loss.item(),
                                                         gv.early_stopping_threshold)):
                            print(
                                "========================= early stopping triggered =========================")
                            min_loss = min(list(gv.early_stopping_model_queue), key=lambda x: x[0])
                            print("minimum loss: ", min_loss)
                            popped_checkpoint = gv.early_stopping_model_queue.popleft()
                            # fixed: compare the loss values only; comparing
                            # the [loss, weights] lists fell through to
                            # comparing the weights on equal losses
                            while popped_checkpoint[0] > min_loss[0]:
                                popped_checkpoint = gv.early_stopping_model_queue.popleft()
                            dump_model(rnn=rnn, morse_model_weights=popped_checkpoint[1])
                            print("best model saved")
                            sys.exit(0)

                    # integrated grads calculations and updates
                    simple_net_grad_tensor = morse.simple_net_grad_tensor.to(device)
                    morse_grad_tensor = morse.morse_grad_tensor.to(device)
                    simple_net_final_grad = torch.tensordot(rnn_grad, simple_net_grad_tensor,
                                                            ([0, 1, 2], [0, 1, 2]))
                    final_morse_grad = torch.tensordot(rnn_grad, morse_grad_tensor, ([0, 1, 2], [0, 1, 2]))
                    morse.a_b_setter(-gv.learning_rate * final_morse_grad[0])
                    morse.a_e_setter(-gv.learning_rate * final_morse_grad[1])

                    # update SimpleNet's weights
                    morse.benign_thresh_model_setter(simple_net_final_grad[0],
                                                     simple_net_final_grad[1])
                    morse.suspect_env_model_setter(simple_net_final_grad[2],
                                                   simple_net_final_grad[3])

                dump_model(morse=morse, rnn=rnn)

            elif record.type == -1:
                if 0 < record.subtype < 5 or record.subtype == -1:
                    # file node (subtype 1-4) or common file (subtype -1)
                    newNode = record.getFileNode(morse)
                    if not newNode:
                        logger.error("failed to get file node")
                        continue
                    if gv.exist_fileNode(newNode.id):
                        logger.error("duplicate file node: " + str(newNode.id))
                    else:
                        gv.set_fileNode(newNode.id, newNode)
                elif record.subtype == 5:
                    # process node; if no params, this process is released
                    if not record.params:
                        gv.remove_processNode(record.Id)
                        continue
                    newNode = record.getProcessNode(morse)
                    if not newNode:
                        logger.error("failed to get process node")
                        continue
                    if gv.exist_processNode(newNode.id):
                        # fixed: str() added; the id was concatenated to the
                        # message directly, unlike the file-node branch
                        logger.error("duplicate process node: " + str(newNode.id))
                    else:
                        gv.set_processNode(newNode.id, newNode)

    return rnn_grad
# Example #7
# 0
def predict_entry():
    """Stream the test file through Morse and collect RNN predictions per batch.

    The models are built, restored with ``load_model`` and the file at
    ``gv.test_data`` is read line by line; every line starting with
    ``"data"`` is followed by a record decoded with ``readObj``.

    * ``record.type == 1`` (event): the event is parsed; after every 100
      events the result of ``predict(rnn)`` is appended to the output.
    * ``record.type == -1`` (node): file nodes (subtype 1-4 or -1) and process
      nodes (subtype 5) are registered in ``gv``; a process record without
      params removes the process node.

    :return: list accumulating the outputs of every ``predict(rnn)`` call
    """
    logger = getLogger("test mode")
    device = gv.device
    max_event_per_epoch = 100

    # models initialization
    morse = Morse(batch_size=gv.batch_size,
                  sequence_size=gv.sequence_size,
                  data_loader=gv.processNodeSet)
    rnn = RNNet(input_dim=gv.feature_size,
                hidden_dim=64,
                output_dim=3,
                numOfRNNLayers=1)
    morse, rnn = load_model(morse, rnn)
    rnn = rnn.to(device)
    event_parser = EventParser(morse)

    event_num = 0
    out_batches = []

    # fixed: the file handle was never closed
    with open(gv.test_data, "r") as f:
        while True:
            # readline() is kept (rather than iterating f) because readObj
            # consumes further lines from the same handle
            line = f.readline()
            if not line:
                break
            if line[:4] != "data":
                continue

            record = readObj(f)
            if record.type == 1:
                # event type
                event_num += 1
                event_parser.parse(record, morse)

                # process batch-wise
                # NOTE(review): events left over when the file ends before a
                # full batch are never passed to predict() - confirm intended
                if event_num == max_event_per_epoch:
                    out_batches += predict(rnn)
                    event_num = 0

            elif record.type == -1:
                if 0 < record.subtype < 5 or record.subtype == -1:
                    # file node (subtype 1-4) or common file (subtype -1)
                    newNode = record.getFileNode(morse)
                    if not newNode:
                        logger.error("failed to get file node")
                        continue
                    if gv.exist_fileNode(newNode.id):
                        logger.error("duplicate file node: " + str(newNode.id))
                    else:
                        gv.set_fileNode(newNode.id, newNode)
                elif record.subtype == 5:
                    # process node; if no params, this process is released
                    if not record.params:
                        gv.remove_processNode(record.Id)
                        continue
                    newNode = record.getProcessNode(morse)
                    if not newNode:
                        logger.error("failed to get process node")
                        continue
                    if gv.exist_processNode(newNode.id):
                        # fixed: str() added; the id was concatenated to the
                        # message directly, unlike the file-node branch
                        logger.error("duplicate process node: " + str(newNode.id))
                    else:
                        gv.set_processNode(newNode.id, newNode)

    return out_batches
    def parse(self, record: Record, morse: Morse = None) -> np.ndarray:
        '''
        Run one event record through the parser and event processor that
        match its subtype, then push the result into both endpoint nodes.

        Routing by ``record.subtype``:
            4, 5, 6, 7, 27 : process2file_parser    + read_process  (process -> file)
            8, 9, 10, 11   : file2process_parser    + write_process (file -> process)
            14             : process2process_parser + exec_process  (process -> process)
            35             : process2file_parser    + load_process  (process -> file)
        Every other subtype is left unhandled.

        When both nodes are found their ``state_update`` is called and
        ``gv.succ_count`` is incremented; otherwise ``gv.fail_count`` is
        incremented.  In both cases the result is stored with
        ``gv.set_event_by_id``.

        :param record: Record describing the event
        :param morse: Morse instance forwarded to the parser
        :return: the processor result wrapped with np.array; np.zeros([1, 12])
                 for an unhandled subtype; None (nothing stored or counted)
                 when the parser itself returns None
        '''
        event_id = record.Id
        subtype = record.subtype
        src_id = record.srcId
        des_id = record.desId

        # (subtypes, parser method, event_processor method,
        #  gv lookup for the source, gv lookup for the destination)
        routes = (
            ((4, 5, 6, 7, 27), 'process2file_parser', 'read_process',
             'get_processNode', 'get_fileNode'),
            ((8, 9, 10, 11), 'file2process_parser', 'write_process',
             'get_fileNode', 'get_processNode'),
            ((14,), 'process2process_parser', 'exec_process',
             'get_processNode', 'get_processNode'),
            ((35,), 'process2file_parser', 'load_process',
             'get_processNode', 'get_fileNode'),
        )

        vector = None
        morse_res = np.zeros([1, 12])
        src_node = None
        des_node = None

        for subtypes, parser_name, processor_name, src_lookup, des_lookup in routes:
            if subtype not in subtypes:
                continue
            vector = getattr(self, parser_name)(record, morse)
            if vector is None:
                # the parser could not resolve the event: bail out before
                # anything is counted or stored
                return
            morse_res = getattr(self.event_processor, processor_name)(vector)
            src_node = getattr(gv, src_lookup)(src_id)
            des_node = getattr(gv, des_lookup)(des_id)
            break

        morse_res = np.array(morse_res)

        if not (src_node and des_node):
            gv.fail_count += 1
        else:
            # Combine the Morse gradient with the SimpleNet gradient.
            # Per the original notes (not verifiable from here):
            #   simple_net_grad is np.array(4), morse_grad is np.array(12, 4)
            simple_net_grad = gv.get_morse_grad(event_id)
            full_grad = np.array(
                morse_train.get_morse_grad(record.subtype, vector,
                                           self.event_processor))
            # columns 2, 2, 3, 3 of the Morse gradient, scaled element-wise
            duplicated_cols = np.array([
                full_grad[:, 2], full_grad[:, 2],
                full_grad[:, 3], full_grad[:, 3],
            ])
            morse_simple_net_grad = np.transpose(duplicated_cols) * simple_net_grad
            # only the first two columns are handed on as the Morse gradient
            morse_grad = full_grad[:, 0:2]

            for node in (src_node, des_node):
                node.state_update(morse_res, subtype, vector, morse_grad,
                                  morse_simple_net_grad, event_id)
            gv.succ_count += 1

        gv.set_event_by_id(event_id, morse_res)
        return morse_res