Example no. 1
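    # Checks that Drain state written through MemoryBufferPersistence by one TemplateMiner
    # is restored identically by a second one (same clusters, same prefix tree).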
    def save_load_snapshot(self, max_clusters):
        persistence = MemoryBufferPersistence()

        config = TemplateMinerConfig()
        config.drain_max_clusters = max_clusters
        template_miner1 = TemplateMiner(persistence, config)
        print(template_miner1.add_log_message("hello"))
        print(template_miner1.add_log_message("hello ABC"))
        print(template_miner1.add_log_message("hello BCD"))
        print(template_miner1.add_log_message("hello XYZ"))
        print(template_miner1.add_log_message("goodbye XYZ"))

        template_miner2 = TemplateMiner(persistence, config)

        self.assertListEqual(list(template_miner1.drain.id_to_cluster.keys()),
                             list(template_miner2.drain.id_to_cluster.keys()))

        self.assertListEqual(
            list(template_miner1.drain.root_node.key_to_child_node.keys()),
            list(template_miner2.drain.root_node.key_to_child_node.keys()))

        def get_tree_lines(template_miner):
            sio = io.StringIO()
            template_miner.drain.print_tree(sio)
            sio.seek(0)
            return sio.readlines()

        self.assertListEqual(get_tree_lines(template_miner1),
                             get_tree_lines(template_miner2))

        print(template_miner2.add_log_message("hello yyy"))
        print(template_miner2.add_log_message("goodbye ABC"))
Example no. 2
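    # Model constructor: sets up a Drain3 TemplateMiner with FilePersistence and drain3.ini,
    # then stores the training hyper-parameters (learning rate, window size, device, ...).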
    def __init__(self, prefix_file, model_name, num_candidates, window_size,
                 device, lr, lr_step, lr_decay_ratio, max_iter):
        Path("data").mkdir(parents=True, exist_ok=True)
        self.persistence_path = prefix_file + "_templates_persist.bin"
        persistence = FilePersistence(self.persistence_path)
        config = TemplateMinerConfig()
        config.load("ailoganalyzer/drain3.ini")
        config.profiling_enabled = False
        self.template_miner = TemplateMiner(persistence, config)
        if device == "auto":
            device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu")

        super().__init__(model_name)

        self.prefix_file = prefix_file
        self.num_candidates = num_candidates
        self.window_size = window_size
        self.device = device
        self.lr = lr
        self.lr_step = lr_step
        self.lr_decay_ratio = lr_decay_ratio
        self.nb_epoch = max_iter

        self.semantic = False
        self.sequentials = False
        self.quantitatives = False

        self.model = None

        self.sequence = []
        self.train_seq = []
        self.train_loader = None
        self.valid_loader = None
        self.model_path = self.prefix_file + "_last.pth"
Example no. 3
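    # drain3's TemplateMiner constructor: loads a default config when none is given,
    # wires up profiling, Drain and the LogMasker, and restores persisted state if available.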
    def __init__(self,
                 persistence_handler: PersistenceHandler = None,
                 config: TemplateMinerConfig = None):
        """
        Wrapper for Drain with persistence and masking support

        :param persistence_handler: The persistence handler to use. When None, no persistence is applied.
        :param config: Configuration object. When None, the configuration is loaded from the default .ini file (if it exists).
        """
        logger.info("Starting Drain3 template miner")

        if config is None:
            logger.info(f"Loading configuration from {config_filename}")
            config = TemplateMinerConfig()
            config.load(config_filename)

        self.config = config

        self.profiler: Profiler = NullProfiler()
        if self.config.profiling_enabled:
            self.profiler = SimpleProfiler()

        self.persistence_handler = persistence_handler

        self.drain = Drain(sim_th=self.config.drain_sim_th,
                           depth=self.config.drain_depth,
                           max_children=self.config.drain_max_children,
                           max_clusters=self.config.drain_max_clusters,
                           extra_delimiters=self.config.drain_extra_delimiters,
                           profiler=self.profiler)
        self.masker = LogMasker(self.config.masking_instructions)
        self.last_save_time = time.time()
        if persistence_handler is not None:
            self.load_state()
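
As a usage note (not part of the snippets above), here is a minimal sketch of how this constructor is typically driven, mirroring the FilePersistence pattern of Examples no. 2 and 11; the file names are illustrative:

from drain3 import TemplateMiner
from drain3.file_persistence import FilePersistence
from drain3.template_miner_config import TemplateMinerConfig

config = TemplateMinerConfig()
config.load("drain3.ini")  # illustrative name; defaults are kept if the file is absent
persistence = FilePersistence("drain3_state.bin")  # illustrative state file

# State is reloaded inside the constructor when a persistence handler is given.
miner = TemplateMiner(persistence, config)
result = miner.add_log_message("connected to node-17")
print(result["cluster_id"], result["template_mined"])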
Example no. 4
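    # Exercises get_parameter_list(): integers are masked as NUM with custom mask
    # prefix/suffix, and parameter values are extracted back from each mined template.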
    def test_get_param_list(self):
        config = TemplateMinerConfig()
        mi = MaskingInstruction(
            "((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "NUM")
        config.masking_instructions.append(mi)
        config.mask_prefix = "[:"
        config.mask_suffix = ":]"
        template_miner = TemplateMiner(None, config)

        def add_and_test(msg, expected_params):
            print(f"msg: {msg}")
            res = template_miner.add_log_message(msg)
            print(f"result: {res}")
            params = template_miner.get_parameter_list(res["template_mined"],
                                                       msg)
            print(f"params: {params}")
            self.assertListEqual(params, expected_params)

        add_and_test("hello", [])
        add_and_test("hello ABC", [])
        add_and_test("hello BCD", ["BCD"])
        add_and_test("request took 123 ms", ["123"])
        add_and_test("file saved [test.xml]", [])
        add_and_test("new order received: [:xyz:]", [])
        add_and_test("order type: new, order priority:3", ["3"])
        add_and_test("order type: changed, order priority:5",
                     ["changed,", "5"])
Example no. 5
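    # Covers the full_search_strategy modes of match() ("never", "fallback", "always"),
    # with and without parametrize_numeric_tokens, including matching the empty string.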
    def test_match_strategies(self):
        miner = TemplateMiner()
        print(miner.add_log_message("training4Model start"))
        print(miner.add_log_message("loadModel start"))
        print(miner.add_log_message("loadModel stop"))
        print(miner.add_log_message("this is a test"))
        miner.drain.print_tree()
        self.assertIsNotNone(
            miner.match("loadModel start", full_search_strategy="fallback"))
        self.assertIsNotNone(
            miner.match("loadModel start", full_search_strategy="always"))
        self.assertIsNone(
            miner.match("loadModel start", full_search_strategy="never"))
        print(miner.add_log_message("loadModel start"))
        self.assertIsNotNone(
            miner.match("loadModel start", full_search_strategy="fallback"))
        self.assertIsNotNone(
            miner.match("loadModel start", full_search_strategy="always"))
        self.assertIsNotNone(
            miner.match("loadModel start", full_search_strategy="never"))

        config = TemplateMinerConfig()
        config.parametrize_numeric_tokens = False
        miner = TemplateMiner(config=config)
        print(miner.add_log_message("training4Model start"))
        print(miner.add_log_message("loadModel start"))
        print(miner.add_log_message("loadModel stop"))
        print(miner.add_log_message("this is a test"))
        self.assertIsNotNone(
            miner.match("loadModel start", full_search_strategy="fallback"))
        self.assertIsNotNone(
            miner.match("loadModel start", full_search_strategy="always"))
        self.assertIsNotNone(
            miner.match("loadModel start", full_search_strategy="never"))

        self.assertIsNone(miner.match("", full_search_strategy="never"))
        self.assertIsNone(miner.match("", full_search_strategy="always"))
        self.assertIsNone(miner.match("", full_search_strategy="fallback"))

        print(miner.add_log_message(""))
        self.assertIsNotNone(miner.match("", full_search_strategy="never"))
        self.assertIsNotNone(miner.match("", full_search_strategy="always"))
        self.assertIsNotNone(miner.match("", full_search_strategy="fallback"))
Example no. 6
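    # match() against previously learned clusters, with an extra "_" delimiter and NUM
    # masking; messages that fit no existing cluster template must return None.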
    def test_match_only(self):
        config = TemplateMinerConfig()
        config.drain_extra_delimiters = ["_"]
        mi = MaskingInstruction(
            "((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "NUM")
        config.masking_instructions.append(mi)
        tm = TemplateMiner(None, config)

        res = tm.add_log_message("aa aa aa")
        print(res)

        res = tm.add_log_message("aa aa bb")
        print(res)

        res = tm.add_log_message("xx yy zz")
        print(res)

        res = tm.add_log_message("rrr qqq 123")
        print(res)

        c = tm.match("aa   aa tt")
        self.assertEqual(1, c.cluster_id)

        c = tm.match("aa aa 12")
        self.assertEqual(1, c.cluster_id)

        c = tm.match("xx yy   zz")
        self.assertEqual(2, c.cluster_id)

        c = tm.match("xx yy rr")
        self.assertIsNone(c)

        c = tm.match("nothing")
        self.assertIsNone(c)

        c = tm.match("rrr qqq   456   ")
        self.assertEqual(3, c.cluster_id)

        c = tm.match("rrr qqq 555.2")
        self.assertIsNone(c)

        c = tm.match("rrr qqq num")
        self.assertIsNone(c)
Example no. 7
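    # Exercises extract_parameters() with NUM and multi-word masking instructions,
    # optionally requiring exact matching of the masked parameters.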
    def test_extract_parameters(self):
        config = TemplateMinerConfig()
        mi = MaskingInstruction(
            "((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "NUM")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"multiple words", "WORDS")
        config.masking_instructions.append(mi)
        config.mask_prefix = "[:"
        config.mask_suffix = ":]"
        template_miner = TemplateMiner(None, config)

        def add_and_test(msg, expected_params, exact_matching=False):
            print(f"msg: {msg}")
            res = template_miner.add_log_message(msg)
            print(f"result: {res}")
            extracted_parameters = template_miner.extract_parameters(
                res["template_mined"], msg, exact_matching=exact_matching)
            self.assertIsNotNone(extracted_parameters)
            params = [parameter.value for parameter in extracted_parameters]
            print(f"params: {params}")
            self.assertListEqual(params, expected_params)

        add_and_test("hello", [])
        add_and_test("hello ABC", [])
        add_and_test("hello BCD", ["BCD"])
        add_and_test("hello    BCD", ["BCD"])
        add_and_test("hello\tBCD", ["BCD"])
        add_and_test("request took 123 ms", ["123"])
        add_and_test("file saved [test.xml]", [])
        add_and_test("new order received: [:xyz:]", [])
        add_and_test("order type: new, order priority:3", ["3"])
        add_and_test("order type: changed, order priority:5",
                     ["changed,", "5"])
        add_and_test("sometimes one needs multiple words", ["multiple words"],
                     True)
        add_and_test("sometimes one needs not", ["not"], True)
        add_and_test("sometimes one needs multiple words", ["multiple words"],
                     True)
Example no. 8
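    # Loads drain3_test.ini from the working directory or tests/, then verifies a few
    # of the parsed settings.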
    def test_load_config(self):
        config = TemplateMinerConfig()
        if os.path.exists("drain3_test.ini"):
            config.load("drain3_test.ini")
        if os.path.exists("tests/drain3_test.ini"):
            config.load("tests/drain3_test.ini")
        self.assertEqual(1024, config.drain_max_clusters)
        self.assertListEqual(["_"], config.drain_extra_delimiters)
        self.assertEqual(7, len(config.masking_instructions))
Example no. 9
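    # Same configuration check, but resolving drain3_test.ini relative to the test file.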
    def test_load_config(self):
        config = TemplateMinerConfig()
        config.load(dirname(__file__) + "/drain3_test.ini")
        self.assertEqual(1024, config.drain_max_clusters)
        self.assertListEqual(["_"], config.drain_extra_delimiters)
        self.assertEqual(7, len(config.masking_instructions))
Example no. 10
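    # Drives extract_parameters() over a table of (template, content, expected values,
    # expected mask names), including nested "<...>" literals and cases expected to fail (None).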
    def test_extract_parameters_direct(self):
        config = TemplateMinerConfig()
        mi = MaskingInstruction(r"hdfs://[\w.:@-]*((/[\w.~%+-]+)+/?)?",
                                "hdfs_uri")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"(?P<quote>[\"'`]).*?(?P=quote)",
                                "quoted_string")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"((?P<p_0>[*_])\2{0,2}).*?\1",
                                "markdown_emph")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"multiple \*word\* pattern", "*words*")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"some \S+ \S+ pattern", "*words*")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"(\d{1,3}\.){3}\d{1,3}", "ip")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"(?P<number>\d+)\.\d+", "float")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"0[xX][a-fA-F0-9]+", "integer")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"(?P<number>\d+)", "integer")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"HelloWorld", "*")
        config.masking_instructions.append(mi)
        mi = MaskingInstruction(r"MaskPrefix", "<")
        config.masking_instructions.append(mi)
        template_miner = TemplateMiner(None, config)

        test_vectors = [
            ("<hdfs_uri>:<integer>+<integer>",
             "hdfs://*****:*****@<integer>", "some other cool pattern@0xe1f",
             ["some other cool pattern", "0xe1f"], ["*words*", "integer"]),
            ("Another test with <*words*> that includes <integer><integer> and <integer> <*> <integer>",
             "Another test with some other 0Xadded pattern that includes 500xc0ffee and 0X4 times 5",
             [
                 "some other 0Xadded pattern", "50", "0xc0ffee", "0X4",
                 "times", "5"
             ], ["*words*", "integer", "integer", "integer", "*", "integer"]),
            ("some <*words*> <*words*>",
             "some multiple *word* pattern some confusing *word* pattern",
             ["multiple *word* pattern",
              "some confusing *word* pattern"], ["*words*", "*words*"]),
            ("<*words*> <*>", "multiple *word* pattern <*words*>",
             ["multiple *word* pattern", "<*words*>"], ["*words*", "*"]),
            ("<*> <*>", "HelloWorld Test", ["HelloWorld", "Test"], ["*", "*"]),
            ("<*> <*>", "HelloWorld <anything>", ["HelloWorld",
                                                  "<anything>"], ["*", "*"]),
            ("<*><integer>", "HelloWorld1", ["HelloWorld",
                                             "1"], ["*", "integer"]),
            ("<*> works <*>", "This works as-expected",
             ["This", "as-expected"], ["*", "*"]),
            ("<memory:<integer>>", "<memory:8>", ["8"], ["integer"]),
            ("<memory:<integer> <core:<float>>>", "<memory:8 <core:0.5>>",
             ["8", "0.5"], ["integer", "float"]),
            ("<*> <memory:<<integer> <core:<float>>>",
             "New: <memory:<8 <core:0.5>>", ["New:", "8",
                                             "0.5"], ["*", "integer",
                                                      "float"]),
            ("<<>", "MaskPrefix", ["MaskPrefix"], ["<"]),
            ("<<<>>", "<MaskPrefix>", ["MaskPrefix"], ["<"]),
            ("There are no parameters here.", "There are no parameters here.",
             [], []),
            ("<float> <float>", "0.15 10.16 3.19", None, None),
            ("<float> <float>", "0.15 10.16 test 3.19", None, None),
            ("<memory:<<integer> <core:<float>>>", "<memory:8 <core:0.5>>",
             None, None),
            ("<<>", "<<>", None, None),
            ("<*words*> <*words*>", "0.15 0.15", None, None),
        ]

        for template, content, expected_parameters, expected_mask_names in test_vectors:
            with self.subTest(template=template,
                              content=content,
                              expected_parameters=expected_parameters):
                extracted_parameters = template_miner.extract_parameters(
                    template, content, exact_matching=True)
                if expected_parameters is None:
                    self.assertIsNone(extracted_parameters)
                else:
                    self.assertIsNotNone(extracted_parameters)
                    self.assertListEqual([
                        parameter.value for parameter in extracted_parameters
                    ], expected_parameters)
                    self.assertListEqual([
                        parameter.mask_name
                        for parameter in extracted_parameters
                    ], expected_mask_names)
Example no. 11
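# Interactive demo: selects a persistence backend, mines each stdin line with Drain3,
# and prints the mining result as JSON along with the extracted parameters.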
import json
from os.path import dirname

from drain3 import TemplateMiner
from drain3.template_miner_config import TemplateMinerConfig

# persistence_type is assumed to be set above this excerpt (e.g. "FILE", "REDIS" or "NONE").
if persistence_type == "FILE":
    from drain3.file_persistence import FilePersistence

    persistence = FilePersistence("drain3_state.bin")

elif persistence_type == "REDIS":
    from drain3.redis_persistence import RedisPersistence

    persistence = RedisPersistence(redis_host='',
                                   redis_port=25061,
                                   redis_db=0,
                                   redis_pass='',
                                   is_ssl=True,
                                   redis_key="drain3_state_key")
else:
    persistence = None

config = TemplateMinerConfig()
config.load(dirname(__file__) + "/drain3.ini")

template_miner = TemplateMiner(persistence, config)
print(f"Drain3 started with '{persistence_type}' persistence")
print(f"reading from std-in (input 'q' to finish)")
while True:
    log_line = input("> ")
    if log_line == 'q':
        break
    result = template_miner.add_log_message(log_line)
    result_json = json.dumps(result)
    print(result_json)
    params = template_miner.get_parameter_list(result["template_mined"],
                                               log_line)
    print("parameters: " + str(params))
Example no. 12
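# Batch demo: downloads and extracts the SSH log dataset from Zenodo if missing, then
# iterates over its lines with a TemplateMiner configured from drain3.ini (profiling enabled).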
import logging
import os
import subprocess
import time
from os.path import dirname

from drain3 import TemplateMiner
from drain3.template_miner_config import TemplateMinerConfig

# Imports and logger setup assumed; the original snippet starts inside the basicConfig(...) call.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO,
                    format='%(message)s')

in_gz_file = "SSH.tar.gz"
in_log_file = "SSH.log"
if not os.path.isfile(in_log_file):
    logger.info(f"Downloading file {in_gz_file}")
    p = subprocess.Popen(
        f"curl https://zenodo.org/record/3227177/files/{in_gz_file} --output {in_gz_file}",
        shell=True)
    p.wait()
    logger.info(f"Extracting file {in_gz_file}")
    p = subprocess.Popen(f"tar -xvzf {in_gz_file}", shell=True)
    p.wait()

config = TemplateMinerConfig()
config.load(dirname(__file__) + "/drain3.ini")
config.profiling_enabled = True
template_miner = TemplateMiner(config=config)

line_count = 0

with open(in_log_file) as f:
    lines = f.readlines()

start_time = time.time()
batch_start_time = start_time
batch_size = 10000

for line in lines:
    line = line.rstrip()
Example no. 13
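    # HDFS evaluation: mines each log message into a Drain3 cluster id, groups cluster-id
    # sequences per block (blk_*), builds sliding windows, optionally trains the model,
    # and reports precision/recall/F1 against the anomaly labels.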
    def evaluate_HDFS(self, train=True):
        config = TemplateMinerConfig()
        config.load("ailoganalyzer/drain3.ini")
        config.profiling_enabled = False
        self.template_miner = TemplateMiner(config=config)

        hdfs_log = "../../Documents/HDFS_1/HDFS.log"
        hdfs_anomaly_label = "../../Documents/HDFS_1/anomaly_label.csv"
        nb_block = 30000

        with open(hdfs_anomaly_label, "r") as f:
            hdfs_labels = {}
            for i, line in tqdm(enumerate(f), total=nb_block):
                label = line.strip().split(",")
                hdfs_labels[label[0]] = (label[1] == "Anomaly")
        keys = random.sample(list(hdfs_labels), nb_block)
        values = [hdfs_labels[k] for k in keys]
        hdfs_labels = dict(zip(keys, values))

        blk_finder_2 = re.compile(r"(blk_-?\d+)")
        with open(hdfs_log, "r") as f:
            data_dict = {key: [] for key in hdfs_labels.keys()}
            for line in tqdm(f):
                blk = re.search(blk_finder_2, line).group()
                if blk in data_dict:
                    msg = " ".join(line.strip().split()[5:])
                    result = self.template_miner.add_log_message(msg)
                    cluster_id = result["cluster_id"] - 1
                    data_dict[blk].append(cluster_id)

        abnormal = []
        normal = []
        abnormal_label = []
        normal_label = []
        abnormal_blk = []

        for blk, seq in data_dict.items():
            if len(seq) > self.window_size:
                labels = seq[self.window_size:]
                seqs = sliding_window_view(seq[:-1], self.window_size)
                if hdfs_labels[blk]:
                    abnormal.append(seqs)
                    abnormal_label.append(labels)
                    abnormal_blk.append(blk)
                else:
                    normal.append(seqs)
                    normal_label.append(labels)

        print("normal : ", len(normal))
        print("abnormal : ", len(abnormal))
        train_seq, test_seq, train_label, test_label = train_test_split(
            normal, normal_label, train_size=0.8)
        train_seq = np.concatenate(train_seq)
        train_label = np.concatenate(train_label)

        if train:
            self.set_dataLoader_training_1(train_seq, train_label)
            self.train()

        # predict

        FP = 0
        TP = 0
        mem = {}
        for seqs, labels in tqdm(zip(test_seq, test_label),
                                 total=len(test_seq)):
            for seq, label in zip(seqs, labels):
                # Hashable memoization key; seq is a numpy row, so "+ [label]" would add element-wise.
                seq_tuple = tuple(seq) + (label,)
                if seq_tuple in mem:
                    result = mem[seq_tuple]
                else:
                    result = self.predict_seq(seq, label)
                    mem[seq_tuple] = result
                if result:
                    FP += 1
                    break
        for seqs, labels in tqdm(zip(abnormal, abnormal_label),
                                 total=len(abnormal)):
            for seq, label in zip(seqs, labels):
                seq_tuple = tuple(seq) + (label,)
                if seq_tuple in mem:
                    result = mem[seq_tuple]
                else:
                    result = self.predict_seq(seq, label)
                    mem[seq_tuple] = result
                if result:
                    TP += 1
                    break
        FN = len(abnormal) - TP
        P = 100 * TP / (TP + FP)
        R = 100 * TP / (TP + FN)
        F1 = 2 * P * R / (P + R)
        print('''false positive (FP): {}, false negative (FN): {},
            Precision: {:.3f}%, Recall: {:.3f}%,
            F1-measure: {:.3f}%'''.format(FP, FN, P, R, F1))