def setUp(self):
    self.parser = Dataset.parser_message
    self.path = "fake_path"
    self.path_list = ["fake_path", "fake_path", "fake_path"]
    line = ("1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd "
            "pam_unix(sshd:session): session closed, for1 User Root/1 123")
    # Three identical log lines, newline-separated.
    self.data_test = "\n".join([line] * 3)
    self.line = line
    self.pattern = Pattern(cardinality=6, pattern_word=["session", "User"], pattern_index=[1, 4])
    self.pattern.id = 1
    self.pattern2 = Pattern(cardinality=6, pattern_word=["Home", "Other"], pattern_index=[1, 4])
    self.pattern2.id = 2
    self.pattern3 = Pattern(cardinality=6, pattern_word=["Cat", "Users"], pattern_index=[1, 4])
    self.pattern3.id = 3
    self.dict_patterns = {7: {2: [self.pattern, self.pattern2, self.pattern3]}}
    self.dict_patterns_2 = {
        7: {
            2: [self.pattern, self.pattern2, self.pattern3],
            1: [self.pattern, self.pattern2, self.pattern3],
        }
    }
def setUp(self):
    self.model = LSTMLayer(num_classes=5)
    self.default_pattern = Pattern(0, [], [])
    self.list_model = {
        1: self.model.state_dict(),
        2: self.model.state_dict(),
        3: self.model.state_dict(),
    }
    default_pattern1 = Pattern(0, [], [])
    default_pattern1.id = 1
    default_pattern2 = Pattern(0, [], [])
    default_pattern2.id = 2
    default_pattern3 = Pattern(0, [], [])
    default_pattern3.id = 3
    # Cycle through the three stub patterns on every call to find_pattern.
    m = Mock()
    m.side_effect = [default_pattern1, default_pattern2, default_pattern3] * 30
    logflow.logsparser.Journal.Journal.find_pattern = m
    read_data = pickle.dumps({
        'word2vec': {
            "1": np.asarray([1] * 20),
            "2": np.asarray([2] * 20),
            "3": np.asarray([3] * 20),
            "4": [4] * 20,
            "5": [5] * 20,
            "6": [6] * 20,
            "7": [7] * 20,
        },
        'counter_patterns': {1: 100, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000},
        "LSTM": {3: self.model.state_dict()},
        "dict_patterns": {},
    })
    mockOpen = mock_open(read_data=read_data)
    with patch('builtins.open', mockOpen):
        self.dataset = Dataset(path_model="/", path_data="/", name_model="/")
        self.dataset.load_files()
        self.dataset.LSTM = self.list_model
        self.dataset.list_logs = []
        for i in range(30):
            self.dataset.list_logs.append(
                Log("1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd "
                    "pam_unix(sshd:session): session closed, for1 User Root/1 123",
                    index_line=i))
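# The fixture above relies on a recurring idiom: the expected on-disk model is
# serialized with pickle.dumps and served through mock_open, so Dataset.load_files
# reads fake bytes instead of touching the filesystem. Below is a minimal,
# standalone sketch of that idiom; the payload keys mirror the fixtures, and the
# path is purely illustrative:
import pickle
from unittest.mock import mock_open, patch

fake_bytes = pickle.dumps({"word2vec": {}, "counter_patterns": {}, "LSTM": {}, "dict_patterns": {}})
with patch('builtins.open', mock_open(read_data=fake_bytes)):
    with open("any/path", "rb") as handle:  # patched: the path is never actually opened
        payload = pickle.loads(handle.read())
assert payload["dict_patterns"] == {}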
def find_pattern(message: List[str], dict_patterns: dict) -> Pattern:
    """Find the pattern associated with a log.

    The best pattern is the one with the most words in common with the line.

    Args:
        message (List[str]): list of the words of the message part of the log.
        dict_patterns (dict): the dict of patterns.

    Returns:
        Pattern: the pattern associated with the line.
    """
    # Create a default (empty) pattern to compare against while looking for the best one.
    best_pattern = Pattern(0, [], [])
    # Get the patterns with the same cardinality as the line. The cardinality of a
    # pattern is the cardinality used when mining it, not its number of words.
    dict_patterns_size = dict_patterns[len(message)]
    # Start with the biggest patterns to save time.
    for size_pattern in sorted(dict_patterns_size.keys(), reverse=True):
        for pattern in dict_patterns_size[size_pattern]:
            nb_word_match = 0
            # Count the words the pattern has in common with the message.
            for i in range(len(pattern)):
                if pattern.pattern_word[i] == message[pattern.pattern_index[i]]:
                    nb_word_match += 1
            # More common words means a new best pattern.
            if nb_word_match > len(best_pattern):
                best_pattern = pattern
        # Sizes are visited in decreasing order, so once the best pattern is bigger
        # than the current size, no remaining pattern can win: stop the detection.
        if len(best_pattern) > size_pattern:
            break
    return best_pattern
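# A minimal usage sketch of find_pattern, assuming Pattern exposes __len__ as its
# number of pattern words (the loop above relies on this). The message and both
# patterns are illustrative, modeled on the fixtures used in the tests:
pattern_a = Pattern(cardinality=6, pattern_word=["session", "User"], pattern_index=[1, 4])
pattern_b = Pattern(cardinality=6, pattern_word=["session"], pattern_index=[1])
dict_patterns = {7: {2: [pattern_a], 1: [pattern_b]}}  # keyed by message length, then pattern size

message = ["pam_unix(sshd:session):", "session", "closed,", "for1", "User", "Root/1", "123"]
best = find_pattern(message, dict_patterns)
# pattern_a matches both "session" (index 1) and "User" (index 4); pattern_b's
# single match cannot replace it, and the size-ordered loop breaks after size 1.
assert best is pattern_a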
def detect_patterns(self):
    """Detect the patterns based on the maximum number of similar words."""
    for entry in self.counter_general:
        comparison_vector = [0] * self.cardinality
        position = 0
        entry = list(entry)
        # Once dict_words is built, the number of entries sharing a word costs only
        # one dictionary access per position.
        for word in entry:
            comparison_vector[position] += self.dict_words[position][word]
            position += 1
        # Keep the best subset of similar words, i.e. [10, 10, 2, 2, 2] keeps 2
        # as the best subset.
        best_subset_words_number = Counter(comparison_vector).most_common(1)[0][0]  # [(value, nb_value)]
        # Compute the indexes of the kept words...
        best_subset_words_index = [
            i for i, e in enumerate(comparison_vector) if e == best_subset_words_number
        ]
        # ...and the words themselves.
        best_subset_words_value = [entry[i] for i in best_subset_words_index]
        self.list_pattern.append(
            Pattern(self.cardinality, best_subset_words_value, best_subset_words_index))
    # Deduplicate the patterns.
    self.list_pattern = list(set(self.list_pattern))
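# A worked sketch of the subset selection above, using the [10, 10, 2, 2, 2]
# example from the inline comment (variable names shortened for the sketch):
# the most frequent count value in the comparison vector decides which
# positions are kept.
from collections import Counter

comparison_vector = [10, 10, 2, 2, 2]  # per-position frequencies for one entry
best_number = Counter(comparison_vector).most_common(1)[0][0]  # -> 2 (it appears 3 times)
best_index = [i for i, e in enumerate(comparison_vector) if e == best_number]
assert best_index == [2, 3, 4]  # the words at these positions form the pattern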
def test_workflow_wrong_first_log(self):
    read_data = pickle.dumps({
        'word2vec': {
            "1": np.asarray([1] * 20),
            "2": np.asarray([2] * 20),
            "3": np.asarray([3] * 20),
            "4": [4] * 20,
            "5": [5] * 20,
            "6": [6] * 20,
            "7": [7] * 20,
        },
        'counter_patterns': {1: 100, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000},
        "LSTM": {3: self.model.state_dict()},
        "dict_patterns": {},
    })
    mockOpen = mock_open(read_data=read_data)
    with patch('builtins.open', mockOpen):
        dataset_local = Dataset(path_model="/", path_data="/", name_model="/")
        dataset_local.load_files()
        dataset_local.LSTM = self.list_model
        dataset_local.list_logs = []
        default_pattern_local = Pattern(0, [], [])
        default_pattern_local.id = -1
        m = Mock()
        m.side_effect = [default_pattern_local] * 30
        logflow.logsparser.Journal.Journal.find_pattern = m
        for i in range(30):
            log = Log("-1", index_line=i)
            log.pattern = default_pattern_local
            dataset_local.list_logs.append(log)
        workflow = Workflow(dataset_local)
        # An unparsable first log must abort the detection and return "-1".
        tree = workflow.detect_workflow(10)
        self.assertEqual(tree, "-1")
def test_workflow_working_with_child(self):
    # Inference.test returns the same two mocked children on each of the five calls
    # (the "weigth" spelling is preserved from the original fixtures).
    children = [{"log": 25, "weigth": 10}, {"log": 15, "weigth": 10}]
    m = Mock()
    m.side_effect = [children] * 5
    logflow.treebuilding.Inference.Inference.test = m
    default_pattern1 = Pattern(0, [], [])
    default_pattern1.id = 1
    default_pattern2 = Pattern(0, [], [])
    default_pattern2.id = 2
    default_pattern3 = Pattern(0, [], [])
    default_pattern3.id = 3
    m_pattern = Mock()
    m_pattern.side_effect = [default_pattern1, default_pattern2, default_pattern3] * 3000
    logflow.logsparser.Journal.Journal.find_pattern = m_pattern
    workflow = Workflow(self.dataset)
    workflow.detect_workflow(25)
def setUp(self):
    self.parser = Dataset.parser_message
    self.path = "fake_path"
    line = ("1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd "
            "pam_unix(sshd:session): session closed, for1 User Root/1 123")
    # Three identical log lines, newline-separated.
    self.data_test = "\n".join([line] * 3)
    # Same data, but the last line is truncated (missing the trailing "123").
    truncated = ("1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd "
                 "pam_unix(sshd:session): session closed, for1 User Root/1")
    self.data_test_2 = "\n".join([line, line, truncated])
    self.list_files = ["fake_path1", "fake_path2", "fake_path3"]
    self.pattern = Pattern(cardinality=6, pattern_word=["session", "User"], pattern_index=[1, 4])
    self.dict_patterns = {7: {2: [self.pattern]}}
def setUp(self):
    self.data_test = "2 2 2"
    self.path_data = "./"
    self.path_model = "./"
    self.name_dataset = "test"
    self.pattern = Pattern(cardinality=6, pattern_word=["session", "User"], pattern_index=[1, 4])
    self.pattern.id = 2
    self.dict_patterns = {7: {2: [self.pattern]}}
def setUp(self):
    self.log = Log(
        "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd "
        "pam_unix(sshd:session): session closed, for1 User Root/1 123")
    default_pattern = Pattern(0, ["home"], [])
    default_pattern.id = 0
    self.log.pattern = default_pattern
    self.log.cardinality = 3
    self.log2 = Log(
        "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd "
        "pam_unix(sshd:session): session closed, for1 User Root/1 1234")
    default_pattern = Pattern(0, ["house"], [])
    default_pattern.id = 1
    self.log2.pattern = default_pattern
    self.log2.cardinality = 3
    self.log3 = Log(
        "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd "
        "pam_unix(sshd:session): session closed, for1 User Root/1 1235")
    default_pattern = Pattern(0, ["HouseCat"], [])
    default_pattern.id = 2
    self.log3.pattern = default_pattern
    self.log3.cardinality = 3
    self.log5 = Log(
        "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd "
        "pam_unix(sshd:session): session closed, for1 User Root/1 1235")
    default_pattern = Pattern(0, ["HouseCat"], [])
    default_pattern.id = 2
    self.log5.pattern = default_pattern
    self.log5.cardinality = 3
    self.log4 = Log("1530388399a 2018 Jun 30 21:53:19 m21205 authpriv info")
    default_pattern = Pattern(0, [], [])
    default_pattern.id = 2
    self.log4.pattern = default_pattern
    # Overwrites the log5 built above with the "-1" sentinel log.
    self.log5 = Log("-1")