Beispiel #1
0
 def setUp(self):
     """Build shared fixtures: the message parser, fake paths, a three-line
     log sample and three identified patterns grouped by cardinality."""
     self.parser = Dataset.parser_message
     self.path = "fake_path"
     self.path_list = [self.path] * 3
     # One raw syslog-style line; the test payload is three copies of it.
     self.line = "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123"
     self.data_test = "\n".join([self.line] * 3)
     # Three patterns with ids 1..3 that differ only in their word lists.
     word_lists = [["session", "User"], ["Home", "Other"], ["Cat", "Users"]]
     built_patterns = []
     for pattern_id, words in enumerate(word_lists, start=1):
         pattern = Pattern(cardinality=6,
                           pattern_word=words,
                           pattern_index=[1, 4])
         pattern.id = pattern_id
         built_patterns.append(pattern)
     self.pattern, self.pattern2, self.pattern3 = built_patterns
     # Patterns indexed by line cardinality (7) then by pattern size.
     self.dict_patterns = {7: {2: list(built_patterns)}}
     self.dict_patterns_2 = {
         7: {
             2: list(built_patterns),
             1: list(built_patterns)
         }
     }
Beispiel #2
0
    def setUp(self):
        """Build a Dataset fixture whose file loading and pattern lookup are mocked.

        Globally replaces ``Journal.find_pattern`` with an ordered Mock and
        feeds ``Dataset.load_files`` a pickled in-memory model via a patched
        ``open`` so no real files are touched.
        """
        self.model = LSTMLayer(num_classes=5)
        self.default_pattern = Pattern(0, [], [])
        # One (identical) LSTM state dict per cardinality key 1..3.
        self.list_model = {
            1: self.model.state_dict(),
            2: self.model.state_dict(),
            3: self.model.state_dict()
        }
        # Three distinct patterns with ids 1..3 returned cyclically by the mock.
        default_pattern1 = Pattern(0, [], [])
        default_pattern1.id = 1
        default_pattern2 = Pattern(0, [], [])
        default_pattern2.id = 2
        default_pattern3 = Pattern(0, [], [])
        default_pattern3.id = 3
        m = Mock()
        # 90 queued return values: enough for the 30 logs built below
        # (and then some) — each call pops the next pattern in order.
        m.side_effect = [default_pattern1, default_pattern2, default_pattern3
                         ] * 30
        # Mock(return_value=self.default_pattern)
        # NOTE(review): this patches the class attribute globally and is not
        # restored on tearDown — later tests see the mock. Confirm intended.
        logflow.logsparser.Journal.Journal.find_pattern = m
        #mock_get_pattern.return_value = 1
        # Pickled payload that Dataset.load_files will read through the
        # patched open(): word vectors, pattern counters and one LSTM state.
        read_data = pickle.dumps({
            'word2vec': {
                "1": np.asarray([1] * 20),
                "2": np.asarray([2] * 20),
                "3": np.asarray([3] * 20),
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 100,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            },
            "LSTM": {
                3: self.model.state_dict()
            },
            "dict_patterns": {}
        })

        # Every open() inside the with-block yields the pickled payload above.
        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            self.dataset = Dataset(path_model="/",
                                   path_data="/",
                                   name_model="/")
            self.dataset.load_files()

        self.dataset.LSTM = self.list_model
        # 30 identical logs, distinguished only by their line index.
        self.dataset.list_logs = []
        for i in range(30):
            self.dataset.list_logs.append(
                Log("1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123",
                    index_line=i))
Beispiel #3
0
    def find_pattern(message : List[str], dict_patterns : dict) -> Pattern:
        """Find the pattern associated to a log.

        The best pattern is the one sharing the maximum number of words with
        the line; candidates are scanned from the largest pattern size down so
        the search can stop early.

        Args:
            message (List[str]): list of the words of the message part of the log.
            dict_patterns (dict): the dict of patterns.

        Returns:
            Pattern: the pattern associated to the line.
        """
        # Empty pattern used as the initial "best so far" baseline.
        best = Pattern(0, [], [])
        # Only patterns whose cardinality matches the line length are candidates.
        # (A pattern's cardinality is the one used when it was mined, not its word count.)
        candidates_by_size = dict_patterns[len(message)]
        # Largest pattern sizes first: lets us bail out as soon as no bigger
        # match is possible.
        for current_size in sorted(candidates_by_size, reverse=True):
            for candidate in candidates_by_size[current_size]:
                # Number of candidate words that appear at their expected index.
                matched = sum(
                    1 for i in range(len(candidate))
                    if candidate.pattern_word[i] == message[candidate.pattern_index[i]]
                )
                if matched > len(best):
                    best = candidate
            # Remaining sizes are smaller than the current best match: stop.
            if len(best) > current_size:
                break
        return best
Beispiel #4
0
 def detect_patterns(self):
     """Detect one pattern per counted entry.

     For each entry, keeps the largest subset of positions whose words share
     the same occurrence count across all entries, then deduplicates the
     resulting pattern list.
     """
     for raw_entry in self.counter_general:
         words = list(raw_entry)
         # Per-position similarity scores; sized by cardinality so unfilled
         # positions keep a score of 0.
         scores = [0] * self.cardinality
         # dict_words gives, in one lookup, how many entries share this word
         # at this position.
         for position, word in enumerate(words):
             scores[position] += self.dict_words[position][word]
         # Most frequent score value defines the subset kept,
         # e.g. [10, 10, 2, 2, 2] keeps the positions scoring 2.
         target_score = Counter(scores).most_common(1)[0][0]  # [(value, nb_value)]
         # Indexes of the kept positions...
         kept_indexes = [
             position for position, score in enumerate(scores)
             if score == target_score
         ]
         # ...and the corresponding words.
         kept_words = [words[position] for position in kept_indexes]
         self.list_pattern.append(
             Pattern(self.cardinality, kept_words, kept_indexes))
     # Deduplicate — relies on Pattern being hashable.
     self.list_pattern = list(set(self.list_pattern))
Beispiel #5
0
    def test_workflow_wrong_first_log(self):
        """Workflow detection starting from an invalid ("-1") log returns "-1".

        Builds a local Dataset from a pickled in-memory model (via a patched
        ``open``), fills it with 30 invalid logs whose pattern id is -1, and
        checks that ``detect_workflow`` surfaces the sentinel tree "-1".
        """
        # Pickled payload read by Dataset.load_files through the mocked open().
        read_data = pickle.dumps({
            'word2vec': {
                "1": np.asarray([1] * 20),
                "2": np.asarray([2] * 20),
                "3": np.asarray([3] * 20),
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 100,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            },
            "LSTM": {
                3: self.model.state_dict()
            },
            "dict_patterns": {}
        })
        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            dataset_local = Dataset(path_model="/",
                                    path_data="/",
                                    name_model="/")
            dataset_local.load_files()

        dataset_local.LSTM = self.list_model
        dataset_local.list_logs = []
        # Sentinel pattern with id -1 marks an unrecognized log.
        default_pattern_local = Pattern(0, [], [])
        default_pattern_local.id = -1
        # NOTE(review): patches find_pattern globally without restore —
        # subsequent tests inherit this mock. Confirm intended.
        m = Mock()
        m.side_effect = [default_pattern_local] * 30
        logflow.logsparser.Journal.Journal.find_pattern = m
        # 30 invalid logs, all carrying the sentinel pattern.
        for i in range(30):
            log = Log("-1", index_line=i)
            log.pattern = default_pattern_local
            dataset_local.list_logs.append(log)
        workflow = Workflow(dataset_local)
        #with self.assertRaises(Exception):
        tree = workflow.detect_workflow(10)
        self.assertEqual(tree, "-1")
Beispiel #6
0
    def test_workflow_working_with_child(self):
        """Workflow detection succeeds when inference reports child links.

        Mocks ``Inference.test`` to return, on five successive calls, two
        candidate parent logs (25 and 15) with weight 10, and mocks
        ``find_pattern`` to cycle over three patterns, then runs
        ``detect_workflow`` on log 25.
        """
        m = Mock()
        # Five queued results, each a fresh pair of candidate dicts.
        # (Key "weigth" reproduces the spelling used by the code under test.)
        m.side_effect = [
            [{
                "log": 25,
                "weigth": 10
            }, {
                "log": 15,
                "weigth": 10
            }]
            for _ in range(5)
        ]
        logflow.treebuilding.Inference.Inference.test = m

        # Three patterns with ids 1..3, served cyclically by the mock below.
        mocked_patterns = []
        for pattern_id in (1, 2, 3):
            pattern = Pattern(0, [], [])
            pattern.id = pattern_id
            mocked_patterns.append(pattern)
        m_pattern = Mock()
        m_pattern.side_effect = mocked_patterns * 3000
        logflow.logsparser.Journal.Journal.find_pattern = m_pattern

        workflow = Workflow(self.dataset)
        workflow.detect_workflow(25)
Beispiel #7
0
 def setUp(self):
     """Fixtures: the message parser, fake paths, two three-line log payloads
     (the second with a truncated last line) and a single-pattern dict."""
     self.parser = Dataset.parser_message
     self.path = "fake_path"
     # One raw log line; payloads are built from copies of it.
     base_line = "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123"
     self.data_test = "\n".join([base_line] * 3)
     # Same payload but the last line is missing its trailing " 123".
     self.data_test_2 = "\n".join([base_line, base_line, base_line[:-4]])
     self.list_files = ["fake_path" + str(i) for i in (1, 2, 3)]
     self.pattern = Pattern(cardinality=6,
                            pattern_word=["session", "User"],
                            pattern_index=[1, 4])
     # Cardinality 7 -> pattern size 2 -> the single known pattern.
     self.dict_patterns = {7: {2: [self.pattern]}}
Beispiel #8
0
 def setUp(self):
     """Minimal fixtures: a tiny data sample, local paths, a dataset name
     and one identified pattern."""
     self.data_test = " ".join(["2"] * 3)
     self.path_data = "./"
     self.path_model = "./"
     self.name_dataset = "test"
     pattern = Pattern(cardinality=6,
                       pattern_word=["session", "User"],
                       pattern_index=[1, 4])
     pattern.id = 2
     self.pattern = pattern
     # Cardinality 7 -> pattern size 2 -> the single known pattern.
     self.dict_patterns = {7: {2: [pattern]}}
Beispiel #9
0
    def setUp(self):
        """Build Log fixtures, each carrying a pre-assigned pattern and,
        for most of them, a cardinality of 3."""

        def build_log(message, words, pattern_id, cardinality=None):
            # Attach a fresh pattern (and optionally a cardinality) to a Log.
            log = Log(message)
            pattern = Pattern(0, words, [])
            pattern.id = pattern_id
            log.pattern = pattern
            if cardinality is not None:
                log.cardinality = cardinality
            return log

        self.log = build_log(
            "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123",
            ["home"], 0, 3)
        self.log2 = build_log(
            "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 1234",
            ["house"], 1, 3)
        self.log3 = build_log(
            "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 1235",
            ["HouseCat"], 2, 3)
        self.log5 = build_log(
            "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 1235",
            ["HouseCat"], 2, 3)
        # Truncated, malformed line: pattern attached but no cardinality.
        self.log4 = build_log(
            "1530388399a 2018 Jun 30 21:53:19 m21205 authpriv info", [], 2)

        # NOTE(review): log5 is immediately overwritten here, discarding the
        # fixture built above — confirm which value the tests actually expect.
        self.log5 = Log("-1")