Example #1
0
    def classify(self):
        """Classify DDoS flows using Linear Discriminant Analysis.

        The classifier held in ``self._classifier`` is fitted and
        evaluated once for every (train, test) index pair yielded by
        ``self._kfold``. The roles of the two index sets are deliberately
        switched: the classifier is fitted on the ``test`` indices and
        evaluated on the ``train`` indices.

        The data passed through to the fit() method cannot be a string
        type, so samples and labels are cast to ``np_float`` first.

        :return: A list with one entry per fold, each of the form
            [fold_num, tp, tn, fp, fn, detection_rate, false_pos_rate,
            mislabeled, test_size]. Fold numbering starts at 1.
        """
        all_results = []  # Results from all fold trials
        for fold_num, (train, test) in enumerate(self._kfold, start=1):
            print("\tTraining LDA...")
            # NOTE: The training and testing sets are switched around.
            # Rows are gathered with list comprehensions instead of map():
            # on Python 3 map() is a lazy iterator, which numpy would wrap
            # as a 0-d object array that cannot be cast to float.
            train_array = np_array.array(
                [self._data[i] for i in test]).astype(np_float)
            train_label_array = np_array.array(
                [self._labels[i] for i in test]).astype(np_float)
            self._classifier.fit(train_array, train_label_array)
            print("\tTesting classifier...")
            test_array = np_array.array(
                [self._data[i] for i in train]).astype(np_float)
            test_label_array = np_array.array(
                [self._labels[i] for i in train]).astype(np_float)
            test_size = len(train)  # Remember the switch of sets!
            pred = self._classifier.predict(test_array)
            # Count the predictions that disagree with the true labels.
            mislabeled = (test_label_array != pred).sum()
            tp, tn, fp, fn = rc.calculate_tpn_fpn(test_label_array, pred)
            detection_rate = rc.detection_rate(tp, fn)
            false_pos_rate = rc.false_positive_rate(tn, fp)
            all_results.append([
                fold_num, tp, tn, fp, fn, detection_rate, false_pos_rate,
                mislabeled, test_size
            ])
        return all_results
Example #2
0
    def classify(self):
        """Classify DDoS flows using a Support Vector Machine.

        Note that SVM cannot handle too many data points for training.
        The exact number however is not currently known... Therefore use
        the StratifiedKFold object to obtain an even smaller training
        set. Alternatively, switch the training and testing sets around.
        It's an ugly hack...

        This method takes the second approach: for every (train, test)
        index pair yielded by ``self._kfold`` the classifier is fitted on
        the ``test`` indices and evaluated on the ``train`` indices.

        The data passed through to the fit() method cannot be a string
        type, so samples and labels are cast to ``np_float`` first.

        :return: A list with one entry per fold, each of the form
            [fold_num, tp, tn, fp, fn, detection_rate, false_pos_rate,
            mislabeled, test_size]. Fold numbering starts at 1.
        """
        # All hyper-parameters are read from the configuration dict.
        classifier = svm.SVC(
            C=self._config["C"],
            kernel=self._config["kernel"],
            degree=self._config["degree"],
            gamma=self._config["gamma"],
            coef0=self._config["coef0"],
            shrinking=self._config["shrinking"],
            probability=self._config["probability"],
            tol=self._config["tol"],
            cache_size=self._config["cache_size"],
            class_weight=self._config["class_weight"],
            verbose=self._config["verbose"],
            max_iter=self._config["max_iter"],
            decision_function_shape=self._config["decision_function_shape"],
            random_state=self._config["random_state"])
        all_results = []  # Results from all fold trials
        for fold_num, (train, test) in enumerate(self._kfold, start=1):
            print("\tTraining SVM...")
            # NOTE: The training and testing sets are switched around.
            # Rows are gathered with list comprehensions instead of map():
            # on Python 3 map() is a lazy iterator, which numpy would wrap
            # as a 0-d object array that cannot be cast to float.
            train_array = np_array.array(
                [self._data[i] for i in test]).astype(np_float)
            train_label_array = np_array.array(
                [self._labels[i] for i in test]).astype(np_float)
            classifier.fit(train_array, train_label_array)
            print("\tTesting classifier...")
            test_array = np_array.array(
                [self._data[i] for i in train]).astype(np_float)
            test_label_array = np_array.array(
                [self._labels[i] for i in train]).astype(np_float)
            test_size = len(train)  # Remember the switch of sets!
            pred = classifier.predict(test_array)
            # Count the predictions that disagree with the true labels.
            mislabeled = (test_label_array != pred).sum()
            tp, tn, fp, fn = rc.calculate_tpn_fpn(test_label_array, pred)
            detection_rate = rc.detection_rate(tp, fn)
            false_pos_rate = rc.false_positive_rate(tn, fp)
            all_results.append([
                fold_num, tp, tn, fp, fn, detection_rate, false_pos_rate,
                mislabeled, test_size
            ])
        return all_results
Example #3
0
    def classify(self):
        """Classify DDoS flows using a Random Forest.

        A fresh RandomForestClassifier is built from ``self._config`` and
        then fitted and evaluated once for every (train, test) index pair
        yielded by ``self._kfold``. The roles of the two index sets are
        deliberately switched: the classifier is fitted on the ``test``
        indices and evaluated on the ``train`` indices.

        The data passed through to the fit() method cannot be a string
        type, so samples and labels are cast to ``np_float`` first.

        :return: A list with one entry per fold, each of the form
            [fold_num, tp, tn, fp, fn, detection_rate, false_pos_rate,
            mislabeled, test_size]. Fold numbering starts at 1.
        """
        # All hyper-parameters are read from the configuration dict.
        classifier = RandomForestClassifier(
            n_estimators=self._config["n_estimators"],
            criterion=self._config["criterion"],
            max_depth=self._config["max_depth"],
            min_samples_split=self._config["min_samples_split"],
            min_samples_leaf=self._config["min_samples_leaf"],
            min_weight_fraction_leaf=self._config["min_weight_fraction_leaf"],
            max_features=self._config["max_features"],
            max_leaf_nodes=self._config["max_leaf_nodes"],
            bootstrap=self._config["bootstrap"],
            oob_score=self._config["oob_score"],
            n_jobs=self._config["n_jobs"],
            random_state=self._config["random_state"],
            verbose=self._config["verbose"],
            warm_start=self._config["warm_start"],
            class_weight=self._config["class_weight"])
        all_results = []  # Results from all fold trials
        for fold_num, (train, test) in enumerate(self._kfold, start=1):
            print("\tTraining Random Forest...")
            # NOTE: The training and testing sets are switched around.
            # Rows are gathered with list comprehensions instead of map():
            # on Python 3 map() is a lazy iterator, which numpy would wrap
            # as a 0-d object array that cannot be cast to float.
            train_array = np_array.array(
                [self._data[i] for i in test]).astype(np_float)
            train_label_array = np_array.array(
                [self._labels[i] for i in test]).astype(np_float)
            classifier.fit(train_array, train_label_array)
            print("\tTesting classifier...")
            test_array = np_array.array(
                [self._data[i] for i in train]).astype(np_float)
            test_label_array = np_array.array(
                [self._labels[i] for i in train]).astype(np_float)
            test_size = len(train)  # Remember the switch of sets!
            pred = classifier.predict(test_array)
            # Count the predictions that disagree with the true labels.
            mislabeled = (test_label_array != pred).sum()
            tp, tn, fp, fn = rc.calculate_tpn_fpn(test_label_array, pred)
            detection_rate = rc.detection_rate(tp, fn)
            false_pos_rate = rc.false_positive_rate(tn, fp)
            all_results.append([
                fold_num, tp, tn, fp, fn, detection_rate, false_pos_rate,
                mislabeled, test_size
            ])
        return all_results
Example #4
0
    def classify(self):
        """Classify DDoS flows using K-Nearest Neighbours.

        A fresh KNeighborsClassifier is built from ``self._config`` and
        then fitted and evaluated once for every (train, test) index pair
        yielded by ``self._kfold``. The roles of the two index sets are
        deliberately switched: the classifier is fitted on the ``test``
        indices and evaluated on the ``train`` indices.

        The data passed through to the fit() method cannot be a string
        type, so samples and labels are cast to ``np_float`` first.

        :return: A list with one entry per fold, each of the form
            [fold_num, tp, tn, fp, fn, detection_rate, false_pos_rate,
            mislabeled, test_size]. Fold numbering starts at 1.
        """
        # All hyper-parameters are read from the configuration dict.
        classifier = KNeighborsClassifier(
            n_neighbors=self._config["n_neighbors"],
            weights=self._config["weights"],
            algorithm=self._config["algorithm"],
            leaf_size=self._config["leaf_size"],
            metric=self._config["metric"],
            p=self._config["p"],
            metric_params=self._config["metric_params"],
            n_jobs=self._config["n_jobs"])
        all_results = []  # Results from all fold trials
        for fold_num, (train, test) in enumerate(self._kfold, start=1):
            print("\tTraining K-Nearest Neighbours...")
            # NOTE: The training and testing sets are switched around.
            # Rows are gathered with list comprehensions instead of map():
            # on Python 3 map() is a lazy iterator, which numpy would wrap
            # as a 0-d object array that cannot be cast to float.
            train_array = np_array.array(
                [self._data[i] for i in test]).astype(np_float)
            train_label_array = np_array.array(
                [self._labels[i] for i in test]).astype(np_float)
            classifier.fit(train_array, train_label_array)
            print("\tTesting classifier...")
            test_array = np_array.array(
                [self._data[i] for i in train]).astype(np_float)
            test_label_array = np_array.array(
                [self._labels[i] for i in train]).astype(np_float)
            test_size = len(train)  # Remember the switch of sets!
            pred = classifier.predict(test_array)
            # Count the predictions that disagree with the true labels.
            mislabeled = (test_label_array != pred).sum()
            tp, tn, fp, fn = rc.calculate_tpn_fpn(test_label_array, pred)
            detection_rate = rc.detection_rate(tp, fn)
            false_pos_rate = rc.false_positive_rate(tn, fp)
            all_results.append([
                fold_num, tp, tn, fp, fn, detection_rate, false_pos_rate,
                mislabeled, test_size
            ])
        return all_results
    def classify(self):
        """Classify DDoS flows using a Support Vector Machine.

        Note that SVM cannot handle too many data points for training.
        The exact number however is not currently known... Therefore use
        the StratifiedKFold object to obtain an even smaller training
        set. Alternatively, switch the training and testing sets around.
        It's an ugly hack...

        This method takes the second approach: for every (train, test)
        index pair yielded by ``self._kfold`` the classifier held in
        ``self._classifier`` is fitted on the ``test`` indices and
        evaluated on the ``train`` indices.

        The data passed through to the fit() method cannot be a string
        type, so samples and labels are cast to ``np_float`` first.

        :return: A list with one entry per fold, each of the form
            [fold_num, tp, tn, fp, fn, detection_rate, false_pos_rate,
            mislabeled, test_size]. Fold numbering starts at 1.
        """
        all_results = []  # Results from all fold trials
        for fold_num, (train, test) in enumerate(self._kfold, start=1):
            print("\tTraining SVM...")
            # NOTE: The training and testing sets are switched around.
            # Rows are gathered with list comprehensions instead of map():
            # on Python 3 map() is a lazy iterator, which numpy would wrap
            # as a 0-d object array that cannot be cast to float.
            train_array = np_array.array(
                [self._data[i] for i in test]).astype(np_float)
            train_label_array = np_array.array(
                [self._labels[i] for i in test]).astype(np_float)
            self._classifier.fit(train_array, train_label_array)
            print("\tTesting classifier...")
            test_array = np_array.array(
                [self._data[i] for i in train]).astype(np_float)
            test_label_array = np_array.array(
                [self._labels[i] for i in train]).astype(np_float)
            test_size = len(train)  # Remember the switch of sets!
            pred = self._classifier.predict(test_array)
            # Count the predictions that disagree with the true labels.
            mislabeled = (test_label_array != pred).sum()
            tp, tn, fp, fn = rc.calculate_tpn_fpn(test_label_array, pred)
            detection_rate = rc.detection_rate(tp, fn)
            false_pos_rate = rc.false_positive_rate(tn, fp)
            all_results.append([
                fold_num, tp, tn, fp, fn, detection_rate, false_pos_rate,
                mislabeled, test_size
            ])
        return all_results