def classify(self):
    """Classify DDoS flows using Linear Discriminant Analysis.

    The data passed through to the fit() method cannot be a string
    type.

    :return: Results of the classification — a list with one entry per
        fold: [fold_num, tp, tn, fp, fn, detection_rate,
        false_pos_rate, mislabeled, test_size].
    """
    all_results = []  # Results from all fold trials
    fold_num = 1
    for train, test in self._kfold:
        print("\tTraining LDA...")
        # NOTE: I have switched the training and testing set around:
        # the (smaller) 'test' split trains the model, the (larger)
        # 'train' split evaluates it.
        # FIX: under Python 3 map() is a lazy iterator, which numpy
        # cannot convert element-wise; materialise it with list()
        # (harmless no-op change on Python 2).
        train_array = np_array.array(
            list(map(self._data.__getitem__, test))).astype(np_float)
        train_label_array = np_array.array(
            list(map(self._labels.__getitem__, test))).astype(np_float)
        self._classifier.fit(train_array, train_label_array)
        print("\tTesting classifier...")
        test_array = np_array.array(
            list(map(self._data.__getitem__, train))).astype(np_float)
        test_label_array = np_array.array(
            list(map(self._labels.__getitem__, train))).astype(np_float)
        test_size = len(train)  # Remember the switch of sets!
        pred = self._classifier.predict(test_array)
        # Count of predictions that disagree with the ground truth.
        mislabeled = (test_label_array != pred).sum()
        tp, tn, fp, fn = rc.calculate_tpn_fpn(test_label_array, pred)
        detection_rate = rc.detection_rate(tp, fn)
        false_pos_rate = rc.false_positive_rate(tn, fp)
        all_results.append([fold_num, tp, tn, fp, fn, detection_rate,
                            false_pos_rate, mislabeled, test_size])
        fold_num += 1
    return all_results
def classify(self):
    """Classify DDoS flows using a Support Vector Machine.

    Note that SVM cannot handle too many data points for training.
    The exact number however is not currently known... Therefore use
    the StratifiedKFold object to obtain an even smaller training
    set. Alternatively, switch the training and testing sets around.
    It's an ugly hack...

    The data passed through to the fit() method cannot be a string
    type.

    :return: Results of the classification — a list with one entry per
        fold: [fold_num, tp, tn, fp, fn, detection_rate,
        false_pos_rate, mislabeled, test_size].
    """
    # Build the SVC entirely from the loaded configuration so that
    # every hyperparameter is externally tunable.
    classifier = svm.SVC(
        C=self._config["C"],
        kernel=self._config["kernel"],
        degree=self._config["degree"],
        gamma=self._config["gamma"],
        coef0=self._config["coef0"],
        shrinking=self._config["shrinking"],
        probability=self._config["probability"],
        tol=self._config["tol"],
        cache_size=self._config["cache_size"],
        class_weight=self._config["class_weight"],
        verbose=self._config["verbose"],
        max_iter=self._config["max_iter"],
        decision_function_shape=self._config["decision_function_shape"],
        random_state=self._config["random_state"])
    all_results = []  # Results from all fold trials
    fold_num = 1
    for train, test in self._kfold:
        print("\tTraining SVM...")
        # NOTE: I have switched the training and testing set around:
        # the (smaller) 'test' split trains the model, the (larger)
        # 'train' split evaluates it (see docstring).
        # FIX: under Python 3 map() is a lazy iterator, which numpy
        # cannot convert element-wise; materialise it with list()
        # (harmless no-op change on Python 2).
        train_array = np_array.array(
            list(map(self._data.__getitem__, test))).astype(np_float)
        train_label_array = np_array.array(
            list(map(self._labels.__getitem__, test))).astype(np_float)
        classifier.fit(train_array, train_label_array)
        print("\tTesting classifier...")
        test_array = np_array.array(
            list(map(self._data.__getitem__, train))).astype(np_float)
        test_label_array = np_array.array(
            list(map(self._labels.__getitem__, train))).astype(np_float)
        test_size = len(train)  # Remember the switch of sets!
        pred = classifier.predict(test_array)
        # Count of predictions that disagree with the ground truth.
        mislabeled = (test_label_array != pred).sum()
        tp, tn, fp, fn = rc.calculate_tpn_fpn(test_label_array, pred)
        detection_rate = rc.detection_rate(tp, fn)
        false_pos_rate = rc.false_positive_rate(tn, fp)
        all_results.append([fold_num, tp, tn, fp, fn, detection_rate,
                            false_pos_rate, mislabeled, test_size])
        fold_num += 1
    return all_results
def classify(self):
    """Classify DDoS flows using a Random Forest.

    The data passed through to the fit() method cannot be a string
    type.

    :return: Results of the classification — a list with one entry per
        fold: [fold_num, tp, tn, fp, fn, detection_rate,
        false_pos_rate, mislabeled, test_size].
    """
    # Build the forest entirely from the loaded configuration so that
    # every hyperparameter is externally tunable.
    classifier = RandomForestClassifier(
        n_estimators=self._config["n_estimators"],
        criterion=self._config["criterion"],
        max_depth=self._config["max_depth"],
        min_samples_split=self._config["min_samples_split"],
        min_samples_leaf=self._config["min_samples_leaf"],
        min_weight_fraction_leaf=self._config["min_weight_fraction_leaf"],
        max_features=self._config["max_features"],
        max_leaf_nodes=self._config["max_leaf_nodes"],
        bootstrap=self._config["bootstrap"],
        oob_score=self._config["oob_score"],
        n_jobs=self._config["n_jobs"],
        random_state=self._config["random_state"],
        verbose=self._config["verbose"],
        warm_start=self._config["warm_start"],
        class_weight=self._config["class_weight"])
    all_results = []  # Results from all fold trials
    fold_num = 1
    for train, test in self._kfold:
        print("\tTraining Random Forest...")
        # NOTE: I have switched the training and testing set around:
        # the (smaller) 'test' split trains the model, the (larger)
        # 'train' split evaluates it.
        # FIX: under Python 3 map() is a lazy iterator, which numpy
        # cannot convert element-wise; materialise it with list()
        # (harmless no-op change on Python 2).
        train_array = np_array.array(
            list(map(self._data.__getitem__, test))).astype(np_float)
        train_label_array = np_array.array(
            list(map(self._labels.__getitem__, test))).astype(np_float)
        classifier.fit(train_array, train_label_array)
        print("\tTesting classifier...")
        test_array = np_array.array(
            list(map(self._data.__getitem__, train))).astype(np_float)
        test_label_array = np_array.array(
            list(map(self._labels.__getitem__, train))).astype(np_float)
        test_size = len(train)  # Remember the switch of sets!
        pred = classifier.predict(test_array)
        # Count of predictions that disagree with the ground truth.
        mislabeled = (test_label_array != pred).sum()
        tp, tn, fp, fn = rc.calculate_tpn_fpn(test_label_array, pred)
        detection_rate = rc.detection_rate(tp, fn)
        false_pos_rate = rc.false_positive_rate(tn, fp)
        all_results.append([fold_num, tp, tn, fp, fn, detection_rate,
                            false_pos_rate, mislabeled, test_size])
        fold_num += 1
    return all_results
def classify(self):
    """Classify DDoS flows using K-Nearest Neighbours.

    The data passed through to the fit() method cannot be a string
    type.

    :return: Results of the classification — a list with one entry per
        fold: [fold_num, tp, tn, fp, fn, detection_rate,
        false_pos_rate, mislabeled, test_size].
    """
    # Build the KNN model entirely from the loaded configuration so
    # that every hyperparameter is externally tunable.
    classifier = KNeighborsClassifier(
        n_neighbors=self._config["n_neighbors"],
        weights=self._config["weights"],
        algorithm=self._config["algorithm"],
        leaf_size=self._config["leaf_size"],
        metric=self._config["metric"],
        p=self._config["p"],
        metric_params=self._config["metric_params"],
        n_jobs=self._config["n_jobs"])
    all_results = []  # Results from all fold trials
    fold_num = 1
    for train, test in self._kfold:
        print("\tTraining K-Nearest Neighbours...")
        # NOTE: I have switched the training and testing set around:
        # the (smaller) 'test' split trains the model, the (larger)
        # 'train' split evaluates it.
        # FIX: under Python 3 map() is a lazy iterator, which numpy
        # cannot convert element-wise; materialise it with list()
        # (harmless no-op change on Python 2).
        train_array = np_array.array(
            list(map(self._data.__getitem__, test))).astype(np_float)
        train_label_array = np_array.array(
            list(map(self._labels.__getitem__, test))).astype(np_float)
        classifier.fit(train_array, train_label_array)
        print("\tTesting classifier...")
        test_array = np_array.array(
            list(map(self._data.__getitem__, train))).astype(np_float)
        test_label_array = np_array.array(
            list(map(self._labels.__getitem__, train))).astype(np_float)
        test_size = len(train)  # Remember the switch of sets!
        pred = classifier.predict(test_array)
        # Count of predictions that disagree with the ground truth.
        mislabeled = (test_label_array != pred).sum()
        tp, tn, fp, fn = rc.calculate_tpn_fpn(test_label_array, pred)
        detection_rate = rc.detection_rate(tp, fn)
        false_pos_rate = rc.false_positive_rate(tn, fp)
        all_results.append([fold_num, tp, tn, fp, fn, detection_rate,
                            false_pos_rate, mislabeled, test_size])
        fold_num += 1
    return all_results
def classify(self):
    """Classify DDoS flows using a Support Vector Machine.

    Note that SVM cannot handle too many data points for training.
    The exact number however is not currently known... Therefore use
    the StratifiedKFold object to obtain an even smaller training
    set. Alternatively, switch the training and testing sets around.
    It's an ugly hack...

    The data passed through to the fit() method cannot be a string
    type.

    :return: Results of the classification — a list with one entry per
        fold: [fold_num, tp, tn, fp, fn, detection_rate,
        false_pos_rate, mislabeled, test_size].
    """
    all_results = []  # Results from all fold trials
    fold_num = 1
    for train, test in self._kfold:
        print("\tTraining SVM...")
        # NOTE: I have switched the training and testing set around:
        # the (smaller) 'test' split trains the model, the (larger)
        # 'train' split evaluates it (see docstring).
        # FIX: under Python 3 map() is a lazy iterator, which numpy
        # cannot convert element-wise; materialise it with list()
        # (harmless no-op change on Python 2).
        train_array = np_array.array(
            list(map(self._data.__getitem__, test))).astype(np_float)
        train_label_array = np_array.array(
            list(map(self._labels.__getitem__, test))).astype(np_float)
        self._classifier.fit(train_array, train_label_array)
        print("\tTesting classifier...")
        test_array = np_array.array(
            list(map(self._data.__getitem__, train))).astype(np_float)
        test_label_array = np_array.array(
            list(map(self._labels.__getitem__, train))).astype(np_float)
        test_size = len(train)  # Remember the switch of sets!
        pred = self._classifier.predict(test_array)
        # Count of predictions that disagree with the ground truth.
        mislabeled = (test_label_array != pred).sum()
        tp, tn, fp, fn = rc.calculate_tpn_fpn(test_label_array, pred)
        detection_rate = rc.detection_rate(tp, fn)
        false_pos_rate = rc.false_positive_rate(tn, fp)
        all_results.append([fold_num, tp, tn, fp, fn, detection_rate,
                            false_pos_rate, mislabeled, test_size])
        fold_num += 1
    return all_results