"""Cluster flows extracted from a malicious pcap with KMeans, then fit a
linear SVM using the cluster assignments as labels.

Pipeline: pcap -> FlowRecoder flow dicts -> DictVectorizer feature matrix
-> KMeans(2) -> train/test split -> linear SVC.
"""
import json

from FlowRecoder import get_data, gen_json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction import DictVectorizer
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
# plot_confusion_matrix was removed in scikit-learn 1.2; ConfusionMatrixDisplay
# is its replacement (ConfusionMatrixDisplay.from_estimator / from_predictions).
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn import svm, datasets
from sklearn.svm import SVC

# Extract per-flow feature dicts from the pcap and round-trip through JSON.
data = get_data("../pcap_files/malware/http-flood.pcap")
data = json.loads(gen_json(data))

# Source IPs per flow (kept for reference; not used in the fit below).
labels_y = [x['src'] for x in data.values()]
measurements = [x for x in data.values()]

# Vectorize the list of feature dicts into a dense numeric matrix.
vec = DictVectorizer()
X = vec.fit_transform(measurements).toarray()

# Unsupervised step: partition flows into two clusters.
kmeans = KMeans(n_clusters=2).fit(X)
labels = kmeans.labels_
centers = kmeans.cluster_centers_

# Supervised step: train a linear SVM to reproduce the cluster assignment.
class_names = labels
X_train, X_test, y_train, y_test = train_test_split(X, labels, random_state=0)
classifier = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
"""Run DBSCAN over flow features extracted from a pcap and print a summary."""
from FlowRecoder import get_data, gen_json
import pandas as pd
import numpy as np
import json
from sklearn.feature_extraction import DictVectorizer

# Load per-flow feature dicts and vectorize them into a dense matrix.
data = get_data("pcap_files/example.pcap")
data = json.loads(gen_json(data))
measurements = [x for x in data.values()]
vec = DictVectorizer()
X = vec.fit_transform(measurements).toarray()

# DBSCAN
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn import metrics

# Clustering hyper-parameters: neighborhood radius and density threshold.
eps = 4
min_samples = 2

model = DBSCAN(eps=eps, min_samples=min_samples, metric='euclidean').fit(X)

# Boolean mask marking which samples DBSCAN considers core points.
is_core = np.zeros_like(model.labels_, dtype=bool)
is_core[model.core_sample_indices_] = True

# Report the run configuration and remind the reader how noise is labeled.
print("Number of Data : {0}\n".format(len(model.labels_)))
print("params : eps = {0}, min_samples = {1}\n".format(eps, min_samples))
print('※ cluster labels : -1 is noise point\n')
print('> OUTPUT')
"""Export per-flow features from a pcap to a CSV file under output/."""
from FlowRecoder import get_data, gen_json
import pandas as pd
import csv
import json
import os

#fInput = "malware/http-flood.pcap"
fInput = "example.pcap"

data = get_data("pcap_files/" + fInput)
data = gen_json(data)

# CSV header; order must match the attribute order emitted by gen_json.
attrs = [
    'proto_name', 'src', 'sport', 'dst', 'dport', 'proto',
    'push_flag_ratio', 'average_len', 'average_payload_len',
    'pkt_count', 'flow_average_inter_arrival_time',
]

# json.loads already returns a dict, so no dict() wrapper is needed;
# build one row per flow with a comprehension instead of a manual loop.
data = json.loads(data)
datas = [list(flow.values()) for flow in data.values()]

# Ensure the output directory exists — to_csv raises OSError otherwise.
os.makedirs('output', exist_ok=True)

# splitext keeps the full stem (e.g. "a.b.pcap" -> "a.b") where the old
# split('.')[0] would have truncated it.
stem = os.path.splitext(os.path.basename(fInput))[0]
df = pd.DataFrame(datas)
df.to_csv('output/' + stem + ".csv", index=False, header=attrs)