# Example #1
# 0
from FlowRecoder import get_data, gen_json
import json
import pandas as pd
import numpy as np
from sklearn.feature_extraction import DictVectorizer
import matplotlib.pyplot as plt

# Load flows from a malicious HTTP-flood capture and vectorize them.
data = get_data("../pcap_files/malware/http-flood.pcap")
data = json.loads(gen_json(data))

# One label per flow: its source address (presumably the attacker vs.
# benign hosts — TODO confirm against the capture).
labels_y = [x['src'] for x in data.values()]

# Plain list() copy — the original identity comprehension
# `[x for x in data.values()]` is a Perflint PERF402 smell.
measurements = list(data.values())
vec = DictVectorizer()
# Each flow dict becomes one dense feature row.
X = vec.fit_transform(measurements).toarray()

from sklearn.cluster import KMeans

# Unsupervised pass: split the flows into two clusters.
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)

# Cluster assignment per flow, and the two centroids in feature space.
labels = kmeans.labels_
centers = kmeans.cluster_centers_

# Supervised step: train a linear SVM on the vectorized flows, using the
# KMeans cluster assignments as (pseudo-)labels.
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
# NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2
# (replaced by ConfusionMatrixDisplay.from_estimator); this import is
# unused below and will fail on modern scikit-learn — confirm the
# targeted sklearn version.
from sklearn.metrics import plot_confusion_matrix
from sklearn import svm, datasets
from sklearn.svm import SVC

# NOTE(review): labels_y (per-flow src addresses) computed earlier is
# never used; the SVM learns the KMeans labels instead — possibly the
# split was meant to use labels_y. Verify intent.
class_names = labels
X_train, X_test, y_train, y_test = train_test_split(X, labels, random_state=0)
classifier = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
from FlowRecoder import get_data, gen_json
import pandas as pd
import numpy as np
import json
from sklearn.feature_extraction import DictVectorizer

# Load flows from a benign example capture and vectorize them.
data = get_data("pcap_files/example.pcap")
data = json.loads(gen_json(data))

# Plain list() copy — the original identity comprehension
# `[x for x in data.values()]` is a Perflint PERF402 smell.
measurements = list(data.values())
vec = DictVectorizer()
# Each flow dict becomes one dense feature row.
X = vec.fit_transform(measurements).toarray()

# DBSCAN
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn import metrics

# Density-based clustering of the flow feature matrix.
eps = 4
min_samples = 2
db = DBSCAN(metric='euclidean', eps=eps, min_samples=min_samples).fit(X)

# Boolean mask over all samples, True where DBSCAN found a core point.
core_samples_mask = np.zeros(db.labels_.shape, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

# Short run report on stdout.
n_samples = len(db.labels_)
print("Number of Data : {0}\n".format(n_samples))
print("params : eps = {0}, min_samples = {1}\n".format(eps, min_samples))
print('※ cluster labels : -1 is noise point\n')
print('> OUTPUT')
from FlowRecoder import get_data, gen_json
import pandas as pd
import csv
import json

# Capture to export; swap in the commented path to process the malware trace.
#fInput = "malware/http-flood.pcap"
fInput = "example.pcap"

# Extract flows from the pcap and serialize them to a JSON string.
data = gen_json(get_data("pcap_files/" + fInput))

# CSV column headers, in the order gen_json emits each flow's fields.
attrs = [
    'proto_name',
    'src',
    'sport',
    'dst',
    'dport',
    'proto',
    'push_flag_ratio',
    'average_len',
    'average_payload_len',
    'pkt_count',
    'flow_average_inter_arrival_time',
]

# Flatten each flow record into a CSV row and write it under output/.
data = json.loads(data)

# json.loads already returns a dict, so the original dict(data) copy was
# redundant; the append loop with an identity comprehension is replaced
# by a single list comprehension (Perflint PERF401/PERF402).
# Assumes every flow dict lists its fields in the same order as `attrs`
# (insertion order preserved end-to-end by gen_json) — TODO confirm.
datas = [list(flow.values()) for flow in data.values()]

df = pd.DataFrame(datas)
# e.g. fInput "example.pcap" -> "output/example.csv"
df.to_csv('output/' + (fInput.split('/')[-1]).split('.')[0] + ".csv",
          index=False,
          header=attrs)