Example #1
import time

import arff
import numpy as np
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVC

from utilities.plot_learning_curve import plot_learning_curve

start_time = time.perf_counter()

print("Importing dataset: UCI 2015 Phishing Examples")
dataset = arff.load(open('dataset_uci_2015.arff', 'rt'))
data = np.array(dataset['data'])
# The first 30 columns are features; the last column is the class label.
X = data[:, 0:30]
y = data[:, 30:31]
y = np.ravel(y, order='C')
print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

print("Training the model with Polynomial kernel")

d = 2
estimator = SVC(kernel='poly', degree=d, C=1, gamma='auto')

cv = ShuffleSplit(n_splits=30, test_size=0.2, random_state=0)

title = "Learning Curves (SVM - Polynomial kernel | d = " + str(d) + ")"

plt = plot_learning_curve(estimator,
                          title,
                          X,
                          y,
                          ylim=(0.8, 1.01),
                          cv=cv,
                          n_jobs=-1)
plt.show()
print('Time took:', time.perf_counter() - start_time, "seconds")
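Every example imports plot_learning_curve from a local utilities package whose source is not shown here. A minimal sketch of such a helper, built on sklearn.model_selection.learning_curve, might look like the following; the call signature matches the usage above, but the body is an assumption rather than the repository's actual code.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve


def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=None,
                        train_sizes=np.linspace(0.1, 1.0, 5)):
    # Assumed helper: plots mean train/validation scores against training-set
    # size, with shaded one-standard-deviation bands.
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    # Cross-validated scores for increasing training-set sizes.
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std,
                     alpha=0.1, color="r")
    plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std,
                     alpha=0.1, color="g")
    plt.plot(train_sizes, train_mean, 'o-', color="r", label="Training score")
    plt.plot(train_sizes, test_mean, 'o-', color="g", label="Cross-validation score")
    plt.legend(loc="best")
    plt.grid()

    # Returning the pyplot module lets callers finish with plt.show(),
    # as the examples here do.
    return plt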
print("Importing dataset: UCI 2015 Phishing Examples")
dataset = arff.load(open('dataset_uci_2015.arff', 'rt'))
data = np.array(dataset['data']).astype(float)
X = data[:, 0:30]
y = data[:, 30:31]
y = np.ravel(y, order='C')
print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

# ------------------------------------------
model_label = "Multi-layer Perceptron (MLP) with Backpropagation"
estimator = MLPClassifier(solver='adam',
                          alpha=1e-4,
                          hidden_layer_sizes=(150, ),
                          learning_rate='adaptive',
                          max_iter=1000)
# ------------------------------------------

print("Training the model: " + model_label)
cv = ShuffleSplit(n_splits=30, test_size=0.2, random_state=0)
plt = plot_learning_curve(estimator,
                          "Learning Curves " + model_label,
                          X,
                          y,
                          ylim=(0.7, 1.01),
                          cv=cv,
                          n_jobs=-1)
plt.show()

print('Time took:', time.perf_counter() - start_time, "seconds")
Example #3
import time

import arff
import numpy as np
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVC

from utilities.plot_learning_curve import plot_learning_curve

start_time = time.perf_counter()

print("Importing dataset: FCSIT 2018 Phishing Examples")

dataset = arff.load(open('dataset_FCSIT_2018.arff', 'rt'))
data = np.array(dataset['data'])

print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

X = data[:, 0:48]
y = data[:, 48:49]

print("Training the model with Sigmoid kernel")
estimator = SVC(kernel='sigmoid', C=1, gamma='auto')

cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)

plt = plot_learning_curve(estimator,
                          "Learning Curves (SVM - Sigmoid kernel)",
                          X,
                          np.ravel(y, order='C'),
                          ylim=(0.1, 1.01),
                          cv=cv,
                          n_jobs=-1)
plt.show()
print('Time took:', time.perf_counter() - start_time, "seconds")
Example #4
import time
import warnings

import arff
import numpy as np
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import ShuffleSplit

from utilities.plot_learning_curve import plot_learning_curve

warnings.filterwarnings("ignore")
start_time = time.perf_counter()

print("Importing dataset: UCI-2 2016 Phishing Examples")
dataset = arff.load(open('dataset_uci_2016.arff', 'rt'))
data = np.array(dataset['data']).astype(float)
X = data[:, 0:9]
y = data[:, 9:10]
y = np.ravel(y, order='C')
print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

print("Training the model: Naive-Bayes (Bernoulli)")
estimator = BernoulliNB()
cv = ShuffleSplit(n_splits=30, test_size=0.2, random_state=0)
plt = plot_learning_curve(estimator,
                          "Learning Curves (Naive Bayes - Bernoulli)",
                          X,
                          y,
                          ylim=(0.8, 0.9),
                          cv=cv,
                          n_jobs=-1)
plt.show()

print('Time took:', time.perf_counter() - start_time, "seconds")
Example #5
import time
import warnings

import arff
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import ShuffleSplit

from utilities.plot_learning_curve import plot_learning_curve

warnings.filterwarnings("ignore")
start_time = time.perf_counter()

print("Importing dataset: UCI-2 2016 Phishing Examples")
dataset = arff.load(open('dataset_uci_2016.arff', 'rt'))
data = np.array(dataset['data'])
X = data[:, 0:9]
y = data[:, 9:10]
y = np.ravel(y, order='C')
print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

# ------------------------------------------
model_label = "AdaBoost"

n_estimators = 50

estimator = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=1, algorithm='SAMME')
# ------------------------------------------

print("Training the model: " + model_label)
cv = ShuffleSplit(n_splits=30, test_size=0.2, random_state=0)
plt = plot_learning_curve(estimator, "Learning Curves " + model_label + ", E=" + str(n_estimators), X, y,
                          ylim=(0.75, 0.90), cv=cv, n_jobs=-1)
plt.show()

print('Time took:', time.perf_counter() - start_time, "seconds")
Example #6
import time

import arff
import numpy as np
from sklearn.model_selection import ShuffleSplit
from sklearn.tree import DecisionTreeClassifier

from utilities.plot_learning_curve import plot_learning_curve

start_time = time.perf_counter()

print("Importing dataset: UCI 2015 Phishing Examples")
dataset = arff.load(open('dataset_uci_2015.arff', 'rt'))
data = np.array(dataset['data'])
X = data[:, 0:30]
y = data[:, 30:31]
y = np.ravel(y, order='C')
print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

print("Training the model with Decision Tree")
estimator = DecisionTreeClassifier(criterion='entropy',
                                   random_state=0,
                                   max_depth=9,
                                   min_samples_leaf=2)

cv = ShuffleSplit(n_splits=30, test_size=0.2, random_state=0)

plt = plot_learning_curve(estimator,
                          "Learning Curves (CART decision Tree)",
                          X,
                          np.ravel(y, order='C'),
                          ylim=(0.8, 1.01),
                          cv=cv,
                          n_jobs=-1)
plt.show()
print('Time took:', time.perf_counter() - start_time, "seconds")
Example #7
dataset = arff.load(open('dataset_uci_2015.arff', 'rt'))
data = np.array(dataset['data'])
X = data[:, 0:30]
y = data[:, 30:31]
y = np.ravel(y, order='C')
print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

n_estimators = 7
max_depth = 11

print("Training the model with Random Forest")
estimator = RandomForestClassifier(criterion='entropy',
                                   n_jobs=-1,
                                   n_estimators=n_estimators,
                                   max_depth=max_depth)

cv = ShuffleSplit(n_splits=30, test_size=0.2, random_state=0)

plt = plot_learning_curve(estimator,
                          "Learning Curves (Random Forest | estimators = " +
                          str(n_estimators) + ", " + "max depth = " +
                          str(max_depth) + ")",
                          X,
                          y,
                          ylim=(0.8, 1.01),
                          cv=cv,
                          n_jobs=-1)
plt.show()
print('Time took:', time.perf_counter() - start_time, "seconds")
Example #8
import time
import warnings

import arff
import numpy as np
from sklearn.model_selection import ShuffleSplit
from sklearn.naive_bayes import BernoulliNB

from utilities.plot_learning_curve import plot_learning_curve

warnings.filterwarnings("ignore")
start_time = time.perf_counter()

print("Importing dataset: FCSIT 2018 Phishing Examples")

dataset = arff.load(open('dataset_FCSIT_2018.arff', 'rt'))
data = np.array(dataset['data']).astype(float)

print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

X = data[:, 0:48]
y = data[:, 48:49]

print("Training the model: Naive-Bayes (Bernoulli)")
estimator = BernoulliNB()
cv = ShuffleSplit(n_splits=30, test_size=0.2, random_state=0)
plt = plot_learning_curve(estimator,
                          "Learning Curves (Naive Bayes - Bernoulli)",
                          X,
                          np.ravel(y, order='C'),
                          ylim=(0.9, 0.93),
                          cv=cv,
                          n_jobs=-1)
plt.show()

print('Time took:', time.perf_counter() - start_time, "seconds")
Example #9
import time

import arff
import numpy as np
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVC

from utilities.plot_learning_curve import plot_learning_curve

start_time = time.perf_counter()

print("Importing dataset: FCSIT 2018 Phishing Examples")

dataset = arff.load(open('dataset_FCSIT_2018.arff', 'rt'))
data = np.array(dataset['data'])

print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

X = data[:, 0:48]
y = data[:, 48:49]

print("Training the model with Polynomial kernel")
d = 1
estimator = SVC(kernel='poly', degree=d, C=1, gamma='auto')

cv = ShuffleSplit(n_splits=30, test_size=0.2, random_state=0)

title = "Learning Curves (SVM - Polynomial kernel | d = " + str(d) + ")"

plt = plot_learning_curve(estimator,
                          title,
                          X,
                          np.ravel(y, order='C'),
                          ylim=(0.9, 0.95),
                          cv=cv,
                          n_jobs=-1)
plt.show()
print('Time took:', time.perf_counter() - start_time, "seconds")
Example #10
data = data.astype(float)

print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

X = data[:, 0:48]
y = data[:, 48:49]

# nn = round(math.sqrt(data.shape[0]), 0)
# if nn % 2 == 0:
#     nn += 1

nn = 5

model_label = "kNN (N=" + str(nn) + ")"
print("Training the " + model_label)

estimator = KNeighborsClassifier(n_neighbors=nn)

cv = ShuffleSplit(n_splits=30, test_size=0.2, random_state=0)

plt = plot_learning_curve(estimator,
                          "Learning Curves (kNN (N=" + str(nn) + "))",
                          X,
                          np.ravel(y, order='C'),
                          ylim=(0.75, 1.01),
                          cv=cv,
                          n_jobs=-1)
plt.show()
print('Time took:', time.perf_counter() - start_time, "seconds")
Example #11
data = np.array(dataset['data'])
data = data.astype(float)
X = data[:, 0:30]
y = data[:, 30:31]
y = np.ravel(y, order='C')
print("Number of data points: ", data.shape[0])
print("Number of features: ", data.shape[1] - 1)

# nn = round(math.sqrt(data.shape[0]), 0)
# if nn % 2 == 0:
#     nn += 1

nn = 5

model_label = "kNN (N=" + str(nn) + ")"
print("Training the " + model_label)

estimator = KNeighborsClassifier(n_neighbors=nn)

cv = ShuffleSplit(n_splits=30, test_size=0.2, random_state=0)

plt = plot_learning_curve(estimator,
                          "Learning Curves (kNN (N=" + str(nn) + "))",
                          X,
                          y,
                          ylim=(0.75, 1.01),
                          cv=cv,
                          n_jobs=-1)
plt.show()
print('Time took:', time.perf_counter() - start_time, "seconds")