"QDA"
]

# Estimators to compare. The loop further down pairs this list positionally
# with `names` via zip(), so both lists must keep the same order and length:
# an entry disabled here needs its label disabled in `names` too, otherwise
# every later score is printed under the wrong name.
classifiers = [
    KNeighborsClassifier(3),
    SVC(gamma=2, C=1),
    # Currently disabled:
    #GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()
]

# importcsv() yields the inputs (val) and the targets (valspam) that are
# passed to fit()/score() below.
val, valspam = importcsv()

# Split inputs and targets in ONE call so that every row keeps its own label.
# Two separate train_test_split() calls shuffle independently, which pairs
# training rows with the labels of unrelated rows.
train, test, spamtrain, spamtest = train_test_split(
    val, valspam, test_size=0.5)

# Test-set accuracy of each classifier, in the order they are evaluated.
scores = []

# iterate over classifiers
# NOTE(review): zip() stops at the shorter of `names` / `classifiers`; confirm
# both lists have the same length and order.
for name, clf in zip(names, classifiers):
    clf.fit(train, spamtrain)
    score = clf.score(test, spamtest)
    print(name)
    print(score)
    scores.append(score)

# add plot
from sklearn.linear_model import lasso_path, enet_path
from importation_pandas import importcsv
from sklearn.model_selection import train_test_split

import numpy as np
import matplotlib.pyplot as plt

from itertools import cycle

from sklearn.metrics import confusion_matrix

# importcsv() returns the two objects used as X and y below; 2% of the rows
# are held out as the test split.
setX, setY = importcsv()
X_train, X_test, y_train, y_test = train_test_split(setX, setY, test_size=0.02)

eps = 5e-3  # the smaller it is the longer is the path

# `eps` is passed by keyword in every call: recent scikit-learn releases make
# all arguments after (X, y) keyword-only, so a positional eps raises
# TypeError there, while the keyword form works on old and new versions.
# NOTE(review): recent scikit-learn releases may also reject `fit_intercept`
# in the path functions — confirm against the installed version.
print("Computing regularization path using the lasso...")
alphas_lasso, coefs_lasso, _ = lasso_path(X_train,
                                          y_train,
                                          eps=eps,
                                          fit_intercept=False)

print("Computing regularization path using the positive lasso...")
alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
    X_train, y_train, eps=eps, positive=True, fit_intercept=False)
print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(X_train,
                                       y_train,
                                       eps=eps,
                                       l1_ratio=0.8,
                                       fit_intercept=False)
# Example #3
# 0
from importation_pandas import importcsv

import matplotlib
import seaborn
import pandas
import matplotlib.dates as md
from matplotlib import pyplot as plt

from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM

# Load the two objects returned by importcsv() and drop the first 1814
# entries of each; the same slice is applied to both.
# NOTE(review): the reason for the 1814 cutoff is not visible here — confirm.
data, spam = importcsv()
data, spam = data[1814:], spam[1814:]

# helper functions used later


# return a Series of the distances between each point and its closest centroid
def getDistanceByPoint(data, model):
    distance = pd.Series()
    for i in range(0, len(data)):
        Xa = np.array(data.loc[i])
        Xb = model.cluster_centers_[model.labels_[i] - 1]
        distance.set_value(i, np.linalg.norm(Xa - Xb))