"QDA" ] classifiers = [ KNeighborsClassifier(3), SVC(gamma=2, C=1), #GaussianProcessClassifier(1.0 * RBF(1.0)), DecisionTreeClassifier(max_depth=5), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1), MLPClassifier(alpha=1, max_iter=1000), AdaBoostClassifier(), GaussianNB(), QuadraticDiscriminantAnalysis() ] val, valspam = importcsv() train, test = train_test_split(val, test_size=0.5) spamtrain, spamtest = train_test_split(valspam, test_size=0.5) scores = list() # iterate over classifiers for name, clf in zip(names, classifiers): clf.fit(train, spamtrain) score = clf.score(test, spamtest) print(name) print(score) scores.append(score) #ajout plot
from sklearn.linear_model import lasso_path, enet_path
from importation_pandas import importcsv
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn.metrics import confusion_matrix

# Load the dataset and keep 2% of the rows aside as a test split.
setX, setY = importcsv()
X_train, X_test, y_train, y_test = train_test_split(setX, setY, test_size=0.02)

eps = 5e-3  # the smaller it is the longer is the path

# ``eps`` is passed by keyword in every call: it is keyword-only in current
# scikit-learn, and this matches the enet_path call below.
# ``fit_intercept`` is no longer passed: it is not a parameter of
# lasso_path / enet_path, and recent scikit-learn releases reject unknown
# keyword arguments.
# NOTE(review): older releases appear to ignore it, so results should be
# unchanged there; confirm against the pinned scikit-learn version.
print("Computing regularization path using the lasso...")
alphas_lasso, coefs_lasso, _ = lasso_path(X_train, y_train, eps=eps)

print("Computing regularization path using the positive lasso...")
alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
    X_train, y_train, eps=eps, positive=True)

print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(
    X_train, y_train, eps=eps, l1_ratio=0.8)
from importation_pandas import importcsv import matplotlib import seaborn import pandas import matplotlib.dates as md from matplotlib import pyplot as plt from sklearn import preprocessing from sklearn.decomposition import PCA from sklearn.cluster import KMeans from sklearn.covariance import EllipticEnvelope from sklearn.ensemble import IsolationForest from sklearn.svm import OneClassSVM data, spam = importcsv() data = data[1814:] spam = spam[1814:] # some function for later # return Series of distance between each point and his distance with the closest centroid def getDistanceByPoint(data, model): distance = pd.Series() for i in range(0, len(data)): Xa = np.array(data.loc[i]) Xb = model.cluster_centers_[model.labels_[i] - 1] distance.set_value(i, np.linalg.norm(Xa - Xb))