Esempio n. 1
0
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif, mutual_info_classif, chi2
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC, SVC, NuSVC
from sklearn.linear_model import LogisticRegression

from featureband.util.data_util import load_dataset
from featureband.util.metrics_util import load_clf

DATASET = "coil20"  # ["madelon", "basehock", "usps", "coil20", "gisette"]
FINAL_CLASSIFIER = "knn"  # ["knn", "logistic", "linear_svm"]
n_splits = 5

x, y = load_dataset(DATASET)
print(x.shape, y.shape)
clf = load_clf(FINAL_CLASSIFIER)
# clf = LogisticRegression()

skf = StratifiedKFold(n_splits=n_splits, random_state=42)

fold_index = 0
perfs = []
for train_index, test_index in skf.split(x, y):
    print("fold:", fold_index + 1)

    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    clf.fit(x_train, y_train)
Esempio n. 2
0
import numpy as np

from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from time import time
from time import clock

from featureband.util.data_util import load_dataset
from featureband.util.metrics_util import evaluate_cross_validation

x, y = load_dataset("news20")
print(x.shape, y.shape)

# clf = LinearSVC()
clf = KNeighborsClassifier(n_neighbors=3)
# clf = LogisticRegression()

samples = np.random.choice(100, 5, replace=False)
x_selected = x[:, samples]

t0 = time()
c0 = clock()
perf = evaluate_cross_validation(x, y, clf, n_splits=10)
print("t:", time() - t0, "c:", clock() - c0)

print("perf:", perf)