import numpy as np
import matplotlib.pyplot as plt
import feature_utils as utils
import time
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.cross_validation import KFold
from multiprocessing.dummy import Pool
from multiprocessing import cpu_count

# Load GIST descriptors: X is (n_samples, n_features), labels is
# (n_samples, n_targets) — shapes per the unpacking below.
X, labels = utils.load_features("gist")
n_samples, n_features = X.shape
n_targets = labels.shape[1]
#n_targets = 1
print(X.shape)

n_folds = 4


# runs for each fold
def run(args):
    """Process one (train, test) index pair: reshape the training GIST
    vectors into sub-descriptors and cluster them with k-means.

    args -- a (train, test) tuple of index arrays for one fold.
    """
    start_time = time.time()
    train, test = args
    # for GIST only
    mult = 32  # split each input row into 32 rows
    # FIX: use floor division — under Python 3, `512 / mult` yields a
    # float (16.0), which numpy rejects as a reshape dimension.
    fv_dims = 512 // mult
    X_train = X[train].reshape((-1, fv_dims))
    # cluster data
    kmeans = KMeans()
import numpy as np
import matplotlib.pyplot as plt
import feature_utils as utils
import time
from sklearn import neighbors, metrics
from sklearn.decomposition import PCA
from sklearn.cross_validation import StratifiedKFold

# Colour-histogram features with one binary label column per target.
X, labels = utils.load_features('histograms')
n_samples, n_features = X.shape
n_targets = labels.shape[1]
#n_targets = 1
print(X.shape)

# Project onto enough principal components to keep 95% of the variance.
pca = PCA(n_components=0.95)
X = pca.fit_transform(X)
print(X.shape)
c = X.shape[1]

n_folds = 10
# One evaluation cell per (fold, target) pair.
accuracy = np.ndarray((n_folds, n_targets))
tpr = np.ndarray((n_folds, n_targets))
fpr = np.ndarray((n_folds, n_targets))

k = 5  # neighbourhood size for the kNN classifier
for target in range(n_targets):
    print("target : %i/%i" % (target + 1, n_targets))
    # Stratified folds on the current target's label column.
    y = labels[:, target]
    skf = StratifiedKFold(y, n_folds=n_folds)
    start_time = time.time()
import numpy as np
import matplotlib.pyplot as plt
import feature_utils as utils
import time
from sklearn.naive_bayes import GaussianNB
from sklearn.cross_validation import StratifiedKFold

# HOG features (9 orientation bins) with one label column per target.
f_name = 'hog_9'
X, labels = utils.load_features(f_name)
n_samples, n_features = X.shape
n_targets = labels.shape[1]
#n_targets = 1
print(X.shape)

n_folds = 10
# One evaluation cell per (fold, target) pair.
accuracy = np.ndarray((n_folds, n_targets))
tpr = np.ndarray((n_folds, n_targets))
fpr = np.ndarray((n_folds, n_targets))

for target in range(n_targets):
    print("target : %i/%i" % (target + 1, n_targets))
    # Stratify the folds on the current target's label column.
    y = labels[:, target]
    skf = StratifiedKFold(y, n_folds=n_folds)
    start_time = time.time()
    for i, (train, test) in enumerate(skf):
        print("fold : %i/%i" % (i + 1, n_folds))
        # Fresh Naive Bayes model per fold, fit on the train split only.
        classifier = GaussianNB()
        classifier.fit(X[train], y[train])
        classes = classifier.predict(X[test])
import numpy as np
import matplotlib.pyplot as plt
import feature_utils as utils
import time
from sklearn import neighbors, metrics
from sklearn.decomposition import PCA

# Colour-histogram features; keep only the first label column.
X, y = utils.load_features('histograms')
n_samples, n_features = X.shape
y = y[:, 0]
print(X.shape)

# Project onto enough principal components to keep 70% of the variance.
pca = PCA(n_components=0.7)
X = pca.fit_transform(X)
print(X.shape)

start_time = time.time()
k = 5  # neighbourhood size for the kNN classifier
classifier = neighbors.KNeighborsClassifier(k)
classifier.fit(X, y)

# NOTE(review): predictions are made on the same X the model was fit on,
# so the ROC below is a resubstitution estimate — confirm this is intended.
p = classifier.predict_proba(X)
# For each sample: the top-scoring class, its probability, and whether
# that class matches the true label.
idx = np.argmax(p, axis=1)
scores = p[np.arange(p.shape[0]), idx]
genuine = classifier.classes_[idx] == y
elapsed_time = time.time() - start_time

# ROC of "top prediction is correct" against its confidence score.
fpr, tpr, _ = metrics.roc_curve(genuine, scores)
print("elapsed time : " + str(elapsed_time))