def load_auto_data():
    """Grid-search XGBoost hyperparameters on precomputed autocorrelation features.

    Loads the lag-10 autocorrelation feature matrix, aligns the behavioral-state
    labels to it (labels were subsampled with the same 2-step stride used to
    build the features, then trimmed by the window length), runs an exhaustive
    GridSearchCV over XGBClassifier hyperparameters, and dumps the full
    cross-validation results table to CSV.
    """
    lags = 10
    window = 2 * lags  # feature window length; first window-1 labels have no feature row
    X = np.load('../data/autocorr_len20_lag10.npy')
    labels = ca_data_utils.load_labels()[9:39992:2]
    labels = labels[window - 1:]
    print(X.shape)
    print(labels.shape)
    # Hyperparameter grid for the exhaustive search.
    search_space = {
        'max_depth': np.linspace(3, 8, 6).astype(int),
        'learning_rate': np.logspace(-2, 2, 5),
        'booster': ['gbtree', 'gblinear', 'dart'],
        'gamma': np.linspace(0.0, 0.4, 5),
        'reg_alpha': np.logspace(-1, 2, 4),
    }
    grid = GridSearchCV(xgb.XGBClassifier(), param_grid=search_space,
                        n_jobs=20, verbose=2)
    print('start to train...')
    grid.fit(X, labels)
    print('finished')
    results = pd.DataFrame.from_dict(grid.cv_results_)
    results.to_csv('../data/clf_results/autocorr_xgbooster')
def select_models():
    """Benchmark four off-the-shelf classifiers on the flattened video frames.

    Fits each model on a 75/25 train/test split of the V-matrix frames, prints
    progress plus train/test accuracies, and saves the model names and both
    accuracy arrays under ../data/clf_results/.
    """
    vid = ca_data_utils.load_v_matrix().T[8:39992]
    labels = ca_data_utils.load_labels()[8:39992]
    # NOTE(review): no random_state here, so the split changes between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        vid, labels, test_size=0.25)
    candidates = [
        ('linear SVM', SVC(kernel="linear", C=0.025)),
        ('RBF SVM', SVC(gamma=2, C=1)),
        ('Random Forest',
         RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)),
        ('AdaBoost', AdaBoostClassifier()),
    ]
    train_scores = []
    test_scores = []
    for name, model in candidates:
        print('starting to train with ', name)
        model.fit(X_train, y_train)
        print('---> calculating training set accuracy:')
        train_accuracy = model.score(X_train, y_train)
        train_scores.append(train_accuracy)
        print('---> training set accuracy: ', train_accuracy)
        print('---> calculating testing set accuracy:')
        test_accuracy = model.score(X_test, y_test)
        test_scores.append(test_accuracy)
        print('---> testing set accuracy: ', test_accuracy)
    np.save('../data/clf_results/clf_names', [name for name, _ in candidates])
    np.save('../data/clf_results/train_accuracy', train_scores)
    np.save('../data/clf_results/test_accuracy', test_scores)
def xgboost(gamma):
    """Train an XGBoost classifier with the given `gamma` and print test accuracy.

    Uses every 5th frame; the first 3/4 of the (chronological) data is the
    training set, the remaining 1/4 the test set.
    """
    features = ca_data_utils.load_v_matrix().T[8:39992:5]
    labels = ca_data_utils.load_labels()[8:39992:5]
    model = xgb.XGBClassifier(gamma=gamma, learning_rate=0.1)
    # Chronological 75/25 split (no shuffling).
    split = (3 * len(labels)) // 4
    model.fit(features[:split], labels[:split])
    print(model.score(features[split:], labels[split:]))
def load_data():
    """Return the precomputed autocorrelation features and their aligned labels.

    Labels use the same 2-step stride over frames 9:39992 that was used when
    the feature file was generated.

    NOTE(review): earlier experiments (kept in version control history) tried
    raw V-matrix frames, merging states 2/3, and first-difference features.
    """
    features = np.load('../data/autocorr.npy')
    labels = ca_data_utils.load_labels()[9:39992:2]
    return features, labels
def kernel_pca():
    """Project the video frames with RBF kernel-PCA and plot both spaces.

    Left panel: first two raw coordinates of each frame; right panel: first two
    kernel-PCA components. Points are colored by behavioral state
    (1=red, 2=blue, 3=green). The figure is saved, not shown.
    """
    X = ca_data_utils.load_v_matrix().T[8:39992]
    labels = ca_data_utils.load_labels()[8:39992]
    kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10,
                     n_jobs=20)
    print('starting kpca')
    X_kpca = kpca.fit_transform(X)
    print('finished')
    # (state mask, plot color) for the three behavioral states.
    groups = [(labels == 1, "red"),
              (labels == 2, "blue"),
              (labels == 3, "green")]
    plt.figure()
    plt.subplot(1, 2, 1, aspect='equal')
    plt.title("Original space")
    for mask, color in groups:
        plt.scatter(X[mask, 0], X[mask, 1], c=color, s=20, edgecolor='k')
    plt.xlabel("$x_1$")
    plt.ylabel("$x_2$")
    plt.subplot(1, 2, 2, aspect='equal')
    for mask, color in groups:
        plt.scatter(X_kpca[mask, 0], X_kpca[mask, 1], c=color, s=20,
                    edgecolor='k')
    plt.title("Projection by KPCA")
    plt.xlabel(r"1st principal component in space induced by $\phi$")
    plt.ylabel("2nd component")
    plt.tight_layout()
    plt.savefig('../data/clf_results/kpca')
def tune_rbf_svm():
    """Grid-search C and gamma for an RBF SVC on standardized frames.

    Pixels are standardized per-feature; the gamma grid is scaled by
    1/n_features (analogous in spirit to sklearn's gamma='scale').
    Prints the best parameters/score and the full cv_results_ dict.
    """
    vid = ca_data_utils.load_v_matrix()[:, 9:39992]
    labels = ca_data_utils.load_labels()[9:39992]
    X = StandardScaler().fit_transform(vid.T)
    C_range = np.logspace(-1, 2, 4)
    print('C range: ', C_range)
    gamma_range = np.logspace(-3, 3, 7) * 1. / X.shape[1]
    print('gamma range: ', gamma_range)
    search = GridSearchCV(
        SVC(),
        param_grid={'gamma': gamma_range, 'C': C_range},
        cv=StratifiedShuffleSplit(test_size=0.25, random_state=42),
        n_jobs=20)
    print('start to train...')
    search.fit(X, labels)
    print("The best parameters are %s with a score of %0.2f"
          % (search.best_params_, search.best_score_))
    print('The results are:')
    print(search.cv_results_)
def make_video():
    """Save one annotated figure per video frame comparing truth vs. prediction.

    Each figure shows the raw frame (left) and a narrow panel (right) with
    three ellipses:
      * (0.5, 0.9): the true label   (1=red, 2=green, 3=blue)
      * (0.5, 0.7): the prediction   (same color code)
      * (0.5, 0.1): green when prediction == label, red otherwise
    Images are written to ../data/clf_results/video/image_#####.

    Fixes vs. original: figures are now closed after saving — previously ~40k
    figures stayed open and pyplot's figure registry grew without bound — and
    the 'startint' progress-message typo is corrected.
    """
    vid = skimage.io.imread('../data/vid.tif')[9:39992]
    labels = ca_data_utils.load_labels()[9:39992]
    preds = np.load('../data/clf_results/y_pred.npy')

    def state_color(state):
        # Shared label -> color mapping: 1 (default) red, 2 green, 3 blue.
        if state == 2:
            return 'g'
        if state == 3:
            return 'b'
        return 'r'

    for i, (frame, label, pred) in enumerate(zip(vid, labels, preds)):
        print('starting ', i)  # was 'startint ' (typo)
        print(frame.shape)
        f, (ax1, ax2) = plt.subplots(1, 2,
                                     gridspec_kw={'width_ratios': [5, 1]})
        ax1.imshow(frame)
        # Agreement indicator, then truth, then prediction (same z-order as before).
        match_color = 'g' if label == pred else 'r'
        ax2.add_artist(Ellipse((0.5, 0.1), 0.5, 0.1, color=match_color))
        ax2.add_artist(Ellipse((0.5, 0.9), 0.5, 0.1, color=state_color(label)))
        ax2.add_artist(Ellipse((0.5, 0.7), 0.5, 0.1, color=state_color(pred)))
        fname = '../data/clf_results/video/image_{0:05d}'.format(i)
        print(fname)
        ax2.get_xaxis().set_visible(False)
        ax2.get_yaxis().set_visible(False)
        f.savefig(fname)
        plt.close(f)  # BUG FIX: release the figure; otherwise memory grows per frame
def load_data(step):
    """Return every `step`-th V-matrix frame in [9, 39992) with matching labels."""
    frames = ca_data_utils.load_v_matrix().T[9:39992:step]
    states = ca_data_utils.load_labels()[9:39992:step]
    return frames, states
import scipy.io
import numpy as np
import skimage.io

import ca_data_utils


def trunk_data_by_states(vid, labels):
    """Split video columns by behavioral state and save each group.

    Selects the columns of `vid` whose label is 1 (sleep), 2 (wake_1), or
    3 (wake_2), prints the per-state counts plus the sleep slice's shape,
    and writes the three arrays under ../data/byState/.
    """
    idx_sleep = np.where(labels == 1)[0]
    idx_wake1 = np.where(labels == 2)[0]
    idx_wake2 = np.where(labels == 3)[0]
    print(idx_sleep.shape)
    print(idx_wake1.shape)
    print(idx_wake2.shape)
    sleep = vid[:, idx_sleep]
    wake_1 = vid[:, idx_wake1]
    wake_2 = vid[:, idx_wake2]
    print(sleep.shape)
    np.save('../data/byState/sleep', sleep)
    np.save('../data/byState/wake_1', wake_1)
    np.save('../data/byState/wake_2', wake_2)


if __name__ == '__main__':
    labels = ca_data_utils.load_labels()
    vid = ca_data_utils.load_vid()
    print(vid.shape)
    trunk_data_by_states(vid, labels)
# NOTE(review): the next block references `param_grid`, `X`, and `labels`,
# none of which are defined in the visible lines — this looks like the tail
# of a hyperparameter-tuning function whose header lies outside this chunk.
# Left untouched pending confirmation of the enclosing scope.
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv)
print('start to train...')
grid.fit(X, labels)
print("The best parameters are %s with a score of %0.2f" % (grid.best_params_, grid.best_score_))
print('The results are:')
print(grid.cv_results_)


if __name__ == '__main__':
    vid = ca_data_utils.load_v_matrix().T[9:39992]
    labels = ca_data_utils.load_labels()[9:39992]
    # NOTE(review): `select_models` is called with three arguments here, but
    # the zero-argument `select_models()` defined earlier in this file takes
    # none — presumably this main block belongs to a different script/version.
    for k in range(5, 11):
        select_models(vid, labels, k)
        # select_k(vid, labels, k)
    # clf = SVC(gamma=0.001, C=10)
    # scaler = StandardScaler()
    # X = scaler.fit_transform(vid)
    # X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25)
    # print('start to train')
    # clf.fit(X_train, y_train)
    # print('finally finished tada~~')
    # y_pred = clf.predict(X)
    # print('saving y_pred for whole video, the accuracy for test data is ', clf.score(X_test, y_test))