def get_rand_docs(db, patient, state, num=500):
    """Query a random sample of a patient's documents that carry a given state.

    The candidate ids come from the labeled table loaded with
    ``shelve_api.load('labeled_' + patient)``: rows whose ``'state'`` column
    equals ``state`` are kept, their ``'_id'`` values are shuffled, and the
    first ``num`` of them are used.

    Args:
        db: Object indexable by patient name; ``db[patient]`` must expose
            ``find`` (presumably a pymongo database -- confirm with callers).
        patient: Patient name, used both as the collection key and as the
            suffix of the labeled-table name.
        state: Value matched against the ``'state'`` column and query field.
        num: Maximum number of ids to include in the query.

    Returns:
        Whatever ``db[patient].find(...)`` returns for the sampled ids.
    """
    patient_collection = db[patient]
    labeled = shelve_api.load('labeled_' + patient)

    # Collect the ids of every labeled row that is in the requested state.
    matching_rows = labeled[labeled['state'] == state]
    candidate_ids = list(matching_rows['_id'])

    # Shuffle in place, then keep at most `num` ids as the random sample.
    random.shuffle(candidate_ids)
    sampled_ids = [ObjectId(raw_id) for raw_id in candidate_ids[:num]]

    return patient_collection.find(
        {'state': state, '_id': {'$in': sampled_ids}}
    )
def load_shelve(name):
    """Return the object that ``shelve_api.load`` yields for ``name``.

    Args:
        name: Identifier passed straight through to ``shelve_api.load``.

    Returns:
        The value returned by ``shelve_api.load(name)``, unchanged.
    """
    loaded = shelve_api.load(name)
    return loaded
from sklearn.linear_model import LinearRegression from sklearn.cross_validation import StratifiedKFold import numpy as np from sklearn.metrics import classification_report from sklearn.metrics import roc_auc_score from tests import shelve_api XX, yy = shelve_api.load('lr') X = XX[2700:] y = yy[2700:] clf = LinearRegression(normalize=True) skf = StratifiedKFold(y, n_folds=2) for train_index, test_index in skf: print("Detailed classification report:") print() print("The model is trained on the full development set.") print("The scores are computed on the full evaluation set.") print() X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] # clf.fit(X_train, y_train) y_true, y_pred = y_test, clf.predict(X_test) for i, num in enumerate(y_pred): if num < 0.0: y_pred[i] = 0.0 continue elif num > 1.0:
self.X = [] self.y = [] def verbose_svm(self): skf = StratifiedKFold(self.y, n_folds=2) print(self.name) for train_index, test_index in skf: print("Detailed classification report:") print() print("The model is trained on the full development set.") print("The scores are computed on the full evaluation set.") print() X_train, X_test = self.X[train_index], self.X[test_index] PCA = self.get_pca() PCA.fit(self.X) y_train, y_test = self.y[train_index], self.y[test_index] SVC = self.get_svm() X_train = PCA.transform(X_train) SVC.fit(X_train, y_train) X_test = PCA.transform(X_test) y_true, y_pred = y_test, SVC.predict(X_test) print(classification_report(y_true, y_pred)) print() print(roc_auc_score(y_true, y_pred)) print() if __name__ == "__main__": clf = RawClf('Dog_1', data=(shelve_api.load('clf_x'), shelve_api.load('clf_y')))
self.y = [] def verbose_svm(self): skf = StratifiedKFold(self.y, n_folds=2) print(self.name) for train_index, test_index in skf: print("Detailed classification report:") print() print("The model is trained on the full development set.") print("The scores are computed on the full evaluation set.") print() X_train, X_test = self.X[train_index], self.X[test_index] PCA = self.get_pca() PCA.fit(self.X) y_train, y_test = self.y[train_index], self.y[test_index] SVC = self.get_svm() X_train = PCA.transform(X_train) SVC.fit(X_train, y_train) X_test = PCA.transform(X_test) y_true, y_pred = y_test, SVC.predict(X_test) print(classification_report(y_true, y_pred)) print() print(roc_auc_score(y_true, y_pred)) print() if __name__ == "__main__": clf = RawClf('Dog_1', data=(shelve_api.load('clf_x'), shelve_api.load('clf_y')))
def get_min_max(patient):
    """Return the smallest and largest value stored for a patient.

    The data is loaded with ``sapi.load`` under the name ``"<patient>_mm"``.

    Args:
        patient: Patient name used to build the stored object's name.

    Returns:
        A ``(minimum, maximum)`` tuple computed with ``np.min`` and ``np.max``
        over the loaded data.
    """
    stored_values = sapi.load("%s_mm" % patient)
    lowest = np.min(stored_values)
    highest = np.max(stored_values)
    return lowest, highest
from sklearn.cross_validation import StratifiedKFold
import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from tests import shelve_api


def yield_patient_names(name, d):
    """Yield each key of ``d`` that contains ``name``.

    Args:
        name: Value tested with ``in`` against every key.
        d: Iterable of keys (a dict in this script).

    Yields:
        The keys for which ``name in key`` is true, in iteration order.
    """
    for candidate in d:
        if name not in candidate:
            continue
        yield candidate


# Copy into new_d only the baseline entries whose key mentions one of the
# selected patients, visiting the patients in this fixed order.
new_d = {}
d = shelve_api.load('baseline')
for patient_name in ('Dog_1', 'Dog_2', 'Dog_3', 'Dog_4'):
    for key in yield_patient_names(patient_name, d):
        new_d[key] = d[key]

# for key in yield_patient_names('Dog_5', d):
#     new_d[key] = d[key]

# store = pd.HDFStore("D:/gingivere/data.h5")
# data = store['baseline']
# store.close()