Beispiel #1
0
def get_rand_docs(db, patient, state, num=500):
    """Query up to ``num`` randomly chosen documents of ``patient`` in ``state``.

    The candidate ids come from the ``'labeled_' + patient`` shelve entry,
    restricted to rows whose ``'state'`` column equals ``state``.  They are
    shuffled, cut down to the first ``num`` and wrapped in ``ObjectId``
    before being used in an ``$in`` filter on ``db[patient]``.

    Returns whatever ``find`` on that collection returns for the query.
    """
    patient_collection = db[patient]
    labeled = shelve_api.load('labeled_' + patient)
    candidate_ids = list(labeled[labeled['state'] == state]['_id'])
    # Shuffle in place, then keep the head: a random subset of at most `num`.
    random.shuffle(candidate_ids)
    object_ids = [ObjectId(raw_id) for raw_id in candidate_ids[:num]]
    return patient_collection.find(
        {'state': state, '_id': {'$in': object_ids}}
    )
Beispiel #2
0
def load_shelve(name):
    """Return the object that ``shelve_api`` has stored under ``name``."""
    stored = shelve_api.load(name)
    return stored
Beispiel #3
0
def load_shelve(name):
    """Return the object that ``shelve_api`` has stored under ``name``."""
    stored = shelve_api.load(name)
    return stored
Beispiel #4
0
from sklearn.linear_model import LinearRegression
from sklearn.cross_validation import StratifiedKFold
import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score

from tests import shelve_api


# Load the pair stored under the 'lr' key and unpack it into two sequences
# (presumably features and labels -- confirm against the code that wrote it).
XX, yy = shelve_api.load('lr')
# Only the entries from index 2700 onward are used below.
X = XX[2700:]
y = yy[2700:]

# NOTE(review): the `clf.fit(...)` call inside the loop below is commented
# out, so `clf.predict` is invoked on an estimator that was never fitted.
clf = LinearRegression(normalize=True)

# Two stratified folds built from the sliced labels.
skf = StratifiedKFold(y, n_folds=2)
for train_index, test_index in skf:
    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # clf.fit(X_train, y_train)
    y_true, y_pred = y_test, clf.predict(X_test)
    for i, num in enumerate(y_pred):
        if num < 0.0:
            y_pred[i] = 0.0
            continue
        elif num > 1.0:
Beispiel #5
0
        self.X = []
        self.y = []

    def verbose_svm(self):
        """Cross-validate a PCA + SVM pipeline on two folds and print reports.

        For each stratified fold over ``self.y``:

        * the estimator returned by ``self.get_pca()`` is fitted on the
          training split only and used to project both splits;
        * the estimator returned by ``self.get_svm()`` is trained on the
          projected training split;
        * the classification report and the ROC AUC of the predictions on
          the held-out split are printed.

        Nothing is returned; all output goes to stdout.
        """
        skf = StratifiedKFold(self.y, n_folds=2)
        print(self.name)
        for train_index, test_index in skf:
            print("Detailed classification report:")
            print()
            print("The model is trained on the full development set.")
            print("The scores are computed on the full evaluation set.")
            print()
            X_train, X_test = self.X[train_index], self.X[test_index]
            y_train, y_test = self.y[train_index], self.y[test_index]

            # Fit the projection on the training split only.  Fitting it on
            # all of self.X would let the held-out samples influence the
            # transform and make the reported scores optimistic.
            pca = self.get_pca()
            pca.fit(X_train)

            svc = self.get_svm()
            svc.fit(pca.transform(X_train), y_train)

            y_pred = svc.predict(pca.transform(X_test))
            print(classification_report(y_test, y_pred))
            print()
            print(roc_auc_score(y_test, y_pred))
            print()


if __name__ == "__main__":
    # Script entry point: build a RawClf named 'Dog_1' from the two objects
    # stored under the 'clf_x' and 'clf_y' shelve keys, passed as one tuple.
    clf = RawClf('Dog_1',
                 data=(shelve_api.load('clf_x'), shelve_api.load('clf_y')))
Beispiel #6
0
        self.y = []

    def verbose_svm(self):
        """Cross-validate a PCA + SVM pipeline on two folds and print reports.

        For each stratified fold over ``self.y``:

        * the estimator returned by ``self.get_pca()`` is fitted on the
          training split only and used to project both splits;
        * the estimator returned by ``self.get_svm()`` is trained on the
          projected training split;
        * the classification report and the ROC AUC of the predictions on
          the held-out split are printed.

        Nothing is returned; all output goes to stdout.
        """
        skf = StratifiedKFold(self.y, n_folds=2)
        print(self.name)
        for train_index, test_index in skf:
            print("Detailed classification report:")
            print()
            print("The model is trained on the full development set.")
            print("The scores are computed on the full evaluation set.")
            print()
            X_train, X_test = self.X[train_index], self.X[test_index]
            y_train, y_test = self.y[train_index], self.y[test_index]

            # Fit the projection on the training split only.  Fitting it on
            # all of self.X would let the held-out samples influence the
            # transform and make the reported scores optimistic.
            pca = self.get_pca()
            pca.fit(X_train)

            svc = self.get_svm()
            svc.fit(pca.transform(X_train), y_train)

            y_pred = svc.predict(pca.transform(X_test))
            print(classification_report(y_test, y_pred))
            print()
            print(roc_auc_score(y_test, y_pred))
            print()




if __name__ == "__main__":
    # Script entry point: build a RawClf named 'Dog_1' from the two objects
    # stored under the 'clf_x' and 'clf_y' shelve keys, passed as one tuple.
    clf = RawClf('Dog_1', data=(shelve_api.load('clf_x'), shelve_api.load('clf_y')))
Beispiel #7
0
def get_min_max(patient):
    """Return ``(minimum, maximum)`` of the data stored as ``"<patient>_mm"``.

    The data is fetched with ``sapi.load`` and reduced with ``np.min`` and
    ``np.max``.
    """
    shelve_key = "%s_mm" % patient
    values = sapi.load(shelve_key)
    lowest = np.min(values)
    highest = np.max(values)
    return (lowest, highest)
from sklearn.cross_validation import StratifiedKFold
import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KNeighborsClassifier

from tests import shelve_api


def yield_patient_names(name, d):
    """Lazily yield every key of ``d`` that contains ``name``.

    Keys are produced in the iteration order of ``d``; the containment test
    is ``name in key``.
    """
    yield from (candidate for candidate in d if name in candidate)

# Gather the 'baseline' entries of the selected patients into one dict,
# keeping the patients in the listed order.
new_d = {}
d = shelve_api.load('baseline')
# Dog_5 is currently excluded; add it to the tuple to include it again.
for patient_name in ('Dog_1', 'Dog_2', 'Dog_3', 'Dog_4'):
    for key in yield_patient_names(patient_name, d):
        new_d[key] = d[key]


# store = pd.HDFStore("D:/gingivere/data.h5")
# data = store['baseline']
# store.close()
def get_min_max(patient):
    """Return ``(minimum, maximum)`` of the data stored as ``"<patient>_mm"``.

    The data is fetched with ``sapi.load`` and reduced with ``np.min`` and
    ``np.max``.
    """
    shelve_key = "%s_mm" % patient
    values = sapi.load(shelve_key)
    lowest = np.min(values)
    highest = np.max(values)
    return (lowest, highest)