Code Example #1
import window_s_p_ft as win
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split


total_score = 0
stop = 1000
for x in range(stop):
    clf = KNeighborsClassifier()
    data = win.getStudents()
    data_train, data_test = train_test_split(data, test_size=0.2)
    data_train_labels = [s.spec for s in data_train]
    data_test_labels = [s.spec for s in data_test]
    data_train = [s.grades for s in data_train]
    data_test = [s.grades for s in data_test]
    clf.fit(data_train, data_train_labels)
    total_score += clf.score(data_test, data_test_labels)
total_score = total_score / stop
print('all')
print(total_score)

specs = ['FK', 'FM', 'MN', 'OE']
for sp in specs:
    total_score = 0
    for x in range(stop):
        clf = KNeighborsClassifier()
        data = win.getStudents()
        data_train, data_test = train_test_split(data, test_size=0.2)
        data_train_labels = [s.spec if s.spec == sp else 'NOT ' + sp for s in data_train]
        data_test_labels = [s.spec if s.spec == sp else 'NOT ' + sp for s in data_test]
        data_train = [s.grades for s in data_train]
        data_test = [s.grades for s in data_test]
        clf.fit(data_train, data_train_labels)
        total_score += clf.score(data_test, data_test_labels)
    total_score = total_score / stop
    print(sp)
    print(total_score)
Code Example #2
import networkx as nx
import matplotlib.pyplot as plt
import window_s_p_ft as win
import numpy as np
import math
from sklearn.manifold import MDS

cl = 'L'

data = win.getData(class_=cl)
data = sorted(data, key=lambda s: np.mean(s), reverse=True)
studs = win.getStudents(class_=cl)
studs = sorted(studs, key=lambda s: np.mean(s.grades), reverse=True)
st_corr = np.corrcoef(data, rowvar=1)

mds = MDS(n_components=2, dissimilarity='precomputed')
dists = np.empty((len(st_corr), len(st_corr)))
for ii in range(len(data)):
    for jj in range(len(data)):
        dists[ii][jj] = math.sqrt(2 * (1 - st_corr[ii][jj]))
pos = mds.fit(dists).embedding_

G = nx.Graph()
G.add_nodes_from(range(len(data)))
labels = []
for ii in range(len(data)):
    labels.append(str(ii + 1) + " " +
                  str(studs[ii].spec))
    for jj in range(ii + 1, len(data)):
        d = math.sqrt(2 * (1 - st_corr[ii][jj]))
        G.add_edge(ii, jj, weight=d)
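
The excerpt stops before anything is drawn; the following is a minimal sketch (an assumption, not part of the original code) of how the MDS layout and node labels built above could be rendered with networkx and matplotlib.

pos_dict = {ii: pos[ii] for ii in range(len(data))}       # node -> MDS (x, y)
label_dict = {ii: labels[ii] for ii in range(len(data))}  # node -> "rank spec" label
# edges of the complete graph are skipped here to keep the plot readable
nx.draw_networkx_nodes(G, pos_dict, node_size=200)
nx.draw_networkx_labels(G, pos_dict, labels=label_dict, font_size=8)
plt.axis('off')
plt.show()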
Code Example #3
# plt.ylabel("Log(Explained variance)")
# '''log-log plot'''
# log_x = [math.log(x) for x in range(1, len(pca.explained_variance_)+1)]
# plt.xlabel("Log(no. PCA)")
# plt.ylabel("Log(Explained variance)")
# plt.plot(log_x, log_var, 'o-', label="Log")
# plt.legend(bbox_to_anchor=(1, 1), loc=1, borderaxespad=0.)
# print()
# plt.show()

# '''PCA matrix'''
# log_comp = np.asarray([[math.log(math.fabs(x)) for x in list_] for list_ in pca.components_])
# comp = log_comp.T
# comp = pca.components_.T

students = win.getStudents()
'''correlation student-student'''
st_corr = np.empty([len(students), len(students)])
for ii, st1 in enumerate(students):
    for jj, st2 in enumerate(students):
        try:
            st_corr[ii, jj] = math.log(math.fabs(pearsonr(st1.grades, st2.grades)[0]))
        except ValueError:
            # math.log(0) raises ValueError when the correlation is exactly 0;
            # store NaN rather than leaving np.empty's uninitialized value in place
            st_corr[ii, jj] = np.nan
            print(pearsonr(st1.grades, st2.grades)[0])
            print(st1.grades)
            print(st2.grades)

# courses = win.getCourses()
# '''correlation course-course'''
# co_corr = np.empty([len(courses), len(courses)])
# for ii, c1 in enumerate(courses):
Code Example #4
import numpy as np
import window_s_p_ft as win
# import window_s_p_foto as win
from scipy.stats import pearsonr
from sklearn.metrics import mutual_info_score
import heatmap
import random


'''OE, MN, FK, FM'''
students = win.getStudents(shuffle=False)
'''student-student similarity (mutual information between grade vectors)'''
st_corr = np.empty([len(students), len(students)])
for ii, st1 in enumerate(students):
    for jj, st2 in enumerate(students):
        # mutual_info_score returns a plain float, so no [0] indexing is needed
        st_corr[ii, jj] = mutual_info_score(st1.grades, st2.grades)

cl = students[0].class_
sp = students[0].spec
anno = []
anno2 = []
for ii, st in enumerate(students):
    if st.class_ != cl:
        anno.append((ii - 1, students[ii - 1].class_))
        cl = st.class_
    if st.spec != sp:
        anno2.append((ii - 1, students[ii - 1].spec))
        sp = st.spec
anno.append(((len(students) - 1), students[-1].class_))
anno2.append(((len(students) - 1), students[-1].spec))
anno = anno + anno2
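
The real API of the local heatmap module is not shown in this excerpt; as a rough stand-in (an assumption, not the original code), the mutual-information matrix and the block boundaries collected in anno could be rendered with plain matplotlib.

import matplotlib.pyplot as plt

plt.imshow(st_corr, cmap='viridis')
plt.colorbar(label='mutual information')
for idx, text in anno:
    # separator after the last student of each class / spec block
    plt.axhline(idx + 0.5, color='white', linewidth=0.5)
    plt.axvline(idx + 0.5, color='white', linewidth=0.5)
    plt.text(len(students), idx, str(text), fontsize=6, va='center')
plt.show()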
Code Example #5
from collections import OrderedDict

import window_s_p_ft as win
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import LeaveOneOut, train_test_split


def classify(data=None, clf=None, repeat=10, test_size=0.2, leave=False):
    '''Applies a classification method based on a classifier object clf.
    data must be a list of student objects; repeat must be an integer and makes the
    classification run 'repeat' times, with the printed results averaged over all repeats.
    Returns a dictionary of results (accuracy, precision, etc.).'''
    if data is None:
        data = win.getStudents()
    if clf is None:
        clf = LinearDiscriminantAnalysis(solver='lsqr')
    total_score = 0
    stop = repeat
    results = OrderedDict()
    results['method'] = str(clf)
    if leave is False:
        for x in range(stop):
            data_train, data_test = train_test_split(data, test_size=test_size)
            data_train_labels = [s.spec for s in data_train]
            data_test_labels = [s.spec for s in data_test]
            data_train = [s.grades for s in data_train]
            data_test = [s.grades for s in data_test]
            clf.fit(data_train, data_train_labels)
            total_score += clf.score(data_test, data_test_labels)
        total_score = total_score / stop
        results['ACC for all specs'] = round(total_score, 2)
        specs = ['FK', 'FM', 'MN', 'OE']
        for sp in specs:
            total_score = 0
            total_sensitivity = 0
            total_specificity = 0
            total_precision = 0
            total_npv = 0
            total_prevalence = 0
            for x in range(stop):
                sensitivity = 0  # true positive
                specificity = 0  # true negative
                precision = 0
                npv = 0
                prevalence = 0
                data_train, data_test = train_test_split(
                    data, test_size=test_size)
                data_train_labels = [s.spec if s.spec ==
                                     sp else 'NOT ' + sp for s in data_train]
                data_test_labels = [s.spec if s.spec ==
                                    sp else 'NOT ' + sp for s in data_test]
                data_train = [s.grades for s in data_train]
                data_test = [s.grades for s in data_test]
                clf.fit(data_train, data_train_labels)
                total_score += clf.score(data_test, data_test_labels)
                prediction = clf.predict(data_test)
                for ii, d in enumerate(prediction):
                    if d == data_test_labels[ii] and d == sp:
                        sensitivity += 1
                    elif d == data_test_labels[ii] and d != sp:
                        specificity += 1
                    else:
                        pass
                # precision and NPV must use the raw true-positive / true-negative
                # counts, so compute them before sensitivity and specificity are
                # normalised into rates
                try:
                    precision = sensitivity / prediction.tolist().count(sp)
                except ZeroDivisionError:
                    precision = 0
                try:
                    npv = specificity / prediction.tolist().count('NOT ' + sp)
                except ZeroDivisionError:
                    npv = 0
                try:
                    sensitivity = sensitivity / data_test_labels.count(sp)
                except ZeroDivisionError:
                    sensitivity = 0
                try:
                    specificity = specificity / \
                        data_test_labels.count('NOT ' + sp)
                except ZeroDivisionError:
                    specificity = 0
                prevalence = data_test_labels.count(sp) / len(data_test_labels)
                total_sensitivity += sensitivity
                total_specificity += specificity
                total_precision += precision
                total_npv += npv
                total_prevalence += prevalence
            total_score = total_score / stop
            total_sensitivity = total_sensitivity / stop
            total_specificity = total_specificity / stop
            total_precision = total_precision / stop
            total_npv = total_npv / stop
            total_prevalence = total_prevalence / stop
            # results[sp + ' accuracy: '] = total_score
            # results[sp + ' sensitivity: '] = total_sensitivity
            # results[sp + ' specificity: '] = total_specificity
            # results[sp + ' precision: '] = total_precision
            # results[sp + ' negative predictive value: '] = total_npv
            results[sp + ' acc - prevalence: '] = round(
                total_score - max(total_prevalence, 1 - total_prevalence), 2)
    else:
        for x in range(stop):
            loo = LeaveOneOut()  # modern API: the number of samples is inferred by split()
            for train_index, test_index in loo.split(data):
                data_train, data_test = [data[ii]
                                         for ii in train_index], data[test_index[0]]
                data_train_labels = [s.spec for s in data_train]
                data_test_labels = data_test.spec
                data_train = [s.grades for s in data_train]
                data_test = data_test.grades
                clf.fit(data_train, data_train_labels)
                if clf.predict([data_test])[0] == data_test_labels:
                    total_score += 1
        total_score = total_score / stop / len(data)  # one split per sample in leave-one-out
        results['ACC for all specs'] = round(total_score, 2)
        specs = ['FK', 'FM', 'MN', 'OE']
        for sp in specs:
            total_score = 0
            total_prevalence = 0
            for x in range(stop):
                # prevalence = 0
                loo = LeaveOneOut()
                for train_index, test_index in loo.split(data):
                    data_train, data_test = [data[ii]
                                             for ii in train_index], data[test_index[0]]
                    data_train_labels = [s.spec if s.spec ==
                                         sp else 'NOT ' + sp for s in data_train]
                    data_test_labels = data_test.spec if data_test.spec == sp else 'NOT ' + sp
                    data_train = [s.grades for s in data_train]
                    data_test = data_test.grades
                    clf.fit(data_train, data_train_labels)
                    # predict only after fitting on the current training split
                    prediction = clf.predict([data_test])
                    if prediction[0] == data_test_labels:
                        total_score += 1
                    if data_test_labels == sp:
                        total_prevalence += 1
                    # total_prevalence += prevalence
            total_score = total_score / stop / len(data)
            total_prevalence = total_prevalence / stop / len(data)
            # results[sp + ' accuracy: '] = round(total_score, 2)
            results[sp + ' acc - prevalence: '] = round(
                total_score - max(total_prevalence, 1 - total_prevalence), 2)
    return results
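
A short usage sketch exercising the leave-one-out branch; the classifier choice and repeat value below are illustrative assumptions, not part of the original excerpt.

from sklearn.neighbors import KNeighborsClassifier

loo_results = classify(clf=KNeighborsClassifier(n_neighbors=5),
                       repeat=1, leave=True)
for key, value in loo_results.items():
    print(key, value)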
Code Example #6
import numpy as np
import window_s_p_ft as win
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier


def run(data, test_size, repeat, leave):
    # classify() is defined in Code Example #5
    classified = [classify(data=data, repeat=repeat, test_size=test_size,
                           clf=LinearDiscriminantAnalysis(solver='lsqr'),
                           leave=leave),
                           leave=leave),
                  classify(data=data, repeat=repeat, test_size=test_size,
                           clf=DecisionTreeClassifier(), leave=leave),
                  classify(data=data, repeat=repeat, test_size=test_size,
                           clf=KNeighborsClassifier(n_neighbors=5,
                                                    weights='uniform'),
                           leave=leave)]
    for cl in classified:
        print("")
        for x, y in cl.items():
            print(x, y)


repeat = 1000
test_size = 0.25
studs = win.getStudents(spare=True)
nustuds = []
for s in studs:
    if None in s.grades or 0 in s.grades or np.mean(s.grades) < 3.0:
        pass
    else:
        nustuds.append(s)
# data = win.getData()
# pca = KernelPCA(n_components=None, kernel='sigmoid')
# data = pca.fit_transform(data)  # mle -> n_components_ = 12
# # data = np.corrcoef(data)
# for ii, s in enumerate(studs):
#     s.grades = data[ii]
run(nustuds, test_size, repeat, False)