def analyzeClusters(n_loops=1, cl=None, sp=None, shuffled=False, spShuff=False):
    """Average hierarchical-clustering statistics over ``n_loops`` data draws.

    Every draw is fetched with ``win.getData``, clustered with average
    linkage on correlation distance, and summarised four ways.  The
    summaries are summed element-wise across draws and divided by the number
    of draws.

    Args:
        n_loops: Number of draws to average over; must be at least 1.
        cl: Forwarded to ``win.getData`` as ``class_``.
        sp: Forwarded to ``win.getData`` as ``spec``.
        shuffled: Forwarded to ``win.getData`` as ``shuffle``.
        spShuff: When exactly ``True``, each draw is additionally passed
            through ``win.shuffleIt(data, mode=2)`` before clustering.

    Returns:
        dict with the keys
        ``'bins'`` (centres of the histogram bins),
        ``'hist'`` (mean histogram of the link heights),
        ``'ys'`` (mean second ``dcoord`` value of every dendrogram link),
        ``'z'`` (mean merge distances, largest merge first) and
        ``'acc'`` (mean second difference of those merge distances).

    Raises:
        ValueError: If ``n_loops`` is smaller than 1.
    """
    n = n_loops
    if n < 1:
        raise ValueError("n_loops must be at least 1")

    # Histogram bin edges; np.histogram hands the same edges back as an array.
    bins = list(drange(0.0, 1.0, 0.1))

    total_hist = total_ys = total_z = total_acc = None
    for _ in range(n):  # loop exists to average shuffled results
        data = win.getData(shuffle=shuffled, class_=cl, spec=sp)
        if spShuff is True:
            # Shuffle every draw, not only the first one; otherwise the
            # average would mix shuffled and unshuffled data.
            win.shuffleIt(data, mode=2)

        Z = hie.linkage(data, method='average', metric='correlation')
        D = hie.dendrogram(Z, orientation='left', no_plot=True)

        # freq method
        ys = [d[1] for d in D['dcoord']]
        hist, bins = np.histogram(ys, bins=bins)
        # elbow method (sort of)
        z = Z[::-1, 2]
        # inv elbow: 2nd derivative of distances
        acceleration = np.diff(z, 2)

        if total_ys is None:
            # First draw seeds the accumulators, which fixes their lengths.
            total_ys, total_hist = list(ys), list(hist)
            total_z, total_acc = list(z), list(acceleration)
        else:
            total_ys = [a + b for a, b in zip(ys, total_ys)]
            total_hist = [a + b for a, b in zip(hist, total_hist)]
            total_z = [a + b for a, b in zip(z, total_z)]
            total_acc = [a + b for a, b in zip(acceleration, total_acc)]

    return {
        'bins': (bins[:-1] + bins[1:]) / 2,
        'hist': [a / n for a in total_hist],
        'ys': [a / n for a in total_ys],
        'z': [a / n for a in total_z],
        'acc': [a / n for a in total_acc],
    }
# Baseline: print mean +/- standard error, then the variance, of all entries
# of the correlation matrix of the data as returned by win.getData.
# NOTE(review): scp is presumably scipy.stats - confirm against the imports.
data = win.getData(class_=None)
co_corr = np.corrcoef(data, rowvar=0)  # rowvar=0: columns are the variables
print(np.mean(co_corr), scp.sem(co_corr, axis=None), sep=' +/- ')
print('var: ', np.var(co_corr, axis=None))
n = 10000  # iteration count of each averaging loop below
'''c shuff'''
# Same three statistics, averaged over n draws that are each passed through
# win.shuffleIt(data, 2) before the correlation matrix is computed.
m = 0  # running sum of the means
e = 0  # running sum of the standard errors
v = 0  # running sum of the variances
for ii in range(n):
    data = win.getData(class_=None)
    win.shuffleIt(data, 2)
    co_corr = np.corrcoef(data, rowvar=0)
    m += np.mean(co_corr)
    e += scp.sem(co_corr, axis=None)
    v += np.var(co_corr, axis=None)
# Turn the running sums into per-draw averages.
m /= n
e /= n
v /= n
print(m, e, sep=' +/- ')
print('var: ', v)
'''s shuff'''
# Accumulators are reset before the second averaging loop.
m = 0
e = 0
v = 0
for ii in range(n):
# Disabled annotation code, kept for reference:
# for ii, st in enumerate(students):
#     if st.class_ != cl:
#         anno.append((ii - 1, students[ii - 1].class_))
#         cl = st.class_
#     if st.spec != sp:
#         anno2.append((ii - 1, students[ii - 1].spec))
#         sp = st.spec
# anno.append(((len(students) - 1), students[-1].class_))
# anno2.append(((len(students) - 1), students[-1].spec))
# anno = anno + anno2

# set up ranges
# vmin/vmax must span the correlation matrices of the unshuffled data and of
# both shuffle modes (2 and 1), so one shared range covers all three.
students = win.getData()
co_corr = np.corrcoef(students, rowvar=0)
vmin, vmax = co_corr.min(), co_corr.max()

win.shuffleIt(students, 2)
co_corr = np.corrcoef(students, rowvar=0)
vmin = min(vmin, co_corr.min())
# Widen the upper bound with the matrix maximum (the old test compared the
# matrix minimum against vmax, so vmax could never grow).
vmax = max(vmax, co_corr.max())

students = win.getData()
win.shuffleIt(students, 1)
co_corr = np.corrcoef(students, rowvar=0)
vmin = min(vmin, co_corr.min())
vmax = max(vmax, co_corr.max())

# Small margin so the extreme values lie strictly inside the range.
vmin, vmax = vmin - 0.0001, vmax + 0.0001

# plot heatmap
font = {"family": "normal", "weight": "bold", "size": 22}
matplotlib.rc("font", **font)
matplotlib.rcParams["ps.useafm"] = True