def generate_groups(get_labels, f_input='segm/segmented_curves_filtered.txt', output=''):
    """Label and save the curve groups for each preset segment count.

    For every (cut value, segment count) pair in the built-in schedule, the
    slopes and intervals returned by ``select_original_breakpoints`` are
    joined column-wise, passed through ``norm``, labelled by ``get_labels``
    and handed to ``save_groups`` together with the result of ``get_dois``.

    Args:
        get_labels: Callable invoked as
            ``get_labels(data, 'single', cut_value, ktotal, output)``; its
            return value is forwarded to ``save_groups`` as the labels.
        f_input: Input argument forwarded to ``select_original_breakpoints``
            and ``get_dois``.
        output: String prepended to the ``'k<K>/k<K>'`` target that is
            passed to ``save_groups``; also forwarded to ``get_labels``.
    """
    # (cut value, number of segments) pairs, processed in this order.
    schedule = ((0.7, 2), (0.35, 3), (0.4, 4), (0.56, 5))

    for threshold, n_segments in schedule:
        slope_part, interval_part = select_original_breakpoints(n_segments, f_input)
        identifiers = get_dois(n_segments, f_input)

        # One row per curve: slope columns followed by interval columns.
        features = norm(np.concatenate((slope_part, interval_part), axis=1))

        assigned = get_labels(features, 'single', threshold, n_segments, output)

        tag = 'k' + str(n_segments)
        save_groups(identifiers, assigned, features, output + tag + '/' + tag)
# NOTE(review): the call below appears to be the tail of a definition that
# starts above the visible chunk; it is kept unchanged.
plt.savefig(filename)


if __name__ == '__main__':
    # Script entry point: for each label file, pick the (at most) three most
    # populated clusters having at least 10 members and plot one curve per
    # cluster with plot_ave_curve.
    sources = [
        'clusters\\clusters\\clusters_ind_single_0.35_3.txt',
        'clusters\\clusters\\clusters_ind_single_0.56_5.txt'
    ]
    colors = ['tab:red', 'tab:blue', 'tab:orange', 'tab:green', 'tab:grey']
    letters = ['a', 'b', 'c', 'd', 'e', 'f']

    # NOTE(review): idx is never incremented in the visible code, so every
    # plot for a given N is written to the same '..._a.pdf' file. Confirm
    # whether an `idx += 1` belongs at the end of the inner loop.
    idx = 0
    for N, source in zip([3, 5], sources):
        # `np.int` was only an alias of the builtin `int` and was removed in
        # NumPy 1.24; the builtin yields the same dtype on every version.
        labels = np.loadtxt(source, dtype=int)
        slopes, intervals = select_original_breakpoints(
            N, 'segm/segmented_curves_filtered.txt')

        # Keep clusters with >= 10 members, then the three largest of those
        # (argsort is ascending, so the last three indices are the largest).
        unique, counts = np.unique(labels, return_counts=True)
        unique = unique[counts >= 10]
        counts = counts[counts >= 10]
        unique_idxs = np.argsort(counts)[-3:]
        unique = unique[unique_idxs].tolist()
        # labels = [unique.index(l) if l in unique else -1 for l in labels]

        for i, label in enumerate(unique):
            # Boolean mask selecting the rows assigned to this cluster.
            idxs = labels == label
            slopes_i = slopes[idxs]
            intervals_i = intervals[idxs]
            print(label, '-> tamanho', len(slopes_i))
            filename = 'ave_curve_%d_intervals_%s.pdf' % (N, letters[idx])
            plot_ave_curve(slopes_i, intervals_i, filename, colors[i])
# NOTE(review): the five statements below appear to be the tail of a
# definition that starts above the visible chunk; they are kept unchanged.
ax.set_xlabel('PCA1 %.2f' % x_exp)
ax.set_ylabel('PCA2 %.2f' % y_exp)
ax.set_zlabel('PCA3 %.2f' % z_exp)
plt.savefig(filename)
plt.clf()


if __name__ == '__main__':
    # Script entry point: build the single-linkage matrix for each segment
    # count from 2 to 5 over the normalised slope/interval features.
    sys.setrecursionlimit(100000)

    f_input = 'segm/segmented_curves_html.txt'
    # Only referenced by the disabled dendrogram code at the end of the loop.
    f_output = 'data/html_by_cat/'

    for N in range(2, 6):
        print(N)
        slopes, intervals = select_original_breakpoints(N, f_input)

        # Slope columns followed by interval columns, then normalised.
        data = norm(np.concatenate((slopes, intervals), axis=1))
        print(data.shape)

        # Only the first 50000 rows are fed to the linkage computation.
        Z = linkage(data[:50000], method='single')
        print('Z complete')

        # Dendrogram output is currently disabled:
        # plt.figure(figsize=(25, 15))
        # dn = dendrogram(Z, leaf_rotation=90., leaf_font_size=8.)
        # plt.ylabel('distance', fontsize=18)
        # plt.xticks(fontsize=16)
        # plt.yticks(fontsize=16)
        # plt.savefig(f_output + 'dendrogram_%d.pdf' % N)
        # plt.close()