def generate_groups(get_labels,
                    f_input='segm/segmented_curves_filtered.txt',
                    output='',
                    cuts=((0.7, 2), (0.35, 3), (0.4, 4), (0.56, 5))):
    """Label the segmented curves and save the groups, once per segment count.

    For every ``(cut_value, ktotal)`` pair in ``cuts`` the slopes and
    intervals returned by ``select_original_breakpoints`` are joined
    column-wise, passed through ``norm`` and handed to ``get_labels``. The
    resulting labels are stored with ``save_groups`` under
    ``output + 'k<ktotal>/k<ktotal>'``.

    Args:
        get_labels: Callable invoked as
            ``get_labels(data, 'single', cut_value, ktotal, output)``; its
            return value is forwarded to ``save_groups`` as the labels.
        f_input: Path of the segmented-curves file given to
            ``select_original_breakpoints`` and ``get_dois``.
        output: Prefix prepended to every output path; it is also forwarded
            unchanged to ``get_labels``.
        cuts: Iterable of ``(cut_value, ktotal)`` pairs to process. The
            default reproduces the values that used to be hard-coded here.
    """
    for cut_value, ktotal in cuts:
        slopes, intervals = select_original_breakpoints(ktotal, f_input)
        dois = get_dois(ktotal, f_input)

        # One row per curve: slope columns followed by interval columns.
        data = np.concatenate((slopes, intervals), axis=1)
        data = norm(data)

        labels = get_labels(data, 'single', cut_value, ktotal, output)

        # Same path as before: '<output>k<ktotal>/k<ktotal>'.
        group_name = 'k' + str(ktotal)
        save_groups(dois, labels, data, output + group_name + '/' + group_name)
# Example #2
# 0
    plt.savefig(filename)


if __name__ == '__main__':
    # For the 3- and 5-segment label files, pick the (up to) three most
    # populated clusters and hand each one's slopes/intervals to
    # `plot_ave_curve`, one output PDF per cluster.

    sources = [
        'clusters\\clusters\\clusters_ind_single_0.35_3.txt',
        'clusters\\clusters\\clusters_ind_single_0.56_5.txt'
    ]
    colors = ['tab:red', 'tab:blue', 'tab:orange', 'tab:green', 'tab:grey']
    letters = ['a', 'b', 'c', 'd', 'e', 'f']
    # Running index into `letters`. It is advanced after every saved figure
    # so each plot gets its own file name; previously it stayed at 0 and the
    # figures of one N overwrote each other.
    idx = 0

    for N, source in zip([3, 5], sources):
        # `np.int` was a plain alias of the builtin `int` and has been
        # removed from NumPy (1.24); the builtin selects the same dtype.
        labels = np.loadtxt(source, dtype=int)
        slopes, intervals = select_original_breakpoints(
            N, 'segm/segmented_curves_filtered.txt')

        # Drop clusters with fewer than 10 members.
        unique, counts = np.unique(labels, return_counts=True)
        big_enough = counts >= 10
        unique = unique[big_enough]
        counts = counts[big_enough]

        # Keep the three largest remaining clusters (ascending by size).
        unique_idxs = np.argsort(counts)[-3:]
        unique = unique[unique_idxs].tolist()

        for i, label in enumerate(unique):
            # Boolean mask selecting the rows that belong to this cluster.
            idxs = labels == label
            slopes_i = slopes[idxs]
            intervals_i = intervals[idxs]

            print(label, '-> tamanho', len(slopes_i))
            filename = 'ave_curve_%d_intervals_%s.pdf' % (N, letters[idx])
            plot_ave_curve(slopes_i, intervals_i, filename, colors[i])
            idx += 1
    ax.set_xlabel('PCA1 %.2f' % x_exp)
    ax.set_ylabel('PCA2 %.2f' % y_exp)
    ax.set_zlabel('PCA3 %.2f' % z_exp)

    plt.savefig(filename)
    plt.clf()


if __name__ == '__main__':

    # Script entry point: for each N in 2..5, build the slope/interval
    # matrix from the HTML segmentation file, pass it through `norm`, and
    # compute a single-linkage hierarchy over its first 50000 rows.

    # NOTE(review): presumably raised so that very deep cluster trees can be
    # walked recursively (e.g. by the disabled dendrogram code) - confirm.
    sys.setrecursionlimit(100000)
    f_input = 'segm/segmented_curves_html.txt'
    # In the visible part of this block, only the disabled dendrogram code
    # below refers to f_output.
    f_output = 'data/html_by_cat/'
    for N in [2, 3, 4, 5]:
        print(N)
        slopes, intervals = select_original_breakpoints(N, f_input)
        # Join slopes and intervals column-wise, then pass the result
        # through `norm` (defined elsewhere).
        data = np.concatenate((slopes, intervals), axis=1)
        data = norm(data)
        print(data.shape)

        # Only the first 50000 rows are clustered.
        # NOTE(review): presumably a cap to keep the linkage computation
        # tractable in time/memory - confirm.
        Z = linkage(data[:50000], method='single')

        print('Z complete')

        # Dendrogram plotting of Z, currently disabled:
        # plt.figure(figsize=(25, 15))
        # dn = dendrogram(Z, leaf_rotation=90., leaf_font_size=8.)
        # plt.ylabel('distance', fontsize=18)
        # plt.xticks(fontsize=16)
        # plt.yticks(fontsize=16)
        # plt.savefig(f_output + 'dendrogram_%d.pdf' % N)
        # plt.close()