Example 1
def test_real_example_on_overlapping_community():
    """Check that the two PPR implementations agree on a real overlapping-community dataset.

    Loads a ground-truth and a detected community assignment from JSON files,
    computes element-centric similarity with both the ``prpack`` and the
    ``power_iteration`` PPR backends, times each, and asserts the two scores
    agree to 3 significant figures.
    """
    # Use context managers so the file handles are closed even if json.load raises.
    with open("ground_truth_community_Philosophy.json", "r") as fh:
        ground_truth_community = json.load(fh)
    with open("detected_community_Philosophy.json", "r") as fh:
        detected_community = json.load(fh)

    c1 = Clustering(elm2clu_dict=ground_truth_community)
    c2 = Clustering(elm2clu_dict=detected_community)

    start = time.time()
    sim_ppr_pack = sim.element_sim(
        c1,
        c2,
        alpha=0.9,
        r=1.0,
        r2=None,
        rescale_path_type="max",
        ppr_implementation="prpack",
    )
    end = time.time()
    print("prpack elapsed time: {}s".format(end - start))

    start = time.time()
    sim_ppr_power_iteration = sim.element_sim(
        c1,
        c2,
        alpha=0.9,
        r=1.0,
        r2=None,
        rescale_path_type="max",
        ppr_implementation="power_iteration",
    )
    end = time.time()
    print("power iteration elapsed time: {}s".format(end - start))

    assert_approx_equal(sim_ppr_pack, sim_ppr_power_iteration, significant=3)
Example 2
def test_simple_example():
    """Verify that both PPR backends give the same element-centric similarity
    on a small hand-built pair of overlapping clusterings (3 sig. figures)."""
    membership_a = {0: [0, 1], 1: [1, 2], 2: [1, 3], 3: [0], 4: [2], 5: [1]}
    membership_b = {0: [0], 1: [1], 2: [1], 3: [0, 3], 4: [2, 4], 5: [2]}

    clu_a = Clustering(elm2clu_dict=membership_a)
    clu_b = Clustering(elm2clu_dict=membership_b)

    # Identical similarity parameters; only the PPR implementation differs.
    shared_kwargs = dict(alpha=0.9, r=1.0, r2=None, rescale_path_type="max")
    score_prpack = sim.element_sim(
        clu_a, clu_b, ppr_implementation="prpack", **shared_kwargs
    )
    score_power_iter = sim.element_sim(
        clu_a, clu_b, ppr_implementation="power_iteration", **shared_kwargs
    )

    assert_approx_equal(score_prpack, score_power_iter, significant=3)
Example 3
def paint_similarity_trace(b,
                           oks,
                           output=None,
                           figsize=(3, 3),
                           dpi=200,
                           **kwargs):
    """Plot element-centric similarity of each traced partition against a baseline.

    Parameters
    ----------
    b : membership list for the baseline clustering.
    oks : object exposing ``trace_mb`` — presumably a mapping whose values hold
        membership lists at index 1 (TODO confirm against the producer of oks).
    output : optional path; when given the figure is saved there.
    figsize : figure size in inches.
    dpi : resolution used for the figure and for saving.
    **kwargs : accepted for interface compatibility; currently unused.
    """
    clu_base = Clustering()
    # Bug fix: `dpi` was previously ignored here (hard-coded 300).
    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    e_sim_list = []
    clu_base.from_membership_list(b)
    for g in oks.trace_mb.values():
        clu = Clustering()
        clu.from_membership_list(g[1])
        e_sim_list.append(sim.element_sim(clu_base, clu))

    ax.autoscale()
    ax.margins(0.1)
    # ax.set_aspect(1)
    plt.xlabel("steps")
    plt.ylabel("Element-centric similarity")
    plt.yticks(np.linspace(0, 1, 5))
    ax.tick_params(direction="in")
    plt.plot(e_sim_list)
    if output is not None:
        plt.savefig(output, dpi=dpi, transparent=True)
def mantel_elsim_r_average_and_errors(some_participants):
    """Average Mantel r and element-centric similarity over repeated samples,
    with lower/upper error bars (distance from average to observed min/max).

    NOTE(review): this function is redefined later in the file with an extended
    return signature; this earlier version returns 6 values.

    Returns
    -------
    (mantel_average, mantel_lower_error, mantel_upper_error,
     elsim_average, elsim_lower_error, elsim_upper_error)
    """
    global column_category_label, all_data, cards, total_participants, perms_of_each_n, perms_of_mantel_test
    dis2 = dissimilarity_matrix(total_participants)

    c2 = clustering_with_clusim(dis2)

    mantel_SUM = 0
    elsim_SUM = 0
    # r is between -1 and 1, so these sentinels are outside the valid range.
    mantel_minimum = 10
    mantel_maximum = -10

    elsim_minimum = 10
    elsim_maximum = -10

    for i in range(perms_of_each_n):
        dis1 = dissimilarity_matrix(some_participants)

        # Mantel Method
        mantel = Mantel.test(dis1,
                             dis2,
                             perms_of_mantel_test,
                             method='pearson',
                             tail='two-tail')
        mantel_r = mantel[0]

        # Bug fix: min and max must be updated independently — the original
        # `if/elif` skipped the max update whenever the min was lowered, so a
        # monotonically decreasing series left the maximum at the sentinel.
        mantel_minimum = min(mantel_minimum, mantel_r)
        mantel_maximum = max(mantel_maximum, mantel_r)
        mantel_SUM = mantel_SUM + mantel_r

        c1 = clustering_with_clusim(dis1)

        # Element-centric Similarity
        elsim_r = sim.element_sim(c1, c2, r=1.0, alpha=0.9)

        # Same independent-update fix as above.
        elsim_minimum = min(elsim_minimum, elsim_r)
        elsim_maximum = max(elsim_maximum, elsim_r)
        elsim_SUM = elsim_SUM + elsim_r

    mantel_average = mantel_SUM / perms_of_each_n  # average of mantel_r
    mantel_l_error = mantel_average - mantel_minimum  # mantel_lower_error
    mantel_u_error = mantel_maximum - mantel_average  # mantel_upper_error

    elsim_average = elsim_SUM / perms_of_each_n  # average of elsim_r
    elsim_l_error = elsim_average - elsim_minimum  # elsim_lower_error
    elsim_u_error = elsim_maximum - elsim_average  # elsim_upper_error

    return mantel_average, mantel_l_error, mantel_u_error, elsim_average, elsim_l_error, elsim_u_error
def mantel_elsim_r_average_and_errors(some_participants):
    """Average Mantel r and element-centric similarity over repeated samples.

    For each of ``count_of_samples_for_each_n`` resamples, computes the Mantel
    test correlation and the element-centric similarity between the sample
    clustering and the full-population clustering, then summarizes each series
    by mean, lower/upper error (distance from mean to min/max), and stdev.

    Returns
    -------
    (mantel_average, mantel_lower_error, mantel_upper_error, mantel_sd,
     elsim_average, elsim_lower_error, elsim_upper_error, elsim_sd)
    """
    global column_category_label, all_data, cards, total_participants, count_of_samples_for_each_n, perms_of_mantel_test
    reference_dis = dissimilarity_matrix(total_participants)
    reference_clu = clustering_with_clusim(reference_dis)

    mantel_scores = []
    elsim_scores = []

    for _ in range(count_of_samples_for_each_n):
        sample_dis = dissimilarity_matrix(some_participants)

        # Mantel test correlation of the sample against the full population.
        mantel_scores.append(
            Mantel.test(sample_dis,
                        reference_dis,
                        perms_of_mantel_test,
                        method='pearson',
                        tail='two-tail')[0])

        # Element-centric similarity of the corresponding clusterings.
        sample_clu = clustering_with_clusim(sample_dis)
        elsim_scores.append(
            sim.element_sim(sample_clu, reference_clu, r=1.0, alpha=0.9))

    mantel_average = statistics.mean(mantel_scores)
    elsim_average = statistics.mean(elsim_scores)

    return (mantel_average,
            mantel_average - min(mantel_scores),   # lower error
            max(mantel_scores) - mantel_average,   # upper error
            statistics.stdev(mantel_scores),
            elsim_average,
            elsim_average - min(elsim_scores),     # lower error
            max(elsim_scores) - elsim_average,     # upper error
            statistics.stdev(elsim_scores))
Example 6
def paint_mds(oks, figsize=(20, 20)):
    """Embed the traced partitions in 2-D via MDS on element-centric distances
    and plot each point labelled by its ``trace_mb`` key.

    NOTE(review): assumes ``oks.trace_mb`` is keyed by pairs produced by
    ``combinations(range(1, l + 1), 2)`` and that ``int(l2**0.5)`` recovers
    that ``l`` from the key count — verify against the producer of ``oks``.
    """
    l2 = len(oks.trace_mb.keys())
    # presumably the side length of the pair grid; see NOTE above — TODO confirm
    l = int(l2**0.5)
    # Pairwise distance matrix: 1 - element-centric similarity.
    X = np.zeros([l2, l2])
    for idx_1, pair_1 in enumerate(combinations(range(1, l + 1), 2)):
        b = oks.trace_mb[pair_1]
        clu_1 = Clustering()
        clu_1.from_membership_list(b)
        for idx_2, pair_2 in enumerate(combinations(range(1, l + 1), 2)):
            b = oks.trace_mb[pair_2]
            clu_2 = Clustering()
            clu_2.from_membership_list(b)

            X[idx_1][idx_2] = 1 - sim.element_sim(clu_1, clu_2)
            # Distance is symmetric; mirror into the lower triangle.
            X[idx_2][idx_1] = X[idx_1][idx_2]

    def _plot_embedding(X, title=None):
        # Min-max normalize each coordinate to [0, 1] before drawing labels.
        x_min, x_max = np.min(X, 0), np.max(X, 0)
        X = (X - x_min) / (x_max - x_min)

        plt.figure(figsize=figsize)
        for ind, i in enumerate(range(X.shape[0])):
            plt.text(X[i, 0],
                     X[i, 1],
                     str(list(oks.trace_mb.keys())[ind]),
                     color=plt.cm.Set1(1 / 10.),
                     fontdict={
                         'weight': 'bold',
                         'size': 12
                     })
        plt.xticks([]), plt.yticks([])
        if title is not None:
            plt.title(title)

    # MDS on the precomputed distance matrix, then scatter the 2-D embedding.
    clf = manifold.MDS(n_components=2,
                       n_init=10,
                       max_iter=10000,
                       dissimilarity="precomputed")
    X_mds = clf.fit_transform(X)
    _plot_embedding(X_mds)
def compare_scores(nexperiment, true_clusters, true_labels, predicted_clusters,
                   predicted_labels):
    """Print a battery of clustering-agreement scores (sklearn + clusim)
    comparing a predicted clustering against the ground truth."""
    truth = Clustering()
    prediction = Clustering()
    truth.from_cluster_list(true_clusters)
    prediction.from_cluster_list(predicted_clusters)

    # (label, value) pairs, in report order; every value is rounded to 3 places.
    metric_rows = [
        ("Weigthed Similarity: ",
         score.calculate_mp_score(true_clusters, predicted_clusters)),
        ("NMI: ",
         normalized_mutual_info_score(true_labels,
                                      predicted_labels,
                                      average_method='arithmetic')),
        ("AMI: ", adjusted_mutual_info_score(true_labels, predicted_labels)),
        ("NMI2: ", sim.nmi(truth, prediction)),
        ("RI: ", sim.rand_index(truth, prediction)),
        ("Completeness: ", completeness_score(true_labels, predicted_labels)),
        ("V-Measure: ", v_measure_score(true_labels, predicted_labels)),
        ("Adjusted Rand: ", adjusted_rand_score(true_labels, predicted_labels)),
        ("Fowlkes Mallows: ", fowlkes_mallows_score(true_labels,
                                                    predicted_labels)),
        ("Jaccard Index: ", sim.jaccard_index(truth, prediction)),
        ("F-Measure: ", sim.fmeasure(truth, prediction)),
        ("Element-centric: ", sim.element_sim(truth, prediction)),
    ]

    print("------------------")
    print("Example ", nexperiment)
    for label, value in metric_rows:
        print(label, round(value, 3))
    print()