Ejemplo n.º 1
0
def test_cwba_inv():
    """Plot classification accuracy on CWBA graphs as 1/rho varies.

    Sweeps ``rho_inv`` over 0.05, 0.10, ..., 0.95. For every value a graph
    is built with ``cwba.cwba_graph(n, m, 1 / rho_inv)`` and ``sim.runsim``
    is called once per distance matrix (``metrics.spd_mat``,
    ``metrics.dsd_mat``, ``metrics.rd_mat``) with
    ``voting.scipy_weighted_knn`` as the voting function. The ratio of the
    two values returned by ``sim.runsim`` is recorded as the accuracy, and
    the three curves are handed to ``plotting.plot_params_vs_accuracy``.
    """
    n = 1000
    m = 300
    censorP = 0.7
    avgRuns = 10
    # Decimal arithmetic keeps the swept values exact (0.05, 0.10, ...)
    # instead of accumulating binary floating-point error.
    increment = Decimal("0.05")
    spdaccs = []
    dsdaccs = []
    rdaccs = []
    params = []
    curves = (
        (metrics.spd_mat, spdaccs),
        (metrics.dsd_mat, dsdaccs),
        (metrics.rd_mat, rdaccs),
    )

    # Start one step above zero: rho_inv == 0 would make 1 / rho_inv fail.
    rho_inv = increment
    while float(rho_inv) < 1:
        A, truth = cwba.cwba_graph(n, m, 1 / float(rho_inv))
        for dist_mat, accs in curves:
            correct, total = sim.runsim(truth, censorP,
                                        voting.scipy_weighted_knn,
                                        dist_mat(A), avgRuns)
            accs.append(correct / total)
        # Record the value that was actually simulated *before* stepping,
        # so every accuracy is plotted against its own parameter.
        params.append(float(rho_inv))
        rho_inv += increment
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                     "1/rho", ["SPD", "DSD", "RD"])
Ejemplo n.º 2
0
def test_ncch_q():
    """Sweep the NCCH edge deletion probability ``q`` and plot accuracy.

    ``q`` runs from 0.0 up to and including 1 in steps of the module-level
    ``increment`` while ``p`` stays at 0.5. Each step builds a graph with
    ``ncch.construct_adj`` and calls ``sim.runsim`` once per distance matrix
    (SPD, DSD, RD); the ratio of the two returned values is stored as the
    accuracy for that step. All remaining settings are read from module
    globals.
    """
    # Shared experiment settings defined at module level (read only here).
    global n, r, rp, censorP, avgRuns, increment, vote
    add_prob = Decimal(str(0.5))
    del_prob = Decimal(str(0.0))
    spd_curve, dsd_curve, rd_curve = [], [], []
    curves = (spd_curve, dsd_curve, rd_curve)
    swept = []

    while float(del_prob) <= 1:
        graph, labels = ncch.construct_adj(
            n, float(add_prob), float(del_prob), r, rp)
        # Run the three simulations first (SPD, DSD, RD order), then turn
        # each (correct, total) pair into an accuracy.
        outcomes = [
            sim.runsim(labels, censorP, vote, build(graph), avgRuns)
            for build in (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
        ]
        for curve, (hits, trials) in zip(curves, outcomes):
            curve.append(hits / trials)
        print(del_prob)
        swept.append(float(del_prob))
        del_prob += increment

    plotting.plot_params_vs_accuracy(
        swept,
        [spd_curve, dsd_curve, rd_curve],
        "Edge deletion probability (q)",
        ["SPD","DSD","RD"],
        "NCCH (p=0.5)",
    )
    return
Ejemplo n.º 3
0
def test_cwba():
    """Plot classification accuracy on CWBA graphs as ``m`` varies.

    Sweeps ``m`` over the integers 1..19 with ``rho`` fixed at 2.0. For
    every value a graph is built with ``cwba.cwba_graph(n, m, rho)`` and
    ``sim.runsim`` is called once per distance matrix (``metrics.spd_mat``,
    ``metrics.dsd_mat``, ``metrics.rd_mat``) with
    ``voting.scipy_weighted_knn`` as the voting function. The ratio of the
    two values returned by ``sim.runsim`` is recorded as the accuracy, and
    the three curves are handed to ``plotting.plot_params_vs_accuracy``.
    """
    n = 1000
    rho = 2.0
    censorP = 0.7
    avgRuns = 10
    spdaccs = []
    dsdaccs = []
    rdaccs = []
    params = []
    curves = (
        (metrics.spd_mat, spdaccs),
        (metrics.dsd_mat, dsdaccs),
        (metrics.rd_mat, rdaccs),
    )

    for m in range(1, 20):
        A, truth = cwba.cwba_graph(n, m, rho)
        for dist_mat, accs in curves:
            correct, total = sim.runsim(truth, censorP,
                                        voting.scipy_weighted_knn,
                                        dist_mat(A), avgRuns)
            accs.append(correct / total)
        # Record the m that was actually simulated, so every accuracy is
        # plotted against its own parameter value.
        params.append(m)
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                     "Minimum vertex degree (m)",
                                     ["SPD", "DSD", "RD"],
                                     "CWBA (" + u"\u03C1" + "=2)")
Ejemplo n.º 4
0
def test_ncch_r():
    """Sweep the NCCH hub count and plot accuracy for each distance matrix.

    The fourth argument of ``ncch.construct_adj`` takes the values
    100, 150, ..., 400 while ``p`` = 0.2 and ``q`` = 0.5 stay fixed. Each
    step calls ``sim.runsim`` once per distance matrix (SPD, DSD, RD) and
    stores the ratio of the two returned values as the accuracy. All other
    settings are read from module globals.
    """
    # Shared experiment settings defined at module level (read only here).
    global n, rp, censorP, avgRuns, vote
    add_prob = Decimal(str(0.2))  # other setting used: 0.8
    del_prob = Decimal(str(0.5))
    spd_curve, dsd_curve, rd_curve = [], [], []
    curves = (spd_curve, dsd_curve, rd_curve)
    swept = []

    # Alternative sweep used previously: hub counts 0..20 in steps of 1.
    for hub_count in range(100, 401, 50):
        graph, labels = ncch.construct_adj(
            n, float(add_prob), float(del_prob), hub_count, rp)
        # Run the three simulations first (SPD, DSD, RD order), then turn
        # each (correct, total) pair into an accuracy.
        outcomes = [
            sim.runsim(labels, censorP, vote, build(graph), avgRuns)
            for build in (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
        ]
        for curve, (hits, trials) in zip(curves, outcomes):
            curve.append(hits / trials)
        print(hub_count)
        swept.append(hub_count)

    plotting.plot_params_vs_accuracy(
        swept,
        [spd_curve, dsd_curve, rd_curve],
        "Number of hubs",
        ["SPD","DSD","RD"],
        "NCCH (p=%s,q=%s)" % (add_prob, del_prob),
    )
    return
Ejemplo n.º 5
0
def test_ncch_censor():
    """Sweep the censor proportion on NCCH graphs and plot accuracy.

    The second argument of ``sim.runsim`` takes the values
    0.1, 0.2, ..., 0.9 while ``p`` = 0.8 and ``q`` = 0.5 stay fixed. A fresh
    graph is built with ``ncch.construct_adj`` at every step, ``sim.runsim``
    is called once per distance matrix (SPD, DSD, RD), and the ratio of the
    two returned values is stored as the accuracy. All other settings are
    read from module globals.
    """
    # Shared experiment settings defined at module level (read only here).
    global n, r, rp, avgRuns, vote
    add_prob = Decimal(str(0.8))
    del_prob = Decimal(str(0.5))
    censor_level = Decimal(str(0.1))
    step = Decimal(str(0.1))
    spd_curve, dsd_curve, rd_curve = [], [], []
    curves = (spd_curve, dsd_curve, rd_curve)
    swept = []

    while float(censor_level) < 1:
        graph, labels = ncch.construct_adj(
            n, float(add_prob), float(del_prob), r, rp)
        # Run the three simulations first (SPD, DSD, RD order), then turn
        # each (correct, total) pair into an accuracy.
        outcomes = [
            sim.runsim(labels, float(censor_level), vote, build(graph),
                       avgRuns)
            for build in (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
        ]
        for curve, (hits, trials) in zip(curves, outcomes):
            curve.append(hits / trials)
        print(censor_level)
        swept.append(float(censor_level))
        censor_level += step

    plotting.plot_params_vs_accuracy(
        swept,
        [spd_curve, dsd_curve, rd_curve],
        "Vertex label censor proportion",
        ["SPD","DSD","RD"],
        "NCCH (p=%s,q=%s)" % (add_prob, del_prob),
    )
    return
Ejemplo n.º 6
0
def test_cwba_rhoinv():
    """Sweep 1/rho on CWBA graphs and plot accuracy per distance matrix.

    ``rho_inv`` runs over 0.05, 0.10, ..., 1.0 with ``m`` fixed at 300. Each
    step builds a graph with ``cwba.cwba_graph(n, m, 1 / rho_inv)``, calls
    ``sim.runsim`` once per distance matrix (SPD, DSD, RD) and stores the
    ratio of the two returned values as the accuracy. All other settings
    are read from module globals.
    """
    # Shared experiment settings defined at module level (read only here).
    global n, censorP, avgRuns, vote
    inv_rho = Decimal(str(0.05))
    step = Decimal(str(0.05))
    degree = int(Decimal(str(300.0)))
    spd_curve, dsd_curve, rd_curve = [], [], []
    curves = (spd_curve, dsd_curve, rd_curve)
    swept = []

    while float(inv_rho) <= 1:
        graph, labels = cwba.cwba_graph(n, degree, 1 / float(inv_rho))
        # Run the three simulations first (SPD, DSD, RD order), then turn
        # each (correct, total) pair into an accuracy.
        outcomes = [
            sim.runsim(labels, censorP, vote, build(graph), avgRuns)
            for build in (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
        ]
        for curve, (hits, trials) in zip(curves, outcomes):
            curve.append(hits / trials)
        print(inv_rho)
        swept.append(float(inv_rho))
        inv_rho += step

    x_label = "Inverse likeliness of clusters (1/" + u"\u03C1" + ")"
    plotting.plot_params_vs_accuracy(
        swept,
        [spd_curve, dsd_curve, rd_curve],
        x_label,
        ["SPD", "DSD", "RD"],
        "CWBA (m=300)",
    )
    return
Ejemplo n.º 7
0
def test_cwba_m():
    """Sweep ``m`` on CWBA graphs and plot accuracy per distance matrix.

    ``m`` runs over 1..19 with ``rho`` fixed at 2.0. Each step builds a
    graph with ``cwba.cwba_graph(n, m, rho)``, calls ``sim.runsim`` once per
    distance matrix (SPD, DSD, RD) and stores the ratio of the two returned
    values as the accuracy. All other settings are read from module
    globals.
    """
    # Shared experiment settings defined at module level (read only here).
    global n, censorP, avgRuns, vote
    rho_value = float(Decimal(str(2.0)))
    # Kept as a Decimal so the progress print shows "1.0", "2.0", ...
    degree = Decimal(str(1.0))
    step = Decimal(str(1.0))
    spd_curve, dsd_curve, rd_curve = [], [], []
    curves = (spd_curve, dsd_curve, rd_curve)
    swept = []

    while int(degree) < 20:
        graph, labels = cwba.cwba_graph(n, int(degree), rho_value)
        # Run the three simulations first (SPD, DSD, RD order), then turn
        # each (correct, total) pair into an accuracy.
        outcomes = [
            sim.runsim(labels, censorP, vote, build(graph), avgRuns)
            for build in (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
        ]
        for curve, (hits, trials) in zip(curves, outcomes):
            curve.append(hits / trials)
        print(degree)
        swept.append(int(degree))
        degree += step

    plot_title = "CWBA (" + u"\u03C1" + "=2)"
    plotting.plot_params_vs_accuracy(
        swept,
        [spd_curve, dsd_curve, rd_curve],
        "New vertex degree (m)",
        ["SPD", "DSD", "RD"],
        plot_title,
    )
    return
Ejemplo n.º 8
0
def class_dists(G, truth_tab):
    """Tabulate SPD, DSD and class difference for every vertex pair of G.

    One row is produced per unordered pair of distinct vertices ``(u, v)``
    with ``u < v``, in the order generated by
    ``itertools.combinations(range(n), 2)``. Pairs with ``u == v`` (paths of
    length 0) never occur.

    Parameters
    ----------
    G : networkx.Graph or numpy.ndarray
        Graph, converted to an adjacency matrix with ``as_adj``.
    truth_tab : dict
        A dictionary of labels keyed by each vertex in G. It is indexed
        with the integer positions ``0 .. n-1`` of the adjacency matrix.

    Returns
    -------
    df : DataFrame
        Columns ``'spd'`` (value from ``spd_mat``), ``'dsd'`` (value from
        ``dsd_mat``) and ``'class_diff'`` (0 if both vertices have the same
        label, 1 otherwise). All columns are float64.
    """

    A = as_adj(G)
    n_vertices = A.shape[0]
    # Number of unordered pairs; n * (n - 1) is always even, so integer
    # division is exact and avoids a round trip through float.
    n_rows = n_vertices * (n_vertices - 1) // 2
    # numpy table to hold the result before we stick it in the data frame
    tab = np.empty((n_rows, 3), dtype=np.float64)
    dsds = dsd_mat(A)
    spds = spd_mat(A)

    # combinations() only yields pairs with u < v, so no self-pair check
    # is needed and every row of ``tab`` gets filled exactly once.
    for row, (u, v) in enumerate(combinations(range(n_vertices), r=2)):
        tab[row, 0] = spds[u, v]
        tab[row, 1] = dsds[u, v]
        tab[row, 2] = 0 if truth_tab[u] == truth_tab[v] else 1

    return pd.DataFrame(tab, columns=['spd', 'dsd', 'class_diff'])
Ejemplo n.º 9
0
def spdists(G):
    """Return the ``spd_mat`` entry of every unordered vertex pair of G.

    G is converted with ``as_adj``; the result is a flat list holding the
    entries ``[u][v]`` for all ``u < v``, ordered by ``u`` and then ``v``.
    """
    adjacency = as_adj(G)
    distances = spd_mat(adjacency)
    return [
        distances[src][dst]
        for src in range(0, adjacency.shape[0])
        for dst in range(src + 1, adjacency.shape[0])
    ]
Ejemplo n.º 10
0
def test_censor():
    """Plot classification accuracy on NCC graphs as censoring varies.

    Sweeps ``censorP`` over 0.05, 0.10, ..., 0.95 with ``p`` = 0.3 and
    ``q`` = 0.5. For every value a graph is built with
    ``cg.construct_adj(n, p, q)`` and ``sim.runsim`` is called once per
    distance matrix (``metrics.spd_mat``, ``metrics.dsd_mat``,
    ``metrics.rd_mat``) with ``voting.scipy_weighted_knn`` as the voting
    function. The ratio of the two values returned by ``sim.runsim`` is
    recorded as the accuracy, and the three curves are handed to
    ``plotting.plot_params_vs_accuracy``.
    """
    n = 250
    p = 0.3
    q = 0.5
    avgRuns = 10
    # Decimal arithmetic keeps the swept values exact (0.05, 0.10, ...)
    # instead of accumulating binary floating-point error.
    censorP = Decimal("0.05")
    increment = Decimal("0.05")
    spdaccs = []
    dsdaccs = []
    rdaccs = []
    params = []
    curves = (
        (metrics.spd_mat, spdaccs),
        (metrics.dsd_mat, dsdaccs),
        (metrics.rd_mat, rdaccs),
    )

    while float(censorP) < 1:
        A, truth = cg.construct_adj(n, p, q)
        for dist_mat, accs in curves:
            correct, total = sim.runsim(truth, float(censorP),
                                        voting.scipy_weighted_knn,
                                        dist_mat(A), avgRuns)
            accs.append(correct / total)
        print(censorP)
        # Record the value that was actually simulated *before* stepping,
        # so every accuracy is plotted against its own parameter.
        params.append(float(censorP))
        censorP += increment
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                     "Censoring probability (censorP)",
                                     ["SPD", "DSD", "RD"], "NCC (q=0.5)")
    return
Ejemplo n.º 11
0
def test_cg():
    """Run two NCC parameter sweeps and plot accuracy for each.

    Sweep 1 varies ``p`` over 0.0, 0.05, ..., 1.0 with ``q`` fixed at 0.5
    and votes with ``voting.scipy_weighted_knn``. Sweep 2 varies ``q`` over
    0.05, 0.10, ..., 1.0 with ``p`` fixed at 0.5 and votes with
    ``voting.knn_weighted_majority_vote``.

    In both sweeps a graph is built with ``cg.construct_adj(n, p, q)`` at
    every step, ``sim.runsim`` is called once per distance matrix
    (``metrics.spd_mat``, ``metrics.dsd_mat``, ``metrics.rd_mat``), and the
    ratio of the two returned values is recorded as the accuracy. Each
    sweep ends with a call to ``plotting.plot_params_vs_accuracy``.
    """
    n = 250
    censorP = 0.6
    avgRuns = 10
    # Decimal arithmetic keeps the swept values exact (0.05, 0.10, ...)
    # instead of accumulating binary floating-point error.
    increment = Decimal("0.05")
    dist_mats = (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)

    # --- Sweep 1: edge addition probability p, q fixed -------------------
    p = Decimal("0.0")
    q = Decimal("0.5")
    spdaccs = []
    dsdaccs = []
    rdaccs = []
    params = []
    while float(p) <= 1:
        A, truth = cg.construct_adj(n, float(p), float(q))
        for dist_mat, accs in zip(dist_mats, (spdaccs, dsdaccs, rdaccs)):
            correct, total = sim.runsim(truth, censorP,
                                        voting.scipy_weighted_knn,
                                        dist_mat(A), avgRuns)
            accs.append(correct / total)
        print(p)
        # Record the value that was actually simulated *before* stepping,
        # so every accuracy is plotted against its own parameter.
        params.append(float(p))
        p += increment
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                     "Edge addition probability (p)",
                                     ["SPD", "DSD", "RD"], "NCC (q=0.5)")

    # --- Sweep 2: edge deletion probability q, p fixed -------------------
    p = Decimal("0.5")
    q = increment  # this sweep starts one step above zero
    spdaccs = []
    dsdaccs = []
    rdaccs = []
    params = []
    while float(q) <= 1:
        A, truth = cg.construct_adj(n, float(p), float(q))
        for dist_mat, accs in zip(dist_mats, (spdaccs, dsdaccs, rdaccs)):
            correct, total = sim.runsim(truth, censorP,
                                        voting.knn_weighted_majority_vote,
                                        dist_mat(A), avgRuns)
            accs.append(correct / total)
        print(q)
        # Same ordering as above: record first, then step.
        params.append(float(q))
        q += increment
    # NOTE(review): the title mentions NCCH and 100 hubs, but the graphs
    # above come from cg.construct_adj, which is given no hub count here.
    # Confirm the intended title.
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                     "Edge deletion probability (q)",
                                     ["SPD", "DSD", "RD"],
                                     "NCCH (p=0.5, number of hubs=100)")
    return
Ejemplo n.º 12
0
def test_cg_h():
    """Plot classification accuracy against hub count for two ranges.

    Two sweeps are run with ``p`` = ``q`` = 0.5: hub counts 1..20 in steps
    of 1, then 100..400 in steps of 50. For every hub count ``r`` a graph is
    built with ``cg.constructWithHubs(n, p, q, r)`` and ``sim.runsim`` is
    called once per distance matrix (``metrics.spd_mat``,
    ``metrics.dsd_mat``, ``metrics.rd_mat``) with
    ``voting.scipy_weighted_knn`` as the voting function. The ratio of the
    two values returned by ``sim.runsim`` is recorded as the accuracy, and
    each sweep ends with its own call to
    ``plotting.plot_params_vs_accuracy``.
    """
    n = 250
    p = 0.5
    q = 0.5
    censorP = 0.7
    avgRuns = 10
    dist_mats = (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)

    # Fine-grained sweep over small hub counts, then a coarse sweep over
    # large ones; each gets its own plot.
    for hub_counts in (range(1, 21), range(100, 401, 50)):
        spdaccs = []
        dsdaccs = []
        rdaccs = []
        params = []
        for r in hub_counts:
            A, truth = cg.constructWithHubs(n, p, q, r)
            for dist_mat, accs in zip(dist_mats,
                                      (spdaccs, dsdaccs, rdaccs)):
                correct, total = sim.runsim(truth, censorP,
                                            voting.scipy_weighted_knn,
                                            dist_mat(A), avgRuns)
                accs.append(correct / total)
            # Record the hub count that was actually simulated, so every
            # accuracy is plotted against its own parameter value.
            params.append(r)
        plotting.plot_params_vs_accuracy(params,
                                         [spdaccs, dsdaccs, rdaccs],
                                         "Number of hubs",
                                         ["SPD", "DSD", "RD"])
    return