def test_cwba():
    """Sweep the CWBA parameter m from 1 to 19 and plot accuracy per metric.

    For each m a graph is built with ``cwba.cwba_graph(n, m, rho)`` and
    ``sim.runsim`` is run once per distance matrix (SPD, DSD, RD) using
    ``voting.scipy_weighted_knn``.  The fraction correct/total of each run
    is collected and handed to ``plotting.plot_params_vs_accuracy``.
    """
    n = 1000
    rho = 2.0
    censorP = 0.7
    avgRuns = 10
    spdaccs, dsdaccs, rdaccs = [], [], []
    params = []

    for m in range(1, 20):
        A, truth = cwba.cwba_graph(n, m, rho)
        spdcorr, spdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn,
                                       metrics.spd_mat(A), avgRuns)
        dsdcorr, dsdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn,
                                       metrics.dsd_mat(A), avgRuns)
        rdcorr, rdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn,
                                     metrics.rd_mat(A), avgRuns)
        spdaccs.append(spdcorr / spdtotal)
        dsdaccs.append(dsdcorr / dsdtotal)
        rdaccs.append(rdcorr / rdtotal)
        # Record the m that actually produced this graph.  The previous code
        # incremented m first, so every point was plotted one step too far
        # to the right.
        params.append(m)

    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                     "Minimum vertex degree (m)",
                                     ["SPD", "DSD", "RD"],
                                     "CWBA (" + u"\u03C1" + "=2)")
def test_cwba_inv():
    """Sweep 1/rho over 0.05, 0.10, ..., 0.95 for CWBA graphs with m = 300.

    For each value a graph is built with
    ``cwba.cwba_graph(n, m, 1 / rho_inv)`` and ``sim.runsim`` is run once per
    distance matrix (SPD, DSD, RD).  The correct/total fractions are plotted
    against 1/rho.
    """
    n = 1000
    m = 300
    censorP = 0.7
    avgRuns = 10
    # Decimal keeps the 0.05 steps exact so the loop bound is hit precisely.
    increment = Decimal(str(0.05))
    rho_inv = Decimal(str(0.0)) + increment
    spdaccs, dsdaccs, rdaccs = [], [], []
    params = []

    while float(rho_inv) < 1:
        A, truth = cwba.cwba_graph(n, m, 1 / float(rho_inv))
        spdcorr, spdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn,
                                       metrics.spd_mat(A), avgRuns)
        dsdcorr, dsdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn,
                                       metrics.dsd_mat(A), avgRuns)
        rdcorr, rdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn,
                                     metrics.rd_mat(A), avgRuns)
        spdaccs.append(spdcorr / spdtotal)
        dsdaccs.append(dsdcorr / dsdtotal)
        rdaccs.append(rdcorr / rdtotal)
        # Record the value used for this graph *before* stepping; recording
        # after the increment shifted every plotted point by one step.
        params.append(float(rho_inv))
        rho_inv += increment

    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                     "1/rho", ["SPD", "DSD", "RD"])
def test_ncch_r():
    """Plot accuracy against the number of hubs r for NCCH graphs.

    r takes the values 100, 150, ..., 400.  Each graph comes from
    ``ncch.construct_adj`` with p = 0.2 and q = 0.5; ``sim.runsim`` is then
    run on the SPD, DSD and RD matrices in that order.  Reads the
    module-level ``n``, ``rp``, ``censorP``, ``avgRuns`` and ``vote``.
    """
    global n, rp, censorP, avgRuns, vote
    p = Decimal(str(0.2))
    q = Decimal(str(0.5))
    p_val, q_val = float(p), float(q)

    distance_fns = (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
    curves = ([], [], [])  # accuracy series, same order as distance_fns
    hub_counts = []

    for r in range(100, 401, 50):
        A, truth = ncch.construct_adj(n, p_val, q_val, r, rp)
        for build_matrix, curve in zip(distance_fns, curves):
            hits, seen = sim.runsim(truth, censorP, vote, build_matrix(A), avgRuns)
            curve.append(hits / seen)
        print(r)
        hub_counts.append(r)

    plotting.plot_params_vs_accuracy(hub_counts, list(curves),
                                     "Number of hubs", ["SPD","DSD","RD"],
                                     "NCCH (p="+str(p)+",q="+str(q)+")")
    return
def test_ncch_q():
    """Plot accuracy against the edge deletion probability q for NCCH graphs.

    q starts at 0 and grows by the module-level ``increment`` while it is
    at most 1; p is fixed at 0.5.  Each graph comes from
    ``ncch.construct_adj`` and ``sim.runsim`` is run on the SPD, DSD and RD
    matrices in that order.  Also reads the module-level ``n``, ``r``,
    ``rp``, ``censorP``, ``avgRuns`` and ``vote``.
    """
    global n, r, rp, censorP, avgRuns, increment, vote
    add_prob = Decimal(str(0.5))
    del_prob = Decimal(str(0.0))

    distance_fns = (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
    curves = ([], [], [])  # accuracy series, same order as distance_fns
    swept = []

    while float(del_prob) <= 1:
        A, truth = ncch.construct_adj(n, float(add_prob), float(del_prob), r, rp)
        for build_matrix, curve in zip(distance_fns, curves):
            hits, seen = sim.runsim(truth, censorP, vote, build_matrix(A), avgRuns)
            curve.append(hits / seen)
        print(del_prob)
        swept.append(float(del_prob))
        del_prob += increment

    plotting.plot_params_vs_accuracy(swept, list(curves),
                                     "Edge deletion probability (q)",
                                     ["SPD","DSD","RD"], "NCCH (p=0.5)")
    return
def test_ncch_censor():
    """Plot accuracy against the censor proportion for NCCH graphs.

    The censor proportion takes the values 0.1, 0.2, ..., 0.9 with p = 0.8
    and q = 0.5.  A new graph is built with ``ncch.construct_adj`` for every
    proportion, and ``sim.runsim`` is run on the SPD, DSD and RD matrices in
    that order.  Reads the module-level ``n``, ``r``, ``rp``, ``avgRuns`` and
    ``vote``.
    """
    global n, r, rp, avgRuns, vote
    p = Decimal(str(0.8))
    q = Decimal(str(0.5))
    step = Decimal(str(0.1))

    distance_fns = (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
    curves = ([], [], [])  # accuracy series, same order as distance_fns
    proportions = []

    # step * k gives the same exact Decimal values as repeated addition.
    for k in range(1, 10):
        censorP = step * k
        rate = float(censorP)
        A, truth = ncch.construct_adj(n, float(p), float(q), r, rp)
        for build_matrix, curve in zip(distance_fns, curves):
            hits, seen = sim.runsim(truth, rate, vote, build_matrix(A), avgRuns)
            curve.append(hits / seen)
        print(censorP)
        proportions.append(rate)

    plotting.plot_params_vs_accuracy(proportions, list(curves),
                                     "Vertex label censor proportion",
                                     ["SPD","DSD","RD"],
                                     "NCCH (p="+str(p)+",q="+str(q)+")")
    return
def test_cwba_rhoinv():
    """Plot accuracy against 1/rho for CWBA graphs with m = 300.

    1/rho takes the values 0.05, 0.10, ..., 1.00.  Each graph comes from
    ``cwba.cwba_graph(n, 300, rho)`` and ``sim.runsim`` is run on the SPD,
    DSD and RD matrices in that order.  Reads the module-level ``n``,
    ``censorP``, ``avgRuns`` and ``vote``.
    """
    global n, censorP, avgRuns, vote
    degree = int(Decimal(str(300.0)))
    step = Decimal(str(0.05))

    distance_fns = (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
    curves = ([], [], [])  # accuracy series, same order as distance_fns
    swept = []

    # step * k reproduces the exact Decimal sequence of repeated addition
    # (0.05, 0.10, ..., 1.00), including the trailing zeros that get printed.
    for k in range(1, 21):
        rho_inv = step * k
        A, truth = cwba.cwba_graph(n, degree, 1 / float(rho_inv))
        for build_matrix, curve in zip(distance_fns, curves):
            hits, seen = sim.runsim(truth, censorP, vote, build_matrix(A), avgRuns)
            curve.append(hits / seen)
        print(rho_inv)
        swept.append(float(rho_inv))

    rho_str = u"\u03C1"
    plotting.plot_params_vs_accuracy(
        swept, list(curves),
        "Inverse likeliness of clusters (1/" + rho_str + ")",
        ["SPD", "DSD", "RD"], "CWBA (m=300)")
    return
def test_cwba_m():
    """Plot accuracy against the CWBA parameter m for rho = 2.

    m takes the values 1 to 19.  Each graph comes from
    ``cwba.cwba_graph(n, m, 2.0)`` and ``sim.runsim`` is run on the SPD, DSD
    and RD matrices in that order.  Reads the module-level ``n``,
    ``censorP``, ``avgRuns`` and ``vote``.
    """
    global n, censorP, avgRuns, vote
    rho_val = float(Decimal(str(2.0)))
    unit = Decimal(str(1.0))

    distance_fns = (metrics.spd_mat, metrics.dsd_mat, metrics.rd_mat)
    curves = ([], [], [])  # accuracy series, same order as distance_fns
    degrees = []

    for k in range(1, 20):
        # Kept as a Decimal so the progress print still shows "1.0", "2.0", ...
        m = unit * k
        A, truth = cwba.cwba_graph(n, int(m), rho_val)
        for build_matrix, curve in zip(distance_fns, curves):
            hits, seen = sim.runsim(truth, censorP, vote, build_matrix(A), avgRuns)
            curve.append(hits / seen)
        print(m)
        degrees.append(int(m))

    rho_str = u"\u03C1"
    plotting.plot_params_vs_accuracy(degrees, list(curves),
                                     "New vertex degree (m)",
                                     ["SPD", "DSD", "RD"],
                                     "CWBA (" + rho_str + "=2)")
    return
def test_emaileucore():
    """Run the DSD and SPD simulations on the email-Eu-core data set.

    Steps:
      1. Read the edge list and keep only the largest connected component.
      2. Read the "<node> <department>" label file into a dict keyed by the
         node id as a string.
      3. Drop labels for nodes outside the component and relabel the
         remaining nodes 0..len(G)-1.
      4. Run ``sim.runsim`` with ``voting.weightedMajorityVote`` on
         ``metrics.dsdMat(G)`` and ``metrics.spdMat(G)`` and print the
         correct/total counts and their ratios.
    """
    fname = "../../Data/email-Eu-core/email-Eu-core.txt"
    # Passing the path lets networkx open and close the file itself, so no
    # handle leaks if parsing fails.
    G = nx.read_edgelist(fname, comments='#', delimiter=None, nodetype=int)
    print(len(G))
    # nx.connected_component_subgraphs was removed in networkx 2.4; this is
    # the documented replacement.  .copy() yields a mutable, independent graph.
    largest = max(nx.connected_components(G), key=len)
    G = G.subgraph(largest).copy()
    print(len(G))

    truth = {}
    f2name = "../../Data/email-Eu-core/email-Eu-core-department-labels.txt"
    with open(f2name, 'rb') as f2:
        for line in f2:
            nodedept = line.decode('UTF-8').rstrip().split(' ')
            truth[nodedept[0]] = nodedept[1]
    print(len(truth))

    # Keep only labels whose node survived the component filter.
    truth = {node: dept for node, dept in truth.items()
             if G.has_node(int(node))}
    print(len(truth.keys()))

    # Relabel nodes to consecutive integers in iteration order.
    mapping = {node: vnum for vnum, node in enumerate(G)}
    # Old and new labels are both integers and overlap; an in-place relabel
    # can fail when the mapping contains a cycle, so build a relabeled copy.
    G = nx.relabel_nodes(G, mapping, copy=True)
    newtruth = {new: truth[str(old)] for old, new in mapping.items()}
    print(len(newtruth))

    censorP = 0.3
    avgRuns = 100
    dsdcorrect, dsdtotal = sim.runsim(G, newtruth, censorP,
                                      voting.weightedMajorityVote,
                                      metrics.dsdMat(G), avgRuns)
    print(dsdcorrect)
    print(dsdtotal)
    spdcorrect, spdtotal = sim.runsim(G, newtruth, censorP,
                                      voting.weightedMajorityVote,
                                      metrics.spdMat(G), avgRuns)
    print(spdcorrect)
    print(spdtotal)
    print("DSD: ", dsdcorrect / dsdtotal)
    print("SPD: ", spdcorrect / spdtotal)
    return
def threeDplot():
    """Scatter-plot DSD accuracy over a grid of (p, q) values.

    p and q each range over 0.00, 0.05, ..., 1.00.  For every pair a graph is
    built with ``cg.construct_adj``, ``sim.runsim`` is run on
    ``metrics.dsd_mat(G)`` with ``voting.scipy_weighted_knn``, and the
    correct/total ratio becomes the z value.

    Returns:
        (Xs, Ys, Zs): three parallel lists of floats holding p, q and the
        accuracy for each grid point.
    """
    n = 250
    censorP = 0.7
    avgRuns = 5
    Xs, Ys, Zs = [], [], []
    # Built once; Decimal keeps the 0.05 steps exact so p and q reach 1.
    increment = Decimal(str(0.05))

    p = Decimal(str(0.0))
    while p <= 1:
        q = Decimal(str(0.0))
        while q <= 1:
            Xs.append(float(p))
            Ys.append(float(q))
            # Pass plain floats, as every other construct_adj caller in this
            # file does, rather than Decimal objects.
            G, truth = cg.construct_adj(n, float(p), float(q))
            correct, total = sim.runsim(truth, censorP,
                                        voting.scipy_weighted_knn,
                                        metrics.dsd_mat(G), avgRuns)
            Zs.append(float(correct / total))
            q += increment
        p += increment

    plt.figure()
    ax = plt.axes(projection='3d')
    # NOTE(review): cmap is given without a colour array `c`, so it probably
    # has no visible effect -- confirm whether c=Zs was intended.
    ax.scatter3D(Xs, Ys, Zs, cmap='Greens')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    plt.show()
    return Xs, Ys, Zs
def test_rw(truth, spd, dsd, rd, censor_rate=0.7, k=20, n_runs=10, verbose=False):
    '''Evaluate a real-world data set under random censoring.

    ``sim.runsim`` is called once per distance matrix with
    ``voting.scipy_weighted_knn``, in the order DSD, SPD, RD.  With
    ``verbose`` set, a summary line is printed after each run.

    truth should be a dict; spd, dsd and rd should be matrices.

    Returns a tuple of correct/total ratios ordered (SPD, DSD, RD).
    '''
    # (result key, distance matrix, report format), in evaluation order.
    plan = (
        ('dsd', dsd, 'DSD: %.2f (%d/%d)'),
        ('spd', spd, 'SPD: %.2f (%d/%d)'),
        ('rd', rd, 'RD: %.2f (%d/%d)'),
    )
    tallies = {}
    for key, matrix, report in plan:
        hits, seen = sim.runsim(truth, censor_rate, voting.scipy_weighted_knn,
                                matrix, k=k, avg_runs=n_runs)
        if verbose:
            print(report % (hits / seen, hits, seen))
        tallies[key] = (hits, seen)

    # The return order (SPD, DSD, RD) differs from the evaluation order.
    return tuple(tallies[key][0] / tallies[key][1]
                 for key in ('spd', 'dsd', 'rd'))
def test():
    """Compare two kNN voting implementations while sweeping p.

    p ranges over 0.00, 0.05, ..., 1.00 with q = 0.5.  For each p a graph is
    built with ``cg.construct_adj`` and ``sim.runsim`` is run on
    ``metrics.dsd_mat(A)`` twice: once with ``voting.sklearn_weighted_knn``
    and once with ``voting.knn_weighted_majority_vote``.  Both accuracies are
    printed and then plotted against p.
    """
    n = 250
    q = Decimal(str(0.5))
    censorP = 0.3
    avgRuns = 5
    increment = Decimal(str(0.05))
    p = Decimal(str(0.0))
    spdaccs1, spdaccs2 = [], []
    params = []

    while float(p) <= 1:
        A, truth = cg.construct_adj(n, float(p), float(q))
        spdcorr1, spdtotal1 = sim.runsim(truth, censorP,
                                         voting.sklearn_weighted_knn,
                                         metrics.dsd_mat(A), avgRuns)
        spdcorr2, spdtotal2 = sim.runsim(truth, censorP,
                                         voting.knn_weighted_majority_vote,
                                         metrics.dsd_mat(A), avgRuns)
        spdaccs1.append(spdcorr1 / spdtotal1)
        spdaccs2.append(spdcorr2 / spdtotal2)
        print(p)
        print(spdcorr1 / spdtotal1)
        print(spdcorr2 / spdtotal2)
        # Record p before stepping so each accuracy is plotted against the
        # p that produced it (it was previously recorded after the increment).
        params.append(float(p))
        p += increment

    # NOTE(review): the legend says "SPD Scipy" / "SPD Mine", but both runs
    # use the DSD matrix and the first voter is sklearn_weighted_knn --
    # confirm the intended labels.
    # The title now reports the censorP actually used; it was hard-coded to
    # 0.6 while the code runs with 0.3.
    plotting.plot_params_vs_accuracy(
        params, [spdaccs1, spdaccs2],
        "Edge addition probability (p)",
        ["SPD Scipy", "SPD Mine"],
        "NCC (q=0.5, censorP=" + str(censorP) + ")")
    return
def test_karateClubGraph():
    """Run the DSD and SPD simulations on networkx's karate club graph.

    Each node's 'club' attribute is used as its ground-truth label.  The
    correct/total counts are printed after each simulation, followed by both
    ratios.
    """
    G = nx.karate_club_graph()
    truth = {v: G.nodes[v]['club'] for v in G}
    censorP, avgRuns = 0.3, 1000

    outcomes = []
    for tag, build_matrix in (("DSD: ", metrics.dsdMat), ("SPD: ", metrics.spdMat)):
        correct, total = sim.runsim(G, truth, censorP,
                                    voting.weightedMajorityVote,
                                    build_matrix(G), avgRuns)
        print(correct)
        print(total)
        outcomes.append((tag, correct, total))

    # Ratios are printed only after both simulations have finished.
    for tag, correct, total in outcomes:
        print(tag, correct / total)
    return
def test_censor():
    """Sweep the censoring probability and plot accuracy per metric.

    censorP ranges over 0.05, 0.10, ..., 0.95 with p = 0.3 and q = 0.5.  A
    new graph is built with ``cg.construct_adj`` for every value, and
    ``sim.runsim`` is run on the SPD, DSD and RD matrices with
    ``voting.scipy_weighted_knn``.
    """
    n = 250
    p = Decimal(str(0.3))
    q = Decimal(str(0.5))
    censorP = Decimal(str(0.05))
    avgRuns = 10
    increment = Decimal(str(0.05))
    spdaccs, dsdaccs, rdaccs = [], [], []
    params = []

    while float(censorP) < 1:
        rate = float(censorP)
        A, truth = cg.construct_adj(n, float(p), float(q))
        spdcorr, spdtotal = sim.runsim(truth, rate, voting.scipy_weighted_knn,
                                       metrics.spd_mat(A), avgRuns)
        dsdcorr, dsdtotal = sim.runsim(truth, rate, voting.scipy_weighted_knn,
                                       metrics.dsd_mat(A), avgRuns)
        rdcorr, rdtotal = sim.runsim(truth, rate, voting.scipy_weighted_knn,
                                     metrics.rd_mat(A), avgRuns)
        spdaccs.append(spdcorr / spdtotal)
        dsdaccs.append(dsdcorr / dsdtotal)
        rdaccs.append(rdcorr / rdtotal)
        print(censorP)
        # Record the rate used for this run before stepping; recording after
        # the increment shifted every plotted point by 0.05.
        params.append(rate)
        censorP += increment

    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                     "Censoring probability (censorP)",
                                     ["SPD", "DSD", "RD"], "NCC (q=0.5)")
    return
def test_cg():
    """Produce two accuracy plots for graphs from ``cg.construct_adj``.

    Sweep 1: p over 0.00, 0.05, ..., 1.00 with q = 0.5, voting with
    ``voting.scipy_weighted_knn``.
    Sweep 2: q over 0.05, 0.10, ..., 1.00 with p = 0.5, voting with
    ``voting.knn_weighted_majority_vote``.

    In both sweeps ``sim.runsim`` is run on the SPD, DSD and RD matrices and
    the correct/total ratios are plotted against the swept parameter.
    """
    n = 250
    censorP = 0.6
    avgRuns = 10
    increment = Decimal(str(0.05))

    # ---- Sweep 1: edge addition probability p, q fixed at 0.5 ----
    p = Decimal(str(0.0))
    q = Decimal(str(0.5))  # loop-invariant, so set once outside the loop
    spdaccs, dsdaccs, rdaccs = [], [], []
    params = []
    while float(p) <= 1:
        A, truth = cg.construct_adj(n, float(p), float(q))
        spdcorr, spdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn,
                                       metrics.spd_mat(A), avgRuns)
        dsdcorr, dsdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn,
                                       metrics.dsd_mat(A), avgRuns)
        rdcorr, rdtotal = sim.runsim(truth, censorP, voting.scipy_weighted_knn,
                                     metrics.rd_mat(A), avgRuns)
        spdaccs.append(spdcorr / spdtotal)
        dsdaccs.append(dsdcorr / dsdtotal)
        rdaccs.append(rdcorr / rdtotal)
        print(p)
        # Record p before stepping so x and y values line up (previously
        # recorded after the increment, shifting the curve by one step).
        params.append(float(p))
        p += increment
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                     "Edge addition probability (p)",
                                     ["SPD", "DSD", "RD"], "NCC (q=0.5)")

    # ---- Sweep 2: edge deletion probability q, p fixed at 0.5 ----
    p = Decimal(str(0.5))
    q = Decimal(str(0.0)) + increment  # this sweep starts at 0.05, not 0
    spdaccs, dsdaccs, rdaccs = [], [], []
    params = []
    while float(q) <= 1:
        A, truth = cg.construct_adj(n, float(p), float(q))
        spdcorr, spdtotal = sim.runsim(truth, censorP,
                                       voting.knn_weighted_majority_vote,
                                       metrics.spd_mat(A), avgRuns)
        dsdcorr, dsdtotal = sim.runsim(truth, censorP,
                                       voting.knn_weighted_majority_vote,
                                       metrics.dsd_mat(A), avgRuns)
        rdcorr, rdtotal = sim.runsim(truth, censorP,
                                     voting.knn_weighted_majority_vote,
                                     metrics.rd_mat(A), avgRuns)
        spdaccs.append(spdcorr / spdtotal)
        dsdaccs.append(dsdcorr / dsdtotal)
        rdaccs.append(rdcorr / rdtotal)
        print(q)
        params.append(float(q))  # same ordering fix as in sweep 1
        q += increment
    # NOTE(review): the title mentions NCCH with 100 hubs, but the graphs
    # above come from cg.construct_adj, which takes no hub count here --
    # confirm the intended title.
    plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                     "Edge deletion probability (q)",
                                     ["SPD", "DSD", "RD"],
                                     "NCCH (p=0.5, number of hubs=100)")
    return
def test_cg_h():
    """Plot accuracy against the number of hubs for two hub-count ranges.

    Sweep 1 uses r = 1, 2, ..., 20; sweep 2 uses r = 100, 150, ..., 400.
    Each graph comes from ``cg.constructWithHubs(n, 0.5, 0.5, r)`` and
    ``sim.runsim`` is run on the SPD, DSD and RD matrices with
    ``voting.scipy_weighted_knn``.  One plot is produced per sweep.
    """
    n = 250
    p = 0.5
    q = 0.5
    censorP = 0.7
    avgRuns = 10

    for hub_counts in (range(1, 21), range(100, 401, 50)):
        spdaccs, dsdaccs, rdaccs = [], [], []
        params = []
        for r in hub_counts:
            A, truth = cg.constructWithHubs(n, p, q, r)
            spdcorr, spdtotal = sim.runsim(truth, censorP,
                                           voting.scipy_weighted_knn,
                                           metrics.spd_mat(A), avgRuns)
            dsdcorr, dsdtotal = sim.runsim(truth, censorP,
                                           voting.scipy_weighted_knn,
                                           metrics.dsd_mat(A), avgRuns)
            rdcorr, rdtotal = sim.runsim(truth, censorP,
                                         voting.scipy_weighted_knn,
                                         metrics.rd_mat(A), avgRuns)
            spdaccs.append(spdcorr / spdtotal)
            dsdaccs.append(dsdcorr / dsdtotal)
            rdaccs.append(rdcorr / rdtotal)
            # Record the r used for this graph.  The previous code appended
            # r after incrementing it, so points were plotted one step
            # (1 or 50 hubs) too far to the right.
            params.append(r)
        plotting.plot_params_vs_accuracy(params, [spdaccs, dsdaccs, rdaccs],
                                         "Number of hubs",
                                         ["SPD", "DSD", "RD"])
    return
def test_coauthorship():
    """Run the DSD and SPD simulations on the com-dblp co-authorship graph.

    Data source: http://konect.uni-koblenz.de/networks/com-dblp

    Steps:
      1. Read the edge list.
      2. Read the first 501 lines of the community file; every member on
         line i is labelled i (a later line overwrites an earlier label).
      3. Remove unlabelled nodes, keep the largest connected component and
         relabel its nodes 0..len(G)-1.
      4. Run ``sim.runsim`` with ``voting.weightedMajorityVote`` on
         ``metrics.dsdMat(G)`` and ``metrics.spdMat(G)`` and print the
         correct/total counts and their ratios.
    """
    fname = "../../Data/com-dblp.ungraph.txt/com-dblp.ungraph.txt"
    # Passing the path lets networkx open and close the file itself.
    G = nx.read_edgelist(fname, comments='#', delimiter=None, nodetype=int)

    truth = {}
    f2name = "../../Data/com-dblp.top5000.cmty.txt/com-dblp.top5000.cmty.txt"
    with open(f2name, 'rb') as f2:
        # Iterate lazily: only lines 0..500 are used, so the rest of the
        # file never needs to be read.
        for linenum, line in enumerate(f2):
            if linenum > 500:
                break
            for c in line.decode('UTF-8').rstrip().split('\t'):
                truth[c] = linenum
    print(len(G))
    print(len(truth.keys()))

    # Collect the unlabelled nodes first so the graph is not modified while
    # being iterated (and no full graph copy is needed).
    G.remove_nodes_from([v for v in G if str(v) not in truth])
    # nx.connected_component_subgraphs was removed in networkx 2.4; this is
    # the documented replacement.  .copy() yields a mutable, independent graph.
    largest = max(nx.connected_components(G), key=len)
    G = G.subgraph(largest).copy()
    print(len(G))

    # Relabel nodes to consecutive integers in iteration order.
    mapping = {node: vnum for vnum, node in enumerate(G)}
    # Old and new labels are both integers and overlap; an in-place relabel
    # can fail when the mapping contains a cycle, so build a relabeled copy.
    G = nx.relabel_nodes(G, mapping, copy=True)
    newtruth = {new: truth[str(old)] for old, new in mapping.items()}
    print(len(newtruth))

    censorP = 0.3
    avgRuns = 100
    dsdcorrect, dsdtotal = sim.runsim(G, newtruth, censorP,
                                      voting.weightedMajorityVote,
                                      metrics.dsdMat(G), avgRuns)
    print(dsdcorrect)
    print(dsdtotal)
    spdcorrect, spdtotal = sim.runsim(G, newtruth, censorP,
                                      voting.weightedMajorityVote,
                                      metrics.spdMat(G), avgRuns)
    print(spdcorrect)
    print(spdtotal)
    print("DSD: ", dsdcorrect / dsdtotal)
    print("SPD: ", spdcorrect / spdtotal)
    return
def test_completeGraphsWithHubs(n, p, q, r, censorP, vote, metric, avgRuns):
    """Build one graph with ``cg.constructWithHubs`` and return its accuracy.

    ``metric`` is called on the constructed graph to obtain the distance
    argument for ``sim.runsim``; ``vote`` is passed through unchanged.

    Returns the ratio correct / total reported by ``sim.runsim``.
    """
    graph, labels = cg.constructWithHubs(n, p, q, r)
    distances = metric(graph)
    hits, seen = sim.runsim(graph, labels, censorP, vote, distances, avgRuns)
    return hits / seen
def test_completeGraphs(n, p, q, censorP, vote, metric, avgRuns):
    """Build one graph with ``cg.construct_adj`` and return its accuracy.

    ``metric`` is called on the first value returned by ``construct_adj`` to
    obtain the distance argument for ``sim.runsim``; ``vote`` is passed
    through unchanged.

    Returns the ratio correct / total reported by ``sim.runsim``.
    """
    adjacency, labels = cg.construct_adj(n, p, q)
    distances = metric(adjacency)
    hits, seen = sim.runsim(labels, censorP, vote, distances, avgRuns)
    return hits / seen