def run_alg(alg, dsname, check_skip=True): try: runname = alg if not check_skip or (check_skip and not has_run(runname, dsname)): print("runing ", alg, dsname) if alg == 'cdc_SVINET': gct.run_alg(runname=runname, data=gct.load_local_graph(dsname), algname=alg, seed=123, max_iterations=1000) else: gct.run_alg(runname=runname, data=gct.load_local_graph(dsname), algname=alg, seed=123) print("finished", alg, dsname) else: print("skip", runname, dsname) except: print("except", runname, dsname) fpath = os.path.join("/{}/tmp/{}_{}.done".format('data', runname, dsname)) open(fpath, 'a').close() return True
def test_1(self): bad_algs = [ 'igraph_community_optimal_modularity', #too slow 'scan_AnyScan_ScanIdealPar', #never finish ] runned_algs = [] algs = gct.list_algorithms() algs = [u for u in algs if u not in bad_algs and u not in runned_algs] datasets = [self.prefix + "_1", self.prefix + "_2"] for dsname in datasets: for alg in algs: runname = alg if not self.has_run(runname, dsname): print("runing ", alg, dsname) gct.run_alg(runname=runname, data=gct.load_local_graph(dsname), algname=alg, seed=123) runned_algs.append(alg) print("AAAA", runned_algs) results = {} d = gct.list_all_clustering_results(print_format=False) for u in d: if self.prefix + "_" in u: for run in d[u]: a = gct.load_clustering_result(u, run) print(u, run) results[u + run] = a import pandas as pd lst = [] for alg in algs: for dsname in datasets: if dsname.startswith(self.prefix + '_1'): a = [] columns = [] dsname2 = self.prefix + "_2" a += [alg, dsname, dsname2] columns += ['alg', 'data1', 'data2'] if dsname + alg in results and dsname2 + alg in results: cluster1 = gct.to_cluster(results[dsname + alg]) cluster2 = gct.to_cluster(results[dsname2 + alg]) compa = gct.ClusterComparator(cluster1, cluster2) gt = list( gct.load_local_graph( dsname).get_ground_truth().values())[0] a += [ compa.sklean_nmi(), gt.is_overlap, cluster1.is_overlap, cluster2.is_overlap, cluster1.num_cluster, cluster2.num_cluster, gt.num_cluster ] columns += [ 'nmi_12', 'ovp_gt', 'ovp1', 'ovp2', "#c1", "#c2", '#c_gt' ] compa = gct.ClusterComparator(gt, cluster1) a += [compa.sklean_nmi()] columns += ['nmi_t1'] compa = gct.ClusterComparator(gt, cluster2) a += [compa.sklean_nmi()] columns += ['nmi_t2'] lst.append(a) else: bad_algs.append(alg) for alg in bad_algs: lst.append([alg] + [None] * (len(columns) - 1)) rdf = pd.DataFrame(lst, columns=columns) with pd.option_context('display.max_rows', 2000, 'display.max_columns', 200): print(rdf) rdf.to_csv(self.prefix + ".csv", index=None)
def test_1(self): bad_algs = [ 'igraph_community_optimal_modularity', #too slow 'scan_Scanpp', #never finish ] # these alg failed for this test runned_algs = [] algs = gct.list_algorithms() algs = [u for u in algs if u not in bad_algs and u not in runned_algs] datasets = [self.prefix + "_w1", self.prefix + "_uw1"] for dsname in datasets: for alg in algs: runname = alg if not self.has_run(runname, dsname): print ("runing ", alg, dsname) if alg =='cdc_SVINET': gct.run_alg(runname=runname, data=gct.load_local_graph(dsname), algname=alg, seed=123, max_iterations=1000) else: gct.run_alg(runname=runname, data=gct.load_local_graph(dsname), algname=alg, seed=123) runned_algs.append(alg) print ("AAAA", runned_algs) results = {} d = gct.list_all_clustering_results(print_format=False) for u in d: if self.prefix + "_" in u: for run in d[u]: a = gct.load_clustering_result(u, run) print (u, run) results[u + run] = a import pandas as pd lst = []; for alg in algs: for dsname in datasets: if dsname.startswith(self.prefix + '_uw'): a = [];columns = [] dsname2 = dsname.replace(self.prefix + "_uw", self.prefix + "_w") a += [alg, dsname, dsname2] columns += ['alg', 'data1', 'data2'] cluster1 = gct.to_cluster(results[dsname + alg]) cluster2 = gct.to_cluster(results[dsname2 + alg]) compa = gct.ClusterComparator(cluster1, cluster2) a += [compa.sklean_nmi(), cluster1.is_overlap, cluster2.is_overlap, cluster1.num_cluster, cluster2.num_cluster, compa.OvpNMI()['NMImax']] columns += ['nmi_12', 'ovp1', 'ovp2', "#c1", "#c2", 'ovpnmi_12'] gt = list(gct.load_local_graph(dsname).get_ground_truth().values())[0] compa = gct.ClusterComparator(gt, cluster1) a += [compa.sklean_nmi(), compa.OvpNMI()['NMImax']] columns += ['nmi_t1', 'ovpnmi_t1'] compa = gct.ClusterComparator(gt, cluster2) a += [compa.sklean_nmi(), compa.OvpNMI()['NMImax']] columns += ['nmi_t2', 'ovpnmi_t2'] lst.append(a) for alg in bad_algs: lst.append([alg] + [None] * (len(columns) - 1)) rdf = pd.DataFrame(lst, columns=columns) with pd.option_context('display.max_rows', 2000, 'display.max_columns', 200): print (rdf)