def generate_Erdos_Renyi(name, n_node, n_edge, directed=False, seed=None, overide=False):
    '''
    Generate an Erdos-Renyi random graph and wrap it in a Dataset.

    name: dataset name; checked with local_exists and handed to Dataset.
    n_node, n_edge, directed: forwarded to RandomGenerator as generation options.
    seed: passed to RandomGenerator.generate.
    overide: when false, an already existing local dataset called `name`
        makes the call fail; the flag is also forwarded to Dataset.

    Raises Exception if the dataset exists and `overide` is false.
    Returns the Dataset built from the generated edges and ground truth.
    '''
    # Guard: refuse to clobber an existing dataset unless explicitly allowed.
    if not overide and local_exists(name):
        raise Exception("graph {} exists".format(name))

    genopts = {
        "name": "Erdos-Renyi",
        'n_node': n_node,
        "n_edge": n_edge,
        "directed": directed
    }
    generator = RandomGenerator(params=genopts)
    edge_obj, truth_obj = generator.generate(seed)

    # Imported here rather than at module level, as in the original code.
    from .dataset import Dataset
    return Dataset(
        name,
        description="Erdos_Renyi random graph",
        groundtruthObj=truth_obj,
        edgesObj=edge_obj,
        directed=directed,
        weighted=False,
        overide=overide,
        additional_meta={'genopts': genopts},
    )
def generate_undirected_unweighted_hier_random_graph_LFR(name, N, k=None, maxk=None, mu1=None, mu2=None, t1=None, t2=None, minc=None,
                                                         maxc=None, on=None, om=None, minC=None, maxC=None, seed=None, overide=False):
    '''
    Generate an undirected, unweighted hierarchical LFR benchmark graph.

    Lancichinetti-Fortunato-Radicchi Benchmark generator.
    Original from https://sites.google.com/site/andrealancichinetti/files

    Parameter
    =========== ==========================================================
    -N          [number of nodes]
    -k          [average degree]
    -maxk       [maximum degree]
    -t1         [minus exponent for the degree sequence]
    -t2         [minus exponent for the community size distribution]
    -minc       [minimum for the micro community sizes]
    -maxc       [maximum for the micro community sizes]
    -on         [number of overlapping nodes]
    -om         [number of memberships of the overlapping nodes]
    -minC       [minimum for the macro community size]
    -maxC       [maximum for the macro community size]
    -mu1        [mixing parameter for the macro communities (see Readme file)]
    -mu2        [mixing parameter for the micro communities (see Readme file)]
    =========== ==========================================================

    Python-side arguments:

    name: dataset name; checked with local_exists and handed to Dataset.
    seed: passed to RandomGenerator.generate.
    overide: when false, an already existing local dataset called `name`
        makes the call fail; the flag is also forwarded to Dataset.

    Raises Exception if the dataset exists and `overide` is false.
    Returns the Dataset built from the generated edges and ground truth.

    -------------------- Examples ---------------------------

    Example2:
    ./hbenchmark -f flags.dat
    ./hbenchmark -N 10000 -k 20 -maxk 50 -mu2 0.3 -minc 20 -maxc 50 -minC 100 -maxC 1000 -mu1 0.1

    Reference

        Lancichinetti, Andrea, and Santo Fortunato. "Benchmarks for testing community detection
        algorithms on directed and weighted graphs with overlapping communities."
        Physical Review E 80.1 (2009): 016118.
    '''
    if not overide and local_exists(name):
        raise Exception("graph {} exists".format(name))

    # Snapshot the call arguments into an independent dict. The mapping returned
    # by locals() must not be mutated: on older CPython it is the frame's cached
    # dict and can be refreshed behind our back (debugger, second locals() call),
    # which would undo the edits below and add later locals such as `params`.
    # This has to run before any other local is bound so only arguments are captured.
    params = dict(locals())
    del params['overide']

    description = "LFR random graph"
    directed = False
    weighted = False
    params['directed'] = directed
    params['hier'] = True
    params['weighted'] = weighted
    # Replaces the dataset name inside the options; the local `name` is untouched.
    params['name'] = 'LFR'

    gen = RandomGenerator(params=params)
    edges, gt = gen.generate(seed)

    # Imported here rather than at module level, as in the original code.
    from .dataset import Dataset
    return Dataset(name, description=description, groundtruthObj=gt, edgesObj=edges, directed=directed,
                   weighted=weighted, overide=overide, additional_meta={'genopts': params})
def load_sample_dataset(name, overide=False):
    '''
    Load a sample dataset, preferring an existing local copy.

    When `overide` is false and local_exists(name) is true, the result of
    load_local(name) is returned. Otherwise the edge CSV registered under
    `name` in _DATASET_ is read from <GCT_HOME>/data, the ground-truth CSV is
    read from the same path with "_edges" replaced by "_gt", and both are
    converted with convert.from_edgelist as an undirected dataset with an
    empty description.
    '''
    if not overide and local_exists(name):
        return load_local(name)

    edges_path = os.path.join(config.GCT_HOME, 'data', _DATASET_[name])
    edge_frame = pd.read_csv(edges_path)
    # The ground-truth file sits next to the edge file and differs only in the suffix.
    truth_frame = pd.read_csv(edges_path.replace("_edges", '_gt'))

    return convert.from_edgelist(
        name,
        edge_frame,
        groundtruth=truth_frame,
        directed=False,
        description="",
        overide=overide,
    )
def load_snap_dataset(name, overide=False):
    '''
    Load the dataset registered under `name` in _DATASET_.

    When `overide` is false and local_exists(name) is true, the result of
    load_local(name) is returned. Otherwise edges, ground truth, description
    and the weighted/directed flags are taken from the registry entry and
    wrapped in a new Dataset.
    '''
    if overide or not local_exists(name):
        spec = _DATASET_[name]
        edge_obj = spec.get_edges()
        truth_obj = spec.get_ground_truth()
        # Read the descriptive attributes in the same order as before.
        meta = {
            'description': spec.description,
            'weighted': spec.weighted,
            'directed': spec.directed,
        }
        return Dataset(name, groundtruthObj=truth_obj, edgesObj=edge_obj, overide=overide, **meta)

    return load_local(name)
def setUp(self):
    '''
    Prepare the four overlapping-LFR graphs used by the tests.

    One graph is obtained for every combination of weighted/unweighted and
    directed/undirected. Each is fetched through self.get_dataset, which is
    given a factory that calls random_dataset.generate_ovp_LFR with the
    matching `weighted` and `a` options. The flags of every graph are
    asserted, and all four are collected in self.graphs.
    '''
    overide = False

    def make_generator(weighted, a):
        # Return a factory bound to one weighted/directed combination.
        # `a` is the generator's direction switch (0 undirected, 1 directed).
        def generate(name):
            return random_dataset.generate_ovp_LFR(
                name, N=128, k=16, maxk=16, muw=0.1, minc=32, beta=1,
                weighted=weighted, a=a, overide=overide)
        return generate

    self.graph_unweighted_undirect = self.get_dataset(
        "test_LFR_unw_und", make_generator(weighted=False, a=0))
    assert not self.graph_unweighted_undirect.is_directed()
    assert not self.graph_unweighted_undirect.is_weighted()

    # Needs its own weighted factory: reusing the unweighted one builds the
    # wrong graph whenever no stored copy exists.
    self.graph_weighted_undirect = self.get_dataset(
        "test_LFR_w_und", make_generator(weighted=True, a=0))
    assert not self.graph_weighted_undirect.is_directed()
    assert self.graph_weighted_undirect.is_weighted()

    self.graph_weighted_direct = self.get_dataset(
        "test_LFR_w_dir", make_generator(weighted=True, a=1))
    assert self.graph_weighted_direct.is_directed()
    assert self.graph_weighted_direct.is_weighted()

    self.graph_unweighted_direct = self.get_dataset(
        "test_LFR_unw_dir", make_generator(weighted=False, a=1))
    assert self.graph_unweighted_direct.is_directed()
    assert not self.graph_unweighted_direct.is_weighted()

    self.graphs = [
        self.graph_unweighted_undirect,
        self.graph_weighted_undirect,
        self.graph_weighted_direct,
        self.graph_unweighted_direct,
    ]
def get_dataset(self, name, fun):
    '''
    Return the dataset called `name`.

    If dataset.local_exists(name) is true the stored copy is loaded with
    dataset.load_local; otherwise `fun` is called with `name` and its result
    is returned.
    '''
    if not dataset.local_exists(name):
        return fun(name)
    return dataset.load_local(name)
def generate_directed_weighted_random_graph_LFR(name, N, k=None, maxk=None, mut=None, muw=None, beta=None, t1=None, t2=None,
                                                minc=None, maxc=None, on=None, om=None, C=None, seed=None, overide=False):
    '''
    Generate a directed, weighted LFR benchmark graph.

    Lancichinetti-Fortunato-Radicchi Benchmark generator.
    Original from https://sites.google.com/site/andrealancichinetti/files

    Parameter:
    =========== ==========================================================
    -N          [number of nodes]
    -k          [average in-degree]
    -maxk       [maximum in-degree]
    -mut        [mixing parameter for the topology]
    -muw        [mixing parameter for the weights]
    -beta       [exponent for the weight distribution]
    -t1         [minus exponent for the degree sequence]
    -t2         [minus exponent for the community size distribution]
    -minc       [minimum for the community sizes]
    -maxc       [maximum for the community sizes]
    -on         [number of overlapping nodes]
    -om         [number of memberships of the overlapping nodes]
    =========== ==========================================================

    -N, -k, -maxk, -muw have to be specified. For the others, the program can use default values:
    t1=2, t2=1, on=0, om=0, beta=1.5, mut=muw, minc and maxc will be chosen close to the degree
    sequence extremes. If you set a parameter twice, the latter one will be taken.

    Python-side arguments:

    name: dataset name; checked with local_exists and handed to Dataset.
    C: forwarded to the generator with the other options (not described in
        the benchmark help text above).
    seed: passed to RandomGenerator.generate.
    overide: when false, an already existing local dataset called `name`
        makes the call fail; the flag is also forwarded to Dataset.

    Raises Exception if the dataset exists and `overide` is false.
    Returns the Dataset built from the generated edges and ground truth.

    -------------------- Other options ---------------------------

    To have a random network use:
    -rand
    Using this option will set muw=0, mut=0, and minc=maxc=N, i.e. there will be one only community.
    Use option -sup (-inf) if you want to produce a benchmark whose distribution of the ratio of
    external in-degree/total in-degree is superiorly (inferiorly) bounded by the mixing parameter.

    -------------------- Examples ---------------------------

    Example1:
    ./benchmark -N 1000 -k 15 -maxk 50 -muw 0.1 -minc 20 -maxc 50
    Example2:
    ./benchmark -f flags.dat -t1 3

    Reference

        Lancichinetti, Andrea, and Santo Fortunato. "Benchmarks for testing community detection
        algorithms on directed and weighted graphs with overlapping communities."
        Physical Review E 80.1 (2009): 016118.
    '''
    if not overide and local_exists(name):
        raise Exception("graph {} exists".format(name))

    # Snapshot the call arguments into an independent dict. The mapping returned
    # by locals() must not be mutated: on older CPython it is the frame's cached
    # dict and can be refreshed behind our back (debugger, second locals() call),
    # which would undo the edits below and add later locals such as `params`.
    # This has to run before any other local is bound so only arguments are captured.
    params = dict(locals())
    del params['overide']

    description = "LFR random graph"
    directed = True
    weighted = True
    params['directed'] = directed
    params['weighted'] = weighted
    # Replaces the dataset name inside the options; the local `name` is untouched.
    params['name'] = 'LFR'

    gen = RandomGenerator(params=params)
    edges, gt = gen.generate(seed)

    # Imported here rather than at module level, as in the original code.
    from .dataset import Dataset
    # Record the generation options in the metadata, as the other generators do.
    return Dataset(name, description=description, groundtruthObj=gt, edgesObj=edges, directed=directed,
                   weighted=weighted, overide=overide, additional_meta={'genopts': params})
def generate_ovp_LFR(name, N, k=None, maxk=None, mut=None, muw=None, beta=None, t1=None, t2=None, minc=None, maxc=None,
                     on=None, om=None, C=None, a=0, weighted=False, seed=None, overide=False):
    '''
    Generate a graph with the extended LFR benchmark for weighted overlapping networks.

    Extended version of the Lancichinetti-Fortunato-Radicchi Benchmark for Weighted Overlapping
    networks to evaluate clustering algorithms using generated ground-truth communities.

    Refer https://github.com/eXascaleInfolab/LFR-Benchmark_UndirWeightOvp

    Parameter
    ========== ===========================================================================
    -N         [number of nodes]
    -k         [average degree]
    -maxk      [maximum degree]
    -mut       [mixing parameter for the topology]
    -muw       [mixing parameter for the weights]
    -beta      [exponent for the weight distribution]
    -t1        [minus exponent for the degree sequence]
    -t2        [minus exponent for the community size distribution]
    -minc      [minimum for the community sizes]
    -maxc      [maximum for the community sizes]
    -on        [number of overlapping nodes]
    -om        [number of memberships of the overlapping nodes]
    -C         [Average clustering coefficient]
    -cnl       [output communities as strings of nodes (input format for NMI evaluation)]
    -name      [base name for the output files]. It is used for the network, communities
               and statistics; files extensions are added automatically:
                 .nsa  - network, represented by space/tab separated arcs
                 .nse  - network, represented by space/tab separated edges
                 {.cnl, .nmc}  - communities, represented by nodes lists '.cnl' if '-cnl'
                   is used, otherwise as a nodes membership in communities '.nmc')
                 .nst  - network statistics
    -seed      [file name of the random seed, default: seed.txt]
    -a         [{0, 1} yield directed network (1 - arcs) rather than undirected (0 - edges),
               default: 0 - edges]
    ========== ===========================================================================

    The table is the benchmark program's own option list. On the Python side:

    name: dataset name; checked with local_exists and handed to Dataset. The
        options passed to the generator carry 'ovp_LFR' under the key 'name'.
    a: stored as int(a) in the options; the dataset is directed when a > 0.
    weighted: stored in the options and forwarded to Dataset.
    seed: passed to RandomGenerator.generate.
    overide: when false, an already existing local dataset called `name`
        makes the call fail; the flag is also forwarded to Dataset.

    Raises Exception if the dataset exists and `overide` is false.
    Returns the Dataset built from the generated edges and ground truth.

    Reference

        Lancichinetti, Andrea, and Santo Fortunato. "Benchmarks for testing community detection
        algorithms on directed and weighted graphs with overlapping communities."
        Physical Review E 80.1 (2009): 016118.
    '''
    if not overide and local_exists(name):
        raise Exception("graph {} exists".format(name))

    # Snapshot the call arguments into an independent dict. The mapping returned
    # by locals() must not be mutated: on older CPython it is the frame's cached
    # dict and can be refreshed behind our back (debugger, second locals() call),
    # which would undo the edits below and add later locals such as `params`.
    # This has to run before any other local is bound so only arguments are captured.
    params = dict(locals())
    del params['overide']
    params['a'] = int(a)
    # Replaces the dataset name inside the options; the local `name` is untouched.
    params['name'] = 'ovp_LFR'

    description = "overlap LFR random graph"
    directed = (a > 0)
    params['directed'] = directed

    gen = RandomGenerator(params=params)
    edges, gt = gen.generate(seed)

    # Imported here rather than at module level, as in the original code.
    from .dataset import Dataset
    return Dataset(name, description=description, groundtruthObj=gt, edgesObj=edges,
                   directed=directed, weighted=weighted, overide=overide, additional_meta={'genopts': params})