def benchmark(N, k, maxk, gamma, beta, mu, minc=None, maxc=None, pause=False):
    """Unweighted, undirected, non-overlapping benchmark graph.

    This is the graph supporting Lancichinetti, Fortunato, Radicchi,
    PRE 78 046110 (2008).

    The external ``benchmark`` program is run inside a temporary
    directory (created below ``tmpbase``).  Its settings are written to
    ``parameters.dat`` and its output files are loaded into a networkx
    graph.

    Arguments are:
        N      number of nodes
        k      average degree
        maxk   maximum degree
        gamma  exponent for the degree distribution
        beta   exponent for the community size distribution
        mu     mixing parameter
        minc   minimum for the community sizes (optional)
        maxc   maximum for the community sizes (optional)
        pause  if true, call ``fitz.interact.interact()`` before
               returning (debugging aid)

    Returns:
        A ``networkx.Graph``.  Node IDs and community IDs read from
        ``community.dat`` / ``network.dat`` are shifted down by one.
        Every node has a ``cmty`` attribute and ``g.graph['statistics']``
        holds the raw text of ``statistics.dat``.

    Raises:
        AssertionError: if the external program exits with a non-zero
            status.

    Example parameters:
        N=1000, k=15, maxk=100, gamma=2, beta=1, mu=.1
    """
    # Optional limits that were not given become empty lines in the
    # parameter file.
    if minc is None:
        minc = ""
    if maxc is None:
        maxc = ""
    # NOTE(review): the parameter file is written with one value per line,
    # in the order of the original template -- confirm against the
    # benchmark program's README.
    params = "".join(
        "%s\n" % (value,)
        for value in (N, k, maxk, gamma, beta, mu, minc, maxc))

    prog = _get_file('lfr_benchmarks/new/benchmark_2_2/benchmark')
    # makeargs() is handed an empty option dict: this program reads its
    # settings from parameters.dat instead of the command line.
    args = [prog] + makeargs({})
    # Single-argument print() emits the same text as the former
    # ``print "Arguments are: ", ...`` statement (note the two spaces) and
    # is valid on both Python 2 and Python 3.
    print("Arguments are:  %s" % " ".join(args))

    with pcd.util.tmpdir_context(chdir=True, prefix='tmp-lfrbenchmark',
                                 dir=tmpbase):
        # Close the file explicitly so that its contents are guaranteed
        # to be on disk before the external program reads it.
        with open('parameters.dat', 'w') as param_file:
            param_file.write(params)
        retcode = subprocess.call(args)
        if retcode != 0:
            # Raised explicitly instead of via ``assert`` so the check
            # survives ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "The program '%s' returned non-zero: %s" % (prog, retcode))

        g = networkx.Graph()
        for n, c in read_file('community.dat'):
            g.add_node(n - 1, cmty=c - 1)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()
        # NOTE: computing g.graph['stats'] = stats(g) was disabled in the
        # original code and stays disabled.
        if pause:
            # Still inside the temporary directory, so the program's
            # output files can be inspected interactively.
            import fitz.interact
            fitz.interact.interact()
    return g
def benchmark(N, k, maxk, gamma, beta, mu, minc=None, maxc=None, pause=False):
    """Unweighted, undirected, non-overlapping benchmark graph.

    This is the graph supporting Lancichinetti, Fortunato, Radicchi,
    PRE 78 046110 (2008).

    The external ``benchmark`` program is run inside a temporary
    directory (created below ``tmpbase``).  Its settings are written to
    ``parameters.dat`` and its output files are loaded into a networkx
    graph.

    Arguments are:
        N      number of nodes
        k      average degree
        maxk   maximum degree
        gamma  exponent for the degree distribution
        beta   exponent for the community size distribution
        mu     mixing parameter
        minc   minimum for the community sizes (optional)
        maxc   maximum for the community sizes (optional)
        pause  if true, call ``fitz.interact.interact()`` before
               returning (debugging aid)

    Returns:
        A ``networkx.Graph``.  Node IDs and community IDs read from
        ``community.dat`` / ``network.dat`` are shifted down by one.
        Every node has a ``cmty`` attribute and ``g.graph['statistics']``
        holds the raw text of ``statistics.dat``.

    Raises:
        AssertionError: if the external program exits with a non-zero
            status.

    Example parameters:
        N=1000, k=15, maxk=100, gamma=2, beta=1, mu=.1
    """
    # Optional limits that were not given become empty lines in the
    # parameter file.
    if minc is None:
        minc = ""
    if maxc is None:
        maxc = ""
    # NOTE(review): the parameter file is written with one value per line,
    # in the order of the original template -- confirm against the
    # benchmark program's README.
    params = "".join(
        "%s\n" % (value,)
        for value in (N, k, maxk, gamma, beta, mu, minc, maxc))

    prog = _get_file('lfr_benchmarks/new/benchmark_2_2/benchmark')
    # makeargs() is handed an empty option dict: this program reads its
    # settings from parameters.dat instead of the command line.
    args = [prog] + makeargs({})
    # Single-argument print() emits the same text as the former
    # ``print "Arguments are: ", ...`` statement (note the two spaces) and
    # is valid on both Python 2 and Python 3.
    print("Arguments are:  %s" % " ".join(args))

    with pcd.util.tmpdir_context(chdir=True, prefix='tmp-lfrbenchmark',
                                 dir=tmpbase):
        # Close the file explicitly so that its contents are guaranteed
        # to be on disk before the external program reads it.
        with open('parameters.dat', 'w') as param_file:
            param_file.write(params)
        retcode = subprocess.call(args)
        if retcode != 0:
            # Raised explicitly instead of via ``assert`` so the check
            # survives ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "The program '%s' returned non-zero: %s" % (prog, retcode))

        g = networkx.Graph()
        for n, c in read_file('community.dat'):
            g.add_node(n - 1, cmty=c - 1)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()
        # NOTE: computing g.graph['stats'] = stats(g) was disabled in the
        # original code and stays disabled.
        if pause:
            # Still inside the temporary directory, so the program's
            # output files can be inspected interactively.
            import fitz.interact
            fitz.interact.interact()
    return g
def hierarchical(pause=False, **kwargs):
    """Binary networks with overlapping nodes and hierarchies.

    The external program is an implementation of the algorithm described
    in the paper "Directed, weighted and overlapping benchmark graphs for
    community detection algorithms", written by Andrea Lancichinetti and
    Santo Fortunato.  In particular, it produces binary networks with
    overlapping nodes and hierarchies.

    Keyword arguments are passed through ``makeargs()`` to build the
    command line.  Options listed by the program's help text:

        -N     [number of nodes]
        -k     [average degree]
        -maxk  [maximum degree]
        -t1    [minus exponent for the degree sequence]
        -t2    [minus exponent for the community size distribution]
        -minc  [minimum for the micro community sizes]
        -maxc  [maximum for the micro community sizes]
        -on    [number of overlapping nodes]
        -om    [number of memberships of the overlapping nodes]
        -minC  [minimum for the macro community size]
        -maxC  [maximum for the macro community size]
        -mu1   [mixing parameter for the macro communities (see Readme file)]
        -mu2   [mixing parameter for the micro communities (see Readme file)]

    Examples of the underlying command line:
        ./hbenchmark -f flags.dat
        ./hbenchmark -N 10000 -k 20 -maxk 50 -mu2 0.3 -minc 20 -maxc 50 -minC 100 -maxC 1000 -mu1 0.1

    Arguments:
        pause: if true, call ``fitz.interact.interact()`` before
            returning (debugging aid).

    Returns:
        A ``networkx.Graph``.  Node and community IDs read from the
        output files are shifted down by one.  Nodes carry ``microC``
        (from ``community_first_level.dat``) and ``macroC`` (from
        ``community_second_level.dat``) attributes, and
        ``g.graph['stats']`` holds the result of ``stats(g)``.

    Raises:
        AssertionError: if the external program exits with a non-zero
            status.
    """
    prog = _get_file('lfr_benchmarks/new/hierarchical_bench2_2/hbenchmark')
    args = [prog] + makeargs(kwargs)
    # Single-argument print() emits the same text as the former
    # ``print "Arguments are: ", ...`` statement (note the two spaces) and
    # is valid on both Python 2 and Python 3.
    print("Arguments are:  %s" % " ".join(args))

    # NOTE(review): the sibling wrappers call pcd.util.tmpdir_context;
    # confirm the bare name used here is imported in this module.
    with tmpdir_context(chdir=True, prefix="tmp-lfrbenchmark", dir=tmpbase):
        retcode = subprocess.call(args)
        if retcode != 0:
            # Raised explicitly instead of via ``assert`` so the check
            # survives ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "The program '%s' returned non-zero: %s" % (prog, retcode))

        g = networkx.Graph()
        # Adding the same node twice merges the attributes, so every node
        # ends up with both its micro and macro community.
        for n, c in read_file('community_first_level.dat'):
            g.add_node(n - 1, microC=c - 1)
        for n, c in read_file('community_second_level.dat'):
            g.add_node(n - 1, macroC=c - 1)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        g.graph['stats'] = stats(g)
        if pause:
            # Still inside the temporary directory, so the program's
            # output files can be inspected interactively.
            import fitz.interact
            fitz.interact.interact()
    return g
def binary(pause=False, **kwargs):
    """Binary networks with overlapping nodes.

    Keyword arguments are passed through ``makeargs()`` to build the
    command line of the external ``benchmark`` program.  Options listed
    by the program's help text:

        -N     [number of nodes]
        -k     [average degree]
        -maxk  [maximum degree]
        -mu    [mixing parameter]
        -t1    [minus exponent for the degree sequence]
        -t2    [minus exponent for the community size distribution]
        -minc  [minimum for the community sizes]
        -maxc  [maximum for the community sizes]
        -on    [number of overlapping nodes]
        -om    [number of memberships of the overlapping nodes]
        -C     [average clustering coefficient]

    -N, -k, -maxk, -mu have to be specified.  For the others, the program
    can use default values: t1=2, t2=1, on=0, om=0; minc and maxc will be
    chosen close to the degree sequence extremes.  If you set a parameter
    twice, the latter one will be taken.

    Arguments:
        pause: if true, call ``fitz.interact.interact()`` before
            returning (debugging aid).

    Returns:
        A ``networkx.Graph``.  Node and community IDs read from the
        output files are shifted down by one.  Every node has a ``cmtys``
        attribute (a list of community IDs) and
        ``g.graph['statistics']`` holds the raw text of
        ``statistics.dat``.

    Raises:
        AssertionError: if the external program exits with a non-zero
            status.
    """
    prog = _get_file('lfr_benchmarks/new/binary_networks/benchmark')
    args = [prog] + makeargs(kwargs)
    # Single-argument print() emits the same text as the former
    # ``print "Arguments are: ", ...`` statement (note the two spaces) and
    # is valid on both Python 2 and Python 3.
    print("Arguments are:  %s" % " ".join(args))

    with pcd.util.tmpdir_context(chdir=True, prefix="tmp-lfrbenchmark",
                                 dir=tmpbase):
        retcode = subprocess.call(args)
        if retcode != 0:
            # Raised explicitly instead of via ``assert`` so the check
            # survives ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "The program '%s' returned non-zero: %s" % (prog, retcode))

        g = networkx.Graph()
        # Each record is a node ID followed by one or more community IDs
        # (nodes may overlap), so the whole tail is kept as a list.
        for row in read_file('community.dat'):
            n = row[0]
            cmtys = [c - 1 for c in row[1:]]
            g.add_node(n - 1, cmtys=cmtys)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        # Use a context manager so the file handle is closed promptly.
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()
        # NOTE: computing g.graph['stats'] = stats(g) was disabled in the
        # original code and stays disabled.
        if pause:
            # Still inside the temporary directory, so the program's
            # output files can be inspected interactively.
            import fitz.interact
            fitz.interact.interact()
    return g
def hierarchical(pause=False, **kwargs):
    """Binary networks with overlapping nodes and hierarchies.

    The external program is an implementation of the algorithm described
    in the paper "Directed, weighted and overlapping benchmark graphs for
    community detection algorithms", written by Andrea Lancichinetti and
    Santo Fortunato.  In particular, it produces binary networks with
    overlapping nodes and hierarchies.

    Keyword arguments are passed through ``makeargs()`` to build the
    command line.  Options listed by the program's help text:

        -N     [number of nodes]
        -k     [average degree]
        -maxk  [maximum degree]
        -t1    [minus exponent for the degree sequence]
        -t2    [minus exponent for the community size distribution]
        -minc  [minimum for the micro community sizes]
        -maxc  [maximum for the micro community sizes]
        -on    [number of overlapping nodes]
        -om    [number of memberships of the overlapping nodes]
        -minC  [minimum for the macro community size]
        -maxC  [maximum for the macro community size]
        -mu1   [mixing parameter for the macro communities (see Readme file)]
        -mu2   [mixing parameter for the micro communities (see Readme file)]

    Examples of the underlying command line:
        ./hbenchmark -f flags.dat
        ./hbenchmark -N 10000 -k 20 -maxk 50 -mu2 0.3 -minc 20 -maxc 50 -minC 100 -maxC 1000 -mu1 0.1

    Arguments:
        pause: if true, call ``fitz.interact.interact()`` before
            returning (debugging aid).

    Returns:
        A ``networkx.Graph``.  Node and community IDs read from the
        output files are shifted down by one.  Nodes carry ``microC``
        (from ``community_first_level.dat``) and ``macroC`` (from
        ``community_second_level.dat``) attributes, and
        ``g.graph['stats']`` holds the result of ``stats(g)``.

    Raises:
        AssertionError: if the external program exits with a non-zero
            status.
    """
    prog = _get_file('lfr_benchmarks/new/hierarchical_bench2_2/hbenchmark')
    args = [prog] + makeargs(kwargs)
    # Single-argument print() emits the same text as the former
    # ``print "Arguments are: ", ...`` statement (note the two spaces) and
    # is valid on both Python 2 and Python 3.
    print("Arguments are:  %s" % " ".join(args))

    # NOTE(review): the sibling wrappers call pcd.util.tmpdir_context;
    # confirm the bare name used here is imported in this module.
    with tmpdir_context(chdir=True, prefix="tmp-lfrbenchmark", dir=tmpbase):
        retcode = subprocess.call(args)
        if retcode != 0:
            # Raised explicitly instead of via ``assert`` so the check
            # survives ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "The program '%s' returned non-zero: %s" % (prog, retcode))

        g = networkx.Graph()
        # Adding the same node twice merges the attributes, so every node
        # ends up with both its micro and macro community.
        for n, c in read_file('community_first_level.dat'):
            g.add_node(n - 1, microC=c - 1)
        for n, c in read_file('community_second_level.dat'):
            g.add_node(n - 1, macroC=c - 1)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        g.graph['stats'] = stats(g)
        if pause:
            # Still inside the temporary directory, so the program's
            # output files can be inspected interactively.
            import fitz.interact
            fitz.interact.interact()
    return g
def binary(pause=False, **kwargs):
    """Binary networks with overlapping nodes.

    Keyword arguments are passed through ``makeargs()`` to build the
    command line of the external ``benchmark`` program.  Options listed
    by the program's help text:

        -N     [number of nodes]
        -k     [average degree]
        -maxk  [maximum degree]
        -mu    [mixing parameter]
        -t1    [minus exponent for the degree sequence]
        -t2    [minus exponent for the community size distribution]
        -minc  [minimum for the community sizes]
        -maxc  [maximum for the community sizes]
        -on    [number of overlapping nodes]
        -om    [number of memberships of the overlapping nodes]
        -C     [average clustering coefficient]

    -N, -k, -maxk, -mu have to be specified.  For the others, the program
    can use default values: t1=2, t2=1, on=0, om=0; minc and maxc will be
    chosen close to the degree sequence extremes.  If you set a parameter
    twice, the latter one will be taken.

    Arguments:
        pause: if true, call ``fitz.interact.interact()`` before
            returning (debugging aid).

    Returns:
        A ``networkx.Graph``.  Node and community IDs read from the
        output files are shifted down by one.  Every node has a ``cmtys``
        attribute (a list of community IDs) and
        ``g.graph['statistics']`` holds the raw text of
        ``statistics.dat``.

    Raises:
        AssertionError: if the external program exits with a non-zero
            status.
    """
    prog = _get_file('lfr_benchmarks/new/binary_networks/benchmark')
    args = [prog] + makeargs(kwargs)
    # Single-argument print() emits the same text as the former
    # ``print "Arguments are: ", ...`` statement (note the two spaces) and
    # is valid on both Python 2 and Python 3.
    print("Arguments are:  %s" % " ".join(args))

    with pcd.util.tmpdir_context(chdir=True, prefix="tmp-lfrbenchmark",
                                 dir=tmpbase):
        retcode = subprocess.call(args)
        if retcode != 0:
            # Raised explicitly instead of via ``assert`` so the check
            # survives ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "The program '%s' returned non-zero: %s" % (prog, retcode))

        g = networkx.Graph()
        # Each record is a node ID followed by one or more community IDs
        # (nodes may overlap), so the whole tail is kept as a list.
        for row in read_file('community.dat'):
            n = row[0]
            cmtys = [c - 1 for c in row[1:]]
            g.add_node(n - 1, cmtys=cmtys)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        # Use a context manager so the file handle is closed promptly.
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()
        # NOTE: computing g.graph['stats'] = stats(g) was disabled in the
        # original code and stays disabled.
        if pause:
            # Still inside the temporary directory, so the program's
            # output files can be inspected interactively.
            import fitz.interact
            fitz.interact.interact()
    return g
def weighted(pause=False, **kwargs):
    """Undirected weighted networks with overlapping nodes.

    The external program is an implementation of the algorithm described
    in the paper "Directed, weighted and overlapping benchmark graphs for
    community detection algorithms", written by Andrea Lancichinetti and
    Santo Fortunato.  In particular, it produces undirected weighted
    networks with overlapping nodes.

    Keyword arguments are passed through ``makeargs()`` to build the
    command line.  Options listed by the program's help text:

        -N     [number of nodes]
        -k     [average degree]
        -maxk  [maximum degree]
        -mut   [mixing parameter for the topology]
        -muw   [mixing parameter for the weights]
        -beta  [exponent for the weight distribution]
        -t1    [minus exponent for the degree sequence]
        -t2    [minus exponent for the community size distribution]
        -minc  [minimum for the community sizes]
        -maxc  [maximum for the community sizes]
        -on    [number of overlapping nodes]
        -om    [number of memberships of the overlapping nodes]
        -C     [average clustering coefficient]

    -N, -k, -maxk, -muw have to be specified.  For the others, the
    program can use default values: t1=2, t2=1, on=0, om=0, beta=1.5,
    mut=muw; minc and maxc will be chosen close to the degree sequence
    extremes.  If you set a parameter twice, the latter one will be
    taken.

    To have a random network use: -rand.  Using this option will set
    muw=0, mut=0, and minc=maxc=N, i.e. there will be only one community.

    Use option -sup (-inf) if you want to produce a benchmark whose
    distribution of the ratio of external degree/total degree is
    superiorly (inferiorly) bounded by the mixing parameter.

    The flag -C is not mandatory.  If you use it, the program will
    perform a number of rewiring steps to increase the average cluster
    coefficient up to the wished value.  Since other constraints must be
    fulfilled, if the wished value is not reached after a certain time,
    the program will stop (displaying a warning).

    Examples of the underlying command line:
        ./benchmark -N 1000 -k 15 -maxk 50 -muw 0.1 -minc 20 -maxc 50
        ./benchmark -f flags.dat -t1 3

    Arguments:
        pause: if true, call ``fitz.interact.interact()`` before
            returning (debugging aid).

    Returns:
        A ``networkx.Graph``.  Node and community IDs read from the
        output files are shifted down by one.  Every node has a ``cmtys``
        attribute (a list of community IDs), every edge has a ``weight``
        attribute, and ``g.graph['statistics']`` holds the raw text of
        ``statistics.dat``.

    Raises:
        AssertionError: if the external program exits with a non-zero
            status.
    """
    prog = _get_file('lfr_benchmarks/new/weighted_networks/benchmark')
    args = [prog] + makeargs(kwargs)
    # Single-argument print() emits the same text as the former
    # ``print "Arguments are: ", ...`` statement (note the two spaces) and
    # is valid on both Python 2 and Python 3.
    print("Arguments are:  %s" % " ".join(args))

    with pcd.util.tmpdir_context(chdir=True, prefix='tmp-lfrbenchmark',
                                 dir=tmpbase):
        retcode = subprocess.call(args)
        if retcode != 0:
            # Raised explicitly instead of via ``assert`` so the check
            # survives ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "The program '%s' returned non-zero: %s" % (prog, retcode))

        g = networkx.Graph()
        # Each record is a node ID followed by one or more community IDs
        # (nodes may overlap), so the whole tail is kept as a list.
        for row in read_file('community.dat'):
            n = row[0]
            cmtys = [c - 1 for c in row[1:]]
            g.add_node(n - 1, cmtys=cmtys)
        for n1, n2, weight in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1, weight=weight)
        # Use a context manager so the file handle is closed promptly.
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()
        # NOTE: computing g.graph['stats'] = stats(g) was disabled in the
        # original code and stays disabled.
        if pause:
            # Still inside the temporary directory, so the program's
            # output files can be inspected interactively.
            import fitz.interact
            fitz.interact.interact()
    return g
def nmi_LFK_LF(cmtys1, cmtys2, check=True, use_existing=False):
    """Compute NMI using the overlap-including definition.

    This uses the external code 'mutual3/mutual' to calculate the
    overlap-aware NMI.

    If a communities object is a pcd.cmty.CommunityFile, has a fname
    attribute, and that file exists and doesn't end in .gz or .bz2, and
    the argument use_existing is true, then this file will be used as the
    input to the NMI code.  The NMI code is very dumb: it does not remove
    comments, it uses only floating-point or integer node IDs, and it does
    not give any type of error if these conditions are not met.  Thus,
    only enable use_existing if you can vouch for the contents of the
    file.

    check: bool, default True
        If true, check that all node names are either representable as
        integers or floating point numbers.  These can either be python
        integers or floats, or strings of those.

    use_existing: bool, default False
        If true, and a community object has a '.fname' attribute, and
        that file exists, use that as the input filename instead of
        re-writing the file.  WARNING: if you use this, you must ensure
        that there are no comments within the file, or anything else
        which may cause the program to break.

    Returns the NMI as a float, parsed from the text following the first
    ':' in the program's standard output.

    Raises AssertionError if ``check`` is true and a node name is not
    numeric, and RuntimeError if the external program exits with a
    non-zero status.
    """
    from pcd.support.algorithms import _get_file
    from pcd.cmty import CommunityFile
    binary = _get_file('mutual3/mutual')

    def _is_float_str(x):
        """Is this a string representation of a float?"""
        try:
            float(x)
            return True
        except (TypeError, ValueError):
            # TypeError: not string-like at all (e.g. None or a tuple);
            # treat it as "not numeric" instead of crashing the check.
            return False

    def _check_nodes(cmtys):
        """Raise AssertionError unless every node name is numeric."""
        for nodes in cmtys.itervalues():
            for node in nodes:
                if not (isinstance(node, (int, float))
                        or _is_float_str(node)):
                    # Raised explicitly instead of via ``assert`` so the
                    # check survives ``python -O``.
                    raise AssertionError(
                        "Node name %r is not an integer or float" % (node,))

    def _reusable_path(cmtys):
        """Absolute path of cmtys' own file if it may be fed to the
        program directly, else None."""
        if not use_existing:
            return None
        if not isinstance(cmtys, CommunityFile):
            return None
        if not hasattr(cmtys, 'fname'):
            return None
        if not os.path.exists(cmtys.fname):
            return None
        # Compressed files cannot be read by the external program.
        if cmtys.fname.endswith(('.bz2', '.gz')):
            return None
        return os.path.abspath(cmtys.fname)

    # Check that the community files are valid for the program.
    if check:
        _check_nodes(cmtys1)
        _check_nodes(cmtys2)

    # We must use os.path.abspath *outside* of the tmpdir_context, or
    # else the absolute path will be wrong.  Each object is tested on its
    # own (the old code tested cmtys1's type when deciding about cmtys2).
    args = [binary, _reusable_path(cmtys1), _reusable_path(cmtys2)]

    with pcd.util.tmpdir_context(chdir=True, dir='.', prefix='tmp-nmi-'):
        # Write community files, if they do not already exist.  These
        # must be written inside of the tmpdir_context because the
        # relative file names below refer to the temporary directory,
        # which is the working directory only in here.
        # (Symlinking existing files into the temporary directory instead
        # of passing their paths was considered in the original code but
        # left disabled.)
        if args[1] is None:
            cmtys1.write_clusters('cmtys1.txt', raw=True)
            args[1] = 'cmtys1.txt'
        if args[2] is None:
            cmtys2.write_clusters('cmtys2.txt', raw=True)
            args[2] = 'cmtys2.txt'

        p = subprocess.Popen(args, stdout=subprocess.PIPE)
        # communicate() drains the pipe while waiting; wait() followed by
        # read() can deadlock once the output fills the pipe buffer.
        stdout, _ = p.communicate()
        ret = p.returncode
        if ret != 0:
            print(stdout)
            raise RuntimeError("The program '%s' returned non-zero: %s" % (
                args[0], ret))
        nmi = float(stdout.split(':', 1)[1])
    return nmi
def weighted(pause=False, **kwargs):
    """Undirected weighted networks with overlapping nodes.

    The external program is an implementation of the algorithm described
    in the paper "Directed, weighted and overlapping benchmark graphs for
    community detection algorithms", written by Andrea Lancichinetti and
    Santo Fortunato.  In particular, it produces undirected weighted
    networks with overlapping nodes.

    Keyword arguments are passed through ``makeargs()`` to build the
    command line.  Options listed by the program's help text:

        -N     [number of nodes]
        -k     [average degree]
        -maxk  [maximum degree]
        -mut   [mixing parameter for the topology]
        -muw   [mixing parameter for the weights]
        -beta  [exponent for the weight distribution]
        -t1    [minus exponent for the degree sequence]
        -t2    [minus exponent for the community size distribution]
        -minc  [minimum for the community sizes]
        -maxc  [maximum for the community sizes]
        -on    [number of overlapping nodes]
        -om    [number of memberships of the overlapping nodes]
        -C     [average clustering coefficient]

    -N, -k, -maxk, -muw have to be specified.  For the others, the
    program can use default values: t1=2, t2=1, on=0, om=0, beta=1.5,
    mut=muw; minc and maxc will be chosen close to the degree sequence
    extremes.  If you set a parameter twice, the latter one will be
    taken.

    To have a random network use: -rand.  Using this option will set
    muw=0, mut=0, and minc=maxc=N, i.e. there will be only one community.

    Use option -sup (-inf) if you want to produce a benchmark whose
    distribution of the ratio of external degree/total degree is
    superiorly (inferiorly) bounded by the mixing parameter.

    The flag -C is not mandatory.  If you use it, the program will
    perform a number of rewiring steps to increase the average cluster
    coefficient up to the wished value.  Since other constraints must be
    fulfilled, if the wished value is not reached after a certain time,
    the program will stop (displaying a warning).

    Examples of the underlying command line:
        ./benchmark -N 1000 -k 15 -maxk 50 -muw 0.1 -minc 20 -maxc 50
        ./benchmark -f flags.dat -t1 3

    Arguments:
        pause: if true, call ``fitz.interact.interact()`` before
            returning (debugging aid).

    Returns:
        A ``networkx.Graph``.  Node and community IDs read from the
        output files are shifted down by one.  Every node has a ``cmtys``
        attribute (a list of community IDs), every edge has a ``weight``
        attribute, and ``g.graph['statistics']`` holds the raw text of
        ``statistics.dat``.

    Raises:
        AssertionError: if the external program exits with a non-zero
            status.
    """
    prog = _get_file('lfr_benchmarks/new/weighted_networks/benchmark')
    args = [prog] + makeargs(kwargs)
    # Single-argument print() emits the same text as the former
    # ``print "Arguments are: ", ...`` statement (note the two spaces) and
    # is valid on both Python 2 and Python 3.
    print("Arguments are:  %s" % " ".join(args))

    with pcd.util.tmpdir_context(chdir=True, prefix='tmp-lfrbenchmark',
                                 dir=tmpbase):
        retcode = subprocess.call(args)
        if retcode != 0:
            # Raised explicitly instead of via ``assert`` so the check
            # survives ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "The program '%s' returned non-zero: %s" % (prog, retcode))

        g = networkx.Graph()
        # Each record is a node ID followed by one or more community IDs
        # (nodes may overlap), so the whole tail is kept as a list.
        for row in read_file('community.dat'):
            n = row[0]
            cmtys = [c - 1 for c in row[1:]]
            g.add_node(n - 1, cmtys=cmtys)
        for n1, n2, weight in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1, weight=weight)
        # Use a context manager so the file handle is closed promptly.
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()
        # NOTE: computing g.graph['stats'] = stats(g) was disabled in the
        # original code and stays disabled.
        if pause:
            # Still inside the temporary directory, so the program's
            # output files can be inspected interactively.
            import fitz.interact
            fitz.interact.interact()
    return g