Exemple #1
0
def benchmark(N, k, maxk, gamma, beta, mu, minc=None, maxc=None, pause=False):
    """Unweighted, undirected, non-overlapping benchmark graph.

    This is the graph supporting Lancichinetti, Fortunato, Radicchi,
    PRE 78 046110 (2008).

    The parameters are written, one per line, to ``parameters.dat`` in a
    temporary working directory.  The external LFR ``benchmark`` program
    is then run there (presumably picking the parameter file up from its
    working directory) and its output files are loaded into a graph.

    Arguments are:

    N     # number of nodes
    k     # average degree
    maxk  # maximum degree
    gamma # exponent for the degree distribution
    beta  # exponent for the community size distribution
    mu    # mixing parameter
    minc  # minimum for the community sizes (optional)
    maxc  # maximum for the community sizes (optional)
    pause # if true, start an interactive session (fitz.interact) while
          # still inside the temporary directory, before returning

    Example parameters:
    N=1000, k=15, maxk=100, gamma=2, beta=1, mu=.1

    Returns a networkx.Graph.  Node and community IDs read from the
    output files are shifted down by one; each node carries a 'cmty'
    attribute, and g.graph['statistics'] holds the raw text of
    'statistics.dat'.

    Raises AssertionError if the external program exits with a non-zero
    status.
    """
    # Omitted optional sizes are written as empty lines.
    if minc is None:
        minc = ""
    if maxc is None:
        maxc = ""

    # One value per line, in the order the parameter file expects.
    params = "".join(
        "%s\n" % (value,)
        for value in (N, k, maxk, gamma, beta, mu, minc, maxc))

    prog = _get_file('lfr_benchmarks/new/benchmark_2_2/benchmark')
    kwargs = {}
    args = [prog] + makeargs(kwargs)
    # A single parenthesised argument prints the same text on Python 2
    # and parses on Python 3.
    print("Arguments are:  " + " ".join(args))

    with pcd.util.tmpdir_context(chdir=True,
                                 prefix='tmp-lfrbenchmark',
                                 dir=tmpbase):
        # Close (and therefore flush) the parameter file before the
        # external program is started.
        with open('parameters.dat', 'w') as param_file:
            param_file.write(params)

        retcode = subprocess.call(args)
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        if retcode != 0:
            raise AssertionError(
                "%s exited with status %s" % (prog, retcode))

        g = networkx.Graph()
        for n, c in read_file('community.dat'):
            g.add_node(n - 1, cmty=c - 1)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()

        if pause:
            # Debugging hook: inspect the output files in place.
            import fitz.interact
            fitz.interact.interact()
    return g
Exemple #2
0
def benchmark(N, k, maxk, gamma, beta, mu,
              minc=None, maxc=None, pause=False):
    """Unweighted, undirected, non-overlapping benchmark graph.

    This is the graph supporting Lancichinetti, Fortunato, Radicchi,
    PRE 78 046110 (2008).

    A ``parameters.dat`` file (one value per line) is written into a
    temporary working directory, the external LFR ``benchmark`` program
    is executed there, and the files it produces are read back into a
    graph.

    Arguments are:

    N     # number of nodes
    k     # average degree
    maxk  # maximum degree
    gamma # exponent for the degree distribution
    beta  # exponent for the community size distribution
    mu    # mixing parameter
    minc  # minimum for the community sizes (optional)
    maxc  # maximum for the community sizes (optional)
    pause # if true, start an interactive session (fitz.interact) while
          # still inside the temporary directory, before returning

    Example parameters:
    N=1000, k=15, maxk=100, gamma=2, beta=1, mu=.1

    Returns a networkx.Graph whose nodes have a 'cmty' attribute (node
    and community IDs from the files are decremented by one) and whose
    g.graph['statistics'] is the content of 'statistics.dat'.

    Raises AssertionError if the external program returns non-zero.
    """
    # Optional limits left unset become empty lines in the file.
    if minc is None:
        minc = ""
    if maxc is None:
        maxc = ""

    ordered_values = (N, k, maxk, gamma, beta, mu, minc, maxc)
    params = "".join("%s\n" % (value,) for value in ordered_values)

    prog = _get_file('lfr_benchmarks/new/benchmark_2_2/benchmark')
    kwargs = {}
    args = [prog] + makeargs(kwargs)
    # Parenthesised single argument: same output on Python 2, valid
    # syntax on Python 3.
    print("Arguments are:  " + " ".join(args))

    with pcd.util.tmpdir_context(chdir=True, prefix='tmp-lfrbenchmark',
                                 dir=tmpbase):
        # The file must be closed before the subprocess starts so the
        # parameters are guaranteed to be on disk.
        with open('parameters.dat', 'w') as param_file:
            param_file.write(params)

        retcode = subprocess.call(args)
        # Explicit raise instead of ``assert`` so the check is not
        # stripped by ``python -O``; AssertionError kept for callers.
        if retcode != 0:
            raise AssertionError(
                "%s exited with status %s" % (prog, retcode))

        g = networkx.Graph()
        for n, c in read_file('community.dat'):
            g.add_node(n - 1, cmty=c - 1)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()

        if pause:
            # Debugging hook, run while the output files still exist
            # in the current directory.
            import fitz.interact
            fitz.interact.interact()
    return g
Exemple #3
0
def hierarchical(pause=False, **kwargs):
    """Binary networks with overlapping nodes and hierarchies.

    This program is an implementation of the algorithm described in
    the paper 'Directed, weighted and overlapping benchmark graphs for
    community detection algorithms', written by Andrea Lancichinetti
    and Santo Fortunato. In particular, this program is to produce
    binary networks with overlapping nodes and hierarchies.

    Keyword arguments are converted to command-line flags by makeargs()
    and passed to the external ``hbenchmark`` program:

    -N              [number of nodes]
    -k              [average degree]
    -maxk           [maximum degree]
    -t1             [minus exponent for the degree sequence]
    -t2             [minus exponent for the community size distribution]
    -minc           [minimum for the micro community sizes]
    -maxc           [maximum for the micro community sizes]
    -on             [number of overlapping nodes]
    -om             [number of memberships of the overlapping nodes]
    -minC           [minimum for the macro community size]
    -maxC           [maximum for the macro community size]
    -mu1            [mixing parameter for the macro communities (see Readme file)]
    -mu2            [mixing parameter for the micro communities (see Readme file)]

    Examples:
    ./hbenchmark -f flags.dat
    ./hbenchmark -N 10000 -k 20 -maxk 50 -mu2 0.3 -minc 20 -maxc 50 -minC 100 -maxC 1000 -mu1 0.1

    If `pause` is true, an interactive session (fitz.interact) is
    started inside the temporary directory before returning.

    Returns a networkx.Graph.  Node and community IDs from the output
    files are shifted down by one; nodes carry 'microC' (first level)
    and 'macroC' (second level) attributes, and g.graph['stats'] holds
    the result of stats(g).

    Raises AssertionError if the external program exits with a non-zero
    status.
    """
    prog = _get_file('lfr_benchmarks/new/hierarchical_bench2_2/hbenchmark')
    args = [prog] + makeargs(kwargs)
    # A single parenthesised argument prints the same text on Python 2
    # and parses on Python 3.
    print("Arguments are:  " + " ".join(args))

    with tmpdir_context(chdir=True, prefix="tmp-lfrbenchmark", dir=tmpbase):
        retcode = subprocess.call(args)
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        if retcode != 0:
            raise AssertionError(
                "%s exited with status %s" % (prog, retcode))

        g = networkx.Graph()
        # add_node on an existing node only updates its attributes, so
        # the second pass adds 'macroC' alongside 'microC'.
        for n, c in read_file('community_first_level.dat'):
            g.add_node(n - 1, microC=c - 1)
        for n, c in read_file('community_second_level.dat'):
            g.add_node(n - 1, macroC=c - 1)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        g.graph['stats'] = stats(g)
        if pause:
            # Debugging hook: inspect the output files in place.
            import fitz.interact
            fitz.interact.interact()
    return g
Exemple #4
0
def binary(pause=False, **kwargs):
    """Binary networks with overlapping nodes.

    Keyword arguments are converted to command-line flags by makeargs()
    and passed to the external ``benchmark`` program:

    -N              [number of nodes]
    -k              [average degree]
    -maxk           [maximum degree]
    -mu             [mixing parameter]
    -t1             [minus exponent for the degree sequence]
    -t2             [minus exponent for the community size distribution]
    -minc           [minimum for the community sizes]
    -maxc           [maximum for the community sizes]
    -on             [number of overlapping nodes]
    -om             [number of memberships of the overlapping nodes]
    -C              [average clustering coefficient]


    -N, -k, -maxk, -mu have to be specified. For the others, the
    program can use default values:
    t1=2, t2=1, on=0, om=0, minc and maxc will be chosen close to the
    degree sequence extremes.
    If you set a parameter twice, the latter one will be taken.

    If `pause` is true, an interactive session (fitz.interact) is
    started inside the temporary directory before returning.

    Returns a networkx.Graph.  Each node has a 'cmtys' attribute: the
    list of its community IDs.  Node and community IDs from the output
    files are shifted down by one.  g.graph['statistics'] holds the raw
    text of 'statistics.dat'.

    Raises AssertionError if the external program exits with a non-zero
    status.
    """
    prog = _get_file('lfr_benchmarks/new/binary_networks/benchmark')
    args = [prog] + makeargs(kwargs)
    # A single parenthesised argument prints the same text on Python 2
    # and parses on Python 3.
    print("Arguments are:  " + " ".join(args))

    with pcd.util.tmpdir_context(chdir=True,
                                 prefix="tmp-lfrbenchmark",
                                 dir=tmpbase):
        retcode = subprocess.call(args)
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        if retcode != 0:
            raise AssertionError(
                "%s exited with status %s" % (prog, retcode))

        g = networkx.Graph()
        # Each row is a node ID followed by one or more community IDs
        # (a node may belong to several communities).
        for row in read_file('community.dat'):
            n, cmtys = row[0], row[1:]
            g.add_node(n - 1, cmtys=[c - 1 for c in cmtys])
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()
        if pause:
            # Debugging hook: inspect the output files in place.
            import fitz.interact
            fitz.interact.interact()
    return g
Exemple #5
0
def hierarchical(pause=False, **kwargs):
    """Binary networks with overlapping nodes and hierarchies.

    This program is an implementation of the algorithm described in
    the paper 'Directed, weighted and overlapping benchmark graphs for
    community detection algorithms', written by Andrea Lancichinetti
    and Santo Fortunato. In particular, this program is to produce
    binary networks with overlapping nodes and hierarchies.

    The keyword arguments are turned into command-line flags by
    makeargs() and handed to the external ``hbenchmark`` program:

    -N              [number of nodes]
    -k              [average degree]
    -maxk           [maximum degree]
    -t1             [minus exponent for the degree sequence]
    -t2             [minus exponent for the community size distribution]
    -minc           [minimum for the micro community sizes]
    -maxc           [maximum for the micro community sizes]
    -on             [number of overlapping nodes]
    -om             [number of memberships of the overlapping nodes]
    -minC           [minimum for the macro community size]
    -maxC           [maximum for the macro community size]
    -mu1            [mixing parameter for the macro communities (see Readme file)]
    -mu2            [mixing parameter for the micro communities (see Readme file)]

    Examples:
    ./hbenchmark -f flags.dat
    ./hbenchmark -N 10000 -k 20 -maxk 50 -mu2 0.3 -minc 20 -maxc 50 -minC 100 -maxC 1000 -mu1 0.1

    If `pause` is true, an interactive session (fitz.interact) is
    started inside the temporary directory before returning.

    Returns a networkx.Graph with per-node 'microC' and 'macroC'
    attributes (first- and second-level community, IDs decremented by
    one, as are node IDs) and g.graph['stats'] set to stats(g).

    Raises AssertionError if the external program returns non-zero.
    """
    prog = _get_file('lfr_benchmarks/new/hierarchical_bench2_2/hbenchmark')
    args = [prog] + makeargs(kwargs)
    # Parenthesised single argument: same output on Python 2, valid
    # syntax on Python 3.
    print("Arguments are:  " + " ".join(args))

    with tmpdir_context(chdir=True, prefix="tmp-lfrbenchmark", dir=tmpbase):
        retcode = subprocess.call(args)
        # Explicit raise instead of ``assert`` so the check is not
        # stripped by ``python -O``; AssertionError kept for callers.
        if retcode != 0:
            raise AssertionError(
                "%s exited with status %s" % (prog, retcode))

        g = networkx.Graph()
        # The second add_node pass updates nodes created by the first,
        # adding the macro-level attribute next to the micro-level one.
        for n, c in read_file('community_first_level.dat'):
            g.add_node(n - 1, microC=c - 1)
        for n, c in read_file('community_second_level.dat'):
            g.add_node(n - 1, macroC=c - 1)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        g.graph['stats'] = stats(g)
        if pause:
            # Debugging hook, run while the output files still exist
            # in the current directory.
            import fitz.interact
            fitz.interact.interact()
    return g
Exemple #6
0
def binary(pause=False, **kwargs):
    """Binary networks with overlapping nodes.

    The keyword arguments are turned into command-line flags by
    makeargs() and handed to the external ``benchmark`` program:

    -N              [number of nodes]
    -k              [average degree]
    -maxk           [maximum degree]
    -mu             [mixing parameter]
    -t1             [minus exponent for the degree sequence]
    -t2             [minus exponent for the community size distribution]
    -minc           [minimum for the community sizes]
    -maxc           [maximum for the community sizes]
    -on             [number of overlapping nodes]
    -om             [number of memberships of the overlapping nodes]
    -C              [average clustering coefficient]


    -N, -k, -maxk, -mu have to be specified. For the others, the
    program can use default values:
    t1=2, t2=1, on=0, om=0, minc and maxc will be chosen close to the
    degree sequence extremes.
    If you set a parameter twice, the latter one will be taken.

    If `pause` is true, an interactive session (fitz.interact) is
    started inside the temporary directory before returning.

    Returns a networkx.Graph whose nodes have a 'cmtys' attribute (list
    of community IDs; node and community IDs are decremented by one)
    and whose g.graph['statistics'] is the content of 'statistics.dat'.

    Raises AssertionError if the external program returns non-zero.
    """
    prog = _get_file('lfr_benchmarks/new/binary_networks/benchmark')
    args = [prog] + makeargs(kwargs)
    # Parenthesised single argument: same output on Python 2, valid
    # syntax on Python 3.
    print("Arguments are:  " + " ".join(args))

    with pcd.util.tmpdir_context(chdir=True, prefix="tmp-lfrbenchmark",
                                 dir=tmpbase):
        retcode = subprocess.call(args)
        # Explicit raise instead of ``assert`` so the check is not
        # stripped by ``python -O``; AssertionError kept for callers.
        if retcode != 0:
            raise AssertionError(
                "%s exited with status %s" % (prog, retcode))

        g = networkx.Graph()
        # Rows hold a node ID followed by every community it belongs to.
        for row in read_file('community.dat'):
            node, memberships = row[0], row[1:]
            g.add_node(node - 1, cmtys=[c - 1 for c in memberships])
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()
        if pause:
            # Debugging hook, run while the output files still exist
            # in the current directory.
            import fitz.interact
            fitz.interact.interact()
    return g
Exemple #7
0
def weighted(pause=False, **kwargs):
    """Undirected weighted networks with overlapping nodes.

    This program is an implementation of the algorithm described in
    the paper "Directed, weighted and overlapping benchmark graphs for
    community detection algorithms", written by Andrea Lancichinetti
    and Santo Fortunato. In particular, this program is to produce
    undirected weighted networks with overlapping nodes.

    The keyword arguments are turned into command-line flags by
    makeargs() and handed to the external ``benchmark`` program:

    -N              [number of nodes]
    -k              [average degree]
    -maxk           [maximum degree]
    -mut            [mixing parameter for the topology]
    -muw            [mixing parameter for the weights]
    -beta           [exponent for the weight distribution]
    -t1             [minus exponent for the degree sequence]
    -t2             [minus exponent for the community size distribution]
    -minc           [minimum for the community sizes]
    -maxc           [maximum for the community sizes]
    -on             [number of overlapping nodes]
    -om             [number of memberships of the overlapping nodes]
    -C              [average clustering coefficient]


    -N, -k, -maxk, -muw have to be specified. For the others, the
    program can use default values:

    t1=2, t2=1, on=0, om=0, beta=1.5, mut=muw, minc and maxc will be
    chosen close to the degree sequence extremes.  If you set a
    parameter twice, the latter one will be taken.

    To have a random network use:
    -rand
    Using this option will set muw=0, mut=0, and minc=maxc=N, i.e.
    there will be one only community.
    Use option -sup (-inf) if you want to produce a benchmark whose
    distribution of the ratio of external degree/total degree is
    superiorly (inferiorly) bounded by the mixing parameter.

    The flag -C is not mandatory. If you use it, the program will
    perform a number of rewiring steps to increase the average cluster
    coefficient up to the wished value.  Since other constraints must
    be fulfilled, if the wished value will not be reached after a
    certain time, the program will stop (displaying a warning).

    Example1:
    ./benchmark -N 1000 -k 15 -maxk 50 -muw 0.1 -minc 20 -maxc 50
    Example2:
    ./benchmark -f flags.dat -t1 3

    If `pause` is true, an interactive session (fitz.interact) is
    started inside the temporary directory before returning.

    Returns a networkx.Graph.  Nodes have a 'cmtys' attribute (list of
    community IDs), edges have a 'weight' attribute, node and community
    IDs from the output files are decremented by one, and
    g.graph['statistics'] is the content of 'statistics.dat'.

    Raises AssertionError if the external program returns non-zero.
    """
    prog = _get_file('lfr_benchmarks/new/weighted_networks/benchmark')
    args = [prog] + makeargs(kwargs)
    # Parenthesised single argument: same output on Python 2, valid
    # syntax on Python 3.
    print("Arguments are:  " + " ".join(args))

    with pcd.util.tmpdir_context(chdir=True, prefix='tmp-lfrbenchmark',
                                 dir=tmpbase):
        retcode = subprocess.call(args)
        # Explicit raise instead of ``assert`` so the check is not
        # stripped by ``python -O``; AssertionError kept for callers.
        if retcode != 0:
            raise AssertionError(
                "%s exited with status %s" % (prog, retcode))

        g = networkx.Graph()
        # Rows hold a node ID followed by every community it belongs to.
        for row in read_file('community.dat'):
            node, memberships = row[0], row[1:]
            g.add_node(node - 1, cmtys=[c - 1 for c in memberships])
        # The weight is stored as read; only the node IDs are shifted.
        for n1, n2, weight in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1, weight=weight)
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()
        if pause:
            # Debugging hook, run while the output files still exist
            # in the current directory.
            import fitz.interact
            fitz.interact.interact()
    return g
Exemple #8
0
def nmi_LFK_LF(cmtys1, cmtys2, check=True, use_existing=False):
    """Compute NMI using the overlap-including definition.

    This uses the external code 'mutual3/mutual' to calculate the
    overlap-aware NMI.  Both community structures are handed to that
    program as files, and the value after the first ':' in its standard
    output is parsed as the result.

    If the communities object is a pcd.cmty.CommunityFile, has a fname
    attribute, and it exists and doesn't end in .gz or .bz2, and the
    argument use_existing is true, then this file will be used for the
    input to the NMI code.  The NMI code is very dumb, and does not
    remove comments, and uses only floating-point or integer node IDs,
    and does not give any type of error if these conditions are not
    met.  Thus, only enable use_existing if you can vouch for the
    contents of the file.

    check: bool, default True
        If true, check that all node names are either representable as
        integers or floating point numbers.  This can either be python
        integers or floats, or strings of those.
    use_existing: bool, default False
        If true, and community object has a '.fname' attribute, and
        that file exists, use that as the input filename instead of
        re-writing the file.  WARNING: if you use this, you must
        ensure that there are no comments within the file, or anything
        else which may cause the program to break.

    Returns the NMI as a float.

    Raises AssertionError if `check` is true and a node name is not
    numeric, and RuntimeError if the external program exits with a
    non-zero status.
    """
    from pcd.support.algorithms import _get_file
    from pcd.cmty import CommunityFile
    binary = _get_file('mutual3/mutual')

    def _is_numeric(x):
        """Is this an int/float, or something float() can parse?"""
        if isinstance(x, (int, float)):
            return True
        try:
            float(x)
            return True
        except (TypeError, ValueError):
            # TypeError covers non-string, non-number node names, which
            # are just as unusable by the external program.
            return False

    def _reusable_path(cmtys):
        """Absolute path of cmtys' own file if it may be reused, else None."""
        if not (use_existing
                and isinstance(cmtys, CommunityFile)
                and hasattr(cmtys, 'fname')
                and os.path.exists(cmtys.fname)):
            return None
        # Compressed files cannot be read by the external program.
        if cmtys.fname.endswith(('.bz2', '.gz')):
            return None
        return os.path.abspath(cmtys.fname)

    # Check that community files are valid for the program.  Raised
    # explicitly (not via ``assert``) so the check survives ``python -O``.
    if check:
        for cmtys in (cmtys1, cmtys2):
            for nodes in cmtys.itervalues():
                if not all(_is_numeric(x) for x in nodes):
                    raise AssertionError(
                        "node names must be ints, floats, or strings "
                        "of those")

    # We must use os.path.abspath *outside* of the tmpdir_context, or
    # else the absolute path will be wrong.  Each argument is decided
    # from its own community object; None marks "must be written".
    args = [binary, _reusable_path(cmtys1), _reusable_path(cmtys2)]

    with pcd.util.tmpdir_context(chdir=True, dir='.', prefix='tmp-nmi-'):
        # Write community files, if they do not already exist.  These
        # are written inside the context so that the relative names
        # below refer to files in the temporary directory.
        # NOTE: reused files are passed by absolute path, not symlinked
        # into the temporary directory; the external program may fail
        # on unusual filenames.
        if args[1] is None:
            cmtys1.write_clusters('cmtys1.txt', raw=True)
            args[1] = 'cmtys1.txt'
        if args[2] is None:
            cmtys2.write_clusters('cmtys2.txt', raw=True)
            args[2] = 'cmtys2.txt'

        p = subprocess.Popen(args, stdout=subprocess.PIPE)
        # communicate() drains the pipe while waiting; wait() followed
        # by read() can deadlock once the pipe buffer fills up.
        stdout = p.communicate()[0]
        ret = p.returncode
        if ret != 0:
            print(stdout)
            raise RuntimeError("The program '%s' returned non-zero: %s"%(
                args[0], ret))
        nmi = float(stdout.split(':', 1)[1])
    return nmi
Exemple #9
0
def weighted(pause=False, **kwargs):
    """Undirected weighted networks with overlapping nodes.

    This program is an implementation of the algorithm described in
    the paper "Directed, weighted and overlapping benchmark graphs for
    community detection algorithms", written by Andrea Lancichinetti
    and Santo Fortunato. In particular, this program is to produce
    undirected weighted networks with overlapping nodes.

    Keyword arguments are converted to command-line flags by makeargs()
    and passed to the external ``benchmark`` program:

    -N              [number of nodes]
    -k              [average degree]
    -maxk           [maximum degree]
    -mut            [mixing parameter for the topology]
    -muw            [mixing parameter for the weights]
    -beta           [exponent for the weight distribution]
    -t1             [minus exponent for the degree sequence]
    -t2             [minus exponent for the community size distribution]
    -minc           [minimum for the community sizes]
    -maxc           [maximum for the community sizes]
    -on             [number of overlapping nodes]
    -om             [number of memberships of the overlapping nodes]
    -C              [average clustering coefficient]


    -N, -k, -maxk, -muw have to be specified. For the others, the
    program can use default values:

    t1=2, t2=1, on=0, om=0, beta=1.5, mut=muw, minc and maxc will be
    chosen close to the degree sequence extremes.  If you set a
    parameter twice, the latter one will be taken.

    To have a random network use:
    -rand
    Using this option will set muw=0, mut=0, and minc=maxc=N, i.e.
    there will be one only community.
    Use option -sup (-inf) if you want to produce a benchmark whose
    distribution of the ratio of external degree/total degree is
    superiorly (inferiorly) bounded by the mixing parameter.

    The flag -C is not mandatory. If you use it, the program will
    perform a number of rewiring steps to increase the average cluster
    coefficient up to the wished value.  Since other constraints must
    be fulfilled, if the wished value will not be reached after a
    certain time, the program will stop (displaying a warning).

    Example1:
    ./benchmark -N 1000 -k 15 -maxk 50 -muw 0.1 -minc 20 -maxc 50
    Example2:
    ./benchmark -f flags.dat -t1 3

    If `pause` is true, an interactive session (fitz.interact) is
    started inside the temporary directory before returning.

    Returns a networkx.Graph.  Each node has a 'cmtys' attribute (the
    list of its community IDs) and each edge a 'weight' attribute.
    Node and community IDs from the output files are shifted down by
    one.  g.graph['statistics'] holds the raw text of 'statistics.dat'.

    Raises AssertionError if the external program exits with a non-zero
    status.
    """
    prog = _get_file('lfr_benchmarks/new/weighted_networks/benchmark')
    args = [prog] + makeargs(kwargs)
    # A single parenthesised argument prints the same text on Python 2
    # and parses on Python 3.
    print("Arguments are:  " + " ".join(args))

    with pcd.util.tmpdir_context(chdir=True,
                                 prefix='tmp-lfrbenchmark',
                                 dir=tmpbase):
        retcode = subprocess.call(args)
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        if retcode != 0:
            raise AssertionError(
                "%s exited with status %s" % (prog, retcode))

        g = networkx.Graph()
        # Each row is a node ID followed by one or more community IDs
        # (a node may belong to several communities).
        for row in read_file('community.dat'):
            n, cmtys = row[0], row[1:]
            g.add_node(n - 1, cmtys=[c - 1 for c in cmtys])
        # Edge weights are stored unchanged; only node IDs are shifted.
        for n1, n2, weight in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1, weight=weight)
        with open('statistics.dat') as stats_file:
            g.graph['statistics'] = stats_file.read()
        if pause:
            # Debugging hook: inspect the output files in place.
            import fitz.interact
            fitz.interact.interact()
    return g