Ejemplo n.º 1
0
def clique_percolation(G, data_prefix='snap_'):

    '''
    Run SNAP's ``cliquesmain`` executable on G and load the communities it writes.

    Parameters
    -----------
    G:                  An iGraph or edge list file
    data_prefix:        Passed to the executable as ``-o:``; the results are
                        read back from "cpm-" + data_prefix + ".txt"

    Returns
    -------
    Whatever snap.read_communities_by_community returns for the output file,
    or None when snap.setup yields no graph file or a TypeError is raised
    while assembling/launching the command.
    '''

    snap_home, graph_file = snap.setup(G)

    if graph_file is None:
        return

    path_cpm = os.path.join(snap_home, "examples", "cliques", "cliquesmain")

    try:
        # Build the argument list first so a bad argument type fails before
        # any file handle is opened.
        command = [path_cpm, "-o:" + data_prefix, "-i:" + graph_file]

        # The executable's stdout is discarded; the context manager makes
        # sure the devnull handle is closed on every path.
        with open(os.devnull, 'w') as FNULL:
            subprocess.Popen(command, stdout=FNULL).wait()

    except TypeError as e:
        print("Error occurred: {}".format(e))
        return

    # The graph file handed back by snap.setup is no longer needed.
    os.remove(graph_file)

    return snap.read_communities_by_community("cpm-" + data_prefix + ".txt", G, delete_file=True)
Ejemplo n.º 2
0
def cesna(G,
          attributes_to_include,
          data_prefix='snap_',
          node_filepath='',
          detect_comm=100,
          min_comm=5,
          max_comm=100,
          trials=5,
          threads=cpu_count(),
          alpha=0.3,
          beta=0.3):
    '''
    Run SNAP's ``cesna`` executable on G and load the communities it writes.

    Parameters
    -----------
    G:                      An iGraph or edge list file
    attributes_to_include:  Forwarded to snap.attribute_setup, whose two results
                            are passed to the executable as the attribute names
                            file (-n:) and the node attribute file (-a:)
    data_prefix:            Passed as -o:; results are read back from
                            data_prefix + "cmtyvv.txt"
    node_filepath:          Input file name for node names (Node ID, Node label)
    detect_comm:            The number of communities to detect (-1: detect automatically) (default: 100)
    min_comm:               Minimum number of communities to try (default: 5)
    max_comm:               Maximum number of communities to try (default: 100)
    trials:                 How many trials for the number of communities (default: 5)
    threads:                Number of threads for parallelization
                            (default: cpu_count(), evaluated once at definition time)
    alpha:                  Alpha for backtracking line search (default: 0.3)
    beta:                   Beta for backtracking line search (default: 0.3)

    This wrapper does not pass an attribute weight (aw), l-1 regularization
    weight (lw) or minimum attribute fraction (mf) option to the executable.

    Returns
    -------
    Whatever snap.read_communities_by_community returns for the output file,
    or None when snap.setup yields no graph file or a TypeError is raised
    while assembling/launching the command.
    '''

    snap_home, graph_file = snap.setup(G)

    f_attribute_names, f_attributes = snap.attribute_setup(
        G, attributes_to_include)
    if graph_file is None:
        return

    path_cesna = os.path.join(snap_home, "examples", "cesna", "cesna")

    try:
        # Build the argument list first so a bad argument type fails before
        # any file handle is opened.
        command = [
            path_cesna, "-o:" + data_prefix, "-i:" + graph_file,
            "-l:" + node_filepath, "-c:" + str(detect_comm),
            "-mc:" + str(min_comm), "-xc:" + str(max_comm),
            "-nc:" + str(trials), "-nt:" + str(threads), "-sa:" + str(alpha),
            "-sb:" + str(beta), "-a:" + f_attributes, "-n:" + f_attribute_names
        ]

        # The executable's stdout is discarded; the context manager makes
        # sure the devnull handle is closed on every path.
        with open(os.devnull, 'w') as FNULL:
            subprocess.Popen(command, stdout=FNULL).wait()

    except TypeError as e:
        print("Error occurred: {}".format(e))
        return

    # The graph file handed back by snap.setup is no longer needed.
    os.remove(graph_file)

    return snap.read_communities_by_community(data_prefix + "cmtyvv.txt",
                                              G,
                                              delete_file=True)
Ejemplo n.º 3
0
def bigclam(G,
            data_prefix='snap_',
            node_filepath='',
            detect_comm=100,
            min_comm=5,
            max_comm=100,
            trials=5,
            threads=cpu_count(),
            alpha=0.3,
            beta=0.3):
    '''
    BigClam from Snap

    Parameters
    ----------
    G :                 A NetworkX graph or edge list file
    data_prefix:        Output file for communities (data_prefix + cmtyvv.txt)
    node_filepath:      Input file name for node names (Node ID, Node label)
    detect_comm:        The number of communities to detect (-1: detect automatically) (Default: 100)
    min_comm:           Minimum number of communities to try (Default = 5)
    max_comm:           Maximum number of communities to try (Default = 100)
    trials:             How many trials for the number of communities (Default = 5)
    threads:            Number of threads for parallelization
                        (Default = cpu_count(), evaluated once at definition time)
    alpha:              Alpha for backtracking line search (Default = 0.3)
    beta:               Beta for backtracking line search (Default = 0.3)

    Returns:  Whatever read_communities_by_community produces for
    data_prefix + 'cmtyvv.txt' (called with delete_file=True), or None when
    setup yields no graph file or a TypeError is raised while
    assembling/launching the command.
    '''

    snap_home, graph_file = setup(G, include_header=False)

    if graph_file is None:
        return None

    path_bigclam = os.path.join(snap_home, "examples", "bigclam", "bigclam")

    try:
        # Build the argument list first so a bad argument type fails before
        # any file handle is opened.
        command = [
            path_bigclam, "-o:" + data_prefix, "-i:" + graph_file, "-l:" +
            node_filepath, "-c:" + str(detect_comm), "-mc:" + str(min_comm),
            "-xc:" + str(max_comm), "-nc:" + str(trials),
            "-nt:" + str(threads), "-sa:" + str(alpha), "-sb:" + str(beta)
        ]

        # The executable's stdout is discarded; the context manager makes
        # sure the devnull handle is closed on every path.
        with open(os.devnull, 'w') as FNULL:
            subprocess.Popen(command, stdout=FNULL).wait()

    except TypeError as e:
        print("Error occurred: {}".format(e))
        return None

    # The graph file handed back by setup is no longer needed.
    os.remove(graph_file)

    return read_communities_by_community(data_prefix + "cmtyvv.txt",
                                         G,
                                         delete_file=True)
Ejemplo n.º 4
0
def coda(G,
         data_prefix='snap_',
         node_filepath='',
         graph_type=0,
         detect_comm=100,
         min_comm=5,
         max_comm=100,
         trials=10,
         threads=4,
         alpha=0.05,
         beta=0.3):
    '''
    Coda from Snap

    Parameters
    ----------
    G :                 A NetworkX graph or edge list file
    data_prefix:        Passed as -o:; results are read back from
                        data_prefix + "cmtyvv.out.txt"
    node_filepath:      Input file name for node names (Node ID, Node label)
    graph_type:         0=directed, 1=undirected (default: 0)
    detect_comm:        The number of communities to detect (-1: detect automatically) (Default: 100)
    min_comm:           Minimum number of communities to try (Default = 5)
    max_comm:           Maximum number of communities to try (Default = 100)
    trials:             How many trials for the number of communities (Default = 10)
    threads:            Number of threads for parallelization (Default = 4)
    alpha:              Alpha for backtracking line search (Default = 0.05)
    beta:               Beta for backtracking line search (Default = 0.3)

    Returns:  Whatever read_communities_by_community produces for the "out"
    community file, or None when setup yields no graph file or a TypeError is
    raised while assembling/launching the command.
    '''

    snap_home, graph_file = setup(G)

    # Bail out early when setup could not provide a graph file.
    if graph_file is None:
        return None

    path_coda = os.path.join(snap_home, "examples", "coda", "coda")

    try:
        # Build the argument list first so a bad argument type fails before
        # any file handle is opened. Every option carries a leading dash;
        # max_comm goes in as "-xc:".
        command = [
            path_coda, "-o:" + data_prefix, "-i:" + graph_file,
            "-l:" + node_filepath, "-g:" + str(graph_type),
            "-c:" + str(detect_comm), "-mc:" + str(min_comm),
            "-xc:" + str(max_comm), "-nc:" + str(trials), "-nt:" + str(threads),
            "-sa:" + str(alpha), "-sb:" + str(beta)
        ]

        # The executable's stdout is discarded; the context manager makes
        # sure the devnull handle is closed on every path.
        with open(os.devnull, 'w') as FNULL:
            subprocess.Popen(command, stdout=FNULL).wait()

    except TypeError as e:
        print("Error occurred: {}".format(e))
        return None

    # The graph file handed back by setup is no longer needed.
    os.remove(graph_file)

    # CoDA writes both an "in" and an "out" community file; only the "out"
    # file is read here (delete_file is not passed for it).
    return read_communities_by_community(data_prefix + "cmtyvv.out.txt", G)
Ejemplo n.º 5
0
def cesna(G, attributes_to_include, data_prefix='snap_', node_filepath='', detect_comm=100, min_comm=5, max_comm=100, trials=5, threads=cpu_count(), alpha=0.3, beta=0.3):

    '''
    Run SNAP's ``cesna`` executable on G and load the communities it writes.

    Parameters
    -----------
    G:                      An iGraph or edge list file
    attributes_to_include:  Forwarded to snap.attribute_setup, whose two results
                            are passed to the executable as the attribute names
                            file (-n:) and the node attribute file (-a:)
    data_prefix:            Passed as -o:; results are read back from
                            data_prefix + "cmtyvv.txt"
    node_filepath:          Input file name for node names (Node ID, Node label)
    detect_comm:            The number of communities to detect (-1: detect automatically) (default: 100)
    min_comm:               Minimum number of communities to try (default: 5)
    max_comm:               Maximum number of communities to try (default: 100)
    trials:                 How many trials for the number of communities (default: 5)
    threads:                Number of threads for parallelization
                            (default: cpu_count(), evaluated once at definition time)
    alpha:                  Alpha for backtracking line search (default: 0.3)
    beta:                   Beta for backtracking line search (default: 0.3)

    This wrapper does not pass an attribute weight (aw), l-1 regularization
    weight (lw) or minimum attribute fraction (mf) option to the executable.

    Returns
    -------
    Whatever snap.read_communities_by_community returns for the output file,
    or None when snap.setup yields no graph file or a TypeError is raised
    while assembling/launching the command.
    '''

    snap_home, graph_file = snap.setup(G)

    f_attribute_names, f_attributes = snap.attribute_setup(G, attributes_to_include)
    if graph_file is None:
        return

    path_cesna = os.path.join(snap_home, "examples", "cesna", "cesna")

    try:
        # Build the argument list first so a bad argument type fails before
        # any file handle is opened.
        command = [
            path_cesna,
            "-o:" + data_prefix,
            "-i:" + graph_file,
            "-l:" + node_filepath,
            "-c:" + str(detect_comm),
            "-mc:" + str(min_comm),
            "-xc:" + str(max_comm),
            "-nc:" + str(trials),
            "-nt:" + str(threads),
            "-sa:" + str(alpha),
            "-sb:" + str(beta),
            "-a:" + f_attributes,
            "-n:" + f_attribute_names,
        ]

        # The executable's stdout is discarded; the context manager makes
        # sure the devnull handle is closed on every path.
        with open(os.devnull, 'w') as FNULL:
            subprocess.Popen(command, stdout=FNULL).wait()

    except TypeError as e:
        print("Error occurred: {}".format(e))
        return

    # The graph file handed back by snap.setup is no longer needed.
    os.remove(graph_file)

    return snap.read_communities_by_community(data_prefix + "cmtyvv.txt", G, delete_file=True)
Ejemplo n.º 6
0
def bigclam(G, data_prefix='snap_', node_filepath='', detect_comm=100, min_comm=5, max_comm=100, trials=10, threads=4, alpha=0.05, beta=0.3):
    '''
    Run SNAP's BigClam executable on G and read back the communities file.

    Parameters
    ----------
    G :                 A NetworkX graph or edge list file
    data_prefix:        Output file for communities (data_prefix + cmtyvv.txt)
    node_filepath:      Input file name for node names (Node ID, Node label)
    detect_comm:        The number of communities to detect (-1: detect automatically) (Default: 100)
    min_comm:           Minimum number of communities to try (Default = 5)
    max_comm:           Maximum number of communities to try (Default = 100)
    trials:             How many trials for the number of communities (Default = 10)
    threads:            Number of threads for parallelization (Default = 4)
    alpha:              Alpha for backtracking line search (Default = 0.05)
    beta:               Beta for backtracking line search (Default = 0.3)

    Returns:  The result of read_communities_by_community for
    data_prefix+'cmtyvv.txt' (described by the original author as a list of
    SubGraphs representing the communities), or None when setup yields no
    graph file or a TypeError is raised while assembling/launching the command.
    '''

    snap_home, edge_file = setup(G)
    if edge_file is None:
        return None

    executable = os.path.join(snap_home, "examples", "bigclam", "bigclam")

    try:
        # One option per line, in the same order the executable receives them.
        command = [
            executable,
            "-o:" + data_prefix,
            "-i:" + edge_file,
            "-l:" + node_filepath,
            "-c:" + str(detect_comm),
            "-mc:" + str(min_comm),
            "-xc:" + str(max_comm),
            "-nc:" + str(trials),
            "-nt:" + str(threads),
            "-sa:" + str(alpha),
            "-sb:" + str(beta),
        ]
        # stdout is inherited from the caller; block until the process exits.
        process = subprocess.Popen(command)
        process.wait()
    except TypeError as e:
        print("Error occurred: {}".format(e))
        return

    # The graph file handed back by setup is no longer needed.
    os.remove(edge_file)

    communities_path = data_prefix + "cmtyvv.txt"
    return read_communities_by_community(communities_path, G)
Ejemplo n.º 7
0
def coda(
    G,
    data_prefix="snap_",
    node_filepath="",
    graph_type=0,
    detect_comm=100,
    min_comm=5,
    max_comm=100,
    trials=10,
    threads=4,
    alpha=0.05,
    beta=0.3,
):
    """
    Coda from Snap

    Parameters
    ----------
    G :                 A NetworkX graph or edge list file
    data_prefix:        Passed as -o:; results are read back from
                        data_prefix + "cmtyvv.out.txt"
    node_filepath:      Input file name for node names (Node ID, Node label)
    graph_type:         0=directed, 1=undirected (default: 0)
    detect_comm:        The number of communities to detect (-1: detect automatically) (Default: 100)
    min_comm:           Minimum number of communities to try (Default = 5)
    max_comm:           Maximum number of communities to try (Default = 100)
    trials:             How many trials for the number of communities (Default = 10)
    threads:            Number of threads for parallelization (Default = 4)
    alpha:              Alpha for backtracking line search (Default = 0.05)
    beta:               Beta for backtracking line search (Default = 0.3)

    Returns:  Whatever read_communities_by_community produces for the "out"
    community file, or None when setup yields no graph file or a TypeError is
    raised while assembling/launching the command.
    """

    snap_home, graph_file = setup(G)

    # Bail out early when setup could not provide a graph file.
    if graph_file is None:
        return None

    path_coda = os.path.join(snap_home, "examples", "coda", "coda")

    try:
        # Build the argument list first so a bad argument type fails before
        # any file handle is opened. Every option carries a leading dash;
        # max_comm goes in as "-xc:".
        command = [
            path_coda,
            "-o:" + data_prefix,
            "-i:" + graph_file,
            "-l:" + node_filepath,
            "-g:" + str(graph_type),
            "-c:" + str(detect_comm),
            "-mc:" + str(min_comm),
            "-xc:" + str(max_comm),
            "-nc:" + str(trials),
            "-nt:" + str(threads),
            "-sa:" + str(alpha),
            "-sb:" + str(beta),
        ]

        # The executable's stdout is discarded; the context manager makes
        # sure the devnull handle is closed on every path.
        with open(os.devnull, "w") as FNULL:
            subprocess.Popen(command, stdout=FNULL).wait()

    except TypeError as e:
        print("Error occurred: {}".format(e))
        return None

    # The graph file handed back by setup is no longer needed.
    os.remove(graph_file)

    # CoDA writes both an "in" and an "out" community file; only the "out"
    # file is read here (delete_file is not passed for it).
    return read_communities_by_community(data_prefix + "cmtyvv.out.txt", G)