Example #1
def generate_L_sbm(nnodes=10, p_in=.8, p_out=.1, seed=42, n_blocks=2):
    """generates L from stochastic block model with two clusters, with nnodes."""
    if n_blocks == 2:
        G = stochastic_block_model([nnodes // 2, nnodes // 2],
                                   [[p_in, p_out], [p_out, p_in]],
                                   seed=seed)
        L = nx.laplacian_matrix(G).todense()
        return L
    else:
        prob_matrix = np.eye(n_blocks) * (p_in - p_out) + p_out
        G = stochastic_block_model([nnodes // n_blocks] * n_blocks,
                                   prob_matrix,
                                   seed=seed)
        L = nx.laplacian_matrix(G).todense()
        return L
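A minimal usage sketch for generate_L_sbm; the imports the snippet relies on (numpy, networkx, stochastic_block_model) are assumed to be in scope and repeated here for completeness:

import numpy as np
import networkx as nx
from networkx.generators.community import stochastic_block_model

# Hypothetical call: Laplacian of a 3-block SBM on 12 nodes.
L = generate_L_sbm(nnodes=12, p_in=.8, p_out=.1, seed=42, n_blocks=3)
print(L.shape)                        # (12, 12)
print(np.allclose(L.sum(axis=1), 0))  # True: Laplacian rows sum to zero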
Example #2
def graph_gen(group_sizes, intra_edge_density, extra_edge_density):
    """num_groups is number of nodes in each group
    num_nodes uses SBM as graph generator
    initializes graph and edge weights"""

    p = generate_density_matrix(intra_edge_density, extra_edge_density)

    g = stochastic_block_model(group_sizes, p, directed=True, selfloops=False)
    g = edge_weight_init(g)
    return g
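generate_density_matrix and edge_weight_init are project helpers that are not shown on this page. As a rough sketch only (hypothetical behaviour, not the project's actual implementation), they could look like this:

import random
import numpy as np

def generate_density_matrix(intra_edge_density, extra_edge_density, n_groups=2):
    # Hypothetical stand-in: intra-group density on the diagonal, inter-group
    # density everywhere else. The real helper presumably knows the number of
    # groups some other way, since graph_gen passes only two arguments.
    p = np.full((n_groups, n_groups), extra_edge_density, dtype=float)
    np.fill_diagonal(p, intra_edge_density)
    return p

def edge_weight_init(g, low=0.0, high=1.0):
    # Hypothetical stand-in: draw a uniform random weight for every edge.
    for u, v in g.edges():
        g[u][v]['weight'] = random.uniform(low, high)
    return g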
Example #3
def create_graph(pi, nq):
    # Build an SBM graph with block sizes nq and block probabilities pi;
    # return its sparse adjacency matrix together with a block-label vector.
    graph = stochastic_block_model(nq, pi)
    adjacency = to_scipy_sparse_matrix(graph)
    labels = np.ones(np.sum(nq), dtype=int)

    # Assign label q to the nodes of block q using cumulative block offsets.
    nq_sum = np.cumsum(nq)
    labels[:nq[0]] *= 0
    for q in range(1, len(nq) - 1):
        labels[nq_sum[q - 1]:nq_sum[q]] *= q
    labels[nq_sum[-2]:] *= len(nq) - 1

    return adjacency, labels
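A usage sketch for create_graph, assuming numpy as np, stochastic_block_model and to_scipy_sparse_matrix are imported as the snippet expects (to_scipy_sparse_matrix was removed in networkx 3.0, so these examples target networkx 2.x):

nq = [50, 50, 100]                       # block sizes
pi = [[0.30, 0.05, 0.05],
      [0.05, 0.30, 0.05],
      [0.05, 0.05, 0.30]]                # block-to-block edge probabilities

adjacency, labels = create_graph(pi, nq)
print(adjacency.shape)                   # (200, 200) sparse adjacency matrix
print(np.bincount(labels))               # [ 50  50 100] -> one label per block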
Example #4
def generates_equal_sbm(n_clusters, n_by_cluster, p_in, p_out):
    # Build an SBM with n_clusters equal-sized blocks: edge probability p_in
    # inside a block and p_out between blocks.
    p = np.ones((n_clusters, n_clusters)) * p_out
    for i in range(n_clusters):
        p[i, i] = p_in

    graph_sbm = stochastic_block_model([n_by_cluster for k in range(n_clusters)], p)
    adjacency_sbm = to_scipy_sparse_matrix(graph_sbm)
    labels_sbm = np.ones(shape=(n_by_cluster * n_clusters,), dtype=int)

    # Label the nodes of cluster k with k.
    for k in range(n_clusters):
        labels_sbm[k * n_by_cluster:(k + 1) * n_by_cluster] *= k

    return adjacency_sbm, labels_sbm
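A usage sketch for generates_equal_sbm, under the same import assumptions as above:

# Hypothetical call: 4 clusters of 25 nodes, dense inside clusters, sparse between.
adjacency_sbm, labels_sbm = generates_equal_sbm(n_clusters=4, n_by_cluster=25,
                                                p_in=0.5, p_out=0.05)
print(adjacency_sbm.shape)      # (100, 100)
print(np.bincount(labels_sbm))  # [25 25 25 25] -> 25 nodes per cluster label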
Example #5
def get_graph(graph_path, labels, gen_sbm, seed):
    if gen_sbm:
        graph, node_mappings, reverse_node_mappings = create_graph_and_node_mappings_from_file(
            graph_path)
        edge_probabilities, block_sizes, node_lists = build_label_based_sbm(
            graph, node_mappings, reverse_node_mappings, labels)

        sbm = stochastic_block_model(block_sizes, edge_probabilities,
                                     node_lists, seed, True, False)
        sbm = nx.relabel_nodes(sbm, reverse_node_mappings)
        graph = sbm
    else:
        graph = nx.read_edgelist(graph_path, create_using=nx.DiGraph)
    return graph
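create_graph_and_node_mappings_from_file and build_label_based_sbm are project-specific helpers that are not shown. For readability, the positional call above maps onto the networkx signature stochastic_block_model(sizes, p, nodelist=None, seed=None, directed=False, selfloops=False, sparse=True), i.e. it is equivalent to:

sbm = stochastic_block_model(sizes=block_sizes,
                             p=edge_probabilities,
                             nodelist=node_lists,
                             seed=seed,
                             directed=True,
                             selfloops=False)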
Example #6
def SBM(
    sizes: list,
    p: list,
    nodelist: list = None,
    seed: object = 42,
    directed: bool = False,
    selfloops: bool = False,
    sparse: bool = True,
) -> [object, object]:
    """
    Returns a stochastic block model graph.

    This model partitions the nodes in blocks of arbitrary sizes, and places edges between pairs of nodes independently, with a probability that depends on the blocks.

    :param sizes: Sizes of blocks (list of ints)
    :param p: Element (r,s) gives the density of edges going from the nodes of group r to nodes of group s. p must match the number of groups (len(sizes) == len(p)), and it must be symmetric if the graph is undirected. (List of floats)
    :param nodelist: The block tags are assigned according to the node identifiers in nodelist. If nodelist is None, then the ordering is the range [0,sum(sizes)-1]. Optional, default None.
    :param seed: Indicator of random number generation state.
    :param directed: Whether to create a directed graph or not. Boolean, default False.
    :param selfloops: Whether to include self-loops or not. Optional, default False.
    :param sparse: Use the sparse heuristic to speed up the generator. Optional, default True.

    :return: A networkx synthetic graph, the set of communities (NodeClustering object)

    :Example:

    >>> from cdlib.benchmark import SBM
    >>> sizes = [75, 75, 300]
    >>> probs = [[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]]
    >>> G, coms = SBM(sizes, probs, seed=0)

    :References:

    Holland, P. W., Laskey, K. B., & Leinhardt, S., “Stochastic blockmodels: First steps”, Social networks, 5(2), 109-137, 1983.

    .. note:: Reference implementation: https://networkx.org/documentation/stable/reference/generated/networkx.generators.community.stochastic_block_model.html#networkx.generators.community.stochastic_block_model
    """

    from networkx.generators.community import stochastic_block_model

    G = stochastic_block_model(
        sizes=sizes,
        p=p,
        nodelist=nodelist,
        seed=seed,
        directed=directed,
        selfloops=selfloops,
        sparse=sparse,
    )
    communities = defaultdict(list)
    for n, data in G.nodes(data=True):
        communities[data["block"]].append(n)

    coms = NodeClustering(
        list(communities.values()),
        G,
        "SBM",
        method_parameters={
            "sizes": sizes,
            "p": p,
            "nodelist": nodelist,
            "seed": seed,
            "directed": directed,
            "selfloops": selfloops,
            "sparse": sparse,
        },
    )

    return G, coms
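A short follow-up to the docstring example (assuming cdlib is installed): the communities in the returned NodeClustering mirror the "block" attribute that networkx stores on every generated node.

G, coms = SBM(sizes=[75, 75, 300],
              p=[[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]],
              seed=0)
print([len(c) for c in coms.communities])  # [75, 75, 300], one community per block
print(G.nodes[0]["block"])                 # 0 -> node 0 belongs to the first block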
Example #7
                  dtype=np.int16)[:, 1]
adj = nx.adjacency_matrix(G).todense()
encoder = LabelEncoder()
true = encoder.fit_transform(true)
np.savetxt('file.csv', adj, delimiter='\t')
test_clustering_structure()

####################

####################
print("---SBM---")
probs = [[0.95, 0.55, 0.02], [0.55, 0.95, 0.55], [0.02, 0.55, 0.95]]
size = 300
sizes = [size, size, size]

G = community.stochastic_block_model(sizes, probs)
true = [[i] * size for i in range(3)]
true = sum(true, [])
adj = nx.adjacency_matrix(G).todense()
np.savetxt('file.csv', adj, delimiter='\t')
test_clustering_structure()

####################

####################
print("---Polblogs---")
G = nx.read_gml("../data/polblogs.gml")

G = G.to_undirected()
print(nx.info(G))
true = []
Example #8
def main_generate_data():
    N_graph = 1  # number of graphs

    for graph in range(N_graph):
        print('### GRAPH {0} ###'.format(graph))

        # stochastic block model
        C = 2  # number of communities
        N = 100  # number of nodes
        deltak = int(
            N /
            C)  # number of nodes per community (communities have equal size)
        L = 2  # number of layers
        L_a = 1  # number of assortative layers
        sizes = [deltak] * C  # sizes of blocks, here equal size

        if not os.path.exists('../data/input' + str(C) + str(L) + str(L_a) +
                              '/graph' + str(graph)):
            os.makedirs('../data/input' + str(C) + str(L) + str(L_a) +
                        '/graph' + str(graph))

        # generate the network structure
        G = [nx.MultiDiGraph() for _ in range(L)]
        for l in range(L):
            if l < L_a:
                p = tl.probabilities('assortative', sizes, N, C)
                G[l] = stochastic_block_model(sizes, p, directed=True)
                # Adj = nx.to_numpy_matrix(G[l], weight='weight')
                # plt.imshow(Adj, cmap="Greys", interpolation="nearest")
                # plt.show()
            elif l == 1:
                p = tl.probabilities('disassortative', sizes, N, C)
                G[l] = stochastic_block_model(sizes, p, directed=True)
                # Adj = nx.to_numpy_matrix(G[l], weight='weight')
                # plt.imshow(Adj, cmap="Greys", interpolation="nearest")
                # plt.show()
            elif l == 2:
                p = tl.probabilities('core-periphery', sizes, N, C)
                G[l] = stochastic_block_model(sizes, p, directed=True)
                # Adj = nx.to_numpy_matrix(G[l], weight='weight')
                # plt.imshow(Adj, cmap="Greys", interpolation="nearest")
                # plt.show()
            elif l == 3:
                p = tl.probabilities('directed-biased', sizes, N, C)
                G[l] = stochastic_block_model(sizes, p, directed=True)
                # Adj = nx.to_numpy_matrix(G[l], weight='weight')
                # plt.imshow(Adj, cmap="Greys", interpolation="nearest")
                # plt.show()

            print('Nodes: ', G[l].number_of_nodes())
            print('Edges: ', G[l].number_of_edges())

        # save the graph
        folder = '../data/input' + str(C) + str(L) + str(L_a) + '/graph' + str(
            graph) + '/'
        if not os.path.exists(folder):
            os.makedirs(folder)

        tl.write_adjacency(G,
                           folder=folder,
                           fname='adj.csv',
                           ego='source',
                           alter='target')

        # generate the covariates
        for perc in [
                0.3, 0.5, 0.7, 0.9
        ]:  # loop over fractions of match between communities and metadata
            if (perc is not None) and (perc > 0.):
                metadata = {}
                nodes_match = np.random.choice(np.array(G[0].nodes()),
                                               size=int(N * perc),
                                               replace=False)
                for i in G[0].nodes():
                    if i in nodes_match:
                        metadata[i] = ('Meta0' if i < deltak else 'Meta1')
                    else:
                        metadata[i] = 'Meta' + str(
                            np.random.randint(2, size=1)[0])
                # name_file = 'nodes_match_' + str(perc)[0] + '_' + str(perc)[2]
                # with open('../data/input' + str(C) + str(L) + str(L_a) + '/graph' + str(graph) + '/' + name_file,
                #           'w', newline='') as myfile:
                #     csv.writer(myfile, quoting=csv.QUOTE_ALL)

                tl.write_design_Matrix(metadata,
                                       perc,
                                       folder=folder,
                                       fname='X_',
                                       nodeID='Name',
                                       attr_name='Metadata')
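tl is a project-specific module (probabilities, write_adjacency, write_design_Matrix) that is not shown here. Purely as an illustration of the idea, and not the project's actual code, a block-probability builder in the spirit of tl.probabilities might look like the sketch below; the real helper and its scaling of edge densities may differ.

import numpy as np

def probabilities_sketch(structure, sizes, N, C, p_in=0.1, p_out=0.01):
    """Hypothetical stand-in for tl.probabilities: return a C x C block matrix.

    sizes and N are accepted only to mirror the call signature used above;
    this illustration does not use them.
    """
    p = np.full((C, C), p_out)
    if structure == 'assortative':          # dense within blocks
        np.fill_diagonal(p, p_in)
    elif structure == 'disassortative':     # dense between blocks
        p = np.full((C, C), p_in)
        np.fill_diagonal(p, p_out)
    elif structure == 'core-periphery':     # block 0 acts as a dense core
        p[0, :] = p_in
        p[:, 0] = p_in
    elif structure == 'directed-biased':    # asymmetric densities (directed SBM)
        p = np.triu(np.full((C, C), p_in), k=1) + p_out * np.eye(C)
    return p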
Example #9
parser.add_argument("--fil", default='lap', type=str, help='Filtration')

if __name__ == '__main__':
    # sys.argv = ['graph/gm.py']
    args = parser.parse_args()
    rs = args.rs
    radius, fil = 1, args.fil
    n_node, p = 100, args.p
    sizes = [n_node] * 3
    permute_flag = True
    labels = [0] * n_node + [1] * n_node + [2] * n_node
    probs = [[0.5, p, p],
             [p, 0.5, p],
             [p, p, 0.5]]

    g = stochastic_block_model(sizes, probs, seed=rs)
    lp = LaplacianEigenmaps(d=1)
    lp.learn_embedding(g, weight='weight')
    lapfeat = lp.get_embedding()
    degfeat = np.array(list(dict(nx.degree(g)).values())).reshape(3 * n_node, 1)
    clf = classifier(degfeat, labels, method=None)
    clf.svm()

    for n in g.nodes():
        g.nodes[n]['lap'] = float(lapfeat[n, 0])  # Graph.node was removed in networkx 2.4; use Graph.nodes
    g = add_edgeval(g, fil=fil)

    ego = egograph(g, radius=radius, n = len(g), recompute_flag=True, norm_flag=True, print_flag=False)
    egographs = ego.egographs(method='serial')
    dgms = alldgms(egographs, radius=radius, dataset='', recompute_flag=True, method='serial', n=n_node)  # compute dgms in parallel
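LaplacianEigenmaps, classifier, egograph, add_edgeval and alldgms come from the surrounding project and are not shown. Note that the hand-built labels list matches the ground truth networkx already records: every generated node carries a "block" attribute (and g.graph["partition"] holds the blocks), so the same labels can be read back from the graph itself.

# Equivalent label extraction from the generator's own node metadata.
labels_from_attr = [g.nodes[n]['block'] for n in g.nodes()]
assert labels_from_attr == labels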
Example #10
def create_sbm_graph(graph, block_sizes, edge_probabilities, node_lists, output_path, seed, reverse_node_mappings):
    sbm = stochastic_block_model(block_sizes, edge_probabilities, node_lists, seed, True, has_selfloops(graph))
    sbm = nx.relabel_nodes(sbm, reverse_node_mappings)
    nx.write_edgelist(sbm, output_path)