import os

import numpy as np
import networkx as nx
from scipy import stats
import graph_tool.all as gt


def _gen(self, gname: str, gen_id: int) -> nx.Graph:
    import graph_tool.all as gt  # local import

    assert 'state' in self.params, 'missing parameter: state for SBM'
    state = self.params['state']

    # Resample a graph from the fitted blockmodel: the block memberships
    # and the block-level edge-count matrix fully specify the SBM
    gen_gt_g = gt.generate_sbm(
        state.b.a,
        gt.adjacency(state.get_bg(), state.get_ers()).T)  # returns a graph-tool graph
    g = graphtool_to_networkx(gen_gt_g)
    g.name = gname
    g.gen_id = gen_id
    return g
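
# Illustrative sketch (an assumption, not part of the original class): the same
# fit-then-resample pipeline as _gen above, written as a standalone function.
# It assumes the module-level helper `graphtool_to_networkx` used by _gen, and
# the "polbooks" dataset name is chosen only for the demo.
def _demo_sbm_resample() -> nx.Graph:
    import graph_tool.all as gt
    seed = gt.collection.data["polbooks"]
    state = gt.minimize_blockmodel_dl(seed)  # fit an SBM to the seed graph
    gen_gt_g = gt.generate_sbm(
        state.b.a, gt.adjacency(state.get_bg(), state.get_ers()).T)
    return graphtool_to_networkx(gen_gt_g)
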
def generate_power_law_bipartite_net(N, frac_left_node, gamma, ave_deg,
                                     min_deg_left, min_deg_right, node_class):
    """
    Generate a power-law bipartite network.

    Params
    ------
    N : int
        Number of nodes
    frac_left_node : float
        Fraction of nodes on the left part.
    gamma : float
        Power-law exponent (same exponent for both sides)
    ave_deg : float
        Average degree
    min_deg_left : int
        Minimum degree for nodes on the left part
    min_deg_right : int
        Minimum degree for nodes on the right part
    node_class : list of str
        Name of the class for the left and right nodes
        node_class[0] : str for left nodes.
        node_class[1] : str for right nodes.

    Return
    ------
    G : networkx.Graph
    """

    def zipf(a, min_v, max_v, size=None):
        """
        Generate Zipf-like random variables,
        but in the inclusive [min_v, max_v] interval
        """
        v = np.arange(min_v, max_v + 1)  # values to sample
        p = 1.0 / np.power(v, a)  # probabilities
        p /= np.sum(p)  # normalized
        return np.random.choice(v, size=size, replace=True, p=p)

    def add_n_stubs(deg, n):
        """
        Add n stubs to the degree sequence
        """
        stubs = np.random.choice(np.arange(len(deg)),
                                 size=int(n),
                                 replace=True,
                                 p=deg / np.sum(deg))
        for s in stubs:
            deg[s] += 1
        return deg

    def to_graphical_deg_seq(deg_left, deg_right):
        """
        Make the degree sequences graphical by adding stubs
        until both sides have the same number of edge endpoints
        """
        deg_left_sum = np.sum(deg_left)
        deg_right_sum = np.sum(deg_right)

        if deg_left_sum < deg_right_sum:
            deg_left = add_n_stubs(deg_left, deg_right_sum - deg_left_sum)
        elif deg_left_sum > deg_right_sum:
            deg_right = add_n_stubs(deg_right, deg_left_sum - deg_right_sum)
        return deg_left, deg_right

    # Compute the number of nodes
    N_left = int(N * frac_left_node)
    N_right = N - N_left

    # Generate degree sequences with the prescribed exponent
    # (the original hard-coded 3 here, shadowing the gamma parameter)
    deg_left = zipf(gamma, min_deg_left, N_right, size=N_left)
    deg_right = zipf(gamma, min_deg_right, N_left, size=N_right)

    # Rescale such that the average degree is the prescribed average degree
    E = ave_deg * (N_left + N_right)
    deg_left = np.clip(np.round(deg_left * E / np.sum(deg_left)),
                       min_deg_left, N_right)
    deg_right = np.clip(np.round(deg_right * E / np.sum(deg_right)),
                        min_deg_right, N_left)

    # Make them graphical degree sequences
    deg_left, deg_right = to_graphical_deg_seq(deg_left, deg_right)

    # Prepare parameters for graph-tool
    E = np.sum(deg_right)
    gt_params = {
        "out_degs": np.concatenate([np.zeros_like(deg_left),
                                    deg_right]).astype(int),
        "in_degs": np.concatenate([deg_left,
                                   np.zeros_like(deg_right)]).astype(int),
        "b": np.concatenate([np.zeros(N_left), np.ones(N_right)]),
        "probs": np.array([[0, 0], [E, 0]]),
        "directed": True,
        "micro_degs": True,
    }

    # Regenerate the network until the degree sequence
    # satisfies the thresholds
    while True:
        g = gt.generate_sbm(**gt_params)

        A = gt.adjacency(g).T
        A.data = np.ones_like(A.data)
        outdeg = np.array(A.sum(axis=1)).reshape(-1)[N_left:]
        indeg = np.array(A.sum(axis=0)).reshape(-1)[:N_left]
        if (np.min(indeg) >= min_deg_left) and (np.min(outdeg) >= min_deg_right):
            break

    # Convert to a networkx object
    # (nx.from_scipy_sparse_array in networkx >= 3.0)
    G = nx.from_scipy_sparse_matrix(A, create_using=nx.Graph)

    # Add attributes to the nodes; left nodes occupy indices [0, N_left),
    # so node i belongs to the right class iff i >= N_left
    # (the original used i > N_left, misclassifying node N_left)
    node_attr = {i: node_class[int(i >= N_left)] for i in range(N)}
    nx.set_node_attributes(G, node_attr, "class")
    return G
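
# Illustrative usage sketch (an assumption, not part of the original module):
# build a small user-item bipartite network and report basic statistics.
# Parameter values are picked only for the demo.
def _demo_power_law_bipartite() -> None:
    G = generate_power_law_bipartite_net(
        N=1000, frac_left_node=0.4, gamma=3.0, ave_deg=5.0,
        min_deg_left=1, min_deg_right=1, node_class=["user", "item"])
    classes = nx.get_node_attributes(G, "class")
    n_left = sum(1 for c in classes.values() if c == "user")
    print(f"{G.number_of_nodes()} nodes ({n_left} left), "
          f"{G.number_of_edges()} edges")
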
def generate(args, utility):
    N = args.numvertices
    # oversample the node count; isolated vertices are filtered out below
    N_adjusted = int(args.numvertices * 1.13)
    C = args.communities
    M = args.communityexponent
    min_degree = 1  # args.mindegree
    max_degree = int(args.maxdegree * N_adjusted)  # A = args.maxdegree
    ratio_within_over_between = args.overlap
    block_size_heterogeneity = args.blocksizevariation
    powerlaw_exponent = args.powerlawexponent
    density = args.density

    num_blocks = C
    if num_blocks == -1:
        # number of blocks grows sub-linearly with the number of nodes;
        # the exponent M is a parameter
        num_blocks = int(N_adjusted**M)
    print('Number of blocks: {}'.format(num_blocks))

    # N_adjusted = 200  # number of nodes
    tag = "test_{0}_{1}_{2}_{3}".format(num_blocks, args.maxdegree,
                                        powerlaw_exponent, density)
    overlap = "low"
    if args.overlap < 5:
        overlap = "high"
    block_size_variation = "low"
    if args.blocksizevariation > 1.0:
        block_size_variation = "high"
    file_name = ("{4}/{3}/{0}Overlap_{1}BlockSizeVar/"
                 "{3}_{0}Overlap_{1}BlockSizeVar_{2}_nodes").format(
                     args.overlap, args.blocksizevariation, N, tag,
                     args.directory)
    if args.remote:
        utility.exec_command("mkdir -p {}".format(os.path.dirname(file_name)))
    else:
        utility.makedirs(os.path.dirname(file_name), exist_ok=True)

    # define a discrete power-law distribution
    def discrete_power_law(a, min_v, max_v):
        x = np.arange(min_v, max_v + 1, dtype='float')
        pmf = x**a
        pmf /= pmf.sum()
        return stats.rv_discrete(values=(x, pmf))

    print("expected degrees: [{},{}]".format(min_degree, max_degree))

    # set the in-degree and out-degree distributions
    rv_indegree = discrete_power_law(powerlaw_exponent, min_degree, max_degree)
    rv_outdegree = discrete_power_law(powerlaw_exponent, min_degree, max_degree)

    # define the return function for in- and out-degrees
    def degree_distribution_function(rv1, rv2):
        return (rv1.rvs(size=1), rv2.rvs(size=1))

    # this parameter adjusts the ratio between the total number of
    # within-block edges and between-block edges
    # ratio_within_over_between = 5

    # set the within-block and between-block edge strengths accordingly
    def inter_block_strength(a, b):
        if a == b:  # within-block interaction strength
            return 1
        else:  # between-block interaction strength
            avg_within_block_nodes = float(N_adjusted) / num_blocks
            avg_between_block_nodes = N_adjusted - avg_within_block_nodes
            return (avg_within_block_nodes / avg_between_block_nodes /
                    ratio_within_over_between)

    # draw the block membership distribution from a Dirichlet random variable
    # block_size_heterogeneity = 1  # 3; larger means block sizes are more uneven
    block_distribution = np.random.dirichlet(
        np.ones(num_blocks) * 10 / block_size_heterogeneity, 1)[0]

    # draw a block membership for each node
    block_membership_vector = np.where(
        np.random.multinomial(n=1, size=N_adjusted,
                              pvals=block_distribution))[1]

    # renumber the blocks in case some blocks don't have any elements
    blocks, counts = np.unique(block_membership_vector, return_counts=True)
    block_mapping = {value: index for index, value in enumerate(blocks)}
    block_membership_vector = np.asarray([
        block_mapping[block_membership_vector[i]]
        for i in range(block_membership_vector.size)
    ])
    num_blocks = blocks.size

    ####################
    # GENERATE DEGREE-CORRECTED SBM
    ####################
    blocks, counts = np.unique(block_membership_vector, return_counts=True)
    block_edge_propensities = np.zeros((num_blocks, num_blocks),
                                       dtype=np.float32)
    for row in range(num_blocks):
        for col in range(num_blocks):
            strength = inter_block_strength(row, col)
            value = strength * counts[row] * counts[col]
            block_edge_propensities[row, col] = value

    if N_adjusted > 1000000:
        total_degrees = np.asarray(
            [rv_outdegree.rvs() for i in range(N_adjusted)])
    else:
        total_degrees = rv_outdegree.rvs(size=N_adjusted)
    # split each node's total degree uniformly at random into out- and in-degree
    out_degrees = np.random.uniform(size=N_adjusted) * total_degrees
    out_degrees = np.round(out_degrees)
    in_degrees = total_degrees - out_degrees
    sum_degrees = total_degrees.sum()
    print("sum degrees: ", sum_degrees)
    expected_e = sum_degrees
    K = expected_e / (np.sum(out_degrees + in_degrees))
    print("out: [{},{}]".format(np.min(out_degrees), np.max(out_degrees)))
    print("in: [{},{}]".format(np.min(in_degrees), np.max(in_degrees)))
    # print("B:\n", block_edge_propensities)

    g_sample = gt.generate_sbm(
        # Block membership of each vertex
        b=block_membership_vector,
        # Edge propensities between communities, rescaled so the matrix
        # sums to the expected number of edges
        probs=block_edge_propensities *
        (expected_e / block_edge_propensities.sum()),
        # The out-degree propensity of each vertex
        out_degs=out_degrees,
        # The in-degree propensity of each vertex
        in_degs=in_degrees,
        directed=True,
        micro_ers=False,  # If True, num edges b/n groups will be exactly probs
        micro_degs=False  # If True, degrees of nodes will be exactly degs
    )

    # remove (1 - density) percent of the edges
    edge_filter = g_sample.new_edge_property('bool')
    edge_filter.a = stats.bernoulli.rvs(density, size=edge_filter.a.shape)
    g_sample.set_edge_filter(edge_filter)
    g_sample.purge_edges()

    # remove all island vertices
    print('Filtering out zero vertices...')
    degrees = g_sample.get_total_degrees(np.arange(g_sample.num_vertices()))
    vertex_filter = g_sample.new_vertex_property('bool', vals=degrees > 0.0)
    g_sample.set_vertex_filter(vertex_filter)
    g_sample.purge_vertices()

    # store the nodal block memberships in a vertex property
    block_membership_vector = block_membership_vector[degrees > 0.0]
    true_partition = block_membership_vector
    assert block_membership_vector.size == g_sample.num_vertices()
    block_membership = g_sample.new_vertex_property(
        "int", vals=block_membership_vector)

    # compute and report basic statistics on the generated graph
    bg, bb, vcount, ecount, avp, aep = gt.condensation_graph(
        g_sample, block_membership, self_loops=True)
    edge_count_between_blocks = np.zeros((num_blocks, num_blocks))
    for e in bg.edges():
        edge_count_between_blocks[bg.vertex_index[e.source()],
                                  bg.vertex_index[e.target()]] = ecount[e]
    num_within_block_edges = sum(edge_count_between_blocks.diagonal())
    num_between_block_edges = g_sample.num_edges() - num_within_block_edges

    # print count statistics
    print('Number of nodes: {} expected {} filtered % {}'.format(
        g_sample.num_vertices(), N,
        (N_adjusted - g_sample.num_vertices()) / N_adjusted))
    print('Number of edges: {} expected number of edges: {}'.format(
        g_sample.num_edges(), expected_e))
    degrees = g_sample.get_total_degrees(np.arange(g_sample.num_vertices()))
    print('Vertex degrees: [{},{},{}]'.format(np.min(degrees),
                                              np.mean(degrees),
                                              np.max(degrees)))
    unique_degrees, counts = np.unique(degrees, return_counts=True)
    print("degrees: {}\ncounts: {}".format(unique_degrees[:20], counts[:20]))
    print('Avg. Number of nodes per block: {}'.format(g_sample.num_vertices() /
                                                      num_blocks))
    print('# Within-block edges / # Between-block edges: {}'.format(
        num_within_block_edges / num_between_block_edges))

    save_graph(g_sample, true_partition, utility, file_name)
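
# Illustrative invocation sketch (an assumption, not from the original script):
# `save_graph` must be provided elsewhere in this module, and the plain `os`
# module stands in for `utility` in the local (non-remote) path, where only
# `makedirs` is called. Parameter values are picked only for the demo; the
# power-law exponent must be negative, since the pmf above is x**a.
def _demo_generate() -> None:
    import argparse
    args = argparse.Namespace(
        numvertices=10000, communities=-1, communityexponent=0.35,
        maxdegree=0.05, overlap=5.0, blocksizevariation=1.0,
        powerlawexponent=-2.1, density=1.0, directory='.', remote=False)
    generate(args, os)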