def SimulateSbm(sbm_data, num_vertices, num_edges, pi, prop_mat, out_degs=None):
    """Generates a stochastic block model, storing data in sbm_data.graph.

    This function uses graph_tool.generate_sbm. Refer to that documentation for
    more information on the model and parameters.

    Args:
      sbm_data: StochasticBlockModel dataclass to store result data. Its
        `graph_memberships` and `graph` attributes are overwritten.
      num_vertices: (int) number of nodes in the graph.
      num_edges: (int) expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0
        (checked to 12 decimal places).
      prop_mat: square, symmetric matrix of community edge count rates.
      out_degs: Out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.

    Returns:
      (none)

    Raises:
      ValueError: if the entries of `pi` do not sum to 1.0, or if `prop_mat`
        is not k x k with k = len(pi).
    """
    # Compare with a tolerance instead of `!= 1.0`: proportion vectors that are
    # computed programmatically routinely miss 1.0 by a few ulps, and exact
    # float equality would reject them. This mirrors
    # assertAlmostEqual(np.sum(pi), 1.0, places=12).
    if round(abs(np.sum(pi) - 1.0), 12) != 0:
        raise ValueError("entries of pi must sum to 1.0")
    if prop_mat.shape[0] != len(pi) or prop_mat.shape[1] != len(pi):
        raise ValueError("prop_mat must be k x k where k = len(pi)")

    sbm_data.graph_memberships = _GenerateNodeMemberships(num_vertices, pi)
    edge_counts = _ComputeExpectedEdgeCounts(num_edges, num_vertices, pi,
                                             prop_mat)
    sbm_data.graph = generation.generate_sbm(sbm_data.graph_memberships,
                                             edge_counts, out_degs)

    # Clean up the sampled graph: drop self loops and parallel edges, then
    # reindex the remaining edges.
    stats.remove_self_loops(sbm_data.graph)
    stats.remove_parallel_edges(sbm_data.graph)
    sbm_data.graph.reindex_edges()
def SimulateSbm(sbm_data, num_vertices, num_edges, pi, prop_mat, out_degs=None,
                pi2=None):
    """Simulates an SBM graph (optionally heterogeneous) into `sbm_data`.

    The graph itself is sampled with graph_tool.generate_sbm; see that
    documentation for details on the model and its parameters.

    When `pi2` is supplied the result is a heterogeneous SBM: every node has
    exactly one of two types and both types are present. In that case the
    cross-link and per-type cluster fields of `sbm_data` are filled as well.
    See the StochasticBlockModel dataclass for details.

    Args:
      sbm_data: StochasticBlockModel dataclass that receives the results.
      num_vertices: (int) number of nodes in the graph.
      num_edges: (float) expected number of edges in the graph.
      pi: iterable of non-zero relative community sizes. Community i will be
        pi[i] / pi[j] times larger than community j.
      prop_mat: square, symmetric matrix of community edge count rates. Its
        side length must equal len(pi) + len(pi2).
      out_degs: Out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.
      pi2: relative community sizes for the type 2 vertices. Type 2 community
        k will be pi2[k] / pi[j] times larger than type 1 community j.
        Supplying this argument produces a heterogeneous model.

    Returns:
      (none)

    Raises:
      ValueError: if `prop_mat` is not k x k with k = len(pi) + len(pi2).
    """
    type2_pi = [] if pi2 is None else pi2
    k1 = len(pi)
    k2 = len(type2_pi)

    # Stack both proportion vectors and rescale them jointly to sum to one.
    stacked = np.array(list(pi) + list(type2_pi))
    proportions = stacked.astype(np.float64)
    proportions /= np.sum(proportions)

    num_blocks = len(proportions)
    if not (prop_mat.shape[0] == num_blocks
            and prop_mat.shape[1] == num_blocks):
        raise ValueError("prop_mat must be k x k; k = len(pi1) + len(pi2)")

    sbm_data.graph_memberships = _GenerateNodeMemberships(num_vertices,
                                                          proportions)
    sbm_data.type1_clusters = sorted(set(sbm_data.graph_memberships))

    if k2 > 0:
        # Heterogeneous case: the cluster lists are derived from the
        # cross links instead of from the raw memberships.
        sbm_data.cross_links = hsu.GetCrossLinks([k1, k2], 0, 1)
        first_type, second_type = zip(*sbm_data.cross_links)
        sbm_data.type1_clusters = sorted(set(first_type))
        sbm_data.type2_clusters = sorted(set(second_type))

    edge_counts = _ComputeExpectedEdgeCounts(num_edges, num_vertices,
                                             proportions, prop_mat)
    sbm_data.graph = generation.generate_sbm(sbm_data.graph_memberships,
                                             edge_counts, out_degs)

    # Post-process the sampled graph into a simple graph with fresh indices.
    graph_tool.stats.remove_self_loops(sbm_data.graph)
    graph_tool.stats.remove_parallel_edges(sbm_data.graph)
    sbm_data.graph.reindex_edges()
def generate_block_model(nodes, groups, in_group_p, between_group_p):
    """Builds group memberships and a propensity matrix, then calls generate_sbm.

    Nodes are assigned to groups round-robin (node i goes to group
    i % groups). Entry (r, s) of the propensity matrix is
    in_group_p * size_r * size_s on the diagonal and
    between_group_p * size_r * size_s / 2 off the diagonal.

    Args:
      nodes: number of nodes to assign.
      groups: number of groups.
      in_group_p: multiplier used for the diagonal (within-group) entries.
      between_group_p: multiplier used for the off-diagonal entries.

    Returns:
      Whatever generate_sbm returns for the memberships and matrix built here.
    """
    memberships = [node % groups for node in range(nodes)]

    sizes = [0] * groups
    for block in memberships:
        sizes[block] += 1

    # Every cell is written below, so an uninitialized buffer is sufficient.
    propensities = np.empty((groups, groups))
    for row, row_size in enumerate(sizes):
        for col, col_size in enumerate(sizes):
            if row == col:
                propensities[row, col] = in_group_p * row_size * col_size
            else:
                propensities[row, col] = (
                    between_group_p * row_size * col_size / 2)

    return generate_sbm(memberships, propensities)
def SimulateSbm(sbm_data, num_vertices, num_edges, pi, prop_mat, out_degs=None):
    """Simulates a stochastic block model and stores it in sbm_data.graph.

    Sampling is delegated to graph_tool.generate_sbm; consult that
    documentation for details on the model and its parameters.

    Args:
      sbm_data: StochasticBlockModel dataclass that receives the results.
      num_vertices: (int) number of nodes in the graph.
      num_edges: (int) expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0.
      prop_mat: square, symmetric matrix of community edge count rates.
      out_degs: Out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.

    Returns:
      (none)

    Raises:
      ValueError: if the entries of `pi` do not sum to 1.0 (to 12 decimal
        places), or if `prop_mat` is not k x k with k = len(pi).
    """
    # Same test as assertAlmostEqual(np.sum(pi), 1.0, places=12):
    # https://docs.python.org/3/library/unittest.html#unittest.TestCase.assertNotAlmostEqual
    #
    # A strict equality test is too harsh: some theoretically valid ways of
    # computing a simplex vector programmatically pick up precision errors
    # (see the simulate_sbm_community_sizes_seven_groups test in
    # sbm_simulator_test.py). Twelve places covers the known cases to date.
    deviation = abs(np.sum(pi) - 1.0)
    if round(deviation, 12) != 0:
        raise ValueError("entries of pi must sum to 1.0")

    num_communities = len(pi)
    if not (prop_mat.shape[0] == num_communities
            and prop_mat.shape[1] == num_communities):
        raise ValueError("prop_mat must be k x k where k = len(pi)")

    sbm_data.graph_memberships = _GenerateNodeMemberships(num_vertices, pi)
    expected_edges = _ComputeExpectedEdgeCounts(num_edges, num_vertices, pi,
                                                prop_mat)
    sbm_data.graph = generation.generate_sbm(sbm_data.graph_memberships,
                                             expected_edges, out_degs)

    # Strip self loops and parallel edges, then reindex what is left.
    graph_tool.stats.remove_self_loops(sbm_data.graph)
    graph_tool.stats.remove_parallel_edges(sbm_data.graph)
    sbm_data.graph.reindex_edges()
def SimulateSbm(self, n, m, pi, prop_mat, out_degs=None):
    """Generates a stochastic block model.

    This function uses graph_tool.generation.generate_sbm. Refer to that
    documentation for more information on the model and parameters. The
    results are stored on the instance as `self.memberships` and `self.graph`.

    Args:
      n: (int) number of nodes in the graph.
      m: (int) expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0
        (checked to 12 decimal places).
      prop_mat: square, symmetric matrix of community edge count rates.
      out_degs: Out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.

    Returns:
      (none)

    Raises:
      ValueError: if the entries of `pi` do not sum to 1.0, or if `prop_mat`
        is not k x k with k = len(pi).
    """
    # Compare with a tolerance instead of `!= 1.0`: proportion vectors that are
    # computed programmatically routinely miss 1.0 by a few ulps, and exact
    # float equality would reject them. This mirrors
    # assertAlmostEqual(np.sum(pi), 1.0, places=12).
    if round(abs(np.sum(pi) - 1.0), 12) != 0:
        raise ValueError("entries of pi must sum to 1.0")
    if prop_mat.shape[0] != len(pi) or prop_mat.shape[1] != len(pi):
        raise ValueError("prop_mat must be k x k where k = len(pi)")

    self.memberships = self._GenerateNodeMemberships(n, pi)
    edge_counts = self._ComputeExpectedEdgeCounts(m, n, pi, prop_mat)
    self.graph = generation.generate_sbm(self.memberships, edge_counts,
                                         out_degs)
def SimulateSbm(sbm_data, num_vertices, num_edges, pi, prop_mat, out_degs=None,
                num_vertices2=0, pi2=None):
    """Generates a stochastic block model, storing data in sbm_data.graph.

    This function uses graph_tool.generate_sbm. Refer to that documentation for
    more information on the model and parameters.

    This function can generate a heterogeneous SBM graph, meaning each node is
    exactly one of two types (and both types are present). To generate a
    heterogeneous SBM graph, both `num_vertices2` and `pi2` must be non-zero
    and supplied (respectively). When this happens, additional fields of
    `sbm_data` are filled. See the StochasticBlockModel dataclass for full
    details.

    Args:
      sbm_data: StochasticBlockModel dataclass to store result data.
      num_vertices: (int) number of nodes in the graph.
      num_edges: (int) expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0
        (checked to 12 decimal places).
      prop_mat: square, symmetric matrix of community edge count rates. Its
        side length must equal len(pi) + len(pi2).
      out_degs: Out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.
      num_vertices2: If simulating a heterogeneous SBM, this is the number of
        vertices of type 2.
      pi2: If simulating a heterogeneous SBM, this is the pi vector for the
        vertices of type 2. Must sum to 1.0.

    Returns:
      (none)

    Raises:
      ValueError: if `num_vertices2` is negative, if exactly one of
        `num_vertices2` / `pi2` is supplied, if `pi` or `pi2` does not sum to
        1.0, or if `prop_mat` has the wrong shape.
    """
    if num_vertices2 < 0:
        raise ValueError("num_vertices2 must be non-negative")
    if (num_vertices2 == 0) != (pi2 is None):
        raise ValueError(
            "num_vertices2 and pi2 must be either both supplied or both None")
    if num_vertices2 == 0:
        pi2 = []

    # Equivalent to assertAlmostEqual(np.sum(pi), 1.0, places=12)
    # https://docs.python.org/3/library/unittest.html#unittest.TestCase.assertNotAlmostEqual
    #
    # Some leniency is required here because some theoretically-valid ways to
    # programmatically compute a simplex vector suffer from precision errors.
    # One example of this is in the simulate_sbm_community_sizes_seven_groups
    # test from sbm_simulator_test.py. Places>=12 covers known similar cases
    # (to date).
    if round(abs(np.sum(pi) - 1.0), 12) != 0:
        raise ValueError("entries of pi must sum to 1.0")
    if len(pi2) > 0 and round(abs(np.sum(pi2) - 1.0), 12) != 0:
        raise ValueError("entries of pi2 must sum to 1.0")

    k1, k2 = len(pi), len(pi2)
    # Force a float dtype: integer input such as pi=[1] would otherwise build
    # an integer array, and the in-place division below would raise.
    # NOTE(review): after this normalization each node type holds half of the
    # total mass regardless of num_vertices vs. num_vertices2 -- confirm that
    # _GenerateNodeMemberships is meant to receive it in this form.
    pi = np.array(list(pi) + list(pi2)).astype(np.float64)
    pi /= np.sum(pi)
    if prop_mat.shape[0] != len(pi) or prop_mat.shape[1] != len(pi):
        raise ValueError("prop_mat must be k x k; k = len(pi1) + len(pi2)")

    total_vertices = num_vertices + num_vertices2
    sbm_data.graph_memberships = _GenerateNodeMemberships(total_vertices, pi)
    sbm_data.type1_clusters = sorted(set(sbm_data.graph_memberships))
    if num_vertices2 > 0:
        # Heterogeneous case: the per-type cluster lists come from the
        # cross links rather than from the raw memberships.
        sbm_data.cross_links = hsu.GetCrossLinks(k1, k2)
        type1_clusters, type2_clusters = zip(*sbm_data.cross_links)
        sbm_data.type1_clusters = sorted(set(type1_clusters))
        sbm_data.type2_clusters = sorted(set(type2_clusters))

    edge_counts = _ComputeExpectedEdgeCounts(num_edges, total_vertices, pi,
                                             prop_mat)
    sbm_data.graph = generation.generate_sbm(sbm_data.graph_memberships,
                                             edge_counts, out_degs)

    # Clean up the sampled graph: drop self loops and parallel edges, then
    # reindex the remaining edges.
    graph_tool.stats.remove_self_loops(sbm_data.graph)
    graph_tool.stats.remove_parallel_edges(sbm_data.graph)
    sbm_data.graph.reindex_edges()