Ejemplo n.º 1
0
def SimulateSbm(sbm_data,
                num_vertices,
                num_edges,
                pi,
                prop_mat,
                out_degs=None):
    """Generates a stochastic block model, storing data in sbm_data.graph.

    This function uses graph_tool.generate_sbm. Refer to that
    documentation for more information on the model and parameters.

    Args:
      sbm_data: StochasticBlockModel dataclass to store result data. Its
        `graph_memberships` and `graph` attributes are overwritten.
      num_vertices: (int) number of nodes in the graph.
      num_edges: (int) expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0
        (checked to 12 decimal places).
      prop_mat: square, symmetric matrix of community edge count rates.
      out_degs: Out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.
    Returns: (none)

    Raises:
      ValueError: if the entries of `pi` do not sum to 1.0, or if the first
        two dimensions of `prop_mat` are not both len(pi).
    """
    # Compare with a tolerance instead of `!= 1.0`: proportion vectors that
    # are computed programmatically frequently miss 1.0 by a few ulps, and an
    # exact comparison would reject them. Rounding the absolute error to 12
    # places mirrors unittest's assertAlmostEqual(..., places=12).
    if round(abs(np.sum(pi) - 1.0), 12) != 0:
        raise ValueError("entries of pi must sum to 1.0")
    if prop_mat.shape[0] != len(pi) or prop_mat.shape[1] != len(pi):
        raise ValueError("prop_mat must be k x k where k = len(pi)")
    sbm_data.graph_memberships = _GenerateNodeMemberships(num_vertices, pi)
    edge_counts = _ComputeExpectedEdgeCounts(num_edges, num_vertices, pi,
                                             prop_mat)
    sbm_data.graph = generation.generate_sbm(sbm_data.graph_memberships,
                                             edge_counts, out_degs)
    # Post-process the sampled graph: drop self loops and parallel edges,
    # then reindex the remaining edges.
    stats.remove_self_loops(sbm_data.graph)
    stats.remove_parallel_edges(sbm_data.graph)
    sbm_data.graph.reindex_edges()
Ejemplo n.º 2
0
def SimulateSbm(sbm_data,
                num_vertices,
                num_edges,
                pi,
                prop_mat,
                out_degs=None,
                pi2=None):
    """Simulates a (possibly heterogeneous) SBM and stores it on sbm_data.

    The graph is sampled with graph_tool's generate_sbm; see that
    documentation for details on the model and its parameters.

    When `pi2` is given, the model is heterogeneous: every node belongs to
    exactly one of two node types, and `sbm_data.cross_links` and
    `sbm_data.type2_clusters` are filled in addition to the usual fields. See
    the StochasticBlockModel dataclass for details.

    Args:
      sbm_data: StochasticBlockModel dataclass to store result data.
      num_vertices: (int) number of nodes in the graph.
      num_edges: (float) expected number of edges in the graph.
      pi: iterable of non-zero community size relative proportions. Community
        i will be pi[i] / pi[j] times larger than community j.
      prop_mat: square, symmetric matrix of community edge count rates. Must
        have len(pi) + len(pi2) rows and columns.
      out_degs: Out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.
      pi2: The pi vector for the vertices of type 2. Type 2 community k will
        be pi2[k] / pi[j] times larger than type 1 community j. Supplying
        this argument produces a heterogeneous model.
    Returns: (none)

    Raises:
      ValueError: if the first two dimensions of `prop_mat` are not both
        len(pi) + len(pi2).
    """
    if pi2 is None:
        pi2 = []
    num_type1, num_type2 = len(pi), len(pi2)
    num_groups = num_type1 + num_type2

    # Stack both proportion vectors and renormalize so the combined vector
    # sums to one.
    pi = np.array([*pi, *pi2]).astype(np.float64)
    pi /= pi.sum()

    if not (prop_mat.shape[0] == num_groups
            and prop_mat.shape[1] == num_groups):
        raise ValueError("prop_mat must be k x k; k = len(pi1) + len(pi2)")

    sbm_data.graph_memberships = _GenerateNodeMemberships(num_vertices, pi)
    # Default (homogeneous) case: every observed cluster id is type 1.
    sbm_data.type1_clusters = sorted(set(sbm_data.graph_memberships))
    if num_type2 > 0:
        # Heterogeneous case: derive the per-type cluster ids from the
        # cross links, overriding the default above.
        sbm_data.cross_links = hsu.GetCrossLinks([num_type1, num_type2], 0, 1)
        linked_type1, linked_type2 = zip(*sbm_data.cross_links)
        sbm_data.type1_clusters = sorted(set(linked_type1))
        sbm_data.type2_clusters = sorted(set(linked_type2))

    expected_edges = _ComputeExpectedEdgeCounts(num_edges, num_vertices, pi,
                                                prop_mat)
    sbm_data.graph = generation.generate_sbm(sbm_data.graph_memberships,
                                             expected_edges, out_degs)
    # Clean up the sampled graph before handing it back.
    graph_tool.stats.remove_self_loops(sbm_data.graph)
    graph_tool.stats.remove_parallel_edges(sbm_data.graph)
    sbm_data.graph.reindex_edges()
Ejemplo n.º 3
0
def generate_block_model(nodes, groups, in_group_p, between_group_p):
    """Builds a round-robin block partition and passes it to generate_sbm.

    Node i is assigned to group `i % groups`. The (r, s) entry of the matrix
    handed to generate_sbm is `in_group_p * size_r * size_s` on the diagonal
    and `between_group_p * size_r * size_s / 2` off the diagonal, where
    `size_r` is the number of nodes assigned to group r.

    Args:
      nodes: (int) number of nodes to assign to groups.
      groups: (int) number of groups.
      in_group_p: multiplier applied to diagonal (same-group) entries.
      between_group_p: multiplier applied to off-diagonal entries.

    Returns:
      Whatever generate_sbm returns for the memberships and matrix above.
    """
    # Round-robin assignment of nodes to groups.
    group_memberships = [node % groups for node in range(nodes)]
    group_sizes = [0] * groups
    for group in group_memberships:
        group_sizes[group] += 1

    # Every cell is written below, so an uninitialized buffer is sufficient.
    probabilities = np.empty((groups, groups))
    for row, row_size in enumerate(group_sizes):
        for col, col_size in enumerate(group_sizes):
            if row == col:
                rate = in_group_p * row_size * col_size
            else:
                rate = between_group_p * row_size * col_size / 2
            probabilities[row, col] = rate
    return generate_sbm(group_memberships, probabilities)
Ejemplo n.º 4
0
def SimulateSbm(sbm_data,
                num_vertices,
                num_edges,
                pi,
                prop_mat,
                out_degs=None):
  """Simulates a stochastic block model and records it on `sbm_data`.

  Sampling is delegated to graph_tool's generate_sbm; consult that
  documentation for details on the model and its parameters.

  Args:
    sbm_data: StochasticBlockModel dataclass to store result data. Its
      `graph_memberships` and `graph` attributes are overwritten.
    num_vertices: (int) number of nodes in the graph.
    num_edges: (int) expected number of edges in the graph.
    pi: iterable of non-zero community size proportions. Must sum to 1.0
      (checked to 12 decimal places).
    prop_mat: square, symmetric matrix of community edge count rates.
    out_degs: Out-degree propensity for each node. If not provided, a constant
      value will be used. Note that the values will be normalized inside each
      group, if they are not already so.
  Returns: (none)

  Raises:
    ValueError: if the entries of `pi` do not sum to 1.0, or if the first two
      dimensions of `prop_mat` are not both len(pi).
  """
  # Tolerant sum check, equivalent to assertAlmostEqual(sum, 1.0, places=12).
  # https://docs.python.org/3/library/unittest.html#unittest.TestCase.assertNotAlmostEqual
  #
  # An exact comparison would be too strict: some theoretically valid ways of
  # computing a simplex vector accumulate floating-point error (see the
  # simulate_sbm_community_sizes_seven_groups test in sbm_simulator_test.py).
  # Twelve places covers the similar cases known to date.
  sum_error = abs(np.sum(pi) - 1.0)
  if round(sum_error, 12) != 0:
    raise ValueError("entries of pi must sum to 1.0")

  num_groups = len(pi)
  if not (prop_mat.shape[0] == num_groups
          and prop_mat.shape[1] == num_groups):
    raise ValueError("prop_mat must be k x k where k = len(pi)")

  sbm_data.graph_memberships = _GenerateNodeMemberships(num_vertices, pi)
  expected_edges = _ComputeExpectedEdgeCounts(num_edges, num_vertices, pi,
                                              prop_mat)
  sbm_data.graph = generation.generate_sbm(sbm_data.graph_memberships,
                                           expected_edges, out_degs)

  # Drop self loops and parallel edges, then reindex the remaining edges.
  graph_tool.stats.remove_self_loops(sbm_data.graph)
  graph_tool.stats.remove_parallel_edges(sbm_data.graph)
  sbm_data.graph.reindex_edges()
Ejemplo n.º 5
0
    def SimulateSbm(self, n, m, pi, prop_mat, out_degs=None):
        """Generates a stochastic block model.

        This function uses graph_tool.generation.generate_sbm. Refer to that
        documentation for more information on the model and parameters. The
        result is stored on `self.memberships` and `self.graph`.

        Args:
          n: (int) number of nodes in the graph.
          m: (int) expected number of edges in the graph.
          pi: iterable of non-zero community size proportions. Must sum to
            1.0 (checked to 12 decimal places).
          prop_mat: square, symmetric matrix of community edge count rates.
          out_degs: Out-degree propensity for each node. If not provided, a
            constant value will be used. Note that the values will be
            normalized inside each group, if they are not already so.
        Returns: (none)

        Raises:
          ValueError: if the entries of `pi` do not sum to 1.0, or if the
            first two dimensions of `prop_mat` are not both len(pi).
        """
        # Compare with a tolerance instead of `!= 1.0`: proportion vectors
        # computed programmatically frequently miss 1.0 by a few ulps, and an
        # exact comparison would reject them. Rounding the absolute error to
        # 12 places mirrors unittest's assertAlmostEqual(..., places=12).
        if round(abs(np.sum(pi) - 1.0), 12) != 0:
            raise ValueError("entries of pi must sum to 1.0")
        if prop_mat.shape[0] != len(pi) or prop_mat.shape[1] != len(pi):
            raise ValueError("prop_mat must be k x k where k = len(pi)")
        self.memberships = self._GenerateNodeMemberships(n, pi)
        edge_counts = self._ComputeExpectedEdgeCounts(m, n, pi, prop_mat)
        self.graph = generation.generate_sbm(self.memberships, edge_counts,
                                             out_degs)
Ejemplo n.º 6
0
def SimulateSbm(sbm_data,
                num_vertices,
                num_edges,
                pi,
                prop_mat,
                out_degs=None,
                num_vertices2=0,
                pi2=None):
    """Generates a stochastic block model, storing data in sbm_data.graph.

    This function uses graph_tool.generate_sbm. Refer to that
    documentation for more information on the model and parameters.

    This function can generate a heterogeneous SBM graph, meaning each node
    is exactly one of two types (and both types are present). To generate a
    heterogeneous SBM graph, `num_vertices2` must be non-zero and `pi2` must
    be supplied. When this happens, additional fields of `sbm_data`
    (`cross_links`, `type2_clusters`) are filled. See the
    StochasticBlockModel dataclass for full details.

    Args:
      sbm_data: StochasticBlockModel dataclass to store result data.
      num_vertices: (int) number of nodes in the graph.
      num_edges: (int) expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0
        (checked to 12 decimal places).
      prop_mat: square, symmetric matrix of community edge count rates. Must
        have len(pi) + len(pi2) rows and columns.
      out_degs: Out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.
      num_vertices2: If simulating a heterogeneous SBM, this is the number of
        vertices of type 2.
      pi2: If simulating a heterogeneous SBM, this is the pi vector for the
        vertices of type 2. Must sum to 1.0 (checked to 12 decimal places).
    Returns: (none)

    Raises:
      ValueError: if `num_vertices2` is negative, if exactly one of
        `num_vertices2` / `pi2` is supplied, if `pi` or `pi2` does not sum to
        1.0, or if the first two dimensions of `prop_mat` are not both
        len(pi) + len(pi2).
    """
    # A negative count with pi2=None used to slip past the pairing check and
    # fail later on len(None); reject it explicitly.
    if num_vertices2 < 0:
        raise ValueError("num_vertices2 must be non-negative")
    if (num_vertices2 == 0) != (pi2 is None):
        raise ValueError(
            "num_vertices2 and pi2 must be either both supplied or both None")
    if pi2 is None:
        pi2 = []
    # Equivalent to assertAlmostEqual(np.sum(pi), 1.0, places=12)
    # https://docs.python.org/3/library/unittest.html#unittest.TestCase.assertNotAlmostEqual
    #
    # Some leniency is required here because some theoretically-valid ways to
    # programmatically compute a simplex vector suffer from precision errors.
    # One example of this is in the simulate_sbm_community_sizes_seven_groups
    # test from sbm_simulator_test.py. Places>=12 covers known similar cases
    # (to date).
    if round(abs(np.sum(pi) - 1.0), 12) != 0:
        raise ValueError("entries of pi must sum to 1.0")
    if len(pi2) > 0 and round(abs(np.sum(pi2) - 1.0), 12) != 0:
        raise ValueError("entries of pi2 must sum to 1.0")
    k1, k2 = len(pi), len(pi2)
    # Build the combined vector as float64: an integer-valued input such as
    # pi=[1] would otherwise yield an integer array, on which the in-place
    # true division below raises a casting TypeError.
    pi = np.array(list(pi) + list(pi2), dtype=np.float64)
    # NOTE(review): pi and pi2 each sum to 1.0, so after this renormalization
    # each node type holds half of the proportion mass regardless of
    # num_vertices vs. num_vertices2 -- confirm this is intended.
    pi /= np.sum(pi)
    if prop_mat.shape[0] != len(pi) or prop_mat.shape[1] != len(pi):
        raise ValueError("prop_mat must be k x k; k = len(pi1) + len(pi2)")
    total_vertices = num_vertices + num_vertices2
    sbm_data.graph_memberships = _GenerateNodeMemberships(total_vertices, pi)
    # Homogeneous default: every observed cluster id is type 1.
    sbm_data.type1_clusters = sorted(set(sbm_data.graph_memberships))
    if num_vertices2 > 0:
        # Heterogeneous case: take the per-type cluster ids from the cross
        # links, overriding the default above.
        sbm_data.cross_links = hsu.GetCrossLinks(k1, k2)
        type1_clusters, type2_clusters = zip(*sbm_data.cross_links)
        sbm_data.type1_clusters = sorted(set(type1_clusters))
        sbm_data.type2_clusters = sorted(set(type2_clusters))
    edge_counts = _ComputeExpectedEdgeCounts(num_edges, total_vertices, pi,
                                             prop_mat)
    sbm_data.graph = generation.generate_sbm(sbm_data.graph_memberships,
                                             edge_counts, out_degs)
    # Drop self loops and parallel edges, then reindex the remaining edges.
    graph_tool.stats.remove_self_loops(sbm_data.graph)
    graph_tool.stats.remove_parallel_edges(sbm_data.graph)
    sbm_data.graph.reindex_edges()