def setUp(self):
    """Creates the shared fixture: an SBM graph with two communities.

    The simulated model is stored on `self.simulation_with_graph`. Only the
    graph is simulated here (50 vertices, 500 requested edges); no feature
    simulation is run in this fixture.
    """
    super(SbmSimulatorTestSbm, self).setUp()
    self.simulation_with_graph = sbm_simulator.StochasticBlockModel()
    # Two equally weighted communities with an all-ones propensity matrix.
    community_proportions = [0.5, 0.5]
    uniform_prop_mat = np.ones((2, 2))
    sbm_simulator.SimulateSbm(
        self.simulation_with_graph,
        num_vertices=50,
        num_edges=500,
        pi=community_proportions,
        prop_mat=uniform_prop_mat,
    )
def GenerateStochasticBlockModelWithFeatures(
        num_vertices,
        num_edges,
        pi,
        prop_mat,
        out_degs=None,
        feature_center_distance=0.0,
        feature_dim=0,
        num_feature_groups=1,
        feature_group_match_type=MatchType.RANDOM,
        feature_cluster_variance=1.0,
        edge_feature_dim=0,
        edge_center_distance=0.0,
        edge_cluster_variance=1.0):
    """Generates a stochastic block model (SBM) with node and edge features.

    Runs three simulation stages in order on one fresh StochasticBlockModel:
    the graph (SimulateSbm), the node features (SimulateFeatures) and the edge
    features (SimulateEdgeFeatures).

    Args:
      num_vertices: number of nodes in the graph.
      num_edges: expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0.
      prop_mat: square, symmetric matrix of community edge count rates.
        Example: if diagonals are 2.0 and off-diagonals are 1.0,
        within-community edges are twice as likely as between-community
        edges.
      out_degs: out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.
      feature_center_distance: distance between feature cluster centers. When
        this is 0.0, the signal-to-noise ratio is 0.0. When equal to
        feature_cluster_variance, SNR is 1.0.
      feature_dim: dimension of node features.
      num_feature_groups: number of feature clusters.
      feature_group_match_type: see sbm_simulator.MatchType.
      feature_cluster_variance: variance of feature clusters around their
        centers. Increasing this weakens the node feature signal.
      edge_feature_dim: dimension of edge features.
      edge_center_distance: per-dimension distance between the intra-class
        and inter-class means. Increasing this strengthens the edge feature
        signal.
      edge_cluster_variance: variance of edge clusters around their centers.
        Increasing this weakens the edge feature signal.

    Returns:
      result: a StochasticBlockModel data class.
    """
    sbm_data = sbm_simulator.StochasticBlockModel()

    # Stage 1: graph structure.
    sbm_simulator.SimulateSbm(
        sbm_data, num_vertices, num_edges, pi, prop_mat, out_degs)

    # Stage 2: node features. Arguments are forwarded positionally, so their
    # order must match the SimulateFeatures signature.
    sbm_simulator.SimulateFeatures(
        sbm_data,
        feature_center_distance,
        feature_dim,
        num_feature_groups,
        feature_group_match_type,
        feature_cluster_variance)

    # Stage 3: edge features (always invoked, even when edge_feature_dim is 0).
    sbm_simulator.SimulateEdgeFeatures(
        sbm_data,
        edge_feature_dim,
        edge_center_distance,
        edge_cluster_variance)

    return sbm_data
def test_simulate_features_grouped_memberships(self):
    """Checks feature memberships produced with MatchType.GROUPED.

    Simulates 30 vertices over three equally weighted graph communities, then
    simulates features with two feature groups. The test expects the first 20
    vertices to land in feature group 0 and the remaining 10 in group 1.
    """
    simulation = sbm_simulator.StochasticBlockModel()
    equal_thirds = np.ones(3) / 3
    uniform_prop_mat = np.ones((3, 3))
    sbm_simulator.SimulateSbm(
        simulation, 30, 100, pi=equal_thirds, prop_mat=uniform_prop_mat)
    sbm_simulator.SimulateFeatures(
        simulation,
        center_var=1.0,
        feature_dim=4,
        num_groups=2,
        match_type=sbm_simulator.MatchType.GROUPED,
    )

    # Convert to plain ints so the comparison does not depend on the element
    # type stored in feature_memberships.
    actual_memberships = [int(m) for m in simulation.feature_memberships]
    expected_memberships = [0] * 20 + [1] * 10
    self.assertSameStructure(actual_memberships, expected_memberships)
def test_simulate_sbm_community_sizes(self):
    """Checks per-community vertex counts for ten unevenly weighted groups.

    Simulates 50 vertices with community weights 1..10 and verifies that the
    number of vertices assigned to each community, ordered by community id,
    matches the expected sizes, and that `type1_clusters` lists ids 0..9.
    """
    simulation = sbm_simulator.StochasticBlockModel()
    # Weights are passed as given (they sum to 55, not 1.0); the expected
    # sizes below sum to the 50 requested vertices.
    unbalanced_pi = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    sbm_simulator.SimulateSbm(
        simulation,
        num_vertices=50,
        num_edges=100.0,
        pi=unbalanced_pi,
        prop_mat=np.ones(shape=(10, 10)))
    expected_sizes = [1, 2, 3, 4, 5, 5, 6, 7, 8, 9]

    # Tally vertices per community id, then read the counts in id order.
    group_counts = collections.Counter(simulation.graph_memberships)
    actual_sizes = [count for _, count in sorted(group_counts.items())]

    self.assertSameStructure(expected_sizes, actual_sizes)
    self.assertEqual(simulation.type1_clusters,
                     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
def setUp(self):
    """Creates the shared fixture: a heterogeneous SBM with node features.

    The model is stored on `self.simulation_with_graph`. The graph is
    simulated with both `pi` and `pi2` supplied, and features are then
    simulated with separate dimensions for the two node types (32 and 48).
    """
    super(SbmSimulatorTestHeterogeneousSbm, self).setUp()
    self.simulation_with_graph = sbm_simulator.StochasticBlockModel()
    simulation = self.simulation_with_graph

    # NOTE(review): positional arguments are presumably (num_clusters1,
    # p_to_q_ratio1, num_clusters2, p_to_q_ratio2, p_to_q_ratio_cross);
    # confirm against hsu.GetPropMat.
    prop_mat = hsu.GetPropMat(2, 5.0, 2, 5.0, 5.0)

    sbm_simulator.SimulateSbm(
        simulation,
        num_vertices=400,
        num_edges=16000,
        pi=np.array([0.5, 0.5]),
        prop_mat=prop_mat,
        pi2=np.array([0.5, 0.5]),
    )
    sbm_simulator.SimulateFeatures(
        simulation,
        center_var=1.0,
        feature_dim=32,
        center_var2=1.0,
        feature_dim2=48,
        type_center_var=1.0,
        type_correlation=0.5,
    )
def test_simulate_sbm_community_sizes_seven_groups(self):
    """Checks per-community vertex counts for seven linearly growing groups.

    Community weights are 1.0, 1.5, ..., 4.0, normalized to sum to 1. With 500
    vertices the test expects the listed community sizes (in community-id
    order) and `type1_clusters` equal to ids 0..6.
    """
    num_communities = 7
    community_size_slope = 0.5

    # Weight of community i is 1 + slope * i, then normalized.
    raw_weights = (
        np.arange(num_communities) * community_size_slope
        + np.ones(num_communities))
    pi = raw_weights / np.sum(raw_weights)

    simulation = sbm_simulator.StochasticBlockModel()
    uniform_prop_mat = np.ones(shape=(num_communities, num_communities))
    sbm_simulator.SimulateSbm(
        simulation,
        num_vertices=500,
        num_edges=10000.0,
        pi=pi,
        prop_mat=uniform_prop_mat)

    membership_tally = collections.Counter(simulation.graph_memberships)
    actual_sizes = [
        membership_tally[cluster_id] for cluster_id in sorted(membership_tally)
    ]
    expected_sizes = [29, 43, 58, 71, 85, 100, 114]

    self.assertEqual(expected_sizes, actual_sizes)
    self.assertEqual(simulation.type1_clusters, [0, 1, 2, 3, 4, 5, 6])
def GenerateStochasticBlockModelWithFeatures(
        num_vertices,
        num_edges,
        pi,
        prop_mat=None,
        out_degs=None,
        feature_center_distance=0.0,
        feature_dim=0,
        num_feature_groups=None,
        feature_group_match_type=MatchType.RANDOM,
        feature_cluster_variance=1.0,
        edge_feature_dim=0,
        edge_center_distance=0.0,
        edge_cluster_variance=1.0,
        num_vertices2=0,
        pi2=None,
        feature_center_distance2=0.0,
        feature_dim2=0,
        feature_type_correlation=0.0,
        feature_type_center_distance=0.0,
        edge_probability_profile=None):
    """Generates a stochastic block model (SBM) with node features.

    Simulates the graph, then the node features, and finally (only when
    edge_feature_dim > 0) the edge features, all on one fresh
    StochasticBlockModel. If prop_mat is not given, it is built from
    edge_probability_profile via hsu.GetPropMat.

    Args:
      num_vertices: number of nodes in the graph.
      num_edges: expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0.
      prop_mat: square, symmetric matrix of community edge count rates.
        Example: if diagonals are 2.0 and off-diagonals are 1.0,
        within-community edges are twice as likely as between-community
        edges.
      out_degs: out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.
      feature_center_distance: distance between feature cluster centers. When
        this is 0.0, the signal-to-noise ratio is 0.0. When equal to
        feature_cluster_variance, SNR is 1.0.
      feature_dim: dimension of node features.
      num_feature_groups: number of feature clusters. This is ignored if
        num_vertices2 is provided, as the internal feature generators will
        assume a heterogeneous SBM model, which does not support differing
        # feature clusters from # graph clusters. In this case, # feature
        clusters will be set equal to # graph clusters. If left as default
        (None), and the SBM is homogeneous, it is set to len(pi).
      feature_group_match_type: see sbm_simulator.MatchType.
      feature_cluster_variance: variance of feature clusters around their
        centers. Increasing this weakens the node feature signal.
      edge_feature_dim: dimension of edge features.
      edge_center_distance: per-dimension distance between the intra-class
        and inter-class means. Increasing this strengthens the edge feature
        signal.
      edge_cluster_variance: variance of edge clusters around their centers.
        Increasing this weakens the edge feature signal.
      num_vertices2: if simulating a heterogeneous SBM, this is the number of
        vertices of type 2.
      pi2: if simulating a heterogeneous SBM, this is the pi vector for the
        vertices of type 2. Must sum to 1.0.
      feature_center_distance2: feature_center_distance for type 2 nodes. Not
        used if len(pi2) = 0.
      feature_dim2: feature_dim for nodes of type 2. Not used if
        len(pi2) = 0.
      feature_type_correlation: proportion of each cluster's center vector
        that is shared with other clusters linked across types. Not used if
        len(pi2) = 0.
      feature_type_center_distance: the variance of the generated centers for
        feature vectors that are shared across types. Not used if
        len(pi2) = 0.
      edge_probability_profile: this can be provided instead of prop_mat. If
        provided, prop_mat will be built according to the input p-to-q
        ratios. If prop_mat is provided, it will be preferred over this
        input.

    Returns:
      result: a StochasticBlockModel data class.

    Raises:
      ValueError: if neither prop_mat nor edge_probability_profile is
        provided.
    """
    sbm_data = sbm_simulator.StochasticBlockModel()

    # An explicit prop_mat always wins; the profile is only a fallback.
    if prop_mat is None:
        if edge_probability_profile is None:
            raise ValueError(
                "One of prop_mat or edge_probability_profile must be provided.")
        num_clusters1 = len(pi)
        # No type-2 clusters when the model is homogeneous (pi2 omitted).
        num_clusters2 = len(pi2) if pi2 is not None else 0
        prop_mat = hsu.GetPropMat(
            num_clusters1=num_clusters1,
            p_to_q_ratio1=edge_probability_profile.p_to_q_ratio1,
            num_clusters2=num_clusters2,
            p_to_q_ratio2=edge_probability_profile.p_to_q_ratio2,
            p_to_q_ratio_cross=edge_probability_profile.p_to_q_ratio_cross)

    sbm_simulator.SimulateSbm(
        sbm_data,
        num_vertices,
        num_edges,
        pi,
        prop_mat,
        out_degs,
        num_vertices2,
        pi2)

    # Arguments are forwarded positionally, so their order must match the
    # SimulateFeatures signature.
    sbm_simulator.SimulateFeatures(
        sbm_data,
        feature_center_distance,
        feature_dim,
        num_feature_groups,
        feature_group_match_type,
        feature_cluster_variance,
        feature_center_distance2,
        feature_dim2,
        feature_type_correlation,
        feature_type_center_distance)

    # Edge features are optional and skipped entirely for a zero dimension.
    if edge_feature_dim > 0:
        sbm_simulator.SimulateEdgeFeatures(
            sbm_data,
            edge_feature_dim,
            edge_center_distance,
            edge_cluster_variance)

    return sbm_data