def setUp(self):
    """Builds the simulated SBM fixture shared by the tests in this class.

    A fresh `StochasticBlockModel` is stored on `self.simulation_with_graph`,
    then populated in place: first with a graph via `SimulateSbm`, then with
    node features via `SimulateFeatures`.
    """
    super(SbmSimulatorTestHeterogeneousSbm, self).setUp()

    self.simulation_with_graph = sbm_simulator.StochasticBlockModel()
    sbm_data = self.simulation_with_graph

    # Positional arguments presumably map to (num_clusters1, p_to_q_ratio1,
    # num_clusters2, p_to_q_ratio2, p_to_q_ratio_cross) -- verify against
    # hsu.GetPropMat.
    prop_mat = hsu.GetPropMat(2, 5.0, 2, 5.0, 5.0)

    # Graph structure: both pi vectors use an even split over two
    # communities.
    graph_kwargs = dict(
        num_vertices=400,
        num_edges=16000,
        pi=np.array([0.5, 0.5]),
        prop_mat=prop_mat,
        pi2=np.array([0.5, 0.5]),
    )
    sbm_simulator.SimulateSbm(sbm_data, **graph_kwargs)

    # Node features: separate dimensions for the two sets of features
    # (32 and 48).
    feature_kwargs = dict(
        center_var=1.0,
        feature_dim=32,
        center_var2=1.0,
        feature_dim2=48,
        type_center_var=1.0,
        type_correlation=0.5,
    )
    sbm_simulator.SimulateFeatures(sbm_data, **feature_kwargs)
def GenerateStochasticBlockModelWithFeatures(
        num_vertices,
        num_edges,
        pi,
        prop_mat=None,
        out_degs=None,
        feature_center_distance=0.0,
        feature_dim=0,
        num_feature_groups=None,
        feature_group_match_type=MatchType.RANDOM,
        feature_cluster_variance=1.0,
        edge_feature_dim=0,
        edge_center_distance=0.0,
        edge_cluster_variance=1.0,
        num_vertices2=0,
        pi2=None,
        feature_center_distance2=0.0,
        feature_dim2=0,
        feature_type_correlation=0.0,
        feature_type_center_distance=0.0,
        edge_probability_profile=None):
    """Generates a stochastic block model (SBM) with node features.

    A new StochasticBlockModel is created and filled in three stages: graph
    simulation, node-feature simulation, and (only when edge_feature_dim > 0)
    edge-feature simulation.

    Args:
      num_vertices: number of nodes in the graph.
      num_edges: expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0.
      prop_mat: square, symmetric matrix of community edge count rates.
        Example: if diagonals are 2.0 and off-diagonals are 1.0,
        within-community edges are twice as likely as between-community
        edges.
      out_degs: out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.
      feature_center_distance: distance between feature cluster centers. When
        this is 0.0, the signal-to-noise ratio is 0.0. When equal to
        feature_cluster_variance, SNR is 1.0.
      feature_dim: dimension of node features.
      num_feature_groups: number of feature clusters. This is ignored if
        num_vertices2 is provided, as the internal feature generators will
        assume a heterogeneous SBM model, which does not support differing
        # feature clusters from # graph clusters. In this case, # feature
        clusters will be set equal to # graph clusters. If left as default
        (None), and input sbm_data is homogeneous, set to len(pi).
      feature_group_match_type: see sbm_simulator.MatchType.
      feature_cluster_variance: variance of feature clusters around their
        centers. Increasing this weakens node feature signal.
      edge_feature_dim: dimension of edge features. Edge features are only
        simulated when this is greater than 0.
      edge_center_distance: per-dimension distance between the intra-class
        and inter-class means. Increasing this strengthens the edge feature
        signal.
      edge_cluster_variance: variance of edge clusters around their centers.
        Increasing this weakens the edge feature signal.
      num_vertices2: if simulating a heterogeneous SBM, this is the number of
        vertices of type 2.
      pi2: if simulating a heterogeneous SBM, this is the pi vector for the
        vertices of type 2. Must sum to 1.0.
      feature_center_distance2: feature_center_distance for type 2 nodes. Not
        used if len(pi2) = 0.
      feature_dim2: feature_dim for nodes of type 2. Not used if
        len(pi2) = 0.
      feature_type_correlation: proportion of each cluster's center vector
        that is shared with other clusters linked across types. Not used if
        len(pi2) = 0.
      feature_type_center_distance: the variance of the generated centers for
        feature vectors that are shared across types. Not used if
        len(pi2) = 0.
      edge_probability_profile: this can be provided instead of prop_mat. If
        provided, prop_mat will be built according to the input p-to-q
        ratios. If prop_mat is provided, it will be preferred over this
        input.

    Returns:
      result: a StochasticBlockModel data class.

    Raises:
      ValueError: if neither of prop_mat or edge_probability_profile are
        provided.
    """
    result = sbm_simulator.StochasticBlockModel()

    # An explicit prop_mat always wins; the profile is only a fallback.
    if prop_mat is None:
        if edge_probability_profile is None:
            raise ValueError(
                "One of prop_mat or edge_probability_profile must be provided.")
        profile = edge_probability_profile
        prop_mat = hsu.GetPropMat(
            num_clusters1=len(pi),
            p_to_q_ratio1=profile.p_to_q_ratio1,
            # No type-2 proportions means zero type-2 clusters.
            num_clusters2=len(pi2) if pi2 is not None else 0,
            p_to_q_ratio2=profile.p_to_q_ratio2,
            p_to_q_ratio_cross=profile.p_to_q_ratio_cross)

    # Stage 1: graph structure. Arguments are forwarded positionally, so the
    # tuple order must match the simulator's signature.
    graph_args = (num_vertices, num_edges, pi, prop_mat, out_degs,
                  num_vertices2, pi2)
    sbm_simulator.SimulateSbm(result, *graph_args)

    # Stage 2: node features (also forwarded positionally).
    node_feature_args = (
        feature_center_distance,
        feature_dim,
        num_feature_groups,
        feature_group_match_type,
        feature_cluster_variance,
        feature_center_distance2,
        feature_dim2,
        feature_type_correlation,
        feature_type_center_distance,
    )
    sbm_simulator.SimulateFeatures(result, *node_feature_args)

    # Stage 3: edge features are optional and skipped at the default dim of 0.
    if edge_feature_dim > 0:
        edge_feature_args = (edge_feature_dim, edge_center_distance,
                             edge_cluster_variance)
        sbm_simulator.SimulateEdgeFeatures(result, *edge_feature_args)

    return result
def test_heterogeneous_inputs(self):
    """Checks GetPropMat against a fixed 5x5 matrix for five arguments."""
    actual = hsu.GetPropMat(3, 3.0, 2, 2.0, 4.0)

    # Expected layout: a 3x3 block with 3.0 on its diagonal, a 2x2 block with
    # 2.0 on its diagonal, and symmetric off-diagonal blocks holding the 4.0
    # entries; every remaining entry is 1.0.
    expected = np.array([
        [3.0, 1.0, 1.0, 1.0, 4.0],
        [1.0, 3.0, 1.0, 4.0, 1.0],
        [1.0, 1.0, 3.0, 1.0, 4.0],
        [1.0, 4.0, 1.0, 2.0, 1.0],
        [4.0, 1.0, 4.0, 1.0, 2.0],
    ])

    np.testing.assert_array_almost_equal(actual, expected)
def test_homogeneous_inptus(self):
    """Checks GetPropMat against a fixed 3x3 matrix for two arguments."""
    # NOTE(review): "inptus" in the method name looks like a typo for
    # "inputs"; the name is left as-is so existing test references still work.
    actual = hsu.GetPropMat(3, 4.0)

    # Expected: 4.0 on the diagonal, 1.0 everywhere else.
    expected = np.array([
        [4.0, 1.0, 1.0],
        [1.0, 4.0, 1.0],
        [1.0, 1.0, 4.0],
    ])

    np.testing.assert_array_almost_equal(actual, expected)