def get(n=50):
    ns = [n, n]
    p1 = np.array([[0.9, 0.1], [0.1, 0.9]])
    p2 = np.array([[0.9, 0.1], [0.1, 0.9]])
    A1 = sbm(ns, p1)
    A2 = sbm(ns, p2)
    X1 = AdjacencySpectralEmbed().fit_transform(A1)
    X2 = AdjacencySpectralEmbed().fit_transform(A2)
    return X1, X2
def fit(seed):
    np.random.seed(seed)
    warnings.filterwarnings("ignore")
    # cn, cm, and B are globals set by the driver loop
    A1 = sbm(cn, B)
    A2 = sbm(cm, B)
    ldt = LatentDistributionTest(n_components=2, method="dcorr")
    p = ldt.fit(A1, A2)
    return p
def run_sim(Bx, By, n_components, n_bootstraps, sizes, seed):
    np.random.seed(seed)
    A0 = sbm(sizes, Bx, loops=False)
    A1 = sbm(sizes, Bx, loops=False)
    A2 = sbm(sizes, By, loops=False)
    spt_null = SemiparametricTest(n_components=n_components, n_bootstraps=n_bootstraps)
    spt_alt = SemiparametricTest(n_components=n_components, n_bootstraps=n_bootstraps)
    spt_null.fit(A0, A1)
    spt_alt.fit(A0, A2)
    return (spt_null, spt_alt)
def generate_data():
    np.random.seed(1)
    p1 = [[0.2, 0.1], [0.1, 0.2]]
    p2 = [[0.1, 0.2], [0.2, 0.1]]
    n = [50, 50]
    g1 = [sbm(n, p1) for _ in range(20)]
    g2 = [sbm(n, p2) for _ in range(20)]
    g = g1 + g2
    y = ["0"] * 20 + ["1"] * 20
    return g, y
def test_SBM_epsilon(self):
    np.random.seed(12345678)
    B1 = np.array([[0.5, 0.2], [0.2, 0.5]])
    B2 = np.array([[0.7, 0.2], [0.2, 0.7]])
    b_size = 200
    A1 = sbm(2 * [b_size], B1)
    A2 = sbm(2 * [b_size], B1)
    A3 = sbm(2 * [b_size], B2)
    npt_null = NonparametricTest(n_components=2, n_bootstraps=100)
    npt_alt = NonparametricTest(n_components=2, n_bootstraps=100)
    p_null = npt_null.fit(A1, A2)
    p_alt = npt_alt.fit(A1, A3)
    self.assertTrue(p_null > 0.05)
    self.assertTrue(p_alt <= 0.05)
def test_SBM_epsilon(self):
    np.random.seed(12345678)
    B1 = np.array([[0.5, 0.2], [0.2, 0.5]])
    B2 = np.array([[0.7, 0.2], [0.2, 0.7]])
    b_size = 200
    A1 = sbm(2 * [b_size], B1)
    A2 = sbm(2 * [b_size], B1)
    A3 = sbm(2 * [b_size], B2)
    spt_null = LatentPositionTest(n_components=2, n_bootstraps=100)
    spt_alt = LatentPositionTest(n_components=2, n_bootstraps=100)
    p_null = spt_null.fit_predict(A1, A2)
    p_alt = spt_alt.fit_predict(A1, A3)
    self.assertTrue(p_null > 0.05)
    self.assertTrue(p_alt <= 0.05)
def generate_cyclops(X, n, pi, density=None, density_params=[0, 1], acorn=None):
    if acorn is None:
        acorn = np.random.randint(10**6)
    np.random.seed(acorn)

    counts = np.random.multinomial(n, [pi, 1 - pi]).astype(int)

    if density is None:
        density = np.random.uniform
        U = sample(counts[0], density, density_params)
        X_L = get_latent_positions(U)
    else:
        density_params = np.array(density_params)
        d = len(density_params)
        if density_params.ndim == 1:
            # The original left this branch empty, which leaves X_L undefined
            raise NotImplementedError("1-dimensional density_params is not supported")
        X_temp = np.stack(
            [sample(counts[0], density, density_params[i]) for i in range(d)], axis=1
        )
        # Quadratic surface z = 3x^2 + 3y^2 over the first two coordinates
        quad = np.sum(np.array([3, 3]) * X_temp[:, :2] ** 2, axis=1)[:, np.newaxis]
        X_L = np.concatenate((X_temp[:, :2], quad), axis=1)

    X = X[:, np.newaxis].T
    All_X = np.concatenate((X_L, X), axis=0)
    P = All_X @ All_X.T
    A = sbm(np.concatenate((np.ones(counts[0]).astype(int), [counts[1]])), P)
    return A, counts
def setup_class(cls):
    estimator = SBMEstimator(directed=True, loops=False)
    B = np.array([[0.9, 0.1], [0.1, 0.9]])
    g = sbm([50, 50], B, directed=True)
    labels = _n_to_labels([50, 50])
    p_mat = _block_to_full(B, labels, (100, 100))
    p_mat -= np.diag(np.diag(p_mat))
    cls.estimator = estimator
    cls.p_mat = p_mat
    cls.graph = g
    cls.labels = labels
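
# For reference, minimal sketches of the two private helpers used above (the
# semantics are assumed from how they are called here, not graspy's actual internals):
def _n_to_labels_sketch(n):
    # [50, 50] -> array([0, ..., 0, 1, ..., 1]) of length sum(n)
    return np.repeat(np.arange(len(n)), n)


def _block_to_full_sketch(B, labels, shape):
    # Expand a K x K block matrix into the full n x n edge probability matrix
    return B[np.ix_(labels, labels)]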
from numpy.random import normal
from scipy.sparse import csr_matrix


def get_B_and_weight_vec(n, pin=0.5, pout=0.01, mu_in=8, mu_out=2):
    # Block-wise edge probabilities and normal weight parameters
    p = []
    wt = []
    wtargs = []
    for i in range(len(n)):
        sub_p = []
        sub_wt = []
        sub_wtargs = []
        for j in range(len(n)):
            sub_wt.append(normal)
            if i == j:
                sub_p.append(pin)
                sub_wtargs.append(dict(loc=mu_in, scale=1))
            else:
                sub_p.append(pout)
                sub_wtargs.append(dict(loc=mu_out, scale=1))
        wt.append(sub_wt)
        p.append(sub_p)
        wtargs.append(sub_wtargs)
    G = sbm(n=n, p=p, wt=wt, wtargs=wtargs)

    N = len(G)
    E = int(len(np.argwhere(G > 0)) / 2)
    cnt = 0
    weight_vec = np.zeros(E)
    row = []
    col = []
    data = []
    for item in np.argwhere(G > 0):
        i, j = item
        if i > j:
            continue
        if i == j:
            print("unexpected self-loop")
        # Signed incidence matrix row for edge (i, j)
        row.append(cnt)
        col.append(i)
        data.append(1)
        row.append(cnt)
        col.append(j)
        data.append(-1)
        weight_vec[cnt] = abs(G[i, j])
        cnt += 1
    B = csr_matrix((data, (row, col)), shape=(E, N))
    return B, weight_vec
def __init__(self, N):
    # Split the N nodes into log(N) groups
    K = math.floor(math.log(N))
    group_sizes = [math.floor(N / K)] * K
    group_sizes[0] += N - sum(group_sizes)

    # Make the block probability matrix P
    P = np.full((K, K), 0.025)
    np.fill_diagonal(P, 0.3)

    # Sample the graph G
    self.groups = np.repeat(list(range(K)), group_sizes)
    self.adj_matrix = sbm(n=group_sizes, p=P)
def test_SBM_dcorr(self):
    for test in self.tests.keys():
        np.random.seed(12345678)
        B1 = np.array([[0.5, 0.2], [0.2, 0.5]])
        B2 = np.array([[0.7, 0.2], [0.2, 0.7]])
        b_size = 200
        A1 = sbm(2 * [b_size], B1)
        A2 = sbm(2 * [b_size], B1)
        A3 = sbm(2 * [b_size], B2)
        ldt_null = LatentDistributionTest(
            test, self.tests[test], n_components=2, n_bootstraps=100
        )
        ldt_alt = LatentDistributionTest(
            test, self.tests[test], n_components=2, n_bootstraps=100
        )
        p_null = ldt_null.fit_predict(A1, A2)
        p_alt = ldt_alt.fit_predict(A1, A3)
        self.assertTrue(p_null > 0.05)
        self.assertTrue(p_alt <= 0.05)
def test_SBM_fit_supervised(self):
    np.random.seed(8888)
    B = np.array(
        [
            [0.9, 0.2, 0.05, 0.1],
            [0.1, 0.7, 0.1, 0.1],
            [0.2, 0.4, 0.8, 0.5],
            [0.1, 0.2, 0.1, 0.7],
        ]
    )
    n = np.array([500, 500, 250, 250])
    g = sbm(n, B, directed=True, loops=False)
    sbe = SBMEstimator(directed=True, loops=False)
    labels = _n_to_labels(n)
    sbe.fit(g, y=labels)
    B_hat = sbe.block_p_
    assert_allclose(B_hat, B, atol=0.01)
from numpy.random import normal


def get_B_and_weight_vec(n, pin=0.5, pout=0.01, mu_in=8, mu_out=2):
    # Block-wise edge probabilities and normal weight parameters
    p = []
    wt = []
    wtargs = []
    for i in range(len(n)):
        sub_p = []
        sub_wt = []
        sub_wtargs = []
        for j in range(len(n)):
            sub_wt.append(normal)
            if i == j:
                sub_p.append(pin)
                sub_wtargs.append(dict(loc=mu_in, scale=1))
            else:
                sub_p.append(pout)
                sub_wtargs.append(dict(loc=mu_out, scale=1))
        wt.append(sub_wt)
        p.append(sub_p)
        wtargs.append(sub_wtargs)
    G = sbm(n=n, p=p, wt=wt, wtargs=wtargs)

    N = len(G)
    E = int(len(np.argwhere(G > 0)) / 2)
    # Dense signed incidence matrix: one row per undirected edge
    B = np.zeros((E, N))
    weight_vec = np.zeros(E)
    cnt = 0
    for item in np.argwhere(G > 0):
        i, j = item
        if i > j:
            continue
        if i == j:
            print("unexpected self-loop")
        B[cnt, i] = 1
        B[cnt, j] = -1
        weight_vec[cnt] = abs(G[i, j])
        cnt += 1
    return B, weight_vec
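
# Usage sketch (illustrative, my addition): B acts as a signed incidence matrix,
# so for the e-th edge (i, j), (B @ x)[e] = x[i] - x[j]; together with weight_vec
# this yields weighted total-variation-style penalties on node signals.
B, weight_vec = get_B_and_weight_vec([20, 20])
x = np.random.randn(B.shape[1])
edge_diffs = B @ x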
    return avg_score, test_results


# Testing
if __name__ == '__main__':
    import numpy as np
    from graspy.embed import MultipleASE
    from graspy.simulations import sbm
    from graspy.plot import heatmap, pairplot

    n_verts = 100
    p = 0.8
    labels_sbm = n_verts * [0] + n_verts * [1]
    P = np.array([[p, 1.0 - p], [1.0 - p, p]])
    undirected_sbms = []
    for i in range(32):
        undirected_sbms.append(sbm(2 * [n_verts], P))

    def plotSVC(Xhat, clf):
        h = 0.0002
        x_min, x_max = Xhat[:, 0].min() - 0.01, Xhat[:, 0].max() + 0.01
        y_min, y_max = Xhat[:, 1].min() - 0.01, Xhat[:, 1].max() + 0.01
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

        import matplotlib
        matplotlib.use('QT5Agg')
        import matplotlib.pyplot as plt

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        plt.subplots(figsize=(10, 10))
        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
        # Put the result into a color plot
# %%
n_blocks = 4
n_per_comm = 50
n_verts = n_blocks * n_per_comm
comm_proportions = n_blocks * [n_per_comm]
low_p = 0.02
p_mat = np.array(
    [
        [0.5, low_p, low_p, low_p],
        [low_p, 0.4, low_p, low_p],
        [low_p, low_p, 0.55, low_p],
        [low_p, low_p, low_p, 0.45],
    ]
)
A, labels = sbm(comm_proportions, p_mat, return_labels=True)
heatmap(A, inner_hier_labels=labels, cbar=False)

# %% [markdown]
# # Compute some Laplacians
# The unnormalized graph Laplacian

# %%
L = unnormalized_laplacian(A)
heatmap(L, title="Unnormalized Graph Laplacian")

# L should be positive semidefinite (all eigenvalues nonnegative)
evals, evecs = eig(L)
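
# A minimal sketch of what `unnormalized_laplacian` presumably computes (the
# helper is defined elsewhere in this project): L = D - A, with D the diagonal
# degree matrix.
def unnormalized_laplacian_sketch(adj):
    degrees = np.sum(adj, axis=1)
    return np.diag(degrees) - adj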
#%%
%matplotlib inline
from graspy.plot import *
from graspy.simulations import sbm
from graspy.embed import AdjacencySpectralEmbed
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

g = sbm([100, 100], [[0.8, 0.2], [0.2, 0.8]])
ase = AdjacencySpectralEmbed()
X = ase.fit_transform(g)
labels = 25 * [0] + 25 * [1] + 25 * [2] + 24 * [-1] + [-2]
# pairplot(X, size=50, alpha=0.6)
plt.show()
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from graspy.plot import heatmap
from graspy.simulations import sbm
from graspy.utils import to_laplace

sns.set_context("talk")
mpl.rcParams["axes.spines.right"] = False
mpl.rcParams["axes.spines.top"] = False

# %%
n_per_comm = [100, 100, 100]
n_verts = np.sum(n_per_comm)
block_probs = np.array([[0.4, 0.1, 0.1], [0.1, 0.4, 0.1], [0.1, 0.1, 0.4]])
adj, labels = sbm(n_per_comm, block_probs, return_labels=True)
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
sns.heatmap(
    block_probs, annot=True, cmap="RdBu_r", center=0, square=True, ax=axs[0], cbar=False
)
heatmap(adj, inner_hier_labels=labels, ax=axs[1], cbar=False)

#%%
I_DAD = to_laplace(adj, form="I-DAD")
DAD = to_laplace(adj, form="DAD")
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
heatmap_kws = dict(inner_hier_labels=labels, cbar=False)
heatmap(I_DAD, ax=axs[0], **heatmap_kws)
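
#%%
# Quick sanity check of what these forms compute (my sketch, assuming graspy's
# default regularizer of 0 and no isolated vertices):
# DAD = D^{-1/2} A D^{-1/2} and I-DAD = I - DAD.
D_inv_root = np.diag(1 / np.sqrt(adj.sum(axis=1)))
assert np.allclose(DAD, D_inv_root @ adj @ D_inv_root)
assert np.allclose(I_DAD, np.eye(len(adj)) - DAD)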
# %% [markdown]
# ## Generate a "perfect" feedforward network (stochastic block model)

# %%
low_p = 0
diag_p = 0
feedforward_p = 0.2
community_sizes = 5 * [100]
B = get_feedforward_B(low_p, diag_p, feedforward_p)
plt.figure(figsize=(10, 10))
plt.title("Feedforward SBM block probability matrix")
sns.heatmap(B, annot=True, square=True, cmap="Reds", cbar=False)
plt.show()

A, labels = sbm(community_sizes, B, directed=True, loops=False, return_labels=True)
labels = labels.astype(str)
heatmap(
    A,
    cbar=False,
    inner_hier_labels=labels,
    title="Feedforward SBM sampled adjacency matrix",
)
plt.show()

# %% [markdown]
# ## Compute the signal flow metric on the perfect feedforward network
# The energy function that this metric optimizes tries, for any pair of connected
# vertices, to make the signal flow value for node $i$ ($z_i$) one greater than
# that for node $j$ ($z_j$) whenever node $i$ is above node $j$ in the hierarchy.
# The basic intuition is that nodes which mostly send edges should sit higher in
# the hierarchy than nodes which mostly receive them.
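
# %%
# A minimal sketch of one standard way to compute signal flow (the project's
# `signal_flow` helper is defined elsewhere; this follows the common least-squares
# formulation, so treat the details as assumptions): symmetrize the graph, then
# solve the Laplacian system L z = b, where b_i measures node i's net tendency
# to send rather than receive edges.
def signal_flow_sketch(A):
    W = (A + A.T) / 2                          # symmetrized edge weights
    L = np.diag(W.sum(axis=1)) - W             # graph Laplacian of W
    b = np.sum(W * np.sign(A - A.T), axis=1)   # net outward flow per node
    return np.linalg.pinv(L) @ b               # least-squares "height" z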
def simulation(n, pi, normal_params, beta_params, cond_ind=True, errors=None,
               smooth=False, acorn=None):
    #- Type checks
    if isinstance(normal_params, list):
        sbm_check = False
        # there are other checks to do..
    elif isinstance(normal_params, np.ndarray):
        if normal_params.ndim == 2:
            if np.sum(normal_params == normal_params.T) == np.prod(normal_params.shape):
                sbm_check = True
            else:
                msg = 'if normal_params is a 2 dimensional array it must be symmetric'
                raise ValueError(msg)
        else:
            msg = 'if normal_params is an array, it must be a 2 dimensional array'
            raise TypeError(msg)
    else:
        msg = 'normal_params must be either a list or a 2 dimensional array'
        raise TypeError(msg)

    if acorn is None:
        acorn = np.random.randint(10**6)
    np.random.seed(acorn)

    #- Multinomial trial
    counts = np.random.multinomial(n, [pi, 1 - pi])

    #- Hard code the number of blocks
    K = 2

    #- Set labels
    labels = np.concatenate((np.zeros(counts[0]), np.ones(counts[1])))

    #- number of seeds = n_{i}/10
    n_seeds = np.round(0.1 * counts).astype(int)

    #- Set training and test data
    class_train_idx = [
        range(np.sum(counts[:k]), np.sum(counts[:k]) + n_seeds[k]) for k in range(K)
    ]
    train_idx = np.concatenate(class_train_idx).astype(int)
    test_idx = [k for k in range(n) if k not in train_idx]

    #- Total number of seeds
    m = np.sum(n_seeds)

    #- Estimate class probabilities
    pi_hats = n_seeds / m

    #- Sample from beta distributions
    beta_samples = beta_sampler(counts, beta_params)
    Z = beta_samples

    #- Sample from multivariate normal or SBM, either independently of the Zs or not
    if cond_ind:
        if sbm_check:
            A = sbm(counts, normal_params)
            ase_obj = ASE(n_elbows=1)
            X = ase_obj.fit_transform(A)
        else:
            X = MVN_sampler(counts, normal_params)
            if len(normal_params[0][0]) == 1:
                X = X[:, np.newaxis]
    else:
        if sbm_check:
            # A big version of B, so connectivity probabilities can vary per node
            P = blowup(normal_params, counts)
            # Geometric mean of the Zs; a plain outer product of the raw Zs risks
            # disconnecting the graph when they are too small
            scales = np.prod(Z, axis=1) ** (1 / Z.shape[1])
            new_P = P * np.outer(scales, scales)  # new probability matrix
            A = sbm(np.ones(n).astype(int), new_P)
            ase_obj = ASE(n_elbows=1)
            X = ase_obj.fit_transform(A)
        else:
            X = conditional_MVN_sampler(Z=Z, rho=1, counts=counts,
                                        params=normal_params, seed=None)
            if len(normal_params[0][0]) == 1:
                X = X[:, np.newaxis]

    XZ = np.concatenate((X, Z), axis=1)

    #- Estimate normal parameters using seeds
    params = []
    for i in range(K):
        temp_mu, temp_cov = estimate_normal_parameters(X[class_train_idx[i]])
        params.append([temp_mu, temp_cov])

    #- Using conditional independence assumption (RF, KNN used for posterior estimates)
    if errors is None:
        errors = [[] for i in range(6)]

    rf1 = RF(n_estimators=100, max_depth=int(np.round(np.log(Z[train_idx].shape[0]))))
    rf1.fit(Z[train_idx], labels[train_idx])

    knn1 = KNN(n_neighbors=int(np.round(np.log(Z[train_idx].shape[0]))))
    knn1.fit(Z[train_idx], labels[train_idx])

    if smooth:
        temp_pred = classify(X[test_idx], Z[test_idx], params, rf1, m=m)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[0].append(temp_error)

        temp_pred = classify(X[test_idx], Z[test_idx], params, knn1, m=m)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[1].append(temp_error)
    else:
        temp_pred = classify(X[test_idx], Z[test_idx], params, rf1)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[0].append(temp_error)

        knn1 = KNN(n_neighbors=int(np.round(np.log(m))))
        knn1.fit(Z[train_idx], labels[train_idx])
        temp_pred = classify(X[test_idx], Z[test_idx], params, knn1)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[1].append(temp_error)

    temp_pred = QDA(X[test_idx], pi_hats, params)
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[2].append(temp_error)

    #- Not using conditional independence assumption (RF, KNN used for classification)
    rf2 = RF(n_estimators=100, max_depth=int(np.round(np.log(m))))
    rf2.fit(XZ[train_idx], labels[train_idx])
    temp_pred = rf2.predict(XZ[test_idx])
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[3].append(temp_error)

    knn2 = KNN(n_neighbors=int(np.round(np.log(m))))
    knn2.fit(XZ[train_idx], labels[train_idx])
    temp_pred = knn2.predict(XZ[test_idx])
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[4].append(temp_error)

    #- adj and features are assumed to be defined in the enclosing scope
    temp_accuracy = GCN(adj, features, train_idx, labels)
    temp_error = 1 - temp_accuracy
    errors[5].append(temp_error)

    return errors
import numpy as np
import pandas as pd
import seaborn as sns
from graspy.embed import AdjacencySpectralEmbed
from graspy.plot import heatmap
from graspy.simulations import sbm
from sklearn.mixture import GaussianMixture

sns.set_context("talk")

n_per_comm = [1000, 1000, 1000]
n_verts = np.sum(n_per_comm)
block_probs = np.array([[0.5, 0.1, 0.1], [0.1, 0.5, 0.1], [0.1, 0.1, 0.5]])
adj, labels = sbm(n_per_comm, block_probs, return_labels=True)

# %%
ase = AdjacencySpectralEmbed(n_components=3)
Xhat = ase.fit_transform(adj)

# %%
# REF: Anton
def _fit_plug_in_variance_estimator(X):
    """
    Takes in ASE of a graph and returns a function that estimates
    the variance-covariance matrix at a given point using the
    plug-in estimator from the RDPG Central Limit Theorem.
    """
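
# %%
# The function body is truncated above. For reference, a minimal sketch of how
# such a plug-in estimator is typically written (an assumption on my part, not
# the original code): with p_i = x^T X_i and Delta = (1/n) X^T X,
# Sigma_hat(x) = Delta^{-1} [ (1/n) sum_i p_i (1 - p_i) X_i X_i^T ] Delta^{-1} / n.
def plug_in_variance_sketch(X):
    n = len(X)
    delta = X.T @ X / n
    delta_inv = np.linalg.inv(delta)

    def estimate(x):
        p = X @ x                      # estimated edge probabilities at x
        weights = p * (1 - p)          # Bernoulli variances
        middle = (X * weights[:, None]).T @ X / n
        return delta_inv @ middle @ delta_inv / n

    return estimate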
from SupervisedLearning import MASEPipeline
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

# Testing
if __name__ == '__main__':
    import numpy as np
    from graspy.simulations import sbm
    from graspy.plot import heatmap, pairplot

    n_verts = 100
    nums = 128
    p1 = 0.8
    p2 = 0.81
    labels_sbm = nums * [1] + nums * [2]
    P1 = np.array([[p1, 1.0 - p1], [1.0 - p1, p1]])
    P2 = np.array([[p2, 1.0 - p2], [1.0 - p2, p2]])
    undirected_sbms = []
    for i in range(nums):
        undirected_sbms.append(sbm(2 * [n_verts], P1))
    for i in range(nums):
        undirected_sbms.append(sbm(2 * [n_verts], P2))
    G = np.array(undirected_sbms)
    print(G.shape)

    MASEP = MASEPipeline([('pca', PCA(n_components=4)), ('knn', KNeighborsClassifier())])
    MASEP.set_params(MASE__n_components=6, MASE__algorithm='full')
    MASEP.fit(undirected_sbms, labels_sbm)
    print(MASEP.predict(undirected_sbms))
    cvs, _ = MASEP.cross_val_score(undirected_sbms, labels_sbm)
    print(cvs)
    n_blocks=n_blocks)

plt.figure(figsize=(10, 10))
sns.heatmap(block_probs, annot=True, cmap="Reds", cbar=False)
plt.title("Feedforward block probability matrix")
stashfig("ffw-B")

#%%
community_sizes = np.empty(2 * n_blocks, dtype=int)
n_feedforward = 100
n_feedback = 100
community_sizes[::2] = n_feedforward
community_sizes[1::2] = n_feedback
# Note: the line below overwrites the interleaved sizes constructed above
community_sizes = n_blocks * [n_feedforward]
labels = n_to_labels(community_sizes)
A = sbm(community_sizes, block_probs, directed=True, loops=False)
n_verts = A.shape[0]
perm_inds = np.random.permutation(n_verts)
A_perm = A[np.ix_(perm_inds, perm_inds)]
heatmap(A, cbar=False, title="Feedforward SBM")
stashfig("ffSBM")
heatmap(A_perm, cbar=False, title="Feedforward SBM, shuffled")
stashfig("ffSBM-shuffle")

true_z = signal_flow(A)
sort_inds = np.argsort(true_z)[::-1]
heatmap(
    A[np.ix_(sort_inds, sort_inds)],
    cbar=False,
def fit(seed):
    warnings.filterwarnings("ignore")
    np.random.seed(seed)
    ldt = LatentDistributionTest(n_components=2, method="dcorr")
    p = ldt.fit(A1, A2)
    return p


for n in range(start, stop, diff):
    ns.append(n)
    temp = []
    for m in range(n, n + stop - start, diff):
        print(f"Running tests for n={n}, m={m}")
        cn = [n // k] * k
        cm = [m // k] * k
        A1 = sbm(cn, B)
        A2 = sbm(cm, B)
        seeds = np.random.randint(0, int(1e8), tests)
        # One test per seed, run in parallel; a p-value below alpha counts as a
        # type-1 error
        out = Parallel(n_jobs=-2, verbose=0)(delayed(fit)(seed) for seed in seeds)
        out = np.array(out)
        type1_errors = len(np.where(out < alpha)[0])
        error = type1_errors / tests
        temp.append(error)
        ms.append(m - n)
    error_list.append(temp)
"""The argument p is assumed to be some permutation of 0, 1, ..., len(p)-1. Returns an array s, where s[i] gives the index of i in p. """ s = np.empty(p.size, p.dtype) s[p] = np.arange(p.size) return s n = [50, 20, 20, 5, 5] block_p = np.zeros((5, 5)) block_p += np.diag(0.5 * np.ones(5)) n_verts = 100 shuffle_inds = np.random.permutation(n_verts) A = sbm(n, block_p) B = A[np.ix_(shuffle_inds, shuffle_inds)] # B is a permuted version of A (corr = 1) faq = FastApproximateQAP( max_iter=30, eps=0.0001, init_method="rand", n_init=100, shuffle_input=False, maximize=True, ) A_found, B_found = faq.fit_predict(A, B) reverse_shuffle = invert_permutation(shuffle_inds)
    feedforward_p, n_blocks=n_blocks)

fig, axs = plt.subplots(1, 2, figsize=(20, 10))
sns.heatmap(block_probs, annot=True, cmap="Reds", cbar=False, ax=axs[0], square=True)
axs[0].xaxis.tick_top()
axs[0].set_title("Block probability matrix", pad=25)

np.random.seed(88)
adj, labels = sbm(
    community_sizes, block_probs, directed=True, loops=False, return_labels=True
)
n_verts = adj.shape[0]
adjplot(adj, sort_class=labels, cbar=False, ax=axs[1], square=True)
axs[1].set_title("Adjacency matrix", pad=25)
plt.tight_layout()
stashfig("sbm" + basename)

# %% [markdown]
# ##

# %%
currtime = time.time()
n_verts = len(adj)
#%%
from graspy.simulations import sbm
import numpy as np
from graspy.plot import heatmap, pairplot

n = np.array([100, 100, 100])
p = np.array([[0.3, 0.2, 0.1], [0.01, 0.2, 0.2], [0.02, 0.03, 0.1]])

# One degree-correction value per node, normalized to sum to 1 within each block
dcs = []
for i in range(len(n)):
    dc = np.random.beta(2, 5, n[i])
    dc /= dc.sum()
    dcs.append(dc)
dcs = np.concatenate(dcs)

adj, labels = sbm(n, p, directed=True, dc=dcs, return_labels=True)
heatmap(adj, cbar=False, sort_nodes=True, inner_hier_labels=labels)

#%%
from graspy.embed import AdjacencySpectralEmbed

ase = AdjacencySpectralEmbed(n_components=3)
embed = ase.fit_transform(adj)
embed = np.concatenate(embed, axis=-1)

#%%
pairplot(embed, labels=labels)

# %% [markdown]
# ##

# %%
norm_embed = embed / np.linalg.norm(embed, axis=1)[:, None]
import graspy
import matplotlib.pyplot as plt
import numpy as np
from sklearn.svm import SVC
from graspy.simulations import sbm

nums = 32
n = [25, 25]
P1 = [[.3, .1], [.1, .7]]
P2 = [[.3, .1], [.1, .3]]
labels = [1] * nums + [2] * nums
# labels = np.matrix([labels])
# labels = labels.transpose(1, 0)

np.random.seed(8)
Gs = []
for i in range(nums):
    Gs.append(sbm(n, P1))
for i in range(nums):
    Gs.append(sbm(n, P2))


def plotSVC(Xhat, clf):
    h = 0.0002
    x_min, x_max = Xhat[:, 0].min() - 0.01, Xhat[:, 0].max() + 0.01
    y_min, y_max = Xhat[:, 1].min() - 0.01, Xhat[:, 1].max() + 0.01
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    import matplotlib
    matplotlib.use('QT5Agg')
    import matplotlib.pyplot as plt

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    plt.subplots(figsize=(10, 10))
from graspy.plot import *
from graspy.simulations import sbm
from graspy.embed import AdjacencySpectralEmbed
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

B = np.array([
    [0, 0.2, 0.1, 0.1, 0.1],
    [0.2, 0.8, 0.1, 0.3, 0.1],
    [0.15, 0.1, 0, 0.05, 0.1],
    [0.1, 0.1, 0.2, 1, 0.1],
    [0.1, 0.2, 0.1, 0.1, 0.8],
])
g = sbm([10, 30, 50, 25, 25], B, directed=True)
ase = AdjacencySpectralEmbed()
X = ase.fit_transform(g)

labels2 = 40 * ["0"] + 100 * ["1"]
# pairplot(X, size=50, alpha=0.6)
labels1 = 10 * ["d"] + 30 * ["c"] + 50 * ["d"] + 25 * ["e"] + 25 * ["c"]
labels1 = np.array(labels1)
labels2 = np.array(labels2)

# plt.style.use(["seaborn",])
plt.style.use("seaborn-white")
heatmap(
    g,
    inner_hier_labels=labels1,
    outer_hier_labels=labels2,
    figsize=(20, 20),
    label_fontsize=30,
# %% setting up the model
n = 1000
k = 4
expected_degree = 40
degree_corrections = np.random.lognormal(2, 1.5, size=(n))
community_sizes = np.full(k, n // k)
block_probs = np.full((k, k), 0.1)
block_probs[0, 0] = 0.9
block_probs[1, 1] = 0.7
block_probs[2, 2] = 0.5
block_probs[3, 3] = 0.3
block_heatmap(block_probs, title=r"$B$")

_, labels = sbm(
    community_sizes, block_probs, directed=False, loops=False, return_labels=True
)

#%% rescaling to set the expected degree
block_p_mat = _block_to_full(block_probs, labels, (n, n))
unscaled_expected_degree = np.mean(np.sum(block_p_mat, axis=1))
scaling_factor = expected_degree / unscaled_expected_degree
print(f"Scaling factor: {scaling_factor:.3f}")

#%% [markdown]
# ## Sampling from the model
# Here I just sample a graph from the model (after rescaling to set the expected degree).
# Below I plot the adjacency matrix sorted by block and then by node degree within block.
# I also calculate the mean degree to show that it is close to what we specified.

#%% adjusting the degree correction params / rescaling, sampling a graph
for ul in np.unique(labels):
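    # The loop body is truncated in the source; a hedged guess at what it does,
    # since graspy's `dc` argument expects degree corrections that sum to 1
    # within each block:
    mask = labels == ul
    degree_corrections[mask] /= degree_corrections[mask].sum()

# Presumed next step (again a sketch, not the original code): sample with the
# rescaled block matrix
adj = sbm(community_sizes, scaling_factor * block_probs, dc=degree_corrections, loops=False)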
if weighted:
    # Poisson edge weights; blocks that differ from the baseline get a larger rate
    wt = n_comm * [n_comm * [np.random.poisson]]
    lams = np.random.uniform(0.1, 0.3, size=(n_comm ** 2))
    lams = lams.reshape(n_comm, n_comm)
    lams[P != base_p] = P[P != base_p] * 5
    wtargs = np.array([dict(lam=lam) for lam in lams.ravel()]).reshape(n_comm, n_comm)
else:
    lams = np.ones_like(P)
    wtargs = None
    wt = 1

adj, labels = sbm(
    n_per_comm,
    P,
    directed=True,
    loops=False,
    wt=wt,
    wtargs=wtargs,
    return_labels=True,
)

sns.set_context("talk", font_scale=0.5)
fig, axs = plt.subplots(1, 3, figsize=(16, 8))
sns.heatmap(
    P,
    annot=True,
    square=True,
    ax=axs[0],
    cbar=False,
    # cbar_kws=dict(shrink=0.7),
    cmap="RdBu_r",
    center=0,