def generate_random_network(beta, N, k_bar, kappa0, gamma):
    """Sample a random graph from hidden degrees and angular positions.

    Every node gets a hidden degree ``kappa`` (power law with minimum
    ``kappa0`` and exponent ``gamma``) and an angle ``theta`` drawn uniformly
    on ``[0, 2*pi)``.  A pair ``(i, j)`` with ``i < j`` is linked when
    ``r < 1 / (1 + khi**beta)`` for a uniform draw ``r``, where ``khi`` is the
    angular separation rescaled by ``mu * kappa_i * kappa_j``.

    :param beta: exponent applied to ``khi`` in the connection probability.
    :param N: number of hidden-variable draws (candidate nodes).
    :param k_bar: value entering the normalisation constant ``mu``.
    :param kappa0: minimum hidden degree (``xmin`` of the power law).
    :param gamma: exponent of the hidden-degree power law.
    :return: ``networkx.Graph`` holding the sampled edges; only nodes that
        received at least one edge are present in the graph.
    """
    two_pi = 2 * math.pi
    mu = beta * math.sin(math.pi / beta) / (two_pi * k_bar)

    # Draw order (kappa, theta, r) matters for seeded reproducibility.
    hidden_degree = powerlaw.Power_Law(
        xmin=kappa0, parameters=[gamma]).generate_random(N)
    angle = np.random.uniform(0, two_pi, N)

    # Entry [i, j] compares node j (row vector) with node i (column vector).
    separation = np.abs(angle[np.newaxis, :] - angle[:, np.newaxis])
    angular_distance = math.pi - np.abs(math.pi - separation)
    khi = N * angular_distance / (
        two_pi * mu * hidden_degree[np.newaxis, :] * hidden_degree[:, np.newaxis])

    uniform_draws = np.random.uniform(0, 1, [N, N])
    # ln(1/r - 1) / beta > ln(khi)  <=>  r < 1 / (1 + khi**beta)
    lhs = np.triu(np.log(1 / uniform_draws - 1) / beta, k=1)
    rhs = np.triu(np.log(khi), k=1)

    network = nx.Graph()
    network.add_edges_from(np.argwhere(lhs > rhs))
    return network
def expected_find_sim(n, t, alpha, k=2):
    """Simulate ``t`` draws from a discrete power law and track distinct values.

    Each draw is truncated to an integer "bug" id.  Draws below ``k`` or, when
    ``n`` is not ``None``, above ``n`` are ignored (with the default ``k=2``
    the value 1 stands for a run in which nothing happened).

    :param n: largest id that counts, or ``None`` for no upper bound.
    :param t: number of draws.
    :param alpha: exponent of the discrete power law (``xmin=1``).
    :param k: smallest id that counts.
    :return: tuple ``(found, history, order, first_seen)`` -- the set of
        distinct ids, the number of distinct ids after each draw, the ids in
        discovery order, and a dict mapping each id to the index of the draw
        that first produced it.
    """
    sampler = powerlaw.Power_Law(xmin=1, parameters=[alpha], discrete=True)
    draws = sampler.generate_random(t)

    found = set()
    history = []
    order = []
    first_seen = {}
    for step, raw in enumerate(draws):
        bug = int(raw)
        counts = bug >= k and (n is None or bug <= n)
        if counts and bug not in found:
            order.append(bug)
            first_seen[bug] = step
            found.add(bug)
        history.append(len(found))
    return found, history, order, first_seen
def validate(xmin, alpha, discrete='continuous', n_data=10000, n_trials=1):
    """Fit synthetic power-law data and report the recovered parameters.

    Data are generated from ``powerlaw.Power_Law(xmin, alpha)`` and then
    passed to ``powerlaw.Fit``.

    :param xmin: lower bound of the generating distribution.
    :param alpha: exponent of the generating distribution.
    :param discrete: one of ``'continuous'``, ``'discrete'`` or
        ``'discrete_estimate'`` (discrete data generated and fitted with
        ``estimate_discrete=True``).
    :param n_data: number of synthetic samples per trial.
    :param n_trials: number of independent trials.
    :return: ``(fit.xmin, fit.alpha)`` for a single trial; for
        ``n_trials > 1`` the transposed array of per-trial results.
    :raises ValueError: if ``discrete`` is not one of the supported modes.
    """
    # mode -> (discrete, estimate_discrete)
    modes = {
        'continuous': (False, False),
        'discrete': (True, False),
        'discrete_estimate': (True, True),
    }
    # Fail early with a clear message; previously an unknown mode surfaced
    # later as an UnboundLocalError on ``estimate_discrete``.
    if discrete not in modes:
        raise ValueError(
            "discrete must be one of %s, got %r" % (sorted(modes), discrete))

    if n_trials > 1:
        return array([
            validate(xmin, alpha, discrete=discrete, n_data=n_data, n_trials=1)
            for trial in arange(n_trials)
        ]).T

    is_discrete, estimate_discrete = modes[discrete]
    theoretical_distribution = powerlaw.Power_Law(
        xmin=xmin, parameters=[alpha], discrete=is_discrete)
    simulated_data = theoretical_distribution.generate_random(
        n_data, estimate_discrete=estimate_discrete)
    fit = powerlaw.Fit(simulated_data, discrete=is_discrete,
                       estimate_discrete=estimate_discrete)
    return fit.xmin, fit.alpha
def set_t(self):
    """Fill ``self.ts`` with power-law draws configured by ``self.args_t``.

    ``self.args_t`` is read as ``(exponent, xmin)``.  Nothing happens when it
    is falsy.  The number of draws is ``int((t_end - t_now) / xmin) + 4`` when
    ``self.t_now`` is non-negative and 1000 otherwise.
    """
    if not self.args_t:
        return

    sampler = powerlaw.Power_Law(
        xmin=self.args_t[1], parameters=[self.args_t[0]])
    if self.t_now >= 0:
        n_draws = int((self.t_end - self.t_now) / self.args_t[1]) + 4
    else:
        n_draws = 1000
    self.ts = sampler.generate_random(n_draws)
def random_graph_nx(n, p, sup_ext, alpha, seed=None, is_weighted=None, is_sparse=False):
    """Return the adjacency matrix of the largest component of an Erdos-Renyi graph.

    The binary structure comes from ``nx.fast_gnp_random_graph``; only its
    largest connected component is kept.  Edge weights are optionally drawn
    from a uniform, gaussian or power-law distribution.

    Note: this function reseeds NumPy's global random generator with ``seed``.

    :param n: Number of nodes.
    :type n: int
    :param p: Probability of observing a link between a couple of nodes.
    :type p: float
    :param sup_ext: Weight scale: exclusive upper bound for ``"uniform"``,
        mean (with standard deviation ``sup_ext / 5.5``) for ``"gaussian"``,
        ``xmax`` for ``"powerlaw"``.
    :type sup_ext: int, float
    :param alpha: Powerlaw exponent (used only when ``is_weighted == "powerlaw"``).
    :type alpha: float
    :param seed: Seed of the random process; drawn at random when None.
    :type seed: int, optional
    :param is_weighted: ``"uniform"``, ``"gaussian"`` or ``"powerlaw"``; any
        other value (including None) leaves the graph unweighted.
    :type is_weighted: str, optional
    :param is_sparse: If True the adjacency matrix is sparse, defaults to False.
    :type is_sparse: bool, optional

    :return: Adjacency matrix.
    :rtype: numpy.ndarray, scipy.sparse_matrix
    """
    if seed is None:
        seed = np.random.randint(0, n**2)
    nx_graph = nx.fast_gnp_random_graph(n=n, p=p, seed=seed)
    largest_cc = max(nx.connected_components(nx_graph), key=len)
    nx_graph_lcc = nx_graph.subgraph(largest_cc).copy()

    np.random.seed(seed=seed)
    if is_weighted == "uniform":
        for u, v in nx_graph_lcc.edges:
            nx_graph_lcc[u][v]["weight"] = np.random.randint(0, sup_ext)
    elif is_weighted == "gaussian":
        for u, v in nx_graph_lcc.edges:
            nx_graph_lcc[u][v]["weight"] = np.random.normal(
                loc=sup_ext, scale=sup_ext / 5.5)
    elif is_weighted == "powerlaw":
        # Build the distribution once; it does not depend on the edge.
        weight_dist = plw.Power_Law(
            xmin=1, xmax=sup_ext, parameters=[alpha], discrete=True)
        for u, v in nx_graph_lcc.edges:
            # generate_random(1) returns a length-1 array; store the scalar so
            # the weight can be placed into a numeric adjacency matrix.
            nx_graph_lcc[u][v]["weight"] = weight_dist.generate_random(1)[0]

    if is_sparse:
        # to_scipy_sparse_matrix no longer exists in NetworkX 3; fall back to
        # its replacement when the old name is missing.
        to_sparse = getattr(nx, "to_scipy_sparse_matrix", None)
        if to_sparse is None:
            to_sparse = nx.to_scipy_sparse_array
        adjacency = to_sparse(nx_graph_lcc)
    else:
        adjacency = nx.to_numpy_array(nx_graph_lcc)
    return adjacency
def generateSFNetwork(n=1000, gamma=2.1):
    """Build a configuration-model network with a power-law degree sequence.

    Degree sequences of length ``n`` are drawn from a power law
    (``xmin=2``, exponent ``gamma``) and truncated to integers until
    ``is_graphic_Erdos_Gallai`` accepts one; that sequence is handed to
    ``ConfigurationModel(..., relax=True)``.

    :param n: number of nodes.
    :param gamma: power-law exponent of the degree distribution.
    :return: whatever ``ConfigurationModel`` returns for the accepted sequence.
    """
    sampler = powerlaw.Power_Law(xmin=2, parameters=[gamma])

    # Start from the placeholder sequence so the graphicality test itself
    # decides whether any sampling is needed.
    degree_sequence = [1]
    while True:
        if is_graphic_Erdos_Gallai(degree_sequence):
            break
        degree_sequence = sampler.generate_random(n).astype('int')

    return ConfigurationModel(degree_sequence, relax=True)
def barabasi_albert_graph_nx(n, m, sup_ext, alpha, seed=None, is_weighted=None, is_sparse=False):
    """Return the adjacency matrix of the largest component of a Barabasi-Albert graph.

    The binary structure comes from ``nx.barabasi_albert_graph``; only its
    largest connected component is kept.  Edge weights are optionally drawn
    from a uniform, gaussian or power-law distribution.

    Note: this function reseeds NumPy's global random generator with ``seed``.

    :param n: Number of nodes.
    :type n: int
    :param m: Number of edges to attach from a new node to existing nodes.
    :type m: int
    :param sup_ext: Weight scale: exclusive upper bound for ``"uniform"``,
        mean (with standard deviation ``sup_ext / 5.5``) for ``"gaussian"``,
        ``xmax`` for ``"powerlaw"``.
    :type sup_ext: int, float
    :param alpha: Powerlaw exponent (used only when ``is_weighted == "powerlaw"``).
    :type alpha: float
    :param seed: Seed of the random process; drawn at random when None.
    :type seed: int, optional
    :param is_weighted: ``"uniform"``, ``"gaussian"`` or ``"powerlaw"``; any
        other value (including None) leaves the graph unweighted.
    :type is_weighted: str, optional
    :param is_sparse: If True the adjacency matrix is sparse, defaults to False.
    :type is_sparse: bool, optional

    :return: Adjacency matrix.
    :rtype: numpy.ndarray, scipy.sparse_matrix
    """
    if seed is None:
        seed = np.random.randint(0, n**2)
    nx_graph = nx.barabasi_albert_graph(n, m, seed=seed)
    largest_cc = max(nx.connected_components(nx_graph), key=len)
    nx_graph_lcc = nx_graph.subgraph(largest_cc).copy()

    np.random.seed(seed=seed)
    if is_weighted == "uniform":
        for u, v in nx_graph_lcc.edges:
            nx_graph_lcc[u][v]["weight"] = np.random.randint(0, sup_ext)
    elif is_weighted == "gaussian":
        for u, v in nx_graph_lcc.edges:
            nx_graph_lcc[u][v]["weight"] = np.random.normal(
                loc=sup_ext, scale=sup_ext / 5.5)
    elif is_weighted == "powerlaw":
        # Build the distribution once; it does not depend on the edge.
        weight_dist = plw.Power_Law(
            xmin=1, xmax=sup_ext, parameters=[alpha], discrete=True)
        for u, v in nx_graph_lcc.edges:
            # generate_random(1) returns a length-1 array; store the scalar so
            # the weight can be placed into a numeric adjacency matrix.
            nx_graph_lcc[u][v]["weight"] = weight_dist.generate_random(1)[0]

    if is_sparse:
        # to_scipy_sparse_matrix no longer exists in NetworkX 3; fall back to
        # its replacement when the old name is missing.
        to_sparse = getattr(nx, "to_scipy_sparse_matrix", None)
        if to_sparse is None:
            to_sparse = nx.to_scipy_sparse_array
        adjacency = to_sparse(nx_graph_lcc)
    else:
        adjacency = nx.to_numpy_array(nx_graph_lcc)
    return adjacency
def __pl_param_generator(alpha, size, max_int=1, integer=False, seed=None):
    '''
    Sample ``size`` parameter values from a power-law distribution.

    NumPy's global random generator is reseeded with ``seed`` before sampling.
    With the default ``max_int == 1`` the draws are mapped into the unit
    range: values below 100 are divided by 100 and anything else becomes 1,
    and a list is returned.  Otherwise the raw draws are returned unchanged.
    '''
    np.random.seed(seed)
    sampler = pl.Power_Law(xmax=max_int, parameters=[alpha], discrete=integer)
    draws = sampler.generate_random(size)

    if max_int != 1:
        return draws

    scaled = []
    for value in draws:
        if value < 100:
            scaled.append(value / np.float64(100))
        else:
            scaled.append(1)
    return scaled
def __init__(self, unique_id, model, pos):
    """Store the agent's identity and position and set up its movement state.

    Trip lengths follow a power law with ``xmin=7`` and exponent 1.9, and the
    agent can choose among 18 direction indices (0..17).
    """
    self.unique_id, self.model, self.pos = unique_id, model, pos
    self.traversable = True

    # Author's note on the choice: exponents of 1.5-2.0 considered; xmin 17?
    trip_length_distribution = powerlaw.Power_Law(xmin=7, parameters=[1.9])
    self.trip_lengths = trip_length_distribution

    self.directions = np.arange(18)
    self.remaining_steps = 0
def goodness_of_fit(fit, data, gen_data=1000, data_samples_lb=10000):
    """Estimate a goodness-of-fit score for a fitted discrete power law.

    ``gen_data`` synthetic datasets are drawn from a discrete power law with
    ``xmin=1`` and the fitted exponent; the KS distance of each one to that
    power law (``xmin=1``, ``xmax=1000``) is computed.

    :param fit: fitted object exposing ``fit.power_law.alpha`` and
        ``fit.power_law.D``.
    :param data: empirical data; only its length is used.
    :param gen_data: number of synthetic datasets.
    :param data_samples_lb: lower bound on the size of each synthetic dataset.
    :return: fraction of synthetic KS distances that exceed ``fit.power_law.D``.
    """
    fitted_alpha = fit.power_law.alpha
    synthetic_model = powerlaw.Power_Law(
        xmin=1, parameters=[fitted_alpha], discrete=True)

    data_samples = max(len(data), data_samples_lb)
    print("GoF data_samples", data_samples)

    ks_distances = [
        powerlaw.power_law_ks_distance(
            synthetic_model.generate_random(data_samples),
            fitted_alpha,
            xmin=1,
            xmax=1000,
            discrete=True)
        for _ in range(gen_data)
    ]
    worse_than_observed = np.array(ks_distances) > fit.power_law.D
    return sum(worse_than_observed) / gen_data
def __init__(self, unique_id, model, pos, exp, directions):
    """Store the agent's identity and position and set up its trip state.

    Trip lengths follow a power law with ``xmin=7`` and exponent ``exp``.
    The ``directions`` argument is not read; ``self.directions`` is always
    built as ``Directions(3)``.
    """
    self.unique_id, self.model, self.pos = unique_id, model, pos
    self.traversable = True

    # Author's note on the choice: exponents of 1.5-2.0 considered; xmin 17?
    trip_length_distribution = powerlaw.Power_Law(xmin=7, parameters=[exp])
    self.trip_lengths = trip_length_distribution

    self.direction_range = 3
    self.directions = Directions(self.direction_range)

    self.current_direction = 0
    self.remaining_steps = 0
    self.start = self.end_point = None
def __init__(self, unique_id, model, pos, exp, x_min, seed=None):
    """Store the agent's identity and position and set up its trip state.

    Trip lengths follow a power law with minimum ``x_min`` and exponent
    ``exp``.  ``self.area_traversed`` is a ``model.width`` x ``model.height``
    grid of zeros with the starting cell marked 1.  ``seed`` is accepted but
    not used here.
    """
    self.unique_id, self.model, self.pos = unique_id, model, pos
    self.area_traversed = np.zeros((model.width, model.height))
    self.traversable = True

    # Author's note on the choice: exponents of 1.5-2.0 considered; xmin 17?
    trip_length_distribution = powerlaw.Power_Law(
        xmin=x_min, parameters=[exp])
    self.trip_lengths = trip_length_distribution

    self.direction_range = 3
    self.directions = Directions(self.direction_range)

    self.trip_lengths_covered = []
    self.steps_covered = []

    # Mark the cell the agent starts on as visited.
    start_x, start_y = self.pos[0], self.pos[1]
    self.area_traversed[start_x, start_y] = 1

    self.on_trip = False
    self.current_direction = 0
    self.remaining_steps = 0
def initial_directions(M, source_l, source_b, weights, d, NN, energies=None):
    """Add particles to ``M`` around each source and return their directions.

    For every source the longitude/latitude pair (``source_l``, ``source_b``,
    converted from degrees to radians) defines a mean direction; 500 momentum
    vectors are drawn around it with ``randFisherVector`` and added to ``M``
    via ``M.addParticle``.

    NOTE(review): several values are computed and then overwritten before use
    (``ncrs``, ``energy``, the first ``kappa``), and the ``energies`` argument
    is replaced inside the loop -- these look like debugging overrides; confirm
    before relying on ``weights``, ``NN`` or ``energies`` having any effect.
    NOTE(review): ``total`` is not defined in this function and ``xrange``
    only exists on Python 2 -- verify against the rest of the file.

    :param M: object receiving particles through ``addParticle``.
    :param source_l: source longitudes in degrees.
    :param source_b: source latitudes in degrees.
    :param weights: per-source weights (printed, and used for the discarded
        ``ncrs`` value).
    :param d: per-source values entering the angular spread ``theta_k``
        (presumably distances -- TODO confirm units).
    :param NN: total particle budget (only used for the discarded ``ncrs``).
    :param energies: overwritten inside the loop; the passed value is not read.
    :return: tuple ``(lons0, lats0)`` of NumPy arrays built from the
        ``getPhi()`` / ``getTheta()`` of every momentum vector, shifted by
        ``-pi`` and mapped through ``-CoLat2Lat`` respectively.
    """
    lons0 = []
    lats0 = []
    for isource in range(len(source_l)):
        # Degrees -> radians; longitude shifted by -2*pi, latitude to colatitude.
        l = source_l[isource] * np.pi / 180.0 - (2.0 * np.pi)
        b = Lat2CoLat(source_b[isource] * np.pi / 180.0)
        #M.addParticles(Id, E, l, b, E0**-1)
        print(isource, weights[isource], l, b)
        meanDir = crpropa.Vector3d(-1, 0, 0)
        meanDir.setRThetaPhi(-1, b, l)
        kappa = 50.0  # overwritten per particle below
        ncrs = int((weights[isource] / total) * NN)
        #if ncrs>0:
        ncrs = 500  # overrides the weight-based count computed just above
        #Emin = 10.0
        energies = powerlaw.Power_Law(xmin=10.0,
                                      xmax=100.0,
                                      parameters=[2.2]).generate_random(ncrs)
        #print (energies)
        for i in xrange(ncrs):
            particleId = crpropa.nucleusId(1, 1)
            energy = (energies[i]) * crpropa.EeV
            energy = 10.0 * crpropa.EeV  # overrides the sampled energy above
            galCenter = crpropa.Vector3d(-1, 0, 0)  # not used afterwards
            # Angular spread grows as energy drops and as d[isource] grows;
            # it is converted to a Fisher concentration parameter.
            theta_k = 0.8 * (100.0 * crpropa.EeV / energy) * np.sqrt(
                d[isource] / 10.0)
            kappa = (81.0 / theta_k)**2
            #print (kappa)
            momentumVector = crpropa.Random.instance().randFisherVector(
                meanDir, kappa)
            M.addParticle(particleId, energy, momentumVector)
            lons0.append(momentumVector.getPhi())
            lats0.append(momentumVector.getTheta())
    lons0 = np.array(lons0) - np.pi
    lats0 = -CoLat2Lat(np.array(lats0))
    return (lons0, lats0)
def node_deg(n, m, max_deg):
    """Sample a capped power-law degree sequence with roughly ``m`` edges.

    Starting from exponent 3, ``n`` degrees are drawn from a power law with
    ``xmin=1``; any value above ``max_deg`` is redrawn until none remains, and
    the result is rounded.  While the implied edge count (half the degree sum)
    is below ``m`` the exponent is lowered -- by 0.01 when within ``m / 5`` of
    the target, otherwise by 0.1 -- and the sampling repeats.  The search
    stops once the exponent drops below 1.01.

    :param n: number of nodes.
    :param m: target number of edges.
    :param max_deg: largest degree allowed before rounding.
    :return: list of degrees sorted in descending order.
    """
    exponent = 3.
    degrees = [0]
    while sum(degrees) / 2 < m:
        sampler = powerlaw.Power_Law(xmin=1., parameters=[exponent])
        degrees = sampler.generate_random(n)

        # Redraw every entry that exceeds the cap until all of them fit.
        too_large = np.flatnonzero(degrees > max_deg)
        while len(too_large) != 0:
            degrees[too_large] = sampler.generate_random(len(too_large))
            too_large = np.flatnonzero(degrees > max_deg)
        degrees = np.round(degrees)

        shortfall = m - sum(degrees) / 2
        exponent -= 0.01 if shortfall < m / 5 else 0.1
        if exponent < 1.01:
            print("break")
            break

    print("expected number of edges : ", sum(degrees) / 2)
    return sorted(degrees, reverse=True)
import powerlaw
import math
import matplotlib.pyplot as plt
import numpy as np

# Power-law exponents to compare and the number of single draws per exponent.
EXPONENT = [1.5, 1.6, 1.7, 1.8, 1.9, 2.0]
ITERATIONS = 1000
draws = []

# First pass: trip lengths with xmin = 7; one histogram per exponent is saved.
for exp in EXPONENT:
    trip_lengths = powerlaw.Power_Law(xmin=7, parameters=[exp])
    trip_draws = []
    for i in range(ITERATIONS):
        draw = int(trip_lengths.generate_random(1)[0])
        # Draws above 200 are dropped from the histogram.
        if draw <= 200:
            trip_draws.append(draw)
    trip_draws.sort()
    data = np.asarray(trip_draws)
    # Bin width of 10 -> number of bins covering the observed range.
    w = 10
    n = math.ceil((data.max() - data.min()) / w)
    plt.title(f"Exp = {exp}, xmin = 7")
    plt.hist(data, bins=n)
    plt.xlabel('trip length')
    plt.savefig(f'../plots/Power_Law/exp={exp}_xmin7.png')
    plt.close()

# Second pass: same exponents without an explicit xmin.
for exp in EXPONENT:
    trip_lengths = powerlaw.Power_Law(parameters=[exp])
    trip_draws = []
    for i in range(ITERATIONS):
# Save the current figure as EPS.
figname = 'FigLognormal'
savefig(figname + '.eps', bbox_inches='tight')
#savefig(figname+'.tiff', bbox_inches='tight', dpi=300)

# <markdowncell>

# # Creating Simulated Data

# <codecell>

# Two ways to obtain simulated data: from the power law fitted to empirical
# data, or from a power law with explicitly chosen parameters.  The second
# assignment replaces the first.
empirical_data = blackouts
####
fit = powerlaw.Fit(empirical_data)
simulated_data = fit.power_law.generate_random(10000)

theoretical_distribution = powerlaw.Power_Law(xmin=5.0, parameters=[2.5])
simulated_data = theoretical_distribution.generate_random(10000)

# <codecell>

# Generate data with known parameters, fit it, and show the recovered
# xmin and alpha.
theoretical_distribution = powerlaw.Power_Law(xmin=5.0, parameters=[2.5])
simulated_data = theoretical_distribution.generate_random(10000)
####
fit = powerlaw.Fit(simulated_data)
fit.power_law.xmin, fit.power_law.alpha

# <codecell>

# Plot the PDF of the simulated data and overlay the fitted power law
# (dashed red).
powerlaw.plot_pdf(simulated_data, linewidth=3)
fit.power_law.plot_pdf(simulated_data, linestyle='--', color='r')
def set_t(self):
    """Fill ``self.ts`` with 100 power-law draws configured by ``self.args_t``.

    ``self.args_t[1]`` is used as ``xmin`` and the negated ``self.args_t[0]``
    as the distribution parameter.  Nothing happens when ``self.args_t`` is
    empty.
    """
    if len(self.args_t) == 0:
        return

    sampler = powerlaw.Power_Law(
        xmin=self.args_t[1], parameters=[-self.args_t[0]])
    self.ts = sampler.generate_random(100)
def one_shot_removal(feature_score, alpha):
    """
    Fits the distribution of saliency scores to several candidate
    distributions, picks the best fitting one and keeps the features whose
    CDF value under that distribution exceeds 1 - alpha.
    Performed for a single layer; this function is called by
    compute_new_reduced_model.

    The input array is not modified: zero scores are replaced by 1e-10 on an
    internal copy only.

    Inputs:
    - feature_score: Numpy array containing the saliency score for each feature
    - alpha: 1 - alpha is the CDF threshold above which a feature is kept (float)

    Returns:
    - selected_features: Numpy array marking the selected features; a boolean
      mask when a fit was performed, otherwise an array of ones (all features
      kept).  All features are also kept when the fit selects none.
    """
    LAYER_SIZE_THRESHOLD = 2

    # Work on a copy so the caller's saliency array is left untouched.
    scores = np.array(feature_score)
    layer_size = np.shape(scores)[0]
    selected_features = np.zeros(np.shape(scores))

    if layer_size > LAYER_SIZE_THRESHOLD:
        scores[scores == 0] = 1e-10
        x_min = np.min(scores)
        x_max = np.max(scores)

        # (name understood by distribution_fit, reported label, distribution class)
        candidates = (
            ('power_law', 'Power_Law', pl.Power_Law),
            ('lognormal', 'Lognormal', pl.Lognormal),
            ('exponential', 'Exponential', pl.Exponential),
            ('stretched_exponential', 'Stretched_Exponential',
             pl.Stretched_Exponential),
        )
        params = {}
        loglikelihood = {}
        for fit_name, _, _ in candidates:
            params[fit_name], loglikelihood[fit_name] = pl.distribution_fit(
                scores, distribution=fit_name, xmin=x_min, xmax=x_max,
                discrete=False, comparison_alpha=False,
                search_method='Likelihood', estimate_discrete=False)

        print('Shape of layer', np.shape(scores))
        print('loglikelihood_power_law', loglikelihood['power_law'],
              'loglikelihood_lognormal', loglikelihood['lognormal'],
              'loglikelihood_expo', loglikelihood['exponential'],
              'loglikelihood_stretched', loglikelihood['stretched_exponential'])

        def _rank(candidate):
            # A failed fit (NaN log-likelihood) must never win the comparison.
            value = loglikelihood[candidate[0]]
            return -np.inf if np.isnan(value) else value

        # max() returns the first best candidate, so ties resolve in the order
        # listed above instead of leaving no distribution selected.
        fit_name, best_fit_dist, distribution_cls = max(candidates, key=_rank)
        best_param = params[fit_name]
        theoretical_distribution = distribution_cls(
            xmin=x_min, parameters=best_param, xmax=x_max, discrete=False)
        prob_dist = theoretical_distribution.cdf(scores)

        print('values', scores)
        print('PDF: ', prob_dist, prob_dist.shape, 'best fit distribution',
              best_fit_dist, 'best params ', best_param)
        selected_features = prob_dist > (1 - alpha)
        print('Number of DeepLIFT selected features: ', np.sum(selected_features))

    # Tiny layers, or a fit that selected nothing, keep every feature.
    if layer_size < LAYER_SIZE_THRESHOLD or np.sum(selected_features) == 0:
        selected_features = np.ones(np.shape(scores))
    return selected_features
import keras.backend as K
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Conv2D, MaxPool2D, Dropout, Flatten, Input
from keras.layers import Reshape, UpSampling2D
import powerlaw
import math, random
import numpy as np
import matplotlib
# Select the non-interactive Agg backend before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

REPEATS = 1000
MULT = 2
# Discrete power law with xmin = 1 and exponent 1.9.
pwl = powerlaw.Power_Law(xmin=1, discrete=True, parameters=[1.9])


def py(d=0.1):
    """Something akin to a Pitman-Yor process.

    Infinite generator.  Each step yields either a fresh draw from a standard
    normal (always for the first value, afterwards with probability ``d``) or
    a uniformly chosen previously yielded value.  Every yielded value is added
    to the pool, so values that were reused become more likely to be reused
    again.
    """
    members = []
    while True:
        if len(members) < 1 or random.random() < d:
            draw = random.normalvariate(0, 1.0)
        else:
            draw = random.choice(members)
        yield draw
        # Recorded only after the consumer resumes the generator.
        members.append(draw)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import powerlaw as powerlaw
from sklearn import preprocessing

# Normalization constant exercise

# In[39]:

prob_df = pd.DataFrame()
# Let's try to draw this on a histogram, as before.
# One column of 10000 continuous power-law samples (xmin = 2) per exponent.
prob_df["power_2"] = powerlaw.Power_Law(xmin=2,
                                        parameters=[2],
                                        discrete=False).generate_random(10000)
prob_df["power_2_5"] = powerlaw.Power_Law(
    xmin=2, parameters=[2.5], discrete=False).generate_random(10000)
prob_df["power_3"] = powerlaw.Power_Law(xmin=2,
                                        parameters=[3],
                                        discrete=False).generate_random(10000)
prob_df["power_1_5"] = powerlaw.Power_Law(
    xmin=2, parameters=[1.5], discrete=False).generate_random(10000)

# 2 x 2 grid of histograms, one panel per column.
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
row = 0
col = 0
for pl in ["power_2", "power_2_5", "power_3", "power_1_5"]:
    prob_df[pl].hist(bins=20, ax=axes[row, col])
    #axes[row,col].bar(x=prob_df[pl].unique(),height=prob_df[pl].value_counts().sort_index())
    axes[row, col].set_title(pl)
    if row == 1:
        col += 1
def set_up_hash_distr(net_p2p, centrality_measure, hash_distribution,
                      number_selfish_nodes, number_honest_nodes, alpha):
    """Draw per-node hashing power and decide which nodes are selfish.

    Raw hashing power is sampled separately for selfish and honest nodes and
    then rescaled so the selfish nodes sum to ``alpha`` and the honest nodes
    to ``1 - alpha``.  The ``"POWERLAW"`` and ``"EXPONENTIAL"`` options read
    the names ``pl_alpha`` and ``exp_lambda``, which are not defined in this
    function.

    :param net_p2p: network passed to ``nx.betweenness_centrality``; only used
        when ``centrality_measure == "BETWEENNESS"``, where its nodes are used
        as indices into the result arrays.
    :param centrality_measure: ``"RANDOM"`` (selfish nodes placed at random)
        or ``"BETWEENNESS"`` (the most central nodes are selfish and hashing
        power is assigned in centrality order).
    :param hash_distribution: ``"UNIFORM"``, ``"POWERLAW"`` or ``"EXPONENTIAL"``.
    :param number_selfish_nodes: number of selfish nodes.
    :param number_honest_nodes: number of honest nodes.
    :param alpha: total share of hashing power held by selfish nodes.
    :return: tuple ``(hashing_power, is_selfish)`` of arrays indexed by node.
    :raises ValueError: for an unknown ``hash_distribution`` or
        ``centrality_measure``.
    """
    # make sure that when there are no selfish nodes that alpha is never unequal 0.
    # (in case you want to simulate only honest nodes)
    assert not (number_selfish_nodes == 0 and alpha !=
                0), "Alpha unequal 0 with no selfish nodes"

    # Reject unknown options before any random numbers are consumed; these
    # used to fail later with an UnboundLocalError.
    if hash_distribution not in ("UNIFORM", "POWERLAW", "EXPONENTIAL"):
        raise ValueError(
            "Unknown hash_distribution: %r" % (hash_distribution,))
    if centrality_measure not in ("RANDOM", "BETWEENNESS"):
        raise ValueError(
            "Unknown centrality_measure: %r" % (centrality_measure,))

    if hash_distribution == "UNIFORM":
        hashing_power_selfish = np.random.random(number_selfish_nodes)
        hashing_power_honest = np.random.random(number_honest_nodes)
    elif hash_distribution == "POWERLAW":
        power_distrib = pl.Power_Law(parameters=[pl_alpha], discrete=False)
        hashing_power_selfish = power_distrib.generate_random(
            number_selfish_nodes)
        hashing_power_honest = power_distrib.generate_random(
            number_honest_nodes)
    else:  # "EXPONENTIAL"
        exp_distrib = pl.Exponential(parameters=[exp_lambda])
        hashing_power_selfish = exp_distrib.generate_random(
            number_selfish_nodes)
        hashing_power_honest = exp_distrib.generate_random(
            number_honest_nodes)

    # normalize vector so that sum of selfish hashing power equals alpha &
    # honest hashing power equals 1-alpha.
    if number_selfish_nodes != 0:
        hashing_power_selfish /= sum(hashing_power_selfish)
        hashing_power_selfish *= alpha
    # With no honest nodes there is nothing to rescale (and alpha == 1 would
    # otherwise divide zero by zero).
    if number_honest_nodes != 0:
        hashing_power_honest /= sum(hashing_power_honest) / (1 - alpha)

    # combine selfish and honest hashing power vectors together
    hashing_power_unsorted = np.append(
        hashing_power_selfish, hashing_power_honest)

    if centrality_measure == "RANDOM":
        # is_selfish follows the order of the hashing power vector ...
        is_selfish = np.append(np.ones(number_selfish_nodes),
                               np.zeros(number_honest_nodes))
        # ... and both arrays are shuffled in unison.
        randomize = np.arange(len(hashing_power_unsorted))
        np.random.shuffle(randomize)
        hashing_power = hashing_power_unsorted[randomize]
        is_selfish = is_selfish[randomize]
    else:  # "BETWEENNESS"
        # nodes ordered from most to least central
        btwn = nx.betweenness_centrality(net_p2p)
        ranked_nodes = sorted(btwn, key=btwn.get, reverse=True)

        # the most central nodes are the selfish ones
        is_selfish = np.zeros(number_honest_nodes + number_selfish_nodes)
        for node in ranked_nodes[:number_selfish_nodes]:
            is_selfish[node] = 1

        # the node of rank r receives the r-th entry of the combined vector,
        # so selfish nodes get the selfish hashing power
        hashing_power = hashing_power_unsorted.copy()
        for rank, node in enumerate(ranked_nodes):
            hashing_power[node] = hashing_power_unsorted[rank]

    return hashing_power, is_selfish
# One column of power-law samples per run; samp_vec is the 1..num_samp index
# used as the x axis of the left panel.
samples = np.zeros([num_samp, num_runs])
samp_vec = np.linspace(1, num_samp, num_samp)
color_list = ['blue3', 'red3', 'green3', 'yellow3']
fig, ax = plt.subplots(nrows=1, ncols=2, sharex=False, sharey=False)
fig.suptitle(
    'power-law distribution; x_min = {:5.2f}, exponent = {:5.2f}'.format(
        x_min, exponent))
bin_min = 1e9
bin_max = 0
for ii in range(num_runs):
    samples[:, ii] = powerlaw.Power_Law(xmin=x_min,
                                        parameters=[exponent
                                                    ]).generate_random(num_samp)
    # Logarithmically spaced bins spanning this run's sample range.
    bin_min_ii, bin_max_ii = np.min(samples[:, ii]), np.max(samples[:, ii])
    bins = 10**(np.linspace(np.log10(bin_min_ii), np.log10(bin_max_ii),
                            num_bins))
    samp_hist, bin_edges = np.histogram(samples[:, ii], bins, density=True)
    bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2.
    # Left panel: raw samples on a log y axis.
    ax[0].semilogy(samp_vec,
                   samples[:, ii],
                   '-',
                   color=colors[color_list[ii]],
                   label='trial {}'.format(ii + 1))
    # Right panel: density histogram on log-log axes.
    ax[1].loglog(bin_centers, samp_hist,
def generate_out_degree_distribution(out_degree_functional_form='power-law', **kwargs):
    """Sample an out-degree sequence and return it with its parameters.

    :param out_degree_functional_form: ``'gaussian'`` or a power law, spelled
        ``'power-law'`` (the default) or ``'power_law'``.
    :param kwargs: ``num_nodes`` is always required.  ``'gaussian'`` needs
        ``center`` and ``st_dev``; the power law needs ``alpha`` and
        ``k_out_min``.
    :return: dict with ``'functional_form'`` (``'gaussian'`` or
        ``'power_law'``), the distribution parameters, ``'num_nodes'`` and
        ``'node_degrees'`` -- the rounded degrees sorted in descending order.
    :raises ValueError: if a required keyword is missing or the functional
        form is not supported.
    """
    def _required(key, message):
        # Fetch a mandatory keyword argument or fail with its message.
        if key not in kwargs:
            raise ValueError(message)
        return kwargs[key]

    num_nodes = _required(
        'num_nodes',
        '[_functions_network/generate_degree_distribution] You must specify the number of nodes in the network (num_nodes)'
    )

    out_degree_distribution = dict()

    if out_degree_functional_form == 'gaussian':
        center = _required(
            'center',
            '[_functions_network/generate_degree_distribution] For a gaussian out-degree distribution, you must specify the mean of the gaussian distribution (center)'
        )
        st_dev = _required(
            'st_dev',
            '[_functions_network/generate_degree_distribution] For a gaussian out-degree distribution, you must specify the standard deviation (st_dev)'
        )
        out_degree_distribution['functional_form'] = 'gaussian'
        out_degree_distribution['center'] = center
        out_degree_distribution['st_dev'] = st_dev
        out_degree_distribution['num_nodes'] = int(num_nodes)
        raw_degrees = np.random.normal(center, st_dev, int(num_nodes))
    # Accept both spellings: the default argument uses a hyphen, while the
    # value stored under 'functional_form' uses an underscore.
    elif out_degree_functional_form in ('power_law', 'power-law'):
        alpha = _required(
            'alpha',
            '[_functions_network/generate_degree_distribution] For a power-law out-degree distribution, you must specify the exponent (alpha)'
        )
        k_out_min = _required(
            'k_out_min',
            '[_functions_network/generate_degree_distribution] For a power-law out-degree distribution, you must specify the minimum out degree (k_out_min)'
        )
        out_degree_distribution['functional_form'] = 'power_law'
        out_degree_distribution['k_out_min'] = k_out_min
        out_degree_distribution['alpha'] = alpha
        out_degree_distribution['num_nodes'] = int(num_nodes)
        raw_degrees = powerlaw.Power_Law(
            xmin=k_out_min, parameters=[alpha]).generate_random(int(num_nodes))
    else:
        raise ValueError(
            '[_functions_network/generate_degree_distribution] Unsupported out-degree functional form: %r'
            % (out_degree_functional_form,))

    # Largest degree first.
    out_degree_distribution['node_degrees'] = np.flipud(
        np.round(np.sort(raw_degrees)))
    return out_degree_distribution
def BlockModelGen(lamb,
                  n,
                  beta=0,
                  K=3,
                  rho=0,
                  simple=True,
                  power=True,
                  alpha=5,
                  degree_seed=None):
    """
    Description
    ----------
    Generates networks from degree corrected stochastic block model, with
    various options for node degree distribution.  Communities have equal
    proportions (1 / K) and equal weights.

    Arguments
    ----------
    lamb : average node degree

    n : size of network

    beta : out-in ratio: the ratio of between-block edges over within-block
           edges.  With beta = 0 there are no between-block edges.

    K : number of communities

    rho : proportion of small degrees within each community if the degrees
          are from two point mass distribution. rho > 0 gives degree corrected
          block model. If rho > 0 and simple=True, then generate the degrees
          from two point mass distribution, with rho portion of 0.2 values and
          1-rho proportion of 1 for degree parameters. If rho=0, generate from
          SBM.

    simple : Indicator of whether two point mass degrees are used, if rho > 0.
             If rho=0, this is not effective

    power : Whether or not use powerlaw distribution for degrees. If False,
            generate theta from U(0.2,1); if True, generate theta from
            powerlaw. Only effective if rho > 0, simple=False.

    alpha : Shape parameter for powerlaw distribution.

    degree_seed : Can be a sequence (list or numpy array) of prespecified
                  values for theta. Then the function will do sampling with
                  replacement from it to generate theta. It can be used to
                  control noise level between different configuration
                  settings.  Only used if rho > 0, simple=False, power=True.

    Returns
    -------
    A dictionary with:
        (variable name)["A"] : the generated network adjacency matrix
        (variable name)["g"] : community membership
        (variable name)["P"] : probability matrix of the network
        (variable name)["theta"] : node degree parameter

    Author(s)
    ----------
    Tianxi Li, Elizaveta Levina, Ji Zhu
    """
    w = [1] * K
    Pi = 1 / K

    # Block connectivity pattern: 1 between blocks, w / beta within blocks.
    # The matrix must be floating point, otherwise 1 / beta is truncated to
    # an integer when written onto the diagonal.
    if beta > 0:
        P0 = np.ones((K, K), dtype=float)
        np.fill_diagonal(P0, [w_element / beta for w_element in w])
    else:
        P0 = np.diag(w)

    # Mean degree parameter assumed when scaling P for the chosen theta scheme.
    if (rho > 0) and (simple != True) and (power == True):
        theta_mean = 1.285
    elif (rho > 0) and (simple != True) and (power != True):
        theta_mean = 0.6
    else:
        theta_mean = rho * 0.2 + (1 - rho)

    Pi_vec = [[Pi] for _ in range(K)]
    block_mass = np.matmul(np.matmul(np.transpose(Pi_vec), P0), Pi_vec)
    P = lamb * P0 / ((n - 1) * block_mass * theta_mean**2)

    # One-hot membership matrix: M[i, g] = 1 when node i belongs to block g.
    membership = random.choices(range(0, K), k=n, weights=[Pi] * K)
    M = np.zeros((n, K), dtype=np.int32)
    M[np.arange(n), np.asarray(membership, dtype=int)] = 1

    A_bar = np.matmul(np.matmul(M, P), np.transpose(M))

    node_degree = [1] * n
    if rho > 0:
        randunif = np.random.uniform(size=n)
        if simple == True:
            node_degree = [0.2 if u < rho else 1 for u in randunif]
        elif power == False:
            node_degree = np.random.uniform(size=n) * 0.8 + 0.2
        else:
            # ``is None`` rather than ``== None``: the latter compares
            # element-wise and fails when degree_seed is a numpy array.
            if degree_seed is None:
                degree_seed = powerlaw.Power_Law(
                    xmin=1, parameters=[alpha]).generate_random(n)
            node_degree = random.choices(degree_seed, k=n)

    # Degree correction theta_i * A_bar[i, j] * theta_j; broadcasting gives
    # the same values as multiplying by diag(theta) on both sides.
    theta = np.asarray(node_degree, dtype=float)
    A_bar = theta[:, np.newaxis] * A_bar * theta[np.newaxis, :]
    # Rescale so that the expected average degree equals lamb.
    A_bar = A_bar * lamb / np.mean(np.sum(A_bar, axis=0))

    # Sample the upper triangle independently, then symmetrise.
    upper_index = np.triu_indices(n, k=1)
    upper_p = A_bar[upper_index]
    upper_u = np.random.uniform(size=len(upper_p))
    A = np.zeros((n, n), dtype=np.int32)
    A[upper_index] = np.where(upper_u < upper_p, 1, 0)
    A = A + np.transpose(A)
    np.fill_diagonal(A, 0)

    # return statement in dictionary form
    dic = dict()
    # generated network adjacency matrix
    dic["A"] = A
    # community membership
    dic["g"] = membership
    # probability matrix of the network
    dic["P"] = A_bar
    # node degree parameter
    dic["theta"] = node_degree
    return (dic)