from random import randint

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Local repository modules. Helpers referenced by bare name below
# (s_metric, generate_random_degree_sequence, get_dd_planted_partition,
# impute_edge_algorithm, predict_structure, and the constant N) are
# assumed to be defined or imported elsewhere in this module.
import graph_measures
import random_graphs
import structural_identities


def generate_bounds_degree_sequence(ranges, simulations):
    """Empirically track the min and max of the s metric over random
    degree sequences of increasing size under the configuration model."""
    interior_mixing = 10
    min_values = (ranges - 2) * [0]
    max_values = (ranges - 2) * [0]
    for degree_seq_size in range(2, ranges):
        for sim in range(simulations):
            # Pick a degree sequence a priori and watch how the s statistic
            # evolves as we add more nodes.
            degree_sequence = [randint(1, degree_seq_size)
                               for i in range(degree_seq_size)]
            min_value = float('inf')
            max_value = 0
            for sim_ in range(interior_mixing):
                if sum(degree_sequence) % 2 != 0:
                    # The degree sum must be even for the sequence to be
                    # realizable, so bump one random entry. randint is
                    # inclusive on both ends, so the valid indices are
                    # 0..degree_seq_size - 1.
                    degree_sequence[randint(0, degree_seq_size - 1)] += 1
                graph = random_graphs.configuration_model(degree_sequence,
                                                          cleaned=True)
                metric = s_metric(graph)
                min_value = min(min_value, metric)
                max_value = max(max_value, metric)
            min_values[degree_seq_size - 2] = min_value
            max_values[degree_seq_size - 2] = max_value
    return min_values, max_values
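# s_metric is provided elsewhere in this repository. For reference, a minimal
# sketch of the standard definition (Li, Alderson, Doyle, and Willinger:
# s(G) is the sum of deg(u) * deg(v) over all edges), assuming networkx-style
# graphs; the repository's own implementation may differ.
def s_metric_sketch(G):
    """Return s(G) = sum over edges (u, v) of deg(u) * deg(v)."""
    return sum(G.degree(u) * G.degree(v) for u, v in G.edges())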
def generate_bounds_degree_sequence_planted_partition(ranges, simulations):
    """Run the same bounds experiment with degree sequences drawn from a
    planted partition model instead of uniformly at random."""
    interior_mixing = 10
    min_values = (ranges - 2) * [0]
    max_values = (ranges - 2) * [0]
    for degree_seq_size in range(2, ranges):
        for sim in range(simulations):
            # Pick a degree sequence a priori and watch how the s statistic
            # evolves as we add more nodes. Note the planted partition
            # sequence does not depend on degree_seq_size, so the outer loop
            # simply repeats the experiment.
            degree_sequence = get_dd_planted_partition()
            min_value = float('inf')
            max_value = 0
            for sim_ in range(interior_mixing):
                if sum(degree_sequence) % 2 != 0:
                    # Index into the actual sequence, whose length need not
                    # equal degree_seq_size.
                    degree_sequence[randint(0, len(degree_sequence) - 1)] += 1
                graph = random_graphs.configuration_model(degree_sequence,
                                                          cleaned=True)
                metric = s_metric(graph)
                min_value = min(min_value, metric)
                max_value = max(max_value, metric)
            min_values[degree_seq_size - 2] = min_value
            max_values[degree_seq_size - 2] = max_value
    return min_values, max_values


# DEGREE SEQUENCE AND GRAPH DIVERSITY
#
# Generate bounds under maximum entropy (configuration model):
#
# ranges = 1000
# simulations = 10
# mins, maxs = generate_bounds_degree_sequence(ranges, simulations)
# print(mins)
# print(maxs)
# plt.plot(mins)
# plt.plot(maxs)
# plt.title("Bounds for s metric under the configuration model, "
#           "approximately uniform degree distribution")
# plt.xlabel("number of nodes within graph")
# plt.ylabel("minimum and maximum s metric")
# plt.show()
#
# Next we need to define the coefficient of variation.
#
# Then we verify: for graphs with regular structure that have low variability
# in their degree sequence D, there is typically very little diversity in the
# corresponding space of graphs G(D).
#
# Look at the coefficient of variation for graphs whose degree sequence has an
# exponential form, and for graphs that are scale free.
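# get_dd_planted_partition is defined elsewhere in this repository. A minimal
# sketch of one plausible implementation, using networkx's planted partition
# generator; the group count, group size, and edge probabilities here are
# illustrative assumptions, not the repository's actual settings.
def get_dd_planted_partition_sketch(groups=4, group_size=25,
                                    p_in=0.5, p_out=0.05):
    """Return the degree sequence of one sampled planted partition graph."""
    import networkx as nx
    G = nx.planted_partition_graph(groups, group_size, p_in, p_out)
    return [d for _, d in G.degree()]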
def configuration_model_generator(n=N, max_degree=-1, fixed_sequence=None):
    """Build a configuration-model graph from a fixed degree sequence if one
    is given, otherwise from a random sequence with degrees in [1, max_degree)."""
    if fixed_sequence:
        return random_graphs.configuration_model(fixed_sequence, cleaned=True)
    if max_degree < 0:
        max_degree = np.random.randint(2, 100)
    degree_sequence = np.random.randint(1, max_degree, size=n)
    return random_graphs.configuration_model(degree_sequence, cleaned=True)
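# random_graphs.configuration_model is a local helper. A minimal sketch of the
# behavior the calls above appear to rely on, assuming cleaned=True means
# "collapse the multigraph to a simple graph"; the repository's real helper
# may differ.
def configuration_model_sketch(degree_sequence, cleaned=True):
    """Sample a configuration-model graph (degree sum must be even)."""
    import networkx as nx
    G = nx.configuration_model(degree_sequence)  # returns a MultiGraph
    if cleaned:
        G = nx.Graph(G)  # drop parallel edges
        G.remove_edges_from(list(nx.selfloop_edges(G)))  # drop self-loops
    return G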
def analyze_structure_for_fixed_degree_seq(n, max_degree=None):
    """Resample the configuration model many times for one fixed degree
    sequence and plot histograms of several structural measures."""
    # Not sure how to aggregate this into one figure for many different
    # degree sequences.
    mean_degrees = []
    mean_neighbor_degrees = []
    # Note: this is the diameter of the largest connected component.
    diameters = []
    components_count = []
    global_clustering_coefficients = []
    largest_component_sizes = []

    degree_sequence = generate_random_degree_sequence(n, max_degree)
    for i in range(500):
        G = random_graphs.configuration_model(degree_sequence, cleaned=False)
        print(i)
        mean_degrees.append(graph_measures.mean_degree(G))
        mean_neighbor_degrees.append(graph_measures.mean_neighbor_degree(G))
        diameters.append(graph_measures.diameter(G))
        components_count.append(graph_measures.connected_components_count(G))
        global_clustering_coefficients.append(
            graph_measures.global_clustering_coefficient(G))
        largest_component_sizes.append(graph_measures.largest_component(G))

    # Plot the results as a 2x3 grid of histograms.
    plt.figure(1)
    plt.subplot(231)
    plt.hist(mean_degrees)
    plt.title("Mean Degree")
    plt.subplot(232)
    plt.hist(mean_neighbor_degrees)
    plt.title("Mean Neighbor Degree")
    plt.subplot(233)
    plt.hist(global_clustering_coefficients)
    plt.title("Clustering Coefficient")
    plt.subplot(234)
    plt.hist(diameters)
    plt.title("Diameter")
    plt.subplot(235)
    plt.hist(components_count)
    plt.title("Components Count")
    plt.subplot(236)
    plt.hist(largest_component_sizes)
    plt.title("Largest Component Size")
    plt.show()
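# generate_random_degree_sequence is defined elsewhere in this repository. A
# minimal sketch consistent with how it is used here (n degrees, optionally
# capped, with the sum forced even so a configuration model exists):
def generate_random_degree_sequence_sketch(n, max_degree=None):
    if max_degree is None:
        max_degree = np.random.randint(2, 100)
    degree_sequence = np.random.randint(1, max_degree, size=n)
    if degree_sequence.sum() % 2 != 0:
        degree_sequence[np.random.randint(0, n)] += 1  # make the sum even
    return degree_sequence.tolist()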
def generate_random_config_model(n, max_degree=None):
    degree_sequence = generate_random_degree_sequence(n, max_degree)
    return random_graphs.configuration_model(degree_sequence, cleaned=True)
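# Example usage (illustrative; parameter values are arbitrary):
#
# G = generate_random_config_model(100, max_degree=10)
# print(graph_measures.mean_degree(G))
# print(graph_measures.largest_component(G))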
def test_edge_imputation():
    """Impute edges toward graphs sampled from five generators, then test
    whether the imputed graphs are classified as the right structure."""
    constraints = {'edge_count': (1000, 1100)}
    accuracy_at_k = [0] * 5
    confusion_matrix = [[0 for i in range(5)] for j in range(5)]
    samples = 100

    index = ['Watts Strogatz', 'Geometric', 'Erdos Renyi',
             'Barabasi Albert', 'Planted Partition Model']
    constraints_enforced = False

    rgs = [structural_identities.watts_strogatz_generator,
           structural_identities.geometric_generator,
           structural_identities.erdos_renyi_generator,
           structural_identities.barabasi_albert_generator,
           structural_identities.planted_partition_generator]

    for uni, rg in enumerate(rgs):
        title = index[uni]
        actual = uni
        created_graphs = []

        for i in range(samples):
            G = structural_identities.constrained_generation(rg, constraints)

            # Start from a graph with the right node count (all degree 1)
            # and impute edges toward the target graph G.
            degree_sequence = [1] * G.number_of_nodes()
            new_G = random_graphs.configuration_model(degree_sequence)
            new_G = impute_edge_algorithm(new_G, G)
            created_graphs.append(new_G)

            cluster, types = predict_structure(new_G, 2, constraints_enforced)
            predicted = cluster.index(min(cluster))
            print(title, types[predicted])

            confusion_matrix[actual][predicted] += 1

            # Rank the candidate labels by cluster distance and find the
            # rank k at which the true label appears (for accuracy@k).
            array = np.array(cluster)
            order = array.argsort()
            ranks = order.argsort().tolist()

            k = -1
            for r in range(len(cluster)):  # 5 types of rg
                if title == types[ranks.index(r)]:
                    k = r
                    break

            j = len(cluster) - 1
            while j >= k:
                accuracy_at_k[j] += 1
                j -= 1

        # Plot the observed vs. predicted metric distributions.
        observed_metrics, dic = structural_identities.analyze_structural_identity_graphs(
            created_graphs, uni)
        predict_metrics, dic = structural_identities.analyze_structural_identity(
            rg, samples, uni)  # constraints=None
        structural_identities.graph_created_distributions(
            uni, observed_metrics, predict_metrics, dic)

    small_index = ['WS', 'Geo', 'ER', 'BA', 'PPM']

    plt.figure(10)
    for i in range(len(accuracy_at_k)):
        accuracy_at_k[i] /= (samples * 1.0 * len(rgs))

    if constraints_enforced:
        plt.plot([i for i in range(1, 6)], accuracy_at_k,
                 marker='o', color='red')
    else:
        plt.plot([i for i in range(1, 6)], accuracy_at_k, marker='o')

    plt.xlabel('k (top k labels)')
    plt.ylim((0, 1.1))
    plt.ylabel('Accuracy @ k')
    plt.title('Prediction Accuracy for Uniformly Sampled Random Graphs')
    plt.show()

    sns.set()
    ax = plt.axes()
    sns.heatmap(confusion_matrix, ax=ax, cmap="YlGnBu",
                yticklabels=index, xticklabels=small_index)
    ax.set_title('Confusion Matrix for Uniformly Sampled Random Graphs')
    plt.tight_layout()
    plt.show()
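# Illustrative entry point. The sample sizes above (100 graphs per generator,
# 500 resamples) make these experiments slow, so run them selectively.
if __name__ == '__main__':
    test_edge_imputation()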