Example #1
0
def generate_bounds_degree_sequence(ranges, simulations):
    """Estimate min/max bounds of the s-metric under the configuration model.

    For each degree-sequence size in [2, ranges), draws `simulations` random
    degree sequences, samples `interior_mixing` configuration-model graphs
    per sequence, and records the smallest and largest s-metric seen across
    ALL simulations for that size.

    Parameters
    ----------
    ranges : int
        Exclusive upper bound on the degree-sequence size (sizes 2..ranges-1).
    simulations : int
        Number of random degree sequences drawn per size.

    Returns
    -------
    (list, list)
        Per-size minimum and maximum s-metric values, indexed by size - 2.
    """
    interior_mixing = 10  # configuration-model samples per degree sequence
    min_values = (ranges - 2) * [0]
    max_values = (ranges - 2) * [0]

    for degree_seq_size in range(2, ranges):

        # Track extremes across every simulation for this size.  (Previously
        # these were reset inside the simulation loop but stored only after
        # it, so just the last simulation's values survived.)
        min_value = 10**5
        max_value = 0

        for sim in range(simulations):

            # pick degree sequence a priori and watch how the s statistic evolves as we add more nodes
            degree_sequence = [
                randint(1, degree_seq_size) for i in range(0, degree_seq_size)
            ]

            for sim_ in range(interior_mixing):
                if sum(degree_sequence) % 2 != 0:
                    # The degree sum must be even for a valid configuration
                    # model, so bump one random entry.  randint is inclusive
                    # on both ends: valid indices are 0..len-1 (the previous
                    # randint(1, degree_seq_size) could index out of range
                    # and never touched index 0).
                    degree_sequence[randint(0, degree_seq_size - 1)] += 1
                graph = random_graphs.configuration_model(degree_sequence,
                                                          cleaned=True)
                metric = s_metric(graph)
                if min_value > metric:
                    min_value = metric
                if max_value < metric:
                    max_value = metric

        min_values[degree_seq_size - 2] = min_value
        max_values[degree_seq_size - 2] = max_value

    return min_values, max_values
Example #2
0
def generate_bounds_degree_sequence_planted_partition(ranges, simulations):
    """Estimate min/max s-metric bounds for planted-partition degree sequences.

    Same loop structure as `generate_bounds_degree_sequence`, but each
    simulation draws its degree sequence from `get_dd_planted_partition()`
    instead of a uniform random sequence.

    Parameters
    ----------
    ranges : int
        Exclusive upper bound on the size index (indices 2..ranges-1).
    simulations : int
        Number of degree sequences drawn per size index.

    Returns
    -------
    (list, list)
        Per-index minimum and maximum s-metric values, indexed by size - 2.
    """
    interior_mixing = 10  # configuration-model samples per degree sequence
    min_values = (ranges - 2) * [0]
    max_values = (ranges - 2) * [0]

    for degree_seq_size in range(2, ranges):

        # Track extremes across every simulation for this size.  (Previously
        # these were reset inside the simulation loop but stored only after
        # it, so just the last simulation's values survived.)
        min_value = 10**5
        max_value = 0

        for sim in range(simulations):

            # pick degree sequence a priori and watch how the s statistic evolves as we add more nodes
            degree_sequence = get_dd_planted_partition()

            for sim_ in range(interior_mixing):
                if sum(degree_sequence) % 2 != 0:
                    # The degree sum must be even for a valid configuration
                    # model, so bump one random entry.  Index by the actual
                    # sequence length: the sequence comes from
                    # get_dd_planted_partition() and need not have
                    # degree_seq_size entries (the previous
                    # randint(1, degree_seq_size) could index out of range).
                    degree_sequence[randint(0, len(degree_sequence) - 1)] += 1
                graph = random_graphs.configuration_model(degree_sequence,
                                                          cleaned=True)
                metric = s_metric(graph)
                if min_value > metric:
                    min_value = metric
                if max_value < metric:
                    max_value = metric

        min_values[degree_seq_size - 2] = min_value
        max_values[degree_seq_size - 2] = max_value

    return min_values, max_values


# DEGREE SEQUENCE AND GRAPH DIVERSITY
# """ Generate bounds under maximum entropy (Configuration Model) """
# ranges = 1000
# simulations = 10
# mins, maxs = generate_bounds_degree_sequence(ranges, simulations)
# print(mins)
# print(maxs)
# plt.plot(mins)
# plt.plot(maxs)
# plt.title("Bounds for s metric under the configuration model, approximately uniform degree distribution")
# plt.xlabel("number of nodes within graph")
# plt.ylabel("minimum and maximum s metric")
# plt.show()
#
# """Next we need to define the coefficient of variation"""
# """ Next we are going to verify: For graphs with regular structure that have low variability
# in their degree sequence D, there is typically very little diversity
# in the corresponding space of graphs G( D). """
#
#
# """ Look at Coeff for graphs with a degree sequence having an exponential form"""
#
# """ Look at Coeff for graphs that are scale free"""
def configuration_model_generator(n=N, max_degree=-1, fixed_sequence=None):
    """Build a cleaned configuration-model graph.

    Parameters
    ----------
    n : int
        Number of nodes when the degree sequence is drawn at random.
    max_degree : int
        Exclusive upper bound for random degrees; a negative value means
        "pick a random bound in [2, 100)".
    fixed_sequence : sequence of int, optional
        If given and non-empty, used directly as the degree sequence and
        `n`/`max_degree` are ignored.  Defaults to None instead of the
        previous mutable `[]` default (Python anti-pattern).

    Returns
    -------
    The graph produced by `random_graphs.configuration_model(..., cleaned=True)`.
    """
    if fixed_sequence:
        return random_graphs.configuration_model(fixed_sequence, cleaned=True)

    if max_degree < 0:
        max_degree = np.random.randint(2, 100)

    # np.random.randint draws degrees in [1, max_degree).
    degree_sequence = np.random.randint(1, max_degree, size=n)
    return random_graphs.configuration_model(degree_sequence, cleaned=True)
def analyze_structure_for_fixed_degree_seq(n, max_degree=None):
    """Sample 500 configuration-model graphs from one random degree sequence
    and plot histograms of six structural measures."""
    # Not sure how to aggregate this into one graph for many different
    # degree sequences

    mean_degrees = []
    mean_neighbor_degrees = []
    # Note, this is the diameter of the largest connected component
    diameters = []
    components_count = []
    global_clustering_coefficients = []
    largest_component_sizes = []

    degree_sequence = generate_random_degree_sequence(n, max_degree)
    for trial in range(500):
        G = random_graphs.configuration_model(degree_sequence, cleaned=False)
        print(trial)

        mean_degrees.append(graph_measures.mean_degree(G))
        mean_neighbor_degrees.append(graph_measures.mean_neighbor_degree(G))
        diameters.append(graph_measures.diameter(G))
        components_count.append(graph_measures.connected_components_count(G))
        global_clustering_coefficients.append(
            graph_measures.global_clustering_coefficient(G))
        largest_component_sizes.append(graph_measures.largest_component(G))

    # Graph results: one histogram per measure, laid out on a 2x3 grid.
    plt.figure(1)

    panels = [
        (234, diameters, "Diameter"),
        (235, components_count, "Components Count"),
        (233, global_clustering_coefficients, "Clustering Coefficient"),
        (231, mean_degrees, "Mean Degree"),
        (232, mean_neighbor_degrees, "Mean Neighbor Degree"),
        (236, largest_component_sizes, "Largest Component Size"),
    ]
    for position, samples, label in panels:
        plt.subplot(position)
        plt.hist(samples)
        plt.title(label)

    plt.show()
def generate_random_config_model(n, max_degree=None):
    """Draw a random degree sequence of length `n` and return the cleaned
    configuration-model graph built from it."""
    return random_graphs.configuration_model(
        generate_random_degree_sequence(n, max_degree), cleaned=True)
def test_edge_imputation():
    """Evaluate the edge-imputation algorithm on five random-graph families.

    For each generator: creates `samples` constrained graphs, rebuilds each
    from an all-ones degree sequence via `impute_edge_algorithm`, predicts
    the structural family of the rebuilt graph, and accumulates accuracy@k
    counts plus a confusion matrix.  Finally plots the accuracy@k curve and
    a heatmap of the confusion matrix.

    Ported to Python 3 (`xrange`/print-statement forms were inconsistent
    with the rest of the file and fail under Python 3).
    """
    constraints = {'edge_count': (1000, 1100)}
    accuracy_at_k = [0] * 5

    # confusion_matrix[actual][predicted]
    confusion_matrix = [[0 for i in range(5)] for j in range(5)]
    samples = 100
    index = [
        'Watts Strogatz', 'Geometric', 'Erdos Renyi', 'Barabasi Albert',
        'Planted Partition Model'
    ]
    constraints_enforced = False
    rgs = [
        structural_identities.watts_strogatz_generator,
        structural_identities.geometric_generator,
        structural_identities.erdos_renyi_generator,
        structural_identities.barabasi_albert_generator,
        structural_identities.planted_partition_generator
    ]

    for uni, rg in enumerate(rgs):
        title = index[uni]
        actual = uni
        created_graphs = []
        for sample_num in range(samples):
            G = structural_identities.constrained_generation(rg, constraints)

            # Start imputation from an all-ones degree sequence.
            degree_sequence = [1] * G.number_of_nodes()

            new_G = random_graphs.configuration_model(degree_sequence)
            new_G = impute_edge_algorithm(new_G, G)
            created_graphs.append(new_G)

            cluster, types = predict_structure(new_G, 2, constraints_enforced)

            # Smallest cluster distance wins the prediction.
            predicted = cluster.index(min(cluster))
            print(title, types[predicted])

            confusion_matrix[actual][predicted] += 1

            # Rank candidate labels by cluster distance (ascending).
            array = np.array(cluster)
            order = array.argsort()
            ranks = order.argsort().tolist()

            # k = rank position of the true label.  (Renamed the inner loop
            # variable: it previously shadowed the outer sample index.)
            k = -1
            for rank_pos in range(len(cluster)):  # 5 types of rg
                if title == types[ranks.index(rank_pos)]:
                    k = rank_pos
                    break

            # Credit every accuracy@k bucket from the true label's rank up.
            # NOTE(review): if the label were ever missing (k == -1) this
            # would also touch accuracy_at_k[-1] — presumably unreachable,
            # but worth confirming against predict_structure's contract.
            j = len(cluster) - 1
            while j >= k:
                accuracy_at_k[j] += 1
                j -= 1

        # HERE we plot distros
        observed_metrics, dic = structural_identities.analyze_structural_identity_graphs(
            created_graphs, uni)
        predict_metrics, dic = structural_identities.analyze_structural_identity(
            rg, samples, uni)  # constraints=None):
        structural_identities.graph_created_distributions(
            uni, observed_metrics, predict_metrics, dic)

    small_index = ['WS', 'Geo', 'ER', 'BA', 'PPM']

    plt.figure(10)

    # Normalize raw counts into per-sample accuracy.
    for i in range(len(accuracy_at_k)):
        accuracy_at_k[i] /= (samples * 1.0 * len(rgs))

    if constraints_enforced:
        plt.plot([i for i in range(1, 6)],
                 accuracy_at_k,
                 marker='o',
                 color='red')
    else:
        plt.plot([i for i in range(1, 6)], accuracy_at_k, marker='o')

    plt.xlabel('k (top k labels)')
    plt.ylim((0, 1.1))
    plt.ylabel('Accuracy @ k')
    plt.title('Prediction Accuracy for Uniformly Sampled Random Graphs')

    plt.show()

    sns.set()
    ax = plt.axes()
    sns.heatmap(confusion_matrix,
                ax=ax,
                cmap="YlGnBu",
                yticklabels=index,
                xticklabels=small_index)
    ax.set_title('Confusion Matrix for Uniformly Sampled Random Graphs')
    plt.tight_layout()
    plt.show()