def question2():
    """Plot in-degree distributions of three directed ER graphs.

    For each edge probability (0.2, 0.5 and 0.8) a graph with 10000 nodes is
    built with ``directed_er``, its in-degree distribution is computed with
    ``project1.in_degree_distribution`` and passed through
    ``normalise_in_degree_dist``. All three results are drawn as point
    markers on shared log/log axes and the figure is shown.

    The probabilities handed to ``directed_er`` are the same values that are
    printed in the on-plot labels, so the annotation always describes the
    data that was actually generated.
    """
    num_nodes = 10000
    # (edge probability, colour, x position of the text annotation)
    series = (
        (0.2, 'red', 1500),
        (0.5, 'blue', 3500),
        (0.8, 'green', 6500),
    )
    for prob, colour, label_x in series:
        dist = normalise_in_degree_dist(project1.in_degree_distribution(
            directed_er(num_nodes, prob)))
        # Materialise the dict views so plotting receives plain sequences.
        pyplot.plot(list(dist.keys()), list(dist.values()), color=colour,
                    linestyle='None', marker='.', markersize=5)
        pyplot.text(label_x, 0.04,
                    'n={}\np={}'.format(num_nodes, prob), color=colour)
    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.xlabel('in_degree of node (scale = log10)')
    pyplot.ylabel('Frequency (scale = log10)')
    pyplot.title("Question 2\n"
                 "Sample results of ER Algorithm")
    pyplot.grid(True)
    pyplot.show()
Exemple #2
0
def Question_2():
    '''
    Plot the normalised in-degree distribution of an ER graph on log/log axes.

    Assignment text:
    For this question, your task is to consider the shape of the in-degree
    distribution for an ER graph and compare its shape to that of the physics
    citation graph. In the homework, we considered the probability of a specific
    in-degree, k, for a single node. Now, we are interested in the in-degree
    distribution for the entire ER graph. To determine the shape of this
    distribution, you are welcome to compute several examples of in-degree
    distributions or determine the shape mathematically.

    Once you have determined the shape of the in-degree distributions for ER
    graphs, compare the shape of this distribution to the shape of the in-degree
    distribution for the citation graph.

    Returns None; the only effect is the plot produced by
    loglog_plot_dictionary().
    '''
    # generate ER graph
    graph = er_graph(27770, .0005)
    distribution = project1.in_degree_distribution(graph)

    # Scale the counts so they sum to one. values()/items() are available on
    # both Python 2 and Python 3 dicts, unlike itervalues()/iteritems().
    factor = 1.0 / sum(distribution.values())
    normalised_distribution = {key: value * factor
                               for key, value in distribution.items()}

    # Every index in range(len(graph)) that has no entry yet gets an explicit
    # value of 0.
    for node in range(len(graph)):
        normalised_distribution.setdefault(node, 0)

    # plot ER graph
    loglog_plot_dictionary(normalised_distribution, 'Degree', 'Probability',
                           'Log/Log Plot of Binomial Distribution')
def question4(nodes, degree, show_value=True):
    """Build a DPA graph and return its processed in-degree distribution.

    The graph comes from ``dpa_algorithm.dpa(nodes, degree)``; its in-degree
    distribution is computed with ``project1.in_degree_distribution`` and
    passed through ``normalise_in_degree_dist``.

    Args:
        nodes: first argument forwarded to ``dpa_algorithm.dpa``.
        degree: second argument forwarded to ``dpa_algorithm.dpa``.
        show_value: when true, also draw and show a log/log plot of the
            result.

    Returns:
        The value returned by ``normalise_in_degree_dist``.
    """
    q4_graph = dpa_algorithm.dpa(nodes, degree)
    q4_normalised = normalise_in_degree_dist(
        project1.in_degree_distribution(q4_graph))
    if show_value:
        pyplot.loglog(q4_normalised.keys(), q4_normalised.values(),
                      color='magenta', linestyle='none', marker='.',
                      markersize=6)
        # Report the parameters actually used instead of a fixed
        # "DPA(27770, 12)"; identical output for those arguments.
        pyplot.title('Question 4\nIn-Degree Distribution of DPA({}, {})'
                     .format(nodes, degree))
        pyplot.xlabel('In-Degrees (scale = log10)')
        pyplot.ylabel('Distribution (scale = log10)')
        pyplot.show()
    return q4_normalised
def question1_plot():
    """
    Draw and show the log/log plot for question 1.

    Loads the graph at CITATION_URL, computes its in-degree distribution,
    passes it through normalization() and plots the result as circle
    markers on logarithmic x and y axes.
    """
    distribution = normalization(
        in_degree_distribution(load_graph(CITATION_URL)))
    degrees = distribution.keys()
    fractions = distribution.values()
    plt.plot(degrees, fractions, 'o', markersize=8)
    plt.xscale('log')
    plt.yscale('log')
    plt.title('Log of Normalized In-degree Distribution of Citation Graph')
    plt.xlabel('Log of Citations')
    plt.ylabel('Log of Normalized Distribution')
    plt.show()
def question1(dir_graph, show_value=True):
    """Return the processed in-degree distribution of ``dir_graph``.

    The distribution is computed with ``project1.in_degree_distribution``
    and passed through ``normalise_in_degree_dist``. When ``show_value`` is
    true the result is also drawn as green point markers on log/log axes and
    the figure is shown before returning.
    """
    normalised_dist = normalise_in_degree_dist(
        project1.in_degree_distribution(dir_graph))
    if not show_value:
        return normalised_dist

    x_values = normalised_dist.keys()
    y_values = normalised_dist.values()
    pyplot.plot(x_values, y_values,
                color='green', linestyle='None', marker='.', markersize=5)
    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.xlabel('Number of Citations (scale = log10)')
    pyplot.ylabel('Frequency (scale = log10)')
    pyplot.title(
        "Question 1\nCitation Distribution for High-Energy Physics Papers")
    pyplot.grid(True)
    pyplot.show()
    return normalised_dist
def question2a(graph):
    """Overlay an ER graph's in-degree distribution on a supplied one.

    ``graph`` is used only through ``keys()`` and ``values()``, so it is
    expected to be a mapping (presumably the citation graph's normalised
    in-degree distribution rather than the graph itself -- confirm with the
    caller). The ER series comes from ``directed_er(27770, 0.00043212)``.
    Both series are drawn on log/log axes with a legend, then shown.
    """
    # Keyword arguments common to both series.
    shared = {'linestyle': 'none', 'markersize': 6}

    er_graph = directed_er(27770, 0.00043212)
    er_result = normalise_in_degree_dist(
        project1.in_degree_distribution(er_graph))

    pyplot.loglog(er_result.keys(), er_result.values(),
                  color='blue', marker='+', label='ER Graph', **shared)
    pyplot.loglog(graph.keys(), graph.values(),
                  color='green', marker='.', label="Citation Graph", **shared)

    pyplot.title('Question2\nComparison of Citation Graph & ER Graph')
    pyplot.xlabel('In-Degrees (scale = log10)')
    pyplot.ylabel('Distribution (scale = log10)')
    pyplot.legend(loc='upper right')
    pyplot.grid(True)
    pyplot.show()
def question4_plot(n, m):
    """
    Draw the log/log plot for question 4.

    Builds a graph with dpa(n, m), computes its in-degree distribution,
    passes it through normalization() and plots the result as red circle
    markers on logarithmic axes. plt.show() is not called here.

    Parameters
    ----------
    n: int
    first argument forwarded to dpa()

    m: int
    second argument forwarded to dpa()
    """
    distribution = normalization(in_degree_distribution(dpa(n, m)))
    plt.xscale("log")
    plt.yscale("log")
    plt.title("Log Normalized In-degrees Distribution of DPA Graph")
    plt.xlabel("Log of Number of In-degrees")
    plt.ylabel("Log of Normalized Distribution")
    degrees = distribution.keys()
    fractions = distribution.values()
    plt.plot(degrees, fractions, "ro", markersize=8)
def question2_plot():
    """
    Draw and show the log/log plot for question 2.

    For each probability in (0.2, 0.4, 0.6) a graph is generated with
    algorithm_ER(1000, p); its in-degree distribution is passed through
    normalization() and plotted as circle markers. One legend entry is
    added per probability.
    """
    node_count = 1000
    probabilities = (0.2, 0.4, 0.6)

    plt.xscale('log')
    plt.yscale('log')
    plt.title(
        'Log Normalized In-degree Distribution '
        'for Different P based on 1000 Nodes'
    )
    plt.xlabel('Log of Number of In-degrees')
    plt.ylabel('Log of Normalized Distribution')

    for prob in probabilities:
        graph = algorithm_ER(node_count, prob)
        dist = normalization(in_degree_distribution(graph))
        plt.plot(dist.keys(), dist.values(), 'o')

    # One label per plotted series, in plotting order.
    legends = ['distribution of p=%s' % prob for prob in probabilities]
    plt.legend(legends, loc='upper left')
    plt.show()
Exemple #9
0
def Question_1():
    '''
    Plot the normalised in-degree distribution of the citation graph.

    Assignment text:
    Your task for this question is to compute the in-degree distribution for this
    citation graph. Once you have computed this distribution, you should normalize
    the distribution (make the values in the dictionary sum to one) and then
    compute a log/log plot of the points in this normalized distribution.

    Returns None; the only effect is the plot produced by
    loglog_plot_dictionary().
    '''
    # load data, calculate normalised in-degree distribution
    import alg_load_graph
    citation_graph = alg_load_graph.load_graph(alg_load_graph.CITATION_URL)
    distribution = project1.in_degree_distribution(citation_graph)

    # Scale the counts so they sum to one. values()/items() are available on
    # both Python 2 and Python 3 dicts, unlike itervalues()/iteritems().
    factor = 1.0 / sum(distribution.values())
    normalised_distribution = {key: value * factor
                               for key, value in distribution.items()}

    # plot
    loglog_plot_dictionary(normalised_distribution, 'Number of Citations',
                           'Normalized Citation Frequency',
                           'Log/Log Plot of In-Degree Distribution of Citation Graph\n')
Exemple #10
0
def Question_4():
    '''
    Plot the normalised in-degree distribution of a dpa(28000, 13) graph.

    Assignment text:
    Your task for this question is to implement the DPA algorithm, compute a DPA
    graph using the values from Question 3, and then plot the in-degree
    distribution for this DPA graph. Creating an efficient implementation of the
    DPA algorithm from scratch is surprisingly tricky. The key issue in
    implementing the algorithm is to avoid iterating through every node in the
    graph when executing Line 6. Using a loop to implement Line 6 leads to
    implementations that require on the order of 30 minutes in desktop Python to
    create a DPA graph with 28000 nodes.

    Returns None; the only effect is the plot produced by
    loglog_plot_dictionary().
    '''
    graph = dpa(28000, 13)

    distribution = project1.in_degree_distribution(graph)
    # Scale the counts so they sum to one. values()/items() are available on
    # both Python 2 and Python 3 dicts, unlike itervalues()/iteritems().
    factor = 1.0 / sum(distribution.values())
    normalised_distribution = {key: value * factor
                               for key, value in distribution.items()}

    loglog_plot_dictionary(normalised_distribution, 'In-Degrees',
                           'Fraction of Nodes',
                           'Log/Log Plot of In-Degree Distribution of DPA Graph\n')
def question2(subplot=None, filename=None):
    """Plot the in-degree distribution of an algorithm_er(3000, 0.1) graph.

    The distribution from project1.in_degree_distribution() is passed
    through norm() and handed to plot() under the label 'random generated'.
    ``subplot`` and ``filename`` are forwarded to plot() unchanged.
    """
    random_graph = algorithm_er(3000, 0.1)
    in_degrees = project1.in_degree_distribution(random_graph)
    plot(norm(in_degrees), 'random generated', subplot, filename)
Exemple #12
0
def question3(subplot=None, filename=None):
    """Print the citation graph's avg_out_degree() and plot a DPA distribution.

    Loads 'alg_phys-cite.txt', prints the result of avg_out_degree() for it,
    then builds a graph with algorithm_dpa(27700, 13), passes its in-degree
    distribution through norm() and hands it to plot() under the label
    'DPA-generated'. ``subplot`` and ``filename`` are forwarded to plot().
    """
    citation_graph = alg_load_graph.load_graph('alg_phys-cite.txt')
    print('avg_out_degree', avg_out_degree(citation_graph))
    # NOTE(review): 27700 nodes here while other snippets use 27770 --
    # confirm which value is intended.
    generated = algorithm_dpa(27700, 13)
    distribution = norm(project1.in_degree_distribution(generated))
    plot(distribution, 'DPA-generated', subplot, filename)
# Simulate the DPA digraph with the n and m obtained in Question 3.
dpa_digraph = DPA(num_nodes, m)

# Node count of the DPA-generated graph, kept as a float.
num_nodes3 = float(len(dpa_digraph))
# A single %-formatted argument prints the same text on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print("num_nodes: %s" % num_nodes3)
distribution3 = in_degree_distribution(dpa_digraph)

# Split the distribution into parallel x / y lists for plotting. The values
# are used exactly as returned by in_degree_distribution(); no normalisation
# is applied in this section.
keys = list(distribution3.keys())
items = [distribution3[key] for key in keys]

plt.figure(4)
plt.loglog(keys, items, 'ro')
plt.xlabel('the log in-degree of node')
plt.ylabel('the log number of nodes in particular in-degree')
plt.grid(True)
plt.title("the loglog in_degree_distribution for DPA random graph")
# NOTE(review): the figure is not shown here; presumably it is displayed
# elsewhere in the script -- confirm.
# plt.show()
Exemple #14
0
def question1(subplot=None, filename=None):
    """Plot the in-degree distribution of the graph in 'alg_phys-cite.txt'.

    The distribution from project1.in_degree_distribution() is passed
    through norm() and handed to plot() under the label 'citation'.
    ``subplot`` and ``filename`` are forwarded to plot() unchanged.
    """
    citation_graph = alg_load_graph.load_graph('alg_phys-cite.txt')
    in_degrees = project1.in_degree_distribution(citation_graph)
    plot(norm(in_degrees), 'citation', subplot, filename)
Exemple #15
0
def question2(subplot=None, filename=None):
    """Plot the in-degree distribution of an algorithm_er(3000, 0.1) graph.

    The distribution from project1.in_degree_distribution() is passed
    through norm() and handed to plot() under the label 'random generated'.
    ``subplot`` and ``filename`` are forwarded to plot() unchanged.
    """
    random_graph = algorithm_er(3000, 0.1)
    in_degrees = project1.in_degree_distribution(random_graph)
    plot(norm(in_degrees), 'random generated', subplot, filename)
Exemple #16
0
def question4():
    """Plot the in-degree distribution of a generate_DPA(12, 27770) graph.

    The entry for in-degree 0 is removed before the distribution is handed
    to plot() (presumably because zero cannot be placed on a logarithmic
    axis -- confirm against plot()).
    """
    distribution = in_degree_distribution(generate_DPA(12, 27770))
    # pop() with a default instead of ``del`` so a distribution without a
    # zero in-degree entry does not raise KeyError.
    distribution.pop(0, None)
    plot(distribution)
def question3(subplot=None, filename=None):
    """Print the citation graph's avg_out_degree() and plot a DPA distribution.

    Loads './data/alg_phys-cite.txt', prints the result of avg_out_degree()
    for it, then builds a graph with algorithm_dpa(27700, 13), passes its
    in-degree distribution through norm() and hands it to plot() under the
    label 'DPA-generated'. ``subplot`` and ``filename`` are forwarded to
    plot().
    """
    citation_graph = alg_load_graph.load_graph('./data/alg_phys-cite.txt')
    print('avg_out_degree', avg_out_degree(citation_graph))
    # NOTE(review): 27700 nodes here while other snippets use 27770 --
    # confirm which value is intended.
    generated = algorithm_dpa(27700, 13)
    distribution = norm(project1.in_degree_distribution(generated))
    plot(distribution, 'DPA-generated', subplot, filename)
def question1(subplot=None, filename=None):
    """Plot the in-degree distribution of './data/alg_phys-cite.txt'.

    The distribution from project1.in_degree_distribution() is passed
    through norm() and handed to plot() under the label 'citation'.
    ``subplot`` and ``filename`` are forwarded to plot() unchanged.
    """
    citation_graph = alg_load_graph.load_graph('./data/alg_phys-cite.txt')
    in_degrees = project1.in_degree_distribution(citation_graph)
    plot(norm(in_degrees), 'citation', subplot, filename)
Exemple #19
0
def question1():
    """Plot the in-degree distribution of the graph in 'alg_phys-cite.txt'.

    The entry for in-degree 0 is removed before the distribution is handed
    to plot() (presumably because zero cannot be placed on a logarithmic
    axis -- confirm against plot()).
    """
    distribution = in_degree_distribution(load_graph('alg_phys-cite.txt'))
    # pop() with a default instead of ``del`` so a distribution without a
    # zero in-degree entry does not raise KeyError.
    distribution.pop(0, None)
    plot(distribution)
Exemple #20
0
def question4():
    """Plot the in-degree distribution of a generate_DPA(12, 27770) graph.

    The entry for in-degree 0 is removed before the distribution is handed
    to plot() (presumably because zero cannot be placed on a logarithmic
    axis -- confirm against plot()).
    """
    distribution = in_degree_distribution(generate_DPA(12, 27770))
    # pop() with a default instead of ``del`` so a distribution without a
    # zero in-degree entry does not raise KeyError.
    distribution.pop(0, None)
    plot(distribution)
# Simulate the DPA digraph with the n and m obtained in Question 3.
dpa_digraph = DPA(num_nodes, m)

# Node count of the DPA-generated graph, kept as a float.
num_nodes3 = float(len(dpa_digraph))
# A single %-formatted argument prints the same text on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print("num_nodes: %s" % num_nodes3)
distribution3 = in_degree_distribution(dpa_digraph)

# Split the distribution into parallel x / y lists for plotting. The values
# are used exactly as returned by in_degree_distribution(); no normalisation
# is applied in this section.
keys = list(distribution3.keys())
items = [distribution3[key] for key in keys]

plt.figure(4)
plt.loglog(keys, items, 'ro')
plt.xlabel('the log in-degree of node')
plt.ylabel('the log number of nodes in particular in-degree')
plt.grid(True)
plt.title("the loglog in_degree_distribution for DPA random graph")
# NOTE(review): the figure is not shown here; presumably it is displayed
# elsewhere in the script -- confirm.
# plt.show()
Exemple #22
0
def question1():
    """Plot the in-degree distribution of the graph in 'alg_phys-cite.txt'.

    The entry for in-degree 0 is removed before the distribution is handed
    to plot() (presumably because zero cannot be placed on a logarithmic
    axis -- confirm against plot()).
    """
    distribution = in_degree_distribution(load_graph('alg_phys-cite.txt'))
    # pop() with a default instead of ``del`` so a distribution without a
    # zero in-degree entry does not raise KeyError.
    distribution.pop(0, None)
    plot(distribution)