def question2():
    """Plot in-degree distributions of three random directed ER graphs.

    One graph with 10000 nodes is generated per edge probability
    (0.2, 0.5 and 0.8).  Each in-degree distribution is passed through
    ``normalise_in_degree_dist`` and drawn as unconnected points on
    log/log axes, with a text annotation in the same colour.

    The probabilities used to build the graphs are the same values shown in
    the annotations, so the labels always describe the data that is drawn.
    """
    num_nodes = 10000
    # (edge probability, colour, x position of the annotation)
    series = (
        (0.2, 'red', 1500),
        (0.5, 'blue', 3500),
        (0.8, 'green', 6500),
    )

    for prob, colour, text_x in series:
        dist = normalise_in_degree_dist(
            project1.in_degree_distribution(directed_er(num_nodes, prob)))
        # Hand plain sequences (not dict views) to the plotting call.
        pyplot.plot(list(dist.keys()), list(dist.values()), color=colour,
                    linestyle='None', marker='.', markersize=5)
        # The label is derived from the values actually used above.
        pyplot.text(text_x, 0.04, 'n=%d\np=%s' % (num_nodes, prob),
                    color=colour)

    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.xlabel('in_degree of node (scale = log10)')
    pyplot.ylabel('Frequency (scale = log10)')
    pyplot.title("Question 2\n"
                 "Sample results of ER Algorithm")
    pyplot.grid(True)
    pyplot.show()
def Question_2():
    '''
    Plot the normalised in-degree distribution of one random ER graph.

    Task statement: consider the shape of the in-degree distribution for an
    ER graph and compare its shape to that of the physics citation graph.
    In the homework, we considered the probability of a specific in-degree,
    k, for a single node. Now, we are interested in the in-degree
    distribution for the entire ER graph. To determine the shape of this
    distribution, you are welcome to compute several examples of in-degree
    distributions or determine the shape mathematically. Once you have
    determined the shape of the in-degree distributions for ER graphs,
    compare the shape of this distribution to the shape of the in-degree
    distribution for the citation graph.

    The graph is built with er_graph(27770, .0005). Nothing is returned;
    the normalised distribution is handed to loglog_plot_dictionary.
    '''
    # generate ER graph
    graph = er_graph(27770, .0005)
    distribution = project1.in_degree_distribution(graph)

    # values()/items() are available on both Python 2 and Python 3,
    # unlike itervalues()/iteritems().
    factor = 1.0 / sum(distribution.values())
    normalised_distribution = {key: value * factor
                               for key, value in distribution.items()}

    # Every key in range(len(graph)) that is absent gets an explicit 0 entry.
    for degree in range(len(graph)):
        normalised_distribution.setdefault(degree, 0)

    # plot ER graph
    loglog_plot_dictionary(normalised_distribution,
                           'Degree',
                           'Probability',
                           'Log/Log Plot of Binomial Distribution')
def question4(nodes, degree, show_value=True):
    """Build a DPA graph and return its normalised in-degree distribution.

    Parameters:
        nodes: first argument forwarded to ``dpa_algorithm.dpa``.
        degree: second argument forwarded to ``dpa_algorithm.dpa``.
        show_value: when true, the distribution is also drawn as a log/log
            scatter plot and shown.

    Returns:
        The result of ``normalise_in_degree_dist`` applied to the in-degree
        distribution of the generated graph.
    """
    q4_graph = dpa_algorithm.dpa(nodes, degree)
    q4_normalised = normalise_in_degree_dist(
        project1.in_degree_distribution(q4_graph))

    if show_value:
        pyplot.loglog(q4_normalised.keys(), q4_normalised.values(),
                      color='magenta', linestyle='none', marker='.',
                      markersize=6)
        # The title reports the arguments actually used, not a fixed
        # "DPA(27770, 12)"; for those arguments the text is unchanged.
        pyplot.title('Question 4\nIn-Degree Distribution of DPA(%s, %s)'
                     % (nodes, degree))
        pyplot.xlabel('In-Degrees (scale = log10)')
        pyplot.ylabel('Distribution (scale = log10)')
        pyplot.show()

    return q4_normalised
def question1_plot():
    """
    Draw the question 1 plot.

    Loads the graph at CITATION_URL, computes its in-degree distribution,
    passes it through normalization() and shows the result as circle
    markers on log/log axes.
    """
    distribution = normalization(
        in_degree_distribution(load_graph(CITATION_URL)))
    x_values = distribution.keys()
    y_values = distribution.values()

    plt.plot(x_values, y_values, 'o', markersize=8)
    plt.xscale('log')
    plt.yscale('log')

    plt.title('Log of Normalized In-degree Distribution of Citation Graph')
    plt.xlabel('Log of Citations')
    plt.ylabel('Log of Normalized Distribution')
    plt.show()
def question1(dir_graph, show_value=True):
    """Return the normalised in-degree distribution of ``dir_graph``.

    When ``show_value`` is true the distribution is additionally drawn as
    unconnected green points on log/log axes and the figure is shown.
    """
    normalised_dist = normalise_in_degree_dist(
        project1.in_degree_distribution(dir_graph))

    # Nothing to draw: hand the distribution straight back.
    if not show_value:
        return normalised_dist

    x_values = normalised_dist.keys()
    y_values = normalised_dist.values()
    pyplot.plot(x_values, y_values, color='green', linestyle='None',
                marker='.', markersize=5)
    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.xlabel('Number of Citations (scale = log10)')
    pyplot.ylabel('Frequency (scale = log10)')
    pyplot.title("Question 1\n"
                 "Citation Distribution for High-Energy Physics Papers")
    pyplot.grid(True)
    pyplot.show()
    return normalised_dist
def question2a(graph):
    """Overlay an ER graph's distribution on a given distribution.

    ``graph`` is used as a mapping: its ``keys()`` are plotted against its
    ``values()`` under the label "Citation Graph".  The ER series comes
    from ``directed_er(27770, 0.00043212)``, normalised with
    ``normalise_in_degree_dist``.  Both are drawn on log/log axes.
    """
    er_graph_instance = directed_er(27770, 0.00043212)
    er_result = normalise_in_degree_dist(
        project1.in_degree_distribution(er_graph_instance))

    # Settings shared by both point series.
    point_style = dict(linestyle='none', markersize=6)

    pyplot.loglog(er_result.keys(), er_result.values(),
                  color='blue', marker='+', label='ER Graph', **point_style)
    pyplot.loglog(graph.keys(), graph.values(),
                  color='green', marker='.', label="Citation Graph",
                  **point_style)

    pyplot.title('Question2\nComparison of Citation Graph & ER Graph')
    pyplot.xlabel('In-Degrees (scale = log10)')
    pyplot.ylabel('Distribution (scale = log10)')
    pyplot.legend(loc='upper right')
    pyplot.grid(True)
    pyplot.show()
def question4_plot(n, m):
    """
    Draw the question 4 plot for the graph returned by dpa(n, m).

    The in-degree distribution of that graph is passed through
    normalization() and drawn as red circles on log/log axes.  This
    function does not call plt.show().

    Parameters
    ----------
    n: int
        first argument passed to dpa()
    m: int
        second argument passed to dpa()
    """
    dpa_graph = dpa(n, m)
    normalized_dist = normalization(in_degree_distribution(dpa_graph))

    plt.xscale("log")
    plt.yscale("log")
    plt.title("Log Normalized In-degrees Distribution of DPA Graph")
    plt.xlabel("Log of Number of In-degrees")
    plt.ylabel("Log of Normalized Distribution")

    x_values = normalized_dist.keys()
    y_values = normalized_dist.values()
    plt.plot(x_values, y_values, "ro", markersize=8)
def question2_plot():
    """
    Draw the question 2 plot.

    For each probability in (0.2, 0.4, 0.6) a graph is produced by
    algorithm_ER(1000, p); its in-degree distribution is passed through
    normalization() and drawn as circle markers on shared log/log axes,
    with one legend entry per probability.
    """
    n = 1000
    probs = [0.2, 0.4, 0.6]

    plt.xscale('log')
    plt.yscale('log')
    plt.title('Log Normalized In-degree Distribution '
              'for Different P based on 1000 Nodes')
    plt.xlabel('Log of Number of In-degrees')
    plt.ylabel('Log of Normalized Distribution')

    for p in probs:
        dist = normalization(in_degree_distribution(algorithm_ER(n, p)))
        plt.plot(dist.keys(), dist.values(), 'o')

    # Legend entries follow the order in which the series were plotted.
    legends = ['distribution of p=%s' % (p) for p in probs]
    plt.legend(legends, loc='upper left')
    plt.show()
def Question_1():
    '''
    Plot the normalised in-degree distribution of the citation graph.

    Task statement: compute the in-degree distribution for this citation
    graph. Once you have computed this distribution, you should normalize
    the distribution (make the values in the dictionary sum to one) and
    then compute a log/log plot of the points in this normalized
    distribution.

    The graph is loaded with alg_load_graph.load_graph from
    alg_load_graph.CITATION_URL. Nothing is returned; the normalised
    distribution is handed to loglog_plot_dictionary.
    '''
    # load data, calculate normalised in-degree distribution
    import alg_load_graph

    citation_graph = alg_load_graph.load_graph(alg_load_graph.CITATION_URL)
    distribution = project1.in_degree_distribution(citation_graph)

    # values()/items() are available on both Python 2 and Python 3,
    # unlike itervalues()/iteritems().
    factor = 1.0 / sum(distribution.values())
    normalised_distribution = {key: value * factor
                               for key, value in distribution.items()}

    # plot
    loglog_plot_dictionary(
        normalised_distribution,
        'Number of Citations',
        'Normalized Citation Frequency',
        'Log/Log Plot of In-Degree Distribution of Citation Graph\n')
def Question_4():
    '''
    Plot the normalised in-degree distribution of a DPA graph.

    Task statement: implement the DPA algorithm, compute a DPA graph using
    the values from Question 3, and then plot the in-degree distribution
    for this DPA graph. Creating an efficient implementation of the DPA
    algorithm from scratch is surprisingly tricky. The key issue in
    implementing the algorithm is to avoid iterating through every node in
    the graph when executing Line 6. Using a loop to implement Line 6 leads
    to implementations that require on the order of 30 minutes in desktop
    Python to create a DPA graph with 28000 nodes.

    The graph is built with dpa(28000, 13). Nothing is returned; the
    normalised distribution is handed to loglog_plot_dictionary.
    '''
    graph = dpa(28000, 13)
    distribution = project1.in_degree_distribution(graph)

    # values()/items() are available on both Python 2 and Python 3,
    # unlike itervalues()/iteritems().
    factor = 1.0 / sum(distribution.values())
    normalised_distribution = {key: value * factor
                               for key, value in distribution.items()}

    loglog_plot_dictionary(
        normalised_distribution,
        'In-Degrees',
        'Fraction of Nodes',
        'Log/Log Plot of In-Degree Distribution of DPA Graph\n')
def question2(subplot=None, filename=None):
    """Plot the in-degree distribution of a graph from algorithm_er.

    The graph is created with ``algorithm_er(3000, 0.1)``; its in-degree
    distribution is passed through ``norm`` and handed to ``plot`` under
    the label 'random generated'.  ``subplot`` and ``filename`` are
    forwarded to ``plot`` unchanged.
    """
    random_graph = algorithm_er(3000, 0.1)
    distribution = project1.in_degree_distribution(random_graph)
    plot(norm(distribution), 'random generated', subplot, filename)
def question3(subplot=None, filename=None):
    """Print the citation graph's average out-degree and plot a DPA graph.

    The graph loaded from 'alg_phys-cite.txt' is only used for the printed
    ``avg_out_degree`` value.  The plotted data is the in-degree
    distribution of ``algorithm_dpa(27700, 13)``, passed through ``norm``
    and handed to ``plot`` under the label 'DPA-generated'.  ``subplot``
    and ``filename`` are forwarded to ``plot`` unchanged.
    """
    citation_graph = alg_load_graph.load_graph('alg_phys-cite.txt')
    print('avg_out_degree', avg_out_degree(citation_graph))

    # NOTE(review): 27700 and 13 are hard-coded; presumably derived from
    # the citation graph's size and the value printed above - confirm.
    dpa_graph = algorithm_dpa(27700, 13)
    distribution = project1.in_degree_distribution(dpa_graph)
    plot(norm(distribution), 'DPA-generated', subplot, filename)
# print dpa._node_numbers # print dpa.run_trial(2) # print dpa._node_numbers # print dpa.run_trial(2) # print dpa._node_numbers # simulate the dpa digraph with # n,m obtained by Question 3 dpa_digraph = DPA(num_nodes, m) # print dpa_digraph # print average_out_degrees(dpa_digraph) # the random graph generated by DPA function num_nodes3 = float(len(dpa_digraph)) print "num_nodes:", num_nodes3 distribution3 = in_degree_distribution(dpa_digraph) # print the normalized distribution keys = list(distribution3.keys()) items = [] for key in distribution3: items.append(distribution3[key]) # print "keys",keys # print "items",items plt.figure(4) plt.loglog(keys, items, 'ro') plt.xlabel('the log in-degree of node') plt.ylabel('the log number of nodes in particular in-degree') plt.grid(True) plt.title("the loglog in_degree_distribution for DPA random graph") # plt.show()
def question1(subplot=None, filename=None):
    """Plot the in-degree distribution of the graph in 'alg_phys-cite.txt'.

    The distribution is passed through ``norm`` and handed to ``plot``
    under the label 'citation'.  ``subplot`` and ``filename`` are forwarded
    to ``plot`` unchanged.
    """
    citation_graph = alg_load_graph.load_graph('alg_phys-cite.txt')
    distribution = project1.in_degree_distribution(citation_graph)
    plot(norm(distribution), 'citation', subplot, filename)
def question4():
    """Plot the in-degree distribution of generate_DPA(12, 27770).

    The entry at key/index 0 is removed before the result is handed to
    ``plot``.
    """
    dpa_graph = generate_DPA(12, 27770)
    distribution = in_degree_distribution(dpa_graph)
    # Presumably dropped because in-degree 0 cannot be placed on a log
    # axis - confirm against plot().
    del distribution[0]
    plot(distribution)
def question3(subplot=None, filename=None):
    """Print the citation graph's average out-degree and plot a DPA graph.

    The graph loaded from './data/alg_phys-cite.txt' is only used for the
    printed ``avg_out_degree`` value.  The plotted data is the in-degree
    distribution of ``algorithm_dpa(27700, 13)``, passed through ``norm``
    and handed to ``plot`` under the label 'DPA-generated'.  ``subplot``
    and ``filename`` are forwarded to ``plot`` unchanged.
    """
    citation_graph = alg_load_graph.load_graph('./data/alg_phys-cite.txt')
    print('avg_out_degree', avg_out_degree(citation_graph))

    # NOTE(review): 27700 and 13 are hard-coded; presumably derived from
    # the citation graph's size and the value printed above - confirm.
    dpa_graph = algorithm_dpa(27700, 13)
    distribution = project1.in_degree_distribution(dpa_graph)
    plot(norm(distribution), 'DPA-generated', subplot, filename)
def question1(subplot=None, filename=None):
    """Plot the in-degree distribution of './data/alg_phys-cite.txt'.

    The distribution is passed through ``norm`` and handed to ``plot``
    under the label 'citation'.  ``subplot`` and ``filename`` are forwarded
    to ``plot`` unchanged.
    """
    citation_graph = alg_load_graph.load_graph('./data/alg_phys-cite.txt')
    distribution = project1.in_degree_distribution(citation_graph)
    plot(norm(distribution), 'citation', subplot, filename)
def question1():
    """Plot the in-degree distribution of the graph in 'alg_phys-cite.txt'.

    The entry at key/index 0 is removed before the result is handed to
    ``plot``.
    """
    citation_graph = load_graph('alg_phys-cite.txt')
    distribution = in_degree_distribution(citation_graph)
    # Presumably dropped because in-degree 0 cannot be placed on a log
    # axis - confirm against plot().
    del distribution[0]
    plot(distribution)
# print dpa._node_numbers # print dpa.run_trial(2) # print dpa._node_numbers # print dpa.run_trial(2) # print dpa._node_numbers # simulate the dpa digraph with # n,m obtained by Question 3 dpa_digraph = DPA(num_nodes, m) # print dpa_digraph # print average_out_degrees(dpa_digraph) # the random graph generated by DPA function num_nodes3 = float(len(dpa_digraph)) print "num_nodes:",num_nodes3 distribution3 = in_degree_distribution(dpa_digraph) # print the normalized distribution keys = list(distribution3.keys()) items = [] for key in distribution3: items.append(distribution3[key]) # print "keys",keys # print "items",items plt.figure(4) plt.loglog(keys, items, 'ro') plt.xlabel('the log in-degree of node') plt.ylabel('the log number of nodes in particular in-degree') plt.grid(True) plt.title("the loglog in_degree_distribution for DPA random graph") # plt.show()