def run():
    """Print and plot the in-degree distribution of the graph at ``graph_url``.

    The graph is fetched with ``alg_load_graph.load_graph``. Its in-degree
    distribution is printed, passed through ``normalize_in_deg_dist``
    together with the graph's node count, printed again, and finally
    handed to ``draw_loglog_plot``.
    """
    graph = alg_load_graph.load_graph(graph_url)

    raw_distribution = in_degree_distribution(graph)
    print(raw_distribution)

    normalized_distribution = normalize_in_deg_dist(raw_distribution, len(graph))
    print(normalized_distribution)

    draw_loglog_plot(normalized_distribution)
def question_35():
    """Overlay the distributions of the citation graph and a DPA graph.

    Loads the citation graph from ``CITATION_URL`` and builds a second graph
    with ``dpa_digraph_gen(27770, 13)``.  Both graphs are passed through
    ``norm_dist`` and the resulting ``{degree: value}`` mappings are drawn as
    unconnected dots on a single log/log plot: the citation graph in cyan,
    the DPA graph in black.
    """
    citation_graph = alg_load_graph.load_graph(CITATION_URL)
    citation_norm = norm_dist(citation_graph)
    # Take the keys once so x and y are guaranteed to line up.
    x_vals = list(citation_norm)
    y_vals = [citation_norm[degree] for degree in x_vals]

    dpa_node_count = 27770
    dpa_edges_per_step = 13
    dpa_graph = dpa_digraph_gen(dpa_node_count, dpa_edges_per_step)
    dpa_norm = norm_dist(dpa_graph)
    x_dpavals = list(dpa_norm)
    y_dpavals = [dpa_norm[degree] for degree in x_dpavals]

    plt.loglog(x_vals, y_vals, color="cyan", linestyle='None', marker=".", markersize=6)
    plt.loglog(x_dpavals, y_dpavals, color="black", linestyle='None', marker=".", markersize=6)
    plt.xlabel("Log Number of Degrees")
    plt.ylabel("Log Distribution")
    plt.title("Normalized Distribution of High Energy Physics Theory Papers")
    plt.show()

#question_35()
def Question_1():
    '''
    Plot the normalised in-degree distribution of the citation graph.

    The graph is loaded from alg_load_graph.CITATION_URL and its in-degree
    distribution is computed with project1.in_degree_distribution.  The
    values are rescaled so that they sum to one, and the rescaled dictionary
    is passed to loglog_plot_dictionary together with the axis labels and
    the plot title.
    '''
    import alg_load_graph

    graph = alg_load_graph.load_graph(alg_load_graph.CITATION_URL)
    counts = project1.in_degree_distribution(graph)

    # Multiply by the reciprocal of the total so the values sum to one.
    scale = 1.0 / sum(counts.values())
    normalised = {}
    for degree, count in counts.items():
        normalised[degree] = count * scale

    loglog_plot_dictionary(
        normalised,
        'Number of Citations',
        'Normalized Citation Frequency',
        'Log/Log Plot of In-Degree Distribution of Citation Graph\n',
    )
def q1():
    """Compute, normalize, print and plot the citation graph's in-degree distribution.

    Progress messages are printed before and after each stage.  The
    distribution returned by ``normalize`` is printed in full and then drawn
    as red circles on log-scaled axes.
    """
    # The original author left helper.EX_GRAPH1 here as a commented-out
    # alternative to downloading the citation graph.
    graph = loader.load_graph(loader.CITATION_URL)

    print('start calculating degree distribution')
    distribution = helper.in_degree_distribution(graph)
    print('finish calculating degree distribution')

    print('start normalizing')
    distribution = normalize(distribution)
    print('finish normalizing')
    print(distribution)

    print('start plotting')
    for set_scale in (plt.yscale, plt.xscale):
        set_scale('log')
    plt.xlabel('degree')
    plt.ylabel('distribution')
    degrees = distribution.keys()
    fractions = distribution.values()
    plt.plot(degrees, fractions, 'ro')
    plt.show()
def question_1():
    """Plot the distribution of the citation graph on log/log axes.

    Loads the citation graph from ``CITATION_URL``, passes it through
    ``norm_dist`` and draws the resulting ``{degree: value}`` mapping as
    unconnected blue dots.
    """
    citation_graph = alg_load_graph.load_graph(CITATION_URL)
    norm = norm_dist(citation_graph)

    # Take the keys once so x and y are guaranteed to line up.
    x_vals = list(norm)
    y_vals = [norm[degree] for degree in x_vals]

    plt.loglog(x_vals, y_vals, color="blue", linestyle='None', marker=".", markersize=6)
    plt.xlabel("Log Number of Degrees")
    plt.ylabel("Log Distribution")
    plt.title("Normalized Distribution of High Energy Physics Theory Papers")
    plt.show()
def q3():
    """Print the citation graph's node count and its ``average_out_degree`` value.

    Both numbers are written on one line, separated by a single space.
    """
    graph = loader.load_graph(loader.CITATION_URL)
    node_count = len(graph)
    mean_out_degree = average_out_degree(graph)
    print('%s %s' % (node_count, mean_out_degree))
#create a set of all unique degree values degree_distribution_dict = dict(Counter(in_degree_dict.values())) num_nodes = len(in_degree_dict) for key in degree_distribution_dict.keys(): degree_distribution_dict[key] = float(degree_distribution_dict[key])/float(num_nodes) return degree_distribution_dict #create a dictionary from the degree set initialized with 0 values #for node in in_degree_dict.keys(): #deg_distribution_dict[in_degree_dict[node]] += 1 #calculating the number of nodes having same degree #return deg_distribution_dict return {} #loading citation graph from url citation_graph = alg_load_graph.load_graph(alg_load_graph.CITATION_URL) def build_plot(): """ Build log log plot of normalized indegree distribution """ #calculate in degree distribution (normalized) for citation graph distribution_dict = in_degree_distribution(citation_graph) #create the log/log plot of normalized indegree distribution plot = [] for input_val in distribution_dict.keys(): if input_val != 0: plot.append([math.log(input_val), math.log(distribution_dict[input_val])]) return plot
def run_example():
    """
    Report how many popular nodes the graph at GRAPH100_URL contains
    """
    example_graph = alg_load_graph.load_graph(GRAPH100_URL)
    popular_count = len(find_popular_nodes(example_graph))
    print("There are %s popular nodes" % popular_count)
def question3(subplot=None, filename=None):
    """Plot the normalized in-degree distribution of a DPA-generated graph.

    Loads the citation graph from ``alg_phys-cite.txt``, prints its
    ``avg_out_degree`` value, and generates a DPA graph with as many nodes
    as the citation graph.  The DPA graph's in-degree distribution is
    passed through ``norm`` and drawn by ``plot`` under the label
    ``'DPA-generated'``.

    Args:
        subplot: forwarded unchanged to ``plot``.
        filename: forwarded unchanged to ``plot``.
    """
    graph = alg_load_graph.load_graph('alg_phys-cite.txt')
    print('avg_out_degree', avg_out_degree(graph))

    # The DPA graph is compared with the citation graph, so it must have the
    # same number of nodes; derive it from the data instead of hard-coding it.
    node_count = len(graph)
    # NOTE(review): 13 is presumably the rounded average out-degree printed
    # above -- confirm against that value.
    edges_per_node = 13
    dpa = algorithm_dpa(node_count, edges_per_node)

    normed = norm(project1.in_degree_distribution(dpa))
    plot(normed, 'DPA-generated', subplot, filename)
def question1(subplot=None, filename=None):
    """Plot the normalized in-degree distribution of the citation graph.

    The graph is read from ``alg_phys-cite.txt``; its in-degree distribution
    is passed through ``norm`` and drawn by ``plot`` under the label
    ``'citation'``.

    Args:
        subplot: forwarded unchanged to ``plot``.
        filename: forwarded unchanged to ``plot``.
    """
    citation_graph = alg_load_graph.load_graph('alg_phys-cite.txt')
    in_degrees = project1.in_degree_distribution(citation_graph)
    plot(norm(in_degrees), 'citation', subplot, filename)
def question_2():
    """Overlay the citation graph's distribution with three random digraphs.

    Loads the citation graph from ``CITATION_URL`` and generates three
    graphs with ``digraph_rand(5000, p)`` for p = 0.2, 0.6 and 0.9.  Each
    graph is passed through ``norm_dist`` and drawn as unconnected dots on
    one log/log plot: citation in cyan, then p = 0.2 in blue, p = 0.6 in
    black and p = 0.9 in red.
    """
    citation_graph = alg_load_graph.load_graph(CITATION_URL)

    # (distribution, colour) pairs in drawing order.  Each random digraph is
    # reduced to its distribution straight away, so only one generated graph
    # is alive at a time.
    series = [(norm_dist(citation_graph), "cyan")]
    for probability, color in ((0.2, "blue"), (0.6, "black"), (0.9, "red")):
        series.append((norm_dist(digraph_rand(5000, probability)), color))

    for distribution, color in series:
        # Take the keys once so x and y are guaranteed to line up.
        degrees = list(distribution)
        values = [distribution[degree] for degree in degrees]
        plt.loglog(degrees, values, color=color, linestyle='None', marker=".", markersize=6)

    plt.xlabel("Log Number of Degrees")
    plt.ylabel("Log Distribution")
    plt.title("Normalized Distribution for Random Generated Digraphs")
    plt.show()
def question3(subplot=None, filename=None):
    """Plot the normalized in-degree distribution of a DPA-generated graph.

    Loads the citation graph from ``./data/alg_phys-cite.txt``, prints its
    ``avg_out_degree`` value, and generates a DPA graph with as many nodes
    as the citation graph.  The DPA graph's in-degree distribution is
    passed through ``norm`` and drawn by ``plot`` under the label
    ``'DPA-generated'``.

    Args:
        subplot: forwarded unchanged to ``plot``.
        filename: forwarded unchanged to ``plot``.
    """
    graph = alg_load_graph.load_graph('./data/alg_phys-cite.txt')
    print('avg_out_degree', avg_out_degree(graph))

    # The DPA graph is compared with the citation graph, so it must have the
    # same number of nodes; derive it from the data instead of hard-coding it.
    node_count = len(graph)
    # NOTE(review): 13 is presumably the rounded average out-degree printed
    # above -- confirm against that value.
    edges_per_node = 13
    dpa = algorithm_dpa(node_count, edges_per_node)

    normed = norm(project1.in_degree_distribution(dpa))
    plot(normed, 'DPA-generated', subplot, filename)
def question1(subplot=None, filename=None):
    """Plot the normalized in-degree distribution of the citation graph.

    The graph is read from ``./data/alg_phys-cite.txt``; its in-degree
    distribution is passed through ``norm`` and drawn by ``plot`` under the
    label ``'citation'``.

    Args:
        subplot: forwarded unchanged to ``plot``.
        filename: forwarded unchanged to ``plot``.
    """
    citation_graph = alg_load_graph.load_graph('./data/alg_phys-cite.txt')
    in_degrees = project1.in_degree_distribution(citation_graph)
    plot(norm(in_degrees), 'citation', subplot, filename)
import degree_distributions_for_graphs as degree_dis
import alg_load_graph
import matplotlib.pyplot as plt


def plot_distri(digraph, title="Indegree Distribution for DPA Graph"):
    """
    plot log/log distribution of digraph

    The in-degree distribution from degree_dis.in_degree_distribution is
    divided by the sum of its values and drawn as blue circles on log/log
    axes.

    digraph -- graph passed to degree_dis.in_degree_distribution
    title   -- plot title; the default keeps the original DPA wording so
               existing callers are unaffected
    """
    distri = degree_dis.in_degree_distribution(digraph)
    sum_degree = float(sum(distri.values()))

    # Build both axes from one key order so each x is paired with its own y.
    distrix = list(distri)
    distriy = [distri[degree] / sum_degree for degree in distrix]

    plt.loglog(distrix, distriy, "bo")
    plt.xlabel("Cite Counts(log)")
    plt.ylabel("Indegree Frequency(log)")
    plt.title(title)
    plt.show()


if __name__ == "__main__":
    citation_graph = alg_load_graph.load_graph("http://temporary-files.qiniudn.com/alg_phys-cite-1.txt")
    # This script plots the citation graph, so label it as such.
    plot_distri(citation_graph, title="Indegree Distribution for Citation Graph")
if degree_dist.has_key(degree_value): degree_dist[degree_value] += 1 else: degree_dist[degree_value] = 1 return degree_dist ############################################################## # For Application import alg_load_graph import alg_dpa_trial CITATION_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_phys-cite.txt" citation_graph = alg_load_graph.load_graph(CITATION_URL) cite_dist = in_degree_distribution(citation_graph) """ Question 3 """ all_vertex = citation_graph.keys() out_degree = 0 for dummy_vertex in citation_graph.keys(): out_degree += len(citation_graph[dummy_vertex]) ave_out = out_degree/27770.0 print ave_out