def run():
    """Print and plot the in-degree distribution of the graph at ``graph_url``.

    The distribution returned by ``in_degree_distribution`` is printed, then
    passed together with the graph's node count to ``normalize_in_deg_dist``;
    that result is printed as well and handed to ``draw_loglog_plot``.
    """
    graph = alg_load_graph.load_graph(graph_url)

    raw_distribution = in_degree_distribution(graph)
    print(raw_distribution)

    node_count = len(graph)
    normalized = normalize_in_deg_dist(raw_distribution, node_count)
    print(normalized)

    draw_loglog_plot(normalized)
def run():
    """Load the graph at ``graph_url`` and show its in-degree distribution.

    Both the result of ``in_degree_distribution`` and the result of
    ``normalize_in_deg_dist`` (called with the graph's node count) are
    printed; the latter is then given to ``draw_loglog_plot``.
    """
    loaded_graph = alg_load_graph.load_graph(graph_url)

    distribution = in_degree_distribution(loaded_graph)
    print(distribution)

    scaled_distribution = normalize_in_deg_dist(distribution,
                                                len(loaded_graph))
    print(scaled_distribution)

    draw_loglog_plot(scaled_distribution)
Example #3
0
def question_35():
    """Plot the citation graph and a generated graph on one log/log chart.

    ``norm_dist`` is applied to the graph loaded from ``CITATION_URL`` and to
    the graph returned by ``dpa_digraph_gen(27770, 13)``. Each resulting
    mapping is drawn as unconnected dots (keys on x, values on y): the
    citation graph in cyan, the generated graph in black. The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    citation_graph = alg_load_graph.load_graph(CITATION_URL)

    # The previous version also summed every node's out-degree here, but the
    # total was never read, so that loop has been dropped.
    citation_norm = norm_dist(citation_graph)
    x_vals = list(citation_norm)
    y_vals = [citation_norm[degree] for degree in x_vals]

    target_node = 27770
    step_node = 13
    dpa_graph = dpa_digraph_gen(target_node, step_node)

    dpa_norm = norm_dist(dpa_graph)
    x_dpavals = list(dpa_norm)
    y_dpavals = [dpa_norm[degree] for degree in x_dpavals]

    # Both series share the same marker styling; only the colour differs.
    dot_style = dict(linestyle='None', marker=".", markersize=6)
    plt.loglog(x_vals, y_vals, color="cyan", **dot_style)
    plt.loglog(x_dpavals, y_dpavals, color="black", **dot_style)

    plt.xlabel("Log Number of Degrees")
    plt.ylabel("Log Distribution")
    plt.title("Normalized Distribution of High Energy Physics Theory Papers")
    plt.show()


#question_35()
Example #4
0
def Question_1():
    '''
    Plot the normalised in-degree distribution of the citation graph.

    The graph is loaded from alg_load_graph.CITATION_URL, its in-degree
    distribution is computed with project1.in_degree_distribution, and the
    values are scaled so that they sum to one. The scaled dictionary is then
    passed to loglog_plot_dictionary together with the axis labels and title.

    Returns None.
    '''
    # Imported inside the function, as in the original block.
    import alg_load_graph

    citation_graph = alg_load_graph.load_graph(alg_load_graph.CITATION_URL)
    distribution = project1.in_degree_distribution(citation_graph)

    # values()/items() are available on both Python 2 and Python 3 dicts,
    # whereas itervalues()/iteritems() only exist on Python 2.
    factor = 1.0 / sum(distribution.values())
    normalised_distribution = {key: value * factor
                               for key, value in distribution.items()}

    loglog_plot_dictionary(normalised_distribution, 'Number of Citations',
                           'Normalized Citation Frequency',
                           'Log/Log Plot of In-Degree Distribution of Citation Graph\n')
def q1():
    """Plot the in-degree distribution of the citation graph on log/log axes.

    Loads the graph from ``loader.CITATION_URL``, computes its in-degree
    distribution with ``helper.in_degree_distribution``, passes the result
    through ``normalize`` and prints it, then draws the keys against the
    values as red circles. A progress message is printed before and after
    each step. Nothing is returned.
    """
    # Indentation is uniform here: the earlier mix of "tab" and "space + tab"
    # lines is rejected by Python 3 with a TabError.
    citation_graph = loader.load_graph(loader.CITATION_URL)
    #citation_graph = helper.EX_GRAPH1

    print('start calculating degree distribution')
    degree_distribution = helper.in_degree_distribution(citation_graph)
    print('finish calculating degree distribution')

    print('start normalizing')
    degree_distribution = normalize(degree_distribution)
    print('finish normalizing')

    print(degree_distribution)

    print('start plotting')
    plt.yscale('log')
    plt.xscale('log')
    plt.xlabel('degree')
    plt.ylabel('distribution')
    # Materialise the dict views so the plot call receives plain lists on
    # Python 3 as well as Python 2.
    degrees = list(degree_distribution.keys())
    shares = list(degree_distribution.values())
    plt.plot(degrees, shares, 'ro')
    plt.show()
Example #6
0
def question_1():
    """Show a log/log scatter plot of ``norm_dist`` for the citation graph.

    The graph is loaded from ``CITATION_URL``; the mapping returned by
    ``norm_dist`` is drawn as unconnected blue dots (keys on x, values on y)
    and displayed with ``plt.show()``. Nothing is returned.
    """
    citation_graph = alg_load_graph.load_graph(CITATION_URL)

    # An out-degree total used to be accumulated here but was never read;
    # the dead loop has been removed.
    distribution = norm_dist(citation_graph)
    x_vals = list(distribution)
    y_vals = [distribution[degree] for degree in x_vals]

    plt.loglog(x_vals,
               y_vals,
               color="blue",
               linestyle='None',
               marker=".",
               markersize=6)
    plt.xlabel("Log Number of Degrees")
    plt.ylabel("Log Distribution")
    plt.title("Normalized Distribution of High Energy Physics Theory Papers")
    plt.show()
def q3():
    """Print the citation graph's node count and ``average_out_degree`` value.

    The two numbers are written on one line, separated by a single space.
    """
    graph = loader.load_graph(loader.CITATION_URL)
    node_count = len(graph)
    mean_out = average_out_degree(graph)
    print("%s %s" % (node_count, mean_out))
Example #8
0
        #create a set of all unique degree values
        degree_distribution_dict = dict(Counter(in_degree_dict.values()))
        num_nodes = len(in_degree_dict)
        for key in degree_distribution_dict.keys():
            degree_distribution_dict[key] = float(degree_distribution_dict[key])/float(num_nodes)
        return degree_distribution_dict
        #create a dictionary from the degree set initialized with 0 values
        #for node in in_degree_dict.keys():
            #deg_distribution_dict[in_degree_dict[node]] += 1
            #calculating the number of nodes having same degree
        #return deg_distribution_dict
    return {}
    

# Load the citation graph from alg_load_graph.CITATION_URL when this module is
# executed; build_plot() below reads this module-level name.
citation_graph = alg_load_graph.load_graph(alg_load_graph.CITATION_URL)


def build_plot():
    """
    Return log/log points for the citation graph's in-degree distribution.

    Every key/value pair of in_degree_distribution(citation_graph) becomes a
    two-element list [log(key), log(value)]. The entry for key 0 is left out
    because log(0) is undefined.
    """
    distribution = in_degree_distribution(citation_graph)
    return [[math.log(degree), math.log(share)]
            for degree, share in distribution.items()
            if degree != 0]
def run_example():
    """
    Print how many popular nodes the graph at GRAPH100_URL has
    """
    example_graph = alg_load_graph.load_graph(GRAPH100_URL)
    popular_count = len(find_popular_nodes(example_graph))
    # Joined with single spaces to match the comma-separated print statement.
    print(" ".join(["There are", str(popular_count), "popular nodes"]))
Example #10
0
def question3(subplot=None, filename=None):
    """Plot the in-degree distribution of an ``algorithm_dpa`` graph.

    First prints ``avg_out_degree`` for the graph loaded from
    'alg_phys-cite.txt'. Then builds a graph with ``algorithm_dpa``, runs its
    in-degree distribution through ``norm`` and passes the result to ``plot``
    under the label 'DPA-generated'.

    subplot, filename -- forwarded unchanged to ``plot``.
    """
    citation = alg_load_graph.load_graph('alg_phys-cite.txt')
    print('avg_out_degree', avg_out_degree(citation))

    # NOTE(review): other code for this data set uses 27770 as the node
    # count; confirm whether 27700 is intentional.
    dpa_args = (27700, 13)
    generated = algorithm_dpa(*dpa_args)

    distribution = project1.in_degree_distribution(generated)
    plot(norm(distribution), 'DPA-generated', subplot, filename)
Example #11
0
def question1(subplot=None, filename=None):
    """Plot the in-degree distribution of the 'alg_phys-cite.txt' graph.

    The distribution from ``project1.in_degree_distribution`` is run through
    ``norm`` and passed to ``plot`` under the label 'citation'.

    subplot, filename -- forwarded unchanged to ``plot``.
    """
    citation = alg_load_graph.load_graph('alg_phys-cite.txt')
    distribution = project1.in_degree_distribution(citation)
    plot(norm(distribution), 'citation', subplot, filename)
Example #12
0
def question_2():
    """Compare the citation graph with three ``digraph_rand`` graphs.

    ``norm_dist`` is applied to the graph loaded from ``CITATION_URL`` and to
    ``digraph_rand(5000, x)`` for x = 0.2, 0.6 and 0.9, in that order. Each
    resulting mapping is drawn as unconnected dots on shared log/log axes
    (cyan, blue, black and red respectively), and the figure is displayed
    with ``plt.show()``. Nothing is returned.
    """
    def _points(graph):
        # Split norm_dist(graph) into parallel key and value lists.
        dist = norm_dist(graph)
        degrees = list(dist)
        return degrees, [dist[degree] for degree in degrees]

    citation_graph = alg_load_graph.load_graph(CITATION_URL)

    # An out-degree total used to be accumulated here but was never read;
    # the dead loop has been removed.
    series = [("cyan", _points(citation_graph))]

    # Generated one after another, in the same order as before, so any
    # random-number consumption inside digraph_rand happens in the same order.
    for rand_arg, color in ((0.2, "blue"), (0.6, "black"), (0.9, "red")):
        series.append((color, _points(digraph_rand(5000, rand_arg))))

    for color, (x_vals, y_vals) in series:
        plt.loglog(x_vals,
                   y_vals,
                   color=color,
                   linestyle='None',
                   marker=".",
                   markersize=6)

    plt.xlabel("Log Number of Degrees")
    plt.ylabel("Log Distribution")
    plt.title("Normalized Distribution for Random Generated Digraphs")
    plt.show()
def question3(subplot=None, filename=None):
    """Plot the in-degree distribution of an ``algorithm_dpa`` graph.

    Prints ``avg_out_degree`` for the graph loaded from
    './data/alg_phys-cite.txt', then generates a graph with
    ``algorithm_dpa``, applies ``norm`` to its in-degree distribution and
    gives the result to ``plot`` with the label 'DPA-generated'.

    subplot, filename -- forwarded unchanged to ``plot``.
    """
    source_graph = alg_load_graph.load_graph('./data/alg_phys-cite.txt')
    print('avg_out_degree', avg_out_degree(source_graph))

    # NOTE(review): other code for this data set uses 27770 as the node
    # count; confirm whether 27700 is intentional.
    dpa_args = (27700, 13)
    dpa_graph = algorithm_dpa(*dpa_args)

    dpa_distribution = project1.in_degree_distribution(dpa_graph)
    plot(norm(dpa_distribution), 'DPA-generated', subplot, filename)
def question1(subplot=None, filename=None):
    """Plot the in-degree distribution of './data/alg_phys-cite.txt'.

    The loaded graph's in-degree distribution is passed through ``norm`` and
    then to ``plot`` with the label 'citation'.

    subplot, filename -- forwarded unchanged to ``plot``.
    """
    source_graph = alg_load_graph.load_graph('./data/alg_phys-cite.txt')
    in_degrees = project1.in_degree_distribution(source_graph)
    plot(norm(in_degrees), 'citation', subplot, filename)
Example #15
0
import degree_distributions_for_graphs as degree_dis
import alg_load_graph
import matplotlib.pyplot as plt

def plot_distri(digraph, title="Indegree Distribution for DPA Graph"):
    """
    plot log/log distribution of digraph

    The in-degree distribution of digraph is divided by the sum of its
    values and drawn as blue circles on log/log axes.

    digraph -- graph accepted by degree_dis.in_degree_distribution
    title   -- plot title; defaults to the previously hard-coded text so
               existing callers are unaffected, while callers plotting a
               different graph can label it correctly
    """
    distri = degree_dis.in_degree_distribution(digraph)
    sum_degree = float(sum(distri.values()))

    # Build both axes from one pass over items() so the pairs stay aligned,
    # and hand plain lists (not dict views) to matplotlib.
    distrix = []
    distriy = []
    for degree, count in distri.items():
        distrix.append(degree)
        distriy.append(count / sum_degree)

    plt.loglog(distrix, distriy, "bo")
    plt.xlabel("Cite Counts(log)")
    plt.ylabel("Indegree Frequency(log)")
    plt.title(title)
    plt.show()

if __name__ == "__main__":
    # Script entry point: load the graph from the URL below and plot its
    # in-degree distribution with plot_distri().
    citation_graph = alg_load_graph.load_graph("http://temporary-files.qiniudn.com/alg_phys-cite-1.txt")
    plot_distri(citation_graph)
        if degree_dist.has_key(degree_value):
            degree_dist[degree_value] += 1
        else:
            degree_dist[degree_value] = 1
            
    return degree_dist
    
    
##############################################################
# For Application

import alg_load_graph
import alg_dpa_trial

CITATION_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_phys-cite.txt"
citation_graph = alg_load_graph.load_graph(CITATION_URL)

cite_dist = in_degree_distribution(citation_graph)

# Question 3: average out-degree of the citation graph.
all_vertex = citation_graph.keys()

# Total number of outgoing edges over all nodes.
out_degree = 0
for dummy_vertex in citation_graph:
    out_degree += len(citation_graph[dummy_vertex])

# Divide by the number of nodes actually loaded rather than a hard-coded
# 27770.0, so the average stays correct if the data set changes. An empty
# graph yields 0.0, as the previous fixed divisor did.
if citation_graph:
    ave_out = out_degree / float(len(citation_graph))
else:
    ave_out = 0.0
print(ave_out)

def run_example():
    """
    Load the GRAPH100_URL graph and print its number of popular nodes
    """
    graph = alg_load_graph.load_graph(GRAPH100_URL)
    popular_nodes = find_popular_nodes(graph)
    # Pieces are joined with single spaces, matching the output of the
    # comma-separated print statement.
    message_parts = ["There are", str(len(popular_nodes)), "popular nodes"]
    print(" ".join(message_parts))