def calculate(index):
    """
    Return the analysis results for one graph, computing them if needed.

    The graph name is ``index[6:]`` (the first six characters of ``index``
    are dropped).  If ``../Results/<name>_results.txt`` can be loaded, that
    stored object is returned.  If loading it raises ``IOError``, the graph
    is loaded from ``../Graphs/<name>_graph.txt``, handed to
    ``analyze_graph`` and the outcome is saved to the results file before
    being returned.  (Per the original author's notes the analysis covers
    clustering, knots, number of vertices and number of edges.)

    Any other failure while loading the stored results is reported on
    stdout and re-raised.  Errors raised while analysing or saving in the
    ``IOError`` fallback propagate unchanged.
    """
    name = index[6:]
    graph_path = '../Graphs/' + name + '_graph.txt'
    results_path = '../Results/' + name + '_results.txt'
    try:
        results = load_object_from_file(results_path)
        print("Loaded %s" % (name))
    except IOError:
        # No readable results file yet: analyse the graph and cache the result.
        g = load_object_from_file(graph_path)
        results = analyze_graph(g, name)
        save_object_to_file(results, results_path)
        print("Saved %s" % (name))
    except Exception:
        # Previously this fell through to ``return results`` with ``results``
        # unbound, which hid the real error behind an UnboundLocalError.
        print('Something bad happened....')
        raise
    return results
def GetResultList():
    """
    Collect the saved analysis results of every indexed graph.

    Reads the index list from ``../Graphs/indices.txt`` and, for each entry,
    tries to load ``../Results/<index[6:]>_results.txt``.

    Returns a tuple of four parallel lists ``(cs, k, vs, es)`` holding, for
    each result that could be read:
        cs -- ``result.clustering``
        k  -- ``result.has_knot``
        vs -- ``result.vertices``
        es -- ``result.edges``

    Entries whose results cannot be loaded, or that lack one of the four
    attributes, are skipped entirely so the lists always stay the same
    length.
    """
    indices = load_object_from_file('../Graphs/indices.txt')
    cs, k, vs, es = [], [], [], []
    for index in indices:
        try:
            result = load_object_from_file(
                '../Results/' + index[6:] + '_results.txt')
            # Read every attribute before touching the output lists, so a
            # failure halfway through cannot leave them misaligned.
            row = (result.clustering, result.has_knot,
                   result.vertices, result.edges)
        except Exception:
            # Best effort: graphs without usable results are ignored.
            continue
        cs.append(row[0])
        k.append(row[1])
        vs.append(row[2])
        es.append(row[3])
    return cs, k, vs, es
def MultiThreadit():
    """
    Run ``calculate`` on every indexed graph using a pool of processes.

    The indices are read from ``../Graphs/indices.txt``; one asynchronous
    task is submitted per index.  Each successful result is stored in a
    dictionary under its ``name`` attribute.  Once all tasks have finished
    the dictionary is printed and returned.
    """
    collected = {}

    def _store(analysis):
        """Record one finished analysis under its name."""
        collected[analysis.name] = analysis

    graph_indices = load_object_from_file('../Graphs/indices.txt')
    workers = Pool()
    for graph_index in graph_indices:
        # One task per graph; _store receives the value calculate returns.
        workers.apply_async(calculate, args=(graph_index,), callback=_store)
    # No more tasks will be submitted; wait for the outstanding ones.
    workers.close()
    workers.join()
    print(collected)
    return collected
def three_graph_subplot(min_vertices=5200):
    """
    Plot the CDF of in-degree / out-degree ratios for one large graph.

    Walks the indices in ``../Graphs/indices.txt`` and picks the first graph
    whose saved results report at least ``min_vertices`` vertices and whose
    graph file can be loaded.  For every vertex with both in-degree and
    out-degree of at least 1, the ratio in/out is computed.  The CDF of
    those ratios is plotted with a logarithmic x axis, the mean and median
    ratio are printed, and the figure is shown.

    min_vertices -- minimum ``results.vertices`` for a graph to qualify
                    (default 5200).

    Raises IndexError if no qualifying graph could be loaded.
    """
    indices = load_object_from_file('../Graphs/indices.txt')
    for index in indices:
        name = index[6:]
        try:
            results = load_object_from_file(
                '../Results/' + name + '_results.txt')
            if results.vertices < min_vertices:
                continue
            graph = load_object_from_file('../Graphs/' + name + '_graph.txt')
        except Exception:
            # Best effort: skip graphs whose files are missing or unreadable.
            continue
        # Only the first qualifying graph is plotted, so stop loading here.
        break
    else:
        raise IndexError(
            'no graph with at least %d vertices could be loaded'
            % min_vertices)

    # in/out degree ratio of each vertex; vertices with a zero degree on
    # either side are left out (avoids division by zero and ratios of 0,
    # which cannot be placed on a log axis).
    in_out = []
    for vertex in graph.vertices():
        deg_in = graph.in_degree(vertex)
        deg_out = graph.out_degree(vertex)
        if deg_in >= 1 and deg_out >= 1:
            in_out.append(deg_in / float(deg_out))

    c = Cdf.MakeCdfFromList(in_out)
    x, y = c.Render()
    pyplot.plot(x, y, 'o')
    pyplot.xscale('log')
    print(numpy.mean(in_out))
    print(numpy.median(in_out))
    pyplot.show()
def compare_wikipedia_to_ba():
    """
    Compare out-degree distributions of large graphs with Barabasi-Albert.

    For the first three indexed graphs whose saved results report at least
    3700 vertices, plots k vs P(k) for the out-degrees on log-log axes with
    a line of best fit, and saves the figure as ``out_degree_<name>``.
    Then builds a ``BADirectedGraph`` with the same number of vertices,
    draws the same plot for it, saves it as
    ``out_degree_BA_on_<n>_vertices`` and shows it.  The fitted
    coefficients of each plot are printed.
    """
    indices = load_object_from_file('../Graphs/indices.txt')
    graphs = []
    for index in indices:
        name = index[6:]
        try:
            results = load_object_from_file(
                '../Results/' + name + '_results.txt')
            if results.vertices >= 3700:
                graph = load_object_from_file(
                    '../Graphs/' + name + '_graph.txt')
                graphs.append((name, graph))
        except Exception:
            # Best effort: skip graphs whose files are missing or unreadable.
            continue
        # Only three graphs are plotted; do not load more than that.
        if len(graphs) == 3:
            break

    for name, graph in graphs:
        pyplot.clf()
        _plot_out_degree_pmf_fit(graph)
        pyplot.savefig('out_degree_' + name)
        pyplot.clf()

        # BA graph w/ same # of vs; show that coefficient is the same
        vs = graph.vertices()
        bag = BADirectedGraph(5)
        bag.build_graph(len(vs) - 5)
        _plot_out_degree_pmf_fit(bag)
        # Save before showing: once the window opened by show() is closed
        # the figure may be gone and savefig() would write an empty image.
        pyplot.savefig('out_degree_BA_on_' + str(len(vs)) + '_vertices')
        pyplot.show()


def _log_or_placeholder(value):
    """Return log(value), or the placeholder .00001 for non-positive values."""
    # NOTE(review): the placeholder is used directly as the *log* value, as
    # in the original code -- confirm that this is intended.
    return .00001 if value <= 0 else math.log(value)


def _plot_out_degree_pmf_fit(graph):
    """Plot graph's out-degree PMF and its log-log linear fit; print the fit."""
    outs = [graph.out_degree(vertex) for vertex in graph.vertices()]

    # create a pmf of degrees
    pmf = Pmf.MakePmfFromList(outs)
    xs, ys = pmf.Render()

    # convert to log, so we can find line of best fit
    xs_log = [_log_or_placeholder(x) for x in xs]
    ys_log = [_log_or_placeholder(y) for y in ys]
    coefs = numpy.polyfit(xs_log, ys_log, 1)
    fit_y = numpy.polyval(coefs, xs_log)
    print(coefs)

    # transform fit line, to plot it on log-log scale
    fit_y_log = [math.exp(f) for f in fit_y]

    pyplot.plot(xs, ys, 'o')
    pyplot.plot(xs, fit_y_log, 'r--', linewidth=4)
    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.xlabel('k')
    pyplot.ylabel('P(k)')