コード例 #1
0
def calculate(index):
    """
    Return the analysis results for the graph identified by ``index``.

    The first six characters of ``index`` are dropped to obtain the graph
    name, which selects the two files used here:

    * ``../Results/<name>_results.txt`` -- previously saved results
    * ``../Graphs/<name>_graph.txt``    -- the stored graph

    If the saved results can be loaded they are returned as they are.
    If loading them raises ``IOError``, the graph is loaded instead,
    passed to ``analyze_graph`` and the outcome is written to the results
    file before being returned.  (Per the original description the
    analysis covers clustering, knots, number of vertices and number of
    edges.)

    Any other error while loading the saved results is reported on
    stdout and then re-raised.  The handler used to fall through to
    ``return results`` with ``results`` never assigned, which replaced
    the real error with an UnboundLocalError.

    NOTE(review): the original docstring calls the stored graph
    "pickled"; if ``load_object_from_file`` unpickles, only feed it
    trusted files.
    """
    name = index[6:]
    results_path = '../Results/' + name + '_results.txt'
    try:
        results = load_object_from_file(results_path)
        print("Loaded %s" % (name))
    except IOError:
        # No readable saved results: analyze the graph and save the outcome.
        g = load_object_from_file('../Graphs/' + name + '_graph.txt')
        results = analyze_graph(g, name)
        save_object_to_file(results, results_path)
        print("Saved %s" % (name))
    except Exception:
        print('Something bad happened....')
        # Propagate the real failure instead of hiding it behind an
        # UnboundLocalError at the return statement below.
        raise
    return results
コード例 #2
0
def GetResultList():
    """
    Collect the saved analysis results of every indexed graph.

    Reads the index list from ``../Graphs/indices.txt`` and, for each
    entry, loads ``../Results/<name>_results.txt`` where ``<name>`` is
    the entry without its first six characters.

    Returns a 4-tuple of parallel lists ``(cs, k, vs, es)`` holding, per
    graph:
    clustering coefficients
    has_knot
    number of vertices
    number of edges

    A graph whose results cannot be loaded, or whose results object lacks
    one of the four attributes, is skipped as a whole.  All four values
    are read before anything is appended, so the lists always have the
    same length and position ``i`` refers to the same graph in each.
    """
    indices = load_object_from_file('../Graphs/indices.txt')
    cs, k, vs, es = [], [], [], []
    for index in indices:
        try:
            result = load_object_from_file('../Results/' + index[6:] + '_results.txt')
            row = (result.clustering, result.has_knot,
                   result.vertices, result.edges)
        except Exception:
            # Best effort: graphs without usable results are left out.
            continue
        cs.append(row[0])
        k.append(row[1])
        vs.append(row[2])
        es.append(row[3])
    return cs, k, vs, es
コード例 #3
0
def MultiThreadit():
    """
    Run ``calculate`` for every indexed graph through a ``Pool``.

    The index list is read from ``../Graphs/indices.txt``; one
    asynchronous ``calculate(index)`` task is submitted per entry.  Each
    finished task hands its result to a callback that stores it in a
    dictionary under the result's ``name`` attribute.  After the pool has
    been closed and joined the dictionary is printed and returned.

    NOTE(review): ``Pool`` is presumably ``multiprocessing.Pool`` (worker
    processes rather than threads, despite the function name) -- confirm
    against the file's imports.
    """
    collected = {}

    def store(outcome):
        # Completion callback: file the result under its own name.
        collected[outcome.name] = outcome

    entries = load_object_from_file('../Graphs/indices.txt')
    workers = Pool()
    for entry in entries:
        workers.apply_async(calculate, (entry,), callback=store)
    # No further tasks will be submitted; wait for the outstanding ones.
    workers.close()
    workers.join()
    print(collected)
    return collected
コード例 #4
0
def three_graph_subplot(min_vertices=5200):
    """
    Plot the distribution of in-degree / out-degree ratios of one graph.

    Walks the index list stored in ``../Graphs/indices.txt`` and selects
    the first entry for which the saved results load, report at least
    ``min_vertices`` vertices, and the graph file loads as well.  The
    scan stops there, since only that one graph is plotted.

    For every vertex of the selected graph whose in-degree and out-degree
    are both at least 1, the ratio ``in_degree / out_degree`` is
    collected.  The ratios are turned into a CDF with
    ``Cdf.MakeCdfFromList``, its rendered points are plotted with a
    logarithmic x axis, the mean and median ratio are printed, and the
    figure is shown.

    Despite its name this function currently draws a single plot.

    min_vertices: smallest vertex count a graph must have to be selected
        (default 5200).

    Raises IndexError when no indexed graph qualifies (same exception
    type as before, now with an explanatory message).
    """
    indices = load_object_from_file('../Graphs/indices.txt')
    for index in indices:
        name = index[6:]
        try:
            results = load_object_from_file('../Results/' + name + '_results.txt')
            if results.vertices >= min_vertices:
                graph = load_object_from_file('../Graphs/' + name + '_graph.txt')
                break
        except Exception:
            # Best effort: entries with missing or unreadable files are skipped.
            continue
    else:
        raise IndexError('no analyzed graph with at least %s vertices was found'
                         % (min_vertices,))

    # Ratio in/out for every vertex that has at least one edge each way;
    # vertices with a zero degree are left out (avoids division by zero).
    in_out = []
    for vertex in graph.vertices():
        deg_in = graph.in_degree(vertex)
        deg_out = graph.out_degree(vertex)
        if deg_in >= 1 and deg_out >= 1:
            in_out.append(deg_in / float(deg_out))

    c = Cdf.MakeCdfFromList(in_out)
    x, y = c.Render()
    pyplot.plot(x, y, 'o')
    pyplot.xscale('log')
    print(numpy.mean(in_out))
    print(numpy.median(in_out))
    pyplot.show()
コード例 #5
0
def _log_or_floor(values):
    """Return log(v) for each value; values <= 0 are mapped to 0.00001."""
    # NOTE(review): the substitute is used as the *log* value itself (as
    # in the original code), not as log(0.00001) -- confirm this is what
    # the fit is meant to see for k == 0.
    return [.00001 if v <= 0 else math.log(v) for v in values]


def _plot_out_degree_pmf(graph):
    """Plot the out-degree PMF of ``graph`` with a fitted line, log-log axes."""
    outs = [graph.out_degree(vertex) for vertex in graph.vertices()]

    # PMF of the out-degrees
    pmf = Pmf.MakePmfFromList(outs)
    xs, ys = pmf.Render()

    # Fit a straight line in log space and print its coefficients.
    xs_log = _log_or_floor(xs)
    ys_log = _log_or_floor(ys)
    coefs = numpy.polyfit(xs_log, ys_log, 1)
    fit_y = numpy.polyval(coefs, xs_log)
    print(coefs)

    # Transform the fitted line back so it can be drawn on log-log axes.
    fit_y_log = [math.exp(1) ** f for f in fit_y]
    pyplot.plot(xs, ys, 'o')
    pyplot.plot(xs, fit_y_log, 'r--', linewidth=4)
    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.xlabel('k')
    pyplot.ylabel('P(k)')


def compare_wikipedia_to_ba():
    """
    For the first three indexed graphs of at least 3700 vertices, builds
    a plot of k vs P(k) for the out-degrees and finds the line of best
    fit.  Also builds this same plot for a ``BADirectedGraph`` built for
    the same number of vertices.

    The index list is read from ``../Graphs/indices.txt``; entries whose
    result or graph file cannot be loaded are skipped.  For each selected
    graph the fit coefficients are printed and the figure is saved as
    ``out_degree_<name>``.  The comparison figure is saved as
    ``out_degree_BA_on_<n>_vertices`` and then shown.
    """
    indices = load_object_from_file('../Graphs/indices.txt')
    # This list was never initialised before, so every append raised a
    # NameError that the surrounding bare except silently swallowed.
    graphs = []
    for index in indices:
        name = index[6:]
        try:
            results = load_object_from_file('../Results/' + name + '_results.txt')
            if results.vertices < 3700:
                continue
            graph = load_object_from_file('../Graphs/' + name + '_graph.txt')
        except Exception:
            # Best effort: entries with missing or unreadable files are skipped.
            continue
        graphs.append((name, graph))
        if len(graphs) == 3:
            # Only the first three graphs are plotted; stop loading more.
            break

    for name, graph in graphs:
        pyplot.clf()
        _plot_out_degree_pmf(graph)
        pyplot.savefig('out_degree_' + name)
        pyplot.clf()

        #BA graph w/ same # of vs; show that coefficient is the same
        # NOTE(review): presumably 5 initial vertices plus len(vs) - 5
        # added ones -- confirm against BADirectedGraph.
        vs = graph.vertices()
        bag = BADirectedGraph(5)
        bag.build_graph(len(vs) - 5)
        _plot_out_degree_pmf(bag)

        # Save before showing: once a blocking show() returns the figure
        # may already be gone, and savefig would write an empty image.
        pyplot.savefig('out_degree_BA_on_' + str(len(vs)) + '_vertices')
        pyplot.show()