Example #1
import platform
import random
import zen
from zen.io import edgelist

#NOTE: `profile` below is a project-local helper module (not the stdlib
#profiler); a sketch of a possible implementation follows this function.
#TOTAL_RAM, max_size and max_degree are assumed to be defined elsewhere in the
#original script.
def not_enough_RAM(filename,ram_used):
    #Estimate whether the graph stored in `filename` will fit in the memory
    #left once the RAM already in use (`ram_used`) is accounted for.
    page_size = profile.vmB('VmExe:')/1024
    architecture_size = int(platform.architecture()[0].split('bit')[0])
    #The in-memory size of a Python graph object is roughly 4 times its size in a file.
    file_size = (profile.filesize(filename)*4)/1024
    available_ram = TOTAL_RAM/1024 - ram_used
    available_vm = (available_ram/page_size)*architecture_size
    return available_vm < file_size
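
#For reference, a minimal sketch of what the `profile` helper used throughout
#these snippets could look like (contents of a hypothetical profile.py). The
#module is not shown in the original, so everything below is inferred from how
#it is called: a resettable clock returning timedeltas, file sizes in bytes,
#and memory readings parsed from /proc/self/status (Linux only).
import os
import datetime

_clock = datetime.datetime.now()

def start_clock():
    global _clock
    _clock = datetime.datetime.now()

def get_time_from_clock():
    #Return the timedelta since the last call and reset the clock, so
    #successive calls measure successive phases (e.g. generation, then IO).
    global _clock
    now = datetime.datetime.now()
    diff = now - _clock
    _clock = now
    return diff

def filesize(path):
    return os.path.getsize(path)

def vmB(key):
    #Parse a field such as 'VmSize:' or 'VmRSS:' out of /proc/self/status.
    #Values there are reported in kB; convert to bytes to match the callers,
    #which divide the result by 1024.
    with open('/proc/self/status') as status:
        for line in status:
            if line.startswith(key):
                return float(line.split()[1]) * 1024
    return 0.0

def memory():
    return vmB('VmSize:')

def resident():
    return vmB('VmRSS:')
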
increment = 10000
start = 1000

profile.start_clock()
file = open('csv/sparse_graphs.csv', 'w')
file.write("Nodes GenerateTime SaveTime FileSize\n")
#The +1 on the max size is just to be sure we include the max size in our range.
for i in range(start,max_size+1,increment):
    edge_probability = 1.0/i
    scaling = 10
    #Keep the graph sparse: p = scaling/i, so the expected degree is about 10
    #regardless of the number of nodes.
    G = zen.generating.rgm.erdos_renyi(i,edge_probability*scaling)
    G.compact()
    filename = 'sparse' + str(i) + ".graph"
    
    #Profiling generation time
    difftime = profile.get_time_from_clock()
    gentime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
    print "Graph " + filename + " has taken " + gentime + " to generate."
    
    #Saving the generated graph
    edgelist.write(G,'storage/edgelist/sparse/' + filename)
    filesize = profile.filesize('storage/edgelist/sparse/' + filename)
    filesize = filesize/1024
    
    #Profiling IO time
    difftime = profile.get_time_from_clock()
    savetime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
    print "Graph " + filename + " (" + str(filesize) + "kB), has taken " + savetime + " seconds to save on disk."
    file.write(str(i) + " " + gentime + " " + savetime + " " + str(filesize) + "\n")
file.close()

profile.start_clock()
file = open('csv/random_graphs.csv', 'w')
file.write("Nodes GenerateTime SaveTime FileSize\n")
#The +1 on the max size is just to be sure we include the max size in our range.
for i in range(start,max_size+1,increment):
    edge_probability_1 = random.uniform(0,1.0)
    edge_probability_2 = random.uniform(0,1.0)
    edge_probability_3 = random.uniform(0,1.0)
    edge_probability_4 = random.uniform(0,1.0)
    edge_probability_5 = random.uniform(0,1.0)
    #The product of five independent uniforms is strongly skewed towards zero:
    #E[p] = 0.1*(1/2)^5 ≈ 0.003, so the expected average degree is about
    #0.003*#nodes and the graph is dense only with small probability.
    G = zen.generating.rgm.erdos_renyi(i,edge_probability_1*edge_probability_2*edge_probability_3*edge_probability_4*edge_probability_5 * 0.1)
    G.compact()
    filename = 'random' + str(i) + ".graph"
    
    #Profiling generation time
    difftime = profile.get_time_from_clock()
    gentime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
    print "Graph " + filename + " has taken " + gentime + " to generate."
    
    #Saving the generated graph
    edgelist.write(G,'storage/edgelist/random/' + filename)
    filesize = profile.filesize('storage/edgelist/random/' + filename)
    filesize = filesize/1024
    
    #Profiling IO time
    difftime = profile.get_time_from_clock()
    savetime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
    print "Graph " + filename + " (" + str(filesize) + "kB), has taken " + savetime + " seconds to save on disk."
    file.write(str(i) + " " + gentime + " " + savetime + " " + str(filesize) + "\n")
    
    #Add up to max_degree extra edges per node: node j is linked to the nearby
    #node (j+x) % i, with the offset x skewed towards small values.
    for j in range(i):
        k = 0
        while k < max_degree:
            x = random.randint(1,max_degree)
            #Cast to int: round() returns a float, and node ids must be integers.
            x = int(round(random.uniform(0,1)*random.uniform(0,1)*x))
            other_node = (j+x) % i
            if not G.has_edge(j,other_node):
                G.add_edge(j,other_node)
            k+=1
    G.compact()
    filename = 'metric' + str(i) + ".graph"
    
    #Profiling generation time
    difftime = profile.get_time_from_clock()
    gentime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
    print "Graph " + filename + " has taken " + gentime + " to generate."
    
    #Saving the generated graph
    edgelist.write(G,'storage/edgelist/metric/' + filename)
    filesize = profile.filesize('storage/edgelist/metric/' + filename)
    filesize = filesize/1024
    
    #Profiling IO time
    difftime = profile.get_time_from_clock()
    savetime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
    print "Graph " + filename + " (" + str(filesize) + "kB), has taken " + savetime + " seconds to save on disk."
    file.write(str(i) + " " + gentime + " " + savetime + " " + str(filesize) + "\n")
file.close()
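
#Optional sanity check (not in the original): Zen can read the edge lists back,
#so a quick round-trip confirms the files on disk are well formed. The path
#below assumes the sparse run above produced sparse1000.graph.
G_check = edgelist.read('storage/edgelist/sparse/sparse1000.graph')
print "Round-trip check: " + str(len(G_check)) + " nodes, " + str(G_check.size()) + " edges."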

Example #5
import gc
import random
import zen
from zen.io import edgelist, memlist

#not_enough_RAM and the `profile` helper are defined in Example #1.
def profile_graph(graph_type):
    #(parameter renamed from `type` to avoid shadowing the builtin)
    global max_size,increment
    print 'Profiling ' + graph_type + " graphs!"
    file = open('csv/' + graph_type + '_graphs_profile.csv', 'w')
    file.write("Nodes FileSize LoadTime VM RAM SP NCC LCC GCC MST\n")
    profile.start_clock()
    for i in range(increment,max_size+1,increment):
        #Profile the time it takes to load each graph of each category into
        #memory. Manual garbage collection between iterations ensures only the
        #minimum number of objects is kept in memory.
        #(Note: the sizes here start at `increment` rather than `start`, so
        #this assumes the stored graphs were produced on that grid.)
        gc.collect()
        
        #Load the graph from disk
        filename = graph_type + str(i) + ".graph"
        filesize = profile.filesize("storage/"+ graph_type + "/" + filename)/1024
        
        #The operating system will kill the profiling process if there is not
        #enough RAM to back the virtual memory needed to hold the graph.
        if not_enough_RAM("storage/"+ graph_type + "/" + filename,ram_zen_python):
            print 'Graph is too big to be loaded in virtual memory, continuing to next graph...'
            file.write(str(i) + " " + str(filesize) + " 0 0 0 0 0 0 0 0\n")
            continue
        profile.start_clock()
        #Note: graphs are read here in memlist format from storage/<graph_type>/,
        #distinct from the edgelist files written during generation.
        G = memlist.read("storage/" + graph_type + "/" + filename)
        
        difftime = profile.get_time_from_clock()
        loadtime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
        vm_graph = round(profile.memory()/1024)
        ram_graph = round(profile.resident()/1024)
        #Using pickle would measure the byte size of the graph object instead.
        
        print "Graph " + filename + " has taken " + loadtime + " to load. The graph is using " + str(vm_graph) + "kB of VM and " + str(ram_graph) + "kB of RAM"
        
        #Collect a fixed number of timing samples
        sample = 20
        times = [0] * sample
        
        #Execute a few shortest-path queries and take the maximum value as a
        #reference. (The algorithm calls are commented out in this snippet, so
        #the recorded times only measure loop overhead.)
        for j in range(sample):
            index = random.randint(0,i-1)
            #source = G.node_object(index)
            #zen.algorithms.shortest_path.single_source_shortest_path(G, index)
            #zen.algorithms.shortest_path.dijkstra_path(G,index)
            times[j] = profile.get_time_from_clock()
        difftime = max(times)
        shortestpathtime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
        
        #Execute a few clustering computations and take the maximum value as a reference.
        #zen.algorithms.clustering.ncc(G)
        difftime = profile.get_time_from_clock()
        ncctime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
        
        #zen.algorithms.clustering.lcc(G)
        difftime = profile.get_time_from_clock()
        lcctime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
        
        #zen.algorithms.clustering.gcc(G)
        difftime = profile.get_time_from_clock()
        gcctime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
        
        #zen.algorithms.spanning.minimum_spanning_tree(G)
        difftime = profile.get_time_from_clock()    
        msttime = str(difftime.seconds) + "." + str(difftime.microseconds/1000)
        
        print "Time for queries : SP=" + shortestpathtime + "seconds, NCC=" + ncctime + "seconds, LCC=" + lcctime + "seconds, GCC=" + gcctime + "seconds, MST=" + msttime
        file.write(str(i) + " " + str(filesize) + " " + loadtime + " " + str(vm_graph) + " " + str(ram_graph) + " " + shortestpathtime + " " + ncctime + " " + lcctime + " " + gcctime + " " + msttime + "\n")
    file.close()
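
#Example driver (not in the original). Shown commented out because some of the
#graph categories are only generated further below; the category names are
#assumptions based on the storage directories used in these snippets.
#for category in ['sparse', 'random', 'metric', 'barabasi', 'dense']:
#    profile_graph(category)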
max_size = 41000
increment = 10000
start = 1000

profile.start_clock()
file = open("csv/barabasi_graphs.csv", "w")
file.write("GraphName GenerateTime SaveTime FileSize\n")
# The +1 on the max size is just to be sure we include the max size in our range.
for i in range(start, max_size + 1, increment):
    # The second argument of barabasi_albert is the number of edges each new
    # node creates (not a per-node degree cap).
    G = zen.generating.barabasi_albert(i, 1)
    G.compact()
    filename = "barabasi" + str(i) + ".graph"

    # Profiling generation time
    difftime = profile.get_time_from_clock()
    gentime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " has taken " + gentime + " to generate."

    # Saving the generated graph
    edgelist.write(G, "storage/edgelist/barabasi/" + filename)
    filesize = profile.filesize("storage/edgelist/barabasi/" + filename)
    filesize = filesize / 1024

    # Profiling IO time
    difftime = profile.get_time_from_clock()
    savetime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " (" + str(filesize) + "kB), has taken " + savetime + " seconds to save on disk."
    file.write(filename + " " + gentime + " " + savetime + " " + str(filesize) + "\n")
file.close()

profile.start_clock()
file = open("csv/dense_graphs.csv", "w")
file.write("Nodes GenerateTime SaveTime FileSize\n")
# The +1 on the max size is just to be sure we include the max size in our range.
for i in range(start, max_size + 1, increment):
    edge_probability = random.uniform(0, 1.0)
    scaling = 0.05
    base_value = 0.05
    # Graphs in this category are potentially very big: E[p] = 0.05*0.5 + 0.05 = 0.075,
    # so the average degree is about 0.075*#nodes and the 20,000-node graph averages
    # roughly 15,000,000 edges, i.e. an edge-list file of a few hundred MB.
    G = zen.generating.rgm.erdos_renyi(i, edge_probability * scaling + base_value)
    G.compact()
    filename = "dense" + str(i) + ".graph"

    # Profiling generation time
    difftime = profile.get_time_from_clock()
    gentime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " has taken " + gentime + " to generate."

    # Saving the generated graph
    edgelist.write(G, "storage/edgelist/dense/" + filename)
    filesize = profile.filesize("storage/edgelist/dense/" + filename)
    filesize = filesize / 1024

    # Profiling IO time
    difftime = profile.get_time_from_clock()
    savetime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " (" + str(filesize) + "kB), has taken " + savetime + " seconds to save on disk."
    file.write(str(i) + " " + gentime + " " + savetime + " " + str(filesize) + "\n")
file.close()
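
#To inspect the results (not in the original): the CSVs written above are
#space-separated with a header row, so they can be loaded with the stdlib csv
#module. File and column names are taken from the writes above; everything
#else is a sketch.
import csv

with open('csv/dense_graphs.csv') as results:
    reader = csv.DictReader(results, delimiter=' ')
    for row in reader:
        print row['Nodes'] + " nodes: generated in " + row['GenerateTime'] + "s, saved in " + row['SaveTime'] + "s (" + row['FileSize'] + " kB)"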