# Heuristic check used before loading a graph: estimate whether the virtual
# memory still available to the process can hold the in-memory graph.
# 'profile' is a project-local helper module and TOTAL_RAM a global constant
# defined elsewhere in these scripts.
def not_enough_RAM(filename, ram_used):
    page_size = profile.vmB('VmExe:') / 1024
    architecture_size = int(platform.architecture()[0].split('bit')[0])
    # The size of a python graph object is roughly 4 times its size in a file.
    file_size = (profile.filesize(filename) * 4) / 1024
    available_ram = TOTAL_RAM / 1024 - ram_used
    available_vm = (available_ram / page_size) * architecture_size
    return available_vm < file_size
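# For reference, a minimal sketch of what the 'profile' helpers used above could
# look like on Linux.  This is an assumption for illustration only (the real
# module used by these scripts is not shown here): it reads process memory
# figures from /proc/self/status, total RAM from /proc/meminfo, and file sizes
# from the filesystem.  The stopwatch helpers start_clock/get_time_from_clock
# are not sketched.
import os

def _proc_value(path, key):
    # Return the value of a "Key:   12345 kB" line, converted to bytes.
    with open(path) as f:
        for line in f:
            if line.startswith(key):
                fields = line.split()
                return float(fields[1]) * 1024  # /proc reports kB
    return 0.0

def vmB(key):
    # e.g. vmB('VmExe:') or vmB('VmSize:') for the current process, in bytes.
    return _proc_value('/proc/self/status', key)

def memory():
    # Virtual memory currently used by this process, in bytes.
    return _proc_value('/proc/self/status', 'VmSize:')

def resident():
    # Resident set size of this process, in bytes.
    return _proc_value('/proc/self/status', 'VmRSS:')

def filesize(path):
    # On-disk size of a file, in bytes.
    return os.path.getsize(path)

def total_ram():
    # Physical memory of the machine, in bytes (could seed TOTAL_RAM).
    return _proc_value('/proc/meminfo', 'MemTotal:')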
increment = 10000
start = 1000

profile.start_clock()
file = open('csv/sparse_graphs.csv', 'w')
file.write("Nodes GenerateTime SaveTime FileSize\n")
# The +1 on the max size is just to be sure we include the max size in our range.
for i in range(start, max_size + 1, increment):
    edge_probability = 1.0 / i
    scaling = 10
    # Edge probability 10/i gives an expected node degree of about 10,
    # independent of the graph size, so these graphs stay sparse.
    G = zen.generating.rgm.erdos_renyi(i, edge_probability * scaling)
    G.compact()
    filename = 'sparse' + str(i) + ".graph"

    # Profiling generation time
    difftime = profile.get_time_from_clock()
    gentime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " has taken " + gentime + " seconds to generate."

    # Saving the generated graph
    edgelist.write(G, 'storage/edgelist/sparse/' + filename)
    filesize = profile.filesize('storage/edgelist/sparse/' + filename)
    filesize = filesize / 1024

    # Profiling IO time
    difftime = profile.get_time_from_clock()
    savetime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " (" + str(filesize) + "kB), has taken " + savetime + " seconds to save on disk."
    file.write(str(i) + " " + gentime + " " + savetime + " " + str(filesize) + "\n")
profile.start_clock()
file = open('csv/random_graphs.csv', 'w')
file.write("Nodes GenerateTime SaveTime FileSize\n")
# The +1 on the max size is just to be sure we include the max size in our range.
for i in range(start, max_size + 1, increment):
    edge_probability_1 = random.uniform(0, 1.0)
    edge_probability_2 = random.uniform(0, 1.0)
    edge_probability_3 = random.uniform(0, 1.0)
    edge_probability_4 = random.uniform(0, 1.0)
    edge_probability_5 = random.uniform(0, 1.0)
    # I only want the graph to be dense with a small probability
    # (expected edge probability is about 0.003125; see the note after this loop).
    G = zen.generating.rgm.erdos_renyi(i, edge_probability_1 * edge_probability_2 * edge_probability_3 * edge_probability_4 * edge_probability_5 * 0.1)
    G.compact()
    filename = 'random' + str(i) + ".graph"

    # Profiling generation time
    difftime = profile.get_time_from_clock()
    gentime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " has taken " + gentime + " seconds to generate."

    # Saving the generated graph
    edgelist.write(G, 'storage/edgelist/random/' + filename)
    filesize = profile.filesize('storage/edgelist/random/' + filename)
    filesize = filesize / 1024

    # Profiling IO time
    difftime = profile.get_time_from_clock()
    savetime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " (" + str(filesize) + "kB), has taken " + savetime + " seconds to save on disk."
    file.write(str(i) + " " + gentime + " " + savetime + " " + str(filesize) + "\n")
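# Side note on the probability used above (illustration only, not part of the
# profiling run): the product of five independent uniform(0,1) draws has expected
# value (1/2)**5 = 0.03125, so after the 0.1 scaling the expected edge probability
# is 0.003125 and the expected node degree is about 0.003125 * i.  The draw is
# occasionally much larger, which is what makes a dense graph possible but
# unlikely.  A quick simulation confirms the mean:
import random

def mean_random_edge_probability(trials=100000):
    total = 0.0
    for _ in xrange(trials):
        p = 0.1
        for _ in xrange(5):
            p *= random.uniform(0, 1.0)
        total += p
    return total / trials  # converges to about 0.003125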
# This excerpt sits inside the same per-size loop as the other generators:
# G is an empty graph whose nodes are the integers 0..i-1 and max_degree is the
# target number of edges per node (both defined earlier in the script).
# Define edges for each node
for j in range(i):
    k = 0
    while k < max_degree:
        x = random.randint(1, max_degree)
        # Skew the offset towards small values so most edges stay local.
        x = int(round(random.uniform(0, 1) * random.uniform(0, 1) * x))
        other_node = (j + x) % i
        if not G.has_edge(j, other_node):
            G.add_edge(j, other_node)
            k += 1
G.compact()
filename = 'metric' + str(i) + ".graph"

# Profiling generation time
difftime = profile.get_time_from_clock()
gentime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
print "Graph " + filename + " has taken " + gentime + " seconds to generate."

# Saving the generated graph
edgelist.write(G, 'storage/edgelist/metric/' + filename)
filesize = profile.filesize('storage/edgelist/metric/' + filename)
filesize = filesize / 1024

# Profiling IO time
difftime = profile.get_time_from_clock()
savetime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
print "Graph " + filename + " (" + str(filesize) + "kB), has taken " + savetime + " seconds to save on disk."
file.write(str(i) + " " + gentime + " " + savetime + " " + str(filesize) + "\n")
def profile_graph(type):
    global max_size, increment
    print 'Profiling ' + type + " graphs!"
    file = open('csv/' + type + '_graphs_profile.csv', 'w')
    file.write("Nodes FileSize LoadTime VM RAM SP NCC LCC GCC MST\n")
    profile.start_clock()
    for i in range(increment, max_size + 1, increment):
        # We want to profile the time taken to load each graph into memory for each category.
        # We use manual garbage collection to make sure we are only keeping the minimum number
        # of objects within memory.
        gc.collect()

        # Load the graph from disk
        filename = type + str(i) + ".graph"
        filesize = profile.filesize("storage/" + type + "/" + filename) / 1024

        # The operating system will kill the profiling process if there is not enough RAM to
        # fit the VM requirements to store the graph.
        if not_enough_RAM("storage/" + type + "/" + filename, ram_zen_python):
            print 'Graph is too big to be loaded in virtual memory, continuing to next graph...'
            file.write(str(i) + " " + str(filesize) + " 0 0 0 0 0 0 0 0\n")
            continue

        profile.start_clock()
        G = memlist.read("storage/" + type + "/" + filename)
        difftime = profile.get_time_from_clock()
        loadtime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
        vm_graph = round(profile.memory() / 1024)
        ram_graph = round(profile.resident() / 1024)
        #Using pickle measures the byte size of the
        print "Graph " + filename + " has taken " + loadtime + " seconds to load. The graph is using " + str(vm_graph) + "kB of VM and " + str(ram_graph) + "kB of RAM"

        # NOTE: the algorithm calls below are left commented out, so the recorded times
        # only measure the surrounding loop overhead.
        # Execute a few shortest paths and take the maximum value as a reference.
        sample = 20
        sp_times = [0] * sample
        for j in range(sample):
            index = random.randint(0, i - 1)  # pick a random source node index
            #source = G.node_object(index)
            #zen.algorithms.shortest_path.single_source_shortest_path(G, index)
            #zen.algorithms.shortest_path.dijkstra_path(G,index)
            sp_times[j] = profile.get_time_from_clock()
        difftime = max(sp_times)
        shortestpathtime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)

        # Execute a few clustering computations and take the maximum value as a reference.
        #zen.algorithms.clustering.ncc(G)
        difftime = profile.get_time_from_clock()
        ncctime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
        #zen.algorithms.clustering.lcc(G)
        difftime = profile.get_time_from_clock()
        lcctime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
        #zen.algorithms.clustering.gcc(G)
        difftime = profile.get_time_from_clock()
        gcctime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
        #zen.algorithms.spanning.minimum_spanning_tree(G)
        difftime = profile.get_time_from_clock()
        msttime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)

        print "Time for queries : SP=" + shortestpathtime + " seconds, NCC=" + ncctime + " seconds, LCC=" + lcctime + " seconds, GCC=" + gcctime + " seconds, MST=" + msttime
        file.write(str(i) + " " + str(filesize) + " " + loadtime + " " + str(vm_graph) + " " + str(ram_graph) + " " + shortestpathtime + " " + ncctime + " " + lcctime + " " + gcctime + " " + msttime + "\n")
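# A minimal driver sketch for profile_graph (an assumption about how the real
# script invokes it; only the category names and the ram_zen_python baseline are
# taken from the surrounding code).
if __name__ == '__main__':
    # Baseline resident memory of the interpreter plus Zen before any graph is
    # loaded, in kB, so not_enough_RAM can subtract it from the available RAM.
    ram_zen_python = round(profile.resident() / 1024)
    for category in ['sparse', 'random', 'metric', 'barabasi', 'dense']:
        profile_graph(category)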
max_size = 41000
increment = 10000
start = 1000

profile.start_clock()
file = open("csv/barabasi_graphs.csv", "w")
file.write("GraphName GenerateTime SaveTime FileSize\n")
# The +1 on the max size is just to be sure we include the max size in our range.
for i in range(start, max_size + 1, increment):
    # The second argument is the number of edges each newly attached node brings,
    # so with 1 the generated graph stays sparse (roughly i - 1 edges).
    G = zen.generating.barabasi_albert(i, 1)
    G.compact()
    filename = "barabasi" + str(i) + ".graph"

    # Profiling generation time
    difftime = profile.get_time_from_clock()
    gentime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " has taken " + gentime + " seconds to generate."

    # Saving the generated graph
    edgelist.write(G, "storage/edgelist/barabasi/" + filename)
    filesize = profile.filesize("storage/edgelist/barabasi/" + filename)
    filesize = filesize / 1024

    # Profiling IO time
    difftime = profile.get_time_from_clock()
    savetime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " (" + str(filesize) + "kB), has taken " + savetime + " seconds to save on disk."
    file.write(filename + " " + gentime + " " + savetime + " " + str(filesize) + "\n")
profile.start_clock()
file = open("csv/dense_graphs.csv", "w")
file.write("Nodes GenerateTime SaveTime FileSize\n")
# The +1 on the max size is just to be sure we include the max size in our range.
for i in range(start, max_size + 1, increment):
    edge_probability = random.uniform(0, 1.0)
    scaling = 0.05
    base_value = 0.05
    # Graphs in this category are potentially very large: the edge probability is
    # drawn from [0.05, 0.1] (expected value 0.075), so the average node degree is
    # roughly 0.075 * #nodes and the larger graphs can reach millions of edges and
    # files of several hundred MB on disk (see the estimate after this loop).
    G = zen.generating.rgm.erdos_renyi(i, edge_probability * scaling + base_value)
    G.compact()
    filename = "dense" + str(i) + ".graph"

    # Profiling generation time
    difftime = profile.get_time_from_clock()
    gentime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " has taken " + gentime + " seconds to generate."

    # Saving the generated graph
    edgelist.write(G, "storage/edgelist/dense/" + filename)
    filesize = profile.filesize("storage/edgelist/dense/" + filename)
    filesize = filesize / 1024

    # Profiling IO time
    difftime = profile.get_time_from_clock()
    savetime = str(difftime.seconds) + "." + str(difftime.microseconds / 1000)
    print "Graph " + filename + " (" + str(filesize) + "kB), has taken " + savetime + " seconds to save on disk."
    file.write(str(i) + " " + gentime + " " + savetime + " " + str(filesize) + "\n")
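# Rough size estimate for the dense category (illustration only, not measured
# data): with the edge probability drawn uniformly from [0.05, 0.1], the expected
# probability is 0.075, so an n-node graph carries about 0.075 * n * (n - 1) / 2
# edges on average.
def expected_dense_edges(n, p=0.075):
    # Expected number of edges of an Erdos-Renyi graph G(n, p).
    return p * n * (n - 1) / 2.0

# For example, expected_dense_edges(21000) is roughly 16.5 million edges,
# and the largest sizes in the sweep are bigger still.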