#!/usr/bin/python
'''
Created on Jun 7, 2010

@author: jose

Command-line script: open the BigGraph archive named by the first argument
and call create_index_kcores() on it.

Usage: <script> BIGGRAPH_ARCHIVE
'''

import sys

from cloudlight import BigGraph

# Value copied onto the graph object's ``debug`` attribute below.
debug = True

# Cache size handed to BigGraph. The original note reads "2KB pages (?)",
# so the unit is unconfirmed.
cache_size = 1 << 16

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    # Single-argument print(...) is parsed by Python 2 as the print statement
    # applied to a parenthesised expression, so the output is unchanged.
    print('Error: first argument missing, input filename with BigGraph archive!')
    exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename, cache_size)
graph.debug = debug

print('indexing with create_index_kcores()')
graph.create_index_kcores()
#!/usr/bin/python
'''
Created on Aug 1, 2010

@author: jose

Command-line script: open the BigGraph archive named by the first argument
and call remove_parameter_cache('seen_degree') on it.

Usage: <script> BIGGRAPH_ARCHIVE

Exits with status -1 when the argument is missing.
'''

import sys

from cloudlight import BigGraph

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    # Single-argument print(...) is parsed by Python 2 as the print statement
    # applied to a parenthesised expression, so the output is unchanged.
    print('Error: first argument missing, input filename with BigGraph archive!')
    # Stop here: without a filename the string concatenation below would
    # raise TypeError (str + None). sys.exit() is used instead of the
    # site-provided exit(), which is intended for interactive sessions.
    sys.exit(-1)

print('opening BigGraph ' + filename)
graph = BigGraph(filename)

print('dropping index with remove_parameter_cache()')
graph.remove_parameter_cache('seen_degree')
# Open the graph named by the first argument with BigGraph, report its node
# and edge counts, then write it out through save_edgelist() to the path
# given as the second argument.
#
# Usage: <script> INPUT_GRAPH OUTPUT_EDGELIST
#
# Single-argument print(...) is parsed by Python 2 as the print statement
# applied to a parenthesised expression, so the output is unchanged.

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    # Sample value left commented out by the original author:
    #   'orkut-links-fst.txt.toundirected.30mill'
    print('Error: first argument missing, input filename with space separated graph!')
    exit(-1)

outname = sys.argv[2] if len(sys.argv) > 2 else None
if not outname:
    # Sample value left commented out by the original author:
    #   'orkut-2k_sym.big_graph'
    print('Error: second argument missing, output filename!')
    exit(-1)

graphfile = filename
graph = BigGraph(graphfile)

# Attributes set on the graph object; presumably they control debug and
# progress reporting inside BigGraph (not visible from here).
graph.debug = True
graph.input_debug_links = 200000
graph.output_debug_nodes = 10000

# Each label is printed before the corresponding count is computed, so any
# output produced by the count call itself appears after its label.
print('NODES:')
print(graph.number_of_nodes())

print('EDGES:')
print(graph.number_of_edges())

print('dumping Graph to edge list file ...')
graph.save_edgelist(outname)
print('done.')
# Load the raw edge list into a BigDiGraph, pass a fresh BigGraph to
# digraph.add_only_symmetric_edgelist(), index it, and check how large its
# first connected component is relative to the whole graph.
#
# ``graphfile``, ``filename`` and ``links`` are defined earlier in the script.
#
# Single-argument print(...) is parsed by Python 2 as the print statement
# applied to a parenthesised expression, so the output is unchanged.

digraph = BigDiGraph(graphfile + '.big_digraph')
digraph.debug = True
digraph.input_debug_links = 200000
digraph.output_debug_nodes = 100
digraph.max_links_input = links
digraph.max_nodes_analysis = 10000

print('digraph.load_edgelist(open(filename)) ...')
# Close the input file explicitly instead of leaking the handle; try/finally
# keeps this valid on every Python 2 release.
# NOTE(review): assumes load_edgelist() finishes reading before it returns.
edgelist_file = open(filename)
try:
    digraph.load_edgelist(edgelist_file)
finally:
    edgelist_file.close()

print('digraph.create_indices() ...')
digraph.create_indices()

graph = BigGraph(graphfile + '.disconnected')

print('digraph.add_only_symmetric_edgelist(graph) ...')
digraph.add_only_symmetric_edgelist(graph)

print('graph.create_indices() ...')
graph.create_indices()

number_of_nodes = graph.number_of_nodes()

# Fraction of all nodes held by each connected component.
comps = [len(comp) / float(number_of_nodes)
         for comp in graph.connected_components()]

# An empty component list is reported as an error too, instead of raising
# IndexError on comps[0].
# NOTE(review): this presumes connected_components() yields the largest
# component first - confirm against the BigGraph implementation.
if not comps or comps[0] < 0.5:
    print('ERROR: biggest connected componnet not found!')
# Open the BigGraph named by the first argument, call
# graph.add_random_component() with an empty auxiliary Graph, and copy every
# edge of that auxiliary graph into a new BigGraph stored at the path given
# as the second argument.
#
# Usage: <script> INPUT_BIGGRAPH OUTPUT_BIGGRAPH
#
# Single-argument print(...) is parsed by Python 2 as the print statement
# applied to a parenthesised expression, so the output is unchanged.

print(sys.argv)

filename = sys.argv[1] if len(sys.argv) > 1 else None
if not filename:
    print('Error: first argument missing, input filename with BigGraph!')
    exit(-1)

outname = sys.argv[2] if len(sys.argv) > 2 else None
if not outname:
    print('Error: second argument missing, output filename for BigGraph!')
    exit(-1)

graph = BigGraph(filename)

# Small hand-built graph the original author left commented out:
#   graph = BigGraph()
#   graph.add_edge(1,2)
#   graph.add_edge(2,3)
#   graph.add_edge(3,1)
#   graph.add_edge(4,5)

aux_graph = Graph()
connected_graph = BigGraph(outname)

# aux_graph is handed over empty and its edges are read back afterwards.
graph.add_random_component(aux_graph)

for u, v in aux_graph.edges_iter():
    connected_graph.add_edge(u, v)
# Validate the remaining command-line arguments, then build a BigGraph at
# ``outname`` from the edge list in ``filename``, index it and print its
# node and edge counts.
#
# ``filename`` and ``outname`` are defined earlier in the script.
#
# Single-argument print(...) is parsed by Python 2 as the print statement
# applied to a parenthesised expression, so the output is unchanged.

if not outname:
    # Sample value left commented out by the original author:
    #   'orkut-2k_sym.big_graph'
    print('Error: second argument missing, output filename!')
    # sys.exit() rather than the site-provided exit(), which is intended for
    # interactive sessions; the exit status is the same.
    sys.exit(-1)

links = sys.argv[3] if len(sys.argv) > 3 else None
if not links:
    # Sample value left commented out by the original author: 2000
    print('Error: third argument missing, max number of links!')
    sys.exit(-1)
# Raises ValueError if the third argument is not an integer literal.
links = int(links)

graph = BigGraph(outname)
graph.debug = True
graph.input_debug_links = 200000
graph.output_debug_nodes = 10000
graph.max_links_input = links
graph.max_nodes_analysis = 10000

# Close the input file explicitly instead of leaking the handle; try/finally
# keeps this valid on every Python 2 release.
# NOTE(review): assumes load_edgelist() finishes reading before it returns.
edgelist_file = open(filename)
try:
    graph.load_edgelist(edgelist_file)
finally:
    edgelist_file.close()

graph.create_indices()

print('output Graph (possibly disconnected):')
print('nodes = %d' % graph.number_of_nodes())
print('edges = %d' % graph.number_of_edges())
else: q.put('FINISHED') create_dbs = True if create_dbs: for strat_name in strats: for l in lookaheads: new_filename = filename + '.%s.lookahead%d' % (strat_name, l) print 'creating BigGraph ' + new_filename os.system('cp %s %s' % (filename, new_filename)) print 'choosing random node seed for crawlers...' graph = BigGraph(filename, cache_size_pages) node_rand_seed = graph.random_nodes()[0] del graph print 'done. choosed -> %s' % str(node_rand_seed) processors = len(strats) * len(lookaheads) # 4 out = open(outname, 'w') processes = [] graphs = [] list_of_strategies = [] results = 0 for strat_name in strats: for l in lookaheads: