def pipelined_delete(r, *keys): p = r.pipeline() for key in keys: p.delete(key) result = p.execute() for key, res in zip(keys, result): log.info("Deleting '{0}'... {0}".format(key, Color.wrap('[failure]', Color.FAIL) if not res else Color.wrap('[success]', Color.OKBLUE)))
def _assign_pops(unassigned_list_key, failed_list_key, no_add_processed=False): """ Assign all of the IP addresses found in the redis list :unassigned_list_key:. If any fail, put them in the redis list :failed_list_key. Store processed links in 'delayed_job:processed_links' unless :no_add_processed: is False """ r = connection.Redis() while r.llen(unassigned_list_key) > 0: try: if no_add_processed: link = r.rpop(unassigned_list_key) else: link = r.rpoplpush(unassigned_list_key, "delayed_job:processed_links") if link is None: return ip1, ip2 = link.split(":")[2:] cross_as = different_as(r, dbkeys.ip_key(ip1), dbkeys.ip_key(ip2)) cross_24 = different_24(r, ip1, ip2) if cross_as is None: # This means that one side of this link has no AS. We don't want it continue log.info("link: %s, delay: %s" % (link, dbkeys.get_delay(link))) if dbkeys.get_delay(link) > 2.5 or cross_as or cross_24: success = handle_cross_pop_link(link) else: success = handle_same_pop_link(link) log.info("Assigning PoPs. Remaining: [{0}]. " "Deferred for join: [{1}]".format( Color.wrap(r.llen(unassigned_list_key), Color.OKBLUE), Color.wrap(r.llen('delayed_job:popjoins'), Color.HEADER))) if not success: assert_pops_ok(r, ip1, ip2) r.lpush(failed_list_key, link) except DataError as e: log.error("Fatal Error - Resetting: " + e) args = object() args.reset = True return assign_pops(args)
def create_graph(args): r = connection.Redis() if args.xml: log.info("Loading saved graph from file: %s" % args.xml) @timeit def load_graph(path): return nx.Graph(nx.read_graphml(path)) graph, time_taken = load_graph(args.xml) log.info("Graph loading complete in %0.2f seconds" % time_taken) graphpath = args.xml else: graph = load_from_redis(r, args) graphpath = args.reload used_nodes = set() # used_nodes will contain only the vertices that we want in the graph. # However, it contains all of the edges between those vertices, not # only those used in paths. log.info("Populating sources for shortest path... ") nodetypes = nx.get_node_attributes(graph, 'nodetype') p = r.pipeline() p.delete(SP_KEY) p.delete(USED_KEY) for node, nodetype in nodetypes.iteritems(): if nodetype in ('relay', 'client', 'dest'): p.sadd(SP_KEY, node) p.hset(TYPE_KEY, node, nodetype) p.execute() log.info(Color.wrap("[complete]", Color.OKGREEN)) log.info("Spawning 2 workers to process shortest paths... ") timer = ProgressTimer(int(r.scard(SP_KEY))) left = timer.total timer.tick(1) #pool = Pool(processes=12, initializer=thread_init, #initargs=(graphpath, SP_KEY, TYPE_KEY, USED_KEY, PATH_KEY, )) workers = [] for x in xrange(2): p = multiprocessing.Process( target=concurrent.thread_shortest_path, args=(graphpath, SP_KEY, TYPE_KEY, USED_KEY, PATH_KEY, USED_KEY, )) p.start() workers.append(p) while left > 0: time.sleep(30) timer.tick((timer.total - left) - timer.total_done) sys.stderr.write( "{0} Spawning workers to process shortest paths... {1}" .format(Color.NEWL, Color.wrapformat("[ETA: {0}]", Color.OKBLUE, timer.eta()))) left = r.scard(SP_KEY) log.info("\nWaiting for jobs to terminate... ") for job in workers: p.join() #pool.join() log.info("Done") used_nodes = r.smembers(USED_KEY) if len(used_nodes) == 0: raise RuntimeError('Saved zero nodes after processing') core_graph = graph.subgraph(used_nodes) to_remove = list() def rev_tuple(tup): return (tup[1], tup[0]) for edge in core_graph.edges_iter(): if (not r.sismember(PATH_KEY, edge) and not r.sismember(PATH_KEY, rev_tuple(edge))): to_remove.append(edge) log.info("Kept %s vertices in the graph" % len(used_nodes)) log.info("Removed %s extraneous edges" % len(to_remove)) core_graph.remove_edges_from(to_remove) log.info("Re-trimming graph") #collapse_graph_in_place(graph) log.info("Done") log.info("Writing core graph... ") nx.write_graphml(core_graph, "%s.xml" % args.save, prettyprint=True) try: nx.write_dot(core_graph, "%s.dot" % args.save) except: log.info("Failed to write dot graph") log.info(Color.wrap("[complete]", Color.OKGREEN)) log.info("Cleaning up...") pipelined_delete(r, TYPE_KEY, USED_KEY, PATH_KEY)
def load_from_redis(r, args): """ Create a igraph graph from redis """ log.info("Loading from Redis") linkdict = LinkDict(r) vertices = VertexList() tor_vertices = set() graphlinks = [] graphattrs = dict() graphattrs['latency'] = [] stats = Stats({'non-pop-trim': int, 'unattachable-poi-count': int, 'poi-latency-defaulted': int, 'unattachable-poi': set, 'num-pois': int, 'num-pops': int, 'num-links': int, 'num-clients': int, 'client-connect-points': int}) pipe = r.pipeline() i = 0 #Obtain the set of Tor relay IPs log.info("Reading Tor relays from %s... " % args.pointsofinterest) try: with open(args.pointsofinterest) as f: PoIs = json.load(f) except IOError as e: log.info("Error: [%s]" % e) raise log.info(Color.wrap("Done", Color.OKBLUE)) log.info("Attaching clients to graph.") #Add clients if args.num_clients: clients_attached, client_attach_points = add_asn_endpoints( vertices, graphlinks, args.client_data, args.num_clients, 'client') log.info("Attached {0} clients to {1} attachment points".format( clients_attached, client_attach_points)) log.info("Attaching destinations to graph.") #Add dests if args.num_dests: dests_attached, dest_attach_points = add_alexa_destinations( vertices, graphlinks, args.num_dests) log.info("Attached {0} dests to {0} attachment points".format( dests_attached, dest_attach_points)) protected = set() protected.update([poi['pop'] for poi in PoIs]) protected.update(vertices.keys()) # We want to trim all of the hanging edges of the graph. log.info("Trimming degree-1 vertices...") found_hanging_edge = True pass_ctr = 0 while found_hanging_edge: pass_ctr += 1 found_hanging_edge = False removed = set() n = 0 timer = ProgressTimer(len(linkdict)) for pop in linkdict.keys(): if n % 100 == 0 or n == timer.total - 1: timer.tick(100) sys.stderr.write( "{0}Pass {1}: {2} {3}".format( Color.NEWL, pass_ctr, Color.wrapformat("[{0} processed, {1} trimmed]", Color.HEADER, n, stats['non-pop-trim']), Color.wrapformat("[eta:{0}]", Color.OKGREEN, timer.eta()) )) n += 1 if pop in removed: continue # we saw this already if len(linkdict[pop]) >= 2: continue # it can stay if pop in protected: continue # We need relay/client/dest connect point # It's only connected to one connected = linkdict[pop].pop() removed.add(pop) del linkdict[pop] linkdict[connected].remove(pop) if len(linkdict[connected]) == 0: # This was a matched pair attached to nothing else del linkdict[connected] removed.add(connected) stats.incr('non-pop-trim') found_hanging_edge = True sys.stderr.write("\n") linkdict.collapse_degree_two(protected=protected) log.info("Trimmed {non-pop-trim} degree two hops".format(**stats)) #Set vertex id's for all of the pops we have links for. log.info("Adding PoPs...") for pop in linkdict.iterkeys(): if pop in vertices: continue # we have this one already. vertices.add_vertex(pop, nodeid=pop, nodetype='pop', asn=r.get(dbkeys.POP.asn(pop)), countries=r.smembers(dbkeys.POP.countries(pop))) stats.incr('num-pops') i += 1 log.info(Color.wrapformat("Added [{0}]", Color.OKBLUE, stats['num-pops'])) #Attach the relays for poi in PoIs: if poi['pop'] not in vertices: log.warn("Matched relay to {0}, but couldn't find it " "in vertices".format(poi['pop'])) stats.incr('unattachable-poi-count') stats.incr('unattachable-poi', poi['id']) continue vertices.add_vertex(poi['id'], nodeid=poi['id'], nodetype='relay', **poi) linkdelays = [ delay for edge in r.smembers(dbkeys.Link.intralink(poi['pop'])) for delay in r.smembers(dbkeys.delay_key(*eval(edge)))] try: deciles = util.decile_transform(linkdelays) except util.EmptyListError: deciles = [5 for x in xrange(10)] stats.incr('poi-latency-defaulted') graphlinks.append(EdgeLink(poi['id'], poi['pop'], {'latency': deciles, 'med_latency': deciles[len(deciles)/2]})) stats.incr('num-pois') tor_vertices.add(poi['id']) i += 1 log.info("Added {0} PoIs. Did not attach {1} " "whose connection point was not linked to anything." .format(stats['num-pois'], stats['unattachable-poi-count']) ) log.info("PoIs defaulted to 5ms links: [{0}]".format( stats['poi-latency-defaulted'])) pipe.execute() already_processed = set() log.info("Processing links... ") i = 0 for pop1 in linkdict.iterkeys(): if pop1 not in vertices: continue for pop2 in linkdict[pop1]: if (pop2 not in vertices or dbkeys.Link.interlink(pop1, pop2) in already_processed): continue linkkey = dbkeys.Link.interlink(pop1, pop2) linkdelays = [ delay for edge in r.smembers(linkkey) for delay in r.smembers(dbkeys.delay_key(*eval(edge)))] try: latency = util.decile_transform(linkdelays) except util.EmptyListError: latency = eval(r.get("graph:collapsed:%s" % (dbkeys.Link.interlink(pop1, pop2)))) graphlinks.append(EdgeLink(pop1, pop2, {'latency': latency, 'med_latency': latency[len(latency)/2]})) stats.incr('num-links') already_processed.add(dbkeys.Link.interlink(pop1, pop2)) i += 1 sys.stderr.write("{0}Processed links for {1} pops" .format(Color.NEWL, i)) log.info("Processed {0} pop links " .format(stats['num-links'])) log.info("Making Graph") with open("vertices.dat", 'w') as vertout: vertices.write(vertout) gr = nx.Graph() gr.add_nodes_from(vertices.nx_tuple_iter()) gr.add_edges_from([edge.nx_tuple() for edge in graphlinks]) try: bfs_edges = nx.bfs_edges(gr, linkdict.max_degree()) except: print "Something was wrong with: %s" % linkdict.max_degree() raise bfs_node_gen = (node for pair in bfs_edges for node in pair) subgraph = gr.subgraph(bfs_node_gen) assert nx.is_connected(subgraph) log.info("BFS reduced graph from {0} to {1} vertices".format( len(gr), len(subgraph))) log.info("Writing data file") nx.write_graphml(subgraph, args.reload) log.info("Wrote files") log.info("STATS:") for key, val in stats.iteritems(): log.info("{0}: {1}".format(key, val)) return gr
def thread_shortest_path(graphpath, sp_key, type_key, node_key, path_key, used_key): global thread_graph log_out = redirect_output() log_out.write("Spawned with sp_key = {0}; type_key = {1}; " "node_key={2}; path_key={3}\n" .format(sp_key, type_key, used_key, path_key)) log_out.flush() r = connection.Redis() log_out.write("Reading graph...") log_out.flush() thread_graph = nx.read_graphml(graphpath) log_out.write(" Complete\n") log_out.flush() target = r.spop(sp_key) while target: used_nodes = set() used_paths = set() log_out.write("Obtaining shortest paths for %s... " % target) log_out.flush() paths = nx.single_source_dijkstra_path( thread_graph, target, weight='latency') for path_target, path in paths.iteritems(): if r.hget(type_key, path_target) in ('relay', 'client', 'dest'): # We only care for relays, clients and destinations try: errors, total = check_valley_free( thread_graph, path, log=log_out) # if total > 0: # log_out.write("Path from %{0} to %s was valley-free. " # " %0.0f/%0.0f (%0.2f) missing links in calc\n" # % (target, path_target, # errors, total, errors / total)) except ValleyFreeError: # If it's not valley free, we rebuild a new path log_out.write("Path from %s to %s is not valley-free. " "Building a new one... " % (target, path_target)) try: path, time_taken = valley_free_path( thread_graph, target, path_target) log_out.write(" finished in %s seconds. New Path: %s " "[%0.0f/%0.0f (%0.2f%%) of links had no " "information]\n" % (Color.wrap(time_taken, Color.OKBLUE), str(path), path.errct, len(path) - 1, float(path.errct) / float(len(path) - 1))) except ValleyFreeError: log_out.write( Color.warn( "Couldn't produce valley-free path from %s to %s\n" % (target, path_target))) raise else: # Now store the links and nodes from this # path so that we keep them. used_nodes.update(path) hops = list() for hop in pairwise(path): hops.append(hop) #Add each hop to our set of paths used_paths.add(hop) r.sadd(path_key, *used_paths) r.sadd(used_key, *used_nodes) log_out.write("Done\n") log_out.flush() target = r.spop(sp_key) log_out.write("Exiting ") log_out.flush() log_out.close() sys.exit(0)