def write_failed(result):
  if not all(result):
    log.info(Color.wrapformat("[failed]", Color.FAIL))
  else:
    log.info(Color.wrapformat("[{0} removed]", Color.OKBLUE, len(result)))
def add_asn_endpoints(vertex_list, linklist, datafile, count, endpointtype):
    """
    Add endpoint nodes that connect to the graph based on ASNs.

    @param vertex_list: An instance of VertexList
    @type vertex_list: C{VertexList}
    @param linklist: A tuple containing a list of links, and an attribute
        dictionary
    @type linklist: C{tuple}
    @param datafile: Path to a datafile that contains at least
        two columns called 'Number' and 'ASN'. The 'Number'
        column should indicate the relative number of endpoints
        that should connect to that ASN.
    @type datafile: C{str}

    @param count: The number of endpoints to add.
    @type count: C{int}
    @param endpointtype: The label for the endpoint. 'client' is a
        good example.
    @type endpointtype: C{str}
    """
    r = connection.Redis()
    try:
        cdata_file = datautil.DataFile(datafile, sep="|")
        cdata_file.add_index('ASN')
    except IOError:
        logging.error(Color.fail("Error reading %s" % datafile))
        sys.exit(-1)

    attach = dict()
    for asn in cdata_file['ASN']:
        try:
            pop = find_pop_for_asn(asn)
            attach[asn] = (pop, cdata_file['ASN'][asn][0]['Number'])
        except graph_objects.ASNNotKnown:
            pass

    def node_id(asn, unique):
        return "%s_%s_%s" % (endpointtype, asn, unique)

    counter = 0
    if len(attach) == 0:
        sys.stderr.write(Color.fail(
            "[failed] No %s could be attached.\n" % endpointtype))
        logging.error(Color.fail(
            "[failed] No %s could be attached." % endpointtype))

    else:
        total = sum(map(lambda x: int(x[1]), attach.itervalues()))
        for asn, data in attach.iteritems():
            num_to_attach = round(count * (float(data[1]) / float(total)))
            for j in xrange(0, int(num_to_attach)):

                vertex_list.add_vertex(node_id(asn, j), nodeid=node_id(asn, j),
                                       nodetype=endpointtype, asn=asn)
                linkkey = dbkeys.Link.intralink(data[0])

                linkdelays = [
                    delay
                    for edge in r.smembers(linkkey)
                    for delay in r.smembers(dbkeys.delay_key(*eval(edge)))]

                try:
                  latency = util.decile_transform(linkdelays)
                except util.EmptyListError:
                  latency = [5 for x in xrange(10)]

                linklist.append(EdgeLink(node_id(asn, j),
                                data[0],
                                {'latency': latency,
                                 'med_latency': latency[len(latency)/2]}))
                counter += 1

        log.info(Color.wrapformat("Success [{0} attached]", Color.OKBLUE,
                                  counter))
    return (counter, len(attach))
def cleanup(args):
  """
  Clean all of the pop and link related information out of the database.
  """
  r = connection.Redis(structures.ConnectionInfo(**args.redis))

  log.info("Removing IP pop data (may take a while)... ")
  ips = r.smembers('iplist')
  p = r.pipeline()
  for ip in ips:
    p.hdel(dbkeys.ip_key(ip), 'pop')
  result = p.execute()
  del ips
  log.info(Color.wrapformat("[{0} removed]",
                            Color.OKBLUE,
                            len(filter(None, result))))

  log.info("Removing PoP link data (may take a while)... ")
  linkkeys = r.keys("links:*")
  p = r.pipeline()
  for key in linkkeys:
    p.delete(key)
  result = p.execute()
  write_failed(result)
  del linkkeys

  log.info("Removing pop keys... ")
  popkeys = r.keys("pop:*")
  p = r.pipeline()
  for key in popkeys:
    p.delete(key)
  result = p.execute()
  write_failed(result)
  del popkeys

  log.info("Removing asn keys... ")
  popkeys = r.keys("asn:*")
  p = r.pipeline()
  for key in popkeys:
    p.delete(key)
  result = p.execute()
  write_failed(result)
  del popkeys

  if args.ip_links:
    log.info("Removing ip links... ")
    p = r.pipeline()
    p.delete(dbkeys.Link.unassigned())
    p.delete(dbkeys.Link.unassigned_fails())
    p.delete("delayed_job:processed_links")
    iplinkkeys = r.keys("ip:links:*")
    for key in iplinkkeys:
      p.delete(key)
    result = p.execute()
    write_failed(result)

  pipelined_delete(r,
                   'poplist',
                   'join:history',
                   'delayed_job:popjoins',
                   'delayed_job:popjoins:known',
                   'popincr',
                   'mutex:popjoin:init')
def create_graph(args):

    r = connection.Redis()
    if args.xml:

        log.info("Loading saved graph from file: %s" % args.xml)

        @timeit
        def load_graph(path):
          return nx.Graph(nx.read_graphml(path))

        graph, time_taken = load_graph(args.xml)
        log.info("Graph loading complete in %0.2f seconds" % time_taken)
        graphpath = args.xml

    else:
        graph = load_from_redis(r, args)
        graphpath = args.reload

    used_nodes = set()

    # used_nodes will contain only the vertices that we want in the graph.
    # However, it contains all of the edges between those vertices, not
    # only those used in paths.

    log.info("Populating sources for shortest path... ")
    nodetypes = nx.get_node_attributes(graph, 'nodetype')
    p = r.pipeline()
    p.delete(SP_KEY)
    p.delete(USED_KEY)
    for node, nodetype in nodetypes.iteritems():

        if nodetype in ('relay', 'client', 'dest'):
            p.sadd(SP_KEY, node)
            p.hset(TYPE_KEY, node, nodetype)
    p.execute()

    log.info(Color.wrap("[complete]", Color.OKGREEN))

    log.info("Spawning 2 workers to process shortest paths... ")

    timer = ProgressTimer(int(r.scard(SP_KEY)))
    left = timer.total
    timer.tick(1)
    #pool = Pool(processes=12, initializer=thread_init,
                #initargs=(graphpath, SP_KEY, TYPE_KEY, USED_KEY, PATH_KEY, ))
    workers = []
    for x in xrange(2):
        p = multiprocessing.Process(
            target=concurrent.thread_shortest_path,
            args=(graphpath, SP_KEY, TYPE_KEY, USED_KEY, PATH_KEY, USED_KEY, ))

        p.start()
        workers.append(p)

    while left > 0:
        time.sleep(30)
        timer.tick((timer.total - left) - timer.total_done)
        sys.stderr.write(
            "{0} Spawning workers to process shortest paths... {1}"
            .format(Color.NEWL,
                    Color.wrapformat("[ETA: {0}]", Color.OKBLUE, timer.eta())))

        left = r.scard(SP_KEY)

    log.info("\nWaiting for jobs to terminate... ")
    for job in workers:
        p.join()
    #pool.join()
    log.info("Done")

    used_nodes = r.smembers(USED_KEY)
    if len(used_nodes) == 0:
      raise RuntimeError('Saved zero nodes after processing')
    core_graph = graph.subgraph(used_nodes)
    to_remove = list()

    def rev_tuple(tup):
        return (tup[1], tup[0])

    for edge in core_graph.edges_iter():
        if (not r.sismember(PATH_KEY, edge) and
                not r.sismember(PATH_KEY, rev_tuple(edge))):
            to_remove.append(edge)

    log.info("Kept %s vertices in the graph" % len(used_nodes))
    log.info("Removed %s extraneous edges" % len(to_remove))
    core_graph.remove_edges_from(to_remove)

    log.info("Re-trimming graph")
    #collapse_graph_in_place(graph)
    log.info("Done")

    log.info("Writing core graph... ")
    nx.write_graphml(core_graph, "%s.xml" % args.save, prettyprint=True)

    try:
      nx.write_dot(core_graph, "%s.dot" % args.save)
    except:
      log.info("Failed to write dot graph")

    log.info(Color.wrap("[complete]", Color.OKGREEN))

    log.info("Cleaning up...")
    pipelined_delete(r, TYPE_KEY, USED_KEY, PATH_KEY)
def load_from_redis(r, args):
    """
    Create a igraph graph from redis
    """

    log.info("Loading from Redis")
    linkdict = LinkDict(r)
    vertices = VertexList()
    tor_vertices = set()
    graphlinks = []
    graphattrs = dict()
    graphattrs['latency'] = []
    stats = Stats({'non-pop-trim': int,
                   'unattachable-poi-count': int,
                   'poi-latency-defaulted': int,
                   'unattachable-poi': set,
                   'num-pois': int,
                   'num-pops': int,
                   'num-links': int,
                   'num-clients': int,
                   'client-connect-points': int})

    pipe = r.pipeline()
    i = 0
#Obtain the set of Tor relay IPs
    log.info("Reading Tor relays from %s... " % args.pointsofinterest)
    try:
        with open(args.pointsofinterest) as f:
          PoIs = json.load(f)

    except IOError as e:
        log.info("Error: [%s]" % e)
        raise

    log.info(Color.wrap("Done", Color.OKBLUE))

    log.info("Attaching clients to graph.")

    #Add clients
    if args.num_clients:

      clients_attached, client_attach_points = add_asn_endpoints(
          vertices,
          graphlinks,
          args.client_data,
          args.num_clients,
          'client')

      log.info("Attached {0} clients to {1} attachment points".format(
          clients_attached, client_attach_points))

    log.info("Attaching destinations to graph.")
    #Add dests
    if args.num_dests:
        dests_attached, dest_attach_points = add_alexa_destinations(
            vertices,
            graphlinks,
            args.num_dests)

    	log.info("Attached {0} dests to {0} attachment points".format(
        	dests_attached, dest_attach_points))

    protected = set()
    protected.update([poi['pop'] for poi in PoIs])
    protected.update(vertices.keys())

    # We want to trim all of the hanging edges of the graph.
    log.info("Trimming degree-1 vertices...")
    found_hanging_edge = True
    pass_ctr = 0

    while found_hanging_edge:
        pass_ctr += 1
        found_hanging_edge = False
        removed = set()
        n = 0
        timer = ProgressTimer(len(linkdict))
        for pop in linkdict.keys():

            if n % 100 == 0 or n == timer.total - 1:
                timer.tick(100)
                sys.stderr.write(
                    "{0}Pass {1}: {2} {3}".format(
                        Color.NEWL,
                        pass_ctr,
                        Color.wrapformat("[{0} processed, {1} trimmed]",
                                         Color.HEADER,
                                         n, stats['non-pop-trim']),
                        Color.wrapformat("[eta:{0}]",
                                         Color.OKGREEN,
                                         timer.eta())
                    ))

            n += 1

            if pop in removed:
                continue  # we saw this already
            if len(linkdict[pop]) >= 2:
                continue  # it can stay
            if pop in protected:
                continue  # We need relay/client/dest connect point

            # It's only connected to one
            connected = linkdict[pop].pop()
            removed.add(pop)
            del linkdict[pop]
            linkdict[connected].remove(pop)
            if len(linkdict[connected]) == 0:
                # This was a matched pair attached to nothing else
                del linkdict[connected]
                removed.add(connected)
            stats.incr('non-pop-trim')
            found_hanging_edge = True
        sys.stderr.write("\n")

    linkdict.collapse_degree_two(protected=protected)

    log.info("Trimmed {non-pop-trim} degree two hops".format(**stats))

#Set vertex id's for all of the pops we have links for.
    log.info("Adding PoPs...")
    for pop in linkdict.iterkeys():
        if pop in vertices:
            continue  # we have this one already.

        vertices.add_vertex(pop,
                            nodeid=pop,
                            nodetype='pop',
                            asn=r.get(dbkeys.POP.asn(pop)),
                            countries=r.smembers(dbkeys.POP.countries(pop)))

        stats.incr('num-pops')
        i += 1

    log.info(Color.wrapformat("Added [{0}]", Color.OKBLUE, stats['num-pops']))

    #Attach the relays
    for poi in PoIs:

        if poi['pop'] not in vertices:
          log.warn("Matched relay to {0}, but couldn't find it "
                   "in vertices".format(poi['pop']))
          stats.incr('unattachable-poi-count')
          stats.incr('unattachable-poi', poi['id'])
          continue

        vertices.add_vertex(poi['id'],
                            nodeid=poi['id'],
                            nodetype='relay',
                            **poi)

        linkdelays = [
            delay
            for edge in r.smembers(dbkeys.Link.intralink(poi['pop']))
            for delay in r.smembers(dbkeys.delay_key(*eval(edge)))]

        try:
          deciles = util.decile_transform(linkdelays)
        except util.EmptyListError:
          deciles = [5 for x in xrange(10)]
          stats.incr('poi-latency-defaulted')

        graphlinks.append(EdgeLink(poi['id'], poi['pop'],
                          {'latency': deciles,
                            'med_latency': deciles[len(deciles)/2]}))

        stats.incr('num-pois')
        tor_vertices.add(poi['id'])
        i += 1

    log.info("Added {0} PoIs. Did not attach {1} "
             "whose connection point was not linked to anything."
             .format(stats['num-pois'],
                     stats['unattachable-poi-count'])
             )
    log.info("PoIs defaulted to 5ms links: [{0}]".format(
        stats['poi-latency-defaulted']))

    pipe.execute()

    already_processed = set()
    log.info("Processing links... ")
    i = 0

    for pop1 in linkdict.iterkeys():
        if pop1 not in vertices:
            continue

        for pop2 in linkdict[pop1]:
            if (pop2 not in vertices
                    or dbkeys.Link.interlink(pop1, pop2) in already_processed):
                continue

            linkkey = dbkeys.Link.interlink(pop1, pop2)
            linkdelays = [
                delay
                for edge in r.smembers(linkkey)
                for delay in r.smembers(dbkeys.delay_key(*eval(edge)))]

            try:
              latency = util.decile_transform(linkdelays)
            except util.EmptyListError:
              latency = eval(r.get("graph:collapsed:%s" %
                                   (dbkeys.Link.interlink(pop1, pop2))))
            graphlinks.append(EdgeLink(pop1, pop2,
              {'latency': latency, 'med_latency': latency[len(latency)/2]}))

            stats.incr('num-links')
            already_processed.add(dbkeys.Link.interlink(pop1, pop2))

        i += 1
        sys.stderr.write("{0}Processed links for {1} pops"
                         .format(Color.NEWL, i))

    log.info("Processed {0} pop links "
             .format(stats['num-links']))

    log.info("Making Graph")

    with open("vertices.dat", 'w') as vertout:
        vertices.write(vertout)

    gr = nx.Graph()
    gr.add_nodes_from(vertices.nx_tuple_iter())
    gr.add_edges_from([edge.nx_tuple() for edge in graphlinks])

    try:
      bfs_edges = nx.bfs_edges(gr, linkdict.max_degree())
    except:
      print "Something was wrong with: %s" % linkdict.max_degree()
      raise

    bfs_node_gen = (node for pair in bfs_edges for node in pair)
    subgraph = gr.subgraph(bfs_node_gen)

    assert nx.is_connected(subgraph)

    log.info("BFS reduced graph from {0} to {1} vertices".format(
             len(gr), len(subgraph)))

    log.info("Writing data file")
    nx.write_graphml(subgraph, args.reload)

    log.info("Wrote files")

    log.info("STATS:")
    for key, val in stats.iteritems():
      log.info("{0}: {1}".format(key, val))

    return gr
def preprocess_joins():
  log.debug("Determining how many joins to preprocess")
  r = connection.Redis()
  numjoins = r.llen('delayed_job:popjoins')
  joins = map(eval, r.lrange('delayed_job:popjoins', 0, -1))
  jm = dict()
  reduced_joins = list()
  seen_joins = set()
  log.info("Preprocessing %s joins" % numjoins)

  def get_join_target(jm, node):
    target = node
    seen = set()
    try:
      while True:
        seen.add(target)
        target = jm[target]
    except KeyError:
      pass
    seen.remove(target)
    if len(seen) > 1:
      jm.update(itertools.izip_longest(seen, [target], fillvalue=target))
    return target

  timer = ProgressTimer(numjoins)
  for i, join in enumerate(joins):
    from_node = get_join_target(jm, join[1])
    to_node = get_join_target(jm, join[0])
    if from_node != to_node:
      jm[from_node] = to_node

    if i % 100 == 0:
      timer.tick(100)
      sys.stderr.write(Color.NEWL + "PreProcessing joins {0}"
                       .format(Color.wrapformat("[ETA: {0} seconds]",
                                                Color.OKBLUE, timer.eta())))

  sys.stderr.write(Color.NEWL +
                   "PreProcessing joins {0}\n"
                   .format(Color.wrapformat("[complete]", Color.OKGREEN)))

  timer = ProgressTimer(numjoins)
  for i, join in enumerate(joins):
    newjoin = (get_join_target(jm, join[0]), join[1])

    if newjoin not in seen_joins:
      reduced_joins.append(newjoin)
      seen_joins.add(newjoin)

    if i % 100 == 0:
      timer.tick(100)
      sys.stderr.write(Color.NEWL + "Reducing join list {0}".format(
                       Color.wrapformat("[ETA: {0} seconds]",
                                        Color.OKBLUE, timer.eta())))

  sys.stderr.write(Color.NEWL +
                   "Reducing join list {0}\n"
                   .format(Color.wrapformat("[complete]", Color.OKGREEN)))

  log.info("Reduced join list from %s to %s joins"
           % (numjoins, len(reduced_joins)))

  return reduced_joins
def process_delayed_joins(args):
  log.info("Processing delayed joins")
  r = connection.Redis()
  if r.llen("delayed_job:unassigned_link_fails") > 0:
    sys.stderr.write("Have unassigned links. "
                     "Run assign_pops --process_failed\n")
    raise SilentExit()
  # Now we process any joins that need to happen. First we lock.
  error_ctr = 0
  dbkeys.mutex_popjoin().acquire()
  try:
    joinlist = preprocess_joins()
    inprocess = list()
    x = len(joinlist)
    log.info("Joining pop pairs: %d".format(x))

    fh = None
    if args.log_joins:
      fh = logging.FileHandler(args.log_joins, mode='w')
      fh.setLevel(logging.DEBUG)
      fh.setFormatter(logging.Formatter('%(message)s'))
      log.addHandler(fh)

    timer = ProgressTimer(x)
    for i, to_join in enumerate(joinlist):
      inprocess.append(to_join)
      log.info("Joining %s to %s\n" % (to_join[1], to_join[0]))

      try:
        joined = join_pops(r, to_join[0], to_join[1])
      except redis_errors as e:
        log.error("Encountered error while processing: {0}. [{1}]\n"
                  .format(to_join, e))
        joinlist.insert(0, inprocess.pop())
        error_ctr += 1
        continue

      else:
        if joined is not None:
          log.info("Joined %s to %s\n" % (joined[1], joined[0]))

        if (r.sismember(dbkeys.POP.list(), to_join[1])
           or r.exists(dbkeys.POP.members(to_join[1]))):

          if descend_target_chain(r, to_join[0]) != to_join[1]:
            raise Exception("Join Failed in ways it should not have...")
          else:
            log.info("Did not join {0} to {1} because {2} had "
                     "previously been joined to {3}\n"
                     .format(to_join[1], to_join[0], to_join[0], to_join[1]))
        timer.tick(1)

      x = len(joinlist) - i

      sys.stderr.write("{newl} {0} joins left {1}\n".format(
                       x,
                       Color.wrapformat("[{0} seconds to finish]",
                                        Color.OKBLUE, timer.eta()),
                       newl=Color.NEWL))

    r.delete('delayed_job:popjoins')
    r.delete('delayed_job:popjoins:inprocess')
    r.delete(r.keys(dbkeys.POP.joined("*")))
    log.info("Joined pops with %d errors while processing" % error_ctr)

    if fh is not None:
      log.removeHandler(fh)

  except KeyboardInterrupt:
    pass
  finally:
    dbkeys.mutex_popjoin().release()
  def collapse_degree_two(self, protected=[]):
    log.info("Cleaning up collapse dbkeys...")
    r = connection.Redis()
    p = r.pipeline()
    for key in r.keys("graph:collapsed:*"):
      p.delete(key)
    write_failed(p.execute())

    pass_ctr = 0
    collapsable = True
    ignoreable = set()
    clogout = open('collapse.log', 'w')
    while collapsable:
      pass_ctr += 1
      sys.stderr.write("\n")
      collapsable = False
      degree2nodes = filter(
          lambda val: (len(val[1]) == 2 and val[0] not in ignoreable),
          self.iteritems())

      counter = 0
      n = 0
      deferred = 0
      collapsed = set()
      timer = ProgressTimer(len(degree2nodes))

      for node, connections in degree2nodes:

        if n % 50 == 0 or n == timer.total - 1:
          timer.tick(50)
          sys.stderr.write(
              "{0}Pass {1}: {2} {3}".format(
                  Color.NEWL, pass_ctr,
                  Color.wrapformat(
                      "[{0} processed, {1} collapsed, {2} deferred]",
                      Color.HEADER, n, counter, deferred
                  ),
                  Color.wrapformat(
                      "[eta: {0}]",
                      Color.OKGREEN, timer.eta()
                  ))
          )

        n += 1

        asns = [r.get(dbkeys.POP.asn(x)) for x in connections | set([node])]
        countries = [r.smembers(dbkeys.POP.countries(x))
                     for x in connections | set([node])]

        same_asn = reduce(lambda x, y: x if x == y else False, asns)
        same_country = True
        for x, y in pairwise(countries):
          if x & y != x:
            same_country = False

        if (same_asn is False or same_country is False or node in protected):
          ignoreable.update(connections | set([node]))
          continue

        if len(collapsed & (connections | set([node]))) != 0:
          deferred += 1
          continue

        collapsed.update(connections | set([node]))
        side1 = connections.pop()
        side2 = connections.pop()
        connections.update(set([side1, side2]))

        try:
          #side1_delay = median(get_delays(dbkeys.Link.interlink(node, side1)))
          side1_delays = decile_transform(
              [float(delay)
               for edge in r.smembers(dbkeys.Link.interlink(node, side1))
               for delay in r.smembers(dbkeys.delay_key(*eval(edge)))])
        except:
          side1_delays = eval(r.get("graph:collapsed:%s" %
                              (dbkeys.Link.interlink(node, side1))))
        try:
          #side2_delay = median(get_delays(dbkeys.Link.interlink(node, side2)))
          side2_delays = decile_transform(
              [float(delay)
               for edge in r.smembers(dbkeys.Link.interlink(node, side2))
               for delay in r.smembers(dbkeys.delay_key(*eval(edge)))])
        except:
          side2_delays = eval(r.get("graph:collapsed:%s" %
                              (dbkeys.Link.interlink(node, side2))))

        combined_delays = [s1 + s2
                           for s1 in side1_delays
                           for s2 in side2_delays]

        r.set('graph:collapsed:%s'
              % (dbkeys.Link.interlink(*list(connections))),
              decile_transform(combined_delays))

        clogout.write("Collapsed %s <-> %s <-> %s\n" %
                      (side1, node, side2))

        collapsable = True

        del self[node]
        self[side1].add(side2)
        self[side2].add(side1)
        self[side1].remove(node)
        self[side2].remove(node)

        counter += 1

    clogout.close()