def scrapeIDs(self, seed, size):
    """
    Collect a sample of up to `size` nodes by breadth-first traversal.

    The traversal starts at the id returned by self.getNSID(seed). Each id
    taken from the queue that is not yet in the sample is turned into a node
    with self.makeID(); the keys of node.out are queued for later visits and
    the node is added to the sample.

    The loop ends when the sample holds `size` nodes or when there are no
    more ids left to visit, whichever comes first, so the returned sample
    may be smaller than `size` if fewer ids are reachable from the seed.

    @param seed: value resolved to the starting id through self.getNSID()
    @param size: target number of nodes; must be an int
    @return: the NodeSample, after its build() method has been called
    @raise TypeError: if `size` is not an int
    """
    if not isinstance(size, int):
        raise TypeError("size must be an int, got %s" % type(size).__name__)

    sample = NodeSample()
    pending = deque([self.getNSID(seed)])

    # Checking `pending` as well as the sample size avoids popleft() on an
    # empty deque (IndexError) when the reachable set is smaller than `size`.
    while pending and len(sample) < size:
        nsid = pending.popleft()
        if nsid in sample:
            # already sampled via another path; nothing to add
            continue

        node = self.makeID(nsid)
        pending.extend(node.out.keys())
        sample.add_node(node)
        LOG.info("id sample: %s/%s (added %s)" % (len(sample), size, nsid))

    sample.build()
    return sample
def evaluateScheme(self, scheme):
    """
    Evaluate the given address scheme against the address scheme obtained
    from the complete data of the world.

    Works on a copy of `scheme` from which every vertex whose NAA attribute
    is None has been deleted. Two graphs are then produced:

    - "local": built only from the tags of the pruned scheme
    - "world": built by repeatedly expanding from the root tag through the
      related tags (the keys of tinfo.rtag) and picking, each round, the
      unvisited vertex with the smallest shortest-path distance from
      vertex 0, for as many rounds as the pruned scheme has non-root tags

    In both graphs the arc weight stored under AAT_AD is
    -log(arc[AAT] * target[NAT] / source[NAT]), and the NAA attribute of a
    vertex is exp(-distance) for its distance from vertex 0.

    @param scheme: graph holding the address scheme to evaluate; it is
           copied, not modified
    @return: AddrSchemeEval(prune, local, world)
    """
    unreachable = float("inf")

    def arc_cost(arc, graph):
        # additive arc weight, so shortest-path sums can be mapped back
        # through exp(-x) by to_attr()
        arc_attr = arc[AAT]
        target_attr = graph.vs[arc.target][NAT]
        source_attr = graph.vs[arc.source][NAT]
        return -log(arc_attr*target_attr/source_attr)

    def to_attr(distance):
        return exp(-distance)

    # work on a copy that contains only vertices with an address attribute
    prune = scheme.copy()
    unaddressed = prune.vs.select(lambda vx: vx[NAA] is None)
    prune.delete_vertices(v.index for v in unaddressed)

    ss = NodeSample()
    for tag in prune.vs[NID]:
        tag_info = self.getTagInfo(tag)
        ss.add_node(tag_info.build_node())

    # build address scheme of input tags
    local = ss.build(complete=False)
    assert None not in local.vs[NAT]
    local.es[AAT_AD] = [arc_cost(arc, local) for arc in local.es]
    local_dists = local.shortest_paths(0, weights=AAT_AD)[0]
    # NOTE(review): sort_v presumably orders the (vid, distance) pairs by
    # distance -- confirm against its definition
    local_order = [vid for vid, _cost in sort_v(enumerate(local_dists))]
    graph_prune_arcs(local, local_order)
    local.vs[NAA] = [to_attr(d) for d in local_dists]

    # build address scheme of n tags from world data, where n = len(input tags)
    # OPT LOW this rebuilds the entire graph each time, not optimal, but means
    # we can just use already-existing implementation of dijkstra from igraph
    sw = NodeSample()
    tinfo = self.getTagInfo(prune.vs[0][NID])
    sw.add_node(tinfo.build_node())
    seen = set([0])  # visited nodes, sw vids
    trail = [(0, 0.0)]  # trail of visited nodes, ss vids
    rounds = len(prune.vs) - 1  # n-1 because root already added
    for _round in xrange(rounds):
        # pull every not-yet-sampled tag related to the last chosen tag
        for rtag in tinfo.rtag.iterkeys():
            if rtag in sw:
                continue
            sw.add_node(self.getTagInfo(rtag).build_node())

        world = sw.build(complete=False)
        world.es[AAT_AD] = [arc_cost(arc, world) for arc in world.es]
        world_dists = world.shortest_paths(0, weights=AAT_AD)[0]

        # get next tag in world addr scheme: visited vertices are masked
        # with infinity so that min() only considers unvisited ones
        masked = [unreachable if vid in seen else cost
                  for vid, cost in enumerate(world_dists)]
        nearest = min(masked)
        chosen = masked.index(nearest)
        tinfo = self.getTagInfo(world.vs[chosen][NID])
        seen.add(chosen)

        if tinfo.tag in ss:
            # tag already sampled: reuse its position among the input tags
            trail.append((prune.vs[NID].index(tinfo.tag), nearest))  # OPT LOW
        else:
            # new tag: len(ss) is read before add_node() and recorded as
            # the id the tag will have in ss
            trail.append((len(ss), nearest))
            ss.add_node(tinfo.build_node())

    world = ss.build()
    assert len(trail) == len(local.vs)
    graph_prune_arcs(world, [vid for vid, _cost in trail])

    # vertices that are not on the trail get no address attribute
    dist_by_vid = dict(trail)
    world_attrs = []
    for vid in xrange(len(world.vs)):
        if vid in dist_by_vid:
            world_attrs.append(to_attr(dist_by_vid[vid]))
        else:
            world_attrs.append(None)
    world.vs[NAA] = world_attrs

    return AddrSchemeEval(prune, local, world)