Exemple #1
0
	def scrapeIDs(self, seed, size):
		"""
		Collect a sample of nodes by breadth-first traversal from a seed.

		Ids are taken from a FIFO queue that initially holds only the id
		returned by self.getNSID(seed). Each id that is not yet in the sample
		is turned into a node with self.makeID(), the node is added to the
		sample, and the keys of the node's `out` mapping are queued.

		@param seed: passed to self.getNSID() to obtain the starting id
		@param size: number of nodes to collect; must be an int
		@return: the NodeSample, after build() has been called on it. It holds
		         fewer than `size` nodes if the queue runs empty first.
		@raise TypeError: if size is not an int
		"""
		# bool is an int subclass but is not a meaningful size, so it is
		# rejected, as it was by the former exact-type check
		if isinstance(size, bool) or not isinstance(size, int):
			raise TypeError("size must be an int, got %s" % type(size).__name__)

		sample = NodeSample()
		queue = deque([self.getNSID(seed)])

		while len(sample) < size:
			if not queue:
				# nothing left to visit; popleft() on an empty deque would
				# raise IndexError, so stop and return what has been collected
				LOG.warning("id sample: no more ids to visit at %s/%s" % (len(sample), size))
				break

			nsid = queue.popleft()
			if nsid in sample:
				# already sampled via another path
				continue

			node = self.makeID(nsid)
			queue.extend(node.out.keys())
			sample.add_node(node)
			LOG.info("id sample: %s/%s (added %s)" % (len(sample), size, nsid))

		sample.build()
		return sample
Exemple #2
0
	def evaluateScheme(self, scheme):
		"""
		Evaluate the given address scheme against the perfect address scheme
		from the complete data of the world.

		Three graphs are produced and passed to AddrSchemeEval:
		- prune: a copy of the input scheme with every vertex whose NAA
		  attribute is None removed
		- local: a scheme rebuilt from the tag info of only the tags in prune
		- world: a scheme over the same number of tags, but where each next
		  tag is the nearest unvisited one in a sample that grows from the
		  root tag by following tag info from self.getTagInfo()

		@param scheme: graph object providing copy(), vs and delete_vertices()
		       (presumably an igraph Graph - TODO confirm); it is not modified,
		       only its copy is
		@return: AddrSchemeEval(prune, local, world)
		"""
		prune = scheme.copy()
		prune.delete_vertices(v.index for v in prune.vs.select(lambda vx: vx[NAA] is None))

		# sample holding one node per tag that survived pruning; note that it is
		# extended further below with tags picked from the world data
		ss = NodeSample()
		for tag in prune.vs[NID]:
			ss.add_node(self.getTagInfo(tag).build_node())

		MAX = float("inf")
		# arc weight for the shortest-path search: the -log of
		# arc[AAT] * NAT(target) / NAT(source), so that summing weights along a
		# path corresponds to multiplying those ratios
		# (presumably the ratio lies in (0, 1], giving non-negative weights -
		# TODO confirm)
		def dist(arc, graph):
			return -log(arc[AAT]*graph.vs[arc.target][NAT]/graph.vs[arc.source][NAT])
		# inverse of the -log transform: maps a path distance back to a product
		def nattr(dist):
			return exp(-dist)

		# build address scheme of input tags
		local = ss.build(complete=False)
		assert None not in local.vs[NAT]
		local.es[AAT_AD] = [dist(arc, local) for arc in local.es]
		# distances from vertex 0 to every vertex; [0] picks the row for that
		# single source
		path = local.shortest_paths(0, weights=AAT_AD)[0]
		# sort_v and graph_prune_arcs are defined elsewhere; presumably the
		# vertex ids are ordered by distance and arcs are pruned according to
		# that order - TODO confirm
		graph_prune_arcs(local, [k for k,v in sort_v(enumerate(path))])
		local.vs[NAA] = [nattr(d) for d in path]

		# build address scheme of n tags from world data, where n = len(input tags)
		# OPT LOW this rebuilds the entire graph each time, not optimal, but means
		# we can just use already-existing implementation of dijkstra from igraph
		sw = NodeSample()
		# the root is the tag of vertex 0 of the pruned scheme
		tinfo = self.getTagInfo(prune.vs[0][NID])
		sw.add_node(tinfo.build_node())
		visit = set([0]) # visited nodes, sw vids
		trail = [(0, 0.0)] # trail of visited nodes, ss vids
		# NOTE: xrange and iterkeys make this Python 2 only
		for i in xrange(0, len(prune.vs)-1): # n-1 because root already added
			# expand the world sample with every rtag of the most recently
			# chosen tag that it does not contain yet
			for rtag in tinfo.rtag.iterkeys():
				if rtag not in sw:
					sw.add_node(self.getTagInfo(rtag).build_node())
			world = sw.build(complete=False)
			world.es[AAT_AD] = [dist(arc, world) for arc in world.es]
			path = world.shortest_paths(0, weights=AAT_AD)[0]

			# get next tag in world addr scheme
			# visited vertices are masked with infinity so that min() returns the
			# nearest unvisited one
			# NOTE: the comprehension reuses the name `i`; under Python 2 this
			# rebinds the loop variable, which is harmless because the for
			# statement reassigns it on the next iteration
			npath = [MAX if i in visit else v for i, v in enumerate(path)]
			d = min(npath)
			index = npath.index(d)
			tinfo = self.getTagInfo(world.vs[index][NID])
			visit.add(index)
			if tinfo.tag not in ss:
				# new tag: it is recorded under the current len(ss) and then
				# added to ss (assumes build() numbers vertices in insertion
				# order - TODO confirm)
				trail.append((len(ss), d))
				ss.add_node(tinfo.build_node())
			else:
				# tag already in ss: recorded under its position in prune
				# (presumably the same as its vertex id in ss, since ss was
				# filled in prune's order - TODO confirm)
				trail.append((prune.vs[NID].index(tinfo.tag),d)) # OPT LOW

		# `world` is rebound here: from the last per-iteration graph of sw to
		# the graph of ss, which now holds the input tags plus the tags added
		# by the loop above; unlike the earlier builds, no complete=False is
		# passed
		world = ss.build()
		# the trail must hold exactly one entry per vertex of the local scheme
		assert len(trail) == len(local.vs)
		graph_prune_arcs(world, [vid for vid, dist in trail])
		trail = dict(trail)
		# vertices that are not on the trail get None as their NAA value
		world.vs[NAA] = [nattr(trail[i]) if i in trail else None for i in xrange(0, len(world.vs))]

		return AddrSchemeEval(prune, local, world)