Example #1
0
	def by_f1_score(self, map):
		"""
		Rank the related tags by F1 score.

		@param map: mapping of {rtag: (intersect, rtotal)}
		@return: a list of (rtag, f1score) pairs, ordered by sort_v with
		         reverse=True. Each score comes from f1_score, called with
		         this tag's document count, the related tag's total and
		         their intersection.
		"""
		# Kept as a lazy generator so sort_v consumes the pairs directly.
		scored = (
			(rtag, f1_score(len(self.docs), rtotal, overlap))
			for rtag, (overlap, rtotal) in map.iteritems()
		)
		ranked = sort_v(scored, reverse=True)
		return list(ranked)
Example #2
0
	def by_intersect(self, map):
		"""
		Rank the related tags by the size of their intersection.

		@param map: mapping of {rtag: (intersect, rtotal)}
		@return: a list of (rtag, intersect) pairs, ordered by sort_v with
		         reverse=True.
		"""
		# rtotal is unpacked only to enforce the 2-tuple shape of the values.
		overlaps = (
			(rtag, overlap)
			for rtag, (overlap, rtotal) in map.iteritems()
		)
		ranked = sort_v(overlaps, reverse=True)
		return list(ranked)
Example #3
0
	def by_precision(self, map):
		"""
		Rank the related tags by precision.

		@param map: mapping of {rtag: (intersect, rtotal)}
		@return: a list of (rtag, (precision, rtotal)) pairs, ordered by
		         sort_v with reverse=True, where
		         precision = intersect/rtag.total
		"""
		# float() forces true division regardless of the operand types.
		precisions = (
			(rtag, (float(overlap) / rtotal, rtotal))
			for rtag, (overlap, rtotal) in map.iteritems()
		)
		ranked = sort_v(precisions, reverse=True)
		return list(ranked)
Example #4
0
	def by_recall(self, map):
		"""
		Rank the related tags by recall.

		@param map: mapping of {rtag: (intersect, rtotal)}
		@return: a list of (rtag, (recall, rtotal)) pairs, ordered by
		         sort_v with reverse=True, where
		         recall = intersect/tag.total and tag.total is taken as
		         len(self.docs)
		"""
		# float() forces true division regardless of the operand types.
		recalls = (
			(rtag, (float(overlap) / len(self.docs), rtotal))
			for rtag, (overlap, rtotal) in map.iteritems()
		)
		ranked = sort_v(recalls, reverse=True)
		return list(ranked)
Example #5
0
	def evaluateScheme(self, scheme):
		"""
		Evaluate the given address scheme against the perfect address scheme
		from the complete data of the world.

		Three graphs are produced:
		 - prune: a copy of the input scheme with every vertex whose NAA
		   attribute is None removed;
		 - local: a scheme rebuilt from only the tags left in prune;
		 - world: a scheme of the same number of tags, chosen one at a time
		   as the closest not-yet-visited vertex in a graph grown from the
		   root tag's related tags.

		@param scheme: the address scheme graph to evaluate; it is copied,
		       not modified.
		@return: AddrSchemeEval(prune, local, world)
		"""
		# Work on a copy, dropping vertices that have no NAA value.
		prune = scheme.copy()
		prune.delete_vertices(v.index for v in prune.vs.select(lambda vx: vx[NAA] is None))

		# Sample holding one node per tag that survived the pruning, added in
		# prune.vs[NID] order.
		ss = NodeSample()
		for tag in prune.vs[NID]:
			ss.add_node(self.getTagInfo(tag).build_node())

		# Sentinel distance used below to mask already-visited vertices.
		MAX = float("inf")
		# Arc weight: negative log of (arc AAT * target NAT / source NAT), so
		# that summing weights along a path multiplies the underlying ratios.
		def dist(arc, graph):
			return -log(arc[AAT]*graph.vs[arc.target][NAT]/graph.vs[arc.source][NAT])
		# Inverse mapping: turn a path distance back into an attribute value.
		def nattr(dist):
			return exp(-dist)

		# build address scheme of input tags
		local = ss.build(complete=False)
		assert None not in local.vs[NAT]
		local.es[AAT_AD] = [dist(arc, local) for arc in local.es]
		# Distances from vertex 0 to every vertex, weighted by AAT_AD.
		path = local.shortest_paths(0, weights=AAT_AD)[0]
		# Vertex ids are passed in the order sort_v gives for (vid, distance)
		# pairs -- presumably ascending distance; verify against sort_v.
		graph_prune_arcs(local, [k for k,v in sort_v(enumerate(path))])
		local.vs[NAA] = [nattr(d) for d in path]

		# build address scheme of n tags from world data, where n = len(input tags)
		# OPT LOW this rebuilds the entire graph each time, not optimal, but means
		# we can just use already-existing implementation of dijkstra from igraph
		sw = NodeSample()
		# Seed the world sample with the tag of prune's vertex 0 (the root).
		tinfo = self.getTagInfo(prune.vs[0][NID])
		sw.add_node(tinfo.build_node())
		visit = set([0]) # visited nodes, sw vids
		trail = [(0, 0.0)] # trail of visited nodes, ss vids
		for i in xrange(0, len(prune.vs)-1): # n-1 because root already added
			# Expand the world sample with the related tags of the tag
			# selected last (the root on the first iteration).
			for rtag in tinfo.rtag.iterkeys():
				if rtag not in sw:
					sw.add_node(self.getTagInfo(rtag).build_node())
			world = sw.build(complete=False)
			world.es[AAT_AD] = [dist(arc, world) for arc in world.es]
			path = world.shortest_paths(0, weights=AAT_AD)[0]

			# get next tag in world addr scheme
			# Visited vertices are masked with MAX so min() picks the closest
			# unvisited one.
			# NOTE(review): visit stores indices from earlier builds, so this
			# assumes sw.build() keeps vertex indices stable -- confirm.
			# NOTE(review): under Python 2 this list comprehension rebinds the
			# outer loop variable i; harmless because the for statement
			# reassigns it on the next iteration and it is not used otherwise.
			npath = [MAX if i in visit else v for i, v in enumerate(path)]
			d = min(npath)
			index = npath.index(d)
			tinfo = self.getTagInfo(world.vs[index][NID])
			visit.add(index)
			if tinfo.tag not in ss:
				# New tag: record the vid it is about to receive in ss.
				# NOTE(review): assumes ss hands out vids in insertion
				# order, so len(ss) is the next vid -- confirm.
				trail.append((len(ss), d))
				ss.add_node(tinfo.build_node())
			else:
				# Tag already in ss: its position in prune.vs[NID] is used as
				# its ss vid (ss was initially filled in that same order).
				trail.append((prune.vs[NID].index(tinfo.tag),d)) # OPT LOW

		# ss now holds the input tags plus any extra tags picked above.
		world = ss.build()
		assert len(trail) == len(local.vs)
		# NOTE(review): under Python 2 the comprehension variable dist rebinds
		# the local helper dist(); harmless here since the helper is not
		# called after this point.
		graph_prune_arcs(world, [vid for vid, dist in trail])
		trail = dict(trail)
		# Vertices that are not on the trail get None as their NAA value.
		world.vs[NAA] = [nattr(trail[i]) if i in trail else None for i in xrange(0, len(world.vs))]

		return AddrSchemeEval(prune, local, world)