def by_f1_score(self, map):
    """
    Rank related tags by their F1 score.

    @param map: mapping of {rtag: (intersection, rtag_total)}; only its
           iteritems() is used
    @return: a sorted list of (rtag, f1score) pairs, as ordered by
             sort_v(..., reverse=True)
    """
    def scored(entries):
        # f1_score receives this tag's doc count, the related tag's total
        # and the size of their intersection, in that order.
        for rtag, (overlap, rtotal) in entries:
            yield rtag, f1_score(len(self.docs), rtotal, overlap)

    ranking = sort_v(scored(map.iteritems()), reverse=True)
    return list(ranking)
def by_intersect(self, map):
    """
    Rank related tags by the size of their intersection with this tag.

    @param map: mapping of {rtag: (intersection, rtag_total)}; only its
           iteritems() is used
    @return: a sorted list of (rtag, intersection) pairs, as ordered by
             sort_v(..., reverse=True)
    """
    def overlaps(entries):
        # The related tag's total is unpacked but plays no part here.
        for rtag, (overlap, _rtotal) in entries:
            yield rtag, overlap

    ranking = sort_v(overlaps(map.iteritems()), reverse=True)
    return list(ranking)
def by_precision(self, map):
    """
    Rank related tags by precision, where
    precision = intersection / rtag_total.

    @param map: mapping of {rtag: (intersection, rtag_total)}; only its
           iteritems() is used
    @return: a sorted list of (rtag, (precision, rtag_total)) pairs, as
             ordered by sort_v(..., reverse=True)
    """
    def precisions(entries):
        for rtag, (overlap, rtotal) in entries:
            # float() forces true division under Python 2.
            precision = float(overlap)/rtotal
            yield rtag, (precision, rtotal)

    ranking = sort_v(precisions(map.iteritems()), reverse=True)
    return list(ranking)
def by_recall(self, map):
    """
    Rank related tags by recall, where
    recall = intersection / len(self.docs).

    @param map: mapping of {rtag: (intersection, rtag_total)}; only its
           iteritems() is used
    @return: a sorted list of (rtag, (recall, rtag_total)) pairs, as
             ordered by sort_v(..., reverse=True)
    """
    def recalls(entries):
        for rtag, (overlap, rtotal) in entries:
            # float() forces true division under Python 2.
            recall = float(overlap)/len(self.docs)
            yield rtag, (recall, rtotal)

    ranking = sort_v(recalls(map.iteritems()), reverse=True)
    return list(ranking)
def evaluateScheme(self, scheme):
    """
    Evaluate the given address scheme against the perfect address scheme
    from the complete data of the world.

    Three graphs are built and handed to AddrSchemeEval:
      - pruned: a copy of scheme without the vertices whose NAA attribute
        is None
      - local: an address scheme rebuilt from the tags of pruned only
      - world: an address scheme over as many tags as pruned has, picked
        one at a time as the nearest unvisited tag in the world data

    @param scheme: the graph to evaluate; all changes are made to a copy
    @return: AddrSchemeEval(pruned, local, world)
    """
    INFINITY = float("inf")

    def arc_dist(arc, graph):
        # Arc weight scaled by the target/source NAT ratio, pushed through
        # -log so it can serve as a weight for shortest_paths.
        ratio = arc[AAT]*graph.vs[arc.target][NAT]/graph.vs[arc.source][NAT]
        return -log(ratio)

    def to_attr(distance):
        # Inverse of the -log transform used in arc_dist.
        return exp(-distance)

    pruned = scheme.copy()
    unaddressed = pruned.vs.select(lambda vx: vx[NAA] is None)
    pruned.delete_vertices(v.index for v in unaddressed)

    sample = NodeSample()
    for tag in pruned.vs[NID]:
        sample.add_node(self.getTagInfo(tag).build_node())

    # build address scheme of input tags
    local = sample.build(complete=False)
    assert None not in local.vs[NAT]
    local.es[AAT_AD] = [arc_dist(arc, local) for arc in local.es]
    local_dists = local.shortest_paths(0, weights=AAT_AD)[0]
    local_order = [vid for vid, _d in sort_v(enumerate(local_dists))]
    graph_prune_arcs(local, local_order)
    local.vs[NAA] = [to_attr(d) for d in local_dists]

    # build address scheme of n tags from world data, where n = len(input tags)
    # OPT LOW this rebuilds the entire graph each time, not optimal, but means
    # we can just use already-existing implementation of dijkstra from igraph
    world_sample = NodeSample()
    tinfo = self.getTagInfo(pruned.vs[0][NID])
    world_sample.add_node(tinfo.build_node())
    visited = set([0])    # visited nodes, as vertex ids of world_sample
    trail = [(0, 0.0)]    # (vertex id in sample, distance) per visited node
    for _step in xrange(0, len(pruned.vs)-1):  # n-1 because root already added
        # grow the world sample with the related tags of the last visited tag
        for rtag in tinfo.rtag.iterkeys():
            if rtag not in world_sample:
                world_sample.add_node(self.getTagInfo(rtag).build_node())
        world = world_sample.build(complete=False)
        world.es[AAT_AD] = [arc_dist(arc, world) for arc in world.es]
        world_dists = world.shortest_paths(0, weights=AAT_AD)[0]

        # get next tag in world addr scheme: the nearest one not yet visited
        candidates = [INFINITY if vid in visited else d
                      for vid, d in enumerate(world_dists)]
        nearest = min(candidates)
        chosen = candidates.index(nearest)
        tinfo = self.getTagInfo(world.vs[chosen][NID])
        visited.add(chosen)

        if tinfo.tag in sample:
            # OPT LOW linear search for the tag's position among pruned's NIDs
            trail.append((pruned.vs[NID].index(tinfo.tag), nearest))
        else:
            # len(sample) must be read before add_node; presumably it is the
            # vertex id the new node gets in sample.build() - TODO confirm
            trail.append((len(sample), nearest))
            sample.add_node(tinfo.build_node())

    world = sample.build()
    assert len(trail) == len(local.vs)
    graph_prune_arcs(world, [vid for vid, _d in trail])
    dist_by_vid = dict(trail)
    # vertices that were never visited keep no address attribute
    world.vs[NAA] = [
        to_attr(dist_by_vid[vid]) if vid in dist_by_vid else None
        for vid in xrange(0, len(world.vs))
    ]
    return AddrSchemeEval(pruned, local, world)