Exemple #1
0
	def createTGraph(self, totalsize, pgdb, display=False, node_attr={
	  "style": ("filled", "filled"),
	  "fillcolor":("firebrick1", "limegreen"),
	  "shape":("ellipse","doublecircle"),
	}):
		"""
		Creates a graph representing this producer as a tgraph.

		The result is built from a copy of self.docgr: the vertices in
		self.drange() are deleted, arcs inferred between the self.trange()
		vertices are added, and the self.prange() vertices get a node
		attribute derived from the size of the matching producer in <pgdb>.

		@param totalsize: total number of documents in the entire world
		@param pgdb: an open database of {prid:Producer} (for tgraphs)
		@param display: whether to generate for display (adds attributes to
		       pretty up the graph)
		@param node_attr: {attr:(tag,prod)} node attributes for graphviz; each
		       attribute should be mapped to a (tag,prod) pair that holds the
		       attribute value for the respective type of node; this only has
		       an effect if <display> is True. The default dict is only read,
		       never modified.
		@return: the newly created graph (self.docgr itself is not modified)
		"""

		# estimate total size from producer's own perspective
		# the formula is a rough heuristic but should give passable results
		# - neighbours are not independent => total lower than this
		# - neighbours are not entire network => total higher than this
		total = union_ind(chain([self.size()], (pgdb[self.docgr.vs[pid]["id"]].size() for pid in self.prange())), totalsize)
		# print "producer %s (%s): total size of network estimated to be %s (actual %s)" % (self.nsid, self.size(), total, totalsize)

		gg = self.docgr.copy()
		del gg.vs[NAA]
		gg["base_t"] = 0
		gg["base_g"] = self.base_p - self.base_t

		# node-attrs for prange
		gg.vs[self.base_p:][NAT] = [pgdb[gg.vs[pid][NID]].size()/float(total) for pid in self.prange()]

		# infer arcs between tags: for every tag, collect those of its
		# successors that lie in drange(); the range is materialised once so
		# that the membership test is not re-evaluated for every successor
		docs = set(self.drange())
		mem = [[vid for vid in gg.successors(tid) if vid in docs] for tid in self.trange()]
		edges, arc_a = infer_arcs(mem, total)

		gg.delete_vertices(self.drange())
		gg.add_edges(edges)
		#assert gg.es[-len(edges):][AAT] == [None] * len(edges)
		# Attach the arc attributes to the edges that were just appended.
		# Guard the empty case: with no new edges the slice would be [-0:],
		# which selects *every* edge of the graph instead of none.
		if len(edges) > 0:
			gg.es[-len(edges):][AAT] = arc_a

		if display:
			gg.vs["label"] = gg.vs[NID]
			del gg.vs[NID]
			# The drange() vertices were deleted above, so only the trange()
			# and prange() vertices remain; each node_attr value is a
			# (tag, prod) pair, matching exactly those two groups.
			for attr, val in node_attr.iteritems():
				gg.vs[attr] = [val[0] for _ in self.trange()] + [val[1] for _ in self.prange()]

		return gg
Exemple #2
0
	def generateTGraphs(self):
		"""
		Generates the "tgraphs" data: one Producer per entry of self.comm,
		the directed graph between those entries, and the content arcs
		between the resulting producers.

		Steps, in order:
		 1. for each index i into self.comm, build a Producer with id
		    "%04d" % i whose content is the union of the self.pddb entries of
		    that community's members; store it in self.pgdb and its state in
		    self.pgsb
		 2. infer arcs between the entries of self.comm and store the
		    resulting directed graph in self.sprdgr (vertices are labelled
		    with the size of each entry)
		 3. for each producer in self.pgdb not in state P_ARC, initialise its
		    producer arcs towards its successors in self.sprdgr

		Both passes are driven by exec_unique, which also receives the
		progress description and LOG.info.
		"""
		name = "tgraphs"

		tot_s = len(self.comm)
		nsids = ["%04d" % i for i in xrange(tot_s)]
		# maps producer id -> index into self.comm
		id_p = dict(zip(nsids, xrange(tot_s)))

		# generate docsets for new producers
		def run_p(nsid):
			prod = Producer(nsid)
			# union of the doc collections of every member of this community
			docs = set()
			for p in self.comm[id_p[nsid]]:
				docs.update(self.pddb[self.prodgr.vs[p][NID]])
			prod.initContent(docs, self.dtdb, True)
			prod.inferScores()
			prod.repTag(cover=0) # TWEAK
			self.pgdb[nsid] = prod
			self.pgsb[nsid] = prod.state
		exec_unique(id_p, self.pgsb, run_p, None, "%s db: producers" % name, LOG.info)

		tot_p = len(self.prodgr.vs)
		relaxed = 2*log(1+tot_p) # TWEAK # relax for tgraphs
		# only the edges are used here; the arc attributes are discarded
		edges, _arc_a = infer_arcs(self.comm, tot_p, ratio=relaxed)
		sizes = [len(com) for com in self.comm]
		self.sprdgr = Graph(tot_s, list(edges), directed=True,
		  vertex_attrs={NID: list(nsids), "label": sizes})
		sprd = self.sprdgr
		LOG.info("%s db: generated producer graph" % name)

		# generate content arcs between producers
		def run_r(nsid):
			prod = self.pgdb[nsid]
			if prod.state != P_ARC:
				pmap = {}
				for rnsid in sprd.vs.select(sprd.successors(id_p[nsid]))[NID]:
					arc = self.inferProdArc(prod, self.pgdb[rnsid], show_tag=True)
					pmap[rnsid] = ProducerRelation(None, *arc)
				prod.initProdArcs(pmap, has_tags=True)
				self.pgdb[nsid] = prod
			self.pgsb[nsid] = prod.state
		exec_unique(self.pgdb.iterkeys(), lambda nsid: self.pgsb[nsid] >= P_ARC, run_r, None,
		  "%s db: relations" % name, LOG.info, steps=0x10000)