def createTGraph(self, totalsize, pgdb, display=False, node_attr={
    "style": ("filled", "filled"),
    "fillcolor": ("firebrick1", "limegreen"),
    "shape": ("ellipse", "doublecircle"),
}):
    """
    Creates a graph representing this producer as a tgraph.

    The result is a copy of self.docgr in which the vertices of
    self.drange() have been removed and replaced by arcs inferred between
    the vertices of self.trange().

    @param totalsize: total number of documents in the entire world
    @param pgdb: an open database of {prid:Producer} (for tgraphs)
    @param display: whether to generate for display (adds attributes to
           pretty up the graph)
    @param node_attr: {attr:(tag,prod)} node attributes for graphviz; each
           attribute should be mapped to a (tag,prod) pair that holds the
           attribute value for the respective type of node; this only has
           an effect if <display> is True. The default mapping is only
           read, never modified, by this method.
    @return: the newly created graph
    """
    # Estimate the total size from the producer's own perspective. The
    # formula is a rough heuristic but should give passable results:
    # - neighbours are not independent => total lower than this
    # - neighbours are not entire network => total higher than this
    # NOTE(review): this lookup uses the literal "id" while the rest of the
    # method uses NID -- presumably the same attribute; confirm.
    total = union_ind(
        chain(
            [self.size()],
            (pgdb[self.docgr.vs[pid]["id"]].size() for pid in self.prange()),
        ),
        totalsize,
    )

    gg = self.docgr.copy()
    del gg.vs[NAA]
    # Once the drange() vertices are deleted below, the trange() vertices
    # start at index 0 and the prange() vertices follow directly after them.
    gg["base_t"] = 0
    gg["base_g"] = self.base_p - self.base_t

    # node-attrs for prange: size of each neighbour relative to the estimate
    gg.vs[self.base_p:][NAT] = [
        pgdb[gg.vs[pid][NID]].size() / float(total) for pid in self.prange()
    ]

    # infer arcs between tags, from the drange() vertices each tag points to;
    # the membership set is built once instead of once per successor
    doc_ids = set(self.drange())
    mem = [
        [vid for vid in gg.successors(tid) if vid in doc_ids]
        for tid in self.trange()
    ]
    edges, arc_a = infer_arcs(mem, total)

    gg.delete_vertices(self.drange())
    n_new = len(edges)
    gg.add_edges(edges)
    # Guard against n_new == 0: the slice [-0:] would select *every* edge
    # rather than none, and there would be no values to assign to them.
    if n_new:
        gg.es[-n_new:][AAT] = arc_a

    if display:
        gg.vs["label"] = gg.vs[NID]
        del gg.vs[NID]
        # Only the trange() vertices followed by the prange() vertices are
        # left at this point, so each attribute needs exactly one value per
        # tag followed by one value per producer, taken from the (tag,prod)
        # pair.
        for attr, val in node_attr.items():
            gg.vs[attr] = (
                [val[0] for _ in self.trange()] +
                [val[1] for _ in self.prange()]
            )

    return gg
def generateTGraphs(self):
    """
    Generate the per-community producers and the graph that links them.

    Works in three stages:
    1. for every entry of self.comm, build a Producer whose content is the
       union of the self.pddb entries of that community's members, and store
       it in self.pgdb (with its state in self.pgsb);
    2. infer arcs between the communities and store the resulting directed
       graph in self.sprdgr;
    3. for every producer in self.pgdb, infer its relations to its
       successors in that graph and store them on the producer.

    Stages 1 and 3 are driven through exec_unique().
    """
    name = "tgraphs"
    tot_s = len(self.comm)

    # producer ids are the zero-padded community indexes
    nsids = ["%04d" % idx for idx in xrange(0, tot_s)]
    id_p = dict((nsid, idx) for idx, nsid in enumerate(nsids))

    # generate docsets for new producers
    def run_p(nsid):
        members = self.comm[id_p[nsid]]
        content = set()
        for member in members:
            content.update(self.pddb[self.prodgr.vs[member][NID]])

        prod = Producer(nsid)
        prod.initContent(content, self.dtdb, True)
        prod.inferScores()
        prod.repTag(cover=0) # TWEAK
        self.pgdb[nsid] = prod
        self.pgsb[nsid] = prod.state

    exec_unique(id_p, self.pgsb, run_p, None, "%s db: producers" % name, LOG.info)

    tot_p = len(self.prodgr.vs)
    # TWEAK: relax for tgraphs
    relaxed_ratio = 2*log(1+tot_p)
    edges, _arc_attrs = infer_arcs(self.comm, tot_p, ratio=relaxed_ratio)

    vertex_attrs = {
        NID: list(nsids),
        "label": [len(members) for members in self.comm],
    }
    self.sprdgr = Graph(tot_s, list(edges), directed=True, vertex_attrs=vertex_attrs)
    g = self.sprdgr
    LOG.info("%s db: generated producer graph" % name)

    # generate content arcs between producers
    def run_r(nsid):
        prod = self.pgdb[nsid]
        if prod.state != P_ARC:
            successor_ids = g.vs.select(g.successors(id_p[nsid]))[NID]
            pmap = {}
            for rnsid in successor_ids:
                arc = self.inferProdArc(prod, self.pgdb[rnsid], show_tag=True)
                pmap[rnsid] = ProducerRelation(None, *arc)
            prod.initProdArcs(pmap, has_tags=True)
            self.pgdb[nsid] = prod
        # NOTE(review): the state entry is written even when the arcs were
        # already present, presumably to resync self.pgsb with self.pgdb --
        # confirm against the intended layout of this function.
        self.pgsb[nsid] = prod.state

    def arcs_done(nsid):
        return self.pgsb[nsid] >= P_ARC

    exec_unique(self.pgdb.iterkeys(), arcs_done, run_r, None,
                "%s db: relations" % name, LOG.info, steps=0x10000)