class SubstrateProjector:
    """
    Project a bipartite graph onto the entities of one type (the substrates).

    The input is a bipartite graph (typically obtained using a loader class,
    see KublaiLoader for example). Paths s1 - c - s2, where c is a catalyst
    shared by the substrates s1 and s2, induce an edge s1 - s2 in the
    projection. The weights of the edges s1 - c and c - s2 are combined
    (sum of products over all shared catalysts) to give the weight of s1 - s2.

    The class requires that:
    - nodes of the original graph carry an identifier in the "rcmnId"
      string property
    - nodes of the original graph have a two-value "type" attribute (string),
      used to sort out entities; one of these values selects the nodes of
      the projected graph
    - edges have weights stored in the "edgeWeight" property (double)

    Each edge of the resulting graph also holds, as its "rcmnId" edge value,
    the ";"-joined ids of all catalysts leading to that edge.

    Caution: the projector does not work on the graph it is given directly.
    The constructor creates a clone subgraph (addCloneSubGraph) and reads the
    bipartite structure from that clone; the projection itself is added as a
    further subgraph of the graph that was passed in.
    """

    def __init__(self, analysisGraph, substrateTypeName="substrate", catalystTypeName="catalyst"):
        """
        analysisGraph     -- graph holding the bipartite data; kept as superGraph
        substrateTypeName -- value of the "type" property marking substrate nodes
        catalystTypeName  -- value of the "type" property marking catalyst nodes
        """
        self.superGraph = analysisGraph
        self.bipartiteGraph = analysisGraph.addCloneSubGraph()
        self.bipartiteGraph.setName("bipartiteGraph")
        # filled in by substrateProjection()
        self.substrateGraph = None
        self.substrateTypeName = substrateTypeName
        self.catalystTypeName = catalystTypeName
        self.type = self.bipartiteGraph.getStringProperty("type")
        self.ids = self.bipartiteGraph.getStringProperty("rcmnId")
        self.weights = self.bipartiteGraph.getDoubleProperty("edgeWeight")
        self.graphHandler = GraphHandler()

    def substrateProjection(self):
        """
        Build the substrate projection and store it in self.substrateGraph.

        Nodes whose "type" equals substrateTypeName are selected and turned
        into a subgraph of superGraph named substrateTypeName + "Projection".
        An edge is then added between every two distinct substrate nodes that
        are not linked yet and share at least one neighbor in the bipartite
        graph.
        """
        selected = self.bipartiteGraph.getBooleanProperty("selected")
        selected.setAllNodeValue(False)
        for n in self.bipartiteGraph.getNodes():
            if self.type[n] == self.substrateTypeName:
                selected[n] = True

        self.substrateGraph = self.superGraph.addSubGraph(selected)
        self.substrateGraph.setName(self.substrateTypeName + "Projection")

        weights = self.substrateGraph.getDoubleProperty("edgeWeight")
        catalystIds = self.substrateGraph.getStringProperty("rcmnId")

        # Every ordered pair (s1, s2) is visited; the findEdge test is what
        # prevents re-creating an edge that already exists.
        # NOTE(review): whether findEdge ignores edge direction is not visible
        # here -- confirm against GraphHandler before collapsing this to
        # unordered pairs.
        for s1 in self.substrateGraph.getNodes():
            for s2 in self.substrateGraph.getNodes():
                if s1.id == s2.id:
                    continue
                if self.graphHandler.findEdge(s1, s2, self.substrateGraph, False) is not None:
                    continue
                catalystSet = self.graphHandler.commonNeighbors(s1, s2, self.bipartiteGraph)
                if catalystSet:
                    e = self.substrateGraph.addEdge(s1, s2)
                    catalystIds.setEdgeValue(e, self.__catalystListValue__(catalystSet))
                    weights[e] = self.__scalarProduct__(s1, s2, catalystSet)

    def __scalarProduct__(self, s1, s2, catalystSet):
        """
        Return the sum, over the catalysts c in catalystSet, of
        weight(c - s1) * weight(c - s2), looked up in the bipartite graph.
        Returns 0.0 for an empty catalystSet.
        """
        prod = 0.0
        for c in catalystSet:
            e1 = self.graphHandler.findEdge(c, s1, self.bipartiteGraph, False)
            e2 = self.graphHandler.findEdge(c, s2, self.bipartiteGraph, False)
            prod += self.weights[e1] * self.weights[e2]
        return prod

    def __catalystListValue__(self, catalystSet):
        """
        Return the ids of the given catalyst nodes joined with ";"
        (in the iteration order of catalystSet).
        """
        return ";".join([self.ids.getNodeValue(c) for c in catalystSet])
class CatalystProjector:
    '''
    Project a bipartite graph onto its catalyst entities, using the
    already computed (projected) substrate graph.

    The class requires that:
    - nodes of the original graph carry an identifier in the 'rcmnId'
      string property
    - nodes of the original graph have a two-value 'type' attribute (string),
      used to sort out entities; one of these values selects the nodes of
      the projected graph
    - edges have weights stored in the 'edgeWeight' property (double)
    - each edge of the substrate graph holds, as its 'rcmnId' edge value,
      the ";"-joined ids of the catalysts that induced it

    Catalysts listed together on a substrate edge become linked in the
    catalyst graph; node and edge weights accumulate the weights of the
    substrate edges they appear on.
    '''

    def __init__(self, bipartiteGraph, substrateGraph, substrateTypeName = 'substrate', catalystTypeName = 'catalyst'):
        '''
        bipartiteGraph    -- the bipartite graph; its super graph hosts the projection
        substrateGraph    -- substrate projection whose edges list catalyst ids
        substrateTypeName -- value of the 'type' property marking substrate nodes
        catalystTypeName  -- value of the 'type' property marking catalyst nodes
        '''
        self.bipartiteGraph = bipartiteGraph
        self.superGraph = self.bipartiteGraph.getSuperGraph()
        self.substrateGraph = substrateGraph
        # filled in by catalystProjection()
        self.catalystGraph = None
        self.substrateTypeName = substrateTypeName
        self.catalystTypeName = catalystTypeName
        self.type = self.bipartiteGraph.getStringProperty('type')
        self.ids = self.bipartiteGraph.getStringProperty('rcmnId')
        self.weights = self.bipartiteGraph.getDoubleProperty('edgeWeight')
        self.graphHandler = GraphHandler()

    def catalystProjection(self):
        '''
        Build the catalyst projection and store it in self.catalystGraph.

        Steps: select the catalyst nodes into a new subgraph of superGraph
        named catalystTypeName + 'Projection', add every substrate edge's
        weight to each catalyst listed on that edge, then link every pair of
        catalysts listed on the same substrate edge, adding the substrate
        edge's weight to the catalyst edge.
        '''
        # create catalyst subgraph, insert all necessary nodes
        selected = self.bipartiteGraph.getBooleanProperty('selected')
        selected.setAllNodeValue(False)
        for n in self.bipartiteGraph.getNodes():
            if self.type[n] == self.catalystTypeName:
                selected[n] = True
        self.catalystGraph = self.superGraph.addSubGraph(selected)
        self.catalystGraph.setName(self.catalystTypeName + 'Projection')

        catalystIds = self.catalystGraph.getStringProperty('rcmnId')
        catalystWeights = self.catalystGraph.getDoubleProperty('edgeWeight')
        substrateIds = self.substrateGraph.getStringProperty('rcmnId')
        substrateWeights = self.substrateGraph.getDoubleProperty('edgeWeight')

        # assign weights to catalyst nodes
        for e in self.substrateGraph.getEdges():
            for catalystId in substrateIds[e].split(';'):
                n = self.graphHandler.findNodeById(catalystId, self.catalystGraph, catalystIds, False)
                catalystIds[n] = catalystId
                catalystWeights[n] += substrateWeights[e]

        # scan catalysts (stored as attributes of edges in the substrate
        # subgraph) and accordingly instantiate edges in the catalyst
        # subgraph; assign weights to catalyst edges
        for e in self.substrateGraph.getEdges():
            # resolve each listed id once per substrate edge instead of once
            # per pair; presumably a lookup with the last argument False has
            # no side effects -- NOTE(review): confirm against GraphHandler
            catalystNodes = [
                self.graphHandler.findNodeById(catalystId, self.catalystGraph, catalystIds, False)
                for catalystId in substrateIds[e].split(';')
            ]
            for i, n1 in enumerate(catalystNodes):
                for n2 in catalystNodes[i + 1:]:
                    f = self.graphHandler.findEdge(n1, n2, self.catalystGraph, True)
                    catalystWeights[f] += substrateWeights[e]
class KublaiLoader:
    '''
    Load a JSON file of topics into a bipartite graph.

    Every topic (and every comment of a topic) links its contributor to the
    topic's group; the weight of that edge grows by the length of the
    (UTF-8 encoded) description text.

    NOTE(review): the 'type' property is fetched here but never written in
    this class, and self.substrate / self.catalyst are only stored --
    presumably node types are assigned elsewhere; confirm.
    '''

    def __init__(self, graph, fileName):
        '''
        graph    -- graph to fill; it is cleared by processFile()
        fileName -- path of the JSON file to read
        '''
        self.graph = graph
        self.fileName = fileName
        self.type = self.graph.getStringProperty('type')
        self.ids = self.graph.getStringProperty('rcmnId')
        self.weights = self.graph.getDoubleProperty('edgeWeight')
        self.substrate = 'group'
        self.catalyst = 'member'
        self.graphHandler = GraphHandler()

    def processFile(self):
        '''
        builds a bipartite graph with edges connecting substrates
        (documents/groups) to catalysts (terms/members)
        -- substrates interact through catalysts

        The graph is cleared first, then every top-level element of the
        JSON document is processed as a topic, and finally the edge weights
        are passed to MetricHandler.rescale with the range [1.0, 10.0].

        todo/wishlist: would need to process time data as well (post dates)
        '''
        self.graph.clear()
        # the context manager closes the file even if parsing fails
        with open(self.fileName, "r") as f:
            obj = json.load(f)
        for t in obj:
            self.__processTopic__(t)
        mh = MetricHandler(self.graph)
        mh.rescale(self.weights, [1.0, 10.0], 'edges')

    def __processTopic__(self, topic):
        '''
        Add one topic to the graph. Topics without a "groupId" are skipped.
        Comments that could not be processed are printed.
        Always returns None.
        '''
        idContributor = topic["contributorName"].encode('UTF-8')
        if "groupId" not in topic:
            return None
        idGroup = topic["groupId"].encode('UTF-8')
        content = topic["description"].encode('UTF-8')
        nContrib = self.graphHandler.findNodeById(idContributor, self.graph, self.ids, True)
        nGroup = self.graphHandler.findNodeById(idGroup, self.graph, self.ids, True)
        # NOTE(review): unlike __processComment__, no fourth argument is
        # passed to findEdge here -- confirm its default matches.
        e = self.graphHandler.findEdge(nContrib, nGroup, self.graph)
        self.weights[e] += len(content)
        if "comments" in topic:
            for c in topic["comments"]:
                if self.__processComment__(c, idGroup, nGroup) is None:
                    print(c)

    def __processComment__(self, comment, idGroup, nodeGroup):
        '''
        Add one comment to the graph, linking its contributor to nodeGroup.

        Returns True when the comment was added. When the comment has no
        description, prints idGroup and the contributor id and returns None.
        '''
        idContributor = comment["contributorName"].encode('UTF-8')
        if comment["description"] is None:
            print(idGroup)
            print(idContributor)
            return None
        content = comment["description"].encode('UTF-8')
        nContrib = self.graphHandler.findNodeById(idContributor, self.graph, self.ids, True)
        e = self.graphHandler.findEdge(nContrib, nodeGroup, self.graph, True)
        self.weights[e] += len(content)
        return True