예제 #1
0
class SubstrateProjector:
    """
    Project a bipartite graph onto the entities of its substrate type.

    The input is a bipartite graph (typically obtained from a loader class,
    see KublaiLoader for example). Every path s1 - c - s2, where s1 and s2
    are substrate nodes and c is a node adjacent to both, induces an edge
    s1 - s2 in the projected graph. The weights on the edges s1 - c and
    c - s2 combine into the weight of s1 - s2 (see __scalarProduct__).

    The class reads the following properties of the input graph:
    - "rcmnId" (string): identifier of each node
    - "type" (string): nodes whose value equals substrateTypeName are kept
      in the projection
    - "edgeWeight" (double): weight of each edge (the original author
      expects positive real numbers)

    Each projected edge additionally stores, in its "rcmnId" property, the
    identifiers of all shared neighbours that induced it, joined by ";".

    The constructor does the cloning itself: it adds a clone sub-graph of
    analysisGraph (named "bipartiteGraph") and reads from that clone; the
    projection is then added as another sub-graph of analysisGraph.
    """

    def __init__(self, analysisGraph, substrateTypeName="substrate", catalystTypeName="catalyst"):
        """
        Prepare the projector.

        analysisGraph     -- graph holding the bipartite data; a clone
                             sub-graph is added to it and used as input
        substrateTypeName -- "type" value of the nodes to project onto
        catalystTypeName  -- "type" value of the other node class (stored,
                             not otherwise used by this class)
        """
        self.superGraph = analysisGraph
        self.bipartiteGraph = analysisGraph.addCloneSubGraph()
        self.bipartiteGraph.setName("bipartiteGraph")
        # Filled in by substrateProjection().
        self.substrateGraph = None
        self.substrateTypeName = substrateTypeName
        self.catalystTypeName = catalystTypeName
        self.type = self.bipartiteGraph.getStringProperty("type")
        self.ids = self.bipartiteGraph.getStringProperty("rcmnId")
        self.weights = self.bipartiteGraph.getDoubleProperty("edgeWeight")

        self.graphHandler = GraphHandler()

    def substrateProjection(self):
        """
        Build the projected graph and store it in self.substrateGraph.

        Substrate nodes are flagged in the "selected" boolean property and
        turned into a sub-graph of the super graph named
        substrateTypeName + "Projection". An edge is then added between
        every two distinct substrate nodes that have at least one common
        neighbour in the bipartite graph and are not linked yet.
        """
        selected = self.bipartiteGraph.getBooleanProperty("selected")
        selected.setAllNodeValue(False)
        for n in self.bipartiteGraph.getNodes():
            if self.type[n] == self.substrateTypeName:
                selected[n] = True
        self.substrateGraph = self.superGraph.addSubGraph(selected)
        self.substrateGraph.setName(self.substrateTypeName + "Projection")

        weights = self.substrateGraph.getDoubleProperty("edgeWeight")
        catalystIds = self.substrateGraph.getStringProperty("rcmnId")
        for s1 in self.substrateGraph.getNodes():
            for s2 in self.substrateGraph.getNodes():
                if s1.id == s2.id:
                    continue
                # Skip pairs that findEdge already reports as linked.
                # NOTE(review): whether findEdge ignores edge direction is
                # not visible here; the pair loop is kept unchanged.
                if self.graphHandler.findEdge(s1, s2, self.substrateGraph, False) is not None:
                    continue
                catalystSet = self.graphHandler.commonNeighbors(s1, s2, self.bipartiteGraph)
                if len(catalystSet) == 0:
                    continue
                e = self.substrateGraph.addEdge(s1, s2)
                catalystIds.setEdgeValue(e, self.__catalystListValue__(catalystSet))
                weights[e] = self.__scalarProduct__(s1, s2, catalystSet)

    def __scalarProduct__(self, s1, s2, catalystSet):
        """
        Return the sum over c in catalystSet of weight(c, s1) * weight(c, s2).

        Weights are read from the bipartite graph's "edgeWeight" property;
        an empty catalystSet yields 0.0.
        """
        prod = 0.0
        for c in catalystSet:
            e1 = self.graphHandler.findEdge(c, s1, self.bipartiteGraph, False)
            e2 = self.graphHandler.findEdge(c, s2, self.bipartiteGraph, False)
            prod += self.weights[e1] * self.weights[e2]
        return prod

    def __catalystListValue__(self, catalystSet):
        """
        Return the "rcmnId" values of the nodes in catalystSet joined by ";".
        """
        return ";".join([self.ids.getNodeValue(c) for c in catalystSet])
예제 #2
0
class CatalystProjector:
    '''
    Build the catalyst projection from a bipartite graph and its
    (already projected) substrate graph.

    The class reads the following properties:
    - 'rcmnId' (string): identifier of each node of the bipartite graph;
      on the edges of the substrate graph the same property holds the
      ";"-joined identifiers of the catalysts that induced the edge
    - 'type' (string): nodes whose value equals catalystTypeName become the
      nodes of the catalyst projection
    - 'edgeWeight' (double): weight of each substrate edge

    In the resulting graph both node and edge weights are written to the
    'edgeWeight' property:
    - a catalyst node accumulates the weight of every substrate edge that
      lists it
    - an edge between two catalysts accumulates the weight of every
      substrate edge that lists both of them
    '''

    def __init__(self, bipartiteGraph, substrateGraph, substrateTypeName='substrate', catalystTypeName='catalyst'):
        '''
        Prepare the projector.

        bipartiteGraph    -- bipartite graph; its super graph receives the
                             projection as a new sub-graph
        substrateGraph    -- substrate projection of bipartiteGraph
        substrateTypeName -- 'type' value of substrate nodes (stored, not
                             otherwise used by this class)
        catalystTypeName  -- 'type' value of the nodes to project onto
        '''
        self.bipartiteGraph = bipartiteGraph
        self.superGraph = self.bipartiteGraph.getSuperGraph()
        self.substrateGraph = substrateGraph
        # Filled in by catalystProjection().
        self.catalystGraph = None
        self.substrateTypeName = substrateTypeName
        self.catalystTypeName = catalystTypeName
        self.type = self.bipartiteGraph.getStringProperty('type')
        self.ids = self.bipartiteGraph.getStringProperty('rcmnId')
        self.weights = self.bipartiteGraph.getDoubleProperty('edgeWeight')
        self.graphHandler = GraphHandler()

    def catalystProjection(self):
        '''
        Build the catalyst graph and store it in self.catalystGraph.

        Catalyst nodes are flagged in the 'selected' boolean property and
        turned into a sub-graph of the super graph named
        catalystTypeName + 'Projection'; node and edge weights are then
        accumulated from the edges of the substrate graph.
        '''
        # Create the catalyst subgraph and insert all necessary nodes.
        selected = self.bipartiteGraph.getBooleanProperty('selected')
        selected.setAllNodeValue(False)
        for n in self.bipartiteGraph.getNodes():
            if self.type[n] == self.catalystTypeName:
                selected[n] = True
        self.catalystGraph = self.superGraph.addSubGraph(selected)
        self.catalystGraph.setName(self.catalystTypeName + 'Projection')

        catalystIds = self.catalystGraph.getStringProperty('rcmnId')
        catalystWeights = self.catalystGraph.getDoubleProperty('edgeWeight')
        substrateIds = self.substrateGraph.getStringProperty('rcmnId')
        substrateWeights = self.substrateGraph.getDoubleProperty('edgeWeight')

        # Assign weights to catalyst nodes: each substrate edge adds its
        # weight to every catalyst listed on it.
        for e in self.substrateGraph.getEdges():
            for catalystId in substrateIds[e].split(';'):
                n = self.graphHandler.findNodeById(catalystId, self.catalystGraph, catalystIds, False)
                catalystIds[n] = catalystId
                catalystWeights[n] += substrateWeights[e]

        # Scan the catalysts listed on each substrate edge, instantiate the
        # corresponding edges in the catalyst subgraph (findEdge is called
        # with True) and accumulate the substrate edge weight on them.
        for e in self.substrateGraph.getEdges():
            # Resolve every listed catalyst once per substrate edge instead
            # of once per pair; the same lookups were already performed in
            # the node-weight pass above.
            nodes = [
                self.graphHandler.findNodeById(catalystId, self.catalystGraph, catalystIds, False)
                for catalystId in substrateIds[e].split(';')
            ]
            for i, n1 in enumerate(nodes):
                for n2 in nodes[i + 1:]:
                    f = self.graphHandler.findEdge(n1, n2, self.catalystGraph, True)
                    catalystWeights[f] += substrateWeights[e]
예제 #3
0
class KublaiLoader:
    '''
    Load a JSON export of topics into a graph linking contributors to
    groups.

    Every topic and every comment adds the length of its (UTF-8 encoded)
    description to the weight of the edge between its contributor and the
    topic's group. Identifiers are stored in the 'rcmnId' string property
    and weights in the 'edgeWeight' double property.

    NOTE(review): the 'type' property and the substrate/catalyst labels
    ('group'/'member') are stored on the instance but never written to any
    node by the code in this class -- confirm where node types are set.
    '''

    def __init__(self, graph, fileName):
        '''
        graph    -- graph to fill; it is cleared by processFile()
        fileName -- path of the JSON file to read
        '''
        self.graph = graph
        self.fileName = fileName
        self.type = self.graph.getStringProperty('type')
        self.ids = self.graph.getStringProperty('rcmnId')
        self.weights = self.graph.getDoubleProperty('edgeWeight')
        self.substrate = 'group'
        self.catalyst = 'member'
        self.graphHandler = GraphHandler()

    def processFile(self):
        '''
        builds a bipartite graph with edges connecting substrates
        (documents/groups) to catalysts (terms/members) -- substrates
        interact through catalysts

        The graph is cleared first, every top-level item of the JSON file
        is processed as a topic, and the edge weights are finally passed to
        MetricHandler.rescale with the bounds [1.0, 10.0].

        todo/wishlist: would need to process time data as well (post dates)
        '''
        self.graph.clear()

        # The context manager closes the file even if parsing fails.
        with open(self.fileName, "r") as f:
            obj = json.loads(f.read())

        for t in obj:
            self.__processTopic__(t)

        mh = MetricHandler(self.graph)
        mh.rescale(self.weights, [1.0, 10.0], 'edges')

    def __processTopic__(self, topic):
        '''
        Add one topic and its comments to the graph.

        Topics without a "groupId" key are skipped. Comments that
        __processComment__ rejects are printed. Always returns None.
        '''
        idContributor = topic["contributorName"].encode('UTF-8')
        if "groupId" not in topic:
            return None
        idGroup = topic["groupId"].encode('UTF-8')
        content = topic["description"].encode('UTF-8')

        nContrib = self.graphHandler.findNodeById(idContributor, self.graph, self.ids, True)
        nGroup = self.graphHandler.findNodeById(idGroup, self.graph, self.ids, True)
        # NOTE(review): the create flag is omitted here although
        # __processComment__ passes True explicitly -- confirm that
        # findEdge creates the edge by default.
        e = self.graphHandler.findEdge(nContrib, nGroup, self.graph)
        self.weights[e] += len(content)

        if "comments" in topic:
            for c in topic["comments"]:
                if self.__processComment__(c, idGroup, nGroup) is None:
                    print(c)

    def __processComment__(self, comment, idGroup, nodeGroup):
        '''
        Add one comment to the graph.

        comment   -- dict with "contributorName" and "description" keys
        idGroup   -- identifier of the group, only used for the diagnostic
                     output
        nodeGroup -- node of the group the comment belongs to

        Returns True when the comment was added; prints the group and
        contributor identifiers and returns None when the description is
        None.
        '''
        idContributor = comment["contributorName"].encode('UTF-8')
        if comment["description"] is None:
            print(idGroup)
            print(idContributor)
            return None
        content = comment["description"].encode('UTF-8')

        nContrib = self.graphHandler.findNodeById(idContributor, self.graph, self.ids, True)
        e = self.graphHandler.findEdge(nContrib, nodeGroup, self.graph, True)
        self.weights[e] += len(content)
        return True