예제 #1
0
class CatalystProjector:
	'''
	Builds the catalyst-side projection of a bipartite graph, using an
	already computed substrate projection as its input.

	What the code reads from the graphs it is given:
	- a string property 'type' on the bipartite graph; nodes whose value
	  equals catalystTypeName are the ones kept in the projection
	- a string property 'rcmnId'; on substrate edges its value is split
	  on ';' and each piece is looked up as a catalyst node id
	- a double property 'edgeWeight' holding the weights

	Node and edge lookups are delegated to a GraphHandler instance
	(defined elsewhere; the boolean passed as last argument is presumably
	a "create if missing" flag -- TODO confirm against GraphHandler).
	'''

	def __init__(self, bipartiteGraph, substrateGraph, substrateTypeName = 'substrate', catalystTypeName = 'catalyst'):
		'''
		bipartiteGraph    -- graph holding both kinds of entities
		substrateGraph    -- projected substrate graph whose edges carry
		                     the ';'-joined catalyst ids in 'rcmnId'
		substrateTypeName -- 'type' value of substrate nodes (only stored)
		catalystTypeName  -- 'type' value of the nodes to project on
		'''
		# the graphs involved; the projection itself is created later,
		# as a subgraph of the bipartite graph's super graph
		self.bipartiteGraph = bipartiteGraph
		self.superGraph = self.bipartiteGraph.getSuperGraph()
		self.substrateGraph = substrateGraph
		self.catalystGraph = None

		# the two values expected in the 'type' property
		self.substrateTypeName = substrateTypeName
		self.catalystTypeName = catalystTypeName

		# property handles taken from the bipartite graph
		self.type = self.bipartiteGraph.getStringProperty('type')
		self.ids = self.bipartiteGraph.getStringProperty('rcmnId')
		self.weights = self.bipartiteGraph.getDoubleProperty('edgeWeight')

		self.graphHandler = GraphHandler()

	def catalystProjection(self):
		'''
		Creates the catalyst subgraph (stored in self.catalystGraph and
		named catalystTypeName + 'Projection'), then accumulates weights
		on its nodes and edges from the substrate graph edges.
		'''
		handler = self.graphHandler
		wantedType = self.catalystTypeName

		# step 1 -- select every node of the wanted type and turn the
		# selection into a subgraph of the super graph
		selection = self.bipartiteGraph.getBooleanProperty('selected')
		selection.setAllNodeValue(False)
		for node in self.bipartiteGraph.getNodes():
			if self.type[node] != wantedType:
				continue
			selection[node] = True
		projection = self.superGraph.addSubGraph(selection)
		self.catalystGraph = projection
		projection.setName(wantedType + 'Projection')

		projectionIds = projection.getStringProperty('rcmnId')
		projectionWeights = projection.getDoubleProperty('edgeWeight')
		substrateIds = self.substrateGraph.getStringProperty('rcmnId')
		substrateWeights = self.substrateGraph.getDoubleProperty('edgeWeight')

		# step 2 -- node weights: each substrate edge adds its weight to
		# every catalyst node listed in that edge's 'rcmnId' value
		for edge in self.substrateGraph.getEdges():
			for catalystId in substrateIds[edge].split(';'):
				node = handler.findNodeById(catalystId, projection, projectionIds, False)
				projectionIds[node] = catalystId
				projectionWeights[node] += substrateWeights[edge]

		# step 3 -- edge weights: every unordered pair of catalysts listed
		# on the same substrate edge gets an edge in the projection, and
		# that edge accumulates the substrate edge's weight
		for edge in self.substrateGraph.getEdges():
			members = substrateIds[edge].split(';')
			for position, firstId in enumerate(members):
				for secondId in members[position + 1:]:
					source = handler.findNodeById(firstId, projection, projectionIds, False)
					target = handler.findNodeById(secondId, projection, projectionIds, False)
					link = handler.findEdge(source, target, projection, True)
					projectionWeights[link] += substrateWeights[edge]
예제 #2
0
class KublaiLoader:
	'''
	Loads a JSON file of topics into a graph, linking each contributor
	to the group a topic (or one of its comments) was posted in.

	Nodes are looked up / created through a GraphHandler using the
	'rcmnId' string property; edge weights live in the 'edgeWeight'
	double property and grow by the length of the UTF-8 encoded
	description of every topic or comment processed.
	'''

	def __init__(self, graph, fileName):
		'''
		graph    -- graph to fill (it is cleared by processFile)
		fileName -- path of the JSON file to read
		'''
		self.graph = graph
		self.fileName = fileName
		self.type = self.graph.getStringProperty('type')
		self.ids = self.graph.getStringProperty('rcmnId')
		self.weights = self.graph.getDoubleProperty('edgeWeight')
		# type names for the two sides of the bipartite graph
		# (only stored here; not read by the methods of this class)
		self.substrate = 'group'
		self.catalyst = 'member'
		self.graphHandler = GraphHandler()

	def processFile(self):
		'''
		builds a bipartite graph with edges connecting substrates (documents/groups)
		to catalysts (terms/members) -- substrates interact through catalysts

		The graph is cleared first, every entry of the JSON document is
		handed to __processTopic__, and the edge weights are finally passed
		to MetricHandler.rescale with the range [1.0, 10.0].

		todo/wishlist: would need to process time data as well (post dates)
		'''
		self.graph.clear()

		# the context manager closes the file even if decoding fails
		# (the handle used to be left open)
		with open(self.fileName, "r") as f:
			topics = json.load(f)

		for t in topics:
			self.__processTopic__(t)

		mh = MetricHandler(self.graph)
		mh.rescale(self.weights, [1.0, 10.0], 'edges')

	def __processTopic__(self, topic):
		'''
		Adds the contributor -> group edge for one topic and processes its
		comments, if any. Topics without a "groupId" key are ignored.
		Always returns None.
		'''
		idContributor = topic["contributorName"].encode('UTF-8')
		if "groupId" not in topic:
			return None
		idGroup = topic["groupId"].encode('UTF-8')
		content = topic["description"].encode('UTF-8')

		nContrib = self.graphHandler.findNodeById(idContributor, self.graph, self.ids, True)
		nGroup = self.graphHandler.findNodeById(idGroup, self.graph, self.ids, True)
		# NOTE(review): no explicit last argument here, unlike the call in
		# __processComment__ -- relies on findEdge's default; confirm intent
		e = self.graphHandler.findEdge(nContrib, nGroup, self.graph)
		self.weights[e] += len(content)

		if "comments" in topic:
			for c in topic["comments"]:
				if self.__processComment__(c, idGroup, nGroup) is None:
					# comment was skipped: dump it for inspection
					print(c)

	def __processComment__(self, comment, idGroup, nodeGroup):
		'''
		Adds the weight of one comment to the edge between its contributor
		and the group node.

		comment   -- dict with "contributorName" and "description" keys
		idGroup   -- id of the group (only used in the diagnostic output)
		nodeGroup -- node of the group the enclosing topic belongs to

		Returns True when the comment was counted, None when it has no
		description (the group and contributor ids are printed then).
		'''
		idContributor = comment["contributorName"].encode('UTF-8')
		if comment["description"] is None:
			print(idGroup)
			print(idContributor)
			return None
		content = comment["description"].encode('UTF-8')

		nContrib = self.graphHandler.findNodeById(idContributor, self.graph, self.ids, True)
		e = self.graphHandler.findEdge(nContrib, nodeGroup, self.graph, True)
		self.weights[e] += len(content)
		return True