Beispiel #1
0
	def __init__(self, db_host, db_port, db_name):
		"""
		Build the GEXF document from the pages returned by the database.

		A MongodbAPI is opened with the given connection parameters, the
		gexf/meta/graph/nodes/edges element skeleton is created, and then
		one node plus the matching edges are appended for every page
		yielded by find_pages(). The finished document is kept in self.doc.
		"""
		self.mongodbAPI = MongodbAPI(db_host, db_port, db_name)

		# Stored on the instance up front so that the methods called in the
		# loop below can reach the document while it is being filled.
		self.doc = Document()

		root = self.doc.createElement("gexf")
		self.doc.appendChild(root)

		# Empty <meta> placeholder directly under the root.
		root.appendChild(self.doc.createElement("meta"))

		graph_el = self.doc.createElement("graph")
		graph_el.setAttribute("mode", "static")
		graph_el.setAttribute("defaultedgetype", "directed")
		root.appendChild(graph_el)

		# appendChild() hands back the node it just attached.
		node_list = graph_el.appendChild(self.doc.createElement("nodes"))
		edge_list = graph_el.appendChild(self.doc.createElement("edges"))

		for page in self.mongodbAPI.find_pages():
			node_list.appendChild(self.create_node_from_page(page))
			for edge_el in self.create_edges_from_page(page):
				edge_list.appendChild(edge_el)
Beispiel #2
0
class Extractor:
	"""
	Read what is stored in the database and generate a GEXF document.

	Every page returned by the database becomes a <node>, and every link
	listed on a page becomes a directed <edge> from that page to the link
	target. The resulting DOM document is available as ``self.doc`` and
	can be serialised with ``to_xml()``.
	"""

	def __init__(self, db_host, db_port, db_name):
		"""
		Connect to the database and build the whole GEXF document.

		db_host, db_port and db_name are passed unchanged to MongodbAPI.
		All pages yielded by find_pages() are converted during
		construction.
		"""
		self.mongodbAPI = MongodbAPI(db_host, db_port, db_name)

		# self.doc has to be set before the loop below: the create_*
		# helpers create their elements through self.doc.
		doc = Document()
		self.doc = doc

		gexf = doc.createElement("gexf")
		doc.appendChild(gexf)

		meta = doc.createElement("meta")
		gexf.appendChild(meta)

		graph = doc.createElement("graph")
		graph.setAttribute("mode", "static")
		graph.setAttribute("defaultedgetype", "directed")
		gexf.appendChild(graph)

		nodes = doc.createElement("nodes")
		graph.appendChild(nodes)

		edges = doc.createElement("edges")
		graph.appendChild(edges)

		for page in self.mongodbAPI.find_pages():
			nodes.appendChild(self.create_node_from_page(page))
			for edge in self.create_edges_from_page(page):
				edges.appendChild(edge)

	def create_node_from_page(self, page):
		"""
		Return a <node> element for the page.

		Both the "id" and the "label" attribute are set to the node id
		derived from page["_url"].
		"""
		node_id = self.url_to_node_id(page["_url"])
		node = self.doc.createElement("node")
		node.setAttribute("id", node_id)
		node.setAttribute("label", node_id)
		return node

	def create_edges_from_page(self, page):
		"""
		Return a list of <edge> elements, one per entry of page["links"].

		Each edge goes from the page's own node id to the node id of the
		link and gets the id "<source>_to_<target>". A page that has no
		"links" entry, or whose "links" entry is None, yields an empty
		list instead of raising, so one incomplete page does not abort
		the whole export.
		"""
		id_source = self.url_to_node_id(page["_url"])

		try:
			links = page["links"]
		except KeyError:
			links = None
		if links is None:
			return []

		edges = []
		for link in links:
			id_target = self.url_to_node_id(link)
			edge = self.doc.createElement("edge")
			edge.setAttribute("id", id_source + "_to_" + id_target)
			edge.setAttribute("source", id_source)
			edge.setAttribute("target", id_target)
			edges.append(edge)
		return edges

	def url_to_node_id(self, url):
		"""Return the node id for a URL; currently the URL itself, unchanged."""
		return url

	def to_xml(self):
		"""Return the document as pretty-printed XML text, indented with tabs."""
		return self.doc.toprettyxml(indent="\t")