Esempio n. 1
0
def concatenate_candidates_grampal(graph, nodes, text):
	"""Build multiword candidates from top nodes that occur next to each other.

	Each sentence of ``text`` is sent to Grampal and the response is walked
	line by line; every non-empty line advances a running position counter.
	When the current position belongs to a top node, that node is joined with:

	* the top node found at the following position, or, failing that,
	* the top node found two positions ahead, provided the next line is not
	  empty and its lowercased lemma is listed in ``CONNECTORS`` (the
	  connector lemma is placed between both lemmas).

	Sentences whose request does not answer with status 200 are skipped and
	do not advance the position counter.

	Args:
		graph (`igraph`): Graph whose vertices carry the "lema" and "pos"
			attributes; "pos" must support the ``in`` operator.
		nodes (:obj:`list`): Vertex indices of the top nodes.
		text: Parsed text exposing ``sents``, each sentence exposing ``text``
			(presumably a spaCy document -- TODO confirm against the caller).

	Returns:
		:obj:`OrderedDict`: Maps every multiword lemma to the position of its
		first word (a later occurrence overwrites an earlier one). None is
		returned when any of the arguments is None.

	"""
	if graph is None:
		print("Error graph: Empty graph")
		return None
	if nodes is None:
		print("Error nodes: Top nodes is empty")
		return None
	if text is None:
		print("Error text: The length of the text must be len > 0")
		return None
	ginstance = Grampal()
	pos = 0
	multiword = OrderedDict()

	for sentence in text.sents:
		response = ginstance.analiza(sentence.text)
		if response.status_code != 200:
			continue
		lines = response.text.splitlines()
		last = len(lines) - 1
		for i, line in enumerate(lines):
			if line == "":
				continue
			# The last line has no successor, so nothing can be joined to it.
			if i < last:
				for node in nodes:
					if pos not in graph.vs[node]["pos"]:
						continue
					joined = False
					# Case 1: another top node sits right after this one.
					for node2 in nodes:
						if pos + 1 in graph.vs[node2]["pos"]:
							lema_aux = graph.vs[node]["lema"] + " " + graph.vs[node2]["lema"]
							# NOTE(review): this entry used to be assigned the summed
							# "value"/"occur"/"fr" and the "orig"/"syntactic" pairs
							# before being overwritten with the position, so only
							# the position was ever kept. Confirm whether a richer
							# record was intended.
							multiword[lema_aux] = pos
							joined = True
							break
					# Case 2: a connector word separates the two top nodes.
					if not joined and lines[i + 1] != "":
						connector = ginstance.info_lemma(lines[i + 1]).lower()
						if connector in CONNECTORS and i < last - 1:
							for node2 in nodes:
								if pos + 2 in graph.vs[node2]["pos"]:
									lema_aux = graph.vs[node]["lema"] + " " + connector + " " + graph.vs[node2]["lema"]
									multiword[lema_aux] = pos
									joined = True
					# One multiword per position is enough.
					if joined:
						break
			pos += 1
	return multiword
Esempio n. 2
0
def create_graph_grampal(text, k=2):
	"""Create a graph with the keywords and their links using Grampal as service.

	Every sentence of ``text`` is analysed by Grampal (sentences whose request
	does not answer with status 200 are skipped). A response line is kept as
	a keyword when its syntactic tag is in ``SYNTACTIC_GROUP`` and its lemma
	is neither "UNKN" nor listed in ``EXCLUSIONS``. Each distinct lemma
	becomes one vertex, and two different lemmas are linked when they are at
	most ``k`` places apart in the sequence of kept keywords.

	Vertex attributes:
		lema: The lemma.
		pos: List of the positions where the lemma was found; every
			non-empty response line advances the position counter.
		orig: Original form reported for the last occurrence.
		occur: Number of occurrences.
		fr: Occurrences divided by the number of distinct lemmas, rounded
			to 4 decimals.
		syntactic: First character of the tag of the last occurrence.

	Args:
		text: Parsed text exposing ``sents``, each sentence exposing ``text``
			(presumably a spaCy document -- TODO confirm against the caller).
		k (:obj:`int`): The correlation value, by default = 2.

	Returns:
		graph (`igraph`): The graph generated. None is returned when ``text``
		is None and -1 when ``k`` is not positive.

	"""
	if text is None:
		print("Error text: The text cannot be void")
		return None
	if k <= 0:
		print("Error k: The correlation value has to be > 0")
		return -1
	graph = igraph.Graph()
	ginstance = Grampal()
	counter = 0
	sequence = []  # kept lemmas, in reading order (repetitions included)
	positions = OrderedDict()  # lemma -> every position where it occurs
	originals = OrderedDict()  # lemma -> original form (last occurrence)
	syntactics = OrderedDict()  # lemma -> syntactic tag (last occurrence)
	for sentence in text.sents:
		response = ginstance.analiza(sentence.text)
		if response.status_code != 200:
			continue
		for line in response.text.splitlines():
			if line == "":
				continue
			syntactic = ginstance.info_syntactic(line)
			if syntactic in SYNTACTIC_GROUP:
				lemma = ginstance.info_lemma(line)
				if lemma != "UNKN" and lemma not in EXCLUSIONS:
					sequence.append(lemma)
					# Positions are accumulated per lemma: "occur" and "fr" need
					# their count, and a single int here made len() fail.
					positions.setdefault(lemma, []).append(counter)
					originals[lemma] = ginstance.info_orig(line)
					syntactics[lemma] = syntactic
			counter += 1

	distinct = len(positions)
	for lemma, where in positions.items():
		graph.add_vertices(1)
		vertex = graph.vs[graph.vcount() - 1]
		vertex["lema"] = lemma
		vertex["pos"] = where
		vertex["orig"] = originals.get(lemma)
		vertex["occur"] = len(where)
		vertex["fr"] = round(len(where) / distinct, 4)
		# NOTE(review): [0] keeps only the first character of the tag; confirm
		# whether the whole tag (or the tag of the first occurrence) was meant.
		vertex["syntactic"] = syntactics.get(lemma)[0]

	# Vertices were added in the iteration order of `positions`.
	index_of = {lemma: index for index, lemma in enumerate(positions)}
	for start, current in enumerate(sequence):
		for jump in sequence[start + 1:start + k + 1]:
			if current == jump:
				continue
			source, target = index_of[current], index_of[jump]
			# error=False reports a missing edge as -1 instead of raising.
			if graph.get_eid(source, target, error=False) == -1:
				graph.add_edge(source, target)
	return graph
Esempio n. 3
0
	def test_info_orig_none(self):
		# info_orig is expected to return None when it is given None.
		parser = Grampal()
		outcome = parser.info_orig(None)
		self.assertEqual(outcome, None, "El texto a analizar no puede estar a None")
Esempio n. 4
0
	def test_info_orig_estandar(self):
		# Standard case: the response obtained for "Soy" is handed to
		# info_orig, which is expected to give back "Soy".
		ginstance = Grampal()
		respuesta = ginstance.analiza("Soy")
		# The failure message used to be the one of the None test, which was
		# misleading for this case.
		self.assertEqual(ginstance.info_orig(respuesta.text), "Soy", "info_orig debe devolver la forma original de la palabra")
Esempio n. 5
0
	def test_info_orig_empty(self):
		# info_orig is expected to return None for an empty string.
		parser = Grampal()
		outcome = parser.info_orig("")
		self.assertEqual(outcome, None, "El texto a analizar no puede estar vacío")
Esempio n. 6
0
	def test_analiza_none(self):
		# analiza is expected to return None when the text is None.
		service = Grampal()
		self.assertEqual(service.analiza(None), None, "El paramatro texto no puede estar a None")
Esempio n. 7
0
	def test_analiza_empty(self):
		# analiza is expected to return None when the text is empty.
		service = Grampal()
		self.assertEqual(service.analiza(""), None, "El texto a analizar no puede estar vacío")
Esempio n. 8
0
	def test_analiza_servidor(self):
		# A client built with this URL is expected to get a 404 status back.
		endpoint = 'http://leptis.lllf.uam.es/api/soyunaprueba'
		service = Grampal(endpoint)
		reply = service.analiza("Soy una prueba")
		self.assertEqual(reply.status_code, 404, "No hay respuesta del servidor")
Esempio n. 9
0
	def test_analiza_estandar(self):
		# With the default client, analysing a plain sentence is expected to
		# answer with status 200.
		service = Grampal()
		reply = service.analiza("Soy una prueba")
		self.assertEqual(reply.status_code, 200, "Funcionamiento estandar")