def _store_multiword(multiword, graph, node, node2, lemma, pos):
    """Record the multiword ``lemma`` built from vertices ``node`` and ``node2``."""
    first = graph.vs[node]
    second = graph.vs[node2]
    # NOTE(review): every assignment below targets the same key, so each one
    # overwrites the previous and only ``pos`` is left in the mapping. The
    # sequence is preserved on purpose (callers may rely on the stored value
    # being the position, and the attribute reads still fail loudly when a
    # vertex lacks one of them). Confirm whether a per-field record
    # (value / occur / fr / orig / syntactic / pos) was intended.
    multiword[lemma] = first["value"] + second["value"]
    multiword[lemma] = first["occur"] + second["occur"]
    multiword[lemma] = first["fr"] + second["fr"]
    multiword[lemma] = (first["orig"], second["orig"])
    multiword[lemma] = (first["syntactic"], second["syntactic"])
    multiword[lemma] = pos


def concatenate_candidates_grampal(graph, nodes, text):
    """Get the multiwords from the top nodes of the graph using grampal as service.

    Each sentence of ``text`` is analysed by Grampal and its response is
    walked line by line while a running token position is kept. For a top
    node located at the current position, a multiword is recorded when
    another top node sits at the next position ("lemma lemma") or, failing
    that, when the next line is a connector and a top node sits two
    positions ahead ("lemma connector lemma").

    Args:
        graph (`igraph`): Graph to be analysed. Its vertices are read for the
            "pos", "lema", "value", "occur", "fr", "orig" and "syntactic"
            attributes.
        nodes (:obj:`list`): The list of top nodes (vertex indices of ``graph``).
        text: Text of origin. It is iterated through ``text.sents`` and each
            sentence is read through its ``text`` attribute.

    Returns:
        multiword (:obj:`OrderedDict`): Multiword lemma mapped to the token
        position where it starts, or ``None`` when ``graph``, ``nodes`` or
        ``text`` is ``None``.
    """
    if graph is None:
        print("Error graph: Empty graph")
        return None
    if nodes is None:
        print("Error nodes: Top nodes is empty")
        return None
    if text is None:
        print("Error text: The length of the text must be len > 0")
        return None

    ginstance = Grampal()
    pos = 0
    multiword = OrderedDict()

    for sentence in text.sents:
        response = ginstance.analiza(sentence.text)
        # analiza() yields None for unusable input; treat that like a failed
        # request instead of crashing on ``None.status_code``.
        if response is None or response.status_code != 200:
            continue

        lines = response.text.splitlines()
        last = len(lines) - 1
        for i, line in enumerate(lines):
            if line == "":
                # Blank lines carry no token and do not advance the position.
                continue

            if i < last:
                next_line = lines[i + 1]
                matched = False
                for node in nodes:
                    if pos not in graph.vs[node]["pos"]:
                        continue
                    if matched:
                        break

                    # Case 1: two top nodes in adjacent positions.
                    for node2 in nodes:
                        if pos + 1 in graph.vs[node2]["pos"]:
                            lemma = (graph.vs[node]["lema"] + " "
                                     + graph.vs[node2]["lema"])
                            _store_multiword(multiword, graph, node, node2,
                                             lemma, pos)
                            matched = True
                            break
                    if matched or next_line == "":
                        continue

                    # Case 2: top node, connector, top node. A line must
                    # exist after the connector for the third token.
                    connector = ginstance.info_lemma(next_line).lower()
                    if connector not in CONNECTORS or i >= last - 1:
                        continue
                    for node2 in nodes:
                        if pos + 2 in graph.vs[node2]["pos"]:
                            lemma = (graph.vs[node]["lema"] + " " + connector
                                     + " " + graph.vs[node2]["lema"])
                            _store_multiword(multiword, graph, node, node2,
                                             lemma, pos)
                            matched = True

            pos += 1

    return multiword
def create_graph_grampal(text, k=2):
    """Create a graph with the keywords and their links using grampal as service.

    Each sentence of ``text`` is analysed by Grampal. Lines whose syntactic
    tag is in ``SYNTACTIC_GROUP`` and whose lemma is neither "UNKN" nor in
    ``EXCLUSIONS`` become keywords. One vertex is created per distinct lemma
    and an edge links every pair of different keywords that appear within
    ``k`` keywords of each other.

    Args:
        text: The text of origin. It is iterated through ``text.sents`` and
            each sentence is read through its ``text`` attribute.
        k (:obj:`int`): The correlation value (keyword window), by default = 2.

    Returns:
        graph (`igraph`): The graph generated. Vertices carry the attributes
        "lema", "pos" (list of token positions), "orig", "occur", "fr" and
        "syntactic". Returns ``None`` when ``text`` is ``None`` and ``-1``
        when ``k`` is not positive.
    """
    if text is None:
        print("Error text: The text cannot be void")
        return None
    if k <= 0:
        print("Error k: The correlation value has to be > 0")
        return -1

    graph = igraph.Graph()
    ginstance = Grampal()
    counter = 0                  # running position over non-empty lines
    values = []                  # (lemma, position) in reading order
    positions = OrderedDict()    # lemma -> every position where it occurs
    originals = OrderedDict()    # lemma -> last original form seen
    syntactics = OrderedDict()   # lemma -> last syntactic tag seen

    for sentence in text.sents:
        response = ginstance.analiza(sentence.text)
        # analiza() yields None for unusable input; treat that like a failed
        # request instead of crashing on ``None.status_code``.
        if response is None or response.status_code != 200:
            continue
        for line in response.text.splitlines():
            if line == "":
                continue
            syntactic = ginstance.info_syntactic(line)
            if syntactic in SYNTACTIC_GROUP:
                lemma = ginstance.info_lemma(line)
                if lemma != "UNKN" and lemma not in EXCLUSIONS:
                    values.append((lemma, counter))
                    # Positions are accumulated in a list: "occur"/"fr" need
                    # len() of it and consumers test ``pos in vertex["pos"]``.
                    positions.setdefault(lemma, []).append(counter)
                    originals[lemma] = ginstance.info_orig(line)
                    syntactics[lemma] = syntactic
            counter += 1

    total = len(positions)
    for lemma, where in positions.items():
        graph.add_vertices(1)
        vertex = graph.vs[graph.vcount() - 1]
        vertex["lema"] = lemma
        vertex["pos"] = where
        vertex["orig"] = originals[lemma]
        vertex["occur"] = len(where)
        vertex["fr"] = round(len(where) / total, 4)
        # NOTE(review): keeps the original ``[0]`` indexing of the stored
        # tag; if the tag is a string this is its first character — confirm.
        vertex["syntactic"] = syntactics[lemma][0]

    # Vertices were added in ``positions`` order, so the enumeration index is
    # the vertex id.
    index_of = {lemma: idx for idx, lemma in enumerate(positions)}
    for start in range(len(values)):
        current = values[start][0]
        for offset in range(1, k + 1):
            if start + offset >= len(values):
                break
            jump = values[start + offset][0]
            if current == jump:
                continue
            source = index_of[current]
            target = index_of[jump]
            # error=False makes get_eid return -1 for a missing edge instead
            # of raising, so no blanket ``except`` is needed.
            if graph.get_eid(source, target, error=False) == -1:
                graph.add_edge(source, target)

    return graph
def test_info_orig_none(self):
    """info_orig must return None when it receives None instead of text."""
    grampal = Grampal()
    original = grampal.info_orig(None)
    self.assertEqual(
        original,
        None,
        msg="El texto a analizar no puede estar a None",
    )
def test_info_orig_estandar(self):
    """info_orig must return the original word from the analysis of "Soy".

    The analysis is obtained from the Grampal service through ``analiza``,
    and its response text is handed to ``info_orig``.
    """
    ginstance = Grampal()
    respuesta = ginstance.analiza("Soy")
    # The failure message used to be copied from the None-input test; it now
    # describes the scenario this test actually covers.
    self.assertEqual(
        ginstance.info_orig(respuesta.text),
        "Soy",
        "Funcionamiento estandar",
    )
def test_info_orig_empty(self):
    """info_orig must return None when it receives an empty string."""
    grampal = Grampal()
    original = grampal.info_orig("")
    self.assertEqual(
        original,
        None,
        msg="El texto a analizar no puede estar vacío",
    )
def test_analiza_none(self):
    """analiza must return None when the text argument is None."""
    grampal = Grampal()
    result = grampal.analiza(None)
    self.assertEqual(
        result,
        None,
        msg="El paramatro texto no puede estar a None",
    )
def test_analiza_empty(self):
    """analiza must return None when the text argument is an empty string."""
    grampal = Grampal()
    result = grampal.analiza("")
    self.assertEqual(
        result,
        None,
        msg="El texto a analizar no puede estar vacío",
    )
def test_analiza_servidor(self):
    """analiza against a non-existent service path must answer with a 404."""
    grampal = Grampal('http://leptis.lllf.uam.es/api/soyunaprueba')
    result = grampal.analiza("Soy una prueba")
    status = result.status_code
    self.assertEqual(status, 404, msg="No hay respuesta del servidor")
def test_analiza_estandar(self):
    """analiza with the default service must answer with a 200 status."""
    grampal = Grampal()
    result = grampal.analiza("Soy una prueba")
    status = result.status_code
    self.assertEqual(status, 200, msg="Funcionamiento estandar")