def annotate_timex(text, date, lang):
    """Strip temporal expressions from ``text`` using the Añotador web service.

    The text is posted to the service and the reply is split on ``'|'``.
    Every chunk that carries markup (contains ``'<'`` and is longer than two
    characters) is treated as a temporal expression and discarded; the
    remaining chunks are stripped of surrounding whitespace and of commas.

    Side effects: overwrites ``texto.txt`` in the working directory with
    ``text``, prints progress information and writes to ``conts_log``.

    Args:
        text: Text to annotate. On failure it is split on ``'| '``, so it is
            presumably a ``'| '``-joined list of terms (confirm with callers).
        date: Not used; the request always carries an empty ``inputDate``.
        lang: Language code sent as the ``lan`` field of the request.

    Returns:
        list: The cleaned chunks when the service answers with status 200,
        otherwise ``text.split('| ')`` unchanged.
    """
    import json  # local import: used only to serialise the request body

    # Debug dump of the input; ``with`` makes sure the handle is closed.
    with open('texto.txt', 'w', encoding='utf-8') as f:
        f.write(text)

    start_time = time()
    url = 'https://annotador.oeg.fi.upm.es/annotate'
    # Serialise with json.dumps so quotes, backslashes and newlines in the
    # text are escaped. Compact separators and ensure_ascii=False keep the
    # body byte-identical to the old hand-built string for plain input.
    params = json.dumps(
        {
            'inputText': text,
            'inputDate': '',
            'domain': 'legal',
            'lan': lang,
            'format': 'timex3',
        },
        ensure_ascii=False,
        separators=(',', ':'),
    )
    headers = {'Content-Type': 'application/json;charset=utf-8'}
    response = requests.request("POST", url, headers=headers,
                                data=params.encode('utf8'))
    textanotador = response.text
    print('ENTRA ANOTADOR')
    print(textanotador)
    code = response.status_code
    list_anotador = textanotador.split('|')
    print(list_anotador)

    if code != 200:
        print(
            'WARNING: Annotador is down. Temporal expressions could not be removed.'
        )
        conts_log.error(
            'Annotador is down. Temporal expressions could not be removed.',
            code)
        return text.split('| ')

    # Partition in a single pass instead of popping from the list while
    # iterating over it, which used to skip the chunk after every removal.
    deletes = []
    anotador = []
    for chunk in list_anotador:
        if '<' in chunk and len(chunk) > 2:
            deletes.append(chunk)
        else:
            anotador.append(chunk.strip().replace(',', ''))
    cont = len(deletes)

    elapsed_time = time() - start_time
    txt = 'AÑOTADOR, DELETE (' + str(cont) + ') NEW LIST SIZE: (' + str(
        len(anotador)) + ') TIME: (' + str(elapsed_time) + ')'
    joind = ', '.join(deletes)
    print('AÑOTADOR DELETE', cont, len(anotador), elapsed_time)
    conts_log.information(txt, 'TERMS REMOVED: ' + joind)
    return anotador
def clean_terms(termlist, lang_in):
    """Remove stop words from a list of terms.

    The stop-word set is the NLTK list for the language plus the extra words
    (one per line) of the file referenced by ``sw_spanish`` / ``sw_english``.
    Each term is first stripped of leading/trailing ``, . :``; it is dropped
    when it, or its lower-cased form, is a stop word. Kept terms additionally
    lose every comma and hyphen.

    Args:
        termlist: Iterable of term strings. It is not modified.
        lang_in: ``"es"`` or ``"en"``. Any other value used to crash with an
            unbound-variable error; it now disables stop-word filtering
            (terms are still cleaned) and prints a warning.

    Returns:
        list: The cleaned terms that are not stop words, in input order.
    """
    start_time = time()
    if lang_in == "es":
        stop = set(stopwords.words('spanish'))
        extra_path = sw_spanish
    elif lang_in == "en":
        stop = set(stopwords.words('english'))
        extra_path = sw_english
    else:
        print('WARNING: no stop words available for language', lang_in)
        stop = set()
        extra_path = None

    if extra_path is not None:
        # ``with`` closes the handle; a set gives O(1) membership tests.
        with open(extra_path, 'r', encoding='utf-8') as file:
            stop.update(line.strip() for line in file)

    clean_list = []
    deletes = []
    for raw in termlist:
        k = raw.strip(',.:')
        if k in stop or k.lower() in stop:
            deletes.append(k)
        else:
            clean_list.append(k.replace(',', '').replace('-', ''))
    print(deletes)

    cont = len(termlist) - len(clean_list)
    elapsed_time = time() - start_time
    txt = ('CLEAN_TERMS, DELETE (' + str(cont) + ') NEW LIST SIZE: ('
           + str(len(clean_list)) + ') TIME: (' + str(elapsed_time) + ')')
    joind = ', '.join(deletes)
    conts_log.information(txt, 'TERMS REMOVED: ' + joind)
    print('CLEAN_TERMS, DELETE', cont, len(clean_list), elapsed_time)
    return clean_list
def delete_numbers(list_):
    """Remove, in place, every term that contains a number word.

    Number words are read from ``./data/numberlist_es`` (one per line). A
    term is removed when a number word occurs in it delimited by spaces or by
    the start/end of the term.

    Args:
        list_: List of term strings. It is modified in place.

    Returns:
        list: The same ``list_`` object, without the removed terms.
    """
    start_time = time()
    with open('./data/numberlist_es', 'r', encoding='utf-8') as file:
        number_words = [line[:-1] if line[-1:] == '\n' else line
                        for line in file]
    # A blank line is not a number word; padded it would become a double
    # space and match unrelated terms.
    padded_numbers = [' ' + word + ' ' for word in number_words if word]

    # Build the kept/removed lists in one pass. Popping from ``list_`` while
    # iterating over it skipped the term that followed each removal.
    kept = []
    deletes = []
    for term in list_:
        padded_term = ' ' + term + ' '
        if any(number in padded_term for number in padded_numbers):
            deletes.append(term)
        else:
            kept.append(term)
    list_[:] = kept  # keep the in-place contract callers may rely on
    cont = len(deletes)

    elapsed_time = time() - start_time
    txt = ('NUMBERS, DELETE' + ' (' + str(cont) + ') NEW LIST SIZE: ('
           + str(len(list_)) + ') TIME: (' + str(elapsed_time) + ')')
    joind = ', '.join(deletes)
    print('NUMEROS DELETE', cont, len(list_), elapsed_time)
    conts_log.information(txt, 'TERMS REMOVED: ' + joind)
    return list_
def quit_plural(valuelist):
    """Singularise plural-looking terms and drop the resulting duplicates.

    Every entry of ``valuelist`` is rewritten in place: commas are removed
    and hyphens become spaces. If the cleaned term ends in ``s`` it is then
    either restored verbatim (when one of its words is listed in
    ``./data/numberlist_es``) or replaced by a singular form built word by
    word with the suffix heuristics below.

    NOTE(review): the suffix rules are reproduced unchanged from the previous
    implementation, including the extra leading spaces they introduce inside
    multi-word results; only the bookkeeping around them was fixed.

    Args:
        valuelist: List of term strings. Entries are overwritten in place.

    Returns:
        list: The distinct values of the rewritten ``valuelist`` in order of
        first appearance (previously the order came from a ``set`` and was
        arbitrary).
    """
    start_time = time()
    with open('./data/numberlist_es', 'r', encoding='utf-8') as file:
        # rstrip instead of [:-1]: the last line may lack a trailing newline.
        number_words = {line.rstrip('\n') for line in file}
    number_words.discard('')  # a blank line is not a number word

    # Index by position; looking the value up again with list.index() could
    # hit an earlier slot holding the same text.
    for idx, original in enumerate(valuelist):
        term = original.replace(',', '').replace('-', ' ')
        valuelist[idx] = term
        if not term.endswith('s'):  # also covers the 'es' ending
            continue

        words = term.split(' ')
        if any(word in number_words for word in words):
            # Terms that contain a number word keep their original text.
            plu = original
        else:
            plu = ''
            for j in words:
                if ((j.endswith('es') and j[-3:-2] != 't' and j[-3:-2] != 'l')
                        or j.endswith('les')):
                    plu += ' ' + j[:-2]
                    if plu.endswith('on'):
                        plu = ' ' + plu[:-2] + 'ón'
                    if plu.endswith('v'):
                        plu = ' ' + plu + 'e'
                    if plu.endswith('bl'):
                        plu = ' ' + plu + 'e'
                    if plu.endswith('br'):
                        plu = ' ' + plu + 'e'
                elif j.endswith('s'):
                    plu += ' ' + j[:-1]
                    if words.index(j) > 0:
                        bef = words[0]
                        if bef.endswith('n') and not bef.endswith('ón'):
                            # First word looks like a plural verb form:
                            # drop the final 'n' of the first result word.
                            splb = plu.split(' ')
                            firts = splb[1]
                            if firts.endswith('n'):
                                plu = firts[:-1] + ' ' + ' '.join(splb[2:])
                else:
                    plu += ' ' + j
        valuelist[idx] = plu.strip()

    # Order-preserving de-duplication; repeated values are reported as removed.
    quit_plu = []
    deletes = []
    seen = set()
    for value in valuelist:
        if value in seen:
            deletes.append(value)
        else:
            seen.add(value)
            quit_plu.append(value)

    removed = len(valuelist) - len(quit_plu)
    elapsed_time = time() - start_time
    txt = ('PLURAL, DELETE' + ' (' + str(removed) + ') NEW LIST SIZE: ('
           + str(len(quit_plu)) + ') TIME: (' + str(elapsed_time) + ')')
    joind = ', '.join(deletes)
    print('PLURALES DELETE', removed, len(quit_plu), elapsed_time)
    conts_log.information(txt, 'TERMS REMOVED: ' + joind)
    return quit_plu
# Tags recognised by delate_pattern, mapped to the coarse kind used in the
# patterns below. Verb tags are matched by their 'VB' prefix in _delate_kind.
_DELATE_TAG_KINDS = {
    'MD': 'aux',
    'NNP': 'noun',
    'RB': 'adv',
    'JJ': 'adj',
    'CC': 'sconj',
}

# Two-word kind sequences that mark a term for removal.
_DELATE_PAIR_PATTERNS = frozenset({
    ('aux', 'verb'), ('verb', 'aux'), ('verb', 'verb'),
    ('noun', 'verb'), ('noun', 'aux'),
    ('adv', 'adj'), ('adj', 'adv'),
    ('adv', 'aux'), ('aux', 'adv'),
    ('adv', 'verb'), ('verb', 'adv'),
    ('noun', 'adv'), ('adv', 'noun'),
    ('verb', 'noun'), ('aux', 'noun'), ('adj', 'noun'),
})

# Three-word kind sequences that mark a term for removal.
_DELATE_TRIPLE_PATTERNS = frozenset({
    ('noun', 'verb', 'verb'), ('noun', 'aux', 'verb'),
    ('noun', 'aux', 'aux'), ('noun', 'verb', 'aux'),
    ('noun', 'verb', 'noun'), ('noun', 'aux', 'noun'),
    ('verb', 'noun', 'noun'), ('noun', 'noun', 'verb'),
    ('aux', 'noun', 'noun'), ('noun', 'noun', 'aux'),
    ('aux', 'verb', 'noun'), ('noun', 'verb', 'adj'),
    ('verb', 'noun', 'adj'), ('noun', 'aux', 'adj'),
    ('noun', 'adv', 'adj'), ('adj', 'adv', 'adj'),
    ('noun', 'adv', 'sconj'), ('adj', 'sconj', 'adv'),
    ('aux', 'noun', 'adj'), ('verb', 'verb', 'verb'),
    ('adj', 'noun', 'adj'),
})


def _delate_kind(tag):
    """Return the coarse kind for a tagger tag, or None if it is ignored."""
    if tag.startswith('VB'):
        return 'verb'
    return _DELATE_TAG_KINDS.get(tag)


def _delate_should_drop(kinds, n_words):
    """Tell whether a term with these recognised kinds must be removed."""
    if len(kinds) == 1:
        # A single recognised token that is an adverb, whatever the number
        # of words in the term.
        return kinds[0] == 'adv'
    if len(kinds) == 2 and n_words == 2:
        return tuple(kinds) in _DELATE_PAIR_PATTERNS
    if len(kinds) == 3 and n_words == 3:
        return tuple(kinds) in _DELATE_TRIPLE_PATTERNS
    return False


def delate_pattern(anotador):
    """Remove terms whose part-of-speech sequence matches an unwanted pattern.

    Each term longer than one character is split on spaces and tagged with a
    remote CoreNLP server. Recognised tags are reduced to coarse kinds; a
    term is removed when its kind sequence is a lone adverb or one of the
    two/three-word patterns in the tables above. A single-word term tagged as
    auxiliary or verb is replaced in ``anotador`` by its lemma from ``nlp``.

    Fixes over the previous version: verb tags are now actually recognised
    (the old test compared a one-character slice with ``'VB'`` and never
    matched), the tagger client is created once instead of per term, and each
    removed term is counted once even if a pattern was listed twice.

    Args:
        anotador: List of term strings. It is modified in place.

    Returns:
        list: The same ``anotador`` object after lemma replacement and removal.
    """
    start_time = time()
    deletes = []
    lang = 'es'
    # If the lynx server is down, https://corenlp.run/ can be tried instead.
    pos_tagger = CoreNLPParser(
        'https://corenlp-tool.lynx-project.eu/?pipelineLanguage=' + lang,
        tagtype='pos')

    for position, term in enumerate(anotador):
        if len(term) <= 1:
            continue
        words = term.split(' ')
        kinds = []
        for t in pos_tagger.tag(words):
            print(t)
            kind = _delate_kind(t[1])
            if kind is None:
                continue
            kinds.append(kind)
            if kind in ('aux', 'verb'):
                doc = nlp(t[0])
                print(doc)
                lem = ''.join(tok.lemma_ for tok in doc)
                if lem == term:
                    lem = t[0]
                if len(words) == 1:
                    # Replace by position: no list.index() lookup that could
                    # fail or hit another slot.
                    anotador[position] = str(lem)
        if _delate_should_drop(kinds, len(words)):
            deletes.append(term)

    # Removal happens after the scan so the list is not resized while it is
    # being iterated.
    for term in deletes:
        if term in anotador:
            anotador.remove(term)

    cont = len(deletes)
    elapsed_time = time() - start_time
    txt = ('PATRONES, DELETE' + ' (' + str(cont) + ') NEW LIST SIZE: ('
           + str(len(anotador)) + ') TIME: (' + str(elapsed_time) + ')')
    joind = ', '.join(deletes)
    print('PATRONES DELETE', cont, len(anotador), elapsed_time)
    conts_log.information(txt, 'TERMS REMOVED: ' + joind)
    return anotador
# Universal POS tags recognised by delete_pattern, mapped to the coarse kind
# used in the patterns below. Any other tag is ignored.
_DELETE_UPOS_KINDS = {
    'AUX': 'aux',
    'NOUN': 'noun',
    'VERB': 'verb',
    'ADV': 'adv',
    'ADJ': 'adj',
    'SCONJ': 'sconj',
}

# Two-word kind sequences that mark a term for removal.
_DELETE_PAIR_PATTERNS = frozenset({
    ('aux', 'verb'), ('verb', 'aux'), ('verb', 'verb'),
    ('noun', 'verb'), ('noun', 'aux'),
    ('adv', 'adj'), ('adj', 'adv'),
    ('adv', 'aux'), ('aux', 'adv'),
    ('adv', 'verb'), ('verb', 'adv'),
    ('noun', 'adv'), ('adv', 'noun'),
    ('verb', 'noun'), ('aux', 'noun'), ('adj', 'noun'),
})

# Three-word kind sequences that mark a term for removal.
_DELETE_TRIPLE_PATTERNS = frozenset({
    ('noun', 'verb', 'verb'), ('noun', 'aux', 'verb'),
    ('noun', 'aux', 'aux'), ('noun', 'verb', 'aux'),
    ('noun', 'verb', 'noun'), ('noun', 'aux', 'noun'),
    ('verb', 'noun', 'noun'), ('noun', 'noun', 'verb'),
    ('aux', 'noun', 'noun'), ('noun', 'noun', 'aux'),
    ('aux', 'verb', 'noun'), ('noun', 'verb', 'adj'),
    ('verb', 'noun', 'adj'), ('noun', 'aux', 'adj'),
    ('noun', 'adv', 'adj'), ('adj', 'adv', 'adj'),
    ('noun', 'adv', 'sconj'), ('adj', 'sconj', 'adv'),
    ('aux', 'noun', 'adj'), ('verb', 'verb', 'verb'),
    ('adj', 'noun', 'adj'),
})


def _delete_should_drop(kinds, n_words):
    """Tell whether a term with these recognised kinds must be removed."""
    if len(kinds) == 1:
        # A single recognised token that is an adverb, whatever the number
        # of words in the term.
        return kinds[0] == 'adv'
    if len(kinds) == 2 and n_words == 2:
        return tuple(kinds) in _DELETE_PAIR_PATTERNS
    if len(kinds) == 3 and n_words == 3:
        return tuple(kinds) in _DELETE_TRIPLE_PATTERNS
    return False


def delete_pattern(anotador, pos_tagger):
    """Remove terms whose part-of-speech sequence matches an unwanted pattern.

    Each term longer than one character is tagged with ``pos_tagger`` and the
    ``upos`` tags of the words of the first sentence are reduced to coarse
    kinds. A term is removed when its kind sequence is a lone adverb or one
    of the two/three-word patterns in the tables above. A term made of a
    single space-separated word that is tagged AUX or VERB is replaced in
    ``anotador`` by its lemma from ``nlp``.

    Fixes over the previous version: lemma replacement is done by position
    (a second AUX/VERB word in the same term used to raise ``ValueError``),
    terms for which the tagger yields no sentence are skipped instead of
    raising ``IndexError``, and each removed term is counted once even if a
    pattern was listed twice.

    Args:
        anotador: List of term strings. It is modified in place.
        pos_tagger: Callable returning an object with ``sentences``, whose
            items expose ``words`` with ``text`` and ``upos`` attributes
            (presumably a Stanza pipeline; confirm with callers).

    Returns:
        list: The same ``anotador`` object after lemma replacement and removal.
    """
    start_time = time()
    deletes = []

    for position, term in enumerate(anotador):
        print('this is i')
        print(term)
        if len(term) <= 1:
            continue

        doc = pos_tagger(term)
        if not doc.sentences:
            continue  # nothing was tagged, e.g. a whitespace-only term
        tag = []
        for token in doc.sentences[0].words:
            tag.append((token.text, token.upos))
            print(token.text)
            print(token.upos)
        print('this is tag ')
        print(tag)

        words = term.split(' ')
        kinds = []
        for t in tag:
            print('this is t')
            print(t)
            kind = _DELETE_UPOS_KINDS.get(t[1])
            if kind is None:
                continue
            kinds.append(kind)
            if kind in ('aux', 'verb'):
                lem = ''.join(tok.lemma_ for tok in nlp(t[0]))
                if lem == term:
                    lem = t[0]
                if len(words) == 1:
                    anotador[position] = str(lem)
        if _delete_should_drop(kinds, len(words)):
            deletes.append(term)

    # Removal happens after the scan so the list is not resized while it is
    # being iterated.
    for term in deletes:
        if term in anotador:
            anotador.remove(term)

    cont = len(deletes)
    elapsed_time = time() - start_time
    txt = 'PATRONES, DELETE' + ' (' + str(cont) + ') NEW LIST SIZE: (' + str(
        len(anotador)) + ') TIME: (' + str(elapsed_time) + ')'
    joind = ', '.join(deletes)
    print('PATRONES DELETE', cont, len(anotador), elapsed_time)
    conts_log.information(txt, 'TERMS REMOVED: ' + joind)
    return anotador
def wsidFunction(termIn, listcontext, definitions):
    """Pick the best sense of a term by querying the disambiguation service.

    For every context that contains the term, the service is asked to weigh
    the candidate senses. The answer is expected to be a list of weights, one
    per sense. The sense with the highest weight over all contexts wins,
    together with its URI and the context that produced it.

    Fixes over the previous version: the winner is chosen by weight (it used
    to be chosen by the largest sense *index*), sense/URI/context are kept
    together so a failed request can no longer shift them against each other,
    contexts that do not contain the term are skipped instead of raising
    ``ValueError``, and an unparsable reply is logged instead of silently
    ignored.

    Args:
        termIn: Term to disambiguate; matched case-insensitively.
        listcontext: Sequence of context strings; may be empty.
        definitions: Indexable where ``definitions[0]`` is the list of sense
            texts and ``definitions[1]`` the list of their identifiers.

    Returns:
        tuple: ``(sense, uri, status_code, context)``. ``status_code`` is the
        HTTP status of the last request made (``0`` if none). When nothing
        could be selected, ``sense``, ``uri`` and ``context`` are the builtin
        ``str`` type, kept as the historical "no result" placeholder.
    """
    start_time = time()
    conts_log.information('-----WSID----', '')

    # Historical placeholders returned when no context yields a result.
    valid = str
    uri_max = str
    valid_context = str
    code = 0
    candidates = []  # (weight, original context, sense, uri)

    if listcontext:
        listdef = definitions[0]
        listIde = definitions[1]
        for s in listdef:
            conts_log.information('Senses: ' + s, '')

        termIn = termIn.lower()
        for original_context in listcontext:
            context = original_context.lower()
            conts_log.information('Context: ' + context, '')
            start = context.find(termIn)
            if start < 0:
                conts_log.information(
                    'Context skipped, term not found: ' + termIn, '')
                continue
            end = start + len(termIn)

            auth_token = getToken()
            hed = {
                'Authorization': 'Bearer ' + auth_token,
                'accept': 'application/json',
                'Content-Type': 'application/json'
            }
            url_lkgp_status = 'http://entity-linking-lynx.apps.cybly.cloud/disambiguate_demo?'
            params = {'context': context, 'start_ind': start,
                      'end_ind': end, 'senses': listdef}
            response = requests.post(url_lkgp_status, params=params,
                                     headers=hed)
            code = response.status_code
            if code != 200:
                conts_log.error('Wsid code: ', code)
                continue

            try:
                pesos = response.json()
            except ValueError:
                # Every JSON decode error raised by requests derives from
                # ValueError.
                conts_log.information('Wsid response is not valid JSON', '')
                continue
            if not isinstance(pesos, list) or not pesos:
                continue

            peso_max = max(pesos)
            index_max = pesos.index(peso_max)
            sense = listdef[index_max] if index_max < len(listdef) else str
            uri = listIde[index_max] if index_max < len(listIde) else str
            candidates.append((peso_max, original_context, sense, uri))

    if candidates:
        # max() keeps the first candidate on ties, like list.index() did.
        best = max(candidates, key=lambda candidate: candidate[0])
        valid_context = best[1]
        valid = best[2]
        uri_max = best[3]

    conts_log.information('Result context: ' + str(valid_context),
                          'Result sense: ' + str(valid))
    elapsed_time = time() - start_time
    conts_log.information('Time wsid: ' + str(elapsed_time), '')
    conts_log.information('-------------', '')
    return (valid, uri_max, code, valid_context)