def convert_word_digits(string_value: str):
    """Convert number words found in ``string_value`` into digits.

    The text is passed through a freshly created
    ``text2digits.Text2Digits`` converter and the converter's result is
    returned as-is.
    """
    return text2digits.Text2Digits().convert(string_value)
def validateDosageValue(result_dict, ehrc_df):
    """Check an extracted dosage against the master list for its molecule.

    Args:
        result_dict: Extraction result. Reads ``'medicine_name'``
            (required), ``'dosage_value'`` and ``'dosage_suffix'`` (both
            default to ``""``).
        ehrc_df: Master table with ``MOLECULE_NAME`` and ``DOSAGE`` columns.
            ``DOSAGE`` of the first matching row is read as a
            comma-separated string of allowed dosages.

    Returns:
        ``True`` / ``False`` when the molecule is in the master table and
        the normalised dosage (value + suffix, lower-cased, spaces removed)
        is / is not one of its allowed dosages. Otherwise one of the status
        strings ``"Dosage Value Missing"``,
        ``"Molecule Not Found in Master"`` or ``"Exception"``.
    """
    try:
        drug_name = result_dict.get('medicine_name').lower()
        extracted_value = result_dict.get('dosage_value', "")
        if extracted_value == "":
            return "Dosage Value Missing"

        # Spelled-out values are turned into digits first.
        if not is_number(extracted_value):
            extracted_value = text2digits.Text2Digits().convert(
                extracted_value).strip()
        suffix = result_dict.get('dosage_suffix', "")
        dosage_value = (extracted_value.lower().replace(" ", "").strip() +
                        suffix.lower().replace(" ", "").strip())

        # NOTE(review): this lower-cases the column in the caller's frame in
        # place. Kept for backward compatibility, since other code may rely
        # on the column already being lower-cased; confirm before removing.
        ehrc_df['MOLECULE_NAME'] = ehrc_df['MOLECULE_NAME'].str.lower()

        # One exact-match lookup decides both "is the molecule known" and
        # "which row to read". The previous regex substring test could say
        # "known" for a partial match and then fail on the exact lookup.
        master_dosages = ehrc_df.loc[ehrc_df['MOLECULE_NAME'] == drug_name,
                                     'DOSAGE']
        if master_dosages.empty:
            print("Molecule Not Found in Master >> " + drug_name)
            return "Molecule Not Found in Master"

        if dosage_value == "":
            # Molecule is known but nothing usable was extracted.
            return "Exception"

        # NOTE(review): master dosages are not lower-cased before the
        # comparison although the extracted value is — confirm the master
        # data is already lower case.
        allowed = [
            item.replace(" ", "").strip()
            for item in master_dosages.iloc[0].split(',')
        ]
        return dosage_value in allowed
    except Exception as e:
        # Boundary handler: callers receive a status string, never a raise.
        print(e)
        return "Exception"
def guessDosageDurationValue(pres_text,
                             entity_frame,
                             entity_index,
                             guess_type="dosage",
                             replace_text="xxxDosageValuexxx"):
    """Guess a missing dosage/duration value from the text between entities.

    When the target entity is marked as not found (``-1`` in
    ``entity_index``), the text between the preceding anchor token and the
    next found entity is trimmed of whitespace and ``/ , ; -`` separators.
    If what is left is a number (directly, or after text2digits
    conversion), it is replaced in ``pres_text`` by a placeholder.

    Args:
        pres_text: Prescription text containing placeholder tokens.
        entity_frame: Indexable collection of entity tokens; the entry at
            the right-neighbour index is searched for in ``pres_text``.
        entity_index: Indexable collection where ``-1`` marks an entity as
            not found. Slot 1 is checked for dosage, slot 4 otherwise.
        guess_type: ``"dosage"`` anchors on ``xxxDrugNamexxx``; any other
            value anchors on ``xxxTIMINGxxx`` and forces the placeholder to
            ``xxxDurationValuexxx``.
        replace_text: Placeholder inserted for a guessed dosage.

    Returns:
        Always a 3-tuple ``(text, value_or_message, guessed)``. ``guessed``
        is ``True`` only when a value was substituted; in that case
        ``value_or_message`` is the raw text that was replaced.
    """
    not_guessable = "Guessed Value May Not be a Dosage/Duration"
    try:
        if guess_type == "dosage":
            prev_entity = "xxxDrugNamexxx"
            current_entity_index = 1
            index_cnt = 2
        else:
            prev_entity = "xxxTIMINGxxx"
            current_entity_index = 4
            replace_text = "xxxDurationValuexxx"
            index_cnt = 5

        if entity_index[current_entity_index] != -1:
            return pres_text, "No need to guess, it is already there.", False

        # Left neighbour: the anchor token that precedes the value.
        anchor_pos = pres_text.find(prev_entity)
        if anchor_pos == -1:
            # Without the anchor there is no span to inspect. Return the
            # usual tuple shape so callers can still unpack the result.
            return pres_text, not_guessable, False
        left_start = anchor_pos + len(prev_entity)

        # Right neighbour: walk forward until an entity that was found.
        # Running off the end raises and is reported by the handler below.
        while entity_index[index_cnt] == -1:
            index_cnt += 1
        right_end = pres_text.find(entity_frame[index_cnt])
        if right_end == -1:
            # A -1 here would otherwise act as a slice bound and silently
            # cut the text one character before its end.
            return pres_text, not_guessable, False

        # Trim whitespace first, then separators on both ends. Whitespace
        # exposed by removing separators is intentionally left in place.
        bound_text = pres_text[left_start:right_end].strip().strip("/,;-")
        if not bound_text:
            return pres_text, not_guessable, False

        # Double check that the span really is a numeric value.
        if not is_number(bound_text):
            converted = text2digits.Text2Digits().convert(bound_text).strip()
            if not is_number(converted):
                return pres_text, not_guessable, False

        # Search from the anchor onwards: the same characters may occur
        # earlier in the text, and matching there would delete everything
        # up to the right neighbour, including the anchor itself.
        value_start = pres_text.find(bound_text, left_start)
        pres_text = (pres_text[:value_start] + replace_text +
                     pres_text[right_end:])
        return pres_text, bound_text, True
    except Exception as ee:
        # Boundary handler: log and return the tuple shape callers expect.
        print(ee)
        track = traceback.format_exc()
        print(track)
        return pres_text, "Some exception occurred", False
def getData():
    """Fetch an article, extract places and counts, and return them as JSON.

    Reads the ``url`` query argument from ``request``, downloads and parses
    the article with ``Article``, converts number words to digits, sends
    the first 5000 characters to the configured NER endpoint, and matches
    the returned ``LOC`` entries against the city list in
    ``data/sg-citi.json``.

    Returns:
        ``jsonify`` response with keys ``text`` (article text with blank
        lines removed), ``location`` (list of ``{name, lat, lon, count}``
        dicts) and ``category`` (raw NER response body).
    """
    url = request.args.get('url')

    # Download and parse the article with the Newspaper framework.
    content = Article(url)
    content.download()
    content.parse()
    rawText = content.text

    # Unformatted text shown to the user.
    textDisplay = ''.join(rawText.split("\n\n"))

    # Convert number words to digits, then apply the API size limit.
    t2d = text2digits.Text2Digits()
    text = ''.join(t2d.convert(rawText).split("\n\n"))[:5000]
    jsonData = {"text": text}

    configDataResource = os.path.join(SITE_ROOT, "data", "configdata.json")
    with open(configDataResource) as config_file:
        configData = json.load(config_file)

    # NER API call.
    headers = {
        'x-api-key': configData["X_API_KEY"],
        'Content-type': 'application/json'
    }
    ner_response = requests.post(
        configData["NAMED_ENTITY_RECOGNITION_ENDPOINT"],
        headers=headers,
        data=json.dumps(jsonData))

    # A response without "LOC" yields no locations instead of a crash.
    places = json.loads(ner_response.text)
    found_places = places.get("LOC", [])
    print(found_places)

    json_url = os.path.join(SITE_ROOT, "data", "sg-citi.json")
    with open(json_url) as cities_file:
        data = json.load(cities_file)

    # NOTE(review): the model is loaded on every call; consider loading it
    # once at module level if this handler is called often.
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)

    # Unique, digit-only CARDINAL entities, in order of first appearance.
    count = []
    for ent in doc.ents:
        print(ent.text, ent.start_char, ent.end_char, ent.label_)
        if (ent.label_ == "CARDINAL" and ent.text.isdigit()
                and ent.text not in count):
            count.append(ent.text)
    print("COUNT: ", count)
    if not count:
        # Placeholder values used when the article has no usable numbers.
        count = list(range(80, 500, 7))

    returnJson = {
        "text": textDisplay,
        "location": [],
        "category": ner_response.text
    }
    # Track names already emitted. The location list holds dicts, so a
    # "name not in list" test against it can never detect a duplicate.
    seen_names = set()
    for place in found_places:
        for citi in data:
            name = citi["name"]
            if place in name and name not in seen_names:
                seen_names.add(name)
                returnJson["location"].append({
                    "name": name,
                    "lat": "no1",
                    "lon": "no2",
                    "count": random.choice(count)
                })
                break

    print(returnJson)
    return jsonify(returnJson)
def oblicz(sentence):
    """Translate a worded arithmetic sentence, evaluate it and print it.

    The sentence is translated with ``googletrans``, number words are
    converted to digits with ``text2digits``, operator words are mapped to
    Python operators, filler words are dropped, and the resulting
    expression is evaluated.

    Args:
        sentence: Arithmetic expression written in words.

    Returns:
        ``None`` — the result is printed, and the return value is that of
        ``print``.

    Raises:
        Whatever ``eval`` raises when the expression is still invalid after
        the percent fallback has been applied.
    """
    from text2digits import text2digits
    from googletrans import Translator
    from infix import shift_infix as infix

    # Translate the given sentence.
    translator = Translator()
    translated = translator.translate(sentence).text

    # Used by the evaluated expression as ``a <<percent>> b``; eval() picks
    # it up from this function's local scope, so it must stay defined here.
    @infix
    def percent(a, b):
        return ((a * b) / 100)

    # Convert the translated number words into digits.
    t2d = text2digits.Text2Digits()
    converted = t2d.convert(translated)

    # Words that carry no arithmetic meaning and are dropped.
    filler_words = {
        "by", "to", "of", "power", "into", "out", "calculate", "do",
        "yourself", "and", "enbis", "nbis"
    }
    # Words that stand for an operator ("the" comes from "to the power of").
    operator_words = {
        "times": "*",
        "multiply": "*",
        "minus": "-",
        "add": "+",
        "plus": "+",
        "divided": "/",
        "divide": "/",
        "the": "**",
        "percent": "<<percent>>",
    }

    # Build the token list in a single pass. Removing fillers from a list
    # while iterating over it skips entries that follow one another.
    tokens = [
        operator_words.get(word, word)
        for word in str(converted).split()
        if word not in filler_words
    ]

    # Prefix form ("add 2 to 3") becomes infix ("2 + 3").
    if len(tokens) >= 2 and tokens[0] in ("+", "*", "/"):
        tokens[0], tokens[1] = tokens[1], tokens[0]

    joined = " ".join(tokens)

    # SECURITY: eval() runs arbitrary Python. The expression is built from
    # translated user input, so this must not be exposed to untrusted
    # callers without replacing it with a real expression parser.
    try:
        return (print(eval(joined)))
    except Exception:
        # Fallback for a trailing "N percent": rewrite it as N * 0.01.
        values = []
        for word in joined.split():
            try:
                values.append(int(word))
            except ValueError:
                values.append(word)

        for n, item in enumerate(values):
            # Only rewrite when a number really precedes the marker;
            # otherwise leave it so the evaluation below reports the error.
            if (item == "<<percent>>" and n > 0
                    and isinstance(values[n - 1], (int, float))):
                values[n - 1] = values[n - 1] * 0.01
                values[n] = " "

        joined = " ".join(str(value) for value in values)
        return (print(eval(joined)))
def recuperarOrden(fraseGrabada, traductor):
    """Turn a recorded phrase into an order code, page count and reply.

    The phrase is translated and tokenised by ``escucharOrdenes``. The
    tokens are scanned in order for the first keyword belonging to a known
    order. Phrases containing a digit are only checked against the
    "move N pages" orders; phrases without digits are checked against the
    single-page-back, shut-down and read orders.

    Order codes, per the original author's notes (values live in ``gv``):
        0 not understood (default), 1 stop (handled elsewhere),
        2 one page back, 3 read the book, 4 go N pages forward,
        5 go N pages back, 6 shut down.

    Args:
        fraseGrabada: Recorded phrase, passed through to
            ``escucharOrdenes``.
        traductor: Translator object, passed through to
            ``escucharOrdenes``.

    Returns:
        Tuple ``(orden, numPaginas, fraseRespuesta)``: the order code, the
        number of pages to move (0 when not applicable, 1 for a single
        page back) and a randomly chosen spoken reply.
    """
    # Translate and tokenise the phrase to keep only meaningful words.
    fraseFinal, fraseTraducida = escucharOrdenes(fraseGrabada, traductor)

    # Keywords per order, plus the replies used when they are detected.
    LEER_LIBRO = ("read", "study", "research", "studying", "scan")
    LEER_LIBRO_OK = [
        "Cusha oreha", "Vamos a leer!", "Empecemos.",
        "Prepárate, que empezamos.", "Escucha bien.",
        "Ponte el cinturón, que llegan curvas"
    ]
    NO_ENTENDIDO = [
        "Lo siento, no te he entendido",
        "No te he entendido bien",
        "Vuelve a repetir, por fa",
        "¿Puedes repetirlo otra vez?",
        "Repítemelo otra vez, por favor",
    ]
    PAGINA_ATRAS = ("back", "come", "return", "behind")
    PAGINA_ATRAS_OK = [
        "Okey jefe, voy para atrás", "Entendido", "Volvemos atrás", "Okey",
        "Sí", "Estupendo"
    ]
    N_PAGINA_ADELANTE = ("advance", "get", "move", "proceed", "forward",
                         "ahead")
    N_PAGINA_ADELANTE_OK = [
        "Okey, vamos adelante", "Avanzando", "Vamos", "Entendido"
    ]
    N_PAGINAS_ATRAS = ("back", "behind", "return", "behind")
    N_PAGINAS_ATRAS_OK = [
        "Entendido",
        "Vamos hacia detrás",
        "Okey, retrocedemos",
    ]
    APAGAR = ("close", "down", "night", "evening", "goodbye", "bye")
    APAGAR_OK = [
        "Nos vemos la próxima vez", "Hasta la vista, beiby", "Adiós",
        "bai bai"
    ]

    print("")
    print("Frase traducida :", fraseTraducida)
    print("Frase final :", fraseFinal)

    # Turn number words into digits (two -> 2).
    detectorNumeros = t2d.Text2Digits()
    fraseNumerada = detectorNumeros.convert(fraseTraducida)

    numPaginas = 0
    # A digit in the phrase selects the "move N pages" rule set.
    tieneDigitos = any(map(str.isdigit, fraseNumerada))

    # Rules are tried in this order for every token; the order matters
    # because some keywords appear in more than one set.
    if not tieneDigitos:
        reglas = (
            (PAGINA_ATRAS, PAGINA_ATRAS_OK, gv.BACK_PAGE),
            (APAGAR, APAGAR_OK, gv.TURN_DOWN),
            (LEER_LIBRO, LEER_LIBRO_OK, gv.READ_BOOK),
        )
    else:
        reglas = (
            (N_PAGINA_ADELANTE, N_PAGINA_ADELANTE_OK, gv.ADVANCE_N_PAGES),
            (N_PAGINAS_ATRAS, N_PAGINAS_ATRAS_OK, gv.BACK_N_PAGES),
        )

    orden = gv.UNKNOWN
    for palabra in fraseFinal:
        # presumably gv.FIRST is the index of the word inside each token
        # entry — confirm against escucharOrdenes.
        clave = palabra[gv.FIRST].lower()
        coincidencia = next(
            (regla for regla in reglas if clave in regla[0]), None)
        if coincidencia is not None:
            _, respuestas, orden = coincidencia
            fraseRespuesta = random.choice(respuestas)
            if orden == gv.BACK_PAGE:
                numPaginas = 1
            break

    if orden == gv.UNKNOWN:
        # No keyword matched: answer with a "not understood" reply.
        fraseRespuesta = random.choice(NO_ENTENDIDO)
    elif orden >= gv.ADVANCE_N_PAGES:
        # Read whole numbers rather than single characters, so "12" means
        # twelve pages. When several numbers are present the last one wins.
        numeros = "".join(
            caracter if caracter.isdigit() else " "
            for caracter in fraseNumerada).split()
        if numeros:
            numPaginas = int(numeros[-1])

    return orden, numPaginas, fraseRespuesta