def export_time_single(st_arr, st=None, force_return=False):
    """Extract one ``datetime.time`` from a tokenised time expression.

    Args:
        st_arr: Sequence of string tokens that make up the expression.
        st: Optional pre-built text to search. When falsy, the tokens are
            concatenated (with ``-``, ``/``, ``و`` and ``,`` mapped to ``:``)
            and passed through ``cleaning``.
        force_return: Forwarded to ``hour_min_exporter``.

    Returns:
        A ``datetime.time``, or ``None`` when ``hour_min_exporter`` yields no
        hour or no minute.
    """
    built_from_tokens = False
    if not st:
        st = cleaning("".join(st_arr).replace("-", ":").replace("/", ":").replace("و", ":").replace(",", ":"))
        built_from_tokens = True

    # try hh:mm format (raw string: "\d" is an invalid escape in a normal literal)
    mtch = re.findall(r"\d+[:]\d+", st)
    if mtch:
        t = mtch[0].split(":")
        try:
            hour = int(t[0])
            minute = int(t[1])
            if hour > 24:
                # The first number cannot be an hour; treat the pair as mm:hh.
                hour, minute = minute, hour
            return datetime.time(fix_hour_ampm(" ".join(st_arr), hour), minute)
        except Exception:
            # Deliberate best effort: an unusable hh:mm match falls through
            # to the textual extraction below.
            pass

    if built_from_tokens:
        # The colon-joined text was only useful for the hh:mm attempt.
        st = cleaning(" ".join(st_arr))

    hour, minute = hour_min_exporter(st, force_return=force_return)
    if hour is None or minute is None:
        return None
    return datetime.time(hour, minute)
def convertStr2num(st):
    """Convert a number written in Persian words into its numeric value.

    The text is cleaned, split on spaces and stripped of the conjunction
    "و". If a billion / million / thousand word (looked up in
    ``num_to_perstr``) is present, the words before the largest such
    magnitude are converted recursively and multiplied by it, and the words
    after it are converted recursively and added. Otherwise the value is the
    sum of ``perstr_to_num`` over the remaining words.
    """
    words = np.array(cleaning(st).split(" "))
    words = words[np.where(words != "و")]

    # Positions of each magnitude word, resolved up front.
    hits_by_magnitude = {
        magnitude: np.where(words == num_to_perstr[magnitude])[0]
        for magnitude in (1000, 1000000, 1000000000)
    }

    # The largest magnitude present decides the split point.
    for magnitude in (1000000000, 1000000, 1000):
        hits = hits_by_magnitude[magnitude]
        if hits.shape[0] == 0:
            continue
        first = hits[0]
        if first == 0:
            # A bare magnitude word means "one" of it.
            total = magnitude
        else:
            total = convertStr2num(" ".join(words[:first])) * magnitude
        if not words[-1] == num_to_perstr[magnitude]:
            # Something follows the magnitude word: add the remainder.
            total += convertStr2num(" ".join(words[first + 1:]))
        return total

    # No magnitude word at all: plain additive number.
    return sum(perstr_to_num[word] for word in words)
def tr_date(date_list, tokens, labels):
    """Return display words for the dates of a question.

    With at most two dates, each one is rendered by ``tr_single_date`` (on
    its ``.date()``) and passed through ``cleaning``. With more than two, the
    B_DAT/I_DAT spans produced by ``concatenate_bi`` are returned instead.
    """
    if len(date_list) > 2:
        return concatenate_bi(tokens, labels, "B_DAT", "I_DAT")
    return [cleaning(tr_single_date(item.date())) for item in date_list]
def tr_time(time_iso, tokens, labels):
    """Return display words for the times of a question.

    With at most two times, each one is rendered by ``tr_single_time`` and
    passed through ``cleaning``. With more than two, the B_TIM/I_TIM spans
    produced by ``concatenate_bi`` are returned instead.
    """
    if len(time_iso) > 2:
        return concatenate_bi(tokens, labels, "B_TIM", "I_TIM")
    return [cleaning(tr_single_time(item)) for item in time_iso]
def aibot(self, Question):
    """Classify a question, run NER on it and dispatch to the matching API.

    The classifier output is mapped through ``TR_ID_AIBOTID``: "-1" means out
    of scope, "1" weather, "2" adhan (with a weather fallback), "3" time, and
    anything else is sent to the calendar API.

    Args:
        Question: Raw question text. It is classified and tagged as given,
            then passed through ``cleaning`` before being handed to a
            handler.

    Returns:
        Tuple ``(answer, generated_sentence)`` as produced by the selected
        handler, or the default answer dict with an out-of-scope message.
    """
    answer = {
        'type': ['-1'],
        'city': [],
        'date': [],
        'time': [],
        'religious_time': [],
        'calendar_type': [],
        'event': [],
        'api_url': '',
        'result': []
    }
    type_pred = TR_ID_AIBOTID[classify_question(self.classifier_model, self.classifier_tokenizer, Question)]
    tokens, labels = ner_question(self.ner_model, self.ner_tokenizer, self.ner_config, Question)
    Question = cleaning(Question)

    if type_pred == "-1":
        answer["type"] = ["-1"]
        generated_sentence = "سوال پرسیده شده خارج از توان بات میباشد"
    elif type_pred == "1":
        answer, generated_sentence = self.weather_api.get_answer(
            Question, tokens, labels)
        generated_sentence = cleaning(generated_sentence).replace("٫", "/")
    elif type_pred == "2":
        adhan_result = self.adhan_api.get_answer(Question, tokens, labels)
        # The adhan handler may signal "this is really a weather question"
        # with a bare falsy value instead of a 2-tuple; unpacking that
        # directly would raise TypeError before the fallback could run.
        if adhan_result:
            answer, generated_sentence = adhan_result
        else:
            answer = None
        if not answer:
            answer, generated_sentence = self.weather_api.get_answer(
                Question, tokens, labels)
    elif type_pred == "3":
        answer, generated_sentence = self.time_api.get_answer(
            Question, tokens, labels)
    else:
        answer, generated_sentence = self.calender_api.get_answer(
            Question, tokens, labels)
    return answer, generated_sentence
def get_answer(self, question, tokens, labels):
    """Answer a prayer-time (adhan) question.

    Cities, dates and adhan names are extracted from the question; missing
    dates default to today and a missing city defaults to "تهران". Depending
    on how many of each were found, the method reports a single time, the
    time left/passed, a difference between two times, or a list of times.

    Args:
        question: Cleaned question text.
        tokens: NER tokens of the question.
        labels: NER labels aligned with ``tokens``.

    Returns:
        Tuple ``(answer, generated_sentence)``. ``answer`` is ``False`` when
        the question contains a temperature-comparison keyword, i.e. it was
        probably misclassified and should go to the weather handler.
    """
    answer = {
        'type': ['2'],
        'city': [],
        'date': [],
        'time': [],
        'religious_time': [],
        'calendar_type': [],
        'event': [],
        'api_url': [''],
        'result': []
    }
    generated_sentence = ""

    weather_keywords = [
        "سردتر", "گرمتر", "سردترین", "گرمترین", "اختلاف دمای", "میانگین دما"
    ]
    if any(k in question for k in weather_keywords):
        # Was a bare ``return False``; every other path returns a 2-tuple and
        # callers unpack two values, so keep the shape and a falsy answer.
        return False, generated_sentence

    location = unique_without_sort(
        location_handler(question, tokens, labels))
    answer["city"] = location

    date_list = []
    events = []
    for d in export_date(question, tokens, labels):
        if d[0]:
            date_list.append(d[0])
        # A non-bool third flag carries an event name instead of a flag.
        if (not d[1][0]) and (not d[1][1]) and not isinstance(d[1][2], bool):
            events.append(d[1][2])
    answer["event"] = events

    today = datetime.datetime.today()
    if not date_list:
        date_list = [today]
    date_list = unique_without_sort(date_list)
    d_n = len(date_list)
    date_list_jalali = []
    for d in date_list:
        j = gregorian_to_jalali(d.year, d.month, d.day)
        date_list_jalali.append(format_jalali_date(j))
    answer["date"] = date_list_jalali

    l_n = len(location)
    if l_n == 0:
        answer["city"] = ["تهران"]
        location = ["تهران"]
        l_n = 1

    exportedAdhan = self.export_adhan_names(question)
    if len(exportedAdhan) == 0:
        return answer, generated_sentence
    answer["religious_time"] = copy(exportedAdhan)

    # Expand an "All" request into the individual adhan names.
    new_adhan_names = []
    for e in exportedAdhan:
        if tr_adhan_names[e] == "All":
            new_adhan_names.append("اذان صبح")
            new_adhan_names.append("اذان ظهر")
            new_adhan_names.append("اذان مغرب")
            new_adhan_names.append("نیمه شب شرعی")
        else:
            new_adhan_names.append(e)
    exportedAdhan = new_adhan_names
    n_adhan = len(exportedAdhan)

    res, url = self.get_city_adhan_time(location[0], date_list[0].date(),
                                        exportedAdhan[0])
    answer["api_url"] = [url]

    if n_adhan == 1 and l_n == 1 and d_n == 1:
        if res is not None:
            answer["result"] = [res.strftime("%H:%M")]
            if date_list[0].date() >= today.date():
                generated_sentence = "{} به افق {}، {}، {} میباشد".format(
                    exportedAdhan[0], location[0],
                    tr_single_date(date_list[0]), tr_single_time(res))
            else:
                generated_sentence = "{} به افق {}، {}، {} بوده".format(
                    exportedAdhan[0], location[0],
                    tr_single_date(date_list[0]), tr_single_time(res))

            # Does the question ask how much time is left / has passed?
            ihla = [h for h in hours_left_asked if h in question]
            if not ihla:
                return answer, cleaning(generated_sentence)

            tnow = datetime.datetime.now()
            dadhan = datetime.datetime.combine(date_list[0].date(), res)
            if ihla[0] != "گذشته":
                if dadhan < tnow:
                    # Already passed today: count down to tomorrow's time.
                    dadhan = dadhan + datetime.timedelta(1)
                    date_list[0] = date_list[0] + datetime.timedelta(1)
                generated_sentence = "تا {} {} {}، {} مانده است"
            else:
                generated_sentence = "از {} {} {}، {} گذشته است"
            dt = tnow - dadhan if tnow > dadhan else dadhan - tnow
            answer["result"], gd = self.format_time_delta(dt)
            answer["result"] = [answer["result"]]
            generated_sentence = generated_sentence.format(
                exportedAdhan[0], tr_single_date(date_list[0]),
                location[0], gd)
    else:
        # check if it's a logical question
        isLogical = any(l in tokens for l in adhan_logical_question)
        if isLogical:
            if n_adhan > 1 and l_n == 1 and d_n == 1:
                answer["result"], answer[
                    "api_url"], gd = self.get_difference_adhan(
                        location[0], location[0], date_list[0],
                        date_list[0], exportedAdhan[0], exportedAdhan[1])
                generated_sentence = "اختلاف {} و {} {} {}، {} است".format(
                    exportedAdhan[0], exportedAdhan[1],
                    tr_single_date(date_list[0]), location[0], gd)
            elif n_adhan == 1 and l_n > 1 and d_n == 1:
                answer["result"], answer[
                    "api_url"], gd = self.get_difference_adhan(
                        location[0], location[1], date_list[0],
                        date_list[0], exportedAdhan[0], exportedAdhan[0])
                generated_sentence = "اختلاف {} {} {}، {} میباشد".format(
                    exportedAdhan[0], tr_single_date(date_list[0]),
                    " و ".join(location), gd)
            elif n_adhan == 1 and l_n == 1 and d_n > 1:
                answer["result"], answer[
                    "api_url"], gd = self.get_difference_adhan(
                        location[0], location[0], date_list[0],
                        date_list[1], exportedAdhan[0], exportedAdhan[0])
                generated_sentence = "اختلاف {} {} {}، {} است".format(
                    exportedAdhan[0],
                    " و ".join(tr_date(date_list, tokens, labels)),
                    location[0], gd)
            elif n_adhan == 2 and l_n == 2 and d_n == 1:
                answer["result"], answer[
                    "api_url"], gd = self.get_difference_adhan(
                        location[0], location[1], date_list[0],
                        date_list[0], exportedAdhan[0], exportedAdhan[1])
                generated_sentence = "اختلاف زمان {} {} و {} {} {}، {} میباشد".format(
                    exportedAdhan[0], location[0], exportedAdhan[1],
                    location[1], tr_single_date(date_list[0]), gd)
            elif n_adhan == 2 and l_n == 1 and d_n == 2:
                answer["result"], answer[
                    "api_url"], gd = self.get_difference_adhan(
                        location[0], location[0], date_list[0],
                        date_list[1], exportedAdhan[0], exportedAdhan[1])
                # Typo fix: was "میباشید".
                generated_sentence = "اختلاف زمان {} {} و {} {} {}، {} میباشد".format(
                    exportedAdhan[0], tr_single_date(date_list[0]),
                    exportedAdhan[1], tr_single_date(date_list[1]),
                    location[0], gd)
            elif n_adhan == 1 and l_n == 2 and d_n == 2:
                # Only one adhan name exists here, so exportedAdhan[1] was a
                # guaranteed IndexError; the sentence compares the two dates,
                # so the second date must be date_list[1].
                answer["result"], answer[
                    "api_url"], gd = self.get_difference_adhan(
                        location[0], location[1], date_list[0],
                        date_list[1], exportedAdhan[0], exportedAdhan[0])
                generated_sentence = "اختلاف زمان {} {} {} و {} {}، {} است".format(
                    exportedAdhan[0], tr_single_date(date_list[0]),
                    location[0], tr_single_date(date_list[1]),
                    location[1], gd)
        else:
            # Plain listing: one lookup per adhan / city / date combination.
            # In each loop the separator depends on the position: ", " before
            # the second-to-last item, " و " after it, nothing after the last.
            res_list = []
            url_list = []
            if n_adhan >= 2 and l_n == 1 and d_n == 1:
                generated_sentence = "{} به افق {} ".format(
                    tr_single_date(date_list[0]), location[0])
                for i, ad in enumerate(exportedAdhan):
                    res, url = self.get_city_adhan_time(
                        location[0], date_list[0].date(), ad)
                    if res is not None and url is not None:
                        res_list.append(res.strftime("%H:%M"))
                        url_list.append(url)
                        if i < n_adhan - 2:
                            generated_sentence = generated_sentence + \
                                "{} ،{} ، ".format(ad, tr_single_time(res))
                        elif i == n_adhan - 2:
                            generated_sentence = generated_sentence + \
                                "{} ،{} و ".format(ad, tr_single_time(res))
                        else:
                            generated_sentence = generated_sentence + \
                                "{} ،{} ".format(ad, tr_single_time(res))
                if date_list[0].date() >= today.date():
                    generated_sentence = generated_sentence + "میباشد"
                else:
                    generated_sentence = generated_sentence + "بوده"
            elif n_adhan == 1 and l_n >= 2 and d_n == 1:
                generated_sentence = "{} ".format(
                    tr_single_date(date_list[0]))
                for i, lc in enumerate(location):
                    res, url = self.get_city_adhan_time(
                        lc, date_list[0].date(), exportedAdhan[0])
                    if res is not None and url is not None:
                        res_list.append(res.strftime("%H:%M"))
                        url_list.append(url)
                        if i < l_n - 2:
                            generated_sentence = generated_sentence + \
                                "{} به افق {}، {} ،".format(
                                    exportedAdhan[0], lc, tr_single_time(res))
                        elif i == l_n - 2:
                            generated_sentence = generated_sentence + \
                                "{} به افق {}، {} و ".format(
                                    exportedAdhan[0], lc, tr_single_time(res))
                        else:
                            generated_sentence = generated_sentence + \
                                "{} به افق {}، {} ".format(
                                    exportedAdhan[0], lc, tr_single_time(res))
                generated_sentence = generated_sentence + " میباشد"
            elif n_adhan == 1 and l_n == 1 and d_n >= 2:
                generated_sentence = "{} {}، ".format(
                    exportedAdhan[0], location[0])
                for i, dat in enumerate(date_list):
                    res, url = self.get_city_adhan_time(
                        location[0], dat.date(), exportedAdhan[0])
                    if res is not None and url is not None:
                        res_list.append(res.strftime("%H:%M"))
                        url_list.append(url)
                        if i < d_n - 2:
                            generated_sentence = generated_sentence + \
                                "{}، {}، ".format(tr_single_date(
                                    dat), tr_single_time(res))
                        elif i == d_n - 2:
                            generated_sentence = generated_sentence + \
                                "{}، {} و ".format(tr_single_date(
                                    dat), tr_single_time(res))
                        else:
                            generated_sentence = generated_sentence + \
                                "{}، {}، ".format(tr_single_date(
                                    dat), tr_single_time(res))
                generated_sentence = generated_sentence + " میباشد"
            elif n_adhan == l_n and d_n == 1:
                generated_sentence = "{}، ".format(
                    tr_single_date(date_list[0]))
                s = 0
                for ad, lc in zip(exportedAdhan, location):
                    res, url = self.get_city_adhan_time(
                        lc, date_list[0].date(), ad)
                    if res is not None and url is not None:
                        res_list.append(res.strftime("%H:%M"))
                        url_list.append(url)
                        if s < l_n - 2:
                            generated_sentence = generated_sentence + \
                                "{} {}، {}، ".format(
                                    ad, lc, tr_single_time(res))
                        elif s == l_n - 2:
                            generated_sentence = generated_sentence + \
                                "{} {}، {} و ".format(
                                    ad, lc, tr_single_time(res))
                        else:
                            generated_sentence = generated_sentence + \
                                "{} {}، {} ".format(
                                    ad, lc, tr_single_time(res))
                    s += 1
                generated_sentence = generated_sentence + "میباشد"
            elif n_adhan == 1 and l_n == d_n:
                generated_sentence = ""
                s = 0
                for lc, dat in zip(location, date_list):
                    res, url = self.get_city_adhan_time(
                        lc, dat.date(), exportedAdhan[0])
                    if res is not None and url is not None:
                        res_list.append(res.strftime("%H:%M"))
                        url_list.append(url)
                        if s < l_n - 2:
                            generated_sentence = generated_sentence + \
                                "{} {}، {}، {}، ".format(
                                    exportedAdhan[0], lc,
                                    tr_single_date(dat), tr_single_time(res))
                        elif s == l_n - 2:
                            generated_sentence = generated_sentence + \
                                "{} {}، {}، {} و ".format(
                                    exportedAdhan[0], lc,
                                    tr_single_date(dat), tr_single_time(res))
                        else:
                            generated_sentence = generated_sentence + \
                                "{} {}، {}، {} ".format(
                                    exportedAdhan[0], lc,
                                    tr_single_date(dat), tr_single_time(res))
                    s += 1
                generated_sentence = generated_sentence + "میباشد"
            elif n_adhan == d_n and l_n == 1:
                generated_sentence = ""
                s = 0
                for ad, dat in zip(exportedAdhan, date_list):
                    res, url = self.get_city_adhan_time(
                        location[0], dat.date(), ad)
                    if res is not None and url is not None:
                        res_list.append(res.strftime("%H:%M"))
                        url_list.append(url)
                        if s < d_n - 2:
                            generated_sentence = generated_sentence + \
                                "{} {}، {}، ".format(
                                    tr_single_date(dat), ad,
                                    tr_single_time(res))
                        elif s == d_n - 2:
                            generated_sentence = generated_sentence + \
                                "{} {}، {} و ".format(
                                    tr_single_date(dat), ad,
                                    tr_single_time(res))
                        else:
                            generated_sentence = generated_sentence + \
                                "{} {}، {} ".format(
                                    tr_single_date(dat), ad,
                                    tr_single_time(res))
                    s += 1
                generated_sentence = generated_sentence + \
                    "به افق {} میباشد".format(location[0])
            elif (n_adhan == d_n) and (l_n == d_n):
                generated_sentence = ""
                s = 0
                for i in range(d_n):
                    res, url = self.get_city_adhan_time(
                        location[i], date_list[i].date(), exportedAdhan[i])
                    if res is not None and url is not None:
                        res_list.append(res.strftime("%H:%M"))
                        url_list.append(url)
                        if s < l_n - 2:
                            generated_sentence = generated_sentence + \
                                "{}، {} {}، {}، ".format(tr_single_date(
                                    date_list[i]), exportedAdhan[i],
                                    location[i], tr_single_time(res))
                        elif s == l_n - 2:
                            generated_sentence = generated_sentence + \
                                "{} {}، {}، {} و ".format(tr_single_date(
                                    date_list[i]), exportedAdhan[i],
                                    location[i], tr_single_time(res))
                        else:
                            generated_sentence = generated_sentence + \
                                "{} {}، {} {} ".format(tr_single_date(
                                    date_list[i]), exportedAdhan[i],
                                    location[i], tr_single_time(res))
                    s += 1
                generated_sentence = generated_sentence + "میباشد"
            answer["result"] = res_list
            answer["api_url"] = url_list
            return answer, cleaning(generated_sentence)
    return answer, cleaning(generated_sentence)
def get_answer(self, question, tokens, labels):
    """Answer a calendar / date question.

    Questions containing any ``time_asked`` keyword are delegated to
    ``self.time.get_answer``. Otherwise the dates and events found by
    ``export_date`` are resolved: no date yields today's date, one date is
    answered by ``get_single_answer`` with a generic fallback sentence, and
    several dates are answered piecewise and joined with " و ".

    Args:
        question: Cleaned question text.
        tokens: NER tokens of the question.
        labels: NER labels aligned with ``tokens``.

    Returns:
        Tuple ``(answer, generated_sentence)``.

    Side effects:
        Stores the B_DAT/I_DAT spans of the question in ``self.bii``.
    """
    answer = {
        'type': ['4'],
        'city': [],
        'date': [],
        'time': [],
        'religious_time': [],
        'calendar_type': [],
        'event': [],
        'api_url': [''],
        'result': []
    }
    generated_sentence = ""

    if any(t in question for t in time_asked):
        return self.time.get_answer(question, tokens, labels)

    date_list = []
    events = []
    which_date_is_event = []
    for i, d in enumerate(export_date(question, tokens, labels, True)):
        if d[0]:
            date_list.append(d[0])
        # A non-bool third flag carries an event name instead of a flag.
        if (not d[1][0]) and (not d[1][1]) and not isinstance(d[1][2], bool):
            events.append(d[1][2])
            which_date_is_event.append(i)

    today = datetime.datetime.today()
    no_date = len(date_list) == 0
    if no_date:
        date_list = [today]
    date_list = unique_without_sort(date_list)
    d_n = len(date_list)
    date_list_jalali = []
    for d in date_list:
        j = gregorian_to_jalali(d.year, d.month, d.day)
        date_list_jalali.append(format_jalali_date(j))
    answer["date"] = date_list_jalali
    answer["event"] = list(events)
    self.bii = concatenate_bi(tokens, labels, "B_DAT", "I_DAT")

    def _joined(sentence, part):
        # Chain partial answers with the conjunction " و ".
        return sentence + " و " + part if sentence else part

    def _bi(idx):
        # The date span for position idx, only when spans align with dates.
        return self.bii[idx] if len(self.bii) == d_n else None

    if no_date:
        answer["result"] = date_list_jalali
        generated_sentence = "امروز، {} است".format(
            tr_single_date(date_list[0], force_date=True))
    elif d_n == 1:
        asingle, generated_sentence = self.get_single_answer(
            question, answer, date_list, events)
        if asingle is not None:
            answer = asingle
        else:
            answer["result"] = date_list_jalali
            trsd = tr_single_date(date_list[0], True)
            if self.bii:
                if date_list[0].date() >= today.date():
                    generated_sentence = "{}، {} میباشد".format(
                        " ".join(self.bii), trsd)
                else:
                    generated_sentence = "{}، {} بوده است".format(
                        " ".join(self.bii), trsd)
            else:
                if date_list[0].date() >= today.date():
                    generated_sentence = "تاریخ داده شده {} است".format(
                        trsd)
                else:
                    generated_sentence = "تاریخ داده شده {} بوده".format(
                        trsd)
    else:
        answer["result"] = []
        tokenize_questions = hazm.sent_tokenize(question)
        if len(tokenize_questions) == 1:
            tokenize_questions = question.split(" و ")
        if d_n == len(tokenize_questions):
            # One sub-question per date: answer each on its own text first.
            generated_sentence = ""
            if d_n != len(events):
                s = 0
                for i, (d, tk) in enumerate(
                        zip(date_list, tokenize_questions)):
                    if i in which_date_is_event:
                        # ``events`` is indexed by event order (s), not by the
                        # export_date position held in which_date_is_event;
                        # the old events[which_date_is_event[s]] picked the
                        # wrong event or raised IndexError.
                        n_answer, n_generated_sentence = self.get_single_answer(
                            tk, answer, [d], [events[s]], _bi(i))
                        s += 1
                    else:
                        n_answer, n_generated_sentence = self.get_single_answer(
                            tk, answer, [d], None, _bi(i))
                    if n_answer is not None:
                        answer = n_answer
                        generated_sentence = _joined(
                            generated_sentence, n_generated_sentence)
                    else:
                        # Retry against the whole question.
                        n_answer, n_generated_sentence = self.get_single_answer(
                            question, answer, [d], events, _bi(i))
                        if n_answer is not None:
                            answer = n_answer
                            generated_sentence = _joined(
                                generated_sentence, n_generated_sentence)
                        else:
                            n_generated_sentence = "تاریخ داده شده {} میباشد".format(
                                tr_single_date(d))
                            j = gregorian_to_jalali(
                                d.year, d.month, d.day)
                            answer["result"].append(
                                format_jalali_date(j))
                            generated_sentence = _joined(
                                generated_sentence, n_generated_sentence)
            else:
                for i in range(d_n):
                    n_answer, n_generated_sentence = self.get_single_answer(
                        tokenize_questions[i], answer, [date_list[i]],
                        [events[i]], _bi(i))
                    if n_answer is not None:
                        answer = n_answer
                        generated_sentence = _joined(
                            generated_sentence, n_generated_sentence)
                    else:
                        # Retry against the whole question.
                        n_answer, n_generated_sentence = self.get_single_answer(
                            question, answer, [date_list[i]], [events[i]],
                            _bi(i))
                        if n_answer is not None:
                            answer = n_answer
                            generated_sentence = _joined(
                                generated_sentence, n_generated_sentence)
                        else:
                            j = gregorian_to_jalali(
                                date_list[i].year, date_list[i].month,
                                date_list[i].day)
                            answer["result"].append(
                                format_jalali_date(j))
                            n_generated_sentence = "تاریخ داده شده {} است".format(
                                tr_single_date(date_list[i]))
                            generated_sentence = _joined(
                                generated_sentence, n_generated_sentence)
        else:
            # Sub-questions do not line up with dates: use the full question.
            # ``i`` used to be a stale index left over from the export_date
            # loop above; enumerate gives each date its own span.
            for i, d in enumerate(date_list):
                n_answer, n_generated_sentence = self.get_single_answer(
                    question, answer, [d], events, _bi(i))
                if n_answer is not None:
                    answer = n_answer
                    generated_sentence = _joined(
                        generated_sentence, n_generated_sentence)
    return answer, cleaning(generated_sentence)
def hour_min_exporter(st, force_return=False):
    """Extract an ``(hour, minute)`` pair from Persian time text.

    Strategies are tried in order and the first that succeeds wins:

    1. "ساعت" followed by digits, with minutes from ``minute_literals``,
       from "<digits> دقیقه", or from written numbers before "دقیقه".
    2. "ساعت" followed by written numbers (several hour/minute splits).
    3. A relative expression from ``time_literals``, resolved against now.
    4. With ``force_return``, the first run of digits is taken as the hour.

    Args:
        st: Cleaned text to search.
        force_return: Enable the last-resort strategy 4.

    Returns:
        ``(hour, minute)`` with the hour passed through ``fix_hour_ampm``,
        or ``(None, None)`` when nothing matched.
    """
    # try ساعت + num
    mtch = re.findall(r"ساعت \d+", st)
    if mtch:
        mtch_minute = st.find("دقیقه")
        minute = 0
        if mtch_minute == -1:
            # try minute_literals
            min_liter = [m for m in minute_literals if m in st]
            if min_liter:
                minute = minute_literals[min_liter[0]]
        else:
            # try num + دقیقه
            mtch_min = re.findall(r"\d+ دقیقه", st)
            if mtch_min:
                try:
                    minute = int(mtch_min[0].strip("دقیقه"))
                except Exception:
                    pass
            else:
                # try writed number + دقیقه
                probable_min = cleaning(st[:mtch_minute])
                m_n = [w for w in word_tokenize(probable_min)
                       if w in perstr_to_num]
                if m_n:
                    minute = convertStr2num(" ".join(m_n))
        try:
            hour = int(mtch[0].strip("ساعت"))
        except Exception:
            hour = 0
        return fix_hour_ampm(st, hour), minute

    # try ساعت + writed num
    mtch = st.find("ساعت")
    if mtch != -1:
        probable_hour = st[mtch + len("ساعت"):]
        h_n = [w for w in word_tokenize(probable_hour) if w in perstr_to_num]
        if h_n:
            h_n.append("صفر")
            mtch_minute = st.find("دقیقه")
            if mtch_minute == -1:
                minute = 0
                hour = convertStr2num(" ".join(h_n))
                # try minute literals
                m_l = [m for m in minute_literals if m in st]
                if m_l:
                    minute = minute_literals[m_l[0]]
                try:
                    if hour > 24:
                        raise Exception
                    return fix_hour_ampm(st, hour), minute
                except Exception:
                    pass
            # try num + دقیقه
            mtch_m = re.findall(r"\d+ دقیقه", st)
            if mtch_m:
                try:
                    minute = int(mtch_m[0].strip("دقیقه"))
                    hour = convertStr2num(" ".join(h_n))
                    if hour > 24:
                        raise Exception
                    return fix_hour_ampm(st, hour), minute
                except Exception:
                    pass
            # maybe the writed numbers are for minute too!
            if len(h_n) > 2:
                try:
                    hour = convertStr2num(h_n[0])
                    minute = convertStr2num(" ".join(h_n[1:]))
                    if hour > 24 or minute > 60:
                        raise Exception
                    return fix_hour_ampm(st, hour), minute
                except Exception:
                    try:
                        if len(h_n) >= 3:
                            h_n.append("صفر")
                            # hours are at maximum 2 numbers (<24)
                            hour = convertStr2num(" ".join(h_n[:2]))
                            minute = convertStr2num(" ".join(h_n[2:]))
                            if hour > 24:
                                raise Exception
                            return fix_hour_ampm(st, hour), minute
                        else:
                            raise Exception
                    except Exception:
                        try:
                            hour = convertStr2num(" ".join(h_n[:-2]))
                            # NOTE(review): h_n[-2] is a single word, so this
                            # join puts spaces between its characters;
                            # h_n[-2:] was probably intended. Left unchanged.
                            minute = convertStr2num(" ".join(h_n[-2]))
                            if hour > 24:
                                raise Exception
                            return fix_hour_ampm(st, hour), minute
                        except Exception:
                            pass
            else:
                try:
                    hour = convertStr2num(" ".join(h_n[:-1]))
                    minute = 0
                    if hour > 24:
                        raise Exception
                    # Was fix_hour_ampm(hour): the missing ``st`` argument
                    # raised TypeError, which the except below swallowed, so
                    # this branch could never return a result.
                    return fix_hour_ampm(st, hour), minute
                except Exception:
                    pass

    # try time literals
    t_l = [tl for tl in time_literals if tl in st]
    if t_l:
        # try num + tl
        mtch = re.findall(r"\d+ {}".format(t_l[0]), st)
        if mtch:
            try:
                h = int(mtch[0].strip(t_l[0]))
                t = datetime.datetime.now() + datetime.timedelta(hours=h * time_literals[t_l[0]])
                hour, minute = t.hour, t.minute
                return fix_hour_ampm(st, hour), minute
            except Exception:
                pass
        # try writed number + tl
        probable_number = st[:st.find(t_l[0])]
        number_words = word_tokenize(probable_number)
        # Iterates perstr_to_num, so h_n follows the dict's order rather than
        # the order of the words in the text.
        h_n = [n for n in perstr_to_num if n in number_words]
        if h_n:
            try:
                h = convertStr2num(" ".join(h_n))
                t = datetime.datetime.now() + datetime.timedelta(hours=h * time_literals[t_l[0]])
                hour, minute = t.hour, t.minute
                return fix_hour_ampm(st, hour), minute
            except Exception:
                pass
        # if none of the above return tl itself
        try:
            t = datetime.datetime.now() + \
                datetime.timedelta(hours=time_literals[t_l[0]])
            hour, minute = t.hour, t.minute
            return fix_hour_ampm(st, hour), minute
        except Exception:
            pass

    # let's just return a number as hour:
    if force_return:
        mtch = re.findall(r"\d+", st)
        if mtch:
            hour = int(mtch[0])
            return fix_hour_ampm(st, hour), 0
    return None, None
def export_date_single(st_arr,
                       today_list,
                       calender_type_is_found,
                       calender_type,
                       no_period_return=False):
    """Extract one Gregorian ``datetime`` from a tokenised date expression.

    Strategies are tried in order: a full ``n-n-n`` date, an ``mm-dd`` pair
    with a separately located year, a Jalali / Gregorian / Hijri month name,
    a day literal (via ``day_exporter``), and finally a relative month
    literal from ``month_literals``.

    Args:
        st_arr: Sequence of string tokens of the expression.
        today_list: "Today" in each calendar, indexed by calendar type
            (0 Jalali, 1 Gregorian, 2 Hijri, as the branches below use it).
        calender_type_is_found: Whether the calendar type was stated
            explicitly; if not, ``-1`` is passed to
            ``build_date_fromisoformat``.
        calender_type: Index of the assumed calendar.
        no_period_return: Forwarded to ``day_exporter``.

    Returns:
        ``(date, (is_day_none, is_month_none, is_year_none))``. ``date`` is
        ``None`` when nothing matched.
    """
    today = today_list[calender_type]
    is_day_none = False
    is_month_none = False
    is_year_none = False
    st = "".join(st_arr).replace("/", "-").replace(",", "-").replace(
        ".", "-").replace("\\", "-")
    st = cleaning(st)
    st_space = cleaning(" ".join(st_arr))

    # try a full n-n-n date
    mtch = re.findall(r"\d+\-\d+\-\d+", st)
    if mtch:
        try:
            if calender_type_is_found:
                d_ = build_date_fromisoformat(mtch[0], calender_type)
            else:
                d_ = build_date_fromisoformat(mtch[0], -1)
            return d_, (is_day_none, is_month_none, is_year_none)
        except Exception:
            pass

    # try mm-dd format
    mtch = re.findall(r"\d+\-\d+", st)
    if mtch:
        try:
            mtch_n = mtch[0].split("-")
            day = int(mtch_n[1])
            month = int(mtch_n[0])
            if month > 12:
                # Cannot be a month; treat the pair as dd-mm.
                day, month = month, day
            # Look for the year after the match, then before it, then
            # anywhere, and finally fall back to the current year.
            mtch_place = st_space.find(mtch_n[-1])
            year = year_exporter(st_space[mtch_place + len(mtch_n[-1]):],
                                 today)
            if not year:
                mtch_place = st.find(mtch[0])
                year = year_exporter(st_space[:mtch_place], today)
            if not year:
                year = year_exporter(st_space, today)
            if not year:
                year = today.year
            if calender_type == 0:
                year, month, day = jalali_to_gregorian(year, month, day)
                d_ = datetime.datetime(year, month, day)
            elif calender_type == 2:
                d_ = convert.Hijri(year, month, day).to_gregorian()
                year, month, day = d_.year, d_.month, d_.day
                d_ = datetime.datetime(year, month, day) + \
                    datetime.timedelta(1)
            else:
                d_ = datetime.datetime(year, month, day)
            return d_, (is_day_none, is_month_none, is_year_none)
        except Exception:
            pass

    st = st_space

    # try monthes name (Jalali)
    mtch = re.findall(" | ".join(shamsimonthes.keys()), st)
    if not mtch:
        # The joined pattern needs a trailing space; also accept a month
        # name at the very end of the text.
        for s in shamsimonthes.keys():
            mtch = re.findall(r"\s{}$".format(s), st)
            if mtch:
                break
    if mtch:
        month = shamsimonthes[cleaning(mtch[0])]
        today = today_list[0]
        month_place = st_space.find(cleaning(mtch[0]))
        try:
            day, year, is_day_none, is_year_none = month_matched(
                st_space, month_place, today_list[0])
            if not is_day_none:
                year, month, day = jalali_to_gregorian(year, month, day)
                d_ = datetime.datetime(year, month, day)
                return d_, (is_day_none, is_month_none, is_year_none)
        except Exception:
            pass

    # Gregorian month names
    mtch = re.findall(" | ".join(miladimonthes.keys()), st)
    if not mtch:
        for s in miladimonthes.keys():
            mtch = re.findall(r"\s{}$".format(s), st)
            if mtch:
                break
    if mtch:
        month = miladimonthes[cleaning(mtch[0])]
        today = today_list[1]
        month_place = st_space.find(cleaning(mtch[0]))
        try:
            day, year, is_day_none, is_year_none = month_matched(
                st_space, month_place, today, 1)
            if not is_day_none:
                d_ = datetime.datetime(year, month, day)
                return d_, (is_day_none, is_month_none, is_year_none)
        except Exception:
            pass

    # Hijri month names
    mtch = re.findall(" | ".join(qamariMonthes.keys()), st)
    if not mtch:
        for s in qamariMonthes.keys():
            mtch = re.findall(r"\s{}$".format(s), st)
            if mtch:
                break
    if mtch:
        month = qamariMonthes[cleaning(mtch[0])]
        today = today_list[2]
        month_place = st_space.find(cleaning(mtch[0]))
        try:
            day, year, is_day_none, is_year_none = month_matched(
                st_space, month_place, today_list[2], 2)
            if not is_day_none:
                d_ = convert.Hijri(year, month, day).to_gregorian()
                d_ = datetime.datetime(d_.year, d_.month, d_.day) + datetime.timedelta(1)
                return d_, (is_day_none, is_month_none, is_year_none)
        except Exception:
            pass

    # try day literals
    day, is_day_literal = day_exporter(st, today_list[1], no_period_return)
    if day and is_day_literal:
        d_ = day
        is_month_none = True
        is_year_none = True
        return d_, (is_day_none, is_month_none, is_year_none)

    # try month literals
    d_m = [l for l in month_literals if l in st]
    if d_m:
        is_month_none = False
        # NOTE: ``today`` may have been rebound by a month-name branch above
        # that matched but did not return.
        month = today.month + month_literals[d_m[0]]
        if month > 12:
            month = month - 12
        elif month < 1:
            month = 12 + month
        month_loc = st.find(d_m[0])
        try:
            day, year, is_day_none, is_year_none = month_matched(
                st_space, month_loc, today)
            if calender_type == 0:
                year, month, day = jalali_to_gregorian(year, month, day)
                d_ = datetime.datetime(year, month, day)
            elif calender_type == 2:
                # Was ``year, _, day = convert.Hijri(...)``, a tuple-unpack
                # of a Hijri object whose error was swallowed below; convert
                # to Gregorian as the other Hijri branches do.
                g_ = convert.Hijri(year, month, day).to_gregorian()
                d_ = datetime.datetime(g_.year, g_.month, g_.day) + \
                    datetime.timedelta(1)
            else:
                d_ = datetime.datetime(year, month, day)
            return d_, (is_day_none, is_month_none, is_year_none)
        except Exception:
            pass
    return None, (is_day_none, is_month_none, is_year_none)
def year_exporter(st, today, calender_type=0):
    """Extract a year from text.

    Tried in order: a four-digit number; a relative expression from
    ``year_literals`` (optionally scaled by a written number of at most 20
    placed before it); a two-digit number greater than 31, read as 13xx
    when ``calender_type`` is 0; and finally a year written in words.

    Args:
        st: Text to search.
        today: Object whose ``.year`` anchors relative expressions.
        calender_type: ``0`` enables the two-digit "13xx" shorthand.

    Returns:
        The year as an ``int``, or ``None`` when none was found or any step
        raised.
    """
    year = re.findall(r"\d{4}", st)
    try:
        if year:
            return int(year[0])

        # try year literals
        year_literal_ = [l for l in year_literals if l in st]
        if year_literal_:
            probable_year = cleaning(year_literal_[0])
            # Written numbers before the literal scale the offset.
            before_literal = st[:st.find(probable_year)]
            pn = [w for w in word_tokenize(before_literal)
                  if w in perstr_to_num]
            if pn:
                try:
                    n = convertStr2num(" ".join(pn))
                    if n > 20:
                        raise Exception
                    return today.year + n * year_literals[year_literal_[0]]
                except Exception:
                    pass
            return today.year + year_literals[year_literal_[0]]

        # A two-digit year: mid-text, at the start, at the end, or at the
        # end before closing punctuation.
        year = re.findall(r" \d{2} ", st)
        if not year:
            year = re.findall(r"^\d{2} ", st)
        if not year:
            year = re.findall(r" \d{2}$", st)
        if not year:
            year = re.findall(r" \d{2}[؟|.]$", st)
            if year:
                year = [year[0].replace("؟", "").replace(".", "")]
        if year and int(year[0]) > 31 and calender_type == 0:
            # Values up to 31 could be a day of month, so they are skipped.
            return int(year[0]) + 1300

        # try writed numbers
        probable_year = [w for w in word_tokenize(st) if w in perstr_to_num]
        if probable_year:
            try:
                year = convertStr2num(" ".join(probable_year))
                if year < 1300:
                    if calender_type == 0 and year < 100 and year > 31:
                        year += 1300
                    else:
                        raise Exception
                return year
            except Exception:
                pass
    except Exception:
        return None
    return None
def event_exporter(question, tokens, labels):
    """Find the rows of ``df_event`` whose event is mentioned in a question.

    Three strategies are tried until one yields rows: exact event names in
    the question; the text following an ``event_literals`` keyword, stripped
    of digits, month names and ``after_event`` words; and cleaned event
    names in the question.

    Args:
        question: Question text.
        tokens: Unused by the active code (kept for the interface).
        labels: Unused by the active code (kept for the interface).

    Returns:
        A ``pandas.DataFrame`` of matching rows with one row per distinct
        event name; empty when nothing matched or the question has at most
        3 characters left after removing digits and ``after_event`` words.
    """
    modified_question = re.sub(r"\d+", "", question)
    for ae in after_event:
        modified_question = re.sub(ae, "", modified_question)
    if len(modified_question) <= 3:
        return pd.DataFrame()

    event_list = []
    # try matching exactly the names of the events:
    for i, e in enumerate(df_event["event"]):
        if e in question:
            event_list.append(df_event.iloc[i])
    # if not event_list:
    #     event_tokens = list(np.array(tokens)[np.where(
    #         (labels == "B_DAT") | (labels == "I_DAT"))])
    #     if len(event_tokens) > 0:
    #         if len(event_tokens) > 1:
    #             n_t = []
    #             for e in event_tokens:
    #                 if not e in after_event:
    #                     n_t.append(e)
    #             st = " ".join(n_t)
    #             st = cleaning(re.sub("\d+", "", st))
    #         else:
    #             st = event_tokens[0]
    #         for i, e in enumerate(df_event["event"]):
    #             if st in e:
    #                 event_list.append(df_event.iloc[i])
    if not event_list:
        # try finding event_literals
        for el in event_literals:
            if el in question:
                el_loc = question.find(el)
                if el_loc == 0:
                    el_q = question
                else:
                    el_q = cleaning(question[el_loc - 1:])
                # el_q = el_q.replace(el, "")
                el_q = re.sub(r"\d+", "", el_q)
                el_q = re.sub(" | ".join(shamsimonthes.keys()), "", el_q)
                el_q = re.sub(" | ".join(miladimonthes.keys()), "", el_q)
                el_q = re.sub(" | ".join(qamariMonthes.keys()), "", el_q)
                el_q = cleaning(el_q)
                for ae in after_event:
                    if ae in el_q:
                        el_q = cleaning(el_q.replace(ae, ""))
                el_q = cleaning(el_q.replace("است", ""))
                for i, e in enumerate(df_event["event"]):
                    if (el_q in e) or (e in el_q):
                        event_list.append(df_event.iloc[i])
    # let's once again search in the entire question
    if not event_list:
        for i, e in enumerate(df_event["event"]):
            if cleaning(e) in question:
                event_list.append(df_event.iloc[i])

    # form the data frame
    event_list = pd.DataFrame(event_list)
    if not event_list.empty:
        # The old check compared len(column.to_numpy()) with len(frame),
        # which are always equal, so duplicates were never removed. Keep the
        # first row per event name, as the old loop intended.
        event_list = event_list.drop_duplicates(subset="event", keep="first")
    return event_list
def exact_check_event(text, today_gregorian, today_hijri, today_jalali,
                      calender_type=0):
    """Resolve an event mentioned in ``text`` to a concrete Gregorian date.

    The event is looked up in ``df_event`` (columns used here: ``event``,
    ``g_d`` as an ISO date string, ``j_d`` / ``h_d`` as ``"(y, m, d)"``
    tuple literals and ``calender_type`` with values ``"g"``, ``"j"`` or
    anything else for Hijri).  Four increasingly loose matching passes are
    tried.  The year is taken from ``text`` via ``year_exporter``; when the
    text carries no year the current Gregorian year is used.

    Args:
        text: The phrase that is expected to name an event.
        today_gregorian: Object with a ``year`` attribute (Gregorian today).
        today_hijri: Unused by the current implementation.
        today_jalali: Passed to ``year_exporter`` as the reference date.
        calender_type: Forwarded to ``year_exporter``.

    Returns:
        ``(date, flags)``.  On success ``date`` is a ``datetime.datetime``
        and ``flags`` is ``(False, False, <event name>)``; when no event
        can be determined the result is ``(None, (True, True, True))``.
        NOTE(review): the meaning of the individual flag positions is
        defined by the callers and is not visible in this function.
    """
    not_found = (None, (True, True, True))

    def rows_containing(fragment):
        """Return the ``df_event`` rows whose event name contains fragment."""
        return [
            df_event.iloc[i] for i, name in enumerate(df_event["event"])
            if fragment in name
        ]

    # Nothing to look up if the text is only digits and filler words.
    stripped = re.sub(r"\d+", "", text)
    for ae in after_event:
        stripped = stripped.replace(ae, "")
    if not stripped:
        return not_found

    # Pass 1: an event name occurs verbatim in the text.
    event_list = [
        df_event.iloc[i] for i, name in enumerate(df_event["event"])
        if name in text
    ]

    # Pass 2: drop year literals, digits and number words, then search the
    # cleaned remainder inside the event names.
    if not event_list and text not in wrong_date:
        st = text
        for yl in year_literals:
            if yl in st:
                st = re.sub(yl, "", st)
        st = re.sub(r"\d+", "", st)
        for num in perstr_to_num:
            if " " + num + " " in st:
                st = re.sub(" " + num + " ", " ", st)
        st = cleaning(st)
        if len(st) <= 3:
            return not_found
        event_list.extend(rows_containing(st))

    # Pass 3: drop whole tokens that are after_event words.
    if not event_list and text not in wrong_date:
        kept_tokens = [
            tok for tok in word_tokenize(text) if tok not in after_event
        ]
        st = cleaning(re.sub(r"\d+", "", " ".join(kept_tokens)))
        # An empty needle is contained in every event name and would select
        # the whole table, so it is skipped.
        if st:
            event_list.extend(rows_containing(st))

    # Pass 4: like pass 2, additionally removing before_events words.
    if not event_list and text not in wrong_date:
        st = text
        for yl in year_literals:
            if yl in st:
                st = re.sub(yl, "", st)
        st = re.sub(r"\d+", "", st)
        for be in before_events:
            st = re.sub(" {} ".format(be), " ", st)
            st = re.sub("^{} ".format(be), " ", st)
        for num in perstr_to_num:
            if " " + num + " " in st:
                st = re.sub(" " + num + " ", " ", st)
        if len(st) <= 3:
            return not_found
        event_list.extend(rows_containing(st))

    events = pd.DataFrame(event_list)
    if events.empty:
        return not_found

    # Decide which calendar the requested year belongs to from its range.
    year = year_exporter(text, today_jalali, calender_type=calender_type)
    if year is None:
        year = today_gregorian.year
        year_type = "g_d"
    elif year >= 1600:
        year_type = "g_d"
    elif year > 1420:
        year_type = "h_d"
    else:
        year_type = "j_d"

    # Prefer a table row that is already dated in the requested year.
    if year_type == "g_d":
        pattern = r"{}-\d+-\d+".format(year)
    else:
        pattern = r"[(]{}, \d+, \d+[)]".format(year)
    event_date = [
        events.iloc[i] for i in range(len(events))
        if re.findall(pattern, events[year_type].iloc[i])
    ]
    if event_date:
        d = datetime.datetime.fromisoformat(event_date[-1]["g_d"])
        return d, (False, False, event_date[-1]["event"])

    # Otherwise shift the last matched row into the requested year, keeping
    # month and day in the event's own calendar.
    ev = events.iloc[-1]
    if ev["calender_type"] == "g":
        d = datetime.datetime.fromisoformat(ev["g_d"])
        if year_type == "j_d":
            year_of_that = gregorian_to_jalali(d.year, d.month, d.day)[0]
            dy = year_of_that - year
            d = datetime.datetime(d.year - dy, d.month, d.day)
        elif year_type == "h_d":
            year_of_that = convert.Gregorian(d.year, d.month,
                                             d.day).to_hijri().year
            dy = year_of_that - year
            d = datetime.datetime(d.year - dy, d.month, d.day)
        else:
            d = datetime.datetime(year, d.month, d.day)
    elif ev["calender_type"] == "j":
        d = ast.literal_eval(ev["j_d"])
        if year_type == "j_d":
            d = jalali_to_gregorian(year, d[1], d[2])
            d = datetime.datetime(d[0], d[1], d[2])
        elif year_type == "h_d":
            m_d = jalali_to_gregorian(d[0], d[1], d[2])
            year_of_that = convert.Gregorian(m_d[0], m_d[1],
                                             m_d[2]).to_hijri().year
            dy = year_of_that - year
            j_d = (d[0] - dy, d[1], d[2])
            d = jalali_to_gregorian(j_d[0], j_d[1], j_d[2])
            d = datetime.datetime(d[0], d[1], d[2])
        else:
            m_d = jalali_to_gregorian(d[0], d[1], d[2])[0]
            dy = m_d - year
            m_d = jalali_to_gregorian(d[0] - dy, d[1], d[2])
            d = datetime.datetime(m_d[0], m_d[1], m_d[2])
    else:
        # calender_type == "h"
        # NOTE(review): one day is added after every Hijri -> Gregorian
        # conversion below; presumably this compensates for an offset
        # between the converter and the local calendar - confirm.
        d = ast.literal_eval(ev["h_d"])
        m_d = convert.Hijri(d[0], d[1], d[2]).to_gregorian()
        if year_type == "j_d":
            year_of_that = gregorian_to_jalali(m_d.year, m_d.month,
                                               m_d.day)[0]
            dy = year_of_that - year
            y = convert.Gregorian(m_d.year - dy, m_d.month,
                                  m_d.day).to_hijri().year
            m_d = convert.Hijri(y, d[1], d[2]).to_gregorian()
            d = datetime.datetime(m_d.year, m_d.month,
                                  m_d.day) + datetime.timedelta(1)
        elif year_type == "g_d":
            dy = m_d.year - year
            m_d = convert.Hijri(d[0] - dy, d[1], d[2]).to_gregorian()
            d = datetime.datetime(m_d.year, m_d.month,
                                  m_d.day) + datetime.timedelta(1)
        else:
            # year_type == "h_d"
            h_d = (year, d[1], d[2])
            m_d = convert.Hijri(h_d[0], h_d[1], h_d[2]).to_gregorian()
            d = datetime.datetime(m_d.year, m_d.month,
                                  m_d.day) + datetime.timedelta(1)
    return d, (False, False, ev["event"])
def get_answer(self, question, tokens, labels):
    """Answer a time question, optionally converting between time zones.

    The date and time are extracted from the question (falling back to
    today / now), the mentioned locations are resolved to time zones, and
    one of the following is produced:

    * no location: the time itself, or the time left until / passed since
      it when one of ``hours_left_asked`` occurs in the question;
    * exactly one time zone: delegated to ``self.get_single_answer``;
    * two time zones and one of ``time_reverse_convert`` in the question:
      the given foreign time converted to ``self.local_time``;
    * otherwise: the local time converted to every resolved zone, or the
      difference between the first and last zone when one of
      ``hours_difference_asked`` occurs in the question.

    Args:
        question: The question text.
        tokens: Tokens of the question, forwarded to the extractors.
        labels: Labels of the tokens, forwarded to the extractors.

    Returns:
        ``(answer, sentence)`` where ``answer`` is the dict built below and
        ``sentence`` is the generated sentence passed through ``cleaning``.
    """
    answer = {
        'type': ['3'],
        'city': [],
        'date': [],
        'time': [],
        'religious_time': [],
        'calendar_type': [],
        'event': [],
        'api_url': [''],
        'result': []
    }
    generated_sentence = ""

    # --- date: taken from the question, defaulting to today --------------
    date = datetime.datetime.today().date()
    exportd = export_date(question, tokens, labels)
    no_date = True
    if exportd[0][0] is not None:
        date = exportd[0][0].date()
        answer["date"] = [
            format_jalali_date(
                gregorian_to_jalali(date.year, date.month, date.day))
        ]
        no_date = False

    # --- time(s): taken from the question, defaulting to now -------------
    time_list = []
    time_iso = []
    exporttime, is_adhan, adhan_url, adhan_names = export_time(
        question, tokens, labels)
    if is_adhan:
        answer["religious_time"] = adhan_names
        answer["api_url"].append(adhan_url)
    for t in exporttime:
        if t is not None:
            time_list.append(t.strftime("%H:%M"))
            time_iso.append(datetime.datetime.combine(date, t))
    no_time = not time_iso
    if no_time:
        now = datetime.datetime.now()
        time_list.append(now.strftime("%H:%M"))
        time_iso.append(datetime.datetime.combine(date, now.time()))
    answer["time"] = time_list

    # --- locations: the user's own city alone counts as "no location" ----
    location = list(
        unique_without_sort(location_handler(question, tokens, labels)))
    if len(location) == 1 and location[0] == USER_CITY:
        location = []
    answer["city"] = location

    if not location:
        answer["result"].append(time_list[0])
        if no_time:
            generated_sentence = "الآن {} است".format(
                tr_single_time(time_iso[0]))
        else:
            generated_sentence = "{} میباشد".format(
                tr_single_time(time_iso[0]))
        if any(h in question for h in hours_left_asked):
            # "How long until / since" the requested moment.
            tnow = datetime.datetime.now()
            dt = abs(tnow - time_iso[0])
            r, gt = self.format_time_delta(dt)
            answer["result"] = [r]
            if no_date:
                if time_iso[0] > tnow:
                    generated_sentence = "تا {}، {} مانده است".format(
                        tr_single_time(time_iso[0]), gt)
                else:
                    generated_sentence = "از {}، {} گذشته است".format(
                        tr_single_time(time_iso[0]), gt)
            else:
                if time_iso[0] > tnow:
                    generated_sentence = "تا {} {}، {} مانده است".format(
                        tr_single_time(time_iso[0]), tr_single_date(date),
                        gt)
                else:
                    generated_sentence = "از {} {}، {} گذشته است".format(
                        tr_single_time(time_iso[0]), tr_single_date(date),
                        gt)
        return answer, cleaning(generated_sentence)

    # --- resolve locations to time zones ---------------------------------
    # Locations listed in countries_df are treated as countries; the last
    # character of the matching "summary" value is dropped.
    location_country = []
    for loc in location:
        c = self.countries_df["summary"].iloc[np.where(
            self.countries_df["title"] == loc)].to_numpy()
        if len(c) > 0:
            location_country.append(c[0][:-1])

    time_zone_list = []
    if len(location) > len(location_country):
        # At least one location is not a country: look it up as a city.
        for loc in location:
            if loc in location_country:
                continue
            city_info = Weather.get_city_info(loc)
            if city_info is None:
                # Retry with a leading "ا" replaced by "آ".
                city_info = Weather.get_city_info(re.sub("^ا", "آ", loc))
            if city_info is not None:
                p_tz = self.possible_timezones(city_info["timezone"])
                if not p_tz:
                    p_tz = self.possible_timezones(city_info["timezone"],
                                                   False)
                if p_tz:
                    time_zone_list.append(p_tz[0])
    if location_country:
        location_country = list(unique_without_sort(location_country))
        for country in location_country:
            # NOTE(review): zone_df's columns are named "AD" and
            # "Europe/Andorra", which look like the first data row of a
            # file read without a header - confirm.
            tz = self.zone_df["Europe/Andorra"].iloc[np.where(
                self.zone_df["AD"] == country)].to_numpy()
            if len(tz) >= 1:
                time_zone_list.append(tz[0])

    is_reversed_asked = any(r in question for r in time_reverse_convert)

    if len(time_zone_list) == 1:
        single_ans, generated_sentence = self.get_single_answer(
            question, [location[-1]], time_zone_list, time_iso)
        answer["result"] = single_ans if isinstance(
            single_ans, list) else [single_ans]
    elif len(time_zone_list) == 2 and is_reversed_asked:
        # The given time is in the foreign zone; convert it to local time.
        if "Asia/Tehran" in time_zone_list:
            # Guarded: list.remove raises ValueError for a missing item.
            time_zone_list.remove("Asia/Tehran")
        timzon = pytz.timezone(time_zone_list[0])
        t = timzon.localize(time_iso[0], is_dst=None).astimezone(pytz.utc)
        t = t.astimezone(self.local_time)
        answer["result"] = [t.strftime("%H:%M")]
        new_location = copy(location)
        # Drop the home location (city first, otherwise country) so that
        # the foreign one is named in the sentence.
        for home in ("تهران", "ایران"):
            if home in new_location:
                new_location.remove(home)
                break
        generated_sentence = "{} در {}، {} به وقت تهران میباشد".format(
            tr_single_time(time_iso[0], literal=False), new_location[0],
            tr_single_time(t, literal=False))
    else:
        # Convert the local time to every resolved zone.
        time_list = []
        for tz in time_zone_list:
            time_zone = pytz.timezone(tz)
            t = self.local_time.localize(time_iso[0],
                                         is_dst=None).astimezone(pytz.utc)
            t = t.astimezone(time_zone)
            time_list.append(
                datetime.datetime(t.year, t.month, t.day, t.hour, t.minute))

        is_hours_difference_asked = any(
            h in question for h in hours_difference_asked)
        # A difference can only be computed when a zone was resolved.
        if is_hours_difference_asked and time_list:
            dt = abs(time_list[0] - time_list[-1])
            r, gt = self.format_time_delta(dt)
            generated_sentence = "اختلاف زمان {} و {}، {} است".format(
                location[0], location[-1], gt)
            answer["result"] = [r]
        else:
            new_location = copy(location)
            if (len(time_zone_list) == 2
                    and "Asia/Tehran" in time_zone_list):
                time_zone_list.remove("Asia/Tehran")
                new_location.remove(USER_CITY)
            location = new_location
            t_s = [t.strftime("%H:%M") for t in time_list]
            if len(t_s) != len(location):
                t_s = unique_without_sort(t_s)
            l_n = len(location)
            if l_n == len(t_s):
                generated_sentence = "{} ".format(
                    tr_single_time(time_iso[0], True))
                for i, (t, lc) in enumerate(zip(time_list, location)):
                    if i != l_n - 1:
                        generated_sentence += "در {}، {} و ".format(
                            lc, tr_single_time(t, True))
                    else:
                        generated_sentence += "در {}، {} ".format(
                            lc, tr_single_time(t, True))
                generated_sentence = generated_sentence + "میباشد"
            answer["result"] = t_s
    return answer, cleaning(generated_sentence)