def hasModifierText(tpentity):
    """Find the first modifier word (e.g. "nearly", "early", "fy") in a phrase.

    The phrase text is lower-cased, commas are removed, and the result is
    split on single spaces. Each token is then compared against the known
    modifier words.

    The previous implementation returned from inside the loop after looking
    at the first token only, so a modifier anywhere else in the phrase
    (for example "the late 1990s") was never reported. Every token is now
    examined.

    Args:
        tpentity: object exposing ``getText()`` that returns the phrase text.

    Returns:
        ``(True, token, start, end)`` for the first token that is a modifier
        word, where ``start``/``end`` are whatever ``calculateSpan`` reports
        for that token within the lower-cased, comma-free text; otherwise
        ``(False, None, None, None)``.
    """
    modifier_terms = ("nearly", "almost", "<", "late", "mid", "fiscal", "fy",
                      "over", "early", "approximately", "beginning")

    text_norm = tpentity.getText().lower().translate(
        str.maketrans("", "", ","))

    for token in text_norm.split(" "):
        # The original test ("some term is inside the token" and "the token
        # is inside some term") is only satisfiable by tokens that are
        # themselves one of the terms, so plain membership is equivalent.
        if token in modifier_terms:
            start_idx, end_idx = calculateSpan(text_norm, token)
            return True, token, start_idx, end_idx

    return False, None, None, None
def hasPartOfWeek(tpentity):
    """Report whether the phrase mentions exactly one weekend term.

    The phrase text is used with its original casing; all ASCII punctuation
    is deleted and the remainder is split on single spaces.

    Args:
        tpentity: object exposing ``getText()`` that returns the phrase text.

    Returns:
        ``(True, "Weekend", start, end)`` when exactly one of "weekend" /
        "weekends" appears as a token (``start``/``end`` come from
        ``calculateSpan`` on the punctuation-free text); otherwise
        ``(False, None, None, None)``.
    """
    weekend_terms = {"weekend", "weekends"}

    stripped = tpentity.getText().translate(
        str.maketrans("", "", string.punctuation))
    found = list(weekend_terms.intersection(stripped.split(" ")))

    # Zero hits means nothing to report; two hits is treated as ambiguous.
    if len(found) != 1:
        return False, None, None, None

    word = found[0]
    span_start, span_end = calculateSpan(stripped, word)
    if word in ("weekend", "weekends"):
        return True, "Weekend", span_start, span_end
    return False, None, None, None
def hasAMPM(tpentity):
    """Detect an AM/PM marker in the phrase.

    Commas are removed from the phrase text (casing is kept) and the text is
    split on single spaces. Tokens are scanned in order; within a token the
    AM pattern is tried before the PM pattern.

    The patterns are raw strings: the previous non-raw literals contained
    ``\\.`` which is an invalid string escape, and each pattern was evaluated
    twice per token.

    Args:
        tpentity: object exposing ``getText()`` that returns the phrase text.

    Returns:
        ``(True, "AM" | "PM", start, end)`` for the first token containing a
        marker, where ``start``/``end`` are what ``calculateSpan`` reports
        for the matched marker text in the comma-free phrase; otherwise
        ``(False, None, None, None)``.
    """
    marker_patterns = (
        (r'AM|A\.M\.|am|a\.m\.', "AM"),
        (r'PM|P\.M\.|pm|p\.m\.', "PM"),
    )

    text_norm = tpentity.getText().translate(str.maketrans("", "", ","))

    for token in text_norm.split(" "):
        for pattern, label in marker_patterns:
            found = re.search(pattern, token)
            if found:
                start_idx, end_idx = calculateSpan(text_norm, found.group(0))
                return True, label, start_idx, end_idx

    return False, None, None, None
def has24HourTime(tpentity, flags):
    """Detect a 24-hour time ("1430") or a time-zone stamped time ("1430EST").

    The phrase text is split on single spaces and tokens are scanned in
    order:

    * A token of exactly four characters is treated as ``HHMM`` when
      ``utils.getNumberFromText`` yields a number for the whole token and
      for each half. An out-of-range value ends the search with a negative
      result. Minute 60 is now rejected (the previous check was
      ``minute > 60``, which let "1260" through).
    * Any other token is accepted when it contains up to four digits
      followed by one of the listed time-zone abbreviations; the matched
      text is returned without range checking.

    Args:
        tpentity: object exposing ``getText()`` that returns the phrase text.
        flags: mapping; nothing is reported when ``flags["loneDigitYear"]``
            is truthy.

    Returns:
        ``(True, matched_text, start, end)`` with ``start``/``end`` from
        ``calculateSpan`` on the original phrase text, or
        ``(False, None, None, None)``.
    """
    stext = tpentity.getText()

    if flags["loneDigitYear"]:
        # The number has already been claimed as a lone year.
        return False, None, None, None

    tz_pattern = (r'\d{0,4}(AST|EST|EDT|CST|CDT|MST|MDT|PST|PDT|AKST|HST|'
                  r'HAST|HADT|SST|SDT|GMT|CHST|UTC)')

    for token in stext.split(" "):
        if len(token) == 4:
            # Four-character tokens are only ever considered as HHMM; they
            # are never checked against the time-zone pattern.
            if utils.getNumberFromText(token) is None:
                continue
            hour = utils.getNumberFromText(token[:2])
            minute = utils.getNumberFromText(token[2:])
            if hour is None or minute is None:
                continue
            if minute > 59 or hour > 24:
                return False, None, None, None
            start_idx, end_idx = calculateSpan(stext, token)
            return True, token, start_idx, end_idx

        tz_match = re.search(tz_pattern, token)
        if tz_match is not None:
            time_text = tz_match[0]
            start_idx, end_idx = calculateSpan(stext, time_text)
            return True, time_text, start_idx, end_idx

    return False, None, None, None
def hasPartOfDay(tpentity):
    """Report which part of the day the phrase mentions, if exactly one.

    The phrase text is lower-cased, all ASCII punctuation is deleted, and
    the result is split on single spaces. The tokens are intersected with
    the part-of-day vocabulary; a result is produced only when exactly one
    distinct vocabulary word is present.

    "evenings" is now part of the vocabulary. The previous word list
    contained "evening" twice and no plural, so the plural could never be
    matched even though the labelling logic handled it.

    Args:
        tpentity: object exposing ``getText()`` that returns the phrase text.

    Returns:
        ``(True, label, start, end)`` where ``label`` is one of "Morning",
        "Dawn", "Evening", "Afternoon", "Night", "Noon", "Midnight" or
        "Unknown" (used for "bedtime"); otherwise
        ``(False, None, None, None)``. For "night", "overnight" and
        "tonight" the span covers only the substring "night"; for every
        other word it is what ``calculateSpan`` reports for that word in
        the normalized text.
    """
    labels = {
        "morning": "Morning",
        "mornings": "Morning",
        "dawn": "Dawn",
        "evening": "Evening",
        "evenings": "Evening",
        "dusk": "Evening",
        "eve": "Evening",
        "afternoon": "Afternoon",
        "afternoons": "Afternoon",
        "nights": "Night",
        "noon": "Noon",
        "bedtime": "Unknown",
        "midnight": "Midnight",
    }
    # These share the embedded word "night" and are anchored on it.
    night_terms = {"night", "overnight", "tonight"}

    text_norm = tpentity.getText().lower().translate(
        str.maketrans("", "", string.punctuation))
    found = (set(labels) | night_terms) & set(text_norm.split(" "))

    # More than one distinct term is ambiguous, so it is not reported.
    if len(found) != 1:
        return False, None, None, None

    term = found.pop()
    if term in night_terms:
        night = re.search("night", text_norm)
        return True, "Night", night.start(), night.end()

    start_idx, end_idx = calculateSpan(text_norm, term)
    return True, labels[term], start_idx, end_idx
def hasEmbeddedPeriodInterval(tpentity):
    """Detect a number fused to a period word, such as "3days" or "5hrs".

    Every ASCII punctuation character in the phrase text is replaced with a
    space (casing is kept) and the text is split on single spaces. For each
    token, each search term is looked up in order; when a term starts after
    the first character and ``utils.getNumberFromText`` returns an ``int``
    for the leading part, the remainder of the token is classified.

    "hrs" is now classified as "Hour". It was already one of the search
    terms but had no label, so "5hrs" produced a negative result.

    Args:
        tpentity: object exposing ``getText()`` that returns the phrase text.

    Returns:
        ``(True, label, start, end, number_text)`` where ``label`` is "Day",
        "Week", "Month" or "Hour", ``start``/``end`` come from
        ``calculateSpan`` on the normalized text for the unit part, and
        ``number_text`` is the leading part of the token as a string;
        otherwise ``(False, None, None, None, None)``.
    """
    search_terms = ("day", "week", "month", "hour", "days", "weeks",
                    "months", "hours", "hrs")
    unit_labels = {
        "day": "Day", "daily": "Day", "days": "Day",
        "week": "Week", "weekly": "Week", "weeks": "Week",
        "month": "Month", "monthly": "Month", "months": "Month",
        "hour": "Hour", "hourly": "Hour", "hours": "Hour", "hrs": "Hour",
    }
    no_match = (False, None, None, None, None)

    text_norm = tpentity.getText().translate(
        str.maketrans(string.punctuation, ' ' * len(string.punctuation)))

    for token in text_norm.split(" "):
        for term in search_terms:
            idx = token.find(term)
            if idx <= 0:
                # Term absent, or token starts with it (no leading number).
                continue
            number_text, unit_text = token[:idx], token[idx:]
            if not isinstance(utils.getNumberFromText(number_text), int):
                continue
            label = unit_labels.get(unit_text)
            if label is None:
                # Kept from the original: a numeric prefix followed by an
                # unrecognized suffix ends the whole search.
                return no_match
            start_idx, end_idx = calculateSpan(text_norm, unit_text)
            return True, label, start_idx, end_idx, number_text

    return no_match
def hasSeasonOfYear(tpentity, ref_list):
    """Report the season named in the phrase when it is tagged as a noun.

    The phrase text is lower-cased, every ASCII punctuation character is
    replaced with a space, and the result is stripped and split on single
    spaces. A result is produced only when exactly one season word is
    present and the reference token located via ``utils.getRefIdx`` at the
    season's absolute span has part-of-speech tag "NN".

    Args:
        tpentity: object exposing ``getSpan()`` (start, end) and
            ``getText()``.
        ref_list: sequence indexed by ``utils.getRefIdx``; its items expose
            ``getPos()``.

    Returns:
        ``(True, label, start, end)`` with ``label`` one of "Summer",
        "Winter", "Fall", "Spring" and ``start``/``end`` the span of the
        singular season word from ``calculateSpan`` on the normalized text;
        otherwise ``(False, None, None, None)``.
    """
    # token -> (singular form used for the span, label returned to caller)
    seasons = {
        "summer": ("summer", "Summer"),
        "summers": ("summer", "Summer"),
        "winter": ("winter", "Winter"),
        "winters": ("winter", "Winter"),
        "fall": ("fall", "Fall"),
        "falls": ("fall", "Fall"),
        "spring": ("spring", "Spring"),
        "springs": ("spring", "Spring"),
    }

    phrase_start, _phrase_end = tpentity.getSpan()
    blank_punct = str.maketrans(string.punctuation,
                                ' ' * len(string.punctuation))
    text_norm = tpentity.getText().lower().translate(blank_punct).strip()

    hits = list(set(text_norm.split(" ")) & set(seasons))
    if len(hits) != 1:
        return False, None, None, None

    term = hits[0]
    # Span of the matched token; superseded just below by the span of the
    # singular form.
    start_idx, end_idx = calculateSpan(text_norm, term)

    singular, label = seasons[term]
    start_idx, end_idx = calculateSpan(text_norm, singular)
    ref_idx = utils.getRefIdx(ref_list, phrase_start + start_idx,
                              phrase_start + end_idx)
    if ref_list[ref_idx].getPos() == "NN":
        return True, label, start_idx, end_idx

    return False, None, None, None
def hasPeriodInterval(tpentity):
    """Classify the period/interval word contained in the phrase.

    Phrases containing "date/time" are never annotated. Otherwise the text
    is lower-cased, every ASCII punctuation character is replaced with a
    space, and the result is stripped and split on single spaces.

    * Exactly one known term present: that term is classified.
    * Several known terms present: a result is produced only when exactly
      one of them belongs to the preferred set (daily, weekly, monthly,
      yearly, days, weeks, months, years).

    Args:
        tpentity: object exposing ``getText()`` that returns the phrase text.

    Returns:
        ``(True, label, start, end, False)`` where ``label`` is "Day",
        "Week", "Month", "Year", "Century", "Decade", "Minute", "Second",
        "Hour" or "Unknown" and ``start``/``end`` come from
        ``calculateSpan`` on the normalized text; otherwise
        ``(False, None, None, None, None)``.
    """
    no_match = (False, None, None, None, None)

    label_by_term = {
        "day": "Day", "daily": "Day", "days": "Day",
        "yesterday": "Day", "yesterdays": "Day",
        "tomorrow": "Day", "tomorrows": "Day",
        "today": "Day", "todays": "Day",
        "week": "Week", "weekly": "Week", "weeks": "Week",
        "month": "Month", "monthly": "Month", "months": "Month",
        "year": "Year", "yearly": "Year", "years": "Year", "annual": "Year",
        "century": "Century", "centuries": "Century",
        "decade": "Decade", "decades": "Decade",
        "minute": "Minute", "minutes": "Minute",
        "min": "Minute", "mins": "Minute",
        "second": "Second", "seconds": "Second",
        "hour": "Hour", "hourly": "Hour", "hours": "Hour",
        "hr": "Hour", "hrs": "Hour",
        "time": "Unknown", "shortly": "Unknown", "soon": "Unknown",
        "briefly": "Unknown", "awhile": "Unknown", "future": "Unknown",
        "lately": "Unknown", "quarter": "Unknown",
    }
    preferred = {"daily", "weekly", "monthly", "yearly",
                 "weeks", "days", "months", "years"}

    lowered = tpentity.getText().lower()
    # We don't want to annotate these specific types of mentions.
    if re.search("date/time", lowered):
        return no_match

    blank_punct = str.maketrans(string.punctuation,
                                ' ' * len(string.punctuation))
    text_norm = lowered.translate(blank_punct).strip()
    present = set(text_norm.split(" ")) & set(label_by_term)

    if not present:
        return no_match
    # With several terms present only the preferred ones can decide.
    candidates = present if len(present) == 1 else present & preferred
    if len(candidates) != 1:
        return no_match

    term = next(iter(candidates))
    span_start, span_end = calculateSpan(text_norm, term)
    return True, label_by_term[term], span_start, span_end, False
def hasTextMonth(tpentity, ref_list):
    """Find a month name or abbreviation that is tagged as a proper noun.

    Three passes are made over the lower-cased phrase text, in this order:
    full month names, abbreviations with a trailing period, abbreviations
    without a period. The first pass tokenizes after replacing every ASCII
    punctuation character with a space; the other two keep "." and replace
    the rest. Each pass strips the text and splits on single spaces.

    In a pass, the first token that is contained in some name and also
    contains some name is the only candidate of that pass. It is accepted
    when the reference token found through ``utils.getRefIdx`` at the
    candidate's absolute span has part-of-speech tag "NNP"; if not, the
    pass ends and the next one starts.

    Args:
        tpentity: object exposing ``getSpan()`` (start, end) and
            ``getText()``.
        ref_list: sequence indexed by ``utils.getRefIdx``; its items expose
            ``getPos()``.

    Returns:
        ``(True, month_name, start, end)`` with the capitalized English
        month name and the span ``calculateSpan`` reports for the matched
        name in the lower-cased phrase; otherwise
        ``(False, None, None, None)``.
    """
    phrase_start, _phrase_end = tpentity.getSpan()
    lowered = tpentity.getText().lower()

    # Insertion order matters: the first name contained in a token wins.
    full_names = {
        "january": "January", "february": "February", "march": "March",
        "april": "April", "may": "May", "june": "June", "july": "July",
        "august": "August", "september": "September",
        "october": "October", "november": "November",
        "december": "December",
    }
    dotted_abbrs = {
        "jan.": "January", "feb.": "February", "mar.": "March",
        "apr.": "April", "jun.": "June", "jul.": "July", "aug.": "August",
        "sept.": "September", "sep.": "September", "oct.": "October",
        "nov.": "November", "dec.": "December",
    }
    bare_abbrs = {
        "jan": "January", "feb": "February", "mar": "March",
        "apr": "April", "jun": "June", "jul": "July", "aug": "August",
        "sept": "September", "sep": "September", "oct": "October",
        "nov": "November", "dec": "December",
    }

    def first_candidate(tokens, labels):
        """Evaluate the first matching token of one pass; None if rejected."""
        for token in tokens:
            if not any(token in name for name in labels):
                continue
            hit = next((name for name in labels if name in token), None)
            if hit is None:
                continue
            begin, end = calculateSpan(lowered, hit)
            ref_idx = utils.getRefIdx(ref_list, phrase_start + begin,
                                      phrase_start + end)
            if ref_list[ref_idx].getPos() == "NNP":
                return True, labels[hit], begin, end
            # Only the first candidate of a pass is ever considered.
            return None
        return None

    all_punct = str.maketrans(string.punctuation,
                              ' ' * len(string.punctuation))
    punct_except_dot = '!"#$%&\'()*+,-/:;<=>?@[\\]^_`{|}~'
    keep_dots = str.maketrans(punct_except_dot,
                              ' ' * len(punct_except_dot))

    word_tokens = lowered.translate(all_punct).strip().split(" ")
    dotted_tokens = lowered.translate(keep_dots).strip().split(" ")

    passes = (
        (word_tokens, full_names),
        (dotted_tokens, dotted_abbrs),
        (dotted_tokens, bare_abbrs),
    )
    for tokens, labels in passes:
        outcome = first_candidate(tokens, labels)
        if outcome is not None:
            return outcome

    return False, None, None, None
def buildTextMonthAndDay(s,
                         chrono_id,
                         chrono_list,
                         flags,
                         dct=None,
                         ref_list=None):
    """Build month, day and year entities from a phrase with a textual month.

    ``hasTextMonth`` is asked for a month in ``s``. When one is found and
    ``flags["month"]`` is not yet set, a month-of-year entity is created,
    the text after the month is searched for a day (number <= 31) and/or a
    year (1500..2050), and ``hasModifier`` is consulted when the month does
    not start the phrase. Entities are appended to ``chrono_list``; the
    month entity is appended last.

    Args:
        s: phrase object exposing ``getSpan()`` and ``getText()``.
        chrono_id: running counter used to build entity ids; incremented for
            every entity created here.
        chrono_list: list that receives the created entities (mutated).
        flags: dict of detection flags; "month", "day" and "fourdigityear"
            may be set here, "loneDigitYear" is only read.
        dct: document creation time; only referenced inside the disabled
            ``if False:`` sections.
        ref_list: forwarded to ``hasTextMonth``.

    Returns:
        The tuple ``(chrono_list, chrono_id, flags)``.
    """
    boo, val, idxstart, idxend = hasTextMonth(s, ref_list)

    if boo and not flags["month"]:
        flags["month"] = True
        ref_Sspan, ref_Espan = s.getSpan()
        # Month offsets are relative to the phrase; shift to absolute.
        abs_Sspan = ref_Sspan + idxstart
        abs_Espan = ref_Sspan + idxend
        my_month_entity = chrono.chronoMonthOfYearEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            month_type=val)
        chrono_id = chrono_id + 1

        ## assume all numbers 1-31 are days
        ## assume all numbers >1000 are years
        ## parse all text before month
        ## test to see if all text is a number or text year
        ## if no:
        ##   remove all punctuation
        ##   separate by spaces
        ##   parse each token, if find a number then assign to day or year as appropriate
        ## if yes:
        ##   assign to day or year as appropriate

        ## parse all text after month
        ## test to see if all text is a number or text year
        ## if no:
        ##   remove all punctuation
        ##   separate by spaces
        ##   parse each token, if find a number then assign to day or year as appropriate
        ## if yes:
        ##   assign to day or year as appropriate

        # idxend is the last index of the month. If there are any characters
        # after it the length of the string will be greater than idxend.
        if (idxend < len(s.getText())):
            substr = s.getText()[idxend:].strip(",.").strip()

            num = utils.getNumberFromText(substr)
            if num is not None:
                # The whole trailing text is a single number.
                if num <= 31 and not flags["day"]:
                    flags["day"] = True
                    day_startidx, day_endidx = calculateSpan(
                        s.getText(), str(num))  #substr)
                    abs_Sspan = ref_Sspan + day_startidx
                    abs_Espan = ref_Sspan + day_endidx
                    my_day_entity = chrono.ChronoDayOfMonthEntity(
                        entityID=str(chrono_id) + "entity",
                        start_span=abs_Sspan,
                        end_span=abs_Espan,
                        value=num)
                    chrono_list.append(my_day_entity)
                    chrono_id = chrono_id + 1

                    #now figure out if it is a NEXT or LAST
                    #create doctime
                    # Disabled: the NEXT/LAST inference below never runs.
                    if False:  #dct is not None:
                        mStart = my_month_entity.get_start_span()
                        mEnd = my_month_entity.get_end_span()
                        this_dct = datetime.datetime(
                            int(dct.year),
                            int(
                                utils.getMonthNumber(
                                    my_month_entity.get_month_type())),
                            int(my_day_entity.get_value()), 0, 0)
                        if this_dct > dct:
                            chrono_list.append(
                                chrono.ChronoNextOperator(
                                    entityID=str(chrono_id) + "entity",
                                    start_span=mStart,
                                    end_span=mEnd,
                                    repeating_interval=my_month_entity.get_id(
                                    )))
                            chrono_id = chrono_id + 1
                        elif this_dct < dct:
                            chrono_list.append(
                                chrono.ChronoLastOperator(
                                    entityID=str(chrono_id) + "entity",
                                    start_span=mStart,
                                    end_span=mEnd,
                                    repeating_interval=my_month_entity.get_id(
                                    )))
                            chrono_id = chrono_id + 1
                elif num >= 1500 and num <= 2050 and not flags[
                        "fourdigityear"] and not flags["loneDigitYear"]:
                    flags["fourdigityear"] = True
                    year_startidx, year_endidx = calculateSpan(
                        s.getText(), substr)
                    abs_Sspan = ref_Sspan + year_startidx
                    abs_Espan = ref_Sspan + year_endidx

                    my_year_entity = chrono.ChronoYearEntity(
                        entityID=str(chrono_id) + "entity",
                        start_span=abs_Sspan,
                        end_span=abs_Espan,
                        value=num)
                    chrono_list.append(my_year_entity)
                    # The year takes the month as its sub-interval.
                    my_year_entity.set_sub_interval(my_month_entity.get_id())
                    chrono_id = chrono_id + 1
            else:
                ##parse and process each token
                ##replace punctuation
                substr = substr.translate(
                    str.maketrans(string.punctuation,
                                  ' ' * len(string.punctuation)))
                ##split on spaces
                tokenized_text = WhitespaceTokenizer().tokenize(substr)
                for i in range(0, len(tokenized_text)):
                    num = utils.getNumberFromText(tokenized_text[i])
                    if num is not None:
                        # NOTE(review): unlike the single-number branch
                        # above, flags["day"] is neither checked nor set
                        # here - confirm whether that is intended.
                        if num <= 31:
                            day_startidx, day_endidx = calculateSpan(
                                s.getText(), tokenized_text[i])
                            abs_Sspan = ref_Sspan + day_startidx
                            abs_Espan = ref_Sspan + day_endidx
                            my_day_entity = chrono.ChronoDayOfMonthEntity(
                                entityID=str(chrono_id) + "entity",
                                start_span=abs_Sspan,
                                end_span=abs_Espan,
                                value=num)
                            chrono_list.append(my_day_entity)
                            chrono_id = chrono_id + 1

                            #now figure out if it is a NEXT or LAST
                            #create doctime
                            # Disabled: this NEXT/LAST inference never runs.
                            if False:  #dct is not None:
                                mStart = my_month_entity.get_start_span()
                                mEnd = my_month_entity.get_end_span()
                                this_dct = datetime.datetime(
                                    int(dct.year),
                                    int(
                                        utils.getMonthNumber(
                                            my_month_entity.get_month_type())),
                                    int(my_day_entity.get_value()), 0, 0)
                                if this_dct > dct:
                                    chrono_list.append(
                                        chrono.ChronoNextOperator(
                                            entityID=str(chrono_id) + "entity",
                                            start_span=mStart,
                                            end_span=mEnd,
                                            repeating_interval=my_month_entity.
                                            get_id()))
                                    chrono_id = chrono_id + 1
                                elif this_dct < dct:
                                    chrono_list.append(
                                        chrono.ChronoLastOperator(
                                            entityID=str(chrono_id) + "entity",
                                            start_span=mStart,
                                            end_span=mEnd,
                                            repeating_interval=my_month_entity.
                                            get_id()))
                                    chrono_id = chrono_id + 1
                        elif num >= 1500 and num <= 2050 and not flags[
                                "fourdigityear"] and not flags["loneDigitYear"]:
                            flags["fourdigityear"] = True
                            year_startidx, year_endidx = calculateSpan(
                                s.getText(), tokenized_text[i])
                            abs_Sspan = ref_Sspan + year_startidx
                            abs_Espan = ref_Sspan + year_endidx

                            my_year_entity = chrono.ChronoYearEntity(
                                entityID=str(chrono_id) + "entity",
                                start_span=abs_Sspan,
                                end_span=abs_Espan,
                                value=num)
                            chrono_list.append(my_year_entity)
                            my_year_entity.set_sub_interval(
                                my_month_entity.get_id())
                            chrono_id = chrono_id + 1

        ## if the start of the month is not 0 then we have leading text to parse
        if (idxstart > 0):
            #substr = s.getText()[:idxstart].strip(",.").strip()
            hasMod, mod_type, mod_start, mod_end = hasModifier(s)
            if (hasMod):
                # Each operator is anchored on the modifier's span and
                # points at the month entity.
                if mod_type == "This":
                    chrono_list.append(
                        chrono.ChronoThisOperator(
                            entityID=str(chrono_id) + "entity",
                            start_span=ref_Sspan + mod_start,
                            end_span=ref_Sspan + mod_end,
                            repeating_interval=my_month_entity.get_id()))
                    chrono_id = chrono_id + 1

                if mod_type == "Next":
                    chrono_list.append(
                        chrono.ChronoNextOperator(
                            entityID=str(chrono_id) + "entity",
                            start_span=ref_Sspan + mod_start,
                            end_span=ref_Sspan + mod_end,
                            repeating_interval=my_month_entity.get_id()))
                    chrono_id = chrono_id + 1

                if mod_type == "Last":
                    # print("FOUND LAST")
                    chrono_list.append(
                        chrono.ChronoLastOperator(
                            entityID=str(chrono_id) + "entity",
                            start_span=ref_Sspan + mod_start,
                            end_span=ref_Sspan + mod_end,
                            repeating_interval=my_month_entity.get_id(),
                            semantics="Interval-Not-Included"))
                    chrono_id = chrono_id + 1

        # The month entity goes in after everything derived from it.
        chrono_list.append(my_month_entity)

    return chrono_list, chrono_id, flags
def hasEmbeddedPeriodInterval(tpentity):
    """Detect a number fused to a period word, such as "3days" or "10mins".

    Every ASCII punctuation character in the phrase text is replaced with a
    space (casing is kept) and the text is split on single spaces. For each
    token, each search term is looked up in order; when a term starts after
    the first character and ``utils.getNumberFromText`` returns an ``int``
    for the leading part, the remainder of the token is classified.

    Changes from the previous revision:

    * The debug ``print("TOFIX: ...")`` that ran on every call is gone; the
      note is kept as a TODO comment.
    * "annual", "hr", "hrs", "min", "mins" and "quarter" were searched for
      but had no label, so a hit on them returned a negative result. They
      now get the same labels ``hasPeriodInterval`` assigns to them.

    Args:
        tpentity: object exposing ``getText()`` that returns the phrase text.

    Returns:
        ``(True, label, start, end, number_text)`` where ``label`` is "Day",
        "Week", "Month", "Year", "Century", "Decade", "Minute", "Second",
        "Hour" or "Unknown", ``start``/``end`` come from ``calculateSpan``
        on the normalized text for the unit part, and ``number_text`` is the
        leading part of the token as a string; otherwise
        ``(False, None, None, None, None)``.
    """
    # TODO: convert the term list and labels to use the shared dictionary.
    search_terms = (
        "decades", "decade", "yesterday", "yesterdays", "today", "todays",
        "tomorrow", "tomorrows", "day", "week", "month", "year", "daily",
        "weekly", "monthly", "yearly", "century", "minute", "second",
        "hour", "hourly", "days", "weeks", "months", "years", "centuries",
        "century", "minutes", "seconds", "hours", "time", "shortly", "soon",
        "briefly", "awhile", "future", "lately", "annual", "hr", "hrs",
        "min", "mins", "quarter",
    )
    unit_labels = {
        "day": "Day", "daily": "Day", "days": "Day",
        "yesterday": "Day", "yesterdays": "Day",
        "tomorrow": "Day", "tomorrows": "Day",
        "today": "Day", "todays": "Day",
        "week": "Week", "weekly": "Week", "weeks": "Week",
        "month": "Month", "monthly": "Month", "months": "Month",
        "year": "Year", "yearly": "Year", "years": "Year", "annual": "Year",
        "century": "Century", "centuries": "Century",
        "decade": "Decade", "decades": "Decade",
        "minute": "Minute", "minutes": "Minute",
        "min": "Minute", "mins": "Minute",
        "second": "Second", "seconds": "Second",
        "hour": "Hour", "hourly": "Hour", "hours": "Hour",
        "hr": "Hour", "hrs": "Hour",
        "time": "Unknown", "shortly": "Unknown", "soon": "Unknown",
        "briefly": "Unknown", "awhile": "Unknown", "future": "Unknown",
        "lately": "Unknown", "quarter": "Unknown",
    }
    no_match = (False, None, None, None, None)

    text_norm = tpentity.getText().translate(
        str.maketrans(string.punctuation, ' ' * len(string.punctuation)))

    for token in text_norm.split(" "):
        for term in search_terms:
            idx = token.find(term)
            if idx <= 0:
                # Term absent, or token starts with it (no leading number).
                continue
            number_text, unit_text = token[:idx], token[idx:]
            if not isinstance(utils.getNumberFromText(number_text), int):
                continue
            label = unit_labels.get(unit_text)
            if label is None:
                # Kept from the original: a numeric prefix followed by an
                # unrecognized suffix ends the whole search.
                return no_match
            start_idx, end_idx = calculateSpan(text_norm, unit_text)
            return True, label, start_idx, end_idx, number_text

    return no_match