def temporalTest(tok, include_relative=True):
    """Decide whether a single token looks like a temporal expression.

    The token is checked, in order, against a few numeric date/time regular
    expressions and then against a series of predicate helpers from the
    ``tt`` module (defined outside this block). The first positive check
    wins.

    Args:
        tok: The token text to inspect.
        include_relative: When false, a hit from ``tt.hasTempText`` alone
            does not make the token temporal. The helper is still called.

    Returns:
        ``True`` if any check matched, otherwise ``False``.
    """
    # A token carrying '#', '$' or '%' is never treated as temporal.
    if re.search('[#$%]', tok) is not None:
        return False

    # Numeric dates separated by '/' or '-':
    #   three-part forms (mm-dd-yyyy, mm-dd-yy, yyyy-mm-dd, yy-mm-dd), then
    #   two-part forms (mm-dd, mm-yy, mm-yyyy).
    numeric_date_patterns = (
        '([0-9]{1,4}[-/][0-9]{1,2}[-/][0-9]{1,4})',
        '([0-9]{1,2}[-/][0-9]{2,4})',
    )
    for pattern in numeric_date_patterns:
        if re.search(pattern, tok) is not None:
            return True

    # A run of 4 to 8 digits might be a compact date or time such as
    # 19980304, 03041998 or 980304; the tt helpers make the final call.
    digit_run = re.search('([0-9]{4,8})', tok)
    if digit_run is not None:
        digits = digit_run.group(0)
        if tt.has24HourTime(digits) or tt.hasDateOrTime(digits):
            return True

    # Clock time written as hh:mm:ss.
    if re.search('([0-9]{2}:[0-9]{2}:[0-9]{2})', tok) is not None:
        return True

    # Lexical checks. `or` short-circuits, so helpers run in this order and
    # stop at the first truthy result.
    if (
        tt.hasTextMonth(tok)
        or tt.hasDayOfWeek(tok)
        or tt.hasPeriodInterval(tok)
        or tt.hasAMPM(tok)
        or tt.hasPartOfWeek(tok)
        or tt.hasSeasonOfYear(tok)
        or tt.hasPartOfDay(tok)
        or tt.hasTimeZone(tok)
        or (tt.hasTempText(tok) and include_relative)
        or tt.hasModifierText(tok)
        or tt.hasClinAbr(tok)
    ):
        return True
    return False
def temporalTest(tok):
    """Classify a token as temporal and report which check matched.

    Only a subset of the ``tt`` predicate helpers (defined outside this
    block) is consulted; the first one that returns a truthy value decides
    the result.

    Args:
        tok: The token text to inspect.

    Returns:
        A ``(is_temporal, code)`` tuple. ``code`` is 3 for
        ``tt.hasDayOfWeek``, 4 for ``tt.hasPeriodInterval``, 5 for
        ``tt.hasAMPM``, 8 for ``tt.hasPartOfDay`` and -1 for
        ``tt.hasDoseDuration``. ``(False, -1)`` is returned when the token
        contains '#', '$' or '%', or when no helper matched.
    """
    # A token carrying '#', '$' or '%' is never treated as temporal.
    if re.search('[#$%]', tok) is not None:
        return False, -1

    # Deliberately not checked here (per the original author's notes):
    #   - numeric date patterns, compact digit dates and hh:mm:ss times were
    #     marked as not useful for frequency;
    #   - tt.hasTextMonth returned True for strings such as "DOCTOR" because
    #     it contains "OCT";
    #   - tt.hasPartOfWeek, tt.hasSeasonOfYear, tt.hasTimeZone and
    #     tt.hasTempText were switched off (the last noted as "not useful
    #     to us");
    #   - tt.hasFor was left undecided.
    coded_checks = (
        ("hasDayOfWeek", 3),
        ("hasPeriodInterval", 4),
        ("hasAMPM", 5),
        ("hasPartOfDay", 8),
        ("hasDoseDuration", -1),
    )
    # Helpers are looked up one at a time so that evaluation stays in the
    # listed order and stops at the first match.
    for helper_name, code in coded_checks:
        if getattr(tt, helper_name)(tok):
            return True, code
    return False, -1