def buildChronoList(
        TimePhraseList,
        chrono_id,
        ref_list,
        PIclassifier,
        PIfeatures,
):
    """Collect the dose-duration Chrono entities for a list of phrases.

    The reference tokens are first passed through
    ``referenceToken.replacePunctuation`` and ``referenceToken.lowercase``.
    Every phrase is then handed to ``DoseDuration.buildDoseDuration`` together
    with an empty entity list, and whatever that call returns is concatenated
    onto the running result.

    Frequency parsing and sub-interval linking are not performed by this
    variant (the corresponding calls were commented out in the original).

    NOTE(review): this file chunk contains other definitions that are also
    named ``buildChronoList``; if they live in the same module, the last one
    bound wins. Confirm which one is intended.

    Args:
        TimePhraseList: Iterable of temporal phrase objects.
        chrono_id: Running entity id counter; each builder call returns the
            updated value.
        ref_list: Reference tokens for the document being parsed.
        PIclassifier: Forwarded unchanged to ``buildDoseDuration``.
        PIfeatures: Forwarded unchanged to ``buildDoseDuration``.

    Returns:
        A ``(chrono_list, chrono_id)`` tuple: the accumulated entities and
        the id counter as returned by the last builder call.
    """
    # Normalise the reference tokens: punctuation handling first, then
    # lowercasing, in that order.
    normalized_refs = referenceToken.lowercase(
        referenceToken.replacePunctuation(ref_list))

    collected = []
    for phrase in TimePhraseList:
        # Each phrase starts from its own empty entity list.
        phrase_entities, chrono_id = DoseDuration.buildDoseDuration(
            phrase, chrono_id, [], normalized_refs, PIclassifier, PIfeatures)
        collected = collected + phrase_entities

    return collected, chrono_id
def buildChronoListML(TimePhraseList, chrono_id, ref_list, X, classifier):
    """Build frequency Chrono entities for phrases using an ML classifier.

    Processing happens in two stages:

    1. Every phrase for which ``hasSingular(phrase.getItems())`` is truthy is
       turned directly into a ``chrono.ChronoFrequencyEntity`` and removed
       from ``TimePhraseList``.
    2. The remaining phrases are paired one-to-one with the feature rows in
       ``X`` and passed to ``buildFrequencyML``.

    ``TimePhraseList`` is modified in place: after the call it only contains
    the phrases that reached stage 2.

    Args:
        TimePhraseList: Mutable list of temporal phrase objects; filtered in
            place (see above).
        chrono_id: Running entity id counter.
        ref_list: Reference tokens for the document.
        X: Sequence of per-phrase feature rows; its length must equal the
            number of phrases left after stage 1.
        classifier: Forwarded unchanged to ``buildFrequencyML``.

    Returns:
        A ``(chrono_list, chrono_id)`` tuple with the accumulated entities
        and the updated id counter.

    Raises:
        SystemExit: With exit status 1 when the number of remaining phrases
            does not match ``len(X)``.
    """
    # Function-scope import so the block does not depend on the file header.
    import sys

    chrono_list = []

    # The normalised tokens are not used further down in this function; the
    # calls are kept in case the helpers have side effects on the tokens.
    # NOTE(review): confirm whether these two calls can be removed.
    ref_list = referenceToken.replacePunctuation(ref_list)
    ref_list = referenceToken.lowercase(ref_list)

    # Stage 1: single pass that splits off the "singular" phrases. This
    # replaces the former index loop with pop(n)/n -= 1, which was quadratic
    # and easy to get wrong.
    remaining = []
    for phrase in TimePhraseList:
        if hasSingular(phrase.getItems()):
            chrono_list.append(
                chrono.ChronoFrequencyEntity(
                    id=str(chrono_id) + "entity",
                    label="Frequency",
                    span=phrase.getSpan(),
                    text=phrase.getText()))
            chrono_id += 1
        else:
            remaining.append(phrase)
    # Slice assignment keeps the caller-visible in-place removal.
    TimePhraseList[:] = remaining

    if len(TimePhraseList) != len(X):
        print("FATAL ERROR: LEN(PHRASE FEATURES)!=LEN(PHRASES)")
        # sys.exit instead of the site-provided exit(): the latter is an
        # interactive helper and is not guaranteed to exist (e.g. python -S).
        sys.exit(1)

    # Stage 2: classify each remaining phrase with its feature row.
    for phrase, features in zip(TimePhraseList, X):
        phrase_entities, chrono_id = buildFrequencyML(
            phrase, chrono_id, [], features, classifier)
        chrono_list.extend(phrase_entities)

    return chrono_list, chrono_id
def buildChronoList(TimePhraseList, chrono_id, ref_list, PIclassifier, PIfeatures, dct=None):
    """Run every entity builder over each phrase and link the results.

    For each phrase the builders below are called in a fixed order, each one
    receiving the entity list produced so far and returning the updated list
    and id counter (and, for some builders, the updated flags dict). The
    per-phrase entities are then passed to ``buildSubIntervals`` and its
    result is appended to the overall list.

    Each phrase is also printed to stdout before it is processed.

    NOTE(review): this file chunk contains other definitions that are also
    named ``buildChronoList``; if they live in the same module, the last one
    bound wins. Confirm which one is intended.

    Args:
        TimePhraseList: Iterable of temporal phrase objects.
        chrono_id: Running entity id counter.
        ref_list: Reference tokens for the document.
        PIclassifier: Forwarded to ``PeriodInterval.buildPeriodInterval``.
        PIfeatures: Forwarded to ``PeriodInterval.buildPeriodInterval``.
        dct: Forwarded to ``TextMonthAndDay.buildTextMonthAndDay`` and
            ``buildSubIntervals``; defaults to ``None``.

    Returns:
        A ``(chrono_list, chrono_id)`` tuple.
    """
    all_entities = []

    # Normalise the reference tokens once, before any phrase is parsed.
    ref_list = referenceToken.replacePunctuation(ref_list)
    ref_list = referenceToken.lowercase(ref_list)

    for phrase in TimePhraseList:
        print(phrase)
        found = []

        # Flags shared between builders so that work already done for a
        # component is not repeated; every flag starts out False.
        flags = dict.fromkeys(
            ("loneDigitYear", "month", "day", "hour", "minute", "second",
             "fourdigityear", "twodigityear"),
            False)

        # --- calendar / clock components (these update the flags) ---
        found, chrono_id, flags = MonthYear.buildYear(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = MonthYear.build2DigitYear(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = MonthYear.buildMonthOfYear(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = DayOfMonth.buildDayOfMonth(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = HourOfDay.buildHourOfDay(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = MinuteOfHour.buildMinuteOfHour(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = SecondOfMinute.buildSecondOfMinute(
            phrase, chrono_id, found, flags)

        # --- modifier text ---
        found, chrono_id = Modifier.buildModifierText(
            phrase, chrono_id, found)

        # --- non-standard formatted temporal phrases ---
        found, chrono_id, flags = NumericDate.buildNumericDate(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = TwentyFourHourTime.build24HourTime(
            phrase, chrono_id, found, flags)
        found, chrono_id = DayOfWeek.buildDayOfWeek(
            phrase, chrono_id, found)
        found, chrono_id, flags = TextMonthAndDay.buildTextMonthAndDay(
            phrase, chrono_id, found, flags, dct, ref_list)
        found, chrono_id = AMPM.buildAMPM(
            phrase, chrono_id, found, flags)
        found, chrono_id = PartOfDay.buildPartOfDay(
            phrase, chrono_id, found)
        found, chrono_id = PartOfWeek.buildPartOfWeek(
            phrase, chrono_id, found)
        found, chrono_id = Season.buildSeasonOfYear(
            phrase, chrono_id, found, ref_list)
        found, chrono_id = PeriodInterval.buildPeriodInterval(
            phrase, chrono_id, found, ref_list, PIclassifier, PIfeatures)
        found, chrono_id = TextYear.buildTextYear(
            phrase, chrono_id, found)
        found, chrono_id = This.buildThis(
            phrase, chrono_id, found)
        found, chrono_id = BeforeAfter.buildBeforeAfter(
            phrase, chrono_id, found)
        found, chrono_id = NthFromStart.buildNthFromStart(
            phrase, chrono_id, found, ref_list)
        found, chrono_id = TimeZone.buildTimeZone(
            phrase, chrono_id, found)
        found, chrono_id = Last.buildLast(
            phrase, chrono_id, found)

        # Link the entities found for this phrase, then add them to the
        # overall result. Duration and Set builders are not wired in yet.
        linked, chrono_id = buildSubIntervals(found, chrono_id, dct, ref_list)
        all_entities = all_entities + linked

    return all_entities, chrono_id
def buildChronoList(TimePhraseList, chrono_id, ref_list, PIclassifier, PIfeatures, dct=None):
    """Run every entity builder over each phrase and collect ISO-tagged phrases.

    For each phrase the builders below are called in a fixed order, each one
    receiving the entity list produced so far and returning the updated list
    and id counter (and, for some builders, the updated flags dict). The
    per-phrase entities are then passed to ``buildSubIntervals``. When that
    returns a non-empty list, ``phrase.getISO(...)`` is called with it and the
    phrase is added to the returned ``timex_list``.

    Progress messages are printed to stdout while parsing.

    Args:
        TimePhraseList: Iterable of temporal phrase objects.
        chrono_id: Running entity id counter.
        ref_list: Reference tokens for the document.
        PIclassifier: Forwarded to ``PeriodInterval.buildPeriodInterval``.
        PIfeatures: Forwarded to ``PeriodInterval.buildPeriodInterval``.
        dct: Forwarded to ``TextMonthAndDay.buildTextMonthAndDay`` and
            ``buildSubIntervals``; defaults to ``None``.

    Returns:
        A 3-tuple ``(chrono_list, chrono_id, timex_list)``: all linked
        entities, the updated id counter, and the phrases for which at least
        one linked entity was produced.
    """
    all_entities = []

    # Normalise the reference tokens once, before any phrase is parsed.
    ref_list = referenceToken.replacePunctuation(ref_list)
    ref_list = referenceToken.lowercase(ref_list)

    # Phrases that ended up with at least one linked entity.
    timex_list = []

    for phrase in TimePhraseList:
        print("\nNOW PARSING PHRASE: " + phrase.getText() + "\n")
        found = []

        # Flags shared between builders so that work already done for a
        # component is not repeated; every flag starts out False.
        flags = dict.fromkeys(
            ("loneDigitYear", "month", "day", "hour", "minute", "second",
             "fourdigityear", "twodigityear"),
            False)

        # --- calendar components (these update the flags) ---
        found, chrono_id, flags = MonthYear.buildYear(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = MonthYear.build2DigitYear(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = MonthYear.buildMonthOfYear(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = DayOfMonth.buildDayOfMonth(
            phrase, chrono_id, found, flags)

        # AM/PM is deliberately parsed before the hour of day.
        found, chrono_id = AMPM.buildAMPM(phrase, chrono_id, found, flags)

        # --- clock components (these update the flags) ---
        found, chrono_id, flags = HourOfDay.buildHourOfDay(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = MinuteOfHour.buildMinuteOfHour(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = SecondOfMinute.buildSecondOfMinute(
            phrase, chrono_id, found, flags)

        # --- modifier text ---
        found, chrono_id = Modifier.buildModifierText(
            phrase, chrono_id, found)

        # --- non-standard formatted temporal phrases ---
        found, chrono_id, flags = NumericDate.buildNumericDate(
            phrase, chrono_id, found, flags)
        found, chrono_id, flags = TwentyFourHourTime.build24HourTime(
            phrase, chrono_id, found, flags)
        found, chrono_id = DayOfWeek.buildDayOfWeek(
            phrase, chrono_id, found)
        found, chrono_id, flags = TextMonthAndDay.buildTextMonthAndDay(
            phrase, chrono_id, found, flags, dct, ref_list)
        found, chrono_id = PartOfDay.buildPartOfDay(
            phrase, chrono_id, found)
        found, chrono_id = PartOfWeek.buildPartOfWeek(
            phrase, chrono_id, found)
        found, chrono_id = Season.buildSeasonOfYear(
            phrase, chrono_id, found, ref_list)
        found, chrono_id = PeriodInterval.buildPeriodInterval(
            phrase, chrono_id, found, ref_list, PIclassifier, PIfeatures)
        found, chrono_id = TextYear.buildTextYear(
            phrase, chrono_id, found)
        found, chrono_id = This.buildThis(
            phrase, chrono_id, found)
        found, chrono_id = BeforeAfter.buildBeforeAfter(
            phrase, chrono_id, found)
        found, chrono_id = NthFromStart.buildNthFromStart(
            phrase, chrono_id, found, ref_list)
        found, chrono_id = TimeZone.buildTimeZone(
            phrase, chrono_id, found)
        found, chrono_id = Last.buildLast(
            phrase, chrono_id, found)
        found, chrono_id = Frequency.buildFrequency(
            phrase, chrono_id, found)

        print("XXXXXXXXX")

        # Link the entities found for this phrase; the result may be empty.
        linked, chrono_id = buildSubIntervals(found, chrono_id, dct, ref_list)

        # Only phrases that produced linked entities get an ISO value and
        # are reported back in timex_list.
        if linked:
            print("Converting phrase to ISO: " + str(phrase))
            phrase.getISO(linked)
            print("ISO Value: " + str(phrase))
            print("TIMEX3 String: " + phrase.i2b2format())
            timex_list.append(phrase)

        # all_entities is a flat list of entities; the association with the
        # originating phrase is not kept here. Duration and Set builders are
        # not wired in yet.
        all_entities = all_entities + linked

    return all_entities, chrono_id, timex_list