Exemple #1
0
def buildDayOfWeek(s, chrono_id, chrono_list):
    """Create a Day-Of-Week entity for phrase ``s`` plus any This/Next/Last operator.

    ``hasDayOfWeek(s)`` supplies the match flag, the day type and the
    phrase-relative character offsets of the match. When it matches, a
    ``ChronoDayOfWeekEntity`` is appended to ``chrono_list``; if
    ``hasNextLastThis(s)`` also reports a "This", "Next" or "Last" modifier, the
    corresponding operator is appended as well, spanning the same characters
    as the day and pointing at it through ``repeating_interval``.

    :param s: phrase object exposing ``getSpan()``
    :param chrono_id: counter used to build the next entity ID
    :param chrono_list: list that receives the new entities (mutated in place)
    :return: ``(chrono_list, chrono_id)`` with the counter advanced once per
        entity created
    """
    found, day_type, rel_start, rel_end = hasDayOfWeek(s)
    if not found:
        return chrono_list, chrono_id

    # Offsets returned by hasDayOfWeek are relative to the phrase start.
    phrase_start, _phrase_end = s.getSpan()
    day_start = phrase_start + rel_start
    day_end = phrase_start + rel_end

    day_entity = chrono.ChronoDayOfWeekEntity(
        entityID=str(chrono_id) + "entity",
        start_span=day_start,
        end_span=day_end,
        day_type=day_type)
    chrono_list.append(day_entity)
    chrono_id = chrono_id + 1

    # Look for a This/Next/Last modifier attached to the phrase.
    has_mod, mod_type, _mod_start, _mod_end = (
        Chrono.TimePhraseToChrono.Modifier.hasNextLastThis(s))
    if has_mod and mod_type in ("This", "Next", "Last"):
        operator_args = {
            "entityID": str(chrono_id) + "entity",
            "start_span": day_start,
            "end_span": day_end,
            "repeating_interval": day_entity.get_id(),
        }
        if mod_type == "This":
            operator = chrono.ChronoThisOperator(**operator_args)
        elif mod_type == "Next":
            operator = chrono.ChronoNextOperator(**operator_args)
        else:
            # Only the Last operator carries explicit semantics here.
            operator = chrono.ChronoLastOperator(
                semantics="Interval-Included", **operator_args)
        chrono_list.append(operator)
        chrono_id = chrono_id + 1

    return chrono_list, chrono_id
Exemple #2
0
def buildLast(s, chrono_id, chrono_list):
    """Append a stand-alone Last operator when ``hasLast(s)`` finds one.

    The offsets returned by ``hasLast`` are relative to the phrase; the
    operator's end span is the absolute start plus the absolute length of the
    match.

    :param s: phrase object exposing ``getSpan()``
    :param chrono_id: counter used to build the next entity ID
    :param chrono_list: list that receives the new operator (mutated in place)
    :return: ``(chrono_list, chrono_id)``; the counter is advanced by one when
        an operator was added
    """
    found, _value, rel_start, rel_end = hasLast(s)
    if not found:
        return chrono_list, chrono_id

    phrase_start, _phrase_end = s.getSpan()
    op_start = phrase_start + rel_start
    op_end = op_start + abs(rel_end - rel_start)

    last_operator = chrono.ChronoLastOperator(
        entityID=str(chrono_id) + "entity",
        start_span=op_start,
        end_span=op_end)
    chrono_list.append(last_operator)

    return chrono_list, chrono_id + 1
Exemple #3
0
def buildSeasonOfYear(s, chrono_id, chrono_list, ref_list):
    """Create a Season-Of-Year entity for ``s`` with its operator and number.

    When ``hasSeasonOfYear(s, ref_list)`` matches, this builds a
    ``ChronoSeasonOfYearEntity``, then (in this order) appends to
    ``chrono_list``: an optional This/Next/Last operator, an optional
    ``ChronoNumber`` found in the text before the season word, and finally the
    season entity itself.

    :param s: phrase object exposing ``getSpan()`` and ``getText()``
    :param chrono_id: counter used to build entity IDs
    :param chrono_list: list that receives the new entities (mutated in place)
    :param ref_list: reference tokens, passed through to ``hasSeasonOfYear``
    :return: ``(chrono_list, chrono_id)`` with the counter advanced once per
        entity created
    """
    found, season_type, rel_start, rel_end = hasSeasonOfYear(s, ref_list)
    if not found:
        return chrono_list, chrono_id

    phrase_start, _phrase_end = s.getSpan()
    season_start = phrase_start + rel_start
    season_end = phrase_start + rel_end
    season_entity = chrono.ChronoSeasonOfYearEntity(
        entityID=str(chrono_id) + "entity",
        start_span=season_start,
        end_span=season_end,
        season_type=season_type)
    chrono_id = chrono_id + 1

    # Optional This/Next/Last operator, spanning the season word itself.
    has_mod, mod_type, _mod_start, _mod_end = hasNextLastThis(s)
    if has_mod:
        if mod_type == "This":
            operator_cls = chrono.ChronoThisOperator
        elif mod_type == "Next":
            operator_cls = chrono.ChronoNextOperator
        elif mod_type == "Last":
            operator_cls = chrono.ChronoLastOperator
        else:
            operator_cls = None

        if operator_cls is not None:
            chrono_list.append(
                operator_cls(entityID=str(chrono_id) + "entity",
                             start_span=season_start,
                             end_span=season_end,
                             repeating_interval=season_entity.get_id()))
            chrono_id = chrono_id + 1

    # A number, if any, is assumed to come before the season word.
    if rel_start > 0:
        leading_text = s.getText()[0:rel_start]
        number_entity = None

        digits = re.search('([0-9]{1,2})', leading_text)
        if digits is not None:
            digit_start, digit_end = digits.span(0)
            number_entity = chrono.ChronoNumber(
                entityID=str(chrono_id) + "entity",
                start_span=phrase_start + digit_start,
                end_span=phrase_start + digit_end,
                value=digits.group(0))
        else:
            # No digits: fall back to a spelled-out number, which is given the
            # span from the phrase start up to just before the season word.
            spelled_value = utils.getNumberFromText(leading_text)
            if spelled_value is not None:
                number_entity = chrono.ChronoNumber(
                    entityID=str(chrono_id) + "entity",
                    start_span=phrase_start,
                    end_span=phrase_start + (rel_start - 1),
                    value=spelled_value)

        if number_entity is not None:
            chrono_id = chrono_id + 1
            chrono_list.append(number_entity)
            season_entity.set_number(number_entity.get_id())

    chrono_list.append(season_entity)
    return chrono_list, chrono_id
Exemple #4
0
def _classifyPeriodOrInterval(classifier, features):
    """Return the label the configured classifier assigns to ``features``.

    ``classifier`` is indexed as ``classifier[0]`` (the model) and
    ``classifier[1]`` (a string naming its kind). "NN" goes through
    ``ChronoKeras.keras_classify``, "SVM"/"RF" through ``predict`` on an
    integer feature vector, and anything else through ``classify``.
    """
    model_kind = classifier[1]
    if model_kind == "NN":
        return ChronoKeras.keras_classify(
            classifier[0], np.array(list(features.values())))
    if model_kind in ("SVM", "RF"):
        feat_array = [int(value) for value in features.values()]
        return classifier[0].predict([feat_array])[0]
    return classifier[0].classify(features)


def _addNumberEntity(entity, chrono_id, chrono_list, start_span, end_span,
                     value):
    """Append a ChronoNumber, link it to ``entity`` and return the next ID."""
    number_entity = chrono.ChronoNumber(entityID=str(chrono_id) + "entity",
                                        start_span=start_span,
                                        end_span=end_span,
                                        value=value)
    chrono_list.append(number_entity)
    entity.set_number(number_entity.get_id())
    return chrono_id + 1


def _addThisOrModifier(s, entity, link_name, ref_list, ref_idx, phrase_start,
                       chrono_id, chrono_list):
    """Append a This/Next/Last operator for ``entity`` and return the next ID.

    ``link_name`` is either ``"period"`` or ``"repeating_interval"`` and
    selects which property of the operator points at ``entity``. A "this"
    token directly before the entity wins; otherwise ``hasModifier(s)`` is
    consulted for "Next"/"Last".
    """
    # Only look at the preceding token when one exists; ref_idx - 1 on index 0
    # (or on a negative "not found" index) would silently wrap to the end of
    # ref_list and inspect an unrelated token.
    prior = None
    if ref_idx is not None and ref_idx > 0:
        prior = ref_list[ref_idx - 1]
    prior_tok = prior.getText().lower() if prior is not None else ""

    punct_to_space = str.maketrans(string.punctuation,
                                   ' ' * len(string.punctuation))
    # NOTE(review): punctuation becomes spaces and the result is not stripped,
    # so only a bare "this" token matches here — confirm whether tokens such
    # as "(this" were meant to match as well.
    if prior_tok.translate(punct_to_space) == "this":
        this_start, this_end = re.search("this", prior_tok).span(0)
        prior_start, _prior_end = prior.getSpan()

        this_operator = chrono.ChronoThisOperator(
            entityID=str(chrono_id) + "entity",
            start_span=prior_start + this_start,
            end_span=prior_start + this_end)
        if link_name == "period":
            this_operator.set_period(entity.get_id())
        else:
            this_operator.set_repeating_interval(entity.get_id())
        chrono_list.append(this_operator)
        return chrono_id + 1

    has_mod, mod_type, mod_start, mod_end = hasModifier(s)
    if has_mod and mod_type in ("Next", "Last"):
        operator_args = {
            "entityID": str(chrono_id) + "entity",
            "start_span": phrase_start + mod_start,
            "end_span": phrase_start + mod_end,
            link_name: entity.get_id(),
        }
        if mod_type == "Next":
            operator = chrono.ChronoNextOperator(**operator_args)
        else:
            operator = chrono.ChronoLastOperator(
                semantics="Interval-Not-Included", **operator_args)
        chrono_list.append(operator)
        return chrono_id + 1

    return chrono_id


def _buildEmbeddedPeriodInterval(s, chrono_id, chrono_list, ref_list,
                                 classifier, feats, phrase_start):
    """Handle phrases where the number and the period/interval are fused.

    Uses ``hasEmbeddedPeriodInterval(s)``; when it matches, the entity is
    classified as Period or Calendar-Interval and any number found before it
    is linked to it.
    """
    found, val, idxstart, idxend, numstr = hasEmbeddedPeriodInterval(s)
    if not found:
        return chrono_list, chrono_id

    abs_Sspan = phrase_start + idxstart
    abs_Espan = phrase_start + idxend

    # Index of the first reference token overlapping the match, or -1.
    ref_idx = next(
        (idx for idx, token in enumerate(ref_list)
         if utils.overlap(token.getSpan(), (abs_Sspan, abs_Espan))), -1)

    my_features = utils.extract_prediction_features(ref_list, ref_idx,
                                                    feats.copy())

    # Same dispatch as the non-embedded path, so SVM/RF models are called
    # through predict() instead of falling through to classify().
    if _classifyPeriodOrInterval(classifier, my_features) == 1:
        my_entity = chrono.ChronoPeriodEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            period_type=getPeriodValue(val),
            number=None)
    else:
        my_entity = chrono.ChronoCalendarIntervalEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            calendar_type=val)
    chrono_id = chrono_id + 1

    # Everything before the period/interval word may hold the number.
    substr = s.getText()[:idxstart]
    m = re.search('([0-9]{1,2})', substr)
    if m is not None:
        chrono_id = _addNumberEntity(my_entity, chrono_id, chrono_list,
                                     phrase_start + m.span(0)[0],
                                     phrase_start + m.span(0)[1], m.group(0))
    else:
        texNumVal = utils.getNumberFromText(numstr)
        if texNumVal is not None:
            # numstr is literal text taken from the phrase, so escape it
            # before using it as a pattern.
            m = re.search(re.escape(numstr), substr)
            if m is not None:
                chrono_id = _addNumberEntity(my_entity, chrono_id,
                                             chrono_list,
                                             phrase_start + m.span(0)[0],
                                             phrase_start + m.span(0)[1],
                                             texNumVal)

    chrono_list.append(my_entity)
    return chrono_list, chrono_id


def buildPeriodInterval(s, chrono_id, chrono_list, ref_list, classifier,
                        feats):
    """Create Period / Calendar-Interval entities (and helpers) for phrase ``s``.

    Four cases are tried in order:

    1. ``hasPeriodInterval`` matches and the text contains one of the
       always-calendar terms (yesterday, tomorrow, today, daily, /min, /week):
       a Calendar-Interval is added, preceded by a Last operator for
       "yesterday".
    2. The matched value is ``"Unknown"``: a Period is added.
    3. Any other match: the classifier decides Period (label ``1``) versus
       Calendar-Interval (any other label); a This/Next/Last operator and a
       leading number are linked when present.
    4. No match: the embedded form (``hasEmbeddedPeriodInterval``) is tried.

    :param s: phrase object exposing ``getSpan()`` and ``getText()``
    :param chrono_id: counter used to build entity IDs
    :param chrono_list: list that receives the new entities (mutated in place)
    :param ref_list: reference tokens used for feature extraction and for
        finding the token preceding the match
    :param classifier: indexable pair ``(model, kind)``; kind is "NN", "SVM",
        "RF" or anything else for a model exposing ``classify``
    :param feats: feature dict template; it is copied before use
    :return: ``(chrono_list, chrono_id)`` with the counter advanced once per
        entity created
    """
    ref_Sspan, _ref_Espan = s.getSpan()
    boo, val, idxstart, idxend, _plural = hasPeriodInterval(s)

    if not boo:
        return _buildEmbeddedPeriodInterval(s, chrono_id, chrono_list,
                                            ref_list, classifier, feats,
                                            ref_Sspan)

    abs_Sspan = ref_Sspan + idxstart
    abs_Espan = ref_Sspan + idxend
    text = s.getText()

    # Terms that are always marked as calendar intervals.
    if re.search(
            "yesterday|yesterdays|tomorrow|tomorrows|today|todays|daily|/min|/week",
            text):
        my_entity = chrono.ChronoCalendarIntervalEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            calendar_type=val,
            number=None)
        chrono_id = chrono_id + 1

        if re.search("yesterday|yesterdays", text):
            # The Last operator refers back to the interval created just
            # above, whose ID was built from chrono_id - 1.
            my_last_entity = chrono.ChronoLastOperator(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                repeating_interval=str(chrono_id - 1) + "entity")
            chrono_id = chrono_id + 1
            chrono_list.append(my_last_entity)

        chrono_list.append(my_entity)
        return chrono_list, chrono_id

    # Terms that are always marked as periods.
    if val == "Unknown":
        my_entity = chrono.ChronoPeriodEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            period_type=val,
            number=None)
        chrono_list.append(my_entity)
        return chrono_list, chrono_id + 1

    # Everything else is decided by the classifier.
    ref_idx = utils.getRefIdx(ref_list, abs_Sspan, abs_Espan)
    my_features = utils.extract_prediction_features(ref_list, ref_idx,
                                                    feats.copy())

    if _classifyPeriodOrInterval(classifier, my_features) == 1:
        my_entity = chrono.ChronoPeriodEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            period_type=getPeriodValue(val),
            number=None)
        link_name = "period"
    else:
        my_entity = chrono.ChronoCalendarIntervalEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            calendar_type=val,
            number=None)
        link_name = "repeating_interval"
    chrono_id = chrono_id + 1

    chrono_id = _addThisOrModifier(s, my_entity, link_name, ref_list, ref_idx,
                                   ref_Sspan, chrono_id, chrono_list)

    # A number, if any, is assumed to come before the interval string.
    if idxstart > 0:
        substr = text[0:idxstart]
        m = re.search('([0-9]{1,2})', substr)
        if m is not None:
            chrono_id = _addNumberEntity(my_entity, chrono_id, chrono_list,
                                         ref_Sspan + m.span(0)[0],
                                         ref_Sspan + m.span(0)[1], m.group(0))
        else:
            # No digits: fall back to a spelled-out number, which is given
            # the span from the phrase start up to just before the match.
            texNumVal = utils.getNumberFromText(substr)
            if texNumVal is not None:
                chrono_id = _addNumberEntity(my_entity, chrono_id,
                                             chrono_list, ref_Sspan,
                                             ref_Sspan + (idxstart - 1),
                                             texNumVal)

    chrono_list.append(my_entity)
    return chrono_list, chrono_id
Exemple #5
0
def _removeEntity(chrono_list, entity):
    """Delete ``entity`` from ``chrono_list`` by identity, if it is present."""
    for position, candidate in enumerate(chrono_list):
        if candidate is entity:
            del chrono_list[position]
            return


def buildSubIntervals(chrono_list, chrono_id, dct, ref_list):
    """Link the entities of one phrase together and prune orphaned ones.

    The function scans ``chrono_list`` once, remembering the last entity of
    each type, and then:

    * when there is no year but a document time ``dct`` is given, adds an
      implicit Last/Next operator for a month (by comparing with ``dct``) or
      for a day of week (by the tense of the closest preceding verb token),
      provided the phrase has no This/Next/Last operator already;
    * chains sub-intervals (second -> minute -> hour -> day -> month -> year,
      plus day-of-week and part-of-day links);
    * links NthFromStart to a period or calendar interval;
    * links AM/PM and time zone to the hour, or removes them when there is no
      hour;
    * links a modifier to a period or calendar interval, or removes it when
      there is neither.

    Entities are tracked as objects rather than list positions so that
    removing one entity cannot invalidate the handles of the others.

    :param chrono_list: entities built for one phrase (mutated in place)
    :param chrono_id: counter used to build IDs for added operators
    :param dct: document creation time exposing ``month`` and ``day``, or None
    :param ref_list: reference tokens exposing ``getPos()``
    :return: ``(chrono_list, chrono_id)``
    """
    # Remember the last entity seen for every type.
    found = {}
    for entity in chrono_list:
        e_type = entity.get_type()
        if e_type == "Two-Digit-Year":
            e_type = "Year"
        found[e_type] = entity

    year = found.get("Year")
    month = found.get("Month-Of-Year")
    day = found.get("Day-Of-Month")
    hour = found.get("Hour-Of-Day")
    minute = found.get("Minute-Of-Hour")
    second = found.get("Second-Of-Minute")
    daypart = found.get("Part-Of-Day")
    dayweek = found.get("Day-Of-Week")
    interval = found.get("Calendar-Interval")
    period = found.get("Period")
    nth = found.get("NthFromStart")
    nxt = found.get("Next")
    this = found.get("This")
    tz = found.get("Time-Zone")
    ampm = found.get("AMPM-Of-Day")
    modifier = found.get("Modifier")
    last = found.get("Last")

    # Add implicit NEXT and LAST operators for dates written without a year
    # and without an explicit modifier word.
    if year is None and dct is not None:
        no_operator = this is None and nxt is None and last is None

        if month is not None and no_operator:
            mStart = month.get_start_span()
            mEnd = month.get_end_span()
            my_month = utils.getMonthNumber(month.get_month_type())

            operator_cls = None
            if day is not None and my_month == dct.month:
                day_value = day.get_value()
                if day_value <= dct.day:
                    operator_cls = chrono.ChronoLastOperator
                elif day_value > dct.day:
                    operator_cls = chrono.ChronoNextOperator
            elif my_month < dct.month:
                operator_cls = chrono.ChronoLastOperator
            elif my_month > dct.month:
                operator_cls = chrono.ChronoNextOperator

            if operator_cls is not None:
                chrono_list.append(
                    operator_cls(entityID=str(chrono_id) + "entity",
                                 start_span=mStart,
                                 end_span=mEnd,
                                 repeating_interval=month.get_id()))
                chrono_id = chrono_id + 1

        # A past day of week is often referenced without being explicit, so
        # the tense of the closest preceding verb decides Last versus Next.
        if dayweek is not None and no_operator:
            mStart = dayweek.get_start_span()
            mEnd = dayweek.get_end_span()

            ref = utils.getRefIdx(ref_list, mStart, mEnd)
            # Walk backwards from the day-of-week token. Stopping at ref > 0
            # keeps a negative index from wrapping around to the end of
            # ref_list.
            # NOTE(review): token 0 is never examined, as before — confirm
            # whether that is intended.
            while ref > 0:
                pos_tag = ref_list[ref].getPos()
                if "VB" in pos_tag:
                    operator_cls = None
                    if pos_tag in ["VBD", "VBN"]:
                        # past tense so put as a last
                        operator_cls = chrono.ChronoLastOperator
                    elif pos_tag in ["VB", "VBG", "VBP", "VBZ"]:
                        # present tense so put as a next
                        operator_cls = chrono.ChronoNextOperator
                    if operator_cls is not None:
                        chrono_list.append(
                            operator_cls(
                                entityID=str(chrono_id) + "entity",
                                start_span=mStart,
                                end_span=mEnd,
                                repeating_interval=dayweek.get_id()))
                        chrono_id = chrono_id + 1
                    break
                ref -= 1

    # Now assign all sub-intervals.
    if second is not None and minute is not None:
        minute.set_sub_interval(second.get_id())
    if minute is not None and hour is not None:
        hour.set_sub_interval(minute.get_id())
    if hour is not None and day is not None:
        day.set_sub_interval(hour.get_id())
    if day is not None and month is not None:
        month.set_sub_interval(day.get_id())
    if month is not None and year is not None:
        year.set_sub_interval(month.get_id())
    if dayweek is not None and hour is not None:
        dayweek.set_sub_interval(hour.get_id())
    if dayweek is not None and daypart is not None and hour is None:
        dayweek.set_sub_interval(daypart.get_id())
    if day is not None and daypart is not None and hour is None:
        day.set_sub_interval(daypart.get_id())

    if nth is not None and period is not None:
        nth.set_period(period.get_id())
    elif nth is not None and interval is not None:
        nth.set_repeating_interval(interval.get_id())

    # AM/PM and time zone only make sense attached to an hour; drop them
    # when there is no hour to attach them to.
    if ampm is not None:
        if hour is not None:
            hour.set_ampm(ampm.get_id())
        else:
            _removeEntity(chrono_list, ampm)

    if tz is not None:
        if hour is not None:
            hour.set_time_zone(tz.get_id())
        else:
            # Not sure if this will work for all cases.
            _removeEntity(chrono_list, tz)

    # Link modifiers, or drop a modifier with nothing to modify.
    if modifier is not None:
        if period is not None:
            period.set_modifier(modifier.get_id())
        elif interval is not None:
            interval.set_modifier(modifier.get_id())
        else:
            # Not sure if this will work for all cases.
            _removeEntity(chrono_list, modifier)

    # NOTE: an NthFromStart without a period/interval is deliberately kept;
    # removing it traded false positives for false negatives.

    return chrono_list, chrono_id
Exemple #6
0
def buildDoseDuration(s, chrono_id, chrono_list, ref_list, classifier, feats):
    """Build a dose-duration entity (and any This/Next/Last operator) for a phrase.

    The phrase is first screened by a set of heuristics; when any of them
    rejects it, the inputs are returned untouched.  Otherwise the phrase is
    classified as a period (class 1) or an interval (any other class) and a
    ChronoDoseDurationEntity covering the whole phrase span is appended to
    chrono_list, preceded by a linked operator entity when one is found.

    Args:
        s: phrase object; getText() and getSpan() are used.
        chrono_id: integer counter used to mint entity IDs ("<n>entity").
        chrono_list: list of entities; new entities are appended in place.
        ref_list: reference tokens; getText(), getSpan() and isNumeric() are
            used.
        classifier: indexable pair (model, kind).  kind "NN" goes through
            ChronoKeras, "SVM"/"RF" through model.predict, anything else
            through model.classify.
        feats: feature template; only copies of it are handed to the feature
            extractor.

    Returns:
        The tuple (chrono_list, chrono_id), with chrono_id advanced once per
        entity created.
    """
    text = s.getText()
    lowered = text.lower()
    parts = text.split()
    ref_Sspan, ref_Espan = s.getSpan()

    # An empty phrase cannot contain a number, so there is nothing to build
    # (and parts[0] below would raise IndexError).
    if not parts:
        return chrono_list, chrono_id

    # Various checks to ensure that this phrase is actually a dose duration.
    if isDoseDuration(parts[0]):
        return chrono_list, chrono_id
    if any(word in lowered for word in ("every", "time", "per", "once",
                                        "twice", "past", "ago")):
        return chrono_list, chrono_id
    if re.match(r"^q\d|^Q\d", text):
        return chrono_list, chrono_id
    if any(marker in text for marker in ("/", "[**", "**]", "ly", "(", ")",
                                         "RANDOM")):
        return chrono_list, chrono_id

    # Every token that matches a reference token must be either a non-ordinal
    # number or a dose-duration word, and at least one number is required.
    punct_re = re.compile('[' + string.punctuation + ']')
    containsnum = False
    for part in parts:
        cleaned = punct_re.sub('', part).strip().lower()
        for ref in ref_list:
            ref_text = ref.getText()
            if ref_text.lower() != cleaned:
                continue
            if ref.isNumeric():
                containsnum = True
                if utils.isOrdinal(ref_text):
                    return chrono_list, chrono_id
                # Stop scanning reference tokens for this part once a number
                # has matched.
                break
            if not tt.hasDoseDuration(ref_text.lower()):
                return chrono_list, chrono_id
    if not containsnum:
        return chrono_list, chrono_id

    # The dose-duration entity always spans the entire phrase.
    abs_Sspan, abs_Espan = ref_Sspan, ref_Espan

    boo, val, _idxstart, _idxend, _plural = hasDoseDuration(s)
    if boo:
        # get index of overlapping reference token
        ref_idx = utils.getRefIdx(ref_list, abs_Sspan, abs_Espan)

        # extract ML features and classify into period (1) or interval
        my_features = utils.extract_prediction_features(
            ref_list, ref_idx, feats.copy())
        is_period = _classifyDoseDuration(classifier, my_features) == 1

        my_entity = chrono.ChronoDoseDurationEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            dose_type=getDoseDurationValue(val) if is_period else val,
            number=None,
            text=text)
        chrono_id = chrono_id + 1

        # Check whether the token right before the phrase is "this".  Only
        # look back when a preceding token exists: with ref_idx <= 0 the
        # expression ref_list[ref_idx - 1] would wrap around to the end of
        # the list and inspect an unrelated token.
        prior_ref = ref_list[ref_idx - 1] if ref_idx > 0 else None
        if prior_ref is not None and prior_ref.getText().lower() == "this":
            # add a This entity and link it to the dose duration.
            prior_start, _prior_end = prior_ref.getSpan()
            chrono_this_entity = chrono.ChronoThisOperator(
                entityID=str(chrono_id) + "entity",
                start_span=prior_start,
                end_span=prior_start + len("this"))
            chrono_id = chrono_id + 1
            if is_period:
                chrono_this_entity.set_period(my_entity.get_id())
            else:
                chrono_this_entity.set_repeating_interval(my_entity.get_id())
            chrono_list.append(chrono_this_entity)
        else:
            # check for a Next/Last word inside the phrase itself
            hasMod, mod_type, mod_start, mod_end = hasModifier(s)
            if hasMod and mod_type in ("Next", "Last"):
                # Periods are linked through `period`, intervals through
                # `repeating_interval`.
                link_name = "period" if is_period else "repeating_interval"
                operator_args = {
                    "entityID": str(chrono_id) + "entity",
                    "start_span": ref_Sspan + mod_start,
                    "end_span": ref_Sspan + mod_end,
                    link_name: my_entity.get_id(),
                }
                if mod_type == "Next":
                    operator = chrono.ChronoNextOperator(**operator_args)
                else:
                    operator = chrono.ChronoLastOperator(
                        semantics="Interval-Not-Included", **operator_args)
                chrono_list.append(operator)
                chrono_id = chrono_id + 1

        # The operator (if any) is appended before the entity it refers to.
        chrono_list.append(my_entity)
    else:
        boo2, val, _idxstart, _idxend, _numstr = hasEmbeddedPeriodInterval(s)
        if boo2:
            # get index of overlapping reference token (-1 when none overlaps)
            ref_idx = -1
            for i, ref in enumerate(ref_list):
                if utils.overlap(ref.getSpan(), (abs_Sspan, abs_Espan)):
                    ref_idx = i
                    break

            # extract ML features and classify into period (1) or interval
            my_features = utils.extract_prediction_features(
                ref_list, ref_idx, feats.copy())
            is_period = _classifyDoseDuration(classifier, my_features) == 1

            my_entity = chrono.ChronoDoseDurationEntity(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                dose_type=getDoseDurationValue(val) if is_period else val,
                number=None,
                text=text)
            chrono_id = chrono_id + 1
            chrono_list.append(my_entity)

    return chrono_list, chrono_id


def _classifyDoseDuration(classifier, my_features):
    """Return the class predicted for one feature mapping.

    classifier[1] selects how classifier[0] is invoked: "NN" uses
    ChronoKeras.keras_classify on an array of the feature values, "SVM" and
    "RF" use predict() on a single row of int-converted feature values, and
    any other kind calls classify() with the feature mapping itself.
    """
    model = classifier[0]
    kind = classifier[1]
    if kind == "NN":
        return ChronoKeras.keras_classify(
            model, np.array(list(my_features.values())))
    if kind in ("SVM", "RF"):
        feat_array = [int(i) for i in my_features.values()]
        return model.predict([feat_array])[0]
    return model.classify(my_features)
Exemple #7
0
def buildTextMonthAndDay(s,
                         chrono_id,
                         chrono_list,
                         flags,
                         dct=None,
                         ref_list=None):
    """Build a month-of-year entity from a textual month, plus nearby day/year.

    When hasTextMonth() finds a month in the phrase and flags["month"] is not
    yet set, a month entity is created.  Any text after the month is then
    examined: numbers up to 31 become day-of-month entities, and numbers in
    1500..2050 become year entities whose sub-interval is the month.  If the
    month does not start the phrase, a This/Next/Last modifier found by
    hasModifier() is turned into an operator linked to the month.  Day, year
    and operator entities are appended before the month entity itself.

    Args:
        s: phrase object; getText() and getSpan() are used.
        chrono_id: integer counter used to mint entity IDs ("<n>entity").
        chrono_list: list of entities; new entities are appended in place.
        flags: dict with the keys "month", "day", "fourdigityear" and
            "loneDigitYear"; updated in place as entities are created.
        dct: unused.  The document-time based Next/Last inference that
            consumed it was permanently disabled (`if False`), so that code
            has been removed; the parameter is kept for compatibility.
        ref_list: forwarded to hasTextMonth().

    Returns:
        The tuple (chrono_list, chrono_id, flags).
    """
    boo, val, idxstart, idxend = hasTextMonth(s, ref_list)
    if not boo or flags["month"]:
        return chrono_list, chrono_id, flags

    flags["month"] = True
    text = s.getText()
    ref_Sspan, _ref_Espan = s.getSpan()
    my_month_entity = chrono.chronoMonthOfYearEntity(
        entityID=str(chrono_id) + "entity",
        start_span=ref_Sspan + idxstart,
        end_span=ref_Sspan + idxend,
        month_type=val)
    chrono_id = chrono_id + 1

    # idxend is the last index of the month.  If there are any characters
    # after it, the phrase is longer than idxend and the trailing text is
    # parsed for a day or a year.
    if idxend < len(text):
        substr = text[idxend:].strip(",.").strip()

        num = utils.getNumberFromText(substr)
        if num is not None:
            # The whole trailing text is a single number.
            if num <= 31 and not flags["day"]:
                flags["day"] = True
                chrono_id = _textMonthAppendDay(text, str(num), num,
                                                ref_Sspan, chrono_id,
                                                chrono_list)
            elif 1500 <= num <= 2050 and not flags[
                    "fourdigityear"] and not flags["loneDigitYear"]:
                flags["fourdigityear"] = True
                chrono_id = _textMonthAppendYear(text, substr, num,
                                                 ref_Sspan, chrono_id,
                                                 chrono_list,
                                                 my_month_entity)
        else:
            # Replace punctuation with spaces, split on whitespace and
            # process each token on its own.
            substr = substr.translate(
                str.maketrans(string.punctuation,
                              ' ' * len(string.punctuation)))
            for token in WhitespaceTokenizer().tokenize(substr):
                num = utils.getNumberFromText(token)
                if num is None:
                    continue
                if num <= 31:
                    # NOTE(review): unlike the single-number path above,
                    # this path neither checks nor sets flags["day"] --
                    # confirm whether that difference is intended.
                    chrono_id = _textMonthAppendDay(text, token, num,
                                                    ref_Sspan, chrono_id,
                                                    chrono_list)
                elif 1500 <= num <= 2050 and not flags[
                        "fourdigityear"] and not flags["loneDigitYear"]:
                    flags["fourdigityear"] = True
                    chrono_id = _textMonthAppendYear(text, token, num,
                                                     ref_Sspan, chrono_id,
                                                     chrono_list,
                                                     my_month_entity)

    # If the start of the month is not 0 then there is leading text, which
    # may hold a This/Next/Last modifier.
    if idxstart > 0:
        hasMod, mod_type, mod_start, mod_end = hasModifier(s)
        if hasMod:
            operator_args = {
                "entityID": str(chrono_id) + "entity",
                "start_span": ref_Sspan + mod_start,
                "end_span": ref_Sspan + mod_end,
                "repeating_interval": my_month_entity.get_id(),
            }
            operator = None
            if mod_type == "This":
                operator = chrono.ChronoThisOperator(**operator_args)
            elif mod_type == "Next":
                operator = chrono.ChronoNextOperator(**operator_args)
            elif mod_type == "Last":
                operator = chrono.ChronoLastOperator(
                    semantics="Interval-Not-Included", **operator_args)
            # Any other modifier type creates nothing and consumes no ID.
            if operator is not None:
                chrono_list.append(operator)
                chrono_id = chrono_id + 1

    chrono_list.append(my_month_entity)

    return chrono_list, chrono_id, flags


def _textMonthAppendDay(text, span_text, value, ref_Sspan, chrono_id,
                        chrono_list):
    """Append a day-of-month entity and return the next free chrono_id.

    The span is calculateSpan(text, span_text) shifted by ref_Sspan.
    """
    day_startidx, day_endidx = calculateSpan(text, span_text)
    chrono_list.append(
        chrono.ChronoDayOfMonthEntity(entityID=str(chrono_id) + "entity",
                                      start_span=ref_Sspan + day_startidx,
                                      end_span=ref_Sspan + day_endidx,
                                      value=value))
    return chrono_id + 1


def _textMonthAppendYear(text, span_text, value, ref_Sspan, chrono_id,
                         chrono_list, month_entity):
    """Append a year entity whose sub-interval is month_entity; return next ID.

    The span is calculateSpan(text, span_text) shifted by ref_Sspan.
    """
    year_startidx, year_endidx = calculateSpan(text, span_text)
    my_year_entity = chrono.ChronoYearEntity(
        entityID=str(chrono_id) + "entity",
        start_span=ref_Sspan + year_startidx,
        end_span=ref_Sspan + year_endidx,
        value=value)
    chrono_list.append(my_year_entity)
    my_year_entity.set_sub_interval(month_entity.get_id())
    return chrono_id + 1
def buildPeriodInterval(s, chrono_id, chrono_list, ref_list, classifier,
                        feats):

    features = feats.copy()
    ref_Sspan, ref_Espan = s.getSpan()
    boo, val, idxstart, idxend, plural = hasPeriodInterval(s)

    # FIND terms that are always marked as calendar intervals!
    if boo and re.search(
            "yesterday|yesterdays|tomorrow|tomorrows|today|todays|daily|/min|/week",
            s.getText()):
        abs_Sspan = ref_Sspan + idxstart
        abs_Espan = ref_Sspan + idxend
        my_entity = chrono.ChronoCalendarIntervalEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            calendar_type=val,
            number=None)
        chrono_id = chrono_id + 1

        if re.search("yesterday|yesterdays", s.getText()):

            my_last_entity = chrono.ChronoLastOperator(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                repeating_interval=str(chrono_id - 1) + "entity")
            chrono_id = chrono_id + 1
            chrono_list.append(my_last_entity)

        chrono_list.append(my_entity)

    # FIND terms that are always marked as periods!
    elif boo and val == "Unknown":
        abs_Sspan = ref_Sspan + idxstart
        abs_Espan = ref_Sspan + idxend
        my_entity = chrono.ChronoPeriodEntity(entityID=str(chrono_id) +
                                              "entity",
                                              start_span=abs_Sspan,
                                              end_span=abs_Espan,
                                              period_type=val,
                                              number=None)
        chrono_id = chrono_id + 1
        chrono_list.append(my_entity)

    elif boo:
        abs_Sspan = ref_Sspan + idxstart
        abs_Espan = ref_Sspan + idxend

        # get index of overlapping reference token
        ref_idx = utils.getRefIdx(ref_list, abs_Sspan, abs_Espan)

        # extract ML features
        my_features = utils.extract_prediction_features(
            ref_list, ref_idx, feats.copy())

        # classify into period or interval
        if classifier[1] == "NN":
            my_class = ChronoKeras.keras_classify(
                classifier[0], np.array(list(my_features.values())))
            #print("Class: " + str(my_class) + " : Start: " + str(abs_Sspan) + " : End: "+ str(abs_Espan))
        elif classifier[1] in ("SVM", "RF"):
            feat_array = [int(i) for i in my_features.values()]
            my_class = classifier[0].predict([feat_array])[0]
        else:
            my_class = classifier[0].classify(my_features)
            #print("Class: " + str(my_class) + " : Start: " + str(abs_Sspan) + " : End: "+ str(abs_Espan))

        # if 1 then it is a period, if 0 then it is an interval
        if my_class == 1:
            my_entity = chrono.ChronoPeriodEntity(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                period_type=getPeriodValue(val),
                number=None)
            chrono_id = chrono_id + 1
            ### Check to see if this calendar interval has a "this" in front of it
            prior_tok = ref_list[ref_idx - 1].getText().lower()
            if prior_tok.translate(
                    str.maketrans(string.punctuation,
                                  ' ' * len(string.punctuation))) == "this":
                # add a This entitiy and link it to the interval.
                start_span, end_span = re.search(prior_tok, "this").span(0)
                prior_start, prior_end = ref_list[ref_idx - 1].getSpan()

                chrono_this_entity = chrono.ChronoThisOperator(
                    entityID=str(chrono_id) + "entity",
                    start_span=prior_start + start_span,
                    end_span=prior_start + end_span)
                chrono_id = chrono_id + 1
                chrono_this_entity.set_period(my_entity.get_id())
                chrono_list.append(chrono_this_entity)

            else:
                # check for a Last Word
                hasMod, mod_type, mod_start, mod_end = hasNextLastThis(s)

                if (hasMod):
                    if mod_type == "Next":
                        chrono_list.append(
                            chrono.ChronoNextOperator(
                                entityID=str(chrono_id) + "entity",
                                start_span=ref_Sspan + mod_start,
                                end_span=ref_Sspan + mod_end,
                                period=my_entity.get_id()))
                        chrono_id = chrono_id + 1

                    if mod_type == "Last":
                        chrono_list.append(
                            chrono.ChronoLastOperator(
                                entityID=str(chrono_id) + "entity",
                                start_span=ref_Sspan + mod_start,
                                end_span=ref_Sspan + mod_end,
                                period=my_entity.get_id(),
                                semantics="Interval-Not-Included"))
                        chrono_id = chrono_id + 1

        else:
            my_entity = chrono.ChronoCalendarIntervalEntity(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                calendar_type=val,
                number=None)
            chrono_id = chrono_id + 1
            ### Check to see if this calendar interval has a "this" in front of it
            prior_tok = ref_list[ref_idx - 1].getText().lower()
            if prior_tok.translate(
                    str.maketrans(string.punctuation,
                                  ' ' * len(string.punctuation))) == "this":
                # add a This entitiy and link it to the interval.
                start_span, end_span = re.search(prior_tok, "this").span(0)
                prior_start, prior_end = ref_list[ref_idx - 1].getSpan()

                chrono_this_entity = chrono.ChronoThisOperator(
                    entityID=str(chrono_id) + "entity",
                    start_span=prior_start + start_span,
                    end_span=prior_start + end_span)
                chrono_id = chrono_id + 1
                chrono_this_entity.set_repeating_interval(my_entity.get_id())
                chrono_list.append(chrono_this_entity)
            else:
                # check for a Last Word
                hasMod, mod_type, mod_start, mod_end = hasNextLastThis(s)
                if (hasMod):
                    if mod_type == "Next":
                        chrono_list.append(
                            chrono.ChronoNextOperator(
                                entityID=str(chrono_id) + "entity",
                                start_span=ref_Sspan + mod_start,
                                end_span=ref_Sspan + mod_end,
                                repeating_interval=my_entity.get_id()))
                        chrono_id = chrono_id + 1

                    if mod_type == "Last":
                        chrono_list.append(
                            chrono.ChronoLastOperator(
                                entityID=str(chrono_id) + "entity",
                                start_span=ref_Sspan + mod_start,
                                end_span=ref_Sspan + mod_end,
                                repeating_interval=my_entity.get_id(),
                                semantics="Interval-Not-Included"))
                        chrono_id = chrono_id + 1

        #check to see if it has a number associated with it.  We assume the number comes before the interval string
        #to figure out if the number we find is close to the interval token the end of the number string needs to be within 2 characters of the start of the interval token.
        #I tried just extracting the previous reference token, but that doesn't work because phrases like "42-year-old" are actually one reference token.
        # So I decided I had to do it the hard way with index arithmetic.  The one concern about this method is that I assume there is a space at the end.  This could cause some issues down the line.
        # Yep, we are getting the spans wrong for phrases like "six-months".  I am going to test for a space as the last character before just assuming there was one.
        if idxstart > 0:
            ## get the absolute span of the interval token
            abs_Sspan = ref_Sspan + idxstart
            abs_Espan = ref_Sspan + idxend

            ## purposfully split on a single space
            substr = s.getText()[0:idxstart]
            # test to see if last character is a space and set a flag.
            has_space = True if substr[len(substr) - 1] == ' ' else False
            substr = substr.strip(' ').split(' ')

            ## get the previous token
            prevtok = substr[len(substr) - 1]
            prev_sSpan = idxstart - len(
                prevtok) - 1 if has_space else idxstart - len(prevtok)
            prev_eSpan = idxstart - 1

            ## get the rest of the substring joined by a space
            if len(substr) > 1:
                rest_of_phrase = ' '.join(substr[0:len(substr) - 1])
                rest_of_phrase_length = len(rest_of_phrase) + 1

            else:
                rest_of_phrase_length = 0

            m = re.search('([0-9]{1,2})', prevtok)
            if m is not None:
                num_val = m.group(0)
                abs_Sspan = ref_Sspan + rest_of_phrase_length + m.span(0)[0]
                abs_Espan = ref_Sspan + rest_of_phrase_length + m.span(0)[1]

                my_number_entity = chrono.ChronoNumber(
                    entityID=str(chrono_id) + "entity",
                    start_span=abs_Sspan,
                    end_span=abs_Espan,
                    value=num_val)
                chrono_id = chrono_id + 1

                #add the number entity to the list
                chrono_list.append(my_number_entity)
                my_entity.set_number(my_number_entity.get_id())
            #else search for a text number
            else:
                texNumVal = utils.getNumberFromText(prevtok)
                if texNumVal is not None:
                    abs_Sspan = ref_Sspan + rest_of_phrase_length
                    abs_Espan = ref_Sspan + rest_of_phrase_length + len(
                        prevtok
                    ) if has_space else ref_Sspan + rest_of_phrase_length + len(
                        prevtok) - 1

                    #create the number entity
                    my_number_entity = chrono.ChronoNumber(
                        entityID=str(chrono_id) + "entity",
                        start_span=abs_Sspan,
                        end_span=abs_Espan,
                        value=texNumVal)
                    chrono_id = chrono_id + 1
                    #append to list
                    chrono_list.append(my_number_entity)
                    #link to interval entity
                    my_entity.set_number(my_number_entity.get_id())

        chrono_list.append(my_entity)

    else:
        boo2, val, idxstart, idxend, numstr = hasEmbeddedPeriodInterval(s)
        if (boo2):
            abs_Sspan = ref_Sspan + idxstart
            abs_Espan = ref_Sspan + idxend

            # get index of overlapping reference token
            ref_idx = -1
            for i in range(0, len(ref_list)):
                if (utils.overlap(ref_list[i].getSpan(),
                                  (abs_Sspan, abs_Espan))):
                    ref_idx = i
                    break

            # extract ML features
            my_features = utils.extract_prediction_features(
                ref_list, ref_idx, features)

            # classify into period or interval
            if (classifier[1] == "NN"):
                my_class = ChronoKeras.keras_classify(
                    classifier[0], np.array(list(my_features.values())))
                #print("Class: " + str(my_class) + " : Start: " + str(abs_Sspan) + " : End: "+ str(abs_Espan))
            elif classifier[1] in ("SVM", "RF"):
                feat_array = [int(i) for i in my_features.values()]
                my_class = classifier[0].predict([feat_array])[0]
            else:
                my_class = classifier[0].classify(my_features)
                #print("Class: " + str(my_class) + " : Start: " + str(abs_Sspan) + " : End: "+ str(abs_Espan))

            # if 1 then it is a period, if 0 then it is an interval
            if (my_class == 1):
                my_entity = chrono.ChronoPeriodEntity(
                    entityID=str(chrono_id) + "entity",
                    start_span=abs_Sspan,
                    end_span=abs_Espan,
                    period_type=getPeriodValue(val),
                    number=None)
                chrono_id = chrono_id + 1
            else:
                my_entity = chrono.ChronoCalendarIntervalEntity(
                    entityID=str(chrono_id) + "entity",
                    start_span=abs_Sspan,
                    end_span=abs_Espan,
                    calendar_type=val)
                chrono_id = chrono_id + 1

            #Extract the number and identify the span of numstr
            if idxstart > 0:
                ## get the absolute span of the interval token
                abs_Sspan = ref_Sspan + idxstart
                abs_Espan = ref_Sspan + idxend

                ## purposefully split on a single space
                substr = s.getText()[0:idxstart]
                # test to see if last character is a space and set a flag.
                has_space = True if substr[len(substr) - 1] == ' ' else False
                substr = substr.strip(' ').split(' ')

                ## get the previous token
                prevtok = substr[len(substr) - 1]
                prev_sSpan = idxstart - len(
                    prevtok) - 1 if has_space else idxstart - len(prevtok)
                prev_eSpan = idxstart - 1

                ## get the rest of the substring joined by a space
                if len(substr) > 1:
                    rest_of_phrase = ' '.join(substr[0:len(substr) - 1])
                    rest_of_phrase_length = len(rest_of_phrase) + 1

                else:
                    rest_of_phrase_length = 0

                ## now calculate the relative span of prevtok
                #rel_Sspan = rest_of_phrase_length
                #rel_Espan = rest_of_phrase_length + len(prevtok)

                m = re.search('([0-9]{1,2})', prevtok)
                if m is not None:
                    num_val = m.group(0)
                    abs_Sspan = ref_Sspan + rest_of_phrase_length + m.span(
                        0)[0]
                    abs_Espan = ref_Sspan + rest_of_phrase_length + m.span(
                        0)[1]

                    my_number_entity = chrono.ChronoNumber(
                        entityID=str(chrono_id) + "entity",
                        start_span=abs_Sspan,
                        end_span=abs_Espan,
                        value=num_val)
                    chrono_id = chrono_id + 1

                    #add the number entity to the list
                    chrono_list.append(my_number_entity)
                    my_entity.set_number(my_number_entity.get_id())
                #else search for a text number
                else:
                    texNumVal = utils.getNumberFromText(prevtok)
                    if texNumVal is not None:
                        abs_Sspan = ref_Sspan + rest_of_phrase_length
                        abs_Espan = ref_Sspan + rest_of_phrase_length + len(
                            prevtok
                        ) if has_space else ref_Sspan + rest_of_phrase_length + len(
                            prevtok) - 1
                        #create the number entity
                        my_number_entity = chrono.ChronoNumber(
                            entityID=str(chrono_id) + "entity",
                            start_span=abs_Sspan,
                            end_span=abs_Espan,
                            value=texNumVal)
                        chrono_id = chrono_id + 1
                        #append to list
                        chrono_list.append(my_number_entity)
                        #link to interval entity
                        my_entity.set_number(my_number_entity.get_id())

            chrono_list.append(my_entity)

    return chrono_list, chrono_id