コード例 #1
0
def buildDayOfWeek(s, chrono_id, chrono_list):
    """Append a day-of-week entity, plus a This/Next/Last operator if present.

    ``hasDayOfWeek(s)`` reports whether the phrase contains a weekday and at
    which offsets. When it does, a ``ChronoDayOfWeekEntity`` is appended to
    ``chrono_list``. ``hasNextLastThis(s)`` is then consulted; for a "This",
    "Next" or "Last" modifier the matching operator is appended with the same
    span as the weekday and linked to it through ``repeating_interval``.

    Args:
        s: phrase object; only ``getSpan()`` is called on it directly.
        chrono_id: next free numeric id, used to build ``"<id>entity"`` IDs.
        chrono_list: list that receives the new entities (mutated in place).

    Returns:
        ``(chrono_list, chrono_id)``, with ``chrono_id`` advanced by one for
        every entity that was created.
    """
    found, day_value, rel_start, rel_end = hasDayOfWeek(s)
    if not found:
        return chrono_list, chrono_id

    phrase_start, _phrase_end = s.getSpan()
    # The offsets from hasDayOfWeek are added to the phrase's own start.
    span_start = phrase_start + rel_start
    span_end = phrase_start + rel_end

    day_entity = chrono.ChronoDayOfWeekEntity(
        entityID=str(chrono_id) + "entity",
        start_span=span_start,
        end_span=span_end,
        day_type=day_value)
    chrono_list.append(day_entity)
    chrono_id += 1

    # Check whether the weekday carries a This/Next/Last modifier.
    has_mod, mod_type, _mod_start, _mod_end = (
        Chrono.TimePhraseToChrono.Modifier.hasNextLastThis(s))
    if has_mod:
        operator_cls = None
        extra_kwargs = {}
        if mod_type == "This":
            operator_cls = chrono.ChronoThisOperator
        elif mod_type == "Next":
            operator_cls = chrono.ChronoNextOperator
        elif mod_type == "Last":
            operator_cls = chrono.ChronoLastOperator
            # Only the Last operator is given explicit semantics.
            extra_kwargs = {"semantics": "Interval-Included"}

        if operator_cls is not None:
            chrono_list.append(
                operator_cls(entityID=str(chrono_id) + "entity",
                             start_span=span_start,
                             end_span=span_end,
                             repeating_interval=day_entity.get_id(),
                             **extra_kwargs))
            chrono_id += 1

    # TODO (carried over from the original): "all last operators are getting
    # added here except yesterday..." -- a default Last operator for phrases
    # without a modifier is currently disabled.
    return chrono_list, chrono_id
コード例 #2
0
def buildSeasonOfYear(s, chrono_id, chrono_list, ref_list):
    """Append a season-of-year entity with its optional operator and number.

    ``hasSeasonOfYear(s, ref_list)`` reports whether the phrase contains a
    season and at which offsets. When it does:

    * a ``ChronoSeasonOfYearEntity`` is created;
    * if ``hasNextLastThis(s)`` reports "This", "Next" or "Last", the matching
      operator is appended with the season's span and linked to it through
      ``repeating_interval``;
    * the text before the season is searched for a 1-2 digit number, or
      failing that a number recognised by ``utils.getNumberFromText``; a
      ``ChronoNumber`` is appended and attached via ``set_number``;
    * finally the season entity itself is appended.

    Args:
        s: phrase object exposing ``getSpan()`` and ``getText()``.
        chrono_id: next free numeric id, used to build ``"<id>entity"`` IDs.
        chrono_list: list that receives the new entities (mutated in place).
        ref_list: passed through to ``hasSeasonOfYear``.

    Returns:
        ``(chrono_list, chrono_id)``, with ``chrono_id`` advanced by one for
        every entity that was created.
    """
    found, season_value, idxstart, idxend = hasSeasonOfYear(s, ref_list)
    if not found:
        return chrono_list, chrono_id

    phrase_start, _phrase_end = s.getSpan()
    season_start = phrase_start + idxstart
    season_end = phrase_start + idxend
    season_entity = chrono.ChronoSeasonOfYearEntity(
        entityID=str(chrono_id) + "entity",
        start_span=season_start,
        end_span=season_end,
        season_type=season_value)
    chrono_id += 1

    # Check whether the season carries a This/Next/Last modifier.
    has_mod, mod_type, _mod_start, _mod_end = hasNextLastThis(s)
    if has_mod:
        operator_cls = None
        if mod_type == "This":
            operator_cls = chrono.ChronoThisOperator
        elif mod_type == "Next":
            operator_cls = chrono.ChronoNextOperator
        elif mod_type == "Last":
            operator_cls = chrono.ChronoLastOperator

        if operator_cls is not None:
            chrono_list.append(
                operator_cls(entityID=str(chrono_id) + "entity",
                             start_span=season_start,
                             end_span=season_end,
                             repeating_interval=season_entity.get_id()))
            chrono_id += 1

    # Look for a number in front of the season; the number is assumed to come
    # before the season string.
    if idxstart > 0:
        leading_text = s.getText()[:idxstart]
        digits = re.search('([0-9]{1,2})', leading_text)

        number_args = None
        if digits is not None:
            number_args = (phrase_start + digits.start(),
                           phrase_start + digits.end(),
                           digits.group(0))
        else:
            # No digits: fall back to a number written out in words.
            spelled_value = utils.getNumberFromText(leading_text)
            if spelled_value is not None:
                number_args = (phrase_start,
                               phrase_start + (idxstart - 1),
                               spelled_value)

        if number_args is not None:
            num_start, num_end, num_value = number_args
            number_entity = chrono.ChronoNumber(
                entityID=str(chrono_id) + "entity",
                start_span=num_start,
                end_span=num_end,
                value=num_value)
            chrono_id += 1
            chrono_list.append(number_entity)
            # Link the number to the season entity.
            season_entity.set_number(number_entity.get_id())

    chrono_list.append(season_entity)
    return chrono_list, chrono_id
コード例 #3
0
def buildPeriodInterval(s, chrono_id, chrono_list, ref_list, classifier,
                        feats):
    """Build Period / Calendar-Interval entities for one temporal phrase.

    The phrase is handled by the first case that applies:

    1. ``hasPeriodInterval(s)`` matches and the text contains one of the
       fixed terms (yesterday, tomorrow, today, daily, /min, /week): a
       ``ChronoCalendarIntervalEntity`` is created; "yesterday" also gets a
       ``ChronoLastOperator`` pointing at it.
    2. ``hasPeriodInterval(s)`` matches with value ``"Unknown"``: a
       ``ChronoPeriodEntity`` is created.
    3. ``hasPeriodInterval(s)`` matches otherwise: the classifier decides
       between period (label 1) and calendar interval; a This/Next/Last
       operator and a leading number are linked when found.
    4. Otherwise ``hasEmbeddedPeriodInterval(s)`` is tried; on a match the
       classifier decides the entity type and the embedded number is linked.

    Args:
        s: phrase object exposing ``getSpan()`` and ``getText()``.
        chrono_id: next free numeric id, used to build ``"<id>entity"`` IDs.
        chrono_list: list that receives the new entities (mutated in place).
        ref_list: reference tokens exposing ``getSpan()`` and ``getText()``.
        classifier: pair ``(model, kind)``; ``kind`` "NN" goes through
            ``ChronoKeras.keras_classify``, "SVM"/"RF" through
            ``model.predict``, anything else through ``model.classify``.
        feats: feature template; a copy is handed to
            ``utils.extract_prediction_features`` so the caller's object is
            not passed on directly.

    Returns:
        ``(chrono_list, chrono_id)``, with ``chrono_id`` advanced by one for
        every entity that was created.
    """
    ref_Sspan, ref_Espan = s.getSpan()
    boo, val, idxstart, idxend, plural = hasPeriodInterval(s)

    # FIND terms that are always marked as calendar intervals!
    if boo and re.search(
            "yesterday|yesterdays|tomorrow|tomorrows|today|todays|daily|/min|/week",
            s.getText()):
        abs_Sspan = ref_Sspan + idxstart
        abs_Espan = ref_Sspan + idxend
        my_entity = chrono.ChronoCalendarIntervalEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            calendar_type=val,
            number=None)
        chrono_id = chrono_id + 1

        if re.search("yesterday|yesterdays", s.getText()):
            # The Last operator refers back to the ID that was just used for
            # the calendar interval (chrono_id - 1).
            my_last_entity = chrono.ChronoLastOperator(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                repeating_interval=str(chrono_id - 1) + "entity")
            chrono_id = chrono_id + 1
            chrono_list.append(my_last_entity)

        chrono_list.append(my_entity)

    # FIND terms that are always marked as periods!
    elif boo and val == "Unknown":
        my_entity = chrono.ChronoPeriodEntity(
            entityID=str(chrono_id) + "entity",
            start_span=ref_Sspan + idxstart,
            end_span=ref_Sspan + idxend,
            period_type=val,
            number=None)
        chrono_id = chrono_id + 1
        chrono_list.append(my_entity)

    elif boo:
        abs_Sspan = ref_Sspan + idxstart
        abs_Espan = ref_Sspan + idxend
        ref_idx = utils.getRefIdx(ref_list, abs_Sspan, abs_Espan)

        # Label 1 means period; any other label means calendar interval.
        is_period = _classify_period_or_interval(classifier, ref_list,
                                                 ref_idx, feats) == 1
        if is_period:
            my_entity = chrono.ChronoPeriodEntity(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                period_type=getPeriodValue(val),
                number=None)
        else:
            my_entity = chrono.ChronoCalendarIntervalEntity(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                calendar_type=val,
                number=None)
        chrono_id = chrono_id + 1

        chrono_id = _link_period_interval_operator(
            s, chrono_id, chrono_list, ref_list, ref_idx, my_entity,
            is_period, ref_Sspan)

        # Check for an associated number. The number is assumed to come
        # before the interval string.
        if idxstart > 0:
            substr = s.getText()[:idxstart]
            m = re.search('([0-9]{1,2})', substr)
            if m is not None:
                chrono_id = _link_period_interval_number(
                    chrono_id, chrono_list, my_entity,
                    ref_Sspan + m.start(), ref_Sspan + m.end(), m.group(0))
            else:
                # No digits: fall back to a number written out in words.
                texNumVal = utils.getNumberFromText(substr)
                if texNumVal is not None:
                    chrono_id = _link_period_interval_number(
                        chrono_id, chrono_list, my_entity,
                        ref_Sspan, ref_Sspan + (idxstart - 1), texNumVal)

        chrono_list.append(my_entity)

    else:
        boo2, val, idxstart, idxend, numstr = hasEmbeddedPeriodInterval(s)
        if boo2:
            abs_Sspan = ref_Sspan + idxstart
            abs_Espan = ref_Sspan + idxend

            # Index of the first overlapping reference token, -1 if none.
            ref_idx = next(
                (i for i, ref in enumerate(ref_list)
                 if utils.overlap(ref.getSpan(), (abs_Sspan, abs_Espan))),
                -1)

            # Uses the same classifier dispatch as the branch above, so
            # "SVM"/"RF" models are called through predict() here as well.
            is_period = _classify_period_or_interval(classifier, ref_list,
                                                     ref_idx, feats) == 1
            if is_period:
                my_entity = chrono.ChronoPeriodEntity(
                    entityID=str(chrono_id) + "entity",
                    start_span=abs_Sspan,
                    end_span=abs_Espan,
                    period_type=getPeriodValue(val),
                    number=None)
            else:
                my_entity = chrono.ChronoCalendarIntervalEntity(
                    entityID=str(chrono_id) + "entity",
                    start_span=abs_Sspan,
                    end_span=abs_Espan,
                    calendar_type=val)
            chrono_id = chrono_id + 1

            # Extract the number and identify its span within the text that
            # precedes the interval.
            substr = s.getText()[:idxstart]
            m = re.search('([0-9]{1,2})', substr)
            if m is not None:
                chrono_id = _link_period_interval_number(
                    chrono_id, chrono_list, my_entity,
                    ref_Sspan + m.start(), ref_Sspan + m.end(), m.group(0))
            else:
                texNumVal = utils.getNumberFromText(numstr)
                if texNumVal is not None:
                    # numstr is literal text, so escape it before using it
                    # as a search pattern.
                    m = re.search(re.escape(numstr), substr)
                    if m is not None:
                        chrono_id = _link_period_interval_number(
                            chrono_id, chrono_list, my_entity,
                            ref_Sspan + m.start(), ref_Sspan + m.end(),
                            texNumVal)

            chrono_list.append(my_entity)

    return chrono_list, chrono_id


def _classify_period_or_interval(classifier, ref_list, ref_idx, feats):
    """Return the classifier label for the reference token at ``ref_idx``."""
    my_features = utils.extract_prediction_features(ref_list, ref_idx,
                                                    feats.copy())
    model, kind = classifier[0], classifier[1]
    if kind == "NN":
        return ChronoKeras.keras_classify(
            model, np.array(list(my_features.values())))
    if kind in ("SVM", "RF"):
        feat_array = [int(i) for i in my_features.values()]
        return model.predict([feat_array])[0]
    return model.classify(my_features)


def _link_period_interval_operator(s, chrono_id, chrono_list, ref_list,
                                   ref_idx, target, as_period, ref_Sspan):
    """Append a This/Next/Last operator linked to ``target``; return next id."""
    blank_punct = str.maketrans(string.punctuation,
                                ' ' * len(string.punctuation))
    # ref_idx - 1 would wrap around to the END of ref_list when ref_idx is 0
    # (or negative), so only look back when a previous token really exists.
    prior = ref_list[ref_idx - 1] if ref_idx > 0 else None

    if (prior is not None
            and prior.getText().lower().translate(blank_punct) == "this"):
        # The comparison only succeeds when the token is exactly "this", so
        # the operator covers the whole prior token.
        prior_start, _prior_end = prior.getSpan()
        chrono_this_entity = chrono.ChronoThisOperator(
            entityID=str(chrono_id) + "entity",
            start_span=prior_start,
            end_span=prior_start + len("this"))
        chrono_id = chrono_id + 1
        if as_period:
            chrono_this_entity.set_period(target.get_id())
        else:
            chrono_this_entity.set_repeating_interval(target.get_id())
        chrono_list.append(chrono_this_entity)
        return chrono_id

    # No "this" in front: check for a Next/Last modifier in the phrase.
    hasMod, mod_type, mod_start, mod_end = hasModifier(s)
    if not hasMod:
        return chrono_id

    link_key = "period" if as_period else "repeating_interval"
    if mod_type == "Next":
        chrono_list.append(
            chrono.ChronoNextOperator(
                entityID=str(chrono_id) + "entity",
                start_span=ref_Sspan + mod_start,
                end_span=ref_Sspan + mod_end,
                **{link_key: target.get_id()}))
        chrono_id = chrono_id + 1
    elif mod_type == "Last":
        chrono_list.append(
            chrono.ChronoLastOperator(
                entityID=str(chrono_id) + "entity",
                start_span=ref_Sspan + mod_start,
                end_span=ref_Sspan + mod_end,
                semantics="Interval-Not-Included",
                **{link_key: target.get_id()}))
        chrono_id = chrono_id + 1
    return chrono_id


def _link_period_interval_number(chrono_id, chrono_list, target, start, end,
                                 value):
    """Append a ChronoNumber, attach it to ``target``; return the next id."""
    my_number_entity = chrono.ChronoNumber(
        entityID=str(chrono_id) + "entity",
        start_span=start,
        end_span=end,
        value=value)
    chrono_list.append(my_number_entity)
    target.set_number(my_number_entity.get_id())
    return chrono_id + 1
コード例 #4
0
ファイル: This.py プロジェクト: jacobkantrowitz/Chrono
def buildThis(s, chrono_id, chrono_list):
    """Append This operators for the tokens "now", "today(s)" and "current".

    The phrase text is lower-cased, punctuation is replaced by spaces and the
    result is split on single spaces. For each token:

    * "now" or "current": a ``ChronoThisOperator`` covering the token.
    * "today" or "todays": a ``ChronoThisOperator`` plus a "Day"
      ``ChronoCalendarIntervalEntity``, both covering the five characters of
      "today"; the operator is linked to the interval through
      ``set_repeating_interval``.

    Spans are computed from the position of the matched token itself, so a
    keyword that also occurs inside an earlier word (e.g. "now" in "know")
    or that appears more than once is located correctly.

    Args:
        s: phrase object exposing ``getText()`` and ``getSpan()``.
        chrono_id: next free numeric id, used to build ``"<id>entity"`` IDs.
        chrono_list: list that receives the new entities (mutated in place).

    Returns:
        ``(chrono_list, chrono_id)``, with ``chrono_id`` advanced by one for
        every entity that was created.
    """
    text = s.getText().lower()
    # Punctuation is blanked out one-for-one (not deleted), so every
    # character keeps the offset it has in ``text``.
    blanked = text.translate(
        str.maketrans(string.punctuation, " " * len(string.punctuation)))
    # strip() drops leading whitespace; remember how much so that token
    # offsets still map back onto ``text``.
    cursor = len(blanked) - len(blanked.lstrip())

    for tok in blanked.strip().split(" "):
        tok_start = cursor
        cursor += len(tok) + 1  # +1 for the single space consumed by split

        if tok == "now" or tok == "current":
            covered = len(tok)
            with_day_interval = False
        elif tok == "today" or tok == "todays":
            # Only "today" is covered, not a trailing plural "s".
            covered = len("today")
            with_day_interval = True
        else:
            continue

        ref_startSpan, ref_endSpan = s.getSpan()
        span_start = ref_startSpan + tok_start
        span_end = span_start + covered

        ## create a This entity
        chrono_this_entity = chrono.ChronoThisOperator(
            entityID=str(chrono_id) + "entity",
            start_span=span_start,
            end_span=span_end)
        chrono_id = chrono_id + 1

        if with_day_interval:
            chrono_interval_entity = chrono.ChronoCalendarIntervalEntity(
                entityID=str(chrono_id) + "entity",
                start_span=span_start,
                end_span=span_end,
                calendar_type="Day",
                number=None)
            chrono_id = chrono_id + 1

            chrono_this_entity.set_repeating_interval(
                chrono_interval_entity.get_id())

            chrono_list.append(chrono_this_entity)
            chrono_list.append(chrono_interval_entity)
        else:
            ## Note, may need to look for phrases like "current week" at
            ## some point.
            chrono_list.append(chrono_this_entity)

    return chrono_list, chrono_id
コード例 #5
0
def buildDoseDuration(s, chrono_id, chrono_list, ref_list, classifier, feats):
    """Build a dose-duration entity for a phrase, after filtering non-durations.

    The phrase is rejected (nothing is added) when any of these hold:

    * it is empty or whitespace only;
    * ``isDoseDuration`` accepts its first word;
    * it contains, case-insensitively, "every", "time", "per", "once",
      "twice", "past" or "ago";
    * it starts with ``q``/``Q`` followed by a digit;
    * it contains "/", "[**", "**]", "ly", "(", ")" or "RANDOM";
    * a word matches an ordinal numeric reference token, or matches a
      non-numeric reference token that ``tt.hasDoseDuration`` rejects;
    * no word matches a numeric reference token.

    Otherwise ``hasDoseDuration(s)`` and, failing that,
    ``hasEmbeddedPeriodInterval(s)`` are tried. On a match, a
    ``ChronoDoseDurationEntity`` spanning the whole phrase is appended. Its
    ``dose_type`` is ``getDoseDurationValue(val)`` when the classifier
    returns label 1 and ``val`` otherwise. For a ``hasDoseDuration`` match, a
    This/Next/Last operator linked to the entity is appended first when one
    is found.

    Args:
        s: phrase object exposing ``getSpan()`` and ``getText()``.
        chrono_id: next free numeric id, used to build ``"<id>entity"`` IDs.
        chrono_list: list that receives the new entities (mutated in place).
        ref_list: reference tokens exposing ``getText()``, ``getSpan()`` and
            ``isNumeric()``.
        classifier: pair ``(model, kind)``; ``kind`` "NN" goes through
            ``ChronoKeras.keras_classify``, "SVM"/"RF" through
            ``model.predict``, anything else through ``model.classify``.
        feats: feature template; a copy is handed to
            ``utils.extract_prediction_features``.

    Returns:
        ``(chrono_list, chrono_id)``, with ``chrono_id`` advanced by one for
        every entity that was created.
    """
    phrase = s.getText()
    parts = phrase.split()
    # An empty phrase has no first word to inspect and cannot contain a
    # number, so there is nothing to build.
    if not parts:
        return chrono_list, chrono_id

    # Various checks to ensure that this phrase is actually a dose duration.
    if isDoseDuration(parts[0]):
        return chrono_list, chrono_id
    lowered = phrase.lower()
    if any(word in lowered for word in ("every", "time", "per", "once",
                                        "twice", "past", "ago")):
        return chrono_list, chrono_id
    if re.match(r"^q\d|^Q\d", phrase):
        return chrono_list, chrono_id
    # These markers are matched case-sensitively.
    if any(mark in phrase for mark in ("/", "[**", "**]", "ly", "(", ")",
                                       "RANDOM")):
        return chrono_list, chrono_id

    containsnum = False
    punct_re = re.compile('[' + string.punctuation + ']')
    for part in parts:
        part = punct_re.sub('', part).strip()
        for ref in ref_list:
            if ref.getText().lower() == part.lower():
                if ref.isNumeric():
                    containsnum = True
                    if utils.isOrdinal(ref.getText()):
                        return chrono_list, chrono_id
                    break
                elif not tt.hasDoseDuration(ref.getText().lower()):
                    return chrono_list, chrono_id
    if not containsnum:
        return chrono_list, chrono_id

    # The entity always spans the whole phrase.
    ref_Sspan, ref_Espan = s.getSpan()

    boo, val, _idxstart, _idxend, _plural = hasDoseDuration(s)
    if boo:
        ref_idx = utils.getRefIdx(ref_list, ref_Sspan, ref_Espan)
    else:
        boo2, val, _idxstart, _idxend, _numstr = hasEmbeddedPeriodInterval(s)
        if not boo2:
            return chrono_list, chrono_id
        # Index of the first overlapping reference token, -1 if none.
        ref_idx = next(
            (i for i, ref in enumerate(ref_list)
             if utils.overlap(ref.getSpan(), (ref_Sspan, ref_Espan))),
            -1)

    # Label 1 is treated as a period; both branches use the same classifier
    # dispatch, so "SVM"/"RF" models go through predict() in either case.
    is_period = _classify_dose_duration(classifier, ref_list, ref_idx,
                                        feats) == 1

    my_entity = chrono.ChronoDoseDurationEntity(
        entityID=str(chrono_id) + "entity",
        start_span=ref_Sspan,
        end_span=ref_Espan,
        dose_type=getDoseDurationValue(val) if is_period else val,
        number=None,
        text=phrase)
    chrono_id = chrono_id + 1

    if boo:
        # Operators are only looked for on a direct hasDoseDuration match.
        chrono_id = _link_dose_duration_operator(
            s, chrono_id, chrono_list, ref_list, ref_idx, my_entity,
            is_period, ref_Sspan)

    chrono_list.append(my_entity)
    return chrono_list, chrono_id


def _classify_dose_duration(classifier, ref_list, ref_idx, feats):
    """Return the classifier label for the reference token at ``ref_idx``."""
    my_features = utils.extract_prediction_features(ref_list, ref_idx,
                                                    feats.copy())
    model, kind = classifier[0], classifier[1]
    if kind == "NN":
        return ChronoKeras.keras_classify(
            model, np.array(list(my_features.values())))
    if kind in ("SVM", "RF"):
        feat_array = [int(i) for i in my_features.values()]
        return model.predict([feat_array])[0]
    return model.classify(my_features)


def _link_dose_duration_operator(s, chrono_id, chrono_list, ref_list, ref_idx,
                                 target, as_period, ref_Sspan):
    """Append a This/Next/Last operator linked to ``target``; return next id."""
    blank_punct = str.maketrans(string.punctuation,
                                ' ' * len(string.punctuation))
    # ref_idx - 1 would wrap around to the END of ref_list when ref_idx is 0
    # (or negative), so only look back when a previous token really exists.
    prior = ref_list[ref_idx - 1] if ref_idx > 0 else None

    if (prior is not None
            and prior.getText().lower().translate(blank_punct) == "this"):
        # The comparison only succeeds when the token is exactly "this", so
        # the operator covers the whole prior token.
        prior_start, _prior_end = prior.getSpan()
        chrono_this_entity = chrono.ChronoThisOperator(
            entityID=str(chrono_id) + "entity",
            start_span=prior_start,
            end_span=prior_start + len("this"))
        chrono_id = chrono_id + 1
        if as_period:
            chrono_this_entity.set_period(target.get_id())
        else:
            chrono_this_entity.set_repeating_interval(target.get_id())
        chrono_list.append(chrono_this_entity)
        return chrono_id

    # No "this" in front: check for a Next/Last modifier in the phrase.
    hasMod, mod_type, mod_start, mod_end = hasModifier(s)
    if not hasMod:
        return chrono_id

    link_key = "period" if as_period else "repeating_interval"
    if mod_type == "Next":
        chrono_list.append(
            chrono.ChronoNextOperator(
                entityID=str(chrono_id) + "entity",
                start_span=ref_Sspan + mod_start,
                end_span=ref_Sspan + mod_end,
                **{link_key: target.get_id()}))
        chrono_id = chrono_id + 1
    elif mod_type == "Last":
        chrono_list.append(
            chrono.ChronoLastOperator(
                entityID=str(chrono_id) + "entity",
                start_span=ref_Sspan + mod_start,
                end_span=ref_Sspan + mod_end,
                semantics="Interval-Not-Included",
                **{link_key: target.get_id()}))
        chrono_id = chrono_id + 1
    return chrono_id
コード例 #6
0
def _appendDayOfMonth(chrono_list, chrono_id, ref_Sspan, text, token, num):
    """Append a ChronoDayOfMonthEntity for ``num``; return the next free ID.

    ``token`` is located inside ``text`` with calculateSpan and the resulting
    phrase-relative indices are offset by ``ref_Sspan``.
    """
    day_startidx, day_endidx = calculateSpan(text, token)
    chrono_list.append(
        chrono.ChronoDayOfMonthEntity(entityID=str(chrono_id) + "entity",
                                      start_span=ref_Sspan + day_startidx,
                                      end_span=ref_Sspan + day_endidx,
                                      value=num))
    return chrono_id + 1


def _appendYearForMonth(chrono_list, chrono_id, ref_Sspan, text, token, num,
                        month_entity):
    """Append a ChronoYearEntity for ``num`` whose sub-interval is the month.

    ``token`` is located inside ``text`` with calculateSpan and the resulting
    phrase-relative indices are offset by ``ref_Sspan``.  Returns the next
    free ID.
    """
    year_startidx, year_endidx = calculateSpan(text, token)
    year_entity = chrono.ChronoYearEntity(entityID=str(chrono_id) + "entity",
                                          start_span=ref_Sspan + year_startidx,
                                          end_span=ref_Sspan + year_endidx,
                                          value=num)
    chrono_list.append(year_entity)
    year_entity.set_sub_interval(month_entity.get_id())
    return chrono_id + 1


def buildTextMonthAndDay(s,
                         chrono_id,
                         chrono_list,
                         flags,
                         dct=None,
                         ref_list=None):
    """Build a month-of-year entity plus any trailing day/year and modifier.

    Nothing is built unless hasTextMonth reports a month and
    ``flags["month"]`` is still unset.  The text after the month is scanned
    for numbers: values <= 31 become day-of-month entities and values in
    1500..2050 become year entities (only while neither
    ``flags["fourdigityear"]`` nor ``flags["loneDigitYear"]`` is set).  When
    the month does not start the phrase, hasModifier is consulted and a
    This/Next/Last operator pointing at the month is added.

    Args:
        s: the temporal phrase; must provide getSpan() and getText().
        chrono_id: integer used to build the next entity ID.
        chrono_list: list that receives the new entities (mutated in place).
        flags: dict of "already found" markers; "month", "day" and
            "fourdigityear" may be set to True here.
        dct: unused.  The document-time based Next/Last inference that read
            it was disabled behind ``if False`` and has been removed as
            unreachable; the parameter is kept so callers do not break.
        ref_list: forwarded unchanged to hasTextMonth.

    Returns:
        The tuple (chrono_list, chrono_id, flags) with chrono_id advanced by
        the number of entities created.
    """
    boo, val, idxstart, idxend = hasTextMonth(s, ref_list)
    if not boo or flags["month"]:
        return chrono_list, chrono_id, flags

    flags["month"] = True
    ref_Sspan, _ = s.getSpan()
    text = s.getText()
    my_month_entity = chrono.chronoMonthOfYearEntity(
        entityID=str(chrono_id) + "entity",
        start_span=ref_Sspan + idxstart,
        end_span=ref_Sspan + idxend,
        month_type=val)
    chrono_id = chrono_id + 1

    # idxend is the index just past the month, so anything beyond it is
    # trailing text that may hold a day and/or a year.
    if idxend < len(text):
        substr = text[idxend:].strip(",.").strip()
        num = utils.getNumberFromText(substr)

        if num is not None:
            # The whole remainder parses as a single number.
            if num <= 31 and not flags["day"]:
                flags["day"] = True
                chrono_id = _appendDayOfMonth(chrono_list, chrono_id,
                                              ref_Sspan, text, str(num), num)
            elif 1500 <= num <= 2050 and not flags[
                    "fourdigityear"] and not flags["loneDigitYear"]:
                flags["fourdigityear"] = True
                chrono_id = _appendYearForMonth(chrono_list, chrono_id,
                                                ref_Sspan, text, substr, num,
                                                my_month_entity)
        else:
            # Otherwise blank out punctuation and examine each token.
            cleaned = substr.translate(
                str.maketrans(string.punctuation,
                              ' ' * len(string.punctuation)))
            for token in WhitespaceTokenizer().tokenize(cleaned):
                num = utils.getNumberFromText(token)
                if num is None:
                    continue
                # NOTE(review): unlike the single-number case above, this
                # branch neither checks nor sets flags["day"]; behaviour is
                # kept as found -- confirm whether that is intentional.
                if num <= 31:
                    chrono_id = _appendDayOfMonth(chrono_list, chrono_id,
                                                  ref_Sspan, text, token, num)
                elif 1500 <= num <= 2050 and not flags[
                        "fourdigityear"] and not flags["loneDigitYear"]:
                    flags["fourdigityear"] = True
                    chrono_id = _appendYearForMonth(chrono_list, chrono_id,
                                                    ref_Sspan, text, token,
                                                    num, my_month_entity)

    # A month that does not start the phrase has leading text that may
    # carry a this/next/last modifier.
    if idxstart > 0:
        hasMod, mod_type, mod_start, mod_end = hasModifier(s)
        if hasMod:
            if mod_type == "This":
                chrono_list.append(
                    chrono.ChronoThisOperator(
                        entityID=str(chrono_id) + "entity",
                        start_span=ref_Sspan + mod_start,
                        end_span=ref_Sspan + mod_end,
                        repeating_interval=my_month_entity.get_id()))
                chrono_id = chrono_id + 1
            elif mod_type == "Next":
                chrono_list.append(
                    chrono.ChronoNextOperator(
                        entityID=str(chrono_id) + "entity",
                        start_span=ref_Sspan + mod_start,
                        end_span=ref_Sspan + mod_end,
                        repeating_interval=my_month_entity.get_id()))
                chrono_id = chrono_id + 1
            elif mod_type == "Last":
                chrono_list.append(
                    chrono.ChronoLastOperator(
                        entityID=str(chrono_id) + "entity",
                        start_span=ref_Sspan + mod_start,
                        end_span=ref_Sspan + mod_end,
                        repeating_interval=my_month_entity.get_id(),
                        semantics="Interval-Not-Included"))
                chrono_id = chrono_id + 1

    # The month itself is appended last, after everything that refers to it.
    chrono_list.append(my_month_entity)

    return chrono_list, chrono_id, flags
コード例 #7
0
def _classifyPeriodOrInterval(classifier, my_features):
    """Return the class predicted for ``my_features`` (callers treat 1 as period).

    ``classifier`` is indexed as (model, type name); the type name decides
    how the model is invoked.
    """
    if classifier[1] == "NN":
        return ChronoKeras.keras_classify(
            classifier[0], np.array(list(my_features.values())))
    if classifier[1] in ("SVM", "RF"):
        feat_array = [int(i) for i in my_features.values()]
        return classifier[0].predict([feat_array])[0]
    return classifier[0].classify(my_features)


def _findPrecedingThis(ref_list, ref_idx):
    """Return the (start, end) span of a "this" token right before ``ref_idx``.

    Returns None when there is no preceding token or it is not "this".
    """
    # Without this guard ref_idx - 1 would be negative for the first token
    # (or for a "not found" index of -1) and Python would silently wrap
    # around to the end of ref_list.
    if ref_idx < 1:
        return None
    prior = ref_list[ref_idx - 1]
    # The previous implementation blanked punctuation before comparing, which
    # can only equal "this" when the lowered token is exactly "this"; the
    # direct comparison is equivalent.
    if prior.getText().lower() != "this":
        return None
    prior_start, _ = prior.getSpan()
    return prior_start, prior_start + len("this")


def _appendThisNextLast(s, ref_list, ref_idx, ref_Sspan, my_entity, chrono_id,
                        chrono_list, is_period):
    """Append a This/Next/Last operator for ``my_entity`` if one applies.

    A "this" reference token directly before the entity takes precedence;
    otherwise hasNextLastThis(s) is consulted and only "Next" and "Last" are
    acted on.  The operator is linked through ``period`` when ``is_period``
    is true and through ``repeating_interval`` otherwise.  Returns the next
    free ID.
    """
    this_span = _findPrecedingThis(ref_list, ref_idx)
    if this_span is not None:
        chrono_this_entity = chrono.ChronoThisOperator(
            entityID=str(chrono_id) + "entity",
            start_span=this_span[0],
            end_span=this_span[1])
        if is_period:
            chrono_this_entity.set_period(my_entity.get_id())
        else:
            chrono_this_entity.set_repeating_interval(my_entity.get_id())
        chrono_list.append(chrono_this_entity)
        return chrono_id + 1

    hasMod, mod_type, mod_start, mod_end = hasNextLastThis(s)
    if hasMod and mod_type in ("Next", "Last"):
        operator_args = {
            "entityID": str(chrono_id) + "entity",
            "start_span": ref_Sspan + mod_start,
            "end_span": ref_Sspan + mod_end,
            ("period" if is_period else "repeating_interval"):
            my_entity.get_id(),
        }
        if mod_type == "Next":
            chrono_list.append(chrono.ChronoNextOperator(**operator_args))
        else:
            chrono_list.append(
                chrono.ChronoLastOperator(semantics="Interval-Not-Included",
                                          **operator_args))
        chrono_id = chrono_id + 1
    return chrono_id


def _linkPrecedingNumber(s, idxstart, ref_Sspan, my_entity, chrono_id,
                         chrono_list):
    """Attach the number written just before the interval word, if any.

    The space-delimited token immediately before ``idxstart`` is searched
    for a 1-2 digit number, falling back to utils.getNumberFromText for
    spelled-out numbers.  When found, a ChronoNumber is appended to
    ``chrono_list`` and linked with my_entity.set_number().  Returns the next
    free ID.
    """
    if idxstart <= 0:
        return chrono_id

    leading = s.getText()[0:idxstart]
    # Phrases such as "six-months" have no space between number and unit, so
    # remember whether the separator really was a space.
    has_space = leading[-1] == ' '
    # Purposefully split on a single space.
    pieces = leading.strip(' ').split(' ')
    prevtok = pieces[-1]

    # Offset of prevtok in the phrase: everything before it plus one space.
    if len(pieces) > 1:
        rest_of_phrase_length = len(' '.join(pieces[:-1])) + 1
    else:
        rest_of_phrase_length = 0

    m = re.search('([0-9]{1,2})', prevtok)
    if m is not None:
        num_val = m.group(0)
        num_Sspan = ref_Sspan + rest_of_phrase_length + m.span(0)[0]
        num_Espan = ref_Sspan + rest_of_phrase_length + m.span(0)[1]
    else:
        # No digits; try to read the token as a text number.
        num_val = utils.getNumberFromText(prevtok)
        if num_val is None:
            return chrono_id
        num_Sspan = ref_Sspan + rest_of_phrase_length
        num_Espan = num_Sspan + len(prevtok)
        if not has_space:
            num_Espan = num_Espan - 1

    my_number_entity = chrono.ChronoNumber(entityID=str(chrono_id) + "entity",
                                           start_span=num_Sspan,
                                           end_span=num_Espan,
                                           value=num_val)
    chrono_list.append(my_number_entity)
    my_entity.set_number(my_number_entity.get_id())
    return chrono_id + 1


def buildPeriodInterval(s, chrono_id, chrono_list, ref_list, classifier,
                        feats):
    """Build a period or calendar-interval entity (plus helpers) for ``s``.

    Four cases are handled, in order:

    1. hasPeriodInterval matched and the text contains one of the fixed
       terms (yesterday, tomorrow, today, daily, /min, /week): always a
       calendar interval, with a Last operator added for "yesterday".
    2. hasPeriodInterval matched with value "Unknown": always a period.
    3. hasPeriodInterval matched otherwise: the classifier decides (1 means
       period, anything else calendar interval); a This/Next/Last operator
       and a preceding number are linked when present.
    4. No match: hasEmbeddedPeriodInterval is tried; on success the
       classifier decides and a preceding number is linked when present.

    Args:
        s: the temporal phrase; must provide getSpan() and getText().
        chrono_id: integer used to build the next entity ID.
        chrono_list: list that receives the new entities (mutated in place).
        ref_list: reference tokens; each must provide getSpan() and
            getText().
        classifier: indexable pair (model, type name).
        feats: feature dictionary; a copy is handed to
            utils.extract_prediction_features.

    Returns:
        The tuple (chrono_list, chrono_id) with chrono_id advanced by the
        number of entities created.
    """
    features = feats.copy()
    ref_Sspan, ref_Espan = s.getSpan()
    boo, val, idxstart, idxend, plural = hasPeriodInterval(s)

    # FIND terms that are always marked as calendar intervals!
    if boo and re.search(
            "yesterday|yesterdays|tomorrow|tomorrows|today|todays|daily|/min|/week",
            s.getText()):
        abs_Sspan = ref_Sspan + idxstart
        abs_Espan = ref_Sspan + idxend
        my_entity = chrono.ChronoCalendarIntervalEntity(
            entityID=str(chrono_id) + "entity",
            start_span=abs_Sspan,
            end_span=abs_Espan,
            calendar_type=val,
            number=None)
        chrono_id = chrono_id + 1

        if re.search("yesterday|yesterdays", s.getText()):
            # chrono_id - 1 is the ID just given to the interval above.
            my_last_entity = chrono.ChronoLastOperator(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                repeating_interval=str(chrono_id - 1) + "entity")
            chrono_id = chrono_id + 1
            chrono_list.append(my_last_entity)

        chrono_list.append(my_entity)

    # FIND terms that are always marked as periods!
    elif boo and val == "Unknown":
        my_entity = chrono.ChronoPeriodEntity(entityID=str(chrono_id) +
                                              "entity",
                                              start_span=ref_Sspan + idxstart,
                                              end_span=ref_Sspan + idxend,
                                              period_type=val,
                                              number=None)
        chrono_id = chrono_id + 1
        chrono_list.append(my_entity)

    elif boo:
        abs_Sspan = ref_Sspan + idxstart
        abs_Espan = ref_Sspan + idxend

        # get index of overlapping reference token
        ref_idx = utils.getRefIdx(ref_list, abs_Sspan, abs_Espan)

        # extract ML features and classify into period or interval
        my_features = utils.extract_prediction_features(
            ref_list, ref_idx, features)
        is_period = _classifyPeriodOrInterval(classifier, my_features) == 1

        if is_period:
            my_entity = chrono.ChronoPeriodEntity(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                period_type=getPeriodValue(val),
                number=None)
        else:
            my_entity = chrono.ChronoCalendarIntervalEntity(
                entityID=str(chrono_id) + "entity",
                start_span=abs_Sspan,
                end_span=abs_Espan,
                calendar_type=val,
                number=None)
        chrono_id = chrono_id + 1

        # Operators and the number are appended before the entity itself.
        chrono_id = _appendThisNextLast(s, ref_list, ref_idx, ref_Sspan,
                                        my_entity, chrono_id, chrono_list,
                                        is_period)
        # We assume the number comes before the interval string.
        chrono_id = _linkPrecedingNumber(s, idxstart, ref_Sspan, my_entity,
                                         chrono_id, chrono_list)
        chrono_list.append(my_entity)

    else:
        boo2, val, idxstart, idxend, numstr = hasEmbeddedPeriodInterval(s)
        if boo2:
            abs_Sspan = ref_Sspan + idxstart
            abs_Espan = ref_Sspan + idxend

            # get index of the first overlapping reference token, -1 if none
            ref_idx = next(
                (i for i, ref in enumerate(ref_list)
                 if utils.overlap(ref.getSpan(), (abs_Sspan, abs_Espan))), -1)

            # extract ML features and classify into period or interval
            my_features = utils.extract_prediction_features(
                ref_list, ref_idx, features)

            if _classifyPeriodOrInterval(classifier, my_features) == 1:
                my_entity = chrono.ChronoPeriodEntity(
                    entityID=str(chrono_id) + "entity",
                    start_span=abs_Sspan,
                    end_span=abs_Espan,
                    period_type=getPeriodValue(val),
                    number=None)
            else:
                my_entity = chrono.ChronoCalendarIntervalEntity(
                    entityID=str(chrono_id) + "entity",
                    start_span=abs_Sspan,
                    end_span=abs_Espan,
                    calendar_type=val)
            chrono_id = chrono_id + 1

            # Extract the number and identify its span.
            chrono_id = _linkPrecedingNumber(s, idxstart, ref_Sspan,
                                             my_entity, chrono_id,
                                             chrono_list)
            chrono_list.append(my_entity)

    return chrono_list, chrono_id