Esempio n. 1
0
def sex_count(ent):
    """Collect the sex and the count found among the tokens of a match.

    A token whose entity type is 'sex' stores its lowercased text under
    'sex'. Any other token whose lowercased text ``to_positive_int`` turns
    into a non-None value stores that value under 'count'. Later tokens
    overwrite earlier ones.

    NOTE(review): in this excerpt the collected dict is neither returned nor
    attached to ``ent``; the end of the function may be missing here.
    """
    collected = {}

    for tok in ent:
        kind = tok.ent_type_
        text = tok.lower_

        if kind == 'sex':
            collected['sex'] = text
            continue

        number = to_positive_int(text)
        if number is not None:
            collected['count'] = number
Esempio n. 2
0
def fraction(token):
    """Handle fractional values like 10 3/8 inches.

    Builds a Trait covering the token, reads the optional "units" and
    "whole" groups and the mandatory "numerator" and "denominator" groups
    (a missing mandatory group raises KeyError), and stores
    ``whole + numerator / denominator`` as the trait value. When units are
    present the value is passed through ``convert_units``.

    Returns:
        The populated Trait, or None when the parts cannot be combined into
        a number. In that case a "Fraction error" line is printed.
    """
    trait = Trait(start=token.start, end=token.end)
    trait.units = token.group.get("units")
    # No explicit units in the text means the units are flagged as inferred.
    trait.units_inferred = not bool(trait.units)
    whole = to_positive_float(token.group.get("whole", "0"))
    numerator = to_positive_int(token.group["numerator"])
    denominator = to_positive_int(token.group["denominator"])
    try:
        trait.value = whole + Fraction(numerator, denominator)
    except (TypeError, ZeroDivisionError):
        # TypeError: an operand is not a number (presumably a helper returned
        # None for unparsable text -- verify against the helpers).
        # ZeroDivisionError: Fraction rejects a zero denominator, e.g. "3/0".
        print(f"Fraction error: {numerator} / {denominator}")
        return None
    if trait.units:
        trait.value = convert_units(trait.value, trait.units)
    add_flags(token, trait)
    return trait
Esempio n. 3
0
def range_(span):
    """Build a count range from the digit tokens in a span.

    Every token flagged ``is_digit`` is converted with ``to_positive_int``.
    The smallest value becomes 'low'; when more than one value was found the
    largest becomes 'high'.

    Raises:
        ValueError: if the span holds no digit tokens (``min`` of an empty
            list).
    """
    numbers = []
    for tok in span:
        if tok.is_digit:
            numbers.append(to_positive_int(tok.text))

    result = {'low': min(numbers)}
    if len(numbers) >= 2:
        result['high'] = max(numbers)
    return result
Esempio n. 4
0
def multiple_seta_count(ent):
    """Gather seta data when one match mentions several counts.

    Tokens are classified in this order, first hit wins:
      1. cached label 'seta'        -> normalized seta name stored in data
      2. cached label 'number_word' -> REPLACE lookup converted and collected
      3. text matched by IS_INT     -> matched digits converted and collected
      4. cached label 'group'       -> lowercased text stored in data

    NOTE(review): in this excerpt neither the data dict nor the collected
    values are returned or stored; the end of the function may be missing.
    """
    data = {'body_part': 'seta'}
    values = []

    for tok in ent:
        kind = tok._.cached_label

        if kind == 'seta':
            word = tok.lower_
            data['seta'] = REPLACE.get(word, word)
            continue

        if kind == 'number_word':
            values.append(to_positive_int(REPLACE.get(tok.lower_)))
            continue

        # The integer check deliberately runs before the 'group' check.
        found = IS_INT.match(tok.text)
        if found:
            values.append(to_positive_int(found.group(0)))
            continue

        if kind == 'group':
            data['group'] = tok.lower_
Esempio n. 5
0
def range_values(ent):
    """Turn the range sub-entity of a match into a count entity.

    Picks the first sub-entity whose cached label starts with 'range', pulls
    every FLOAT_RE hit out of its text and pairs those hits, in order, with
    the dotted parts of the sub-entity's label that follow the first part.
    Each paired value is converted with ``to_positive_int``.

    Returns:
        The range sub-entity, with its data set and its new label 'count'.

    Raises:
        IndexError: if the match has no range sub-entity.
        RejectMatch: if any extracted value fails the INT_TOKEN_RE search.
    """
    candidates = []
    for sub in ent.ents:
        if sub._.cached_label.split('.')[0] == 'range':
            candidates.append(sub)
    range_ = candidates[0]

    numbers = re.findall(FLOAT_RE, range_.text)

    # Every extracted number has to look like an integer token.
    if any(re.search(INT_TOKEN_RE, num) is None for num in numbers):
        raise RejectMatch

    names = range_.label_.split('.')[1:]
    range_._.data = {
        name: to_positive_int(num) for name, num in zip(names, numbers)
    }
    range_._.new_label = 'count'
    return range_
Esempio n. 6
0
def seta_count(ent):
    """Gather the seta name, group and count from the tokens of a match.

    Tokens are classified in this order, first hit wins:
      1. cached label 'seta'        -> normalized seta name
      2. cached label 'number_word' -> count from the REPLACE lookup
                                       (-1 when the word is not in REPLACE)
      3. lowercased text in MISSING -> count of 0
      4. cached label 'group'       -> lowercased text as the group
      5. text matched by IS_INT     -> count from the matched digits

    NOTE(review): in this excerpt the data dict is never returned or stored
    and ``location`` is never filled; the end of the function may be missing.
    """
    data = {'body_part': 'seta'}
    location = []

    for tok in ent:
        kind = tok._.cached_label

        if kind == 'seta':
            word = tok.lower_
            data['seta'] = REPLACE.get(word, word)
            continue

        if kind == 'number_word':
            data['count'] = int(REPLACE.get(tok.lower_, -1))
            continue

        if tok.lower_ in MISSING:
            data['count'] = 0
            continue

        if kind == 'group':
            data['group'] = tok.lower_
            continue

        found = IS_INT.match(tok.text)
        if found:
            data['count'] = to_positive_int(found.group(0))
Esempio n. 7
0
def count_word(ent):
    """Relabel a count-word match and attach its numeric value.

    The match itself is relabeled 'count' first. Then the first sub-entity
    labeled 'count_word' gets its text looked up in REPLACE, converted with
    ``to_positive_int`` and stored as the 'low' value, and is relabeled
    'count' as well.

    Raises:
        IndexError: if the match has no 'count_word' sub-entity.
        KeyError: if the sub-entity text is not a key of REPLACE.
    """
    ent._.new_label = 'count'

    words = []
    for sub in ent.ents:
        if sub.label_ == 'count_word':
            words.append(sub)
    word = words[0]

    number = to_positive_int(REPLACE[word.text])
    word._.data = {'low': number}
    word._.new_label = 'count'
def sample(token):
    """Extract the sample size from the token text.

    Searches the text with INT_RE and converts the whole match with
    ``to_positive_int``.

    Returns:
        A dict with the converted value under the key 'n'.

    Raises:
        AttributeError: if INT_RE does not match the token text.
    """
    found = re.search(INT_RE, token.text)
    return {'n': to_positive_int(found.group(0))}