예제 #1
0
def margin(span):
    """Enrich a margin match.

    Gathers the lower-cased text of every token in ``span`` whose entity
    type is ``margin_term`` or ``surface`` and passes that list through
    ``squash``.

    Returns:
        A dict with ``subpart`` fixed to ``'margin'`` and ``margin`` holding
        the squashed token texts.
    """
    wanted = {'margin_term', 'surface'}
    terms = []
    for token in span:
        if token.ent_type_ in wanted:
            terms.append(token.lower_)
    return {'subpart': 'margin', 'margin': squash(terms)}
예제 #2
0
def convert(token):
    """Build collector traits from a matched token.

    The ``col_name`` group is split on "and", "with", commas and ampersands.
    Each piece has everything from a run of three or more dots onward
    removed, and pieces shorter than ``MIN_LEN`` are skipped. When the piece
    that follows is a known name suffix it is appended to the current name;
    a piece that is itself a suffix does not yield a trait of its own.

    Returns:
        ``squash`` applied to the list of traits, or ``None`` when no trait
        was built or when the collector number ends in "m"/"M".
    """
    pieces = regex.split(r'\s*(?:and|with|[,&])\s*',
                         token.group.get('col_name'))

    collectors = []

    # Pair every piece with the one after it so a trailing suffix can be
    # glued back onto the name it belongs to.
    for raw, following in zip_longest(pieces, pieces[1:], fillvalue=''):
        cleaned = regex.sub(r'\.{3,}.*', '', raw)
        if len(cleaned) < MIN_LEN:
            continue

        collector = Trait(start=token.start, end=token.end)
        if following.lower() in name_parts.SUFFIXES:
            collector.col_name = f'{cleaned} {following}'
        else:
            collector.col_name = cleaned

        # A bare suffix was already attached to the previous name.
        if cleaned.lower() in name_parts.SUFFIXES:
            continue
        collectors.append(collector)

    if not collectors:
        return None

    number = token.group.get('collector_no')
    if number:
        # Temp hack: a number ending in "m"/"M" rejects the whole match.
        if number[-1] in ('m', 'M'):
            return None
        collectors[0].col_no = number

    return squash(collectors)
예제 #3
0
def surface(span):
    """Enrich a surface match.

    Tokens are sorted by entity type:

    * ``part`` / ``location``: stored directly under that key (last one
      wins), after a ``REPLACE`` lookup on the lower-cased text.
    * ``surface``: appended to a list, after the same lookup.
    * ``subpart``: added to a set, after the same lookup.
    * any other token whose lower-cased text is in ``PRESENT``: its
      ``PRESENCE`` value (default ``False``) is added to a set.

    If more than one subpart was collected, the ``'surface'`` entry is
    dropped from the subparts. Non-empty collections are squashed and added
    to the result under ``present``, ``subpart`` and ``surface``.

    Returns:
        A dict of the enriched fields; empty when nothing matched.
    """
    def canonical(tok):
        return REPLACE.get(tok.lower_, tok.lower_)

    result = {}
    surfaces = []
    subparts = set()
    presence = set()

    for token in span:
        kind = token.ent_type_
        if kind in ('part', 'location'):
            result[kind] = canonical(token)
        elif kind == 'surface':
            surfaces.append(canonical(token))
        elif kind == 'subpart':
            subparts.add(canonical(token))
        elif token.lower_ in PRESENT:
            presence.add(PRESENCE.get(token.lower_, False))

    if len(subparts) > 1:
        subparts.discard('surface')

    collected = (
        ('present', presence),
        ('subpart', subparts),
        ('surface', surfaces),
    )
    for key, values in collected:
        # Empty collections are left out of the result entirely.
        if values:
            result[key] = squash(values)

    return result
예제 #4
0
def as_value(token, trait, value_field="number", unit_field="units"):
    """Convert token values and units to trait fields.

    Args:
        token: Matched token; its ``group`` mapping supplies the raw values
            and units.
        trait: Object that receives ``units``, ``value`` and
            ``units_inferred``.
        value_field: Key in ``token.group`` holding the value(s).
        unit_field: Key in ``token.group`` holding the unit(s).

    Returns:
        ``True`` when at least one value was converted and stored on the
        trait. ``False`` when there are no values or when
        ``to_positive_float`` yields ``None`` for any of them; note that
        ``trait.units`` has already been assigned in that case.
    """
    unit_list = as_list(token.group.get(unit_field, []))
    trait.units = squash(unit_list) if unit_list else None

    # Values without a unit of their own reuse the last unit given, if any.
    fallback = unit_list[-1] if unit_list else None

    converted = []
    raw_values = as_list(token.group.get(value_field, []))
    for index, raw in enumerate(raw_values):
        number = to_positive_float(raw)
        if number is None:
            return False
        unit = unit_list[index] if index < len(unit_list) else fallback
        converted.append(convert_units(number, unit))

    if not converted:
        return False

    trait.value = squash(converted)
    trait.units_inferred = not trait.units
    return True
예제 #5
0
def shape(span):
    """Enrich a shape match.

    Collects every token in ``span`` whose entity type is ``shape``, runs its
    lower-cased text through the ``REPLACE`` lookup, and squashes the result
    into the ``shape`` field. If the span also contains ``part`` tokens, the
    lower-cased text of the first one is stored under ``part``.

    Returns:
        A dict with a ``shape`` key and, when a part token is present, a
        ``part`` key.
    """
    data = {
        'shape': squash([
            REPLACE.get(t.lower_, t.lower_) for t in span
            if t.ent_type_ == 'shape'
        ]),
    }

    if parts := [t.lower_ for t in span if t.ent_type_ == 'part']:
        data['part'] = parts[0]

    # Hand the enriched fields back to the caller; without this the
    # function would build ``data`` and then implicitly return None.
    return data
예제 #6
0
def compound(token):
    """Handle a pattern like: 4 ft 9 in.

    Builds a trait spanning the token, records the matched feet and inches
    unit strings, and flags a missing ``key`` group as ``ambiguous_key``.
    The ``ft`` group is converted once; each entry of the ``in`` group is
    converted, added to the feet value and rounded to two decimals.

    Returns:
        The populated trait, with ``value`` set to the squashed totals.
    """
    result = Trait(start=token.start, end=token.end)
    result.units = [token.group["feet"], token.group["inches"]]
    result.units_inferred = False
    result.is_flag_missing(token, "key", rename="ambiguous_key")

    feet = convert_units(to_positive_float(token.group["ft"]), "ft")

    totals = []
    for raw_inches in as_list(token.group["in"]):
        inches = convert_units(to_positive_float(raw_inches), "in")
        totals.append(round(feet + inches, 2))

    result.value = squash(totals)
    add_flags(token, result)
    return result
예제 #7
0
def compound(token):
    """Convert a compound weight like: 2 lbs. 3.1 - 4.5 oz.

    Builds a trait spanning the token, records the matched pounds and ounces
    unit strings, and flags a missing ``key`` group as ``ambiguous_key``.
    The ``lbs`` group is converted once; each entry of the ``ozs`` group is
    converted, added to the pounds value and rounded to two decimals.

    Returns:
        The populated trait, with ``value`` set to the squashed totals.
    """
    groups = token.group

    weight = Trait(start=token.start, end=token.end)
    weight.units = [groups["pounds"], groups["ounces"]]
    weight.units_inferred = False
    weight.is_flag_missing(token, "key", rename="ambiguous_key")

    pounds = convert_units(to_positive_float(groups["lbs"]), "lbs")

    combined = []
    for raw_ounces in as_list(groups["ozs"]):
        ounces = convert_units(to_positive_float(raw_ounces), "ozs")
        combined.append(round(pounds + ounces, 2))

    weight.value = squash(combined)
    add_flags(token, weight)
    return weight
예제 #8
0
 def transfer(self, token, names):
     """Move fields from a token to the trait if they exist in the token.

     For every name in ``names`` that is a key of ``token.group``, the
     group's value(s) are lower-cased, squashed, and stored on ``self``
     under the same attribute name. Names missing from the group are
     skipped.
     """
     for field in names:
         if field not in token.group:
             continue
         lowered = []
         for item in as_list(token.group[field]):
             lowered.append(item.lower())
         setattr(self, field, squash(lowered))