BODY_MASS = Base( name=__name__.split(".")[-1], rules=[ VOCAB["uuid"], # UUIDs cause problems with numbers # Looking for keys like: MassInGrams VOCAB.term( "key_with_units", r""" ( weight | mass) [\s-]* in [\s-]* (?P<mass_units> grams | g | lbs ) """, ), # These words indicate a body mass follows VOCAB.part("key_leader", "full observed total".split()), # Words for weight VOCAB.part("weight", "weights? weigh(ed|ing|s)?".split()), # Keys like: w.t. VOCAB.part("key_with_dots", r" \b w \.? \s? t s? \.? "), # Common prefixes that indicate a body mass VOCAB.part("mass", "mass"), VOCAB.part("body", "body"), # These indicate that the mass is NOT a body mass VOCAB.term( "other_wt", """ femur baculum bacu bac spleen thymus kidney testes testis ovaries epididymis epid """.split(), ),
PLACENTAL_SCAR_COUNT = Base( name=__name__.split(".")[-1], rules=[ VOCAB["uuid"], # UUIDs cause problems with numbers VOCAB["shorthand"], # Adjectives to placental scars VOCAB.term( "adj", r""" faint prominent recent old possible """.split(), ), # Skip arbitrary words VOCAB["word"], VOCAB.part("sep", r" [;/] "), VOCAB.grouper( "count", """ none embryo conj | none visible | integer | none """, ), VOCAB.producer( convert_count, """(?P<count1> count ) op (?P<count2> count ) ( eq (?P<value> count ) )? plac_scar """, ), VOCAB.producer( convert_count, """plac_scar op?
def convert(token):
    """Convert a parsed token into an enlarged / not-enlarged nipple trait.

    The producer patterns below tag a positive match as group ``pos``;
    anything without that group is reported as "not enlarged".
    """
    if token.group.get("pos"):
        state = "enlarged"
    else:
        state = "not enlarged"
    return Trait(value=state, start=token.start, end=token.end)


NIPPLES_ENLARGED = Base(
    name=__name__.split(".")[-1],
    rules=[
        VOCAB["conj"],
        # Punctuation that can terminate a notation
        VOCAB.part("separator", r' [;"?/,] '),
        # Coded abbreviations: o/c + e + l/n means enlarged,
        # o/c + s + l/n means not enlarged
        VOCAB.term("enlarged_abbrev", r"[oc]e[ln]"),
        VOCAB.term("not_enlarged_abbrev", r"[oc]s[ln]"),
        VOCAB.term("false", """ false """),
        # Positive notations, e.g.: "nipples enlarged"
        VOCAB.producer(convert, """ (?P<pos> nipple enlarged ) """),
        VOCAB.producer(convert, """ (?P<pos> enlarged nipple ) """),
        VOCAB.producer(convert, """ (?P<pos> enlarged_abbrev ) """),
        # Negative notations, e.g.: "no nipples", "nipples not enlarged"
        VOCAB.producer(convert, """ (?P<neg> none nipple ) """),
        VOCAB.producer(convert, """ (?P<neg> nipple none ) """),
        VOCAB.producer(convert, """ (?P<neg> nipple not_enlarged ) """),
        VOCAB.producer(convert, """ (?P<neg> not_enlarged false? nipple ) """),
        VOCAB.producer(convert, """ (?P<neg> not_enlarged_abbrev ) """),
    ],
)
imagos? imms? immatures? jeunes? juvs? juveniles? juvéniles? larvae? larvals? larves? leptocephales? leptocephalus matures? metamorphs? neonates? nestlings? nulliparous premetamorphs? sub-adults? subads? subadulte?s? tadpoles? têtard yearlings? yg ygs young """.split(), ), # This indicates that the following words are NOT a life stage VOCAB.term("skip", r" determin \w* "), # Compound words separated by dashes or slashes # E.g. adult/juvenile or over-winter VOCAB.part("joiner", r" \s* [/-] \s* "), # Use this to find the end of a life stage pattern VOCAB.part("separator", r' [;,"?] | $ '), # For life stages with numbers as words in them VOCAB["ordinals"], VOCAB["time_units"], VOCAB.part("after", "after"), VOCAB.part("hatching", "hatching"), # Match any word VOCAB.part("word", r" \b \w [\w?.-]* (?! [./-] ) "), VOCAB.grouper("as_time", " after? (ordinals | hatching) time_units"), # E.g.: life stage juvenile/yearling VOCAB.producer( convert, "json_key (?P<value> ( intrinsic | word ) joiner intrinsic )"), # E.g.: life stage young adult
"""Parse v****a state notations.""" from traiter.old.vocabulary import Vocabulary import vertnet.pylib.shared_reproductive_patterns as patterns from vertnet.parsers.base import Base, convert VOCAB = Vocabulary(patterns.VOCAB) VAGINA_STATE = Base( name=__name__.split(".")[-1], rules=[ VOCAB.part("v****a", r""" (?<! sal ) ( v****a | vag | vulva ) """), VOCAB.term("abbrev", r""" ov cv [oc][sme][ln] vc vo """.split()), VOCAB.part( "closed", r""" closed | imperforated | imperf | cerrada | non [-\s] perforated | unperforate | non [-\s] perf | clsd | imp """, ), VOCAB.part("open", r""" open | perforated? | perf | abrir """), VOCAB.part("other", r""" swollen | plugged | plug | sealed """), VOCAB.grouper("state", """ closed | open | other """), VOCAB.producer(convert, """ (?P<value> v****a partially? state ) """), VOCAB.producer(convert, """ (?P<value> state v****a state? ) """), VOCAB.producer(convert, """ (?P<value> ( state | abbrev ) v****a? ) """), ], )
import vertnet.pylib.patterns as patterns
from vertnet.parsers.base import Base, convert

# NOTE(review): `Vocabulary` is used here but its import is not visible in
# this view — presumably imported above; confirm at the top of the file.
VOCAB = Vocabulary(patterns.VOCAB)

SEX = Base(
    name=__name__.split(".")[-1],
    rules=[
        # JSON keys for sex
        VOCAB.term("sex_key", "sex"),
        # The sexes
        VOCAB.term("sex_vocab", "females? males?".split()),
        # These are words that indicate that "sex" is not a key
        VOCAB.term("not_sex", "and is was".split()),
        # Allow arbitrary words in some cases
        VOCAB.part("word", r' \b [a-z] [^;,"=:\s]* '),
        # Some patterns need a terminator
        VOCAB.part("separator", ' [;,"] | $ '),
        # Keyed form with a terminator, e.g.: sex might be female;
        VOCAB.producer(
            convert,
            """ sex_key (?P<value> ( sex_vocab | word ){1,2} quest? ) separator """,
        ),
        # E.g.: sex=female?, Or: sex=unknown
        VOCAB.producer(convert, " sex_key (?P<value> ( sex_vocab | word ) quest? ) "),
        # Bare value, e.g.: male, Or: male?
        VOCAB.producer(convert, " (?P<value> sex_vocab quest? ) "),
    ],
)
from traiter.old.vocabulary import Vocabulary import vertnet.pylib.shared_reproductive_patterns as patterns from vertnet.parsers.base import Base, convert VOCAB = Vocabulary(patterns.VOCAB) LACTATION_STATE = Base( name=__name__.split(".")[-1], rules=[ VOCAB.part( "lactating", r""" ( lactating | lactation | lactated | lactate | lact | lactaing | lactacting | lactataing | lactational | oelact | celact | lactati | lactacting | lactatin | lactatting | lactatng | nursing | suckling ) \b """, ), VOCAB.part("not", r" \b ( not | non | no ) "), VOCAB.part( "post", r""" \b ( (( just | recently ) \s+ )? finished | post | recently | recent | had | pre ) """, ), VOCAB.part("pre", r" \b pre [\s\-]? "), # Separates measurements
from traiter.old.vocabulary import Vocabulary import vertnet.pylib.shared_reproductive_patterns as patterns from vertnet.parsers.base import Base from vertnet.pylib.reproductive import convert, double VOCAB = Vocabulary(patterns.VOCAB) TESTES_SIZE = Base( name=__name__.split(".")[-1], rules=[ VOCAB["uuid"], # UUIDs cause problems with numbers # Note: abbrev differs from the one in the testes_state_trait VOCAB.term("abbrev", "tes ts tnd td tns ta".split()), # The abbreviation key, just: t. This can be a problem. VOCAB.part("char_key", r" \b t (?! [a-z] )"), # A key with units, like: gonadLengthInMM VOCAB.term( "key_with_units", r""" (?P<ambiguous_key> gonad ) \s* (?P<dim> length | len | width ) \s* in \s* (?P<len_units> millimeters | mm ) """, ), VOCAB.grouper( "value", """ cross | number len_units? (?! mass_units ) """, ), VOCAB.grouper( "state",
fix_up=fix_up, rules=[ VOCAB["uuid"], # UUIDs cause problems with numbers # Units are in the key, like: EarLengthInMillimeters VOCAB.term( "key_with_units", r""" ear \s* ( length | len ) \s* in \s* (?P<len_units> millimeters | mm ) """, ), # Abbreviation containing the measured from notation, like: e/n or e/c VOCAB.part( "char_measured_from", r""" (?<! [a-z] ) (?<! [a-z] \s ) (?P<ambiguous_key> e ) /? (?P<measured_from1> n | c ) [-]? (?! \.? [a-z] ) """, ), # The abbreviation key, just: e. This can be a problem. VOCAB.part( "char_key", r""" (?<! \w ) (?<! \w \s ) (?P<ambiguous_key> e ) (?! \.? \s? [a-z\(] ) """, ), # Standard keywords that indicate an ear length follows VOCAB.term( "keyword",
if not (has_month and has_year): return None trait = convert(token) if trait: trait.value = str(trait.value[:-2]) + '??' return trait LABEL_DATE = Base( name=__name__.split('.')[-1], rules=[ VOCAB['eol'], VOCAB['uuid'], # Get rid of these before they're a problem VOCAB.term('label', ' date '.split()), VOCAB.part('digits', r'(?<! \d ) ( [12]\d{3} | \d{1,2} ) (?! \d )'), VOCAB.part('sep', r' [/_-]+ ', capture=False), VOCAB.part('noise', r""" \w+ """, priority=LOWEST, capture=False), VOCAB.producer( convert, """ label? (?P<value> digits sep? month_name sep? digits ) """), VOCAB.producer( convert, """ label? (?P<value> month_name sep? digits sep? digits ) """), VOCAB.producer( convert, """ label? (?P<value> digits sep digits sep digits ) """), VOCAB.producer( short_date_digits, f""" label? (?P<value> digits sep digits ) """), VOCAB.producer(
if col_no[-1] in ('m', 'M'): return None traits[0].col_no = col_no return squash(traits) COLLECTOR = Base( name='collector', rules=[ VOCAB['eol'], VOCAB['month_name'], STATE_NAMES, VOCAB.part('col_label', r""" \b ( collect(or|ed) | coll | col ) ( \s* by )? """, capture=False), VOCAB.term('no_label', r""" number no num """.split(), capture=False), VOCAB.term('part', r""" [[:alpha:]]+ """, priority=LOWEST, capture=False), VOCAB.term('other_label', r""" art artist ass assist assistant auth authors? cartographer conservator contributor corator curator curatorial det determiner dir director ecologist editor entomologist expedition explorer extractor gardener geographer geologist georeferencer grower herbarium horticulturalist
VOCAB.term( "key_with_units", r""" ( total | snout \s* vent | head \s* body | fork ) \s* ( length | len )? \s* in \s* (?P<units> millimeters | mm ) """, ), # Various total length keys VOCAB.part( "len_key", r""" t \s* [o.]? \s* l [._]? (?! [a-z] ) | total [\s-]* length [\s-]* in | ( total | max | standard ) [\s-]* lengths? \b | meas [\s*:]? \s* length [\s(]* [l] [)\s:]* | meas ( [a-z]* )? \.? : \s* l (?! [a-z.] ) | s \.? \s? l \.? (?! [a-z.] ) | label [\s.]* lengths? \b | ( fork | mean | body ) [\s-]* lengths? \b | s \.? \s? v \.? \s? l \.? (?! [a-z.] ) | snout [\s-]* vent [\s-]* lengths? \b """, ), # Words that indicate we don't have a total length VOCAB.term("skip", " horns? tag ".split()), # The word length on its own. Make sure it isn't proceeded by a letter VOCAB.part( "ambiguous", r""" (?<! [a-z] \s* ) (?P<ambiguous_key> lengths? ) """, ), # # We don't know if this is a length until we see the units
from vertnet.parsers.base import Base
from vertnet.pylib.trait import Trait

# NOTE(review): `Vocabulary` and `patterns` are referenced below but their
# imports are not visible in this view — presumably imported above.
VOCAB = Vocabulary(patterns.VOCAB)


def convert(token):
    """Convert parsed token into a trait.

    The producers tag a match as `pos` (pregnant) or `neg` (not pregnant);
    only the presence of the `pos` group is inspected here.
    """
    trait = Trait(
        value="pregnant" if token.group.get("pos") else "not pregnant",
        start=token.start,
        end=token.end,
    )
    return trait


PREGNANCY_STATE = Base(
    name=__name__.split(".")[-1],
    rules=[
        # Spellings (and common misspellings) indicating pregnancy
        VOCAB.term(
            "pregnant",
            r""" prega?n?ant pregnan preg pregnancy pregnancies gravid """.
            split(),
        ),
        # Punctuation that separates notations
        VOCAB.part("separator", r' [;,"] '),
        # Negated forms, e.g.: "pregnant: no" / "not pregnant"
        VOCAB.producer(convert, """ (?P<neg> pregnant none) """),
        VOCAB.producer(convert, """ (?P<neg> none pregnant ) """),
        # A bare mention is positive
        VOCAB.producer(convert, """ (?P<pos> pregnant ) """),
    ],
)
side=token.group["side"][1].lower(), start=token.start, end=token.end, ) return [trait1, trait2] OVARIES_STATE = Base( name=__name__.split(".")[-1], rules=[ VOCAB.term("other", """ sev somewhat few """.split()), # Skip words VOCAB.term("skip", " womb nullip ".split()), # VOCAB['comma'], VOCAB.part("sep", r" [;\(] "), # E.g.: ovaries and uterine horns # Or: ovaries and fallopian tubes VOCAB.grouper( "ovaries", r""" ovary ( ( and? uterus horns? ) | and? fallopian )? """, ), # E.g.: covered in copious fat VOCAB.grouper("coverage", " covered word{0,2} fat "), # E.g.: +corpus luteum VOCAB.grouper("luteum", " sign? corpus? (alb | lut) "), VOCAB.grouper( "value_words", """
"""Find taxon notations on herbarium specimen labels.""" import pandas as pd from traiter.old.vocabulary import LOWEST, Vocabulary from digi_leap.parsers.base import Base from digi_leap.pylib import const, patterns from digi_leap.pylib.trait import Trait PLANT_FAMILIES = const.DATA_DIR / 'itis_plant_families.csv' PLANT_GENERA = const.DATA_DIR / 'itis_plant_genera.csv' VOCAB = Vocabulary(patterns.VOCAB) VOCAB.part('word', r' \S+ ', capture=False, priority=LOWEST) DATA = pd.read_csv(PLANT_FAMILIES, na_filter=False, dtype=str) VOCAB.term('plant_family', DATA['complete_name'].tolist()) DATA = pd.read_csv(PLANT_GENERA, na_filter=False, dtype=str) VOCAB.term('plant_genus', DATA['complete_name'].tolist()) def convert(token): """Normalize a parsed taxon notation""" return Trait(start=token.start, end=token.end, value=token.group['value']) PLANT_TAXON = Base(name='plant_taxon', rules=[ VOCAB['eol'], VOCAB.producer(convert,
"""Shared token patterns.""" from traiter.old.vocabulary import FIRST, LOWEST, Vocabulary VOCAB = Vocabulary() # Chars that may be a token VOCAB.part('slash', r' [/] ', capture=False) VOCAB.part('dash', r' (?: – | - ) ', capture=False) VOCAB.part('open', r' [(\[] ', capture=False) VOCAB.part('close', r' [)\]] ', capture=False) VOCAB.part('x', r' [x×] ', capture=False) VOCAB.part('quest', r' [?] ') VOCAB.part('comma', r' [,] ', capture=False, priority=LOWEST) VOCAB.part('semicolon', r' [;] ', capture=False, priority=LOWEST) VOCAB.part('ampersand', r' [&] ', capture=False) VOCAB.part('eq', r' [=] ', capture=False) VOCAB.part('under', r' [_] ', capture=False) VOCAB.part('eol', r' [\n\r\f] ', capture=False) VOCAB.part('dot', r' [.] ', capture=False) # Small words VOCAB.part('by', r' by ', capture=False) VOCAB.part('to', r' to ', capture=False) VOCAB.part('with', r' with ', capture=False) VOCAB.part('up_to', r' ( up \s+ )? to ', capture=False) VOCAB.term('and', r' and ', capture=False) VOCAB.term('conj', ' or and '.split(), capture=False) VOCAB.term('prep', ' to with on of '.split(), capture=False) VOCAB.term('word', r' [a-z] \w* ', capture=False, priority=LOWEST)
count = to_positive_int(token.group["subcount"]) trait.value = count + count trait.left = count trait.right = count return trait EMBRYO_COUNT = Base( name=__name__.split(".")[-1], rules=[ VOCAB["uuid"], # UUIDs cause problems with numbers VOCAB["shorthand"], VOCAB["metric_mass"], VOCAB.part( "sex", r""" males? | females? | (?<! [a-z] ) [mf] (?! [a-z] ) """, ), VOCAB.term("repo_key", r""" reproductive \s data """), VOCAB.term("near_term", r" near[\s-]?term"), VOCAB.term("each_side", r" each \s side "), VOCAB.term("skip", r" w wt ".split()), VOCAB.part("sep", r" [;] "), VOCAB.part("bang", r" [!] "), VOCAB.grouper( "count", """ none (word | plac_scar) conj | integer | none | num_words | bang """, ), VOCAB.grouper("present", " found | near_term "), VOCAB.grouper("numeric", " integer | real "), VOCAB.grouper("skip_len", " ( x? numeric metric_len ) | (x numeric metric_len?) "),
value="lactating" if token.group.get("pos") else "not lactating", start=token.start, end=token.end, ) return trait LACTATION_STATE = Base( name=__name__.split(".")[-1], rules=[ VOCAB.part( "lactating", r""" ( lactating | lactation | lactated | lactate | lact | lactaing | lactacting | lactataing | lactational | oelact | celact | lactati | lactacting | lactatin | lactatting | lactatng | nursing | suckling ) \b """, ), VOCAB.term("lactating_abbrev", r"[oc][esm]l"), VOCAB.term("not_lactating_abbrev", r"[oc][esm]n"), VOCAB.term("post", r""" post | finished """), # Separates measurements VOCAB.part("separator", r' [;"/] '), VOCAB.producer(convert, """ (?P<pos> lactating ) """), VOCAB.producer(convert, """ (?P<pos> lactating_abbrev ) """), VOCAB.producer(convert, """ (?P<neg> (none | post) lactating ) """), VOCAB.producer(convert, """ (?P<neg> lactating (none | post) ) """),
"""Fix problematic parses.""" # Try to disambiguate doubles quotes from inches return fix_up_inches(trait, text) EMBRYO_LENGTH = Base( name=__name__.split(".")[-1], fix_up=fix_up, rules=[ VOCAB["uuid"], # UUIDs cause problems with numbers VOCAB["shorthand"], VOCAB.part( "embryo_len_key", r""" (?<! collector [\s=:.] ) (?<! reg [\s=:.] ) ( ( crown | cr ) ( [_\s\-] | \s+ to \s+ )? rump | (?<! [a-z] ) crl (?! [a-z] ) | (?<! [a-z] ) c \.? r \.? (?! [a-z] ) )""", ), VOCAB.part("len", r" (length | len) (?! [a-z] ) "), VOCAB.part("other", r" \( \s* \d+ \s* \w+ \s* \) "), VOCAB.part("separator", r' [;"/.] '), VOCAB.grouper("value", """ cross | number len_units? (?! sex ) """), VOCAB.grouper("key", """ embryo_len_key len? ( eq | colon )? """), VOCAB.grouper( "count", """ number side number side eq? | number plus number ( eq number )? """,
"""Shared reproductive trait tokens (testes & ovaries).""" from traiter.old.vocabulary import LOWEST, Vocabulary import vertnet.pylib.patterns as patterns VOCAB = Vocabulary(patterns.VOCAB) VOCAB.term("sex", "females? | males? | [f]") VOCAB.term("active", "active inactive".split()) VOCAB.part("and", r" ( and \b | [&] ) ") VOCAB.term("count", r"""( only | all | both )? \s* [12]""") VOCAB.term( "color", r""" (( dark | light | pale ) \s* )? ( red | pink | brown | black | white | pigmented ) """, ) VOCAB.term("texture", " smooth ") VOCAB.term("covered", " covered ") VOCAB.term("destroyed", "destroy(ed)?") VOCAB.part( "size", r""" ( very \s+ )?
"key_with_units", r"""( forearm \s* )? \s* ( length | len ) \s* in \s* (?P<units> millimeters | mm ) """, ), # Standard keywords that indicate a forearm length follows VOCAB.term( "key", r""" forearm ( \s* ( length | len | l ) )? | fore? \s? [.]? \s? a | fa """, ), # Some patterns require a separator VOCAB.part("sep", r" [;,] | $ ", capture=False), VOCAB.grouper("noise", " word dash ".split()), # Handle fractional values like: forearm 9/16" VOCAB.producer( fraction, [ "key len_fraction units", # E.g.: forearm = 9/16 inches "key len_fraction", # E.g.: forearm = 9/16 ], ), # A typical hind-foot notation VOCAB.producer( simple, [ "key_with_units len_range", # E.g.: forearmLengthInMM=9-10 "key noise? len_range units ", # E.g.: forearmLength=9-10 mm
"""Shared token patterns.""" from traiter.old.vocabulary import FIRST, LOWEST, Vocabulary from vertnet.pylib.util import NUM_WORDS, ORDINALS VOCAB = Vocabulary() # Chars that may be a token VOCAB.part("slash", r" [/] ", capture=False) VOCAB.part("dash", r" \p{Pd} ", capture=False) VOCAB.part("open", r" \p{Ps} ", capture=False) VOCAB.part("close", r" \p{Pe} ", capture=False) VOCAB.part("x", r" [x×] ", capture=False) VOCAB.part("quest", r" [?] ") VOCAB.part("comma", r" [,] ", capture=False, priority=LOWEST) VOCAB.part("semicolon", r" [;] ", capture=False, priority=LOWEST) VOCAB.part("colon", r" [:] ", capture=False, priority=LOWEST) VOCAB.part("ampersand", r" [&] ", capture=False) VOCAB.part("eq", r" [=] ", capture=False) VOCAB.part("plus", r" [+] ", capture=False) VOCAB.part("under", r" [_] ", capture=False) VOCAB.part("eol", r" [\n\r\f] ", capture=False) VOCAB.part("dot", r" [.] ", capture=False) # Small words VOCAB.part("by", r" by ", capture=False) VOCAB.part("to", r" to ", capture=False) VOCAB.part("with", r" with ", capture=False) VOCAB.part("up_to", r" ( up \s+ )? to ", capture=False) VOCAB.term("and", r" and ", capture=False)
def typed(token): """Convert single value tokens into a result.""" trait = Trait(start=token.start, end=token.end) trait.notation = token.group["notation"] trait.value = to_positive_int(token.group["value1"]) trait.value += to_positive_int(token.group.get("value2")) return trait NIPPLE_COUNT = Base( name=__name__.split(".")[-1], rules=[ VOCAB["uuid"], # UUIDs cause problems with numbers VOCAB.term("id", r" \d+-\d+ "), VOCAB.term("adj", r""" inguinal ing pectoral pec pr """.split()), VOCAB.part("number", r" number | no | [#] "), VOCAB.part("eq", r" is | eq | equals? | [=] "), # Skip arbitrary words VOCAB["word"], VOCAB["sep"], VOCAB.grouper("count", " (?: integer | none )(?! side ) "), VOCAB.grouper("modifier", "adj visible".split()), VOCAB.grouper("skip", " number eq? integer "), VOCAB.producer( typed, """ (?P<notation> (?P<value1> count) modifier (?P<value2> count) modifier ) nipple """, ),
name=__name__.split(".")[-1], fix_up=fix_up, rules=[ VOCAB["uuid"], # UUIDs cause problems with numbers # Looking for keys like: tailLengthInMM VOCAB.term( "key_with_units", r""" tail \s* ( length | len ) \s* in \s* (?P<units> millimeters | mm ) """, ), # The abbreviation key, just: t. This can be a problem. VOCAB.part( "char_key", r""" \b (?P<ambiguous_key> t ) (?! [a-z] ) (?! _ \D ) """, ), # Standard keywords that indicate a tail length follows VOCAB.term("keyword", [r" tail \s* length ", r" tail \s* len ", "tail", "tal"]), # Some patterns require a separator VOCAB.part("sep", r" [;,] | $ ", capture=False), # Consider all of these tokens a key VOCAB.grouper("key", "keyword char_key".split()), # Handle fractional values like: tailLength 9/16" VOCAB.producer( fraction, [ # E.g.: tail = 9/16 in "key len_fraction (?P<units> len_units )",
if token.group.get('us_county'): trait.us_county = token.group['us_county'].title() if token.group.get('us_state'): trait.us_state = us_states.normalize_state(token.group['us_state']) return trait ADMIN_UNIT = Base( name='us_county', rules=[ VOCAB['eol'], VOCAB.term('skip', r""" of the """.split()), VOCAB.term('co_label', r""" co | coun[tc]y """, capture=False), VOCAB.term('st_label', r""" ( plants | flora ) \s* of """, capture=False), VOCAB.term('other', r"""alluvial flood river plain """.split()), VOCAB.part('nope', r""" [(] """), VOCAB['word'], VOCAB.producer(convert, ' us_state? eol? co_label comma? us_county '), VOCAB.producer(convert, ' us_county co_label comma? us_state? '), VOCAB.producer(convert, ' us_county comma? us_state '), VOCAB.producer(convert, """ st_label us_state eol? co_label us_county """), VOCAB.producer(convert, ' st_label eol? us_state '), VOCAB.producer(convert, ' (?<! skip ) us_state (?! other | nope ) '), ])