def test_parse_106(self):
    """Expect 212 (mm_shorthand) from a shorthand run that follows a trap note."""
    text = "trap TL01 26g; 212-115-27-20=26g;"
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=212,
            units="mm_shorthand",
            units_inferred=False,
            is_shorthand=True,
            start=16,
            end=33,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_013(self):
    """Expect a pair of values from a feet measurement followed by an inch range."""
    text = "2 ft. 3.1 - 4.5 in. "
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=[688.34, 723.9],
            ambiguous_key=True,
            units=["ft", "in"],
            units_inferred=False,
            start=0,
            end=18,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_088(self):
    """Expect two lengths: the "L-11" shorthand key and the "total length=11 in" field."""
    # NOTE(review): the inch marks are spelled as the HTML entity &#34;.
    # Bare double quotes would terminate these literals, and the expected
    # offsets of the second trait (103-121) only line up when each inch mark
    # occupies five characters. Confirm against the raw record.
    text = (
        "unformatted measurements=L-11&#34;, T-3.125&#34;, "
        "HF-1.5&#34; ; sex=male ; hind foot with claw=1.5 in; "
        "total length=11 in; tail length=3.125 in | . "
        "4/12/39 . | 1.5 TRUE"
    )
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=11.0,
            units=None,
            units_inferred=True,
            ambiguous_key=True,
            start=25,
            end=29,
        ),
        Trait(value=279.4, units="in", units_inferred=False, start=103, end=121),
    ]
    self.assertEqual(found, wanted)
def test_parse_087(self):
    """Expect "L: 275." to give 275 with an ambiguous key and inferred units."""
    text = "L: 275. T: 65.; "
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=275,
            units=None,
            units_inferred=True,
            ambiguous_key=True,
            start=0,
            end=6,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_010(self):
    """Expect 143 (mm_shorthand) from a bare shorthand run spanning the whole input."""
    text = "143-63-20-17=13"
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=143,
            units="mm_shorthand",
            units_inferred=False,
            is_shorthand=True,
            start=0,
            end=15,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_072(self):
    """Expect a bracketed first shorthand field to be flagged as an estimated value."""
    text = "[308]-190-45-20"
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=308,
            units="mm_shorthand",
            units_inferred=False,
            is_shorthand=True,
            estimated_value=True,
            start=0,
            end=15,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_14(self):
    """Expect a length trait and a width trait from two gonad...InMM key/value pairs."""
    text = '"gonadLengthInMM":"12", "gonadWidthInMM":"5",'
    found = TESTES_SIZE.parse(text)
    length_trait = Trait(
        value=12,
        units="MM",
        units_inferred=False,
        ambiguous_key=True,
        dimension="length",
        start=1,
        end=21,
    )
    width_trait = Trait(
        value=5,
        units="MM",
        units_inferred=False,
        ambiguous_key=True,
        dimension="width",
        start=25,
        end=43,
    )
    self.assertEqual(found, [length_trait, width_trait])
def test_parse_13(self):
    """Expect "E/c-21mm" to give 21 mm with measured_from set to "c"."""
    text = "E/c-21mm"
    found = EAR_LENGTH.parse(text)
    wanted = [
        Trait(
            value=21,
            units="mm",
            units_inferred=False,
            ambiguous_key=True,
            measured_from="c",
            start=0,
            end=8,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_05(self):
    """Expect the "left ovary: 4 x 2 mm" note to give a two-value left-side trait."""
    text = (
        "moderate fat, scars 3R, 4L, no embryos "
        "[right ovary listed, left ovary: 4 x 2 mm]"
    )
    found = OVARY_SIZE.parse(text)
    wanted = [
        Trait(
            value=[4, 2],
            units="mm",
            units_inferred=False,
            side="left",
            start=60,
            end=80,
        )
    ]
    self.assertEqual(found, wanted)
def compound(token):
    """Build a trait from a feet-plus-inches token such as "4 ft 9 in".

    The "ft" group is converted once; every entry of the "in" group is
    converted and added to it, and each sum is rounded to two decimals.
    The resulting list is passed through squash() before being stored.
    """
    groups = token.group
    trait = Trait(start=token.start, end=token.end)
    trait.units = [groups["feet"], groups["inches"]]
    trait.units_inferred = False
    trait.is_flag_missing(token, "key", rename="ambiguous_key")

    feet = convert_units(to_positive_float(groups["ft"]), "ft")
    inch_parts = []
    for raw_inches in as_list(groups["in"]):
        inch_parts.append(convert_units(to_positive_float(raw_inches), "in"))

    totals = []
    for inches in inch_parts:
        totals.append(round(feet + inches, 2))
    trait.value = squash(totals)

    add_flags(token, trait)
    return trait
def test_parse_07(self):
    """Expect 32 from the "(fa)"-tagged field of a JSON shorthand measurement."""
    text = '{"measurements":"82-34-8-13-32(fa)", "weightInGrams":"8.0"}'
    found = FOREARM_LENGTH.parse(text)
    wanted = [
        Trait(
            value=32,
            units="mm_shorthand",
            units_inferred=False,
            is_shorthand=True,
            start=17,
            end=34,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_082(self):
    """Expect "LENGTH 0 3/8 IN." to give 9.52 with units "IN"."""
    text = "LENGTH 0 3/8 IN. WING CHORD 5.25 IN. TAIL 4.25 IN."
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=9.52,
            ambiguous_key=True,
            units="IN",
            units_inferred=False,
            start=0,
            end=15,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_028(self):
    """Expect only the "length" field to match, not the "time collected" range."""
    text = '{"time collected":"0712-0900", "length":"12.0" }'
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=12,
            ambiguous_key=True,
            units=None,
            units_inferred=True,
            start=32,
            end=45,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_16(self):
    """Expect both the key's "MM" and the value's "mm" to be reported as units."""
    text = '"gonadLengthInMM":"9mm w.o./epid", '
    found = TESTES_SIZE.parse(text)
    wanted = [
        Trait(
            value=9,
            units=["MM", "mm"],
            units_inferred=False,
            ambiguous_key=True,
            dimension="length",
            start=1,
            end=22,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_073(self):
    """Expect an estimated 308 from bracketed shorthand inside a quoted JSON blob."""
    text = '"{"measurements":"[308]-190-45-20" }"'
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=308,
            units="mm_shorthand",
            units_inferred=False,
            is_shorthand=True,
            estimated_value=True,
            start=3,
            end=33,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_065(self):
    """Expect "LENGTH: 117MM" to give 117 with units "MM" and an ambiguous key."""
    text = "LENGTH: 117MM. SOFT self.parserTS COLOR ON LABEL."
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=117,
            units="MM",
            units_inferred=False,
            ambiguous_key=True,
            start=0,
            end=13,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_33(self):
    """Expect a single 20 mm trait with side "r" from a reproductive-data note."""
    text = "; reproductive data=R 20mm L x 6 mm Wne scars ;"
    found = TESTES_SIZE.parse(text)
    wanted = [
        Trait(
            value=20,
            units="mm",
            units_inferred=False,
            side="r",
            start=2,
            end=26,
        )
    ]
    self.assertEqual(found, wanted)
def compound(token):
    """Build a trait from a pounds-plus-ounces token such as "2 lbs. 3.1 - 4.5 oz".

    The "lbs" group is converted once; every entry of the "ozs" group is
    converted and added to it, and each sum is rounded to two decimals.
    The resulting list is passed through squash() before being stored.
    """
    groups = token.group
    trait = Trait(start=token.start, end=token.end)
    trait.units = [groups["pounds"], groups["ounces"]]
    trait.units_inferred = False
    trait.is_flag_missing(token, "key", rename="ambiguous_key")

    pounds = convert_units(to_positive_float(groups["lbs"]), "lbs")
    ounce_parts = []
    for raw_ounces in as_list(groups["ozs"]):
        ounce_parts.append(convert_units(to_positive_float(raw_ounces), "ozs"))

    totals = []
    for ounces in ounce_parts:
        totals.append(round(pounds + ounces, 2))
    trait.value = squash(totals)

    add_flags(token, trait)
    return trait
def test_parse_06(self):
    """Expect the bracketed third shorthand field, 31, flagged as estimated."""
    text = '{"measurements":"192-84-[31]-19=38g" }'
    found = HIND_FOOT_LENGTH.parse(text)
    wanted = [
        Trait(
            value=31,
            units="mm_shorthand",
            units_inferred=False,
            is_shorthand=True,
            estimated_value=True,
            start=17,
            end=35,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_033(self):
    """Expect 157 from shorthand introduced by a "measurements ..." phrase."""
    text = (
        "field measurements on fresh dead specimen were "
        "157-60-20-19-21g"
    )
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=157,
            units="mm_shorthand",
            units_inferred=False,
            is_shorthand=True,
            start=6,
            end=63,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_22(self):
    """Expect "cr-10 ?" to give 10 with inferred units and the uncertain flag."""
    text = (
        "Mammals v****a open; mammae tiny; not lactating9 embryos; "
        "cr-10 ?"
    )
    found = EMBRYO_LENGTH.parse(text)
    wanted = [
        Trait(
            value=10,
            units=None,
            units_inferred=True,
            start=58,
            end=65,
            uncertain=True,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_103(self):
    """Expect only the trailing "Length=522.0 mm" field to be reported."""
    text = (
        "Tail=239.0 mm; Hind Foot=74.0 mm (81.0 mm); Ear=34.0 mm.; "
        "Weight=560 g; Length=522.0 mm"
    )
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=522,
            units="mm",
            units_inferred=False,
            ambiguous_key=True,
            start=72,
            end=87,
        )
    ]
    self.assertEqual(found, wanted)
def test_parse_029(self):
    """Expect the "length" range 119-137, ignoring the depth and "45 ft." text."""
    text = (
        '{"time collected":"1030", "water depth":"1-8", '
        '"bottom":"abrupt lava cliff dropping off to sand at '
        '45 ft.", "length":"119-137" }'
    )
    found = TOTAL_LENGTH.parse(text)
    wanted = [
        Trait(
            value=[119, 137],
            ambiguous_key=True,
            units=None,
            units_inferred=True,
            start=109,
            end=125,
        )
    ]
    self.assertEqual(found, wanted)
def convert(token):
    """Turn a single-value token into a trait.

    Returns None when the token has no "value" group or when the parsed
    integer is greater than 100. A truthy "notation" group is copied onto
    the trait.
    """
    raw_value = token.group.get("value")
    if not raw_value:
        return None

    trait = Trait(start=token.start, end=token.end)
    trait.value = to_positive_int(raw_value)
    if trait.value > 100:
        return None

    notation = token.group.get("notation")
    if notation:
        trait.notation = token.group["notation"]
    return trait
def convert_count(token):
    """Turn a count token into a "present"/"absent" trait.

    The count comes from the "value" group, or from "count1" + "count2"
    when that is falsy. Counts of 1000 or more are rejected with None.
    """
    groups = token.group
    trait = Trait(start=token.start, end=token.end)

    total = to_positive_int(groups.get("value"))
    first = to_positive_int(groups.get("count1"))
    second = to_positive_int(groups.get("count2"))
    if not total:
        total = first + second

    if total >= 1000:
        return None

    if total > 0:
        trait.value = "present"
    else:
        trait.value = "absent"
    return trait
def fraction(token):
    """Turn a fractional token such as "10 3/8 inches" into a trait.

    The value is the "whole" group (default "0") plus numerator/denominator.
    When a "units" group is present the value is passed through
    convert_units(); otherwise units_inferred is set to True.

    Returns:
        The populated trait, or None when the fraction cannot be built
        (wrong operand types or a zero denominator).
    """
    trait = Trait(start=token.start, end=token.end)
    trait.units = token.group.get("units")
    trait.units_inferred = not bool(trait.units)

    whole = to_positive_float(token.group.get("whole", "0"))
    numerator = to_positive_int(token.group["numerator"])
    denominator = to_positive_int(token.group["denominator"])
    try:
        trait.value = whole + Fraction(numerator, denominator)
    except (TypeError, ZeroDivisionError):
        # Fraction(n, 0) raises ZeroDivisionError; treat it like any other
        # unusable fraction instead of letting it escape the parser.
        print(f"Fraction error: {numerator} / {denominator}")
        return None

    if trait.units:
        trait.value = convert_units(trait.value, trait.units)
    add_flags(token, trait)
    return trait
def convert_count(token):
    """Turn a count token with optional per-side counts into a trait.

    The total comes from the "value" group, or from "count1" + "count2"
    when that is falsy. Each side count is stored under the attribute name
    looked up in SUB from the first letter of its side group, falling back
    to "side1"/"side2". Returns None if any value is 1000 or more.
    """
    groups = token.group
    trait = Trait(start=token.start, end=token.end)
    trait.value = to_positive_int(groups.get("value"))
    first_count = to_positive_int(groups.get("count1"))
    second_count = to_positive_int(groups.get("count2"))

    # A missing side group defaults to " ", so the lookup key is a space.
    first_key = groups.get("side1", " ").lower()[0]
    first_attr = SUB.get(first_key, "side1")
    second_key = groups.get("side2", " ").lower()[0]
    second_attr = SUB.get(second_key, "side2")

    if not trait.value:
        trait.value = first_count + second_count

    # Record a side when it has a count or when its name was recognised.
    if first_count or first_attr != "side1":
        setattr(trait, first_attr, first_count)
    if second_count or second_attr != "side2":
        setattr(trait, second_attr, second_count)

    if not all(x < 1000 for x in as_list(trait.value)):
        return None
    return trait
def convert_many(token):
    """Turn a token holding several values into a list of traits.

    Each value takes the unit at the same position in "len_units". Values
    beyond the end of that list reuse the last unit (or None) and are
    marked units_inferred. Values greater than TOO_BIG are dropped.
    """
    raw_values = token.group["value"]
    unit_names = as_list(token.group.get("len_units", []))

    results = []
    for index, raw in enumerate(raw_values):
        trait = Trait(start=token.start, end=token.end)
        has_own_units = index < len(unit_names)
        if has_own_units:
            trait.units = unit_names[index]
        else:
            trait.units = unit_names[-1] if unit_names else None
        trait.units_inferred = not has_own_units

        trait.value = convert_units.convert_units(
            to_positive_float(raw), trait.units
        )
        if not trait.value > TOO_BIG:
            add_flags(token, trait)
            results.append(trait)
    return results
def convert(token):
    """Turn a count token with a total, sub-counts or a side into a trait.

    A "total" group sets the value; "subcount" entries, when present,
    replace it with their sum. Paired "subcount"/"sub" entries are also
    stored as attributes named via SUB; otherwise a "side" group sets
    trait.side. Returns None if any value is 1000 or more.
    """
    groups = token.group
    trait = Trait(start=token.start, end=token.end)

    if groups.get("total"):
        trait.value = to_positive_int(groups["total"])

    if groups.get("subcount"):
        subtotal = 0
        for raw_count in as_list(groups["subcount"]):
            subtotal += to_positive_int(raw_count)
        trait.value = subtotal

    if groups.get("subcount") and groups.get("sub"):
        pairs = zip(as_list(groups["subcount"]), as_list(groups.get("sub")))
        for raw_count, label in pairs:
            if raw_count == "!":
                # "!" is read as a count of one.
                raw_count = "1"
            attr_name = SUB.get(label[0].lower(), label)
            setattr(trait, attr_name, to_positive_int(raw_count))
    elif groups.get("side"):
        side_name = groups["side"].lower()
        trait.side = SUB.get(side_name, side_name)

    if not all(x < 1000 for x in as_list(trait.value)):
        return None
    return trait
def test_parse_16(self):
    """Expect "1 + 1" to give a total of 2 with side1 and side2 each set to 1."""
    text = "placental scars 1 + 1, mam tissue"
    found = PLACENTAL_SCAR_COUNT.parse(text)
    wanted = [Trait(value=2, side1=1, side2=1, start=0, end=21)]
    self.assertEqual(found, wanted)