def test_sentencizer_01(self):
    """A period inside closing curly quotes still yields two sentences."""
    raw = """ It was common “along a tiny stream.” Argia apicalis. """
    doc = NLP(shorten(raw))
    sentence_count = len(list(doc.sents))
    self.assertEqual(sentence_count, 2)
def test_parse_11(self):
    """FOREARM_LENGTH.parse returns three matches for this record.

    The expected results are a match flagged ``is_shorthand`` at 36-55,
    a match without units at 57-62 (``units_inferred`` is True), and a
    match in mm at 153-173.
    """
    raw = """ sex=male ; unformatted measurements=126-54-10-16-7=18.7; FA 54 ; hind foot with claw=10 mm; tragus length=7 mm; tail length=54 mm; ear from notch=16 mm; forearm length=54 mm; total length=126 mm"""
    actual = FOREARM_LENGTH.parse(shorten(raw))

    shorthand_match = {
        "start": 36,
        "end": 55,
        "value": 7.0,
        "units": "mm_shorthand",
        "units_inferred": False,
        "is_shorthand": True,
    }
    unitless_match = {
        "start": 57,
        "end": 62,
        "units": None,
        "value": 54.0,
        "units_inferred": True,
    }
    labelled_match = {
        "start": 153,
        "end": 173,
        "units": "mm",
        "value": 54.0,
        "units_inferred": False,
    }
    expected = [shorthand_match, unitless_match, labelled_match]

    self.assertEqual(actual, expected)
def test_sentencizer_06(self):
    """Decimal points and numeric ranges do not add sentence breaks."""
    raw = """ Capsules 8–15 × 6–12 mm, larger wings deltate-rounded, 10–17 mm wide, smaller 3.5–5 mm wide. 2n = 34, 56 (South America). """
    doc = NLP(shorten(raw))
    sentence_count = len(list(doc.sents))
    self.assertEqual(sentence_count, 2)
def test_sentencizer_05(self):
    """Text with bracketed ranges and a colon splits into three sentences."""
    raw = """ Plants perennial (rhizomatous), usually glabrous, sometimes sparsely hairy. Stems [10–]30–70[–100] cm. Leaves: stipules lanceolate to oblong. """
    doc = NLP(shorten(raw))
    sentence_count = len(list(doc.sents))
    self.assertEqual(sentence_count, 3)
def test_parse_108(self):
    """TOTAL_LENGTH.parse returns only the 109 mm match at 13-34."""
    raw = """Body: 12 gm; Body and tail: 109 mm; Tail: 43 mm; Hind Foot: 11 mm; Ear: 13 mm"""
    actual = TOTAL_LENGTH.parse(shorten(raw))
    expected = [
        Trait(value=109, units="mm", units_inferred=False, start=13, end=34),
    ]
    self.assertEqual(actual, expected)
def test(text: str) -> List[Dict]:
    """Run the pipeline on shortened text and return each entity's data.

    The text is passed through ``shorten`` and then ``NLP``; the result is
    the ``_.data`` attribute of every entity in ``doc.ents``, in order.
    """
    doc = NLP(shorten(text))

    # Debugging aids: pretty-print the result with
    # ``pprint.pp(traits, compact=True)`` or inspect the parse with
    # ``spacy.displacy.serve(doc, options={'collapse_punct': False,
    # 'compact': True})``.
    return [ent._.data for ent in doc.ents]
def test_traits(text: str) -> List[Dict]:
    """Shorten and clean the text, run the pipeline, return entity data.

    The text goes through ``shorten``, then ``clean_text`` with
    ``trans=TRANS``, then ``NLP``. The result is the ``_.data`` attribute
    of every entity in ``doc.ents``, in order.
    """
    cleaned = clean_text(shorten(text), trans=TRANS)
    doc = NLP(cleaned)

    traits = []
    for ent in doc.ents:
        traits.append(ent._.data)

    # Debugging aids: pretty-print with ``pprint.pp(traits)`` or inspect
    # the parse with ``spacy.displacy.serve(doc, options={'collapse_punct':
    # False, 'compact': True})``.
    return traits
def test_parse_07(self):
    """TRAGUS_LENGTH.parse returns one ``is_shorthand`` match at 17-40."""
    raw = """ {"measurements":"78-39-5-14-8(TR)-30(FA)", "weightInGrams":"3.5" }"""
    actual = TRAGUS_LENGTH.parse(shorten(raw))
    expected = [
        {
            "start": 17,
            "end": 40,
            "value": 8.0,
            "units": "mm_shorthand",
            "units_inferred": False,
            "is_shorthand": True,
        },
    ]
    self.assertEqual(actual, expected)
def test_parse_06(self):
    """TRAGUS_LENGTH.parse returns only the 7 mm match at 92-110."""
    raw = """ sex=male ; unformatted measurements=126-54-10-16-7=18.7; FA 54 ; hind foot with claw=10 mm; tragus length=7 mm; tail length=54 mm; ear from notch=16 mm; forearm length=54 mm; total length=126 mm"""
    actual = TRAGUS_LENGTH.parse(shorten(raw))
    expected = [
        {
            "start": 92,
            "end": 110,
            "units": "mm",
            "value": 7.0,
            "units_inferred": False,
        },
    ]
    self.assertEqual(actual, expected)
def test(text: str) -> List[Dict]:
    """Shorten the text and return what ``TEST.test_traits`` gives for it."""
    return TEST.test_traits(shorten(text))
def test_sentencizer_02(self):
    """A parenthesised citation containing "et al." stays one sentence."""
    raw = """(Dunn et al. 2009, Jørgensen 2015)."""
    doc = NLP(shorten(raw))
    sentence_count = len(list(doc.sents))
    self.assertEqual(sentence_count, 1)
def test_parse_109(self):
    """TOTAL_LENGTH.parse returns no matches for a recatalogue note."""
    raw = """Note in catalog: Recatalogued from 115818-850"""
    actual = TOTAL_LENGTH.parse(shorten(raw))
    self.assertEqual(actual, [])
def test_parse_107(self):
    """TOTAL_LENGTH.parse returns no matches for ``Body: 14 g``."""
    raw = """Body: 14 g"""
    actual = TOTAL_LENGTH.parse(shorten(raw))
    self.assertEqual(actual, [])