def test_sentencizer_01(self):
     # The first sentence of the sample ends with a period followed by a
     # closing curly quote; the pipeline is expected to report two sentences.
     sample = shorten("""
         It was common “along a tiny stream.” Argia apicalis.
     """)
     sentences = [*NLP(sample).sents]
     self.assertEqual(len(sentences), 2)
 def test_parse_11(self):
     # The parser is expected to report three hits for this record, in order
     # of their character offsets: one flagged as shorthand, one whose units
     # are inferred, and one with explicit "mm" units.
     record = shorten("""
             sex=male ; unformatted measurements=126-54-10-16-7=18.7; FA 54
             ; hind foot with claw=10 mm; tragus length=7 mm;
             tail length=54 mm; ear from notch=16 mm;
             forearm length=54 mm; total length=126 mm""")
     actual = FOREARM_LENGTH.parse(record)

     shorthand_hit = {
         "start": 36,
         "end": 55,
         "value": 7.0,
         "units": "mm_shorthand",
         "units_inferred": False,
         "is_shorthand": True,
     }
     inferred_units_hit = {
         "start": 57,
         "end": 62,
         "units": None,
         "value": 54.0,
         "units_inferred": True,
     }
     explicit_units_hit = {
         "start": 153,
         "end": 173,
         "units": "mm",
         "value": 54.0,
         "units_inferred": False,
     }

     self.assertEqual(
         actual,
         [shorthand_hit, inferred_units_hit, explicit_units_hit],
     )
Exemple #3
0
 def test_sentencizer_06(self):
     # The passage contains a decimal number (3.5) and is expected to be
     # split into exactly two sentences.
     passage = """
         Capsules 8–15 × 6–12 mm, larger wings deltate-rounded, 10–17 mm wide, 
         smaller 3.5–5 mm wide. 2n = 34, 56 (South America).
         """
     parsed = NLP(shorten(passage))
     sentence_total = len([*parsed.sents])
     self.assertEqual(sentence_total, 2)
Exemple #4
0
 def test_sentencizer_05(self):
     # The passage mixes parentheses, bracketed ranges and a colon; the
     # pipeline is expected to report three sentences.
     passage = """
         Plants perennial (rhizomatous), usually glabrous, sometimes sparsely hairy.
         Stems [10–]30–70[–100] cm. Leaves: stipules lanceolate to oblong.
         """
     parsed = NLP(shorten(passage))
     sentence_total = len([*parsed.sents])
     self.assertEqual(sentence_total, 3)
 def test_parse_108(self):
     # Exactly one hit is expected: the span at offsets 13-34 of the
     # shortened text, with a value of 109 and explicit "mm" units.
     notes = shorten("""Body: 12 gm; Body and tail: 109 mm; Tail: 43 mm;
                 Hind Foot: 11 mm; Ear: 13 mm""")
     actual = TOTAL_LENGTH.parse(notes)
     expected_hit = Trait(
         value=109,
         units="mm",
         units_inferred=False,
         start=13,
         end=34,
     )
     self.assertEqual(actual, [expected_hit])
def test(text: str) -> List[Dict]:
    """Shorten the text, run it through NLP, and collect the entity data.

    Returns a list holding the ``_.data`` value of each entity in the
    resulting document's ``ents``, in the order they are yielded.
    """
    doc = NLP(shorten(text))

    collected = []
    for entity in doc.ents:
        collected.append(entity._.data)

    # Debugging aid: pretty-print the collected data.
    # from pprint import pp
    # pp(collected, compact=True)

    # Debugging aid: visualize the parsed document.
    # from spacy import displacy
    # displacy.serve(doc, options={'collapse_punct': False, 'compact': True})

    return collected
def test_traits(text: str) -> List[Dict]:
    """Shorten and clean the text, run it through NLP, and collect entity data.

    The text is passed through ``shorten`` and then ``clean_text`` (with
    ``trans=TRANS``) before parsing. Returns a list holding the ``_.data``
    value of each entity in the resulting document's ``ents``.
    """
    prepared = clean_text(shorten(text), trans=TRANS)
    doc = NLP(prepared)

    collected = []
    for entity in doc.ents:
        collected.append(entity._.data)

    # Debugging aid: pretty-print the collected data.
    # from pprint import pp
    # pp(collected)

    # Debugging aid: visualize the parsed document.
    # from spacy import displacy
    # options = {'collapse_punct': False, 'compact': True}
    # displacy.serve(doc, options=options)

    return collected
 def test_parse_07(self):
     # A JSON-like record: a single hit flagged as shorthand is expected,
     # spanning offsets 17-40 of the shortened text with a value of 8.0.
     record = shorten(
         """
             {"measurements":"78-39-5-14-8(TR)-30(FA)",
             "weightInGrams":"3.5" }"""
     )
     actual = TRAGUS_LENGTH.parse(record)

     shorthand_hit = {
         "start": 17,
         "end": 40,
         "value": 8.0,
         "units": "mm_shorthand",
         "units_inferred": False,
         "is_shorthand": True,
     }

     self.assertEqual(actual, [shorthand_hit])
 def test_parse_06(self):
     # Only one hit is expected for this record: the span at offsets 92-110
     # of the shortened text, with a value of 7.0 and explicit "mm" units.
     record = shorten(
         """
             sex=male ; unformatted measurements=126-54-10-16-7=18.7; FA 54
             ; hind foot with claw=10 mm; tragus length=7 mm;
             tail length=54 mm; ear from notch=16 mm;
             forearm length=54 mm; total length=126 mm"""
     )
     actual = TRAGUS_LENGTH.parse(record)

     explicit_units_hit = {
         "start": 92,
         "end": 110,
         "units": "mm",
         "value": 7.0,
         "units_inferred": False,
     }

     self.assertEqual(actual, [explicit_units_hit])
Exemple #10
0
def test(text: str) -> List[Dict]:
    """Shorten the text and return what ``TEST.test_traits`` finds in it."""
    shortened = shorten(text)
    found = TEST.test_traits(shortened)
    return found
Exemple #11
0
 def test_sentencizer_02(self):
     # The parenthesized citation contains an "et al." period and is
     # expected to stay a single sentence.
     citation = shorten("""(Dunn et al. 2009, Jørgensen 2015).""")
     sentences = [*NLP(citation).sents]
     self.assertEqual(len(sentences), 1)
 def test_parse_109(self):
     # The hyphenated number in this catalog note is expected to produce
     # no hits.
     note = shorten("""Note in catalog: Recatalogued from 115818-850""")
     actual = TOTAL_LENGTH.parse(note)
     self.assertEqual(actual, [])
 def test_parse_107(self):
     # The text "Body: 14 g" is expected to produce no hits.
     note = shorten("""Body: 14 g""")
     actual = TOTAL_LENGTH.parse(note)
     self.assertEqual(actual, [])