Example #1
0
 def sort_key(cls, span: Span):
     """Return the tuple used to order ``span`` relative to other spans.

     The components are compared left to right: the negated token count,
     the value returned by ``span.match_type()``, the offset and finally
     the label. Because the token count is negated, spans with more
     tokens come first under an ascending sort.
     """
     negated_token_count = -span.num_tokens
     match_rank = span.match_type()
     return negated_token_count, match_rank, span.offset, span.label
Example #2
0
 def sort_key(cls, span: Span):
     """Build the ordering key for ``span``.

     The key is a 4-tuple compared element by element: negated token
     count, match type, offset, label.
     """
     # Longest wins: negating puts spans with more tokens first when
     # keys are sorted in ascending order.
     length_rank = -span.num_tokens
     # Exact name > exact synonym > lower case.
     match_rank = span.match_type()
     # Offset and label are there only to keep the ordering deterministic.
     tie_breakers = (span.offset, span.label)
     return (length_rank, match_rank, *tie_breakers)
Example #3
0
 def is_keep(self, span: Span):
     """Decide whether ``span`` should be kept.

     A span is kept as soon as one of these checks succeeds, tried in
     this order:

     1. its text is longer than 3 characters;
     2. its text contains a digit or one of the symbols ``%<>=-+~``
        (often measurements);
     3. its match type is neither ``SpanMatch.LowercaseSynonym`` nor
        ``SpanMatch.LowercaseName``.

     Returns ``True`` when the span is kept, ``False`` otherwise.
     """
     text = span.text

     # Anything longer than 3 characters is kept outright.
     if len(text) > 3:
         return True

     # Short text carrying numbers or symbols is kept as well.
     if not set(text).isdisjoint("1234567890%<>=-+~"):
         return True

     # Otherwise keep it only when it is not a lowercase-only match.
     lowercase_matches = {SpanMatch.LowercaseSynonym, SpanMatch.LowercaseName}
     return span.match_type() not in lowercase_matches