def sort_key(cls, span: Span):
    """Build the ordering key for ``span``.

    The key is the tuple ``(-num_tokens, match_type(), offset, label)``.
    Because the token count is negated, an ascending sort on this key places
    spans with more tokens first; the remaining fields break ties in the
    order listed.

    Args:
        cls: Unused; present because this is written as a class-level method.
        span: Object exposing ``num_tokens``, ``match_type()``, ``offset``
            and ``label``.

    Returns:
        A 4-tuple suitable for use as a ``key=`` value when sorting spans.
    """
    negated_length = -span.num_tokens
    match_rank = span.match_type()
    return negated_length, match_rank, span.offset, span.label
def sort_key(cls, span: Span):
    """Return the tuple used to order candidate spans.

    Fields are compared left to right, so earlier fields take priority and
    later ones only break ties.

    Args:
        cls: Unused; present because this is written as a class-level method.
        span: Object exposing ``num_tokens``, ``match_type()``, ``offset``
            and ``label``.

    Returns:
        The 4-tuple ``(-num_tokens, match_type(), offset, label)``.
    """
    return (
        # Negated so that an ascending sort puts the longest span first.
        -span.num_tokens,  # longest wins
        # Relative ranking comes from whatever match_type() returns; the
        # intended order per the original note is shown on the right.
        span.match_type(),  # exact name > exact synonym > lower case
        span.offset,  # deterministic
        span.label,  # deterministic
    )
def is_keep(self, span: Span):
    """Decide whether ``span`` should be kept.

    A span is kept when at least one of these holds, checked in order:

    1. its text is longer than 3 characters;
    2. its text contains a digit or one of the symbols ``%<>=-+~``;
    3. its match type is neither ``SpanMatch.LowercaseSynonym`` nor
       ``SpanMatch.LowercaseName``.

    Args:
        self: Unused by this check.
        span: Object exposing ``text`` and ``match_type()``.

    Returns:
        ``True`` if the span passes any of the checks, otherwise ``False``.
    """
    # keep text with length > 3
    if len(span.text) > 3:
        return True

    # keep text with numbers or symbols (often measurements)
    if not set("1234567890%<>=-+~").isdisjoint(span.text):
        return True

    # keep text which is not solely a lowercase synonym
    return span.match_type() not in {
        SpanMatch.LowercaseSynonym,
        SpanMatch.LowercaseName,
    }