Example #1
def _process_mm_biomarker_result(spans: List[Span],
                                 tokens: List[DocToken]) -> List[Span]:
    result_spans = []

    # With two or more measurements, the first is read as the abnormal cell
    # count and the second as the total cell count.
    measurement_spans = [x for x in spans if x.label == 'MEASUREMENT']
    if len(measurement_spans) > 1:
        abnormal_text = measurement_spans[0].text
        total_text = measurement_spans[1].text
        result_spans.append(
            Span(text=abnormal_text,
                 entity=AbnormalCells.create(abnormal_text),
                 tokens=measurement_spans[0].tokens))
        result_spans.append(
            Span(text=total_text,
                 entity=TotalCells.create(total_text),
                 tokens=measurement_spans[1].tokens))
    # A single measurement is treated as the total cell count only.
    elif len(measurement_spans) == 1:
        total_text = measurement_spans[0].text
        result_spans.append(
            Span(text=total_text,
                 entity=TotalCells.create(total_text),
                 tokens=measurement_spans[0].tokens))

    # A mutation mention covers every token from its offset to the end of the
    # document.
    mm_mutation_type_spans = [
        x for x in spans if x.label == 'MM_BIOMARKER_MUTATION'
    ]
    if mm_mutation_type_spans:
        token_start = mm_mutation_type_spans[0].offset
        mm_mutation_type_tokens = tokens[token_start:]
        text = ' '.join([x.token for x in mm_mutation_type_tokens])
        result_spans.append(
            Span(text=text,
                 entity=MmBiomarkerMutation.create(text),
                 tokens=mm_mutation_type_tokens))
    return result_spans
Example #2
def spans(doc, tokens):
    # Four spans over the same text and tokens that differ only by entity
    # name, label, or synonym; count_it should treat each one as distinct.
    spans = [
        Span(
            text="a",
            doc=doc,
            entity=Entity(name="A", label="LABEL_0"),
            tokens=tokens[:1],
        ),
        Span(
            text="a",
            doc=doc,
            entity=Entity(name="B", label="LABEL_0"),
            tokens=tokens[:1],
        ),
        Span(
            text="a",
            doc=doc,
            entity=Entity(name="A", label="LABEL_1"),
            tokens=tokens[:1],
        ),
        Span(
            text="a",
            doc=doc,
            entity=Entity(name="C", label="LABEL_0", synonyms=["a"]),
            tokens=tokens[:1],
        ),
    ]
    assert 4 == count_it(spans)
    return spans
Example #3
def sort_key(cls, span: Span):
    return (
        -span.num_tokens,
        span.match_type(),
        span.offset,
        span.label,
    )
Example #4
def sort_key(cls, span: Span):
    return (
        -span.num_tokens,  # longest wins
        span.match_type(),  # exact name > exact synonym > lower case
        span.offset,  # deterministic
        span.label,  # deterministic
    )
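For context, a minimal self-contained sketch of how a key like this orders overlapping candidates. The tuples stand in for (-num_tokens, match_type, offset, label), assuming smaller match_type values mean better matches; all values are invented for illustration and are not taken from the examples.

candidates = [
    (-1, 2, 5, 'GENE'),     # one token, lowercase match
    (-3, 1, 5, 'VARIANT'),  # three tokens, exact synonym match
    (-3, 0, 5, 'GENE'),     # three tokens, exact name match
]
# Sorting ascending puts the preferred span first: longest, then best match type.
print(sorted(candidates)[0])  # -> (-3, 0, 5, 'GENE')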
Example #5
def _process_mm_biomarker(spans: List[Span]) -> List[Span]:
    result_spans = []
    if spans:
        # Join the component mentions into a single 'A/B/C' biomarker text and
        # pool their tokens.
        text = '/'.join([x.text for x in spans])
        tokens = [token for span in spans for token in span.tokens]

        biomarker_span = Span(text=text,
                              entity=Biomarker.create(text),
                              tokens=tokens)

        # Wrap the combined biomarker in a single BiomarkerResult span.
        biomarker_result_span = Span(
            text=text,
            entity=BiomarkerResult.create(biomarker_span),
            tokens=tokens)
        result_spans.append(biomarker_result_span)

    return result_spans
Example #6
def is_keep(self, span: Span):
    return (
        # keep text longer than 3 characters
        len(span.text) > 3
        # keep text containing digits or symbols (often measurements)
        or len(set(span.text) & set("1234567890%<>=-+~")) > 0
        # keep text that is more than a lowercase name or synonym match
        or span.match_type() not in {SpanMatch.LowercaseSynonym,
                                     SpanMatch.LowercaseName}
    )
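A minimal sketch of the filtering pattern this predicate supports, using plain strings in place of spans; the match-type clause is omitted because it needs the project's SpanMatch enum, and all names and data below are illustrative.

def looks_keepable(text: str) -> bool:
    # Mirror the first two rules: length > 3, or contains digits/symbols.
    return len(text) > 3 or bool(set(text) & set("1234567890%<>=-+~"))

mentions = ["5%", "mm", "BRCA1 mutation", ">= 20"]
print([m for m in mentions if looks_keepable(m)])  # -> ['5%', 'BRCA1 mutation', '>= 20']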
Example #7
    def _resolve_entity(self, prefix: Token, doc_tokens: List[DocToken]):
        any_found = False

        # Try the full prefix first; keep shortening it until the resolver
        # returns at least one entity or the prefix is exhausted.
        while not any_found and prefix:
            entities = self.resolver.resolve(term=prefix)
            for entity in entities:
                # One span per resolved entity, all covering the same tokens.
                span = Span(
                    text=prefix,
                    doc=self.doc,
                    entity=entity,
                    tokens=doc_tokens,
                )
                self.spans.append(span)
                any_found = True

            if not any_found:
                # Retry with a shorter prefix: step to the left token and drop
                # the last doc token.
                prefix = prefix.left_token
                doc_tokens = doc_tokens[:-1]
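A self-contained sketch of the same longest-prefix-first fallback, assuming prefix.left_token yields the same term minus its final token; the dict resolver, names, and data below are illustrative stand-ins, not the project's API.

RESOLVER = {"egfr": "EGFR", "egfr mutation": "EGFR exon mutation"}

def resolve_longest_prefix(words):
    # Try the full phrase, then drop the trailing word and retry.
    while words:
        term = " ".join(words).lower()
        if term in RESOLVER:
            return RESOLVER[term], words
        words = words[:-1]
    return None, []

print(resolve_longest_prefix(["EGFR", "mutation", "was", "detected"]))
# -> ('EGFR exon mutation', ['EGFR', 'mutation'])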