Example #1
def load():
    # RESOURCES_DIR, CONTEXT_ATTRS, and DocumentClassifier are assumed to be
    # defined elsewhere in this module.
    from os import path

    import medspacy
    nlp = medspacy.load(enable=["sentencizer", "tokenizer"])

    # Add components
    from medspacy.target_matcher import TargetMatcher, TargetRule
    target_matcher = TargetMatcher(nlp)
    target_filepath = path.join(RESOURCES_DIR, "target_rules.json")
    target_rules = TargetRule.from_json(target_filepath)
    target_matcher.add(target_rules)
    nlp.add_pipe(target_matcher)

    from medspacy.context import ConTextComponent, ConTextRule
    context_filepath = path.join(RESOURCES_DIR, "context_rules.json")
    context = ConTextComponent(nlp, rules=None, add_attrs=CONTEXT_ATTRS)
    context_rules = ConTextRule.from_json(context_filepath)
    context.add(context_rules)
    nlp.add_pipe(context)

    from medspacy.section_detection import Sectionizer
    # TODO: Add radiology section rules
    sectionizer = Sectionizer(nlp)
    nlp.add_pipe(sectionizer)

    clf = DocumentClassifier(nlp)
    nlp.add_pipe(clf)

    return nlp
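
A minimal usage sketch for the pipeline returned by load() above; the note text and the expectation of extracted entities are illustrative assumptions, not part of the original module.

# Illustrative only: assumes the module-level names used by load() above
# (RESOURCES_DIR, CONTEXT_ATTRS, DocumentClassifier) are defined and importable.
nlp = load()
doc = nlp("IMPRESSION: No evidence of pneumothorax.")
for ent in doc.ents:
    # ConText attaches matched modifiers to each extracted entity.
    print(ent.text, ent.label_, ent._.modifiers)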
    def test_item_modifier_termination(self):
        context = ConTextComponent(nlp, rules=None, terminations=None)
        item = ConTextItem("no evidence of",
                           "NEGATED_EXISTENCE",
                           "FORWARD",
                           terminated_by={"POSITIVE_EXISTENCE", "UNCERTAIN"})
        context.add([item])
        assert item.terminated_by == {"POSITIVE_EXISTENCE", "UNCERTAIN"}
    def test_null_modifier_termination(self):
        context = ConTextComponent(nlp, rules=None, terminations=None)
        item = ConTextItem("no evidence of",
                           "NEGATED_EXISTENCE",
                           "FORWARD",
                           terminated_by=None)
        context.add([item])
        assert item.terminated_by == set()
    def test_rule_modifier_termination(self):
        context = ConTextComponent(nlp, rules=None, terminations=None)
        rule = ConTextRule("no evidence of",
                           "NEGATED_EXISTENCE",
                           "FORWARD",
                           terminated_by={"POSITIVE_EXISTENCE", "UNCERTAIN"})
        context.add([rule])
        assert rule.terminated_by == {"POSITIVE_EXISTENCE", "UNCERTAIN"}
    def test_is_historical(self):
        doc = nlp("History of pneumonia.")
        context = ConTextComponent(nlp, add_attrs=True, rules=None)
        rules = [ConTextRule("history of", "HISTORICAL", direction="forward")]
        context.add(rules)
        doc.ents = (doc[-2:-1], )
        context(doc)

        assert doc.ents[0]._.is_historical is True
    def test_is_historical(self):
        doc = nlp("History of pneumonia.")
        context = ConTextComponent(nlp, add_attrs=True, rules=None)
        item_data = [ConTextItem("history of", "HISTORICAL", rule="forward")]
        context.add(item_data)
        doc.ents = (doc[-2:-1], )
        context(doc)

        assert doc.ents[0]._.is_historical is True
    def test_is_family(self):
        doc = nlp("Family history of breast cancer.")
        context = ConTextComponent(nlp, add_attrs=True, rules=None)
        item_data = [
            ConTextItem("family history of", "FAMILY", rule="forward")
        ]
        context.add(item_data)
        doc.ents = (doc[-3:-1], )
        context(doc)

        assert doc.ents[0]._.is_family is True
    def test_is_negated(self):
        doc = nlp("There is no evidence of pneumonia.")
        context = ConTextComponent(nlp, add_attrs=True, rules=None)
        item_data = [
            ConTextItem("no evidence of", "NEGATED_EXISTENCE", rule="forward")
        ]
        context.add(item_data)
        doc.ents = (doc[-2:-1], )
        context(doc)

        assert doc.ents[0]._.is_negated is True
    def test_is_family(self):
        doc = nlp("Family history of breast cancer.")
        context = ConTextComponent(nlp, add_attrs=True, rules=None)
        rules = [
            ConTextRule("family history of", "FAMILY", direction="forward")
        ]
        context.add(rules)
        doc.ents = (doc[-3:-1], )
        context(doc)

        assert doc.ents[0]._.is_family is True
    def test_global_allowed_types1(self):
        """Check that if the ConTextComponent has allowed_types defined
        and a ConTextRule does not, the ConTextRule will receive the component's
        value.
        """
        context = ConTextComponent(nlp, rules=None, allowed_types={"PROBLEM"})
        rule = ConTextRule("no evidence of",
                           "NEGATED_EXISTENCE",
                           "FORWARD",
                           allowed_types=None)
        context.add([rule])
        assert rule.allowed_types == {"PROBLEM"}
Example #11
    def test_terminate_stops_forward_modifier(self):
        context = ConTextComponent(nlp, rules=None)

        item = ConTextItem("no evidence of", "NEGATED_EXISTENCE", "FORWARD")
        item2 = ConTextItem("but", "TERMINATE", "TERMINATE")
        context.add([item, item2])
        doc = nlp("No evidence of chf but she has pneumonia.")
        doc.ents = (Span(doc, 3, 4, "PROBLEM"), Span(doc, 7, 8, "PROBLEM"))
        context(doc)
        chf, pneumonia = doc.ents
        assert len(chf._.modifiers) > 0
        assert len(pneumonia._.modifiers) == 0
Example #12
    def test_terminate_stops_backward_modifier(self):
        context = ConTextComponent(nlp, rules=None)

        item = ConTextItem("is ruled out", "NEGATED_EXISTENCE", "BACKWARD")
        item2 = ConTextItem("but", "CONJ", "TERMINATE")
        context.add([item, item2])
        doc = nlp("Pt has chf but pneumonia is ruled out")
        doc.ents = (Span(doc, 2, 3, "PROBLEM"), Span(doc, 4, 5, "PROBLEM"))
        context(doc)
        chf, pneumonia = doc.ents
        assert len(chf._.modifiers) == 0
        assert len(pneumonia._.modifiers) > 0
    def test_global_allowed_types2(self):
        """Check that if the ConTextComponent does not have allowed_types defined
        and a ConTextItem does, the ConTextItem will not receive the component's
        value.
        """
        context = ConTextComponent(nlp, rules=None, allowed_types=None)
        item = ConTextItem("no evidence of",
                           "NEGATED_EXISTENCE",
                           "FORWARD",
                           allowed_types={"PROBLEM"})
        context.add([item])
        assert item.allowed_types == {"PROBLEM"}
    def test_regex_pattern(self):
        rules = [
            ConTextRule("no history of",
                        "NEGATED_EXISTENCE",
                        direction="FORWARD",
                        pattern="no (history|hx) of"),
        ]
        context = ConTextComponent(nlp, rules=None)
        context.add(rules)

        doc = nlp("No history of afib. No hx of MI.")
        context(doc)
        assert len(doc._.context_graph.modifiers) == 2
    def test_global_allowed_types2(self):
        """Check that if both the ConTextComponent and a ConTextRule have allowed_types defined,
        the ConTextRule will not receive the component's value.
        """
        context = ConTextComponent(nlp,
                                   rules=None,
                                   allowed_types={"TREATMENT"})
        rule = ConTextRule("no evidence of",
                           "NEGATED_EXISTENCE",
                           "FORWARD",
                           allowed_types={"PROBLEM"})
        context.add([rule])
        assert rule.allowed_types == {"PROBLEM"}
Example #16
    def test_is_negated(self):
        doc = nlp("There is no evidence of pneumonia.")
        context = ConTextComponent(nlp, add_attrs=True, rules=None)
        rules = [
            ConTextRule("no evidence of",
                        "NEGATED_EXISTENCE",
                        direction="forward")
        ]
        context.add(rules)
        doc.ents = (Span(doc, 5, 6, "CONDITION"), )
        context(doc)

        assert doc.ents[0]._.is_negated is True
Example #17
    def test_prune_false(self):
        rules = [
            ConTextRule("history of", "HISTORICAL", direction="FORWARD"),
            ConTextRule("no history of",
                        "NEGATED_EXISTENCE",
                        direction="FORWARD"),
        ]
        context = ConTextComponent(nlp, rules=None, prune=False)
        context.add(rules)

        doc = nlp("No history of afib.")
        context(doc)

        assert len(doc._.context_graph.modifiers) == 2
    def test_on_modifies_false(self):
        def on_modifies(target, modifier, span_between):
            return False

        context = ConTextComponent(nlp, rules=None)
        item = ConTextItem("no evidence of",
                           "NEGATED_EXISTENCE",
                           on_modifies=on_modifies)
        context.add([item])
        doc = nlp("There is no evidence of pneumonia or chf.")
        doc.ents = (doc[5:6], doc[7:8])
        context(doc)

        for ent in doc.ents:
            assert len(ent._.modifiers) == 0
Example #19
    def test_custom_terminate_stops_forward_modifier(self):
        doc = nlp("negative for flu, positive for pneumonia.")
        context = ConTextComponent(nlp, rules=None)

        item = ConTextItem("negative for",
                           "NEGATED_EXISTENCE",
                           rule="FORWARD",
                           terminated_by={"POSITIVE_EXISTENCE"})
        item2 = ConTextItem("positive for",
                            "POSITIVE_EXISTENCE",
                            rule="FORWARD")
        context.add([item, item2])
        doc.ents = (Span(doc, 2, 3, "PROBLEM"), Span(doc, 6, 7))
        flu, pneumonia = doc.ents
        context(doc)
        assert len(flu._.modifiers) == 1
        assert len(pneumonia._.modifiers) == 1
    def test_pseudo_modifier(self):
        item_data = [
            ConTextItem("negative", "NEGATED_EXISTENCE"),
            ConTextItem("negative attitude",
                        "PSEUDO_NEGATED_EXISTENCE",
                        rule="PSEUDO"),
        ]
        context = ConTextComponent(nlp, rules=None)
        context.add(item_data)

        doc = nlp("She has a negative attitude about her treatment.")
        doc.ents = (doc[-2:-1], )
        context(doc)

        assert len(doc.ents[0]._.modifiers) == 0
        assert len(doc._.context_graph.modifiers) == 1
        assert doc._.context_graph.modifiers[
            0].category == "PSEUDO_NEGATED_EXISTENCE"
Example #21
    def test_pseudo_modifier(self):
        rules = [
            ConTextRule("negative", "NEGATED_EXISTENCE"),
            ConTextRule("negative attitude",
                        "PSEUDO_NEGATED_EXISTENCE",
                        direction="PSEUDO"),
        ]
        context = ConTextComponent(nlp, rules=None)
        context.add(rules)

        doc = nlp("She has a negative attitude about her treatment.")
        doc.ents = (Span(doc, 7, 8, "CONDITION"), )
        context(doc)

        assert len(doc.ents[0]._.modifiers) == 0
        assert len(doc._.context_graph.modifiers) == 1
        assert doc._.context_graph.modifiers[
            0].category == "PSEUDO_NEGATED_EXISTENCE"
    def test_custom_attributes_value1(self):
        custom_attrs = {
            "NEGATED_EXISTENCE": {
                "is_negated": True
            },
        }
        try:
            Span.set_extension("is_negated", default=False)
        except:
            pass
        context = ConTextComponent(nlp, add_attrs=custom_attrs)
        context.add(
            [ConTextItem("no evidence of", "NEGATED_EXISTENCE", "FORWARD")])
        doc = nlp("There is no evidence of pneumonia.")
        doc.ents = (doc[-2:-1], )
        context(doc)

        assert doc.ents[0]._.is_negated is True
    def test_custom_attributes_value2(self):
        custom_attrs = {
            "FAMILY": {
                "is_family": True
            },
        }
        try:
            Span.set_extension("is_family", default=False)
        except:
            pass
        context = ConTextComponent(nlp, add_attrs=custom_attrs)
        context.add([
            ConTextRule("no evidence of", "DEFINITE_NEGATED_EXISTENCE",
                        "FORWARD")
        ])
        doc = nlp("There is no evidence of pneumonia.")
        doc.ents = (doc[-2:-1], )
        context(doc)

        assert doc.ents[0]._.is_family is False
    def test_simple_callback(self, capsys):
        context = ConTextComponent(nlp, rules=None)

        def simple_callback(matcher, doc, i, matches):
            match_id, start, end = matches[i]
            span = doc[start:end]
            print("Matched on span:", span)

        context.add([
            ConTextItem(
                "no evidence of",
                "NEGATED_EXISTENCE",
                "FORWARD",
                on_match=simple_callback,
            )
        ])

        doc = nlp("There is no evidence of pneumonia.")
        context(doc)
        captured = capsys.readouterr()
        assert captured.out == "Matched on span: no evidence of\n"
Example #25
def load(model="default",
         enable=None,
         disable=None,
         load_rules=True,
         set_attributes=True):
    """Load a spaCy language object with cov_bsv pipeline components.
    By default, the base model will be 'en_core_web_sm' with the 'tagger'
    and 'parser' pipeline components, supplemented with the following custom
    components:
        - preprocessor (set to be nlp.tokenizer): Modifies the raw text and returns
            a tokenized Doc. Preprocess rules are defined in cov_bsv.knowledge_base.preprocess_rules
        - concept_tagger: Assigns a semantic tag in a custom attribute "token._.concept_tag"
            to each Token in a Doc, which helps with concept extraction and normalization.
            Concept tag rules are defined in cov_bsv.knowledge_base.concept_tag_rules.
        - target_matcher: Extracts spans to doc.ents using extended rule-based matching.
            Target rules are defined in cov_bsv.knowledge_base.target_rules.
        - sectionizer: Identifies note section headers in the text and assigns section titles to
            entities and tokens contained in that section. Section patterns are defined in
            cov_bsv.knowledge_base.section_patterns.
        - context: Identifies semantic modifiers of entities and asserts attributes such as
            positive status, negation, and other experiencer. Context rules are defined in
            cov_bsv.knowledge_base.context_rules.
        - postprocessor: Modifies or removes the entity based on business logic. This handles
            special cases or complex logic using the results of earlier entities. Postprocess rules
            are defined in cov_bsv.knowledge_base.postprocess_rules.
        - document_classifier: Assigns a label of "POS", "UNK", or "NEG" to doc._.cov_classification.
            A document is classified as positive if it has at least one positive, non-excluded entity.

    Args:
        model: The name of the base spaCy model to load. If "default", the tagger and parser
            from "en_core_web_sm" will be loaded.
        enable (iterable or None): A list of component names to include in the pipeline.
            If None, all pipeline components listed above will be included.
        disable (iterable or None): A list of component names to exclude.
            Cannot be set if `enable` is not None.
        load_rules (bool): Whether or not to include default rules for custom components. Default True.
        set_attributes (bool): Whether or not to register custom attributes to spaCy classes. If load_rules is True,
            this will automatically be set to True because the rules in the knowledge base rely on these custom attributes.
            The following extensions are registered (all defaults are False unless specified):
                Span._.is_future
                Span._.is_historical
                Span._.is_positive
                Span._.is_not_relevant
                Span._.is_negated
                Span._.is_uncertain
                Span._.is_screening
                Span._.is_other_experiencer
                Span._.concept_tag (default "")

    Returns:
        nlp: a spaCy Language object
    """
    if enable is not None and disable is not None:
        raise ValueError("Either `enable` or `disable` must be None.")
    if disable is not None:
        # If there's a single pipe name, nest it in a set
        if isinstance(disable, str):
            disable = {disable}
        else:
            disable = set(disable)
        enable = set(DEFAULT_PIPENAMES).difference(set(disable))
    elif enable is not None:
        if isinstance(enable, str):
            enable = {enable}
        else:
            enable = set(enable)
        disable = set(DEFAULT_PIPENAMES).difference(enable)
    else:
        enable = DEFAULT_PIPENAMES
        disable = set()

    if model == "default":
        model = "en_core_web_sm"
        disable.add("ner")

    if set_attributes:
        _set_attributes()

    import spacy
    nlp = spacy.load(model, disable=disable)

    if "preprocessor" in enable:
        from medspacy.preprocess import Preprocessor

        preprocessor = Preprocessor(nlp.tokenizer)
        if load_rules:
            preprocessor.add(preprocess_rules)
        nlp.tokenizer = preprocessor

    if "concept_tagger" in enable:
        from spacy.tokens import Token

        Token.set_extension("concept_tag", default="", force=True)
        from medspacy.ner import ConceptTagger

        concept_tagger = ConceptTagger(nlp)
        if load_rules:
            for (_, rules) in concept_tag_rules.items():
                concept_tagger.add(rules)
        nlp.add_pipe(concept_tagger)

    if "target_matcher" in enable:
        from medspacy.ner import TargetMatcher

        target_matcher = TargetMatcher(nlp)
        if load_rules:
            for (_, rules) in target_rules.items():
                target_matcher.add(rules)
        nlp.add_pipe(target_matcher)

    if "sectionizer" in enable:
        from medspacy.section_detection import Sectionizer
        sectionizer = Sectionizer(nlp, rules=None, add_attrs=SECTION_ATTRS)
        if load_rules:
            sectionizer.add(section_rules)
        nlp.add_pipe(sectionizer)

    if "context" in enable:
        from medspacy.context import ConTextComponent

        context = ConTextComponent(
            nlp,
            add_attrs=CONTEXT_MAPPING,
            rules=None,
            remove_overlapping_modifiers=True,
        )
        if load_rules:
            context.add(context_rules)
        nlp.add_pipe(context)

    if "postprocessor" in enable:
        from medspacy.postprocess import Postprocessor

        postprocessor = Postprocessor(debug=False)
        if load_rules:
            postprocessor.add(postprocess_rules)
        nlp.add_pipe(postprocessor)

    if "document_classifier" in enable:
        document_classifier = DocumentClassifier()
        nlp.add_pipe(document_classifier)

    return nlp
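
A usage sketch for the loader above, assuming the knowledge-base rule modules it references are importable; the note text is invented and the resulting classification depends entirely on the loaded rules.

# Illustrative only: example text is made up; attribute names follow the docstring above.
nlp = load(model="default", disable={"postprocessor"})
doc = nlp("Patient presents for COVID-19 screening. SARS-COV-2 PCR: negative.")
print(doc._.cov_classification)  # "POS", "UNK", or "NEG" set by the document_classifier
for ent in doc.ents:
    # Assertion attributes registered by _set_attributes()
    print(ent.text, ent._.is_negated, ent._.is_positive)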