def load():
    """Assemble a medspaCy pipeline: target matcher, ConText, sectionizer, and classifier.

    Returns:
        nlp: the configured spaCy Language object.
    """
    import medspacy

    nlp = medspacy.load(enable=["sentencizer", "tokenizer"])

    # Entity extraction from JSON-defined target rules.
    from medspacy.target_matcher import TargetMatcher, TargetRule
    matcher = TargetMatcher(nlp)
    matcher.add(TargetRule.from_json(path.join(RESOURCES_DIR, "target_rules.json")))
    nlp.add_pipe(matcher)

    # ConText assigns modifier attributes (negation, etc.) per CONTEXT_ATTRS.
    from medspacy.context import ConTextComponent, ConTextRule
    context = ConTextComponent(nlp, rules=None, add_attrs=CONTEXT_ATTRS)
    context.add(ConTextRule.from_json(path.join(RESOURCES_DIR, "context_rules.json")))
    nlp.add_pipe(context)

    # Section detection with default rules.
    # TODO: Add radiology section rules
    from medspacy.section_detection import Sectionizer
    nlp.add_pipe(Sectionizer(nlp))

    # Document-level classification runs last.
    nlp.add_pipe(DocumentClassifier(nlp))

    return nlp
def test_item_modifier_termination(self):
    """An explicit terminated_by set on a ConTextItem is preserved by context.add()."""
    context = ConTextComponent(nlp, rules=None, terminations=None)
    negation_item = ConTextItem(
        "no evidence of",
        "NEGATED_EXISTENCE",
        "FORWARD",
        terminated_by={"POSITIVE_EXISTENCE", "UNCERTAIN"},
    )
    context.add([negation_item])
    assert negation_item.terminated_by == {"POSITIVE_EXISTENCE", "UNCERTAIN"}
def test_null_modifier_termination(self):
    """terminated_by=None is normalized to an empty set after context.add()."""
    context = ConTextComponent(nlp, rules=None, terminations=None)
    negation_item = ConTextItem(
        "no evidence of", "NEGATED_EXISTENCE", "FORWARD", terminated_by=None
    )
    context.add([negation_item])
    assert negation_item.terminated_by == set()
def test_rule_modifier_termination(self):
    """An explicit terminated_by set on a ConTextRule is preserved by context.add()."""
    context = ConTextComponent(nlp, rules=None, terminations=None)
    negation_rule = ConTextRule(
        "no evidence of",
        "NEGATED_EXISTENCE",
        "FORWARD",
        terminated_by={"POSITIVE_EXISTENCE", "UNCERTAIN"},
    )
    context.add([negation_rule])
    assert negation_rule.terminated_by == {"POSITIVE_EXISTENCE", "UNCERTAIN"}
def test_is_historical(self):
    """A HISTORICAL modifier preceding an entity sets ent._.is_historical."""
    doc = nlp("History of pneumonia.")
    context = ConTextComponent(nlp, add_attrs=True, rules=None)
    context.add([ConTextRule("history of", "HISTORICAL", direction="forward")])
    # "pneumonia" is the second-to-last token.
    doc.ents = (doc[-2:-1],)
    context(doc)
    assert doc.ents[0]._.is_historical is True
def test_is_historical(self):
    """A HISTORICAL ConTextItem preceding an entity sets ent._.is_historical."""
    doc = nlp("History of pneumonia.")
    context = ConTextComponent(nlp, add_attrs=True, rules=None)
    context.add([ConTextItem("history of", "HISTORICAL", rule="forward")])
    # "pneumonia" is the second-to-last token.
    doc.ents = (doc[-2:-1],)
    context(doc)
    assert doc.ents[0]._.is_historical is True
def test_is_family(self):
    """A FAMILY ConTextItem preceding an entity sets ent._.is_family."""
    doc = nlp("Family history of breast cancer.")
    context = ConTextComponent(nlp, add_attrs=True, rules=None)
    context.add([ConTextItem("family history of", "FAMILY", rule="forward")])
    # "breast cancer" spans the two tokens before the period.
    doc.ents = (doc[-3:-1],)
    context(doc)
    assert doc.ents[0]._.is_family is True
def test_is_negated(self):
    """A NEGATED_EXISTENCE ConTextItem preceding an entity sets ent._.is_negated."""
    doc = nlp("There is no evidence of pneumonia.")
    context = ConTextComponent(nlp, add_attrs=True, rules=None)
    context.add([ConTextItem("no evidence of", "NEGATED_EXISTENCE", rule="forward")])
    # "pneumonia" is the second-to-last token.
    doc.ents = (doc[-2:-1],)
    context(doc)
    assert doc.ents[0]._.is_negated is True
def test_is_family(self):
    """A FAMILY ConTextRule preceding an entity sets ent._.is_family."""
    doc = nlp("Family history of breast cancer.")
    context = ConTextComponent(nlp, add_attrs=True, rules=None)
    context.add([ConTextRule("family history of", "FAMILY", direction="forward")])
    # "breast cancer" spans the two tokens before the period.
    doc.ents = (doc[-3:-1],)
    context(doc)
    assert doc.ents[0]._.is_family is True
def test_global_allowed_types1(self):
    """Check that if the ConTextComponent has allowed_types defined and a
    ConTextRule does not, the ConTextRule will receive the component's value.
    """
    context = ConTextComponent(nlp, rules=None, allowed_types={"PROBLEM"})
    negation_rule = ConTextRule(
        "no evidence of", "NEGATED_EXISTENCE", "FORWARD", allowed_types=None
    )
    context.add([negation_rule])
    assert negation_rule.allowed_types == {"PROBLEM"}
def test_terminate_stops_forward_modifier(self):
    """A TERMINATE item ("but") stops a forward modifier from reaching later entities."""
    context = ConTextComponent(nlp, rules=None)
    items = [
        ConTextItem("no evidence of", "NEGATED_EXISTENCE", "FORWARD"),
        ConTextItem("but", "TERMINATE", "TERMINATE"),
    ]
    context.add(items)
    doc = nlp("No evidence of chf but she has pneumonia.")
    doc.ents = (Span(doc, 3, 4, "PROBLEM"), Span(doc, 7, 8, "PROBLEM"))
    context(doc)
    chf, pneumonia = doc.ents
    # "chf" sits before the terminator, "pneumonia" after it.
    assert len(chf._.modifiers) > 0
    assert len(pneumonia._.modifiers) == 0
def test_terminate_stops_backward_modifier(self):
    """A TERMINATE item ("but") stops a backward modifier from reaching earlier entities."""
    context = ConTextComponent(nlp, rules=None)
    items = [
        ConTextItem("is ruled out", "NEGATED_EXISTENCE", "BACKWARD"),
        ConTextItem("but", "CONJ", "TERMINATE"),
    ]
    context.add(items)
    doc = nlp("Pt has chf but pneumonia is ruled out")
    doc.ents = (Span(doc, 2, 3, "PROBLEM"), Span(doc, 4, 5, "PROBLEM"))
    context(doc)
    chf, pneumonia = doc.ents
    # "pneumonia" sits after the terminator and before the backward trigger.
    assert len(chf._.modifiers) == 0
    assert len(pneumonia._.modifiers) > 0
def test_global_allowed_types2(self):
    """Check that if the ConTextComponent does not have allowed_types defined
    and a ConTextItem does, the ConTextItem will not receive the component's
    value.
    """
    context = ConTextComponent(nlp, rules=None, allowed_types=None)
    negation_item = ConTextItem(
        "no evidence of", "NEGATED_EXISTENCE", "FORWARD", allowed_types={"PROBLEM"}
    )
    context.add([negation_item])
    assert negation_item.allowed_types == {"PROBLEM"}
def test_regex_pattern(self):
    """A regex pattern on a ConTextRule matches multiple surface forms of the trigger."""
    context = ConTextComponent(nlp, rules=None)
    context.add([
        ConTextRule(
            "no history of",
            "NEGATED_EXISTENCE",
            direction="FORWARD",
            pattern="no (history|hx) of",
        ),
    ])
    doc = nlp("No history of afib. No hx of MI.")
    context(doc)
    # Both "history" and "hx" variants should produce a modifier.
    assert len(doc._.context_graph.modifiers) == 2
def test_global_allowed_types2(self):
    """Check that if both the ConTextComponent and a ConTextRule have
    allowed_types defined, the ConTextRule will not receive the component's
    value.

    NOTE(review): a method with this same name also exists elsewhere in SOURCE;
    if both live in one class, one silently shadows the other — confirm and
    rename if so.
    """
    context = ConTextComponent(nlp, rules=None, allowed_types={"TREATMENT"})
    negation_rule = ConTextRule(
        "no evidence of", "NEGATED_EXISTENCE", "FORWARD", allowed_types={"PROBLEM"}
    )
    context.add([negation_rule])
    assert negation_rule.allowed_types == {"PROBLEM"}
def test_is_negated(self):
    """A NEGATED_EXISTENCE ConTextRule preceding an entity sets ent._.is_negated."""
    doc = nlp("There is no evidence of pneumonia.")
    context = ConTextComponent(nlp, add_attrs=True, rules=None)
    context.add([
        ConTextRule("no evidence of", "NEGATED_EXISTENCE", direction="forward")
    ])
    doc.ents = (Span(doc, 5, 6, "CONDITION"),)
    context(doc)
    assert doc.ents[0]._.is_negated is True
def test_prune_false(self):
    """With prune=False, overlapping modifier matches are all kept."""
    context = ConTextComponent(nlp, rules=None, prune=False)
    context.add([
        ConTextRule("history of", "HISTORICAL", direction="FORWARD"),
        ConTextRule("no history of", "NEGATED_EXISTENCE", direction="FORWARD"),
    ])
    doc = nlp("No history of afib.")
    context(doc)
    # Both the shorter and the longer trigger survive when pruning is off.
    assert len(doc._.context_graph.modifiers) == 2
def test_on_modifies_false(self):
    """An on_modifies callback returning False suppresses every modifier link."""
    def reject_all(target, modifier, span_between):
        return False

    context = ConTextComponent(nlp, rules=None)
    context.add([
        ConTextItem("no evidence of", "NEGATED_EXISTENCE", on_modifies=reject_all)
    ])
    doc = nlp("There is no evidence of pneumonia or chf.")
    doc.ents = (doc[5:6], doc[7:8])
    context(doc)
    for ent in doc.ents:
        assert len(ent._.modifiers) == 0
def test_custom_terminate_stops_forward_modifier(self):
    """A custom terminated_by category stops a forward modifier at that category."""
    doc = nlp("negative for flu, positive for pneumonia.")
    context = ConTextComponent(nlp, rules=None)
    items = [
        ConTextItem(
            "negative for",
            "NEGATED_EXISTENCE",
            rule="FORWARD",
            terminated_by={"POSITIVE_EXISTENCE"},
        ),
        ConTextItem("positive for", "POSITIVE_EXISTENCE", rule="FORWARD"),
    ]
    context.add(items)
    doc.ents = (Span(doc, 2, 3, "PROBLEM"), Span(doc, 6, 7))
    flu, pneumonia = doc.ents
    context(doc)
    # Each entity gets exactly one modifier: negation stops at "positive for".
    assert len(flu._.modifiers) == 1
    assert len(pneumonia._.modifiers) == 1
def test_pseudo_modifier(self):
    """A PSEUDO item absorbs a trigger so it does not modify entities."""
    items = [
        ConTextItem("negative", "NEGATED_EXISTENCE"),
        ConTextItem("negative attitude", "PSEUDO_NEGATED_EXISTENCE", rule="PSEUDO"),
    ]
    context = ConTextComponent(nlp, rules=None)
    context.add(items)
    doc = nlp("She has a negative attitude about her treatment.")
    doc.ents = (doc[-2:-1],)
    context(doc)
    # The pseudo match exists in the graph but links to no entity.
    assert len(doc.ents[0]._.modifiers) == 0
    assert len(doc._.context_graph.modifiers) == 1
    assert doc._.context_graph.modifiers[0].category == "PSEUDO_NEGATED_EXISTENCE"
def test_pseudo_modifier(self):
    """A PSEUDO rule absorbs a trigger so it does not modify entities."""
    context = ConTextComponent(nlp, rules=None)
    context.add([
        ConTextRule("negative", "NEGATED_EXISTENCE"),
        ConTextRule("negative attitude", "PSEUDO_NEGATED_EXISTENCE", direction="PSEUDO"),
    ])
    doc = nlp("She has a negative attitude about her treatment.")
    doc.ents = (Span(doc, 7, 8, "CONDITION"),)
    context(doc)
    # The pseudo match exists in the graph but links to no entity.
    assert len(doc.ents[0]._.modifiers) == 0
    assert len(doc._.context_graph.modifiers) == 1
    assert doc._.context_graph.modifiers[0].category == "PSEUDO_NEGATED_EXISTENCE"
def test_custom_attributes_value1(self):
    """A dict add_attrs mapping assigns the mapped attribute on modified entities.

    Maps the "NEGATED_EXISTENCE" category to is_negated=True and checks the
    negated entity receives it.
    """
    custom_attrs = {
        "NEGATED_EXISTENCE": {"is_negated": True},
    }
    try:
        Span.set_extension("is_negated", default=False)
    except ValueError:
        # spaCy raises ValueError when the extension is already registered
        # (e.g. by an earlier test); the existing registration is reused.
        # A bare `except:` here previously hid unrelated failures.
        pass
    context = ConTextComponent(nlp, add_attrs=custom_attrs)
    context.add([ConTextItem("no evidence of", "NEGATED_EXISTENCE", "FORWARD")])
    doc = nlp("There is no evidence of pneumonia.")
    doc.ents = (doc[-2:-1],)
    context(doc)
    assert doc.ents[0]._.is_negated is True
def test_custom_attributes_value2(self):
    """A dict add_attrs mapping leaves unmapped categories at the default value.

    Only "FAMILY" is mapped, so a "DEFINITE_NEGATED_EXISTENCE" modifier must
    not flip is_family from its False default.
    """
    custom_attrs = {
        "FAMILY": {"is_family": True},
    }
    try:
        Span.set_extension("is_family", default=False)
    except ValueError:
        # spaCy raises ValueError when the extension is already registered
        # (e.g. by an earlier test); the existing registration is reused.
        # A bare `except:` here previously hid unrelated failures.
        pass
    context = ConTextComponent(nlp, add_attrs=custom_attrs)
    context.add([
        ConTextRule("no evidence of", "DEFINITE_NEGATED_EXISTENCE", "FORWARD")
    ])
    doc = nlp("There is no evidence of pneumonia.")
    doc.ents = (doc[-2:-1],)
    context(doc)
    assert doc.ents[0]._.is_family is False
def test_simple_callback(self, capsys):
    """An on_match callback fires when the modifier pattern matches."""
    context = ConTextComponent(nlp, rules=None)

    def echo_match(matcher, doc, i, matches):
        # Print the matched span so capsys can verify the callback ran.
        match_id, start, end = matches[i]
        print("Matched on span:", doc[start:end])

    context.add([
        ConTextItem(
            "no evidence of",
            "NEGATED_EXISTENCE",
            "FORWARD",
            on_match=echo_match,
        )
    ])
    doc = nlp("There is no evidence of pneumonia.")
    context(doc)
    captured = capsys.readouterr()
    assert captured.out == "Matched on span: no evidence of\n"
def load(model="default", enable=None, disable=None, load_rules=True, set_attributes=True):
    """Load a spaCy language object with cov_bsv pipeline components.

    By default, the base model will be 'en_core_web_sm' with the 'tagger'
    and 'parser' pipeline components, supplemented with the following custom
    components:

    - preprocessor (set to be nlp.tokenizer): Modifies the preprocessed text
        and returns a tokenized Doc. Preprocess rules are defined in
        cov_bsv.knowledge_base.preprocess_rules
    - concept_tagger: Assigns a semantic tag in a custom attribute
        "token._.concept_tag" to each Token in a Doc, which helps with concept
        extraction and normalization. Concept tag rules are defined in
        cov_bsv.knowledge_base.concept_tag_rules.
    - target_matcher: Extracts spans to doc.ents using extended rule-based
        matching. Target rules are defined in cov_bsv.knowledge_base.target_rules.
    - sectionizer: Identifies note section headers in the text and assigns
        section titles to entities and tokens contained in that section.
        Section patterns are defined in cov_bsv.knowledge_base.section_patterns.
    - context: Identifies semantic modifiers of entities and asserts
        attributes such as positive status, negation, and other experiencer.
        Context rules are defined in cov_bsv.knowledge_base.context_rules.
    - postprocessor: Modifies or removes the entity based on business logic.
        This handles special cases or complex logic using the results of
        earlier entities. Postprocess rules are defined in
        cov_bsv.knowledge_base.postprocess_rules.
    - document_classifier: Assigns a label of "POS", "UNK", or "NEG" to the
        doc._.cov_classification. A document will be classified as positive if
        it has at least one positive, non-excluded entity.

    Args:
        model: The name of the base spaCy model to load. If "default" will
            load the tagger and parser from "en_core_web_sm".
        enable (iterable or None): A list of component names to include in the
            pipeline. If None, will include all pipeline components listed above.
        disable (iterable or None): A list of component names to exclude.
            Cannot be set if `enable` is not None.
        load_rules (bool): Whether or not to include default rules for custom
            components. Default True.
        set_attributes (bool): Whether or not to register custom attributes to
            spaCy classes. If load_rules is True, this will automatically be set
            to True because the rules in the knowledge base rely on these
            custom attributes. The following extensions are registered
            (all defaults are False unless specified):
                Span._.is_future
                Span._.is_historical
                Span._.is_positive
                Span._.is_not_relevant
                Span._.is_negated
                Span._.is_uncertain
                Span._.is_screening
                Span._.is_other_experiencer
                Span._.concept_tag (default "")

    Returns:
        nlp: a spaCy Language object

    Raises:
        ValueError: if both `enable` and `disable` are given.
    """
    # `enable` and `disable` are mutually exclusive ways to select components.
    if enable is not None and disable is not None:
        raise ValueError("Either `enable` or `disable` must be None.")
    if disable is not None:
        # If there's a single pipe name, nest it in a set
        if isinstance(disable, str):
            disable = {disable}
        else:
            disable = set(disable)
        enable = set(DEFAULT_PIPENAMES).difference(set(disable))
    elif enable is not None:
        if isinstance(enable, str):
            enable = {enable}
        else:
            enable = set(enable)
        # Everything not explicitly enabled gets disabled in spacy.load().
        disable = set(DEFAULT_PIPENAMES).difference(enable)
    else:
        # Neither given: enable the full default pipeline.
        enable = DEFAULT_PIPENAMES
        disable = set()

    if model == "default":
        model = "en_core_web_sm"
        # The base model's NER is not used; custom components extract entities.
        disable.add("ner")

    if set_attributes:
        # Registers the Span/Token extensions listed in the docstring.
        _set_attributes()

    import spacy

    nlp = spacy.load(model, disable=disable)

    if "preprocessor" in enable:
        # The preprocessor wraps and replaces the tokenizer rather than
        # being added as a pipe.
        from medspacy.preprocess import Preprocessor

        preprocessor = Preprocessor(nlp.tokenizer)
        if load_rules:
            preprocessor.add(preprocess_rules)
        nlp.tokenizer = preprocessor

    if "concept_tagger" in enable:
        from spacy.tokens import Token

        # force=True so re-loading in the same process does not raise.
        Token.set_extension("concept_tag", default="", force=True)
        from medspacy.ner import ConceptTagger

        concept_tagger = ConceptTagger(nlp)
        if load_rules:
            # concept_tag_rules maps rule-group names to rule lists;
            # only the lists are needed here.
            for (_, rules) in concept_tag_rules.items():
                concept_tagger.add(rules)
        nlp.add_pipe(concept_tagger)

    if "target_matcher" in enable:
        from medspacy.ner import TargetMatcher

        target_matcher = TargetMatcher(nlp)
        if load_rules:
            for (_, rules) in target_rules.items():
                target_matcher.add(rules)
        nlp.add_pipe(target_matcher)

    if "sectionizer" in enable:
        from medspacy.section_detection import Sectionizer

        sectionizer = Sectionizer(nlp, rules=None, add_attrs=SECTION_ATTRS)
        if load_rules:
            sectionizer.add(section_rules)
        nlp.add_pipe(sectionizer)

    if "context" in enable:
        from medspacy.context import ConTextComponent

        context = ConTextComponent(
            nlp,
            add_attrs=CONTEXT_MAPPING,
            rules=None,
            remove_overlapping_modifiers=True,
        )
        if load_rules:
            context.add(context_rules)
        nlp.add_pipe(context)

    if "postprocessor" in enable:
        from medspacy.postprocess import Postprocessor

        postprocessor = Postprocessor(debug=False)
        if load_rules:
            postprocessor.add(postprocess_rules)
        nlp.add_pipe(postprocessor)

    if "document_classifier" in enable:
        # Document-level POS/UNK/NEG classification; runs last.
        document_classifier = DocumentClassifier()
        nlp.add_pipe(document_classifier)

    return nlp