def test_one_sided(self):
  """Normalization trims a dangling prefix or suffix independently."""
  text = "a&p:\n # dm2 "
  tokens = tokenizer_lib.tokenize(text)

  def unknown_span(start, end):
    return ap_parsing_lib.LabeledCharSpan(
        start_char=start,
        end_char=end,
        span_type=ap_parsing_lib.LabeledSpanType.UNKNOWN_TYPE)

  # Remove suffix only: "dm2 " -> "dm2".
  self.assertEqual(
      ap_parsing_utils.normalize_labeled_char_span(unknown_span(8, 12), tokens),
      unknown_span(8, 11))

  # Remove prefix only: ":\n # dm2" -> "dm2".
  self.assertEqual(
      ap_parsing_utils.normalize_labeled_char_span(unknown_span(3, 11), tokens),
      unknown_span(8, 11))
def test_usage(self):
  """normalize_labeled_char_spans_iterable normalizes spans, dropping empties."""
  #       0 12     34     56 78   90
  text = "some longer tokens in this test"
  #       0123456789012345678901234567890
  #       0         1         2         3
  tokens = tokenizer_lib.tokenize(text)

  def unknown_span(start, end):
    return ap_parsing_lib.LabeledCharSpan(
        span_type=ap_parsing_lib.LabeledSpanType.UNKNOWN_TYPE,
        start_char=start,
        end_char=end)

  inputs = [
      unknown_span(5, 18),   # "longer tokens" - already normalized.
      unknown_span(14, 25),  # "kens in thi" -> "tokens in this"
      unknown_span(18, 19),  # Invalid - only space.
  ]
  expected = [
      unknown_span(5, 18),   # "longer tokens"
      unknown_span(12, 26),  # "tokens in this"
  ]
  self.assertEqual(
      ap_parsing_utils.normalize_labeled_char_spans_iterable(inputs, tokens),
      expected)
def test_labeled_char_spans_to_token_spans(self):
  """Round-trips labeled spans between char and token coordinates."""
  # Char space:
  # 0         1         2          3
  # 01234567890123456 78901234 56789012345
  text = "# DM2: on insulin\n # COPD\n- nebs prn"
  # Token:012 3456 78 90123 4567 89
  #       0          1
  tokens = tokenizer_lib.tokenize(text)

  problem_title = ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE
  action_item = ap_parsing_lib.LabeledSpanType.ACTION_ITEM

  char_spans = [
      ap_parsing_lib.LabeledCharSpan(
          span_type=problem_title, start_char=2,
          end_char=17),  # "DM2: on insulin"
      ap_parsing_lib.LabeledCharSpan(
          span_type=problem_title, start_char=21, end_char=25),  # "COPD"
      ap_parsing_lib.LabeledCharSpan(
          span_type=action_item, start_char=28, end_char=32),  # "nebs"
  ]
  token_spans = [
      ap_parsing_lib.LabeledTokenSpan(
          span_type=problem_title, start_token=2,
          end_token=9),  # "DM2: on insulin"
      ap_parsing_lib.LabeledTokenSpan(
          span_type=problem_title, start_token=13, end_token=14),  # "COPD"
      ap_parsing_lib.LabeledTokenSpan(
          span_type=action_item, start_token=17, end_token=18),  # "nebs"
  ]

  # Char -> token direction.
  to_token = functools.partial(
      ap_parsing_utils.labeled_char_span_to_labeled_token_span, tokens=tokens)
  self.assertEqual(token_spans, [to_token(span) for span in char_spans])

  # Token -> char direction.
  to_char = functools.partial(
      ap_parsing_utils.labeled_token_span_to_labeled_char_span, tokens=tokens)
  self.assertEqual(char_spans, [to_char(span) for span in token_spans])
def build(cls, text, labeled_char_spans):
  """Builds structured AP inplace from text and labels.

  Bundles together labels into clusters based on problem titles.

  Args:
    text: str, text of A&P section.
    labeled_char_spans: LabeledCharSpans, which are converted to cluster
      fragments.

  Returns:
    An instance of StructuredAP.
  """
  tokens = tokenizer_lib.tokenize(text)
  labeled_char_spans = ap_parsing_utils.normalize_labeled_char_spans_iterable(
      labeled_char_spans, tokens)
  labeled_char_spans.sort(key=lambda x: x.start_char)

  structured_ap = cls(problem_clusters=list(), prefix_text="")

  # Guard: normalization may drop all spans (e.g. whitespace-only labels).
  # Without it, labeled_char_spans[0] below raises IndexError; instead treat
  # the entire text as prefix text with no problem clusters.
  if not labeled_char_spans:
    structured_ap.prefix_text = text
    return structured_ap

  structured_ap._parse_problem_clusters(labeled_char_spans, text)  # pylint: disable=protected-access

  # Everything before the first labeled span becomes (normalized) prefix text.
  prefix_text_span = ap_parsing_utils.normalize_labeled_char_span(
      ap_parsing_lib.LabeledCharSpan(
          start_char=0,
          end_char=labeled_char_spans[0].start_char,
          span_type=ap_parsing_lib.LabeledSpanType.UNKNOWN_TYPE), tokens)
  structured_ap.prefix_text = (
      text[prefix_text_span.start_char:prefix_text_span.end_char]
      if prefix_text_span else "")
  return structured_ap
def test_get_converted_labels(self):
  """generate_model_labels yields BIO fragment labels and action item labels."""
  ap_text = "\n".join([
      "50 yo m with hx of copd, dm2",
      "#. COPD ex: started on abx in ED.",
      " - continue abx.",
  ])
  tokens = tokenizer_lib.tokenize(ap_text)
  labels = [
      ap_parsing_lib.LabeledCharSpan(
          span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
          start_char=32,
          end_char=39),  # span_text="COPD ex"
      ap_parsing_lib.LabeledCharSpan(
          span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_DESCRIPTION,
          start_char=41,
          end_char=63),  # span_text="started on abx in ED.\n"
      ap_parsing_lib.LabeledCharSpan(
          span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
          start_char=67,
          end_char=80,
          action_item_type=ap_parsing_lib.ActionItemType.MEDICATIONS,
      ),  # span_text="continue abx."
  ]

  converted_labels = data_lib.generate_model_labels(labels, tokens)

  # Token-level fragment labels, BIO-style: odd values begin a fragment,
  # even values continue it (PT=1/2, PD=3/4, AI=5/6).
  expected_fragment_labels = np.zeros(45)
  for token_slice, label_value in (
      (slice(21, 22), 1),  # B-PT "COPD ex"
      (slice(22, 24), 2),  # I-PT "COPD ex"
      (slice(26, 27), 3),  # B-PD "started on abx in ED"
      (slice(27, 35), 4),  # I-PD "started on abx in ED"
      (slice(41, 42), 5),  # B-AI "continue abx"
      (slice(42, 44), 6),  # I-AI "continue abx"
  ):
    expected_fragment_labels[token_slice] = label_value

  expected_ai_labels = np.zeros(45)
  expected_ai_labels[41:44] = 1  # "continue abx" - medications.

  self.assertAllEqual(converted_labels["fragment_type"],
                      expected_fragment_labels)
  self.assertAllEqual(converted_labels["action_item_type"], expected_ai_labels)
def test_metadata(self):
  """Normalization preserves span_type and action_item_type metadata."""
  text = "a&p:\n - nebs "
  tokens = tokenizer_lib.tokenize(text)

  def medication_span(start, end):
    return ap_parsing_lib.LabeledCharSpan(
        start_char=start,
        end_char=end,
        span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
        action_item_type=ap_parsing_lib.ActionItemType.MEDICATIONS)

  # ":\n - neb" normalizes to "nebs"; metadata carried through.
  self.assertEqual(
      ap_parsing_utils.normalize_labeled_char_span(
          medication_span(3, 11), tokens),
      medication_span(8, 12))
def convert_ratings(csv_labeled_char_span):
  """Converts labeled spans from CSV to LabeledCharSpan object."""
  labeled_char_span = ap_parsing_lib.LabeledCharSpan(
      start_char=csv_labeled_char_span.char_start,
      end_char=csv_labeled_char_span.char_end,
      span_type=ap_parsing_lib.LabeledSpanType[
          csv_labeled_char_span.span_type])

  # action_item_type is optional in the CSV; only set it when present.
  ai_type_name = csv_labeled_char_span.action_item_type
  if ai_type_name:
    labeled_char_span.action_item_type = (
        ap_parsing_lib.ActionItemType[ai_type_name])

  return (str(csv_labeled_char_span.note_id), labeled_char_span)
def test_compile(self):
  """compile() renders clusters using each fragment's delimiters."""
  span_types = ap_parsing_lib.LabeledSpanType

  def fragment(span_type, start, end, text, prefix_delim, suffix_delim):
    return aug_lib.ProblemClusterFragment(
        labeled_char_span=ap_parsing_lib.LabeledCharSpan(
            span_type=span_type, start_char=start, end_char=end),
        text=text,
        prefix_delim=prefix_delim,
        suffix_delim=suffix_delim)

  structured_ap = aug_lib.StructuredAP(
      prefix_text="50 yo m with hx of dm2, copd",
      problem_clusters=[
          aug_lib.ProblemCluster(fragments=[
              fragment(span_types.PROBLEM_TITLE, 29, 32, "dm2", "\n*. ", ": "),
              fragment(span_types.PROBLEM_DESCRIPTION, 34, 44, "on insulin",
                       "", ""),
              fragment(span_types.ACTION_ITEM, 47, 51, "RISS", "\n- ", ""),
          ])
      ])

  result, _ = structured_ap.compile()
  self.assertEqual(result,
                   "50 yo m with hx of dm2, copd\n*. dm2: on insulin\n- RISS")
def test_build(self):
  """build() clusters labeled spans and applies the default delimiters."""
  ap = "\n".join(
      ["50 yo m with hx of dm2, copd", "dm2: on insulin", "- RISS"])
  span_types = ap_parsing_lib.LabeledSpanType

  def char_span(span_type, start, end):
    return ap_parsing_lib.LabeledCharSpan(
        span_type=span_type, start_char=start, end_char=end)

  def fragment(span_type, start, end, text, prefix_delim, suffix_delim):
    return aug_lib.ProblemClusterFragment(
        labeled_char_span=char_span(span_type, start, end),
        text=text,
        prefix_delim=prefix_delim,
        suffix_delim=suffix_delim)

  labeled_char_spans = [
      char_span(span_types.PROBLEM_TITLE, 29, 32),
      char_span(span_types.PROBLEM_DESCRIPTION, 34, 44),
      char_span(span_types.ACTION_ITEM, 47, 51),
  ]
  expected = aug_lib.StructuredAP(
      prefix_text="50 yo m with hx of dm2, copd",
      problem_clusters=[
          aug_lib.ProblemCluster(fragments=[
              fragment(span_types.PROBLEM_TITLE, 29, 32, "dm2",
                       aug_lib._DefaultDelims.PROBLEM_TITLE_PREFIX,
                       aug_lib._DefaultDelims.PROBLEM_TITLE_SUFFIX),
              fragment(span_types.PROBLEM_DESCRIPTION, 34, 44, "on insulin",
                       aug_lib._DefaultDelims.PROBLEM_DESCRIPTION_PREFIX, ""),
              fragment(span_types.ACTION_ITEM, 47, 51, "RISS",
                       aug_lib._DefaultDelims.ACTION_ITEM_PREFIX, ""),
          ])
      ])

  structured_ap = aug_lib.StructuredAP.build(ap, labeled_char_spans)
  self.assertEqual(structured_ap, expected)
def problem_cluster_to_labeled_char_spans(problem_cluster):
  """Convert regex annotator output to labeled char spans.

  Args:
    problem_cluster: ProblemCluster object containing character spans.

  Returns:
    A list of LabeledCharSpan corresponding to the spans in the cluster.
  """
  span_types = ap_parsing_lib.LabeledSpanType
  title_start, title_end = problem_cluster.problem_title

  # Problem title.
  labeled_char_spans = [
      ap_parsing_lib.LabeledCharSpan(
          span_type=span_types.PROBLEM_TITLE,
          start_char=title_start,
          end_char=title_end)
  ]

  # Problem descriptions. Spans with start_char <= 0 are skipped
  # (presumably empty/sentinel entries from the annotator - TODO confirm).
  labeled_char_spans.extend(
      ap_parsing_lib.LabeledCharSpan(
          span_type=span_types.PROBLEM_DESCRIPTION,
          start_char=pd_start,
          end_char=pd_end)
      for pd_start, pd_end in problem_cluster.problem_description
      if pd_start > 0)

  # Action items.
  labeled_char_spans.extend(
      ap_parsing_lib.LabeledCharSpan(
          span_type=span_types.ACTION_ITEM,
          start_char=ai_start,
          end_char=ai_end)
      for ai_start, ai_end in problem_cluster.action_items)

  return labeled_char_spans
def test_compile_with_labels(self):
  """compile() returns the rendered text plus spans re-based onto it."""
  span_types = ap_parsing_lib.LabeledSpanType

  def fragment(span_type, start, end, text, prefix_delim, suffix_delim):
    # Fragment spans are kept from the *original* text.
    return aug_lib.ProblemClusterFragment(
        labeled_char_span=ap_parsing_lib.LabeledCharSpan(
            span_type=span_type, start_char=start, end_char=end),
        text=text,
        prefix_delim=prefix_delim,
        suffix_delim=suffix_delim)

  structured_ap = aug_lib.StructuredAP(
      prefix_text="50 yo m with hx of dm2, copd",
      problem_clusters=[
          aug_lib.ProblemCluster(fragments=[
              fragment(span_types.PROBLEM_TITLE, 29, 32, "dm2", "\n*. ", ": "),
              fragment(span_types.PROBLEM_DESCRIPTION, 34, 44, "on insulin",
                       "", ""),
              fragment(span_types.ACTION_ITEM, 47, 51, "RISS", "\n- ", ""),
          ])
      ])

  expected_text = "50 yo m with hx of dm2, copd\n*. dm2: on insulin\n- RISS"
  # Spans in the output are relative to the freshly compiled text.
  expected_spans = [
      ap_parsing_lib.LabeledCharSpan(
          span_type=span_types.PROBLEM_TITLE,
          start_char=32,
          end_char=35),  # span_text="dm2"
      ap_parsing_lib.LabeledCharSpan(
          span_type=span_types.PROBLEM_DESCRIPTION,
          start_char=37,
          end_char=47),  # span_text="on insulin"
      ap_parsing_lib.LabeledCharSpan(
          span_type=span_types.ACTION_ITEM,
          start_char=50,
          end_char=54),  # span_text="RISS"
  ]

  result_ap_text, result_labeled_char_spans = structured_ap.compile()
  self.assertEqual((result_ap_text, result_labeled_char_spans),
                   (expected_text, expected_spans))
def labeled_token_span_to_labeled_char_span(labeled_token_span, tokens):
  """Converts labeled spans from token to character level.

  Args:
    labeled_token_span: A token level span.
    tokens: Document tokens.

  Returns:
    LabeledCharSpan: Character level labeled span.
  """
  token_span = (labeled_token_span.start_token, labeled_token_span.end_token)
  start_char, end_char = token_span_to_char_span(tokens, token_span)
  return ap_parsing_lib.LabeledCharSpan(
      span_type=labeled_token_span.span_type,
      action_item_type=labeled_token_span.action_item_type,
      start_char=start_char,
      end_char=end_char)
def test_midword(self):
  """Spans cut mid-word are extended out to full token boundaries."""
  text = "a&p:\n # COPD: on nebs "
  tokens = tokenizer_lib.tokenize(text)

  def unknown_span(start, end):
    return ap_parsing_lib.LabeledCharSpan(
        start_char=start,
        end_char=end,
        span_type=ap_parsing_lib.LabeledSpanType.UNKNOWN_TYPE)

  cases = [
      (6, 11),  # "# COP": extend word boundary right.
      (9, 14),  # "OPD: ": extend word boundary left.
      (9, 11),  # "OP": extend word boundary both directions.
  ]
  for start, end in cases:
    self.assertEqual(
        ap_parsing_utils.normalize_labeled_char_span(
            unknown_span(start, end), tokens),
        unknown_span(8, 12))  # "COPD"
def test_process_rating_labels(self):
  """Rating labels are clipped to the section and re-based to its offset."""

  def title_span(start, end):
    return ap_parsing_lib.LabeledCharSpan(
        span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
        start_char=start,
        end_char=end)

  rating_labels = [
      title_span(0, 50),     # before
      title_span(45, 65),    # partially contained
      title_span(50, 150),   # exactly matches section
      title_span(100, 105),  # contained
      title_span(150, 155),  # after
  ]
  # Section covers chars [50, 150); surviving spans are section-relative.
  expected = [title_span(0, 100), title_span(50, 55)]
  self.assertEqual(
      data_lib.process_rating_labels(rating_labels,
                                     note_section_lib.Section(50, 150, [])),
      expected)
def test_usage(self):
  """ApplyAugmentations emits the original APData plus augmented copies."""
  # One deterministic augmentation: rewrite problem-title delims to "\n".
  augmentation_config = aug_lib.AugmentationConfig(
      augmentation_sequences=[
          aug_lib.AugmentationSequence(
              name="test",
              augmentation_sequence=[
                  aug_lib.ChangeDelimAugmentation(
                      fragment_types=[
                          ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE
                      ],
                      delims=["\n"])
              ])
      ],
      augmentation_number_deterministic=1)
  ap_data = [
      (
          "0|10",
          data_lib.APData(
              note_id=0,
              subject_id=0,
              ap_text="a&p:\n # dm2:\n-RISS",
              labeled_char_spans=[
                  ap_parsing_lib.LabeledCharSpan(
                      span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                      start_char=8,
                      end_char=11),  # span_text="dm2",
                  ap_parsing_lib.LabeledCharSpan(
                      span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
                      action_item_type=ap_parsing_lib.ActionItemType.
                      MEDICATIONS,
                      start_char=14,
                      end_char=18)  # span_text="RISS",
              ])),
  ]
  # Output keeps the input record and appends one augmented record whose
  # text and span offsets reflect the rewritten delimiters.
  expected = [
      *ap_data,
      (
          "0|10",
          data_lib.APData(
              note_id=0,
              subject_id=0,
              ap_text="a&p\ndm2:\n- RISS",
              tokens=tokenizer_lib.tokenize("a&p\ndm2:\n- RISS"),
              labeled_char_spans=[
                  ap_parsing_lib.LabeledCharSpan(
                      span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                      start_char=4,
                      end_char=7),  # span_text="dm2",
                  ap_parsing_lib.LabeledCharSpan(
                      span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
                      action_item_type=ap_parsing_lib.ActionItemType.
                      MEDICATIONS,
                      start_char=11,
                      end_char=15)  # span_text="RISS",
              ],
              augmentation_name="test")),
  ]
  with test_pipeline.TestPipeline() as p:
    results = (
        p
        | beam.Create(ap_data)
        | beam.ParDo(data_lib.ApplyAugmentations(), augmentation_config))
    util.assert_that(results, util.equal_to(expected))
def setUp(self):
  """Builds a StructuredAP fixture with four problem clusters."""
  super().setUp()

  span_types = ap_parsing_lib.LabeledSpanType
  ai_types = ap_parsing_lib.ActionItemType

  def fragment(span_type, start, end, text, action_item_type=None):
    # All fixture fragments use empty prefix/suffix delimiters; the
    # action_item_type kwarg is only passed through when given, mirroring
    # the original per-fragment constructor calls.
    span_kwargs = dict(span_type=span_type, start_char=start, end_char=end)
    if action_item_type is not None:
      span_kwargs["action_item_type"] = action_item_type
    return aug_lib.ProblemClusterFragment(
        labeled_char_span=ap_parsing_lib.LabeledCharSpan(**span_kwargs),
        text=text,
        prefix_delim="",
        suffix_delim="")

  self.problem_clusters = [
      aug_lib.ProblemCluster(fragments=[
          fragment(span_types.PROBLEM_TITLE, 29, 32, "dm2"),
          fragment(span_types.PROBLEM_DESCRIPTION, 34, 44, "on insulin"),
          fragment(span_types.ACTION_ITEM, 47, 51, "RISS",
                   ai_types.MEDICATIONS),
      ]),
      aug_lib.ProblemCluster(fragments=[
          fragment(span_types.PROBLEM_TITLE, 52, 58, "anemia"),
          fragment(span_types.ACTION_ITEM, 59, 64, "trend",
                   ai_types.OBSERVATIONS_LABS),
      ]),
      aug_lib.ProblemCluster(fragments=[
          fragment(span_types.PROBLEM_TITLE, 65, 69, "COPD"),
          fragment(span_types.ACTION_ITEM, 70, 74, "nebs",
                   ai_types.MEDICATIONS),
      ]),
      aug_lib.ProblemCluster(fragments=[
          fragment(span_types.PROBLEM_TITLE, 75, 81, "sepsis"),
          fragment(span_types.PROBLEM_DESCRIPTION, 82, 93, "dd pna, uti"),
          fragment(span_types.PROBLEM_DESCRIPTION, 94, 117,
                   "yesterday without fever"),
          fragment(span_types.ACTION_ITEM, 118, 127, "cont. abx",
                   ai_types.MEDICATIONS),
          fragment(span_types.ACTION_ITEM, 128, 131, "cis",
                   ai_types.OBSERVATIONS_LABS),
          fragment(span_types.ACTION_ITEM, 132, 142, "id consult",
                   ai_types.CONSULTS),
      ]),
  ]
  self.ap = aug_lib.StructuredAP(
      problem_clusters=self.problem_clusters, prefix_text="")
def test_multiratings(self):
  """Two raters on the same note yield one (identical) APData per rating."""
  section_markers = {
      "hpi": ["history of present illness"],
      "a&p": ["assessment and plan"],
  }
  ap_text = "a&p:\n # dm2:\n-RISS"
  # Two ratings over the same note; char offsets are note-relative
  # (the A&P section starts at char 10, after "blablabla\n").
  notes_with_ratings = [("0", {
      "notes": [
          data_lib.Note(
              note_id=0,
              text="blablabla\n" + ap_text,
              subject_id=0,
              category="PHYSICIAN")
      ],
      "ratings": [[
          ap_parsing_lib.LabeledCharSpan(
              span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
              start_char=19,
              end_char=22),
          ap_parsing_lib.LabeledCharSpan(
              span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
              start_char=24,
              end_char=28)
      ], [
          ap_parsing_lib.LabeledCharSpan(
              span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
              start_char=18,
              end_char=22),
          ap_parsing_lib.LabeledCharSpan(
              span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
              start_char=25,
              end_char=28)
      ]],
      "note_partition": ["test", "test"]
  })]
  # Both ratings normalize to the same section-relative spans, so the
  # pipeline emits the same APData twice.
  expected = [(
      "0|10",
      data_lib.APData(
          partition=data_lib.Partition.TEST,
          note_id="0",
          subject_id="0",
          ap_text=ap_text,
          char_offset=10,
          tokens=tokenizer_lib.tokenize(ap_text),
          labeled_char_spans=[
              ap_parsing_lib.LabeledCharSpan(
                  span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                  start_char=8,
                  end_char=11),
              ap_parsing_lib.LabeledCharSpan(
                  span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
                  start_char=14,
                  end_char=18)
          ]))] * 2
  with test_pipeline.TestPipeline() as p:
    results = (
        p
        | beam.Create(notes_with_ratings)
        | beam.ParDo(
            data_lib.ProcessAPData(filter_inorganic_threshold=0),
            section_markers))
    util.assert_that(results, util.equal_to(expected))
def test_usage(self):
  """ProcessAPData handles both rated and non-rated notes."""
  section_markers = {
      "hpi": ["history of present illness"],
      "a&p": ["assessment and plan"],
  }
  ap_texts = ["a&p:\n # dm2:\n-RISS", "a&p:\n # COPD:\n-nebs"]
  # Note "0" has a rating (-> VAL partition); note "1" has none
  # (-> NONRATED, labels come from the automatic annotator).
  notes_with_ratings = [("0", {
      "notes": [
          data_lib.Note(
              note_id=0,
              text="blablabla\n" + ap_texts[0],
              subject_id=0,
              category="PHYSICIAN")
      ],
      "ratings": [[
          ap_parsing_lib.LabeledCharSpan(
              span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
              start_char=19,
              end_char=22),
          ap_parsing_lib.LabeledCharSpan(
              span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
              action_item_type=ap_parsing_lib.ActionItemType.MEDICATIONS,
              start_char=24,
              end_char=28)
      ]],
      "note_partition": ["val"]
  })] + [("1", {
      "notes": [
          data_lib.Note(
              note_id=1,
              text="blablabla\n" + ap_texts[1],
              subject_id=1,
              category="PHYSICIAN")
      ],
      "ratings": [],
      "note_partition": []
  })]
  expected = [
      ("0|10",
       data_lib.APData(
           partition=data_lib.Partition.VAL,
           note_id="0",
           subject_id="0",
           ap_text=ap_texts[0],
           char_offset=10,
           tokens=tokenizer_lib.tokenize(ap_texts[0]),
           labeled_char_spans=[
               ap_parsing_lib.LabeledCharSpan(
                   span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                   start_char=8,
                   end_char=11),
               ap_parsing_lib.LabeledCharSpan(
                   span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
                   action_item_type=ap_parsing_lib.ActionItemType.MEDICATIONS,
                   start_char=14,
                   end_char=18)
           ])),
      ("1|10",
       data_lib.APData(
           partition=data_lib.Partition.NONRATED,
           note_id="1",
           subject_id="1",
           ap_text=ap_texts[1],
           char_offset=10,
           tokens=tokenizer_lib.tokenize(ap_texts[1]),
           labeled_char_spans=[
               ap_parsing_lib.LabeledCharSpan(
                   span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                   start_char=8,
                   end_char=12),
               ap_parsing_lib.LabeledCharSpan(
                   span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
                   start_char=15,
                   end_char=19)
           ]))
  ]
  with test_pipeline.TestPipeline() as p:
    results = (
        p
        | beam.Create(notes_with_ratings)
        | beam.ParDo(
            data_lib.ProcessAPData(filter_inorganic_threshold=0),
            section_markers))
    util.assert_that(results, util.equal_to(expected))