def determine_iud_expulsion(document: Document):
    """Yield expulsion-related findings for a document.

    Yields ``(ExpulsionStatus, history, text)`` tuples. When the document does
    not match ``IUD`` a single ``(ExpulsionStatus.SKIP, None, document.text)``
    is produced. Otherwise every sentence selected with ``IUD`` is checked
    against each status pattern in turn (a sentence may yield several
    statuses), and finally the document is checked for ``STRING_HANGING``.

    :param document: document to inspect
    :return: generator of ``(status, history, text)``; ``history`` is a bool
        derived from ``PREVIOUS`` for sentence-level results and ``None`` for
        the document-level results
    """
    if not document.has_patterns(IUD):
        yield ExpulsionStatus.SKIP, None, document.text
        return

    # (patterns, keyword arguments, status) evaluated in this order per sentence
    ordered_checks = (
        ((LOWER_UTERINE_SEGMENT,), {}, ExpulsionStatus.LOWER_UTERINE_SEGMENT),
        ((COMPLETE,), {}, ExpulsionStatus.EXPULSION),
        ((PARTIAL_EXP,), {}, ExpulsionStatus.PARTIAL),
        ((MISSING, STRINGS), {'has_all': True}, ExpulsionStatus.MISSING_STRING),
        ((PROPER_LOCATION,), {}, ExpulsionStatus.PROPER_PLACEMENT),
        ((IN_UTERUS,), {}, ExpulsionStatus.IN_UTERUS),
    )

    for sentence in document.select_sentences_with_patterns(IUD):
        is_history = bool(sentence.has_patterns(PREVIOUS))

        misplaced = (
            sentence.has_patterns(INCORRECT, PLACEMENT, has_all=True)
            or sentence.has_patterns(MALPOSITION)
            or sentence.has_patterns(DISPLACEMENT)
        )
        # a malposition mention is not reported when IN_CERVIX also matches
        if misplaced and not sentence.has_patterns(IN_CERVIX):
            yield ExpulsionStatus.MALPOSITION, is_history, sentence.text

        for patterns, options, status in ordered_checks:
            if sentence.has_patterns(*patterns, **options):
                yield status, is_history, sentence.text

    # document-level check, performed after all IUD sentences were processed
    hanging = document.select_all_sentences_with_patterns(STRING_HANGING)
    if hanging:
        yield ExpulsionStatus.HANGING_STRING, None, hanging.text
def determine_iud_brand(document: Document):
    """Collect IUD brand mentions found in a document.

    Returns a list of ``(BrandStatus, flag, text)`` tuples:

    * ``[(BrandStatus.SKIP, False, None)]`` when the document matches
      ``EXCLUDE`` or does not match any of ``IUD`` (negation ignored);
    * one entry per (sentence window, brand) match, where ``flag`` is the
      result of ``has_patterns(USING)`` on that window;
    * if no brand was found, a single ``BrandStatus.LNG`` entry carrying the
      document text when ``LNG`` matches, else a single ``BrandStatus.NONE``
      entry.

    :param document: document to inspect
    :return: non-empty list of ``(status, flag, text)`` tuples
    """
    if document.has_patterns(EXCLUDE) or not document.has_patterns(*IUD, ignore_negation=True):
        return [(BrandStatus.SKIP, False, None)]

    # brand patterns paired with the status they produce, checked in this order
    brand_checks = (
        ((PARAGARD, COPPER), BrandStatus.PARAGARD),
        ((MIRENA,), BrandStatus.MIRENA),
        ((LILETTA,), BrandStatus.LILETTA),
        ((KYLEENA,), BrandStatus.KYLEENA),
        ((SKYLA,), BrandStatus.SKYLA),
    )

    found = []
    windows = document.select_sentences_with_patterns(*IUD, neighboring_sentences=1)
    for window in windows:
        for patterns, status in brand_checks:
            if window.has_patterns(*patterns):
                found.append((status, window.has_patterns(USING), window.text))

    if found:
        return found

    # only fall back to LNG when no brand name was found
    if document.has_patterns(LNG):
        return [(BrandStatus.LNG, False, document.text)]
    return [(BrandStatus.NONE, False, None)]
def determine_iud_removal(document: Document):
    """Yield removal-related findings for a document.

    Yields ``(RemoveStatus, text)`` tuples. A document that matches none of
    ``ALL`` (negation ignored) produces a single ``RemoveStatus.SKIP`` with the
    document text. Otherwise each sentence selected with ``IUD`` is examined:
    sentences matching ``REMOVE_BY`` are ignored, sentences matching ``ALL``
    yield one result per matching removal pattern, and the text of the
    remaining sentences is collected. After the loop the collected text is
    yielded as ``RemoveStatus.NONE``; if nothing was collected a
    ``RemoveStatus.SKIP`` with the document text is yielded instead.

    :param document: document to inspect
    :return: generator of ``(status, text)``
    """
    if not document.has_patterns(*ALL, ignore_negation=True):
        yield RemoveStatus.SKIP, document.text
        return

    # pattern -> status, evaluated in this order for each qualifying sentence
    status_checks = (
        (DEF_REMOVE, RemoveStatus.DEF_REMOVE),
        (DEF_REPLACE, RemoveStatus.DEF_REPLACE),
        (PROB_REMOVE, RemoveStatus.REMOVE),
        (TOOL, RemoveStatus.TOOL_REMOVE),
        (REMOVE, RemoveStatus.REMOVE),
        (PLAN, RemoveStatus.PLAN),
    )

    unmatched_text = []
    for sentence in document.select_sentences_with_patterns(IUD):
        if sentence.has_pattern(REMOVE_BY):
            continue
        if not sentence.has_patterns(*ALL):
            # IUD sentence without any removal language
            unmatched_text.append(sentence.text)
            continue
        # sentence has removal language; report every specific pattern it matches
        for pattern, status in status_checks:
            if sentence.has_patterns(pattern):
                yield status, sentence.text

    if unmatched_text:
        yield RemoveStatus.NONE, ' '.join(unmatched_text)
    else:
        yield RemoveStatus.SKIP, document.text
def determine_iud_expulsion_rad(document: Document):
    """Yield expulsion-related findings from a sectioned (radiology-style) document.

    The document is split with ``SECTIONS`` and three groups of sections are
    looked up by name: a history/clinical-information group, an impression
    group and a findings group. Results are ``(ExpulsionStatus, text)`` tuples.

    Flow:

    1. If an impression section exists, every matching status is yielded from
       it. Some of those matches set ``found``; when ``found`` is still true at
       the end of the impression checks the generator stops.
    2. Otherwise, if the history group matches ``IUD`` + ``IUD_PRESENT`` or
       ``STRING`` + ``VISIBLE``, the findings group is checked.
    3. Otherwise each sentence selected with ``IUD`` is checked; if there is no
       such sentence a single ``(ExpulsionStatus.SKIP, None)`` is yielded.

    :param document: document to inspect
    :return: generator of ``(status, text)``
    """
    sections = document.split(SECTIONS)
    start_section = sections.get_sections('HST', 'SAS', 'HISTORY', 'CLINICAL INFORMATION',
                                          'CLINICAL HISTORY AND QUESTION')
    impression = sections.get_sections('IMPRESSION', 'IMPRESSIONS', 'IMP')
    other = sections.get_sections('FINDINGS', 'TRANSVAGINAL', 'FINDING')
    # tracks whether the impression alone produced a result that ends the search
    found = False
    if impression:
        if impression.has_patterns(PARTIAL_EXP):
            found = True
            yield ExpulsionStatus.PARTIAL, impression.text
        if impression.has_patterns(NOT_SEEN_IUD, IUD_NOT_SEEN):
            found = True
            yield ExpulsionStatus.LOST, impression.text
        if impression.has_patterns(MALPOSITION, MALPOSITION_IUD):
            found = True
            yield ExpulsionStatus.MALPOSITION, impression.text
        if impression.has_patterns(PROPER_LOCATION):
            found = True
            yield ExpulsionStatus.PROPER_PLACEMENT, impression.text
        # NOTE(review): the two branches below assign False rather than leaving
        # `found` untouched, so they also cancel a True set by an earlier
        # branch -- confirm this reset is intended.
        if impression.has_patterns(IN_UTERUS):
            found = False  # don't count this one
            yield ExpulsionStatus.IN_UTERUS, impression.text
        if impression.has_patterns(LOWER_UTERINE_SEGMENT):
            found = False
            yield ExpulsionStatus.LOWER_UTERINE_SEGMENT, impression.text
    if found:
        return
    elif start_section.has_patterns(IUD, IUD_PRESENT, has_all=True) or \
            start_section.has_patterns(STRING, VISIBLE, has_all=True):
        # history group mentions the IUD/strings: use the findings group
        # (no LOST check is performed here, unlike for the impression)
        if other.has_patterns(PARTIAL_EXP):
            yield ExpulsionStatus.PARTIAL, other.text
        if other.has_patterns(MALPOSITION, MALPOSITION_IUD):
            yield ExpulsionStatus.MALPOSITION, other.text
        if other.has_patterns(PROPER_LOCATION):
            yield ExpulsionStatus.PROPER_PLACEMENT, other.text
        if other.has_patterns(IN_UTERUS):
            yield ExpulsionStatus.IN_UTERUS, other.text
        if other.has_patterns(LOWER_UTERINE_SEGMENT):
            yield ExpulsionStatus.LOWER_UTERINE_SEGMENT, other.text
    else:
        # fall back to sentence-level checks across the whole document
        sentences = list(document.select_sentences_with_patterns(IUD))
        if sentences:
            for sentence in sentences:
                if sentence.has_patterns(PARTIAL_EXP):
                    yield ExpulsionStatus.PARTIAL, sentence.text
                if sentence.has_patterns(MALPOSITION, MALPOSITION_IUD):
                    yield ExpulsionStatus.MALPOSITION, sentence.text
                if sentence.has_patterns(PROPER_LOCATION):
                    yield ExpulsionStatus.PROPER_PLACEMENT, sentence.text
                if sentence.has_patterns(IN_UTERUS):
                    yield ExpulsionStatus.IN_UTERUS, sentence.text
                if sentence.has_patterns(LOWER_UTERINE_SEGMENT):
                    yield ExpulsionStatus.LOWER_UTERINE_SEGMENT, sentence.text
        else:
            # no sentence selected with IUD at all
            yield ExpulsionStatus.SKIP, None
def determine_parity(document: Document):
    """Determine a parity status for a document.

    The checks below are tried in order; the first one that produces a status
    decides the result.

    1. ``GRAVIDA_0`` / ``PARA_0`` / ``NULLIPAROUS`` / ``CHILD_0`` ->
       ``ParityStatus.P0`` with the source reported by ``get_patterns``.
    2. ``G_PARA_N`` (captures gravida and para together). A four-character
       para capture (e.g. ``'1001'``) is reduced to its first character. When
       gravida >= para the para value is used; when gravida == para - 1 the
       gravida value is used instead (assumed twins counted as extra parity);
       otherwise the mismatch is logged and the search continues.
    3. ``PARA_NNNN`` / ``PARA_N`` / ``CHILD_N`` / ``CHILD_NUM`` with a single
       captured value.
    4. ``MULTIPAROUS`` -> ``ParityStatus.MULTIPAROUS``.
    5. Nothing matched -> ``ParityStatus.SKIP``.

    :param document: document to inspect
    :return: tuple ``(status, text, source)``; ``text`` is ``None`` and
        ``source`` is ``ParitySource.NONE`` when nothing was found
    :raises ValueError: if a ``G_PARA_N`` capture is not numeric
    :raises TypeError: if a ``G_PARA_N`` capture cannot be passed to ``int``
        (e.g. it is ``None``)
    """
    res = document.get_patterns(GRAVIDA_0, PARA_0, NULLIPAROUS, CHILD_0,
                                names=[ParitySource.GRAVIDA, ParitySource.PARITY,
                                       ParitySource.PARITY, ParitySource.CHILDREN])
    if res:
        text, src = res
        if text:
            return ParityStatus.P0, text, src

    # patterns for grav and para; confirm grav >= para
    res = document.get_patterns(G_PARA_N, index=(0, 1, 2))
    if res:
        text, capt_g, capt_p = res
        try:
            g = int(capt_g)
            p = int(capt_p)
        except ValueError as err:
            # chain explicitly so the original conversion failure stays visible
            raise ValueError(
                f'Values of g/p do not appear to be numeric: {capt_p}, {capt_g} in {text}'
            ) from err
        except TypeError as err:
            raise TypeError(
                f'Values of g/p are None (or similar): {capt_p}, {capt_g} in {text}'
            ) from err

        if len(capt_p) == 4:
            # four-character para capture: only the first character is used
            capt_p = capt_p[0]
            p = int(capt_p)
        if g >= p:
            status = extract_status(capt_p)
            if status:
                return status, text, ParitySource.PARITY
        elif g == p - 1:  # common error: counting twins as extra parity
            logging.warning(f'Including Gravida {g}, Parity {p} '
                            f'(assuming twins counted as extra parity): {document.name}')
            status = extract_status(capt_g)  # use gravida not parity
            if status:
                return status, text, ParitySource.GRAVIDA
        else:
            logging.info(f'Gravida {g} < Parity {p}: {document.name}')

    res = document.get_patterns(PARA_NNNN, PARA_N, CHILD_N, CHILD_NUM, index=(0, 1),
                                names=[ParitySource.PARITY, ParitySource.PARITY,
                                       ParitySource.CHILDREN, ParitySource.CHILDREN])
    if res and res[0]:
        (text, captured), src = res
        if captured:
            status = extract_status(captured)
            if status:
                return status, text, src
            # e.g., "para 13 %", "multipara @ 32+2/6weeks
            # Log the captured value: `status` is always falsy on this branch,
            # so interpolating it would hide the value that was rejected.
            logging.info(f'Unrecognized parity value for {document.name}: {captured} in "{text}" from {src}')

    text = document.get_patterns(MULTIPAROUS)
    if text:
        return ParityStatus.MULTIPAROUS, text, ParitySource.PARITY
    return ParityStatus.SKIP, None, ParitySource.NONE
def determine_difficult_insertion(document: Document):
    """Yield indicators related to a difficult IUD insertion.

    Yields ``(DiffInsStatus, text)`` tuples.

    * A document matching ``NOT_IUD_INSERTION`` yields a single
      ``(DiffInsStatus.SKIP, None)``.
    * A document that does not match ``IUD`` (negation ignored) yields nothing.
    * Otherwise document-level ``SUCCESSFUL_INSERTION`` / ``CANNOT_PLACE``
      matches are reported first, followed by per-window checks on the
      sentences selected with ``INSERTION`` (one neighbouring sentence each
      side). A window matching ``EASY_INSERTION`` is reported as
      ``NOT_DIFFICULT`` and none of the remaining indicators are checked for it.

    :param document: document to inspect
    :return: generator of ``(status, text)``
    """
    if document.has_patterns(NOT_IUD_INSERTION):
        yield DiffInsStatus.SKIP, None
        return
    if not document.has_patterns(IUD, ignore_negation=True):
        return

    # document-level outcome statements
    for pattern, outcome in (
        (SUCCESSFUL_INSERTION, DiffInsStatus.SUCCESSFUL),
        (CANNOT_PLACE, DiffInsStatus.UNSUCCESSFUL),
    ):
        matched = document.get_pattern(pattern)
        if matched:
            yield outcome, matched

    if not document.has_patterns(INSERTION, ignore_negation=True):
        return

    # indicators only checked when the window is not an "easy insertion"
    difficulty_checks = (
        ((PROVIDER,), {}, DiffInsStatus.PROVIDER_STATEMENT),
        ((US, US_USED), {'has_all': True}, DiffInsStatus.ULTRASOUND_GUIDANCE),
        ((PARACERV,), {}, DiffInsStatus.PARACERVICAL_BLOCK),
        ((MISOPROSTOL,), {}, DiffInsStatus.MISOPROSTOL),
        ((CERV_DIL,), {}, DiffInsStatus.CERVICAL_DILATION),
    )

    for window in document.select_sentences_with_patterns(INSERTION, neighboring_sentences=1):
        if window.has_patterns(UNSUCCESSFUL_INSERTION, has_all=True):
            yield DiffInsStatus.UNSUCCESSFUL, window.text
        if window.has_patterns(EASY_INSERTION):
            yield DiffInsStatus.NOT_DIFFICULT, window.text
            continue
        for patterns, options, indicator in difficulty_checks:
            if window.has_patterns(*patterns, **options):
                yield indicator, window.text
def determine_iud_insertion(document: Document):
    """Classify the IUD insertion described in a document.

    Returns ``(InsertionStatus, text)``:

    * ``(NO_MENTION, None)`` when the document does not match both ``IUD`` and
      ``INSERTION``;
    * ``(UNKNOWN, None)`` when no sentences are selected for
      ``IUD`` / ``INSERTION`` / ``STRINGS`` (with ``HISTORICAL``,
      ``APPOINTMENT``, ``DATE`` and ``NEGATED`` passed as ``negation``);
    * otherwise the status of the first matching pattern group, in the
      precedence order listed below, with the selected text; ``UNKNOWN`` with
      the selected text when none of the groups match.

    :param document: document to inspect
    :return: tuple ``(status, text)``
    """
    # discusses iud
    if not document.has_patterns(IUD, INSERTION, has_all=True):
        return InsertionStatus.NO_MENTION, None

    context = document.select_all_sentences_with_patterns(
        IUD, INSERTION, STRINGS,
        negation=[HISTORICAL, APPOINTMENT, DATE, NEGATED],
        neighboring_sentences=1,
    )
    if not context:
        return InsertionStatus.UNKNOWN, None

    # precedence matters: the first matching group decides the status
    precedence = (
        ((PRE_SUCCESS,), InsertionStatus.SUCCESS),
        ((UNSUCCESSFUL, NOT_SUCCESSFUL), InsertionStatus.FAILED),
        ((HYPOTHETICAL,), InsertionStatus.HYPOTHETICAL),
        ((POST_SUCCESS,), InsertionStatus.SUCCESS),
        ((POST_OP,), InsertionStatus.LIKELY_SUCCESS),
    )
    for patterns, status in precedence:
        if context.has_patterns(*patterns):
            return status, context.text
    return InsertionStatus.UNKNOWN, context.text
def determine_iud_perforation(document: Document):
    """Yield perforation-related findings for a document.

    Yields ``(PerforationStatus, text, date)`` tuples. A document matching none
    of ``ALL`` (negation ignored) yields a single ``(SKIP, None, None)``.
    Otherwise, for each sentence selected with ``IUD``, at most one of
    ``COMPLETE`` / ``PARTIAL`` / ``POSSIBLE`` / ``PERFORATION`` is yielded,
    optionally followed by ``LAPAROSCOPIC_REMOVAL``; ``date`` is whatever
    ``get_pattern(DATE_PAT)`` returns for that sentence. A final
    ``(NONE, document.text, None)`` is always yielded after the sentences.

    :param document: document to inspect
    :return: generator of ``(status, text, date)``
    """
    if not document.has_patterns(*ALL, ignore_negation=True):
        yield PerforationStatus.SKIP, None, None
        return

    # see if any sentences that contain "IUD" also contain perf/embedded
    for sentence in document.select_sentences_with_patterns(IUD):
        found_date = sentence.get_pattern(DATE_PAT)

        # mutually exclusive classification; EMBEDDED and MIGRATED checks are
        # currently disabled and therefore not part of this chain
        status = None
        if sentence.has_patterns(COMPLETE):
            status = PerforationStatus.COMPLETE
        elif sentence.has_patterns(PARTIAL):
            status = PerforationStatus.PARTIAL
        elif sentence.has_patterns(PERFORATION):
            if sentence.has_pattern(POSSIBLE):
                status = PerforationStatus.POSSIBLE
            else:
                status = PerforationStatus.PERFORATION
        if status is not None:
            yield status, sentence.text, found_date

        # check for laparoscopic removal -> suggests complete perf
        if sentence.has_patterns(LAPAROSCOPIC_REMOVAL):
            yield PerforationStatus.LAPAROSCOPIC_REMOVAL, sentence.text, found_date

    yield PerforationStatus.NONE, document.text, None
def get_next_from_corpus(directory=None, directories=None, version=None,
                         connections=None, skipper=None, start=0, end=None,
                         filenames=None, encoding='utf8'):
    """Iterate over the documents of a corpus.

    Items come first from ``get_next_from_directory`` and then from
    ``get_next_from_connections``. Items whose name is in ``skipper`` are
    dropped before counting; the remaining items are numbered from 0 and only
    those with ``start <= number < end`` are considered. Items that have
    neither text nor a path are not yielded, but still consume a number.

    :param directory: first directory to look through (for backwards compatibility)
    :param directories: list of directories to look through
    :param version: text|lemma|token
    :param connections: connections unpacked into ``get_next_from_connections``;
        a falsy value means none
    :param skipper: container of document names to skip; ignored when falsy
    :param start: number of the first item to yield
    :param end: number at which iteration stops (exclusive); a falsy value
        (``None`` or ``0``) means no upper limit
    :param filenames: forwarded to ``get_next_from_directory``
    :param encoding: forwarded to ``get_next_from_directory``
    :return: iterator yielding documents
    """
    sources = itertools.chain(
        get_next_from_directory(directory, directories, version, filenames, encoding),
        get_next_from_connections(*(connections or [])),
    )
    # skipped names are removed before numbering so they never consume a number
    candidates = (
        (name, path, text)
        for name, path, text in sources
        if not (skipper and name in skipper)
    )
    for number, (name, path, text) in enumerate(candidates):
        if number < start:
            continue
        if end and number >= end:
            break
        if not (text or path):  # one of these required
            continue
        yield Document(name, file=path, text=text)
def determine_breastfeeding(document: Document, expected=None):
    """Yield breastfeeding-related results for a document.

    Every result is built with ``Result`` (``expected`` is pre-bound via
    ``partial``) from a ``BreastfeedingStatus`` and, for sentence-level
    results, the matching text.

    Flow:

    1. ``BF_BOILERPLATE_SECTION`` is removed from the document; nothing is
       yielded if the remaining document is falsy or matches
       ``BF_BOILERPLATE_EXCLUDE`` / ``BF_UNKNOWN``.
    2. A ``BOILERPLATE`` result is yielded if ``BF_BOILERPLATE`` matches.
    3. Each sentence accepted by ``matches_nutrition_not_bf`` yields
       ``OTHER_NUTRITION``.
    4. Each sentence selected with ``ANY_BREAST`` is checked against the
       exact patterns; the weaker, non-exact patterns are only consulted when
       none of the flagged exact patterns matched and no boilerplate was seen.
    5. If the accumulated non-exact count reaches 2, a final ``MAYBE`` is
       yielded with the collected snippets joined by newlines.

    :param document: document to inspect
    :param expected: value passed through to every ``Result`` as ``expected``
    :return: generator of ``Result`` objects
    """
    my_result = partial(Result, expected=expected)
    has_boilerplate = False
    document = document.remove_patterns(BF_BOILERPLATE_SECTION)
    if not document:  # only boilerplate
        return
    if document.has_patterns(BF_BOILERPLATE_EXCLUDE, BF_UNKNOWN):
        return
    if document.has_patterns(BF_BOILERPLATE):
        yield my_result(BreastfeedingStatus.BOILERPLATE)
        has_boilerplate = True
    # running total (and supporting text) of weak, non-exact mentions
    non_exact_count = 0
    non_exact_count_snippets = []
    for sentence in document.sentences:
        # look for presence of "nutrition:" or "feeding:" followed by not info about breast
        if matches_nutrition_not_bf(sentence.text):
            yield my_result(BreastfeedingStatus.OTHER_NUTRITION, text=sentence.text)
    for section in document.select_sentences_with_patterns(ANY_BREAST):
        # set by the exact BREASTFEEDING / NO / stop / latching checks below;
        # the other exact checks (expressed, history, formula, pumping, bottle)
        # yield without setting it
        found_bf = False
        # pre boilerplate patterns: exact/not confused with boilerplate
        if section.has_patterns(BF_EXACT, BF_DURATION, BF_TYPE, BF_YES, BF_FEEDING):
            yield my_result(BreastfeedingStatus.BREASTFEEDING, text=section.text)
            found_bf = True
        if section.has_patterns(BF_NO_EXACT, BF_NOT, WHOLE_MILK):
            yield my_result(BreastfeedingStatus.NO, text=section.text)
            found_bf = True
        if section.has_patterns(EXPRESSED_MILK_EXACT):
            yield my_result(BreastfeedingStatus.EXPRESSED, text=section.text)
        if section.has_patterns(BF_HISTORY):
            yield my_result(BreastfeedingStatus.HISTORY, text=section.text)
        if section.has_patterns(FORMULA_EXACT):
            yield my_result(BreastfeedingStatus.FORMULA, text=section.text)
        if section.has_patterns(FORMULA_NO):
            yield my_result(BreastfeedingStatus.NO_FORMULA, text=section.text)
        if section.has_patterns(PUMPING_EXACT, PUMPING_ACTIVE):
            yield my_result(BreastfeedingStatus.PUMPING, text=section.text)
        if section.has_patterns(BOTTLE_EXACT):
            yield my_result(BreastfeedingStatus.BOTTLE, text=section.text)
        if section.has_patterns(BF_STOP):
            if section.has_patterns(AGO):  # "stopped 2 months ago"
                yield my_result(BreastfeedingStatus.STOP, text=section.text)
            elif section.has_patterns(BF_STOP_BAD):  # "stopped at 2 months age"
                yield my_result(BreastfeedingStatus.STOPPED_BEFORE, text=section.text)
            else:  # "stopped"
                yield my_result(BreastfeedingStatus.STOP, text=section.text)
            # NOTE(review): assumed to apply to all three stop branches, not
            # only the final `else` -- confirm against the original layout.
            found_bf = True
        if section.has_patterns(LATCHING):
            yield my_result(BreastfeedingStatus.BREASTFEEDING, text=section.text)
            found_bf = True
        if not found_bf and not has_boilerplate:
            # boilerplate: there is at least some template language
            # only non-boilerplate
            if section.has_patterns(NIPPLE_SHIELD, BF_SUPPLEMENT):
                yield my_result(BreastfeedingStatus.BREASTFEEDING, text=section.text)
            if section.has_pattern(BF_FEEDING, ignore_negation=True):
                yield my_result(BreastfeedingStatus.MAYBE, text=section.text)
            # with get_count=True the result is added to the running total
            cnt = section.has_patterns(BREAST_MILK, BF, PUMPING, EXPRESSED_MILK,
                                       MILK_TRANSFER, BREAST_PAIN, get_count=True)
            if cnt:
                non_exact_count += cnt
                non_exact_count_snippets.append(section.text)
        if section.has_patterns(LACTATION_VISIT):
            yield my_result(BreastfeedingStatus.LACTATION_VISIT, text=section.text)
    # a total of at least two weak mentions is reported as MAYBE
    if non_exact_count >= 2:
        yield my_result(BreastfeedingStatus.MAYBE, text='\n'.join(non_exact_count_snippets))
def test_gNpNNNN():
    """'G 1 P 1001' (four-digit para value) is expected to give P1 from parity."""
    document = Document(None, text='G 1 P 1001')
    status, _, source = determine_parity(document)
    assert (status, source) == (ParityStatus.P1, ParitySource.PARITY)
def test_g_lt_p():
    """'G 3 P 7' (gravida well below para) is expected to be skipped."""
    document = Document(None, text='G 3 P 7')
    status, _, source = determine_parity(document)
    assert (status, source) == (ParityStatus.SKIP, ParitySource.NONE)
def test_g_eq_p_plus_1():
    """'G 1 P 2' (para one above gravida) is expected to give P1 from gravida."""
    document = Document(None, text='G 1 P 2')
    status, _, source = determine_parity(document)
    assert (status, source) == (ParityStatus.P1, ParitySource.GRAVIDA)
def test_clean_breastfeeding_document():
    """A question/answer pair split across lines is expected to be joined in ``new_text``."""
    raw_text = 'Breastfeeding?\nYes'
    cleaned = Document(None, text=raw_text).new_text
    assert cleaned == 'Breastfeeding: Yes'
def test_teaching_boilerplate_document():
    """Header lines ending in a colon are expected to be joined onto one line in ``new_text``."""
    raw_text = 'Teaching/Guidance provided:\nNutrition: \nwhole milk'
    cleaned = Document(None, text=raw_text).new_text
    assert cleaned == 'Teaching/Guidance provided: Nutrition: whole milk'