Python Documentの例、apex.algo.pattern.Document Pythonの例

コード例 #1

0

ファイルを表示

ファイル: iud_expulsion.py プロジェクト: kpwhri/apex_iud_nlp

def determine_iud_expulsion(document: Document):
    """Yield expulsion-related findings for every IUD sentence in `document`.

    Each yielded item is a 3-tuple ``(ExpulsionStatus, history, text)`` where
    ``history`` tells whether the sentence also matched ``PREVIOUS`` and
    ``text`` is the sentence text. A document without any ``IUD`` match
    yields a single ``SKIP`` entry carrying the whole document text.

    :param document: document to inspect
    :return: generator of (status, history flag or None, text) tuples
    """
    if not document.has_patterns(IUD):
        yield ExpulsionStatus.SKIP, None, document.text
        return

    # (patterns, keyword options, resulting status), checked in this order
    ordered_checks = (
        ((LOWER_UTERINE_SEGMENT,), {}, ExpulsionStatus.LOWER_UTERINE_SEGMENT),
        ((COMPLETE,), {}, ExpulsionStatus.EXPULSION),
        ((PARTIAL_EXP,), {}, ExpulsionStatus.PARTIAL),
        ((MISSING, STRINGS), {'has_all': True}, ExpulsionStatus.MISSING_STRING),
        ((PROPER_LOCATION,), {}, ExpulsionStatus.PROPER_PLACEMENT),
        ((IN_UTERUS,), {}, ExpulsionStatus.IN_UTERUS),
    )

    for sentence in document.select_sentences_with_patterns(IUD):
        is_previous = bool(sentence.has_patterns(PREVIOUS))
        misplaced = (
            sentence.has_patterns(INCORRECT, PLACEMENT, has_all=True)
            or sentence.has_patterns(MALPOSITION)
            or sentence.has_patterns(DISPLACEMENT)
        )
        # a malposition mention is not reported when IN_CERVIX also matches
        if misplaced and not sentence.has_patterns(IN_CERVIX):
            yield ExpulsionStatus.MALPOSITION, is_previous, sentence.text
        # the remaining checks are independent: one sentence may yield several
        for patterns, options, status in ordered_checks:
            if sentence.has_patterns(*patterns, **options):
                yield status, is_previous, sentence.text

    # hanging strings are looked up across the document, not per IUD sentence
    hanging = document.select_all_sentences_with_patterns(STRING_HANGING)
    if hanging:
        yield ExpulsionStatus.HANGING_STRING, None, hanging.text

コード例 #2

0

ファイルを表示

def determine_iud_brand(document: Document):
    """Collect the IUD brands mentioned in `document`.

    :param document: document to inspect
    :return: list of ``(BrandStatus, using, text)`` tuples. ``using`` is the
        result of matching ``USING`` on the same section for brand hits and
        ``False`` otherwise. A document matching ``EXCLUDE`` or lacking any
        ``IUD`` pattern returns a single ``SKIP`` entry.
    """
    if document.has_patterns(EXCLUDE):
        return [(BrandStatus.SKIP, False, None)]
    if not document.has_patterns(*IUD, ignore_negation=True):
        return [(BrandStatus.SKIP, False, None)]

    # (patterns, brand), checked in this order for every section
    brand_patterns = (
        ((PARAGARD, COPPER), BrandStatus.PARAGARD),
        ((MIRENA,), BrandStatus.MIRENA),
        ((LILETTA,), BrandStatus.LILETTA),
        ((KYLEENA,), BrandStatus.KYLEENA),
        ((SKYLA,), BrandStatus.SKYLA),
    )

    found = []
    sections = document.select_sentences_with_patterns(
        *IUD, neighboring_sentences=1)
    for section in sections:
        for patterns, brand in brand_patterns:
            if section.has_patterns(*patterns):
                found.append(
                    (brand, section.has_patterns(USING), section.text))
    if found:
        return found

    # only fall back to LNG when no brand name was found
    if document.has_patterns(LNG):
        found.append((BrandStatus.LNG, False, document.text))
    else:
        found.append((BrandStatus.NONE, False, None))
    return found

コード例 #3

0

ファイルを表示

def determine_iud_removal(document: Document):
    """Yield removal-related findings for the IUD sentences of `document`.

    Each yielded item is ``(RemoveStatus, text)``. Sentences matching
    ``REMOVE_BY`` are ignored. IUD sentences that match none of ``ALL`` are
    collected and reported together as one ``NONE`` entry after the loop;
    when no such sentence exists a ``SKIP`` entry with the document text is
    yielded instead (this happens even if statuses were yielded earlier).

    :param document: document to inspect
    :return: generator of (status, text) tuples
    """
    if not document.has_patterns(*ALL, ignore_negation=True):
        yield RemoveStatus.SKIP, document.text
        return

    # (pattern, status), checked in this order; both PROB_REMOVE and REMOVE
    # map to RemoveStatus.REMOVE
    status_by_pattern = (
        (DEF_REMOVE, RemoveStatus.DEF_REMOVE),
        (DEF_REPLACE, RemoveStatus.DEF_REPLACE),
        (PROB_REMOVE, RemoveStatus.REMOVE),
        (TOOL, RemoveStatus.TOOL_REMOVE),
        (REMOVE, RemoveStatus.REMOVE),
        (PLAN, RemoveStatus.PLAN),
    )

    unmatched = []
    for sentence in document.select_sentences_with_patterns(IUD):
        if sentence.has_pattern(REMOVE_BY):
            continue
        if not sentence.has_patterns(*ALL):
            unmatched.append(sentence.text)
            continue
        # the sentence has relevant language; report every matching status
        for pattern, status in status_by_pattern:
            if sentence.has_patterns(pattern):
                yield status, sentence.text

    if unmatched:
        yield RemoveStatus.NONE, ' '.join(unmatched)
    else:
        yield RemoveStatus.SKIP, document.text

コード例 #4

0

ファイルを表示

ファイル: iud_expulsion_rad.py プロジェクト: kpwhri/apex_iud_nlp

def determine_iud_expulsion_rad(document: Document):
    """Yield expulsion findings from a sectioned document.

    The document is split with ``SECTIONS`` and three groups of sections are
    looked up by name: history-like sections, impression sections, and
    findings sections. Checks run in three tiers:

    1. the impression sections (when present);
    2. the findings sections, when the history-like sections match
       ``IUD`` + ``IUD_PRESENT`` or ``STRING`` + ``VISIBLE``;
    3. otherwise every sentence of the document that matches ``IUD``.

    Tiers 2 and 3 are skipped when the impression tier ends with ``found``
    set to True.

    :param document: document to inspect
    :return: generator of ``(ExpulsionStatus, text)`` tuples; a single
        ``(ExpulsionStatus.SKIP, None)`` is yielded when the last tier finds
        no IUD sentence
    """
    sections = document.split(SECTIONS)
    start_section = sections.get_sections('HST', 'SAS', 'HISTORY',
                                          'CLINICAL INFORMATION',
                                          'CLINICAL HISTORY AND QUESTION')
    impression = sections.get_sections('IMPRESSION', 'IMPRESSIONS', 'IMP')
    other = sections.get_sections('FINDINGS', 'TRANSVAGINAL', 'FINDING')
    # True when an impression finding should end the search
    found = False
    if impression:
        if impression.has_patterns(PARTIAL_EXP):
            found = True
            yield ExpulsionStatus.PARTIAL, impression.text
        if impression.has_patterns(NOT_SEEN_IUD, IUD_NOT_SEEN):
            found = True
            yield ExpulsionStatus.LOST, impression.text
        if impression.has_patterns(MALPOSITION, MALPOSITION_IUD):
            found = True
            yield ExpulsionStatus.MALPOSITION, impression.text
        if impression.has_patterns(PROPER_LOCATION):
            found = True
            yield ExpulsionStatus.PROPER_PLACEMENT, impression.text
        # NOTE: the two checks below assign False, so they also reset a True
        # value set by any of the checks above
        if impression.has_patterns(IN_UTERUS):
            found = False  # don't count this one
            yield ExpulsionStatus.IN_UTERUS, impression.text
        if impression.has_patterns(LOWER_UTERINE_SEGMENT):
            found = False
            yield ExpulsionStatus.LOWER_UTERINE_SEGMENT, impression.text
    if found:
        return
    elif start_section.has_patterns(IUD, IUD_PRESENT, has_all=True) or \
            start_section.has_patterns(STRING, VISIBLE, has_all=True):
        # the history-like sections mention an IUD: check the findings
        if other.has_patterns(PARTIAL_EXP):
            yield ExpulsionStatus.PARTIAL, other.text
        if other.has_patterns(MALPOSITION, MALPOSITION_IUD):
            yield ExpulsionStatus.MALPOSITION, other.text
        if other.has_patterns(PROPER_LOCATION):
            yield ExpulsionStatus.PROPER_PLACEMENT, other.text
        if other.has_patterns(IN_UTERUS):
            yield ExpulsionStatus.IN_UTERUS, other.text
        if other.has_patterns(LOWER_UTERINE_SEGMENT):
            yield ExpulsionStatus.LOWER_UTERINE_SEGMENT, other.text
    else:
        # last resort: scan every IUD sentence of the whole document
        sentences = list(document.select_sentences_with_patterns(IUD))
        if sentences:
            for sentence in sentences:
                if sentence.has_patterns(PARTIAL_EXP):
                    yield ExpulsionStatus.PARTIAL, sentence.text
                if sentence.has_patterns(MALPOSITION, MALPOSITION_IUD):
                    yield ExpulsionStatus.MALPOSITION, sentence.text
                if sentence.has_patterns(PROPER_LOCATION):
                    yield ExpulsionStatus.PROPER_PLACEMENT, sentence.text
                if sentence.has_patterns(IN_UTERUS):
                    yield ExpulsionStatus.IN_UTERUS, sentence.text
                if sentence.has_patterns(LOWER_UTERINE_SEGMENT):
                    yield ExpulsionStatus.LOWER_UTERINE_SEGMENT, sentence.text
        else:
            yield ExpulsionStatus.SKIP, None

コード例 #5

0

ファイルを表示

def determine_parity(document: Document):
    """Determine the parity status described by `document`.

    Checks run in order and the first one that produces a status wins:

    1. explicit zero patterns (``GRAVIDA_0``, ``PARA_0``, ``NULLIPAROUS``,
       ``CHILD_0``) -> ``ParityStatus.P0``;
    2. the combined gravida/para pattern ``G_PARA_N``, accepted when
       gravida >= para, or when gravida == para - 1 (in which case the
       gravida value is used);
    3. standalone para / children counts (``PARA_NNNN``, ``PARA_N``,
       ``CHILD_N``, ``CHILD_NUM``);
    4. ``MULTIPAROUS``.

    :param document: document to search
    :return: tuple ``(ParityStatus, matched text or None, ParitySource)``;
        ``(ParityStatus.SKIP, None, ParitySource.NONE)`` when nothing matches
    :raises ValueError: captured gravida/para values are not numeric
    :raises TypeError: captured gravida/para values are None (or similar)
    """
    res = document.get_patterns(GRAVIDA_0, PARA_0, NULLIPAROUS, CHILD_0,
                                names=[ParitySource.GRAVIDA, ParitySource.PARITY,
                                       ParitySource.PARITY, ParitySource.CHILDREN])
    if res:
        text, src = res
        if text:
            return ParityStatus.P0, text, src
    # patterns for grav and para; confirm grav >= para
    res = document.get_patterns(G_PARA_N, index=(0, 1, 2))
    if res:
        text, capt_g, capt_p = res
        try:
            g = int(capt_g)
            p = int(capt_p)
        except ValueError as err:
            # keep the original conversion error attached as the cause
            raise ValueError(
                f'Values of g/p do not appear to be numeric: {capt_p}, {capt_g} in {text}'
            ) from err
        except TypeError as err:
            raise TypeError(
                f'Values of g/p are None (or similar): {capt_p}, {capt_g} in {text}'
            ) from err
        else:
            # a four-character para capture: only its first character is used
            if len(capt_p) == 4:
                capt_p = capt_p[0]
                p = int(capt_p)
            if g >= p:
                status = extract_status(capt_p)
                if status:
                    return status, text, ParitySource.PARITY
            elif g == p - 1:  # common error: counting twins as extra parity
                logging.warning(f'Including Gravida {g}, Parity {p} '
                                f'(assuming twins counted as extra parity): {document.name}')
                status = extract_status(capt_g)  # use gravida not parity
                if status:
                    return status, text, ParitySource.GRAVIDA
            else:
                logging.info(f'Gravida {g} < Parity {p}: {document.name}')

    res = document.get_patterns(PARA_NNNN, PARA_N, CHILD_N, CHILD_NUM, index=(0, 1),
                                names=[ParitySource.PARITY, ParitySource.PARITY,
                                       ParitySource.CHILDREN, ParitySource.CHILDREN])
    if res and res[0]:
        (text, captured), src = res
        if captured:
            status = extract_status(captured)
            if status:
                return status, text, src
            else:
                # e.g., "para 13 %", "multipara @ 32+2/6weeks
                # report the captured value: `status` is always falsy here
                logging.info(f'Unrecognized parity value for {document.name}: {captured} in "{text}" from {src}')
    text = document.get_patterns(MULTIPAROUS)
    if text:
        return ParityStatus.MULTIPAROUS, text, ParitySource.PARITY
    return ParityStatus.SKIP, None, ParitySource.NONE

コード例 #6

0

ファイルを表示

ファイル: iud_difficult_insertion.py プロジェクト: kpwhri/apex_iud_nlp

def determine_difficult_insertion(document: Document):
    """Yield indicators of a difficult IUD insertion found in `document`.

    A document matching ``NOT_IUD_INSERTION`` yields a single
    ``(DiffInsStatus.SKIP, None)``. A document without any ``IUD`` match
    yields nothing.

    :param document: document to inspect
    :return: generator of ``(DiffInsStatus, text)`` tuples
    """
    if document.has_patterns(NOT_IUD_INSERTION):
        yield DiffInsStatus.SKIP, None
        return
    if not document.has_patterns(IUD, ignore_negation=True):
        return

    # document-level statements about the outcome of the insertion
    match = document.get_pattern(SUCCESSFUL_INSERTION)
    if match:
        yield DiffInsStatus.SUCCESSFUL, match
    match = document.get_pattern(CANNOT_PLACE)
    if match:
        yield DiffInsStatus.UNSUCCESSFUL, match

    if not document.has_patterns(INSERTION, ignore_negation=True):
        return

    # (patterns, keyword options, status), checked in this order
    indicators = (
        ((PROVIDER,), {}, DiffInsStatus.PROVIDER_STATEMENT),
        ((US, US_USED), {'has_all': True}, DiffInsStatus.ULTRASOUND_GUIDANCE),
        ((PARACERV,), {}, DiffInsStatus.PARACERVICAL_BLOCK),
        ((MISOPROSTOL,), {}, DiffInsStatus.MISOPROSTOL),
        ((CERV_DIL,), {}, DiffInsStatus.CERVICAL_DILATION),
    )
    passages = document.select_sentences_with_patterns(
        INSERTION, neighboring_sentences=1)
    for passage in passages:
        if passage.has_patterns(UNSUCCESSFUL_INSERTION, has_all=True):
            yield DiffInsStatus.UNSUCCESSFUL, passage.text
        if passage.has_patterns(EASY_INSERTION):
            # an easy insertion ends the checks for this passage
            yield DiffInsStatus.NOT_DIFFICULT, passage.text
            continue
        for patterns, options, status in indicators:
            if passage.has_patterns(*patterns, **options):
                yield status, passage.text

コード例 #7

0

ファイルを表示

ファイル: iud_insertion.py プロジェクト: kpwhri/apex_iud_nlp

def determine_iud_insertion(document: Document):
    """Classify the IUD insertion described in `document`.

    :param document: document to inspect
    :return: tuple ``(InsertionStatus, text or None)``. ``NO_MENTION`` when
        the document does not match both ``IUD`` and ``INSERTION``;
        ``UNKNOWN`` with ``None`` when no section could be selected;
        otherwise the status of the first matching rule, or ``UNKNOWN``
        with the section text when no rule matches.
    """
    # the document has to discuss an iud insertion at all
    if not document.has_patterns(IUD, INSERTION, has_all=True):
        return InsertionStatus.NO_MENTION, None

    selected = document.select_all_sentences_with_patterns(
        IUD, INSERTION, STRINGS,
        negation=[HISTORICAL, APPOINTMENT, DATE, NEGATED],
        neighboring_sentences=1,
    )
    if not selected:
        return InsertionStatus.UNKNOWN, None

    # first matching rule wins, so the order matters
    ordered_rules = (
        ((PRE_SUCCESS,), InsertionStatus.SUCCESS),
        ((UNSUCCESSFUL, NOT_SUCCESSFUL), InsertionStatus.FAILED),
        ((HYPOTHETICAL,), InsertionStatus.HYPOTHETICAL),
        ((POST_SUCCESS,), InsertionStatus.SUCCESS),
        ((POST_OP,), InsertionStatus.LIKELY_SUCCESS),
    )
    for patterns, status in ordered_rules:
        if selected.has_patterns(*patterns):
            return status, selected.text
    return InsertionStatus.UNKNOWN, selected.text

コード例 #8

0

ファイルを表示

ファイル: iud_perforation.py プロジェクト: kpwhri/apex_iud_nlp

def determine_iud_perforation(document: Document):
    """Yield perforation findings for the IUD sentences of `document`.

    Each yielded item is ``(PerforationStatus, text, date)`` where ``date``
    is whatever ``DATE_PAT`` matched in the same sentence. After all
    sentences are processed a ``NONE`` entry with the document text is
    always yielded. A document matching none of ``ALL`` yields a single
    ``(SKIP, None, None)``.

    :param document: document to inspect
    :return: generator of (status, text, date) tuples
    """
    if not document.has_patterns(*ALL, ignore_negation=True):
        yield PerforationStatus.SKIP, None, None
        return

    # see if any sentences that contain "IUD" also mention a perforation
    for sentence in document.select_sentences_with_patterns(IUD):
        when = sentence.get_pattern(DATE_PAT)
        # at most one of these statuses per sentence; the first match wins
        # (EMBEDDED and MIGRATED checks are currently disabled)
        status = None
        if sentence.has_patterns(COMPLETE):
            status = PerforationStatus.COMPLETE
        elif sentence.has_patterns(PARTIAL):
            status = PerforationStatus.PARTIAL
        elif sentence.has_patterns(PERFORATION):
            if sentence.has_pattern(POSSIBLE):
                status = PerforationStatus.POSSIBLE
            else:
                status = PerforationStatus.PERFORATION
        if status is not None:
            yield status, sentence.text, when
        # check for laparoscopic removal -> suggests complete perf
        if sentence.has_patterns(LAPAROSCOPIC_REMOVAL):
            yield PerforationStatus.LAPAROSCOPIC_REMOVAL, sentence.text, when

    yield PerforationStatus.NONE, document.text, None

コード例 #9

0

ファイルを表示

def get_next_from_corpus(directory=None,
                         directories=None,
                         version=None,
                         connections=None,
                         skipper=None,
                         start=0,
                         end=None,
                         filenames=None,
                         encoding='utf8'):
    """Iterate over the documents of a corpus.

    Entries come first from ``get_next_from_directory`` and then from
    ``get_next_from_connections``. Entries whose name is in `skipper` are
    dropped before counting; the remaining entries are numbered from 0 and
    only those with ``start <= number < end`` are considered.

    :param directory: first to look through (for backwards compatibility)
    :param directories: list of directories to look through
    :param version: text|lemma|token
    :param connections: passed as positional arguments to
        ``get_next_from_connections``
    :param skipper: container of document names to skip
    :param start: number of the first entry to yield
    :param end: number at which to stop (exclusive); a falsy value
        disables the limit
    :param filenames: forwarded to ``get_next_from_directory``
    :param encoding: forwarded to ``get_next_from_directory``
    :return: iterator yielding documents
    """
    from_directories = get_next_from_directory(
        directory, directories, version, filenames, encoding)
    from_connections = get_next_from_connections(*(connections or []))

    position = -1
    for doc_name, path, text in itertools.chain(from_directories,
                                                from_connections):
        if skipper and doc_name in skipper:
            continue
        position += 1
        if position < start:
            continue
        if end and position >= end:
            break
        # one of text/path is required to build a document
        if text or path:
            yield Document(doc_name, file=path, text=text)

コード例 #10

0

ファイルを表示

def determine_breastfeeding(document: Document, expected=None):
    """Yield breastfeeding-related results found in `document`.

    Every result is built with ``Result`` pre-bound to ``expected``.
    ``BF_BOILERPLATE_SECTION`` is removed from the document first; nothing is
    yielded when the remaining document is falsy or when it matches
    ``BF_BOILERPLATE_EXCLUDE`` / ``BF_UNKNOWN``.

    :param document: document to inspect
    :param expected: value forwarded to every ``Result`` as ``expected``
    :return: generator of ``Result`` objects
    """
    my_result = partial(Result, expected=expected)
    has_boilerplate = False
    document = document.remove_patterns(BF_BOILERPLATE_SECTION)
    if not document:  # only boilerplate
        return
    if document.has_patterns(BF_BOILERPLATE_EXCLUDE, BF_UNKNOWN):
        return
    if document.has_patterns(BF_BOILERPLATE):
        yield my_result(BreastfeedingStatus.BOILERPLATE)
        has_boilerplate = True
    # running total / texts of the non-exact matches gathered below
    non_exact_count = 0
    non_exact_count_snippets = []
    for sentence in document.sentences:
        # look for presence of "nutrition:" or "feeding:" followed by not info about breast
        if matches_nutrition_not_bf(sentence.text):
            yield my_result(BreastfeedingStatus.OTHER_NUTRITION, text=sentence.text)
    for section in document.select_sentences_with_patterns(ANY_BREAST):
        # set by the BREASTFEEDING, NO, stop and LATCHING checks below;
        # the other checks yield without setting it
        found_bf = False
        # pre boilerplate patterns: exact/not confused with boilerplate
        if section.has_patterns(BF_EXACT, BF_DURATION, BF_TYPE, BF_YES, BF_FEEDING):
            yield my_result(BreastfeedingStatus.BREASTFEEDING, text=section.text)
            found_bf = True
        if section.has_patterns(BF_NO_EXACT, BF_NOT, WHOLE_MILK):
            yield my_result(BreastfeedingStatus.NO, text=section.text)
            found_bf = True
        if section.has_patterns(EXPRESSED_MILK_EXACT):
            yield my_result(BreastfeedingStatus.EXPRESSED, text=section.text)
        if section.has_patterns(BF_HISTORY):
            yield my_result(BreastfeedingStatus.HISTORY, text=section.text)
        if section.has_patterns(FORMULA_EXACT):
            yield my_result(BreastfeedingStatus.FORMULA, text=section.text)
        if section.has_patterns(FORMULA_NO):
            yield my_result(BreastfeedingStatus.NO_FORMULA, text=section.text)
        if section.has_patterns(PUMPING_EXACT, PUMPING_ACTIVE):
            yield my_result(BreastfeedingStatus.PUMPING, text=section.text)
        if section.has_patterns(BOTTLE_EXACT):
            yield my_result(BreastfeedingStatus.BOTTLE, text=section.text)
        if section.has_patterns(BF_STOP):
            if section.has_patterns(AGO):  # "stopped 2 months ago"
                yield my_result(BreastfeedingStatus.STOP, text=section.text)
            elif section.has_patterns(BF_STOP_BAD):  # "stopped at 2 months age"
                yield my_result(BreastfeedingStatus.STOPPED_BEFORE, text=section.text)
            else:  # "stopped"
                yield my_result(BreastfeedingStatus.STOP, text=section.text)
            found_bf = True
        if section.has_patterns(LATCHING):
            yield my_result(BreastfeedingStatus.BREASTFEEDING, text=section.text)
            found_bf = True
        # weaker evidence is only considered when this section set no
        # found_bf flag and the document had no BF_BOILERPLATE match
        if not found_bf and not has_boilerplate:
            # boilerplate: there is at least some template language
            # only non-boilerplate
            if section.has_patterns(NIPPLE_SHIELD, BF_SUPPLEMENT):
                yield my_result(BreastfeedingStatus.BREASTFEEDING, text=section.text)
            if section.has_pattern(BF_FEEDING, ignore_negation=True):
                yield my_result(BreastfeedingStatus.MAYBE, text=section.text)
            # count returned by has_patterns for these non-exact patterns
            cnt = section.has_patterns(BREAST_MILK, BF, PUMPING,
                                       EXPRESSED_MILK, MILK_TRANSFER,
                                       BREAST_PAIN,
                                       get_count=True)
            if cnt:
                non_exact_count += cnt
                non_exact_count_snippets.append(section.text)
            if section.has_patterns(LACTATION_VISIT):
                yield my_result(BreastfeedingStatus.LACTATION_VISIT, text=section.text)
    # a total of two or more non-exact matches is reported as MAYBE, with
    # the contributing section texts joined by newlines
    if non_exact_count >= 2:
        yield my_result(BreastfeedingStatus.MAYBE, text='\n'.join(non_exact_count_snippets))

コード例 #11

0

ファイルを表示

def test_gNpNNNN():
    """'G 1 P 1001' is reported as P1 with the parity source."""
    document = Document(None, text='G 1 P 1001')
    result = determine_parity(document)
    parity_status, _matched, parity_source = result
    assert parity_status == ParityStatus.P1
    assert parity_source == ParitySource.PARITY

コード例 #12

0

ファイルを表示

def test_g_lt_p():
    """'G 3 P 7' (gravida well below para) is skipped with no source."""
    document = Document(None, text='G 3 P 7')
    result = determine_parity(document)
    parity_status, _matched, parity_source = result
    assert parity_status == ParityStatus.SKIP
    assert parity_source == ParitySource.NONE

コード例 #13

0

ファイルを表示

def test_g_eq_p_plus_1():
    """'G 1 P 2' (para one above gravida) is reported as P1 from gravida."""
    document = Document(None, text='G 1 P 2')
    result = determine_parity(document)
    parity_status, _matched, parity_source = result
    assert parity_status == ParityStatus.P1
    assert parity_source == ParitySource.GRAVIDA

コード例 #14

0

ファイルを表示

ファイル: test_document.py プロジェクト: kpwhri/apex_iud_nlp

def test_clean_breastfeeding_document():
    """A 'Breastfeeding?' question and its answer line become one line."""
    raw = 'Breastfeeding?\nYes'
    cleaned = 'Breastfeeding: Yes'
    document = Document(None, text=raw)
    assert document.new_text == cleaned

コード例 #15

0

ファイルを表示

ファイル: test_document.py プロジェクト: kpwhri/apex_iud_nlp

def test_teaching_boilerplate_document():
    """Teaching/Guidance lines are joined into a single normalized line."""
    raw = 'Teaching/Guidance provided:\nNutrition:  \nwhole milk'
    cleaned = 'Teaching/Guidance provided: Nutrition: whole milk'
    document = Document(None, text=raw)
    assert document.new_text == cleaned