Example #1
0
def annotate_text(data):
    """
    Run a single text through the Deduce pipeline.

    Args:
        data: Mapping of keyword arguments that is forwarded to
            ``deduce.annotate_text``. An optional ``'id'`` entry is not
            forwarded; it is echoed back in the result instead. The mapping
            itself is never modified.

    Returns:
        A dict with the key ``'text'`` holding the output of
        ``deduce.deidentify_annotations`` and, when an ``'id'`` that is not
        ``None`` was supplied, the key ``'id'`` holding that value.
    """
    # Work on a shallow copy so the caller's mapping is left untouched
    params = dict(data)

    # Remove ID from the arguments handed to Deduce
    record_id = params.pop('id', None)

    # Run Deduce pipeline
    try:  # temporary workaround for https://github.com/vmenger/deduce/issues/44
        annotated_text = deduce.annotate_text(**params)
    except IndexError:
        # Retry with dates=False. The override is merged into the mapping so
        # that a 'dates' entry supplied by the caller cannot trigger a
        # "multiple values for keyword argument" TypeError.
        annotated_text = deduce.annotate_text(**{**params, 'dates': False})

    deidentified_text = deduce.deidentify_annotations(annotated_text)

    # Format result
    result = {'text': deidentified_text}

    # Add the ID if it was passed along
    if record_id is not None:
        result['id'] = record_id

    return result
Example #2
0
def main(argv):
    """
    Annotate the text read from stdin with Deduce and print the outcome.

    ``argv`` must contain exactly ``ARGVLEN`` entries, which are unpacked in
    order as first names, initials, surname and given name. With any other
    length the process exits via ``sys.exit(USAGE)``.
    """
    if len(argv) != ARGVLEN:
        sys.exit(USAGE)
    first_names, initials, surname, given_name = argv

    # All optional flags are passed explicitly as True.
    enabled = dict(
        names=True,
        locations=True,
        institutions=True,
        dates=True,
        ages=True,
        patient_numbers=True,
        phone_numbers=True,
        urls=True,
        flatten=True,
    )

    source_text = readTextFromStdin()
    annotated = deduce.annotate_text(
        source_text, first_names, initials, surname, given_name, **enabled
    )
    printResults(annotated)
Example #3
0
    def test_annotate_text(self):
        """Annotate a sample Dutch text and compare it with the expected tagged output."""

        # The sample must contain a syntactically valid e-mail address,
        # otherwise the <URL ...> expectation below has nothing to wrap.
        text = (
            u"Dit is stukje tekst met daarin de naam Jan Jansen. De patient J. Jansen "
            u"(e: j.jnsen@email.com, t: 06-12345678) is 64 jaar oud en woonachtig in Utrecht. Hij werd op 10 "
            u"oktober door arts Peter de Visser ontslagen van de kliniek van het UMCU."
        )

        annotated = deduce.annotate_text(text,
                                         patient_first_names="Jan",
                                         patient_surname="Jansen")

        expected_text = (
            "Dit is stukje tekst met daarin de naam <PATIENT Jan Jansen>. De <PATIENT patient J. Jansen> "
            "(e: <URL j.jnsen@email.com>, t: <TELEFOONNUMMER 06-12345678>) is <LEEFTIJD 64> jaar oud en "
            "woonachtig in <LOCATIE Utrecht>. Hij werd op <DATUM 10 oktober> door arts "
            "<PERSOON Peter de Visser> ontslagen van de kliniek van het <INSTELLING UMCU>."
        )
        self.assertEqual(expected_text, annotated)
Example #4
0
import pandas as pd
import deduce  # pip install git+https://github.com/vmenger/deduce.git

# De-identify every letter in the spreadsheet with Deduce and write the
# original data plus a new 'deidentified' column to a new Excel file.
data = pd.read_excel(r'Data\ICD10 letters age sex_SENSITIVE.xlsx')

d = []
# Iterate over the column values directly rather than looking each one up
# by label through range(data.shape[0]).
for text in data['UitgaandeBriefTekst_DOC']:
    annotated = deduce.annotate_text(
        text,  # The text to be annotated
        patient_first_names="",  # First names (separated by whitespace)
        patient_initials="",
        patient_surname="",
        patient_given_name="",  # Given name
        names=True,  # Person names, including initials
        locations=True,  # Geographical locations
        institutions=True,
        dates=True,
        ages=True,
        patient_numbers=True,
        phone_numbers=True,
        urls=True,  # Urls and e-mail addresses
        flatten=True  # Debug option
    )
    de_identified = deduce.deidentify_annotations(annotated)
    d.append(de_identified)

df2 = pd.DataFrame(d, columns=['deidentified'])
result = pd.concat([data, df2], axis=1)
# Raw string: the backslashes are Windows path separators, not escape
# sequences (\G, \I and \l are invalid escapes in a normal string literal).
result.to_excel(r"D:\Github\ICD10 Classification\l_anonym.xlsx")
Example #5
0
 def __init__(self, text):
     """Store ``text`` and its Deduce annotation.

     The annotation is produced by calling ``deduce.annotate_text`` with the
     stored text as its only argument and is kept in ``self.annotated_text``.
     """
     self.text = text
     self.annotated_text = deduce.annotate_text(self.text)