コード例 #1
ファイル: identifyDrugs.py プロジェクト: spence95/NLPRuler
def run(rm, records):
    """Run the drug-identification context rule over the records of known patients.

    A FinalRecord is created for every patient ruid in the (currently
    hard-coded) list of positives, the list is handed to a ContextRule, and
    the rule is then run on every record that belongs to one of those patients.

    Args:
        rm: Unused by this function; kept so existing callers keep working.
        records: Sized iterable of record objects exposing a `ruid` attribute.

    Returns:
        `contextRule.finalRecords` after all matching records were processed
        (presumably the list built here, updated by the rule -- verify against
        ContextRule).
    """
    # Mocked-out set of positives used to speed testing up.
    # TODO: derive this list from the diagnosis-year results (one entry per
    # distinct finalRecord.ruid) instead of hard-coding it.
    ruids = [
        7, 10, 67, 68, 69, 71, 72, 74, 75, 79, 80, 101, 109, 119, 194, 195,
        196, 197, 199, 200, 201, 202, 203, 205, 210, 212, 213, 231, 278, 362,
        373, 376, 383, 387, 404, 407, 554, 555, 556, 560, 561, 562, 564, 567,
        597, 625, 626, 627, 629, 631, 633, 637, 639, 640, 641, 653, 671, 674,
        711, 715, 719, 720, 724, 741, 760, 762, 764, 850, 851, 854,
    ]
    # Built once so the per-record membership test below is O(1).
    ruidLookup = frozenset(ruids)

    # One FinalRecord per patient; the rule receives the whole list.
    finalRecords = []
    for ruid in ruids:
        fr = FinalRecord()
        fr.ruid = ruid
        finalRecords.append(fr)

    contextRule = ContextRule("ContextRule", finalRecords)

    # Check every record of the selected patients to find the drugs.
    length = len(records)
    for i, record in enumerate(records, start=1):
        if record.ruid in ruidLookup:
            contextRule.run(record)
        # 100.0 forces float division so the percentage is also right on Python 2.
        progress = round((100.0 * i) / length, 2)
        sys.stdout.write("Identifying drug dates... %d%%   \r" % (progress))

    return contextRule.finalRecords
コード例 #2
def _collapseCloseYears(distinctYears, window):
    """Return the years, newest first, dropping any within `window` of the last kept one."""
    kept = []
    for year in sorted(distinctYears, key=int, reverse=True):
        # Input is sorted descending, so the last kept year is the closest one.
        if not kept or abs(int(kept[-1]) - int(year)) > window:
            kept.append(year)
    return kept


def _pickCommonYear(years, window=3):
    """Return the dominant year among `years`, or None if there is no clear winner.

    Years within `window` of each other are treated as the same diagnosis
    date. A year wins when it is the only cluster, or when its cluster has
    strictly more members than every other cluster.
    """
    clusterYears = _collapseCloseYears(set(years), window)
    if not clusterYears:
        return None

    countList = []
    highestCount = 0
    commonYr = None
    for clusterYear in clusterYears:
        count = sum(
            1 for year in years
            if abs(int(year) - int(clusterYear)) <= window
        )
        countList.append(count)
        if count > highestCount:
            highestCount = count
            commonYr = clusterYear

    # A single cluster, e.g. counts of [2]: nothing disagrees with it.
    if len(countList) == 1:
        return commonYr
    # Several clusters, e.g. [2, 2] or [3, 1]: only accept a strict winner.
    countList.sort(reverse=True)
    if countList[0] > countList[1]:
        return commonYr
    return None


def run(records, debugPath="/home/suttons/MSDataAnalysis/output/positiveRUIDsFullRecordsDiagnoseYr.txt"):
    """Determine a diagnosis year per patient ruid from the given records.

    Every record with an entry date is passed to ContextRule and, if that
    returns False, to ImpressionRule. Positive results are grouped by ruid.
    A result flagged as a hard call fixes the diagnosis year for its ruid
    immediately; for the remaining ruids the most frequent year is used,
    provided it clearly dominates (years at most 3 apart count as the same).

    Args:
        records: Sized iterable of record objects exposing `ruid` and
            `entry_date`.
        debugPath: File that receives one tab-separated line per positive
            result (appended). Pass None or "" to disable the debug output.

    Returns:
        List of FinalRecord objects with `ruid` and `diagnosisYr` set, at
        most one per ruid.
    """
    # setup info
    contextRule = ContextRule("ContextRule")
    impressionRule = ImpressionRule("ImpressionRule")

    # length is used to show progress of the script to the user
    length = len(records)

    # positive rule results grouped by ruid
    positiveRecords = {}

    # what eventually is returned; composed of FinalRecord objects
    finalRecords = []
    # ruids that already have an entry in finalRecords
    finalRuids = set()

    # compiled once; used to flatten the matched text onto a single line
    whitespace = re.compile(r'[\n\r\t]')

    for i, record in enumerate(records, start=1):
        # sometimes the record doesn't even have an entry_date, we can't use that
        if record.entry_date is not None:
            # the first four characters of entry_date are taken as the year
            entry_year = int(str(record.entry_date)[:4])
            # check becomes False or a called record object
            check = contextRule.run(record, entry_year)
            if check is False:
                # ImpressionRule exists specifically to call out records where
                # the patient is diagnosed in the visit
                check = impressionRule.run(record, entry_year)

            if check is not False:
                # NOTE(review): `hardCall` is inferred from the original
                # comment ("hardCall means that we are sure"); confirm the
                # attribute name on the rule result. Without it every positive
                # result goes through the frequency vote below.
                if getattr(check, "hardCall", False) and record.ruid not in finalRuids:
                    finalRecord = FinalRecord()
                    finalRecord.ruid = record.ruid
                    finalRecord.diagnosisYr = check.calledYear
                    finalRecords.append(finalRecord)
                    finalRuids.add(record.ruid)

                # if ruid already seen, pair it with previous info
                positiveRecords.setdefault(check.ruid, []).append(check)

                # a file to help understand what's happening during the analysis
                if debugPath:
                    calledText = whitespace.sub(' ', str(check.calledText))
                    stringLine = str(check.ruid) + "\t" + str(check.entry_date) + "\t" + str(check.calledYear) + "\t" + str(check.calledRule) + "\t" + calledText + "\r"
                    with open(debugPath, "a") as txtFile:
                        txtFile.write(stringLine)

        # show the progress of the script (100.0 keeps this correct on Python 2)
        progress = round((100.0 * i) / length, 2)
        sys.stdout.write("Identifying diagnosis years... %d%%   \r" % (progress) )

    # find the most frequent year and return that with the ruid
    for ruid, positiveRecordsForRuid in positiveRecords.items():
        # a hard call already settled this patient
        if ruid in finalRuids:
            continue

        # list of called years for this ruid, e.g. [1976, 1976] or
        # [1992, 1992, 1995, 1995]
        # NOTE(review): assumes each positive result contributes its
        # calledYear; confirm against the original implementation.
        years = [positive.calledYear for positive in positiveRecordsForRuid]

        # TODO: If the algorithm only identifies one diagnosis date not from a
        # hard rule, throw it out
        commonYr = _pickCommonYear(years)
        if commonYr is None:
            # multiple different diagnosis dates without a clear winner
            continue

        finalRecord = FinalRecord()
        finalRecord.ruid = positiveRecordsForRuid[0].ruid
        finalRecord.diagnosisYr = commonYr
        finalRecords.append(finalRecord)
        finalRuids.add(ruid)

    print("Done with Diagnosis years!")
    return finalRecords