# Mocked-out set of positive patient ids, used to speed testing up.
# TODO: replace with the ruids of the final records produced by the
# diagnosis-year step once testing is done.
_MOCK_POSITIVE_RUIDS = (
    7, 10, 67, 68, 69, 71, 72, 74, 75, 79,
    80, 101, 109, 119, 194, 195, 196, 197, 199, 200,
    201, 202, 203, 205, 210, 212, 213, 231, 278, 362,
    373, 376, 383, 387, 404, 407, 554, 555, 556, 560,
    561, 562, 564, 567, 597, 625, 626, 627, 629, 631,
    633, 637, 639, 640, 641, 653, 671, 674, 711, 715,
    719, 720, 724, 741, 760, 762, 764, 850, 851, 854,
)


def run(rm, records, ruids=None):
    """Run the drug ContextRule over every record of the positive patients.

    One empty FinalRecord is created per positive ruid and handed to a
    ContextRule; every record whose ruid is in the positive set is then passed
    to ``contextRule.run``.  Progress is written to stdout after each record.

    Args:
        rm: Unused; kept so existing callers do not break.
        records: Sized iterable of record objects exposing a ``ruid``
            attribute.
        ruids: Optional iterable of positive patient ids.  Defaults to the
            mocked-out list above, which is what the function always used
            before this parameter existed.

    Returns:
        ``contextRule.finalRecords`` after all matching records were processed
        (presumably the FinalRecord objects enriched by the rule -- confirm
        against ContextRule).
    """
    ruids = list(_MOCK_POSITIVE_RUIDS) if ruids is None else list(ruids)
    # Built once so the per-record membership test does not rescan the list.
    positiveRuids = frozenset(ruids)

    finalRecords = []
    for ruid in ruids:
        fr = FinalRecord()
        fr.ruid = ruid
        finalRecords.append(fr)

    contextRule = ContextRule("ContextRule", finalRecords)

    # Take each positive patient and check all their records to find the drugs.
    length = len(records)
    for i, record in enumerate(records, start=1):
        if record.ruid in positiveRuids:
            # The return value was never used; the rule is run for its effect
            # on contextRule.finalRecords.
            contextRule.run(record)

        # Counted per record (matching or not) so the display reaches 100%.
        progress = round((i / length) * 100, 2)
        sys.stdout.write("Identifying drug dates... %d%% \r" % (progress))
        sys.stdout.flush()

    return contextRule.finalRecords
# Default location of the tab-separated debug log of every positive record.
_DEBUG_LOG_PATH = "/home/suttons/MSDataAnalysis/output/positiveRUIDsFullRecordsDiagnoseYr.txt"

# Called years at most this many years apart count as the same diagnosis.
_YEAR_WINDOW = 3


def _collapseCloseYears(years, window=_YEAR_WINDOW):
    """Return the distinct years, newest first, with runs of close years collapsed.

    Years are compared as ints but returned exactly as they were passed in.
    A year is dropped when the next older distinct year is at most ``window``
    years away, so every run of close years is represented by its earliest
    member.
    """
    newestFirst = sorted(set(years), key=int, reverse=True)
    survivors = []
    for position, year in enumerate(newestFirst):
        nextOlder = newestFirst[position + 1:position + 2]
        if nextOlder and int(year) - int(nextOlder[0]) <= window:
            continue
        survivors.append(year)
    return survivors


def _consensusYear(years, window=_YEAR_WINDOW):
    """Return the single best-supported year in ``years``, or None on a tie.

    Each candidate from ``_collapseCloseYears`` is scored by how many entries
    of ``years`` lie within ``window`` years of it.  The candidate is accepted
    only when its score is strictly higher than every other candidate's.
    """
    candidates = _collapseCloseYears(years, window)
    support = [
        sum(1 for year in years if abs(int(year) - int(candidate)) <= window)
        for candidate in candidates
    ]
    if not support:
        return None
    strongest = max(support)
    if support.count(strongest) != 1:
        # Several different diagnosis dates are equally likely: throw it out.
        return None
    return candidates[support.index(strongest)]


def run(records, debugLogPath=_DEBUG_LOG_PATH, yearWindow=_YEAR_WINDOW):
    """Identify one diagnosis year per patient (ruid) from their records.

    Every record with an ``entry_date`` is run through the ContextRule and, if
    that misses, the ImpressionRule.  A hit flagged ``hardCall`` fixes the
    patient's diagnosis year immediately.  All other hits are collected per
    ruid and resolved afterwards by picking the year most of them agree on
    (see ``_consensusYear``).  The first year accepted for a ruid wins; later
    ones are ignored.

    Changes from the previous implementation:
      * the called text is now actually stripped of newlines/tabs before it is
        written to the debug log (the substitution result used to be
        discarded);
      * close years are collapsed deterministically instead of deleting from
        the list while iterating over it.

    Args:
        records: Sized iterable of record objects exposing ``ruid`` and
            ``entry_date``.
        debugLogPath: File that receives one tab-separated line per positive
            record (appended).  Pass a falsy value to disable the log.
        yearWindow: Maximum distance, in years, at which two called years are
            treated as the same diagnosis.

    Returns:
        List of FinalRecord objects with ``ruid`` and ``diagnosisYr`` set.
    """
    contextRule = ContextRule("ContextRule")
    impressionRule = ImpressionRule("ImpressionRule")

    # length is used to show progress of the script to the user
    length = len(records)
    # Compiled once; used to keep the called text on a single log line.
    lineBreaks = re.compile(r'[\n\r\t]')

    # Hits that are not hard calls, grouped by ruid, resolved after the loop.
    positiveRecords = {}
    # What eventually is returned, plus the ruids it already contains.
    finalRecords = []
    finalRuids = set()

    def accept(ruid, year):
        # Only the first accepted year per ruid is kept.
        if ruid in finalRuids:
            return
        finalRuids.add(ruid)
        finalRecord = FinalRecord()
        finalRecord.ruid = ruid
        finalRecord.diagnosisYr = year
        finalRecords.append(finalRecord)

    for i, record in enumerate(records, start=1):
        # Sometimes the record doesn't even have an entry_date; skip those.
        if record.entry_date is not None:
            # The first four characters of entry_date are the year.
            entryYear = int(str(record.entry_date)[:4])

            # The rules return False on a miss, otherwise a called record
            # object; equality (not identity) is kept from the original.
            check = contextRule.run(record, entryYear)
            if check == False:
                # ImpressionRule calls out records where the patient is
                # diagnosed in the visit itself.
                check = impressionRule.run(record, entryYear)

            if check != False:
                if check.hardCall:
                    # A hard call means we are sure about this diagnosis year.
                    accept(record.ruid, check.calledYear)
                else:
                    positiveRecords.setdefault(check.ruid, []).append(check)

                if debugLogPath:
                    # Opened per record in append mode, as before, so the file
                    # is only created when there is something to write.
                    with open(debugLogPath, "a") as txtFile:
                        calledText = lineBreaks.sub(' ', str(check.calledText))
                        fields = [
                            str(check.ruid),
                            str(check.entry_date),
                            str(check.calledYear),
                            str(check.calledRule),
                            calledText,
                        ]
                        txtFile.write("\t".join(fields) + "\r")

        # show the progress of the script
        progress = round((i / length) * 100, 2)
        sys.stdout.write("Identifying diagnosis years... %d%% \r" % (progress))
        sys.stdout.flush()

    # Find the most frequent year per ruid and keep it when it is unambiguous.
    # TODO: If the algorithm only identifies one diagnosis date not from a
    # hard rule, throw it out.
    for ruid, calls in positiveRecords.items():
        commonYr = _consensusYear([call.calledYear for call in calls], yearWindow)
        if commonYr is not None:
            accept(ruid, commonYr)

    print("Done with Diagnosis years!")
    return finalRecords
# Mocked-out set of positive patient ids, used to speed testing up.
# TODO: replace with the ruids of the final records produced by the
# diagnosis-year step once testing is done.
_MOCK_POSITIVE_RUIDS = (
    7, 10, 67, 68, 69, 71, 72, 74, 75, 79,
    80, 101, 109, 119, 194, 195, 196, 197, 199, 200,
    201, 202, 203, 205, 210, 212, 213, 231, 278, 362,
    373, 376, 383, 387, 404, 407, 554, 555, 556, 560,
    561, 562, 564, 567, 597, 625, 626, 627, 629, 631,
    633, 637, 639, 640, 641, 653, 671, 674, 711, 715,
    719, 720, 724, 741, 760, 762, 764, 850, 851, 854,
)


def run(rm, records, ruids=None):
    """Run the drug ContextRule over every record of the positive patients.

    One empty FinalRecord is created per positive ruid and handed to a
    ContextRule; every record whose ruid is in the positive set is then passed
    to ``contextRule.run``.  Progress is written to stdout after each record.

    Args:
        rm: Unused; kept so existing callers do not break.
        records: Sized iterable of record objects exposing a ``ruid``
            attribute.
        ruids: Optional iterable of positive patient ids.  Defaults to the
            mocked-out list above, which is what the function always used
            before this parameter existed.

    Returns:
        ``contextRule.finalRecords`` after all matching records were processed
        (presumably the FinalRecord objects enriched by the rule -- confirm
        against ContextRule).
    """
    ruids = list(_MOCK_POSITIVE_RUIDS) if ruids is None else list(ruids)
    # Built once so the per-record membership test does not rescan the list.
    positiveRuids = frozenset(ruids)

    finalRecords = []
    for ruid in ruids:
        fr = FinalRecord()
        fr.ruid = ruid
        finalRecords.append(fr)

    contextRule = ContextRule("ContextRule", finalRecords)

    # Take each positive patient and check all their records to find the drugs.
    length = len(records)
    for i, record in enumerate(records, start=1):
        if record.ruid in positiveRuids:
            # The return value was never used; the rule is run for its effect
            # on contextRule.finalRecords.
            contextRule.run(record)

        # Counted per record (matching or not) so the display reaches 100%.
        progress = round((i / length) * 100, 2)
        sys.stdout.write("Identifying drug dates... %d%% \r" % (progress))
        sys.stdout.flush()

    return contextRule.finalRecords