def queryItems(self, options, outputFile):
    """Write medication order counts that match the option filters to outputFile.

    Counts rows of ``starr_datalake2018.order_med`` grouped by medication ID,
    description and route, ordered by descending count, and sends each result
    row (column names requested as part of the results) to ``outputFile``
    through a TextResultsFormatter.

    Attributes read from ``options``:
        pauseSeconds: Delay, converted with float(), slept after every row.
        descriptionPrefix: Optional prefix the medication description must
            start with.
        medRoutes: Optional comma-separated list of routes to keep.
    """
    rowDelay = float(options.pauseSeconds)

    medQuery = SQLQuery()
    medQuery.addSelect("count(order_med_id_coded) as nOrders")
    medQuery.addSelect("om.med_route, om.medication_id, om.med_description")
    medQuery.addFrom("starr_datalake2018.order_med as om")
    if options.descriptionPrefix:
        # Trailing wildcard turns the "like" comparison into a prefix search
        prefixPattern = options.descriptionPrefix + "%%"
        medQuery.addWhereOp("om.med_description", "like", prefixPattern)
    if options.medRoutes:
        routeList = options.medRoutes.split(",")
        medQuery.addWhereIn("om.med_route", routeList)
    medQuery.addGroupBy("om.medication_id, om.med_description, om.med_route")
    medQuery.addOrderBy("nOrders desc, om.med_description")

    writer = TextResultsFormatter(outputFile)
    progress = ProgressDots()
    resultRows = DBUtil.execute(
        medQuery, includeColumnNames=True, connFactory=self.connFactory
    )
    for resultRow in resultRows:
        writer.formatTuple(resultRow)
        time.sleep(rowDelay)  # Throttle output between rows
        progress.update()
    progress.printStatus()
def queryItems(self, options, outputFile):
    """Write clinical items that match the option filters to outputFile.

    Joins ``clinical_item_category`` to ``clinical_item`` and sends each
    result row (column names requested as part of the results) to
    ``outputFile`` through a TextResultsFormatter, ordered by category
    description, item name, item description and item ID.

    Attributes read from ``options``:
        pauseSeconds: Delay, converted with float(), slept after every row.
        itemPrefix: Optional prefix the item description must start with.
        categoryNames: Optional comma-separated list of category
            descriptions to keep.
    """
    rowDelay = float(options.pauseSeconds)

    itemQuery = SQLQuery()
    itemQuery.addSelect(
        "cic.description, ci.clinical_item_id, ci.name, ci.description")
    itemQuery.addFrom("clinical_item_category as cic")
    itemQuery.addFrom("clinical_item as ci")
    itemQuery.addWhere(
        "cic.clinical_item_category_id = ci.clinical_item_category_id")
    if options.itemPrefix:
        # Trailing wildcard turns the "like" comparison into a prefix search
        prefixPattern = options.itemPrefix + "%%"
        itemQuery.addWhereOp("ci.description", "like", prefixPattern)
    if options.categoryNames:
        categoryList = options.categoryNames.split(",")
        itemQuery.addWhereIn("cic.description", categoryList)
    itemQuery.addOrderBy(
        "cic.description, ci.name, ci.description, ci.clinical_item_id")

    writer = TextResultsFormatter(outputFile)
    progress = ProgressDots()
    resultRows = DBUtil.execute(
        itemQuery, includeColumnNames=True, connFactory=self.connFactory
    )
    for resultRow in resultRows:
        writer.formatTuple(resultRow)
        time.sleep(rowDelay)  # Throttle output between rows
        progress.update()
    progress.printStatus()
def parseLabResultsFile(labFile):
    """Read numeric lab results from labFile, grouped by patient and lab base name.

    Rows are read with TabDictReader. A row is kept only when its
    "ord_num_value" is not None, differs from NULL_STRING and, once converted
    to float, is below LAB_SENTINEL_VALUE. Kept rows are modified in place:
    "pat_id" and a new "patient_id" key hold the integer patient ID,
    "ord_num_value" holds the float value and "result_time" holds the value
    returned by DBUtil.parseDateValue.

    Returns:
        Dictionary keyed by patient ID, of dictionaries keyed by lab base
        name, of lists of the kept row dictionaries in file order.
    """
    log.info("Parse lab results file")
    progress = ProgressDots()
    resultsByNameByPatient = {}
    for resultRow in TabDictReader(labFile):
        rawValue = resultRow["ord_num_value"]
        if rawValue is not None and rawValue != NULL_STRING:
            patientId = int(resultRow["pat_id"])
            baseName = resultRow["base_name"]
            numericValue = float(rawValue)
            parsedTime = DBUtil.parseDateValue(resultRow["result_time"])

            if numericValue < LAB_SENTINEL_VALUE:
                # Values at or above the sentinel look like placeholders, skip them
                resultRow["pat_id"] = patientId
                resultRow["patient_id"] = patientId
                resultRow["ord_num_value"] = numericValue
                resultRow["result_time"] = parsedTime

                resultsByName = resultsByNameByPatient.setdefault(patientId, {})
                resultsByName.setdefault(baseName, []).append(resultRow)
        progress.update()
    progress.printStatus()
    return resultsByNameByPatient
def addLabFeatures(labsByBaseNameByPatientId, patientById, colNames, indexItemBaseName, labBaseNames, labPreTime, labPostTime):
    """Add per-lab summary features around each patient's index lab result.

    For every patient with at least one result named indexItemBaseName, the
    earliest such result defines the index time, which is stored as
    patient["index_time"]. For each name in labBaseNames, the results with
    ``indexTime + labPreTime <= result_time < indexTime + labPostTime`` are
    summarized into the patient keys "<name>.min", ".max", ".median",
    ".mean", ".std", ".first", ".last" and ".proximate" (the value closest in
    time to the index result). All eight keys are set to None when the patient
    has no result for that lab inside the window.

    Patients without any index result are left untouched.

    Args:
        labsByBaseNameByPatientId: Dictionary by patient ID of dictionaries by
            lab base name of lists of result dictionaries carrying
            "result_time" and "ord_num_value".
        patientById: Dictionary by patient ID of patient records, updated in
            place.
        colNames: List extended in place with
            colsFromLabBaseNames(labBaseNames).
        indexItemBaseName: Base name of the lab that defines the index time.
        labBaseNames: Base names of the labs to summarize.
        labPreTime: Offset added to the index time for the inclusive lower
            bound of the window.
        labPostTime: Offset added to the index time for the exclusive upper
            bound of the window.
    """
    log.info("Sort lab results by result time for each patient and find items within specified time period to aggregate")
    statSuffixes = ("min", "max", "median", "mean", "std", "first", "last", "proximate")

    prog = ProgressDots()
    for patientId, labsByBaseName in labsByBaseNameByPatientId.items():
        # Look for the first (earliest) result of the index item
        indexItem = None
        for labResult in labsByBaseName.get(indexItemBaseName, ()):
            if indexItem is None or labResult["result_time"] < indexItem["result_time"]:
                indexItem = labResult

        if indexItem is not None:
            # Patients without an index item are skipped; callers are expected to have pre-screened them
            indexTime = indexItem["result_time"]
            patient = patientById[patientId]
            patient["index_time"] = indexTime
            preTimeLimit = indexTime + labPreTime
            postTimeLimit = indexTime + labPostTime

            for labBaseName in labBaseNames:
                # Default to null for all values
                for suffix in statSuffixes:
                    patient["%s.%s" % (labBaseName, suffix)] = None

                # Not all patients will have all labs checked
                inWindow = [
                    labResult
                    for labResult in labsByBaseName.get(labBaseName, ())
                    if preTimeLimit <= labResult["result_time"] < postTimeLimit
                ]
                if not inWindow:
                    continue

                # Item closest to the index item in time; on a tie the one
                # that appears first in the input list wins
                proximateItem = min(
                    inWindow,
                    key=lambda labResult: abs(labResult["result_time"] - indexTime),
                )

                # Sort explicitly so first/last do not depend on input order;
                # the sort is stable, so pre-sorted input is unchanged
                inWindow = sorted(inWindow, key=lambda labResult: labResult["result_time"])
                valueList = [labResult["ord_num_value"] for labResult in inWindow]

                patient["%s.min" % labBaseName] = np.min(valueList)
                patient["%s.max" % labBaseName] = np.max(valueList)
                patient["%s.median" % labBaseName] = np.median(valueList)
                patient["%s.mean" % labBaseName] = np.mean(valueList)
                patient["%s.std" % labBaseName] = np.std(valueList)
                patient["%s.first" % labBaseName] = valueList[0]
                patient["%s.last" % labBaseName] = valueList[-1]
                patient["%s.proximate" % labBaseName] = proximateItem["ord_num_value"]
        prog.update()
    colNames.extend(colsFromLabBaseNames(labBaseNames))
    prog.printStatus()
def main(argv):
    """Main method, callable from command line.

    Waits until the process given by -p is no longer reported by pidExists,
    polling every -i seconds, then launches the -c command without waiting
    for it to finish. Prints the option help and exits with status -1 when
    -p or -c is missing.

    The command string is split into an argument list with shlex.split, so
    commands with arguments work. It is not run through a shell: pipes,
    redirects and variable expansion are not interpreted, and a program path
    containing spaces must be quoted inside the string.

    Args:
        argv: Full argument vector; argv[0] is the program name.
    """
    usageStr = "usage: %prog -p <pid> -c <commandStr>\n"
    parser = OptionParser(usage=usageStr)
    parser.add_option(
        "-p",
        "--pid",
        dest="pid",
        help="Process ID to monitor. As soon as it is no longer found by Unix signal (os.kill(pid,0)), will proceed to execute the given command",
    )
    parser.add_option(
        "-i",
        "--interval",
        dest="interval",
        default="1",
        help="How many seconds to wait before checking if the PID is still active. Default to 1 second.",
    )
    parser.add_option(
        "-c",
        "--commandStr",
        dest="commandStr",
        help="Command string to execute after the specified PID is longer found. Will just pass whole string to subprocess",
    )
    (options, args) = parser.parse_args(argv[1:])

    # sys.stderr.write emits the same text as the former print statements and
    # is valid syntax on both Python 2 and Python 3
    sys.stderr.write("Starting: " + str.join(" ", argv) + "\n")
    timer = time.time()
    if options.pid and options.commandStr:
        import shlex  # Local import: only needed on this code path

        pid = int(options.pid)
        interval = float(options.interval)

        prog = ProgressDots(60, 1, "intervals")
        while pidExists(pid):
            time.sleep(interval)
            prog.update()
        prog.printStatus()

        sys.stderr.write("Executing:  %s\n" % (options.commandStr,))
        # A plain string with shell=False is treated as the program name on
        # POSIX, which fails for any command with arguments; pass a list
        process = subprocess.Popen(shlex.split(options.commandStr))
        sys.stderr.write("Started process:  %s\n" % (process.pid,))
    else:
        parser.print_help()
        sys.exit(-1)

    # Covers the wait and the launch only; the launched command is not awaited
    timer = time.time() - timer
    sys.stderr.write("%.3f seconds to complete\n" % timer)
def main(argv):
    """Calculate and store result statistics for names missing from order_result_stat.

    Selects every sim_result name that has no matching base_name in
    order_result_stat, asks STRIDEOrderResultsConversion.calculateResultStats
    for a stat model per name and inserts it into order_result_stat. All
    inserts share one connection, which is committed once after the loop and
    always closed.

    Args:
        argv: Command-line arguments; not used.
    """
    converter = STRIDEOrderResultsConversion()
    dbConn = DBUtil.connection()
    try:
        # Result names to look for that do not have calculated stats yet
        pendingNameRows = DBUtil.execute(
            "select name from sim_result except select base_name from order_result_stat",
            conn=dbConn,
        )
        progress = ProgressDots(big=1, small=1, total=len(pendingNameRows))
        for nameRow in pendingNameRows:
            resultName = nameRow[0]
            print("Calculating Stats for %s" % resultName, file=sys.stderr)
            resultStats = converter.calculateResultStats(resultName, conn=dbConn)
            DBUtil.insertRow("order_result_stat", resultStats, conn=dbConn)
            progress.update()
        progress.printStatus()
        dbConn.commit()
    finally:
        dbConn.close()
def parseClinicalItemFile(itemFile, patientIdCol="patient_id", timeCol="item_date"):
    """Collect the item times found in itemFile for each patient.

    Rows are read with TabDictReader. For each row the patient ID column is
    converted with int() and the time column with DBUtil.parseDateValue; both
    converted values are also written back into the row dictionary.

    Args:
        itemFile: Source handed to TabDictReader.
        patientIdCol: Name of the column holding the patient ID.
        timeCol: Name of the column holding the item time.

    Returns:
        Dictionary keyed by integer patient ID, of lists of parsed item times
        in file order.
    """
    progress = ProgressDots()
    timesByPatient = {}
    for record in TabDictReader(itemFile):
        patientId = int(record[patientIdCol])
        itemTime = DBUtil.parseDateValue(record[timeCol])
        record[patientIdCol] = patientId
        record[timeCol] = itemTime
        timesByPatient.setdefault(patientId, []).append(itemTime)
        progress.update()
    progress.printStatus()
    return timesByPatient
def main_quickTest(argv): modelFilename = argv[1] modeler = TopicModel() timer = time.time() (model, docCountByWordId) = modeler.loadModelAndDocCounts(modelFilename) timer = time.time() - timer log.info("%.2f seconds to load", timer) timer = time.time() weightByItemIdByTopicId = modeler.generateWeightByItemIdByTopicId( model, 100) timer = time.time() - timer log.info("%.2f seconds to generate weights", timer) for i in xrange(3): prog = ProgressDots() for (topicId, weightByItemId) in weightByItemIdByTopicId.iteritems(): for (itemId, itemWeight) in weightByItemId.iteritems(): prog.update() prog.printStatus() """
def queryPatientEpisodes(outputFile, extractor):
    """Query admission episodes for the treatment-team cohort and write them to outputFile.

    Two query phases run on one connection:
      1. Pair stride_adt rows by encounter for patients who have a
         "Tt Pamf Med (Primary)" clinical item (category 161). Each returned
         row becomes one episode.
      2. Look up payor title and vital signs in stride_patient_encounter for
         those encounters and copy them onto the first episode recorded for
         each encounter ID. This phase is skipped when phase 1 found no
         episodes, so no empty "in ()" clause is built.

    The episodes are then written through TextResultsFormatter with a header
    row.

    Args:
        outputFile: Destination handed to TextResultsFormatter.
        extractor: Not used by the active code path; kept so existing callers
            keep working.

    Returns:
        List of RowItemModel episodes in cohort query order. "payorTitle" is
        None or a set of titles; the vital sign fields stay None when no
        encounter row was found.
    """
    # NOTE(review): the log message also names "Tt Med Univ (Primary)", but the
    # query below filters on "Tt Pamf Med (Primary)" only - confirm the intent.
    log.info(
        "Select patient admissions with provider category of Tt Pamf Med (Primary) or Tt Med Univ (Primary)"
    )

    conn = DBUtil.connection()
    cursor = conn.cursor()
    try:
        # NOTE: earlier filters on suspected-sepsis items and excluded surgery
        # teams are disabled; only the treatment-team filter below applies.

        # First pass query to get the list of patients and emergency department presentation times
        cohortQuery = """
            select
                adt1.pat_anon_id,
                adt1.pat_enc_csn_anon_id,
                adt1.shifted_transf_in_dt_tm as edAdmitTime,
                adt2.shifted_transf_out_dt_tm as dischargeTime
            from stride_adt as adt1, stride_adt as adt2
            where adt1.pat_anon_id in
                (select patient_id
                 from patient_item inner join clinical_item on patient_item.clinical_item_id = clinical_item.clinical_item_id
                 where clinical_item.clinical_item_category_id = 161 AND clinical_item.description = '%s')
            and adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id
            """ % ("Tt Pamf Med (Primary)")
        print(cohortQuery, file=sys.stderr)
        cursor.execute(cohortQuery)

        patientEpisodes = list()
        patientEpisodeById = dict()

        # Build basic patient ID, admit time and discharge time per episode
        prog = ProgressDots()
        for row in iter(cursor.fetchone, None):
            (patientId, encounterId, edAdmitTime, dischargeTime) = row
            patientEpisode = RowItemModel(
                {
                    "patient_id": patientId,
                    "edAdmitTime": edAdmitTime,
                    "dischargeTime": dischargeTime,
                    "encounter_id": encounterId,
                    # Default encounter data to null in case can't find it later
                    "payorTitle": None,
                    "bpSystolic": None,
                    "bpDiastolic": None,
                    "temperature": None,
                    "pulse": None,
                    "respirations": None,
                }
            )
            patientEpisodes.append(patientEpisode)
            # Only the first episode seen for an encounter receives the encounter data
            if patientEpisode["encounter_id"] not in patientEpisodeById:
                patientEpisodeById[patientEpisode["encounter_id"]] = patientEpisode
            prog.update()
        prog.printStatus()

        # Second query phase to link to encounter information (e.g., insurance, admitting vital signs)
        if patientEpisodes:
            encounterIds = columnFromModelList(patientEpisodes, "encounter_id")
            query = SQLQuery()
            for column in (
                "pat_id",
                "pat_enc_csn_id",
                "title",
                "bp_systolic",
                "bp_diastolic",
                "temperature",
                "pulse",
                "respirations",
            ):
                query.addSelect(column)
            query.addFrom("stride_patient_encounter")
            query.addWhereIn("pat_enc_csn_id", encounterIds)
            cursor.execute(str(query), query.params)

            for row in iter(cursor.fetchone, None):
                (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic,
                 temperature, pulse, respirations) = row
                if encounterId in patientEpisodeById:
                    patientEpisode = patientEpisodeById[encounterId]
                    if patientEpisode["payorTitle"] is None:
                        # Single encounters may have multiple payors to track
                        patientEpisode["payorTitle"] = set()
                    patientEpisode["payorTitle"].add(payorTitle)
                    # With several rows per encounter, the last row's values win
                    patientEpisode["bpSystolic"] = bpSystolic
                    patientEpisode["bpDiastolic"] = bpDiastolic
                    patientEpisode["temperature"] = temperature
                    patientEpisode["pulse"] = pulse
                    patientEpisode["respirations"] = respirations

        # Drop results as tab-delimited text output
        formatter = TextResultsFormatter(outputFile)
        formatter.formatResultDicts(patientEpisodes, addHeaderRow=True)

        return patientEpisodes
    finally:
        cursor.close()
        conn.close()
# Select the second (index one) form (the first form is a search query box)
#br.select_form(nr=1)
# User credentials, single login and then store a cookie
#br.form['username'] = '******'
#br.form['password'] = '******'
# Login
#br.submit()

# Base URL to query for, with a placeholder for the zero-based page number
BASE_URL = 'http://www.samhsa.gov/medication-assisted-treatment/physician-program-data/treatment-physician-locator?field_bup_physician_us_state_value=All&page=%s'
# Local file name pattern, one file per downloaded page
BASE_OUTPUT_FILENAME = 'buprenorphinePhysicians.%s.htm'
N_PAGES = 641
#N_PAGES = 10

# Download every page with the browser object `br` and store it locally as-is
progress = ProgressDots(big=100, small=2)
for iPage in range(N_PAGES):
    sourceURL = BASE_URL % (iPage)
    sourceFile = br.open(sourceURL)
    try:
        # Just store whole file in memory for simplicity
        sourceContent = sourceFile.read()
    finally:
        # Release the response even if the read fails
        sourceFile.close()

    localFilename = BASE_OUTPUT_FILENAME % (iPage)
    with open(localFilename, "w") as localFile:
        localFile.write(sourceContent)

    progress.update()
progress.printStatus()
for procCode, description, count in subResults: print str.join( "\t", [procCode, description, lastBaseName, lastComponentName]) orderProcIds.clear() # Reset once done nIds = 0 lastBaseName = baseName lastComponentName = componentName if nIds < SAMPLE_SIZE: # Don't track all, just the first sample orderProcIds.add(orderProcId) nIds += 1 row = cursor.fetchone() prog.update() # Final base name to query off of subQuery = """ select proc_code, description, count(order_proc_id) from stride_order_proc where order_proc_id in (%s) group by proc_code, description """ % generatePlaceholders(nIds) subResults = DBUtil.execute(subQuery, tuple(orderProcIds), conn=conn) for procCode, description, count in subResults: print str.join("\t", [procCode, description, lastBaseName, lastComponentName]) prog.printStatus()
def main(argv):
    """Print association statistics for prescriptions against diagnosis pairs.

    Reads the tab-delimited association file named by argv[1] twice. After a
    header row, the columns are read as: Rx, Dx1, Dx2, Rx+Dx1+Dx2 count,
    Rx count, Dx1+Dx2 count, total count.

    Pass 1 collects single-diagnosis baselines from the rows where
    Dx1 == Dx2. Pass 2 prints one tab-delimited line per row to stdout (after
    a header line) with the observed counts, the counts expected if the
    diagnoses occurred independently, the chi-square p-value of observed
    against expected, and the ContingencyStats values named in statIds. A
    statistic that raises ZeroDivisionError is printed as "None".

    Args:
        argv: Command-line arguments; argv[1] is the association file path.

    Raises:
        KeyError: If a row refers to an Rx/Dx or Dx that has no Dx1 == Dx2
            baseline row in the file.
    """
    # Initial pass to get single diagnosis baselines by looking for 2x Dx combos where Dx1 = Dx2
    countByDx = dict()
    countByRxDx = dict()
    with open(argv[1]) as associationFile:
        associationFile.readline()  # Dump header row
        for line in associationFile:
            # Strip only the line terminator, so empty leading or trailing
            # columns keep their positions
            chunks = line.rstrip("\r\n").split("\t")
            rx = chunks[0]
            dx1 = chunks[1]
            dx2 = chunks[2]
            if dx1 == dx2:
                countByRxDx[(rx, dx1)] = int(chunks[3])
                countByDx[dx1] = int(chunks[5])

    # Second pass to now do stats for combo diagnoses, to see if prescription shows difference between 1 or both diagnoses
    statIds = (
        "P-Fisher",
        "P-YatesChi2",
        "oddsRatio",
        "relativeRisk",
        "interest",
        "LR+",
        "LR-",
        "sensitivity",
        "specificity",
        "PPV",
        "NPV",
    )
    headerCols = [
        "Rx",
        "Dx1",
        "Dx2",
        "RxDx1Dx2Count",
        "RxDx1Count",
        "RxDx2Count",
        "RxCount",
        "Dx1Dx2Count",
        "Total",
        "E(RxDx1Dx2Count)",
        "E(RxDx2Dx1Count)",
        "E(Dx1Dx2Count)",
        "P-Chi2-Obs:Exp",
    ]
    headerCols.extend(statIds)
    print("\t".join(headerCols))

    with open(argv[1]) as associationFile:
        associationFile.readline()  # Dump header row
        progress = ProgressDots()
        for line in associationFile:
            chunks = line.rstrip("\r\n").split("\t")
            rx = chunks[0]
            dx1 = chunks[1]
            dx2 = chunks[2]
            rxDx1Dx2Count = int(chunks[3])
            rxDx1Count = countByRxDx[(rx, dx1)]
            rxDx2Count = countByRxDx[(rx, dx2)]
            rxCount = int(chunks[4])
            dx1Count = countByDx[dx1]
            dx2Count = countByDx[dx2]
            dx1dx2Count = int(chunks[5])
            totalCount = float(chunks[6])  # Floating point to auto-convert float divisions later

            conStats = ContingencyStats(rxDx1Dx2Count, rxCount, dx1dx2Count, totalCount)

            # Expected vs. observed Rx rates dependent on presence of the
            # Dx1Dx2 combination, based on whether Rx rates per diagnosis are
            # independent of the combination
            observed = array([
                (rxDx1Dx2Count),
                (dx1dx2Count - rxDx1Dx2Count),
                (rxCount - rxDx1Dx2Count),
                (totalCount - dx1dx2Count - rxCount + rxDx1Dx2Count),
            ])

            # Expected rates based on assumption that diagnoses occur independently of one another
            expectedRxDx1Dx2 = (rxDx1Count * dx2Count / totalCount)
            expectedRxDx2Dx1 = (rxDx2Count * dx1Count / totalCount)
            expectedDx1Dx2 = (dx1Count * dx2Count / totalCount)
            expected = array([
                (expectedRxDx1Dx2),
                (expectedDx1Dx2 - expectedRxDx1Dx2),
                (rxCount - expectedRxDx1Dx2),
                (totalCount - expectedDx1Dx2 - rxCount + expectedRxDx1Dx2),
            ])

            (chi2ObsExp, pChi2ObsExp) = chisquare(observed, expected)

            dataCells = [
                rx, dx1, dx2, rxDx1Dx2Count, rxDx1Count, rxDx2Count, rxCount,
                dx1dx2Count, totalCount, expectedRxDx1Dx2, expectedRxDx2Dx1,
                expectedDx1Dx2, pChi2ObsExp
            ]
            for statId in statIds:
                try:
                    dataCells.append(conStats[statId])
                except ZeroDivisionError:
                    dataCells.append(None)

            # String conversion to allow for concatenation
            print("\t".join([str(value) for value in dataCells]))
            progress.update()
        progress.printStatus()
]; for categoryName in CATEGORY_LIST: for itemPrefix in ITEM_PREFIXES: specificArgv = ["-c", categoryName, "-i", itemPrefix, "results/queryResults.%s.%s.tab.gz" % (itemPrefix, categoryName) ]; specificArgvList.append(specificArgv); prog = ProgressDots(1,1,"Processes",total=len(specificArgvList)); for specificArgv in specificArgvList: key = "%.3d.%s" % (prog.getCounts(), str.join("_",specificArgv) ); key = key.replace("/",".."); # Don't want to use directory separator in temp log file name argv = list(baseArgv) argv.extend(specificArgv); log.info( "Starting: "+str.join(" ", argv) ); logFile = stdOpen(LOG_FILE_TEMPLATE % key,"w") # Blocking sub-process call if want serial processes. #subprocess.call(argv, stderr=logFile); # Non-blocking subprocess.Popen to spawn parallel processes process = subprocess.Popen(argv, stderr=logFile); log.info("Process ID: %s" % process.pid); # Print command lines to effectively generate a .sh script #print "nohup", #print str.join(" ", argv), #print "&>", LOG_FILE_TEMPLATE % key,"&" prog.update(); prog.printStatus();
def queryPatientEpisodes(outputFile, extractor):
    """Query emergency department admission episodes with suspected sepsis and write them to outputFile.

    Steps, all on one connection:
      1. Load the clinical item IDs that suggest suspected infection / sepsis
         (IV antibiotics, blood cultures, respiratory viral panels) and the
         item IDs of the treatment teams mapped to the "SurgerySpecialty"
         category, which are excluded.
      2. Query stride_adt for emergency department admissions of patients
         with any suspect item and no excluded team item, paired with the
         matching discharge event. Each returned row becomes one episode.
      3. Look up payor title and vital signs in stride_patient_encounter for
         those encounters and copy them onto the first episode recorded for
         each encounter ID. This step is skipped when step 2 found no
         episodes, so no empty "in ()" clause is built.

    The episodes are then written through TextResultsFormatter with a header
    row.

    Args:
        outputFile: Destination handed to TextResultsFormatter.
        extractor: Object passed to the item ID loaders and used for
            loadMapData("TreatmentTeamGroups").

    Returns:
        List of RowItemModel episodes ordered by admit time. "payorTitle" is
        None or a set of titles; the vital sign fields stay None when no
        encounter row was found.
    """
    log.info("Select patient admissions with possible/probable sepsis within 24 hours of admission (long query >60 min?)...")

    conn = DBUtil.connection()
    cursor = conn.cursor()
    try:
        # Look for items indicating suspected infection / sepsis
        ivAntibioticItemIds = loadIVAntibioticItemIds(extractor)
        bloodCultureItemIds = loadBloodCultureItemIds(extractor)
        respiratoryViralPanelItemIds = loadRespiratoryViralPanelItemIds(extractor)

        # Merge IV antibiotics and blood cultures, respiratory panels as items that suggest sepsis is suspected
        suspectSepsisItemIds = ivAntibioticItemIds.union(bloodCultureItemIds.union(respiratoryViralPanelItemIds))
        suspectSepsisItemIdsStr = ",".join([str(itemId) for itemId in suspectSepsisItemIds])

        # Look for primary surgery teams to exclude
        excludeTeamCategory = "SurgerySpecialty"
        excludeTreatmentTeams = [
            row["treatment_team"]
            for row in extractor.loadMapData("TreatmentTeamGroups")
            if row["team_category"] == excludeTeamCategory
        ]

        query = SQLQuery()
        query.addSelect("clinical_item_id")
        query.addFrom("clinical_item")
        query.addWhereIn("description", excludeTreatmentTeams)
        excludeTeamItemIds = set()
        for row in DBUtil.execute(query, conn=conn):
            excludeTeamItemIds.add(row[0])
        excludeTeamItemIdsStr = ",".join([str(itemId) for itemId in excludeTeamItemIds])

        # NOTE(review): both ID lists are pasted into the SQL text below; an
        # empty list would produce "in ()" - confirm the loaders never return
        # empty sets.

        # First pass query to get the list of patients and emergency department presentation times.
        # Each SQL "--" comment sits on its own line so it cannot swallow query text.
        cohortQuery = """
            -- Pick out date(s) when admitted through emergency department and matching discharge time
            select
                adt1.pat_anon_id,
                adt1.pat_enc_csn_anon_id,
                adt1.shifted_transf_in_dt_tm as edAdmitTime,
                adt2.shifted_transf_out_dt_tm as dischargeTime
            from stride_adt as adt1, stride_adt as adt2
            where
                -- Admission event
                adt1.department_in = 'EMERGENCY DEPARTMENT' and
                adt1.event_in = 'Admission' and
                adt1.pat_anon_id in
                (
                    -- Select any patient with any suspected sepsis related order (i.e., IV antibiotics or blood cultures)
                    select patient_id
                    from patient_item as pi
                    where pi.clinical_item_id in (%s)
                    except
                    -- Exclude any patient who has been on a primary surgery team
                    select patient_id
                    from patient_item
                    where clinical_item_id in (%s)
                    -- -12434586418575,-12432455207729,-12428492282572,-12428492282572,-12424048595257,-12414081679705
                ) and
                adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id and
                -- Discharge event
                adt2.event_out = 'Discharge'
            order by adt1.shifted_transf_in_dt_tm
            """ % (suspectSepsisItemIdsStr, excludeTeamItemIdsStr)
        # sys.stderr.write is valid syntax on both Python 2 and Python 3
        sys.stderr.write(cohortQuery + "\n")
        cursor.execute(cohortQuery)

        patientEpisodes = list()
        patientEpisodeById = dict()

        # Build basic patient ID, ED presentation time and discharge time per episode
        prog = ProgressDots()
        for row in iter(cursor.fetchone, None):
            (patientId, encounterId, edAdmitTime, dischargeTime) = row
            patientEpisode = RowItemModel(
                {
                    "patient_id": patientId,
                    "edAdmitTime": edAdmitTime,
                    "dischargeTime": dischargeTime,
                    "encounter_id": encounterId,
                    # Default encounter data to null in case can't find it later
                    "payorTitle": None,
                    "bpSystolic": None,
                    "bpDiastolic": None,
                    "temperature": None,
                    "pulse": None,
                    "respirations": None,
                }
            )
            patientEpisodes.append(patientEpisode)
            # Only the first episode seen for an encounter receives the encounter data
            if patientEpisode["encounter_id"] not in patientEpisodeById:
                patientEpisodeById[patientEpisode["encounter_id"]] = patientEpisode
            prog.update()
        prog.printStatus()

        # Second query phase to link to encounter information (e.g., insurance, admitting vital signs)
        if patientEpisodes:
            encounterIds = columnFromModelList(patientEpisodes, "encounter_id")
            query = SQLQuery()
            for column in (
                "pat_id",
                "pat_enc_csn_id",
                "title",
                "bp_systolic",
                "bp_diastolic",
                "temperature",
                "pulse",
                "respirations",
            ):
                query.addSelect(column)
            query.addFrom("stride_patient_encounter")
            query.addWhereIn("pat_enc_csn_id", encounterIds)
            cursor.execute(str(query), query.params)

            for row in iter(cursor.fetchone, None):
                (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic,
                 temperature, pulse, respirations) = row
                if encounterId in patientEpisodeById:
                    patientEpisode = patientEpisodeById[encounterId]
                    if patientEpisode["payorTitle"] is None:
                        # Single encounters may have multiple payors to track
                        patientEpisode["payorTitle"] = set()
                    patientEpisode["payorTitle"].add(payorTitle)
                    # With several rows per encounter, the last row's values win
                    patientEpisode["bpSystolic"] = bpSystolic
                    patientEpisode["bpDiastolic"] = bpDiastolic
                    patientEpisode["temperature"] = temperature
                    patientEpisode["pulse"] = pulse
                    patientEpisode["respirations"] = respirations

        # Drop results as tab-delimited text output
        formatter = TextResultsFormatter(outputFile)
        formatter.formatResultDicts(patientEpisodes, addHeaderRow=True)

        return patientEpisodes
    finally:
        cursor.close()
        conn.close()
def main(argv):
    """Print association statistics between opioid prescriptions and diagnosis groups.

    For each active ingredient in the hard-coded medication ID table, counts
    the patients returned by baseQuery() restricted to those medication IDs,
    then counts them again per diagnosis group (or per diagnosis pair when
    DOUBLE_DX is set) from stride_problem_list entries noted before
    LIMIT_DATE. Each group is printed as one tab-delimited line to stdout with
    the counts and the ContingencyStats values named in statIds. A statistic
    that raises ZeroDivisionError is printed as "None".

    The header line is printed while the progress counter is still zero.

    Args:
        argv: Command-line arguments; not used.
    """
    medIdsByActiveRx = {
        'Buprenorphine': ('125498', '114474', '212560', '114475', '114467', '114468'),
        'Fentanyl Patch': ('2680', '27908', '125379', '27905', '27906', '540107', '540638', '540101', '27907'),
        'Methadone': ('540483', '4953', '4951', '10546', '214468', '15996', '41938', '4954', '4952'),
        'Hydrocodone': (
            '3724', '4579', '8576', '8577', '8951', '10204', '12543', '13040',
            '14963', '14965', '14966', '17061', '17927', '19895', '20031',
            '28384', '29486', '29487', '34505', '34544', '35613', '117862',
            '204249', '206739'),
        'Hydromorphone': (
            '2458', '2459', '2464', '2465', '3757', '3758', '3759', '3760',
            '3761', '10224', '10225', '10226', '10227', '200439', '201094',
            '201096', '201098', '540125', '540179', '540666'),
        'Morphine': (
            '5167', '5168', '5172', '5173', '5176', '5177', '5178', '5179',
            '5180', '5183', '6977', '10655', '15852', '20908', '20909',
            '20910', '20914', '20915', '20919', '20920', '20921', '20922',
            '29464', '30138', '31413', '36140', '36141', '79691', '87820',
            '89282', '91497', '95244', '96810', '112562', '112564', '115335',
            '115336', '126132', '198543', '198544', '198623', '201842',
            '201848', '205011', '206731', '207949', '208896', '540182',
            '540300'),
        'Oxycodone': (
            '5940', '5941', '6122', '6981', '10812', '10813', '10814', '14919',
            '16121', '16123', '16129', '16130', '19187', '26637', '26638',
            '27920', '27921', '27922', '27923', '28897', '28899', '28900',
            '31851', '31852', '31863', '31864', '92248', '126939', '200451',
            '203690', '203691', '203692', '203705', '203706', '203707',
            '204020', '204021'),
    }

    # Names of the ContingencyStats values appended to every output row
    statIds = (
        "P-Fisher",
        "P-YatesChi2",
        "oddsRatio",
        "relativeRisk",
        "interest",
        "LR+",
        "LR-",
        "sensitivity",
        "specificity",
        "PPV",
        "NPV",
    )

    # Overall patient count; float so later divisions are not truncated
    totalPatients = float(DBUtil.execute(baseQuery())[0][0])

    # Baseline patient count per diagnosis group (or diagnosis pair)
    dxQuery = SQLQuery()
    dxQuery.addSelect("count(distinct prob.pat_id) as ptCount")
    dxQuery.addSelect("prob.%s" % DX_COL)
    dxQuery.addFrom("stride_problem_list as prob")
    dxQuery.addWhereOp("prob.noted_date", "<", LIMIT_DATE)
    dxQuery.addGroupBy("prob.%s" % DX_COL)
    if DOUBLE_DX:
        dxQuery.addSelect("prob2.%s" % DX_COL)
        dxQuery.addFrom("stride_problem_list as prob2")
        dxQuery.addWhere("prob.pat_id = prob2.pat_id")
        dxQuery.addWhereOp("prob2.noted_date", "<", LIMIT_DATE)
        dxQuery.addGroupBy("prob2.%s" % DX_COL)

    patientsPerDxGroup = dict()
    for dxRow in DBUtil.execute(dxQuery):
        groupKey = dxRow[1]
        if DOUBLE_DX:
            # Composite tuple including second diagnosis
            groupKey = (groupKey, dxRow[2])
        patientsPerDxGroup[groupKey] = dxRow[0]

    progress = ProgressDots()
    for activeRx, medIds in medIdsByActiveRx.items():
        rxQuery = baseQuery()
        rxQuery.addWhereIn("medication_id", medIds)

        # Baseline prescription count, taken before the query is extended per diagnosis
        rxPtCount = DBUtil.execute(rxQuery)[0][0]

        if progress.getCounts() == 0:
            headerCols = ["Rx", "Dx", "RxDxCount", "RxCount", "DxCount", "Total"]
            if DOUBLE_DX:
                headerCols.insert(2, "Dx2")
            headerCols.extend(statIds)
            print("\t".join(headerCols))

        # Query out per diagnosis group, but do as aggregate grouped query
        rxQuery.addSelect("prob.%s" % DX_COL)
        rxQuery.addFrom("stride_problem_list as prob")
        rxQuery.addWhere("med.pat_id = prob.pat_id")
        rxQuery.addWhereOp("prob.noted_date", "<", LIMIT_DATE)
        rxQuery.addGroupBy("prob.%s" % DX_COL)
        if DOUBLE_DX:
            rxQuery.addSelect("prob2.%s" % DX_COL)
            rxQuery.addFrom("stride_problem_list as prob2")
            rxQuery.addWhere("prob.pat_id = prob2.pat_id")
            rxQuery.addWhereOp("prob2.noted_date", "<", LIMIT_DATE)
            rxQuery.addGroupBy("prob2.%s" % DX_COL)

        for rxDxRow in DBUtil.execute(rxQuery):
            rxDxPtCount = rxDxRow[0]
            dxGroup = rxDxRow[1]
            if DOUBLE_DX:
                # Composite tuple including second diagnosis
                dxGroup = (dxGroup, rxDxRow[2])
            dxPtCount = patientsPerDxGroup[dxGroup]

            conStats = ContingencyStats(rxDxPtCount, rxPtCount, dxPtCount, totalPatients)

            dataCells = [activeRx, dxGroup, rxDxPtCount, rxPtCount, dxPtCount, totalPatients]
            if DOUBLE_DX:
                # Split the diagnosis pair over the Dx and Dx2 columns
                dataCells[1] = dxGroup[0]
                dataCells.insert(2, dxGroup[1])
            for statId in statIds:
                try:
                    dataCells.append(conStats[statId])
                except ZeroDivisionError:
                    dataCells.append(None)

            # String conversion to allow for concatenation
            print("\t".join([str(cell) for cell in dataCells]))

            # NOTE(review): progress is advanced once per output row here -
            # confirm against the original layout.
            progress.update()
    progress.printStatus()