DESCRIPTION_PREFIXES = \ [ "A","B","C","D","E","F","G","H","I", ] specificArgvList = \ [ ] for medRoute in ROUTE_LIST: for descriptionPrefix in DESCRIPTION_PREFIXES: specificArgv = [ "-r", medRoute, "-d", descriptionPrefix, "results/queryResults.%s.%s.tab" % (descriptionPrefix, medRoute) ] specificArgvList.append(specificArgv) prog = ProgressDots(1, 1, "Processes", total=len(specificArgvList)) for specificArgv in specificArgvList: key = "%.3d.%s" % (prog.getCounts(), str.join("_", specificArgv)) key = key.replace("/", "..") # Don't want to use directory separator in temp log file name argv = list(baseArgv) argv.extend(specificArgv) log.info("Starting: " + str.join(" ", argv)) logFile = stdOpen(LOG_FILE_TEMPLATE % key, "w") # Blocking sub-process call if want serial processes. #subprocess.call(argv, stderr=logFile); # Non-blocking subprocess.Popen to spawn parallel processes process = subprocess.Popen(argv, stderr=logFile) log.info("Process ID: %s" % process.pid)
def syncTable(sourceConn, targetConn, syncTableName, rowIDStrSet=None, formatter=None): if formatter is None: idCol = DBUtil.defaultIDColumn(syncTableName) idQuery = "select %s from %s" % (idCol, syncTableName) # Collect all of the IDs known in the target database and store in memory for rapid lookup print("Querying for IDs from Target Database", file=sys.stderr) targetIdTable = DBUtil.execute(idQuery, conn=targetConn) targetIdSet = set() for row in targetIdTable: targetId = row[0] targetIdSet.add(targetId) # Query data out of the source table, but do it by a cursor so we can stream through large data tables print("Querying for Source Data", file=sys.stderr) dataQuery = "select * from %s" % (syncTableName) sourceCursor = sourceConn.cursor() sourceCursor.execute(dataQuery) colNames = DBUtil.columnNamesFromCursor(sourceCursor) targetCursor = targetConn.cursor() insertQuery = None updateQuery = None progress = ProgressDots() row = sourceCursor.fetchone() while row is not None: dataModel = RowItemModel(row, colNames) if rowIDStrSet is None or str(dataModel[idCol]) in rowIDStrSet: if rowIDStrSet is not None: print("Syncing record: %s" % dataModel[idCol], file=sys.stderr) if dataModel[idCol] not in targetIdSet: # Row does not yet exist in target database, need to insert it if insertQuery is None: insertQuery = DBUtil.buildInsertQuery( syncTableName, list(dataModel.keys())) insertParams = list(dataModel.values()) targetCursor.execute(insertQuery, insertParams) else: # Row already exists in target database, just update values if updateQuery is None: updateQuery = DBUtil.buildUpdateQuery( syncTableName, list(dataModel.keys())) updateParams = [] updateParams.extend(list(dataModel.values())) updateParams.append(dataModel[idCol]) targetCursor.execute(updateQuery, updateParams) if progress.GetCounts() % progress.big == 0: targetConn.commit() row = sourceCursor.fetchone() progress.Update() progress.PrintStatus() targetConn.commit() else: ##Do something with thr formatter ##Set up the formatter theFormatter = formatter(syncTableName, targetConn, includeColumnNames=True, autoCommit=True) #Call DB execute res = DBUtil.execute("select * from %s" % syncTableName, includeColumnNames=True, conn=sourceConn, formatter=theFormatter)
stopWords = set(stopWordFile.read().split()) countPerToken = dict() conn = DBUtil.connection() cursor = conn.cursor() try: query = \ """select stride_pager_message_id, date_entered, messaging_id, name, message_text from stride_pager_message where message_text !~ 'covering for ID' limit 100000 """ cursor.execute(query) prog = ProgressDots() row = cursor.fetchone() while row is not None: pagerText = row[-1] tokens = pagerText.split() for iToken, token in enumerate(tokens): tokens[iToken] = token.strip("#?:!.,;").lower() # Remove flanking punctuations. Case insensitive for token in tokens: if token not in stopWords and len( token) > 1: # Skip stop words and single character words if token not in countPerToken: countPerToken[token] = 0 countPerToken[token] += 1 row = cursor.fetchone() prog.update()
def queryPatientEpisodes(outputFile, extractor): log.info( "Select patient admissions with provider category of Tt Pamf Med (Primary) or Tt Med Univ (Primary)" ) conn = DBUtil.connection() cursor = conn.cursor() try: # # Clinical item category for admission diagnoses # # ADMIT_DX_CATEGORY_ID = 2; # admitDxCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like '%%ADMIT_DX%%'", conn=conn)[0][0]; # # Look for items indicating suspected infection / sepsis # ivAntibioticItemIds = loadIVAntibioticItemIds(extractor); # bloodCultureItemIds = loadBloodCultureItemIds(extractor); # respiratoryViralPanelItemIds = loadRespiratoryViralPanelItemIds(extractor); # # Merge IV antibiotics and blood cultures, respiratory panels as items that suggest sepsis is suspected # suspectSepsisItemIds = ivAntibioticItemIds.union(bloodCultureItemIds.union(respiratoryViralPanelItemIds)); # suspectSepsisItemIdsStr = str.join(',', [str(itemId) for itemId in suspectSepsisItemIds]); # Convert to comma-separated string via a str.join function on list contracture # # Look for primary surgery teams to exclude # excludeTeamCategory = "SurgerySpecialty"; # excludeTreatmentTeams = list(); # for row in extractor.loadMapData("TreatmentTeamGroups"): # if row["team_category"] == excludeTeamCategory: # excludeTreatmentTeams.append(row["treatment_team"]); # query = SQLQuery(); # query.addSelect("clinical_item_id"); # query.addFrom("clinical_item"); # query.addWhereIn("description", excludeTreatmentTeams ); # excludeTeamItemIds = set(); # for row in DBUtil.execute(query, conn=conn): # excludeTeamItemIds.add(row[0]); # excludeTeamItemIdsStr = str.join(',', [str(itemId) for itemId in excludeTeamItemIds]); # Convert to comma-separated string via a str.join function on list contracture # First pass query to get the list of patients and emergency department presentation times cohortQuery = \ """ select adt1.pat_anon_id, adt1.pat_enc_csn_anon_id, adt1.shifted_transf_in_dt_tm as edAdmitTime, adt2.shifted_transf_out_dt_tm as dischargeTime from stride_adt as adt1, stride_adt as adt2 where adt1.pat_anon_id in (select patient_id from patient_item inner join clinical_item on patient_item.clinical_item_id = clinical_item.clinical_item_id where clinical_item.clinical_item_category_id = 161 AND clinical_item.description = '%s') and adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id """ % ("Tt Pamf Med (Primary)") print >> sys.stderr, cohortQuery cursor.execute(cohortQuery) patientEpisodes = list() patientEpisodeById = dict() # Collect Build basic patient ID and # ED presentation dates and Discharge date/time prog = ProgressDots() row = cursor.fetchone() while row is not None: (patientId, encounterId, edAdmitTime, dischargeTime) = row #patientId = int(patientId); patientEpisode = \ RowItemModel \ ( { "patient_id":patientId, "edAdmitTime":edAdmitTime, "dischargeTime":dischargeTime, "encounter_id":encounterId, "payorTitle": None, # Default encounter data to null in case can't find it later "bpSystolic": None, "bpDiastolic": None, "temperature": None, "pulse": None, "respirations": None, } ) patientEpisodes.append(patientEpisode) if patientEpisode["encounter_id"] not in patientEpisodeById: patientEpisodeById[ patientEpisode["encounter_id"]] = patientEpisode prog.update() row = cursor.fetchone() prog.printStatus() # Second query phase to link to encounter information (e.g., insurance, admitting vital signs) encounterIds = columnFromModelList(patientEpisodes, "encounter_id") query = SQLQuery() query.addSelect("pat_id") query.addSelect("pat_enc_csn_id") query.addSelect("title") query.addSelect("bp_systolic") query.addSelect("bp_diastolic") query.addSelect("temperature") query.addSelect("pulse") query.addSelect("respirations") query.addFrom("stride_patient_encounter") query.addWhereIn("pat_enc_csn_id", encounterIds) cursor.execute(str(query), query.params) row = cursor.fetchone() while row is not None: (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic, temperature, pulse, respirations) = row if encounterId in patientEpisodeById: patientEpisode = patientEpisodeById[encounterId] if patientEpisode["payorTitle"] is None: patientEpisode["payorTitle"] = set() # Single encounters may have multiple payors to track patientEpisode["payorTitle"].add(payorTitle) patientEpisode["bpSystolic"] = bpSystolic patientEpisode["bpDiastolic"] = bpDiastolic patientEpisode["temperature"] = temperature patientEpisode["pulse"] = pulse patientEpisode["respirations"] = respirations row = cursor.fetchone() # Drop results as tab-delimited text output formatter = TextResultsFormatter(outputFile) formatter.formatResultDicts(patientEpisodes, addHeaderRow=True) return patientEpisodes finally: cursor.close() conn.close()
def queryPatientEpisodes(outputFile, extractor): log.info( "Select patient admissions with possible/probable sepsis within 24 hours of admission (long query >60 min?)..." ) conn = DBUtil.connection() cursor = conn.cursor() try: # Clinical item category for admission diagnoses # ADMIT_DX_CATEGORY_ID = 2; admitDxCategoryId = DBUtil.execute( "select clinical_item_category_id from clinical_item_category where description like '%%ADMIT_DX%%'", conn=conn)[0][0] # Look for items indicating suspected infection / sepsis ivAntibioticItemIds = loadIVAntibioticItemIds(extractor) bloodCultureItemIds = loadBloodCultureItemIds(extractor) respiratoryViralPanelItemIds = loadRespiratoryViralPanelItemIds( extractor) # Merge IV antibiotics and blood cultures, respiratory panels as items that suggest sepsis is suspected suspectSepsisItemIds = ivAntibioticItemIds.union( bloodCultureItemIds.union(respiratoryViralPanelItemIds)) suspectSepsisItemIdsStr = str.join( ',', [str(itemId) for itemId in suspectSepsisItemIds]) # Convert to comma-separated string via a str.join function on list contracture # Look for primary surgery teams to exclude excludeTeamCategory = "SurgerySpecialty" excludeTreatmentTeams = list() for row in extractor.loadMapData("TreatmentTeamGroups"): if row["team_category"] == excludeTeamCategory: excludeTreatmentTeams.append(row["treatment_team"]) query = SQLQuery() query.addSelect("clinical_item_id") query.addFrom("clinical_item") query.addWhereIn("description", excludeTreatmentTeams) excludeTeamItemIds = set() for row in DBUtil.execute(query, conn=conn): excludeTeamItemIds.add(row[0]) excludeTeamItemIdsStr = str.join( ',', [str(itemId) for itemId in excludeTeamItemIds]) # Convert to comma-separated string via a str.join function on list contracture # First pass query to get the list of patients and emergency department presentation times cohortQuery = \ """ -- Pick out date(s) when admitted through emergency department and matching discharge time select adt1.pat_anon_id, adt1.pat_enc_csn_anon_id, adt1.shifted_transf_in_dt_tm as edAdmitTime, adt2.shifted_transf_out_dt_tm as dischargeTime from stride_adt as adt1, stride_adt as adt2 where -- Admission event adt1.department_in = 'EMERGENCY DEPARTMENT' and adt1.event_in = 'Admission' and adt1.pat_anon_id in ( -- Select any patient with any suspected sepsis related order (i.e., IV antibiotics or blood cultures) -- select patient_id -- from patient_item as pi -- where pi.clinical_item_id in (%s) -- except -- -- Exclude any patient who has been on a primary surgery team -- select patient_id -- from patient_item -- where clinical_item_id in (%s) -12434586418575,-12432455207729,-12428492282572,-12428492282572,-12424048595257,-12414081679705 ) and adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id and -- Discharge event adt2.event_out = 'Discharge' order by adt1.shifted_transf_in_dt_tm """ % (suspectSepsisItemIdsStr, excludeTeamItemIdsStr) print >> sys.stderr, cohortQuery cursor.execute(cohortQuery) patientEpisodes = list() patientEpisodeById = dict() # Collect Build basic patient ID and # ED presentation dates and Discharge date/time prog = ProgressDots() row = cursor.fetchone() while row is not None: (patientId, encounterId, edAdmitTime, dischargeTime) = row #patientId = int(patientId); patientEpisode = \ RowItemModel \ ( { "patient_id":patientId, "edAdmitTime":edAdmitTime, "dischargeTime":dischargeTime, "encounter_id":encounterId, "payorTitle": None, # Default encounter data to null in case can't find it later "bpSystolic": None, "bpDiastolic": None, "temperature": None, "pulse": None, "respirations": None, } ) patientEpisodes.append(patientEpisode) if patientEpisode["encounter_id"] not in patientEpisodeById: patientEpisodeById[ patientEpisode["encounter_id"]] = patientEpisode prog.update() row = cursor.fetchone() prog.printStatus() # Second query phase to link to encounter information (e.g., insurance, admitting vital signs) encounterIds = columnFromModelList(patientEpisodes, "encounter_id") query = SQLQuery() query.addSelect("pat_id") query.addSelect("pat_enc_csn_id") query.addSelect("title") query.addSelect("bp_systolic") query.addSelect("bp_diastolic") query.addSelect("temperature") query.addSelect("pulse") query.addSelect("respirations") query.addFrom("stride_patient_encounter") query.addWhereIn("pat_enc_csn_id", encounterIds) cursor.execute(str(query), query.params) row = cursor.fetchone() while row is not None: (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic, temperature, pulse, respirations) = row if encounterId in patientEpisodeById: patientEpisode = patientEpisodeById[encounterId] if patientEpisode["payorTitle"] is None: patientEpisode["payorTitle"] = set() # Single encounters may have multiple payors to track patientEpisode["payorTitle"].add(payorTitle) patientEpisode["bpSystolic"] = bpSystolic patientEpisode["bpDiastolic"] = bpDiastolic patientEpisode["temperature"] = temperature patientEpisode["pulse"] = pulse patientEpisode["respirations"] = respirations row = cursor.fetchone() # Drop results as tab-delimited text output formatter = TextResultsFormatter(outputFile) formatter.formatResultDicts(patientEpisodes, addHeaderRow=True) return patientEpisodes finally: cursor.close() conn.close()
from medinfo.db import DBUtil; from medinfo.db.Model import SQLQuery; from medinfo.common.Util import ProgressDots; from medinfo.common.Util import log; conn = DBUtil.connection(); try: results = DBUtil.execute("select clinical_item_id from clinical_item where clinical_item_category_id = 161",conn=conn); clinicalItemIds = tuple([row[0] for row in results]); log.info("Deleting for %s Clinical Items" % len(clinicalItemIds) ); query = SQLQuery(); query.addSelect("patient_item_id"); query.addFrom("patient_item"); query.addWhereIn("clinical_item_id", clinicalItemIds ); prog = ProgressDots(); prog.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]; # Go ahead and load full result set into memory, so don't have potential concurrency issues with deleting items as traversing them results = DBUtil.execute(query, conn=conn ); for row in results: patientItemId = row[0]; DBUtil.execute("delete from patient_item where patient_item_id = %s", (patientItemId,), conn=conn); prog.update(); prog.printStatus(); finally: conn.close();
"""Helped script for parallel process debugging. Just takes one parameter, sleeps for that many seconds, then completes""" import sys from medinfo.common.Util import ProgressDots import time duration = int(sys.argv[1]) progress = ProgressDots() for i in xrange(duration): progress.update() progress.printStatus()
def __call__(self, analysisQuery, conn=None): """Go through the validation file to assess order set usage amongst test cases """ extConn = True if conn is None: conn = self.connFactory.connection() extConn = False try: conn = DBUtil.connection() preparer = PreparePatientItems() progress = ProgressDots(50, 1, "Patients") for patientItemData in preparer.loadPatientItemData(analysisQuery): patientId = patientItemData["patient_id"] analysisResults = \ self.analyzePatientItems \ ( patientItemData, analysisQuery, analysisQuery.baseRecQuery, patientId, analysisQuery.recommender, conn=conn ) if analysisResults is not None: (queryItemCountById, verifyItemCountById, recommendedItemIds, recommendedData, orderSetItemData) = analysisResults # Unpack results # Start aggregating and calculating result stats resultsStatData = self.calculateResultStats( patientItemData, queryItemCountById, verifyItemCountById, recommendedItemIds, self.supportRecommender.patientCountByItemId, analysisQuery.baseRecQuery, recommendedData) resultsStatData["usedOrderSetIds"] = orderSetItemData[ "allUsedOrderSetIds"] resultsStatData["numUsedOrderSets"] = len( orderSetItemData["allUsedOrderSetIds"]) resultsStatData["numUsedOrderSetItems"] = len( orderSetItemData["allUsedOrderSetItemIds"]) resultsStatData["numAvailableOrderSetItems"] = len( orderSetItemData["allAvailableOrderSetItemIds"]) resultsStatData["numRecommendableUsedOrderSetItems"] = len( orderSetItemData["recommendableUsedOrderSetItemIds"]) resultsStatData[ "numRecommendableAvailableOrderSetItems"] = len( orderSetItemData[ "recommendableAvailableOrderSetItemIds"]) resultsStatData["numRecommendableQueryItems"] = len( orderSetItemData["recommendableQueryItemIds"]) resultsStatData["numRecommendableVerifyItems"] = len( orderSetItemData["recommendableVerifyItemIds"]) resultsStatData["numRecommendableQueryVerifyItems"] = len( orderSetItemData["recommendableQueryItemIds"] | orderSetItemData["recommendableVerifyItemIds"]) # Union of two sets resultsStatData["orderSetItemUsageRate"] = 0.0 if resultsStatData["numAvailableOrderSetItems"] > 0: resultsStatData["orderSetItemUsageRate"] = float( resultsStatData["numUsedOrderSetItems"] ) / resultsStatData["numAvailableOrderSetItems"] resultsStatData[ "recommendableQueryVerifyItemFromOrderSetRate"] = 0.0 if resultsStatData["numRecommendableQueryVerifyItems"] > 0: resultsStatData[ "recommendableQueryVerifyItemFromOrderSetRate"] = float( resultsStatData[ "numRecommendableUsedOrderSetItems"] ) / resultsStatData[ "numRecommendableQueryVerifyItems"] yield resultsStatData progress.Update() # progress.PrintStatus(); finally: if not extConn: conn.close()