from io import StringIO

from medinfo.common.test.Util import MedInfoTestCase
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery

# Create SCRIPT_FILE
# clinical_item_category_id = 13 (Discharge)
# clinical_item_category_id = 23 (Admission)
# H&P Note
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")

DATA_QUERY = SQLQuery()
DATA_QUERY.addSelect("author_name")
DATA_QUERY.addSelect("pat_id")
DATA_QUERY.addSelect("note_date")
DATA_QUERY.addSelect("note_type")
DATA_QUERY.addSelect("provider_type")
DATA_QUERY.addSelect("specialty")
DATA_QUERY.addSelect("pat_enc_csn_id")
DATA_QUERY.addFrom("stride_note")
DATA_QUERY.addWhereEqual("note_type", "H&P")
DATA_QUERY.addOrderBy("author_name", dir="asc")
print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)
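# A minimal sketch of the CSV dump the comment above promises. Assumes
# DBUtil.execute returns plain row tuples in SELECT order; the output path
# is a hypothetical placeholder.
import csv

with open("hp_notes.csv", "w", newline="") as f:  # hypothetical output path
    writer = csv.writer(f)
    writer.writerow(["author_name", "pat_id", "note_date", "note_type",
                     "provider_type", "specialty", "pat_enc_csn_id"])
    writer.writerows(results)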
def loadPatientInfo(self, patientIds=None, relativeTime=None, conn=None):
    """Load basic information about the specified patients.
    Report patient state at the given time, or default to time zero.
    """
    if relativeTime is None:
        relativeTime = 0  # Just default to time zero

    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("sp.sim_patient_id")
        query.addSelect("sp.name")
        query.addSelect("sp.age_years")
        query.addSelect("sp.gender")
        query.addSelect("s.sim_state_id")
        query.addSelect("s.name as state_name")
        query.addSelect("s.description as state_description")
        query.addSelect("sps.relative_time_start")
        query.addSelect("sps.relative_time_end")
        query.addFrom("sim_patient as sp")
        query.addFrom("sim_patient_state as sps")
        query.addFrom("sim_state as s")
        query.addWhere("sp.sim_patient_id = sps.sim_patient_id")
        query.addWhere("sps.sim_state_id = s.sim_state_id")

        if patientIds is not None:
            query.addWhereIn("sp.sim_patient_id", patientIds)

        # Look for the state that matches the given relative time offset
        query.addWhereOp("sps.relative_time_start", "<=", relativeTime)
        query.openWhereOrClause()
        query.addWhere("sps.relative_time_end is null")
        query.addWhereOp("sps.relative_time_end", ">", relativeTime)
        query.closeWhereOrClause()

        query.addOrderBy("sp.name")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)

        if len(dataModels) > 0:
            # Secondary query to build lookup table of possible state transition
            # options from patient current states
            subQuery = SQLQuery()
            subQuery.addSelect("pre_state_id")
            subQuery.addSelect("post_state_id")
            subQuery.addSelect("clinical_item_id")
            subQuery.addSelect("time_trigger")
            subQuery.addFrom("sim_state_transition as sst")
            subQuery.addWhereIn("pre_state_id", columnFromModelList(dataModels, "sim_state_id"))
            subResults = DBUtil.execute(subQuery, conn=conn)

            # For each pre-state, track which clinical items or times trigger which post-states
            postStateIdByItemIdByPreStateId = dict()
            postStateIdTimeTriggerByPreStateId = dict()
            for preStateId, postStateId, itemId, timeTrigger in subResults:
                if preStateId not in postStateIdByItemIdByPreStateId:
                    postStateIdByItemIdByPreStateId[preStateId] = dict()
                postStateIdByItemIdByPreStateId[preStateId][itemId] = postStateId
                if timeTrigger is not None:
                    postStateIdTimeTriggerByPreStateId[preStateId] = (postStateId, timeTrigger)

            # Record in patient result models for retrieval
            for i, dataModel in enumerate(dataModels):
                patientId = dataModel["sim_patient_id"]
                stateId = dataModel["sim_state_id"]

                dataModel["postStateIdByItemId"] = dict()
                if stateId in postStateIdByItemIdByPreStateId:
                    dataModel["postStateIdByItemId"] = postStateIdByItemIdByPreStateId[stateId]
                dataModel["postStateIdTimeTriggerByPreStateId"] = dict()
                if stateId in postStateIdTimeTriggerByPreStateId:
                    dataModel["postStateIdTimeTrigger"] = postStateIdTimeTriggerByPreStateId[stateId]

                if dataModel["relative_time_end"] is None and "postStateIdTimeTrigger" in dataModel:
                    # Check that we haven't passed (and should thus trigger) a time-based state transition
                    (postStateId, timeTrigger) = dataModel["postStateIdTimeTrigger"]
                    preStateTime = dataModel["relative_time_start"]
                    postStateTriggerTime = (preStateTime + timeTrigger)
                    if postStateTriggerTime <= relativeTime:
                        # Trigger state transition just by time elapsed
                        self.recordStateTransition(patientId, stateId, postStateId, postStateTriggerTime, conn=conn)
                        # State change, which can yield new triggers, so recursively reload.
                        # Small risk of infinite recursion if timeTriggers are zero. Otherwise, should
                        # converge, as each recursion will update the pre-state relative_time_start.
                        dataModels[i] = self.loadPatientInfo([patientId], relativeTime, conn=conn)[0]

        return dataModels
    finally:
        if not extConn:
            conn.close()
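# Hedged usage sketch: pull patient state one simulated hour in. SimManager
# and the patient IDs are hypothetical stand-ins for whatever class hosts
# loadPatientInfo; the returned keys come from the method above.
manager = SimManager()  # hypothetical host class
for patient in manager.loadPatientInfo(patientIds=[1, 2], relativeTime=3600):
    print(patient["sim_patient_id"], patient["state_name"],
          patient["postStateIdByItemId"])  # per-state transition options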
def loadResults(self, patientId, relativeTime, conn=None):
    """Load all results active by the given relativeTime. Will look for
    sim_patient_state times and sim_patient_order for diagnostic orders,
    to extrapolate all state-specific results for each order,
    or use default values if no state-specific ones are available.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # First query for all expected result labels and states, without state-specific
        # values, as we may want outer join behavior against default state values
        query = SQLQuery()
        query.addSelect("distinct spo.sim_state_id")
        query.addSelect("sr.sim_result_id")
        query.addSelect("sr.name")
        query.addSelect("sr.description")
        query.addSelect("sr.priority")
        query.addSelect("sr.group_string")
        query.addSelect("spo.relative_time_start")
        query.addSelect("sorm.turnaround_time")
        query.addSelect("(spo.relative_time_start + sorm.turnaround_time) as result_relative_time")
        query.addFrom("sim_patient_order as spo")
        query.addFrom("sim_order_result_map as sorm")
        query.addFrom("sim_result as sr")
        query.addWhere("spo.clinical_item_id = sorm.clinical_item_id")
        query.addWhere("sorm.sim_result_id = sr.sim_result_id")
        query.addWhereEqual("spo.sim_patient_id", patientId)

        # Only unlock results if appropriate prereq orders were placed in the past
        # (and longer ago than the turnaround time)
        query.addWhereOp("spo.relative_time_start + sorm.turnaround_time", "<=", relativeTime)

        # Also check that the triggering order was not cancelled before the completion of the turnaround time
        query.addWhere("( spo.relative_time_end is null or spo.relative_time_start + sorm.turnaround_time <= spo.relative_time_end )")

        query.addOrderBy("result_relative_time")
        query.addOrderBy("sr.priority")

        resultTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        resultModels = modelListFromTable(resultTable)

        # Pass through results to get set of states to search for
        stateIds = set([DEFAULT_STATE_ID])  # Include default state to fall back on
        for resultModel in resultModels:
            stateIds.add(resultModel["sim_state_id"])

        # Second query for state-specific values
        valueQuery = SQLQuery()
        valueQuery.addSelect("ssr.sim_state_id")
        valueQuery.addSelect("ssr.sim_result_id")
        valueQuery.addSelect("ssr.num_value")
        valueQuery.addSelect("ssr.num_value_noise")
        valueQuery.addSelect("ssr.text_value")
        valueQuery.addSelect("ssr.result_flag")
        valueQuery.addSelect("ssr.clinical_item_id")  # Output clinical item if result flag means something
        valueQuery.addFrom("sim_state_result as ssr")
        valueQuery.addWhereIn("ssr.sim_state_id", stateIds)
        valueTable = DBUtil.execute(valueQuery, includeColumnNames=True, conn=conn)
        valueModels = modelListFromTable(valueTable)

        # Store in-memory dictionary for rapid cross-referencing "join" to result table
        valueModelByStateIdByResultId = dict()
        for valueModel in valueModels:
            resultId = valueModel["sim_result_id"]
            stateId = valueModel["sim_state_id"]
            if resultId not in valueModelByStateIdByResultId:
                valueModelByStateIdByResultId[resultId] = dict()
            valueModelByStateIdByResultId[resultId][stateId] = valueModel

        # Now go back through original results and join up state-specific values,
        # or use default values if needed
        resultValueModels = list()
        for resultModel in resultModels:
            resultId = resultModel["sim_result_id"]
            stateId = resultModel["sim_state_id"]
            if resultId in valueModelByStateIdByResultId:
                valueModelByStateId = valueModelByStateIdByResultId[resultId]
                if stateId in valueModelByStateId:
                    # Have a state-specific value, populate that
                    valueModel = valueModelByStateId[stateId]
                    resultModel.update(valueModel)
                elif DEFAULT_STATE_ID in valueModelByStateId:
                    # No state-specific value, but have a default one to populate instead
                    valueModel = valueModelByStateId[DEFAULT_STATE_ID]
                    resultModel.update(valueModel)
                resultValueModels.append(resultModel)
            else:
                # No result information available, even in default state. Skip these.
                pass

        return resultValueModels
    finally:
        if not extConn:
            conn.close()
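# Hedged usage sketch: fetch results visible 90 minutes into a case. The
# manager object and patient ID are hypothetical; the result keys come from
# the two queries above.
manager = SimManager()  # hypothetical host class
for result in manager.loadResults(patientId=123, relativeTime=90 * 60):
    print(result["name"], result.get("num_value"), result.get("result_flag"))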
def queryPatientEpisodes(outputFile, extractor):
    log.info("Select patient admissions with provider category of Tt Pamf Med (Primary) or Tt Med Univ (Primary)")

    conn = DBUtil.connection()
    cursor = conn.cursor()
    try:
        # # Clinical item category for admission diagnoses
        # # ADMIT_DX_CATEGORY_ID = 2;
        # admitDxCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like '%%ADMIT_DX%%'", conn=conn)[0][0]

        # # Look for items indicating suspected infection / sepsis
        # ivAntibioticItemIds = loadIVAntibioticItemIds(extractor)
        # bloodCultureItemIds = loadBloodCultureItemIds(extractor)
        # respiratoryViralPanelItemIds = loadRespiratoryViralPanelItemIds(extractor)

        # # Merge IV antibiotics and blood cultures, respiratory panels as items that suggest sepsis is suspected
        # suspectSepsisItemIds = ivAntibioticItemIds.union(bloodCultureItemIds.union(respiratoryViralPanelItemIds))
        # suspectSepsisItemIdsStr = str.join(',', [str(itemId) for itemId in suspectSepsisItemIds])  # Convert to comma-separated string via str.join on a list comprehension

        # # Look for primary surgery teams to exclude
        # excludeTeamCategory = "SurgerySpecialty"
        # excludeTreatmentTeams = list()
        # for row in extractor.loadMapData("TreatmentTeamGroups"):
        #     if row["team_category"] == excludeTeamCategory:
        #         excludeTreatmentTeams.append(row["treatment_team"])
        # query = SQLQuery()
        # query.addSelect("clinical_item_id")
        # query.addFrom("clinical_item")
        # query.addWhereIn("description", excludeTreatmentTeams)
        # excludeTeamItemIds = set()
        # for row in DBUtil.execute(query, conn=conn):
        #     excludeTeamItemIds.add(row[0])
        # excludeTeamItemIdsStr = str.join(',', [str(itemId) for itemId in excludeTeamItemIds])  # Convert to comma-separated string via str.join on a list comprehension

        # First pass query to get the list of patients and emergency department presentation times
        cohortQuery = \
            """
            select adt1.pat_anon_id, adt1.pat_enc_csn_anon_id, adt1.shifted_transf_in_dt_tm as edAdmitTime, adt2.shifted_transf_out_dt_tm as dischargeTime
            from stride_adt as adt1, stride_adt as adt2
            where adt1.pat_anon_id in
                (select patient_id
                 from patient_item
                 inner join clinical_item on patient_item.clinical_item_id = clinical_item.clinical_item_id
                 where clinical_item.clinical_item_category_id = 161
                   AND clinical_item.description = '%s')
              and adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id
            """ % ("Tt Pamf Med (Primary)")
        print(cohortQuery, file=sys.stderr)

        cursor.execute(cohortQuery)

        patientEpisodes = list()
        patientEpisodeById = dict()

        # Collect basic patient ID, ED presentation time, and discharge date/time
        prog = ProgressDots()
        row = cursor.fetchone()
        while row is not None:
            (patientId, encounterId, edAdmitTime, dischargeTime) = row
            patientEpisode = \
                RowItemModel(
                    {
                        "patient_id": patientId,
                        "edAdmitTime": edAdmitTime,
                        "dischargeTime": dischargeTime,
                        "encounter_id": encounterId,
                        "payorTitle": None,  # Default encounter data to null in case we can't find it later
                        "bpSystolic": None,
                        "bpDiastolic": None,
                        "temperature": None,
                        "pulse": None,
                        "respirations": None,
                    }
                )
            patientEpisodes.append(patientEpisode)
            if patientEpisode["encounter_id"] not in patientEpisodeById:
                patientEpisodeById[patientEpisode["encounter_id"]] = patientEpisode

            prog.update()
            row = cursor.fetchone()
        prog.printStatus()

        # Second query phase to link to encounter information (e.g., insurance, admitting vital signs)
        encounterIds = columnFromModelList(patientEpisodes, "encounter_id")
        query = SQLQuery()
        query.addSelect("pat_id")
        query.addSelect("pat_enc_csn_id")
        query.addSelect("title")
        query.addSelect("bp_systolic")
        query.addSelect("bp_diastolic")
        query.addSelect("temperature")
        query.addSelect("pulse")
        query.addSelect("respirations")
        query.addFrom("stride_patient_encounter")
        query.addWhereIn("pat_enc_csn_id", encounterIds)
        cursor.execute(str(query), query.params)

        row = cursor.fetchone()
        while row is not None:
            (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic,
             temperature, pulse, respirations) = row
            if encounterId in patientEpisodeById:
                patientEpisode = patientEpisodeById[encounterId]
                if patientEpisode["payorTitle"] is None:
                    patientEpisode["payorTitle"] = set()  # Single encounters may have multiple payors to track
                patientEpisode["payorTitle"].add(payorTitle)
                patientEpisode["bpSystolic"] = bpSystolic
                patientEpisode["bpDiastolic"] = bpDiastolic
                patientEpisode["temperature"] = temperature
                patientEpisode["pulse"] = pulse
                patientEpisode["respirations"] = respirations
            row = cursor.fetchone()

        # Drop results as tab-delimited text output
        formatter = TextResultsFormatter(outputFile)
        formatter.formatResultDicts(patientEpisodes, addHeaderRow=True)

        return patientEpisodes
    finally:
        cursor.close()
        conn.close()
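# Hedged usage sketch: run the extraction and write tab-delimited output.
# The output path and the DataExtractor() call are assumptions; only
# queryPatientEpisodes' signature comes from the function above.
with open("patientEpisodes.tab", "w") as outputFile:  # hypothetical path
    extractor = DataExtractor()  # hypothetical extractor dependency
    episodes = queryPatientEpisodes(outputFile, extractor)
    print("%d episodes extracted" % len(episodes))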
def _query_patient_episodes(self):
    log.info('Querying patient episodes...')

    # Initialize DB cursor.
    cursor = self._connection.cursor()

    # Build parameters for query.
    self._lab_components = self._get_components_in_lab_panel()
    random_patient_list = self._get_random_patient_list()

    # Build SQL query for list of patient episodes.
    # Note that for 2008-2014 data, result_flag can take on any of the
    # following values: High, Low, High Panic, Low Panic,
    # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
    # (NONE): Only 27 lab components can have this flag. None has this
    #       value for more than 5 results, so ignore it.
    # *: Only 10 lab components can have this flag. Only 6 have it for
    #       >10 tests, and each of them is a microbiology test for which
    #       a flag is less meaningful, e.g. Gram Stain, AFB culture.
    # Susceptible: Only 15 lab components can have this flag. All of those
    #       only have this value for 2 results, so ignore it.
    # Resistant: Only 1 lab component can have this flag. Only two results
    #       have this value, so ignore it.
    # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
    #       have this value for thousands of results, so include it.
    # Negative: Only 10 lab components can have this flag, and all for
    #       less than 5 results, so ignore it.
    # Positive: Only 3 lab components can have this flag, and all for
    #       only 1 result, so ignore it.
    # Low Off-Scale: Only 1 lab component can have this flag, and only for
    #       3 results, so ignore it.
    # Low Panic: 1401 lab components can have this flag, many core
    #       metabolic components. Include it.
    # High Panic: 8084 lab components can have this flag, many core
    #       metabolic components. Include it.
    query = SQLQuery()
    query.addSelect('CAST(pat_anon_id AS BIGINT) AS pat_id')
    query.addSelect('CAST(shifted_order_time AS TIMESTAMP)')
    query.addSelect('stride_culture_micro.proc_code')
    query.addSelect('organism_name')  # One column for the result

    # Experimenting: add per-antibiotic susceptibility and tested flags
    susceptibility_flags = [
        'Trimethoprim/Sulfamethoxazole', 'Vancomycin', 'Penicillin',
        'Levofloxacin', 'Clindamycin', 'Ceftriaxone', 'Erythromycin',
        'Ampicillin', 'Meropenem', 'Ciprofloxacin', 'Cefepime', 'Aztreonam.',
        'Ampicillin/Sulbactam', 'Piperacillin/Tazobactam', 'Linezolid',
        'Oxacillin.', 'Cefazolin', 'Daptomycin'
    ]
    for med in susceptibility_flags:
        med_col = med.replace('/', '_').replace('.', '')
        query.addSelect(
            "MAX(CASE WHEN antibiotic_name = '%s' AND (suseptibility = 'Susceptible' OR suseptibility = 'Positive') THEN 1 ELSE 0 END) AS %s_Susc"
            % (med, med_col))
        query.addSelect(
            "MAX(CASE WHEN antibiotic_name = '%s' THEN 1 ELSE 0 END) AS %s_tested"
            % (med, med_col))

    # Look at the top 10 most commonly occurring bacteria
    query.addSelect("CASE WHEN organism_name IS NULL THEN 0 ELSE 1 END AS BACTERIA_PRESENT")
    query.addSelect("CASE WHEN organism_name = 'ESCHERICHIA COLI' THEN 1 ELSE 0 END AS ESCHERICHIA_COLI")
    query.addSelect("CASE WHEN organism_name = 'STAPHYLOCOCCUS AUREUS' THEN 1 ELSE 0 END AS STAPHYLOCOCCUS_AUREUS")
    query.addSelect("CASE WHEN organism_name = 'ENTEROCOCCUS SPECIES' THEN 1 ELSE 0 END AS ENTEROCOCCUS_SPECIES")
    query.addSelect("CASE WHEN organism_name = 'KLEBSIELLA PNEUMONIAE' THEN 1 ELSE 0 END AS KLEBSIELLA_PNEUMONIAE")
    query.addSelect("CASE WHEN organism_name = 'PSEUDOMONAS AERUGINOSA' THEN 1 ELSE 0 END AS PSEUDOMONAS_AERUGINOSA")
    query.addSelect("CASE WHEN organism_name = 'COAG NEGATIVE STAPHYLOCOCCUS' THEN 1 ELSE 0 END AS COAG_NEGATIVE_STAPHYLOCOCCUS")
    query.addSelect("CASE WHEN organism_name = 'ENTEROCOCCUS FAECALIS' THEN 1 ELSE 0 END AS ENTEROCOCCUS_FAECALIS")
    query.addSelect("CASE WHEN organism_name = 'PROTEUS MIRABILIS' THEN 1 ELSE 0 END AS PROTEUS_MIRABILIS")
    query.addSelect("CASE WHEN organism_name = 'CANDIDA ALBICANS' THEN 1 ELSE 0 END AS CANDIDA_ALBICANS")

    query.addFrom('stride_culture_micro')

    query.addWhereIn("stride_culture_micro.proc_code", self._lab_panel)
    query.addWhereIn("pat_anon_id", random_patient_list)

    query.addGroupBy('pat_anon_id')
    query.addGroupBy('shifted_order_time')
    query.addGroupBy('stride_culture_micro.proc_code')
    query.addGroupBy('organism_name')

    query.addOrderBy('pat_anon_id')
    query.addOrderBy('shifted_order_time')
    query.addOrderBy('stride_culture_micro.proc_code')
    query.addOrderBy('organism_name')

    query.setLimit(self._num_requested_episodes)

    self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
        self, query, pat_id_col='pat_id', index_time_col='shifted_order_time')
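# The MAX(CASE WHEN ...) selects above pivot long-format antibiotic rows into
# per-antibiotic one-hot columns. A minimal sketch of the same idiom in
# isolation, against the same table and columns as above:
pivot = SQLQuery()
pivot.addSelect("pat_anon_id")
pivot.addSelect("MAX(CASE WHEN antibiotic_name = 'Vancomycin' THEN 1 ELSE 0 END) AS Vancomycin_tested")
pivot.addFrom("stride_culture_micro")
pivot.addGroupBy("pat_anon_id")
print(pivot)  # Inspect the rendered SQL before executing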
import os
from io import StringIO

from Const import RUNNER_VERBOSITY
from Util import log
from Util import DBTestCase

os.chdir('/Users/jwang/Desktop/ClinicalDecisionMaker')
from medinfo.common.test.Util import MedInfoTestCase
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery

# Create SCRIPT_FILE
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql medinfo jwang198")

# From patient_item or clinical_item
DATA_QUERY = SQLQuery()
DATA_QUERY.addSelect("clinical_item_id")
# DATA_QUERY.addSelect("clinical_item_category_id")
# DATA_QUERY.addSelect('name')
DATA_QUERY.addSelect("description")
# DATA_QUERY.addSelect("item_count")

# Join
DATA_QUERY.addFrom("clinical_item")
DATA_QUERY.addWhereEqual("clinical_item_category_id", "2")
DATA_QUERY.addOrderBy("item_count", dir="DESC")
print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
import os
from io import StringIO

os.chdir('/Users/jwang/Desktop/ClinicalDecisionMaker')
from medinfo.common.test.Util import MedInfoTestCase
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery

# Create SCRIPT_FILE
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql medinfo jwang198")

# Find all clinical_item_ids associated with each patient_id
# SELECT * FROM clinical_item WHERE clinical_item_category_id = 161;
DATA_QUERY = SQLQuery()

# From patient_item or clinical_item
DATA_QUERY.addSelect("patient_id")
DATA_QUERY.addSelect("clinical_item_category_id")
DATA_QUERY.addSelect('name')
DATA_QUERY.addSelect("description")

# Join
DATA_QUERY.addFrom("patient_item")
DATA_QUERY.addJoin(
    "clinical_item",
    "patient_item.clinical_item_id = clinical_item.clinical_item_id",
    joinType="INNER")

# Note: this piggybacks an extra condition onto the column argument, so the
# builder renders "clinical_item_category_id = 161 AND description = %s"
DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description",
                         "Tt Med Univ (Primary)")  # Everyone
# DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description",
#                          "Tt Pamf Med (Primary)")  # Expert

DATA_QUERY.addOrderBy("patient_id", dir="ASC")
def grade_cases(self, sim_patient_ids, sim_grader_id, conn=None):
    """Given the identifiers for a bunch of simulated physician-patient case
    records, and the identifier for a particular grading key to use, calculate
    what grade each case would get based on the choices made and return a list
    of case-grade models (one per case).
    """
    ext_conn = True
    if conn is None:
        conn = self.connFactory.connection()
        ext_conn = False
    try:
        # Inner query retrieves physician-patient cases with ranked group_names
        # (to later select the first) per case, for the specified cases. Each NULL
        # group_name is treated as a separate group by assigning it the
        # sim_patient_order_id. It also omits the Default user (sim_user_id = 0)
        # from grading.
        inner_query = SQLQuery()
        inner_query.addSelect("score")
        inner_query.addSelect(
            "rank() over ("  # Ranks rows incrementally within the same group
            " partition by coalesce(group_name, sim_patient_order_id::text), sim_patient_id"
            " order by sim_patient_order_id"
            ")")
        inner_query.addSelect("sim_user_id")
        inner_query.addSelect("sim_patient_id")
        inner_query.addSelect("sim_grader_id")
        inner_query.addFrom("sim_patient_order spo")
        inner_query.addJoin(
            "sim_grading_key sgk",
            "sgk.clinical_item_id = spo.clinical_item_id"
            " and sgk.sim_state_id = spo.sim_state_id")
        inner_query.addWhereEqual("sgk.sim_grader_id", sim_grader_id)
        inner_query.addWhereNotEqual("spo.sim_user_id", 0)  # 0 = 'Default user', who sets up initial cases
        inner_query.addWhereIn("spo.sim_patient_id", sim_patient_ids)
        inner_query.addOrderBy("relative_time_start")
        inner_query.addOrderBy("sim_patient_order_id")

        # Outer query sums the score per patient case and selects the most-graded
        # physician for the case. Theoretically this isn't necessarily the most
        # active physician for the case, since their orders might have been dropped
        # by selecting only the first record within each group_name group.
        query = SQLQuery()
        query.addSelect("sim_patient_id")
        query.addSelect("sim_grader_id")
        query.addSelect("sum(score) as total_score")
        query.addSelect(
            "mode() within group ("  # mode() selects the most frequent value within the group
            " order by sim_user_id"
            ") as most_graded_user_id")
        query.addFrom("(" + str(inner_query) + ") as ranked_groups")
        query.addWhereEqual("ranked_groups.rank", 1)  # Count only the first order in each group
        query.addGroupBy("sim_patient_id")
        query.addGroupBy("sim_grader_id")

        query_params = inner_query.getParams() + query.getParams()
        grades_table = DBUtil.execute(query, query_params, includeColumnNames=True, conn=conn)
        grades_model = modelListFromTable(grades_table)

        # Get the most active users for the cases
        most_active_user_query = SQLQuery()
        most_active_user_query.addSelect("sim_patient_id")
        most_active_user_query.addSelect(
            "mode() within group ("
            " order by sim_user_id"
            ") as most_active_user_id")
        most_active_user_query.addFrom("sim_patient_order")
        most_active_user_query.addWhereNotEqual("sim_user_id", 0)  # Ignore Default user
        most_active_user_query.addWhereIn("sim_patient_id", sim_patient_ids)
        most_active_user_query.addGroupBy("sim_patient_id")
        most_active_user_query.addOrderBy("sim_patient_id")
        most_active_user_table = DBUtil.execute(most_active_user_query, includeColumnNames=True, conn=conn)
        most_active_user_model = modelListFromTable(most_active_user_table)

        # Make a dict keyed by sim_patient_id out of the results - used for combining
        most_active_user_dict = {
            most_active_user["sim_patient_id"]: most_active_user
            for most_active_user in most_active_user_model
        }

        # Combine results. dict.update() returns None, so mutate each grade model
        # in place rather than collecting the return values in a comprehension.
        for grade in grades_model:
            grade.update(most_active_user_dict[grade["sim_patient_id"]])
        return grades_model
    finally:
        if not ext_conn:
            conn.close()
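# Hedged usage sketch: grade three finished cases against grading key 1. The
# manager object and IDs are hypothetical; the returned keys come from the
# queries above.
manager = SimManager()  # hypothetical host class
for grade in manager.grade_cases([101, 102, 103], sim_grader_id=1):
    print(grade["sim_patient_id"], grade["total_score"],
          grade["most_graded_user_id"], grade["most_active_user_id"])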
def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
    """Query the database for a list of all source clinical items (lab results
    in this case) and yield the results one at a time. If startDate provided,
    only return items whose result_time is on or after that date.
    Only include results records where the result_flag is set to an informative
    value, to focus only on abnormal lab results (including everything would add
    a ton of relatively uninformative data that would greatly expand data space
    and subsequent computation time).
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Column headers to query for that map to respective fields in analysis table
    headers = [
        "sor.order_proc_id", "pat_id", "pat_enc_csn_id", "order_type",
        "proc_id", "proc_code", "base_name", "component_name",
        "common_name", "ord_num_value", "result_flag",
        "result_in_range_yn", "sor.result_time"
    ]

    query = SQLQuery()
    for header in headers:
        query.addSelect(header)
    query.addFrom("stride_order_proc as sop")
    query.addFrom("%s as sor" % SOURCE_TABLE)
    query.addWhere("sop.order_proc_id = sor.order_proc_id")
    # query.addWhere("result_flag <> '*'")  # Would exclude nulls and the uninformative '*' values for text-based microbiology results
    if startDate is not None:
        query.addWhereOp("sor.result_time", ">=", startDate)
    if endDate is not None:
        query.addWhereOp("sor.result_time", "<", endDate)

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    row = cursor.fetchone()
    while row is not None:
        rowModel = RowItemModel(row, headers)

        # Normalize qualified labels
        rowModel["order_proc_id"] = rowModel["sor.order_proc_id"]
        rowModel["result_time"] = rowModel["sor.result_time"]

        if rowModel['base_name'] is None:
            row = cursor.fetchone()
            continue

        self.populateResultFlag(rowModel, conn=conn)

        yield rowModel  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
        row = cursor.fetchone()

    # Slight risk here. Normally DB connection closing should be in a finally
    # clause of a try block, but the "yield" generator construct forbids us
    # from using a try/finally construct.
    cursor.close()
    if not extConn:
        conn.close()
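# Hedged usage sketch: stream converted lab rows for one month. The converter
# instance name is hypothetical; querySourceItems' signature and the yielded
# RowItemModel keys come from the method above.
from datetime import datetime

converter = STRIDEOrderResultsConversion()  # hypothetical host class name
for rowModel in converter.querySourceItems(startDate=datetime(2013, 1, 1),
                                           endDate=datetime(2013, 2, 1)):
    print(rowModel["pat_id"], rowModel["base_name"], rowModel["result_flag"])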
def _query_patient_episodes(self):
    log.info('Querying patient episodes...')

    # Initialize DB cursor.
    cursor = self._connection.cursor()

    # Build parameters for query.
    self._lab_components = self._get_components_in_lab_panel()
    random_patient_list = self._get_random_patient_list()

    # Build SQL query for list of patient episodes.
    # Note that for 2008-2014 data, result_flag can take on any of the
    # following values: High, Low, High Panic, Low Panic,
    # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
    # (NONE): Only 27 lab components can have this flag. None has this
    #       value for more than 5 results, so ignore it.
    # *: Only 10 lab components can have this flag. Only 6 have it for
    #       >10 tests, and each of them is a microbiology test for which
    #       a flag is less meaningful, e.g. Gram Stain, AFB culture.
    # Susceptible: Only 15 lab components can have this flag. All of those
    #       only have this value for 2 results, so ignore it.
    # Resistant: Only 1 lab component can have this flag. Only two results
    #       have this value, so ignore it.
    # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
    #       have this value for thousands of results, so include it.
    # Negative: Only 10 lab components can have this flag, and all for
    #       less than 5 results, so ignore it.
    # Positive: Only 3 lab components can have this flag, and all for
    #       only 1 result, so ignore it.
    # Low Off-Scale: Only 1 lab component can have this flag, and only for
    #       3 results, so ignore it.
    # Low Panic: 1401 lab components can have this flag, many core
    #       metabolic components. Include it.
    # High Panic: 8084 lab components can have this flag, many core
    #       metabolic components. Include it.
    query = SQLQuery()
    query.addSelect('CAST(pat_anon_id AS BIGINT) AS pat_id')
    query.addSelect('CAST(shifted_order_time AS TIMESTAMP)')
    query.addSelect('stride_culture_micro.proc_code')
    query.addSelect('organism_name')  # One column for the result

    # Look at the top 10 most commonly occurring bacteria
    query.addSelect("CASE WHEN organism_name IS NULL THEN 1 ELSE 0 END AS NO_BACTERIA")
    query.addSelect("CASE WHEN organism_name = 'ESCHERICHIA COLI' THEN 1 ELSE 0 END AS ESCHERICHIA_COLI")
    query.addSelect("CASE WHEN organism_name = 'STAPHYLOCOCCUS AUREUS' THEN 1 ELSE 0 END AS STAPHYLOCOCCUS_AUREUS")
    query.addSelect("CASE WHEN organism_name = 'ENTEROCOCCUS SPECIES' THEN 1 ELSE 0 END AS ENTEROCOCCUS_SPECIES")
    query.addSelect("CASE WHEN organism_name = 'KLEBSIELLA PNEUMONIAE' THEN 1 ELSE 0 END AS KLEBSIELLA_PNEUMONIAE")
    query.addSelect("CASE WHEN organism_name = 'PSEUDOMONAS AERUGINOSA' THEN 1 ELSE 0 END AS PSEUDOMONAS_AERUGINOSA")
    query.addSelect("CASE WHEN organism_name = 'COAG NEGATIVE STAPHYLOCOCCUS' THEN 1 ELSE 0 END AS COAG_NEGATIVE_STAPHYLOCOCCUS")
    query.addSelect("CASE WHEN organism_name = 'ENTEROCOCCUS FAECALIS' THEN 1 ELSE 0 END AS ENTEROCOCCUS_FAECALIS")
    query.addSelect("CASE WHEN organism_name = 'PROTEUS MIRABILIS' THEN 1 ELSE 0 END AS PROTEUS_MIRABILIS")
    query.addSelect("CASE WHEN organism_name = 'CANDIDA ALBICANS' THEN 1 ELSE 0 END AS CANDIDA_ALBICANS")

    query.addFrom('stride_culture_micro')

    query.addWhereIn("stride_culture_micro.proc_code", [self._lab_panel])
    # pat_id is only an output alias, so filter/group on the underlying pat_anon_id column
    query.addWhereIn("pat_anon_id", random_patient_list)

    query.addGroupBy('pat_anon_id')
    query.addGroupBy('shifted_order_time')
    query.addGroupBy('stride_culture_micro.proc_code')
    query.addGroupBy('organism_name')

    query.addOrderBy('pat_anon_id')
    query.addOrderBy('shifted_order_time')
    query.addOrderBy('stride_culture_micro.proc_code')
    query.addOrderBy('organism_name')

    query.setLimit(self._num_requested_episodes)

    self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
        self, query, pat_id_col='pat_id', index_time_col='shifted_order_time')
class TestDBUtil(DBTestCase):
    def setUp(self):
        """Prepare state for test cases"""
        DBTestCase.setUp(self)

        self.SCRIPT_FILE = StringIO()
        self.SCRIPT_FILE.write("# Create table to test on. Also testing that comment tag is recognized\n")
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write("create table TestTypes\n")
        self.SCRIPT_FILE.write("(\n")
        self.SCRIPT_FILE.write("    TestTypes_id    serial,\n")
        self.SCRIPT_FILE.write("    MyText          varchar(50),    /* Same as character varying, also test standard SQL comment tags */\n")
        self.SCRIPT_FILE.write("    MyInteger       integer,\n")
        self.SCRIPT_FILE.write("    MyReal          real,   -- Floating point number, also test standard SQL comment tag\n")
        self.SCRIPT_FILE.write("    MyDateTime      TIMESTAMP,  -- PostgreSQL uses TIMESTAMP, but MySQL doesn't do NULL values right, so have to use DATETIME for MySQL?\n")
        self.SCRIPT_FILE.write("    MyYesNo         boolean\n")
        self.SCRIPT_FILE.write(");\n")
        self.SCRIPT_FILE.write("ALTER TABLE TestTypes ADD CONSTRAINT TestTypes_id PRIMARY KEY (TestTypes_id);\n")  # Should auto-create testtypes_testtypes_id_seq sequence
        self.SCRIPT_FILE.write("CREATE INDEX TestTypes_MyInteger_INDEX ON TestTypes(MyInteger);\n")
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write("insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n")
        self.SCRIPT_FILE.write("values ('Sample Text', 123,123.45,'2004-09-08 19:41:47.292000',True);\n")
        self.SCRIPT_FILE.write("insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n")
        self.SCRIPT_FILE.write("values ('Joe Mama', 234,23.45,'1990-10-03 19:41:47.292000',False);\n")
        self.SCRIPT_FILE.write("insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n")
        self.SCRIPT_FILE.write("values ('Mo Fo', 345,3.45,'2014-01-04 19:41:47.292000',True);\n")
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE = StringIO(self.SCRIPT_FILE.getvalue())

        self.DATA_TABLE = "TestTypes"
        self.DATA_COLS = "MyInteger\tMyReal\tMyYesNo\tMyText\n"

        self.DATA_FILE = StringIO()
        self.DATA_FILE.write('100\t100.1\tNone\tATest\n')
        self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\tNone\n')
        self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t\n')  # Skip None tag at end of line, test that white space won't get lost
        self.DATA_FILE.write('300\t\t' + TRUE_STR + '\tCTest\n')
        self.DATA_FILE = StringIO(self.DATA_FILE.getvalue())

        self.DATA_ROWS = []
        self.DATA_ROWS.append([100, 100.1, None, "ATest"])
        self.DATA_ROWS.append([200, 200.2, False, None])
        self.DATA_ROWS.append([300, None, True, "CTest"])

        self.MULTI_LINE_DATA_FILE = StringIO()
        self.MULTI_LINE_DATA_FILE.write('myinteger\t"MyReal"\t"MyYesNo"\tMyText\n')
        self.MULTI_LINE_DATA_FILE.write('100\t100.1\tNone\t"""A"" Test and ""more"""\n')
        self.MULTI_LINE_DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t""\n')
        self.MULTI_LINE_DATA_FILE.write('300\tNone\t' + TRUE_STR + '\t"C\\nTest"\t\n')
        self.MULTI_LINE_DATA_FILE = StringIO(self.MULTI_LINE_DATA_FILE.getvalue())

        self.MULTI_LINE_DATA_ROWS = []
        self.MULTI_LINE_DATA_ROWS.append([100, 100.1, None, '"A" Test and "more"'])
        self.MULTI_LINE_DATA_ROWS.append([200, 200.2, False, None])
        self.MULTI_LINE_DATA_ROWS.append([300, None, True, 'C\\nTest'])

        # ID summary data to make it easier to verify stuff
        self.COL_NAMES = self.DATA_COLS.split()
        self.ID_COL = self.COL_NAMES[0]
        self.ID_DATA = []
        for row in self.DATA_ROWS:
            self.ID_DATA.append(row[0])

        # Build query to get update rows
        self.DATA_QUERY = SQLQuery()
        for col in self.COL_NAMES:
            self.DATA_QUERY.addSelect(col)
        self.DATA_QUERY.addFrom(self.DATA_TABLE)
        self.DATA_QUERY.addWhereIn(self.ID_COL, self.ID_DATA)
        self.DATA_QUERY.addOrderBy(self.ID_COL)

    def tearDown(self):
        """Restore state from any setUp or test steps"""
        try:
            DBUtil.execute("drop table TestTypes")
        except Exception as err:
            log.warning(err)
        DBTestCase.tearDown(self)
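    # Hedged sketch of a test method exercising this fixture; it relies only on
    # DBUtil.runDBScript and DBUtil.execute as this class already uses them.
    def test_runDBScript(self):
        DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Create and populate TestTypes
        results = DBUtil.execute("select count(*) from TestTypes")
        self.assertEqual(3, results[0][0])  # Three rows inserted by the script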
def _query_patient_episodes(self):
    log.info('Querying patient episodes...')

    # Initialize DB cursor.
    cursor = self._connection.cursor()

    # Build parameters for query.
    self._lab_components = self._get_components_in_lab_panel()
    random_patient_list = self._get_random_patient_list()

    # Build SQL query for list of patient episodes.
    # Note that for 2008-2014 data, result_flag can take on any of the
    # following values: High, Low, High Panic, Low Panic,
    # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
    # (NONE): Only 27 lab components can have this flag. None has this
    #       value for more than 5 results, so ignore it.
    # *: Only 10 lab components can have this flag. Only 6 have it for
    #       >10 tests, and each of them is a microbiology test for which
    #       a flag is less meaningful, e.g. Gram Stain, AFB culture.
    # Susceptible: Only 15 lab components can have this flag. All of those
    #       only have this value for 2 results, so ignore it.
    # Resistant: Only 1 lab component can have this flag. Only two results
    #       have this value, so ignore it.
    # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
    #       have this value for thousands of results, so include it.
    # Negative: Only 10 lab components can have this flag, and all for
    #       less than 5 results, so ignore it.
    # Positive: Only 3 lab components can have this flag, and all for
    #       only 1 result, so ignore it.
    # Low Off-Scale: Only 1 lab component can have this flag, and only for
    #       3 results, so ignore it.
    # Low Panic: 1401 lab components can have this flag, many core
    #       metabolic components. Include it.
    # High Panic: 8084 lab components can have this flag, many core
    #       metabolic components. Include it.
    if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':
        query = SQLQuery()

        '''
        pat_id: hashed patient id
        '''
        query.addSelect('CAST(pat_id AS BIGINT) as pat_id')

        '''
        order_proc_id: unique identifier for an episode
        '''
        query.addSelect('sop.order_proc_id')

        '''
        self._varTypeInTable: usually proc_code or base_name, the column of the lab to be queried
        '''
        query.addSelect(self._varTypeInTable)

        '''
        order_time: the time of the order. Note that the sor table does not have this info.
        '''
        query.addSelect('order_time')

        '''
        y-label related columns; choose one to predict (for now, use
        all_components_normal to predict).
        '''
        if self._isLabPanel:
            query.addSelect("CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel")
            query.addSelect("SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components")
            query.addSelect("SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components")
            query.addSelect("CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal")
        else:
            query.addSelect("CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 0 ELSE 1 END AS component_normal")

        '''
        Relevant tables. Note that the sor table does not have patient_id info;
        need to join sop to obtain it.
        '''
        query.addFrom('stride_order_proc AS sop')
        query.addFrom('stride_order_results AS sor')
        query.addWhere('sop.order_proc_id = sor.order_proc_id')

        '''
        Condition: self._time_limit[0] < order_time < self._time_limit[1]
        '''
        if self._time_limit:
            if self._time_limit[0]:
                query.addWhere("sop.order_time > '%s'" % self._time_limit[0])
            if self._time_limit[1]:
                query.addWhere("sop.order_time < '%s'" % self._time_limit[1])

        query.addWhere("(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)")
        query.addWhereIn(self._varTypeInTable, [self._lab_var])
        query.addWhereIn("pat_id", random_patient_list)

        query.addGroupBy('pat_id')
        query.addGroupBy('sop.order_proc_id')
        query.addGroupBy(self._varTypeInTable)
        query.addGroupBy('order_time')
        if not self._isLabPanel:
            query.addGroupBy('result_flag')
        query.addGroupBy('abnormal_yn')

        query.addOrderBy('pat_id')
        query.addOrderBy('sop.order_proc_id')
        query.addOrderBy(self._varTypeInTable)
        query.addOrderBy('order_time')

        query.setLimit(self._num_requested_episodes)

        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
            self, query, index_time_col='order_time')
    else:
        '''
        Sqlite3 has an interesting limit on the total number of placeholders in
        a query, and this limit varies across platforms/operating systems
        (500-99999 on Mac, 999 by default). To avoid this problem when querying
        1000-10000 patient ids, use string queries instead of the default
        (convenient) routine in DBUtil.
        '''
        query_str = "SELECT CAST(pat_id AS BIGINT) AS pat_id, order_proc_id, %s, order_time, " % self._varTypeInTable
        if not self._isLabPanel:
            query_str += "CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal "
        else:
            query_str += "CAST(SUM(CASE WHEN result_in_range_yn != 'Y' THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal "
        query_str += "FROM labs "
        query_str += "WHERE %s = '%s' " % (self._varTypeInTable, self._lab_var)
        query_str += "AND pat_id IN "

        pat_list_str = "("
        for pat_id in random_patient_list:
            pat_list_str += str(pat_id) + ","
        pat_list_str = pat_list_str[:-1] + ") "  # Replace trailing comma with closing paren
        query_str += pat_list_str

        query_str += "GROUP BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
        query_str += "ORDER BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
        query_str += "LIMIT %d" % self._num_requested_episodes

        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
            self, query_str, index_time_col='order_time')
def _get_random_patient_list(self):
    # Initialize DB cursor.
    cursor = self._connection.cursor()

    query = SQLQuery()
    query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')

    if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':
        if self._isLabPanel:
            query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
            query.addFrom('stride_order_proc AS sop')
            query.addFrom('stride_order_results AS sor')
            if self._time_limit:
                if self._time_limit[0]:
                    query.addWhere("sop.order_time > '%s'" % self._time_limit[0])
                if self._time_limit[1]:
                    query.addWhere("sop.order_time < '%s'" % self._time_limit[1])
            query.addWhere('sop.order_proc_id = sor.order_proc_id')
            query.addWhereIn('proc_code', [self._lab_var])
            # sbala: Technically it's possible for someone to get a lab ordered without getting results
            query.addWhereIn("base_name", self._lab_components)
        else:
            query.addSelect('COUNT(sor.order_proc_id) AS num_orders')
            query.addFrom('stride_order_proc AS sop')
            query.addFrom('stride_order_results AS sor')
            query.addWhere('sop.order_proc_id = sor.order_proc_id')
            query.addWhereIn("base_name", [self._lab_var])
    elif LocalEnv.DATASET_SOURCE_NAME == 'UMich':
        query_str = "SELECT CAST(pat_id AS BIGINT) AS pat_id, "
        query_str += "COUNT(order_proc_id) AS num_orders "
        query_str += "FROM labs "
        query_str += "WHERE %s = '%s' " % (self._varTypeInTable, self._lab_var)
        if self.notUsePatIds:
            query_str += "AND pat_id NOT IN ("
            for pat_id in self.notUsePatIds:
                query_str += "%s," % pat_id
            query_str = query_str[:-1] + ") "  # Get rid of trailing comma
        query_str += "GROUP BY pat_id"

        log.debug('Querying median orders per patient...')

        # TODO: best way to integrate UMich code
        results = DBUtil.execute(query_str)

        order_counts = [row[1] for row in results]

        if len(results) == 0:
            error_msg = '0 orders for order "%s."' % self._lab_var
            log.critical(error_msg)
            # Raise rather than sys.exit, since SystemExit cannot be caught as an Exception
            raise Exception(error_msg)
        else:
            avg_orders_per_patient = numpy.median(order_counts)
            log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
            # Based on the median number of results, figure out how many patients
            # we'd need to get a feature matrix of the requested size.
            self._num_patients = int(numpy.max([self._num_requested_episodes / avg_orders_per_patient, 1]))
            # Some components may have fewer associated patients than the required sample size
            patient_number_chosen = min([len(results), self._num_patients])

            inds_random_patients = numpy.random.choice(
                len(results), size=patient_number_chosen, replace=False)
            pat_IDs_random_patients = []
            for ind in inds_random_patients:
                pat_IDs_random_patients.append(results[ind][0])
            return pat_IDs_random_patients
    else:
        query.addSelect('COUNT(order_proc_id) AS num_orders')
        query.addFrom('labs')
        if self._isLabPanel:
            query.addWhereIn("proc_code", [self._lab_var])
            query.addWhereIn("base_name", self._lab_components)
        else:
            query.addWhereIn("base_name", [self._lab_var])

    '''
    For the hold-out set, do not use the patients already used in training/validation.
    '''
    if self._notUsePatIds:
        query.addWhereNotIn('pat_id', self._notUsePatIds)

    query.addGroupBy('pat_id')

    log.debug('Querying the number of orders per patient...')
    results = DBUtil.execute(query)

    order_counts = [row[1] for row in results]

    if len(results) == 0:
        error_msg = '0 orders for component "%s."' % self._lab_var
        log.critical(error_msg)
        # Raise rather than sys.exit, since SystemExit cannot be caught as an Exception
        raise Exception(error_msg)
    else:
        avg_orders_per_patient = numpy.median(order_counts)
        log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
        # Based on the median number of results, figure out how many patients
        # we'd need to get a feature matrix of the requested size.
        self._num_patients = int(numpy.max([self._num_requested_episodes / avg_orders_per_patient, 1]))
        # Some components may have fewer associated patients than the required sample size
        patient_number_chosen = min([len(results), self._num_patients])

        '''
        Set seed to ensure reproducibility of patient episodes. Recover an int
        random_state here, since numpy requires an int while sql requires [-1, 1].
        '''
        numpy.random.seed(int(self._random_state * float(sys.maxsize)))

        inds_random_patients = numpy.random.choice(
            len(results), size=patient_number_chosen, replace=False)
        pat_IDs_random_patients = [results[ind][0] for ind in inds_random_patients]
        return pat_IDs_random_patients
line = line.strip().split(",")
if line[0] not in patient_encounters:
    # patient id: list of admission times
    patient_encounters[line[0]] = [pandas.to_datetime(line[1])]
else:
    patient_encounters[line[0]].append(pandas.to_datetime(line[1]))
# print(patient_encounters)

# Create SCRIPT_FILE
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")

DATA_QUERY = SQLQuery()
DATA_QUERY.addSelect("pat_id")
DATA_QUERY.addSelect("death_date")
DATA_QUERY.addFrom("stride_patient")
print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)
# print(len(results))

output_7 = open("/Users/jwang/Desktop/Results/7_day_deaths.csv", "w")
output_30 = open("/Users/jwang/Desktop/Results/30_day_deaths.csv", "w")
output_90 = open("/Users/jwang/Desktop/Results/90_day_deaths.csv", "w")

output_7.write("patient_id,prev_admission_time,death_time,delta\n")
output_30.write("patient_id,prev_admission_time,death_time,delta\n")
output_90.write("patient_id,prev_admission_time,death_time,delta\n")
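# Hedged sketch of the windowing step these files set up: compare each
# patient's death_date against their recorded admission times and bucket by
# 7/30/90-day windows. patient_encounters and results come from above; the
# exact delta logic is an assumption about the intended analysis.
for pat_id, death_date in results:
    if pat_id not in patient_encounters or death_date is None:
        continue
    death_time = pandas.to_datetime(death_date)
    for admission_time in patient_encounters[pat_id]:
        delta = death_time - admission_time
        if delta < pandas.Timedelta(0):
            continue  # Ignore admissions recorded after the death timestamp
        row = "%s,%s,%s,%s\n" % (pat_id, admission_time, death_time, delta)
        if delta <= pandas.Timedelta(days=7):
            output_7.write(row)
        if delta <= pandas.Timedelta(days=30):
            output_30.write(row)
        if delta <= pandas.Timedelta(days=90):
            output_90.write(row)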
def orderSetSearch(self, itemQuery, conn=None):
    """Look for clinical items based on specified query criteria"""
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("ic.item_collection_id")
        query.addSelect("ic.external_id")
        query.addSelect("ic.name as collection_name")
        query.addSelect("ic.section")
        query.addSelect("ic.subgroup")
        query.addSelect("ci.clinical_item_category_id")
        query.addSelect("ci.clinical_item_id")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addFrom("item_collection as ic")
        query.addFrom("item_collection_item as ici")
        query.addFrom("clinical_item as ci")
        query.addWhere("ic.item_collection_id = ici.item_collection_id")
        query.addWhere("ici.clinical_item_id = ci.clinical_item_id")
        query.addWhereNotEqual("ic.section", AD_HOC_SECTION)

        if itemQuery.searchStr is not None:
            searchWords = itemQuery.searchStr.split()
            for searchWord in searchWords:
                # Prefix search by regular expression
                query.addWhereOp("ic.name", "~*",
                                 "^%(searchWord)s|[^a-z]%(searchWord)s" % {"searchWord": searchWord})

        if itemQuery.analysisStatus is not None:
            query.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)

        query.addOrderBy("lower(ic.name)")
        query.addOrderBy("ic.external_id")
        query.addOrderBy("lower(ic.section)")
        query.addOrderBy("lower(ic.subgroup)")
        query.addOrderBy("ci.clinical_item_id")
        query.addOrderBy("ci.name")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)

        # Aggregate up into order sets
        orderSetModel = None
        for row in dataModels:
            if orderSetModel is None or row["external_id"] != orderSetModel["external_id"]:
                if orderSetModel is not None:
                    # Prior order set exists, yield/return it before preparing the next one
                    yield orderSetModel
                orderSetModel = \
                    {
                        "external_id": row["external_id"],
                        "name": row["collection_name"],
                        "itemList": list(),
                    }
            orderSetModel["itemList"].append(row)
        if orderSetModel is not None:  # Guard against the no-results case
            yield orderSetModel  # Yield the last processed model
    finally:
        if not extConn:
            conn.close()
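# Hedged usage sketch: stream matching order sets. ItemQuery is a hypothetical
# stand-in for whatever query-options object callers build; the attribute
# names (searchStr, analysisStatus) come from the method above.
itemQuery = ItemQuery(searchStr="chest pain", analysisStatus=1)  # hypothetical container
manager = OrderSetManager()  # hypothetical host class
for orderSet in manager.orderSetSearch(itemQuery):
    print(orderSet["name"], len(orderSet["itemList"]))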
def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
    """Query the database for a list of all source clinical items (diagnosed
    problems in this case) and yield the results one at a time. If startDate
    provided, only return items whose noted_date is on or after that date.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Facilitate rapid lookup of ICD9/ICD10 codes
    if self.icd9_str_by_code is None:
        self.prepare_icd9_lookup(conn=conn)
    if self.icd10_str_by_code is None:
        self.prepare_icd10_lookup(conn=conn)

    # Column headers to query for that map to respective fields in analysis table
    headers = ["pat_id", "pat_enc_csn_id", "noted_date", "resolved_date",
               "dx_icd9_code", "dx_icd9_code_list", "dx_icd10_code_list",
               "data_source"]

    query = SQLQuery()
    for header in headers:
        query.addSelect(header)
    query.addFrom("stride_dx_list as dx")
    query.addWhere("noted_date is not null")  # Only work with elements that have dates assigned for now
    if startDate is not None:
        query.addWhereOp("noted_date", ">=", startDate)
    if endDate is not None:
        query.addWhereOp("noted_date", "<", endDate)

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    row = cursor.fetchone()
    while row is not None:
        row_model = RowItemModel(row, headers)

        # 2014-2017 data does not have dx_icd9_code. Instead, it has both
        # dx_icd9_code_list and dx_icd10_code_list. For these items, there is
        # a one:many mapping of source item to converted item.

        # Collect icd10 codes.
        icd10_codes = set()
        if row_model['dx_icd10_code_list'] not in ['', None]:
            codes = row_model['dx_icd10_code_list'].split(',')
            for code in codes:
                icd10_codes.add(code)

        # Collect icd9 codes.
        icd9_codes = set()
        if row_model['dx_icd9_code'] not in ['', None]:
            icd9_codes.add(row_model['dx_icd9_code'])
        elif row_model['dx_icd9_code_list'] not in ['', None]:
            codes = row_model['dx_icd9_code_list'].split(',')
            for code in codes:
                icd9_codes.add(code)

        # If there are no ICD codes, skip to next row.
        if len(icd9_codes) == 0 and len(icd10_codes) == 0:
            row = cursor.fetchone()
            continue

        # Process ICD codes.
        # Build a temporary dictionary so that a single loop can take care
        # of both ICD9 and ICD10 without mixing the data.
        icd_versions = {
            'ICD9': {
                'codes': icd9_codes,
                'lookup': self.icd9_str_by_code
            },
            'ICD10': {
                'codes': icd10_codes,
                'lookup': self.icd10_str_by_code
            }
        }

        for version, info in icd_versions.items():
            icd_codes = info['codes']
            icd_lookup = info['lookup']
            for icd_code in icd_codes:
                # Look up string. Otherwise default to ICD code.
                row_model['icd_str'] = icd_code
                if icd_code in icd_lookup:
                    row_model['icd_str'] = icd_lookup[icd_code]
                row_model['dx_icd_code'] = version + '.' + icd_code

                # Yield one row worth of data at a time to avoid having to keep
                # the whole result set in memory.
                yield row_model

                orig_code = icd_code
                if SUBCODE_DELIM in orig_code:
                    # Insert copies of item for parent node codes to aggregate
                    # component diagnoses into general categories.
                    while icd_code[-1] != SUBCODE_DELIM:
                        icd_code = icd_code[:-1]  # Truncate trailing digit
                        if icd_code in icd_lookup:
                            # Found a matching parent code, so yield this version.
                            row_model['icd_str'] = icd_lookup[icd_code]
                            row_model['dx_icd_code'] = version + '.' + icd_code
                            yield row_model

                    # One more cycle to get the parent node with no subcode
                    # delimiter at all.
                    icd_code = icd_code[:-1]  # Truncate off SUBCODE_DELIM
                    if icd_code in icd_lookup:
                        row_model['icd_str'] = icd_lookup[icd_code]
                        row_model['dx_icd_code'] = version + '.' + icd_code
                        yield row_model

        row = cursor.fetchone()
        if progress is not None:
            progress.update()

    # Slight risk here. Normally DB connection closing should be in a finally
    # clause of a try block, but the "yield" generator construct forbids us
    # from using a try/finally construct.
    cursor.close()
    if not extConn:
        conn.close()
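# The truncation loop above rolls an ICD code up its dot-hierarchy, checking
# the lookup at each step. A tiny standalone illustration of the same walk,
# assuming SUBCODE_DELIM is '.' as the code above suggests:
SUBCODE_DELIM = '.'
icd_code = "428.21"
parents = []
if SUBCODE_DELIM in icd_code:
    while icd_code[-1] != SUBCODE_DELIM:
        icd_code = icd_code[:-1]   # Truncate trailing digit
        parents.append(icd_code)
    icd_code = icd_code[:-1]       # Truncate off SUBCODE_DELIM
    parents.append(icd_code)
print(parents)  # ['428.2', '428.', '428'] - candidates checked against the lookup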
SELECT pat_id, medication_id, description, COUNT(*) as medication_co_occurrence
FROM stride_order_med
WHERE pat_id = {0}
AND ordering_date > {1}-Xmin
AND ordering_date < {1}+Xmin
GROUP BY pat_id, medication_id, description
"""

DATA_QUERY = SQLQuery()
DATA_QUERY.addSelect("pat_id")
DATA_QUERY.addSelect("medication_id")
DATA_QUERY.addSelect("description")
DATA_QUERY.addSelect("COUNT(*) as medication_co_occurrence")
DATA_QUERY.addFrom("stride_order_med")
DATA_QUERY.addWhere("pat_id = '{0}'".format(pat_id))
DATA_QUERY.addWhere("ordering_date >= '{0}'".format(window_lower))
DATA_QUERY.addWhere("ordering_date <= '{0}'".format(window_upper))
if pat_id in orderset_order_med_ids:
    # Do not count order instances ordered with an order set; we only want a la carte instances
    DATA_QUERY.addWhere("order_med_id NOT IN ({0})".format(order_med_id_str))
DATA_QUERY.addGroupBy("pat_id")
DATA_QUERY.addGroupBy("medication_id")
DATA_QUERY.addGroupBy("description")
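# Hedged sketch: SQLQuery renders to a SQL string via str(), which is how the
# template in the docstring above gets realized; pat_id and the window bounds
# are whatever the enclosing loop supplies.
print(str(DATA_QUERY))  # SELECT ... FROM stride_order_med WHERE pat_id = '...' AND ...
results = DBUtil.execute(DATA_QUERY)  # One (pat_id, medication_id, description, count) row per med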
def querySourceItems(self, convOptions, progress=None, conn=None):
    """Query the database for a list of all source clinical items (culture
    results, etc.) and yield the results one at a time. If startDate provided,
    only return items whose occurrence date is on or after that date.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Column headers to query for that map to respective fields in analysis table
    headers = [
        "order_proc_anon_id", "pat_anon_id", "pat_enc_csn_anon_id",
        "proc_code", "organism_name", "antibiotic_name", "suseptibility",
        "shifted_result_time"
    ]

    query = SQLQuery()
    for header in headers:
        query.addSelect(header)
    query.addFrom("stride_culture_micro")
    # TODO: Figure out why we can't do a >= operation here
    # if convOptions.startDate is not None:
    #     query.addWhereOp("shifted_result_time", ">=", convOptions.startDate)
    # if convOptions.endDate is not None:
    #     query.addWhereOp("shifted_result_time", "<", convOptions.endDate)
    # Still use begin date as common filter value

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    row = cursor.fetchone()
    while row is not None:
        rowModel = RowItemModel(row, headers)

        if rowModel['shifted_result_time'] is None:  # Don't add if no result time given
            row = cursor.fetchone()
            continue

        if rowModel['organism_name'] is not None:
            # If positive culture but results uninterpretable, don't add feature
            if (rowModel['suseptibility'] is None
                    or rowModel['antibiotic_name'] == 'Method'
                    or rowModel['antibiotic_name'] is None):
                row = cursor.fetchone()
                continue

        try:
            # So that we don't run into directory issues later when writing temp files
            rowModel['antibiotic_name'] = rowModel['antibiotic_name'].replace('/', '-')

            # Collapse susceptibility features to either susceptible or not
            if rowModel['suseptibility'] is not None:
                if rowModel['suseptibility'] == 'Susceptible' or rowModel['suseptibility'] == 'Positive':
                    rowModel['suseptibility'] = 'Susceptible'
                else:
                    rowModel['suseptibility'] = "Not_Susceptible"
        except AttributeError:
            # When antibiotic_name is None (negative culture rows), leave fields as-is
            pass

        yield rowModel  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
        row = cursor.fetchone()

    # Slight risk here. Normally DB connection closing should be in a finally
    # clause of a try block, but the "yield" generator construct forbids us
    # from using a try/finally construct.
    cursor.close()
    if not extConn:
        conn.close()
def _query_patient_episodes(self):
    log.info('Querying patient episodes...')

    # Initialize DB cursor.
    cursor = self._connection.cursor()

    # Build parameters for query.
    self._lab_components = self._get_components_in_lab_panel()
    random_patient_list = self._get_random_patient_list()

    # Build SQL query for list of patient episodes.
    # Note that for 2008-2014 data, result_flag can take on any of the
    # following values: High, Low, High Panic, Low Panic,
    # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
    # (NONE): Only 27 lab components can have this flag. None has this
    #       value for more than 5 results, so ignore it.
    # *: Only 10 lab components can have this flag. Only 6 have it for
    #       >10 tests, and each of them is a microbiology test for which
    #       a flag is less meaningful, e.g. Gram Stain, AFB culture.
    # Susceptible: Only 15 lab components can have this flag. All of those
    #       only have this value for 2 results, so ignore it.
    # Resistant: Only 1 lab component can have this flag. Only two results
    #       have this value, so ignore it.
    # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
    #       have this value for thousands of results, so include it.
    # Negative: Only 10 lab components can have this flag, and all for
    #       less than 5 results, so ignore it.
    # Positive: Only 3 lab components can have this flag, and all for
    #       only 1 result, so ignore it.
    # Low Off-Scale: Only 1 lab component can have this flag, and only for
    #       3 results, so ignore it.
    # Low Panic: 1401 lab components can have this flag, many core
    #       metabolic components. Include it.
    # High Panic: 8084 lab components can have this flag, many core
    #       metabolic components. Include it.
    #
    # Note that some episodes have ord_num_value = 9999999;
    # these episodes are removed.
    query = SQLQuery()
    query.addSelect('CAST(pat_id AS BIGINT)')
    query.addSelect('sop.order_proc_id AS order_proc_id')
    query.addSelect('proc_code')
    query.addSelect('order_time')
    query.addSelect('ord_num_value')
    query.addSelect("CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel")
    query.addSelect("SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components")
    query.addSelect("SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components")
    query.addSelect("CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal")
    query.addFrom('stride_order_proc AS sop')
    query.addFrom('stride_order_results AS sor')
    query.addWhere('sop.order_proc_id = sor.order_proc_id')
    query.addWhere("(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)")
    query.addWhere("ord_num_value != 9999999")
    query.addWhereIn("proc_code", [self._lab_panel])
    query.addWhereIn("pat_id", random_patient_list)
    query.addGroupBy('pat_id')
    query.addGroupBy('sop.order_proc_id')
    query.addGroupBy('proc_code')
    query.addGroupBy('order_time')
    query.addGroupBy('abnormal_yn')
    query.addGroupBy('ord_num_value')
    query.addOrderBy('pat_id')
    query.addOrderBy('sop.order_proc_id')
    query.addOrderBy('proc_code')
    query.addOrderBy('order_time')
    query.addOrderBy('ord_num_value')
    query.setLimit(self._num_requested_episodes)

    self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
        self, query, index_time_col='order_time')
os.chdir('/Users/jwang/Desktop/ClinicalDecisionMaker')
from cStringIO import StringIO  # assumed import: SCRIPT_FILE below needs it (use io.StringIO on Python 3)
from medinfo.common.test.Util import MedInfoTestCase
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery
from medinfo.db.DBUtil import NUMBER, BOOLEAN, STRING, DATETIME

# Create SCRIPT_FILE
# clinical_item_category_id = 13 (Discharge)
# clinical_item_category_id = 23 (Admission)
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")

DATA_QUERY = SQLQuery()
DATA_QUERY.addSelect("pat_anon_id")
DATA_QUERY.addSelect("shifted_transf_in_dt_tm")
DATA_QUERY.addSelect("department_in")
DATA_QUERY.addSelect("event_in")
DATA_QUERY.addSelect("shifted_transf_out_dt_tm")
DATA_QUERY.addSelect("event_out")
DATA_QUERY.addSelect("pat_enc_csn_anon_id")
# Splices an OR into the column argument, so this renders as:
#   WHERE event_out = 'Discharge' OR event_in = 'Admission'
DATA_QUERY.addWhereEqual("event_out = 'Discharge' OR event_in", 'Admission')
DATA_QUERY.addFrom("stride_adt")
print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)
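# The "Write out data to CSV" step isn't shown above; a minimal sketch using
# the standard csv module (the output path is illustrative):
import csv

with open("/Users/jwang/Desktop/Results/stride_adt_events.csv", "w") as csv_f:
    writer = csv.writer(csv_f)
    writer.writerow(["pat_anon_id", "shifted_transf_in_dt_tm", "department_in",
                     "event_in", "shifted_transf_out_dt_tm", "event_out",
                     "pat_enc_csn_anon_id"])
    for row in results:  # each row from DBUtil.execute is a list of column values
        writer.writerow(row)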
def queryPatientItemsPerPatient(self, analysisOptions, progress=None, conn=None):
    """Query the database for an ordered list of patient clinical items,
    in the order in which they occurred.
    This could be a large amount of data, so option to provide
    list of specific patientIds or date ranges to query for.
    In either case, results will be returned as an iterator over
    individual lists for each patient. Lists will contain RowItemModels, each with data:
        * patient_id
        * encounter_id
        * clinical_item_id
        * item_date
        * analyze_date
    """
    extConn = conn is not None;
    if not extConn:
        conn = self.connFactory.connection();

    # Reset for actual data selects
    query = SQLQuery();
    query.addSelect("pi.patient_item_id");
    query.addSelect("pi.patient_id");
    query.addSelect("pi.encounter_id");
    query.addSelect("pi.clinical_item_id");
    query.addSelect("pi.item_date");
    query.addSelect("pi.analyze_date");
    query.addFrom("patient_item as pi");
    query.addFrom("clinical_item as ci");
    query.addWhere("pi.clinical_item_id = ci.clinical_item_id");
    query.addWhere("ci.analysis_status <> 0");  # Skip items designated to be ignored
    if analysisOptions.patientIds is not None:
        query.addWhereIn("patient_id", analysisOptions.patientIds);
    if analysisOptions.startDate is not None:
        query.addWhereOp("pi.item_date", ">=", analysisOptions.startDate);
    if analysisOptions.endDate is not None:
        query.addWhereOp("pi.item_date", "<", analysisOptions.endDate);
    query.addOrderBy("pi.patient_id");
    query.addOrderBy("pi.item_date");
    query.addOrderBy("pi.clinical_item_id");

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

    cursor = conn.cursor();
    # Do one massive query, but yield data for one patient at a time.
    # This should minimize the number of DB queries and the amount of
    # data that must be kept in memory at any one time.
    cursor.execute(str(query), tuple(query.params));

    currentPatientId = None;
    currentPatientData = list();

    headers = ["patient_item_id", "patient_id", "encounter_id", "clinical_item_id", "item_date", "analyze_date"];

    row = cursor.fetchone();
    while row is not None:
        (patient_item_id, patientId, encounter_id, clinicalItemId, itemDate, analyzeDate) = row;
        if currentPatientId is None:
            currentPatientId = patientId;

        if patientId != currentPatientId:
            # Changed patient; yield the accumulated data for the previous patient
            yield currentPatientData;

            # Update our data tracking for the current patient
            currentPatientId = patientId;
            currentPatientData = list();

        rowModel = RowItemModel(row, headers);
        currentPatientData.append(rowModel);

        row = cursor.fetchone();

    # Yield the final patient's data
    yield currentPatientData;

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close();
    if not extConn:
        conn.close();
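# Usage sketch for the per-patient iterator above ("preparer" stands in for
# whatever instance of this class is in scope, and analysisOptions is assumed
# configured as in the surrounding code):
for patientData in preparer.queryPatientItemsPerPatient(analysisOptions):
    # patientData is one patient's RowItemModels, already in item_date order
    patientId = patientData[0]["patient_id"]
    print("Patient %s: %d clinical items" % (patientId, len(patientData)))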
def querySourceItems(self, rxcuiDataByMedId, convOptions, progress=None, conn=None): """Query the database for list of all source clinical items (medications, etc.) and yield the results one at a time. If startDate provided, only return items whose ordering_date is on or after that date. """ extConn = conn is not None if not extConn: conn = self.connFactory.connection() # Column headers to query for that map to respective fields in analysis table queryHeaders = [ "med.order_med_id", "pat_id", "pat_enc_csn_id", "med.medication_id", "med.description", "ordering_date", "med_route", "number_of_doses", "protocol_id", "protocol_name", "section_name", "smart_group" ] headers = [ "order_med_id", "pat_id", "pat_enc_csn_id", "medication_id", "description", "ordering_date", "med_route", "number_of_doses", "protocol_id", "protocol_name", "section_name", "smart_group" ] query = SQLQuery() for header in queryHeaders: query.addSelect(header) query.addFrom( "stride_order_med as med left outer join stride_orderset_order_med as os on med.order_med_id = os.order_med_id" ) # Grab order set links if they exist query.addWhere("med.medication_id <> %s" % TEMPLATE_MEDICATION_ID) query.addWhere("freq_name not like '%%PRN'") # Ignore PRN orders if convOptions.startDate is not None: query.addWhereOp("ordering_date", ">=", convOptions.startDate) if convOptions.endDate is not None: query.addWhereOp("ordering_date", "<", convOptions.endDate) # Query to get an estimate of how long the process will be if progress is not None: progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0] cursor = conn.cursor() # Do one massive query, but yield data for one item at a time. cursor.execute(str(query), tuple(query.params)) row = cursor.fetchone() while row is not None: rowModel = RowItemModel(row, headers) for normalizedModel in self.normalizeMedData( rxcuiDataByMedId, rowModel, convOptions): yield normalizedModel # Yield one row worth of data at a time to avoid having to keep the whole result set in memory row = cursor.fetchone() # Slight risk here. Normally DB connection closing should be in finally of a try block, # but using the "yield" generator construct forbids us from using a try, finally construct. cursor.close() if not extConn: conn.close()
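# Why '%%PRN' above: the query string is later run through DB-API "format"
# style parameter substitution (note the str(query), tuple(query.params)
# convention used throughout), so a literal % in a LIKE pattern must be
# doubled to survive interpolation. Quick runnable illustration:
clause = "freq_name not like '%%PRN'"
print(clause % ())  # prints: freq_name not like '%PRN'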
def querySourceItems(self, patientIds=None, progress=None, conn=None):
    """Query the database for list of all patient demographics
    and yield the results one at a time. If patientIds provided, only return items
    matching those IDs.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Column headers to query for that map to respective fields in analysis table
    headers = ["pat_id", "birth_year", "gender", "death_date", "race", "ethnicity"]
    query = SQLQuery()
    for header in headers:
        query.addSelect(header)
    query.addFrom("stride_patient as sp")
    if patientIds is not None:
        query.addWhereIn("sp.pat_id", patientIds)

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    row = cursor.fetchone()
    while row is not None:
        rowModel = RowItemModel(row, headers)

        if rowModel["birth_year"] is None:
            # Blank values don't make sense. Skip it
            log.warning(rowModel)
        else:
            # Record birth at resolution of year
            rowModel["itemDate"] = datetime(rowModel["birth_year"], 1, 1)
            rowModel["name"] = "Birth"
            rowModel["description"] = "Birth Year"
            yield rowModel

            # Record another at resolution of decade
            decade = (rowModel["birth_year"] / 10) * 10  # Integer division under Python 2; use // on Python 3
            rowModel["itemDate"] = datetime(rowModel["birth_year"], 1, 1)
            rowModel["name"] = "Birth%ds" % decade
            rowModel["description"] = "Birth Decade %ds" % decade
            yield rowModel

            # Summarize race and ethnicity information into single field of interest
            raceEthnicity = self.summarizeRaceEthnicity(rowModel)
            rowModel["itemDate"] = datetime(rowModel["birth_year"], 1, 1)
            rowModel["name"] = "Race" + (raceEthnicity.translate(None, " ()-/"))  # Strip off punctuation (two-argument str.translate is Python 2 behavior)
            rowModel["description"] = "Race/Ethnicity: %s" % raceEthnicity
            yield rowModel

            gender = rowModel["gender"].title()
            rowModel["name"] = gender
            rowModel["description"] = "%s Gender" % gender
            yield rowModel

            if rowModel["death_date"] is not None:
                rowModel["name"] = "Death"
                rowModel["description"] = "Death Date"
                rowModel["itemDate"] = rowModel["death_date"]
                yield rowModel

        row = cursor.fetchone()
        if progress is not None:  # Guard: progress defaults to None, so only update when provided
            progress.Update()

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close()
    if not extConn:
        conn.close()
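# Sketch of the fan-out above: one demographics row yields several clinical
# items (birth year, birth decade, race/ethnicity, gender, and death where
# applicable). The decade bucketing relies on integer division; for a
# hypothetical birth year:
birth_year = 1984  # hypothetical value
decade = (birth_year // 10) * 10  # // makes the integer division explicit
print("Birth%ds" % decade)  # prints: Birth1980s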
def queryMixSourceItems(self, rxcuiDataByMedId, convOptions, progress=None, conn=None): """Query the database for list of source clinical items (medications from mixes, etc.) and yield the results one at a time. """ extConn = conn is not None if not extConn: conn = self.connFactory.connection() # Column headers to query for that map to respective fields in analysis table queryHeaders = [ "med.order_med_id", "med.pat_id", "med.pat_enc_csn_id", "mix.medication_id", "mix.medication_name", "mix.ingredient_type", "med.ordering_date", "med.med_route", "med.number_of_doses", "protocol_id", "protocol_name", "section_name", "smart_group" ] headers = [ "order_med_id", "pat_id", "pat_enc_csn_id", "medication_id", "description", "ingredient_type", "ordering_date", "med_route", "number_of_doses", "protocol_id", "protocol_name", "section_name", "smart_group" ] query = SQLQuery() for header in queryHeaders: query.addSelect(header) query.addFrom( "stride_order_med as med left outer join stride_orderset_order_med as os on med.order_med_id = os.order_med_id" ) # Grab order set links if they exist query.addFrom("stride_order_medmixinfo as mix") query.addWhere("med.order_med_id = mix.order_med_id") #query.addWhereEqual("med.medication_id", TEMPLATE_MEDICATION_ID ); #query.addWhere("mix.line = 1"); # Just take the first item from a mix query.addWhere("freq_name not like '%%PRN'") # Ignore PRN orders if convOptions.startDate is not None: query.addWhereOp("ordering_date", ">=", convOptions.startDate) if convOptions.endDate is not None: query.addWhereOp("ordering_date", "<", convOptions.endDate) query.addOrderBy("med.ordering_date, med.order_med_id, mix.line") # Query to get an estimate of how long the process will be if progress is not None: progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0] cursor = conn.cursor() # Do one massive query, but yield data for one item at a time. cursor.execute(str(query), tuple(query.params)) # Accumulate mixture components one item at a time mixByOrderMedId = dict() row = cursor.fetchone() while row is not None: rowModel = RowItemModel(row, headers) orderMedId = rowModel["order_med_id"] if orderMedId not in mixByOrderMedId: # New medication mix encountered. Process any prior ones before moving on for normalizedModel in self.normalizeMixData( rxcuiDataByMedId, mixByOrderMedId, convOptions): yield normalizedModel # Yield one row worth of data at a time to avoid having to keep the whole result set in memory mixByOrderMedId.clear() # Discard previously processed mixes so don't have a ton left in memory mixByOrderMedId[orderMedId] = list() # Prep for next mix mixByOrderMedId[orderMedId].append(rowModel) row = cursor.fetchone() # One more pass for remaining items for normalizedModel in self.normalizeMixData(rxcuiDataByMedId, mixByOrderMedId, convOptions): yield normalizedModel # Yield one row worth of data at a time to avoid having to keep the whole result set in memory # Slight risk here. Normally DB connection closing should be in finally of a try block, # but using the "yield" generator construct forbids us from using a try, finally construct. cursor.close() if not extConn: conn.close()
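# The accumulate-and-flush loop above works only because the ORDER BY keeps
# all rows of one order_med_id contiguous. The same grouping can be written
# more compactly with itertools.groupby (a sketch; rowIterator stands in for
# the cursor fetch loop):
from itertools import groupby

def iterMixComponents(rowIterator):
    """Yield (orderMedId, [component rowModels]) for each contiguous mix."""
    for orderMedId, components in groupby(rowIterator, key=lambda r: r["order_med_id"]):
        yield orderMedId, list(components)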
def copyPatientTemplate(self, patientData, templatePatientId, conn=None): """Create a new patient record based on the given template patient ID to copy from. Will copy shallow attributes, overridden by any provided in the given patientData, as well as any patient states, notes, or physician orders UP TO (and including) relative time zero, but not subsequent states, notes, or physician orders (the latter is expected to reflect real user interaction records). """ extConn = True if conn is None: conn = self.connFactory.connection() extConn = False try: templatePatientData = DBUtil.loadRecordModelById("sim_patient", templatePatientId, conn=conn) del templatePatientData[ "sim_patient_id"] # Remove prior ID to allow for new one templatePatientData.update( patientData) # Override with new content (if exists) DBUtil.insertRow("sim_patient", templatePatientData, conn=conn) # Create new patient record patientId = DBUtil.execute(DBUtil.identityQuery("sim_patient"), conn=conn)[0][0] # Copy initial template patient states query = SQLQuery() query.addSelect("*") # Copy all columns query.addFrom("sim_patient_state as sps") query.addWhereEqual("sps.sim_patient_id", templatePatientId) query.addWhereOp("relative_time_start", "<=", 0) query.addOrderBy("relative_time_start") dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn) dataModels = modelListFromTable(dataTable) nStates = len(dataModels) for i, dataModel in enumerate(dataModels): del dataModel[ "sim_patient_state_id"] # Discard copied ID to allow new one if i == nStates - 1: del dataModel[ "relative_time_end"] # Last state. Blank out end time to reflect open ended for simulation dataModel["sim_patient_id"] = patientId DBUtil.insertRow("sim_patient_state", dataModel, conn=conn) # Copy initial template orders query = SQLQuery() query.addSelect("*") query.addFrom("sim_patient_order as spo") query.addWhereEqual("sim_patient_id", templatePatientId) query.addWhereOp("relative_time_start", "<=", 0) query.addOrderBy("relative_time_start") dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn) dataModels = modelListFromTable(dataTable) for dataModel in dataModels: del dataModel["sim_patient_order_id"] dataModel["sim_patient_id"] = patientId DBUtil.insertRow("sim_patient_order", dataModel, conn=conn) conn.commit() # Transactional commit for multi-step process return patientId finally: if not extConn: conn.close()
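# Usage sketch (values are hypothetical; "manager" is an instance of this
# class, whatever it is named in the surrounding module):
# newPatientId = manager.copyPatientTemplate({"name": "Practice Case"}, templatePatientId=3)
# Only states and orders with relative_time_start <= 0 are copied, so the new
# record starts fresh at simulation time zero; notes follow the copied states
# via loadNotes's join on sim_patient_state.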
from medinfo.common.test.Util import MedInfoTestCase;
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery;
from medinfo.db.DBUtil import NUMBER, BOOLEAN, STRING, DATETIME;
from cStringIO import StringIO  # assumed import: SCRIPT_FILE below needs it (use io.StringIO on Python 3)

# Create SCRIPT_FILE
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")

# Load in top patient count diagnoses (we will only include these as binary covariates)
# select * from clinical_item
# where clinical_item_category_id = 2
# order by patient_count desc
DATA_QUERY = SQLQuery()
DATA_QUERY.addSelect("clinical_item_id")
DATA_QUERY.addSelect("name")
DATA_QUERY.addSelect("description")
DATA_QUERY.addSelect("patient_count")
DATA_QUERY.addFrom("clinical_item")
# Splices an extra condition into the column argument, so this renders as:
#   WHERE patient_count > 0 and clinical_item_category_id = 2
DATA_QUERY.addWhereEqual("patient_count > 0 and clinical_item_category_id", 2)

top_admission_diagnoses_f = open("/Users/jwang/Desktop/Results/top_admission_diagnoses.csv", "w")
top_admission_diagnoses = {}
top_admission_diagnoses_string = []

DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY);

top_admission_diagnoses_f.write("clinical_item_id,name,description,patient_count\n")
for line in results:
    top_admission_diagnoses_f.write("{0},{1},{2},{3}\n".format(line[0], line[1], line[2], line[3]))
    top_admission_diagnoses[line[0]] = line[2]
def loadPatientOrders(self, patientId, currentTime, loadActive=True, conn=None): """Load orders for the given patient that exist by the specified current time point. loadActive - Specify whether to load active vs. inactive/completed orders. Set to None to load both """ extConn = True if conn is None: conn = self.connFactory.connection() extConn = False try: query = SQLQuery() query.addSelect("po.sim_patient_order_id") query.addSelect("po.sim_user_id") query.addSelect("po.sim_patient_id") query.addSelect("po.sim_state_id") query.addSelect("po.clinical_item_id") query.addSelect("po.relative_time_start") query.addSelect("po.relative_time_end") query.addSelect("ci.name") query.addSelect("ci.description") query.addSelect("cic.source_table") query.addSelect("cic.description as category_description") query.addFrom("sim_patient_order as po") query.addFrom("clinical_item as ci") query.addFrom("clinical_item_category as cic") query.addWhere("po.clinical_item_id = ci.clinical_item_id") query.addWhere( "ci.clinical_item_category_id = cic.clinical_item_category_id") query.addWhereEqual("sim_patient_id", patientId) query.addWhereOp("relative_time_start", "<=", currentTime) if loadActive: # Filter out inactive orders here. query.openWhereOrClause() query.addWhere("relative_time_end is null") query.addWhereOp("relative_time_end", ">", currentTime) query.closeWhereOrClause() #elif loadActive is not None: # Filter out active orders here. # query.addWhereOp("relative_time_end","<=", currentTime) if loadActive: # Organize currently active orders by category query.addOrderBy("cic.description") query.addOrderBy("ci.description") query.addOrderBy("relative_time_start") else: # Otherwise chronologic order query.addOrderBy("relative_time_start") query.addOrderBy("cic.description") query.addOrderBy("ci.description") dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn) dataModels = modelListFromTable(dataTable) return dataModels finally: if not extConn: conn.close()
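# The active-order filter above encodes a simple predicate; a reference
# implementation for clarity (a sketch over the returned dataModels):
def isOrderActive(order, currentTime):
    """True if the order started by currentTime and has not yet ended."""
    return (order["relative_time_start"] <= currentTime
            and (order["relative_time_end"] is None
                 or order["relative_time_end"] > currentTime))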
def loadNotes(self, patientId, currentTime, conn=None):
    """Load notes committed up to the given simulation time."""
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("sn.sim_note_id")
        query.addSelect("sps.sim_patient_id")  # Link
        query.addSelect("sn.sim_state_id")
        query.addSelect("sn.note_type_id")
        query.addSelect("sn.author_type_id")
        query.addSelect("sn.service_type_id")
        query.addSelect("(sps.relative_time_start + sn.relative_state_time) as relative_time")
        query.addSelect("sn.content")
        query.addFrom("sim_note as sn")
        query.addFrom("sim_patient_state as sps")
        query.addWhere("sn.sim_state_id = sps.sim_state_id")
        query.addWhereEqual("sps.sim_patient_id", patientId)

        # Only unlock notes once the simulation has reached their expected release time
        query.addWhereOp("(sps.relative_time_start + sn.relative_state_time)", "<=", currentTime)
        query.addOrderBy("(sps.relative_time_start + sn.relative_state_time)")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        return dataModels
    finally:
        if not extConn:
            conn.close()
def loadPendingResultOrders(self, patientId, relativeTime, conn=None):
    """Load all patient orders at the given relativeTime that are due to yield results,
    but have not yet. Include an estimate of time until results available.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("distinct po.clinical_item_id")  # Distinct so don't report multiple times for panel orders
        query.addSelect("po.relative_time_start")
        query.addSelect("po.relative_time_end")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addSelect("sorm.turnaround_time")  # A single order can have different turnaround times for different sub-results. Just report each.
        query.addSelect("sorm.turnaround_time - (%d - po.relative_time_start) as time_until_result" % relativeTime)  # Calculate time until the result is expected (direct %d interpolation assumes relativeTime is an integer)
        query.addFrom("sim_patient_order as po")
        query.addFrom("clinical_item as ci")
        query.addFrom("sim_order_result_map as sorm")
        query.addWhere("po.clinical_item_id = ci.clinical_item_id")
        query.addWhere("po.clinical_item_id = sorm.clinical_item_id")
        query.addWhereEqual("sim_patient_id", patientId)

        # Only catch orders up to the given relativeTime and not cancelled
        query.addWhereOp("relative_time_start", "<=", relativeTime)
        query.openWhereOrClause()
        query.addWhere("relative_time_end is null")
        query.addWhereOp("relative_time_end", ">", relativeTime)
        query.closeWhereOrClause()

        # Only PENDING orders, so don't report orders whose results should already be available
        query.addWhereOp("sorm.turnaround_time + po.relative_time_start", ">", relativeTime)

        query.addOrderBy("time_until_result")
        query.addOrderBy("relative_time_start")
        query.addOrderBy("ci.name")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        return dataModels
    finally:
        if not extConn:
            conn.close()
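# Worked example of the time_until_result arithmetic above (hypothetical
# numbers): an order placed at relative_time_start = -30 with
# turnaround_time = 120, queried at relativeTime = 60:
relative_time_start = -30
turnaround_time = 120
relativeTime = 60
time_until_result = turnaround_time - (relativeTime - relative_time_start)
print(time_until_result)  # 30; still pending since turnaround_time + start (90) > relativeTime (60)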
class TestDBUtil(DBTestCase): def setUp(self): """Prepare state for test cases""" DBTestCase.setUp(self) self.SCRIPT_FILE = StringIO() self.SCRIPT_FILE.write( "# Create table to test on. Also testing that comment tag is recognized\n" ) self.SCRIPT_FILE.write("\n") self.SCRIPT_FILE.write("create table TestTypes\n") self.SCRIPT_FILE.write("(\n") self.SCRIPT_FILE.write(" TestTypes_id serial,\n") self.SCRIPT_FILE.write( " MyText varchar(50), /* Same as character varying, also test standard SQL comment tags */\n" ) self.SCRIPT_FILE.write(" MyInteger integer,\n") self.SCRIPT_FILE.write( " MyReal real, -- Floating point number, also test standard SQL comment tag\n" ) self.SCRIPT_FILE.write( " MyDateTime TIMESTAMP, -- PostgreSQL uses TIMESTAMP, but MySQL doesn't do NULL values right, so have to use DATETIME for MySQL?\n" ) self.SCRIPT_FILE.write(" MyYesNo boolean\n") self.SCRIPT_FILE.write(");\n") self.SCRIPT_FILE.write( "ALTER TABLE TestTypes ADD CONSTRAINT TestTypes_id PRIMARY KEY (TestTypes_id);\n" ) # Should auto-create testtypes_testtypes_id_seq sequence self.SCRIPT_FILE.write( "CREATE INDEX TestTypes_MyInteger_INDEX ON TestTypes(MyInteger);\n" ) self.SCRIPT_FILE.write("\n") self.SCRIPT_FILE.write( "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n" ) self.SCRIPT_FILE.write( "values ('Sample Text', 123,123.45,'2004-09-08 19:41:47.292000',True);\n" ) self.SCRIPT_FILE.write( "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n" ) self.SCRIPT_FILE.write( "values ('Joe Mama', 234,23.45,'1990-10-03 19:41:47.292000',False);\n" ) self.SCRIPT_FILE.write( "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n" ) self.SCRIPT_FILE.write( "values ('Mo Fo', 345,3.45,'2014-01-04 19:41:47.292000',True);\n" ) self.SCRIPT_FILE.write("\n") self.SCRIPT_FILE = StringIO(self.SCRIPT_FILE.getvalue()) self.DATA_TABLE = "TestTypes" self.DATA_COLS = "MyInteger\tMyReal\tMyYesNo\tMyText\n" self.DATA_FILE = StringIO() self.DATA_FILE.write('100\t100.1\tNone\tATest\n') self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\tNone\n') self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t\n') # Skip None tag at end of line, test that white space won't get lost self.DATA_FILE.write('300\t\t' + TRUE_STR + '\tCTest\n') self.DATA_FILE = StringIO(self.DATA_FILE.getvalue()) self.DATA_ROWS = [] self.DATA_ROWS.append([ 100, 100.1, None, "ATest", ]) self.DATA_ROWS.append([ 200, 200.2, False, None, ]) self.DATA_ROWS.append([ 300, None, True, "CTest", ]) self.MULTI_LINE_DATA_FILE = StringIO() self.MULTI_LINE_DATA_FILE.write( 'myinteger\t"MyReal"\t"MyYesNo"\tMyText\n') self.MULTI_LINE_DATA_FILE.write( '100\t100.1\tNone\t"""A"" Test and ""more"""\n') self.MULTI_LINE_DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t""\n') self.MULTI_LINE_DATA_FILE.write('300\tNone\t' + TRUE_STR + '\t"C\\nTest"\t\n') self.MULTI_LINE_DATA_FILE = StringIO( self.MULTI_LINE_DATA_FILE.getvalue()) self.MULTI_LINE_DATA_ROWS = [] self.MULTI_LINE_DATA_ROWS.append([ 100, 100.1, None, '"A" Test and "more"', ]) self.MULTI_LINE_DATA_ROWS.append([ 200, 200.2, False, None, ]) self.MULTI_LINE_DATA_ROWS.append([ 300, None, True, 'C\\nTest', ]) # ID summary data to make it easier to verify stuff self.COL_NAMES = self.DATA_COLS.split() self.ID_COL = self.COL_NAMES[0] self.ID_DATA = [] for row in self.DATA_ROWS: self.ID_DATA.append(row[0]) # Build query to get update rows self.DATA_QUERY = SQLQuery() for col in self.COL_NAMES: self.DATA_QUERY.addSelect(col) self.DATA_QUERY.addFrom(self.DATA_TABLE) 
self.DATA_QUERY.addWhereIn(self.ID_COL, self.ID_DATA) self.DATA_QUERY.addOrderBy(self.ID_COL) def tearDown(self): """Restore state from any setUp or test steps""" try: DBUtil.execute("drop table TestTypes") pass except Exception as err: log.warning(err) pass DBTestCase.tearDown(self) def test_runDBScript(self): # Just run a DB Script and make sure no ProgrammingErrors are raised. DBUtil.runDBScript(self.SCRIPT_FILE, False) # Run some other commands to see if scripts produced expected results results = DBUtil.execute( "select * from TestTypes where MyInteger > %s", (200, )) self.assertEqual(2, len(results)) # Test extra "includeColumnNames" parameter results = DBUtil.execute( "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger < %s", (100, ), True) expected = [ "TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime", "MyYesNo" ] for iCol in range(len(expected)): # Ignore case for comparison expected[iCol] = expected[iCol].lower() results[0][iCol] = results[0][iCol].lower() self.assertEqual(expected, results[0]) self.assertEqual(0, len(results) - 1) def test_runDBScript_commandline(self): # Equivalent to test_runDBScript, but try higher level interface # through command-line "main" method origStdin = sys.stdin sys.stdin = self.SCRIPT_FILE argv = ["DBUtil.py", "--script", "-"] DBUtil.main(argv) sys.stdin = origStdin # Run some other commands to see if scripts produced expected results results = DBUtil.execute( "select * from TestTypes where MyInteger > %s", (200, )) self.assertEqual(2, len(results)) results = DBUtil.execute( "select * from TestTypes where MyInteger < %s", (100, )) self.assertEqual(0, len(results)) def test_runDBScript_skipErrors(self): # Similar to test_runDBScript_commandline, but test skipErrors option origStdin = sys.stdin sys.stdin = self.SCRIPT_FILE argv = ["DBUtil.py", "--script", "-"] DBUtil.main(argv) sys.stdin = origStdin # Run script again. Should generate errors from redundant create table, etc. But skip self.SCRIPT_FILE.seek(0) origStdin = sys.stdin sys.stdin = self.SCRIPT_FILE argv = ["DBUtil.py", "--script", "--skipErrors", "-"] DBUtil.main(argv) sys.stdin = origStdin # Run script again. Should generate errors from redundant create table, etc. 
# Verify by catching the exception.
self.SCRIPT_FILE.seek(0)
origStdin = sys.stdin
sys.stdin = self.SCRIPT_FILE
argv = ["DBUtil.py", "--script", "-"]
expectErr = True
actualErr = False
try:
    DBUtil.main(argv)
except Exception as err:
    actualErr = True
self.assertEqual(expectErr, actualErr)
sys.stdin = origStdin

def test_execute_commandline(self):
    # Run basic executes for both an update and a select query, but
    # using the higher-level command-line "main" method interface
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    origStdout = sys.stdout
    sys.stdout = StringIO()
    argv = ["DBUtil.py", "select count(*) from TestTypes where MyInteger > 200", "-"]
    DBUtil.main(argv)
    self.assertEqual(2, int(sys.stdout.getvalue()))
    sys.stdout = origStdout

    origStdout = sys.stdout
    sys.stdout = StringIO()
    argv = ["DBUtil.py", "insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)", "-"]
    DBUtil.main(argv)
    #self.assertEqual( 1, int(sys.stdout.getvalue()) )
    sys.stdout = origStdout

    origStdout = sys.stdout
    sys.stdout = StringIO()
    argv = ["DBUtil.py", "select count(*) from TestTypes where MyInteger > 200", "-"]
    DBUtil.main(argv)
    self.assertEqual(3, int(sys.stdout.getvalue()))
    sys.stdout = origStdout

    # Different test, includeColumnNames
    origStdout = sys.stdout
    sys.stdout = StringIO()
    argv = ["DBUtil.py", "-c", "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger > 200 and MyYesNo = True", "-"]
    DBUtil.main(argv)
    sampleLines = sys.stdout.getvalue().split("\n")
    expected = ["TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime", "MyYesNo"]
    sampleColumns = sampleLines[0].split()
    for iCol in range(len(expected)):
        # Case-insensitive comparison
        expected[iCol] = expected[iCol].lower()
        sampleColumns[iCol] = sampleColumns[iCol].lower()
    for iCol, col in enumerate(sampleColumns):
        self.assertEqual(expected[iCol], col)
    self.assertEqual(2 + 1 + 1, len(sampleLines))  # 2 data lines + 1 column name line + 1 newline at end of output
    sys.stdout = origStdout

def test_insertFile(self):
    # Create a test data file to insert, and verify no errors
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    tableName = "TestTypes"
    idFile = StringIO()
    DBUtil.insertFile(self.MULTI_LINE_DATA_FILE, tableName, None, "\t", idFile)  # Assume column names extracted from first row of data file

    # Verify number of rows inserted
    self.assertEqual(len(self.MULTI_LINE_DATA_ROWS), idFile.getvalue().count("\n"))
    results = DBUtil.execute(self.DATA_QUERY)
    self.assertEqual(self.MULTI_LINE_DATA_ROWS, results)

def test_insertFile_commandline(self):
    # Similar to test_insertFile, but from higher-level command-line interface
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    tableName = "TestTypes"
    columnNames = self.DATA_COLS.split()

    idFile = StringIO()

    # Slightly different test, specify tab as delimiter, not just any whitespace
    origStdin = sys.stdin
    origStdout = sys.stdout
    sys.stdin = self.MULTI_LINE_DATA_FILE
    sys.stdout = idFile
    argv = ["DBUtil.py", "-i-", "-d\\t", "-t" + tableName, "-o-"]
    DBUtil.main(argv)
    sys.stdout = origStdout
    sys.stdin = origStdin

    self.assertEqual(3, idFile.getvalue().count("\n"))
    results = DBUtil.execute(self.DATA_QUERY)
    self.assertEqual(self.MULTI_LINE_DATA_ROWS, results)

def test_insertFile_skipErrors(self):
    # Similar to test_insertFile_commandline, but just test to see if skipErrors option works
    # Test run will show error / warning messages from the app,
    # but these are expected.
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    tableName = "TestTypes"
    columnNames = ["MyReal", "MyYesNo", "MyText", "MyInteger"]

    idFile = StringIO()

    # Try with bogus data that should generate errors
    dataFile = StringIO()
    dataFile.write("ABCD\tPositive\tBadTest\t100.123\n")
    dataFile.write("700.7\t" + FALSE_STR + "\tXTest\t777\n")
    dataFile.write("1,099\tNegative\tMoBadTest\tfoo\n")
    dataFile = StringIO(dataFile.getvalue())

    idFile = StringIO()

    origStdin = sys.stdin
    origStdout = sys.stdout
    sys.stdin = dataFile
    sys.stdout = idFile
    argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-"]
    argv.extend(columnNames)
    expectErr = True
    actualErr = False
    try:
        DBUtil.main(argv)
    except Exception as err:
        actualErr = True
    self.assertEqual(expectErr, actualErr)
    sys.stdout = origStdout
    sys.stdin = origStdin

    # Expect no rows successfully inserted since errors in input
    self.assertEqual(0, idFile.getvalue().count("\n"))
    results = DBUtil.execute("select count(*) from TestTypes where MyText like %s", ("%Test", ))
    self.assertEqual(0, results[0][0])

    # Try again, with bogus data that should generate errors
    dataFile = StringIO()
    dataFile.write("ABCD\tPositive\tBadTest\t100.123\n")
    dataFile.write("700.7\t" + FALSE_STR + "\tXTest\t777\n")
    dataFile.write("1,099\tNegative\tMoBadTest\tfoo\n")
    dataFile = StringIO(dataFile.getvalue())

    idFile = StringIO()

    origStdin = sys.stdin
    origStdout = sys.stdout
    sys.stdin = dataFile
    sys.stdout = idFile
    argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-", "-e"]  # -e option skips errors
    argv.extend(columnNames)
    DBUtil.main(argv)
    sys.stdout = origStdout
    sys.stdin = origStdin

    # Still expect 1 row to get through successfully, despite other invalid input
    self.assertEqual(1, idFile.getvalue().count("\n"))
    results = DBUtil.execute("select count(*) from TestTypes where MyText like %s", ("%Test", ))
    self.assertEqual(1, results[0][0])

def test_insertFile_dateParsing(self):
    # Create a test data file to insert, and verify no errors
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    tableName = "TestTypes"
    columnNames = ["MyInteger", "MyText", "MyDateTime"]

    dataFile = StringIO()
    dataFile.write('''-1\t"12/11/2010"\t"12/11/2010"\n''')
    dataFile.write('''-2\t"2013-04-15 13:45:21"\t"2013-04-15 13:45:21"\n''')
    dataFile.write('''-3\t"2003-04-15 10:45:21"\t"2003-04-15 10:45:21"\n''')
    dataFile.write('''-4\t"4/11/12 6:20"\t"4/11/12 6:20"\n''')
    dataFile = StringIO(dataFile.getvalue())

    dateColFormats = {"myDateTime": None}  # Deliberately change capitalization to ensure robustness
    DBUtil.insertFile(dataFile, tableName, columnNames, dateColFormats=dateColFormats)

    verifyQuery = \
        """select MyInteger, MyText, MyDateTime
        from TestTypes
        where MyInteger < 0
        order by MyInteger desc
        """
    expectedData = \
        [
            [-1, "12/11/2010", datetime(2010, 12, 11)],
            [-2, "2013-04-15 13:45:21", datetime(2013, 4, 15, 13, 45, 21)],
            [-3, "2003-04-15 10:45:21", datetime(2003, 4, 15, 10, 45, 21)],
            [-4, "4/11/12 6:20", datetime(2012, 4, 11, 6, 20)],
        ]

    # Verify rows inserted with properly parsed dates
    results = DBUtil.execute(verifyQuery)
    self.assertEqual(expectedData, results)

def test_insertFile_escapeStrings(self):
    # Create a test data file to insert, and verify no errors
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    tableName = "TestTypes"
    columnNames = ["MyInteger", "MyText"]

    dataFile = StringIO()
    dataFile.write('''-1\t"A"\n''')
    dataFile.write('''-2\t"B\xaeb"\n''')
    dataFile.write('''-3\t"C"\n''')
    dataFile.write('''-4\tD\n''')
    dataFile = StringIO(dataFile.getvalue())

    DBUtil.insertFile(dataFile, tableName, columnNames, escapeStrings=True)

    verifyQuery = \
        """select MyInteger, MyText
        from TestTypes
        where MyInteger < 0
        order by MyInteger desc
        """
    expectedData = \
        [
            [-1, "A"],
            [-2, "B\\xaeb"],
            [-3, "C"],
            [-4, "D"],
        ]

    # Verify rows inserted with properly escaped strings
    results = DBUtil.execute(verifyQuery)
    self.assertEqual(expectedData, results)

def test_identityQuery(self):
    DBUtil.runDBScript(self.SCRIPT_FILE, False)

    # Run some other commands to see if scripts produced expected results
    results = DBUtil.execute("select max(TestTypes_id) from TestTypes")
    lastSeq = results[0][0]

    conn = DBUtil.connection()
    try:
        cur = conn.cursor()
        cur.execute("insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)")
        cur.execute(DBUtil.identityQuery("TestTypes"))
        self.assertEqual(lastSeq + 1, cur.fetchone()[0])

        cur.execute("select TestTypes_id from TestTypes where MyText = 'Another' and MyInteger = 255")
        self.assertEqual(lastSeq + 1, cur.fetchone()[0])
    finally:
        conn.close()

def test_nullCheck(self):
    DBUtil.runDBScript(self.SCRIPT_FILE, False)

    conn = DBUtil.connection()
    try:
        DBUtil.execute("insert into TestTypes (MyText,MyInteger) values ('Test With Null', 255)", conn=conn)
        DBUtil.execute("insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime) values ('Test With Not Null', 255, 1.23, '2005-03-06')", conn=conn)

        result = DBUtil.execute("select MyText from TestTypes where MyInteger = 255 and MyReal is null", conn=conn)
        self.assertEqual('Test With Null', result[0][0])
        result = DBUtil.execute("select MyText from TestTypes where MyInteger = 255 and MyReal is not null", conn=conn)
        self.assertEqual('Test With Not Null', result[0][0])

        # Would not work with MySQL if used TIMESTAMP data type. Should be DATETIME.
        # (TIMESTAMP tries to auto-fill values, so no nulls allowed?)
result = DBUtil.execute( "select MyText from TestTypes where MyInteger = 255 and MyDateTime is null", conn=conn) self.assertEqual('Test With Null', result[0][0]) result = DBUtil.execute( "select MyText from TestTypes where MyInteger = 255 and MyDateTime is not null", conn=conn) self.assertEqual('Test With Not Null', result[0][0]) finally: conn.close() def test_findOrInsertItem(self): DBUtil.runDBScript(self.SCRIPT_FILE, False) searchDict = {} insertDict = {} searchDict["TestTypes_id"] = +123 log.debug("Insert a new item using default params") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict) self.assertEqual(+123, data) self.assertEqual(True, isNew) log.debug("Find the existing item") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict) self.assertEqual(+123, data) self.assertEqual(False, isNew) insertDict["TestTypes_id"] = +456 log.debug("Find existing item, with optional insert data") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict) self.assertEqual(+123, data) self.assertEqual(False, isNew) searchDict["TestTypes_id"] = +789 insertDict["TestTypes_id"] = +789 insertDict["MyInteger"] = 123 log.debug("Insert a new item with actual data") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict) self.assertEqual(+789, data) self.assertEqual(True, isNew) searchDict["TestTypes_id"] = +234 insertDict["TestTypes_id"] = +234 log.debug("Retrieve a different column") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText") self.assertEqual(None, data) self.assertEqual(True, isNew) searchDict["TestTypes_id"] = +345 insertDict["TestTypes_id"] = +345 insertDict["MyText"] = "testText" log.debug("Insert and retrieve a different column") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText") self.assertEqual("testText", data) self.assertEqual(True, isNew) insertDict["MyText"] = "newText" log.debug( "Try inserting a different value under an existing row. 
Should NOT work" ) (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText") self.assertEqual("testText", data) self.assertEqual(False, isNew) log.debug( "Try inserting a different value under an existing row, but force the update" ) insertDict["MyText"] = "newText" (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText", forceUpdate=True) self.assertEqual("newText", data) self.assertEqual(False, isNew) def test_updateFromFile(self): # Create a test data file to insert, and verify no errors DBUtil.runDBScript( self.SCRIPT_FILE, False) # Assume this works based on test_runDBScript method # Insert some blank data first to update for idValue in self.ID_DATA: DBUtil.execute( "insert into TestTypes (" + self.ID_COL + ") values (%s)", (idValue, )) # Negative test case results = DBUtil.execute(self.DATA_QUERY) self.assertNotEqual(self.DATA_ROWS, results) # Now do the actual update from the file DBUtil.updateFromFile(self.DATA_FILE, self.DATA_TABLE, self.COL_NAMES, delim="\t") results = DBUtil.execute(self.DATA_QUERY) self.assertEqual(self.DATA_ROWS, results) def test_updateFromFile_commandline(self): # Similar to test_updateFromFile, but from higher-level command-line interface DBUtil.runDBScript( self.SCRIPT_FILE, False) # Assume this works based on test_runDBScript method # Insert some blank data first to update for idValue in self.ID_DATA: DBUtil.execute( "insert into TestTypes (" + self.ID_COL + ") values (%s)", (idValue, )) # Negative test case results = DBUtil.execute(self.DATA_QUERY) self.assertNotEqual(self.DATA_ROWS, results) # Now do the actual update from the file, but build in column names to data file dataFileWithCols = StringIO() dataFileWithCols.write(self.DATA_COLS) dataFileWithCols.write(self.DATA_FILE.getvalue()) dataFileWithCols = StringIO(dataFileWithCols.getvalue()) sys.stdin = dataFileWithCols argv = ["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-d\\t"] DBUtil.main(argv) # Verify positive results results = DBUtil.execute(self.DATA_QUERY) self.assertEqual(self.DATA_ROWS, results) ######################################################## # Repeat test but data file will use more than one key column (adding MyText) # Further note that MyText is used as both a key column to look up the row to update # and as a value column to modify dataFileWithCols = StringIO() dataFileWithCols.write("MyInteger\tMyText\tMyText\tMyReal\tMyYesNo\n") dataFileWithCols.write("100\tATest\tAAA\tNone\t" + TRUE_STR + "\t\n") dataFileWithCols.write("200\tNone\tBBB\t222.2\tNone\t\n") dataFileWithCols.write("300\tCTest\tNone\t333.3\t" + TRUE_STR + "\t\n") dataFileWithCols = StringIO(dataFileWithCols.getvalue()) # Expected results after this update self.DATA_ROWS = [] self.DATA_ROWS.append([ 100, None, True, "AAA", ]) self.DATA_ROWS.append([ 200, 200.2, False, None, ]) # This row is unchanged, because one of the key values cannot be found as null self.DATA_ROWS.append([ 300, 333.3, True, None, ]) # Negative test case results = DBUtil.execute(self.DATA_QUERY) self.assertNotEqual(self.DATA_ROWS, results) # Now do the actual update from the file, but with an extra parameter specifying 2 key columns sys.stdin = dataFileWithCols argv = ["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-n2"] DBUtil.main(argv) # Verify positive results results = DBUtil.execute(self.DATA_QUERY) self.assertEqual(self.DATA_ROWS, results) def test_deleteRows(self): DBUtil.runDBScript(self.SCRIPT_FILE, False) query = "select count(*) from TestTypes;" # Insert 
some test data to delete tableName = "TestTypes" columnNames = self.DATA_COLS.split() idFile = StringIO() DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None, idFile) idValues = idFile.getvalue().split() # Count up rows before and after delete initialCount = DBUtil.execute(query)[0][0] DBUtil.deleteRows("TestTypes", idValues) afterCount = DBUtil.execute(query)[0][0] self.assertEqual(initialCount - len(idValues), afterCount) # Reinsert the test data to try deleting them by a non-default Id column idFile = StringIO() DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None, idFile) nonDefaultIds = [100, 200] initialCount = DBUtil.execute(query)[0][0] DBUtil.deleteRows("TestTypes", nonDefaultIds, "MyInteger") afterCount = DBUtil.execute(query)[0][0]