def updateClinicalItemCounts(self, acceptCache=False, conn=None):
    """Update the summary item_counts for clinical_items based
    on clinical_item_association summary counts.

    If acceptCache is True, then will first check for existence of an entry
    "clinicalItemCountsUpdated" in the data_cache table. If it exists, assume
    we have done this update already, and no need to force the calculations again.

    acceptCache: skip recalculation when the cache flag is already set.
    conn: optional externally-managed DB connection; when None, a connection
        is created here and closed in the finally block.
    """
    # Track whether the caller owns the connection; only close it if we created it
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        if acceptCache:
            isCacheUpdated = (self.getCacheData("clinicalItemCountsUpdated", conn=conn) is not None)
            if isCacheUpdated:
                # Item count caches already updated, no need to recalculate them
                return

        # First reset all counts to zero
        query = "update clinical_item set item_count = 0, patient_count = 0, encounter_count = 0 "
        params = []
        if self.maxClinicalItemId is not None:  # Restrict to (test) data
            query += "where clinical_item_id < %s" % DBUtil.SQL_PLACEHOLDER
            params.append(self.maxClinicalItemId)
        DBUtil.execute(query, params, conn=conn)

        # Pull the per-item summary counts from the association "diagonal"
        # (rows where an item is associated with itself)
        sqlQuery = SQLQuery()
        sqlQuery.addSelect("clinical_item_id")
        sqlQuery.addSelect("count_0 as item_count")
        sqlQuery.addSelect("patient_count_0 as patient_count")
        sqlQuery.addSelect("encounter_count_0 as encounter_count")
        sqlQuery.addFrom("clinical_item_association as ci")
        sqlQuery.addWhere("clinical_item_id = subsequent_item_id")  # Look along "diagonal" of matrix for primary summary stats
        if self.maxClinicalItemId is not None:  # Restrict to (test) data
            sqlQuery.addWhereOp("clinical_item_id", "<", self.maxClinicalItemId)
        resultTable = DBUtil.execute(sqlQuery, includeColumnNames=True, conn=conn)
        resultModels = modelListFromTable(resultTable)

        # Write the recomputed counts back onto each clinical_item row
        for result in resultModels:
            DBUtil.updateRow("clinical_item", result, result["clinical_item_id"], conn=conn)

        # Make a note that this cache data has been updated
        self.setCacheData("clinicalItemCountsUpdated", "True", conn=conn)
    finally:
        if not extConn:
            conn.close()
def queryItems(self, options, outputFile):
    """Query for all clinical item records that fulfill the options criteria
    and then send the results as tab-delimited output to the outputFile.
    """
    # Delay between output rows, throttling load on the database and consumer
    delaySeconds = float(options.pauseSeconds)

    # Join items to their categories
    itemQuery = SQLQuery()
    itemQuery.addSelect("cic.description, ci.clinical_item_id, ci.name, ci.description")
    itemQuery.addFrom("clinical_item_category as cic")
    itemQuery.addFrom("clinical_item as ci")
    itemQuery.addWhere("cic.clinical_item_category_id = ci.clinical_item_category_id")

    if options.itemPrefix:
        # Add wildcard to enable prefix search
        itemQuery.addWhereOp("ci.description", "like", options.itemPrefix + "%%")
    if options.categoryNames:
        # Comma-separated list of category names to restrict to
        itemQuery.addWhereIn("cic.description", options.categoryNames.split(","))

    itemQuery.addOrderBy("cic.description, ci.name, ci.description, ci.clinical_item_id")

    resultFormatter = TextResultsFormatter(outputFile)
    progress = ProgressDots()
    resultRows = DBUtil.execute(itemQuery, includeColumnNames=True, connFactory=self.connFactory)
    for resultRow in resultRows:
        resultFormatter.formatTuple(resultRow)
        time.sleep(delaySeconds)
        progress.update()
    progress.printStatus()
# NOTE(review): pat_enc_f (an open CSV file) and patient_encounters (a dict)
# are defined earlier in this script, outside this chunk — confirm against full file.
# Skip the CSV header row before parsing encounter records
pat_enc_f.readline()
for line in pat_enc_f:
    line = line.strip().split(",")
    if (line[0] not in patient_encounters):
        patient_encounters[line[0]] = [
            pandas.to_datetime(line[1]),
        ]  # patient id: list of admission times
    else:
        patient_encounters[line[0]].append(pandas.to_datetime(line[1]))
# print(patient_encounters)

# Create SCRIPT_FILE
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")

# Query patient id and death date for every patient on record
DATA_QUERY = SQLQuery()
DATA_QUERY.addSelect("pat_id")
DATA_QUERY.addSelect("death_date")
DATA_QUERY.addFrom("stride_patient")
print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)
#print(len(results))

# Output files for deaths within 7/30/90 days (hard-coded local paths)
output_7 = open("/Users/jwang/Desktop/Results/7_day_deaths.csv", "w")
output_30 = open("/Users/jwang/Desktop/Results/30_day_deaths.csv", "w")
output_90 = open("/Users/jwang/Desktop/Results/90_day_deaths.csv", "w")
def _getNonNullLabs(self):
    """Write distinct LAB* proc codes that have a non-null abnormal_yn flag
    to DATA_FOLDER/proc_codes.csv (one 'proc_code' column, sorted).
    """
    query = SQLQuery()
    # SELECT
    query.addSelect('proc_code')
    # FROM
    query.addFrom('stride_order_proc')
    # WHERE
    query.addWhereLike('proc_code', 'LAB%')
    query.addWhere('abnormal_yn is not null')
    query.addGroupBy('proc_code')
    query.addOrderBy('proc_code')
    results = DBUtil.execute(query)
    # Fix: DataFrame.to_csv() returns None, so the original "df = ..." binding
    # was a dead assignment; just perform the write.
    pd.DataFrame(results, columns=query.select).to_csv(
        DATA_FOLDER + 'proc_codes.csv', index=False)
def _get_random_patient_list(self):
    """Return a random list of patient IDs to sample episodes from.

    Avoids RANDOM() on the database (sx): queries per-patient order counts,
    uses the median count to estimate how many patients are needed to reach
    self._num_requested_episodes, then samples that many patients client-side.
    Exits the process if no orders are found at all.
    """
    # Get number of results per patient for this lab test.
    query = SQLQuery()
    query.addSelect('pat_id')
    query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
    query.addFrom('stride_order_proc AS sop')
    query.addFrom('stride_order_results AS sor')
    query.addWhere('sop.order_proc_id = sor.order_proc_id')
    # NOTE(review): the component restriction was disabled in the original,
    # so the counts cover ALL components — confirm intent before re-enabling.
    ## query.addWhereIn("base_name", [self._component])
    query.addGroupBy('pat_id')
    log.debug('Querying median orders per patient...')
    results = DBUtil.execute(query)
    order_counts = [row[1] for row in results]

    if len(results) == 0:
        error_msg = '0 orders for component "%s."' % self._component  # sx
        log.critical(error_msg)
        sys.exit('[ERROR] %s' % error_msg)
    else:
        avg_orders_per_patient = numpy.median(order_counts)
        log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)

    # Based on average # of results, figure out how many patients we'd
    # need to get for a feature matrix of requested size.
    self._num_patients = int(numpy.max([self._num_requested_episodes /
                                        avg_orders_per_patient, 1]))
    # Some components may have fewer associated patients than the required sample size
    patient_number_chosen = min([len(results), self._num_patients])

    # BUG FIX: this sampling line was commented out in the original, leaving
    # inds_random_patients undefined and raising NameError in the loop below.
    inds_random_patients = numpy.random.choice(
        len(results), size=patient_number_chosen, replace=False)

    pat_IDs_random_patients = [results[ind][0] for ind in inds_random_patients]
    return pat_IDs_random_patients
class TestDBUtil(DBTestCase):
    """Exercises DBUtil's script running, insert/update-from-file, identity,
    null handling, find-or-insert, and row deletion helpers against a
    temporary TestTypes table created fresh for each test in setUp.
    """

    def setUp(self):
        """Prepare state for test cases"""
        DBTestCase.setUp(self)

        # SQL script that creates the TestTypes table and seeds three rows.
        # Deliberately mixes '#', '/* */', and '--' comment styles to verify
        # the script parser recognizes all of them.
        self.SCRIPT_FILE = StringIO()
        self.SCRIPT_FILE.write(
            "# Create table to test on. Also testing that comment tag is recognized\n"
        )
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write("create table TestTypes\n")
        self.SCRIPT_FILE.write("(\n")
        self.SCRIPT_FILE.write(" TestTypes_id serial,\n")
        self.SCRIPT_FILE.write(
            " MyText varchar(50), /* Same as character varying, also test standard SQL comment tags */\n"
        )
        self.SCRIPT_FILE.write(" MyInteger integer,\n")
        self.SCRIPT_FILE.write(
            " MyReal real, -- Floating point number, also test standard SQL comment tag\n"
        )
        self.SCRIPT_FILE.write(
            " MyDateTime TIMESTAMP, -- PostgreSQL uses TIMESTAMP, but MySQL doesn't do NULL values right, so have to use DATETIME for MySQL?\n"
        )
        self.SCRIPT_FILE.write(" MyYesNo boolean\n")
        self.SCRIPT_FILE.write(");\n")
        self.SCRIPT_FILE.write(
            "ALTER TABLE TestTypes ADD CONSTRAINT TestTypes_id PRIMARY KEY (TestTypes_id);\n"
        )  # Should auto-create testtypes_testtypes_id_seq sequence
        self.SCRIPT_FILE.write(
            "CREATE INDEX TestTypes_MyInteger_INDEX ON TestTypes(MyInteger);\n"
        )
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Sample Text', 123,123.45,'2004-09-08 19:41:47.292000',True);\n"
        )
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Joe Mama', 234,23.45,'1990-10-03 19:41:47.292000',False);\n"
        )
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Mo Fo', 345,3.45,'2014-01-04 19:41:47.292000',True);\n"
        )
        self.SCRIPT_FILE.write("\n")
        # Rewrap as a fresh stream positioned at the start for reading
        self.SCRIPT_FILE = StringIO(self.SCRIPT_FILE.getvalue())

        self.DATA_TABLE = "TestTypes"
        self.DATA_COLS = "MyInteger\tMyReal\tMyYesNo\tMyText\n"

        # Tab-delimited data rows used by insert/update-from-file tests
        self.DATA_FILE = StringIO()
        self.DATA_FILE.write('100\t100.1\tNone\tATest\n')
        self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\tNone\n')
        self.DATA_FILE.write(
            '200\t200.2\t' + FALSE_STR + '\t\n'
        )  # Skip None tag at end of line, test that white space won't get lost
        self.DATA_FILE.write('300\t\t' + TRUE_STR + '\tCTest\n')
        self.DATA_FILE = StringIO(self.DATA_FILE.getvalue())

        # Parsed equivalents of DATA_FILE for result comparisons
        # (3 distinct rows; the duplicate "200" line updates the same row)
        self.DATA_ROWS = []
        self.DATA_ROWS.append([100, 100.1, None, "ATest", ])
        self.DATA_ROWS.append([200, 200.2, False, None, ])
        self.DATA_ROWS.append([300, None, True, "CTest", ])

        # Variant data file with quoted headers/values and embedded doubled quotes
        self.MULTI_LINE_DATA_FILE = StringIO()
        self.MULTI_LINE_DATA_FILE.write(
            'myinteger\t"MyReal"\t"MyYesNo"\tMyText\n')
        self.MULTI_LINE_DATA_FILE.write(
            '100\t100.1\tNone\t"""A"" Test and ""more"""\n')
        self.MULTI_LINE_DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t""\n')
        self.MULTI_LINE_DATA_FILE.write('300\tNone\t' + TRUE_STR +
                                        '\t"C\\nTest"\t\n')
        self.MULTI_LINE_DATA_FILE = StringIO(
            self.MULTI_LINE_DATA_FILE.getvalue())

        self.MULTI_LINE_DATA_ROWS = []
        self.MULTI_LINE_DATA_ROWS.append([
            100, 100.1, None, '"A" Test and "more"', ])
        self.MULTI_LINE_DATA_ROWS.append([200, 200.2, False, None, ])
        self.MULTI_LINE_DATA_ROWS.append([300, None, True, 'C\\nTest', ])

        # ID summary data to make it easier to verify stuff
        self.COL_NAMES = self.DATA_COLS.split()
        self.ID_COL = self.COL_NAMES[0]
        self.ID_DATA = []
        for row in self.DATA_ROWS:
            self.ID_DATA.append(row[0])

        # Build query to get update rows
        self.DATA_QUERY = SQLQuery()
        for col in self.COL_NAMES:
            self.DATA_QUERY.addSelect(col)
        self.DATA_QUERY.addFrom(self.DATA_TABLE)
        self.DATA_QUERY.addWhereIn(self.ID_COL, self.ID_DATA)
        self.DATA_QUERY.addOrderBy(self.ID_COL)

    def tearDown(self):
        """Restore state from any setUp or test steps"""
        try:
            DBUtil.execute("drop table TestTypes")
            pass
        except Exception as err:
            # Table may not exist if a test failed early; log and continue
            log.warning(err)
            pass
        DBTestCase.tearDown(self)

    def test_runDBScript(self):
        # Just run a DB Script and make sure no ProgrammingErrors are raised.
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        # Run some other commands to see if scripts produced expected results
        results = DBUtil.execute(
            "select * from TestTypes where MyInteger > %s", (200, ))
        self.assertEqual(2, len(results))

        # Test extra "includeColumnNames" parameter
        results = DBUtil.execute(
            "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger < %s",
            (100, ), True)
        expected = [
            "TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime",
            "MyYesNo"
        ]
        for iCol in range(len(expected)):
            # Ignore case for comparison
            expected[iCol] = expected[iCol].lower()
            results[0][iCol] = results[0][iCol].lower()
        self.assertEqual(expected, results[0])
        # Only the header row should remain beyond the column-name row
        self.assertEqual(0, len(results) - 1)

    def test_runDBScript_commandline(self):
        # Equivalent to test_runDBScript, but try higher level interface
        # through command-line "main" method
        origStdin = sys.stdin
        sys.stdin = self.SCRIPT_FILE
        argv = ["DBUtil.py", "--script", "-"]
        DBUtil.main(argv)
        sys.stdin = origStdin

        # Run some other commands to see if scripts produced expected results
        results = DBUtil.execute(
            "select * from TestTypes where MyInteger > %s", (200, ))
        self.assertEqual(2, len(results))
        results = DBUtil.execute(
            "select * from TestTypes where MyInteger < %s", (100, ))
        self.assertEqual(0, len(results))

    def test_runDBScript_skipErrors(self):
        # Similar to test_runDBScript_commandline, but test skipErrors option
        origStdin = sys.stdin
        sys.stdin = self.SCRIPT_FILE
        argv = ["DBUtil.py", "--script", "-"]
        DBUtil.main(argv)
        sys.stdin = origStdin

        # Run script again. Should generate errors from redundant create table, etc. But skip
        self.SCRIPT_FILE.seek(0)
        origStdin = sys.stdin
        sys.stdin = self.SCRIPT_FILE
        argv = ["DBUtil.py", "--script", "--skipErrors", "-"]
        DBUtil.main(argv)
        sys.stdin = origStdin

        # Run script again. Should generate errors from redundant create table, etc. Verify by catch
        self.SCRIPT_FILE.seek(0)
        origStdin = sys.stdin
        sys.stdin = self.SCRIPT_FILE
        argv = ["DBUtil.py", "--script", "-"]
        expectErr = True
        actualErr = False
        try:
            DBUtil.main(argv)
        except Exception as err:
            actualErr = True
        self.assertEqual(expectErr, actualErr)
        sys.stdin = origStdin

    def test_execute_commandline(self):
        # Run basic executes for both an update and a select query, but
        # using the higher-level command-line "main" method interface
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py",
            "select count(*) from TestTypes where MyInteger > 200", "-"
        ]
        DBUtil.main(argv)
        self.assertEqual(2, int(sys.stdout.getvalue()))
        sys.stdout = origStdout

        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py",
            "insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)",
            "-"
        ]
        DBUtil.main(argv)
        #self.assertEqual( 1, int(sys.stdout.getvalue()) )
        sys.stdout = origStdout

        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py",
            "select count(*) from TestTypes where MyInteger > 200", "-"
        ]
        DBUtil.main(argv)
        self.assertEqual(3, int(sys.stdout.getvalue()))
        sys.stdout = origStdout

        # Different test, includeColumnNames
        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py", "-c",
            "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger > 200 and MyYesNo = True",
            "-"
        ]
        DBUtil.main(argv)
        sampleLines = sys.stdout.getvalue().split("\n")
        expected = [
            "TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime",
            "MyYesNo"
        ]
        sampleColumns = sampleLines[0].split()
        for iCol in range(len(expected)):
            # Case-insensitive comparison
            expected[iCol] = expected[iCol].lower()
            sampleColumns[iCol] = sampleColumns[iCol].lower()
        for iCol, col in enumerate(sampleColumns):
            self.assertEqual(expected[iCol], col)
        self.assertEqual(
            2 + 1 + 1, len(sampleLines)
        )  # 2 data lines + 1 column name line + 1 newline at end of output
        sys.stdout = origStdout

    def test_insertFile(self):
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"

        idFile = StringIO()
        DBUtil.insertFile(self.MULTI_LINE_DATA_FILE, tableName, None, "\t",
                          idFile)  # Assume column names extracted from first row of data file

        # Verify number rows inserted
        self.assertEqual(len(self.MULTI_LINE_DATA_ROWS),
                         idFile.getvalue().count("\n"))
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertEqual(self.MULTI_LINE_DATA_ROWS, results)

    def test_insertFile_commandline(self):
        # Similar to test_insertFile, but from higher-level command-line interface
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"
        columnNames = self.DATA_COLS.split()

        idFile = StringIO()

        # Slightly different test, specify tab as delimiter, not just any whitespace
        origStdin = sys.stdin
        origStdout = sys.stdout
        sys.stdin = self.MULTI_LINE_DATA_FILE
        sys.stdout = idFile
        argv = ["DBUtil.py", "-i-", "-d\\t", "-t" + tableName, "-o-"]
        DBUtil.main(argv)
        sys.stdout = origStdout
        sys.stdin = origStdin

        self.assertEqual(3, idFile.getvalue().count("\n"))
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertEqual(self.MULTI_LINE_DATA_ROWS, results)

    def test_insertFile_skipErrors(self):
        # Similar to test_insertFile_commandline, but just test to see if skipErrors option works
        # Test run will show errror / warning messages from the app, but these are expected
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"
        columnNames = ["MyReal", "MyYesNo", "MyText", "MyInteger"]

        idFile = StringIO()

        # Try with bogus data that should generate errors
        dataFile = StringIO()
        dataFile.write("ABCD\tPositive\tBadTest\t100.123\n")
        dataFile.write("700.7\t" + FALSE_STR + "\tXTest\t777\n")
        dataFile.write("1,099\tNegative\tMoBadTest\tfoo\n")
        dataFile = StringIO(dataFile.getvalue())

        idFile = StringIO()

        origStdin = sys.stdin
        origStdout = sys.stdout
        sys.stdin = dataFile
        sys.stdout = idFile
        argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-"]
        argv.extend(columnNames)
        expectErr = True
        actualErr = False
        try:
            DBUtil.main(argv)
        except Exception as err:
            actualErr = True
        self.assertEqual(expectErr, actualErr)
        sys.stdout = origStdout
        sys.stdin = origStdin

        # Expect no rows succesffuly inserted since errors in input
        self.assertEqual(0, idFile.getvalue().count("\n"))
        results = DBUtil.execute(
            "select count(*) from TestTypes where MyText like %s",
            ("%Test", ))
        self.assertEqual(0, results[0][0])

        # Try again, with bogus data that should generate errors
        dataFile = StringIO()
        dataFile.write("ABCD\tPositive\tBadTest\t100.123\n")
        dataFile.write("700.7\t" + FALSE_STR + "\tXTest\t777\n")
        dataFile.write("1,099\tNegative\tMoBadTest\tfoo\n")
        dataFile = StringIO(dataFile.getvalue())

        idFile = StringIO()

        origStdin = sys.stdin
        origStdout = sys.stdout
        sys.stdin = dataFile
        sys.stdout = idFile
        argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-", "-e"]  # -e option skipsErrors
        argv.extend(columnNames)
        DBUtil.main(argv)
        sys.stdout = origStdout
        sys.stdin = origStdin

        # Still expect 1 row to get through successfuly, despite other invalid input
        self.assertEqual(1, idFile.getvalue().count("\n"))
        results = DBUtil.execute(
            "select count(*) from TestTypes where MyText like %s",
            ("%Test", ))
        self.assertEqual(1, results[0][0])

    def test_insertFile_dateParsing(self):
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"
        columnNames = ["MyInteger", "MyText", "MyDateTime"]

        dataFile = StringIO()
        dataFile.write('''-1\t"12/11/2010"\t"12/11/2010"\n''')
        dataFile.write(
            '''-2\t"2013-04-15 13:45:21"\t"2013-04-15 13:45:21"\n''')
        dataFile.write(
            '''-3\t"2003-04-15 10:45:21"\t"2003-04-15 10:45:21"\n''')
        dataFile.write('''-4\t"4/11/12 6:20"\t"4/11/12 6:20"\n''')
        dataFile = StringIO(dataFile.getvalue())

        dateColFormats = {
            "myDateTime": None
        }  # Deliberately change capitalization to ensure robustness
        DBUtil.insertFile(dataFile,
                          tableName,
                          columnNames,
                          dateColFormats=dateColFormats)

        verifyQuery = \
            """select MyInteger, MyText, MyDateTime from TestTypes where MyInteger < 0 order by MyInteger desc """
        expectedData = \
            [ [ -1, "12/11/2010", datetime(2010,12,11) ],
              [ -2, "2013-04-15 13:45:21", datetime(2013,4,15,13,45,21) ],
              [ -3, "2003-04-15 10:45:21", datetime(2003,4,15,10,45,21) ],
              [ -4, "4/11/12 6:20", datetime(2012,4,11,6,20) ],
            ]

        # Verify rows inserted with properly parsed dates
        results = DBUtil.execute(verifyQuery)
        self.assertEqual(expectedData, results)

    def test_insertFile_escapeStrings(self):
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"
        columnNames = ["MyInteger", "MyText"]

        dataFile = StringIO()
        dataFile.write('''-1\t"A"\n''')
        dataFile.write('''-2\t"B\xaeb"\n''')
        dataFile.write('''-3\t"C"\n''')
        dataFile.write('''-4\tD\n''')
        dataFile = StringIO(dataFile.getvalue())

        DBUtil.insertFile(dataFile,
                          tableName,
                          columnNames,
                          escapeStrings=True)

        verifyQuery = \
            """select MyInteger, MyText from TestTypes where MyInteger < 0 order by MyInteger desc """
        expectedData = \
            [ [ -1, "A"],
              [ -2, "B\\xaeb"],
              [ -3, "C"],
              [ -4, "D"],
            ]

        # Verify rows inserted with escaped string content
        results = DBUtil.execute(verifyQuery)
        self.assertEqual(expectedData, results)

    def test_identityQuery(self):
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        # Run some other commands to see if scripts produced expected results
        results = DBUtil.execute("select max(TestTypes_id) from TestTypes")
        lastSeq = results[0][0]

        conn = DBUtil.connection()
        try:
            cur = conn.cursor()
            cur.execute(
                "insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)"
            )
            # identityQuery should report the last generated sequence value
            cur.execute(DBUtil.identityQuery("TestTypes"))
            self.assertEqual(lastSeq + 1, cur.fetchone()[0])

            cur.execute(
                "select TestTypes_id from TestTypes where MyText = 'Another' and MyInteger = 255"
            )
            self.assertEqual(lastSeq + 1, cur.fetchone()[0])
        finally:
            conn.close()

    def test_nullCheck(self):
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        conn = DBUtil.connection()
        try:
            DBUtil.execute(
                "insert into TestTypes (MyText,MyInteger) values ('Test With Null', 255)",
                conn=conn)
            DBUtil.execute(
                "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime) values ('Test With Not Null', 255, 1.23, '2005-03-06')",
                conn=conn)

            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyReal is null",
                conn=conn)
            self.assertEqual('Test With Null', result[0][0])
            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyReal is not null",
                conn=conn)
            self.assertEqual('Test With Not Null', result[0][0])

            # Would not work with MySQL if used TIMESTAMP data type. Should be DATETIME. (TIMESTAMP tries to auto-fill values, so no nulls allowed?)
            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyDateTime is null",
                conn=conn)
            self.assertEqual('Test With Null', result[0][0])
            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyDateTime is not null",
                conn=conn)
            self.assertEqual('Test With Not Null', result[0][0])
        finally:
            conn.close()

    def test_findOrInsertItem(self):
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        searchDict = {}
        insertDict = {}

        searchDict["TestTypes_id"] = +123
        log.debug("Insert a new item using default params")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict)
        self.assertEqual(+123, data)
        self.assertEqual(True, isNew)

        log.debug("Find the existing item")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict)
        self.assertEqual(+123, data)
        self.assertEqual(False, isNew)

        insertDict["TestTypes_id"] = +456
        log.debug("Find existing item, with optional insert data")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict,
                                                insertDict)
        self.assertEqual(+123, data)
        self.assertEqual(False, isNew)

        searchDict["TestTypes_id"] = +789
        insertDict["TestTypes_id"] = +789
        insertDict["MyInteger"] = 123
        log.debug("Insert a new item with actual data")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict,
                                                insertDict)
        self.assertEqual(+789, data)
        self.assertEqual(True, isNew)

        searchDict["TestTypes_id"] = +234
        insertDict["TestTypes_id"] = +234
        log.debug("Retrieve a different column")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes",
                                                searchDict,
                                                insertDict,
                                                retrieveCol="MyText")
        self.assertEqual(None, data)
        self.assertEqual(True, isNew)

        searchDict["TestTypes_id"] = +345
        insertDict["TestTypes_id"] = +345
        insertDict["MyText"] = "testText"
        log.debug("Insert and retrieve a different column")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes",
                                                searchDict,
                                                insertDict,
                                                retrieveCol="MyText")
        self.assertEqual("testText", data)
        self.assertEqual(True, isNew)

        insertDict["MyText"] = "newText"
        log.debug(
            "Try inserting a different value under an existing row. Should NOT work"
        )
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes",
                                                searchDict,
                                                insertDict,
                                                retrieveCol="MyText")
        self.assertEqual("testText", data)
        self.assertEqual(False, isNew)

        log.debug(
            "Try inserting a different value under an existing row, but force the update"
        )
        insertDict["MyText"] = "newText"
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes",
                                                searchDict,
                                                insertDict,
                                                retrieveCol="MyText",
                                                forceUpdate=True)
        self.assertEqual("newText", data)
        self.assertEqual(False, isNew)

    def test_updateFromFile(self):
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        # Insert some blank data first to update
        for idValue in self.ID_DATA:
            DBUtil.execute(
                "insert into TestTypes (" + self.ID_COL + ") values (%s)",
                (idValue, ))

        # Negative test case
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertNotEqual(self.DATA_ROWS, results)

        # Now do the actual update from the file
        DBUtil.updateFromFile(self.DATA_FILE,
                              self.DATA_TABLE,
                              self.COL_NAMES,
                              delim="\t")

        results = DBUtil.execute(self.DATA_QUERY)
        self.assertEqual(self.DATA_ROWS, results)

    def test_updateFromFile_commandline(self):
        # Similar to test_updateFromFile, but from higher-level command-line interface
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        # Insert some blank data first to update
        for idValue in self.ID_DATA:
            DBUtil.execute(
                "insert into TestTypes (" + self.ID_COL + ") values (%s)",
                (idValue, ))

        # Negative test case
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertNotEqual(self.DATA_ROWS, results)

        # Now do the actual update from the file, but build in column names to data file
        dataFileWithCols = StringIO()
        dataFileWithCols.write(self.DATA_COLS)
        dataFileWithCols.write(self.DATA_FILE.getvalue())
        dataFileWithCols = StringIO(dataFileWithCols.getvalue())

        sys.stdin = dataFileWithCols
        argv = ["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-d\\t"]
        DBUtil.main(argv)

        # Verify positive results
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertEqual(self.DATA_ROWS, results)

        ########################################################
        # Repeat test but data file will use more than one key column (adding MyText)
        # Further note that MyText is used as both a key column to look up the row to update
        # and as a value column to modify
        dataFileWithCols = StringIO()
        dataFileWithCols.write("MyInteger\tMyText\tMyText\tMyReal\tMyYesNo\n")
        dataFileWithCols.write("100\tATest\tAAA\tNone\t" + TRUE_STR + "\t\n")
        dataFileWithCols.write("200\tNone\tBBB\t222.2\tNone\t\n")
        dataFileWithCols.write("300\tCTest\tNone\t333.3\t" + TRUE_STR +
                               "\t\n")
        dataFileWithCols = StringIO(dataFileWithCols.getvalue())

        # Expected results after this update
        self.DATA_ROWS = []
        self.DATA_ROWS.append([100, None, True, "AAA", ])
        self.DATA_ROWS.append([
            200, 200.2, False, None,
        ])  # This row is unchanged, because one of the key values cannot be found as null
        self.DATA_ROWS.append([300, 333.3, True, None, ])

        # Negative test case
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertNotEqual(self.DATA_ROWS, results)

        # Now do the actual update from the file, but with an extra parameter specifying 2 key columns
        sys.stdin = dataFileWithCols
        argv = ["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-n2"]
        DBUtil.main(argv)

        # Verify positive results
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertEqual(self.DATA_ROWS, results)

    def test_deleteRows(self):
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        query = "select count(*) from TestTypes;"

        # Insert some test data to delete
        tableName = "TestTypes"
        columnNames = self.DATA_COLS.split()

        idFile = StringIO()
        DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None,
                          idFile)
        idValues = idFile.getvalue().split()

        # Count up rows before and after delete
        initialCount = DBUtil.execute(query)[0][0]
        DBUtil.deleteRows("TestTypes", idValues)
        afterCount = DBUtil.execute(query)[0][0]
        self.assertEqual(initialCount - len(idValues), afterCount)

        # Reinsert the test data to try deleting them by a non-default Id column
        # NOTE(review): self.DATA_FILE was already consumed by the first
        # insertFile above — presumably insertFile rewinds or this relies on
        # prior state; confirm against DBUtil.insertFile's behavior.
        idFile = StringIO()
        DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None,
                          idFile)

        nonDefaultIds = [100, 200]
        initialCount = DBUtil.execute(query)[0][0]
        DBUtil.deleteRows("TestTypes", nonDefaultIds, "MyInteger")
        afterCount = DBUtil.execute(query)[0][0]
        # NOTE(review): the final count assertion appears truncated at this
        # chunk boundary — confirm against the full source file.
def _getNonNullBaseNames(self):
    """Write all order_result_stat base names having a non-null
    max_result_flag (plus that flag) to base_names.csv.
    """
    query = SQLQuery()
    # SELECT
    query.addSelect(BASE_NAME)
    query.addSelect('max_result_flag')
    # FROM
    query.addFrom('order_result_stat')
    # WHERE
    query.addWhere('max_result_flag is not null')

    # Fix: "print query" is a Python 2 print statement (syntax error under
    # Python 3); use the call form, which works on both.
    print(query)
    print(query.getParams())
    DBUtil.runDBScript(self.SCRIPT_FILE, False)
    results = DBUtil.execute(str(query), query.getParams())

    pd.DataFrame(results, columns=query.select).to_csv('base_names.csv',
                                                       index=False)
def loadStateInfo(self, stateIds=None, conn=None):
    """Load basic information about the specified patient states"""
    # Track connection ownership: only close a connection we opened here
    ownConn = conn is None
    if ownConn:
        conn = self.connFactory.connection()
    try:
        stateQuery = SQLQuery()
        stateQuery.addSelect("ss.sim_state_id")
        stateQuery.addSelect("ss.name")
        stateQuery.addSelect("ss.description")
        stateQuery.addFrom("sim_state as ss")
        if stateIds is not None:
            # Restrict to the requested states; otherwise return all of them
            stateQuery.addWhereIn("ss.sim_state_id", stateIds)
        resultTable = DBUtil.execute(stateQuery, includeColumnNames=True, conn=conn)
        return modelListFromTable(resultTable)
    finally:
        if ownConn:
            conn.close()
def signOrders(self,
               userId,
               patientId,
               currentTime,
               orderItemIds,
               discontinuePatientOrderIds=None,
               conn=None):
    """Commit new order item IDs for the given patient and starting now,
    and discontinue (set end date) for any existing orders specified.
    Record any patient state transitions the orders would trigger.

    userId: simulated user signing the orders.
    patientId: patient the orders apply to.
    currentTime: relative start time recorded on each new order.
    orderItemIds: clinical item IDs to order (deduplicated internally).
    discontinuePatientOrderIds: optional existing sim_patient_order IDs to end.
    conn: optional externally-managed connection; always committed in finally.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # Denormalized recording of current patient state to facilitate easy retrieval linked to orders later
        patientInfo = self.loadPatientInfo([patientId], currentTime,
                                           conn=conn)[0]
        stateId = patientInfo["sim_state_id"]
        postStateIdByItemId = patientInfo["postStateIdByItemId"]

        orderItemIdSet = set(orderItemIds)  # Ensure unique and facilitate set operations
        insertDict = {
            "sim_user_id": userId,
            "sim_patient_id": patientId,
            "sim_state_id": stateId,
            "relative_time_start": currentTime
        }
        # Insert one sim_patient_order row per distinct ordered item
        for itemId in orderItemIdSet:
            insertDict["clinical_item_id"] = itemId
            DBUtil.insertRow("sim_patient_order", insertDict, conn=conn)

        # See if any of these new orders triggered state transitions
        # NOTE(review): dict.viewkeys() is Python 2 only (removed in Python 3,
        # where .keys() already returns a set-like view) — confirm this module
        # still targets Python 2.
        triggerItemIds = postStateIdByItemId.viewkeys() & orderItemIdSet
        while triggerItemIds:  # Found a trigger item
            triggerItemId = None
            if len(triggerItemIds) > 1:  # Found multiple. Weird. Arbitrarily act on the one that appeared first in the input list
                for itemId in orderItemIds:
                    if itemId in triggerItemIds:
                        triggerItemId = itemId
                        break
            else:
                triggerItemId = triggerItemIds.pop()
            postStateId = postStateIdByItemId[triggerItemId]

            # Record the state transition
            self.recordStateTransition(patientId,
                                       stateId,
                                       postStateId,
                                       currentTime,
                                       conn=conn)
            # Reload patientInfo to reflect new patient state
            patientInfo = self.loadPatientInfo([patientId], currentTime,
                                               conn=conn)[0]
            stateId = patientInfo["sim_state_id"]
            postStateIdByItemId = patientInfo["postStateIdByItemId"]

            orderItemIdSet.discard(triggerItemId)  # Don't keep looking for this one, important to avoid infinite loop
            triggerItemIds = postStateIdByItemId.viewkeys() & orderItemIdSet

        if discontinuePatientOrderIds is not None:
            # Mark the specified existing orders as ended now
            updateDict = {"relative_time_end": currentTime}
            for patientOrderId in discontinuePatientOrderIds:
                DBUtil.updateRow("sim_patient_order",
                                 updateDict,
                                 patientOrderId,
                                 conn=conn)

            # If order is discontinued/cancelled at the same (or before) time of entry,
            # take that as a signal to cleanup and delete the record altogether
            # (effectively there was no time at which the order was ever allowed to exist)
            deleteQuery = SQLQuery()
            deleteQuery.delete = True
            deleteQuery.addFrom("sim_patient_order")
            deleteQuery.addWhereEqual("sim_patient_id", patientId)
            deleteQuery.addWhere("relative_time_end <= relative_time_start")
            DBUtil.execute(deleteQuery, conn=conn)
    finally:
        # Commit even on error paths; close only connections we created
        conn.commit()
        if not extConn:
            conn.close()
def copyPatientTemplate(self, patientData, templatePatientId, conn=None):
    """Create a new patient record cloned from the given template patient.

    Copies the shallow patient attributes (overridden by any values given in
    patientData) plus all patient states and physician orders whose relative
    start time is at or before zero. Anything after time zero is NOT copied,
    since those records are expected to reflect real user interactions.
    Returns the new sim_patient_id.
    """
    externalConnection = conn is not None
    if conn is None:
        conn = self.connFactory.connection()
    try:
        # Clone the shallow patient record, letting the DB assign a fresh ID
        templateRecord = DBUtil.loadRecordModelById("sim_patient", templatePatientId, conn=conn)
        del templateRecord["sim_patient_id"]  # Remove prior ID to allow for new one
        templateRecord.update(patientData)  # Override with new content (if exists)
        DBUtil.insertRow("sim_patient", templateRecord, conn=conn)
        newPatientId = DBUtil.execute(DBUtil.identityQuery("sim_patient"), conn=conn)[0][0]

        # Copy template patient states up to (and including) relative time zero
        stateQuery = SQLQuery()
        stateQuery.addSelect("*")  # Copy all columns
        stateQuery.addFrom("sim_patient_state as sps")
        stateQuery.addWhereEqual("sps.sim_patient_id", templatePatientId)
        stateQuery.addWhereOp("relative_time_start", "<=", 0)
        stateQuery.addOrderBy("relative_time_start")
        stateTable = DBUtil.execute(stateQuery, includeColumnNames=True, conn=conn)
        stateModels = modelListFromTable(stateTable)
        lastIndex = len(stateModels) - 1
        for index, stateModel in enumerate(stateModels):
            del stateModel["sim_patient_state_id"]  # Discard copied ID to allow new one
            if index == lastIndex:
                # Last state: blank out end time to leave it open-ended for the simulation
                del stateModel["relative_time_end"]
            stateModel["sim_patient_id"] = newPatientId
            DBUtil.insertRow("sim_patient_state", stateModel, conn=conn)

        # Copy template orders up to (and including) relative time zero
        orderQuery = SQLQuery()
        orderQuery.addSelect("*")
        orderQuery.addFrom("sim_patient_order as spo")
        orderQuery.addWhereEqual("sim_patient_id", templatePatientId)
        orderQuery.addWhereOp("relative_time_start", "<=", 0)
        orderQuery.addOrderBy("relative_time_start")
        orderTable = DBUtil.execute(orderQuery, includeColumnNames=True, conn=conn)
        for orderModel in modelListFromTable(orderTable):
            del orderModel["sim_patient_order_id"]
            orderModel["sim_patient_id"] = newPatientId
            DBUtil.insertRow("sim_patient_order", orderModel, conn=conn)

        conn.commit()  # Transactional commit for multi-step process
        return newPatientId
    finally:
        if not externalConnection:
            conn.close()
def loadPatientInfo(self, patientIds=None, relativeTime=None, conn=None):
    """Load basic information about the specified patients.

    Reports each patient's state at the given relative time (default time zero),
    and annotates each result model with the possible state-transition options
    out of that state. If a time-based transition should already have fired by
    relativeTime, records that transition and recursively reloads the patient.

    patientIds - Optional list of sim_patient_ids to restrict to (None = all patients).
    relativeTime - Simulation time offset to evaluate patient state at; defaults to 0.
    conn - Optional existing DB connection; a new one is created (and closed) if not given.
    Returns a list of row models, one per patient, ordered by patient name.
    """
    if relativeTime is None:
        relativeTime = 0  # Just look for time zero default then

    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # Join patient, patient-state, and state tables to find each patient's
        # current state record at the requested time
        query = SQLQuery()
        query.addSelect("sp.sim_patient_id")
        query.addSelect("sp.name")
        query.addSelect("sp.age_years")
        query.addSelect("sp.gender")
        query.addSelect("s.sim_state_id")
        query.addSelect("s.name as state_name")
        query.addSelect("s.description as state_description")
        query.addSelect("sps.relative_time_start")
        query.addSelect("sps.relative_time_end")
        query.addFrom("sim_patient as sp")
        query.addFrom("sim_patient_state as sps")
        query.addFrom("sim_state as s")
        query.addWhere("sp.sim_patient_id = sps.sim_patient_id")
        query.addWhere("sps.sim_state_id = s.sim_state_id")

        if patientIds is not None:
            query.addWhereIn("sp.sim_patient_id", patientIds)

        # Look for the state that matches the given relative time offset:
        # started at or before relativeTime, and either still open-ended
        # (null end time) or ending after relativeTime
        query.addWhereOp("sps.relative_time_start", "<=", relativeTime)
        query.openWhereOrClause()
        query.addWhere("sps.relative_time_end is null")
        query.addWhereOp("sps.relative_time_end", ">", relativeTime)
        query.closeWhereOrClause()

        query.addOrderBy("sp.name")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)

        if len(dataModels) > 0:
            # Secondary query to build lookup table of possible state transition
            # options from patient current states
            subQuery = SQLQuery()
            subQuery.addSelect("pre_state_id")
            subQuery.addSelect("post_state_id")
            subQuery.addSelect("clinical_item_id")
            subQuery.addSelect("time_trigger")
            subQuery.addFrom("sim_state_transition as sst")
            subQuery.addWhereIn("pre_state_id", columnFromModelList(dataModels, "sim_state_id"))
            subResults = DBUtil.execute(subQuery, conn=conn)

            # For each pre-state, track which clinical items or times trigger
            # which post-states
            postStateIdByItemIdByPreStateId = dict()
            postStateIdTimeTriggerByPreStateId = dict()
            for preStateId, postStateId, itemId, timeTrigger in subResults:
                if preStateId not in postStateIdByItemIdByPreStateId:
                    postStateIdByItemIdByPreStateId[preStateId] = dict()
                postStateIdByItemIdByPreStateId[preStateId][itemId] = postStateId
                if timeTrigger is not None:
                    # Only one time-trigger kept per pre-state (later rows overwrite earlier)
                    postStateIdTimeTriggerByPreStateId[preStateId] = (postStateId, timeTrigger)

            # Record in patient result models for retrieval
            for i, dataModel in enumerate(dataModels):
                patientId = dataModel["sim_patient_id"]
                stateId = dataModel["sim_state_id"]

                dataModel["postStateIdByItemId"] = dict()
                if stateId in postStateIdByItemIdByPreStateId:
                    dataModel["postStateIdByItemId"] = postStateIdByItemIdByPreStateId[stateId]
                # NOTE(review): the empty-dict default below is stored under key
                # "postStateIdTimeTriggerByPreStateId" while the populated value
                # uses key "postStateIdTimeTrigger" — looks like a naming slip;
                # downstream checks only consult "postStateIdTimeTrigger". Verify
                # no consumer relies on the other key before renaming.
                dataModel["postStateIdTimeTriggerByPreStateId"] = dict()
                if stateId in postStateIdTimeTriggerByPreStateId:
                    dataModel["postStateIdTimeTrigger"] = postStateIdTimeTriggerByPreStateId[stateId]

                if dataModel["relative_time_end"] is None and "postStateIdTimeTrigger" in dataModel:
                    # Check that we haven't passed (and should thus trigger)
                    # a time-based state transition
                    (postStateId, timeTrigger) = dataModel["postStateIdTimeTrigger"]
                    preStateTime = dataModel["relative_time_start"]
                    postStateTriggerTime = (preStateTime + timeTrigger)
                    if postStateTriggerTime <= relativeTime:
                        # Trigger state transition just by time elapsed
                        #print >> sys.stderr, relativeTime, preStateTime, stateId, postStateTriggerTime, postStateId
                        self.recordStateTransition(patientId, stateId, postStateId, postStateTriggerTime, conn=conn)
                        # State change which can yield new triggers, so recursively reload.
                        # Small risk of infinite recusion if timeTriggers are zero.
                        # Otherwise, should converge as each recursion will update
                        # the preState relativeTimeStart
                        dataModels[i] = self.loadPatientInfo([patientId], relativeTime, conn=conn)[0]
        return dataModels
    finally:
        if not extConn:
            conn.close()
def parseParams(self, paramDict): """Look through the dictionary for key-value pairs that can be parsed into query object parameters. Facilitates easy setup from web or command-line text interfaces. """ if "searchStr" in paramDict: self.searchStr = paramDict["searchStr"] if "analysisStatus" in paramDict: self.analysisStatus = paramDict["analysisStatus"] if "sourceTables" in paramDict: self.sourceTables = paramDict["sourceTables"].split(",") if "resultCount" in paramDict: self.resultCount = int(paramDict["resultCount"]) if "sortField" in paramDict: self.sortField = paramDict["sortField"] if __name__ == "__main__": sim_manager = SimManager() # Print grading on all patient cases query = SQLQuery() query.addSelect("distinct sim_patient_id") query.addFrom("sim_patient") patients = DBUtil.execute(query) patients = [patient[0] for patient in patients] grades = sim_manager.grade_cases(patients, "Jonathan Chen") print(grades) print("length: " + str(len(grades)))
def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
    """Query the database for the list of all source clinical items
    (diagnosed problems in this case) and yield the results one at a time.

    startDate/endDate - Optional bounds on noted_date (inclusive start, exclusive end).
    progress - Optional progress tracker; its total is set from a count query and
        it is updated once per source row processed.
    conn - Optional existing DB connection; a new one is created (and closed) if not given.
    Yields one RowItemModel per (row, ICD code) combination, including extra
    yields for parent ICD codes so component diagnoses aggregate into general categories.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Facilitate rapid lookup of ICD9/ICD10 codes
    if self.icd9_str_by_code is None:
        self.prepare_icd9_lookup(conn=conn)
    if self.icd10_str_by_code is None:
        self.prepare_icd10_lookup(conn=conn)

    # Column headers to query for that map to respective fields in analysis table
    headers = [
        "pat_id", "pat_enc_csn_id", "noted_date", "resolved_date",
        "dx_icd9_code", "dx_icd9_code_list", "dx_icd10_code_list",
        "data_source"
    ]
    query = SQLQuery()
    for header in headers:
        query.addSelect(header)
    query.addFrom("stride_dx_list as dx")
    query.addWhere("noted_date is not null")  # Only work with elements that have dates assigned for now
    if startDate is not None:
        query.addWhereOp("noted_date", ">=", startDate)
    if endDate is not None:
        query.addWhereOp("noted_date", "<", endDate)

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    row = cursor.fetchone()
    while row is not None:
        row_model = RowItemModel(row, headers)

        # 2014-2017 data does not have dx_icd9_code. Instead, has
        # both dx_icd9_code_list and dx_icd10_code_list. For these items,
        # there is a one:many mapping of source item to converted item.

        # Collect icd10 codes.
        icd10_codes = set()
        if row_model['dx_icd10_code_list'] not in ['', None]:
            codes = row_model['dx_icd10_code_list'].split(',')
            for code in codes:
                icd10_codes.add(code)

        # Collect icd9 codes.
        icd9_codes = set()
        if row_model['dx_icd9_code'] not in ['', None]:
            icd9_codes.add(row_model['dx_icd9_code'])
        else:
            if row_model['dx_icd9_code_list'] not in ['', None]:
                codes = row_model['dx_icd9_code_list'].split(',')
                for code in codes:
                    icd9_codes.add(code)

        # If there are no ICD codes, skip to next row.
        if len(icd9_codes) == 0 and len(icd10_codes) == 0:
            row = cursor.fetchone()
            continue

        # Process ICD codes.
        # Build a temporary dictionary so that a single loop can take care
        # of both ICD9 and ICD10 without mixing the data.
        icd_versions = {
            'ICD9': {
                'codes': icd9_codes,
                'lookup': self.icd9_str_by_code
            },
            'ICD10': {
                'codes': icd10_codes,
                'lookup': self.icd10_str_by_code
            }
        }

        for version, info in icd_versions.items():
            icd_codes = info['codes']
            icd_lookup = info['lookup']
            for icd_code in icd_codes:
                # Look up string. Otherwise default to ICD code.
                row_model['icd_str'] = icd_code
                if icd_code in icd_lookup:
                    row_model['icd_str'] = icd_lookup[icd_code]
                row_model['dx_icd_code'] = version + '.' + icd_code

                # Yield one row worth of data at a time to avoid having to keep
                # the whole result set in memory.
                yield row_model

                orig_code = icd_code
                if SUBCODE_DELIM in orig_code:
                    # Insert copies of item for parent node codes to aggregate
                    # component diagnoses into general categories.
                    while icd_code[-1] != SUBCODE_DELIM:
                        icd_code = icd_code[:-1]  # Truncate trailing digit
                        if icd_code in icd_lookup:
                            # Found a matching parent code, so yield this version.
                            row_model['icd_str'] = icd_lookup[icd_code]
                            row_model['dx_icd_code'] = version + '.' + icd_code
                            yield row_model

                    # One more cycle to get parent node with no subcode
                    # delimiter at all.
                    icd_code = icd_code[:-1]  # Truncate off SUBCODE_DELIM
                    if icd_code in icd_lookup:
                        row_model['icd_str'] = icd_lookup[icd_code]
                        row_model['dx_icd_code'] = version + '.' + icd_code
                        yield row_model

        row = cursor.fetchone()
        # FIX: guard the progress update; the original called progress.Update()
        # unconditionally and would crash with AttributeError when the default
        # progress=None was used (every other use of progress here is guarded).
        if progress is not None:
            progress.Update()

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close()
    if not extConn:
        conn.close()
def querySourceItems(self, convOptions, progress=None, conn=None): """Query the database for list of all source clinical items (medications, etc.) and yield the results one at a time. If startDate provided, only return items whose occurence date is on or after that date. """ extConn = conn is not None; if not extConn: conn = self.connFactory.connection(); # Column headers to query for that map to respective fields in analysis table headers = ["stride_treatment_team_id","pat_id","pat_enc_csn_id","trtmnt_tm_begin_date","trtmnt_tm_end_date","treatment_team","prov_name"]; query = SQLQuery(); for header in headers: query.addSelect( header ); query.addFrom("stride_treatment_team"); if convOptions.startDate is not None: query.addWhereOp("trtmnt_tm_begin_date",">=", convOptions.startDate); if convOptions.endDate is not None: query.addWhereOp("trtmnt_tm_begin_date","<", convOptions.endDate); # Still use begin date as common filter value # Query to get an estimate of how long the process will be if progress is not None: progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]; cursor = conn.cursor(); # Do one massive query, but yield data for one item at a time. cursor.execute( str(query), tuple(query.params) ); row = cursor.fetchone(); while row is not None: rowModel = RowItemModel( row, headers ); for normalizedModel in self.normalizeRowModel(rowModel, convOptions, conn=conn): yield normalizedModel; # Yield one row worth of data at a time to avoid having to keep the whole result set in memory row = cursor.fetchone(); # Slight risk here. Normally DB connection closing should be in finally of a try block, # but using the "yield" generator construct forbids us from using a try, finally construct. cursor.close(); if not extConn: conn.close();
def grade_cases(self, sim_patient_ids, sim_grader_id, conn=None):
    """Given the identifiers for a bunch of simulated physician-patient case
    records, and the identifier for a particular grading key to use, calculate
    what grade each case would get based on the choices made and return a
    dictionary of case grades (keyed by the case ID).

    Each grade dict carries total_score, sim_patient_id, most_graded_user_id,
    and most_active_user_id.
    """
    ext_conn = True
    if conn is None:
        conn = self.connFactory.connection()
        ext_conn = False
    try:
        # Inner query retrieves physician-patient cases with ranking group_names (to later select first)
        # per case for specified cases. Each NULL group_name is treated as a separate group by assigning it
        # sim_patient_order_id. It also omits Default user (sim_user_id = 0) from grading.
        inner_query = SQLQuery()
        inner_query.addSelect("score")
        inner_query.addSelect(
            "rank() over ("  # ranks rows incrementally in the same group
            " partition by coalesce(group_name, sim_patient_order_id::text), sim_patient_id"
            " order by sim_patient_order_id"
            ")")
        inner_query.addSelect("sim_user_id")
        inner_query.addSelect("sim_patient_id")
        inner_query.addFrom("sim_patient_order spo")
        inner_query.addJoin(
            "sim_grading_key sgk",
            "sgk.clinical_item_id = spo.clinical_item_id"
            " and sgk.sim_state_id = spo.sim_state_id")
        inner_query.addWhereEqual("sgk.sim_grader_id", sim_grader_id)
        inner_query.addWhereNotEqual(
            "spo.sim_user_id", 0)  # 0 = ignore 'Default user', sets up initial cases
        inner_query.addWhereIn("spo.sim_patient_id", sim_patient_ids)
        inner_query.addOrderBy("relative_time_start")
        inner_query.addOrderBy("sim_patient_order_id")

        # Outer query sums the score per patient case and selects most graded physician for the case.
        # Theoretically, it isn't necessarily the most active physician for the case since his orders
        # might have been dropped by selecting only the first record within group_name group.
        query = SQLQuery()
        query.addSelect("sum(score) as total_score")
        query.addSelect("sim_patient_id")
        query.addSelect(
            "mode() within group ("  # mode() selects most frequent value within group
            " order by sim_user_id"
            ") as most_graded_user_id")
        query.addFrom("(" + str(inner_query) + ") as ranked_groups")
        query.addWhereEqual("ranked_groups.rank", 1)  # count only first order in the same group
        query.addGroupBy("sim_patient_id")

        query_params = inner_query.getParams() + query.getParams()
        grades_table = DBUtil.execute(query, query_params, includeColumnNames=True, conn=conn)
        grades_model = modelListFromTable(grades_table)

        # get most active users for the cases
        most_active_user_query = SQLQuery()
        most_active_user_query.addSelect("sim_patient_id")
        most_active_user_query.addSelect("mode() within group ("
                                         " order by sim_user_id"
                                         ") as most_active_user_id")
        most_active_user_query.addFrom("sim_patient_order")
        most_active_user_query.addWhereNotEqual("sim_user_id", 0)  # ignore Default user
        most_active_user_query.addWhereIn("sim_patient_id", sim_patient_ids)
        most_active_user_query.addGroupBy("sim_patient_id")
        most_active_user_query.addOrderBy("sim_patient_id")
        most_active_user_table = DBUtil.execute(most_active_user_query,
                                                includeColumnNames=True,
                                                conn=conn)
        most_active_user_model = modelListFromTable(most_active_user_table)

        # make a dict by sim_patient_id out of results - will be used for combining
        most_active_user_dict = {
            most_active_user["sim_patient_id"]: most_active_user
            for most_active_user in most_active_user_model
        }

        # combine results.
        # FIX: the original built this with a dict comprehension whose values were
        # grade.update(...) — dict.update() returns None, so every grade in the
        # returned dict was None. Merge first, then map to the merged grade dict.
        complete_grades = dict()
        for grade in grades_model:
            grade.update(most_active_user_dict[grade["sim_patient_id"]])
            complete_grades[grade["sim_patient_id"]] = grade
        return complete_grades
    finally:
        if not ext_conn:
            conn.close()
def loadPatientLastEventTime(self, patientId, conn=None): """Find the last simulated time where the patient received any user orders (or default to time 0). Makes for natural starting point for resuming a simulation. Note: Misses cases where patient state also changed, not triggered by order, but by just time lag. """ extConn = True if conn is None: conn = self.connFactory.connection() extConn = False try: query = SQLQuery() query.addSelect("max(relative_time_start)") query.addSelect("max(relative_time_end)") query.addSelect("0") # Simply select 0 as default time if don't find any others query.addFrom("sim_patient_order as po") query.addWhereEqual("sim_patient_id", patientId) lastOrderTime = max(DBUtil.execute(query, conn=conn)[0]) return lastOrderTime finally: if not extConn: conn.close()
def add_sim_case_column_to(csv): query = SQLQuery() query.addSelect("sim_patient_id") query.addSelect("sim_case_name as sim_case") query.addFrom("sim_grading_key sgk") query.addJoin( "sim_patient_order spo", "sgk.clinical_item_id = spo.clinical_item_id and spo.sim_state_id = sgk.sim_state_id" ) query.addGroupBy("sim_patient_id") query.addGroupBy("sim_case_name") case_names = DBUtil.execute(str(query)) # merge sim_case column csv = pd.merge(csv, pd.DataFrame(case_names, columns=['sim_patient_id', 'sim_case']), left_on='patient', right_on='sim_patient_id') return csv
def loadPatientOrders(self, patientId, currentTime, loadActive=True, conn=None): """Load orders for the given patient that exist by the specified current time point. loadActive - Specify whether to load active vs. inactive/completed orders. Set to None to load both """ extConn = True if conn is None: conn = self.connFactory.connection() extConn = False try: query = SQLQuery() query.addSelect("po.sim_patient_order_id") query.addSelect("po.sim_user_id") query.addSelect("po.sim_patient_id") query.addSelect("po.sim_state_id") query.addSelect("po.clinical_item_id") query.addSelect("po.relative_time_start") query.addSelect("po.relative_time_end") query.addSelect("ci.name") query.addSelect("ci.description") query.addSelect("cic.source_table") query.addSelect("cic.description as category_description") query.addFrom("sim_patient_order as po") query.addFrom("clinical_item as ci") query.addFrom("clinical_item_category as cic") query.addWhere("po.clinical_item_id = ci.clinical_item_id") query.addWhere( "ci.clinical_item_category_id = cic.clinical_item_category_id") query.addWhereEqual("sim_patient_id", patientId) query.addWhereOp("relative_time_start", "<=", currentTime) if loadActive: # Filter out inactive orders here. query.openWhereOrClause() query.addWhere("relative_time_end is null") query.addWhereOp("relative_time_end", ">", currentTime) query.closeWhereOrClause() #elif loadActive is not None: # Filter out active orders here. # query.addWhereOp("relative_time_end","<=", currentTime); if loadActive: # Organize currently active orders by category query.addOrderBy("cic.description") query.addOrderBy("ci.description") query.addOrderBy("relative_time_start") else: # Otherwise chronologic order query.addOrderBy("relative_time_start") query.addOrderBy("cic.description") query.addOrderBy("ci.description") dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn) dataModels = modelListFromTable(dataTable) return dataModels finally: if not extConn: conn.close()
def setUp(self):
    """Prepare state for test cases.

    Builds an in-memory SQL script creating and populating a TestTypes table,
    plus tab-delimited data files (single-line and multi-line/quoted variants)
    with their expected parsed row values, and a query to fetch the rows back.
    """
    DBTestCase.setUp(self)

    # SQL script exercising table creation, inserts, and all comment styles
    self.SCRIPT_FILE = StringIO()
    self.SCRIPT_FILE.write(
        "# Create table to test on. Also testing that comment tag is recognized\n"
    )
    self.SCRIPT_FILE.write("\n")
    self.SCRIPT_FILE.write("create table TestTypes\n")
    self.SCRIPT_FILE.write("(\n")
    self.SCRIPT_FILE.write("    TestTypes_id    serial,\n")
    self.SCRIPT_FILE.write(
        "    MyText          varchar(50),    /* Same as character varying, also test standard SQL comment tags */\n"
    )
    self.SCRIPT_FILE.write("    MyInteger       integer,\n")
    self.SCRIPT_FILE.write(
        "    MyReal          real,   -- Floating point number, also test standard SQL comment tag\n"
    )
    self.SCRIPT_FILE.write(
        "    MyDateTime      TIMESTAMP,   -- PostgreSQL uses TIMESTAMP, but MySQL doesn't do NULL values right, so have to use DATETIME for MySQL?\n"
    )
    self.SCRIPT_FILE.write("    MyYesNo         boolean\n")
    self.SCRIPT_FILE.write(");\n")
    self.SCRIPT_FILE.write(
        "ALTER TABLE TestTypes ADD CONSTRAINT TestTypes_id PRIMARY KEY (TestTypes_id);\n"
    )  # Should auto-create testtypes_testtypes_id_seq sequence
    self.SCRIPT_FILE.write(
        "CREATE INDEX TestTypes_MyInteger_INDEX ON TestTypes(MyInteger);\n"
    )
    self.SCRIPT_FILE.write("\n")
    self.SCRIPT_FILE.write(
        "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
    )
    self.SCRIPT_FILE.write(
        "values ('Sample Text', 123,123.45,'2004-09-08 19:41:47.292000',True);\n"
    )
    self.SCRIPT_FILE.write(
        "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
    )
    self.SCRIPT_FILE.write(
        "values ('Joe Mama', 234,23.45,'1990-10-03 19:41:47.292000',False);\n"
    )
    self.SCRIPT_FILE.write(
        "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
    )
    self.SCRIPT_FILE.write(
        "values ('Mo Fo', 345,3.45,'2014-01-04 19:41:47.292000',True);\n"
    )
    self.SCRIPT_FILE.write("\n")
    # Rewind: re-wrap the accumulated text so reads start from the beginning
    self.SCRIPT_FILE = StringIO(self.SCRIPT_FILE.getvalue())

    self.DATA_TABLE = "TestTypes"
    self.DATA_COLS = "MyInteger\tMyReal\tMyYesNo\tMyText\n"

    # Simple tab-delimited data file with None tokens and blank cells
    self.DATA_FILE = StringIO()
    self.DATA_FILE.write('100\t100.1\tNone\tATest\n')
    self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\tNone\n')
    self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t\n')
    # Skip None tag at end of line, test that white space won't get lost
    self.DATA_FILE.write('300\t\t' + TRUE_STR + '\tCTest\n')
    self.DATA_FILE = StringIO(self.DATA_FILE.getvalue())

    # Expected parsed values for the simple data file
    self.DATA_ROWS = []
    self.DATA_ROWS.append([
        100,
        100.1,
        None,
        "ATest",
    ])
    self.DATA_ROWS.append([
        200,
        200.2,
        False,
        None,
    ])
    self.DATA_ROWS.append([
        300,
        None,
        True,
        "CTest",
    ])

    # Variant data file with quoted headers, embedded quotes, and escapes
    self.MULTI_LINE_DATA_FILE = StringIO()
    self.MULTI_LINE_DATA_FILE.write(
        'myinteger\t"MyReal"\t"MyYesNo"\tMyText\n')
    self.MULTI_LINE_DATA_FILE.write(
        '100\t100.1\tNone\t"""A"" Test and ""more"""\n')
    self.MULTI_LINE_DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t""\n')
    self.MULTI_LINE_DATA_FILE.write('300\tNone\t' + TRUE_STR +
                                    '\t"C\\nTest"\t\n')
    self.MULTI_LINE_DATA_FILE = StringIO(
        self.MULTI_LINE_DATA_FILE.getvalue())

    # Expected parsed values for the multi-line/quoted data file
    self.MULTI_LINE_DATA_ROWS = []
    self.MULTI_LINE_DATA_ROWS.append([
        100,
        100.1,
        None,
        '"A" Test and "more"',
    ])
    self.MULTI_LINE_DATA_ROWS.append([
        200,
        200.2,
        False,
        None,
    ])
    self.MULTI_LINE_DATA_ROWS.append([
        300,
        None,
        True,
        'C\\nTest',
    ])

    # ID summary data to make it easier to verify stuff
    self.COL_NAMES = self.DATA_COLS.split()
    self.ID_COL = self.COL_NAMES[0]
    self.ID_DATA = []
    for row in self.DATA_ROWS:
        self.ID_DATA.append(row[0])

    # Build query to get update rows
    self.DATA_QUERY = SQLQuery()
    for col in self.COL_NAMES:
        self.DATA_QUERY.addSelect(col)
    self.DATA_QUERY.addFrom(self.DATA_TABLE)
    self.DATA_QUERY.addWhereIn(self.ID_COL, self.ID_DATA)
    self.DATA_QUERY.addOrderBy(self.ID_COL)
def loadResults(self, patientId, relativeTime, conn=None):
    """Load all results active by the given relativeTime.

    Will look for sim_patient_state times and sim_patient_order for diagnostic
    orders, to extrapolate all state-specific results for each order, or using
    default values if no state-specific ones available.

    patientId - Patient whose orders/results to load.
    relativeTime - Simulation time; only results whose turnaround time has
        elapsed (and whose triggering order wasn't cancelled first) unlock.
    conn - Optional existing DB connection; a new one is created (and closed) if not given.
    Returns a list of result row models with value fields merged in.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # First query for all expected result labels and states, without state-specific values as
        # may want outer join behvaior against default state values
        query = SQLQuery()
        query.addSelect("distinct spo.sim_state_id")
        query.addSelect("sr.sim_result_id")
        query.addSelect("sr.name")
        query.addSelect("sr.description")
        query.addSelect("sr.priority")
        query.addSelect("sr.group_string")
        query.addSelect("spo.relative_time_start")
        query.addSelect("sorm.turnaround_time")
        query.addSelect(
            "(spo.relative_time_start + sorm.turnaround_time) as result_relative_time"
        )
        query.addFrom("sim_patient_order as spo")
        query.addFrom("sim_order_result_map as sorm")
        query.addFrom("sim_result as sr")
        query.addWhere("spo.clinical_item_id = sorm.clinical_item_id")
        query.addWhere("sorm.sim_result_id = sr.sim_result_id")
        query.addWhereEqual("spo.sim_patient_id", patientId)

        # Only unlock results if appropiate prereq orders were placed in the past (and longer than the turnaround time)
        query.addWhereOp("spo.relative_time_start + sorm.turnaround_time",
                         "<=", relativeTime)
        # Also check that the triggering order was not cancelled before the completion of the turnaround time
        query.addWhere(
            "( spo.relative_time_end is null or spo.relative_time_start + sorm.turnaround_time <= spo.relative_time_end )"
        )
        query.addOrderBy("result_relative_time")
        query.addOrderBy("sr.priority")
        resultTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        resultModels = modelListFromTable(resultTable)

        # Pass through results to get set of states to search for
        stateIds = set([DEFAULT_STATE_ID])  # Include default state to fall back on
        for resultModel in resultModels:
            stateIds.add(resultModel["sim_state_id"])

        # Second query for state-specific values
        valueQuery = SQLQuery()
        valueQuery.addSelect("ssr.sim_state_id")
        valueQuery.addSelect("ssr.sim_result_id")
        valueQuery.addSelect("ssr.num_value")
        valueQuery.addSelect("ssr.num_value_noise")
        valueQuery.addSelect("ssr.text_value")
        valueQuery.addSelect("ssr.result_flag")
        valueQuery.addSelect("ssr.clinical_item_id")  # Output clinical item if result flag means something
        valueQuery.addFrom("sim_state_result as ssr")
        valueQuery.addWhereIn("ssr.sim_state_id", stateIds)
        valueTable = DBUtil.execute(valueQuery, includeColumnNames=True, conn=conn)
        valueModels = modelListFromTable(valueTable)

        # Store in-memory dictionary for rapid cross-referencing "join" to result table
        valueModelByStateIdByResultId = dict()
        for valueModel in valueModels:
            resultId = valueModel["sim_result_id"]
            stateId = valueModel["sim_state_id"]
            if resultId not in valueModelByStateIdByResultId:
                valueModelByStateIdByResultId[resultId] = dict()
            valueModelByStateIdByResultId[resultId][stateId] = valueModel

        # Now go back through original results and join up state-specific values, or use default values if needed
        resultValueModels = list()
        for resultModel in resultModels:
            resultId = resultModel["sim_result_id"]
            stateId = resultModel["sim_state_id"]
            if resultId in valueModelByStateIdByResultId:
                valueModelByStateId = valueModelByStateIdByResultId[resultId]
                if stateId in valueModelByStateId:
                    # Have a state-specific value, populate that
                    valueModel = valueModelByStateId[stateId]
                    resultModel.update(valueModel)
                elif DEFAULT_STATE_ID in valueModelByStateId:
                    # No state-specific value, but have a default one to populate instead
                    valueModel = valueModelByStateId[DEFAULT_STATE_ID]
                    resultModel.update(valueModel)
                resultValueModels.append(resultModel)
            else:
                # No result information available, even in default state. Skip these
                #resultModel["num_value"] = None;
                #resultModel["num_value_noise"] = None;
                #resultModel["text_value"] = None;
                #resultModel["result_flag"] = None;
                #resultModel["clinical_item_id"] = None;
                pass
        return resultValueModels
    finally:
        if not extConn:
            conn.close()
def _getClinicalItems(self): query = SQLQuery() # SELECT query.addSelect(CLINICAL_ITEM_ID) query.addSelect('name') # FROM query.addFrom('clinical_item') print query print query.getParams() DBUtil.runDBScript(self.SCRIPT_FILE, False) results = DBUtil.execute(str(query), query.getParams()) pd.DataFrame(results, columns=query.select).to_csv('clinical_items.csv', index=False)
def loadPendingResultOrders(self, patientId, relativeTime, conn=None): """Load all patient orders at the given relativeTime that are due to yield results, but have not yet. Include an estimate of time until results available. """ extConn = True if conn is None: conn = self.connFactory.connection() extConn = False try: query = SQLQuery() query.addSelect("distinct po.clinical_item_id") # Distinct so don't report multiple times for panel orders query.addSelect("po.relative_time_start") query.addSelect("po.relative_time_end") query.addSelect("ci.name") query.addSelect("ci.description") query.addSelect("sorm.turnaround_time") # Could have different turnaround times for single order if different sub results. Just report each. query.addSelect( "sorm.turnaround_time - (%d - po.relative_time_start) as time_until_result" % relativeTime) # Calculate time until expect result query.addFrom("sim_patient_order as po") query.addFrom("clinical_item as ci") query.addFrom("sim_order_result_map as sorm") query.addWhere("po.clinical_item_id = ci.clinical_item_id") query.addWhere("po.clinical_item_id = sorm.clinical_item_id") query.addWhereEqual("sim_patient_id", patientId) # Only catch orders up to the given relativeTime and not cancelled query.addWhereOp("relative_time_start", "<=", relativeTime) query.openWhereOrClause() query.addWhere("relative_time_end is null") query.addWhereOp("relative_time_end", ">", relativeTime) query.closeWhereOrClause() # Only PENDING orders, so don't report orders who results should already be available query.addWhereOp("sorm.turnaround_time + po.relative_time_start", ">", relativeTime) query.addOrderBy("time_until_result") query.addOrderBy("relative_time_start") query.addOrderBy("ci.name") dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn) dataModels = modelListFromTable(dataTable) return dataModels finally: if not extConn: conn.close()
def _getPatientsLabsHistories(self, proc_codes): query = SQLQuery() # SELECT query.addSelect('pat_id') query.addSelect('abnormal_yn') query.addSelect('result_time') query.addSelect('proc_code') # FROM query.addFrom('stride_order_proc') # query.addFrom('patient_item as pi') # WHERE query.addWhereEqual('lab_status', 'Final result') # query.addWhereEqual('proc_code', proc_code) query.addWhereIn('proc_code', proc_codes) query.addOrderBy('proc_code') query.addOrderBy('pat_id') query.addOrderBy('result_time') return customDBUtil.execute(query)
def loadNotes(self, patientId, currentTime, conn=None): """Load notes committed up to the given simulation time. """ extConn = True if conn is None: conn = self.connFactory.connection() extConn = False try: query = SQLQuery() query.addSelect("sn.sim_note_id") query.addSelect("sps.sim_patient_id") # Link query.addSelect("sn.sim_state_id") query.addSelect("sn.note_type_id") query.addSelect("sn.author_type_id") query.addSelect("sn.service_type_id") query.addSelect( "(sps.relative_time_start + sn.relative_state_time) as relative_time" ) query.addSelect("sn.content") query.addFrom("sim_note as sn") query.addFrom("sim_patient_state as sps") query.addWhere("sn.sim_state_id = sps.sim_state_id") query.addWhereEqual("sps.sim_patient_id", patientId) # Only unlock notes once traverse expected time query.addWhereOp( "(sps.relative_time_start + sn.relative_state_time)", "<=", currentTime) query.addOrderBy( "(sps.relative_time_start + sn.relative_state_time)") dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn) dataModels = modelListFromTable(dataTable) return dataModels finally: if not extConn: conn.close()
def _query_patient_episodes(self):
    """Build and run the patient-episode query for the target lab component,
    recording the number of reported episodes on self._num_reported_episodes.
    """
    log.info('Querying patient episodes...')

    # Initialize DB cursor.
    cursor = self._connection.cursor()

    # Build parameters for query.
    random_patient_list = self._get_random_patient_list()

    # Build SQL query for list of patient episodes.
    # For 2008-2014 data, result_flag can take any of: High, Low, High Panic,
    # Low Panic, Low Off-Scale, Negative, Positive, Resistant, Susceptible,
    # Abnormal, *, or NULL (none).
    # Of these, only High, Low, High Panic, Low Panic, *, and Abnormal occur
    # frequently enough to be meaningful (e.g. Low Panic / High Panic appear
    # on thousands of core metabolic components, Abnormal on 1462 components).
    # The remaining flags each appear on a handful of results — or only on
    # microbiology tests (e.g. Gram Stain, AFB culture) where a flag carries
    # little meaning — so they are excluded from the filter below.
    query = SQLQuery()
    query.addSelect('CAST(pat_id AS BIGINT)')
    query.addSelect('sop.order_proc_id AS order_proc_id')
    query.addSelect('base_name')
    query.addSelect('order_time')
    query.addSelect("CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 0 ELSE 1 END AS component_normal")
    query.addFrom('stride_order_proc AS sop')
    query.addFrom('stride_order_results AS sor')
    query.addWhere('sop.order_proc_id = sor.order_proc_id')
    query.addWhere("(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)")
    query.addWhereIn("base_name", [self._component])
    query.addWhereIn("pat_id", random_patient_list)
    for groupField in ('pat_id', 'sop.order_proc_id', 'base_name',
                       'order_time', 'result_flag'):
        query.addGroupBy(groupField)
    for orderField in ('pat_id', 'sop.order_proc_id', 'base_name',
                       'order_time'):
        query.addOrderBy(orderField)
    query.setLimit(self._num_requested_episodes)

    self._num_reported_episodes = FeatureMatrix._query_patient_episodes(self, query, index_time_col='order_time')
def clinicalItemSearch(self, itemQuery, conn=None):
    """Look for clinical items based on specified query criteria"""
    manageConn = conn is None
    if manageConn:
        conn = self.connFactory.connection()
    try:
        searchQuery = SQLQuery()
        for selectField in ("ci.clinical_item_id", "ci.name", "ci.description",
                            "cic.source_table",
                            "cic.description as category_description"):
            searchQuery.addSelect(selectField)
        searchQuery.addFrom("clinical_item as ci")
        searchQuery.addFrom("clinical_item_category as cic")
        searchQuery.addWhere(
            "ci.clinical_item_category_id = cic.clinical_item_category_id")

        if itemQuery.searchStr is not None:
            # Prefix search by regular expression for each search word
            for searchWord in itemQuery.searchStr.split():
                searchQuery.addWhereOp(
                    "ci.description", "~*",
                    "^%(searchWord)s|[^a-z]%(searchWord)s" %
                    {"searchWord": searchWord})

        if itemQuery.sourceTables:
            searchQuery.addWhereIn("cic.source_table", itemQuery.sourceTables)

        if itemQuery.analysisStatus is not None:
            searchQuery.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)
            # Also ignore items with no occurence in the analyzed data (occurs if item was accepted for analysis from multi-year dataset, but never used in a sub-time frame's analysis)
            searchQuery.addWhere("ci.item_count <> 0")

        if itemQuery.sortField:
            searchQuery.addOrderBy(itemQuery.sortField)
        for orderField in ("cic.description", "ci.name", "ci.description"):
            searchQuery.addOrderBy(orderField)

        if itemQuery.resultCount is not None:
            searchQuery.limit = itemQuery.resultCount

        dataTable = DBUtil.execute(searchQuery, includeColumnNames=True, conn=conn)
        return modelListFromTable(dataTable)
    finally:
        if manageConn:
            conn.close()
# Work from the project root so the medinfo package imports resolve.
os.chdir('/Users/jwang/Desktop/ClinicalDecisionMaker')
from medinfo.common.test.Util import MedInfoTestCase
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery

# Create SCRIPT_FILE
# clinical_item_category_id = 13 (Discharge)
# clinical_item_category_id = 23 (Admission)
# H&P Note
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")

# Pull all H&P notes along with author / patient / encounter metadata.
DATA_QUERY = SQLQuery()
for noteField in ("author_name", "pat_id", "note_date", "note_type",
                  "provider_type", "specialty", "pat_enc_csn_id"):
    DATA_QUERY.addSelect(noteField)
DATA_QUERY.addFrom("stride_note")
DATA_QUERY.addWhereEqual("note_type", "H&P")
DATA_QUERY.addOrderBy("author_name", dir="asc")
print(DATA_QUERY)

# Write out data to CSV
def orderSetSearch(self, itemQuery, conn=None):
    """Look for clinical items based on specified query criteria.

    Generator: yields one dict per order set, of the form
    {"external_id": ..., "name": ..., "itemList": [rows...]},
    aggregating the joined item_collection / clinical_item rows by external_id.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("ic.item_collection_id")
        query.addSelect("ic.external_id")
        query.addSelect("ic.name as collection_name")
        query.addSelect("ic.section")
        query.addSelect("ic.subgroup")
        query.addSelect("ci.clinical_item_category_id")
        query.addSelect("ci.clinical_item_id")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addFrom("item_collection as ic")
        query.addFrom("item_collection_item as ici")
        query.addFrom("clinical_item as ci")
        query.addWhere("ic.item_collection_id = ici.item_collection_id")
        query.addWhere("ici.clinical_item_id = ci.clinical_item_id")
        query.addWhereNotEqual("ic.section", AD_HOC_SECTION)

        if itemQuery.searchStr is not None:
            searchWords = itemQuery.searchStr.split()
            for searchWord in searchWords:
                query.addWhereOp(
                    "ic.name", "~*",
                    "^%(searchWord)s|[^a-z]%(searchWord)s" %
                    {"searchWord": searchWord})  # Prefix search by regular expression

        if itemQuery.analysisStatus is not None:
            query.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)

        query.addOrderBy("lower(ic.name)")
        query.addOrderBy("ic.external_id")
        query.addOrderBy("lower(ic.section)")
        query.addOrderBy("lower(ic.subgroup)")
        query.addOrderBy("ci.clinical_item_id")
        query.addOrderBy("ci.name")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)

        # Aggregate up into order sets (rows arrive sorted, so grouping is a
        # linear scan keyed on external_id)
        orderSetModel = None
        for row in dataModels:
            if orderSetModel is None or row[
                    "external_id"] != orderSetModel["external_id"]:
                if orderSetModel is not None:
                    # Prior order set exists, yield/return it before preparing next one
                    yield orderSetModel
                orderSetModel = \
                    {
                        "external_id": row["external_id"],
                        "name": row["collection_name"],
                        "itemList": list(),
                    }
            orderSetModel["itemList"].append(row)
        # Yield the last processed model.
        # Fix: only yield if at least one row was seen; previously an empty
        # result set caused the generator to yield a spurious None.
        if orderSetModel is not None:
            yield orderSetModel
    finally:
        if not extConn:
            conn.close()
# Load the treatment-team aggregation mapping, skipping the two header lines.
treatment_aggregation_map = {}
treatment_aggregations.readline()
treatment_aggregations.readline()
for rawLine in treatment_aggregations:
    fields = rawLine.strip().split("\t")
    treatment_aggregation_map[fields[0]] = fields[1]
print(treatment_aggregation_map)

# Create SCRIPT_FILE
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")

# Find all clinical_item_ids associated with each patient_id
# SELECT * FROM clinical_item WHERE clinical_item_category_id = 161;
DATA_QUERY = SQLQuery()
for itemField in ("clinical_item_id", "description",
                  "clinical_item_category_id"):
    DATA_QUERY.addSelect(itemField)
DATA_QUERY.addWhereEqual("clinical_item_category_id", "161")
DATA_QUERY.addFrom("clinical_item")
print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)

output = open("/Users/jwang/Desktop/Results/treatment_teams.csv", "w")
output.write(
    "clinical_item_id,description,clinical_item_category_id,treatment_team\n")
def mergeRelated(self, baseClinicalItemId, clinicalItemIds, reassignMergedItems=True, conn=None):
    """The specified clinical items will be merged / composited into the base
    clinical item provided. The remaining now redundant items will be deactivated.

    Patient_item instances will be reassigned to the merged clinical_item
    (while backup links will be saved to backup_link_patient_item),
    clinical_item_association counts for the redundant items will removed and
    analyze_dates reset, requiring a re-run of AssociationAnalysis to redo
    those counts from scratch (but will now count as the merged / composite
    item rather than separate ones).

    Could theoretically figure out how to combine the association stats
    without re-running analysis, but patient_counts are supposed to ignore
    duplicates, so hard to know how to aggregate stats (not enough info in
    them to tell if unique cooccurrences?)

    Examples this could be relevant for:
        All blood transfusion indexes, G vs J vs Feeding tube equivalent,
        Ear, Eyes med routes irrelevant which ear/eye.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # Deactivate other items
        deactivateIds = set(clinicalItemIds)
        deactivateIds.discard(baseClinicalItemId)
        self.deactivateAnalysis(deactivateIds, conn=conn)

        # Build composite item name and description
        allIds = set(deactivateIds)
        allIds.add(baseClinicalItemId)

        query = SQLQuery()
        query.addSelect("clinical_item_id")
        query.addSelect("name")
        query.addSelect("description")
        query.addFrom("clinical_item")
        query.addWhereIn("clinical_item_id", allIds)
        query.addOrderBy("name")  # Ensure consistency across multiple runs
        results = DBUtil.execute(query, conn=conn)

        nameList = list()
        descrList = list()
        # First pass to get Base Item Description (base item leads the composite)
        for (clinicalItemId, name, description) in results:
            if clinicalItemId == baseClinicalItemId:
                nameList.append(name if name is not None else "")
                descrList.append(description if description is not None else "")
                break
        # Second pass to get the rest
        for (clinicalItemId, name, description) in results:
            if clinicalItemId != baseClinicalItemId:
                nameList.append(name if name is not None else "")
                descrList.append(description if description is not None else "")

        # Idiomatic joins (was str.join("+", ...))
        compositeName = "+".join(nameList)
        compositeDescription = "+".join(descrList)
        DBUtil.updateRow("clinical_item", {
            "name": compositeName,
            "description": compositeDescription
        }, baseClinicalItemId, conn=conn)

        if reassignMergedItems:
            # Reassign other items to the base item, but save backup data first
            query = SQLQuery()
            query.addSelect("patient_item_id")
            query.addSelect("clinical_item_id")
            query.addFrom("patient_item")
            query.addWhereIn("clinical_item_id", deactivateIds)
            results = DBUtil.execute(query, conn=conn)

            insertQuery = DBUtil.buildInsertQuery(
                "backup_link_patient_item",
                ["patient_item_id", "clinical_item_id"])
            for (patientItemId, clinicalItemId) in results:
                insertParams = (patientItemId, clinicalItemId)
                try:
                    # Optimistic insert of a new unique item
                    DBUtil.execute(insertQuery, insertParams, conn=conn)
                except conn.IntegrityError as err:
                    # Fix: Python 3 exception syntax (was "except X, err",
                    # a SyntaxError under Python 3).
                    # If turns out to be a duplicate, okay, just note it and
                    # continue to insert whatever else is possible
                    log.info(err)

            # Now to actual reassignment of patient items to the unifying base clinical item
            placeholders = generatePlaceholders(len(deactivateIds))
            query = "update patient_item set clinical_item_id = %s where clinical_item_id in (%s)" % (
                DBUtil.SQL_PLACEHOLDER, placeholders)
            params = [baseClinicalItemId]
            params.extend(deactivateIds)
            DBUtil.execute(query, params, conn=conn)
    finally:
        if not extConn:
            conn.close()