Example n. 1
0
    def updateClinicalItemCounts(self, acceptCache=False, conn=None):
        """Update the summary item_counts for clinical_items based
        on clinical_item_association summary counts.

        If acceptCache is True, then will first check for existence of an entry "clinicalItemCountsUpdated"
            in the data_cache table.  If it exists, assume we have done this update already, and no need to force the calculations again

        :param acceptCache: when True, skip all recalculation if the
            "clinicalItemCountsUpdated" cache marker already exists.
        :param conn: optional existing DB connection.  If None, a connection is
            created from self.connFactory and closed in the finally block;
            caller-supplied connections are left open for the caller to manage.
        """

        # Track connection ownership: only close connections opened here.
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            if acceptCache:
                isCacheUpdated = (self.getCacheData(
                    "clinicalItemCountsUpdated", conn=conn) is not None)
                if isCacheUpdated:
                    # Item count caches already updated, no need to recalculate them
                    # (early return still runs the finally block below)
                    return

            # First reset all counts to zero
            query = "update clinical_item set item_count = 0, patient_count = 0, encounter_count = 0 "
            params = []
            if self.maxClinicalItemId is not None:  # Restrict to (test) data
                query += "where clinical_item_id < %s" % DBUtil.SQL_PLACEHOLDER
                params.append(self.maxClinicalItemId)
            DBUtil.execute(query, params, conn=conn)

            sqlQuery = SQLQuery()
            sqlQuery.addSelect("clinical_item_id")
            sqlQuery.addSelect("count_0 as item_count")
            sqlQuery.addSelect("patient_count_0 as patient_count")
            sqlQuery.addSelect("encounter_count_0 as encounter_count")
            sqlQuery.addFrom("clinical_item_association as ci")
            sqlQuery.addWhere("clinical_item_id = subsequent_item_id")
            # Look along "diagonal" of matrix for primary summary stats
            if self.maxClinicalItemId is not None:  # Restrict to (test) data
                sqlQuery.addWhereOp("clinical_item_id", "<",
                                    self.maxClinicalItemId)

            resultTable = DBUtil.execute(sqlQuery,
                                         includeColumnNames=True,
                                         conn=conn)
            resultModels = modelListFromTable(resultTable)

            # Copy the diagonal summary counts back onto each clinical_item row
            for result in resultModels:
                DBUtil.updateRow("clinical_item",
                                 result,
                                 result["clinical_item_id"],
                                 conn=conn)

            # Make a note that this cache data has been updated
            self.setCacheData("clinicalItemCountsUpdated", "True", conn=conn)
        finally:
            if not extConn:
                conn.close()
Example n. 2
0
    def queryItems(self, options, outputFile):
        """Query for all clinical item records that fulfill the options criteria
        and then send the results as tab-delimited output to the outputFile.

        :param options: parsed options object; uses pauseSeconds (delay between
            output rows), itemPrefix (optional item description prefix filter)
            and categoryNames (optional comma-separated category name filter).
        :param outputFile: file-like object that receives the tab-delimited rows.
        """
        # Throttle between result rows so large result sets trickle out slowly
        pauseSeconds = float(options.pauseSeconds)

        query = SQLQuery()
        query.addSelect(
            "cic.description, ci.clinical_item_id, ci.name, ci.description")
        query.addFrom("clinical_item_category as cic")
        query.addFrom("clinical_item as ci")
        query.addWhere(
            "cic.clinical_item_category_id = ci.clinical_item_category_id")
        if options.itemPrefix:
            query.addWhereOp("ci.description", "like",
                             options.itemPrefix + "%%")
            # Add wildcard to enable prefix search.  NOTE(review): "%%"
            # presumably collapses to a single "%" during later %-style query
            # formatting — confirm against SQLQuery/DBUtil implementation.
        if options.categoryNames:
            query.addWhereIn("cic.description",
                             options.categoryNames.split(","))
        query.addOrderBy(
            "cic.description, ci.name, ci.description, ci.clinical_item_id")

        formatter = TextResultsFormatter(outputFile)

        prog = ProgressDots()
        for row in DBUtil.execute(query,
                                  includeColumnNames=True,
                                  connFactory=self.connFactory):
            formatter.formatTuple(row)
            time.sleep(pauseSeconds)
            prog.update()
        prog.printStatus()
Example n. 3
0
# NOTE(review): pat_enc_f and patient_encounters are defined earlier in the
# script, outside this excerpt — presumably an open CSV file of
# (patient id, admission time) rows and an empty dict.  Confirm upstream.
pat_enc_f.readline()  # skip the CSV header line
for line in pat_enc_f:
    line = line.strip().split(",")
    if (line[0] not in patient_encounters):
        patient_encounters[line[0]] = [
            pandas.to_datetime(line[1]),
        ]  # patient id: list of admission times
    else:
        patient_encounters[line[0]].append(pandas.to_datetime(line[1]))

# print(patient_encounters)

# Create SCRIPT_FILE
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")
DATA_QUERY = SQLQuery()

# Pull each patient's death date for mortality-window bucketing below
DATA_QUERY.addSelect("pat_id")
DATA_QUERY.addSelect("death_date")
DATA_QUERY.addFrom("stride_patient")
print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)
#print(len(results))

# Output files for 7/30/90-day death cohorts (NOTE: hard-coded local paths)
output_7 = open("/Users/jwang/Desktop/Results/7_day_deaths.csv", "w")
output_30 = open("/Users/jwang/Desktop/Results/30_day_deaths.csv", "w")
output_90 = open("/Users/jwang/Desktop/Results/90_day_deaths.csv", "w")
Example n. 4
0
    def _getNonNullLabs(self):
        """Write distinct LAB proc codes that have a non-null abnormal flag
        to DATA_FOLDER/proc_codes.csv.

        Queries stride_order_proc for proc_code values starting with 'LAB'
        where abnormal_yn is populated, grouped and ordered by proc_code.
        Returns None; the CSV file is the only output.
        """
        query = SQLQuery()
        # SELECT
        query.addSelect('proc_code')
        # FROM
        query.addFrom('stride_order_proc')
        # WHERE
        query.addWhereLike('proc_code', 'LAB%')
        query.addWhere('abnormal_yn is not null')

        query.addGroupBy('proc_code')
        query.addOrderBy('proc_code')

        results = DBUtil.execute(query)

        # DataFrame.to_csv(path) returns None, so the original "df = ..."
        # binding was a dead assignment; just write the CSV.
        pd.DataFrame(results, columns=query.select).to_csv(
            DATA_FOLDER + 'proc_codes.csv', index=False)
Example n. 5
0
    def _get_random_patient_list(self):
        """Return a random list of patient IDs that have orders for
        self._component, sampled without replacement.

        Avoids RANDOM() on the database: counts orders per patient, uses the
        median orders-per-patient to estimate how many patients are needed to
        reach self._num_requested_episodes, and samples that many patient IDs
        in Python.  Exits the process if the component has no orders at all.

        Side effect: sets self._num_patients to the estimated patient count.
        """
        # NOTE: the original code opened a cursor from self._connection here
        # but never used or closed it; the leak has been removed.

        # Count orders for this lab component per patient.
        query = SQLQuery()
        query.addSelect('pat_id')
        query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
        query.addFrom('stride_order_proc AS sop')
        query.addFrom('stride_order_results AS sor')
        query.addWhere('sop.order_proc_id = sor.order_proc_id')
        query.addWhereIn("base_name", [self._component])
        query.addGroupBy('pat_id')
        log.debug('Querying median orders per patient...')

        results = DBUtil.execute(query)

        order_counts = [row[1] for row in results]

        if len(results) == 0:
            error_msg = '0 orders for component "%s."' % self._component
            log.critical(error_msg)
            sys.exit('[ERROR] %s' % error_msg)
        else:
            # Despite the name, this is the *median* orders per patient
            # (kept to match the existing log message).
            avg_orders_per_patient = numpy.median(order_counts)
            log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
            # Based on median orders per patient, figure out how many patients
            # we'd need for a feature matrix of the requested size (>= 1).
            self._num_patients = int(numpy.max(
                [self._num_requested_episodes / avg_orders_per_patient, 1]))
            # Some components may have fewer associated patients than the
            # required sample size, so cap at the available population.
            patient_number_chosen = min([len(results), self._num_patients])
            inds_random_patients = numpy.random.choice(
                len(results), size=patient_number_chosen, replace=False)
            return [results[ind][0] for ind in inds_random_patients]
Example n. 6
0
class TestDBUtil(DBTestCase):
    def setUp(self):
        """Prepare state for test cases.

        Builds in-memory fixtures shared by the tests:
          - SCRIPT_FILE: SQL script creating and populating the TestTypes table
            (also exercises #, /* */ and -- comment tag recognition)
          - DATA_FILE / DATA_ROWS: tab-delimited rows plus their expected
            parsed values
          - MULTI_LINE_DATA_FILE / MULTI_LINE_DATA_ROWS: variant with quoted
            headers, embedded quotes and escapes
          - DATA_QUERY: query retrieving the fixture rows by ID for verification
        """
        DBTestCase.setUp(self)

        self.SCRIPT_FILE = StringIO()
        self.SCRIPT_FILE.write(
            "# Create table to test on.  Also testing that comment tag is recognized\n"
        )
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write("create table TestTypes\n")
        self.SCRIPT_FILE.write("(\n")
        self.SCRIPT_FILE.write("    TestTypes_id    serial,\n")
        self.SCRIPT_FILE.write(
            "    MyText          varchar(50),    /* Same as character varying, also test standard SQL comment tags */\n"
        )
        self.SCRIPT_FILE.write("    MyInteger       integer,\n")
        self.SCRIPT_FILE.write(
            "    MyReal          real,   -- Floating point number, also test standard SQL comment tag\n"
        )
        self.SCRIPT_FILE.write(
            "    MyDateTime      TIMESTAMP,   -- PostgreSQL uses TIMESTAMP, but MySQL doesn't do NULL values right, so have to use DATETIME for MySQL?\n"
        )
        self.SCRIPT_FILE.write("    MyYesNo         boolean\n")
        self.SCRIPT_FILE.write(");\n")
        self.SCRIPT_FILE.write(
            "ALTER TABLE TestTypes ADD CONSTRAINT TestTypes_id PRIMARY KEY (TestTypes_id);\n"
        )
        # Should auto-create testtypes_testtypes_id_seq sequence
        self.SCRIPT_FILE.write(
            "CREATE INDEX TestTypes_MyInteger_INDEX ON TestTypes(MyInteger);\n"
        )
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Sample Text',  123,123.45,'2004-09-08 19:41:47.292000',True);\n"
        )
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Joe Mama',     234,23.45,'1990-10-03 19:41:47.292000',False);\n"
        )
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Mo Fo',        345,3.45,'2014-01-04 19:41:47.292000',True);\n"
        )
        self.SCRIPT_FILE.write("\n")
        # Rewind the script into a fresh read-ready StringIO
        self.SCRIPT_FILE = StringIO(self.SCRIPT_FILE.getvalue())

        self.DATA_TABLE = "TestTypes"
        self.DATA_COLS = "MyInteger\tMyReal\tMyYesNo\tMyText\n"

        # Tab-delimited data rows matching DATA_COLS
        self.DATA_FILE = StringIO()
        self.DATA_FILE.write('100\t100.1\tNone\tATest\n')
        self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\tNone\n')
        self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t\n')
        # Skip None tag at end of line, test that white space won't get lost
        self.DATA_FILE.write('300\t\t' + TRUE_STR + '\tCTest\n')
        self.DATA_FILE = StringIO(self.DATA_FILE.getvalue())

        # Expected parsed values for DATA_FILE (note: both 200 rows map to one)
        self.DATA_ROWS = []
        self.DATA_ROWS.append([
            100,
            100.1,
            None,
            "ATest",
        ])
        self.DATA_ROWS.append([
            200,
            200.2,
            False,
            None,
        ])
        self.DATA_ROWS.append([
            300,
            None,
            True,
            "CTest",
        ])

        # Variant data file with quoted column headers, doubled quotes and
        # escape sequences inside values
        self.MULTI_LINE_DATA_FILE = StringIO()
        self.MULTI_LINE_DATA_FILE.write(
            'myinteger\t"MyReal"\t"MyYesNo"\tMyText\n')
        self.MULTI_LINE_DATA_FILE.write(
            '100\t100.1\tNone\t"""A"" Test and ""more"""\n')
        self.MULTI_LINE_DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t""\n')
        self.MULTI_LINE_DATA_FILE.write('300\tNone\t' + TRUE_STR +
                                        '\t"C\\nTest"\t\n')
        self.MULTI_LINE_DATA_FILE = StringIO(
            self.MULTI_LINE_DATA_FILE.getvalue())

        self.MULTI_LINE_DATA_ROWS = []
        self.MULTI_LINE_DATA_ROWS.append([
            100,
            100.1,
            None,
            '"A" Test and "more"',
        ])
        self.MULTI_LINE_DATA_ROWS.append([
            200,
            200.2,
            False,
            None,
        ])
        self.MULTI_LINE_DATA_ROWS.append([
            300,
            None,
            True,
            'C\\nTest',
        ])

        # ID summary data to make it easier to verify stuff
        self.COL_NAMES = self.DATA_COLS.split()
        self.ID_COL = self.COL_NAMES[0]
        self.ID_DATA = []
        for row in self.DATA_ROWS:
            self.ID_DATA.append(row[0])

        # Build query to get update rows
        self.DATA_QUERY = SQLQuery()
        for col in self.COL_NAMES:
            self.DATA_QUERY.addSelect(col)
        self.DATA_QUERY.addFrom(self.DATA_TABLE)
        self.DATA_QUERY.addWhereIn(self.ID_COL, self.ID_DATA)
        self.DATA_QUERY.addOrderBy(self.ID_COL)

    def tearDown(self):
        """Clean up database state created by setUp or individual tests."""
        # Best-effort drop: failure (e.g. the table was never created) is
        # only logged, never propagated.
        try:
            DBUtil.execute("drop table TestTypes")
        except Exception as err:
            log.warning(err)

        DBTestCase.tearDown(self)

    def test_runDBScript(self):
        """Run the setup SQL script and verify its inserts via follow-up queries."""
        # Just run a DB Script and make sure no ProgrammingErrors are raised.
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        # Run some other commands to see if scripts produced expected results
        results = DBUtil.execute(
            "select * from TestTypes where MyInteger > %s", (200, ))
        self.assertEqual(2, len(results))

        # Test extra "includeColumnNames" parameter
        results = DBUtil.execute(
            "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger < %s",
            (100, ), True)
        expected = [
            "TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime",
            "MyYesNo"
        ]
        for iCol in range(len(expected)):  # Ignore case for comparison
            expected[iCol] = expected[iCol].lower()
            results[0][iCol] = results[0][iCol].lower()
        self.assertEqual(expected, results[0])
        # Only the header row should come back (no MyInteger < 100 data rows)
        self.assertEqual(0, len(results) - 1)

    def test_runDBScript_commandline(self):
        """Run the setup script through the command-line main() interface."""
        # Equivalent to test_runDBScript, but try higher level interface
        #   through command-line "main" method
        origStdin = sys.stdin
        sys.stdin = self.SCRIPT_FILE
        argv = ["DBUtil.py", "--script", "-"]
        DBUtil.main(argv)
        sys.stdin = origStdin

        # Run some other commands to see if scripts produced expected results
        results = DBUtil.execute(
            "select * from TestTypes where MyInteger > %s", (200, ))
        self.assertEqual(2, len(results))

        results = DBUtil.execute(
            "select * from TestTypes where MyInteger < %s", (100, ))
        self.assertEqual(0, len(results))

    def test_runDBScript_skipErrors(self):
        """Re-running the setup script fails on redundant DDL unless the
        --skipErrors option is given.

        Original code restored sys.stdin after the assertion, so a failing
        assert leaked the redirected stdin; the restore now lives in finally.
        """
        origStdin = sys.stdin
        try:
            # First run should succeed cleanly.
            sys.stdin = self.SCRIPT_FILE
            argv = ["DBUtil.py", "--script", "-"]
            DBUtil.main(argv)

            # Run script again with --skipErrors.  Redundant create table etc.
            # generate errors, but they should be skipped.
            self.SCRIPT_FILE.seek(0)
            sys.stdin = self.SCRIPT_FILE
            argv = ["DBUtil.py", "--script", "--skipErrors", "-"]
            DBUtil.main(argv)

            # Run script again without --skipErrors.  The redundant statements
            # should now surface as an exception.  Verify by catch.
            self.SCRIPT_FILE.seek(0)
            sys.stdin = self.SCRIPT_FILE
            argv = ["DBUtil.py", "--script", "-"]
            actualErr = False
            try:
                DBUtil.main(argv)
            except Exception:
                actualErr = True
            self.assertTrue(actualErr)
        finally:
            # Always restore stdin, even if an assertion above fails
            sys.stdin = origStdin

    def test_execute_commandline(self):
        """Exercise select and insert statements through the command-line
        main() interface, capturing stdout to inspect the results."""
        # Run basic executes for both an update and a select query, but
        #   using the higher-level command-line "main" method interface

        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        # Count query: 2 of the 3 seeded rows have MyInteger > 200
        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py",
            "select count(*) from TestTypes where MyInteger > 200", "-"
        ]
        DBUtil.main(argv)
        self.assertEqual(2, int(sys.stdout.getvalue()))
        sys.stdout = origStdout

        # Insert a fourth row through the command line
        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py",
            "insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)",
            "-"
        ]
        DBUtil.main(argv)
        #self.assertEqual( 1, int(sys.stdout.getvalue()) )
        sys.stdout = origStdout

        # Count should now reflect the inserted row
        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py",
            "select count(*) from TestTypes where MyInteger > 200", "-"
        ]
        DBUtil.main(argv)
        self.assertEqual(3, int(sys.stdout.getvalue()))
        sys.stdout = origStdout

        # Different test, includeColumnNames
        origStdout = sys.stdout
        sys.stdout = StringIO()
        argv = [
            "DBUtil.py", "-c",
            "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger > 200 and MyYesNo = True",
            "-"
        ]
        DBUtil.main(argv)
        sampleLines = sys.stdout.getvalue().split("\n")
        expected = [
            "TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime",
            "MyYesNo"
        ]
        sampleColumns = sampleLines[0].split()
        for iCol in range(len(expected)):  # Case-insensitive comparison
            expected[iCol] = expected[iCol].lower()
            sampleColumns[iCol] = sampleColumns[iCol].lower()
        for iCol, col in enumerate(sampleColumns):
            self.assertEqual(expected[iCol], col)
        self.assertEqual(
            2 + 1 + 1, len(sampleLines)
        )  # 2 data lines + 1 column name line + 1 newline at end of output
        sys.stdout = origStdout

    def test_insertFile(self):
        """insertFile with column names taken from the data file's first row;
        verify generated IDs and the parsed row contents."""
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"

        idFile = StringIO()

        # columnNames=None: expect them extracted from the file's header row
        DBUtil.insertFile(self.MULTI_LINE_DATA_FILE, tableName, None, "\t",
                          idFile)
        # Assume column names extracted from first row of data file

        # Verify number rows inserted (one generated ID line per row)
        self.assertEqual(len(self.MULTI_LINE_DATA_ROWS),
                         idFile.getvalue().count("\n"))
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertEqual(self.MULTI_LINE_DATA_ROWS, results)

    def test_insertFile_commandline(self):
        """Like test_insertFile, but through the command-line interface with an
        explicit tab delimiter; column names come from the data file header.

        Fixes vs. original: removed the unused columnNames local, and restore
        stdin/stdout in a finally so a failure cannot leak the redirection.
        """
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"

        idFile = StringIO()

        # Slightly different test, specify tab as delimiter, not just any whitespace
        origStdin = sys.stdin
        origStdout = sys.stdout
        try:
            sys.stdin = self.MULTI_LINE_DATA_FILE
            sys.stdout = idFile
            argv = ["DBUtil.py", "-i-", "-d\\t", "-t" + tableName, "-o-"]
            DBUtil.main(argv)
        finally:
            sys.stdout = origStdout
            sys.stdin = origStdin

        # One generated ID line per inserted row
        self.assertEqual(3, idFile.getvalue().count("\n"))
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertEqual(self.MULTI_LINE_DATA_ROWS, results)

    def test_insertFile_skipErrors(self):
        """insertFile via command line: bad rows abort the whole insert unless
        -e (skipErrors) is given, in which case valid rows still land.

        Expected error/warning messages will appear in the test output.
        Fixes vs. original: removed a duplicate idFile = StringIO() assignment,
        and restore stdin/stdout in finally blocks so a failed assertion
        cannot leak the redirection.
        """
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"
        columnNames = ["MyReal", "MyYesNo", "MyText", "MyInteger"]

        # Bogus data that should generate errors (only the middle row is valid)
        dataFile = StringIO()
        dataFile.write("ABCD\tPositive\tBadTest\t100.123\n")
        dataFile.write("700.7\t" + FALSE_STR + "\tXTest\t777\n")
        dataFile.write("1,099\tNegative\tMoBadTest\tfoo\n")
        dataFile = StringIO(dataFile.getvalue())

        idFile = StringIO()

        origStdin = sys.stdin
        origStdout = sys.stdout
        try:
            sys.stdin = dataFile
            sys.stdout = idFile
            argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-"]
            argv.extend(columnNames)
            actualErr = False
            try:
                DBUtil.main(argv)
            except Exception:
                actualErr = True
            self.assertTrue(actualErr)
        finally:
            sys.stdout = origStdout
            sys.stdin = origStdin

        # Expect no rows successfully inserted since errors in input
        self.assertEqual(0, idFile.getvalue().count("\n"))
        results = DBUtil.execute(
            "select count(*) from TestTypes where MyText like %s", ("%Test", ))
        self.assertEqual(0, results[0][0])

        # Try again with the same bogus data, but skip errors this time
        dataFile = StringIO()
        dataFile.write("ABCD\tPositive\tBadTest\t100.123\n")
        dataFile.write("700.7\t" + FALSE_STR + "\tXTest\t777\n")
        dataFile.write("1,099\tNegative\tMoBadTest\tfoo\n")
        dataFile = StringIO(dataFile.getvalue())

        idFile = StringIO()

        origStdin = sys.stdin
        origStdout = sys.stdout
        try:
            sys.stdin = dataFile
            sys.stdout = idFile
            argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-",
                    "-e"]  # -e option skipsErrors
            argv.extend(columnNames)
            DBUtil.main(argv)
        finally:
            sys.stdout = origStdout
            sys.stdin = origStdin

        # Still expect 1 row to get through successfully, despite other invalid input
        self.assertEqual(1, idFile.getvalue().count("\n"))
        results = DBUtil.execute(
            "select count(*) from TestTypes where MyText like %s", ("%Test", ))
        self.assertEqual(1, results[0][0])

    def test_insertFile_dateParsing(self):
        """insertFile with dateColFormats: date strings in several formats
        should be parsed into datetime values on insert."""
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"
        columnNames = ["MyInteger", "MyText", "MyDateTime"]

        # Same date string goes into both a text and a datetime column, so we
        # can verify parsing happened only where requested
        dataFile = StringIO()
        dataFile.write('''-1\t"12/11/2010"\t"12/11/2010"\n''')
        dataFile.write(
            '''-2\t"2013-04-15 13:45:21"\t"2013-04-15 13:45:21"\n''')
        dataFile.write(
            '''-3\t"2003-04-15 10:45:21"\t"2003-04-15 10:45:21"\n''')
        dataFile.write('''-4\t"4/11/12 6:20"\t"4/11/12 6:20"\n''')
        dataFile = StringIO(dataFile.getvalue())

        dateColFormats = {
            "myDateTime": None
        }  # Deliberately change capitalization to ensure robustness
        DBUtil.insertFile(dataFile,
                          tableName,
                          columnNames,
                          dateColFormats=dateColFormats)

        verifyQuery = \
            """select MyInteger, MyText, MyDateTime
            from TestTypes
            where MyInteger < 0
            order by MyInteger desc
            """

        expectedData = \
            [   [   -1, "12/11/2010", datetime(2010,12,11)  ],
                [   -2, "2013-04-15 13:45:21", datetime(2013,4,15,13,45,21) ],
                [   -3, "2003-04-15 10:45:21", datetime(2003,4,15,10,45,21) ],
                [   -4, "4/11/12 6:20", datetime(2012,4,11,6,20) ],
            ]

        # Verify rows inserted with properly parsed dates
        results = DBUtil.execute(verifyQuery)
        self.assertEqual(expectedData, results)

    def test_insertFile_escapeStrings(self):
        """insertFile with escapeStrings=True: non-ASCII bytes in values should
        be stored in backslash-escaped form."""
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        tableName = "TestTypes"
        columnNames = ["MyInteger", "MyText"]

        # Row -2 contains a raw \xae byte to exercise the escaping path
        dataFile = StringIO()
        dataFile.write('''-1\t"A"\n''')
        dataFile.write('''-2\t"B\xaeb"\n''')
        dataFile.write('''-3\t"C"\n''')
        dataFile.write('''-4\tD\n''')
        dataFile = StringIO(dataFile.getvalue())

        DBUtil.insertFile(dataFile, tableName, columnNames, escapeStrings=True)

        verifyQuery = \
            """select MyInteger, MyText
            from TestTypes
            where MyInteger < 0
            order by MyInteger desc
            """

        # Expect the \xae byte to come back as the literal escape text "\xaeb"
        expectedData = \
            [   [   -1, "A"],
                [   -2, "B\\xaeb"],
                [   -3, "C"],
                [   -4, "D"],
            ]

        # Verify rows inserted with properly escaped strings
        results = DBUtil.execute(verifyQuery)
        self.assertEqual(expectedData, results)

    def test_identityQuery(self):
        """After an insert, identityQuery should report the newly generated
        serial ID, matching a direct lookup of the inserted row."""
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        # Capture the current max serial value to compare against
        results = DBUtil.execute("select max(TestTypes_id) from TestTypes")
        lastSeq = results[0][0]

        conn = DBUtil.connection()
        try:
            cur = conn.cursor()
            cur.execute(
                "insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)"
            )
            # identityQuery should yield the ID generated by the insert above
            cur.execute(DBUtil.identityQuery("TestTypes"))
            self.assertEqual(lastSeq + 1, cur.fetchone()[0])

            # Cross-check: look the row up directly by its values
            cur.execute(
                "select TestTypes_id from TestTypes where MyText = 'Another' and MyInteger = 255"
            )
            self.assertEqual(lastSeq + 1, cur.fetchone()[0])
        finally:
            conn.close()

    def test_nullCheck(self):
        """Verify 'is null' / 'is not null' queries distinguish rows with and
        without values in real and datetime columns."""
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        conn = DBUtil.connection()
        try:
            # One row with nulls in MyReal/MyDateTime, one fully populated
            DBUtil.execute(
                "insert into TestTypes (MyText,MyInteger) values ('Test With Null', 255)",
                conn=conn)
            DBUtil.execute(
                "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime) values ('Test With Not Null', 255, 1.23, '2005-03-06')",
                conn=conn)

            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyReal is null",
                conn=conn)
            self.assertEqual('Test With Null', result[0][0])
            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyReal is not null",
                conn=conn)
            self.assertEqual('Test With Not Null', result[0][0])

            # Would not work with MySQL if used TIMESTAMP data type.  Should be DATETIME.  (TIMESTAMP tries to auto-fill values, so no nulls allowed?)
            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyDateTime is null",
                conn=conn)
            self.assertEqual('Test With Null', result[0][0])
            result = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and MyDateTime is not null",
                conn=conn)
            self.assertEqual('Test With Not Null', result[0][0])

        finally:
            conn.close()

    def test_findOrInsertItem(self):
        """Exercise findOrInsertItem: insert-when-missing, find-when-present,
        optional insert data, retrieveCol, and forceUpdate behaviors."""
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        searchDict = {}
        insertDict = {}

        searchDict["TestTypes_id"] = +123
        log.debug("Insert a new item using default params")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict)
        self.assertEqual(+123, data)
        self.assertEqual(True, isNew)

        log.debug("Find the existing item")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict)
        self.assertEqual(+123, data)
        self.assertEqual(False, isNew)

        # Insert data should be ignored when the search already matches a row
        insertDict["TestTypes_id"] = +456
        log.debug("Find existing item, with optional insert data")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict,
                                                insertDict)
        self.assertEqual(+123, data)
        self.assertEqual(False, isNew)

        searchDict["TestTypes_id"] = +789
        insertDict["TestTypes_id"] = +789
        insertDict["MyInteger"] = 123
        log.debug("Insert a new item with actual data")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict,
                                                insertDict)
        self.assertEqual(+789, data)
        self.assertEqual(True, isNew)

        # retrieveCol returns that column's value instead of the search key;
        # MyText was never set here, so None comes back
        searchDict["TestTypes_id"] = +234
        insertDict["TestTypes_id"] = +234
        log.debug("Retrieve a different column")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes",
                                                searchDict,
                                                insertDict,
                                                retrieveCol="MyText")
        self.assertEqual(None, data)
        self.assertEqual(True, isNew)

        searchDict["TestTypes_id"] = +345
        insertDict["TestTypes_id"] = +345
        insertDict["MyText"] = "testText"
        log.debug("Insert and retrieve a different column")
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes",
                                                searchDict,
                                                insertDict,
                                                retrieveCol="MyText")
        self.assertEqual("testText", data)
        self.assertEqual(True, isNew)

        insertDict["MyText"] = "newText"
        log.debug(
            "Try inserting a different value under an existing row.  Should NOT work"
        )
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes",
                                                searchDict,
                                                insertDict,
                                                retrieveCol="MyText")
        self.assertEqual("testText", data)
        self.assertEqual(False, isNew)

        log.debug(
            "Try inserting a different value under an existing row, but force the update"
        )
        insertDict["MyText"] = "newText"
        (data, isNew) = DBUtil.findOrInsertItem("TestTypes",
                                                searchDict,
                                                insertDict,
                                                retrieveCol="MyText",
                                                forceUpdate=True)
        self.assertEqual("newText", data)
        self.assertEqual(False, isNew)

    def test_updateFromFile(self):
        """updateFromFile should fill in previously blank rows, keyed by the
        first column, with values from the tab-delimited data file."""
        # Create a test data file to insert, and verify no errors
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        # Insert some blank data first to update (only the ID column set)
        for idValue in self.ID_DATA:
            DBUtil.execute(
                "insert into TestTypes (" + self.ID_COL + ") values (%s)",
                (idValue, ))

        # Negative test case: rows should not yet match the expected data
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertNotEqual(self.DATA_ROWS, results)

        # Now do the actual update from the file
        DBUtil.updateFromFile(self.DATA_FILE,
                              self.DATA_TABLE,
                              self.COL_NAMES,
                              delim="\t")

        results = DBUtil.execute(self.DATA_QUERY)
        self.assertEqual(self.DATA_ROWS, results)

    def test_updateFromFile_commandline(self):
        """Like test_updateFromFile, but drives the update through the
        higher-level DBUtil.main command-line interface with data piped in
        through a redirected sys.stdin.

        The second half repeats the update using two key columns (-n2), where
        MyText serves both as a lookup key and as a value column to modify.
        """
        DBUtil.runDBScript(
            self.SCRIPT_FILE,
            False)  # Assume this works based on test_runDBScript method

        # Insert some blank data first to update
        for idValue in self.ID_DATA:
            DBUtil.execute(
                "insert into TestTypes (" + self.ID_COL + ") values (%s)",
                (idValue, ))

        # Negative test case
        results = DBUtil.execute(self.DATA_QUERY)
        self.assertNotEqual(self.DATA_ROWS, results)

        # Now do the actual update from the file, but build in column names to data file
        dataFileWithCols = StringIO()
        dataFileWithCols.write(self.DATA_COLS)
        dataFileWithCols.write(self.DATA_FILE.getvalue())
        dataFileWithCols = StringIO(dataFileWithCols.getvalue())

        # Fix: remember and restore sys.stdin so this test cannot pollute
        # any test that runs after it (the original left stdin redirected).
        savedStdin = sys.stdin
        try:
            sys.stdin = dataFileWithCols
            argv = ["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-d\\t"]
            DBUtil.main(argv)

            # Verify positive results
            results = DBUtil.execute(self.DATA_QUERY)
            self.assertEqual(self.DATA_ROWS, results)

            ########################################################
            # Repeat test but data file will use more than one key column (adding MyText)
            # Further note that MyText is used as both a key column to look up the row to update
            #   and as a value column to modify
            dataFileWithCols = StringIO()
            dataFileWithCols.write("MyInteger\tMyText\tMyText\tMyReal\tMyYesNo\n")
            dataFileWithCols.write("100\tATest\tAAA\tNone\t" + TRUE_STR + "\t\n")
            dataFileWithCols.write("200\tNone\tBBB\t222.2\tNone\t\n")
            dataFileWithCols.write("300\tCTest\tNone\t333.3\t" + TRUE_STR + "\t\n")
            dataFileWithCols = StringIO(dataFileWithCols.getvalue())

            # Expected results after this update
            self.DATA_ROWS = []
            self.DATA_ROWS.append([
                100,
                None,
                True,
                "AAA",
            ])
            # NOTE(review): expects the original MyReal 200.2 to persist for row 200,
            #   presumably because its null MyText key cannot match -- confirm against DATA_FILE
            self.DATA_ROWS.append([
                200,
                200.2,
                False,
                None,
            ])
            # This row is unchanged, because one of the key values cannot be found as null
            self.DATA_ROWS.append([
                300,
                333.3,
                True,
                None,
            ])

            # Negative test case
            results = DBUtil.execute(self.DATA_QUERY)
            self.assertNotEqual(self.DATA_ROWS, results)

            # Now do the actual update from the file, but with an extra parameter specifying 2 key columns
            sys.stdin = dataFileWithCols
            argv = ["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-n2"]
            DBUtil.main(argv)

            # Verify positive results
            results = DBUtil.execute(self.DATA_QUERY)
            self.assertEqual(self.DATA_ROWS, results)
        finally:
            sys.stdin = savedStdin

    def test_deleteRows(self):
        """Exercise DBUtil.deleteRows both by default ID column and by an explicit key column."""
        DBUtil.runDBScript(self.SCRIPT_FILE, False)

        countQuery = "select count(*) from TestTypes;"
        tableName = "TestTypes"
        columnNames = self.DATA_COLS.split()

        # Load test rows, capturing the generated IDs for deletion
        idFile = StringIO()
        DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None, idFile)
        idValues = idFile.getvalue().split()

        # Row count should drop by exactly the number of deleted IDs
        beforeCount = DBUtil.execute(countQuery)[0][0]
        DBUtil.deleteRows("TestTypes", idValues)
        self.assertEqual(beforeCount - len(idValues),
                         DBUtil.execute(countQuery)[0][0])

        # Reinsert the test data, then delete by a non-default key column instead
        idFile = StringIO()
        DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None, idFile)

        nonDefaultIds = [100, 200]
        initialCount = DBUtil.execute(countQuery)[0][0]
        DBUtil.deleteRows("TestTypes", nonDefaultIds, "MyInteger")
        afterCount = DBUtil.execute(countQuery)[0][0]
Esempio n. 7
0
	def _getNonNullBaseNames(self):
		"""Query order_result_stat for rows whose max_result_flag is non-null
		and dump them to base_names.csv (one row per base_name/flag pair).
		"""
		query = SQLQuery()
		# SELECT
		query.addSelect(BASE_NAME)
		query.addSelect('max_result_flag')
		# FROM
		query.addFrom('order_result_stat')
		# WHERE
		query.addWhere('max_result_flag is not null')

		# Fix: use function-call print form. The original Python-2-only
		# "print query" statements break under Python 3, while print(x)
		# with a single argument behaves identically on both versions
		# (and matches the print(...) calls used elsewhere in this file).
		print(query)
		print(query.getParams())
		DBUtil.runDBScript(self.SCRIPT_FILE, False)
		results = DBUtil.execute(str(query), query.getParams())

		pd.DataFrame(results, columns=query.select).to_csv('base_names.csv', index=False)
Esempio n. 8
0
 def loadStateInfo(self, stateIds=None, conn=None):
     """Fetch id/name/description records for the given sim states.

     stateIds - optional collection of sim_state_ids to restrict to
         (all states when None)
     conn - optional externally managed DB connection (left open if given)
     """
     ownConnection = conn is None
     if ownConnection:
         conn = self.connFactory.connection()
     try:
         query = SQLQuery()
         query.addSelect("ss.sim_state_id")
         query.addSelect("ss.name")
         query.addSelect("ss.description")
         query.addFrom("sim_state as ss")
         if stateIds is not None:
             query.addWhereIn("ss.sim_state_id", stateIds)
         dataTable = DBUtil.execute(query,
                                    includeColumnNames=True,
                                    conn=conn)
         return modelListFromTable(dataTable)
     finally:
         if ownConnection:
             conn.close()
Esempio n. 9
0
    def signOrders(self,
                   userId,
                   patientId,
                   currentTime,
                   orderItemIds,
                   discontinuePatientOrderIds=None,
                   conn=None):
        """Commit new order item IDs for the given patient starting now,
        and discontinue (set end date) any existing orders specified.
        Record any patient state transitions the new orders trigger.

        userId - sim_user_id entering the orders
        patientId - sim_patient_id the orders apply to
        currentTime - relative simulation time used as the order start time
        orderItemIds - clinical_item_ids to record as new orders (de-duplicated)
        discontinuePatientOrderIds - optional sim_patient_order_ids whose
            relative_time_end is set to currentTime; any of the patient's orders
            ended at or before their own start time are then deleted outright
        conn - optional externally managed DB connection (left open if given)
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # Denormalized recording of current patient state to facilitate easy retrieval linked to orders later
            patientInfo = self.loadPatientInfo([patientId],
                                               currentTime,
                                               conn=conn)[0]
            stateId = patientInfo["sim_state_id"]
            postStateIdByItemId = patientInfo["postStateIdByItemId"]

            orderItemIdSet = set(orderItemIds)
            # Ensure unique and facilitate set operations

            # One insert per distinct item; only clinical_item_id varies per row
            insertDict = {
                "sim_user_id": userId,
                "sim_patient_id": patientId,
                "sim_state_id": stateId,
                "relative_time_start": currentTime
            }
            for itemId in orderItemIdSet:
                insertDict["clinical_item_id"] = itemId
                DBUtil.insertRow("sim_patient_order", insertDict, conn=conn)

            # See if any of these new orders triggered state transitions
            # (dict.viewkeys is Python 2 dict-view syntax; intersection with the order set)
            triggerItemIds = postStateIdByItemId.viewkeys() & orderItemIdSet
            while triggerItemIds:  # Found a trigger item
                triggerItemId = None
                if len(
                        triggerItemIds
                ) > 1:  # Found multiple. Weird. Arbitrarily act on the one that appeared first in the input list
                    for itemId in orderItemIds:
                        if itemId in triggerItemIds:
                            triggerItemId = itemId
                            break
                else:
                    triggerItemId = triggerItemIds.pop()
                postStateId = postStateIdByItemId[triggerItemId]

                # Record the state transition
                self.recordStateTransition(patientId,
                                           stateId,
                                           postStateId,
                                           currentTime,
                                           conn=conn)

                # Reload patientInfo to reflect new patient state
                patientInfo = self.loadPatientInfo([patientId],
                                                   currentTime,
                                                   conn=conn)[0]
                stateId = patientInfo["sim_state_id"]
                postStateIdByItemId = patientInfo["postStateIdByItemId"]

                orderItemIdSet.discard(triggerItemId)
                # Don't keep looking for this one, important to avoid infinite loop
                triggerItemIds = postStateIdByItemId.viewkeys(
                ) & orderItemIdSet

            if discontinuePatientOrderIds is not None:
                updateDict = {
                    "relative_time_end": currentTime
                }
                for patientOrderId in discontinuePatientOrderIds:
                    DBUtil.updateRow("sim_patient_order",
                                     updateDict,
                                     patientOrderId,
                                     conn=conn)
                # If order is discontinued/cancelled at the same (or before) time of entry,
                #   take that as a signal to cleanup and delete the record altogether
                #   (effectively there was no time at which the order was ever allowed to exist)
                deleteQuery = SQLQuery()
                deleteQuery.delete = True
                deleteQuery.addFrom("sim_patient_order")
                deleteQuery.addWhereEqual("sim_patient_id", patientId)
                deleteQuery.addWhere(
                    "relative_time_end <= relative_time_start")
                DBUtil.execute(deleteQuery, conn=conn)
        finally:
            # NOTE(review): commit sits in finally, so partial work is committed
            #   even when an exception occurred above -- confirm this is intentional
            conn.commit()
            if not extConn:
                conn.close()
Esempio n. 10
0
    def copyPatientTemplate(self, patientData, templatePatientId, conn=None):
        """Clone a template patient into a new sim_patient record.

        Shallow attributes come from the template, overridden by any values in
        patientData.  Patient states and physician orders up to (and including)
        relative time zero are copied as well; later records are left behind
        since they reflect real user interaction history.  Returns the new
        patient ID.
        """
        ownConnection = conn is None
        if ownConnection:
            conn = self.connFactory.connection()
        try:
            # Start from the template's shallow attributes minus its primary key,
            # letting caller-specified values win
            templatePatientData = DBUtil.loadRecordModelById("sim_patient",
                                                             templatePatientId,
                                                             conn=conn)
            del templatePatientData["sim_patient_id"]
            templatePatientData.update(patientData)
            DBUtil.insertRow("sim_patient", templatePatientData, conn=conn)
            patientId = DBUtil.execute(DBUtil.identityQuery("sim_patient"),
                                       conn=conn)[0][0]

            # Replicate the template's states up through time zero
            stateQuery = SQLQuery()
            stateQuery.addSelect("*")
            stateQuery.addFrom("sim_patient_state as sps")
            stateQuery.addWhereEqual("sps.sim_patient_id", templatePatientId)
            stateQuery.addWhereOp("relative_time_start", "<=", 0)
            stateQuery.addOrderBy("relative_time_start")
            stateTable = DBUtil.execute(stateQuery,
                                        includeColumnNames=True,
                                        conn=conn)
            stateModels = modelListFromTable(stateTable)
            lastIndex = len(stateModels) - 1
            for index, stateModel in enumerate(stateModels):
                del stateModel["sim_patient_state_id"]  # Let DB assign a new key
                if index == lastIndex:
                    # Last copied state stays open-ended for the simulation
                    del stateModel["relative_time_end"]
                stateModel["sim_patient_id"] = patientId
                DBUtil.insertRow("sim_patient_state", stateModel, conn=conn)

            # Replicate the template's orders up through time zero
            orderQuery = SQLQuery()
            orderQuery.addSelect("*")
            orderQuery.addFrom("sim_patient_order as spo")
            orderQuery.addWhereEqual("sim_patient_id", templatePatientId)
            orderQuery.addWhereOp("relative_time_start", "<=", 0)
            orderQuery.addOrderBy("relative_time_start")
            orderTable = DBUtil.execute(orderQuery,
                                        includeColumnNames=True,
                                        conn=conn)
            for orderModel in modelListFromTable(orderTable):
                del orderModel["sim_patient_order_id"]  # Let DB assign a new key
                orderModel["sim_patient_id"] = patientId
                DBUtil.insertRow("sim_patient_order", orderModel, conn=conn)

            conn.commit()  # Transactional commit for the multi-step copy
            return patientId
        finally:
            if ownConnection:
                conn.close()
Esempio n. 11
0
    def loadPatientInfo(self, patientIds=None, relativeTime=None, conn=None):
        """Load basic information about the specified patients.
        Report patient state at given time, or default to time zero.

        patientIds - optional list of sim_patient_ids to restrict to (all patients if None)
        relativeTime - simulation time offset at which to evaluate each patient's state
        conn - optional externally managed DB connection (left open if given)

        Returns a list of patient record models, each augmented with lookup
        tables of possible state transitions from the patient's current state.
        May record overdue time-triggered state transitions as a side effect,
        recursively reloading the affected patient to reflect the new state.
        """
        if relativeTime is None:
            relativeTime = 0
            # Just look for time zero default then

        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # Join patient records to the state record active at the requested time
            query = SQLQuery()
            query.addSelect("sp.sim_patient_id")
            query.addSelect("sp.name")
            query.addSelect("sp.age_years")
            query.addSelect("sp.gender")
            query.addSelect("s.sim_state_id")
            query.addSelect("s.name as state_name")
            query.addSelect("s.description as state_description")
            query.addSelect("sps.relative_time_start")
            query.addSelect("sps.relative_time_end")
            query.addFrom("sim_patient as sp")
            query.addFrom("sim_patient_state as sps")
            query.addFrom("sim_state as s")
            query.addWhere("sp.sim_patient_id = sps.sim_patient_id")
            query.addWhere("sps.sim_state_id = s.sim_state_id")
            if patientIds is not None:
                query.addWhereIn("sp.sim_patient_id", patientIds)

            # Look for the state that matches the given relative time offset
            # (started at or before it, and either open-ended or not yet ended)
            query.addWhereOp("sps.relative_time_start", "<=", relativeTime)
            query.openWhereOrClause()
            query.addWhere("sps.relative_time_end is null")
            query.addWhereOp("sps.relative_time_end", ">", relativeTime)
            query.closeWhereOrClause()

            query.addOrderBy("sp.name")

            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)

            if len(dataModels) > 0:
                # Secondary query to build lookup table of possible state transition options from patient current states
                subQuery = SQLQuery()
                subQuery.addSelect("pre_state_id")
                subQuery.addSelect("post_state_id")
                subQuery.addSelect("clinical_item_id")
                subQuery.addSelect("time_trigger")
                subQuery.addFrom("sim_state_transition as sst")
                subQuery.addWhereIn(
                    "pre_state_id",
                    columnFromModelList(dataModels, "sim_state_id"))
                subResults = DBUtil.execute(subQuery, conn=conn)

                # For each pre-state, track which clinical items or times trigger which post-states
                postStateIdByItemIdByPreStateId = dict()
                postStateIdTimeTriggerByPreStateId = dict()
                for preStateId, postStateId, itemId, timeTrigger in subResults:
                    if preStateId not in postStateIdByItemIdByPreStateId:
                        postStateIdByItemIdByPreStateId[preStateId] = dict()
                    postStateIdByItemIdByPreStateId[preStateId][
                        itemId] = postStateId

                    # A non-null time_trigger means this pre-state can also
                    # transition purely by elapsed time
                    if timeTrigger is not None:
                        postStateIdTimeTriggerByPreStateId[preStateId] = (
                            postStateId, timeTrigger)

                # Record in patient result models for retrieval
                for i, dataModel in enumerate(dataModels):
                    patientId = dataModel["sim_patient_id"]
                    stateId = dataModel["sim_state_id"]

                    dataModel["postStateIdByItemId"] = dict()
                    if stateId in postStateIdByItemIdByPreStateId:
                        dataModel[
                            "postStateIdByItemId"] = postStateIdByItemIdByPreStateId[
                                stateId]
                    # NOTE(review): the empty default below is stored under key
                    #   "postStateIdTimeTriggerByPreStateId" while the populated and
                    #   consumed key is "postStateIdTimeTrigger" -- looks like a naming
                    #   slip; the empty default is never read back. Confirm intent.
                    dataModel["postStateIdTimeTriggerByPreStateId"] = dict()
                    if stateId in postStateIdTimeTriggerByPreStateId:
                        dataModel[
                            "postStateIdTimeTrigger"] = postStateIdTimeTriggerByPreStateId[
                                stateId]

                    if dataModel[
                            "relative_time_end"] is None and "postStateIdTimeTrigger" in dataModel:
                        # Check that we haven't passed (and should thus trigger) a time-based state transition
                        (postStateId,
                         timeTrigger) = dataModel["postStateIdTimeTrigger"]
                        preStateTime = dataModel["relative_time_start"]
                        postStateTriggerTime = (preStateTime + timeTrigger)

                        if postStateTriggerTime <= relativeTime:  # Trigger state transition just by time elapsed
                            #print >> sys.stderr, relativeTime, preStateTime, stateId, postStateTriggerTime, postStateId
                            self.recordStateTransition(patientId,
                                                       stateId,
                                                       postStateId,
                                                       postStateTriggerTime,
                                                       conn=conn)
                            # State change which can yield new triggers, so recursively reload.
                            # Small risk of infinite recusion if timeTriggers are zero. Otherwise, should converge as each recursion will update the preState relativeTimeStart
                            dataModels[i] = self.loadPatientInfo([patientId],
                                                                 relativeTime,
                                                                 conn=conn)[0]

            return dataModels
        finally:
            if not extConn:
                conn.close()
Esempio n. 12
0
    def parseParams(self, paramDict):
        """Populate query attributes from a dictionary of key-value pairs.

        Facilitates easy setup from web or command-line text interfaces.
        Unrecognized keys are ignored; sourceTables is split on commas and
        resultCount is coerced to int.
        """
        # Keys copied through verbatim when present
        for attrName in ("searchStr", "analysisStatus", "sortField"):
            if attrName in paramDict:
                setattr(self, attrName, paramDict[attrName])
        # Keys requiring conversion from their text form
        if "sourceTables" in paramDict:
            self.sourceTables = paramDict["sourceTables"].split(",")
        if "resultCount" in paramDict:
            self.resultCount = int(paramDict["resultCount"])


if __name__ == "__main__":
    # Grade every simulated patient case in the database and report the results
    sim_manager = SimManager()

    query = SQLQuery()
    query.addSelect("distinct sim_patient_id")
    query.addFrom("sim_patient")

    patient_rows = DBUtil.execute(query)
    patient_ids = [row[0] for row in patient_rows]

    grades = sim_manager.grade_cases(patient_ids, "Jonathan Chen")
    print(grades)
    print("length: " + str(len(grades)))
Esempio n. 13
0
    def querySourceItems(self,
                         startDate=None,
                         endDate=None,
                         progress=None,
                         conn=None):
        """Query the database for a list of all source clinical items
        (diagnosed problems in this case) and yield the results one at a time.

        startDate / endDate - restrict to items whose noted_date is on or after
            startDate and strictly before endDate
        progress - optional progress meter; its total is set from a count query
            and Update() is called once per fully processed source row
        conn - optional externally managed DB connection (left open if given)

        Yields RowItemModels.  A single source row may yield several models:
        one per ICD9/ICD10 code found on the row, plus extra copies for each
        truncated parent code so component diagnoses aggregate into general
        categories.  NOTE: the same dict object is mutated and re-yielded;
        consumers that retain results should copy them.
        """
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        # Facilitate rapid lookup of ICD9/ICD10 display strings
        if self.icd9_str_by_code is None:
            self.prepare_icd9_lookup(conn=conn)
        if self.icd10_str_by_code is None:
            self.prepare_icd10_lookup(conn=conn)

        # Column headers to query for that map to respective fields in analysis table
        headers = [
            "pat_id", "pat_enc_csn_id", "noted_date", "resolved_date",
            "dx_icd9_code", "dx_icd9_code_list", "dx_icd10_code_list",
            "data_source"
        ]

        query = SQLQuery()
        for header in headers:
            query.addSelect(header)
        query.addFrom("stride_dx_list as dx")
        query.addWhere("noted_date is not null")
        # Only work with elements that have dates assigned for now
        if startDate is not None:
            query.addWhereOp("noted_date", ">=", startDate)
        if endDate is not None:
            query.addWhereOp("noted_date", "<", endDate)

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(),
                                            conn=conn)[0][0]

        cursor = conn.cursor()
        # Do one massive query, but yield data for one item at a time.
        cursor.execute(str(query), tuple(query.params))

        row = cursor.fetchone()
        while row is not None:
            row_model = RowItemModel(row, headers)

            # 2014-2017 data does not have dx_icd9_code. Instead, has
            # both dx_icd9_code_list and dx_icd10_code_list. For these items,
            # there is a one:many mapping of source item to converted item.
            # Collect icd10 codes.
            icd10_codes = set()
            if row_model['dx_icd10_code_list'] not in ['', None]:
                codes = row_model['dx_icd10_code_list'].split(',')
                for code in codes:
                    icd10_codes.add(code)
            # Collect icd9 codes.
            icd9_codes = set()
            if row_model['dx_icd9_code'] not in ['', None]:
                icd9_codes.add(row_model['dx_icd9_code'])
            else:
                if row_model['dx_icd9_code_list'] not in ['', None]:
                    codes = row_model['dx_icd9_code_list'].split(',')
                    for code in codes:
                        icd9_codes.add(code)

            # If there are no ICD codes, skip to next row.
            # (Skipped rows do not tick the progress meter -- presumably
            # acceptable since totals are only an estimate; confirm.)
            if len(icd9_codes) == 0 and len(icd10_codes) == 0:
                row = cursor.fetchone()
                continue

            # Process ICD codes.
            # Build a temporary dictionary so that a single loop can take care
            # of both ICD9 and ICD10 without mixing the data.
            icd_versions = {
                'ICD9': {
                    'codes': icd9_codes,
                    'lookup': self.icd9_str_by_code
                },
                'ICD10': {
                    'codes': icd10_codes,
                    'lookup': self.icd10_str_by_code
                }
            }

            for version, info in icd_versions.items():
                icd_codes = info['codes']
                icd_lookup = info['lookup']

                for icd_code in icd_codes:
                    # Look up string. Otherwise default to ICD code.
                    row_model['icd_str'] = icd_code
                    if icd_code in icd_lookup:
                        row_model['icd_str'] = icd_lookup[icd_code]
                    row_model['dx_icd_code'] = version + '.' + icd_code

                    # Yield one row worth of data at a time to avoid having to keep
                    # the whole result set in memory.
                    yield row_model

                    orig_code = icd_code
                    if SUBCODE_DELIM in orig_code:
                        # Insert copies of item for parent node codes to aggregate
                        # component diagnoses into general categories.
                        while icd_code[-1] != SUBCODE_DELIM:
                            icd_code = icd_code[:-1]  # Truncate trailing digit
                            if icd_code in icd_lookup:
                                # Found a a matching parent code, so yield this
                                # version.
                                row_model['icd_str'] = icd_lookup[icd_code]
                                row_model[
                                    'dx_icd_code'] = version + '.' + icd_code
                                yield row_model

                        # One more cycle to get parent node with no subcode
                        # delimiter at all.
                        icd_code = icd_code[:-1]  # Truncate off SUBCODE_DELIM
                        if icd_code in icd_lookup:
                            row_model['icd_str'] = icd_lookup[icd_code]
                            row_model['dx_icd_code'] = version + '.' + icd_code

                            yield row_model

            row = cursor.fetchone()
            # Fix: only tick the progress meter when one was supplied; the
            # original called progress.Update() unconditionally and raised
            # AttributeError whenever progress was None.
            if progress is not None:
                progress.Update()

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close()

        if not extConn:
            conn.close()
Esempio n. 14
0
    def querySourceItems(self, convOptions, progress=None, conn=None):
        """Yield treatment-team source rows one at a time as normalized models.

        Rows are filtered by convOptions.startDate / convOptions.endDate against
        trtmnt_tm_begin_date, and each raw row is expanded through
        normalizeRowModel before being yielded.
        """
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        # Column headers to query for that map to respective fields in analysis table
        headers = ["stride_treatment_team_id", "pat_id", "pat_enc_csn_id",
                   "trtmnt_tm_begin_date", "trtmnt_tm_end_date",
                   "treatment_team", "prov_name"]

        query = SQLQuery()
        for header in headers:
            query.addSelect(header)
        query.addFrom("stride_treatment_team")
        if convOptions.startDate is not None:
            query.addWhereOp("trtmnt_tm_begin_date", ">=", convOptions.startDate)
        if convOptions.endDate is not None:
            # Still use begin date as common filter value
            query.addWhereOp("trtmnt_tm_begin_date", "<", convOptions.endDate)

        # Pre-count matching rows so callers can estimate overall progress
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

        # One big query, streamed a row at a time to bound memory use
        cursor = conn.cursor()
        cursor.execute(str(query), tuple(query.params))

        row = cursor.fetchone()
        while row is not None:
            rowModel = RowItemModel(row, headers)
            for normalizedModel in self.normalizeRowModel(rowModel, convOptions, conn=conn):
                yield normalizedModel
            row = cursor.fetchone()

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close()

        if not extConn:
            conn.close()
Esempio n. 15
0
    def grade_cases(self, sim_patient_ids, sim_grader_id, conn=None):
        """Given the identifiers for a bunch of simulated physician-patient case
        records, and the identifier for a particular grading key to use,
        calculate what grade each case would get based on the choices made
        and return a dictionary of case grade records (keyed by the case ID).
        Each grade record also carries the most-graded and most-active user
        for the case.
        """
        ext_conn = True
        if conn is None:
            conn = self.connFactory.connection()
            ext_conn = False
        try:
            # Inner query retrieves physician-patient cases with ranking group_names (to later select first)
            # per case for specified cases. Each NULL group_name is treated as a separate group by assigning it
            # sim_patient_order_id. It also omits Default user (sim_user_id = 0) from grading.
            inner_query = SQLQuery()
            inner_query.addSelect("score")
            inner_query.addSelect(
                "rank() over ("  # ranks rows incrementally in the same group
                "    partition by coalesce(group_name, sim_patient_order_id::text), sim_patient_id"
                "    order by sim_patient_order_id"
                ")")
            inner_query.addSelect("sim_user_id")
            inner_query.addSelect("sim_patient_id")
            inner_query.addFrom("sim_patient_order spo")
            inner_query.addJoin(
                "sim_grading_key sgk",
                "sgk.clinical_item_id = spo.clinical_item_id"
                "    and sgk.sim_state_id = spo.sim_state_id")
            inner_query.addWhereEqual("sgk.sim_grader_id", sim_grader_id)
            inner_query.addWhereNotEqual(
                "spo.sim_user_id",
                0)  # 0 = ignore 'Default user', sets up initial cases
            inner_query.addWhereIn("spo.sim_patient_id", sim_patient_ids)
            inner_query.addOrderBy("relative_time_start")
            inner_query.addOrderBy("sim_patient_order_id")

            # Outer query sums the score per patient case and selects most graded physician for the case.
            # Theoretically, it isn't necessarily the most active physician for the case since his orders
            # might have been dropped by selecting only the first record within group_name group.
            query = SQLQuery()
            query.addSelect("sum(score) as total_score")
            query.addSelect("sim_patient_id")
            query.addSelect(
                "mode() within group ("  # mode() selects most frequent value within group
                "    order by sim_user_id"
                ") as most_graded_user_id")
            query.addFrom("(" + str(inner_query) + ") as ranked_groups")
            query.addWhereEqual("ranked_groups.rank",
                                1)  # count only first order in the same group
            query.addGroupBy("sim_patient_id")

            query_params = inner_query.getParams() + query.getParams()
            grades_table = DBUtil.execute(query,
                                          query_params,
                                          includeColumnNames=True,
                                          conn=conn)
            grades_model = modelListFromTable(grades_table)

            # get most active users for the cases
            most_active_user_query = SQLQuery()
            most_active_user_query.addSelect("sim_patient_id")
            most_active_user_query.addSelect("mode() within group ("
                                             "    order by sim_user_id"
                                             ") as most_active_user_id")
            most_active_user_query.addFrom("sim_patient_order")
            most_active_user_query.addWhereNotEqual("sim_user_id",
                                                    0)  # ignore Default user
            most_active_user_query.addWhereIn("sim_patient_id",
                                              sim_patient_ids)
            most_active_user_query.addGroupBy("sim_patient_id")
            most_active_user_query.addOrderBy("sim_patient_id")

            most_active_user_table = DBUtil.execute(most_active_user_query,
                                                    includeColumnNames=True,
                                                    conn=conn)
            most_active_user_model = modelListFromTable(most_active_user_table)
            # make a dict by sim_patient_id out of results - will be used for combining
            most_active_user_dict = {
                most_active_user["sim_patient_id"]: most_active_user
                for most_active_user in most_active_user_model
            }

            # Combine results: fold the most-active-user info into each grade
            # record and key the result by patient case ID.
            # (Fix: the previous dict comprehension stored the return value of
            # dict.update(), which is always None, so every case mapped to None
            # instead of its grade record.)
            complete_grades = {}
            for grade in grades_model:
                grade.update(most_active_user_dict[grade["sim_patient_id"]])
                complete_grades[grade["sim_patient_id"]] = grade

            return complete_grades

        finally:
            if not ext_conn:
                conn.close()
Esempio n. 16
0
    def loadPatientLastEventTime(self, patientId, conn=None):
        """Find the last simulated time where the patient received any user orders (or default to time 0).
        Makes for natural starting point for resuming a simulation.
        Note: Misses cases where patient state also changed, not triggered by order, but by just time lag.

        patientId - sim_patient_id whose order history to inspect
        conn - optional externally managed DB connection (left open if given)
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # Single row of aggregates: latest start, latest end, and constant 0
            query = SQLQuery()
            query.addSelect("max(relative_time_start)")
            query.addSelect("max(relative_time_end)")
            query.addSelect("0")
            # Simply select 0 as default time if don't find any others
            query.addFrom("sim_patient_order as po")
            query.addWhereEqual("sim_patient_id", patientId)

            # NOTE(review): SQL max() yields NULL (Python None) when no orders exist,
            #   so this Python max() over a mixed None/int row relies on Python 2
            #   ordering (None < int); under Python 3 it raises TypeError -- confirm
            #   intended runtime.
            lastOrderTime = max(DBUtil.execute(query, conn=conn)[0])
            return lastOrderTime
        finally:
            if not extConn:
                conn.close()
Esempio n. 17
0
def add_sim_case_column_to(csv):
    """Return *csv* (a DataFrame) with a sim_case column merged in.

    Looks up each patient's case name by joining the grading key to the
    patient orders on clinical item and state, then merges on patient id.
    """
    case_query = SQLQuery()
    for select_col in ("sim_patient_id", "sim_case_name as sim_case"):
        case_query.addSelect(select_col)
    case_query.addFrom("sim_grading_key sgk")
    case_query.addJoin(
        "sim_patient_order spo",
        "sgk.clinical_item_id = spo.clinical_item_id and spo.sim_state_id = sgk.sim_state_id"
    )
    # Collapse to one (patient, case) pair per patient
    for group_col in ("sim_patient_id", "sim_case_name"):
        case_query.addGroupBy(group_col)
    case_rows = DBUtil.execute(str(case_query))

    case_frame = pd.DataFrame(case_rows,
                              columns=['sim_patient_id', 'sim_case'])
    return pd.merge(csv,
                    case_frame,
                    left_on='patient',
                    right_on='sim_patient_id')
Esempio n. 18
0
    def loadPatientOrders(self,
                          patientId,
                          currentTime,
                          loadActive=True,
                          conn=None):
        """Load orders for the given patient that exist by the specified current time point.

        loadActive - Specify whether to load active vs. inactive/completed
            orders.  Set to None to load both.
        Returns a list of row models (order columns plus clinical item and
        category descriptors).
        """
        ownConnection = conn is None
        if ownConnection:
            conn = self.connFactory.connection()
        try:
            orderQuery = SQLQuery()
            # Order row columns plus its clinical item / category descriptors
            for selectCol in ("po.sim_patient_order_id", "po.sim_user_id",
                              "po.sim_patient_id", "po.sim_state_id",
                              "po.clinical_item_id", "po.relative_time_start",
                              "po.relative_time_end", "ci.name",
                              "ci.description", "cic.source_table",
                              "cic.description as category_description"):
                orderQuery.addSelect(selectCol)
            for sourceTable in ("sim_patient_order as po",
                                "clinical_item as ci",
                                "clinical_item_category as cic"):
                orderQuery.addFrom(sourceTable)
            orderQuery.addWhere("po.clinical_item_id = ci.clinical_item_id")
            orderQuery.addWhere(
                "ci.clinical_item_category_id = cic.clinical_item_category_id")
            orderQuery.addWhereEqual("sim_patient_id", patientId)
            orderQuery.addWhereOp("relative_time_start", "<=", currentTime)

            if loadActive:
                # Keep only orders still open (no end time, or ends after now)
                orderQuery.openWhereOrClause()
                orderQuery.addWhere("relative_time_end is null")
                orderQuery.addWhereOp("relative_time_end", ">", currentTime)
                orderQuery.closeWhereOrClause()
            # NOTE: original had a commented-out inactive-only filter for the
            #   loadActive is False case; loadActive falsy currently loads both.

            if loadActive:
                # Organize currently active orders by category
                sortColumns = ("cic.description", "ci.description",
                               "relative_time_start")
            else:
                # Otherwise chronologic order
                sortColumns = ("relative_time_start", "cic.description",
                               "ci.description")
            for sortCol in sortColumns:
                orderQuery.addOrderBy(sortCol)

            resultTable = DBUtil.execute(orderQuery,
                                         includeColumnNames=True,
                                         conn=conn)
            return modelListFromTable(resultTable)
        finally:
            if ownConnection:
                conn.close()
Esempio n. 19
0
    def setUp(self):
        """Prepare state for test cases.

        Builds, entirely in memory:
        - SCRIPT_FILE: SQL script creating a TestTypes table (exercising both
          comment styles, a primary key constraint, an index) and inserting
          three sample rows.
        - DATA_FILE / DATA_ROWS: tab-delimited input lines and the row values
          they are expected to parse into.
        - MULTI_LINE_DATA_FILE / MULTI_LINE_DATA_ROWS: variant with quoted
          headers, embedded/escaped quotes, and an escaped newline.
        - DATA_QUERY: query to read the loaded rows back for verification.
        """
        DBTestCase.setUp(self)

        # --- SQL setup script (written then re-wrapped as a readable StringIO) ---
        self.SCRIPT_FILE = StringIO()
        self.SCRIPT_FILE.write(
            "# Create table to test on.  Also testing that comment tag is recognized\n"
        )
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write("create table TestTypes\n")
        self.SCRIPT_FILE.write("(\n")
        self.SCRIPT_FILE.write("    TestTypes_id    serial,\n")
        self.SCRIPT_FILE.write(
            "    MyText          varchar(50),    /* Same as character varying, also test standard SQL comment tags */\n"
        )
        self.SCRIPT_FILE.write("    MyInteger       integer,\n")
        self.SCRIPT_FILE.write(
            "    MyReal          real,   -- Floating point number, also test standard SQL comment tag\n"
        )
        self.SCRIPT_FILE.write(
            "    MyDateTime      TIMESTAMP,   -- PostgreSQL uses TIMESTAMP, but MySQL doesn't do NULL values right, so have to use DATETIME for MySQL?\n"
        )
        self.SCRIPT_FILE.write("    MyYesNo         boolean\n")
        self.SCRIPT_FILE.write(");\n")
        self.SCRIPT_FILE.write(
            "ALTER TABLE TestTypes ADD CONSTRAINT TestTypes_id PRIMARY KEY (TestTypes_id);\n"
        )
        # Should auto-create testtypes_testtypes_id_seq sequence
        self.SCRIPT_FILE.write(
            "CREATE INDEX TestTypes_MyInteger_INDEX ON TestTypes(MyInteger);\n"
        )
        self.SCRIPT_FILE.write("\n")
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Sample Text',  123,123.45,'2004-09-08 19:41:47.292000',True);\n"
        )
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Joe Mama',     234,23.45,'1990-10-03 19:41:47.292000',False);\n"
        )
        self.SCRIPT_FILE.write(
            "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n"
        )
        self.SCRIPT_FILE.write(
            "values ('Mo Fo',        345,3.45,'2014-01-04 19:41:47.292000',True);\n"
        )
        self.SCRIPT_FILE.write("\n")
        # Re-wrap so the script can be read from the start
        self.SCRIPT_FILE = StringIO(self.SCRIPT_FILE.getvalue())

        # --- Simple tab-delimited data fixture ---
        self.DATA_TABLE = "TestTypes"
        self.DATA_COLS = "MyInteger\tMyReal\tMyYesNo\tMyText\n"

        self.DATA_FILE = StringIO()
        self.DATA_FILE.write('100\t100.1\tNone\tATest\n')
        self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\tNone\n')
        self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t\n')
        # Skip None tag at end of line, test that white space won't get lost
        self.DATA_FILE.write('300\t\t' + TRUE_STR + '\tCTest\n')
        self.DATA_FILE = StringIO(self.DATA_FILE.getvalue())

        # Expected parsed rows.  NOTE(review): four data lines above but only
        # three expected rows -- presumably the explicit 'None' text and the
        # empty trailing field both parse to the same NULL row; confirm intent.
        self.DATA_ROWS = []
        self.DATA_ROWS.append([
            100,
            100.1,
            None,
            "ATest",
        ])
        self.DATA_ROWS.append([
            200,
            200.2,
            False,
            None,
        ])
        self.DATA_ROWS.append([
            300,
            None,
            True,
            "CTest",
        ])

        # --- Variant with quoted headers, embedded quotes, escaped newline ---
        self.MULTI_LINE_DATA_FILE = StringIO()
        self.MULTI_LINE_DATA_FILE.write(
            'myinteger\t"MyReal"\t"MyYesNo"\tMyText\n')
        self.MULTI_LINE_DATA_FILE.write(
            '100\t100.1\tNone\t"""A"" Test and ""more"""\n')
        self.MULTI_LINE_DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t""\n')
        self.MULTI_LINE_DATA_FILE.write('300\tNone\t' + TRUE_STR +
                                        '\t"C\\nTest"\t\n')
        self.MULTI_LINE_DATA_FILE = StringIO(
            self.MULTI_LINE_DATA_FILE.getvalue())

        self.MULTI_LINE_DATA_ROWS = []
        self.MULTI_LINE_DATA_ROWS.append([
            100,
            100.1,
            None,
            '"A" Test and "more"',
        ])
        self.MULTI_LINE_DATA_ROWS.append([
            200,
            200.2,
            False,
            None,
        ])
        self.MULTI_LINE_DATA_ROWS.append([
            300,
            None,
            True,
            'C\\nTest',
        ])

        # ID summary data to make it easier to verify stuff
        self.COL_NAMES = self.DATA_COLS.split()
        self.ID_COL = self.COL_NAMES[0]
        self.ID_DATA = []
        for row in self.DATA_ROWS:
            self.ID_DATA.append(row[0])

        # Build query to get update rows
        self.DATA_QUERY = SQLQuery()
        for col in self.COL_NAMES:
            self.DATA_QUERY.addSelect(col)
        self.DATA_QUERY.addFrom(self.DATA_TABLE)
        self.DATA_QUERY.addWhereIn(self.ID_COL, self.ID_DATA)
        self.DATA_QUERY.addOrderBy(self.ID_COL)
Esempio n. 20
0
    def loadResults(self, patientId, relativeTime, conn=None):
        """Load all results active by the given relativeTime.

        Will look for sim_patient_state times and sim_patient_order for
        diagnostic orders, to extrapolate all state-specific results for each
        order, or using default values if no state specific ones available.

        Two queries are issued: one for the expected result labels/timing, and
        one for state-specific values; they are joined in memory so that
        default-state values can act as an outer-join fallback.
        Returns a list of result models (rows that have neither a
        state-specific nor a default value are dropped).
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # First query for all expected result labels and states, without state-specific values as
            #   may want outer join behvaior against default state values
            query = SQLQuery()

            query.addSelect("distinct spo.sim_state_id")
            query.addSelect("sr.sim_result_id")
            query.addSelect("sr.name")
            query.addSelect("sr.description")
            query.addSelect("sr.priority")
            query.addSelect("sr.group_string")

            query.addSelect("spo.relative_time_start")
            query.addSelect("sorm.turnaround_time")
            # Result becomes visible only after the order's turnaround time
            query.addSelect(
                "(spo.relative_time_start + sorm.turnaround_time) as result_relative_time"
            )

            query.addFrom("sim_patient_order as spo")
            query.addFrom("sim_order_result_map as sorm")
            query.addFrom("sim_result as sr")

            query.addWhere("spo.clinical_item_id = sorm.clinical_item_id")
            query.addWhere("sorm.sim_result_id = sr.sim_result_id")

            query.addWhereEqual("spo.sim_patient_id", patientId)
            # Only unlock results if appropiate prereq orders were placed in the past (and longer than the turnaround time)
            query.addWhereOp("spo.relative_time_start + sorm.turnaround_time",
                             "<=", relativeTime)
            # Also check that the triggering order was not cancelled before the completion of the turnaround time
            query.addWhere(
                "( spo.relative_time_end is null or spo.relative_time_start + sorm.turnaround_time <= spo.relative_time_end )"
            )

            query.addOrderBy("result_relative_time")
            query.addOrderBy("sr.priority")

            resultTable = DBUtil.execute(query,
                                         includeColumnNames=True,
                                         conn=conn)
            resultModels = modelListFromTable(resultTable)

            # Pass through results to get set of states to search for
            stateIds = set([DEFAULT_STATE_ID])
            # Include default state to fall back on
            for resultModel in resultModels:
                stateIds.add(resultModel["sim_state_id"])

            # Second query for state-specific values
            valueQuery = SQLQuery()
            valueQuery.addSelect("ssr.sim_state_id")
            valueQuery.addSelect("ssr.sim_result_id")
            valueQuery.addSelect("ssr.num_value")
            valueQuery.addSelect("ssr.num_value_noise")
            valueQuery.addSelect("ssr.text_value")
            valueQuery.addSelect("ssr.result_flag")
            valueQuery.addSelect("ssr.clinical_item_id")
            # Output clinical item if result flag means something
            valueQuery.addFrom("sim_state_result as ssr")
            valueQuery.addWhereIn("ssr.sim_state_id", stateIds)
            valueTable = DBUtil.execute(valueQuery,
                                        includeColumnNames=True,
                                        conn=conn)
            valueModels = modelListFromTable(valueTable)

            # Store in-memory dictionary for rapid cross-referencing "join" to result table
            # Nested mapping: result id -> state id -> value row
            valueModelByStateIdByResultId = dict()
            for valueModel in valueModels:
                resultId = valueModel["sim_result_id"]
                stateId = valueModel["sim_state_id"]
                if resultId not in valueModelByStateIdByResultId:
                    valueModelByStateIdByResultId[resultId] = dict()
                valueModelByStateIdByResultId[resultId][stateId] = valueModel

            # Now go back through original results and join up state-specific values, or use default values if needed
            resultValueModels = list()
            for resultModel in resultModels:
                resultId = resultModel["sim_result_id"]
                stateId = resultModel["sim_state_id"]
                if resultId in valueModelByStateIdByResultId:
                    valueModelByStateId = valueModelByStateIdByResultId[
                        resultId]
                    if stateId in valueModelByStateId:
                        # Have a state-specific value, populate that
                        valueModel = valueModelByStateId[stateId]
                        resultModel.update(valueModel)
                    elif DEFAULT_STATE_ID in valueModelByStateId:
                        # No state-specific value, but have a default one to populate instead
                        valueModel = valueModelByStateId[DEFAULT_STATE_ID]
                        resultModel.update(valueModel)
                    resultValueModels.append(resultModel)
                else:
                    # No result information available, even in default state. Skip these
                    #resultModel["num_value"] = None;
                    #resultModel["num_value_noise"] = None;
                    #resultModel["text_value"] = None;
                    #resultModel["result_flag"] = None;
                    #resultModel["clinical_item_id"] = None;
                    pass

            return resultValueModels
        finally:
            if not extConn:
                conn.close()
Esempio n. 21
0
	def _getClinicalItems(self):
		"""Dump all clinical items (id + name) to clinical_items.csv.

		Prints the query and its parameters for debugging, runs the setup DB
		script, executes the query, and writes the rows out via pandas.
		"""
		query = SQLQuery()
		# SELECT
		query.addSelect(CLINICAL_ITEM_ID)
		query.addSelect('name')
		# FROM
		query.addFrom('clinical_item')

		# print() form works under both Python 2 and 3; other snippets in this
		# file already use the function form (e.g. print(DATA_QUERY)).
		print(query)
		print(query.getParams())
		DBUtil.runDBScript(self.SCRIPT_FILE, False)
		results = DBUtil.execute(str(query), query.getParams())

		pd.DataFrame(results, columns=query.select).to_csv('clinical_items.csv', index=False)
Esempio n. 22
0
    def loadPendingResultOrders(self, patientId, relativeTime, conn=None):
        """Find patient orders placed by relativeTime whose results are still
        pending, with an estimate of time until each result becomes available.
        """
        ownConnection = conn is None
        if ownConnection:
            conn = self.connFactory.connection()
        try:
            pendingQuery = SQLQuery()
            # Distinct so panel orders don't report multiple times per item
            pendingQuery.addSelect("distinct po.clinical_item_id")
            pendingQuery.addSelect("po.relative_time_start")
            pendingQuery.addSelect("po.relative_time_end")
            pendingQuery.addSelect("ci.name")
            pendingQuery.addSelect("ci.description")
            # A single order may map to several turnaround times for different
            # sub-results; just report each one
            pendingQuery.addSelect("sorm.turnaround_time")
            # Expected remaining wait until the result appears
            pendingQuery.addSelect(
                "sorm.turnaround_time - (%d - po.relative_time_start) as time_until_result"
                % relativeTime)
            for sourceTable in ("sim_patient_order as po",
                                "clinical_item as ci",
                                "sim_order_result_map as sorm"):
                pendingQuery.addFrom(sourceTable)
            pendingQuery.addWhere("po.clinical_item_id = ci.clinical_item_id")
            pendingQuery.addWhere("po.clinical_item_id = sorm.clinical_item_id")
            pendingQuery.addWhereEqual("sim_patient_id", patientId)

            # Placed by the given relativeTime and not yet cancelled
            pendingQuery.addWhereOp("relative_time_start", "<=", relativeTime)
            pendingQuery.openWhereOrClause()
            pendingQuery.addWhere("relative_time_end is null")
            pendingQuery.addWhereOp("relative_time_end", ">", relativeTime)
            pendingQuery.closeWhereOrClause()

            # Exclude orders whose results should already be available
            pendingQuery.addWhereOp("sorm.turnaround_time + po.relative_time_start",
                                    ">", relativeTime)

            for sortCol in ("time_until_result", "relative_time_start",
                            "ci.name"):
                pendingQuery.addOrderBy(sortCol)

            pendingTable = DBUtil.execute(pendingQuery,
                                          includeColumnNames=True,
                                          conn=conn)
            return modelListFromTable(pendingTable)
        finally:
            if ownConnection:
                conn.close()
Esempio n. 23
0
    def _getPatientsLabsHistories(self, proc_codes):
        """Stream (pat_id, abnormal_yn, result_time, proc_code) rows for
        finalized lab results whose proc_code is in *proc_codes*, ordered by
        proc_code, then patient, then result time.
        """
        labs_query = SQLQuery()
        for select_col in ('pat_id', 'abnormal_yn', 'result_time',
                           'proc_code'):
            labs_query.addSelect(select_col)
        labs_query.addFrom('stride_order_proc')
        # Only completed results are of interest
        labs_query.addWhereEqual('lab_status', 'Final result')
        labs_query.addWhereIn('proc_code', proc_codes)
        for sort_col in ('proc_code', 'pat_id', 'result_time'):
            labs_query.addOrderBy(sort_col)

        return customDBUtil.execute(labs_query)
Esempio n. 24
0
    def loadNotes(self, patientId, currentTime, conn=None):
        """Return models for notes committed up to the given simulation time.

        A note's effective time is its state's start time plus the note's
        offset within that state; notes only unlock once the simulation
        clock passes that time.
        """
        ownConnection = conn is None
        if ownConnection:
            conn = self.connFactory.connection()
        try:
            noteQuery = SQLQuery()
            for selectCol in (
                    "sn.sim_note_id",
                    "sps.sim_patient_id",  # Link back to the patient
                    "sn.sim_state_id",
                    "sn.note_type_id",
                    "sn.author_type_id",
                    "sn.service_type_id",
                    # Effective note time = state start + offset within state
                    "(sps.relative_time_start + sn.relative_state_time) as relative_time",
                    "sn.content"):
                noteQuery.addSelect(selectCol)

            noteQuery.addFrom("sim_note as sn")
            noteQuery.addFrom("sim_patient_state as sps")

            noteQuery.addWhere("sn.sim_state_id = sps.sim_state_id")
            noteQuery.addWhereEqual("sps.sim_patient_id", patientId)
            # Only notes whose effective time has been reached
            noteQuery.addWhereOp(
                "(sps.relative_time_start + sn.relative_state_time)", "<=",
                currentTime)
            noteQuery.addOrderBy(
                "(sps.relative_time_start + sn.relative_state_time)")

            noteTable = DBUtil.execute(noteQuery,
                                       includeColumnNames=True,
                                       conn=conn)
            return modelListFromTable(noteTable)
        finally:
            if ownConnection:
                conn.close()
Esempio n. 25
0
    def _query_patient_episodes(self):
        """Build and run the patient-episode query for this lab component.

        Selects one row per (patient, order, component, order_time,
        result_flag) with a binary component_normal label, restricted to a
        random patient sample, and delegates execution to the FeatureMatrix
        base implementation.  Stores the reported episode count on
        self._num_reported_episodes.
        """
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.
        # self._lab_components = self._get_components_in_lab_panel()

        random_patient_list = self._get_random_patient_list()

        # Build SQL query for list of patient episodes.
        # Note that for 2008-2014 data, result_flag can take on any of the
        # following values:  High, Low, High Panic, Low Panic,
        # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
        # (NONE): Only 27 lab components can have this flag. None has this
        #           value for more than 5 results, so ignore it.
        # *: Only 10 lab components can have this flag. Only 6 have it for
        #           >10 tests, and each of them is a microbiology test for which
        #           a flag is less meaningful, e.g. Gram Stain, AFB culture.
        # Susceptible: Only 15 lab components can have this flag. All of those
        #           only have this value for 2 results, so ignore it.
        # Resistant: Only 1 lab component can have this flag. Only two results
        #           have this value, so ignore it.
        # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
        #           have this value for thousands of results, so include it.
        # Negative: Only 10 lab components can have this flag, and all for
        #           less than 5 results, so ignore it.
        # Positive: Only 3 lab components can have this flag, and all for
        #           only 1 result, so ignore it.
        # Low Off-Scale: Only 1 lab component can have this flag, and only for
        #           3 results, so ignore it.
        # Low Panic: 1401 lab components can have this flag, many core
        #           metabolic components. Include it.
        # High Panic: 8084 lab components can have this flag, many core
        #           metabolic components. Include it.
        query = SQLQuery()
        query.addSelect('CAST(pat_id AS BIGINT)')
        query.addSelect('sop.order_proc_id AS order_proc_id')
        query.addSelect('base_name') #sx
        query.addSelect('order_time')
        # query.addSelect("CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel")
        # query.addSelect("SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components")
        # query.addSelect("SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components")
        # query.addSelect("CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal")
        # Label: 1 when the result carries none of the abnormal flags
        query.addSelect("CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 0 ELSE 1 END AS component_normal")
        query.addFrom('stride_order_proc AS sop')
        query.addFrom('stride_order_results AS sor')
        query.addWhere('sop.order_proc_id = sor.order_proc_id')
        query.addWhere("(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)")
        query.addWhereIn("base_name", [self._component])
        query.addWhereIn("pat_id", random_patient_list)
        query.addGroupBy('pat_id')
        query.addGroupBy('sop.order_proc_id')
        query.addGroupBy('base_name') #sx
        query.addGroupBy('order_time')
        query.addGroupBy('result_flag')
        # query.addGroupBy('abnormal_yn') #sx
        query.addOrderBy('pat_id')
        query.addOrderBy('sop.order_proc_id')
        query.addOrderBy('base_name') #sx
        query.addOrderBy('order_time')
        query.setLimit(self._num_requested_episodes)

        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(self, query, index_time_col='order_time')
Esempio n. 26
0
    def clinicalItemSearch(self, itemQuery, conn=None):
        """Look for clinical items based on specified query criteria.

        itemQuery - object carrying optional filters: searchStr (word-prefix
            search against item descriptions), sourceTables, analysisStatus,
            sortField, and resultCount (row limit).
        Returns a list of clinical item row models with category descriptors.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect("ci.clinical_item_id")
            query.addSelect("ci.name")
            query.addSelect("ci.description")
            query.addSelect("cic.source_table")
            query.addSelect("cic.description as category_description")
            query.addFrom("clinical_item as ci")
            query.addFrom("clinical_item_category as cic")
            query.addWhere(
                "ci.clinical_item_category_id = cic.clinical_item_category_id")
            if itemQuery.searchStr is not None:
                searchWords = itemQuery.searchStr.split()
                #query.openWhereOrClause();
                # Every search word must match (AND semantics), each at the
                # start of the description or after a non-letter boundary.
                # ~* is a case-insensitive regex match (PostgreSQL syntax).
                for searchField in ("ci.description", ):
                    for searchWord in searchWords:
                        query.addWhereOp(
                            searchField, "~*",
                            "^%(searchWord)s|[^a-z]%(searchWord)s" %
                            {"searchWord": searchWord})
                        # Prefix search by regular expression
                #query.closeWhereOrClause();
            if itemQuery.sourceTables:
                query.addWhereIn("cic.source_table", itemQuery.sourceTables)
            if itemQuery.analysisStatus is not None:
                query.addWhereEqual("ci.analysis_status",
                                    itemQuery.analysisStatus)
                query.addWhere("ci.item_count <> 0")
                # Also ignore items with no occurence in the analyzed data (occurs if item was accepted for analysis from multi-year dataset, but never used in a sub-time frame's analysis)

            if itemQuery.sortField:
                query.addOrderBy(itemQuery.sortField)
            query.addOrderBy("cic.description")
            query.addOrderBy("ci.name")
            query.addOrderBy("ci.description")
            if itemQuery.resultCount is not None:
                # NOTE(review): sets the limit attribute directly; other
                # snippets use query.setLimit(...) -- confirm both supported.
                query.limit = itemQuery.resultCount
            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            return dataModels
        finally:
            if not extConn:
                conn.close()
Esempio n. 27
0
# Script fragment: build a query for H&P notes from stride_note.
# NOTE(review): hard-coded absolute user path -- not portable.
os.chdir('/Users/jwang/Desktop/ClinicalDecisionMaker')
from medinfo.common.test.Util import MedInfoTestCase;
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery;

# Create SCRIPT_FILE

# clinical_item_category_id = 13 (Discharge)
# clinical_item_category_id = 23 (Admission)

# H&P Note

# NOTE(review): SCRIPT_FILE holds a psql shell command, not SQL -- it is
# never executed here; presumably a reminder of the target database.
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")
DATA_QUERY = SQLQuery();

# Author/patient/note metadata columns for each H&P note
DATA_QUERY.addSelect("author_name")
DATA_QUERY.addSelect("pat_id")
DATA_QUERY.addSelect("note_date")
DATA_QUERY.addSelect("note_type")
DATA_QUERY.addSelect("provider_type")
DATA_QUERY.addSelect("specialty")
DATA_QUERY.addSelect("pat_enc_csn_id")

DATA_QUERY.addFrom("stride_note")
DATA_QUERY.addWhereEqual("note_type", "H&P")
DATA_QUERY.addOrderBy("author_name", dir="asc")
print(DATA_QUERY)

# Write out data to CSV
Esempio n. 28
0
    def orderSetSearch(self, itemQuery, conn=None):
        """Look for item collections (order sets) matching the query criteria.

        Joins item_collection -> item_collection_item -> clinical_item,
        excluding ad-hoc sections, then aggregates the result rows into one
        model per order set and yields each as a dict:
            {"external_id": ..., "name": ..., "itemList": [row, ...]}

        itemQuery: criteria object; honors optional searchStr (whitespace
            separated words, each matched as a case-insensitive prefix regex
            against the collection name) and analysisStatus.
        conn: optional existing DB connection; if None, a connection is
            created here and closed before returning.

        Fix: previously the trailing yield emitted a bare None when the
        query matched no rows; an empty result now yields nothing.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect("ic.item_collection_id")
            query.addSelect("ic.external_id")
            query.addSelect("ic.name as collection_name")
            query.addSelect("ic.section")
            query.addSelect("ic.subgroup")
            query.addSelect("ci.clinical_item_category_id")
            query.addSelect("ci.clinical_item_id")
            query.addSelect("ci.name")
            query.addSelect("ci.description")
            query.addFrom("item_collection as ic")
            query.addFrom("item_collection_item as ici")
            query.addFrom("clinical_item as ci")
            query.addWhere("ic.item_collection_id = ici.item_collection_id")
            query.addWhere("ici.clinical_item_id = ci.clinical_item_id")
            query.addWhereNotEqual("ic.section", AD_HOC_SECTION)
            if itemQuery.searchStr is not None:
                searchWords = itemQuery.searchStr.split()
                for searchWord in searchWords:
                    # Prefix search by regular expression: word start either at
                    # the beginning of the name or after a non-letter character
                    query.addWhereOp(
                        "ic.name", "~*",
                        "^%(searchWord)s|[^a-z]%(searchWord)s" %
                        {"searchWord": searchWord})
            if itemQuery.analysisStatus is not None:
                query.addWhereEqual("ci.analysis_status",
                                    itemQuery.analysisStatus)
            query.addOrderBy("lower(ic.name)")
            query.addOrderBy("ic.external_id")
            query.addOrderBy("lower(ic.section)")
            query.addOrderBy("lower(ic.subgroup)")
            query.addOrderBy("ci.clinical_item_id")
            query.addOrderBy("ci.name")
            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)

            # Aggregate flat rows up into order set models, keyed by external_id.
            # Rows arrive grouped by external_id thanks to the ORDER BY clauses.
            orderSetModel = None
            for row in dataModels:
                if orderSetModel is None or row[
                        "external_id"] != orderSetModel["external_id"]:
                    if orderSetModel is not None:
                        # Prior order set complete; emit it before starting the next
                        yield orderSetModel
                    orderSetModel = \
                        {   "external_id": row["external_id"],
                            "name": row["collection_name"],
                            "itemList": list(),
                        }
                orderSetModel["itemList"].append(row)
            if orderSetModel is not None:
                # Emit the last processed model (skipped entirely when no rows matched)
                yield orderSetModel
        finally:
            if not extConn:
                conn.close()
# Build a lookup from treatment team name to its aggregated label.
# Input is tab-separated with two header lines to skip.
treatment_aggregation_map = {}
treatment_aggregations.readline()
treatment_aggregations.readline()
for rawLine in treatment_aggregations:
    fields = rawLine.strip().split("\t")
    treatment_aggregation_map[fields[0]] = fields[1]
print(treatment_aggregation_map)

# Create SCRIPT_FILE
SCRIPT_FILE = StringIO()
SCRIPT_FILE.write("psql stride jwang198")

# Find all clinical_item_ids associated with each patient_id
# SELECT * FROM clinical_item WHERE clinical_item_category_id = 161;

DATA_QUERY = SQLQuery()
for selectColumn in ("clinical_item_id", "description",
                     "clinical_item_category_id"):
    DATA_QUERY.addSelect(selectColumn)
DATA_QUERY.addWhereEqual("clinical_item_category_id", "161")
DATA_QUERY.addFrom("clinical_item")

print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)

output = open("/Users/jwang/Desktop/Results/treatment_teams.csv", "w")
output.write(
    "clinical_item_id,description,clinical_item_category_id,treatment_team\n")
Esempio n. 30
0
    def mergeRelated(self,
                     baseClinicalItemId,
                     clinicalItemIds,
                     reassignMergedItems=True,
                     conn=None):
        """The specified clinical items will be merged / composited into the base clinical item provided.
        The remaining now redundant items will be deactivated
        Patient_item instances will be reassigned to the merged clinical_item
        (while backup links will be saved to backup_link_patient_item),
        clinical_item_association counts for the redundant items will removed and analyze_dates reset,
        requiring a re-run of AssociationAnalysis to redo those counts from scratch
        (but will now count as the merged / composite item rather than separate ones).

        Could theoretically figure out how to combine the association stats without re-running analysis, but
            patient_counts are supposed to ignore duplicates, so hard to know how to aggregate stats
            (not enough info in them to tell if unique cooccurrences?)

        Examples this could be relevant for:
        All blood transfusion indexes, G vs J vs Feeding tube equivalent, Ear, Eyes med routes irrelevant which ear/eye.

        baseClinicalItemId: item that survives the merge, renamed to the composite name.
        clinicalItemIds: collection of item IDs to fold into the base (may include the base itself).
        reassignMergedItems: if True, repoint patient_item rows at the base item,
            backing up the prior links to backup_link_patient_item first.
        conn: optional existing DB connection; created and closed here if None.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # Deactivate other items (the base item itself is excluded)
            deactivateIds = set(clinicalItemIds)
            deactivateIds.discard(baseClinicalItemId)
            self.deactivateAnalysis(deactivateIds, conn=conn)

            # Build composite item name and description
            allIds = set(deactivateIds)
            allIds.add(baseClinicalItemId)

            query = SQLQuery()
            query.addSelect("clinical_item_id")
            query.addSelect("name")
            query.addSelect("description")
            query.addFrom("clinical_item")
            query.addWhereIn("clinical_item_id", allIds)
            query.addOrderBy("name")
            # Ensure consistency across multiple runs
            results = DBUtil.execute(query, conn=conn)

            nameList = list()
            descrList = list()
            # First pass: the base item's name/description leads the composite
            for (clinicalItemId, name, description) in results:
                if clinicalItemId == baseClinicalItemId:
                    nameList.append(name if name is not None else "")
                    descrList.append(description if description is not None else "")
                    break
            # Second pass: append the merged items in name order
            for (clinicalItemId, name, description) in results:
                if clinicalItemId != baseClinicalItemId:
                    nameList.append(name if name is not None else "")
                    descrList.append(description if description is not None else "")
            compositeName = "+".join(nameList)
            compositeDescription = "+".join(descrList)

            DBUtil.updateRow("clinical_item", {
                "name": compositeName,
                "description": compositeDescription
            },
                             baseClinicalItemId,
                             conn=conn)

            if reassignMergedItems:
                # Reassign other items to the base item, but save backup data first
                query = SQLQuery()
                query.addSelect("patient_item_id")
                query.addSelect("clinical_item_id")
                query.addFrom("patient_item")
                query.addWhereIn("clinical_item_id", deactivateIds)
                results = DBUtil.execute(query, conn=conn)

                insertQuery = DBUtil.buildInsertQuery(
                    "backup_link_patient_item",
                    ["patient_item_id", "clinical_item_id"])
                for (patientItemId, clinicalItemId) in results:
                    insertParams = (patientItemId, clinicalItemId)
                    try:
                        # Optimistic insert of a new unique item
                        DBUtil.execute(insertQuery, insertParams, conn=conn)
                    except conn.IntegrityError as err:
                        # "except ... as" form is valid on both Python 2.6+ and 3.
                        # If turns out to be a duplicate, okay, just note it and continue to insert whatever else is possible
                        log.info(err)

                # Now to actual reassignment of patient items to the unifying base clinical item
                placeholders = generatePlaceholders(len(deactivateIds))
                query = "update patient_item set clinical_item_id = %s where clinical_item_id in (%s)" % (
                    DBUtil.SQL_PLACEHOLDER, placeholders)
                params = [baseClinicalItemId]
                params.extend(deactivateIds)
                DBUtil.execute(query, params, conn=conn)

        finally:
            if not extConn:
                conn.close()